PyPI - partial-qr - Versions diffs - 0.0.29__tar.gz - Mend

partial-qr 0.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

partial_qr-0.0.29/LICENSE +21 -0
partial_qr-0.0.29/PKG-INFO +50 -0
partial_qr-0.0.29/README.md +24 -0
partial_qr-0.0.29/partial_qr/__index__.py +0 -0
partial_qr-0.0.29/partial_qr/__init__.py +0 -0
partial_qr-0.0.29/partial_qr/__main__.py +149 -0
partial_qr-0.0.29/partial_qr/decoders/__init__.py +0 -0
partial_qr-0.0.29/partial_qr/decoders/data/__init__.py +0 -0
partial_qr-0.0.29/partial_qr/decoders/data/constraints.py +270 -0
partial_qr-0.0.29/partial_qr/decoders/data/data.py +407 -0
partial_qr-0.0.29/partial_qr/decoders/data/decode.py +550 -0
partial_qr-0.0.29/partial_qr/decoders/data/galois.py +99 -0
partial_qr-0.0.29/partial_qr/decoders/data/reed_solomon.py +391 -0
partial_qr-0.0.29/partial_qr/decoders/data/stream.py +272 -0
partial_qr-0.0.29/partial_qr/decoders/formatinfo.py +106 -0
partial_qr-0.0.29/partial_qr/decoders/mask.py +163 -0
partial_qr-0.0.29/partial_qr/decoders/version.py +102 -0
partial_qr-0.0.29/partial_qr/matrix.py +75 -0
partial_qr-0.0.29/partial_qr.egg-info/PKG-INFO +50 -0
partial_qr-0.0.29/partial_qr.egg-info/SOURCES.txt +24 -0
partial_qr-0.0.29/partial_qr.egg-info/dependency_links.txt +1 -0
partial_qr-0.0.29/partial_qr.egg-info/entry_points.txt +2 -0
partial_qr-0.0.29/partial_qr.egg-info/requires.txt +14 -0
partial_qr-0.0.29/partial_qr.egg-info/top_level.txt +1 -0
partial_qr-0.0.29/pyproject.toml +47 -0
partial_qr-0.0.29/setup.cfg +4 -0

partial_qr-0.0.29/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Bence Skorka
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

partial_qr-0.0.29/PKG-INFO ADDED Viewed

@@ -0,0 +1,50 @@
+Metadata-Version: 2.4
+Name: partial-qr
+Version: 0.0.29
+Summary: Partial QR Code Decoder
+Author: Bence Skorka
+License-Expression: MIT
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Environment :: Console
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: click
+Requires-Dist: pillow
+Provides-Extra: test
+Requires-Dist: black; extra == "test"
+Requires-Dist: mock; extra == "test"
+Requires-Dist: mypy; extra == "test"
+Requires-Dist: pytest-cov; extra == "test"
+Requires-Dist: pytest-mock; extra == "test"
+Requires-Dist: pytest-mypy; extra == "test"
+Requires-Dist: pytest-timeout; extra == "test"
+Requires-Dist: pytest; extra == "test"
+Requires-Dist: qrcode; extra == "test"
+Requires-Dist: types-mock; extra == "test"
+Dynamic: license-file
+# Partial QR Code Decoder
+An aggressive QR code decoder that can recover data from partially missing or damaged QR codes.
+## Usage
+```bash
+partial-qr --input-file path/to/qr_code_matrix.txt
+```
+If the input is too corrupted, the brute-force strength can be increased with `--max-brute-bits`
+(default: 10), but beware that the runtime grows exponentially, especially for higher QR versions:
+```bash
+partial-qr --input-file path/to/qr_code_matrix.txt --max-brute-bits 24
+```
+Additional hints can be passed to speed up the brute-force search:
+- `--ascii` - Assume the data is printable ASCII, which reduces the search space significantly.
+- `--url` - Assume the data is a URL, so it must start with either `http://` or `https://` (implies `--ascii`).
+- `--parallel` - Distribute the brute-force search across all available CPU cores.

partial_qr-0.0.29/README.md ADDED Viewed

@@ -0,0 +1,24 @@
+# Partial QR Code Decoder
+An aggressive QR code decoder that can recover data from partially missing or damaged QR codes.
+## Usage
+```bash
+partial-qr --input-file path/to/qr_code_matrix.txt
+```
+If the input is too corrupted, the brute-force strength can be increased with `--max-brute-bits`
+(default: 10), but beware that the runtime grows exponentially, especially for higher QR versions:
+```bash
+partial-qr --input-file path/to/qr_code_matrix.txt --max-brute-bits 24
+```
+Additional hints can be passed to speed up the brute-force search:
+- `--ascii` - Assume the data is printable ASCII, which reduces the search space significantly.
+- `--url` - Assume the data is a URL, so it must start with either `http://` or `https://` (implies `--ascii`).
+- `--parallel` - Distribute the brute-force search across all available CPU cores.

partial_qr-0.0.29/partial_qr/__index__.py ADDED Viewed

File without changes

partial_qr-0.0.29/partial_qr/__init__.py ADDED Viewed

File without changes

partial_qr-0.0.29/partial_qr/__main__.py ADDED Viewed

@@ -0,0 +1,149 @@
+import click
+from .decoders.data.decode import decode_qr_data, matrix_to_image, reconstruct_matrix
+from .decoders.formatinfo import decode_format_version_information
+from .decoders.mask import apply_mask
+from .decoders.version import decode_version_information
+from .matrix import load_matrix_from_file
+@click.command()
+@click.option(
+    "-i",
+    "--input-file",
+    type=click.Path(exists=True),
+    required=True,
+    help="Path to the input data file.",
+)
+@click.option(
+    "--ascii",
+    "assume_ascii",
+    is_flag=True,
+    default=False,
+    help="Assume payload is printable ASCII text (byte mode, MSB=0 per byte).",
+)
+@click.option(
+    "--url",
+    "assume_url",
+    is_flag=True,
+    default=False,
+    help="Assume data starts with http:// or https:// (implies --ascii).",
+)
+@click.option(
+    "--max-brute-bits",
+    type=int,
+    default=10,
+    show_default=True,
+    help="Max unknown bits to brute-force per RS block (2^N combinations). Higher values take exponentially longer.",
+)
+@click.option(
+    "--parallel",
+    is_flag=True,
+    default=False,
+    help="Distribute brute-force work across all CPU cores (useful with large --max-brute-bits).",
+)
+@click.option(
+    "--reconstruct",
+    type=click.Path(),
+    default=None,
+    help="Save the fully reconstructed QR code as an image to the given path.",
+)
+@click.option(
+    "--transpose-reconstruct",
+    is_flag=True,
+    default=False,
+    help="Transpose the reconstructed QR code before saving (to fix reconstruction when input is column-major).",
+)
+def main(
+    input_file: str,
+    assume_ascii: bool,
+    assume_url: bool,
+    max_brute_bits: int,
+    parallel: bool,
+    reconstruct: str | None,
+    transpose_reconstruct: bool,
+) -> None:
+    """Decode a partially missing or damaged QR code matrix and print what was recovered."""
+    # The docstring above is kept to one line because click shows it as the --help text.
+    # Step 1: load the matrix and decode the format / version metadata.
+    matrix = load_matrix_from_file(input_file)
+    format_info = decode_format_version_information(matrix)
+    version_info = decode_version_information(matrix)
+    print("Decoded format information:")
+    print(f"  Error correction level: {format_info.error_correction_level}")
+    print(f"  Mask pattern: {format_info.mask_pattern}")
+    print(f"Decoded version information: {version_info.version if version_info else 'Unknown'}")
+    if format_info.mask_pattern is None:
+        print("Cannot decode format information, stopping here.")
+        return
+    if not version_info:
+        # Unmasking and data decoding both take the version as an argument.  Stop
+        # with a message instead of failing on `version_info.version` below.
+        print("Cannot decode version information, stopping here.")
+        return
+    version = version_info.version
+    # Step 2: undo the mask; without an error correction level we can go no further.
+    flipped = apply_mask(matrix, format_info.mask_pattern, version)
+    if format_info.error_correction_level is None:
+        print("Cannot decode error correction level, so not applying error correction decoding.")
+        print("Partially decoded (masked) matrix:")
+        print(flipped)
+        return
+    # Step 3: decode the data, passing the user's content hints through.
+    result = decode_qr_data(
+        flipped,
+        version,
+        format_info.error_correction_level,
+        assume_ascii=assume_ascii,
+        assume_url=assume_url,
+        max_brute_bits=max_brute_bits,
+        parallel=parallel,
+    )
+    print("\nData decoding:")
+    print(f"  Data codewords: {result.known_data_codewords}/{result.total_data_codewords} known")
+    print(f"  EC codewords: {result.known_ec_codewords}/{result.total_ec_codewords} known")
+    print(f"  RS blocks corrected: {result.blocks_corrected}/{result.block_info.total_blocks}")
+    print(f"  EC capacity per block: {result.block_info.ec_codewords_per_block} erasures")
+    # Summarise only the constraint statistics that are present and non-zero.
+    if result.constraint_stats:
+        cs = result.constraint_stats
+        parts = []
+        if cs.get("prefix"):
+            parts.append(f"URL prefix \"{cs['prefix']}\"")
+        if cs.get("bits_set"):
+            parts.append(f"{cs['bits_set']} bits set")
+        if cs.get("codewords_resolved"):
+            parts.append(f"{cs['codewords_resolved']} codewords resolved")
+        if cs.get("padding_bits_set"):
+            parts.append(f"{cs['padding_bits_set']} padding bits set")
+        if cs.get("padding_codewords_resolved"):
+            parts.append(f"{cs['padding_codewords_resolved']} padding codewords resolved")
+        if parts:
+            print(f"  Constraints: {', '.join(parts)}")
+    if result.decoded.segments:
+        print("\nDecoded segments:")
+        for seg in result.decoded.segments:
+            print(f"  [{seg.mode_name}] count={seg.char_count}: {repr(seg.data)}")
+        print(f"\nRecovered text: {result.decoded.raw_text}")
+    # Step 4 (optional): write the reconstructed QR code image.
+    if reconstruct:
+        recovered = reconstruct_matrix(matrix, result, format_info.mask_pattern)
+        if transpose_reconstruct:
+            recovered = recovered.transpose()
+        matrix_to_image(recovered, reconstruct)
+        print(f"\nReconstructed QR code saved to: {reconstruct}")
+    # Step 5: when nothing was decoded, show how the unknown codewords compare with RS capacity.
+    if not result.decoded.segments:
+        # Unknown codewords (data + EC) divided evenly over the blocks, rounded down.
+        total_erasures_per_block = (
+            result.total_data_codewords
+            + result.total_ec_codewords
+            - result.known_data_codewords
+            - result.known_ec_codewords
+        ) // result.block_info.total_blocks
+        print("\nNo data segments could be decoded.")
+        print(f"  Average erasures per block: ~{total_erasures_per_block}")
+        print(
+            f"  RS can correct up to {result.block_info.ec_codewords_per_block} erasures per block."
+        )
+        if total_erasures_per_block > result.block_info.ec_codewords_per_block:
+            print("  (Too many unknowns for RS correction — need more of the QR code)")
+# Entry point when this file is executed directly (e.g. `python -m partial_qr`).
+if __name__ == "__main__":
+    main()

partial_qr-0.0.29/partial_qr/decoders/__init__.py ADDED Viewed

File without changes

partial_qr-0.0.29/partial_qr/decoders/data/__init__.py ADDED Viewed

File without changes

partial_qr-0.0.29/partial_qr/decoders/data/constraints.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""Content-based constraints for narrowing partial QR code data.
+When we know (or assume) something about the QR code's content, we can
+set additional data bits and reduce the erasure count for Reed-Solomon.
+Supported constraints:
+- ASCII: assume all payload bytes are printable ASCII (0x20-0x7E).
+  Sets MSB=0 for each payload byte (since printable ASCII < 0x80).
+- URL prefix: assume the data starts with "http://" or "https://".
+  Sets exact bit values for the prefix bytes.
+- Padding: after all data segments end, QR codes fill remaining data
+  codewords with a deterministic padding sequence: 4-bit terminator
+  (0000), byte alignment (0s), then alternating 0xEC and 0x11.
+  This constraint is always applied when segment headers are readable.
+ASCII and URL constraints assume byte mode (mode indicator 0100).
+"""
+from typing import Optional
+from .data import PartialCodeword, bits_to_partial_codewords
+from .stream import CHAR_COUNT_BITS
+def _header_bit_count(version: int, mode: int = 0b0100) -> int:
+    """Return the segment header length in bits for a QR version and mode.
+    The header is the 4-bit mode indicator plus the character count field.
+    The field width is looked up in ``CHAR_COUNT_BITS`` by ``mode`` (byte mode,
+    0b0100, by default) and by version group: group 0 for ``version <= 9``,
+    group 1 for ``version <= 26``, group 2 otherwise.
+    """
+    version_group = 0 if version <= 9 else (1 if version <= 26 else 2)
+    count_field_bits = CHAR_COUNT_BITS[mode][version_group]
+    return 4 + count_field_bits
+def apply_content_constraints(
+    data_partial_cws: list[PartialCodeword],
+    version: int,
+    assume_ascii: bool = False,
+    url_prefix: str | None = None,
+) -> tuple[list[PartialCodeword], dict[str, object]]:
+    """Pin data bits that follow from assumptions about the payload.
+    ``data_partial_cws`` must be the data partial codewords concatenated in
+    data-stream order (block 0 data, block 1 data, ...).  With neither
+    ``assume_ascii`` nor ``url_prefix`` given, a copy of the input is
+    returned with ``{"bits_set": 0}``.
+    Otherwise the first four bits are pinned to the byte-mode indicator
+    (0100), the bytes of ``url_prefix`` (encoded as ASCII) are pinned right
+    after the header, and the most significant bit of every remaining
+    payload byte is pinned to 0.
+    Returns ``(constrained_codewords, stats)``.  ``stats["conflict"]`` is set
+    when the mode indicator or a prefix bit contradicts an already known bit;
+    in that case the original codewords are returned.  A known bit that
+    contradicts the MSB=0 rule is left as it is and is not reported.
+    """
+    if url_prefix is None and not assume_ascii:
+        return list(data_partial_cws), {"bits_set": 0}
+    # Work on one flat bit list; None marks a bit that is still unknown.
+    flat: list[Optional[int]] = [bit for cw in data_partial_cws for bit in cw.known_bits]
+    known_before = sum(bit is not None for bit in flat)
+    payload_start = _header_bit_count(version)
+    def assign(position: int, value: int) -> bool:
+        """Pin one bit; return False only if a known bit disagrees."""
+        if position >= len(flat):
+            # Positions past the end of the stream are silently accepted.
+            return True
+        current = flat[position]
+        if current is None:
+            flat[position] = value
+            return True
+        return current == value
+    # Byte-mode indicator occupies the first four bits: 0100.
+    for offset, mode_bit in enumerate((0, 1, 0, 0)):
+        if not assign(offset, mode_bit):
+            return list(data_partial_cws), {"bits_set": 0, "conflict": "mode"}
+    # Exact bits of the URL prefix, most significant bit of each byte first.
+    if url_prefix is not None:
+        for char_idx, byte_val in enumerate(url_prefix.encode("ascii")):
+            byte_start = payload_start + char_idx * 8
+            for bit_pos in range(8):
+                wanted = (byte_val >> (7 - bit_pos)) & 1
+                if not assign(byte_start + bit_pos, wanted):
+                    return list(data_partial_cws), {
+                        "bits_set": 0,
+                        "conflict": f"prefix_byte_{char_idx}",
+                    }
+    # MSB=0 for every payload byte after the prefix.  At this point at least
+    # one of the two assumptions is active, so this step always runs.
+    first_free_byte = len(url_prefix) if url_prefix else 0
+    declared_count = _read_known_bits(flat, 4, payload_start - 4)
+    if declared_count is None:
+        # Character count unreadable: cover every whole byte after the header.
+        byte_limit = (len(flat) - payload_start) // 8
+    else:
+        byte_limit = declared_count
+    for byte_idx in range(first_free_byte, byte_limit):
+        # The result is ignored on purpose: a conflicting known bit stays as is.
+        assign(payload_start + byte_idx * 8, 0)
+    constrained = bits_to_partial_codewords(flat)
+    known_after = sum(bit is not None for bit in flat)
+    resolved_before = sum(1 for cw in data_partial_cws if cw.is_fully_known)
+    resolved_after = sum(1 for cw in constrained if cw.is_fully_known)
+    stats: dict[str, object] = {
+        "bits_set": known_after - known_before,
+        "codewords_resolved": resolved_after - resolved_before,
+    }
+    if url_prefix:
+        stats["prefix"] = url_prefix
+    return constrained, stats
+def _compute_segment_data_bits(mode: int, char_count: int) -> int | None:
+    """Return how many data bits a segment of ``char_count`` characters occupies.
+    ``mode`` is the 4-bit mode indicator.  Returns None for any indicator other
+    than numeric (0001), alphanumeric (0010), byte (0100) or kanji (1000).
+    """
+    match mode:
+        case 0b0001:
+            # Numeric: 10 bits per group of three digits, 7 bits for two
+            # leftover digits, 4 bits for one.
+            triples, leftover = divmod(char_count, 3)
+            return triples * 10 + (0, 4, 7)[leftover]
+        case 0b0010:
+            # Alphanumeric: 11 bits per pair, 6 bits for a single leftover.
+            pairs, leftover = divmod(char_count, 2)
+            return pairs * 11 + leftover * 6
+        case 0b0100:
+            # Byte: 8 bits per character.
+            return char_count * 8
+        case 0b1000:
+            # Kanji: 13 bits per character.
+            return char_count * 13
+        case _:
+            return None
+def _read_known_bits(bits: list[Optional[int]], pos: int, count: int) -> int | None:
+    """Decode ``count`` bits starting at ``pos`` as a big-endian unsigned integer.
+    Returns None when the window runs past the end of ``bits`` or when any bit
+    inside it is unknown (None).
+    """
+    stop = pos + count
+    if stop > len(bits):
+        return None
+    accumulated = 0
+    for index in range(pos, stop):
+        bit = bits[index]
+        if bit is None:
+            return None
+        # Most significant bit first: shift what we have and append this bit.
+        accumulated = (accumulated << 1) | bit
+    return accumulated
+def apply_padding_constraints(
+    data_partial_cws: list[PartialCodeword],
+    version: int,
+) -> tuple[list[PartialCodeword], dict[str, object]]:
+    """Fill in the bits of the deterministic QR padding sequence.
+    After the last data segment a QR code carries:
+    1. Terminator: up to 4 zero bits (0000)
+    2. Byte alignment: zero bits up to the next byte boundary
+    3. Pad bytes: alternating 0xEC (11101100) and 0x11 (00010001)
+    The segment headers (mode indicator + character count) are walked to
+    find where the data ends; the padding bits from there on are pinned.
+    If a header cannot be read, or a padding bit contradicts a known bit,
+    a copy of the input is returned with ``"padding_bits_set": 0`` (plus a
+    ``"conflict"`` entry naming the step in the latter case).
+    Returns (constrained_codewords, stats).
+    """
+    # Work on one flat bit list; None marks a bit that is still unknown.
+    stream: list[Optional[int]] = [bit for cw in data_partial_cws for bit in cw.known_bits]
+    stream_len = len(stream)
+    known_before = sum(bit is not None for bit in stream)
+    # Version group selects the character-count field width.
+    version_group = 0 if version <= 9 else (1 if version <= 26 else 2)
+    def unchanged(conflict: str | None = None) -> tuple[list[PartialCodeword], dict[str, object]]:
+        """Return the input untouched, optionally naming the conflicting step."""
+        stats: dict[str, object] = {"padding_bits_set": 0}
+        if conflict is not None:
+            stats["conflict"] = conflict
+        return list(data_partial_cws), stats
+    # Walk the segment headers to find the first bit after the data.
+    cursor = 0
+    segments_seen = 0
+    while cursor + 4 <= stream_len:
+        mode = _read_known_bits(stream, cursor, 4)
+        if mode is None:
+            if segments_seen == 0:
+                return unchanged()
+            # At least one segment was parsed, so these partly unknown bits
+            # may be a terminator (0000) or another mode indicator.  Treat
+            # them as a terminator when no known bit among them is 1.
+            if any(bit is not None and bit != 0 for bit in stream[cursor : cursor + 4]):
+                return unchanged()
+            break
+        if mode == 0b0000:
+            # Explicit terminator.
+            break
+        if mode not in CHAR_COUNT_BITS:
+            return unchanged()
+        count_width = CHAR_COUNT_BITS[mode][version_group]
+        char_count = _read_known_bits(stream, cursor + 4, count_width)
+        if char_count is None:
+            return unchanged()
+        payload_bits = _compute_segment_data_bits(mode, char_count)
+        if payload_bits is None:
+            return unchanged()
+        cursor += 4 + count_width + payload_bits
+        segments_seen += 1
+    if cursor >= stream_len:
+        # The data fills the whole stream; there is no room for padding.
+        return unchanged()
+    def pin(position: int, value: int) -> bool:
+        """Pin one bit; return False only if a known bit disagrees."""
+        if position >= stream_len:
+            return True
+        current = stream[position]
+        if current is None:
+            stream[position] = value
+            return True
+        return current == value
+    # 1. Terminator: up to 4 zero bits
+    terminator_end = cursor + min(4, stream_len - cursor)
+    for position in range(cursor, terminator_end):
+        if not pin(position, 0):
+            return unchanged("terminator")
+    cursor = terminator_end
+    # 2. Byte alignment: zeros to next byte boundary (or the end of the stream)
+    alignment_end = min(((cursor + 7) // 8) * 8, stream_len)
+    for position in range(cursor, alignment_end):
+        if not pin(position, 0):
+            return unchanged("byte_align")
+    cursor = alignment_end
+    # 3. Padding bytes: alternating 0xEC and 0x11, whole bytes only
+    pad_pattern = (0xEC, 0x11)
+    for pad_idx, byte_start in enumerate(range(cursor, stream_len - 7, 8)):
+        pad_val = pad_pattern[pad_idx % 2]
+        for bit_pos in range(8):
+            wanted = (pad_val >> (7 - bit_pos)) & 1
+            if not pin(byte_start + bit_pos, wanted):
+                return unchanged(f"pad_byte_{pad_idx}")
+    constrained = bits_to_partial_codewords(stream)
+    known_after = sum(bit is not None for bit in stream)
+    resolved_before = sum(1 for cw in data_partial_cws if cw.is_fully_known)
+    resolved_after = sum(1 for cw in constrained if cw.is_fully_known)
+    return constrained, {
+        "padding_bits_set": known_after - known_before,
+        "padding_codewords_resolved": resolved_after - resolved_before,
+    }