partial-qr 0.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Bence Skorka
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: partial-qr
3
+ Version: 0.0.29
4
+ Summary: Partial QR Code Decoder
5
+ Author: Bence Skorka
6
+ License-Expression: MIT
7
+ Classifier: Development Status :: 2 - Pre-Alpha
8
+ Classifier: Environment :: Console
9
+ Requires-Python: >=3.12
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: click
13
+ Requires-Dist: pillow
14
+ Provides-Extra: test
15
+ Requires-Dist: black; extra == "test"
16
+ Requires-Dist: mock; extra == "test"
17
+ Requires-Dist: mypy; extra == "test"
18
+ Requires-Dist: pytest-cov; extra == "test"
19
+ Requires-Dist: pytest-mock; extra == "test"
20
+ Requires-Dist: pytest-mypy; extra == "test"
21
+ Requires-Dist: pytest-timeout; extra == "test"
22
+ Requires-Dist: pytest; extra == "test"
23
+ Requires-Dist: qrcode; extra == "test"
24
+ Requires-Dist: types-mock; extra == "test"
25
+ Dynamic: license-file
26
+
27
+ # Partial QR Code Decoder
28
+
29
+ An aggressive QR code decoder that can recover data from partially missing or damaged QR codes.
30
+
31
+ ## Usage
32
+
33
+ ```bash
34
+ partial-qr --input-file path/to/qr_code_matrix.txt
35
+ ```
36
+
37
+ If the input is too corrupted, bruteforce strength can be increased, but beware of the exponential
38
+ runtime increase, especially for higher versions:
39
+
40
+ ```bash
41
+ partial-qr --input-file path/to/qr_code_matrix.txt --max-brute-bits 24
42
+ ```
43
+
44
+ Additional hints can be provided to the bruteforce to speed it up:
45
+
46
+ `--ascii` - Assume the data is ASCII, which reduces the search space significantly.
47
+
48
+ `--url` - Assume the data is a URL, so it must start with either `http://` or `https://`.
49
+
50
+ `--parallel` - Enable parallel bruteforce using all available CPU cores.
@@ -0,0 +1,24 @@
1
+ # Partial QR Code Decoder
2
+
3
+ An aggressive QR code decoder that can recover data from partially missing or damaged QR codes.
4
+
5
+ ## Usage
6
+
7
+ ```bash
8
+ partial-qr --input-file path/to/qr_code_matrix.txt
9
+ ```
10
+
11
+ If the input is too corrupted, bruteforce strength can be increased, but beware of the exponential
12
+ runtime increase, especially for higher versions:
13
+
14
+ ```bash
15
+ partial-qr --input-file path/to/qr_code_matrix.txt --max-brute-bits 24
16
+ ```
17
+
18
+ Additional hints can be provided to the bruteforce to speed it up:
19
+
20
+ `--ascii` - Assume the data is ASCII, which reduces the search space significantly.
21
+
22
+ `--url` - Assume the data is a URL, so it must start with either `http://` or `https://`.
23
+
24
+ `--parallel` - Enable parallel bruteforce using all available CPU cores.
File without changes
File without changes
@@ -0,0 +1,149 @@
1
+ import click
2
+
3
+ from .decoders.data.decode import decode_qr_data, matrix_to_image, reconstruct_matrix
4
+ from .decoders.formatinfo import decode_format_version_information
5
+ from .decoders.mask import apply_mask
6
+ from .decoders.version import decode_version_information
7
+ from .matrix import load_matrix_from_file
8
+
9
+
10
@click.command()
@click.option(
    "-i",
    "--input-file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the input data file.",
)
@click.option(
    "--ascii",
    "assume_ascii",
    is_flag=True,
    default=False,
    help="Assume payload is printable ASCII text (byte mode, MSB=0 per byte).",
)
@click.option(
    "--url",
    "assume_url",
    is_flag=True,
    default=False,
    help="Assume data starts with http:// or https:// (implies --ascii).",
)
@click.option(
    "--max-brute-bits",
    type=int,
    default=10,
    show_default=True,
    help="Max unknown bits to brute-force per RS block (2^N combinations). Higher values take exponentially longer.",
)
@click.option(
    "--parallel",
    is_flag=True,
    default=False,
    help="Distribute brute-force work across all CPU cores (useful with large --max-brute-bits).",
)
@click.option(
    "--reconstruct",
    type=click.Path(),
    default=None,
    help="Save the fully reconstructed QR code as an image to the given path.",
)
@click.option(
    "--transpose-reconstruct",
    is_flag=True,
    default=False,
    help="Transpose the reconstructed QR code before saving (to fix reconstruction when input is column-major).",
)
def main(
    input_file: str,
    assume_ascii: bool,
    assume_url: bool,
    max_brute_bits: int,
    parallel: bool,
    reconstruct: str | None,
    transpose_reconstruct: bool,
) -> None:
    """Decode a partially missing or damaged QR code from a matrix file."""
    # NOTE: click uses the docstring above as the command's --help text, so
    # developer-facing detail lives in comments instead.
    #
    # Pipeline: load matrix -> decode format and version information ->
    # unmask -> decode data -> print a report -> optionally save an image of
    # the reconstructed code.
    matrix = load_matrix_from_file(input_file)
    format_info = decode_format_version_information(matrix)
    version_info = decode_version_information(matrix)

    print("Decoded format information:")
    print(f" Error correction level: {format_info.error_correction_level}")
    print(f" Mask pattern: {format_info.mask_pattern}")
    print(f"Decoded version information: {version_info.version if version_info else 'Unknown'}")

    if format_info.mask_pattern is None:
        print("Cannot decode format information, stopping here.")
        return

    # The summary line above already treats a missing version as possible;
    # every later step dereferences ``version_info.version``, so stop here
    # instead of failing with an AttributeError.
    if version_info is None:
        print("Cannot decode version information, stopping here.")
        return

    flipped = apply_mask(matrix, format_info.mask_pattern, version_info.version)

    if format_info.error_correction_level is None:
        print("Cannot decode error correction level, so not applying error correction decoding.")
        print("Partially decoded (masked) matrix:")
        print(flipped)
        return

    result = decode_qr_data(
        flipped,
        version_info.version,
        format_info.error_correction_level,
        assume_ascii=assume_ascii,
        assume_url=assume_url,
        max_brute_bits=max_brute_bits,
        parallel=parallel,
    )

    print("\nData decoding:")
    print(f" Data codewords: {result.known_data_codewords}/{result.total_data_codewords} known")
    print(f" EC codewords: {result.known_ec_codewords}/{result.total_ec_codewords} known")
    print(f" RS blocks corrected: {result.blocks_corrected}/{result.block_info.total_blocks}")
    print(f" EC capacity per block: {result.block_info.ec_codewords_per_block} erasures")

    # Summarise whichever constraint statistics are present and non-zero.
    if result.constraint_stats:
        cs = result.constraint_stats
        parts = []
        if cs.get("prefix"):
            parts.append(f"URL prefix \"{cs['prefix']}\"")
        if cs.get("bits_set"):
            parts.append(f"{cs['bits_set']} bits set")
        if cs.get("codewords_resolved"):
            parts.append(f"{cs['codewords_resolved']} codewords resolved")
        if cs.get("padding_bits_set"):
            parts.append(f"{cs['padding_bits_set']} padding bits set")
        if cs.get("padding_codewords_resolved"):
            parts.append(f"{cs['padding_codewords_resolved']} padding codewords resolved")
        if parts:
            print(f" Constraints: {', '.join(parts)}")

    if result.decoded.segments:
        print("\nDecoded segments:")
        for seg in result.decoded.segments:
            print(f" [{seg.mode_name}] count={seg.char_count}: {repr(seg.data)}")
        print(f"\nRecovered text: {result.decoded.raw_text}")

    if reconstruct:
        recovered = reconstruct_matrix(matrix, result, format_info.mask_pattern)
        if transpose_reconstruct:
            recovered = recovered.transpose()
        matrix_to_image(recovered, reconstruct)
        print(f"\nReconstructed QR code saved to: {reconstruct}")

    if not result.decoded.segments:
        # Unknown codewords (data + EC), averaged over the RS blocks, give a
        # rough idea of how far the input is from being correctable.
        total_erasures_per_block = (
            result.total_data_codewords
            + result.total_ec_codewords
            - result.known_data_codewords
            - result.known_ec_codewords
        ) // result.block_info.total_blocks
        print("\nNo data segments could be decoded.")
        print(f" Average erasures per block: ~{total_erasures_per_block}")
        print(
            f" RS can correct up to {result.block_info.ec_codewords_per_block} erasures per block."
        )
        if total_erasures_per_block > result.block_info.ec_codewords_per_block:
            print(" (Too many unknowns for RS correction — need more of the QR code)")


if __name__ == "__main__":
    main()
File without changes
File without changes
@@ -0,0 +1,270 @@
1
+ """Content-based constraints for narrowing partial QR code data.
2
+
3
+ When we know (or assume) something about the QR code's content, we can
4
+ set additional data bits and reduce the erasure count for Reed-Solomon.
5
+
6
+ Supported constraints:
7
+ - ASCII: assume all payload bytes are printable ASCII (0x20-0x7E).
8
+ Sets MSB=0 for each payload byte (since printable ASCII < 0x80).
9
+ - URL prefix: assume the data starts with "http://" or "https://".
10
+ Sets exact bit values for the prefix bytes.
11
+ - Padding: after all data segments end, QR codes fill remaining data
12
+ codewords with a deterministic padding sequence: 4-bit terminator
13
+ (0000), byte alignment (0s), then alternating 0xEC and 0x11.
14
+ This constraint is always applied when segment headers are readable.
15
+
16
+ ASCII and URL constraints assume byte mode (mode indicator 0100).
17
+ """
18
+
19
+ from typing import Optional
20
+
21
+ from .data import PartialCodeword, bits_to_partial_codewords
22
+ from .stream import CHAR_COUNT_BITS
23
+
24
+
25
def _header_bit_count(version: int, mode: int = 0b0100) -> int:
    """Return the segment header length in bits for ``mode`` at ``version``.

    The header is the 4-bit mode indicator plus the character count field.
    The width of that field is looked up in ``CHAR_COUNT_BITS`` by mode and
    by version group (versions up to 9, up to 26, and everything above).
    ``mode`` defaults to the byte-mode indicator ``0b0100``.
    """
    version_group = 0 if version <= 9 else (1 if version <= 26 else 2)
    return 4 + CHAR_COUNT_BITS[mode][version_group]
34
+
35
+
36
def apply_content_constraints(
    data_partial_cws: list[PartialCodeword],
    version: int,
    assume_ascii: bool = False,
    url_prefix: str | None = None,
) -> tuple[list[PartialCodeword], dict[str, object]]:
    """Pin data bits that follow from assumptions about the payload.

    ``data_partial_cws`` must be the concatenated data partial codewords in
    data-stream order (block 0 data, block 1 data, ...).

    Whenever any assumption is active, the first segment is taken to be byte
    mode: the mode indicator is pinned to ``0100``, the bytes of
    ``url_prefix`` (if given) are pinned right after the segment header, and
    the most significant bit of every remaining payload byte is pinned to 0.

    Returns ``(constrained_codewords, stats)``. If the mode indicator or a
    prefix bit contradicts an already-known bit, a copy of the input list is
    returned and ``stats["conflict"]`` names the failing constraint.
    """
    if url_prefix is None and not assume_ascii:
        return list(data_partial_cws), {"bits_set": 0}

    # Work on one flat bit list; ``None`` marks an unknown bit.
    flat: list[Optional[int]] = [bit for cw in data_partial_cws for bit in cw.known_bits]
    known_before = sum(bit is not None for bit in flat)
    payload_start = _header_bit_count(version)

    def pin(index: int, value: int) -> bool:
        """Fill an unknown bit; report False if a known bit disagrees."""
        if index >= len(flat):
            # Positions past the end of the stream are silently accepted.
            return True
        current = flat[index]
        if current is None:
            flat[index] = value
            return True
        return current == value

    # Byte-mode indicator (0100) occupies the first four bits.
    for offset, expected in enumerate((0, 1, 0, 0)):
        if not pin(offset, expected):
            return list(data_partial_cws), {"bits_set": 0, "conflict": "mode"}

    if url_prefix is not None:
        for char_idx, byte_val in enumerate(url_prefix.encode("ascii")):
            byte_start = payload_start + char_idx * 8
            for bit_pos in range(8):
                # Most significant bit first.
                wanted = (byte_val >> (7 - bit_pos)) & 1
                if not pin(byte_start + bit_pos, wanted):
                    return list(data_partial_cws), {
                        "bits_set": 0,
                        "conflict": f"prefix_byte_{char_idx}",
                    }

    # At this point at least one assumption is active (see the early return),
    # so the MSB=0 rule always applies to the bytes after the prefix.
    first_free_byte = len(url_prefix) if url_prefix else 0
    declared_count = _read_known_bits(flat, 4, payload_start - 4)
    if declared_count is None:
        # Character count unreadable: treat every remaining whole byte as payload.
        payload_bytes = (len(flat) - payload_start) // 8
    else:
        payload_bytes = declared_count
    for byte_idx in range(first_free_byte, payload_bytes):
        # NOTE(review): a conflicting known MSB is ignored here rather than
        # reported as a conflict — presumably deliberate best-effort; confirm.
        pin(payload_start + byte_idx * 8, 0)

    constrained = bits_to_partial_codewords(flat)

    known_after = sum(bit is not None for bit in flat)
    resolved_before = sum(1 for cw in data_partial_cws if cw.is_fully_known)
    resolved_after = sum(1 for cw in constrained if cw.is_fully_known)

    stats: dict[str, object] = {
        "bits_set": known_after - known_before,
        "codewords_resolved": resolved_after - resolved_before,
    }
    if url_prefix:
        stats["prefix"] = url_prefix

    return constrained, stats
111
+
112
+
113
+ def _compute_segment_data_bits(mode: int, char_count: int) -> int | None:
114
+ """Return number of data bits for a segment with given mode and char count."""
115
+ if mode == 0b0001: # Numeric
116
+ full_groups, remainder = divmod(char_count, 3)
117
+ data_bits = full_groups * 10
118
+ if remainder == 2:
119
+ data_bits += 7
120
+ elif remainder == 1:
121
+ data_bits += 4
122
+ return data_bits
123
+ elif mode == 0b0010: # Alphanumeric
124
+ full_pairs, remainder = divmod(char_count, 2)
125
+ return full_pairs * 11 + remainder * 6
126
+ elif mode == 0b0100: # Byte
127
+ return char_count * 8
128
+ elif mode == 0b1000: # Kanji
129
+ return char_count * 13
130
+ return None
131
+
132
+
133
+ def _read_known_bits(bits: list[Optional[int]], pos: int, count: int) -> int | None:
134
+ """Read `count` bits from position `pos`. Returns None if any bit is unknown."""
135
+ if pos + count > len(bits):
136
+ return None
137
+ value = 0
138
+ for i in range(count):
139
+ b = bits[pos + i]
140
+ if b is None:
141
+ return None
142
+ value = (value << 1) | b
143
+ return value
144
+
145
+
146
def apply_padding_constraints(
    data_partial_cws: list[PartialCodeword],
    version: int,
) -> tuple[list[PartialCodeword], dict[str, object]]:
    """Fill in the deterministic padding that follows the encoded segments.

    The padding sequence written after the last segment is:
      1. Terminator: up to 4 zero bits.
      2. Byte alignment: zero bits up to the next byte boundary.
      3. Pad bytes: 0xEC and 0x11 alternating, starting with 0xEC.

    Segment headers (mode indicator + character count) are read from the
    known bits to locate where the padding begins. If a header cannot be
    read, or a padding bit contradicts a known bit, a copy of the input list
    is returned with ``padding_bits_set`` equal to 0 (plus a ``conflict``
    entry in the contradiction case).

    Returns ``(constrained_codewords, stats)``.
    """
    # One flat bit list; ``None`` marks an unknown bit.
    stream: list[Optional[int]] = [bit for cw in data_partial_cws for bit in cw.known_bits]
    total_bits = len(stream)
    known_before = sum(bit is not None for bit in stream)
    version_group = 0 if version <= 9 else (1 if version <= 26 else 2)

    def unchanged(conflict: str | None = None) -> tuple[list[PartialCodeword], dict[str, object]]:
        """Build the 'nothing applied' result, optionally naming a conflict."""
        stats: dict[str, object] = {"padding_bits_set": 0}
        if conflict is not None:
            stats["conflict"] = conflict
        return list(data_partial_cws), stats

    def pin(index: int, value: int) -> bool:
        """Fill an unknown bit; report False if a known bit disagrees."""
        if index >= total_bits:
            return True
        if stream[index] is None:
            stream[index] = value
            return True
        return stream[index] == value

    # Walk the segment headers to find the first bit after the encoded data.
    cursor = 0
    segments_seen = 0
    while cursor + 4 <= total_bits:
        mode_val = _read_known_bits(stream, cursor, 4)
        if mode_val is None:
            # Partly unknown mode indicator. Once at least one segment has
            # been parsed, optimistically treat it as the terminator as long
            # as none of its known bits is a 1; otherwise give up.
            may_be_terminator = segments_seen > 0 and all(
                bit is None or bit == 0 for bit in stream[cursor : cursor + 4]
            )
            if not may_be_terminator:
                return unchanged()
            break

        if mode_val == 0b0000:
            break

        if mode_val not in CHAR_COUNT_BITS:
            return unchanged()

        count_width = CHAR_COUNT_BITS[mode_val][version_group]
        char_count = _read_known_bits(stream, cursor + 4, count_width)
        if char_count is None:
            return unchanged()

        payload_bits = _compute_segment_data_bits(mode_val, char_count)
        if payload_bits is None:
            return unchanged()

        cursor += 4 + count_width + payload_bits
        segments_seen += 1

    if cursor >= total_bits:
        # The data fills the whole stream; there is no room for padding.
        return unchanged()

    # 1. Terminator: up to 4 zero bits.
    terminator_end = cursor + min(4, total_bits - cursor)
    for index in range(cursor, terminator_end):
        if not pin(index, 0):
            return unchanged("terminator")
    cursor = terminator_end

    # 2. Byte alignment: zeros up to the next byte boundary (capped at the end).
    aligned = min(cursor + (-cursor % 8), total_bits)
    for index in range(cursor, aligned):
        if not pin(index, 0):
            return unchanged("byte_align")
    cursor = aligned

    # 3. Pad bytes: 0xEC for even positions, 0x11 for odd ones, whole bytes only.
    for pad_idx, byte_start in enumerate(range(cursor, total_bits - 7, 8)):
        pad_val = 0x11 if pad_idx % 2 else 0xEC
        for bit_pos in range(8):
            wanted = (pad_val >> (7 - bit_pos)) & 1
            if not pin(byte_start + bit_pos, wanted):
                return unchanged(f"pad_byte_{pad_idx}")

    constrained = bits_to_partial_codewords(stream)

    known_after = sum(bit is not None for bit in stream)
    resolved_before = sum(1 for cw in data_partial_cws if cw.is_fully_known)
    resolved_after = sum(1 for cw in constrained if cw.is_fully_known)

    return constrained, {
        "padding_bits_set": known_after - known_before,
        "padding_codewords_resolved": resolved_after - resolved_before,
    }