py-lzstring 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ Copyright 2026 Frank Hoffmann
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
8
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-lzstring
3
+ Version: 0.1.0
4
+ Summary: LZ-based string compression - Python port of lz-string@1.5.0 (JavaScript)
5
+ Author-email: "Original Code: Pieroxy pieroxy@pieroxy.net, Port: Frank Hoffmann" <frank.h.dev@protonmail.com>
6
+ License: Copyright 2026 Frank Hoffmann
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
13
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14
+ Project-URL: Homepage, https://github.com/Frank-Hoffmann-Dev/py-lzstring
15
+ Project-URL: Bug Tracker, https://github.com/Frank-Hoffmann-Dev/py-lzstring/issues
16
+ Keywords: compression,lz-string,lzstring,lz78,lzw
17
+ Classifier: Development Status :: 5 - Production/Stable
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: System :: Archiving :: Compression
27
+ Classifier: Typing :: Typed
28
+ Requires-Python: >=3.10
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=8; extra == "dev"
33
+ Requires-Dist: pytest-benchmark>=4; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # py-lzstring
37
+
38
+ Python port of [lz-string](https://github.com/pieroxy/lz-string) - byte-for-byte compatible with lz-string@1.5.0.
39
+
40
+
41
+ ## Installation
42
+ ```bash
43
+ pip install py-lzstring
44
+ ```
45
+
46
+ No runtime dependencies. Requires Python 3.10+.
47
+
48
+
49
+ ## Quickstart
50
+
51
+ ```python
52
+ import lzstring
53
+
54
+ # Compress to base64 (safe for HTTP, JSON, localStorage);
55
+ compressed = lzstring.compress_to_base64("Hello, World!")
56
+ print(compressed) # 'BIUwNmD2A0AEDqkBOYAmBCIA';
57
+
58
+ original = lzstring.decompress_from_base64(compressed)
59
+ print(original) # 'Hello, World!';
60
+ ```
61
+
62
+
63
+ ## Encodings
64
+
65
+ | Function pair | bits/char | Use case |
66
+ |---|---|---|
67
+ | `compress` / `decompress` | 16 | In-memory; smallest output |
68
+ | `compress_to_utf16` / `decompress_from_utf16` | 15 | localStorage (all browsers) |
69
+ | `compress_to_base64` / `decompress_from_base64` | 6 | HTTP, JSON, data-URLs |
70
+ | `compress_to_encoded_uri_component` / `decompress_from_encoded_uri_component` | 6 | URL query strings |
71
+ | `compress_to_uint8array` / `decompress_from_uint8array` | — | Binary I/O, `bytes` output |
72
+
73
+ ## Interoperability with JavaScript
74
+
75
+ All encodings are bit-exact with lz-string@1.5.0:
76
+
77
+ ```js
78
+ // JavaScript
79
+ const LZString = require("lz-string");
80
+ const compressed = LZString.compressToBase64("Hello, World!");
81
+ // "BIUwNmD2A0AEDqkBOYAmBCIA"
82
+ ```
83
+
84
+ ```python
85
+ # Python
86
+ import lzstring
87
+ lzstring.decompress_from_base64("BIUwNmD2A0AEDqkBOYAmBCIA")
88
+ # "Hello, World!"
89
+ ```
90
+
91
+ ## API reference
92
+
93
+ All compress functions accept `str | None` and return `str` (or `bytes` for the
94
+ Uint8Array variant). Passing `None` mirrors the JavaScript null-handling and
95
+ returns `""`.
96
+
97
+ All decompress functions accept `str | None` (or `bytes | None` for Uint8Array)
98
+ and return `str | None`. An empty string returns `None` (like JS `null`),
99
+ indicating invalid input.
100
+
101
+ ```python
102
+ import lzstring
103
+
104
+ # Raw (most compact, arbitrary Unicode output)
105
+ lzstring.compress("...")
106
+ lzstring.decompress("...")
107
+
108
+ # UTF-16 (printable characters only, safe for all localStorage implementations)
109
+ lzstring.compress_to_utf16("...")
110
+ lzstring.decompress_from_utf16("...")
111
+
112
+ # Base64 (standard alphabet with = padding)
113
+ lzstring.compress_to_base64("...")
114
+ lzstring.decompress_from_base64("...")
115
+
116
+ # URI component (URL-safe, no padding)
117
+ lzstring.compress_to_encoded_uri_component("...")
118
+ lzstring.decompress_from_encoded_uri_component("...")
119
+
120
+ # Uint8Array (returns / accepts bytes)
121
+ lzstring.compress_to_uint8array("...") # → bytes
122
+ lzstring.decompress_from_uint8array(b"...") # → str | None
123
+ ```
124
+
125
+
126
+ ## License
127
+
128
+ MIT License
129
+ ```
130
+ Copyright 2026 Frank Hoffmann
131
+
132
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
133
+
134
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
135
+
136
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
137
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
138
+ ```
139
+
140
+
141
+ ## Thanks
142
+
143
+ Many thanks to all the developers of the libraries used and to the community for creating so many incredibly useful tools.
144
+
145
+
146
+ ## AI Usage
147
+
148
+ I used the AI assistant **Anthropic Claude AI - Sonnet 4.6** to create this tool.
149
+
150
+ As a computer scientist, I have reviewed and approved every single line of code, and I understand the tool’s internal processes and how it works.
151
+ I didn’t just copy and paste the code from the AI.
152
+ Instead, I wrote it by hand, line by line, making changes whenever I deemed it necessary.
153
+
154
+ Nevertheless, there may still be errors or poor design choices.
155
+ Everyone is free to examine, modify, improve, fork the code or call it AI slop :D
@@ -0,0 +1,120 @@
1
+ # py-lzstring
2
+
3
+ Python port of [lz-string](https://github.com/pieroxy/lz-string) - byte-for-byte compatible with lz-string@1.5.0.
4
+
5
+
6
+ ## Installation
7
+ ```bash
8
+ pip install py-lzstring
9
+ ```
10
+
11
+ No runtime dependencies. Requires Python 3.10+.
12
+
13
+
14
+ ## Quickstart
15
+
16
+ ```python
17
+ import lzstring
18
+
19
+ # Compress to base64 (safe for HTTP, JSON, localStorage);
20
+ compressed = lzstring.compress_to_base64("Hello, World!")
21
+ print(compressed) # 'BIUwNmD2A0AEDqkBOYAmBCIA';
22
+
23
+ original = lzstring.decompress_from_base64(compressed)
24
+ print(original) # 'Hello, World!';
25
+ ```
26
+
27
+
28
+ ## Encodings
29
+
30
+ | Function pair | bits/char | Use case |
31
+ |---|---|---|
32
+ | `compress` / `decompress` | 16 | In-memory; smallest output |
33
+ | `compress_to_utf16` / `decompress_from_utf16` | 15 | localStorage (all browsers) |
34
+ | `compress_to_base64` / `decompress_from_base64` | 6 | HTTP, JSON, data-URLs |
35
+ | `compress_to_encoded_uri_component` / `decompress_from_encoded_uri_component` | 6 | URL query strings |
36
+ | `compress_to_uint8array` / `decompress_from_uint8array` | — | Binary I/O, `bytes` output |
37
+
38
+ ## Interoperability with JavaScript
39
+
40
+ All encodings are bit-exact with lz-string@1.5.0:
41
+
42
+ ```js
43
+ // JavaScript
44
+ const LZString = require("lz-string");
45
+ const compressed = LZString.compressToBase64("Hello, World!");
46
+ // "BIUwNmD2A0AEDqkBOYAmBCIA"
47
+ ```
48
+
49
+ ```python
50
+ # Python
51
+ import lzstring
52
+ lzstring.decompress_from_base64("BIUwNmD2A0AEDqkBOYAmBCIA")
53
+ # "Hello, World!"
54
+ ```
55
+
56
+ ## API reference
57
+
58
+ All compress functions accept `str | None` and return `str` (or `bytes` for the
59
+ Uint8Array variant). Passing `None` mirrors the JavaScript null-handling and
60
+ returns `""`.
61
+
62
+ All decompress functions accept `str | None` (or `bytes | None` for Uint8Array)
63
+ and return `str | None`. An empty string returns `None` (like JS `null`),
64
+ indicating invalid input.
65
+
66
+ ```python
67
+ import lzstring
68
+
69
+ # Raw (most compact, arbitrary Unicode output)
70
+ lzstring.compress("...")
71
+ lzstring.decompress("...")
72
+
73
+ # UTF-16 (printable characters only, safe for all localStorage implementations)
74
+ lzstring.compress_to_utf16("...")
75
+ lzstring.decompress_from_utf16("...")
76
+
77
+ # Base64 (standard alphabet with = padding)
78
+ lzstring.compress_to_base64("...")
79
+ lzstring.decompress_from_base64("...")
80
+
81
+ # URI component (URL-safe, no padding)
82
+ lzstring.compress_to_encoded_uri_component("...")
83
+ lzstring.decompress_from_encoded_uri_component("...")
84
+
85
+ # Uint8Array (returns / accepts bytes)
86
+ lzstring.compress_to_uint8array("...") # → bytes
87
+ lzstring.decompress_from_uint8array(b"...") # → str | None
88
+ ```
89
+
90
+
91
+ ## License
92
+
93
+ MIT License
94
+ ```
95
+ Copyright 2026 Frank Hoffmann
96
+
97
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
98
+
99
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
100
+
101
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
102
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
103
+ ```
104
+
105
+
106
+ ## Thanks
107
+
108
+ Many thanks to all the developers of the libraries used and to the community for creating so many incredibly useful tools.
109
+
110
+
111
+ ## AI Usage
112
+
113
+ I used the AI assistant **Anthropic Claude AI - Sonnet 4.6** to create this tool.
114
+
115
+ As a computer scientist, I have reviewed and approved every single line of code, and I understand the tool’s internal processes and how it works.
116
+ I didn’t just copy and paste the code from the AI.
117
+ Instead, I wrote it by hand, line by line, making changes whenever I deemed it necessary.
118
+
119
+ Nevertheless, there may still be errors or poor design choices.
120
+ Everyone is free to examine, modify, improve, fork the code or call it AI slop :D
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "py-lzstring"
7
+ version = "0.1.0"
8
+ description = "LZ-based string compression - Python port of lz-string@1.5.0 (JavaScript)"
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Original Code: Pieroxy pieroxy@pieroxy.net, Port: Frank Hoffmann", email = "frank.h.dev@protonmail.com" }
14
+ ]
15
+ keywords = ["compression", "lz-string", "lzstring", "lz78", "lzw"]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ "Topic :: System :: Archiving :: Compression",
27
+ "Typing :: Typed"
28
+ ]
29
+
30
+ dependencies = [] # Pure stdlib - no runtime dependencies;
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "pytest>=8",
35
+ "pytest-benchmark>=4"
36
+ ]
37
+
38
+ [project.scripts]
39
+ lz_string = "lz_string.main:main"
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/Frank-Hoffmann-Dev/py-lzstring"
43
+ "Bug Tracker" = "https://github.com/Frank-Hoffmann-Dev/py-lzstring/issues"
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["src"]
47
+
48
+ [tool.pytest.ini_options]
49
+ testpaths = ["tests"]
50
+ addopts = "-v --tb=short"
51
+
52
+ [tool.ruff]
53
+ line-length = 99
54
+ target-version = "py310"
55
+
56
+ [tool.mypy]
57
+ python_version = "3.10"
58
+ strict = true
59
+ files = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,47 @@
1
+ """
2
+ py-lzstring - Python port of lz-string (https://github.com/pieroxy/lz-string).
3
+
4
+ LZ-based string compression compatible with the JavaScript library v1.5.0.
5
+ Five encoding variants are available for different transport or storage needs.
6
+
7
+ Quick Start:
8
+ >>> import lzstring
9
+ >>> compressed = lzstring.compress_to_base64("Hello, World!")
10
+ >>> lzstring.decompress_from_base64(compressed)
11
+ 'Hello, World!'
12
+
13
+ All functions accept 'str | None' and return 'str | None' or 'bytes'.
14
+ Passing 'None' mirror the JavaScript library's null-handling behaviour (compress and decompress functions return '""').
15
+
16
+
17
+ Encoding Overview:
18
+
19
+ Function pair bits/char Best suited for
20
+ --------------------------------------------------------------------------- --------- -------------------------
21
+ compress / decompress 16 In-memory; smallest output
22
+ compress_to_utf16 / decompress_from_utf16 15 localStorage (all browsers)
23
+ compress_to_base64 / decompress_from_base64 6 HTTP, JSON, data-URLs
24
+ compress_to_encoded_uri_component / decompress_from_encoded_uri_component 6 URL query strings
25
+ compress_to_uint8array / decompress_from_uint8array 8 (bytes) Binary I/O, Node-style
26
+
27
+ Compability:
28
+ All functions are byte-for-byte compatible with lz-string@1.5.0 and verified against its JavaScript test suits.
29
+ """
30
+ from lzstring._encodings import (
31
+ compress, decompress,
32
+ compress_to_utf16, decompress_from_utf16,
33
+ compress_to_base64, decompress_from_base64,
34
+ compress_to_encoded_uri_component, decompress_from_encoded_uri_component,
35
+ compress_to_uint8array, decompress_from_uint8array
36
+ )
37
+
38
+ __version__ = "0.1.0"
39
+
40
+ __all__ = [
41
+ "__version__",
42
+ "compress", "decompress",
43
+ "compress_to_utf16", "decompress_from_utf16",
44
+ "compress_to_base64", "decompress_from_base64",
45
+ "compress_to_encoded_uri_component", "decompress_from_encoded_uri_component",
46
+ "compress_to_uint8array", "decompress_from_uint8array"
47
+ ]
@@ -0,0 +1,196 @@
1
+ """
2
+ Bit-level I/O primitives for the LZString compression algorithm.
3
+
4
+ The LZString format packs variable-width token codes (starting at 2 bits, growing as the directory expands) into a stream of fixed-width 'characters'.
5
+ Each output character holds exactly 'bits_per_char' bits, where bits_per_char is determined by the encoding:
6
+
7
+ - Raw / UTF-16 -> 15 bits per character
8
+ - Base64 / URI -> 6 bits per character
9
+ - Uint8Array -> 8 bits per character (handled at a higher layer)
10
+
11
+ Bit order within every character is LSB-first: the first token bit lands in bit-0 of the accumulator, the next in bit-1, and so on.
12
+ Once the accumulator is full (position reaches bits_per_char) it's integer value is handed to a caller-supplied 'emit(value: int) -> None' callback, then it resets to 0.
13
+
14
+ This LSB-first, fixed-width-character packing is the exact scheme used by the original JavaScript implementation and all compatible ports.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from typing import Callable
19
+
20
+
21
+ # ----------------------------------------------------------------------------------------------------------------
22
+ # BitWriter;
23
+ # ----------------------------------------------------------------------------------------------------------------
24
+ class BitWriter:
25
+ """
26
+ Accumulate individual bits and flush them as fixed-width characters.
27
+
28
+ :param bits_per_char: Number of bits that make up a single output character (e.g. 15 for the raw/UTF-16 encoding, 6 for base64/URI).
29
+ :param emit: Callback invoked with the integer value of each completed character.
30
+ The caller is responsible for mapping that integer to the appropriate output character (e.g. 'chr(value)') for raw, or a lookup into a base64 alphabet.
31
+ :return: None
32
+
33
+ Usage:
34
+ >>> chunks: list[int] = []
35
+ >>> w = BitWriter(bits_per_char=6, emit=chunks.append)
36
+ >>> w.write_bits(value=0b10110, n_bits=5)
37
+ >>> w.flush()
38
+ """
39
+ __slots__ = ("_bits_per_char", "_emit", "_val", "_position")
40
+
41
+ def __init__(self, bits_per_char: int, emit: Callable[[int], None]) -> None:
42
+ if bits_per_char < 1:
43
+ raise ValueError(f"bits_per_char must be >= 1, got {bits_per_char}")
44
+
45
+ self._bits_per_char = bits_per_char
46
+ self._emit = emit
47
+ self._val: int = 0 # Accumulator;
48
+ self._position: int = 0 # How many bits are currently in _val;
49
+
50
+
51
+ # ----------------------------------------------------------------------------------------------------------------
52
+ # Public Interface;
53
+ # ----------------------------------------------------------------------------------------------------------------
54
+ def write_bits(self, value: int, n_bits: int) -> None:
55
+ """
56
+ Write the lowest *n_bits* bits of *value* into the stream, LSB first.
57
+
58
+ Mirrors the inner loop found in every LZString compress implementation:
59
+ for i in range(n_bits):
60
+ val = (val << 1) | (value & 1) # Push LSB into accumulator
61
+ value >>= 1
62
+
63
+ :param value: The integer whose lowest 'n_bits' bits will be written.
64
+ :param n_bits: Number of bits to write (must be >= 0).
65
+ """
66
+ for _ in range(n_bits):
67
+ # Shift the accumulator left and OR in the current LSB;
68
+ self._val = (self._val << 1) | (value & 1)
69
+ value >>= 1
70
+ self._position += 1
71
+
72
+ if self._position == self._bits_per_char:
73
+ self._flush_char()
74
+
75
+
76
+ def flush(self) -> None:
77
+ """
78
+ Flush any remaining bits as a final (zero-padded) character.
79
+
80
+ The original JS flush loop checks 'position == bitsPerChar - 1' (not '== bitsPerChar'),
81
+ so it always emits exactly one character, even when the accumulator is completely empty.
82
+ This garantees that every compress stream ends with a sentinel character, which the
83
+ decompressor can safely peek at without an out-of-bounds read.
84
+
85
+ This must be called exactly once after all tokens have been written.
86
+ """
87
+ while True:
88
+ self._val <<= 1
89
+
90
+ if self._position == self._bits_per_char - 1:
91
+ self._flush_char()
92
+ break
93
+
94
+ self._position += 1
95
+
96
+
97
+
98
+ # ----------------------------------------------------------------------------------------------------------------
99
+ # Internal Helper Functions;
100
+ # ----------------------------------------------------------------------------------------------------------------
101
+ def _flush_char(self) -> None:
102
+ self._emit(self._val)
103
+ self._val = 0
104
+ self._position = 0
105
+
106
+
107
+
108
+ # ----------------------------------------------------------------------------------------------------------------
109
+ # BitReader;
110
+ # ----------------------------------------------------------------------------------------------------------------
111
+ class BitReader:
112
+ """
113
+ Read individual bits from a sequence of fixed-width characters.
114
+
115
+ :param get_char_value: Callable that accepts a 0-based *index* and returns the integer value of the character at that position in the compressed input.
116
+ For the raw encoding this is simply 'ord(compressed[index])'; for base64 it would be a reverse-alphabet lookup.
117
+ :param reset_value: The integer whose single set bit marks the *start* position within the first character.
118
+ For 'bits_per_char = N' this is '1 << (N - 1)'.
119
+ :return: None
120
+
121
+ Examples:
122
+ - Raw (15 bit): reset_value = 16384 (0x4000, i.e. 1 << 14)
123
+ - Base64 (6 bit): reset_value = 32 (0x20, i.e. 1 << 5)
124
+ - UTF-16 (15 bit): reset_value = 16384
125
+
126
+ The decompressor checks the *current bit* by ANDing 'data_val' with 'data_position', then shifts 'data_position' right by one.
127
+ When 'data_position' reaches 0, the next character is loaded and 'data_position' is reset to 'reset_value'.
128
+
129
+ Usage:
130
+ >>> data = [0b101101] # One 6-bit char;
131
+ >>> r = BitReader(lambda i: data[i], reset_value=32)
132
+ >>> r.read_bits(3) # Reads lowest 3 bits: 1, 0, 1 -> integer: 5;
133
+ """
134
+ __slots__ = ("_get_char_value", "_reset_value", "_val", "_position", "_index")
135
+
136
+ def __init__(self, get_char_value: Callable[[int], int], reset_value: int) -> None:
137
+ if reset_value < 1:
138
+ raise ValueError(f"reset_value must be >= 1, got {reset_value}")
139
+
140
+ self._get_char_value = get_char_value
141
+ self._reset_value = reset_value
142
+
143
+ # Start with position=0 so the very first read_bits call loads chars[0];
144
+ self._val: int = 0
145
+ self._position: int = 0
146
+ self._index: int = 0 # Next character index to fetch
147
+
148
+
149
+ # ----------------------------------------------------------------------------------------------------------------
150
+ # Public Interface;
151
+ # ----------------------------------------------------------------------------------------------------------------
152
+ def read_bits(self, n_bits: int) -> int:
153
+ """
154
+ Read 'n_bits' bits and return them as an integer, LSB first.
155
+
156
+ This mirrors the inner loop in the original decompressor:
157
+ bits = 0
158
+ power = 1
159
+ while power != (1 << n_bits):
160
+ resb = data_val & data_position
161
+ data_position >>= 1
162
+ if data_position == 0:
163
+ data_position = reset_value
164
+ data_val = get_next_value(data_index++)
165
+ bits |= (1 if resb > 0 else 0) * power
166
+ power <<= 1
167
+
168
+ :param n_bits: Number of bits to read(must be >= 1).
169
+ :return: int - The reconstructed integer value (LSB-first accumulation).
170
+ """
171
+ result = 0
172
+ power = 1
173
+ max_power = 1 << n_bits
174
+
175
+ while power != max_power:
176
+ # If position == 0, the previous read_bits call exhausted the last character;
177
+ # Load the next one now, at the start of this bit read;
178
+ if self._position == 0:
179
+ self._position = self._reset_value
180
+ self._val = self._get_char_value(self._index)
181
+ self._index += 1
182
+
183
+ resb = self._val & self._position
184
+ self._position >>= 1
185
+ result |= (1 if resb > 0 else 0) * power
186
+ power <<= 1
187
+
188
+ return result
189
+
190
+
191
+ @property
192
+ def index(self) -> int:
193
+ """
194
+ The index of the next character that *would* be fetched.
195
+ """
196
+ return self._index