py-lzstring 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_lzstring-0.1.0/LICENSE +8 -0
- py_lzstring-0.1.0/PKG-INFO +155 -0
- py_lzstring-0.1.0/README.md +120 -0
- py_lzstring-0.1.0/pyproject.toml +59 -0
- py_lzstring-0.1.0/setup.cfg +4 -0
- py_lzstring-0.1.0/src/lzstring/__init__.py +47 -0
- py_lzstring-0.1.0/src/lzstring/_bitstream.py +196 -0
- py_lzstring-0.1.0/src/lzstring/_core.py +280 -0
- py_lzstring-0.1.0/src/lzstring/_encodings.py +274 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/PKG-INFO +155 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/SOURCES.txt +16 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/dependency_links.txt +1 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/entry_points.txt +2 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/requires.txt +4 -0
- py_lzstring-0.1.0/src/py_lzstring.egg-info/top_level.txt +2 -0
- py_lzstring-0.1.0/src/tests/test_bitstream.py +424 -0
- py_lzstring-0.1.0/src/tests/test_core.py +485 -0
- py_lzstring-0.1.0/src/tests/test_encodings.py +464 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Copyright 2026 Frank Hoffmann
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
8
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py-lzstring
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LZ-based string compression - Python port of lz-string@1.5.0 (JavaScript)
|
|
5
|
+
Author-email: "Original Code: Pieroxy pieroxy@pieroxy.net, Port: Frank Hoffmann" <frank.h.dev@protonmail.com>
|
|
6
|
+
License: Copyright 2026 Frank Hoffmann
|
|
7
|
+
|
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
11
|
+
|
|
12
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
13
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
14
|
+
Project-URL: Homepage, https://github.com/Frank-Hoffmann-Dev/py-lzstring
|
|
15
|
+
Project-URL: Bug Tracker, https://github.com/Frank-Hoffmann-Dev/py-lzstring/issues
|
|
16
|
+
Keywords: compression,lz-string,lzstring,lz78,lzw
|
|
17
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
18
|
+
Classifier: Intended Audience :: Developers
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
27
|
+
Classifier: Typing :: Typed
|
|
28
|
+
Requires-Python: >=3.10
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-benchmark>=4; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# py-lzstring
|
|
37
|
+
|
|
38
|
+
Python port of [lz-string](https://github.com/pieroxy/lz-string) - byte-for-byte compatible with lz-string@1.5.0.
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
```bash
|
|
43
|
+
pip install py-lzstring
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
No runtime dependencies. Requires Python 3.10+.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Quickstart
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import lzstring
|
|
53
|
+
|
|
54
|
+
# Compress to base64 (safe for HTTP, JSON, localStorage);
|
|
55
|
+
compressed = lzstring.compress_to_base64("Hello, World!")
|
|
56
|
+
print(compressed) # 'BIUwNmD2A0AEDqkBOYAmBCIA';
|
|
57
|
+
|
|
58
|
+
original = lzstring.decompress_from_base64(compressed)
|
|
59
|
+
print(original) # 'Hello, World!';
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
## Encodings
|
|
64
|
+
|
|
65
|
+
| Function pair | bits/char | Use case |
|
|
66
|
+
|---|---|---|
|
|
67
|
+
| `compress` / `decompress` | 16 | In-memory; smallest output |
|
|
68
|
+
| `compress_to_utf16` / `decompress_from_utf16` | 15 | localStorage (all browsers) |
|
|
69
|
+
| `compress_to_base64` / `decompress_from_base64` | 6 | HTTP, JSON, data-URLs |
|
|
70
|
+
| `compress_to_encoded_uri_component` / `decompress_from_encoded_uri_component` | 6 | URL query strings |
|
|
71
|
+
| `compress_to_uint8array` / `decompress_from_uint8array` | — | Binary I/O, `bytes` output |
|
|
72
|
+
|
|
73
|
+
## Interoperability with JavaScript
|
|
74
|
+
|
|
75
|
+
All encodings are bit-exact with lz-string@1.5.0:
|
|
76
|
+
|
|
77
|
+
```js
|
|
78
|
+
// JavaScript
|
|
79
|
+
const LZString = require("lz-string");
|
|
80
|
+
const compressed = LZString.compressToBase64("Hello, World!");
|
|
81
|
+
// "BIUwNmD2A0AEDqkBOYAmBCIA"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
# Python
|
|
86
|
+
import lzstring
|
|
87
|
+
lzstring.decompress_from_base64("BIUwNmD2A0AEDqkBOYAmBCIA")
|
|
88
|
+
# "Hello, World!"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## API reference
|
|
92
|
+
|
|
93
|
+
All compress functions accept `str | None` and return `str` (or `bytes` for the
|
|
94
|
+
Uint8Array variant). Passing `None` mirrors the JavaScript null-handling and
|
|
95
|
+
returns `""`.
|
|
96
|
+
|
|
97
|
+
All decompress functions accept `str | None` (or `bytes | None` for Uint8Array)
|
|
98
|
+
and return `str | None`. An empty string returns `None` (like JS `null`),
|
|
99
|
+
indicating invalid input.
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import lzstring
|
|
103
|
+
|
|
104
|
+
# Raw (most compact, arbitrary Unicode output)
|
|
105
|
+
lzstring.compress("...")
|
|
106
|
+
lzstring.decompress("...")
|
|
107
|
+
|
|
108
|
+
# UTF-16 (printable characters only, safe for all localStorage implementations)
|
|
109
|
+
lzstring.compress_to_utf16("...")
|
|
110
|
+
lzstring.decompress_from_utf16("...")
|
|
111
|
+
|
|
112
|
+
# Base64 (standard alphabet with = padding)
|
|
113
|
+
lzstring.compress_to_base64("...")
|
|
114
|
+
lzstring.decompress_from_base64("...")
|
|
115
|
+
|
|
116
|
+
# URI component (URL-safe, no padding)
|
|
117
|
+
lzstring.compress_to_encoded_uri_component("...")
|
|
118
|
+
lzstring.decompress_from_encoded_uri_component("...")
|
|
119
|
+
|
|
120
|
+
# Uint8Array (returns / accepts bytes)
|
|
121
|
+
lzstring.compress_to_uint8array("...") # → bytes
|
|
122
|
+
lzstring.decompress_from_uint8array(b"...") # → str | None
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT License
|
|
129
|
+
```
|
|
130
|
+
Copyright 2026 Frank Hoffmann
|
|
131
|
+
|
|
132
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
133
|
+
|
|
134
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
135
|
+
|
|
136
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
137
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
## Thanks
|
|
142
|
+
|
|
143
|
+
Many thanks to all the developers of the libraries used and to the community for creating so many incredibly useful tools.
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
## AI Usage
|
|
147
|
+
|
|
148
|
+
I used the AI assistant **Anthropic Claude AI - Sonnet 4.6** to create this tool.
|
|
149
|
+
|
|
150
|
+
As a computer scientist, I have reviewed and approved every single line of code, and I understand the tool’s internal processes and how it works.
|
|
151
|
+
I didn’t just copy and paste the code from the AI.
|
|
152
|
+
Instead, I wrote it by hand, line by line, making changes whenever I deemed it necessary.
|
|
153
|
+
|
|
154
|
+
Nevertheless, there may still be errors or poor design choices.
|
|
155
|
+
Everyone is free to examine, modify, improve, fork the code or call it AI slop :D
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# py-lzstring
|
|
2
|
+
|
|
3
|
+
Python port of [lz-string](https://github.com/pieroxy/lz-string) - byte-for-byte compatible with lz-string@1.5.0.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
```bash
|
|
8
|
+
pip install py-lzstring
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
No runtime dependencies. Requires Python 3.10+.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## Quickstart
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import lzstring
|
|
18
|
+
|
|
19
|
+
# Compress to base64 (safe for HTTP, JSON, localStorage);
|
|
20
|
+
compressed = lzstring.compress_to_base64("Hello, World!")
|
|
21
|
+
print(compressed) # 'BIUwNmD2A0AEDqkBOYAmBCIA';
|
|
22
|
+
|
|
23
|
+
original = lzstring.decompress_from_base64(compressed)
|
|
24
|
+
print(original) # 'Hello, World!';
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## Encodings
|
|
29
|
+
|
|
30
|
+
| Function pair | bits/char | Use case |
|
|
31
|
+
|---|---|---|
|
|
32
|
+
| `compress` / `decompress` | 16 | In-memory; smallest output |
|
|
33
|
+
| `compress_to_utf16` / `decompress_from_utf16` | 15 | localStorage (all browsers) |
|
|
34
|
+
| `compress_to_base64` / `decompress_from_base64` | 6 | HTTP, JSON, data-URLs |
|
|
35
|
+
| `compress_to_encoded_uri_component` / `decompress_from_encoded_uri_component` | 6 | URL query strings |
|
|
36
|
+
| `compress_to_uint8array` / `decompress_from_uint8array` | — | Binary I/O, `bytes` output |
|
|
37
|
+
|
|
38
|
+
## Interoperability with JavaScript
|
|
39
|
+
|
|
40
|
+
All encodings are bit-exact with lz-string@1.5.0:
|
|
41
|
+
|
|
42
|
+
```js
|
|
43
|
+
// JavaScript
|
|
44
|
+
const LZString = require("lz-string");
|
|
45
|
+
const compressed = LZString.compressToBase64("Hello, World!");
|
|
46
|
+
// "BIUwNmD2A0AEDqkBOYAmBCIA"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# Python
|
|
51
|
+
import lzstring
|
|
52
|
+
lzstring.decompress_from_base64("BIUwNmD2A0AEDqkBOYAmBCIA")
|
|
53
|
+
# "Hello, World!"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## API reference
|
|
57
|
+
|
|
58
|
+
All compress functions accept `str | None` and return `str` (or `bytes` for the
|
|
59
|
+
Uint8Array variant). Passing `None` mirrors the JavaScript null-handling and
|
|
60
|
+
returns `""`.
|
|
61
|
+
|
|
62
|
+
All decompress functions accept `str | None` (or `bytes | None` for Uint8Array)
|
|
63
|
+
and return `str | None`. An empty string returns `None` (like JS `null`),
|
|
64
|
+
indicating invalid input.
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import lzstring
|
|
68
|
+
|
|
69
|
+
# Raw (most compact, arbitrary Unicode output)
|
|
70
|
+
lzstring.compress("...")
|
|
71
|
+
lzstring.decompress("...")
|
|
72
|
+
|
|
73
|
+
# UTF-16 (printable characters only, safe for all localStorage implementations)
|
|
74
|
+
lzstring.compress_to_utf16("...")
|
|
75
|
+
lzstring.decompress_from_utf16("...")
|
|
76
|
+
|
|
77
|
+
# Base64 (standard alphabet with = padding)
|
|
78
|
+
lzstring.compress_to_base64("...")
|
|
79
|
+
lzstring.decompress_from_base64("...")
|
|
80
|
+
|
|
81
|
+
# URI component (URL-safe, no padding)
|
|
82
|
+
lzstring.compress_to_encoded_uri_component("...")
|
|
83
|
+
lzstring.decompress_from_encoded_uri_component("...")
|
|
84
|
+
|
|
85
|
+
# Uint8Array (returns / accepts bytes)
|
|
86
|
+
lzstring.compress_to_uint8array("...") # → bytes
|
|
87
|
+
lzstring.decompress_from_uint8array(b"...") # → str | None
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
## License
|
|
92
|
+
|
|
93
|
+
MIT License
|
|
94
|
+
```
|
|
95
|
+
Copyright 2026 Frank Hoffmann
|
|
96
|
+
|
|
97
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
98
|
+
|
|
99
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
100
|
+
|
|
101
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
102
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
## Thanks
|
|
107
|
+
|
|
108
|
+
Many thanks to all the developers of the libraries used and to the community for creating so many incredibly useful tools.
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
## AI Usage
|
|
112
|
+
|
|
113
|
+
I used the AI assistant **Anthropic Claude AI - Sonnet 4.6** to create this tool.
|
|
114
|
+
|
|
115
|
+
As a computer scientist, I have reviewed and approved every single line of code, and I understand the tool’s internal processes and how it works.
|
|
116
|
+
I didn’t just copy and paste the code from the AI.
|
|
117
|
+
Instead, I wrote it by hand, line by line, making changes whenever I deemed it necessary.
|
|
118
|
+
|
|
119
|
+
Nevertheless, there may still be errors or poor design choices.
|
|
120
|
+
Everyone is free to examine, modify, improve, fork the code or call it AI slop :D
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "py-lzstring"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "LZ-based string compression - Python port of lz-string@1.5.0 (JavaScript)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Original Code: Pieroxy pieroxy@pieroxy.net, Port: Frank Hoffmann", email = "frank.h.dev@protonmail.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["compression", "lz-string", "lzstring", "lz78", "lzw"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 5 - Production/Stable",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
"Topic :: System :: Archiving :: Compression",
|
|
27
|
+
"Typing :: Typed"
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [] # Pure stdlib - no runtime dependencies;
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = [
|
|
34
|
+
"pytest>=8",
|
|
35
|
+
"pytest-benchmark>=4"
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
lz_string = "lz_string.main:main"  # NOTE(review): the sdist ships only the "lzstring" package (no "lz_string" package, no "main" module) - this console script presumably fails with ModuleNotFoundError; confirm and retarget or remove
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/Frank-Hoffmann-Dev/py-lzstring"
|
|
43
|
+
"Bug Tracker" = "https://github.com/Frank-Hoffmann-Dev/py-lzstring/issues"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["src"]
|
|
47
|
+
|
|
48
|
+
[tool.pytest.ini_options]
|
|
49
|
+
testpaths = ["tests"]
|
|
50
|
+
addopts = "-v --tb=short"
|
|
51
|
+
|
|
52
|
+
[tool.ruff]
|
|
53
|
+
line-length = 99
|
|
54
|
+
target-version = "py310"
|
|
55
|
+
|
|
56
|
+
[tool.mypy]
|
|
57
|
+
python_version = "3.10"
|
|
58
|
+
strict = true
|
|
59
|
+
files = ["src"]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
py-lzstring - Python port of lz-string (https://github.com/pieroxy/lz-string).
|
|
3
|
+
|
|
4
|
+
LZ-based string compression compatible with the JavaScript library v1.5.0.
|
|
5
|
+
Five encoding variants are available for different transport or storage needs.
|
|
6
|
+
|
|
7
|
+
Quick Start:
|
|
8
|
+
>>> import lzstring
|
|
9
|
+
>>> compressed = lzstring.compress_to_base64("Hello, World!")
|
|
10
|
+
>>> lzstring.decompress_from_base64(compressed)
|
|
11
|
+
'Hello, World!'
|
|
12
|
+
|
|
13
|
+
All functions accept 'str | None' and return 'str | None' or 'bytes'.
|
|
14
|
+
Passing 'None' mirrors the JavaScript library's null-handling behaviour (compress and decompress functions return '""').
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Encoding Overview:
|
|
18
|
+
|
|
19
|
+
Function pair bits/char Best suited for
|
|
20
|
+
--------------------------------------------------------------------------- --------- -------------------------
|
|
21
|
+
compress / decompress 16 In-memory; smallest output
|
|
22
|
+
compress_to_utf16 / decompress_from_utf16 15 localStorage (all browsers)
|
|
23
|
+
compress_to_base64 / decompress_from_base64 6 HTTP, JSON, data-URLs
|
|
24
|
+
compress_to_encoded_uri_component / decompress_from_encoded_uri_component 6 URL query strings
|
|
25
|
+
compress_to_uint8array / decompress_from_uint8array 8 (bytes) Binary I/O, Node-style
|
|
26
|
+
|
|
27
|
+
Compatibility:
|
|
28
|
+
All functions are byte-for-byte compatible with lz-string@1.5.0 and verified against its JavaScript test suite.
|
|
29
|
+
"""
|
|
30
|
+
from lzstring._encodings import (
|
|
31
|
+
compress, decompress,
|
|
32
|
+
compress_to_utf16, decompress_from_utf16,
|
|
33
|
+
compress_to_base64, decompress_from_base64,
|
|
34
|
+
compress_to_encoded_uri_component, decompress_from_encoded_uri_component,
|
|
35
|
+
compress_to_uint8array, decompress_from_uint8array
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
__version__ = "0.1.0"
|
|
39
|
+
|
|
40
|
+
__all__ = [
|
|
41
|
+
"__version__",
|
|
42
|
+
"compress", "decompress",
|
|
43
|
+
"compress_to_utf16", "decompress_from_utf16",
|
|
44
|
+
"compress_to_base64", "decompress_from_base64",
|
|
45
|
+
"compress_to_encoded_uri_component", "decompress_from_encoded_uri_component",
|
|
46
|
+
"compress_to_uint8array", "decompress_from_uint8array"
|
|
47
|
+
]
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bit-level I/O primitives for the LZString compression algorithm.
|
|
3
|
+
|
|
4
|
+
The LZString format packs variable-width token codes (starting at 2 bits, growing as the directory expands) into a stream of fixed-width 'characters'.
|
|
5
|
+
Each output character holds exactly 'bits_per_char' bits, where bits_per_char is determined by the encoding:
|
|
6
|
+
|
|
7
|
+
- Raw -> 16 bits per character; UTF-16 -> 15 bits per character
|
|
8
|
+
- Base64 / URI -> 6 bits per character
|
|
9
|
+
- Uint8Array -> 8 bits per character (handled at a higher layer)
|
|
10
|
+
|
|
11
|
+
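Token values are serialised LSB-first: each bit is shifted into bit-0 of the accumulator, pushing the bits written earlier one position up, so the first bit written ends up in the most significant position of the emitted character.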
|
|
12
|
+
Once the accumulator is full (position reaches bits_per_char) its integer value is handed to a caller-supplied 'emit(value: int) -> None' callback, then it resets to 0.
|
|
13
|
+
|
|
14
|
+
This LSB-first, fixed-width-character packing is the exact scheme used by the original JavaScript implementation and all compatible ports.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Callable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ----------------------------------------------------------------------------------------------------------------
|
|
22
|
+
# BitWriter;
|
|
23
|
+
# ----------------------------------------------------------------------------------------------------------------
|
|
24
|
+
class BitWriter:
    """
    Pack a stream of single bits into fixed-width integer "characters".

    Bits are taken from each written value starting at its least significant bit and shifted into
    bit-0 of an accumulator, so the first bit written ends up in the most significant position of
    the character that is eventually emitted.

    :param bits_per_char: Width in bits of one output character (e.g. 15 for the UTF-16 encoding, 6 for base64/URI).
    :param emit: Callback that receives the integer value of every completed character.
                 Turning that integer into an output symbol (e.g. 'chr(value)' or an alphabet lookup) is the caller's job.
    :raises ValueError: If 'bits_per_char' is smaller than 1.

    Usage:
        >>> chunks: list[int] = []
        >>> w = BitWriter(bits_per_char=6, emit=chunks.append)
        >>> w.write_bits(value=0b10110, n_bits=5)
        >>> w.flush()
        >>> chunks
        [26]
    """
    __slots__ = ("_bits_per_char", "_emit", "_val", "_position")

    def __init__(self, bits_per_char: int, emit: Callable[[int], None]) -> None:
        if bits_per_char < 1:
            raise ValueError(f"bits_per_char must be >= 1, got {bits_per_char}")

        self._emit = emit
        self._bits_per_char = bits_per_char
        self._position: int = 0     # Number of bits currently held in _val;
        self._val: int = 0          # Bits collected for the character in progress;


    # ----------------------------------------------------------------------------------------------------------------
    # Public Interface;
    # ----------------------------------------------------------------------------------------------------------------
    def write_bits(self, value: int, n_bits: int) -> None:
        """
        Append the lowest 'n_bits' bits of 'value' to the stream, least significant bit first.

        Every time the accumulator holds 'bits_per_char' bits, it is handed to the emit callback
        and cleared.

        :param value: Integer supplying the bits; only its lowest 'n_bits' bits are used.
        :param n_bits: How many bits to append (a value <= 0 appends nothing).
        """
        width = self._bits_per_char

        for _ in range(n_bits):
            bit = value & 1
            value >>= 1

            # The new bit enters at bit-0; everything written before moves one position up;
            self._val = (self._val << 1) | bit
            self._position += 1

            if self._position == width:
                self._flush_char()


    def flush(self) -> None:
        """
        Zero-pad the character in progress to full width and emit it.

        Exactly one character is always emitted - even when no bits are pending, in which case
        the emitted value is 0. This mirrors the JS flush loop, which tests
        'position == bitsPerChar - 1' and therefore guarantees a trailing character at the end of
        every compressed stream.

        Intended to be called once, after the last token has been written.
        """
        # _position is always in [0, bits_per_char - 1] between calls (write_bits emits as soon as
        # it reaches bits_per_char), so one shift by the missing width equals the bit-by-bit
        # padding loop of the reference implementation;
        self._val <<= self._bits_per_char - self._position
        self._flush_char()



    # ----------------------------------------------------------------------------------------------------------------
    # Internal Helper Functions;
    # ----------------------------------------------------------------------------------------------------------------
    def _flush_char(self) -> None:
        """
        Hand the accumulator to the emit callback, then clear accumulator and bit counter.
        """
        self._emit(self._val)
        self._position = 0
        self._val = 0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ----------------------------------------------------------------------------------------------------------------
|
|
109
|
+
# BitReader;
|
|
110
|
+
# ----------------------------------------------------------------------------------------------------------------
|
|
111
|
+
class BitReader:
    """
    Pull single bits out of a sequence of fixed-width integer "characters".

    Within each character the bits are consumed from the position marked by 'reset_value'
    downwards (mask shifted right after every bit). A new character is fetched lazily, only when
    a bit is actually requested and the current character has been used up.

    :param get_char_value: Callable taking a 0-based index and returning the integer value of the
                           character at that position of the compressed input
                           (e.g. 'ord(compressed[index])', or a reverse-alphabet lookup for base64).
    :param reset_value: Single-bit mask selecting the first bit to read in every character.
                        For 'bits_per_char = N' this is '1 << (N - 1)'.
    :raises ValueError: If 'reset_value' is smaller than 1.

    Examples for reset_value:
        - Base64 (6 bit):  32    (0x20, i.e. 1 << 5)
        - UTF-16 (15 bit): 16384 (0x4000, i.e. 1 << 14)
        - Raw (16 bit, per the package's encoding table): 32768 (0x8000, i.e. 1 << 15)

    Usage:
        >>> data = [0b101101]                                   # One 6-bit char;
        >>> r = BitReader(lambda i: data[i], reset_value=32)
        >>> r.read_bits(3)                                      # Top 3 bits 1, 0, 1 -> 1*1 + 0*2 + 1*4;
        5
    """
    __slots__ = ("_get_char_value", "_reset_value", "_val", "_position", "_index")

    def __init__(self, get_char_value: Callable[[int], int], reset_value: int) -> None:
        if reset_value < 1:
            raise ValueError(f"reset_value must be >= 1, got {reset_value}")

        self._reset_value = reset_value
        self._get_char_value = get_char_value

        # A zero mask means "no character loaded yet", so the first bit read fetches index 0;
        self._index: int = 0        # Index of the next character to fetch;
        self._position: int = 0     # Single-bit mask of the next bit inside _val;
        self._val: int = 0          # Character currently being consumed;


    # ----------------------------------------------------------------------------------------------------------------
    # Public Interface;
    # ----------------------------------------------------------------------------------------------------------------
    def read_bits(self, n_bits: int) -> int:
        """
        Read 'n_bits' bits and assemble them into an integer.

        The first bit read becomes bit-0 of the result, the second bit-1, and so on - the inverse
        of how the writer serialises a value.

        :param n_bits: Number of bits to read (0 yields 0 without touching the input; a negative
                       value raises ValueError from the shift below).
        :return: int - The reconstructed integer value.
        """
        # Computed before any state is touched, so an invalid n_bits fails without side effects;
        limit = 1 << n_bits
        value = 0
        weight = 1

        while weight < limit:
            if not self._position:
                # Current character used up (or nothing loaded yet): fetch the next one;
                self._position = self._reset_value
                self._val = self._get_char_value(self._index)
                self._index += 1

            if self._val & self._position:
                value |= weight

            self._position >>= 1
            weight <<= 1

        return value


    @property
    def index(self) -> int:
        """
        Index of the character that the next fetch will request.
        """
        return self._index
|