dissect.util 3.24.dev4 (dissect_util-3.24.dev4-cp314-cp314t-musllinux_1_2_aarch64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dissect/util/__init__.py +20 -0
- dissect/util/_build.py +17 -0
- dissect/util/_native/__init__.pyi +3 -0
- dissect/util/_native/compression/__init__.pyi +3 -0
- dissect/util/_native/compression/lz4.pyi +7 -0
- dissect/util/_native/compression/lzo.pyi +3 -0
- dissect/util/_native/hash/__init__.py +3 -0
- dissect/util/_native/hash/crc32c.py +2 -0
- dissect/util/_native.cpython-314t-aarch64-linux-musl.so +0 -0
- dissect/util/compression/__init__.py +45 -0
- dissect/util/compression/lz4.py +95 -0
- dissect/util/compression/lzbitmap.py +130 -0
- dissect/util/compression/lzfse.py +467 -0
- dissect/util/compression/lznt1.py +92 -0
- dissect/util/compression/lzo.py +118 -0
- dissect/util/compression/lzvn.py +241 -0
- dissect/util/compression/lzxpress.py +80 -0
- dissect/util/compression/lzxpress_huffman.py +184 -0
- dissect/util/compression/sevenbit.py +77 -0
- dissect/util/compression/snappy.py +86 -0
- dissect/util/compression/xz.py +112 -0
- dissect/util/cpio.py +226 -0
- dissect/util/encoding/__init__.py +0 -0
- dissect/util/encoding/surrogateescape.py +21 -0
- dissect/util/exceptions.py +6 -0
- dissect/util/hash/__init__.py +28 -0
- dissect/util/hash/crc32.py +55 -0
- dissect/util/hash/crc32c.py +60 -0
- dissect/util/hash/jenkins.py +102 -0
- dissect/util/ldap.py +237 -0
- dissect/util/plist.py +156 -0
- dissect/util/sid.py +81 -0
- dissect/util/stream.py +772 -0
- dissect/util/tools/__init__.py +0 -0
- dissect/util/tools/dump_nskeyedarchiver.py +61 -0
- dissect/util/ts.py +295 -0
- dissect/util/xmemoryview.py +117 -0
- dissect_util-3.24.dev4.dist-info/METADATA +89 -0
- dissect_util-3.24.dev4.dist-info/RECORD +46 -0
- dissect_util-3.24.dev4.dist-info/WHEEL +5 -0
- dissect_util-3.24.dev4.dist-info/entry_points.txt +2 -0
- dissect_util-3.24.dev4.dist-info/licenses/COPYRIGHT +5 -0
- dissect_util-3.24.dev4.dist-info/licenses/LICENSE +201 -0
- dissect_util-3.24.dev4.dist-info/sboms/auditwheel.cdx.json +1 -0
- dissect_util-3.24.dev4.dist-info/top_level.txt +1 -0
- dissect_util.libs/libgcc_s-2d945d6c.so.1 +0 -0
@@ -0,0 +1,241 @@
+# References:
+# - https://github.com/lzfse/lzfse
+from __future__ import annotations
+
+import io
+import struct
+from typing import BinaryIO
+
+# fmt: off
+OP_SML_D = (
+    0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19,
+    20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37, 40, 41,
+    42, 43, 44, 45, 48, 49, 50, 51, 52, 53, 56, 57, 58, 59, 60, 61,
+    64, 65, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 80, 81, 82, 83,
+    84, 85, 88, 89, 90, 91, 92, 93, 96, 97, 98, 99, 100, 101, 104, 105,
+    106, 107, 108, 109, 128, 129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141,
+    144, 145, 146, 147, 148, 149, 152, 153, 154, 155, 156, 157, 192, 193, 194, 195,
+    196, 197, 200, 201, 202, 203, 204, 205,
+)
+OP_MED_D = (
+    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+)
+OP_LRG_D = (
+    7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 135, 143,
+    151, 159, 199, 207,
+)
+OP_PRE_D = (
+    70, 78, 86, 94, 102, 110, 134, 142, 150, 158, 198, 206,
+)
+OP_SML_M = (
+    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+)
+OP_LRG_M = (
+    240,
+)
+OP_SML_L = (
+    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+)
+OP_LRG_L = (
+    224,
+)
+OP_NOP = (
+    14, 22,
+)
+OP_EOS = (
+    6,
+)
+OP_UDEF = (
+    30, 38, 46, 54, 62, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+    122, 123, 124, 125, 126, 127, 208, 209, 210, 211, 212, 213, 214, 215, 216,
+    217, 218, 219, 220, 221, 222, 223,
+)
+# fmt: on
+
+_H = struct.Struct("<H")
+
+
+def decompress(src: bytes | BinaryIO) -> bytes:
+    """LZVN decompress from a file-like object or bytes.
+
+    Decompresses until EOF or EOS of the input data.
+
+    Args:
+        src: File-like object or bytes to decompress.
+
+    Returns:
+        The decompressed data.
+    """
+    if not hasattr(src, "read"):
+        src = io.BytesIO(src)
+
+    offset = src.tell()
+    src.seek(0, io.SEEK_END)
+    src_size = src.tell() - offset
+    src.seek(offset)
+
+    dst = bytearray()
+
+    opc_len = 0
+
+    # ruff: noqa: N806
+    L = None
+    M = None
+    D = 0
+
+    while src_size > 0:
+        opc = src.read(1)[0]
+
+        if opc in OP_SML_D:
+            # "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
+            # where the length of literal (0-3 bytes) is encoded by the high 2 bits of
+            # the first byte. We first extract the literal length so we know how long
+            # the opcode is, then check that the source can hold both this opcode and
+            # at least one byte of the next (because any valid input stream must be
+            # terminated with an eos token).
+            opc_len = 2
+            L = _extract(opc, 8, 6, 2)
+            M = _extract(opc, 8, 3, 3) + 3
+
+            if src_size <= opc_len + L:
+                break
+
+            D = _extract(opc, 8, 0, 3) << 8 | src.read(1)[0]
+
+        elif opc in OP_MED_D:
+            # "medium distance": This is a minor variant of the "small distance"
+            # encoding, where we will now use two extra bytes instead of one to encode
+            # the rest of the match length and distance. This allows an extra two bits
+            # for the match length, and an extra three bits for the match distance. The
+            # full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
+            opc_len = 3
+            L = _extract(opc, 8, 3, 2)
+
+            if src_size <= opc_len + L:
+                break
+
+            (opc23,) = _H.unpack(src.read(2))
+            M = (_extract(opc, 8, 0, 3) << 2 | _extract(opc23, 16, 0, 2)) + 3
+            D = _extract(opc23, 16, 2, 14)
+
+        elif opc in OP_LRG_D:
+            # "large distance": This is another variant of the "small distance"
+            # encoding, where we will now use two extra bytes to encode the match
+            # distance, which allows distances up to 65535 to be represented. The full
+            # structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
+            opc_len = 3
+            L = _extract(opc, 8, 6, 2)
+            M = _extract(opc, 8, 3, 3) + 3
+
+            if src_size <= opc_len + L:
+                break
+
+            (D,) = _H.unpack(src.read(2))
+
+        elif opc in OP_PRE_D:
+            # "previous distance": This opcode has the structure LLMMM110, where the
+            # length of the literal (0-3 bytes) is encoded by the high 2 bits of the
+            # first byte. We first extract the literal length so we know how long
+            # the opcode is, then check that the source can hold both this opcode and
+            # at least one byte of the next (because any valid input stream must be
+            # terminated with an eos token).
+            opc_len = 1
+            L = _extract(opc, 8, 6, 2)
+            M = _extract(opc, 8, 3, 3) + 3
+
+            if src_size <= opc_len + L:
+                break
+
+        elif opc in OP_SML_M:
+            # "small match": This opcode has no literal, and uses the previous match
+            # distance (i.e. it encodes only the match length), in a single byte as
+            # 1111MMMM.
+            opc_len = 1
+            L = None
+            M = _extract(opc, 8, 0, 4)
+
+            if src_size <= opc_len:
+                break
+
+        elif opc in OP_LRG_M:
+            # "large match": This opcode has no literal, and uses the previous match
+            # distance (i.e. it encodes only the match length). It is encoded in two
+            # bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
+            # be represented by sml_m, there is an implicit bias of 16 on the match
+            # length; the representable values are [16,271].
+            opc_len = 2
+            L = None
+
+            if src_size <= opc_len:
+                break
+
+            M = src.read(1)[0] + 16
+
+        elif opc in OP_SML_L:
+            # "small literal": This opcode has no match, and encodes only a literal
+            # of length up to 15 bytes. The format is 1110LLLL LITERAL.
+            opc_len = 1
+            L = _extract(opc, 8, 0, 4)
+            M = None
+
+        elif opc in OP_LRG_L:
+            # "large literal": This opcode has no match, and encodes only a literal
+            # (i.e. it encodes only the literal length). It is encoded in two
+            # bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
+            # bytes can be represented by sml_l, there is an implicit bias of 16 on
+            # the literal length; the representable values are [16,271].
+            opc_len = 2
+
+            if src_size <= opc_len:
+                break
+
+            L = src.read(1)[0] + 16
+            M = None
+
+        elif opc in OP_NOP:
+            opc_len = 1
+            L = None
+            M = None
+
+            if src_size <= opc_len:
+                break
+
+        elif opc in OP_EOS:
+            opc_len = 8
+
+            if src_size < opc_len:
+                break
+
+            src_size -= opc_len + L
+            break
+
+        elif opc in OP_UDEF:
+            raise ValueError("Undefined opcode")
+
+        # Update remaining source size
+        src_size -= opc_len
+
+        # Copy literal
+        if L is not None:
+            src_size -= L
+            dst += src.read(L)
+
+        # Match
+        if M is not None:
+            if len(dst) < D or D == 0:
+                raise ValueError("Invalid match distance")
+
+            remaining = M
+            while remaining > 0:
+                match_size = min(remaining, D)
+                dst += dst[-D : (-D + match_size) or None]
+                remaining -= match_size
+
+    return bytes(dst)
+
+
+def _extract(container: int, container_width: int, lsb: int, width: int) -> int:
+    if width == container_width:
+        return container
+    return (container >> lsb) & ((1 << width) - 1)
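The 241-line hunk above is dissect/util/compression/lzvn.py, a pure-Python decoder for the LZVN token stream used by LZFSE. A minimal usage sketch; the input below is hand-assembled from the opcode layout documented in the comments (a small literal followed by an end-of-stream token padded to its 8-byte opcode length) rather than produced by a real compressor:

from dissect.util.compression import lzvn

# 0xE3 is a sml_l opcode (1110LLLL) with L=3, followed by the 3-byte literal.
# 0x06 is the eos opcode, padded out to its full 8-byte opcode length.
data = b"\xe3ABC" + b"\x06" + b"\x00" * 7
assert lzvn.decompress(data) == b"ABC"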
@@ -0,0 +1,80 @@
+# Reference: [MS-XCA]
+from __future__ import annotations
+
+import io
+import struct
+from typing import BinaryIO
+
+
+def decompress(src: bytes | BinaryIO) -> bytes:
+    """LZXPRESS decompress from a file-like object or bytes.
+
+    Args:
+        src: File-like object or bytes to decompress.
+
+    Returns:
+        The decompressed data.
+    """
+    if not hasattr(src, "read"):
+        src = io.BytesIO(src)
+
+    offset = src.tell()
+    src.seek(0, io.SEEK_END)
+    size = src.tell() - offset
+    src.seek(offset)
+
+    dst = bytearray()
+
+    buffered_flags = 0
+    buffered_flags_count = 0
+    last_length_half_byte = 0
+
+    while src.tell() - offset < size:
+        if buffered_flags_count == 0:
+            buffered_flags = struct.unpack("<I", src.read(4))[0]
+            buffered_flags_count = 32
+
+        buffered_flags_count -= 1
+        if buffered_flags & (1 << buffered_flags_count) == 0:
+            dst.append(ord(src.read(1)))
+        else:
+            if src.tell() - offset == size:
+                break
+
+            match = struct.unpack("<H", src.read(2))[0]
+            match_offset, match_length = divmod(match, 8)
+            match_offset += 1
+
+            if match_length == 7:
+                if last_length_half_byte == 0:
+                    last_length_half_byte = src.tell()
+                    match_length = ord(src.read(1)) % 16
+                else:
+                    rewind = src.tell()
+                    src.seek(last_length_half_byte)
+                    match_length = ord(src.read(1)) // 16
+                    src.seek(rewind)
+                    last_length_half_byte = 0
+
+                if match_length == 15:
+                    match_length = ord(src.read(1))
+                    if match_length == 255:
+                        match_length = struct.unpack("<H", src.read(2))[0]
+                        if match_length == 0:
+                            match_length = struct.unpack("<I", src.read(4))[0]
+
+                        if match_length < 15 + 7:
+                            raise ValueError("wrong match length")
+
+                        match_length -= 15 + 7
+                    match_length += 15
+                match_length += 7
+            match_length += 3
+
+            remaining = match_length
+            while remaining > 0:
+                match_size = min(remaining, match_offset)
+                dst += dst[-match_offset : (-match_offset + match_size) or None]
+                remaining -= match_size
+
+    return bytes(dst)
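The 80-line hunk above is dissect/util/compression/lzxpress.py, the plain LZ77 variant of [MS-XCA]: a 32-bit little-endian flag word precedes the data, where a 0 bit means a raw literal byte and a 1 bit means a 16-bit (offset, length) match token. A minimal sketch with a hand-built buffer of three literals, not the output of a real compressor:

from dissect.util.compression import lzxpress

# Flag word 0x1FFFFFFF: the top three bits are 0, so the next three input
# bytes are copied through as literals; the remaining flag bits are never
# consumed because the input ends there.
data = b"\xff\xff\xff\x1f" + b"ABC"
assert lzxpress.decompress(data) == b"ABC"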
@@ -0,0 +1,184 @@
+# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-frs2/8cb5bae9-edf3-4833-9f0a-9d7e24218d3d
+# https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-XCA/[MS-XCA].pdf
+from __future__ import annotations
+
+import io
+import struct
+from typing import BinaryIO, NamedTuple
+
+
+class Symbol(NamedTuple):
+    length: int
+    symbol: int
+
+
+def _read_16_bit(fh: BinaryIO) -> int:
+    return struct.unpack("<H", fh.read(2).rjust(2, b"\x00"))[0]
+
+
+class Node:
+    __slots__ = ("children", "is_leaf", "symbol")
+
+    def __init__(self, symbol: Symbol | None = None, is_leaf: bool = False):
+        self.symbol = symbol
+        self.is_leaf = is_leaf
+        self.children = [None, None]
+
+
+def _add_leaf(nodes: list[Node], idx: int, mask: int, bits: int) -> int:
+    node = nodes[0]
+    i = idx + 1
+
+    while bits > 1:
+        bits -= 1
+        childidx = (mask >> bits) & 1
+        if node.children[childidx] is None:
+            node.children[childidx] = nodes[i]
+            nodes[i].is_leaf = False
+            i += 1
+        node = node.children[childidx]
+
+    node.children[mask & 1] = nodes[idx]
+    return i
+
+
+def _build_tree(buf: bytes) -> Node:
+    if len(buf) != 256:
+        raise ValueError("Not enough data for Huffman code tree")
+
+    nodes = [Node() for _ in range(1024)]
+    symbols: list[Symbol] = []
+
+    for i, c in enumerate(buf):
+        symbols.append(Symbol(c & 0x0F, i * 2))
+        symbols.append(Symbol((c >> 4) & 0x0F, i * 2 + 1))
+
+    symbols = sorted(symbols)
+
+    symbol_index_start = 0
+    for s in symbols:
+        if s.length > 0:
+            break
+        symbol_index_start += 1
+
+    mask = 0
+    bits = 1
+
+    root = nodes[0]
+
+    tree_index = 1
+    for symbol_index in range(symbol_index_start, 512):
+        s = symbols[symbol_index]
+
+        node = nodes[tree_index]
+        node.symbol = s.symbol
+        node.is_leaf = True
+
+        mask = (mask << s.length - bits) & 0xFFFFFFFF
+        bits = s.length
+
+        tree_index = _add_leaf(nodes, tree_index, mask, bits)
+        mask += 1
+
+    return root
+
+
+class BitString:
+    def __init__(self):
+        self.source = None
+        self.mask = 0
+        self.bits = 0
+
+    @property
+    def index(self) -> int:
+        return self.source.tell()
+
+    def init(self, fh: BinaryIO) -> None:
+        self.mask = (_read_16_bit(fh) << 16) + _read_16_bit(fh)
+        self.bits = 32
+        self.source = fh
+
+    def read(self, n: int) -> bytes:
+        return self.source.read(n)
+
+    def lookup(self, n: int) -> int:
+        if n == 0:
+            return 0
+
+        return self.mask >> (32 - n)
+
+    def skip(self, n: int) -> None:
+        self.mask = (self.mask << n) & 0xFFFFFFFF
+        self.bits -= n
+        if self.bits < 16:
+            self.mask += _read_16_bit(self.source) << (16 - self.bits)
+            self.bits += 16
+
+    def decode(self, root: Node) -> Symbol:
+        node = root
+        while not node.is_leaf:
+            bit = self.lookup(1)
+            self.skip(1)
+            node = node.children[bit]
+        return node.symbol
+
+
+def decompress(src: bytes | BinaryIO) -> bytes:
+    """LZXPRESS decompress from a file-like object or bytes.
+
+    Decompresses until EOF of the input data.
+
+    Args:
+        src: File-like object or bytes to decompress.
+
+    Returns:
+        The decompressed data.
+    """
+    if not hasattr(src, "read"):
+        src = io.BytesIO(src)
+
+    dst = bytearray()
+
+    start_offset = src.tell()
+    src.seek(0, io.SEEK_END)
+    size = src.tell() - start_offset
+    src.seek(start_offset, io.SEEK_SET)
+
+    bitstring = BitString()
+
+    while src.tell() - start_offset < size:
+        root = _build_tree(src.read(256))
+        bitstring.init(src)
+
+        chunk_size = 0
+        while chunk_size < 65536 and src.tell() - start_offset < size:
+            symbol = bitstring.decode(root)
+            if symbol < 256:
+                dst.append(symbol)
+                chunk_size += 1
+            else:
+                symbol -= 256
+                length = symbol & 0x0F
+                symbol >>= 4
+
+                offset = (1 << symbol) + bitstring.lookup(symbol)
+
+                if length == 15:
+                    length = ord(bitstring.read(1)) + 15
+
+                    if length == 270:
+                        length = _read_16_bit(bitstring.source)
+
+                bitstring.skip(symbol)
+
+                length += 3
+
+                remaining = length
+                while remaining > 0:
+                    match_size = min(remaining, offset)
+                    dst += dst[-offset : (-offset + match_size) or None]
+                    remaining -= match_size
+
+                chunk_size += length
+
+    return bytes(dst)
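The 184-line hunk above is dissect/util/compression/lzxpress_huffman.py, the [MS-XCA] LZ77+Huffman variant: each chunk of up to 64 KiB starts with a 256-byte table of 4-bit code lengths for 512 symbols (256 literals plus 256 match headers), from which _build_tree reconstructs the canonical Huffman tree that BitString.decode then walks. A usage sketch; compressed.bin is a hypothetical file holding an LZXPRESS Huffman compressed buffer:

from dissect.util.compression import lzxpress_huffman

# Hypothetical input file; decompress() accepts bytes or a file-like object.
with open("compressed.bin", "rb") as fh:
    data = lzxpress_huffman.decompress(fh)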
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from io import BytesIO
+from typing import BinaryIO
+
+
+def compress(src: bytes | BinaryIO) -> bytes:
+    """Sevenbit compress from a file-like object or bytes.
+
+    Args:
+        src: File-like object or bytes to compress.
+
+    Returns:
+        The compressed data.
+    """
+    if not hasattr(src, "read"):
+        src = BytesIO(src)
+
+    dst = bytearray()
+
+    val = 0
+    shift = 0
+    while True:
+        _byte = src.read(1)
+        if not len(_byte):
+            break
+
+        val |= (_byte[0] & 0x7F) << shift
+        shift += 7
+
+        if shift >= 8:
+            dst.append(val & 0xFF)
+            val >>= 8
+            shift -= 8
+
+    if val:
+        dst.append(val & 0xFF)
+
+    return bytes(dst)
+
+
+def decompress(src: bytes | BinaryIO, wide: bool = False) -> bytes:
+    """Sevenbit decompress from a file-like object or bytes.
+
+    Args:
+        src: File-like object or bytes to decompress.
+
+    Returns:
+        The decompressed data.
+    """
+    if not hasattr(src, "read"):
+        src = BytesIO(src)
+
+    dst = bytearray()
+
+    val = 0
+    shift = 0
+    while True:
+        _byte = src.read(1)
+        if not len(_byte):
+            break
+
+        val |= _byte[0] << shift
+        dst.append(val & 0x7F)
+        if wide:
+            dst.append(0)
+
+        val >>= 7
+        shift += 1
+        if shift == 7:
+            dst.append(val & 0x7F)
+            if wide:
+                dst.append(0)
+            val >>= 7
+            shift = 0
+
+    return bytes(dst)
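The 77-line hunk above is dissect/util/compression/sevenbit.py, a GSM 7-bit style packer: compress() keeps the low 7 bits of every input byte and packs them back to back, while decompress() unpacks the 7-bit characters again (wide=True additionally emits a zero byte after every character). A small round-trip sketch; the packed bytes happen to match the usual GSM-7 encoding of "Hello":

from dissect.util.compression import sevenbit

packed = sevenbit.compress(b"Hello")
assert packed == b"\xc8\x32\x9b\xfd\x06"
assert sevenbit.decompress(packed) == b"Hello"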
@@ -0,0 +1,86 @@
+# References:
+# - https://github.com/google/snappy/blob/main/format_description.txt
+from __future__ import annotations
+
+import io
+import struct
+from typing import BinaryIO
+
+_H = struct.Struct("<H")
+_I = struct.Struct("<I")
+
+
+def varint(src: BinaryIO) -> int:
+    result = 0
+    shift = 0
+
+    while byte := src.read(1):
+        value = byte[0]
+        if value < 0x80:
+            return result | (value << shift)
+        result |= (value & 0x7F) << shift
+        shift += 7
+
+    raise EOFError("Unexpected EOF while reading varint")
+
+
+def decompress(src: bytes | BinaryIO) -> bytes:
+    """Snappy decompress from a file-like object or bytes.
+
+    Decompresses until the stored uncompressed length in the preamble.
+
+    Args:
+        src: File-like object or bytes to decompress.
+
+    Returns:
+        The decompressed data.
+    """
+    if not hasattr(src, "read"):
+        src = io.BytesIO(src)
+
+    dst = io.BytesIO()
+
+    uncompressed_length = varint(src)
+
+    while dst.tell() < uncompressed_length:
+        tag_byte = src.read(1)[0]
+
+        if (tag := tag_byte & 0b11) == 0:
+            # Literal
+            length = tag_byte >> 2
+            if length < 60:
+                length += 1
+            elif length == 60:
+                length = src.read(1)[0] + 1
+            elif length == 61:
+                length = _H.unpack(src.read(2))[0] + 1
+            elif length == 62:
+                length = _I.unpack(src.read(3) + b"\x00")[0] + 1
+            elif length == 63:
+                length = _I.unpack(src.read(4))[0] + 1
+
+            if len(buf := src.read(length)) < length:
+                raise EOFError("Unexpected EOF while reading literal")
+
+            dst.write(buf)
+            continue
+
+        # Copy with 1, 2 or 4 byte offset
+        if tag == 1:
+            length = ((tag_byte >> 2) & 0b111) + 4
+            offset = ((tag_byte & 0b11100000) << 3) | src.read(1)[0]
+        elif tag == 2:
+            length = (tag_byte >> 2) + 1
+            offset = _H.unpack(src.read(2))[0]
+        else:
+            length = (tag_byte >> 2) + 1
+            offset = _I.unpack(src.read(4))[0]
+
+        dst_offset = dst.tell() - offset
+        buf = dst.getbuffer()[dst_offset : dst_offset + length].tobytes()
+        if offset - length <= 0:
+            buf = (buf * ((length // len(buf)) + 1))[:length]
+
+        dst.write(buf)
+
+    return dst.getvalue()
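The final 86-line hunk is dissect/util/compression/snappy.py, a decoder for raw (non-framed) Snappy data: a varint uncompressed length followed by literal tags and copy tags with 1-, 2- or 4-byte offsets. A minimal sketch with a hand-assembled stream that exercises both a literal and an overlapping copy:

from dissect.util.compression import snappy

# 0x09        varint: uncompressed length 9
# 0x08        literal tag, length 3, followed by the literal "ABC"
# 0x09 0x03   copy tag (1-byte offset), length 6, offset 3; the copy overlaps
#             the output, so "ABC" is repeated to fill the 6 bytes
data = b"\x09\x08ABC\x09\x03"
assert snappy.decompress(data) == b"ABCABCABC"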