dissect.util 3.24.dev4__cp314-cp314t-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. dissect/util/__init__.py +20 -0
  2. dissect/util/_build.py +17 -0
  3. dissect/util/_native/__init__.pyi +3 -0
  4. dissect/util/_native/compression/__init__.pyi +3 -0
  5. dissect/util/_native/compression/lz4.pyi +7 -0
  6. dissect/util/_native/compression/lzo.pyi +3 -0
  7. dissect/util/_native/hash/__init__.py +3 -0
  8. dissect/util/_native/hash/crc32c.py +2 -0
  9. dissect/util/_native.cpython-314t-aarch64-linux-musl.so +0 -0
  10. dissect/util/compression/__init__.py +45 -0
  11. dissect/util/compression/lz4.py +95 -0
  12. dissect/util/compression/lzbitmap.py +130 -0
  13. dissect/util/compression/lzfse.py +467 -0
  14. dissect/util/compression/lznt1.py +92 -0
  15. dissect/util/compression/lzo.py +118 -0
  16. dissect/util/compression/lzvn.py +241 -0
  17. dissect/util/compression/lzxpress.py +80 -0
  18. dissect/util/compression/lzxpress_huffman.py +184 -0
  19. dissect/util/compression/sevenbit.py +77 -0
  20. dissect/util/compression/snappy.py +86 -0
  21. dissect/util/compression/xz.py +112 -0
  22. dissect/util/cpio.py +226 -0
  23. dissect/util/encoding/__init__.py +0 -0
  24. dissect/util/encoding/surrogateescape.py +21 -0
  25. dissect/util/exceptions.py +6 -0
  26. dissect/util/hash/__init__.py +28 -0
  27. dissect/util/hash/crc32.py +55 -0
  28. dissect/util/hash/crc32c.py +60 -0
  29. dissect/util/hash/jenkins.py +102 -0
  30. dissect/util/ldap.py +237 -0
  31. dissect/util/plist.py +156 -0
  32. dissect/util/sid.py +81 -0
  33. dissect/util/stream.py +772 -0
  34. dissect/util/tools/__init__.py +0 -0
  35. dissect/util/tools/dump_nskeyedarchiver.py +61 -0
  36. dissect/util/ts.py +295 -0
  37. dissect/util/xmemoryview.py +117 -0
  38. dissect_util-3.24.dev4.dist-info/METADATA +89 -0
  39. dissect_util-3.24.dev4.dist-info/RECORD +46 -0
  40. dissect_util-3.24.dev4.dist-info/WHEEL +5 -0
  41. dissect_util-3.24.dev4.dist-info/entry_points.txt +2 -0
  42. dissect_util-3.24.dev4.dist-info/licenses/COPYRIGHT +5 -0
  43. dissect_util-3.24.dev4.dist-info/licenses/LICENSE +201 -0
  44. dissect_util-3.24.dev4.dist-info/sboms/auditwheel.cdx.json +1 -0
  45. dissect_util-3.24.dev4.dist-info/top_level.txt +1 -0
  46. dissect_util.libs/libgcc_s-2d945d6c.so.1 +0 -0
@@ -0,0 +1,467 @@
1
+ # References:
2
+ # - https://github.com/lzfse/lzfse
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import struct
7
+ from typing import BinaryIO, NamedTuple
8
+
9
+ from dissect.util.compression import lzvn
10
+
11
# 4-byte magics that introduce each block of an LZFSE stream; ``decompress`` dispatches on them.
LZFSE_ENDOFSTREAM_BLOCK_MAGIC = b"bvx$"  # 0x24787662 (end of stream)
LZFSE_UNCOMPRESSED_BLOCK_MAGIC = b"bvx-"  # 0x2d787662 (raw data)
LZFSE_COMPRESSEDV1_BLOCK_MAGIC = b"bvx1"  # 0x31787662 (lzfse compressed, uncompressed tables)
LZFSE_COMPRESSEDV2_BLOCK_MAGIC = b"bvx2"  # 0x32787662 (lzfse compressed, compressed tables)
LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC = b"bvxn"  # 0x6e787662 (lzvn compressed)

# Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as
# a triplet, and represent a "usual" LZ-style literal and match pair. "L"
# is the number of literal bytes, "M" is the number of match bytes, and "D"
# is the match "distance"; the distance in bytes between the current pointer
# and the start of the match.

# Number of entries in each frequency table of a block header.
LZFSE_ENCODE_L_SYMBOLS = 20
LZFSE_ENCODE_M_SYMBOLS = 20
LZFSE_ENCODE_D_SYMBOLS = 64
LZFSE_ENCODE_LITERAL_SYMBOLS = 256
# Number of states of each decoder table (passed as ``nstates`` when the tables are built).
LZFSE_ENCODE_L_STATES = 64
LZFSE_ENCODE_M_STATES = 64
LZFSE_ENCODE_D_STATES = 256
LZFSE_ENCODE_LITERAL_STATES = 1024
# Per-block limits; the literal limit is used for the "Literal overflow" check while decoding.
LZFSE_MATCHES_PER_BLOCK = 10000
LZFSE_LITERALS_PER_BLOCK = 4 * LZFSE_MATCHES_PER_BLOCK
32
+
33
# fmt: off
# Both tables below are indexed by the low 5 bits of the frequency bit stream
# (see ``_decode_v1_freq_value``). The first gives the number of bits the
# encoded value occupies.
_lzfse_freq_nbits_table = (
    2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14,
    2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14
)
# Decoded frequency value for the short encodings. The -1 entries line up with
# the 8-bit and 14-bit encodings, whose value is computed from the stream bits
# instead of being looked up here.
_lzfse_freq_value_table = (
    0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1,
    0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1
)

# The L, M, D data streams are all encoded as a "base" value, which is
# FSE-encoded, and an "extra bits" value, which is the difference between
# value and base, and is simply represented as a raw bit value (because it
# is the low-order bits of a larger number, not much entropy can be
# extracted from these bits by more complex encoding schemes). The following
# tables represent the number of low-order bits to encode separately and the
# base values for each of L, M, and D.
_l_extra_bits = (
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8
)
_l_base_value = (
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60
)
_m_extra_bits = (
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11
)
_m_base_value = (
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312
)
_d_extra_bits = (
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
    4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
    8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
    12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15
)
_d_base_value = (
    0, 1, 2, 3, 4, 6, 8, 10, 12, 16,
    20, 24, 28, 36, 44, 52, 60, 76, 92, 108,
    124, 156, 188, 220, 252, 316, 380, 444, 508, 636,
    764, 892, 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580,
    4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, 16380, 20476,
    24572, 28668, 32764, 40956, 49148, 57340, 65532, 81916, 98300, 114684,
    131068, 163836, 196604, 229372
)
# fmt: on


# Little-endian unsigned 32-bit integer, used for the size fields that follow a block magic.
_I = struct.Struct("<I")
81
+
82
+
83
def _clz(n: int) -> int:
    """Count the leading zero bits of ``n`` when viewed as a 32-bit value.

    ``int.bit_length()`` is 0 for zero, so zero naturally yields 32.
    """
    return 32 - n.bit_length()
87
+
88
+
89
class LZFSECompressedBlockHeader(NamedTuple):
    """LZFSE compressed block header.

    Both the v1 (uncompressed tables) and v2 (compressed tables) on-disk layouts are
    parsed into this single representation. The 4-byte block magic is expected to
    have been consumed by the caller already.

    Attributes:
        n_raw_bytes: First 32-bit field of the header.
        n_payload_bytes: Literal payload size plus L, M, D payload size.
        n_literals: Number of literals to decode for the block.
        n_matches: Number of L, M, D triplets to decode for the block.
        n_literal_payload_bytes: Size in bytes of the encoded literal stream.
        n_lmd_payload_bytes: Size in bytes of the encoded L, M, D stream.
        literal_bits: Initial bit count adjustment for the literal bit stream.
        literal_state: Initial states of the four interleaved literal decoders.
        lmd_bits: Initial bit count adjustment for the L, M, D bit stream.
        l_state: Initial state of the L decoder.
        m_state: Initial state of the M decoder.
        d_state: Initial state of the D decoder.
        l_freq: Frequency table for L symbols.
        m_freq: Frequency table for M symbols.
        d_freq: Frequency table for D symbols.
        literal_freq: Frequency table for literal symbols.
    """

    __struct_v1__ = struct.Struct(
        "<IIIIIIi4HiHHH"
        f"{LZFSE_ENCODE_L_SYMBOLS}H"
        f"{LZFSE_ENCODE_M_SYMBOLS}H"
        f"{LZFSE_ENCODE_D_SYMBOLS}H"
        f"{LZFSE_ENCODE_LITERAL_SYMBOLS}H"
    )
    __struct_v2__ = struct.Struct("<I3Q")

    n_raw_bytes: int
    n_payload_bytes: int
    n_literals: int
    n_matches: int
    n_literal_payload_bytes: int
    n_lmd_payload_bytes: int
    literal_bits: int
    literal_state: tuple[int, int, int, int]
    lmd_bits: int
    l_state: int
    m_state: int
    d_state: int
    l_freq: tuple[int, ...]
    m_freq: tuple[int, ...]
    d_freq: tuple[int, ...]
    literal_freq: tuple[int, ...]

    @classmethod
    def parse_v1(cls, fh: BinaryIO) -> LZFSECompressedBlockHeader:
        """Decode all fields from a v1 header.

        Args:
            fh: File-like object positioned right after the block magic.

        Raises:
            struct.error: If fewer bytes than the fixed v1 header size could be read.
        """
        values = cls.__struct_v1__.unpack(fh.read(cls.__struct_v1__.size))

        # The four frequency tables follow the 15 scalar values back to back.
        l_start = 15
        m_start = l_start + LZFSE_ENCODE_L_SYMBOLS
        d_start = m_start + LZFSE_ENCODE_M_SYMBOLS
        literal_start = d_start + LZFSE_ENCODE_D_SYMBOLS
        literal_end = literal_start + LZFSE_ENCODE_LITERAL_SYMBOLS

        return cls(
            n_raw_bytes=values[0],
            n_payload_bytes=values[1],
            n_literals=values[2],
            n_matches=values[3],
            n_literal_payload_bytes=values[4],
            n_lmd_payload_bytes=values[5],
            literal_bits=values[6],
            literal_state=values[7:11],
            lmd_bits=values[11],
            l_state=values[12],
            m_state=values[13],
            d_state=values[14],
            l_freq=values[l_start:m_start],
            m_freq=values[m_start:d_start],
            d_freq=values[d_start:literal_start],
            literal_freq=values[literal_start:literal_end],
        )

    @classmethod
    def parse_v2(cls, fh: BinaryIO) -> LZFSECompressedBlockHeader:
        """Decode all fields from a v2 header.

        A v2 header packs the scalar fields into three 64-bit words, followed by
        variable-length encoded frequency tables.

        Args:
            fh: File-like object positioned right after the block magic.

        Raises:
            struct.error: If fewer bytes than the fixed v2 header size could be read.
            ValueError: If the encoded header size is smaller than the fixed header.
        """
        n_raw_bytes, v0, v1, v2 = cls.__struct_v2__.unpack(fh.read(cls.__struct_v2__.size))

        n_literal_payload_bytes = _get_field(v0, 20, 20)
        n_lmd_payload_bytes = _get_field(v1, 40, 20)
        n_payload_bytes = n_literal_payload_bytes + n_lmd_payload_bytes

        # The stored header size covers the magic and the fixed fields as well.
        freq_tables_size = _get_field(v2, 0, 32) - cls.__struct_v2__.size - 4  # exclude magic
        if freq_tables_size < 0:
            # A negative size would make ``fh.read()`` consume the remainder of the stream.
            raise ValueError("Invalid LZFSE v2 header size")

        n_l = LZFSE_ENCODE_L_SYMBOLS
        n_m = LZFSE_ENCODE_M_SYMBOLS
        n_d = LZFSE_ENCODE_D_SYMBOLS
        n_literal = LZFSE_ENCODE_LITERAL_SYMBOLS

        if freq_tables_size == 0:
            # Frequency tables were omitted, all frequencies are zero
            l_freq = (0,) * n_l
            m_freq = (0,) * n_m
            d_freq = (0,) * n_d
            literal_freq = (0,) * n_literal
        else:
            # Treat the whole encoded table as one little-endian bit accumulator
            accum = int.from_bytes(fh.read(freq_tables_size), "little")

            freqs = []
            for _ in range(n_l + n_m + n_d + n_literal):
                # Decode and store value, then consume nbits bits
                nbits, value = _decode_v1_freq_value(accum)
                freqs.append(value)
                accum >>= nbits

            m_start = n_l
            d_start = m_start + n_m
            literal_start = d_start + n_d

            l_freq = tuple(freqs[:m_start])
            m_freq = tuple(freqs[m_start:d_start])
            d_freq = tuple(freqs[d_start:literal_start])
            literal_freq = tuple(freqs[literal_start:])

        return cls(
            n_raw_bytes=n_raw_bytes,
            n_payload_bytes=n_payload_bytes,
            n_literals=_get_field(v0, 0, 20),
            n_matches=_get_field(v0, 40, 20),
            n_literal_payload_bytes=n_literal_payload_bytes,
            n_lmd_payload_bytes=n_lmd_payload_bytes,
            literal_bits=_get_field(v0, 60, 3) - 7,
            literal_state=(
                _get_field(v1, 0, 10),
                _get_field(v1, 10, 10),
                _get_field(v1, 20, 10),
                _get_field(v1, 30, 10),
            ),
            lmd_bits=_get_field(v1, 60, 3) - 7,
            l_state=_get_field(v2, 32, 10),
            m_state=_get_field(v2, 42, 10),
            d_state=_get_field(v2, 52, 10),
            l_freq=l_freq,
            m_freq=m_freq,
            d_freq=d_freq,
            literal_freq=literal_freq,
        )
202
+
203
+
204
def _get_field(value: int, offset: int, nbits: int) -> int:
    """Return the ``nbits``-wide unsigned bit field of ``value`` that starts at bit ``offset``.

    Bits are numbered from 0 (least significant), so the result consists of bits
    ``offset`` through ``offset + nbits - 1`` of ``value``, zero-extended.
    """
    shifted = value >> offset
    # ~(-1 << nbits) has exactly the low ``nbits`` bits set
    return shifted & ~(-1 << nbits)
211
+
212
+
213
def _decode_v1_freq_value(bits: int) -> tuple[int, int]:
    """Decode one frequency table entry from the low bits of ``bits``.

    Returns:
        A tuple of the number of bits the entry occupies and the decoded value.
    """
    low5 = bits & 31
    width = _lzfse_freq_nbits_table[low5]

    # The two long encodings carry their value in the bits above the 4-bit prefix
    if width == 8:
        return width, 8 + ((bits >> 4) & 0xF)
    if width == 14:
        return width, 24 + ((bits >> 4) & 0x3FF)

    # Short encodings (at most 5 bits) are looked up directly
    return width, _lzfse_freq_value_table[low5]
227
+
228
+
229
class DecoderEntry(NamedTuple):
    """Entry for one state in the decoder table.

    Attributes:
        k: Number of bits to read.
        symbol: Emitted symbol.
        delta: Signed increment used to compute next state (+bias).
    """

    k: int
    symbol: int
    delta: int
235
+
236
+
237
class ValueDecoderEntry(NamedTuple):
    """Entry for one state in the value decoder table.

    Attributes:
        total_bits: State bits + extra value bits = shift for next decode.
        value_bits: Extra value bits.
        delta: State base (delta).
        vbase: Value base.
    """

    total_bits: int
    value_bits: int
    delta: int
    vbase: int
244
+
245
+
246
def _init_decoder_table(nstates: int, freq: tuple[int, ...]) -> list[DecoderEntry]:
    """Build the symbol decoder table for ``nstates`` states.

    ``nstates`` is expected to be a power of two and ``freq`` a normalized histogram
    (``freq[i] >= 0``) whose sum does not exceed ``nstates``. Symbols with a zero
    frequency get no states, so they should not be present in the data.

    Raises:
        ValueError: If the frequencies add up to more than ``nstates``.
    """
    entries = []
    states_clz = _clz(nstates)
    used_states = 0

    for symbol, count in enumerate(freq):
        if count == 0:
            # Symbol never occurs, it gets no states
            continue

        used_states += count
        if used_states > nstates:
            raise ValueError("Invalid frequency table")

        k = _clz(count) - states_clz  # shift needed to ensure N <= (F<<K) < 2*N
        j0 = ((2 * nstates) >> k) - count

        # One entry per state reached by this symbol; the first j0 states read
        # k bits, the remaining ones read k - 1 bits
        for j in range(count):
            if j < j0:
                entry = DecoderEntry(k=k, symbol=symbol, delta=((count + j) << k) - nstates)
            else:
                entry = DecoderEntry(k=k - 1, symbol=symbol, delta=(j - j0) << (k - 1))
            entries.append(entry)

    return entries
281
+
282
+
283
def _init_value_decoder_table(
    nstates: int, freq: tuple[int, ...], symbol_vbits: tuple[int, ...], symbol_vbase: tuple[int, ...]
) -> list[ValueDecoderEntry]:
    """Initialize value decoder table ``T[nstates]``.

    ``nstates = sum freq[i]`` is the number of states (a power of 2).
    ``freq`` is a normalized histogram of symbol frequencies, with ``freq[i] >= 0``.
    ``symbol_vbits`` and ``symbol_vbase`` are the number of value bits to read and the base value for each symbol.

    Some symbols may have a 0 frequency. In that case, they should not be present in the data.

    Raises:
        ValueError: If the frequencies add up to more than ``nstates``.
    """
    table = []

    n_clz = _clz(nstates)
    sum_of_freq = 0

    for i, f in enumerate(freq):
        if f == 0:
            # skip this symbol, no occurrences
            continue

        # Same sanity check as ``_init_decoder_table``: reject oversubscribed
        # tables up front instead of building a table with too many states
        sum_of_freq += f
        if sum_of_freq > nstates:
            raise ValueError("Invalid frequency table")

        k = _clz(f) - n_clz  # shift needed to ensure N <= (F<<K) < 2*N
        j0 = ((2 * nstates) >> k) - f

        vbits = symbol_vbits[i]
        vbase = symbol_vbase[i]

        # Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F
        table.extend(
            ValueDecoderEntry(
                total_bits=(k if j < j0 else k - 1) + vbits,
                value_bits=vbits,
                delta=(((f + j) << k) - nstates) if j < j0 else ((j - j0) << (k - 1)),
                vbase=vbase,
            )
            for j in range(f)
        )

    return table
317
+
318
+
319
class _BitStream:
    """Bit reader that hands out bits starting from the most significant end of ``data``.

    ``data`` is interpreted as one little-endian integer. ``nbits`` is added to the
    number of bits in ``data`` to form the count of bits still available.
    """

    def __init__(self, data: bytes, nbits: int):
        self.nbits = 8 * len(data) + nbits
        self.accum = int.from_bytes(data, "little")

    def pull(self, n: int) -> int:
        """Remove the top ``n`` available bits from the stream and return them."""
        self.nbits -= n
        remaining = self.nbits
        head = self.accum >> remaining
        # Drop the bits just handed out, keeping only the low ``remaining`` bits
        self.accum -= head << remaining
        return head
329
+
330
+
331
def _decode(state: int, decoder_table: list[DecoderEntry], in_stream: _BitStream) -> tuple[int, int]:
    """Decode one symbol from ``state``.

    Returns:
        A tuple of the next state and the symbol belonging to the given state.
    """
    entry = decoder_table[state]

    # Next state = DELTA + K freshly pulled input bits
    next_state = in_stream.pull(entry.k) + entry.delta

    return next_state, entry.symbol
340
+
341
+
342
def _value_decode(state: int, decoder_table: list[ValueDecoderEntry], in_stream: _BitStream) -> tuple[int, int]:
    """Decode one value from ``state``.

    The state bits and the extra value bits are pulled from the stream in one go;
    the high part updates the state and the low ``value_bits`` bits extend the base value.

    Returns:
        A tuple of the next state and the decoded value.
    """
    entry = decoder_table[state]
    pulled = in_stream.pull(entry.total_bits)

    value_mask = (1 << entry.value_bits) - 1
    next_state = entry.delta + (pulled >> entry.value_bits)
    value = entry.vbase + (pulled & value_mask)

    return next_state, value
350
+
351
+
352
def _decode_lmd_into(
    header: LZFSECompressedBlockHeader,
    literals: list[int],
    l_decoder: list[ValueDecoderEntry],
    m_decoder: list[ValueDecoderEntry],
    d_decoder: list[ValueDecoderEntry],
    in_stream: _BitStream,
    dst: bytearray,
) -> None:
    """Decode the L, M, D triplets of one block and append the output to ``dst``.

    ``dst`` may already hold previously decoded output. Match distances are validated
    against, and copied from, everything in ``dst``, so a match may reach back into
    data produced by an earlier block.

    Raises:
        ValueError: On a literal overflow or an invalid match distance.
    """
    l_state = header.l_state
    m_state = header.m_state
    d_state = header.d_state

    lit = io.BytesIO(bytes(literals))

    # ruff: noqa: N806
    # A decoded distance of 0 means "reuse the previous distance"; there is none at block start
    D = None

    for _ in range(header.n_matches):
        # Decode the next L, M, D symbol from the input stream
        l_state, L = _value_decode(l_state, l_decoder, in_stream)
        if (lit.tell() + L) >= LZFSE_LITERALS_PER_BLOCK + 64:
            raise ValueError("Literal overflow")

        m_state, M = _value_decode(m_state, m_decoder, in_stream)
        d_state, new_d = _value_decode(d_state, d_decoder, in_stream)
        D = new_d if new_d != 0 else D

        # The match may start inside the literals that are about to be copied
        if D is None or len(dst) + L < D:
            raise ValueError("Invalid match distance")

        dst += lit.read(L)

        # Copy in chunks of at most D bytes so that overlapping matches repeat correctly
        remaining = M
        while remaining > 0:
            match_size = min(remaining, D)
            dst += dst[-D : (-D + match_size) or None]
            remaining -= match_size


def _decode_lmd(
    header: LZFSECompressedBlockHeader,
    literals: list[int],
    l_decoder: list[ValueDecoderEntry],
    m_decoder: list[ValueDecoderEntry],
    d_decoder: list[ValueDecoderEntry],
    in_stream: _BitStream,
) -> bytes:
    """Decode the L, M, D triplets of a single block in isolation.

    Matches can only refer to data produced within this block.

    Returns:
        The decoded data of the block.

    Raises:
        ValueError: On a literal overflow or an invalid match distance.
    """
    dst = bytearray()
    _decode_lmd_into(header, literals, l_decoder, m_decoder, d_decoder, in_stream, dst)
    return bytes(dst)


def decompress(src: bytes | BinaryIO) -> bytes:
    """LZFSE decompress from a file-like object or bytes.

    Decompresses block by block until the end-of-stream block is reached.

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.

    Raises:
        ValueError: If a block magic is invalid (this includes running out of input
            before an end-of-stream block), or if a block contains invalid data.
        struct.error: If a block header is truncated.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    dst = bytearray()

    while True:
        magic = src.read(4)

        if magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC:
            break

        if magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC:
            (n_raw_bytes,) = _I.unpack(src.read(4))
            dst += src.read(n_raw_bytes)

        elif magic in (LZFSE_COMPRESSEDV1_BLOCK_MAGIC, LZFSE_COMPRESSEDV2_BLOCK_MAGIC):
            if magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC:
                header = LZFSECompressedBlockHeader.parse_v1(src)
            else:
                header = LZFSECompressedBlockHeader.parse_v2(src)

            literal_decoder = _init_decoder_table(LZFSE_ENCODE_LITERAL_STATES, header.literal_freq)
            l_decoder = _init_value_decoder_table(LZFSE_ENCODE_L_STATES, header.l_freq, _l_extra_bits, _l_base_value)
            m_decoder = _init_value_decoder_table(LZFSE_ENCODE_M_STATES, header.m_freq, _m_extra_bits, _m_base_value)
            d_decoder = _init_value_decoder_table(LZFSE_ENCODE_D_STATES, header.d_freq, _d_extra_bits, _d_base_value)

            in_stream = _BitStream(src.read(header.n_literal_payload_bytes), header.literal_bits)

            # Literals are encoded with four interleaved decoder states, used in turn
            literals = []
            states = list(header.literal_state)
            for _ in range(0, header.n_literals, 4):
                for i in range(4):
                    states[i], symbol = _decode(states[i], literal_decoder, in_stream)
                    literals.append(symbol)

            in_stream = _BitStream(src.read(header.n_lmd_payload_bytes), header.lmd_bits)

            # Decode straight into the shared output buffer, so that matches can
            # refer to data decoded from earlier blocks
            _decode_lmd_into(header, literals, l_decoder, m_decoder, d_decoder, in_stream, dst)

        elif magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC:
            (n_raw_bytes,) = _I.unpack(src.read(4))
            (n_payload_bytes,) = _I.unpack(src.read(4))

            dst += lzvn.decompress(src.read(n_payload_bytes))

        else:
            raise ValueError(f"Invalid LZFSE block magic: {magic!r}")

    return bytes(dst)
@@ -0,0 +1,92 @@
1
+ # Reference: https://github.com/google/rekall/blob/master/rekall-core/rekall/plugins/filesystems/lznt1.py
2
+ from __future__ import annotations
3
+
4
+ import array
5
+ import io
6
+ import struct
7
+ from typing import BinaryIO
8
+
9
+
10
def _get_displacement(offset: int) -> int:
    """Return how many times ``offset`` must be halved to drop below ``0x10``."""
    if offset < 0x10:
        return 0

    # Every halving removes one bit; values below 0x10 have at most 4 bits
    return offset.bit_length() - 4
18
+
19
+
20
# Precomputed ``_get_displacement`` results, indexed by the position within the
# current uncompressed chunk minus one (see ``decompress``).
DISPLACEMENT_TABLE = array.array("B", [_get_displacement(x) for x in range(8192)])

# Bit fields of the 16-bit block header.
COMPRESSED_MASK = 1 << 15  # set when the block is compressed
SIGNATURE_MASK = 3 << 12  # both bits must be set, otherwise decompression stops
SIZE_MASK = (1 << 12) - 1  # low 12 bits hold the block size field
# One mask per bit of a tag byte, least significant bit first.
TAG_MASKS = [(1 << i) for i in range(8)]
26
+
27
+
28
def decompress(src: bytes | BinaryIO) -> bytes:
    """LZNT1 decompress from a file-like object or bytes.

    Blocks are processed from the current position until the end of the input, or
    until a block header without the expected signature bits is encountered.

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    # Determine where the input ends, then return to the starting position
    start = src.tell()
    src.seek(0, io.SEEK_END)
    end = src.tell()
    src.seek(start)

    out = io.BytesIO()

    while src.tell() < end:
        chunk_start = src.tell()
        chunk_out_start = out.tell()

        (chunk_header,) = struct.unpack("<H", src.read(2))
        if chunk_header & SIGNATURE_MASK != SIGNATURE_MASK:
            break

        chunk_size = chunk_header & SIZE_MASK

        if not chunk_header & COMPRESSED_MASK:
            # Block is not compressed
            out.write(src.read(chunk_size + 1))
            continue

        chunk_end = chunk_start + chunk_size + 3

        while src.tell() < chunk_end:
            # Each tag byte describes up to 8 items: literal byte or back reference
            flags = ord(src.read(1))
            for mask in TAG_MASKS:
                if src.tell() >= chunk_end:
                    break

                if not flags & mask:
                    out.write(src.read(1))
                    continue

                (pointer,) = struct.unpack("<H", src.read(2))
                # The offset/length split of the pointer depends on the position in the chunk
                displacement = DISPLACEMENT_TABLE[out.tell() - chunk_out_start - 1]

                back = (pointer >> (12 - displacement)) + 1
                length = (pointer & (0xFFF >> displacement)) + 3

                out.seek(-back, io.SEEK_END)
                chunk = out.read(length)

                # Pad the data to make it fit.
                if 0 < len(chunk) < length:
                    chunk = (chunk * (length // len(chunk) + 1))[:length]

                out.seek(0, io.SEEK_END)
                out.write(chunk)

    return out.getvalue()
@@ -0,0 +1,118 @@
1
+ # References:
2
+ # - https://github.com/FFmpeg/FFmpeg/blob/master/libavutil/lzo.c
3
+ # - https://docs.kernel.org/staging/lzo.html
4
+ # - https://github.com/torvalds/linux/blob/master/lib/lzo/lzo1x_decompress_safe.c
5
+ from __future__ import annotations
6
+
7
+ import io
8
+ import struct
9
+ from typing import BinaryIO
10
+
11
# Upper bound for a length accumulated from zero bytes in ``_read_length``;
# once reached, the encoded length is rejected as invalid.
MAX_READ_LENGTH = (1 << 32) - 1000
12
+
13
+
14
def _read_length(src: BinaryIO, val: int, mask: int) -> int:
    """Read a length that is either stored in ``val`` or extended with following bytes.

    If the masked bits of ``val`` are non-zero they are the length. Otherwise every
    following zero byte adds 255, and the first non-zero byte plus ``mask`` completes it.

    Raises:
        ValueError: If the accumulated length becomes unreasonably large.
    """
    length = val & mask
    if length:
        return length

    extra = 0
    while True:
        byte = src.read(1)[0]
        if byte:
            return extra + mask + byte

        if extra >= MAX_READ_LENGTH:
            raise ValueError("Invalid encoded length")
        extra += 255
24
+
25
+
26
def decompress(src: bytes | BinaryIO, header: bool = True, buflen: int = -1) -> bytes:
    """LZO decompress from a file-like object or bytes.

    Arguments are largely compatible with python-lzo API.

    Args:
        src: File-like object or bytes to decompress. Must be seekable.
        header: Whether the metadata header is included in the input.
        buflen: If ``header`` is ``False``, a buffer length in bytes must be given that will fit the output.

    Returns:
        The decompressed data.

    Raises:
        ValueError: If the header or the LZO stream is invalid.
        IndexError: If the input ends before the stream is complete.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    dst = bytearray()

    if header:
        byte = src.read(1)[0]
        if byte not in [0xF0, 0xF1]:
            raise ValueError("Invalid header value")
        # python-lzo stores the uncompressed length as a big-endian 32-bit integer
        out_len = struct.unpack(">I", src.read(4))[0]
    else:
        out_len = buflen

    # Number of bytes available for the LZO stream itself
    stream_start = src.tell()
    src.seek(0, io.SEEK_END)
    in_len = src.tell() - stream_start
    src.seek(stream_start)

    val = src.read(1)[0]
    # A leading 17 is followed by a bitstream version byte, but only when the
    # stream is long enough; a bare end-of-stream marker (11 00 00) is not
    if in_len >= 5 and val == 17:
        _ = src.read(1)  # This would be the bitstream version but we don't currently use it
        val = src.read(1)[0]

    if val > 17:
        # Initial run of literals
        dst += src.read(val - 17)
        val = src.read(1)[0]

        if val < 16:
            raise ValueError("Invalid LZO stream")

    state = 0
    while True:
        if val > 15:
            if val > 63:
                # Copy 3-8 bytes from block within 2kb distance
                length = (val >> 5) - 1
                dist = (src.read(1)[0] << 3) + ((val >> 2) & 7) + 1
            elif val > 31:
                # Copy of small block within 16kb distance
                length = _read_length(src, val, 31)
                val = src.read(1)[0]
                dist = (src.read(1)[0] << 6) + (val >> 2) + 1
            else:
                # Copy of a block within 16...48kB distance
                length = _read_length(src, val, 7)
                dist = (1 << 14) + ((val & 8) << 11)
                val = src.read(1)[0]
                dist += (src.read(1)[0] << 6) + (val >> 2)
                if dist == (1 << 14):
                    # A distance of exactly 16kB is the end-of-stream marker
                    if length != 1:
                        raise ValueError("Invalid LZO stream")
                    break
        elif not state:
            # Copy 4 or more literals, depending on the last 4 bits
            length = _read_length(src, val, 15)
            dst += src.read(length + 3)
            val = src.read(1)[0]
            if val > 15:
                continue
            length = 1
            dist = (1 << 11) + (src.read(1)[0] << 2) + (val >> 2) + 1
        else:
            length = 0
            dist = (src.read(1)[0] << 2) + (val >> 2) + 1

        # Copy in chunks of at most dist bytes so that overlapping matches repeat correctly
        remaining = length + 2
        while remaining > 0:
            match_size = min(remaining, dist)
            dst += dst[-dist : (-dist + match_size) or None]
            remaining -= match_size

        # State is often encoded in the last 2 bits of the value, and used in subsequent iterations
        state = length = val & 3
        dst += src.read(length)

        if len(dst) == out_len:
            break

        val = src.read(1)[0]

    return bytes(dst)
118
+ return bytes(dst)