dfindexeddb 20241031-py3-none-any.whl → 20251109-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/chromium/blink.py +116 -74
- dfindexeddb/indexeddb/chromium/definitions.py +152 -124
- dfindexeddb/indexeddb/chromium/record.py +536 -348
- dfindexeddb/indexeddb/chromium/v8.py +112 -141
- dfindexeddb/indexeddb/cli.py +125 -114
- dfindexeddb/indexeddb/firefox/definitions.py +7 -4
- dfindexeddb/indexeddb/firefox/gecko.py +103 -79
- dfindexeddb/indexeddb/firefox/record.py +66 -24
- dfindexeddb/indexeddb/safari/definitions.py +12 -10
- dfindexeddb/indexeddb/safari/record.py +68 -51
- dfindexeddb/indexeddb/safari/webkit.py +112 -189
- dfindexeddb/indexeddb/types.py +5 -2
- dfindexeddb/leveldb/cli.py +146 -131
- dfindexeddb/leveldb/definitions.py +6 -2
- dfindexeddb/leveldb/descriptor.py +75 -45
- dfindexeddb/leveldb/ldb.py +39 -30
- dfindexeddb/leveldb/log.py +44 -27
- dfindexeddb/leveldb/plugins/chrome_notifications.py +30 -18
- dfindexeddb/leveldb/plugins/interface.py +5 -6
- dfindexeddb/leveldb/plugins/manager.py +11 -10
- dfindexeddb/leveldb/record.py +71 -62
- dfindexeddb/leveldb/utils.py +21 -13
- dfindexeddb/utils.py +35 -30
- dfindexeddb/version.py +2 -2
- dfindexeddb-20251109.dist-info/METADATA +222 -0
- dfindexeddb-20251109.dist-info/RECORD +40 -0
- {dfindexeddb-20241031.dist-info → dfindexeddb-20251109.dist-info}/WHEEL +1 -1
- dfindexeddb-20241031.dist-info/AUTHORS +0 -12
- dfindexeddb-20241031.dist-info/METADATA +0 -424
- dfindexeddb-20241031.dist-info/RECORD +0 -41
- {dfindexeddb-20241031.dist-info → dfindexeddb-20251109.dist-info}/entry_points.txt +0 -0
- {dfindexeddb-20241031.dist-info → dfindexeddb-20251109.dist-info/licenses}/LICENSE +0 -0
- {dfindexeddb-20241031.dist-info → dfindexeddb-20251109.dist-info}/top_level.txt +0 -0
dfindexeddb/leveldb/descriptor.py
CHANGED

@@ -14,14 +14,13 @@
 # limitations under the License.
 """Parser for LevelDB Descriptor (MANIFEST) files."""
 from __future__ import annotations
+
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Generator, Optional
+from typing import Dict, Generator, Optional
 
 from dfindexeddb import errors
-from dfindexeddb.leveldb import definitions
-from dfindexeddb.leveldb import log
-from dfindexeddb.leveldb import utils
+from dfindexeddb.leveldb import definitions, log, utils
 
 
 @dataclass
@@ -34,6 +33,7 @@ class InternalKey:
     sequence_number: the sequence number.
     key_type: the key type.
   """
+
   offset: int
   user_key: bytes
   sequence_number: int
@@ -41,7 +41,8 @@ class InternalKey:
 
   @classmethod
   def FromDecoder(
-      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> InternalKey:
     """Decodes an InternalKey from the current position of a LevelDBDecoder.
 
     Args:
@@ -54,20 +55,22 @@
     offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
 
     if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
-      raise errors.ParserError('Insufficient bytes to parse InternalKey')
+      raise errors.ParserError("Insufficient bytes to parse InternalKey")
 
-    user_key = slice_bytes[:-definitions.SEQUENCE_LENGTH]
+    user_key = slice_bytes[: -definitions.SEQUENCE_LENGTH]
     sequence_number = int.from_bytes(
-        slice_bytes[-definitions.SEQUENCE_LENGTH:],
-        byteorder='little',
-        signed=False)
+        slice_bytes[-definitions.SEQUENCE_LENGTH :],
+        byteorder="little",
+        signed=False,
+    )
     key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
 
     return cls(
         offset=base_offset + offset,
         user_key=user_key,
         sequence_number=sequence_number,
-        key_type=key_type)
+        key_type=key_type,
+    )
 
 
 @dataclass
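The InternalKey change above is mostly formatting, but it spells out the trailer arithmetic: the last PACKED_SEQUENCE_AND_TYPE_LENGTH bytes of the decoded slice hold the packed sequence number and key type, and everything before them is the user key. A minimal standalone sketch of the same slicing, assuming both length constants are 8 (their real values live in dfindexeddb.leveldb.definitions and are not part of this diff):

```python
# Illustrative only: split an internal key the way FromDecoder above does.
# Both constants are assumed to be 8 bytes here; the actual values come
# from dfindexeddb.leveldb.definitions.
SEQUENCE_LENGTH = 8
PACKED_SEQUENCE_AND_TYPE_LENGTH = 8

# Hypothetical slice: a user key followed by an 8-byte packed trailer
# whose lowest (first little-endian) byte is the key type.
slice_bytes = b"user-key" + bytes([1]) + (42).to_bytes(7, "little")

user_key = slice_bytes[: -SEQUENCE_LENGTH]                # b"user-key"
sequence_number = int.from_bytes(
    slice_bytes[-SEQUENCE_LENGTH:], byteorder="little", signed=False)
key_type = slice_bytes[-PACKED_SEQUENCE_AND_TYPE_LENGTH]  # 1
```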
@@ -82,6 +85,7 @@ class NewFile(utils.FromDecoderMixin):
     smallest: the smallest internal key.
     largest: the largest internal key.
   """
+
   offset: int
   level: int
   number: int
@@ -91,7 +95,8 @@ class NewFile(utils.FromDecoderMixin):
 
   @classmethod
   def FromDecoder(
-      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> NewFile:
     """Decodes a NewFile from the current position of a LevelDBDecoder.
 
     Args:
@@ -113,7 +118,8 @@ class NewFile(utils.FromDecoderMixin):
         number=number,
         file_size=file_size,
         smallest=smallest,
-        largest=largest)
+        largest=largest,
+    )
 
 
 @dataclass
@@ -125,6 +131,7 @@ class CompactPointer(utils.FromDecoderMixin):
     level: the level.
     key: the key bytes.
   """
+
   offset: int
   level: int
   key: bytes
@@ -156,13 +163,15 @@ class DeletedFile(utils.FromDecoderMixin):
     level: the level.
     number: the file number.
   """
+
   offset: int
   level: int
   number: int
 
   @classmethod
   def FromDecoder(
-      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> DeletedFile:
     """Decodes a DeletedFile from the current position of a LevelDBDecoder.
 
     Args:
@@ -192,6 +201,7 @@ class VersionEdit(utils.FromDecoderMixin):
     deleted_files: the list of DeletedFiles.
     new_files: the list of NewFiles.
   """
+
   offset: int
   comparator: Optional[bytes] = None
   log_number: Optional[int] = None
@@ -204,7 +214,8 @@ class VersionEdit(utils.FromDecoderMixin):
 
   @classmethod
   def FromDecoder(
-      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> VersionEdit:
     """Decodes a VersionEdit from the current position of a LevelDBDecoder.
 
     Args:
@@ -225,7 +236,8 @@ class VersionEdit(utils.FromDecoderMixin):
         tag = definitions.VersionEditTags(tag_byte)
       except TypeError as error:
         raise errors.ParserError(
-            f'Invalid VersionEditTag at offset {offset}') from error
+            f"Invalid VersionEditTag at offset {offset}"
+        ) from error
 
       if tag == definitions.VersionEditTags.COMPARATOR:
         _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
@@ -239,15 +251,18 @@ class VersionEdit(utils.FromDecoderMixin):
         _, version_edit.last_sequence = decoder.DecodeUint64Varint()
       elif tag == definitions.VersionEditTags.COMPACT_POINTER:
         compact_pointer = CompactPointer.FromDecoder(
-            decoder=decoder, base_offset=base_offset + offset)
+            decoder=decoder, base_offset=base_offset + offset
+        )
         version_edit.compact_pointers.append(compact_pointer)
       elif tag == definitions.VersionEditTags.DELETED_FILE:
         deleted_file = DeletedFile.FromDecoder(
-            decoder=decoder, base_offset=base_offset + offset)
+            decoder=decoder, base_offset=base_offset + offset
+        )
         version_edit.deleted_files.append(deleted_file)
       elif tag == definitions.VersionEditTags.NEW_FILE:
         file_metadata = NewFile.FromDecoder(
-            decoder=decoder, base_offset=base_offset + offset)
+            decoder=decoder, base_offset=base_offset + offset
+        )
         version_edit.new_files.append(file_metadata)
 
       if decoder.NumRemainingBytes() == 0:
@@ -272,11 +287,12 @@ class LevelDBVersion:
   "Deleted files" will typically no longer exist but may be forensically
   recoverable.
   """
-  current_log: Optional[str]
+
+  current_log: Optional[str]
   version_edit_offset: int
-  last_sequence: int
-  active_files: dict[int, dict[int, NewFile]]
-  deleted_files: dict[int, dict[int, DeletedFile]]
+  last_sequence: Optional[int]
+  active_files: dict[int, dict[str, NewFile]]
+  deleted_files: dict[int, dict[str, DeletedFile]]
 
 
 class FileReader:
@@ -288,6 +304,7 @@ class FileReader:
   * records (PhysicalRecord)
   * version edits (VersionEdit)
   """
+
   def __init__(self, filename: str):
     """Initializes the Descriptor a.k.a. MANIFEST file.
 
@@ -296,7 +313,6 @@ class FileReader:
     """
     self.filename = filename
 
-
   def GetBlocks(self) -> Generator[log.Block, None, None]:
     """Returns an iterator of Block instances.
 
@@ -305,7 +321,7 @@ class FileReader:
     Yields:
       Block
     """
-    with open(self.filename, 'rb') as fh:
+    with open(self.filename, "rb") as fh:
       block = log.Block.FromStream(fh)
       while block:
         yield block
@@ -332,23 +348,33 @@ class FileReader:
       VersionEdit
     """
     buffer = bytearray()
+    offset = 0
     for physical_record in self.GetPhysicalRecords():
-      if (
-          physical_record.record_type ==
-          definitions.LogFilePhysicalRecordType.FULL):
+      if (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.FULL
+      ):
         offset = physical_record.contents_offset + physical_record.base_offset
-        version_edit = VersionEdit.FromBytes(physical_record.contents, offset)
+        version_edit = VersionEdit.FromBytes(
+            physical_record.contents, base_offset=offset
+        )
         yield version_edit
         buffer = bytearray()
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.FIRST):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.FIRST
+      ):
         offset = physical_record.contents_offset + physical_record.base_offset
         buffer = bytearray(physical_record.contents)
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.MIDDLE):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.MIDDLE
+      ):
         buffer.extend(bytearray(physical_record.contents))
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.LAST):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.LAST
+      ):
         buffer.extend(bytearray(physical_record.contents))
         version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
         yield version_edit
@@ -360,30 +386,34 @@ class FileReader:
     Yields:
       LevelDBVersion
     """
-    active_files = defaultdict(dict)
-    deleted_files = defaultdict(dict)
+    active_files: Dict[int, dict[str, NewFile]] = defaultdict(dict)
+    deleted_files: Dict[int, dict[str, DeletedFile]] = defaultdict(dict)
     current_log = None
 
    for version_edit in self.GetVersionEdits():
       if version_edit.log_number:
-        current_log = f'{version_edit.log_number:06d}.log'
+        current_log = f"{version_edit.log_number:06d}.log"
 
       for new_file in version_edit.new_files:
-        active_files[new_file.level][f'{new_file.number:06d}.ldb'] = new_file
+        active_files[new_file.level][f"{new_file.number:06d}.ldb"] = new_file
 
       for deleted_file in version_edit.deleted_files:
-        active_files[deleted_file.level].pop(f'{deleted_file.number:06d}.ldb')
-        deleted_files[deleted_file.level][f'{deleted_file.number:06d}.ldb'] = deleted_file
+        active_files[deleted_file.level].pop(f"{deleted_file.number:06d}.ldb")
+        deleted_files[deleted_file.level][
+            f"{deleted_file.number:06d}.ldb"
+        ] = deleted_file
 
       yield LevelDBVersion(
           current_log=current_log,
-          active_files=dict(active_files),
-          deleted_files=dict(deleted_files),
+          active_files=active_files,
+          deleted_files=deleted_files,
           version_edit_offset=version_edit.offset,
-          last_sequence=version_edit.last_sequence)
+          last_sequence=version_edit.last_sequence,
+      )
 
-  def GetLatestVersion(self) -> LevelDBVersion:
+  def GetLatestVersion(self) -> Optional[LevelDBVersion]:
     """Returns the latest LevelDBVersion instance."""
+    latest = None
     for version in self.GetVersions():
       latest = version
     return latest
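Beyond the reformatting, the functional changes in descriptor.py are defensive: GetVersionEdits seeds offset = 0 before its loop, GetVersions annotates its accumulator maps, and GetLatestVersion is now typed Optional[LevelDBVersion] with an explicit latest = None, since an empty MANIFEST yields no versions. A short usage sketch against this API (the MANIFEST path is hypothetical):

```python
from dfindexeddb.leveldb import descriptor

reader = descriptor.FileReader("MANIFEST-000001")  # hypothetical path

# GetLatestVersion() may now return None, so guard before use.
version = reader.GetLatestVersion()
if version is not None:
  print(version.current_log, version.last_sequence)
  for level, files in sorted(version.active_files.items()):
    print(level, sorted(files))
```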
dfindexeddb/leveldb/ldb.py
CHANGED

@@ -15,16 +15,15 @@
 """Parser for LevelDB Table (.ldb) files."""
 from __future__ import annotations
 
-from dataclasses import dataclass, field
 import io
 import os
+from dataclasses import dataclass, field
 from typing import BinaryIO, Iterable, Tuple
 
 import snappy
 import zstd
 
-from dfindexeddb.leveldb import definitions
-from dfindexeddb.leveldb import utils
+from dfindexeddb.leveldb import definitions, utils
 
 
 @dataclass
@@ -38,6 +37,7 @@ class KeyValueRecord:
     sequence_number: the sequence number of the record.
     record_type: the type of the record.
   """
+
   offset: int
   key: bytes
   value: bytes
@@ -65,17 +65,24 @@ class KeyValueRecord:
     _, value = decoder.ReadBytes(value_length)
 
     shared_key = shared_key[:shared_bytes] + key_delta
-    key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+    key = shared_key[: -definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     sequence_number = int.from_bytes(
-        shared_key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
+        shared_key[-definitions.SEQUENCE_LENGTH :],
+        byteorder="little",
+        signed=False,
+    )
     key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     record_type = definitions.InternalRecordType(key_type)
-    return cls(
-        offset=offset + block_offset,
-        key=key,
-        value=value,
-        sequence_number=sequence_number,
-        record_type=record_type), shared_key
+    return (
+        cls(
+            offset=offset + block_offset,
+            key=key,
+            value=value,
+            sequence_number=sequence_number,
+            record_type=record_type,
+        ),
+        shared_key,
+    )
 
 
 @dataclass
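The shared_key logic in KeyValueRecord.FromDecoder is LevelDB's block-level prefix compression: each entry records how many leading bytes it shares with the previous key and stores only the non-shared delta. A tiny illustration of the reconstruction step (key and counts are made up):

```python
# Illustrative reconstruction of a prefix-compressed key, mirroring
# shared_key[:shared_bytes] + key_delta above. Values are hypothetical.
previous_key = b"app_settings"
shared_bytes = 4          # this entry reuses b"app_" from the previous key
key_delta = b"version"    # only the non-shared suffix is stored

key = previous_key[:shared_bytes] + key_delta
assert key == b"app_version"
```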
@@ -86,6 +93,7 @@ class Block:
     offset: the offset of the block.
     block_offset:
   """
+
   offset: int
   block_offset: int
   length: int
@@ -103,9 +111,9 @@ class Block:
   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""
     if self.IsSnappyCompressed():
-      return snappy.decompress(self.data)
+      return bytes(snappy.decompress(self.data))
     if self.IsZstdCompressed():
-      return zstd.decompress(self.data)
+      return bytes(zstd.decompress(self.data))
     return self.data
 
   def GetRecords(self) -> Iterable[KeyValueRecord]:
@@ -123,18 +131,19 @@ class Block:
     # num_restarts: uint32
     decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
-    restarts_offset = (
-        (decoder.stream.tell()) -
-        ((num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH))
+    restarts_offset = (decoder.stream.tell()) - (
+        (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH
+    )
 
     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()
     decoder.stream.seek(offset)
-    key = b''
+    key = b""
 
     while decoder.stream.tell() < restarts_offset:
       key_value_record, key = KeyValueRecord.FromDecoder(
-          decoder, self.block_offset, key)
+          decoder, self.block_offset, key
+      )
       yield key_value_record
 
     # TODO: parse trailer of the block for restart points (where the full
@@ -151,6 +160,7 @@ class BlockHandle(utils.FromDecoderMixin):
     block_offset: the offset of the block.
     length: the length of the block.
   """
+
   offset: int
   block_offset: int
   length: int
@@ -170,20 +180,18 @@ class BlockHandle(utils.FromDecoderMixin):
     stream.seek(self.block_offset, os.SEEK_SET)
     data = stream.read(self.length)
     if len(data) != self.length:
-      raise ValueError('Could not read all of the block')
+      raise ValueError("Could not read all of the block")
 
     footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
     if len(footer) != definitions.BLOCK_TRAILER_SIZE:
-      raise ValueError('Could not read all of the block footer')
+      raise ValueError("Could not read all of the block footer")
 
     return Block(self.offset, self.block_offset, self.length, data, footer)
 
   @classmethod
   def FromDecoder(
-      cls: 'BlockHandle',
-      decoder: utils.LevelDBDecoder,
-      base_offset: int = 0
-  ) -> BlockHandle:
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> BlockHandle:
     """Decodes a BlockHandle from the current position of a LevelDBDecoder.
 
     Args:
@@ -217,10 +225,10 @@ class FileReader:
       ValueError if the file has an invalid magic number at the end.
     """
     self.filename = filename
-    with open(self.filename, 'rb') as fh:
+    with open(self.filename, "rb") as fh:
       fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
       if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
-        raise ValueError(f'Invalid magic number in {self.filename}')
+        raise ValueError(f"Invalid magic number in {self.filename}")
 
       fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
       # meta_handle, need to read first due to variable integers
@@ -236,11 +244,12 @@ class FileReader:
     Yields:
       Block.
     """
-    with open(self.filename, 'rb') as fh:
+    with open(self.filename, "rb") as fh:
       for key_value_record in self.index_block.GetRecords():
         block_handle = BlockHandle.FromStream(
             io.BytesIO(key_value_record.value),
-            base_offset=key_value_record.offset)
+            base_offset=key_value_record.offset,
+        )
         yield block_handle.Load(fh)
 
   def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
@@ -252,11 +261,11 @@ class FileReader:
     for block in self.GetBlocks():
       yield from block.GetRecords()
 
-  def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]:  #pylint: disable=C0103
+  def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]:  # pylint: disable=C0103
     """Returns an iterator of key-value pairs.
 
     Yields:
      A tuple of key and value as bytes.
    """
     for record in self.GetKeyValueRecords():
-      yield (record.key, record.value)
+      yield record.key, record.value
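With GetBuffer now normalizing decompressed snappy/zstd output to bytes and RangeIter yielding the (key, value) pair explicitly, a table file can be walked in a few lines. A minimal usage sketch (the .ldb filename is hypothetical):

```python
from dfindexeddb.leveldb import ldb

reader = ldb.FileReader("000005.ldb")  # hypothetical table file
for key, value in reader.RangeIter():
  print(key.hex(), len(value))
```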
dfindexeddb/leveldb/log.py
CHANGED

@@ -15,13 +15,12 @@
 """Parser for LevelDB Log (.log) files."""
 from __future__ import annotations
 
-from dataclasses import dataclass
 import io
+from dataclasses import dataclass
 from typing import BinaryIO, Generator, Iterable, Optional
 
 from dfindexeddb import errors
-from dfindexeddb.leveldb import definitions
-from dfindexeddb.leveldb import utils
+from dfindexeddb.leveldb import definitions, utils
 
 
 @dataclass
@@ -36,6 +35,7 @@ class ParsedInternalKey:
     key: the record key.
     value: the record value.
   """
+
   offset: int
   record_type: definitions.InternalRecordType
   sequence_number: int
@@ -70,15 +70,16 @@ class ParsedInternalKey:
     if record_type == definitions.InternalRecordType.VALUE:
       _, value = decoder.DecodeBlobWithLength()
     elif record_type == definitions.InternalRecordType.DELETED:
-      value = b''
+      value = b""
     else:
-      raise ValueError(f'Invalid record type {record_type}')
+      raise ValueError(f"Invalid record type {record_type}")
     return cls(
         offset=base_offset + offset,
         record_type=record_type,
         key=key,
         value=value,
-        sequence_number=sequence_number)
+        sequence_number=sequence_number,
+    )
 
 
 @dataclass
@@ -91,6 +92,7 @@ class WriteBatch(utils.FromDecoderMixin):
     count: the number of ParsedInternalKey in the batch.
     records: the ParsedInternalKey parsed from the batch.
   """
+
   offset: int
   sequence_number: int
   count: int
@@ -98,7 +100,7 @@ class WriteBatch(utils.FromDecoderMixin):
 
   @classmethod
   def FromDecoder(
-    cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
   ) -> WriteBatch:
     """Parses a WriteBatch from a binary stream.
 
@@ -116,15 +118,17 @@ class WriteBatch(utils.FromDecoderMixin):
     records = []
     for relative_sequence_number in range(count):
       record = ParsedInternalKey.FromDecoder(
-          decoder,
-          base_offset + offset, relative_sequence_number + sequence_number
+          decoder,
+          base_offset + offset,
+          relative_sequence_number + sequence_number,
       )
       records.append(record)
     return cls(
         offset=base_offset + offset,
         sequence_number=sequence_number,
         count=count,
-        records=records)
+        records=records,
+    )
 
 
 @dataclass
@@ -132,13 +136,15 @@ class PhysicalRecord(utils.FromDecoderMixin):
   """A physical record from a leveldb log file.
 
   Attributes:
-    base_offset: the base offset of the record.
+    base_offset: the base offset.
     checksum: the record checksum.
     length: the length of the record in bytes.
-    record_type: the type of the record.
+    offset: the record offset.
+    record_type: the record type.
     contents: the record contents.
     contents_offset: the offset of where the record contents are stored.
   """
+
   base_offset: int
   offset: int
   checksum: int
@@ -172,8 +178,9 @@ class PhysicalRecord(utils.FromDecoderMixin):
       record_type = definitions.LogFilePhysicalRecordType(record_type_byte)
     except ValueError as error:
       raise errors.ParserError(
-          f'Error parsing record type of Physical Record at offset '
-          f'{offset + base_offset}') from error
+          f"Error parsing record type of Physical Record at offset "
+          f"{offset + base_offset}"
+      ) from error
     contents_offset, contents = decoder.ReadBytes(length)
     return cls(
         base_offset=base_offset,
@@ -182,7 +189,8 @@ class PhysicalRecord(utils.FromDecoderMixin):
         length=length,
         record_type=record_type,
         contents=contents,
-        contents_offset=contents_offset)
+        contents_offset=contents_offset,
+    )
 
 
 @dataclass
@@ -193,6 +201,7 @@ class Block:
     offset: the block offset.
     data: the block data.
   """
+
   offset: int
   data: bytes
 
@@ -237,7 +246,7 @@ class FileReader:
   A Log FileReader provides read-only sequential iteration of serialized
   structures in a leveldb logfile. These structures include:
   * blocks (Block)
-  * records (PhysicalRecord)
+  * physical records (PhysicalRecord)
   * batches (WriteBatch) and
   * key/value records (ParsedInternalKey).
 
@@ -261,7 +270,7 @@ class FileReader:
     Yields:
       a Block
     """
-    with open(self.filename, 'rb') as fh:
+    with open(self.filename, "rb") as fh:
       block = Block.FromStream(fh)
       while block:
         yield block
@@ -288,22 +297,30 @@ class FileReader:
       WriteBatch
     """
     buffer = bytearray()
+    offset = 0
     for physical_record in self.GetPhysicalRecords():
-      if (
-          physical_record.record_type ==
-          definitions.LogFilePhysicalRecordType.FULL):
+      if (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.FULL
+      ):
         offset = physical_record.contents_offset + physical_record.base_offset
-        yield WriteBatch.FromBytes(physical_record.contents, offset)
+        yield WriteBatch.FromBytes(physical_record.contents, base_offset=offset)
         buffer = bytearray()
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.FIRST):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.FIRST
+      ):
         offset = physical_record.contents_offset + physical_record.base_offset
         buffer = bytearray(physical_record.contents)
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.MIDDLE):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.MIDDLE
+      ):
         buffer.extend(bytearray(physical_record.contents))
-      elif (physical_record.record_type ==
-            definitions.LogFilePhysicalRecordType.LAST):
+      elif (
+          physical_record.record_type
+          == definitions.LogFilePhysicalRecordType.LAST
+      ):
         buffer.extend(bytearray(physical_record.contents))
         yield WriteBatch.FromBytes(buffer, base_offset=offset)
         buffer = bytearray()
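The final hunk reassembles write batches that span block boundaries: a FULL record parses directly, while FIRST/MIDDLE/LAST fragments accumulate in a buffer that is parsed at LAST, and offset is now initialized before the loop so a log that starts mid-batch cannot reference it unbound. A sketch mirroring that reassembly from the reader API shown above (the .log path is hypothetical, and FileReader is assumed to take the path like the other readers):

```python
from dfindexeddb.leveldb import definitions, log

reader = log.FileReader("000003.log")  # hypothetical write-ahead log

buffer = bytearray()
offset = 0
for record in reader.GetPhysicalRecords():
  if record.record_type == definitions.LogFilePhysicalRecordType.FULL:
    offset = record.contents_offset + record.base_offset
    batch = log.WriteBatch.FromBytes(record.contents, base_offset=offset)
    print(batch.sequence_number, batch.count)
  elif record.record_type == definitions.LogFilePhysicalRecordType.FIRST:
    offset = record.contents_offset + record.base_offset
    buffer = bytearray(record.contents)
  elif record.record_type == definitions.LogFilePhysicalRecordType.MIDDLE:
    buffer.extend(record.contents)
  elif record.record_type == definitions.LogFilePhysicalRecordType.LAST:
    buffer.extend(record.contents)
    batch = log.WriteBatch.FromBytes(bytes(buffer), base_offset=offset)
    print(batch.sequence_number, batch.count)
```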