dfindexeddb 20241105__py3-none-any.whl → 20260205__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/chromium/blink.py +116 -74
- dfindexeddb/indexeddb/chromium/definitions.py +240 -125
- dfindexeddb/indexeddb/chromium/record.py +651 -346
- dfindexeddb/indexeddb/chromium/sqlite.py +362 -0
- dfindexeddb/indexeddb/chromium/v8.py +100 -78
- dfindexeddb/indexeddb/cli.py +282 -121
- dfindexeddb/indexeddb/firefox/definitions.py +7 -4
- dfindexeddb/indexeddb/firefox/gecko.py +98 -74
- dfindexeddb/indexeddb/firefox/record.py +78 -26
- dfindexeddb/indexeddb/safari/definitions.py +5 -3
- dfindexeddb/indexeddb/safari/record.py +86 -53
- dfindexeddb/indexeddb/safari/webkit.py +85 -71
- dfindexeddb/indexeddb/types.py +4 -1
- dfindexeddb/leveldb/cli.py +146 -138
- dfindexeddb/leveldb/definitions.py +6 -2
- dfindexeddb/leveldb/descriptor.py +70 -56
- dfindexeddb/leveldb/ldb.py +39 -33
- dfindexeddb/leveldb/log.py +41 -30
- dfindexeddb/leveldb/plugins/chrome_notifications.py +30 -18
- dfindexeddb/leveldb/plugins/interface.py +5 -6
- dfindexeddb/leveldb/plugins/manager.py +10 -9
- dfindexeddb/leveldb/record.py +71 -62
- dfindexeddb/leveldb/utils.py +105 -13
- dfindexeddb/utils.py +36 -31
- dfindexeddb/version.py +2 -2
- dfindexeddb-20260205.dist-info/METADATA +171 -0
- dfindexeddb-20260205.dist-info/RECORD +41 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/WHEEL +1 -1
- dfindexeddb-20241105.dist-info/AUTHORS +0 -12
- dfindexeddb-20241105.dist-info/METADATA +0 -424
- dfindexeddb-20241105.dist-info/RECORD +0 -41
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/entry_points.txt +0 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info/licenses}/LICENSE +0 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/top_level.txt +0 -0
|
@@ -14,14 +14,13 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Parser for LevelDB Descriptor (MANIFEST) files."""
|
|
16
16
|
from __future__ import annotations
|
|
17
|
+
|
|
17
18
|
from collections import defaultdict
|
|
18
19
|
from dataclasses import dataclass, field
|
|
19
|
-
from typing import Generator, Optional
|
|
20
|
+
from typing import Dict, Generator, Optional
|
|
20
21
|
|
|
21
22
|
from dfindexeddb import errors
|
|
22
|
-
from dfindexeddb.leveldb import definitions
|
|
23
|
-
from dfindexeddb.leveldb import log
|
|
24
|
-
from dfindexeddb.leveldb import utils
|
|
23
|
+
from dfindexeddb.leveldb import definitions, log, utils
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
@dataclass
|
|
@@ -34,6 +33,7 @@ class InternalKey:
|
|
|
34
33
|
sequence_number: the sequence number.
|
|
35
34
|
key_type: the key type.
|
|
36
35
|
"""
|
|
36
|
+
|
|
37
37
|
offset: int
|
|
38
38
|
user_key: bytes
|
|
39
39
|
sequence_number: int
|
|
@@ -41,9 +41,7 @@ class InternalKey:
|
|
|
41
41
|
|
|
42
42
|
@classmethod
|
|
43
43
|
def FromDecoder(
|
|
44
|
-
cls,
|
|
45
|
-
decoder: utils.LevelDBDecoder,
|
|
46
|
-
base_offset: int = 0
|
|
44
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
47
45
|
) -> InternalKey:
|
|
48
46
|
"""Decodes an InternalKey from the current position of a LevelDBDecoder.
|
|
49
47
|
|
|
@@ -57,20 +55,22 @@ class InternalKey:
|
|
|
57
55
|
offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
|
|
58
56
|
|
|
59
57
|
if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
|
|
60
|
-
raise errors.ParserError(
|
|
58
|
+
raise errors.ParserError("Insufficient bytes to parse InternalKey")
|
|
61
59
|
|
|
62
|
-
user_key = slice_bytes[
|
|
60
|
+
user_key = slice_bytes[: -definitions.SEQUENCE_LENGTH]
|
|
63
61
|
sequence_number = int.from_bytes(
|
|
64
|
-
slice_bytes[-definitions.SEQUENCE_LENGTH:],
|
|
65
|
-
byteorder=
|
|
66
|
-
signed=False
|
|
62
|
+
slice_bytes[-definitions.SEQUENCE_LENGTH :],
|
|
63
|
+
byteorder="little",
|
|
64
|
+
signed=False,
|
|
65
|
+
)
|
|
67
66
|
key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
|
|
68
67
|
|
|
69
68
|
return cls(
|
|
70
69
|
offset=base_offset + offset,
|
|
71
70
|
user_key=user_key,
|
|
72
71
|
sequence_number=sequence_number,
|
|
73
|
-
key_type=key_type
|
|
72
|
+
key_type=key_type,
|
|
73
|
+
)
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
@dataclass
|
|
@@ -85,6 +85,7 @@ class NewFile(utils.FromDecoderMixin):
|
|
|
85
85
|
smallest: the smallest internal key.
|
|
86
86
|
largest: the largest internal key.
|
|
87
87
|
"""
|
|
88
|
+
|
|
88
89
|
offset: int
|
|
89
90
|
level: int
|
|
90
91
|
number: int
|
|
@@ -94,9 +95,7 @@ class NewFile(utils.FromDecoderMixin):
|
|
|
94
95
|
|
|
95
96
|
@classmethod
|
|
96
97
|
def FromDecoder(
|
|
97
|
-
cls,
|
|
98
|
-
decoder: utils.LevelDBDecoder,
|
|
99
|
-
base_offset: int = 0
|
|
98
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
100
99
|
) -> NewFile:
|
|
101
100
|
"""Decodes a NewFile from the current position of a LevelDBDecoder.
|
|
102
101
|
|
|
@@ -119,7 +118,8 @@ class NewFile(utils.FromDecoderMixin):
|
|
|
119
118
|
number=number,
|
|
120
119
|
file_size=file_size,
|
|
121
120
|
smallest=smallest,
|
|
122
|
-
largest=largest
|
|
121
|
+
largest=largest,
|
|
122
|
+
)
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
@dataclass
|
|
@@ -131,15 +131,14 @@ class CompactPointer(utils.FromDecoderMixin):
|
|
|
131
131
|
level: the level.
|
|
132
132
|
key: the key bytes.
|
|
133
133
|
"""
|
|
134
|
+
|
|
134
135
|
offset: int
|
|
135
136
|
level: int
|
|
136
137
|
key: bytes
|
|
137
138
|
|
|
138
139
|
@classmethod
|
|
139
140
|
def FromDecoder(
|
|
140
|
-
cls,
|
|
141
|
-
decoder: utils.LevelDBDecoder,
|
|
142
|
-
base_offset: int = 0
|
|
141
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
143
142
|
) -> CompactPointer:
|
|
144
143
|
"""Decodes a CompactPointer from the current position of a LevelDBDecoder.
|
|
145
144
|
|
|
@@ -164,15 +163,14 @@ class DeletedFile(utils.FromDecoderMixin):
|
|
|
164
163
|
level: the level.
|
|
165
164
|
number: the file number.
|
|
166
165
|
"""
|
|
166
|
+
|
|
167
167
|
offset: int
|
|
168
168
|
level: int
|
|
169
169
|
number: int
|
|
170
170
|
|
|
171
171
|
@classmethod
|
|
172
172
|
def FromDecoder(
|
|
173
|
-
cls,
|
|
174
|
-
decoder: utils.LevelDBDecoder,
|
|
175
|
-
base_offset: int = 0
|
|
173
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
176
174
|
) -> DeletedFile:
|
|
177
175
|
"""Decodes a DeletedFile from the current position of a LevelDBDecoder.
|
|
178
176
|
|
|
@@ -203,6 +201,7 @@ class VersionEdit(utils.FromDecoderMixin):
|
|
|
203
201
|
deleted_files: the list of DeletedFiles.
|
|
204
202
|
new_files: the list of NewFiles.
|
|
205
203
|
"""
|
|
204
|
+
|
|
206
205
|
offset: int
|
|
207
206
|
comparator: Optional[bytes] = None
|
|
208
207
|
log_number: Optional[int] = None
|
|
@@ -215,9 +214,7 @@ class VersionEdit(utils.FromDecoderMixin):
|
|
|
215
214
|
|
|
216
215
|
@classmethod
|
|
217
216
|
def FromDecoder(
|
|
218
|
-
cls,
|
|
219
|
-
decoder: utils.LevelDBDecoder,
|
|
220
|
-
base_offset: int = 0
|
|
217
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
221
218
|
) -> VersionEdit:
|
|
222
219
|
"""Decodes a VersionEdit from the current position of a LevelDBDecoder.
|
|
223
220
|
|
|
@@ -239,7 +236,8 @@ class VersionEdit(utils.FromDecoderMixin):
|
|
|
239
236
|
tag = definitions.VersionEditTags(tag_byte)
|
|
240
237
|
except TypeError as error:
|
|
241
238
|
raise errors.ParserError(
|
|
242
|
-
f
|
|
239
|
+
f"Invalid VersionEditTag at offset {offset}"
|
|
240
|
+
) from error
|
|
243
241
|
|
|
244
242
|
if tag == definitions.VersionEditTags.COMPARATOR:
|
|
245
243
|
_, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
|
|
@@ -253,15 +251,18 @@ class VersionEdit(utils.FromDecoderMixin):
|
|
|
253
251
|
_, version_edit.last_sequence = decoder.DecodeUint64Varint()
|
|
254
252
|
elif tag == definitions.VersionEditTags.COMPACT_POINTER:
|
|
255
253
|
compact_pointer = CompactPointer.FromDecoder(
|
|
256
|
-
decoder=decoder, base_offset=base_offset + offset
|
|
254
|
+
decoder=decoder, base_offset=base_offset + offset
|
|
255
|
+
)
|
|
257
256
|
version_edit.compact_pointers.append(compact_pointer)
|
|
258
257
|
elif tag == definitions.VersionEditTags.DELETED_FILE:
|
|
259
258
|
deleted_file = DeletedFile.FromDecoder(
|
|
260
|
-
decoder=decoder, base_offset=base_offset + offset
|
|
259
|
+
decoder=decoder, base_offset=base_offset + offset
|
|
260
|
+
)
|
|
261
261
|
version_edit.deleted_files.append(deleted_file)
|
|
262
262
|
elif tag == definitions.VersionEditTags.NEW_FILE:
|
|
263
263
|
file_metadata = NewFile.FromDecoder(
|
|
264
|
-
decoder=decoder, base_offset=base_offset + offset
|
|
264
|
+
decoder=decoder, base_offset=base_offset + offset
|
|
265
|
+
)
|
|
265
266
|
version_edit.new_files.append(file_metadata)
|
|
266
267
|
|
|
267
268
|
if decoder.NumRemainingBytes() == 0:
|
|
@@ -286,11 +287,12 @@ class LevelDBVersion:
|
|
|
286
287
|
"Deleted files" will typically no longer exist but may be forensically
|
|
287
288
|
recoverable.
|
|
288
289
|
"""
|
|
289
|
-
|
|
290
|
+
|
|
291
|
+
current_log: Optional[str]
|
|
290
292
|
version_edit_offset: int
|
|
291
|
-
last_sequence: int
|
|
292
|
-
active_files: dict[int, dict[
|
|
293
|
-
deleted_files: dict[int, dict[
|
|
293
|
+
last_sequence: Optional[int]
|
|
294
|
+
active_files: dict[int, dict[str, NewFile]]
|
|
295
|
+
deleted_files: dict[int, dict[str, DeletedFile]]
|
|
294
296
|
|
|
295
297
|
|
|
296
298
|
class FileReader:
|
|
@@ -302,6 +304,7 @@ class FileReader:
|
|
|
302
304
|
* records (PhysicalRecord)
|
|
303
305
|
* version edits (VersionEdit)
|
|
304
306
|
"""
|
|
307
|
+
|
|
305
308
|
def __init__(self, filename: str):
|
|
306
309
|
"""Initializes the Descriptor a.k.a. MANIFEST file.
|
|
307
310
|
|
|
@@ -310,7 +313,6 @@ class FileReader:
|
|
|
310
313
|
"""
|
|
311
314
|
self.filename = filename
|
|
312
315
|
|
|
313
|
-
|
|
314
316
|
def GetBlocks(self) -> Generator[log.Block, None, None]:
|
|
315
317
|
"""Returns an iterator of Block instances.
|
|
316
318
|
|
|
@@ -319,7 +321,7 @@ class FileReader:
|
|
|
319
321
|
Yields:
|
|
320
322
|
Block
|
|
321
323
|
"""
|
|
322
|
-
with open(self.filename,
|
|
324
|
+
with open(self.filename, "rb") as fh:
|
|
323
325
|
block = log.Block.FromStream(fh)
|
|
324
326
|
while block:
|
|
325
327
|
yield block
|
|
@@ -346,24 +348,33 @@ class FileReader:
|
|
|
346
348
|
VersionEdit
|
|
347
349
|
"""
|
|
348
350
|
buffer = bytearray()
|
|
349
|
-
offset =
|
|
351
|
+
offset = 0
|
|
350
352
|
for physical_record in self.GetPhysicalRecords():
|
|
351
|
-
if (
|
|
352
|
-
|
|
353
|
-
|
|
353
|
+
if (
|
|
354
|
+
physical_record.record_type
|
|
355
|
+
== definitions.LogFilePhysicalRecordType.FULL
|
|
356
|
+
):
|
|
354
357
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
355
|
-
version_edit = VersionEdit.FromBytes(
|
|
358
|
+
version_edit = VersionEdit.FromBytes(
|
|
359
|
+
physical_record.contents, base_offset=offset
|
|
360
|
+
)
|
|
356
361
|
yield version_edit
|
|
357
362
|
buffer = bytearray()
|
|
358
|
-
elif (
|
|
359
|
-
|
|
363
|
+
elif (
|
|
364
|
+
physical_record.record_type
|
|
365
|
+
== definitions.LogFilePhysicalRecordType.FIRST
|
|
366
|
+
):
|
|
360
367
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
361
368
|
buffer = bytearray(physical_record.contents)
|
|
362
|
-
elif (
|
|
363
|
-
|
|
369
|
+
elif (
|
|
370
|
+
physical_record.record_type
|
|
371
|
+
== definitions.LogFilePhysicalRecordType.MIDDLE
|
|
372
|
+
):
|
|
364
373
|
buffer.extend(bytearray(physical_record.contents))
|
|
365
|
-
elif (
|
|
366
|
-
|
|
374
|
+
elif (
|
|
375
|
+
physical_record.record_type
|
|
376
|
+
== definitions.LogFilePhysicalRecordType.LAST
|
|
377
|
+
):
|
|
367
378
|
buffer.extend(bytearray(physical_record.contents))
|
|
368
379
|
version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
|
|
369
380
|
yield version_edit
|
|
@@ -375,27 +386,30 @@ class FileReader:
|
|
|
375
386
|
Yields:
|
|
376
387
|
LevelDBVersion
|
|
377
388
|
"""
|
|
378
|
-
active_files = defaultdict(dict)
|
|
379
|
-
deleted_files = defaultdict(
|
|
389
|
+
active_files: Dict[int, dict[str, NewFile]] = defaultdict(dict)
|
|
390
|
+
deleted_files: Dict[int, dict[str, DeletedFile]] = defaultdict(dict)
|
|
380
391
|
current_log = None
|
|
381
392
|
|
|
382
393
|
for version_edit in self.GetVersionEdits():
|
|
383
394
|
if version_edit.log_number:
|
|
384
|
-
current_log = f
|
|
395
|
+
current_log = f"{version_edit.log_number:06d}.log"
|
|
385
396
|
|
|
386
397
|
for new_file in version_edit.new_files:
|
|
387
|
-
active_files[new_file.level][f
|
|
398
|
+
active_files[new_file.level][f"{new_file.number:06d}.ldb"] = new_file
|
|
388
399
|
|
|
389
400
|
for deleted_file in version_edit.deleted_files:
|
|
390
|
-
active_files[deleted_file.level].pop(f
|
|
391
|
-
deleted_files[deleted_file.level]
|
|
401
|
+
active_files[deleted_file.level].pop(f"{deleted_file.number:06d}.ldb")
|
|
402
|
+
deleted_files[deleted_file.level][
|
|
403
|
+
f"{deleted_file.number:06d}.ldb"
|
|
404
|
+
] = deleted_file
|
|
392
405
|
|
|
393
406
|
yield LevelDBVersion(
|
|
394
407
|
current_log=current_log,
|
|
395
|
-
active_files=
|
|
396
|
-
deleted_files=
|
|
408
|
+
active_files=active_files,
|
|
409
|
+
deleted_files=deleted_files,
|
|
397
410
|
version_edit_offset=version_edit.offset,
|
|
398
|
-
last_sequence=version_edit.last_sequence
|
|
411
|
+
last_sequence=version_edit.last_sequence,
|
|
412
|
+
)
|
|
399
413
|
|
|
400
414
|
def GetLatestVersion(self) -> Optional[LevelDBVersion]:
|
|
401
415
|
"""Returns the latest LevelDBVersion instance."""
|
dfindexeddb/leveldb/ldb.py
CHANGED
|
@@ -15,16 +15,15 @@
|
|
|
15
15
|
"""Parser for LevelDB Table (.ldb) files."""
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
-
from dataclasses import dataclass, field
|
|
19
18
|
import io
|
|
20
19
|
import os
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
21
|
from typing import BinaryIO, Iterable, Tuple
|
|
22
22
|
|
|
23
23
|
import snappy
|
|
24
24
|
import zstd
|
|
25
25
|
|
|
26
|
-
from dfindexeddb.leveldb import definitions
|
|
27
|
-
from dfindexeddb.leveldb import utils
|
|
26
|
+
from dfindexeddb.leveldb import definitions, utils
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
@dataclass
|
|
@@ -38,6 +37,7 @@ class KeyValueRecord:
|
|
|
38
37
|
sequence_number: the sequence number of the record.
|
|
39
38
|
record_type: the type of the record.
|
|
40
39
|
"""
|
|
40
|
+
|
|
41
41
|
offset: int
|
|
42
42
|
key: bytes
|
|
43
43
|
value: bytes
|
|
@@ -46,10 +46,7 @@ class KeyValueRecord:
|
|
|
46
46
|
|
|
47
47
|
@classmethod
|
|
48
48
|
def FromDecoder(
|
|
49
|
-
cls,
|
|
50
|
-
decoder: utils.LevelDBDecoder,
|
|
51
|
-
block_offset: int,
|
|
52
|
-
shared_key: bytes
|
|
49
|
+
cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
|
|
53
50
|
) -> Tuple[KeyValueRecord, bytes]:
|
|
54
51
|
"""Decodes a ldb key value record.
|
|
55
52
|
|
|
@@ -68,17 +65,24 @@ class KeyValueRecord:
|
|
|
68
65
|
_, value = decoder.ReadBytes(value_length)
|
|
69
66
|
|
|
70
67
|
shared_key = shared_key[:shared_bytes] + key_delta
|
|
71
|
-
key = shared_key[
|
|
68
|
+
key = shared_key[: -definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
|
|
72
69
|
sequence_number = int.from_bytes(
|
|
73
|
-
|
|
70
|
+
shared_key[-definitions.SEQUENCE_LENGTH :],
|
|
71
|
+
byteorder="little",
|
|
72
|
+
signed=False,
|
|
73
|
+
)
|
|
74
74
|
key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
|
|
75
75
|
record_type = definitions.InternalRecordType(key_type)
|
|
76
|
-
return
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
76
|
+
return (
|
|
77
|
+
cls(
|
|
78
|
+
offset=offset + block_offset,
|
|
79
|
+
key=key,
|
|
80
|
+
value=value,
|
|
81
|
+
sequence_number=sequence_number,
|
|
82
|
+
record_type=record_type,
|
|
83
|
+
),
|
|
84
|
+
shared_key,
|
|
85
|
+
)
|
|
82
86
|
|
|
83
87
|
|
|
84
88
|
@dataclass
|
|
@@ -89,6 +93,7 @@ class Block:
|
|
|
89
93
|
offset: the offset of the block.
|
|
90
94
|
block_offset:
|
|
91
95
|
"""
|
|
96
|
+
|
|
92
97
|
offset: int
|
|
93
98
|
block_offset: int
|
|
94
99
|
length: int
|
|
@@ -106,9 +111,9 @@ class Block:
|
|
|
106
111
|
def GetBuffer(self) -> bytes:
|
|
107
112
|
"""Returns the block buffer, decompressing if required."""
|
|
108
113
|
if self.IsSnappyCompressed():
|
|
109
|
-
return snappy.decompress(self.data)
|
|
114
|
+
return bytes(snappy.decompress(self.data))
|
|
110
115
|
if self.IsZstdCompressed():
|
|
111
|
-
return zstd.decompress(self.data)
|
|
116
|
+
return bytes(zstd.decompress(self.data))
|
|
112
117
|
return self.data
|
|
113
118
|
|
|
114
119
|
def GetRecords(self) -> Iterable[KeyValueRecord]:
|
|
@@ -126,18 +131,19 @@ class Block:
|
|
|
126
131
|
# num_restarts: uint32
|
|
127
132
|
decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
|
|
128
133
|
_, num_restarts = decoder.DecodeUint32()
|
|
129
|
-
restarts_offset = (
|
|
130
|
-
|
|
131
|
-
|
|
134
|
+
restarts_offset = (decoder.stream.tell()) - (
|
|
135
|
+
(num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH
|
|
136
|
+
)
|
|
132
137
|
|
|
133
138
|
decoder.stream.seek(restarts_offset)
|
|
134
139
|
_, offset = decoder.DecodeUint32()
|
|
135
140
|
decoder.stream.seek(offset)
|
|
136
|
-
key = b
|
|
141
|
+
key = b""
|
|
137
142
|
|
|
138
143
|
while decoder.stream.tell() < restarts_offset:
|
|
139
144
|
key_value_record, key = KeyValueRecord.FromDecoder(
|
|
140
|
-
decoder, self.block_offset, key
|
|
145
|
+
decoder, self.block_offset, key
|
|
146
|
+
)
|
|
141
147
|
yield key_value_record
|
|
142
148
|
|
|
143
149
|
# TODO: parse trailer of the block for restart points (where the full
|
|
@@ -154,6 +160,7 @@ class BlockHandle(utils.FromDecoderMixin):
|
|
|
154
160
|
block_offset: the offset of the block.
|
|
155
161
|
length: the length of the block.
|
|
156
162
|
"""
|
|
163
|
+
|
|
157
164
|
offset: int
|
|
158
165
|
block_offset: int
|
|
159
166
|
length: int
|
|
@@ -173,20 +180,18 @@ class BlockHandle(utils.FromDecoderMixin):
|
|
|
173
180
|
stream.seek(self.block_offset, os.SEEK_SET)
|
|
174
181
|
data = stream.read(self.length)
|
|
175
182
|
if len(data) != self.length:
|
|
176
|
-
raise ValueError(
|
|
183
|
+
raise ValueError("Could not read all of the block")
|
|
177
184
|
|
|
178
185
|
footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
|
|
179
186
|
if len(footer) != definitions.BLOCK_TRAILER_SIZE:
|
|
180
|
-
raise ValueError(
|
|
187
|
+
raise ValueError("Could not read all of the block footer")
|
|
181
188
|
|
|
182
189
|
return Block(self.offset, self.block_offset, self.length, data, footer)
|
|
183
190
|
|
|
184
191
|
@classmethod
|
|
185
192
|
def FromDecoder(
|
|
186
|
-
cls:
|
|
187
|
-
|
|
188
|
-
base_offset: int = 0
|
|
189
|
-
) -> BlockHandle:
|
|
193
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
194
|
+
) -> BlockHandle:
|
|
190
195
|
"""Decodes a BlockHandle from the current position of a LevelDBDecoder.
|
|
191
196
|
|
|
192
197
|
Args:
|
|
@@ -220,10 +225,10 @@ class FileReader:
|
|
|
220
225
|
ValueError if the file has an invalid magic number at the end.
|
|
221
226
|
"""
|
|
222
227
|
self.filename = filename
|
|
223
|
-
with open(self.filename,
|
|
228
|
+
with open(self.filename, "rb") as fh:
|
|
224
229
|
fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
|
|
225
230
|
if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
|
|
226
|
-
raise ValueError(f
|
|
231
|
+
raise ValueError(f"Invalid magic number in {self.filename}")
|
|
227
232
|
|
|
228
233
|
fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
|
|
229
234
|
# meta_handle, need to read first due to variable integers
|
|
@@ -239,11 +244,12 @@ class FileReader:
|
|
|
239
244
|
Yields:
|
|
240
245
|
Block.
|
|
241
246
|
"""
|
|
242
|
-
with open(self.filename,
|
|
247
|
+
with open(self.filename, "rb") as fh:
|
|
243
248
|
for key_value_record in self.index_block.GetRecords():
|
|
244
249
|
block_handle = BlockHandle.FromStream(
|
|
245
250
|
io.BytesIO(key_value_record.value),
|
|
246
|
-
base_offset=key_value_record.offset
|
|
251
|
+
base_offset=key_value_record.offset,
|
|
252
|
+
)
|
|
247
253
|
yield block_handle.Load(fh)
|
|
248
254
|
|
|
249
255
|
def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
|
|
@@ -255,7 +261,7 @@ class FileReader:
|
|
|
255
261
|
for block in self.GetBlocks():
|
|
256
262
|
yield from block.GetRecords()
|
|
257
263
|
|
|
258
|
-
def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]: #pylint: disable=C0103
|
|
264
|
+
def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]: # pylint: disable=C0103
|
|
259
265
|
"""Returns an iterator of key-value pairs.
|
|
260
266
|
|
|
261
267
|
Yields:
|
dfindexeddb/leveldb/log.py
CHANGED
|
@@ -15,13 +15,12 @@
|
|
|
15
15
|
"""Parser for LevelDB Log (.log) files."""
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
-
from dataclasses import dataclass
|
|
19
18
|
import io
|
|
19
|
+
from dataclasses import dataclass
|
|
20
20
|
from typing import BinaryIO, Generator, Iterable, Optional
|
|
21
21
|
|
|
22
22
|
from dfindexeddb import errors
|
|
23
|
-
from dfindexeddb.leveldb import definitions
|
|
24
|
-
from dfindexeddb.leveldb import utils
|
|
23
|
+
from dfindexeddb.leveldb import definitions, utils
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
@dataclass
|
|
@@ -36,6 +35,7 @@ class ParsedInternalKey:
|
|
|
36
35
|
key: the record key.
|
|
37
36
|
value: the record value.
|
|
38
37
|
"""
|
|
38
|
+
|
|
39
39
|
offset: int
|
|
40
40
|
record_type: definitions.InternalRecordType
|
|
41
41
|
sequence_number: int
|
|
@@ -70,15 +70,16 @@ class ParsedInternalKey:
|
|
|
70
70
|
if record_type == definitions.InternalRecordType.VALUE:
|
|
71
71
|
_, value = decoder.DecodeBlobWithLength()
|
|
72
72
|
elif record_type == definitions.InternalRecordType.DELETED:
|
|
73
|
-
value =
|
|
73
|
+
value = b""
|
|
74
74
|
else:
|
|
75
|
-
raise ValueError(f
|
|
75
|
+
raise ValueError(f"Invalid record type {record_type}")
|
|
76
76
|
return cls(
|
|
77
77
|
offset=base_offset + offset,
|
|
78
78
|
record_type=record_type,
|
|
79
79
|
key=key,
|
|
80
80
|
value=value,
|
|
81
|
-
sequence_number=sequence_number
|
|
81
|
+
sequence_number=sequence_number,
|
|
82
|
+
)
|
|
82
83
|
|
|
83
84
|
|
|
84
85
|
@dataclass
|
|
@@ -91,6 +92,7 @@ class WriteBatch(utils.FromDecoderMixin):
|
|
|
91
92
|
count: the number of ParsedInternalKey in the batch.
|
|
92
93
|
records: the ParsedInternalKey parsed from the batch.
|
|
93
94
|
"""
|
|
95
|
+
|
|
94
96
|
offset: int
|
|
95
97
|
sequence_number: int
|
|
96
98
|
count: int
|
|
@@ -98,9 +100,7 @@ class WriteBatch(utils.FromDecoderMixin):
|
|
|
98
100
|
|
|
99
101
|
@classmethod
|
|
100
102
|
def FromDecoder(
|
|
101
|
-
|
|
102
|
-
decoder: utils.LevelDBDecoder,
|
|
103
|
-
base_offset: int = 0
|
|
103
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
104
104
|
) -> WriteBatch:
|
|
105
105
|
"""Parses a WriteBatch from a binary stream.
|
|
106
106
|
|
|
@@ -118,15 +118,17 @@ class WriteBatch(utils.FromDecoderMixin):
|
|
|
118
118
|
records = []
|
|
119
119
|
for relative_sequence_number in range(count):
|
|
120
120
|
record = ParsedInternalKey.FromDecoder(
|
|
121
|
-
decoder,
|
|
122
|
-
|
|
121
|
+
decoder,
|
|
122
|
+
base_offset + offset,
|
|
123
|
+
relative_sequence_number + sequence_number,
|
|
123
124
|
)
|
|
124
125
|
records.append(record)
|
|
125
126
|
return cls(
|
|
126
127
|
offset=base_offset + offset,
|
|
127
128
|
sequence_number=sequence_number,
|
|
128
129
|
count=count,
|
|
129
|
-
records=records
|
|
130
|
+
records=records,
|
|
131
|
+
)
|
|
130
132
|
|
|
131
133
|
|
|
132
134
|
@dataclass
|
|
@@ -142,6 +144,7 @@ class PhysicalRecord(utils.FromDecoderMixin):
|
|
|
142
144
|
contents: the record contents.
|
|
143
145
|
contents_offset: the offset of where the record contents are stored.
|
|
144
146
|
"""
|
|
147
|
+
|
|
145
148
|
base_offset: int
|
|
146
149
|
offset: int
|
|
147
150
|
checksum: int
|
|
@@ -154,9 +157,7 @@ class PhysicalRecord(utils.FromDecoderMixin):
|
|
|
154
157
|
|
|
155
158
|
@classmethod
|
|
156
159
|
def FromDecoder(
|
|
157
|
-
cls,
|
|
158
|
-
decoder: utils.LevelDBDecoder,
|
|
159
|
-
base_offset: int = 0
|
|
160
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
160
161
|
) -> Optional[PhysicalRecord]:
|
|
161
162
|
"""Decodes a PhysicalRecord from the current position of a LevelDBDecoder.
|
|
162
163
|
|
|
@@ -177,8 +178,9 @@ class PhysicalRecord(utils.FromDecoderMixin):
|
|
|
177
178
|
record_type = definitions.LogFilePhysicalRecordType(record_type_byte)
|
|
178
179
|
except ValueError as error:
|
|
179
180
|
raise errors.ParserError(
|
|
180
|
-
f
|
|
181
|
-
f
|
|
181
|
+
f"Error parsing record type of Physical Record at offset "
|
|
182
|
+
f"{offset + base_offset}"
|
|
183
|
+
) from error
|
|
182
184
|
contents_offset, contents = decoder.ReadBytes(length)
|
|
183
185
|
return cls(
|
|
184
186
|
base_offset=base_offset,
|
|
@@ -187,7 +189,8 @@ class PhysicalRecord(utils.FromDecoderMixin):
|
|
|
187
189
|
length=length,
|
|
188
190
|
record_type=record_type,
|
|
189
191
|
contents=contents,
|
|
190
|
-
contents_offset=contents_offset
|
|
192
|
+
contents_offset=contents_offset,
|
|
193
|
+
)
|
|
191
194
|
|
|
192
195
|
|
|
193
196
|
@dataclass
|
|
@@ -198,6 +201,7 @@ class Block:
|
|
|
198
201
|
offset: the block offset.
|
|
199
202
|
data: the block data.
|
|
200
203
|
"""
|
|
204
|
+
|
|
201
205
|
offset: int
|
|
202
206
|
data: bytes
|
|
203
207
|
|
|
@@ -266,7 +270,7 @@ class FileReader:
|
|
|
266
270
|
Yields:
|
|
267
271
|
a Block
|
|
268
272
|
"""
|
|
269
|
-
with open(self.filename,
|
|
273
|
+
with open(self.filename, "rb") as fh:
|
|
270
274
|
block = Block.FromStream(fh)
|
|
271
275
|
while block:
|
|
272
276
|
yield block
|
|
@@ -293,23 +297,30 @@ class FileReader:
|
|
|
293
297
|
WriteBatch
|
|
294
298
|
"""
|
|
295
299
|
buffer = bytearray()
|
|
296
|
-
offset =
|
|
300
|
+
offset = 0
|
|
297
301
|
for physical_record in self.GetPhysicalRecords():
|
|
298
|
-
if (
|
|
299
|
-
|
|
300
|
-
|
|
302
|
+
if (
|
|
303
|
+
physical_record.record_type
|
|
304
|
+
== definitions.LogFilePhysicalRecordType.FULL
|
|
305
|
+
):
|
|
301
306
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
302
|
-
yield WriteBatch.FromBytes(
|
|
307
|
+
yield WriteBatch.FromBytes(physical_record.contents, base_offset=offset)
|
|
303
308
|
buffer = bytearray()
|
|
304
|
-
elif (
|
|
305
|
-
|
|
309
|
+
elif (
|
|
310
|
+
physical_record.record_type
|
|
311
|
+
== definitions.LogFilePhysicalRecordType.FIRST
|
|
312
|
+
):
|
|
306
313
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
307
314
|
buffer = bytearray(physical_record.contents)
|
|
308
|
-
elif (
|
|
309
|
-
|
|
315
|
+
elif (
|
|
316
|
+
physical_record.record_type
|
|
317
|
+
== definitions.LogFilePhysicalRecordType.MIDDLE
|
|
318
|
+
):
|
|
310
319
|
buffer.extend(bytearray(physical_record.contents))
|
|
311
|
-
elif (
|
|
312
|
-
|
|
320
|
+
elif (
|
|
321
|
+
physical_record.record_type
|
|
322
|
+
== definitions.LogFilePhysicalRecordType.LAST
|
|
323
|
+
):
|
|
313
324
|
buffer.extend(bytearray(physical_record.contents))
|
|
314
325
|
yield WriteBatch.FromBytes(buffer, base_offset=offset)
|
|
315
326
|
buffer = bytearray()
|