dfindexeddb 20240331a0__py3-none-any.whl → 20240417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/chromium/blink.py +915 -18
- dfindexeddb/indexeddb/chromium/definitions.py +66 -0
- dfindexeddb/indexeddb/chromium/record.py +28 -20
- dfindexeddb/indexeddb/chromium/v8.py +8 -3
- dfindexeddb/indexeddb/cli.py +110 -10
- dfindexeddb/leveldb/cli.py +11 -2
- dfindexeddb/leveldb/definitions.py +2 -0
- dfindexeddb/leveldb/descriptor.py +7 -1
- dfindexeddb/leveldb/record.py +109 -21
- dfindexeddb/version.py +2 -1
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/METADATA +64 -2
- dfindexeddb-20240417.dist-info/RECORD +29 -0
- dfindexeddb-20240331a0.dist-info/RECORD +0 -29
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/WHEEL +0 -0
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/entry_points.txt +0 -0
- {dfindexeddb-20240331a0.dist-info → dfindexeddb-20240417.dist-info}/top_level.txt +0 -0
|
@@ -141,10 +141,12 @@ class BlinkSerializationTag(IntEnum):
|
|
|
141
141
|
ENCODED_AUDIO_CHUNK = ord('y')
|
|
142
142
|
ENCODED_VIDEO_CHUNK = ord('z')
|
|
143
143
|
CROP_TARGET = ord('c')
|
|
144
|
+
RESTRICTION_TARGET = ord('D')
|
|
144
145
|
MEDIA_SOURCE_HANDLE = ord('S')
|
|
145
146
|
DEPRECATED_DETECTED_BARCODE = ord('B')
|
|
146
147
|
DEPRECATED_DETECTED_FACE = ord('F')
|
|
147
148
|
DEPRECATED_DETECTED_TEXT = ord('t')
|
|
149
|
+
FENCED_FRAME_CONFIG = ord('C')
|
|
148
150
|
DOM_EXCEPTION = ord('x')
|
|
149
151
|
TRAILER_OFFSET = 0xFE
|
|
150
152
|
VERSION = 0xFF
|
|
@@ -304,3 +306,67 @@ class V8ErrorTag(IntEnum):
|
|
|
304
306
|
CAUSE = ord('c')
|
|
305
307
|
STACK = ord('s')
|
|
306
308
|
END = ord('.')
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class ImageSerializationTag(IntEnum):
|
|
312
|
+
"""Image Serialization tags."""
|
|
313
|
+
END = 0
|
|
314
|
+
PREDEFINED_COLOR_SPACE = 1
|
|
315
|
+
CANVAS_PIXEL_FORMAT = 2
|
|
316
|
+
IMAGE_DATA_STORAGE_FORMAT = 3
|
|
317
|
+
ORIGIN_CLEAN = 4
|
|
318
|
+
IS_PREMULTIPLIED = 5
|
|
319
|
+
CANVAS_OPACITY_MODE = 6
|
|
320
|
+
PARAMETRIC_COLOR_SPACE = 7
|
|
321
|
+
IMAGE_ORIENTATION = 8
|
|
322
|
+
LAST = IMAGE_ORIENTATION
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class SerializedPredefinedColorSpace(IntEnum):
|
|
326
|
+
"""Serialized Predefined Color Space enumeration."""
|
|
327
|
+
LEGACY_OBSOLETE = 0
|
|
328
|
+
SRGB = 1
|
|
329
|
+
REC2020 = 2
|
|
330
|
+
P3 = 3
|
|
331
|
+
REC2100HLG = 4
|
|
332
|
+
REC2100PQ = 5
|
|
333
|
+
SRGB_LINEAR = 6
|
|
334
|
+
LAST = SRGB_LINEAR
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
class SerializedPixelFormat(IntEnum):
|
|
338
|
+
"""Serialized Pixel Format enumeration."""
|
|
339
|
+
NATIVE8_LEGACY_OBSOLETE = 0
|
|
340
|
+
F16 = 1
|
|
341
|
+
RGBA8 = 2
|
|
342
|
+
BGRA8 = 3
|
|
343
|
+
RGBX8 = 4
|
|
344
|
+
LAST = RGBX8
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class SerializedImageDataStorageFormat(IntEnum):
|
|
348
|
+
"""The Serialized Image Data Storage Format."""
|
|
349
|
+
UINT8CLAMPED = 0
|
|
350
|
+
UINT16 = 1
|
|
351
|
+
FLOAT32 = 2
|
|
352
|
+
LAST = FLOAT32
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
class SerializedOpacityMode(IntEnum):
|
|
356
|
+
"""The Serialized Opacity Mode."""
|
|
357
|
+
KNONOPAQUE = 0
|
|
358
|
+
KOPAQUE = 1
|
|
359
|
+
KLAST = KOPAQUE
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
class SerializedImageOrientation(IntEnum):
|
|
363
|
+
"""The Serialized Image Orientation."""
|
|
364
|
+
TOP_LEFT = 0
|
|
365
|
+
TOP_RIGHT = 1
|
|
366
|
+
BOTTOM_RIGHT = 2
|
|
367
|
+
BOTTOM_LEFT = 3
|
|
368
|
+
LEFT_TOP = 4
|
|
369
|
+
RIGHT_TOP = 5
|
|
370
|
+
RIGHT_BOTTOM = 6
|
|
371
|
+
LEFT_BOTTOM = 7
|
|
372
|
+
LAST = LEFT_BOTTOM
|
|
@@ -22,8 +22,7 @@ from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union
|
|
|
22
22
|
from dfindexeddb import errors
|
|
23
23
|
from dfindexeddb.indexeddb.chromium import blink
|
|
24
24
|
from dfindexeddb.indexeddb.chromium import definitions
|
|
25
|
-
from dfindexeddb.leveldb import
|
|
26
|
-
from dfindexeddb.leveldb import log
|
|
25
|
+
from dfindexeddb.leveldb import record
|
|
27
26
|
from dfindexeddb.leveldb import utils
|
|
28
27
|
|
|
29
28
|
|
|
@@ -546,7 +545,7 @@ class EarliestSweepKey(BaseIndexedDBKey):
|
|
|
546
545
|
|
|
547
546
|
|
|
548
547
|
@dataclass
|
|
549
|
-
class
|
|
548
|
+
class EarliestCompactionTimeKey(BaseIndexedDBKey):
|
|
550
549
|
"""An earliest compaction time IndexedDB key."""
|
|
551
550
|
|
|
552
551
|
def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int:
|
|
@@ -558,11 +557,11 @@ class EarlistCompactionTimeKey(BaseIndexedDBKey):
|
|
|
558
557
|
def FromDecoder(
|
|
559
558
|
cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
|
|
560
559
|
base_offset: int = 0
|
|
561
|
-
) ->
|
|
560
|
+
) -> EarliestCompactionTimeKey:
|
|
562
561
|
"""Decodes the earliest compaction time key."""
|
|
563
562
|
offset, key_type = decoder.DecodeUint8()
|
|
564
563
|
if key_type != definitions.GlobalMetadataKeyType.EARLIEST_COMPACTION_TIME:
|
|
565
|
-
raise errors.ParserError('Not a
|
|
564
|
+
raise errors.ParserError('Not a EarliestCompactionTimeKey')
|
|
566
565
|
return cls(offset=base_offset + offset, key_prefix=key_prefix)
|
|
567
566
|
|
|
568
567
|
|
|
@@ -668,7 +667,7 @@ class GlobalMetaDataKey(BaseIndexedDBKey):
|
|
|
668
667
|
definitions.GlobalMetadataKeyType
|
|
669
668
|
.EARLIEST_SWEEP: EarliestSweepKey,
|
|
670
669
|
definitions.GlobalMetadataKeyType
|
|
671
|
-
.EARLIEST_COMPACTION_TIME:
|
|
670
|
+
.EARLIEST_COMPACTION_TIME: EarliestCompactionTimeKey,
|
|
672
671
|
definitions.GlobalMetadataKeyType
|
|
673
672
|
.SCOPES_PREFIX: ScopesPrefixKey,
|
|
674
673
|
definitions.GlobalMetadataKeyType
|
|
@@ -692,7 +691,7 @@ class GlobalMetaDataKey(BaseIndexedDBKey):
|
|
|
692
691
|
Type[DatabaseFreeListKey],
|
|
693
692
|
Type[DatabaseNameKey],
|
|
694
693
|
Type[EarliestSweepKey],
|
|
695
|
-
Type[
|
|
694
|
+
Type[EarliestCompactionTimeKey],
|
|
696
695
|
Type[MaxDatabaseIdKey],
|
|
697
696
|
Type[RecoveryBlobJournalKey],
|
|
698
697
|
Type[SchemaVersionKey],
|
|
@@ -972,7 +971,7 @@ class ObjectStoreDataValue:
|
|
|
972
971
|
blob_offset: the blob offset, only valid if wrapped.
|
|
973
972
|
value: the blink serialized value, only valid if not wrapped.
|
|
974
973
|
"""
|
|
975
|
-
|
|
974
|
+
unknown: int
|
|
976
975
|
is_wrapped: bool
|
|
977
976
|
blob_size: Optional[int]
|
|
978
977
|
blob_offset: Optional[int]
|
|
@@ -1003,7 +1002,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
|
|
|
1003
1002
|
_, blob_size = decoder.DecodeVarint()
|
|
1004
1003
|
_, blob_offset = decoder.DecodeVarint()
|
|
1005
1004
|
return ObjectStoreDataValue(
|
|
1006
|
-
|
|
1005
|
+
unknown=unknown_integer,
|
|
1007
1006
|
is_wrapped=True,
|
|
1008
1007
|
blob_size=blob_size,
|
|
1009
1008
|
blob_offset=blob_offset,
|
|
@@ -1011,7 +1010,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
|
|
|
1011
1010
|
_, blink_bytes = decoder.ReadBytes()
|
|
1012
1011
|
blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes)
|
|
1013
1012
|
return ObjectStoreDataValue(
|
|
1014
|
-
|
|
1013
|
+
unknown=unknown_integer,
|
|
1015
1014
|
is_wrapped=False,
|
|
1016
1015
|
blob_size=None,
|
|
1017
1016
|
blob_offset=None,
|
|
@@ -1337,24 +1336,33 @@ class IndexedDBRecord:
|
|
|
1337
1336
|
value: the value of the record.
|
|
1338
1337
|
sequence_number: if available, the sequence number of the record.
|
|
1339
1338
|
type: the type of the record.
|
|
1339
|
+
level: the leveldb level, None indicates the record came from a log file.
|
|
1340
|
+
recovered: True if the record is a recovered record.
|
|
1340
1341
|
"""
|
|
1342
|
+
path: str
|
|
1341
1343
|
offset: int
|
|
1342
1344
|
key: Any
|
|
1343
1345
|
value: Any
|
|
1344
|
-
sequence_number: int
|
|
1346
|
+
sequence_number: Optional[int]
|
|
1345
1347
|
type: int
|
|
1348
|
+
level: Optional[int]
|
|
1349
|
+
recovered: Optional[bool]
|
|
1346
1350
|
|
|
1347
1351
|
@classmethod
|
|
1348
1352
|
def FromLevelDBRecord(
|
|
1349
|
-
cls,
|
|
1353
|
+
cls, db_record: record.LevelDBRecord
|
|
1350
1354
|
) -> IndexedDBRecord:
|
|
1351
1355
|
"""Returns an IndexedDBRecord from a ParsedInternalKey."""
|
|
1352
|
-
idb_key = IndexedDbKey.FromBytes(
|
|
1353
|
-
|
|
1356
|
+
idb_key = IndexedDbKey.FromBytes(
|
|
1357
|
+
db_record.record.key, base_offset=db_record.record.offset)
|
|
1358
|
+
idb_value = idb_key.ParseValue(db_record.record.value)
|
|
1354
1359
|
return cls(
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1360
|
+
path=db_record.path,
|
|
1361
|
+
offset=db_record.record.offset,
|
|
1362
|
+
key=idb_key,
|
|
1363
|
+
value=idb_value,
|
|
1364
|
+
sequence_number=db_record.record.sequence_number if hasattr(
|
|
1365
|
+
db_record.record, 'sequence_number') else None,
|
|
1366
|
+
type=db_record.record.record_type,
|
|
1367
|
+
level=db_record.level,
|
|
1368
|
+
recovered=db_record.recovered)
|
|
@@ -152,7 +152,12 @@ class ValueDeserializer:
|
|
|
152
152
|
_, tag_value = self.decoder.PeekBytes(1)
|
|
153
153
|
except errors.DecoderError:
|
|
154
154
|
return None
|
|
155
|
-
|
|
155
|
+
try:
|
|
156
|
+
return definitions.V8SerializationTag(tag_value[0])
|
|
157
|
+
except ValueError as error:
|
|
158
|
+
raise errors.ParserError(
|
|
159
|
+
f'Invalid v8 tag value {tag_value} at offset'
|
|
160
|
+
f' {self.decoder.stream.tell()}') from error
|
|
156
161
|
|
|
157
162
|
def _ReadTag(self) -> definitions.V8SerializationTag:
|
|
158
163
|
"""Returns the next non-padding serialization tag.
|
|
@@ -269,7 +274,7 @@ class ValueDeserializer:
|
|
|
269
274
|
self.version >= 15):
|
|
270
275
|
parsed_object = self.ReadSharedObject()
|
|
271
276
|
elif self.version < 13:
|
|
272
|
-
self.decoder.stream.seek(-1)
|
|
277
|
+
self.decoder.stream.seek(-1, os.SEEK_CUR)
|
|
273
278
|
parsed_object = self.ReadHostObject()
|
|
274
279
|
else:
|
|
275
280
|
parsed_object = None
|
|
@@ -492,7 +497,7 @@ class ValueDeserializer:
|
|
|
492
497
|
return value
|
|
493
498
|
|
|
494
499
|
def _ReadJSRegExp(self) -> RegExp:
|
|
495
|
-
"""Reads a
|
|
500
|
+
"""Reads a Javascript regular expression from the current position."""
|
|
496
501
|
next_id = self._GetNextId()
|
|
497
502
|
pattern = self.ReadString()
|
|
498
503
|
_, flags = self.decoder.DecodeUint32Varint() # TODO: verify flags
|
dfindexeddb/indexeddb/cli.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"""A CLI tool for dfindexeddb."""
|
|
16
16
|
import argparse
|
|
17
17
|
import dataclasses
|
|
18
|
+
import enum
|
|
18
19
|
from datetime import datetime
|
|
19
20
|
import json
|
|
20
21
|
import pathlib
|
|
@@ -57,6 +58,8 @@ class Encoder(json.JSONEncoder):
|
|
|
57
58
|
return list(o)
|
|
58
59
|
if isinstance(o, v8.RegExp):
|
|
59
60
|
return str(o)
|
|
61
|
+
if isinstance(o, enum.Enum):
|
|
62
|
+
return o.name
|
|
60
63
|
return json.JSONEncoder.default(self, o)
|
|
61
64
|
|
|
62
65
|
|
|
@@ -70,22 +73,78 @@ def _Output(structure, output):
|
|
|
70
73
|
print(structure)
|
|
71
74
|
|
|
72
75
|
|
|
73
|
-
def
|
|
74
|
-
"""The CLI for processing a
|
|
75
|
-
|
|
76
|
+
def DbCommand(args):
|
|
77
|
+
"""The CLI for processing a directory as indexeddb."""
|
|
78
|
+
if args.use_manifest:
|
|
79
|
+
for db_record in leveldb_record.LevelDBRecord.FromManifest(args.source):
|
|
80
|
+
record = db_record.record
|
|
81
|
+
try:
|
|
82
|
+
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
|
|
83
|
+
db_record)
|
|
84
|
+
except(
|
|
85
|
+
errors.ParserError,
|
|
86
|
+
errors.DecoderError,
|
|
87
|
+
NotImplementedError) as err:
|
|
88
|
+
print((
|
|
89
|
+
f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
|
|
90
|
+
f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
|
|
91
|
+
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
92
|
+
continue
|
|
93
|
+
_Output(idb_record, output=args.output)
|
|
94
|
+
else:
|
|
95
|
+
for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
|
|
96
|
+
record = db_record.record
|
|
97
|
+
try:
|
|
98
|
+
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
|
|
99
|
+
db_record)
|
|
100
|
+
except(
|
|
101
|
+
errors.ParserError,
|
|
102
|
+
errors.DecoderError,
|
|
103
|
+
NotImplementedError) as err:
|
|
104
|
+
print((
|
|
105
|
+
f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
|
|
106
|
+
f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
|
|
107
|
+
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
108
|
+
continue
|
|
109
|
+
_Output(idb_record, output=args.output)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def LdbCommand(args):
|
|
113
|
+
"""The CLI for processing a leveldb table (.ldb) file as indexeddb."""
|
|
114
|
+
for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
|
|
115
|
+
record = db_record.record
|
|
116
|
+
try:
|
|
117
|
+
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
|
|
118
|
+
db_record)
|
|
119
|
+
except(
|
|
120
|
+
errors.ParserError,
|
|
121
|
+
errors.DecoderError,
|
|
122
|
+
NotImplementedError) as err:
|
|
123
|
+
print(
|
|
124
|
+
(f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
|
|
125
|
+
f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
|
|
126
|
+
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
127
|
+
continue
|
|
128
|
+
_Output(idb_record, output=args.output)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def LogCommand(args):
|
|
132
|
+
"""The CLI for processing a leveldb log file as indexeddb."""
|
|
133
|
+
for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
|
|
76
134
|
record = db_record.record
|
|
77
135
|
try:
|
|
78
|
-
|
|
79
|
-
|
|
136
|
+
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
|
|
137
|
+
db_record)
|
|
80
138
|
except(
|
|
81
139
|
errors.ParserError,
|
|
82
140
|
errors.DecoderError,
|
|
83
141
|
NotImplementedError) as err:
|
|
84
142
|
print(
|
|
85
|
-
(f'Error parsing
|
|
143
|
+
(f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
|
|
86
144
|
f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
|
|
87
145
|
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
88
|
-
|
|
146
|
+
continue
|
|
147
|
+
_Output(idb_record, output=args.output)
|
|
89
148
|
|
|
90
149
|
|
|
91
150
|
def App():
|
|
@@ -94,10 +153,51 @@ def App():
|
|
|
94
153
|
prog='dfindexeddb',
|
|
95
154
|
description='A cli tool for parsing indexeddb files',
|
|
96
155
|
epilog=f'Version {version.GetVersion()}')
|
|
97
|
-
|
|
156
|
+
|
|
157
|
+
subparsers = parser.add_subparsers()
|
|
158
|
+
|
|
159
|
+
parser_db = subparsers.add_parser(
|
|
160
|
+
'db', help='Parse a directory as indexeddb.')
|
|
161
|
+
parser_db.add_argument(
|
|
98
162
|
'-s', '--source', required=True, type=pathlib.Path,
|
|
99
163
|
help='The source leveldb folder')
|
|
100
|
-
|
|
164
|
+
parser_db.add_argument(
|
|
165
|
+
'--use_manifest',
|
|
166
|
+
action='store_true',
|
|
167
|
+
help='Use manifest file to determine active/deleted records.')
|
|
168
|
+
parser_db.add_argument(
|
|
169
|
+
'-o',
|
|
170
|
+
'--output',
|
|
171
|
+
choices=[
|
|
172
|
+
'json',
|
|
173
|
+
'jsonl',
|
|
174
|
+
'repr'],
|
|
175
|
+
default='json',
|
|
176
|
+
help='Output format. Default is json')
|
|
177
|
+
parser_db.set_defaults(func=DbCommand)
|
|
178
|
+
|
|
179
|
+
parser_ldb = subparsers.add_parser(
|
|
180
|
+
'ldb', help='Parse a ldb file as indexeddb.')
|
|
181
|
+
parser_ldb.add_argument(
|
|
182
|
+
'-s', '--source', required=True, type=pathlib.Path,
|
|
183
|
+
help='The source .ldb file.')
|
|
184
|
+
parser_ldb.add_argument(
|
|
185
|
+
'-o',
|
|
186
|
+
'--output',
|
|
187
|
+
choices=[
|
|
188
|
+
'json',
|
|
189
|
+
'jsonl',
|
|
190
|
+
'repr'],
|
|
191
|
+
default='json',
|
|
192
|
+
help='Output format. Default is json')
|
|
193
|
+
parser_ldb.set_defaults(func=LdbCommand)
|
|
194
|
+
|
|
195
|
+
parser_log = subparsers.add_parser(
|
|
196
|
+
'log', help='Parse a log file as indexeddb.')
|
|
197
|
+
parser_log.add_argument(
|
|
198
|
+
'-s', '--source', required=True, type=pathlib.Path,
|
|
199
|
+
help='The source .log file.')
|
|
200
|
+
parser_log.add_argument(
|
|
101
201
|
'-o',
|
|
102
202
|
'--output',
|
|
103
203
|
choices=[
|
|
@@ -106,7 +206,7 @@ def App():
|
|
|
106
206
|
'repr'],
|
|
107
207
|
default='json',
|
|
108
208
|
help='Output format. Default is json')
|
|
109
|
-
|
|
209
|
+
parser_log.set_defaults(func=LogCommand)
|
|
110
210
|
|
|
111
211
|
args = parser.parse_args()
|
|
112
212
|
args.func(args)
|
dfindexeddb/leveldb/cli.py
CHANGED
|
@@ -66,8 +66,12 @@ def _Output(structure, output):
|
|
|
66
66
|
|
|
67
67
|
def DbCommand(args):
|
|
68
68
|
"""The CLI for processing leveldb folders."""
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
if args.use_manifest:
|
|
70
|
+
for rec in record.LevelDBRecord.FromManifest(args.source):
|
|
71
|
+
_Output(rec, output=args.output)
|
|
72
|
+
else:
|
|
73
|
+
for rec in record.LevelDBRecord.FromDir(args.source):
|
|
74
|
+
_Output(rec, output=args.output)
|
|
71
75
|
|
|
72
76
|
|
|
73
77
|
def LdbCommand(args):
|
|
@@ -159,6 +163,10 @@ def App():
|
|
|
159
163
|
required=True,
|
|
160
164
|
type=pathlib.Path,
|
|
161
165
|
help='The source leveldb directory')
|
|
166
|
+
parser_db.add_argument(
|
|
167
|
+
'--use_manifest',
|
|
168
|
+
action='store_true',
|
|
169
|
+
help='Use manifest file to determine active/deleted records.')
|
|
162
170
|
parser_db.add_argument(
|
|
163
171
|
'-o',
|
|
164
172
|
'--output',
|
|
@@ -168,6 +176,7 @@ def App():
|
|
|
168
176
|
'repr'],
|
|
169
177
|
default='json',
|
|
170
178
|
help='Output format. Default is json')
|
|
179
|
+
parser_db.set_defaults(func=DbCommand)
|
|
171
180
|
|
|
172
181
|
parser_log = subparsers.add_parser(
|
|
173
182
|
'log', help='Parse a leveldb log file.')
|
|
@@ -365,7 +365,8 @@ class FileReader:
|
|
|
365
365
|
current_log = None
|
|
366
366
|
|
|
367
367
|
for version_edit in self.GetVersionEdits():
|
|
368
|
-
|
|
368
|
+
if version_edit.log_number:
|
|
369
|
+
current_log = f'{version_edit.log_number:06d}.log'
|
|
369
370
|
|
|
370
371
|
for new_file in version_edit.new_files:
|
|
371
372
|
active_files[new_file.level][f'{new_file.number:06d}.ldb'] = new_file
|
|
@@ -380,3 +381,8 @@ class FileReader:
|
|
|
380
381
|
deleted_files=dict(deleted_files),
|
|
381
382
|
version_edit_offset=version_edit.offset,
|
|
382
383
|
last_sequence=version_edit.last_sequence)
|
|
384
|
+
|
|
385
|
+
def GetLatestVersion(self) -> LevelDBVersion:
|
|
386
|
+
"""Returns the latest LevelDBVersion instance."""
|
|
387
|
+
*_, latest = self.GetVersions()
|
|
388
|
+
return latest
|
dfindexeddb/leveldb/record.py
CHANGED
|
@@ -16,9 +16,12 @@
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
import dataclasses
|
|
18
18
|
import pathlib
|
|
19
|
+
import re
|
|
19
20
|
import sys
|
|
20
|
-
from typing import Any, Generator, Union
|
|
21
|
+
from typing import Any, Generator, Optional, Union
|
|
21
22
|
|
|
23
|
+
from dfindexeddb import errors
|
|
24
|
+
from dfindexeddb.leveldb import definitions
|
|
22
25
|
from dfindexeddb.leveldb import descriptor
|
|
23
26
|
from dfindexeddb.leveldb import ldb
|
|
24
27
|
from dfindexeddb.leveldb import log
|
|
@@ -34,18 +37,20 @@ class LevelDBRecord:
|
|
|
34
37
|
Attributes:
|
|
35
38
|
path: the file path where the record was parsed from.
|
|
36
39
|
record: the leveldb record.
|
|
40
|
+
level: the leveldb level, None indicates the record came from a log file.
|
|
41
|
+
recovered: True if the record is a recovered record.
|
|
37
42
|
"""
|
|
38
43
|
path: str
|
|
39
44
|
record: Union[
|
|
40
45
|
ldb.KeyValueRecord,
|
|
41
|
-
log.ParsedInternalKey
|
|
42
|
-
|
|
46
|
+
log.ParsedInternalKey]
|
|
47
|
+
level: Optional[int] = None
|
|
48
|
+
recovered: Optional[bool] = None
|
|
43
49
|
|
|
44
50
|
@classmethod
|
|
45
51
|
def FromFile(
|
|
46
52
|
cls,
|
|
47
|
-
file_path: pathlib.Path
|
|
48
|
-
include_versionedit: bool = False
|
|
53
|
+
file_path: pathlib.Path
|
|
49
54
|
) -> Generator[LevelDBRecord, Any, Any]:
|
|
50
55
|
"""Yields leveldb records from the given path.
|
|
51
56
|
|
|
@@ -54,7 +59,6 @@ class LevelDBRecord:
|
|
|
54
59
|
|
|
55
60
|
Args:
|
|
56
61
|
file_path: the file path.
|
|
57
|
-
include_versionedit: include VersionEdit records from descriptor files.
|
|
58
62
|
"""
|
|
59
63
|
if file_path.name.endswith('.log'):
|
|
60
64
|
for record in log.FileReader(
|
|
@@ -64,12 +68,7 @@ class LevelDBRecord:
|
|
|
64
68
|
for record in ldb.FileReader(file_path.as_posix()).GetKeyValueRecords():
|
|
65
69
|
yield cls(path=file_path.as_posix(), record=record)
|
|
66
70
|
elif file_path.name.startswith('MANIFEST'):
|
|
67
|
-
|
|
68
|
-
print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
|
|
69
|
-
return
|
|
70
|
-
for record in descriptor.FileReader(
|
|
71
|
-
file_path.as_posix()).GetVersionEdits():
|
|
72
|
-
yield cls(path=file_path.as_posix(), record=record)
|
|
71
|
+
print(f'Ignoring descriptor file {file_path.as_posix()}', file=sys.stderr)
|
|
73
72
|
elif file_path.name in ('LOCK', 'CURRENT', 'LOG', 'LOG.old'):
|
|
74
73
|
print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
|
|
75
74
|
else:
|
|
@@ -78,25 +77,114 @@ class LevelDBRecord:
|
|
|
78
77
|
@classmethod
|
|
79
78
|
def FromDir(
|
|
80
79
|
cls,
|
|
81
|
-
path: pathlib.Path
|
|
82
|
-
include_versionedit: bool = False
|
|
80
|
+
path: pathlib.Path
|
|
83
81
|
) -> Generator[LevelDBRecord, Any, Any]:
|
|
84
82
|
"""Yields LevelDBRecords from the given directory.
|
|
85
83
|
|
|
86
84
|
Args:
|
|
87
85
|
path: the file path.
|
|
88
|
-
|
|
86
|
+
|
|
87
|
+
Yields:
|
|
88
|
+
LevelDBRecords
|
|
89
|
+
"""
|
|
90
|
+
if not path or not path.is_dir():
|
|
91
|
+
raise ValueError(f'{path} is not a directory')
|
|
92
|
+
for file_path in path.iterdir():
|
|
93
|
+
yield from cls.FromFile(file_path=file_path)
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
def FromManifest(
|
|
97
|
+
cls,
|
|
98
|
+
path: pathlib.Path
|
|
99
|
+
) -> Generator[LevelDBRecord, Any, Any]:
|
|
100
|
+
"""Yields LevelDBRecords from the given directory using the manifest.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
path: the file path.
|
|
89
104
|
|
|
90
105
|
Yields:
|
|
91
106
|
LevelDBRecords
|
|
92
107
|
|
|
93
108
|
Raises:
|
|
109
|
+
ParserError: if the CURRENT or MANIFEST-* file does not exist.
|
|
94
110
|
ValueError: if path is not a directory.
|
|
95
111
|
"""
|
|
96
|
-
if path.is_dir():
|
|
97
|
-
for file_path in path.iterdir():
|
|
98
|
-
yield from cls.FromFile(
|
|
99
|
-
file_path=file_path,
|
|
100
|
-
include_versionedit=include_versionedit)
|
|
101
|
-
else:
|
|
112
|
+
if not path or not path.is_dir():
|
|
102
113
|
raise ValueError(f'{path} is not a directory')
|
|
114
|
+
|
|
115
|
+
current_path = path / 'CURRENT'
|
|
116
|
+
if not current_path.exists():
|
|
117
|
+
raise errors.ParserError(f'{current_path!s} does not exist.')
|
|
118
|
+
|
|
119
|
+
current_manifest = current_path.read_text().strip()
|
|
120
|
+
manifest_regex = re.compile(definitions.MANIFEST_FILENAME_PATTERN)
|
|
121
|
+
if not manifest_regex.fullmatch(current_manifest):
|
|
122
|
+
raise errors.ParserError(
|
|
123
|
+
f'{current_path!s} does not contain the expected content')
|
|
124
|
+
|
|
125
|
+
manifest_path = path / current_manifest
|
|
126
|
+
if not manifest_path.exists():
|
|
127
|
+
raise errors.ParserError(f'{manifest_path!s} does not exist.')
|
|
128
|
+
|
|
129
|
+
latest_version = descriptor.FileReader(
|
|
130
|
+
str(manifest_path)).GetLatestVersion()
|
|
131
|
+
if not latest_version:
|
|
132
|
+
raise errors.ParserError(
|
|
133
|
+
f'Could not parse a leveldb version from {manifest_path!s}')
|
|
134
|
+
|
|
135
|
+
# read log records
|
|
136
|
+
log_records = []
|
|
137
|
+
if latest_version.current_log:
|
|
138
|
+
current_log = path / latest_version.current_log
|
|
139
|
+
if current_log.exists():
|
|
140
|
+
for log_record in cls.FromFile(file_path=current_log):
|
|
141
|
+
log_records.append(log_record)
|
|
142
|
+
else:
|
|
143
|
+
print('No current log file.', file=sys.stderr)
|
|
144
|
+
|
|
145
|
+
# read records from the "young" or 0-level
|
|
146
|
+
young_records = []
|
|
147
|
+
for active_file in latest_version.active_files.get(0, {}).keys():
|
|
148
|
+
current_young = path / active_file
|
|
149
|
+
if current_young.exists():
|
|
150
|
+
for young_record in cls.FromFile(current_young):
|
|
151
|
+
young_records.append(young_record)
|
|
152
|
+
|
|
153
|
+
active_records = {}
|
|
154
|
+
for record in sorted(
|
|
155
|
+
log_records,
|
|
156
|
+
key=lambda record: record.record.sequence_number,
|
|
157
|
+
reverse=True):
|
|
158
|
+
if record.record.key not in active_records:
|
|
159
|
+
record.recovered = False
|
|
160
|
+
active_records[record.record.key] = record
|
|
161
|
+
else:
|
|
162
|
+
record.recovered = True
|
|
163
|
+
|
|
164
|
+
for record in sorted(
|
|
165
|
+
young_records,
|
|
166
|
+
key=lambda record: record.record.sequence_number,
|
|
167
|
+
reverse=True):
|
|
168
|
+
if record.record.key not in active_records:
|
|
169
|
+
record.recovered = False
|
|
170
|
+
active_records[record.record.key] = record
|
|
171
|
+
else:
|
|
172
|
+
record.recovered = True
|
|
173
|
+
record.level = 0
|
|
174
|
+
|
|
175
|
+
yield from sorted(
|
|
176
|
+
log_records + young_records,
|
|
177
|
+
key=lambda record: record.record.sequence_number,
|
|
178
|
+
reverse=False)
|
|
179
|
+
|
|
180
|
+
if latest_version.active_files.keys():
|
|
181
|
+
for level in range(1, max(latest_version.active_files.keys()) + 1):
|
|
182
|
+
for filename in latest_version.active_files.get(level, []):
|
|
183
|
+
current_filename = path / filename
|
|
184
|
+
for record in cls.FromFile(file_path=current_filename):
|
|
185
|
+
if record.record.key in active_records:
|
|
186
|
+
record.recovered = True
|
|
187
|
+
else:
|
|
188
|
+
record.recovered = False
|
|
189
|
+
record.level = level
|
|
190
|
+
yield record
|