dfindexeddb 20240229__py3-none-any.whl → 20240305__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/cli.py +41 -8
- dfindexeddb/indexeddb/chromium.py +60 -37
- dfindexeddb/leveldb/definitions.py +43 -0
- dfindexeddb/leveldb/descriptor.py +335 -0
- dfindexeddb/leveldb/ldb.py +33 -33
- dfindexeddb/leveldb/log.py +58 -56
- dfindexeddb/utils.py +11 -5
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/METADATA +1 -1
- dfindexeddb-20240305.dist-info/RECORD +22 -0
- dfindexeddb-20240229.dist-info/RECORD +0 -20
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/WHEEL +0 -0
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/entry_points.txt +0 -0
- {dfindexeddb-20240229.dist-info → dfindexeddb-20240305.dist-info}/top_level.txt +0 -0
dfindexeddb/cli.py
CHANGED
|
@@ -19,18 +19,33 @@ from datetime import datetime
|
|
|
19
19
|
import json
|
|
20
20
|
import pathlib
|
|
21
21
|
import sys
|
|
22
|
+
import traceback
|
|
22
23
|
|
|
23
|
-
from dfindexeddb
|
|
24
|
+
from dfindexeddb import errors
|
|
25
|
+
from dfindexeddb import version
|
|
26
|
+
from dfindexeddb.leveldb import descriptor
|
|
24
27
|
from dfindexeddb.leveldb import ldb
|
|
28
|
+
from dfindexeddb.leveldb import log
|
|
25
29
|
from dfindexeddb.indexeddb import chromium
|
|
26
30
|
from dfindexeddb.indexeddb import v8
|
|
27
31
|
|
|
28
32
|
|
|
33
|
+
_VALID_PRINTABLE_CHARACTERS = (
|
|
34
|
+
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
|
|
35
|
+
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
|
|
36
|
+
|
|
37
|
+
|
|
29
38
|
class Encoder(json.JSONEncoder):
|
|
30
39
|
"""A JSON encoder class for dfindexeddb fields."""
|
|
31
40
|
def default(self, o):
|
|
32
41
|
if isinstance(o, bytes):
|
|
33
|
-
|
|
42
|
+
out = []
|
|
43
|
+
for x in o:
|
|
44
|
+
if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
|
|
45
|
+
out.append(f'\\x{x:02X}')
|
|
46
|
+
else:
|
|
47
|
+
out.append(chr(x))
|
|
48
|
+
return ''.join(out)
|
|
34
49
|
if isinstance(o, datetime):
|
|
35
50
|
return o.isoformat()
|
|
36
51
|
if isinstance(o, v8.Undefined):
|
|
@@ -57,22 +72,36 @@ def IndexeddbCommand(args):
|
|
|
57
72
|
"""The CLI for processing a log/ldb file as indexeddb."""
|
|
58
73
|
if args.source.name.endswith('.log'):
|
|
59
74
|
records = list(
|
|
60
|
-
log.
|
|
75
|
+
log.FileReader(args.source).GetKeyValueRecords())
|
|
61
76
|
elif args.source.name.endswith('.ldb'):
|
|
62
77
|
records = list(
|
|
63
|
-
ldb.
|
|
78
|
+
ldb.FileReader(args.source).GetKeyValueRecords())
|
|
64
79
|
else:
|
|
65
80
|
print('Unsupported file type.', file=sys.stderr)
|
|
66
81
|
return
|
|
67
82
|
|
|
68
83
|
for record in records:
|
|
69
|
-
|
|
84
|
+
try:
|
|
85
|
+
record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
|
|
86
|
+
except (errors.ParserError, errors.DecoderError) as err:
|
|
87
|
+
print(
|
|
88
|
+
(f'Error parsing blink value: {err} for {record.__class__.__name__} '
|
|
89
|
+
f'at offset {record.offset}'), file=sys.stderr)
|
|
90
|
+
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
70
91
|
_Output(record, to_json=args.json)
|
|
71
92
|
|
|
72
93
|
|
|
94
|
+
def ManifestCommand(args):
  """The CLI for processing MANIFEST aka Descriptor files.

  Every VersionEdit read from the file is passed to _Output in the order it
  is produced by the reader.

  Args:
    args: the parsed command line arguments. `source` is handed to the
        descriptor file reader and `json` is forwarded to _Output as the
        to_json flag.
  """
  reader = descriptor.FileReader(args.source)

  for edit in reader.GetVersionEdits():
    _Output(edit, to_json=args.json)
|
|
100
|
+
|
|
101
|
+
|
|
73
102
|
def LdbCommand(args):
|
|
74
103
|
"""The CLI for processing ldb files."""
|
|
75
|
-
ldb_file = ldb.
|
|
104
|
+
ldb_file = ldb.FileReader(args.source)
|
|
76
105
|
|
|
77
106
|
if args.structure_type == 'blocks':
|
|
78
107
|
# Prints block information.
|
|
@@ -87,7 +116,7 @@ def LdbCommand(args):
|
|
|
87
116
|
|
|
88
117
|
def LogCommand(args):
|
|
89
118
|
"""The CLI for processing log files."""
|
|
90
|
-
log_file = log.
|
|
119
|
+
log_file = log.FileReader(args.source)
|
|
91
120
|
|
|
92
121
|
if args.structure_type == 'blocks':
|
|
93
122
|
# Prints block information.
|
|
@@ -114,7 +143,8 @@ def App():
|
|
|
114
143
|
"""The CLI app entrypoint."""
|
|
115
144
|
parser = argparse.ArgumentParser(
|
|
116
145
|
prog='dfindexeddb',
|
|
117
|
-
description='A cli tool for the dfindexeddb package'
|
|
146
|
+
description='A cli tool for the dfindexeddb package',
|
|
147
|
+
epilog=f'Version {version.GetVersion()}')
|
|
118
148
|
|
|
119
149
|
parser.add_argument(
|
|
120
150
|
'-s', '--source', required=True, type=pathlib.Path,
|
|
@@ -139,6 +169,9 @@ def App():
|
|
|
139
169
|
'records'])
|
|
140
170
|
parser_log.set_defaults(func=LdbCommand)
|
|
141
171
|
|
|
172
|
+
parser_log = subparsers.add_parser('manifest')
|
|
173
|
+
parser_log.set_defaults(func=ManifestCommand)
|
|
174
|
+
|
|
142
175
|
parser_log = subparsers.add_parser('indexeddb')
|
|
143
176
|
parser_log.set_defaults(func=IndexeddbCommand)
|
|
144
177
|
|
|
@@ -17,8 +17,6 @@ from __future__ import annotations
|
|
|
17
17
|
from dataclasses import dataclass, field
|
|
18
18
|
from datetime import datetime
|
|
19
19
|
import io
|
|
20
|
-
import sys
|
|
21
|
-
import traceback
|
|
22
20
|
from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union
|
|
23
21
|
|
|
24
22
|
from dfindexeddb import errors
|
|
@@ -33,7 +31,7 @@ T = TypeVar('T')
|
|
|
33
31
|
|
|
34
32
|
|
|
35
33
|
@dataclass
|
|
36
|
-
class KeyPrefix(utils.
|
|
34
|
+
class KeyPrefix(utils.FromDecoderMixin):
|
|
37
35
|
"""The IndexedDB key prefix.
|
|
38
36
|
|
|
39
37
|
Attributes:
|
|
@@ -113,7 +111,7 @@ class KeyPrefix(utils.FromStreamMixin):
|
|
|
113
111
|
|
|
114
112
|
|
|
115
113
|
@dataclass
|
|
116
|
-
class IDBKey(utils.
|
|
114
|
+
class IDBKey(utils.FromDecoderMixin):
|
|
117
115
|
"""An IDBKey.
|
|
118
116
|
|
|
119
117
|
Attributes:
|
|
@@ -201,7 +199,7 @@ class IDBKey(utils.FromStreamMixin):
|
|
|
201
199
|
|
|
202
200
|
|
|
203
201
|
@dataclass
|
|
204
|
-
class IDBKeyPath(utils.
|
|
202
|
+
class IDBKeyPath(utils.FromDecoderMixin):
|
|
205
203
|
"""An IDBKeyPath.
|
|
206
204
|
|
|
207
205
|
Arguments:
|
|
@@ -258,7 +256,7 @@ class IDBKeyPath(utils.FromStreamMixin):
|
|
|
258
256
|
|
|
259
257
|
|
|
260
258
|
@dataclass
|
|
261
|
-
class BlobJournalEntry(utils.
|
|
259
|
+
class BlobJournalEntry(utils.FromDecoderMixin):
|
|
262
260
|
"""A blob journal entry.
|
|
263
261
|
|
|
264
262
|
Attributes:
|
|
@@ -289,7 +287,7 @@ class BlobJournalEntry(utils.FromStreamMixin):
|
|
|
289
287
|
|
|
290
288
|
|
|
291
289
|
@dataclass
|
|
292
|
-
class BlobJournal(utils.
|
|
290
|
+
class BlobJournal(utils.FromDecoderMixin):
|
|
293
291
|
"""A BlobJournal.
|
|
294
292
|
|
|
295
293
|
Attributes:
|
|
@@ -406,7 +404,7 @@ class BaseIndexedDBKey:
|
|
|
406
404
|
The decoded key.
|
|
407
405
|
"""
|
|
408
406
|
decoder = utils.LevelDBDecoder(stream)
|
|
409
|
-
key_prefix = KeyPrefix.FromDecoder(decoder)
|
|
407
|
+
key_prefix = KeyPrefix.FromDecoder(decoder, base_offset=base_offset)
|
|
410
408
|
return cls.FromDecoder(
|
|
411
409
|
decoder=decoder, key_prefix=key_prefix, base_offset=base_offset)
|
|
412
410
|
|
|
@@ -963,6 +961,23 @@ class ObjectStoreMetaDataKey(BaseIndexedDBKey):
|
|
|
963
961
|
offset=base_offset + offset, key_prefix=key_prefix,
|
|
964
962
|
object_store_id=object_store_id, metadata_type=metadata_type)
|
|
965
963
|
|
|
964
|
+
@dataclass
class ObjectStoreDataValue:
  """The parsed values from an ObjectStoreDataKey.

  Attributes:
    unkown: an integer of unknown meaning (possibly a sequence number?). The
        attribute keeps its existing spelling because callers pass it by
        keyword.
    is_wrapped: True if the value was wrapped.
    blob_size: the blob size, only valid if wrapped.
    blob_offset: the blob offset, only valid if wrapped.
    value: the blink serialized value, only valid if not wrapped.
  """
  unkown: int
  is_wrapped: bool
  blob_size: Optional[int]
  blob_offset: Optional[int]
  value: Any
|
|
980
|
+
|
|
966
981
|
|
|
967
982
|
@dataclass
|
|
968
983
|
class ObjectStoreDataKey(BaseIndexedDBKey):
|
|
@@ -974,11 +989,33 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
|
|
|
974
989
|
encoded_user_key: IDBKey
|
|
975
990
|
|
|
976
991
|
def DecodeValue(
    self, decoder: utils.LevelDBDecoder) -> ObjectStoreDataValue:
  """Decodes the object store data value.

  The value starts with a varint, followed either by a three byte "wrapped"
  header and two varints (blob size and blob offset), or by a blink
  serialized value that runs to the end of the stream.

  Args:
    decoder: the LevelDBDecoder positioned at the start of the value.

  Returns:
    The decoded ObjectStoreDataValue.

  Raises:
    DecoderError: if fewer than three bytes follow the leading varint.
  """
  _, unknown_integer = decoder.DecodeVarint()

  _, wrapped_header_bytes = decoder.PeekBytes(3)
  if len(wrapped_header_bytes) != 3:
    raise errors.DecoderError('Insufficient bytes')

  is_wrapped = (
      wrapped_header_bytes[0] == definitions.BlinkSerializationTag.VERSION and
      wrapped_header_bytes[1] == 0x11 and
      wrapped_header_bytes[2] == 0x01)

  if is_wrapped:
    # The header was only peeked at above, so it has to be consumed before
    # the blob size/offset varints are read; otherwise they would be decoded
    # from the header bytes themselves.
    # NOTE(review): assumes PeekBytes does not advance the stream - confirm
    # against utils.LevelDBDecoder.
    decoder.ReadBytes(3)
    _, blob_size = decoder.DecodeVarint()
    _, blob_offset = decoder.DecodeVarint()
    return ObjectStoreDataValue(
        unkown=unknown_integer,
        is_wrapped=True,
        blob_size=blob_size,
        blob_offset=blob_offset,
        value=None)

  # Not wrapped: everything that remains is the blink serialized value.
  _, blink_bytes = decoder.ReadBytes()
  blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes)
  return ObjectStoreDataValue(
      unkown=unknown_integer,
      is_wrapped=False,
      blob_size=None,
      blob_offset=None,
      value=blink_value)
|
|
982
1019
|
|
|
983
1020
|
@classmethod
|
|
984
1021
|
def FromDecoder(
|
|
@@ -990,7 +1027,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
|
|
|
990
1027
|
definitions.KeyPrefixType.OBJECT_STORE_DATA):
|
|
991
1028
|
raise errors.ParserError('Invalid KeyPrefix for ObjectStoreDataKey')
|
|
992
1029
|
offset = decoder.stream.tell()
|
|
993
|
-
encoded_user_key = IDBKey.FromDecoder(decoder,
|
|
1030
|
+
encoded_user_key = IDBKey.FromDecoder(decoder, offset)
|
|
994
1031
|
return cls(
|
|
995
1032
|
offset=base_offset + offset,
|
|
996
1033
|
key_prefix=key_prefix, encoded_user_key=encoded_user_key)
|
|
@@ -1017,7 +1054,7 @@ class ExistsEntryKey(BaseIndexedDBKey):
|
|
|
1017
1054
|
) -> ExistsEntryKey:
|
|
1018
1055
|
"""Decodes the exists entry key."""
|
|
1019
1056
|
offset = decoder.stream.tell()
|
|
1020
|
-
encoded_user_key = IDBKey.FromDecoder(decoder,
|
|
1057
|
+
encoded_user_key = IDBKey.FromDecoder(decoder, offset)
|
|
1021
1058
|
|
|
1022
1059
|
return cls(
|
|
1023
1060
|
offset=base_offset + offset,
|
|
@@ -1048,7 +1085,7 @@ class IndexDataKey(BaseIndexedDBKey):
|
|
|
1048
1085
|
base_offset: int = 0) -> IndexDataKey:
|
|
1049
1086
|
"""Decodes the index data key."""
|
|
1050
1087
|
offset = decoder.stream.tell()
|
|
1051
|
-
encoded_user_key = IDBKey.FromDecoder(decoder,
|
|
1088
|
+
encoded_user_key = IDBKey.FromDecoder(decoder, offset)
|
|
1052
1089
|
|
|
1053
1090
|
if decoder.NumRemainingBytes() > 0:
|
|
1054
1091
|
_, sequence_number = decoder.DecodeVarint()
|
|
@@ -1056,7 +1093,9 @@ class IndexDataKey(BaseIndexedDBKey):
|
|
|
1056
1093
|
sequence_number = None
|
|
1057
1094
|
|
|
1058
1095
|
if decoder.NumRemainingBytes() > 0:
|
|
1059
|
-
|
|
1096
|
+
encoded_primary_key_offset = decoder.stream.tell()
|
|
1097
|
+
encoded_primary_key = IDBKey.FromDecoder(
|
|
1098
|
+
decoder, encoded_primary_key_offset)
|
|
1060
1099
|
else:
|
|
1061
1100
|
encoded_primary_key = None
|
|
1062
1101
|
|
|
@@ -1089,7 +1128,7 @@ class BlobEntryKey(BaseIndexedDBKey):
|
|
|
1089
1128
|
) -> BlobEntryKey:
|
|
1090
1129
|
"""Decodes the blob entry key."""
|
|
1091
1130
|
offset = decoder.stream.tell()
|
|
1092
|
-
user_key = IDBKey.FromDecoder(decoder,
|
|
1131
|
+
user_key = IDBKey.FromDecoder(decoder, offset)
|
|
1093
1132
|
|
|
1094
1133
|
return cls(key_prefix=key_prefix, user_key=user_key,
|
|
1095
1134
|
offset=base_offset + offset)
|
|
@@ -1187,7 +1226,7 @@ class IndexMetaDataKey(BaseIndexedDBKey):
|
|
|
1187
1226
|
|
|
1188
1227
|
|
|
1189
1228
|
@dataclass
|
|
1190
|
-
class ExternalObjectEntry(utils.
|
|
1229
|
+
class ExternalObjectEntry(utils.FromDecoderMixin):
|
|
1191
1230
|
"""An IndexedDB external object entry.
|
|
1192
1231
|
|
|
1193
1232
|
Args:
|
|
@@ -1248,7 +1287,7 @@ class ExternalObjectEntry(utils.FromStreamMixin):
|
|
|
1248
1287
|
|
|
1249
1288
|
|
|
1250
1289
|
@dataclass
|
|
1251
|
-
class IndexedDBExternalObject(utils.
|
|
1290
|
+
class IndexedDBExternalObject(utils.FromDecoderMixin):
|
|
1252
1291
|
"""An IndexedDB external object.
|
|
1253
1292
|
|
|
1254
1293
|
Args:
|
|
@@ -1307,27 +1346,11 @@ class IndexedDBRecord:
|
|
|
1307
1346
|
|
|
1308
1347
|
@classmethod
|
|
1309
1348
|
def FromLevelDBRecord(
|
|
1310
|
-
cls, record: Union[ldb.
|
|
1349
|
+
cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
|
|
1311
1350
|
) -> IndexedDBRecord:
|
|
1312
1351
|
"""Returns an IndexedDBRecord from a ParsedInternalKey."""
|
|
1313
|
-
idb_key = IndexedDbKey.FromBytes(
|
|
1314
|
-
record.key, base_offset=record.offset)
|
|
1315
|
-
|
|
1352
|
+
idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
|
|
1316
1353
|
idb_value = idb_key.ParseValue(record.value)
|
|
1317
|
-
if isinstance(idb_key, ObjectStoreDataKey):
|
|
1318
|
-
|
|
1319
|
-
# The ObjectStoreDataKey value should decode as a 2-tuple comprising
|
|
1320
|
-
# a version integer and a SSV as a raw byte string
|
|
1321
|
-
if (isinstance(idb_value, tuple) and len(idb_value) == 2 and
|
|
1322
|
-
isinstance(idb_value[1], bytes)):
|
|
1323
|
-
|
|
1324
|
-
try:
|
|
1325
|
-
blink_value = blink.V8ScriptValueDecoder.FromBytes(idb_value[1])
|
|
1326
|
-
idb_value = idb_value[0], blink_value
|
|
1327
|
-
except (errors.ParserError, errors.DecoderError) as err:
|
|
1328
|
-
print(f'Error parsing blink value: {err}', file=sys.stderr)
|
|
1329
|
-
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
|
|
1330
|
-
|
|
1331
1354
|
return cls(
|
|
1332
1355
|
offset=record.offset,
|
|
1333
1356
|
key=idb_key,
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Definitions for LevelDB."""
|
|
16
|
+
|
|
17
|
+
import enum
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Lengths, in bytes, of the sequence number and type fields, and of the two
# fields packed together.
SEQUENCE_LENGTH = 7
TYPE_LENGTH = 1
PACKED_SEQUENCE_AND_TYPE_LENGTH = SEQUENCE_LENGTH + TYPE_LENGTH
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class VersionEditTags(enum.IntEnum):
  """Tags that identify the fields serialized in a VersionEdit."""
  COMPARATOR = 1
  LOG_NUMBER = 2
  NEXT_FILE_NUMBER = 3
  LAST_SEQUENCE = 4
  COMPACT_POINTER = 5
  DELETED_FILE = 6
  NEW_FILE = 7
  # Value 8 is deliberately not defined: it was used for large value refs.
  PREV_LOG_NUMBER = 9
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LogFilePhysicalRecordType(enum.IntEnum):
  """The type of a physical record in a log file.

  A FULL record holds a complete payload; FIRST, MIDDLE and LAST mark the
  pieces of a payload that is spread over several physical records.
  """
  FULL = 1
  FIRST = 2
  MIDDLE = 3
  LAST = 4
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Parser for LevelDB Manifest files."""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from typing import Generator, Optional
|
|
20
|
+
|
|
21
|
+
from dfindexeddb import errors
|
|
22
|
+
from dfindexeddb import utils
|
|
23
|
+
from dfindexeddb.leveldb import definitions
|
|
24
|
+
from dfindexeddb.leveldb import log
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class InternalKey:
  """An InternalKey.

  The serialized form is a length prefixed slice made of the user key
  followed by an eight byte trailer: one key type byte and a seven byte
  little-endian sequence number.

  Attributes:
    offset: the offset.
    user_key: the user key.
    sequence_number: the sequence number.
    key_type: the key type.
  """
  offset: int
  user_key: bytes = field(repr=False)
  sequence_number: int
  key_type: int

  @classmethod
  def FromDecoder(
      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
    """Decodes a InternalKey from the current position of a LevelDBDecoder.

    Args:
      decoder: the LevelDBDecoder.
      base_offset: the base offset.

    Returns:
      The InternalKey instance.

    Raises:
      ParserError: if the slice is too short to hold the sequence number and
          type trailer.
    """
    offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()

    trailer_length = definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH
    if len(slice_bytes) < trailer_length:
      raise errors.ParserError('Insufficient bytes to parse InternalKey')

    # The user key is everything before the packed trailer. Slicing with
    # SEQUENCE_LENGTH instead would leave the type byte on the end of the
    # user key.
    user_key = slice_bytes[:-trailer_length]
    sequence_number = int.from_bytes(
        slice_bytes[-definitions.SEQUENCE_LENGTH:],
        byteorder='little',
        signed=False)
    # The type byte is the first byte of the trailer.
    key_type = slice_bytes[-trailer_length]

    return cls(
        offset=base_offset + offset,
        user_key=user_key,
        sequence_number=sequence_number,
        key_type=key_type)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class NewFile(utils.FromDecoderMixin):
  """A new file entry of a VersionEdit.

  Attributes:
    offset: the offset at which the entry was decoded.
    level: the level.
    number: the file number.
    file_size: the file size.
    smallest: the smallest internal key.
    largest: the largest internal key.
  """
  offset: int
  level: int
  number: int
  file_size: int
  smallest: InternalKey
  largest: InternalKey

  @classmethod
  def FromDecoder(
      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
    """Decodes a NewFile from the current position of a LevelDBDecoder.

    The fields are read in serialized order: level, file number, file size,
    smallest key and largest key.

    Args:
      decoder: the LevelDBDecoder.
      base_offset: the base offset.

    Returns:
      The NewFile instance.
    """
    entry_offset, file_level = decoder.DecodeUint32Varint()
    _, file_number = decoder.DecodeUint64Varint()
    _, size = decoder.DecodeUint64Varint()
    smallest_key = InternalKey.FromDecoder(decoder, base_offset=base_offset)
    largest_key = InternalKey.FromDecoder(decoder, base_offset=base_offset)

    return cls(
        offset=entry_offset + base_offset,
        level=file_level,
        number=file_number,
        file_size=size,
        smallest=smallest_key,
        largest=largest_key)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class CompactPointer(utils.FromDecoderMixin):
  """A CompactPointer.

  Attributes:
    offset: the offset at which the compact pointer was decoded.
    level: the level.
    key: the key bytes.
  """
  offset: int
  level: int
  key: bytes = field(repr=False)

  @classmethod
  def FromDecoder(
      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
  ) -> CompactPointer:
    """Decodes a CompactPointer from the current position of a LevelDBDecoder.

    Args:
      decoder: the LevelDBDecoder.
      base_offset: the base offset.

    Returns:
      The CompactPointer instance.
    """
    pointer_offset, pointer_level = decoder.DecodeUint32Varint()
    _, key_bytes = decoder.DecodeLengthPrefixedSlice()
    return cls(
        offset=base_offset + pointer_offset,
        level=pointer_level,
        key=key_bytes)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass
class DeletedFile(utils.FromDecoderMixin):
  """A deleted file entry of a VersionEdit.

  Attributes:
    offset: the offset at which the entry was decoded.
    level: the level.
    number: the file number.
  """
  offset: int
  level: int
  number: int

  @classmethod
  def FromDecoder(
      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
    """Decodes a DeletedFile from the current position of a LevelDBDecoder.

    Args:
      decoder: the LevelDBDecoder.
      base_offset: the base offset.

    Returns:
      The DeletedFile instance.
    """
    entry_offset, file_level = decoder.DecodeUint32Varint()
    _, file_number = decoder.DecodeUint64Varint()
    return cls(
        offset=base_offset + entry_offset,
        level=file_level,
        number=file_number)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@dataclass
class VersionEdit(utils.FromDecoderMixin):
  """A VersionEdit is recorded in a LevelDB descriptor/manifest file.

  Attributes:
    offset: the offset where the VersionEdit was parsed.
    comparator: the comparator.
    log_number: the log number.
    prev_log_number: the previous log number.
    next_file_number: the next file number.
    last_sequence: the last sequence.
    compact_pointers: the list of CompactPointers.
    deleted_files: the list of DeletedFiles.
    new_files: the list of NewFiles.
  """
  offset: int
  comparator: Optional[bytes] = None
  log_number: Optional[int] = None
  prev_log_number: Optional[int] = None
  next_file_number: Optional[int] = None
  last_sequence: Optional[int] = None
  compact_pointers: list[CompactPointer] = field(default_factory=list)
  deleted_files: list[DeletedFile] = field(default_factory=list)
  new_files: list[NewFile] = field(default_factory=list)

  @classmethod
  def FromDecoder(
      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
    """Decodes a VersionEdit from the current position of a LevelDBDecoder.

    A VersionEdit is a sequence of tagged fields. Fields are read until a
    zero tag is decoded or the decoder has no bytes left.

    Args:
      decoder: the LevelDBDecoder.
      base_offset: the base offset.

    Returns:
      The VersionEdit instance.

    Raises:
      ParserError: if an invalid VersionEditTag is parsed.
    """
    offset, tag_byte = decoder.DecodeUint32Varint()
    version_edit = cls(offset=base_offset + offset)

    while tag_byte:
      try:
        tag = definitions.VersionEditTags(tag_byte)
      except ValueError as error:
        # Looking up an enum member by an undefined value raises ValueError
        # (not TypeError); translate it into the parser's own error type.
        raise errors.ParserError(
            f'Invalid VersionEditTag at offset {offset}') from error

      if tag == definitions.VersionEditTags.COMPARATOR:
        _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
      elif tag == definitions.VersionEditTags.LOG_NUMBER:
        _, version_edit.log_number = decoder.DecodeUint64Varint()
      elif tag == definitions.VersionEditTags.PREV_LOG_NUMBER:
        _, version_edit.prev_log_number = decoder.DecodeUint64Varint()
      elif tag == definitions.VersionEditTags.NEXT_FILE_NUMBER:
        _, version_edit.next_file_number = decoder.DecodeUint64Varint()
      elif tag == definitions.VersionEditTags.LAST_SEQUENCE:
        _, version_edit.last_sequence = decoder.DecodeUint64Varint()
      elif tag == definitions.VersionEditTags.COMPACT_POINTER:
        compact_pointer = CompactPointer.FromDecoder(
            decoder=decoder, base_offset=base_offset + offset)
        version_edit.compact_pointers.append(compact_pointer)
      elif tag == definitions.VersionEditTags.DELETED_FILE:
        deleted_file = DeletedFile.FromDecoder(
            decoder=decoder, base_offset=base_offset + offset)
        version_edit.deleted_files.append(deleted_file)
      elif tag == definitions.VersionEditTags.NEW_FILE:
        file_metadata = NewFile.FromDecoder(
            decoder=decoder, base_offset=base_offset + offset)
        version_edit.new_files.append(file_metadata)

      # Stop cleanly at the end of the data instead of trying to decode
      # another tag from an exhausted stream.
      if decoder.NumRemainingBytes() == 0:
        break

      offset, tag_byte = decoder.DecodeUint32Varint()

    return version_edit
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class FileReader:
  """A Descriptor file reader.

  A DescriptorFileReader provides read-only sequential iteration of serialized
  structures in a leveldb Descriptor file. These structures include:
  * blocks (log.Block)
  * records (log.PhysicalRecords)
  * version edits (VersionEdit)
  """

  def __init__(self, filename: str):
    """Initializes the Descriptor a.k.a. MANIFEST file.

    Args:
      filename: the Descriptor filename (e.g. MANIFEST-000001)
    """
    self.filename = filename

  def GetBlocks(self) -> Generator[log.Block, None, None]:
    """Returns an iterator of Block instances.

    A Descriptor file is composed of one or more blocks. The file is opened
    in binary mode and blocks are read until log.Block.FromStream returns a
    falsy value.

    Yields:
      Block
    """
    with open(self.filename, 'rb') as fh:
      while True:
        block = log.Block.FromStream(fh)
        if not block:
          break
        yield block

  def GetPhysicalRecords(self) -> Generator[log.PhysicalRecord, None, None]:
    """Returns an iterator of PhysicalRecord instances.

    A block is composed of one or more physical records.

    Yields:
      PhysicalRecord
    """
    for block in self.GetBlocks():
      yield from block.GetPhysicalRecords()

  def GetVersionEdits(self) -> Generator[VersionEdit, None, None]:
    """Returns an iterator of VersionEdit instances.

    Depending on the VersionEdit size, it can be spread across one or
    more physical records. A FULL record is parsed on its own; the contents
    of a FIRST record and the MIDDLE/LAST records that follow it are joined
    and parsed once the LAST record has been read. The reported offset is
    that of the FULL or FIRST record's contents.

    Yields:
      VersionEdit
    """
    record_types = definitions.LogFilePhysicalRecordType
    pending = bytearray()

    for record in self.GetPhysicalRecords():
      kind = record.record_type

      if kind == record_types.FULL:
        payload = record.contents
        offset = record.contents_offset + record.base_offset
        yield VersionEdit.FromBytes(payload, base_offset=offset)
        pending = bytearray()
      elif kind == record_types.FIRST:
        offset = record.contents_offset + record.base_offset
        pending = bytearray(record.contents)
      elif kind == record_types.MIDDLE:
        pending.extend(bytearray(record.contents))
      elif kind == record_types.LAST:
        pending.extend(bytearray(record.contents))
        yield VersionEdit.FromBytes(pending, base_offset=offset)
        pending = bytearray()
|
dfindexeddb/leveldb/ldb.py
CHANGED
|
@@ -24,10 +24,11 @@ import snappy
|
|
|
24
24
|
import zstd
|
|
25
25
|
|
|
26
26
|
from dfindexeddb import utils
|
|
27
|
+
from dfindexeddb.leveldb import definitions
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
@dataclass
|
|
30
|
-
class
|
|
31
|
+
class KeyValueRecord:
|
|
31
32
|
"""A leveldb table key-value record.
|
|
32
33
|
|
|
33
34
|
Attributes:
|
|
@@ -43,14 +44,10 @@ class LdbKeyValueRecord:
|
|
|
43
44
|
sequence_number: int
|
|
44
45
|
type: int
|
|
45
46
|
|
|
46
|
-
PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
|
|
47
|
-
SEQUENCE_LENGTH = 7
|
|
48
|
-
TYPE_LENGTH = 1
|
|
49
|
-
|
|
50
47
|
@classmethod
|
|
51
48
|
def FromDecoder(
|
|
52
49
|
cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
|
|
53
|
-
) -> Tuple[
|
|
50
|
+
) -> Tuple[KeyValueRecord, bytes]:
|
|
54
51
|
"""Decodes a ldb key value record.
|
|
55
52
|
|
|
56
53
|
Args:
|
|
@@ -59,7 +56,7 @@ class LdbKeyValueRecord:
|
|
|
59
56
|
shared_key: the shared key bytes.
|
|
60
57
|
|
|
61
58
|
Returns:
|
|
62
|
-
A tuple of the parsed
|
|
59
|
+
A tuple of the parsed KeyValueRecord and the updated shared key bytes.
|
|
63
60
|
"""
|
|
64
61
|
offset, shared_bytes = decoder.DecodeUint32Varint()
|
|
65
62
|
_, unshared_bytes = decoder.DecodeUint32Varint()
|
|
@@ -68,17 +65,17 @@ class LdbKeyValueRecord:
|
|
|
68
65
|
_, value = decoder.ReadBytes(value_length)
|
|
69
66
|
|
|
70
67
|
shared_key = shared_key[:shared_bytes] + key_delta
|
|
71
|
-
key = shared_key[:-
|
|
68
|
+
key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
|
|
72
69
|
sequence_number = int.from_bytes(
|
|
73
|
-
key[-
|
|
74
|
-
key_type = shared_key[-
|
|
70
|
+
key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
|
|
71
|
+
key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
|
|
75
72
|
|
|
76
73
|
return cls(offset + block_offset, key, value, sequence_number,
|
|
77
74
|
key_type), shared_key
|
|
78
75
|
|
|
79
76
|
|
|
80
77
|
@dataclass
|
|
81
|
-
class
|
|
78
|
+
class Block:
|
|
82
79
|
"""A leveldb table block.
|
|
83
80
|
|
|
84
81
|
Attributes:
|
|
@@ -111,11 +108,11 @@ class LdbBlock:
|
|
|
111
108
|
return zstd.decompress(self.data)
|
|
112
109
|
return self.data
|
|
113
110
|
|
|
114
|
-
def GetRecords(self) -> Iterable[
|
|
111
|
+
def GetRecords(self) -> Iterable[KeyValueRecord]:
|
|
115
112
|
"""Returns an iterator over the key value records in the block.
|
|
116
113
|
|
|
117
114
|
Yields:
|
|
118
|
-
|
|
115
|
+
KeyValueRecords
|
|
119
116
|
"""
|
|
120
117
|
# get underlying block content, decompressing if required
|
|
121
118
|
buffer = self.GetBuffer()
|
|
@@ -135,7 +132,7 @@ class LdbBlock:
|
|
|
135
132
|
key = b''
|
|
136
133
|
|
|
137
134
|
while decoder.stream.tell() < restarts_offset:
|
|
138
|
-
key_value_record, key =
|
|
135
|
+
key_value_record, key = KeyValueRecord.FromDecoder(
|
|
139
136
|
decoder, self.block_offset, key)
|
|
140
137
|
yield key_value_record
|
|
141
138
|
|
|
@@ -145,7 +142,7 @@ class LdbBlock:
|
|
|
145
142
|
|
|
146
143
|
|
|
147
144
|
@dataclass
|
|
148
|
-
class BlockHandle:
|
|
145
|
+
class BlockHandle(utils.FromDecoderMixin):
|
|
149
146
|
"""A handle to a block in the ldb file.
|
|
150
147
|
|
|
151
148
|
Attributes:
|
|
@@ -159,14 +156,14 @@ class BlockHandle:
|
|
|
159
156
|
|
|
160
157
|
BLOCK_TRAILER_SIZE = 5
|
|
161
158
|
|
|
162
|
-
def Load(self, stream: BinaryIO) ->
|
|
159
|
+
def Load(self, stream: BinaryIO) -> Block:
|
|
163
160
|
"""Loads the block data.
|
|
164
161
|
|
|
165
162
|
Args:
|
|
166
163
|
stream: the binary stream of the ldb file.
|
|
167
164
|
|
|
168
165
|
Returns:
|
|
169
|
-
a
|
|
166
|
+
a Block.
|
|
170
167
|
|
|
171
168
|
Raises:
|
|
172
169
|
ValueError: if it could not read all of the block or block footer.
|
|
@@ -180,32 +177,35 @@ class BlockHandle:
|
|
|
180
177
|
if len(footer) != self.BLOCK_TRAILER_SIZE:
|
|
181
178
|
raise ValueError('Could not read all of the block footer')
|
|
182
179
|
|
|
183
|
-
return
|
|
180
|
+
return Block(self.offset, self.block_offset, self.length, data, footer)
|
|
184
181
|
|
|
185
182
|
@classmethod
|
|
186
|
-
def
|
|
187
|
-
|
|
183
|
+
def FromDecoder(
|
|
184
|
+
cls: BlockHandle,
|
|
185
|
+
decoder: utils.LevelDBDecoder,
|
|
186
|
+
base_offset: int = 0
|
|
187
|
+
) -> BlockHandle:
|
|
188
|
+
"""Decodes a BlockHandle from the current position of a LevelDBDecoder.
|
|
188
189
|
|
|
189
190
|
Args:
|
|
190
|
-
|
|
191
|
+
decoder: the LevelDBDecoder.
|
|
191
192
|
base_offset: the base offset.
|
|
192
193
|
|
|
193
194
|
Returns:
|
|
194
|
-
|
|
195
|
+
The BlockHandle instance.
|
|
195
196
|
"""
|
|
196
|
-
decoder = utils.LevelDBDecoder(stream)
|
|
197
197
|
offset, block_offset = decoder.DecodeUint64Varint()
|
|
198
198
|
_, length = decoder.DecodeUint64Varint()
|
|
199
199
|
return cls(offset + base_offset, block_offset, length)
|
|
200
200
|
|
|
201
201
|
|
|
202
|
-
class
|
|
202
|
+
class FileReader:
|
|
203
203
|
"""A leveldb table (.ldb or .sst) file reader.
|
|
204
204
|
|
|
205
|
-
A
|
|
205
|
+
A Ldb FileReader provides read-only sequential iteration of serialized
|
|
206
206
|
structures in a leveldb ldb file. These structures include:
|
|
207
|
-
* blocks (
|
|
208
|
-
* records (
|
|
207
|
+
* blocks (Block)
|
|
208
|
+
* records (KeyValueRecord)
|
|
209
209
|
"""
|
|
210
210
|
|
|
211
211
|
FOOTER_SIZE = 48
|
|
@@ -234,11 +234,11 @@ class LdbFileReader:
|
|
|
234
234
|
# self.meta_block = meta_handle.load(fh) # TODO: support meta blocks
|
|
235
235
|
self.index_block = index_handle.Load(fh)
|
|
236
236
|
|
|
237
|
-
def GetBlocks(self) -> Iterable[
|
|
238
|
-
"""Returns an iterator of
|
|
237
|
+
def GetBlocks(self) -> Iterable[Block]:
|
|
238
|
+
"""Returns an iterator of Blocks.
|
|
239
239
|
|
|
240
240
|
Yields:
|
|
241
|
-
|
|
241
|
+
Block.
|
|
242
242
|
"""
|
|
243
243
|
with open(self.filename, 'rb') as fh:
|
|
244
244
|
for key_value_record in self.index_block.GetRecords():
|
|
@@ -247,11 +247,11 @@ class LdbFileReader:
|
|
|
247
247
|
base_offset=key_value_record.offset)
|
|
248
248
|
yield block_handle.Load(fh)
|
|
249
249
|
|
|
250
|
-
def GetKeyValueRecords(self) -> Iterable[
|
|
251
|
-
"""Returns an iterator of
|
|
250
|
+
def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
|
|
251
|
+
"""Returns an iterator of KeyValueRecords.
|
|
252
252
|
|
|
253
253
|
Yields:
|
|
254
|
-
|
|
254
|
+
KeyValueRecords.
|
|
255
255
|
"""
|
|
256
256
|
for block in self.GetBlocks():
|
|
257
257
|
yield from block.GetRecords()
|
dfindexeddb/leveldb/log.py
CHANGED
|
@@ -16,19 +16,11 @@
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
18
|
from dataclasses import dataclass, field
|
|
19
|
-
from enum import IntEnum
|
|
20
19
|
import io
|
|
21
20
|
from typing import BinaryIO, Generator, Iterable, Optional
|
|
22
21
|
|
|
23
22
|
from dfindexeddb import utils
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class LogFilePhysicalRecordType(IntEnum):
|
|
27
|
-
"""LevelDB log file physical record types."""
|
|
28
|
-
FULL = 1
|
|
29
|
-
FIRST = 2
|
|
30
|
-
MIDDLE = 3
|
|
31
|
-
LAST = 4
|
|
23
|
+
from dfindexeddb.leveldb import definitions
|
|
32
24
|
|
|
33
25
|
|
|
34
26
|
@dataclass
|
|
@@ -38,28 +30,35 @@ class ParsedInternalKey:
|
|
|
38
30
|
Attributes:
|
|
39
31
|
offset: the offset of the record.
|
|
40
32
|
type: the record type.
|
|
33
|
+
sequence_number: the sequence number (inferred from the relative location
|
|
34
|
+
of the ParsedInternalKey in a WriteBatch.)
|
|
41
35
|
key: the record key.
|
|
42
36
|
value: the record value.
|
|
43
37
|
"""
|
|
44
38
|
offset: int
|
|
45
39
|
type: int
|
|
40
|
+
sequence_number: int
|
|
46
41
|
key: bytes
|
|
47
42
|
value: bytes
|
|
43
|
+
__type__: str = 'ParsedInternalKey'
|
|
48
44
|
|
|
49
45
|
@classmethod
|
|
50
46
|
def FromDecoder(
|
|
51
47
|
cls,
|
|
52
48
|
decoder: utils.LevelDBDecoder,
|
|
53
|
-
base_offset: int = 0
|
|
49
|
+
base_offset: int = 0,
|
|
50
|
+
sequence_number: int = 0,
|
|
54
51
|
) -> ParsedInternalKey:
|
|
55
52
|
"""Decodes an internal key value record.
|
|
56
53
|
|
|
57
54
|
Args:
|
|
58
55
|
decoder: the leveldb decoder.
|
|
59
|
-
base_offset: the base offset for the parsed key value record.
|
|
56
|
+
base_offset: the base offset for the parsed internal key value record.
|
|
57
|
+
sequence_number: the sequence number for the parsed internal key value
|
|
58
|
+
record.
|
|
60
59
|
|
|
61
60
|
Returns:
|
|
62
|
-
|
|
61
|
+
A ParsedInternalKey
|
|
63
62
|
|
|
64
63
|
Raises:
|
|
65
64
|
ValueError: if there is an invalid record type encountered.
|
|
@@ -72,15 +71,20 @@ class ParsedInternalKey:
|
|
|
72
71
|
value = b''
|
|
73
72
|
else:
|
|
74
73
|
raise ValueError(f'Invalid record type {record_type}')
|
|
75
|
-
return cls(
|
|
74
|
+
return cls(
|
|
75
|
+
offset=base_offset + offset,
|
|
76
|
+
type=record_type,
|
|
77
|
+
key=key,
|
|
78
|
+
value=value,
|
|
79
|
+
sequence_number=sequence_number)
|
|
76
80
|
|
|
77
81
|
|
|
78
82
|
@dataclass
|
|
79
|
-
class WriteBatch:
|
|
83
|
+
class WriteBatch(utils.FromDecoderMixin):
|
|
80
84
|
"""A write batch from a leveldb log file.
|
|
81
85
|
|
|
82
86
|
Attributes:
|
|
83
|
-
offset: the batch offset.
|
|
87
|
+
offset: the write batch offset.
|
|
84
88
|
sequence_number: the batch sequence number.
|
|
85
89
|
count: the number of ParsedInternalKey in the batch.
|
|
86
90
|
records: the ParsedInternalKey parsed from the batch.
|
|
@@ -91,46 +95,38 @@ class WriteBatch:
|
|
|
91
95
|
records: Iterable[ParsedInternalKey] = field(repr=False)
|
|
92
96
|
|
|
93
97
|
@classmethod
|
|
94
|
-
def
|
|
95
|
-
cls,
|
|
98
|
+
def FromDecoder(
|
|
99
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
96
100
|
) -> WriteBatch:
|
|
97
101
|
"""Parses a WriteBatch from the current position of a LevelDBDecoder.
|
|
98
102
|
|
|
99
103
|
Args:
|
|
100
|
-
|
|
104
|
+
decoder: the LevelDBDecoder
|
|
101
105
|
base_offset: the base offset of the Block from which the data is
|
|
102
106
|
read.
|
|
103
107
|
|
|
104
108
|
Returns:
|
|
105
109
|
A WriteBatch.
|
|
106
110
|
"""
|
|
107
|
-
|
|
108
|
-
_, sequence_number = decoder.DecodeUint64()
|
|
111
|
+
offset, sequence_number = decoder.DecodeUint64()
|
|
109
112
|
_, count = decoder.DecodeUint32()
|
|
110
113
|
|
|
111
114
|
records = []
|
|
112
|
-
for
|
|
113
|
-
record = ParsedInternalKey.FromDecoder(
|
|
115
|
+
for relative_sequence_number in range(count):
|
|
116
|
+
record = ParsedInternalKey.FromDecoder(
|
|
117
|
+
decoder, base_offset + offset,
|
|
118
|
+
relative_sequence_number + sequence_number
|
|
119
|
+
)
|
|
114
120
|
records.append(record)
|
|
115
|
-
return cls(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
Args:
|
|
122
|
-
data: the bytes to be parsed.
|
|
123
|
-
base_offset: the base offset of the Block from which the data is
|
|
124
|
-
read from.
|
|
125
|
-
|
|
126
|
-
Returns:
|
|
127
|
-
A WriteBatch.
|
|
128
|
-
"""
|
|
129
|
-
return cls.FromStream(io.BytesIO(data), base_offset)
|
|
121
|
+
return cls(
|
|
122
|
+
offset=base_offset + offset,
|
|
123
|
+
sequence_number=sequence_number,
|
|
124
|
+
count=count,
|
|
125
|
+
records=records)
|
|
130
126
|
|
|
131
127
|
|
|
132
128
|
@dataclass
|
|
133
|
-
class PhysicalRecord:
|
|
129
|
+
class PhysicalRecord(utils.FromDecoderMixin):
|
|
134
130
|
"""A physical record from a leveldb log file.
|
|
135
131
|
|
|
136
132
|
Attributes:
|
|
@@ -145,27 +141,30 @@ class PhysicalRecord:
|
|
|
145
141
|
offset: int
|
|
146
142
|
checksum: int
|
|
147
143
|
length: int
|
|
148
|
-
record_type: LogFilePhysicalRecordType
|
|
144
|
+
record_type: definitions.LogFilePhysicalRecordType
|
|
149
145
|
contents: bytes = field(repr=False)
|
|
150
146
|
contents_offset: int
|
|
151
147
|
|
|
148
|
+
PHYSICAL_HEADER_LENGTH = 7
|
|
149
|
+
|
|
152
150
|
@classmethod
|
|
153
|
-
def
|
|
154
|
-
cls,
|
|
155
|
-
|
|
151
|
+
def FromDecoder(
|
|
152
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
153
|
+
) -> PhysicalRecord:
|
|
154
|
+
"""Decodes a PhysicalRecord from the current position of a LevelDBDecoder.
|
|
156
155
|
|
|
157
156
|
Args:
|
|
158
|
-
|
|
157
|
+
decoder: the LevelDBDecoder.
|
|
159
158
|
base_offset: the base offset of the WriteBatch from which the data is
|
|
160
159
|
read.
|
|
161
160
|
|
|
162
161
|
Returns:
|
|
163
162
|
A PhysicalRecord.
|
|
164
163
|
"""
|
|
165
|
-
decoder = utils.StreamDecoder(stream)
|
|
166
164
|
offset, checksum = decoder.DecodeUint32()
|
|
167
165
|
_, length = decoder.DecodeUint16()
|
|
168
|
-
record_type = LogFilePhysicalRecordType(
|
|
166
|
+
record_type = definitions.LogFilePhysicalRecordType(
|
|
167
|
+
decoder.DecodeUint8()[1])
|
|
169
168
|
contents_offset, contents = decoder.ReadBytes(length)
|
|
170
169
|
return cls(
|
|
171
170
|
base_offset=base_offset,
|
|
@@ -199,7 +198,7 @@ class Block:
|
|
|
199
198
|
buffer = io.BytesIO(self.data)
|
|
200
199
|
buffer_length = len(self.data)
|
|
201
200
|
|
|
202
|
-
while buffer.tell() < buffer_length:
|
|
201
|
+
while buffer.tell() + PhysicalRecord.PHYSICAL_HEADER_LENGTH < buffer_length:
|
|
203
202
|
yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
|
|
204
203
|
|
|
205
204
|
@classmethod
|
|
@@ -219,10 +218,10 @@ class Block:
|
|
|
219
218
|
return cls(offset, data)
|
|
220
219
|
|
|
221
220
|
|
|
222
|
-
class
|
|
221
|
+
class FileReader:
|
|
223
222
|
"""A leveldb log file reader.
|
|
224
223
|
|
|
225
|
-
A
|
|
224
|
+
A Log FileReader provides read-only sequential iteration of serialized
|
|
226
225
|
structures in a leveldb logfile. These structures include:
|
|
227
226
|
* blocks (Block)
|
|
228
227
|
* physical records (PhysicalRecord)
|
|
@@ -250,11 +249,10 @@ class LogFileReader:
|
|
|
250
249
|
a Block
|
|
251
250
|
"""
|
|
252
251
|
with open(self.filename, 'rb') as fh:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
if not block:
|
|
256
|
-
break
|
|
252
|
+
block = Block.FromStream(fh)
|
|
253
|
+
while block:
|
|
257
254
|
yield block
|
|
255
|
+
block = Block.FromStream(fh)
|
|
258
256
|
|
|
259
257
|
def GetPhysicalRecords(self) -> Generator[PhysicalRecord, None, None]:
|
|
260
258
|
"""Returns an iterator of PhysicalRecord instances.
|
|
@@ -278,17 +276,21 @@ class LogFileReader:
|
|
|
278
276
|
"""
|
|
279
277
|
buffer = bytearray()
|
|
280
278
|
for physical_record in self.GetPhysicalRecords():
|
|
281
|
-
if
|
|
279
|
+
if(physical_record.record_type ==
|
|
280
|
+
definitions.LogFilePhysicalRecordType.FULL):
|
|
282
281
|
buffer = physical_record.contents
|
|
283
282
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
284
283
|
yield WriteBatch.FromBytes(buffer, base_offset=offset)
|
|
285
284
|
buffer = bytearray()
|
|
286
|
-
elif physical_record.record_type
|
|
285
|
+
elif (physical_record.record_type
|
|
286
|
+
== definitions.LogFilePhysicalRecordType.FIRST):
|
|
287
287
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
288
288
|
buffer = bytearray(physical_record.contents)
|
|
289
|
-
elif physical_record.record_type ==
|
|
289
|
+
elif (physical_record.record_type ==
|
|
290
|
+
definitions.LogFilePhysicalRecordType.MIDDLE):
|
|
290
291
|
buffer.extend(bytearray(physical_record.contents))
|
|
291
|
-
elif physical_record.record_type ==
|
|
292
|
+
elif (physical_record.record_type ==
|
|
293
|
+
definitions.LogFilePhysicalRecordType.LAST):
|
|
292
294
|
buffer.extend(bytearray(physical_record.contents))
|
|
293
295
|
yield WriteBatch.FromBytes(buffer, base_offset=offset)
|
|
294
296
|
buffer = bytearray()
|
dfindexeddb/utils.py
CHANGED
|
@@ -229,24 +229,30 @@ class LevelDBDecoder(StreamDecoder):
|
|
|
229
229
|
f'Odd number of bytes encountered at offset {offset}')
|
|
230
230
|
return offset, buffer.decode('utf-16-be')
|
|
231
231
|
|
|
232
|
+
def DecodeLengthPrefixedSlice(self) -> Tuple[int, bytes]:
|
|
233
|
+
"""Returns a tuple of the offset of decoding and the byte 'slice'."""
|
|
234
|
+
offset, num_bytes = self.DecodeUint32Varint()
|
|
235
|
+
_, blob = self.ReadBytes(num_bytes)
|
|
236
|
+
return offset, blob
|
|
237
|
+
|
|
232
238
|
def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
|
|
233
239
|
"""Returns a tuple of the offset of decoding and the binary blob."""
|
|
234
240
|
offset, num_bytes = self.DecodeUint64Varint()
|
|
235
241
|
_, blob = self.ReadBytes(num_bytes)
|
|
236
242
|
return offset, blob
|
|
237
243
|
|
|
238
|
-
def DecodeStringWithLength(self) -> Tuple[int, str]:
|
|
244
|
+
def DecodeStringWithLength(self, encoding='utf-16-be') -> Tuple[int, str]:
|
|
239
245
|
"""Returns a tuple of the offset of decoding and the string value."""
|
|
240
246
|
offset, length = self.DecodeUint64Varint()
|
|
241
247
|
_, buffer = self.ReadBytes(length*2)
|
|
242
|
-
return offset, buffer.decode(
|
|
248
|
+
return offset, buffer.decode(encoding=encoding)
|
|
243
249
|
|
|
244
250
|
|
|
245
251
|
T = TypeVar('T')
|
|
246
252
|
|
|
247
253
|
|
|
248
|
-
class
|
|
249
|
-
"""A mixin for
|
|
254
|
+
class FromDecoderMixin:
|
|
255
|
+
"""A mixin for parsing dataclass attributes using a LevelDBDecoder."""
|
|
250
256
|
|
|
251
257
|
@classmethod
|
|
252
258
|
def FromDecoder(
|
|
@@ -278,7 +284,7 @@ class FromStreamMixin: # TODO: refactor leveldb parsers
|
|
|
278
284
|
The class instance.
|
|
279
285
|
"""
|
|
280
286
|
decoder = LevelDBDecoder(stream)
|
|
281
|
-
return cls.FromDecoder(decoder, base_offset)
|
|
287
|
+
return cls.FromDecoder(decoder=decoder, base_offset=base_offset)
|
|
282
288
|
|
|
283
289
|
@classmethod
|
|
284
290
|
def FromBytes(
|
dfindexeddb/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dfindexeddb
|
|
3
|
-
Version:
|
|
3
|
+
Version: 20240305
|
|
4
4
|
Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
|
|
5
5
|
Author-email: Syd Pleno <sydp@google.com>
|
|
6
6
|
Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
|
|
2
|
+
dfindexeddb/cli.py,sha256=fm7iGA5k1VVIc4jT80w8yQ2kb6h9AucByikwNFNuG3A,5593
|
|
3
|
+
dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
|
|
4
|
+
dfindexeddb/utils.py,sha256=fp4NaJhJR1LI4THH3PPo_dGyuMkQ6sOtk-l3e9lIdLY,10349
|
|
5
|
+
dfindexeddb/version.py,sha256=KH2nfTUTTcPB19XDYtCGJUOAturiQSDckkQvGgYWdYg,750
|
|
6
|
+
dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
|
|
7
|
+
dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
|
|
8
|
+
dfindexeddb/indexeddb/chromium.py,sha256=6aePZ7quQzQgHi2KYtlmrjeWZyURGdUFYKIPH80cEAs,44696
|
|
9
|
+
dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
|
|
10
|
+
dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
|
|
11
|
+
dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
|
|
12
|
+
dfindexeddb/leveldb/definitions.py,sha256=d34YBXNRJZklS-KLKOKiwf_ojxevQedYaoJoNGaeM5g,1109
|
|
13
|
+
dfindexeddb/leveldb/descriptor.py,sha256=tfJ0Smk4apUuVZIxEWDTPPjNaTqVtRndA1YsZdHlceI,10394
|
|
14
|
+
dfindexeddb/leveldb/ldb.py,sha256=dOZpzh9WHL3qwfTyIJFn6-OaApTNnqTHeJgXYwOM1-c,7931
|
|
15
|
+
dfindexeddb/leveldb/log.py,sha256=5m6OADTM7BP3AWbamlqAwTn_UGcG2UXZz1YpCq1o4Gk,8858
|
|
16
|
+
dfindexeddb-20240305.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
|
|
17
|
+
dfindexeddb-20240305.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
18
|
+
dfindexeddb-20240305.dist-info/METADATA,sha256=_iqPyDYZsvVd-ITp--C8yyfwB11gs_D1JIIKFDAezW4,15933
|
|
19
|
+
dfindexeddb-20240305.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
20
|
+
dfindexeddb-20240305.dist-info/entry_points.txt,sha256=UsfPLLhTiVAAtZ8Rq3ZR7JNFGMuHqJy-tugGWonQWtc,52
|
|
21
|
+
dfindexeddb-20240305.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
|
|
22
|
+
dfindexeddb-20240305.dist-info/RECORD,,
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
|
|
2
|
-
dfindexeddb/cli.py,sha256=LD2-BwmXC3qFcJwgP09QDFxU3HOGtsg0Kbtyx-hAqzA,4525
|
|
3
|
-
dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
|
|
4
|
-
dfindexeddb/utils.py,sha256=g9iiGRX4DB1wFBSBHa6b9lg7JzAdE0SN0DrdB2aS_Co,10091
|
|
5
|
-
dfindexeddb/version.py,sha256=XwHKYiT0CeLWo90AaJfOYHD1mEEgIlUUSB6ot_rU8wc,750
|
|
6
|
-
dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
|
|
7
|
-
dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
|
|
8
|
-
dfindexeddb/indexeddb/chromium.py,sha256=Anw6QIU7PrsxpUW7qxrUXRb5vBRcxozhv3mHov7Ti8k,43984
|
|
9
|
-
dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
|
|
10
|
-
dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
|
|
11
|
-
dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
|
|
12
|
-
dfindexeddb/leveldb/ldb.py,sha256=uShhXjQe4Sz3dn54IXbGxRtE6D8RNpu1NDy5Zb0P9LA,7927
|
|
13
|
-
dfindexeddb/leveldb/log.py,sha256=cyMfjDz5a6gfGb5NonxC1Y72OmHYBWzYK8UMVzP_umw,8532
|
|
14
|
-
dfindexeddb-20240229.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
|
|
15
|
-
dfindexeddb-20240229.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
16
|
-
dfindexeddb-20240229.dist-info/METADATA,sha256=ILzTLaRO96ALuHL8d72tt3a3shliEGzGHZC54n5wPpc,15933
|
|
17
|
-
dfindexeddb-20240229.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
18
|
-
dfindexeddb-20240229.dist-info/entry_points.txt,sha256=UsfPLLhTiVAAtZ8Rq3ZR7JNFGMuHqJy-tugGWonQWtc,52
|
|
19
|
-
dfindexeddb-20240229.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
|
|
20
|
-
dfindexeddb-20240229.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|