dfindexeddb 20240305-py3-none-any.whl → 20240331-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/cli.py +112 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +260 -0
- dfindexeddb/leveldb/definitions.py +16 -0
- dfindexeddb/leveldb/descriptor.py +61 -14
- dfindexeddb/leveldb/ldb.py +20 -24
- dfindexeddb/leveldb/log.py +25 -18
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +5 -46
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/METADATA +74 -30
- dfindexeddb-20240331.dist-info/RECORD +22 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/WHEEL +1 -1
- dfindexeddb-20240331.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -180
- dfindexeddb/indexeddb/blink.py +0 -115
- dfindexeddb/indexeddb/chromium.py +0 -1360
- dfindexeddb/indexeddb/definitions.py +0 -306
- dfindexeddb/indexeddb/v8.py +0 -642
- dfindexeddb-20240305.dist-info/RECORD +0 -22
- dfindexeddb-20240305.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/top_level.txt +0 -0
dfindexeddb/indexeddb/cli.py
ADDED
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for dfindexeddb."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+import sys
+import traceback
+
+from dfindexeddb import errors
+from dfindexeddb import version
+from dfindexeddb.leveldb import record as leveldb_record
+from dfindexeddb.indexeddb.chromium import record as chromium_record
+from dfindexeddb.indexeddb.chromium import v8
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfindexeddb fields."""
+  def default(self, o):
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, v8.Undefined):
+      return "<undefined>"
+    if isinstance(o, v8.Null):
+      return "<null>"
+    if isinstance(o, set):
+      return list(o)
+    if isinstance(o, v8.RegExp):
+      return str(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, output):
+  """Helper method to output parsed structure to stdout."""
+  if output == 'json':
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  elif output == 'jsonl':
+    print(json.dumps(structure, cls=Encoder))
+  elif output == 'repr':
+    print(structure)
+
+
+def IndexeddbCommand(args):
+  """The CLI for processing a log/ldb file as indexeddb."""
+  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
+    record = db_record.record
+    try:
+      db_record.record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
+          record)
+    except(
+        errors.ParserError,
+        errors.DecoderError,
+        NotImplementedError) as err:
+      print(
+          (f'Error parsing blink value: {err} for {record.__class__.__name__} '
+           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+    _Output(db_record, output=args.output)
+
+
+def App():
+  """The CLI app entrypoint for dfindexeddb."""
+  parser = argparse.ArgumentParser(
+      prog='dfindexeddb',
+      description='A cli tool for parsing indexeddb files',
+      epilog=f'Version {version.GetVersion()}')
+  parser.add_argument(
+      '-s', '--source', required=True, type=pathlib.Path,
+      help='The source leveldb folder')
+  parser.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format. Default is json')
+  parser.set_defaults(func=IndexeddbCommand)
+
+  args = parser.parse_args()
+  args.func(args)
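The new `dfindexeddb` console script (registered via the wheel's `entry_points.txt`) takes a LevelDB folder and re-decodes each LevelDB record as an IndexedDB record. Below is a minimal sketch of driving the entry point in-process; the source directory path is a placeholder, and the shell equivalent would be `dfindexeddb -s ./test_data/leveldb -o jsonl`.

```python
# Sketch: call the new App() entry point directly. argparse reads sys.argv,
# so we stage the same arguments the console script would receive.
# './test_data/leveldb' is a placeholder path.
import sys

from dfindexeddb.indexeddb import cli

sys.argv = ['dfindexeddb', '-s', './test_data/leveldb', '-o', 'jsonl']
cli.App()  # Prints one JSON line per parsed record.
```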
dfindexeddb/indexeddb/utils.py
File without changes
dfindexeddb/leveldb/cli.py
ADDED
@@ -0,0 +1,260 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for leveldb files."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+
+from dfindexeddb import version
+from dfindexeddb.leveldb import descriptor
+from dfindexeddb.leveldb import ldb
+from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import record
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfleveldb fields."""
+
+  def default(self, o):
+    """Returns a serializable object for o."""
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, set):
+      return list(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, output):
+  """Helper method to output parsed structure to stdout."""
+  if output == 'json':
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  elif output == 'jsonl':
+    print(json.dumps(structure, cls=Encoder))
+  elif output == 'repr':
+    print(structure)
+
+
+def DbCommand(args):
+  """The CLI for processing leveldb folders."""
+  for rec in record.LevelDBRecord.FromDir(args.source):
+    _Output(rec, output=args.output)
+
+
+def LdbCommand(args):
+  """The CLI for processing ldb files."""
+  ldb_file = ldb.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in ldb_file.GetBlocks():
+      _Output(block, output=args.output)
+
+  elif args.structure_type == 'records' or not args.structure_type:
+    # Prints key value record information.
+    for key_value_record in ldb_file.GetKeyValueRecords():
+      _Output(key_value_record, output=args.output)
+
+  else:
+    print(f'{args.structure_type} is not supported for ldb files.')
+
+
+def LogCommand(args):
+  """The CLI for processing log files."""
+  log_file = log.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in log_file.GetBlocks():
+      _Output(block, output=args.output)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in log_file.GetPhysicalRecords():
+      _Output(log_file_record, output=args.output)
+
+  elif args.structure_type == 'write_batches':
+    # Prints log file batch information.
+    for batch in log_file.GetWriteBatches():
+      _Output(batch, output=args.output)
+
+  elif (args.structure_type in ('parsed_internal_key', 'records')
+        or not args.structure_type):
+    # Prints key value record information.
+    for internal_key_record in log_file.GetParsedInternalKeys():
+      _Output(internal_key_record, output=args.output)
+
+  else:
+    print(f'{args.structure_type} is not supported for log files.')
+
+
+def DescriptorCommand(args):
+  """The CLI for processing descriptor (MANIFEST) files."""
+  manifest_file = descriptor.FileReader(args.source)
+
+  if args.version_history:
+    for levels in manifest_file.GetVersions():
+      _Output(levels, output=args.output)
+
+  elif args.structure_type == 'blocks':
+    # Prints block information.
+    for block in manifest_file.GetBlocks():
+      _Output(block, output=args.output)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in manifest_file.GetPhysicalRecords():
+      _Output(log_file_record, output=args.output)
+
+  elif (args.structure_type == 'versionedit'
+        or not args.structure_type):
+    for version_edit in manifest_file.GetVersionEdits():
+      _Output(version_edit, output=args.output)
+
+  else:
+    print(f'{args.structure_type} is not supported for descriptor files.')
+
+def App():
+  """The CLI app entrypoint for parsing leveldb files."""
+  parser = argparse.ArgumentParser(
+      prog='dfleveldb',
+      description='A cli tool for parsing leveldb files',
+      epilog=f'Version {version.GetVersion()}')
+
+  subparsers = parser.add_subparsers()
+
+  parser_db = subparsers.add_parser(
+      'db', help='Parse a directory as leveldb.')
+  parser_db.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb directory')
+  parser_db.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format. Default is json')
+
+  parser_log = subparsers.add_parser(
+      'log', help='Parse a leveldb log file.')
+  parser_log.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_log.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format. Default is json')
+  parser_log.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'physical_records',
+          'write_batches',
+          'parsed_internal_key'],
+      help='Parses the specified structure. Default is parsed_internal_key.')
+  parser_log.set_defaults(func=LogCommand)
+
+  parser_ldb = subparsers.add_parser(
+      'ldb', help='Parse a leveldb table (.ldb) file.')
+  parser_ldb.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_ldb.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format. Default is json')
+  parser_ldb.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'records'],
+      help='Parses the specified structure. Default is records.')
+  parser_ldb.set_defaults(func=LdbCommand)
+
+  parser_descriptor = subparsers.add_parser(
+      'descriptor', help='Parse a leveldb descriptor (MANIFEST) file.')
+  parser_descriptor.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_descriptor.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format. Default is json')
+  db_group = parser_descriptor.add_mutually_exclusive_group()
+  db_group.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks', 'physical_records', 'versionedit'],
+      help='Parses the specified structure. Default is versionedit.')
+  db_group.add_argument(
+      '-v',
+      '--version_history',
+      action='store_true',
+      help='Parses the leveldb version history.'
+  )
+  parser_descriptor.set_defaults(func=DescriptorCommand)
+
+  args = parser.parse_args()
+
+  if not hasattr(args, 'func'):
+    parser.print_usage()
+  else:
+    args.func(args)
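The companion `dfleveldb` tool splits low-level parsing into `db`, `log`, `ldb`, and `descriptor` subcommands that share the `-s/--source` and `-o/--output` options, with `-t/--structure_type` selecting the structure to emit. A hedged sketch with a placeholder file name; shell equivalents would be `dfleveldb log -s 000003.log -t write_batches` or `dfleveldb descriptor -s MANIFEST-000002 -v`.

```python
# Sketch: run the 'log' subcommand in-process to dump write batches from a
# placeholder .log file.
import sys

from dfindexeddb.leveldb import cli

sys.argv = ['dfleveldb', 'log', '-s', '000003.log', '-t', 'write_batches']
cli.App()  # Prints each parsed write batch as indented JSON.
```

Note that in this version only the `log`, `ldb`, and `descriptor` subparsers call `set_defaults(func=...)`; the `db` subparser defines arguments but no handler, so `dfleveldb db` falls through to `parser.print_usage()`.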
dfindexeddb/leveldb/definitions.py
CHANGED
@@ -16,12 +16,22 @@
 
 import enum
 
+BLOCK_RESTART_ENTRY_LENGTH = 4
+BLOCK_TRAILER_SIZE = 5
+TABLE_FOOTER_SIZE = 48
+TABLE_MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
 
 PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
 SEQUENCE_LENGTH = 7
 TYPE_LENGTH = 1
 
 
+class BlockCompressionType(enum.IntEnum):
+  """Block compression types."""
+  SNAPPY = 1
+  ZSTD = 2
+
+
 class VersionEditTags(enum.IntEnum):
   """VersionEdit tags."""
   COMPARATOR = 1
@@ -41,3 +51,9 @@ class LogFilePhysicalRecordType(enum.IntEnum):
   FIRST = 2
   MIDDLE = 3
   LAST = 4
+
+
+class InternalRecordType(enum.IntEnum):
+  """Internal record types."""
+  DELETED = 0
+  VALUE = 1
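The constants hoisted into definitions.py encode LevelDB's on-disk invariants: each table block carries a 5-byte trailer (1 compression byte plus a 4-byte checksum), restart entries are 4-byte uint32s, and every table file ends with a 48-byte footer whose final 8 bytes are the table magic. A small sketch of checking that last invariant by hand, mirroring the check `ldb.FileReader.__init__` now performs; the file name is a placeholder.

```python
# Sketch: verify a table file's trailing magic using the new constants.
# '000005.ldb' is a placeholder path.
import os

from dfindexeddb.leveldb import definitions

with open('000005.ldb', 'rb') as fh:
  fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
  if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
    raise ValueError('Not a leveldb table file')
```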
dfindexeddb/leveldb/descriptor.py
CHANGED
@@ -12,17 +12,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Parser for LevelDB
+"""Parser for LevelDB Descriptor (MANIFEST) files."""
 from __future__ import annotations
-
+from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import Generator, Optional
 
 from dfindexeddb import errors
-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
 from dfindexeddb.leveldb import log
-
+from dfindexeddb.leveldb import utils
 
 
 @dataclass
@@ -36,14 +35,14 @@ class InternalKey:
     key_type: the key type.
   """
   offset: int
-  user_key: bytes 
+  user_key: bytes
   sequence_number: int
   key_type: int
 
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
-    """Decodes
+    """Decodes an InternalKey from the current position of a LevelDBDecoder.
 
     Args:
       decoder: the LevelDBDecoder.
@@ -78,7 +77,7 @@ class NewFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
    file_size: the file size.
     smallest: the smallest internal key.
     largest: the largest internal key.
@@ -119,7 +118,7 @@ class NewFile(utils.FromDecoderMixin):
 
 @dataclass
 class CompactPointer(utils.FromDecoderMixin):
-  """A
+  """A CompactPointer.
 
   Attributes:
     offset: the offset.
@@ -128,7 +127,7 @@ class CompactPointer(utils.FromDecoderMixin):
   """
   offset: int
   level: int
-  key: bytes 
+  key: bytes
 
   @classmethod
   def FromDecoder(
@@ -155,7 +154,7 @@ class DeletedFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
   """
   offset: int
   level: int
@@ -259,13 +258,34 @@ class VersionEdit(utils.FromDecoderMixin):
     return version_edit
 
 
+@dataclass
+class LevelDBVersion:
+  """A LevelDBVersion.
+
+  A LevelDBVersion represents the current state of the table files and log file
+  that are currently "active". The current state is determined by the sequence
+  of VersionEdits that are parsed from a descriptor file.
+
+  Active files can contain overlapping keys in the current log file and the
+  "young" or 0-level.
+
+  "Deleted files" will typically no longer exist but may be forensically
+  recoverable.
+  """
+  current_log: str
+  version_edit_offset: int
+  last_sequence: int
+  active_files: dict[int, dict[int, NewFile]]
+  deleted_files: dict[int, dict[int, DeletedFile]]
+
+
 class FileReader:
-  """A Descriptor
-
+  """A reader for Descriptor files.
+
   A DescriptorFileReader provides read-only sequential iteration of serialized
   structures in a leveldb Descriptor file. These structures include:
-    * blocks (
-    * records (
+    * blocks (Block)
+    * records (PhysicalRecord)
     * version edits (VersionEdit)
   """
   def __init__(self, filename: str):
@@ -333,3 +353,30 @@ class FileReader:
       version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
       yield version_edit
       buffer = bytearray()
+
+  def GetVersions(self) -> Generator[LevelDBVersion, None, None]:
+    """Returns an iterator of LevelDBVersion instances.
+
+    Yields:
+      LevelDBVersion
+    """
+    active_files = defaultdict(dict)
+    deleted_files = defaultdict(set)
+    current_log = None
+
+    for version_edit in self.GetVersionEdits():
+      current_log = f'{version_edit.log_number:06d}.log'
+
+      for new_file in version_edit.new_files:
+        active_files[new_file.level][f'{new_file.number:06d}.ldb'] = new_file
+
+      for deleted_file in version_edit.deleted_files:
+        active_files[deleted_file.level].pop(f'{deleted_file.number:06d}.ldb')
+        deleted_files[deleted_file.level].add(f'{deleted_file.number:06d}.ldb')
+
+      yield LevelDBVersion(
+          current_log=current_log,
+          active_files=dict(active_files),
+          deleted_files=dict(deleted_files),
+          version_edit_offset=version_edit.offset,
+          last_sequence=version_edit.last_sequence)
dfindexeddb/leveldb/ldb.py
CHANGED
@@ -23,8 +23,8 @@ from typing import BinaryIO, Iterable, Tuple
 import snappy
 import zstd
 
-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import utils
 
 
 @dataclass
@@ -36,13 +36,13 @@ class KeyValueRecord:
     key: the key of the record.
     value: the value of the record.
     sequence_number: the sequence number of the record.
-
+    record_type: the type of the record.
   """
   offset: int
   key: bytes
   value: bytes
   sequence_number: int
-
+  record_type: definitions.InternalRecordType
 
   @classmethod
   def FromDecoder(
@@ -69,9 +69,13 @@ class KeyValueRecord:
     sequence_number = int.from_bytes(
         key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
     key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
-
-    return cls(
-
+    record_type = definitions.InternalRecordType(key_type)
+    return cls(
+        offset=offset + block_offset,
+        key=key,
+        value=value,
+        sequence_number=sequence_number,
+        record_type=record_type), shared_key
 
 
 @dataclass
@@ -88,17 +92,13 @@ class Block:
   data: bytes = field(repr=False)
   footer: bytes  # 5 bytes = 1 byte compressed flag + 4 bytes checksum.
 
-  SNAPPY_COMPRESSED = 1
-  ZSTD_COMPRESSED = 2
-  RESTART_ENTRY_LENGTH = 4
-
   def IsSnappyCompressed(self) -> bool:
     """Returns true if the block is snappy compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.SNAPPY
 
   def IsZstdCompressed(self) -> bool:
     """Returns true if the block is zstd compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.ZSTD
 
   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""
@@ -121,10 +121,11 @@ class Block:
     # trailer of a block has the form:
     #     restarts: uint32[num_restarts]
     #     num_restarts: uint32
-    decoder.stream.seek(-
+    decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
     restarts_offset = (
-        decoder.stream.tell()) - (
+        decoder.stream.tell()) - (
+            (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH)
 
     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()
@@ -154,8 +155,6 @@ class BlockHandle(utils.FromDecoderMixin):
   block_offset: int
   length: int
 
-  BLOCK_TRAILER_SIZE = 5
-
   def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.
 
@@ -173,8 +172,8 @@ class BlockHandle(utils.FromDecoderMixin):
     if len(data) != self.length:
       raise ValueError('Could not read all of the block')
 
-    footer = stream.read(
-    if len(footer) !=
+    footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
+    if len(footer) != definitions.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')
 
     return Block(self.offset, self.block_offset, self.length, data, footer)
@@ -208,9 +207,6 @@ class FileReader:
     * records (KeyValueRecord)
   """
 
-  FOOTER_SIZE = 48
-  MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
-
  def __init__(self, filename: str):
     """Initializes the LogFile.
 
@@ -222,11 +218,11 @@ class FileReader:
     """
     self.filename = filename
     with open(self.filename, 'rb') as fh:
-      fh.seek(-len(
-      if fh.read(len(
+      fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
+      if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
         raise ValueError(f'Invalid magic number in {self.filename}')
 
-      fh.seek(-
+      fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
       # meta_handle, need to read first due to variable integers
       _ = BlockHandle.FromStream(fh)
       index_handle = BlockHandle.FromStream(fh)