dfindexeddb 20240305-py3-none-any.whl → 20240324-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/blink.py +2 -1
- dfindexeddb/indexeddb/chromium.py +4 -4
- dfindexeddb/indexeddb/cli.py +101 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +217 -0
- dfindexeddb/leveldb/definitions.py +16 -0
- dfindexeddb/leveldb/descriptor.py +10 -11
- dfindexeddb/leveldb/ldb.py +20 -24
- dfindexeddb/leveldb/log.py +25 -18
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +5 -46
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/METADATA +46 -32
- dfindexeddb-20240324.dist-info/RECORD +26 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/WHEEL +1 -1
- dfindexeddb-20240324.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -180
- dfindexeddb-20240305.dist-info/RECORD +0 -22
- dfindexeddb-20240305.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/top_level.txt +0 -0
dfindexeddb/indexeddb/blink.py
CHANGED
@@ -86,7 +86,8 @@ class V8ScriptValueDecoder:
       NotImplementedError: when called.
     """
     tag = self.ReadTag()
-    raise NotImplementedError(
+    raise NotImplementedError(
+        f'V8ScriptValueDecoder.ReadHostObject - {tag.name}')

   def Deserialize(self) -> Any:
     """Deserializes a Blink SSV.
dfindexeddb/indexeddb/chromium.py
CHANGED

@@ -20,11 +20,11 @@ import io
 from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union

 from dfindexeddb import errors
-from dfindexeddb import utils
 from dfindexeddb.indexeddb import blink
 from dfindexeddb.indexeddb import definitions
 from dfindexeddb.leveldb import ldb
 from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import utils


 T = TypeVar('T')

@@ -570,7 +570,7 @@ class EarlistCompactionTimeKey(BaseIndexedDBKey):
 class ScopesPrefixKey(BaseIndexedDBKey):
   """A scopes prefix IndexedDB key."""

-  def DecodeValue(self, decoder: utils.
+  def DecodeValue(self, decoder: utils.LevelDBDecoder) -> Optional[bytes]:
     """Decodes the scopes prefix value."""
     if decoder.NumRemainingBytes:
       return decoder.ReadBytes()[1]

@@ -578,7 +578,7 @@ class ScopesPrefixKey(BaseIndexedDBKey):

   @classmethod
   def FromDecoder(
-      cls, decoder: utils.
+      cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
       base_offset: int = 0
   ) -> ScopesPrefixKey:
     """Decodes the scopes prefix key."""

@@ -1357,4 +1357,4 @@ class IndexedDBRecord:
         value=idb_value,
         sequence_number=record.sequence_number if hasattr(
             record, 'sequence_number') else None,
-        type=record.
+        type=record.record_type)
dfindexeddb/indexeddb/cli.py
ADDED

@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for dfindexeddb."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+import sys
+import traceback
+
+from dfindexeddb import errors
+from dfindexeddb import version
+from dfindexeddb.leveldb import record as leveldb_record
+from dfindexeddb.indexeddb import chromium
+from dfindexeddb.indexeddb import v8
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfindexeddb fields."""
+  def default(self, o):
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, v8.Undefined):
+      return "<undefined>"
+    if isinstance(o, v8.Null):
+      return "<null>"
+    if isinstance(o, set):
+      return list(o)
+    if isinstance(o, v8.RegExp):
+      return str(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, to_json=False):
+  """Helper method to output parsed structure to stdout."""
+  if to_json:
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  else:
+    print(structure)
+
+
+def IndexeddbCommand(args):
+  """The CLI for processing a log/ldb file as indexeddb."""
+  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
+    record = db_record.record
+    try:
+      db_record.record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
+    except(
+        errors.ParserError,
+        errors.DecoderError,
+        NotImplementedError) as err:
+      print(
+          (f'Error parsing blink value: {err} for {record.__class__.__name__} '
+           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+    _Output(db_record, to_json=args.json)
+
+
+def App():
+  """The CLI app entrypoint for dfindexeddb."""
+  parser = argparse.ArgumentParser(
+      prog='dfindexeddb',
+      description='A cli tool for parsing indexeddb files',
+      epilog=f'Version {version.GetVersion()}')
+  parser.add_argument(
+      '-s', '--source', required=True, type=pathlib.Path,
+      help='The source leveldb folder')
+  parser.add_argument('--json', action='store_true', help='Output as JSON')
+  parser.set_defaults(func=IndexeddbCommand)
+
+  args = parser.parse_args()
+  args.func(args)
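As a quick illustration of the `Encoder` added above, here is a minimal sketch of how raw bytes are rendered when `--json` is used, assuming `Encoder` is imported from the new `dfindexeddb.indexeddb.cli` module: printable characters pass through unchanged and everything else becomes a `\xNN` escape.

```python
import json

from dfindexeddb.indexeddb.cli import Encoder

# Printable bytes pass through; non-printable bytes become \xNN escapes.
print(json.dumps(b'key\x00\x01', cls=Encoder))
# prints: "key\\x00\\x01"
```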
dfindexeddb/indexeddb/utils.py
File without changes
dfindexeddb/leveldb/cli.py
ADDED

@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for leveldb files."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+
+from dfindexeddb import version
+from dfindexeddb.leveldb import descriptor
+from dfindexeddb.leveldb import ldb
+from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import record
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfleveldb fields."""
+
+  def default(self, o):
+    """Returns a serializable object for o."""
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, set):
+      return list(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, to_json=False):
+  """Helper method to output parsed structure to stdout."""
+  if to_json:
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  else:
+    print(structure)
+
+
+def DbCommand(args):
+  """The CLI for processing leveldb folders."""
+  for rec in record.LevelDBRecord.FromDir(args.source):
+    _Output(rec, to_json=args.json)
+
+
+def LdbCommand(args):
+  """The CLI for processing ldb files."""
+  ldb_file = ldb.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in ldb_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'records' or not args.structure_type:
+    # Prints key value record information.
+    for key_value_record in ldb_file.GetKeyValueRecords():
+      _Output(key_value_record, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for ldb files.')
+
+
+def LogCommand(args):
+  """The CLI for processing log files."""
+  log_file = log.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in log_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in log_file.GetPhysicalRecords():
+      _Output(log_file_record, to_json=args.json)
+
+  elif args.structure_type == 'write_batches':
+    # Prints log file batch information.
+    for batch in log_file.GetWriteBatches():
+      _Output(batch, to_json=args.json)
+
+  elif (args.structure_type in ('parsed_internal_key', 'records')
+        or not args.structure_type):
+    # Prints key value record information.
+    for internal_key_record in log_file.GetParsedInternalKeys():
+      _Output(internal_key_record, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for log files.')
+
+
+def DescriptorCommand(args):
+  """The CLI for processing descriptor (MANIFEST) files."""
+  manifest_file = descriptor.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in manifest_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in manifest_file.GetPhysicalRecords():
+      _Output(log_file_record, to_json=args.json)
+
+  elif (args.structure_type == 'versionedit'
+        or not args.structure_type):
+    for version_edit in manifest_file.GetVersionEdits():
+      _Output(version_edit, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for descriptor files.')
+
+def App():
+  """The CLI app entrypoint for parsing leveldb files."""
+  parser = argparse.ArgumentParser(
+      prog='dfleveldb',
+      description='A cli tool for parsing leveldb files',
+      epilog=f'Version {version.GetVersion()}')
+
+  subparsers = parser.add_subparsers()
+
+  parser_db = subparsers.add_parser(
+      'db', help='Parse a directory as leveldb.')
+  parser_db.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb directory')
+  parser_db.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_db.set_defaults(func=DbCommand)
+
+  parser_log = subparsers.add_parser(
+      'log', help='Parse a leveldb log file.')
+  parser_log.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_log.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_log.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'physical_records',
+          'write_batches',
+          'parsed_internal_key'])
+  parser_log.set_defaults(func=LogCommand)
+
+  parser_ldb = subparsers.add_parser(
+      'ldb', help='Parse a leveldb table (.ldb) file.')
+  parser_ldb.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_ldb.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_ldb.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'records'])
+  parser_ldb.set_defaults(func=LdbCommand)
+
+  parser_descriptor = subparsers.add_parser(
+      'descriptor', help='Parse a leveldb descriptor (MANIFEST) file.')
+  parser_descriptor.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_descriptor.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_descriptor.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks', 'physical_records', 'versionedit'])
+  parser_descriptor.set_defaults(func=DescriptorCommand)
+
+  args = parser.parse_args()
+
+  if not hasattr(args, 'func'):
+    parser.print_usage()
+  else:
+    args.func(args)
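The new `dfleveldb db` subcommand above is a thin wrapper over the record API added in `dfindexeddb/leveldb/record.py`; a hedged sketch of the programmatic equivalent of `dfleveldb db -s ./leveldb --json` (the `./leveldb` path is a placeholder):

```python
import json
import pathlib

from dfindexeddb.leveldb import record
from dfindexeddb.leveldb.cli import Encoder

# Mirrors DbCommand above: walk every log/ldb file under the folder and
# dump each recovered record as JSON.
for rec in record.LevelDBRecord.FromDir(pathlib.Path('./leveldb')):
  print(json.dumps(rec, indent=2, cls=Encoder))
```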
dfindexeddb/leveldb/definitions.py
CHANGED

@@ -16,12 +16,22 @@

 import enum

+BLOCK_RESTART_ENTRY_LENGTH = 4
+BLOCK_TRAILER_SIZE = 5
+TABLE_FOOTER_SIZE = 48
+TABLE_MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'

 PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
 SEQUENCE_LENGTH = 7
 TYPE_LENGTH = 1


+class BlockCompressionType(enum.IntEnum):
+  """Block compression types."""
+  SNAPPY = 1
+  ZSTD = 2
+
+
 class VersionEditTags(enum.IntEnum):
   """VersionEdit tags."""
   COMPARATOR = 1

@@ -41,3 +51,9 @@ class LogFilePhysicalRecordType(enum.IntEnum):
   FIRST = 2
   MIDDLE = 3
   LAST = 4
+
+
+class InternalRecordType(enum.IntEnum):
+  """Internal record types."""
+  DELETED = 0
+  VALUE = 1
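These constants describe the packed tail of a LevelDB internal key: the last 8 bytes hold a 7-byte sequence number and a 1-byte type, which the new `InternalRecordType` enum names. A minimal sketch of unpacking that tail, matching the logic in `KeyValueRecord.FromDecoder` further down (the sample key is fabricated):

```python
from dfindexeddb.leveldb import definitions

# Fabricated internal key: user key b'k' plus an 8-byte little-endian
# tail packing (sequence_number << 8 | record_type).
internal_key = b'k' + ((5 << 8) | 1).to_bytes(
    definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH, 'little')

sequence_number = int.from_bytes(
    internal_key[-definitions.SEQUENCE_LENGTH:], 'little')
record_type = definitions.InternalRecordType(
    internal_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH])

print(sequence_number, record_type)  # 5 InternalRecordType.VALUE
```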
dfindexeddb/leveldb/descriptor.py
CHANGED

@@ -12,17 +12,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Parser for LevelDB
+"""Parser for LevelDB Descriptor (MANIFEST) files."""
 from __future__ import annotations

 from dataclasses import dataclass, field
 from typing import Generator, Optional

 from dfindexeddb import errors
-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
 from dfindexeddb.leveldb import log
-
+from dfindexeddb.leveldb import utils


 @dataclass

@@ -43,7 +42,7 @@ class InternalKey:
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
-    """Decodes
+    """Decodes an InternalKey from the current position of a LevelDBDecoder.

     Args:
       decoder: the LevelDBDecoder.

@@ -78,7 +77,7 @@ class NewFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
     file_size: the file size.
     smallest: the smallest internal key.
     largest: the largest internal key.

@@ -119,7 +118,7 @@ class NewFile(utils.FromDecoderMixin):

 @dataclass
 class CompactPointer(utils.FromDecoderMixin):
-  """A
+  """A CompactPointer.

   Attributes:
     offset: the offset.

@@ -155,7 +154,7 @@ class DeletedFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
   """
   offset: int
   level: int

@@ -260,12 +259,12 @@ class VersionEdit(utils.FromDecoderMixin):


 class FileReader:
-  """A Descriptor
-
+  """A reader for Descriptor files.
+
   A DescriptorFileReader provides read-only sequential iteration of serialized
   structures in a leveldb Descriptor file. These structures include:
-  * blocks (
-  * records (
+  * blocks (Block)
+  * records (PhysicalRecord)
   * version edits (VersionEdit)
   """
   def __init__(self, filename: str):
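A hedged usage sketch for this reader, mirroring what `DescriptorCommand` in the new `dfleveldb` CLI does when no `--structure_type` is given (the MANIFEST filename is a placeholder):

```python
from dfindexeddb.leveldb import descriptor

# Iterate the VersionEdit records in a descriptor (MANIFEST) file,
# as DescriptorCommand does when --structure_type is left unset.
reader = descriptor.FileReader('MANIFEST-000001')
for version_edit in reader.GetVersionEdits():
  print(version_edit)
```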
dfindexeddb/leveldb/ldb.py
CHANGED
@@ -23,8 +23,8 @@ from typing import BinaryIO, Iterable, Tuple
 import snappy
 import zstd

-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import utils


 @dataclass

@@ -36,13 +36,13 @@ class KeyValueRecord:
     key: the key of the record.
     value: the value of the record.
     sequence_number: the sequence number of the record.
-
+    record_type: the type of the record.
   """
   offset: int
   key: bytes
   value: bytes
   sequence_number: int
-
+  record_type: definitions.InternalRecordType

   @classmethod
   def FromDecoder(

@@ -69,9 +69,13 @@ class KeyValueRecord:
     sequence_number = int.from_bytes(
         key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
     key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
-
-    return cls(
-
+    record_type = definitions.InternalRecordType(key_type)
+    return cls(
+        offset=offset + block_offset,
+        key=key,
+        value=value,
+        sequence_number=sequence_number,
+        record_type=record_type), shared_key


 @dataclass

@@ -88,17 +92,13 @@ class Block:
   data: bytes = field(repr=False)
   footer: bytes  # 5 bytes = 1 byte compressed flag + 4 bytes checksum.

-  SNAPPY_COMPRESSED = 1
-  ZSTD_COMPRESSED = 2
-  RESTART_ENTRY_LENGTH = 4
-
   def IsSnappyCompressed(self) -> bool:
     """Returns true if the block is snappy compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.SNAPPY

   def IsZstdCompressed(self) -> bool:
     """Returns true if the block is zstd compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.ZSTD

   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""

@@ -121,10 +121,11 @@ class Block:
     # trailer of a block has the form:
     #     restarts: uint32[num_restarts]
     #     num_restarts: uint32
-    decoder.stream.seek(-
+    decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
     restarts_offset = (
-        decoder.stream.tell()) - (
+        decoder.stream.tell()) - (
+        (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH)

     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()

@@ -154,8 +155,6 @@ class BlockHandle(utils.FromDecoderMixin):
   block_offset: int
   length: int

-  BLOCK_TRAILER_SIZE = 5
-
   def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.


@@ -173,8 +172,8 @@ class BlockHandle(utils.FromDecoderMixin):
     if len(data) != self.length:
       raise ValueError('Could not read all of the block')

-    footer = stream.read(
-    if len(footer) !=
+    footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
+    if len(footer) != definitions.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')

     return Block(self.offset, self.block_offset, self.length, data, footer)

@@ -208,9 +207,6 @@ class FileReader:
   * records (KeyValueRecord)
   """

-  FOOTER_SIZE = 48
-  MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
-
   def __init__(self, filename: str):
     """Initializes the LogFile.


@@ -222,11 +218,11 @@ class FileReader:
     """
     self.filename = filename
     with open(self.filename, 'rb') as fh:
-      fh.seek(-len(
-      if fh.read(len(
+      fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
+      if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
        raise ValueError(f'Invalid magic number in {self.filename}')

-      fh.seek(-
+      fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
      # meta_handle, need to read first due to variable integers
      _ = BlockHandle.FromStream(fh)
      index_handle = BlockHandle.FromStream(fh)