dfindexeddb 20240301-py3-none-any.whl → 20240324-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/blink.py +2 -1
- dfindexeddb/indexeddb/chromium.py +12 -12
- dfindexeddb/indexeddb/cli.py +101 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +217 -0
- dfindexeddb/leveldb/definitions.py +59 -0
- dfindexeddb/leveldb/descriptor.py +334 -0
- dfindexeddb/leveldb/ldb.py +53 -57
- dfindexeddb/leveldb/log.py +78 -69
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +8 -43
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/METADATA +46 -32
- dfindexeddb-20240324.dist-info/RECORD +26 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/WHEEL +1 -1
- dfindexeddb-20240324.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -155
- dfindexeddb-20240301.dist-info/RECORD +0 -20
- dfindexeddb-20240301.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/top_level.txt +0 -0
dfindexeddb/leveldb/descriptor.py
ADDED
@@ -0,0 +1,334 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for LevelDB Descriptor (MANIFEST) files."""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Generator, Optional
+
+from dfindexeddb import errors
+from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import utils
+
+
+@dataclass
+class InternalKey:
+  """An InternalKey.
+
+  Attributes:
+    offset: the offset.
+    user_key: the user key.
+    sequence_number: the sequence number.
+    key_type: the key type.
+  """
+  offset: int
+  user_key: bytes = field(repr=False)
+  sequence_number: int
+  key_type: int
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
+    """Decodes an InternalKey from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The InternalKey instance.
+    """
+    offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
+
+    if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
+      raise errors.ParserError('Insufficient bytes to parse InternalKey')
+
+    user_key = slice_bytes[:-definitions.SEQUENCE_LENGTH]
+    sequence_number = int.from_bytes(
+        slice_bytes[-definitions.SEQUENCE_LENGTH:],
+        byteorder='little',
+        signed=False)
+    key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+
+    return cls(
+        offset=base_offset + offset,
+        user_key=user_key,
+        sequence_number=sequence_number,
+        key_type=key_type)
+
+
+@dataclass
+class NewFile(utils.FromDecoderMixin):
+  """A NewFile.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the file number.
+    file_size: the file size.
+    smallest: the smallest internal key.
+    largest: the largest internal key.
+  """
+  offset: int
+  level: int
+  number: int
+  file_size: int
+  smallest: InternalKey
+  largest: InternalKey
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
+    """Decodes a NewFile from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The NewFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    _, file_size = decoder.DecodeUint64Varint()
+    smallest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+    largest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+
+    return cls(
+        offset=offset + base_offset,
+        level=level,
+        number=number,
+        file_size=file_size,
+        smallest=smallest,
+        largest=largest)
+
+
+@dataclass
+class CompactPointer(utils.FromDecoderMixin):
+  """A CompactPointer.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    key: the key bytes.
+  """
+  offset: int
+  level: int
+  key: bytes = field(repr=False)
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> CompactPointer:
+    """Decodes a CompactPointer from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The CompactPointer instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, key = decoder.DecodeLengthPrefixedSlice()
+    return cls(offset=base_offset + offset, level=level, key=key)
+
+
+@dataclass
+class DeletedFile(utils.FromDecoderMixin):
+  """A DeletedFile.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the file number.
+  """
+  offset: int
+  level: int
+  number: int
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
+    """Decodes a DeletedFile from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The DeletedFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    return cls(offset=base_offset + offset, level=level, number=number)
+
+
+@dataclass
+class VersionEdit(utils.FromDecoderMixin):
+  """A VersionEdit is recorded in a LevelDB descriptor/manifest file.
+
+  Attributes:
+    offset: the offset where the VersionEdit was parsed.
+    comparator: the comparator.
+    log_number: the log number.
+    prev_log_number: the previous log number.
+    next_file_number: the next file number.
+    last_sequence: the last sequence.
+    compact_pointers: the list of CompactPointers.
+    deleted_files: the list of DeletedFiles.
+    new_files: the list of NewFiles.
+  """
+  offset: int
+  comparator: Optional[bytes] = None
+  log_number: Optional[int] = None
+  prev_log_number: Optional[int] = None
+  next_file_number: Optional[int] = None
+  last_sequence: Optional[int] = None
+  compact_pointers: list[CompactPointer] = field(default_factory=list)
+  deleted_files: list[DeletedFile] = field(default_factory=list)
+  new_files: list[NewFile] = field(default_factory=list)
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
+    """Decodes a VersionEdit from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The VersionEdit instance.
+
+    Raises:
+      ParserError if an invalid VersionEditTag is parsed.
+    """
+    offset, tag_byte = decoder.DecodeUint32Varint()
+    version_edit = cls(offset=base_offset + offset)
+
+    while tag_byte:
+      try:
+        tag = definitions.VersionEditTags(tag_byte)
+      except TypeError as error:
+        raise errors.ParserError(
+            f'Invalid VersionEditTag at offset {offset}') from error
+
+      if tag == definitions.VersionEditTags.COMPARATOR:
+        _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
+      elif tag == definitions.VersionEditTags.LOG_NUMBER:
+        _, version_edit.log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.PREV_LOG_NUMBER:
+        _, version_edit.prev_log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.NEXT_FILE_NUMBER:
+        _, version_edit.next_file_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.LAST_SEQUENCE:
+        _, version_edit.last_sequence = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.COMPACT_POINTER:
+        compact_pointer = CompactPointer.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.compact_pointers.append(compact_pointer)
+      elif tag == definitions.VersionEditTags.DELETED_FILE:
+        deleted_file = DeletedFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.deleted_files.append(deleted_file)
+      elif tag == definitions.VersionEditTags.NEW_FILE:
+        file_metadata = NewFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.new_files.append(file_metadata)
+
+      if decoder.NumRemainingBytes() == 0:
+        break
+
+      offset, tag_byte = decoder.DecodeUint32Varint()
+
+    return version_edit
+
+
+class FileReader:
+  """A reader for Descriptor files.
+
+  A DescriptorFileReader provides read-only sequential iteration of serialized
+  structures in a leveldb Descriptor file.  These structures include:
+  * blocks (Block)
+  * records (PhysicalRecord)
+  * version edits (VersionEdit)
+  """
+  def __init__(self, filename: str):
+    """Initializes the Descriptor a.k.a. MANIFEST file.
+
+    Args:
+      filename: the Descriptor filename (e.g. MANIFEST-000001)
+    """
+    self.filename = filename
+
+
+  def GetBlocks(self) -> Generator[log.Block, None, None]:
+    """Returns an iterator of Block instances.
+
+    A Descriptor file is composed of one or more blocks.
+
+    Yields:
+      Block
+    """
+    with open(self.filename, 'rb') as fh:
+      block = log.Block.FromStream(fh)
+      while block:
+        yield block
+        block = log.Block.FromStream(fh)
+
+  def GetPhysicalRecords(self) -> Generator[log.PhysicalRecord, None, None]:
+    """Returns an iterator of PhysicalRecord instances.
+
+    A block is composed of one or more physical records.
+
+    Yields:
+      PhysicalRecord
+    """
+    for block in self.GetBlocks():
+      yield from block.GetPhysicalRecords()
+
+  def GetVersionEdits(self) -> Generator[VersionEdit, None, None]:
+    """Returns an iterator of VersionEdit instances.
+
+    Depending on the VersionEdit size, it can be spread across one or
+    more physical records.
+
+    Yields:
+      VersionEdit
+    """
+    buffer = bytearray()
+    for physical_record in self.GetPhysicalRecords():
+      if (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.FULL):
+        buffer = physical_record.contents
+        offset = physical_record.contents_offset + physical_record.base_offset
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.FIRST):
+        offset = physical_record.contents_offset + physical_record.base_offset
+        buffer = bytearray(physical_record.contents)
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.MIDDLE):
+        buffer.extend(bytearray(physical_record.contents))
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.LAST):
+        buffer.extend(bytearray(physical_record.contents))
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()
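The new descriptor module gives dfindexeddb a reader for LevelDB MANIFEST files: FileReader walks log blocks and physical records and reassembles them into VersionEdit structures. A minimal usage sketch follows; it is inferred from the class definitions above rather than taken from the package's own CLI, and the MANIFEST path is a placeholder.

# Sketch: list the version edits recorded in a LevelDB MANIFEST file.
# Assumes dfindexeddb 20240324 is installed; the file path is hypothetical.
from dfindexeddb.leveldb import descriptor

reader = descriptor.FileReader('/path/to/leveldb/MANIFEST-000001')
for version_edit in reader.GetVersionEdits():
  # Each VersionEdit can register new table files, delete old ones, and
  # update log/sequence bookkeeping for the database.
  print(version_edit.offset, version_edit.last_sequence)
  for new_file in version_edit.new_files:
    print('  new table file', new_file.number, 'at level', new_file.level)
  for deleted_file in version_edit.deleted_files:
    print('  deleted table file', deleted_file.number)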
dfindexeddb/leveldb/ldb.py
CHANGED
@@ -23,11 +23,12 @@ from typing import BinaryIO, Iterable, Tuple
 import snappy
 import zstd
 
-from dfindexeddb import
+from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import utils
 
 
 @dataclass
-class LdbKeyValueRecord:
+class KeyValueRecord:
   """A leveldb table key-value record.
 
   Attributes:
@@ -35,22 +36,18 @@ class LdbKeyValueRecord:
     key: the key of the record.
     value: the value of the record.
     sequence_number: the sequence number of the record.
-
+    record_type: the type of the record.
   """
   offset: int
   key: bytes
   value: bytes
   sequence_number: int
-
-
-  PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
-  SEQUENCE_LENGTH = 7
-  TYPE_LENGTH = 1
+  record_type: definitions.InternalRecordType
 
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
-  ) -> Tuple[
+  ) -> Tuple[KeyValueRecord, bytes]:
     """Decodes a ldb key value record.
 
     Args:
@@ -59,7 +56,7 @@ class LdbKeyValueRecord:
       shared_key: the shared key bytes.
 
     Returns:
-      A tuple of the parsed
+      A tuple of the parsed KeyValueRecord and the updated shared key bytes.
     """
     offset, shared_bytes = decoder.DecodeUint32Varint()
     _, unshared_bytes = decoder.DecodeUint32Varint()
@@ -68,17 +65,21 @@ class LdbKeyValueRecord:
     _, value = decoder.ReadBytes(value_length)
 
     shared_key = shared_key[:shared_bytes] + key_delta
-    key = shared_key[:-
+    key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     sequence_number = int.from_bytes(
-        key[-
-    key_type = shared_key[-
-
-    return cls(
-
+        key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
+    key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+    record_type = definitions.InternalRecordType(key_type)
+    return cls(
+        offset=offset + block_offset,
+        key=key,
+        value=value,
+        sequence_number=sequence_number,
+        record_type=record_type), shared_key
 
 
 @dataclass
-class LdbBlock:
+class Block:
   """A leveldb table block.
 
   Attributes:
@@ -91,17 +92,13 @@ class LdbBlock:
   data: bytes = field(repr=False)
   footer: bytes  # 5 bytes = 1 byte compressed flag + 4 bytes checksum.
 
-  SNAPPY_COMPRESSED = 1
-  ZSTD_COMPRESSED = 2
-  RESTART_ENTRY_LENGTH = 4
-
   def IsSnappyCompressed(self) -> bool:
     """Returns true if the block is snappy compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.SNAPPY
 
   def IsZstdCompressed(self) -> bool:
     """Returns true if the block is zstd compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.ZSTD
 
   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""
@@ -111,11 +108,11 @@ class LdbBlock:
       return zstd.decompress(self.data)
     return self.data
 
-  def GetRecords(self) -> Iterable[
+  def GetRecords(self) -> Iterable[KeyValueRecord]:
     """Returns an iterator over the key value records in the block.
 
     Yields:
-
+      KeyValueRecords
     """
     # get underlying block content, decompressing if required
     buffer = self.GetBuffer()
@@ -124,10 +121,11 @@ class LdbBlock:
     # trailer of a block has the form:
     #     restarts: uint32[num_restarts]
     #     num_restarts: uint32
-    decoder.stream.seek(-
+    decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
     restarts_offset = (
-        decoder.stream.tell()) - (
+        decoder.stream.tell()) - (
+            (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH)
 
     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()
@@ -135,7 +133,7 @@ class LdbBlock:
     key = b''
 
     while decoder.stream.tell() < restarts_offset:
-      key_value_record, key =
+      key_value_record, key = KeyValueRecord.FromDecoder(
           decoder, self.block_offset, key)
       yield key_value_record
 
@@ -145,7 +143,7 @@ class LdbBlock:
 
 
 @dataclass
-class BlockHandle:
+class BlockHandle(utils.FromDecoderMixin):
   """A handle to a block in the ldb file.
 
   Attributes:
@@ -157,16 +155,14 @@ class BlockHandle:
   block_offset: int
   length: int
 
-
-
-  def Load(self, stream: BinaryIO) -> LdbBlock:
+  def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.
 
     Args:
       stream: the binary stream of the ldb file.
 
     Returns:
-      a
+      a Block.
 
     Raises:
       ValueError: if it could not read all of the block or block footer.
@@ -176,41 +172,41 @@ class BlockHandle:
     if len(data) != self.length:
       raise ValueError('Could not read all of the block')
 
-    footer = stream.read(
-    if len(footer) !=
+    footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
+    if len(footer) != definitions.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')
 
-    return
+    return Block(self.offset, self.block_offset, self.length, data, footer)
 
   @classmethod
-  def
-
+  def FromDecoder(
+      cls: BlockHandle,
+      decoder: utils.LevelDBDecoder,
+      base_offset: int = 0
+  ) -> BlockHandle:
+    """Decodes a BlockHandle from the current position of a LevelDBDecoder.
 
     Args:
-
+      decoder: the LevelDBDecoder.
       base_offset: the base offset.
 
     Returns:
-
+      The BlockHandle instance.
     """
-    decoder = utils.LevelDBDecoder(stream)
     offset, block_offset = decoder.DecodeUint64Varint()
     _, length = decoder.DecodeUint64Varint()
     return cls(offset + base_offset, block_offset, length)
 
 
-class LdbFileReader:
+class FileReader:
   """A leveldb table (.ldb or .sst) file reader.
 
-  A
+  A Ldb FileReader provides read-only sequential iteration of serialized
   structures in a leveldb ldb file. These structures include:
-  * blocks (
-  * records (
+  * blocks (Block)
+  * records (KeyValueRecord)
   """
 
-  FOOTER_SIZE = 48
-  MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
-
   def __init__(self, filename: str):
     """Initializes the LogFile.
 
@@ -222,11 +218,11 @@ class LdbFileReader:
     """
     self.filename = filename
     with open(self.filename, 'rb') as fh:
-      fh.seek(-len(
-      if fh.read(len(
+      fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
+      if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
        raise ValueError(f'Invalid magic number in {self.filename}')
 
-      fh.seek(-
+      fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
       # meta_handle, need to read first due to variable integers
       _ = BlockHandle.FromStream(fh)
       index_handle = BlockHandle.FromStream(fh)
@@ -234,11 +230,11 @@ class LdbFileReader:
      # self.meta_block = meta_handle.load(fh)  # TODO: support meta blocks
      self.index_block = index_handle.Load(fh)
 
-  def GetBlocks(self) -> Iterable[
-    """Returns an iterator of
+  def GetBlocks(self) -> Iterable[Block]:
+    """Returns an iterator of Blocks.
 
     Yields:
-
+      Block.
     """
     with open(self.filename, 'rb') as fh:
       for key_value_record in self.index_block.GetRecords():
@@ -247,11 +243,11 @@ class LdbFileReader:
             base_offset=key_value_record.offset)
         yield block_handle.Load(fh)
 
-  def GetKeyValueRecords(self) -> Iterable[
-    """Returns an iterator of
+  def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
+    """Returns an iterator of KeyValueRecords.
 
     Yields:
-
+      KeyValueRecords.
     """
     for block in self.GetBlocks():
       yield from block.GetRecords()