dfindexeddb 20240301__py3-none-any.whl → 20240324__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/blink.py +2 -1
- dfindexeddb/indexeddb/chromium.py +12 -12
- dfindexeddb/indexeddb/cli.py +101 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +217 -0
- dfindexeddb/leveldb/definitions.py +59 -0
- dfindexeddb/leveldb/descriptor.py +334 -0
- dfindexeddb/leveldb/ldb.py +53 -57
- dfindexeddb/leveldb/log.py +78 -69
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +8 -43
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/METADATA +46 -32
- dfindexeddb-20240324.dist-info/RECORD +26 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/WHEEL +1 -1
- dfindexeddb-20240324.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -155
- dfindexeddb-20240301.dist-info/RECORD +0 -20
- dfindexeddb-20240301.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/top_level.txt +0 -0
dfindexeddb/leveldb/log.py
CHANGED
|
@@ -15,20 +15,13 @@
|
|
|
15
15
|
"""Parser for LevelDB Log (.log) files."""
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
-
from dataclasses import dataclass
|
|
19
|
-
from enum import IntEnum
|
|
18
|
+
from dataclasses import dataclass
|
|
20
19
|
import io
|
|
21
20
|
from typing import BinaryIO, Generator, Iterable, Optional
|
|
22
21
|
|
|
23
|
-
from dfindexeddb import
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class LogFilePhysicalRecordType(IntEnum):
|
|
27
|
-
"""LevelDB log file physical record types."""
|
|
28
|
-
FULL = 1
|
|
29
|
-
FIRST = 2
|
|
30
|
-
MIDDLE = 3
|
|
31
|
-
LAST = 4
|
|
22
|
+
from dfindexeddb import errors
|
|
23
|
+
from dfindexeddb.leveldb import definitions
|
|
24
|
+
from dfindexeddb.leveldb import utils
|
|
32
25
|
|
|
33
26
|
|
|
34
27
|
@dataclass
|
|
@@ -37,12 +30,15 @@ class ParsedInternalKey:
|
|
|
37
30
|
|
|
38
31
|
Attributes:
|
|
39
32
|
offset: the offset of the record.
|
|
40
|
-
|
|
33
|
+
record_type: the record type.
|
|
34
|
+
sequence_number: the sequence number (inferred from the relative location
|
|
35
|
+
the ParsedInternalKey in a WriteBatch.)
|
|
41
36
|
key: the record key.
|
|
42
37
|
value: the record value.
|
|
43
38
|
"""
|
|
44
39
|
offset: int
|
|
45
|
-
|
|
40
|
+
record_type: definitions.InternalRecordType
|
|
41
|
+
sequence_number: int
|
|
46
42
|
key: bytes
|
|
47
43
|
value: bytes
|
|
48
44
|
|
|
@@ -50,37 +46,47 @@ class ParsedInternalKey:
|
|
|
50
46
|
def FromDecoder(
|
|
51
47
|
cls,
|
|
52
48
|
decoder: utils.LevelDBDecoder,
|
|
53
|
-
base_offset: int = 0
|
|
49
|
+
base_offset: int = 0,
|
|
50
|
+
sequence_number: int = 0,
|
|
54
51
|
) -> ParsedInternalKey:
|
|
55
52
|
"""Decodes an internal key value record.
|
|
56
53
|
|
|
57
54
|
Args:
|
|
58
55
|
decoder: the leveldb decoder.
|
|
59
|
-
base_offset: the base offset for the parsed key value record.
|
|
56
|
+
base_offset: the base offset for the parsed internal key value record.
|
|
57
|
+
sequence_number: the sequence number for the parsed internal key value
|
|
58
|
+
record.
|
|
60
59
|
|
|
61
60
|
Returns:
|
|
62
|
-
|
|
61
|
+
A ParsedInternalKey
|
|
63
62
|
|
|
64
63
|
Raises:
|
|
65
64
|
ValueError: if there is an invalid record type encountered.
|
|
66
65
|
"""
|
|
67
66
|
offset, record_type = decoder.DecodeUint8()
|
|
68
67
|
_, key = decoder.DecodeBlobWithLength()
|
|
69
|
-
|
|
68
|
+
record_type = definitions.InternalRecordType(record_type)
|
|
69
|
+
|
|
70
|
+
if record_type == definitions.InternalRecordType.VALUE:
|
|
70
71
|
_, value = decoder.DecodeBlobWithLength()
|
|
71
|
-
elif record_type ==
|
|
72
|
+
elif record_type == definitions.InternalRecordType.DELETED:
|
|
72
73
|
value = b''
|
|
73
74
|
else:
|
|
74
75
|
raise ValueError(f'Invalid record type {record_type}')
|
|
75
|
-
return cls(
|
|
76
|
+
return cls(
|
|
77
|
+
offset=base_offset + offset,
|
|
78
|
+
record_type=record_type,
|
|
79
|
+
key=key,
|
|
80
|
+
value=value,
|
|
81
|
+
sequence_number=sequence_number)
|
|
76
82
|
|
|
77
83
|
|
|
78
84
|
@dataclass
|
|
79
|
-
class WriteBatch:
|
|
85
|
+
class WriteBatch(utils.FromDecoderMixin):
|
|
80
86
|
"""A write batch from a leveldb log file.
|
|
81
87
|
|
|
82
88
|
Attributes:
|
|
83
|
-
offset: the batch offset.
|
|
89
|
+
offset: the write batch offset.
|
|
84
90
|
sequence_number: the batch sequence number.
|
|
85
91
|
count: the number of ParsedInternalKey in the batch.
|
|
86
92
|
records: the ParsedInternalKey parsed from the batch.
|
|
@@ -88,49 +94,41 @@ class WriteBatch:
|
|
|
88
94
|
offset: int
|
|
89
95
|
sequence_number: int
|
|
90
96
|
count: int
|
|
91
|
-
records: Iterable[ParsedInternalKey]
|
|
97
|
+
records: Iterable[ParsedInternalKey]
|
|
92
98
|
|
|
93
99
|
@classmethod
|
|
94
|
-
def
|
|
95
|
-
cls,
|
|
100
|
+
def FromDecoder(
|
|
101
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
96
102
|
) -> WriteBatch:
|
|
97
103
|
"""Parses a WriteBatch from a binary stream.
|
|
98
104
|
|
|
99
105
|
Args:
|
|
100
|
-
|
|
106
|
+
decoder: the LevelDBDecoder
|
|
101
107
|
base_offset: the base offset of the Block from which the data is
|
|
102
108
|
read from.
|
|
103
109
|
|
|
104
110
|
Returns:
|
|
105
111
|
A WriteBatch.
|
|
106
112
|
"""
|
|
107
|
-
|
|
108
|
-
_, sequence_number = decoder.DecodeUint64()
|
|
113
|
+
offset, sequence_number = decoder.DecodeUint64()
|
|
109
114
|
_, count = decoder.DecodeUint32()
|
|
110
115
|
|
|
111
116
|
records = []
|
|
112
|
-
for
|
|
113
|
-
record = ParsedInternalKey.FromDecoder(
|
|
117
|
+
for relative_sequence_number in range(count):
|
|
118
|
+
record = ParsedInternalKey.FromDecoder(
|
|
119
|
+
decoder, base_offset + offset,
|
|
120
|
+
relative_sequence_number + sequence_number
|
|
121
|
+
)
|
|
114
122
|
records.append(record)
|
|
115
|
-
return cls(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
Args:
|
|
122
|
-
data: the bytes to be parsed.
|
|
123
|
-
base_offset: the base offset of the Block from which the data is
|
|
124
|
-
read from.
|
|
125
|
-
|
|
126
|
-
Returns:
|
|
127
|
-
A WriteBatch.
|
|
128
|
-
"""
|
|
129
|
-
return cls.FromStream(io.BytesIO(data), base_offset)
|
|
123
|
+
return cls(
|
|
124
|
+
offset=base_offset + offset,
|
|
125
|
+
sequence_number=sequence_number,
|
|
126
|
+
count=count,
|
|
127
|
+
records=records)
|
|
130
128
|
|
|
131
129
|
|
|
132
130
|
@dataclass
|
|
133
|
-
class PhysicalRecord:
|
|
131
|
+
class PhysicalRecord(utils.FromDecoderMixin):
|
|
134
132
|
"""A physical record from a leveldb log file.
|
|
135
133
|
|
|
136
134
|
Attributes:
|
|
@@ -145,27 +143,35 @@ class PhysicalRecord:
|
|
|
145
143
|
offset: int
|
|
146
144
|
checksum: int
|
|
147
145
|
length: int
|
|
148
|
-
record_type: LogFilePhysicalRecordType
|
|
149
|
-
contents: bytes
|
|
146
|
+
record_type: definitions.LogFilePhysicalRecordType
|
|
147
|
+
contents: bytes
|
|
150
148
|
contents_offset: int
|
|
151
149
|
|
|
150
|
+
PHYSICAL_HEADER_LENGTH = 7
|
|
151
|
+
|
|
152
152
|
@classmethod
|
|
153
|
-
def
|
|
154
|
-
cls,
|
|
155
|
-
|
|
153
|
+
def FromDecoder(
|
|
154
|
+
cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
|
|
155
|
+
) -> PhysicalRecord:
|
|
156
|
+
"""Decodes a PhysicalRecord from the current position of a LevelDBDecoder.
|
|
156
157
|
|
|
157
158
|
Args:
|
|
158
|
-
|
|
159
|
+
decoder: the LevelDBDecoder.
|
|
159
160
|
base_offset: the base offset of the WriteBatch from which the data is
|
|
160
161
|
read from.
|
|
161
162
|
|
|
162
163
|
Returns:
|
|
163
164
|
A PhysicalRecord.
|
|
164
165
|
"""
|
|
165
|
-
decoder = utils.StreamDecoder(stream)
|
|
166
166
|
offset, checksum = decoder.DecodeUint32()
|
|
167
167
|
_, length = decoder.DecodeUint16()
|
|
168
|
-
|
|
168
|
+
_, record_type_byte = decoder.DecodeUint8()
|
|
169
|
+
try:
|
|
170
|
+
record_type = definitions.LogFilePhysicalRecordType(record_type_byte)
|
|
171
|
+
except ValueError as error:
|
|
172
|
+
raise errors.ParserError(
|
|
173
|
+
f'Error parsing record type of Physical Record at offset '
|
|
174
|
+
f'{offset + base_offset}') from error
|
|
169
175
|
contents_offset, contents = decoder.ReadBytes(length)
|
|
170
176
|
return cls(
|
|
171
177
|
base_offset=base_offset,
|
|
@@ -186,7 +192,7 @@ class Block:
|
|
|
186
192
|
data: the block data.
|
|
187
193
|
"""
|
|
188
194
|
offset: int
|
|
189
|
-
data: bytes
|
|
195
|
+
data: bytes
|
|
190
196
|
|
|
191
197
|
BLOCK_SIZE = 32768
|
|
192
198
|
|
|
@@ -199,7 +205,7 @@ class Block:
|
|
|
199
205
|
buffer = io.BytesIO(self.data)
|
|
200
206
|
buffer_length = len(self.data)
|
|
201
207
|
|
|
202
|
-
while buffer.tell() < buffer_length:
|
|
208
|
+
while buffer.tell() + PhysicalRecord.PHYSICAL_HEADER_LENGTH < buffer_length:
|
|
203
209
|
yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
|
|
204
210
|
|
|
205
211
|
@classmethod
|
|
@@ -219,10 +225,10 @@ class Block:
|
|
|
219
225
|
return cls(offset, data)
|
|
220
226
|
|
|
221
227
|
|
|
222
|
-
class
|
|
228
|
+
class FileReader:
|
|
223
229
|
"""A leveldb log file reader.
|
|
224
230
|
|
|
225
|
-
A
|
|
231
|
+
A Log FileReader provides read-only sequential iteration of serialized
|
|
226
232
|
structures in a leveldb logfile. These structures include:
|
|
227
233
|
* blocks (Block)
|
|
228
234
|
* phyiscal records (PhysicalRecord)
|
|
@@ -250,11 +256,10 @@ class LogFileReader:
|
|
|
250
256
|
a Block
|
|
251
257
|
"""
|
|
252
258
|
with open(self.filename, 'rb') as fh:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
if not block:
|
|
256
|
-
break
|
|
259
|
+
block = Block.FromStream(fh)
|
|
260
|
+
while block:
|
|
257
261
|
yield block
|
|
262
|
+
block = Block.FromStream(fh)
|
|
258
263
|
|
|
259
264
|
def GetPhysicalRecords(self) -> Generator[PhysicalRecord, None, None]:
|
|
260
265
|
"""Returns an iterator of PhysicalRecord instances.
|
|
@@ -278,28 +283,32 @@ class LogFileReader:
|
|
|
278
283
|
"""
|
|
279
284
|
buffer = bytearray()
|
|
280
285
|
for physical_record in self.GetPhysicalRecords():
|
|
281
|
-
if physical_record.record_type ==
|
|
286
|
+
if (physical_record.record_type ==
|
|
287
|
+
definitions.LogFilePhysicalRecordType.FULL):
|
|
282
288
|
buffer = physical_record.contents
|
|
283
289
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
284
290
|
yield WriteBatch.FromBytes(buffer, base_offset=offset)
|
|
285
291
|
buffer = bytearray()
|
|
286
|
-
elif physical_record.record_type
|
|
292
|
+
elif (physical_record.record_type
|
|
293
|
+
== definitions.LogFilePhysicalRecordType.FIRST):
|
|
287
294
|
offset = physical_record.contents_offset + physical_record.base_offset
|
|
288
295
|
buffer = bytearray(physical_record.contents)
|
|
289
|
-
elif physical_record.record_type ==
|
|
296
|
+
elif (physical_record.record_type ==
|
|
297
|
+
definitions.LogFilePhysicalRecordType.MIDDLE):
|
|
290
298
|
buffer.extend(bytearray(physical_record.contents))
|
|
291
|
-
elif physical_record.record_type ==
|
|
299
|
+
elif (physical_record.record_type ==
|
|
300
|
+
definitions.LogFilePhysicalRecordType.LAST):
|
|
292
301
|
buffer.extend(bytearray(physical_record.contents))
|
|
293
302
|
yield WriteBatch.FromBytes(buffer, base_offset=offset)
|
|
294
303
|
buffer = bytearray()
|
|
295
304
|
|
|
296
|
-
def
|
|
297
|
-
"""Returns an iterator of
|
|
305
|
+
def GetParsedInternalKeys(self) -> Generator[ParsedInternalKey, None, None]:
|
|
306
|
+
"""Returns an iterator of ParsedInternalKey instances.
|
|
298
307
|
|
|
299
|
-
A batch can contain
|
|
308
|
+
A batch can contain one or more key value records.
|
|
300
309
|
|
|
301
310
|
Yields:
|
|
302
|
-
|
|
311
|
+
ParsedInternalKey
|
|
303
312
|
"""
|
|
304
313
|
for batch in self.GetWriteBatches():
|
|
305
314
|
yield from batch.records
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""A module for records from LevelDB files."""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
import dataclasses
|
|
18
|
+
import pathlib
|
|
19
|
+
import sys
|
|
20
|
+
from typing import Any, Generator, Union
|
|
21
|
+
|
|
22
|
+
from dfindexeddb.leveldb import descriptor
|
|
23
|
+
from dfindexeddb.leveldb import ldb
|
|
24
|
+
from dfindexeddb.leveldb import log
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclasses.dataclass
|
|
28
|
+
class LevelDBRecord:
|
|
29
|
+
"""A leveldb record.
|
|
30
|
+
|
|
31
|
+
A record can come from a log file, a table (ldb) file or a descriptor
|
|
32
|
+
(MANIFEST) file.
|
|
33
|
+
|
|
34
|
+
Attributes:
|
|
35
|
+
path: the file path where the record was parsed from.
|
|
36
|
+
record: the leveldb record.
|
|
37
|
+
"""
|
|
38
|
+
path: str
|
|
39
|
+
record: Union[
|
|
40
|
+
ldb.KeyValueRecord,
|
|
41
|
+
log.ParsedInternalKey,
|
|
42
|
+
descriptor.VersionEdit]
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def FromFile(
|
|
46
|
+
cls,
|
|
47
|
+
file_path: pathlib.Path,
|
|
48
|
+
include_versionedit: bool = False
|
|
49
|
+
) -> Generator[LevelDBRecord, Any, Any]:
|
|
50
|
+
"""Yields leveldb records from the given path.
|
|
51
|
+
|
|
52
|
+
Yields:
|
|
53
|
+
LevelDBRecords
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
file_path: the file path.
|
|
57
|
+
include_versionedit: include VersionEdit records from descriptor files.
|
|
58
|
+
"""
|
|
59
|
+
if file_path.name.endswith('.log'):
|
|
60
|
+
for record in log.FileReader(
|
|
61
|
+
file_path.as_posix()).GetParsedInternalKeys():
|
|
62
|
+
yield cls(path=file_path.as_posix(), record=record)
|
|
63
|
+
elif file_path.name.endswith('.ldb'):
|
|
64
|
+
for record in ldb.FileReader(file_path.as_posix()).GetKeyValueRecords():
|
|
65
|
+
yield cls(path=file_path.as_posix(), record=record)
|
|
66
|
+
elif file_path.name.startswith('MANIFEST'):
|
|
67
|
+
if not include_versionedit:
|
|
68
|
+
print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
|
|
69
|
+
return
|
|
70
|
+
for record in descriptor.FileReader(
|
|
71
|
+
file_path.as_posix()).GetVersionEdits():
|
|
72
|
+
yield cls(path=file_path.as_posix(), record=record)
|
|
73
|
+
elif file_path.name in ('LOCK', 'CURRENT', 'LOG', 'LOG.old'):
|
|
74
|
+
print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
|
|
75
|
+
else:
|
|
76
|
+
print(f'Unsupported file type {file_path.as_posix()}', file=sys.stderr)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def FromDir(
|
|
80
|
+
cls,
|
|
81
|
+
path: pathlib.Path,
|
|
82
|
+
include_versionedit: bool = False
|
|
83
|
+
) -> Generator[LevelDBRecord, Any, Any]:
|
|
84
|
+
"""Yields LevelDBRecords from the given directory.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
path: the file path.
|
|
88
|
+
include_versionedit: include VersionEdit records from descriptor files.
|
|
89
|
+
|
|
90
|
+
Yields:
|
|
91
|
+
LevelDBRecords
|
|
92
|
+
|
|
93
|
+
Raises:
|
|
94
|
+
ValueError: if path is not a directory.
|
|
95
|
+
"""
|
|
96
|
+
if path.is_dir():
|
|
97
|
+
for file_path in path.iterdir():
|
|
98
|
+
yield from cls.FromFile(
|
|
99
|
+
file_path=file_path,
|
|
100
|
+
include_versionedit=include_versionedit)
|
|
101
|
+
else:
|
|
102
|
+
raise ValueError(f'{path} is not a directory')
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Helper/utility classes for LevelDB."""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
import io
|
|
18
|
+
from typing import BinaryIO, Tuple, Type, TypeVar
|
|
19
|
+
|
|
20
|
+
from dfindexeddb import errors
|
|
21
|
+
from dfindexeddb import utils
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LevelDBDecoder(utils.StreamDecoder):
|
|
25
|
+
"""A helper class to decode data types from LevelDB files."""
|
|
26
|
+
|
|
27
|
+
def DecodeBool(self) -> Tuple[int, bool]:
|
|
28
|
+
"""Returns a Tuple of the offset of decoding and the bool value."""
|
|
29
|
+
offset, buffer = self.ReadBytes(1)
|
|
30
|
+
return offset, buffer[0] is not None
|
|
31
|
+
|
|
32
|
+
def DecodeString(self) -> Tuple[int, str]:
|
|
33
|
+
"""Returns a tuple of the offset of decoding and the string value.
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
errors.DecoderError: when the parsed string buffer is not even (i.e.
|
|
37
|
+
cannot be decoded as a UTF-16-BE string.
|
|
38
|
+
"""
|
|
39
|
+
offset = self.stream.tell()
|
|
40
|
+
buffer = self.stream.read()
|
|
41
|
+
if len(buffer) % 2:
|
|
42
|
+
raise errors.DecoderError(
|
|
43
|
+
f'Odd number of bytes encountered at offset {offset}')
|
|
44
|
+
return offset, buffer.decode('utf-16-be')
|
|
45
|
+
|
|
46
|
+
def DecodeLengthPrefixedSlice(self) -> Tuple[int, bytes]:
|
|
47
|
+
"""Returns a tuple of the offset of decoding and the byte 'slice'."""
|
|
48
|
+
offset, num_bytes = self.DecodeUint32Varint()
|
|
49
|
+
_, blob = self.ReadBytes(num_bytes)
|
|
50
|
+
return offset, blob
|
|
51
|
+
|
|
52
|
+
def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
|
|
53
|
+
"""Returns a tuple of a the offset of decoding and the binary blob."""
|
|
54
|
+
offset, num_bytes = self.DecodeUint64Varint()
|
|
55
|
+
_, blob = self.ReadBytes(num_bytes)
|
|
56
|
+
return offset, blob
|
|
57
|
+
|
|
58
|
+
def DecodeStringWithLength(self, encoding='utf-16-be') -> Tuple[int, str]:
|
|
59
|
+
"""Returns a tuple of the offset of decoding and the string value."""
|
|
60
|
+
offset, length = self.DecodeUint64Varint()
|
|
61
|
+
_, buffer = self.ReadBytes(length*2)
|
|
62
|
+
return offset, buffer.decode(encoding=encoding)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
T = TypeVar('T')
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class FromDecoderMixin:
|
|
69
|
+
"""A mixin for parsing dataclass attributes using a LevelDBDecoder."""
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def FromDecoder(
|
|
73
|
+
cls: Type[T], decoder: LevelDBDecoder, base_offset: int = 0) -> T:
|
|
74
|
+
"""Decodes a class type from the current position of a LevelDBDecoder.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
decoder: the LevelDBDecoder.
|
|
78
|
+
base_offset: the base offset.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
The class instance.
|
|
82
|
+
|
|
83
|
+
Raises:
|
|
84
|
+
NotImplementedError if the child class does not implement this method.
|
|
85
|
+
"""
|
|
86
|
+
raise NotImplementedError
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def FromStream(
|
|
90
|
+
cls: Type[T], stream: BinaryIO, base_offset: int = 0) -> T:
|
|
91
|
+
"""Decodes a class type from the current position of a binary stream.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
stream: the binary stream.
|
|
95
|
+
base_offset: the base offset of the binary stream.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
The class instance.
|
|
99
|
+
"""
|
|
100
|
+
decoder = LevelDBDecoder(stream)
|
|
101
|
+
return cls.FromDecoder(decoder=decoder, base_offset=base_offset)
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
104
|
+
def FromBytes(
|
|
105
|
+
cls: Type[T], raw_data: bytes, base_offset: int = 0) -> T:
|
|
106
|
+
"""Parses a class type from raw bytes.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
raw_data: the raw data.
|
|
110
|
+
base_offset: the base offset of the raw data.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
The class instance.
|
|
114
|
+
"""
|
|
115
|
+
stream = io.BytesIO(raw_data)
|
|
116
|
+
return cls.FromStream(stream=stream, base_offset=base_offset)
|
dfindexeddb/utils.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Utilities for dfindexeddb."""
|
|
16
|
+
from __future__ import annotations
|
|
16
17
|
import io
|
|
17
18
|
import os
|
|
18
19
|
import struct
|
|
@@ -206,55 +207,19 @@ class StreamDecoder:
|
|
|
206
207
|
return self.DecodeZigzagVarint(max_bytes=10)
|
|
207
208
|
|
|
208
209
|
|
|
209
|
-
|
|
210
|
-
class LevelDBDecoder(StreamDecoder):
|
|
211
|
-
"""A helper class to decode data types from LevelDB files."""
|
|
212
|
-
|
|
213
|
-
def DecodeBool(self) -> Tuple[int, bool]:
|
|
214
|
-
"""Returns a Tuple of the offset of decoding and the bool value."""
|
|
215
|
-
offset, buffer = self.ReadBytes(1)
|
|
216
|
-
return offset, buffer[0] is not None
|
|
217
|
-
|
|
218
|
-
def DecodeString(self) -> Tuple[int, str]:
|
|
219
|
-
"""Returns a tuple of the offset of decoding and the string value.
|
|
220
|
-
|
|
221
|
-
Raises:
|
|
222
|
-
errors.DecoderError: when the parsed string buffer is not even (i.e.
|
|
223
|
-
cannot be decoded as a UTF-16-BE string.
|
|
224
|
-
"""
|
|
225
|
-
offset = self.stream.tell()
|
|
226
|
-
buffer = self.stream.read()
|
|
227
|
-
if len(buffer) % 2:
|
|
228
|
-
raise errors.DecoderError(
|
|
229
|
-
f'Odd number of bytes encountered at offset {offset}')
|
|
230
|
-
return offset, buffer.decode('utf-16-be')
|
|
231
|
-
|
|
232
|
-
def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
|
|
233
|
-
"""Returns a tuple of a the offset of decoding and the binary blob."""
|
|
234
|
-
offset, num_bytes = self.DecodeUint64Varint()
|
|
235
|
-
_, blob = self.ReadBytes(num_bytes)
|
|
236
|
-
return offset, blob
|
|
237
|
-
|
|
238
|
-
def DecodeStringWithLength(self) -> Tuple[int, str]:
|
|
239
|
-
"""Returns a tuple of the offset of decoding and the string value."""
|
|
240
|
-
offset, length = self.DecodeUint64Varint()
|
|
241
|
-
_, buffer = self.ReadBytes(length*2)
|
|
242
|
-
return offset, buffer.decode('utf-16-be')
|
|
243
|
-
|
|
244
|
-
|
|
245
210
|
T = TypeVar('T')
|
|
246
211
|
|
|
247
212
|
|
|
248
|
-
class
|
|
249
|
-
"""A mixin for
|
|
213
|
+
class FromDecoderMixin:
|
|
214
|
+
"""A mixin for parsing dataclass attributes using a LevelDBDecoder."""
|
|
250
215
|
|
|
251
216
|
@classmethod
|
|
252
217
|
def FromDecoder(
|
|
253
|
-
cls: Type[T], decoder:
|
|
254
|
-
"""Decodes a class type from the current position of a
|
|
218
|
+
cls: Type[T], decoder: StreamDecoder, base_offset: int = 0) -> T:
|
|
219
|
+
"""Decodes a class type from the current position of a StreamDecoder.
|
|
255
220
|
|
|
256
221
|
Args:
|
|
257
|
-
decoder: the
|
|
222
|
+
decoder: the StreamDecoder.
|
|
258
223
|
base_offset: the base offset.
|
|
259
224
|
|
|
260
225
|
Returns:
|
|
@@ -277,8 +242,8 @@ class FromStreamMixin: # TODO: refactor leveldb parsers
|
|
|
277
242
|
Returns:
|
|
278
243
|
The class instance.
|
|
279
244
|
"""
|
|
280
|
-
decoder =
|
|
281
|
-
return cls.FromDecoder(decoder, base_offset)
|
|
245
|
+
decoder = StreamDecoder(stream)
|
|
246
|
+
return cls.FromDecoder(decoder=decoder, base_offset=base_offset)
|
|
282
247
|
|
|
283
248
|
@classmethod
|
|
284
249
|
def FromBytes(
|