dfindexeddb 20240301__tar.gz → 20240305__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {dfindexeddb-20240301/dfindexeddb.egg-info → dfindexeddb-20240305}/PKG-INFO +1 -1
  2. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/cli.py +33 -8
  3. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/chromium.py +8 -8
  4. dfindexeddb-20240305/dfindexeddb/leveldb/definitions.py +43 -0
  5. dfindexeddb-20240305/dfindexeddb/leveldb/descriptor.py +335 -0
  6. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/leveldb/ldb.py +33 -33
  7. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/leveldb/log.py +58 -56
  8. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/utils.py +11 -5
  9. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/version.py +1 -1
  10. {dfindexeddb-20240301 → dfindexeddb-20240305/dfindexeddb.egg-info}/PKG-INFO +1 -1
  11. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/SOURCES.txt +2 -0
  12. {dfindexeddb-20240301 → dfindexeddb-20240305}/pyproject.toml +1 -1
  13. {dfindexeddb-20240301 → dfindexeddb-20240305}/AUTHORS +0 -0
  14. {dfindexeddb-20240301 → dfindexeddb-20240305}/LICENSE +0 -0
  15. {dfindexeddb-20240301 → dfindexeddb-20240305}/README.md +0 -0
  16. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/__init__.py +0 -0
  17. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/errors.py +0 -0
  18. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/__init__.py +0 -0
  19. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/blink.py +0 -0
  20. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/definitions.py +0 -0
  21. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/v8.py +0 -0
  22. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/leveldb/__init__.py +0 -0
  23. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/dependency_links.txt +0 -0
  24. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/entry_points.txt +0 -0
  25. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/requires.txt +0 -0
  26. {dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/top_level.txt +0 -0
  27. {dfindexeddb-20240301 → dfindexeddb-20240305}/setup.cfg +0 -0
  28. {dfindexeddb-20240301 → dfindexeddb-20240305}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dfindexeddb
3
- Version: 20240301
3
+ Version: 20240305
4
4
  Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
5
5
  Author-email: Syd Pleno <sydp@google.com>
6
6
  Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
@@ -21,18 +21,31 @@ import pathlib
21
21
  import sys
22
22
  import traceback
23
23
 
24
- from dfindexeddb.leveldb import log
24
+ from dfindexeddb import errors
25
+ from dfindexeddb import version
26
+ from dfindexeddb.leveldb import descriptor
25
27
  from dfindexeddb.leveldb import ldb
28
+ from dfindexeddb.leveldb import log
26
29
  from dfindexeddb.indexeddb import chromium
27
- from dfindexeddb import errors
28
30
  from dfindexeddb.indexeddb import v8
29
31
 
30
32
 
33
+ _VALID_PRINTABLE_CHARACTERS = (
34
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
35
+ '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
36
+
37
+
31
38
  class Encoder(json.JSONEncoder):
32
39
  """A JSON encoder class for dfindexeddb fields."""
33
40
  def default(self, o):
34
41
  if isinstance(o, bytes):
35
- return o.decode(encoding='ascii', errors='backslashreplace')
42
+ out = []
43
+ for x in o:
44
+ if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
45
+ out.append(f'\\x{x:02X}')
46
+ else:
47
+ out.append(chr(x))
48
+ return ''.join(out)
36
49
  if isinstance(o, datetime):
37
50
  return o.isoformat()
38
51
  if isinstance(o, v8.Undefined):
@@ -59,10 +72,10 @@ def IndexeddbCommand(args):
59
72
  """The CLI for processing a log/ldb file as indexeddb."""
60
73
  if args.source.name.endswith('.log'):
61
74
  records = list(
62
- log.LogFileReader(args.source).GetKeyValueRecords())
75
+ log.FileReader(args.source).GetKeyValueRecords())
63
76
  elif args.source.name.endswith('.ldb'):
64
77
  records = list(
65
- ldb.LdbFileReader(args.source).GetKeyValueRecords())
78
+ ldb.FileReader(args.source).GetKeyValueRecords())
66
79
  else:
67
80
  print('Unsupported file type.', file=sys.stderr)
68
81
  return
@@ -78,9 +91,17 @@ def IndexeddbCommand(args):
78
91
  _Output(record, to_json=args.json)
79
92
 
80
93
 
94
+ def ManifestCommand(args):
95
+ """The CLI for processing MANIFEST aka Descriptor files."""
96
+ manifest_file = descriptor.FileReader(args.source)
97
+
98
+ for version_edit in manifest_file.GetVersionEdits():
99
+ _Output(version_edit, to_json=args.json)
100
+
101
+
81
102
  def LdbCommand(args):
82
103
  """The CLI for processing ldb files."""
83
- ldb_file = ldb.LdbFileReader(args.source)
104
+ ldb_file = ldb.FileReader(args.source)
84
105
 
85
106
  if args.structure_type == 'blocks':
86
107
  # Prints block information.
@@ -95,7 +116,7 @@ def LdbCommand(args):
95
116
 
96
117
  def LogCommand(args):
97
118
  """The CLI for processing log files."""
98
- log_file = log.LogFileReader(args.source)
119
+ log_file = log.FileReader(args.source)
99
120
 
100
121
  if args.structure_type == 'blocks':
101
122
  # Prints block information.
@@ -122,7 +143,8 @@ def App():
122
143
  """The CLI app entrypoint."""
123
144
  parser = argparse.ArgumentParser(
124
145
  prog='dfindexeddb',
125
- description='A cli tool for the dfindexeddb package')
146
+ description='A cli tool for the dfindexeddb package',
147
+ epilog=f'Version {version.GetVersion()}')
126
148
 
127
149
  parser.add_argument(
128
150
  '-s', '--source', required=True, type=pathlib.Path,
@@ -147,6 +169,9 @@ def App():
147
169
  'records'])
148
170
  parser_log.set_defaults(func=LdbCommand)
149
171
 
172
+ parser_log = subparsers.add_parser('manifest')
173
+ parser_log.set_defaults(func=ManifestCommand)
174
+
150
175
  parser_log = subparsers.add_parser('indexeddb')
151
176
  parser_log.set_defaults(func=IndexeddbCommand)
152
177
 
@@ -31,7 +31,7 @@ T = TypeVar('T')
31
31
 
32
32
 
33
33
  @dataclass
34
- class KeyPrefix(utils.FromStreamMixin):
34
+ class KeyPrefix(utils.FromDecoderMixin):
35
35
  """The IndexedDB key prefix.
36
36
 
37
37
  Attributes:
@@ -111,7 +111,7 @@ class KeyPrefix(utils.FromStreamMixin):
111
111
 
112
112
 
113
113
  @dataclass
114
- class IDBKey(utils.FromStreamMixin):
114
+ class IDBKey(utils.FromDecoderMixin):
115
115
  """An IDBKey.
116
116
 
117
117
  Attributes:
@@ -199,7 +199,7 @@ class IDBKey(utils.FromStreamMixin):
199
199
 
200
200
 
201
201
  @dataclass
202
- class IDBKeyPath(utils.FromStreamMixin):
202
+ class IDBKeyPath(utils.FromDecoderMixin):
203
203
  """An IDBKeyPath.
204
204
 
205
205
  Arguments:
@@ -256,7 +256,7 @@ class IDBKeyPath(utils.FromStreamMixin):
256
256
 
257
257
 
258
258
  @dataclass
259
- class BlobJournalEntry(utils.FromStreamMixin):
259
+ class BlobJournalEntry(utils.FromDecoderMixin):
260
260
  """A blob journal entry.
261
261
 
262
262
  Attributes:
@@ -287,7 +287,7 @@ class BlobJournalEntry(utils.FromStreamMixin):
287
287
 
288
288
 
289
289
  @dataclass
290
- class BlobJournal(utils.FromStreamMixin):
290
+ class BlobJournal(utils.FromDecoderMixin):
291
291
  """A BlobJournal.
292
292
 
293
293
  Attributes:
@@ -1226,7 +1226,7 @@ class IndexMetaDataKey(BaseIndexedDBKey):
1226
1226
 
1227
1227
 
1228
1228
  @dataclass
1229
- class ExternalObjectEntry(utils.FromStreamMixin):
1229
+ class ExternalObjectEntry(utils.FromDecoderMixin):
1230
1230
  """An IndexedDB external object entry.
1231
1231
 
1232
1232
  Args:
@@ -1287,7 +1287,7 @@ class ExternalObjectEntry(utils.FromStreamMixin):
1287
1287
 
1288
1288
 
1289
1289
  @dataclass
1290
- class IndexedDBExternalObject(utils.FromStreamMixin):
1290
+ class IndexedDBExternalObject(utils.FromDecoderMixin):
1291
1291
  """An IndexedDB external object.
1292
1292
 
1293
1293
  Args:
@@ -1346,7 +1346,7 @@ class IndexedDBRecord:
1346
1346
 
1347
1347
  @classmethod
1348
1348
  def FromLevelDBRecord(
1349
- cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
1349
+ cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
1350
1350
  ) -> IndexedDBRecord:
1351
1351
  """Returns an IndexedDBRecord from a ParsedInternalKey."""
1352
1352
  idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2024 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Definitions for LevelDB."""
16
+
17
+ import enum
18
+
19
+
20
+ PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
21
+ SEQUENCE_LENGTH = 7
22
+ TYPE_LENGTH = 1
23
+
24
+
25
+ class VersionEditTags(enum.IntEnum):
26
+ """VersionEdit tags."""
27
+ COMPARATOR = 1
28
+ LOG_NUMBER = 2
29
+ NEXT_FILE_NUMBER = 3
30
+ LAST_SEQUENCE = 4
31
+ COMPACT_POINTER = 5
32
+ DELETED_FILE = 6
33
+ NEW_FILE = 7
34
+ # 8 was used for large value refs
35
+ PREV_LOG_NUMBER = 9
36
+
37
+
38
+ class LogFilePhysicalRecordType(enum.IntEnum):
39
+ """Log file physical record types."""
40
+ FULL = 1
41
+ FIRST = 2
42
+ MIDDLE = 3
43
+ LAST = 4
@@ -0,0 +1,335 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2024 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Parser for LevelDB Manifest files."""
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass, field
19
+ from typing import Generator, Optional
20
+
21
+ from dfindexeddb import errors
22
+ from dfindexeddb import utils
23
+ from dfindexeddb.leveldb import definitions
24
+ from dfindexeddb.leveldb import log
25
+
26
+
27
+
28
+ @dataclass
29
+ class InternalKey:
30
+ """An InternalKey.
31
+
32
+ Attributes:
33
+ offset: the offset.
34
+ user_key: the user key.
35
+ sequence_number: the sequence number.
36
+ key_type: the key type.
37
+ """
38
+ offset: int
39
+ user_key: bytes = field(repr=False)
40
+ sequence_number: int
41
+ key_type: int
42
+
43
+ @classmethod
44
+ def FromDecoder(
45
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
46
+ """Decodes a InternalKey from the current position of a LevelDBDecoder.
47
+
48
+ Args:
49
+ decoder: the LevelDBDecoder.
50
+ base_offset: the base offset.
51
+
52
+ Returns:
53
+ The InternalKey instance.
54
+ """
55
+ offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
56
+
57
+ if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
58
+ raise errors.ParserError('Insufficient bytes to parse InternalKey')
59
+
60
+ user_key = slice_bytes[:-definitions.SEQUENCE_LENGTH]
61
+ sequence_number = int.from_bytes(
62
+ slice_bytes[-definitions.SEQUENCE_LENGTH:],
63
+ byteorder='little',
64
+ signed=False)
65
+ key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
66
+
67
+ return cls(
68
+ offset=base_offset + offset,
69
+ user_key=user_key,
70
+ sequence_number=sequence_number,
71
+ key_type=key_type)
72
+
73
+
74
+ @dataclass
75
+ class NewFile(utils.FromDecoderMixin):
76
+ """A NewFile.
77
+
78
+ Attributes:
79
+ offset: the offset.
80
+ level: the level.
81
+ number: the number.
82
+ file_size: the file size.
83
+ smallest: the smallest internal key.
84
+ largest: the largest internal key.
85
+ """
86
+ offset: int
87
+ level: int
88
+ number: int
89
+ file_size: int
90
+ smallest: InternalKey
91
+ largest: InternalKey
92
+
93
+ @classmethod
94
+ def FromDecoder(
95
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
96
+ """Decodes a NewFile from the current position of a LevelDBDecoder.
97
+
98
+ Args:
99
+ decoder: the LevelDBDecoder.
100
+ base_offset: the base offset.
101
+
102
+ Returns:
103
+ The NewFile instance.
104
+ """
105
+ offset, level = decoder.DecodeUint32Varint()
106
+ _, number = decoder.DecodeUint64Varint()
107
+ _, file_size = decoder.DecodeUint64Varint()
108
+ smallest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
109
+ largest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
110
+
111
+ return cls(
112
+ offset=offset + base_offset,
113
+ level=level,
114
+ number=number,
115
+ file_size=file_size,
116
+ smallest=smallest,
117
+ largest=largest)
118
+
119
+
120
+ @dataclass
121
+ class CompactPointer(utils.FromDecoderMixin):
122
+ """A NewFile.
123
+
124
+ Attributes:
125
+ offset: the offset.
126
+ level: the level.
127
+ key: the key bytes.
128
+ """
129
+ offset: int
130
+ level: int
131
+ key: bytes = field(repr=False)
132
+
133
+ @classmethod
134
+ def FromDecoder(
135
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
136
+ ) -> CompactPointer:
137
+ """Decodes a CompactPointer from the current position of a LevelDBDecoder.
138
+
139
+ Args:
140
+ decoder: the LevelDBDecoder.
141
+ base_offset: the base offset.
142
+
143
+ Returns:
144
+ The CompactPointer instance.
145
+ """
146
+ offset, level = decoder.DecodeUint32Varint()
147
+ _, key = decoder.DecodeLengthPrefixedSlice()
148
+ return cls(offset=base_offset + offset, level=level, key=key)
149
+
150
+
151
+ @dataclass
152
+ class DeletedFile(utils.FromDecoderMixin):
153
+ """A DeletedFile.
154
+
155
+ Attributes:
156
+ offset: the offset.
157
+ level: the level.
158
+ number: the number.
159
+ """
160
+ offset: int
161
+ level: int
162
+ number: int
163
+
164
+ @classmethod
165
+ def FromDecoder(
166
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
167
+ """Decodes a DeletedFile from the current position of a LevelDBDecoder.
168
+
169
+ Args:
170
+ decoder: the LevelDBDecoder.
171
+ base_offset: the base offset.
172
+
173
+ Returns:
174
+ The DeletedFile instance.
175
+ """
176
+ offset, level = decoder.DecodeUint32Varint()
177
+ _, number = decoder.DecodeUint64Varint()
178
+ return cls(offset=base_offset + offset, level=level, number=number)
179
+
180
+
181
+ @dataclass
182
+ class VersionEdit(utils.FromDecoderMixin):
183
+ """A VersionEdit is recorded in a LevelDB descriptor/manifest file.
184
+
185
+ Attributes:
186
+ offset: the offset where the VersionEdit was parsed.
187
+ comparator: the comparator.
188
+ log_number: the log number.
189
+ prev_log_number: the previous log number.
190
+ next_file_number: the next file number.
191
+ last_sequence: the last sequence.
192
+ compact_pointers: the list of CompactPointers.
193
+ deleted_files: the list of DeletedFiles.
194
+ new_files: the list of NewFiles.
195
+ """
196
+ offset: int
197
+ comparator: Optional[bytes] = None
198
+ log_number: Optional[int] = None
199
+ prev_log_number: Optional[int] = None
200
+ next_file_number: Optional[int] = None
201
+ last_sequence: Optional[int] = None
202
+ compact_pointers: list[CompactPointer] = field(default_factory=list)
203
+ deleted_files: list[DeletedFile] = field(default_factory=list)
204
+ new_files: list[NewFile] = field(default_factory=list)
205
+
206
+ @classmethod
207
+ def FromDecoder(
208
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
209
+ """Decodes a VersionEdit from the current position of a LevelDBDecoder.
210
+
211
+ Args:
212
+ decoder: the LevelDBDecoder.
213
+ base_offset: the base offset.
214
+
215
+ Returns:
216
+ The VersionEdit instance.
217
+
218
+ Raises:
219
+ ParserError if an invalid VersionEditTag is parsed.
220
+ """
221
+ offset, tag_byte = decoder.DecodeUint32Varint()
222
+ version_edit = cls(offset=base_offset + offset)
223
+
224
+ while tag_byte:
225
+ try:
226
+ tag = definitions.VersionEditTags(tag_byte)
227
+ except TypeError as error:
228
+ raise errors.ParserError(
229
+ f'Invalid VersionEditTag at offset {offset}') from error
230
+
231
+ if tag == definitions.VersionEditTags.COMPARATOR:
232
+ _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
233
+ elif tag == definitions.VersionEditTags.LOG_NUMBER:
234
+ _, version_edit.log_number = decoder.DecodeUint64Varint()
235
+ elif tag == definitions.VersionEditTags.PREV_LOG_NUMBER:
236
+ _, version_edit.prev_log_number = decoder.DecodeUint64Varint()
237
+ elif tag == definitions.VersionEditTags.NEXT_FILE_NUMBER:
238
+ _, version_edit.next_file_number = decoder.DecodeUint64Varint()
239
+ elif tag == definitions.VersionEditTags.LAST_SEQUENCE:
240
+ _, version_edit.last_sequence = decoder.DecodeUint64Varint()
241
+ elif tag == definitions.VersionEditTags.COMPACT_POINTER:
242
+ compact_pointer = CompactPointer.FromDecoder(
243
+ decoder=decoder, base_offset=base_offset + offset)
244
+ version_edit.compact_pointers.append(compact_pointer)
245
+ elif tag == definitions.VersionEditTags.DELETED_FILE:
246
+ deleted_file = DeletedFile.FromDecoder(
247
+ decoder=decoder, base_offset=base_offset + offset)
248
+ version_edit.deleted_files.append(deleted_file)
249
+ elif tag == definitions.VersionEditTags.NEW_FILE:
250
+ file_metadata = NewFile.FromDecoder(
251
+ decoder=decoder, base_offset=base_offset + offset)
252
+ version_edit.new_files.append(file_metadata)
253
+
254
+ if decoder.NumRemainingBytes() == 0:
255
+ break
256
+
257
+ offset, tag_byte = decoder.DecodeUint32Varint()
258
+
259
+ return version_edit
260
+
261
+
262
+ class FileReader:
263
+ """A Descriptor file reader.
264
+
265
+ A DescriptorFileReader provides read-only sequential iteration of serialized
266
+ structures in a leveldb Descriptor file. These structures include:
267
+ * blocks (log.Block)
268
+ * records (log.PhysicalRecords)
269
+ * version edits (VersionEdit)
270
+ """
271
+ def __init__(self, filename: str):
272
+ """Initializes the Descriptor a.k.a. MANIFEST file.
273
+
274
+ Args:
275
+ filename: the Descriptor filename (e.g. MANIFEST-000001)
276
+ """
277
+ self.filename = filename
278
+
279
+
280
+ def GetBlocks(self) -> Generator[log.Block, None, None]:
281
+ """Returns an iterator of Block instances.
282
+
283
+ A Descriptor file is composed of one or more blocks.
284
+
285
+ Yields:
286
+ Block
287
+ """
288
+ with open(self.filename, 'rb') as fh:
289
+ block = log.Block.FromStream(fh)
290
+ while block:
291
+ yield block
292
+ block = log.Block.FromStream(fh)
293
+
294
+ def GetPhysicalRecords(self) -> Generator[log.PhysicalRecord, None, None]:
295
+ """Returns an iterator of PhysicalRecord instances.
296
+
297
+ A block is composed of one or more physical records.
298
+
299
+ Yields:
300
+ PhysicalRecord
301
+ """
302
+ for block in self.GetBlocks():
303
+ yield from block.GetPhysicalRecords()
304
+
305
+ def GetVersionEdits(self) -> Generator[VersionEdit, None, None]:
306
+ """Returns an iterator of VersionEdit instances.
307
+
308
+ Depending on the VersionEdit size, it can be spread across one or
309
+ more physical records.
310
+
311
+ Yields:
312
+ VersionEdit
313
+ """
314
+ buffer = bytearray()
315
+ for physical_record in self.GetPhysicalRecords():
316
+ if (physical_record.record_type ==
317
+ definitions.LogFilePhysicalRecordType.FULL):
318
+ buffer = physical_record.contents
319
+ offset = physical_record.contents_offset + physical_record.base_offset
320
+ version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
321
+ yield version_edit
322
+ buffer = bytearray()
323
+ elif (physical_record.record_type ==
324
+ definitions.LogFilePhysicalRecordType.FIRST):
325
+ offset = physical_record.contents_offset + physical_record.base_offset
326
+ buffer = bytearray(physical_record.contents)
327
+ elif (physical_record.record_type ==
328
+ definitions.LogFilePhysicalRecordType.MIDDLE):
329
+ buffer.extend(bytearray(physical_record.contents))
330
+ elif (physical_record.record_type ==
331
+ definitions.LogFilePhysicalRecordType.LAST):
332
+ buffer.extend(bytearray(physical_record.contents))
333
+ version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
334
+ yield version_edit
335
+ buffer = bytearray()
@@ -24,10 +24,11 @@ import snappy
24
24
  import zstd
25
25
 
26
26
  from dfindexeddb import utils
27
+ from dfindexeddb.leveldb import definitions
27
28
 
28
29
 
29
30
  @dataclass
30
- class LdbKeyValueRecord:
31
+ class KeyValueRecord:
31
32
  """A leveldb table key-value record.
32
33
 
33
34
  Attributes:
@@ -43,14 +44,10 @@ class LdbKeyValueRecord:
43
44
  sequence_number: int
44
45
  type: int
45
46
 
46
- PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
47
- SEQUENCE_LENGTH = 7
48
- TYPE_LENGTH = 1
49
-
50
47
  @classmethod
51
48
  def FromDecoder(
52
49
  cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
53
- ) -> Tuple[LdbKeyValueRecord, bytes]:
50
+ ) -> Tuple[KeyValueRecord, bytes]:
54
51
  """Decodes a ldb key value record.
55
52
 
56
53
  Args:
@@ -59,7 +56,7 @@ class LdbKeyValueRecord:
59
56
  shared_key: the shared key bytes.
60
57
 
61
58
  Returns:
62
- A tuple of the parsed LdbKeyValueRecord and the updated shared key bytes.
59
+ A tuple of the parsed KeyValueRecord and the updated shared key bytes.
63
60
  """
64
61
  offset, shared_bytes = decoder.DecodeUint32Varint()
65
62
  _, unshared_bytes = decoder.DecodeUint32Varint()
@@ -68,17 +65,17 @@ class LdbKeyValueRecord:
68
65
  _, value = decoder.ReadBytes(value_length)
69
66
 
70
67
  shared_key = shared_key[:shared_bytes] + key_delta
71
- key = shared_key[:-cls.PACKED_SEQUENCE_AND_TYPE_LENGTH]
68
+ key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
72
69
  sequence_number = int.from_bytes(
73
- key[-cls.SEQUENCE_LENGTH:], byteorder='little', signed=False)
74
- key_type = shared_key[-cls.PACKED_SEQUENCE_AND_TYPE_LENGTH]
70
+ key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
71
+ key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
75
72
 
76
73
  return cls(offset + block_offset, key, value, sequence_number,
77
74
  key_type), shared_key
78
75
 
79
76
 
80
77
  @dataclass
81
- class LdbBlock:
78
+ class Block:
82
79
  """A leveldb table block.
83
80
 
84
81
  Attributes:
@@ -111,11 +108,11 @@ class LdbBlock:
111
108
  return zstd.decompress(self.data)
112
109
  return self.data
113
110
 
114
- def GetRecords(self) -> Iterable[LdbKeyValueRecord]:
111
+ def GetRecords(self) -> Iterable[KeyValueRecord]:
115
112
  """Returns an iterator over the key value records in the block.
116
113
 
117
114
  Yields:
118
- LdbKeyValueRecords
115
+ KeyValueRecords
119
116
  """
120
117
  # get underlying block content, decompressing if required
121
118
  buffer = self.GetBuffer()
@@ -135,7 +132,7 @@ class LdbBlock:
135
132
  key = b''
136
133
 
137
134
  while decoder.stream.tell() < restarts_offset:
138
- key_value_record, key = LdbKeyValueRecord.FromDecoder(
135
+ key_value_record, key = KeyValueRecord.FromDecoder(
139
136
  decoder, self.block_offset, key)
140
137
  yield key_value_record
141
138
 
@@ -145,7 +142,7 @@ class LdbBlock:
145
142
 
146
143
 
147
144
  @dataclass
148
- class BlockHandle:
145
+ class BlockHandle(utils.FromDecoderMixin):
149
146
  """A handle to a block in the ldb file.
150
147
 
151
148
  Attributes:
@@ -159,14 +156,14 @@ class BlockHandle:
159
156
 
160
157
  BLOCK_TRAILER_SIZE = 5
161
158
 
162
- def Load(self, stream: BinaryIO) -> LdbBlock:
159
+ def Load(self, stream: BinaryIO) -> Block:
163
160
  """Loads the block data.
164
161
 
165
162
  Args:
166
163
  stream: the binary stream of the ldb file.
167
164
 
168
165
  Returns:
169
- a LdbBlock.
166
+ a Block.
170
167
 
171
168
  Raises:
172
169
  ValueError: if it could not read all of the block or block footer.
@@ -180,32 +177,35 @@ class BlockHandle:
180
177
  if len(footer) != self.BLOCK_TRAILER_SIZE:
181
178
  raise ValueError('Could not read all of the block footer')
182
179
 
183
- return LdbBlock(self.offset, self.block_offset, self.length, data, footer)
180
+ return Block(self.offset, self.block_offset, self.length, data, footer)
184
181
 
185
182
  @classmethod
186
- def FromStream(cls, stream: BinaryIO, base_offset: int = 0) -> BlockHandle:
187
- """Reads a block handle from a binary stream.
183
+ def FromDecoder(
184
+ cls: BlockHandle,
185
+ decoder: utils.LevelDBDecoder,
186
+ base_offset: int = 0
187
+ ) -> BlockHandle:
188
+ """Decodes a BlockHandle from the current position of a LevelDBDecoder.
188
189
 
189
190
  Args:
190
- stream: the binary stream.
191
+ decoder: the LevelDBDecoder.
191
192
  base_offset: the base offset.
192
193
 
193
194
  Returns:
194
- A BlockHandle.
195
+ The BlockHandle instance.
195
196
  """
196
- decoder = utils.LevelDBDecoder(stream)
197
197
  offset, block_offset = decoder.DecodeUint64Varint()
198
198
  _, length = decoder.DecodeUint64Varint()
199
199
  return cls(offset + base_offset, block_offset, length)
200
200
 
201
201
 
202
- class LdbFileReader:
202
+ class FileReader:
203
203
  """A leveldb table (.ldb or .sst) file reader.
204
204
 
205
- A LdbFileReader provides read-only sequential iteration of serialized
205
+ A Ldb FileReader provides read-only sequential iteration of serialized
206
206
  structures in a leveldb ldb file. These structures include:
207
- * blocks (LdbBlock)
208
- * records (LdbKeyValueRecord)
207
+ * blocks (Block)
208
+ * records (KeyValueRecord)
209
209
  """
210
210
 
211
211
  FOOTER_SIZE = 48
@@ -234,11 +234,11 @@ class LdbFileReader:
234
234
  # self.meta_block = meta_handle.load(fh) # TODO: support meta blocks
235
235
  self.index_block = index_handle.Load(fh)
236
236
 
237
- def GetBlocks(self) -> Iterable[LdbBlock]:
238
- """Returns an iterator of LdbBlocks.
237
+ def GetBlocks(self) -> Iterable[Block]:
238
+ """Returns an iterator of Blocks.
239
239
 
240
240
  Yields:
241
- LdbBlock.
241
+ Block.
242
242
  """
243
243
  with open(self.filename, 'rb') as fh:
244
244
  for key_value_record in self.index_block.GetRecords():
@@ -247,11 +247,11 @@ class LdbFileReader:
247
247
  base_offset=key_value_record.offset)
248
248
  yield block_handle.Load(fh)
249
249
 
250
- def GetKeyValueRecords(self) -> Iterable[LdbKeyValueRecord]:
251
- """Returns an iterator of LdbKeyValueRecords.
250
+ def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
251
+ """Returns an iterator of KeyValueRecords.
252
252
 
253
253
  Yields:
254
- LdbKeyValueRecords.
254
+ KeyValueRecords.
255
255
  """
256
256
  for block in self.GetBlocks():
257
257
  yield from block.GetRecords()
@@ -16,19 +16,11 @@
16
16
  from __future__ import annotations
17
17
 
18
18
  from dataclasses import dataclass, field
19
- from enum import IntEnum
20
19
  import io
21
20
  from typing import BinaryIO, Generator, Iterable, Optional
22
21
 
23
22
  from dfindexeddb import utils
24
-
25
-
26
- class LogFilePhysicalRecordType(IntEnum):
27
- """LevelDB log file physical record types."""
28
- FULL = 1
29
- FIRST = 2
30
- MIDDLE = 3
31
- LAST = 4
23
+ from dfindexeddb.leveldb import definitions
32
24
 
33
25
 
34
26
  @dataclass
@@ -38,28 +30,35 @@ class ParsedInternalKey:
38
30
  Attributes:
39
31
  offset: the offset of the record.
40
32
  type: the record type.
33
+ sequence_number: the sequence number (inferred from the relative location
34
+ the ParsedInternalKey in a WriteBatch.)
41
35
  key: the record key.
42
36
  value: the record value.
43
37
  """
44
38
  offset: int
45
39
  type: int
40
+ sequence_number: int
46
41
  key: bytes
47
42
  value: bytes
43
+ __type__: str = 'ParsedInternalKey'
48
44
 
49
45
  @classmethod
50
46
  def FromDecoder(
51
47
  cls,
52
48
  decoder: utils.LevelDBDecoder,
53
- base_offset: int = 0
49
+ base_offset: int = 0,
50
+ sequence_number: int = 0,
54
51
  ) -> ParsedInternalKey:
55
52
  """Decodes an internal key value record.
56
53
 
57
54
  Args:
58
55
  decoder: the leveldb decoder.
59
- base_offset: the base offset for the parsed key value record.
56
+ base_offset: the base offset for the parsed internal key value record.
57
+ sequence_number: the sequence number for the parsed internal key value
58
+ record.
60
59
 
61
60
  Returns:
62
- a ParsedInternalKey
61
+ A ParsedInternalKey
63
62
 
64
63
  Raises:
65
64
  ValueError: if there is an invalid record type encountered.
@@ -72,15 +71,20 @@ class ParsedInternalKey:
72
71
  value = b''
73
72
  else:
74
73
  raise ValueError(f'Invalid record type {record_type}')
75
- return cls(base_offset + offset, record_type, key, value)
74
+ return cls(
75
+ offset=base_offset + offset,
76
+ type=record_type,
77
+ key=key,
78
+ value=value,
79
+ sequence_number=sequence_number)
76
80
 
77
81
 
78
82
  @dataclass
79
- class WriteBatch:
83
+ class WriteBatch(utils.FromDecoderMixin):
80
84
  """A write batch from a leveldb log file.
81
85
 
82
86
  Attributes:
83
- offset: the batch offset.
87
+ offset: the write batch offset.
84
88
  sequence_number: the batch sequence number.
85
89
  count: the number of ParsedInternalKey in the batch.
86
90
  records: the ParsedInternalKey parsed from the batch.
@@ -91,46 +95,38 @@ class WriteBatch:
91
95
  records: Iterable[ParsedInternalKey] = field(repr=False)
92
96
 
93
97
  @classmethod
94
- def FromStream(
95
- cls, stream: BinaryIO, base_offset: int = 0
98
+ def FromDecoder(
99
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
96
100
  ) -> WriteBatch:
97
101
  """Parses a WriteBatch from a binary stream.
98
102
 
99
103
  Args:
100
- stream: the binary stream to be parsed.
104
+ decoder: the LevelDBDecoder
101
105
  base_offset: the base offset of the Block from which the data is
102
106
  read from.
103
107
 
104
108
  Returns:
105
109
  A WriteBatch.
106
110
  """
107
- decoder = utils.LevelDBDecoder(stream)
108
- _, sequence_number = decoder.DecodeUint64()
111
+ offset, sequence_number = decoder.DecodeUint64()
109
112
  _, count = decoder.DecodeUint32()
110
113
 
111
114
  records = []
112
- for _ in range(count):
113
- record = ParsedInternalKey.FromDecoder(decoder, base_offset)
115
+ for relative_sequence_number in range(count):
116
+ record = ParsedInternalKey.FromDecoder(
117
+ decoder, base_offset + offset,
118
+ relative_sequence_number + sequence_number
119
+ )
114
120
  records.append(record)
115
- return cls(base_offset, sequence_number, count, records)
116
-
117
- @classmethod
118
- def FromBytes(cls, data: bytes, base_offset: int = 0) -> WriteBatch:
119
- """Parses a WriteBatch from bytes.
120
-
121
- Args:
122
- data: the bytes to be parsed.
123
- base_offset: the base offset of the Block from which the data is
124
- read from.
125
-
126
- Returns:
127
- A WriteBatch.
128
- """
129
- return cls.FromStream(io.BytesIO(data), base_offset)
121
+ return cls(
122
+ offset=base_offset + offset,
123
+ sequence_number=sequence_number,
124
+ count=count,
125
+ records=records)
130
126
 
131
127
 
132
128
  @dataclass
133
- class PhysicalRecord:
129
+ class PhysicalRecord(utils.FromDecoderMixin):
134
130
  """A physical record from a leveldb log file.
135
131
 
136
132
  Attributes:
@@ -145,27 +141,30 @@ class PhysicalRecord:
145
141
  offset: int
146
142
  checksum: int
147
143
  length: int
148
- record_type: LogFilePhysicalRecordType
144
+ record_type: definitions.LogFilePhysicalRecordType
149
145
  contents: bytes = field(repr=False)
150
146
  contents_offset: int
151
147
 
148
+ PHYSICAL_HEADER_LENGTH = 7
149
+
152
150
  @classmethod
153
- def FromStream(
154
- cls, stream: BinaryIO, base_offset: int = 0) -> PhysicalRecord:
155
- """Parses a PhysicalRecord from a binary stream.
151
+ def FromDecoder(
152
+ cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
153
+ ) -> PhysicalRecord:
154
+ """Decodes a PhysicalRecord from the current position of a LevelDBDecoder.
156
155
 
157
156
  Args:
158
- stream: the binary stream to be parsed.
157
+ decoder: the LevelDBDecoder.
159
158
  base_offset: the base offset of the WriteBatch from which the data is
160
159
  read from.
161
160
 
162
161
  Returns:
163
162
  A PhysicalRecord.
164
163
  """
165
- decoder = utils.StreamDecoder(stream)
166
164
  offset, checksum = decoder.DecodeUint32()
167
165
  _, length = decoder.DecodeUint16()
168
- record_type = LogFilePhysicalRecordType(decoder.DecodeUint8()[1])
166
+ record_type = definitions.LogFilePhysicalRecordType(
167
+ decoder.DecodeUint8()[1])
169
168
  contents_offset, contents = decoder.ReadBytes(length)
170
169
  return cls(
171
170
  base_offset=base_offset,
@@ -199,7 +198,7 @@ class Block:
199
198
  buffer = io.BytesIO(self.data)
200
199
  buffer_length = len(self.data)
201
200
 
202
- while buffer.tell() < buffer_length:
201
+ while buffer.tell() + PhysicalRecord.PHYSICAL_HEADER_LENGTH < buffer_length:
203
202
  yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
204
203
 
205
204
  @classmethod
@@ -219,10 +218,10 @@ class Block:
219
218
  return cls(offset, data)
220
219
 
221
220
 
222
- class LogFileReader:
221
+ class FileReader:
223
222
  """A leveldb log file reader.
224
223
 
225
- A LogFileReader provides read-only sequential iteration of serialized
224
+ A Log FileReader provides read-only sequential iteration of serialized
226
225
  structures in a leveldb logfile. These structures include:
227
226
  * blocks (Block)
228
227
  * physical records (PhysicalRecord)
@@ -250,11 +249,10 @@ class LogFileReader:
250
249
  a Block
251
250
  """
252
251
  with open(self.filename, 'rb') as fh:
253
- while True:
254
- block = Block.FromStream(fh)
255
- if not block:
256
- break
252
+ block = Block.FromStream(fh)
253
+ while block:
257
254
  yield block
255
+ block = Block.FromStream(fh)
258
256
 
259
257
  def GetPhysicalRecords(self) -> Generator[PhysicalRecord, None, None]:
260
258
  """Returns an iterator of PhysicalRecord instances.
@@ -278,17 +276,21 @@ class LogFileReader:
278
276
  """
279
277
  buffer = bytearray()
280
278
  for physical_record in self.GetPhysicalRecords():
281
- if physical_record.record_type == LogFilePhysicalRecordType.FULL:
279
+ if(physical_record.record_type ==
280
+ definitions.LogFilePhysicalRecordType.FULL):
282
281
  buffer = physical_record.contents
283
282
  offset = physical_record.contents_offset + physical_record.base_offset
284
283
  yield WriteBatch.FromBytes(buffer, base_offset=offset)
285
284
  buffer = bytearray()
286
- elif physical_record.record_type == LogFilePhysicalRecordType.FIRST:
285
+ elif (physical_record.record_type
286
+ == definitions.LogFilePhysicalRecordType.FIRST):
287
287
  offset = physical_record.contents_offset + physical_record.base_offset
288
288
  buffer = bytearray(physical_record.contents)
289
- elif physical_record.record_type == LogFilePhysicalRecordType.MIDDLE:
289
+ elif (physical_record.record_type ==
290
+ definitions.LogFilePhysicalRecordType.MIDDLE):
290
291
  buffer.extend(bytearray(physical_record.contents))
291
- elif physical_record.record_type == LogFilePhysicalRecordType.LAST:
292
+ elif (physical_record.record_type ==
293
+ definitions.LogFilePhysicalRecordType.LAST):
292
294
  buffer.extend(bytearray(physical_record.contents))
293
295
  yield WriteBatch.FromBytes(buffer, base_offset=offset)
294
296
  buffer = bytearray()
@@ -229,24 +229,30 @@ class LevelDBDecoder(StreamDecoder):
229
229
  f'Odd number of bytes encountered at offset {offset}')
230
230
  return offset, buffer.decode('utf-16-be')
231
231
 
232
+ def DecodeLengthPrefixedSlice(self) -> Tuple[int, bytes]:
233
+ """Returns a tuple of the offset of decoding and the byte 'slice'."""
234
+ offset, num_bytes = self.DecodeUint32Varint()
235
+ _, blob = self.ReadBytes(num_bytes)
236
+ return offset, blob
237
+
232
238
  def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
233
239
  """Returns a tuple of the offset of decoding and the binary blob."""
234
240
  offset, num_bytes = self.DecodeUint64Varint()
235
241
  _, blob = self.ReadBytes(num_bytes)
236
242
  return offset, blob
237
243
 
238
- def DecodeStringWithLength(self) -> Tuple[int, str]:
244
+ def DecodeStringWithLength(self, encoding='utf-16-be') -> Tuple[int, str]:
239
245
  """Returns a tuple of the offset of decoding and the string value."""
240
246
  offset, length = self.DecodeUint64Varint()
241
247
  _, buffer = self.ReadBytes(length*2)
242
- return offset, buffer.decode('utf-16-be')
248
+ return offset, buffer.decode(encoding=encoding)
243
249
 
244
250
 
245
251
  T = TypeVar('T')
246
252
 
247
253
 
248
- class FromStreamMixin: # TODO: refactor leveldb parsers
249
- """A mixin for dataclasses parsing their attributes from a binary stream."""
254
+ class FromDecoderMixin:
255
+ """A mixin for parsing dataclass attributes using a LevelDBDecoder."""
250
256
 
251
257
  @classmethod
252
258
  def FromDecoder(
@@ -278,7 +284,7 @@ class FromStreamMixin: # TODO: refactor leveldb parsers
278
284
  The class instance.
279
285
  """
280
286
  decoder = LevelDBDecoder(stream)
281
- return cls.FromDecoder(decoder, base_offset)
287
+ return cls.FromDecoder(decoder=decoder, base_offset=base_offset)
282
288
 
283
289
  @classmethod
284
290
  def FromBytes(
@@ -14,7 +14,7 @@
14
14
  # limitations under the License.
15
15
  """Version information for dfIndexeddb."""
16
16
 
17
- __version__ = "20240301"
17
+ __version__ = "20240305"
18
18
 
19
19
 
20
20
  def GetVersion():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dfindexeddb
3
- Version: 20240301
3
+ Version: 20240305
4
4
  Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
5
5
  Author-email: Syd Pleno <sydp@google.com>
6
6
  Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
@@ -20,5 +20,7 @@ dfindexeddb/indexeddb/chromium.py
20
20
  dfindexeddb/indexeddb/definitions.py
21
21
  dfindexeddb/indexeddb/v8.py
22
22
  dfindexeddb/leveldb/__init__.py
23
+ dfindexeddb/leveldb/definitions.py
24
+ dfindexeddb/leveldb/descriptor.py
23
25
  dfindexeddb/leveldb/ldb.py
24
26
  dfindexeddb/leveldb/log.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dfindexeddb"
7
- version = "20240301"
7
+ version = "20240305"
8
8
  requires-python = ">=3.8"
9
9
  description = "dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files."
10
10
  license = {file = "LICENSE"}
File without changes
File without changes
File without changes
File without changes
File without changes