dfindexeddb 20240224-py3-none-any.whl → 20240229-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their public registry. It is provided for informational purposes only.
dfindexeddb/cli.py ADDED
@@ -0,0 +1,147 @@
+ # -*- coding: utf-8 -*-
+ # Copyright 2024 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """A CLI tool for dfindexeddb."""
+ import argparse
+ import dataclasses
+ from datetime import datetime
+ import json
+ import pathlib
+ import sys
+
+ from dfindexeddb.leveldb import log
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.indexeddb import chromium
+ from dfindexeddb.indexeddb import v8
+
+
+ class Encoder(json.JSONEncoder):
+   """A JSON encoder class for dfindexeddb fields."""
+   def default(self, o):
+     if isinstance(o, bytes):
+       return o.decode(encoding='ascii', errors='backslashreplace')
+     if isinstance(o, datetime):
+       return o.isoformat()
+     if isinstance(o, v8.Undefined):
+       return "<undefined>"
+     if isinstance(o, v8.Null):
+       return "<null>"
+     if isinstance(o, set):
+       return list(o)
+     if isinstance(o, v8.RegExp):
+       return str(o)
+     return json.JSONEncoder.default(self, o)
+
+
+ def _Output(structure, to_json=False):
+   """Helper method to output parsed structure to stdout."""
+   if to_json:
+     structure_dict = dataclasses.asdict(structure)
+     print(json.dumps(structure_dict, indent=2, cls=Encoder))
+   else:
+     print(structure)
+
+
+ def IndexeddbCommand(args):
+   """The CLI for processing a log/ldb file as indexeddb."""
+   if args.source.name.endswith('.log'):
+     records = list(
+         log.LogFileReader(args.source).GetKeyValueRecords())
+   elif args.source.name.endswith('.ldb'):
+     records = list(
+         ldb.LdbFileReader(args.source).GetKeyValueRecords())
+   else:
+     print('Unsupported file type.', file=sys.stderr)
+     return
+
+   for record in records:
+     record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
+     _Output(record, to_json=args.json)
+
+
+ def LdbCommand(args):
+   """The CLI for processing ldb files."""
+   ldb_file = ldb.LdbFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in ldb_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'records':
+     # Prints key value record information.
+     for record in ldb_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def LogCommand(args):
+   """The CLI for processing log files."""
+   log_file = log.LogFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in log_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'physical_records':
+     # Prints log file physical record information.
+     for log_file_record in log_file.GetPhysicalRecords():
+       _Output(log_file_record, to_json=args.json)
+
+   elif args.structure_type == 'write_batches':
+     # Prints log file batch information.
+     for batch in log_file.GetWriteBatches():
+       _Output(batch, to_json=args.json)
+
+   elif args.structure_type in ('parsed_internal_key', 'records'):
+     # Prints key value record information.
+     for record in log_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def App():
+   """The CLI app entrypoint."""
+   parser = argparse.ArgumentParser(
+       prog='dfindexeddb',
+       description='A cli tool for the dfindexeddb package')
+
+   parser.add_argument(
+       '-s', '--source', required=True, type=pathlib.Path,
+       help='The source leveldb file')
+   parser.add_argument('--json', action='store_true', help='Output as JSON')
+   subparsers = parser.add_subparsers(required=True)
+
+   parser_log = subparsers.add_parser('log')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'physical_records',
+           'write_batches',
+           'parsed_internal_key',
+           'records'])
+   parser_log.set_defaults(func=LogCommand)
+
+   parser_log = subparsers.add_parser('ldb')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'records'])
+   parser_log.set_defaults(func=LdbCommand)
+
+   parser_log = subparsers.add_parser('indexeddb')
+   parser_log.set_defaults(func=IndexeddbCommand)
+
+   args = parser.parse_args()
+
+   args.func(args)
dfindexeddb/indexeddb/chromium.py CHANGED
@@ -17,11 +17,16 @@ from __future__ import annotations
  from dataclasses import dataclass, field
  from datetime import datetime
  import io
+ import sys
+ import traceback
  from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union

  from dfindexeddb import errors
  from dfindexeddb import utils
+ from dfindexeddb.indexeddb import blink
  from dfindexeddb.indexeddb import definitions
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.leveldb import log


  T = TypeVar('T')
@@ -1281,3 +1286,52 @@ class ObjectStore:
    id: int
    name: str
    records: list = field(default_factory=list, repr=False)
+
+
+ @dataclass
+ class IndexedDBRecord:
+   """An IndexedDB Record.
+
+   Attributes:
+     offset: the offset of the record.
+     key: the key of the record.
+     value: the value of the record.
+     sequence_number: if available, the sequence number of the record.
+     type: the type of the record.
+   """
+   offset: int
+   key: Any
+   value: Any
+   sequence_number: int
+   type: int
+
+   @classmethod
+   def FromLevelDBRecord(
+       cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
+   ) -> IndexedDBRecord:
+     """Returns an IndexedDBRecord from a ParsedInternalKey."""
+     idb_key = IndexedDbKey.FromBytes(
+         record.key, base_offset=record.offset)
+
+     idb_value = idb_key.ParseValue(record.value)
+     if isinstance(idb_key, ObjectStoreDataKey):
+
+       # The ObjectStoreDataKey value should decode as a 2-tuple comprising
+       # a version integer and a SSV as a raw byte string
+       if (isinstance(idb_value, tuple) and len(idb_value) == 2 and
+           isinstance(idb_value[1], bytes)):
+
+         try:
+           blink_value = blink.V8ScriptValueDecoder.FromBytes(idb_value[1])
+           idb_value = idb_value[0], blink_value
+         except (errors.ParserError, errors.DecoderError) as err:
+           print(f'Error parsing blink value: {err}', file=sys.stderr)
+           print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+
+     return cls(
+         offset=record.offset,
+         key=idb_key,
+         value=idb_value,
+         sequence_number=record.sequence_number if hasattr(
+             record, 'sequence_number') else None,
+         type=record.type)
dfindexeddb/leveldb/ldb.py CHANGED
@@ -21,6 +21,7 @@ import os
  from typing import BinaryIO, Iterable, Tuple

  import snappy
+ import zstd

  from dfindexeddb import utils

@@ -90,17 +91,24 @@ class LdbBlock:
    data: bytes = field(repr=False)
    footer: bytes # 5 bytes = 1 byte compressed flag + 4 bytes checksum.

-   COMPRESSED = 1
+   SNAPPY_COMPRESSED = 1
+   ZSTD_COMPRESSED = 2
    RESTART_ENTRY_LENGTH = 4

-   def IsCompressed(self) -> bool:
-     """Returns true if the block is compressed."""
-     return self.footer[0] == self.COMPRESSED
+   def IsSnappyCompressed(self) -> bool:
+     """Returns true if the block is snappy compressed."""
+     return self.footer[0] == self.SNAPPY_COMPRESSED
+
+   def IsZstdCompressed(self) -> bool:
+     """Returns true if the block is zstd compressed."""
+     return self.footer[0] == self.ZSTD_COMPRESSED

    def GetBuffer(self) -> bytes:
      """Returns the block buffer, decompressing if required."""
-     if self.IsCompressed():
+     if self.IsSnappyCompressed():
        return snappy.decompress(self.data)
+     if self.IsZstdCompressed():
+       return zstd.decompress(self.data)
      return self.data

    def GetRecords(self) -> Iterable[LdbKeyValueRecord]:
@@ -246,8 +254,7 @@ class LdbFileReader:
        LdbKeyValueRecords.
      """
      for block in self.GetBlocks():
-       for record in block.GetRecords():
-         yield record
+       yield from block.GetRecords()

    def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]: #pylint: disable=C0103
      """Returns an iterator of key-value pairs.
dfindexeddb/leveldb/log.py CHANGED
@@ -20,7 +20,6 @@ from enum import IntEnum
  import io
  from typing import BinaryIO, Generator, Iterable, Optional

- from dfindexeddb import errors
  from dfindexeddb import utils


@@ -198,11 +197,10 @@ class Block:
        LogFileRecord
      """
      buffer = io.BytesIO(self.data)
-     while True:
-       try:
-         yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
-       except errors.DecoderError:
-         return
+     buffer_length = len(self.data)
+
+     while buffer.tell() < buffer_length:
+       yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)

    @classmethod
    def FromStream(cls, stream: BinaryIO) -> Optional[Block]:
@@ -212,7 +210,8 @@ class Block:
        stream: the binary stream to be parsed.

      Returns:
-       the Block or None if there is no data to read from the stream."""
+       the Block or None if there is no data to read from the stream.
+     """
      offset = stream.tell()
      data = stream.read(cls.BLOCK_SIZE) # reads full and partial blocks
      if not data:
@@ -266,8 +265,7 @@ class LogFileReader:
        PhysicalRecord
      """
      for block in self.GetBlocks():
-       for physical_record in block.GetPhysicalRecords():
-         yield physical_record
+       yield from block.GetPhysicalRecords()

    def GetWriteBatches(self) -> Generator[WriteBatch, None, None]:
      """Returns an iterator of WriteBatch instances.
@@ -304,5 +302,4 @@ class LogFileReader:
        KeyValueRecord
      """
      for batch in self.GetWriteBatches():
-       for record in batch.records:
-         yield record
+       yield from batch.records
dfindexeddb/utils.py CHANGED
@@ -68,10 +68,10 @@ class StreamDecoder:
      offset = self.stream.tell()
      buffer = self.stream.read(count)
      if count == -1 and not buffer:
-       raise errors.DecoderError('No bytes available')
+       raise errors.DecoderError(f'No bytes available at offset {offset}')
      if count != -1 and len(buffer) != count:
        raise errors.DecoderError(
-           f'Read {len(buffer)}, wanted {count}, at stream offset {offset}')
+           f'Read {len(buffer)} bytes, but wanted {count} at offset {offset}')
      return offset, buffer

    def PeekBytes(self, count: int) -> Tuple[int, bytes]:
@@ -225,7 +225,8 @@ class LevelDBDecoder(StreamDecoder):
      offset = self.stream.tell()
      buffer = self.stream.read()
      if len(buffer) % 2:
-       raise errors.DecoderError('Odd number of bytes encountered')
+       raise errors.DecoderError(
+           f'Odd number of bytes encountered at offset {offset}')
      return offset, buffer.decode('utf-16-be')

    def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
dfindexeddb/version.py CHANGED
@@ -14,7 +14,7 @@
  # limitations under the License.
  """Version information for dfIndexeddb."""

- __version__ = "20240224"
+ __version__ = "20240229"


  def GetVersion():
dfindexeddb-20240229.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dfindexeddb
- Version: 20240224
+ Version: 20240229
  Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
  Author-email: Syd Pleno <sydp@google.com>
  Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
@@ -218,6 +218,7 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  License-File: AUTHORS
  Requires-Dist: python-snappy ==0.6.1
+ Requires-Dist: zstd ==1.5.5.1

  # dfIndexeddb

@@ -235,6 +236,12 @@ include:
  * emails and contact information from an e-mail application,
  * images and metadata from a photo gallery application

+ ## Installation
+
+ ```
+ $ pip install dfindexeddb
+ ```
+
  ## Installation from source

  ### Linux
@@ -255,23 +262,57 @@ include:
  $ pip install .
  ```

- ## Tools
+ ## Usage
+
+ A CLI tool is available after installation:

- This repository contains a number of scripts which demonstrate how one can use
- this library. To run these tools, please install the `click` python package.
+ ```
+ $ dfindexeddb -h
+ usage: dfindexeddb [-h] -s SOURCE [--json] {log,ldb,indexeddb} ...

- * `tools/indexeddb_dump.py` - parses structures from an IndexedDB and prints
-   them to standard output.
-   - Optionally, you can also install the `leveldb` python package if you
-     would prefer to use a native leveldb library instead of the leveldb parser in
-     this repository.
- * `tools/ldb_dump.py` - parses structures from a LevelDB .ldb file and prints
-   them to standard output.
- * `tools/log_dump.py` - parses structures from a LevelDB .log file and prints
-   them to standard output.
+ A cli tool for the dfindexeddb package

+ positional arguments:
+   {log,ldb,indexeddb}
+
+ options:
+   -s SOURCE, --source SOURCE
+                         The source leveldb file
+   --json                Output as JSON
+ ```
+
+ To parse a LevelDB .log file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> log -h
+ usage: dfindexeddb log [-h] {blocks,physical_records,write_batches,parsed_internal_key,records}

+ positional arguments:
+   {blocks,physical_records,write_batches,parsed_internal_key,records}

+ options:
+   -h, --help  show this help message and exit
  ```
- $ pip install click leveldb
+
+ To parse a LevelDB .ldb file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> ldb -h
+ usage: dfindexeddb ldb [-h] {blocks,records}
+
+ positional arguments:
+   {blocks,records}
+
+ options:
+   -h, --help  show this help message and exit
+ ```
+
+ To parse a LevelDB .ldb or .log file as IndexedDB:
+
+ ```
+ $ dfindexeddb -s <SOURCE> indexeddb -h
+ usage: dfindexeddb indexeddb [-h]
+
+ options:
+   -h, --help  show this help message and exit
  ```
dfindexeddb-20240229.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
+ dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
+ dfindexeddb/cli.py,sha256=LD2-BwmXC3qFcJwgP09QDFxU3HOGtsg0Kbtyx-hAqzA,4525
+ dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
+ dfindexeddb/utils.py,sha256=g9iiGRX4DB1wFBSBHa6b9lg7JzAdE0SN0DrdB2aS_Co,10091
+ dfindexeddb/version.py,sha256=XwHKYiT0CeLWo90AaJfOYHD1mEEgIlUUSB6ot_rU8wc,750
+ dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
+ dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
+ dfindexeddb/indexeddb/chromium.py,sha256=Anw6QIU7PrsxpUW7qxrUXRb5vBRcxozhv3mHov7Ti8k,43984
+ dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
+ dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
+ dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
+ dfindexeddb/leveldb/ldb.py,sha256=uShhXjQe4Sz3dn54IXbGxRtE6D8RNpu1NDy5Zb0P9LA,7927
+ dfindexeddb/leveldb/log.py,sha256=cyMfjDz5a6gfGb5NonxC1Y72OmHYBWzYK8UMVzP_umw,8532
+ dfindexeddb-20240229.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
+ dfindexeddb-20240229.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ dfindexeddb-20240229.dist-info/METADATA,sha256=ILzTLaRO96ALuHL8d72tt3a3shliEGzGHZC54n5wPpc,15933
+ dfindexeddb-20240229.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ dfindexeddb-20240229.dist-info/entry_points.txt,sha256=UsfPLLhTiVAAtZ8Rq3ZR7JNFGMuHqJy-tugGWonQWtc,52
+ dfindexeddb-20240229.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
+ dfindexeddb-20240229.dist-info/RECORD,,
dfindexeddb-20240229.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ dfindexeddb = dfindexeddb.cli:App
@@ -1,18 +0,0 @@
- dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
- dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
- dfindexeddb/utils.py,sha256=TG27xY8AjwNs_736ZaPHMERy1HiLv0E123PhPQ-JKKQ,10038
- dfindexeddb/version.py,sha256=-nNzbnRjBpknBvUjJZ8_2lFtuUdQv-yv4r8l78vkDRw,750
- dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
- dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
- dfindexeddb/indexeddb/chromium.py,sha256=N1aCoJETNqLER8T_C4bmfrxiNr1csJhUJ4-14qrl0nc,42291
- dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
- dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
- dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
- dfindexeddb/leveldb/ldb.py,sha256=AE50TW7zxSUBbi9cmcUs63m_JKnrD24OQSCoamIJ4eU,7668
- dfindexeddb/leveldb/log.py,sha256=nNiycsC9uEmvBatjhsN37cWz0_3A6WBo5byv28-cZdg,8627
- dfindexeddb-20240224.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
- dfindexeddb-20240224.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- dfindexeddb-20240224.dist-info/METADATA,sha256=YoQCcGKgTzYH2j9hr2YjZSy5q6NC55IG2AAS-0Ht89Q,15444
- dfindexeddb-20240224.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- dfindexeddb-20240224.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
- dfindexeddb-20240224.dist-info/RECORD,,