dfindexeddb 20240225-py3-none-any.whl → 20240301-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dfindexeddb/cli.py ADDED
@@ -0,0 +1,155 @@
+ # -*- coding: utf-8 -*-
+ # Copyright 2024 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """A CLI tool for dfindexeddb."""
+ import argparse
+ import dataclasses
+ from datetime import datetime
+ import json
+ import pathlib
+ import sys
+ import traceback
+
+ from dfindexeddb.leveldb import log
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.indexeddb import chromium
+ from dfindexeddb import errors
+ from dfindexeddb.indexeddb import v8
+
+
+ class Encoder(json.JSONEncoder):
+   """A JSON encoder class for dfindexeddb fields."""
+   def default(self, o):
+     if isinstance(o, bytes):
+       return o.decode(encoding='ascii', errors='backslashreplace')
+     if isinstance(o, datetime):
+       return o.isoformat()
+     if isinstance(o, v8.Undefined):
+       return "<undefined>"
+     if isinstance(o, v8.Null):
+       return "<null>"
+     if isinstance(o, set):
+       return list(o)
+     if isinstance(o, v8.RegExp):
+       return str(o)
+     return json.JSONEncoder.default(self, o)
+
+
+ def _Output(structure, to_json=False):
+   """Helper method to output parsed structure to stdout."""
+   if to_json:
+     structure_dict = dataclasses.asdict(structure)
+     print(json.dumps(structure_dict, indent=2, cls=Encoder))
+   else:
+     print(structure)
+
+
+ def IndexeddbCommand(args):
+   """The CLI for processing a log/ldb file as indexeddb."""
+   if args.source.name.endswith('.log'):
+     records = list(
+         log.LogFileReader(args.source).GetKeyValueRecords())
+   elif args.source.name.endswith('.ldb'):
+     records = list(
+         ldb.LdbFileReader(args.source).GetKeyValueRecords())
+   else:
+     print('Unsupported file type.', file=sys.stderr)
+     return
+
+   for record in records:
+     try:
+       record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
+     except (errors.ParserError, errors.DecoderError) as err:
+       print(
+           (f'Error parsing blink value: {err} for {record.__class__.__name__} '
+            f'at offset {record.offset}'), file=sys.stderr)
+       print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+     _Output(record, to_json=args.json)
+
+
+ def LdbCommand(args):
+   """The CLI for processing ldb files."""
+   ldb_file = ldb.LdbFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in ldb_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'records':
+     # Prints key value record information.
+     for record in ldb_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def LogCommand(args):
+   """The CLI for processing log files."""
+   log_file = log.LogFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in log_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'physical_records':
+     # Prints log file physical record information.
+     for log_file_record in log_file.GetPhysicalRecords():
+       _Output(log_file_record, to_json=args.json)
+
+   elif args.structure_type == 'write_batches':
+     # Prints log file batch information.
+     for batch in log_file.GetWriteBatches():
+       _Output(batch, to_json=args.json)
+
+   elif args.structure_type in ('parsed_internal_key', 'records'):
+     # Prints key value record information.
+     for record in log_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def App():
+   """The CLI app entrypoint."""
+   parser = argparse.ArgumentParser(
+       prog='dfindexeddb',
+       description='A cli tool for the dfindexeddb package')
+
+   parser.add_argument(
+       '-s', '--source', required=True, type=pathlib.Path,
+       help='The source leveldb file')
+   parser.add_argument('--json', action='store_true', help='Output as JSON')
+   subparsers = parser.add_subparsers(required=True)
+
+   parser_log = subparsers.add_parser('log')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'physical_records',
+           'write_batches',
+           'parsed_internal_key',
+           'records'])
+   parser_log.set_defaults(func=LogCommand)
+
+   parser_log = subparsers.add_parser('ldb')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'records'])
+   parser_log.set_defaults(func=LdbCommand)
+
+   parser_log = subparsers.add_parser('indexeddb')
+   parser_log.set_defaults(func=IndexeddbCommand)
+
+   args = parser.parse_args()
+
+   args.func(args)
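
The new `indexeddb` subcommand can also be reproduced programmatically with the same modules `cli.py` imports. The following is a minimal sketch, assuming a hypothetical Chromium IndexedDB `.log` file at `000003.log`; error handling for unparsable records is omitted for brevity:

```
import dataclasses
import json
import pathlib

from dfindexeddb import cli
from dfindexeddb.indexeddb import chromium
from dfindexeddb.leveldb import log

# Hypothetical path to a Chromium IndexedDB LevelDB write-ahead log file.
source = pathlib.Path('000003.log')

# Mirrors IndexeddbCommand above: read key/value records from the .log file,
# reinterpret each as an IndexedDB record, and serialize it with the JSON
# encoder added in cli.py.
for record in log.LogFileReader(source).GetKeyValueRecords():
  idb_record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
  print(json.dumps(dataclasses.asdict(idb_record), indent=2, cls=cli.Encoder))
```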
dfindexeddb/indexeddb/chromium.py CHANGED
@@ -21,7 +21,10 @@ from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union

  from dfindexeddb import errors
  from dfindexeddb import utils
+ from dfindexeddb.indexeddb import blink
  from dfindexeddb.indexeddb import definitions
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.leveldb import log


  T = TypeVar('T')
@@ -401,7 +404,7 @@ class BaseIndexedDBKey:
      The decoded key.
    """
    decoder = utils.LevelDBDecoder(stream)
-   key_prefix = KeyPrefix.FromDecoder(decoder)
+   key_prefix = KeyPrefix.FromDecoder(decoder, base_offset=base_offset)
    return cls.FromDecoder(
        decoder=decoder, key_prefix=key_prefix, base_offset=base_offset)

@@ -958,6 +961,23 @@ class ObjectStoreMetaDataKey(BaseIndexedDBKey):
        offset=base_offset + offset, key_prefix=key_prefix,
        object_store_id=object_store_id, metadata_type=metadata_type)


+ @dataclass
+ class ObjectStoreDataValue:
+   """The parsed values from an ObjectStoreDataKey.
+
+   Attributes:
+     unknown: an unknown integer (possibly a sequence number?).
+     is_wrapped: True if the value was wrapped.
+     blob_size: the blob size, only valid if wrapped.
+     blob_offset: the blob offset, only valid if wrapped.
+     value: the blink serialized value, only valid if not wrapped.
+   """
+   unkown: int
+   is_wrapped: bool
+   blob_size: Optional[int]
+   blob_offset: Optional[int]
+   value: Any
+

  @dataclass
  class ObjectStoreDataKey(BaseIndexedDBKey):
@@ -969,11 +989,33 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
  encoded_user_key: IDBKey

  def DecodeValue(
-     self, decoder: utils.LevelDBDecoder) -> Tuple[int, bytes]:
+     self, decoder: utils.LevelDBDecoder) -> ObjectStoreDataValue:
    """Decodes the object store data value."""
-   _, version = decoder.DecodeVarint()
-   _, encoded_string = decoder.ReadBytes()
-   return version, encoded_string
+   _, unknown_integer = decoder.DecodeVarint()
+
+   _, wrapped_header_bytes = decoder.PeekBytes(3)
+   if len(wrapped_header_bytes) != 3:
+     raise errors.DecoderError('Insufficient bytes')
+
+   if (wrapped_header_bytes[0] == definitions.BlinkSerializationTag.VERSION and
+       wrapped_header_bytes[1] == 0x11 and
+       wrapped_header_bytes[2] == 0x01):
+     _, blob_size = decoder.DecodeVarint()
+     _, blob_offset = decoder.DecodeVarint()
+     return ObjectStoreDataValue(
+         unkown=unknown_integer,
+         is_wrapped=True,
+         blob_size=blob_size,
+         blob_offset=blob_offset,
+         value=None)
+   _, blink_bytes = decoder.ReadBytes()
+   blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes)
+   return ObjectStoreDataValue(
+       unkown=unknown_integer,
+       is_wrapped=False,
+       blob_size=None,
+       blob_offset=None,
+       value=blink_value)

  @classmethod
  def FromDecoder(
@@ -985,7 +1027,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
        definitions.KeyPrefixType.OBJECT_STORE_DATA):
      raise errors.ParserError('Invalid KeyPrefix for ObjectStoreDataKey')
    offset = decoder.stream.tell()
-   encoded_user_key = IDBKey.FromDecoder(decoder, base_offset)
+   encoded_user_key = IDBKey.FromDecoder(decoder, offset)
    return cls(
        offset=base_offset + offset,
        key_prefix=key_prefix, encoded_user_key=encoded_user_key)
@@ -1012,7 +1054,7 @@ class ExistsEntryKey(BaseIndexedDBKey):
  ) -> ExistsEntryKey:
    """Decodes the exists entry key."""
    offset = decoder.stream.tell()
-   encoded_user_key = IDBKey.FromDecoder(decoder, base_offset)
+   encoded_user_key = IDBKey.FromDecoder(decoder, offset)

    return cls(
        offset=base_offset + offset,
@@ -1043,7 +1085,7 @@ class IndexDataKey(BaseIndexedDBKey):
      base_offset: int = 0) -> IndexDataKey:
    """Decodes the index data key."""
    offset = decoder.stream.tell()
-   encoded_user_key = IDBKey.FromDecoder(decoder, base_offset)
+   encoded_user_key = IDBKey.FromDecoder(decoder, offset)

    if decoder.NumRemainingBytes() > 0:
      _, sequence_number = decoder.DecodeVarint()
@@ -1051,7 +1093,9 @@ class IndexDataKey(BaseIndexedDBKey):
      sequence_number = None

    if decoder.NumRemainingBytes() > 0:
-     encoded_primary_key = IDBKey.FromDecoder(decoder, base_offset)
+     encoded_primary_key_offset = decoder.stream.tell()
+     encoded_primary_key = IDBKey.FromDecoder(
+         decoder, encoded_primary_key_offset)
    else:
      encoded_primary_key = None

@@ -1084,7 +1128,7 @@ class BlobEntryKey(BaseIndexedDBKey):
  ) -> BlobEntryKey:
    """Decodes the blob entry key."""
    offset = decoder.stream.tell()
-   user_key = IDBKey.FromDecoder(decoder, base_offset)
+   user_key = IDBKey.FromDecoder(decoder, offset)

    return cls(key_prefix=key_prefix, user_key=user_key,
               offset=base_offset + offset)
@@ -1281,3 +1325,36 @@ class ObjectStore:
  id: int
  name: str
  records: list = field(default_factory=list, repr=False)
+
+
+ @dataclass
+ class IndexedDBRecord:
+   """An IndexedDB Record.
+
+   Attributes:
+     offset: the offset of the record.
+     key: the key of the record.
+     value: the value of the record.
+     sequence_number: if available, the sequence number of the record.
+     type: the type of the record.
+   """
+   offset: int
+   key: Any
+   value: Any
+   sequence_number: int
+   type: int
+
+   @classmethod
+   def FromLevelDBRecord(
+       cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
+   ) -> IndexedDBRecord:
+     """Returns an IndexedDBRecord from a ParsedInternalKey."""
+     idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
+     idb_value = idb_key.ParseValue(record.value)
+     return cls(
+         offset=record.offset,
+         key=idb_key,
+         value=idb_value,
+         sequence_number=record.sequence_number if hasattr(
+             record, 'sequence_number') else None,
+         type=record.type)
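
The new `ObjectStoreDataValue` dataclass lets downstream code tell blob-wrapped object-store values (which only record a blob size and offset) apart from values carrying an inline Blink-serialized payload. A minimal sketch, assuming a hypothetical `.ldb` table file at `000005.ldb` and omitting error handling:

```
import pathlib

from dfindexeddb.indexeddb import chromium
from dfindexeddb.leveldb import ldb

# Hypothetical LevelDB table file from a Chromium IndexedDB origin directory.
source = pathlib.Path('000005.ldb')

for record in ldb.LdbFileReader(source).GetKeyValueRecords():
  idb_record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
  value = idb_record.value
  # Object store rows now decode to ObjectStoreDataValue instead of a raw
  # (version, bytes) tuple.
  if isinstance(value, chromium.ObjectStoreDataValue):
    if value.is_wrapped:
      print('wrapped value:', value.blob_size, value.blob_offset)
    else:
      print('inline value:', value.value)
```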
dfindexeddb/version.py CHANGED
@@ -14,7 +14,7 @@
  # limitations under the License.
  """Version information for dfIndexeddb."""

- __version__ = "20240225"
+ __version__ = "20240301"


  def GetVersion():
dfindexeddb-20240301.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dfindexeddb
- Version: 20240225
+ Version: 20240301
  Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
  Author-email: Syd Pleno <sydp@google.com>
  Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
@@ -236,6 +236,12 @@ include:
  * emails and contact information from an e-mail application,
  * images and metadata from a photo gallery application

+ ## Installation
+
+ ```
+ $ pip install dfindexeddb
+ ```
+
  ## Installation from source

  ### Linux
@@ -256,23 +262,57 @@ include:
  $ pip install .
  ```

- ## Tools
+ ## Usage
+
+ A CLI tool is available after installation:

- This repository contains a number of scripts which demonstrate how one can use
- this library. To run these tools, please install the `click` python package.
+ ```
+ $ dfindexeddb -h
+ usage: dfindexeddb [-h] -s SOURCE [--json] {log,ldb,indexeddb} ...

- * `tools/indexeddb_dump.py` - parses structures from an IndexedDB and prints
-   them to standard output.
-   - Optionally, you can also install the `leveldb` python package if you
-     would prefer to use a native leveldb library instead of the leveldb parser in
-     this repository.
- * `tools/ldb_dump.py` - parses structures from a LevelDB .ldb file and prints
-   them to standard output.
- * `tools/log_dump.py` - parses structures from a LevelDB .log file and prints
-   them to standard output.
+ A cli tool for the dfindexeddb package

+ positional arguments:
+   {log,ldb,indexeddb}
+
+ options:
+   -s SOURCE, --source SOURCE
+                         The source leveldb file
+   --json                Output as JSON
+ ```
+
+ To parse a LevelDB .log file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> log -h
+ usage: dfindexeddb log [-h] {blocks,physical_records,write_batches,parsed_internal_key,records}

+ positional arguments:
+   {blocks,physical_records,write_batches,parsed_internal_key,records}

+ options:
+   -h, --help  show this help message and exit
  ```
- $ pip install click leveldb
+
+ To parse a LevelDB .ldb file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> ldb -h
+ usage: dfindexeddb ldb [-h] {blocks,records}
+
+ positional arguments:
+   {blocks,records}
+
+ options:
+   -h, --help  show this help message and exit
+ ```
+
+ To parse a LevelDB .ldb or .log file as IndexedDB:
+
+ ```
+ $ dfindexeddb -s <SOURCE> indexeddb -h
+ usage: dfindexeddb indexeddb [-h]
+
+ options:
+   -h, --help  show this help message and exit
  ```
dfindexeddb-20240301.dist-info/RECORD CHANGED
@@ -1,18 +1,20 @@
  dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
+ dfindexeddb/cli.py,sha256=QsFPC_B_PW6E42kBPUO0Ny3oRspYJsXlDCQDHmKuDHQ,4866
  dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
  dfindexeddb/utils.py,sha256=g9iiGRX4DB1wFBSBHa6b9lg7JzAdE0SN0DrdB2aS_Co,10091
- dfindexeddb/version.py,sha256=PiFacIFljjKL8Q1cSjdiapC9lqsgpPS4GDZDrPlBy4o,750
+ dfindexeddb/version.py,sha256=d2fDd3V2U_CkGi3PawWMR77qU0DzHEC3_bk7Ywh3xlM,750
  dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
  dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
- dfindexeddb/indexeddb/chromium.py,sha256=N1aCoJETNqLER8T_C4bmfrxiNr1csJhUJ4-14qrl0nc,42291
+ dfindexeddb/indexeddb/chromium.py,sha256=oHHyGuy7BanRWRoJu4zqXo5QvxO2j_qLk2KMHBBwZvs,44692
  dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
  dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
  dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
  dfindexeddb/leveldb/ldb.py,sha256=uShhXjQe4Sz3dn54IXbGxRtE6D8RNpu1NDy5Zb0P9LA,7927
  dfindexeddb/leveldb/log.py,sha256=cyMfjDz5a6gfGb5NonxC1Y72OmHYBWzYK8UMVzP_umw,8532
- dfindexeddb-20240225.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
- dfindexeddb-20240225.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- dfindexeddb-20240225.dist-info/METADATA,sha256=ZRr-aOFy5SlgZ2nTgeaYpLCjauP7qzW5XSSsFCyeFOQ,15474
- dfindexeddb-20240225.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- dfindexeddb-20240225.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
- dfindexeddb-20240225.dist-info/RECORD,,
+ dfindexeddb-20240301.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
+ dfindexeddb-20240301.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ dfindexeddb-20240301.dist-info/METADATA,sha256=AYggbgjQv3dNPwO-f3e7tK3EPvKsnlNTKM2nPTbAQYg,15933
+ dfindexeddb-20240301.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ dfindexeddb-20240301.dist-info/entry_points.txt,sha256=UsfPLLhTiVAAtZ8Rq3ZR7JNFGMuHqJy-tugGWonQWtc,52
+ dfindexeddb-20240301.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
+ dfindexeddb-20240301.dist-info/RECORD,,
dfindexeddb-20240301.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ dfindexeddb = dfindexeddb.cli:App
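
The new `console_scripts` entry point maps the `dfindexeddb` command to `cli.App`, so invoking the entry point by hand is equivalent to the following minimal sketch:

```
from dfindexeddb import cli

if __name__ == '__main__':
  # Parses sys.argv and dispatches to LogCommand, LdbCommand or IndexeddbCommand.
  cli.App()
```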