dfindexeddb 20240224-py3-none-any.whl → 20240229-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their public registry. It is provided for informational purposes only.
dfindexeddb/cli.py ADDED
@@ -0,0 +1,147 @@
+ # -*- coding: utf-8 -*-
+ # Copyright 2024 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """A CLI tool for dfindexeddb."""
+ import argparse
+ import dataclasses
+ from datetime import datetime
+ import json
+ import pathlib
+ import sys
+
+ from dfindexeddb.leveldb import log
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.indexeddb import chromium
+ from dfindexeddb.indexeddb import v8
+
+
+ class Encoder(json.JSONEncoder):
+   """A JSON encoder class for dfindexeddb fields."""
+   def default(self, o):
+     if isinstance(o, bytes):
+       return o.decode(encoding='ascii', errors='backslashreplace')
+     if isinstance(o, datetime):
+       return o.isoformat()
+     if isinstance(o, v8.Undefined):
+       return "<undefined>"
+     if isinstance(o, v8.Null):
+       return "<null>"
+     if isinstance(o, set):
+       return list(o)
+     if isinstance(o, v8.RegExp):
+       return str(o)
+     return json.JSONEncoder.default(self, o)
+
+
+ def _Output(structure, to_json=False):
+   """Helper method to output parsed structure to stdout."""
+   if to_json:
+     structure_dict = dataclasses.asdict(structure)
+     print(json.dumps(structure_dict, indent=2, cls=Encoder))
+   else:
+     print(structure)
+
+
+ def IndexeddbCommand(args):
+   """The CLI for processing a log/ldb file as indexeddb."""
+   if args.source.name.endswith('.log'):
+     records = list(
+         log.LogFileReader(args.source).GetKeyValueRecords())
+   elif args.source.name.endswith('.ldb'):
+     records = list(
+         ldb.LdbFileReader(args.source).GetKeyValueRecords())
+   else:
+     print('Unsupported file type.', file=sys.stderr)
+     return
+
+   for record in records:
+     record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
+     _Output(record, to_json=args.json)
+
+
+ def LdbCommand(args):
+   """The CLI for processing ldb files."""
+   ldb_file = ldb.LdbFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in ldb_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'records':
+     # Prints key value record information.
+     for record in ldb_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def LogCommand(args):
+   """The CLI for processing log files."""
+   log_file = log.LogFileReader(args.source)
+
+   if args.structure_type == 'blocks':
+     # Prints block information.
+     for block in log_file.GetBlocks():
+       _Output(block, to_json=args.json)
+
+   elif args.structure_type == 'physical_records':
+     # Prints log file physical record information.
+     for log_file_record in log_file.GetPhysicalRecords():
+       _Output(log_file_record, to_json=args.json)
+
+   elif args.structure_type == 'write_batches':
+     # Prints log file batch information.
+     for batch in log_file.GetWriteBatches():
+       _Output(batch, to_json=args.json)
+
+   elif args.structure_type in ('parsed_internal_key', 'records'):
+     # Prints key value record information.
+     for record in log_file.GetKeyValueRecords():
+       _Output(record, to_json=args.json)
+
+
+ def App():
+   """The CLI app entrypoint."""
+   parser = argparse.ArgumentParser(
+       prog='dfindexeddb',
+       description='A cli tool for the dfindexeddb package')
+
+   parser.add_argument(
+       '-s', '--source', required=True, type=pathlib.Path,
+       help='The source leveldb file')
+   parser.add_argument('--json', action='store_true', help='Output as JSON')
+   subparsers = parser.add_subparsers(required=True)
+
+   parser_log = subparsers.add_parser('log')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'physical_records',
+           'write_batches',
+           'parsed_internal_key',
+           'records'])
+   parser_log.set_defaults(func=LogCommand)
+
+   parser_log = subparsers.add_parser('ldb')
+   parser_log.add_argument(
+       'structure_type', choices=[
+           'blocks',
+           'records'])
+   parser_log.set_defaults(func=LdbCommand)
+
+   parser_log = subparsers.add_parser('indexeddb')
+   parser_log.set_defaults(func=IndexeddbCommand)
+
+   args = parser.parse_args()
+
+   args.func(args)
dfindexeddb/indexeddb/chromium.py CHANGED
@@ -17,11 +17,16 @@ from __future__ import annotations
  from dataclasses import dataclass, field
  from datetime import datetime
  import io
+ import sys
+ import traceback
  from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union

  from dfindexeddb import errors
  from dfindexeddb import utils
+ from dfindexeddb.indexeddb import blink
  from dfindexeddb.indexeddb import definitions
+ from dfindexeddb.leveldb import ldb
+ from dfindexeddb.leveldb import log


  T = TypeVar('T')
@@ -1281,3 +1286,52 @@ class ObjectStore:
    id: int
    name: str
    records: list = field(default_factory=list, repr=False)
+
+
+ @dataclass
+ class IndexedDBRecord:
+   """An IndexedDB Record.
+
+   Attributes:
+     offset: the offset of the record.
+     key: the key of the record.
+     value: the value of the record.
+     sequence_number: if available, the sequence number of the record.
+     type: the type of the record.
+   """
+   offset: int
+   key: Any
+   value: Any
+   sequence_number: int
+   type: int
+
+   @classmethod
+   def FromLevelDBRecord(
+       cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
+   ) -> IndexedDBRecord:
+     """Returns an IndexedDBRecord from a ParsedInternalKey."""
+     idb_key = IndexedDbKey.FromBytes(
+         record.key, base_offset=record.offset)
+
+     idb_value = idb_key.ParseValue(record.value)
+     if isinstance(idb_key, ObjectStoreDataKey):
+
+       # The ObjectStoreDataKey value should decode as a 2-tuple comprising
+       # a version integer and a SSV as a raw byte string
+       if (isinstance(idb_value, tuple) and len(idb_value) == 2 and
+           isinstance(idb_value[1], bytes)):
+
+         try:
+           blink_value = blink.V8ScriptValueDecoder.FromBytes(idb_value[1])
+           idb_value = idb_value[0], blink_value
+         except (errors.ParserError, errors.DecoderError) as err:
+           print(f'Error parsing blink value: {err}', file=sys.stderr)
+           print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+
+     return cls(
+         offset=record.offset,
+         key=idb_key,
+         value=idb_value,
+         sequence_number=record.sequence_number if hasattr(
+             record, 'sequence_number') else None,
+         type=record.type)
dfindexeddb/leveldb/ldb.py CHANGED
@@ -21,6 +21,7 @@ import os
  from typing import BinaryIO, Iterable, Tuple

  import snappy
+ import zstd

  from dfindexeddb import utils

@@ -90,17 +91,24 @@ class LdbBlock:
    data: bytes = field(repr=False)
    footer: bytes # 5 bytes = 1 byte compressed flag + 4 bytes checksum.

-   COMPRESSED = 1
+   SNAPPY_COMPRESSED = 1
+   ZSTD_COMPRESSED = 2
    RESTART_ENTRY_LENGTH = 4

-   def IsCompressed(self) -> bool:
-     """Returns true if the block is compressed."""
-     return self.footer[0] == self.COMPRESSED
+   def IsSnappyCompressed(self) -> bool:
+     """Returns true if the block is snappy compressed."""
+     return self.footer[0] == self.SNAPPY_COMPRESSED
+
+   def IsZstdCompressed(self) -> bool:
+     """Returns true if the block is zstd compressed."""
+     return self.footer[0] == self.ZSTD_COMPRESSED

    def GetBuffer(self) -> bytes:
      """Returns the block buffer, decompressing if required."""
-     if self.IsCompressed():
+     if self.IsSnappyCompressed():
        return snappy.decompress(self.data)
+     if self.IsZstdCompressed():
+       return zstd.decompress(self.data)
      return self.data

    def GetRecords(self) -> Iterable[LdbKeyValueRecord]:
@@ -246,8 +254,7 @@ class LdbFileReader:
        LdbKeyValueRecords.
      """
      for block in self.GetBlocks():
-       for record in block.GetRecords():
-         yield record
+       yield from block.GetRecords()

    def RangeIter(self) -> Iterable[Tuple[bytes, bytes]]: #pylint: disable=C0103
      """Returns an iterator of key-value pairs.
dfindexeddb/leveldb/log.py CHANGED
@@ -20,7 +20,6 @@ from enum import IntEnum
  import io
  from typing import BinaryIO, Generator, Iterable, Optional

- from dfindexeddb import errors
  from dfindexeddb import utils


@@ -198,11 +197,10 @@ class Block:
        LogFileRecord
      """
      buffer = io.BytesIO(self.data)
-     while True:
-       try:
-         yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
-       except errors.DecoderError:
-         return
+     buffer_length = len(self.data)
+
+     while buffer.tell() < buffer_length:
+       yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)

    @classmethod
    def FromStream(cls, stream: BinaryIO) -> Optional[Block]:
@@ -212,7 +210,8 @@ class Block:
        stream: the binary stream to be parsed.

      Returns:
-       the Block or None if there is no data to read from the stream."""
+       the Block or None if there is no data to read from the stream.
+     """
      offset = stream.tell()
      data = stream.read(cls.BLOCK_SIZE) # reads full and partial blocks
      if not data:
@@ -266,8 +265,7 @@ class LogFileReader:
        PhysicalRecord
      """
      for block in self.GetBlocks():
-       for physical_record in block.GetPhysicalRecords():
-         yield physical_record
+       yield from block.GetPhysicalRecords()

    def GetWriteBatches(self) -> Generator[WriteBatch, None, None]:
      """Returns an iterator of WriteBatch instances.
@@ -304,5 +302,4 @@ class LogFileReader:
        KeyValueRecord
      """
      for batch in self.GetWriteBatches():
-       for record in batch.records:
-         yield record
+       yield from batch.records
dfindexeddb/utils.py CHANGED
@@ -68,10 +68,10 @@ class StreamDecoder:
      offset = self.stream.tell()
      buffer = self.stream.read(count)
      if count == -1 and not buffer:
-       raise errors.DecoderError('No bytes available')
+       raise errors.DecoderError(f'No bytes available at offset {offset}')
      if count != -1 and len(buffer) != count:
        raise errors.DecoderError(
-           f'Read {len(buffer)}, wanted {count}, at stream offset {offset}')
+           f'Read {len(buffer)} bytes, but wanted {count} at offset {offset}')
      return offset, buffer

    def PeekBytes(self, count: int) -> Tuple[int, bytes]:
@@ -225,7 +225,8 @@ class LevelDBDecoder(StreamDecoder):
      offset = self.stream.tell()
      buffer = self.stream.read()
      if len(buffer) % 2:
-       raise errors.DecoderError('Odd number of bytes encountered')
+       raise errors.DecoderError(
+           f'Odd number of bytes encountered at offset {offset}')
      return offset, buffer.decode('utf-16-be')

    def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
dfindexeddb/version.py CHANGED
@@ -14,7 +14,7 @@
  # limitations under the License.
  """Version information for dfIndexeddb."""

- __version__ = "20240224"
+ __version__ = "20240229"


  def GetVersion():
dfindexeddb-20240229.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dfindexeddb
- Version: 20240224
+ Version: 20240229
  Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
  Author-email: Syd Pleno <sydp@google.com>
  Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
@@ -218,6 +218,7 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  License-File: AUTHORS
  Requires-Dist: python-snappy ==0.6.1
+ Requires-Dist: zstd ==1.5.5.1

  # dfIndexeddb

@@ -235,6 +236,12 @@ include:
  * emails and contact information from an e-mail application,
  * images and metadata from a photo gallery application

+ ## Installation
+
+ ```
+ $ pip install dfindexeddb
+ ```
+
  ## Installation from source

  ### Linux
@@ -255,23 +262,57 @@ include:
  $ pip install .
  ```

- ## Tools
+ ## Usage
+
+ A CLI tool is available after installation:

- This repository contains a number of scripts which demonstrate how one can use
- this library. To run these tools, please install the `click` python package.
+ ```
+ $ dfindexeddb -h
+ usage: dfindexeddb [-h] -s SOURCE [--json] {log,ldb,indexeddb} ...

- * `tools/indexeddb_dump.py` - parses structures from an IndexedDB and prints
-   them to standard output.
-   - Optionally, you can also install the `leveldb` python package if you
-     would prefer to use a native leveldb library instead of the leveldb parser in
-     this repository.
- * `tools/ldb_dump.py` - parses structures from a LevelDB .ldb file and prints
-   them to standard output.
- * `tools/log_dump.py` - parses structures from a LevelDB .log file and prints
-   them to standard output.
+ A cli tool for the dfindexeddb package

+ positional arguments:
+   {log,ldb,indexeddb}
+
+ options:
+   -s SOURCE, --source SOURCE
+                         The source leveldb file
+   --json                Output as JSON
+ ```
+
+ To parse a LevelDB .log file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> log -h
+ usage: dfindexeddb log [-h] {blocks,physical_records,write_batches,parsed_internal_key,records}

+ positional arguments:
+   {blocks,physical_records,write_batches,parsed_internal_key,records}

+ options:
+   -h, --help  show this help message and exit
  ```
- $ pip install click leveldb
+
+ To parse a LevelDB .ldb file:
+
+ ```
+ $ dfindexeddb -s <SOURCE> ldb -h
+ usage: dfindexeddb ldb [-h] {blocks,records}
+
+ positional arguments:
+   {blocks,records}
+
+ options:
+   -h, --help  show this help message and exit
+ ```
+
+ To parse a LevelDB .ldb or .log file as IndexedDB:
+
+ ```
+ $ dfindexeddb -s <SOURCE> indexeddb -h
+ usage: dfindexeddb indexeddb [-h]
+
+ options:
+   -h, --help  show this help message and exit
  ```
dfindexeddb-20240229.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
+ dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
+ dfindexeddb/cli.py,sha256=LD2-BwmXC3qFcJwgP09QDFxU3HOGtsg0Kbtyx-hAqzA,4525
+ dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
+ dfindexeddb/utils.py,sha256=g9iiGRX4DB1wFBSBHa6b9lg7JzAdE0SN0DrdB2aS_Co,10091
+ dfindexeddb/version.py,sha256=XwHKYiT0CeLWo90AaJfOYHD1mEEgIlUUSB6ot_rU8wc,750
+ dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
+ dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
+ dfindexeddb/indexeddb/chromium.py,sha256=Anw6QIU7PrsxpUW7qxrUXRb5vBRcxozhv3mHov7Ti8k,43984
+ dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
+ dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
+ dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
+ dfindexeddb/leveldb/ldb.py,sha256=uShhXjQe4Sz3dn54IXbGxRtE6D8RNpu1NDy5Zb0P9LA,7927
+ dfindexeddb/leveldb/log.py,sha256=cyMfjDz5a6gfGb5NonxC1Y72OmHYBWzYK8UMVzP_umw,8532
+ dfindexeddb-20240229.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
+ dfindexeddb-20240229.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ dfindexeddb-20240229.dist-info/METADATA,sha256=ILzTLaRO96ALuHL8d72tt3a3shliEGzGHZC54n5wPpc,15933
+ dfindexeddb-20240229.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ dfindexeddb-20240229.dist-info/entry_points.txt,sha256=UsfPLLhTiVAAtZ8Rq3ZR7JNFGMuHqJy-tugGWonQWtc,52
+ dfindexeddb-20240229.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
+ dfindexeddb-20240229.dist-info/RECORD,,
dfindexeddb-20240229.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ dfindexeddb = dfindexeddb.cli:App
@@ -1,18 +0,0 @@
- dfindexeddb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
- dfindexeddb/errors.py,sha256=PNpwyf_lrPc4TE77oAakX3mu5D_YcP3f80wq8Y1LkvY,749
- dfindexeddb/utils.py,sha256=TG27xY8AjwNs_736ZaPHMERy1HiLv0E123PhPQ-JKKQ,10038
- dfindexeddb/version.py,sha256=-nNzbnRjBpknBvUjJZ8_2lFtuUdQv-yv4r8l78vkDRw,750
- dfindexeddb/indexeddb/__init__.py,sha256=kExXSVBCTKCD5BZJkdMfUMqGksH-DMJxP2_lI0gq-BE,575
- dfindexeddb/indexeddb/blink.py,sha256=MblpYfv-ByG7n_fjYKu2EUhpfVJdUveoW4oSAg5T4tY,3534
- dfindexeddb/indexeddb/chromium.py,sha256=N1aCoJETNqLER8T_C4bmfrxiNr1csJhUJ4-14qrl0nc,42291
- dfindexeddb/indexeddb/definitions.py,sha256=yline3y3gmZx6s-dwjpPDNs5HO4zT6KZqPWQfEsHDoM,7413
- dfindexeddb/indexeddb/v8.py,sha256=ldqpc9T1kG7BOdjnHjQ5hNO9OCXZ3_Zd6vRSpC-NrEA,21893
- dfindexeddb/leveldb/__init__.py,sha256=KPYL9__l8od6_OyDfGRTgaJ6iy_fqIgZ-dS2S-e3Rac,599
- dfindexeddb/leveldb/ldb.py,sha256=AE50TW7zxSUBbi9cmcUs63m_JKnrD24OQSCoamIJ4eU,7668
- dfindexeddb/leveldb/log.py,sha256=nNiycsC9uEmvBatjhsN37cWz0_3A6WBo5byv28-cZdg,8627
- dfindexeddb-20240224.dist-info/AUTHORS,sha256=QbvjbAom57fpEkekkCVFUj0B9KUMGraR510aUMBC-PE,286
- dfindexeddb-20240224.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- dfindexeddb-20240224.dist-info/METADATA,sha256=YoQCcGKgTzYH2j9hr2YjZSy5q6NC55IG2AAS-0Ht89Q,15444
- dfindexeddb-20240224.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- dfindexeddb-20240224.dist-info/top_level.txt,sha256=X9OTaub1c8S_JJ7g-f8JdkhhdiZ4x1j4eni1hdUCwE4,12
- dfindexeddb-20240224.dist-info/RECORD,,