PyPI - dfindexeddb - Versions diffs - 20240417__py3-none-any.whl → 20240519__py3-none-any.whl - Mend

dfindexeddb 20240417__py3-none-any.whl → 20240519__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

dfindexeddb/indexeddb/chromium/blink.py +5 -0
dfindexeddb/indexeddb/chromium/record.py +90 -7
dfindexeddb/indexeddb/cli.py +82 -80
dfindexeddb/indexeddb/safari/definitions.py +123 -0
dfindexeddb/indexeddb/safari/record.py +238 -0
dfindexeddb/indexeddb/safari/webkit.py +701 -0
dfindexeddb/leveldb/cli.py +70 -11
dfindexeddb/leveldb/log.py +9 -3
dfindexeddb/leveldb/plugins/__init__.py +17 -0
dfindexeddb/leveldb/plugins/chrome_notifications.py +135 -0
dfindexeddb/leveldb/plugins/interface.py +36 -0
dfindexeddb/leveldb/plugins/manager.py +75 -0
dfindexeddb/leveldb/plugins/notification_database_data_pb2.py +38 -0
dfindexeddb/leveldb/record.py +212 -53
dfindexeddb/utils.py +34 -0
dfindexeddb/version.py +1 -1
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/METADATA +74 -80
dfindexeddb-20240519.dist-info/RECORD +37 -0
dfindexeddb-20240417.dist-info/RECORD +0 -29
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/AUTHORS +0 -0
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/LICENSE +0 -0
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/WHEEL +0 -0
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/entry_points.txt +0 -0
{dfindexeddb-20240417.dist-info → dfindexeddb-20240519.dist-info}/top_level.txt +0 -0

dfindexeddb/indexeddb/chromium/blink.py CHANGED Viewed

@@ -780,6 +780,9 @@ class V8ScriptValueDecoder:
     Returns:
       A parsed CryptoKey.
+    Raises:
+      ParserError: if there is an unexpected CryptoKeySubTag.
     """
     _, raw_key_byte = self.deserializer.decoder.DecodeUint8()
     key_byte = definitions.CryptoKeySubTag(raw_key_byte)
@@ -795,6 +798,8 @@ class V8ScriptValueDecoder:
       key_type, algorithm_parameters = self._ReadED25519Key()
     elif key_byte == definitions.CryptoKeySubTag.NO_PARAMS_KEY:
       key_type, algorithm_parameters = self.ReadNoParamsKey()
+    else:
+      raise errors.ParserError('Unexpected CryptoKeySubTag')
     _, raw_usages = self.deserializer.decoder.DecodeUint32Varint()
     usages = definitions.CryptoKeyUsage(raw_usages)

dfindexeddb/indexeddb/chromium/record.py CHANGED Viewed

@@ -17,7 +17,11 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime
 import io
-from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union
+import pathlib
+import sys
+import traceback
+from typing import Any, BinaryIO, Generator, Optional, Tuple, Type, TypeVar, \
+    Union
 from dfindexeddb import errors
 from dfindexeddb.indexeddb.chromium import blink
@@ -456,7 +460,7 @@ class MaxDatabaseIdKey(BaseIndexedDBKey):
       cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
       base_offset: int = 0
   ) -> MaxDatabaseIdKey:
-    """Decodes the maximum databse key."""
+    """Decodes the maximum database key."""
     offset, key_type = decoder.DecodeUint8()
     if key_type != definitions.GlobalMetadataKeyType.MAX_DATABASE_ID:
       raise errors.ParserError('Not a MaxDatabaseIdKey')
@@ -1271,14 +1275,17 @@ class ExternalObjectEntry(utils.FromDecoderMixin):
         filename = None
         last_modified = None
       token = None
-    elif (object_type ==
-        definitions.ExternalObjectType.FILE_SYSTEM_ACCESS_HANDLE):
+    else:
+      if (object_type ==
+          definitions.ExternalObjectType.FILE_SYSTEM_ACCESS_HANDLE):
+        _, token = decoder.DecodeBlobWithLength()
+      else:
+        token = None
       blob_number = None
       mime_type = None
       size = None
       filename = None
       last_modified = None
-      _, token = decoder.DecodeBlobWithLength()
     return cls(offset=base_offset + offset, object_type=object_type,
         blob_number=blob_number, mime_type=mime_type, size=size,
@@ -1331,12 +1338,14 @@ class IndexedDBRecord:
   """An IndexedDB Record.
   Attributes:
+    path: the source file path
     offset: the offset of the record.
     key: the key of the record.
     value: the value of the record.
     sequence_number: if available, the sequence number of the record.
     type: the type of the record.
-    level: the leveldb level, None indicates the record came from a log file.
+    level: the leveldb level, if applicable, None can indicate the record
+        originated from a log file or the level could not be determined.
     recovered: True if the record is a recovered record.
   """
   path: str
@@ -1350,7 +1359,8 @@ class IndexedDBRecord:
   @classmethod
   def FromLevelDBRecord(
-      cls, db_record: record.LevelDBRecord
+      cls,
+      db_record: record.LevelDBRecord
   ) -> IndexedDBRecord:
     """Returns an IndexedDBRecord from a ParsedInternalKey."""
     idb_key = IndexedDbKey.FromBytes(
@@ -1366,3 +1376,76 @@ class IndexedDBRecord:
         type=db_record.record.record_type,
         level=db_record.level,
         recovered=db_record.recovered)
+  @classmethod
+  def FromFile(
+      cls,
+      file_path: pathlib.Path
+  ) -> Generator[IndexedDBRecord, None, None]:
+    """Yields IndexedDBRecords from a file."""
+    for db_record in record.LevelDBRecord.FromFile(file_path):
+      try:
+        yield cls.FromLevelDBRecord(db_record)
+      except(
+          errors.ParserError,
+          errors.DecoderError,
+          NotImplementedError) as err:
+        print((
+            'Error parsing Indexeddb record: '
+            f'{err} at offset {db_record.record.offset} in '
+            f'{db_record.path}'),
+            file=sys.stderr)
+        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+class FolderReader:
+  """A IndexedDB folder reader for Chrome/Chromium.
+  Attributes:
+    foldername (str): the source LevelDB folder.
+  """
+  def __init__(self, foldername: pathlib.Path):
+    """Initializes the FileReader.
+    Args:
+      foldername: the source IndexedDB folder.
+    Raises:
+      ValueError: if foldername is None or not a directory.
+    """
+    if not foldername or not foldername.is_dir():
+      raise ValueError(f'{foldername} is None or not a directory')
+    self.foldername = foldername
+  def GetRecords(
+      self,
+      use_manifest: bool = False,
+      use_sequence_number: bool = False
+  ) -> Generator[IndexedDBRecord, None, None]:
+    """Yield LevelDBRecords.
+    Args:
+      use_manifest: True to use the current manifest in the folder as a means to
+          find the active file set.
+      use_sequence_number: True to use the sequence number to determine the
+    Yields:
+      IndexedDBRecord.
+    """
+    leveldb_folder_reader = record.FolderReader(self.foldername)
+    for leveldb_record in leveldb_folder_reader.GetRecords(
+        use_manifest=use_manifest,
+        use_sequence_number=use_sequence_number):
+      try:
+        yield IndexedDBRecord.FromLevelDBRecord(
+            leveldb_record)
+      except(
+          errors.ParserError,
+          errors.DecoderError,
+          NotImplementedError) as err:
+        print((
+            'Error parsing Indexeddb record: '
+            f'{err} at offset {leveldb_record.record.offset} in '
+            f'{leveldb_record.path}'),
+            file=sys.stderr)
+        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)

dfindexeddb/indexeddb/cli.py CHANGED Viewed

@@ -19,14 +19,13 @@ import enum
 from datetime import datetime
 import json
 import pathlib
-import sys
-import traceback
-from dfindexeddb import errors
+from dfindexeddb import utils
 from dfindexeddb import version
-from dfindexeddb.leveldb import record as leveldb_record
+from dfindexeddb.indexeddb.chromium import blink
 from dfindexeddb.indexeddb.chromium import record as chromium_record
 from dfindexeddb.indexeddb.chromium import v8
+from dfindexeddb.indexeddb.safari import record as safari_record
 _VALID_PRINTABLE_CHARACTERS = (
@@ -38,7 +37,7 @@ class Encoder(json.JSONEncoder):
   """A JSON encoder class for dfindexeddb fields."""
   def default(self, o):
     if dataclasses.is_dataclass(o):
-      o_dict = dataclasses.asdict(o)
+      o_dict = utils.asdict(o)
       return o_dict
     if isinstance(o, bytes):
       out = []
@@ -73,98 +72,92 @@ def _Output(structure, output):
     print(structure)
+def BlinkCommand(args):
+  """The CLI for processing a file as a blink value."""
+  with open(args.source, 'rb') as fd:
+    buffer = fd.read()
+    blink_value = blink.V8ScriptValueDecoder.FromBytes(buffer)
+    _Output(blink_value, output=args.output)
 def DbCommand(args):
-  """The CLI for processing a directory as indexeddb."""
-  if args.use_manifest:
-    for db_record in leveldb_record.LevelDBRecord.FromManifest(args.source):
-      record = db_record.record
-      try:
-        idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
-            db_record)
-      except(
-          errors.ParserError,
-          errors.DecoderError,
-          NotImplementedError) as err:
-        print((
-            f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
-            f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
-        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
-        continue
-      _Output(idb_record, output=args.output)
-  else:
-    for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
-      record = db_record.record
-      try:
-        idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
-            db_record)
-      except(
-          errors.ParserError,
-          errors.DecoderError,
-          NotImplementedError) as err:
-        print((
-            f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
-            f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
-        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
-        continue
-      _Output(idb_record, output=args.output)
+  """The CLI for processing a directory as IndexedDB."""
+  if args.format in ('chrome', 'chromium'):
+    for db_record in chromium_record.FolderReader(
+        args.source).GetRecords(
+            use_manifest=args.use_manifest,
+            use_sequence_number=args.use_sequence_number):
+      _Output(db_record, output=args.output)
+  elif args.format == 'safari':
+    for db_record in safari_record.FileReader(args.source).Records():
+      _Output(db_record, output=args.output)
 def LdbCommand(args):
-  """The CLI for processing a leveldb table (.ldb) file as indexeddb."""
-  for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
-    record = db_record.record
-    try:
-      idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
-          db_record)
-    except(
-        errors.ParserError,
-        errors.DecoderError,
-        NotImplementedError) as err:
-      print(
-          (f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
-           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
-      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
-      continue
-    _Output(idb_record, output=args.output)
+  """The CLI for processing a LevelDB table (.ldb) file as IndexedDB."""
+  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+    _Output(db_record, output=args.output)
 def LogCommand(args):
-  """The CLI for processing a leveldb log file as indexeddb."""
-  for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
-    record = db_record.record
-    try:
-      idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
-          db_record)
-    except(
-        errors.ParserError,
-        errors.DecoderError,
-        NotImplementedError) as err:
-      print(
-          (f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
-           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
-      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
-      continue
-    _Output(idb_record, output=args.output)
+  """The CLI for processing a LevelDB log file as IndexedDB."""
+  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+    _Output(db_record, output=args.output)
 def App():
   """The CLI app entrypoint for dfindexeddb."""
   parser = argparse.ArgumentParser(
       prog='dfindexeddb',
-      description='A cli tool for parsing indexeddb files',
+      description='A cli tool for parsing IndexedDB files',
       epilog=f'Version {version.GetVersion()}')
   subparsers = parser.add_subparsers()
+  parser_blink = subparsers.add_parser(
+      'blink', help='Parse a file as a blink value.')
+  parser_blink.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help=(
+        'The source file.'))
+  parser_blink.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format.  Default is json')
+  parser_blink.set_defaults(func=BlinkCommand)
   parser_db = subparsers.add_parser(
-      'db', help='Parse a directory as indexeddb.')
-  parser_db.add_argument(
-      '-s', '--source', required=True, type=pathlib.Path,
-      help='The source leveldb folder')
+      'db', help='Parse a directory as IndexedDB.')
   parser_db.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help=(
+        'The source IndexedDB folder (for chrome/chromium) '
+        'or file (for safari).'))
+  recover_group = parser_db.add_mutually_exclusive_group()
+  recover_group.add_argument(
       '--use_manifest',
       action='store_true',
       help='Use manifest file to determine active/deleted records.')
+  recover_group.add_argument(
+      '--use_sequence_number',
+      action='store_true',
+      help=(
+          'Use sequence number and file offset to determine active/deleted '
+          'records.'))
+  parser_db.add_argument(
+      '--format',
+      required=True,
+      choices=['chromium', 'chrome', 'safari'],
+      help='The type of IndexedDB to parse.')
   parser_db.add_argument(
       '-o',
       '--output',
@@ -177,9 +170,12 @@ def App():
   parser_db.set_defaults(func=DbCommand)
   parser_ldb = subparsers.add_parser(
-      'ldb', help='Parse a ldb file as indexeddb.')
+      'ldb',
+      help='Parse a ldb file as IndexedDB.')
   parser_ldb.add_argument(
-      '-s', '--source', required=True, type=pathlib.Path,
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
       help='The source .ldb file.')
   parser_ldb.add_argument(
       '-o',
@@ -193,9 +189,12 @@ def App():
   parser_ldb.set_defaults(func=LdbCommand)
   parser_log = subparsers.add_parser(
-      'log', help='Parse a log file as indexeddb.')
+      'log',
+      help='Parse a log file as IndexedDB.')
   parser_log.add_argument(
-      '-s', '--source', required=True, type=pathlib.Path,
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
       help='The source .log file.')
   parser_log.add_argument(
       '-o',
@@ -209,4 +208,7 @@ def App():
   parser_log.set_defaults(func=LogCommand)
   args = parser.parse_args()
-  args.func(args)
+  if hasattr(args, 'func'):
+    args.func(args)
+  else:
+    parser.print_help()

dfindexeddb/indexeddb/safari/definitions.py ADDED Viewed

@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Definitions for Webkit/Safari."""
+from enum import IntEnum
+CurrentVersion = 0x0000000F  # 15
+TerminatorTag = 0xFFFFFFFF
+StringPoolTag = 0xFFFFFFFE
+NonIndexPropertiesTag = 0xFFFFFFFD
+ImageDataPoolTag = 0xFFFFFFFE
+StringDataIs8BitFlag = 0x80000000
+SIDBKeyVersion = 0x00
+class SIDBKeyType(IntEnum):
+  """SIDBKeyType."""
+  MIN = 0x00
+  NUMBER = 0x20
+  DATE = 0x40
+  STRING = 0x60
+  BINARY = 0x80
+  ARRAY = 0xA0
+  MAX = 0xFF
+class SerializationTag(IntEnum):
+  """Database Metadata key types.
+  All tags are recorded as a single uint8_t.
+  """
+  ARRAY = 1
+  OBJECT = 2
+  UNDEFINED = 3
+  NULL = 4
+  INT = 5
+  ZERO = 6
+  ONE = 7
+  FALSE = 8
+  TRUE = 9
+  DOUBLE = 10
+  DATE = 11
+  FILE = 12
+  FILE_LIST = 13
+  IMAGE_DATA = 14
+  BLOB = 15
+  STRING = 16
+  EMPTY_STRING = 17
+  REG_EXP = 18
+  OBJECT_REFERENCE = 19
+  MESSAGE_PORT_REFERENCE = 20
+  ARRAY_BUFFER = 21
+  ARRAY_BUFFER_VIEW = 22
+  ARRAY_BUFFER_TRANSFER = 23
+  TRUE_OBJECT = 24
+  FALSE_OBJECT = 25
+  STRING_OBJECT = 26
+  EMPTY_STRING_OBJECT = 27
+  NUMBER_OBJECT = 28
+  SET_OBJECT = 29
+  MAP_OBJECT = 30
+  NON_MAP_PROPERTIES = 31
+  NON_SET_PROPERTIES = 32
+  CRYPTO_KEY = 33
+  SHARED_ARRAY_BUFFER = 34
+  WASM_MODULE = 35
+  DOM_POINT_READONLY = 36
+  DOM_POINT = 37
+  DOM_RECT_READONLY = 38
+  DOM_RECT = 39
+  DOM_MATRIX_READONLY = 40
+  DOM_MATRIX = 41
+  DOM_QUAD = 42
+  IMAGE_BITMAP_TRANSFER = 43
+  RTC_CERTIFICATE = 44
+  IMAGE_BITMAP = 45
+  OFF_SCREEN_CANVAS_TRANSFER = 46
+  BIGINT = 47
+  BIGINT_OBJECT = 48
+  WASM_MEMORY = 49
+  RTC_DATA_CHANNEL_TRANSFER = 50
+  DOM_EXCEPTION = 51
+  WEB_CODECS_ENCODED_VIDEO_CHUNK = 52
+  WEB_CODECS_VIDEO_FRAME = 53
+  RESIZABLE_ARRAY_BUFFER = 54
+  ERROR_INSTANCE = 55
+  IN_MEMORY_OFFSCREEN_CANVAS = 56
+  IN_MEMORY_MESSAGE_PORT = 57
+  WEB_CODECS_ENCODED_AUDIO_CHUNK = 58
+  WEB_CODECS_AUDIO_DATA = 59
+  MEDIA_STREAM_TRACK = 60
+  MEDIA_SOURCE_HANDLE_TRANSFER = 61
+  ERROR = 255
+class ArrayBufferViewSubtag(IntEnum):
+  """ArrayBufferView sub tags."""
+  DATA_VIEW = 0
+  INT8_ARRAY = 1
+  UINT8_ARRAY = 2
+  UINT8_CLAMPED_ARRAY = 3
+  INT16_ARRAY = 4
+  UINT16_ARRAY = 5
+  INT32_ARRAY = 6
+  UINT32_ARRAY = 7
+  FLOAT32_ARRAY = 8
+  FLOAT64_ARRAY = 9
+  BIG_INT64_ARRAY = 10
+  BIG_UINT64_ARRAY = 11

dfindexeddb 20240417__py3-none-any.whl → 20240519__py3-none-any.whl

dfindexeddb 20240417__py3-none-any.whl → 20240519__py3-none-any.whl