PyPI - dfindexeddb - Versions diffs - 20240402__py3-none-any.whl → 20240417__py3-none-any.whl - Mend

dfindexeddb 20240402__py3-none-any.whl → 20240417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

dfindexeddb/indexeddb/chromium/definitions.py CHANGED Viewed

@@ -141,10 +141,12 @@ class BlinkSerializationTag(IntEnum):
   ENCODED_AUDIO_CHUNK = ord('y')
   ENCODED_VIDEO_CHUNK = ord('z')
   CROP_TARGET = ord('c')
+  RESTRICTION_TARGET = ord('D')
   MEDIA_SOURCE_HANDLE = ord('S')
   DEPRECATED_DETECTED_BARCODE = ord('B')
   DEPRECATED_DETECTED_FACE = ord('F')
   DEPRECATED_DETECTED_TEXT = ord('t')
+  FENCED_FRAME_CONFIG = ord('C')
   DOM_EXCEPTION = ord('x')
   TRAILER_OFFSET = 0xFE
   VERSION = 0xFF
@@ -304,3 +306,67 @@ class V8ErrorTag(IntEnum):
   CAUSE = ord('c')
   STACK = ord('s')
   END = ord('.')
+class ImageSerializationTag(IntEnum):
+  """Image Serialization tags."""
+  END = 0
+  PREDEFINED_COLOR_SPACE = 1
+  CANVAS_PIXEL_FORMAT = 2
+  IMAGE_DATA_STORAGE_FORMAT = 3
+  ORIGIN_CLEAN = 4
+  IS_PREMULTIPLIED = 5
+  CANVAS_OPACITY_MODE = 6
+  PARAMETRIC_COLOR_SPACE = 7
+  IMAGE_ORIENTATION = 8
+  LAST = IMAGE_ORIENTATION
+class SerializedPredefinedColorSpace(IntEnum):
+  """Serialized Predefined Color Space enumeration."""
+  LEGACY_OBSOLETE = 0
+  SRGB = 1
+  REC2020 = 2
+  P3 = 3
+  REC2100HLG = 4
+  REC2100PQ = 5
+  SRGB_LINEAR = 6
+  LAST = SRGB_LINEAR
+class SerializedPixelFormat(IntEnum):
+  """Serialized Pixel Format enumeration."""
+  NATIVE8_LEGACY_OBSOLETE = 0
+  F16 = 1
+  RGBA8 = 2
+  BGRA8 = 3
+  RGBX8 = 4
+  LAST = RGBX8
+class SerializedImageDataStorageFormat(IntEnum):
+  """The Serialized Image Data Storage Format."""
+  UINT8CLAMPED = 0
+  UINT16 = 1
+  FLOAT32 = 2
+  LAST = FLOAT32
+class SerializedOpacityMode(IntEnum):
+  """The Serialized Opacity Mode."""
+  KNONOPAQUE = 0
+  KOPAQUE = 1
+  KLAST = KOPAQUE
+class SerializedImageOrientation(IntEnum):
+  """The Serialized Image Orientation."""
+  TOP_LEFT = 0
+  TOP_RIGHT = 1
+  BOTTOM_RIGHT = 2
+  BOTTOM_LEFT = 3
+  LEFT_TOP = 4
+  RIGHT_TOP = 5
+  RIGHT_BOTTOM = 6
+  LEFT_BOTTOM = 7
+  LAST = LEFT_BOTTOM

dfindexeddb/indexeddb/chromium/record.py CHANGED Viewed

@@ -22,8 +22,7 @@ from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union
 from dfindexeddb import errors
 from dfindexeddb.indexeddb.chromium import blink
 from dfindexeddb.indexeddb.chromium import definitions
-from dfindexeddb.leveldb import ldb
-from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import record
 from dfindexeddb.leveldb import utils
@@ -546,7 +545,7 @@ class EarliestSweepKey(BaseIndexedDBKey):
 @dataclass
-class EarlistCompactionTimeKey(BaseIndexedDBKey):
+class EarliestCompactionTimeKey(BaseIndexedDBKey):
   """An earliest compaction time IndexedDB key."""
   def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int:
@@ -558,11 +557,11 @@ class EarlistCompactionTimeKey(BaseIndexedDBKey):
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
       base_offset: int = 0
-  ) -> EarlistCompactionTimeKey:
+  ) -> EarliestCompactionTimeKey:
     """Decodes the earliest compaction time key."""
     offset, key_type = decoder.DecodeUint8()
     if key_type != definitions.GlobalMetadataKeyType.EARLIEST_COMPACTION_TIME:
-      raise errors.ParserError('Not a EarlistCompactionTimeKey')
+      raise errors.ParserError('Not a EarliestCompactionTimeKey')
     return cls(offset=base_offset + offset, key_prefix=key_prefix)
@@ -668,7 +667,7 @@ class GlobalMetaDataKey(BaseIndexedDBKey):
       definitions.GlobalMetadataKeyType
           .EARLIEST_SWEEP: EarliestSweepKey,
       definitions.GlobalMetadataKeyType
-          .EARLIEST_COMPACTION_TIME: EarlistCompactionTimeKey,
+          .EARLIEST_COMPACTION_TIME: EarliestCompactionTimeKey,
       definitions.GlobalMetadataKeyType
           .SCOPES_PREFIX: ScopesPrefixKey,
       definitions.GlobalMetadataKeyType
@@ -692,7 +691,7 @@ class GlobalMetaDataKey(BaseIndexedDBKey):
              Type[DatabaseFreeListKey],
              Type[DatabaseNameKey],
              Type[EarliestSweepKey],
-             Type[EarlistCompactionTimeKey],
+             Type[EarliestCompactionTimeKey],
              Type[MaxDatabaseIdKey],
              Type[RecoveryBlobJournalKey],
              Type[SchemaVersionKey],
@@ -972,7 +971,7 @@ class ObjectStoreDataValue:
     blob_offset: the blob offset, only valid if wrapped.
     value: the blink serialized value, only valid if not wrapped.
   """
-  unkown: int
+  unknown: int
   is_wrapped: bool
   blob_size: Optional[int]
   blob_offset: Optional[int]
@@ -1003,7 +1002,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
       _, blob_size = decoder.DecodeVarint()
       _, blob_offset = decoder.DecodeVarint()
       return ObjectStoreDataValue(
-          unkown=unknown_integer,
+          unknown=unknown_integer,
           is_wrapped=True,
           blob_size=blob_size,
           blob_offset=blob_offset,
@@ -1011,7 +1010,7 @@ class ObjectStoreDataKey(BaseIndexedDBKey):
     _, blink_bytes = decoder.ReadBytes()
     blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes)
     return ObjectStoreDataValue(
-        unkown=unknown_integer,
+        unknown=unknown_integer,
         is_wrapped=False,
         blob_size=None,
         blob_offset=None,
@@ -1337,24 +1336,33 @@ class IndexedDBRecord:
     value: the value of the record.
     sequence_number: if available, the sequence number of the record.
     type: the type of the record.
+    level: the leveldb level, None indicates the record came from a log file.
+    recovered: True if the record is a recovered record.
   """
+  path: str
   offset: int
   key: Any
   value: Any
-  sequence_number: int
+  sequence_number: Optional[int]
   type: int
+  level: Optional[int]
+  recovered: Optional[bool]
   @classmethod
   def FromLevelDBRecord(
-      cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
+      cls, db_record: record.LevelDBRecord
   ) -> IndexedDBRecord:
     """Returns an IndexedDBRecord from a ParsedInternalKey."""
-    idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
-    idb_value = idb_key.ParseValue(record.value)
+    idb_key = IndexedDbKey.FromBytes(
+        db_record.record.key, base_offset=db_record.record.offset)
+    idb_value = idb_key.ParseValue(db_record.record.value)
     return cls(
-      offset=record.offset,
-      key=idb_key,
-      value=idb_value,
-      sequence_number=record.sequence_number if hasattr(
-          record, 'sequence_number') else None,
-      type=record.record_type)
+        path=db_record.path,
+        offset=db_record.record.offset,
+        key=idb_key,
+        value=idb_value,
+        sequence_number=db_record.record.sequence_number if hasattr(
+            db_record.record, 'sequence_number') else None,
+        type=db_record.record.record_type,
+        level=db_record.level,
+        recovered=db_record.recovered)

dfindexeddb/indexeddb/chromium/v8.py CHANGED Viewed

@@ -152,7 +152,12 @@ class ValueDeserializer:
       _, tag_value = self.decoder.PeekBytes(1)
     except errors.DecoderError:
       return None
-    return definitions.V8SerializationTag(tag_value[0])
+    try:
+      return definitions.V8SerializationTag(tag_value[0])
+    except ValueError as error:
+      raise errors.ParserError(
+          f'Invalid v8 tag value {tag_value} at offset'
+          f' {self.decoder.stream.tell()}') from error
   def _ReadTag(self) -> definitions.V8SerializationTag:
     """Returns the next non-padding serialization tag.
@@ -269,7 +274,7 @@ class ValueDeserializer:
             self.version >= 15):
       parsed_object = self.ReadSharedObject()
     elif self.version < 13:
-      self.decoder.stream.seek(-1)
+      self.decoder.stream.seek(-1, os.SEEK_CUR)
       parsed_object = self.ReadHostObject()
     else:
       parsed_object = None
@@ -492,7 +497,7 @@ class ValueDeserializer:
     return value
   def _ReadJSRegExp(self) -> RegExp:
-    """Reads a Javscript regular expression from the current position."""
+    """Reads a Javascript regular expression from the current position."""
     next_id = self._GetNextId()
     pattern = self.ReadString()
     _, flags = self.decoder.DecodeUint32Varint()  # TODO: verify flags

dfindexeddb/indexeddb/cli.py CHANGED Viewed

@@ -15,6 +15,7 @@
 """A CLI tool for dfindexeddb."""
 import argparse
 import dataclasses
+import enum
 from datetime import datetime
 import json
 import pathlib
@@ -57,6 +58,8 @@ class Encoder(json.JSONEncoder):
       return list(o)
     if isinstance(o, v8.RegExp):
       return str(o)
+    if isinstance(o, enum.Enum):
+      return o.name
     return json.JSONEncoder.default(self, o)
@@ -70,22 +73,78 @@ def _Output(structure, output):
     print(structure)
-def IndexeddbCommand(args):
-  """The CLI for processing a log/ldb file as indexeddb."""
-  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
+def DbCommand(args):
+  """The CLI for processing a directory as indexeddb."""
+  if args.use_manifest:
+    for db_record in leveldb_record.LevelDBRecord.FromManifest(args.source):
+      record = db_record.record
+      try:
+        idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
+            db_record)
+      except(
+          errors.ParserError,
+          errors.DecoderError,
+          NotImplementedError) as err:
+        print((
+            f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
+            f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+        continue
+      _Output(idb_record, output=args.output)
+  else:
+    for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
+      record = db_record.record
+      try:
+        idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
+            db_record)
+      except(
+          errors.ParserError,
+          errors.DecoderError,
+          NotImplementedError) as err:
+        print((
+            f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
+            f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+        print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+        continue
+      _Output(idb_record, output=args.output)
+def LdbCommand(args):
+  """The CLI for processing a leveldb table (.ldb) file as indexeddb."""
+  for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
+    record = db_record.record
+    try:
+      idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
+          db_record)
+    except(
+        errors.ParserError,
+        errors.DecoderError,
+        NotImplementedError) as err:
+      print(
+          (f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
+           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+      continue
+    _Output(idb_record, output=args.output)
+def LogCommand(args):
+  """The CLI for processing a leveldb log file as indexeddb."""
+  for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
     record = db_record.record
     try:
-      db_record.record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
-          record)
+      idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
+          db_record)
     except(
         errors.ParserError,
         errors.DecoderError,
         NotImplementedError) as err:
       print(
-          (f'Error parsing blink value: {err} for {record.__class__.__name__} '
+          (f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
            f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
       print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
-    _Output(db_record, output=args.output)
+      continue
+    _Output(idb_record, output=args.output)
 def App():
@@ -94,10 +153,51 @@ def App():
       prog='dfindexeddb',
       description='A cli tool for parsing indexeddb files',
       epilog=f'Version {version.GetVersion()}')
-  parser.add_argument(
+  subparsers = parser.add_subparsers()
+  parser_db = subparsers.add_parser(
+      'db', help='Parse a directory as indexeddb.')
+  parser_db.add_argument(
       '-s', '--source', required=True, type=pathlib.Path,
       help='The source leveldb folder')
-  parser.add_argument(
+  parser_db.add_argument(
+      '--use_manifest',
+      action='store_true',
+      help='Use manifest file to determine active/deleted records.')
+  parser_db.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format.  Default is json')
+  parser_db.set_defaults(func=DbCommand)
+  parser_ldb = subparsers.add_parser(
+      'ldb', help='Parse a ldb file as indexeddb.')
+  parser_ldb.add_argument(
+      '-s', '--source', required=True, type=pathlib.Path,
+      help='The source .ldb file.')
+  parser_ldb.add_argument(
+      '-o',
+      '--output',
+      choices=[
+          'json',
+          'jsonl',
+          'repr'],
+      default='json',
+      help='Output format.  Default is json')
+  parser_ldb.set_defaults(func=LdbCommand)
+  parser_log = subparsers.add_parser(
+      'log', help='Parse a log file as indexeddb.')
+  parser_log.add_argument(
+      '-s', '--source', required=True, type=pathlib.Path,
+      help='The source .log file.')
+  parser_log.add_argument(
       '-o',
       '--output',
       choices=[
@@ -106,7 +206,7 @@ def App():
           'repr'],
       default='json',
       help='Output format.  Default is json')
-  parser.set_defaults(func=IndexeddbCommand)
+  parser_log.set_defaults(func=LogCommand)
   args = parser.parse_args()
   args.func(args)

dfindexeddb/leveldb/cli.py CHANGED Viewed

@@ -66,8 +66,12 @@ def _Output(structure, output):
 def DbCommand(args):
   """The CLI for processing leveldb folders."""
-  for rec in record.LevelDBRecord.FromDir(args.source):
-    _Output(rec, output=args.output)
+  if args.use_manifest:
+    for rec in record.LevelDBRecord.FromManifest(args.source):
+      _Output(rec, output=args.output)
+  else:
+    for rec in record.LevelDBRecord.FromDir(args.source):
+      _Output(rec, output=args.output)
 def LdbCommand(args):
@@ -159,6 +163,10 @@ def App():
       required=True,
       type=pathlib.Path,
       help='The source leveldb directory')
+  parser_db.add_argument(
+      '--use_manifest',
+      action='store_true',
+      help='Use manifest file to determine active/deleted records.')
   parser_db.add_argument(
       '-o',
       '--output',

dfindexeddb/leveldb/definitions.py CHANGED Viewed

@@ -25,6 +25,8 @@ PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
 SEQUENCE_LENGTH = 7
 TYPE_LENGTH = 1
+MANIFEST_FILENAME_PATTERN = r'MANIFEST-[0-9]{6}'
 class BlockCompressionType(enum.IntEnum):
   """Block compression types."""

dfindexeddb/leveldb/record.py CHANGED Viewed

@@ -16,9 +16,12 @@
 from __future__ import annotations
 import dataclasses
 import pathlib
+import re
 import sys
-from typing import Any, Generator, Union
+from typing import Any, Generator, Optional, Union
+from dfindexeddb import errors
+from dfindexeddb.leveldb import definitions
 from dfindexeddb.leveldb import descriptor
 from dfindexeddb.leveldb import ldb
 from dfindexeddb.leveldb import log
@@ -34,18 +37,20 @@ class LevelDBRecord:
   Attributes:
     path: the file path where the record was parsed from.
     record: the leveldb record.
+    level: the leveldb level, None indicates the record came from a log file.
+    recovered: True if the record is a recovered record.
   """
   path: str
   record: Union[
       ldb.KeyValueRecord,
-      log.ParsedInternalKey,
-      descriptor.VersionEdit]
+      log.ParsedInternalKey]
+  level: Optional[int] = None
+  recovered: Optional[bool] = None
   @classmethod
   def FromFile(
       cls,
-      file_path: pathlib.Path,
-      include_versionedit: bool = False
+      file_path: pathlib.Path
   ) -> Generator[LevelDBRecord, Any, Any]:
     """Yields leveldb records from the given path.
@@ -54,7 +59,6 @@ class LevelDBRecord:
     Args:
       file_path: the file path.
-      include_versionedit: include VersionEdit records from descriptor files.
     """
     if file_path.name.endswith('.log'):
       for record in log.FileReader(
@@ -64,12 +68,7 @@ class LevelDBRecord:
       for record in ldb.FileReader(file_path.as_posix()).GetKeyValueRecords():
         yield cls(path=file_path.as_posix(), record=record)
     elif file_path.name.startswith('MANIFEST'):
-      if not include_versionedit:
-        print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
-        return
-      for record in descriptor.FileReader(
-          file_path.as_posix()).GetVersionEdits():
-        yield cls(path=file_path.as_posix(), record=record)
+      print(f'Ignoring descriptor file {file_path.as_posix()}', file=sys.stderr)
     elif file_path.name in ('LOCK', 'CURRENT', 'LOG', 'LOG.old'):
       print(f'Ignoring {file_path.as_posix()}', file=sys.stderr)
     else:
@@ -78,25 +77,114 @@ class LevelDBRecord:
   @classmethod
   def FromDir(
       cls,
-      path: pathlib.Path,
-      include_versionedit: bool = False
+      path: pathlib.Path
   ) -> Generator[LevelDBRecord, Any, Any]:
     """Yields LevelDBRecords from the given directory.
     Args:
       path: the file path.
-      include_versionedit: include VersionEdit records from descriptor files.
+    Yields:
+      LevelDBRecords
+    """
+    if not path or not path.is_dir():
+      raise ValueError(f'{path} is not a directory')
+    for file_path in path.iterdir():
+      yield from cls.FromFile(file_path=file_path)
+  @classmethod
+  def FromManifest(
+      cls,
+      path: pathlib.Path
+  ) -> Generator[LevelDBRecord, Any, Any]:
+    """Yields LevelDBRecords from the given directory using the manifest.
+    Args:
+      path: the file path.
     Yields:
       LevelDBRecords
     Raises:
+      ParserError: if the CURRENT or MANIFEST-* file does not exist.
       ValueError: if path is not a directory.
     """
-    if path.is_dir():
-      for file_path in path.iterdir():
-        yield from cls.FromFile(
-            file_path=file_path,
-            include_versionedit=include_versionedit)
-    else:
+    if not path or not path.is_dir():
       raise ValueError(f'{path} is not a directory')
+    current_path = path / 'CURRENT'
+    if not current_path.exists():
+      raise errors.ParserError(f'{current_path!s} does not exist.')
+    current_manifest = current_path.read_text().strip()
+    manifest_regex = re.compile(definitions.MANIFEST_FILENAME_PATTERN)
+    if not manifest_regex.fullmatch(current_manifest):
+      raise errors.ParserError(
+          f'{current_path!s} does not contain the expected content')
+    manifest_path = path / current_manifest
+    if not manifest_path.exists():
+      raise errors.ParserError(f'{manifest_path!s} does not exist.')
+    latest_version = descriptor.FileReader(
+        str(manifest_path)).GetLatestVersion()
+    if not latest_version:
+      raise errors.ParserError(
+          f'Could not parse a leveldb version from {manifest_path!s}')
+    # read log records
+    log_records = []
+    if latest_version.current_log:
+      current_log = path / latest_version.current_log
+      if current_log.exists():
+        for log_record in cls.FromFile(file_path=current_log):
+          log_records.append(log_record)
+    else:
+      print('No current log file.', file=sys.stderr)
+    # read records from the "young" or 0-level
+    young_records = []
+    for active_file in latest_version.active_files.get(0, {}).keys():
+      current_young = path / active_file
+      if current_young.exists():
+        for young_record in cls.FromFile(current_young):
+          young_records.append(young_record)
+    active_records = {}
+    for record in sorted(
+        log_records,
+        key=lambda record: record.record.sequence_number,
+        reverse=True):
+      if record.record.key not in active_records:
+        record.recovered = False
+        active_records[record.record.key] = record
+      else:
+        record.recovered = True
+    for record in sorted(
+        young_records,
+        key=lambda record: record.record.sequence_number,
+        reverse=True):
+      if record.record.key not in active_records:
+        record.recovered = False
+        active_records[record.record.key] = record
+      else:
+        record.recovered = True
+      record.level = 0
+    yield from sorted(
+        log_records + young_records,
+        key=lambda record: record.record.sequence_number,
+        reverse=False)
+    if latest_version.active_files.keys():
+      for level in range(1, max(latest_version.active_files.keys()) + 1):
+        for filename in latest_version.active_files.get(level, []):
+          current_filename = path / filename
+          for record in cls.FromFile(file_path=current_filename):
+            if record.record.key in active_records:
+              record.recovered = True
+            else:
+              record.recovered = False
+            record.level = level
+            yield record

dfindexeddb/version.py CHANGED Viewed

@@ -15,7 +15,7 @@
 """Version information for dfIndexeddb."""
-__version__ = "20240402"
+__version__ = "20240417"
 def GetVersion():

dfindexeddb 20240402__py3-none-any.whl → 20240417__py3-none-any.whl

dfindexeddb 20240402__py3-none-any.whl → 20240417__py3-none-any.whl