PyPI - dfindexeddb - Versions diffs - 20241105__py3-none-any.whl → 20260205__py3-none-any.whl - Mend

dfindexeddb 20241105__py3-none-any.whl → 20260205__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

dfindexeddb/indexeddb/chromium/blink.py +116 -74
dfindexeddb/indexeddb/chromium/definitions.py +240 -125
dfindexeddb/indexeddb/chromium/record.py +651 -346
dfindexeddb/indexeddb/chromium/sqlite.py +362 -0
dfindexeddb/indexeddb/chromium/v8.py +100 -78
dfindexeddb/indexeddb/cli.py +282 -121
dfindexeddb/indexeddb/firefox/definitions.py +7 -4
dfindexeddb/indexeddb/firefox/gecko.py +98 -74
dfindexeddb/indexeddb/firefox/record.py +78 -26
dfindexeddb/indexeddb/safari/definitions.py +5 -3
dfindexeddb/indexeddb/safari/record.py +86 -53
dfindexeddb/indexeddb/safari/webkit.py +85 -71
dfindexeddb/indexeddb/types.py +4 -1
dfindexeddb/leveldb/cli.py +146 -138
dfindexeddb/leveldb/definitions.py +6 -2
dfindexeddb/leveldb/descriptor.py +70 -56
dfindexeddb/leveldb/ldb.py +39 -33
dfindexeddb/leveldb/log.py +41 -30
dfindexeddb/leveldb/plugins/chrome_notifications.py +30 -18
dfindexeddb/leveldb/plugins/interface.py +5 -6
dfindexeddb/leveldb/plugins/manager.py +10 -9
dfindexeddb/leveldb/record.py +71 -62
dfindexeddb/leveldb/utils.py +105 -13
dfindexeddb/utils.py +36 -31
dfindexeddb/version.py +2 -2
dfindexeddb-20260205.dist-info/METADATA +171 -0
dfindexeddb-20260205.dist-info/RECORD +41 -0
{dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/WHEEL +1 -1
dfindexeddb-20241105.dist-info/AUTHORS +0 -12
dfindexeddb-20241105.dist-info/METADATA +0 -424
dfindexeddb-20241105.dist-info/RECORD +0 -41
{dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/entry_points.txt +0 -0
{dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info/licenses}/LICENSE +0 -0
{dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/top_level.txt +0 -0

dfindexeddb/indexeddb/cli.py CHANGED Viewed

@@ -16,28 +16,30 @@
 import argparse
 import dataclasses
 import enum
-from datetime import datetime
 import json
 import pathlib
+from datetime import datetime
+from typing import Any
-from dfindexeddb import utils
-from dfindexeddb import version
+from dfindexeddb import utils, version
 from dfindexeddb.indexeddb import types
 from dfindexeddb.indexeddb.chromium import blink
+from dfindexeddb.indexeddb.chromium import sqlite
 from dfindexeddb.indexeddb.chromium import record as chromium_record
 from dfindexeddb.indexeddb.firefox import gecko
 from dfindexeddb.indexeddb.firefox import record as firefox_record
 from dfindexeddb.indexeddb.safari import record as safari_record
 _VALID_PRINTABLE_CHARACTERS = (
-    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
-    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+    " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~."
+)
 class Encoder(json.JSONEncoder):
   """A JSON encoder class for dfindexeddb fields."""
-  def default(self, o):
+  def default(self, o):  # type: ignore[no-untyped-def]
     if dataclasses.is_dataclass(o):
       o_dict = utils.asdict(o)
       return o_dict
@@ -45,18 +47,18 @@ class Encoder(json.JSONEncoder):
       out = []
       for x in o:
         if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
-          out.append(f'\\x{x:02X}')
+          out.append(f"\\x{x:02X}")
         else:
           out.append(chr(x))
-      return ''.join(out)
+      return "".join(out)
     if isinstance(o, datetime):
       return o.isoformat()
     if isinstance(o, types.Undefined):
-      return '<undefined>'
+      return "<undefined>"
     if isinstance(o, types.JSArray):
       return o.__dict__
     if isinstance(o, types.Null):
-      return '<null>'
+      return "<null>"
     if isinstance(o, set):
       return list(o)
     if isinstance(o, types.RegExp):
@@ -66,189 +68,348 @@ class Encoder(json.JSONEncoder):
     return json.JSONEncoder.default(self, o)
-def _Output(structure, output):
-  """Helper method to output parsed structure to stdout."""
-  if output == 'json':
+def _Output(structure: Any, output: str) -> None:
+  """Helper method to output parsed structure to stdout.
+  Args:
+    structure: The structure to output.
+    output: The output format.
+  """
+  if output == "json":
     print(json.dumps(structure, indent=2, cls=Encoder))
-  elif output == 'jsonl':
+  elif output == "jsonl":
     print(json.dumps(structure, cls=Encoder))
-  elif output == 'repr':
+  elif output == "repr":
     print(structure)
-def BlinkCommand(args):
+def BlinkCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a file as a blink-encoded value."""
-  with open(args.source, 'rb') as fd:
+  with open(args.source, "rb") as fd:
     buffer = fd.read()
     blink_value = blink.V8ScriptValueDecoder.FromBytes(buffer)
     _Output(blink_value, output=args.output)
-def GeckoCommand(args):
+def GeckoCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a file as a gecko-encoded value."""
-  with open(args.source, 'rb') as fd:
+  with open(args.source, "rb") as fd:
     buffer = fd.read()
     blink_value = gecko.JSStructuredCloneDecoder.FromBytes(buffer)
     _Output(blink_value, output=args.output)
-def DbCommand(args):
+def DbCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a directory as IndexedDB."""
-  if args.format in ('chrome', 'chromium'):
-    for db_record in chromium_record.FolderReader(
-        args.source).GetRecords(
-            use_manifest=args.use_manifest,
-            use_sequence_number=args.use_sequence_number):
-      _Output(db_record, output=args.output)
-  elif args.format == 'firefox':
-    for db_record in firefox_record.FileReader(args.source).Records():
-      _Output(db_record, output=args.output)
-  elif args.format == 'safari':
-    for db_record in safari_record.FileReader(args.source).Records():
-      _Output(db_record, output=args.output)
-def LdbCommand(args):
+  if args.format in ("chrome", "chromium"):
+    if args.source.is_file():
+      if args.object_store_id is not None:
+        records = sqlite.DatabaseReader(
+            str(args.source)
+        ).RecordsByObjectStoreId(
+            args.object_store_id, include_raw_data=args.include_raw_data
+        )
+      else:
+        records = sqlite.DatabaseReader(str(args.source)).Records(
+            include_raw_data=args.include_raw_data
+        )
+      for chromium_db_record in records:
+        if args.filter_value is not None and args.filter_value not in str(
+            chromium_db_record.value
+        ):
+          continue
+        if args.filter_key is not None and args.filter_key not in str(
+            chromium_db_record.key.value
+        ):
+          continue
+        _Output(chromium_db_record, output=args.output)
+    else:
+      for chromium_leveldb_record in chromium_record.FolderReader(
+          args.source
+      ).GetRecords(
+          use_manifest=args.use_manifest,
+          use_sequence_number=args.use_sequence_number,
+      ):
+        if (
+            args.object_store_id is not None
+            and chromium_leveldb_record.object_store_id != args.object_store_id
+        ):
+          continue
+        if args.filter_value is not None and args.filter_value not in str(
+            chromium_leveldb_record.value
+        ):
+          continue
+        if args.filter_key is not None and args.filter_key not in str(
+            chromium_leveldb_record.key.value
+        ):
+          continue
+        _Output(chromium_leveldb_record, output=args.output)
+  elif args.format == "firefox":
+    if args.object_store_id is not None:
+      firefox_db_records = firefox_record.FileReader(
+          str(args.source)
+      ).RecordsByObjectStoreId(
+          args.object_store_id, include_raw_data=args.include_raw_data
+      )
+    else:
+      firefox_db_records = firefox_record.FileReader(str(args.source)).Records(
+          include_raw_data=args.include_raw_data
+      )
+    for firefox_db_record in firefox_db_records:
+      if args.filter_value is not None and args.filter_value not in str(
+          firefox_db_record.value
+      ):
+        continue
+      if args.filter_key is not None and args.filter_key not in str(
+          firefox_db_record.key.value
+      ):
+        continue
+      _Output(firefox_db_record, output=args.output)
+  elif args.format == "safari":
+    if args.object_store_id is not None:
+      safari_db_records = safari_record.FileReader(
+          str(args.source)
+      ).RecordsByObjectStoreId(
+          args.object_store_id, include_raw_data=args.include_raw_data
+      )
+    else:
+      safari_db_records = safari_record.FileReader(str(args.source)).Records(
+          include_raw_data=args.include_raw_data
+      )
+    for safari_db_record in safari_db_records:
+      if args.filter_value is not None and args.filter_value not in str(
+          safari_db_record.value
+      ):
+        continue
+      if args.filter_key is not None and args.filter_key not in str(
+          safari_db_record.key
+      ):
+        continue
+      _Output(safari_db_record, output=args.output)
+def LdbCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a LevelDB table (.ldb) file as IndexedDB."""
-  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+  for db_record in chromium_record.ChromiumIndexedDBRecord.FromFile(
+      args.source
+  ):
+    if args.filter_value is not None and args.filter_value not in str(
+        db_record.value
+    ):
+      continue
+    if args.filter_key is not None and args.filter_key not in str(
+        db_record.key
+    ):
+      continue
     _Output(db_record, output=args.output)
-def LogCommand(args):
+def LogCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a LevelDB log file as IndexedDB."""
-  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+  for db_record in chromium_record.ChromiumIndexedDBRecord.FromFile(
+      args.source
+  ):
+    if args.filter_value is not None and args.filter_value not in str(
+        db_record.value
+    ):
+      continue
+    if args.filter_key is not None and args.filter_key not in str(
+        db_record.key
+    ):
+      continue
     _Output(db_record, output=args.output)
-def App():
+def App() -> None:
   """The CLI app entrypoint for dfindexeddb."""
   parser = argparse.ArgumentParser(
-      prog='dfindexeddb',
-      description='A cli tool for parsing IndexedDB files',
-      epilog=f'Version {version.GetVersion()}')
+      prog="dfindexeddb",
+      description="A cli tool for parsing IndexedDB files",
+      epilog=f"Version {version.GetVersion()}",
+  )
   subparsers = parser.add_subparsers()
   parser_blink = subparsers.add_parser(
-      'blink', help='Parse a file as a blink-encoded value.')
+      "blink", help="Parse a file as a blink-encoded value."
+  )
   parser_blink.add_argument(
-      '-s',
-      '--source',
+      "-s",
+      "--source",
       required=True,
       type=pathlib.Path,
-      help='The source file.')
+      help="The source file.",
+  )
   parser_blink.add_argument(
-      '-o',
-      '--output',
-      choices=[
-          'json',
-          'jsonl',
-          'repr'],
-      default='json',
-      help='Output format.  Default is json.')
+      "-o",
+      "--output",
+      choices=["json", "jsonl", "repr"],
+      default="json",
+      help="Output format.  Default is json.",
+  )
   parser_blink.set_defaults(func=BlinkCommand)
   parser_gecko = subparsers.add_parser(
-      'gecko', help='Parse a file as a gecko-encoded value.')
+      "gecko", help="Parse a file as a gecko-encoded value."
+  )
   parser_gecko.add_argument(
-      '-s',
-      '--source',
+      "-s",
+      "--source",
       required=True,
       type=pathlib.Path,
-      help='The source file.')
+      help="The source file.",
+  )
   parser_gecko.add_argument(
-      '-o',
-      '--output',
-      choices=[
-          'json',
-          'jsonl',
-          'repr'],
-      default='json',
-      help='Output format.  Default is json.')
+      "-o",
+      "--output",
+      choices=["json", "jsonl", "repr"],
+      default="json",
+      help="Output format.  Default is json.",
+  )
   parser_gecko.set_defaults(func=GeckoCommand)
   parser_db = subparsers.add_parser(
-      'db', help='Parse a directory/file as IndexedDB.')
+      "db", help="Parse a directory/file as IndexedDB."
+  )
   parser_db.add_argument(
-      '-s',
-      '--source',
+      "-s",
+      "--source",
       required=True,
       type=pathlib.Path,
       help=(
-          'The source IndexedDB folder (for chrome/chromium) '
-          'or sqlite3 file (for firefox/safari).'))
+          "The source IndexedDB folder (for chrome/chromium) "
+          "or sqlite3 file (for firefox/safari)."
+      ),
+  )
   recover_group = parser_db.add_mutually_exclusive_group()
   recover_group.add_argument(
-      '--use_manifest',
-      action='store_true',
-      help='Use manifest file to determine active/deleted records.')
+      "--use_manifest",
+      action="store_true",
+      help="Use manifest file to determine active/deleted records.",
+  )
   recover_group.add_argument(
-      '--use_sequence_number',
-      action='store_true',
+      "--use_sequence_number",
+      action="store_true",
       help=(
-          'Use sequence number and file offset to determine active/deleted '
-          'records.'))
+          "Use sequence number and file offset to determine active/deleted "
+          "records."
+      ),
+  )
   parser_db.add_argument(
-      '--format',
+      "--format",
       required=True,
-      choices=[
-          'chromium',
-          'chrome',
-          'firefox',
-          'safari'],
-      help='The type of IndexedDB to parse.')
+      choices=["chromium", "chrome", "firefox", "safari"],
+      help="The type of IndexedDB to parse.",
+  )
+  parser_db.add_argument(
+      "--object_store_id",
+      type=int,
+      help="The object store ID to filter by.",
+  )
+  parser_db.add_argument(
+      "--include_raw_data",
+      action="store_true",
+      help="Include raw key and value in the output.",
+  )
+  parser_db.add_argument(
+      "-o",
+      "--output",
+      choices=["json", "jsonl", "repr"],
+      default="json",
+      help="Output format.  Default is json.",
+  )
   parser_db.add_argument(
-      '-o',
-      '--output',
-      choices=[
-          'json',
-          'jsonl',
-          'repr'],
-      default='json',
-      help='Output format.  Default is json.')
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
+  parser_db.add_argument(
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_db.set_defaults(func=DbCommand)
   parser_ldb = subparsers.add_parser(
-      'ldb',
-      help='Parse a ldb file as IndexedDB.')
+      "ldb", help="Parse a ldb file as IndexedDB."
+  )
   parser_ldb.add_argument(
-      '-s',
-      '--source',
+      "-s",
+      "--source",
       required=True,
       type=pathlib.Path,
-      help='The source .ldb file.')
+      help="The source .ldb file.",
+  )
+  parser_ldb.add_argument(
+      "-o",
+      "--output",
+      choices=["json", "jsonl", "repr"],
+      default="json",
+      help="Output format.  Default is json.",
+  )
+  parser_ldb.add_argument(
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
   parser_ldb.add_argument(
-      '-o',
-      '--output',
-      choices=[
-          'json',
-          'jsonl',
-          'repr'],
-      default='json',
-      help='Output format.  Default is json.')
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_ldb.set_defaults(func=LdbCommand)
   parser_log = subparsers.add_parser(
-      'log',
-      help='Parse a log file as IndexedDB.')
+      "log", help="Parse a log file as IndexedDB."
+  )
   parser_log.add_argument(
-      '-s', '--source',
+      "-s",
+      "--source",
       required=True,
       type=pathlib.Path,
-      help='The source .log file.')
+      help="The source .log file.",
+  )
+  parser_log.add_argument(
+      "-o",
+      "--output",
+      choices=["json", "jsonl", "repr"],
+      default="json",
+      help="Output format.  Default is json.",
+  )
   parser_log.add_argument(
-      '-o',
-      '--output',
-      choices=[
-          'json',
-          'jsonl',
-          'repr'],
-      default='json',
-      help='Output format.  Default is json.')
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
+  parser_log.add_argument(
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_log.set_defaults(func=LogCommand)
-  args = parser.parse_args()
-  if hasattr(args, 'func'):
+  args: argparse.Namespace = parser.parse_args()
+  if hasattr(args, "func"):
     args.func(args)
   else:
     parser.print_help()

dfindexeddb/indexeddb/firefox/definitions.py CHANGED Viewed

@@ -18,6 +18,7 @@ from enum import IntEnum
 class IndexedDBKeyType(IntEnum):
   """IndexedDB Key Types."""
   TERMINATOR = 0
   FLOAT = 0x10
   DATE = 0x20
@@ -38,6 +39,7 @@ THREE_BYTE_SHIFT = 6
 class StructuredDataType(IntEnum):
   """Structured Data Types."""
   FLOAT_MAX = 0xFFF00000
   HEADER = 0xFFF10000
   NULL = 0xFFFF0000
@@ -45,7 +47,7 @@ class StructuredDataType(IntEnum):
   BOOLEAN = 0xFFFF0002
   INT32 = 0xFFFF0003
   STRING = 0xFFFF0004
-  DATE_OBJECT  = 0xFFFF0005
+  DATE_OBJECT = 0xFFFF0005
   REGEXP_OBJECT = 0xFFFF0006
   ARRAY_OBJECT = 0xFFFF0007
   OBJECT_OBJECT = 0xFFFF0008
@@ -53,8 +55,8 @@ class StructuredDataType(IntEnum):
   BOOLEAN_OBJECT = 0xFFFF000A
   STRING_OBJECT = 0xFFFF000B
   NUMBER_OBJECT = 0xFFFF000C
-  BACK_REFERENCE_OBJECT  = 0xFFFF000D
-  DO_NOT_USE_1  = 0xFFFF000E
+  BACK_REFERENCE_OBJECT = 0xFFFF000D
+  DO_NOT_USE_1 = 0xFFFF000E
   DO_NOT_USE_2 = 0xFFFF000F
   TYPED_ARRAY_OBJECT_V2 = 0xFFFF0010
   MAP_OBJECT = 0xFFFF0011
@@ -95,6 +97,7 @@ class StructuredDataType(IntEnum):
 class StructuredCloneTags(IntEnum):
   """Structured Clone Tags."""
   BLOB = 0xFFFF8001
   FILE_WITHOUT_LASTMODIFIEDDATE = 0xFFFF8002
   FILELIST = 0xFFFF8003
@@ -140,4 +143,4 @@ class StructuredCloneTags(IntEnum):
   ENCODEDAUDIOCHUNK = 0xFFFF8031
-FRAME_HEADER = b'\xff\x06\x00\x00sNaPpY'
+FRAME_HEADER = b"\xff\x06\x00\x00sNaPpY"

dfindexeddb 20241105__py3-none-any.whl → 20260205__py3-none-any.whl

dfindexeddb 20241105__py3-none-any.whl → 20260205__py3-none-any.whl