PyPI - dfindexeddb - Versions diffs - 20251109__py3-none-any.whl → 20260205__py3-none-any.whl - Mend

dfindexeddb 20251109__py3-none-any.whl → 20260205__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

dfindexeddb/indexeddb/chromium/sqlite.py ADDED Viewed

@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Chromium IndexedDB records encoded in sqlite3 databases."""
+import contextlib
+import os
+import sqlite3
+import urllib.parse
+from dataclasses import dataclass
+from typing import Any, Generator, Optional
+
+import snappy
+import zstd
+
+from dfindexeddb.indexeddb.chromium import blink
+from dfindexeddb.indexeddb.chromium import definitions
+from dfindexeddb.indexeddb.chromium import record
+@dataclass
+class ChromiumIndexedDBRecord:
+  """Chromium IndexedDB record parsed from sqlite3 database.
+
+  Attributes:
+    row_id: the row ID.
+    object_store_id: the object store ID.
+    compression_type: the compression type of the raw value.
+    key: the parsed key, or None when the key was not parsed.
+    value: the parsed value, a list of ChromiumBlobInfo when the value was
+        loaded from blobs, or None when the value was not parsed.
+    has_blobs: whether the record has blobs.
+    raw_key: the raw key, or None when raw data was not requested.
+    raw_value: the raw value, or None when raw data was not requested.
+  """
+  row_id: int
+  object_store_id: int
+  # NOTE(review): DatabaseReader._EnumerateCursor stores a
+  # definitions.DatabaseCompressionType here rather than a plain int.
+  compression_type: int
+  key: Any
+  value: Any
+  has_blobs: bool
+  # Only populated when the reader is called with include_raw_data=True.
+  raw_key: Optional[bytes]
+  raw_value: Optional[bytes]
+@dataclass
+class ChromiumObjectStoreInfo:
+  """Chromium IndexedDB object store info parsed from sqlite3 database.
+
+  Attributes:
+    id: the object store ID.
+    name: the object store name.
+    key_path: the object store key path.
+    auto_increment: whether the object store is auto increment.
+    key_generator_current_number: the current number of the key generator.
+  """
+  id: int
+  # Decoded from UTF-16-LE bytes by DatabaseReader.ObjectStores.
+  name: str
+  # NOTE(review): stored exactly as returned by the query (no decoding is
+  # applied, unlike name) - confirm the column really yields str.
+  key_path: str
+  # Presumably a 0/1 flag as stored by SQLite - TODO confirm.
+  auto_increment: int
+  key_generator_current_number: int
+@dataclass
+class ChromiumBlobInfo:
+  """Chromium IndexedDB blob info parsed from sqlite3 database.
+
+  Attributes:
+    row_id: the blob row ID.
+    object_type: the object type.
+    mime_type: the mime type.
+    size_bytes: the total size in bytes.
+    file_name: the file name (only for files).
+    number_of_chunks: the number of chunks including the initial one.
+    blob_data: the blob data.
+  """
+  row_id: int
+  object_type: int
+  mime_type: Optional[str]
+  size_bytes: int
+  file_name: Optional[str]
+  # Counts only the rows that contributed data to blob_data (see
+  # DatabaseReader.LoadBlobDataForRecordId).
+  number_of_chunks: int
+  # NOTE(review): DatabaseReader._EnumerateCursor replaces this attribute with
+  # the result of blink.V8ScriptValueDecoder.FromBytes, so it is not always
+  # bytes once attached to a record.
+  blob_data: bytes
+class DatabaseReader:
+  """A reader for Chromium IndexedDB sqlite3 files.
+
+  Every method that touches the database opens its own read-only connection
+  and closes it again once the method (or the generator it returns) is done.
+  """
+
+  # Characters left unescaped when the filename is embedded in a SQLite URI.
+  # Path separators and the Windows drive colon keep their meaning; anything
+  # else that is special inside a URI ("?", "#", "%", ...) is percent-encoded
+  # so it cannot be mistaken for the query string or a fragment.
+  _URI_SAFE_CHARACTERS = "/\\:"
+
+  def __init__(self, filename: str):
+    """Initializes the reader.
+
+    Args:
+      filename: the path to the sqlite3 file.
+    """
+    self._filename = filename
+
+  def _Connect(
+      self, use_row_factory: bool = False
+  ) -> "contextlib.closing[sqlite3.Connection]":
+    """Opens a read-only connection that is closed when the context exits.
+
+    A sqlite3.Connection used directly as a context manager only commits or
+    rolls back the pending transaction; it does not close the connection.
+    Wrapping it in contextlib.closing releases the database handle as soon as
+    the with-block (or the generator containing it) finishes.
+
+    Args:
+      use_row_factory: whether rows are returned as sqlite3.Row objects
+          (addressable by column name) instead of plain tuples.
+
+    Returns:
+      A context manager that yields the open sqlite3 connection.
+    """
+    quoted_filename = urllib.parse.quote(
+        self._filename, safe=self._URI_SAFE_CHARACTERS
+    )
+    conn = sqlite3.connect(f"file:{quoted_filename}?mode=ro", uri=True)
+    if use_row_factory:
+      conn.row_factory = sqlite3.Row
+    return contextlib.closing(conn)
+
+  def ObjectStores(self) -> Generator[ChromiumObjectStoreInfo, None, None]:
+    """Yields object stores.
+
+    Yields:
+      ChromiumObjectStoreInfo objects, one per row returned by
+      definitions.SQL_OBJECT_STORES_QUERY.
+    """
+    with self._Connect() as conn:
+      cursor = conn.cursor()
+      cursor.execute(definitions.SQL_OBJECT_STORES_QUERY)
+      for row in cursor:
+        yield ChromiumObjectStoreInfo(
+            id=row[0],
+            # The name column is decoded as UTF-16-LE.
+            name=row[1].decode("utf-16-le"),
+            key_path=row[2],
+            auto_increment=row[3],
+            key_generator_current_number=row[4],
+        )
+
+  def _GetLegacyBlobPath(self, blob_id: int) -> str:
+    """Gets the path to a legacy blob file.
+
+    The directory name is the database filename with an underscore inserted
+    before its extension (for example "db.sqlite" becomes "db_.sqlite") and
+    the file name is the blob ID in lowercase hexadecimal.
+
+    Args:
+      blob_id: the blob ID.
+
+    Returns:
+      The path to the legacy blob file.
+    """
+    base, ext = os.path.splitext(self._filename)
+    db_dir = f"{base}_{ext}"
+    return os.path.join(db_dir, f"{blob_id:x}")
+
+  def LoadLegacyBlobData(self, blob_id: int) -> bytes:
+    """Loads legacy blob data from disk.
+
+    Args:
+      blob_id: the blob ID.
+
+    Returns:
+      The blob data.
+
+    Raises:
+      FileNotFoundError: if the legacy blob file is not found.
+    """
+    blob_path = self._GetLegacyBlobPath(blob_id)
+    if not os.path.exists(blob_path):
+      raise FileNotFoundError(f"Legacy blob file not found: {blob_path}")
+    with open(blob_path, "rb") as f:
+      return f.read()
+
+  @staticmethod
+  def _MakeBlobInfo(
+      blob_row: sqlite3.Row, number_of_chunks: int, blob_data: bytearray
+  ) -> ChromiumBlobInfo:
+    """Builds a ChromiumBlobInfo from a blob row and its accumulated data.
+
+    Args:
+      blob_row: the first row seen for the blob; supplies the metadata.
+      number_of_chunks: the number of rows that contributed data.
+      blob_data: the concatenated data of all chunks.
+
+    Returns:
+      The ChromiumBlobInfo with an immutable copy of the blob data.
+    """
+    return ChromiumBlobInfo(
+        row_id=blob_row["row_id"],
+        object_type=blob_row["object_type"],
+        mime_type=blob_row["mime_type"],
+        size_bytes=blob_row["size_bytes"],
+        file_name=blob_row["file_name"],
+        number_of_chunks=number_of_chunks,
+        blob_data=bytes(blob_data),
+    )
+
+  def LoadBlobDataForRecordId(
+      self, row_id: int
+  ) -> Generator[ChromiumBlobInfo, None, None]:
+    """Loads blob data for a given record row ID.
+
+    Consecutive rows sharing the same blob row_id are concatenated into a
+    single ChromiumBlobInfo.
+
+    Args:
+      row_id: the record row ID.
+
+    Yields:
+      ChromiumBlobInfo objects.
+
+    Raises:
+      FileNotFoundError: if a legacy blob file is needed but not found.
+    """
+    with self._Connect(use_row_factory=True) as conn:
+      cursor = conn.cursor()
+      # Note this is a UNION query between the blob and overflow_blob_chunks
+      # table.  The chunk_index = 0 for the row from the 'blobs' table.
+      # NOTE(review): grouping below assumes the query returns all rows of a
+      # blob adjacently and in chunk order - confirm against the query.
+      cursor.execute(definitions.SQL_BLOB_DATA_QUERY, (row_id, row_id))
+
+      current_blob_id = None
+      current_record: Optional[sqlite3.Row] = None
+      current_blob_data = bytearray()
+      total_number_of_chunks = 0
+      for blob_row in cursor:
+        blob_id = blob_row["row_id"]
+        if blob_id != current_blob_id:
+          # A new blob starts: emit the one accumulated so far, then reset.
+          if current_record is not None:
+            yield self._MakeBlobInfo(
+                current_record, total_number_of_chunks, current_blob_data
+            )
+          current_blob_id = blob_id
+          current_record = blob_row
+          current_blob_data = bytearray()
+          total_number_of_chunks = 0
+
+        if blob_row["chunk_index"] == 0 and blob_row["bytes"] is None:
+          # The initial row carries no inline bytes, so the data is read from
+          # the legacy blob file on disk instead.
+          current_blob_data.extend(self.LoadLegacyBlobData(blob_id))
+          total_number_of_chunks += 1
+        elif blob_row["bytes"]:
+          current_blob_data.extend(blob_row["bytes"])
+          total_number_of_chunks += 1
+
+      # Emit the last blob, which no following row flushed.
+      if current_record is not None:
+        yield self._MakeBlobInfo(
+            current_record, total_number_of_chunks, current_blob_data
+        )
+
+  @staticmethod
+  def _DecodeValue(
+      compression_type: definitions.DatabaseCompressionType, raw_value: bytes
+  ) -> Any:
+    """Decompresses (when needed) and decodes a raw record value.
+
+    Args:
+      compression_type: how raw_value is compressed.
+      raw_value: the raw value bytes.
+
+    Returns:
+      The decoded value, or None for a compression type that is not handled.
+    """
+    if compression_type == definitions.DatabaseCompressionType.UNCOMPRESSED:
+      return blink.V8ScriptValueDecoder.FromBytes(raw_value)
+    if compression_type == definitions.DatabaseCompressionType.ZSTD:
+      return blink.V8ScriptValueDecoder.FromBytes(zstd.decompress(raw_value))
+    if compression_type == definitions.DatabaseCompressionType.SNAPPY:
+      return blink.V8ScriptValueDecoder.FromBytes(
+          snappy.decompress(raw_value)
+      )
+    return None
+
+  def _EnumerateCursor(
+      self,
+      cursor: sqlite3.Cursor,
+      include_raw_data: bool = False,
+      parse_key: bool = True,
+      parse_value: bool = True,
+      load_blobs: bool = True,
+  ) -> Generator[ChromiumIndexedDBRecord, None, None]:
+    """Yields ChromiumIndexedDBRecord records from a sqlite3 cursor.
+
+    Args:
+      cursor: the sqlite3 cursor.
+      include_raw_data: whether to include the raw data.
+      parse_key: whether to parse the key.
+      parse_value: whether to parse the value.
+      load_blobs: whether to load the record blobs.
+
+    Yields:
+      ChromiumIndexedDBRecord records.
+
+    Raises:
+      ValueError: if load_blobs is set and a row has neither a raw value nor
+          the has_blobs flag.
+    """
+    for row in cursor:
+      row_id = row[0]
+      object_store_id = row[1]
+      compression_type = definitions.DatabaseCompressionType(row[2])
+      raw_key = row[3]
+      raw_value = row[4]
+      has_blobs = bool(row[5])
+
+      key, value = None, None
+      if parse_key and raw_key:
+        key = record.SortableIDBKey.FromBytes(raw_data=raw_key, base_offset=0)
+      if parse_value and raw_value:
+        value = self._DecodeValue(compression_type, raw_value)
+
+      # A NULL raw value means the value is stored in blobs; this is
+      # independent of parse_value.
+      if load_blobs and raw_value is None:
+        if not has_blobs:
+          raise ValueError("Raw value is None but has_blobs is not set")
+        value = []
+        for blob in self.LoadBlobDataForRecordId(row_id):
+          blob.blob_data = blink.V8ScriptValueDecoder.FromBytes(blob.blob_data)
+          value.append(blob)
+
+      yield ChromiumIndexedDBRecord(
+          row_id=row_id,
+          object_store_id=object_store_id,
+          compression_type=compression_type,
+          key=key,
+          value=value,
+          has_blobs=has_blobs,
+          raw_key=raw_key if include_raw_data else None,
+          raw_value=raw_value if include_raw_data else None,
+      )
+
+  def RecordsByObjectStoreId(
+      self,
+      object_store_id: int,
+      include_raw_data: bool = False,
+      parse_key: bool = True,
+      parse_value: bool = True,
+      load_blobs: bool = True,
+  ) -> Generator[ChromiumIndexedDBRecord, None, None]:
+    """Yields ChromiumIndexedDBRecord records for a given object store ID.
+
+    Args:
+      object_store_id: the object store ID.
+      include_raw_data: whether to include the raw data.
+      parse_key: whether to parse the key.
+      parse_value: whether to parse the value.
+      load_blobs: whether to load the record blobs.
+
+    Yields:
+      ChromiumIndexedDBRecord records.
+    """
+    with self._Connect(use_row_factory=True) as conn:
+      cursor = conn.cursor()
+      cursor.execute(definitions.SQL_RECORDS_BY_ID_QUERY, (object_store_id,))
+      yield from self._EnumerateCursor(
+          cursor,
+          include_raw_data=include_raw_data,
+          parse_key=parse_key,
+          parse_value=parse_value,
+          load_blobs=load_blobs,
+      )
+
+  def RecordsByObjectStoreName(
+      self,
+      object_store_name: str,
+      include_raw_data: bool = False,
+      parse_key: bool = True,
+      parse_value: bool = True,
+      load_blobs: bool = True,
+  ) -> Generator[ChromiumIndexedDBRecord, None, None]:
+    """Yields ChromiumIndexedDBRecord records for a given object store name.
+
+    Args:
+      object_store_name: the object store name.
+      include_raw_data: whether to include the raw data.
+      parse_key: whether to parse the key.
+      parse_value: whether to parse the value.
+      load_blobs: whether to load the record blobs.
+
+    Yields:
+      ChromiumIndexedDBRecord records.
+    """
+    with self._Connect(use_row_factory=True) as conn:
+      cursor = conn.cursor()
+      cursor.execute(
+          definitions.SQL_RECORDS_BY_NAME_QUERY,
+          # The name is matched in its UTF-16-LE encoded form.
+          (object_store_name.encode("utf-16-le"),),
+      )
+      yield from self._EnumerateCursor(
+          cursor,
+          include_raw_data=include_raw_data,
+          parse_key=parse_key,
+          parse_value=parse_value,
+          load_blobs=load_blobs,
+      )
+
+  def Records(
+      self,
+      include_raw_data: bool = False,
+      parse_key: bool = True,
+      parse_value: bool = True,
+      load_blobs: bool = True,
+  ) -> Generator[ChromiumIndexedDBRecord, None, None]:
+    """Yields ChromiumIndexedDBRecord records from all object stores.
+
+    Args:
+      include_raw_data: whether to include the raw data.
+      parse_key: whether to parse the key.
+      parse_value: whether to parse the value.
+      load_blobs: whether to load the record blobs.
+
+    Yields:
+      ChromiumIndexedDBRecord records.
+    """
+    with self._Connect(use_row_factory=True) as conn:
+      cursor = conn.cursor()
+      cursor.execute(definitions.SQL_RECORDS_QUERY)
+      yield from self._EnumerateCursor(
+          cursor,
+          include_raw_data=include_raw_data,
+          parse_key=parse_key,
+          parse_value=parse_value,
+          load_blobs=load_blobs,
+      )

dfindexeddb/indexeddb/cli.py CHANGED Viewed

@@ -24,6 +24,7 @@ from typing import Any
 from dfindexeddb import utils, version
 from dfindexeddb.indexeddb import types
 from dfindexeddb.indexeddb.chromium import blink
+from dfindexeddb.indexeddb.chromium import sqlite
 from dfindexeddb.indexeddb.chromium import record as chromium_record
 from dfindexeddb.indexeddb.firefox import gecko
 from dfindexeddb.indexeddb.firefox import record as firefox_record
@@ -68,7 +69,12 @@ class Encoder(json.JSONEncoder):
 def _Output(structure: Any, output: str) -> None:
-  """Helper method to output parsed structure to stdout."""
+  """Helper method to output parsed structure to stdout.
+  Args:
+    structure: The structure to output.
+    output: The output format.
+  """
   if output == "json":
     print(json.dumps(structure, indent=2, cls=Encoder))
   elif output == "jsonl":
@@ -96,30 +102,123 @@ def GeckoCommand(args: argparse.Namespace) -> None:
 def DbCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a directory as IndexedDB."""
   if args.format in ("chrome", "chromium"):
-    for chromium_db_record in chromium_record.FolderReader(
-        args.source
-    ).GetRecords(
-        use_manifest=args.use_manifest,
-        use_sequence_number=args.use_sequence_number,
-    ):
-      _Output(chromium_db_record, output=args.output)
+    if args.source.is_file():
+      if args.object_store_id is not None:
+        records = sqlite.DatabaseReader(
+            str(args.source)
+        ).RecordsByObjectStoreId(
+            args.object_store_id, include_raw_data=args.include_raw_data
+        )
+      else:
+        records = sqlite.DatabaseReader(str(args.source)).Records(
+            include_raw_data=args.include_raw_data
+        )
+      for chromium_db_record in records:
+        if args.filter_value is not None and args.filter_value not in str(
+            chromium_db_record.value
+        ):
+          continue
+        if args.filter_key is not None and args.filter_key not in str(
+            chromium_db_record.key.value
+        ):
+          continue
+        _Output(chromium_db_record, output=args.output)
+    else:
+      for chromium_leveldb_record in chromium_record.FolderReader(
+          args.source
+      ).GetRecords(
+          use_manifest=args.use_manifest,
+          use_sequence_number=args.use_sequence_number,
+      ):
+        if (
+            args.object_store_id is not None
+            and chromium_leveldb_record.object_store_id != args.object_store_id
+        ):
+          continue
+        if args.filter_value is not None and args.filter_value not in str(
+            chromium_leveldb_record.value
+        ):
+          continue
+        if args.filter_key is not None and args.filter_key not in str(
+            chromium_leveldb_record.key.value
+        ):
+          continue
+        _Output(chromium_leveldb_record, output=args.output)
   elif args.format == "firefox":
-    for firefox_db_record in firefox_record.FileReader(args.source).Records():
+    if args.object_store_id is not None:
+      firefox_db_records = firefox_record.FileReader(
+          str(args.source)
+      ).RecordsByObjectStoreId(
+          args.object_store_id, include_raw_data=args.include_raw_data
+      )
+    else:
+      firefox_db_records = firefox_record.FileReader(str(args.source)).Records(
+          include_raw_data=args.include_raw_data
+      )
+    for firefox_db_record in firefox_db_records:
+      if args.filter_value is not None and args.filter_value not in str(
+          firefox_db_record.value
+      ):
+        continue
+      if args.filter_key is not None and args.filter_key not in str(
+          firefox_db_record.key.value
+      ):
+        continue
       _Output(firefox_db_record, output=args.output)
   elif args.format == "safari":
-    for safari_db_record in safari_record.FileReader(args.source).Records():
+    if args.object_store_id is not None:
+      safari_db_records = safari_record.FileReader(
+          str(args.source)
+      ).RecordsByObjectStoreId(
+          args.object_store_id, include_raw_data=args.include_raw_data
+      )
+    else:
+      safari_db_records = safari_record.FileReader(str(args.source)).Records(
+          include_raw_data=args.include_raw_data
+      )
+    for safari_db_record in safari_db_records:
+      if args.filter_value is not None and args.filter_value not in str(
+          safari_db_record.value
+      ):
+        continue
+      if args.filter_key is not None and args.filter_key not in str(
+          safari_db_record.key
+      ):
+        continue
       _Output(safari_db_record, output=args.output)
 def LdbCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a LevelDB table (.ldb) file as IndexedDB."""
-  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+  for db_record in chromium_record.ChromiumIndexedDBRecord.FromFile(
+      args.source
+  ):
+    if args.filter_value is not None and args.filter_value not in str(
+        db_record.value
+    ):
+      continue
+    if args.filter_key is not None and args.filter_key not in str(
+        db_record.key
+    ):
+      continue
     _Output(db_record, output=args.output)
 def LogCommand(args: argparse.Namespace) -> None:
   """The CLI for processing a LevelDB log file as IndexedDB."""
-  for db_record in chromium_record.IndexedDBRecord.FromFile(args.source):
+  for db_record in chromium_record.ChromiumIndexedDBRecord.FromFile(
+      args.source
+  ):
+    if args.filter_value is not None and args.filter_value not in str(
+        db_record.value
+    ):
+      continue
+    if args.filter_key is not None and args.filter_key not in str(
+        db_record.key
+    ):
+      continue
     _Output(db_record, output=args.output)
@@ -204,6 +303,16 @@ def App() -> None:
       choices=["chromium", "chrome", "firefox", "safari"],
       help="The type of IndexedDB to parse.",
   )
+  parser_db.add_argument(
+      "--object_store_id",
+      type=int,
+      help="The object store ID to filter by.",
+  )
+  parser_db.add_argument(
+      "--include_raw_data",
+      action="store_true",
+      help="Include raw key and value in the output.",
+  )
   parser_db.add_argument(
       "-o",
       "--output",
@@ -211,6 +320,22 @@ def App() -> None:
       default="json",
       help="Output format.  Default is json.",
   )
+  parser_db.add_argument(
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
+  parser_db.add_argument(
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_db.set_defaults(func=DbCommand)
   parser_ldb = subparsers.add_parser(
@@ -230,6 +355,22 @@ def App() -> None:
       default="json",
       help="Output format.  Default is json.",
   )
+  parser_ldb.add_argument(
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
+  parser_ldb.add_argument(
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_ldb.set_defaults(func=LdbCommand)
   parser_log = subparsers.add_parser(
@@ -249,6 +390,22 @@ def App() -> None:
       default="json",
       help="Output format.  Default is json.",
   )
+  parser_log.add_argument(
+      "--filter_value",
+      type=str,
+      help=(
+          "Only output records where the value contains this string. "
+          "Values are normalized to strings before comparison."
+      ),
+  )
+  parser_log.add_argument(
+      "--filter_key",
+      type=str,
+      help=(
+          "Only output records where the key contains this string. "
+          "Keys are normalized to strings before comparison."
+      ),
+  )
   parser_log.set_defaults(func=LogCommand)
   args: argparse.Namespace = parser.parse_args()

dfindexeddb/indexeddb/firefox/record.py CHANGED Viewed

@@ -54,6 +54,8 @@ class FirefoxIndexedDBRecord:
     object_store_id: the object store id.
     object_store_name: the object store name from the object_store table.
     database_name: the IndexedDB database name from the database table.
+    raw_key: the raw key.
+    raw_value: the raw value.
   """
   key: Any
@@ -62,6 +64,8 @@ class FirefoxIndexedDBRecord:
   object_store_id: int
   object_store_name: str
   database_name: str
+  raw_key: Optional[bytes] = None
+  raw_value: Optional[bytes] = None
 class FileReader:
@@ -134,7 +138,7 @@ class FileReader:
         )
   def RecordsByObjectStoreId(
-      self, object_store_id: int
+      self, object_store_id: int, include_raw_data: bool = False
   ) -> Generator[FirefoxIndexedDBRecord, None, None]:
     """Returns FirefoxIndexedDBRecords by a given object store id.
@@ -163,9 +167,13 @@ class FileReader:
             file_ids=row[3],
             object_store_name=row[4].decode("utf-8"),
             database_name=self.database_name,
+            raw_key=row[0] if include_raw_data else None,
+            raw_value=row[1] if include_raw_data else None,
         )
-  def Records(self) -> Generator[FirefoxIndexedDBRecord, None, None]:
+  def Records(
+      self, include_raw_data: bool = False
+  ) -> Generator[FirefoxIndexedDBRecord, None, None]:
     """Returns FirefoxIndexedDBRecords from the database."""
     with sqlite3.connect(f"file:{self.filename}?mode=ro", uri=True) as conn:
       conn.text_factory = bytes
@@ -187,6 +195,8 @@ class FileReader:
             file_ids=row[3],
             object_store_name=row[4].decode("utf-8"),
             database_name=self.database_name,
+            raw_key=row[0] if include_raw_data else None,
+            raw_value=row[1] if include_raw_data else None,
         )

dfindexeddb 20251109__py3-none-any.whl → 20260205__py3-none-any.whl

dfindexeddb 20251109__py3-none-any.whl → 20260205__py3-none-any.whl