dfindexeddb 20240305-py3-none-any.whl → 20240324-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/blink.py +2 -1
- dfindexeddb/indexeddb/chromium.py +4 -4
- dfindexeddb/indexeddb/cli.py +101 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +217 -0
- dfindexeddb/leveldb/definitions.py +16 -0
- dfindexeddb/leveldb/descriptor.py +10 -11
- dfindexeddb/leveldb/ldb.py +20 -24
- dfindexeddb/leveldb/log.py +25 -18
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +5 -46
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/METADATA +46 -32
- dfindexeddb-20240324.dist-info/RECORD +26 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/WHEEL +1 -1
- dfindexeddb-20240324.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -180
- dfindexeddb-20240305.dist-info/RECORD +0 -22
- dfindexeddb-20240305.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240324.dist-info}/top_level.txt +0 -0
dfindexeddb/indexeddb/blink.py
CHANGED
@@ -86,7 +86,8 @@ class V8ScriptValueDecoder:
       NotImplementedError: when called.
     """
     tag = self.ReadTag()
-    raise NotImplementedError(
+    raise NotImplementedError(
+        f'V8ScriptValueDecoder.ReadHostObject - {tag.name}')

   def Deserialize(self) -> Any:
     """Deserializes a Blink SSV.
dfindexeddb/indexeddb/chromium.py
CHANGED

@@ -20,11 +20,11 @@ import io
 from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union

 from dfindexeddb import errors
-from dfindexeddb import utils
 from dfindexeddb.indexeddb import blink
 from dfindexeddb.indexeddb import definitions
 from dfindexeddb.leveldb import ldb
 from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import utils


 T = TypeVar('T')

@@ -570,7 +570,7 @@ class EarlistCompactionTimeKey(BaseIndexedDBKey):
 class ScopesPrefixKey(BaseIndexedDBKey):
   """A scopes prefix IndexedDB key."""

-  def DecodeValue(self, decoder: utils.
+  def DecodeValue(self, decoder: utils.LevelDBDecoder) -> Optional[bytes]:
     """Decodes the scopes prefix value."""
     if decoder.NumRemainingBytes:
       return decoder.ReadBytes()[1]

@@ -578,7 +578,7 @@ class ScopesPrefixKey(BaseIndexedDBKey):

   @classmethod
   def FromDecoder(
-      cls, decoder: utils.
+      cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
       base_offset: int = 0
   ) -> ScopesPrefixKey:
     """Decodes the scopes prefix key."""

@@ -1357,4 +1357,4 @@ class IndexedDBRecord:
         value=idb_value,
         sequence_number=record.sequence_number if hasattr(
             record, 'sequence_number') else None,
-        type=record.
+        type=record.record_type)
dfindexeddb/indexeddb/cli.py
ADDED

@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for dfindexeddb."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+import sys
+import traceback
+
+from dfindexeddb import errors
+from dfindexeddb import version
+from dfindexeddb.leveldb import record as leveldb_record
+from dfindexeddb.indexeddb import chromium
+from dfindexeddb.indexeddb import v8
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfindexeddb fields."""
+  def default(self, o):
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, v8.Undefined):
+      return "<undefined>"
+    if isinstance(o, v8.Null):
+      return "<null>"
+    if isinstance(o, set):
+      return list(o)
+    if isinstance(o, v8.RegExp):
+      return str(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, to_json=False):
+  """Helper method to output parsed structure to stdout."""
+  if to_json:
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  else:
+    print(structure)
+
+
+def IndexeddbCommand(args):
+  """The CLI for processing a log/ldb file as indexeddb."""
+  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
+    record = db_record.record
+    try:
+      db_record.record = chromium.IndexedDBRecord.FromLevelDBRecord(record)
+    except(
+        errors.ParserError,
+        errors.DecoderError,
+        NotImplementedError) as err:
+      print(
+          (f'Error parsing blink value: {err} for {record.__class__.__name__} '
+           f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
+      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
+    _Output(db_record, to_json=args.json)
+
+
+def App():
+  """The CLI app entrypoint for dfindexeddb."""
+  parser = argparse.ArgumentParser(
+      prog='dfindexeddb',
+      description='A cli tool for parsing indexeddb files',
+      epilog=f'Version {version.GetVersion()}')
+  parser.add_argument(
+      '-s', '--source', required=True, type=pathlib.Path,
+      help='The source leveldb folder')
+  parser.add_argument('--json', action='store_true', help='Output as JSON')
+  parser.set_defaults(func=IndexeddbCommand)
+
+  args = parser.parse_args()
+  args.func(args)
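As a quick illustration of the `Encoder` added above, here is a minimal sketch of how raw bytes are rendered when `--json` is used, assuming `Encoder` is imported from the new `dfindexeddb.indexeddb.cli` module: printable characters pass through unchanged and everything else becomes a `\xNN` escape.

```python
import json

from dfindexeddb.indexeddb.cli import Encoder

# Printable bytes pass through; non-printable bytes become \xNN escapes.
print(json.dumps(b'key\x00\x01', cls=Encoder))
# prints: "key\\x00\\x01"
```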
dfindexeddb/indexeddb/utils.py
File without changes
dfindexeddb/leveldb/cli.py
ADDED

@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A CLI tool for leveldb files."""
+import argparse
+import dataclasses
+from datetime import datetime
+import json
+import pathlib
+
+from dfindexeddb import version
+from dfindexeddb.leveldb import descriptor
+from dfindexeddb.leveldb import ldb
+from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import record
+
+
+_VALID_PRINTABLE_CHARACTERS = (
+    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
+
+
+class Encoder(json.JSONEncoder):
+  """A JSON encoder class for dfleveldb fields."""
+
+  def default(self, o):
+    """Returns a serializable object for o."""
+    if dataclasses.is_dataclass(o):
+      o_dict = dataclasses.asdict(o)
+      return o_dict
+    if isinstance(o, bytes):
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
+    if isinstance(o, datetime):
+      return o.isoformat()
+    if isinstance(o, set):
+      return list(o)
+    return json.JSONEncoder.default(self, o)
+
+
+def _Output(structure, to_json=False):
+  """Helper method to output parsed structure to stdout."""
+  if to_json:
+    print(json.dumps(structure, indent=2, cls=Encoder))
+  else:
+    print(structure)
+
+
+def DbCommand(args):
+  """The CLI for processing leveldb folders."""
+  for rec in record.LevelDBRecord.FromDir(args.source):
+    _Output(rec, to_json=args.json)
+
+
+def LdbCommand(args):
+  """The CLI for processing ldb files."""
+  ldb_file = ldb.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in ldb_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'records' or not args.structure_type:
+    # Prints key value record information.
+    for key_value_record in ldb_file.GetKeyValueRecords():
+      _Output(key_value_record, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for ldb files.')
+
+
+def LogCommand(args):
+  """The CLI for processing log files."""
+  log_file = log.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in log_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in log_file.GetPhysicalRecords():
+      _Output(log_file_record, to_json=args.json)
+
+  elif args.structure_type == 'write_batches':
+    # Prints log file batch information.
+    for batch in log_file.GetWriteBatches():
+      _Output(batch, to_json=args.json)
+
+  elif (args.structure_type in ('parsed_internal_key', 'records')
+        or not args.structure_type):
+    # Prints key value record information.
+    for internal_key_record in log_file.GetParsedInternalKeys():
+      _Output(internal_key_record, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for log files.')
+
+
+def DescriptorCommand(args):
+  """The CLI for processing descriptor (MANIFEST) files."""
+  manifest_file = descriptor.FileReader(args.source)
+
+  if args.structure_type == 'blocks':
+    # Prints block information.
+    for block in manifest_file.GetBlocks():
+      _Output(block, to_json=args.json)
+
+  elif args.structure_type == 'physical_records':
+    # Prints log file physical record information.
+    for log_file_record in manifest_file.GetPhysicalRecords():
+      _Output(log_file_record, to_json=args.json)
+
+  elif (args.structure_type == 'versionedit'
+        or not args.structure_type):
+    for version_edit in manifest_file.GetVersionEdits():
+      _Output(version_edit, to_json=args.json)
+
+  else:
+    print(f'{args.structure_type} is not supported for descriptor files.')
+
+def App():
+  """The CLI app entrypoint for parsing leveldb files."""
+  parser = argparse.ArgumentParser(
+      prog='dfleveldb',
+      description='A cli tool for parsing leveldb files',
+      epilog=f'Version {version.GetVersion()}')
+
+  subparsers = parser.add_subparsers()
+
+  parser_db = subparsers.add_parser(
+      'db', help='Parse a directory as leveldb.')
+  parser_db.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb directory')
+  parser_db.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_db.set_defaults(func=DbCommand)
+
+  parser_log = subparsers.add_parser(
+      'log', help='Parse a leveldb log file.')
+  parser_log.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_log.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_log.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'physical_records',
+          'write_batches',
+          'parsed_internal_key'])
+  parser_log.set_defaults(func=LogCommand)
+
+  parser_ldb = subparsers.add_parser(
+      'ldb', help='Parse a leveldb table (.ldb) file.')
+  parser_ldb.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_ldb.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_ldb.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks',
+          'records'])
+  parser_ldb.set_defaults(func=LdbCommand)
+
+  parser_descriptor = subparsers.add_parser(
+      'descriptor', help='Parse a leveldb descriptor (MANIFEST) file.')
+  parser_descriptor.add_argument(
+      '-s', '--source',
+      required=True,
+      type=pathlib.Path,
+      help='The source leveldb file')
+  parser_descriptor.add_argument(
+      '--json', action='store_true', help='Output as JSON')
+  parser_descriptor.add_argument(
+      '-t',
+      '--structure_type',
+      choices=[
+          'blocks', 'physical_records', 'versionedit'])
+  parser_descriptor.set_defaults(func=DescriptorCommand)
+
+  args = parser.parse_args()
+
+  if not hasattr(args, 'func'):
+    parser.print_usage()
+  else:
+    args.func(args)
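The new `dfleveldb db` subcommand above is a thin wrapper over the record API added in `dfindexeddb/leveldb/record.py`; a hedged sketch of the programmatic equivalent of `dfleveldb db -s ./leveldb --json` (the `./leveldb` path is a placeholder):

```python
import json
import pathlib

from dfindexeddb.leveldb import record
from dfindexeddb.leveldb.cli import Encoder

# Mirrors DbCommand above: walk every log/ldb file under the folder and
# dump each recovered record as JSON.
for rec in record.LevelDBRecord.FromDir(pathlib.Path('./leveldb')):
  print(json.dumps(rec, indent=2, cls=Encoder))
```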
dfindexeddb/leveldb/definitions.py
CHANGED

@@ -16,12 +16,22 @@

 import enum

+BLOCK_RESTART_ENTRY_LENGTH = 4
+BLOCK_TRAILER_SIZE = 5
+TABLE_FOOTER_SIZE = 48
+TABLE_MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'

 PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
 SEQUENCE_LENGTH = 7
 TYPE_LENGTH = 1


+class BlockCompressionType(enum.IntEnum):
+  """Block compression types."""
+  SNAPPY = 1
+  ZSTD = 2
+
+
 class VersionEditTags(enum.IntEnum):
   """VersionEdit tags."""
   COMPARATOR = 1

@@ -41,3 +51,9 @@ class LogFilePhysicalRecordType(enum.IntEnum):
   FIRST = 2
   MIDDLE = 3
   LAST = 4
+
+
+class InternalRecordType(enum.IntEnum):
+  """Internal record types."""
+  DELETED = 0
+  VALUE = 1
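These constants describe the packed tail of a LevelDB internal key: the last 8 bytes hold a 7-byte sequence number and a 1-byte type, which the new `InternalRecordType` enum names. A minimal sketch of unpacking that tail, matching the logic in `KeyValueRecord.FromDecoder` further down (the sample key is fabricated):

```python
from dfindexeddb.leveldb import definitions

# Fabricated internal key: user key b'k' plus an 8-byte little-endian
# tail packing (sequence_number << 8 | record_type).
internal_key = b'k' + ((5 << 8) | 1).to_bytes(
    definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH, 'little')

sequence_number = int.from_bytes(
    internal_key[-definitions.SEQUENCE_LENGTH:], 'little')
record_type = definitions.InternalRecordType(
    internal_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH])

print(sequence_number, record_type)  # 5 InternalRecordType.VALUE
```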
dfindexeddb/leveldb/descriptor.py
CHANGED

@@ -12,17 +12,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Parser for LevelDB
+"""Parser for LevelDB Descriptor (MANIFEST) files."""
 from __future__ import annotations

 from dataclasses import dataclass, field
 from typing import Generator, Optional

 from dfindexeddb import errors
-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
 from dfindexeddb.leveldb import log
-
+from dfindexeddb.leveldb import utils


 @dataclass

@@ -43,7 +42,7 @@ class InternalKey:
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
-    """Decodes
+    """Decodes an InternalKey from the current position of a LevelDBDecoder.

     Args:
       decoder: the LevelDBDecoder.

@@ -78,7 +77,7 @@ class NewFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
     file_size: the file size.
     smallest: the smallest internal key.
     largest: the largest internal key.

@@ -119,7 +118,7 @@ class NewFile(utils.FromDecoderMixin):

 @dataclass
 class CompactPointer(utils.FromDecoderMixin):
-  """A
+  """A CompactPointer.

   Attributes:
     offset: the offset.

@@ -155,7 +154,7 @@ class DeletedFile(utils.FromDecoderMixin):
   Attributes:
     offset: the offset.
     level: the level.
-    number: the number.
+    number: the file number.
   """
   offset: int
   level: int

@@ -260,12 +259,12 @@ class VersionEdit(utils.FromDecoderMixin):


 class FileReader:
-  """A Descriptor
-
+  """A reader for Descriptor files.
+
   A DescriptorFileReader provides read-only sequential iteration of serialized
   structures in a leveldb Descriptor file. These structures include:
-  * blocks (
-  * records (
+  * blocks (Block)
+  * records (PhysicalRecord)
   * version edits (VersionEdit)
   """
   def __init__(self, filename: str):
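A hedged usage sketch for this reader, mirroring what `DescriptorCommand` in the new `dfleveldb` CLI does when no `--structure_type` is given (the MANIFEST filename is a placeholder):

```python
from dfindexeddb.leveldb import descriptor

# Iterate the VersionEdit records in a descriptor (MANIFEST) file,
# as DescriptorCommand does when --structure_type is left unset.
reader = descriptor.FileReader('MANIFEST-000001')
for version_edit in reader.GetVersionEdits():
  print(version_edit)
```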
dfindexeddb/leveldb/ldb.py
CHANGED
@@ -23,8 +23,8 @@ from typing import BinaryIO, Iterable, Tuple
 import snappy
 import zstd

-from dfindexeddb import utils
 from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import utils


 @dataclass

@@ -36,13 +36,13 @@ class KeyValueRecord:
     key: the key of the record.
     value: the value of the record.
     sequence_number: the sequence number of the record.
-
+    record_type: the type of the record.
   """
   offset: int
   key: bytes
   value: bytes
   sequence_number: int
-
+  record_type: definitions.InternalRecordType

   @classmethod
   def FromDecoder(

@@ -69,9 +69,13 @@ class KeyValueRecord:
     sequence_number = int.from_bytes(
         key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
     key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
-
-    return cls(
-
+    record_type = definitions.InternalRecordType(key_type)
+    return cls(
+        offset=offset + block_offset,
+        key=key,
+        value=value,
+        sequence_number=sequence_number,
+        record_type=record_type), shared_key


 @dataclass

@@ -88,17 +92,13 @@ class Block:
   data: bytes = field(repr=False)
   footer: bytes  # 5 bytes = 1 byte compressed flag + 4 bytes checksum.

-  SNAPPY_COMPRESSED = 1
-  ZSTD_COMPRESSED = 2
-  RESTART_ENTRY_LENGTH = 4
-
   def IsSnappyCompressed(self) -> bool:
     """Returns true if the block is snappy compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.SNAPPY

   def IsZstdCompressed(self) -> bool:
     """Returns true if the block is zstd compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.ZSTD

   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""

@@ -121,10 +121,11 @@ class Block:
     # trailer of a block has the form:
     #     restarts: uint32[num_restarts]
     #     num_restarts: uint32
-    decoder.stream.seek(-
+    decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
     restarts_offset = (
-        decoder.stream.tell()) - (
+        decoder.stream.tell()) - (
+        (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH)

     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()

@@ -154,8 +155,6 @@ class BlockHandle(utils.FromDecoderMixin):
   block_offset: int
   length: int

-  BLOCK_TRAILER_SIZE = 5
-
   def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.


@@ -173,8 +172,8 @@ class BlockHandle(utils.FromDecoderMixin):
     if len(data) != self.length:
       raise ValueError('Could not read all of the block')

-    footer = stream.read(
-    if len(footer) !=
+    footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
+    if len(footer) != definitions.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')

     return Block(self.offset, self.block_offset, self.length, data, footer)

@@ -208,9 +207,6 @@ class FileReader:
   * records (KeyValueRecord)
   """

-  FOOTER_SIZE = 48
-  MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
-
   def __init__(self, filename: str):
     """Initializes the LogFile.


@@ -222,11 +218,11 @@ class FileReader:
     """
     self.filename = filename
     with open(self.filename, 'rb') as fh:
-      fh.seek(-len(
-      if fh.read(len(
+      fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
+      if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
        raise ValueError(f'Invalid magic number in {self.filename}')

-      fh.seek(-
+      fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
      # meta_handle, need to read first due to variable integers
      _ = BlockHandle.FromStream(fh)
      index_handle = BlockHandle.FromStream(fh)