PyPI - dfindexeddb - Versions diffs - 20240301__tar.gz → 20240305__tar.gz - Mend

dfindexeddb 20240301tar.gz → 20240305tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{dfindexeddb-20240301/dfindexeddb.egg-info → dfindexeddb-20240305}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dfindexeddb
-Version: 20240301
+Version: 20240305
 Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
 Author-email: Syd Pleno <sydp@google.com>
 Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/cli.py RENAMED Viewed

@@ -21,18 +21,31 @@ import pathlib
 import sys
 import traceback
-from dfindexeddb.leveldb import log
+from dfindexeddb import errors
+from dfindexeddb import version
+from dfindexeddb.leveldb import descriptor
 from dfindexeddb.leveldb import ldb
+from dfindexeddb.leveldb import log
 from dfindexeddb.indexeddb import chromium
-from dfindexeddb import errors
 from dfindexeddb.indexeddb import v8
+_VALID_PRINTABLE_CHARACTERS = (
+    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
+    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
 class Encoder(json.JSONEncoder):
   """A JSON encoder class for dfindexeddb fields."""
   def default(self, o):
     if isinstance(o, bytes):
-      return o.decode(encoding='ascii', errors='backslashreplace')
+      out = []
+      for x in o:
+        if chr(x) not in _VALID_PRINTABLE_CHARACTERS:
+          out.append(f'\\x{x:02X}')
+        else:
+          out.append(chr(x))
+      return ''.join(out)
     if isinstance(o, datetime):
       return o.isoformat()
     if isinstance(o, v8.Undefined):
@@ -59,10 +72,10 @@ def IndexeddbCommand(args):
   """The CLI for processing a log/ldb file as indexeddb."""
   if args.source.name.endswith('.log'):
     records = list(
-        log.LogFileReader(args.source).GetKeyValueRecords())
+        log.FileReader(args.source).GetKeyValueRecords())
   elif args.source.name.endswith('.ldb'):
     records = list(
-        ldb.LdbFileReader(args.source).GetKeyValueRecords())
+        ldb.FileReader(args.source).GetKeyValueRecords())
   else:
     print('Unsupported file type.', file=sys.stderr)
     return
@@ -78,9 +91,17 @@ def IndexeddbCommand(args):
     _Output(record, to_json=args.json)
+def ManifestCommand(args):
+  """The CLI for processing MANIFEST aka Descriptor files."""
+  manifest_file = descriptor.FileReader(args.source)
+  for version_edit in manifest_file.GetVersionEdits():
+    _Output(version_edit, to_json=args.json)
 def LdbCommand(args):
   """The CLI for processing ldb files."""
-  ldb_file = ldb.LdbFileReader(args.source)
+  ldb_file = ldb.FileReader(args.source)
   if args.structure_type == 'blocks':
     # Prints block information.
@@ -95,7 +116,7 @@ def LdbCommand(args):
 def LogCommand(args):
   """The CLI for processing log files."""
-  log_file = log.LogFileReader(args.source)
+  log_file = log.FileReader(args.source)
   if args.structure_type == 'blocks':
     # Prints block information.
@@ -122,7 +143,8 @@ def App():
   """The CLI app entrypoint."""
   parser = argparse.ArgumentParser(
       prog='dfindexeddb',
-      description='A cli tool for the dfindexeddb package')
+      description='A cli tool for the dfindexeddb package',
+      epilog=f'Version {version.GetVersion()}')
   parser.add_argument(
       '-s', '--source', required=True, type=pathlib.Path,
@@ -147,6 +169,9 @@ def App():
           'records'])
   parser_log.set_defaults(func=LdbCommand)
+  parser_log = subparsers.add_parser('manifest')
+  parser_log.set_defaults(func=ManifestCommand)
   parser_log = subparsers.add_parser('indexeddb')
   parser_log.set_defaults(func=IndexeddbCommand)

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/indexeddb/chromium.py RENAMED Viewed

@@ -31,7 +31,7 @@ T = TypeVar('T')
 @dataclass
-class KeyPrefix(utils.FromStreamMixin):
+class KeyPrefix(utils.FromDecoderMixin):
   """The IndexedDB key prefix.
   Attributes:
@@ -111,7 +111,7 @@ class KeyPrefix(utils.FromStreamMixin):
 @dataclass
-class IDBKey(utils.FromStreamMixin):
+class IDBKey(utils.FromDecoderMixin):
   """An IDBKey.
   Attributes:
@@ -199,7 +199,7 @@ class IDBKey(utils.FromStreamMixin):
 @dataclass
-class IDBKeyPath(utils.FromStreamMixin):
+class IDBKeyPath(utils.FromDecoderMixin):
   """An IDBKeyPath.
   Arguments:
@@ -256,7 +256,7 @@ class IDBKeyPath(utils.FromStreamMixin):
 @dataclass
-class BlobJournalEntry(utils.FromStreamMixin):
+class BlobJournalEntry(utils.FromDecoderMixin):
   """A blob journal entry.
   Attributes:
@@ -287,7 +287,7 @@ class BlobJournalEntry(utils.FromStreamMixin):
 @dataclass
-class BlobJournal(utils.FromStreamMixin):
+class BlobJournal(utils.FromDecoderMixin):
   """A BlobJournal.
   Attributes:
@@ -1226,7 +1226,7 @@ class IndexMetaDataKey(BaseIndexedDBKey):
 @dataclass
-class ExternalObjectEntry(utils.FromStreamMixin):
+class ExternalObjectEntry(utils.FromDecoderMixin):
   """An IndexedDB external object entry.
   Args:
@@ -1287,7 +1287,7 @@ class ExternalObjectEntry(utils.FromStreamMixin):
 @dataclass
-class IndexedDBExternalObject(utils.FromStreamMixin):
+class IndexedDBExternalObject(utils.FromDecoderMixin):
   """An IndexedDB external object.
   Args:
@@ -1346,7 +1346,7 @@ class IndexedDBRecord:
   @classmethod
   def FromLevelDBRecord(
-      cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
+      cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
   ) -> IndexedDBRecord:
     """Returns an IndexedDBRecord from a ParsedInternalKey."""
     idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)

dfindexeddb-20240305/dfindexeddb/leveldb/definitions.py ADDED Viewed

@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Definitions for LevelDB."""
+import enum
+PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
+SEQUENCE_LENGTH = 7
+TYPE_LENGTH = 1
+class VersionEditTags(enum.IntEnum):
+  """VersionEdit tags."""
+  COMPARATOR = 1
+  LOG_NUMBER = 2
+  NEXT_FILE_NUMBER = 3
+  LAST_SEQUENCE = 4
+  COMPACT_POINTER = 5
+  DELETED_FILE = 6
+  NEW_FILE = 7
+  # 8 was used for large value refs
+  PREV_LOG_NUMBER = 9
+class LogFilePhysicalRecordType(enum.IntEnum):
+  """Log file physical record types."""
+  FULL = 1
+  FIRST = 2
+  MIDDLE = 3
+  LAST = 4

dfindexeddb-20240305/dfindexeddb/leveldb/descriptor.py ADDED Viewed

@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for LevelDB Manifest files."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Generator, Optional
+from dfindexeddb import errors
+from dfindexeddb import utils
+from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import log
+@dataclass
+class InternalKey:
+  """An InternalKey.
+  Attributes:
+    offset: the offset.
+    user_key: the user key.
+    sequence_number: the sequence number.
+    key_type: the key type.
+  """
+  offset: int
+  user_key: bytes = field(repr=False)
+  sequence_number: int
+  key_type: int
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
+    """Decodes a InternalKey from the current position of a LevelDBDecoder.
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+    Returns:
+      The InternalKey instance.
+    """
+    offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
+    if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
+      raise errors.ParserError('Insufficient bytes to parse InternalKey')
+    user_key = slice_bytes[:-definitions.SEQUENCE_LENGTH]
+    sequence_number = int.from_bytes(
+        slice_bytes[-definitions.SEQUENCE_LENGTH:],
+        byteorder='little',
+        signed=False)
+    key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+    return cls(
+        offset=base_offset + offset,
+        user_key=user_key,
+        sequence_number=sequence_number,
+        key_type=key_type)
+@dataclass
+class NewFile(utils.FromDecoderMixin):
+  """A NewFile.
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the number.
+    file_size: the file size.
+    smallest: the smallest internal key.
+    largest: the largest internal key.
+  """
+  offset: int
+  level: int
+  number: int
+  file_size: int
+  smallest: InternalKey
+  largest: InternalKey
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
+    """Decodes a NewFile from the current position of a LevelDBDecoder.
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+    Returns:
+      The NewFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    _, file_size = decoder.DecodeUint64Varint()
+    smallest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+    largest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+    return cls(
+        offset=offset + base_offset,
+        level=level,
+        number=number,
+        file_size=file_size,
+        smallest=smallest,
+        largest=largest)
+@dataclass
+class CompactPointer(utils.FromDecoderMixin):
+  """A CompactPointer.
+  Attributes:
+    offset: the offset.
+    level: the level.
+    key: the key bytes.
+  """
+  offset: int
+  level: int
+  key: bytes = field(repr=False)
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> CompactPointer:
+    """Decodes a CompactPointer from the current position of a LevelDBDecoder.
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+    Returns:
+      The CompactPointer instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, key = decoder.DecodeLengthPrefixedSlice()
+    return cls(offset=base_offset + offset, level=level, key=key)
+@dataclass
+class DeletedFile(utils.FromDecoderMixin):
+  """A DeletedFile.
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the number.
+  """
+  offset: int
+  level: int
+  number: int
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
+    """Decodes a DeletedFile from the current position of a LevelDBDecoder.
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+    Returns:
+      The DeletedFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    return cls(offset=base_offset + offset, level=level, number=number)
+@dataclass
+class VersionEdit(utils.FromDecoderMixin):
+  """A VersionEdit is recorded in a LevelDB descriptor/manifest file.
+  Attributes:
+    offset: the offset where the VersionEdit was parsed.
+    comparator: the comparator.
+    log_number: the log number.
+    prev_log_number: the previous log number.
+    next_file_number: the next file number.
+    last_sequence: the last sequence.
+    compact_pointers: the list of CompactPointers.
+    deleted_files: the list of DeletedFiles.
+    new_files: the list of NewFiles.
+  """
+  offset: int
+  comparator: Optional[bytes] = None
+  log_number: Optional[int] = None
+  prev_log_number: Optional[int] = None
+  next_file_number: Optional[int] = None
+  last_sequence: Optional[int] = None
+  compact_pointers: list[CompactPointer] = field(default_factory=list)
+  deleted_files: list[DeletedFile] = field(default_factory=list)
+  new_files: list[NewFile] = field(default_factory=list)
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
+    """Decodes a VersionEdit from the current position of a LevelDBDecoder.
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+    Returns:
+      The VersionEdit instance.
+    Raises:
+      ParserError if an invalid VersionEditTag is parsed.
+    """
+    offset, tag_byte = decoder.DecodeUint32Varint()
+    version_edit = cls(offset=base_offset + offset)
+    while tag_byte:
+      try:
+        tag = definitions.VersionEditTags(tag_byte)
+      except TypeError as error:
+        raise errors.ParserError(
+            f'Invalid VersionEditTag at offset {offset}') from error
+      if tag == definitions.VersionEditTags.COMPARATOR:
+        _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
+      elif tag == definitions.VersionEditTags.LOG_NUMBER:
+        _, version_edit.log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.PREV_LOG_NUMBER:
+        _, version_edit.prev_log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.NEXT_FILE_NUMBER:
+        _, version_edit.next_file_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.LAST_SEQUENCE:
+        _, version_edit.last_sequence = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.COMPACT_POINTER:
+        compact_pointer = CompactPointer.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.compact_pointers.append(compact_pointer)
+      elif tag == definitions.VersionEditTags.DELETED_FILE:
+        deleted_file = DeletedFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.deleted_files.append(deleted_file)
+      elif tag == definitions.VersionEditTags.NEW_FILE:
+        file_metadata = NewFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.new_files.append(file_metadata)
+      if decoder.NumRemainingBytes() == 0:
+        break
+      offset, tag_byte = decoder.DecodeUint32Varint()
+    return version_edit
+class FileReader:
+  """A Descriptor file reader.
+  A Descriptor FileReader provides read-only sequential iteration of serialized
+  structures in a leveldb Descriptor file.  These structures include:
+  * blocks (log.Block)
+  * records (log.PhysicalRecord)
+  * version edits (VersionEdit)
+  """
+  def __init__(self, filename: str):
+    """Initializes the Descriptor a.k.a. MANIFEST file.
+    Args:
+      filename: the Descriptor filename (e.g. MANIFEST-000001)
+    """
+    self.filename = filename
+  def GetBlocks(self) -> Generator[log.Block, None, None]:
+    """Returns an iterator of Block instances.
+    A Descriptor file is composed of one or more blocks.
+    Yields:
+      Block
+    """
+    with open(self.filename, 'rb') as fh:
+      block = log.Block.FromStream(fh)
+      while block:
+        yield block
+        block = log.Block.FromStream(fh)
+  def GetPhysicalRecords(self) -> Generator[log.PhysicalRecord, None, None]:
+    """Returns an iterator of PhysicalRecord instances.
+    A block is composed of one or more physical records.
+    Yields:
+      PhysicalRecord
+    """
+    for block in self.GetBlocks():
+      yield from block.GetPhysicalRecords()
+  def GetVersionEdits(self) -> Generator[VersionEdit, None, None]:
+    """Returns an iterator of VersionEdit instances.
+    Depending on the VersionEdit size, it can be spread across one or
+    more physical records.
+    Yields:
+      VersionEdit
+    """
+    buffer = bytearray()
+    for physical_record in self.GetPhysicalRecords():
+      if (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.FULL):
+        buffer = physical_record.contents
+        offset = physical_record.contents_offset + physical_record.base_offset
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()
+      elif (physical_record.record_type ==
+            definitions.LogFilePhysicalRecordType.FIRST):
+        offset = physical_record.contents_offset + physical_record.base_offset
+        buffer = bytearray(physical_record.contents)
+      elif (physical_record.record_type ==
+            definitions.LogFilePhysicalRecordType.MIDDLE):
+        buffer.extend(bytearray(physical_record.contents))
+      elif (physical_record.record_type ==
+            definitions.LogFilePhysicalRecordType.LAST):
+        buffer.extend(bytearray(physical_record.contents))
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/leveldb/ldb.py RENAMED Viewed

@@ -24,10 +24,11 @@ import snappy
 import zstd
 from dfindexeddb import utils
+from dfindexeddb.leveldb import definitions
 @dataclass
-class LdbKeyValueRecord:
+class KeyValueRecord:
   """A leveldb table key-value record.
   Attributes:
@@ -43,14 +44,10 @@ class LdbKeyValueRecord:
   sequence_number: int
   type: int
-  PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
-  SEQUENCE_LENGTH = 7
-  TYPE_LENGTH = 1
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
-  ) -> Tuple[LdbKeyValueRecord, bytes]:
+  ) -> Tuple[KeyValueRecord, bytes]:
     """Decodes a ldb key value record.
     Args:
@@ -59,7 +56,7 @@ class LdbKeyValueRecord:
       shared_key: the shared key bytes.
     Returns:
-      A tuple of the parsed LdbKeyValueRecord and the updated shared key bytes.
+      A tuple of the parsed KeyValueRecord and the updated shared key bytes.
     """
     offset, shared_bytes = decoder.DecodeUint32Varint()
     _, unshared_bytes = decoder.DecodeUint32Varint()
@@ -68,17 +65,17 @@ class LdbKeyValueRecord:
     _, value = decoder.ReadBytes(value_length)
     shared_key = shared_key[:shared_bytes] + key_delta
-    key = shared_key[:-cls.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+    key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     sequence_number = int.from_bytes(
-        key[-cls.SEQUENCE_LENGTH:], byteorder='little', signed=False)
-    key_type = shared_key[-cls.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+        key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
+    key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     return cls(offset + block_offset, key, value, sequence_number,
                key_type), shared_key
 @dataclass
-class LdbBlock:
+class Block:
   """A leveldb table block.
   Attributes:
@@ -111,11 +108,11 @@ class LdbBlock:
       return zstd.decompress(self.data)
     return self.data
-  def GetRecords(self) -> Iterable[LdbKeyValueRecord]:
+  def GetRecords(self) -> Iterable[KeyValueRecord]:
     """Returns an iterator over the key value records in the block.
     Yields:
-      LdbKeyValueRecords
+      KeyValueRecords
     """
     # get underlying block content, decompressing if required
     buffer = self.GetBuffer()
@@ -135,7 +132,7 @@ class LdbBlock:
     key = b''
     while decoder.stream.tell() < restarts_offset:
-      key_value_record, key = LdbKeyValueRecord.FromDecoder(
+      key_value_record, key = KeyValueRecord.FromDecoder(
           decoder, self.block_offset, key)
       yield key_value_record
@@ -145,7 +142,7 @@ class LdbBlock:
 @dataclass
-class BlockHandle:
+class BlockHandle(utils.FromDecoderMixin):
   """A handle to a block in the ldb file.
   Attributes:
@@ -159,14 +156,14 @@ class BlockHandle:
   BLOCK_TRAILER_SIZE = 5
-  def Load(self, stream: BinaryIO) -> LdbBlock:
+  def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.
     Args:
       stream: the binary stream of the ldb file.
     Returns:
-      a LdbBlock.
+      a Block.
     Raises:
       ValueError: if it could not read all of the block or block footer.
@@ -180,32 +177,35 @@ class BlockHandle:
     if len(footer) != self.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')
-    return LdbBlock(self.offset, self.block_offset, self.length, data, footer)
+    return Block(self.offset, self.block_offset, self.length, data, footer)
   @classmethod
-  def FromStream(cls, stream: BinaryIO, base_offset: int = 0) -> BlockHandle:
-    """Reads a block handle from a binary stream.
+  def FromDecoder(
+      cls: BlockHandle,
+      decoder: utils.LevelDBDecoder,
+      base_offset: int = 0
+    ) -> BlockHandle:
+    """Decodes a BlockHandle from the current position of a LevelDBDecoder.
     Args:
-      stream: the binary stream.
+      decoder: the LevelDBDecoder.
       base_offset: the base offset.
     Returns:
-      A BlockHandle.
+      The BlockHandle instance.
     """
-    decoder = utils.LevelDBDecoder(stream)
     offset, block_offset = decoder.DecodeUint64Varint()
     _, length = decoder.DecodeUint64Varint()
     return cls(offset + base_offset, block_offset, length)
-class LdbFileReader:
+class FileReader:
   """A leveldb table (.ldb or .sst) file reader.
-  A LdbFileReader provides read-only sequential iteration of serialized
+  A Ldb FileReader provides read-only sequential iteration of serialized
   structures in a leveldb ldb file.  These structures include:
-  * blocks (LdbBlock)
-  * records (LdbKeyValueRecord)
+  * blocks (Block)
+  * records (KeyValueRecord)
   """
   FOOTER_SIZE = 48
@@ -234,11 +234,11 @@ class LdbFileReader:
       # self.meta_block = meta_handle.load(fh)  # TODO: support meta blocks
       self.index_block = index_handle.Load(fh)
-  def GetBlocks(self) -> Iterable[LdbBlock]:
-    """Returns an iterator of LdbBlocks.
+  def GetBlocks(self) -> Iterable[Block]:
+    """Returns an iterator of Blocks.
     Yields:
-      LdbBlock.
+      Block.
     """
     with open(self.filename, 'rb') as fh:
       for key_value_record in self.index_block.GetRecords():
@@ -247,11 +247,11 @@ class LdbFileReader:
             base_offset=key_value_record.offset)
         yield block_handle.Load(fh)
-  def GetKeyValueRecords(self) -> Iterable[LdbKeyValueRecord]:
-    """Returns an iterator of LdbKeyValueRecords.
+  def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
+    """Returns an iterator of KeyValueRecords.
     Yields:
-      LdbKeyValueRecords.
+      KeyValueRecords.
     """
     for block in self.GetBlocks():
       yield from block.GetRecords()

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/leveldb/log.py RENAMED Viewed

@@ -16,19 +16,11 @@
 from __future__ import annotations
 from dataclasses import dataclass, field
-from enum import IntEnum
 import io
 from typing import BinaryIO, Generator, Iterable, Optional
 from dfindexeddb import utils
-class LogFilePhysicalRecordType(IntEnum):
-  """LevelDB log file physical record types."""
-  FULL = 1
-  FIRST = 2
-  MIDDLE = 3
-  LAST = 4
+from dfindexeddb.leveldb import definitions
 @dataclass
@@ -38,28 +30,35 @@ class ParsedInternalKey:
   Attributes:
     offset: the offset of the record.
     type: the record type.
+    sequence_number: the sequence number (inferred from the relative location
+        of the ParsedInternalKey in a WriteBatch).
     key: the record key.
     value: the record value.
   """
   offset: int
   type: int
+  sequence_number: int
   key: bytes
   value: bytes
+  __type__: str = 'ParsedInternalKey'
   @classmethod
   def FromDecoder(
       cls,
       decoder: utils.LevelDBDecoder,
-      base_offset: int = 0
+      base_offset: int = 0,
+      sequence_number: int = 0,
   ) -> ParsedInternalKey:
     """Decodes an internal key value record.
     Args:
       decoder: the leveldb decoder.
-      base_offset: the base offset for the parsed key value record.
+      base_offset: the base offset for the parsed internal key value record.
+      sequence_number: the sequence number for the parsed internal key value
+          record.
     Returns:
-      a ParsedInternalKey
+      A ParsedInternalKey
     Raises:
       ValueError: if there is an invalid record type encountered.
@@ -72,15 +71,20 @@ class ParsedInternalKey:
       value =  b''
     else:
       raise ValueError(f'Invalid record type {record_type}')
-    return cls(base_offset + offset, record_type, key, value)
+    return cls(
+        offset=base_offset + offset,
+        type=record_type,
+        key=key,
+        value=value,
+        sequence_number=sequence_number)
 @dataclass
-class WriteBatch:
+class WriteBatch(utils.FromDecoderMixin):
   """A write batch from a leveldb log file.
   Attributes:
-    offset: the batch offset.
+    offset: the write batch offset.
     sequence_number: the batch sequence number.
     count: the number of ParsedInternalKey in the batch.
     records: the ParsedInternalKey parsed from the batch.
@@ -91,46 +95,38 @@ class WriteBatch:
   records: Iterable[ParsedInternalKey] = field(repr=False)
   @classmethod
-  def FromStream(
-    cls, stream: BinaryIO, base_offset: int = 0
+  def FromDecoder(
+    cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
   ) -> WriteBatch:
     """Parses a WriteBatch using a LevelDBDecoder.
     Args:
-      stream: the binary stream to be parsed.
+      decoder: the LevelDBDecoder
       base_offset: the base offset of the Block from which the data is
           read from.
     Returns:
       A WriteBatch.
     """
-    decoder = utils.LevelDBDecoder(stream)
-    _, sequence_number = decoder.DecodeUint64()
+    offset, sequence_number = decoder.DecodeUint64()
     _, count = decoder.DecodeUint32()
     records = []
-    for _ in range(count):
-      record = ParsedInternalKey.FromDecoder(decoder, base_offset)
+    for relative_sequence_number in range(count):
+      record = ParsedInternalKey.FromDecoder(
+          decoder, base_offset + offset,
+          relative_sequence_number + sequence_number
+      )
       records.append(record)
-    return cls(base_offset, sequence_number, count, records)
-  @classmethod
-  def FromBytes(cls, data: bytes, base_offset: int = 0) -> WriteBatch:
-    """Parses a WriteBatch from bytes.
-    Args:
-      data: the bytes to be parsed.
-      base_offset: the base offset of the Block from which the data is
-          read from.
-    Returns:
-      A WriteBatch.
-    """
-    return cls.FromStream(io.BytesIO(data), base_offset)
+    return cls(
+        offset=base_offset + offset,
+        sequence_number=sequence_number,
+        count=count,
+        records=records)
 @dataclass
-class PhysicalRecord:
+class PhysicalRecord(utils.FromDecoderMixin):
   """A physical record from a leveldb log file.
   Attributes:
@@ -145,27 +141,30 @@ class PhysicalRecord:
   offset: int
   checksum: int
   length: int
-  record_type: LogFilePhysicalRecordType
+  record_type: definitions.LogFilePhysicalRecordType
   contents: bytes = field(repr=False)
   contents_offset: int
+  PHYSICAL_HEADER_LENGTH = 7
   @classmethod
-  def FromStream(
-      cls, stream: BinaryIO, base_offset: int = 0) -> PhysicalRecord:
-    """Parses a PhysicalRecord from a binary stream.
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> PhysicalRecord:
+    """Decodes a PhysicalRecord from the current position of a LevelDBDecoder.
     Args:
-      stream: the binary stream to be parsed.
+      decoder: the LevelDBDecoder.
       base_offset: the base offset of the WriteBatch from which the data is
           read from.
     Returns:
       A PhysicalRecord.
     """
-    decoder = utils.StreamDecoder(stream)
     offset, checksum = decoder.DecodeUint32()
     _, length = decoder.DecodeUint16()
-    record_type = LogFilePhysicalRecordType(decoder.DecodeUint8()[1])
+    record_type = definitions.LogFilePhysicalRecordType(
+        decoder.DecodeUint8()[1])
     contents_offset, contents = decoder.ReadBytes(length)
     return cls(
         base_offset=base_offset,
@@ -199,7 +198,7 @@ class Block:
     buffer = io.BytesIO(self.data)
     buffer_length = len(self.data)
-    while buffer.tell() < buffer_length:
+    while buffer.tell() + PhysicalRecord.PHYSICAL_HEADER_LENGTH < buffer_length:
       yield PhysicalRecord.FromStream(buffer, base_offset=self.offset)
   @classmethod
@@ -219,10 +218,10 @@ class Block:
     return cls(offset, data)
-class LogFileReader:
+class FileReader:
   """A leveldb log file reader.
-  A LogFileReader provides read-only sequential iteration of serialized
+  A Log FileReader provides read-only sequential iteration of serialized
   structures in a leveldb logfile.  These structures include:
   * blocks (Block)
   * physical records (PhysicalRecord)
@@ -250,11 +249,10 @@ class LogFileReader:
       a Block
     """
     with open(self.filename, 'rb') as fh:
-      while True:
-        block = Block.FromStream(fh)
-        if not block:
-          break
+      block = Block.FromStream(fh)
+      while block:
         yield block
+        block = Block.FromStream(fh)
   def GetPhysicalRecords(self) -> Generator[PhysicalRecord, None, None]:
     """Returns an iterator of PhysicalRecord instances.
@@ -278,17 +276,21 @@ class LogFileReader:
     """
     buffer = bytearray()
     for physical_record in self.GetPhysicalRecords():
-      if physical_record.record_type == LogFilePhysicalRecordType.FULL:
+      if(physical_record.record_type ==
+         definitions.LogFilePhysicalRecordType.FULL):
         buffer = physical_record.contents
         offset = physical_record.contents_offset + physical_record.base_offset
         yield WriteBatch.FromBytes(buffer, base_offset=offset)
         buffer = bytearray()
-      elif physical_record.record_type == LogFilePhysicalRecordType.FIRST:
+      elif (physical_record.record_type
+            == definitions.LogFilePhysicalRecordType.FIRST):
         offset = physical_record.contents_offset + physical_record.base_offset
         buffer = bytearray(physical_record.contents)
-      elif physical_record.record_type == LogFilePhysicalRecordType.MIDDLE:
+      elif (physical_record.record_type ==
+            definitions.LogFilePhysicalRecordType.MIDDLE):
         buffer.extend(bytearray(physical_record.contents))
-      elif physical_record.record_type == LogFilePhysicalRecordType.LAST:
+      elif (physical_record.record_type ==
+            definitions.LogFilePhysicalRecordType.LAST):
         buffer.extend(bytearray(physical_record.contents))
         yield WriteBatch.FromBytes(buffer, base_offset=offset)
         buffer = bytearray()

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/utils.py RENAMED Viewed

@@ -229,24 +229,30 @@ class LevelDBDecoder(StreamDecoder):
           f'Odd number of bytes encountered at offset {offset}')
     return offset, buffer.decode('utf-16-be')
+  def DecodeLengthPrefixedSlice(self) -> Tuple[int, bytes]:
+    """Returns a tuple of the offset of decoding and the byte 'slice'."""
+    offset, num_bytes = self.DecodeUint32Varint()
+    _, blob = self.ReadBytes(num_bytes)
+    return offset, blob
   def DecodeBlobWithLength(self) -> Tuple[int, bytes]:
     """Returns a tuple of the offset of decoding and the binary blob."""
     offset, num_bytes = self.DecodeUint64Varint()
     _, blob = self.ReadBytes(num_bytes)
     return offset, blob
-  def DecodeStringWithLength(self) -> Tuple[int, str]:
+  def DecodeStringWithLength(self, encoding='utf-16-be') -> Tuple[int, str]:
     """Returns a tuple of the offset of decoding and the string value."""
     offset, length = self.DecodeUint64Varint()
     _, buffer = self.ReadBytes(length*2)
-    return offset, buffer.decode('utf-16-be')
+    return offset, buffer.decode(encoding=encoding)
 T = TypeVar('T')
-class FromStreamMixin:  # TODO: refactor leveldb parsers
-  """A mixin for dataclasses parsing their attributes from a binary stream."""
+class FromDecoderMixin:
+  """A mixin for parsing dataclass attributes using a LevelDBDecoder."""
   @classmethod
   def FromDecoder(
@@ -278,7 +284,7 @@ class FromStreamMixin:  # TODO: refactor leveldb parsers
       The class instance.
     """
     decoder = LevelDBDecoder(stream)
-    return cls.FromDecoder(decoder, base_offset)
+    return cls.FromDecoder(decoder=decoder, base_offset=base_offset)
   @classmethod
   def FromBytes(

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb/version.py RENAMED Viewed

@@ -14,7 +14,7 @@
 # limitations under the License.
 """Version information for dfIndexeddb."""
-__version__ = "20240301"
+__version__ = "20240305"
 def GetVersion():

{dfindexeddb-20240301 → dfindexeddb-20240305/dfindexeddb.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dfindexeddb
-Version: 20240301
+Version: 20240305
 Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
 Author-email: Syd Pleno <sydp@google.com>
 Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>

{dfindexeddb-20240301 → dfindexeddb-20240305}/dfindexeddb.egg-info/SOURCES.txt RENAMED Viewed

@@ -20,5 +20,7 @@ dfindexeddb/indexeddb/chromium.py
 dfindexeddb/indexeddb/definitions.py
 dfindexeddb/indexeddb/v8.py
 dfindexeddb/leveldb/__init__.py
+dfindexeddb/leveldb/definitions.py
+dfindexeddb/leveldb/descriptor.py
 dfindexeddb/leveldb/ldb.py
 dfindexeddb/leveldb/log.py

{dfindexeddb-20240301 → dfindexeddb-20240305}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "dfindexeddb"
-version = "20240301"
+version = "20240305"
 requires-python = ">=3.8"
 description = "dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files."
 license = {file = "LICENSE"}