dfindexeddb 20240301-py3-none-any.whl → 20240324-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/blink.py +2 -1
- dfindexeddb/indexeddb/chromium.py +12 -12
- dfindexeddb/indexeddb/cli.py +101 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +217 -0
- dfindexeddb/leveldb/definitions.py +59 -0
- dfindexeddb/leveldb/descriptor.py +334 -0
- dfindexeddb/leveldb/ldb.py +53 -57
- dfindexeddb/leveldb/log.py +78 -69
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +8 -43
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/METADATA +46 -32
- dfindexeddb-20240324.dist-info/RECORD +26 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/WHEEL +1 -1
- dfindexeddb-20240324.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -155
- dfindexeddb-20240301.dist-info/RECORD +0 -20
- dfindexeddb-20240301.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240301.dist-info → dfindexeddb-20240324.dist-info}/top_level.txt +0 -0
dfindexeddb/leveldb/descriptor.py
ADDED
@@ -0,0 +1,334 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for LevelDB Descriptor (MANIFEST) files."""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Generator, Optional
+
+from dfindexeddb import errors
+from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import log
+from dfindexeddb.leveldb import utils
+
+
+@dataclass
+class InternalKey:
+  """An InternalKey.
+
+  Attributes:
+    offset: the offset.
+    user_key: the user key.
+    sequence_number: the sequence number.
+    key_type: the key type.
+  """
+  offset: int
+  user_key: bytes = field(repr=False)
+  sequence_number: int
+  key_type: int
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> InternalKey:
+    """Decodes an InternalKey from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The InternalKey instance.
+    """
+    offset, slice_bytes = decoder.DecodeLengthPrefixedSlice()
+
+    if len(slice_bytes) < definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH:
+      raise errors.ParserError('Insufficient bytes to parse InternalKey')
+
+    user_key = slice_bytes[:-definitions.SEQUENCE_LENGTH]
+    sequence_number = int.from_bytes(
+        slice_bytes[-definitions.SEQUENCE_LENGTH:],
+        byteorder='little',
+        signed=False)
+    key_type = slice_bytes[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+
+    return cls(
+        offset=base_offset + offset,
+        user_key=user_key,
+        sequence_number=sequence_number,
+        key_type=key_type)
+
+
+@dataclass
+class NewFile(utils.FromDecoderMixin):
+  """A NewFile.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the file number.
+    file_size: the file size.
+    smallest: the smallest internal key.
+    largest: the largest internal key.
+  """
+  offset: int
+  level: int
+  number: int
+  file_size: int
+  smallest: InternalKey
+  largest: InternalKey
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> NewFile:
+    """Decodes a NewFile from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The NewFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    _, file_size = decoder.DecodeUint64Varint()
+    smallest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+    largest = InternalKey.FromDecoder(decoder, base_offset=base_offset)
+
+    return cls(
+        offset=offset + base_offset,
+        level=level,
+        number=number,
+        file_size=file_size,
+        smallest=smallest,
+        largest=largest)
+
+
+@dataclass
+class CompactPointer(utils.FromDecoderMixin):
+  """A CompactPointer.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    key: the key bytes.
+  """
+  offset: int
+  level: int
+  key: bytes = field(repr=False)
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0
+  ) -> CompactPointer:
+    """Decodes a CompactPointer from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The CompactPointer instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, key = decoder.DecodeLengthPrefixedSlice()
+    return cls(offset=base_offset + offset, level=level, key=key)
+
+
+@dataclass
+class DeletedFile(utils.FromDecoderMixin):
+  """A DeletedFile.
+
+  Attributes:
+    offset: the offset.
+    level: the level.
+    number: the file number.
+  """
+  offset: int
+  level: int
+  number: int
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> DeletedFile:
+    """Decodes a DeletedFile from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The DeletedFile instance.
+    """
+    offset, level = decoder.DecodeUint32Varint()
+    _, number = decoder.DecodeUint64Varint()
+    return cls(offset=base_offset + offset, level=level, number=number)
+
+
+@dataclass
+class VersionEdit(utils.FromDecoderMixin):
+  """A VersionEdit is recorded in a LevelDB descriptor/manifest file.
+
+  Attributes:
+    offset: the offset where the VersionEdit was parsed.
+    comparator: the comparator.
+    log_number: the log number.
+    prev_log_number: the previous log number.
+    next_file_number: the next file number.
+    last_sequence: the last sequence.
+    compact_pointers: the list of CompactPointers.
+    deleted_files: the list of DeletedFiles.
+    new_files: the list of NewFiles.
+  """
+  offset: int
+  comparator: Optional[bytes] = None
+  log_number: Optional[int] = None
+  prev_log_number: Optional[int] = None
+  next_file_number: Optional[int] = None
+  last_sequence: Optional[int] = None
+  compact_pointers: list[CompactPointer] = field(default_factory=list)
+  deleted_files: list[DeletedFile] = field(default_factory=list)
+  new_files: list[NewFile] = field(default_factory=list)
+
+  @classmethod
+  def FromDecoder(
+      cls, decoder: utils.LevelDBDecoder, base_offset: int = 0) -> VersionEdit:
+    """Decodes a VersionEdit from the current position of a LevelDBDecoder.
+
+    Args:
+      decoder: the LevelDBDecoder.
+      base_offset: the base offset.
+
+    Returns:
+      The VersionEdit instance.
+
+    Raises:
+      ParserError if an invalid VersionEditTag is parsed.
+    """
+    offset, tag_byte = decoder.DecodeUint32Varint()
+    version_edit = cls(offset=base_offset + offset)
+
+    while tag_byte:
+      try:
+        tag = definitions.VersionEditTags(tag_byte)
+      except TypeError as error:
+        raise errors.ParserError(
+            f'Invalid VersionEditTag at offset {offset}') from error
+
+      if tag == definitions.VersionEditTags.COMPARATOR:
+        _, version_edit.comparator = decoder.DecodeLengthPrefixedSlice()
+      elif tag == definitions.VersionEditTags.LOG_NUMBER:
+        _, version_edit.log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.PREV_LOG_NUMBER:
+        _, version_edit.prev_log_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.NEXT_FILE_NUMBER:
+        _, version_edit.next_file_number = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.LAST_SEQUENCE:
+        _, version_edit.last_sequence = decoder.DecodeUint64Varint()
+      elif tag == definitions.VersionEditTags.COMPACT_POINTER:
+        compact_pointer = CompactPointer.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.compact_pointers.append(compact_pointer)
+      elif tag == definitions.VersionEditTags.DELETED_FILE:
+        deleted_file = DeletedFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.deleted_files.append(deleted_file)
+      elif tag == definitions.VersionEditTags.NEW_FILE:
+        file_metadata = NewFile.FromDecoder(
+            decoder=decoder, base_offset=base_offset + offset)
+        version_edit.new_files.append(file_metadata)
+
+      if decoder.NumRemainingBytes() == 0:
+        break
+
+      offset, tag_byte = decoder.DecodeUint32Varint()
+
+    return version_edit
+
+
+class FileReader:
+  """A reader for Descriptor files.
+
+  A DescriptorFileReader provides read-only sequential iteration of serialized
+  structures in a leveldb Descriptor file.  These structures include:
+  * blocks (Block)
+  * records (PhysicalRecord)
+  * version edits (VersionEdit)
+  """
+  def __init__(self, filename: str):
+    """Initializes the Descriptor a.k.a. MANIFEST file.
+
+    Args:
+      filename: the Descriptor filename (e.g. MANIFEST-000001)
+    """
+    self.filename = filename
+
+
+  def GetBlocks(self) -> Generator[log.Block, None, None]:
+    """Returns an iterator of Block instances.
+
+    A Descriptor file is composed of one or more blocks.
+
+    Yields:
+      Block
+    """
+    with open(self.filename, 'rb') as fh:
+      block = log.Block.FromStream(fh)
+      while block:
+        yield block
+        block = log.Block.FromStream(fh)
+
+  def GetPhysicalRecords(self) -> Generator[log.PhysicalRecord, None, None]:
+    """Returns an iterator of PhysicalRecord instances.
+
+    A block is composed of one or more physical records.
+
+    Yields:
+      PhysicalRecord
+    """
+    for block in self.GetBlocks():
+      yield from block.GetPhysicalRecords()
+
+  def GetVersionEdits(self) -> Generator[VersionEdit, None, None]:
+    """Returns an iterator of VersionEdit instances.
+
+    Depending on the VersionEdit size, it can be spread across one or
+    more physical records.
+
+    Yields:
+      VersionEdit
+    """
+    buffer = bytearray()
+    for physical_record in self.GetPhysicalRecords():
+      if (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.FULL):
+        buffer = physical_record.contents
+        offset = physical_record.contents_offset + physical_record.base_offset
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.FIRST):
+        offset = physical_record.contents_offset + physical_record.base_offset
+        buffer = bytearray(physical_record.contents)
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.MIDDLE):
+        buffer.extend(bytearray(physical_record.contents))
+      elif (physical_record.record_type ==
+          definitions.LogFilePhysicalRecordType.LAST):
+        buffer.extend(bytearray(physical_record.contents))
+        version_edit = VersionEdit.FromBytes(buffer, base_offset=offset)
+        yield version_edit
+        buffer = bytearray()
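The new descriptor module gives dfindexeddb a reader for LevelDB MANIFEST files: FileReader walks log blocks and physical records and reassembles them into VersionEdit structures. A minimal usage sketch follows; it is inferred from the class definitions above rather than taken from the package's own CLI, and the MANIFEST path is a placeholder.

# Sketch: list the version edits recorded in a LevelDB MANIFEST file.
# Assumes dfindexeddb 20240324 is installed; the file path is hypothetical.
from dfindexeddb.leveldb import descriptor

reader = descriptor.FileReader('/path/to/leveldb/MANIFEST-000001')
for version_edit in reader.GetVersionEdits():
  # Each VersionEdit can register new table files, delete old ones, and
  # update log/sequence bookkeeping for the database.
  print(version_edit.offset, version_edit.last_sequence)
  for new_file in version_edit.new_files:
    print('  new table file', new_file.number, 'at level', new_file.level)
  for deleted_file in version_edit.deleted_files:
    print('  deleted table file', deleted_file.number)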
dfindexeddb/leveldb/ldb.py
CHANGED
@@ -23,11 +23,12 @@ from typing import BinaryIO, Iterable, Tuple
 import snappy
 import zstd
 
-from dfindexeddb import
+from dfindexeddb.leveldb import definitions
+from dfindexeddb.leveldb import utils
 
 
 @dataclass
-class LdbKeyValueRecord:
+class KeyValueRecord:
   """A leveldb table key-value record.
 
   Attributes:
@@ -35,22 +36,18 @@ class LdbKeyValueRecord:
     key: the key of the record.
     value: the value of the record.
     sequence_number: the sequence number of the record.
-
+    record_type: the type of the record.
   """
   offset: int
   key: bytes
   value: bytes
   sequence_number: int
-
-
-  PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
-  SEQUENCE_LENGTH = 7
-  TYPE_LENGTH = 1
+  record_type: definitions.InternalRecordType
 
   @classmethod
   def FromDecoder(
       cls, decoder: utils.LevelDBDecoder, block_offset: int, shared_key: bytes
-  ) -> Tuple[
+  ) -> Tuple[KeyValueRecord, bytes]:
     """Decodes a ldb key value record.
 
     Args:
@@ -59,7 +56,7 @@ class LdbKeyValueRecord:
       shared_key: the shared key bytes.
 
     Returns:
-      A tuple of the parsed
+      A tuple of the parsed KeyValueRecord and the updated shared key bytes.
     """
     offset, shared_bytes = decoder.DecodeUint32Varint()
     _, unshared_bytes = decoder.DecodeUint32Varint()
@@ -68,17 +65,21 @@ class LdbKeyValueRecord:
     _, value = decoder.ReadBytes(value_length)
 
     shared_key = shared_key[:shared_bytes] + key_delta
-    key = shared_key[:-
+    key = shared_key[:-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
     sequence_number = int.from_bytes(
-        key[-
-    key_type = shared_key[-
-
-    return cls(
-
+        key[-definitions.SEQUENCE_LENGTH:], byteorder='little', signed=False)
+    key_type = shared_key[-definitions.PACKED_SEQUENCE_AND_TYPE_LENGTH]
+    record_type = definitions.InternalRecordType(key_type)
+    return cls(
+        offset=offset + block_offset,
+        key=key,
+        value=value,
+        sequence_number=sequence_number,
+        record_type=record_type), shared_key
 
 
 @dataclass
-class LdbBlock:
+class Block:
   """A leveldb table block.
 
   Attributes:
@@ -91,17 +92,13 @@ class LdbBlock:
   data: bytes = field(repr=False)
   footer: bytes  # 5 bytes = 1 byte compressed flag + 4 bytes checksum.
 
-  SNAPPY_COMPRESSED = 1
-  ZSTD_COMPRESSED = 2
-  RESTART_ENTRY_LENGTH = 4
-
   def IsSnappyCompressed(self) -> bool:
     """Returns true if the block is snappy compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.SNAPPY
 
   def IsZstdCompressed(self) -> bool:
     """Returns true if the block is zstd compressed."""
-    return self.footer[0] ==
+    return self.footer[0] == definitions.BlockCompressionType.ZSTD
 
   def GetBuffer(self) -> bytes:
     """Returns the block buffer, decompressing if required."""
@@ -111,11 +108,11 @@ class LdbBlock:
       return zstd.decompress(self.data)
     return self.data
 
-  def GetRecords(self) -> Iterable[
+  def GetRecords(self) -> Iterable[KeyValueRecord]:
     """Returns an iterator over the key value records in the block.
 
     Yields:
-
+      KeyValueRecords
     """
     # get underlying block content, decompressing if required
     buffer = self.GetBuffer()
@@ -124,10 +121,11 @@ class LdbBlock:
     # trailer of a block has the form:
     #     restarts: uint32[num_restarts]
     #     num_restarts: uint32
-    decoder.stream.seek(-
+    decoder.stream.seek(-definitions.BLOCK_RESTART_ENTRY_LENGTH, os.SEEK_END)
     _, num_restarts = decoder.DecodeUint32()
     restarts_offset = (
-        decoder.stream.tell()) - (
+        decoder.stream.tell()) - (
+            (num_restarts + 1) * definitions.BLOCK_RESTART_ENTRY_LENGTH)
 
     decoder.stream.seek(restarts_offset)
     _, offset = decoder.DecodeUint32()
@@ -135,7 +133,7 @@ class LdbBlock:
     key = b''
 
     while decoder.stream.tell() < restarts_offset:
-      key_value_record, key =
+      key_value_record, key = KeyValueRecord.FromDecoder(
           decoder, self.block_offset, key)
       yield key_value_record
 
@@ -145,7 +143,7 @@ class LdbBlock:
 
 
 @dataclass
-class BlockHandle:
+class BlockHandle(utils.FromDecoderMixin):
   """A handle to a block in the ldb file.
 
   Attributes:
@@ -157,16 +155,14 @@ class BlockHandle:
   block_offset: int
   length: int
 
-
-
-  def Load(self, stream: BinaryIO) -> LdbBlock:
+  def Load(self, stream: BinaryIO) -> Block:
     """Loads the block data.
 
     Args:
       stream: the binary stream of the ldb file.
 
     Returns:
-      a
+      a Block.
 
     Raises:
       ValueError: if it could not read all of the block or block footer.
@@ -176,41 +172,41 @@ class BlockHandle:
     if len(data) != self.length:
       raise ValueError('Could not read all of the block')
 
-    footer = stream.read(
-    if len(footer) !=
+    footer = stream.read(definitions.BLOCK_TRAILER_SIZE)
+    if len(footer) != definitions.BLOCK_TRAILER_SIZE:
       raise ValueError('Could not read all of the block footer')
 
-    return
+    return Block(self.offset, self.block_offset, self.length, data, footer)
 
   @classmethod
-  def
-
+  def FromDecoder(
+      cls: BlockHandle,
+      decoder: utils.LevelDBDecoder,
+      base_offset: int = 0
+  ) -> BlockHandle:
+    """Decodes a BlockHandle from the current position of a LevelDBDecoder.
 
     Args:
-
+      decoder: the LevelDBDecoder.
       base_offset: the base offset.
 
     Returns:
-
+      The BlockHandle instance.
     """
-    decoder = utils.LevelDBDecoder(stream)
     offset, block_offset = decoder.DecodeUint64Varint()
     _, length = decoder.DecodeUint64Varint()
     return cls(offset + base_offset, block_offset, length)
 
 
-class LdbFileReader:
+class FileReader:
   """A leveldb table (.ldb or .sst) file reader.
 
-  A
+  A Ldb FileReader provides read-only sequential iteration of serialized
   structures in a leveldb ldb file. These structures include:
-  * blocks (
-  * records (
+  * blocks (Block)
+  * records (KeyValueRecord)
   """
 
-  FOOTER_SIZE = 48
-  MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'
-
   def __init__(self, filename: str):
     """Initializes the LogFile.
 
@@ -222,11 +218,11 @@ class LdbFileReader:
     """
     self.filename = filename
     with open(self.filename, 'rb') as fh:
-      fh.seek(-len(
-      if fh.read(len(
+      fh.seek(-len(definitions.TABLE_MAGIC), os.SEEK_END)
+      if fh.read(len(definitions.TABLE_MAGIC)) != definitions.TABLE_MAGIC:
        raise ValueError(f'Invalid magic number in {self.filename}')
 
-      fh.seek(-
+      fh.seek(-definitions.TABLE_FOOTER_SIZE, os.SEEK_END)
       # meta_handle, need to read first due to variable integers
       _ = BlockHandle.FromStream(fh)
       index_handle = BlockHandle.FromStream(fh)
@@ -234,11 +230,11 @@ class LdbFileReader:
      # self.meta_block = meta_handle.load(fh)  # TODO: support meta blocks
      self.index_block = index_handle.Load(fh)
 
-  def GetBlocks(self) -> Iterable[
-    """Returns an iterator of
+  def GetBlocks(self) -> Iterable[Block]:
+    """Returns an iterator of Blocks.
 
     Yields:
-
+      Block.
     """
     with open(self.filename, 'rb') as fh:
       for key_value_record in self.index_block.GetRecords():
@@ -247,11 +243,11 @@ class LdbFileReader:
             base_offset=key_value_record.offset)
         yield block_handle.Load(fh)
 
-  def GetKeyValueRecords(self) -> Iterable[
-    """Returns an iterator of
+  def GetKeyValueRecords(self) -> Iterable[KeyValueRecord]:
+    """Returns an iterator of KeyValueRecords.
 
     Yields:
-
+      KeyValueRecords.
     """
     for block in self.GetBlocks():
       yield from block.GetRecords()