dfindexeddb 20240301__py3-none-any.whl → 20240324__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,7 +86,8 @@ class V8ScriptValueDecoder:
86
86
  NotImplementedError: when called.
87
87
  """
88
88
  tag = self.ReadTag()
89
- raise NotImplementedError(f'V8ScriptValueDecoder.ReadHostObject - {tag}')
89
+ raise NotImplementedError(
90
+ f'V8ScriptValueDecoder.ReadHostObject - {tag.name}')
90
91
 
91
92
  def Deserialize(self) -> Any:
92
93
  """Deserializes a Blink SSV.
@@ -20,18 +20,18 @@ import io
20
20
  from typing import Any, BinaryIO, Optional, Tuple, Type, TypeVar, Union
21
21
 
22
22
  from dfindexeddb import errors
23
- from dfindexeddb import utils
24
23
  from dfindexeddb.indexeddb import blink
25
24
  from dfindexeddb.indexeddb import definitions
26
25
  from dfindexeddb.leveldb import ldb
27
26
  from dfindexeddb.leveldb import log
27
+ from dfindexeddb.leveldb import utils
28
28
 
29
29
 
30
30
  T = TypeVar('T')
31
31
 
32
32
 
33
33
  @dataclass
34
- class KeyPrefix(utils.FromStreamMixin):
34
+ class KeyPrefix(utils.FromDecoderMixin):
35
35
  """The IndexedDB key prefix.
36
36
 
37
37
  Attributes:
@@ -111,7 +111,7 @@ class KeyPrefix(utils.FromStreamMixin):
111
111
 
112
112
 
113
113
  @dataclass
114
- class IDBKey(utils.FromStreamMixin):
114
+ class IDBKey(utils.FromDecoderMixin):
115
115
  """An IDBKey.
116
116
 
117
117
  Attributes:
@@ -199,7 +199,7 @@ class IDBKey(utils.FromStreamMixin):
199
199
 
200
200
 
201
201
  @dataclass
202
- class IDBKeyPath(utils.FromStreamMixin):
202
+ class IDBKeyPath(utils.FromDecoderMixin):
203
203
  """An IDBKeyPath.
204
204
 
205
205
  Arguments:
@@ -256,7 +256,7 @@ class IDBKeyPath(utils.FromStreamMixin):
256
256
 
257
257
 
258
258
  @dataclass
259
- class BlobJournalEntry(utils.FromStreamMixin):
259
+ class BlobJournalEntry(utils.FromDecoderMixin):
260
260
  """A blob journal entry.
261
261
 
262
262
  Attributes:
@@ -287,7 +287,7 @@ class BlobJournalEntry(utils.FromStreamMixin):
287
287
 
288
288
 
289
289
  @dataclass
290
- class BlobJournal(utils.FromStreamMixin):
290
+ class BlobJournal(utils.FromDecoderMixin):
291
291
  """A BlobJournal.
292
292
 
293
293
  Attributes:
@@ -570,7 +570,7 @@ class EarlistCompactionTimeKey(BaseIndexedDBKey):
570
570
  class ScopesPrefixKey(BaseIndexedDBKey):
571
571
  """A scopes prefix IndexedDB key."""
572
572
 
573
- def DecodeValue(self, decoder: utils.StreamDecoder) -> Optional[bytes]:
573
+ def DecodeValue(self, decoder: utils.LevelDBDecoder) -> Optional[bytes]:
574
574
  """Decodes the scopes prefix value."""
575
575
  if decoder.NumRemainingBytes:
576
576
  return decoder.ReadBytes()[1]
@@ -578,7 +578,7 @@ class ScopesPrefixKey(BaseIndexedDBKey):
578
578
 
579
579
  @classmethod
580
580
  def FromDecoder(
581
- cls, decoder: utils.StreamDecoder, key_prefix: KeyPrefix,
581
+ cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
582
582
  base_offset: int = 0
583
583
  ) -> ScopesPrefixKey:
584
584
  """Decodes the scopes prefix key."""
@@ -1226,7 +1226,7 @@ class IndexMetaDataKey(BaseIndexedDBKey):
1226
1226
 
1227
1227
 
1228
1228
  @dataclass
1229
- class ExternalObjectEntry(utils.FromStreamMixin):
1229
+ class ExternalObjectEntry(utils.FromDecoderMixin):
1230
1230
  """An IndexedDB external object entry.
1231
1231
 
1232
1232
  Args:
@@ -1287,7 +1287,7 @@ class ExternalObjectEntry(utils.FromStreamMixin):
1287
1287
 
1288
1288
 
1289
1289
  @dataclass
1290
- class IndexedDBExternalObject(utils.FromStreamMixin):
1290
+ class IndexedDBExternalObject(utils.FromDecoderMixin):
1291
1291
  """An IndexedDB external object.
1292
1292
 
1293
1293
  Args:
@@ -1346,7 +1346,7 @@ class IndexedDBRecord:
1346
1346
 
1347
1347
  @classmethod
1348
1348
  def FromLevelDBRecord(
1349
- cls, record: Union[ldb.LdbKeyValueRecord, log.ParsedInternalKey]
1349
+ cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
1350
1350
  ) -> IndexedDBRecord:
1351
1351
  """Returns an IndexedDBRecord from a ParsedInternalKey."""
1352
1352
  idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
@@ -1357,4 +1357,4 @@ class IndexedDBRecord:
1357
1357
  value=idb_value,
1358
1358
  sequence_number=record.sequence_number if hasattr(
1359
1359
  record, 'sequence_number') else None,
1360
- type=record.type)
1360
+ type=record.record_type)
@@ -0,0 +1,101 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2024 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """A CLI tool for dfindexeddb."""
16
+ import argparse
17
+ import dataclasses
18
+ from datetime import datetime
19
+ import json
20
+ import pathlib
21
+ import sys
22
+ import traceback
23
+
24
+ from dfindexeddb import errors
25
+ from dfindexeddb import version
26
+ from dfindexeddb.leveldb import record as leveldb_record
27
+ from dfindexeddb.indexeddb import chromium
28
+ from dfindexeddb.indexeddb import v8
29
+
30
+
31
# Characters that are emitted verbatim when bytes are rendered as text; every
# other byte value is written as a \xNN escape by Encoder.default.
_VALID_PRINTABLE_CHARACTERS = (
    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')


class Encoder(json.JSONEncoder):
  """JSON encoder that can serialize the values produced by dfindexeddb."""

  def default(self, o):
    """Returns a JSON-serializable replacement for o.

    Args:
      o: the object the stock JSON encoder could not serialize.

    Returns:
      A dict for dataclasses, a str for bytes, datetime, v8.Undefined,
      v8.Null and v8.RegExp values, or a list for sets.

    Raises:
      TypeError: raised by the base encoder for any other type.
    """
    if dataclasses.is_dataclass(o):
      return dataclasses.asdict(o)

    if isinstance(o, bytes):
      # Keep printable characters as they are and hex-escape everything else.
      return ''.join(
          chr(byte) if chr(byte) in _VALID_PRINTABLE_CHARACTERS
          else f'\\x{byte:02X}'
          for byte in o)

    if isinstance(o, datetime):
      return o.isoformat()

    if isinstance(o, v8.Undefined):
      return '<undefined>'

    if isinstance(o, v8.Null):
      return '<null>'

    if isinstance(o, set):
      return list(o)

    if isinstance(o, v8.RegExp):
      return str(o)

    return super().default(o)
61
+
62
+
63
def _Output(structure, to_json=False):
  """Prints a parsed structure to stdout.

  Args:
    structure: the object to print.
    to_json: when true, the structure is rendered as indented JSON using
        Encoder; otherwise it is printed as-is.
  """
  rendered = (
      json.dumps(structure, indent=2, cls=Encoder) if to_json else structure)
  print(rendered)
69
+
70
+
71
def IndexeddbCommand(args):
  """Parses a leveldb folder as IndexedDB and prints every record.

  Each leveldb record is replaced with its IndexedDB interpretation when
  parsing succeeds. On a parser/decoder error the failure and traceback are
  written to stderr and the record is still printed with its original
  (leveldb) content.

  Args:
    args: parsed command line arguments; reads args.source and args.json.
  """
  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
    raw_record = db_record.record
    try:
      parsed_record = chromium.IndexedDBRecord.FromLevelDBRecord(raw_record)
      db_record.record = parsed_record
    except (
        errors.ParserError,
        errors.DecoderError,
        NotImplementedError) as err:
      message = (
          f'Error parsing blink value: {err} for '
          f'{raw_record.__class__.__name__} '
          f'at offset {raw_record.offset} in {db_record.path}')
      print(message, file=sys.stderr)
      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
    # Printed whether or not the IndexedDB parsing succeeded.
    _Output(db_record, to_json=args.json)
86
+
87
+
88
def App():
  """Runs the dfindexeddb command line tool.

  Builds the argument parser, parses the process arguments and invokes the
  handler stored in the parsed namespace (IndexeddbCommand).
  """
  arg_parser = argparse.ArgumentParser(
      prog='dfindexeddb',
      description='A cli tool for parsing indexeddb files',
      epilog=f'Version {version.GetVersion()}')
  arg_parser.add_argument(
      '-s',
      '--source',
      type=pathlib.Path,
      required=True,
      help='The source leveldb folder')
  arg_parser.add_argument(
      '--json',
      help='Output as JSON',
      action='store_true')
  arg_parser.set_defaults(func=IndexeddbCommand)

  parsed_args = arg_parser.parse_args()
  handler = parsed_args.func
  handler(parsed_args)
File without changes
@@ -0,0 +1,217 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2024 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """A CLI tool for leveldb files."""
16
+ import argparse
17
+ import dataclasses
18
+ from datetime import datetime
19
+ import json
20
+ import pathlib
21
+
22
+ from dfindexeddb import version
23
+ from dfindexeddb.leveldb import descriptor
24
+ from dfindexeddb.leveldb import ldb
25
+ from dfindexeddb.leveldb import log
26
+ from dfindexeddb.leveldb import record
27
+
28
+
29
# Characters that are kept verbatim when bytes are rendered as text; any other
# byte value is written as a \xNN escape by Encoder.default.
_VALID_PRINTABLE_CHARACTERS = (
    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')


class Encoder(json.JSONEncoder):
  """JSON encoder that can serialize the values produced by dfleveldb."""

  def default(self, o):
    """Returns a JSON-serializable replacement for o.

    Args:
      o: the object the stock JSON encoder could not serialize.

    Returns:
      A dict for dataclasses, a str for bytes and datetime values, or a list
      for sets.

    Raises:
      TypeError: raised by the base encoder for any other type.
    """
    if dataclasses.is_dataclass(o):
      return dataclasses.asdict(o)

    if isinstance(o, bytes):
      # Printable characters pass through; all other bytes are hex-escaped.
      pieces = [
          char if char in _VALID_PRINTABLE_CHARACTERS else f'\\x{value:02X}'
          for value, char in zip(o, map(chr, o))]
      return ''.join(pieces)

    if isinstance(o, datetime):
      return o.isoformat()

    if isinstance(o, set):
      return list(o)

    return super().default(o)
55
+
56
+
57
def _Output(structure, to_json=False):
  """Writes a parsed structure to stdout.

  Args:
    structure: the object to print.
    to_json: when true, the structure is serialized as indented JSON using
        Encoder before printing; otherwise it is printed unchanged.
  """
  if not to_json:
    print(structure)
    return
  print(json.dumps(structure, indent=2, cls=Encoder))
63
+
64
+
65
def DbCommand(args):
  """Prints every record found in a leveldb directory.

  Args:
    args: parsed command line arguments; reads args.source and args.json.
  """
  as_json = args.json
  for db_record in record.LevelDBRecord.FromDir(args.source):
    _Output(db_record, to_json=as_json)
69
+
70
+
71
def LdbCommand(args):
  """Prints the requested structures of a leveldb table (.ldb) file.

  Args:
    args: parsed command line arguments; reads args.source,
        args.structure_type and args.json. Key value records are printed
        when no structure type is given.
  """
  ldb_file = ldb.FileReader(args.source)
  structure_type = args.structure_type

  if structure_type == 'blocks':
    # Block information.
    structures = ldb_file.GetBlocks()
  elif not structure_type or structure_type == 'records':
    # Key value record information (the default).
    structures = ldb_file.GetKeyValueRecords()
  else:
    print(f'{structure_type} is not supported for ldb files.')
    return

  for structure in structures:
    _Output(structure, to_json=args.json)
87
+
88
+
89
def LogCommand(args):
  """Prints the requested structures of a leveldb log file.

  Args:
    args: parsed command line arguments; reads args.source,
        args.structure_type and args.json. Parsed internal keys are printed
        when no structure type is given.
  """
  log_file = log.FileReader(args.source)
  structure_type = args.structure_type

  if structure_type == 'blocks':
    # Block information.
    structures = log_file.GetBlocks()
  elif structure_type == 'physical_records':
    # Log file physical record information.
    structures = log_file.GetPhysicalRecords()
  elif structure_type == 'write_batches':
    # Log file batch information.
    structures = log_file.GetWriteBatches()
  elif (not structure_type
        or structure_type in ('parsed_internal_key', 'records')):
    # Key value record information (the default).
    structures = log_file.GetParsedInternalKeys()
  else:
    print(f'{structure_type} is not supported for log files.')
    return

  for structure in structures:
    _Output(structure, to_json=args.json)
116
+
117
+
118
def DescriptorCommand(args):
  """Prints the requested structures of a descriptor (MANIFEST) file.

  Args:
    args: parsed command line arguments; reads args.source,
        args.structure_type and args.json. Version edits are printed when no
        structure type is given.
  """
  manifest_file = descriptor.FileReader(args.source)
  structure_type = args.structure_type

  if structure_type == 'blocks':
    # Block information.
    structures = manifest_file.GetBlocks()
  elif structure_type == 'physical_records':
    # Physical record information.
    structures = manifest_file.GetPhysicalRecords()
  elif not structure_type or structure_type == 'versionedit':
    # Version edit information (the default).
    structures = manifest_file.GetVersionEdits()
  else:
    print(f'{structure_type} is not supported for descriptor files.')
    return

  for structure in structures:
    _Output(structure, to_json=args.json)
139
+
140
def App():
  """Runs the dfleveldb command line tool.

  Registers the db, log, ldb and descriptor sub-commands, parses the process
  arguments and invokes the selected sub-command handler. The usage message
  is printed when no sub-command is given.
  """
  parser = argparse.ArgumentParser(
      prog='dfleveldb',
      description='A cli tool for parsing leveldb files',
      epilog=f'Version {version.GetVersion()}')

  subparsers = parser.add_subparsers()

  # One row per sub-command:
  # (name, help, help for --source, --structure_type choices, handler).
  # A choices value of None means the sub-command has no --structure_type.
  commands = (
      ('db', 'Parse a directory as leveldb.',
       'The source leveldb directory', None, DbCommand),
      # 'records' is accepted because LogCommand treats it as an alias of
      # 'parsed_internal_key'; without it here argparse rejects the value and
      # that alias can never be reached.
      ('log', 'Parse a leveldb log file.',
       'The source leveldb file',
       ['blocks', 'physical_records', 'write_batches',
        'parsed_internal_key', 'records'],
       LogCommand),
      ('ldb', 'Parse a leveldb table (.ldb) file.',
       'The source leveldb file', ['blocks', 'records'], LdbCommand),
      ('descriptor', 'Parse a leveldb descriptor (MANIFEST) file.',
       'The source leveldb file',
       ['blocks', 'physical_records', 'versionedit'],
       DescriptorCommand),
  )

  for name, help_text, source_help, structure_types, handler in commands:
    subparser = subparsers.add_parser(name, help=help_text)
    subparser.add_argument(
        '-s', '--source',
        required=True,
        type=pathlib.Path,
        help=source_help)
    subparser.add_argument(
        '--json', action='store_true', help='Output as JSON')
    if structure_types is not None:
      subparser.add_argument(
          '-t',
          '--structure_type',
          choices=structure_types)
    subparser.set_defaults(func=handler)

  args = parser.parse_args()

  # No sub-command selected: there is no handler to call.
  if not hasattr(args, 'func'):
    parser.print_usage()
  else:
    args.func(args)
@@ -0,0 +1,59 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2024 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # https://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Definitions for LevelDB."""
16
+
17
+ import enum
18
+
19
# Table (.ldb) layout constants.
# NOTE(review): the values are presumably sizes in bytes - confirm against
# the LevelDB table format documentation.
BLOCK_RESTART_ENTRY_LENGTH = 4
BLOCK_TRAILER_SIZE = 5
TABLE_FOOTER_SIZE = 48
# Eight-byte magic value.
# NOTE(review): presumably expected at the end of the table footer - confirm
# against the ldb reader.
TABLE_MAGIC = b'\x57\xfb\x80\x8b\x24\x75\x47\xdb'

# SEQUENCE_LENGTH + TYPE_LENGTH equals PACKED_SEQUENCE_AND_TYPE_LENGTH
# (7 + 1 = 8).
# NOTE(review): presumably the packed sequence number/record type suffix of
# an internal key - confirm against the key parsers.
PACKED_SEQUENCE_AND_TYPE_LENGTH = 8
SEQUENCE_LENGTH = 7
TYPE_LENGTH = 1
27
+
28
+
29
class BlockCompressionType(enum.IntEnum):
  """Block compression types.

  An IntEnum, so members compare equal to their integer values.
  """
  # NOTE(review): no member is defined for the value 0; presumably that is
  # the "no compression" case and is handled by callers - confirm.
  SNAPPY = 1
  ZSTD = 2
33
+
34
+
35
class VersionEditTags(enum.IntEnum):
  """VersionEdit tags.

  An IntEnum, so members compare equal to their integer values. The value 8
  is intentionally left unassigned (see the comment below).
  """
  COMPARATOR = 1
  LOG_NUMBER = 2
  NEXT_FILE_NUMBER = 3
  LAST_SEQUENCE = 4
  COMPACT_POINTER = 5
  DELETED_FILE = 6
  NEW_FILE = 7
  # 8 was used for large value refs
  PREV_LOG_NUMBER = 9
46
+
47
+
48
class LogFilePhysicalRecordType(enum.IntEnum):
  """Log file physical record types.

  An IntEnum, so members compare equal to their integer values.
  """
  # NOTE(review): presumably FULL is a complete record and FIRST/MIDDLE/LAST
  # are the fragments of a record split across blocks - confirm against the
  # log reader.
  FULL = 1
  FIRST = 2
  MIDDLE = 3
  LAST = 4
54
+
55
+
56
class InternalRecordType(enum.IntEnum):
  """Internal record types.

  An IntEnum, so members compare equal to their integer values.
  """
  # NOTE(review): presumably DELETED marks a deletion entry and VALUE a
  # record that carries a value - confirm against the key parsers.
  DELETED = 0
  VALUE = 1