dfindexeddb 20240305__py3-none-any.whl → 20240331__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dfindexeddb/cli.py DELETED
@@ -1,180 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2024 Google LLC
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # https://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- """A CLI tool for dfindexeddb."""
16
- import argparse
17
- import dataclasses
18
- from datetime import datetime
19
- import json
20
- import pathlib
21
- import sys
22
- import traceback
23
-
24
- from dfindexeddb import errors
25
- from dfindexeddb import version
26
- from dfindexeddb.leveldb import descriptor
27
- from dfindexeddb.leveldb import ldb
28
- from dfindexeddb.leveldb import log
29
- from dfindexeddb.indexeddb import chromium
30
- from dfindexeddb.indexeddb import v8
31
-
32
-
33
# Characters that are emitted verbatim when rendering bytes. Every other byte
# value (the space character included) is rendered as a \xNN escape.
_VALID_PRINTABLE_CHARACTERS = (
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')

# The same characters as byte values, computed once so that rendering a bytes
# object costs one set lookup per byte instead of a scan of the string above.
_VALID_PRINTABLE_BYTE_VALUES = frozenset(
    _VALID_PRINTABLE_CHARACTERS.encode('ascii'))


class Encoder(json.JSONEncoder):
    """A JSON encoder class for dfindexeddb fields."""

    def default(self, o):
        """Converts a value the stock JSON encoder cannot serialize.

        Args:
          o: the object to convert.

        Returns:
          A JSON serializable replacement: a string for bytes, datetime,
          v8.Undefined, v8.Null and v8.RegExp values, or a list for a set.

        Raises:
          TypeError: from the base class, for any other type.
        """
        if isinstance(o, bytes):
            # NOTE(review): the backslash is itself in the printable set, so a
            # literal backslash byte is emitted unescaped and the rendered
            # text cannot be told apart from a \xNN escape. Kept as-is to
            # preserve the existing output format.
            return ''.join(
                chr(x) if x in _VALID_PRINTABLE_BYTE_VALUES else f'\\x{x:02X}'
                for x in o)
        if isinstance(o, datetime):
            return o.isoformat()
        if isinstance(o, v8.Undefined):
            return "<undefined>"
        if isinstance(o, v8.Null):
            return "<null>"
        if isinstance(o, set):
            return list(o)
        if isinstance(o, v8.RegExp):
            return str(o)
        return super().default(o)
60
-
61
-
62
def _Output(structure, to_json=False):
    """Writes a parsed structure to stdout.

    Args:
      structure: the parsed structure to print. It is passed to
          dataclasses.asdict when to_json is set.
      to_json: True to print the structure as indented JSON, False to print
          it using its default string representation.
    """
    if not to_json:
        print(structure)
        return

    rendered = json.dumps(
        dataclasses.asdict(structure), indent=2, cls=Encoder)
    print(rendered)
69
-
70
-
71
def IndexeddbCommand(args):
    """The CLI for processing a log/ldb file as indexeddb.

    The reader is selected from the file name of args.source: names ending in
    '.log' use the log reader and names ending in '.ldb' use the ldb reader.
    Any other name prints a message to stderr and returns without output.

    Args:
      args: the parsed command line arguments; the source and json attributes
          are read.
    """
    source_name = args.source.name
    if source_name.endswith('.log'):
        reader = log.FileReader(args.source)
    elif source_name.endswith('.ldb'):
        reader = ldb.FileReader(args.source)
    else:
        print('Unsupported file type.', file=sys.stderr)
        return

    # Iterate the records directly, as the ldb and log commands do, instead
    # of materializing them all in a list before the first one is printed.
    for leveldb_record in reader.GetKeyValueRecords():
        try:
            record = chromium.IndexedDBRecord.FromLevelDBRecord(leveldb_record)
        except (errors.ParserError, errors.DecoderError) as err:
            print(
                (f'Error parsing blink value: {err} for '
                 f'{leveldb_record.__class__.__name__} '
                 f'at offset {leveldb_record.offset}'), file=sys.stderr)
            print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
            # Best effort: output the unparsed LevelDB record instead.
            record = leveldb_record
        _Output(record, to_json=args.json)
92
-
93
-
94
def ManifestCommand(args):
    """The CLI for processing MANIFEST aka Descriptor files.

    Args:
      args: the parsed command line arguments; the source and json attributes
          are read.
    """
    # Every version edit of the descriptor file is printed in reader order.
    for edit in descriptor.FileReader(args.source).GetVersionEdits():
        _Output(edit, to_json=args.json)
100
-
101
-
102
def LdbCommand(args):
    """The CLI for processing ldb files.

    Args:
      args: the parsed command line arguments; the source, structure_type and
          json attributes are read.
    """
    reader = ldb.FileReader(args.source)
    requested = args.structure_type

    if requested == 'blocks':
        # Block information.
        entries = reader.GetBlocks()
    elif requested == 'records':
        # Key value record information.
        entries = reader.GetKeyValueRecords()
    else:
        # Any other structure type produces no output.
        return

    for entry in entries:
        _Output(entry, to_json=args.json)
115
-
116
-
117
def LogCommand(args):
    """The CLI for processing log files.

    Args:
      args: the parsed command line arguments; the source, structure_type and
          json attributes are read.
    """
    reader = log.FileReader(args.source)

    # Maps each structure type to the reader method that yields it. Both
    # 'parsed_internal_key' and 'records' print key value records.
    getter_names = {
        'blocks': 'GetBlocks',
        'physical_records': 'GetPhysicalRecords',
        'write_batches': 'GetWriteBatches',
        'parsed_internal_key': 'GetKeyValueRecords',
        'records': 'GetKeyValueRecords',
    }
    getter_name = getter_names.get(args.structure_type)
    if getter_name is None:
        # Any other structure type produces no output.
        return

    for item in getattr(reader, getter_name)():
        _Output(item, to_json=args.json)
140
-
141
-
142
def App():
    """The CLI app entrypoint.

    Builds the argument parser, parses the process command line and calls the
    handler registered for the selected subcommand.
    """
    parser = argparse.ArgumentParser(
        prog='dfindexeddb',
        description='A cli tool for the dfindexeddb package',
        epilog=f'Version {version.GetVersion()}')

    parser.add_argument(
        '-s', '--source', required=True, type=pathlib.Path,
        help='The source leveldb file')
    parser.add_argument('--json', action='store_true', help='Output as JSON')
    # A required subparser group without a dest can fail with a TypeError
    # rather than a usage message on some Python releases when the subcommand
    # is omitted; naming the group avoids that.
    subparsers = parser.add_subparsers(dest='command', required=True)

    parser_log = subparsers.add_parser('log')
    parser_log.add_argument(
        'structure_type', choices=[
            'blocks',
            'physical_records',
            'write_batches',
            'parsed_internal_key',
            'records'])
    parser_log.set_defaults(func=LogCommand)

    parser_ldb = subparsers.add_parser('ldb')
    parser_ldb.add_argument(
        'structure_type', choices=[
            'blocks',
            'records'])
    parser_ldb.set_defaults(func=LdbCommand)

    parser_manifest = subparsers.add_parser('manifest')
    parser_manifest.set_defaults(func=ManifestCommand)

    parser_indexeddb = subparsers.add_parser('indexeddb')
    parser_indexeddb.set_defaults(func=IndexeddbCommand)

    args = parser.parse_args()

    args.func(args)
@@ -1,115 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2024 Google LLC
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # https://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- """Parsers for blink javascript serialized objects."""
16
- import io
17
- from typing import Any
18
-
19
- from dfindexeddb import utils
20
- from dfindexeddb.indexeddb import definitions
21
- from dfindexeddb.indexeddb import v8
22
-
23
-
24
-
25
class V8ScriptValueDecoder:
    """A Blink V8 Serialized Script Value (SSV) decoder.

    Attributes:
      deserializer (v8.ValueDeserializer): the V8 value deserializer, or None
          until Deserialize is called.
      raw_data (bytes): the raw bytes containing the serialized javascript
          value.
      version (int): the blink version, or zero until Deserialize is called.
    """
    _MIN_VERSION_FOR_SEPARATE_ENVELOPE = 16

    # As defined in trailer_reader.h
    _MIN_WIRE_FORMAT_VERSION = 21

    def __init__(self, raw_data: bytes):
        """Initializes the decoder.

        Args:
          raw_data: the raw bytes containing the serialized javascript value.
        """
        self.deserializer = None
        self.raw_data = raw_data
        self.version = 0

    def _ReadVersionEnvelope(self) -> int:
        """Reads the Blink version envelope.

        When an envelope is read, its version is stored in the version
        attribute.

        Returns:
          The number of bytes consumed reading the Blink version envelope or
          zero if no blink envelope is detected.
        """
        if not self.raw_data:
            return 0

        decoder = utils.StreamDecoder(io.BytesIO(self.raw_data))
        _, tag_value = decoder.DecodeUint8()
        tag = definitions.BlinkSerializationTag(tag_value)
        if tag != definitions.BlinkSerializationTag.VERSION:
            return 0

        _, version = decoder.DecodeUint32Varint()
        if version < self._MIN_VERSION_FOR_SEPARATE_ENVELOPE:
            return 0

        consumed_bytes = decoder.stream.tell()

        if version >= self._MIN_WIRE_FORMAT_VERSION:
            # 1 + sizeof(uint64) + sizeof(uint32)
            trailer_offset_data_size = 1 + 8 + 4
            consumed_bytes += trailer_offset_data_size
            if consumed_bytes >= len(self.raw_data):
                return 0

        # Record the envelope version. Without this, Deserialize always
        # overwrites the version attribute with the V8 header version.
        self.version = version
        return consumed_bytes

    def ReadTag(self) -> definitions.BlinkSerializationTag:
        """Reads a blink serialization tag.

        Returns:
          The blink serialization tag.
        """
        _, tag_value = self.deserializer.decoder.DecodeUint8()
        return definitions.BlinkSerializationTag(tag_value)

    def ReadHostObject(self) -> Any:
        """Reads a host DOM object.

        The tag is read first so it can be reported in the error message.

        Raises:
          NotImplementedError: when called.
        """
        tag = self.ReadTag()
        raise NotImplementedError(
            f'V8ScriptValueDecoder.ReadHostObject - {tag}')

    def Deserialize(self) -> Any:
        """Deserializes a Blink SSV.

        The serialization format has two 'envelopes'.
        [version tag] [Blink version] [version tag] [v8 version] ...

        Returns:
          The deserialized script value.
        """
        version_envelope_size = self._ReadVersionEnvelope()
        self.deserializer = v8.ValueDeserializer(
            io.BytesIO(self.raw_data[version_envelope_size:]), delegate=self)
        v8_version = self.deserializer.ReadHeader()
        # Fall back to the V8 header version when no Blink envelope was read.
        if not self.version:
            self.version = v8_version
        return self.deserializer.ReadValue()

    @classmethod
    def FromBytes(cls, data: bytes) -> Any:
        """Deserializes a Blink SSV from the data array.

        Args:
          data: the raw bytes containing the serialized javascript value.

        Returns:
          The deserialized script value.
        """
        return cls(data).Deserialize()