dfindexeddb 20240305__py3-none-any.whl → 20240331__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/cli.py +112 -0
- dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb/leveldb/cli.py +260 -0
- dfindexeddb/leveldb/definitions.py +16 -0
- dfindexeddb/leveldb/descriptor.py +61 -14
- dfindexeddb/leveldb/ldb.py +20 -24
- dfindexeddb/leveldb/log.py +25 -18
- dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb/leveldb/utils.py +116 -0
- dfindexeddb/utils.py +5 -46
- dfindexeddb/version.py +1 -1
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/METADATA +74 -30
- dfindexeddb-20240331.dist-info/RECORD +22 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/WHEEL +1 -1
- dfindexeddb-20240331.dist-info/entry_points.txt +3 -0
- dfindexeddb/cli.py +0 -180
- dfindexeddb/indexeddb/blink.py +0 -115
- dfindexeddb/indexeddb/chromium.py +0 -1360
- dfindexeddb/indexeddb/definitions.py +0 -306
- dfindexeddb/indexeddb/v8.py +0 -642
- dfindexeddb-20240305.dist-info/RECORD +0 -22
- dfindexeddb-20240305.dist-info/entry_points.txt +0 -2
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/AUTHORS +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/LICENSE +0 -0
- {dfindexeddb-20240305.dist-info → dfindexeddb-20240331.dist-info}/top_level.txt +0 -0
dfindexeddb/cli.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Copyright 2024 Google LLC
|
|
3
|
-
#
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
8
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
"""A CLI tool for dfindexeddb."""
|
|
16
|
-
import argparse
|
|
17
|
-
import dataclasses
|
|
18
|
-
from datetime import datetime
|
|
19
|
-
import json
|
|
20
|
-
import pathlib
|
|
21
|
-
import sys
|
|
22
|
-
import traceback
|
|
23
|
-
|
|
24
|
-
from dfindexeddb import errors
|
|
25
|
-
from dfindexeddb import version
|
|
26
|
-
from dfindexeddb.leveldb import descriptor
|
|
27
|
-
from dfindexeddb.leveldb import ldb
|
|
28
|
-
from dfindexeddb.leveldb import log
|
|
29
|
-
from dfindexeddb.indexeddb import chromium
|
|
30
|
-
from dfindexeddb.indexeddb import v8
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
_VALID_PRINTABLE_CHARACTERS = (
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')

# Byte values of the characters above, precomputed once so that encoding a
# bytes value does a constant-time lookup per byte instead of scanning the
# whole alphabet string.
_PRINTABLE_BYTE_VALUES = frozenset(
    ord(character) for character in _VALID_PRINTABLE_CHARACTERS)


class Encoder(json.JSONEncoder):
  """A JSON encoder class for dfindexeddb fields."""

  def default(self, o):
    """Converts an object the stock JSON encoder cannot serialize.

    Args:
      o: the object to convert.

    Returns:
      A JSON serializable replacement for the object:
        * bytes become a string in which every byte outside
          _VALID_PRINTABLE_CHARACTERS is written as a \\xNN escape (upper-case
          hex). The alphabet holds no whitespace, so a space is escaped too.
        * datetime becomes its ISO 8601 string.
        * v8.Undefined and v8.Null become the "<undefined>" and "<null>"
          placeholder strings.
        * set becomes a list.
        * v8.RegExp becomes its str() representation.

    Raises:
      TypeError: raised by the base class for any other type.
    """
    if isinstance(o, bytes):
      return ''.join(
          chr(value) if value in _PRINTABLE_BYTE_VALUES else f'\\x{value:02X}'
          for value in o)
    if isinstance(o, datetime):
      return o.isoformat()
    if isinstance(o, v8.Undefined):
      return "<undefined>"
    if isinstance(o, v8.Null):
      return "<null>"
    if isinstance(o, set):
      return list(o)
    if isinstance(o, v8.RegExp):
      return str(o)
    return super().default(o)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def _Output(structure, to_json=False):
  """Writes a parsed structure to stdout.

  Args:
    structure: the parsed structure to print. When JSON output is requested
        it is converted with dataclasses.asdict first.
    to_json: True to print the structure as indented JSON, False to print it
        using its default string representation.
  """
  if not to_json:
    print(structure)
    return

  serialized = json.dumps(
      dataclasses.asdict(structure), indent=2, cls=Encoder)
  print(serialized)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def IndexeddbCommand(args):
  """The CLI for processing a log/ldb file as indexeddb.

  The reader is chosen from the file name ending ('.log' or '.ldb'); any
  other name is reported on stderr and nothing is processed.

  Args:
    args: the parsed command line arguments; reads args.source and args.json.
  """
  source_name = args.source.name
  if source_name.endswith('.log'):
    reader = log.FileReader(args.source)
  elif source_name.endswith('.ldb'):
    reader = ldb.FileReader(args.source)
  else:
    print('Unsupported file type.', file=sys.stderr)
    return

  # All key/value records are read before any of them is converted.
  for record in list(reader.GetKeyValueRecords()):
    # Falls back to the unconverted record when the conversion fails, so the
    # record is still printed after the error is reported.
    to_print = record
    try:
      to_print = chromium.IndexedDBRecord.FromLevelDBRecord(record)
    except (errors.ParserError, errors.DecoderError) as err:
      print(
          (f'Error parsing blink value: {err} for {record.__class__.__name__} '
           f'at offset {record.offset}'), file=sys.stderr)
      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
    _Output(to_print, to_json=args.json)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def ManifestCommand(args):
  """The CLI for processing MANIFEST aka Descriptor files.

  Args:
    args: the parsed command line arguments; reads args.source and args.json.
  """
  reader = descriptor.FileReader(args.source)
  edits = reader.GetVersionEdits()
  for edit in edits:
    _Output(edit, to_json=args.json)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def LdbCommand(args):
  """The CLI for processing ldb files.

  Args:
    args: the parsed command line arguments; reads args.source,
        args.structure_type ('blocks' or 'records') and args.json.
  """
  reader = ldb.FileReader(args.source)
  requested = args.structure_type

  if requested == 'blocks':
    # Block information.
    structures = reader.GetBlocks()
  elif requested == 'records':
    # Key value record information.
    structures = reader.GetKeyValueRecords()
  else:
    # Any other structure type prints nothing.
    return

  for structure in structures:
    _Output(structure, to_json=args.json)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def LogCommand(args):
  """The CLI for processing log files.

  Args:
    args: the parsed command line arguments; reads args.source,
        args.structure_type and args.json.
  """
  reader = log.FileReader(args.source)

  # Maps each supported structure type to the reader method that yields it.
  # 'parsed_internal_key' and 'records' both print key value records.
  getters = {
      'blocks': reader.GetBlocks,
      'physical_records': reader.GetPhysicalRecords,
      'write_batches': reader.GetWriteBatches,
      'parsed_internal_key': reader.GetKeyValueRecords,
      'records': reader.GetKeyValueRecords,
  }

  getter = getters.get(args.structure_type)
  if getter is None:
    # Any other structure type prints nothing.
    return

  for structure in getter():
    _Output(structure, to_json=args.json)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def App():
  """The CLI app entrypoint.

  Builds the argument parser, parses the process command line and runs the
  handler registered for the chosen sub-command (log, ldb, manifest or
  indexeddb).
  """
  parser = argparse.ArgumentParser(
      prog='dfindexeddb',
      description='A cli tool for the dfindexeddb package',
      epilog=f'Version {version.GetVersion()}')

  parser.add_argument(
      '-s', '--source', required=True, type=pathlib.Path,
      help='The source leveldb file')
  parser.add_argument('--json', action='store_true', help='Output as JSON')

  # A required sub-command group needs a dest: without one, some older Python
  # releases fail with a TypeError while building the "arguments are
  # required" message instead of printing a usage error when the sub-command
  # is omitted.
  subparsers = parser.add_subparsers(dest='command', required=True)

  parser_log = subparsers.add_parser('log')
  parser_log.add_argument(
      'structure_type', choices=[
          'blocks',
          'physical_records',
          'write_batches',
          'parsed_internal_key',
          'records'])
  parser_log.set_defaults(func=LogCommand)

  parser_ldb = subparsers.add_parser('ldb')
  parser_ldb.add_argument(
      'structure_type', choices=[
          'blocks',
          'records'])
  parser_ldb.set_defaults(func=LdbCommand)

  parser_manifest = subparsers.add_parser('manifest')
  parser_manifest.set_defaults(func=ManifestCommand)

  parser_indexeddb = subparsers.add_parser('indexeddb')
  parser_indexeddb.set_defaults(func=IndexeddbCommand)

  args = parser.parse_args()

  # Each sub-parser stored its handler in args.func via set_defaults.
  args.func(args)
|
dfindexeddb/indexeddb/blink.py
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Copyright 2024 Google LLC
|
|
3
|
-
#
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
8
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
"""Parsers for blink javascript serialized objects."""
|
|
16
|
-
import io
|
|
17
|
-
from typing import Any
|
|
18
|
-
|
|
19
|
-
from dfindexeddb import utils
|
|
20
|
-
from dfindexeddb.indexeddb import definitions
|
|
21
|
-
from dfindexeddb.indexeddb import v8
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class V8ScriptValueDecoder:
  """A Blink V8 Serialized Script Value (SSV) decoder.

  Attributes:
    deserializer (v8.ValueDeserializer): the V8 value deserializer, or None
        until Deserialize is called.
    raw_data (bytes): the raw bytes containing the serialized javascript
        value.
    version (int): the Blink version read from the version envelope, or the
        version returned by the V8 header when no Blink envelope is present.
        Zero until Deserialize is called.
  """
  _MIN_VERSION_FOR_SEPARATE_ENVELOPE = 16

  # As defined in trailer_reader.h
  _MIN_WIRE_FORMAT_VERSION = 21

  def __init__(self, raw_data: bytes):
    """Initializes the decoder.

    Args:
      raw_data: the raw bytes containing the serialized javascript value.
    """
    self.deserializer = None
    self.raw_data = raw_data
    self.version = 0

  def _ReadVersionEnvelope(self) -> int:
    """Reads the Blink version envelope.

    When an envelope is read, the version attribute is set to the Blink
    version it carries.

    Returns:
      The number of bytes consumed reading the Blink version envelope or zero
      if no blink envelope is detected.
    """
    if not self.raw_data:
      return 0

    decoder = utils.StreamDecoder(io.BytesIO(self.raw_data))
    _, tag_value = decoder.DecodeUint8()
    # Compare the raw value instead of constructing the enum: a first byte
    # that is not a known tag means "no envelope" and must not raise
    # ValueError.
    if tag_value != definitions.BlinkSerializationTag.VERSION.value:
      return 0

    _, version = decoder.DecodeUint32Varint()
    if version < self._MIN_VERSION_FOR_SEPARATE_ENVELOPE:
      # Too low to be a separate Blink envelope.
      return 0

    consumed_bytes = decoder.stream.tell()

    if version >= self._MIN_WIRE_FORMAT_VERSION:
      # The envelope also carries trailer offset data, which is skipped.
      trailer_offset_data_size = 1 + 8 + 4  # 1 + sizeof(uint64) + sizeof(uint32)
      consumed_bytes += trailer_offset_data_size
      if consumed_bytes >= len(self.raw_data):
        return 0

    # Record the Blink version so Deserialize does not replace it with the
    # V8 header version.
    self.version = version
    return consumed_bytes

  def ReadTag(self) -> definitions.BlinkSerializationTag:
    """Reads a blink serialization tag.

    Returns:
      The blink serialization tag.
    """
    _, tag_value = self.deserializer.decoder.DecodeUint8()
    return definitions.BlinkSerializationTag(tag_value)

  def ReadHostObject(self) -> Any:
    """Reads a host DOM object.

    Raises:
      NotImplementedError: when called.
    """
    tag = self.ReadTag()
    raise NotImplementedError(f'V8ScriptValueDecoder.ReadHostObject - {tag}')

  def Deserialize(self) -> Any:
    """Deserializes a Blink SSV.

    The serialization format has two 'envelopes'.
    [version tag] [Blink version] [version tag] [v8 version] ...

    Returns:
      The deserialized script value.
    """
    version_envelope_size = self._ReadVersionEnvelope()
    # This decoder is passed as the delegate of the V8 deserializer.
    self.deserializer = v8.ValueDeserializer(
        io.BytesIO(self.raw_data[version_envelope_size:]), delegate=self)
    v8_version = self.deserializer.ReadHeader()
    if not self.version:
      # No Blink envelope was read, so fall back to the V8 header version.
      self.version = v8_version
    return self.deserializer.ReadValue()

  @classmethod
  def FromBytes(cls, data: bytes) -> Any:
    """Deserializes a Blink SSV from the data array.

    Args:
      data: the raw bytes containing the serialized javascript value.

    Returns:
      The deserialized script value.
    """
    return cls(data).Deserialize()
|