linear-mcp-fast 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccl_chromium_reader/__init__.py +2 -0
- ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
- ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
- ccl_chromium_reader/ccl_chromium_history.py +357 -0
- ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
- ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
- ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
- ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
- ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
- ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
- ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
- ccl_chromium_reader/common.py +19 -0
- ccl_chromium_reader/download_common.py +78 -0
- ccl_chromium_reader/profile_folder_protocols.py +276 -0
- ccl_chromium_reader/serialization_formats/__init__.py +0 -0
- ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
- ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
- ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
- ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
- ccl_chromium_reader/storage_formats/__init__.py +0 -0
- ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
- ccl_simplesnappy/__init__.py +1 -0
- ccl_simplesnappy/ccl_simplesnappy.py +306 -0
- linear_mcp_fast/__init__.py +8 -0
- linear_mcp_fast/__main__.py +6 -0
- linear_mcp_fast/reader.py +433 -0
- linear_mcp_fast/server.py +367 -0
- linear_mcp_fast/store_detector.py +117 -0
- linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
- linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
- linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
- linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
- linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
- tools_and_utilities/Chromium_dump_local_storage.py +111 -0
- tools_and_utilities/Chromium_dump_session_storage.py +92 -0
- tools_and_utilities/benchmark.py +35 -0
- tools_and_utilities/ccl_chrome_audit.py +651 -0
- tools_and_utilities/dump_indexeddb_details.py +59 -0
- tools_and_utilities/dump_leveldb.py +53 -0
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2020, CCL Forensics
|
|
3
|
+
|
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
5
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
6
|
+
the Software without restriction, including without limitation the rights to
|
|
7
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
8
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
9
|
+
so, subject to the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be included in all
|
|
12
|
+
copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
20
|
+
SOFTWARE.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import sys
|
|
24
|
+
import struct
|
|
25
|
+
import datetime
|
|
26
|
+
import types
|
|
27
|
+
import typing
|
|
28
|
+
import re
|
|
29
|
+
|
|
30
|
+
# Module metadata.
__version__ = "0.1.1"
__description__ = "Partial reimplementation of the V8 Javascript Object Serialization"
__contact__ = "Alex Caithness"

# TODO: We need to address cyclic references, which are permissible. Probably
#  take the same approach as in ccl_bplist and subclass the collection types to
#  resolve references JIT

# See: https://github.com/v8/v8/blob/master/src/objects/value-serializer.cc

# When True, log() also prints messages that are flagged as debug-only.
__DEBUG = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def log(msg, debug_only=True):
    """Print *msg* prefixed with the calling function's name and first line.

    Messages flagged ``debug_only`` (the default) are printed only while the
    module-level ``__DEBUG`` switch is truthy; pass ``debug_only=False`` to
    print unconditionally.

    The number in brackets is the line on which the calling function is
    defined (``co_firstlineno``), not the line of the ``log`` call itself.
    """
    if debug_only and not __DEBUG:
        return

    caller_code = sys._getframe(1).f_code
    print(f"{caller_code.co_name} ({caller_code.co_firstlineno}):\t{msg}")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def read_le_varint(stream: typing.BinaryIO, is_32bit=False) -> typing.Optional[typing.Tuple[int, bytes]]:
    """Read one unsigned little-endian base-128 varint from *stream*.

    Each byte contributes its low seven bits, least-significant group first;
    a clear high bit marks the final byte. At most 5 bytes are consumed when
    ``is_32bit`` is set, otherwise at most 10.

    Returns a ``(value, raw_bytes)`` tuple, where ``raw_bytes`` are the bytes
    that were consumed, or ``None`` if the stream runs out before a
    terminating byte is seen. If the byte limit is reached while the
    continuation bit is still set, the value accumulated so far is returned.
    """
    # this only outputs unsigned
    max_bytes = 5 if is_32bit else 10
    consumed = bytearray()
    value = 0

    for shift in range(0, max_bytes * 7, 7):
        chunk = stream.read(1)
        if len(chunk) < 1:
            return None  # ran out of data mid-varint

        byte, = chunk
        consumed.append(byte)
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            break

    return value, bytes(consumed)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class _Undefined:
    """Falsy sentinel type used to represent a JavaScript ``undefined`` value.

    All instances compare equal to each other and unequal to everything else.
    Instances are hashable, so the sentinel can be stored in a ``set`` or used
    as a ``dict`` key (for example when a deserialized JS Set or Map contains
    ``undefined``).
    """

    def __bool__(self):
        return False

    def __eq__(self, other):
        return isinstance(other, _Undefined)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; every
        # instance is equal to every other, so they must share one hash.
        return hash(_Undefined)

    def __repr__(self):
        return "<Undefined>"

    def __str__(self):
        return "<Undefined>"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SharedObject:
    """Placeholder for a shared-object entry, carrying only its numeric id."""

    def __init__(self, object_id: int):
        # The id exactly as passed by the caller.
        self.id = object_id

    def _describe(self):
        """Build the text shared by ``repr()`` and ``str()``."""
        return f"<SharedObject; id: {self.id}>"

    def __repr__(self):
        return self._describe()

    def __str__(self):
        return self._describe()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class Constants:
    """Serialization tags of the V8 value-serializer wire format.

    Every ``token_*`` attribute is a single-byte ``bytes`` object so that it
    can be compared directly with a byte read from the stream.
    """

    # Constants
    kLatestVersion = 15

    # version:uint32_t (if at beginning of data, sets version > 0)
    token_kVersion = b"\xFF"
    # ignore
    token_kPadding = b"\0"
    # refTableSize:uint32_t (previously used for sanity checks; safe to ignore)
    token_kVerifyObjectCount = b"?"
    # Oddballs (no data).
    token_kTheHole = b"-"
    token_kUndefined = b"_"
    token_kNull = b"0"
    token_kTrue = b"T"
    token_kFalse = b"F"
    # Number represented as 32-bit integer, ZigZag-encoded
    # (like sint32 in protobuf)
    token_kInt32 = b"I"
    # Number represented as 32-bit unsigned integer, varint-encoded
    # (like uint32 in protobuf)
    token_kUint32 = b"U"
    # Number represented as a 64-bit double.
    # Host byte order is used (N.B. this makes the format non-portable).
    token_kDouble = b"N"
    # BigInt. Bitfield:uint32_t, then raw digits storage.
    token_kBigInt = b"Z"
    # byteLength:uint32_t, then raw data
    token_kUtf8String = b"S"
    token_kOneByteString = b"\""
    token_kTwoByteString = b"c"
    # Reference to a serialized object. objectID:uint32_t
    token_kObjectReference = b"^"
    # Beginning of a JS object.
    token_kBeginJSObject = b"o"
    # End of a JS object. numProperties:uint32_t
    token_kEndJSObject = b"{"
    # Beginning of a sparse JS array. length:uint32_t
    # Elements and properties are written as token_key/value pairs, like objects.
    token_kBeginSparseJSArray = b"a"
    # End of a sparse JS array. numProperties:uint32_t length:uint32_t
    token_kEndSparseJSArray = b"@"
    # Beginning of a dense JS array. length:uint32_t
    # |length| elements, followed by properties as token_key/value pairs
    token_kBeginDenseJSArray = b"A"
    # End of a dense JS array. numProperties:uint32_t length:uint32_t
    token_kEndDenseJSArray = b"$"
    # Date. millisSinceEpoch:double
    token_kDate = b"D"
    # Boolean object. No data.
    token_kTrueObject = b"y"
    token_kFalseObject = b"x"
    # Number object. value:double
    token_kNumberObject = b"n"
    # BigInt object. Bitfield:uint32_t, then raw digits storage.
    token_kBigIntObject = b"z"
    # String object, UTF-8 encoding. byteLength:uint32_t, then raw data.
    token_kStringObject = b"s"
    # Regular expression, UTF-8 encoding. byteLength:uint32_t, raw data
    # flags:uint32_t.
    token_kRegExp = b"R"
    # Beginning of a JS map.
    token_kBeginJSMap = b";"
    # End of a JS map. length:uint32_t.
    token_kEndJSMap = b":"
    # Beginning of a JS set.
    token_kBeginJSSet = b"'"
    # End of a JS set. length:uint32_t.
    token_kEndJSSet = b","
    # Array buffer. byteLength:uint32_t, then raw data.
    token_kArrayBuffer = b"B"
    # Array buffer (transferred). transferID:uint32_t
    token_kArrayBufferTransfer = b"t"
    # View into an array buffer.
    # subtag:ArrayBufferViewTag, byteOffset:uint32_t, byteLength:uint32_t
    # For typed arrays, byteOffset and byteLength must be divisible by the size
    # of the element.
    # Note: token_kArrayBufferView is special, and should have an ArrayBuffer (or an
    # ObjectReference to one) serialized just before it. This is a quirk arising
    # from the previous stack-based implementation.
    token_kArrayBufferView = b"V"
    # Shared array buffer. transferID:uint32_t
    token_kSharedArrayBuffer = b"u"
    # A HeapObject shared across Isolates. sharedValueID:uint32_t
    # (must be bytes, like every other tag, or it can never match a tag read
    # from the stream)
    token_kSharedObject = b"p"
    # A wasm module object transfer. next value is its index.
    token_kWasmModuleTransfer = b"w"
    # The delegate is responsible for processing all following data.
    # This "escapes" to whatever wire format the delegate chooses.
    token_kHostObject = b"\\"
    # A transferred WebAssembly.Memory object. maximumPages:int32_t, then by
    # SharedArrayBuffer tag and its data.
    token_kWasmMemoryTransfer = b"m"
    # A list of (subtag: ErrorTag, [subtag dependent data]). See ErrorTag for
    # details.
    token_kError = b"r"

    # The following tags are reserved because they were in use in Chromium before
    # the token_kHostObject tag was introduced in format version 13, at
    #   v8 refs/heads/master@{#43466}
    #   chromium/src refs/heads/master@{#453568}
    #
    # They must not be reused without a version check to prevent old values from
    # starting to deserialize incorrectly. For simplicity, it's recommended to
    # avoid them altogether.
    #
    # This is the set of tags that existed in SerializationTag.h at that time and
    # still exist at the time of this writing (i.e., excluding those that were
    # removed on the Chromium side because there should be no real user data
    # containing them).
    #
    # It might be possible to also free up other tags which were never persisted
    # (e.g. because they were used only for transfer) in the future.
    token_kLegacyReservedMessagePort = b"M"
    token_kLegacyReservedBlob = b"b"
    token_kLegacyReservedBlobIndex = b"i"
    token_kLegacyReservedFile = b"f"
    token_kLegacyReservedFileIndex = b"e"
    token_kLegacyReservedDOMFileSystem = b"d"
    token_kLegacyReservedFileList = b"l"
    token_kLegacyReservedFileListIndex = b"L"
    token_kLegacyReservedImageData = b"#"
    token_kLegacyReservedImageBitmap = b"g"
    token_kLegacyReservedImageBitmapTransfer = b"G"
    token_kLegacyReservedOffscreenCanvas = b"H"
    # 'K' and 'k' in V8's SerializationTag enum.
    token_kLegacyReservedCryptoKey = b"K"
    token_kLegacyReservedRTCCertificate = b"k"
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class ArrayBufferViewTag:
    """Sub-tags identifying the kind of view laid over an array buffer.

    Each ``tag_*`` attribute is a one-character string. ``STRUCT_LOOKUP`` is a
    read-only mapping from a tag to the ``struct`` format character used to
    unpack a single element of that view.
    """

    # 8-bit views
    tag_kInt8Array = "b"
    tag_kUint8Array = "B"
    tag_kUint8ClampedArray = "C"
    # 16-bit views
    tag_kInt16Array = "w"
    tag_kUint16Array = "W"
    # 32-bit views
    tag_kInt32Array = "d"
    tag_kUint32Array = "D"
    # Floating point views
    tag_kFloat32Array = "f"
    tag_kFloat64Array = "F"
    # 64-bit integer views
    tag_kBigInt64Array = "q"
    tag_kBigUint64Array = "Q"
    # Untyped view
    tag_kDataView = "?"

    # (tag, struct format character) pairs; a DataView is unpacked byte by byte.
    STRUCT_LOOKUP = types.MappingProxyType(dict((
        (tag_kInt8Array, "b"),
        (tag_kUint8Array, "B"),
        (tag_kUint8ClampedArray, "B"),
        (tag_kInt16Array, "h"),
        (tag_kUint16Array, "H"),
        (tag_kInt32Array, "i"),
        (tag_kUint32Array, "I"),
        (tag_kFloat32Array, "f"),
        (tag_kFloat64Array, "d"),
        (tag_kBigInt64Array, "q"),
        (tag_kBigUint64Array, "Q"),
        (tag_kDataView, "c"),
    )))
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class Deserializer:
    """Reader for a value stored in the V8 value-serializer wire format.

    Constructing an instance reads the version header from the stream;
    :meth:`read` then decodes the next value. Objects, arrays, maps, sets,
    dates, regexes, array buffers, host objects and wrapped primitives are
    recorded in ``self._objects`` in the order they are encountered so that
    object-reference tags can be resolved by index.
    """

    # Shared stand-in for JavaScript ``undefined`` (also used for array holes).
    Undefined = _Undefined()

    # Tags that carry no payload, mapped straight to their Python value.
    __ODDBALLS = {
        Constants.token_kUndefined: Undefined,
        Constants.token_kTheHole: Undefined,
        Constants.token_kNull: None,
        Constants.token_kTrue: True,
        Constants.token_kFalse: False,
    }

    # Boxed primitives: their values are added to the object table after reading.
    __WRAPPED_PRIMITIVES = {
        Constants.token_kTrueObject,
        Constants.token_kFalseObject,
        Constants.token_kNumberObject,
        Constants.token_kBigIntObject,
        Constants.token_kStringObject
    }

    def __init__(self, stream: typing.BinaryIO, host_object_delegate: typing.Callable,
                 *, is_little_endian=True, is_64bit=True):
        """Bind to *stream* and read the version header.

        :param stream: seekable binary stream positioned at the version tag.
        :param host_object_delegate: callable invoked with the stream whenever
            a host-object tag is met; its return value is the decoded object.
        :param is_little_endian: byte order used for doubles, BigInt digits and
            array-buffer view elements.
        :param is_64bit: selects an 8-byte (True) or 4-byte (False) BigInt
            digit size.
        :raises ValueError: if the stream does not start with a version tag.
        """
        self._f = stream
        self._host_object_delegate = host_object_delegate
        self._endian = "<" if is_little_endian else ">"
        self._pointer_size = 8 if is_64bit else 4
        self._next_id = 0
        self._objects = []
        # Built once here rather than on every object read.
        self._tag_readers = self._build_tag_readers()
        self.version = self._read_header()

    def _build_tag_readers(self) -> typing.Dict[bytes, typing.Callable[[], typing.Any]]:
        """Return the mapping from serialization tag to the method that reads its payload."""
        return {
            Constants.token_kTrueObject: lambda: Deserializer.__ODDBALLS[Constants.token_kTrue],
            Constants.token_kFalseObject: lambda: Deserializer.__ODDBALLS[Constants.token_kFalse],
            Constants.token_kNumberObject: self._read_double,
            Constants.token_kUint32: self._read_unit32,
            Constants.token_kInt32: self._read_zigzag,
            Constants.token_kDouble: self._read_double,
            Constants.token_kDate: self._read_date,
            Constants.token_kBigInt: self._read_bigint,
            Constants.token_kBigIntObject: self._read_bigint,
            Constants.token_kUtf8String: self._read_utf8_string,
            Constants.token_kOneByteString: self._read_one_byte_string,
            Constants.token_kTwoByteString: self._read_two_byte_string,
            Constants.token_kStringObject: self._read_string,
            Constants.token_kRegExp: self._read_js_regex,
            Constants.token_kObjectReference: self._read_object_by_reference,
            Constants.token_kBeginJSObject: self._read_js_object,
            Constants.token_kSharedObject: self._read_shared_object,
            Constants.token_kBeginSparseJSArray: self._read_js_sparse_array,
            Constants.token_kBeginDenseJSArray: self._read_js_dense_array,
            Constants.token_kBeginJSMap: self._read_js_map,
            Constants.token_kBeginJSSet: self._read_js_set,
            Constants.token_kArrayBuffer: self._read_js_arraybuffer,
            # and probably never, as it can't be pulled from the data I think?
            Constants.token_kSharedArrayBuffer: self._not_implemented,
            Constants.token_kArrayBufferTransfer: self._not_implemented,
            Constants.token_kError: self._not_implemented,
            Constants.token_kWasmModuleTransfer: self._not_implemented,
            Constants.token_kWasmMemoryTransfer: self._not_implemented,
            Constants.token_kHostObject: self._read_host_object,
        }

    def _read_raw(self, length: int) -> bytes:
        """Read exactly *length* bytes; raise ValueError if the stream is short."""
        start = self._f.tell()
        raw = self._f.read(length)
        if len(raw) != length:
            raise ValueError(f"Could not read all data at offset {start}; wanted {length}; got {len(raw)}")

        return raw

    def _read_le_varint(self) -> typing.Optional[typing.Tuple[int, bytes]]:
        """Read a varint from the stream (see the module-level read_le_varint)."""
        return read_le_varint(self._f)

    def _read_zigzag(self) -> int:
        """Read a ZigZag-encoded signed integer.

        ZigZag maps 0, -1, 1, -2, 2, ... onto 0, 1, 2, 3, 4, ..., so an odd
        encoded value ``n`` decodes to ``-(n >> 1) - 1``.
        """
        unsigned = self._read_le_varint()[0]
        return (unsigned >> 1) ^ -(unsigned & 1)

    def _read_unit32(self) -> int:
        """Read an unsigned varint-encoded integer."""
        return self._read_le_varint()[0]

    def _read_double(self) -> float:
        """Read an 8-byte IEEE double in the configured byte order."""
        return struct.unpack(f"{self._endian}d", self._read_raw(8))[0]

    def _read_bigint(self) -> int:
        """Read a BigInt: a varint bitfield followed by the raw digit bytes.

        Bit 0 of the bitfield is the sign; the digit count is taken from the
        bits above bit 3 and multiplied by the pointer size to get the payload
        length.
        """
        # NOTE(review): ``>> 4`` matches a bitfield holding (byte length << 1)
        # when digits are 8 bytes wide; confirm it is right for 4-byte digits.
        size_flag = self._read_le_varint()[0]
        is_neg = size_flag & 0x01
        size = size_flag >> 4
        raw = self._read_raw(size * self._pointer_size)

        value = int.from_bytes(raw, "big" if self._endian == ">" else "little", signed=False)
        if is_neg:
            value = -value

        return value

    def _read_utf8_string(self) -> str:
        """Read a varint byte length followed by that many UTF-8 bytes."""
        length = self._read_le_varint()[0]
        return self._read_raw(length).decode("utf8")

    def _read_one_byte_string(self) -> typing.AnyStr:
        """Read a length-prefixed one-byte string.

        Returns ``str`` when the payload is pure ASCII, otherwise the raw
        ``bytes`` unchanged.
        """
        length = self._read_le_varint()[0]
        # I think this can be used to store raw 8-bit data, so return ascii if we can, otherwise bytes
        raw = self._read_raw(length)
        try:
            result = raw.decode("ascii")
        except UnicodeDecodeError:
            result = raw
        return result

    def _read_two_byte_string(self) -> str:
        """Read a varint byte length followed by UTF-16-LE data."""
        length = self._read_le_varint()[0]
        return self._read_raw(length).decode("utf-16-le")  # le?

    def _read_string(self) -> str:
        """Read a string payload.

        Before format version 12 this is a bare UTF-8 string; from version 12
        onwards it is a full tagged object, which is expected to be a string.
        """
        if self.version < 12:
            return self._read_utf8_string()

        value = self._read_object()
        assert isinstance(value, str)

        return value

    def _read_object_by_reference(self) -> typing.Any:
        """Resolve an object reference: a varint index into the object table."""
        ref_id = self._read_le_varint()[0]
        return self._objects[ref_id]

    def _read_tag(self) -> bytes:
        """Return the next non-padding byte (``b""`` at end of stream)."""
        while True:
            t = self._f.read(1)
            if t != Constants.token_kPadding:
                return t

    def _peek_tag(self) -> bytes:
        """Return the next tag without moving the stream position."""
        start = self._f.tell()
        tag = self._read_tag()
        self._f.seek(start, 0)
        return tag

    def _expect_tag(self, expected: bytes) -> None:
        """Consume the next tag and raise ValueError unless it equals *expected*.

        The read is done outside any ``assert`` so the tag is still consumed
        when Python runs with assertions disabled (``-O``).
        """
        found = self._read_tag()
        if found != expected:
            raise ValueError(f"Expected tag {expected} but got {found} (stream now at offset {self._f.tell()})")

    def _read_date(self) -> datetime.datetime:
        """Read a Date (double, milliseconds since 1970-01-01) as a naive datetime."""
        x = self._read_double()
        result = datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=x)
        self._objects.append(result)
        return result

    def _read_js_regex(self) -> typing.Pattern:
        """Read a regular expression and compile its pattern with ``re``.

        The serialized flags are consumed but not applied to the compiled
        pattern.
        """
        log(f"Reading js regex properties at {self._f.tell()}")
        pattern = self._read_string()
        # The flags must be consumed to keep the stream aligned.
        self._read_le_varint()

        # TODO: Flags?
        regex = re.compile(pattern)
        self._objects.append(regex)
        return regex

    def _read_js_object_properties(self, end_tag) -> typing.Iterable[typing.Tuple[typing.Any, typing.Any]]:
        """Yield ``(key, value)`` pairs until *end_tag* is next, then consume it.

        This is a generator: the end tag is only consumed once the caller has
        iterated to exhaustion.
        """
        log(f"Reading object properties at {self._f.tell()} with end tag: {end_tag}")
        while True:
            if self._peek_tag() == end_tag:
                log(f"Object end at offset {self._f.tell()}")
                break
            key = self._read_object()
            value = self._read_object()

            yield key, value

        self._expect_tag(end_tag)

    def _read_js_object(self) -> dict:
        """Read a JS object into a dict and verify the stated property count."""
        log(f"Reading js object properties at {self._f.tell()}")
        result = {}
        # Registered before the properties are read so that references met
        # while reading them resolve to this dict.
        self._objects.append(result)
        for key, value in self._read_js_object_properties(Constants.token_kEndJSObject):
            result[key] = value

        property_count = self._read_le_varint()[0]
        log(f"Actual property count: {len(result)}; stated property count: {property_count}")
        if len(result) != property_count:
            raise ValueError("Property count mismatch")

        return result

    def _read_js_sparse_array(self) -> list:
        """Read a sparse JS array into a list pre-filled with ``None``.

        Each property key is converted with ``int()`` and used as the list
        index.
        """
        log(f"Reading js sparse array properties at {self._f.tell()}")
        # TODO: implement a sparse list so that this isn't so horribly inefficient
        length = self._read_le_varint()[0]
        result = [None for _ in range(length)]
        self._objects.append(result)

        sparse_object = self._read_js_object_properties(Constants.token_kEndSparseJSArray)
        prop_count = 0
        for key, value in sparse_object:
            i = int(key)
            result[i] = value
            prop_count += 1
        expected_num_properties = self._read_le_varint()[0]

        log(f"Actual property count: {prop_count}; stated property count: {expected_num_properties}")
        if prop_count != expected_num_properties:
            raise ValueError("Property count mismatch")

        # Trailing length field: consumed but not validated.
        self._read_le_varint()[0]  # TODO: should this be checked?

        return result

    def _read_js_dense_array(self) -> list:
        """Read a dense JS array: ``length`` elements, then any extra properties.

        Extra properties are applied by converting each key with ``int()`` and
        assigning at that index.
        """
        log(f"Reading js dense array properties at {self._f.tell()}")
        length = self._read_le_varint()[0]
        result = [None for _ in range(length)]
        self._objects.append(result)

        for i in range(length):
            result[i] = self._read_object()

        # And then there's a sparse bit maybe?
        sparse_object = self._read_js_object_properties(Constants.token_kEndDenseJSArray)
        prop_count = 0
        for key, value in sparse_object:
            i = int(key)
            result[i] = value
            prop_count += 1

        expected_num_properties = self._read_le_varint()[0]

        log(f"Actual property count: {prop_count}; stated property count: {expected_num_properties}")
        if prop_count != expected_num_properties:
            raise ValueError("Property count mismatch")

        # Trailing length field: consumed but not validated.
        self._read_le_varint()[0]  # TODO: should this be checked?

        return result

    def _read_js_map(self) -> dict:
        """Read a JS Map into a dict.

        The stated length is compared with twice the number of entries read.
        """
        log(f"Reading js map at {self._f.tell()}")
        result = {}
        self._objects.append(result)
        while True:
            if self._peek_tag() == Constants.token_kEndJSMap:
                log(f"End of map at {self._f.tell()}")
                break

            key = self._read_object()
            value = self._read_object()
            result[key] = value

        self._expect_tag(Constants.token_kEndJSMap)

        expected_length = self._read_le_varint()[0]
        log(f"Actual map item count: {len(result) * 2}; stated map item count: {expected_length}")
        if expected_length != len(result) * 2:
            raise ValueError("Map count mismatch")

        return result

    def _read_js_set(self) -> set:
        """Read a JS Set into a Python set and verify the stated length."""
        log(f"Reading js set properties at {self._f.tell()}")
        result = set()
        self._objects.append(result)

        while True:
            if self._peek_tag() == Constants.token_kEndJSSet:
                log(f"End of set at {self._f.tell()}")
                break

            result.add(self._read_object())

        self._expect_tag(Constants.token_kEndJSSet)

        expected_length = self._read_le_varint()[0]
        log(f"Actual set item count: {len(result)}; stated set item count: {expected_length}")
        if expected_length != len(result):
            raise ValueError("Set count mismatch")

        return result

    def _read_js_arraybuffer(self) -> bytes:
        """Read an ArrayBuffer: a varint byte length followed by the raw bytes."""
        length = self._read_le_varint()[0]
        raw = self._read_raw(length)
        self._objects.append(raw)

        return raw

    def _wrap_js_array_buffer_view(self, raw: bytes) -> tuple:
        """Interpret part of *raw* as the array-buffer view described next in the stream.

        Reads the view sub-tag, byte offset and byte length (plus a flags field
        from format version 14 onwards) and unpacks the selected slice into a
        tuple of elements.

        :raises TypeError: if *raw* is not ``bytes``.
        :raises ValueError: if the view extends past the end of *raw* or its
            length is not a multiple of the element size.
        """
        if not isinstance(raw, bytes):
            raise TypeError("Only bytes should be passed to be wrapped in a buffer view")

        log(f"Wrapping in ArrayBufferView at offset {self._f.tell()}")

        tag = chr(self._read_le_varint()[0])
        byte_offset = self._read_le_varint()[0]
        byte_length = self._read_le_varint()[0]

        if byte_offset + byte_length > len(raw):
            raise ValueError("Not enough data in the raw data to hold the defined data")

        # See: https://github.com/v8/v8/blob/4d34ea98bb655295ab1f9003f6783bd509b7ccb3/src/objects/value-serializer.cc#L1967
        if self.version >= 14:
            # Flags are consumed to keep the stream aligned but are not used.
            self._read_le_varint()[0]

        log(f"ArrayBufferView: tag: {tag}; byte_offset: {byte_offset}; byte_length: {byte_length}")

        fmt = ArrayBufferViewTag.STRUCT_LOOKUP[tag]
        element_length = struct.calcsize(fmt)
        if byte_length % element_length != 0:
            raise ValueError(f"ArrayBufferView doesn't fit nicely: byte_length: {byte_length}; "
                             f"element_length: {element_length}")

        element_count = byte_length // element_length

        return struct.unpack(f"{self._endian}{element_count}{fmt}", raw[byte_offset: byte_offset + byte_length])

    def _read_host_object(self) -> typing.Any:
        """Hand the stream to the host-object delegate and record its result."""
        result = self._host_object_delegate(self._f)
        self._objects.append(result)
        return result

    def _read_shared_object(self) -> SharedObject:
        """Read a shared-object id and wrap it in a SharedObject placeholder."""
        shobj_id = self._read_le_varint()[0]
        return SharedObject(shobj_id)

    def _not_implemented(self):
        """Reader used for tags that are recognised but not supported."""
        raise NotImplementedError("Todo")

    def _read_object_internal(self) -> typing.Tuple[bytes, typing.Any]:
        """Read one tag and its payload, returning ``(tag, value)``.

        :raises ValueError: if the tag has no registered reader.
        """
        tag = self._read_tag()

        log(f"Offset: {self._f.tell()}; Tag: {tag}")

        if tag in Deserializer.__ODDBALLS:
            return tag, Deserializer.__ODDBALLS[tag]

        func = self._tag_readers.get(tag)

        if func is None:
            raise ValueError(f"Unknown tag {tag}")

        value = func()

        if tag in Deserializer.__WRAPPED_PRIMITIVES:
            self._objects.append(value)

        return tag, value

    def _read_object(self) -> typing.Any:
        """Read one value, wrapping it in an array-buffer view if one follows."""
        log(f"Read object at offset: {self._f.tell()}")
        _, o = self._read_object_internal()

        if self._peek_tag() == Constants.token_kArrayBufferView:
            self._expect_tag(Constants.token_kArrayBufferView)
            o = self._wrap_js_array_buffer_view(o)

        return o

    def _read_header(self) -> int:
        """Read the version tag and return the format version that follows it."""
        tag = self._read_tag()
        if tag != Constants.token_kVersion:
            raise ValueError("Didn't get version tag in the header")
        version = self._read_le_varint()[0]
        return version

    def read(self) -> typing.Any:
        """Decode and return the next value from the stream."""
        return self._read_object()
|
|
File without changes
|