linear_mcp_fast-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccl_chromium_reader/__init__.py +2 -0
- ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
- ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
- ccl_chromium_reader/ccl_chromium_history.py +357 -0
- ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
- ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
- ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
- ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
- ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
- ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
- ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
- ccl_chromium_reader/common.py +19 -0
- ccl_chromium_reader/download_common.py +78 -0
- ccl_chromium_reader/profile_folder_protocols.py +276 -0
- ccl_chromium_reader/serialization_formats/__init__.py +0 -0
- ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
- ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
- ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
- ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
- ccl_chromium_reader/storage_formats/__init__.py +0 -0
- ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
- ccl_simplesnappy/__init__.py +1 -0
- ccl_simplesnappy/ccl_simplesnappy.py +306 -0
- linear_mcp_fast/__init__.py +8 -0
- linear_mcp_fast/__main__.py +6 -0
- linear_mcp_fast/reader.py +433 -0
- linear_mcp_fast/server.py +367 -0
- linear_mcp_fast/store_detector.py +117 -0
- linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
- linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
- linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
- linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
- linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
- tools_and_utilities/Chromium_dump_local_storage.py +111 -0
- tools_and_utilities/Chromium_dump_session_storage.py +92 -0
- tools_and_utilities/benchmark.py +35 -0
- tools_and_utilities/ccl_chrome_audit.py +651 -0
- tools_and_utilities/dump_indexeddb_details.py +59 -0
- tools_and_utilities/dump_leveldb.py +53 -0
@@ -0,0 +1,1060 @@
"""
Copyright 2020-2024, CCL Forensics

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import sys
import struct
import os
import pathlib
import io
import enum
import datetime
import dataclasses
import types
import typing

from .storage_formats import ccl_leveldb
from .serialization_formats import ccl_blink_value_deserializer, ccl_v8_value_deserializer

__version__ = "0.19"
__description__ = "Module for reading Chromium IndexedDB LevelDB databases."
__contact__ = "Alex Caithness"


# TODO: need to go through and ensure that we have endianness right in all cases
# (it should sit behind a switch for integers, fixed for most other stuff)

def _read_le_varint(stream: typing.BinaryIO, *, is_google_32bit=False) -> typing.Optional[tuple[int, bytes]]:
    # this only outputs unsigned
    i = 0
    result = 0
    underlying_bytes = []
    limit = 5 if is_google_32bit else 10
    while i < limit:
        raw = stream.read(1)
        if len(raw) < 1:
            return None
        tmp, = raw
        underlying_bytes.append(tmp)
        result |= ((tmp & 0x7f) << (i * 7))

        if (tmp & 0x80) == 0:
            break
        i += 1
    return result, bytes(underlying_bytes)


def read_le_varint(stream: typing.BinaryIO, *, is_google_32bit=False) -> typing.Optional[int]:
    x = _read_le_varint(stream, is_google_32bit=is_google_32bit)
    if x is None:
        return None
    else:
        return x[0]


def _le_varint_from_bytes(data: bytes) -> typing.Optional[tuple[int, bytes]]:
    with io.BytesIO(data) as buff:
        return _read_le_varint(buff)


def le_varint_from_bytes(data: bytes) -> typing.Optional[int]:
    with io.BytesIO(data) as buff:
        return read_le_varint(buff)


def decode_truncated_int(data: bytes) -> int:
    # See: /content/browser/indexed_db/indexed_db_leveldb_coding.h EncodeInt()
    # "// Unlike EncodeVarInt, this is a 'dumb' implementation of a variable int
    # // encoder. It writes, little-endian', until there are no more '1' bits in the
    # // number. The Decoder must know how to calculate the size of the encoded int,
    # // typically by having this reside at the end of the value or key."
    if len(data) == 0:
        raise ValueError("No data to decode")
    result = 0
    for i, b in enumerate(data):
        result |= (b << (i * 8))
    return result

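# Example (for illustration): the helpers above decode protobuf-style
# little-endian base-128 varints, least-significant group first:
#
#     >>> le_varint_from_bytes(b"\xb4\x07")   # 0x34 | (0x07 << 7)
#     948
#     >>> _le_varint_from_bytes(b"\xb4\x07")  # also returns the bytes consumed
#     (948, b'\xb4\x07')
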
class IdbKeyType(enum.IntEnum):
    Null = 0
    String = 1
    Date = 2
    Number = 3
    Array = 4
    MinKey = 5
    Binary = 6


class IdbKey:
    # See: https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/indexed_db_leveldb_coding.cc
    def __init__(self, buffer: bytes):
        self.raw_key = buffer
        self.key_type = IdbKeyType(buffer[0])
        raw_key = buffer[1:]

        if self.key_type == IdbKeyType.Null:
            self.value = None
            self._raw_length = 1
        elif self.key_type == IdbKeyType.String:
            str_len, varint_raw = _le_varint_from_bytes(raw_key)
            self.value = raw_key[len(varint_raw):len(varint_raw) + str_len * 2].decode("utf-16-be")
            self._raw_length = 1 + len(varint_raw) + str_len * 2
        elif self.key_type == IdbKeyType.Date:
            ts, = struct.unpack("<d", raw_key[0:8])
            self.value = datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=ts)
            self._raw_length = 9
        elif self.key_type == IdbKeyType.Number:
            self.value = struct.unpack("<d", raw_key[0:8])[0]
            self._raw_length = 9
        elif self.key_type == IdbKeyType.Array:
            array_count, varint_raw = _le_varint_from_bytes(raw_key)
            raw_key = raw_key[len(varint_raw):]
            self.value = []
            self._raw_length = 1 + len(varint_raw)
            for i in range(array_count):
                key = IdbKey(raw_key)
                raw_key = raw_key[key._raw_length:]
                self._raw_length += key._raw_length
                self.value.append(key)
            self.value = tuple(self.value)
        elif self.key_type == IdbKeyType.MinKey:
            # TODO: not sure what this actually implies, the code doesn't store a value
            self.value = None
            self._raw_length = 1
            raise NotImplementedError()
        elif self.key_type == IdbKeyType.Binary:
            bin_len, varint_raw = _le_varint_from_bytes(raw_key)
            self.value = raw_key[len(varint_raw):len(varint_raw) + bin_len]
            self._raw_length = 1 + len(varint_raw) + bin_len
        else:
            raise ValueError()  # Shouldn't happen

        # trim the raw_key in case this is an inner key:
        self.raw_key = self.raw_key[0: self._raw_length]

    def __repr__(self):
        return f"<IdbKey {self.value}>"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        if not isinstance(other, IdbKey):
            raise NotImplementedError()
        return self.raw_key == other.raw_key

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return self.raw_key.__hash__()

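# Example (for illustration): a serialised String key is the type byte (0x01),
# a varint character count, then UTF-16-BE code units:
#
#     >>> IdbKey(bytes([0x01, 0x02]) + "ab".encode("utf-16-be")).value
#     'ab'
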
class IndexedDBExternalObjectType(enum.IntEnum):
    # see: https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/indexed_db_external_object.h
    Blob = 0
    File = 1
    NativeFileSystemHandle = 2


class IndexedDBExternalObject:
    # see: https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/indexed_db_backing_store.cc
    # for encoding.

    def __init__(self, object_type: IndexedDBExternalObjectType, blob_number: typing.Optional[int],
                 mime_type: typing.Optional[str], size: typing.Optional[int],
                 file_name: typing.Optional[str], last_modified: typing.Optional[datetime.datetime],
                 native_file_token: typing.Optional):
        self.object_type = object_type
        self.blob_number = blob_number
        self.mime_type = mime_type
        self.size = size
        self.file_name = file_name
        self.last_modified = last_modified
        self.native_file_token = native_file_token

    @classmethod
    def from_stream(cls, stream: typing.BinaryIO):
        blob_type = IndexedDBExternalObjectType(stream.read(1)[0])
        if blob_type in (IndexedDBExternalObjectType.Blob, IndexedDBExternalObjectType.File):
            blob_number = read_le_varint(stream)
            mime_type_length = read_le_varint(stream)
            mime_type = stream.read(mime_type_length * 2).decode("utf-16-be")
            data_size = read_le_varint(stream)

            if blob_type == IndexedDBExternalObjectType.File:
                file_name_length = read_le_varint(stream)
                file_name = stream.read(file_name_length * 2).decode("utf-16-be")
                x, x_raw = _read_le_varint(stream)
                last_modified_td = datetime.timedelta(microseconds=x)
                last_modified = datetime.datetime(1601, 1, 1) + last_modified_td
                return cls(blob_type, blob_number, mime_type, data_size, file_name,
                           last_modified, None)
            else:
                return cls(blob_type, blob_number, mime_type, data_size, None, None, None)
        else:
            raise NotImplementedError()

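# Example (for illustration): a Blob entry starts with the type byte, then
# varints and a UTF-16-BE mime type, as read by from_stream above:
#
#     >>> raw = bytes([0x00, 0x05, 0x0a]) + "text/plain".encode("utf-16-be") + bytes([0x0d])
#     >>> obj = IndexedDBExternalObject.from_stream(io.BytesIO(raw))
#     >>> obj.blob_number, obj.mime_type, obj.size
#     (5, 'text/plain', 13)
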
@dataclasses.dataclass(frozen=True)
class DatabaseId:
    dbid_no: int
    origin: str
    name: str


class GlobalMetadata:
    def __init__(self, raw_meta_dict: dict):
        # TODO: more of these meta types if required
        self.backing_store_schema_version = None
        if raw_schema_version := raw_meta_dict.get("\x00\x00\x00\x00\x00"):
            self.backing_store_schema_version = le_varint_from_bytes(raw_schema_version)

        self.max_allocated_db_id = None
        if raw_max_db_id := raw_meta_dict.get("\x00\x00\x00\x00\x01"):
            self.max_allocated_db_id = le_varint_from_bytes(raw_max_db_id)

        database_ids_raw = (raw_meta_dict[x] for x in raw_meta_dict
                            if x.startswith(b"\x00\x00\x00\x00\xc9"))

        dbids = []
        for dbid_rec in database_ids_raw:
            with io.BytesIO(dbid_rec.key[5:]) as buff:
                origin_length = read_le_varint(buff)
                origin = buff.read(origin_length * 2).decode("utf-16-be")
                db_name_length = read_le_varint(buff)
                db_name = buff.read(db_name_length * 2).decode("utf-16-be")

                db_id_no = decode_truncated_int(dbid_rec.value)

                dbids.append(DatabaseId(db_id_no, origin, db_name))

        self._db_ids = tuple(dbids)
        self._db_ids_lookup = types.MappingProxyType({x.dbid_no: x for x in self._db_ids})

    @property
    def db_ids(self) -> tuple[DatabaseId, ...]:
        return self._db_ids

    @property
    def db_ids_lookup(self) -> dict[int, DatabaseId]:
        return self._db_ids_lookup


class DatabaseMetadataType(enum.IntEnum):
    OriginName = 0  # String
    DatabaseName = 1  # String
    IdbVersionString = 2  # String (and obsolete)
    MaximumObjectStoreId = 3  # Int
    IdbVersion = 4  # Varint
    BlobNumberGeneratorCurrentNumber = 5  # Varint


class DatabaseMetadata:
    def __init__(self, raw_meta: dict):
        self._metas = types.MappingProxyType(raw_meta)

    def get_meta(self, db_id: int, meta_type: DatabaseMetadataType) -> typing.Optional[typing.Union[str, int]]:
        record = self._metas.get((db_id, meta_type))
        if not record:
            return None

        if meta_type == DatabaseMetadataType.MaximumObjectStoreId:
            return decode_truncated_int(record.value)

        # TODO
        raise NotImplementedError()


class ObjectStoreMetadataType(enum.IntEnum):
    StoreName = 0  # String
    KeyPath = 1  # IDBKeyPath
    AutoIncrementFlag = 2  # Bool
    IsEvictable = 3  # Bool (and obsolete apparently)
    LastVersionNumber = 4  # Int
    MaximumAllocatedIndexId = 5  # Int
    HasKeyPathFlag = 6  # Bool (and obsolete apparently)
    KeygeneratorCurrentNumber = 7  # Int


class ObjectStoreMetadata:
    # All metadata fields are prefaced by a 0x00 byte
    def __init__(self, raw_meta: dict):
        self._metas = types.MappingProxyType(raw_meta)

    def get_meta(self, db_id: int, obj_store_id: int, meta_type: ObjectStoreMetadataType):
        record = self._metas.get((db_id, obj_store_id, meta_type))
        if not record:
            return None

        if meta_type == ObjectStoreMetadataType.StoreName:
            return record.value.decode("utf-16-be")

        # TODO
        raise NotImplementedError()

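# Note (for illustration): only the metadata types needed later in this module are
# decoded so far; the variable names below are just placeholders for instances of
# the two classes above:
#
#     database_metadata.get_meta(1, DatabaseMetadataType.MaximumObjectStoreId)   # -> int
#     object_store_meta.get_meta(1, 1, ObjectStoreMetadataType.StoreName)        # -> str
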
@dataclasses.dataclass(frozen=True)
class BlinkTrailer:
    # third_party/blink/renderer/bindings/core/v8/serialization/trailer_reader.h
    offset: int
    length: int

    TRAILER_SIZE: typing.ClassVar[int] = 13
    MIN_WIRE_FORMAT_VERSION_FOR_TRAILER: typing.ClassVar[int] = 21

    @classmethod
    def from_buffer(cls, buffer, trailer_offset: int):
        tag, offset, length = struct.unpack(">cQI", buffer[trailer_offset: trailer_offset + BlinkTrailer.TRAILER_SIZE])
        if tag != ccl_blink_value_deserializer.Constants.tag_kTrailerOffsetTag:
            raise ValueError(
                f"Trailer doesn't start with kTrailerOffsetTag "
                f"(expected: 0x{ccl_blink_value_deserializer.Constants.tag_kTrailerOffsetTag.hex()}; "
                f"got: 0x{tag.hex()})")

        return BlinkTrailer(offset, length)


@dataclasses.dataclass(frozen=True)
class IndexedDbRecord:
    owner: "IndexedDb"
    db_id: int
    obj_store_id: int
    key: IdbKey
    value: typing.Any
    is_live: bool
    ldb_seq_no: int
    origin_file: os.PathLike
    external_value_path: typing.Optional[str] = None

    def resolve_blob_index(self, blob_index: ccl_blink_value_deserializer.BlobIndex) -> IndexedDBExternalObject:
        """Resolve a ccl_blink_value_deserializer.BlobIndex to its IndexedDBExternalObject
        to get metadata (file name, timestamps, etc)"""
        return self.owner.get_blob_info(self.db_id, self.obj_store_id, self.key.raw_key, blob_index.index_id)

    def get_blob_stream(self, blob_index: ccl_blink_value_deserializer.BlobIndex) -> typing.BinaryIO:
        """Resolve a ccl_blink_value_deserializer.BlobIndex to a stream of its content"""
        return self.owner.get_blob(self.db_id, self.obj_store_id, self.key.raw_key, blob_index.index_id)

    @property
    def database_name(self):
        return self.owner.global_metadata.db_ids_lookup[self.db_id].name

    @property
    def database_origin(self):
        return self.owner.global_metadata.db_ids_lookup[self.db_id].origin

    @property
    def object_store_name(self):
        return self.owner.get_object_store_metadata(self.db_id, self.obj_store_id, ObjectStoreMetadataType.StoreName)

    @property
    def record_location(self) -> str:
        return f"File: {pathlib.Path(*pathlib.Path(self.origin_file).parts[-2:])} Seq: {self.ldb_seq_no}"

class IndexedDb:
    # This will be informative for a lot of the data below:
    # https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/docs/leveldb_coding_scheme.md

    # Of note, the first byte of the key defines the length of the db_id, obj_store_id and index_id in bytes:
    # 0b xxxyyyzz (x = db_id size - 1, y = obj_store size - 1, z = index_id - 1)

    def __init__(self, leveldb_dir: os.PathLike, leveldb_blob_dir: os.PathLike = None):
        self._db = ccl_leveldb.RawLevelDb(leveldb_dir)
        self._blob_dir = leveldb_blob_dir
        self.global_metadata: typing.Optional[GlobalMetadata] = None
        self.database_metadata: typing.Optional[DatabaseMetadata] = None
        self.object_store_meta: typing.Optional[ObjectStoreMetadata] = None
        self._cache_records()
        self._fetch_meta_data()
        self._blob_lookup_cache = {}

    def _cache_records(self):
        self._fetched_records = []
        # Fetch the records only once
        for record in self._db.iterate_records_raw():
            self._fetched_records.append(record)

    def _fetch_meta_data(self):
        global_metadata_raw = self._get_raw_global_metadata()
        self.global_metadata = GlobalMetadata(global_metadata_raw)
        database_metadata_raw = self._get_raw_database_metadata()
        self.database_metadata = DatabaseMetadata(database_metadata_raw)
        objectstore_metadata_raw = self._get_raw_object_store_metadata()
        self.object_store_meta = ObjectStoreMetadata(objectstore_metadata_raw)

    @staticmethod
    def make_prefix(
            db_id: int, obj_store_id: int, index_id: int, end: typing.Optional[typing.Sequence[int]] = None) -> bytes:
        if end is None:
            end = []

        def count_bytes(val):
            if val == 0:
                return 1
            i = 0
            while val > 0:
                i += 1
                val = val >> 8
            return i

        def yield_le_bytes(val):
            if val == 0:
                yield 0
            if val < 0:
                raise ValueError
            while val > 0:
                yield val & 0xff
                val = val >> 8

        db_id_size = count_bytes(db_id)
        obj_store_id_size = count_bytes(obj_store_id)
        index_id_size = count_bytes(index_id)

        if db_id_size > 8 or obj_store_id_size > 8 or index_id_size > 4:
            raise ValueError("id sizes are too big")

        byte_one = ((db_id_size - 1) << 5) | ((obj_store_id_size - 1) << 2) | (index_id_size - 1)
        # print([byte_one, *yield_le_bytes(db_id), *yield_le_bytes(obj_store_id), *yield_le_bytes(index_id), *end])
        return bytes([byte_one, *yield_le_bytes(db_id), *yield_le_bytes(obj_store_id), *yield_le_bytes(index_id), *end])

    @staticmethod
    def read_prefix(stream: typing.BinaryIO) -> tuple[int, int, int, int]:
        """
        :param stream: file-like to read the prefix from
        :return: a tuple of db_id, object_store_id, index_id, length of the prefix
        """
        lengths_bytes = stream.read(1)
        if not lengths_bytes:
            raise ValueError("Couldn't get enough data when reading prefix length")
        lengths = lengths_bytes[0]
        db_id_size = ((lengths >> 5) & 0x07) + 1
        object_store_size = ((lengths >> 2) & 0x07) + 1
        index_size = (lengths & 0x03) + 1

        db_id_raw = stream.read(db_id_size)
        object_store_raw = stream.read(object_store_size)
        index_raw = stream.read(index_size)

        if (len(db_id_raw) != db_id_size or
                len(object_store_raw) != object_store_size or
                len(index_raw) != index_size):
            raise ValueError("Couldn't read enough bytes for the prefix")

        db_id = int.from_bytes(db_id_raw, "little")
        object_store_id = int.from_bytes(object_store_raw, "little")
        index_id = int.from_bytes(index_raw, "little")

        return db_id, object_store_id, index_id, (db_id_size + object_store_size + index_size + 1)

    def get_database_metadata(self, db_id: int, meta_type: DatabaseMetadataType):
        return self.database_metadata.get_meta(db_id, meta_type)

    def get_object_store_metadata(self, db_id: int, obj_store_id: int, meta_type: ObjectStoreMetadataType):
        return self.object_store_meta.get_meta(db_id, obj_store_id, meta_type)

    def _get_raw_global_metadata(self, live_only=True) -> typing.Dict[bytes, ccl_leveldb.Record]:
        # Global metadata always has the prefix 0 0 0 0
        if not live_only:
            raise NotImplementedError("Deleted metadata not implemented yet")
        meta = {}
        for record in reversed(self._fetched_records):
            if record.key.startswith(b"\x00\x00\x00\x00") and record.state == ccl_leveldb.KeyState.Live:
                # we only want live keys and the newest version thereof (highest seq)
                if record.key not in meta or meta[record.key].seq < record.seq:
                    meta[record.key] = record

        return meta

    def _get_raw_database_metadata(self, live_only=True):
        if not live_only:
            raise NotImplementedError("Deleted metadata not implemented yet")

        db_meta = {}

        for db_id in self.global_metadata.db_ids:

            prefix = IndexedDb.make_prefix(db_id.dbid_no, 0, 0)
            for record in reversed(self._fetched_records):
                if record.key.startswith(prefix) and record.state == ccl_leveldb.KeyState.Live:
                    # we only want live keys and the newest version thereof (highest seq)
                    meta_type = record.key[len(prefix)]
                    old_version = db_meta.get((db_id.dbid_no, meta_type))
                    if old_version is None or old_version.seq < record.seq:
                        db_meta[(db_id.dbid_no, meta_type)] = record

        return db_meta

    def _get_raw_object_store_metadata(self, live_only=True):
        if not live_only:
            raise NotImplementedError("Deleted metadata not implemented yet")

        os_meta = {}

        for db_id in self.global_metadata.db_ids:

            prefix = IndexedDb.make_prefix(db_id.dbid_no, 0, 0, [50])

            for record in reversed(self._fetched_records):
                if record.key.startswith(prefix) and record.state == ccl_leveldb.KeyState.Live:
                    # we only want live keys and the newest version thereof (highest seq)
                    objstore_id, varint_raw = _le_varint_from_bytes(record.key[len(prefix):])
                    meta_type = record.key[len(prefix) + len(varint_raw)]

                    old_version = os_meta.get((db_id.dbid_no, objstore_id, meta_type))

                    if old_version is None or old_version.seq < record.seq:
                        os_meta[(db_id.dbid_no, objstore_id, meta_type)] = record

        return os_meta

    def read_record_precursor(
            self, key: IdbKey, db_id: int, store_id: int, buffer: bytes,
            bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any],
            external_data_path: typing.Optional[str] = None):
        val_idx = 0
        trailer = None
        blink_type_tag = buffer[val_idx]
        if blink_type_tag != 0xff:
            # TODO: probably don't want to fail hard here long term...
            if bad_deserializer_data_handler is not None:
                bad_deserializer_data_handler(key, buffer)
                return None
            else:
                raise ValueError("Blink type tag not present")

        val_idx += 1

        blink_version, varint_raw = _le_varint_from_bytes(buffer[val_idx:])

        val_idx += len(varint_raw)

        # Peek the next byte to work out if the data is held externally:
        # third_party/blink/renderer/modules/indexeddb/idb_value_wrapping.cc
        if buffer[val_idx] == 0x01:  # kReplaceWithBlob
            val_idx += 1
            externally_serialized_blob_size, varint_raw = _le_varint_from_bytes(buffer[val_idx:])
            val_idx += len(varint_raw)
            externally_serialized_blob_index, varint_raw = _le_varint_from_bytes(buffer[val_idx:])
            val_idx += len(varint_raw)

            try:
                info = self.get_blob_info(db_id, store_id, key.raw_key, externally_serialized_blob_index)
            except KeyError:
                info = None

            if info is not None:
                data_path = pathlib.Path(str(db_id), f"{info.blob_number >> 8:02x}", f"{info.blob_number:x}")
                try:
                    blob = self.get_blob(db_id, store_id, key.raw_key, externally_serialized_blob_index).read()
                except FileNotFoundError:
                    if bad_deserializer_data_handler is not None:
                        bad_deserializer_data_handler(key, buffer)
                        return None
                    raise

                return self.read_record_precursor(
                    key, db_id, store_id,
                    blob,
                    bad_deserializer_data_handler, str(data_path))
            else:
                return None
        else:
            if blink_version >= BlinkTrailer.MIN_WIRE_FORMAT_VERSION_FOR_TRAILER:
                trailer = BlinkTrailer.from_buffer(buffer, val_idx)  # TODO: do something with the trailer
                val_idx += BlinkTrailer.TRAILER_SIZE

        obj_raw = io.BytesIO(buffer[val_idx:])

        return blink_version, obj_raw, trailer, external_data_path

    def iterate_records(
            self, db_id: int, store_id: int, *,
            live_only=False, bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any] = None):
        blink_deserializer = ccl_blink_value_deserializer.BlinkV8Deserializer()

        # goodness me this is a slow way of doing things
        prefix = IndexedDb.make_prefix(db_id, store_id, 1)

        for record in self._fetched_records:
            if record.key.startswith(prefix):
                key = IdbKey(record.key[len(prefix):])
                if not record.value:
                    # empty values will obviously fail, returning None is probably better than dying.
                    yield IndexedDbRecord(self, db_id, store_id, key, None,
                                          record.state == ccl_leveldb.KeyState.Live, record.seq, record.origin_file)
                    continue
                value_version, varint_raw = _le_varint_from_bytes(record.value)
                val_idx = len(varint_raw)
                # read the blink envelope
                precursor = self.read_record_precursor(
                    key, db_id, store_id, record.value[val_idx:], bad_deserializer_data_handler)
                if precursor is None:
                    continue  # only returns None on error, handled in the function if a bad_deserializer_data_handler is provided

                blink_version, obj_raw, trailer, external_path = precursor

                try:
                    deserializer = ccl_v8_value_deserializer.Deserializer(
                        obj_raw, host_object_delegate=blink_deserializer.read)
                    value = deserializer.read()
                except Exception:
                    if bad_deserializer_data_handler is not None:
                        bad_deserializer_data_handler(key, record.value)
                        continue
                    raise
                yield IndexedDbRecord(self, db_id, store_id, key, value,
                                      record.state == ccl_leveldb.KeyState.Live,
                                      record.seq, record.origin_file, external_path)

    def get_blob_info(self, db_id: int, store_id: int, raw_key: bytes, file_index: int) -> IndexedDBExternalObject:
        # if db_id > 0x7f or store_id > 0x7f:
        #     raise NotImplementedError("there could be this many dbs, but I don't support it yet")

        if result := self._blob_lookup_cache.get((db_id, store_id, raw_key, file_index)):
            return result

        # goodness me this is a slow way of doing things,
        # TODO: we should at least cache along the way to our record
        # prefix = bytes([0, db_id, store_id, 3])
        prefix = IndexedDb.make_prefix(db_id, store_id, 3)
        for record in self._fetched_records:
            if record.user_key.startswith(prefix):
                this_raw_key = record.user_key[len(prefix):]
                buff = io.BytesIO(record.value)
                idx = 0
                while buff.tell() < len(record.value):
                    blob_info = IndexedDBExternalObject.from_stream(buff)
                    self._blob_lookup_cache[(db_id, store_id, this_raw_key, idx)] = blob_info
                    idx += 1
                # if this_raw_key == raw_key:
                #     break

        if result := self._blob_lookup_cache.get((db_id, store_id, raw_key, file_index)):
            return result
        else:
            raise KeyError((db_id, store_id, raw_key, file_index))

    def get_blob(self, db_id: int, store_id: int, raw_key: bytes, file_index: int) -> typing.BinaryIO:
        # Some detail here: https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/docs/README.md
        if self._blob_dir is None:
            raise ValueError("Can't resolve blob if blob dir is not set")
        info = self.get_blob_info(db_id, store_id, raw_key, file_index)

        # path will be: origin.blob/database id/top 16 bits of blob number with two digits/blob number
        # TODO: check if this is still the case on non-windows systems
        path = pathlib.Path(self._blob_dir, f"{db_id:x}", f"{info.blob_number >> 8:02x}", f"{info.blob_number:x}")

        if path.exists():
            return path.open("rb")

        raise FileNotFoundError(path)

    def get_undo_task_scopes(self):
        # https://github.com/chromium/chromium/blob/master/components/services/storage/indexed_db/scopes/leveldb_scopes_coding.cc

        # Prefix will be 00 00 00 00 32 (00|01|02) (varint of scope number) 00
        # 00 00 00 00 = Global metadata
        # 32 = kScopesPrefixByte from indexed_db_leveldb_coding.cc
        # (00|01|02) = one of: kGlobalMetadataByte, kScopesMetadataByte or kLogByte from leveldb_scopes_coding.h
        # (varint of scope)
        # 00 = kUndoTasksByte from leveldb_scopes_coding.h

        # This is a slow way of doing this:
        prefix = bytes.fromhex("00 00 00 00 32")
        for record in self._fetched_records:
            if record.state != ccl_leveldb.KeyState.Live:
                continue
            if record.user_key.startswith(prefix):
                # process the key first as they define what we'll do later
                o = len(prefix)
                metadata_byte = record.user_key[o]
                assert metadata_byte in (0, 1, 2)  # TODO: replace with real exception

                o += 1

                if metadata_byte == 0:  # global meta
                    # print(f"Global metadata:\t{record.user_key.hex(' ')}")
                    continue  # Don't currently think I need this to do the work
                elif metadata_byte == 1:  # scope meta
                    # print(f"Scope metadata:\t{record.user_key.hex(' ')}")
                    # scope_number, varint_bytes = _le_varint_from_bytes(record.user_key)
                    # o += len(varint_bytes)
                    continue  # Don't currently think I need this to do the work
                elif metadata_byte == 2:  # log
                    scope_number, varint_bytes = _le_varint_from_bytes(record.user_key)
                    o += len(varint_bytes)
                    undo_byte = record.key[o]
                    if undo_byte != 0:
                        continue
                    o += 1
                    # print(f"Log\t{record.user_key.hex(' ')}")
                    undo_sequence_number, = struct.unpack(">q", record.user_key[o:o + 8])

                    # Value should be a LevelDBScopesUndoTask protobuf
                    # (indexed_db_components\indexed_db\scopes\scopes_metadata.proto).
                    # We're looking for a "Put" protobuf (first and only tag should be a Message numbered 1, with two
                    # bytes values numbered 1 and 2 which are the original key and value respectively.
                    # To reduce the need for dependencies, as they are so simple, the protobuf can be decoded "manually"
                    with io.BytesIO(record.value) as value_stream:
                        root_tag_raw = read_le_varint(value_stream)
                        root_number = root_tag_raw >> 3
                        if root_tag_raw & 0x07 != 2 or root_number != 1:
                            assert root_number in (2, 3)  # TODO: remove?
                            continue  # I don't think I need to raise an exception here?
                        data_length = read_le_varint(value_stream)
                        inner_value_bytes = value_stream.read(data_length)
                        if len(inner_value_bytes) != data_length:
                            raise ValueError("Couldn't get all data when reading the LevelDBScopesUndoTask")

                    record_key_raw = None
                    record_value_raw = None
                    with io.BytesIO(inner_value_bytes) as inner_value_stream:
                        while inner_value_stream.tell() < len(inner_value_bytes) and (
                                record_key_raw is None or record_value_raw is None):
                            tag_raw = read_le_varint(inner_value_stream)
                            assert tag_raw & 0x07 == 2
                            tag_number = tag_raw >> 3
                            data_length = read_le_varint(inner_value_stream)
                            data = inner_value_stream.read(data_length)
                            if len(data) != data_length:
                                raise ValueError("Couldn't get enough from the protobuf in LevelDBScopesUndoTask")
                            if tag_number == 1:
                                record_key_raw = data
                            elif tag_number == 2:
                                record_value_raw = data
                            else:
                                raise ValueError("Unexpected message in LevelDBScopesUndoTask")

                    if not record_value_raw:
                        continue  # I don't think we need to go further here

                    with io.BytesIO(record_key_raw) as record_key_stream:
                        db_id, object_store, index_id, length = IndexedDb.read_prefix(record_key_stream)
                        if db_id < 1 or object_store < 1 or index_id < 1:
                            continue  # only work with indexeddb record records

                        key = IdbKey(record_key_stream.read())

                        yield key, record_value_raw

    def close(self):
        self._db.close()

    @property
    def database_path(self):
        return self._db.in_dir_path

    def __enter__(self) -> "IndexedDb":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

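# Example (for illustration): the key prefix for database 1, object store 1,
# index 1 (the record index) packs the component byte-lengths into the first
# byte, and read_prefix round-trips it:
#
#     >>> IndexedDb.make_prefix(1, 1, 1).hex()
#     '00010101'
#     >>> IndexedDb.read_prefix(io.BytesIO(bytes.fromhex("00010101")))
#     (1, 1, 1, 4)
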
class WrappedObjectStore:
    """
    A wrapper class around a "raw" IndexedDb which simplifies accessing records related to an object store. Usually only
    created by a WrappedDatabase.
    """
    def __init__(self, raw_db: IndexedDb, dbid_no: int, obj_store_id: int):
        self._raw_db = raw_db
        self._dbid_no = dbid_no
        self._obj_store_id = obj_store_id

    @property
    def object_store_id(self) -> int:
        return self._obj_store_id

    @property
    def name(self) -> str:
        return self._raw_db.get_object_store_metadata(
            self._dbid_no, self._obj_store_id, ObjectStoreMetadataType.StoreName)

    @staticmethod
    def _log_error(key: IdbKey, data: bytes):
        sys.stderr.write(f"ERROR decoding key: {key}\n")

    def get_blob(self, raw_key: bytes, file_index: int) -> typing.BinaryIO:
        """
        Deprecated: use IndexedDbRecord.get_blob_stream

        :param raw_key: raw key of the record from which the blob originates
        :param file_index: the file/blob index from a ccl_blink_value_deserializer.BlobIndex
        :return: a file-like object of the blob
        """

        return self._raw_db.get_blob(self._dbid_no, self.object_store_id, raw_key, file_index)

    # def __iter__(self):
    #     yield from self._raw_db.iterate_records(self._dbid_no, self._obj_store_id)

    def iterate_records(
            self, *, live_only=False, errors_to_stdout=False,
            bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any] = None):

        def _handler(key, record):
            if bad_deserializer_data_handler is not None:
                bad_deserializer_data_handler(key, record)
            if errors_to_stdout:
                WrappedObjectStore._log_error(key, record)

        handler = _handler if errors_to_stdout or bad_deserializer_data_handler is not None else None

        yield from self._raw_db.iterate_records(
            self._dbid_no, self._obj_store_id, live_only=live_only,
            bad_deserializer_data_handler=handler)

    def __repr__(self):
        return f"<WrappedObjectStore: object_store_id={self.object_store_id}; name={self.name}>"

class WrappedDatabase:
    """
    A wrapper class around the raw "IndexedDb" class which simplifies access to a Database in the IndexedDb. Usually
    only created by WrappedIndexDB.
    """
    def __init__(self, raw_db: IndexedDb, dbid: DatabaseId):
        self._raw_db = raw_db
        self._dbid = dbid

        names = []
        for obj_store_id in range(1, self.object_store_count + 1):
            names.append(self._raw_db.get_object_store_metadata(
                self.db_number, obj_store_id, ObjectStoreMetadataType.StoreName))
        self._obj_store_names = tuple(names)
        # pre-compile object store wrappers as there's little overhead
        self._obj_stores = tuple(
            WrappedObjectStore(
                self._raw_db, self.db_number, i) for i in range(1, self.object_store_count + 1))

    @property
    def name(self) -> str:
        """
        :return: the name of this WrappedDatabase
        """
        return self._dbid.name

    @property
    def origin(self) -> str:
        """
        :return: the origin (host name) for this WrappedDatabase
        """
        return self._dbid.origin

    @property
    def db_number(self) -> int:
        """
        :return: the numerical ID assigned to this WrappedDatabase
        """
        return self._dbid.dbid_no

    @property
    def object_store_count(self) -> int:
        """
        :return: the "MaximumObjectStoreId" value for this database; NB this may not be the *actual* number of object
            stores which can be read - it is possible that some object stores may be deleted. Use len() to check the
            number of object stores you can actually access
        """
        # NB obj store ids are enumerated from 1.
        return self._raw_db.get_database_metadata(
            self.db_number,
            DatabaseMetadataType.MaximumObjectStoreId) or 0  # returns None if there are none.

    @property
    def object_store_names(self) -> typing.Iterable[str]:
        """
        :return: yields the names of the object stores in this WrappedDatabase
        """
        yield from self._obj_store_names

    def get_object_store_by_id(self, obj_store_id: int) -> WrappedObjectStore:
        """
        :param obj_store_id: the numerical ID for an object store in this WrappedDatabase
        :return: the WrappedObjectStore with the ID provided
        """
        if obj_store_id > 0 and obj_store_id <= self.object_store_count:
            return self._obj_stores[obj_store_id - 1]
        raise ValueError("obj_store_id must be greater than zero and less or equal to object_store_count "
                         "NB object stores are enumerated from 1 - there is no store with id 0")

    def get_object_store_by_name(self, name: str) -> WrappedObjectStore:
        """
        :param name: the name of an object store in this WrappedDatabase
        :return: the WrappedObjectStore with the name provided
        """
        if name in self:
            return self.get_object_store_by_id(self._obj_store_names.index(name) + 1)
        raise KeyError(f"{name} is not an object store in this database")

    def __iter__(self) -> typing.Iterable[WrappedObjectStore]:
        """
        :return: yields the object stores in this WrappedDatabase
        """
        yield from self._obj_stores

    def __len__(self) -> int:
        """
        :return: the number of object stores accessible in this WrappedDatabase
        """
        return len(self._obj_stores)

    def __contains__(self, item: str) -> bool:
        """
        :param item: the name of an object store in this WrappedDatabase
        :return: True if the name provided matches one of the Object stores in this WrappedDatabase
        """
        return item in self._obj_store_names

    def __getitem__(self, item: typing.Union[int, str]) -> WrappedObjectStore:
        """
        :param item: either the numerical ID of an object store (as an int) or the name of an object store in this
            WrappedDatabase
        :return: the WrappedObjectStore identified by item
        """
        if isinstance(item, int):
            return self.get_object_store_by_id(item)
        elif isinstance(item, str):
            return self.get_object_store_by_name(item)
        raise TypeError("Key can only be str (name) or int (id number)")

    def __repr__(self):
        return f"<WrappedDatabase: id={self.db_number}; name={self.name}; origin={self.origin}>"

class WrappedIndexDB:
    """
    A wrapper object around the "raw" IndexedDb class. This should be used in most cases as the code required to use it
    is simpler and more pythonic.
    """
    def __init__(self, leveldb_dir: os.PathLike, leveldb_blob_dir: os.PathLike = None):
        self._raw_db = IndexedDb(leveldb_dir, leveldb_blob_dir)
        self._multiple_origins = len(set(x.origin for x in self._raw_db.global_metadata.db_ids)) > 1

        self._db_number_lookup = {
            x.dbid_no: WrappedDatabase(self._raw_db, x)
            for x in self._raw_db.global_metadata.db_ids}
        # set origin to 0 if there's only 1, and we'll ignore it in all lookups
        self._db_name_lookup = {
            (x.name, x.origin if self.has_multiple_origins else 0): x
            for x in self._db_number_lookup.values()}

    def close(self):
        self._raw_db.close()

    @property
    def database_count(self) -> int:
        """
        :return: The number of databases in this IndexedDB
        """
        return len(self._db_number_lookup)

    @property
    def database_ids(self) -> typing.Iterable[DatabaseId]:
        """
        :return: yields DatabaseId objects which define the databases in this IndexedDb
        """
        yield from self._raw_db.global_metadata.db_ids

    @property
    def has_multiple_origins(self) -> bool:
        return self._multiple_origins

    def __len__(self):
        """
        :return: the number of databases in this IndexedDb
        """
        return len(self._db_number_lookup)

    def __contains__(self, item: typing.Union[str, int, tuple[str, str]]):
        """
        :param item: either a database id number, the name of a database (as a string), or (if the database has multiple
            origins), a tuple of database name and origin
        :return: True if this IndexedDb contains the referenced database identifier
        """
        if isinstance(item, str):
            if self.has_multiple_origins:
                raise ValueError(
                    "Database contains multiple origins, lookups must be provided as a tuple of (name, origin)")
            return (item, 0) in self._db_name_lookup
        elif isinstance(item, tuple) and len(item) == 2:
            name, origin = item
            if not self.has_multiple_origins:
                origin = 0  # origin ignored if not needed
            return (name, origin) in self._db_name_lookup
        elif isinstance(item, int):
            return item in self._db_number_lookup
        elif isinstance(item, DatabaseId):
            hit = self._db_number_lookup.get(item.dbid_no)
            return hit is not None and hit.name == item.name and hit.origin == item.origin
        else:
            raise TypeError("keys must be provided as a tuple of (name, origin) or a str (if only single origin) or int")

    def __getitem__(self, item: typing.Union[DatabaseId, int, str, typing.Tuple[str, str]]) -> WrappedDatabase:
        """
        :param item: either a DatabaseId object, a database id number, the name of a database (as a string), or
            (if the database has multiple origins), a tuple of database name and origin
        :return: the WrappedDatabase referenced by the id in item
        """
        if isinstance(item, DatabaseId):
            if item.dbid_no in self._db_number_lookup:
                result = self._db_number_lookup[item.dbid_no]
                if result.name == item.name and result.origin == item.origin:
                    return result
                else:
                    raise KeyError(item)
            else:
                raise KeyError(item)
        if isinstance(item, int):
            if item in self._db_number_lookup:
                return self._db_number_lookup[item]
            else:
                raise KeyError(item)
        elif isinstance(item, str):
            if self.has_multiple_origins:
                raise ValueError(
                    "Database contains multiple origins, indexes must be provided as a tuple of (name, origin)")
            if item in self:
                return self._db_name_lookup[item, 0]
            else:
                raise KeyError(item)
        elif isinstance(item, tuple) and len(item) == 2:
            name, origin = item
            if not self.has_multiple_origins:
                origin = 0  # origin ignored if not needed
            if (name, origin) in self:
                return self._db_name_lookup[name, origin]
            else:
                raise KeyError(item)

        raise TypeError("Lookups must be one of int, str or tuple of name and origin")

    def __repr__(self):
        return f"<WrappedIndexDB: {self._raw_db.database_path}>"

    def __enter__(self) -> "WrappedIndexDB":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
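# Usage sketch (illustrative; the profile paths below are hypothetical examples of a
# Chromium profile's IndexedDB folders - substitute real ones):
#
#     from ccl_chromium_reader.ccl_chromium_indexeddb import WrappedIndexDB
#
#     with WrappedIndexDB(
#             "Profile/IndexedDB/https_example.com_0.indexeddb.leveldb",
#             "Profile/IndexedDB/https_example.com_0.indexeddb.blob") as wrapper:
#         for db_id in wrapper.database_ids:
#             db = wrapper[db_id]
#             for obj_store in db:
#                 for record in obj_store.iterate_records():
#                     print(record.database_name, record.object_store_name, record.key, record.value)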