linear-mcp-fast 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccl_chromium_reader/__init__.py +2 -0
- ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
- ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
- ccl_chromium_reader/ccl_chromium_history.py +357 -0
- ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
- ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
- ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
- ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
- ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
- ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
- ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
- ccl_chromium_reader/common.py +19 -0
- ccl_chromium_reader/download_common.py +78 -0
- ccl_chromium_reader/profile_folder_protocols.py +276 -0
- ccl_chromium_reader/serialization_formats/__init__.py +0 -0
- ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
- ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
- ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
- ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
- ccl_chromium_reader/storage_formats/__init__.py +0 -0
- ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
- ccl_simplesnappy/__init__.py +1 -0
- ccl_simplesnappy/ccl_simplesnappy.py +306 -0
- linear_mcp_fast/__init__.py +8 -0
- linear_mcp_fast/__main__.py +6 -0
- linear_mcp_fast/reader.py +433 -0
- linear_mcp_fast/server.py +367 -0
- linear_mcp_fast/store_detector.py +117 -0
- linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
- linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
- linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
- linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
- linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
- tools_and_utilities/Chromium_dump_local_storage.py +111 -0
- tools_and_utilities/Chromium_dump_session_storage.py +92 -0
- tools_and_utilities/benchmark.py +35 -0
- tools_and_utilities/ccl_chrome_audit.py +651 -0
- tools_and_utilities/dump_indexeddb_details.py +59 -0
- tools_and_utilities/dump_leveldb.py +53 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, CCL Forensics
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
5
|
+
the Software without restriction, including without limitation the rights to
|
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
8
|
+
so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
20
|
+
"""
|
|
21
|
+
import dataclasses
|
|
22
|
+
import datetime
|
|
23
|
+
import typing
|
|
24
|
+
import pathlib
|
|
25
|
+
import collections.abc as col_abc
|
|
26
|
+
|
|
27
|
+
import gzip
|
|
28
|
+
import zlib
|
|
29
|
+
import brotli
|
|
30
|
+
|
|
31
|
+
from . import ccl_chromium_localstorage
|
|
32
|
+
from . import ccl_chromium_sessionstorage
|
|
33
|
+
from . import ccl_chromium_indexeddb
|
|
34
|
+
from . import ccl_chromium_history
|
|
35
|
+
from . import ccl_chromium_cache
|
|
36
|
+
from . import ccl_shared_proto_db_downloads
|
|
37
|
+
|
|
38
|
+
from .common import KeySearch, is_keysearch_hit
|
|
39
|
+
|
|
40
|
+
__version__ = "0.4.1"
__description__ = "Module to consolidate and simplify access to data stores in the chrom(e|ium) profile folder"
__contact__ = "Alex Caithness"

# TODO: code currently assumes that lazy-loaded stores are present - they might not be, need some kind of guard object?
# TODO: paths are currently based around an assumption of a desktop layout. Mobile (and MacOS) will differ (e.g., the
#  cache location).

# Locations of the individual data stores, relative to the root of the profile
# folder (desktop layout - see the TODO above regarding other platforms).
SESSION_STORAGE_FOLDER_PATH = pathlib.Path("Session Storage")
LOCAL_STORAGE_FOLDER_PATH = pathlib.Path("Local Storage", "leveldb")
INDEXEDDB_FOLDER_PATH = pathlib.Path("IndexedDB")
HISTORY_DB_PATH = pathlib.Path("History")
CACHE_PATH = pathlib.Path("Cache", "Cache_Data")
SHARED_PROTO_DB_FOLDER_PATH = pathlib.Path("shared_proto_db")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclasses.dataclass(frozen=True, repr=False)
class CacheResult:
    """
    A single result yielded from a cache lookup: the cache key together with the
    (possibly decompressed) cached data, the parsed metadata, and the on-disk
    locations of both.

    NOTE(review): ``repr=False`` on the decorator suppresses generation of a
    dataclass ``__repr__`` entirely, so the ``repr=True`` on the ``key`` field
    currently has no effect - confirm whether a (partial) repr was intended.
    """
    # The cache key (identifies the cached resource, including its URL).
    key: ccl_chromium_cache.CacheKey = dataclasses.field(repr=True)
    # Parsed metadata (HTTP header attributes etc.); may be None for some entries.
    metadata: typing.Optional[ccl_chromium_cache.CachedMetadata]
    # The cached payload; None when the caller requested that data be omitted.
    data: typing.Optional[bytes]
    # On-disk location of the metadata record.
    metadata_location: ccl_chromium_cache.CacheFileLocation
    # On-disk location of the data record.
    data_location: ccl_chromium_cache.CacheFileLocation
    # True if `data` was decompressed (gzip/brotli/deflate) when it was read.
    was_decompressed: bool
    # Index of this result among multiple cache entries sharing the same key.
    duplicate_key_index: int
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ChromiumProfileFolder:
    """
    A class representing a Chrom(e|ium) profile folder with programmatic access to various different data stores.
    Where appropriate, resources are loaded on demand.
    """

    def __init__(self, path: pathlib.Path, *, cache_folder: typing.Optional[pathlib.Path]=None):
        """
        Constructor

        :param path: Path to the profile folder (usually named Default, Profile 1, Profile 2, etc.)
        :param cache_folder: optionally a path to a cache folder, for platforms (such as Android) which
            place the cache data outside the profile folder.
        :raises NotADirectoryError: if `path` (or a supplied `cache_folder`) is not an existing folder.
        """
        if not path.is_dir():
            raise NotADirectoryError(f"Could not find the folder: {path}")

        self._path = path

        if cache_folder is not None and not cache_folder.is_dir():
            raise NotADirectoryError(f"Could not find the folder: {cache_folder}")

        self._external_cache_folder = cache_folder

        # Data stores are populated lazily where appropriate
        # Webstorage
        self._local_storage: typing.Optional[ccl_chromium_localstorage.LocalStoreDb] = None
        self._session_storage: typing.Optional[ccl_chromium_sessionstorage.SessionStoreDb] = None

        # IndexedDb
        # Dictionary which when first populated will initially contain the domains as keys with None as the value for
        # each. This will be initially populated on demand.
        self._indexeddb_databases: typing.Optional[dict[str, typing.Optional[ccl_chromium_indexeddb.WrappedIndexDB]]] = None
        self._lazy_populate_indexeddb_list()

        # History
        self._history: typing.Optional[ccl_chromium_history.HistoryDatabase] = None

        # Cache
        self._cache: typing.Optional[ccl_chromium_cache.ChromiumCache] = None

    def close(self):
        """
        Closes any resources currently open in this profile folder.

        NOTE(review): the lazily loaded history and cache objects are not closed
        here - confirm whether those types hold open handles that should also be
        released.
        """
        if self._local_storage is not None:
            self._local_storage.close()
        if self._session_storage is not None:
            self._session_storage.close()
        # The dict itself always exists (populated in __init__); values remain
        # None until the corresponding database has actually been opened.
        for idb in self._indexeddb_databases.values():
            if idb is not None:
                idb.close()

    def _lazy_load_localstorage(self):
        # Open the Local Storage leveldb on first use.
        if self._local_storage is None:
            self._local_storage = ccl_chromium_localstorage.LocalStoreDb(self._path / LOCAL_STORAGE_FOLDER_PATH)

    def _lazy_load_sessionstorage(self):
        # Open the Session Storage leveldb on first use.
        if self._session_storage is None:
            self._session_storage = ccl_chromium_sessionstorage.SessionStoreDb(self._path / SESSION_STORAGE_FOLDER_PATH)

    def _lazy_populate_indexeddb_list(self):
        # Enumerate the per-host IndexedDB leveldb folders once; the databases
        # themselves are only opened on demand (values start as None).
        if self._indexeddb_databases is None:
            self._indexeddb_databases = {}
            for ldb_folder in (self._path / INDEXEDDB_FOLDER_PATH).glob("*.indexeddb.leveldb"):
                if ldb_folder.is_dir():
                    # Strip the ".indexeddb.leveldb" suffix (18 chars) to get the host id.
                    idb_id = ldb_folder.name[0:-18]
                    self._indexeddb_databases[idb_id] = None

    def _lazy_load_indexeddb(self, host: str):
        # Open the IndexedDB database for `host` on first use.
        # :raises KeyError: if `host` has no IndexedDB folder in this profile.
        self._lazy_populate_indexeddb_list()
        if host not in self._indexeddb_databases:
            raise KeyError(host)

        if self._indexeddb_databases[host] is None:
            ldb_path = self._path / INDEXEDDB_FOLDER_PATH / (host + ".indexeddb.leveldb")
            blob_path = self._path / INDEXEDDB_FOLDER_PATH / (host + ".indexeddb.blob")
            # The blob folder is optional - only present when the db holds blob data.
            blob_path = blob_path if blob_path.exists() else None
            self._indexeddb_databases[host] = ccl_chromium_indexeddb.WrappedIndexDB(ldb_path, blob_path)

    def _lazy_load_history(self):
        # Open the History database on first use.
        if self._history is None:
            self._history = ccl_chromium_history.HistoryDatabase(self._path / HISTORY_DB_PATH)

    def _lazy_load_cache(self):
        # Open the disk cache on first use, preferring an externally supplied
        # cache folder (e.g. Android) over the in-profile default location.
        # :raises ValueError: if the cache format cannot be identified.
        if self._cache is None:
            if self._external_cache_folder:
                cache_path = self._external_cache_folder
            else:
                cache_path = self._path / CACHE_PATH
            cache_class = ccl_chromium_cache.guess_cache_class(cache_path)
            if cache_class is None:
                raise ValueError(f"Data under {cache_path} could not be identified as a known cache type")
            self._cache = cache_class(cache_path)

    def iter_local_storage_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates the hosts in this profile's local storage
        """
        self._lazy_load_localstorage()
        yield from self._local_storage.iter_storage_keys()

    def iter_local_storage(
            self, storage_key: typing.Optional[KeySearch]=None, script_key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[ccl_chromium_localstorage.LocalStorageRecord]:
        """
        Iterates this profile's local storage records

        :param storage_key: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool;
            or None (the default) in which case all hosts are considered.
        :param script_key: script defined key for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool;
            or None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included (these will have
            None as values).
        :param raise_on_no_result: if True, a KeyError is raised if no matching records are found
            (the default is False).
        :return: iterable of LocalStorageRecords
        """
        self._lazy_load_localstorage()
        if storage_key is None and script_key is None:
            yield from self._local_storage.iter_all_records(include_deletions=include_deletions)
        elif storage_key is None:
            # Only a script key filter: scan all records and match on script_key.
            results = (
                x for x in self._local_storage.iter_all_records(include_deletions=include_deletions)
                if is_keysearch_hit(script_key, x.script_key))
            yielded = False
            for result in results:
                yield result
                yielded = True
            if not yielded and raise_on_no_result:
                raise KeyError(script_key)
        elif script_key is None:
            yield from self._local_storage.iter_records_for_storage_key(
                storage_key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)
        else:
            yield from self._local_storage.iter_records_for_script_key(
                storage_key, script_key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)

    def iter_local_storage_with_batches(
            self, storage_key: typing.Optional[KeySearch]=None, script_key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[tuple[ccl_chromium_localstorage.LocalStorageRecord, ccl_chromium_localstorage.LocalStorageBatch]]:
        """
        Iterates this profile's local storage records with associated batches where possible.

        :param storage_key: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
            None (the default) in which case all hosts are considered.
        :param script_key: script defined key for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
            None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included (these will have
            None as values).
        :param raise_on_no_result: if True, a KeyError is raised if no matching records are found
            (the default is False).
        :return: iterable of tuples of (LocalStorageRecord, LocalStorageBatch)
        """

        # iter_local_storage lazy loads the localstorage, so we don't need to check ahead of time.
        for rec in self.iter_local_storage(storage_key, script_key,
                                           include_deletions=include_deletions,
                                           raise_on_no_result=raise_on_no_result):
            batch = self._local_storage.find_batch(rec.leveldb_seq_number)
            yield rec, batch

    def iter_session_storage_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates this profile's session storage hosts
        """
        self._lazy_load_sessionstorage()
        yield from self._session_storage.iter_hosts()

    def iter_session_storage(
            self, host: typing.Optional[KeySearch]=None, key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[ccl_chromium_sessionstorage.SessionStoreValue]:
        """
        Iterates this profile's session storage records

        :param host: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and
            returns a bool; or None (the default) in which case all hosts are considered.
        :param key: script defined key for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
            None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included (these will have None as
            values).
        :param raise_on_no_result: if True, if no matching storage keys are found, raise a KeyError

        :return: iterable of SessionStoreValue
        """

        self._lazy_load_sessionstorage()
        if host is None and key is None:
            yield from self._session_storage.iter_all_records(include_deletions=include_deletions, include_orphans=True)
        elif host is None:
            # Only a key filter: scan all records and match on the record key.
            results = (
                rec for
                rec in self._session_storage.iter_all_records(
                    include_deletions=include_deletions, include_orphans=True)
                if is_keysearch_hit(key, rec.key)
            )
            yielded = False
            for result in results:
                yield result
                yielded = True
            if not yielded and raise_on_no_result:
                raise KeyError(key)
        elif key is None:
            yield from self._session_storage.iter_records_for_host(
                host, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)
        else:
            yield from self._session_storage.iter_records_for_session_storage_key(
                host, key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)

    def iter_indexeddb_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates the hosts present in the Indexed DB folder. These values are what should be used to load the databases
        directly.
        """
        self._lazy_populate_indexeddb_list()
        yield from self._indexeddb_databases.keys()

    def get_indexeddb(self, host: str) -> ccl_chromium_indexeddb.WrappedIndexDB:
        """
        Returns the database with the host provided. Should be one of the values returned by
        :func:`~iter_indexeddb_hosts`. The database will be opened on-demand if it hasn't previously been opened.

        :param host: the host to get
        :raises KeyError: if `host` is not present in this profile's IndexedDB folder.
        """
        if host not in self._indexeddb_databases:
            raise KeyError(host)

        self._lazy_load_indexeddb(host)
        return self._indexeddb_databases[host]

    def iter_indexeddb_records(
            self, host_id: typing.Optional[KeySearch], database_name: typing.Optional[KeySearch]=None,
            object_store_name: typing.Optional[KeySearch]=None, *,
            raise_on_no_result=False, include_deletions=False,
            bad_deserializer_data_handler: typing.Optional[typing.Callable[[ccl_chromium_indexeddb.IdbKey, bytes], typing.Any]] = None):
        """
        Iterates indexeddb records in this profile.

        :param host_id: the host for the records, relates to the host-named folder in the IndexedDB folder. The
            possible values for this profile are returned by :func:`~iter_indexeddb_hosts`. This can be one of:
            a single string; a collection of strings; a regex pattern; a function that takes a string (each host) and
            returns a bool; or None in which case all hosts are considered. Be cautious with supplying a parameter
            which will lead to unnecessary databases being opened as this has a set-up time for the first time it
            is opened.
        :param database_name: the database name for the records. This can be one of: a single string; a collection
            of strings; a regex pattern; a function that takes a string (each host) and returns a bool; or None (the
            default) in which case all hosts are considered.
        :param object_store_name: the object store name of the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool;
            or None (the default) in which case all hosts are considered.
        :param raise_on_no_result: if True, if no matching storage keys are found, raise a KeyError
        :param include_deletions: if True, records related to deletions will be included (these will have None as
            values).
        :param bad_deserializer_data_handler: a callback function which will be executed by the underlying
            indexeddb reader if invalid data is encountered during reading a record, rather than raising an exception.
            The function should take two arguments: an IdbKey object (which is the key of the bad record) and a bytes
            object (which is the raw data). The return value of the callback is ignored by the calling code. If this is
            None (the default) then any bad data will cause an exception to be raised.

        """
        self._lazy_populate_indexeddb_list()

        # probably not optimal performance, but we only do it once per call, and it's a lot neater.
        if host_id is None:
            found_hosts = list(self.iter_indexeddb_hosts())
        else:
            found_hosts = [x for x in self._indexeddb_databases.keys() if is_keysearch_hit(host_id, x)]

        if not found_hosts and raise_on_no_result:
            raise KeyError((host_id, database_name, object_store_name))

        yielded = False
        for found_host in found_hosts:
            idb = self.get_indexeddb(found_host)
            for idb_db_id in idb.database_ids:
                if database_name is None or is_keysearch_hit(database_name, idb_db_id.name):
                    idb_db = idb[idb_db_id]
                    for idb_db_objstore_name in idb_db.object_store_names:
                        if object_store_name is None or is_keysearch_hit(object_store_name, idb_db_objstore_name):
                            idb_db_objstore = idb_db.get_object_store_by_name(idb_db_objstore_name)
                            for rec in idb_db_objstore.iterate_records(
                                    live_only=not include_deletions,
                                    bad_deserializer_data_handler=bad_deserializer_data_handler):
                                yield rec
                                yielded = True

        if not yielded and raise_on_no_result:
            raise KeyError((host_id, database_name, object_store_name))

    def iterate_history_records(
            self, url: typing.Optional[KeySearch]=None, *,
            earliest: typing.Optional[datetime.datetime]=None, latest: typing.Optional[datetime.datetime]=None):
        """
        Iterates history records for this profile.

        :param url: a URL to search for. This can be one of: a single string; a collection of strings;
            a regex pattern; a function that takes a string (each host) and returns a bool; or None (the
            default) in which case all hosts are considered.
        :param earliest: an optional datetime which will be used to exclude records before this date.
            NB the date should be UTC to match the database. If None, no lower limit will be placed on
            timestamps.
        :param latest: an optional datetime which will be used to exclude records after this date.
            NB the date should be UTC to match the database. If None, no upper limit will be placed on
            timestamps.
        """
        self._lazy_load_history()
        yield from self._history.iter_history_records(url, earliest=earliest, latest=latest)

    @staticmethod
    def _decompress_cache_data(data: bytes, content_encoding: str) -> tuple[bool, bytes]:
        # Attempts to decompress `data` according to the content-encoding header
        # value. Returns (True, decompressed data) on success, otherwise
        # (False, original data) - either because the encoding is unsupported or
        # because decompression failed.
        try:
            if content_encoding.strip() == "gzip":
                return True, gzip.decompress(data)
            elif content_encoding.strip() == "br":
                return True, brotli.decompress(data)
            elif content_encoding.strip() == "deflate":
                return True, zlib.decompress(data, -zlib.MAX_WBITS)  # suppress trying to read a header
        except (EOFError, gzip.BadGzipFile, brotli.error, zlib.error):
            # Decompression failed - fall back to returning the raw data.
            return False, data

        # Unrecognised (or absent) content-encoding: return the data untouched.
        return False, data

    def _yield_cache_record(self, key: ccl_chromium_cache.CacheKey, decompress: bool, omit_data: bool):
        # Yields a CacheResult for each entry stored under `key` (a key may have
        # multiple entries - hence the duplicate_key_index on the result).
        metas = self._cache.get_metadata(key)
        if not omit_data:
            datas = self._cache.get_cachefile(key)
        else:
            # Data omitted: keep the lists aligned with None placeholders.
            datas = [None] * len(metas)
        meta_locations = self._cache.get_location_for_metadata(key)
        data_locations = self._cache.get_location_for_cachefile(key)

        if not (len(metas) == len(datas) == len(meta_locations) == len(data_locations)):
            raise ValueError("Data and metadata counts do not match")

        for idx, (meta, data, meta_location, data_location) in enumerate(
                zip(metas, datas, meta_locations, data_locations)):
            if decompress and data is not None and meta is not None:
                # content-encoding attribute values come back as a list; take the first (or "").
                content_encoding = (meta.get_attribute("content-encoding") or [""])[0]
                was_decompressed, data = self._decompress_cache_data(data, content_encoding)
            else:
                was_decompressed = False
            yield CacheResult(key, meta, data, meta_location, data_location, was_decompressed, idx)

    def iterate_cache(
            self,
            url: typing.Optional[KeySearch]=None, *, decompress=True, omit_cached_data=False,
            **kwargs: typing.Union[bool, KeySearch]) -> col_abc.Iterable[CacheResult]:
        """
        Iterates cache records for this profile.

        :param url: a URL to search for. This can be one of: a single string; a collection of strings;
            a regex pattern; a function that takes a string (each host) and returns a bool; or None (the
            default) in which case all records are considered.
        :param decompress: if True (the default), data from the cache which is compressed (as per the
            content-encoding header field) will be decompressed when read if the compression format is
            supported (currently deflate, gzip and brotli are supported).
        :param omit_cached_data: does not collect the cached data and omits it from each `CacheResult`
            object. Should be faster in cases when only metadata recovery is required.
        :param kwargs: further keyword arguments are used to search based upon header fields. The
            keyword should be the header field name, with underscores replacing hyphens (e.g.,
            content-encoding, becomes content_encoding). The value should be one of: a Boolean (in which
            case only records with this field present will be included if True, and vice versa); a single
            string; a collection of strings; a regex pattern; a function that takes a string (the value)
            and returns a bool.
        """

        self._lazy_load_cache()
        if url is None and not kwargs:
            # No filters at all: yield everything.
            for key in self._cache.cache_keys():
                yield from self._yield_cache_record(key, decompress, omit_cached_data)
        else:
            for key in self._cache.cache_keys():
                if url is not None and not is_keysearch_hit(url, key.url):
                    # Fail condition: URL doesn't match
                    continue
                if not kwargs:
                    # No metadata keyword arguments to check
                    yield from self._yield_cache_record(key, decompress, omit_cached_data)
                else:
                    metas = self._cache.get_metadata(key)
                    if not metas or all(x is None for x in metas):
                        # Fail condition: we had metadata to check, but no metadata to check against
                        continue
                    else:
                        # Collect the indices of entries whose metadata matches every kwarg filter.
                        meta_hit_indices = []
                        for meta_idx, meta in enumerate(metas):
                            # NOTE(review): individual entries in `metas` may be None (only the
                            # all-None case is excluded above) - confirm whether a None `meta`
                            # can reach the attribute checks below.
                            hit = True
                            for attribute_name, attribute_check in kwargs.items():
                                # Keyword names use underscores in place of the hyphens in header names.
                                attribute_name = attribute_name.replace("_", "-")
                                if isinstance(attribute_check, bool):
                                    # Boolean check: presence (True) / absence (False) of the field.
                                    if (attribute_check == True and
                                            not meta.has_declaration(attribute_name) and
                                            not meta.get_attribute(attribute_name)):
                                        hit = False
                                        break
                                    if (attribute_check == False and
                                            (meta.has_declaration(attribute_name) or
                                             meta.get_attribute(attribute_name))):
                                        hit = False
                                        break
                                else:
                                    # KeySearch check against each value of the attribute.
                                    attribute = meta.get_attribute(attribute_name)
                                    if not any(is_keysearch_hit(attribute_check, x) for x in attribute):
                                        hit = False
                                        break

                            if hit:
                                meta_hit_indices.append(meta_idx)

                        if meta_hit_indices:
                            if not omit_cached_data:
                                datas = self._cache.get_cachefile(key)
                            else:
                                # Data omitted: keep the lists aligned with None placeholders.
                                datas = [None] * len(metas)
                            metadata_locations = self._cache.get_location_for_metadata(key)
                            data_locations = self._cache.get_location_for_cachefile(key)

                            if not (len(metas) == len(datas) == len(metadata_locations) == len(data_locations)):
                                raise ValueError("Data and metadata counts do not match")

                            for i in meta_hit_indices:
                                meta = metas[i]
                                data = datas[i]
                                metadata_location = metadata_locations[i]
                                data_location = data_locations[i]

                                if decompress and data is not None:
                                    content_encoding = (meta.get_attribute("content-encoding") or [""])[0]
                                    was_decompressed, data = self._decompress_cache_data(data, content_encoding)
                                else:
                                    was_decompressed = False

                                yield CacheResult(
                                    key, meta, data, metadata_location, data_location, was_decompressed, i)

    def iter_downloads(
            self, *, download_url: typing.Optional[KeySearch]=None, tab_url: typing.Optional[KeySearch]=None):
        """
        Iterates download records for this profile. Records are drawn first from the
        shared_proto_db store and then from the History database.

        :param download_url: A URL related to the downloaded resource. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool;
            or None (the default) in which case all records are considered.
        :param tab_url: A URL related to the page the user was accessing when this download was started.
            This can be one of: a single string; a collection of strings; a regex pattern; a function that takes
            a string (each host) and returns a bool; or None (the default) in which case all records are considered.
        """
        for download in ccl_shared_proto_db_downloads.read_downloads(self._path / SHARED_PROTO_DB_FOLDER_PATH):
            # download_url matches against any URL in the chain; tab_url against
            # either the tab URL or the tab referrer URL.
            if ((download_url is None or any(is_keysearch_hit(download_url, url) for url in download.url_chain))
                    and
                    (tab_url is None
                     or is_keysearch_hit(tab_url, download.tab_url or "")
                     or is_keysearch_hit(tab_url, download.tab_referrer_url or ""))):
                yield download

        self._lazy_load_history()
        for download in self._history.iter_downloads(download_url=download_url, tab_url=tab_url):
            yield download

    def __enter__(self) -> "ChromiumProfileFolder":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release any opened stores when leaving a `with` block.
        self.close()

    @property
    def path(self) -> pathlib.Path:
        """The input path of this profile folder"""
        return self._path

    @property
    def local_storage(self) -> ccl_chromium_localstorage.LocalStoreDb:
        """The local storage object for this profile folder"""
        self._lazy_load_localstorage()
        return self._local_storage

    @property
    def session_storage(self) -> ccl_chromium_sessionstorage.SessionStoreDb:
        """The session storage object for this profile folder"""
        self._lazy_load_sessionstorage()
        return self._session_storage

    @property
    def cache(self) -> ccl_chromium_cache.ChromiumCache:
        """The cache for this profile folder"""
        self._lazy_load_cache()
        return self._cache

    @property
    def history(self) -> ccl_chromium_history.HistoryDatabase:
        """The history for this profile folder"""
        self._lazy_load_history()
        return self._history

    @property
    def browser_type(self) -> str:
        """The browser family this reader targets (always "Chromium")."""
        return "Chromium"
|