linear-mcp-fast 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. ccl_chromium_reader/__init__.py +2 -0
  2. ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
  3. ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
  4. ccl_chromium_reader/ccl_chromium_history.py +357 -0
  5. ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
  6. ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
  7. ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
  8. ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
  9. ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
  10. ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
  11. ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
  12. ccl_chromium_reader/common.py +19 -0
  13. ccl_chromium_reader/download_common.py +78 -0
  14. ccl_chromium_reader/profile_folder_protocols.py +276 -0
  15. ccl_chromium_reader/serialization_formats/__init__.py +0 -0
  16. ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
  17. ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
  18. ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
  19. ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
  20. ccl_chromium_reader/storage_formats/__init__.py +0 -0
  21. ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
  22. ccl_simplesnappy/__init__.py +1 -0
  23. ccl_simplesnappy/ccl_simplesnappy.py +306 -0
  24. linear_mcp_fast/__init__.py +8 -0
  25. linear_mcp_fast/__main__.py +6 -0
  26. linear_mcp_fast/reader.py +433 -0
  27. linear_mcp_fast/server.py +367 -0
  28. linear_mcp_fast/store_detector.py +117 -0
  29. linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
  30. linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
  31. linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
  32. linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
  33. linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
  34. tools_and_utilities/Chromium_dump_local_storage.py +111 -0
  35. tools_and_utilities/Chromium_dump_session_storage.py +92 -0
  36. tools_and_utilities/benchmark.py +35 -0
  37. tools_and_utilities/ccl_chrome_audit.py +651 -0
  38. tools_and_utilities/dump_indexeddb_details.py +59 -0
  39. tools_and_utilities/dump_leveldb.py +53 -0
@@ -0,0 +1,568 @@
1
+ """
2
+ Copyright 2024, CCL Forensics
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
20
+ """
21
+ import dataclasses
22
+ import datetime
23
+ import typing
24
+ import pathlib
25
+ import collections.abc as col_abc
26
+
27
+ import gzip
28
+ import zlib
29
+ import brotli
30
+
31
+ from . import ccl_chromium_localstorage
32
+ from . import ccl_chromium_sessionstorage
33
+ from . import ccl_chromium_indexeddb
34
+ from . import ccl_chromium_history
35
+ from . import ccl_chromium_cache
36
+ from . import ccl_shared_proto_db_downloads
37
+
38
+ from .common import KeySearch, is_keysearch_hit
39
+
40
__version__ = "0.4.1"
__description__ = "Module to consolidate and simplify access to data stores in the chrom(e|ium) profile folder"
__contact__ = "Alex Caithness"

# TODO: code currently assumes that lazy-loaded stores are present - they might not be, need some kind of guard object?
# TODO: paths are currently based around an assumption of a desktop layout. Mobile (and MacOS) will differ (e.g., the
#  cache location).

# Locations of the individual data stores, all relative to the root of the profile folder.
SESSION_STORAGE_FOLDER_PATH = pathlib.Path("Session Storage")
LOCAL_STORAGE_FOLDER_PATH = pathlib.Path("Local Storage") / "leveldb"
INDEXEDDB_FOLDER_PATH = pathlib.Path("IndexedDB")
HISTORY_DB_PATH = pathlib.Path("History")
CACHE_PATH = pathlib.Path("Cache") / "Cache_Data"
SHARED_PROTO_DB_FOLDER_PATH = pathlib.Path("shared_proto_db")
54
+
55
+
56
@dataclasses.dataclass(frozen=True)
class CacheResult:
    """
    A single record recovered from the cache, as yielded by ChromiumProfileFolder.iterate_cache.

    Only the key is included in the repr of this object; the remaining fields (notably the cached data itself) are
    deliberately left out of it. Previously ``repr=False`` was set on the dataclass decorator, which suppressed
    generation of ``__repr__`` altogether and so made the per-field ``repr=True`` setting ineffective.
    """
    # the cache key for this record
    key: ccl_chromium_cache.CacheKey
    # the metadata for this record (ChromiumProfileFolder allows for this being None)
    metadata: ccl_chromium_cache.CachedMetadata = dataclasses.field(repr=False)
    # the cached data; None when the data was omitted at the caller's request
    data: bytes = dataclasses.field(repr=False)
    # where the metadata was read from
    metadata_location: ccl_chromium_cache.CacheFileLocation = dataclasses.field(repr=False)
    # where the cached data was read from
    data_location: ccl_chromium_cache.CacheFileLocation = dataclasses.field(repr=False)
    # True if `data` was successfully decompressed after being read from the cache
    was_decompressed: bool = dataclasses.field(repr=False)
    # position of this record among the records which share the same key
    duplicate_key_index: int = dataclasses.field(repr=False)
65
+
66
+
67
class ChromiumProfileFolder:
    """
    A class representing a Chrom(e|ium) profile folder with programmatic access to various different data stores.
    Where appropriate, resources are loaded on demand.

    Instances can be used as context managers, in which case :func:`~close` is called on leaving the block.
    """

    def __init__(self, path: pathlib.Path, *, cache_folder: typing.Optional[pathlib.Path]=None):
        """
        Constructor

        :param path: Path to the profile folder (usually named Default, Profile 1, Profile 2, etc.)
        :param cache_folder: optionally a path to a cache folder, for platforms (such as Android) which
               place the cache data outside the profile folder.
        :raises NotADirectoryError: if path (or cache_folder, when given) is not an existing directory.
        """
        if not path.is_dir():
            raise NotADirectoryError(f"Could not find the folder: {path}")

        self._path = path

        if cache_folder is not None and not cache_folder.is_dir():
            raise NotADirectoryError(f"Could not find the folder: {cache_folder}")

        self._external_cache_folder = cache_folder

        # Data stores are populated lazily where appropriate
        # Webstorage
        self._local_storage: typing.Optional[ccl_chromium_localstorage.LocalStoreDb] = None
        self._session_storage: typing.Optional[ccl_chromium_sessionstorage.SessionStoreDb] = None

        # History
        self._history: typing.Optional[ccl_chromium_history.HistoryDatabase] = None

        # Cache
        self._cache: typing.Optional[ccl_chromium_cache.ChromiumCache] = None

        # IndexedDb
        # Dictionary which when first populated will initially contain the domains as keys with None as the value for
        # each. The databases themselves are only opened when they are asked for.
        self._indexeddb_databases: typing.Optional[dict[str, typing.Optional[ccl_chromium_indexeddb.WrappedIndexDB]]] = None
        self._lazy_populate_indexeddb_list()

    def close(self):
        """
        Closes any resources currently open in this profile folder. Stores which were never loaded are skipped.
        """
        if self._local_storage is not None:
            self._local_storage.close()
        if self._session_storage is not None:
            self._session_storage.close()
        for idb in (self._indexeddb_databases or {}).values():
            if idb is not None:
                idb.close()

        # The history database and the cache are opened on demand just like the stores above, so they have to be
        # released here too. close() is looked up defensively as these classes are defined in other modules.
        for store in (self._history, self._cache):
            close_store = getattr(store, "close", None)
            if callable(close_store):
                close_store()

    def _lazy_load_localstorage(self):
        """Opens the local storage database if it has not been opened yet."""
        if self._local_storage is None:
            self._local_storage = ccl_chromium_localstorage.LocalStoreDb(self._path / LOCAL_STORAGE_FOLDER_PATH)

    def _lazy_load_sessionstorage(self):
        """Opens the session storage database if it has not been opened yet."""
        if self._session_storage is None:
            self._session_storage = ccl_chromium_sessionstorage.SessionStoreDb(self._path / SESSION_STORAGE_FOLDER_PATH)

    def _lazy_populate_indexeddb_list(self):
        """
        Builds the dictionary of IndexedDB hosts (one per "*.indexeddb.leveldb" folder) if it doesn't exist yet.
        Every host initially maps to None; the database is opened by :func:`~_lazy_load_indexeddb`.
        """
        if self._indexeddb_databases is not None:
            return

        folder_suffix = ".indexeddb.leveldb"
        self._indexeddb_databases = {}
        for ldb_folder in (self._path / INDEXEDDB_FOLDER_PATH).glob("*" + folder_suffix):
            if ldb_folder.is_dir():
                # the host id is the folder name with the suffix removed
                idb_id = ldb_folder.name[:-len(folder_suffix)]
                self._indexeddb_databases[idb_id] = None

    def _lazy_load_indexeddb(self, host: str):
        """
        Opens the IndexedDB database for the host if it has not been opened yet.

        :param host: the host to open, as returned by :func:`~iter_indexeddb_hosts`
        :raises KeyError: if there is no IndexedDB folder for the host
        """
        self._lazy_populate_indexeddb_list()
        if host not in self._indexeddb_databases:
            raise KeyError(host)

        if self._indexeddb_databases[host] is None:
            ldb_path = self._path / INDEXEDDB_FOLDER_PATH / (host + ".indexeddb.leveldb")
            blob_path = self._path / INDEXEDDB_FOLDER_PATH / (host + ".indexeddb.blob")
            # the blob folder is optional
            blob_path = blob_path if blob_path.exists() else None
            self._indexeddb_databases[host] = ccl_chromium_indexeddb.WrappedIndexDB(ldb_path, blob_path)

    def _lazy_load_history(self):
        """Opens the history database if it has not been opened yet."""
        if self._history is None:
            self._history = ccl_chromium_history.HistoryDatabase(self._path / HISTORY_DB_PATH)

    def _lazy_load_cache(self):
        """
        Opens the cache if it has not been opened yet, preferring the external cache folder when one was given.

        :raises ValueError: if the type of the cache could not be identified
        """
        if self._cache is None:
            if self._external_cache_folder:
                cache_path = self._external_cache_folder
            else:
                cache_path = self._path / CACHE_PATH
            cache_class = ccl_chromium_cache.guess_cache_class(cache_path)
            if cache_class is None:
                raise ValueError(f"Data under {cache_path} could not be identified as a known cache type")
            self._cache = cache_class(cache_path)

    def iter_local_storage_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates the hosts in this profile's local storage
        """
        self._lazy_load_localstorage()
        yield from self._local_storage.iter_storage_keys()

    def iter_local_storage(
            self, storage_key: typing.Optional[KeySearch]=None, script_key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[ccl_chromium_localstorage.LocalStorageRecord]:
        """
        Iterates this profile's local storage records

        :param storage_key: storage key (host) for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
               None (the default) in which case all hosts are considered.
        :param script_key: script defined key for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
               None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included
               (these will have None as values).
        :param raise_on_no_result: if True, raise a KeyError if no matching records are found. Defaults to False.
        :return: iterable of LocalStorageRecords
        """
        self._lazy_load_localstorage()
        if storage_key is None and script_key is None:
            yield from self._local_storage.iter_all_records(include_deletions=include_deletions)
        elif storage_key is None:
            # No host restriction: filter every record on its script key
            yielded = False
            for rec in self._local_storage.iter_all_records(include_deletions=include_deletions):
                if is_keysearch_hit(script_key, rec.script_key):
                    yield rec
                    yielded = True
            if not yielded and raise_on_no_result:
                raise KeyError(script_key)
        elif script_key is None:
            yield from self._local_storage.iter_records_for_storage_key(
                storage_key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)
        else:
            yield from self._local_storage.iter_records_for_script_key(
                storage_key, script_key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)

    def iter_local_storage_with_batches(
            self, storage_key: typing.Optional[KeySearch]=None, script_key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[tuple[ccl_chromium_localstorage.LocalStorageRecord, ccl_chromium_localstorage.LocalStorageBatch]]:
        """
        Iterates this profile's local storage records with associated batches where possible.

        :param storage_key: storage key (host) for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
               None (the default) in which case all hosts are considered.
        :param script_key: script defined key for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
               None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included
               (these will have None as values).
        :param raise_on_no_result: if True, raise a KeyError if no matching records are found. Defaults to False.
        :return: iterable of tuples of (LocalStorageRecord, LocalStorageBatch)
        """

        # iter_local_storage lazy loads the localstorage, so we don't need to check ahead of time.
        for rec in self.iter_local_storage(storage_key, script_key,
                                           include_deletions=include_deletions,
                                           raise_on_no_result=raise_on_no_result):
            batch = self._local_storage.find_batch(rec.leveldb_seq_number)
            yield rec, batch

    def iter_session_storage_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates this profile's session storage hosts
        """
        self._lazy_load_sessionstorage()
        yield from self._session_storage.iter_hosts()

    def iter_session_storage(
            self, host: typing.Optional[KeySearch]=None, key: typing.Optional[KeySearch]=None, *,
            include_deletions=False, raise_on_no_result=False
    ) -> col_abc.Iterable[ccl_chromium_sessionstorage.SessionStoreValue]:
        """
        Iterates this profile's session storage records

        :param host: storage key (host) for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string (each host) and
               returns a bool; or None (the default) in which case all hosts are considered.
        :param key: script defined key for the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string and returns a bool; or
               None (the default) in which case all keys are considered.
        :param include_deletions: if True, records related to deletions will be included (these will have None as
               values).
        :param raise_on_no_result: if True, raise a KeyError if no matching records are found. Defaults to False.

        :return: iterable of SessionStoreValue
        """

        self._lazy_load_sessionstorage()
        if host is None and key is None:
            yield from self._session_storage.iter_all_records(include_deletions=include_deletions, include_orphans=True)
        elif host is None:
            # No host restriction: filter every record (orphans included) on its key
            yielded = False
            for rec in self._session_storage.iter_all_records(
                    include_deletions=include_deletions, include_orphans=True):
                if is_keysearch_hit(key, rec.key):
                    yield rec
                    yielded = True
            if not yielded and raise_on_no_result:
                raise KeyError(key)
        elif key is None:
            yield from self._session_storage.iter_records_for_host(
                host, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)
        else:
            yield from self._session_storage.iter_records_for_session_storage_key(
                host, key, include_deletions=include_deletions, raise_on_no_result=raise_on_no_result)

    def iter_indexeddb_hosts(self) -> col_abc.Iterable[str]:
        """
        Iterates the hosts present in the Indexed DB folder. These values are what should be used to load the databases
        directly.
        """
        self._lazy_populate_indexeddb_list()
        yield from self._indexeddb_databases.keys()

    def get_indexeddb(self, host: str) -> ccl_chromium_indexeddb.WrappedIndexDB:
        """
        Returns the database with the host provided. Should be one of the values returned by
        :func:`~iter_indexeddb_hosts`. The database will be opened on-demand if it hasn't previously been opened.

        :param host: the host to get
        :raises KeyError: if the host is not present in the IndexedDB folder
        """
        # _lazy_load_indexeddb raises the KeyError for unknown hosts
        self._lazy_load_indexeddb(host)
        return self._indexeddb_databases[host]

    def iter_indexeddb_records(
            self, host_id: typing.Optional[KeySearch], database_name: typing.Optional[KeySearch]=None,
            object_store_name: typing.Optional[KeySearch]=None, *,
            raise_on_no_result=False, include_deletions=False,
            bad_deserializer_data_handler: typing.Optional[
                typing.Callable[[ccl_chromium_indexeddb.IdbKey, bytes], typing.Any]] = None):
        """
        Iterates indexeddb records in this profile.

        :param host_id: the host for the records, relates to the host-named folder in the IndexedDB folder. The
               possible values for this profile are returned by :func:`~iter_indexeddb_hosts`. This can be one of:
               a single string; a collection of strings; a regex pattern; a function that takes a string (each host) and
               returns a bool; or None in which case all hosts are considered. Be cautious with supplying a parameter
               which will lead to unnecessary databases being opened as this has a set-up time for the first time it
               is opened.
        :param database_name: the database name for the records. This can be one of: a single string; a collection
               of strings; a regex pattern; a function that takes a string (each database name) and returns a bool;
               or None (the default) in which case all databases are considered.
        :param object_store_name: the object store name of the records. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string (each object store name) and
               returns a bool; or None (the default) in which case all object stores are considered.
        :param raise_on_no_result: if True, raise a KeyError if no matching records are found. Defaults to False.
        :param include_deletions: if True, records related to deletions will be included (these will have None as
               values).
        :param bad_deserializer_data_handler: a callback function which will be executed by the underlying
               indexeddb reader if invalid data is encountered during reading a record, rather than raising an exception.
               The function should take two arguments: an IdbKey object (which is the key of the bad record) and a bytes
               object (which is the raw data). The return value of the callback is ignored by the calling code. If this is
               None (the default) then any bad data will cause an exception to be raised.

        """
        self._lazy_populate_indexeddb_list()

        # probably not optimal performance, but we only do it once per call, and it's a lot neater.
        if host_id is None:
            found_hosts = list(self.iter_indexeddb_hosts())
        else:
            found_hosts = [x for x in self._indexeddb_databases.keys() if is_keysearch_hit(host_id, x)]

        if not found_hosts and raise_on_no_result:
            raise KeyError((host_id, database_name, object_store_name))

        yielded = False
        for found_host in found_hosts:
            idb = self.get_indexeddb(found_host)
            for idb_db_id in idb.database_ids:
                if database_name is not None and not is_keysearch_hit(database_name, idb_db_id.name):
                    continue
                idb_db = idb[idb_db_id]
                for idb_db_objstore_name in idb_db.object_store_names:
                    if (object_store_name is not None
                            and not is_keysearch_hit(object_store_name, idb_db_objstore_name)):
                        continue
                    idb_db_objstore = idb_db.get_object_store_by_name(idb_db_objstore_name)
                    for rec in idb_db_objstore.iterate_records(
                            live_only=not include_deletions,
                            bad_deserializer_data_handler=bad_deserializer_data_handler):
                        yield rec
                        yielded = True

        if not yielded and raise_on_no_result:
            raise KeyError((host_id, database_name, object_store_name))

    def iterate_history_records(
            self, url: typing.Optional[KeySearch]=None, *,
            earliest: typing.Optional[datetime.datetime]=None, latest: typing.Optional[datetime.datetime]=None):
        """
        Iterates history records for this profile.

        :param url: a URL to search for. This can be one of: a single string; a collection of strings;
               a regex pattern; a function that takes a string (each URL) and returns a bool; or None (the
               default) in which case all records are considered.
        :param earliest: an optional datetime which will be used to exclude records before this date.
               NB the date should be UTC to match the database. If None, no lower limit will be placed on
               timestamps.
        :param latest: an optional datetime which will be used to exclude records after this date.
               NB the date should be UTC to match the database. If None, no upper limit will be placed on
               timestamps.
        """
        self._lazy_load_history()
        yield from self._history.iter_history_records(url, earliest=earliest, latest=latest)

    @staticmethod
    def _decompress_cache_data(data, content_encoding) -> tuple[bool, bytes]:
        """
        Attempts to decompress data from the cache according to its content-encoding.

        :param data: the (possibly compressed) data read from the cache
        :param content_encoding: the value of the content-encoding header field; surrounding whitespace and
               letter case are ignored
        :return: a tuple of (was_decompressed, data). If the encoding is not supported, or the data could not be
                 decompressed, was_decompressed is False and the data is returned untouched.
        """
        # content-coding names are case-insensitive, and "x-gzip" is an alias of "gzip"
        encoding = content_encoding.strip().lower()
        try:
            if encoding in ("gzip", "x-gzip"):
                return True, gzip.decompress(data)
            if encoding == "br":
                return True, brotli.decompress(data)
            if encoding == "deflate":
                try:
                    # servers commonly send a bare deflate stream, so suppress trying to read a header first...
                    return True, zlib.decompress(data, -zlib.MAX_WBITS)
                except zlib.error:
                    # ...but the HTTP "deflate" coding is formally a zlib-wrapped stream, so try that as well
                    return True, zlib.decompress(data)
        except (EOFError, gzip.BadGzipFile, brotli.error, zlib.error):
            return False, data

        return False, data

    @staticmethod
    def _metadata_matches(meta, header_checks) -> bool:
        """
        Checks one metadata entry against the header field checks passed to :func:`~iterate_cache`.

        :param meta: the metadata entry to check; None never matches
        :param header_checks: dictionary of header field name (with underscores in place of hyphens) to either a
               bool (whether the field must be present) or a KeySearch to apply to the values of the field
        :return: True if every check passes
        """
        if meta is None:
            # there is nothing to check against, so this entry cannot be a hit
            return False

        for field_name, check in header_checks.items():
            header_name = field_name.replace("_", "-")
            if isinstance(check, bool):
                present = bool(meta.has_declaration(header_name) or meta.get_attribute(header_name))
                if present != check:
                    return False
            elif not any(is_keysearch_hit(check, value) for value in (meta.get_attribute(header_name) or ())):
                return False

        return True

    def _yield_cache_record(self, key: ccl_chromium_cache.CacheKey, decompress, omit_data, metas=None, indices=None):
        """
        Yields a CacheResult for the records stored under a cache key.

        :param key: the cache key
        :param decompress: if True, attempt to decompress the data as per its content-encoding
        :param omit_data: if True the cached data is not read, and is None in each result
        :param metas: the metadata for the key if the caller has already fetched it, otherwise it is fetched here
        :param indices: the indices of the records to yield; None (the default) yields all of them
        :raises ValueError: if the numbers of metadata, data and location entries for the key differ
        """
        if metas is None:
            metas = self._cache.get_metadata(key)
        if not omit_data:
            datas = self._cache.get_cachefile(key)
        else:
            datas = [None] * len(metas)
        meta_locations = self._cache.get_location_for_metadata(key)
        data_locations = self._cache.get_location_for_cachefile(key)

        if not (len(metas) == len(datas) == len(meta_locations) == len(data_locations)):
            raise ValueError("Data and metadata counts do not match")

        for idx in (range(len(metas)) if indices is None else indices):
            meta = metas[idx]
            data = datas[idx]
            if decompress and data is not None and meta is not None:
                content_encoding = (meta.get_attribute("content-encoding") or [""])[0]
                was_decompressed, data = self._decompress_cache_data(data, content_encoding)
            else:
                was_decompressed = False
            yield CacheResult(key, meta, data, meta_locations[idx], data_locations[idx], was_decompressed, idx)

    def iterate_cache(
            self,
            url: typing.Optional[KeySearch]=None, *, decompress=True, omit_cached_data=False,
            **kwargs: typing.Union[bool, KeySearch]) -> col_abc.Iterable[CacheResult]:
        """
        Iterates cache records for this profile.

        :param url: a URL to search for. This can be one of: a single string; a collection of strings;
               a regex pattern; a function that takes a string (each URL) and returns a bool; or None (the
               default) in which case all records are considered.
        :param decompress: if True (the default), data from the cache which is compressed (as per the
               content-encoding header field) will be decompressed when read if the compression format is
               supported (currently deflate, gzip and brotli are supported).
        :param omit_cached_data: does not collect the cached data and omits it from each `CacheResult`
               object. Should be faster in cases when only metadata recovery is required.
        :param kwargs: further keyword arguments are used to search based upon header fields. The
               keyword should be the header field name, with underscores replacing hyphens (e.g.,
               content-encoding, becomes content_encoding). The value should be one of: a Boolean (in which
               case only records with this field present will be included if True, and vice versa); a single
               string; a collection of strings; a regex pattern; a function that takes a string (the value)
               and returns a bool.
        """

        self._lazy_load_cache()
        for key in self._cache.cache_keys():
            if url is not None and not is_keysearch_hit(url, key.url):
                # Fail condition: URL doesn't match
                continue

            if not kwargs:
                # No metadata keyword arguments to check
                yield from self._yield_cache_record(key, decompress, omit_cached_data)
                continue

            # Entries without metadata (None) can never satisfy a header check, so they are simply not hits;
            # a key with no usable metadata at all therefore yields nothing.
            metas = self._cache.get_metadata(key) or []
            meta_hit_indices = [
                meta_idx for meta_idx, meta in enumerate(metas) if self._metadata_matches(meta, kwargs)]
            if meta_hit_indices:
                yield from self._yield_cache_record(
                    key, decompress, omit_cached_data, metas=metas, indices=meta_hit_indices)

    def iter_downloads(
            self, *, download_url: typing.Optional[KeySearch]=None, tab_url: typing.Optional[KeySearch]=None):
        """
        Iterates download records for this profile. Records from the shared_proto_db are yielded first, followed by
        those from the history database.

        :param download_url: A URL related to the downloaded resource. This can be one of: a single string;
               a collection of strings; a regex pattern; a function that takes a string (each URL) and returns a bool;
               or None (the default) in which case all records are considered.
        :param tab_url: A URL related to the page the user was accessing when this download was started.
               This can be one of: a single string; a collection of strings; a regex pattern; a function that takes
               a string (each URL) and returns a bool; or None (the default) in which case all records are considered.
        """
        for download in ccl_shared_proto_db_downloads.read_downloads(self._path / SHARED_PROTO_DB_FOLDER_PATH):
            download_url_hit = (
                    download_url is None
                    or any(is_keysearch_hit(download_url, url) for url in download.url_chain))
            # the tab URL search is applied to both the tab URL and its referrer
            if download_url_hit and (
                    tab_url is None
                    or is_keysearch_hit(tab_url, download.tab_url or "")
                    or is_keysearch_hit(tab_url, download.tab_referrer_url or "")):
                yield download

        self._lazy_load_history()
        yield from self._history.iter_downloads(download_url=download_url, tab_url=tab_url)

    def __enter__(self) -> "ChromiumProfileFolder":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @property
    def path(self):
        """The input path of this profile folder"""
        return self._path

    @property
    def local_storage(self) -> ccl_chromium_localstorage.LocalStoreDb:
        """The local storage object for this profile folder"""
        self._lazy_load_localstorage()
        return self._local_storage

    @property
    def session_storage(self) -> ccl_chromium_sessionstorage.SessionStoreDb:
        """The session storage object for this profile folder"""
        self._lazy_load_sessionstorage()
        return self._session_storage

    @property
    def cache(self) -> ccl_chromium_cache.ChromiumCache:
        """The cache for this profile folder"""
        self._lazy_load_cache()
        return self._cache

    @property
    def history(self) -> ccl_chromium_history.HistoryDatabase:
        """The history for this profile folder"""
        self._lazy_load_history()
        return self._history

    @property
    def browser_type(self) -> str:
        """The type of browser this profile folder belongs to"""
        return "Chromium"