linear-mcp-fast 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. ccl_chromium_reader/__init__.py +2 -0
  2. ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
  3. ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
  4. ccl_chromium_reader/ccl_chromium_history.py +357 -0
  5. ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
  6. ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
  7. ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
  8. ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
  9. ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
  10. ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
  11. ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
  12. ccl_chromium_reader/common.py +19 -0
  13. ccl_chromium_reader/download_common.py +78 -0
  14. ccl_chromium_reader/profile_folder_protocols.py +276 -0
  15. ccl_chromium_reader/serialization_formats/__init__.py +0 -0
  16. ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
  17. ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
  18. ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
  19. ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
  20. ccl_chromium_reader/storage_formats/__init__.py +0 -0
  21. ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
  22. ccl_simplesnappy/__init__.py +1 -0
  23. ccl_simplesnappy/ccl_simplesnappy.py +306 -0
  24. linear_mcp_fast/__init__.py +8 -0
  25. linear_mcp_fast/__main__.py +6 -0
  26. linear_mcp_fast/reader.py +433 -0
  27. linear_mcp_fast/server.py +367 -0
  28. linear_mcp_fast/store_detector.py +117 -0
  29. linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
  30. linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
  31. linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
  32. linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
  33. linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
  34. tools_and_utilities/Chromium_dump_local_storage.py +111 -0
  35. tools_and_utilities/Chromium_dump_session_storage.py +92 -0
  36. tools_and_utilities/benchmark.py +35 -0
  37. tools_and_utilities/ccl_chrome_audit.py +651 -0
  38. tools_and_utilities/dump_indexeddb_details.py +59 -0
  39. tools_and_utilities/dump_leveldb.py +53 -0
@@ -0,0 +1,302 @@
1
+ """
2
+ Copyright 2022, CCL Forensics
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
5
+ this software and associated documentation files (the "Software"), to deal in
6
+ the Software without restriction, including without limitation the rights to
7
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ of the Software, and to permit persons to whom the Software is furnished to do
9
+ so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
21
+ """
22
+
23
+ __version__ = "0.8"
24
+ __description__ = "Library for reading Chrome/Chromium File System API data"
25
+ __contact__ = "Alex Caithness"
26
+
27
+ import dataclasses
28
+ import os
29
+ import sys
30
+ import pathlib
31
+ import datetime
32
+ import re
33
+ import typing
34
+ import types
35
+ import functools
36
+
37
+ from .storage_formats import ccl_leveldb
38
+ from .serialization_formats.ccl_easy_chromium_pickle import EasyPickleIterator
39
+
40
+
41
@dataclasses.dataclass(frozen=True)
class FileInfo:
    """Metadata for a single file stored in the Chromium File System API.

    Instances are normally created via :meth:`from_pickle` from a value in the
    per-folder "Paths" LevelDB database.
    """
    _owner: "FileSystem" = dataclasses.field(repr=False)  # back-reference used to resolve local paths
    origin: str            # origin (host/domain) that owns this file
    folder_id: str         # storage folder id under the File System API root
    is_persistent: bool    # True if from the persistent ("p") store, False for temporary ("t")
    seq_no: int            # LevelDB sequence number of the record this was read from
    file_id: int           # numeric id of this file within its store
    parent_id: int         # file_id of the parent directory (== file_id for a root entry)
    data_path: str         # relative path of the backing data file on the local disk
    name: str              # file name as presented to the origin
    timestamp: datetime.datetime  # timestamp deserialized from the pickle record

    @classmethod
    def from_pickle(
            cls, owner: "FileSystem", origin: str, folder_id: str, is_persistent: bool,
            seq_no: int, file_id: int, data: bytes):
        """Deserialize a FileInfo from a pickled "Paths" database value.

        :param owner: the FileSystem this record belongs to
        :param data: the raw pickled value read from the LevelDB record
        :return: a new FileInfo
        """
        with EasyPickleIterator(data) as reader:
            # field order is fixed by the Chromium serialization format
            parent_id = reader.read_uint64()
            data_path = reader.read_string()
            name = reader.read_string()
            timestamp = reader.read_datetime()

        return cls(owner, origin, folder_id, is_persistent, seq_no, file_id, parent_id, data_path, name, timestamp)

    def get_local_storage_path(self) -> pathlib.Path:
        """Return the path on the local file system where this file's data resides."""
        return self._owner.get_local_path_for_fileinfo(self)

    @property
    def is_stored_locally(self) -> bool:
        """True if the backing data file actually exists on the local file system."""
        return self.get_local_storage_path().exists()
73
+
74
class OriginStorage:
    """The file listing for a single origin's File System API storage folder."""

    def __init__(
            self,
            owner: "FileSystem",
            origin: str,
            folder_id: str,
            persistent_files: typing.Optional[typing.Mapping[int, FileInfo]],
            persistent_deleted_file_ids: typing.Optional[typing.Iterable[int]],
            temporary_files: typing.Optional[typing.Mapping[int, FileInfo]],
            temporary_deleted_file_ids: typing.Optional[typing.Iterable[int]]):
        """
        :param owner: the FileSystem object which created this storage object
        :param origin: the origin (host/domain) this storage belongs to
        :param folder_id: the storage folder id
        :param persistent_files: live files in the persistent ("p") store, keyed on file_id
        :param persistent_deleted_file_ids: file_ids of deleted persistent files
        :param temporary_files: live files in the temporary ("t") store, keyed on file_id
        :param temporary_deleted_file_ids: file_ids of deleted temporary files
        """
        self._owner = owner
        self._origin = origin
        self._folder_id = folder_id
        # wrap in read-only proxies so the listings cannot be mutated by callers
        self._persistent_files = types.MappingProxyType(persistent_files or {})
        self._persistent_deleted_file_ids = set(persistent_deleted_file_ids or [])
        self._temporary_files = types.MappingProxyType(temporary_files or {})
        self._temporary_deleted_file_ids = set(temporary_deleted_file_ids or [])

        self._persistent_file_listing_lookup = types.MappingProxyType(self._make_file_listing_lookup(True))
        self._temporary_file_listing_lookup = types.MappingProxyType(self._make_file_listing_lookup(False))

        # reverse lookup: path string -> list of "p_<file_id>" / "t_<file_id>" keys
        self._file_listing_lookup_reverse: dict[str, list[str]] = {}
        for k, v in self._persistent_file_listing_lookup.items():
            self._file_listing_lookup_reverse.setdefault(v, [])
            self._file_listing_lookup_reverse[v].append(f"p_{k}")

        for k, v in self._temporary_file_listing_lookup.items():
            self._file_listing_lookup_reverse.setdefault(v, [])
            self._file_listing_lookup_reverse[v].append(f"t_{k}")
        self._file_listing_lookup_reverse = types.MappingProxyType(
            self._file_listing_lookup_reverse)

    def _make_file_listing_lookup(self, persistent=True) -> dict[int, str]:
        """Build a mapping of file_id -> "/"-joined path string for one store.

        :param persistent: True for the persistent ("p") store, False for temporary ("t")
        """
        files = self._persistent_files if persistent else self._temporary_files
        file_listing_lookup: dict[int, str] = {}

        for file_info in files.values():
            if not file_info.data_path:
                continue  # directory-only entries have no backing data path
            path_parts = []
            current = file_info
            # walk up the parent chain; a root entry is its own parent
            while current.file_id != current.parent_id:
                path_parts.insert(0, current.name)
                parent = files.get(current.parent_id)
                if parent is None:
                    # BUG FIX: previously a missing parent substituted the string
                    # "<MISSING PATH SEGMENT>" for `current`, and the next loop test
                    # raised AttributeError ('str' has no attribute 'file_id').
                    # Record the gap as a path segment and stop walking instead.
                    path_parts.insert(0, "<MISSING PATH SEGMENT>")
                    break
                current = parent

            path_parts.insert(0, "p" if persistent else "t")
            file_listing_lookup[file_info.file_id] = "/".join(path_parts)

        return file_listing_lookup

    def get_file_listing(self) -> typing.Iterable[tuple[str, FileInfo]]:
        """Yield (path, FileInfo) pairs for every live file in both stores."""
        for file_id in self._persistent_file_listing_lookup:
            yield self._persistent_file_listing_lookup[file_id], self._persistent_files[file_id]
        for file_id in self._temporary_file_listing_lookup:
            yield self._temporary_file_listing_lookup[file_id], self._temporary_files[file_id]

    def _get_file_info_from_path(self, path) -> typing.Iterable[FileInfo]:
        """Yield the FileInfo objects recorded for a path string.

        :raises KeyError: if the path is not present in the listing
        """
        file_keys = self._file_listing_lookup_reverse[str(path)]
        for key in file_keys:
            p_or_t, file_id = key.split("_", 1)
            yield self._persistent_files[int(file_id)] if p_or_t == "p" else self._temporary_files[int(file_id)]
137
+
138
+
139
class FileSystem:
    """Reader for Chrome/Chromium File System API storage (the entry point for most scripts)."""

    def __init__(self, path: typing.Union[os.PathLike, str]):
        """
        Constructor for the File System API access (the entry point for most processing scripts)
        :param path: the path of the File System API storage
        """
        self._root = pathlib.Path(path)
        self._origins = self._get_origins()
        self._origins_reverse = {}
        for origin, folders in self._origins.items():
            for folder in folders:
                self._origins_reverse[folder] = origin
        # Per-instance cache of OriginStorage objects. This replaces the previous
        # functools.cache on _build_file_graph, which keys on `self` and therefore
        # keeps every FileSystem instance alive for the process lifetime (ruff B019).
        self._storage_cache: dict[str, OriginStorage] = {}

    def _get_origins(self) -> dict[str, tuple]:
        """Read the "Origins" LevelDB and return a mapping of origin -> folder ids."""
        result = {}
        with ccl_leveldb.RawLevelDb(self._root / "Origins") as db:
            for record in db.iterate_records_raw():
                if record.state != ccl_leveldb.KeyState.Live:
                    continue
                if record.user_key.startswith(b"ORIGIN:"):
                    _, origin = record.user_key.split(b":", 1)
                    origin = origin.decode("utf-8")
                    result.setdefault(origin, [])
                    result[origin].append(record.value.decode("utf-8"))

        return {k: tuple(v) for (k, v) in result.items()}

    def get_origins(self) -> typing.Iterable[str]:
        """
        Yields the origins for this File System API
        :return: Yields the origins in this File System API
        """
        yield from self._origins.keys()

    def get_folders_for_origin(self, origin) -> tuple[str, ...]:
        """
        Returns the folder ids which are used by the origin (host/domain)
        :param origin: the origin (host/domain)
        :return: a tuple of strings which are the folder id(s) for this origin
        :raises KeyError: if the origin is unknown
        """
        return self._origins[origin]

    def get_storage_for_folder(self, folder_id) -> OriginStorage:
        """
        Get the OriginStorage object for the folder
        :param folder_id: a folder id (such as those returned by get_folders_for_origin)
        :return: OriginStorage for the folder_id
        """
        # cached per instance; building the file graph reads two LevelDBs
        if folder_id not in self._storage_cache:
            self._storage_cache[folder_id] = self._build_file_graph(folder_id)
        return self._storage_cache[folder_id]

    def _build_file_graph(self, folder_id) -> OriginStorage:
        """Build the OriginStorage for a folder by reading its "p" and "t" Paths databases."""
        persistent_files: typing.Optional[dict[int, FileInfo]] = {}
        persistent_deleted_files: typing.Optional[dict[int, int]] = {}  # file_id: seq_no
        temporary_files: typing.Optional[dict[int, FileInfo]] = {}
        temporary_deleted_files: typing.Optional[dict[int, int]] = {}  # file_id: seq_no

        origin = self._origins_reverse[folder_id]

        for p_or_t in ("p", "t"):
            db_path = self._root / folder_id / p_or_t / "Paths"
            if not db_path.exists():
                continue
            files: dict[int, FileInfo] = persistent_files if p_or_t == "p" else temporary_files
            deleted_files: dict[int, int] = persistent_deleted_files if p_or_t == "p" else temporary_deleted_files
            with ccl_leveldb.RawLevelDb(db_path) as db:
                # TODO: we can infer file modified (created?) times using the parent's modified times maybe
                for record in db.iterate_records_raw():
                    if re.match(b"[0-9]+", record.user_key) is not None:
                        # BUG FIX: parse file_id for every record; previously it was only
                        # assigned in the Live branch, so a deletion record reused the
                        # file_id of the preceding live record (or raised NameError when
                        # the first matching record was a deletion).
                        file_id = int(record.user_key.decode("utf-8"))
                        if record.state == ccl_leveldb.KeyState.Live:
                            file_info = FileInfo.from_pickle(
                                self, origin, folder_id, p_or_t == "p", record.seq, file_id, record.value)

                            # undelete a file if more recent than deletion record:
                            if file_id in deleted_files and deleted_files[file_id] < file_info.seq_no:
                                deleted_files.pop(file_id)

                            if old_file_info := files.get(file_id):
                                if old_file_info.seq_no < file_info.seq_no:
                                    # TODO: any reason to keep older records (other than for the timestamps as above?)
                                    files[file_id] = file_info
                            else:
                                files[file_id] = file_info
                        else:
                            # deletion record: keep only the newest deletion seq_no
                            if old_file_info := files.get(file_id):
                                if old_file_info.seq_no < record.seq:
                                    deleted_files[file_id] = record.seq
                            else:
                                deleted_files[file_id] = record.seq

        return OriginStorage(
            self, origin, folder_id,
            persistent_files, persistent_deleted_files.keys(),
            temporary_files, temporary_deleted_files.keys())

    def get_local_path_for_fileinfo(self, file_info: FileInfo) -> pathlib.Path:
        """
        Returns the path on the local file system for the FileInfo object
        :param file_info: the FileInfo to resolve
        :return: the path on the local file system for the FileInfo object
        """
        path = self._root / file_info.folder_id / ("p" if file_info.is_persistent else "t") / file_info.data_path
        return path

    def get_file_stream_for_fileinfo(self, file_info: FileInfo) -> typing.Optional[typing.BinaryIO]:
        """
        Returns a file object from the local file system for the FileInfo object
        :param file_info: the FileInfo to open
        :return: a binary file object, or None if the backing file does not exist
        """
        path = self.get_local_path_for_fileinfo(file_info)
        if path.exists():
            return path.open("rb")
        return None
254
+
255
+
256
class FileSystemUtils:
    """Small command-line helpers for inspecting File System API storage."""

    @staticmethod
    def print_origin_to_folder(fs_folder: typing.Union[os.PathLike, str]) -> None:
        """
        Print each origin in the File System API alongside its folder id(s).
        :param fs_folder: the path of the File System API storage
        :return: None
        """
        fs = FileSystem(fs_folder)
        for origin in sorted(fs.get_origins()):
            folder_list = ",".join(fs.get_folders_for_origin(origin))
            print(f"{origin}: {folder_list}")

    @staticmethod
    def print_folder_to_origin(fs_folder: typing.Union[os.PathLike, str]) -> None:
        """
        Print each folder in the File System API alongside its owning origin.
        :param fs_folder: the path of the File System API storage
        :return: None
        """
        fs = FileSystem(fs_folder)
        folder_to_origin = {
            folder: origin
            for origin in fs.get_origins()
            for folder in fs.get_folders_for_origin(origin)}

        for folder in sorted(folder_to_origin):
            print(f"{folder}: {folder_to_origin[folder]}")

    @staticmethod
    def print_all_files(fs_folder: typing.Union[os.PathLike, str]) -> None:
        """
        Print the full path of every file in the File System API.
        :param fs_folder: the path of the File System API storage
        :return: None
        """
        fs = FileSystem(fs_folder)
        for origin in sorted(fs.get_origins()):
            for folder in fs.get_folders_for_origin(origin):
                listing = fs.get_storage_for_folder(folder).get_file_listing()
                for file_path, _file_info in listing:
                    print("/".join([origin, file_path]))
297
+
298
+
299
if __name__ == "__main__":
    # Script entry point: print all files for the File System API storage
    # folder given as the first command-line argument.
    FileSystemUtils.print_all_files(sys.argv[1])
301
+
302
+
@@ -0,0 +1,357 @@
1
+ """
2
+ Copyright 2024, CCL Forensics
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
20
+ """
21
+
22
+ import dataclasses
23
+ import datetime
24
+ import math
25
+ import pathlib
26
+ import sqlite3
27
+ import enum
28
+ import re
29
+ import struct
30
+ import typing
31
+ import collections.abc as col_abc
32
+
33
+ from .common import KeySearch, is_keysearch_hit
34
+ from .download_common import Download, DownloadSource
35
+
36
+ __version__ = "0.6"
37
+ __description__ = "Module to access the chrom(e|ium) history database"
38
+ __contact__ = "Alex Caithness"
39
+
40
# Chromium timestamps count microseconds from the Windows FILETIME epoch.
EPOCH = datetime.datetime(1601, 1, 1)


def parse_chromium_time(microseconds: int) -> datetime.datetime:
    """Convert a Chromium timestamp (microseconds since 1601-01-01) to a datetime."""
    return EPOCH + datetime.timedelta(microseconds=microseconds)


def encode_chromium_time(datetime_value: datetime.datetime) -> int:
    """Convert a datetime to a Chromium timestamp (microseconds since 1601-01-01).

    BUG FIX: the previous implementation used math.floor(total_seconds() * 1000000);
    total_seconds() returns a float, and modern dates exceed 2**53 microseconds since
    1601, so microsecond precision was silently lost. Integer floor-division of the
    timedelta is exact (and still floors, matching the old behavior for whole values).
    """
    return (datetime_value - EPOCH) // datetime.timedelta(microseconds=1)
49
+
50
+
51
class PageTransitionCoreEnum(enum.IntEnum):
    """Core page-transition type (stored in the low byte of the transition value)."""
    # chrome/common/page_transition_types.h
    link = 0
    typed = 1
    auto_bookmark = 2
    auto_subframe = 3
    manual_subframe = 4
    generated = 5
    start_page = 6
    form_submit = 7
    reload = 8
    keyword = 9
    keyword_generated = 10


class PageTransitionQualifierEnum(enum.IntFlag):
    """Qualifier flags OR'd into the high bytes of the transition value."""
    blocked = 0x00800000
    forward_back = 0x01000000
    from_address_bar = 0x02000000
    home_page = 0x04000000
    from_api = 0x08000000
    chain_start = 0x10000000
    chain_end = 0x20000000
    client_redirect = 0x40000000
    server_redirect = 0x80000000


@dataclasses.dataclass(frozen=True)
class PageTransition:
    """A decoded page transition: core type plus qualifier flags."""
    core: PageTransitionCoreEnum
    qualifier: PageTransitionQualifierEnum

    @classmethod
    def from_int(cls, val):
        """Build a PageTransition from the raw integer stored in the database.

        The database stores the value as a signed 32-bit integer; it must be
        reinterpreted as unsigned before the parts can be masked out.
        """
        if val < 0:
            (val,) = struct.unpack(">I", struct.pack(">i", val))

        core_part = PageTransitionCoreEnum(val & 0xff)
        qualifier_part = PageTransitionQualifierEnum(val & 0xffffff00)
        return cls(core_part, qualifier_part)
93
+
94
+
95
@dataclasses.dataclass(frozen=True)
class HistoryRecord:
    """A single visit from the history database, joined with its URL record."""
    _owner: "HistoryDatabase" = dataclasses.field(repr=False)
    rec_id: int
    url: str
    title: str
    visit_time: datetime.datetime
    visit_duration: datetime.timedelta
    transition: PageTransition
    from_visit_id: int
    opener_visit_id: int

    @property
    def record_location(self) -> str:
        """A human-readable description of where this record resides in the database."""
        return f"SQLite Rowid: {self.rec_id}"

    @property
    def has_parent(self) -> bool:
        """True if this visit was reached from, or opened by, another visit."""
        return bool(self.from_visit_id or self.opener_visit_id)

    @property
    def parent_visit_id(self) -> int:
        """The parent visit's id; opener_visit takes precedence over from_visit."""
        if self.opener_visit_id:
            return self.opener_visit_id
        return self.from_visit_id

    def get_parent(self) -> typing.Optional["HistoryRecord"]:
        """
        Get the parent visit for this record (based on the from_visit field in the database),
        or None if there isn't one.
        """
        return self._owner.get_parent_of(self)

    def get_children(self) -> col_abc.Iterable["HistoryRecord"]:
        """
        Get the children visits for this record (based on their from_visit field in the database).
        """
        return self._owner.get_children_of(self)
132
+
133
+
134
class HistoryDatabase:
    """Read-only access to a Chrome/Chromium "History" SQLite database."""

    _HISTORY_QUERY = """
        SELECT
          "visits"."id",
          "urls"."url",
          "urls"."title",
          "visits"."visit_time",
          "visits"."from_visit",
          "visits"."opener_visit",
          "visits"."transition",
          "visits"."visit_duration",
          CASE
            WHEN "visits"."opener_visit" != 0 THEN "visits"."opener_visit"
            ELSE "visits"."from_visit"
          END "parent_id"

        FROM "visits"
          LEFT JOIN "urls" ON "visits"."url" = "urls"."id"
    """

    _WHERE_URL_EQUALS_PREDICATE = """"urls"."url" = ?"""

    _WHERE_URL_REGEX_PREDICATE = """"urls"."url" REGEXP ?"""

    _WHERE_URL_IN_PREDICATE = """"urls"."url" IN ({parameter_question_marks})"""

    _WHERE_VISIT_TIME_EARLIEST_PREDICATE = """"visits"."visit_time" >= ?"""

    _WHERE_VISIT_TIME_LATEST_PREDICATE = """"visits"."visit_time" <= ?"""

    _WHERE_VISIT_ID_EQUALS_PREDICATE = """"visits"."id" = ?"""

    _WHERE_PARENT_ID_EQUALS_PREDICATE = """"parent_id" = ?"""

    _DOWNLOADS_QUERY = """
        SELECT
          "downloads"."id",
          "downloads"."guid",
          "downloads"."current_path",
          "downloads"."target_path",
          "downloads"."start_time",
          "downloads"."received_bytes",
          "downloads"."total_bytes",
          "downloads"."state",
          "downloads"."danger_type",
          "downloads"."interrupt_reason",
          "downloads"."hash",
          "downloads"."end_time",
          "downloads"."opened",
          "downloads"."last_access_time",
          "downloads"."transient",
          "downloads"."referrer",
          "downloads"."site_url",
          "downloads"."embedder_download_data",
          "downloads"."tab_url",
          "downloads"."tab_referrer_url",
          "downloads"."http_method",
          "downloads"."mime_type",
          "downloads"."original_mime_type"
        FROM "downloads";
    """

    # renamed from _DOWNLOADS_URL_CHAINS_QUEREY (typo); only referenced in this class
    _DOWNLOADS_URL_CHAINS_QUERY = """
        SELECT "downloads_url_chains"."id",
          "downloads_url_chains"."chain_index",
          "downloads_url_chains"."url"
        FROM "downloads_url_chains"
        WHERE "downloads_url_chains"."id" = ?
        ORDER BY "downloads_url_chains"."chain_index";
    """

    def __init__(self, db_path: pathlib.Path):
        """
        :param db_path: path to the "History" SQLite database file
        """
        # open read-only (URI mode) so the on-disk database cannot be modified
        self._conn = sqlite3.connect(db_path.absolute().as_uri() + "?mode=ro", uri=True)
        self._conn.row_factory = sqlite3.Row
        # support the REGEXP operator: sqlite calls regexp(pattern, value)
        self._conn.create_function("regexp", 2, lambda y, x: 1 if re.search(y, x) is not None else 0)

    def _row_to_record(self, row: sqlite3.Row) -> HistoryRecord:
        """Convert a row from _HISTORY_QUERY into a HistoryRecord."""
        return HistoryRecord(
            self,
            row["id"],
            row["url"],
            row["title"],
            parse_chromium_time(row["visit_time"]),
            datetime.timedelta(microseconds=row["visit_duration"]),
            PageTransition.from_int(row["transition"]),
            row["from_visit"],
            row["opener_visit"]
        )

    def get_parent_of(self, record: HistoryRecord) -> typing.Optional[HistoryRecord]:
        """Return the parent visit of the record (opener_visit preferred over
        from_visit), or None if it has no parent.

        CONSISTENCY: delegates to get_record_with_id and the record's own
        has_parent/parent_visit_id properties rather than duplicating the query
        and the parent-selection logic.
        """
        if not record.has_parent:
            return None
        return self.get_record_with_id(record.parent_visit_id)

    def get_children_of(self, record: HistoryRecord) -> col_abc.Iterable[HistoryRecord]:
        """Yield the visits whose parent (opener_visit or from_visit) is the given record."""
        query = HistoryDatabase._HISTORY_QUERY
        query += f" WHERE {HistoryDatabase._WHERE_PARENT_ID_EQUALS_PREDICATE};"
        cur = self._conn.cursor()
        try:
            cur.execute(query, (record.rec_id,))
            for row in cur:
                yield self._row_to_record(row)
        finally:
            # release the cursor even if the caller abandons the generator early
            cur.close()

    def get_record_with_id(self, visit_id: int) -> typing.Optional[HistoryRecord]:
        """Return the visit with the given id, or None if it does not exist."""
        query = HistoryDatabase._HISTORY_QUERY
        query += f" WHERE {HistoryDatabase._WHERE_VISIT_ID_EQUALS_PREDICATE};"
        cur = self._conn.cursor()
        try:
            cur.execute(query, (visit_id,))
            row = cur.fetchone()
        finally:
            cur.close()
        if row:
            return self._row_to_record(row)

    def iter_history_records(
            self, url: typing.Optional[KeySearch], *,
            earliest: typing.Optional[datetime.datetime]=None, latest: typing.Optional[datetime.datetime]=None
    ) -> col_abc.Iterable[HistoryRecord]:
        """Iterate history records, optionally filtered on URL and visit time.

        :param url: a KeySearch URL filter - an exact string, a compiled regex
            pattern, a collection of exact strings, a callable str -> bool, or
            None for no URL filtering
        :param earliest: only yield visits at or after this time (optional)
        :param latest: only yield visits at or before this time (optional)
        :raises TypeError: if url is not a valid KeySearch
        """
        predicates = []
        parameters = []

        if url is None:
            pass  # no predicate
        elif isinstance(url, str):
            predicates.append(HistoryDatabase._WHERE_URL_EQUALS_PREDICATE)
            parameters.append(url)
        elif isinstance(url, re.Pattern):
            predicates.append(HistoryDatabase._WHERE_URL_REGEX_PREDICATE)
            parameters.append(url.pattern)
        elif isinstance(url, col_abc.Collection):
            predicates.append(
                HistoryDatabase._WHERE_URL_IN_PREDICATE.format(
                    parameter_question_marks=",".join("?" for _ in range(len(url)))))
            parameters.extend(url)
        elif isinstance(url, col_abc.Callable):
            pass  # callable filters must be applied per row, after the query runs
        else:
            raise TypeError(f"Unexpected type: {type(url)} (expects: {KeySearch})")

        if earliest is not None:
            predicates.append(HistoryDatabase._WHERE_VISIT_TIME_EARLIEST_PREDICATE)
            parameters.append(encode_chromium_time(earliest))

        if latest is not None:
            predicates.append(HistoryDatabase._WHERE_VISIT_TIME_LATEST_PREDICATE)
            parameters.append(encode_chromium_time(latest))

        query = HistoryDatabase._HISTORY_QUERY
        if predicates:
            query += f" WHERE {' AND '.join(predicates)}"

        query += ";"
        cur = self._conn.cursor()
        try:
            for row in cur.execute(query, parameters):
                if not isinstance(url, col_abc.Callable) or url(row["url"]):
                    yield self._row_to_record(row)
        finally:
            # release the cursor even if the caller abandons the generator early
            cur.close()

    def iter_downloads(
            self,
            download_url: typing.Optional[KeySearch]=None,
            tab_url: typing.Optional[KeySearch]=None) -> col_abc.Iterable[Download]:
        """Iterate downloads, optionally filtered on download URL chain and tab URL.

        :param download_url: KeySearch matched against any URL in the download's
            URL chain (optional)
        :param tab_url: KeySearch matched against the tab URL or tab referrer URL
            (optional)
        """
        downloads_cur = self._conn.cursor()
        chain_cur = self._conn.cursor()
        try:
            downloads_cur.execute(HistoryDatabase._DOWNLOADS_QUERY)

            for download in downloads_cur:
                chain_cur.execute(HistoryDatabase._DOWNLOADS_URL_CHAINS_QUERY, (download["id"],))
                chain = tuple(x["url"] for x in chain_cur)

                if download_url is not None and not any(is_keysearch_hit(download_url, x) for x in chain):
                    continue

                if (tab_url is not None and
                        not is_keysearch_hit(tab_url, download["tab_url"]) and
                        not is_keysearch_hit(tab_url, download["tab_referrer_url"])):
                    continue

                yield Download(
                    DownloadSource.history_db,
                    download["id"],
                    download["guid"],
                    # NOTE(review): assumes "hash" is a non-NULL BLOB - confirm against schema
                    download["hash"].hex(),
                    chain,
                    download["tab_url"],
                    download["tab_referrer_url"],
                    download["target_path"],
                    download["mime_type"],
                    download["original_mime_type"],
                    download["total_bytes"],
                    parse_chromium_time(download["start_time"]),
                    parse_chromium_time(download["end_time"])
                )
        finally:
            # release both cursors even if the caller abandons the generator early
            downloads_cur.close()
            chain_cur.close()

    def close(self):
        """Close the underlying database connection."""
        self._conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
357
+