linear_mcp_fast-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccl_chromium_reader/__init__.py +2 -0
- ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
- ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
- ccl_chromium_reader/ccl_chromium_history.py +357 -0
- ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
- ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
- ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
- ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
- ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
- ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
- ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
- ccl_chromium_reader/common.py +19 -0
- ccl_chromium_reader/download_common.py +78 -0
- ccl_chromium_reader/profile_folder_protocols.py +276 -0
- ccl_chromium_reader/serialization_formats/__init__.py +0 -0
- ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
- ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
- ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
- ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
- ccl_chromium_reader/storage_formats/__init__.py +0 -0
- ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
- ccl_simplesnappy/__init__.py +1 -0
- ccl_simplesnappy/ccl_simplesnappy.py +306 -0
- linear_mcp_fast/__init__.py +8 -0
- linear_mcp_fast/__main__.py +6 -0
- linear_mcp_fast/reader.py +433 -0
- linear_mcp_fast/server.py +367 -0
- linear_mcp_fast/store_detector.py +117 -0
- linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
- linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
- linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
- linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
- linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
- tools_and_utilities/Chromium_dump_local_storage.py +111 -0
- tools_and_utilities/Chromium_dump_session_storage.py +92 -0
- tools_and_utilities/benchmark.py +35 -0
- tools_and_utilities/ccl_chrome_audit.py +651 -0
- tools_and_utilities/dump_indexeddb_details.py +59 -0
- tools_and_utilities/dump_leveldb.py +53 -0

ccl_chromium_reader/ccl_chromium_filesystem.py

@@ -0,0 +1,302 @@
+"""
+Copyright 2022, CCL Forensics
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+__version__ = "0.8"
+__description__ = "Library for reading Chrome/Chromium File System API data"
+__contact__ = "Alex Caithness"
+
+import dataclasses
+import os
+import sys
+import pathlib
+import datetime
+import re
+import typing
+import types
+import functools
+
+from .storage_formats import ccl_leveldb
+from .serialization_formats.ccl_easy_chromium_pickle import EasyPickleIterator
+
+
+@dataclasses.dataclass(frozen=True)
+class FileInfo:
+    _owner: "FileSystem" = dataclasses.field(repr=False)
+    origin: str
+    folder_id: str
+    is_persistent: bool
+    seq_no: int
+    file_id: int
+    parent_id: int
+    data_path: str
+    name: str
+    timestamp: datetime.datetime
+
+    @classmethod
+    def from_pickle(
+            cls, owner: "FileSystem", origin: str, folder_id: str, is_persistent: bool,
+            seq_no: int, file_id: int, data: bytes):
+        with EasyPickleIterator(data) as reader:
+            parent_id = reader.read_uint64()
+            data_path = reader.read_string()
+            name = reader.read_string()
+            timestamp = reader.read_datetime()
+
+        return cls(owner, origin, folder_id, is_persistent, seq_no, file_id, parent_id, data_path, name, timestamp)
+
+    def get_local_storage_path(self) -> pathlib.Path:
+        return self._owner.get_local_path_for_fileinfo(self)
+
+    @property
+    def is_stored_locally(self) -> bool:
+        return self.get_local_storage_path().exists()
+
+
+class OriginStorage:
+    def __init__(
+            self,
+            owner: "FileSystem",
+            origin: str,
+            folder_id: str,
+            persistent_files: typing.Optional[typing.Mapping[int, FileInfo]],
+            persistent_deleted_file_ids: typing.Optional[typing.Iterable[int]],
+            temporary_files: typing.Optional[typing.Mapping[int, FileInfo]],
+            temporary_deleted_file_ids: typing.Optional[typing.Iterable[int]]):
+        self._owner = owner
+        self._origin = origin
+        self._folder_id = folder_id
+        self._persistent_files = types.MappingProxyType(persistent_files or {})
+        self._persistent_deleted_file_ids = set(persistent_deleted_file_ids or [])
+        self._temporary_files = types.MappingProxyType(temporary_files or {})
+        self._temporary_deleted_file_ids = set(temporary_deleted_file_ids or [])
+
+        self._persistent_file_listing_lookup = types.MappingProxyType(self._make_file_listing_lookup(True))
+        self._temporary_file_listing_lookup = types.MappingProxyType(self._make_file_listing_lookup(False))
+
+        self._file_listing_lookup_reverse: dict[str, list[str]] = {}
+        for k, v in self._persistent_file_listing_lookup.items():
+            self._file_listing_lookup_reverse.setdefault(v, [])
+            self._file_listing_lookup_reverse[v].append(f"p_{k}")
+
+        for k, v in self._temporary_file_listing_lookup.items():
+            self._file_listing_lookup_reverse.setdefault(v, [])
+            self._file_listing_lookup_reverse[v].append(f"t_{k}")
+        self._file_listing_lookup_reverse = types.MappingProxyType(
+            self._file_listing_lookup_reverse)
+
+    def _make_file_listing_lookup(self, persistent=True) -> dict[int, str]:
+        files = self._persistent_files if persistent else self._temporary_files
+        file_listing_lookup: dict[int, str] = {}
+
+        for file_info in files.values():
+            if not file_info.data_path:
+                continue
+            path_parts = []
+            current = file_info
+            while current.file_id != current.parent_id:
+                path_parts.insert(0, current.name)
+                parent = files.get(current.parent_id)
+                if parent is None:
+                    # the parent record is missing from the store; record the gap and stop,
+                    # rather than assigning the placeholder string to current (which would
+                    # raise AttributeError on the next loop test)
+                    path_parts.insert(0, "<MISSING PATH SEGMENT>")
+                    break
+                current = parent
+
+            path_parts.insert(0, "p" if persistent else "t")
+            # path_parts.insert(0, self._origin)
+            # path_parts.insert(0, "")
+            file_listing_lookup[file_info.file_id] = "/".join(path_parts)
+
+        return file_listing_lookup
+
+    def get_file_listing(self) -> typing.Iterable[tuple[str, FileInfo]]:
+        for file_id in self._persistent_file_listing_lookup:
+            yield self._persistent_file_listing_lookup[file_id], self._persistent_files[file_id]
+        for file_id in self._temporary_file_listing_lookup:
+            yield self._temporary_file_listing_lookup[file_id], self._temporary_files[file_id]
+
+    def _get_file_info_from_path(self, path) -> typing.Iterable[FileInfo]:
+        file_keys = self._file_listing_lookup_reverse[str(path)]
+        for key in file_keys:
+            p_or_t, file_id = key.split("_", 1)
+            yield self._persistent_files[int(file_id)] if p_or_t == "p" else self._temporary_files[int(file_id)]
+
+
+class FileSystem:
+    def __init__(self, path: typing.Union[os.PathLike, str]):
+        """
+        Constructor for the File System API access (the entry point for most processing scripts)
+        :param path: the path of the File System API storage
+        """
+        self._root = pathlib.Path(path)
+        self._origins = self._get_origins()
+        self._origins_reverse = {}
+        for origin, folders in self._origins.items():
+            for folder in folders:
+                self._origins_reverse[folder] = origin
+
+    def _get_origins(self) -> dict[str, tuple]:
+        result = {}
+        with ccl_leveldb.RawLevelDb(self._root / "Origins") as db:
+            for record in db.iterate_records_raw():
+                if record.state != ccl_leveldb.KeyState.Live:
+                    continue
+                if record.user_key.startswith(b"ORIGIN:"):
+                    _, origin = record.user_key.split(b":", 1)
+                    origin = origin.decode("utf-8")
+                    result.setdefault(origin, [])
+                    result[origin].append(record.value.decode("utf-8"))
+
+        return {k: tuple(v) for (k, v) in result.items()}
+
+    def get_origins(self) -> typing.Iterable[str]:
+        """
+        Yields the origins present in this File System API store
+        :return: an iterable of origin strings
+        """
+        yield from self._origins.keys()
+
+    def get_folders_for_origin(self, origin) -> tuple[str, ...]:
+        """
+        Returns the folder ids which are used by the origin (host/domain)
+        :param origin: the origin (host/domain) to look up
+        :return: a tuple of strings which are the folder id(s) for this origin
+        """
+        return self._origins[origin]
+
+    def get_storage_for_folder(self, folder_id) -> OriginStorage:
+        """
+        Get the OriginStorage object for the folder
+        :param folder_id: a folder id (such as those returned by get_folders_for_origin)
+        :return: OriginStorage for the folder_id
+        """
+        return self._build_file_graph(folder_id)
+
+    @functools.cache
+    def _build_file_graph(self, folder_id) -> OriginStorage:
+        persistent_files: typing.Optional[dict[int, FileInfo]] = {}
+        persistent_deleted_files: typing.Optional[dict[int, int]] = {}  # file_id: seq_no
+        temporary_files: typing.Optional[dict[int, FileInfo]] = {}
+        temporary_deleted_files: typing.Optional[dict[int, int]] = {}  # file_id: seq_no
+
+        origin = self._origins_reverse[folder_id]
+
+        for p_or_t in ("p", "t"):
+            db_path = self._root / folder_id / p_or_t / "Paths"
+            if not db_path.exists():
+                continue
+            files: dict[int, FileInfo] = persistent_files if p_or_t == "p" else temporary_files
+            deleted_files: dict[int, int] = persistent_deleted_files if p_or_t == "p" else temporary_deleted_files
+            with ccl_leveldb.RawLevelDb(db_path) as db:
+                # TODO: we can infer file modified (created?) times using the parent's modified times maybe
+                for record in db.iterate_records_raw():
+                    if re.match(b"[0-9]+", record.user_key) is not None:
+                        # parse the file_id before branching on the record state, so that
+                        # deletion records are keyed on the right id too
+                        file_id = int(record.user_key.decode("utf-8"))
+                        if record.state == ccl_leveldb.KeyState.Live:
+                            file_info = FileInfo.from_pickle(
+                                self, origin, folder_id, p_or_t == "p", record.seq, file_id, record.value)
+
+                            # undelete a file if more recent than deletion record:
+                            if file_id in deleted_files and deleted_files[file_id] < file_info.seq_no:
+                                deleted_files.pop(file_id)
+
+                            if old_file_info := files.get(file_id):
+                                if old_file_info.seq_no < file_info.seq_no:
+                                    # TODO: any reason to keep older records (other than for the timestamps as above?)
+                                    files[file_id] = file_info
+                            else:
+                                files[file_id] = file_info
+                        else:
+                            if old_file_info := files.get(file_id):
+                                if old_file_info.seq_no < record.seq:
+                                    deleted_files[file_id] = record.seq
+                            else:
+                                deleted_files[file_id] = record.seq
+
+        return OriginStorage(
+            self, origin, folder_id,
+            persistent_files, persistent_deleted_files.keys(),
+            temporary_files, temporary_deleted_files.keys())
+
+    def get_local_path_for_fileinfo(self, file_info: FileInfo):
+        """
+        Returns the path on the local file system for the FileInfo object
+        :param file_info:
+        :return: the path on the local file system for the FileInfo object
+        """
+        path = self._root / file_info.folder_id / ("p" if file_info.is_persistent else "t") / file_info.data_path
+        return path
+
+    def get_file_stream_for_fileinfo(self, file_info: FileInfo) -> typing.Optional[typing.BinaryIO]:
+        """
+        Returns a file object from the local file system for the FileInfo object
+        :param file_info:
+        :return: a file object from the local file system for the FileInfo object
+        """
+        path = self.get_local_path_for_fileinfo(file_info)
+        if path.exists():
+            return path.open("rb")
+        return None
+
+
+class FileSystemUtils:
+    @staticmethod
+    def print_origin_to_folder(fs_folder: typing.Union[os.PathLike, str]) -> None:
+        """
+        Utility function which prints each origin in the File System API alongside its folder ids
+        :param fs_folder: the path of the File System API storage
+        :return: None
+        """
+        fs = FileSystem(fs_folder)
+        for origin in sorted(fs.get_origins()):
+            print(f"{origin}: {','.join(fs.get_folders_for_origin(origin))}")
+
+    @staticmethod
+    def print_folder_to_origin(fs_folder: typing.Union[os.PathLike, str]) -> None:
+        """
+        Utility function which prints each folder in the File System API alongside its origin
+        :param fs_folder: the path of the File System API storage
+        :return: None
+        """
+        fs = FileSystem(fs_folder)
+        result = {}
+        for origin in fs.get_origins():
+            for folder in fs.get_folders_for_origin(origin):
+                result[folder] = origin
+
+        for folder in sorted(result.keys()):
+            print(f"{folder}: {result[folder]}")
+
+    @staticmethod
+    def print_all_files(fs_folder: typing.Union[os.PathLike, str]) -> None:
+        """
+        Utility function which prints every file in the File System API
+        :param fs_folder: the path of the File System API storage
+        :return: None
+        """
+        fs = FileSystem(fs_folder)
+        for origin in sorted(fs.get_origins()):
+            for folder in fs.get_folders_for_origin(origin):
+                storage = fs.get_storage_for_folder(folder)
+                for file_path, file_info in storage.get_file_listing():
+                    print("/".join([origin, file_path]))
+
+
+if __name__ == "__main__":
+    FileSystemUtils.print_all_files(sys.argv[1])
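
`FileSystem` is the entry point in the module above: it maps origins to folder ids via the `Origins` LevelDB, builds an `OriginStorage` file graph per folder, and resolves each `FileInfo` back to data on disk. A minimal usage sketch, assuming the wheel is installed and pointed at a profile's File System API directory (the command-line path is illustrative, not part of the package):

```python
import pathlib
import sys

from ccl_chromium_reader.ccl_chromium_filesystem import FileSystem

# Illustrative input: the File System API storage folder of a Chrom(e|ium) profile
fs = FileSystem(pathlib.Path(sys.argv[1]))

for origin in sorted(fs.get_origins()):
    for folder_id in fs.get_folders_for_origin(origin):
        storage = fs.get_storage_for_folder(folder_id)
        for virtual_path, file_info in storage.get_file_listing():
            # only files whose backing data survives on disk can be opened
            if file_info.is_stored_locally:
                stream = fs.get_file_stream_for_fileinfo(file_info)
                header = stream.read(16)  # e.g. for file-signature checks
                stream.close()
                print(origin, virtual_path, file_info.timestamp, header.hex())
```
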

ccl_chromium_reader/ccl_chromium_history.py

@@ -0,0 +1,357 @@
+"""
+Copyright 2024, CCL Forensics
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import dataclasses
+import datetime
+import math
+import pathlib
+import sqlite3
+import enum
+import re
+import struct
+import typing
+import collections.abc as col_abc
+
+from .common import KeySearch, is_keysearch_hit
+from .download_common import Download, DownloadSource
+
+__version__ = "0.6"
+__description__ = "Module to access the chrom(e|ium) history database"
+__contact__ = "Alex Caithness"
+
+EPOCH = datetime.datetime(1601, 1, 1)
+
+
+def parse_chromium_time(microseconds: int) -> datetime.datetime:
+    return EPOCH + datetime.timedelta(microseconds=microseconds)
+
+
+def encode_chromium_time(datetime_value: datetime.datetime) -> int:
+    return math.floor((datetime_value - EPOCH).total_seconds() * 1000000)
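
These two helpers convert between Python datetimes and Chromium's timestamp format: microseconds since 1601-01-01 00:00 (often called WebKit time). A quick worked round trip, mirroring the helpers in the diff above:

```python
import datetime
import math

# mirrors the helpers in the module above
EPOCH = datetime.datetime(1601, 1, 1)

def parse_chromium_time(microseconds: int) -> datetime.datetime:
    return EPOCH + datetime.timedelta(microseconds=microseconds)

def encode_chromium_time(dt: datetime.datetime) -> int:
    return math.floor((dt - EPOCH).total_seconds() * 1000000)

# 2024-01-01 is 154,497 days = 13,348,540,800 seconds after the 1601 epoch
raw = encode_chromium_time(datetime.datetime(2024, 1, 1))
assert raw == 13348540800000000
assert parse_chromium_time(raw) == datetime.datetime(2024, 1, 1)
```
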
+
+
+class PageTransitionCoreEnum(enum.IntEnum):
+    # chrome/common/page_transition_types.h
+    link = 0
+    typed = 1
+    auto_bookmark = 2
+    auto_subframe = 3
+    manual_subframe = 4
+    generated = 5
+    start_page = 6
+    form_submit = 7
+    reload = 8
+    keyword = 9
+    keyword_generated = 10
+
+
+class PageTransitionQualifierEnum(enum.IntFlag):
+    blocked = 0x00800000
+    forward_back = 0x01000000
+    from_address_bar = 0x02000000
+    home_page = 0x04000000
+    from_api = 0x08000000
+    chain_start = 0x10000000
+    chain_end = 0x20000000
+    client_redirect = 0x40000000
+    server_redirect = 0x80000000
+
+
+@dataclasses.dataclass(frozen=True)
+class PageTransition:
+    core: PageTransitionCoreEnum
+    qualifier: PageTransitionQualifierEnum
+
+    @classmethod
+    def from_int(cls, val):
+        # the database stores the value signed; Python needs it unsigned
+        if val < 0:
+            val, = struct.unpack(">I", struct.pack(">i", val))
+
+        core = PageTransitionCoreEnum(val & 0xff)
+        qual = PageTransitionQualifierEnum(val & 0xffffff00)
+
+        return cls(core, qual)
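
A `transition` value packs the core navigation type into the low byte and the qualifier flags into the high bits; SQLite hands it back as a signed 32-bit integer, hence the signed-to-unsigned reinterpretation above. Decoding an illustrative raw value by hand:

```python
import struct

# -1610612736 is 0xA0000000 when reinterpreted as unsigned 32-bit:
# core 0x00 (link) qualified by server_redirect | chain_end
raw = -1610612736
if raw < 0:
    raw, = struct.unpack(">I", struct.pack(">i", raw))

assert raw == 0xA0000000
assert (raw & 0xff) == 0                  # PageTransitionCoreEnum.link
assert (raw & 0xffffff00) == 0xA0000000   # server_redirect | chain_end
```
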
+
+
+@dataclasses.dataclass(frozen=True)
+class HistoryRecord:
+    _owner: "HistoryDatabase" = dataclasses.field(repr=False)
+    rec_id: int
+    url: str
+    title: str
+    visit_time: datetime.datetime
+    visit_duration: datetime.timedelta
+    transition: PageTransition
+    from_visit_id: int
+    opener_visit_id: int
+
+    @property
+    def record_location(self) -> str:
+        return f"SQLite Rowid: {self.rec_id}"
+
+    @property
+    def has_parent(self) -> bool:
+        return self.from_visit_id != 0 or self.opener_visit_id != 0
+
+    @property
+    def parent_visit_id(self) -> int:
+        return self.opener_visit_id or self.from_visit_id
+
+    def get_parent(self) -> typing.Optional["HistoryRecord"]:
+        """
+        Get the parent visit for this record (based on the opener_visit field in the database,
+        falling back to from_visit), or None if there isn't one.
+        """
+        return self._owner.get_parent_of(self)
+
+    def get_children(self) -> col_abc.Iterable["HistoryRecord"]:
+        """
+        Get the children visits for this record (based on their opener_visit/from_visit
+        fields in the database).
+        """
+        return self._owner.get_children_of(self)
+
+
+class HistoryDatabase:
+    _HISTORY_QUERY = """
+        SELECT
+            "visits"."id",
+            "urls"."url",
+            "urls"."title",
+            "visits"."visit_time",
+            "visits"."from_visit",
+            "visits"."opener_visit",
+            "visits"."transition",
+            "visits"."visit_duration",
+            CASE
+                WHEN "visits"."opener_visit" != 0 THEN "visits"."opener_visit"
+                ELSE "visits"."from_visit"
+            END "parent_id"
+
+        FROM "visits"
+            LEFT JOIN "urls" ON "visits"."url" = "urls"."id"
+        """
+
+    _WHERE_URL_EQUALS_PREDICATE = """"urls"."url" = ?"""
+
+    _WHERE_URL_REGEX_PREDICATE = """"urls"."url" REGEXP ?"""
+
+    _WHERE_URL_IN_PREDICATE = """"urls"."url" IN ({parameter_question_marks})"""
+
+    _WHERE_VISIT_TIME_EARLIEST_PREDICATE = """"visits"."visit_time" >= ?"""
+
+    _WHERE_VISIT_TIME_LATEST_PREDICATE = """"visits"."visit_time" <= ?"""
+
+    _WHERE_VISIT_ID_EQUALS_PREDICATE = """"visits"."id" = ?"""
+
+    # _WHERE_FROM_VISIT_EQUALS_PREDICATE = """"visits"."from_visit" = ?"""
+
+    # _WHERE_OPENER_VISIT_EQUALS_PREDICATE = """"visits"."opener_visit" = ?"""
+
+    _WHERE_PARENT_ID_EQUALS_PREDICATE = """"parent_id" = ?"""
+
+    _DOWNLOADS_QUERY = """
+        SELECT
+            "downloads"."id",
+            "downloads"."guid",
+            "downloads"."current_path",
+            "downloads"."target_path",
+            "downloads"."start_time",
+            "downloads"."received_bytes",
+            "downloads"."total_bytes",
+            "downloads"."state",
+            "downloads"."danger_type",
+            "downloads"."interrupt_reason",
+            "downloads"."hash",
+            "downloads"."end_time",
+            "downloads"."opened",
+            "downloads"."last_access_time",
+            "downloads"."transient",
+            "downloads"."referrer",
+            "downloads"."site_url",
+            "downloads"."embedder_download_data",
+            "downloads"."tab_url",
+            "downloads"."tab_referrer_url",
+            "downloads"."http_method",
+            "downloads"."mime_type",
+            "downloads"."original_mime_type"
+        FROM "downloads";
+        """
+
+    _DOWNLOADS_URL_CHAINS_QUERY = """
+        SELECT "downloads_url_chains"."id",
+            "downloads_url_chains"."chain_index",
+            "downloads_url_chains"."url"
+        FROM "downloads_url_chains"
+        WHERE "downloads_url_chains"."id" = ?
+        ORDER BY "downloads_url_chains"."chain_index";
+        """
+
+    def __init__(self, db_path: pathlib.Path):
+        self._conn = sqlite3.connect(db_path.absolute().as_uri() + "?mode=ro", uri=True)
+        self._conn.row_factory = sqlite3.Row
+        self._conn.create_function("regexp", 2, lambda y, x: 1 if re.search(y, x) is not None else 0)
+
+    def _row_to_record(self, row: sqlite3.Row) -> HistoryRecord:
+        return HistoryRecord(
+            self,
+            row["id"],
+            row["url"],
+            row["title"],
+            parse_chromium_time(row["visit_time"]),
+            datetime.timedelta(microseconds=row["visit_duration"]),
+            PageTransition.from_int(row["transition"]),
+            row["from_visit"],
+            row["opener_visit"]
+        )
+
+    def get_parent_of(self, record: HistoryRecord) -> typing.Optional[HistoryRecord]:
+        if record.from_visit_id == 0 and record.opener_visit_id == 0:
+            return None
+
+        parent_id = record.opener_visit_id if record.opener_visit_id != 0 else record.from_visit_id
+
+        query = HistoryDatabase._HISTORY_QUERY
+        query += f" WHERE {HistoryDatabase._WHERE_VISIT_ID_EQUALS_PREDICATE};"
+        cur = self._conn.cursor()
+        cur.execute(query, (parent_id,))
+        row = cur.fetchone()
+        cur.close()
+        if row:
+            return self._row_to_record(row)
+
+    def get_children_of(self, record: HistoryRecord) -> col_abc.Iterable[HistoryRecord]:
+        query = HistoryDatabase._HISTORY_QUERY
+        predicate = HistoryDatabase._WHERE_PARENT_ID_EQUALS_PREDICATE
+        query += f" WHERE {predicate};"
+        cur = self._conn.cursor()
+        cur.execute(query, (record.rec_id,))
+        for row in cur:
+            yield self._row_to_record(row)
+
+        cur.close()
+
+    def get_record_with_id(self, visit_id: int) -> typing.Optional[HistoryRecord]:
+        query = HistoryDatabase._HISTORY_QUERY
+        query += f" WHERE {HistoryDatabase._WHERE_VISIT_ID_EQUALS_PREDICATE};"
+        cur = self._conn.cursor()
+        cur.execute(query, (visit_id,))
+        row = cur.fetchone()
+        cur.close()
+        if row:
+            return self._row_to_record(row)
+
+    def iter_history_records(
+            self, url: typing.Optional[KeySearch], *,
+            earliest: typing.Optional[datetime.datetime]=None, latest: typing.Optional[datetime.datetime]=None
+    ) -> col_abc.Iterable[HistoryRecord]:
+
+        predicates = []
+        parameters = []
+
+        if url is None:
+            pass  # no predicate
+        elif isinstance(url, str):
+            predicates.append(HistoryDatabase._WHERE_URL_EQUALS_PREDICATE)
+            parameters.append(url)
+        elif isinstance(url, re.Pattern):
+            predicates.append(HistoryDatabase._WHERE_URL_REGEX_PREDICATE)
+            parameters.append(url.pattern)
+        elif isinstance(url, col_abc.Collection):
+            predicates.append(
+                HistoryDatabase._WHERE_URL_IN_PREDICATE.format(
+                    parameter_question_marks=",".join("?" for _ in range(len(url)))))
+            parameters.extend(url)
+        elif isinstance(url, col_abc.Callable):
+            pass  # we have to call this function across every URL, so it's filtered per row below
+        else:
+            raise TypeError(f"Unexpected type: {type(url)} (expects: {KeySearch})")
+
+        if earliest is not None:
+            predicates.append(HistoryDatabase._WHERE_VISIT_TIME_EARLIEST_PREDICATE)
+            parameters.append(encode_chromium_time(earliest))
+
+        if latest is not None:
+            predicates.append(HistoryDatabase._WHERE_VISIT_TIME_LATEST_PREDICATE)
+            parameters.append(encode_chromium_time(latest))
+
+        query = HistoryDatabase._HISTORY_QUERY
+        if predicates:
+            query += f" WHERE {' AND '.join(predicates)}"
+
+        query += ";"
+        cur = self._conn.cursor()
+        for row in cur.execute(query, parameters):
+            if not isinstance(url, col_abc.Callable) or url(row["url"]):
+                yield self._row_to_record(row)
+
+        cur.close()
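
`iter_history_records` accepts any `KeySearch` for the URL: an exact string, a compiled regex (pushed down into SQLite via the registered REGEXP function), a collection of strings, or a callable predicate (applied per row in Python, which is why the Callable branch adds no SQL predicate), plus optional time bounds. A hedged usage sketch; the path and pattern are illustrative:

```python
import datetime
import pathlib
import re

from ccl_chromium_reader.ccl_chromium_history import HistoryDatabase

history = HistoryDatabase(pathlib.Path("Profile 1/History"))  # illustrative path

pattern = re.compile(r"https://example\.com/")
for rec in history.iter_history_records(
        pattern,
        earliest=datetime.datetime(2024, 1, 1),
        latest=datetime.datetime(2024, 6, 30)):
    print(rec.visit_time, rec.transition.core.name, rec.url)

history.close()
```
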
+
+    def iter_downloads(
+            self,
+            download_url: typing.Optional[KeySearch]=None,
+            tab_url: typing.Optional[KeySearch]=None) -> col_abc.Iterable[Download]:
+        downloads_cur = self._conn.cursor()
+        chain_cur = self._conn.cursor()
+
+        downloads_cur.execute(HistoryDatabase._DOWNLOADS_QUERY)
+
+        for download in downloads_cur:
+            chain_cur.execute(HistoryDatabase._DOWNLOADS_URL_CHAINS_QUERY, (download["id"],))
+            chain = tuple(x["url"] for x in chain_cur)
+
+            if download_url is not None and not any(is_keysearch_hit(download_url, x) for x in chain):
+                continue
+
+            if (tab_url is not None and
+                    not is_keysearch_hit(tab_url, download["tab_url"]) and
+                    not is_keysearch_hit(tab_url, download["tab_referrer_url"])):
+                continue
+
+            yield Download(
+                DownloadSource.history_db,
+                download["id"],
+                download["guid"],
+                download["hash"].hex(),
+                chain,
+                download["tab_url"],
+                download["tab_referrer_url"],
+                download["target_path"],
+                download["mime_type"],
+                download["original_mime_type"],
+                download["total_bytes"],
+                parse_chromium_time(download["start_time"]),
+                parse_chromium_time(download["end_time"])
+            )
+
+        downloads_cur.close()
+        chain_cur.close()
+
+    def close(self):
+        self._conn.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
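
`HistoryDatabase` also works as a context manager, so the read-only connection can be scoped with `with`. A sketch pulling download records; note that the `download_url` filter is tested against every URL in the redirect chain via `is_keysearch_hit`:

```python
import pathlib

from ccl_chromium_reader.ccl_chromium_history import HistoryDatabase

with HistoryDatabase(pathlib.Path("Profile 1/History")) as history:  # illustrative path
    for download in history.iter_downloads(download_url="https://example.com/setup.exe"):
        # Download's field layout is defined in download_common.py, which this
        # diff doesn't show, so rely on its repr here rather than named attributes
        print(download)
```
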