linear-mcp-fast 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccl_chromium_reader/__init__.py +2 -0
- ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
- ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
- ccl_chromium_reader/ccl_chromium_history.py +357 -0
- ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
- ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
- ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
- ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
- ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
- ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
- ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
- ccl_chromium_reader/common.py +19 -0
- ccl_chromium_reader/download_common.py +78 -0
- ccl_chromium_reader/profile_folder_protocols.py +276 -0
- ccl_chromium_reader/serialization_formats/__init__.py +0 -0
- ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
- ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
- ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
- ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
- ccl_chromium_reader/storage_formats/__init__.py +0 -0
- ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
- ccl_simplesnappy/__init__.py +1 -0
- ccl_simplesnappy/ccl_simplesnappy.py +306 -0
- linear_mcp_fast/__init__.py +8 -0
- linear_mcp_fast/__main__.py +6 -0
- linear_mcp_fast/reader.py +433 -0
- linear_mcp_fast/server.py +367 -0
- linear_mcp_fast/store_detector.py +117 -0
- linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
- linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
- linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
- linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
- linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
- tools_and_utilities/Chromium_dump_local_storage.py +111 -0
- tools_and_utilities/Chromium_dump_session_storage.py +92 -0
- tools_and_utilities/benchmark.py +35 -0
- tools_and_utilities/ccl_chrome_audit.py +651 -0
- tools_and_utilities/dump_indexeddb_details.py +59 -0
- tools_and_utilities/dump_leveldb.py +53 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2021, CCL Forensics
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
5
|
+
the Software without restriction, including without limitation the rights to
|
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
8
|
+
so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import sys
|
|
23
|
+
import pathlib
|
|
24
|
+
import typing
|
|
25
|
+
import dataclasses
|
|
26
|
+
import re
|
|
27
|
+
import collections.abc as col_abc
|
|
28
|
+
from types import MappingProxyType
|
|
29
|
+
|
|
30
|
+
from .storage_formats import ccl_leveldb
|
|
31
|
+
from .common import KeySearch
|
|
32
|
+
|
|
33
|
+
# Module metadata.
__version__ = "0.6"
__description__ = "Module for reading the Chromium leveldb sessionstorage format"
__contact__ = "Alex Caithness"

# See: https://source.chromium.org/chromium/chromium/src/+/main:components/services/storage/dom_storage/session_storage_metadata.cc
# et al

# Prefix of leveldb keys describing a namespace. SessionStoreDb splits these
# keys as "namespace-<guid>-<host>" and reads the record value as a map id.
_NAMESPACE_PREFIX = b"namespace-"
# Prefix of leveldb keys holding stored values. SessionStoreDb splits these
# keys as "map-<map_id>-<session storage key>".
_MAP_ID_PREFIX = b"map-"

# Placeholder; not referenced by the code visible in this module.
log = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclasses.dataclass(frozen=True)
class SessionStoreValue:
    """
    A single (immutable) value recovered from session storage.
    """
    # host the record belongs to; None for orphaned records whose map id could not be tied to a host
    host: typing.Optional[str]
    # the script-defined session storage key
    key: str
    # the decoded value; SessionStoreDb passes None here for records that are not in the Live state
    value: typing.Optional[str]
    # guid: typing.Optional[str]
    # sequence number of the underlying leveldb record
    leveldb_sequence_number: int
    # True when the underlying leveldb record is in the Deleted state
    is_deleted: bool = False

    @property
    def record_location(self) -> str:
        """
        :return: a human-readable description of where the record was found (its leveldb sequence number)
        """
        return f"Leveldb Seq: {self.leveldb_sequence_number}"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class SessionStoreDb:
    """
    Reads a Chromium session storage leveldb folder and exposes the recovered records grouped by host.

    All records are read eagerly when the object is constructed. The object can be used as a context manager,
    in which case the underlying leveldb is closed on exit.
    """
    # todo: get all grouped by namespace by host?
    # todo: get all grouped by namespace by host.key?
    # todo: consider refactoring to only getting metadata on first pass and everything else on demand?

    def __init__(self, in_dir: pathlib.Path):
        """
        :param in_dir: the session storage leveldb folder
        :raises IOError: if in_dir is not a directory
        :raises ValueError: if the same map id is associated with two different hosts
        """
        if not in_dir.is_dir():
            raise IOError("Input directory is not a directory")

        self._ldb = ccl_leveldb.RawLevelDb(in_dir)

        # If performance is a concern we should refactor this, but slow and steady for now
        try:
            # First collect the namespace (session/tab guid + host) and map-ids together
            map_id_to_host, deleted_keys = self._load_namespaces()

            # freeze stuff
            self._map_id_to_host = MappingProxyType(map_id_to_host)
            self._deleted_keys = frozenset(deleted_keys)
            self._deleted_keys_lookup: dict[str, tuple] = {}

            self._host_lookup = {}  # {host: {ss_key: [SessionStoreValue, ...]}}
            self._orphans = []  # list of tuples of key, value where we can't get the host
            self._load_map_records()
        except BaseException:
            # don't leak the open leveldb if the data can't be processed
            self._ldb.close()
            raise

    def _load_namespaces(self) -> typing.Tuple[dict, set]:
        """
        First pass over the leveldb: reads the "namespace-" records.

        :return: a tuple of ({map_id: host}, {(guid, host), ...} for namespace records in the Deleted state)
        """
        map_id_to_host = {}  # map_id: host
        deleted_keys = set()

        for rec in self._ldb.iterate_records_raw():
            if not rec.user_key.startswith(_NAMESPACE_PREFIX):
                continue
            if rec.user_key == _NAMESPACE_PREFIX:
                continue  # bogus entry near the top usually

            try:
                key = rec.user_key.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid namespace key: {rec.user_key}")
                continue

            # maxsplit of 2 so that any further "-" characters stay in the host part
            split_key = key.split("-", 2)
            if len(split_key) != 3:
                print(f"Invalid namespace key: {key}")
                continue

            _, guid, host = split_key
            if not host:
                continue  # TODO investigate why this happens

            # normalize host to lower just in case
            host = host.lower()
            guid_host_pair = guid, host

            if rec.state == ccl_leveldb.KeyState.Deleted:
                deleted_keys.add(guid_host_pair)
                continue

            try:
                map_id = rec.value.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid namespace value: {key}")
                continue

            if not map_id:
                continue  # TODO: investigate why this happens/do we want to keep the host around somewhere?

            if map_id in map_id_to_host and map_id_to_host[map_id] != host:
                print("Map ID Collision!")
                print(f"map_id: {map_id}")
                print(f"Old host: {map_id_to_host[map_id]}")
                print(f"New host: {guid_host_pair}")
                raise ValueError("map_id collision")

            map_id_to_host[map_id] = host

        return map_id_to_host, deleted_keys

    def _load_map_records(self) -> None:
        """
        Second pass over the leveldb: reads the "map-" records into self._host_lookup, or into self._orphans
        when the map id is not associated with a host.
        """
        for rec in self._ldb.iterate_records_raw():
            if not rec.user_key.startswith(_MAP_ID_PREFIX):
                continue

            try:
                key = rec.user_key.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid map id key: {rec.user_key}")
                continue

            # if rec.state == ccl_leveldb.KeyState.Deleted:
            #     continue  # TODO: do we want to keep the key around because the presence is important?

            split_key = key.split("-", 2)
            if len(split_key) != 3:
                print(f"Invalid map id key: {key}")
                continue

            _, map_id, ss_key = split_key

            # This previously tested split_key, which always has three elements here so the check could
            # never fire; the storage key is what may be empty.
            if not ss_key:
                # TODO what does it mean when there is no key here?
                #      The value will also be a single number (encoded utf-8)
                continue

            is_deleted = rec.state == ccl_leveldb.KeyState.Deleted
            try:
                # only Live records carry a value to decode
                value = rec.value.decode("UTF-16-LE") if rec.state == ccl_leveldb.KeyState.Live else None
            except UnicodeDecodeError:
                print(f"Error decoding value for {key}")
                print(f"Raw Value: {rec.value}")
                continue

            host = self._map_id_to_host.get(map_id)
            if not host:
                self._orphans.append((ss_key, SessionStoreValue(None, ss_key, value, rec.seq, is_deleted)))
            else:
                self._host_lookup.setdefault(host, {}).setdefault(ss_key, []).append(
                    SessionStoreValue(host, ss_key, value, rec.seq, is_deleted))

    def __contains__(self, item: typing.Union[str, typing.Tuple[str, str]]) -> bool:
        """
        :param item: either the host as a str or a tuple of the host and a key (both str)
        :return: if item is a str, returns true if that host is present, if item is a tuple of (str, str), returns True
            if that host and key pair are present
        :raises TypeError: if item is neither a str nor a 2-tuple
        """
        if isinstance(item, str):
            return item in self._host_lookup
        elif isinstance(item, tuple) and len(item) == 2:
            host, key = item
            return host in self._host_lookup and key in self._host_lookup[host]
        else:
            raise TypeError("item must be a string or a tuple of (str, str)")

    def iter_hosts(self) -> typing.Iterable[str]:
        """
        :return: yields the hosts present in this SessionStorage
        """
        yield from self._host_lookup.keys()

    def get_all_for_host(self, host: str) -> dict[str, tuple[SessionStoreValue, ...]]:
        """
        DEPRECATED
        :param host: the host (domain name) for the session storage
        :return: a dictionary where the keys are storage keys and the values are tuples of SessionStoreValue objects
            for that key. Multiple values may be returned as deleted or old values may be recovered. An empty
            dictionary is returned if the host is not present.
        """
        if host not in self:
            return {}
        return {ss_key: tuple(records) for ss_key, records in self._host_lookup[host].items()}

    def _search_host(self, host: KeySearch) -> list[str]:
        """
        Resolves a host search term into a list of hosts.

        :param host: a single string (returned as-is, without checking that it is present); a regex pattern; a
            collection of strings; or a function taking a host and returning a bool
        :return: list of hosts matching the search term
        :raises TypeError: if host is none of the supported types
        """
        if isinstance(host, str):
            return [host]
        elif isinstance(host, re.Pattern):
            return [x for x in self._host_lookup if host.search(x)]
        elif isinstance(host, col_abc.Collection):
            return list(set(host) & self._host_lookup.keys())
        elif isinstance(host, col_abc.Callable):
            return [x for x in self._host_lookup if host(x)]
        else:
            raise TypeError(f"Unexpected type: {type(host)} (expects: {KeySearch})")

    def iter_records_for_host(
            self, host: KeySearch, *,
            include_deletions=False, raise_on_no_result=True) -> col_abc.Iterable[SessionStoreValue]:
        """
        :param host: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool.
        :param include_deletions: if True, records related to deletions will be included
            (these will have None as values).
        :param raise_on_no_result: if True (the default) if no matching storage keys are found, raise a KeyError;
            if False, nothing is yielded in that case.
        :return: iterable of SessionStoreValue
        :raises TypeError: if host is none of the supported types
        """
        if isinstance(host, str):
            if host not in self._host_lookup:
                if raise_on_no_result:
                    raise KeyError(host)
                # previously this fell through to a dictionary lookup which raised KeyError regardless
                return
            found_hosts = [host]
        elif isinstance(host, (re.Pattern, col_abc.Collection, col_abc.Callable)):
            found_hosts = self._search_host(host)
            if raise_on_no_result and not found_hosts:
                raise KeyError(host)
        else:
            raise TypeError(f"Unexpected type for host: {type(host)} (expects: {KeySearch})")

        for found_host in found_hosts:
            for records in self._host_lookup[found_host].values():
                for rec in records:
                    if include_deletions or not rec.is_deleted:
                        yield rec

    def iter_all_records(self, *, include_deletions=False, include_orphans=False):
        """
        Returns all records recovered from session storage
        :param include_deletions: if True, records related to deletions will be included
        :param include_orphans: if True, records which cannot be associated with a host will be included
        """
        for host in self.iter_hosts():
            yield from self.iter_records_for_host(host, include_deletions=include_deletions)
        if include_orphans:
            yield from (x[1] for x in self.iter_orphans())

    def get_session_storage_key(self, host: str, key: str) -> tuple[SessionStoreValue, ...]:
        """
        DEPRECATED
        :param host: the host (domain name) for the session storage
        :param key: the storage key
        :return: a tuple of SessionStoreValue matching the host and key. Multiple values may be returned as deleted or
            old values may be recovered. An empty tuple is returned if the host and key pair is not present.
        """
        if (host, key) not in self:
            return tuple()
        return tuple(self._host_lookup[host][key])

    def iter_records_for_session_storage_key(
            self, host: KeySearch, key: KeySearch, *,
            include_deletions=False, raise_on_no_result=True) -> col_abc.Iterable[SessionStoreValue]:
        """
        :param host: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool.
        :param key: script defined key for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool.
        :param include_deletions: if True, records related to deletions will be included
            (these will have None as values).
        :param raise_on_no_result: if True (the default) if no matching storage keys are found, raise a KeyError;
            if False, nothing is yielded in that case.
        :return: iterable of SessionStoreValue
        :raises TypeError: if host or key is none of the supported types
        """
        if isinstance(host, str) and isinstance(key, str):
            if host not in self._host_lookup or key not in self._host_lookup[host]:
                if raise_on_no_result:
                    raise KeyError((host, key))
                return

            yield from (r for r in self._host_lookup[host][key] if include_deletions or not r.is_deleted)
            return

        # _search_host returns a plain string host unchecked, so drop anything we hold no records for rather
        # than failing on the dictionary lookup below
        found_hosts = [x for x in self._search_host(host) if x in self._host_lookup]
        if raise_on_no_result and not found_hosts:
            raise KeyError((host, key))

        yielded = False
        for found_host in found_hosts:
            host_records = self._host_lookup[found_host]
            if isinstance(key, str):
                # a literal key need not exist for every matched host
                matched_keys = [key] if key in host_records else []
            elif isinstance(key, re.Pattern):
                matched_keys = [x for x in host_records.keys() if key.search(x)]
            elif isinstance(key, col_abc.Collection):
                script_key_set = set(key)
                matched_keys = list(host_records.keys() & script_key_set)
            elif isinstance(key, col_abc.Callable):
                matched_keys = [x for x in host_records.keys() if key(x)]
            else:
                raise TypeError(f"Unexpected type for script key: {type(key)} (expects: {KeySearch})")

            for matched_key in matched_keys:
                for rec in host_records[matched_key]:
                    if include_deletions or not rec.is_deleted:
                        yielded = True
                        yield rec

        if not yielded and raise_on_no_result:
            raise KeyError((host, key))

    def iter_orphans(self) -> typing.Iterable[tuple[str, SessionStoreValue]]:
        """
        Returns records which have been orphaned from their host (domain name) where it cannot be recovered. The keys
        may be named uniquely enough that the host may be inferred.
        :return: yields tuples of (session key, SessionStoreValue)
        """
        yield from self._orphans

    def __getitem__(self, item: typing.Union[str, typing.Tuple[str, str]]) -> typing.Union[
            dict[str, tuple[SessionStoreValue, ...]], tuple[SessionStoreValue, ...]]:
        """
        :param item: either the host as a str or a tuple of the host and a key (both str)
        :return: for a host, the same as get_all_for_host; for a (host, key) tuple, the same as
            get_session_storage_key
        :raises KeyError: if the host or the host and key pair is not present
        :raises TypeError: if item is neither a str nor a 2-tuple
        """
        if item not in self:
            raise KeyError(item)

        if isinstance(item, str):
            return self.get_all_for_host(item)
        elif isinstance(item, tuple) and len(item) == 2:
            return self.get_session_storage_key(*item)
        else:
            raise TypeError("item must be a string or a tuple of (str, str)")

    def __iter__(self) -> typing.Iterable[str]:
        """
        iterates the hosts present
        """
        return self.iter_hosts()

    def close(self):
        """
        Closes the underlying leveldb.
        """
        self._ldb.close()

    def __enter__(self) -> "SessionStoreDb":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def main(args):
    """
    Command line entry point: prints the hosts found in a session storage leveldb folder.

    :param args: command line arguments; args[0] is the path of the session storage leveldb folder
    """
    ldb_in_dir = pathlib.Path(args[0])

    # SessionStoreDb is a context manager; using it here makes sure the leveldb is closed when we are done
    with SessionStoreDb(ldb_in_dir) as ssdb:
        print("Hosts in db:")
        for host in ssdb:
            print(host)


if __name__ == '__main__':
    main(sys.argv[1:])
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2022, CCL Forensics
|
|
3
|
+
|
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
5
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
6
|
+
the Software without restriction, including without limitation the rights to
|
|
7
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
8
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
9
|
+
so, subject to the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be included in all
|
|
12
|
+
copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
20
|
+
SOFTWARE.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import dataclasses
|
|
24
|
+
import enum
|
|
25
|
+
import struct
|
|
26
|
+
import sys
|
|
27
|
+
import os
|
|
28
|
+
import pathlib
|
|
29
|
+
import datetime
|
|
30
|
+
import types
|
|
31
|
+
import typing
|
|
32
|
+
from .serialization_formats.ccl_easy_chromium_pickle import EasyPickleIterator, EasyPickleException
|
|
33
|
+
|
|
34
|
+
# Module metadata.
__version__ = "0.2"
__description__ = "Module for reading Chromium SNSS files"
__contact__ = "Alex Caithness"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TabRestoreIdType(enum.Enum):
    """
    Command identifiers (the first byte of each record) used when reading an SNSS file as SnssFileType.Tab.
    """
    # components/sessions/core/tab_restore_service_impl.cc
    CommandUpdateTabNavigation = 1  # the only Tab command that SnssFile parses into a NavigationEntry
    CommandRestoredEntry = 2
    CommandWindowDeprecated = 3
    CommandSelectedNavigationInTab = 4
    CommandPinnedState = 5
    CommandSetExtensionAppID = 6
    CommandSetWindowAppName = 7
    CommandSetTabUserAgentOverride = 8
    CommandWindow = 9
    CommandSetTabGroupData = 10
    CommandSetTabUserAgentOverride2 = 11
    CommandSetWindowUserTitle = 12
    CommandCreateGroup = 13
    CommandAddTabExtraData = 14

    UnusedCommand = 255
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SessionRestoreIdType(enum.Enum):
    """
    Command identifiers (the first byte of each record) used when reading an SNSS file as SnssFileType.Session.
    """
    # components/sessions/core/session_service_commands.cc
    CommandSetTabWindow = 0
    CommandSetWindowBounds = 1  # // OBSOLETE Superseded by kCommandSetWindowBounds3.
    CommandSetTabIndexInWindow = 2
    CommandTabNavigationPathPrunedFromBack = 5  # // OBSOLETE: Superseded by kCommandTabNavigationPathPruned instead
    CommandUpdateTabNavigation = 6  # the only Session command that SnssFile parses into a NavigationEntry
    CommandSetSelectedNavigationIndex = 7
    CommandSetSelectedTabInIndex = 8
    CommandSetWindowType = 9
    CommandSetWindowBounds2 = 10  # // OBSOLETE Superseded by kCommandSetWindowBounds3. Except for data migration.
    CommandTabNavigationPathPrunedFromFront = 11  # // Superseded kCommandTabNavigationPathPruned instead
    CommandSetPinnedState = 12
    CommandSetExtensionAppID = 13
    CommandSetWindowBounds3 = 14
    CommandSetWindowAppName = 15
    CommandTabClosed = 16
    CommandWindowClosed = 17
    CommandSetTabUserAgentOverride = 18  # // OBSOLETE: Superseded by kCommandSetTabUserAgentOverride2.
    CommandSessionStorageAssociated = 19
    CommandSetActiveWindow = 20
    CommandLastActiveTime = 21
    CommandSetWindowWorkspace = 22  # // OBSOLETE Superseded by kCommandSetWindowWorkspace2.
    CommandSetWindowWorkspace2 = 23
    CommandTabNavigationPathPruned = 24
    CommandSetTabGroup = 25
    CommandSetTabGroupMetadata = 26  # // OBSOLETE Superseded by kCommandSetTabGroupMetadata2.
    CommandSetTabGroupMetadata2 = 27
    CommandSetTabGuid = 28
    CommandSetTabUserAgentOverride2 = 29
    CommandSetTabData = 30
    CommandSetWindowUserTitle = 31
    CommandSetWindowVisibleOnAllWorkspaces = 32
    CommandAddTabExtraData = 33
    CommandAddWindowExtraData = 34

    # Edge has custom command types. These are what I have seen so far.
    # None of these types appear to be related to browsing data at the moment (typically only a few bytes long).
    EdgeCommandUnknown131 = 131
    EdgeCommandUnknown132 = 132

    UnusedCommand = 255
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class PageTransition:
    """
    Decodes a page transition value into its core transition (held in the low byte) and the qualifier flags
    set in the remaining bits.
    """
    # ui/base/page_transition_types.h
    _core_mask = 0xff
    _qualifier_mask = 0xffffff00
    # core transition names, keyed by their numeric value (0 through 10)
    _core_transitions = dict(enumerate((
        "Link",
        "Typed",
        "AutoBookmark",
        "AutoSubframe",
        "ManualSubframe",
        "Generated",
        "AutoToplevel",
        "FormSubmit",
        "Reload",
        "Keyword",
        "KeywordGenerated",
    )))
    # qualifier names, keyed by their flag bit
    _qualifiers = {
        0x00800000: "Blocked",
        0x01000000: "ForwardBack",
        0x02000000: "FromAddressBar",
        0x04000000: "HomePage",
        0x08000000: "FromApi",
        0x10000000: "ChainStart",
        0x20000000: "ChainEnd",
        0x40000000: "ClientRedirect",
        0x80000000: "ServerRedirect"
    }

    def __init__(self, value):
        """
        :param value: the raw transition value; a negative value is treated as a signed 32-bit integer
        :raises KeyError: if the low byte is not a known core transition
        """
        self._value = value  # kept exactly as passed in
        # signed to unsigned
        unsigned = value + (0x80000000 * 2) if value < 0 else value
        self._core_transition = PageTransition._core_transitions[unsigned & PageTransition._core_mask]
        qualifier_bits = unsigned & PageTransition._qualifier_mask
        # note: this instance attribute shadows the class-level lookup table of the same name
        self._qualifiers = [
            name for flag, name in PageTransition._qualifiers.items() if qualifier_bits & flag > 0
        ]

    def __str__(self):
        return "; ".join([self._core_transition, *self._qualifiers])

    def __repr__(self):
        return "<ChromeTransition ({0}): {1})>".format(self._value, str(self))

    @property
    def core_transition(self) -> str:
        """
        :return: the name of the core transition
        """
        return self._core_transition

    @property
    def qualifiers(self) -> typing.Iterable[str]:
        """
        :return: yields the names of the qualifiers which are set
        """
        for qualifier in self._qualifiers:
            yield qualifier

    @property
    def value(self):
        """
        :return: the raw value this object was constructed with
        """
        return self._value
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class SnssError(Exception):
    """
    Raised when SNSS data is not in the expected format (e.g. bad magic, unsupported version).
    """
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@dataclasses.dataclass(frozen=True)
class SessionCommand:
    """
    Base class for a single command (record) read from an SNSS file.
    """
    # file offset at which the record (including its length prefix) starts
    offset: int
    # the command identifier; which enum is used depends on the type of SNSS file being read
    id_type: typing.Union[SessionRestoreIdType, TabRestoreIdType]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclasses.dataclass(frozen=True)
class NavigationEntry(SessionCommand):
    """
    A navigation entry parsed from a CommandUpdateTabNavigation command.

    Only the fields up to and including transition_type are always populated; the optional fields are left
    as None when the pickled data ends before they can be read (older data).
    """
    # components/sessions/core/serialized_navigation_entry.cc
    index: int
    url: str
    title: str
    page_state_raw: bytes  # replace with completed PageState object
    transition_type: PageTransition
    has_post_data: typing.Optional[bool] = None
    referrer_url: typing.Optional[str] = None
    original_request_url: typing.Optional[str] = None
    is_overriding_user_agent: typing.Optional[bool] = None
    timestamp: typing.Optional[datetime.datetime] = None
    http_status: typing.Optional[int] = None
    referrer_policy: typing.Optional[int] = None
    extended_map: typing.Optional[types.MappingProxyType] = None
    task_id: typing.Optional[int] = None
    parent_task_id: typing.Optional[int] = None
    root_task_id: typing.Optional[int] = None
    session_id: typing.Optional[int] = None

    @classmethod
    def from_pickle(
            cls, pickle: EasyPickleIterator, id_type: typing.Union[SessionRestoreIdType, TabRestoreIdType],
            offset: int, session_id: typing.Optional[int]=None) -> "NavigationEntry":
        """
        Builds a NavigationEntry by reading its fields, in order, from a pickle iterator.

        :param pickle: pickle iterator positioned at the start of the navigation entry data
        :param id_type: the command identifier of the record being read
        :param offset: the file offset of the record being read
        :param session_id: the id read from the record ahead of the navigation entry, if any
        :return: the NavigationEntry; a partial object if the data ends after the transition type
        :raises SnssError: if a non-zero child task count is encountered
        """
        # NOTE: the order of the reads below is the serialized field order and must not be changed
        index = pickle.read_int32()
        url = pickle.read_string()
        title = pickle.read_string16()
        page_state_length = pickle.read_int32()
        # https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/common/page_state/page_state_serialization.cc;drc=d1e1301c82bef37d30796e2d6098856b851d90a4;l=897
        page_state_raw = pickle.read_aligned(page_state_length)
        transition_type = PageTransition(pickle.read_uint32())

        try:
            type_mask = pickle.read_uint32()
        except EasyPickleException:
            # very old versions of data end here, so we return a partial object here
            return cls(offset, id_type, index, url, title, page_state_raw, transition_type)

        # only the lowest bit of the type mask is interpreted
        has_post_data = (type_mask & 0x01) > 0
        referrer_url = pickle.read_string()
        _ = pickle.read_int32()  # referrer policy, not used
        original_request_url = pickle.read_string()
        is_overriding_user_agent = pickle.read_bool()
        timestamp = pickle.read_datetime()
        _ = pickle.read_string16()  # search terms, not used
        http_status = pickle.read_int32()
        referrer_policy = pickle.read_int32()

        # a count followed by that many (key, value) string pairs
        extended_map_size = pickle.read_int32()
        extended_map = {}
        for _ in range(extended_map_size):
            key = pickle.read_string()
            value = pickle.read_string()
            extended_map[key] = value

        # read-only view, in keeping with the frozen dataclass
        extended_map = types.MappingProxyType(extended_map)

        task_id = None
        parent_task_id = None
        root_task_id = None

        try:
            # these might not exist in older files, so no big deal if we can't get them
            task_id = pickle.read_int64()
            parent_task_id = pickle.read_int64()
            root_task_id = pickle.read_int64()

            child_task_id_count = pickle.read_int32()
            if child_task_id_count != 0:
                raise SnssError("Child tasks should not be present when reading NavigationEntry")
        except EasyPickleException:
            # any ids that were read before the data ran out are kept; the rest stay as None
            pass

        return cls(
            offset, id_type,
            index, url, title, page_state_raw, transition_type, has_post_data, referrer_url, original_request_url,
            is_overriding_user_agent, timestamp, http_status, referrer_policy, extended_map, task_id, parent_task_id,
            root_task_id, session_id
        )
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class UnprocessedEntry(SessionCommand):
    """
    A command which is not parsed any further: only its offset and command identifier are recorded.
    """
    ...
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class SnssFileType(enum.Enum):
    """
    The kind of SNSS file being read; this selects which set of command identifiers SnssFile uses.
    """
    Session = 1  # commands are identified using SessionRestoreIdType
    Tab = 2  # commands are identified using TabRestoreIdType
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class SnssFile:
    """
    Reader for an SNSS file: an 8 byte header (the magic b"SNSS" followed by a little-endian uint32 version),
    then a sequence of records. Each record is a little-endian uint16 length followed by that many bytes, the
    first of which is the command identifier.
    """

    def __init__(self, file_type: SnssFileType, stream: typing.BinaryIO):
        """
        :param file_type: whether the stream holds Session or Tab commands
        :param stream: a seekable binary stream positioned at the start of the SNSS data; it is not closed by
            this object
        :raises ValueError: if file_type is not a known SnssFileType
        :raises SnssError: if the header is truncated, has the wrong magic or an unsupported version
        """
        # components/sessions/core/command_storage_backend.cc
        self._f = stream
        if file_type == SnssFileType.Session:
            self._id_type = SessionRestoreIdType
        elif file_type == SnssFileType.Tab:
            self._id_type = TabRestoreIdType
        else:
            raise ValueError("file_type is an unknown SnssFileType or is not SnssFileType")

        self._file_type = file_type
        header = self._f.read(8)
        if header[0:4] != b"SNSS":
            raise SnssError(f"Invalid magic; expected SNSS; got {header[0:4]}")
        if len(header) != 8:
            # without this a short header surfaces as an opaque struct.error from the unpack below
            raise SnssError(f"Truncated header; expected 8 bytes; got {len(header)}")
        self._version, = struct.unpack("<I", header[4:8])
        if self._version not in (1, 3):
            raise SnssError(f"Expected version 1 or 3, got version {self._version}")

    @property
    def file_type(self) -> SnssFileType:
        """
        :return: the file type this object was constructed with
        """
        return self._file_type

    def reset(self):
        """
        Moves the stream back to the first record (immediately after the 8 byte header).
        """
        self._f.seek(8, os.SEEK_SET)

    def _get_next_session_command(self) -> typing.Optional[SessionCommand]:
        """
        Reads the record at the current stream position.

        :return: a NavigationEntry for CommandUpdateTabNavigation records, an UnprocessedEntry for any other
            record, or None at the end of the stream
        :raises ValueError: if the record is truncated or empty, or its identifier is not a member of the
            command enum in use
        """
        # components/sessions/core/command_storage_backend.cc
        start_offset = self._f.tell()
        length_raw = self._f.read(2)
        if not length_raw:
            return None  # eof
        if len(length_raw) != 2:
            # a lone trailing byte would otherwise surface as an opaque struct.error
            raise ValueError(f"Could not get enough data reading record starting at {start_offset}")
        length, = struct.unpack("<H", length_raw)
        if length == 0:
            # there is no identifier byte to read; previously this surfaced as an IndexError
            raise ValueError(f"Empty record (zero length) starting at {start_offset}")
        data = self._f.read(length)
        if len(data) != length:
            raise ValueError(f"Could not get enough data reading record starting at {start_offset}")
        record_id_type = self._id_type(data[0])

        # components/sessions/core/session_service_commands.cc, components/sessions/core/base_session_service_commands.cc
        # components/sessions/core/tab_restore_service_impl.cc
        if record_id_type in (SessionRestoreIdType.CommandUpdateTabNavigation, TabRestoreIdType.CommandUpdateTabNavigation):  # 6, 1
            # everything after the identifier byte is a pickle: an int32 id, then the navigation entry
            with EasyPickleIterator(data[1:]) as pickle:
                session_id = pickle.read_int32()
                nav = NavigationEntry.from_pickle(pickle, record_id_type, start_offset, session_id)
                return nav
        else:
            return UnprocessedEntry(start_offset, record_id_type)

    def iter_session_commands(self) -> typing.Iterable[SessionCommand]:
        """
        :return: yields every command in the file, always starting from the first record
        """
        self.reset()
        while (command := self._get_next_session_command()) is not None:
            yield command
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def main(args):
    """
    Command line entry point: prints every command found in an SNSS file.

    :param args: command line arguments; args[0] is the path of the SNSS file, whose name must start with
        "Session_" or "Tabs_"
    :raises ValueError: if the file name starts with neither prefix
    """
    in_path = pathlib.Path(args[0])
    file_name = in_path.name

    # the file name prefix decides which set of command identifiers applies
    if not file_name.startswith(("Session_", "Tabs_")):
        raise ValueError("File name does not start with Session or Tabs")
    file_type = SnssFileType.Session if file_name.startswith("Session_") else SnssFileType.Tab

    with in_path.open("rb") as f:
        for command in SnssFile(file_type, f).iter_session_commands():
            print(command)


if __name__ == '__main__':
    main(sys.argv[1:])
|