linear-mcp-fast 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. ccl_chromium_reader/__init__.py +2 -0
  2. ccl_chromium_reader/ccl_chromium_cache.py +1335 -0
  3. ccl_chromium_reader/ccl_chromium_filesystem.py +302 -0
  4. ccl_chromium_reader/ccl_chromium_history.py +357 -0
  5. ccl_chromium_reader/ccl_chromium_indexeddb.py +1060 -0
  6. ccl_chromium_reader/ccl_chromium_localstorage.py +454 -0
  7. ccl_chromium_reader/ccl_chromium_notifications.py +268 -0
  8. ccl_chromium_reader/ccl_chromium_profile_folder.py +568 -0
  9. ccl_chromium_reader/ccl_chromium_sessionstorage.py +368 -0
  10. ccl_chromium_reader/ccl_chromium_snss2.py +332 -0
  11. ccl_chromium_reader/ccl_shared_proto_db_downloads.py +189 -0
  12. ccl_chromium_reader/common.py +19 -0
  13. ccl_chromium_reader/download_common.py +78 -0
  14. ccl_chromium_reader/profile_folder_protocols.py +276 -0
  15. ccl_chromium_reader/serialization_formats/__init__.py +0 -0
  16. ccl_chromium_reader/serialization_formats/ccl_blink_value_deserializer.py +401 -0
  17. ccl_chromium_reader/serialization_formats/ccl_easy_chromium_pickle.py +133 -0
  18. ccl_chromium_reader/serialization_formats/ccl_protobuff.py +276 -0
  19. ccl_chromium_reader/serialization_formats/ccl_v8_value_deserializer.py +627 -0
  20. ccl_chromium_reader/storage_formats/__init__.py +0 -0
  21. ccl_chromium_reader/storage_formats/ccl_leveldb.py +582 -0
  22. ccl_simplesnappy/__init__.py +1 -0
  23. ccl_simplesnappy/ccl_simplesnappy.py +306 -0
  24. linear_mcp_fast/__init__.py +8 -0
  25. linear_mcp_fast/__main__.py +6 -0
  26. linear_mcp_fast/reader.py +433 -0
  27. linear_mcp_fast/server.py +367 -0
  28. linear_mcp_fast/store_detector.py +117 -0
  29. linear_mcp_fast-0.1.0.dist-info/METADATA +160 -0
  30. linear_mcp_fast-0.1.0.dist-info/RECORD +39 -0
  31. linear_mcp_fast-0.1.0.dist-info/WHEEL +5 -0
  32. linear_mcp_fast-0.1.0.dist-info/entry_points.txt +2 -0
  33. linear_mcp_fast-0.1.0.dist-info/top_level.txt +4 -0
  34. tools_and_utilities/Chromium_dump_local_storage.py +111 -0
  35. tools_and_utilities/Chromium_dump_session_storage.py +92 -0
  36. tools_and_utilities/benchmark.py +35 -0
  37. tools_and_utilities/ccl_chrome_audit.py +651 -0
  38. tools_and_utilities/dump_indexeddb_details.py +59 -0
  39. tools_and_utilities/dump_leveldb.py +53 -0
@@ -0,0 +1,368 @@
1
+ """
2
+ Copyright 2021, CCL Forensics
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
20
+ """
21
+
22
+ import sys
23
+ import pathlib
24
+ import typing
25
+ import dataclasses
26
+ import re
27
+ import collections.abc as col_abc
28
+ from types import MappingProxyType
29
+
30
+ from .storage_formats import ccl_leveldb
31
+ from .common import KeySearch
32
+
33
+ __version__ = "0.6"
34
+ __description__ = "Module for reading the Chromium leveldb sessionstorage format"
35
+ __contact__ = "Alex Caithness"
36
+
37
# The key layout handled here follows Chromium's session storage metadata code, see:
# https://source.chromium.org/chromium/chromium/src/+/main:components/services/storage/dom_storage/session_storage_metadata.cc
# et al

# Records whose key starts with this prefix tie a namespace (guid + host) to a map id
_NAMESPACE_PREFIX = b"namespace-"
# Records whose key starts with this prefix hold the script key/value data belonging to a map id
_MAP_ID_PREFIX = b"map-"

log = None
44
+
45
+
46
@dataclasses.dataclass(frozen=True)
class SessionStoreValue:
    """
    A single (immutable) session storage record recovered from the leveldb.
    """
    # host the record belongs to; None when the host could not be determined
    host: typing.Optional[str]
    # the script-defined storage key
    key: str
    # the stored text
    value: str
    # guid: typing.Optional[str]
    # sequence number of the underlying leveldb record
    leveldb_sequence_number: int
    # True when the underlying leveldb record is a deletion
    is_deleted: bool = False

    @property
    def record_location(self) -> str:
        """
        :return: a human readable description of where this record was found
        """
        sequence_number = self.leveldb_sequence_number
        return f"Leveldb Seq: {sequence_number}"
58
+
59
+
60
class SessionStoreDb:
    """
    Reader for a Chromium Session Storage leveldb folder.

    All raw leveldb records are read when the object is constructed. Records are grouped by host and then by
    script key; records whose map id cannot be tied to a host are kept separately as "orphans". Deleted records
    are retained (with a value of None) so that they can be reported on request.

    The object can be used as a context manager, which closes the underlying leveldb on exit.
    """
    # todo: get all grouped by namespace by host?
    # todo: get all grouped by namespace by host.key?
    # todo: consider refactoring to only getting metadata on first pass and everything else on demand?

    def __init__(self, in_dir: pathlib.Path):
        """
        :param in_dir: path of the Session Storage leveldb folder
        :raises IOError: if in_dir is not a directory
        :raises ValueError: if the same map id is associated with two different hosts
        """
        if not in_dir.is_dir():
            raise IOError("Input directory is not a directory")

        self._ldb = ccl_leveldb.RawLevelDb(in_dir)

        try:
            # If performance is a concern we should refactor this, but slow and steady for now

            # First collect the namespace (session/tab guid + host) and map-ids together
            map_id_to_host, deleted_keys = self._read_namespaces()

            # freeze stuff
            self._map_id_to_host = MappingProxyType(map_id_to_host)  # map_id: host
            self._deleted_keys = frozenset(deleted_keys)
            self._deleted_keys_lookup: dict[str, tuple] = {}

            # {host: {ss_key: [SessionStoreValue, ...]}} and a list of (key, value) tuples where we can't get the host
            self._host_lookup, self._orphans = self._read_map_records()
        except BaseException:
            # don't leak the open leveldb if the data cannot be processed
            self._ldb.close()
            raise

    def _read_namespaces(self):
        """
        Reads every "namespace-" record.

        :return: a tuple of ({map_id: host}, {(guid, host) of deleted namespace records})
        :raises ValueError: if the same map id is associated with two different hosts
        """
        map_id_to_host = {}
        deleted_keys = set()

        for rec in self._ldb.iterate_records_raw():
            if not rec.user_key.startswith(_NAMESPACE_PREFIX):
                continue
            if rec.user_key == _NAMESPACE_PREFIX:
                continue  # bogus entry near the top usually
            try:
                key = rec.user_key.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid namespace key: {rec.user_key}")
                continue

            split_key = key.split("-", 2)
            if len(split_key) != 3:
                print(f"Invalid namespace key: {key}")
                continue

            _, guid, host = split_key

            if not host:
                continue  # TODO investigate why this happens

            # normalize host to lower just in case
            host = host.lower()
            guid_host_pair = guid, host

            if rec.state == ccl_leveldb.KeyState.Deleted:
                deleted_keys.add(guid_host_pair)
                continue

            try:
                map_id = rec.value.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid namespace value: {key}")
                continue

            if not map_id:
                continue  # TODO: investigate why this happens/do we want to keep the host around somewhere?

            if map_id in map_id_to_host and map_id_to_host[map_id] != host:
                print("Map ID Collision!")
                print(f"map_id: {map_id}")
                print(f"Old host: {map_id_to_host[map_id]}")
                print(f"New host: {guid_host_pair}")
                raise ValueError("map_id collision")

            map_id_to_host[map_id] = host

        return map_id_to_host, deleted_keys

    def _read_map_records(self):
        """
        Reads every "map-" record and ties it to a host using the already populated map id lookup.

        :return: a tuple of ({host: {ss_key: [SessionStoreValue, ...]}}, [(ss_key, SessionStoreValue), ...]) where
          the second element holds the records for which no host is known
        """
        host_lookup = {}
        orphans = []

        for rec in self._ldb.iterate_records_raw():
            if not rec.user_key.startswith(_MAP_ID_PREFIX):
                continue
            try:
                key = rec.user_key.decode("utf-8")
            except UnicodeDecodeError:
                print(f"Invalid map id key: {rec.user_key}")
                continue

            # if rec.state == ccl_leveldb.KeyState.Deleted:
            #     continue  # TODO: do we want to keep the key around because the presence is important?

            split_key = key.split("-", 2)
            if len(split_key) != 3:
                print(f"Invalid map id key: {key}")
                continue

            _, map_id, ss_key = split_key

            if not ss_key:
                # TODO what does it mean when there is no key here?
                #      The value will also be a single number (encoded utf-8)
                continue

            try:
                # deleted records carry no usable value
                value = rec.value.decode("UTF-16-LE") if rec.state == ccl_leveldb.KeyState.Live else None
            except UnicodeDecodeError:
                print(f"Error decoding value for {key}")
                print(f"Raw Value: {rec.value}")
                continue

            is_deleted = rec.state == ccl_leveldb.KeyState.Deleted
            host = self._map_id_to_host.get(map_id)
            if not host:
                orphans.append((ss_key, SessionStoreValue(None, ss_key, value, rec.seq, is_deleted)))
            else:
                host_lookup.setdefault(host, {}).setdefault(ss_key, []).append(
                    SessionStoreValue(host, ss_key, value, rec.seq, is_deleted))

        return host_lookup, orphans

    def __contains__(self, item: typing.Union[str, typing.Tuple[str, str]]) -> bool:
        """
        :param item: either the host as a str or a tuple of the host and a key (both str)
        :return: if item is a str, returns true if that host is present, if item is a tuple of (str, str), returns True
            if that host and key pair are present
        :raises TypeError: if item is neither a str nor a tuple of length 2
        """

        if isinstance(item, str):
            return item in self._host_lookup
        elif isinstance(item, tuple) and len(item) == 2:
            host, key = item
            return host in self._host_lookup and key in self._host_lookup[host]
        else:
            raise TypeError("item must be a string or a tuple of (str, str)")

    def iter_hosts(self) -> typing.Iterable[str]:
        """
        :return: yields the hosts present in this SessionStorage
        """
        yield from self._host_lookup.keys()

    def get_all_for_host(self, host: str) -> dict[str, tuple[SessionStoreValue, ...]]:
        """
        DEPRECATED
        :param host: the host (domain name) for the session storage
        :return: a dictionary where the keys are storage keys and the values are tuples of SessionStoreValue objects
            for that key. Multiple values may be returned as deleted or old values may be recovered. An empty
            dictionary is returned if the host is not present.
        """
        if host not in self:
            return {}
        return {ss_key: tuple(records) for ss_key, records in self._host_lookup[host].items()}

    def _search_host(self, host: KeySearch) -> list[str]:
        """
        Resolves a host search term to the hosts which are actually present.

        :param host: a single string; a collection of strings; a regex pattern; or a function that takes a string
            (each host) and returns a bool.
        :return: list of matching hosts which are present in this SessionStorage (empty if there are none)
        :raises TypeError: if host is none of the supported types
        """
        if isinstance(host, str):
            # only report the host if it is actually present so that callers can index _host_lookup safely
            return [host] if host in self._host_lookup else []
        elif isinstance(host, re.Pattern):
            return [x for x in self._host_lookup if host.search(x)]
        elif isinstance(host, col_abc.Collection):
            return list(set(host) & self._host_lookup.keys())
        elif isinstance(host, col_abc.Callable):
            return [x for x in self._host_lookup if host(x)]
        else:
            raise TypeError(f"Unexpected type: {type(host)} (expects: {KeySearch})")

    @staticmethod
    def _search_keys(host_records: dict, key: KeySearch) -> list[str]:
        """
        Resolves a script key search term to the keys present in host_records (the {ss_key: [records]} dictionary
        of one host). Takes the same kinds of search term as _search_host.
        """
        if isinstance(key, str):
            return [key] if key in host_records else []
        elif isinstance(key, re.Pattern):
            return [x for x in host_records if key.search(x)]
        elif isinstance(key, col_abc.Collection):
            return list(host_records.keys() & set(key))
        elif isinstance(key, col_abc.Callable):
            return [x for x in host_records if key(x)]
        else:
            raise TypeError(f"Unexpected type for script key: {type(key)} (expects: {KeySearch})")

    @staticmethod
    def _filter_records(records, include_deletions: bool):
        """Yields the records, leaving out deleted ones unless include_deletions is True."""
        return (rec for rec in records if include_deletions or not rec.is_deleted)

    def iter_records_for_host(
            self, host: KeySearch, *,
            include_deletions=False, raise_on_no_result=True) -> col_abc.Iterable[SessionStoreValue]:
        """
        :param host: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool.
        :param include_deletions: if True, records related to deletions will be included
            (these will have None as values).
        :param raise_on_no_result: if True (the default) if no matching storage keys are found, raise a KeyError;
            if False, nothing is yielded in that case.
        :return: iterable of SessionStoreValue
        :raises TypeError: if host is none of the supported types
        """
        if not isinstance(host, (str, re.Pattern, col_abc.Collection, col_abc.Callable)):
            raise TypeError(f"Unexpected type for host: {type(host)} (expects: {KeySearch})")

        found_hosts = self._search_host(host)
        if not found_hosts:
            if raise_on_no_result:
                raise KeyError(host)
            return

        for found_host in found_hosts:
            for records in self._host_lookup[found_host].values():
                yield from self._filter_records(records, include_deletions)

    def iter_all_records(self, *, include_deletions=False, include_orphans=False):
        """
        Returns all records recovered from session storage
        :param include_deletions: if True, records related to deletions will be included
        :param include_orphans: if True, records which cannot be associated with a host will be included
        """
        for host in self.iter_hosts():
            yield from self.iter_records_for_host(host, include_deletions=include_deletions)
        if include_orphans:
            yield from (x[1] for x in self.iter_orphans())

    def get_session_storage_key(self, host: str, key: str) -> tuple[SessionStoreValue, ...]:
        """
        DEPRECATED
        :param host: the host (domain name) for the session storage
        :param key: the storage key
        :return: a tuple of SessionStoreValue matching the host and key. Multiple values may be returned as deleted or
            old values may be recovered. An empty tuple is returned if the host and key pair is not present.
        """
        if (host, key) not in self:
            return tuple()
        return tuple(self._host_lookup[host][key])

    def iter_records_for_session_storage_key(
            self, host: KeySearch, key: KeySearch, *,
            include_deletions=False, raise_on_no_result=True) -> col_abc.Iterable[SessionStoreValue]:
        """
        :param host: storage key (host) for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string (each host) and returns a bool.
        :param key: script defined key for the records. This can be one of: a single string;
            a collection of strings; a regex pattern; a function that takes a string and returns a bool.
        :param include_deletions: if True, records related to deletions will be included
            (these will have None as values).
        :param raise_on_no_result: if True (the default) if no matching storage keys are found, raise a KeyError;
            if False, nothing is yielded in that case.
        :return: iterable of SessionStoreValue
        :raises TypeError: if host or key is none of the supported types
        """
        if isinstance(host, str) and isinstance(key, str):
            if host not in self._host_lookup or key not in self._host_lookup[host]:
                if raise_on_no_result:
                    raise KeyError((host, key))
                return

            yield from self._filter_records(self._host_lookup[host][key], include_deletions)
            return

        found_hosts = self._search_host(host)
        if raise_on_no_result and not found_hosts:
            raise KeyError((host, key))

        yielded = False
        for found_host in found_hosts:
            host_records = self._host_lookup[found_host]
            # a key is only looked up for the hosts that actually have it
            for matched_key in self._search_keys(host_records, key):
                for rec in self._filter_records(host_records[matched_key], include_deletions):
                    yielded = True
                    yield rec

        if not yielded and raise_on_no_result:
            raise KeyError((host, key))

    def iter_orphans(self) -> typing.Iterable[tuple[str, SessionStoreValue]]:
        """
        Returns records which have been orphaned from their host (domain name) where it cannot be recovered. The keys
        may be named uniquely enough that the host may be inferred.
        :return: yields tuples of (session key, SessionStoreValue)
        """
        yield from self._orphans

    def __getitem__(self, item: typing.Union[str, typing.Tuple[str, str]]) -> typing.Union[
            dict[str, tuple[SessionStoreValue, ...]], tuple[SessionStoreValue, ...]]:
        """
        :param item: either the host as a str or a tuple of the host and a key (both str)
        :return: for a host, the same as get_all_for_host; for a (host, key) tuple, the same as
            get_session_storage_key
        :raises KeyError: if the host, or the host and key pair, is not present
        :raises TypeError: if item is neither a str nor a tuple of length 2
        """
        if item not in self:
            raise KeyError(item)

        if isinstance(item, str):
            return self.get_all_for_host(item)
        elif isinstance(item, tuple) and len(item) == 2:
            return self.get_session_storage_key(*item)
        else:
            raise TypeError("item must be a string or a tuple of (str, str)")

    def __iter__(self) -> typing.Iterable[str]:
        """
        iterates the hosts present
        """
        return self.iter_hosts()

    def close(self):
        """Closes the underlying leveldb."""
        self._ldb.close()

    def __enter__(self) -> "SessionStoreDb":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
356
+
357
+
358
def main(args):
    """
    Command line entry point: prints each host found in a Session Storage leveldb folder.

    :param args: the command line arguments without the program name; args[0] is the path of the leveldb folder
    """
    ldb_in_dir = pathlib.Path(args[0])

    # the context manager makes sure the underlying leveldb is closed, even if listing the hosts fails
    with SessionStoreDb(ldb_in_dir) as ssdb:
        print("Hosts in db:")
        for host in ssdb:
            print(host)


if __name__ == '__main__':
    main(sys.argv[1:])
@@ -0,0 +1,332 @@
1
+ """
2
+ Copyright 2022, CCL Forensics
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
5
+ this software and associated documentation files (the "Software"), to deal in
6
+ the Software without restriction, including without limitation the rights to
7
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ of the Software, and to permit persons to whom the Software is furnished to do
9
+ so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
21
+ """
22
+
23
+ import dataclasses
24
+ import enum
25
+ import struct
26
+ import sys
27
+ import os
28
+ import pathlib
29
+ import datetime
30
+ import types
31
+ import typing
32
+ from .serialization_formats.ccl_easy_chromium_pickle import EasyPickleIterator, EasyPickleException
33
+
34
+ __version__ = "0.2"
35
+ __description__ = "Module for reading Chromium SNSS files"
36
+ __contact__ = "Alex Caithness"
37
+
38
+
39
class TabRestoreIdType(enum.Enum):
    """
    Command identifiers used for records in "Tabs" SNSS files.

    See: components/sessions/core/tab_restore_service_impl.cc
    """
    CommandUpdateTabNavigation = 1
    CommandRestoredEntry = 2
    CommandWindowDeprecated = 3
    CommandSelectedNavigationInTab = 4
    CommandPinnedState = 5
    CommandSetExtensionAppID = 6
    CommandSetWindowAppName = 7
    CommandSetTabUserAgentOverride = 8
    CommandWindow = 9
    CommandSetTabGroupData = 10
    CommandSetTabUserAgentOverride2 = 11
    CommandSetWindowUserTitle = 12
    CommandCreateGroup = 13
    CommandAddTabExtraData = 14

    UnusedCommand = 255
57
+
58
+
59
class SessionRestoreIdType(enum.Enum):
    """
    Command identifiers used for records in "Session" SNSS files.

    See: components/sessions/core/session_service_commands.cc
    """
    CommandSetTabWindow = 0
    CommandSetWindowBounds = 1  # OBSOLETE: superseded by kCommandSetWindowBounds3
    CommandSetTabIndexInWindow = 2
    CommandTabNavigationPathPrunedFromBack = 5  # OBSOLETE: superseded by kCommandTabNavigationPathPruned
    CommandUpdateTabNavigation = 6
    CommandSetSelectedNavigationIndex = 7
    CommandSetSelectedTabInIndex = 8
    CommandSetWindowType = 9
    CommandSetWindowBounds2 = 10  # OBSOLETE: superseded by kCommandSetWindowBounds3, except for data migration
    CommandTabNavigationPathPrunedFromFront = 11  # superseded by kCommandTabNavigationPathPruned
    CommandSetPinnedState = 12
    CommandSetExtensionAppID = 13
    CommandSetWindowBounds3 = 14
    CommandSetWindowAppName = 15
    CommandTabClosed = 16
    CommandWindowClosed = 17
    CommandSetTabUserAgentOverride = 18  # OBSOLETE: superseded by kCommandSetTabUserAgentOverride2
    CommandSessionStorageAssociated = 19
    CommandSetActiveWindow = 20
    CommandLastActiveTime = 21
    CommandSetWindowWorkspace = 22  # OBSOLETE: superseded by kCommandSetWindowWorkspace2
    CommandSetWindowWorkspace2 = 23
    CommandTabNavigationPathPruned = 24
    CommandSetTabGroup = 25
    CommandSetTabGroupMetadata = 26  # OBSOLETE: superseded by kCommandSetTabGroupMetadata2
    CommandSetTabGroupMetadata2 = 27
    CommandSetTabGuid = 28
    CommandSetTabUserAgentOverride2 = 29
    CommandSetTabData = 30
    CommandSetWindowUserTitle = 31
    CommandSetWindowVisibleOnAllWorkspaces = 32
    CommandAddTabExtraData = 33
    CommandAddWindowExtraData = 34

    # Edge has custom command types. These are what I have seen so far.
    # None of these types appear to be related to browsing data at the moment (typically only a few bytes long).
    EdgeCommandUnknown131 = 131
    EdgeCommandUnknown132 = 132

    UnusedCommand = 255
101
+
102
+
103
class PageTransition:
    """
    Decodes a page transition value into its core transition (the low byte) and the qualifier flags set in
    the remaining bits.

    See: ui/base/page_transition_types.h
    """
    _core_mask = 0xff
    _qualifier_mask = 0xffffff00
    _core_transitions = {
        0: "Link",
        1: "Typed",
        2: "AutoBookmark",
        3: "AutoSubframe",
        4: "ManualSubframe",
        5: "Generated",
        6: "AutoToplevel",
        7: "FormSubmit",
        8: "Reload",
        9: "Keyword",
        10: "KeywordGenerated",
    }
    _qualifiers = {
        0x00800000: "Blocked",
        0x01000000: "ForwardBack",
        0x02000000: "FromAddressBar",
        0x04000000: "HomePage",
        0x08000000: "FromApi",
        0x10000000: "ChainStart",
        0x20000000: "ChainEnd",
        0x40000000: "ClientRedirect",
        0x80000000: "ServerRedirect",
    }

    def __init__(self, value):
        """
        :param value: the raw transition value; negative values are treated as signed 32 bit numbers
        :raises KeyError: if the core transition (low byte) is not one of the known values
        """
        # the value is stored exactly as it was passed in
        self._value = value

        # signed to unsigned
        unsigned = value + (0x80000000 * 2) if value < 0 else value

        core_id = unsigned & PageTransition._core_mask
        self._core_transition = PageTransition._core_transitions[core_id]

        qualifier_bits = unsigned & PageTransition._qualifier_mask
        self._qualifiers = [
            name for flag, name in PageTransition._qualifiers.items() if (qualifier_bits & flag) > 0]

    def __str__(self):
        parts = [self._core_transition]
        parts.extend(self._qualifiers)
        return "; ".join(parts)

    def __repr__(self):
        return "<ChromeTransition ({0}): {1})>".format(self._value, str(self))

    @property
    def core_transition(self) -> str:
        """The name of the core transition type."""
        return self._core_transition

    @property
    def qualifiers(self) -> typing.Iterable[str]:
        """Yields the names of the qualifier flags which are set."""
        for qualifier in self._qualifiers:
            yield qualifier

    @property
    def value(self):
        """The value this object was created from (unchanged, so may be negative)."""
        return self._value
160
+
161
+
162
class SnssError(Exception):
    """Raised when SNSS data does not have the expected structure."""
164
+
165
+
166
@dataclasses.dataclass(frozen=True)
class SessionCommand:
    """
    Base (immutable) record for a single command read from an SNSS file.
    """
    # offset in the file at which the command's record starts
    offset: int
    # the command's identifier; which enumeration is used depends on the type of file being read
    id_type: typing.Union[SessionRestoreIdType, TabRestoreIdType]
170
+
171
+
172
@dataclasses.dataclass(frozen=True)
class NavigationEntry(SessionCommand):
    """
    A navigation entry (a page visited in a tab) read from an "update tab navigation" command.

    Only the fields up to and including transition_type are always populated; the remaining fields are None
    when the data was written by a version old enough not to contain them.
    """
    # components/sessions/core/serialized_navigation_entry.cc
    index: int
    url: str
    title: str
    page_state_raw: bytes  # replace with completed PageState object
    transition_type: PageTransition
    has_post_data: typing.Optional[bool] = None
    referrer_url: typing.Optional[str] = None
    original_request_url: typing.Optional[str] = None
    is_overriding_user_agent: typing.Optional[bool] = None
    timestamp: typing.Optional[datetime.datetime] = None
    http_status: typing.Optional[int] = None
    referrer_policy: typing.Optional[int] = None
    extended_map: typing.Optional[types.MappingProxyType] = None
    task_id: typing.Optional[int] = None
    parent_task_id: typing.Optional[int] = None
    root_task_id: typing.Optional[int] = None
    session_id: typing.Optional[int] = None

    @classmethod
    def from_pickle(
            cls, pickle, id_type: typing.Union[SessionRestoreIdType, TabRestoreIdType],
            offset: int, session_id: typing.Optional[int]=None) -> "NavigationEntry":
        """
        Builds a NavigationEntry by reading its fields, in order, from a pickle iterator.

        :param pickle: the pickle iterator, positioned at the start of the navigation entry data
        :param id_type: the command identifier of the record being read
        :param offset: the offset of the record in the file
        :param session_id: the session id which was read ahead of the navigation entry data, if any
        :return: the NavigationEntry; if the data ends after the transition type (very old data) the later
          fields are left as None
        :raises SnssError: if the data reports child tasks
        """
        index = pickle.read_int32()
        url = pickle.read_string()
        title = pickle.read_string16()
        page_state_length = pickle.read_int32()
        # https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/common/page_state/page_state_serialization.cc;drc=d1e1301c82bef37d30796e2d6098856b851d90a4;l=897
        page_state_raw = pickle.read_aligned(page_state_length)
        transition_type = PageTransition(pickle.read_uint32())

        try:
            type_mask = pickle.read_uint32()
        except EasyPickleException:
            # very old versions of data end here, so we return a partial object here (the session id is already
            # known at this point, so it is kept rather than being discarded)
            return cls(offset, id_type, index, url, title, page_state_raw, transition_type, session_id=session_id)

        has_post_data = (type_mask & 0x01) > 0
        referrer_url = pickle.read_string()
        _ = pickle.read_int32()  # referrer policy, not used
        original_request_url = pickle.read_string()
        is_overriding_user_agent = pickle.read_bool()
        timestamp = pickle.read_datetime()
        _ = pickle.read_string16()  # search terms, not used
        http_status = pickle.read_int32()
        referrer_policy = pickle.read_int32()

        extended_map_size = pickle.read_int32()
        extended_map = {}
        for _ in range(extended_map_size):
            key = pickle.read_string()
            value = pickle.read_string()
            extended_map[key] = value

        # read-only view, in keeping with the frozen dataclass
        extended_map = types.MappingProxyType(extended_map)

        task_id = None
        parent_task_id = None
        root_task_id = None

        try:
            # these might not exist in older files, so no big deal if we can't get them
            task_id = pickle.read_int64()
            parent_task_id = pickle.read_int64()
            root_task_id = pickle.read_int64()

            child_task_id_count = pickle.read_int32()
            if child_task_id_count != 0:
                raise SnssError("Child tasks should not be present when reading NavigationEntry")
        except EasyPickleException:
            pass

        return cls(
            offset, id_type,
            index, url, title, page_state_raw, transition_type, has_post_data, referrer_url, original_request_url,
            is_overriding_user_agent, timestamp, http_status, referrer_policy, extended_map, task_id, parent_task_id,
            root_task_id, session_id
        )
252
+
253
+
254
class UnprocessedEntry(SessionCommand):
    """A command whose data is not decoded; only the offset and command identifier are available."""
256
+
257
+
258
class SnssFileType(enum.Enum):
    """The kinds of SNSS file; this decides which set of command identifiers is used to read the file."""
    Session = 1
    Tab = 2
261
+
262
+
263
class SnssFile:
    """
    Reader for an SNSS file: an 8 byte header ("SNSS" followed by a little-endian version number) followed by
    records, each made up of a 2 byte little-endian length, a 1 byte command identifier and the command's data.
    """

    def __init__(self, file_type: SnssFileType, stream: typing.BinaryIO):
        """
        :param file_type: the kind of SNSS file held by the stream
        :param stream: binary stream positioned at the start of the SNSS data; it is not closed by this object
        :raises ValueError: if file_type is not a known SnssFileType
        :raises SnssError: if the header is missing, truncated, or has an unexpected version
        """
        # components/sessions/core/command_storage_backend.cc
        self._f = stream
        if file_type == SnssFileType.Session:
            self._id_type = SessionRestoreIdType
        elif file_type == SnssFileType.Tab:
            self._id_type = TabRestoreIdType
        else:
            raise ValueError("file_type is an unknown SnssFileType or is not SnssFileType")

        self._file_type = file_type
        header = self._f.read(8)
        if header[0:4] != b"SNSS":
            raise SnssError(f"Invalid magic; expected SNSS; got {header[0:4]}")
        if len(header) != 8:
            # valid magic, but the version field is cut short
            raise SnssError(f"Truncated header; expected 8 bytes, got {len(header)}")
        self._version, = struct.unpack("<I", header[4:8])
        if self._version not in (1, 3):
            raise SnssError(f"Expected version 1 or 3, got version {self._version}")

    @property
    def file_type(self) -> SnssFileType:
        """The kind of SNSS file being read."""
        return self._file_type

    def reset(self):
        """Moves the stream back to the first record (straight after the 8 byte header)."""
        self._f.seek(8, os.SEEK_SET)

    def _get_next_session_command(self) -> typing.Optional[SessionCommand]:
        """
        Reads the record at the current position of the stream.

        :return: a NavigationEntry for "update tab navigation" commands, an UnprocessedEntry for any other
          command, or None if the end of the stream has been reached
        :raises ValueError: if the record is truncated or empty, or if its command identifier is unknown
        """
        # components/sessions/core/command_storage_backend.cc
        start_offset = self._f.tell()
        length_raw = self._f.read(2)
        if not length_raw:
            return None  # eof
        if len(length_raw) != 2:
            # a lone trailing byte cannot be a length prefix
            raise ValueError(f"Could not get enough data reading record starting at {start_offset}")
        length, = struct.unpack("<H", length_raw)
        data = self._f.read(length)
        if len(data) != length:
            raise ValueError(f"Could not get enough data reading record starting at {start_offset}")
        if not data:
            # every record needs at least the command identifier byte
            raise ValueError(f"Zero-length record starting at {start_offset}")
        record_id_type = self._id_type(data[0])

        # components/sessions/core/session_service_commands.cc, components/sessions/core/base_session_service_commands.cc
        # components/sessions/core/tab_restore_service_impl.cc
        if record_id_type in (SessionRestoreIdType.CommandUpdateTabNavigation, TabRestoreIdType.CommandUpdateTabNavigation):  # 6, 1
            with EasyPickleIterator(data[1:]) as pickle:
                session_id = pickle.read_int32()
                nav = NavigationEntry.from_pickle(pickle, record_id_type, start_offset, session_id)
                return nav
        else:
            return UnprocessedEntry(start_offset, record_id_type)

    def iter_session_commands(self) -> typing.Iterable[SessionCommand]:
        """
        Yields every command in the file, always starting again from the first record.
        """
        self.reset()
        while command := self._get_next_session_command():
            yield command
315
+
316
+
317
def main(args):
    """
    Command line entry point: prints every command found in an SNSS file.

    :param args: the command line arguments without the program name; args[0] is the path of the SNSS file,
      the name of which has to start with "Session_" or "Tabs_" as this decides how the file is read
    """
    in_path = pathlib.Path(args[0])
    file_name = in_path.name

    # reject unknown file names up front
    if not file_name.startswith(("Session_", "Tabs_")):
        raise ValueError("File name does not start with Session or Tabs")

    file_type = SnssFileType.Session if file_name.startswith("Session_") else SnssFileType.Tab

    with in_path.open("rb") as f:
        for command in SnssFile(file_type, f).iter_session_commands():
            print(command)


if __name__ == '__main__':
    main(sys.argv[1:])