dfindexeddb 20241105__py3-none-any.whl → 20260205__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfindexeddb/indexeddb/chromium/blink.py +116 -74
- dfindexeddb/indexeddb/chromium/definitions.py +240 -125
- dfindexeddb/indexeddb/chromium/record.py +651 -346
- dfindexeddb/indexeddb/chromium/sqlite.py +362 -0
- dfindexeddb/indexeddb/chromium/v8.py +100 -78
- dfindexeddb/indexeddb/cli.py +282 -121
- dfindexeddb/indexeddb/firefox/definitions.py +7 -4
- dfindexeddb/indexeddb/firefox/gecko.py +98 -74
- dfindexeddb/indexeddb/firefox/record.py +78 -26
- dfindexeddb/indexeddb/safari/definitions.py +5 -3
- dfindexeddb/indexeddb/safari/record.py +86 -53
- dfindexeddb/indexeddb/safari/webkit.py +85 -71
- dfindexeddb/indexeddb/types.py +4 -1
- dfindexeddb/leveldb/cli.py +146 -138
- dfindexeddb/leveldb/definitions.py +6 -2
- dfindexeddb/leveldb/descriptor.py +70 -56
- dfindexeddb/leveldb/ldb.py +39 -33
- dfindexeddb/leveldb/log.py +41 -30
- dfindexeddb/leveldb/plugins/chrome_notifications.py +30 -18
- dfindexeddb/leveldb/plugins/interface.py +5 -6
- dfindexeddb/leveldb/plugins/manager.py +10 -9
- dfindexeddb/leveldb/record.py +71 -62
- dfindexeddb/leveldb/utils.py +105 -13
- dfindexeddb/utils.py +36 -31
- dfindexeddb/version.py +2 -2
- dfindexeddb-20260205.dist-info/METADATA +171 -0
- dfindexeddb-20260205.dist-info/RECORD +41 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/WHEEL +1 -1
- dfindexeddb-20241105.dist-info/AUTHORS +0 -12
- dfindexeddb-20241105.dist-info/METADATA +0 -424
- dfindexeddb-20241105.dist-info/RECORD +0 -41
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/entry_points.txt +0 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info/licenses}/LICENSE +0 -0
- {dfindexeddb-20241105.dist-info → dfindexeddb-20260205.dist-info}/top_level.txt +0 -0
|
@@ -17,30 +17,37 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import dataclasses
|
|
19
19
|
import logging
|
|
20
|
-
|
|
21
|
-
from typing import Optional
|
|
20
|
+
from typing import TYPE_CHECKING, Optional
|
|
22
21
|
|
|
23
22
|
try:
|
|
24
23
|
# pytype: disable=import-error
|
|
25
24
|
from dfdatetime import webkit_time
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
|
|
26
|
+
from dfindexeddb.leveldb.plugins import notification_database_data_pb2 as notification_pb2
|
|
27
|
+
|
|
28
28
|
# pytype: enable=import-error
|
|
29
29
|
_has_import_dependencies = True
|
|
30
30
|
except ImportError as err:
|
|
31
31
|
_has_import_dependencies = False
|
|
32
|
-
logging.warning(
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
logging.warning(
|
|
33
|
+
(
|
|
34
|
+
"Could not import dependencies for "
|
|
35
|
+
"leveldb.plugins.chrome_notifications: %s"
|
|
36
|
+
),
|
|
37
|
+
err,
|
|
38
|
+
)
|
|
35
39
|
|
|
36
40
|
from dfindexeddb.indexeddb.chromium import blink
|
|
37
|
-
from dfindexeddb.leveldb.plugins import interface
|
|
38
|
-
|
|
41
|
+
from dfindexeddb.leveldb.plugins import interface, manager
|
|
42
|
+
|
|
43
|
+
if TYPE_CHECKING:
|
|
44
|
+
from dfindexeddb.leveldb import ldb, log
|
|
39
45
|
|
|
40
46
|
|
|
41
47
|
@dataclasses.dataclass
|
|
42
48
|
class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
43
49
|
"""Chrome notification record."""
|
|
50
|
+
|
|
44
51
|
src_file: Optional[str] = None
|
|
45
52
|
offset: Optional[int] = None
|
|
46
53
|
key: Optional[str] = None
|
|
@@ -71,8 +78,7 @@ class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
|
71
78
|
|
|
72
79
|
@classmethod
|
|
73
80
|
def FromKeyValueRecord(
|
|
74
|
-
cls,
|
|
75
|
-
ldb_record
|
|
81
|
+
cls, ldb_record: ldb.KeyValueRecord | log.ParsedInternalKey
|
|
76
82
|
) -> ChromeNotificationRecord:
|
|
77
83
|
record = cls()
|
|
78
84
|
record.offset = ldb_record.offset
|
|
@@ -84,15 +90,17 @@ class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
|
84
90
|
return record
|
|
85
91
|
|
|
86
92
|
# pylint: disable-next=no-member,line-too-long
|
|
87
|
-
notification_proto = notification_pb2.NotificationDatabaseDataProto() #
|
|
93
|
+
notification_proto = notification_pb2.NotificationDatabaseDataProto() # type: ignore[attr-defined]
|
|
88
94
|
notification_proto.ParseFromString(ldb_record.value)
|
|
89
95
|
|
|
90
96
|
record.origin = notification_proto.origin
|
|
91
97
|
record.service_worker_registration_id = (
|
|
92
|
-
notification_proto.service_worker_registration_id
|
|
98
|
+
notification_proto.service_worker_registration_id
|
|
99
|
+
)
|
|
93
100
|
record.notification_title = notification_proto.notification_data.title
|
|
94
101
|
record.notification_direction = (
|
|
95
|
-
notification_proto.notification_data.direction
|
|
102
|
+
notification_proto.notification_data.direction
|
|
103
|
+
)
|
|
96
104
|
record.notification_lang = notification_proto.notification_data.lang
|
|
97
105
|
record.notification_body = notification_proto.notification_data.body
|
|
98
106
|
record.notification_tag = notification_proto.notification_data.tag
|
|
@@ -100,7 +108,8 @@ class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
|
100
108
|
record.notification_silent = notification_proto.notification_data.silent
|
|
101
109
|
record.notification_data = notification_proto.notification_data.data
|
|
102
110
|
record.notification_require_interaction = (
|
|
103
|
-
notification_proto.notification_data.require_interaction
|
|
111
|
+
notification_proto.notification_data.require_interaction
|
|
112
|
+
)
|
|
104
113
|
record.notification_time = webkit_time.WebKitTime(
|
|
105
114
|
timestamp=notification_proto.notification_data.timestamp
|
|
106
115
|
).CopyToDateTimeString()
|
|
@@ -109,10 +118,12 @@ class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
|
109
118
|
record.notification_image = notification_proto.notification_data.image
|
|
110
119
|
record.notification_id = notification_proto.notification_id
|
|
111
120
|
record.replaced_existing_notification = (
|
|
112
|
-
notification_proto.replaced_existing_notification
|
|
121
|
+
notification_proto.replaced_existing_notification
|
|
122
|
+
)
|
|
113
123
|
record.num_clicks = notification_proto.num_clicks
|
|
114
124
|
record.num_action_button_clicks = (
|
|
115
|
-
notification_proto.num_action_button_clicks
|
|
125
|
+
notification_proto.num_action_button_clicks
|
|
126
|
+
)
|
|
116
127
|
record.creation_time = webkit_time.WebKitTime(
|
|
117
128
|
timestamp=notification_proto.creation_time_millis
|
|
118
129
|
).CopyToDateTimeString()
|
|
@@ -123,7 +134,8 @@ class ChromeNotificationRecord(interface.LeveldbPlugin):
|
|
|
123
134
|
return record
|
|
124
135
|
|
|
125
136
|
notification_data = blink.V8ScriptValueDecoder(
|
|
126
|
-
raw_data=notification_proto.notification_data.data
|
|
137
|
+
raw_data=notification_proto.notification_data.data
|
|
138
|
+
).Deserialize()
|
|
127
139
|
record.notification_data = notification_data
|
|
128
140
|
|
|
129
141
|
return record
|
|
@@ -15,16 +15,14 @@
|
|
|
15
15
|
"""Interface for leveldb plugins."""
|
|
16
16
|
from typing import Any, Union
|
|
17
17
|
|
|
18
|
-
from dfindexeddb.leveldb import record
|
|
19
|
-
|
|
20
|
-
from dfindexeddb.leveldb import log
|
|
18
|
+
from dfindexeddb.leveldb import ldb, log, record
|
|
19
|
+
|
|
21
20
|
|
|
22
21
|
class LeveldbPlugin:
|
|
23
22
|
"""The base leveldb plugin class."""
|
|
24
23
|
|
|
25
24
|
@classmethod
|
|
26
|
-
def FromLevelDBRecord(cls,
|
|
27
|
-
ldb_record: record.LevelDBRecord) -> Any:
|
|
25
|
+
def FromLevelDBRecord(cls, ldb_record: record.LevelDBRecord) -> Any:
|
|
28
26
|
"""Parses a leveldb record."""
|
|
29
27
|
parsed_record = cls.FromKeyValueRecord(ldb_record.record)
|
|
30
28
|
ldb_record.record = parsed_record
|
|
@@ -32,5 +30,6 @@ class LeveldbPlugin:
|
|
|
32
30
|
|
|
33
31
|
@classmethod
|
|
34
32
|
def FromKeyValueRecord(
|
|
35
|
-
cls, ldb_record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
|
|
33
|
+
cls, ldb_record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
|
|
34
|
+
) -> Any:
|
|
36
35
|
"""Parses a leveldb key value record."""
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Leveldb plugin manager."""
|
|
16
|
-
from typing import Type
|
|
16
|
+
from typing import Iterable, Type
|
|
17
17
|
|
|
18
18
|
from dfindexeddb.leveldb.plugins import interface
|
|
19
19
|
|
|
@@ -21,10 +21,10 @@ from dfindexeddb.leveldb.plugins import interface
|
|
|
21
21
|
class LeveldbPluginManager:
|
|
22
22
|
"""The leveldb plugin manager."""
|
|
23
23
|
|
|
24
|
-
_class_registry = {}
|
|
24
|
+
_class_registry: dict[str, Type[interface.LeveldbPlugin]] = {}
|
|
25
25
|
|
|
26
26
|
@classmethod
|
|
27
|
-
def GetPlugins(cls):
|
|
27
|
+
def GetPlugins(cls) -> Iterable[tuple[str, type[interface.LeveldbPlugin]]]:
|
|
28
28
|
"""Retrieves the registered leveldb plugins.
|
|
29
29
|
|
|
30
30
|
Yields:
|
|
@@ -35,7 +35,7 @@ class LeveldbPluginManager:
|
|
|
35
35
|
yield from cls._class_registry.items()
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def GetPlugin(cls, plugin_name: str) -> interface.LeveldbPlugin:
|
|
38
|
+
def GetPlugin(cls, plugin_name: str) -> type[interface.LeveldbPlugin]:
|
|
39
39
|
"""Retrieves a class object of a specific plugin.
|
|
40
40
|
|
|
41
41
|
Args:
|
|
@@ -50,10 +50,10 @@ class LeveldbPluginManager:
|
|
|
50
50
|
try:
|
|
51
51
|
return cls._class_registry[plugin_name]
|
|
52
52
|
except KeyError as exc:
|
|
53
|
-
raise KeyError(f
|
|
53
|
+
raise KeyError(f"Plugin not found: {plugin_name}") from exc
|
|
54
54
|
|
|
55
55
|
@classmethod
|
|
56
|
-
def RegisterPlugin(cls, plugin_class: Type[interface.LeveldbPlugin]):
|
|
56
|
+
def RegisterPlugin(cls, plugin_class: Type[interface.LeveldbPlugin]) -> None:
|
|
57
57
|
"""Registers a leveldb plugin.
|
|
58
58
|
|
|
59
59
|
Args:
|
|
@@ -64,12 +64,13 @@ class LeveldbPluginManager:
|
|
|
64
64
|
"""
|
|
65
65
|
plugin_name = plugin_class.__name__
|
|
66
66
|
if plugin_name in cls._class_registry:
|
|
67
|
-
raise KeyError(f
|
|
67
|
+
raise KeyError(f"Plugin already registered {plugin_name}")
|
|
68
68
|
cls._class_registry[plugin_name] = plugin_class
|
|
69
69
|
|
|
70
70
|
@classmethod
|
|
71
|
-
def ClearPlugins(cls):
|
|
71
|
+
def ClearPlugins(cls) -> None:
|
|
72
72
|
"""Clears all plugin registrations."""
|
|
73
73
|
cls._class_registry = {}
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
|
|
76
|
+
PluginManager = LeveldbPluginManager() # pylint: disable=invalid-name
|
dfindexeddb/leveldb/record.py
CHANGED
|
@@ -14,18 +14,16 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""A module for records from LevelDB files."""
|
|
16
16
|
from __future__ import annotations
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
import dataclasses
|
|
19
19
|
import pathlib
|
|
20
20
|
import re
|
|
21
21
|
import sys
|
|
22
|
+
from collections import defaultdict
|
|
22
23
|
from typing import Generator, Optional, Union
|
|
23
24
|
|
|
24
25
|
from dfindexeddb import errors
|
|
25
|
-
from dfindexeddb.leveldb import definitions
|
|
26
|
-
from dfindexeddb.leveldb import descriptor
|
|
27
|
-
from dfindexeddb.leveldb import ldb
|
|
28
|
-
from dfindexeddb.leveldb import log
|
|
26
|
+
from dfindexeddb.leveldb import definitions, descriptor, ldb, log
|
|
29
27
|
|
|
30
28
|
|
|
31
29
|
@dataclasses.dataclass
|
|
@@ -42,17 +40,15 @@ class LevelDBRecord:
|
|
|
42
40
|
a file not part of the active file set (determined by a MANIFEST file).
|
|
43
41
|
recovered: True if the record is a recovered record.
|
|
44
42
|
"""
|
|
43
|
+
|
|
45
44
|
path: str
|
|
46
|
-
record: Union[
|
|
47
|
-
ldb.KeyValueRecord,
|
|
48
|
-
log.ParsedInternalKey]
|
|
45
|
+
record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
|
|
49
46
|
level: Optional[int] = None
|
|
50
47
|
recovered: Optional[bool] = None
|
|
51
48
|
|
|
52
49
|
@classmethod
|
|
53
50
|
def FromFile(
|
|
54
|
-
cls,
|
|
55
|
-
file_path: pathlib.Path
|
|
51
|
+
cls, file_path: pathlib.Path
|
|
56
52
|
) -> Generator[LevelDBRecord, None, None]:
|
|
57
53
|
"""Yields leveldb records from the given path.
|
|
58
54
|
|
|
@@ -62,19 +58,22 @@ class LevelDBRecord:
|
|
|
62
58
|
Args:
|
|
63
59
|
file_path: the file path.
|
|
64
60
|
"""
|
|
65
|
-
if file_path.name.endswith(
|
|
66
|
-
for
|
|
67
|
-
file_path.as_posix()
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
61
|
+
if file_path.name.endswith(".log"):
|
|
62
|
+
for internal_record in log.FileReader(
|
|
63
|
+
file_path.as_posix()
|
|
64
|
+
).GetParsedInternalKeys():
|
|
65
|
+
yield cls(path=file_path.as_posix(), record=internal_record)
|
|
66
|
+
elif file_path.name.endswith(".ldb"):
|
|
67
|
+
for kv_record in ldb.FileReader(
|
|
68
|
+
file_path.as_posix()
|
|
69
|
+
).GetKeyValueRecords():
|
|
70
|
+
yield cls(path=file_path.as_posix(), record=kv_record)
|
|
71
|
+
elif file_path.name.startswith("MANIFEST"):
|
|
72
|
+
print(f"Ignoring descriptor file {file_path.as_posix()}", file=sys.stderr)
|
|
73
|
+
elif file_path.name in ("LOCK", "CURRENT", "LOG", "LOG.old"):
|
|
74
|
+
print(f"Ignoring {file_path.as_posix()}", file=sys.stderr)
|
|
76
75
|
else:
|
|
77
|
-
print(f
|
|
76
|
+
print(f"Unsupported file type {file_path.as_posix()}", file=sys.stderr)
|
|
78
77
|
|
|
79
78
|
|
|
80
79
|
class FolderReader:
|
|
@@ -94,20 +93,20 @@ class FolderReader:
|
|
|
94
93
|
ValueError: if foldername is None or not a directory.
|
|
95
94
|
"""
|
|
96
95
|
if not foldername or not foldername.is_dir():
|
|
97
|
-
raise ValueError(f
|
|
96
|
+
raise ValueError(f"{foldername} is None or not a directory")
|
|
98
97
|
self.foldername = foldername
|
|
99
98
|
|
|
100
99
|
def LogFiles(self) -> Generator[pathlib.Path, None, None]:
|
|
101
100
|
"""Returns the log filenames."""
|
|
102
|
-
yield from self.foldername.glob(
|
|
101
|
+
yield from self.foldername.glob("*.log")
|
|
103
102
|
|
|
104
103
|
def LdbFiles(self) -> Generator[pathlib.Path, None, None]:
|
|
105
104
|
"""Returns the ldb filenames."""
|
|
106
|
-
yield from self.foldername.glob(
|
|
105
|
+
yield from self.foldername.glob("*.ldb")
|
|
107
106
|
|
|
108
107
|
def Manifest(self) -> Generator[pathlib.Path, None, None]:
|
|
109
108
|
"""Returns the Manifest filenames."""
|
|
110
|
-
yield from self.foldername.glob(
|
|
109
|
+
yield from self.foldername.glob("MANIFEST-*")
|
|
111
110
|
|
|
112
111
|
def GetCurrentManifestPath(self) -> pathlib.Path:
|
|
113
112
|
"""Returns the path of the current manifest file.
|
|
@@ -116,19 +115,20 @@ class FolderReader:
|
|
|
116
115
|
ParserError: when the CURRENT file does not exist/contain the expected
|
|
117
116
|
content or when the expected MANIFEST file does not exist.
|
|
118
117
|
"""
|
|
119
|
-
current_path = self.foldername /
|
|
118
|
+
current_path = self.foldername / "CURRENT"
|
|
120
119
|
if not current_path.exists():
|
|
121
|
-
raise errors.ParserError(f
|
|
120
|
+
raise errors.ParserError(f"{current_path!s} does not exist.")
|
|
122
121
|
|
|
123
122
|
current_manifest = current_path.read_text().strip()
|
|
124
123
|
manifest_regex = re.compile(definitions.MANIFEST_FILENAME_PATTERN)
|
|
125
124
|
if not manifest_regex.fullmatch(current_manifest):
|
|
126
125
|
raise errors.ParserError(
|
|
127
|
-
f
|
|
126
|
+
f"{current_path!s} does not contain the expected content"
|
|
127
|
+
)
|
|
128
128
|
|
|
129
129
|
manifest_path = self.foldername / current_manifest
|
|
130
130
|
if not manifest_path.exists():
|
|
131
|
-
raise errors.ParserError(f
|
|
131
|
+
raise errors.ParserError(f"{manifest_path!s} does not exist.")
|
|
132
132
|
return manifest_path
|
|
133
133
|
|
|
134
134
|
def GetLatestVersion(self) -> descriptor.LevelDBVersion:
|
|
@@ -139,14 +139,17 @@ class FolderReader:
|
|
|
139
139
|
"""
|
|
140
140
|
current_manifest_path = self.GetCurrentManifestPath()
|
|
141
141
|
latest_version = descriptor.FileReader(
|
|
142
|
-
str(current_manifest_path)
|
|
142
|
+
str(current_manifest_path)
|
|
143
|
+
).GetLatestVersion()
|
|
143
144
|
if not latest_version:
|
|
144
145
|
raise errors.ParserError(
|
|
145
|
-
f
|
|
146
|
+
f"Could not parse a leveldb version from {current_manifest_path!s}"
|
|
147
|
+
)
|
|
146
148
|
return latest_version
|
|
147
149
|
|
|
148
150
|
def _GetRecordsByFile(
|
|
149
|
-
self, filename: pathlib.Path
|
|
151
|
+
self, filename: pathlib.Path
|
|
152
|
+
) -> Generator[LevelDBRecord, None, None]:
|
|
150
153
|
"""Yields the LevelDBRecords from a file.
|
|
151
154
|
|
|
152
155
|
Non-log/ldb files are ignored.
|
|
@@ -157,20 +160,19 @@ class FolderReader:
|
|
|
157
160
|
Yields:
|
|
158
161
|
LevelDBRecords
|
|
159
162
|
"""
|
|
160
|
-
if filename.name.endswith(
|
|
163
|
+
if filename.name.endswith(".log"):
|
|
161
164
|
yield from self._GetLogRecords(filename)
|
|
162
|
-
elif filename.name.endswith(
|
|
165
|
+
elif filename.name.endswith(".ldb"):
|
|
163
166
|
yield from self._GetLdbRecords(filename)
|
|
164
|
-
elif filename.name.startswith(
|
|
165
|
-
print(f
|
|
166
|
-
elif filename.name in (
|
|
167
|
-
print(f
|
|
167
|
+
elif filename.name.startswith("MANIFEST"):
|
|
168
|
+
print(f"Ignoring descriptor file {filename.as_posix()}", file=sys.stderr)
|
|
169
|
+
elif filename.name in ("LOCK", "CURRENT", "LOG", "LOG.old"):
|
|
170
|
+
print(f"Ignoring {filename.as_posix()}", file=sys.stderr)
|
|
168
171
|
else:
|
|
169
|
-
print(f
|
|
172
|
+
print(f"Unsupported file type {filename.as_posix()}", file=sys.stderr)
|
|
170
173
|
|
|
171
174
|
def _GetLogRecords(
|
|
172
|
-
self,
|
|
173
|
-
filename: pathlib.Path
|
|
175
|
+
self, filename: pathlib.Path
|
|
174
176
|
) -> Generator[LevelDBRecord, None, None]:
|
|
175
177
|
"""Yields the LevelDBRecords from a log file.
|
|
176
178
|
|
|
@@ -184,8 +186,7 @@ class FolderReader:
|
|
|
184
186
|
yield LevelDBRecord(path=filename.as_posix(), record=record)
|
|
185
187
|
|
|
186
188
|
def _GetLdbRecords(
|
|
187
|
-
self,
|
|
188
|
-
filename: pathlib.Path
|
|
189
|
+
self, filename: pathlib.Path
|
|
189
190
|
) -> Generator[LevelDBRecord, None, None]:
|
|
190
191
|
"""Yields the LevelDBRecords from a log file.
|
|
191
192
|
|
|
@@ -216,7 +217,7 @@ class FolderReader:
|
|
|
216
217
|
log_records = list(self._GetLogRecords(filename=current_log_filename))
|
|
217
218
|
processed_files.add(current_log_filename)
|
|
218
219
|
else:
|
|
219
|
-
print(
|
|
220
|
+
print("No current log file.", file=sys.stderr)
|
|
220
221
|
|
|
221
222
|
# read and cache the records from the "young" or 0-level
|
|
222
223
|
young_records = []
|
|
@@ -227,8 +228,9 @@ class FolderReader:
|
|
|
227
228
|
processed_files.add(current_young_filename)
|
|
228
229
|
else:
|
|
229
230
|
print(
|
|
230
|
-
f
|
|
231
|
-
file=sys.stderr
|
|
231
|
+
f"Could not find {current_young_filename} for level 0.",
|
|
232
|
+
file=sys.stderr,
|
|
233
|
+
)
|
|
232
234
|
|
|
233
235
|
# sort the log records by the leveldb sequence number in reverse
|
|
234
236
|
# order and update the recovered attribute based on the highest sequence
|
|
@@ -237,7 +239,8 @@ class FolderReader:
|
|
|
237
239
|
for record in sorted(
|
|
238
240
|
log_records,
|
|
239
241
|
key=lambda record: record.record.sequence_number,
|
|
240
|
-
reverse=True
|
|
242
|
+
reverse=True,
|
|
243
|
+
):
|
|
241
244
|
if record.record.key not in active_records:
|
|
242
245
|
record.recovered = False
|
|
243
246
|
active_records[record.record.key] = record
|
|
@@ -251,7 +254,8 @@ class FolderReader:
|
|
|
251
254
|
for record in sorted(
|
|
252
255
|
young_records,
|
|
253
256
|
key=lambda record: record.record.sequence_number,
|
|
254
|
-
reverse=True
|
|
257
|
+
reverse=True,
|
|
258
|
+
):
|
|
255
259
|
if record.record.key not in active_records:
|
|
256
260
|
record.recovered = False
|
|
257
261
|
active_records[record.record.key] = record
|
|
@@ -262,7 +266,8 @@ class FolderReader:
|
|
|
262
266
|
yield from sorted(
|
|
263
267
|
log_records + young_records,
|
|
264
268
|
key=lambda record: record.record.sequence_number,
|
|
265
|
-
reverse=False
|
|
269
|
+
reverse=False,
|
|
270
|
+
)
|
|
266
271
|
|
|
267
272
|
# read records from the active files in each level (except the 0 level)
|
|
268
273
|
# and update the recovered and level attribute.
|
|
@@ -278,8 +283,9 @@ class FolderReader:
|
|
|
278
283
|
yield record
|
|
279
284
|
else:
|
|
280
285
|
print(
|
|
281
|
-
f
|
|
282
|
-
file=sys.stderr
|
|
286
|
+
f"Could not find {current_filename} for level {level}.",
|
|
287
|
+
file=sys.stderr,
|
|
288
|
+
)
|
|
283
289
|
|
|
284
290
|
# as a final step, parse any other log/ldb files which we will consider
|
|
285
291
|
# any records as recovered since they are not listed in the the active file
|
|
@@ -304,22 +310,27 @@ class FolderReader:
|
|
|
304
310
|
Yields:
|
|
305
311
|
LevelDBRecords.
|
|
306
312
|
"""
|
|
307
|
-
|
|
313
|
+
unsorted_records_by_key = defaultdict(list)
|
|
308
314
|
|
|
309
315
|
for filename in self.foldername.iterdir():
|
|
310
316
|
for leveldb_record in LevelDBRecord.FromFile(filename):
|
|
311
317
|
if leveldb_record:
|
|
312
|
-
|
|
313
|
-
|
|
318
|
+
unsorted_records_by_key[leveldb_record.record.key].append(
|
|
319
|
+
leveldb_record
|
|
320
|
+
)
|
|
321
|
+
for _, unsorted_records in unsorted_records_by_key.items():
|
|
314
322
|
num_unsorted_records = len(unsorted_records)
|
|
315
323
|
if num_unsorted_records == 1:
|
|
316
324
|
unsorted_records[0].recovered = False
|
|
317
325
|
yield unsorted_records[0]
|
|
318
326
|
else:
|
|
319
|
-
for i, record in enumerate(
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
327
|
+
for i, record in enumerate(
|
|
328
|
+
sorted(
|
|
329
|
+
unsorted_records,
|
|
330
|
+
key=lambda x: (x.record.sequence_number, x.record.offset),
|
|
331
|
+
),
|
|
332
|
+
start=1,
|
|
333
|
+
):
|
|
323
334
|
if i == num_unsorted_records:
|
|
324
335
|
record.recovered = False
|
|
325
336
|
else:
|
|
@@ -327,9 +338,7 @@ class FolderReader:
|
|
|
327
338
|
yield record
|
|
328
339
|
|
|
329
340
|
def GetRecords(
|
|
330
|
-
self,
|
|
331
|
-
use_manifest: bool = False,
|
|
332
|
-
use_sequence_number: bool = False
|
|
341
|
+
self, use_manifest: bool = False, use_sequence_number: bool = False
|
|
333
342
|
) -> Generator[LevelDBRecord, None, None]:
|
|
334
343
|
"""Yield LevelDBRecords.
|
|
335
344
|
|
dfindexeddb/leveldb/utils.py
CHANGED
|
@@ -14,11 +14,21 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Helper/utility classes for LevelDB."""
|
|
16
16
|
from __future__ import annotations
|
|
17
|
+
|
|
17
18
|
import io
|
|
19
|
+
import struct
|
|
18
20
|
from typing import BinaryIO, Tuple, Type, TypeVar
|
|
19
21
|
|
|
20
|
-
from dfindexeddb import errors
|
|
21
|
-
|
|
22
|
+
from dfindexeddb import errors, utils
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_CHUNK_SIZE = 8
|
|
26
|
+
_MARKER = _CHUNK_SIZE + 1
|
|
27
|
+
_EMPTY_BINARY_SENTINEL = 0
|
|
28
|
+
_SIGN_BIT = 1 << 63
|
|
29
|
+
_SENTINEL = 0
|
|
30
|
+
_TWO_BYTE_ENCODING_INDICATOR = 0x80
|
|
31
|
+
_THREE_BYTE_ENCODING_INDICATOR = 0xFF
|
|
22
32
|
|
|
23
33
|
|
|
24
34
|
class LevelDBDecoder(utils.StreamDecoder):
|
|
@@ -40,8 +50,9 @@ class LevelDBDecoder(utils.StreamDecoder):
|
|
|
40
50
|
buffer = self.stream.read()
|
|
41
51
|
if len(buffer) % 2:
|
|
42
52
|
raise errors.DecoderError(
|
|
43
|
-
f
|
|
44
|
-
|
|
53
|
+
f"Odd number of bytes encountered at offset {offset}"
|
|
54
|
+
)
|
|
55
|
+
return offset, buffer.decode("utf-16-be")
|
|
45
56
|
|
|
46
57
|
def DecodeLengthPrefixedSlice(self) -> Tuple[int, bytes]:
|
|
47
58
|
"""Returns a tuple of the offset of decoding and the byte 'slice'."""
|
|
@@ -55,14 +66,90 @@ class LevelDBDecoder(utils.StreamDecoder):
|
|
|
55
66
|
_, blob = self.ReadBytes(num_bytes)
|
|
56
67
|
return offset, blob
|
|
57
68
|
|
|
58
|
-
def DecodeStringWithLength(
|
|
69
|
+
def DecodeStringWithLength(
|
|
70
|
+
self, encoding: str = "utf-16-be"
|
|
71
|
+
) -> Tuple[int, str]:
|
|
59
72
|
"""Returns a tuple of the offset of decoding and the string value."""
|
|
60
73
|
offset, length = self.DecodeUint64Varint()
|
|
61
|
-
_, buffer = self.ReadBytes(length*2)
|
|
74
|
+
_, buffer = self.ReadBytes(length * 2)
|
|
62
75
|
return offset, buffer.decode(encoding=encoding)
|
|
63
76
|
|
|
77
|
+
def DecodeSortableBinary(self) -> Tuple[int, bytes]:
|
|
78
|
+
"""Decodes a sortable binary from the binary stream.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
a tuple of the offset and the decoded bytes.
|
|
64
82
|
|
|
65
|
-
|
|
83
|
+
Raises:
|
|
84
|
+
errors.ParserError: if an invalid tag is encountered.
|
|
85
|
+
"""
|
|
86
|
+
output = bytearray()
|
|
87
|
+
|
|
88
|
+
offset, first = self.PeekBytes(1)
|
|
89
|
+
if first[0] == _EMPTY_BINARY_SENTINEL:
|
|
90
|
+
self.ReadBytes(1)
|
|
91
|
+
return offset, b""
|
|
92
|
+
|
|
93
|
+
while True:
|
|
94
|
+
_, marker_or_sentinel = self.DecodeUint8()
|
|
95
|
+
if marker_or_sentinel == _MARKER:
|
|
96
|
+
_, chunk = self.ReadBytes(_CHUNK_SIZE)
|
|
97
|
+
output.extend(chunk)
|
|
98
|
+
continue
|
|
99
|
+
if 1 <= marker_or_sentinel <= _CHUNK_SIZE:
|
|
100
|
+
payload_len = marker_or_sentinel
|
|
101
|
+
padding_len = _CHUNK_SIZE - payload_len
|
|
102
|
+
if padding_len > 0:
|
|
103
|
+
return offset, bytes(output[:-padding_len])
|
|
104
|
+
return offset, bytes(output)
|
|
105
|
+
raise errors.ParserError(
|
|
106
|
+
f"Invalid marker or sentinel {marker_or_sentinel} in sortable binary"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def DecodeSortableDouble(self) -> Tuple[int, float]:
|
|
110
|
+
"""Decodes a sortable double-precision float from the binary stream.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
a tuple of the offset and the decoded double.
|
|
114
|
+
"""
|
|
115
|
+
offset, host_bits = self.DecodeInt(8, byte_order="big", signed=False)
|
|
116
|
+
if host_bits & _SIGN_BIT:
|
|
117
|
+
host_bits ^= _SIGN_BIT
|
|
118
|
+
else:
|
|
119
|
+
host_bits ^= 0xFFFFFFFFFFFFFFFF
|
|
120
|
+
blob = host_bits.to_bytes(8, byteorder="big")
|
|
121
|
+
return offset, struct.unpack(">d", blob)[0]
|
|
122
|
+
|
|
123
|
+
def DecodeSortableString(self) -> Tuple[int, str]:
|
|
124
|
+
"""Decodes a sortable string from the binary stream.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
a tuple of the offset and the decoded string.
|
|
128
|
+
|
|
129
|
+
Raises:
|
|
130
|
+
errors.ParserError: if an invalid byte is encountered.
|
|
131
|
+
"""
|
|
132
|
+
output = []
|
|
133
|
+
offset = self.stream.tell()
|
|
134
|
+
while True:
|
|
135
|
+
_, first = self.DecodeUint8()
|
|
136
|
+
if first == _SENTINEL:
|
|
137
|
+
break
|
|
138
|
+
if (first & 0x80) == 0:
|
|
139
|
+
output.append(chr((first & 0x7F) - 1))
|
|
140
|
+
elif (first & 0xC0) == _TWO_BYTE_ENCODING_INDICATOR:
|
|
141
|
+
_, second = self.DecodeUint8()
|
|
142
|
+
output.append(chr(((first & 0x3F) << 8) | second))
|
|
143
|
+
elif first == _THREE_BYTE_ENCODING_INDICATOR:
|
|
144
|
+
_, high = self.DecodeUint8()
|
|
145
|
+
_, low = self.DecodeUint8()
|
|
146
|
+
output.append(chr((high << 8) | low))
|
|
147
|
+
else:
|
|
148
|
+
raise errors.ParserError(f"Invalid byte {first} in sortable string")
|
|
149
|
+
return offset, "".join(output)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
T = TypeVar("T")
|
|
66
153
|
|
|
67
154
|
|
|
68
155
|
class FromDecoderMixin:
|
|
@@ -70,7 +157,8 @@ class FromDecoderMixin:
|
|
|
70
157
|
|
|
71
158
|
@classmethod
|
|
72
159
|
def FromDecoder(
|
|
73
|
-
cls: Type[T], decoder: LevelDBDecoder, base_offset: int = 0
|
|
160
|
+
cls: Type[T], decoder: LevelDBDecoder, base_offset: int = 0
|
|
161
|
+
) -> T:
|
|
74
162
|
"""Decodes a class type from the current position of a LevelDBDecoder.
|
|
75
163
|
|
|
76
164
|
Args:
|
|
@@ -86,8 +174,7 @@ class FromDecoderMixin:
|
|
|
86
174
|
raise NotImplementedError
|
|
87
175
|
|
|
88
176
|
@classmethod
|
|
89
|
-
def FromStream(
|
|
90
|
-
cls: Type[T], stream: BinaryIO, base_offset: int = 0) -> T:
|
|
177
|
+
def FromStream(cls: Type[T], stream: BinaryIO, base_offset: int = 0) -> T:
|
|
91
178
|
"""Decodes a class type from the current position of a binary stream.
|
|
92
179
|
|
|
93
180
|
Args:
|
|
@@ -98,11 +185,14 @@ class FromDecoderMixin:
|
|
|
98
185
|
The class instance.
|
|
99
186
|
"""
|
|
100
187
|
decoder = LevelDBDecoder(stream)
|
|
101
|
-
return cls.FromDecoder(
|
|
188
|
+
return cls.FromDecoder( # type: ignore[attr-defined,no-any-return]
|
|
189
|
+
decoder=decoder, base_offset=base_offset
|
|
190
|
+
)
|
|
102
191
|
|
|
103
192
|
@classmethod
|
|
104
193
|
def FromBytes(
|
|
105
|
-
cls: Type[T], raw_data: bytes, base_offset: int = 0
|
|
194
|
+
cls: Type[T], raw_data: bytes | bytearray, base_offset: int = 0
|
|
195
|
+
) -> T:
|
|
106
196
|
"""Parses a class type from raw bytes.
|
|
107
197
|
|
|
108
198
|
Args:
|
|
@@ -113,4 +203,6 @@ class FromDecoderMixin:
|
|
|
113
203
|
The class instance.
|
|
114
204
|
"""
|
|
115
205
|
stream = io.BytesIO(raw_data)
|
|
116
|
-
return cls.FromStream(
|
|
206
|
+
return cls.FromStream( # type: ignore[attr-defined,no-any-return]
|
|
207
|
+
stream=stream, base_offset=base_offset
|
|
208
|
+
)
|