dfindexeddb 20240305__tar.gz → 20240331__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dfindexeddb-20240305/dfindexeddb.egg-info → dfindexeddb-20240331}/PKG-INFO +74 -30
- dfindexeddb-20240331/README.md +140 -0
- dfindexeddb-20240331/dfindexeddb/indexeddb/cli.py +112 -0
- dfindexeddb-20240331/dfindexeddb/indexeddb/utils.py +0 -0
- dfindexeddb-20240331/dfindexeddb/leveldb/cli.py +260 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/leveldb/definitions.py +16 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/leveldb/descriptor.py +61 -14
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/leveldb/ldb.py +20 -24
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/leveldb/log.py +25 -18
- dfindexeddb-20240331/dfindexeddb/leveldb/record.py +102 -0
- dfindexeddb-20240331/dfindexeddb/leveldb/utils.py +116 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/utils.py +5 -46
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/version.py +1 -1
- {dfindexeddb-20240305 → dfindexeddb-20240331/dfindexeddb.egg-info}/PKG-INFO +74 -30
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb.egg-info/SOURCES.txt +6 -6
- dfindexeddb-20240331/dfindexeddb.egg-info/entry_points.txt +3 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/pyproject.toml +3 -2
- dfindexeddb-20240305/README.md +0 -96
- dfindexeddb-20240305/dfindexeddb/cli.py +0 -180
- dfindexeddb-20240305/dfindexeddb/indexeddb/blink.py +0 -115
- dfindexeddb-20240305/dfindexeddb/indexeddb/chromium.py +0 -1360
- dfindexeddb-20240305/dfindexeddb/indexeddb/definitions.py +0 -306
- dfindexeddb-20240305/dfindexeddb/indexeddb/v8.py +0 -642
- dfindexeddb-20240305/dfindexeddb.egg-info/entry_points.txt +0 -2
- {dfindexeddb-20240305 → dfindexeddb-20240331}/AUTHORS +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/LICENSE +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/__init__.py +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/errors.py +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/indexeddb/__init__.py +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb/leveldb/__init__.py +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb.egg-info/dependency_links.txt +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb.egg-info/requires.txt +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/dfindexeddb.egg-info/top_level.txt +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/setup.cfg +0 -0
- {dfindexeddb-20240305 → dfindexeddb-20240331}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dfindexeddb
|
|
3
|
-
Version:
|
|
3
|
+
Version: 20240331
|
|
4
4
|
Summary: dfindexeddb is an experimental Python tool for performing digital forensic analysis of IndexedDB and leveldb files.
|
|
5
5
|
Author-email: Syd Pleno <sydp@google.com>
|
|
6
6
|
Maintainer-email: dfIndexeddb Developers <dfindexeddb-dev@googlegroups.com>
|
|
@@ -226,7 +226,9 @@ dfindexeddb is an experimental Python tool for performing digital forensic
|
|
|
226
226
|
analysis of IndexedDB and leveldb files.
|
|
227
227
|
|
|
228
228
|
It parses leveldb, IndexedDB and JavaScript structures from these files without
|
|
229
|
-
requiring native libraries.
|
|
229
|
+
requiring native libraries. (Note: only a subset of IndexedDB key types and
|
|
230
|
+
JavaScript types for Chromium-based browsers are currently supported. Safari
|
|
231
|
+
and Firefox support is under development.)
|
|
230
232
|
|
|
231
233
|
The content of IndexedDB files is dependent on what a web application stores
|
|
232
234
|
locally/offline using the web browser's
|
|
@@ -236,25 +238,34 @@ include:
|
|
|
236
238
|
* emails and contact information from an e-mail application,
|
|
237
239
|
* images and metadata from a photo gallery application
|
|
238
240
|
|
|
241
|
+
|
|
239
242
|
## Installation
|
|
240
243
|
|
|
244
|
+
1. [Linux] Install the snappy compression development package
|
|
245
|
+
|
|
241
246
|
```
|
|
242
|
-
$
|
|
247
|
+
$ sudo apt install libsnappy-dev
|
|
243
248
|
```
|
|
244
249
|
|
|
245
|
-
|
|
250
|
+
2. Create a virtual environment and install the package
|
|
251
|
+
|
|
252
|
+
```
|
|
253
|
+
$ python3 -m venv .venv
|
|
254
|
+
$ source .venv/bin/activate
|
|
255
|
+
$ pip install dfindexeddb
|
|
256
|
+
```
|
|
246
257
|
|
|
247
|
-
|
|
258
|
+
## Installation from source
|
|
248
259
|
|
|
249
|
-
1. Install the snappy compression development package
|
|
260
|
+
1. [Linux] Install the snappy compression development package
|
|
250
261
|
|
|
251
262
|
```
|
|
252
263
|
$ sudo apt install libsnappy-dev
|
|
253
264
|
```
|
|
254
265
|
|
|
255
|
-
2. Clone or download the repository to your local machine.
|
|
266
|
+
2. Clone or download/unzip the repository to your local machine.
|
|
256
267
|
|
|
257
|
-
3. Create a
|
|
268
|
+
3. Create a virtual environment and install the package
|
|
258
269
|
|
|
259
270
|
```
|
|
260
271
|
$ python3 -m venv .venv
|
|
@@ -264,55 +275,88 @@ $ pip install dfindexeddb
|
|
|
264
275
|
|
|
265
276
|
## Usage
|
|
266
277
|
|
|
267
|
-
|
|
278
|
+
Two CLI tools for parsing IndexedDB/leveldb files are available after
|
|
279
|
+
installation:
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
### IndexedDB
|
|
268
283
|
|
|
269
284
|
```
|
|
270
285
|
$ dfindexeddb -h
|
|
271
|
-
usage: dfindexeddb [-h] -s SOURCE [
|
|
286
|
+
usage: dfindexeddb [-h] -s SOURCE [-o {json,jsonl,repr}]
|
|
272
287
|
|
|
273
|
-
A cli tool for
|
|
274
|
-
|
|
275
|
-
positional arguments:
|
|
276
|
-
{log,ldb,indexeddb}
|
|
288
|
+
A cli tool for parsing indexeddb files
|
|
277
289
|
|
|
278
290
|
options:
|
|
291
|
+
-h, --help show this help message and exit
|
|
279
292
|
-s SOURCE, --source SOURCE
|
|
280
|
-
The source leveldb
|
|
281
|
-
|
|
293
|
+
The source leveldb folder
|
|
294
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
295
|
+
Output format. Default is json
|
|
282
296
|
```
|
|
283
297
|
|
|
284
|
-
|
|
298
|
+
### LevelDB
|
|
285
299
|
|
|
286
300
|
```
|
|
287
|
-
$
|
|
288
|
-
usage:
|
|
301
|
+
$ dfleveldb -h
|
|
302
|
+
usage: dfleveldb [-h] {db,log,ldb,descriptor} ...
|
|
303
|
+
|
|
304
|
+
A cli tool for parsing leveldb files
|
|
289
305
|
|
|
290
306
|
positional arguments:
|
|
291
|
-
{
|
|
307
|
+
{db,log,ldb,descriptor}
|
|
308
|
+
db Parse a directory as leveldb.
|
|
309
|
+
log Parse a leveldb log file.
|
|
310
|
+
ldb Parse a leveldb table (.ldb) file.
|
|
311
|
+
descriptor Parse a leveldb descriptor (MANIFEST) file.
|
|
292
312
|
|
|
293
313
|
options:
|
|
294
314
|
-h, --help show this help message and exit
|
|
295
315
|
```
|
|
296
316
|
|
|
297
|
-
To parse a LevelDB .
|
|
317
|
+
To parse records from a LevelDB log (.log) file, use the following command:
|
|
298
318
|
|
|
299
319
|
```
|
|
300
|
-
$
|
|
301
|
-
usage: dfindexeddb ldb [-h] {blocks,records}
|
|
320
|
+
$ dfleveldb log -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,physical_records,write_batches,parsed_internal_key}]
|
|
302
321
|
|
|
303
|
-
|
|
304
|
-
|
|
322
|
+
options:
|
|
323
|
+
-h, --help show this help message and exit
|
|
324
|
+
-s SOURCE, --source SOURCE
|
|
325
|
+
The source leveldb file
|
|
326
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
327
|
+
Output format. Default is json
|
|
328
|
+
-t {blocks,physical_records,write_batches,parsed_internal_key}, --structure_type {blocks,physical_records,write_batches,parsed_internal_key}
|
|
329
|
+
Parses the specified structure. Default is parsed_internal_key.
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
To parse records from a LevelDB table (.ldb) file, use the following command:
|
|
333
|
+
|
|
334
|
+
```
|
|
335
|
+
$ dfleveldb ldb -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,records}]
|
|
305
336
|
|
|
306
337
|
options:
|
|
307
|
-
-h, --help
|
|
338
|
+
-h, --help show this help message and exit
|
|
339
|
+
-s SOURCE, --source SOURCE
|
|
340
|
+
The source leveldb file
|
|
341
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
342
|
+
Output format. Default is json
|
|
343
|
+
-t {blocks,records}, --structure_type {blocks,records}
|
|
344
|
+
Parses the specified structure. Default is records.
|
|
308
345
|
```
|
|
309
346
|
|
|
310
|
-
To parse
|
|
347
|
+
To parse version edit records from a Descriptor (MANIFEST) file:
|
|
311
348
|
|
|
312
349
|
```
|
|
313
|
-
$
|
|
314
|
-
usage: dfindexeddb indexeddb [-h]
|
|
350
|
+
$ dfleveldb descriptor -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,physical_records,versionedit} | -v]
|
|
315
351
|
|
|
316
352
|
options:
|
|
317
|
-
-h, --help
|
|
353
|
+
-h, --help show this help message and exit
|
|
354
|
+
-s SOURCE, --source SOURCE
|
|
355
|
+
The source leveldb file
|
|
356
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
357
|
+
Output format. Default is json
|
|
358
|
+
-t {blocks,physical_records,versionedit}, --structure_type {blocks,physical_records,versionedit}
|
|
359
|
+
Parses the specified structure. Default is versionedit.
|
|
360
|
+
-v, --version_history
|
|
361
|
+
Parses the leveldb version history.
|
|
318
362
|
```
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# dfIndexeddb
|
|
2
|
+
|
|
3
|
+
dfindexeddb is an experimental Python tool for performing digital forensic
|
|
4
|
+
analysis of IndexedDB and leveldb files.
|
|
5
|
+
|
|
6
|
+
It parses leveldb, IndexedDB and JavaScript structures from these files without
|
|
7
|
+
requiring native libraries. (Note: only a subset of IndexedDB key types and
|
|
8
|
+
JavaScript types for Chromium-based browsers are currently supported. Safari
|
|
9
|
+
and Firefox support is under development.)
|
|
10
|
+
|
|
11
|
+
The content of IndexedDB files is dependent on what a web application stores
|
|
12
|
+
locally/offline using the web browser's
|
|
13
|
+
[IndexedDB API](https://www.w3.org/TR/IndexedDB/). Examples of content might
|
|
14
|
+
include:
|
|
15
|
+
* text from a text/source-code editor application,
|
|
16
|
+
* emails and contact information from an e-mail application,
|
|
17
|
+
* images and metadata from a photo gallery application
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
1. [Linux] Install the snappy compression development package
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
$ sudo apt install libsnappy-dev
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
2. Create a virtual environment and install the package
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
$ python3 -m venv .venv
|
|
32
|
+
$ source .venv/bin/activate
|
|
33
|
+
$ pip install dfindexeddb
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Installation from source
|
|
37
|
+
|
|
38
|
+
1. [Linux] Install the snappy compression development package
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
$ sudo apt install libsnappy-dev
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
2. Clone or download/unzip the repository to your local machine.
|
|
45
|
+
|
|
46
|
+
3. Create a virtual environment and install the package
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
$ python3 -m venv .venv
|
|
50
|
+
$ source .venv/bin/activate
|
|
51
|
+
$ pip install .
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
Two CLI tools for parsing IndexedDB/leveldb files are available after
|
|
57
|
+
installation:
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
### IndexedDB
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
$ dfindexeddb -h
|
|
64
|
+
usage: dfindexeddb [-h] -s SOURCE [-o {json,jsonl,repr}]
|
|
65
|
+
|
|
66
|
+
A cli tool for parsing indexeddb files
|
|
67
|
+
|
|
68
|
+
options:
|
|
69
|
+
-h, --help show this help message and exit
|
|
70
|
+
-s SOURCE, --source SOURCE
|
|
71
|
+
The source leveldb folder
|
|
72
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
73
|
+
Output format. Default is json
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### LevelDB
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
$ dfleveldb -h
|
|
80
|
+
usage: dfleveldb [-h] {db,log,ldb,descriptor} ...
|
|
81
|
+
|
|
82
|
+
A cli tool for parsing leveldb files
|
|
83
|
+
|
|
84
|
+
positional arguments:
|
|
85
|
+
{db,log,ldb,descriptor}
|
|
86
|
+
db Parse a directory as leveldb.
|
|
87
|
+
log Parse a leveldb log file.
|
|
88
|
+
ldb Parse a leveldb table (.ldb) file.
|
|
89
|
+
descriptor Parse a leveldb descriptor (MANIFEST) file.
|
|
90
|
+
|
|
91
|
+
options:
|
|
92
|
+
-h, --help show this help message and exit
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
To parse records from a LevelDB log (.log) file, use the following command:
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
$ dfleveldb log -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,physical_records,write_batches,parsed_internal_key}]
|
|
99
|
+
|
|
100
|
+
options:
|
|
101
|
+
-h, --help show this help message and exit
|
|
102
|
+
-s SOURCE, --source SOURCE
|
|
103
|
+
The source leveldb file
|
|
104
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
105
|
+
Output format. Default is json
|
|
106
|
+
-t {blocks,physical_records,write_batches,parsed_internal_key}, --structure_type {blocks,physical_records,write_batches,parsed_internal_key}
|
|
107
|
+
Parses the specified structure. Default is parsed_internal_key.
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
To parse records from a LevelDB table (.ldb) file, use the following command:
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
$ dfleveldb ldb -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,records}]
|
|
114
|
+
|
|
115
|
+
options:
|
|
116
|
+
-h, --help show this help message and exit
|
|
117
|
+
-s SOURCE, --source SOURCE
|
|
118
|
+
The source leveldb file
|
|
119
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
120
|
+
Output format. Default is json
|
|
121
|
+
-t {blocks,records}, --structure_type {blocks,records}
|
|
122
|
+
Parses the specified structure. Default is records.
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
To parse version edit records from a Descriptor (MANIFEST) file:
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
$ dfleveldb descriptor -s SOURCE [-o {json,jsonl,repr}] [-t {blocks,physical_records,versionedit} | -v]
|
|
129
|
+
|
|
130
|
+
options:
|
|
131
|
+
-h, --help show this help message and exit
|
|
132
|
+
-s SOURCE, --source SOURCE
|
|
133
|
+
The source leveldb file
|
|
134
|
+
-o {json,jsonl,repr}, --output {json,jsonl,repr}
|
|
135
|
+
Output format. Default is json
|
|
136
|
+
-t {blocks,physical_records,versionedit}, --structure_type {blocks,physical_records,versionedit}
|
|
137
|
+
Parses the specified structure. Default is versionedit.
|
|
138
|
+
-v, --version_history
|
|
139
|
+
Parses the leveldb version history.
|
|
140
|
+
```
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""A CLI tool for dfindexeddb."""
|
|
16
|
+
import argparse
|
|
17
|
+
import dataclasses
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
import json
|
|
20
|
+
import pathlib
|
|
21
|
+
import sys
|
|
22
|
+
import traceback
|
|
23
|
+
|
|
24
|
+
from dfindexeddb import errors
|
|
25
|
+
from dfindexeddb import version
|
|
26
|
+
from dfindexeddb.leveldb import record as leveldb_record
|
|
27
|
+
from dfindexeddb.indexeddb.chromium import record as chromium_record
|
|
28
|
+
from dfindexeddb.indexeddb.chromium import v8
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Characters emitted verbatim when rendering bytes; any other byte value is
# written as a \xNN escape by Encoder.
_VALID_PRINTABLE_CHARACTERS = (
    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')


class Encoder(json.JSONEncoder):
  """A JSON encoder class for dfindexeddb fields.

  Extends the stock encoder with conversions for dataclasses, bytes,
  datetimes, sets and the v8 Undefined/Null/RegExp values.
  """

  @staticmethod
  def _RenderBytes(data):
    """Returns data as text, escaping non-printable bytes as \\xNN."""
    return ''.join(
        chr(byte) if chr(byte) in _VALID_PRINTABLE_CHARACTERS
        else f'\\x{byte:02X}'
        for byte in data)

  def default(self, o):
    """Returns a JSON serializable replacement for o.

    Args:
      o: the object the standard encoder could not serialize.

    Returns:
      A dict for dataclasses, a string for bytes, datetimes, v8 undefined,
      v8 null and v8 regular expressions, or a list for sets.  Any other
      object is delegated to json.JSONEncoder.default.
    """
    if dataclasses.is_dataclass(o):
      return dataclasses.asdict(o)
    if isinstance(o, bytes):
      return self._RenderBytes(o)
    if isinstance(o, datetime):
      return o.isoformat()
    if isinstance(o, v8.Undefined):
      return "<undefined>"
    if isinstance(o, v8.Null):
      return "<null>"
    if isinstance(o, set):
      return list(o)
    if isinstance(o, v8.RegExp):
      return str(o)
    return super().default(o)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _Output(structure, output):
  """Prints a parsed structure to stdout in the requested format.

  Args:
    structure: the parsed structure to print.
    output: 'json' for indented JSON, 'jsonl' for single-line JSON or 'repr'
        for the structure's printed representation.  Any other value prints
        nothing.
  """
  if output == 'repr':
    print(structure)
    return
  if output in ('json', 'jsonl'):
    # Only the 'json' format is pretty-printed; 'jsonl' stays on one line.
    indent = 2 if output == 'json' else None
    print(json.dumps(structure, indent=indent, cls=Encoder))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def IndexeddbCommand(args):
  """The CLI for processing a leveldb directory as indexeddb.

  Every leveldb record read from args.source is converted to an IndexedDB
  record where possible and then printed.  When the conversion fails, the
  error and traceback are written to stderr and the record is printed with
  its original leveldb content.

  Args:
    args: the parsed command line arguments; source and output are read.
  """
  for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
    raw_record = db_record.record
    try:
      parsed_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
          raw_record)
    except (errors.ParserError, errors.DecoderError,
            NotImplementedError) as err:
      message = (
          f'Error parsing blink value: {err} for '
          f'{raw_record.__class__.__name__} '
          f'at offset {raw_record.offset} in {db_record.path}')
      print(message, file=sys.stderr)
      print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
    else:
      db_record.record = parsed_record
    _Output(db_record, output=args.output)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def App():
  """The CLI app entrypoint for dfindexeddb.

  Builds the argument parser, parses the process arguments and runs
  IndexeddbCommand with the result.
  """
  cli = argparse.ArgumentParser(
      prog='dfindexeddb',
      description='A cli tool for parsing indexeddb files',
      epilog=f'Version {version.GetVersion()}')
  cli.add_argument(
      '-s', '--source',
      type=pathlib.Path,
      required=True,
      help='The source leveldb folder')
  output_formats = ['json', 'jsonl', 'repr']
  cli.add_argument(
      '-o', '--output',
      default='json',
      choices=output_formats,
      help='Output format. Default is json')
  cli.set_defaults(func=IndexeddbCommand)

  parsed_args = cli.parse_args()
  parsed_args.func(parsed_args)
|
|
File without changes
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright 2024 Google LLC
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""A CLI tool for leveldb files."""
|
|
16
|
+
import argparse
|
|
17
|
+
import dataclasses
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
import json
|
|
20
|
+
import pathlib
|
|
21
|
+
|
|
22
|
+
from dfindexeddb import version
|
|
23
|
+
from dfindexeddb.leveldb import descriptor
|
|
24
|
+
from dfindexeddb.leveldb import ldb
|
|
25
|
+
from dfindexeddb.leveldb import log
|
|
26
|
+
from dfindexeddb.leveldb import record
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Characters emitted verbatim when rendering bytes; any other byte value is
# written as a \xNN escape by Encoder.
_VALID_PRINTABLE_CHARACTERS = (
    ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')


class Encoder(json.JSONEncoder):
  """A JSON encoder class for dfleveldb fields.

  Extends the stock encoder with conversions for dataclasses, bytes,
  datetimes and sets.
  """

  @staticmethod
  def _RenderBytes(data):
    """Returns data as text, escaping non-printable bytes as \\xNN."""
    return ''.join(
        chr(byte) if chr(byte) in _VALID_PRINTABLE_CHARACTERS
        else f'\\x{byte:02X}'
        for byte in data)

  def default(self, o):
    """Returns a serializable object for o.

    Args:
      o: the object the standard encoder could not serialize.

    Returns:
      A dict for dataclasses, a string for bytes and datetimes, or a list
      for sets.  Any other object is delegated to json.JSONEncoder.default.
    """
    if dataclasses.is_dataclass(o):
      return dataclasses.asdict(o)
    if isinstance(o, bytes):
      return self._RenderBytes(o)
    if isinstance(o, datetime):
      return o.isoformat()
    if isinstance(o, set):
      return list(o)
    return super().default(o)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _Output(structure, output):
  """Prints a parsed structure to stdout in the requested format.

  Args:
    structure: the parsed structure to print.
    output: 'json' for indented JSON, 'jsonl' for single-line JSON or 'repr'
        for the structure's printed representation.  Any other value prints
        nothing.
  """
  if output == 'repr':
    print(structure)
    return
  if output in ('json', 'jsonl'):
    # Only the 'json' format is pretty-printed; 'jsonl' stays on one line.
    indent = 2 if output == 'json' else None
    print(json.dumps(structure, indent=indent, cls=Encoder))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def DbCommand(args):
  """The CLI for processing leveldb folders.

  Prints every record obtained from the directory args.source.

  Args:
    args: the parsed command line arguments; source and output are read.
  """
  for leveldb_record in record.LevelDBRecord.FromDir(args.source):
    _Output(leveldb_record, args.output)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def LdbCommand(args):
  """The CLI for processing ldb files.

  Prints the blocks or the key value records of the table file args.source.
  Key value records are printed when no structure type was given.

  Args:
    args: the parsed command line arguments; source, structure_type and
        output are read.
  """
  reader = ldb.FileReader(args.source)
  kind = args.structure_type

  if not kind or kind == 'records':
    entries = reader.GetKeyValueRecords()
  elif kind == 'blocks':
    entries = reader.GetBlocks()
  else:
    print(f'{args.structure_type} is not supported for ldb files.')
    return

  for entry in entries:
    _Output(entry, output=args.output)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def LogCommand(args):
  """The CLI for processing log files.

  Prints the blocks, physical records, write batches or parsed internal keys
  of the log file args.source.  Parsed internal keys are printed when no
  structure type was given.

  Args:
    args: the parsed command line arguments; source, structure_type and
        output are read.
  """
  reader = log.FileReader(args.source)
  kind = args.structure_type

  if kind == 'blocks':
    entries = reader.GetBlocks()
  elif kind == 'physical_records':
    entries = reader.GetPhysicalRecords()
  elif kind == 'write_batches':
    entries = reader.GetWriteBatches()
  elif not kind or kind in ('parsed_internal_key', 'records'):
    entries = reader.GetParsedInternalKeys()
  else:
    print(f'{args.structure_type} is not supported for log files.')
    return

  for entry in entries:
    _Output(entry, output=args.output)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def DescriptorCommand(args):
  """The CLI for processing descriptor (MANIFEST) files.

  Prints the version history when args.version_history is set; otherwise
  prints the blocks, physical records or version edits of args.source.
  Version edits are printed when no structure type was given.

  Args:
    args: the parsed command line arguments; source, version_history,
        structure_type and output are read.
  """
  reader = descriptor.FileReader(args.source)

  # The version history switch takes precedence over the structure type.
  if args.version_history:
    entries = reader.GetVersions()
  elif args.structure_type == 'blocks':
    entries = reader.GetBlocks()
  elif args.structure_type == 'physical_records':
    entries = reader.GetPhysicalRecords()
  elif not args.structure_type or args.structure_type == 'versionedit':
    entries = reader.GetVersionEdits()
  else:
    print(f'{args.structure_type} is not supported for descriptor files.')
    return

  for entry in entries:
    _Output(entry, output=args.output)
|
|
145
|
+
|
|
146
|
+
def _AddSourceAndOutputArguments(subparser, source_help):
  """Adds the -s/--source and -o/--output options shared by all subcommands.

  Args:
    subparser: the argparse parser of the subcommand to extend.
    source_help: the help text shown for the --source option.
  """
  subparser.add_argument(
      '-s', '--source',
      required=True,
      type=pathlib.Path,
      help=source_help)
  subparser.add_argument(
      '-o',
      '--output',
      choices=[
          'json',
          'jsonl',
          'repr'],
      default='json',
      help='Output format. Default is json')


def App():
  """The CLI app entrypoint for parsing leveldb files.

  Builds the dfleveldb parser with the db, log, ldb and descriptor
  subcommands, parses the process arguments and runs the handler registered
  for the selected subcommand.  When no subcommand is given, the usage text
  is printed instead.
  """
  parser = argparse.ArgumentParser(
      prog='dfleveldb',
      description='A cli tool for parsing leveldb files',
      epilog=f'Version {version.GetVersion()}')

  subparsers = parser.add_subparsers()

  parser_db = subparsers.add_parser(
      'db', help='Parse a directory as leveldb.')
  _AddSourceAndOutputArguments(parser_db, 'The source leveldb directory')
  # Without a registered handler the db subcommand would parse its arguments
  # and then fall through to the usage message at the end of this function.
  parser_db.set_defaults(func=DbCommand)

  parser_log = subparsers.add_parser(
      'log', help='Parse a leveldb log file.')
  _AddSourceAndOutputArguments(parser_log, 'The source leveldb file')
  parser_log.add_argument(
      '-t',
      '--structure_type',
      choices=[
          'blocks',
          'physical_records',
          'write_batches',
          'parsed_internal_key'],
      help='Parses the specified structure. Default is parsed_internal_key.')
  parser_log.set_defaults(func=LogCommand)

  parser_ldb = subparsers.add_parser(
      'ldb', help='Parse a leveldb table (.ldb) file.')
  _AddSourceAndOutputArguments(parser_ldb, 'The source leveldb file')
  parser_ldb.add_argument(
      '-t',
      '--structure_type',
      choices=[
          'blocks',
          'records'],
      help='Parses the specified structure. Default is records.')
  parser_ldb.set_defaults(func=LdbCommand)

  parser_descriptor = subparsers.add_parser(
      'descriptor', help='Parse a leveldb descriptor (MANIFEST) file.')
  _AddSourceAndOutputArguments(parser_descriptor, 'The source leveldb file')
  # The structure type and the version history switch cannot be combined.
  db_group = parser_descriptor.add_mutually_exclusive_group()
  db_group.add_argument(
      '-t',
      '--structure_type',
      choices=[
          'blocks', 'physical_records', 'versionedit'],
      help='Parses the specified structure. Default is versionedit.')
  db_group.add_argument(
      '-v',
      '--version_history',
      action='store_true',
      help='Parses the leveldb version history.')
  parser_descriptor.set_defaults(func=DescriptorCommand)

  args = parser.parse_args()

  # func is only present when a subcommand was selected on the command line.
  if not hasattr(args, 'func'):
    parser.print_usage()
  else:
    args.func(args)
|