archae 2026.2.1__tar.gz → 2026.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {archae-2026.2.1 → archae-2026.3}/PKG-INFO +4 -3
- {archae-2026.2.1 → archae-2026.3}/README.md +3 -2
- {archae-2026.2.1 → archae-2026.3}/pyproject.toml +1 -1
- {archae-2026.2.1 → archae-2026.3}/src/archae/config.py +3 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/default_settings.toml +1 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/extractor.py +130 -56
- {archae-2026.2.1 → archae-2026.3}/src/archae/options.yaml +7 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/archiver/peazip.py +9 -2
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/archiver/seven_zip.py +17 -3
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/archiver/unar.py +10 -3
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/file_tracker.py +14 -3
- archae-2026.3/src/archae/util/lists.py +58 -0
- {archae-2026.2.1 → archae-2026.3}/LICENSE +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/__init__.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/__main__.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/cli.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/py.typed +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/__init__.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/archiver/__init__.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/archiver/base_archiver.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/converter/file_size.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/enum/__init__.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/enum/byte_scale.py +0 -0
- {archae-2026.2.1 → archae-2026.3}/src/archae/util/tool_manager.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: archae
|
|
3
|
-
Version: 2026.2.1
|
|
3
|
+
Version: 2026.3
|
|
4
4
|
Summary: Archae explodes archives.
|
|
5
5
|
Keywords:
|
|
6
6
|
Author: Shawn McNaughton
|
|
@@ -73,8 +73,8 @@ Archae explodes archives.
|
|
|
73
73
|
Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
|
|
74
74
|
|
|
75
75
|
- Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
|
|
76
|
-
- Running a malware scan and finding an obscure archive format was missed and not even flagged
|
|
77
|
-
|
|
76
|
+
- Running a malware scan and finding an obscure archive format was missed and not even flagged.
|
|
77
|
+
- Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
|
|
78
78
|
|
|
79
79
|
## Features
|
|
80
80
|
|
|
@@ -88,6 +88,7 @@ Every once and a while, I run into an issue: multiple layers of archives. The re
|
|
|
88
88
|
- MIN_ARCHIVE_RATIO - ensures very-high-compression-ratio archives are stopped
|
|
89
89
|
- MIN_DISK_FREE_SPACE - minimum free space at the extraction location
|
|
90
90
|
- MAX_DEPTH - allow setting a maximum archive depth to traverse
|
|
91
|
+
- DELETE_ARCHIVES_AFTER_EXTRACTION - delete pure archive types after extraction
|
|
91
92
|
|
|
92
93
|
## Installation
|
|
93
94
|
|
|
@@ -20,8 +20,8 @@ Archae explodes archives.
|
|
|
20
20
|
Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
|
|
21
21
|
|
|
22
22
|
- Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
|
|
23
|
-
- Running a malware scan and finding an obscure archive format was missed and not even flagged
|
|
24
|
-
|
|
23
|
+
- Running a malware scan and finding an obscure archive format was missed and not even flagged.
|
|
24
|
+
- Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
|
|
25
25
|
|
|
26
26
|
## Features
|
|
27
27
|
|
|
@@ -35,6 +35,7 @@ Every once and a while, I run into an issue: multiple layers of archives. The re
|
|
|
35
35
|
- MIN_ARCHIVE_RATIO - ensures very-high-compression-ratio archives are stopped
|
|
36
36
|
- MIN_DISK_FREE_SPACE - minimum free space at the extraction location
|
|
37
37
|
- MAX_DEPTH - allow setting a maximum archive depth to traverse
|
|
38
|
+
- DELETE_ARCHIVES_AFTER_EXTRACTION - delete pure archive types after extraction
|
|
38
39
|
|
|
39
40
|
## Installation
|
|
40
41
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Runtime config management (default, userconfig and envvars)."""
|
|
2
2
|
|
|
3
|
+
import ast
|
|
3
4
|
import importlib
|
|
4
5
|
import typing
|
|
5
6
|
from pathlib import Path
|
|
@@ -66,6 +67,8 @@ def get_converter(converter_def: str) -> typing.Callable:
|
|
|
66
67
|
return float
|
|
67
68
|
if converter_def == "int":
|
|
68
69
|
return int
|
|
70
|
+
if converter_def == "bool":
|
|
71
|
+
return ast.literal_eval
|
|
69
72
|
|
|
70
73
|
# Split the definition into module path and class name
|
|
71
74
|
module_name, class_name = converter_def.split(":")
|
|
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
|
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
|
|
19
19
|
from archae.util.archiver.base_archiver import BaseArchiver
|
|
20
|
+
from archae.util.lists import skip_delete_extensions, skip_delete_mimetypes
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class WarningAccumulator(logging.Handler):
|
|
@@ -79,75 +80,75 @@ class ArchiveExtractor:
|
|
|
79
80
|
file_size_bytes = file_path.stat().st_size
|
|
80
81
|
self.file_tracker.track_file(base_hash, file_size_bytes)
|
|
81
82
|
self.file_tracker.track_file_path(base_hash, file_path)
|
|
82
|
-
self.file_tracker.
|
|
83
|
-
|
|
84
|
-
)
|
|
85
|
-
self.file_tracker.add_metadata_to_hash(
|
|
83
|
+
self.file_tracker.add_metadata(base_hash, "type", magic.from_file(file_path))
|
|
84
|
+
self.file_tracker.add_metadata(
|
|
86
85
|
base_hash, "type_mime", magic.from_file(file_path, mime=True)
|
|
87
86
|
)
|
|
88
87
|
extension = file_path.suffix.lstrip(".").lower()
|
|
89
|
-
self.file_tracker.
|
|
88
|
+
self.file_tracker.add_metadata(base_hash, "extension", extension)
|
|
90
89
|
is_file_archive = self._is_archive(base_hash)
|
|
91
|
-
self.file_tracker.
|
|
90
|
+
self.file_tracker.add_metadata(base_hash, "is_archive", is_file_archive)
|
|
92
91
|
if is_file_archive:
|
|
93
92
|
settings_dict = get_settings()
|
|
94
93
|
if settings_dict["MAX_DEPTH"] == 0 or depth < settings_dict["MAX_DEPTH"]:
|
|
95
94
|
archiver = self._get_archiver_for_file(base_hash)
|
|
96
|
-
if archiver:
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
95
|
+
if not archiver:
|
|
96
|
+
logger.warning(
|
|
97
|
+
"NO_ARCHIVER: No suitable archiver found for file: %s",
|
|
98
|
+
file_path,
|
|
100
99
|
)
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
try:
|
|
103
|
+
extracted_size = archiver.get_archive_uncompressed_size(file_path)
|
|
104
|
+
except RuntimeError as e:
|
|
105
|
+
logger.warning(
|
|
106
|
+
"SIZE_RETRIEVAL_FAILED: Could not retrieve size for archive %s: %s",
|
|
107
|
+
file_path,
|
|
108
|
+
str(e),
|
|
104
109
|
)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
extracted_size,
|
|
120
|
-
self.file_tracker.get_tracked_file_size(),
|
|
121
|
-
settings_dict["MAX_TOTAL_SIZE_BYTES"],
|
|
122
|
-
)
|
|
123
|
-
elif compression_ratio < settings_dict["MIN_ARCHIVE_RATIO"]:
|
|
124
|
-
logger.warning(
|
|
125
|
-
"MIN_ARCHIVE_RATIO: Skipped archive %s because compression ratio %.5f is less than MIN_ARCHIVE_RATIO %s",
|
|
110
|
+
return
|
|
111
|
+
self.file_tracker.add_metadata(
|
|
112
|
+
base_hash, "extracted_size", extracted_size
|
|
113
|
+
)
|
|
114
|
+
compression_ratio = extracted_size / file_size_bytes
|
|
115
|
+
self.file_tracker.add_metadata(
|
|
116
|
+
base_hash, "overall_compression_ratio", compression_ratio
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
if self.__should_extract_archive(base_hash, file_path):
|
|
120
|
+
try:
|
|
121
|
+
extraction_dir = self.extract_dir / base_hash
|
|
122
|
+
logger.info(
|
|
123
|
+
"Extracting archive %s to %s",
|
|
126
124
|
file_path,
|
|
127
|
-
|
|
128
|
-
settings_dict["MIN_ARCHIVE_RATIO"],
|
|
125
|
+
extraction_dir,
|
|
129
126
|
)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
< settings_dict["MIN_DISK_FREE_SPACE"]
|
|
133
|
-
):
|
|
127
|
+
archiver.extract_archive(file_path, extraction_dir)
|
|
128
|
+
except RuntimeError as e:
|
|
134
129
|
logger.warning(
|
|
135
|
-
"
|
|
130
|
+
"EXTRACTION_FAILED: Extraction failed for archive %s: %s",
|
|
136
131
|
file_path,
|
|
137
|
-
|
|
138
|
-
self.extract_dir,
|
|
132
|
+
str(e),
|
|
139
133
|
)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
134
|
+
return
|
|
135
|
+
child_files = self._list_child_files(extraction_dir)
|
|
136
|
+
for child_file in child_files:
|
|
137
|
+
self.__handle_file(child_file, depth + 1)
|
|
138
|
+
|
|
139
|
+
if self.__should_delete_archive(base_hash, file_path):
|
|
140
|
+
try:
|
|
141
|
+
file_path.unlink()
|
|
142
|
+
logger.info(
|
|
143
|
+
"Deleted archive %s after extraction as per settings.",
|
|
144
|
+
file_path,
|
|
145
|
+
)
|
|
146
|
+
except (PermissionError, OSError) as e:
|
|
147
|
+
logger.warning(
|
|
148
|
+
"DELETE_FAILED: Could not delete archive %s after extraction: %s",
|
|
149
|
+
file_path,
|
|
150
|
+
str(e),
|
|
151
|
+
)
|
|
151
152
|
else:
|
|
152
153
|
logger.warning(
|
|
153
154
|
"MAX_DEPTH: File %s is not extracted; max depth reached.", file_path
|
|
@@ -163,7 +164,7 @@ class ArchiveExtractor:
|
|
|
163
164
|
bool: True if the file is an archive, otherwise False.
|
|
164
165
|
|
|
165
166
|
"""
|
|
166
|
-
metadata = self.file_tracker.
|
|
167
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
167
168
|
mime_type = metadata.get("type_mime", "").lower()
|
|
168
169
|
extension = metadata.get("extension", "").lower()
|
|
169
170
|
|
|
@@ -182,7 +183,7 @@ class ArchiveExtractor:
|
|
|
182
183
|
Returns:
|
|
183
184
|
str | None: The name of the archiver tool if found, otherwise None.
|
|
184
185
|
"""
|
|
185
|
-
metadata = self.file_tracker.
|
|
186
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
186
187
|
mime_type = metadata.get("type_mime", "").lower()
|
|
187
188
|
extension = metadata.get("extension", "").lower()
|
|
188
189
|
|
|
@@ -250,3 +251,76 @@ class ArchiveExtractor:
|
|
|
250
251
|
extractor.apply_settings([("MAX_ARCHIVE_SIZE_BYTES", "5000000000")])
|
|
251
252
|
"""
|
|
252
253
|
apply_options(option_list)
|
|
254
|
+
|
|
255
|
+
def __should_extract_archive(self, file_hash: str, file_path: Path) -> bool:
|
|
256
|
+
"""Determine whether an archive should be extracted based on its metadata and current settings."""
|
|
257
|
+
settings_dict = get_settings()
|
|
258
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
259
|
+
extracted_size = metadata.get("extracted_size", 0)
|
|
260
|
+
if extracted_size > settings_dict["MAX_ARCHIVE_SIZE_BYTES"]:
|
|
261
|
+
logger.warning(
|
|
262
|
+
"MAX_ARCHIVE_SIZE_BYTES: Skipped archive %s because expected size %s is greater than MAX_ARCHIVE_SIZE_BYTES %s",
|
|
263
|
+
file_path,
|
|
264
|
+
extracted_size,
|
|
265
|
+
settings_dict["MAX_ARCHIVE_SIZE_BYTES"],
|
|
266
|
+
)
|
|
267
|
+
return False
|
|
268
|
+
|
|
269
|
+
total_extracted = self.file_tracker.get_total_tracked_file_size()
|
|
270
|
+
if total_extracted + extracted_size > settings_dict["MAX_TOTAL_SIZE_BYTES"]:
|
|
271
|
+
logger.warning(
|
|
272
|
+
"MAX_TOTAL_SIZE_BYTES: Skipped archive %s because expected size %s + current tracked files %s is greater than MAX_TOTAL_SIZE_BYTES %s",
|
|
273
|
+
file_path,
|
|
274
|
+
extracted_size,
|
|
275
|
+
total_extracted,
|
|
276
|
+
settings_dict["MAX_TOTAL_SIZE_BYTES"],
|
|
277
|
+
)
|
|
278
|
+
return False
|
|
279
|
+
compression_ratio = metadata.get("overall_compression_ratio", 0)
|
|
280
|
+
if compression_ratio < settings_dict["MIN_ARCHIVE_RATIO"]:
|
|
281
|
+
logger.warning(
|
|
282
|
+
"MIN_ARCHIVE_RATIO: Skipped archive %s because compression ratio %.5f is less than MIN_ARCHIVE_RATIO %s",
|
|
283
|
+
file_path,
|
|
284
|
+
compression_ratio,
|
|
285
|
+
settings_dict["MIN_ARCHIVE_RATIO"],
|
|
286
|
+
)
|
|
287
|
+
return False
|
|
288
|
+
if (
|
|
289
|
+
shutil.disk_usage(self.extract_dir).free - extracted_size
|
|
290
|
+
< settings_dict["MIN_DISK_FREE_SPACE"]
|
|
291
|
+
):
|
|
292
|
+
logger.warning(
|
|
293
|
+
"MIN_DISK_FREE_SPACE: Skipped archive %s because extracting it would leave less than MIN_DISK_FREE_SPACE %s bytes free at extraction location %s",
|
|
294
|
+
file_path,
|
|
295
|
+
settings_dict["MIN_DISK_FREE_SPACE"],
|
|
296
|
+
self.extract_dir,
|
|
297
|
+
)
|
|
298
|
+
return False
|
|
299
|
+
return True
|
|
300
|
+
|
|
301
|
+
def __should_delete_archive(self, file_hash: str, file_path: Path) -> bool:
|
|
302
|
+
"""Determine whether an archive should be deleted after extraction based on its metadata and current settings."""
|
|
303
|
+
settings_dict = get_settings()
|
|
304
|
+
if not settings_dict["DELETE_ARCHIVES_AFTER_EXTRACTION"]:
|
|
305
|
+
return False
|
|
306
|
+
|
|
307
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
308
|
+
extension = metadata.get("extension", "").lower()
|
|
309
|
+
if extension in skip_delete_extensions:
|
|
310
|
+
logger.warning(
|
|
311
|
+
"SKIP_DELETE_EXTENSION: Archive %s not deleted after extraction due to its extension '%s' being in the skip list.",
|
|
312
|
+
file_path,
|
|
313
|
+
extension,
|
|
314
|
+
)
|
|
315
|
+
return False
|
|
316
|
+
|
|
317
|
+
mime_type = metadata.get("type_mime", "").lower()
|
|
318
|
+
if mime_type in skip_delete_mimetypes:
|
|
319
|
+
logger.warning(
|
|
320
|
+
"SKIP_DELETE_MIMETYPE: Archive %s not deleted after extraction due to its mime type '%s' being in the skip list.",
|
|
321
|
+
file_path,
|
|
322
|
+
mime_type,
|
|
323
|
+
)
|
|
324
|
+
return False
|
|
325
|
+
|
|
326
|
+
return True
|
|
@@ -31,7 +31,7 @@ class PeazipArchiver(BaseArchiver):
|
|
|
31
31
|
"chi",
|
|
32
32
|
"chq",
|
|
33
33
|
"pptx",
|
|
34
|
-
"pptm
|
|
34
|
+
"pptm",
|
|
35
35
|
"xlsx",
|
|
36
36
|
"xlsm",
|
|
37
37
|
"docx",
|
|
@@ -145,7 +145,14 @@ class PeazipArchiver(BaseArchiver):
|
|
|
145
145
|
str(archive_path),
|
|
146
146
|
str(extract_dir),
|
|
147
147
|
]
|
|
148
|
-
|
|
148
|
+
try:
|
|
149
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
150
|
+
except subprocess.CalledProcessError as e:
|
|
151
|
+
msg = (
|
|
152
|
+
f"PeaZip extraction failed for archive {archive_path} "
|
|
153
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
154
|
+
)
|
|
155
|
+
raise RuntimeError(msg) from e
|
|
149
156
|
|
|
150
157
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int: # noqa: ARG002
|
|
151
158
|
"""Get the uncompressed size of the contents.
|
|
@@ -46,7 +46,7 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
46
46
|
"zip",
|
|
47
47
|
"zipx",
|
|
48
48
|
"appimage",
|
|
49
|
-
"dmg
|
|
49
|
+
"dmg",
|
|
50
50
|
"img",
|
|
51
51
|
"arj",
|
|
52
52
|
"cpio",
|
|
@@ -176,7 +176,14 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
176
176
|
str(archive_path),
|
|
177
177
|
f"-o{extract_dir!s}",
|
|
178
178
|
]
|
|
179
|
-
|
|
179
|
+
try:
|
|
180
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
181
|
+
except subprocess.CalledProcessError as e:
|
|
182
|
+
msg = (
|
|
183
|
+
f"7zip extraction failed for archive {archive_path} "
|
|
184
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
185
|
+
)
|
|
186
|
+
raise RuntimeError(msg) from e
|
|
180
187
|
|
|
181
188
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int:
|
|
182
189
|
"""Get the uncompressed size of the contents.
|
|
@@ -188,7 +195,14 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
188
195
|
int: The size of the contents
|
|
189
196
|
"""
|
|
190
197
|
command: list[str] = [str(self.executable_path), "l", "-slt", str(archive_path)]
|
|
191
|
-
|
|
198
|
+
try:
|
|
199
|
+
result = subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
200
|
+
except subprocess.CalledProcessError as e:
|
|
201
|
+
msg = (
|
|
202
|
+
f"7zip size retrieval failed for archive {archive_path} "
|
|
203
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
204
|
+
)
|
|
205
|
+
raise RuntimeError(msg) from e
|
|
192
206
|
|
|
193
207
|
result_lines = str(result.stdout).splitlines()
|
|
194
208
|
exploded_size = 0
|
|
@@ -24,9 +24,9 @@ class UnarArchiver(BaseArchiver):
|
|
|
24
24
|
"deb",
|
|
25
25
|
"cab",
|
|
26
26
|
"pptx",
|
|
27
|
-
"pptm
|
|
28
|
-
"xlsx ",
|
|
27
|
+
"pptm",
|
|
29
28
|
"xlsm",
|
|
29
|
+
"xlsx",
|
|
30
30
|
"docx",
|
|
31
31
|
"docm",
|
|
32
32
|
"7z",
|
|
@@ -144,7 +144,14 @@ class UnarArchiver(BaseArchiver):
|
|
|
144
144
|
str(extract_dir),
|
|
145
145
|
str(archive_path),
|
|
146
146
|
]
|
|
147
|
-
|
|
147
|
+
try:
|
|
148
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
149
|
+
except subprocess.CalledProcessError as e:
|
|
150
|
+
msg = (
|
|
151
|
+
f"unar extraction failed for archive {archive_path} "
|
|
152
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
153
|
+
)
|
|
154
|
+
raise RuntimeError(msg) from e
|
|
148
155
|
|
|
149
156
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int: # noqa: ARG002
|
|
150
157
|
"""Get the uncompressed size of the contents.
|
|
@@ -39,7 +39,18 @@ class FileTracker:
|
|
|
39
39
|
"""
|
|
40
40
|
return file_hash in self.tracked_files
|
|
41
41
|
|
|
42
|
-
def
|
|
42
|
+
def get_file_size(self, file_hash: str) -> int:
|
|
43
|
+
"""Get the size for a tracked file by its hash.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
file_hash (str): The hash of the file.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
int: The size of the tracked file.
|
|
50
|
+
"""
|
|
51
|
+
return self.tracked_files.get(file_hash, {}).get("size", 0)
|
|
52
|
+
|
|
53
|
+
def get_file_metadata(self, file_hash: str) -> dict:
|
|
43
54
|
"""Get metadata for a tracked file by its hash.
|
|
44
55
|
|
|
45
56
|
Args:
|
|
@@ -63,7 +74,7 @@ class FileTracker:
|
|
|
63
74
|
if file_path not in self.tracked_files[file_hash]["paths"]:
|
|
64
75
|
self.tracked_files[file_hash]["paths"].append(file_path)
|
|
65
76
|
|
|
66
|
-
def
|
|
77
|
+
def add_metadata(self, file_hash: str, key: str, value: Any) -> None:
|
|
67
78
|
"""Add metadata to a tracked file.
|
|
68
79
|
|
|
69
80
|
Args:
|
|
@@ -73,7 +84,7 @@ class FileTracker:
|
|
|
73
84
|
"""
|
|
74
85
|
self.tracked_files[file_hash]["metadata"][key] = value
|
|
75
86
|
|
|
76
|
-
def
|
|
87
|
+
def get_total_tracked_file_size(self) -> int:
|
|
77
88
|
"""Get the total size of all tracked files.
|
|
78
89
|
|
|
79
90
|
Returns:
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Helper lists for various purposes."""
|
|
2
|
+
|
|
3
|
+
skip_delete_extensions = [
|
|
4
|
+
"aar",
|
|
5
|
+
"appimage",
|
|
6
|
+
"cab",
|
|
7
|
+
"chi",
|
|
8
|
+
"chm",
|
|
9
|
+
"chq",
|
|
10
|
+
"chw",
|
|
11
|
+
"crx",
|
|
12
|
+
"deb",
|
|
13
|
+
"docm",
|
|
14
|
+
"docx",
|
|
15
|
+
"edb",
|
|
16
|
+
"edp",
|
|
17
|
+
"edr",
|
|
18
|
+
"esd",
|
|
19
|
+
"exe",
|
|
20
|
+
"ipa",
|
|
21
|
+
"iso",
|
|
22
|
+
"lib",
|
|
23
|
+
"msi",
|
|
24
|
+
"nsi",
|
|
25
|
+
"nsis",
|
|
26
|
+
"pptm",
|
|
27
|
+
"pptx",
|
|
28
|
+
"rpm",
|
|
29
|
+
"s7z",
|
|
30
|
+
"sitx",
|
|
31
|
+
"swm",
|
|
32
|
+
"ear",
|
|
33
|
+
"jar",
|
|
34
|
+
"war",
|
|
35
|
+
"xlsm",
|
|
36
|
+
"xlsx",
|
|
37
|
+
"xpi",
|
|
38
|
+
"zipx",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
skip_delete_mimetypes = [
|
|
43
|
+
"application/java-archive",
|
|
44
|
+
"application/vnd.android.package-archive",
|
|
45
|
+
"application/vnd.debian.binary-package",
|
|
46
|
+
"application/vnd.ms-cab-compressed",
|
|
47
|
+
"application/vnd.ms-htmlhelp",
|
|
48
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
49
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
50
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
51
|
+
"application/x-chrome-extension",
|
|
52
|
+
"application/x-iso9660-image",
|
|
53
|
+
"application/x-ole-storage",
|
|
54
|
+
"application/x-rpm",
|
|
55
|
+
"application/x-sitx",
|
|
56
|
+
"application/x-stuffitx",
|
|
57
|
+
"application/x-xpinstall",
|
|
58
|
+
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|