assemblyline-core 4.5.1.dev145__tar.gz → 4.5.1.dev151__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of assemblyline-core might be problematic. Click here for more details.
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/PKG-INFO +1 -1
- assemblyline-core-4.5.1.dev151/assemblyline_core/VERSION +1 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/archiver/run_archiver.py +4 -3
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/expiry/run_expiry.py +113 -48
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_dispatcher.py +3 -3
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_expiry.py +13 -15
- assemblyline-core-4.5.1.dev145/assemblyline_core/VERSION +0 -1
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/LICENCE.md +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/README.md +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/dispatcher.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/ingester/ingester.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/loader/run.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/scaler/scaler_server.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/signature_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/updater/helper.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/updater/run_updater.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/setup.cfg +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/setup.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_alerter.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_badlist_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_plumber.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_replay.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_safelist_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_scaler.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_scheduler.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_signature_client.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_simulation.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_worker_submit.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.5.1.dev151
|
|
@@ -64,7 +64,7 @@ class Archiver(ServerBase):
|
|
|
64
64
|
try:
|
|
65
65
|
if len(message) == 3:
|
|
66
66
|
archive_type, type_id, delete_after = message
|
|
67
|
-
metadata =
|
|
67
|
+
metadata = {}
|
|
68
68
|
use_alternate_dtl = False
|
|
69
69
|
elif len(message) == 4:
|
|
70
70
|
archive_type, type_id, delete_after, metadata = message
|
|
@@ -90,8 +90,9 @@ class Archiver(ServerBase):
|
|
|
90
90
|
submission, version = self.datastore.submission.get_if_exists(type_id, version=True)
|
|
91
91
|
|
|
92
92
|
# If we have metadata passed in the message, we need to apply it before archiving the submission
|
|
93
|
-
if metadata and self.config.
|
|
94
|
-
submission.metadata.update({
|
|
93
|
+
if metadata and self.config.submission.metadata.archive:
|
|
94
|
+
submission.metadata.update({k: v for k, v in metadata.items()
|
|
95
|
+
if k not in submission.metadata})
|
|
95
96
|
self.datastore.submission.save(type_id, submission, version=version)
|
|
96
97
|
|
|
97
98
|
break
|
|
@@ -4,38 +4,48 @@ from __future__ import annotations
|
|
|
4
4
|
import concurrent.futures
|
|
5
5
|
import threading
|
|
6
6
|
import functools
|
|
7
|
-
import elasticapm
|
|
8
7
|
import time
|
|
9
|
-
|
|
10
8
|
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, Future, as_completed
|
|
11
9
|
from concurrent.futures.process import BrokenProcessPool
|
|
12
|
-
from datemath import dm
|
|
13
10
|
from typing import Callable, Optional, TYPE_CHECKING
|
|
14
11
|
|
|
15
|
-
|
|
16
|
-
from
|
|
12
|
+
import elasticapm
|
|
13
|
+
from datemath import dm
|
|
14
|
+
|
|
17
15
|
from assemblyline_core.server_base import ServerBase
|
|
18
16
|
from assemblyline_core.dispatching.dispatcher import BAD_SID_HASH
|
|
19
17
|
from assemblyline.common import forge
|
|
18
|
+
from assemblyline.common.isotime import epoch_to_iso, now_as_iso
|
|
20
19
|
from assemblyline.common.metrics import MetricsFactory
|
|
21
20
|
from assemblyline.filestore import FileStore
|
|
22
21
|
from assemblyline.odm.messages.expiry_heartbeat import Metrics
|
|
23
22
|
from assemblyline.remote.datatypes import get_client
|
|
23
|
+
from assemblyline.datastore.collection import Index
|
|
24
24
|
from assemblyline.remote.datatypes.set import Set
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
27
|
from assemblyline.datastore.collection import ESCollection
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def file_delete_worker(logger, filestore_urls, file_batch) -> list[str]:
|
|
30
|
+
def file_delete_worker(logger, filestore_urls, file_batch, archive_filestore_urls=None) -> list[tuple[str, bool]]:
|
|
31
31
|
try:
|
|
32
32
|
filestore = FileStore(*filestore_urls)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
if archive_filestore_urls and filestore_urls != archive_filestore_urls:
|
|
34
|
+
archivestore = FileStore(*archive_filestore_urls)
|
|
35
|
+
else:
|
|
36
|
+
archivestore = filestore
|
|
37
|
+
|
|
38
|
+
def filestore_delete(item: tuple[str, bool]) -> tuple[Optional[str], Optional[bool]]:
|
|
39
|
+
sha256, from_archive = item
|
|
40
|
+
if from_archive:
|
|
41
|
+
archivestore.delete(sha256)
|
|
42
|
+
if not archivestore.exists(sha256):
|
|
43
|
+
return sha256, True
|
|
44
|
+
else:
|
|
45
|
+
filestore.delete(sha256)
|
|
46
|
+
if not filestore.exists(sha256):
|
|
47
|
+
return sha256, False
|
|
48
|
+
return None, None
|
|
39
49
|
|
|
40
50
|
return _file_delete_worker(logger, filestore_delete, file_batch)
|
|
41
51
|
|
|
@@ -44,8 +54,11 @@ def file_delete_worker(logger, filestore_urls, file_batch) -> list[str]:
|
|
|
44
54
|
return []
|
|
45
55
|
|
|
46
56
|
|
|
47
|
-
|
|
48
|
-
|
|
57
|
+
ActionSignature = Callable[[tuple[str, bool]], tuple[Optional[str], Optional[bool]]]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _file_delete_worker(logger, delete_action: ActionSignature, file_batch) -> list[tuple[str, bool]]:
|
|
61
|
+
finished_files: list[tuple[str, bool]] = []
|
|
49
62
|
try:
|
|
50
63
|
futures = []
|
|
51
64
|
|
|
@@ -55,9 +68,9 @@ def _file_delete_worker(logger, delete_action: Callable[[str], Optional[str]], f
|
|
|
55
68
|
|
|
56
69
|
for future in as_completed(futures):
|
|
57
70
|
try:
|
|
58
|
-
erased_name = future.result()
|
|
59
|
-
if erased_name:
|
|
60
|
-
finished_files.append(erased_name)
|
|
71
|
+
erased_name, from_archive = future.result()
|
|
72
|
+
if erased_name and from_archive is not None:
|
|
73
|
+
finished_files.append((erased_name, from_archive))
|
|
61
74
|
except Exception as error:
|
|
62
75
|
logger.exception("Error in filestore worker: " + str(error))
|
|
63
76
|
|
|
@@ -67,17 +80,29 @@ def _file_delete_worker(logger, delete_action: Callable[[str], Optional[str]], f
|
|
|
67
80
|
|
|
68
81
|
|
|
69
82
|
class ExpiryManager(ServerBase):
|
|
70
|
-
def __init__(self, redis_persist=None):
|
|
71
|
-
self.config = forge.get_config()
|
|
83
|
+
def __init__(self, redis_persist=None, datastore=None, filestore=None, config=None, classification=None):
|
|
84
|
+
self.config = config or forge.get_config()
|
|
72
85
|
|
|
73
86
|
super().__init__('assemblyline.expiry', shutdown_timeout=self.config.core.expiry.sleep_time + 5)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
self.
|
|
87
|
+
|
|
88
|
+
# Set Archive related configs
|
|
89
|
+
if self.config.datastore.archive.enabled:
|
|
90
|
+
self.archive_access = True
|
|
91
|
+
self.index_type = Index.HOT_AND_ARCHIVE
|
|
92
|
+
else:
|
|
93
|
+
self.archive_access = False
|
|
94
|
+
self.index_type = Index.HOT
|
|
95
|
+
|
|
96
|
+
self.datastore = datastore or forge.get_datastore(config=self.config, archive_access=self.archive_access)
|
|
97
|
+
self.filestore = filestore or forge.get_filestore(config=self.config)
|
|
98
|
+
self.classification = classification or forge.get_classification()
|
|
77
99
|
self.expirable_collections: list[ESCollection] = []
|
|
78
100
|
self.counter = MetricsFactory('expiry', Metrics)
|
|
79
101
|
self.file_delete_worker = ProcessPoolExecutor(self.config.core.expiry.delete_workers)
|
|
80
|
-
|
|
102
|
+
if self.config.filestore.archive:
|
|
103
|
+
self.same_storage = self.config.filestore.storage == self.config.filestore.archive
|
|
104
|
+
else:
|
|
105
|
+
self.same_storage = True
|
|
81
106
|
self.current_submission_cleanup = set()
|
|
82
107
|
|
|
83
108
|
self.redis_persist = redis_persist or get_client(
|
|
@@ -127,14 +152,15 @@ class ExpiryManager(ServerBase):
|
|
|
127
152
|
def filestore_delete(self, file_batch, _):
|
|
128
153
|
return self.file_delete_worker.submit(file_delete_worker, logger=self.log,
|
|
129
154
|
filestore_urls=list(self.config.filestore.storage),
|
|
130
|
-
file_batch=file_batch
|
|
155
|
+
file_batch=file_batch,
|
|
156
|
+
archive_filestore_urls=list(self.config.filestore.archive))
|
|
131
157
|
|
|
132
158
|
def cachestore_delete(self, file_batch, _):
|
|
133
159
|
return self.file_delete_worker.submit(file_delete_worker, logger=self.log,
|
|
134
160
|
filestore_urls=list(self.config.filestore.cache),
|
|
135
161
|
file_batch=file_batch)
|
|
136
162
|
|
|
137
|
-
def _finish_delete(self, collection: ESCollection, task: Future, expire_only: list[str]):
|
|
163
|
+
def _finish_delete(self, collection: ESCollection, task: Future, expire_only: list[tuple[str, bool]]):
|
|
138
164
|
# Wait until the worker process finishes deleting files
|
|
139
165
|
file_list: list[str] = []
|
|
140
166
|
while self.running:
|
|
@@ -145,25 +171,41 @@ class ExpiryManager(ServerBase):
|
|
|
145
171
|
except concurrent.futures.TimeoutError:
|
|
146
172
|
pass
|
|
147
173
|
|
|
148
|
-
file_list
|
|
149
|
-
|
|
150
|
-
# build a batch delete job for all the removed files
|
|
151
|
-
bulk = collection.get_bulk_plan()
|
|
152
|
-
for sha256 in file_list:
|
|
153
|
-
bulk.add_delete_operation(sha256)
|
|
154
|
-
|
|
155
|
-
if len(file_list) > 0:
|
|
174
|
+
if file_list:
|
|
156
175
|
self.log.info(f'[{collection.name}] Deleted associated files from the '
|
|
157
176
|
f'{"cachestore" if "cache" in collection.name else "filestore"}...')
|
|
158
|
-
collection.bulk(bulk)
|
|
159
|
-
self.counter.increment(f'{collection.name}', increment_by=len(file_list))
|
|
160
|
-
self.log.info(f"[{collection.name}] Deleted {len(file_list)} items from the datastore...")
|
|
161
177
|
else:
|
|
178
|
+
self.log.info(f'[{collection.name}] Nothing was deleted from the '
|
|
179
|
+
f'{"cachestore" if "cache" in collection.name else "filestore"}...')
|
|
180
|
+
|
|
181
|
+
# From the files to be deleted, check which are from the hot index
|
|
182
|
+
hot_file_list = [x[0] for x in file_list if not x[1]]
|
|
183
|
+
hot_file_list.extend([x[0] for x in expire_only if not x[1]])
|
|
184
|
+
|
|
185
|
+
# From the files to be deleted, check which are from the archive index
|
|
186
|
+
archive_file_list = [x[0] for x in file_list if x[1]]
|
|
187
|
+
archive_file_list.extend([x[0] for x in expire_only if x[1]])
|
|
188
|
+
|
|
189
|
+
for cur_file_list, index_type in [(hot_file_list, Index.HOT), (archive_file_list, Index.ARCHIVE)]:
|
|
190
|
+
if not cur_file_list:
|
|
191
|
+
# Nothing to delete from this index type
|
|
192
|
+
continue
|
|
193
|
+
|
|
194
|
+
# build a batch delete job for all the removed files
|
|
195
|
+
bulk = collection.get_bulk_plan(index_type=index_type)
|
|
196
|
+
for sha256 in cur_file_list:
|
|
197
|
+
bulk.add_delete_operation(sha256)
|
|
198
|
+
|
|
199
|
+
collection.bulk(bulk)
|
|
200
|
+
self.counter.increment(f'{collection.name}', increment_by=len(cur_file_list))
|
|
201
|
+
self.log.info(f"[{collection.name}] Deleted {len(cur_file_list)} items from the datastore...")
|
|
202
|
+
|
|
203
|
+
if not hot_file_list and not archive_file_list:
|
|
162
204
|
self.log.warning(f'[{collection.name}] Expiry unable to clean up any of the files in filestore.')
|
|
163
205
|
|
|
164
|
-
def _simple_delete(self, collection, delete_query, number_to_delete):
|
|
206
|
+
def _simple_delete(self, collection: ESCollection, delete_query, number_to_delete):
|
|
165
207
|
self.heartbeat()
|
|
166
|
-
collection.delete_by_query(delete_query)
|
|
208
|
+
collection.delete_by_query(delete_query, index_type=self.index_type)
|
|
167
209
|
self.counter.increment(f'{collection.name}', increment_by=number_to_delete)
|
|
168
210
|
self.log.info(f"[{collection.name}] Deleted {number_to_delete} items from the datastore...")
|
|
169
211
|
|
|
@@ -195,22 +237,45 @@ class ExpiryManager(ServerBase):
|
|
|
195
237
|
# check if we are dealing with an index that needs file cleanup
|
|
196
238
|
if self.config.core.expiry.delete_storage and collection.name in self.fs_hashmap:
|
|
197
239
|
# Delete associated files
|
|
198
|
-
delete_objects: list[str] = []
|
|
199
|
-
for item in collection.stream_search(
|
|
240
|
+
delete_objects: list[tuple[str, bool]] = []
|
|
241
|
+
for item in collection.stream_search(
|
|
242
|
+
delete_query, fl='id,from_archive', as_obj=False, index_type=self.index_type):
|
|
200
243
|
self.heartbeat()
|
|
201
|
-
delete_objects.append(item['id'])
|
|
244
|
+
delete_objects.append((item['id'], item.get('from_archive', False)))
|
|
202
245
|
|
|
203
246
|
# Filter archived documents if archive filestore is the same as the filestore
|
|
204
|
-
expire_only = []
|
|
205
|
-
if self.same_storage and self.
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
247
|
+
expire_only: list[tuple[str, bool]] = []
|
|
248
|
+
if self.same_storage and self.archive_access and collection.name == 'file':
|
|
249
|
+
# Separate hot and archive files
|
|
250
|
+
delete_from_archive = [i[0] for i in delete_objects if i[1]]
|
|
251
|
+
delete_from_hot = [i[0] for i in delete_objects if not i[1]]
|
|
252
|
+
|
|
253
|
+
# Check for overlap
|
|
254
|
+
overlap = set(delete_from_archive).intersection(set(delete_from_hot))
|
|
255
|
+
delete_from_archive = list(set(delete_from_archive)-overlap)
|
|
256
|
+
delete_from_hot = list(set(delete_from_hot)-overlap)
|
|
257
|
+
|
|
258
|
+
# Create the original delete_objects from the overlap
|
|
259
|
+
delete_objects = [(k, False) for k in overlap]
|
|
260
|
+
delete_objects.extend([(k, True) for k in overlap])
|
|
261
|
+
|
|
262
|
+
if delete_from_hot:
|
|
263
|
+
# Check hot objects to delete if they are in archive
|
|
264
|
+
archived_files = self.datastore.file.multiexists(delete_from_hot, index_type=Index.ARCHIVE)
|
|
265
|
+
delete_objects.extend([(k, False) for k, v in archived_files.items() if not v])
|
|
266
|
+
expire_only.extend([(k, False) for k, v in archived_files.items() if v])
|
|
267
|
+
|
|
268
|
+
if delete_from_archive:
|
|
269
|
+
# Check archive objects to delete if they are also in the hot index
|
|
270
|
+
hot_files = self.datastore.file.multiexists(delete_from_archive, index_type=Index.HOT)
|
|
271
|
+
delete_objects.extend([(k, True) for k, v in hot_files.items() if not v])
|
|
272
|
+
expire_only.extend([(k, True) for k, v in hot_files.items() if v])
|
|
209
273
|
|
|
210
274
|
delete_tasks = self.fs_hashmap[collection.name](delete_objects, final_date)
|
|
211
275
|
|
|
212
276
|
# Proceed with deletion, but only after all the scheduled deletes for this
|
|
213
|
-
self.log.info(f"[{collection.name}] Scheduled {len(delete_objects)}/{number_to_delete} files to be
|
|
277
|
+
self.log.info(f"[{collection.name}] Scheduled {len(delete_objects)}/{number_to_delete} files to be "
|
|
278
|
+
f"removed from the {'cachestore' if 'cache' in collection.name else 'filestore'}")
|
|
214
279
|
self._finish_delete(collection, delete_tasks, expire_only)
|
|
215
280
|
|
|
216
281
|
else:
|
|
@@ -264,7 +329,7 @@ class ExpiryManager(ServerBase):
|
|
|
264
329
|
will be affected in between start date and the date found"""
|
|
265
330
|
rows = collection.search(f"expiry_ts: {{{start} TO {final_date}]", rows=1,
|
|
266
331
|
offset=self.expiry_size - 1, sort='expiry_ts asc',
|
|
267
|
-
as_obj=False, fl='expiry_ts')
|
|
332
|
+
as_obj=False, fl='expiry_ts', index_type=self.index_type)
|
|
268
333
|
if rows['items']:
|
|
269
334
|
return rows['items'][0]['expiry_ts'], self.expiry_size
|
|
270
335
|
return final_date, rows['total']
|
|
@@ -284,8 +284,8 @@ def test_dispatch_extracted(clean_redis, clean_datastore):
|
|
|
284
284
|
@mock.patch('assemblyline_core.dispatching.dispatcher.MetricsFactory', mock.MagicMock())
|
|
285
285
|
@mock.patch('assemblyline_core.dispatching.dispatcher.Scheduler', DRPScheduler)
|
|
286
286
|
def test_dispatch_extracted_bypass_drp(clean_redis, clean_datastore):
|
|
287
|
-
# Dynamic Recursion Prevention is to prevent services belonging to the 'Dynamic Analysis'
|
|
288
|
-
# of files they've analyzed.
|
|
287
|
+
# Dynamic Recursion Prevention is to prevent services belonging to the 'Dynamic Analysis'
|
|
288
|
+
# from analyzing the children of files they've analyzed.
|
|
289
289
|
|
|
290
290
|
# The bypass should allow services to specify files to run through Dynamic Analysis regardless of the
|
|
291
291
|
# Dynamic Recursion Prevention parameter.
|
|
@@ -372,7 +372,7 @@ def test_dispatch_extracted_bypass_drp(clean_redis, clean_datastore):
|
|
|
372
372
|
disp.service_worker(disp.process_queue_index(sid))
|
|
373
373
|
|
|
374
374
|
# 'sandbox' should have a task for the extracted file
|
|
375
|
-
#disp.dispatch_file(disp.tasks.get(sid), second_file_hash)
|
|
375
|
+
# disp.dispatch_file(disp.tasks.get(sid), second_file_hash)
|
|
376
376
|
job = client.request_work('0', 'sandbox', '0')
|
|
377
377
|
assert job.fileinfo.sha256 == second_file_hash
|
|
378
378
|
assert job.filename == 'second-*'
|
|
@@ -4,6 +4,7 @@ import random
|
|
|
4
4
|
import concurrent.futures
|
|
5
5
|
|
|
6
6
|
from assemblyline.common.isotime import now_as_iso
|
|
7
|
+
from assemblyline.datastore.helper import AssemblylineDatastore
|
|
7
8
|
from assemblyline.odm.randomizer import random_model_obj
|
|
8
9
|
|
|
9
10
|
from assemblyline_core.expiry.run_expiry import ExpiryManager
|
|
@@ -14,34 +15,31 @@ expiry_collections_len = {}
|
|
|
14
15
|
archive_collections_len = {}
|
|
15
16
|
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
return archive_connection
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def purge_data(datastore):
|
|
23
|
-
for name, definition in datastore.ds.get_models().items():
|
|
18
|
+
def purge_data(datastore_connection: AssemblylineDatastore):
|
|
19
|
+
for name, definition in datastore_connection.ds.get_models().items():
|
|
24
20
|
if hasattr(definition, 'expiry_ts'):
|
|
25
|
-
getattr(
|
|
21
|
+
getattr(datastore_connection, name).wipe()
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
@pytest.fixture(scope="function")
|
|
29
|
-
def ds_expiry(request,
|
|
30
|
-
for name, definition in
|
|
25
|
+
def ds_expiry(request, datastore_connection):
|
|
26
|
+
for name, definition in datastore_connection.ds.get_models().items():
|
|
31
27
|
if hasattr(definition, 'expiry_ts'):
|
|
32
|
-
collection = getattr(
|
|
28
|
+
collection = getattr(datastore_connection, name)
|
|
33
29
|
collection.wipe()
|
|
34
30
|
expiry_len = random.randint(MIN_OBJECTS, MAX_OBJECTS)
|
|
35
31
|
for x in range(expiry_len):
|
|
36
32
|
obj = random_model_obj(collection.model_class)
|
|
33
|
+
if hasattr(definition, 'from_archive'):
|
|
34
|
+
obj.from_archive = False
|
|
37
35
|
obj.expiry_ts = now_as_iso(-10000)
|
|
38
36
|
collection.save('longer_name'+str(x), obj)
|
|
39
37
|
|
|
40
38
|
expiry_collections_len[name] = expiry_len
|
|
41
39
|
collection.commit()
|
|
42
40
|
|
|
43
|
-
request.addfinalizer(lambda: purge_data(
|
|
44
|
-
return
|
|
41
|
+
request.addfinalizer(lambda: purge_data(datastore_connection))
|
|
42
|
+
return datastore_connection
|
|
45
43
|
|
|
46
44
|
|
|
47
45
|
class FakeCounter(object):
|
|
@@ -58,8 +56,8 @@ class FakeCounter(object):
|
|
|
58
56
|
return self.counts.get(name, 0)
|
|
59
57
|
|
|
60
58
|
|
|
61
|
-
def test_expire_all(ds_expiry):
|
|
62
|
-
expiry = ExpiryManager()
|
|
59
|
+
def test_expire_all(config, ds_expiry, filestore):
|
|
60
|
+
expiry = ExpiryManager(config=config, datastore=ds_expiry, filestore=filestore)
|
|
63
61
|
expiry.running = True
|
|
64
62
|
expiry.counter = FakeCounter()
|
|
65
63
|
with concurrent.futures.ThreadPoolExecutor(5) as pool:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
4.5.1.dev145
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/replay/replay.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/server_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/assemblyline_core/vacuum/worker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_badlist_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_safelist_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_signature_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_worker_ingest.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.1.dev145 → assemblyline-core-4.5.1.dev151}/test/test_worker_submit.py
RENAMED
|
File without changes
|