assemblyline-core 4.5.0.22__tar.gz → 4.5.0.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of assemblyline-core might be problematic. Click here for more details.
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/PKG-INFO +1 -1
- assemblyline-core-4.5.0.24/assemblyline_core/VERSION +1 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/expiry/run_expiry.py +141 -118
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_expiry.py +2 -1
- assemblyline-core-4.5.0.22/assemblyline_core/VERSION +0 -1
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/LICENCE.md +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/README.md +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/dispatcher.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/ingester.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/loader/run.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/scaler_server.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/signature_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/helper.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/run_updater.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/setup.cfg +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/setup.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_alerter.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_badlist_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_dispatcher.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_plumber.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_replay.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_safelist_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_scaler.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_scheduler.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_signature_client.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_simulation.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/test/test_worker_submit.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.5.0.24
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/expiry/run_expiry.py
RENAMED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
from __future__ import annotations
|
|
3
|
+
|
|
3
4
|
import concurrent.futures
|
|
4
|
-
|
|
5
|
-
from concurrent.futures.process import BrokenProcessPool
|
|
5
|
+
import threading
|
|
6
6
|
import functools
|
|
7
|
-
from typing import Callable, Optional, Union, TYPE_CHECKING
|
|
8
7
|
import elasticapm
|
|
9
8
|
import time
|
|
10
9
|
|
|
10
|
+
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, Future, as_completed
|
|
11
|
+
from concurrent.futures.process import BrokenProcessPool
|
|
11
12
|
from datemath import dm
|
|
13
|
+
from typing import Callable, Optional, TYPE_CHECKING
|
|
12
14
|
|
|
13
|
-
from assemblyline.common.isotime import epoch_to_iso, now_as_iso
|
|
15
|
+
from assemblyline.common.isotime import epoch_to_iso, now_as_iso
|
|
16
|
+
from assemblyline.datastore.collection import Index
|
|
14
17
|
from assemblyline_core.server_base import ServerBase
|
|
15
18
|
from assemblyline_core.dispatching.dispatcher import BAD_SID_HASH
|
|
16
19
|
from assemblyline.common import forge
|
|
@@ -150,19 +153,19 @@ class ExpiryManager(ServerBase):
|
|
|
150
153
|
bulk.add_delete_operation(sha256)
|
|
151
154
|
|
|
152
155
|
if len(file_list) > 0:
|
|
153
|
-
self.log.info(f'
|
|
156
|
+
self.log.info(f'[{collection.name}] Deleted associated files from the '
|
|
154
157
|
f'{"cachestore" if "cache" in collection.name else "filestore"}...')
|
|
155
158
|
collection.bulk(bulk)
|
|
156
159
|
self.counter.increment(f'{collection.name}', increment_by=len(file_list))
|
|
157
|
-
self.log.info(f"
|
|
160
|
+
self.log.info(f"[{collection.name}] Deleted {len(file_list)} items from the datastore...")
|
|
158
161
|
else:
|
|
159
|
-
self.log.warning('
|
|
162
|
+
self.log.warning(f'[{collection.name}] Expiry unable to clean up any of the files in filestore.')
|
|
160
163
|
|
|
161
164
|
def _simple_delete(self, collection, delete_query, number_to_delete):
|
|
162
165
|
self.heartbeat()
|
|
163
166
|
collection.delete_by_query(delete_query)
|
|
164
167
|
self.counter.increment(f'{collection.name}', increment_by=number_to_delete)
|
|
165
|
-
self.log.info(f"
|
|
168
|
+
self.log.info(f"[{collection.name}] Deleted {number_to_delete} items from the datastore...")
|
|
166
169
|
|
|
167
170
|
def _cleanup_canceled_submission(self, sid):
|
|
168
171
|
# Allowing us at minimum 5 minutes to cleanup the submission
|
|
@@ -171,7 +174,7 @@ class ExpiryManager(ServerBase):
|
|
|
171
174
|
self.apm_client.begin_transaction("Delete canceled submissions")
|
|
172
175
|
|
|
173
176
|
# Cleaning up the submission
|
|
174
|
-
self.log.info(f"Deleting incomplete submission {sid}...")
|
|
177
|
+
self.log.info(f"[submission] Deleting incomplete submission {sid}...")
|
|
175
178
|
self.datastore.delete_submission_tree_bulk(sid, self.classification, transport=self.filestore)
|
|
176
179
|
self.redis_bad_sids.remove(sid)
|
|
177
180
|
|
|
@@ -181,131 +184,151 @@ class ExpiryManager(ServerBase):
|
|
|
181
184
|
if self.apm_client:
|
|
182
185
|
self.apm_client.end_transaction("canceled_submissions", 'deleted')
|
|
183
186
|
|
|
184
|
-
def
|
|
185
|
-
|
|
186
|
-
|
|
187
|
+
def _process_chunk(self, collection: ESCollection, start, end, final_date, number_to_delete):
|
|
188
|
+
# We assume that no records are ever inserted such that their expiry_ts is in the past.
|
|
189
|
+
# We also assume that the `end` dates are in the past.
|
|
190
|
+
# As long as these two things are true, the set returned by this query should be consistent.
|
|
191
|
+
# The one race condition is that a record might be refreshed while the file
|
|
192
|
+
# blob would be deleted anyway, leaving a file record with no filestore object
|
|
193
|
+
delete_query = f"expiry_ts:{{{start} TO {end}]"
|
|
194
|
+
|
|
195
|
+
# check if we are dealing with an index that needs file cleanup
|
|
196
|
+
if self.config.core.expiry.delete_storage and collection.name in self.fs_hashmap:
|
|
197
|
+
# Delete associated files
|
|
198
|
+
delete_objects: list[str] = []
|
|
199
|
+
for item in collection.stream_search(delete_query, fl='id', as_obj=False):
|
|
200
|
+
self.heartbeat()
|
|
201
|
+
delete_objects.append(item['id'])
|
|
187
202
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
pool.submit(self.log_errors(self._cleanup_canceled_submission), submission.sid)
|
|
203
|
+
# Filter archived documents if archive filestore is the same as the filestore
|
|
204
|
+
expire_only = []
|
|
205
|
+
if self.same_storage and self.config.datastore.archive.enabled and collection.name == 'file':
|
|
206
|
+
archived_files = self.datastore.file.multiexists(delete_objects, index_type=Index.ARCHIVE)
|
|
207
|
+
delete_objects = [k for k, v in archived_files.items() if not v]
|
|
208
|
+
expire_only = [k for k, v in archived_files.items() if v]
|
|
195
209
|
|
|
196
|
-
|
|
197
|
-
for collection in self.expirable_collections:
|
|
198
|
-
self.heartbeat()
|
|
210
|
+
delete_tasks = self.fs_hashmap[collection.name](delete_objects, final_date)
|
|
199
211
|
|
|
200
|
-
#
|
|
201
|
-
|
|
202
|
-
|
|
212
|
+
# Proceed with deletion, but only after all the scheduled deletes for this
|
|
213
|
+
self.log.info(f"[{collection.name}] Scheduled {len(delete_objects)}/{number_to_delete} files to be removed")
|
|
214
|
+
self._finish_delete(collection, delete_tasks, expire_only)
|
|
203
215
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
final_date = dm(f"{now}||-{self.config.core.expiry.delay}h").float_timestamp
|
|
208
|
-
final_date_string = epoch_to_iso(final_date)
|
|
216
|
+
else:
|
|
217
|
+
# Proceed with deletion
|
|
218
|
+
self._simple_delete(collection, delete_query, number_to_delete)
|
|
209
219
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
start, end = unchecked_chunks.pop()
|
|
216
|
-
chunk_size = self._count_expired(collection, start, end)
|
|
220
|
+
def feed_expiry_jobs(self, collection, start, jobs: list[concurrent.futures.Future],
|
|
221
|
+
pool: ThreadPoolExecutor) -> tuple[str, bool]:
|
|
222
|
+
_process_chunk = self.log_errors(self._process_chunk)
|
|
223
|
+
number_to_delete = 0
|
|
224
|
+
self.heartbeat()
|
|
217
225
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
226
|
+
# Start of expiry transaction
|
|
227
|
+
if self.apm_client:
|
|
228
|
+
self.apm_client.begin_transaction("Delete expired documents")
|
|
221
229
|
|
|
222
|
-
|
|
223
|
-
# run on
|
|
224
|
-
if chunk_size < self.expiry_size:
|
|
225
|
-
ready_chunks[(start, end)] = chunk_size
|
|
226
|
-
continue
|
|
230
|
+
final_date = self._get_final_date()
|
|
227
231
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
unchecked_chunks.append((middle, end))
|
|
231
|
-
unchecked_chunks.append((start, middle))
|
|
232
|
+
# Break down the expiry window into smaller chunks of data
|
|
233
|
+
while len(jobs) < self.config.core.expiry.iteration_max_tasks:
|
|
232
234
|
|
|
233
|
-
#
|
|
234
|
-
|
|
235
|
-
reached_max = True
|
|
235
|
+
# Get the next chunk
|
|
236
|
+
end, number_to_delete = self._get_next_chunk(collection, start, final_date)
|
|
236
237
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
pool.submit(self.log_errors(self._finish_delete), collection, delete_tasks, expire_only)
|
|
268
|
-
|
|
269
|
-
else:
|
|
270
|
-
# Proceed with deletion
|
|
271
|
-
pool.submit(self.log_errors(self._simple_delete),
|
|
272
|
-
collection, delete_query, number_to_delete)
|
|
273
|
-
|
|
274
|
-
# End of expiry transaction
|
|
275
|
-
if self.apm_client:
|
|
276
|
-
self.apm_client.end_transaction(collection.name, 'deleted')
|
|
277
|
-
|
|
278
|
-
return reached_max
|
|
279
|
-
|
|
280
|
-
def _find_expiry_start(self, container: ESCollection):
|
|
281
|
-
"""Find earliest expiring item in this container."""
|
|
282
|
-
rows = container.search(f"expiry_ts: [* TO {epoch_to_iso(time.time())}]",
|
|
283
|
-
rows=1, sort='expiry_ts asc', as_obj=False, fl='expiry_ts')
|
|
238
|
+
# Check if we got anything
|
|
239
|
+
if number_to_delete == 0:
|
|
240
|
+
break
|
|
241
|
+
|
|
242
|
+
# Process the chunk in the threadpool
|
|
243
|
+
jobs.append(pool.submit(_process_chunk, collection, start, end, final_date, number_to_delete))
|
|
244
|
+
|
|
245
|
+
# Prepare for next chunk
|
|
246
|
+
start = end
|
|
247
|
+
|
|
248
|
+
# End of expiry transaction
|
|
249
|
+
if self.apm_client:
|
|
250
|
+
self.apm_client.end_transaction(collection.name, 'deleted')
|
|
251
|
+
|
|
252
|
+
return start, number_to_delete < self.expiry_size
|
|
253
|
+
|
|
254
|
+
def _get_final_date(self):
|
|
255
|
+
now = now_as_iso()
|
|
256
|
+
if self.config.core.expiry.batch_delete:
|
|
257
|
+
final_date = dm(f"{now}||-{self.config.core.expiry.delay}h/d").float_timestamp
|
|
258
|
+
else:
|
|
259
|
+
final_date = dm(f"{now}||-{self.config.core.expiry.delay}h").float_timestamp
|
|
260
|
+
return epoch_to_iso(final_date)
|
|
261
|
+
|
|
262
|
+
def _get_next_chunk(self, collection: ESCollection, start, final_date):
|
|
263
|
+
"""Find date of item at chunk size and the number of items that
|
|
264
|
+
will be affected in between start date and the date found"""
|
|
265
|
+
rows = collection.search(f"expiry_ts: {{{start} TO {final_date}]", rows=1,
|
|
266
|
+
offset=self.expiry_size - 1, sort='expiry_ts asc',
|
|
267
|
+
as_obj=False, fl='expiry_ts')
|
|
284
268
|
if rows['items']:
|
|
285
|
-
return
|
|
286
|
-
return
|
|
287
|
-
|
|
288
|
-
def _count_expired(self, container: ESCollection, start: Union[float, str], end: float) -> int:
|
|
289
|
-
"""Count how many items need to be erased in the given window."""
|
|
290
|
-
if start == 0:
|
|
291
|
-
start = '*'
|
|
292
|
-
if isinstance(start, (float, int)):
|
|
293
|
-
start = epoch_to_iso(start)
|
|
294
|
-
query = f'expiry_ts:[{start} TO {epoch_to_iso(end)}}}'
|
|
295
|
-
return container.search(query, rows=0, as_obj=False, track_total_hits=self.expiry_size)['total']
|
|
269
|
+
return rows['items'][0]['expiry_ts'], self.expiry_size
|
|
270
|
+
return final_date, rows['total']
|
|
296
271
|
|
|
297
272
|
def try_run(self):
|
|
273
|
+
pool = ThreadPoolExecutor(self.config.core.expiry.workers)
|
|
274
|
+
main_threads = []
|
|
275
|
+
|
|
276
|
+
# Launch a thread that will expire submissions that have been deleted
|
|
277
|
+
thread = threading.Thread(target=self.clean_deleted_submissions, args=[pool])
|
|
278
|
+
thread.start()
|
|
279
|
+
main_threads.append(thread)
|
|
280
|
+
|
|
281
|
+
# Launch threads that expire data from each collection of data
|
|
282
|
+
for collection in self.expirable_collections:
|
|
283
|
+
thread = threading.Thread(target=self.run_collection, args=[pool, collection])
|
|
284
|
+
thread.start()
|
|
285
|
+
main_threads.append(thread)
|
|
286
|
+
|
|
287
|
+
# Wait for all the threads to exit
|
|
288
|
+
for thread in main_threads:
|
|
289
|
+
thread.join()
|
|
290
|
+
|
|
291
|
+
def clean_deleted_submissions(self, pool):
|
|
292
|
+
"""Delete canceled submissions"""
|
|
298
293
|
while self.running:
|
|
299
|
-
|
|
300
|
-
|
|
294
|
+
# Make sure we're not dedicating more than a quarter of the pool to this operation because it is costly
|
|
295
|
+
for submission in self.datastore.submission.search(
|
|
296
|
+
"to_be_deleted:true", fl="sid", rows=max(1, int(self.config.core.expiry.workers / 4)))['items']:
|
|
297
|
+
if submission.sid not in self.current_submission_cleanup:
|
|
298
|
+
self.current_submission_cleanup.add(submission.sid)
|
|
299
|
+
pool.submit(self.log_errors(self._cleanup_canceled_submission), submission.sid)
|
|
300
|
+
self.sleep_with_heartbeat(self.config.core.expiry.sleep_time)
|
|
301
|
+
|
|
302
|
+
def run_collection(self, pool: concurrent.futures.ThreadPoolExecutor, collection):
|
|
303
|
+
"""Feed batches of jobs to delete to the thread pool for the given collection."""
|
|
304
|
+
start = "*"
|
|
305
|
+
jobs: list[concurrent.futures.Future] = []
|
|
301
306
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
+
while self.running:
|
|
308
|
+
try:
|
|
309
|
+
try:
|
|
310
|
+
# Fill up 'jobs' with tasks that have been sent to the thread pool
|
|
311
|
+
# 'jobs' may already have items in it, but 'start' makes sure the new
|
|
312
|
+
# task added starts where the last finished
|
|
313
|
+
start, final_job_small = self.feed_expiry_jobs(collection, start, jobs, pool)
|
|
314
|
+
|
|
315
|
+
# Wait until some of our work finishes and there is room in the queue for more work
|
|
316
|
+
finished, _jobs = concurrent.futures.wait(jobs, return_when=concurrent.futures.FIRST_COMPLETED)
|
|
317
|
+
jobs = list(_jobs)
|
|
318
|
+
for job in finished:
|
|
319
|
+
job.result()
|
|
320
|
+
|
|
321
|
+
# If we have expired all the data reset the start pointer
|
|
322
|
+
if len(jobs) == 0:
|
|
323
|
+
start = '*'
|
|
324
|
+
|
|
325
|
+
except Exception as e:
|
|
326
|
+
self.log.exception(str(e))
|
|
327
|
+
continue
|
|
307
328
|
|
|
308
|
-
|
|
329
|
+
# If the most recent job added to the jobs list is short then
|
|
330
|
+
# all the data is currently queued up to delete and we can sleep
|
|
331
|
+
if final_job_small:
|
|
309
332
|
self.sleep_with_heartbeat(self.config.core.expiry.sleep_time)
|
|
310
333
|
|
|
311
334
|
except BrokenProcessPool:
|
|
@@ -63,7 +63,8 @@ def test_expire_all(ds_expiry):
|
|
|
63
63
|
expiry.running = True
|
|
64
64
|
expiry.counter = FakeCounter()
|
|
65
65
|
with concurrent.futures.ThreadPoolExecutor(5) as pool:
|
|
66
|
-
expiry.
|
|
66
|
+
for collection in expiry.expirable_collections:
|
|
67
|
+
expiry.feed_expiry_jobs(collection=collection, pool=pool, start='*', jobs=[])
|
|
67
68
|
|
|
68
69
|
for k, v in expiry_collections_len.items():
|
|
69
70
|
assert v == expiry.counter.get(k)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
4.5.0.22
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/processing.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/alerter/run_alerter.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/archiver/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/archiver/run_archiver.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/badlist_client.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/__main__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/client.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/schedules.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/dispatching/timeout.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/expiry/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/__main__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/constants.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/ingester/ingester.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/es_metrics.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/metrics/helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/plumber/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/plumber/run_plumber.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/client.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/creator/run.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/loader/run.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/replay/replay.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/safelist_client.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/collection.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/run_scaler.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/scaler/scaler_server.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/signature_client.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/submission_client.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/tasking_client.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/helper.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/updater/run_updater.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/crawler.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/department_map.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/safelist.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/stream_map.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/vacuum/worker.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/workflow/__init__.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core/workflow/run_workflow.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/requires.txt
RENAMED
|
File without changes
|
{assemblyline-core-4.5.0.22 → assemblyline-core-4.5.0.24}/assemblyline_core.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|