assemblyline-core 4.4.1.dev223__tar.gz → 4.4.1.dev224__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of assemblyline-core might be problematic. Click here for more details.
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/PKG-INFO +1 -1
- assemblyline-core-4.4.1.dev224/assemblyline_core/VERSION +1 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/client.py +3 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/dispatcher.py +4 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/ingester.py +62 -2
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/PKG-INFO +1 -1
- assemblyline-core-4.4.1.dev223/assemblyline_core/VERSION +0 -1
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/LICENCE.md +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/README.md +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/client.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/run.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/scaler_server.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/helper.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/run_updater.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/setup.cfg +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/setup.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_alerter.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_dispatcher.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_expiry.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_plumber.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_replay.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_scaler.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_scheduler.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_simulation.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_submit.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.4.1.dev224
|
|
@@ -164,6 +164,9 @@ class DispatchClient:
|
|
|
164
164
|
dispatcher_id = queue.pop(timeout=5)
|
|
165
165
|
listed_dispatchers.discard(dispatcher_id)
|
|
166
166
|
|
|
167
|
+
def queued_submissions(self) -> list[dict]:
|
|
168
|
+
return self.submission_queue.content()
|
|
169
|
+
|
|
167
170
|
def outstanding_services(self, sid) -> Optional[dict[str, int]]:
|
|
168
171
|
"""
|
|
169
172
|
List outstanding services for a given submission and the number of files each
|
|
@@ -274,6 +274,10 @@ class Dispatcher(ThreadedCoreBase):
|
|
|
274
274
|
def instance_assignment_size(persistent_redis, instance_id):
|
|
275
275
|
return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).length()
|
|
276
276
|
|
|
277
|
+
@staticmethod
|
|
278
|
+
def instance_assignment(persistent_redis, instance_id) -> list[str]:
|
|
279
|
+
return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).keys()
|
|
280
|
+
|
|
277
281
|
@staticmethod
|
|
278
282
|
def all_queue_lengths(redis, instance_id):
|
|
279
283
|
return {
|
|
@@ -17,8 +17,8 @@ from random import random
|
|
|
17
17
|
from typing import Any, Iterable, List, Optional, Tuple
|
|
18
18
|
|
|
19
19
|
import elasticapm
|
|
20
|
-
from assemblyline.common.postprocess import ActionWorker
|
|
21
20
|
|
|
21
|
+
from assemblyline.common.postprocess import ActionWorker
|
|
22
22
|
from assemblyline_core.server_base import ThreadedCoreBase
|
|
23
23
|
from assemblyline.common.metrics import MetricsFactory
|
|
24
24
|
from assemblyline.common.str_utils import dotdump, safe_str
|
|
@@ -41,6 +41,7 @@ from assemblyline.odm.models.submission import SubmissionParams, Submission as D
|
|
|
41
41
|
from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
|
|
42
42
|
from assemblyline.odm.messages.submission import Submission as MessageSubmission, SubmissionMessage
|
|
43
43
|
|
|
44
|
+
from assemblyline_core.dispatching.dispatcher import Dispatcher
|
|
44
45
|
from assemblyline_core.submission_client import SubmissionClient
|
|
45
46
|
from .constants import INGEST_QUEUE_NAME, drop_chance, COMPLETE_QUEUE_NAME
|
|
46
47
|
|
|
@@ -189,7 +190,8 @@ class Ingester(ThreadedCoreBase):
|
|
|
189
190
|
def try_run(self):
|
|
190
191
|
threads_to_maintain = {
|
|
191
192
|
'Retries': self.handle_retries,
|
|
192
|
-
'Timeouts': self.handle_timeouts
|
|
193
|
+
'Timeouts': self.handle_timeouts,
|
|
194
|
+
'Missing': self.handle_missing,
|
|
193
195
|
}
|
|
194
196
|
threads_to_maintain.update({f'Complete_{n}': self.handle_complete for n in range(COMPLETE_THREADS)})
|
|
195
197
|
threads_to_maintain.update({f'Ingest_{n}': self.handle_ingest for n in range(INGEST_THREADS)})
|
|
@@ -483,6 +485,64 @@ class Ingester(ThreadedCoreBase):
|
|
|
483
485
|
self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
|
|
484
486
|
self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
|
|
485
487
|
|
|
488
|
+
def handle_missing(self) -> None:
|
|
489
|
+
"""
|
|
490
|
+
Messages get dropped or only partially processed when ingester and dispatcher containers scale up and down.
|
|
491
|
+
|
|
492
|
+
This loop checks for submissions that are in two invalid states:
|
|
493
|
+
- finished but still listed as being scanned by ingester (message probably dropped by ingester)
|
|
494
|
+
- listed by ingester but unknown by dispatcher (message could have been dropped on either end)
|
|
495
|
+
|
|
496
|
+
Loading all the info needed to do these checks is a bit slow, but doing them every 5 or 15 minutes
|
|
497
|
+
per ingester shouldn't be noteworthy. While these missing messages are bound to happen from time to time
|
|
498
|
+
they should be rare. With that in mind, a warning is raised whenever this worker processes something
|
|
499
|
+
so that if a constant stream of items is falling through and getting processed here it might stand out.
|
|
500
|
+
"""
|
|
501
|
+
last_round: set[str] = set()
|
|
502
|
+
|
|
503
|
+
while self.sleep(300 if last_round else 900):
|
|
504
|
+
# Get the current set of outstanding tasks
|
|
505
|
+
outstanding: dict[str, dict] = self.scanning.items()
|
|
506
|
+
|
|
507
|
+
# Get jobs being processed by dispatcher or in dispatcher queue
|
|
508
|
+
assignment: dict[str, str] = {}
|
|
509
|
+
for data in self.submit_client.dispatcher.queued_submissions():
|
|
510
|
+
assignment[data['submission']['sid']] = ''
|
|
511
|
+
for dis in Dispatcher.all_instances(self.redis_persist):
|
|
512
|
+
for key in Dispatcher.instance_assignment(self.redis_persist, dis):
|
|
513
|
+
assignment[key] = dis
|
|
514
|
+
|
|
515
|
+
# Filter out outstanding tasks currently assigned or in queue
|
|
516
|
+
outstanding = {
|
|
517
|
+
key: doc
|
|
518
|
+
for key, doc in outstanding.items()
|
|
519
|
+
if doc["submission"]["sid"] not in assignment
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
unprocessed = []
|
|
523
|
+
for key, data in outstanding.items():
|
|
524
|
+
task = IngestTask(data)
|
|
525
|
+
sid = task.submission.sid
|
|
526
|
+
|
|
527
|
+
# Check if it's already complete in the database
|
|
528
|
+
from_db = self.datastore.submission.get_if_exists(sid)
|
|
529
|
+
if from_db and from_db.state == "completed":
|
|
530
|
+
self.log.warning("Completing a hanging finished submission [{}]", sid)
|
|
531
|
+
self.completed(from_db)
|
|
532
|
+
|
|
533
|
+
# Check for items that have been in an unknown state since the last round
|
|
534
|
+
# and put it back in processing
|
|
535
|
+
elif sid in last_round:
|
|
536
|
+
self.log.warning("Recovering a submission dispatcher hasn't processed [{}]", sid)
|
|
537
|
+
self.submit(task)
|
|
538
|
+
|
|
539
|
+
# Otherwise defer looking at this until next iteration
|
|
540
|
+
else:
|
|
541
|
+
unprocessed.append(sid)
|
|
542
|
+
|
|
543
|
+
# store items for next round
|
|
544
|
+
last_round = set(unprocessed)
|
|
545
|
+
|
|
486
546
|
def get_groups_from_user(self, username: str) -> List[str]:
|
|
487
547
|
# Reset the group cache at the top of each hour
|
|
488
548
|
if time.time()//HOUR_IN_SECONDS > self._user_groups_reset:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
4.4.1.dev223
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/replay.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/server_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/worker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_ingest.py
RENAMED
|
File without changes
|
{assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_submit.py
RENAMED
|
File without changes
|