assemblyline-core 4.6.1.dev163__tar.gz → 4.7.0.dev45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/PKG-INFO +1 -1
- assemblyline_core-4.7.0.dev45/assemblyline_core/VERSION +1 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/client.py +2 -1
- assemblyline_core-4.7.0.dev45/assemblyline_core/dispatching/dispatcher.py +327 -0
- assemblyline_core-4.7.0.dev45/assemblyline_core/ingester/ingester.py +116 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/heartbeat_formatter.py +1 -1
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/client.py +16 -11
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +24 -7
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/scaler_server.py +17 -7
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/submission_client.py +55 -4
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/tasking_client.py +24 -2
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/updater/helper.py +7 -9
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/updater/run_updater.py +15 -14
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/SOURCES.txt +0 -10
- assemblyline_core-4.7.0.dev45/test/test_tasking_client.py +77 -0
- assemblyline_core-4.6.1.dev163/assemblyline_core/VERSION +0 -1
- assemblyline_core-4.6.1.dev163/assemblyline_core/dispatching/__main__.py +0 -5
- assemblyline_core-4.6.1.dev163/assemblyline_core/dispatching/dispatcher.py +0 -2032
- assemblyline_core-4.6.1.dev163/assemblyline_core/dispatching/timeout.py +0 -59
- assemblyline_core-4.6.1.dev163/assemblyline_core/ingester/__main__.py +0 -5
- assemblyline_core-4.6.1.dev163/assemblyline_core/ingester/ingester.py +0 -967
- assemblyline_core-4.6.1.dev163/assemblyline_core/plumber/run_plumber.py +0 -332
- assemblyline_core-4.6.1.dev163/assemblyline_core/workflow/__init__.py +0 -0
- assemblyline_core-4.6.1.dev163/test/test_dispatcher.py +0 -456
- assemblyline_core-4.6.1.dev163/test/test_plumber.py +0 -162
- assemblyline_core-4.6.1.dev163/test/test_simulation.py +0 -1455
- assemblyline_core-4.6.1.dev163/test/test_tasking_client.py +0 -37
- assemblyline_core-4.6.1.dev163/test/test_worker_ingest.py +0 -248
- assemblyline_core-4.6.1.dev163/test/test_worker_submit.py +0 -137
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/LICENCE.md +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/README.md +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/plumber → assemblyline_core-4.7.0.dev45/assemblyline_core/replay}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/replay → assemblyline_core-4.7.0.dev45/assemblyline_core/replay/creator}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/replay/creator → assemblyline_core-4.7.0.dev45/assemblyline_core/replay/loader}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/loader/run.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/replay/loader → assemblyline_core-4.7.0.dev45/assemblyline_core/scaler}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/server_base.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/signature_client.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/scaler → assemblyline_core-4.7.0.dev45/assemblyline_core/updater}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/updater → assemblyline_core-4.7.0.dev45/assemblyline_core/vacuum}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline_core-4.6.1.dev163/assemblyline_core/vacuum → assemblyline_core-4.7.0.dev45/assemblyline_core/workflow}/__init__.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/setup.cfg +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/setup.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_alerter.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_badlist_client.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_expiry.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_replay.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_safelist_client.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_scaler.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_scheduler.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_signature_client.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_vacuum.py +0 -0
- {assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/test/test_workflow.py +0 -0
assemblyline_core-4.7.0.dev45/assemblyline_core/VERSION
ADDED
@@ -0,0 +1 @@
+4.7.0.dev45

{assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/client.py
RENAMED
@@ -16,6 +16,7 @@ from assemblyline.common.constants import DISPATCH_RUNNING_TASK_HASH, SUBMISSION
     make_watcher_list_name, DISPATCH_TASK_HASH
 from assemblyline.common.forge import CachedObject, get_service_queue
 from assemblyline.common.isotime import now_as_iso
+from assemblyline.common.dispatcher import Dispatcher
 from assemblyline.datastore.exceptions import VersionConflictException
 from assemblyline.odm.base import DATEFORMAT
 from assemblyline.odm.messages.dispatching import DispatcherCommandMessage, CREATE_WATCH, \
@@ -30,7 +31,7 @@ from assemblyline.remote.datatypes.hash import ExpiringHash, Hash
 from assemblyline.remote.datatypes.queues.named import NamedQueue
 from assemblyline.remote.datatypes.set import ExpiringSet, Set
 from assemblyline_core.dispatching.dispatcher import DISPATCH_START_EVENTS, DISPATCH_RESULT_QUEUE, \
-    DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask
+    DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask
 
 
 MAX_CANCEL_RESPONSE_WAIT = 10

assemblyline_core-4.7.0.dev45/assemblyline_core/dispatching/dispatcher.py
ADDED
@@ -0,0 +1,327 @@
+from __future__ import annotations
+
+import dataclasses
+import enum
+import os
+import threading
+import time
+import uuid
+from contextlib import contextmanager
+from copy import deepcopy
+from queue import Empty, PriorityQueue, Queue
+from typing import TYPE_CHECKING, Any, Iterable, Optional
+
+import elasticapm
+
+from assemblyline.common.constants import (
+    DISPATCH_RUNNING_TASK_HASH,
+    DISPATCH_TASK_HASH,
+    SCALER_TIMEOUT_QUEUE,
+    SUBMISSION_QUEUE,
+    make_watcher_list_name,
+)
+from assemblyline.common.forge import (
+    get_apm_client,
+    get_classification,
+    get_service_queue,
+)
+from assemblyline.common.isotime import now_as_iso
+from assemblyline.common.metrics import MetricsFactory
+from assemblyline.common.postprocess import ActionWorker
+from assemblyline.datastore.helper import AssemblylineDatastore
+from assemblyline.odm.messages.changes import Operation, ServiceChange
+from assemblyline.odm.messages.dispatcher_heartbeat import Metrics
+from assemblyline.odm.messages.dispatching import (
+    CREATE_WATCH,
+    LIST_OUTSTANDING,
+    UPDATE_BAD_SID,
+    CreateWatch,
+    DispatcherCommandMessage,
+    ListOutstanding,
+    WatchQueueMessage,
+)
+from assemblyline.odm.messages.service_heartbeat import Metrics as ServiceMetrics
+from assemblyline.odm.messages.submission import (
+    SubmissionMessage,
+    from_datastore_submission,
+)
+from assemblyline.odm.messages.task import FileInfo
+from assemblyline.odm.messages.task import Task as ServiceTask
+from assemblyline.odm.models.error import Error
+from assemblyline.odm.models.result import Result
+from assemblyline.odm.models.service import Service
+from assemblyline.odm.models.submission import Submission, TraceEvent
+from assemblyline.odm.models.user import User
+from assemblyline.remote.datatypes.events import EventWatcher
+from assemblyline.remote.datatypes.exporting_counter import export_metrics_once
+from assemblyline.remote.datatypes.hash import Hash
+from assemblyline.remote.datatypes.queues.comms import CommsQueue
+from assemblyline.remote.datatypes.queues.named import NamedQueue
+from assemblyline.remote.datatypes.set import ExpiringSet, Set
+from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
+from assemblyline_core.server_base import ThreadedCoreBase
+
+from ..ingester.constants import COMPLETE_QUEUE_NAME
+from .schedules import Scheduler
+
+if TYPE_CHECKING:
+    from redis import Redis
+
+    from assemblyline.odm.models.file import File
+    from assemblyline.odm.models.config import Config
+
+
+APM_SPAN_TYPE = 'handle_message'
+
+AL_SHUTDOWN_GRACE = int(os.environ.get('AL_SHUTDOWN_GRACE', '60'))
+AL_SHUTDOWN_QUIT = 60
+FINALIZING_WINDOW = max(AL_SHUTDOWN_GRACE - AL_SHUTDOWN_QUIT, 0)
+RESULT_BATCH_SIZE = int(os.environ.get('DISPATCHER_RESULT_BATCH_SIZE', '50'))
+ERROR_BATCH_SIZE = int(os.environ.get('DISPATCHER_ERROR_BATCH_SIZE', '50'))
+DAY_IN_SECONDS = 24 * 60 * 60
+
+
+class KeyType(enum.Enum):
+    OVERWRITE = 'overwrite'
+    UNION = 'union'
+
+
+class Action(enum.IntEnum):
+    start = 0
+    result = 1
+    dispatch_file = 2
+    service_timeout = 3
+    check_submission = 4
+    bad_sid = 5
+
+
+@dataclasses.dataclass(order=True)
+class DispatchAction:
+    kind: Action
+    sid: str = dataclasses.field(compare=False)
+    sha: Optional[str] = dataclasses.field(compare=False, default=None)
+    service_name: Optional[str] = dataclasses.field(compare=False, default=None)
+    worker_id: Optional[str] = dataclasses.field(compare=False, default=None)
+    data: Any = dataclasses.field(compare=False, default=None)
+    event: Optional[threading.Event] = dataclasses.field(compare=False, default=None)
+
+
+
+@contextmanager
+def apm_span(client, span_name: str):
+    try:
+        if client:
+            client.begin_transaction(APM_SPAN_TYPE)
+        yield None
+        if client:
+            client.end_transaction(span_name, 'success')
+    except Exception:
+        if client:
+            client.end_transaction(span_name, 'exception')
+        raise
+
+
+DISPATCH_TASK_ASSIGNMENT = 'dispatcher-tasks-assigned-to-'
+TASK_ASSIGNMENT_PATTERN = DISPATCH_TASK_ASSIGNMENT + '*'
+DISPATCH_START_EVENTS = 'dispatcher-start-events-'
+DISPATCH_RESULT_QUEUE = 'dispatcher-results-'
+DISPATCH_COMMAND_QUEUE = 'dispatcher-commands-'
+DISPATCH_DIRECTORY = 'dispatchers-directory'
+DISPATCH_DIRECTORY_FINALIZE = 'dispatchers-directory-finalizing'
+BAD_SID_HASH = 'bad-sid-hash'
+QUEUE_EXPIRY = 60*60
+SERVICE_VERSION_EXPIRY_TIME = 30 * 60  # How old service version info can be before we ignore it
+GUARD_TIMEOUT = 60*2
+GLOBAL_TASK_CHECK_INTERVAL = 60*10
+TIMEOUT_EXTRA_TIME = 5
+TIMEOUT_TEST_INTERVAL = 5
+MAX_RESULT_BUFFER = 64
+RESULT_THREADS = max(1, int(os.getenv('DISPATCHER_RESULT_THREADS', '2')))
+FINALIZE_THREADS = max(1, int(os.getenv('DISPATCHER_FINALIZE_THREADS', '2')))
+
+# After 20 minutes, check if a submission is still making progress.
+# In the case of a crash somewhere else in the system, we may not have
+# gotten a message we are expecting. This should prompt a retry in most
+# cases.
+SUBMISSION_TOTAL_TIMEOUT = 60 * 20
+
+
+class Dispatcher(ThreadedCoreBase):
+    # @staticmethod
+    # def all_instances(persistent_redis: Redis):
+    #     return Hash(DISPATCH_DIRECTORY, host=persistent_redis).keys()
+
+    # @staticmethod
+    # def instance_assignment_size(persistent_redis, instance_id):
+    #     return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).length()
+
+    # @staticmethod
+    # def instance_assignment(persistent_redis, instance_id) -> list[str]:
+    #     return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).keys()
+
+    # @staticmethod
+    # def all_queue_lengths(redis, instance_id):
+    #     return {
+    #         'start': NamedQueue(DISPATCH_START_EVENTS + instance_id, host=redis).length(),
+    #         'result': NamedQueue(DISPATCH_RESULT_QUEUE + instance_id, host=redis).length(),
+    #         'command': NamedQueue(DISPATCH_COMMAND_QUEUE + instance_id, host=redis).length()
+    #     }
+
+    def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None,
+                 config=None, counter_name: str = 'dispatcher'):
+        super().__init__('assemblyline.dispatcher', config=config, datastore=datastore,
+                         redis=redis, redis_persist=redis_persist, logger=logger)
+
+        # Load the datastore collections that we are going to be using
+        self.instance_id = uuid.uuid4().hex
+        self.tasks: dict[str, SubmissionTask] = {}
+        self.finalizing = threading.Event()
+        self.finalizing_start = 0.0
+
+        # Build some utility classes
+        self.scheduler = Scheduler(self.datastore, self.config, self.redis)
+        self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
+        self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
+
+        self.classification_engine = get_classification()
+
+        # Output. Duplicate our input traffic into this queue so it may be cloned by other systems
+        self.traffic_queue = CommsQueue('submissions', self.redis)
+        self.quota_tracker = UserQuotaTracker('submissions', timeout=60 * 60, host=self.redis_persist)
+        self.submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
+
+        # Table to track the running dispatchers
+        self.dispatchers_directory: Hash[int] = Hash(DISPATCH_DIRECTORY, host=self.redis_persist)
+        self.dispatchers_directory_finalize: Hash[int] = Hash(DISPATCH_DIRECTORY_FINALIZE, host=self.redis_persist)
+        self.running_dispatchers_estimate = 1
+
+        # Tables to track what submissions are running where
+        self.active_submissions = Hash(DISPATCH_TASK_ASSIGNMENT+self.instance_id, host=self.redis_persist)
+        self.submissions_assignments = Hash(DISPATCH_TASK_HASH, host=self.redis_persist)
+        self.ingester_scanning = Hash('m-scanning-table', self.redis_persist)
+
+        # Communications queues
+        self.start_queue: NamedQueue[tuple[str, str, str, str]] =\
+            NamedQueue(DISPATCH_START_EVENTS+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+        self.result_queue: NamedQueue[dict] =\
+            NamedQueue(DISPATCH_RESULT_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+        self.command_queue: NamedQueue[dict] =\
+            NamedQueue(DISPATCH_COMMAND_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+
+        # Publish counters to the metrics sink.
+        self.counter = MetricsFactory(metrics_type='dispatcher', schema=Metrics, name=counter_name,
+                                      redis=self.redis, config=self.config)
+
+        self.apm_client = None
+        if self.config.core.metrics.apm_server.server_url:
+            elasticapm.instrument()
+            self.apm_client = get_apm_client("dispatcher")
+
+        self._service_timeouts: TimeoutTable[tuple[str, str, str], str] = TimeoutTable()
+        self._submission_timeouts: TimeoutTable[str, None] = TimeoutTable()
+
+        # Setup queues for work to be divided into
+        self.process_queues: list[PriorityQueue[DispatchAction]] = [PriorityQueue() for _ in range(RESULT_THREADS)]
+        self.queue_ready_signals: list[threading.Semaphore] = [threading.Semaphore(MAX_RESULT_BUFFER)
+                                                               for _ in range(RESULT_THREADS)]
+
+        # Queue of finished submissions/errors waiting to be saved into elastic
+        self.finalize_queue = Queue()
+        self.error_queue: Queue[tuple[str, Error]] = Queue()
+
+        # Queue to hold of service timeouts that need to be processed
+        # They will be held in this queue until results in redis are
+        # already processed
+        self.timeout_queue: Queue[DispatchAction] = Queue()
+
+        # Utility object to handle post-processing actions
+        self.postprocess_worker = ActionWorker(cache=False, config=self.config, datastore=self.datastore,
+                                               redis_persist=self.redis_persist)
+
+        # Update bad sid list
+        self.redis_bad_sids = Set(BAD_SID_HASH, host=self.redis_persist)
+        self.bad_sids: set[str] = set(self.redis_bad_sids.members())
+
+        # Event Watchers
+        self.service_change_watcher = EventWatcher(self.redis, deserializer=ServiceChange.deserialize)
+        self.service_change_watcher.register('changes.services.*', self._handle_service_change_event)
+
+    def stop(self):
+        super().stop()
+        self.service_change_watcher.stop()
+        self.postprocess_worker.stop()
+
+    def try_run(self):
+        self.log.info(f'Using dispatcher id {self.instance_id}')
+        self.service_change_watcher.start()
+        threads = {
+            # Process to protect against old dead tasks timing out
+            'Global Timeout Backstop': self.timeout_backstop,
+        }
+
+        for ii in range(RESULT_THREADS):
+            # Process results
+            threads[f'Service Update Worker #{ii}'] = self.service_worker_factory(ii)
+
+        self.maintain_threads(threads)
+
+        # If the dispatcher is exiting cleanly remove as many tasks from the service queues as we can
+        service_queues = {}
+        for task in self.tasks.values():
+            for (_sha256, service_name), dispatch_key in task.queue_keys.items():
+                try:
+                    s_queue = service_queues[service_name]
+                except KeyError:
+                    s_queue = get_service_queue(service_name, self.redis)
+                    service_queues[service_name] = s_queue
+                s_queue.remove(dispatch_key)
+
+
+    def timeout_backstop(self):
+        while self.running:
+            cpu_mark = time.process_time()
+            time_mark = time.time()
+
+            # Start of process dispatcher transaction
+            with apm_span(self.apm_client, 'timeout_backstop'):
+                dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
+                error_tasks = []
+
+                # iterate running tasks
+                for _task_key, task_body in self.running_tasks:
+                    task = ServiceTask(task_body)
+                    # Its a bad task if it's dispatcher isn't running
+                    if task.metadata['dispatcher__'] not in dispatcher_instances:
+                        error_tasks.append(task)
+                    # Its a bad task if its OUR task, but we aren't tracking that submission anymore
+                    if task.metadata['dispatcher__'] == self.instance_id and task.sid not in self.tasks:
+                        error_tasks.append(task)
+
+                # Refresh our dispatcher list.
+                dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
+                other_dispatcher_instances = dispatcher_instances - {self.instance_id}
+
+                # The remaining running tasks (probably) belong to dead dispatchers and can be killed
+                for task in error_tasks:
+                    # Check against our refreshed dispatcher list in case it changed during the previous scan
+                    if task.metadata['dispatcher__'] in other_dispatcher_instances:
+                        continue
+
+                    # If its already been handled, we don't need to
+                    if not self.running_tasks.pop(task.key()):
+                        continue
+
+                    # Kill the task that would report to a dead dispatcher
+                    self.log.warning(f"[{task.sid}]Task killed by backstop {task.service_name} {task.fileinfo.sha256}")
+                    self.scaler_timeout_queue.push({
+                        'service': task.service_name,
+                        'container': task.metadata['worker__']
+                    })
+
+                    # Report to the metrics system that a recoverable error has occurred for that service
+                    export_metrics_once(task.service_name, ServiceMetrics, dict(fail_recoverable=1),
+                                        host=task.metadata['worker__'], counter_type='service', redis=self.redis)
+
+            self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
+            self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
+            self.sleep(GLOBAL_TASK_CHECK_INTERVAL)

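A note on the `DispatchAction` plumbing above: the dataclass is declared with `order=True` while every field except `kind` opts out of comparison, so the per-thread `PriorityQueue` orders work purely by the `Action` enum value. A minimal standalone sketch (not from the package) of that ordering behaviour:

```python
# Minimal sketch of how DispatchAction ordering works: order=True generates
# comparison methods, but every compare=False field is excluded, so the
# PriorityQueue sorts purely on the Action enum value.
import dataclasses
import enum
from queue import PriorityQueue
from typing import Optional


class Action(enum.IntEnum):
    start = 0
    result = 1
    service_timeout = 3


@dataclasses.dataclass(order=True)
class DispatchAction:
    kind: Action
    sid: str = dataclasses.field(compare=False)
    service_name: Optional[str] = dataclasses.field(compare=False, default=None)


queue: PriorityQueue = PriorityQueue()
queue.put(DispatchAction(kind=Action.service_timeout, sid='sid-a'))
queue.put(DispatchAction(kind=Action.start, sid='sid-b'))

assert queue.get().sid == 'sid-b'  # start (0) dequeues before service_timeout (3)
```

Marking the payload fields `compare=False` also keeps the heap from ever comparing non-orderable values such as `threading.Event`.
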
assemblyline_core-4.7.0.dev45/assemblyline_core/ingester/ingester.py
ADDED
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+"""
+Ingester
+
+Ingester is responsible for monitoring for incoming submission requests,
+sending submissions, waiting for submissions to complete, sending a message
+to a notification queue as specified by the submission and, based on the
+score received, possibly sending a message to indicate that an alert should
+be created.
+"""
+
+import logging
+import threading
+import time
+from os import environ
+from random import random
+from typing import Any, Iterable, List, Optional, Tuple
+
+import elasticapm
+
+from assemblyline import odm
+from assemblyline.common import exceptions, forge, isotime
+from assemblyline.common.constants import DROP_PRIORITY
+from assemblyline.common.exceptions import get_stacktrace_info
+from assemblyline.common.importing import load_module_by_path
+from assemblyline.common.isotime import now, now_as_iso
+from assemblyline.common.metrics import MetricsFactory
+from assemblyline.common.postprocess import ActionWorker
+from assemblyline.common.str_utils import dotdump, safe_str
+from assemblyline.datastore.exceptions import DataStoreException
+from assemblyline.filestore import CorruptedFileStoreException, FileStoreException
+from assemblyline.odm.messages.ingest_heartbeat import Metrics
+from assemblyline.odm.messages.submission import Submission as MessageSubmission
+from assemblyline.odm.messages.submission import SubmissionMessage
+from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
+from assemblyline.odm.models.filescore import FileScore
+from assemblyline.odm.models.submission import Submission as DatabaseSubmission
+from assemblyline.odm.models.submission import SubmissionParams
+from assemblyline.odm.models.user import User
+from assemblyline.remote.datatypes.events import EventWatcher
+from assemblyline.remote.datatypes.hash import Hash
+from assemblyline.remote.datatypes.queues.comms import CommsQueue
+from assemblyline.remote.datatypes.queues.multi import MultiQueue
+from assemblyline.remote.datatypes.queues.named import NamedQueue
+from assemblyline.remote.datatypes.queues.priority import PriorityQueue
+from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
+from assemblyline_core.dispatching.dispatcher import Dispatcher
+from assemblyline_core.server_base import ThreadedCoreBase
+from assemblyline_core.submission_client import SubmissionClient
+
+from .constants import COMPLETE_QUEUE_NAME, INGEST_QUEUE_NAME, drop_chance
+
+_dup_prefix = 'w-m-'
+_notification_queue_prefix = 'nq-'
+_max_retries = 10
+_retry_delay = 60 * 4  # Wait 4 minutes to retry
+_max_time = 2 * 24 * 60 * 60  # Wait 2 days for responses.
+HOUR_IN_SECONDS = 60 * 60
+COMPLETE_THREADS = int(environ.get('INGESTER_COMPLETE_THREADS', 4))
+INGEST_THREADS = int(environ.get('INGESTER_INGEST_THREADS', 1))
+SUBMIT_THREADS = int(environ.get('INGESTER_SUBMIT_THREADS', 4))
+
+
+def must_drop(length: int, maximum: int) -> bool:
+    """
+    To calculate the probability of dropping an incoming submission we compare
+    the number returned by random() which will be in the range [0,1) and the
+    number returned by tanh() which will be in the range (-1,1).
+
+    If length is less than maximum the number returned by tanh will be negative
+    and so drop will always return False since the value returned by random()
+    cannot be less than 0.
+
+    If length is greater than maximum, drop will return False with a probability
+    that increases as the distance between maximum and length increases:
+
+        Length          Chance of Dropping
+
+        <= maximum      0
+        1.5 * maximum   0.76
+        2 * maximum     0.96
+        3 * maximum     0.999
+    """
+    return random() < drop_chance(length, maximum)
+
+
+@odm.model()
+class IngestTask(odm.Model):
+    # Submission Parameters
+    submission: MessageSubmission = odm.compound(MessageSubmission)
+
+    # Shortcut for properties of the submission
+    @property
+    def file_size(self) -> int:
+        return sum(file.size for file in self.submission.files)
+
+    @property
+    def params(self) -> SubmissionParams:
+        return self.submission.params
+
+    @property
+    def sha256(self) -> str:
+        return self.submission.files[0].sha256
+
+    # Information about the ingestion itself, parameters irrelevant
+    retries = odm.Integer(default=0)
+
+    # Fields added after a submission is complete for notification/bookkeeping processes
+    failure = odm.Text(default='')  # If the ingestion has failed for some reason, what is it?
+    score = odm.Optional(odm.Integer())  # Score from previous processing of this file
+    extended_scan = odm.Enum(EXTENDED_SCAN_VALUES, default="skipped")  # Status of the extended scan
+    ingest_id = odm.UUID()  # Ingestion Identifier
+    ingest_time = odm.Date(default="NOW")  # Time at which the file was ingested
+    notify_time = odm.Optional(odm.Date())  # Time at which the user is notify the submission is finished
+    to_ingest = odm.Boolean(default=False)
+

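`drop_chance` is imported from `.constants` and is not shown in this diff, but the docstring's table pins down its likely shape: the quoted probabilities are tanh(1.0), tanh(2.0) and tanh(4.0). A hedged reconstruction, for illustration only; the packaged implementation may differ:

```python
# drop_chance() lives in assemblyline_core/ingester/constants.py and is not
# part of this diff. This reconstruction is inferred from the docstring table
# alone (tanh(1.0) ~ 0.76, tanh(2.0) ~ 0.96, tanh(4.0) ~ 0.999) and may not
# match the packaged implementation exactly.
import math
from random import random


def drop_chance(length: int, maximum: int) -> float:
    # Queues shorter than the maximum give a negative tanh, clamped here to a
    # zero probability of dropping.
    return max(0.0, math.tanh((length - maximum) * 2.0 / maximum))


def must_drop(length: int, maximum: int) -> bool:
    return random() < drop_chance(length, maximum)


assert drop_chance(100, 100) == 0.0                # <= maximum
assert abs(drop_chance(150, 100) - 0.76) < 0.01    # 1.5 * maximum
assert abs(drop_chance(200, 100) - 0.96) < 0.01    # 2 * maximum
assert abs(drop_chance(300, 100) - 0.999) < 0.001  # 3 * maximum
```
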
{assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/heartbeat_formatter.py
RENAMED
@@ -6,6 +6,7 @@ from assemblyline.common import forge, metrics
 from assemblyline.common.archiving import ARCHIVE_QUEUE_NAME
 from assemblyline.common.constants import DISPATCH_TASK_HASH, SUBMISSION_QUEUE, \
     SERVICE_STATE_HASH, ServiceStatus
+from assemblyline.common.dispatcher import Dispatcher
 from assemblyline.datastore.exceptions import SearchException
 from assemblyline.odm.messages.retrohunt_heartbeat import RetrohuntMessage
 from assemblyline.odm.messages.scaler_heartbeat import ScalerMessage
@@ -25,7 +26,6 @@ from assemblyline.remote.datatypes.queues.named import NamedQueue
 from assemblyline.remote.datatypes.queues.priority import PriorityQueue
 
 from assemblyline_core.alerter.run_alerter import ALERT_QUEUE_NAME, ALERT_RETRY_QUEUE_NAME
-from assemblyline_core.dispatching.dispatcher import Dispatcher
 from assemblyline_core.ingester import INGEST_QUEUE_NAME, drop_chance
 from assemblyline_core.ingester.constants import COMPLETE_QUEUE_NAME
 

{assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/client.py
RENAMED
@@ -297,11 +297,14 @@ class APIClient(ClientBase):
         self.al_client.bundle.create(id, output=bundle_path, use_alert=use_alert)
 
     def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
-        self.al_client.bundle.import_bundle(
-
-
-
-
+        self.al_client.bundle.import_bundle(
+            bundle_path,
+            min_classification=min_classification,
+            rescan_services=rescan_services,
+            exist_ok=exist_ok,
+            reclassification=reclassification,
+            to_ingest=True,  # send submissions to ingester
+        )
 
     def load_json(self, file_path, reclassification=None):
         from assemblyline_client import ClientError
@@ -412,11 +415,14 @@ class DirectClient(ClientBase):
         os.rename(temp_bundle_file, bundle_path)
 
     def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
-        import_bundle(
-
-
-
-
+        import_bundle(
+            bundle_path,
+            min_classification=min_classification,
+            rescan_services=rescan_services,
+            exist_ok=exist_ok,
+            reclassification=reclassification,
+            to_ingest=True,  # send submissions to ingester
+        )
 
     def load_json(self, file_path, reclassification=None):
         # We're assuming all JSON that loaded has an "enabled" field
@@ -442,7 +448,6 @@ class DirectClient(ClientBase):
         else:
             raise
 
-
         if collection == "workflow":
             # If there has been any edits by another user, then preserve the enabled state
             # Otherwise, the workflow will be synchronized with the origin system

{assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/kubernetes_ctl.py
RENAMED
@@ -14,6 +14,12 @@ from time import sleep
 from typing import List, Optional, Tuple
 
 import urllib3
+from assemblyline.odm.models.config import Selector
+from assemblyline.odm.models.service import (
+    DependencyConfig,
+    DockerConfig,
+    PersistentVolume,
+)
 from cryptography import x509
 from cryptography.hazmat.primitives import hashes, serialization
 from cryptography.hazmat.primitives.asymmetric import rsa
@@ -22,6 +28,7 @@ from kubernetes import client, config, watch
 from kubernetes.client import (
     V1Affinity,
     V1Capabilities,
+    V1ConfigMap,
     V1ConfigMapVolumeSource,
     V1Container,
     V1Deployment,
@@ -62,12 +69,6 @@ from kubernetes.client import (
 )
 from kubernetes.client.rest import ApiException
 
-from assemblyline.odm.models.config import Selector
-from assemblyline.odm.models.service import (
-    DependencyConfig,
-    DockerConfig,
-    PersistentVolume,
-)
 from assemblyline_core.scaler.controllers.interface import ControllerInterface
 
 # RESERVE_MEMORY_PER_NODE = os.environ.get('RESERVE_MEMORY_PER_NODE')
@@ -390,7 +391,7 @@ class KubernetesController(ControllerInterface):
     def _dependency_name(self, service_name: str, container_name: str):
         return f"{self._deployment_name(service_name)}-{container_name}".lower()
 
-    def add_config_mount(self, name: str, config_map: str, key: str, target_path: str, read_only=True, core=False):
+    def add_config_mount(self, name: str, config_map: str, key: Optional[str], target_path: str, read_only=True, core=False):
         volumes, mounts = self.volumes, self.mounts
         if core:
             volumes, mounts = self.core_volumes, self.core_mounts
@@ -1379,3 +1380,19 @@ class KubernetesController(ControllerInterface):
         for np in (existing_netpol - {np.metadata.name for np in network_policies}):
             self.net_api.delete_namespaced_network_policy(namespace=self.namespace, name=np,
                                                           _request_timeout=API_TIMEOUT)
+
+    def update_config_map(self, data: dict, name: str):
+        """Update or create a ConfigMap in Kubernetes."""
+        config_map = V1ConfigMap(
+            metadata=V1ObjectMeta(name=name, namespace=self.namespace),
+            data=data
+        )
+        try:
+            self.api.patch_namespaced_config_map(name=name, namespace=self.namespace, body=config_map,
+                                                 _request_timeout=API_TIMEOUT)
+        except ApiException as error:
+            if error.status == 404:
+                self.api.create_namespaced_config_map(namespace=self.namespace, body=config_map,
+                                                      _request_timeout=API_TIMEOUT)
+            else:
+                raise

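The new `update_config_map` uses the standard Kubernetes patch-or-create idiom: attempt `patch_namespaced_config_map` and fall back to `create_namespaced_config_map` when the API answers 404. A standalone sketch of the same idiom outside the controller (the namespace and payload below are illustrative assumptions; the ConfigMap name matches the one the scaler uses):

```python
# Standalone sketch of the patch-or-create idiom used by update_config_map().
# The 'al' namespace and the payload below are illustrative assumptions.
from kubernetes import client, config
from kubernetes.client.rest import ApiException

config.load_kube_config()  # or config.load_incluster_config() inside a pod
api = client.CoreV1Api()

body = client.V1ConfigMap(
    metadata=client.V1ObjectMeta(name='privileged-service-config', namespace='al'),
    data={'config': 'datastore: {}\n'},  # placeholder payload
)
try:
    # PATCH succeeds when the ConfigMap already exists
    api.patch_namespaced_config_map(name='privileged-service-config', namespace='al', body=body)
except ApiException as error:
    if error.status == 404:
        # First run: the map does not exist yet, so create it instead
        api.create_namespaced_config_map(namespace='al', body=body)
    else:
        raise
```
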
{assemblyline_core-4.6.1.dev163 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/scaler_server.py
RENAMED
@@ -19,7 +19,6 @@ from typing import Any, Dict, Optional
 
 import elasticapm
 import yaml
-
 from assemblyline.common.constants import (
     SCALER_TIMEOUT_QUEUE,
     SERVICE_STATE_HASH,
@@ -44,14 +43,13 @@ from assemblyline.remote.datatypes.hash import ExpiringHash, Hash
 from assemblyline.remote.datatypes.queues.named import NamedQueue
 from assemblyline.remote.datatypes.queues.priority import PriorityQueue
 from assemblyline.remote.datatypes.queues.priority import length as pq_length
-
+
+from assemblyline_core.scaler import collection
+from assemblyline_core.scaler.controllers import DockerController, KubernetesController
 from assemblyline_core.scaler.controllers.interface import ServiceControlError
 from assemblyline_core.server_base import ServiceStage, ThreadedCoreBase
 from assemblyline_core.updater.helper import get_registry_config
 
-from . import collection
-from .controllers import DockerController
-
 APM_SPAN_TYPE = 'scaler'
 
 # How often (in seconds) to download new service data, try to scale managed services,
@@ -298,6 +296,16 @@ class ScalerServer(ThreadedCoreBase):
             # be shared with privileged services.
             pass
 
+        # Create a configuration file specifically meant for privileged services to consume
+        # This should only contain the relevant information to connect to the databases
+        privileged_config = yaml.dump({
+            'datastore': self.config.datastore.as_primitives(),
+            'filestore': self.config.filestore.as_primitives(),
+            'core': {
+                'redis': self.config.core.redis.as_primitives()
+            }
+        })
+
         labels = {
             'app': 'assemblyline',
             'section': 'service',
@@ -340,7 +348,9 @@ class ScalerServer(ThreadedCoreBase):
         )
 
         # Add global configuration for privileged services
-
+        # Check if the ConfigMap already exists, if it does, update it
+        self.controller.update_config_map(data={'config': privileged_config}, name='privileged-service-config')
+        self.controller.add_config_mount(KUBERNETES_AL_CONFIG, config_map='privileged-service-config', key="config",
                                          target_path="/etc/assemblyline/config.yml", read_only=True, core=True)
 
         # If we're passed an override for server-server and it's defining an HTTPS connection, then add a global
@@ -382,7 +392,7 @@ class ScalerServer(ThreadedCoreBase):
 
         with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'config.yml'), 'w') as handle:
             # Convert to JSON before converting to YAML to account for direct ODM representation errors
-
+            handle.write(privileged_config)
 
         with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'classification.yml'), 'w') as handle:
             yaml.dump(get_classification().original_definition, handle)