assemblyline-core 4.5.0.1__tar.gz → 4.5.1.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of assemblyline-core might be problematic.
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/PKG-INFO +1 -1
- assemblyline-core-4.5.1.dev0/assemblyline_core/VERSION +1 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/client.py +1 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/dispatcher.py +315 -63
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/loader/run.py +1 -1
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/controllers/interface.py +10 -1
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +70 -7
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/scaler_server.py +36 -5
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/updater/helper.py +94 -70
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/updater/run_updater.py +3 -3
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_scheduler.py +2 -1
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_simulation.py +209 -7
- assemblyline-core-4.5.0.1/assemblyline_core/VERSION +0 -1
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/LICENCE.md +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/README.md +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/ingester/ingester.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/signature_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/setup.cfg +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/setup.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_alerter.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_badlist_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_dispatcher.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_expiry.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_plumber.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_replay.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_safelist_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_scaler.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_signature_client.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.5.0.1 → assemblyline-core-4.5.1.dev0}/test/test_worker_submit.py +0 -0
assemblyline_core/VERSION (new file)
@@ -0,0 +1 @@
+4.5.1.dev0
assemblyline_core/dispatching/dispatcher.py
@@ -42,17 +42,16 @@ from assemblyline.remote.datatypes.queues.named import NamedQueue
 from assemblyline.remote.datatypes.set import ExpiringSet, Set
 from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
 from assemblyline_core.server_base import ThreadedCoreBase
-from assemblyline_core.alerter.run_alerter import ALERT_QUEUE_NAME
 
+from .schedules import Scheduler
+from .timeout import TimeoutTable
+from ..ingester.constants import COMPLETE_QUEUE_NAME
 
 if TYPE_CHECKING:
     from assemblyline.odm.models.file import File
+    from redis import Redis
 
 
-from .schedules import Scheduler
-from .timeout import TimeoutTable
-from ..ingester.constants import COMPLETE_QUEUE_NAME
-
 APM_SPAN_TYPE = 'handle_message'
 
 AL_SHUTDOWN_GRACE = int(os.environ.get('AL_SHUTDOWN_GRACE', '60'))
@@ -64,6 +63,12 @@ DYNAMIC_ANALYSIS_CATEGORY = 'Dynamic Analysis'
 DAY_IN_SECONDS = 24 * 60 * 60
 
 
+class KeyType(enum.Enum):
+    OVERWRITE = 'overwrite'
+    UNION = 'union'
+    IGNORE = 'ignore'
+
+
 class Action(enum.IntEnum):
     start = 0
     result = 1
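Note: KeyType is consumed further down this diff, where dispatch_result applies temporary-data updates. A minimal sketch of the three modes, assuming a hypothetical per-deployment mapping (the key names here are illustrative, not defaults shipped with the package):

```python
from assemblyline_core.dispatching.dispatcher import KeyType  # added in this release

# Mirrors the shape of config.submission.temporary_keys
temporary_keys = {
    'passwords': KeyType.UNION,   # merged across services via union_value()
    'scratch': KeyType.IGNORE,    # writes to this key are dropped entirely
}

# Keys absent from the mapping fall through to the default overwrite
# behaviour (set_value(): last writer wins, cascading to extracted files).
assert temporary_keys.get('window_title') is None
```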
@@ -84,6 +89,19 @@ class DispatchAction:
     event: Optional[threading.Event] = dataclasses.field(compare=False, default=None)
 
 
+@dataclasses.dataclass()
+class MonitorTask:
+    """Tracks whether a task needs to be rerun based on changes to monitored temporary data."""
+    # Service name
+    service: str
+    # sha256 of file in question
+    sha: str
+    # The temporary values this task was last dispatched with
+    values: dict[str, Optional[str]]
+    # Should a service be dispatched again when possible
+    dispatch_needed: bool = dataclasses.field(default=False)
+
+
 @contextmanager
 def apm_span(client, span_name: str):
     try:
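Note: for orientation, a small sketch of how these records are keyed (the dict mirrors SubmissionTask.monitoring introduced later in this diff; the service name and hash are placeholders):

```python
from assemblyline_core.dispatching.dispatcher import MonitorTask

monitoring: dict[tuple[str, str], MonitorTask] = {}

# Snapshot of the monitored temporary values as seen at dispatch time
entry = MonitorTask(service='SomeService', sha='EXAMPLE_SHA256',
                    values={'passwords': None})
monitoring[(entry.sha, entry.service)] = entry

# If a watched key changes while the service is still running, the record
# is flagged rather than redispatching immediately
entry.dispatch_needed = True
```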
@@ -99,13 +117,156 @@ def apm_span(client, span_name: str):
 
 
 class ResultSummary:
-    def __init__(self, key, drop, score, children):
+    def __init__(self, key, drop, score, children, partial=False) -> None:
         self.key: str = key
         self.drop: bool = drop
+        self.partial: bool = partial
         self.score: int = score
         self.children: list[tuple[str, str]] = children
 
 
+class TemporaryFileData:
+    def __init__(self, sha256: str) -> None:
+        self.sha256 = sha256
+        self.parents: list[TemporaryFileData] = []
+        self.children: list[TemporaryFileData] = []
+        self.parent_cache: dict[str, Any] = {}
+        self.local_values: dict[str, Any] = {}
+
+    def add_parent(self, parent_temp: TemporaryFileData):
+        """Add a parent to this node."""
+        self.parents.append(parent_temp)
+        parent_temp.children.append(self)
+
+    def new_child(self, child: str) -> TemporaryFileData:
+        """Create a linked entry for a new child."""
+        temp = TemporaryFileData(child)
+        temp.parents.append(self)
+        self.children.append(temp)
+        temp.build_parent_cache()
+        return temp
+
+    def build_parent_cache(self):
+        """Rebuild the cache of data from parent files."""
+        self.parent_cache.clear()
+        for parent in self.parents:
+            self.parent_cache.update(parent.read())
+
+    def read(self) -> dict[str, Any]:
+        """Get a copy of the current data"""
+        # Start with a shallow copy of the parent cache
+        data = dict(self.parent_cache)
+
+        # update, this overwrites any common keys (we want this)
+        data.update(self.local_values)
+        return data
+
+    def read_key(self, key: str) -> Any:
+        """Get the current value of a single key."""
+        try:
+            return self.local_values[key]
+        except KeyError:
+            return self.parent_cache.get(key)
+
+    def set_value(self, key: str, value: str) -> set[str]:
+        """Using a SET operation update the value on this node and all children.
+
+        Returns a list of the sha of all files whose temporary data has been modified.
+        """
+        # If the local value doesn't change then we won't have any effect on children
+        old = self.local_values.get(key)
+        if type(old) is type(value) and old == value:
+            return set()
+
+        # Update the local value and recurse into children
+        self.local_values[key] = value
+        changed = [self.sha256]
+        for child in self.children:
+            changed.extend(child.set_value_from_ancestor(key, value))
+        return set(changed)
+
+    def set_value_from_ancestor(self, key: str, value: str) -> set[str]:
+        """Given that an ancestor has changed, test if this file's temporary data will change also."""
+        # If this child has already set this key, the parent values don't matter
+        if key in self.local_values:
+            return set()
+
+        # If the parent value was already set to this nothing has changed
+        old = self.parent_cache.get(key)
+        if type(old) is type(value) and old == value:
+            return set()
+
+        # Update the parent cache and recurse into children
+        self.parent_cache[key] = value
+        changed = [self.sha256]
+        for child in self.children:
+            changed.extend(child.set_value_from_ancestor(key, value))
+        return set(changed)
+
+    def union_value(self, key: str, value: set[str]) -> set[str]:
+        """Using a MERGE operation update the value on this node and all children.
+
+        Returns a list of the sha of all files whose temporary data has been modified.
+        """
+        if not value:
+            return set()
+
+        # If the local value doesn't change then we won't have any effect on children
+        new_value = merge_in_values(self.local_values.get(key), value)
+        if new_value is None:
+            return set()
+
+        # Update the local value and recurse into children
+        self.local_values[key] = new_value
+        changed = [self.sha256]
+        for child in self.children:
+            changed.extend(child.union_value_from_ancestor(key, value))
+        return set(changed)
+
+    def union_value_from_ancestor(self, key: str, value: set[str]) -> set[str]:
+        """Given that an ancestor has changed, test if this file's temporary data will change also.
+
+        For values updated by union the parent and local values are the same.
+        """
+        # Merge in data to parent cache, we won't be reading from it, but we still want to keep it
+        # up to date and use it to check if changes are needed
+        new_value = merge_in_values(self.parent_cache.get(key), value)
+        if new_value is None:
+            return set()
+        self.parent_cache[key] = new_value
+
+        # Update the local values as well if we need to
+        new_value = merge_in_values(self.local_values.get(key), value)
+        if new_value is None:
+            return set()
+        self.local_values[key] = new_value
+
+        # Since we did change the local value, pass the new set down to children
+        changed = [self.sha256]
+        for child in self.children:
+            changed.extend(child.union_value_from_ancestor(key, value))
+        return set(changed)
+
+
+def merge_in_values(old_values: Any, new_values: set[str]) -> Optional[list[str]]:
+    """Merge new values into a JSON list.
+
+    If there are no new values return None.
+    """
+    # Read out the old value set
+    if isinstance(old_values, (list, set)):
+        old_values = set(old_values)
+    else:
+        old_values = set()
+
+    # If we have no new values to merge in
+    if new_values <= old_values:
+        return None
+
+    # We have new values, build a new set
+    return list(new_values | old_values)
+
+
 class SubmissionTask:
     """Dispatcher internal model for submissions"""
 
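Note: the TemporaryFileData nodes form a tree that mirrors the extraction hierarchy, so a write on a parent becomes visible to its children unless they have overridden the key locally. A quick sketch of the propagation rules, assuming the class is imported from this module (the key names and hashes are made up):

```python
from assemblyline_core.dispatching.dispatcher import TemporaryFileData

root = TemporaryFileData('root-sha256')
child = root.new_child('child-sha256')

# SET semantics: a write on the root cascades to every child that has not
# overridden the key locally.
assert root.set_value('window_title', 'invoice.exe') == {'root-sha256', 'child-sha256'}
assert child.read_key('window_title') == 'invoice.exe'

# A local value on the child shadows the inherited one, so later parent
# writes no longer report the child as changed.
child.set_value('window_title', 'other.exe')
assert root.set_value('window_title', 'third.exe') == {'root-sha256'}

# UNION semantics: values accumulate instead of overwriting.
root.union_value('passwords', {'infected'})
root.union_value('passwords', {'letmein'})
assert set(child.read_key('passwords')) == {'infected', 'letmein'}
```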
@@ -126,12 +287,13 @@ class SubmissionTask:
         self.file_schedules: dict[str, list[dict[str, Service]]] = {}
         self.file_tags: dict[str, dict[str, dict[str, Any]]] = defaultdict(dict)
         self.file_depth: dict[str, int] = {}
-        self.file_temporary_data: dict[str, dict[str, Any]] = defaultdict(dict)
+        self.temporary_data: dict[str, TemporaryFileData] = {}
         self.extra_errors: list[str] = []
         self.active_files: set[str] = set()
         self.dropped_files: set[str] = set()
         self.dynamic_recursion_bypass: set[str] = set()
         self.service_logs: dict[tuple[str, str], list[str]] = defaultdict(list)
+        self.monitoring: dict[tuple[str, str], MonitorTask] = {}
 
         # mapping from file hash to a set of services that shouldn't be run on
         # any children (recursively) of that file
@@ -178,15 +340,15 @@ class SubmissionTask:
         children_detail: list[tuple[str, str]] = [(r['sha256'], r['parent_relation']) for r in extracted]
         self.service_results[(sha256, service)] = ResultSummary(
             key=k, drop=result['drop_file'], score=result['result']['score'],
-            children=children_detail)
+            children=children_detail, partial=result.get('partial', False))
 
         tags = Result(result).scored_tag_dict()
-        for key in tags.keys():
+        for key, tag in tags.items():
             if key in self.file_tags[sha256].keys():
                 # Sum score of already known tags
-                self.file_tags[sha256][key]['score'] += tags[key]['score']
+                self.file_tags[sha256][key]['score'] += tag['score']
             else:
-                self.file_tags[sha256][key] = tags[key]
+                self.file_tags[sha256][key] = tag
 
         if errors is not None:
             for e in errors:
@@ -195,6 +357,7 @@ class SubmissionTask:
 
     @property
     def sid(self) -> str:
+        """Shortcut to read submission SID"""
         return self.submission.sid
 
     def forbid_for_children(self, sha256: str, service_name: str):
@@ -206,16 +369,23 @@ class SubmissionTask:
 
     def register_children(self, parent: str, children: list[str]):
         """
-        Note which files extracted other files.
-
+        Note which files extracted other files.
+        _parent_map is for dynamic recursion prevention
+        temporary_data is for cascading the temp data to children
         """
+        parent_temp = self.temporary_data[parent]
         for child in children:
+            try:
+                self.temporary_data[child].add_parent(parent_temp)
+            except KeyError:
+                self.temporary_data[child] = parent_temp.new_child(child)
             try:
                 self._parent_map[child].add(parent)
             except KeyError:
                 self._parent_map[child] = {parent}
 
     def all_ancestors(self, sha256: str) -> list[str]:
+        """Collect all the known ancestors of the given file within this submission."""
        visited = set()
         to_visit = [sha256]
         while len(to_visit) > 0:
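Note: one subtlety in register_children above is that a file extracted by several parents gains multiple parents in the temporary-data tree, and its cached view merges all of them. A small sketch, assuming the class from earlier in this diff (add_parent does not rebuild the cache on its own, so it is rebuilt explicitly here):

```python
from assemblyline_core.dispatching.dispatcher import TemporaryFileData

p1 = TemporaryFileData('parent-1')
p2 = TemporaryFileData('parent-2')
p1.set_value('a', '1')
p2.set_value('b', '2')

child = p1.new_child('child')   # new_child builds the parent cache automatically
child.add_parent(p2)            # add_parent only links the nodes...
child.build_parent_cache()      # ...so rebuild to pull in p2's values

assert child.read() == {'a': '1', 'b': '2'}
```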
@@ -239,6 +409,64 @@ class SubmissionTask:
             for parent in self.all_ancestors(sha256)
         ]))
 
+    def set_monitoring_entry(self, sha256: str, service_name: str, values: dict[str, Optional[str]]):
+        """A service with monitoring has dispatched, keep track of the conditions."""
+        self.monitoring[(sha256, service_name)] = MonitorTask(
+            service=service_name,
+            sha=sha256,
+            values=values,
+        )
+
+    def partial_result(self, sha256, service_name):
+        """Note that a partial result has been received. If a dispatch was requested process that now."""
+        try:
+            entry = self.monitoring[(sha256, service_name)]
+        except KeyError:
+            return
+
+        if entry.dispatch_needed:
+            self.redispatch_service(sha256, service_name)
+
+    def clear_monitoring_entry(self, sha256, service_name):
+        """A service has completed normally. If the service is monitoring clear out the record."""
+        # We have an incoming non-partial result, flush out any partial monitoring
+        self.monitoring.pop((sha256, service_name), None)
+        # If there is a partial result for this service flush that as well so we accept this new result
+        result = self.service_results.get((sha256, service_name))
+        if result and result.partial:
+            self.service_results.pop((sha256, service_name), None)
+
+    def file_temporary_data_changed(self, changed_sha256: set[str], key: str) -> list[str]:
+        """Check all of the monitored tasks on that key for changes. Redispatch as needed."""
+        changed = []
+        for (sha256, service), entry in self.monitoring.items():
+            if sha256 not in changed_sha256:
+                continue
+
+            value = self.temporary_data[sha256].read_key(key)
+            dispatched_value = entry.values.get(key)
+
+            if type(value) is not type(dispatched_value) or value != dispatched_value:
+                result = self.service_results.get((sha256, service))
+                if not result:
+                    entry.dispatch_needed = True
+                else:
+                    self.redispatch_service(sha256, service)
+                    changed.append(sha256)
+        return changed
+
+    def redispatch_service(self, sha256, service_name):
+        # Clear the result if it's partial or an error
+        result = self.service_results.get((sha256, service_name))
+        if result and not result.partial:
+            return
+        self.service_results.pop((sha256, service_name), None)
+        self.service_errors.pop((sha256, service_name), None)
+        self.service_attempts[(sha256, service_name)] = 1
+
+        # Try to get the service to run again by resetting the schedule for that service
+        self.file_schedules.pop(sha256, None)
+
 
 DISPATCH_TASK_ASSIGNMENT = 'dispatcher-tasks-assigned-to-'
 TASK_ASSIGNMENT_PATTERN = DISPATCH_TASK_ASSIGNMENT + '*'
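Note: taken together, these methods implement the partial-result monitoring loop. A condensed sketch of the lifecycle, stitched from the dispatcher code later in this diff (task is a SubmissionTask; sha256, service and summary come from the surrounding dispatch code):

```python
# 1. At dispatch time, snapshot the monitored keys as the service sees them.
values = {key: temp_data.get(key) for key in service.monitored_keys}
task.set_monitoring_entry(sha256, service.name, values)

# 2. When temporary data changes, compare every monitor on that key against
#    its snapshot; completed services are redispatched right away, while
#    still-running ones are only flagged via dispatch_needed.
changed_files = task.temporary_data[sha256].set_value(key, value)
task.file_temporary_data_changed(changed_files, key)

# 3. When the result arrives, a partial result performs any pending
#    redispatch and a complete result clears the monitor.
if summary.partial:
    task.partial_result(sha256, service_name)
else:
    task.clear_monitoring_entry(sha256, service_name)
```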
@@ -267,7 +495,7 @@ SUBMISSION_TOTAL_TIMEOUT = 60 * 20
 
 class Dispatcher(ThreadedCoreBase):
     @staticmethod
-    def all_instances(persistent_redis):
+    def all_instances(persistent_redis: Redis):
         return Hash(DISPATCH_DIRECTORY, host=persistent_redis).keys()
 
     @staticmethod
@@ -287,7 +515,7 @@ class Dispatcher(ThreadedCoreBase):
         }
 
     def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None,
-                 config=None, counter_name='dispatcher'):
+                 config=None, counter_name: str = 'dispatcher'):
         super().__init__('assemblyline.dispatcher', config=config, datastore=datastore,
                          redis=redis, redis_persist=redis_persist, logger=logger)
@@ -297,8 +525,7 @@ class Dispatcher(ThreadedCoreBase):
         self.finalizing = threading.Event()
         self.finalizing_start = 0.0
 
-        #
-        # # Build some utility classes
+        # Build some utility classes
         self.scheduler = Scheduler(self.datastore, self.config, self.redis)
         self.running_tasks = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
         self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
@@ -321,12 +548,12 @@ class Dispatcher(ThreadedCoreBase):
         self.ingester_scanning = Hash('m-scanning-table', self.redis_persist)
 
         # Communications queues
-        self.start_queue = \
-            NamedQueue(DISPATCH_START_EVENTS+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
-        self.result_queue = \
-            NamedQueue(DISPATCH_RESULT_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
-        self.command_queue = \
-            NamedQueue(DISPATCH_COMMAND_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+        self.start_queue: NamedQueue[tuple[str, str, str, str]] =\
+            NamedQueue(DISPATCH_START_EVENTS+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+        self.result_queue: NamedQueue[dict] =\
+            NamedQueue(DISPATCH_RESULT_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
+        self.command_queue: NamedQueue[dict] =\
+            NamedQueue(DISPATCH_COMMAND_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
 
         # Publish counters to the metrics sink.
         self.counter = MetricsFactory(metrics_type='dispatcher', schema=Metrics, name=counter_name,
@@ -387,11 +614,16 @@ class Dispatcher(ThreadedCoreBase):
         _q = self.find_process_queue(sid)
         _q.put(DispatchAction(kind=Action.check_submission, sid=sid))
 
-    def _handle_service_change_event(self, data: ServiceChange):
+    def _handle_service_change_event(self, data: Optional[ServiceChange]):
+        if not data:
+            # We may have missed change messages, flush cache
+            self.scheduler.c12n_services.clear()
+            return
         if data.operation == Operation.Removed:
             # Remove all current instances of service from scheduler cache
-            for service_set in self.scheduler.c12n_services.values():
-                service_set.remove(data.name)
+            for service_set in self.scheduler.c12n_services.values():
+                if data.name in service_set:
+                    service_set.remove(data.name)
         else:
             # If Added/Modified, pull the service information and modify cache
             service: Service = self.datastore.get_service_with_delta(data.name)
@@ -451,7 +683,7 @@ class Dispatcher(ThreadedCoreBase):
         # If the dispatcher is exiting cleanly remove as many tasks from the service queues as we can
         service_queues = {}
         for task in self.tasks.values():
-            for (sha256, service_name), dispatch_key in task.queue_keys.items():
+            for (_sha256, service_name), dispatch_key in task.queue_keys.items():
                 try:
                     s_queue = service_queues[service_name]
                 except KeyError:
@@ -537,7 +769,7 @@ class Dispatcher(ThreadedCoreBase):
             return
 
         if not self.active_submissions.exists(sid):
-            self.log.info(f"[{sid}] New submission received")
+            self.log.info("[%s] New submission received", sid)
             self.active_submissions.add(sid, {
                 'completed_queue': task.completed_queue,
                 'submission': submission.as_primitives()
@@ -558,9 +790,10 @@ class Dispatcher(ThreadedCoreBase):
             self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
 
         # Apply initial data parameter
+        temporary_data = task.temporary_data[sha256] = TemporaryFileData(sha256)
         if submission.params.initial_data:
             try:
-                task.file_temporary_data[sha256] = {
+                temporary_data.local_values = {
                     key: value
                     for key, value in dict(json.loads(submission.params.initial_data)).items()
                     if len(str(value)) <= self.config.submission.max_temp_data_length
@@ -578,7 +811,7 @@ class Dispatcher(ThreadedCoreBase):
         # Initialize ancestry chain by identifying the root file
         file_info = self.get_fileinfo(task, sha256)
         file_type = file_info.type if file_info else 'NOT_FOUND'
-        task.file_temporary_data[sha256]['ancestry'] = [[dict(type=file_type, parent_relation="ROOT", sha256=sha256)]]
+        temporary_data.local_values['ancestry'] = [[dict(type=file_type, parent_relation="ROOT", sha256=sha256)]]
 
         # Start the file dispatching
         task.active_files.add(sha256)
@@ -587,6 +820,7 @@ class Dispatcher(ThreadedCoreBase):
 
     @elasticapm.capture_span(span_type='dispatcher')
     def get_fileinfo(self, task: SubmissionTask, sha256: str) -> Optional[FileInfo]:
+        """Read information about a file from the database, caching it locally."""
         # First try to get the info from local cache
         file_info = task.file_info.get(sha256, None)
         if file_info:
@@ -740,9 +974,12 @@ class Dispatcher(ThreadedCoreBase):
         tags = list(task.file_tags.get(sha256, {}).values())
 
         # Load the temp submission data we will pass
-        temp_data = {}
+        temp_data: dict[str, str] = {}
         if service.uses_temp_submission_data:
-            temp_data = task.file_temporary_data[sha256]
+            temp_data = task.temporary_data[sha256].read()
+            if service.monitored_keys:
+                values = {key: temp_data.get(key) for key in service.monitored_keys}
+                task.set_monitoring_entry(sha256, service.name, values)
 
         # Load the metadata we will pass
         metadata = {}
@@ -910,10 +1147,10 @@ class Dispatcher(ThreadedCoreBase):
             if self.dispatch_file(task, file_hash):
                 return True
         elif processing_files:
-            self.log.debug(f"[{task.submission.sid}] Not finished waiting on "
-                           f"{len(processing_files)} files: {list(processing_files)}")
+            self.log.debug("[%s] Not finished waiting on %d files: %s",
+                           task.submission.sid, len(processing_files), list(processing_files))
         else:
-            self.log.debug(f"[{task.submission.sid}] Finalizing submission.")
+            self.log.debug("[%s] Finalizing submission.", task.submission.sid)
             max_score = max(file_scores.values()) if file_scores else 0  # Submissions with no results have no score
             if self.tasks.pop(task.sid, None):
                 self.finalize_queue.put((task, max_score, checked))
@@ -1237,6 +1474,12 @@ class Dispatcher(ThreadedCoreBase):
         self.clear_timeout(task, sha256, service_name)
         task.service_logs.pop((sha256, service_name), None)
 
+        if summary.partial:
+            self.log.info("[%s/%s] %s returned partial results", sid, sha256, service_name)
+            task.partial_result(sha256, service_name)
+        else:
+            task.clear_monitoring_entry(sha256, service_name)
+
         # Don't process duplicates
         if (sha256, service_name) in task.service_results:
             return
@@ -1258,8 +1501,8 @@ class Dispatcher(ThreadedCoreBase):
         if isinstance(tags, list):
             self.log.warning("Deprecation: Old format of tags found. "
                              "This format changed with the release of 4.3 on 09-2022. "
-                             f"Rebuilding {service_name} may be required or the result of a cache hit. "
-                             "Proceeding with conversion to compatible format..")
+                             "Rebuilding %s may be required or the result of a cache hit. "
+                             "Proceeding with conversion to compatible format..", service_name)
             alt_tags = {}
             for t in tags:
                 key = f"{t['type']}:{t['value']}"
@@ -1274,11 +1517,6 @@ class Dispatcher(ThreadedCoreBase):
             else:
                 task.file_tags[sha256][key] = value
 
-        # Update the temporary data table for this file
-        for key, value in (temporary_data or {}).items():
-            if len(str(value)) <= self.config.submission.max_temp_data_length:
-                task.file_temporary_data[sha256][key] = value
-
         # Update children to include parent_relation, likely EXTRACTED
         if summary.children and isinstance(summary.children[0], str):
             old_children = typing.cast(list[str], summary.children)
@@ -1288,6 +1526,19 @@ class Dispatcher(ThreadedCoreBase):
         task.service_results[(sha256, service_name)] = summary
         task.register_children(sha256, [c for c, _ in summary.children])
 
+        # Update the temporary data table for this file
+        force_redispatch = set()
+        update_operations = self.config.submission.temporary_keys
+        for key, value in (temporary_data or {}).items():
+            if len(str(value)) <= self.config.submission.max_temp_data_length:
+                if update_operations.get(key) == KeyType.UNION:
+                    changed_files = task.temporary_data[sha256].union_value(key, value)
+                elif update_operations.get(key) == KeyType.IGNORE:
+                    changed_files = set()
+                else:
+                    changed_files = task.temporary_data[sha256].set_value(key, value)
+                force_redispatch |= set(task.file_temporary_data_changed(changed_files, key))
+
         # Set the depth of all extracted files, even if we won't be processing them
         depth_limit = self.config.submission.max_extraction_depth
         new_depth = task.file_depth[sha256] + 1
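Note: the net effect of the block above is that every temporary-data write a service returns is routed through the per-key policy from config.submission.temporary_keys, and any file whose effective view changed is queued for another dispatch pass. The same logic as a standalone helper (a sketch for readability, not package code; for UNION the value is expected to be set-like):

```python
def apply_temp_write(task, config, sha256: str, key: str, value) -> set[str]:
    """Route one temporary-data write through the per-key update policy."""
    policy = config.submission.temporary_keys.get(key)  # absent key -> overwrite
    if policy == KeyType.UNION:
        changed_files = task.temporary_data[sha256].union_value(key, value)
    elif policy == KeyType.IGNORE:
        changed_files = set()  # the write is dropped entirely
    else:
        changed_files = task.temporary_data[sha256].set_value(key, value)
    # Anything whose monitored values changed must be dispatched again
    return set(task.file_temporary_data_changed(changed_files, key))
```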
@@ -1303,7 +1554,7 @@ class Dispatcher(ThreadedCoreBase):
         if new_depth < depth_limit:
             # Prepare the temporary data from the parent to build the temporary data table for
             # these newly extracted files
-            parent_data = task.file_temporary_data[sha256]
+            parent_data = task.temporary_data[sha256]
 
             for extracted_sha256, parent_relation in summary.children:
@@ -1311,7 +1562,7 @@ class Dispatcher(ThreadedCoreBase):
                 continue
 
             if len(task.active_files) > submission.params.max_extracted:
-                self.log.info(f'[{sid}] hit extraction limit, dropping {extracted_sha256}')
+                self.log.info('[%s] hit extraction limit, dropping %s', sid, extracted_sha256)
                 task.dropped_files.add(extracted_sha256)
                 self._dispatching_error(task, Error({
                     'archive_ts': None,
@@ -1332,21 +1583,20 @@ class Dispatcher(ThreadedCoreBase):
 
                 dispatched += 1
                 task.active_files.add(extracted_sha256)
-                try:
-                    parent_ancestry = task.file_temporary_data[sha256]['ancestry']
-                except KeyError:
-                    self.log.warn(f"[{sid} :: {sha256}] missing ancestry data.")
-                    parent_ancestry = []
-                existing_ancestry = task.file_temporary_data.get(extracted_sha256, {}).get('ancestry', [])
+
+                # Get the new ancestry data
                 file_info = self.get_fileinfo(task, extracted_sha256)
                 file_type = file_info.type if file_info else 'NOT_FOUND'
                 current_ancestry_node = dict(type=file_type, parent_relation=parent_relation,
                                              sha256=extracted_sha256)
 
-                for ancestry in parent_ancestry:
-                    existing_ancestry.append(ancestry + [current_ancestry_node])
-                task.file_temporary_data[extracted_sha256]['ancestry'] = existing_ancestry
-
+                # Update ancestry data
+                parent_ancestry = parent_data.read_key('ancestry') or []
+                existing_ancestry = task.temporary_data[extracted_sha256].local_values.setdefault('ancestry', [])
+                for ancestry in parent_ancestry:
+                    existing_ancestry.append(ancestry + [current_ancestry_node])
+
+                # Trigger the processing of the extracted file
                 self.find_process_queue(sid).put(DispatchAction(kind=Action.dispatch_file, sid=sid,
                                                                 sha=extracted_sha256))
         else:
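Note: the 'ancestry' temporary value built above is a list of root-to-file chains; every extracted file appends its own node to each chain inherited from its parent. A shape sketch with made-up hashes and file types:

```python
ancestry = [
    [  # one chain per path from a root file down to this file
        {'type': 'archive/zip', 'parent_relation': 'ROOT', 'sha256': 'sha-of-zip'},
        {'type': 'document/pdf', 'parent_relation': 'EXTRACTED', 'sha256': 'sha-of-pdf'},
    ],
]
```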
@@ -1369,13 +1619,15 @@ class Dispatcher(ThreadedCoreBase):
 
         # Check if it's worth trying to run the next stage
         # Not worth running if we know we are waiting for another service
-        if any(_s == sha256 for _s, _ in task.running_services):
-            return
+        if not any(_s == sha256 for _s, _ in task.running_services):
+            force_redispatch.add(sha256)
         # Not worth running if we know we have services in queue
-        if any(_s == sha256 for _s, _ in task.queue_keys.keys()):
-            return
+        if not any(_s == sha256 for _s, _ in task.queue_keys.keys()):
+            force_redispatch.add(sha256)
+
         # Try to run the next stage
-        self.dispatch_file(task, sha256)
+        for sha256 in force_redispatch:
+            self.dispatch_file(task, sha256)
 
     @elasticapm.capture_span(span_type='dispatcher')
     def _dispatching_error(self, task: SubmissionTask, error):
@@ -1658,13 +1910,13 @@ class Dispatcher(ThreadedCoreBase):
 
     @elasticapm.capture_span(span_type='dispatcher')
     def list_outstanding(self, sid: str, queue_name: str):
-        response_queue = NamedQueue(queue_name, host=self.redis)
+        response_queue: NamedQueue[dict] = NamedQueue(queue_name, host=self.redis)
         outstanding: defaultdict[str, int] = defaultdict(int)
         task = self.tasks.get(sid)
         if task:
-            for sha, service_name in list(task.queue_keys.keys()):
+            for _sha, service_name in list(task.queue_keys.keys()):
                 outstanding[service_name] += 1
-            for sha, service_name in list(task.running_services):
+            for _sha, service_name in list(task.running_services):
                 outstanding[service_name] += 1
         response_queue.push(outstanding)
 
@@ -1679,7 +1931,7 @@ class Dispatcher(ThreadedCoreBase):
         error_tasks = []
 
         # iterate running tasks
-        for task_key, task_body in self.running_tasks:
+        for _task_key, task_body in self.running_tasks:
             task = ServiceTask(task_body)
             # It's a bad task if its dispatcher isn't running
             if task.metadata['dispatcher__'] not in dispatcher_instances: