assemblyline-core 4.5.1.dev357__tar.gz → 4.5.1.dev359__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of assemblyline-core might be problematic. Click here for more details.
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/PKG-INFO +1 -1
- assemblyline-core-4.5.1.dev359/assemblyline_core/VERSION +1 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/dispatcher.py +63 -146
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/worker.py +8 -3
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_simulation.py +1 -1
- assemblyline-core-4.5.1.dev357/assemblyline_core/VERSION +0 -1
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/LICENCE.md +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/README.md +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/ingester.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/run.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/run_worker.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/scaler_server.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/signature_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/helper.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/run_updater.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/setup.cfg +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/setup.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_alerter.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_badlist_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_dispatcher.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_expiry.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_plumber.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_replay.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_safelist_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_scaler.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_scheduler.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_signature_client.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_worker_submit.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.5.1.dev359
|
|
@@ -68,7 +68,6 @@ DYNAMIC_ANALYSIS_CATEGORY = 'Dynamic Analysis'
|
|
|
68
68
|
class KeyType(enum.Enum):
|
|
69
69
|
OVERWRITE = 'overwrite'
|
|
70
70
|
UNION = 'union'
|
|
71
|
-
IGNORE = 'ignore'
|
|
72
71
|
|
|
73
72
|
|
|
74
73
|
class Action(enum.IntEnum):
|
|
@@ -103,7 +102,6 @@ class MonitorTask:
|
|
|
103
102
|
# Should a service be dispatched again when possible
|
|
104
103
|
dispatch_needed: bool = dataclasses.field(default=False)
|
|
105
104
|
|
|
106
|
-
|
|
107
105
|
@contextmanager
|
|
108
106
|
def apm_span(client, span_name: str):
|
|
109
107
|
try:
|
|
@@ -128,145 +126,67 @@ class ResultSummary:
|
|
|
128
126
|
|
|
129
127
|
|
|
130
128
|
class TemporaryFileData:
|
|
131
|
-
def __init__(self, sha256: str) -> None:
|
|
129
|
+
def __init__(self, sha256: str, config: dict[str, str], shared: Optional[dict[str, Any]] = None) -> None:
|
|
132
130
|
self.sha256 = sha256
|
|
133
|
-
self.
|
|
134
|
-
self.
|
|
135
|
-
self.parent_cache: dict[str, Any] = {}
|
|
131
|
+
self.config = config
|
|
132
|
+
self.shared_values: dict[str, Any] = dict() if shared is None else shared
|
|
136
133
|
self.local_values: dict[str, Any] = {}
|
|
137
134
|
|
|
138
|
-
def
|
|
139
|
-
"""
|
|
140
|
-
self.
|
|
141
|
-
parent_temp.children.append(self)
|
|
142
|
-
|
|
143
|
-
def new_child(self, child: str) -> TemporaryFileData:
|
|
144
|
-
"""Create a linked entry for a new child."""
|
|
145
|
-
temp = TemporaryFileData(child)
|
|
146
|
-
temp.parents.append(self)
|
|
147
|
-
self.children.append(temp)
|
|
148
|
-
temp.build_parent_cache()
|
|
149
|
-
return temp
|
|
150
|
-
|
|
151
|
-
def build_parent_cache(self):
|
|
152
|
-
"""Rebuild the cache of data from parent files."""
|
|
153
|
-
self.parent_cache.clear()
|
|
154
|
-
for parent in self.parents:
|
|
155
|
-
self.parent_cache.update(parent.read())
|
|
135
|
+
def new_file(self, sha256: str) -> TemporaryFileData:
|
|
136
|
+
"""Create an entry for another file with reference to the shared values."""
|
|
137
|
+
return TemporaryFileData(sha256, self.config, self.shared_values)
|
|
156
138
|
|
|
157
139
|
def read(self) -> dict[str, Any]:
|
|
158
140
|
"""Get a copy of the current data"""
|
|
159
|
-
# Start with a shallow copy
|
|
160
|
-
data = dict(self.
|
|
141
|
+
# Start with a shallow copy of the local data
|
|
142
|
+
data = dict(self.local_values)
|
|
161
143
|
|
|
162
|
-
#
|
|
163
|
-
data.update(self.
|
|
144
|
+
# mix in whatever the latest submission-wide values are
|
|
145
|
+
data.update(self.shared_values)
|
|
164
146
|
return data
|
|
165
147
|
|
|
166
148
|
def read_key(self, key: str) -> Any:
|
|
167
149
|
"""Get a copy of the current data"""
|
|
168
150
|
try:
|
|
169
|
-
return self.
|
|
151
|
+
return self.shared_values[key]
|
|
170
152
|
except KeyError:
|
|
171
|
-
return self.
|
|
153
|
+
return self.local_values.get(key)
|
|
172
154
|
|
|
173
|
-
def set_value(self, key: str, value:
|
|
174
|
-
"""
|
|
155
|
+
def set_value(self, key: str, value: Any) -> bool:
|
|
156
|
+
"""Set the value of a temporary data key using the appropriate method for the key.
|
|
175
157
|
|
|
176
|
-
|
|
158
|
+
Return true if this change could mean partial results should be reevaluated.
|
|
177
159
|
"""
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
if type(old) is type(value) and old == value:
|
|
181
|
-
return set()
|
|
182
|
-
|
|
183
|
-
# Update the local value and recurse into children
|
|
184
|
-
self.local_values[key] = value
|
|
185
|
-
changed = [self.sha256]
|
|
186
|
-
for child in self.children:
|
|
187
|
-
changed.extend(child.set_value_from_ancestor(key, value))
|
|
188
|
-
return set(changed)
|
|
189
|
-
|
|
190
|
-
def set_value_from_ancestor(self, key: str, value: str) -> set[str]:
|
|
191
|
-
"""Given that an ancestor has changed, test if this file's temporary data will change also."""
|
|
192
|
-
# If this child has already set this key, the parent values don't matter
|
|
193
|
-
if key in self.local_values:
|
|
194
|
-
return set()
|
|
160
|
+
if self.config.get(key) == KeyType.UNION.value:
|
|
161
|
+
return self._union_shared_value(key, value)
|
|
195
162
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return
|
|
200
|
-
|
|
201
|
-
# Update the parent cache and recurse into children
|
|
202
|
-
self.parent_cache[key] = value
|
|
203
|
-
changed = [self.sha256]
|
|
204
|
-
for child in self.children:
|
|
205
|
-
changed.extend(child.set_value_from_ancestor(key, value))
|
|
206
|
-
return set(changed)
|
|
207
|
-
|
|
208
|
-
def union_value(self, key: str, value: set[str]) -> set[str]:
|
|
209
|
-
"""Using a MERGE operation update the value on this node and all children.
|
|
210
|
-
|
|
211
|
-
Returns a list of the sha of all files who's temporary data has been modified.
|
|
212
|
-
"""
|
|
213
|
-
if not value:
|
|
214
|
-
return set()
|
|
215
|
-
|
|
216
|
-
# Check if the local value doesn't change then we won't have any effect on children
|
|
217
|
-
new_value = merge_in_values(self.local_values.get(key), value)
|
|
218
|
-
if new_value is None:
|
|
219
|
-
return set()
|
|
220
|
-
|
|
221
|
-
# Update the local value and recurse into children
|
|
222
|
-
self.local_values[key] = new_value
|
|
223
|
-
changed = [self.sha256]
|
|
224
|
-
for child in self.children:
|
|
225
|
-
changed.extend(child.union_value_from_ancestor(key, value))
|
|
226
|
-
return set(changed)
|
|
227
|
-
|
|
228
|
-
def union_value_from_ancestor(self, key: str, value: set[str]) -> set[str]:
|
|
229
|
-
"""Given that an ancestor has changed, test if this file's temporary data will change also.
|
|
163
|
+
if self.config.get(key) == KeyType.OVERWRITE.value:
|
|
164
|
+
change = self.shared_values.get(key) != value
|
|
165
|
+
self.shared_values[key] = value
|
|
166
|
+
return change
|
|
230
167
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
#
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
def merge_in_values(old_values: Any, new_values: set[str]) -> Optional[list[str]]:
|
|
254
|
-
"""Merge in new values into a json list.
|
|
255
|
-
|
|
256
|
-
If there is no new values return None.
|
|
257
|
-
"""
|
|
258
|
-
# Read out the old value set
|
|
259
|
-
if isinstance(old_values, (list, set)):
|
|
260
|
-
old_values = set(old_values)
|
|
261
|
-
else:
|
|
262
|
-
old_values = set()
|
|
263
|
-
|
|
264
|
-
# If we have no new values to merge in
|
|
265
|
-
if new_values <= old_values:
|
|
266
|
-
return None
|
|
267
|
-
|
|
268
|
-
# We have new values, build a new set
|
|
269
|
-
return list(new_values | old_values)
|
|
168
|
+
self.local_values[key] = value
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
def _union_shared_value(self, key: str, values: Any) -> bool:
|
|
172
|
+
# Make sure the existing value is the right type
|
|
173
|
+
self.shared_values.setdefault(key, [])
|
|
174
|
+
if not isinstance(self.shared_values[key], list):
|
|
175
|
+
self.shared_values[key] = []
|
|
176
|
+
|
|
177
|
+
# make sure the input is the right type
|
|
178
|
+
if not isinstance(values, list | tuple):
|
|
179
|
+
return False
|
|
180
|
+
|
|
181
|
+
# Add each value one at a time testing for new values
|
|
182
|
+
# This is slower than using set intersection, but isn't type sensitive
|
|
183
|
+
changed = False
|
|
184
|
+
for new_item in values:
|
|
185
|
+
if new_item in self.shared_values[key]:
|
|
186
|
+
continue
|
|
187
|
+
self.shared_values[key].append(new_item)
|
|
188
|
+
changed = True
|
|
189
|
+
return changed
|
|
270
190
|
|
|
271
191
|
|
|
272
192
|
class SubmissionTask:
|
|
@@ -376,7 +296,6 @@ class SubmissionTask:
|
|
|
376
296
|
except KeyError:
|
|
377
297
|
self._forbidden_services[sha256] = {service_name}
|
|
378
298
|
|
|
379
|
-
|
|
380
299
|
def register_children(self, parent: str, children: list[str]):
|
|
381
300
|
"""
|
|
382
301
|
Note which files extracted other files.
|
|
@@ -385,10 +304,7 @@ class SubmissionTask:
|
|
|
385
304
|
"""
|
|
386
305
|
parent_temp = self.temporary_data[parent]
|
|
387
306
|
for child in children:
|
|
388
|
-
|
|
389
|
-
self.temporary_data[child].add_parent(parent_temp)
|
|
390
|
-
except KeyError:
|
|
391
|
-
self.temporary_data[child] = parent_temp.new_child(child)
|
|
307
|
+
self.temporary_data.setdefault(child, parent_temp.new_file(child))
|
|
392
308
|
try:
|
|
393
309
|
self._parent_map[child].add(parent)
|
|
394
310
|
except KeyError:
|
|
@@ -446,21 +362,29 @@ class SubmissionTask:
|
|
|
446
362
|
if result and result.partial:
|
|
447
363
|
self.service_results.pop((sha256, service_name), None)
|
|
448
364
|
|
|
449
|
-
def
|
|
365
|
+
def temporary_data_changed(self, key: str) -> list[str]:
|
|
450
366
|
"""Check all of the monitored tasks on that key for changes. Redispatch as needed."""
|
|
451
367
|
changed = []
|
|
452
368
|
for (sha256, service), entry in self.monitoring.items():
|
|
453
|
-
if
|
|
369
|
+
# Check if this key is actually being monitored by this entry
|
|
370
|
+
if key not in entry.values:
|
|
454
371
|
continue
|
|
455
372
|
|
|
373
|
+
# Get whatever values (if any) were provided on the previous dispatch of this service
|
|
456
374
|
value = self.temporary_data[sha256].read_key(key)
|
|
457
375
|
dispatched_value = entry.values.get(key)
|
|
458
376
|
|
|
459
377
|
if type(value) is not type(dispatched_value) or value != dispatched_value:
|
|
460
378
|
result = self.service_results.get((sha256, service))
|
|
461
379
|
if not result:
|
|
380
|
+
# If the value has changed since the last dispatch but results haven't come in yet
|
|
381
|
+
# mark this service to be dispatched later. This will only happen if the service
|
|
382
|
+
# returns partial results, if there are full results the entry will be cleared instead.
|
|
462
383
|
entry.dispatch_needed = True
|
|
463
384
|
else:
|
|
385
|
+
# If there are results and there is a monitoring entry, the result was partial
|
|
386
|
+
# so redispatch it immediately. If there are not partial results the monitoring
|
|
387
|
+
# entry will have been cleared.
|
|
464
388
|
self.redispatch_service(sha256, service)
|
|
465
389
|
changed.append(sha256)
|
|
466
390
|
return changed
|
|
@@ -537,7 +461,7 @@ class Dispatcher(ThreadedCoreBase):
|
|
|
537
461
|
|
|
538
462
|
# Build some utility classes
|
|
539
463
|
self.scheduler = Scheduler(self.datastore, self.config, self.redis)
|
|
540
|
-
self.running_tasks = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
|
|
464
|
+
self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
|
|
541
465
|
self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
|
|
542
466
|
|
|
543
467
|
self.classification_engine = get_classification()
|
|
@@ -800,14 +724,14 @@ class Dispatcher(ThreadedCoreBase):
|
|
|
800
724
|
self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
|
|
801
725
|
|
|
802
726
|
# Apply initial data parameter
|
|
803
|
-
temporary_data =
|
|
727
|
+
temporary_data = TemporaryFileData(sha256, config=self.config.submission.temporary_keys)
|
|
728
|
+
task.temporary_data[sha256] = temporary_data
|
|
804
729
|
if submission.params.initial_data:
|
|
805
730
|
try:
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
}
|
|
731
|
+
for key, value in dict(json.loads(submission.params.initial_data)).items():
|
|
732
|
+
if len(str(value)) > self.config.submission.max_temp_data_length:
|
|
733
|
+
continue
|
|
734
|
+
temporary_data.set_value(key, value)
|
|
811
735
|
|
|
812
736
|
except (ValueError, TypeError) as err:
|
|
813
737
|
self.log.warning(f"[{sid}] could not process initialization data: {err}")
|
|
@@ -1011,7 +935,6 @@ class Dispatcher(ThreadedCoreBase):
|
|
|
1011
935
|
for service_name in prevented_services:
|
|
1012
936
|
task.forbid_for_children(sha256, service_name)
|
|
1013
937
|
|
|
1014
|
-
|
|
1015
938
|
# Build the actual service dispatch message
|
|
1016
939
|
config = self.build_service_config(service, submission)
|
|
1017
940
|
service_task = ServiceTask(dict(
|
|
@@ -1547,16 +1470,10 @@ class Dispatcher(ThreadedCoreBase):
|
|
|
1547
1470
|
|
|
1548
1471
|
# Update the temporary data table for this file
|
|
1549
1472
|
force_redispatch = set()
|
|
1550
|
-
update_operations = self.config.submission.temporary_keys
|
|
1551
1473
|
for key, value in (temporary_data or {}).items():
|
|
1552
1474
|
if len(str(value)) <= self.config.submission.max_temp_data_length:
|
|
1553
|
-
if
|
|
1554
|
-
|
|
1555
|
-
elif update_operations.get(key) == KeyType.IGNORE:
|
|
1556
|
-
changed_files = set()
|
|
1557
|
-
else:
|
|
1558
|
-
changed_files = task.temporary_data[sha256].set_value(key, value)
|
|
1559
|
-
force_redispatch |= set(task.file_temporary_data_changed(changed_files, key))
|
|
1475
|
+
if task.temporary_data[sha256].set_value(key, value):
|
|
1476
|
+
force_redispatch |= set(task.temporary_data_changed(key))
|
|
1560
1477
|
|
|
1561
1478
|
# Set the depth of all extracted files, even if we won't be processing them
|
|
1562
1479
|
depth_limit = self.config.submission.max_extraction_depth
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/worker.py
RENAMED
|
@@ -37,6 +37,7 @@ from assemblyline.common.str_utils import safe_str
|
|
|
37
37
|
from assemblyline.remote.datatypes import get_client as get_redis_client
|
|
38
38
|
from assemblyline.odm.messages.submission import Submission
|
|
39
39
|
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
40
|
+
from assemblyline.remote.datatypes.hash import Hash
|
|
40
41
|
|
|
41
42
|
from assemblyline_core.vacuum.crawler import VACUUM_BUFFER_NAME
|
|
42
43
|
|
|
@@ -164,12 +165,16 @@ class FileProcessor(threading.Thread):
|
|
|
164
165
|
# Anything that can't be copied easily should be initialized in 'run'.
|
|
165
166
|
self.config: Config = config
|
|
166
167
|
self.datastore = datastore
|
|
167
|
-
self.metadata_check = MetadataValidator(datastore)
|
|
168
|
+
self.metadata_check = MetadataValidator(datastore, Hash("metadata_suggestions", persistent_redis))
|
|
169
|
+
|
|
170
|
+
# Merge the default metadata required for ingestion with those that are required from vacuum
|
|
171
|
+
validation_scheme = config.submission.metadata.ingest.get('_default', {})
|
|
172
|
+
validation_scheme.update(config.submission.metadata.ingest.get(self.config.core.vacuum.ingest_type, {}))
|
|
173
|
+
|
|
168
174
|
self.metadata_check_kwargs = {
|
|
169
|
-
'validation_scheme':
|
|
175
|
+
'validation_scheme': validation_scheme,
|
|
170
176
|
'strict': self.config.core.vacuum.ingest_type in self.config.submission.metadata.strict_schemes
|
|
171
177
|
}
|
|
172
|
-
self.validation_scheme = self.config.submission.metadata.ingest.get(self.config.core.vacuum.ingest_type, {})
|
|
173
178
|
self.counter = counter
|
|
174
179
|
self.minimum_classification = self.config.core.vacuum.minimum_classification
|
|
175
180
|
logger.info("Connect to work queue")
|
|
@@ -1250,7 +1250,7 @@ def test_temp_data_monitoring(core: CoreSession, metrics):
|
|
|
1250
1250
|
sub: Submission = core.ds.submission.get(dropped_task.submission.sid)
|
|
1251
1251
|
assert len(sub.errors) == 0
|
|
1252
1252
|
assert len(sub.results) == 4, 'results'
|
|
1253
|
-
assert core.pre_service.hits[sha] >= 2, 'pre_service.hits'
|
|
1253
|
+
assert core.pre_service.hits[sha] >= 2, f'pre_service.hits {core.pre_service.hits}'
|
|
1254
1254
|
|
|
1255
1255
|
# Wait until we get feedback from the metrics channel
|
|
1256
1256
|
metrics.expect('ingester', 'submissions_ingested', 1)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
4.5.1.dev357
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/replay.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/assemblyline_core/server_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_badlist_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_safelist_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_signature_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_worker_ingest.py
RENAMED
|
File without changes
|
{assemblyline-core-4.5.1.dev357 → assemblyline-core-4.5.1.dev359}/test/test_worker_submit.py
RENAMED
|
File without changes
|