assemblyline-core 4.5.1.dev358__tar.gz → 4.5.1.dev360__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (88) hide show
  1. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/PKG-INFO +1 -1
  2. assemblyline-core-4.5.1.dev360/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/dispatcher.py +72 -147
  4. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core.egg-info/PKG-INFO +1 -1
  5. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_simulation.py +1 -1
  6. assemblyline-core-4.5.1.dev358/assemblyline_core/VERSION +0 -1
  7. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/LICENCE.md +0 -0
  8. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/README.md +0 -0
  9. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/__init__.py +0 -0
  10. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/alerter/__init__.py +0 -0
  11. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/alerter/processing.py +0 -0
  12. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/alerter/run_alerter.py +0 -0
  13. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/archiver/__init__.py +0 -0
  14. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/archiver/run_archiver.py +0 -0
  15. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/badlist_client.py +0 -0
  16. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/__init__.py +0 -0
  17. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/__main__.py +0 -0
  18. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/client.py +0 -0
  19. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/schedules.py +0 -0
  20. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/dispatching/timeout.py +0 -0
  21. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/expiry/__init__.py +0 -0
  22. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/expiry/run_expiry.py +0 -0
  23. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/ingester/__init__.py +0 -0
  24. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/ingester/__main__.py +0 -0
  25. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/ingester/constants.py +0 -0
  26. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/ingester/ingester.py +0 -0
  27. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/__init__.py +0 -0
  28. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/es_metrics.py +0 -0
  29. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  30. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/helper.py +0 -0
  31. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/metrics_server.py +0 -0
  32. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  33. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  34. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  35. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/plumber/__init__.py +0 -0
  36. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/plumber/run_plumber.py +0 -0
  37. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/__init__.py +0 -0
  38. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/client.py +0 -0
  39. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/creator/__init__.py +0 -0
  40. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/creator/run.py +0 -0
  41. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/creator/run_worker.py +0 -0
  42. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/loader/__init__.py +0 -0
  43. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/loader/run.py +0 -0
  44. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/loader/run_worker.py +0 -0
  45. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/replay/replay.py +0 -0
  46. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/safelist_client.py +0 -0
  47. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/__init__.py +0 -0
  48. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/collection.py +0 -0
  49. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  50. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  51. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/controllers/interface.py +0 -0
  52. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  53. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/run_scaler.py +0 -0
  54. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/scaler/scaler_server.py +0 -0
  55. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/server_base.py +0 -0
  56. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/signature_client.py +0 -0
  57. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/submission_client.py +0 -0
  58. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/tasking_client.py +0 -0
  59. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/updater/__init__.py +0 -0
  60. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/updater/helper.py +0 -0
  61. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/updater/run_updater.py +0 -0
  62. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/__init__.py +0 -0
  63. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/crawler.py +0 -0
  64. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/department_map.py +0 -0
  65. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/safelist.py +0 -0
  66. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/stream_map.py +0 -0
  67. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/vacuum/worker.py +0 -0
  68. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/workflow/__init__.py +0 -0
  69. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core/workflow/run_workflow.py +0 -0
  70. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  71. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  72. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core.egg-info/requires.txt +0 -0
  73. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/assemblyline_core.egg-info/top_level.txt +0 -0
  74. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/setup.cfg +0 -0
  75. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/setup.py +0 -0
  76. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_alerter.py +0 -0
  77. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_badlist_client.py +0 -0
  78. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_dispatcher.py +0 -0
  79. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_expiry.py +0 -0
  80. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_plumber.py +0 -0
  81. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_replay.py +0 -0
  82. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_safelist_client.py +0 -0
  83. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_scaler.py +0 -0
  84. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_scheduler.py +0 -0
  85. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_signature_client.py +0 -0
  86. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_vacuum.py +0 -0
  87. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_worker_ingest.py +0 -0
  88. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev360}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev358
3
+ Version: 4.5.1.dev360
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.5.1.dev360
@@ -11,6 +11,7 @@ import json
11
11
  import enum
12
12
  from queue import PriorityQueue, Empty, Queue
13
13
  import dataclasses
14
+ from copy import deepcopy
14
15
 
15
16
  import elasticapm
16
17
 
@@ -68,7 +69,6 @@ DYNAMIC_ANALYSIS_CATEGORY = 'Dynamic Analysis'
68
69
  class KeyType(enum.Enum):
69
70
  OVERWRITE = 'overwrite'
70
71
  UNION = 'union'
71
- IGNORE = 'ignore'
72
72
 
73
73
 
74
74
  class Action(enum.IntEnum):
@@ -128,145 +128,72 @@ class ResultSummary:
128
128
 
129
129
 
130
130
  class TemporaryFileData:
131
- def __init__(self, sha256: str) -> None:
131
+ def __init__(self,
132
+ sha256: str,
133
+ config: dict[str, str],
134
+ shared: Optional[dict[str, Any]] = None,
135
+ local: Optional[dict[str, Any]] = None
136
+ ) -> None:
132
137
  self.sha256 = sha256
133
- self.parents: list[TemporaryFileData] = []
134
- self.children: list[TemporaryFileData] = []
135
- self.parent_cache: dict[str, Any] = {}
136
- self.local_values: dict[str, Any] = {}
137
-
138
- def add_parent(self, parent_temp: TemporaryFileData):
139
- """Add a parent to this node."""
140
- self.parents.append(parent_temp)
141
- parent_temp.children.append(self)
142
-
143
- def new_child(self, child: str) -> TemporaryFileData:
144
- """Create a linked entry for a new child."""
145
- temp = TemporaryFileData(child)
146
- temp.parents.append(self)
147
- self.children.append(temp)
148
- temp.build_parent_cache()
149
- return temp
150
-
151
- def build_parent_cache(self):
152
- """Rebuild the cache of data from parent files."""
153
- self.parent_cache.clear()
154
- for parent in self.parents:
155
- self.parent_cache.update(parent.read())
138
+ self.config = config
139
+ self.shared_values: dict[str, Any] = {} if shared is None else shared
140
+ self.local_values: dict[str, Any] = {} if local is None else local
141
+
142
+ def new_file(self, sha256: str) -> TemporaryFileData:
143
+ """Create an entry for another file with reference to the shared values."""
144
+ return TemporaryFileData(sha256, self.config, self.shared_values, deepcopy(self.local_values))
156
145
 
157
146
  def read(self) -> dict[str, Any]:
158
147
  """Get a copy of the current data"""
159
- # Start with a shallow copy ofthe parent cache
160
- data = dict(self.parent_cache)
148
+ # Start with a shallow copy of the local data
149
+ data = dict(self.local_values)
161
150
 
162
- # update, this overwrites any common keys (we want this)
163
- data.update(self.local_values)
151
+ # mix in whatever the latest submission-wide values are
152
+ data.update(self.shared_values)
164
153
  return data
165
154
 
166
155
  def read_key(self, key: str) -> Any:
167
156
  """Get a copy of the current data"""
168
157
  try:
169
- return self.local_values[key]
158
+ return self.shared_values[key]
170
159
  except KeyError:
171
- return self.parent_cache.get(key)
160
+ return self.local_values.get(key)
172
161
 
173
- def set_value(self, key: str, value: str) -> set[str]:
174
- """Using a SET operation update the value on this node and all children.
162
+ def set_value(self, key: str, value: Any) -> bool:
163
+ """Set the value of a temporary data key using the appropriate method for the key.
175
164
 
176
- Returns a list of the sha of all files who's temporary data has been modified.
165
+ Return true if this change could mean partial results should be reevaluated.
177
166
  """
178
- # Check if the local value doesn't change then we won't have any effect on children
179
- old = self.local_values.get(key)
180
- if type(old) is type(value) and old == value:
181
- return set()
182
-
183
- # Update the local value and recurse into children
184
- self.local_values[key] = value
185
- changed = [self.sha256]
186
- for child in self.children:
187
- changed.extend(child.set_value_from_ancestor(key, value))
188
- return set(changed)
189
-
190
- def set_value_from_ancestor(self, key: str, value: str) -> set[str]:
191
- """Given that an ancestor has changed, test if this file's temporary data will change also."""
192
- # If this child has already set this key, the parent values don't matter
193
- if key in self.local_values:
194
- return set()
167
+ if self.config.get(key) == KeyType.UNION.value:
168
+ return self._union_shared_value(key, value)
195
169
 
196
- # If the parent value was already set to this nothing has changed
197
- old = self.parent_cache.get(key)
198
- if type(old) is type(value) and old == value:
199
- return set()
200
-
201
- # Update the parent cache and recurse into children
202
- self.parent_cache[key] = value
203
- changed = [self.sha256]
204
- for child in self.children:
205
- changed.extend(child.set_value_from_ancestor(key, value))
206
- return set(changed)
207
-
208
- def union_value(self, key: str, value: set[str]) -> set[str]:
209
- """Using a MERGE operation update the value on this node and all children.
210
-
211
- Returns a list of the sha of all files who's temporary data has been modified.
212
- """
213
- if not value:
214
- return set()
215
-
216
- # Check if the local value doesn't change then we won't have any effect on children
217
- new_value = merge_in_values(self.local_values.get(key), value)
218
- if new_value is None:
219
- return set()
220
-
221
- # Update the local value and recurse into children
222
- self.local_values[key] = new_value
223
- changed = [self.sha256]
224
- for child in self.children:
225
- changed.extend(child.union_value_from_ancestor(key, value))
226
- return set(changed)
227
-
228
- def union_value_from_ancestor(self, key: str, value: set[str]) -> set[str]:
229
- """Given that an ancestor has changed, test if this file's temporary data will change also.
170
+ if self.config.get(key) == KeyType.OVERWRITE.value:
171
+ change = self.shared_values.get(key) != value
172
+ self.shared_values[key] = value
173
+ return change
230
174
 
231
- For values updated by union the parent and local values are the same.
232
- """
233
- # Merge in data to parent cache, we won't be reading from it, but we still want to keep it
234
- # up to date and use it to check if changes are needed
235
- new_value = merge_in_values(self.parent_cache.get(key), value)
236
- if new_value is None:
237
- return set()
238
- self.parent_cache[key] = new_value
239
-
240
- # Update the local values as well if we need to
241
- new_value = merge_in_values(self.local_values.get(key), value)
242
- if new_value is None:
243
- return set()
244
- self.local_values[key] = new_value
245
-
246
- # Since we did change the local value, pass the new set down to children
247
- changed = [self.sha256]
248
- for child in self.children:
249
- changed.extend(child.union_value_from_ancestor(key, value))
250
- return set(changed)
251
-
252
-
253
- def merge_in_values(old_values: Any, new_values: set[str]) -> Optional[list[str]]:
254
- """Merge in new values into a json list.
255
-
256
- If there is no new values return None.
257
- """
258
- # Read out the old value set
259
- if isinstance(old_values, (list, set)):
260
- old_values = set(old_values)
261
- else:
262
- old_values = set()
263
-
264
- # If we have no new values to merge in
265
- if new_values <= old_values:
266
- return None
267
-
268
- # We have new values, build a new set
269
- return list(new_values | old_values)
175
+ self.local_values[key] = value
176
+ return False
177
+
178
+ def _union_shared_value(self, key: str, values: Any) -> bool:
179
+ # Make sure the existing value is the right type
180
+ self.shared_values.setdefault(key, [])
181
+ if not isinstance(self.shared_values[key], list):
182
+ self.shared_values[key] = []
183
+
184
+ # make sure the input is the right type
185
+ if not isinstance(values, list | tuple):
186
+ return False
187
+
188
+ # Add each value one at a time testing for new values
189
+ # This is slower than using set intersection, but isn't type sensitive
190
+ changed = False
191
+ for new_item in values:
192
+ if new_item in self.shared_values[key]:
193
+ continue
194
+ self.shared_values[key].append(new_item)
195
+ changed = True
196
+ return changed
270
197
 
271
198
 
272
199
  class SubmissionTask:
@@ -376,7 +303,6 @@ class SubmissionTask:
376
303
  except KeyError:
377
304
  self._forbidden_services[sha256] = {service_name}
378
305
 
379
-
380
306
  def register_children(self, parent: str, children: list[str]):
381
307
  """
382
308
  Note which files extracted other files.
@@ -385,10 +311,8 @@ class SubmissionTask:
385
311
  """
386
312
  parent_temp = self.temporary_data[parent]
387
313
  for child in children:
388
- try:
389
- self.temporary_data[child].add_parent(parent_temp)
390
- except KeyError:
391
- self.temporary_data[child] = parent_temp.new_child(child)
314
+ if child not in self.temporary_data:
315
+ self.temporary_data[child] = parent_temp.new_file(child)
392
316
  try:
393
317
  self._parent_map[child].add(parent)
394
318
  except KeyError:
@@ -446,21 +370,29 @@ class SubmissionTask:
446
370
  if result and result.partial:
447
371
  self.service_results.pop((sha256, service_name), None)
448
372
 
449
- def file_temporary_data_changed(self, changed_sha256: set[str], key: str) -> list[str]:
373
+ def temporary_data_changed(self, key: str) -> list[str]:
450
374
  """Check all of the monitored tasks on that key for changes. Redispatch as needed."""
451
375
  changed = []
452
376
  for (sha256, service), entry in self.monitoring.items():
453
- if sha256 not in changed_sha256:
377
+ # Check if this key is actually being monitored by this entry
378
+ if key not in entry.values:
454
379
  continue
455
380
 
381
+ # Get whatever values (if any) were provided on the previous dispatch of this service
456
382
  value = self.temporary_data[sha256].read_key(key)
457
383
  dispatched_value = entry.values.get(key)
458
384
 
459
385
  if type(value) is not type(dispatched_value) or value != dispatched_value:
460
386
  result = self.service_results.get((sha256, service))
461
387
  if not result:
388
+ # If the value has changed since the last dispatch but results haven't come in yet
389
+ # mark this service to be dispatched later. This will only happen if the service
390
+ # returns partial results, if there are full results the entry will be cleared instead.
462
391
  entry.dispatch_needed = True
463
392
  else:
393
+ # If there are results and there is a monitoring entry, the result was partial
394
+ # so redispatch it immediately. If there are not partial results the monitoring
395
+ # entry will have been cleared.
464
396
  self.redispatch_service(sha256, service)
465
397
  changed.append(sha256)
466
398
  return changed
@@ -537,7 +469,7 @@ class Dispatcher(ThreadedCoreBase):
537
469
 
538
470
  # Build some utility classes
539
471
  self.scheduler = Scheduler(self.datastore, self.config, self.redis)
540
- self.running_tasks = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
472
+ self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
541
473
  self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
542
474
 
543
475
  self.classification_engine = get_classification()
@@ -800,14 +732,14 @@ class Dispatcher(ThreadedCoreBase):
800
732
  self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
801
733
 
802
734
  # Apply initial data parameter
803
- temporary_data = task.temporary_data[sha256] = TemporaryFileData(sha256)
735
+ temporary_data = TemporaryFileData(sha256, config=self.config.submission.temporary_keys)
736
+ task.temporary_data[sha256] = temporary_data
804
737
  if submission.params.initial_data:
805
738
  try:
806
- temporary_data.local_values = {
807
- key: value
808
- for key, value in dict(json.loads(submission.params.initial_data)).items()
809
- if len(str(value)) <= self.config.submission.max_temp_data_length
810
- }
739
+ for key, value in dict(json.loads(submission.params.initial_data)).items():
740
+ if len(str(value)) > self.config.submission.max_temp_data_length:
741
+ continue
742
+ temporary_data.set_value(key, value)
811
743
 
812
744
  except (ValueError, TypeError) as err:
813
745
  self.log.warning(f"[{sid}] could not process initialization data: {err}")
@@ -1011,7 +943,6 @@ class Dispatcher(ThreadedCoreBase):
1011
943
  for service_name in prevented_services:
1012
944
  task.forbid_for_children(sha256, service_name)
1013
945
 
1014
-
1015
946
  # Build the actual service dispatch message
1016
947
  config = self.build_service_config(service, submission)
1017
948
  service_task = ServiceTask(dict(
@@ -1547,16 +1478,10 @@ class Dispatcher(ThreadedCoreBase):
1547
1478
 
1548
1479
  # Update the temporary data table for this file
1549
1480
  force_redispatch = set()
1550
- update_operations = self.config.submission.temporary_keys
1551
1481
  for key, value in (temporary_data or {}).items():
1552
1482
  if len(str(value)) <= self.config.submission.max_temp_data_length:
1553
- if update_operations.get(key) == KeyType.UNION:
1554
- changed_files = task.temporary_data[sha256].union_value(key, value)
1555
- elif update_operations.get(key) == KeyType.IGNORE:
1556
- changed_files = set()
1557
- else:
1558
- changed_files = task.temporary_data[sha256].set_value(key, value)
1559
- force_redispatch |= set(task.file_temporary_data_changed(changed_files, key))
1483
+ if task.temporary_data[sha256].set_value(key, value):
1484
+ force_redispatch |= set(task.temporary_data_changed(key))
1560
1485
 
1561
1486
  # Set the depth of all extracted files, even if we won't be processing them
1562
1487
  depth_limit = self.config.submission.max_extraction_depth
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev358
3
+ Version: 4.5.1.dev360
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -1250,7 +1250,7 @@ def test_temp_data_monitoring(core: CoreSession, metrics):
1250
1250
  sub: Submission = core.ds.submission.get(dropped_task.submission.sid)
1251
1251
  assert len(sub.errors) == 0
1252
1252
  assert len(sub.results) == 4, 'results'
1253
- assert core.pre_service.hits[sha] >= 2, 'pre_service.hits'
1253
+ assert core.pre_service.hits[sha] >= 2, f'pre_service.hits {core.pre_service.hits}'
1254
1254
 
1255
1255
  # Wait until we get feedback from the metrics channel
1256
1256
  metrics.expect('ingester', 'submissions_ingested', 1)
@@ -1 +0,0 @@
1
- 4.5.1.dev358