assemblyline-core 4.5.1.dev358__tar.gz → 4.5.1.dev359__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (88) hide show
  1. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/PKG-INFO +1 -1
  2. assemblyline-core-4.5.1.dev359/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/dispatcher.py +63 -146
  4. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/PKG-INFO +1 -1
  5. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_simulation.py +1 -1
  6. assemblyline-core-4.5.1.dev358/assemblyline_core/VERSION +0 -1
  7. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/LICENCE.md +0 -0
  8. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/README.md +0 -0
  9. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/__init__.py +0 -0
  10. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/__init__.py +0 -0
  11. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/processing.py +0 -0
  12. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/alerter/run_alerter.py +0 -0
  13. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/archiver/__init__.py +0 -0
  14. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/archiver/run_archiver.py +0 -0
  15. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/badlist_client.py +0 -0
  16. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/__init__.py +0 -0
  17. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/__main__.py +0 -0
  18. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/client.py +0 -0
  19. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/schedules.py +0 -0
  20. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/dispatching/timeout.py +0 -0
  21. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/expiry/__init__.py +0 -0
  22. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/expiry/run_expiry.py +0 -0
  23. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/__init__.py +0 -0
  24. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/__main__.py +0 -0
  25. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/constants.py +0 -0
  26. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/ingester/ingester.py +0 -0
  27. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/__init__.py +0 -0
  28. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/es_metrics.py +0 -0
  29. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  30. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/helper.py +0 -0
  31. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/metrics_server.py +0 -0
  32. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  33. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  34. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  35. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/plumber/__init__.py +0 -0
  36. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/plumber/run_plumber.py +0 -0
  37. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/__init__.py +0 -0
  38. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/client.py +0 -0
  39. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/__init__.py +0 -0
  40. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/run.py +0 -0
  41. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/creator/run_worker.py +0 -0
  42. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/__init__.py +0 -0
  43. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/run.py +0 -0
  44. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/loader/run_worker.py +0 -0
  45. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/replay/replay.py +0 -0
  46. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/safelist_client.py +0 -0
  47. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/__init__.py +0 -0
  48. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/collection.py +0 -0
  49. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  50. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  51. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/interface.py +0 -0
  52. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  53. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/run_scaler.py +0 -0
  54. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/scaler/scaler_server.py +0 -0
  55. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/server_base.py +0 -0
  56. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/signature_client.py +0 -0
  57. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/submission_client.py +0 -0
  58. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/tasking_client.py +0 -0
  59. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/__init__.py +0 -0
  60. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/helper.py +0 -0
  61. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/updater/run_updater.py +0 -0
  62. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/__init__.py +0 -0
  63. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/crawler.py +0 -0
  64. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/department_map.py +0 -0
  65. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/safelist.py +0 -0
  66. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/stream_map.py +0 -0
  67. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/vacuum/worker.py +0 -0
  68. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/workflow/__init__.py +0 -0
  69. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core/workflow/run_workflow.py +0 -0
  70. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  71. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  72. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/requires.txt +0 -0
  73. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/assemblyline_core.egg-info/top_level.txt +0 -0
  74. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/setup.cfg +0 -0
  75. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/setup.py +0 -0
  76. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_alerter.py +0 -0
  77. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_badlist_client.py +0 -0
  78. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_dispatcher.py +0 -0
  79. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_expiry.py +0 -0
  80. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_plumber.py +0 -0
  81. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_replay.py +0 -0
  82. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_safelist_client.py +0 -0
  83. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_scaler.py +0 -0
  84. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_scheduler.py +0 -0
  85. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_signature_client.py +0 -0
  86. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_vacuum.py +0 -0
  87. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_worker_ingest.py +0 -0
  88. {assemblyline-core-4.5.1.dev358 → assemblyline-core-4.5.1.dev359}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev358
3
+ Version: 4.5.1.dev359
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.5.1.dev359
@@ -68,7 +68,6 @@ DYNAMIC_ANALYSIS_CATEGORY = 'Dynamic Analysis'
68
68
  class KeyType(enum.Enum):
69
69
  OVERWRITE = 'overwrite'
70
70
  UNION = 'union'
71
- IGNORE = 'ignore'
72
71
 
73
72
 
74
73
  class Action(enum.IntEnum):
@@ -103,7 +102,6 @@ class MonitorTask:
103
102
  # Should a service be dispatched again when possible
104
103
  dispatch_needed: bool = dataclasses.field(default=False)
105
104
 
106
-
107
105
  @contextmanager
108
106
  def apm_span(client, span_name: str):
109
107
  try:
@@ -128,145 +126,67 @@ class ResultSummary:
128
126
 
129
127
 
130
128
  class TemporaryFileData:
131
- def __init__(self, sha256: str) -> None:
129
+ def __init__(self, sha256: str, config: dict[str, str], shared: Optional[dict[str, Any]] = None) -> None:
132
130
  self.sha256 = sha256
133
- self.parents: list[TemporaryFileData] = []
134
- self.children: list[TemporaryFileData] = []
135
- self.parent_cache: dict[str, Any] = {}
131
+ self.config = config
132
+ self.shared_values: dict[str, Any] = dict() if shared is None else shared
136
133
  self.local_values: dict[str, Any] = {}
137
134
 
138
- def add_parent(self, parent_temp: TemporaryFileData):
139
- """Add a parent to this node."""
140
- self.parents.append(parent_temp)
141
- parent_temp.children.append(self)
142
-
143
- def new_child(self, child: str) -> TemporaryFileData:
144
- """Create a linked entry for a new child."""
145
- temp = TemporaryFileData(child)
146
- temp.parents.append(self)
147
- self.children.append(temp)
148
- temp.build_parent_cache()
149
- return temp
150
-
151
- def build_parent_cache(self):
152
- """Rebuild the cache of data from parent files."""
153
- self.parent_cache.clear()
154
- for parent in self.parents:
155
- self.parent_cache.update(parent.read())
135
+ def new_file(self, sha256: str) -> TemporaryFileData:
136
+ """Create an entry for another file with reference to the shared values."""
137
+ return TemporaryFileData(sha256, self.config, self.shared_values)
156
138
 
157
139
  def read(self) -> dict[str, Any]:
158
140
  """Get a copy of the current data"""
159
- # Start with a shallow copy ofthe parent cache
160
- data = dict(self.parent_cache)
141
+ # Start with a shallow copy of the local data
142
+ data = dict(self.local_values)
161
143
 
162
- # update, this overwrites any common keys (we want this)
163
- data.update(self.local_values)
144
+ # mix in whatever the latest submission-wide values are
145
+ data.update(self.shared_values)
164
146
  return data
165
147
 
166
148
  def read_key(self, key: str) -> Any:
167
149
  """Get a copy of the current data"""
168
150
  try:
169
- return self.local_values[key]
151
+ return self.shared_values[key]
170
152
  except KeyError:
171
- return self.parent_cache.get(key)
153
+ return self.local_values.get(key)
172
154
 
173
- def set_value(self, key: str, value: str) -> set[str]:
174
- """Using a SET operation update the value on this node and all children.
155
+ def set_value(self, key: str, value: Any) -> bool:
156
+ """Set the value of a temporary data key using the appropriate method for the key.
175
157
 
176
- Returns a list of the sha of all files who's temporary data has been modified.
158
+ Return true if this change could mean partial results should be reevaluated.
177
159
  """
178
- # Check if the local value doesn't change then we won't have any effect on children
179
- old = self.local_values.get(key)
180
- if type(old) is type(value) and old == value:
181
- return set()
182
-
183
- # Update the local value and recurse into children
184
- self.local_values[key] = value
185
- changed = [self.sha256]
186
- for child in self.children:
187
- changed.extend(child.set_value_from_ancestor(key, value))
188
- return set(changed)
189
-
190
- def set_value_from_ancestor(self, key: str, value: str) -> set[str]:
191
- """Given that an ancestor has changed, test if this file's temporary data will change also."""
192
- # If this child has already set this key, the parent values don't matter
193
- if key in self.local_values:
194
- return set()
160
+ if self.config.get(key) == KeyType.UNION.value:
161
+ return self._union_shared_value(key, value)
195
162
 
196
- # If the parent value was already set to this nothing has changed
197
- old = self.parent_cache.get(key)
198
- if type(old) is type(value) and old == value:
199
- return set()
200
-
201
- # Update the parent cache and recurse into children
202
- self.parent_cache[key] = value
203
- changed = [self.sha256]
204
- for child in self.children:
205
- changed.extend(child.set_value_from_ancestor(key, value))
206
- return set(changed)
207
-
208
- def union_value(self, key: str, value: set[str]) -> set[str]:
209
- """Using a MERGE operation update the value on this node and all children.
210
-
211
- Returns a list of the sha of all files who's temporary data has been modified.
212
- """
213
- if not value:
214
- return set()
215
-
216
- # Check if the local value doesn't change then we won't have any effect on children
217
- new_value = merge_in_values(self.local_values.get(key), value)
218
- if new_value is None:
219
- return set()
220
-
221
- # Update the local value and recurse into children
222
- self.local_values[key] = new_value
223
- changed = [self.sha256]
224
- for child in self.children:
225
- changed.extend(child.union_value_from_ancestor(key, value))
226
- return set(changed)
227
-
228
- def union_value_from_ancestor(self, key: str, value: set[str]) -> set[str]:
229
- """Given that an ancestor has changed, test if this file's temporary data will change also.
163
+ if self.config.get(key) == KeyType.OVERWRITE.value:
164
+ change = self.shared_values.get(key) != value
165
+ self.shared_values[key] = value
166
+ return change
230
167
 
231
- For values updated by union the parent and local values are the same.
232
- """
233
- # Merge in data to parent cache, we won't be reading from it, but we still want to keep it
234
- # up to date and use it to check if changes are needed
235
- new_value = merge_in_values(self.parent_cache.get(key), value)
236
- if new_value is None:
237
- return set()
238
- self.parent_cache[key] = new_value
239
-
240
- # Update the local values as well if we need to
241
- new_value = merge_in_values(self.local_values.get(key), value)
242
- if new_value is None:
243
- return set()
244
- self.local_values[key] = new_value
245
-
246
- # Since we did change the local value, pass the new set down to children
247
- changed = [self.sha256]
248
- for child in self.children:
249
- changed.extend(child.union_value_from_ancestor(key, value))
250
- return set(changed)
251
-
252
-
253
- def merge_in_values(old_values: Any, new_values: set[str]) -> Optional[list[str]]:
254
- """Merge in new values into a json list.
255
-
256
- If there is no new values return None.
257
- """
258
- # Read out the old value set
259
- if isinstance(old_values, (list, set)):
260
- old_values = set(old_values)
261
- else:
262
- old_values = set()
263
-
264
- # If we have no new values to merge in
265
- if new_values <= old_values:
266
- return None
267
-
268
- # We have new values, build a new set
269
- return list(new_values | old_values)
168
+ self.local_values[key] = value
169
+ return False
170
+
171
+ def _union_shared_value(self, key: str, values: Any) -> bool:
172
+ # Make sure the existing value is the right type
173
+ self.shared_values.setdefault(key, [])
174
+ if not isinstance(self.shared_values[key], list):
175
+ self.shared_values[key] = []
176
+
177
+ # make sure the input is the right type
178
+ if not isinstance(values, list | tuple):
179
+ return False
180
+
181
+ # Add each value one at a time testing for new values
182
+ # This is slower than using set intersection, but isn't type sensitive
183
+ changed = False
184
+ for new_item in values:
185
+ if new_item in self.shared_values[key]:
186
+ continue
187
+ self.shared_values[key].append(new_item)
188
+ changed = True
189
+ return changed
270
190
 
271
191
 
272
192
  class SubmissionTask:
@@ -376,7 +296,6 @@ class SubmissionTask:
376
296
  except KeyError:
377
297
  self._forbidden_services[sha256] = {service_name}
378
298
 
379
-
380
299
  def register_children(self, parent: str, children: list[str]):
381
300
  """
382
301
  Note which files extracted other files.
@@ -385,10 +304,7 @@ class SubmissionTask:
385
304
  """
386
305
  parent_temp = self.temporary_data[parent]
387
306
  for child in children:
388
- try:
389
- self.temporary_data[child].add_parent(parent_temp)
390
- except KeyError:
391
- self.temporary_data[child] = parent_temp.new_child(child)
307
+ self.temporary_data.setdefault(child, parent_temp.new_file(child))
392
308
  try:
393
309
  self._parent_map[child].add(parent)
394
310
  except KeyError:
@@ -446,21 +362,29 @@ class SubmissionTask:
446
362
  if result and result.partial:
447
363
  self.service_results.pop((sha256, service_name), None)
448
364
 
449
- def file_temporary_data_changed(self, changed_sha256: set[str], key: str) -> list[str]:
365
+ def temporary_data_changed(self, key: str) -> list[str]:
450
366
  """Check all of the monitored tasks on that key for changes. Redispatch as needed."""
451
367
  changed = []
452
368
  for (sha256, service), entry in self.monitoring.items():
453
- if sha256 not in changed_sha256:
369
+ # Check if this key is actually being monitored by this entry
370
+ if key not in entry.values:
454
371
  continue
455
372
 
373
+ # Get whatever values (if any) were provided on the previous dispatch of this service
456
374
  value = self.temporary_data[sha256].read_key(key)
457
375
  dispatched_value = entry.values.get(key)
458
376
 
459
377
  if type(value) is not type(dispatched_value) or value != dispatched_value:
460
378
  result = self.service_results.get((sha256, service))
461
379
  if not result:
380
+ # If the value has changed since the last dispatch but results haven't come in yet
381
+ # mark this service to be disptached later. This will only happen if the service
382
+ # returns partial results, if there are full results the entry will be cleared instead.
462
383
  entry.dispatch_needed = True
463
384
  else:
385
+ # If there are results and there is a monitoring entry, the result was partial
386
+ # so redispatch it immediately. If there are not partial results the monitoring
387
+ # entry will have been cleared.
464
388
  self.redispatch_service(sha256, service)
465
389
  changed.append(sha256)
466
390
  return changed
@@ -537,7 +461,7 @@ class Dispatcher(ThreadedCoreBase):
537
461
 
538
462
  # Build some utility classes
539
463
  self.scheduler = Scheduler(self.datastore, self.config, self.redis)
540
- self.running_tasks = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
464
+ self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
541
465
  self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
542
466
 
543
467
  self.classification_engine = get_classification()
@@ -800,14 +724,14 @@ class Dispatcher(ThreadedCoreBase):
800
724
  self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
801
725
 
802
726
  # Apply initial data parameter
803
- temporary_data = task.temporary_data[sha256] = TemporaryFileData(sha256)
727
+ temporary_data = TemporaryFileData(sha256, config=self.config.submission.temporary_keys)
728
+ task.temporary_data[sha256] = temporary_data
804
729
  if submission.params.initial_data:
805
730
  try:
806
- temporary_data.local_values = {
807
- key: value
808
- for key, value in dict(json.loads(submission.params.initial_data)).items()
809
- if len(str(value)) <= self.config.submission.max_temp_data_length
810
- }
731
+ for key, value in dict(json.loads(submission.params.initial_data)).items():
732
+ if len(str(value)) > self.config.submission.max_temp_data_length:
733
+ continue
734
+ temporary_data.set_value(key, value)
811
735
 
812
736
  except (ValueError, TypeError) as err:
813
737
  self.log.warning(f"[{sid}] could not process initialization data: {err}")
@@ -1011,7 +935,6 @@ class Dispatcher(ThreadedCoreBase):
1011
935
  for service_name in prevented_services:
1012
936
  task.forbid_for_children(sha256, service_name)
1013
937
 
1014
-
1015
938
  # Build the actual service dispatch message
1016
939
  config = self.build_service_config(service, submission)
1017
940
  service_task = ServiceTask(dict(
@@ -1547,16 +1470,10 @@ class Dispatcher(ThreadedCoreBase):
1547
1470
 
1548
1471
  # Update the temporary data table for this file
1549
1472
  force_redispatch = set()
1550
- update_operations = self.config.submission.temporary_keys
1551
1473
  for key, value in (temporary_data or {}).items():
1552
1474
  if len(str(value)) <= self.config.submission.max_temp_data_length:
1553
- if update_operations.get(key) == KeyType.UNION:
1554
- changed_files = task.temporary_data[sha256].union_value(key, value)
1555
- elif update_operations.get(key) == KeyType.IGNORE:
1556
- changed_files = set()
1557
- else:
1558
- changed_files = task.temporary_data[sha256].set_value(key, value)
1559
- force_redispatch |= set(task.file_temporary_data_changed(changed_files, key))
1475
+ if task.temporary_data[sha256].set_value(key, value):
1476
+ force_redispatch |= set(task.temporary_data_changed(key))
1560
1477
 
1561
1478
  # Set the depth of all extracted files, even if we won't be processing them
1562
1479
  depth_limit = self.config.submission.max_extraction_depth
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev358
3
+ Version: 4.5.1.dev359
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -1250,7 +1250,7 @@ def test_temp_data_monitoring(core: CoreSession, metrics):
1250
1250
  sub: Submission = core.ds.submission.get(dropped_task.submission.sid)
1251
1251
  assert len(sub.errors) == 0
1252
1252
  assert len(sub.results) == 4, 'results'
1253
- assert core.pre_service.hits[sha] >= 2, 'pre_service.hits'
1253
+ assert core.pre_service.hits[sha] >= 2, f'pre_service.hits {core.pre_service.hits}'
1254
1254
 
1255
1255
  # Wait until we get feedback from the metrics channel
1256
1256
  metrics.expect('ingester', 'submissions_ingested', 1)
@@ -1 +0,0 @@
1
- 4.5.1.dev358