assemblyline-core 4.6.1.dev228__tar.gz → 4.6.1.dev230__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (90)
  1. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/PKG-INFO +1 -1
  2. assemblyline_core-4.6.1.dev230/assemblyline_core/VERSION +1 -0
  3. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/dispatcher.py +78 -38
  4. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/ingester/ingester.py +17 -5
  5. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/client.py +16 -11
  6. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/submission_client.py +55 -4
  7. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core.egg-info/PKG-INFO +1 -1
  8. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_dispatcher.py +148 -2
  9. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_simulation.py +49 -3
  10. assemblyline_core-4.6.1.dev228/assemblyline_core/VERSION +0 -1
  11. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/LICENCE.md +0 -0
  12. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/README.md +0 -0
  13. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/__init__.py +0 -0
  14. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/alerter/__init__.py +0 -0
  15. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/alerter/processing.py +0 -0
  16. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/alerter/run_alerter.py +0 -0
  17. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/archiver/__init__.py +0 -0
  18. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/archiver/run_archiver.py +0 -0
  19. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/badlist_client.py +0 -0
  20. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/__init__.py +0 -0
  21. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/__main__.py +0 -0
  22. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/client.py +0 -0
  23. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/schedules.py +0 -0
  24. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/dispatching/timeout.py +0 -0
  25. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/expiry/__init__.py +0 -0
  26. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/expiry/run_expiry.py +0 -0
  27. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/ingester/__init__.py +0 -0
  28. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/ingester/__main__.py +0 -0
  29. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/ingester/constants.py +0 -0
  30. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/__init__.py +0 -0
  31. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/es_metrics.py +0 -0
  32. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  33. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/helper.py +0 -0
  34. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/metrics_server.py +0 -0
  35. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  36. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  37. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  38. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/plumber/__init__.py +0 -0
  39. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/plumber/run_plumber.py +0 -0
  40. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/__init__.py +0 -0
  41. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/creator/__init__.py +0 -0
  42. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/creator/run.py +0 -0
  43. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/creator/run_worker.py +0 -0
  44. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/loader/__init__.py +0 -0
  45. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/loader/run.py +0 -0
  46. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/loader/run_worker.py +0 -0
  47. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/replay/replay.py +0 -0
  48. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/safelist_client.py +0 -0
  49. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/__init__.py +0 -0
  50. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/collection.py +0 -0
  51. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  52. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  53. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/controllers/interface.py +0 -0
  54. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  55. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/run_scaler.py +0 -0
  56. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/scaler/scaler_server.py +0 -0
  57. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/server_base.py +0 -0
  58. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/signature_client.py +0 -0
  59. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/tasking_client.py +0 -0
  60. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/updater/__init__.py +0 -0
  61. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/updater/helper.py +0 -0
  62. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/updater/run_updater.py +0 -0
  63. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/__init__.py +0 -0
  64. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/crawler.py +0 -0
  65. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/department_map.py +0 -0
  66. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/safelist.py +0 -0
  67. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/stream_map.py +0 -0
  68. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/vacuum/worker.py +0 -0
  69. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/workflow/__init__.py +0 -0
  70. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core/workflow/run_workflow.py +0 -0
  71. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  72. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  73. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core.egg-info/requires.txt +0 -0
  74. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/assemblyline_core.egg-info/top_level.txt +0 -0
  75. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/setup.cfg +0 -0
  76. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/setup.py +0 -0
  77. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_alerter.py +0 -0
  78. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_badlist_client.py +0 -0
  79. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_expiry.py +0 -0
  80. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_plumber.py +0 -0
  81. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_replay.py +0 -0
  82. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_safelist_client.py +0 -0
  83. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_scaler.py +0 -0
  84. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_scheduler.py +0 -0
  85. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_signature_client.py +0 -0
  86. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_tasking_client.py +0 -0
  87. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_vacuum.py +0 -0
  88. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_worker_ingest.py +0 -0
  89. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_worker_submit.py +0 -0
  90. {assemblyline_core-4.6.1.dev228 → assemblyline_core-4.6.1.dev230}/test/test_workflow.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.6.1.dev228
3
+ Version: 4.6.1.dev230
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.6.1.dev230
@@ -73,6 +73,7 @@ if TYPE_CHECKING:
73
73
  from redis import Redis
74
74
 
75
75
  from assemblyline.odm.models.file import File
76
+ from assemblyline.odm.models.config import Config
76
77
 
77
78
 
78
79
  APM_SPAN_TYPE = 'handle_message'
@@ -218,8 +219,18 @@ class TemporaryFileData:
218
219
  class SubmissionTask:
219
220
  """Dispatcher internal model for submissions"""
220
221
 
221
- def __init__(self, submission, completed_queue, scheduler, datastore: AssemblylineDatastore, results=None,
222
- file_infos=None, file_tree=None, errors: Optional[Iterable[str]] = None):
222
+ def __init__(
223
+ self,
224
+ submission,
225
+ completed_queue,
226
+ scheduler,
227
+ datastore: AssemblylineDatastore,
228
+ config: Config,
229
+ results=None,
230
+ file_infos=None,
231
+ file_tree=None,
232
+ errors: Optional[Iterable[str]] = None,
233
+ ):
223
234
  self.submission: Submission = Submission(submission)
224
235
  submitter: Optional[User] = datastore.user.get_if_exists(self.submission.params.submitter)
225
236
  self.service_access_control: Optional[str] = None
@@ -227,6 +238,7 @@ class SubmissionTask:
227
238
  self.service_access_control = submitter.classification.value
228
239
 
229
240
  self.completed_queue = None
241
+
230
242
  if completed_queue:
231
243
  self.completed_queue = str(completed_queue)
232
244
 
@@ -265,9 +277,31 @@ class SubmissionTask:
265
277
  recurse_tree(file_data['children'], depth + 1)
266
278
 
267
279
  recurse_tree(file_tree, 0)
280
+ sorted_file_depth = [(k, v) for k, v in sorted(self.file_depth.items(), key=lambda fd: fd[1])]
281
+ else:
282
+ sorted_file_depth = [(self.submission.files[0].sha256, 0)]
283
+
284
+ for sha256, depth in sorted_file_depth:
285
+ # populate temporary data to root level files
286
+ if depth == 0:
287
+ # Apply initial data parameter
288
+ temp_key_config = dict(config.submission.default_temporary_keys)
289
+ temp_key_config.update(config.submission.temporary_keys)
290
+ temporary_data = TemporaryFileData(sha256, config=temp_key_config)
291
+ self.temporary_data[sha256] = temporary_data
292
+ if self.submission.params.initial_data:
293
+ try:
294
+ for key, value in dict(json.loads(self.submission.params.initial_data)).items():
295
+ if len(str(value)) > config.submission.max_temp_data_length:
296
+ continue
297
+ temporary_data.set_value(key, value)
298
+
299
+ except (ValueError, TypeError):
300
+ pass
268
301
 
269
302
  if results is not None:
270
303
  rescan = scheduler.expand_categories(self.submission.params.services.rescan)
304
+ result_keys = list(results.keys())
271
305
 
272
306
  # Replay the process of routing files for dispatcher internal state.
273
307
  for k, result in results.items():
@@ -282,24 +316,35 @@ class SubmissionTask:
282
316
  self.forbid_for_children(sha256, service_name)
283
317
 
284
318
  # Replay the process of receiving results for dispatcher internal state
285
- for k, result in results.items():
286
- sha256, service, _ = k.split('.', 2)
287
- if service not in rescan:
288
- extracted = result['response']['extracted']
289
- children: list[str] = [r['sha256'] for r in extracted]
290
- self.register_children(sha256, children)
291
- children_detail: list[tuple[str, str]] = [(r['sha256'], r['parent_relation']) for r in extracted]
292
- self.service_results[(sha256, service)] = ResultSummary(
293
- key=k, drop=result['drop_file'], score=result['result']['score'],
294
- children=children_detail, partial=result.get('partial', False))
295
-
296
- tags = Result(result).scored_tag_dict()
297
- for key, tag in tags.items():
298
- if key in self.file_tags[sha256].keys():
299
- # Sum score of already known tags
300
- self.file_tags[sha256][key]['score'] += tag['score']
301
- else:
302
- self.file_tags[sha256][key] = tag
319
+ # iterate through result based on file depth
320
+ for sha256, depth in sorted_file_depth:
321
+ results_to_process = list(filter(lambda k: sha256 in k, result_keys))
322
+ for result_key in results_to_process:
323
+ result = results[result_key]
324
+ sha256, service, _ = result_key.split(".", 2)
325
+
326
+ if service not in rescan:
327
+ extracted = result["response"]["extracted"]
328
+ children: list[str] = [r["sha256"] for r in extracted]
329
+ self.register_children(sha256, children)
330
+ children_detail: list[tuple[str, str]] = [
331
+ (r["sha256"], r["parent_relation"]) for r in extracted
332
+ ]
333
+ self.service_results[(sha256, service)] = ResultSummary(
334
+ key=result_key,
335
+ drop=result["drop_file"],
336
+ score=result["result"]["score"],
337
+ children=children_detail,
338
+ partial=result.get("partial", False),
339
+ )
340
+
341
+ tags = Result(result).scored_tag_dict()
342
+ for key, tag in tags.items():
343
+ if key in self.file_tags[sha256].keys():
344
+ # Sum score of already known tags
345
+ self.file_tags[sha256][key]["score"] += tag["score"]
346
+ else:
347
+ self.file_tags[sha256][key] = tag
303
348
 
304
349
  if errors is not None:
305
350
  for e in errors:
@@ -334,6 +379,7 @@ class SubmissionTask:
334
379
  _parent_map is for dynamic recursion prevention
335
380
  temporary_data is for cascading the temp data to children
336
381
  """
382
+
337
383
  parent_temp = self.temporary_data[parent]
338
384
  for child in children:
339
385
  if child not in self.temporary_data:
@@ -706,7 +752,13 @@ class Dispatcher(ThreadedCoreBase):
706
752
  # Start of process dispatcher transaction
707
753
  with apm_span(self.apm_client, 'submission_message'):
708
754
  # This is probably a complete task
709
- task = SubmissionTask(scheduler=self.scheduler, datastore=self.datastore, **message)
755
+
756
+ task = SubmissionTask(
757
+ scheduler=self.scheduler,
758
+ datastore=self.datastore,
759
+ config=self.config,
760
+ **message,
761
+ )
710
762
 
711
763
  # Check the sid table
712
764
  if task.sid in self.bad_sids:
@@ -739,6 +791,7 @@ class Dispatcher(ThreadedCoreBase):
739
791
 
740
792
  if not self.active_submissions.exists(sid):
741
793
  self.log.info("[%s] New submission received", sid)
794
+
742
795
  task.trace('submission_start')
743
796
  self.active_submissions.add(sid, {
744
797
  'completed_queue': task.completed_queue,
@@ -760,21 +813,6 @@ class Dispatcher(ThreadedCoreBase):
760
813
  if submission.params.quota_item and submission.params.submitter:
761
814
  self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
762
815
 
763
- # Apply initial data parameter
764
- temp_key_config = dict(self.config.submission.default_temporary_keys)
765
- temp_key_config.update(self.config.submission.temporary_keys)
766
- temporary_data = TemporaryFileData(sha256, config=temp_key_config)
767
- task.temporary_data[sha256] = temporary_data
768
- if submission.params.initial_data:
769
- try:
770
- for key, value in dict(json.loads(submission.params.initial_data)).items():
771
- if len(str(value)) > self.config.submission.max_temp_data_length:
772
- continue
773
- temporary_data.set_value(key, value)
774
-
775
- except (ValueError, TypeError) as err:
776
- self.log.warning(f"[{sid}] could not process initialization data: {err}")
777
-
778
816
  self.tasks[sid] = task
779
817
  self._submission_timeouts.set(task.sid, SUBMISSION_TOTAL_TIMEOUT, None)
780
818
 
@@ -784,7 +822,10 @@ class Dispatcher(ThreadedCoreBase):
784
822
  # Initialize ancestry chain by identifying the root file
785
823
  file_info = self.get_fileinfo(task, sha256)
786
824
  file_type = file_info.type if file_info else 'NOT_FOUND'
787
- temporary_data.local_values['ancestry'] = [[dict(type=file_type, parent_relation="ROOT", sha256=sha256)]]
825
+
826
+ task.temporary_data[sha256].local_values["ancestry"] = [
827
+ [dict(type=file_type, parent_relation="ROOT", sha256=sha256)]
828
+ ]
788
829
 
789
830
  # Start the file dispatching
790
831
  task.active_files.add(sha256)
@@ -875,7 +916,6 @@ class Dispatcher(ThreadedCoreBase):
875
916
  schedule_summary = [list(stage.keys()) for stage in task.file_schedules[sha256]]
876
917
  task.trace('schedule_built', sha256=sha256, message=str(schedule_summary))
877
918
 
878
-
879
919
  file_info = task.file_info[sha256]
880
920
  schedule: list = list(task.file_schedules[sha256])
881
921
  deep_scan, ignore_filtering = submission.params.deep_scan, submission.params.ignore_filtering
@@ -112,6 +112,7 @@ class IngestTask(odm.Model):
112
112
  ingest_id = odm.UUID() # Ingestion Identifier
113
113
  ingest_time = odm.Date(default="NOW") # Time at which the file was ingested
114
114
  notify_time = odm.Optional(odm.Date()) # Time at which the user is notify the submission is finished
115
+ to_ingest = odm.Boolean(default=False)
115
116
 
116
117
 
117
118
  class Ingester(ThreadedCoreBase):
@@ -250,7 +251,13 @@ class Ingester(ThreadedCoreBase):
250
251
  submission=sub,
251
252
  ingest_id=sub.sid,
252
253
  ))
253
- task.submission.sid = None # Reset to new random uuid
254
+
255
+ # if this is a new task from imported bundle we want to use the same sid
256
+ # because all the submission information are stored in the datastore
257
+ # else create a new sid for this submission
258
+ if "bundle.source" not in task.submission.metadata:
259
+ task.submission.sid = None # Reset to new random uuid
260
+
254
261
  # Write all input to the traffic queue
255
262
  self.traffic_queue.publish(SubmissionMessage({
256
263
  'msg': sub,
@@ -920,10 +927,15 @@ class Ingester(ThreadedCoreBase):
920
927
  return reason
921
928
 
922
929
  def submit(self, task: IngestTask):
923
- self.submit_client.submit(
924
- submission_obj=task.submission,
925
- completed_queue=COMPLETE_QUEUE_NAME,
926
- )
930
+
931
+ if "bundle.source" in task.submission.metadata:
932
+ self.submit_client.send_bundle_to_dispatch(task.submission, completed_queue=COMPLETE_QUEUE_NAME)
933
+ else:
934
+
935
+ self.submit_client.submit(
936
+ submission_obj=task.submission,
937
+ completed_queue=COMPLETE_QUEUE_NAME,
938
+ )
927
939
 
928
940
  self.timeout_queue.push(int(now(_max_time)), task.submission.scan_key)
929
941
  self.log.info(f"[{task.ingest_id} :: {task.sha256}] Submitted to dispatcher for analysis")
@@ -297,11 +297,14 @@ class APIClient(ClientBase):
297
297
  self.al_client.bundle.create(id, output=bundle_path, use_alert=use_alert)
298
298
 
299
299
  def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
300
- self.al_client.bundle.import_bundle(bundle_path,
301
- min_classification=min_classification,
302
- rescan_services=rescan_services,
303
- exist_ok=exist_ok,
304
- reclassification=reclassification)
300
+ self.al_client.bundle.import_bundle(
301
+ bundle_path,
302
+ min_classification=min_classification,
303
+ rescan_services=rescan_services,
304
+ exist_ok=exist_ok,
305
+ reclassification=reclassification,
306
+ to_ingest=True, # send submissions to ingester
307
+ )
305
308
 
306
309
  def load_json(self, file_path, reclassification=None):
307
310
  from assemblyline_client import ClientError
@@ -412,11 +415,14 @@ class DirectClient(ClientBase):
412
415
  os.rename(temp_bundle_file, bundle_path)
413
416
 
414
417
  def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
415
- import_bundle(bundle_path,
416
- min_classification=min_classification,
417
- rescan_services=rescan_services,
418
- exist_ok=exist_ok,
419
- reclassification=reclassification)
418
+ import_bundle(
419
+ bundle_path,
420
+ min_classification=min_classification,
421
+ rescan_services=rescan_services,
422
+ exist_ok=exist_ok,
423
+ reclassification=reclassification,
424
+ to_ingest=True, # send submissions to ingester
425
+ )
420
426
 
421
427
  def load_json(self, file_path, reclassification=None):
422
428
  # We're assuming all JSON that loaded has an "enabled" field
@@ -442,7 +448,6 @@ class DirectClient(ClientBase):
442
448
  else:
443
449
  raise
444
450
 
445
-
446
451
  if collection == "workflow":
447
452
  # If there has been any edits by another user, then preserve the enabled state
448
453
  # Otherwise, the workflow will be synchronized with the origin system
@@ -37,6 +37,8 @@ from assemblyline.odm.models.result import Result
37
37
  from assemblyline.odm.models.submission import File, Submission
38
38
  from assemblyline.odm.models.config import Config
39
39
  from assemblyline_core.dispatching.client import DispatchClient
40
+ from assemblyline_core.ingester.constants import INGEST_QUEUE_NAME
41
+ from assemblyline.remote.datatypes.queues.named import NamedQueue
40
42
 
41
43
  Classification = forge.get_classification()
42
44
  SECONDS_PER_DAY = 24 * 60 * 60
@@ -72,6 +74,7 @@ class SubmissionClient:
72
74
 
73
75
  # A client for interacting with the dispatcher
74
76
  self.dispatcher = DispatchClient(datastore, redis)
77
+ self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, redis)
75
78
 
76
79
  def __enter__(self):
77
80
  return self
@@ -84,8 +87,16 @@ class SubmissionClient:
84
87
  self.identify.stop()
85
88
 
86
89
  @elasticapm.capture_span(span_type='submission_client')
87
- def rescan(self, submission: Submission, results: Dict[str, Result], file_infos: Dict[str, FileInfo],
88
- file_tree, errors: List[str], rescan_services: List[str]):
90
+ def rescan(
91
+ self,
92
+ submission,
93
+ results: Dict[str, Result],
94
+ file_infos: Dict[str, FileInfo],
95
+ file_tree: dict,
96
+ errors: List[str],
97
+ rescan_services: List[str],
98
+ to_ingest: bool = False,
99
+ ):
89
100
  """
90
101
  Rescan a submission started on another system.
91
102
  """
@@ -114,8 +125,29 @@ class SubmissionClient:
114
125
  self.datastore.submission.save(submission_obj.sid, submission_obj)
115
126
 
116
127
  # Dispatch the submission
117
- self.log.debug("Submission complete. Dispatching: %s", submission_obj.sid)
118
- self.dispatcher.dispatch_bundle(submission_obj, results, file_infos, file_tree, errors)
128
+ if to_ingest:
129
+ self.log.debug("Submission complete. Submission sent to ingester: %s", submission_obj.sid)
130
+
131
+ submission_obj = SubmissionObject(
132
+ {
133
+ "sid": submission["sid"],
134
+ "files": submission.get("files", []),
135
+ "metadata": submission.get("metadata", {}),
136
+ "params": submission.get("params", {}),
137
+ "notification": submission.get("notification", {}),
138
+ "scan_key": submission.get("scan_key", None),
139
+ "errors": errors,
140
+ "file_infos": file_infos,
141
+ "file_tree": file_tree,
142
+ "results": results,
143
+ }
144
+ ).as_primitives()
145
+
146
+ self.ingest_queue.push(submission_obj)
147
+
148
+ else:
149
+ self.log.debug("Submission complete. Dispatching: %s", submission_obj.sid)
150
+ self.dispatcher.dispatch_bundle(submission_obj, results, file_infos, file_tree, errors)
119
151
 
120
152
  return submission
121
153
 
@@ -252,3 +284,22 @@ class SubmissionClient:
252
284
  if extracted_path:
253
285
  if os.path.exists(extracted_path):
254
286
  os.unlink(extracted_path)
287
+
288
+ @elasticapm.capture_span(span_type="submission_client")
289
+ def send_bundle_to_dispatch(
290
+ self,
291
+ submission_obj: SubmissionObject,
292
+ completed_queue: str = None,
293
+ ):
294
+
295
+ sid = submission_obj.sid
296
+ submission = self.datastore.submission.get(sid)
297
+
298
+ self.dispatcher.dispatch_bundle(
299
+ submission=submission,
300
+ results=submission_obj.results,
301
+ file_infos=submission_obj.file_infos,
302
+ file_tree=submission_obj.file_tree,
303
+ errors=submission_obj.errors,
304
+ completed_queue=completed_queue,
305
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.6.1.dev228
3
+ Version: 4.6.1.dev230
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -5,7 +5,7 @@ from unittest import mock
5
5
 
6
6
  import pytest
7
7
  from assemblyline_core.dispatching.client import DISPATCH_RESULT_QUEUE, DispatchClient
8
- from assemblyline_core.dispatching.dispatcher import Dispatcher, ServiceTask, Submission
8
+ from assemblyline_core.dispatching.dispatcher import Dispatcher, ServiceTask, Submission, SubmissionTask
9
9
  from assemblyline_core.dispatching.schedules import Scheduler as RealScheduler
10
10
 
11
11
  # noinspection PyUnresolvedReferences
@@ -18,8 +18,9 @@ from assemblyline.common.metrics import MetricsFactory
18
18
  from assemblyline.odm import models
19
19
  from assemblyline.odm.models.error import Error
20
20
  from assemblyline.odm.models.file import File
21
- from assemblyline.odm.models.result import Result
21
+ from assemblyline.odm.models.result import Result, File as ResponseFile
22
22
  from assemblyline.odm.models.user import User
23
+ from assemblyline.odm.models.submission import Submission as SubmissionModel
23
24
  from assemblyline.odm.randomizer import (
24
25
  get_random_hash,
25
26
  random_minimal_obj,
@@ -454,3 +455,148 @@ def test_prevent_result_overwrite(clean_redis, clean_datastore):
454
455
  msg_result_key = message['result_summary']['key']
455
456
 
456
457
  assert msg_result_key != result_key
458
+
459
+
460
+ def test_create_submission_task(datastore_connection, config, filestore, clean_redis):
461
+
462
+ # create a test submission
463
+ scheduler = Scheduler(datastore_connection, config, clean_redis)
464
+ submission = random_model_obj(SubmissionModel)
465
+ sid = get_random_hash(64)
466
+ submission.sid = sid
467
+
468
+ # create files and results with file tree:
469
+ # root_1
470
+ # |
471
+ # middle_1
472
+ # | |
473
+ # leaf_1 leaf_2
474
+
475
+ leaf_1_sha = get_random_hash(64)
476
+ leaf_2_sha = get_random_hash(64)
477
+ middle_1_sha = get_random_hash(64)
478
+ root_1_sha = get_random_hash(64)
479
+
480
+ file_shas = [root_1_sha, middle_1_sha, leaf_2_sha, leaf_1_sha]
481
+
482
+ files = [
483
+ {"name": ["root_1"], "size": 1, "sha256": root_1_sha},
484
+ {"name": ["middle_1"], "size": 1, "sha256": middle_1_sha},
485
+ {"name": ["leaf-2"], "size": 1, "sha256": leaf_2_sha},
486
+ {
487
+ "name": ["leaf-1"],
488
+ "size": 1,
489
+ "sha256": leaf_1_sha,
490
+ },
491
+ ]
492
+
493
+ submission.files = files
494
+
495
+ leaf_1 = {
496
+ "name": ["leaf-1"],
497
+ "type": "test_type",
498
+ "sha256": leaf_1_sha,
499
+ "children": {},
500
+ "truncated": False,
501
+ "score": 0,
502
+ }
503
+
504
+ leaf_2 = {
505
+ "name": ["leaf-2"],
506
+ "type": "test_type",
507
+ "sha256": leaf_2_sha,
508
+ "children": {},
509
+ "truncated": False,
510
+ "score": 0,
511
+ }
512
+
513
+ middle_1 = {
514
+ "name": ["middle-1"],
515
+ "type": "test_type",
516
+ "sha256": middle_1_sha,
517
+ "children": {leaf_1_sha: leaf_1, leaf_2_sha: leaf_2},
518
+ "truncated": False,
519
+ "score": 0,
520
+ }
521
+
522
+ root_1 = {
523
+ root_1_sha: {
524
+ "name": ["root-1"],
525
+ "type": "test_type",
526
+ "sha256": root_1_sha,
527
+ "children": {middle_1_sha: middle_1},
528
+ "truncated": False,
529
+ "score": 0,
530
+ }
531
+ }
532
+
533
+ file_infos = {}
534
+ errors = []
535
+ for file_sha in file_shas:
536
+ file_infos[file_sha] = random_model_obj(File).as_primitives()
537
+ errors.append(f"{file_sha}.serviceName.v0_0_0.c0.e0")
538
+
539
+ submission.errors = errors
540
+
541
+ results = {}
542
+
543
+ result_root_1_key = f"{root_1_sha}.serviceName.v0_0_0.c0"
544
+ results[result_root_1_key] = random_model_obj(Result).as_primitives()
545
+ results[result_root_1_key]["sha256"] = root_1_sha
546
+ result_file_middle = random_model_obj(ResponseFile)
547
+ result_file_middle.sha256 = middle_1_sha
548
+ results[result_root_1_key]["response"]["extracted"] = [result_file_middle]
549
+
550
+ result_middle_1_key = f"{middle_1_sha}.serviceName.v0_0_0.c0"
551
+ results[result_middle_1_key] = random_model_obj(Result).as_primitives()
552
+ results[result_middle_1_key]["sha256"] = middle_1_sha
553
+ result_file_leaf1 = random_model_obj(ResponseFile)
554
+ result_file_leaf1.sha256 = leaf_1_sha
555
+ result_file_leaf2 = random_model_obj(ResponseFile)
556
+ result_file_leaf2.sha256 = leaf_2_sha
557
+
558
+ results[result_middle_1_key]["response"]["extracted"] = [result_file_leaf1, result_file_leaf2]
559
+
560
+ result_leaf_1_key = f"{leaf_1_sha}.serviceName.v0_0_0.c0"
561
+ results[result_leaf_1_key] = random_model_obj(Result).as_primitives()
562
+ results[result_leaf_1_key]["sha256"] = leaf_1_sha
563
+ results[result_leaf_1_key]["response"]["extracted"] = []
564
+
565
+ result_leaf_2_key = f"{leaf_2_sha}.serviceName.v0_0_0.c0"
566
+ results[result_leaf_2_key] = random_model_obj(Result).as_primitives()
567
+ results[result_leaf_2_key]["sha256"] = leaf_2_sha
568
+ results[result_leaf_2_key]["response"]["extracted"] = []
569
+
570
+ submission.results = results.keys()
571
+
572
+ # Create a message from submission queue
573
+ submission_message = {
574
+ "submission": submission.as_primitives(),
575
+ "results": results,
576
+ "file_infos": file_infos,
577
+ "file_tree": root_1,
578
+ "errors": errors,
579
+ "completed_queue": "test",
580
+ }
581
+
582
+ task = SubmissionTask(
583
+ scheduler=scheduler,
584
+ datastore=datastore_connection,
585
+ config=config,
586
+ **submission_message,
587
+ )
588
+
589
+ assert task.sid == sid
590
+
591
+ parent_map = {middle_1_sha: {root_1_sha}, leaf_1_sha: {middle_1_sha}, leaf_2_sha: {middle_1_sha}}
592
+
593
+ assert task._parent_map.keys() == parent_map.keys()
594
+ for key, val in parent_map.items():
595
+ assert val == task._parent_map[key]
596
+
597
+ file_depth = {root_1_sha: 0, middle_1_sha: 1, leaf_1_sha: 2, leaf_2_sha: 2}
598
+ assert task.file_depth.keys() == file_depth.keys()
599
+ for key, val in file_depth.items():
600
+ assert val == task.file_depth[key]
601
+
602
+ assert task.temporary_data.keys() == file_depth.keys()
@@ -12,6 +12,7 @@ import threading
12
12
  import logging
13
13
  from tempfile import NamedTemporaryFile
14
14
  from typing import TYPE_CHECKING, Any
15
+ from assemblyline_core.submission_client import SubmissionClient
15
16
 
16
17
  import pytest
17
18
 
@@ -29,6 +30,7 @@ from assemblyline.odm.models.user import User
29
30
  from assemblyline.odm.randomizer import random_model_obj
30
31
  from assemblyline.odm.messages.submission import Submission as SubmissionInput
31
32
  from assemblyline.remote.datatypes.queues.named import NamedQueue
33
+ from assemblyline.odm.random_data import create_submission
32
34
 
33
35
  import assemblyline_core
34
36
  from assemblyline_core.plumber.run_plumber import Plumber
@@ -84,7 +86,6 @@ class MockService(ServerBase):
84
86
  self.log.info(f"{self.service_name} has received a job {task.sid}")
85
87
 
86
88
  file = self.filestore.get(task.fileinfo.sha256)
87
-
88
89
  instructions = json.loads(file)
89
90
  instructions = instructions.get(self.service_name, {})
90
91
  self.log.info(f"{self.service_name} following instruction: {instructions}")
@@ -1326,7 +1327,7 @@ def test_final_partial(core: CoreSession, metrics):
1326
1327
  name='abc123'
1327
1328
  )]
1328
1329
  )).as_primitives())
1329
-
1330
+
1330
1331
  # Wait until both of the services have started (so service b doesn't get the temp data a produces on its first run)
1331
1332
  start = time.time()
1332
1333
  while sum(s.hits.get(sha, 0) for s in core.services) != 2:
@@ -1342,7 +1343,7 @@ def test_final_partial(core: CoreSession, metrics):
1342
1343
  while sum(s.finish.get(sha, 0) for s in core_a) < 1:
1343
1344
  if time.time() - start > RESPONSE_TIMEOUT:
1344
1345
  pytest.fail()
1345
- time.sleep(0.01)
1346
+ time.sleep(0.01)
1346
1347
 
1347
1348
  # Let b finish, it should produce a partial result then rerun right away
1348
1349
  core_b.local_lock.set()
@@ -1453,3 +1454,48 @@ def test_complex_extracted(core: CoreSession, metrics):
1453
1454
  if result.partial:
1454
1455
  partial_results += 1
1455
1456
  assert partial_results == 0, 'partial_results'
1457
+
1458
+
1459
+ @pytest.mark.parametrize("to_ingest,submission_metadata", [(False, {}), (True, {}),(True, {"bundle.source": "test_source"})])
1460
+ def test_rescan_submission(core: CoreSession, metrics: MetricsCounter, to_ingest, submission_metadata):
1461
+ # when a submission is rescanned, the submission information should be stored in the database
1462
+ # file_tree, results, and errors are passed to the ingestion queue first and then sent to the dispatcher
1463
+
1464
+ # Create a new submission
1465
+ submission = create_submission(core.ds, core.filestore, metadata=submission_metadata)
1466
+
1467
+ file_hashes = [x[:64] for x in submission["results"]]
1468
+ file_hashes.extend([x[:64] for x in submission["errors"]])
1469
+ file_hashes.extend([f["sha256"] for f in submission["files"]])
1470
+ file_tree = core.ds.get_or_create_file_tree(submission, core.config.submission.max_extraction_depth)["tree"]
1471
+ file_infos = core.ds.file.multiget(list(set(file_hashes)), as_dictionary=True, as_obj=False)
1472
+ rescan_services = ["core-aaa"]
1473
+ result_ids = list(filter(lambda x: not x.endswith(".e"), submission.results))
1474
+ results = core.ds.result.multiget(result_ids, as_dictionary=True, as_obj=False)
1475
+
1476
+ with SubmissionClient(datastore=core.ds, filestore=core.filestore, config=core.config, identify=None) as sc:
1477
+ sc.ingest_queue = core.ingest_queue
1478
+ sc.rescan(
1479
+ submission.as_primitives(),
1480
+ results,
1481
+ file_infos,
1482
+ file_tree,
1483
+ submission["errors"],
1484
+ rescan_services,
1485
+ to_ingest=to_ingest,
1486
+ )
1487
+
1488
+ if to_ingest:
1489
+
1490
+ metrics.expect("ingester", "submissions_ingested", 1)
1491
+ metrics.expect("dispatcher", "submissions_completed", 1)
1492
+ metrics.expect("ingester", "submissions_completed", 1)
1493
+ if submission_metadata:
1494
+ metrics.expect("dispatcher", "files_completed", len(submission["files"]))
1495
+
1496
+ else:
1497
+ # when to_ingest is false, the submission should be routed to the dispatcher directly
1498
+ metrics.expect("dispatcher", "submissions_completed", 1)
1499
+ metrics.expect("dispatcher", "files_completed", 1)
1500
+ metrics.expect("ingester", "submissions_ingested", 0)
1501
+ metrics.expect("ingester", "submissions_completed", 0)
@@ -1 +0,0 @@
1
- 4.6.1.dev228