assemblyline-core 4.6.0.16__tar.gz → 4.6.0.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic; the original page links to further details.

Files changed (91)
  1. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/PKG-INFO +1 -1
  2. assemblyline_core-4.6.0.18/assemblyline_core/VERSION +1 -0
  3. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/dispatcher.py +78 -38
  4. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/ingester/ingester.py +17 -5
  5. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/client.py +16 -11
  6. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +24 -7
  7. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/scaler_server.py +17 -7
  8. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/submission_client.py +55 -4
  9. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/tasking_client.py +24 -2
  10. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/updater/helper.py +7 -9
  11. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core.egg-info/PKG-INFO +1 -1
  12. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_dispatcher.py +148 -2
  13. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_simulation.py +49 -3
  14. assemblyline_core-4.6.0.18/test/test_tasking_client.py +77 -0
  15. assemblyline_core-4.6.0.16/assemblyline_core/VERSION +0 -1
  16. assemblyline_core-4.6.0.16/test/test_tasking_client.py +0 -37
  17. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/LICENCE.md +0 -0
  18. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/README.md +0 -0
  19. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/__init__.py +0 -0
  20. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/alerter/__init__.py +0 -0
  21. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/alerter/processing.py +0 -0
  22. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/alerter/run_alerter.py +0 -0
  23. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/archiver/__init__.py +0 -0
  24. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/archiver/run_archiver.py +0 -0
  25. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/badlist_client.py +0 -0
  26. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/__init__.py +0 -0
  27. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/__main__.py +0 -0
  28. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/client.py +0 -0
  29. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/schedules.py +0 -0
  30. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/dispatching/timeout.py +0 -0
  31. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/expiry/__init__.py +0 -0
  32. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/expiry/run_expiry.py +0 -0
  33. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/ingester/__init__.py +0 -0
  34. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/ingester/__main__.py +0 -0
  35. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/ingester/constants.py +0 -0
  36. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/__init__.py +0 -0
  37. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/es_metrics.py +0 -0
  38. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  39. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/helper.py +0 -0
  40. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/metrics_server.py +0 -0
  41. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  42. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  43. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  44. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/plumber/__init__.py +0 -0
  45. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/plumber/run_plumber.py +0 -0
  46. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/__init__.py +0 -0
  47. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/creator/__init__.py +0 -0
  48. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/creator/run.py +0 -0
  49. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/creator/run_worker.py +0 -0
  50. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/loader/__init__.py +0 -0
  51. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/loader/run.py +0 -0
  52. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/loader/run_worker.py +0 -0
  53. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/replay/replay.py +0 -0
  54. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/safelist_client.py +0 -0
  55. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/__init__.py +0 -0
  56. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/collection.py +0 -0
  57. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  58. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  59. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/controllers/interface.py +0 -0
  60. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/scaler/run_scaler.py +0 -0
  61. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/server_base.py +0 -0
  62. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/signature_client.py +0 -0
  63. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/updater/__init__.py +0 -0
  64. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/updater/run_updater.py +0 -0
  65. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/__init__.py +0 -0
  66. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/crawler.py +0 -0
  67. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/department_map.py +0 -0
  68. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/safelist.py +0 -0
  69. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/stream_map.py +0 -0
  70. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/vacuum/worker.py +0 -0
  71. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/workflow/__init__.py +0 -0
  72. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core/workflow/run_workflow.py +0 -0
  73. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  74. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  75. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core.egg-info/requires.txt +0 -0
  76. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/assemblyline_core.egg-info/top_level.txt +0 -0
  77. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/setup.cfg +0 -0
  78. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/setup.py +0 -0
  79. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_alerter.py +0 -0
  80. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_badlist_client.py +0 -0
  81. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_expiry.py +0 -0
  82. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_plumber.py +0 -0
  83. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_replay.py +0 -0
  84. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_safelist_client.py +0 -0
  85. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_scaler.py +0 -0
  86. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_scheduler.py +0 -0
  87. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_signature_client.py +0 -0
  88. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_vacuum.py +0 -0
  89. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_worker_ingest.py +0 -0
  90. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_worker_submit.py +0 -0
  91. {assemblyline_core-4.6.0.16 → assemblyline_core-4.6.0.18}/test/test_workflow.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.6.0.16
3
+ Version: 4.6.0.18
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.6.0.18
@@ -73,6 +73,7 @@ if TYPE_CHECKING:
73
73
  from redis import Redis
74
74
 
75
75
  from assemblyline.odm.models.file import File
76
+ from assemblyline.odm.models.config import Config
76
77
 
77
78
 
78
79
  APM_SPAN_TYPE = 'handle_message'
@@ -218,8 +219,18 @@ class TemporaryFileData:
218
219
  class SubmissionTask:
219
220
  """Dispatcher internal model for submissions"""
220
221
 
221
- def __init__(self, submission, completed_queue, scheduler, datastore: AssemblylineDatastore, results=None,
222
- file_infos=None, file_tree=None, errors: Optional[Iterable[str]] = None):
222
+ def __init__(
223
+ self,
224
+ submission,
225
+ completed_queue,
226
+ scheduler,
227
+ datastore: AssemblylineDatastore,
228
+ config: Config,
229
+ results=None,
230
+ file_infos=None,
231
+ file_tree=None,
232
+ errors: Optional[Iterable[str]] = None,
233
+ ):
223
234
  self.submission: Submission = Submission(submission)
224
235
  submitter: Optional[User] = datastore.user.get_if_exists(self.submission.params.submitter)
225
236
  self.service_access_control: Optional[str] = None
@@ -227,6 +238,7 @@ class SubmissionTask:
227
238
  self.service_access_control = submitter.classification.value
228
239
 
229
240
  self.completed_queue = None
241
+
230
242
  if completed_queue:
231
243
  self.completed_queue = str(completed_queue)
232
244
 
@@ -265,9 +277,31 @@ class SubmissionTask:
265
277
  recurse_tree(file_data['children'], depth + 1)
266
278
 
267
279
  recurse_tree(file_tree, 0)
280
+ sorted_file_depth = [(k, v) for k, v in sorted(self.file_depth.items(), key=lambda fd: fd[1])]
281
+ else:
282
+ sorted_file_depth = [(self.submission.files[0].sha256, 0)]
283
+
284
+ for sha256, depth in sorted_file_depth:
285
+ # populate temporary data to root level files
286
+ if depth == 0:
287
+ # Apply initial data parameter
288
+ temp_key_config = dict(config.submission.default_temporary_keys)
289
+ temp_key_config.update(config.submission.temporary_keys)
290
+ temporary_data = TemporaryFileData(sha256, config=temp_key_config)
291
+ self.temporary_data[sha256] = temporary_data
292
+ if self.submission.params.initial_data:
293
+ try:
294
+ for key, value in dict(json.loads(self.submission.params.initial_data)).items():
295
+ if len(str(value)) > config.submission.max_temp_data_length:
296
+ continue
297
+ temporary_data.set_value(key, value)
298
+
299
+ except (ValueError, TypeError):
300
+ pass
268
301
 
269
302
  if results is not None:
270
303
  rescan = scheduler.expand_categories(self.submission.params.services.rescan)
304
+ result_keys = list(results.keys())
271
305
 
272
306
  # Replay the process of routing files for dispatcher internal state.
273
307
  for k, result in results.items():
@@ -282,24 +316,35 @@ class SubmissionTask:
282
316
  self.forbid_for_children(sha256, service_name)
283
317
 
284
318
  # Replay the process of receiving results for dispatcher internal state
285
- for k, result in results.items():
286
- sha256, service, _ = k.split('.', 2)
287
- if service not in rescan:
288
- extracted = result['response']['extracted']
289
- children: list[str] = [r['sha256'] for r in extracted]
290
- self.register_children(sha256, children)
291
- children_detail: list[tuple[str, str]] = [(r['sha256'], r['parent_relation']) for r in extracted]
292
- self.service_results[(sha256, service)] = ResultSummary(
293
- key=k, drop=result['drop_file'], score=result['result']['score'],
294
- children=children_detail, partial=result.get('partial', False))
295
-
296
- tags = Result(result).scored_tag_dict()
297
- for key, tag in tags.items():
298
- if key in self.file_tags[sha256].keys():
299
- # Sum score of already known tags
300
- self.file_tags[sha256][key]['score'] += tag['score']
301
- else:
302
- self.file_tags[sha256][key] = tag
319
+ # iterate through result based on file depth
320
+ for sha256, depth in sorted_file_depth:
321
+ results_to_process = list(filter(lambda k: sha256 in k, result_keys))
322
+ for result_key in results_to_process:
323
+ result = results[result_key]
324
+ sha256, service, _ = result_key.split(".", 2)
325
+
326
+ if service not in rescan:
327
+ extracted = result["response"]["extracted"]
328
+ children: list[str] = [r["sha256"] for r in extracted]
329
+ self.register_children(sha256, children)
330
+ children_detail: list[tuple[str, str]] = [
331
+ (r["sha256"], r["parent_relation"]) for r in extracted
332
+ ]
333
+ self.service_results[(sha256, service)] = ResultSummary(
334
+ key=result_key,
335
+ drop=result["drop_file"],
336
+ score=result["result"]["score"],
337
+ children=children_detail,
338
+ partial=result.get("partial", False),
339
+ )
340
+
341
+ tags = Result(result).scored_tag_dict()
342
+ for key, tag in tags.items():
343
+ if key in self.file_tags[sha256].keys():
344
+ # Sum score of already known tags
345
+ self.file_tags[sha256][key]["score"] += tag["score"]
346
+ else:
347
+ self.file_tags[sha256][key] = tag
303
348
 
304
349
  if errors is not None:
305
350
  for e in errors:
@@ -334,6 +379,7 @@ class SubmissionTask:
334
379
  _parent_map is for dynamic recursion prevention
335
380
  temporary_data is for cascading the temp data to children
336
381
  """
382
+
337
383
  parent_temp = self.temporary_data[parent]
338
384
  for child in children:
339
385
  if child not in self.temporary_data:
@@ -706,7 +752,13 @@ class Dispatcher(ThreadedCoreBase):
706
752
  # Start of process dispatcher transaction
707
753
  with apm_span(self.apm_client, 'submission_message'):
708
754
  # This is probably a complete task
709
- task = SubmissionTask(scheduler=self.scheduler, datastore=self.datastore, **message)
755
+
756
+ task = SubmissionTask(
757
+ scheduler=self.scheduler,
758
+ datastore=self.datastore,
759
+ config=self.config,
760
+ **message,
761
+ )
710
762
 
711
763
  # Check the sid table
712
764
  if task.sid in self.bad_sids:
@@ -739,6 +791,7 @@ class Dispatcher(ThreadedCoreBase):
739
791
 
740
792
  if not self.active_submissions.exists(sid):
741
793
  self.log.info("[%s] New submission received", sid)
794
+
742
795
  task.trace('submission_start')
743
796
  self.active_submissions.add(sid, {
744
797
  'completed_queue': task.completed_queue,
@@ -760,21 +813,6 @@ class Dispatcher(ThreadedCoreBase):
760
813
  if submission.params.quota_item and submission.params.submitter:
761
814
  self.log.info(f"[{sid}] Submission counts towards {submission.params.submitter.upper()} quota")
762
815
 
763
- # Apply initial data parameter
764
- temp_key_config = dict(self.config.submission.default_temporary_keys)
765
- temp_key_config.update(self.config.submission.temporary_keys)
766
- temporary_data = TemporaryFileData(sha256, config=temp_key_config)
767
- task.temporary_data[sha256] = temporary_data
768
- if submission.params.initial_data:
769
- try:
770
- for key, value in dict(json.loads(submission.params.initial_data)).items():
771
- if len(str(value)) > self.config.submission.max_temp_data_length:
772
- continue
773
- temporary_data.set_value(key, value)
774
-
775
- except (ValueError, TypeError) as err:
776
- self.log.warning(f"[{sid}] could not process initialization data: {err}")
777
-
778
816
  self.tasks[sid] = task
779
817
  self._submission_timeouts.set(task.sid, SUBMISSION_TOTAL_TIMEOUT, None)
780
818
 
@@ -784,7 +822,10 @@ class Dispatcher(ThreadedCoreBase):
784
822
  # Initialize ancestry chain by identifying the root file
785
823
  file_info = self.get_fileinfo(task, sha256)
786
824
  file_type = file_info.type if file_info else 'NOT_FOUND'
787
- temporary_data.local_values['ancestry'] = [[dict(type=file_type, parent_relation="ROOT", sha256=sha256)]]
825
+
826
+ task.temporary_data[sha256].local_values["ancestry"] = [
827
+ [dict(type=file_type, parent_relation="ROOT", sha256=sha256)]
828
+ ]
788
829
 
789
830
  # Start the file dispatching
790
831
  task.active_files.add(sha256)
@@ -875,7 +916,6 @@ class Dispatcher(ThreadedCoreBase):
875
916
  schedule_summary = [list(stage.keys()) for stage in task.file_schedules[sha256]]
876
917
  task.trace('schedule_built', sha256=sha256, message=str(schedule_summary))
877
918
 
878
-
879
919
  file_info = task.file_info[sha256]
880
920
  schedule: list = list(task.file_schedules[sha256])
881
921
  deep_scan, ignore_filtering = submission.params.deep_scan, submission.params.ignore_filtering
@@ -112,6 +112,7 @@ class IngestTask(odm.Model):
112
112
  ingest_id = odm.UUID() # Ingestion Identifier
113
113
  ingest_time = odm.Date(default="NOW") # Time at which the file was ingested
114
114
  notify_time = odm.Optional(odm.Date()) # Time at which the user is notify the submission is finished
115
+ to_ingest = odm.Boolean(default=False)
115
116
 
116
117
 
117
118
  class Ingester(ThreadedCoreBase):
@@ -250,7 +251,13 @@ class Ingester(ThreadedCoreBase):
250
251
  submission=sub,
251
252
  ingest_id=sub.sid,
252
253
  ))
253
- task.submission.sid = None # Reset to new random uuid
254
+
255
+ # if this is a new task from imported bundle we want to use the same sid
256
+ # because all the submission information are stored in the datastore
257
+ # else create a new sid for this submission
258
+ if "bundle.source" not in task.submission.metadata:
259
+ task.submission.sid = None # Reset to new random uuid
260
+
254
261
  # Write all input to the traffic queue
255
262
  self.traffic_queue.publish(SubmissionMessage({
256
263
  'msg': sub,
@@ -920,10 +927,15 @@ class Ingester(ThreadedCoreBase):
920
927
  return reason
921
928
 
922
929
  def submit(self, task: IngestTask):
923
- self.submit_client.submit(
924
- submission_obj=task.submission,
925
- completed_queue=COMPLETE_QUEUE_NAME,
926
- )
930
+
931
+ if "bundle.source" in task.submission.metadata:
932
+ self.submit_client.send_bundle_to_dispatch(task.submission, completed_queue=COMPLETE_QUEUE_NAME)
933
+ else:
934
+
935
+ self.submit_client.submit(
936
+ submission_obj=task.submission,
937
+ completed_queue=COMPLETE_QUEUE_NAME,
938
+ )
927
939
 
928
940
  self.timeout_queue.push(int(now(_max_time)), task.submission.scan_key)
929
941
  self.log.info(f"[{task.ingest_id} :: {task.sha256}] Submitted to dispatcher for analysis")
@@ -297,11 +297,14 @@ class APIClient(ClientBase):
297
297
  self.al_client.bundle.create(id, output=bundle_path, use_alert=use_alert)
298
298
 
299
299
  def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
300
- self.al_client.bundle.import_bundle(bundle_path,
301
- min_classification=min_classification,
302
- rescan_services=rescan_services,
303
- exist_ok=exist_ok,
304
- reclassification=reclassification)
300
+ self.al_client.bundle.import_bundle(
301
+ bundle_path,
302
+ min_classification=min_classification,
303
+ rescan_services=rescan_services,
304
+ exist_ok=exist_ok,
305
+ reclassification=reclassification,
306
+ to_ingest=True, # send submissions to ingester
307
+ )
305
308
 
306
309
  def load_json(self, file_path, reclassification=None):
307
310
  from assemblyline_client import ClientError
@@ -412,11 +415,14 @@ class DirectClient(ClientBase):
412
415
  os.rename(temp_bundle_file, bundle_path)
413
416
 
414
417
  def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
415
- import_bundle(bundle_path,
416
- min_classification=min_classification,
417
- rescan_services=rescan_services,
418
- exist_ok=exist_ok,
419
- reclassification=reclassification)
418
+ import_bundle(
419
+ bundle_path,
420
+ min_classification=min_classification,
421
+ rescan_services=rescan_services,
422
+ exist_ok=exist_ok,
423
+ reclassification=reclassification,
424
+ to_ingest=True, # send submissions to ingester
425
+ )
420
426
 
421
427
  def load_json(self, file_path, reclassification=None):
422
428
  # We're assuming all JSON that loaded has an "enabled" field
@@ -442,7 +448,6 @@ class DirectClient(ClientBase):
442
448
  else:
443
449
  raise
444
450
 
445
-
446
451
  if collection == "workflow":
447
452
  # If there has been any edits by another user, then preserve the enabled state
448
453
  # Otherwise, the workflow will be synchronized with the origin system
@@ -14,6 +14,12 @@ from time import sleep
14
14
  from typing import List, Optional, Tuple
15
15
 
16
16
  import urllib3
17
+ from assemblyline.odm.models.config import Selector
18
+ from assemblyline.odm.models.service import (
19
+ DependencyConfig,
20
+ DockerConfig,
21
+ PersistentVolume,
22
+ )
17
23
  from cryptography import x509
18
24
  from cryptography.hazmat.primitives import hashes, serialization
19
25
  from cryptography.hazmat.primitives.asymmetric import rsa
@@ -22,6 +28,7 @@ from kubernetes import client, config, watch
22
28
  from kubernetes.client import (
23
29
  V1Affinity,
24
30
  V1Capabilities,
31
+ V1ConfigMap,
25
32
  V1ConfigMapVolumeSource,
26
33
  V1Container,
27
34
  V1Deployment,
@@ -62,12 +69,6 @@ from kubernetes.client import (
62
69
  )
63
70
  from kubernetes.client.rest import ApiException
64
71
 
65
- from assemblyline.odm.models.config import Selector
66
- from assemblyline.odm.models.service import (
67
- DependencyConfig,
68
- DockerConfig,
69
- PersistentVolume,
70
- )
71
72
  from assemblyline_core.scaler.controllers.interface import ControllerInterface
72
73
 
73
74
  # RESERVE_MEMORY_PER_NODE = os.environ.get('RESERVE_MEMORY_PER_NODE')
@@ -390,7 +391,7 @@ class KubernetesController(ControllerInterface):
390
391
  def _dependency_name(self, service_name: str, container_name: str):
391
392
  return f"{self._deployment_name(service_name)}-{container_name}".lower()
392
393
 
393
- def add_config_mount(self, name: str, config_map: str, key: str, target_path: str, read_only=True, core=False):
394
+ def add_config_mount(self, name: str, config_map: str, key: Optional[str], target_path: str, read_only=True, core=False):
394
395
  volumes, mounts = self.volumes, self.mounts
395
396
  if core:
396
397
  volumes, mounts = self.core_volumes, self.core_mounts
@@ -1379,3 +1380,19 @@ class KubernetesController(ControllerInterface):
1379
1380
  for np in (existing_netpol - {np.metadata.name for np in network_policies}):
1380
1381
  self.net_api.delete_namespaced_network_policy(namespace=self.namespace, name=np,
1381
1382
  _request_timeout=API_TIMEOUT)
1383
+
1384
+ def update_config_map(self, data: dict, name: str):
1385
+ """Update or create a ConfigMap in Kubernetes."""
1386
+ config_map = V1ConfigMap(
1387
+ metadata=V1ObjectMeta(name=name, namespace=self.namespace),
1388
+ data=data
1389
+ )
1390
+ try:
1391
+ self.api.patch_namespaced_config_map(name=name, namespace=self.namespace, body=config_map,
1392
+ _request_timeout=API_TIMEOUT)
1393
+ except ApiException as error:
1394
+ if error.status == 404:
1395
+ self.api.create_namespaced_config_map(namespace=self.namespace, body=config_map,
1396
+ _request_timeout=API_TIMEOUT)
1397
+ else:
1398
+ raise
@@ -19,7 +19,6 @@ from typing import Any, Dict, Optional
19
19
 
20
20
  import elasticapm
21
21
  import yaml
22
-
23
22
  from assemblyline.common.constants import (
24
23
  SCALER_TIMEOUT_QUEUE,
25
24
  SERVICE_STATE_HASH,
@@ -44,14 +43,13 @@ from assemblyline.remote.datatypes.hash import ExpiringHash, Hash
44
43
  from assemblyline.remote.datatypes.queues.named import NamedQueue
45
44
  from assemblyline.remote.datatypes.queues.priority import PriorityQueue
46
45
  from assemblyline.remote.datatypes.queues.priority import length as pq_length
47
- from assemblyline_core.scaler.controllers import KubernetesController
46
+
47
+ from assemblyline_core.scaler import collection
48
+ from assemblyline_core.scaler.controllers import DockerController, KubernetesController
48
49
  from assemblyline_core.scaler.controllers.interface import ServiceControlError
49
50
  from assemblyline_core.server_base import ServiceStage, ThreadedCoreBase
50
51
  from assemblyline_core.updater.helper import get_registry_config
51
52
 
52
- from . import collection
53
- from .controllers import DockerController
54
-
55
53
  APM_SPAN_TYPE = 'scaler'
56
54
 
57
55
  # How often (in seconds) to download new service data, try to scale managed services,
@@ -298,6 +296,16 @@ class ScalerServer(ThreadedCoreBase):
298
296
  # be shared with privileged services.
299
297
  pass
300
298
 
299
+ # Create a configuration file specifically meant for privileged services to consume
300
+ # This should only contain the relevant information to connect to the databases
301
+ privileged_config = yaml.dump({
302
+ 'datastore': self.config.datastore.as_primitives(),
303
+ 'filestore': self.config.filestore.as_primitives(),
304
+ 'core': {
305
+ 'redis': self.config.core.redis.as_primitives()
306
+ }
307
+ })
308
+
301
309
  labels = {
302
310
  'app': 'assemblyline',
303
311
  'section': 'service',
@@ -340,7 +348,9 @@ class ScalerServer(ThreadedCoreBase):
340
348
  )
341
349
 
342
350
  # Add global configuration for privileged services
343
- self.controller.add_config_mount(KUBERNETES_AL_CONFIG, config_map=KUBERNETES_AL_CONFIG, key="config",
351
+ # Check if the ConfigMap already exists, if it does, update it
352
+ self.controller.update_config_map(data={'config': privileged_config}, name='privileged-service-config')
353
+ self.controller.add_config_mount(KUBERNETES_AL_CONFIG, config_map='privileged-service-config', key="config",
344
354
  target_path="/etc/assemblyline/config.yml", read_only=True, core=True)
345
355
 
346
356
  # If we're passed an override for server-server and it's defining an HTTPS connection, then add a global
@@ -382,7 +392,7 @@ class ScalerServer(ThreadedCoreBase):
382
392
 
383
393
  with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'config.yml'), 'w') as handle:
384
394
  # Convert to JSON before converting to YAML to account for direct ODM representation errors
385
- yaml.dump(json.loads(self.config.json()), handle)
395
+ handle.write(privileged_config)
386
396
 
387
397
  with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'classification.yml'), 'w') as handle:
388
398
  yaml.dump(get_classification().original_definition, handle)
@@ -37,6 +37,8 @@ from assemblyline.odm.models.result import Result
37
37
  from assemblyline.odm.models.submission import File, Submission
38
38
  from assemblyline.odm.models.config import Config
39
39
  from assemblyline_core.dispatching.client import DispatchClient
40
+ from assemblyline_core.ingester.constants import INGEST_QUEUE_NAME
41
+ from assemblyline.remote.datatypes.queues.named import NamedQueue
40
42
 
41
43
  Classification = forge.get_classification()
42
44
  SECONDS_PER_DAY = 24 * 60 * 60
@@ -72,6 +74,7 @@ class SubmissionClient:
72
74
 
73
75
  # A client for interacting with the dispatcher
74
76
  self.dispatcher = DispatchClient(datastore, redis)
77
+ self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, redis)
75
78
 
76
79
  def __enter__(self):
77
80
  return self
@@ -84,8 +87,16 @@ class SubmissionClient:
84
87
  self.identify.stop()
85
88
 
86
89
  @elasticapm.capture_span(span_type='submission_client')
87
- def rescan(self, submission: Submission, results: Dict[str, Result], file_infos: Dict[str, FileInfo],
88
- file_tree, errors: List[str], rescan_services: List[str]):
90
+ def rescan(
91
+ self,
92
+ submission,
93
+ results: Dict[str, Result],
94
+ file_infos: Dict[str, FileInfo],
95
+ file_tree: dict,
96
+ errors: List[str],
97
+ rescan_services: List[str],
98
+ to_ingest: bool = False,
99
+ ):
89
100
  """
90
101
  Rescan a submission started on another system.
91
102
  """
@@ -114,8 +125,29 @@ class SubmissionClient:
114
125
  self.datastore.submission.save(submission_obj.sid, submission_obj)
115
126
 
116
127
  # Dispatch the submission
117
- self.log.debug("Submission complete. Dispatching: %s", submission_obj.sid)
118
- self.dispatcher.dispatch_bundle(submission_obj, results, file_infos, file_tree, errors)
128
+ if to_ingest:
129
+ self.log.debug("Submission complete. Submission sent to ingester: %s", submission_obj.sid)
130
+
131
+ submission_obj = SubmissionObject(
132
+ {
133
+ "sid": submission["sid"],
134
+ "files": submission.get("files", []),
135
+ "metadata": submission.get("metadata", {}),
136
+ "params": submission.get("params", {}),
137
+ "notification": submission.get("notification", {}),
138
+ "scan_key": submission.get("scan_key", None),
139
+ "errors": errors,
140
+ "file_infos": file_infos,
141
+ "file_tree": file_tree,
142
+ "results": results,
143
+ }
144
+ ).as_primitives()
145
+
146
+ self.ingest_queue.push(submission_obj)
147
+
148
+ else:
149
+ self.log.debug("Submission complete. Dispatching: %s", submission_obj.sid)
150
+ self.dispatcher.dispatch_bundle(submission_obj, results, file_infos, file_tree, errors)
119
151
 
120
152
  return submission
121
153
 
@@ -252,3 +284,22 @@ class SubmissionClient:
252
284
  if extracted_path:
253
285
  if os.path.exists(extracted_path):
254
286
  os.unlink(extracted_path)
287
+
288
+ @elasticapm.capture_span(span_type="submission_client")
289
+ def send_bundle_to_dispatch(
290
+ self,
291
+ submission_obj: SubmissionObject,
292
+ completed_queue: str = None,
293
+ ):
294
+
295
+ sid = submission_obj.sid
296
+ submission = self.datastore.submission.get(sid)
297
+
298
+ self.dispatcher.dispatch_bundle(
299
+ submission=submission,
300
+ results=submission_obj.results,
301
+ file_infos=submission_obj.file_infos,
302
+ file_tree=submission_obj.file_tree,
303
+ errors=submission_obj.errors,
304
+ completed_queue=completed_queue,
305
+ )
@@ -3,7 +3,6 @@ import time
3
3
  from typing import Any, Dict, Optional
4
4
 
5
5
  import elasticapm
6
-
7
6
  from assemblyline.common import forge
8
7
  from assemblyline.common.constants import SERVICE_STATE_HASH, ServiceStatus
9
8
  from assemblyline.common.dict_utils import flatten, unflatten
@@ -22,6 +21,7 @@ from assemblyline.odm.models.service import Service
22
21
  from assemblyline.odm.models.tagging import Tagging
23
22
  from assemblyline.remote.datatypes.events import EventSender, EventWatcher
24
23
  from assemblyline.remote.datatypes.hash import ExpiringHash
24
+
25
25
  from assemblyline_core.dispatching.client import DispatchClient
26
26
 
27
27
 
@@ -161,9 +161,31 @@ class TaskingClient:
161
161
  self.datastore.service_delta.save(service.name, {'version': service.version})
162
162
  self.datastore.service_delta.commit()
163
163
  self.log.info(f"{log_prefix}{service.name} version ({service.version}) registered")
164
+ else:
165
+ # Check for any changes that should be merged into the service delta
166
+ service_delta = self.datastore.service_delta.get(service.name, as_obj=False)
167
+
168
+ # Check for any new configuration keys that should be added to the service delta
169
+ if service_delta.get('config'):
170
+ new_config = {k: v for k, v in service.config.items() if k not in service_delta['config']}
171
+ if new_config:
172
+ if 'config' not in service_delta:
173
+ service_delta['config'] = {}
174
+ service_delta['config'].update(new_config)
175
+
176
+ # Check for any new submission parameters that should be added to the service delta
177
+ if service_delta.get('submission_params'):
178
+ old_submission_params = {param['name'] for param in service_delta['submission_params']}
179
+ for param in service.submission_params:
180
+ if param['name'] not in old_submission_params:
181
+ # New parameter, add it to the old submission params
182
+ service_delta['submission_params'].append(param.as_primitives())
183
+
184
+ # Save any changes to the service delta
185
+ self.datastore.service_delta.save(service.name, service_delta)
164
186
 
165
187
  new_heuristics = []
166
-
188
+
167
189
  plan = self.datastore.heuristic.get_bulk_plan()
168
190
  for index, heuristic in enumerate(heuristics):
169
191
  heuristic_id = f'#{index}' # Set heuristic id to it's position in the list for logging purposes
@@ -1,23 +1,21 @@
1
- import os
2
- import requests
3
1
  import re
4
2
  import socket
5
3
  import string
6
4
  import time
7
-
8
- from assemblyline.common.version import FRAMEWORK_VERSION, SYSTEM_VERSION
9
- from assemblyline.odm.models.config import Config as SystemConfig, ServiceRegistry
10
- from assemblyline.odm.models.service import Service as ServiceConfig, DockerConfig
11
-
12
5
  from base64 import b64encode
13
6
  from collections import defaultdict
14
7
  from logging import Logger
15
8
  from typing import Dict, List
16
- from packaging.version import parse, Version
17
9
  from urllib.parse import urlencode
18
10
 
11
+ import requests
12
+ from assemblyline.common.version import FRAMEWORK_VERSION, SYSTEM_VERSION
13
+ from assemblyline.odm.models.config import Config as SystemConfig
14
+ from assemblyline.odm.models.config import ServiceRegistry
15
+ from assemblyline.odm.models.service import DockerConfig
16
+ from assemblyline.odm.models.service import Service as ServiceConfig
19
17
  from azure.identity import DefaultAzureCredential
20
-
18
+ from packaging.version import Version, parse
21
19
 
22
20
  DEFAULT_DOCKER_REGISTRY = "hub.docker.com"
23
21
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.6.0.16
3
+ Version: 4.6.0.18
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team