assemblyline-core 4.5.0.27__tar.gz → 4.5.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (88)
  1. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/PKG-INFO +1 -1
  2. assemblyline-core-4.5.0.29/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/badlist_client.py +2 -2
  4. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/ingester/ingester.py +27 -1
  5. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/safelist_client.py +2 -2
  6. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +4 -2
  7. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/scaler_server.py +13 -7
  8. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/tasking_client.py +9 -4
  9. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/updater/run_updater.py +6 -3
  10. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core.egg-info/PKG-INFO +1 -1
  11. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_worker_ingest.py +53 -0
  12. assemblyline-core-4.5.0.27/assemblyline_core/VERSION +0 -1
  13. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/LICENCE.md +0 -0
  14. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/README.md +0 -0
  15. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/__init__.py +0 -0
  16. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/alerter/__init__.py +0 -0
  17. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/alerter/processing.py +0 -0
  18. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/alerter/run_alerter.py +0 -0
  19. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/archiver/__init__.py +0 -0
  20. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/archiver/run_archiver.py +0 -0
  21. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/__init__.py +0 -0
  22. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/__main__.py +0 -0
  23. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/client.py +0 -0
  24. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/dispatcher.py +0 -0
  25. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/schedules.py +0 -0
  26. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/dispatching/timeout.py +0 -0
  27. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/expiry/__init__.py +0 -0
  28. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/expiry/run_expiry.py +0 -0
  29. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/ingester/__init__.py +0 -0
  30. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/ingester/__main__.py +0 -0
  31. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/ingester/constants.py +0 -0
  32. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/__init__.py +0 -0
  33. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/es_metrics.py +0 -0
  34. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  35. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/helper.py +0 -0
  36. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/metrics_server.py +0 -0
  37. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  38. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  39. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  40. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/plumber/__init__.py +0 -0
  41. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/plumber/run_plumber.py +0 -0
  42. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/__init__.py +0 -0
  43. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/client.py +0 -0
  44. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/creator/__init__.py +0 -0
  45. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/creator/run.py +0 -0
  46. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/creator/run_worker.py +0 -0
  47. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/loader/__init__.py +0 -0
  48. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/loader/run.py +0 -0
  49. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/loader/run_worker.py +0 -0
  50. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/replay/replay.py +0 -0
  51. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/__init__.py +0 -0
  52. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/collection.py +0 -0
  53. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  54. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  55. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/controllers/interface.py +0 -0
  56. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/scaler/run_scaler.py +0 -0
  57. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/server_base.py +0 -0
  58. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/signature_client.py +0 -0
  59. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/submission_client.py +0 -0
  60. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/updater/__init__.py +0 -0
  61. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/updater/helper.py +0 -0
  62. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/__init__.py +0 -0
  63. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/crawler.py +0 -0
  64. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/department_map.py +0 -0
  65. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/safelist.py +0 -0
  66. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/stream_map.py +0 -0
  67. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/vacuum/worker.py +0 -0
  68. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/workflow/__init__.py +0 -0
  69. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core/workflow/run_workflow.py +0 -0
  70. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  71. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  72. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core.egg-info/requires.txt +0 -0
  73. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/assemblyline_core.egg-info/top_level.txt +0 -0
  74. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/setup.cfg +0 -0
  75. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/setup.py +0 -0
  76. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_alerter.py +0 -0
  77. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_badlist_client.py +0 -0
  78. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_dispatcher.py +0 -0
  79. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_expiry.py +0 -0
  80. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_plumber.py +0 -0
  81. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_replay.py +0 -0
  82. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_safelist_client.py +0 -0
  83. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_scaler.py +0 -0
  84. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_scheduler.py +0 -0
  85. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_signature_client.py +0 -0
  86. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_simulation.py +0 -0
  87. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_vacuum.py +0 -0
  88. {assemblyline-core-4.5.0.27 → assemblyline-core-4.5.0.29}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.0.27
3
+ Version: 4.5.0.29
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.5.0.29
@@ -52,9 +52,9 @@ class BadlistClient:
52
52
  data.setdefault('file', {})
53
53
 
54
54
  # Ensure expiry_ts is set on tag-related items
55
- dtl = data.pop('dtl', None) or self.config.core.expiry.badlisted_tag_dtl * 24 * 3600
55
+ dtl = data.pop('dtl', None) or self.config.core.expiry.badlisted_tag_dtl
56
56
  if dtl:
57
- data['expiry_ts'] = now_as_iso(dtl)
57
+ data['expiry_ts'] = now_as_iso(dtl * 24 * 3600)
58
58
 
59
59
  # Set last updated
60
60
  data['added'] = data['updated'] = now_as_iso()
@@ -595,7 +595,6 @@ class Ingester(ThreadedCoreBase):
595
595
  task.params.submitter) if g in str(task.params.classification)]
596
596
 
597
597
  # Check if this file is already being processed
598
- self.stamp_filescore_key(task)
599
598
  pprevious, previous, score = None, None, None
600
599
  if not param.ignore_cache:
601
600
  pprevious, previous, score, _ = self.check(task, count_miss=False)
@@ -627,6 +626,33 @@ class Ingester(ThreadedCoreBase):
627
626
  # (So we don't end up dropping the resubmission).
628
627
  if previous:
629
628
  self.counter.increment('duplicates')
629
+
630
+ if self.config.core.ingester.always_create_submission:
631
+ # Create a submission record based on the cache hit
632
+ submission = self.datastore.submission.get(previous, as_obj=False)
633
+
634
+ # Assign the current submission as the PSID for the new submission
635
+ pprevious = previous
636
+ previous = task.ingest_id
637
+ task.params.psid = pprevious
638
+
639
+ submission.update({
640
+ 'archived': False,
641
+ 'archive_ts': None,
642
+ 'classification': task.params.classification,
643
+ 'expiry_ts':now_as_iso(task.params.ttl * 24 * 60 * 60),
644
+ 'from_archive': False,
645
+ 'metadata': task.submission.metadata,
646
+ 'params': task.params.as_primitives(),
647
+ 'sid': previous,
648
+ 'to_be_deleted': False,
649
+ 'times': {
650
+ 'submitted': task.ingest_time,
651
+ 'completed': "NOW"
652
+ },
653
+ })
654
+ self.datastore.submission.save(previous, submission)
655
+
630
656
  self.finalize(pprevious, previous, score, task, cache=True)
631
657
 
632
658
  # On cache hits of any kind we want to send out a completed message
@@ -69,9 +69,9 @@ class SafelistClient:
69
69
  data.setdefault('file', {})
70
70
 
71
71
  # Ensure expiry_ts is set on tag-related items
72
- dtl = data.pop('dtl', None) or self.config.core.expiry.safelisted_tag_dtl * 24 * 3600
72
+ dtl = data.pop('dtl', None) or self.config.core.expiry.safelisted_tag_dtl
73
73
  if dtl:
74
- data['expiry_ts'] = now_as_iso(dtl)
74
+ data['expiry_ts'] = now_as_iso(dtl * 24 * 3600)
75
75
 
76
76
  # Set last updated
77
77
  data['added'] = data['updated'] = now_as_iso()
@@ -26,7 +26,7 @@ from kubernetes.client import V1Deployment, V1DeploymentSpec, V1PodTemplateSpec,
26
26
  V1PersistentVolumeClaimSpec, V1NetworkPolicy, V1NetworkPolicySpec, V1NetworkPolicyEgressRule, V1NetworkPolicyPeer, \
27
27
  V1NetworkPolicyIngressRule, V1Secret, V1SecretVolumeSource, V1LocalObjectReference, V1Service, \
28
28
  V1ServiceSpec, V1ServicePort, V1PodSecurityContext, V1Probe, V1ExecAction, V1SecurityContext, \
29
- V1Affinity, V1NodeAffinity, V1NodeSelector, V1NodeSelectorTerm, V1NodeSelectorRequirement
29
+ V1Affinity, V1NodeAffinity, V1NodeSelector, V1NodeSelectorTerm, V1NodeSelectorRequirement, V1Toleration
30
30
  from kubernetes.client.rest import ApiException
31
31
  from assemblyline.odm.models.service import DependencyConfig, DockerConfig, PersistentVolume
32
32
 
@@ -241,7 +241,7 @@ def parse_cpu(string: str) -> float:
241
241
  class KubernetesController(ControllerInterface):
242
242
  def __init__(self, logger, namespace: str, prefix: str, priority: str, dependency_priority: str,
243
243
  cpu_reservation: float, linux_node_selector: Selector, labels=None, log_level="INFO", core_env={},
244
- default_service_account=None, cluster_pod_list=True):
244
+ default_service_account=None, cluster_pod_list=True, default_service_tolerations = []):
245
245
  # Try loading a kubernetes connection from either the fact that we are running
246
246
  # inside of a cluster, or have a config file that tells us how
247
247
  try:
@@ -285,6 +285,7 @@ class KubernetesController(ControllerInterface):
285
285
  self._service_limited_env: dict[str, dict[str, str]] = defaultdict(dict)
286
286
  self.default_service_account: Optional[str] = default_service_account
287
287
  self.cluster_pod_list = cluster_pod_list
288
+ self.default_service_tolerations = [V1Toleration(**toleration.as_primitives()) for toleration in default_service_tolerations]
288
289
 
289
290
  # A record of previously reported events so that we don't report the same message repeatedly, fill it with
290
291
  # existing messages so we don't have a huge dump of duplicates on restart
@@ -849,6 +850,7 @@ class KubernetesController(ControllerInterface):
849
850
  security_context=V1PodSecurityContext(fs_group=1000),
850
851
  service_account_name=service_account,
851
852
  affinity=selector_to_node_affinity(self.linux_node_selector),
853
+ tolerations=self.default_service_tolerations
852
854
  )
853
855
 
854
856
  if use_pull_secret:
@@ -17,6 +17,7 @@ import copy
17
17
  from contextlib import contextmanager
18
18
 
19
19
  import elasticapm
20
+ import json
20
21
  import yaml
21
22
 
22
23
  from assemblyline.remote.datatypes.queues.named import NamedQueue
@@ -285,11 +286,13 @@ class ScalerServer(ThreadedCoreBase):
285
286
  'privilege': 'service'
286
287
  }
287
288
 
289
+ service_defaults_config = self.config.core.scaler.service_defaults
290
+
288
291
  # If Scaler has envs that set service-server env, then that should override configured values
289
292
  if SERVICE_API_HOST:
290
- self.config.core.scaler.service_defaults.environment = \
293
+ service_defaults_config.environment = \
291
294
  [EnvironmentVariable(dict(name="SERVICE_API_HOST", value=SERVICE_API_HOST))] + \
292
- [env for env in self.config.core.scaler.service_defaults.environment if env.name != "SERVICE_API_HOST"]
295
+ [env for env in service_defaults_config.environment if env.name != "SERVICE_API_HOST"]
293
296
 
294
297
  if self.config.core.scaler.additional_labels:
295
298
  labels.update({k: v for k, v in (_l.split("=") for _l in self.config.core.scaler.additional_labels)})
@@ -304,7 +307,9 @@ class ScalerServer(ThreadedCoreBase):
304
307
  log_level=self.config.logging.log_level,
305
308
  core_env=core_env,
306
309
  cluster_pod_list=self.config.core.scaler.cluster_pod_list,
307
- default_service_account=self.config.services.service_account)
310
+ default_service_account=self.config.services.service_account,
311
+ default_service_tolerations=service_defaults_config.tolerations
312
+ )
308
313
 
309
314
  # Add global configuration for privileged services
310
315
  self.controller.add_config_mount(KUBERNETES_AL_CONFIG, config_map=KUBERNETES_AL_CONFIG, key="config",
@@ -313,7 +318,7 @@ class ScalerServer(ThreadedCoreBase):
313
318
  # If we're passed an override for server-server and it's defining an HTTPS connection, then add a global
314
319
  # mount for the Root CA that needs to be mounted
315
320
  if INTERNAL_ENCRYPT:
316
- self.config.core.scaler.service_defaults.mounts.append(Mount(dict(
321
+ service_defaults_config.mounts.append(Mount(dict(
317
322
  name="root-ca",
318
323
  path="/etc/assemblyline/ssl/al_root-ca.crt",
319
324
  resource_type="secret",
@@ -322,7 +327,7 @@ class ScalerServer(ThreadedCoreBase):
322
327
  )))
323
328
 
324
329
  # Add default mounts for (non-)privileged services
325
- for mount in self.config.core.scaler.service_defaults.mounts:
330
+ for mount in service_defaults_config.mounts:
326
331
  # Deprecated configuration for mounting ConfigMap
327
332
  # TODO: Deprecate code on next major change
328
333
  if mount.config_map:
@@ -356,7 +361,8 @@ class ScalerServer(ThreadedCoreBase):
356
361
  self.controller.core_mounts.append((DOCKER_CONFIGURATION_VOLUME, '/etc/assemblyline/'))
357
362
 
358
363
  with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'config.yml'), 'w') as handle:
359
- yaml.dump(self.config.as_primitives(), handle)
364
+ # Convert to JSON before converting to YAML to account for direct ODM representation errors
365
+ yaml.dump(json.loads(self.config.json()), handle)
360
366
 
361
367
  with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'classification.yml'), 'w') as handle:
362
368
  yaml.dump(get_classification().original_definition, handle)
@@ -365,7 +371,7 @@ class ScalerServer(ThreadedCoreBase):
365
371
  if CLASSIFICATION_HOST_PATH:
366
372
  self.controller.global_mounts.append((CLASSIFICATION_HOST_PATH, '/etc/assemblyline/classification.yml'))
367
373
 
368
- for mount in self.config.core.scaler.service_defaults.mounts:
374
+ for mount in service_defaults_config.mounts:
369
375
  # Mounts are all storage-based since there's no equivalent to ConfigMaps in Docker
370
376
  if mount.privileged_only:
371
377
  self.controller.core_mounts.append((mount.name, mount.path))
@@ -91,7 +91,7 @@ class TaskingClient:
91
91
  self.event_listener.stop()
92
92
 
93
93
  @elasticapm.capture_span(span_type='tasking_client')
94
- def upload_file(self, file_path, classification, ttl, is_section_image, expected_sha256=None):
94
+ def upload_file(self, file_path, classification, ttl, is_section_image, is_supplementary, expected_sha256=None):
95
95
  # Identify the file info of the uploaded file
96
96
  file_info = self.identify.fileinfo(file_path)
97
97
 
@@ -105,8 +105,12 @@ class TaskingClient:
105
105
  file_info['expiry_ts'] = None
106
106
 
107
107
  # Update the datastore with the uploaded file
108
- self.datastore.save_or_freshen_file(file_info['sha256'], file_info, file_info['expiry_ts'],
109
- file_info['classification'], is_section_image=is_section_image)
108
+ self.datastore.save_or_freshen_file(
109
+ file_info['sha256'],
110
+ file_info, file_info['expiry_ts'],
111
+ file_info['classification'],
112
+ is_section_image=is_section_image,
113
+ is_supplementary=is_supplementary)
110
114
 
111
115
  # Upload file to the filestore (upload already checks if the file exists)
112
116
  self.filestore.upload(file_path, file_info['sha256'])
@@ -349,7 +353,8 @@ class TaskingClient:
349
353
  file_info['classification'] = item['classification']
350
354
  self.datastore.save_or_freshen_file(item['sha256'], file_info,
351
355
  file_info['expiry_ts'], file_info['classification'],
352
- is_section_image=item.get('is_section_image', False))
356
+ is_section_image=item.get('is_section_image', False),
357
+ is_supplementary=item.get('is_supplementary', False))
353
358
  return False
354
359
 
355
360
  if task.ttl:
@@ -15,7 +15,7 @@ import docker
15
15
 
16
16
  from kubernetes.client import V1Job, V1ObjectMeta, V1JobSpec, V1PodTemplateSpec, V1PodSpec, V1Volume, \
17
17
  V1VolumeMount, V1EnvVar, V1Container, V1ResourceRequirements, \
18
- V1ConfigMapVolumeSource, V1Secret, V1SecretVolumeSource, V1LocalObjectReference
18
+ V1ConfigMapVolumeSource, V1Secret, V1SecretVolumeSource, V1LocalObjectReference, V1Toleration
19
19
  from kubernetes import client, config
20
20
  from kubernetes.client.rest import ApiException
21
21
 
@@ -148,7 +148,7 @@ class DockerUpdateInterface:
148
148
 
149
149
  class KubernetesUpdateInterface:
150
150
  def __init__(self, logger, prefix, namespace, priority_class, extra_labels, linux_node_selector: Selector,
151
- log_level="INFO", default_service_account=None):
151
+ log_level="INFO", default_service_account=None, default_service_tolerations=[]):
152
152
  # Try loading a kubernetes connection from either the fact that we are running
153
153
  # inside of a cluster, or we have a configuration in the normal location
154
154
  try:
@@ -181,6 +181,8 @@ class KubernetesUpdateInterface:
181
181
  self.default_service_account = default_service_account
182
182
  self.secret_env = []
183
183
  self.linux_node_selector = linux_node_selector
184
+ self.default_service_tolerations = [V1Toleration(**toleration.as_primitives()) for toleration in default_service_tolerations]
185
+
184
186
 
185
187
  # Get the deployment of this process. Use that information to fill out the secret info
186
188
  deployment = self.apps_api.read_namespaced_deployment(name='updater', namespace=self.namespace)
@@ -465,7 +467,8 @@ class ServiceUpdater(ThreadedCoreBase):
465
467
  extra_labels=extra_labels,
466
468
  log_level=self.config.logging.log_level,
467
469
  default_service_account=self.config.services.service_account,
468
- linux_node_selector=self.config.core.scaler.linux_node_selector)
470
+ linux_node_selector=self.config.core.scaler.linux_node_selector,
471
+ default_service_tolerations=self.config.core.scaler.service_defaults.tolerations)
469
472
  # Add all additional mounts to privileged services
470
473
  self.mounts = self.config.core.scaler.service_defaults.mounts
471
474
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.0.27
3
+ Version: 4.5.0.29
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -193,3 +193,56 @@ def test_ingest_size_error(ingest_harness):
193
193
  queue = ingester.notification_queues[queue_name]
194
194
  message = queue.pop()
195
195
  assert message is not None
196
+
197
+ def test_ingest_always_create_submission(ingest_harness):
198
+ datastore, ingester, in_queue = ingest_harness
199
+
200
+ # Simulate configuration where we'll always create a submission
201
+ ingester.config.core.ingester.always_create_submission = True
202
+ get_if_exists = datastore.filescore.get_if_exists
203
+ try:
204
+ # Add a valid file score for all files
205
+ from assemblyline.odm.models.filescore import FileScore
206
+ from assemblyline.odm.models.submission import Submission
207
+ datastore.filescore.get_if_exists = mock.MagicMock(
208
+ return_value=FileScore(dict(psid='000', expiry_ts=0, errors=0, score=10, sid='001', time=time.time()))
209
+ )
210
+ # Create a submission for cache hit
211
+ old_sub = random_minimal_obj(Submission)
212
+ old_sub.sid = '001'
213
+ old_sub.params.psid = '000'
214
+ old_sub = old_sub.as_primitives()
215
+ datastore.submission.save('001', old_sub)
216
+
217
+ # Ingest a file
218
+ submission_msg = make_message(message={'sid': '002', 'metadata': {'blah': 'blah'}})
219
+ submission_msg['sid'] = '002'
220
+ in_queue.push(submission_msg)
221
+ ingester.handle_ingest()
222
+
223
+ # No file has made it into the internal buffer => cache hit and drop
224
+ datastore.filescore.get_if_exists.assert_called_once()
225
+ ingester.counter.increment.assert_any_call('cache_hit')
226
+ ingester.counter.increment.assert_any_call('duplicates')
227
+ assert ingester.unique_queue.length() == 0
228
+ assert ingester.ingest_queue.length() == 0
229
+
230
+ # Check to see if new submission was created
231
+ new_sub = datastore.submission.get_if_exists('002', as_obj=False)
232
+ assert new_sub and new_sub['params']['psid'] == old_sub['sid']
233
+
234
+ # Check to see if certain properties are same (anything relating to analysis)
235
+ assert all([old_sub.get(attr) == new_sub.get(attr) \
236
+ for attr in ['error_count', 'errors', 'file_count', 'files', 'max_score', 'results', 'state', 'verdict']])
237
+
238
+ # Check to see if certain properties are different
239
+ # (anything that isn't related to analysis but can be set at submission time)
240
+ assert all([old_sub.get(attr) != new_sub.get(attr) \
241
+ for attr in ['expiry_ts', 'metadata', 'params', 'times']])
242
+
243
+ # Check to see if certain properties have been nullified
244
+ # (properties that are set outside of submission)
245
+ assert not all([new_sub.get(attr) \
246
+ for attr in ['archived', 'archive_ts', 'to_be_deleted', 'from_archive']])
247
+ finally:
248
+ datastore.filescore.get_if_exists = get_if_exists
@@ -1 +0,0 @@
1
- 4.5.0.27