assemblyline-core 4.5.1.dev493__tar.gz → 4.5.1.dev497__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (90)
  1. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/PKG-INFO +1 -1
  2. assemblyline_core-4.5.1.dev497/assemblyline_core/VERSION +1 -0
  3. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/dispatcher.py +9 -5
  4. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/ingester/ingester.py +32 -16
  5. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core.egg-info/PKG-INFO +1 -1
  6. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_simulation.py +106 -5
  7. assemblyline_core-4.5.1.dev493/assemblyline_core/VERSION +0 -1
  8. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/LICENCE.md +0 -0
  9. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/README.md +0 -0
  10. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/__init__.py +0 -0
  11. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/alerter/__init__.py +0 -0
  12. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/alerter/processing.py +0 -0
  13. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/alerter/run_alerter.py +0 -0
  14. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/archiver/__init__.py +0 -0
  15. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/archiver/run_archiver.py +0 -0
  16. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/badlist_client.py +0 -0
  17. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/__init__.py +0 -0
  18. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/__main__.py +0 -0
  19. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/client.py +0 -0
  20. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/schedules.py +0 -0
  21. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/dispatching/timeout.py +0 -0
  22. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/expiry/__init__.py +0 -0
  23. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/expiry/run_expiry.py +0 -0
  24. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/ingester/__init__.py +0 -0
  25. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/ingester/__main__.py +0 -0
  26. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/ingester/constants.py +0 -0
  27. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/__init__.py +0 -0
  28. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/es_metrics.py +0 -0
  29. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  30. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/helper.py +0 -0
  31. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/metrics_server.py +0 -0
  32. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  33. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  34. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  35. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/plumber/__init__.py +0 -0
  36. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/plumber/run_plumber.py +0 -0
  37. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/__init__.py +0 -0
  38. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/client.py +0 -0
  39. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/creator/__init__.py +0 -0
  40. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/creator/run.py +0 -0
  41. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/creator/run_worker.py +0 -0
  42. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/loader/__init__.py +0 -0
  43. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/loader/run.py +0 -0
  44. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/loader/run_worker.py +0 -0
  45. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/replay/replay.py +0 -0
  46. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/safelist_client.py +0 -0
  47. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/__init__.py +0 -0
  48. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/collection.py +0 -0
  49. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  50. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  51. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/controllers/interface.py +0 -0
  52. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  53. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/run_scaler.py +0 -0
  54. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/scaler/scaler_server.py +0 -0
  55. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/server_base.py +0 -0
  56. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/signature_client.py +0 -0
  57. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/submission_client.py +0 -0
  58. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/tasking_client.py +0 -0
  59. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/updater/__init__.py +0 -0
  60. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/updater/helper.py +0 -0
  61. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/updater/run_updater.py +0 -0
  62. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/__init__.py +0 -0
  63. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/crawler.py +0 -0
  64. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/department_map.py +0 -0
  65. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/safelist.py +0 -0
  66. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/stream_map.py +0 -0
  67. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/vacuum/worker.py +0 -0
  68. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/workflow/__init__.py +0 -0
  69. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core/workflow/run_workflow.py +0 -0
  70. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  71. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  72. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core.egg-info/requires.txt +0 -0
  73. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/assemblyline_core.egg-info/top_level.txt +0 -0
  74. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/setup.cfg +0 -0
  75. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/setup.py +0 -0
  76. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_alerter.py +0 -0
  77. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_badlist_client.py +0 -0
  78. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_dispatcher.py +0 -0
  79. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_expiry.py +0 -0
  80. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_plumber.py +0 -0
  81. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_replay.py +0 -0
  82. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_safelist_client.py +0 -0
  83. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_scaler.py +0 -0
  84. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_scheduler.py +0 -0
  85. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_signature_client.py +0 -0
  86. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_tasking_client.py +0 -0
  87. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_vacuum.py +0 -0
  88. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_worker_ingest.py +0 -0
  89. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_worker_submit.py +0 -0
  90. {assemblyline_core-4.5.1.dev493 → assemblyline_core-4.5.1.dev497}/test/test_workflow.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev493
3
+ Version: 4.5.1.dev497
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.5.1.dev497
@@ -351,15 +351,17 @@ class SubmissionTask:
351
351
  values=values,
352
352
  )
353
353
 
354
- def partial_result(self, sha256, service_name):
354
+ def partial_result(self, sha256, service_name) -> bool:
355
355
  """Note that a partial result has been recieved. If a dispatch was requested process that now."""
356
356
  try:
357
357
  entry = self.monitoring[(sha256, service_name)]
358
358
  except KeyError:
359
- return
359
+ return False
360
360
 
361
361
  if entry.dispatch_needed:
362
362
  self.redispatch_service(sha256, service_name)
363
+ return True
364
+ return False
363
365
 
364
366
  def clear_monitoring_entry(self, sha256, service_name):
365
367
  """A service has completed normally. If the service is monitoring clear out the record."""
@@ -1426,9 +1428,11 @@ class Dispatcher(ThreadedCoreBase):
1426
1428
  self.clear_timeout(task, sha256, service_name)
1427
1429
  task.service_logs.pop((sha256, service_name), None)
1428
1430
 
1431
+ force_redispatch = set()
1429
1432
  if summary.partial:
1430
1433
  self.log.info("[%s/%s] %s returned partial results", sid, sha256, service_name)
1431
- task.partial_result(sha256, service_name)
1434
+ if task.partial_result(sha256, service_name):
1435
+ force_redispatch.add(sha256)
1432
1436
  else:
1433
1437
  task.clear_monitoring_entry(sha256, service_name)
1434
1438
 
@@ -1475,11 +1479,11 @@ class Dispatcher(ThreadedCoreBase):
1475
1479
  summary.children = [(c, 'EXTRACTED') for c in old_children]
1476
1480
 
1477
1481
  # Record the result as a summary
1478
- task.service_results[(sha256, service_name)] = summary
1482
+ if not force_redispatch:
1483
+ task.service_results[(sha256, service_name)] = summary
1479
1484
  task.register_children(sha256, [c for c, _ in summary.children])
1480
1485
 
1481
1486
  # Update the temporary data table for this file
1482
- force_redispatch = set()
1483
1487
  for key, value in (temporary_data or {}).items():
1484
1488
  if len(str(value)) <= self.config.submission.max_temp_data_length:
1485
1489
  if task.temporary_data[sha256].set_value(key, value):
@@ -18,34 +18,37 @@ from typing import Any, Iterable, List, Optional, Tuple
18
18
 
19
19
  import elasticapm
20
20
 
21
- from assemblyline.common.postprocess import ActionWorker
22
- from assemblyline_core.server_base import ThreadedCoreBase
21
+ from assemblyline import odm
22
+ from assemblyline.common import exceptions, forge, isotime
23
23
  from assemblyline.common.constants import DROP_PRIORITY
24
- from assemblyline.common.metrics import MetricsFactory
25
- from assemblyline.common.str_utils import dotdump, safe_str
26
24
  from assemblyline.common.exceptions import get_stacktrace_info
27
- from assemblyline.common.isotime import now, now_as_iso
28
25
  from assemblyline.common.importing import load_module_by_path
29
- from assemblyline.common import forge, exceptions, isotime
26
+ from assemblyline.common.isotime import now, now_as_iso
27
+ from assemblyline.common.metrics import MetricsFactory
28
+ from assemblyline.common.postprocess import ActionWorker
29
+ from assemblyline.common.str_utils import dotdump, safe_str
30
30
  from assemblyline.datastore.exceptions import DataStoreException
31
31
  from assemblyline.filestore import CorruptedFileStoreException, FileStoreException
32
+ from assemblyline.odm.messages.ingest_heartbeat import Metrics
33
+ from assemblyline.odm.messages.submission import Submission as MessageSubmission
34
+ from assemblyline.odm.messages.submission import SubmissionMessage
35
+ from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
32
36
  from assemblyline.odm.models.filescore import FileScore
37
+ from assemblyline.odm.models.submission import Submission as DatabaseSubmission
38
+ from assemblyline.odm.models.submission import SubmissionParams
33
39
  from assemblyline.odm.models.user import User
34
- from assemblyline.odm.messages.ingest_heartbeat import Metrics
35
- from assemblyline.remote.datatypes.queues.named import NamedQueue
36
- from assemblyline.remote.datatypes.queues.priority import PriorityQueue
40
+ from assemblyline.remote.datatypes.events import EventWatcher
41
+ from assemblyline.remote.datatypes.hash import Hash
37
42
  from assemblyline.remote.datatypes.queues.comms import CommsQueue
38
43
  from assemblyline.remote.datatypes.queues.multi import MultiQueue
39
- from assemblyline.remote.datatypes.hash import Hash
44
+ from assemblyline.remote.datatypes.queues.named import NamedQueue
45
+ from assemblyline.remote.datatypes.queues.priority import PriorityQueue
40
46
  from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
41
- from assemblyline import odm
42
- from assemblyline.odm.models.submission import SubmissionParams, Submission as DatabaseSubmission
43
- from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
44
- from assemblyline.odm.messages.submission import Submission as MessageSubmission, SubmissionMessage
45
-
46
47
  from assemblyline_core.dispatching.dispatcher import Dispatcher
48
+ from assemblyline_core.server_base import ThreadedCoreBase
47
49
  from assemblyline_core.submission_client import SubmissionClient
48
- from .constants import INGEST_QUEUE_NAME, drop_chance, COMPLETE_QUEUE_NAME
50
+
51
+ from .constants import COMPLETE_QUEUE_NAME, INGEST_QUEUE_NAME, drop_chance
49
52
 
50
53
  _dup_prefix = 'w-m-'
51
54
  _notification_queue_prefix = 'nq-'
@@ -185,6 +188,11 @@ class Ingester(ThreadedCoreBase):
185
188
  self.async_submission_tracker = UserQuotaTracker('async_submissions', timeout=24 * 60 * 60, # 1 day timeout
186
189
  redis=self.redis_persist)
187
190
 
191
+ # Watchers
192
+ self.submission_delete_watcher = EventWatcher(self.redis)
193
+ self.submission_delete_watcher.register("delete.submission", self.handle_submission_delete)
194
+ self.submission_delete_watcher.start()
195
+
188
196
  if self.config.core.metrics.apm_server.server_url is not None:
189
197
  self.log.info(f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}")
190
198
  elasticapm.instrument()
@@ -399,6 +407,14 @@ class Ingester(ThreadedCoreBase):
399
407
  if self.apm_client:
400
408
  self.apm_client.end_transaction('ingest_submit', 'exception')
401
409
 
410
+ def handle_submission_delete(self, sid: Optional[str]):
411
+ if not sid:
412
+ return
413
+
414
+ # Upon submission deletion, clean up the locally cached filescores associated with the SID
415
+ with self.cache_lock:
416
+ self.cache = {k: v for k, v in self.cache.items() if v.sid != sid}
417
+
402
418
  def handle_complete(self):
403
419
  while self.running:
404
420
  result = self.complete_queue.pop(timeout=3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev493
3
+ Version: 4.5.1.dev497
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -72,6 +72,9 @@ class MockService(ServerBase):
72
72
  self.dispatch_client = DispatchClient(self.datastore, redis)
73
73
  self.hits = dict()
74
74
  self.drops = dict()
75
+ self.finish = dict()
76
+ self.local_lock = threading.Event()
77
+ self.local_lock.set()
75
78
 
76
79
  def try_run(self):
77
80
  while self.running:
@@ -97,6 +100,9 @@ class MockService(ServerBase):
97
100
  if instructions.get('lock', False):
98
101
  _global_semaphore.acquire(blocking=True, timeout=instructions['lock'])
99
102
 
103
+ if instructions.get('local_lock', False):
104
+ self.local_lock.wait(instructions['local_lock'])
105
+
100
106
  if 'drop' in instructions:
101
107
  if instructions['drop'] >= hits:
102
108
  self.drops[task.fileinfo.sha256] = self.drops.get(task.fileinfo.sha256, 0) + 1
@@ -124,8 +130,10 @@ class MockService(ServerBase):
124
130
  partial = True
125
131
  break
126
132
 
127
- if partial:
128
- print(self.service_name, "will produce partial results")
133
+ if partial:
134
+ print(self.service_name, "will produce partial results")
135
+ else:
136
+ print(self.service_name, "will produce complete results")
129
137
 
130
138
  result_data = {
131
139
  'archive_ts': None,
@@ -151,15 +159,17 @@ class MockService(ServerBase):
151
159
  result_key = result.build_key(get_random_id())
152
160
  self.dispatch_client.service_finished(task.sid, result_key, result,
153
161
  temporary_data=instructions.get('temporary_data'))
162
+ self.finish[task.fileinfo.sha256] = self.finish.get(task.fileinfo.sha256, 0) + 1
154
163
 
155
164
 
156
165
  class CoreSession:
157
- def __init__(self, config, ingest):
166
+ def __init__(self, config, ingest, services):
158
167
  self.ds: typing.Optional[AssemblylineDatastore] = None
159
168
  self.filestore = None
160
169
  self.redis = None
161
170
  self.config: Config = config
162
171
  self.ingest: Ingester = ingest
172
+ self.services: list[MockService] = services
163
173
  self.dispatcher: Dispatcher
164
174
 
165
175
  @property
@@ -253,7 +263,7 @@ def core(request, redis, filestore, config, clean_datastore: AssemblylineDatasto
253
263
  service_config: list[tuple[str, int, str, dict]] = [
254
264
  ('pre', 1, 'EXTRACT', {'extra_data': True, 'monitored_keys': ['passwords']}),
255
265
  ('core-a', 2, 'CORE', {}),
256
- ('core-b', 1, 'CORE', {}),
266
+ ('core-b', 1, 'CORE', {'extra_data': True, 'monitored_keys': ['passwords']}),
257
267
  ('finish', 1, 'POST', {'extra_data': True})
258
268
  ]
259
269
 
@@ -282,7 +292,7 @@ def core(request, redis, filestore, config, clean_datastore: AssemblylineDatasto
282
292
 
283
293
  ingester = Ingester(datastore=ds, redis=redis, persistent_redis=redis, config=config)
284
294
 
285
- fields = CoreSession(config, ingester)
295
+ fields = CoreSession(config, ingester, services)
286
296
  fields.redis = redis
287
297
  fields.ds = ds
288
298
 
@@ -1275,6 +1285,97 @@ def test_temp_data_monitoring(core: CoreSession, metrics):
1275
1285
  assert partial_results == 0, 'partial_results'
1276
1286
 
1277
1287
 
1288
+ def test_final_partial(core: CoreSession, metrics):
1289
+ # This test was written to cover an error where a partial result produced as the final
1290
+ # result of a submission would not trigger dispatching when it should due to data
1291
+ # that was produced while it was running.
1292
+
1293
+ # Both services run at the same time, but one requires info from the other.
1294
+ # We lock down the timing of the service completion so that:
1295
+ # a) both run at the same time so that in its first run core-b does not have the
1296
+ # temp data it wants and produces a partial result.
1297
+ # b) core-a finishes before core-b adding the temporary data to the dispatcher
1298
+ # c) core-b finishes and should trigger a rerun with the data to produce a full result
1299
+ sha, size = ready_body(core, {
1300
+ 'core-a': {'local_lock': 10, 'temporary_data': {'passwords': ['test_temp_data_monitoring']}},
1301
+ 'core-b': {'local_lock': 10, 'partial': {'passwords': 'test_temp_data_monitoring'}},
1302
+ })
1303
+
1304
+ core_a = [s for s in core.services if s.service_name == 'core-a']
1305
+ core_b = [s for s in core.services if s.service_name == 'core-b'][0]
1306
+
1307
+ for service in core_a + [core_b]:
1308
+ service.local_lock.clear()
1309
+
1310
+ core.ingest_queue.push(SubmissionInput(dict(
1311
+ metadata={},
1312
+ params=dict(
1313
+ description="file abc123",
1314
+ services=dict(selected=['core-a', 'core-b']),
1315
+ submitter='user',
1316
+ groups=['user'],
1317
+ max_extracted=10000
1318
+ ),
1319
+ notification=dict(
1320
+ queue='temp-final-partial',
1321
+ threshold=0
1322
+ ),
1323
+ files=[dict(
1324
+ sha256=sha,
1325
+ size=size,
1326
+ name='abc123'
1327
+ )]
1328
+ )).as_primitives())
1329
+
1330
+ # Wait until both services have started (so that core-b, on its first run, doesn't get the temp data that core-a produces)
1331
+ start = time.time()
1332
+ while sum(s.hits.get(sha, 0) for s in core.services) != 2:
1333
+ if time.time() - start > RESPONSE_TIMEOUT:
1334
+ pytest.fail()
1335
+ time.sleep(0.01)
1336
+
1337
+ # Release a
1338
+ for service in core_a:
1339
+ service.local_lock.set()
1340
+
1341
+ # Let a finish so that the temporary data is added in the dispatcher
1342
+ while sum(s.finish.get(sha, 0) for s in core_a) < 1:
1343
+ if time.time() - start > RESPONSE_TIMEOUT:
1344
+ pytest.fail()
1345
+ time.sleep(0.01)
1346
+
1347
+ # Let b finish, it should produce a partial result then rerun right away
1348
+ core_b.local_lock.set()
1349
+
1350
+ notification_queue = NamedQueue('nq-temp-final-partial', core.redis)
1351
+ dropped_task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)
1352
+ assert dropped_task
1353
+ dropped_task = IngestTask(dropped_task)
1354
+ sub: Submission = core.ds.submission.get(dropped_task.submission.sid)
1355
+
1356
+ # The submission should produce no errors and two results
1357
+ assert len(sub.errors) == 0
1358
+ assert len(sub.results) == 2, sub.results
1359
+
1360
+ # b service should have run twice to produce the results
1361
+ assert core_b.hits[sha] >= 2, f'core_b.hits {core_b.hits[sha]}'
1362
+
1363
+ # Wait until we get feedback from the metrics channel
1364
+ metrics.expect('ingester', 'submissions_ingested', 1)
1365
+ metrics.expect('ingester', 'submissions_completed', 1)
1366
+ metrics.expect('dispatcher', 'submissions_completed', 1)
1367
+ metrics.expect('dispatcher', 'files_completed', 1)
1368
+
1369
+ # Verify that there are no partial results in the final submission
1370
+ partial_results = 0
1371
+ for res in sub.results:
1372
+ result = core.ds.get_single_result(res, as_obj=True)
1373
+ assert result is not None, res
1374
+ if result.partial:
1375
+ partial_results += 1
1376
+ assert partial_results == 0, 'partial_results'
1377
+
1378
+
1278
1379
  def test_complex_extracted(core: CoreSession, metrics):
1279
1380
  # stages to this processing when everything goes well
1280
1381
  # 1. extract a file that will process to produce a partial result
@@ -1 +0,0 @@
1
- 4.5.1.dev493