assemblyline-core 4.7.0.dev1__tar.gz → 4.7.0.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/PKG-INFO +1 -1
  2. assemblyline_core-4.7.0.dev5/assemblyline_core/VERSION +1 -0
  3. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/dispatching/client.py +2 -1
  4. assemblyline_core-4.7.0.dev5/assemblyline_core/dispatching/dispatcher.py +327 -0
  5. assemblyline_core-4.7.0.dev5/assemblyline_core/ingester/ingester.py +116 -0
  6. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/heartbeat_formatter.py +1 -1
  7. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core.egg-info/PKG-INFO +1 -1
  8. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core.egg-info/SOURCES.txt +0 -10
  9. assemblyline_core-4.7.0.dev1/assemblyline_core/VERSION +0 -1
  10. assemblyline_core-4.7.0.dev1/assemblyline_core/dispatching/__main__.py +0 -5
  11. assemblyline_core-4.7.0.dev1/assemblyline_core/dispatching/dispatcher.py +0 -2072
  12. assemblyline_core-4.7.0.dev1/assemblyline_core/dispatching/timeout.py +0 -59
  13. assemblyline_core-4.7.0.dev1/assemblyline_core/ingester/__main__.py +0 -5
  14. assemblyline_core-4.7.0.dev1/assemblyline_core/ingester/ingester.py +0 -979
  15. assemblyline_core-4.7.0.dev1/assemblyline_core/plumber/run_plumber.py +0 -332
  16. assemblyline_core-4.7.0.dev1/assemblyline_core/workflow/__init__.py +0 -0
  17. assemblyline_core-4.7.0.dev1/test/test_dispatcher.py +0 -602
  18. assemblyline_core-4.7.0.dev1/test/test_plumber.py +0 -162
  19. assemblyline_core-4.7.0.dev1/test/test_simulation.py +0 -1501
  20. assemblyline_core-4.7.0.dev1/test/test_worker_ingest.py +0 -248
  21. assemblyline_core-4.7.0.dev1/test/test_worker_submit.py +0 -137
  22. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/LICENCE.md +0 -0
  23. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/README.md +0 -0
  24. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/__init__.py +0 -0
  25. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/alerter/__init__.py +0 -0
  26. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/alerter/processing.py +0 -0
  27. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/alerter/run_alerter.py +0 -0
  28. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/archiver/__init__.py +0 -0
  29. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/archiver/run_archiver.py +0 -0
  30. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/badlist_client.py +0 -0
  31. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/dispatching/__init__.py +0 -0
  32. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/dispatching/schedules.py +0 -0
  33. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/expiry/__init__.py +0 -0
  34. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/expiry/run_expiry.py +0 -0
  35. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/ingester/__init__.py +0 -0
  36. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/ingester/constants.py +0 -0
  37. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/__init__.py +0 -0
  38. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/es_metrics.py +0 -0
  39. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/helper.py +0 -0
  40. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/metrics_server.py +0 -0
  41. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  42. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  43. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  44. {assemblyline_core-4.7.0.dev1/assemblyline_core/plumber → assemblyline_core-4.7.0.dev5/assemblyline_core/replay}/__init__.py +0 -0
  45. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/client.py +0 -0
  46. {assemblyline_core-4.7.0.dev1/assemblyline_core/replay → assemblyline_core-4.7.0.dev5/assemblyline_core/replay/creator}/__init__.py +0 -0
  47. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/creator/run.py +0 -0
  48. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/creator/run_worker.py +0 -0
  49. {assemblyline_core-4.7.0.dev1/assemblyline_core/replay/creator → assemblyline_core-4.7.0.dev5/assemblyline_core/replay/loader}/__init__.py +0 -0
  50. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/loader/run.py +0 -0
  51. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/loader/run_worker.py +0 -0
  52. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/replay/replay.py +0 -0
  53. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/safelist_client.py +0 -0
  54. {assemblyline_core-4.7.0.dev1/assemblyline_core/replay/loader → assemblyline_core-4.7.0.dev5/assemblyline_core/scaler}/__init__.py +0 -0
  55. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/collection.py +0 -0
  56. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  57. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  58. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/controllers/interface.py +0 -0
  59. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  60. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/run_scaler.py +0 -0
  61. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/scaler/scaler_server.py +0 -0
  62. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/server_base.py +0 -0
  63. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/signature_client.py +0 -0
  64. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/submission_client.py +0 -0
  65. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/tasking_client.py +0 -0
  66. {assemblyline_core-4.7.0.dev1/assemblyline_core/scaler → assemblyline_core-4.7.0.dev5/assemblyline_core/updater}/__init__.py +0 -0
  67. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/updater/helper.py +0 -0
  68. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/updater/run_updater.py +0 -0
  69. {assemblyline_core-4.7.0.dev1/assemblyline_core/updater → assemblyline_core-4.7.0.dev5/assemblyline_core/vacuum}/__init__.py +0 -0
  70. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/vacuum/crawler.py +0 -0
  71. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/vacuum/department_map.py +0 -0
  72. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/vacuum/safelist.py +0 -0
  73. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/vacuum/stream_map.py +0 -0
  74. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/vacuum/worker.py +0 -0
  75. {assemblyline_core-4.7.0.dev1/assemblyline_core/vacuum → assemblyline_core-4.7.0.dev5/assemblyline_core/workflow}/__init__.py +0 -0
  76. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core/workflow/run_workflow.py +0 -0
  77. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  78. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core.egg-info/requires.txt +0 -0
  79. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/assemblyline_core.egg-info/top_level.txt +0 -0
  80. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/setup.cfg +0 -0
  81. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/setup.py +0 -0
  82. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_alerter.py +0 -0
  83. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_badlist_client.py +0 -0
  84. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_expiry.py +0 -0
  85. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_replay.py +0 -0
  86. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_safelist_client.py +0 -0
  87. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_scaler.py +0 -0
  88. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_scheduler.py +0 -0
  89. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_signature_client.py +0 -0
  90. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_tasking_client.py +0 -0
  91. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_vacuum.py +0 -0
  92. {assemblyline_core-4.7.0.dev1 → assemblyline_core-4.7.0.dev5}/test/test_workflow.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.7.0.dev1
3
+ Version: 4.7.0.dev5
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.7.0.dev5
@@ -16,6 +16,7 @@ from assemblyline.common.constants import DISPATCH_RUNNING_TASK_HASH, SUBMISSION
16
16
  make_watcher_list_name, DISPATCH_TASK_HASH
17
17
  from assemblyline.common.forge import CachedObject, get_service_queue
18
18
  from assemblyline.common.isotime import now_as_iso
19
+ from assemblyline.common.dispatcher import Dispatcher
19
20
  from assemblyline.datastore.exceptions import VersionConflictException
20
21
  from assemblyline.odm.base import DATEFORMAT
21
22
  from assemblyline.odm.messages.dispatching import DispatcherCommandMessage, CREATE_WATCH, \
@@ -30,7 +31,7 @@ from assemblyline.remote.datatypes.hash import ExpiringHash, Hash
30
31
  from assemblyline.remote.datatypes.queues.named import NamedQueue
31
32
  from assemblyline.remote.datatypes.set import ExpiringSet, Set
32
33
  from assemblyline_core.dispatching.dispatcher import DISPATCH_START_EVENTS, DISPATCH_RESULT_QUEUE, \
33
- DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask, Dispatcher
34
+ DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask
34
35
 
35
36
 
36
37
  MAX_CANCEL_RESPONSE_WAIT = 10
@@ -0,0 +1,327 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import enum
5
+ import os
6
+ import threading
7
+ import time
8
+ import uuid
9
+ from contextlib import contextmanager
10
+ from copy import deepcopy
11
+ from queue import Empty, PriorityQueue, Queue
12
+ from typing import TYPE_CHECKING, Any, Iterable, Optional
13
+
14
+ import elasticapm
15
+
16
+ from assemblyline.common.constants import (
17
+ DISPATCH_RUNNING_TASK_HASH,
18
+ DISPATCH_TASK_HASH,
19
+ SCALER_TIMEOUT_QUEUE,
20
+ SUBMISSION_QUEUE,
21
+ make_watcher_list_name,
22
+ )
23
+ from assemblyline.common.forge import (
24
+ get_apm_client,
25
+ get_classification,
26
+ get_service_queue,
27
+ )
28
+ from assemblyline.common.isotime import now_as_iso
29
+ from assemblyline.common.metrics import MetricsFactory
30
+ from assemblyline.common.postprocess import ActionWorker
31
+ from assemblyline.datastore.helper import AssemblylineDatastore
32
+ from assemblyline.odm.messages.changes import Operation, ServiceChange
33
+ from assemblyline.odm.messages.dispatcher_heartbeat import Metrics
34
+ from assemblyline.odm.messages.dispatching import (
35
+ CREATE_WATCH,
36
+ LIST_OUTSTANDING,
37
+ UPDATE_BAD_SID,
38
+ CreateWatch,
39
+ DispatcherCommandMessage,
40
+ ListOutstanding,
41
+ WatchQueueMessage,
42
+ )
43
+ from assemblyline.odm.messages.service_heartbeat import Metrics as ServiceMetrics
44
+ from assemblyline.odm.messages.submission import (
45
+ SubmissionMessage,
46
+ from_datastore_submission,
47
+ )
48
+ from assemblyline.odm.messages.task import FileInfo
49
+ from assemblyline.odm.messages.task import Task as ServiceTask
50
+ from assemblyline.odm.models.error import Error
51
+ from assemblyline.odm.models.result import Result
52
+ from assemblyline.odm.models.service import Service
53
+ from assemblyline.odm.models.submission import Submission, TraceEvent
54
+ from assemblyline.odm.models.user import User
55
+ from assemblyline.remote.datatypes.events import EventWatcher
56
+ from assemblyline.remote.datatypes.exporting_counter import export_metrics_once
57
+ from assemblyline.remote.datatypes.hash import Hash
58
+ from assemblyline.remote.datatypes.queues.comms import CommsQueue
59
+ from assemblyline.remote.datatypes.queues.named import NamedQueue
60
+ from assemblyline.remote.datatypes.set import ExpiringSet, Set
61
+ from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
62
+ from assemblyline_core.server_base import ThreadedCoreBase
63
+
64
+ from ..ingester.constants import COMPLETE_QUEUE_NAME
65
+ from .schedules import Scheduler
66
+
67
+ if TYPE_CHECKING:
68
+ from redis import Redis
69
+
70
+ from assemblyline.odm.models.file import File
71
+ from assemblyline.odm.models.config import Config
72
+
73
+
74
+ APM_SPAN_TYPE = 'handle_message'
75
+
76
+ AL_SHUTDOWN_GRACE = int(os.environ.get('AL_SHUTDOWN_GRACE', '60'))
77
+ AL_SHUTDOWN_QUIT = 60
78
+ FINALIZING_WINDOW = max(AL_SHUTDOWN_GRACE - AL_SHUTDOWN_QUIT, 0)
79
+ RESULT_BATCH_SIZE = int(os.environ.get('DISPATCHER_RESULT_BATCH_SIZE', '50'))
80
+ ERROR_BATCH_SIZE = int(os.environ.get('DISPATCHER_ERROR_BATCH_SIZE', '50'))
81
+ DAY_IN_SECONDS = 24 * 60 * 60
82
+
83
+
84
+ class KeyType(enum.Enum):
85
+ OVERWRITE = 'overwrite'
86
+ UNION = 'union'
87
+
88
+
89
+ class Action(enum.IntEnum):
90
+ start = 0
91
+ result = 1
92
+ dispatch_file = 2
93
+ service_timeout = 3
94
+ check_submission = 4
95
+ bad_sid = 5
96
+
97
+
98
+ @dataclasses.dataclass(order=True)
99
+ class DispatchAction:
100
+ kind: Action
101
+ sid: str = dataclasses.field(compare=False)
102
+ sha: Optional[str] = dataclasses.field(compare=False, default=None)
103
+ service_name: Optional[str] = dataclasses.field(compare=False, default=None)
104
+ worker_id: Optional[str] = dataclasses.field(compare=False, default=None)
105
+ data: Any = dataclasses.field(compare=False, default=None)
106
+ event: Optional[threading.Event] = dataclasses.field(compare=False, default=None)
107
+
108
+
109
+
110
+ @contextmanager
111
+ def apm_span(client, span_name: str):
112
+ try:
113
+ if client:
114
+ client.begin_transaction(APM_SPAN_TYPE)
115
+ yield None
116
+ if client:
117
+ client.end_transaction(span_name, 'success')
118
+ except Exception:
119
+ if client:
120
+ client.end_transaction(span_name, 'exception')
121
+ raise
122
+
123
+
124
+ DISPATCH_TASK_ASSIGNMENT = 'dispatcher-tasks-assigned-to-'
125
+ TASK_ASSIGNMENT_PATTERN = DISPATCH_TASK_ASSIGNMENT + '*'
126
+ DISPATCH_START_EVENTS = 'dispatcher-start-events-'
127
+ DISPATCH_RESULT_QUEUE = 'dispatcher-results-'
128
+ DISPATCH_COMMAND_QUEUE = 'dispatcher-commands-'
129
+ DISPATCH_DIRECTORY = 'dispatchers-directory'
130
+ DISPATCH_DIRECTORY_FINALIZE = 'dispatchers-directory-finalizing'
131
+ BAD_SID_HASH = 'bad-sid-hash'
132
+ QUEUE_EXPIRY = 60*60
133
+ SERVICE_VERSION_EXPIRY_TIME = 30 * 60 # How old service version info can be before we ignore it
134
+ GUARD_TIMEOUT = 60*2
135
+ GLOBAL_TASK_CHECK_INTERVAL = 60*10
136
+ TIMEOUT_EXTRA_TIME = 5
137
+ TIMEOUT_TEST_INTERVAL = 5
138
+ MAX_RESULT_BUFFER = 64
139
+ RESULT_THREADS = max(1, int(os.getenv('DISPATCHER_RESULT_THREADS', '2')))
140
+ FINALIZE_THREADS = max(1, int(os.getenv('DISPATCHER_FINALIZE_THREADS', '2')))
141
+
142
+ # After 20 minutes, check if a submission is still making progress.
143
+ # In the case of a crash somewhere else in the system, we may not have
144
+ # gotten a message we are expecting. This should prompt a retry in most
145
+ # cases.
146
+ SUBMISSION_TOTAL_TIMEOUT = 60 * 20
147
+
148
+
149
+ class Dispatcher(ThreadedCoreBase):
150
+ # @staticmethod
151
+ # def all_instances(persistent_redis: Redis):
152
+ # return Hash(DISPATCH_DIRECTORY, host=persistent_redis).keys()
153
+
154
+ # @staticmethod
155
+ # def instance_assignment_size(persistent_redis, instance_id):
156
+ # return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).length()
157
+
158
+ # @staticmethod
159
+ # def instance_assignment(persistent_redis, instance_id) -> list[str]:
160
+ # return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).keys()
161
+
162
+ # @staticmethod
163
+ # def all_queue_lengths(redis, instance_id):
164
+ # return {
165
+ # 'start': NamedQueue(DISPATCH_START_EVENTS + instance_id, host=redis).length(),
166
+ # 'result': NamedQueue(DISPATCH_RESULT_QUEUE + instance_id, host=redis).length(),
167
+ # 'command': NamedQueue(DISPATCH_COMMAND_QUEUE + instance_id, host=redis).length()
168
+ # }
169
+
170
+ def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None,
171
+ config=None, counter_name: str = 'dispatcher'):
172
+ super().__init__('assemblyline.dispatcher', config=config, datastore=datastore,
173
+ redis=redis, redis_persist=redis_persist, logger=logger)
174
+
175
+ # Load the datastore collections that we are going to be using
176
+ self.instance_id = uuid.uuid4().hex
177
+ self.tasks: dict[str, SubmissionTask] = {}
178
+ self.finalizing = threading.Event()
179
+ self.finalizing_start = 0.0
180
+
181
+ # Build some utility classes
182
+ self.scheduler = Scheduler(self.datastore, self.config, self.redis)
183
+ self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
184
+ self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
185
+
186
+ self.classification_engine = get_classification()
187
+
188
+ # Output. Duplicate our input traffic into this queue so it may be cloned by other systems
189
+ self.traffic_queue = CommsQueue('submissions', self.redis)
190
+ self.quota_tracker = UserQuotaTracker('submissions', timeout=60 * 60, host=self.redis_persist)
191
+ self.submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
192
+
193
+ # Table to track the running dispatchers
194
+ self.dispatchers_directory: Hash[int] = Hash(DISPATCH_DIRECTORY, host=self.redis_persist)
195
+ self.dispatchers_directory_finalize: Hash[int] = Hash(DISPATCH_DIRECTORY_FINALIZE, host=self.redis_persist)
196
+ self.running_dispatchers_estimate = 1
197
+
198
+ # Tables to track what submissions are running where
199
+ self.active_submissions = Hash(DISPATCH_TASK_ASSIGNMENT+self.instance_id, host=self.redis_persist)
200
+ self.submissions_assignments = Hash(DISPATCH_TASK_HASH, host=self.redis_persist)
201
+ self.ingester_scanning = Hash('m-scanning-table', self.redis_persist)
202
+
203
+ # Communications queues
204
+ self.start_queue: NamedQueue[tuple[str, str, str, str]] =\
205
+ NamedQueue(DISPATCH_START_EVENTS+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
206
+ self.result_queue: NamedQueue[dict] =\
207
+ NamedQueue(DISPATCH_RESULT_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
208
+ self.command_queue: NamedQueue[dict] =\
209
+ NamedQueue(DISPATCH_COMMAND_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
210
+
211
+ # Publish counters to the metrics sink.
212
+ self.counter = MetricsFactory(metrics_type='dispatcher', schema=Metrics, name=counter_name,
213
+ redis=self.redis, config=self.config)
214
+
215
+ self.apm_client = None
216
+ if self.config.core.metrics.apm_server.server_url:
217
+ elasticapm.instrument()
218
+ self.apm_client = get_apm_client("dispatcher")
219
+
220
+ self._service_timeouts: TimeoutTable[tuple[str, str, str], str] = TimeoutTable()
221
+ self._submission_timeouts: TimeoutTable[str, None] = TimeoutTable()
222
+
223
+ # Setup queues for work to be divided into
224
+ self.process_queues: list[PriorityQueue[DispatchAction]] = [PriorityQueue() for _ in range(RESULT_THREADS)]
225
+ self.queue_ready_signals: list[threading.Semaphore] = [threading.Semaphore(MAX_RESULT_BUFFER)
226
+ for _ in range(RESULT_THREADS)]
227
+
228
+ # Queue of finished submissions/errors waiting to be saved into elastic
229
+ self.finalize_queue = Queue()
230
+ self.error_queue: Queue[tuple[str, Error]] = Queue()
231
+
232
+ # Queue to hold service timeouts that need to be processed
233
+ # They will be held in this queue until results in redis are
234
+ # already processed
235
+ self.timeout_queue: Queue[DispatchAction] = Queue()
236
+
237
+ # Utility object to handle post-processing actions
238
+ self.postprocess_worker = ActionWorker(cache=False, config=self.config, datastore=self.datastore,
239
+ redis_persist=self.redis_persist)
240
+
241
+ # Update bad sid list
242
+ self.redis_bad_sids = Set(BAD_SID_HASH, host=self.redis_persist)
243
+ self.bad_sids: set[str] = set(self.redis_bad_sids.members())
244
+
245
+ # Event Watchers
246
+ self.service_change_watcher = EventWatcher(self.redis, deserializer=ServiceChange.deserialize)
247
+ self.service_change_watcher.register('changes.services.*', self._handle_service_change_event)
248
+
249
+ def stop(self):
250
+ super().stop()
251
+ self.service_change_watcher.stop()
252
+ self.postprocess_worker.stop()
253
+
254
+ def try_run(self):
255
+ self.log.info(f'Using dispatcher id {self.instance_id}')
256
+ self.service_change_watcher.start()
257
+ threads = {
258
+ # Process to protect against old dead tasks timing out
259
+ 'Global Timeout Backstop': self.timeout_backstop,
260
+ }
261
+
262
+ for ii in range(RESULT_THREADS):
263
+ # Process results
264
+ threads[f'Service Update Worker #{ii}'] = self.service_worker_factory(ii)
265
+
266
+ self.maintain_threads(threads)
267
+
268
+ # If the dispatcher is exiting cleanly remove as many tasks from the service queues as we can
269
+ service_queues = {}
270
+ for task in self.tasks.values():
271
+ for (_sha256, service_name), dispatch_key in task.queue_keys.items():
272
+ try:
273
+ s_queue = service_queues[service_name]
274
+ except KeyError:
275
+ s_queue = get_service_queue(service_name, self.redis)
276
+ service_queues[service_name] = s_queue
277
+ s_queue.remove(dispatch_key)
278
+
279
+
280
+ def timeout_backstop(self):
281
+ while self.running:
282
+ cpu_mark = time.process_time()
283
+ time_mark = time.time()
284
+
285
+ # Start of process dispatcher transaction
286
+ with apm_span(self.apm_client, 'timeout_backstop'):
287
+ dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
288
+ error_tasks = []
289
+
290
+ # iterate running tasks
291
+ for _task_key, task_body in self.running_tasks:
292
+ task = ServiceTask(task_body)
293
+ # It's a bad task if its dispatcher isn't running
294
+ if task.metadata['dispatcher__'] not in dispatcher_instances:
295
+ error_tasks.append(task)
296
+ # It's a bad task if it's OUR task, but we aren't tracking that submission anymore
297
+ if task.metadata['dispatcher__'] == self.instance_id and task.sid not in self.tasks:
298
+ error_tasks.append(task)
299
+
300
+ # Refresh our dispatcher list.
301
+ dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
302
+ other_dispatcher_instances = dispatcher_instances - {self.instance_id}
303
+
304
+ # The remaining running tasks (probably) belong to dead dispatchers and can be killed
305
+ for task in error_tasks:
306
+ # Check against our refreshed dispatcher list in case it changed during the previous scan
307
+ if task.metadata['dispatcher__'] in other_dispatcher_instances:
308
+ continue
309
+
310
+ # If it's already been handled, we don't need to handle it again
311
+ if not self.running_tasks.pop(task.key()):
312
+ continue
313
+
314
+ # Kill the task that would report to a dead dispatcher
315
+ self.log.warning(f"[{task.sid}]Task killed by backstop {task.service_name} {task.fileinfo.sha256}")
316
+ self.scaler_timeout_queue.push({
317
+ 'service': task.service_name,
318
+ 'container': task.metadata['worker__']
319
+ })
320
+
321
+ # Report to the metrics system that a recoverable error has occurred for that service
322
+ export_metrics_once(task.service_name, ServiceMetrics, dict(fail_recoverable=1),
323
+ host=task.metadata['worker__'], counter_type='service', redis=self.redis)
324
+
325
+ self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
326
+ self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
327
+ self.sleep(GLOBAL_TASK_CHECK_INTERVAL)
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Ingester
4
+
5
+ Ingester is responsible for monitoring for incoming submission requests,
6
+ sending submissions, waiting for submissions to complete, sending a message
7
+ to a notification queue as specified by the submission and, based on the
8
+ score received, possibly sending a message to indicate that an alert should
9
+ be created.
10
+ """
11
+
12
+ import logging
13
+ import threading
14
+ import time
15
+ from os import environ
16
+ from random import random
17
+ from typing import Any, Iterable, List, Optional, Tuple
18
+
19
+ import elasticapm
20
+
21
+ from assemblyline import odm
22
+ from assemblyline.common import exceptions, forge, isotime
23
+ from assemblyline.common.constants import DROP_PRIORITY
24
+ from assemblyline.common.exceptions import get_stacktrace_info
25
+ from assemblyline.common.importing import load_module_by_path
26
+ from assemblyline.common.isotime import now, now_as_iso
27
+ from assemblyline.common.metrics import MetricsFactory
28
+ from assemblyline.common.postprocess import ActionWorker
29
+ from assemblyline.common.str_utils import dotdump, safe_str
30
+ from assemblyline.datastore.exceptions import DataStoreException
31
+ from assemblyline.filestore import CorruptedFileStoreException, FileStoreException
32
+ from assemblyline.odm.messages.ingest_heartbeat import Metrics
33
+ from assemblyline.odm.messages.submission import Submission as MessageSubmission
34
+ from assemblyline.odm.messages.submission import SubmissionMessage
35
+ from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
36
+ from assemblyline.odm.models.filescore import FileScore
37
+ from assemblyline.odm.models.submission import Submission as DatabaseSubmission
38
+ from assemblyline.odm.models.submission import SubmissionParams
39
+ from assemblyline.odm.models.user import User
40
+ from assemblyline.remote.datatypes.events import EventWatcher
41
+ from assemblyline.remote.datatypes.hash import Hash
42
+ from assemblyline.remote.datatypes.queues.comms import CommsQueue
43
+ from assemblyline.remote.datatypes.queues.multi import MultiQueue
44
+ from assemblyline.remote.datatypes.queues.named import NamedQueue
45
+ from assemblyline.remote.datatypes.queues.priority import PriorityQueue
46
+ from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
47
+ from assemblyline_core.dispatching.dispatcher import Dispatcher
48
+ from assemblyline_core.server_base import ThreadedCoreBase
49
+ from assemblyline_core.submission_client import SubmissionClient
50
+
51
+ from .constants import COMPLETE_QUEUE_NAME, INGEST_QUEUE_NAME, drop_chance
52
+
53
+ _dup_prefix = 'w-m-'
54
+ _notification_queue_prefix = 'nq-'
55
+ _max_retries = 10
56
+ _retry_delay = 60 * 4 # Wait 4 minutes to retry
57
+ _max_time = 2 * 24 * 60 * 60 # Wait 2 days for responses.
58
+ HOUR_IN_SECONDS = 60 * 60
59
+ COMPLETE_THREADS = int(environ.get('INGESTER_COMPLETE_THREADS', 4))
60
+ INGEST_THREADS = int(environ.get('INGESTER_INGEST_THREADS', 1))
61
+ SUBMIT_THREADS = int(environ.get('INGESTER_SUBMIT_THREADS', 4))
62
+
63
+
64
+ def must_drop(length: int, maximum: int) -> bool:
65
+ """
66
+ To calculate the probability of dropping an incoming submission we compare
67
+ the number returned by random() which will be in the range [0,1) and the
68
+ number returned by tanh() which will be in the range (-1,1).
69
+
70
+ If length is less than maximum the number returned by tanh will be negative
71
+ and so drop will always return False since the value returned by random()
72
+ cannot be less than 0.
73
+
74
+ If length is greater than maximum, drop will return True with a probability
75
+ that increases as the distance between maximum and length increases:
76
+
77
+ Length Chance of Dropping
78
+
79
+ <= maximum 0
80
+ 1.5 * maximum 0.76
81
+ 2 * maximum 0.96
82
+ 3 * maximum 0.999
83
+ """
84
+ return random() < drop_chance(length, maximum)
85
+
86
+
87
+ @odm.model()
88
+ class IngestTask(odm.Model):
89
+ # Submission Parameters
90
+ submission: MessageSubmission = odm.compound(MessageSubmission)
91
+
92
+ # Shortcut for properties of the submission
93
+ @property
94
+ def file_size(self) -> int:
95
+ return sum(file.size for file in self.submission.files)
96
+
97
+ @property
98
+ def params(self) -> SubmissionParams:
99
+ return self.submission.params
100
+
101
+ @property
102
+ def sha256(self) -> str:
103
+ return self.submission.files[0].sha256
104
+
105
+ # Information about the ingestion itself, parameters irrelevant
106
+ retries = odm.Integer(default=0)
107
+
108
+ # Fields added after a submission is complete for notification/bookkeeping processes
109
+ failure = odm.Text(default='') # If the ingestion has failed for some reason, what is it?
110
+ score = odm.Optional(odm.Integer()) # Score from previous processing of this file
111
+ extended_scan = odm.Enum(EXTENDED_SCAN_VALUES, default="skipped") # Status of the extended scan
112
+ ingest_id = odm.UUID() # Ingestion Identifier
113
+ ingest_time = odm.Date(default="NOW") # Time at which the file was ingested
114
+ notify_time = odm.Optional(odm.Date()) # Time at which the user is notified that the submission is finished
115
+ to_ingest = odm.Boolean(default=False)
116
+
@@ -6,6 +6,7 @@ from assemblyline.common import forge, metrics
6
6
  from assemblyline.common.archiving import ARCHIVE_QUEUE_NAME
7
7
  from assemblyline.common.constants import DISPATCH_TASK_HASH, SUBMISSION_QUEUE, \
8
8
  SERVICE_STATE_HASH, ServiceStatus
9
+ from assemblyline.common.dispatcher import Dispatcher
9
10
  from assemblyline.datastore.exceptions import SearchException
10
11
  from assemblyline.odm.messages.retrohunt_heartbeat import RetrohuntMessage
11
12
  from assemblyline.odm.messages.scaler_heartbeat import ScalerMessage
@@ -25,7 +26,6 @@ from assemblyline.remote.datatypes.queues.named import NamedQueue
25
26
  from assemblyline.remote.datatypes.queues.priority import PriorityQueue
26
27
 
27
28
  from assemblyline_core.alerter.run_alerter import ALERT_QUEUE_NAME, ALERT_RETRY_QUEUE_NAME
28
- from assemblyline_core.dispatching.dispatcher import Dispatcher
29
29
  from assemblyline_core.ingester import INGEST_QUEUE_NAME, drop_chance
30
30
  from assemblyline_core.ingester.constants import COMPLETE_QUEUE_NAME
31
31
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: assemblyline-core
3
- Version: 4.7.0.dev1
3
+ Version: 4.7.0.dev5
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -21,15 +21,12 @@ assemblyline_core/alerter/run_alerter.py
21
21
  assemblyline_core/archiver/__init__.py
22
22
  assemblyline_core/archiver/run_archiver.py
23
23
  assemblyline_core/dispatching/__init__.py
24
- assemblyline_core/dispatching/__main__.py
25
24
  assemblyline_core/dispatching/client.py
26
25
  assemblyline_core/dispatching/dispatcher.py
27
26
  assemblyline_core/dispatching/schedules.py
28
- assemblyline_core/dispatching/timeout.py
29
27
  assemblyline_core/expiry/__init__.py
30
28
  assemblyline_core/expiry/run_expiry.py
31
29
  assemblyline_core/ingester/__init__.py
32
- assemblyline_core/ingester/__main__.py
33
30
  assemblyline_core/ingester/constants.py
34
31
  assemblyline_core/ingester/ingester.py
35
32
  assemblyline_core/metrics/__init__.py
@@ -40,8 +37,6 @@ assemblyline_core/metrics/metrics_server.py
40
37
  assemblyline_core/metrics/run_heartbeat_manager.py
41
38
  assemblyline_core/metrics/run_metrics_aggregator.py
42
39
  assemblyline_core/metrics/run_statistics_aggregator.py
43
- assemblyline_core/plumber/__init__.py
44
- assemblyline_core/plumber/run_plumber.py
45
40
  assemblyline_core/replay/__init__.py
46
41
  assemblyline_core/replay/client.py
47
42
  assemblyline_core/replay/replay.py
@@ -72,17 +67,12 @@ assemblyline_core/workflow/__init__.py
72
67
  assemblyline_core/workflow/run_workflow.py
73
68
  test/test_alerter.py
74
69
  test/test_badlist_client.py
75
- test/test_dispatcher.py
76
70
  test/test_expiry.py
77
- test/test_plumber.py
78
71
  test/test_replay.py
79
72
  test/test_safelist_client.py
80
73
  test/test_scaler.py
81
74
  test/test_scheduler.py
82
75
  test/test_signature_client.py
83
- test/test_simulation.py
84
76
  test/test_tasking_client.py
85
77
  test/test_vacuum.py
86
- test/test_worker_ingest.py
87
- test/test_worker_submit.py
88
78
  test/test_workflow.py
@@ -1 +0,0 @@
1
- 4.7.0.dev1
@@ -1,5 +0,0 @@
1
- from assemblyline_core.dispatching.dispatcher import Dispatcher
2
-
3
-
4
- with Dispatcher() as server:
5
- server.serve_forever()