assemblyline-core 4.5.1.dev426__tar.gz → 4.7.0.dev45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/PKG-INFO +3 -2
- assemblyline_core-4.7.0.dev45/assemblyline_core/VERSION +1 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/client.py +2 -1
- assemblyline_core-4.7.0.dev45/assemblyline_core/dispatching/dispatcher.py +327 -0
- assemblyline_core-4.7.0.dev45/assemblyline_core/ingester/ingester.py +116 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/heartbeat_formatter.py +1 -1
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/client.py +34 -15
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/loader/run.py +1 -1
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/loader/run_worker.py +7 -6
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/docker_ctl.py +12 -6
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/interface.py +1 -10
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +102 -87
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/scaler_server.py +61 -75
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/submission_client.py +55 -4
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/tasking_client.py +75 -34
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/updater/helper.py +8 -10
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/updater/run_updater.py +52 -30
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/worker.py +29 -26
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/workflow/run_workflow.py +6 -1
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/PKG-INFO +3 -2
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/SOURCES.txt +2 -10
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_replay.py +47 -5
- assemblyline_core-4.7.0.dev45/test/test_tasking_client.py +77 -0
- assemblyline_core-4.7.0.dev45/test/test_workflow.py +45 -0
- assemblyline_core-4.5.1.dev426/assemblyline_core/VERSION +0 -1
- assemblyline_core-4.5.1.dev426/assemblyline_core/dispatching/__main__.py +0 -5
- assemblyline_core-4.5.1.dev426/assemblyline_core/dispatching/dispatcher.py +0 -1992
- assemblyline_core-4.5.1.dev426/assemblyline_core/dispatching/timeout.py +0 -59
- assemblyline_core-4.5.1.dev426/assemblyline_core/ingester/__main__.py +0 -5
- assemblyline_core-4.5.1.dev426/assemblyline_core/ingester/ingester.py +0 -950
- assemblyline_core-4.5.1.dev426/assemblyline_core/plumber/run_plumber.py +0 -194
- assemblyline_core-4.5.1.dev426/assemblyline_core/workflow/__init__.py +0 -0
- assemblyline_core-4.5.1.dev426/test/test_dispatcher.py +0 -456
- assemblyline_core-4.5.1.dev426/test/test_plumber.py +0 -109
- assemblyline_core-4.5.1.dev426/test/test_simulation.py +0 -1354
- assemblyline_core-4.5.1.dev426/test/test_worker_ingest.py +0 -248
- assemblyline_core-4.5.1.dev426/test/test_worker_submit.py +0 -138
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/LICENCE.md +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/README.md +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/badlist_client.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/plumber → assemblyline_core-4.7.0.dev45/assemblyline_core/replay}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/replay → assemblyline_core-4.7.0.dev45/assemblyline_core/replay/creator}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/creator/run.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/creator/run_worker.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/replay/creator → assemblyline_core-4.7.0.dev45/assemblyline_core/replay/loader}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/safelist_client.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/replay/loader → assemblyline_core-4.7.0.dev45/assemblyline_core/scaler}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/server_base.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/signature_client.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/scaler → assemblyline_core-4.7.0.dev45/assemblyline_core/updater}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/updater → assemblyline_core-4.7.0.dev45/assemblyline_core/vacuum}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline_core-4.5.1.dev426/assemblyline_core/vacuum → assemblyline_core-4.7.0.dev45/assemblyline_core/workflow}/__init__.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/setup.cfg +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/setup.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_alerter.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_badlist_client.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_expiry.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_safelist_client.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_scaler.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_scheduler.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_signature_client.py +0 -0
- {assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/test/test_vacuum.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: assemblyline-core
|
|
3
|
-
Version: 4.5.1.dev426
|
|
3
|
+
Version: 4.7.0.dev45
|
|
4
4
|
Summary: Assemblyline 4 - Core components
|
|
5
5
|
Home-page: https://github.com/CybercentreCanada/assemblyline-core/
|
|
6
6
|
Author: CCCS Assemblyline development team
|
|
@@ -33,6 +33,7 @@ Dynamic: description-content-type
|
|
|
33
33
|
Dynamic: home-page
|
|
34
34
|
Dynamic: keywords
|
|
35
35
|
Dynamic: license
|
|
36
|
+
Dynamic: license-file
|
|
36
37
|
Dynamic: provides-extra
|
|
37
38
|
Dynamic: requires-dist
|
|
38
39
|
Dynamic: summary
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.7.0.dev45
|
|
@@ -16,6 +16,7 @@ from assemblyline.common.constants import DISPATCH_RUNNING_TASK_HASH, SUBMISSION
|
|
|
16
16
|
make_watcher_list_name, DISPATCH_TASK_HASH
|
|
17
17
|
from assemblyline.common.forge import CachedObject, get_service_queue
|
|
18
18
|
from assemblyline.common.isotime import now_as_iso
|
|
19
|
+
from assemblyline.common.dispatcher import Dispatcher
|
|
19
20
|
from assemblyline.datastore.exceptions import VersionConflictException
|
|
20
21
|
from assemblyline.odm.base import DATEFORMAT
|
|
21
22
|
from assemblyline.odm.messages.dispatching import DispatcherCommandMessage, CREATE_WATCH, \
|
|
@@ -30,7 +31,7 @@ from assemblyline.remote.datatypes.hash import ExpiringHash, Hash
|
|
|
30
31
|
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
31
32
|
from assemblyline.remote.datatypes.set import ExpiringSet, Set
|
|
32
33
|
from assemblyline_core.dispatching.dispatcher import DISPATCH_START_EVENTS, DISPATCH_RESULT_QUEUE, \
|
|
33
|
-
DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask, Dispatcher
|
|
34
|
+
DISPATCH_COMMAND_QUEUE, QUEUE_EXPIRY, BAD_SID_HASH, ServiceTask
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
MAX_CANCEL_RESPONSE_WAIT = 10
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import enum
|
|
5
|
+
import os
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
from queue import Empty, PriorityQueue, Queue
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Iterable, Optional
|
|
13
|
+
|
|
14
|
+
import elasticapm
|
|
15
|
+
|
|
16
|
+
from assemblyline.common.constants import (
|
|
17
|
+
DISPATCH_RUNNING_TASK_HASH,
|
|
18
|
+
DISPATCH_TASK_HASH,
|
|
19
|
+
SCALER_TIMEOUT_QUEUE,
|
|
20
|
+
SUBMISSION_QUEUE,
|
|
21
|
+
make_watcher_list_name,
|
|
22
|
+
)
|
|
23
|
+
from assemblyline.common.forge import (
|
|
24
|
+
get_apm_client,
|
|
25
|
+
get_classification,
|
|
26
|
+
get_service_queue,
|
|
27
|
+
)
|
|
28
|
+
from assemblyline.common.isotime import now_as_iso
|
|
29
|
+
from assemblyline.common.metrics import MetricsFactory
|
|
30
|
+
from assemblyline.common.postprocess import ActionWorker
|
|
31
|
+
from assemblyline.datastore.helper import AssemblylineDatastore
|
|
32
|
+
from assemblyline.odm.messages.changes import Operation, ServiceChange
|
|
33
|
+
from assemblyline.odm.messages.dispatcher_heartbeat import Metrics
|
|
34
|
+
from assemblyline.odm.messages.dispatching import (
|
|
35
|
+
CREATE_WATCH,
|
|
36
|
+
LIST_OUTSTANDING,
|
|
37
|
+
UPDATE_BAD_SID,
|
|
38
|
+
CreateWatch,
|
|
39
|
+
DispatcherCommandMessage,
|
|
40
|
+
ListOutstanding,
|
|
41
|
+
WatchQueueMessage,
|
|
42
|
+
)
|
|
43
|
+
from assemblyline.odm.messages.service_heartbeat import Metrics as ServiceMetrics
|
|
44
|
+
from assemblyline.odm.messages.submission import (
|
|
45
|
+
SubmissionMessage,
|
|
46
|
+
from_datastore_submission,
|
|
47
|
+
)
|
|
48
|
+
from assemblyline.odm.messages.task import FileInfo
|
|
49
|
+
from assemblyline.odm.messages.task import Task as ServiceTask
|
|
50
|
+
from assemblyline.odm.models.error import Error
|
|
51
|
+
from assemblyline.odm.models.result import Result
|
|
52
|
+
from assemblyline.odm.models.service import Service
|
|
53
|
+
from assemblyline.odm.models.submission import Submission, TraceEvent
|
|
54
|
+
from assemblyline.odm.models.user import User
|
|
55
|
+
from assemblyline.remote.datatypes.events import EventWatcher
|
|
56
|
+
from assemblyline.remote.datatypes.exporting_counter import export_metrics_once
|
|
57
|
+
from assemblyline.remote.datatypes.hash import Hash
|
|
58
|
+
from assemblyline.remote.datatypes.queues.comms import CommsQueue
|
|
59
|
+
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
60
|
+
from assemblyline.remote.datatypes.set import ExpiringSet, Set
|
|
61
|
+
from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
|
|
62
|
+
from assemblyline_core.server_base import ThreadedCoreBase
|
|
63
|
+
|
|
64
|
+
from ..ingester.constants import COMPLETE_QUEUE_NAME
|
|
65
|
+
from .schedules import Scheduler
|
|
66
|
+
|
|
67
|
+
if TYPE_CHECKING:
|
|
68
|
+
from redis import Redis
|
|
69
|
+
|
|
70
|
+
from assemblyline.odm.models.file import File
|
|
71
|
+
from assemblyline.odm.models.config import Config
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
APM_SPAN_TYPE = 'handle_message'
|
|
75
|
+
|
|
76
|
+
AL_SHUTDOWN_GRACE = int(os.environ.get('AL_SHUTDOWN_GRACE', '60'))
|
|
77
|
+
AL_SHUTDOWN_QUIT = 60
|
|
78
|
+
FINALIZING_WINDOW = max(AL_SHUTDOWN_GRACE - AL_SHUTDOWN_QUIT, 0)
|
|
79
|
+
RESULT_BATCH_SIZE = int(os.environ.get('DISPATCHER_RESULT_BATCH_SIZE', '50'))
|
|
80
|
+
ERROR_BATCH_SIZE = int(os.environ.get('DISPATCHER_ERROR_BATCH_SIZE', '50'))
|
|
81
|
+
DAY_IN_SECONDS = 24 * 60 * 60
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class KeyType(enum.Enum):
|
|
85
|
+
OVERWRITE = 'overwrite'
|
|
86
|
+
UNION = 'union'
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class Action(enum.IntEnum):
|
|
90
|
+
start = 0
|
|
91
|
+
result = 1
|
|
92
|
+
dispatch_file = 2
|
|
93
|
+
service_timeout = 3
|
|
94
|
+
check_submission = 4
|
|
95
|
+
bad_sid = 5
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclasses.dataclass(order=True)
|
|
99
|
+
class DispatchAction:
|
|
100
|
+
kind: Action
|
|
101
|
+
sid: str = dataclasses.field(compare=False)
|
|
102
|
+
sha: Optional[str] = dataclasses.field(compare=False, default=None)
|
|
103
|
+
service_name: Optional[str] = dataclasses.field(compare=False, default=None)
|
|
104
|
+
worker_id: Optional[str] = dataclasses.field(compare=False, default=None)
|
|
105
|
+
data: Any = dataclasses.field(compare=False, default=None)
|
|
106
|
+
event: Optional[threading.Event] = dataclasses.field(compare=False, default=None)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@contextmanager
|
|
111
|
+
def apm_span(client, span_name: str):
|
|
112
|
+
try:
|
|
113
|
+
if client:
|
|
114
|
+
client.begin_transaction(APM_SPAN_TYPE)
|
|
115
|
+
yield None
|
|
116
|
+
if client:
|
|
117
|
+
client.end_transaction(span_name, 'success')
|
|
118
|
+
except Exception:
|
|
119
|
+
if client:
|
|
120
|
+
client.end_transaction(span_name, 'exception')
|
|
121
|
+
raise
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
DISPATCH_TASK_ASSIGNMENT = 'dispatcher-tasks-assigned-to-'
|
|
125
|
+
TASK_ASSIGNMENT_PATTERN = DISPATCH_TASK_ASSIGNMENT + '*'
|
|
126
|
+
DISPATCH_START_EVENTS = 'dispatcher-start-events-'
|
|
127
|
+
DISPATCH_RESULT_QUEUE = 'dispatcher-results-'
|
|
128
|
+
DISPATCH_COMMAND_QUEUE = 'dispatcher-commands-'
|
|
129
|
+
DISPATCH_DIRECTORY = 'dispatchers-directory'
|
|
130
|
+
DISPATCH_DIRECTORY_FINALIZE = 'dispatchers-directory-finalizing'
|
|
131
|
+
BAD_SID_HASH = 'bad-sid-hash'
|
|
132
|
+
QUEUE_EXPIRY = 60*60
|
|
133
|
+
SERVICE_VERSION_EXPIRY_TIME = 30 * 60 # How old service version info can be before we ignore it
|
|
134
|
+
GUARD_TIMEOUT = 60*2
|
|
135
|
+
GLOBAL_TASK_CHECK_INTERVAL = 60*10
|
|
136
|
+
TIMEOUT_EXTRA_TIME = 5
|
|
137
|
+
TIMEOUT_TEST_INTERVAL = 5
|
|
138
|
+
MAX_RESULT_BUFFER = 64
|
|
139
|
+
RESULT_THREADS = max(1, int(os.getenv('DISPATCHER_RESULT_THREADS', '2')))
|
|
140
|
+
FINALIZE_THREADS = max(1, int(os.getenv('DISPATCHER_FINALIZE_THREADS', '2')))
|
|
141
|
+
|
|
142
|
+
# After 20 minutes, check if a submission is still making progress.
|
|
143
|
+
# In the case of a crash somewhere else in the system, we may not have
|
|
144
|
+
# gotten a message we are expecting. This should prompt a retry in most
|
|
145
|
+
# cases.
|
|
146
|
+
SUBMISSION_TOTAL_TIMEOUT = 60 * 20
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class Dispatcher(ThreadedCoreBase):
|
|
150
|
+
# @staticmethod
|
|
151
|
+
# def all_instances(persistent_redis: Redis):
|
|
152
|
+
# return Hash(DISPATCH_DIRECTORY, host=persistent_redis).keys()
|
|
153
|
+
|
|
154
|
+
# @staticmethod
|
|
155
|
+
# def instance_assignment_size(persistent_redis, instance_id):
|
|
156
|
+
# return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).length()
|
|
157
|
+
|
|
158
|
+
# @staticmethod
|
|
159
|
+
# def instance_assignment(persistent_redis, instance_id) -> list[str]:
|
|
160
|
+
# return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).keys()
|
|
161
|
+
|
|
162
|
+
# @staticmethod
|
|
163
|
+
# def all_queue_lengths(redis, instance_id):
|
|
164
|
+
# return {
|
|
165
|
+
# 'start': NamedQueue(DISPATCH_START_EVENTS + instance_id, host=redis).length(),
|
|
166
|
+
# 'result': NamedQueue(DISPATCH_RESULT_QUEUE + instance_id, host=redis).length(),
|
|
167
|
+
# 'command': NamedQueue(DISPATCH_COMMAND_QUEUE + instance_id, host=redis).length()
|
|
168
|
+
# }
|
|
169
|
+
|
|
170
|
+
def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None,
|
|
171
|
+
config=None, counter_name: str = 'dispatcher'):
|
|
172
|
+
super().__init__('assemblyline.dispatcher', config=config, datastore=datastore,
|
|
173
|
+
redis=redis, redis_persist=redis_persist, logger=logger)
|
|
174
|
+
|
|
175
|
+
# Load the datastore collections that we are going to be using
|
|
176
|
+
self.instance_id = uuid.uuid4().hex
|
|
177
|
+
self.tasks: dict[str, SubmissionTask] = {}
|
|
178
|
+
self.finalizing = threading.Event()
|
|
179
|
+
self.finalizing_start = 0.0
|
|
180
|
+
|
|
181
|
+
# Build some utility classes
|
|
182
|
+
self.scheduler = Scheduler(self.datastore, self.config, self.redis)
|
|
183
|
+
self.running_tasks: Hash[dict] = Hash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
|
|
184
|
+
self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
|
|
185
|
+
|
|
186
|
+
self.classification_engine = get_classification()
|
|
187
|
+
|
|
188
|
+
# Output. Duplicate our input traffic into this queue so it may be cloned by other systems
|
|
189
|
+
self.traffic_queue = CommsQueue('submissions', self.redis)
|
|
190
|
+
self.quota_tracker = UserQuotaTracker('submissions', timeout=60 * 60, host=self.redis_persist)
|
|
191
|
+
self.submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
|
|
192
|
+
|
|
193
|
+
# Table to track the running dispatchers
|
|
194
|
+
self.dispatchers_directory: Hash[int] = Hash(DISPATCH_DIRECTORY, host=self.redis_persist)
|
|
195
|
+
self.dispatchers_directory_finalize: Hash[int] = Hash(DISPATCH_DIRECTORY_FINALIZE, host=self.redis_persist)
|
|
196
|
+
self.running_dispatchers_estimate = 1
|
|
197
|
+
|
|
198
|
+
# Tables to track what submissions are running where
|
|
199
|
+
self.active_submissions = Hash(DISPATCH_TASK_ASSIGNMENT+self.instance_id, host=self.redis_persist)
|
|
200
|
+
self.submissions_assignments = Hash(DISPATCH_TASK_HASH, host=self.redis_persist)
|
|
201
|
+
self.ingester_scanning = Hash('m-scanning-table', self.redis_persist)
|
|
202
|
+
|
|
203
|
+
# Communications queues
|
|
204
|
+
self.start_queue: NamedQueue[tuple[str, str, str, str]] =\
|
|
205
|
+
NamedQueue(DISPATCH_START_EVENTS+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
|
|
206
|
+
self.result_queue: NamedQueue[dict] =\
|
|
207
|
+
NamedQueue(DISPATCH_RESULT_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
|
|
208
|
+
self.command_queue: NamedQueue[dict] =\
|
|
209
|
+
NamedQueue(DISPATCH_COMMAND_QUEUE+self.instance_id, host=self.redis, ttl=QUEUE_EXPIRY)
|
|
210
|
+
|
|
211
|
+
# Publish counters to the metrics sink.
|
|
212
|
+
self.counter = MetricsFactory(metrics_type='dispatcher', schema=Metrics, name=counter_name,
|
|
213
|
+
redis=self.redis, config=self.config)
|
|
214
|
+
|
|
215
|
+
self.apm_client = None
|
|
216
|
+
if self.config.core.metrics.apm_server.server_url:
|
|
217
|
+
elasticapm.instrument()
|
|
218
|
+
self.apm_client = get_apm_client("dispatcher")
|
|
219
|
+
|
|
220
|
+
self._service_timeouts: TimeoutTable[tuple[str, str, str], str] = TimeoutTable()
|
|
221
|
+
self._submission_timeouts: TimeoutTable[str, None] = TimeoutTable()
|
|
222
|
+
|
|
223
|
+
# Setup queues for work to be divided into
|
|
224
|
+
self.process_queues: list[PriorityQueue[DispatchAction]] = [PriorityQueue() for _ in range(RESULT_THREADS)]
|
|
225
|
+
self.queue_ready_signals: list[threading.Semaphore] = [threading.Semaphore(MAX_RESULT_BUFFER)
|
|
226
|
+
for _ in range(RESULT_THREADS)]
|
|
227
|
+
|
|
228
|
+
# Queue of finished submissions/errors waiting to be saved into elastic
|
|
229
|
+
self.finalize_queue = Queue()
|
|
230
|
+
self.error_queue: Queue[tuple[str, Error]] = Queue()
|
|
231
|
+
|
|
232
|
+
# Queue to hold of service timeouts that need to be processed
|
|
233
|
+
# They will be held in this queue until results in redis are
|
|
234
|
+
# already processed
|
|
235
|
+
self.timeout_queue: Queue[DispatchAction] = Queue()
|
|
236
|
+
|
|
237
|
+
# Utility object to handle post-processing actions
|
|
238
|
+
self.postprocess_worker = ActionWorker(cache=False, config=self.config, datastore=self.datastore,
|
|
239
|
+
redis_persist=self.redis_persist)
|
|
240
|
+
|
|
241
|
+
# Update bad sid list
|
|
242
|
+
self.redis_bad_sids = Set(BAD_SID_HASH, host=self.redis_persist)
|
|
243
|
+
self.bad_sids: set[str] = set(self.redis_bad_sids.members())
|
|
244
|
+
|
|
245
|
+
# Event Watchers
|
|
246
|
+
self.service_change_watcher = EventWatcher(self.redis, deserializer=ServiceChange.deserialize)
|
|
247
|
+
self.service_change_watcher.register('changes.services.*', self._handle_service_change_event)
|
|
248
|
+
|
|
249
|
+
def stop(self):
|
|
250
|
+
super().stop()
|
|
251
|
+
self.service_change_watcher.stop()
|
|
252
|
+
self.postprocess_worker.stop()
|
|
253
|
+
|
|
254
|
+
def try_run(self):
|
|
255
|
+
self.log.info(f'Using dispatcher id {self.instance_id}')
|
|
256
|
+
self.service_change_watcher.start()
|
|
257
|
+
threads = {
|
|
258
|
+
# Process to protect against old dead tasks timing out
|
|
259
|
+
'Global Timeout Backstop': self.timeout_backstop,
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
for ii in range(RESULT_THREADS):
|
|
263
|
+
# Process results
|
|
264
|
+
threads[f'Service Update Worker #{ii}'] = self.service_worker_factory(ii)
|
|
265
|
+
|
|
266
|
+
self.maintain_threads(threads)
|
|
267
|
+
|
|
268
|
+
# If the dispatcher is exiting cleanly remove as many tasks from the service queues as we can
|
|
269
|
+
service_queues = {}
|
|
270
|
+
for task in self.tasks.values():
|
|
271
|
+
for (_sha256, service_name), dispatch_key in task.queue_keys.items():
|
|
272
|
+
try:
|
|
273
|
+
s_queue = service_queues[service_name]
|
|
274
|
+
except KeyError:
|
|
275
|
+
s_queue = get_service_queue(service_name, self.redis)
|
|
276
|
+
service_queues[service_name] = s_queue
|
|
277
|
+
s_queue.remove(dispatch_key)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def timeout_backstop(self):
|
|
281
|
+
while self.running:
|
|
282
|
+
cpu_mark = time.process_time()
|
|
283
|
+
time_mark = time.time()
|
|
284
|
+
|
|
285
|
+
# Start of process dispatcher transaction
|
|
286
|
+
with apm_span(self.apm_client, 'timeout_backstop'):
|
|
287
|
+
dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
|
|
288
|
+
error_tasks = []
|
|
289
|
+
|
|
290
|
+
# iterate running tasks
|
|
291
|
+
for _task_key, task_body in self.running_tasks:
|
|
292
|
+
task = ServiceTask(task_body)
|
|
293
|
+
# Its a bad task if it's dispatcher isn't running
|
|
294
|
+
if task.metadata['dispatcher__'] not in dispatcher_instances:
|
|
295
|
+
error_tasks.append(task)
|
|
296
|
+
# Its a bad task if its OUR task, but we aren't tracking that submission anymore
|
|
297
|
+
if task.metadata['dispatcher__'] == self.instance_id and task.sid not in self.tasks:
|
|
298
|
+
error_tasks.append(task)
|
|
299
|
+
|
|
300
|
+
# Refresh our dispatcher list.
|
|
301
|
+
dispatcher_instances = set(Dispatcher.all_instances(persistent_redis=self.redis_persist))
|
|
302
|
+
other_dispatcher_instances = dispatcher_instances - {self.instance_id}
|
|
303
|
+
|
|
304
|
+
# The remaining running tasks (probably) belong to dead dispatchers and can be killed
|
|
305
|
+
for task in error_tasks:
|
|
306
|
+
# Check against our refreshed dispatcher list in case it changed during the previous scan
|
|
307
|
+
if task.metadata['dispatcher__'] in other_dispatcher_instances:
|
|
308
|
+
continue
|
|
309
|
+
|
|
310
|
+
# If its already been handled, we don't need to
|
|
311
|
+
if not self.running_tasks.pop(task.key()):
|
|
312
|
+
continue
|
|
313
|
+
|
|
314
|
+
# Kill the task that would report to a dead dispatcher
|
|
315
|
+
self.log.warning(f"[{task.sid}]Task killed by backstop {task.service_name} {task.fileinfo.sha256}")
|
|
316
|
+
self.scaler_timeout_queue.push({
|
|
317
|
+
'service': task.service_name,
|
|
318
|
+
'container': task.metadata['worker__']
|
|
319
|
+
})
|
|
320
|
+
|
|
321
|
+
# Report to the metrics system that a recoverable error has occurred for that service
|
|
322
|
+
export_metrics_once(task.service_name, ServiceMetrics, dict(fail_recoverable=1),
|
|
323
|
+
host=task.metadata['worker__'], counter_type='service', redis=self.redis)
|
|
324
|
+
|
|
325
|
+
self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
|
|
326
|
+
self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
|
|
327
|
+
self.sleep(GLOBAL_TASK_CHECK_INTERVAL)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Ingester
|
|
4
|
+
|
|
5
|
+
Ingester is responsible for monitoring for incoming submission requests,
|
|
6
|
+
sending submissions, waiting for submissions to complete, sending a message
|
|
7
|
+
to a notification queue as specified by the submission and, based on the
|
|
8
|
+
score received, possibly sending a message to indicate that an alert should
|
|
9
|
+
be created.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from os import environ
|
|
16
|
+
from random import random
|
|
17
|
+
from typing import Any, Iterable, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
import elasticapm
|
|
20
|
+
|
|
21
|
+
from assemblyline import odm
|
|
22
|
+
from assemblyline.common import exceptions, forge, isotime
|
|
23
|
+
from assemblyline.common.constants import DROP_PRIORITY
|
|
24
|
+
from assemblyline.common.exceptions import get_stacktrace_info
|
|
25
|
+
from assemblyline.common.importing import load_module_by_path
|
|
26
|
+
from assemblyline.common.isotime import now, now_as_iso
|
|
27
|
+
from assemblyline.common.metrics import MetricsFactory
|
|
28
|
+
from assemblyline.common.postprocess import ActionWorker
|
|
29
|
+
from assemblyline.common.str_utils import dotdump, safe_str
|
|
30
|
+
from assemblyline.datastore.exceptions import DataStoreException
|
|
31
|
+
from assemblyline.filestore import CorruptedFileStoreException, FileStoreException
|
|
32
|
+
from assemblyline.odm.messages.ingest_heartbeat import Metrics
|
|
33
|
+
from assemblyline.odm.messages.submission import Submission as MessageSubmission
|
|
34
|
+
from assemblyline.odm.messages.submission import SubmissionMessage
|
|
35
|
+
from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
|
|
36
|
+
from assemblyline.odm.models.filescore import FileScore
|
|
37
|
+
from assemblyline.odm.models.submission import Submission as DatabaseSubmission
|
|
38
|
+
from assemblyline.odm.models.submission import SubmissionParams
|
|
39
|
+
from assemblyline.odm.models.user import User
|
|
40
|
+
from assemblyline.remote.datatypes.events import EventWatcher
|
|
41
|
+
from assemblyline.remote.datatypes.hash import Hash
|
|
42
|
+
from assemblyline.remote.datatypes.queues.comms import CommsQueue
|
|
43
|
+
from assemblyline.remote.datatypes.queues.multi import MultiQueue
|
|
44
|
+
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
45
|
+
from assemblyline.remote.datatypes.queues.priority import PriorityQueue
|
|
46
|
+
from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker
|
|
47
|
+
from assemblyline_core.dispatching.dispatcher import Dispatcher
|
|
48
|
+
from assemblyline_core.server_base import ThreadedCoreBase
|
|
49
|
+
from assemblyline_core.submission_client import SubmissionClient
|
|
50
|
+
|
|
51
|
+
from .constants import COMPLETE_QUEUE_NAME, INGEST_QUEUE_NAME, drop_chance
|
|
52
|
+
|
|
53
|
+
_dup_prefix = 'w-m-'
|
|
54
|
+
_notification_queue_prefix = 'nq-'
|
|
55
|
+
_max_retries = 10
|
|
56
|
+
_retry_delay = 60 * 4 # Wait 4 minutes to retry
|
|
57
|
+
_max_time = 2 * 24 * 60 * 60 # Wait 2 days for responses.
|
|
58
|
+
HOUR_IN_SECONDS = 60 * 60
|
|
59
|
+
COMPLETE_THREADS = int(environ.get('INGESTER_COMPLETE_THREADS', 4))
|
|
60
|
+
INGEST_THREADS = int(environ.get('INGESTER_INGEST_THREADS', 1))
|
|
61
|
+
SUBMIT_THREADS = int(environ.get('INGESTER_SUBMIT_THREADS', 4))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def must_drop(length: int, maximum: int) -> bool:
|
|
65
|
+
"""
|
|
66
|
+
To calculate the probability of dropping an incoming submission we compare
|
|
67
|
+
the number returned by random() which will be in the range [0,1) and the
|
|
68
|
+
number returned by tanh() which will be in the range (-1,1).
|
|
69
|
+
|
|
70
|
+
If length is less than maximum the number returned by tanh will be negative
|
|
71
|
+
and so drop will always return False since the value returned by random()
|
|
72
|
+
cannot be less than 0.
|
|
73
|
+
|
|
74
|
+
If length is greater than maximum, drop will return True with a probability
|
|
75
|
+
that increases as the distance between maximum and length increases:
|
|
76
|
+
|
|
77
|
+
Length Chance of Dropping
|
|
78
|
+
|
|
79
|
+
<= maximum 0
|
|
80
|
+
1.5 * maximum 0.76
|
|
81
|
+
2 * maximum 0.96
|
|
82
|
+
3 * maximum 0.999
|
|
83
|
+
"""
|
|
84
|
+
return random() < drop_chance(length, maximum)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@odm.model()
|
|
88
|
+
class IngestTask(odm.Model):
|
|
89
|
+
# Submission Parameters
|
|
90
|
+
submission: MessageSubmission = odm.compound(MessageSubmission)
|
|
91
|
+
|
|
92
|
+
# Shortcut for properties of the submission
|
|
93
|
+
@property
|
|
94
|
+
def file_size(self) -> int:
|
|
95
|
+
return sum(file.size for file in self.submission.files)
|
|
96
|
+
|
|
97
|
+
@property
|
|
98
|
+
def params(self) -> SubmissionParams:
|
|
99
|
+
return self.submission.params
|
|
100
|
+
|
|
101
|
+
@property
|
|
102
|
+
def sha256(self) -> str:
|
|
103
|
+
return self.submission.files[0].sha256
|
|
104
|
+
|
|
105
|
+
# Information about the ingestion itself, parameters irrelevant
|
|
106
|
+
retries = odm.Integer(default=0)
|
|
107
|
+
|
|
108
|
+
# Fields added after a submission is complete for notification/bookkeeping processes
|
|
109
|
+
failure = odm.Text(default='') # If the ingestion has failed for some reason, what is it?
|
|
110
|
+
score = odm.Optional(odm.Integer()) # Score from previous processing of this file
|
|
111
|
+
extended_scan = odm.Enum(EXTENDED_SCAN_VALUES, default="skipped") # Status of the extended scan
|
|
112
|
+
ingest_id = odm.UUID() # Ingestion Identifier
|
|
113
|
+
ingest_time = odm.Date(default="NOW") # Time at which the file was ingested
|
|
114
|
+
notify_time = odm.Optional(odm.Date()) # Time at which the user is notified that the submission is finished
|
|
115
|
+
to_ingest = odm.Boolean(default=False)
|
|
116
|
+
|
|
@@ -6,6 +6,7 @@ from assemblyline.common import forge, metrics
|
|
|
6
6
|
from assemblyline.common.archiving import ARCHIVE_QUEUE_NAME
|
|
7
7
|
from assemblyline.common.constants import DISPATCH_TASK_HASH, SUBMISSION_QUEUE, \
|
|
8
8
|
SERVICE_STATE_HASH, ServiceStatus
|
|
9
|
+
from assemblyline.common.dispatcher import Dispatcher
|
|
9
10
|
from assemblyline.datastore.exceptions import SearchException
|
|
10
11
|
from assemblyline.odm.messages.retrohunt_heartbeat import RetrohuntMessage
|
|
11
12
|
from assemblyline.odm.messages.scaler_heartbeat import ScalerMessage
|
|
@@ -25,7 +26,6 @@ from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
|
25
26
|
from assemblyline.remote.datatypes.queues.priority import PriorityQueue
|
|
26
27
|
|
|
27
28
|
from assemblyline_core.alerter.run_alerter import ALERT_QUEUE_NAME, ALERT_RETRY_QUEUE_NAME
|
|
28
|
-
from assemblyline_core.dispatching.dispatcher import Dispatcher
|
|
29
29
|
from assemblyline_core.ingester import INGEST_QUEUE_NAME, drop_chance
|
|
30
30
|
from assemblyline_core.ingester.constants import COMPLETE_QUEUE_NAME
|
|
31
31
|
|
{assemblyline_core-4.5.1.dev426 → assemblyline_core-4.7.0.dev45}/assemblyline_core/replay/client.py
RENAMED
|
@@ -4,13 +4,13 @@ import time
|
|
|
4
4
|
|
|
5
5
|
from assemblyline.common import forge
|
|
6
6
|
from assemblyline.common.bundling import create_bundle, import_bundle
|
|
7
|
+
from assemblyline.common.classification import InvalidClassification
|
|
7
8
|
from assemblyline.odm import Model
|
|
8
|
-
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
9
9
|
from assemblyline.remote.datatypes.hash import Hash
|
|
10
|
-
from
|
|
10
|
+
from assemblyline.remote.datatypes.queues.named import NamedQueue
|
|
11
11
|
from assemblyline_core.badlist_client import BadlistClient
|
|
12
|
+
from assemblyline_core.replay.replay import INPUT_TYPES
|
|
12
13
|
from assemblyline_core.safelist_client import SafelistClient
|
|
13
|
-
from assemblyline_core.signature_client import SignatureClient
|
|
14
14
|
|
|
15
15
|
EMPTY_WAIT_TIME = int(os.environ.get('EMPTY_WAIT_TIME', '30'))
|
|
16
16
|
REPLAY_REQUESTED = 'requested'
|
|
@@ -296,13 +296,17 @@ class APIClient(ClientBase):
|
|
|
296
296
|
def create_al_bundle(self, id, bundle_path, use_alert=False):
|
|
297
297
|
self.al_client.bundle.create(id, output=bundle_path, use_alert=use_alert)
|
|
298
298
|
|
|
299
|
-
def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True):
|
|
300
|
-
self.al_client.bundle.import_bundle(
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
299
|
+
def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
|
|
300
|
+
self.al_client.bundle.import_bundle(
|
|
301
|
+
bundle_path,
|
|
302
|
+
min_classification=min_classification,
|
|
303
|
+
rescan_services=rescan_services,
|
|
304
|
+
exist_ok=exist_ok,
|
|
305
|
+
reclassification=reclassification,
|
|
306
|
+
to_ingest=True, # send submissions to ingester
|
|
307
|
+
)
|
|
304
308
|
|
|
305
|
-
def load_json(self, file_path):
|
|
309
|
+
def load_json(self, file_path, reclassification=None):
|
|
306
310
|
from assemblyline_client import ClientError
|
|
307
311
|
|
|
308
312
|
# We're assuming all JSON that is loaded has an "enabled" field
|
|
@@ -374,6 +378,7 @@ class DirectClient(ClientBase):
|
|
|
374
378
|
# Initialize connection to redis-persistent for checkpointing
|
|
375
379
|
redis_persist = get_client(config.core.redis.persistent.host,
|
|
376
380
|
config.core.redis.persistent.port, False)
|
|
381
|
+
self.classification = forge.get_classification()
|
|
377
382
|
self.datastore = forge.get_datastore(config=config)
|
|
378
383
|
self.queues = {
|
|
379
384
|
queue_type: NamedQueue(f"replay_{queue_type}", host=redis)
|
|
@@ -409,13 +414,17 @@ class DirectClient(ClientBase):
|
|
|
409
414
|
temp_bundle_file = create_bundle(id, working_dir=os.path.dirname(bundle_path), use_alert=use_alert)
|
|
410
415
|
os.rename(temp_bundle_file, bundle_path)
|
|
411
416
|
|
|
412
|
-
def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True):
|
|
413
|
-
import_bundle(
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
+
def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True, reclassification=None):
|
|
418
|
+
import_bundle(
|
|
419
|
+
bundle_path,
|
|
420
|
+
min_classification=min_classification,
|
|
421
|
+
rescan_services=rescan_services,
|
|
422
|
+
exist_ok=exist_ok,
|
|
423
|
+
reclassification=reclassification,
|
|
424
|
+
to_ingest=True, # send submissions to ingester
|
|
425
|
+
)
|
|
417
426
|
|
|
418
|
-
def load_json(self, file_path):
|
|
427
|
+
def load_json(self, file_path, reclassification=None):
|
|
419
428
|
# We're assuming all JSON that is loaded has an "enabled" field
|
|
420
429
|
collection = os.path.basename(file_path).split('_', 1)[0]
|
|
421
430
|
with open(file_path) as fp:
|
|
@@ -428,6 +437,16 @@ class DirectClient(ClientBase):
|
|
|
428
437
|
|
|
429
438
|
# Let's see if there's an existing document with the same ID in the collection
|
|
430
439
|
obj = es_collection.get_if_exists(id, as_obj=False)
|
|
440
|
+
if obj:
|
|
441
|
+
# Check if the classification of the object is compatible with the system's classification
|
|
442
|
+
try:
|
|
443
|
+
self.classification.normalize_classification(obj['classification'])
|
|
444
|
+
except InvalidClassification:
|
|
445
|
+
if reclassification:
|
|
446
|
+
# If reclassification is requested, then we can change the classification
|
|
447
|
+
obj['classification'] = reclassification
|
|
448
|
+
else:
|
|
449
|
+
raise
|
|
431
450
|
|
|
432
451
|
if collection == "workflow":
|
|
433
452
|
# If there have been any edits by another user, then preserve the enabled state
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import shutil
|
|
2
1
|
import os
|
|
2
|
+
import shutil
|
|
3
3
|
|
|
4
4
|
from cart import unpack_file
|
|
5
5
|
|
|
@@ -32,15 +32,16 @@ class ReplayLoaderWorker(ReplayBase):
|
|
|
32
32
|
if file_path.endswith(".al_bundle"):
|
|
33
33
|
self.client.load_bundle(file_path,
|
|
34
34
|
min_classification=self.replay_config.loader.min_classification,
|
|
35
|
-
rescan_services=self.replay_config.loader.rescan
|
|
35
|
+
rescan_services=self.replay_config.loader.rescan,
|
|
36
|
+
reclassification=self.replay_config.loader.reclassification)
|
|
36
37
|
elif file_path.endswith(".al_json"):
|
|
37
|
-
self.client.load_json(file_path)
|
|
38
|
+
self.client.load_json(file_path, reclassification=self.replay_config.loader.reclassification)
|
|
38
39
|
|
|
39
40
|
elif file_path.endswith(".al_json.cart"):
|
|
40
41
|
cart_path = file_path
|
|
41
42
|
file_path = file_path[:-5]
|
|
42
43
|
unpack_file(cart_path, file_path)
|
|
43
|
-
self.client.load_json(file_path)
|
|
44
|
+
self.client.load_json(file_path, reclassification=self.replay_config.loader.reclassification)
|
|
44
45
|
os.unlink(cart_path)
|
|
45
46
|
|
|
46
47
|
if os.path.exists(file_path):
|
|
@@ -55,11 +56,11 @@ class ReplayLoaderWorker(ReplayBase):
|
|
|
55
56
|
# Terminate on NFS-related error
|
|
56
57
|
self.log.warning("'Invalid cross-device link' exception detected. Terminating..")
|
|
57
58
|
self.stop()
|
|
58
|
-
except Exception:
|
|
59
|
+
except Exception as e:
|
|
59
60
|
# Make sure failed directory exists
|
|
60
61
|
os.makedirs(self.replay_config.loader.failed_directory, exist_ok=True)
|
|
61
62
|
|
|
62
|
-
self.log.error(f"Failed to load the bundle file {file_path}, moving it to the failed directory.")
|
|
63
|
+
self.log.error(f"Failed to load the bundle file {file_path}, moving it to the failed directory. Reason: {e}")
|
|
63
64
|
failed_path = os.path.join(self.replay_config.loader.failed_directory, os.path.basename(file_path))
|
|
64
65
|
shutil.move(file_path, failed_path)
|
|
65
66
|
|