assemblyline-core 4.4.2.dev6__tar.gz → 4.4.2.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries; it is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release: this version of assemblyline-core has been flagged as possibly problematic.
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/PKG-INFO +1 -1
- assemblyline-core-4.4.2.dev10/assemblyline_core/VERSION +1 -0
- assemblyline-core-4.4.2.dev10/assemblyline_core/badlist_client.py +134 -0
- assemblyline-core-4.4.2.dev10/assemblyline_core/replay/client.py +447 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/creator/run.py +13 -1
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/creator/run_worker.py +75 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/loader/run.py +1 -1
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/loader/run_worker.py +7 -3
- assemblyline-core-4.4.2.dev10/assemblyline_core/safelist_client.py +136 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +25 -21
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core.egg-info/PKG-INFO +1 -1
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_replay.py +105 -18
- assemblyline-core-4.4.2.dev6/assemblyline_core/VERSION +0 -1
- assemblyline-core-4.4.2.dev6/assemblyline_core/badlist_client.py +0 -47
- assemblyline-core-4.4.2.dev6/assemblyline_core/replay/client.py +0 -315
- assemblyline-core-4.4.2.dev6/assemblyline_core/safelist_client.py +0 -60
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/LICENCE.md +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/README.md +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/alerter/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/alerter/processing.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/alerter/run_alerter.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/archiver/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/archiver/run_archiver.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/__main__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/client.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/dispatcher.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/schedules.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/dispatching/timeout.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/expiry/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/expiry/run_expiry.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/ingester/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/ingester/__main__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/ingester/constants.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/ingester/ingester.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/es_metrics.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/helper.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/metrics_server.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/plumber/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/plumber/run_plumber.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/creator/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/loader/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/replay.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/collection.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/controllers/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/controllers/interface.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/run_scaler.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/scaler/scaler_server.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/server_base.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/submission_client.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/tasking_client.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/updater/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/updater/helper.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/updater/run_updater.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/crawler.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/department_map.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/safelist.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/stream_map.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/vacuum/worker.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/workflow/__init__.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/workflow/run_workflow.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core.egg-info/SOURCES.txt +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core.egg-info/dependency_links.txt +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core.egg-info/requires.txt +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core.egg-info/top_level.txt +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/setup.cfg +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/setup.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_alerter.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_dispatcher.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_expiry.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_plumber.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_scaler.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_scheduler.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_simulation.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_vacuum.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_worker_ingest.py +0 -0
- {assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/test/test_worker_submit.py +0 -0
assemblyline-core-4.4.2.dev10/assemblyline_core/VERSION

```diff
@@ -0,0 +1 @@
+4.4.2.dev10
```
assemblyline-core-4.4.2.dev10/assemblyline_core/badlist_client.py

```diff
@@ -0,0 +1,134 @@
+import hashlib
+import logging
+
+from assemblyline.common import forge
+from assemblyline.common.chunk import chunk
+from assemblyline.common.isotime import now_as_iso
+from assemblyline.datastore.helper import AssemblylineDatastore
+
+CHUNK_SIZE = 1000
+CLASSIFICATION = forge.get_classification()
+
+
+class InvalidBadhash(Exception):
+    pass
+
+
+class BadlistClient:
+    """A helper class to simplify badlisting for privileged services and service-server."""
+
+    def __init__(self, datastore: AssemblylineDatastore = None, config=None):
+        self.log = logging.getLogger('assemblyline.badlist_client')
+        self.config = config or forge.CachedObject(forge.get_config)
+        self.datastore = datastore or forge.get_datastore(self.config)
+
+    # Badlist
+    def exists(self, qhash):
+        return self.datastore.badlist.get_if_exists(qhash, as_obj=False)
+
+    def exists_tags(self, tag_map):
+        lookup_keys = []
+        for tag_type, tag_values in tag_map.items():
+            for tag_value in tag_values:
+                lookup_keys.append(hashlib.sha256(f"{tag_type}: {tag_value}".encode('utf8')).hexdigest())
+
+        # Elasticsearch's result window can't be more than 10000 rows
+        # we will query for matches in chunks
+        results = []
+        for key_chunk in chunk(lookup_keys, CHUNK_SIZE):
+            results += self.datastore.badlist.search("*", fl="*", rows=CHUNK_SIZE,
+                                                     as_obj=False, key_space=key_chunk)['items']
+
+        return results
+
+    def find_similar_tlsh(self, tlsh):
+        return self.datastore.badlist.search(f"hashes.tlsh:{tlsh}", fl="*", as_obj=False)['items']
+
+    def find_similar_ssdeep(self, ssdeep):
+        try:
+            _, long, _ = ssdeep.replace('/', '\\/').split(":")
+            return self.datastore.badlist.search(f"hashes.ssdeep:{long}~", fl="*", as_obj=False)['items']
+        except ValueError:
+            self.log.warning(f'This is not a valid SSDeep hash: {ssdeep}')
+            return []
+
+    @staticmethod
+    def _merge_hashes(new, old):
+        # Account for the possibility of merging with null types
+        if not (new or old):
+            # Both are null
+            raise ValueError("New and old are both null")
+        elif not (new and old):
+            # Only one is null, in which case return the other
+            return new or old
+
+        try:
+            # Check if hash types match
+            if new['type'] != old['type']:
+                raise InvalidBadhash(f"Bad hash type mismatch: {new['type']} != {old['type']}")
+
+            # Use the new classification but we will recompute it later anyway
+            old['classification'] = new['classification']
+
+            # Update updated time
+            old['updated'] = new.get('updated', now_as_iso())
+
+            # Update hashes
+            old['hashes'].update({k: v for k, v in new['hashes'].items() if v})
+
+            # Merge attributions
+            if not old['attribution']:
+                old['attribution'] = new.get('attribution', None)
+            elif new.get('attribution', None):
+                for key in ["actor", 'campaign', 'category', 'exploit', 'implant', 'family', 'network']:
+                    old_value = old['attribution'].get(key, []) or []
+                    new_value = new['attribution'].get(key, []) or []
+                    old['attribution'][key] = list(set(old_value + new_value)) or None
+
+            if old['attribution'] is not None:
+                old['attribution'] = {key: value for key, value in old['attribution'].items() if value}
+
+            # Update type specific info
+            if old['type'] == 'file':
+                old.setdefault('file', {})
+                new_names = new.get('file', {}).pop('name', [])
+                if 'name' in old['file']:
+                    for name in new_names:
+                        if name not in old['file']['name']:
+                            old['file']['name'].append(name)
+                elif new_names:
+                    old['file']['name'] = new_names
+                old['file'].update({k: v for k, v in new.get('file', {}).items() if v})
+            elif old['type'] == 'tag':
+                old['tag'] = new['tag']
+
+            # Merge sources
+            src_map = {x['name']: x for x in new['sources']}
+            if not src_map:
+                raise InvalidBadhash("No valid source found")
+
+            old_src_map = {x['name']: x for x in old['sources']}
+            for name, src in src_map.items():
+                if name not in old_src_map:
+                    old_src_map[name] = src
+                else:
+                    old_src = old_src_map[name]
+                    if old_src['type'] != src['type']:
+                        raise InvalidBadhash(f"Source {name} has a type conflict: {old_src['type']} != {src['type']}")
+
+                    for reason in src['reason']:
+                        if reason not in old_src['reason']:
+                            old_src['reason'].append(reason)
+                    old_src['classification'] = src.get('classification', old_src['classification'])
+            old['sources'] = list(old_src_map.values())
+
+            # Calculate the new classification
+            for src in old['sources']:
+                old['classification'] = CLASSIFICATION.max_classification(
+                    old['classification'], src.get('classification', None))
+
+            # Set the expiry
+            old['expiry_ts'] = new.get('expiry_ts', None)
+            return old
+        except Exception as e:
+            raise InvalidBadhash(f"Invalid data provided: {str(e)}")
```
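
For orientation, a minimal usage sketch of the new client added above; the no-argument instantiation relies on the forge defaults shown in the diff, and the example hash and tag values are illustrative assumptions, not part of this release:

```python
# Hypothetical usage of the new BadlistClient (values are made up).
# With no arguments it falls back to forge.get_config()/get_datastore().
from assemblyline_core.badlist_client import BadlistClient

client = BadlistClient()

# Direct lookup of a known-bad hash key
hit = client.exists("3f786850e387550fdab836ed7e6dc881de23001b")

# Tag lookup: each key is sha256("{tag_type}: {tag_value}"), queried in
# CHUNK_SIZE batches to stay within Elasticsearch's 10000-row result window
hits = client.exists_tags({"network.static.domain": ["bad.example.com"]})
```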
assemblyline-core-4.4.2.dev10/assemblyline_core/replay/client.py

```diff
@@ -0,0 +1,447 @@
+import json
+import os
+import time
+
+from assemblyline.common import forge
+from assemblyline.common.bundling import create_bundle, import_bundle
+from assemblyline.odm import Model
+from assemblyline.remote.datatypes.queues.named import NamedQueue
+from assemblyline.remote.datatypes.hash import Hash
+from assemblyline_core.badlist_client import BadlistClient
+from assemblyline_core.safelist_client import SafelistClient
+
+EMPTY_WAIT_TIME = int(os.environ.get('EMPTY_WAIT_TIME', '30'))
+REPLAY_REQUESTED = 'requested'
+REPLAY_PENDING = 'pending'
+REPLAY_DONE = 'done'
+
+
+class ClientBase(object):
+    def __init__(self, log, lookback_time='*',
+                 alert_fqs=None, badlist_fqs=None, safelist_fqs=None, submission_fqs=None, workflow_fqs=None):
+        # Set logger
+        self.log = log
+
+        # Setup timming
+        self.last_alert_time = self.last_submission_time = self.lookback_time = lookback_time
+
+        # Setup filter queries
+        self.pending_fq = f'NOT metadata.replay:{REPLAY_PENDING}'
+        self.done_fq = f'NOT metadata.replay:{REPLAY_DONE}'
+        self.alert_fqs = alert_fqs or []
+        self.badlist_fqs = badlist_fqs or []
+        self.safelist_fqs = safelist_fqs or []
+        self.submission_fqs = submission_fqs or []
+        self.workflow_fqs = workflow_fqs or []
+
+        # Set running flag
+        self.running = True
+
+    def _put_checkpoint(self, *_):
+        raise NotImplementedError()
+
+    def _get_checkpoint(self, *_):
+        raise NotImplementedError()
+
+    def _get_next_object_ids(self, collection, query, filter_queries, fl, sort):
+        raise NotImplementedError()
+
+    def _get_next_alert_ids(self, query, filter_queries):
+        return self._get_next_object_ids("alert", query, filter_queries, "alert_id,reporting_ts", "reporting_ts asc")
+
+    def _get_next_submission_ids(self, query, filter_queries):
+        return self._get_next_object_ids("submission", query, filter_queries, "sid,times.completed",
+                                         "times.completed asc")
+
+    def _set_bulk_object_pending(self, collection, query, filter_queries, max_docs):
+        raise NotImplementedError()
+
+    def _set_bulk_alert_pending(self, query, filter_queries, max_docs):
+        self._set_bulk_object_pending("alert", query, filter_queries, max_docs)
+
+    def _set_bulk_submission_pending(self, query, filter_queries, max_docs):
+        self._set_bulk_object_pending("submission", query, filter_queries, max_docs)
+
+    def _stream_objects(self, collection, query, fl="*", filter_queries=[]):
+        raise NotImplementedError()
+
+    def _stream_alert_ids(self, query):
+        return self._stream_objects("alert", query, "alert_id,reporting_ts")
+
+    def _stream_submission_ids(self, query):
+        return self._stream_objects("submission", query, "sid,times.completed")
+
+    def create_al_bundle(self, id, bundle_path, use_alert=False):
+        raise NotImplementedError()
+
+    def create_alert_bundle(self, alert_id, bundle_path):
+        self.create_al_bundle(alert_id, bundle_path, use_alert=True)
+
+    def create_submission_bundle(self, sid, bundle_path):
+        self.create_al_bundle(sid, bundle_path)
+
+    def load_bundle(self, *_):
+        raise NotImplementedError()
+
+    def load_json(self, *_):
+        raise NotImplementedError()
+
+    def stop(self):
+        self.running = False
+
+    def set_single_object_complete(self, collection, id):
+        raise NotImplementedError()
+
+    def set_single_alert_complete(self, alert_id):
+        self.set_single_object_complete("alert", alert_id)
+
+    def set_single_submission_complete(self, sid):
+        self.set_single_object_complete("submission", sid)
+
+    def setup_alert_input_queue(self, once=False):
+        # Bootstrap recovery of pending replayed alerts
+        for a in self._stream_alert_ids(f"metadata.replay:{REPLAY_PENDING}"):
+            self.log.info(f"Replaying alert: {a['alert_id']}")
+            self.put_alert(a)
+
+        # Create the list of filter queries
+        processing_fqs = self.alert_fqs + [self.pending_fq, self.done_fq]
+
+        # Run
+        while self.running:
+            # Find alerts
+            alert_input_query = f"reporting_ts:{{{self.last_alert_time} TO now]"
+            alerts = self._get_next_alert_ids(alert_input_query, processing_fqs)
+
+            # Set their pending state
+            if alerts['items']:
+                last_time = alerts['items'][-1]['reporting_ts']
+                bulk_query = f"reporting_ts:{{{self.last_alert_time} TO {last_time}]"
+                count = len(alerts['items'])
+                self._set_bulk_alert_pending(bulk_query, processing_fqs, count)
+                self.last_alert_time = last_time
+
+            # Queue them
+            for a in alerts['items']:
+                self.log.info(f"Replaying alert: {a['alert_id']}")
+                self.put_alert(a)
+
+            # Wait if nothing found
+            if alerts['total'] == 0:
+                self.last_alert_time = self.lookback_time
+                for _ in range(EMPTY_WAIT_TIME):
+                    if not self.running:
+                        break
+                    time.sleep(1)
+
+            if once:
+                break
+
+    def setup_submission_input_queue(self, once=False):
+        # Bootstrap recovery of pending replayed submission
+        for sub in self._stream_submission_ids(f"metadata.replay:{REPLAY_PENDING}"):
+            self.log.info(f"Replaying submission: {sub['sid']}")
+            self.put_submission(sub)
+
+        # Create the list of filter queries
+        processing_fqs = self.submission_fqs + [self.pending_fq, self.done_fq]
+
+        # Run
+        while self.running:
+            # Find submissions
+            sub_query = f"times.completed:[{self.last_submission_time} TO now]"
+            submissions = self._get_next_submission_ids(sub_query, processing_fqs)
+
+            # Set their pending state
+            if submissions['items']:
+                last_time = submissions['items'][-1]['times']['completed']
+                bulk_query = f"times.completed:[{self.last_submission_time} TO {last_time}]"
+                count = len(submissions['items'])
+                self._set_bulk_submission_pending(bulk_query, processing_fqs, count)
+                self.last_submission_time = last_time
+
+            # Queue them
+            for sub in submissions['items']:
+                self.log.info(f"Replaying submission: {sub['sid']}")
+                self.put_submission(sub)
+
+            # Wait if nothing found
+            if submissions['total'] == 0:
+                self.last_submission_time = self.lookback_time
+                for _ in range(EMPTY_WAIT_TIME):
+                    if not self.running:
+                        break
+                    time.sleep(1)
+
+            if once:
+                break
+
+    def _setup_checkpoint_based_input_queue(self, collection: str, id_field: str, date_field: str, once=False):
+        # At bootstrap, get the last checkpoint
+        checkpoint = self._get_checkpoint(collection)
+        fqs = getattr(self, f"{collection}_fqs")
+
+        # Run
+        while self.running:
+            # Find objects of the collection that haven't been replayed
+            for obj in self._stream_objects(
+                    collection, f"{date_field}:[{checkpoint} TO now]", fl="*,id", filter_queries=fqs):
+                self.log.info(f"Replaying {collection}: {obj[id_field]}")
+                # Submit name queue to be tasked to worker(s) for replay
+                self.put_message(collection, obj)
+                # Update checkpoint
+                checkpoint = obj[date_field]
+
+            # Wait if there are no more items to queue at this time
+            if self._query(collection, f"{date_field}:[{checkpoint} TO now]", fqs, rows=0)['total'] == 0:
+                for _ in range(EMPTY_WAIT_TIME):
+                    if not self.running:
+                        break
+                    time.sleep(1)
+
+            if once:
+                break
+
+    def setup_workflow_input_queue(self, once=False):
+        self._setup_checkpoint_based_input_queue("workflow", "workflow_id", "last_edit", once)
+
+    def setup_badlist_input_queue(self, once=False):
+        self._setup_checkpoint_based_input_queue("badlist", "id", "updated", once)
+
+    def setup_safelist_input_queue(self, once=False):
+        self._setup_checkpoint_based_input_queue("safelist", "id", "updated", once)
+
+    def _query(self, collection, query, filter_queries=[], rows=None, track_total_hits=False):
+        raise NotImplementedError()
+
+    def query_alerts(self, query="*", track_total_hits=False):
+        self._query("alert", query, track_total_hits)
+
+    def get_next_message(self, message_type):
+        raise NotImplementedError()
+
+    def get_next_alert(self):
+        return self.get_next_message("alert")
+
+    def get_next_badlist(self):
+        return self.get_next_message("badlist")
+
+    def get_next_file(self):
+        return self.get_next_message("file")
+
+    def get_next_safelist(self):
+        return self.get_next_message("safelist")
+
+    def get_next_submission(self):
+        return self.get_next_message("submission")
+
+    def get_next_workflow(self):
+        return self.get_next_message("workflow")
+
+    def put_message(self, message_type, message):
+        raise NotImplementedError()
+
+    def put_alert(self, alert):
+        self.put_message("alert", alert)
+
+    def put_badlist(self, badlist):
+        self.put_message("badlist", badlist)
+
+    def put_file(self, path):
+        self.put_message("file", path)
+
+    def put_safelist(self, safelist):
+        self.put_message("safelist", safelist)
+
+    def put_submission(self, submission):
+        self.put_message("submission", submission)
+
+    def put_workflow(self, workflow):
+        self.put_message("workflow", workflow)
+
+
+class APIClient(ClientBase):
+    def __init__(self, log, host, user, apikey, verify, **kwargs):
+        from assemblyline_client import get_client
+
+        # Setup AL client
+        self.al_client = get_client(host, apikey=(user, apikey), verify=verify)
+
+        super().__init__(log, **kwargs)
+
+    def _put_checkpoint(self, collection, checkpoint):
+        return self.al_client.replay.put_checkpoint(collection, checkpoint)
+
+    def _get_checkpoint(self, collection):
+        return self.al_client.replay.get_checkpoint(collection)
+
+    def _get_next_object_ids(self, collection, query, filter_queries, fl, sort):
+        return getattr(self.al_client.search, collection)(query, fl=fl, sort=sort, rows=100, filters=filter_queries)
+
+    def _set_bulk_object_pending(self, collection, query, filter_queries, max_docs):
+        self.al_client.replay.set_bulk_pending(collection, query, filter_queries, max_docs)
+
+    def _stream_objects(self, collection, query, fl="*", filter_queries=[]):
+        return getattr(self.al_client.search.stream, collection)(query, fl=fl, filters=filter_queries, as_obj=False)
+
+    def create_al_bundle(self, id, bundle_path, use_alert=False):
+        self.al_client.bundle.create(id, output=bundle_path, use_alert=use_alert)
+
+    def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True):
+        self.al_client.bundle.import_bundle(bundle_path,
+                                            min_classification=min_classification,
+                                            rescan_services=rescan_services,
+                                            exist_ok=exist_ok)
+
+    def load_json(self, file_path):
+        from assemblyline_client import ClientError
+
+        # We're assuming all JSON that loaded has an "enabled" field
+        collection = os.path.basename(file_path).split('_', 1)[0]
+        with open(file_path) as fp:
+            data_blob = json.load(fp)
+
+        if isinstance(data_blob, list):
+            for data in data_blob:
+                id = data.pop("id")
+                try:
+                    # Let's see if there's an existing document with the same ID in the collection
+                    obj = getattr(self.al_client, collection)(id)
+
+                    if collection == "workflow":
+                        # If there has been any edits by another user, then preserve the enabled state
+                        # Otherwise, the workflow will be synchronized with the origin system
+                        if obj['edited_by'] != data['edited_by']:
+                            data['enabled'] = obj["enabled"]
+
+                        self.al_client.workflow.update(id, data)
+                    elif collection == "badlist":
+                        data['enabled'] = obj["enabled"]
+                        self.al_client.badlist.add_update(data)
+                    elif collection == "safelist":
+                        data['enabled'] = obj["enabled"]
+                        self.al_client.safelist.add_update(data)
+                except ClientError as e:
+                    if e.status_code == 404:
+                        # The document doesn't exist in the system, therefore create it
+                        if collection == "workflow":
+                            self.al_client.workflow.add(data)
+                        elif collection == "badlist":
+                            self.al_client.badlist.add_update(data)
+                        elif collection == "safelist":
+                            self.al_client.safelist.add_update(data)
+                        return
+                    raise
+
+    def set_single_object_complete(self, collection, id):
+        self.al_client.replay.set_complete(collection, id)
+
+    def _query(self, collection, query, filter_queries=[], rows=None, track_total_hits=False):
+        return getattr(self.al_client.search, collection)(
+            query=query, filters=filter_queries, rows=rows, track_total_hits=track_total_hits
+        )
+
+    def get_next_message(self, message_type):
+        return self.al_client.replay.get_message(message_type)
+
+    def put_message(self, message_type, message):
+        if isinstance(message, Model):
+            message = message.as_primitives()
+        self.al_client.replay.put_message(message_type, message)
+
+
+class DirectClient(ClientBase):
+    def __init__(self, log, **kwargs):
+        from assemblyline.remote.datatypes import get_client
+
+        # Setup datastore
+        config = forge.get_config()
+        redis = get_client(config.core.redis.nonpersistent.host, config.core.redis.nonpersistent.port, False)
+        # Initialize connection to redis-persistent for checkpointing
+        redis_persist = get_client(config.core.redis.persistent.host,
+                                   config.core.redis.persistent.port, False)
+        self.datastore = forge.get_datastore(config=config)
+        self.queues = {
+            queue_type: NamedQueue(f"replay_{queue_type}", host=redis)
+            for queue_type in ['alert', 'file', 'submission', 'safelist', 'badlist', 'workflow']
+        }
+        self.checkpoint_hash = Hash('replay_checkpoints', redis_persist)
+
+        super().__init__(log, **kwargs)
+
+    def _query(self, collection, query, filter_queries=[], rows=None, track_total_hits=False):
+        return getattr(self.datastore, collection).search(
+            query, filters=filter_queries, rows=rows, track_total_hits=track_total_hits
+        )
+
+    def _put_checkpoint(self, collection, checkpoint):
+        self.checkpoint_hash.set(collection, checkpoint)
+
+    def _get_checkpoint(self, collection) -> str:
+        return self.checkpoint_hash.get(collection) or "*"
+
+    def _get_next_object_ids(self, collection, query, filter_queries, fl, sort):
+        return getattr(self.datastore, collection).search(query, fl=fl, sort=sort, rows=100, filters=filter_queries)
+
+    def _set_bulk_object_pending(self, collection, query, filter_queries, max_docs):
+        ds_collection = getattr(self.datastore, collection)
+        operations = [(ds_collection.UPDATE_SET, 'metadata.replay', REPLAY_PENDING)]
+        ds_collection.update_by_query(query, operations, filters=filter_queries, max_docs=max_docs)
+
+    def _stream_objects(self, collection, query, fl="*", filter_queries=[]):
+        return getattr(self.datastore, collection).stream_search(query, fl=fl, filters=filter_queries, as_obj=False)
+
+    def create_al_bundle(self, id, bundle_path, use_alert=False):
+        temp_bundle_file = create_bundle(id, working_dir=os.path.dirname(bundle_path), use_alert=use_alert)
+        os.rename(temp_bundle_file, bundle_path)
+
+    def load_bundle(self, bundle_path, min_classification, rescan_services, exist_ok=True):
+        import_bundle(bundle_path,
+                      min_classification=min_classification,
+                      rescan_services=rescan_services,
+                      exist_ok=exist_ok)
+
+    def load_json(self, file_path):
+        # We're assuming all JSON that loaded has an "enabled" field
+        collection = os.path.basename(file_path).split('_', 1)[0]
+        with open(file_path) as fp:
+            data_blob = json.load(fp)
+
+        if isinstance(data_blob, list):
+            es_collection = getattr(self.datastore, collection)
+            for data in data_blob:
+                id = data.pop("id")
+
+                # Let's see if there's an existing document with the same ID in the collection
+                obj = es_collection.get_if_exists(id, as_obj=False)
+
+                if collection == "workflow":
+                    # If there has been any edits by another user, then preserve the enabled state
+                    # Otherwise, the workflow will be synchronized with the origin system
+                    if obj and obj['edited_by'] != data['edited_by']:
+                        data['enabled'] = obj["enabled"]
+                    es_collection.save(id, data)
+                elif collection == "badlist":
+                    if obj:
+                        # Preserve the system's enabled state of the item
+                        data['enabled'] = obj["enabled"]
+                    es_collection.save(id, BadlistClient._merge_hashes(data, obj))
+                elif collection == "safelist":
+                    if obj:
+                        # Preserve the system's enabled state of the item
+                        data['enabled'] = obj["enabled"]
+                    es_collection.save(id, SafelistClient._merge_hashes(data, obj))
+            es_collection.commit()
+
+    def set_single_object_complete(self, collection, id):
+        ds_collection = getattr(self.datastore, collection)
+        operations = [(ds_collection.UPDATE_SET, 'metadata.replay', REPLAY_DONE)]
+        ds_collection.update(id, operations)
+
+    def get_next_message(self, message_type):
+        return self.queues[message_type].pop(blocking=True, timeout=30)
+
+    def put_message(self, message_type, message):
+        if isinstance(message, Model):
+            message = message.as_primitives()
+        self.queues[message_type].push(message)
```
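
The creator/worker split of this client can be seen end to end in a short sketch; a reachable Redis and datastore (via forge defaults) and the logger setup are assumptions for illustration, not part of this release:

```python
# Hypothetical wiring (not from the diff): one DirectClient scans a
# collection and queues items for replay, another pops them off as a worker.
import logging
from assemblyline_core.replay.client import DirectClient

log = logging.getLogger("assemblyline.replay")

creator = DirectClient(log)
# One pass over the badlist collection, starting from the persisted
# 'replay_checkpoints' hash (or '*'), pushing hits onto 'replay_badlist'
creator.setup_badlist_input_queue(once=True)

worker = DirectClient(log)
item = worker.get_next_badlist()  # blocking pop with a 30s timeout
if item is not None:
    log.info(f"Replaying badlist item: {item.get('id')}")
```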
{assemblyline-core-4.4.2.dev6 → assemblyline-core-4.4.2.dev10}/assemblyline_core/replay/creator/run.py

```diff
@@ -18,7 +18,10 @@ class ReplayCreator(ReplayBase):
         # Load client
         client_config = dict(lookback_time=self.replay_config.creator.lookback_time,
                              alert_fqs=self.replay_config.creator.alert_input.filter_queries,
-                             submission_fqs=self.replay_config.creator.submission_input.filter_queries)
+                             badlist_fqs=self.replay_config.creator.badlist_input.filter_queries,
+                             safelist_fqs=self.replay_config.creator.safelist_input.filter_queries,
+                             submission_fqs=self.replay_config.creator.submission_input.filter_queries,
+                             workflow_fqs=self.replay_config.creator.workflow_input.filter_queries)
 
         if self.replay_config.creator.client.type == 'direct':
             self.log.info("Using direct database access client")
@@ -36,9 +39,18 @@ class ReplayCreator(ReplayBase):
         if self.replay_config.creator.alert_input.enabled:
             threads['Load Alerts'] = self.client.setup_alert_input_queue
 
+        if self.replay_config.creator.badlist_input.enabled:
+            threads['Load Badlist Items'] = self.client.setup_badlist_input_queue
+
+        if self.replay_config.creator.safelist_input.enabled:
+            threads['Load Safelist Items'] = self.client.setup_safelist_input_queue
+
         if self.replay_config.creator.submission_input.enabled:
             threads['Load Submissions'] = self.client.setup_submission_input_queue
 
+        if self.replay_config.creator.workflow_input.enabled:
+            threads['Load Workflows'] = self.client.setup_workflow_input_queue
+
         if threads:
             self.maintain_threads(threads)
         else:
```