assemblyline-core 4.4.1.dev278__tar.gz → 4.4.1.dev281__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (83)
  1. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/PKG-INFO +1 -1
  2. assemblyline-core-4.4.1.dev281/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/archiver/run_archiver.py +7 -4
  4. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/plumber/run_plumber.py +47 -5
  5. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core.egg-info/PKG-INFO +1 -1
  6. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_plumber.py +40 -5
  7. assemblyline-core-4.4.1.dev278/assemblyline_core/VERSION +0 -1
  8. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/LICENCE.md +0 -0
  9. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/README.md +0 -0
  10. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/__init__.py +0 -0
  11. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/alerter/__init__.py +0 -0
  12. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/alerter/processing.py +0 -0
  13. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/alerter/run_alerter.py +0 -0
  14. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/archiver/__init__.py +0 -0
  15. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/__init__.py +0 -0
  16. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/__main__.py +0 -0
  17. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/client.py +0 -0
  18. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/dispatcher.py +0 -0
  19. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/schedules.py +0 -0
  20. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/dispatching/timeout.py +0 -0
  21. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/expiry/__init__.py +0 -0
  22. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/expiry/run_expiry.py +0 -0
  23. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/ingester/__init__.py +0 -0
  24. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/ingester/__main__.py +0 -0
  25. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/ingester/constants.py +0 -0
  26. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/ingester/ingester.py +0 -0
  27. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/__init__.py +0 -0
  28. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/es_metrics.py +0 -0
  29. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  30. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/helper.py +0 -0
  31. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/metrics_server.py +0 -0
  32. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  33. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  34. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  35. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/plumber/__init__.py +0 -0
  36. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/__init__.py +0 -0
  37. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/client.py +0 -0
  38. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/creator/__init__.py +0 -0
  39. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/creator/run.py +0 -0
  40. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/creator/run_worker.py +0 -0
  41. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/loader/__init__.py +0 -0
  42. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/loader/run.py +0 -0
  43. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/loader/run_worker.py +0 -0
  44. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/replay/replay.py +0 -0
  45. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/safelist_client.py +0 -0
  46. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/__init__.py +0 -0
  47. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/collection.py +0 -0
  48. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  49. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  50. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/controllers/interface.py +0 -0
  51. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  52. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/run_scaler.py +0 -0
  53. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/scaler/scaler_server.py +0 -0
  54. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/server_base.py +0 -0
  55. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/submission_client.py +0 -0
  56. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/tasking_client.py +0 -0
  57. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/updater/__init__.py +0 -0
  58. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/updater/helper.py +0 -0
  59. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/updater/run_updater.py +0 -0
  60. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/__init__.py +0 -0
  61. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/crawler.py +0 -0
  62. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/department_map.py +0 -0
  63. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/safelist.py +0 -0
  64. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/stream_map.py +0 -0
  65. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/vacuum/worker.py +0 -0
  66. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/workflow/__init__.py +0 -0
  67. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core/workflow/run_workflow.py +0 -0
  68. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  69. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  70. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core.egg-info/requires.txt +0 -0
  71. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/assemblyline_core.egg-info/top_level.txt +0 -0
  72. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/setup.cfg +0 -0
  73. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/setup.py +0 -0
  74. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_alerter.py +0 -0
  75. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_dispatcher.py +0 -0
  76. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_expiry.py +0 -0
  77. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_replay.py +0 -0
  78. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_scaler.py +0 -0
  79. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_scheduler.py +0 -0
  80. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_simulation.py +0 -0
  81. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_vacuum.py +0 -0
  82. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_worker_ingest.py +0 -0
  83. {assemblyline-core-4.4.1.dev278 → assemblyline-core-4.4.1.dev281}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.4.1.dev278
3
+ Version: 4.4.1.dev281
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.4.1.dev281
@@ -86,9 +86,9 @@ class Archiver(ServerBase):
86
86
  index_type=Index.HOT)
87
87
 
88
88
  # Gather list of files and archives them
89
- files = {f.sha256 for f in submission.files}
90
- files.update(self.datastore.get_file_list_from_keys(submission.results, supplementary=True))
91
- for sha256 in files:
89
+ files = {(f.sha256, False) for f in submission.files}
90
+ files.update(self.datastore.get_file_list_from_keys(submission.results))
91
+ for sha256, supplementary in files:
92
92
  self.counter.increment('file')
93
93
 
94
94
  # Get the tags for this file
@@ -111,7 +111,7 @@ class Archiver(ServerBase):
111
111
  operations += [(self.datastore.file.UPDATE_APPEND_IF_MISSING, 'labels', x) for x in techniques]
112
112
  operations += [(self.datastore.file.UPDATE_APPEND_IF_MISSING, 'labels', x) for x in infos]
113
113
 
114
- # create type specific labels
114
+ # Create type specific labels
115
115
  operations += [
116
116
  (self.datastore.file.UPDATE_APPEND_IF_MISSING, 'label_categories.attribution', x)
117
117
  for x in attributions]
@@ -122,6 +122,9 @@ class Archiver(ServerBase):
122
122
  (self.datastore.file.UPDATE_APPEND_IF_MISSING, 'label_categories.info', x)
123
123
  for x in infos]
124
124
 
125
+ # Set the is_supplementary property
126
+ operations += [(self.datastore.file.UPDATE_SET, 'is_supplementary', supplementary)]
127
+
125
128
  # Apply auto-created labels
126
129
  self.datastore.file.update(sha256, operations=operations, index_type=Index.ARCHIVE)
127
130
  self.datastore.file.update(sha256, operations=operations, index_type=Index.HOT)
@@ -7,18 +7,21 @@ disabled or deleted for which a service queue exists, the dispatcher will be inf
7
7
  had an error.
8
8
  """
9
9
  import threading
10
+ import warnings
10
11
  from typing import Optional
11
- from assemblyline.common.forge import get_service_queue
12
12
 
13
- from assemblyline.odm.models.error import Error
14
- from assemblyline.common.isotime import now_as_iso
15
13
  from assemblyline.common.constants import service_queue_name
14
+ from assemblyline.common.forge import get_service_queue
15
+ from assemblyline.common.isotime import now, now_as_iso
16
+ from assemblyline.common.security import generate_random_secret
17
+ from assemblyline.datastore.store import TRANSPORT_TIMEOUT
18
+ from assemblyline.odm.models.error import Error
16
19
  from assemblyline.odm.models.service import Service
17
20
  from assemblyline.remote.datatypes import retry_call
18
21
  from assemblyline.remote.datatypes.queues.named import NamedQueue
19
-
20
22
  from assemblyline_core.dispatching.client import DispatchClient
21
23
  from assemblyline_core.server_base import CoreBase, ServiceStage
24
+ from elasticsearch import Elasticsearch
22
25
 
23
26
  DAY = 60 * 60 * 24
24
27
  TASK_DELETE_CHUNK = 10000
@@ -37,6 +40,31 @@ class Plumber(CoreBase):
37
40
  self.stop_signals: dict[str, threading.Event] = {}
38
41
  self.service_limit: dict[str, int] = {}
39
42
 
43
+ # Ensure roles for "plumber" user are created
44
+ self.datastore.ds.with_retries(
45
+ self.datastore.ds.client.security.put_role,
46
+ name="manage_tasks",
47
+ indices=[{"names": [".tasks"], "privileges": ["all"], "allow_restricted_indices": True}])
48
+
49
+ # Initialize/update 'plumber' user in Elasticsearch to perform cleanup
50
+ password = generate_random_secret()
51
+ self.datastore.ds.with_retries(
52
+ self.datastore.ds.client.security.put_user,
53
+ username="plumber",
54
+ password=password,
55
+ roles=["manage_tasks", "superuser"]
56
+ )
57
+
58
+ # Close existing connection and re-connect to the datastore as "plumber" user
59
+ self.datastore.ds.client.close()
60
+ self.datastore.ds.client = Elasticsearch(hosts=self.datastore.ds.get_hosts(),
61
+ basic_auth=("plumber", password),
62
+ max_retries=0,
63
+ request_timeout=TRANSPORT_TIMEOUT,
64
+ ca_certs=self.datastore.ds.ca_certs)
65
+ if not self.datastore.ds.ping():
66
+ raise Exception("Unable to connect to datastore as 'plumber'")
67
+
40
68
  def stop(self):
41
69
  for sig in self.stop_signals.values():
42
70
  sig.set()
@@ -151,7 +179,21 @@ class Plumber(CoreBase):
151
179
  def cleanup_old_tasks(self):
152
180
  self.log.info("Cleaning up task index for old completed tasks...")
153
181
  while self.running:
154
- deleted = self.datastore.task_cleanup(deleteable_task_age=DAY, max_tasks=TASK_DELETE_CHUNK)
182
+ # Create a new task to delete expired tasks
183
+ # NOTE: This will delete up to 10000 completed tasks older than a day
184
+ q = f"completed:true AND task.start_time_in_millis:<{now(-1 * DAY) * 1000}"
185
+ with warnings.catch_warnings():
186
+ warnings.simplefilter("ignore")
187
+ task = self.datastore.ds.with_retries(self.datastore.ds.client.delete_by_query, index='.tasks',
188
+ q=q, wait_for_completion=False, conflicts='proceed',
189
+ max_docs=TASK_DELETE_CHUNK)
190
+
191
+ # Wait until the task-deletion task has completed
192
+ res = self.datastore.ds._get_task_results(task)
193
+
194
+ # Get the number of deleted items
195
+ deleted = res['deleted']
196
+
155
197
  if not deleted:
156
198
  self.sleep(self.delay)
157
199
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.4.1.dev278
3
+ Version: 4.4.1.dev281
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -1,15 +1,13 @@
1
+ from time import sleep
1
2
  from unittest import mock
2
- from redis import Redis
3
3
 
4
4
  from assemblyline.odm.messages.task import Task
5
-
6
- from assemblyline.odm.random_data import random_model_obj
7
5
  from assemblyline.odm.models.service import Service
8
-
6
+ from assemblyline.odm.random_data import random_model_obj
9
7
  from assemblyline_core.plumber.run_plumber import Plumber
10
8
  from assemblyline_core.server_base import ServiceStage
11
-
12
9
  from mocking import TrueCountTimes
10
+ from redis import Redis
13
11
 
14
12
 
15
13
  def test_expire_missing_service():
@@ -24,6 +22,9 @@ def test_expire_missing_service():
24
22
  service_a.enabled = True
25
23
 
26
24
  datastore.list_all_services.return_value = [service_a]
25
+ datastore.ds.ca_certs = None
26
+ datastore.ds.get_hosts.return_value = ["http://localhost:9200"]
27
+
27
28
  plumber = Plumber(redis=redis, redis_persist=redis_persist, datastore=datastore, delay=1)
28
29
  plumber.get_service_stage = mock.MagicMock(return_value=ServiceStage.Running)
29
30
  plumber.dispatch_client = mock.MagicMock()
@@ -51,6 +52,8 @@ def test_flush_paused_queues():
51
52
  service_a.enabled = True
52
53
 
53
54
  datastore.list_all_services.return_value = [service_a]
55
+ datastore.ds.ca_certs = None
56
+ datastore.ds.get_hosts.return_value = ["http://localhost:9200"]
54
57
 
55
58
  plumber = Plumber(redis=redis, redis_persist=redis_persist, datastore=datastore, delay=1)
56
59
  plumber.get_service_stage = mock.MagicMock(return_value=ServiceStage.Running)
@@ -72,3 +75,35 @@ def test_flush_paused_queues():
72
75
  assert plumber.dispatch_client.service_failed.call_count == 1
73
76
  args = plumber.dispatch_client.service_failed.call_args
74
77
  assert args[0][0] == task.sid
78
+
79
+
80
+ def test_cleanup_old_tasks(datastore_connection):
81
+ # Create a bunch of random "old" tasks and clean them up
82
+ redis = mock.MagicMock(spec=Redis)
83
+ redis_persist = mock.MagicMock(spec=Redis)
84
+ plumber = Plumber(redis=redis, redis_persist=redis_persist, datastore=datastore_connection, delay=1)
85
+
86
+ # Generate new documents in .tasks index
87
+ num_old_tasks = 10
88
+ [plumber.datastore.ds.client.index(index=".tasks", document={
89
+ "completed": True,
90
+ "task": {
91
+ "start_time_in_millis": 0
92
+ }
93
+ }) for _ in range(num_old_tasks)]
94
+ sleep(1)
95
+
96
+ # Assert that these have indeed been committed to the tasks index
97
+ assert plumber.datastore.ds.client.search(index='.tasks',
98
+ q="task.start_time_in_millis:0",
99
+ track_total_hits=True,
100
+ size=0)['hits']['total']['value'] == num_old_tasks
101
+
102
+ # Run task cleanup, we should return to no more "old" completed tasks
103
+ plumber.running = TrueCountTimes(count=1)
104
+ plumber.cleanup_old_tasks()
105
+ sleep(1)
106
+ assert plumber.datastore.ds.client.search(index='.tasks',
107
+ q="task.start_time_in_millis:0",
108
+ track_total_hits=True,
109
+ size=0)['hits']['total']['value'] == 0
@@ -1 +0,0 @@
1
- 4.4.1.dev278