karton-core 5.5.1__py3-none-any.whl → 5.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.5.1"
1
+ __version__ = "5.6.1"
karton/core/backend.py CHANGED
@@ -986,6 +986,21 @@ class KartonBackend:
986
986
  objs.append(obj["Key"])
987
987
  return objs
988
988
 
989
def list_object_versions(self, bucket: str) -> Dict[str, List[str]]:
    """
    Collect the version identifiers of every object stored in a bucket.

    Both regular versions and delete markers reported by the
    "list_object_versions" paginator are included, grouped by object key.

    :param bucket: Bucket name
    :return: Mapping of object key to its version identifiers
        {key: [version_ids, ...]}
    """
    versions_by_key: Dict[str, List[str]] = {}
    pages = self.s3.get_paginator("list_object_versions").paginate(Bucket=bucket)
    for page in pages:
        # Within a page, regular versions are recorded before delete markers.
        for section in ("Versions", "DeleteMarkers"):
            for item in page.get(section, []):
                versions_by_key.setdefault(item["Key"], []).append(
                    item["VersionId"]
                )
    return versions_by_key
1003
+
989
1004
  def remove_object(self, bucket: str, object_uid: str) -> None:
990
1005
  """
991
1006
  Remove resource object from object storage
@@ -1002,7 +1017,38 @@ class KartonBackend:
1002
1017
  :param bucket: Bucket name
1003
1018
  :param object_uids: Object identifiers
1004
1019
  """
1005
- for delete_objects in chunks([{"Key": uid} for uid in object_uids], 1000):
1020
+ for delete_objects in chunks([{"Key": uid} for uid in object_uids], 100):
1021
+ self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
1022
+
1023
def remove_object_versions(
    self,
    bucket: str,
    object_versions: Dict[str, List[str]],
    explicit_version_null: bool = False,
) -> None:
    """
    Bulk remove specific versions of resource objects from object storage.

    :param bucket: Bucket name
    :param object_versions: Mapping of object key to the version identifiers
        that should be deleted
    :param explicit_version_null: |
        Whether the "null" version identifier is sent explicitly.
        Some S3 providers (e.g. MinIO) need an explicit reference to the
        "null" version when versioning is in suspended state, while other
        providers refuse to delete "null" versions when bucket versioning
        is disabled.
        See also: https://github.com/CERT-Polska/karton/issues/273.
    """
    targets = []
    for uid, version_ids in object_versions.items():
        for version_id in version_ids:
            if version_id == "null" and not explicit_version_null:
                # Address the object by key only, without naming "null".
                targets.append({"Key": uid})
            else:
                targets.append({"Key": uid, "VersionId": version_id})

    # Deletions are sent in batches of 100 entries per request.
    for batch in chunks(targets, 100):
        self.s3.delete_objects(Bucket=bucket, Delete={"Objects": batch})
1007
1053
 
1008
1054
  def check_bucket_exists(self, bucket: str, create: bool = False) -> bool:
karton/core/karton.py CHANGED
@@ -106,11 +106,15 @@ class Consumer(KartonServiceBase):
106
106
  :param config: Karton config to use for service configuration
107
107
  :param identity: Karton service identity
108
108
  :param backend: Karton backend to use
109
+ :param task_timeout: The maximum time, in seconds, this consumer will wait for
110
+ a task to finish processing before being CRASHED on timeout.
111
+ Set 0 for unlimited, and None for using global value
109
112
  """
110
113
 
111
114
  filters: List[Dict[str, Any]] = []
112
115
  persistent: bool = True
113
116
  version: Optional[str] = None
117
+ task_timeout = None
114
118
 
115
119
  def __init__(
116
120
  self,
@@ -130,7 +134,8 @@ class Consumer(KartonServiceBase):
130
134
  self.config.getboolean("karton", "persistent", self.persistent)
131
135
  and not self.debug
132
136
  )
133
- self.task_timeout = self.config.getint("karton", "task_timeout")
137
+ if self.task_timeout is None:
138
+ self.task_timeout = self.config.getint("karton", "task_timeout")
134
139
  self.current_task: Optional[Task] = None
135
140
  self._pre_hooks: List[Tuple[Optional[str], Callable[[Task], None]]] = []
136
141
  self._post_hooks: List[
@@ -323,7 +328,7 @@ class Consumer(KartonServiceBase):
323
328
  """
324
329
  self.log.info("Service %s started", self.identity)
325
330
 
326
- if self.task_timeout is not None:
331
+ if self.task_timeout:
327
332
  self.log.info(f"Task timeout is set to {self.task_timeout} seconds")
328
333
 
329
334
  # Get the old binds and set the new ones atomically
karton/core/query.py CHANGED
@@ -25,8 +25,15 @@ def is_non_string_sequence(entry):
25
25
  class Query(object):
26
26
  """The Query class is used to match an object against a MongoDB-like query"""
27
27
 
28
- def __init__(self, definition):
28
def __init__(self, definition, _type_coercion=False):
    """
    Store the query definition and the type-coercion switch.

    With _type_coercion enabled, a header value is converted to a string
    before comparison whenever the condition itself is a string. This keeps
    compatibility with the old syntax, e.g. a {"execute": "!False"} filter
    matched against a {"execute": False} header value.
    """
    self._type_coercion = _type_coercion
    self._definition = definition
30
37
 
31
38
  def match(self, entry):
32
39
  """Matches the entry object against the query specified on instantiation"""
@@ -40,7 +47,7 @@ class Query(object):
40
47
  )
41
48
  if is_non_string_sequence(entry):
42
49
  return condition in entry
43
- return condition == entry
50
+ return self._eq(condition, entry)
44
51
 
45
52
  def _extract(self, entry, path):
46
53
  if not path:
@@ -108,9 +115,14 @@ class Query(object):
108
115
  def _noop(*_):
109
116
  return True
110
117
 
111
- @staticmethod
112
- def _eq(condition, entry):
118
def _eq(self, condition, entry):
    """
    Tell whether entry equals condition.

    When type coercion is enabled and the condition is exactly a str while
    the entry is not, the entry is converted with str() before comparing.
    A TypeError raised during the comparison counts as "not equal".
    """
    try:
        needs_coercion = (
            self._type_coercion
            and type(condition) is str
            and type(entry) is not str
        )
        candidate = str(entry) if needs_coercion else entry
        return candidate == condition
    except TypeError:
        return False
@@ -155,9 +167,8 @@ class Query(object):
155
167
  except TypeError:
156
168
  return False
157
169
 
158
- @staticmethod
159
- def _ne(condition, entry):
160
- return entry != condition
170
def _ne(self, condition, entry):
    """Return the negation of _eq for the same condition and entry."""
    is_equal = self._eq(condition, entry)
    return not is_equal
161
172
 
162
173
  def _nin(self, condition, entry):
163
174
  return not self._in(condition, entry)
@@ -346,5 +357,6 @@ def convert(filters):
346
357
  {"$not": {"$or": negative_filter}},
347
358
  {"$or": regular_filter},
348
359
  ]
349
- }
360
+ },
361
+ _type_coercion=True,
350
362
  )
karton/system/system.py CHANGED
@@ -26,6 +26,7 @@ class SystemService(KartonServiceBase):
26
26
  version = __version__
27
27
  with_service_info = True
28
28
 
29
+ CRASH_STARTED_TASKS_ON_TIMEOUT = False
29
30
  GC_INTERVAL = 3 * 60
30
31
  TASK_DISPATCHED_TIMEOUT = 24 * 3600
31
32
  TASK_STARTED_TIMEOUT = 24 * 3600
@@ -45,13 +46,40 @@ class SystemService(KartonServiceBase):
45
46
  )
46
47
  self.enable_gc = self.config.getboolean("system", "enable_gc", True)
47
48
  self.enable_router = self.config.getboolean("system", "enable_router", True)
49
+ self.crash_started_tasks_on_timeout = self.config.getboolean(
50
+ "system", "crash_started_tasks_on_timeout", False
51
+ )
52
+ self.enable_null_version_deletion = self.config.getboolean(
53
+ "system", "enable_null_version_deletion", False
54
+ )
48
55
 
49
56
  self.last_gc_trigger = time.time()
50
57
 
58
def _log_config(self):
    """Write the effective system service settings to the log at INFO level."""
    template = (
        "Effective config:\n"
        " gc_interval:\t%s\n"
        " task_dispatched_timeout:\t%s\n"
        " task_started_timeout:\t%s\n"
        " task_crashed_timeout:\t%s\n"
        " enable_gc:\t%s\n"
        " enable_router:\t%s\n"
        " enable_null_version_deletion:\t%s\n"
        " crash_started_tasks_on_timeout:\t%s"
    )
    # Values are listed in the same order as the placeholders above.
    settings = (
        self.gc_interval,
        self.task_dispatched_timeout,
        self.task_started_timeout,
        self.task_crashed_timeout,
        self.enable_gc,
        self.enable_router,
        self.enable_null_version_deletion,
        self.crash_started_tasks_on_timeout,
    )
    self.log.info(template, *settings)
78
+
51
79
  def gc_collect_resources(self) -> None:
52
80
  # Collects unreferenced resources left in object storage
53
81
  karton_bucket = self.backend.default_bucket_name
54
- resources_to_remove = set(self.backend.list_objects(karton_bucket))
82
+ resources_to_remove = self.backend.list_object_versions(karton_bucket)
55
83
  # Note: it is important to get list of resources before getting list of tasks!
56
84
  # Task is created before resource upload to lock the reference to the resource.
57
85
  tasks = self.backend.iter_all_tasks()
@@ -62,12 +90,17 @@ class SystemService(KartonServiceBase):
62
90
  resource.bucket == karton_bucket
63
91
  and resource.uid in resources_to_remove
64
92
  ):
65
- resources_to_remove.remove(resource.uid)
93
+ del resources_to_remove[resource.uid]
66
94
  # Remove unreferenced resources
67
95
  if resources_to_remove:
68
- self.backend.remove_objects(karton_bucket, resources_to_remove)
96
+ self.backend.remove_object_versions(
97
+ karton_bucket,
98
+ resources_to_remove,
99
+ explicit_version_null=self.enable_null_version_deletion,
100
+ )
69
101
 
70
102
  def gc_collect_tasks(self) -> None:
103
+ self.log.debug("GC: gc_collect_tasks started")
71
104
  # Collects finished tasks
72
105
  root_tasks = set()
73
106
  running_root_tasks = set()
@@ -75,6 +108,7 @@ class SystemService(KartonServiceBase):
75
108
 
76
109
  current_time = time.time()
77
110
  to_delete = []
111
+ to_crash = []
78
112
 
79
113
  queues_to_clear = set()
80
114
  online_consumers = self.backend.get_online_consumers()
@@ -116,14 +150,24 @@ class SystemService(KartonServiceBase):
116
150
  and task.last_update is not None
117
151
  and current_time > task.last_update + self.task_started_timeout
118
152
  ):
119
- to_delete.append(task)
120
- self.log.error(
121
- "Task %s is in Started state more than %d seconds. "
122
- "Killed. (receiver: %s)",
123
- task.uid,
124
- self.task_started_timeout,
125
- task.headers.get("receiver", "<unknown>"),
126
- )
153
+ if self.crash_started_tasks_on_timeout:
154
+ to_crash.append(task)
155
+ self.log.error(
156
+ "Task %s is in Started state more than %d seconds. "
157
+ "Crashed. (receiver: %s)",
158
+ task.uid,
159
+ self.task_started_timeout,
160
+ task.headers.get("receiver", "<unknown>"),
161
+ )
162
+ else:
163
+ to_delete.append(task)
164
+ self.log.error(
165
+ "Task %s is in Started state more than %d seconds. "
166
+ "Killed. (receiver: %s)",
167
+ task.uid,
168
+ self.task_started_timeout,
169
+ task.headers.get("receiver", "<unknown>"),
170
+ )
127
171
  elif task.status == TaskState.FINISHED:
128
172
  to_delete.append(task)
129
173
  self.log.debug("GC: Finished task %s", task.uid)
@@ -151,11 +195,26 @@ class SystemService(KartonServiceBase):
151
195
  self.backend.increment_metrics_list(
152
196
  KartonMetrics.TASK_GARBAGE_COLLECTED, to_increment
153
197
  )
198
+ if to_crash:
199
+ to_increment = [
200
+ task.headers.get("receiver", "unknown") for task in to_crash
201
+ ]
202
+ for task in to_crash:
203
+ task.error = [
204
+ "This task was STARTED too long (TASK_STARTED_TIMEOUT), "
205
+ "so status was changes to CRASHED."
206
+ ]
207
+ self.backend.set_task_status(task, TaskState.CRASHED)
208
+ self.backend.increment_metrics_list(
209
+ KartonMetrics.TASK_CRASHED, to_increment
210
+ )
154
211
 
155
212
  for finished_root_task in root_tasks.difference(running_root_tasks):
156
213
  # TODO: Notification needed
157
214
  self.log.debug("GC: Finished root task %s", finished_root_task)
158
215
 
216
+ self.log.debug("GC: gc_collect_tasks ended")
217
+
159
218
  def gc_collect(self) -> None:
160
219
  if time.time() > (self.last_gc_trigger + self.gc_interval):
161
220
  try:
@@ -251,6 +310,7 @@ class SystemService(KartonServiceBase):
251
310
  self.handle_operations(bodies)
252
311
 
253
312
  def loop(self) -> None:
313
+ self._log_config()
254
314
  self.log.info("Manager %s started", self.identity)
255
315
 
256
316
  with self.graceful_killer():
@@ -288,7 +348,6 @@ class SystemService(KartonServiceBase):
288
348
  parser.add_argument(
289
349
  "--gc-interval",
290
350
  type=int,
291
- default=cls.GC_INTERVAL,
292
351
  help="Garbage collection interval",
293
352
  )
294
353
  parser.add_argument(
@@ -304,16 +363,24 @@ class SystemService(KartonServiceBase):
304
363
  parser.add_argument(
305
364
  "--task-crashed-timeout", help="Timeout for tasks in Crashed state"
306
365
  )
366
+ parser.add_argument(
367
+ "--crash-started-task-on-timeout",
368
+ action="store_const",
369
+ dest="crash_started_tasks",
370
+ help="Crash Started tasks on timeout instead of deleting",
371
+ )
307
372
  return parser
308
373
 
309
374
  @classmethod
310
375
  def config_from_args(cls, config: Config, args: argparse.Namespace):
311
376
  super().config_from_args(config, args)
377
+
312
378
  config.load_from_dict(
313
379
  {
314
380
  "system": {
315
381
  "enable_gc": args.enable_gc,
316
382
  "enable_router": args.enable_router,
383
+ "crash_started_tasks_on_timeout": args.crash_started_tasks,
317
384
  "gc_interval": args.gc_interval,
318
385
  "task_dispatched_timeout": args.task_dispatched_timeout,
319
386
  "task_started_timeout": args.task_started_timeout,
@@ -1,17 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.5.1
3
+ Version: 5.6.1
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
- License: UNKNOWN
7
- Platform: UNKNOWN
8
6
  Classifier: Programming Language :: Python :: 3
9
7
  Classifier: Operating System :: OS Independent
10
8
  Classifier: License :: OSI Approved :: BSD License
11
9
  Requires-Python: >=3.8
12
10
  Description-Content-Type: text/markdown
13
11
  License-File: LICENSE
14
- Requires-Dist: boto3
12
+ Requires-Dist: boto3 <1.36.0
15
13
  Requires-Dist: orjson
16
14
  Requires-Dist: redis
17
15
 
@@ -141,5 +139,3 @@ RetDec unpacker module for the Karton framework
141
139
  Malware similarity platform with modularity in mind.
142
140
 
143
141
  ![Co-financed by the Connecting Europe Facility of the European Union](https://www.cert.pl/uploads/2019/02/en_horizontal_cef_logo-e1550495232540.png)
144
-
145
-
@@ -1,27 +1,27 @@
1
- karton_core-5.5.1-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
1
+ karton_core-5.6.1-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
2
  karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=o0RHk7avNRUho2u_PgLKAFJKJks95Wx54GlOzI6Jzq4,22
4
- karton/core/backend.py,sha256=-sQG7utnaWLJOEcafeSwEDLnkflPqtSCwg_mn_nnFhg,36727
3
+ karton/core/__version__.py,sha256=-q9tSF5ofTJum4PMjvbhaE1xmTXehc_9rxMGcmfodcw,22
4
+ karton/core/backend.py,sha256=g0BSQBsFAksRd_VY5QDjBJ8yIIyzAmwxy-kfJgAZ_lo,38628
5
5
  karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
6
  karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
7
  karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
8
  karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
- karton/core/karton.py,sha256=cXLleTEPCVBIXkj09kKu2hjd1XNUSpTAk87-BES1WlA,15133
9
+ karton/core/karton.py,sha256=Fi3wNqMGiKvHN2BECsqsvfxkiyuwPdlC21jpqQdkeak,15434
10
10
  karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
11
  karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
12
  karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- karton/core/query.py,sha256=Ay0VzfrBQwdJzcZ27JbOlUc1ZZdOl6A8sh4iIYTmLyE,11493
13
+ karton/core/query.py,sha256=sf24DweVlXfJuBbBD_ns2LXhOV-IBwuPG3jBfTJu77s,12063
14
14
  karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
15
15
  karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
16
16
  karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
17
17
  karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
18
18
  karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
19
19
  karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
20
- karton/system/system.py,sha256=tptar24RuXUnlII1xKbuJtfNkQsSxTtS3g4O8S99tbg,14011
21
- karton_core-5.5.1.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
- karton_core-5.5.1.dist-info/METADATA,sha256=trLfddTECFmbcuReIoSF9Yb9ug1BSAxHBUAM0nNoXnE,6847
23
- karton_core-5.5.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
24
- karton_core-5.5.1.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
25
- karton_core-5.5.1.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.5.1.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
- karton_core-5.5.1.dist-info/RECORD,,
20
+ karton/system/system.py,sha256=cFE4hCS0LWnwdCiIjU0ym8dHujE5ORi4REJR_y5b2gA,16671
21
+ karton_core-5.6.1.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
+ karton_core-5.6.1.dist-info/METADATA,sha256=AJoa9O_0SOYI3IuVHXhwB6lXoUSs7S4nU6QM8_xHxVI,6818
23
+ karton_core-5.6.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
24
+ karton_core-5.6.1.dist-info/entry_points.txt,sha256=OgLlsXy61GP6-Yob3oXqeJ2hlRU6LBLj33fr0NufKz0,98
25
+ karton_core-5.6.1.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.6.1.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
+ karton_core-5.6.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,3 @@
1
1
  [console_scripts]
2
2
  karton = karton.core.main:main
3
3
  karton-system = karton.system:SystemService.main
4
-