karton-core 5.5.0__py3-none-any.whl → 5.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.5.0"
1
+ __version__ = "5.6.0"
karton/core/backend.py CHANGED
@@ -986,6 +986,21 @@ class KartonBackend:
986
986
  objs.append(obj["Key"])
987
987
  return objs
988
988
 
989
+ def list_object_versions(self, bucket: str) -> Dict[str, List[str]]:
990
+ """
991
+ List version identifiers of stored resource objects
992
+ :param bucket: Bucket name
993
+ :return: Dictionary of object version identifiers {key: [version_ids, ...]}
994
+ """
995
+ objs = defaultdict(list)
996
+ paginator = self.s3.get_paginator("list_object_versions")
997
+ for page in paginator.paginate(Bucket=bucket):
998
+ for obj in page.get("Versions", list()):
999
+ objs[obj["Key"]].append(obj["VersionId"])
1000
+ for obj in page.get("DeleteMarkers", list()):
1001
+ objs[obj["Key"]].append(obj["VersionId"])
1002
+ return dict(objs)
1003
+
989
1004
  def remove_object(self, bucket: str, object_uid: str) -> None:
990
1005
  """
991
1006
  Remove resource object from object storage
@@ -1002,7 +1017,28 @@ class KartonBackend:
1002
1017
  :param bucket: Bucket name
1003
1018
  :param object_uids: Object identifiers
1004
1019
  """
1005
- for delete_objects in chunks([{"Key": uid} for uid in object_uids], 1000):
1020
+ for delete_objects in chunks([{"Key": uid} for uid in object_uids], 100):
1021
+ self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
1022
+
1023
+ def remove_object_versions(
1024
+ self, bucket: str, object_versions: Dict[str, List[str]]
1025
+ ) -> None:
1026
+ """
1027
+ Bulk remove resource object versions from object storage
1028
+
1029
+ :param bucket: Bucket name
1030
+ :param object_versions: Object version identifiers
1031
+ """
1032
+ versions = iter(
1033
+ (uid, version_id)
1034
+ for uid, versions in object_versions.items()
1035
+ for version_id in versions
1036
+ )
1037
+ deletion_chunks = chunks(
1038
+ [{"Key": uid, "VersionId": version_id} for uid, version_id in versions],
1039
+ 100,
1040
+ )
1041
+ for delete_objects in deletion_chunks:
1006
1042
  self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
1007
1043
 
1008
1044
  def check_bucket_exists(self, bucket: str, create: bool = False) -> bool:
karton/core/karton.py CHANGED
@@ -106,11 +106,15 @@ class Consumer(KartonServiceBase):
106
106
  :param config: Karton config to use for service configuration
107
107
  :param identity: Karton service identity
108
108
  :param backend: Karton backend to use
109
+ :param task_timeout: The maximum time, in seconds, this consumer will wait for
110
+ a task to finish processing before being CRASHED on timeout.
111
+ Set 0 for unlimited, and None for using global value
109
112
  """
110
113
 
111
114
  filters: List[Dict[str, Any]] = []
112
115
  persistent: bool = True
113
116
  version: Optional[str] = None
117
+ task_timeout = None
114
118
 
115
119
  def __init__(
116
120
  self,
@@ -130,7 +134,8 @@ class Consumer(KartonServiceBase):
130
134
  self.config.getboolean("karton", "persistent", self.persistent)
131
135
  and not self.debug
132
136
  )
133
- self.task_timeout = self.config.getint("karton", "task_timeout")
137
+ if self.task_timeout is None:
138
+ self.task_timeout = self.config.getint("karton", "task_timeout")
134
139
  self.current_task: Optional[Task] = None
135
140
  self._pre_hooks: List[Tuple[Optional[str], Callable[[Task], None]]] = []
136
141
  self._post_hooks: List[
@@ -323,7 +328,7 @@ class Consumer(KartonServiceBase):
323
328
  """
324
329
  self.log.info("Service %s started", self.identity)
325
330
 
326
- if self.task_timeout is not None:
331
+ if self.task_timeout:
327
332
  self.log.info(f"Task timeout is set to {self.task_timeout} seconds")
328
333
 
329
334
  # Get the old binds and set the new ones atomically
karton/core/query.py CHANGED
@@ -25,8 +25,15 @@ def is_non_string_sequence(entry):
25
25
  class Query(object):
26
26
  """The Query class is used to match an object against a MongoDB-like query"""
27
27
 
28
- def __init__(self, definition):
28
+ def __init__(self, definition, _type_coercion=False):
29
+ """
30
+ If _type_coercion is enabled: header values are coerced to string
31
+ when condition is also a string. It's implemented for compatibility
32
+ with old syntax e.g. {"execute": "!False"} filter vs {"execute": False}
33
+ header value.
34
+ """
29
35
  self._definition = definition
36
+ self._type_coercion = _type_coercion
30
37
 
31
38
  def match(self, entry):
32
39
  """Matches the entry object against the query specified on instanciation"""
@@ -40,7 +47,7 @@ class Query(object):
40
47
  )
41
48
  if is_non_string_sequence(entry):
42
49
  return condition in entry
43
- return condition == entry
50
+ return self._eq(condition, entry)
44
51
 
45
52
  def _extract(self, entry, path):
46
53
  if not path:
@@ -108,9 +115,14 @@ class Query(object):
108
115
  def _noop(*_):
109
116
  return True
110
117
 
111
- @staticmethod
112
- def _eq(condition, entry):
118
+ def _eq(self, condition, entry):
113
119
  try:
120
+ if (
121
+ self._type_coercion
122
+ and type(condition) is str
123
+ and type(entry) is not str
124
+ ):
125
+ return str(entry) == condition
114
126
  return entry == condition
115
127
  except TypeError:
116
128
  return False
@@ -155,9 +167,8 @@ class Query(object):
155
167
  except TypeError:
156
168
  return False
157
169
 
158
- @staticmethod
159
- def _ne(condition, entry):
160
- return entry != condition
170
+ def _ne(self, condition, entry):
171
+ return not self._eq(condition, entry)
161
172
 
162
173
  def _nin(self, condition, entry):
163
174
  return not self._in(condition, entry)
@@ -346,5 +357,6 @@ def convert(filters):
346
357
  {"$not": {"$or": negative_filter}},
347
358
  {"$or": regular_filter},
348
359
  ]
349
- }
360
+ },
361
+ _type_coercion=True,
350
362
  )
karton/core/task.py CHANGED
@@ -408,7 +408,12 @@ class Task(object):
408
408
  if parse_resources:
409
409
  task_data = json.loads(data, object_hook=unserialize_resources)
410
410
  else:
411
- task_data = orjson.loads(data)
411
+ try:
412
+ task_data = orjson.loads(data)
413
+ except orjson.JSONDecodeError:
414
+ # Fallback, in case orjson raises exception during loading
415
+ # This may happen for large numbers (too large for float)
416
+ task_data = json.loads(data, object_hook=unserialize_resources)
412
417
 
413
418
  # Compatibility with Karton <5.2.0
414
419
  headers_persistent_fallback = task_data["payload_persistent"].get(
karton/system/system.py CHANGED
@@ -26,6 +26,7 @@ class SystemService(KartonServiceBase):
26
26
  version = __version__
27
27
  with_service_info = True
28
28
 
29
+ CRASH_STARTED_TASKS_ON_TIMEOUT = False
29
30
  GC_INTERVAL = 3 * 60
30
31
  TASK_DISPATCHED_TIMEOUT = 24 * 3600
31
32
  TASK_STARTED_TIMEOUT = 24 * 3600
@@ -45,13 +46,35 @@ class SystemService(KartonServiceBase):
45
46
  )
46
47
  self.enable_gc = self.config.getboolean("system", "enable_gc", True)
47
48
  self.enable_router = self.config.getboolean("system", "enable_router", True)
49
+ self.crash_started_tasks_on_timeout = self.config.getboolean(
50
+ "system", "crash_started_tasks_on_timeout", False
51
+ )
48
52
 
49
53
  self.last_gc_trigger = time.time()
50
54
 
55
+ def _log_config(self):
56
+ self.log.info(
57
+ "Effective config:\n"
58
+ " gc_interval:\t%s\n"
59
+ " task_dispatched_timeout:\t%s\n"
60
+ " task_started_timeout:\t%s\n"
61
+ " task_crashed_timeout:\t%s\n"
62
+ " enable_gc:\t%s\n"
63
+ " enable_router:\t%s\n"
64
+ " crash_started_tasks_on_timeout:\t%s",
65
+ self.gc_interval,
66
+ self.task_dispatched_timeout,
67
+ self.task_started_timeout,
68
+ self.task_crashed_timeout,
69
+ self.enable_gc,
70
+ self.enable_router,
71
+ self.crash_started_tasks_on_timeout,
72
+ )
73
+
51
74
  def gc_collect_resources(self) -> None:
52
75
  # Collects unreferenced resources left in object storage
53
76
  karton_bucket = self.backend.default_bucket_name
54
- resources_to_remove = set(self.backend.list_objects(karton_bucket))
77
+ resources_to_remove = self.backend.list_object_versions(karton_bucket)
55
78
  # Note: it is important to get list of resources before getting list of tasks!
56
79
  # Task is created before resource upload to lock the reference to the resource.
57
80
  tasks = self.backend.iter_all_tasks()
@@ -62,12 +85,13 @@ class SystemService(KartonServiceBase):
62
85
  resource.bucket == karton_bucket
63
86
  and resource.uid in resources_to_remove
64
87
  ):
65
- resources_to_remove.remove(resource.uid)
88
+ del resources_to_remove[resource.uid]
66
89
  # Remove unreferenced resources
67
90
  if resources_to_remove:
68
- self.backend.remove_objects(karton_bucket, resources_to_remove)
91
+ self.backend.remove_object_versions(karton_bucket, resources_to_remove)
69
92
 
70
93
  def gc_collect_tasks(self) -> None:
94
+ self.log.debug("GC: gc_collect_tasks started")
71
95
  # Collects finished tasks
72
96
  root_tasks = set()
73
97
  running_root_tasks = set()
@@ -75,6 +99,7 @@ class SystemService(KartonServiceBase):
75
99
 
76
100
  current_time = time.time()
77
101
  to_delete = []
102
+ to_crash = []
78
103
 
79
104
  queues_to_clear = set()
80
105
  online_consumers = self.backend.get_online_consumers()
@@ -116,14 +141,24 @@ class SystemService(KartonServiceBase):
116
141
  and task.last_update is not None
117
142
  and current_time > task.last_update + self.task_started_timeout
118
143
  ):
119
- to_delete.append(task)
120
- self.log.error(
121
- "Task %s is in Started state more than %d seconds. "
122
- "Killed. (receiver: %s)",
123
- task.uid,
124
- self.task_started_timeout,
125
- task.headers.get("receiver", "<unknown>"),
126
- )
144
+ if self.crash_started_tasks_on_timeout:
145
+ to_crash.append(task)
146
+ self.log.error(
147
+ "Task %s is in Started state more than %d seconds. "
148
+ "Crashed. (receiver: %s)",
149
+ task.uid,
150
+ self.task_started_timeout,
151
+ task.headers.get("receiver", "<unknown>"),
152
+ )
153
+ else:
154
+ to_delete.append(task)
155
+ self.log.error(
156
+ "Task %s is in Started state more than %d seconds. "
157
+ "Killed. (receiver: %s)",
158
+ task.uid,
159
+ self.task_started_timeout,
160
+ task.headers.get("receiver", "<unknown>"),
161
+ )
127
162
  elif task.status == TaskState.FINISHED:
128
163
  to_delete.append(task)
129
164
  self.log.debug("GC: Finished task %s", task.uid)
@@ -151,11 +186,26 @@ class SystemService(KartonServiceBase):
151
186
  self.backend.increment_metrics_list(
152
187
  KartonMetrics.TASK_GARBAGE_COLLECTED, to_increment
153
188
  )
189
+ if to_crash:
190
+ to_increment = [
191
+ task.headers.get("receiver", "unknown") for task in to_crash
192
+ ]
193
+ for task in to_crash:
194
+ task.error = [
195
+ "This task was STARTED too long (TASK_STARTED_TIMEOUT), "
196
+ "so status was changes to CRASHED."
197
+ ]
198
+ self.backend.set_task_status(task, TaskState.CRASHED)
199
+ self.backend.increment_metrics_list(
200
+ KartonMetrics.TASK_CRASHED, to_increment
201
+ )
154
202
 
155
203
  for finished_root_task in root_tasks.difference(running_root_tasks):
156
204
  # TODO: Notification needed
157
205
  self.log.debug("GC: Finished root task %s", finished_root_task)
158
206
 
207
+ self.log.debug("GC: gc_collect_tasks ended")
208
+
159
209
  def gc_collect(self) -> None:
160
210
  if time.time() > (self.last_gc_trigger + self.gc_interval):
161
211
  try:
@@ -251,6 +301,7 @@ class SystemService(KartonServiceBase):
251
301
  self.handle_operations(bodies)
252
302
 
253
303
  def loop(self) -> None:
304
+ self._log_config()
254
305
  self.log.info("Manager %s started", self.identity)
255
306
 
256
307
  with self.graceful_killer():
@@ -288,7 +339,6 @@ class SystemService(KartonServiceBase):
288
339
  parser.add_argument(
289
340
  "--gc-interval",
290
341
  type=int,
291
- default=cls.GC_INTERVAL,
292
342
  help="Garbage collection interval",
293
343
  )
294
344
  parser.add_argument(
@@ -304,16 +354,24 @@ class SystemService(KartonServiceBase):
304
354
  parser.add_argument(
305
355
  "--task-crashed-timeout", help="Timeout for tasks in Crashed state"
306
356
  )
357
+ parser.add_argument(
358
+ "--crash-started-task-on-timeout",
359
+ action="store_const",
360
+ dest="crash_started_tasks",
361
+ help="Crash Started tasks on timeout instead of deleting",
362
+ )
307
363
  return parser
308
364
 
309
365
  @classmethod
310
366
  def config_from_args(cls, config: Config, args: argparse.Namespace):
311
367
  super().config_from_args(config, args)
368
+
312
369
  config.load_from_dict(
313
370
  {
314
371
  "system": {
315
372
  "enable_gc": args.enable_gc,
316
373
  "enable_router": args.enable_router,
374
+ "crash_started_tasks_on_timeout": args.crash_started_tasks,
317
375
  "gc_interval": args.gc_interval,
318
376
  "task_dispatched_timeout": args.task_dispatched_timeout,
319
377
  "task_started_timeout": args.task_started_timeout,
@@ -1,17 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.5.0
3
+ Version: 5.6.0
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
- License: UNKNOWN
7
- Platform: UNKNOWN
8
6
  Classifier: Programming Language :: Python :: 3
9
7
  Classifier: Operating System :: OS Independent
10
8
  Classifier: License :: OSI Approved :: BSD License
11
9
  Requires-Python: >=3.8
12
10
  Description-Content-Type: text/markdown
13
11
  License-File: LICENSE
14
- Requires-Dist: boto3
12
+ Requires-Dist: boto3 <1.36.0
15
13
  Requires-Dist: orjson
16
14
  Requires-Dist: redis
17
15
 
@@ -141,5 +139,3 @@ RetDec unpacker module for the Karton framework
141
139
  Malware similarity platform with modularity in mind.
142
140
 
143
141
  ![Co-financed by the Connecting Europe Facility by of the European Union](https://www.cert.pl/uploads/2019/02/en_horizontal_cef_logo-e1550495232540.png)
144
-
145
-
@@ -1,27 +1,27 @@
1
- karton_core-5.5.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
1
+ karton_core-5.6.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
2
  karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=zFTHldBmR5ReiC3uSZ8VkZOEirtsq_l6QbUJYRBHlTs,22
4
- karton/core/backend.py,sha256=-sQG7utnaWLJOEcafeSwEDLnkflPqtSCwg_mn_nnFhg,36727
3
+ karton/core/__version__.py,sha256=9eGec3AYz2CNznnfEwKYRiJ65G9gDWtXmb587ljDycg,22
4
+ karton/core/backend.py,sha256=HlAolngYUTx2ajXhLf5RbWxmPOq5orLfz52iTkNxxqM,38147
5
5
  karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
6
  karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
7
  karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
8
  karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
- karton/core/karton.py,sha256=cXLleTEPCVBIXkj09kKu2hjd1XNUSpTAk87-BES1WlA,15133
9
+ karton/core/karton.py,sha256=Fi3wNqMGiKvHN2BECsqsvfxkiyuwPdlC21jpqQdkeak,15434
10
10
  karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
11
  karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
12
  karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- karton/core/query.py,sha256=Ay0VzfrBQwdJzcZ27JbOlUc1ZZdOl6A8sh4iIYTmLyE,11493
13
+ karton/core/query.py,sha256=sf24DweVlXfJuBbBD_ns2LXhOV-IBwuPG3jBfTJu77s,12063
14
14
  karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
15
- karton/core/task.py,sha256=1E_d60XbzqX0O9gFhYe_8aNGH7vuXDHe-bir5cRot_0,18515
15
+ karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
16
16
  karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
17
17
  karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
18
18
  karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
19
19
  karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
20
- karton/system/system.py,sha256=tptar24RuXUnlII1xKbuJtfNkQsSxTtS3g4O8S99tbg,14011
21
- karton_core-5.5.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
- karton_core-5.5.0.dist-info/METADATA,sha256=h4-M_JnMm8z_An5IDFPHAkQ4YuR_-YpwekETiNMjIxQ,6847
23
- karton_core-5.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
24
- karton_core-5.5.0.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
25
- karton_core-5.5.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.5.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
- karton_core-5.5.0.dist-info/RECORD,,
20
+ karton/system/system.py,sha256=v2rEJYN2Vq5-hH8yUGFv6r2ZdX-TOQg0ldsEC2T6qO8,16315
21
+ karton_core-5.6.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
+ karton_core-5.6.0.dist-info/METADATA,sha256=Atu4wfGAqbkw-lVDV_Wtt3fxSG7nV0QmVqjGnfNAFZ4,6818
23
+ karton_core-5.6.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
24
+ karton_core-5.6.0.dist-info/entry_points.txt,sha256=OgLlsXy61GP6-Yob3oXqeJ2hlRU6LBLj33fr0NufKz0,98
25
+ karton_core-5.6.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.6.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
+ karton_core-5.6.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,3 @@
1
1
  [console_scripts]
2
2
  karton = karton.core.main:main
3
3
  karton-system = karton.system:SystemService.main
4
-