karton-core 5.5.1__py3-none-any.whl → 5.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karton/core/__version__.py +1 -1
- karton/core/backend.py +37 -1
- karton/core/karton.py +7 -2
- karton/core/query.py +20 -8
- karton/system/system.py +70 -12
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/METADATA +2 -6
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/RECORD +13 -13
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/WHEEL +1 -1
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/entry_points.txt +0 -1
- /karton_core-5.5.1-nspkg.pth → /karton_core-5.6.0-nspkg.pth +0 -0
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/LICENSE +0 -0
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/namespace_packages.txt +0 -0
- {karton_core-5.5.1.dist-info → karton_core-5.6.0.dist-info}/top_level.txt +0 -0
karton/core/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "5.5.1"
|
1
|
+
__version__ = "5.6.0"
|
karton/core/backend.py
CHANGED
@@ -986,6 +986,21 @@ class KartonBackend:
|
|
986
986
|
objs.append(obj["Key"])
|
987
987
|
return objs
|
988
988
|
|
989
|
+
def list_object_versions(self, bucket: str) -> Dict[str, List[str]]:
|
990
|
+
"""
|
991
|
+
List version identifiers of stored resource objects
|
992
|
+
:param bucket: Bucket name
|
993
|
+
:return: Dictionary of object version identifiers {key: [version_ids, ...]}
|
994
|
+
"""
|
995
|
+
objs = defaultdict(list)
|
996
|
+
paginator = self.s3.get_paginator("list_object_versions")
|
997
|
+
for page in paginator.paginate(Bucket=bucket):
|
998
|
+
for obj in page.get("Versions", list()):
|
999
|
+
objs[obj["Key"]].append(obj["VersionId"])
|
1000
|
+
for obj in page.get("DeleteMarkers", list()):
|
1001
|
+
objs[obj["Key"]].append(obj["VersionId"])
|
1002
|
+
return dict(objs)
|
1003
|
+
|
989
1004
|
def remove_object(self, bucket: str, object_uid: str) -> None:
|
990
1005
|
"""
|
991
1006
|
Remove resource object from object storage
|
@@ -1002,7 +1017,28 @@ class KartonBackend:
|
|
1002
1017
|
:param bucket: Bucket name
|
1003
1018
|
:param object_uids: Object identifiers
|
1004
1019
|
"""
|
1005
|
-
for delete_objects in chunks([{"Key": uid} for uid in object_uids],
|
1020
|
+
for delete_objects in chunks([{"Key": uid} for uid in object_uids], 100):
|
1021
|
+
self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
|
1022
|
+
|
1023
|
+
def remove_object_versions(
|
1024
|
+
self, bucket: str, object_versions: Dict[str, List[str]]
|
1025
|
+
) -> None:
|
1026
|
+
"""
|
1027
|
+
Bulk remove resource object versions from object storage
|
1028
|
+
|
1029
|
+
:param bucket: Bucket name
|
1030
|
+
:param object_versions: Object version identifiers
|
1031
|
+
"""
|
1032
|
+
versions = iter(
|
1033
|
+
(uid, version_id)
|
1034
|
+
for uid, versions in object_versions.items()
|
1035
|
+
for version_id in versions
|
1036
|
+
)
|
1037
|
+
deletion_chunks = chunks(
|
1038
|
+
[{"Key": uid, "VersionId": version_id} for uid, version_id in versions],
|
1039
|
+
100,
|
1040
|
+
)
|
1041
|
+
for delete_objects in deletion_chunks:
|
1006
1042
|
self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
|
1007
1043
|
|
1008
1044
|
def check_bucket_exists(self, bucket: str, create: bool = False) -> bool:
|
karton/core/karton.py
CHANGED
@@ -106,11 +106,15 @@ class Consumer(KartonServiceBase):
|
|
106
106
|
:param config: Karton config to use for service configuration
|
107
107
|
:param identity: Karton service identity
|
108
108
|
:param backend: Karton backend to use
|
109
|
+
:param task_timeout: The maximum time, in seconds, this consumer will wait for
|
110
|
+
a task to finish processing before being CRASHED on timeout.
|
111
|
+
Set 0 for unlimited, and None for using global value
|
109
112
|
"""
|
110
113
|
|
111
114
|
filters: List[Dict[str, Any]] = []
|
112
115
|
persistent: bool = True
|
113
116
|
version: Optional[str] = None
|
117
|
+
task_timeout = None
|
114
118
|
|
115
119
|
def __init__(
|
116
120
|
self,
|
@@ -130,7 +134,8 @@ class Consumer(KartonServiceBase):
|
|
130
134
|
self.config.getboolean("karton", "persistent", self.persistent)
|
131
135
|
and not self.debug
|
132
136
|
)
|
133
|
-
self.task_timeout
|
137
|
+
if self.task_timeout is None:
|
138
|
+
self.task_timeout = self.config.getint("karton", "task_timeout")
|
134
139
|
self.current_task: Optional[Task] = None
|
135
140
|
self._pre_hooks: List[Tuple[Optional[str], Callable[[Task], None]]] = []
|
136
141
|
self._post_hooks: List[
|
@@ -323,7 +328,7 @@ class Consumer(KartonServiceBase):
|
|
323
328
|
"""
|
324
329
|
self.log.info("Service %s started", self.identity)
|
325
330
|
|
326
|
-
if self.task_timeout
|
331
|
+
if self.task_timeout:
|
327
332
|
self.log.info(f"Task timeout is set to {self.task_timeout} seconds")
|
328
333
|
|
329
334
|
# Get the old binds and set the new ones atomically
|
karton/core/query.py
CHANGED
@@ -25,8 +25,15 @@ def is_non_string_sequence(entry):
|
|
25
25
|
class Query(object):
|
26
26
|
"""The Query class is used to match an object against a MongoDB-like query"""
|
27
27
|
|
28
|
-
def __init__(self, definition):
|
28
|
+
def __init__(self, definition, _type_coercion=False):
|
29
|
+
"""
|
30
|
+
If _type_coercion is enabled: header values are coerced to string
|
31
|
+
when condition is also a string. It's implemented for compatibility
|
32
|
+
with old syntax e.g. {"execute": "!False"} filter vs {"execute": False}
|
33
|
+
header value.
|
34
|
+
"""
|
29
35
|
self._definition = definition
|
36
|
+
self._type_coercion = _type_coercion
|
30
37
|
|
31
38
|
def match(self, entry):
|
32
39
|
"""Matches the entry object against the query specified on instanciation"""
|
@@ -40,7 +47,7 @@ class Query(object):
|
|
40
47
|
)
|
41
48
|
if is_non_string_sequence(entry):
|
42
49
|
return condition in entry
|
43
|
-
return condition
|
50
|
+
return self._eq(condition, entry)
|
44
51
|
|
45
52
|
def _extract(self, entry, path):
|
46
53
|
if not path:
|
@@ -108,9 +115,14 @@ class Query(object):
|
|
108
115
|
def _noop(*_):
|
109
116
|
return True
|
110
117
|
|
111
|
-
|
112
|
-
def _eq(condition, entry):
|
118
|
+
def _eq(self, condition, entry):
|
113
119
|
try:
|
120
|
+
if (
|
121
|
+
self._type_coercion
|
122
|
+
and type(condition) is str
|
123
|
+
and type(entry) is not str
|
124
|
+
):
|
125
|
+
return str(entry) == condition
|
114
126
|
return entry == condition
|
115
127
|
except TypeError:
|
116
128
|
return False
|
@@ -155,9 +167,8 @@ class Query(object):
|
|
155
167
|
except TypeError:
|
156
168
|
return False
|
157
169
|
|
158
|
-
|
159
|
-
|
160
|
-
return entry != condition
|
170
|
+
def _ne(self, condition, entry):
|
171
|
+
return not self._eq(condition, entry)
|
161
172
|
|
162
173
|
def _nin(self, condition, entry):
|
163
174
|
return not self._in(condition, entry)
|
@@ -346,5 +357,6 @@ def convert(filters):
|
|
346
357
|
{"$not": {"$or": negative_filter}},
|
347
358
|
{"$or": regular_filter},
|
348
359
|
]
|
349
|
-
}
|
360
|
+
},
|
361
|
+
_type_coercion=True,
|
350
362
|
)
|
karton/system/system.py
CHANGED
@@ -26,6 +26,7 @@ class SystemService(KartonServiceBase):
|
|
26
26
|
version = __version__
|
27
27
|
with_service_info = True
|
28
28
|
|
29
|
+
CRASH_STARTED_TASKS_ON_TIMEOUT = False
|
29
30
|
GC_INTERVAL = 3 * 60
|
30
31
|
TASK_DISPATCHED_TIMEOUT = 24 * 3600
|
31
32
|
TASK_STARTED_TIMEOUT = 24 * 3600
|
@@ -45,13 +46,35 @@ class SystemService(KartonServiceBase):
|
|
45
46
|
)
|
46
47
|
self.enable_gc = self.config.getboolean("system", "enable_gc", True)
|
47
48
|
self.enable_router = self.config.getboolean("system", "enable_router", True)
|
49
|
+
self.crash_started_tasks_on_timeout = self.config.getboolean(
|
50
|
+
"system", "crash_started_tasks_on_timeout", False
|
51
|
+
)
|
48
52
|
|
49
53
|
self.last_gc_trigger = time.time()
|
50
54
|
|
55
|
+
def _log_config(self):
|
56
|
+
self.log.info(
|
57
|
+
"Effective config:\n"
|
58
|
+
" gc_interval:\t%s\n"
|
59
|
+
" task_dispatched_timeout:\t%s\n"
|
60
|
+
" task_started_timeout:\t%s\n"
|
61
|
+
" task_crashed_timeout:\t%s\n"
|
62
|
+
" enable_gc:\t%s\n"
|
63
|
+
" enable_router:\t%s\n"
|
64
|
+
" crash_started_tasks_on_timeout:\t%s",
|
65
|
+
self.gc_interval,
|
66
|
+
self.task_dispatched_timeout,
|
67
|
+
self.task_started_timeout,
|
68
|
+
self.task_crashed_timeout,
|
69
|
+
self.enable_gc,
|
70
|
+
self.enable_router,
|
71
|
+
self.crash_started_tasks_on_timeout,
|
72
|
+
)
|
73
|
+
|
51
74
|
def gc_collect_resources(self) -> None:
|
52
75
|
# Collects unreferenced resources left in object storage
|
53
76
|
karton_bucket = self.backend.default_bucket_name
|
54
|
-
resources_to_remove =
|
77
|
+
resources_to_remove = self.backend.list_object_versions(karton_bucket)
|
55
78
|
# Note: it is important to get list of resources before getting list of tasks!
|
56
79
|
# Task is created before resource upload to lock the reference to the resource.
|
57
80
|
tasks = self.backend.iter_all_tasks()
|
@@ -62,12 +85,13 @@ class SystemService(KartonServiceBase):
|
|
62
85
|
resource.bucket == karton_bucket
|
63
86
|
and resource.uid in resources_to_remove
|
64
87
|
):
|
65
|
-
resources_to_remove
|
88
|
+
del resources_to_remove[resource.uid]
|
66
89
|
# Remove unreferenced resources
|
67
90
|
if resources_to_remove:
|
68
|
-
self.backend.
|
91
|
+
self.backend.remove_object_versions(karton_bucket, resources_to_remove)
|
69
92
|
|
70
93
|
def gc_collect_tasks(self) -> None:
|
94
|
+
self.log.debug("GC: gc_collect_tasks started")
|
71
95
|
# Collects finished tasks
|
72
96
|
root_tasks = set()
|
73
97
|
running_root_tasks = set()
|
@@ -75,6 +99,7 @@ class SystemService(KartonServiceBase):
|
|
75
99
|
|
76
100
|
current_time = time.time()
|
77
101
|
to_delete = []
|
102
|
+
to_crash = []
|
78
103
|
|
79
104
|
queues_to_clear = set()
|
80
105
|
online_consumers = self.backend.get_online_consumers()
|
@@ -116,14 +141,24 @@ class SystemService(KartonServiceBase):
|
|
116
141
|
and task.last_update is not None
|
117
142
|
and current_time > task.last_update + self.task_started_timeout
|
118
143
|
):
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
144
|
+
if self.crash_started_tasks_on_timeout:
|
145
|
+
to_crash.append(task)
|
146
|
+
self.log.error(
|
147
|
+
"Task %s is in Started state more than %d seconds. "
|
148
|
+
"Crashed. (receiver: %s)",
|
149
|
+
task.uid,
|
150
|
+
self.task_started_timeout,
|
151
|
+
task.headers.get("receiver", "<unknown>"),
|
152
|
+
)
|
153
|
+
else:
|
154
|
+
to_delete.append(task)
|
155
|
+
self.log.error(
|
156
|
+
"Task %s is in Started state more than %d seconds. "
|
157
|
+
"Killed. (receiver: %s)",
|
158
|
+
task.uid,
|
159
|
+
self.task_started_timeout,
|
160
|
+
task.headers.get("receiver", "<unknown>"),
|
161
|
+
)
|
127
162
|
elif task.status == TaskState.FINISHED:
|
128
163
|
to_delete.append(task)
|
129
164
|
self.log.debug("GC: Finished task %s", task.uid)
|
@@ -151,11 +186,26 @@ class SystemService(KartonServiceBase):
|
|
151
186
|
self.backend.increment_metrics_list(
|
152
187
|
KartonMetrics.TASK_GARBAGE_COLLECTED, to_increment
|
153
188
|
)
|
189
|
+
if to_crash:
|
190
|
+
to_increment = [
|
191
|
+
task.headers.get("receiver", "unknown") for task in to_crash
|
192
|
+
]
|
193
|
+
for task in to_crash:
|
194
|
+
task.error = [
|
195
|
+
"This task was STARTED too long (TASK_STARTED_TIMEOUT), "
|
196
|
+
"so status was changes to CRASHED."
|
197
|
+
]
|
198
|
+
self.backend.set_task_status(task, TaskState.CRASHED)
|
199
|
+
self.backend.increment_metrics_list(
|
200
|
+
KartonMetrics.TASK_CRASHED, to_increment
|
201
|
+
)
|
154
202
|
|
155
203
|
for finished_root_task in root_tasks.difference(running_root_tasks):
|
156
204
|
# TODO: Notification needed
|
157
205
|
self.log.debug("GC: Finished root task %s", finished_root_task)
|
158
206
|
|
207
|
+
self.log.debug("GC: gc_collect_tasks ended")
|
208
|
+
|
159
209
|
def gc_collect(self) -> None:
|
160
210
|
if time.time() > (self.last_gc_trigger + self.gc_interval):
|
161
211
|
try:
|
@@ -251,6 +301,7 @@ class SystemService(KartonServiceBase):
|
|
251
301
|
self.handle_operations(bodies)
|
252
302
|
|
253
303
|
def loop(self) -> None:
|
304
|
+
self._log_config()
|
254
305
|
self.log.info("Manager %s started", self.identity)
|
255
306
|
|
256
307
|
with self.graceful_killer():
|
@@ -288,7 +339,6 @@ class SystemService(KartonServiceBase):
|
|
288
339
|
parser.add_argument(
|
289
340
|
"--gc-interval",
|
290
341
|
type=int,
|
291
|
-
default=cls.GC_INTERVAL,
|
292
342
|
help="Garbage collection interval",
|
293
343
|
)
|
294
344
|
parser.add_argument(
|
@@ -304,16 +354,24 @@ class SystemService(KartonServiceBase):
|
|
304
354
|
parser.add_argument(
|
305
355
|
"--task-crashed-timeout", help="Timeout for tasks in Crashed state"
|
306
356
|
)
|
357
|
+
parser.add_argument(
|
358
|
+
"--crash-started-task-on-timeout",
|
359
|
+
action="store_const",
|
360
|
+
dest="crash_started_tasks",
|
361
|
+
help="Crash Started tasks on timeout instead of deleting",
|
362
|
+
)
|
307
363
|
return parser
|
308
364
|
|
309
365
|
@classmethod
|
310
366
|
def config_from_args(cls, config: Config, args: argparse.Namespace):
|
311
367
|
super().config_from_args(config, args)
|
368
|
+
|
312
369
|
config.load_from_dict(
|
313
370
|
{
|
314
371
|
"system": {
|
315
372
|
"enable_gc": args.enable_gc,
|
316
373
|
"enable_router": args.enable_router,
|
374
|
+
"crash_started_tasks_on_timeout": args.crash_started_tasks,
|
317
375
|
"gc_interval": args.gc_interval,
|
318
376
|
"task_dispatched_timeout": args.task_dispatched_timeout,
|
319
377
|
"task_started_timeout": args.task_started_timeout,
|
@@ -1,17 +1,15 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: karton-core
|
3
|
-
Version: 5.5.1
|
3
|
+
Version: 5.6.0
|
4
4
|
Summary: Distributed malware analysis orchestration framework
|
5
5
|
Home-page: https://github.com/CERT-Polska/karton
|
6
|
-
License: UNKNOWN
|
7
|
-
Platform: UNKNOWN
|
8
6
|
Classifier: Programming Language :: Python :: 3
|
9
7
|
Classifier: Operating System :: OS Independent
|
10
8
|
Classifier: License :: OSI Approved :: BSD License
|
11
9
|
Requires-Python: >=3.8
|
12
10
|
Description-Content-Type: text/markdown
|
13
11
|
License-File: LICENSE
|
14
|
-
Requires-Dist: boto3
|
12
|
+
Requires-Dist: boto3 <1.36.0
|
15
13
|
Requires-Dist: orjson
|
16
14
|
Requires-Dist: redis
|
17
15
|
|
@@ -141,5 +139,3 @@ RetDec unpacker module for the Karton framework
|
|
141
139
|
Malware similarity platform with modularity in mind.
|
142
140
|
|
143
141
|

|
144
|
-
|
145
|
-
|
@@ -1,27 +1,27 @@
|
|
1
|
-
karton_core-5.
|
1
|
+
karton_core-5.6.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
|
2
2
|
karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
|
3
|
-
karton/core/__version__.py,sha256=
|
4
|
-
karton/core/backend.py,sha256
|
3
|
+
karton/core/__version__.py,sha256=9eGec3AYz2CNznnfEwKYRiJ65G9gDWtXmb587ljDycg,22
|
4
|
+
karton/core/backend.py,sha256=HlAolngYUTx2ajXhLf5RbWxmPOq5orLfz52iTkNxxqM,38147
|
5
5
|
karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
|
6
6
|
karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
|
7
7
|
karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
|
8
8
|
karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
|
9
|
-
karton/core/karton.py,sha256=
|
9
|
+
karton/core/karton.py,sha256=Fi3wNqMGiKvHN2BECsqsvfxkiyuwPdlC21jpqQdkeak,15434
|
10
10
|
karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
|
11
11
|
karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
|
12
12
|
karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
karton/core/query.py,sha256=
|
13
|
+
karton/core/query.py,sha256=sf24DweVlXfJuBbBD_ns2LXhOV-IBwuPG3jBfTJu77s,12063
|
14
14
|
karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
|
15
15
|
karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
|
16
16
|
karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
|
17
17
|
karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
|
18
18
|
karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
|
19
19
|
karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
|
20
|
-
karton/system/system.py,sha256=
|
21
|
-
karton_core-5.
|
22
|
-
karton_core-5.
|
23
|
-
karton_core-5.
|
24
|
-
karton_core-5.
|
25
|
-
karton_core-5.
|
26
|
-
karton_core-5.
|
27
|
-
karton_core-5.
|
20
|
+
karton/system/system.py,sha256=v2rEJYN2Vq5-hH8yUGFv6r2ZdX-TOQg0ldsEC2T6qO8,16315
|
21
|
+
karton_core-5.6.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
|
22
|
+
karton_core-5.6.0.dist-info/METADATA,sha256=Atu4wfGAqbkw-lVDV_Wtt3fxSG7nV0QmVqjGnfNAFZ4,6818
|
23
|
+
karton_core-5.6.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
24
|
+
karton_core-5.6.0.dist-info/entry_points.txt,sha256=OgLlsXy61GP6-Yob3oXqeJ2hlRU6LBLj33fr0NufKz0,98
|
25
|
+
karton_core-5.6.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
|
26
|
+
karton_core-5.6.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
|
27
|
+
karton_core-5.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|