karton-core 5.3.4__py3-none-any.whl → 5.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.3.4"
1
+ __version__ = "5.4.0"
karton/core/backend.py CHANGED
@@ -449,9 +449,7 @@ class KartonBackend:
449
449
  chunk_size,
450
450
  )
451
451
  return [
452
- Task.unserialize(task_data, backend=self)
453
- if parse_resources
454
- else Task.unserialize(task_data, parse_resources=False)
452
+ Task.unserialize(task_data, backend=self, parse_resources=parse_resources)
455
453
  for chunk in keys
456
454
  for task_data in self.redis.mget(chunk)
457
455
  if task_data is not None
@@ -465,9 +463,9 @@ class KartonBackend:
465
463
  ) -> Iterator[Task]:
466
464
  for chunk in chunks_iter(task_keys, chunk_size):
467
465
  yield from (
468
- Task.unserialize(task_data, backend=self)
469
- if parse_resources
470
- else Task.unserialize(task_data, parse_resources=False)
466
+ Task.unserialize(
467
+ task_data, backend=self, parse_resources=parse_resources
468
+ )
471
469
  for task_data in self.redis.mget(chunk)
472
470
  if task_data is not None
473
471
  )
@@ -534,6 +532,58 @@ class KartonBackend:
534
532
  self.iter_all_tasks(chunk_size=chunk_size, parse_resources=parse_resources)
535
533
  )
536
534
 
535
+ def _iter_legacy_task_tree(
536
+ self, root_uid: str, chunk_size: int = 1000, parse_resources: bool = True
537
+ ) -> Iterator[Task]:
538
+ """
539
+ Processes tasks made by <5.4.0 (unrouted from <5.4.0 producers or existing
540
+ before upgrade)
541
+
542
+ Used internally by iter_task_tree.
543
+ """
544
+ # Iterate over all karton tasks that do not match the new task id format
545
+ legacy_task_keys = self.redis.scan_iter(
546
+ match=f"{KARTON_TASK_NAMESPACE}:[^{{]*", count=chunk_size
547
+ )
548
+ for chunk in chunks_iter(legacy_task_keys, chunk_size):
549
+ yield from filter(
550
+ lambda task: task.root_uid == root_uid,
551
+ (
552
+ Task.unserialize(
553
+ task_data, backend=self, parse_resources=parse_resources
554
+ )
555
+ for task_data in self.redis.mget(chunk)
556
+ if task_data is not None
557
+ ),
558
+ )
559
+
560
+ def iter_task_tree(
561
+ self, root_uid: str, chunk_size: int = 1000, parse_resources: bool = True
562
+ ) -> Iterator[Task]:
563
+ """
564
+ Iterates all tasks that belong to the same analysis task tree
565
+ and have the same root_uid
566
+
567
+ :param root_uid: Root identifier of task tree
568
+ :param chunk_size: Size of chunks passed to the Redis SCAN and MGET command
569
+ :param parse_resources: If set to False, resources are not parsed.
570
+ It speeds up deserialization. Read :py:meth:`Task.unserialize` documentation
571
+ to learn more.
572
+ :return: Iterator with task objects
573
+ """
574
+ # Process <5.4.0 tasks (unrouted from <5.4.0 producers
575
+ # or existing before upgrade)
576
+ yield from self._iter_legacy_task_tree(
577
+ root_uid, chunk_size=chunk_size, parse_resources=parse_resources
578
+ )
579
+ # Process >=5.4.0 tasks
580
+ task_keys = self.redis.scan_iter(
581
+ match=f"{KARTON_TASK_NAMESPACE}:{{{root_uid}}}:*", count=chunk_size
582
+ )
583
+ yield from self._iter_tasks(
584
+ task_keys, chunk_size=chunk_size, parse_resources=parse_resources
585
+ )
586
+
537
587
  def register_task(self, task: Task, pipe: Optional[Pipeline] = None) -> None:
538
588
  """
539
589
  Register or update task in Redis.
karton/core/inspect.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from collections import defaultdict
2
- from typing import Dict, List
2
+ from typing import Dict, List, Optional
3
3
 
4
4
  from .backend import KartonBackend, KartonBind
5
5
  from .task import Task, TaskState
@@ -9,9 +9,9 @@ class KartonQueue:
9
9
  """
10
10
  View object representing a Karton queue
11
11
 
12
- :param bind: :py:meth:`KartonBind` object representing the queue bind
12
+ :param bind: :class:`KartonBind` object representing the queue bind
13
13
  :param tasks: List of tasks currently in queue
14
- :param state: :py:meth:`KartonBackend` object to be used
14
+ :param state: :class:`KartonState` object to be used
15
15
  """
16
16
 
17
17
  def __init__(
@@ -48,7 +48,7 @@ class KartonAnalysis:
48
48
 
49
49
  :param root_uid: Analysis root task uid
50
50
  :param tasks: List of tasks
51
- :param state: :py:meth:`KartonBackend` object to be used
51
+ :param state: :class:`KartonState` object to be used
52
52
  """
53
53
 
54
54
  def __init__(self, root_uid: str, tasks: List[Task], state: "KartonState") -> None:
@@ -89,7 +89,7 @@ def get_queues_for_tasks(
89
89
  Group task objects by their queue name
90
90
 
91
91
  :param tasks: Task objects to group
92
- :param state: :py:meth:`KartonBackend` to bind to created queues
92
+ :param state: :class:`KartonState` object to be used
93
93
  :return: A dictionary containing the queue names and lists of tasks
94
94
  """
95
95
  tasks_per_queue = defaultdict(list)
@@ -119,30 +119,68 @@ class KartonState:
119
119
  :param backend: :py:meth:`KartonBackend` object to use for data fetching
120
120
  """
121
121
 
122
- def __init__(self, backend: KartonBackend) -> None:
122
+ def __init__(self, backend: KartonBackend, parse_resources: bool = False) -> None:
123
123
  self.backend = backend
124
124
  self.binds = {bind.identity: bind for bind in backend.get_binds()}
125
125
  self.replicas = backend.get_online_consumers()
126
- self.tasks = backend.get_all_tasks()
127
- self.pending_tasks = [
128
- task for task in self.tasks if task.status != TaskState.FINISHED
129
- ]
130
-
131
- # Tasks grouped by root_uid
132
- tasks_per_analysis = defaultdict(list)
133
-
134
- for task in self.pending_tasks:
135
- tasks_per_analysis[task.root_uid].append(task)
136
-
137
- self.analyses = {
138
- root_uid: KartonAnalysis(root_uid=root_uid, tasks=tasks, state=self)
139
- for root_uid, tasks in tasks_per_analysis.items()
140
- }
141
- queues = get_queues_for_tasks(self.pending_tasks, self)
142
- # Present registered queues without tasks
143
- for bind_name, bind in self.binds.items():
144
- if bind_name not in queues:
145
- queues[bind_name] = KartonQueue(
146
- bind=self.binds[bind_name], tasks=[], state=self
126
+ self.parse_resources = parse_resources
127
+
128
+ self._tasks: Optional[List[Task]] = None
129
+ self._pending_tasks: Optional[List[Task]] = None
130
+ self._analyses: Optional[Dict[str, KartonAnalysis]] = None
131
+ self._queues: Optional[Dict[str, KartonQueue]] = None
132
+
133
+ @property
134
+ def tasks(self) -> List[Task]:
135
+ if self._tasks is None:
136
+ self._tasks = self.backend.get_all_tasks(
137
+ parse_resources=self.parse_resources
138
+ )
139
+ return self._tasks
140
+
141
+ @property
142
+ def pending_tasks(self) -> List[Task]:
143
+ if self._pending_tasks is None:
144
+ self._pending_tasks = [
145
+ task for task in self.tasks if task.status != TaskState.FINISHED
146
+ ]
147
+ return self._pending_tasks
148
+
149
+ @property
150
+ def analyses(self) -> Dict[str, KartonAnalysis]:
151
+ if self._analyses is None:
152
+ # Tasks grouped by root_uid
153
+ tasks_per_analysis = defaultdict(list)
154
+
155
+ for task in self.pending_tasks:
156
+ tasks_per_analysis[task.root_uid].append(task)
157
+
158
+ self._analyses = {
159
+ root_uid: KartonAnalysis(root_uid=root_uid, tasks=tasks, state=self)
160
+ for root_uid, tasks in tasks_per_analysis.items()
161
+ }
162
+ return self._analyses
163
+
164
+ @property
165
+ def queues(self) -> Dict[str, KartonQueue]:
166
+ if self._queues is None:
167
+ queues = get_queues_for_tasks(self.pending_tasks, self)
168
+ # Present registered queues without tasks
169
+ for bind_name, bind in self.binds.items():
170
+ if bind_name not in queues:
171
+ queues[bind_name] = KartonQueue(
172
+ bind=self.binds[bind_name], tasks=[], state=self
173
+ )
174
+ self._queues = queues
175
+ return self._queues
176
+
177
+ def get_analysis(self, root_uid: str) -> KartonAnalysis:
178
+ return KartonAnalysis(
179
+ root_uid=root_uid,
180
+ tasks=list(
181
+ self.backend.iter_task_tree(
182
+ root_uid, parse_resources=self.parse_resources
147
183
  )
148
- self.queues = queues
184
+ ),
185
+ state=self,
186
+ )
karton/core/task.py CHANGED
@@ -106,13 +106,18 @@ class Task(object):
106
106
  raise ValueError("Persistent headers should be an instance of a dict")
107
107
 
108
108
  if uid is None:
109
- self.uid = str(uuid.uuid4())
109
+ task_uid = str(uuid.uuid4())
110
+ if root_uid is None:
111
+ self.root_uid = task_uid
112
+ else:
113
+ self.root_uid = root_uid
114
+ # New-style UID format introduced in v5.4.0
115
+ # {12345678-1234-1234-1234-12345678abcd}:12345678-1234-1234-1234-12345678abcd
116
+ self.uid = f"{{{self.root_uid}}}:{task_uid}"
110
117
  else:
111
118
  self.uid = uid
112
-
113
- if root_uid is None:
114
- self.root_uid = self.uid
115
- else:
119
+ if root_uid is None:
120
+ raise ValueError("root_uid cannot be None when uid is not None")
116
121
  self.root_uid = root_uid
117
122
 
118
123
  self.orig_uid = orig_uid
@@ -137,6 +142,21 @@ class Task(object):
137
142
  def receiver(self) -> Optional[str]:
138
143
  return self.headers.get("receiver")
139
144
 
145
+ @property
146
+ def task_uid(self) -> str:
147
+ return self.fquid_to_uid(self.uid)
148
+
149
+ @staticmethod
150
+ def fquid_to_uid(fquid: str) -> str:
151
+ """
152
+ Gets task uid from fully-qualified fquid ({root_uid}:task_uid)
153
+
154
+ :return: Task uid
155
+ """
156
+ if ":" not in fquid:
157
+ return fquid
158
+ return fquid.split(":")[-1]
159
+
140
160
  def fork_task(self) -> "Task":
141
161
  """
142
162
  Fork task to transfer single task to many queues (but use different UID).
@@ -434,9 +454,7 @@ class Task(object):
434
454
  process. This flag is used mainly for multiple task processing e.g.
435
455
  filtering based on status.
436
456
  When resource deserialization is turned off, Task.unserialize will try
437
- to use faster 3rd-party JSON parser (orjson) if it's installed. It's not
438
- added as a required dependency but can speed up things if you need to check
439
- status of multiple tasks at once.
457
+ to use faster 3rd-party JSON parser (orjson).
440
458
  :return: Unserialized Task object
441
459
 
442
460
  :meta private:
@@ -457,11 +475,7 @@ class Task(object):
457
475
  if parse_resources:
458
476
  task_data = json.loads(data, object_hook=unserialize_resources)
459
477
  else:
460
- try:
461
- task_data = orjson.loads(data)
462
- except orjson.JSONDecodeError:
463
- # fallback, in case orjson raises exception during loading
464
- task_data = json.loads(data, object_hook=unserialize_resources)
478
+ task_data = orjson.loads(data)
465
479
 
466
480
  # Compatibility with Karton <5.2.0
467
481
  headers_persistent_fallback = task_data["payload_persistent"].get(
karton/system/system.py CHANGED
@@ -166,7 +166,7 @@ class SystemService(KartonServiceBase):
166
166
 
167
167
  def route_task(self, task: Task, binds: List[KartonBind]) -> None:
168
168
  # Performs routing of task
169
- self.log.info("[%s] Processing task %s", task.root_uid, task.uid)
169
+ self.log.info("[%s] Processing task %s", task.root_uid, task.task_uid)
170
170
  # store the producer-task relationship in redis for task tracking
171
171
  self.backend.log_identity_output(
172
172
  task.headers.get("origin", "unknown"), task.headers
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.3.4
3
+ Version: 5.4.0
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
6
  License: UNKNOWN
@@ -1,26 +1,26 @@
1
- karton_core-5.3.4-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
1
+ karton_core-5.4.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
2
  karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=dAiy67tSM_yYFR8Us_fQT-TDbfV34VpASYNKXfGmEnQ,22
4
- karton/core/backend.py,sha256=evOzlz1v1sxWusc8VojGAYyeyi9fcbVoEPm6WoNT1Xs,34696
3
+ karton/core/__version__.py,sha256=xjYaBGUFGg0kGZj_WhuoFyPD8NILPsr79SaMwmYQGSg,22
4
+ karton/core/backend.py,sha256=-sQG7utnaWLJOEcafeSwEDLnkflPqtSCwg_mn_nnFhg,36727
5
5
  karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
6
  karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
7
  karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
- karton/core/inspect.py,sha256=rIa0u4u12vG_RudPfc9UAS4RZD56W8qbUa8n1dDIkX0,4868
8
+ karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
9
  karton/core/karton.py,sha256=9SOAviG42kSsPqc3EuaHzWtA_KywMtc01hmU6FaJpHo,15007
10
10
  karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
11
  karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
12
  karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
14
- karton/core/task.py,sha256=WYXzVopg8VlWOc7ncEscHVKivsXHfZc5zWLHW_mxBwY,21000
14
+ karton/core/task.py,sha256=diwg8uUl57NEYNRjT1l5CPiNw3EQcU11BnrLul33fx0,21350
15
15
  karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
16
16
  karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
17
17
  karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
18
18
  karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
19
- karton/system/system.py,sha256=65c8j1Ayra_CrXA1AQPBm00bTnpQiW91iZlTTRfJJI8,13787
20
- karton_core-5.3.4.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
21
- karton_core-5.3.4.dist-info/METADATA,sha256=w15YiCEMDZrAF_429FAx98NH3tif38vyDxI9g9heY7E,6847
22
- karton_core-5.3.4.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
23
- karton_core-5.3.4.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
24
- karton_core-5.3.4.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
25
- karton_core-5.3.4.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.3.4.dist-info/RECORD,,
19
+ karton/system/system.py,sha256=yF_d71a8w7JYA7IXUt63d5_QBH6x1QplB-xcrzQTXL4,13792
20
+ karton_core-5.4.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
21
+ karton_core-5.4.0.dist-info/METADATA,sha256=kopeYFCI9EoFQbc7J7woZWjI_5egy29-lYUW7UzEQ2I,6847
22
+ karton_core-5.4.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
23
+ karton_core-5.4.0.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
24
+ karton_core-5.4.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
25
+ karton_core-5.4.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.4.0.dist-info/RECORD,,