karton-core 5.3.4__py3-none-any.whl → 5.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.3.4"
1
+ __version__ = "5.5.0"
karton/core/backend.py CHANGED
@@ -449,9 +449,7 @@ class KartonBackend:
449
449
  chunk_size,
450
450
  )
451
451
  return [
452
- Task.unserialize(task_data, backend=self)
453
- if parse_resources
454
- else Task.unserialize(task_data, parse_resources=False)
452
+ Task.unserialize(task_data, backend=self, parse_resources=parse_resources)
455
453
  for chunk in keys
456
454
  for task_data in self.redis.mget(chunk)
457
455
  if task_data is not None
@@ -465,9 +463,9 @@ class KartonBackend:
465
463
  ) -> Iterator[Task]:
466
464
  for chunk in chunks_iter(task_keys, chunk_size):
467
465
  yield from (
468
- Task.unserialize(task_data, backend=self)
469
- if parse_resources
470
- else Task.unserialize(task_data, parse_resources=False)
466
+ Task.unserialize(
467
+ task_data, backend=self, parse_resources=parse_resources
468
+ )
471
469
  for task_data in self.redis.mget(chunk)
472
470
  if task_data is not None
473
471
  )
@@ -534,6 +532,58 @@ class KartonBackend:
534
532
  self.iter_all_tasks(chunk_size=chunk_size, parse_resources=parse_resources)
535
533
  )
536
534
 
535
    def _iter_legacy_task_tree(
        self, root_uid: str, chunk_size: int = 1000, parse_resources: bool = True
    ) -> Iterator[Task]:
        """
        Processes tasks made by <5.4.0 (unrouted from <5.4.0 producers or existing
        before upgrade)

        Used internally by iter_task_tree.

        :param root_uid: Root identifier of the task tree to filter by
        :param chunk_size: Size of chunks passed to the Redis SCAN and MGET commands
        :param parse_resources: If set to False, resources are not parsed
        :return: Iterator with legacy task objects belonging to the given tree
        """
        # Iterate over all karton tasks that do not match the new task id format
        # (new-style keys start with "{<root_uid>}", so "[^{" excludes them)
        legacy_task_keys = self.redis.scan_iter(
            match=f"{KARTON_TASK_NAMESPACE}:[^{{]*", count=chunk_size
        )
        for chunk in chunks_iter(legacy_task_keys, chunk_size):
            # Legacy keys don't embed root_uid, so every candidate task must be
            # deserialized and filtered on its root_uid attribute
            yield from filter(
                lambda task: task.root_uid == root_uid,
                (
                    Task.unserialize(
                        task_data, backend=self, parse_resources=parse_resources
                    )
                    for task_data in self.redis.mget(chunk)
                    if task_data is not None
                ),
            )
559
+
560
    def iter_task_tree(
        self, root_uid: str, chunk_size: int = 1000, parse_resources: bool = True
    ) -> Iterator[Task]:
        """
        Iterates all tasks that belong to the same analysis task tree
        and have the same root_uid

        :param root_uid: Root identifier of task tree
        :param chunk_size: Size of chunks passed to the Redis SCAN and MGET command
        :param parse_resources: If set to False, resources are not parsed.
            It speeds up deserialization. Read :py:meth:`Task.unserialize` documentation
            to learn more.
        :return: Iterator with task objects
        """
        # Process <5.4.0 tasks (unrouted from <5.4.0 producers
        # or existing before upgrade)
        yield from self._iter_legacy_task_tree(
            root_uid, chunk_size=chunk_size, parse_resources=parse_resources
        )
        # Process >=5.4.0 tasks: new-style keys embed the root_uid as a
        # "{root_uid}" prefix, so a single SCAN pattern selects the whole tree
        task_keys = self.redis.scan_iter(
            match=f"{KARTON_TASK_NAMESPACE}:{{{root_uid}}}:*", count=chunk_size
        )
        yield from self._iter_tasks(
            task_keys, chunk_size=chunk_size, parse_resources=parse_resources
        )
586
+
537
587
  def register_task(self, task: Task, pipe: Optional[Pipeline] = None) -> None:
538
588
  """
539
589
  Register or update task in Redis.
karton/core/inspect.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from collections import defaultdict
2
- from typing import Dict, List
2
+ from typing import Dict, List, Optional
3
3
 
4
4
  from .backend import KartonBackend, KartonBind
5
5
  from .task import Task, TaskState
@@ -9,9 +9,9 @@ class KartonQueue:
9
9
  """
10
10
  View object representing a Karton queue
11
11
 
12
- :param bind: :py:meth:`KartonBind` object representing the queue bind
12
+ :param bind: :class:`KartonBind` object representing the queue bind
13
13
  :param tasks: List of tasks currently in queue
14
- :param state: :py:meth:`KartonBackend` object to be used
14
+ :param state: :class:`KartonState` object to be used
15
15
  """
16
16
 
17
17
  def __init__(
@@ -48,7 +48,7 @@ class KartonAnalysis:
48
48
 
49
49
  :param root_uid: Analysis root task uid
50
50
  :param tasks: List of tasks
51
- :param state: :py:meth:`KartonBackend` object to be used
51
+ :param state: :class:`KartonState` object to be used
52
52
  """
53
53
 
54
54
  def __init__(self, root_uid: str, tasks: List[Task], state: "KartonState") -> None:
@@ -89,7 +89,7 @@ def get_queues_for_tasks(
89
89
  Group task objects by their queue name
90
90
 
91
91
  :param tasks: Task objects to group
92
- :param state: :py:meth:`KartonBackend` to bind to created queues
92
+ :param state: :class:`KartonState` object to be used
93
93
  :return: A dictionary containing the queue names and lists of tasks
94
94
  """
95
95
  tasks_per_queue = defaultdict(list)
@@ -119,30 +119,68 @@ class KartonState:
119
119
  :param backend: :py:meth:`KartonBackend` object to use for data fetching
120
120
  """
121
121
 
122
    def __init__(self, backend: KartonBackend, parse_resources: bool = False) -> None:
        """
        :param backend: :py:meth:`KartonBackend` object to use for data fetching
        :param parse_resources: If set to True, tasks are deserialized together
            with their resources (slower; defaults to False)
        """
        self.backend = backend
        self.binds = {bind.identity: bind for bind in backend.get_binds()}
        self.replicas = backend.get_online_consumers()
        # Stored for later backend fetches performed by the lazy properties
        self.parse_resources = parse_resources

        # Lazily-computed caches backing the tasks/pending_tasks/analyses/queues
        # properties; None means "not fetched yet"
        self._tasks: Optional[List[Task]] = None
        self._pending_tasks: Optional[List[Task]] = None
        self._analyses: Optional[Dict[str, KartonAnalysis]] = None
        self._queues: Optional[Dict[str, KartonQueue]] = None
132
+
133
    @property
    def tasks(self) -> List[Task]:
        """All tasks known to the backend, fetched on first access and cached."""
        if self._tasks is None:
            self._tasks = self.backend.get_all_tasks(
                parse_resources=self.parse_resources
            )
        return self._tasks
140
+
141
    @property
    def pending_tasks(self) -> List[Task]:
        """Tasks that have not reached the FINISHED state (computed once, cached)."""
        if self._pending_tasks is None:
            self._pending_tasks = [
                task for task in self.tasks if task.status != TaskState.FINISHED
            ]
        return self._pending_tasks
148
+
149
    @property
    def analyses(self) -> Dict[str, KartonAnalysis]:
        """Pending tasks grouped into KartonAnalysis views, keyed by root_uid."""
        if self._analyses is None:
            # Tasks grouped by root_uid
            tasks_per_analysis = defaultdict(list)

            for task in self.pending_tasks:
                tasks_per_analysis[task.root_uid].append(task)

            self._analyses = {
                root_uid: KartonAnalysis(root_uid=root_uid, tasks=tasks, state=self)
                for root_uid, tasks in tasks_per_analysis.items()
            }
        return self._analyses
163
+
164
    @property
    def queues(self) -> Dict[str, KartonQueue]:
        """Queue views keyed by identity, including empty registered queues."""
        if self._queues is None:
            queues = get_queues_for_tasks(self.pending_tasks, self)
            # Present registered queues without tasks
            for bind_name, bind in self.binds.items():
                if bind_name not in queues:
                    queues[bind_name] = KartonQueue(
                        bind=self.binds[bind_name], tasks=[], state=self
                    )
            self._queues = queues
        return self._queues
176
+
177
    def get_analysis(self, root_uid: str) -> KartonAnalysis:
        """
        Builds a KartonAnalysis view for a single task tree, fetching only
        that tree via backend.iter_task_tree instead of all tasks.

        :param root_uid: Root identifier of the task tree
        :return: KartonAnalysis object for the given root_uid
        """
        return KartonAnalysis(
            root_uid=root_uid,
            tasks=list(
                self.backend.iter_task_tree(
                    root_uid, parse_resources=self.parse_resources
                )
            ),
            state=self,
        )
karton/core/karton.py CHANGED
@@ -8,6 +8,7 @@ import time
8
8
  import traceback
9
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, cast
10
10
 
11
+ from . import query
11
12
  from .__version__ import __version__
12
13
  from .backend import KartonBackend, KartonBind, KartonMetrics
13
14
  from .base import KartonBase, KartonServiceBase
@@ -122,6 +123,9 @@ class Consumer(KartonServiceBase):
122
123
  if self.filters is None:
123
124
  raise ValueError("Cannot bind consumer on Empty binds")
124
125
 
126
+ # Dummy conversion to make sure the filters are well-formed.
127
+ query.convert(self.filters)
128
+
125
129
  self.persistent = (
126
130
  self.config.getboolean("karton", "persistent", self.persistent)
127
131
  and not self.debug
karton/core/query.py ADDED
@@ -0,0 +1,350 @@
1
+ import fnmatch
2
+ import re
3
+ from collections.abc import Mapping, Sequence
4
+ from typing import Dict, Type
5
+
6
+ # Source code adopted from https://github.com/kapouille/mongoquery
7
+ # Original licenced under "The Unlicense" license.
8
+
9
+
10
class QueryError(Exception):
    """Raised when a query definition is malformed or uses an unsupported operator."""
14
+
15
+
16
+ class _Undefined(object):
17
+ pass
18
+
19
+
20
def is_non_string_sequence(entry):
    """Tell whether *entry* is a sequence container other than a string."""
    if isinstance(entry, str):
        return False
    return isinstance(entry, Sequence)
23
+
24
+
25
class Query(object):
    """The Query class is used to match an object against a MongoDB-like query"""

    def __init__(self, definition):
        # definition is a (possibly nested) mapping in MongoDB query syntax
        self._definition = definition

    def match(self, entry):
        """Matches the entry object against the query specified on instantiation"""
        return self._match(self._definition, entry)

    def _match(self, condition, entry):
        # Recursive matcher entry point. A mapping condition is a set of
        # operator/sub-condition pairs combined with AND logic; a scalar
        # condition is tested by membership (sequence entries) or equality.
        if isinstance(condition, Mapping):
            return all(
                self._process_condition(sub_operator, sub_condition, entry)
                for sub_operator, sub_condition in condition.items()
            )
        if is_non_string_sequence(entry):
            return condition in entry
        return condition == entry

    def _extract(self, entry, path):
        # Walks a dotted path (already split into components) through nested
        # mappings/sequences; returns _Undefined() when the path is missing.
        if not path:
            return entry
        if entry is None:
            return entry
        if is_non_string_sequence(entry):
            try:
                # Numeric component indexes into the sequence
                index = int(path[0])
                return self._extract(entry[index], path[1:])
            except ValueError:
                # Non-numeric component: apply the remaining path to each item
                return [self._extract(item, path) for item in entry]
        elif isinstance(entry, Mapping) and path[0] in entry:
            return self._extract(entry[path[0]], path[1:])
        else:
            return _Undefined()

    def _path_exists(self, operator, condition, entry):
        # Implements {"dotted.path": {"$exists": <bool>}}. Returns `condition`
        # when existence matches the expectation and `not condition` otherwise,
        # so the caller can compare the result against `condition` directly.
        keys_list = list(operator.split("."))
        for i, k in enumerate(keys_list):
            if isinstance(entry, Sequence) and not k.isdigit():
                # Fan out over sequence elements with the remaining sub-path
                for elem in entry:
                    operator = ".".join(keys_list[i:])
                    if self._path_exists(operator, condition, elem) == condition:
                        return condition
                return not condition
            elif isinstance(entry, Sequence):
                k = int(k)
            try:
                entry = entry[k]
            except (TypeError, IndexError, KeyError):
                return not condition
        return condition

    def _process_condition(self, operator, condition, entry):
        # Dispatches a single operator/condition pair against the entry.
        # $exists needs special handling because it may apply to missing keys.
        if isinstance(condition, Mapping) and "$exists" in condition:
            if isinstance(operator, str) and operator.find(".") != -1:
                return self._path_exists(operator, condition["$exists"], entry)
            elif condition["$exists"] != (operator in entry):
                return False
            elif tuple(condition.keys()) == ("$exists",):
                return True
        if isinstance(operator, str):
            if operator.startswith("$"):
                # "$op" maps to the _op method on this class
                try:
                    return getattr(self, "_" + operator[1:])(condition, entry)
                except AttributeError:
                    raise QueryError(f"{operator} operator isn't supported")
            else:
                try:
                    extracted_data = self._extract(entry, operator.split("."))
                except IndexError:
                    extracted_data = _Undefined()
        else:
            # Non-string operators are treated as plain keys
            if operator not in entry:
                return False
            extracted_data = entry[operator]
        return self._match(condition, extracted_data)

    @staticmethod
    def _not_implemented(*_):
        # Placeholder for mongo operators intentionally left unsupported
        raise NotImplementedError

    @staticmethod
    def _noop(*_):
        # Operators handled elsewhere (e.g. $exists) fall through to a no-op
        return True

    @staticmethod
    def _eq(condition, entry):
        try:
            return entry == condition
        except TypeError:
            return False

    @staticmethod
    def _gt(condition, entry):
        try:
            return entry > condition
        except TypeError:
            # Incomparable types never satisfy an ordering operator
            return False

    @staticmethod
    def _gte(condition, entry):
        try:
            return entry >= condition
        except TypeError:
            return False

    @staticmethod
    def _in(condition, entry):
        # condition must be a list; entry may be a scalar or a sequence
        if is_non_string_sequence(condition):
            for elem in condition:
                if is_non_string_sequence(entry) and elem in entry:
                    return True
                elif not is_non_string_sequence(entry) and elem == entry:
                    return True
            return False
        else:
            raise TypeError("condition must be a list")

    @staticmethod
    def _lt(condition, entry):
        try:
            return entry < condition
        except TypeError:
            return False

    @staticmethod
    def _lte(condition, entry):
        try:
            return entry <= condition
        except TypeError:
            return False

    @staticmethod
    def _ne(condition, entry):
        return entry != condition

    def _nin(self, condition, entry):
        return not self._in(condition, entry)

    def _and(self, condition, entry):
        if isinstance(condition, Sequence):
            return all(self._match(sub_condition, entry) for sub_condition in condition)
        raise QueryError(f"$and has been attributed incorrect argument {condition}")

    def _nor(self, condition, entry):
        if isinstance(condition, Sequence):
            return all(
                not self._match(sub_condition, entry) for sub_condition in condition
            )
        raise QueryError(f"$nor has been attributed incorrect argument {condition}")

    def _not(self, condition, entry):
        return not self._match(condition, entry)

    def _or(self, condition, entry):
        if isinstance(condition, Sequence):
            return any(self._match(sub_condition, entry) for sub_condition in condition)
        raise QueryError(f"$or has been attributed incorrect argument {condition}")

    @staticmethod
    def _type(condition, entry):
        # Maps BSON type codes to the closest Python types
        bson_type: Dict[int, Type] = {
            1: float,
            2: str,
            3: Mapping,
            4: Sequence,
            5: bytearray,
            7: str,  # object id (uuid)
            8: bool,
            9: str,  # date (UTC datetime)
            10: type(None),
            11: re.Pattern,  # regex,
            13: str,  # Javascript
            15: str,  # JavaScript (with scope)
            16: int,  # 32-bit integer
            17: int,  # Timestamp
            18: int,  # 64-bit integer
        }
        # Mongo string aliases for the numeric type codes above
        bson_alias = {
            "double": 1,
            "string": 2,
            "object": 3,
            "array": 4,
            "binData": 5,
            "objectId": 7,
            "bool": 8,
            "date": 9,
            "null": 10,
            "regex": 11,
            "javascript": 13,
            "javascriptWithScope": 15,
            "int": 16,
            "timestamp": 17,
            "long": 18,
        }

        # "number" is an umbrella alias covering double/int/long
        if condition == "number":
            return any(
                [
                    isinstance(entry, bson_type[bson_alias[alias]])
                    for alias in ["double", "int", "long"]
                ]
            )

        # resolves bson alias, or keeps original condition value
        condition = bson_alias.get(condition, condition)

        if condition not in bson_type:
            raise QueryError(f"$type has been used with unknown type {condition}")

        return isinstance(entry, bson_type[condition])

    # $exists is fully handled in _process_condition, so the operator itself
    # is a no-op here
    _exists = _noop

    @staticmethod
    def _mod(condition, entry):
        # condition is a [divisor, remainder] pair
        return entry % condition[0] == condition[1]

    @staticmethod
    def _regex(condition, entry):
        if not isinstance(entry, str):
            return False
        # If the caller has supplied a compiled regex, assume options are already
        # included.
        if isinstance(condition, re.Pattern):
            return bool(re.search(condition, entry))

        # Accept mongo-style "/pattern/flags" strings and translate the flag
        # letters (i, m, s, x) into Python re flags
        try:
            regex = re.match(r"\A/(.+)/([imsx]{,4})\Z", condition, flags=re.DOTALL)
        except TypeError:
            raise QueryError(
                f"{condition} is not a regular expression and should be a string"
            )

        flags = 0
        if regex:
            options = regex.group(2)
            for option in options:
                flags |= getattr(re, option.upper())
            exp = regex.group(1)
        else:
            exp = condition

        try:
            match = re.search(exp, entry, flags=flags)
        except Exception as error:
            raise QueryError(f"{condition} failed to execute with error {error!r}")
        return bool(match)

    # Operators not supported by karton filters
    _options = _text = _where = _not_implemented

    def _all(self, condition, entry):
        return all(self._match(item, entry) for item in condition)

    def _elemMatch(self, condition, entry):
        # At least one element of the sequence must satisfy every sub-condition
        if not isinstance(entry, Sequence):
            return False
        return any(
            all(
                self._process_condition(sub_operator, sub_condition, element)
                for sub_operator, sub_condition in condition.items()
            )
            for element in entry
        )

    @staticmethod
    def _size(condition, entry):
        if not isinstance(condition, int):
            raise QueryError(
                f"$size has been attributed incorrect argument {condition}"
            )

        if is_non_string_sequence(entry):
            return len(entry) == condition

        return False

    def __repr__(self):
        return f"<Query({self._definition})>"
305
+
306
+
307
def toregex(wildcard):
    """Translate an fnmatch-style wildcard into a mongo $regex condition.

    Strings without wildcard characters are returned unchanged, so they can be
    matched by plain equality instead of an unnecessary regular expression.
    """
    if not isinstance(wildcard, str):
        raise QueryError(f"Unexpected value in the regex conversion: {wildcard}")
    has_special = any(char in wildcard for char in "?*[]!")
    if not has_special:
        return wildcard
    return {"$regex": fnmatch.translate(wildcard)}
314
+
315
+
316
def convert(filters):
    """Convert filters to the mongo query syntax.
    A special care is taken to handle old-style negative filters correctly

    :param filters: List of filter dicts (karton bind filters)
    :return: A Query object matching the combined filters
    """
    # Negative_filters are old-style negative assertions, and behave differently.
    # See issue #246 for the original bug report.
    #
    # For a short example:
    # [{"platform": "!win32"}, {"platform": "!linux"}]
    # will match all non-linux non-windows samples, but:
    # [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
    # means `platform != "win32" or "platform != "linux"` and will match everything.
    # To get equivalent behaviour with mongo syntax, you should use:
    # [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
    regular_filter, negative_filter = [], []
    for rule in filters:
        positive_checks, negative_checks = [], []
        for key, value in rule.items():
            if isinstance(value, str):
                if value and value[0] == "!":  # negative check
                    negative_checks.append({key: toregex(value[1:])})
                else:
                    positive_checks.append({key: toregex(value)})
            else:
                # Non-string values (numbers, mongo operator dicts) pass through
                positive_checks.append({key: value})
        regular_filter.append({"$and": positive_checks})
        # A rule "poisons" the match when all of its positive checks pass AND
        # any of its negated values matches
        negative_filter.append({"$and": positive_checks + [{"$or": negative_checks}]})
    # Final query: no rule is poisoned AND at least one rule matches positively
    return Query(
        {
            "$and": [
                {"$not": {"$or": negative_filter}},
                {"$or": regular_filter},
            ]
        }
    )
karton/core/task.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import enum
2
- import fnmatch
3
2
  import json
4
3
  import time
5
4
  import uuid
@@ -16,6 +15,7 @@ from typing import (
16
15
  Union,
17
16
  )
18
17
 
18
+ from . import query
19
19
  from .resource import RemoteResource, ResourceBase
20
20
  from .utils import recursive_iter, recursive_iter_with_keys, recursive_map
21
21
 
@@ -106,13 +106,18 @@ class Task(object):
106
106
  raise ValueError("Persistent headers should be an instance of a dict")
107
107
 
108
108
  if uid is None:
109
- self.uid = str(uuid.uuid4())
109
+ task_uid = str(uuid.uuid4())
110
+ if root_uid is None:
111
+ self.root_uid = task_uid
112
+ else:
113
+ self.root_uid = root_uid
114
+ # New-style UID format introduced in v5.4.0
115
+ # {12345678-1234-1234-1234-12345678abcd}:12345678-1234-1234-1234-12345678abcd
116
+ self.uid = f"{{{self.root_uid}}}:{task_uid}"
110
117
  else:
111
118
  self.uid = uid
112
-
113
- if root_uid is None:
114
- self.root_uid = self.uid
115
- else:
119
+ if root_uid is None:
120
+ raise ValueError("root_uid cannot be None when uid is not None")
116
121
  self.root_uid = root_uid
117
122
 
118
123
  self.orig_uid = orig_uid
@@ -137,6 +142,21 @@ class Task(object):
137
142
  def receiver(self) -> Optional[str]:
138
143
  return self.headers.get("receiver")
139
144
 
145
    @property
    def task_uid(self) -> str:
        """
        Bare task uid with the "{root_uid}:" prefix stripped (new-style uids)
        """
        return self.fquid_to_uid(self.uid)
148
+
149
+ @staticmethod
150
+ def fquid_to_uid(fquid: str) -> str:
151
+ """
152
+ Gets task uid from fully-qualified fquid ({root_uid}:task_uid)
153
+
154
+ :return: Task uid
155
+ """
156
+ if ":" not in fquid:
157
+ return fquid
158
+ return fquid.split(":")[-1]
159
+
140
160
  def fork_task(self) -> "Task":
141
161
  """
142
162
  Fork task to transfer single task to many queues (but use different UID).
@@ -203,75 +223,8 @@ class Task(object):
203
223
  return new_task
204
224
 
205
225
    def matches_filters(self, filters: List[Dict[str, Any]]) -> bool:
        """
        Check if a task matches the given filters

        :param filters: Task header filters
        :return: True if task headers match the filters

        :meta private:
        """
        return query.convert(filters).match(self.headers)
275
228
 
276
229
  def set_task_parent(self, parent: "Task"):
277
230
  """
@@ -434,9 +387,7 @@ class Task(object):
434
387
  process. This flag is used mainly for multiple task processing e.g.
435
388
  filtering based on status.
436
389
  When resource deserialization is turned off, Task.unserialize will try
437
- to use faster 3rd-party JSON parser (orjson) if it's installed. It's not
438
- added as a required dependency but can speed up things if you need to check
439
- status of multiple tasks at once.
390
+ to use faster 3rd-party JSON parser (orjson).
440
391
  :return: Unserialized Task object
441
392
 
442
393
  :meta private:
@@ -457,11 +408,7 @@ class Task(object):
457
408
  if parse_resources:
458
409
  task_data = json.loads(data, object_hook=unserialize_resources)
459
410
  else:
460
- try:
461
- task_data = orjson.loads(data)
462
- except orjson.JSONDecodeError:
463
- # fallback, in case orjson raises exception during loading
464
- task_data = json.loads(data, object_hook=unserialize_resources)
411
+ task_data = orjson.loads(data)
465
412
 
466
413
  # Compatibility with Karton <5.2.0
467
414
  headers_persistent_fallback = task_data["payload_persistent"].get(
karton/system/system.py CHANGED
@@ -3,6 +3,7 @@ import json
3
3
  import time
4
4
  from typing import List, Optional
5
5
 
6
+ from karton.core import query
6
7
  from karton.core.__version__ import __version__
7
8
  from karton.core.backend import (
8
9
  KARTON_OPERATIONS_QUEUE,
@@ -166,7 +167,7 @@ class SystemService(KartonServiceBase):
166
167
 
167
168
  def route_task(self, task: Task, binds: List[KartonBind]) -> None:
168
169
  # Performs routing of task
169
- self.log.info("[%s] Processing task %s", task.root_uid, task.uid)
170
+ self.log.info("[%s] Processing task %s", task.root_uid, task.task_uid)
170
171
  # store the producer-task relationship in redis for task tracking
171
172
  self.backend.log_identity_output(
172
173
  task.headers.get("origin", "unknown"), task.headers
@@ -175,7 +176,12 @@ class SystemService(KartonServiceBase):
175
176
  pipe = self.backend.make_pipeline()
176
177
  for bind in binds:
177
178
  identity = bind.identity
178
- if task.matches_filters(bind.filters):
179
+ try:
180
+ is_match = task.matches_filters(bind.filters)
181
+ except query.QueryError:
182
+ self.log.error("Task matching failed - invalid filters?")
183
+ continue
184
+ if is_match:
179
185
  routed_task = task.fork_task()
180
186
  routed_task.status = TaskState.SPAWNED
181
187
  routed_task.last_update = time.time()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.3.4
3
+ Version: 5.5.0
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
6
  License: UNKNOWN
@@ -1,26 +1,27 @@
1
- karton_core-5.3.4-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
1
+ karton_core-5.5.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
2
  karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=dAiy67tSM_yYFR8Us_fQT-TDbfV34VpASYNKXfGmEnQ,22
4
- karton/core/backend.py,sha256=evOzlz1v1sxWusc8VojGAYyeyi9fcbVoEPm6WoNT1Xs,34696
3
+ karton/core/__version__.py,sha256=zFTHldBmR5ReiC3uSZ8VkZOEirtsq_l6QbUJYRBHlTs,22
4
+ karton/core/backend.py,sha256=-sQG7utnaWLJOEcafeSwEDLnkflPqtSCwg_mn_nnFhg,36727
5
5
  karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
6
  karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
7
  karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
- karton/core/inspect.py,sha256=rIa0u4u12vG_RudPfc9UAS4RZD56W8qbUa8n1dDIkX0,4868
9
- karton/core/karton.py,sha256=9SOAviG42kSsPqc3EuaHzWtA_KywMtc01hmU6FaJpHo,15007
8
+ karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
+ karton/core/karton.py,sha256=cXLleTEPCVBIXkj09kKu2hjd1XNUSpTAk87-BES1WlA,15133
10
10
  karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
11
  karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
12
  karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ karton/core/query.py,sha256=Ay0VzfrBQwdJzcZ27JbOlUc1ZZdOl6A8sh4iIYTmLyE,11493
13
14
  karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
14
- karton/core/task.py,sha256=WYXzVopg8VlWOc7ncEscHVKivsXHfZc5zWLHW_mxBwY,21000
15
+ karton/core/task.py,sha256=1E_d60XbzqX0O9gFhYe_8aNGH7vuXDHe-bir5cRot_0,18515
15
16
  karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
16
17
  karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
17
18
  karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
18
19
  karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
19
- karton/system/system.py,sha256=65c8j1Ayra_CrXA1AQPBm00bTnpQiW91iZlTTRfJJI8,13787
20
- karton_core-5.3.4.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
21
- karton_core-5.3.4.dist-info/METADATA,sha256=w15YiCEMDZrAF_429FAx98NH3tif38vyDxI9g9heY7E,6847
22
- karton_core-5.3.4.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
23
- karton_core-5.3.4.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
24
- karton_core-5.3.4.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
25
- karton_core-5.3.4.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.3.4.dist-info/RECORD,,
20
+ karton/system/system.py,sha256=tptar24RuXUnlII1xKbuJtfNkQsSxTtS3g4O8S99tbg,14011
21
+ karton_core-5.5.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
+ karton_core-5.5.0.dist-info/METADATA,sha256=h4-M_JnMm8z_An5IDFPHAkQ4YuR_-YpwekETiNMjIxQ,6847
23
+ karton_core-5.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
24
+ karton_core-5.5.0.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
25
+ karton_core-5.5.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.5.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
+ karton_core-5.5.0.dist-info/RECORD,,