karton-core 5.4.0__py3-none-any.whl → 5.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.4.0"
1
+ __version__ = "5.5.1"
karton/core/karton.py CHANGED
@@ -8,6 +8,7 @@ import time
8
8
  import traceback
9
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, cast
10
10
 
11
+ from . import query
11
12
  from .__version__ import __version__
12
13
  from .backend import KartonBackend, KartonBind, KartonMetrics
13
14
  from .base import KartonBase, KartonServiceBase
@@ -122,6 +123,9 @@ class Consumer(KartonServiceBase):
122
123
  if self.filters is None:
123
124
  raise ValueError("Cannot bind consumer on Empty binds")
124
125
 
126
+ # Dummy conversion to make sure the filters are well-formed.
127
+ query.convert(self.filters)
128
+
125
129
  self.persistent = (
126
130
  self.config.getboolean("karton", "persistent", self.persistent)
127
131
  and not self.debug
karton/core/query.py ADDED
@@ -0,0 +1,350 @@
1
+ import fnmatch
2
+ import re
3
+ from collections.abc import Mapping, Sequence
4
+ from typing import Dict, Type
5
+
6
+ # Source code adopted from https://github.com/kapouille/mongoquery
7
+ # Original licenced under "The Unlicense" license.
8
+
9
+
10
class QueryError(Exception):
    """Error raised when a query definition cannot be evaluated."""
14
+
15
+
16
class _Undefined:
    """Placeholder returned when a queried path is absent from an entry.

    Instances use the default identity-based equality, so a fresh instance
    never compares equal to any value used in a query condition.
    """
18
+
19
+
20
def is_non_string_sequence(entry):
    """Tell whether ``entry`` is a sequence other than ``str``.

    :param entry: Any object
    :return: True for sequences such as lists, tuples or bytes; False for
        strings and for anything that is not a ``Sequence``
    """
    if isinstance(entry, str):
        return False
    return isinstance(entry, Sequence)
23
+
24
+
25
class Query(object):
    """Match objects against a MongoDB-like query definition.

    The definition is stored as-is and only interpreted when :meth:`match`
    is called, so a malformed definition is reported at match time.
    Each ``$name`` operator is dispatched to the method called ``_name``.
    """

    def __init__(self, definition):
        """
        :param definition: Query definition (usually a mapping of field paths
            and ``$operators`` to conditions)
        """
        self._definition = definition

    def match(self, entry):
        """Match the entry object against the query given on instantiation.

        :param entry: Object to test (e.g. a dict of task headers)
        :return: True if the entry satisfies the query
        :raises QueryError: if the query uses an unsupported operator or an
            operator is given an invalid argument
        """
        return self._match(self._definition, entry)

    def _match(self, condition, entry):
        """Match ``entry`` against a single condition.

        A mapping is treated as a set of sub-conditions that must all hold.
        Any other condition is compared by membership when ``entry`` is a
        non-string sequence and by equality otherwise.
        """
        if isinstance(condition, Mapping):
            return all(
                self._process_condition(sub_operator, sub_condition, entry)
                for sub_operator, sub_condition in condition.items()
            )
        if is_non_string_sequence(entry):
            return condition in entry
        return condition == entry

    def _extract(self, entry, path):
        """Resolve a list of path segments inside ``entry``.

        :param entry: Object to walk into
        :param path: Remaining path segments (list of strings)
        :return: The value found, a list of values when a non-numeric segment
            is applied to a sequence, or an ``_Undefined`` instance when the
            path is missing
        :raises IndexError: if a numeric segment is out of range
        """
        if not path:
            return entry
        if entry is None:
            return entry
        if is_non_string_sequence(entry):
            try:
                index = int(path[0])
                return self._extract(entry[index], path[1:])
            except ValueError:
                # Non-numeric segment: apply the same path to every element.
                return [self._extract(item, path) for item in entry]
        elif isinstance(entry, Mapping) and path[0] in entry:
            return self._extract(entry[path[0]], path[1:])
        else:
            return _Undefined()

    def _path_exists(self, operator, condition, entry):
        """Evaluate ``$exists`` for a dotted path.

        :param operator: Dotted field path (e.g. ``"a.b.0"``)
        :param condition: Expected existence (the ``$exists`` argument)
        :param entry: Object in which the path is looked up
        :return: ``condition`` when the actual existence of the path agrees
            with it, ``not condition`` otherwise
        """
        keys_list = operator.split(".")
        for i, k in enumerate(keys_list):
            # Strings are sequences too, but must not be traversed: iterating
            # a one-character string yields itself, which would recurse until
            # RecursionError is raised.
            is_list_like = isinstance(entry, Sequence) and not isinstance(entry, str)
            if is_list_like and not k.isdigit():
                # Non-numeric segment on a sequence: the path exists if it
                # exists in any of the elements.
                remaining = ".".join(keys_list[i:])
                for elem in entry:
                    if self._path_exists(remaining, condition, elem) == condition:
                        return condition
                return not condition
            elif is_list_like:
                k = int(k)
            try:
                entry = entry[k]
            except (TypeError, IndexError, KeyError):
                return not condition
        return condition

    def _process_condition(self, operator, condition, entry):
        """Evaluate one ``operator: condition`` pair against ``entry``.

        :param operator: ``$operator`` name, field path or other mapping key
        :param condition: Argument of the operator or expected field value
        :param entry: Object being tested
        :return: True if the pair is satisfied
        :raises QueryError: if a ``$operator`` has no matching method
        """
        if isinstance(condition, Mapping) and "$exists" in condition:
            if isinstance(operator, str) and operator.find(".") != -1:
                # For dotted paths the result is decided by $exists alone.
                return self._path_exists(operator, condition["$exists"], entry)
            elif condition["$exists"] != (operator in entry):
                return False
            elif tuple(condition.keys()) == ("$exists",):
                return True
        if isinstance(operator, str):
            if operator.startswith("$"):
                try:
                    return getattr(self, "_" + operator[1:])(condition, entry)
                except AttributeError:
                    raise QueryError(f"{operator} operator isn't supported")
            else:
                try:
                    extracted_data = self._extract(entry, operator.split("."))
                except IndexError:
                    extracted_data = _Undefined()
        else:
            if operator not in entry:
                return False
            extracted_data = entry[operator]
        return self._match(condition, extracted_data)

    @staticmethod
    def _not_implemented(*_):
        """Handler for operators that are recognised but not supported."""
        raise NotImplementedError

    @staticmethod
    def _noop(*_):
        """Handler that accepts any entry."""
        return True

    @staticmethod
    def _eq(condition, entry):
        """``$eq``: entry equals condition (False if not comparable)."""
        try:
            return entry == condition
        except TypeError:
            return False

    @staticmethod
    def _gt(condition, entry):
        """``$gt``: entry is greater than condition (False if not comparable)."""
        try:
            return entry > condition
        except TypeError:
            return False

    @staticmethod
    def _gte(condition, entry):
        """``$gte``: entry is greater or equal (False if not comparable)."""
        try:
            return entry >= condition
        except TypeError:
            return False

    @staticmethod
    def _in(condition, entry):
        """``$in``: entry (or any of its elements) is one of the given values.

        :raises TypeError: if condition is not a non-string sequence
        """
        if is_non_string_sequence(condition):
            for elem in condition:
                if is_non_string_sequence(entry) and elem in entry:
                    return True
                elif not is_non_string_sequence(entry) and elem == entry:
                    return True
            return False
        else:
            raise TypeError("condition must be a list")

    @staticmethod
    def _lt(condition, entry):
        """``$lt``: entry is lower than condition (False if not comparable)."""
        try:
            return entry < condition
        except TypeError:
            return False

    @staticmethod
    def _lte(condition, entry):
        """``$lte``: entry is lower or equal (False if not comparable)."""
        try:
            return entry <= condition
        except TypeError:
            return False

    @staticmethod
    def _ne(condition, entry):
        """``$ne``: entry differs from condition."""
        return entry != condition

    def _nin(self, condition, entry):
        """``$nin``: negation of ``$in``."""
        return not self._in(condition, entry)

    def _and(self, condition, entry):
        """``$and``: every sub-condition matches.

        :raises QueryError: if condition is not a sequence
        """
        if isinstance(condition, Sequence):
            return all(self._match(sub_condition, entry) for sub_condition in condition)
        raise QueryError(f"$and has been attributed incorrect argument {condition}")

    def _nor(self, condition, entry):
        """``$nor``: no sub-condition matches.

        :raises QueryError: if condition is not a sequence
        """
        if isinstance(condition, Sequence):
            return all(
                not self._match(sub_condition, entry) for sub_condition in condition
            )
        raise QueryError(f"$nor has been attributed incorrect argument {condition}")

    def _not(self, condition, entry):
        """``$not``: the condition does not match."""
        return not self._match(condition, entry)

    def _or(self, condition, entry):
        """``$or``: at least one sub-condition matches.

        :raises QueryError: if condition is not a sequence
        """
        if isinstance(condition, Sequence):
            return any(self._match(sub_condition, entry) for sub_condition in condition)
        raise QueryError(f"$or has been attributed incorrect argument {condition}")

    @staticmethod
    def _type(condition, entry):
        """``$type``: entry is an instance of the Python type mapped to a BSON type.

        :param condition: BSON type number, BSON alias or ``"number"``
        :raises QueryError: if the type is unknown
        """
        bson_type: Dict[int, Type] = {
            1: float,
            2: str,
            3: Mapping,
            4: Sequence,
            5: bytearray,
            7: str,  # object id (uuid)
            8: bool,
            9: str,  # date (UTC datetime)
            10: type(None),
            11: re.Pattern,  # regex,
            13: str,  # Javascript
            15: str,  # JavaScript (with scope)
            16: int,  # 32-bit integer
            17: int,  # Timestamp
            18: int,  # 64-bit integer
        }
        bson_alias = {
            "double": 1,
            "string": 2,
            "object": 3,
            "array": 4,
            "binData": 5,
            "objectId": 7,
            "bool": 8,
            "date": 9,
            "null": 10,
            "regex": 11,
            "javascript": 13,
            "javascriptWithScope": 15,
            "int": 16,
            "timestamp": 17,
            "long": 18,
        }

        if condition == "number":
            return any(
                isinstance(entry, bson_type[bson_alias[alias]])
                for alias in ["double", "int", "long"]
            )

        # resolves bson alias, or keeps original condition value
        condition = bson_alias.get(condition, condition)

        if condition not in bson_type:
            raise QueryError(f"$type has been used with unknown type {condition}")

        return isinstance(entry, bson_type[condition])

    # $exists is already resolved in _process_condition, so the operator
    # handler itself accepts everything.
    _exists = _noop

    @staticmethod
    def _mod(condition, entry):
        """``$mod``: ``entry % condition[0]`` equals ``condition[1]``."""
        return entry % condition[0] == condition[1]

    @staticmethod
    def _regex(condition, entry):
        """``$regex``: the pattern is found anywhere in a string entry.

        :param condition: Compiled pattern, plain regex string or a
            ``/pattern/flags`` string with flags taken from ``imsx``
        :param entry: Value to search; non-string values never match
        :raises QueryError: if condition is neither a string nor a compiled
            pattern, or if the search itself fails
        """
        if not isinstance(entry, str):
            return False
        # If the caller has supplied a compiled regex, assume options are already
        # included.
        if isinstance(condition, re.Pattern):
            return bool(re.search(condition, entry))

        try:
            regex = re.match(r"\A/(.+)/([imsx]{,4})\Z", condition, flags=re.DOTALL)
        except TypeError:
            raise QueryError(
                f"{condition} is not a regular expression and should be a string"
            )

        flags = 0
        if regex:
            options = regex.group(2)
            for option in options:
                flags |= getattr(re, option.upper())
            exp = regex.group(1)
        else:
            exp = condition

        try:
            match = re.search(exp, entry, flags=flags)
        except Exception as error:
            raise QueryError(f"{condition} failed to execute with error {error!r}")
        return bool(match)

    _options = _text = _where = _not_implemented

    def _all(self, condition, entry):
        """``$all``: every item of condition matches the entry."""
        return all(self._match(item, entry) for item in condition)

    def _elemMatch(self, condition, entry):
        """``$elemMatch``: some element of entry satisfies all sub-conditions."""
        if not isinstance(entry, Sequence):
            return False
        return any(
            all(
                self._process_condition(sub_operator, sub_condition, element)
                for sub_operator, sub_condition in condition.items()
            )
            for element in entry
        )

    @staticmethod
    def _size(condition, entry):
        """``$size``: entry is a non-string sequence of the given length.

        :raises QueryError: if condition is not an integer
        """
        if not isinstance(condition, int):
            raise QueryError(
                f"$size has been attributed incorrect argument {condition}"
            )

        if is_non_string_sequence(entry):
            return len(entry) == condition

        return False

    def __repr__(self):
        return f"<Query({self._definition})>"
305
+
306
+
307
def toregex(wildcard):
    """Convert a shell-style wildcard into a query condition.

    :param wildcard: Filter value, optionally using ``?``, ``*`` or ``[...]``
    :return: ``{"$regex": ...}`` when the value contains wildcard characters,
        otherwise the unchanged string (to be compared by equality)
    :raises QueryError: if the value is not a string
    """
    if not isinstance(wildcard, str):
        raise QueryError(f"Unexpected value in the regex conversion: {wildcard}")
    # The check is not strictly necessary, but it avoids building regular
    # expressions for plain values.
    if any(c in wildcard for c in "?*[]!"):
        # fnmatch.translate() output is meant for re.match() and only anchors
        # the end of the pattern. Query._regex evaluates it with re.search(),
        # so the start is anchored here to keep whole-value matching.
        return {"$regex": r"\A" + fnmatch.translate(wildcard)}
    return wildcard
314
+
315
+
316
def convert(filters):
    """Translate a list of Karton filters into a single :class:`Query`.

    String values are passed through ``toregex`` and other values are kept
    as-is. String values starting with ``!`` are old-style negative checks
    and get special treatment.

    :param filters: List of filter rules (mappings of header name to value)
    :return: Query equivalent to the given filters
    """
    # Old-style negative checks do not behave like a plain "$not" per rule.
    # For example:
    #     [{"platform": "!win32"}, {"platform": "!linux"}]
    # matches samples that are neither win32 nor linux, whereas:
    #     [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
    # means `platform != "win32" or platform != "linux"` and matches everything.
    # The equivalent in mongo syntax is:
    #     [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
    # Hence every rule contributes one positive clause and one rejecting clause.
    accepting_clauses = []
    rejecting_clauses = []
    for rule in filters:
        required = []
        forbidden = []
        for key, value in rule.items():
            if not isinstance(value, str):
                required.append({key: value})
            elif value.startswith("!"):  # negative check
                forbidden.append({key: toregex(value[1:])})
            else:
                required.append({key: toregex(value)})
        accepting_clauses.append({"$and": required})
        # A rule rejects a task when its positive part matches and at least
        # one of its negative checks matches too.
        rejecting_clauses.append({"$and": required + [{"$or": forbidden}]})

    definition = {
        "$and": [
            {"$not": {"$or": rejecting_clauses}},
            {"$or": accepting_clauses},
        ]
    }
    return Query(definition)
karton/core/task.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import enum
2
- import fnmatch
3
2
  import json
4
3
  import time
5
4
  import uuid
@@ -16,6 +15,7 @@ from typing import (
16
15
  Union,
17
16
  )
18
17
 
18
+ from . import query
19
19
  from .resource import RemoteResource, ResourceBase
20
20
  from .utils import recursive_iter, recursive_iter_with_keys, recursive_map
21
21
 
@@ -223,75 +223,8 @@ class Task(object):
223
223
  return new_task
224
224
 
225
225
  def matches_filters(self, filters: List[Dict[str, Any]]) -> bool:
226
- """
227
- Checks whether provided task headers match filters
228
-
229
- :param filters: Task header filters
230
- :return: True if task headers match specific filters
231
-
232
- :meta private:
233
- """
234
-
235
- def test_filter(headers: Dict[str, Any], filter: Dict[str, Any]) -> int:
236
- """
237
- Filter match follows AND logic, but it's non-boolean because filters may be
238
- negated (task:!platform).
239
-
240
- Result values are as follows:
241
- - 1 - positive match, no mismatched values in headers
242
- (all matched)
243
- - 0 - no match, found value that doesn't match to the filter
244
- (some are not matched)
245
- - -1 - negative match, found value that matches negated filter value
246
- (all matched but found negative matches)
247
- """
248
- matches = 1
249
- for filter_key, filter_value in filter.items():
250
- # Coerce filter value to string
251
- filter_value_str = str(filter_value)
252
- negated = False
253
- if filter_value_str.startswith("!"):
254
- negated = True
255
- filter_value_str = filter_value_str[1:]
256
-
257
- # If expected key doesn't exist in headers
258
- if filter_key not in headers:
259
- # Negated filter ignores non-existent values
260
- if negated:
261
- continue
262
- # But positive filter doesn't
263
- return 0
264
-
265
- # Coerce header value to string
266
- header_value_str = str(headers[filter_key])
267
- # fnmatch is great for handling simple wildcard patterns (?, *, [abc])
268
- match = fnmatch.fnmatchcase(header_value_str, filter_value_str)
269
- # If matches, but it's negated: it's negative match
270
- if match and negated:
271
- matches = -1
272
- # If doesn't match but filter is not negated: it's not a match
273
- if not match and not negated:
274
- return 0
275
- # If there are no mismatched values: filter is matched
276
- return matches
277
-
278
- # List of filter matches follow OR logic, but -1 is special
279
- # If there is any -1, result is False
280
- # (any matched, but it's negative match)
281
- # If there is any 1, but no -1's: result is True
282
- # (any matched, no negative match)
283
- # If there are only 0's: result is False
284
- # (none matched)
285
- matches = False
286
- for task_filter in filters:
287
- match_result = test_filter(self.headers, task_filter)
288
- if match_result == -1:
289
- # Any negative match results in False
290
- return False
291
- if match_result == 1:
292
- # Any positive match but without negative matches results in True
293
- matches = True
294
- return matches
226
+ """Check if a task matches the given filters"""
227
+ return query.convert(filters).match(self.headers)
295
228
 
296
229
  def set_task_parent(self, parent: "Task"):
297
230
  """
@@ -475,7 +408,12 @@ class Task(object):
475
408
  if parse_resources:
476
409
  task_data = json.loads(data, object_hook=unserialize_resources)
477
410
  else:
478
- task_data = orjson.loads(data)
411
+ try:
412
+ task_data = orjson.loads(data)
413
+ except orjson.JSONDecodeError:
414
+ # Fallback, in case orjson raises exception during loading
415
+ # This may happen for large numbers (too large for float)
416
+ task_data = json.loads(data, object_hook=unserialize_resources)
479
417
 
480
418
  # Compatibility with Karton <5.2.0
481
419
  headers_persistent_fallback = task_data["payload_persistent"].get(
karton/system/system.py CHANGED
@@ -3,6 +3,7 @@ import json
3
3
  import time
4
4
  from typing import List, Optional
5
5
 
6
+ from karton.core import query
6
7
  from karton.core.__version__ import __version__
7
8
  from karton.core.backend import (
8
9
  KARTON_OPERATIONS_QUEUE,
@@ -175,7 +176,12 @@ class SystemService(KartonServiceBase):
175
176
  pipe = self.backend.make_pipeline()
176
177
  for bind in binds:
177
178
  identity = bind.identity
178
- if task.matches_filters(bind.filters):
179
+ try:
180
+ is_match = task.matches_filters(bind.filters)
181
+ except query.QueryError:
182
+ self.log.error("Task matching failed - invalid filters?")
183
+ continue
184
+ if is_match:
179
185
  routed_task = task.fork_task()
180
186
  routed_task.status = TaskState.SPAWNED
181
187
  routed_task.last_update = time.time()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.4.0
3
+ Version: 5.5.1
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
6
  License: UNKNOWN
@@ -1,26 +1,27 @@
1
- karton_core-5.4.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
1
+ karton_core-5.5.1-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
2
  karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=xjYaBGUFGg0kGZj_WhuoFyPD8NILPsr79SaMwmYQGSg,22
3
+ karton/core/__version__.py,sha256=o0RHk7avNRUho2u_PgLKAFJKJks95Wx54GlOzI6Jzq4,22
4
4
  karton/core/backend.py,sha256=-sQG7utnaWLJOEcafeSwEDLnkflPqtSCwg_mn_nnFhg,36727
5
5
  karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
6
  karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
7
  karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
8
  karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
- karton/core/karton.py,sha256=9SOAviG42kSsPqc3EuaHzWtA_KywMtc01hmU6FaJpHo,15007
9
+ karton/core/karton.py,sha256=cXLleTEPCVBIXkj09kKu2hjd1XNUSpTAk87-BES1WlA,15133
10
10
  karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
11
  karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
12
  karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ karton/core/query.py,sha256=Ay0VzfrBQwdJzcZ27JbOlUc1ZZdOl6A8sh4iIYTmLyE,11493
13
14
  karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
14
- karton/core/task.py,sha256=diwg8uUl57NEYNRjT1l5CPiNw3EQcU11BnrLul33fx0,21350
15
+ karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
15
16
  karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
16
17
  karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
17
18
  karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
18
19
  karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
19
- karton/system/system.py,sha256=yF_d71a8w7JYA7IXUt63d5_QBH6x1QplB-xcrzQTXL4,13792
20
- karton_core-5.4.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
21
- karton_core-5.4.0.dist-info/METADATA,sha256=kopeYFCI9EoFQbc7J7woZWjI_5egy29-lYUW7UzEQ2I,6847
22
- karton_core-5.4.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
23
- karton_core-5.4.0.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
24
- karton_core-5.4.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
25
- karton_core-5.4.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.4.0.dist-info/RECORD,,
20
+ karton/system/system.py,sha256=tptar24RuXUnlII1xKbuJtfNkQsSxTtS3g4O8S99tbg,14011
21
+ karton_core-5.5.1.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
+ karton_core-5.5.1.dist-info/METADATA,sha256=trLfddTECFmbcuReIoSF9Yb9ug1BSAxHBUAM0nNoXnE,6847
23
+ karton_core-5.5.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
24
+ karton_core-5.5.1.dist-info/entry_points.txt,sha256=FJj5EZuvFP0LkagjX_dLbRGBUnuLjgBiSyiFfq4c86U,99
25
+ karton_core-5.5.1.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.5.1.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
+ karton_core-5.5.1.dist-info/RECORD,,