karton-core 5.6.0__py3-none-any.whl → 5.7.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "5.6.0"
1
+ __version__ = "5.7.0"
karton/core/backend.py CHANGED
@@ -1021,21 +1021,33 @@ class KartonBackend:
1021
1021
  self.s3.delete_objects(Bucket=bucket, Delete={"Objects": delete_objects})
1022
1022
 
1023
1023
  def remove_object_versions(
1024
- self, bucket: str, object_versions: Dict[str, List[str]]
1024
+ self,
1025
+ bucket: str,
1026
+ object_versions: Dict[str, List[str]],
1027
+ explicit_version_null: bool = False,
1025
1028
  ) -> None:
1026
1029
  """
1027
1030
  Bulk remove resource object versions from object storage
1028
1031
 
1029
1032
  :param bucket: Bucket name
1030
1033
  :param object_versions: Object version identifiers
1034
+ :param explicit_version_null: |
1035
+ Some S3 providers (e.g. MinIO) need a reference
1036
+ to "null" version explicitly when versioning is in suspended state. On the
1037
+ other hand, some providers refuse to delete "null" versions when bucket
1038
+ versioning is disabled.
1039
+ See also: https://github.com/CERT-Polska/karton/issues/273.
1031
1040
  """
1032
- versions = iter(
1033
- (uid, version_id)
1034
- for uid, versions in object_versions.items()
1035
- for version_id in versions
1036
- )
1037
1041
  deletion_chunks = chunks(
1038
- [{"Key": uid, "VersionId": version_id} for uid, version_id in versions],
1042
+ [
1043
+ (
1044
+ {"Key": uid, "VersionId": version_id}
1045
+ if version_id != "null" or explicit_version_null
1046
+ else {"Key": uid}
1047
+ )
1048
+ for uid, versions in object_versions.items()
1049
+ for version_id in versions
1050
+ ],
1039
1051
  100,
1040
1052
  )
1041
1053
  for delete_objects in deletion_chunks:
@@ -1060,17 +1072,20 @@ class KartonBackend:
1060
1072
  raise e
1061
1073
  return False
1062
1074
 
1063
- def log_identity_output(self, identity: str, headers: Dict[str, Any]) -> None:
1075
+ def log_identity_output(
1076
+ self, identity: str, headers: Dict[str, Any], task_tracking_ttl: int
1077
+ ) -> None:
1064
1078
  """
1065
1079
  Store the type of task outputted for given producer to
1066
1080
  be used in tracking karton service connections.
1067
1081
 
1068
1082
  :param identity: producer identity
1069
1083
  :param headers: outputted headers
1084
+ :param task_tracking_ttl: expire time (in seconds)
1070
1085
  """
1071
1086
 
1072
1087
  self.redis.sadd(f"{KARTON_OUTPUTS_NAMESPACE}:{identity}", json.dumps(headers))
1073
- self.redis.expire(f"{KARTON_OUTPUTS_NAMESPACE}:{identity}", 60 * 60 * 24 * 30)
1088
+ self.redis.expire(f"{KARTON_OUTPUTS_NAMESPACE}:{identity}", task_tracking_ttl)
1074
1089
 
1075
1090
  def get_outputs(self) -> List[KartonOutputs]:
1076
1091
  """
karton/core/base.py CHANGED
@@ -36,6 +36,9 @@ class KartonBase(abc.ABC):
36
36
  backend: Optional[KartonBackend] = None,
37
37
  ) -> None:
38
38
  self.config = config or Config()
39
+ self.enable_publish_log = self.config.getboolean(
40
+ "logging", "enable_publish", True
41
+ )
39
42
 
40
43
  # If not passed via constructor - get it from class
41
44
  if identity is not None:
@@ -108,7 +111,7 @@ class KartonBase(abc.ABC):
108
111
  )
109
112
  logger.addHandler(stream_handler)
110
113
 
111
- if not self.debug:
114
+ if not self.debug and self.enable_publish_log:
112
115
  logger.addHandler(self._log_handler)
113
116
 
114
117
  @property
karton/core/config.py CHANGED
@@ -111,12 +111,10 @@ class Config(object):
111
111
  return True
112
112
 
113
113
  @overload
114
- def getint(self, section_name: str, option_name: str, fallback: int) -> int:
115
- ...
114
+ def getint(self, section_name: str, option_name: str, fallback: int) -> int: ...
116
115
 
117
116
  @overload
118
- def getint(self, section_name: str, option_name: str) -> Optional[int]:
119
- ...
117
+ def getint(self, section_name: str, option_name: str) -> Optional[int]: ...
120
118
 
121
119
  def getint(
122
120
  self, section_name: str, option_name: str, fallback: Optional[int] = None
@@ -131,12 +129,12 @@ class Config(object):
131
129
  return int(value)
132
130
 
133
131
  @overload
134
- def getboolean(self, section_name: str, option_name: str, fallback: bool) -> bool:
135
- ...
132
+ def getboolean(
133
+ self, section_name: str, option_name: str, fallback: bool
134
+ ) -> bool: ...
136
135
 
137
136
  @overload
138
- def getboolean(self, section_name: str, option_name: str) -> Optional[bool]:
139
- ...
137
+ def getboolean(self, section_name: str, option_name: str) -> Optional[bool]: ...
140
138
 
141
139
  def getboolean(
142
140
  self, section_name: str, option_name: str, fallback: Optional[bool] = None
karton/core/karton.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Base library for karton subsystems.
3
3
  """
4
+
4
5
  import abc
5
6
  import argparse
6
7
  import sys
karton/core/resource.py CHANGED
@@ -39,6 +39,7 @@ class ResourceBase(object):
39
39
  bucket: Optional[str] = None,
40
40
  metadata: Optional[Dict[str, Any]] = None,
41
41
  sha256: Optional[str] = None,
42
+ fd: Optional[IO[bytes]] = None,
42
43
  _uid: Optional[str] = None,
43
44
  _size: Optional[int] = None,
44
45
  _flags: Optional[List[str]] = None,
@@ -66,6 +67,16 @@ class ResourceBase(object):
66
67
  for byte_block in iter(lambda: f.read(4096), b""):
67
68
  sha256_hash.update(byte_block)
68
69
  sha256 = sha256_hash.hexdigest()
70
+ elif fd is not None:
71
+ if calculate_hash:
72
+ # we need to calculate the whole hash and return pos as it was
73
+ sha256_hash = hashlib.sha256()
74
+ last_position = fd.tell()
75
+ fd.seek(0)
76
+ for byte_block in iter(lambda: fd.read(4096), b""):
77
+ sha256_hash.update(byte_block)
78
+ sha256 = sha256_hash.hexdigest()
79
+ fd.seek(last_position)
69
80
  elif content:
70
81
  if isinstance(content, str):
71
82
  self._content = content.encode()
@@ -190,6 +201,7 @@ class LocalResource(ResourceBase):
190
201
  bucket=bucket,
191
202
  metadata=metadata,
192
203
  sha256=sha256,
204
+ fd=fd,
193
205
  _uid=uid,
194
206
  _flags=_flags,
195
207
  )
karton/core/test.py CHANGED
@@ -1,11 +1,12 @@
1
1
  """
2
2
  Test stubs for karton subsystem unit tests
3
3
  """
4
+
4
5
  import hashlib
5
6
  import logging
6
7
  import unittest
7
8
  from collections import defaultdict
8
- from typing import Any, BinaryIO, Dict, List, Union, cast
9
+ from typing import Any, BinaryIO, Dict, List, Optional, Union, cast
9
10
  from unittest import mock
10
11
 
11
12
  from .backend import KartonBackend, KartonMetrics
@@ -66,7 +67,7 @@ class BackendMock:
66
67
  bucket: str,
67
68
  object_uid: str,
68
69
  content: Union[bytes, BinaryIO],
69
- length: int = None,
70
+ length: Optional[int] = None,
70
71
  ) -> None:
71
72
  log.debug("Uploading object %s to bucket %s", object_uid, bucket)
72
73
  if isinstance(content, bytes):
karton/system/system.py CHANGED
@@ -31,6 +31,7 @@ class SystemService(KartonServiceBase):
31
31
  TASK_DISPATCHED_TIMEOUT = 24 * 3600
32
32
  TASK_STARTED_TIMEOUT = 24 * 3600
33
33
  TASK_CRASHED_TIMEOUT = 3 * 24 * 3600
34
+ TASK_TRACKING_TTL = 30 * 24 * 3600
34
35
 
35
36
  def __init__(self, config: Optional[Config]) -> None:
36
37
  super().__init__(config=config)
@@ -49,6 +50,15 @@ class SystemService(KartonServiceBase):
49
50
  self.crash_started_tasks_on_timeout = self.config.getboolean(
50
51
  "system", "crash_started_tasks_on_timeout", False
51
52
  )
53
+ self.enable_null_version_deletion = self.config.getboolean(
54
+ "system", "enable_null_version_deletion", False
55
+ )
56
+ self.enable_task_tracking = self.config.getboolean(
57
+ "system", "enable_task_tracking", True
58
+ )
59
+ self.task_tracking_ttl = self.config.getint(
60
+ "system", "task_tracking_ttl", self.TASK_TRACKING_TTL
61
+ )
52
62
 
53
63
  self.last_gc_trigger = time.time()
54
64
 
@@ -61,6 +71,9 @@ class SystemService(KartonServiceBase):
61
71
  " task_crashed_timeout:\t%s\n"
62
72
  " enable_gc:\t%s\n"
63
73
  " enable_router:\t%s\n"
74
+ " enable_null_version_deletion:\t%s\n"
75
+ " enable_task_tracking:\t%s\n"
76
+ " task_tracking_ttl:\t%s\n"
64
77
  " crash_started_tasks_on_timeout:\t%s",
65
78
  self.gc_interval,
66
79
  self.task_dispatched_timeout,
@@ -68,6 +81,9 @@ class SystemService(KartonServiceBase):
68
81
  self.task_crashed_timeout,
69
82
  self.enable_gc,
70
83
  self.enable_router,
84
+ self.enable_null_version_deletion,
85
+ self.enable_task_tracking,
86
+ self.task_tracking_ttl,
71
87
  self.crash_started_tasks_on_timeout,
72
88
  )
73
89
 
@@ -88,7 +104,11 @@ class SystemService(KartonServiceBase):
88
104
  del resources_to_remove[resource.uid]
89
105
  # Remove unreferenced resources
90
106
  if resources_to_remove:
91
- self.backend.remove_object_versions(karton_bucket, resources_to_remove)
107
+ self.backend.remove_object_versions(
108
+ karton_bucket,
109
+ resources_to_remove,
110
+ explicit_version_null=self.enable_null_version_deletion,
111
+ )
92
112
 
93
113
  def gc_collect_tasks(self) -> None:
94
114
  self.log.debug("GC: gc_collect_tasks started")
@@ -218,10 +238,13 @@ class SystemService(KartonServiceBase):
218
238
  def route_task(self, task: Task, binds: List[KartonBind]) -> None:
219
239
  # Performs routing of task
220
240
  self.log.info("[%s] Processing task %s", task.root_uid, task.task_uid)
221
- # store the producer-task relationship in redis for task tracking
222
- self.backend.log_identity_output(
223
- task.headers.get("origin", "unknown"), task.headers
224
- )
241
+ # if enabled, store the producer-task relationship in redis for task tracking
242
+ if self.enable_task_tracking:
243
+ self.backend.log_identity_output(
244
+ task.headers.get("origin", "unknown"),
245
+ task.headers,
246
+ self.task_tracking_ttl,
247
+ )
225
248
 
226
249
  pipe = self.backend.make_pipeline()
227
250
  for bind in binds:
@@ -276,9 +299,10 @@ class SystemService(KartonServiceBase):
276
299
  operation_bodies.append(operation_body)
277
300
 
278
301
  self.backend.register_tasks(tasks)
279
- self.backend.produce_logs(
280
- operation_bodies, logger_name=KARTON_OPERATIONS_QUEUE, level="INFO"
281
- )
302
+ if self.enable_publish_log:
303
+ self.backend.produce_logs(
304
+ operation_bodies, logger_name=KARTON_OPERATIONS_QUEUE, level="INFO"
305
+ )
282
306
 
283
307
  def process_routing(self) -> None:
284
308
  # Order does matter! task dispatching must be before
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karton-core
3
- Version: 5.6.0
3
+ Version: 5.7.0
4
4
  Summary: Distributed malware analysis orchestration framework
5
5
  Home-page: https://github.com/CERT-Polska/karton
6
6
  Classifier: Programming Language :: Python :: 3
@@ -0,0 +1,27 @@
1
+ karton_core-5.7.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
+ karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
+ karton/core/__version__.py,sha256=QmHMXVnw5DVPfWzvN7FS1tOhDAesdxpM_aVOh9CMuSk,22
4
+ karton/core/backend.py,sha256=_IOjN9pWdSBsDnTMYvg-Fpm6Ag-uf2Jb9LWmrtZqVAU,38773
5
+ karton/core/base.py,sha256=lqVJvCHRMzvIOpS8SaWlOaSSJBEVkNQe0oClZC_GQYM,8225
6
+ karton/core/config.py,sha256=M3dB0XgnUO5VzUcGyQa7FyKzmdgmDml1MrzG6CxEuvE,8100
7
+ karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
+ karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
+ karton/core/karton.py,sha256=l3joJWw8m23wlOErkcQmNFYhLFA5x2la6L0WopxJ7mk,15435
10
+ karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
+ karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
+ karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ karton/core/query.py,sha256=sf24DweVlXfJuBbBD_ns2LXhOV-IBwuPG3jBfTJu77s,12063
14
+ karton/core/resource.py,sha256=9kWXpMBRfudH0_whJfSSI27K3Gwv2u93CVa7p68Q5UM,20842
15
+ karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
16
+ karton/core/test.py,sha256=cj6W4gNt0BpRjsYiiBt0hPE8dmRfUeIc8sSVkxB50cU,9123
17
+ karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
18
+ karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
19
+ karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
20
+ karton/system/system.py,sha256=d_5hhLTthJdr_4gZEGQ6Y-kHvxeBqyQxjjx_wRs3xMA,17285
21
+ karton_core-5.7.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
+ karton_core-5.7.0.dist-info/METADATA,sha256=MrmtycTaYsNB8v0LRyuLIHL2bV17n1Lt6e-ak4RfrH8,6818
23
+ karton_core-5.7.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
24
+ karton_core-5.7.0.dist-info/entry_points.txt,sha256=OgLlsXy61GP6-Yob3oXqeJ2hlRU6LBLj33fr0NufKz0,98
25
+ karton_core-5.7.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
+ karton_core-5.7.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
+ karton_core-5.7.0.dist-info/RECORD,,
@@ -1,27 +0,0 @@
1
- karton_core-5.6.0-nspkg.pth,sha256=vHa-jm6pBTeInFrmnsHMg9AOeD88czzQy-6QCFbpRcM,539
2
- karton/core/__init__.py,sha256=QuT0BWZyp799eY90tK3H1OD2hwuusqMJq8vQwpB3kG4,337
3
- karton/core/__version__.py,sha256=9eGec3AYz2CNznnfEwKYRiJ65G9gDWtXmb587ljDycg,22
4
- karton/core/backend.py,sha256=HlAolngYUTx2ajXhLf5RbWxmPOq5orLfz52iTkNxxqM,38147
5
- karton/core/base.py,sha256=C6Lco3E0XCsxvEjeVOLR9fxh_IWJ1vjC9BqUYsQyewE,8083
6
- karton/core/config.py,sha256=7oKchitq6pWzPuXRfjBXqVT_BgGIz2p-CDo1RGaNJQg,8118
7
- karton/core/exceptions.py,sha256=8i9WVzi4PinNlX10Cb-lQQC35Hl-JB5R_UKXa9AUKoQ,153
8
- karton/core/inspect.py,sha256=aIJQEOEkD5q2xLlV8nhxY5qL5zqcnprP-2DdP6ecKlE,6150
9
- karton/core/karton.py,sha256=Fi3wNqMGiKvHN2BECsqsvfxkiyuwPdlC21jpqQdkeak,15434
10
- karton/core/logger.py,sha256=J3XAyG88U0cwYC9zR6E3QD1uJenrQh7zS9-HgxhqeAs,2040
11
- karton/core/main.py,sha256=ir1-dhn3vbwfh2YHiM6ZYfRBbjwLvJSz0d8tuK1mb_4,8310
12
- karton/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- karton/core/query.py,sha256=sf24DweVlXfJuBbBD_ns2LXhOV-IBwuPG3jBfTJu77s,12063
14
- karton/core/resource.py,sha256=tA3y_38H9HVKIrCeAU70zHUkQUv0BuCQWMC470JLxxc,20321
15
- karton/core/task.py,sha256=gW1szMi5PN2Y06X-Ryo7cmEVluZv1r7W5tvmwIJiD94,18808
16
- karton/core/test.py,sha256=tms-YM7sUKQDHN0vm2_W7DIvHnO_ld_VPsWHnsbKSfk,9102
17
- karton/core/utils.py,sha256=sEVqGdVPyYswWuVn8wYXBQmln8Az826N_2HgC__pmW8,4090
18
- karton/system/__init__.py,sha256=JF51OqRU_Y4c0unOulvmv1KzSHSq4ZpXU8ZsH4nefRM,63
19
- karton/system/__main__.py,sha256=QJkwIlSwaPRdzwKlNmCAL41HtDAa73db9MZKWmOfxGM,56
20
- karton/system/system.py,sha256=v2rEJYN2Vq5-hH8yUGFv6r2ZdX-TOQg0ldsEC2T6qO8,16315
21
- karton_core-5.6.0.dist-info/LICENSE,sha256=o8h7hYhn7BJC_-DmrfqWwLjaR_Gbe0TZOOQJuN2ca3I,1519
22
- karton_core-5.6.0.dist-info/METADATA,sha256=Atu4wfGAqbkw-lVDV_Wtt3fxSG7nV0QmVqjGnfNAFZ4,6818
23
- karton_core-5.6.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
24
- karton_core-5.6.0.dist-info/entry_points.txt,sha256=OgLlsXy61GP6-Yob3oXqeJ2hlRU6LBLj33fr0NufKz0,98
25
- karton_core-5.6.0.dist-info/namespace_packages.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
26
- karton_core-5.6.0.dist-info/top_level.txt,sha256=X8SslCPsqXDCnGZqrYYolzT3xPzJMq1r-ZQSc0jfAEA,7
27
- karton_core-5.6.0.dist-info/RECORD,,