karton-core 5.7.0__py3-none-any.whl → 5.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,359 @@
1
+ import abc
2
+ import argparse
3
+ import asyncio
4
+ import sys
5
+ import time
6
+ import traceback
7
+ from asyncio import CancelledError
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from karton.core import query
11
+ from karton.core.__version__ import __version__
12
+ from karton.core.backend import KartonBind, KartonMetrics
13
+ from karton.core.config import Config
14
+ from karton.core.exceptions import TaskTimeoutError
15
+ from karton.core.resource import LocalResource as SyncLocalResource
16
+ from karton.core.task import Task, TaskState
17
+
18
+ from .backend import KartonAsyncBackend
19
+ from .base import KartonAsyncBase, KartonAsyncServiceBase
20
+ from .resource import LocalResource
21
+
22
+
23
class Producer(KartonAsyncBase):
    """
    Asynchronous producer side of Karton: dispatches new tasks into the
    unrouted task queue.

    Usage example:

    .. code-block:: python

        from karton.core.asyncio import Producer

        producer = Producer(identity="karton.mwdb")
        await producer.connect()
        task = Task(
            headers={
                "type": "sample",
                "kind": "raw"
            },
            payload={
                "sample": Resource("sample.exe", b"put content here")
            }
        )
        await producer.send_task(task)

    :param config: Karton configuration object used for service
        configuration (optional)
    :type config: :class:`karton.Config`
    :param identity: Karton producer identity / name (optional)
    :type identity: str
    :param backend: Karton backend to use (optional)
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        # Nothing producer-specific to set up; everything is handled by the base.
        super().__init__(config=config, identity=identity, backend=backend)

    async def send_task(self, task: Task) -> bool:
        """
        Push a task to the unrouted task queue, logging the dispatch.
        When a task is currently being handled, the new task becomes its child.

        :param task: Task object to be sent
        :return: Bool indicating if the task was delivered
        :raises RuntimeError: if the task carries a synchronous LocalResource
        """
        self.log.debug("Dispatched task %s", task.uid)

        # Inherit lineage, persistent data and priority from the parent task
        parent = self.current_task
        if parent is not None:
            task.set_task_parent(parent)
            task.merge_persistent_payload(parent)
            task.merge_persistent_headers(parent)
            task.priority = parent.priority

        task.last_update = time.time()
        task.headers.update({"origin": self.identity})

        # Validate resources before anything is registered: local resources
        # without a bucket get the backend default, synchronous ones are refused
        for item in task.iterate_resources():
            if isinstance(item, LocalResource):
                if not item.bucket:
                    item.bucket = self.backend.default_bucket_name
            if isinstance(item, SyncLocalResource):
                raise RuntimeError(
                    "Synchronous resources are not supported. "
                    "Use karton.core.asyncio.resource module instead."
                )

        # The task is registered first, then its local resources are uploaded
        await self.backend.register_task(task)

        for item in task.iterate_resources():
            if not isinstance(item, LocalResource):
                continue
            await item.upload(self.backend)

        # Finally hand the task over for routing and account for it in metrics
        await self.backend.produce_unrouted_task(task)
        await self.backend.increment_metrics(KartonMetrics.TASK_PRODUCED, self.identity)
        return True
106
+
107
+
108
class Consumer(KartonAsyncServiceBase):
    """
    Base consumer class, this is the part of Karton responsible for processing
    incoming tasks

    :param config: Karton config to use for service configuration
    :param identity: Karton service identity
    :param backend: Karton backend to use
    :param task_timeout: The maximum time, in seconds, this consumer will wait for
        a task to finish processing before being CRASHED on timeout.
        Set 0 for unlimited, and None for using global value
    :param concurrency_limit: The maximum number of concurrent tasks that may be
        gathered from queue and processed asynchronously.
        0 or None disables the limit.
    """

    filters: List[Dict[str, Any]] = []
    persistent: bool = True
    version: Optional[str] = None
    task_timeout: Optional[int] = None
    concurrency_limit: Optional[int] = 1

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        super().__init__(config=config, identity=identity, backend=backend)

        if self.filters is None:
            raise ValueError("Cannot bind consumer on Empty binds")

        # Dummy conversion to make sure the filters are well-formed.
        query.convert(self.filters)

        # Debug mode always forces a non-persistent queue
        self.persistent = (
            self.config.getboolean("karton", "persistent", self.persistent)
            and not self.debug
        )
        # A class-level task_timeout takes precedence over the configured one
        if self.task_timeout is None:
            self.task_timeout = self.config.getint("karton", "task_timeout")

        self.concurrency_limit = self.config.getint(
            "karton", "concurrency_limit", self.concurrency_limit
        )

        # A limit of 0 means "no limit", consistently with how _loop reports it
        # (and with task_timeout). Creating BoundedSemaphore(0) here would make
        # the very first acquire() in _loop block forever.
        self.concurrency_semaphore: Optional[asyncio.Semaphore] = None
        if self.concurrency_limit:
            self.concurrency_semaphore = asyncio.BoundedSemaphore(
                self.concurrency_limit
            )

    @abc.abstractmethod
    async def process(self, task: Task) -> None:
        """
        Task processing method.

        :param task: The incoming task object

        self.current_task contains task that triggered invocation of
        :py:meth:`karton.Consumer.process` but you should only focus on the passed
        task object and shouldn't interact with the field directly.
        """
        raise NotImplementedError()

    async def _internal_process(self, task: Task) -> None:
        """
        Run :py:meth:`process` for a single task and report the outcome.

        Marks the task STARTED, runs the handler (under ``self.task_timeout``
        when it is set), updates the crash/consumed metrics and finally marks
        the task FINISHED, or CRASHED with the formatted traceback stored in
        ``task.error``. Exceptions raised by the handler, including
        cancellation, are recorded on the task and not re-raised.

        :param task: Task object to process
        """
        exception_str = None
        try:
            self.log.info("Received new task - %s", task.uid)
            await self.backend.set_task_status(task, TaskState.STARTED)

            if self.task_timeout:
                # asyncio.timeout is Py3.11+; older interpreters get the
                # equivalent asyncio.wait_for instead of an AttributeError
                # that would crash every single task.
                timeout_ctx = getattr(asyncio, "timeout", None)
                try:
                    if timeout_ctx is not None:
                        async with timeout_ctx(self.task_timeout):
                            await self.process(task)
                    else:
                        await asyncio.wait_for(
                            self.process(task), self.task_timeout
                        )
                except asyncio.TimeoutError as e:
                    raise TaskTimeoutError from e
            else:
                await self.process(task)
            self.log.info("Task done - %s", task.uid)
        # CancelledError is listed explicitly: since Python 3.8 it derives from
        # BaseException, so "except Exception" alone would not catch it.
        except (Exception, TaskTimeoutError, CancelledError):
            exc_info = sys.exc_info()
            exception_str = traceback.format_exception(*exc_info)

            await self.backend.increment_metrics(
                KartonMetrics.TASK_CRASHED, self.identity
            )
            self.log.exception("Failed to process task - %s", task.uid)
        finally:
            await self.backend.increment_metrics(
                KartonMetrics.TASK_CONSUMED, self.identity
            )

            task_state = TaskState.FINISHED

            # report the task status as crashed
            # if an exception was caught while processing
            if exception_str is not None:
                task_state = TaskState.CRASHED
                task.error = exception_str

            await self.backend.set_task_status(task, task_state)

    async def internal_process(self, task: Task) -> None:
        """
        The internal side of :py:meth:`Consumer.process` function, takes care of
        synchronizing the task state, handling errors and running task hooks.

        Always releases the concurrency slot acquired by :py:meth:`_loop`
        for this task, even when the task is rejected or processing fails.

        :param task: Task object to process

        :meta private:
        """
        try:
            self.current_task = task

            if not task.matches_filters(self.filters):
                self.log.info("Task rejected because binds are no longer valid.")
                await self.backend.set_task_status(task, TaskState.FINISHED)
                # Task rejected: end of processing
                return

            await self._internal_process(task)
        finally:
            if self.concurrency_semaphore is not None:
                self.concurrency_semaphore.release()
            self.current_task = None

    @property
    def _bind(self) -> KartonBind:
        """
        Bind description of this consumer, built from its identity, filters,
        persistence flag, versions and the class docstring.
        """
        return KartonBind(
            identity=self.identity,
            info=self.__class__.__doc__,
            version=__version__,
            filters=self.filters,
            persistent=self.persistent,
            service_version=self.__class__.version,
            is_async=True,
        )

    @classmethod
    def args_parser(cls) -> argparse.ArgumentParser:
        """
        Extend the parent argument parser with consumer-specific options:
        ``--non-persistent``, ``--task-timeout`` and ``--concurrency-limit``.

        :return: Argument parser with the additional options registered
        """
        parser = super().args_parser()
        # store_false defaults to True, we intentionally want None there
        parser.add_argument(
            "--non-persistent",
            action="store_const",
            const=False,
            dest="persistent",
            help="Run service with non-persistent queue",
        )
        parser.add_argument(
            "--task-timeout",
            type=int,
            help="Limit task execution time",
        )
        parser.add_argument(
            "--concurrency-limit",
            type=int,
            help="Limit number of concurrent tasks",
        )
        return parser

    @classmethod
    def config_from_args(cls, config: Config, args: argparse.Namespace) -> None:
        """
        Load the consumer-specific command-line options into the ``karton``
        section of the configuration, after the parent class has loaded its own.

        :param config: Karton configuration object to update
        :param args: Parsed command-line arguments
        """
        super().config_from_args(config, args)
        config.load_from_dict(
            {
                "karton": {
                    "persistent": args.persistent,
                    "task_timeout": args.task_timeout,
                    "concurrency_limit": args.concurrency_limit,
                }
            }
        )

    async def _loop(self) -> None:
        """
        Blocking loop that consumes tasks and runs
        :py:meth:`karton.Consumer.process` as a handler

        The loop ends when the registered bind no longer matches this
        consumer, when it is cancelled, or when a task coroutine fails with
        an unhandled exception. On exit all pending task coroutines are
        cancelled and awaited before the first collected exception (if any)
        is re-raised.

        :meta private:
        """
        self.log.info("Service %s started", self.identity)

        if self.task_timeout:
            self.log.info(f"Task timeout is set to {self.task_timeout} seconds")
        if self.concurrency_limit:
            self.log.info(f"Concurrency limit is set to {self.concurrency_limit}")

        # Get the old binds and set the new ones atomically
        old_bind = await self.backend.register_bind(self._bind)

        if not old_bind:
            self.log.info("Service binds created.")
        elif old_bind != self._bind:
            self.log.info("Binds changed, old service instances should exit soon.")

        for task_filter in self.filters:
            self.log.info("Binding on: %s", task_filter)

        concurrent_tasks: List[asyncio.Task] = []

        try:
            while True:
                current_bind = await self.backend.get_bind(self.identity)
                if current_bind != self._bind:
                    self.log.info("Binds changed, shutting down.")
                    break
                # Reserve a concurrency slot before taking a task from the
                # queue; internal_process gives it back once the task is done
                if self.concurrency_semaphore is not None:
                    await self.concurrency_semaphore.acquire()
                task = await self.backend.consume_routed_task(self.identity)
                if task:
                    coro_task = asyncio.create_task(self.internal_process(task))
                    concurrent_tasks.append(coro_task)
                else:
                    # Nothing consumed: return the unused slot
                    if self.concurrency_semaphore is not None:
                        self.concurrency_semaphore.release()
                # Garbage collection and exception propagation
                # for finished concurrent tasks
                unfinished_tasks: List[asyncio.Task] = []
                for coro_task in concurrent_tasks:
                    if coro_task.done():
                        # Propagate possible unhandled exception
                        coro_task.result()
                    else:
                        unfinished_tasks.append(coro_task)
                concurrent_tasks = unfinished_tasks
        finally:
            # Finally handles shutdown events:
            # - main loop cancellation (SIGINT/SIGTERM)
            # - unhandled exception in internal_process
            # First cancel all pending tasks
            for coro_task in concurrent_tasks:
                if not coro_task.done():
                    coro_task.cancel()
            # Then gather all tasks to finalize them.
            # return_exceptions=True is required for that: a plain gather()
            # raises as soon as the first failed task is seen and would leave
            # the just-cancelled tasks in the middle of their cleanup.
            results = await asyncio.gather(*concurrent_tasks, return_exceptions=True)
            # Once everything is settled, propagate the first failure
            for result in results:
                if isinstance(result, BaseException):
                    raise result
346
+
347
+
348
class Karton(Consumer, Producer):
    """
    This glues together Consumer and Producer - which is the most common use case

    :param config: Karton config to use for service configuration (optional)
    :param identity: Karton service identity (optional)
    :param backend: Karton backend to use (optional)
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        # All arguments are forwarded unchanged along the Consumer/Producer MRO
        super().__init__(
            config=config,
            identity=identity,
            backend=backend,
        )
@@ -0,0 +1,57 @@
1
+ """
2
+ asyncio implementation of KartonLogHandler
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import platform
8
+ from typing import Any, Dict, Optional, Tuple
9
+
10
+ from karton.core.logger import LogLineFormatterMixin
11
+
12
+ from .backend import KartonAsyncBackend
13
+
14
+ HOSTNAME = platform.node()
15
+
16
+ QueuedRecord = Optional[Tuple[Dict[str, Any], str]]
17
+
18
+
19
async def async_log_consumer(
    queue: asyncio.Queue[QueuedRecord], backend: KartonAsyncBackend, channel: str
) -> None:
    """
    Drain queued log records and forward each one to the backend.

    Runs until a falsy item (the ``None`` end-of-queue marker) is received.

    :param queue: Queue of ``(log_line, levelname)`` pairs
    :param backend: Backend whose ``produce_log`` receives the records
    :param channel: Value passed to ``produce_log`` as ``logger_name``
    """
    while queued := await queue.get():
        payload, level = queued
        await backend.produce_log(payload, logger_name=channel, level=level)
28
+
29
+
30
class KartonAsyncLogHandler(logging.Handler, LogLineFormatterMixin):
    """
    logging.Handler that passes logs to the Karton backend.

    Records are placed on an internal asyncio queue by :py:meth:`emit` and
    delivered by a background task running :py:func:`async_log_consumer`,
    which is controlled with :py:meth:`start_consuming` and
    :py:meth:`stop_consuming`.
    """

    def __init__(self, backend: KartonAsyncBackend, channel: str) -> None:
        logging.Handler.__init__(self)
        self._backend = backend
        self._channel = channel
        self._queue: asyncio.Queue[QueuedRecord] = asyncio.Queue()
        # Background delivery task; stays None until start_consuming is called
        self._consumer: Optional[asyncio.Task] = None

    def emit(self, record: logging.LogRecord) -> None:
        """Queue the prepared log line together with the record's level name."""
        entry = (self.prepare_log_line(record), record.levelname)
        self._queue.put_nowait(entry)

    def start_consuming(self):
        """
        Start the background task that delivers queued records.

        :raises RuntimeError: if the consumer task was already created
        """
        if self._consumer is None:
            worker = async_log_consumer(self._queue, self._backend, self._channel)
            self._consumer = asyncio.create_task(worker)
            return
        raise RuntimeError("Consumer already started")

    async def stop_consuming(self):
        """
        Ask the background task to finish and wait for it.

        :raises RuntimeError: if the consumer task was never started
        """
        consumer = self._consumer
        if consumer is None:
            raise RuntimeError("Consumer is not started")
        # None is the end-of-queue marker; records queued before it are
        # still delivered first
        self._queue.put_nowait(None)
        await consumer