karton-core 5.7.0__py3-none-any.whl → 5.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ import abc
2
+ import argparse
3
+ import asyncio
4
+ import sys
5
+ import time
6
+ import traceback
7
+ from asyncio import CancelledError
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from karton.core import query
11
+ from karton.core.__version__ import __version__
12
+ from karton.core.backend import KartonBind, KartonMetrics
13
+ from karton.core.config import Config
14
+ from karton.core.exceptions import TaskTimeoutError
15
+ from karton.core.task import Task, TaskState
16
+
17
+ from .backend import KartonAsyncBackend
18
+ from .base import KartonAsyncBase, KartonAsyncServiceBase
19
+ from .resource import LocalResource
20
+
21
+
22
class Producer(KartonAsyncBase):
    """
    Producer side of Karton: pushes new tasks into the unrouted task queue.

    Usage example:

    .. code-block:: python

        from karton.core.asyncio import Producer

        producer = Producer(identity="karton.mwdb")
        await producer.connect()
        task = Task(
            headers={
                "type": "sample",
                "kind": "raw"
            },
            payload={
                "sample": Resource("sample.exe", b"put content here")
            }
        )
        await producer.send_task(task)

    :param config: Karton config to use for service configuration (optional)
    :type config: :class:`karton.Config`
    :param identity: Karton producer identity (optional)
    :type identity: str
    :param backend: Karton backend to use (optional)
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        # Nothing producer-specific to set up: all arguments go to the base class.
        super().__init__(
            config=config,
            identity=identity,
            backend=backend,
        )

    async def send_task(self, task: Task) -> bool:
        """
        Declare ``task`` in the backend, upload its local resources and put it
        on the unrouted task queue.

        When a task is currently being handled, the new task becomes its child:
        it inherits the parent's persistent payload, persistent headers and
        priority.

        :param task: Task object to be sent
        :return: ``True`` once the task has been queued (failures surface as
                 exceptions from the backend calls, not as ``False``)
        """
        self.log.debug("Dispatched task %s", task.uid)

        # Link the new task to the one being processed right now, if any.
        parent = self.current_task
        if parent is not None:
            task.set_task_parent(parent)
            task.merge_persistent_payload(parent)
            task.merge_persistent_headers(parent)
            task.priority = parent.priority

        task.last_update = time.time()
        task.headers.update(origin=self.identity)

        # The task is registered before its resources are uploaded.
        await self.backend.declare_task(task)

        # Only LocalResource objects need uploading; anything else is skipped.
        for candidate in task.iterate_resources():
            if not isinstance(candidate, LocalResource):
                continue
            await candidate.upload(self.backend)

        # Hand the task over for routing and count it in the metrics.
        await self.backend.produce_unrouted_task(task)
        await self.backend.increment_metrics(KartonMetrics.TASK_PRODUCED, self.identity)
        return True
95
+
96
+
97
class Consumer(KartonAsyncServiceBase):
    """
    Base consumer class, this is the part of Karton responsible for processing
    incoming tasks

    :param config: Karton config to use for service configuration
    :param identity: Karton service identity
    :param backend: Karton backend to use
    :param task_timeout: The maximum time, in seconds, this consumer will wait for
                         a task to finish processing before being CRASHED on timeout.
                         Set 0 for unlimited, and None for using global value
    :param concurrency_limit: The maximum number of concurrent tasks that may be
                              gathered from queue and processed asynchronously.
                              Must be a positive integer; None disables the limit.
    """

    # Class-level defaults; ``persistent``, ``task_timeout`` and
    # ``concurrency_limit`` may be overridden by configuration in __init__.
    filters: List[Dict[str, Any]] = []
    persistent: bool = True
    version: Optional[str] = None
    task_timeout = None
    concurrency_limit: Optional[int] = 1

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        """
        Validate the filters and resolve the effective persistence, task
        timeout and concurrency settings from the configuration.

        :raises ValueError: if ``filters`` is None, or if the resolved
                            ``concurrency_limit`` is lower than 1
        """
        super().__init__(config=config, identity=identity, backend=backend)

        if self.filters is None:
            raise ValueError("Cannot bind consumer on Empty binds")

        # Dummy conversion to make sure the filters are well-formed.
        query.convert(self.filters)

        # Debug mode always forces a non-persistent queue.
        self.persistent = (
            self.config.getboolean("karton", "persistent", self.persistent)
            and not self.debug
        )
        if self.task_timeout is None:
            self.task_timeout = self.config.getint("karton", "task_timeout")

        self.concurrency_limit = self.config.getint(
            "karton", "concurrency_limit", self.concurrency_limit
        )

        self.concurrency_semaphore: Optional[asyncio.Semaphore] = None
        if self.concurrency_limit is not None:
            # A semaphore created with value 0 can never be acquired, so the
            # consuming loop would hang forever on its first iteration.
            # Fail fast with a clear message instead.
            if self.concurrency_limit < 1:
                raise ValueError(
                    "concurrency_limit must be a positive integer "
                    f"(or None for no limit), got {self.concurrency_limit!r}"
                )
            self.concurrency_semaphore = asyncio.BoundedSemaphore(
                self.concurrency_limit
            )

    @abc.abstractmethod
    async def process(self, task: Task) -> None:
        """
        Task processing method.

        :param task: The incoming task object

        self.current_task contains task that triggered invocation of
        :py:meth:`karton.Consumer.process` but you should only focus on the passed
        task object and shouldn't interact with the field directly.
        """
        raise NotImplementedError()

    async def _internal_process(self, task: Task) -> None:
        """
        Run :py:meth:`process` for ``task`` and report the outcome to the backend.

        The task is marked STARTED, processed (under ``task_timeout`` when that
        is set to a non-zero value) and finally marked FINISHED, or CRASHED with
        the formatted traceback stored in ``task.error``. Exceptions raised by
        ``process``, including timeout and cancellation, are caught and logged
        here rather than propagated.

        :param task: Task object to process
        """
        exception_str = None
        try:
            self.log.info("Received new task - %s", task.uid)
            await self.backend.set_task_status(task, TaskState.STARTED)

            if self.task_timeout:
                try:
                    if hasattr(asyncio, "timeout"):
                        # asyncio.timeout is Py3.11+
                        async with asyncio.timeout(  # type: ignore
                            self.task_timeout
                        ):
                            await self.process(task)
                    else:
                        # Older interpreters: without this fallback the missing
                        # attribute would be caught below and every task would
                        # be reported as crashed.
                        await asyncio.wait_for(
                            self.process(task), timeout=self.task_timeout
                        )
                except asyncio.TimeoutError as e:
                    raise TaskTimeoutError from e
            else:
                await self.process(task)
            self.log.info("Task done - %s", task.uid)
        except (Exception, TaskTimeoutError, CancelledError):
            exc_info = sys.exc_info()
            exception_str = traceback.format_exception(*exc_info)

            await self.backend.increment_metrics(
                KartonMetrics.TASK_CRASHED, self.identity
            )
            self.log.exception("Failed to process task - %s", task.uid)
        finally:
            await self.backend.increment_metrics(
                KartonMetrics.TASK_CONSUMED, self.identity
            )

            task_state = TaskState.FINISHED

            # report the task status as crashed
            # if an exception was caught while processing
            if exception_str is not None:
                task_state = TaskState.CRASHED
                task.error = exception_str

            await self.backend.set_task_status(task, task_state)

    async def internal_process(self, task: Task) -> None:
        """
        The internal side of :py:meth:`Consumer.process` function, takes care of
        synchronizing the task state, handling errors and running task hooks.

        Tasks that no longer match the consumer filters are marked FINISHED
        without being processed. The concurrency slot acquired by the consuming
        loop is always released when this coroutine ends.

        :param task: Task object to process

        :meta private:
        """
        try:
            self.current_task = task

            if not task.matches_filters(self.filters):
                self.log.info(
                    "Task rejected because binds are no longer valid. "
                    "Rejected task headers: %s",
                    task.headers,
                )
                await self.backend.set_task_status(task, TaskState.FINISHED)
                # Task rejected: end of processing
                return

            await self._internal_process(task)
        finally:
            # The matching acquire() happens in _loop before the task is consumed.
            if self.concurrency_semaphore is not None:
                self.concurrency_semaphore.release()
            self.current_task = None

    @property
    def _bind(self) -> KartonBind:
        """
        Bind description of this consumer, built from its identity, class
        docstring, filters, persistence flag and versions.
        """
        return KartonBind(
            identity=self.identity,
            info=self.__class__.__doc__,
            version=__version__,
            filters=self.filters,
            persistent=self.persistent,
            service_version=self.__class__.version,
            is_async=True,
        )

    @classmethod
    def args_parser(cls) -> argparse.ArgumentParser:
        """
        Extend the parent argument parser with the consumer options
        ``--non-persistent``, ``--task-timeout`` and ``--concurrency-limit``.

        :return: The extended argument parser
        """
        parser = super().args_parser()
        # store_false defaults to True, we intentionally want None there
        parser.add_argument(
            "--non-persistent",
            action="store_const",
            const=False,
            dest="persistent",
            help="Run service with non-persistent queue",
        )
        parser.add_argument(
            "--task-timeout",
            type=int,
            help="Limit task execution time",
        )
        parser.add_argument(
            "--concurrency-limit",
            type=int,
            help="Limit number of concurrent tasks",
        )
        return parser

    @classmethod
    def config_from_args(cls, config: Config, args: argparse.Namespace) -> None:
        """
        Load the consumer command-line options into the ``karton`` section of
        ``config``, after the parent class has applied its own options.

        :param config: Configuration object to update
        :param args: Parsed command-line arguments
        """
        super().config_from_args(config, args)
        config.load_from_dict(
            {
                "karton": {
                    "persistent": args.persistent,
                    "task_timeout": args.task_timeout,
                    "concurrency_limit": args.concurrency_limit,
                }
            }
        )

    async def _loop(self) -> None:
        """
        Blocking loop that consumes tasks and runs
        :py:meth:`karton.Consumer.process` as a handler

        The loop registers the consumer bind, then keeps fetching routed tasks
        and scheduling them as asyncio tasks until the bind stored in the
        backend differs from its own.

        :meta private:
        """
        self.log.info("Service %s started", self.identity)

        if self.task_timeout:
            self.log.info("Task timeout is set to %s seconds", self.task_timeout)
        if self.concurrency_limit:
            self.log.info("Concurrency limit is set to %s", self.concurrency_limit)

        # Get the old binds and set the new ones atomically
        old_bind = await self.backend.register_bind(self._bind)

        if not old_bind:
            self.log.info("Service binds created.")
        elif old_bind != self._bind:
            self.log.info(
                "Binds changed, old service instances should exit soon. "
                "Old binds: %s "
                "New binds: %s",
                old_bind,
                self._bind,
            )

        for task_filter in self.filters:
            self.log.info("Binding on: %s", task_filter)

        concurrent_tasks: List[asyncio.Task] = []

        try:
            while True:
                current_bind = await self.backend.get_bind(self.identity)
                if current_bind != self._bind:
                    self.log.info(
                        "Binds changed, shutting down. "
                        "Old binds: %s "
                        "New binds: %s",
                        self._bind,
                        current_bind,
                    )
                    break
                # Reserve a processing slot before taking a task off the queue.
                if self.concurrency_semaphore is not None:
                    await self.concurrency_semaphore.acquire()
                task = await self.backend.consume_routed_task(self.identity)
                if task:
                    # internal_process releases the slot when it finishes.
                    coro_task = asyncio.create_task(self.internal_process(task))
                    concurrent_tasks.append(coro_task)
                else:
                    # Nothing was consumed, so give the reserved slot back.
                    if self.concurrency_semaphore is not None:
                        self.concurrency_semaphore.release()
                # Garbage collection and exception propagation
                # for finished concurrent tasks
                unfinished_tasks: List[asyncio.Task] = []
                for coro_task in concurrent_tasks:
                    if coro_task.done():
                        # Propagate possible unhandled exception
                        coro_task.result()
                    else:
                        unfinished_tasks.append(coro_task)
                concurrent_tasks = unfinished_tasks
        finally:
            # Finally handles shutdown events:
            # - main loop cancellation (SIGINT/SIGTERM)
            # - unhandled exception in internal_process
            # First cancel all pending tasks
            for coro_task in concurrent_tasks:
                if not coro_task.done():
                    coro_task.cancel()
            # Then gather all tasks to finalize them
            await asyncio.gather(*concurrent_tasks)
351
+
352
+
353
class Karton(Consumer, Producer):
    """
    Combined Consumer and Producer, which is the most common use case:
    a service that processes incoming tasks and can send new ones.
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        """
        Forward all arguments unchanged to the parent initialisers.

        :param config: Karton config to use for service configuration
        :param identity: Karton service identity
        :param backend: Karton backend to use
        """
        super().__init__(
            config=config,
            identity=identity,
            backend=backend,
        )
@@ -0,0 +1,57 @@
1
+ """
2
+ asyncio implementation of KartonLogHandler
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import platform
8
+ from typing import Any, Dict, Optional, Tuple
9
+
10
+ from karton.core.logger import LogLineFormatterMixin
11
+
12
+ from .backend import KartonAsyncBackend
13
+
14
+ HOSTNAME = platform.node()
15
+
16
+ QueuedRecord = Optional[Tuple[Dict[str, Any], str]]
17
+
18
+
19
async def async_log_consumer(
    queue: asyncio.Queue[QueuedRecord], backend: KartonAsyncBackend, channel: str
) -> None:
    """
    Drain ``queue`` and forward every record to ``backend.produce_log``.

    Each queued item is a ``(log_line, levelname)`` pair. The coroutine returns
    as soon as it receives a falsy item (``None`` is the end-of-stream marker);
    anything still queued behind that marker is left untouched.

    :param queue: Queue of pending log records
    :param backend: Backend whose ``produce_log`` coroutine receives the records
    :param channel: Value passed to ``produce_log`` as ``logger_name``
    """
    while record := await queue.get():
        payload, severity = record
        await backend.produce_log(payload, logger_name=channel, level=severity)
28
+
29
+
30
class KartonAsyncLogHandler(logging.Handler, LogLineFormatterMixin):
    """
    logging.Handler that passes logs to the Karton backend.

    ``emit`` only places the formatted record on an in-memory queue; the
    records are sent to the backend by a background asyncio task that runs
    between ``start_consuming`` and ``stop_consuming``.
    """

    def __init__(self, backend: KartonAsyncBackend, channel: str) -> None:
        """
        :param backend: Backend that receives the log lines
        :param channel: Logger name under which the log lines are produced
        """
        logging.Handler.__init__(self)
        self._backend = backend
        self._channel = channel
        self._queue: asyncio.Queue[QueuedRecord] = asyncio.Queue()
        # Background consumer task; None until start_consuming() is called.
        self._consumer: Optional[asyncio.Task] = None

    def emit(self, record: logging.LogRecord) -> None:
        """
        Format ``record`` and queue it together with its level name.

        :param record: Log record to forward
        """
        entry = (self.prepare_log_line(record), record.levelname)
        self._queue.put_nowait(entry)

    def start_consuming(self):
        """
        Start the background task that forwards queued records to the backend.

        :raises RuntimeError: if the consumer task has already been created
        """
        if self._consumer is not None:
            raise RuntimeError("Consumer already started")
        worker = async_log_consumer(self._queue, self._backend, self._channel)
        self._consumer = asyncio.create_task(worker)

    async def stop_consuming(self):
        """
        Ask the background task to finish and wait until it has done so.

        :raises RuntimeError: if the consumer task was never started
        """
        consumer = self._consumer
        if consumer is None:
            raise RuntimeError("Consumer is not started")
        # None is the end-of-stream marker understood by the consumer task.
        self._queue.put_nowait(None)
        await consumer