karton-core 5.7.0__py3-none-any.whl → 5.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karton/core/__version__.py +1 -1
- karton/core/asyncio/__init__.py +21 -0
- karton/core/asyncio/backend.py +379 -0
- karton/core/asyncio/base.py +133 -0
- karton/core/asyncio/karton.py +364 -0
- karton/core/asyncio/logger.py +57 -0
- karton/core/asyncio/resource.py +384 -0
- karton/core/backend.py +192 -107
- karton/core/base.py +121 -94
- karton/core/config.py +13 -1
- karton/core/karton.py +35 -22
- karton/core/logger.py +33 -15
- karton/core/main.py +26 -6
- karton/core/resource.py +32 -30
- karton/core/task.py +24 -2
- karton/core/test.py +6 -2
- {karton_core-5.7.0.dist-info → karton_core-5.9.0.dist-info}/METADATA +30 -6
- karton_core-5.9.0.dist-info/RECORD +31 -0
- {karton_core-5.7.0.dist-info → karton_core-5.9.0.dist-info}/WHEEL +1 -1
- karton_core-5.7.0-nspkg.pth +0 -1
- karton_core-5.7.0.dist-info/RECORD +0 -27
- karton_core-5.7.0.dist-info/namespace_packages.txt +0 -1
- {karton_core-5.7.0.dist-info → karton_core-5.9.0.dist-info}/entry_points.txt +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.9.0.dist-info/licenses}/LICENSE +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.9.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
|
|
1
|
+
import abc
|
2
|
+
import argparse
|
3
|
+
import asyncio
|
4
|
+
import sys
|
5
|
+
import time
|
6
|
+
import traceback
|
7
|
+
from asyncio import CancelledError
|
8
|
+
from typing import Any, Dict, List, Optional
|
9
|
+
|
10
|
+
from karton.core import query
|
11
|
+
from karton.core.__version__ import __version__
|
12
|
+
from karton.core.backend import KartonBind, KartonMetrics
|
13
|
+
from karton.core.config import Config
|
14
|
+
from karton.core.exceptions import TaskTimeoutError
|
15
|
+
from karton.core.task import Task, TaskState
|
16
|
+
|
17
|
+
from .backend import KartonAsyncBackend
|
18
|
+
from .base import KartonAsyncBase, KartonAsyncServiceBase
|
19
|
+
from .resource import LocalResource
|
20
|
+
|
21
|
+
|
22
|
+
class Producer(KartonAsyncBase):
    """
    Producer part of Karton. Used for dispatching initial tasks into karton.

    Usage example:

    .. code-block:: python

        from karton.core.asyncio import Producer

        producer = Producer(identity="karton.mwdb")
        await producer.connect()
        task = Task(
            headers={
                "type": "sample",
                "kind": "raw"
            },
            payload={
                "sample": Resource("sample.exe", b"put content here")
            }
        )
        await producer.send_task(task)

    :param config: Karton config to use for service configuration (optional)
    :type config: :class:`karton.Config`
    :param identity: Karton producer identity (optional)
    :type identity: str
    :param backend: Karton backend to use (optional)
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        # Nothing producer-specific to set up: all arguments are forwarded
        # unchanged to the base class.
        super().__init__(
            config=config,
            identity=identity,
            backend=backend,
        )

    async def send_task(self, task: Task) -> bool:
        """
        Push a task onto the unrouted task queue.

        When this producer is currently handling a task, the new task becomes
        its child: it gets the parent reference, the persistent payload and
        headers, and the priority of the task being handled.

        :param task: Task object to be sent
        :return: Always ``True`` when the method completes without raising
        """
        self.log.debug("Dispatched task %s", task.uid)

        # Inherit lineage and persistent data from the task being processed
        parent = self.current_task
        if parent is not None:
            task.set_task_parent(parent)
            task.merge_persistent_payload(parent)
            task.merge_persistent_headers(parent)
            task.priority = parent.priority

        task.last_update = time.time()
        task.headers["origin"] = self.identity

        # The task is declared in the backend before its resources are uploaded
        await self.backend.declare_task(task)

        # Only local resources need uploading; other resource kinds are skipped
        for resource in task.iterate_resources():
            if not isinstance(resource, LocalResource):
                continue
            await resource.upload(self.backend)

        # Hand the task over for routing and account for it in the metrics
        await self.backend.produce_unrouted_task(task)
        await self.backend.increment_metrics(KartonMetrics.TASK_PRODUCED, self.identity)
        return True
|
95
|
+
|
96
|
+
|
97
|
+
class Consumer(KartonAsyncServiceBase):
    """
    Base consumer class, this is the part of Karton responsible for processing
    incoming tasks

    :param config: Karton config to use for service configuration
    :param identity: Karton service identity
    :param backend: Karton backend to use
    :param task_timeout: The maximum time, in seconds, this consumer will wait for
                         a task to finish processing before being CRASHED on timeout.
                         Set 0 for unlimited, and None for using global value
    :param concurrency_limit: The maximum number of concurrent tasks that may be
                              gathered from queue and processed asynchronously.
                              Set 0 or None for unlimited
    """

    # Class-level defaults, meant to be overridden by subclasses.
    filters: List[Dict[str, Any]] = []
    persistent: bool = True
    version: Optional[str] = None
    task_timeout = None
    concurrency_limit: Optional[int] = 1

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        """
        Validate the filters and resolve the effective settings from config.

        :raises ValueError: when ``filters`` is ``None``
        """
        super().__init__(config=config, identity=identity, backend=backend)

        if self.filters is None:
            raise ValueError("Cannot bind consumer on Empty binds")

        # Dummy conversion to make sure the filters are well-formed.
        query.convert(self.filters)

        # Debug mode always forces a non-persistent queue
        self.persistent = (
            self.config.getboolean("karton", "persistent", self.persistent)
            and not self.debug
        )
        # A class-level task_timeout takes precedence over the config value
        if self.task_timeout is None:
            self.task_timeout = self.config.getint("karton", "task_timeout")

        # The class-level concurrency_limit is only the fallback here
        self.concurrency_limit = self.config.getint(
            "karton", "concurrency_limit", self.concurrency_limit
        )

        # No semaphore means no limit. A limit of 0 is treated like None:
        # BoundedSemaphore(0) could never be acquired, so the consumer loop
        # would block forever before fetching its first task.
        self.concurrency_semaphore: Optional[asyncio.Semaphore] = None
        if self.concurrency_limit:
            self.concurrency_semaphore = asyncio.BoundedSemaphore(
                self.concurrency_limit
            )

    @abc.abstractmethod
    async def process(self, task: Task) -> None:
        """
        Task processing method.

        :param task: The incoming task object

        self.current_task contains task that triggered invocation of
        :py:meth:`karton.Consumer.process` but you should only focus on the passed
        task object and shouldn't interact with the field directly.
        """
        raise NotImplementedError()

    async def _internal_process(self, task: Task) -> None:
        """
        Run :py:meth:`process` for a task and report the outcome to the backend.

        The task is marked STARTED, processed (under ``task_timeout`` when it is
        set and non-zero), and finally marked FINISHED or CRASHED. Exceptions
        raised by processing, including cancellation, are logged and recorded
        on the task instead of being re-raised.

        :param task: Task object to process
        """
        exception_str = None
        try:
            self.log.info("Received new task - %s", task.uid)
            await self.backend.set_task_status(task, TaskState.STARTED)

            if self.task_timeout:
                try:
                    # asyncio.timeout is Py3.11+
                    async with asyncio.timeout(self.task_timeout):  # type: ignore
                        await self.process(task)
                except asyncio.TimeoutError as e:
                    raise TaskTimeoutError from e
            else:
                await self.process(task)
            self.log.info("Task done - %s", task.uid)
        # CancelledError is listed explicitly because it does not derive from
        # Exception; a cancelled task is therefore reported as crashed too.
        except (Exception, TaskTimeoutError, CancelledError):
            exc_info = sys.exc_info()
            exception_str = traceback.format_exception(*exc_info)

            await self.backend.increment_metrics(
                KartonMetrics.TASK_CRASHED, self.identity
            )
            self.log.exception("Failed to process task - %s", task.uid)
        finally:
            await self.backend.increment_metrics(
                KartonMetrics.TASK_CONSUMED, self.identity
            )

            task_state = TaskState.FINISHED

            # report the task status as crashed
            # if an exception was caught while processing
            if exception_str is not None:
                task_state = TaskState.CRASHED
                task.error = exception_str

            await self.backend.set_task_status(task, task_state)

    async def internal_process(self, task: Task) -> None:
        """
        The internal side of :py:meth:`Consumer.process` function, takes care of
        synchronizing the task state, handling errors and running task hooks.

        On exit it always releases one concurrency permit (when a semaphore is
        in use) and clears ``current_task``.

        :param task: Task object to process

        :meta private:
        """
        try:
            self.current_task = task

            if not task.matches_filters(self.filters):
                self.log.info(
                    "Task rejected because binds are no longer valid. "
                    "Rejected task headers: %s",
                    task.headers,
                )
                await self.backend.set_task_status(task, TaskState.FINISHED)
                # Task rejected: end of processing
                return

            await self._internal_process(task)
        finally:
            # Gives back the permit that _loop acquired before consuming the task
            if self.concurrency_semaphore is not None:
                self.concurrency_semaphore.release()
            self.current_task = None

    @property
    def _bind(self) -> KartonBind:
        """
        Bind describing this consumer, built from its current attributes.

        A new :class:`KartonBind` is constructed on every access.
        """
        return KartonBind(
            identity=self.identity,
            info=self.__class__.__doc__,
            version=__version__,
            filters=self.filters,
            persistent=self.persistent,
            service_version=self.__class__.version,
            is_async=True,
        )

    @classmethod
    def args_parser(cls) -> argparse.ArgumentParser:
        """
        Extend the parent argument parser with the consumer options
        ``--non-persistent``, ``--task-timeout`` and ``--concurrency-limit``.

        :return: The extended argument parser
        """
        parser = super().args_parser()
        # store_false defaults to True, we intentionally want None there
        parser.add_argument(
            "--non-persistent",
            action="store_const",
            const=False,
            dest="persistent",
            help="Run service with non-persistent queue",
        )
        parser.add_argument(
            "--task-timeout",
            type=int,
            help="Limit task execution time",
        )
        parser.add_argument(
            "--concurrency-limit",
            type=int,
            help="Limit number of concurrent tasks",
        )
        return parser

    @classmethod
    def config_from_args(cls, config: Config, args: argparse.Namespace) -> None:
        """
        Load the consumer command-line options into the ``karton`` config section.

        :param config: Karton configuration object to update
        :param args: Namespace produced by the parser from :py:meth:`args_parser`
        """
        super().config_from_args(config, args)
        config.load_from_dict(
            {
                "karton": {
                    "persistent": args.persistent,
                    "task_timeout": args.task_timeout,
                    "concurrency_limit": args.concurrency_limit,
                }
            }
        )

    async def _loop(self) -> None:
        """
        Blocking loop that consumes tasks and runs
        :py:meth:`karton.Consumer.process` as a handler

        The loop ends when the bind stored in the backend no longer equals this
        consumer's bind, or when an exception or cancellation reaches it.

        :meta private:
        """
        self.log.info("Service %s started", self.identity)

        if self.task_timeout:
            self.log.info("Task timeout is set to %s seconds", self.task_timeout)
        if self.concurrency_limit:
            self.log.info("Concurrency limit is set to %s", self.concurrency_limit)

        # Get the old binds and set the new ones atomically
        old_bind = await self.backend.register_bind(self._bind)

        if not old_bind:
            self.log.info("Service binds created.")
        elif old_bind != self._bind:
            self.log.info(
                "Binds changed, old service instances should exit soon. "
                "Old binds: %s "
                "New binds: %s",
                old_bind,
                self._bind,
            )

        for task_filter in self.filters:
            self.log.info("Binding on: %s", task_filter)

        concurrent_tasks: List[asyncio.Task] = []

        try:
            while True:
                current_bind = await self.backend.get_bind(self.identity)
                if current_bind != self._bind:
                    self.log.info(
                        "Binds changed, shutting down. "
                        "Old binds: %s "
                        "New binds: %s",
                        self._bind,
                        current_bind,
                    )
                    break
                # Take a permit before fetching, so a task is only pulled from
                # the queue when there is capacity to process it
                if self.concurrency_semaphore is not None:
                    await self.concurrency_semaphore.acquire()
                task = await self.backend.consume_routed_task(self.identity)
                if task:
                    # internal_process releases the permit when it finishes
                    coro_task = asyncio.create_task(self.internal_process(task))
                    concurrent_tasks.append(coro_task)
                else:
                    # Nothing was consumed: give the permit back right away
                    if self.concurrency_semaphore is not None:
                        self.concurrency_semaphore.release()
                # Garbage collection and exception propagation
                # for finished concurrent tasks
                unfinished_tasks: List[asyncio.Task] = []
                for coro_task in concurrent_tasks:
                    if coro_task.done():
                        # Propagate possible unhandled exception
                        coro_task.result()
                    else:
                        unfinished_tasks.append(coro_task)
                concurrent_tasks = unfinished_tasks
        finally:
            # Finally handles shutdown events:
            # - main loop cancellation (SIGINT/SIGTERM)
            # - unhandled exception in internal_process
            # First cancel all pending tasks
            for coro_task in concurrent_tasks:
                if not coro_task.done():
                    coro_task.cancel()
            # Then gather all tasks to finalize them
            # NOTE(review): without return_exceptions=True, gather() raises as
            # soon as one task fails and does not wait for the remaining ones -
            # confirm this is the intended shutdown behaviour.
            await asyncio.gather(*concurrent_tasks)
|
351
|
+
|
352
|
+
|
353
|
+
class Karton(Consumer, Producer):
    """
    This glues together Consumer and Producer - which is the most common use case
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        identity: Optional[str] = None,
        backend: Optional[KartonAsyncBackend] = None,
    ) -> None:
        # No state of its own: the arguments are forwarded unchanged to the
        # next initializer in the method resolution order.
        super().__init__(
            config=config,
            identity=identity,
            backend=backend,
        )
|
@@ -0,0 +1,57 @@
|
|
1
|
+
"""
|
2
|
+
asyncio implementation of KartonLogHandler
|
3
|
+
"""
|
4
|
+
|
5
|
+
import asyncio
|
6
|
+
import logging
|
7
|
+
import platform
|
8
|
+
from typing import Any, Dict, Optional, Tuple
|
9
|
+
|
10
|
+
from karton.core.logger import LogLineFormatterMixin
|
11
|
+
|
12
|
+
from .backend import KartonAsyncBackend
|
13
|
+
|
14
|
+
# Node name of this machine as returned by platform.node(), resolved once at import
HOSTNAME = platform.node()
|
15
|
+
|
16
|
+
# Item type of the log queue: a (log_line, levelname) pair, or None, which
# stop_consuming() enqueues to tell the consumer coroutine to finish.
QueuedRecord = Optional[Tuple[Dict[str, Any], str]]
|
17
|
+
|
18
|
+
|
19
|
+
async def async_log_consumer(
    queue: asyncio.Queue[QueuedRecord], backend: KartonAsyncBackend, channel: str
) -> None:
    """
    Forward queued log records to the backend until a stop marker arrives.

    Records are taken from the queue one at a time and passed to
    ``backend.produce_log``. The coroutine returns on the first falsy item
    (``None`` is the marker used by the log handler).

    :param queue: Queue of ``(log_line, levelname)`` pairs
    :param backend: Backend that receives the log lines
    :param channel: Value passed to the backend as ``logger_name``
    """
    while entry := await queue.get():
        line, level = entry
        await backend.produce_log(line, logger_name=channel, level=level)
|
28
|
+
|
29
|
+
|
30
|
+
class KartonAsyncLogHandler(logging.Handler, LogLineFormatterMixin):
    """
    logging.Handler that passes logs to the Karton backend.

    Records are buffered in an asyncio queue by :py:meth:`emit` and shipped to
    the backend by a background task started with :py:meth:`start_consuming`.
    """

    def __init__(self, backend: KartonAsyncBackend, channel: str) -> None:
        super().__init__()
        self._backend = backend
        self._channel = channel
        # Created without a maxsize, so put_nowait() in emit() never hits a limit
        self._queue: asyncio.Queue[QueuedRecord] = asyncio.Queue()
        # Background forwarding task; None until start_consuming() is called
        self._consumer: Optional[asyncio.Task] = None

    def emit(self, record: logging.LogRecord) -> None:
        """
        Queue a formatted log record for delivery to the backend.

        :param record: Log record to be forwarded
        """
        # NOTE(review): asyncio.Queue is not thread-safe; presumably emit() is
        # only called from the event loop thread - confirm.
        entry = (self.prepare_log_line(record), record.levelname)
        self._queue.put_nowait(entry)

    def start_consuming(self):
        """
        Start the background task that forwards queued records to the backend.

        :raises RuntimeError: when the consumer task was already started
        """
        if self._consumer is not None:
            raise RuntimeError("Consumer already started")
        forwarder = async_log_consumer(self._queue, self._backend, self._channel)
        self._consumer = asyncio.create_task(forwarder)

    async def stop_consuming(self):
        """
        Ask the background task to finish and wait until it has done so.

        Records queued before this call are forwarded before the task ends.

        :raises RuntimeError: when the consumer task was never started
        """
        # NOTE(review): _consumer is not reset here, so start_consuming()
        # cannot be called again after stopping - confirm this is intended.
        if self._consumer is None:
            raise RuntimeError("Consumer is not started")
        # None is the end-of-stream marker understood by the consumer task
        self._queue.put_nowait(None)
        await self._consumer
|