baqueue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- baqueue/__init__.py +19 -0
- baqueue/balancer.py +108 -0
- baqueue/batch.py +159 -0
- baqueue/cli.py +459 -0
- baqueue/config.py +79 -0
- baqueue/dashboard/__init__.py +1 -0
- baqueue/dashboard/api.py +193 -0
- baqueue/dashboard/server.py +263 -0
- baqueue/dashboard/static/app.js +450 -0
- baqueue/dashboard/static/index.html +580 -0
- baqueue/dashboard/static/style.css +1415 -0
- baqueue/drivers/__init__.py +1 -0
- baqueue/drivers/base.py +212 -0
- baqueue/drivers/memory_driver.py +318 -0
- baqueue/drivers/postgres_driver.py +656 -0
- baqueue/drivers/redis_driver.py +656 -0
- baqueue/drivers/sqlite_driver.py +706 -0
- baqueue/events.py +64 -0
- baqueue/job.py +128 -0
- baqueue/pruner.py +128 -0
- baqueue/queue.py +225 -0
- baqueue/retry.py +55 -0
- baqueue/scheduler.py +101 -0
- baqueue/serializer.py +124 -0
- baqueue/supervisor.py +206 -0
- baqueue/worker.py +165 -0
- baqueue-0.1.0.dist-info/METADATA +609 -0
- baqueue-0.1.0.dist-info/RECORD +32 -0
- baqueue-0.1.0.dist-info/WHEEL +5 -0
- baqueue-0.1.0.dist-info/entry_points.txt +2 -0
- baqueue-0.1.0.dist-info/licenses/LICENSE +21 -0
- baqueue-0.1.0.dist-info/top_level.txt +1 -0
baqueue/serializer.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Job serialization and deserialization."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import importlib
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _now_ts() -> float:
|
|
13
|
+
return datetime.now(timezone.utc).timestamp()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JobPayload:
    """Serializable representation of a queued job.

    Every field listed in ``__slots__`` is a keyword-only constructor
    argument and is emitted by :meth:`to_dict` in the same order.  The
    payload round-trips through JSON via :meth:`to_json` and
    :meth:`from_json`.

    Defaults applied by the constructor:

    * ``id`` - a fresh ``uuid4().hex`` when missing or empty.
    * ``data`` / ``tags`` - a new empty ``dict`` / ``list`` when falsy.
    * ``created_at`` - the current UTC timestamp when ``None``.
    * ``updated_at`` - ``created_at`` when ``None``.
    """

    __slots__ = (
        "id",
        "job_class",
        "data",
        "queue",
        "attempts",
        "max_attempts",
        "backoff",
        "timeout",
        "tags",
        "batch_id",
        "delay_until",
        "created_at",
        "updated_at",
        "started_at",
        "completed_at",
        "failed_at",
        "status",
        "error",
    )

    def __init__(
        self,
        *,
        id: str | None = None,
        job_class: str = "",
        data: dict[str, Any] | None = None,
        queue: str = "default",
        attempts: int = 0,
        max_attempts: int = 3,
        backoff: str | list[int] = "exponential",
        timeout: int = 60,
        tags: list[str] | None = None,
        batch_id: str | None = None,
        delay_until: float | None = None,
        created_at: float | None = None,
        updated_at: float | None = None,
        started_at: float | None = None,
        completed_at: float | None = None,
        failed_at: float | None = None,
        status: str = "pending",
        error: str | None = None,
    ):
        self.id = id or uuid4().hex
        self.job_class = job_class
        self.data = data or {}
        self.queue = queue
        self.attempts = attempts
        self.max_attempts = max_attempts
        self.backoff = backoff
        self.timeout = timeout
        self.tags = tags or []
        self.batch_id = batch_id
        self.delay_until = delay_until
        # Compare against None rather than relying on truthiness: 0 / 0.0 is
        # a valid timestamp and must not be silently replaced by "now".
        self.created_at = _now_ts() if created_at is None else created_at
        self.updated_at = self.created_at if updated_at is None else updated_at
        self.started_at = started_at
        self.completed_at = completed_at
        self.failed_at = failed_at
        self.status = status
        self.error = error

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(id={self.id!r}, job_class={self.job_class!r}, "
            f"queue={self.queue!r}, status={self.status!r}, attempts={self.attempts!r})"
        )

    def to_dict(self) -> dict[str, Any]:
        """Return all payload fields as a plain dict, in ``__slots__`` order.

        The values are the stored objects themselves (no copies are made),
        so mutating ``data`` or ``tags`` in the result affects the payload.
        """
        # Derive the mapping from __slots__ so the field list lives in one place.
        return {field: getattr(self, field) for field in JobPayload.__slots__}

    def to_json(self) -> str:
        """Serialize the payload to a JSON string."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> JobPayload:
        """Build a payload from a mapping of constructor keyword arguments.

        Raises:
            TypeError: if ``data`` contains a key the constructor does not accept.
        """
        return cls(**data)

    @classmethod
    def from_json(cls, raw: str) -> JobPayload:
        """Build a payload from a JSON string produced by :meth:`to_json`."""
        return cls.from_dict(json.loads(raw))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def resolve_job_class(class_path: str):
    """Dynamically import and return a job class from its dotted path.

    The path is split into a module part and an attribute part.  The longest
    importable module prefix is used and the remaining components are looked
    up as nested attributes, so paths built from ``__qualname__`` for nested
    classes (``package.module.Outer.Inner``) resolve as well as plain
    ``package.module.Name`` paths.

    Args:
        class_path: Dotted path such as ``"package.module.ClassName"``.

    Returns:
        The object found at ``class_path``.

    Raises:
        ValueError: if ``class_path`` contains no dot.
        ModuleNotFoundError: if no prefix of the path is an importable module.
        AttributeError: if the module is found but the attribute is missing.
    """
    # NOTE(review): this imports whatever module the path names. If the path
    # can come from an untrusted queue producer, restrict it to an allow-list.
    if "." not in class_path:
        raise ValueError(
            f"Job class path must be a dotted 'module.Name' path, got {class_path!r}"
        )

    parts = class_path.split(".")
    missing: ModuleNotFoundError | None = None

    # Try the longest module prefix first, then progressively shorter ones.
    for split_at in range(len(parts) - 1, 0, -1):
        module_name = ".".join(parts[:split_at])
        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError as exc:
            # Only fall back when the candidate itself (or one of its parents)
            # is what is missing; a broken import *inside* an existing module
            # must surface unchanged.
            own_failure = exc.name is not None and (
                module_name == exc.name or module_name.startswith(exc.name + ".")
            )
            if not own_failure:
                raise
            if missing is None:
                # Keep the error for the longest prefix: it is the most specific.
                missing = exc
            continue

        target = module
        for attr in parts[split_at:]:
            target = getattr(target, attr)
        return target

    raise missing
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_class_path(cls: type) -> str:
    """Return ``"<module>.<qualified name>"`` for the given class."""
    module_name = cls.__module__
    qualified_name = cls.__qualname__
    return f"{module_name}.{qualified_name}"
|
baqueue/supervisor.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Supervisor - manages a pool of workers with auto-scaling and graceful shutdown."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import signal
|
|
8
|
+
import os
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from baqueue.config import SupervisorConfig
|
|
12
|
+
from baqueue.drivers.base import BaseDriver
|
|
13
|
+
from baqueue.events import EventBus
|
|
14
|
+
from baqueue.pruner import Pruner
|
|
15
|
+
from baqueue.worker import Worker
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("baqueue.supervisor")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Supervisor:
    """Manages a pool of workers for one or more queues.

    On :meth:`start` the supervisor spawns ``config.min_workers`` workers and
    launches background loops that poll delayed jobs and report stats to the
    driver.  When a ``balancer`` is supplied it is consulted periodically to
    resize the pool; when a ``pruner`` is supplied it is run as a background
    task.  :meth:`stop` (also triggered by SIGTERM/SIGINT where supported)
    shuts everything down.
    """

    def __init__(
        self,
        driver: BaseDriver,
        config: SupervisorConfig | None = None,
        events: EventBus | None = None,
        balancer: Any | None = None,
        pruner: Pruner | None = None,
    ):
        """Create a supervisor.

        Args:
            driver: Queue backend shared by every worker.
            config: Pool settings; a default ``SupervisorConfig`` when omitted.
            events: Event bus; ``EventBus.default()`` when omitted.
            balancer: Optional object whose ``recommend(driver, queues,
                current_count)`` coroutine returns the desired worker count.
            pruner: Optional pruner with ``start()`` / ``stop()``.
        """
        self.driver = driver
        self.config = config or SupervisorConfig()
        self.events = events or EventBus.default()
        self.balancer = balancer
        self.pruner = pruner
        self._workers: list[Worker] = []
        self._tasks: list[asyncio.Task] = []
        self._running = False
        self._delayed_task: asyncio.Task | None = None
        self._heartbeat_task: asyncio.Task | None = None
        self._balance_task: asyncio.Task | None = None
        self._pruner_task: asyncio.Task | None = None
        # Shutdown task started by a signal; kept so it is not garbage
        # collected mid-flight and so repeated signals do not stack up.
        self._stop_task: asyncio.Task | None = None

    @property
    def is_running(self) -> bool:
        """Whether the supervisor has been started and not yet stopped."""
        return self._running

    @property
    def worker_count(self) -> int:
        """Number of workers currently tracked by the pool."""
        return len(self._workers)

    @property
    def stats(self) -> dict[str, Any]:
        """Snapshot of the supervisor and per-worker stats."""
        return {
            "name": self.config.name,
            "queues": self.config.queues,
            "balance": self.config.balance,
            "workers": len(self._workers),
            "running": self._running,
            "worker_stats": [w.stats for w in self._workers],
        }

    async def start(self) -> None:
        """Start the supervisor and its worker pool.

        Returns once the initially spawned worker tasks have finished.
        """
        self._running = True
        logger.info(
            "Supervisor '%s' starting with %d workers on queues %s",
            self.config.name, self.config.min_workers, self.config.queues,
        )
        await self.events.emit("supervisor.started", supervisor=self.config.name)

        self._setup_signal_handlers()

        for i in range(self.config.min_workers):
            self._spawn_worker(i)

        await self._report_stats()
        self._delayed_task = asyncio.create_task(self._poll_delayed())
        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())

        if self.balancer:
            self._balance_task = asyncio.create_task(self._balance_loop())

        if self.pruner:
            self._pruner_task = asyncio.create_task(self.pruner.start())

        try:
            # Only the tasks that exist at this point are awaited here; workers
            # added later by scale() are awaited in stop().
            await asyncio.gather(*self._tasks, return_exceptions=True)
        except asyncio.CancelledError:
            pass

    async def stop(self) -> None:
        """Stop all workers and background tasks, then report final stats."""
        logger.info("Supervisor '%s' shutting down...", self.config.name)
        self._running = False

        await self._report_stats()
        for w in self._workers:
            w.stop()

        if self.pruner:
            self.pruner.stop()

        aux_tasks = [
            t for t in (self._delayed_task, self._heartbeat_task, self._balance_task, self._pruner_task)
            if t is not None
        ]
        for t in aux_tasks:
            t.cancel()
        if aux_tasks:
            await asyncio.gather(*aux_tasks, return_exceptions=True)
        self._delayed_task = None
        self._heartbeat_task = None
        self._balance_task = None
        self._pruner_task = None

        # NOTE(review): worker tasks are cancelled right after being asked to
        # stop, so a job that is mid-flight is interrupted rather than drained.
        # Confirm this is the intended meaning of "graceful".
        for task in self._tasks:
            task.cancel()

        await asyncio.gather(*self._tasks, return_exceptions=True)
        self._tasks.clear()
        self._workers.clear()
        await self._report_stats()

        await self.events.emit("supervisor.stopped", supervisor=self.config.name)
        logger.info("Supervisor '%s' stopped", self.config.name)

    def _spawn_worker(self, index: int) -> Worker:
        """Create worker ``index``, start it as a task and track both."""
        worker = Worker(
            driver=self.driver,
            queues=list(self.config.queues),
            events=self.events,
            sleep_interval=self.config.sleep,
            timeout=self.config.timeout,
            name=f"{self.config.name}-worker-{index}",
        )
        self._workers.append(worker)
        task = asyncio.create_task(worker.start())
        self._tasks.append(task)
        return worker

    async def scale(self, count: int) -> None:
        """Scale worker pool to the specified count.

        ``count`` is clamped to ``[config.min_workers, config.max_workers]``.
        Scaling down asks the most recently added workers to stop; they exit
        after their current loop iteration.
        """
        # Forget tasks of workers that have already exited (e.g. after an
        # earlier scale-down); otherwise the list grows with every rebalance.
        self._tasks[:] = [task for task in self._tasks if not task.done()]

        count = max(self.config.min_workers, min(count, self.config.max_workers))
        current = len(self._workers)

        if count > current:
            for i in range(current, count):
                self._spawn_worker(i)
            logger.info("Scaled up to %d workers", count)
        elif count < current:
            for _ in range(current - count):
                worker = self._workers.pop()
                worker.stop()
            logger.info("Scaled down to %d workers", count)

    async def _poll_delayed(self) -> None:
        """Periodically move delayed jobs into their queues."""
        while self._running:
            try:
                await self.driver.pop_delayed()
            except Exception:
                logger.exception("Error polling delayed jobs")
            await asyncio.sleep(1)

    async def _balance_loop(self) -> None:
        """Periodically rebalance workers across queues."""
        while self._running:
            try:
                if self.balancer:
                    new_count = await self.balancer.recommend(
                        self.driver, self.config.queues, len(self._workers)
                    )
                    if new_count != len(self._workers):
                        await self.scale(new_count)
            except Exception:
                logger.exception("Error in balance loop")
            await asyncio.sleep(5)

    async def _heartbeat_loop(self) -> None:
        """Report stats to the driver once per second while running."""
        while self._running:
            await self._report_stats()
            await asyncio.sleep(1)

    async def _report_stats(self) -> None:
        """Send the current stats to the driver; failures are logged only."""
        try:
            await self.driver.report_supervisor(self.stats)
        except Exception:
            logger.exception("Failed to report supervisor stats")

    def _request_stop(self) -> None:
        """Signal callback: start stop() once and keep a reference to its task."""
        if self._stop_task is not None and not self._stop_task.done():
            # A shutdown is already in progress; ignore repeated signals.
            return
        self._stop_task = asyncio.create_task(self.stop())

    def _setup_signal_handlers(self) -> None:
        """Install SIGTERM/SIGINT handlers that trigger stop().

        Skipped on Windows.  When the running loop cannot install handlers,
        a warning is logged and the supervisor keeps running without them.
        """
        if os.name == "nt":
            return
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            try:
                loop.add_signal_handler(sig, self._request_stop)
            except (NotImplementedError, RuntimeError):
                # Raised by loops without signal support or when not running
                # in the main thread; degrade to explicit stop() calls.
                logger.warning(
                    "Could not install handler for %s; call stop() explicitly", sig
                )
|
baqueue/worker.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Worker - processes jobs from queues."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import traceback
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from baqueue.drivers.base import BaseDriver
|
|
11
|
+
from baqueue.events import EventBus
|
|
12
|
+
from baqueue.job import Job, FunctionJob
|
|
13
|
+
from baqueue.retry import compute_delay, should_retry
|
|
14
|
+
from baqueue.serializer import JobPayload, resolve_job_class
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("baqueue.worker")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Worker:
    """Pulls and executes jobs from one or more queues.

    The worker polls its queues in order, runs each job's ``handle``
    coroutine under a timeout, and reports the outcome to the driver and the
    event bus.  Failed jobs are released for retry or marked failed depending
    on ``should_retry``.
    """

    def __init__(
        self,
        driver: BaseDriver,
        queues: list[str],
        events: EventBus | None = None,
        sleep_interval: float = 1.0,
        timeout: int = 60,
        name: str = "worker-0",
    ):
        """Create a worker.

        Args:
            driver: Queue backend to pop jobs from and report results to.
            queues: Queue names, polled in the given order.
            events: Event bus; ``EventBus.default()`` when omitted.
            sleep_interval: Seconds to sleep when no job is available.
            timeout: Fallback job timeout when the payload's is falsy.
            name: Worker name used in events, logs and stats.
        """
        self.driver = driver
        self.queues = queues
        self.events = events or EventBus.default()
        self.sleep_interval = sleep_interval
        self.timeout = timeout
        self.name = name
        self._running = False
        self._current_job: JobPayload | None = None
        self._jobs_processed = 0
        self._jobs_failed = 0

    @property
    def is_running(self) -> bool:
        """Whether the worker loop is active."""
        return self._running

    @property
    def stats(self) -> dict[str, Any]:
        """Snapshot of the worker's state and counters."""
        return {
            "name": self.name,
            "queues": self.queues,
            "running": self._running,
            "current_job": self._current_job.id if self._current_job else None,
            "jobs_processed": self._jobs_processed,
            "jobs_failed": self._jobs_failed,
        }

    async def start(self) -> None:
        """Start the worker loop.

        Runs until :meth:`stop` is called or the task is cancelled.  An
        unexpected error while fetching or processing is logged and followed
        by a sleep, so a transient driver failure does not end the loop.
        """
        self._running = True
        await self.events.emit("worker.started", worker=self.name)
        logger.info("Worker %s started on queues %s", self.name, self.queues)

        try:
            while self._running:
                try:
                    job = await self._fetch_next()
                    if job:
                        await self._process(job)
                        continue
                except asyncio.CancelledError:
                    raise
                except Exception:
                    logger.exception(
                        "Worker %s hit an unexpected error; backing off", self.name
                    )
                # Idle or recovering from an error: wait before polling again.
                await asyncio.sleep(self.sleep_interval)
        except asyncio.CancelledError:
            pass
        finally:
            self._running = False
            await self.events.emit("worker.stopped", worker=self.name)
            logger.info("Worker %s stopped", self.name)

    def stop(self) -> None:
        """Ask the loop to exit after the current iteration."""
        self._running = False

    async def _fetch_next(self) -> JobPayload | None:
        """Return the first job available across the queues, or ``None``."""
        for queue in self.queues:
            job = await self.driver.pop(queue)
            if job:
                return job
        return None

    async def _process(self, payload: JobPayload) -> None:
        """Run one job and record its outcome.

        On success the job is completed, metrics and events are emitted, the
        optional ``on_success`` hook runs and batch completion is checked.
        On any exception the job is released for retry or marked failed, in
        which case ``on_failure`` runs and batch failure is checked.
        """
        self._current_job = payload
        job_timeout = payload.timeout or self.timeout
        # Stays None when the failure happens before or during instantiation.
        job_instance: Job | None = None

        try:
            await self.events.emit("job.started", payload=payload, worker=self.name)
            await self.driver.record_metric(payload.queue, "processing", 1)
            logger.debug("Processing job %s (%s)", payload.id, payload.job_class)

            job_instance = self._instantiate(payload)
            result = await asyncio.wait_for(
                job_instance.handle(**payload.data),
                timeout=job_timeout,
            )

            # NOTE(review): an error raised after complete() (metric, event
            # listener, batch check) is handled below as a job failure, which
            # may re-queue a job the driver already marked complete. Confirm.
            await self.driver.complete(payload)
            await self.driver.record_metric(payload.queue, "completed", 1)
            await self.events.emit("job.completed", payload=payload, result=result, worker=self.name)
            self._jobs_processed += 1

            if hasattr(job_instance, "on_success"):
                try:
                    await job_instance.on_success(result, payload)
                except Exception:
                    logger.exception("Error in on_success for job %s", payload.id)

            await self._check_batch_completion(payload)

        except Exception as exc:
            error_msg = f"{type(exc).__name__}: {exc}\n{traceback.format_exc()}"
            logger.warning("Job %s failed (attempt %d): %s", payload.id, payload.attempts, exc)

            if should_retry(payload.attempts, payload.max_attempts):
                delay = compute_delay(payload.backoff, payload.attempts)
                await self.driver.release(payload, delay=delay)
                await self.events.emit("job.retrying", payload=payload, error=error_msg, delay=delay)
            else:
                await self.driver.fail(payload, error_msg)
                await self.driver.record_metric(payload.queue, "failed", 1)
                await self.events.emit("job.failed", payload=payload, error=error_msg, worker=self.name)
                self._jobs_failed += 1

                # Prefer the instance that actually ran. Only build a new one
                # when the failure happened before instantiation, and never
                # let a bad class path abort the remaining failure handling.
                if job_instance is None:
                    try:
                        job_instance = self._instantiate(payload)
                    except Exception:
                        logger.exception(
                            "Could not instantiate job %s for on_failure", payload.id
                        )

                on_failure = getattr(job_instance, "on_failure", None)
                if on_failure is not None:
                    try:
                        await on_failure(exc, payload)
                    except Exception:
                        logger.exception("Error in on_failure for job %s", payload.id)

                await self._check_batch_failure(payload)
        finally:
            self._current_job = None

    def _instantiate(self, payload: JobPayload) -> Job:
        """Resolve the payload's job class and return a runnable job object."""
        cls = resolve_job_class(payload.job_class)
        if isinstance(cls, FunctionJob):
            # The resolved object is already a FunctionJob instance.
            return cls
        return cls()

    async def _check_batch_completion(self, payload: JobPayload) -> None:
        """Count a completed batch job and emit ``batch.completed`` when done."""
        if not payload.batch_id:
            return
        batch = await self.driver.increment_batch_counter(payload.batch_id, "completed_count", 1)
        if not batch:
            return
        done = batch.get("completed_count", 0) + batch.get("failed_count", 0)
        # Equality, not >=, so only the worker that pushed `done` to total fires the event.
        if done == batch.get("total", 0):
            await self.events.emit("batch.completed", batch_id=payload.batch_id, batch=batch)

    async def _check_batch_failure(self, payload: JobPayload) -> None:
        """Count a failed batch job and emit ``batch.failed`` on the first one."""
        if not payload.batch_id:
            return
        batch = await self.driver.increment_batch_counter(payload.batch_id, "failed_count", 1)
        if not batch:
            return
        # Fire batch.failed exactly once on the first failure (the increment that
        # transitioned failed_count from 0 to 1).
        if batch.get("allow_failures", False) is False and batch.get("failed_count", 0) == 1:
            await self.events.emit("batch.failed", batch_id=payload.batch_id, batch=batch)
|