jqueue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jqueue/__init__.py +95 -0
- jqueue/adapters/__init__.py +0 -0
- jqueue/adapters/storage/__init__.py +0 -0
- jqueue/adapters/storage/filesystem.py +108 -0
- jqueue/adapters/storage/gcs.py +130 -0
- jqueue/adapters/storage/memory.py +62 -0
- jqueue/adapters/storage/s3.py +135 -0
- jqueue/core/__init__.py +0 -0
- jqueue/core/broker.py +109 -0
- jqueue/core/codec.py +42 -0
- jqueue/core/direct.py +170 -0
- jqueue/core/group_commit.py +263 -0
- jqueue/core/heartbeat.py +90 -0
- jqueue/domain/__init__.py +0 -0
- jqueue/domain/errors.py +46 -0
- jqueue/domain/models.py +177 -0
- jqueue/ports/__init__.py +0 -0
- jqueue/ports/storage.py +81 -0
- jqueue-0.1.0.dist-info/METADATA +712 -0
- jqueue-0.1.0.dist-info/RECORD +22 -0
- jqueue-0.1.0.dist-info/WHEEL +4 -0
- jqueue-0.1.0.dist-info/licenses/LICENSE +21 -0
jqueue/core/direct.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DirectQueue — one CAS write per operation.
|
|
3
|
+
|
|
4
|
+
Every enqueue, dequeue, ack, nack, or heartbeat call does:
|
|
5
|
+
1. read current state + etag from storage
|
|
6
|
+
2. mutate state in memory
|
|
7
|
+
3. CAS write back with if_match=etag (retries on CASConflictError)
|
|
8
|
+
|
|
9
|
+
Suitable for ~1-5 ops/sec workloads depending on storage backend latency.
|
|
10
|
+
Use BrokerQueue for higher throughput.
|
|
11
|
+
|
|
12
|
+
Retry policy
|
|
13
|
+
------------
|
|
14
|
+
Operations are attempted up to `max_retries` times in total (default 10), retrying on CASConflictError
|
|
15
|
+
with linear back-off (10ms × attempt). Raises CASConflictError if all retries
|
|
16
|
+
are exhausted.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import asyncio
|
|
22
|
+
import dataclasses
|
|
23
|
+
from collections.abc import Callable
|
|
24
|
+
from datetime import UTC, datetime, timedelta
|
|
25
|
+
|
|
26
|
+
from jqueue.core import codec
|
|
27
|
+
from jqueue.domain.errors import CASConflictError, JobNotFoundError
|
|
28
|
+
from jqueue.domain.models import Job, JobStatus, QueueState
|
|
29
|
+
from jqueue.ports.storage import ObjectStoragePort
|
|
30
|
+
|
|
31
|
+
# Signature of a queue mutation: receives the current QueueState and returns
# the new QueueState that should be written back.
MutationFn = Callable[[QueueState], QueueState]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclasses.dataclass
|
|
35
|
+
class DirectQueue:
|
|
36
|
+
"""
|
|
37
|
+
Thin stateless wrapper around ObjectStoragePort.
|
|
38
|
+
|
|
39
|
+
All methods are async and safe to call from multiple coroutines;
|
|
40
|
+
each operation performs a full CAS cycle independently.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
storage: ObjectStoragePort
|
|
44
|
+
max_retries: int = 10
|
|
45
|
+
|
|
46
|
+
# ------------------------------------------------------------------ #
|
|
47
|
+
# Write operations #
|
|
48
|
+
# ------------------------------------------------------------------ #
|
|
49
|
+
|
|
50
|
+
async def enqueue(
|
|
51
|
+
self,
|
|
52
|
+
entrypoint: str,
|
|
53
|
+
payload: bytes,
|
|
54
|
+
priority: int = 0,
|
|
55
|
+
) -> Job:
|
|
56
|
+
"""Add a new job to the queue. Returns the committed Job."""
|
|
57
|
+
job = Job.new(entrypoint, payload, priority)
|
|
58
|
+
await self._mutate(lambda state: state.with_job_added(job))
|
|
59
|
+
return job
|
|
60
|
+
|
|
61
|
+
async def dequeue(
|
|
62
|
+
self,
|
|
63
|
+
entrypoint: str | None = None,
|
|
64
|
+
*,
|
|
65
|
+
batch_size: int = 1,
|
|
66
|
+
) -> list[Job]:
|
|
67
|
+
"""
|
|
68
|
+
Claim up to `batch_size` QUEUED jobs and mark them IN_PROGRESS.
|
|
69
|
+
|
|
70
|
+
Optionally filter by entrypoint. Returns the list of claimed jobs.
|
|
71
|
+
Returns an empty list if no jobs are available.
|
|
72
|
+
"""
|
|
73
|
+
claimed: list[Job] = []
|
|
74
|
+
|
|
75
|
+
def _fn(state: QueueState) -> QueueState:
|
|
76
|
+
nonlocal claimed
|
|
77
|
+
available = state.queued_jobs(entrypoint)[:batch_size]
|
|
78
|
+
claimed = []
|
|
79
|
+
new_state = state
|
|
80
|
+
for job in available:
|
|
81
|
+
updated = job.with_status(JobStatus.IN_PROGRESS).with_heartbeat(
|
|
82
|
+
datetime.now(UTC)
|
|
83
|
+
)
|
|
84
|
+
new_state = new_state.with_job_replaced(updated)
|
|
85
|
+
claimed.append(updated)
|
|
86
|
+
return new_state
|
|
87
|
+
|
|
88
|
+
await self._mutate(_fn)
|
|
89
|
+
return claimed
|
|
90
|
+
|
|
91
|
+
async def ack(self, job_id: str) -> None:
|
|
92
|
+
"""Mark a job as done and remove it from the queue."""
|
|
93
|
+
await self._mutate(lambda state: state.with_job_removed(job_id))
|
|
94
|
+
|
|
95
|
+
async def nack(self, job_id: str) -> None:
|
|
96
|
+
"""Return a job to QUEUED status (worker failed or declined it)."""
|
|
97
|
+
|
|
98
|
+
def _fn(state: QueueState) -> QueueState:
|
|
99
|
+
job = state.find(job_id)
|
|
100
|
+
if job is None:
|
|
101
|
+
raise JobNotFoundError(job_id)
|
|
102
|
+
return state.with_job_replaced(
|
|
103
|
+
job.with_status(JobStatus.QUEUED).with_heartbeat(None)
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
await self._mutate(_fn)
|
|
107
|
+
|
|
108
|
+
async def heartbeat(self, job_id: str) -> None:
|
|
109
|
+
"""Update the heartbeat timestamp of an IN_PROGRESS job."""
|
|
110
|
+
|
|
111
|
+
def _fn(state: QueueState) -> QueueState:
|
|
112
|
+
job = state.find(job_id)
|
|
113
|
+
if job is None:
|
|
114
|
+
raise JobNotFoundError(job_id)
|
|
115
|
+
return state.with_job_replaced(job.with_heartbeat(datetime.now(UTC)))
|
|
116
|
+
|
|
117
|
+
await self._mutate(_fn)
|
|
118
|
+
|
|
119
|
+
async def requeue_stale(self, timeout: timedelta) -> int:
|
|
120
|
+
"""
|
|
121
|
+
Re-queue any IN_PROGRESS jobs whose heartbeat is older than `timeout`.
|
|
122
|
+
|
|
123
|
+
Returns the number of jobs re-queued.
|
|
124
|
+
"""
|
|
125
|
+
cutoff = datetime.now(UTC) - timeout
|
|
126
|
+
requeued = 0
|
|
127
|
+
|
|
128
|
+
def _fn(state: QueueState) -> QueueState:
|
|
129
|
+
nonlocal requeued
|
|
130
|
+
old_in_progress = {j.id for j in state.in_progress_jobs()}
|
|
131
|
+
new_state = state.requeue_stale(cutoff)
|
|
132
|
+
new_in_progress = {j.id for j in new_state.in_progress_jobs()}
|
|
133
|
+
requeued = len(old_in_progress - new_in_progress)
|
|
134
|
+
return new_state
|
|
135
|
+
|
|
136
|
+
await self._mutate(_fn)
|
|
137
|
+
return requeued
|
|
138
|
+
|
|
139
|
+
# ------------------------------------------------------------------ #
|
|
140
|
+
# Read operations (no CAS needed) #
|
|
141
|
+
# ------------------------------------------------------------------ #
|
|
142
|
+
|
|
143
|
+
async def read_state(self) -> QueueState:
|
|
144
|
+
"""Read-only snapshot of the current queue state."""
|
|
145
|
+
content, _ = await self.storage.read()
|
|
146
|
+
return codec.decode(content)
|
|
147
|
+
|
|
148
|
+
# ------------------------------------------------------------------ #
|
|
149
|
+
# Internal CAS loop #
|
|
150
|
+
# ------------------------------------------------------------------ #
|
|
151
|
+
|
|
152
|
+
async def _mutate(self, fn: MutationFn) -> None:
|
|
153
|
+
"""
|
|
154
|
+
Read-modify-write with CAS retry loop.
|
|
155
|
+
|
|
156
|
+
fn(state) -> new_state (synchronous)
|
|
157
|
+
Retries up to self.max_retries on CASConflictError.
|
|
158
|
+
"""
|
|
159
|
+
for attempt in range(self.max_retries):
|
|
160
|
+
content, etag = await self.storage.read()
|
|
161
|
+
state = codec.decode(content)
|
|
162
|
+
new_state = fn(state)
|
|
163
|
+
new_content = codec.encode(new_state)
|
|
164
|
+
try:
|
|
165
|
+
await self.storage.write(new_content, if_match=etag)
|
|
166
|
+
return
|
|
167
|
+
except CASConflictError:
|
|
168
|
+
if attempt == self.max_retries - 1:
|
|
169
|
+
raise
|
|
170
|
+
await asyncio.sleep(0.01 * (attempt + 1))
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GroupCommitLoop — serialize all mutations through a single asyncio writer task.
|
|
3
|
+
|
|
4
|
+
Algorithm (from the turbopuffer blog post)
|
|
5
|
+
------------------------------------------
|
|
6
|
+
When a write is in-flight, incoming operations accumulate in the pending buffer.
|
|
7
|
+
As soon as the write finishes, the buffer is flushed as the next CAS write.
|
|
8
|
+
This collapses N concurrent operations into O(1) storage writes.
|
|
9
|
+
|
|
10
|
+
Concretely:
|
|
11
|
+
|
|
12
|
+
Caller 1: enqueue() ──────────────────────────────────────────> [future]
|
|
13
|
+
Caller 2: enqueue() ──────────────────────────────────────────> [future]
|
|
14
|
+
Caller 3: dequeue() ──────────────────────────────────────────> [future]
|
|
15
|
+
↓ batch = [op1, op2, op3]
|
|
16
|
+
Writer: read → apply op1, op2, op3 → CAS write → resolve futures
|
|
17
|
+
|
|
18
|
+
If the CAS write fails (concurrent external writer), the whole batch is
|
|
19
|
+
re-applied on a fresh state and retried.
|
|
20
|
+
|
|
21
|
+
Per-operation error isolation
|
|
22
|
+
------------------------------
|
|
23
|
+
If one mutation in a batch raises (e.g., JobNotFoundError), that future gets
|
|
24
|
+
the exception but the other mutations in the batch still commit normally.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import asyncio
|
|
30
|
+
import dataclasses
|
|
31
|
+
from collections.abc import Callable
|
|
32
|
+
from datetime import UTC, datetime, timedelta
|
|
33
|
+
|
|
34
|
+
from jqueue.core import codec
|
|
35
|
+
from jqueue.domain.errors import CASConflictError, JQueueError
|
|
36
|
+
from jqueue.domain.models import Job, JobStatus, QueueState
|
|
37
|
+
from jqueue.ports.storage import ObjectStoragePort
|
|
38
|
+
|
|
39
|
+
# Signature of a buffered mutation: receives the current QueueState and
# returns the new QueueState.
MutationFn = Callable[[QueueState], QueueState]
|
|
40
|
+
|
|
41
|
+
# Maximum number of read + CAS-write attempts per batch before the whole
# batch is failed with CASConflictError.
_MAX_RETRIES: int = 20
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclasses.dataclass
class _PendingOp:
    """One buffered mutation together with the future its submitter awaits."""

    # Mutation to apply to the queue state when the batch is committed.
    fn: MutationFn
    # Resolved (result or exception) once the batch containing this op is done.
    future: asyncio.Future[None]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
|
|
53
|
+
class GroupCommitLoop:
|
|
54
|
+
"""
|
|
55
|
+
Serializes all storage mutations through a single asyncio writer task.
|
|
56
|
+
|
|
57
|
+
Usage
|
|
58
|
+
-----
|
|
59
|
+
loop = GroupCommitLoop(storage=my_storage)
|
|
60
|
+
await loop.start()
|
|
61
|
+
try:
|
|
62
|
+
job = await loop.enqueue("send_email", b"payload")
|
|
63
|
+
jobs = await loop.dequeue("send_email", batch_size=5)
|
|
64
|
+
finally:
|
|
65
|
+
await loop.stop() # drains pending ops before shutting down
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
storage: ObjectStoragePort
|
|
69
|
+
stale_timeout: timedelta = timedelta(minutes=5)
|
|
70
|
+
|
|
71
|
+
_pending: list[_PendingOp] = dataclasses.field(
|
|
72
|
+
default_factory=list, init=False, repr=False
|
|
73
|
+
)
|
|
74
|
+
_wakeup: asyncio.Event = dataclasses.field(
|
|
75
|
+
default_factory=asyncio.Event, init=False, repr=False
|
|
76
|
+
)
|
|
77
|
+
_task: asyncio.Task[None] | None = dataclasses.field(
|
|
78
|
+
default=None, init=False, repr=False
|
|
79
|
+
)
|
|
80
|
+
_stopped: bool = dataclasses.field(default=False, init=False, repr=False)
|
|
81
|
+
|
|
82
|
+
# ------------------------------------------------------------------ #
|
|
83
|
+
# Lifecycle #
|
|
84
|
+
# ------------------------------------------------------------------ #
|
|
85
|
+
|
|
86
|
+
async def start(self) -> None:
|
|
87
|
+
"""Start the background writer task."""
|
|
88
|
+
if self._task is not None:
|
|
89
|
+
raise RuntimeError("GroupCommitLoop is already running")
|
|
90
|
+
self._task = asyncio.create_task(
|
|
91
|
+
self._writer_loop(), name="jqueue-group-commit-writer"
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
async def stop(self) -> None:
|
|
95
|
+
"""Signal shutdown and wait for the writer to drain pending ops."""
|
|
96
|
+
self._stopped = True
|
|
97
|
+
self._wakeup.set()
|
|
98
|
+
if self._task is not None:
|
|
99
|
+
await self._task
|
|
100
|
+
self._task = None
|
|
101
|
+
|
|
102
|
+
# ------------------------------------------------------------------ #
|
|
103
|
+
# Public mutation API #
|
|
104
|
+
# ------------------------------------------------------------------ #
|
|
105
|
+
|
|
106
|
+
async def enqueue(
|
|
107
|
+
self,
|
|
108
|
+
entrypoint: str,
|
|
109
|
+
payload: bytes,
|
|
110
|
+
priority: int = 0,
|
|
111
|
+
) -> Job:
|
|
112
|
+
"""Add a new job. Returns the committed Job (UUID stable across retries)."""
|
|
113
|
+
job = Job.new(entrypoint, payload, priority)
|
|
114
|
+
|
|
115
|
+
def _fn(state: QueueState) -> QueueState:
|
|
116
|
+
return state.with_job_added(job)
|
|
117
|
+
|
|
118
|
+
await self._submit(_fn)
|
|
119
|
+
return job
|
|
120
|
+
|
|
121
|
+
async def dequeue(
|
|
122
|
+
self,
|
|
123
|
+
entrypoint: str | None = None,
|
|
124
|
+
*,
|
|
125
|
+
batch_size: int = 1,
|
|
126
|
+
) -> list[Job]:
|
|
127
|
+
"""Claim up to batch_size QUEUED jobs and mark them IN_PROGRESS."""
|
|
128
|
+
claimed: list[Job] = []
|
|
129
|
+
|
|
130
|
+
def _fn(state: QueueState) -> QueueState:
|
|
131
|
+
nonlocal claimed
|
|
132
|
+
available = state.queued_jobs(entrypoint)[:batch_size]
|
|
133
|
+
claimed = []
|
|
134
|
+
new_state = state
|
|
135
|
+
for job in available:
|
|
136
|
+
updated = job.with_status(JobStatus.IN_PROGRESS).with_heartbeat(
|
|
137
|
+
datetime.now(UTC)
|
|
138
|
+
)
|
|
139
|
+
new_state = new_state.with_job_replaced(updated)
|
|
140
|
+
claimed.append(updated)
|
|
141
|
+
return new_state
|
|
142
|
+
|
|
143
|
+
await self._submit(_fn)
|
|
144
|
+
return claimed
|
|
145
|
+
|
|
146
|
+
async def ack(self, job_id: str) -> None:
|
|
147
|
+
"""Remove a completed job from the queue."""
|
|
148
|
+
await self._submit(lambda state: state.with_job_removed(job_id))
|
|
149
|
+
|
|
150
|
+
async def nack(self, job_id: str) -> None:
|
|
151
|
+
"""Return a job to QUEUED status."""
|
|
152
|
+
from jqueue.domain.errors import JobNotFoundError
|
|
153
|
+
|
|
154
|
+
def _fn(state: QueueState) -> QueueState:
|
|
155
|
+
job = state.find(job_id)
|
|
156
|
+
if job is None:
|
|
157
|
+
raise JobNotFoundError(job_id)
|
|
158
|
+
return state.with_job_replaced(
|
|
159
|
+
job.with_status(JobStatus.QUEUED).with_heartbeat(None)
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
await self._submit(_fn)
|
|
163
|
+
|
|
164
|
+
async def heartbeat(self, job_id: str) -> None:
|
|
165
|
+
"""Refresh the heartbeat timestamp for an IN_PROGRESS job."""
|
|
166
|
+
from jqueue.domain.errors import JobNotFoundError
|
|
167
|
+
|
|
168
|
+
def _fn(state: QueueState) -> QueueState:
|
|
169
|
+
job = state.find(job_id)
|
|
170
|
+
if job is None:
|
|
171
|
+
raise JobNotFoundError(job_id)
|
|
172
|
+
return state.with_job_replaced(job.with_heartbeat(datetime.now(UTC)))
|
|
173
|
+
|
|
174
|
+
await self._submit(_fn)
|
|
175
|
+
|
|
176
|
+
async def read_state(self) -> QueueState:
|
|
177
|
+
"""Read-only snapshot of current queue state (bypasses the write pipeline)."""
|
|
178
|
+
content, _ = await self.storage.read()
|
|
179
|
+
return codec.decode(content)
|
|
180
|
+
|
|
181
|
+
# ------------------------------------------------------------------ #
|
|
182
|
+
# Internal machinery #
|
|
183
|
+
# ------------------------------------------------------------------ #
|
|
184
|
+
|
|
185
|
+
async def _submit(self, fn: MutationFn) -> None:
|
|
186
|
+
"""
|
|
187
|
+
Enqueue a mutation and block until it is committed.
|
|
188
|
+
|
|
189
|
+
Appends the op to _pending, wakes the writer, then awaits the future
|
|
190
|
+
that resolves when the batch containing this op successfully commits.
|
|
191
|
+
"""
|
|
192
|
+
if self._stopped:
|
|
193
|
+
raise JQueueError("GroupCommitLoop is stopped")
|
|
194
|
+
future: asyncio.Future[None] = asyncio.get_running_loop().create_future()
|
|
195
|
+
self._pending.append(_PendingOp(fn=fn, future=future))
|
|
196
|
+
self._wakeup.set()
|
|
197
|
+
await future
|
|
198
|
+
|
|
199
|
+
async def _writer_loop(self) -> None:
|
|
200
|
+
"""Background coroutine — runs until stopped and all pending ops drain."""
|
|
201
|
+
while not self._stopped or self._pending:
|
|
202
|
+
if not self._pending:
|
|
203
|
+
self._wakeup.clear()
|
|
204
|
+
await self._wakeup.wait()
|
|
205
|
+
|
|
206
|
+
if not self._pending:
|
|
207
|
+
continue
|
|
208
|
+
|
|
209
|
+
batch = list(self._pending)
|
|
210
|
+
self._pending.clear()
|
|
211
|
+
await self._commit_batch(batch)
|
|
212
|
+
|
|
213
|
+
async def _commit_batch(self, batch: list[_PendingOp]) -> None:
|
|
214
|
+
"""
|
|
215
|
+
Apply all ops in `batch` to the current state and CAS write.
|
|
216
|
+
|
|
217
|
+
Retries on CASConflictError. Per-mutation exceptions only fail that
|
|
218
|
+
op's future; the rest of the batch still commits on the same write.
|
|
219
|
+
"""
|
|
220
|
+
for attempt in range(_MAX_RETRIES):
|
|
221
|
+
try:
|
|
222
|
+
content, etag = await self.storage.read()
|
|
223
|
+
state = codec.decode(content)
|
|
224
|
+
|
|
225
|
+
# Sweep stale jobs on every write cycle (free — no extra I/O)
|
|
226
|
+
cutoff = datetime.now(UTC) - self.stale_timeout
|
|
227
|
+
state = state.requeue_stale(cutoff)
|
|
228
|
+
|
|
229
|
+
per_op_errors: dict[int, Exception] = {}
|
|
230
|
+
for i, op in enumerate(batch):
|
|
231
|
+
try:
|
|
232
|
+
state = op.fn(state)
|
|
233
|
+
except Exception as exc:
|
|
234
|
+
per_op_errors[i] = exc
|
|
235
|
+
|
|
236
|
+
await self.storage.write(codec.encode(state), if_match=etag)
|
|
237
|
+
|
|
238
|
+
for i, op in enumerate(batch):
|
|
239
|
+
if op.future.done():
|
|
240
|
+
continue
|
|
241
|
+
if i in per_op_errors:
|
|
242
|
+
op.future.set_exception(per_op_errors[i])
|
|
243
|
+
else:
|
|
244
|
+
op.future.set_result(None)
|
|
245
|
+
return
|
|
246
|
+
|
|
247
|
+
except CASConflictError:
|
|
248
|
+
if attempt == _MAX_RETRIES - 1:
|
|
249
|
+
_fail_batch(batch, CASConflictError("Max CAS retries exceeded"))
|
|
250
|
+
return
|
|
251
|
+
# Exponential back-off, capped at ~320 ms
|
|
252
|
+
await asyncio.sleep(0.005 * (2 ** min(attempt, 6)))
|
|
253
|
+
|
|
254
|
+
except Exception as exc:
|
|
255
|
+
_fail_batch(batch, exc)
|
|
256
|
+
return
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _fail_batch(batch: list[_PendingOp], exc: Exception) -> None:
    """Propagate `exc` to every op in `batch` whose future is still unresolved."""
    unresolved = (entry.future for entry in batch if not entry.future.done())
    for fut in unresolved:
        fut.set_exception(exc)
|
jqueue/core/heartbeat.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HeartbeatManager — async context manager for background heartbeats.
|
|
3
|
+
|
|
4
|
+
A worker holding a long-running job wraps its work in HeartbeatManager
|
|
5
|
+
to send periodic heartbeats, preventing the broker from re-queueing the
|
|
6
|
+
job as stale.
|
|
7
|
+
|
|
8
|
+
Usage
|
|
9
|
+
-----
|
|
10
|
+
async with BrokerQueue(storage) as q:
|
|
11
|
+
[job] = await q.dequeue("process_video")
|
|
12
|
+
|
|
13
|
+
async with HeartbeatManager(q, job.id, interval=timedelta(seconds=30)):
|
|
14
|
+
result = await do_long_work(job.payload)
|
|
15
|
+
|
|
16
|
+
await q.ack(job.id)
|
|
17
|
+
|
|
18
|
+
If the worker raises, the heartbeat task is cancelled. Callers should
|
|
19
|
+
nack() the job in an except/finally block.
|
|
20
|
+
|
|
21
|
+
HeartbeatManager is typed against the structural Protocol _HasHeartbeat, so
|
|
22
|
+
it works with BrokerQueue, DirectQueue, and GroupCommitLoop without any
|
|
23
|
+
shared base class.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import dataclasses
|
|
30
|
+
from datetime import timedelta
|
|
31
|
+
from types import TracebackType
|
|
32
|
+
from typing import Protocol
|
|
33
|
+
|
|
34
|
+
from jqueue.domain.errors import JQueueError
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class _HasHeartbeat(Protocol):
    """Structural type: anything exposing an async ``heartbeat(job_id)`` method."""

    async def heartbeat(self, job_id: str) -> None:
        """Record a heartbeat for the job identified by `job_id`."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclasses.dataclass
class HeartbeatManager:
    """
    Sends periodic heartbeats for a single job.

    Used as an async context manager: entering starts a background task that
    calls ``queue.heartbeat(job_id)`` every `interval`; exiting cancels that
    task and waits for it to finish.

    Parameters
    ----------
    queue    : any object with async heartbeat(job_id: str) -> None
    job_id   : the job to keep alive
    interval : time between heartbeat sends (default 60 seconds); must be > 0

    Raises
    ------
    ValueError
        If `interval` is zero or negative.
    """

    queue: _HasHeartbeat
    job_id: str
    interval: timedelta = timedelta(seconds=60)

    # Background heartbeat task; None outside the ``async with`` body.
    _task: asyncio.Task[None] | None = dataclasses.field(
        default=None, init=False, repr=False
    )

    def __post_init__(self) -> None:
        # asyncio.sleep() returns immediately for delays <= 0, which would turn
        # _beat() into a tight loop of back-to-back heartbeat calls.
        if self.interval <= timedelta(0):
            raise ValueError(f"interval must be positive, got {self.interval!r}")

    async def __aenter__(self) -> HeartbeatManager:
        """Start the background heartbeat task and return this manager."""
        self._task = asyncio.create_task(
            self._beat(), name=f"jqueue-heartbeat-{self.job_id}"
        )
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Cancel the heartbeat task and wait for it; never suppresses the body's exception."""
        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                # Expected outcome of the cancel() above.
                pass
            self._task = None

    async def _beat(self) -> None:
        """Sleep for `interval`, send a heartbeat, repeat until cancelled or a jqueue error occurs."""
        while True:
            # Sleep first: the first heartbeat is sent one interval after entry.
            await asyncio.sleep(self.interval.total_seconds())
            try:
                await self.queue.heartbeat(self.job_id)
            except JQueueError:
                # Job was acked or removed — stop silently.
                # NOTE: this catches every JQueueError subclass, so any jqueue
                # error raised by heartbeat() ends the loop.
                return
|
|
File without changes
|
jqueue/domain/errors.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Exception hierarchy for jqueue.
|
|
3
|
+
|
|
4
|
+
JQueueError
|
|
5
|
+
├── CASConflictError — write rejected because etag did not match
|
|
6
|
+
├── JobNotFoundError — job_id not present in current QueueState
|
|
7
|
+
└── StorageError — underlying I/O failure (wraps original exception)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JQueueError(Exception):
    """Root of the jqueue exception hierarchy; catch this to handle any jqueue error."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CASConflictError(JQueueError):
    """
    Signals that the storage backend rejected a compare-and-set write.

    This is the expected concurrency signal rather than a failure in the
    traditional sense: the caller should re-read the current state and
    retry the operation.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class JobNotFoundError(JQueueError):
    """
    Raised when a job_id is not present in the current QueueState.

    Attributes
    ----------
    job_id : str
        The identifier that could not be found.
    """

    def __init__(self, job_id: str) -> None:
        self.job_id = job_id
        super().__init__(f"Job {job_id!r} not found in queue state")

    def __reduce__(self) -> tuple[type, tuple[str]]:
        # Default exception pickling rebuilds from ``args`` (the formatted
        # message), which would feed the message back in as job_id and wrap
        # it a second time. Rebuild from the real constructor argument.
        return (type(self), (self.job_id,))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class StorageError(JQueueError):
    """
    Wraps an underlying I/O failure from a storage adapter.

    Attributes
    ----------
    message : str
        The description passed by the adapter (without the cause appended).
    cause : Exception
        The original exception from the storage backend.
    """

    def __init__(self, message: str, cause: Exception) -> None:
        self.message = message
        self.cause = cause
        super().__init__(f"{message}: {cause}")

    def __reduce__(self) -> tuple[type, tuple[str, Exception]]:
        # Default exception pickling calls the class with ``args`` (a single
        # formatted string), which fails because __init__ needs two arguments.
        # Rebuild from the original constructor arguments.
        return (type(self), (self.message, self.cause))
|