jqueue 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jqueue/core/direct.py ADDED
@@ -0,0 +1,170 @@
1
+ """
2
+ DirectQueue — one CAS write per operation.
3
+
4
+ Every enqueue, dequeue, ack, nack, or heartbeat call does:
5
+ 1. read current state + etag from storage
6
+ 2. mutate state in memory
7
+ 3. CAS write back with if_match=etag (retries on CASConflictError)
8
+
9
+ Suitable for ~1-5 ops/sec workloads depending on storage backend latency.
10
+ Use BrokerQueue for higher throughput.
11
+
12
+ Retry policy
13
+ ------------
14
+ Each operation makes up to `max_retries` attempts (default 10), retrying on CASConflictError
15
+ with linear back-off (10ms × attempt). Raises CASConflictError if all retries
16
+ are exhausted.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import dataclasses
23
+ from collections.abc import Callable
24
+ from datetime import UTC, datetime, timedelta
25
+
26
+ from jqueue.core import codec
27
+ from jqueue.domain.errors import CASConflictError, JobNotFoundError
28
+ from jqueue.domain.models import Job, JobStatus, QueueState
29
+ from jqueue.ports.storage import ObjectStoragePort
30
+
31
+ MutationFn = Callable[[QueueState], QueueState]
32
+
33
+
34
@dataclasses.dataclass
class DirectQueue:
    """
    Thin stateless wrapper around ObjectStoragePort.

    All methods are async and safe to call from multiple coroutines;
    each write operation performs a full read → mutate → CAS-write cycle
    independently (see `_mutate`), so no queue state is cached on the object.

    Attributes
    ----------
    storage : ObjectStoragePort
        Backend used via `read()` -> (content, etag) and
        `write(content, if_match=etag)`.
    max_retries : int
        Maximum number of CAS attempts per operation (default 10). Values
        below 1 are treated as 1 so an operation is never silently skipped.
    """

    storage: ObjectStoragePort
    max_retries: int = 10

    # ------------------------------------------------------------------ #
    # Write operations                                                     #
    # ------------------------------------------------------------------ #

    async def enqueue(
        self,
        entrypoint: str,
        payload: bytes,
        priority: int = 0,
    ) -> Job:
        """
        Add a new job to the queue. Returns the committed Job.

        The Job is created once, before the CAS loop, so the same object
        (and therefore the same identity) is re-applied on every retry.

        Raises CASConflictError if every attempt loses the CAS race.
        """
        job = Job.new(entrypoint, payload, priority)
        await self._mutate(lambda state: state.with_job_added(job))
        return job

    async def dequeue(
        self,
        entrypoint: str | None = None,
        *,
        batch_size: int = 1,
    ) -> list[Job]:
        """
        Claim up to `batch_size` QUEUED jobs and mark them IN_PROGRESS.

        Optionally filter by entrypoint. Returns the list of claimed jobs
        (each carrying a fresh heartbeat timestamp). Returns an empty list
        if no jobs are available or if `batch_size` is zero or negative.

        Raises CASConflictError if every attempt loses the CAS race.
        """
        claimed: list[Job] = []
        # A negative batch_size would turn the slice below into "everything
        # except the last N jobs"; clamp it so such a call claims nothing.
        limit = max(batch_size, 0)

        def _fn(state: QueueState) -> QueueState:
            nonlocal claimed
            # Start from scratch on every call: _mutate re-runs this function
            # after a CAS conflict and only the last run reflects what was
            # actually committed.
            claimed = []
            new_state = state
            for job in state.queued_jobs(entrypoint)[:limit]:
                updated = job.with_status(JobStatus.IN_PROGRESS).with_heartbeat(
                    datetime.now(UTC)
                )
                new_state = new_state.with_job_replaced(updated)
                claimed.append(updated)
            return new_state

        await self._mutate(_fn)
        return claimed

    async def ack(self, job_id: str) -> None:
        """
        Mark a job as done by removing it from the queue state.

        NOTE(review): behaviour for an unknown `job_id` is whatever
        QueueState.with_job_removed does — confirm against the model.
        """
        await self._mutate(lambda state: state.with_job_removed(job_id))

    async def nack(self, job_id: str) -> None:
        """
        Return a job to QUEUED status (worker failed or declined it).

        The heartbeat is cleared at the same time.
        Raises JobNotFoundError if `job_id` is not in the current state.
        """

        def _fn(state: QueueState) -> QueueState:
            job = state.find(job_id)
            if job is None:
                raise JobNotFoundError(job_id)
            return state.with_job_replaced(
                job.with_status(JobStatus.QUEUED).with_heartbeat(None)
            )

        await self._mutate(_fn)

    async def heartbeat(self, job_id: str) -> None:
        """
        Set the job's heartbeat timestamp to the current UTC time.

        Intended for IN_PROGRESS jobs; the job's status is not checked here.
        Raises JobNotFoundError if `job_id` is not in the current state.
        """

        def _fn(state: QueueState) -> QueueState:
            job = state.find(job_id)
            if job is None:
                raise JobNotFoundError(job_id)
            return state.with_job_replaced(job.with_heartbeat(datetime.now(UTC)))

        await self._mutate(_fn)

    async def requeue_stale(self, timeout: timedelta) -> int:
        """
        Re-queue any IN_PROGRESS jobs whose heartbeat is older than `timeout`.

        The cutoff is computed once, before the CAS loop, so retries use the
        same threshold. Returns the number of jobs re-queued, measured as the
        job ids that left the IN_PROGRESS set in the committed attempt.
        """
        cutoff = datetime.now(UTC) - timeout
        requeued = 0

        def _fn(state: QueueState) -> QueueState:
            nonlocal requeued
            before = {j.id for j in state.in_progress_jobs()}
            new_state = state.requeue_stale(cutoff)
            after = {j.id for j in new_state.in_progress_jobs()}
            requeued = len(before - after)
            return new_state

        await self._mutate(_fn)
        return requeued

    # ------------------------------------------------------------------ #
    # Read operations (no CAS needed)                                      #
    # ------------------------------------------------------------------ #

    async def read_state(self) -> QueueState:
        """Read-only snapshot of the current queue state (etag is discarded)."""
        content, _ = await self.storage.read()
        return codec.decode(content)

    # ------------------------------------------------------------------ #
    # Internal CAS loop                                                    #
    # ------------------------------------------------------------------ #

    async def _mutate(self, fn: MutationFn) -> None:
        """
        Read-modify-write with CAS retry loop.

        fn(state) -> new_state (synchronous). It may be called several times,
        once per attempt, each time with freshly read state. Exceptions raised
        by `fn` propagate immediately and nothing is written.

        Makes up to `self.max_retries` attempts (at least one), sleeping
        10 ms × attempt number between attempts. Re-raises the last
        CASConflictError when all attempts are exhausted.
        """
        # Guard against max_retries <= 0: without it the loop body would never
        # run and the operation would "succeed" without touching storage.
        attempts = max(self.max_retries, 1)
        for attempt in range(attempts):
            content, etag = await self.storage.read()
            new_content = codec.encode(fn(codec.decode(content)))
            try:
                await self.storage.write(new_content, if_match=etag)
            except CASConflictError:
                if attempt == attempts - 1:
                    raise
                await asyncio.sleep(0.01 * (attempt + 1))
            else:
                return
@@ -0,0 +1,263 @@
1
+ """
2
+ GroupCommitLoop — serialize all mutations through a single asyncio writer task.
3
+
4
+ Algorithm (from the turbopuffer blog post)
5
+ ------------------------------------------
6
+ When a write is in-flight, incoming operations accumulate in the pending buffer.
7
+ As soon as the write finishes, the buffer is flushed as the next CAS write.
8
+ This collapses N concurrent operations into O(1) storage writes.
9
+
10
+ Concretely:
11
+
12
+ Caller 1: enqueue() ──────────────────────────────────────────> [future]
13
+ Caller 2: enqueue() ──────────────────────────────────────────> [future]
14
+ Caller 3: dequeue() ──────────────────────────────────────────> [future]
15
+ ↓ batch = [op1, op2, op3]
16
+ Writer: read → apply op1, op2, op3 → CAS write → resolve futures
17
+
18
+ If the CAS write fails (concurrent external writer), the whole batch is
19
+ re-applied on a fresh state and retried.
20
+
21
+ Per-operation error isolation
22
+ ------------------------------
23
+ If one mutation in a batch raises (e.g., JobNotFoundError), that future gets
24
+ the exception but the other mutations in the batch still commit normally.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import asyncio
30
+ import dataclasses
31
+ from collections.abc import Callable
32
+ from datetime import UTC, datetime, timedelta
33
+
34
+ from jqueue.core import codec
35
+ from jqueue.domain.errors import CASConflictError, JQueueError
36
+ from jqueue.domain.models import Job, JobStatus, QueueState
37
+ from jqueue.ports.storage import ObjectStoragePort
38
+
39
+ MutationFn = Callable[[QueueState], QueueState]
40
+
41
+ _MAX_RETRIES: int = 20
42
+
43
+
44
@dataclasses.dataclass
class _PendingOp:
    """One buffered mutation together with the future its submitter awaits."""

    # Pure state -> new-state function applied by the writer when committing.
    fn: MutationFn
    # Completed (result or exception) once the op's batch has been handled.
    future: asyncio.Future[None]
50
+
51
+
52
+ @dataclasses.dataclass
53
+ class GroupCommitLoop:
54
+ """
55
+ Serializes all storage mutations through a single asyncio writer task.
56
+
57
+ Usage
58
+ -----
59
+ loop = GroupCommitLoop(storage=my_storage)
60
+ await loop.start()
61
+ try:
62
+ job = await loop.enqueue("send_email", b"payload")
63
+ jobs = await loop.dequeue("send_email", batch_size=5)
64
+ finally:
65
+ await loop.stop() # drains pending ops before shutting down
66
+ """
67
+
68
+ storage: ObjectStoragePort
69
+ stale_timeout: timedelta = timedelta(minutes=5)
70
+
71
+ _pending: list[_PendingOp] = dataclasses.field(
72
+ default_factory=list, init=False, repr=False
73
+ )
74
+ _wakeup: asyncio.Event = dataclasses.field(
75
+ default_factory=asyncio.Event, init=False, repr=False
76
+ )
77
+ _task: asyncio.Task[None] | None = dataclasses.field(
78
+ default=None, init=False, repr=False
79
+ )
80
+ _stopped: bool = dataclasses.field(default=False, init=False, repr=False)
81
+
82
+ # ------------------------------------------------------------------ #
83
+ # Lifecycle #
84
+ # ------------------------------------------------------------------ #
85
+
86
+ async def start(self) -> None:
87
+ """Start the background writer task."""
88
+ if self._task is not None:
89
+ raise RuntimeError("GroupCommitLoop is already running")
90
+ self._task = asyncio.create_task(
91
+ self._writer_loop(), name="jqueue-group-commit-writer"
92
+ )
93
+
94
+ async def stop(self) -> None:
95
+ """Signal shutdown and wait for the writer to drain pending ops."""
96
+ self._stopped = True
97
+ self._wakeup.set()
98
+ if self._task is not None:
99
+ await self._task
100
+ self._task = None
101
+
102
+ # ------------------------------------------------------------------ #
103
+ # Public mutation API #
104
+ # ------------------------------------------------------------------ #
105
+
106
+ async def enqueue(
107
+ self,
108
+ entrypoint: str,
109
+ payload: bytes,
110
+ priority: int = 0,
111
+ ) -> Job:
112
+ """Add a new job. Returns the committed Job (UUID stable across retries)."""
113
+ job = Job.new(entrypoint, payload, priority)
114
+
115
+ def _fn(state: QueueState) -> QueueState:
116
+ return state.with_job_added(job)
117
+
118
+ await self._submit(_fn)
119
+ return job
120
+
121
+ async def dequeue(
122
+ self,
123
+ entrypoint: str | None = None,
124
+ *,
125
+ batch_size: int = 1,
126
+ ) -> list[Job]:
127
+ """Claim up to batch_size QUEUED jobs and mark them IN_PROGRESS."""
128
+ claimed: list[Job] = []
129
+
130
+ def _fn(state: QueueState) -> QueueState:
131
+ nonlocal claimed
132
+ available = state.queued_jobs(entrypoint)[:batch_size]
133
+ claimed = []
134
+ new_state = state
135
+ for job in available:
136
+ updated = job.with_status(JobStatus.IN_PROGRESS).with_heartbeat(
137
+ datetime.now(UTC)
138
+ )
139
+ new_state = new_state.with_job_replaced(updated)
140
+ claimed.append(updated)
141
+ return new_state
142
+
143
+ await self._submit(_fn)
144
+ return claimed
145
+
146
+ async def ack(self, job_id: str) -> None:
147
+ """Remove a completed job from the queue."""
148
+ await self._submit(lambda state: state.with_job_removed(job_id))
149
+
150
+ async def nack(self, job_id: str) -> None:
151
+ """Return a job to QUEUED status."""
152
+ from jqueue.domain.errors import JobNotFoundError
153
+
154
+ def _fn(state: QueueState) -> QueueState:
155
+ job = state.find(job_id)
156
+ if job is None:
157
+ raise JobNotFoundError(job_id)
158
+ return state.with_job_replaced(
159
+ job.with_status(JobStatus.QUEUED).with_heartbeat(None)
160
+ )
161
+
162
+ await self._submit(_fn)
163
+
164
+ async def heartbeat(self, job_id: str) -> None:
165
+ """Refresh the heartbeat timestamp for an IN_PROGRESS job."""
166
+ from jqueue.domain.errors import JobNotFoundError
167
+
168
+ def _fn(state: QueueState) -> QueueState:
169
+ job = state.find(job_id)
170
+ if job is None:
171
+ raise JobNotFoundError(job_id)
172
+ return state.with_job_replaced(job.with_heartbeat(datetime.now(UTC)))
173
+
174
+ await self._submit(_fn)
175
+
176
+ async def read_state(self) -> QueueState:
177
+ """Read-only snapshot of current queue state (bypasses the write pipeline)."""
178
+ content, _ = await self.storage.read()
179
+ return codec.decode(content)
180
+
181
+ # ------------------------------------------------------------------ #
182
+ # Internal machinery #
183
+ # ------------------------------------------------------------------ #
184
+
185
+ async def _submit(self, fn: MutationFn) -> None:
186
+ """
187
+ Enqueue a mutation and block until it is committed.
188
+
189
+ Appends the op to _pending, wakes the writer, then awaits the future
190
+ that resolves when the batch containing this op successfully commits.
191
+ """
192
+ if self._stopped:
193
+ raise JQueueError("GroupCommitLoop is stopped")
194
+ future: asyncio.Future[None] = asyncio.get_running_loop().create_future()
195
+ self._pending.append(_PendingOp(fn=fn, future=future))
196
+ self._wakeup.set()
197
+ await future
198
+
199
+ async def _writer_loop(self) -> None:
200
+ """Background coroutine — runs until stopped and all pending ops drain."""
201
+ while not self._stopped or self._pending:
202
+ if not self._pending:
203
+ self._wakeup.clear()
204
+ await self._wakeup.wait()
205
+
206
+ if not self._pending:
207
+ continue
208
+
209
+ batch = list(self._pending)
210
+ self._pending.clear()
211
+ await self._commit_batch(batch)
212
+
213
+ async def _commit_batch(self, batch: list[_PendingOp]) -> None:
214
+ """
215
+ Apply all ops in `batch` to the current state and CAS write.
216
+
217
+ Retries on CASConflictError. Per-mutation exceptions only fail that
218
+ op's future; the rest of the batch still commits on the same write.
219
+ """
220
+ for attempt in range(_MAX_RETRIES):
221
+ try:
222
+ content, etag = await self.storage.read()
223
+ state = codec.decode(content)
224
+
225
+ # Sweep stale jobs on every write cycle (free — no extra I/O)
226
+ cutoff = datetime.now(UTC) - self.stale_timeout
227
+ state = state.requeue_stale(cutoff)
228
+
229
+ per_op_errors: dict[int, Exception] = {}
230
+ for i, op in enumerate(batch):
231
+ try:
232
+ state = op.fn(state)
233
+ except Exception as exc:
234
+ per_op_errors[i] = exc
235
+
236
+ await self.storage.write(codec.encode(state), if_match=etag)
237
+
238
+ for i, op in enumerate(batch):
239
+ if op.future.done():
240
+ continue
241
+ if i in per_op_errors:
242
+ op.future.set_exception(per_op_errors[i])
243
+ else:
244
+ op.future.set_result(None)
245
+ return
246
+
247
+ except CASConflictError:
248
+ if attempt == _MAX_RETRIES - 1:
249
+ _fail_batch(batch, CASConflictError("Max CAS retries exceeded"))
250
+ return
251
+ # Exponential back-off, capped at ~320 ms
252
+ await asyncio.sleep(0.005 * (2 ** min(attempt, 6)))
253
+
254
+ except Exception as exc:
255
+ _fail_batch(batch, exc)
256
+ return
257
+
258
+
259
def _fail_batch(batch: list[_PendingOp], exc: Exception) -> None:
    """Deliver `exc` to every op in `batch` whose future is still pending."""
    # Lazy on purpose: each future's done() is evaluated right before it is
    # failed, exactly as a plain loop with an inline check would do.
    still_pending = (op.future for op in batch if not op.future.done())
    for fut in still_pending:
        fut.set_exception(exc)
@@ -0,0 +1,90 @@
1
+ """
2
+ HeartbeatManager — async context manager for background heartbeats.
3
+
4
+ A worker holding a long-running job wraps its work in HeartbeatManager
5
+ to send periodic heartbeats, preventing the broker from re-queueing the
6
+ job as stale.
7
+
8
+ Usage
9
+ -----
10
+ async with BrokerQueue(storage) as q:
11
+ [job] = await q.dequeue("process_video")
12
+
13
+ async with HeartbeatManager(q, job.id, interval=timedelta(seconds=30)):
14
+ result = await do_long_work(job.payload)
15
+
16
+ await q.ack(job.id)
17
+
18
+ If the worker raises, the heartbeat task is cancelled. Callers should
19
+ nack() the job in an except/finally block.
20
+
21
+ HeartbeatManager is typed against the structural Protocol _HasHeartbeat, so
22
+ it works with BrokerQueue, DirectQueue, and GroupCommitLoop without any
23
+ shared base class.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import asyncio
29
+ import dataclasses
30
+ from datetime import timedelta
31
+ from types import TracebackType
32
+ from typing import Protocol
33
+
34
+ from jqueue.domain.errors import JQueueError
35
+
36
+
37
class _HasHeartbeat(Protocol):
    """Structural type for any object exposing an async ``heartbeat(job_id)``."""

    async def heartbeat(self, job_id: str) -> None:
        """Record a heartbeat for the job identified by ``job_id``."""
        ...
41
+
42
+
43
@dataclasses.dataclass
class HeartbeatManager:
    """
    Sends periodic heartbeats for a single job while used as an
    ``async with`` context manager.

    Parameters
    ----------
    queue    : any object with async heartbeat(job_id: str) -> None
    job_id   : the job to keep alive
    interval : time between heartbeat sends (default 60 seconds)
    """

    queue: _HasHeartbeat
    job_id: str
    interval: timedelta = timedelta(seconds=60)

    # Background task running _beat(); None whenever the context is not active.
    _task: asyncio.Task[None] | None = dataclasses.field(
        default=None, init=False, repr=False
    )

    async def __aenter__(self) -> HeartbeatManager:
        """Start the background heartbeat task and return this manager."""
        self._task = asyncio.create_task(
            self._beat(), name=f"jqueue-heartbeat-{self.job_id}"
        )
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """
        Cancel the heartbeat task and wait for it to finish.

        Never suppresses an exception raised inside the ``async with`` body.
        If the heartbeat task had already failed with an unexpected exception,
        that exception is raised here.
        """
        task = self._task
        if task is None:
            return
        self._task = None
        task.cancel()
        # gather(..., return_exceptions=True) hands back the heartbeat task's
        # own CancelledError as a value, but still raises CancelledError when
        # the *surrounding* task is cancelled during this wait. A bare
        # "except CancelledError: pass" around "await task" cannot tell the
        # two apart and would swallow the outer cancellation.
        (outcome,) = await asyncio.gather(task, return_exceptions=True)
        if isinstance(outcome, Exception):
            # CancelledError derives from BaseException, so only genuine
            # failures of the heartbeat loop are re-raised.
            raise outcome

    async def _beat(self) -> None:
        """Sleep for `interval`, send a heartbeat, repeat until cancelled."""
        while True:
            await asyncio.sleep(self.interval.total_seconds())
            try:
                await self.queue.heartbeat(self.job_id)
            except JQueueError:
                # Job was acked or removed — stop silently.
                # NOTE(review): this catches every JQueueError, so other
                # jqueue failures also end the heartbeat loop for this job.
                return
File without changes
@@ -0,0 +1,46 @@
1
+ """
2
+ Exception hierarchy for jqueue.
3
+
4
+ JQueueError
5
+ ├── CASConflictError — write rejected because etag did not match
6
+ ├── JobNotFoundError — job_id not present in current QueueState
7
+ └── StorageError — underlying I/O failure (wraps original exception)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+
13
class JQueueError(Exception):
    """Base class for all jqueue exceptions."""


class CASConflictError(JQueueError):
    """
    Raised when a compare-and-set write is rejected by the storage backend.

    The caller should re-read the current state and retry the operation.
    This is the normal concurrency signal — not an error in the traditional sense.
    """


class JobNotFoundError(JQueueError):
    """
    Raised when a job_id is not present in the current QueueState.

    Attributes
    ----------
    job_id : str
        The identifier that could not be found.
    """

    def __init__(self, job_id: str) -> None:
        self.job_id = job_id
        super().__init__(f"Job {job_id!r} not found in queue state")

    def __reduce__(self) -> tuple[object, ...]:
        # Exception's default reduce re-calls __init__ with `args`, i.e. the
        # already formatted message, so a copied/unpickled instance would wrap
        # the message a second time. Rebuild from the real argument instead.
        return (type(self), (self.job_id,), self.__dict__)


class StorageError(JQueueError):
    """
    Wraps an underlying I/O failure from a storage adapter.

    Attributes
    ----------
    message : str
        The description passed by the adapter (without the cause appended).
    cause : Exception
        The original exception from the storage backend.
    """

    def __init__(self, message: str, cause: Exception) -> None:
        self.message = message
        self.cause = cause
        super().__init__(f"{message}: {cause}")

    def __reduce__(self) -> tuple[object, ...]:
        # Exception's default reduce would call StorageError(<formatted text>)
        # and fail with a TypeError for the missing `cause`. Rebuild from the
        # original constructor arguments so copy/pickle round-trips work.
        return (type(self), (self.message, self.cause), self.__dict__)