jqueue-0.1.0-py3-none-any.whl
- jqueue/__init__.py +95 -0
- jqueue/adapters/__init__.py +0 -0
- jqueue/adapters/storage/__init__.py +0 -0
- jqueue/adapters/storage/filesystem.py +108 -0
- jqueue/adapters/storage/gcs.py +130 -0
- jqueue/adapters/storage/memory.py +62 -0
- jqueue/adapters/storage/s3.py +135 -0
- jqueue/core/__init__.py +0 -0
- jqueue/core/broker.py +109 -0
- jqueue/core/codec.py +42 -0
- jqueue/core/direct.py +170 -0
- jqueue/core/group_commit.py +263 -0
- jqueue/core/heartbeat.py +90 -0
- jqueue/domain/__init__.py +0 -0
- jqueue/domain/errors.py +46 -0
- jqueue/domain/models.py +177 -0
- jqueue/ports/__init__.py +0 -0
- jqueue/ports/storage.py +81 -0
- jqueue-0.1.0.dist-info/METADATA +712 -0
- jqueue-0.1.0.dist-info/RECORD +22 -0
- jqueue-0.1.0.dist-info/WHEEL +4 -0
- jqueue-0.1.0.dist-info/licenses/LICENSE +21 -0
jqueue/__init__.py
ADDED
@@ -0,0 +1,95 @@
"""
jqueue — object-storage queue with compare-and-set semantics.

Implements the turbopuffer object-storage queue pattern:
https://turbopuffer.com/blog/object-storage-queue

The queue state lives in a single JSON file on object storage. Every mutation
is a compare-and-set (CAS) write — read the file, mutate in memory, write
back with an If-Match guard. Concurrent writers that lose the CAS race retry
automatically.

For higher throughput, BrokerQueue batches concurrent operations into a single
CAS write (group commit), reducing N concurrent writes to O(1) storage
operations per round-trip.

Quick start
-----------
    import asyncio
    from jqueue import BrokerQueue, HeartbeatManager
    from jqueue.adapters.storage.memory import InMemoryStorage

    async def main():
        storage = InMemoryStorage()

        async with BrokerQueue(storage) as q:
            # Enqueue work
            await q.enqueue("send_email", b'{"to": "user@example.com"}')

            # Claim and process
            [job] = await q.dequeue("send_email")
            async with HeartbeatManager(q, job.id):
                print(f"Processing job {job.id}")
                await q.ack(job.id)

    asyncio.run(main())

Storage adapters
----------------
Built-in adapters (no extra deps):
- InMemoryStorage — for tests and examples
- LocalFileSystemStorage — POSIX single-machine (fcntl.flock)

Optional adapters (install extras):
- S3Storage (pip install "jqueue[s3]")
- GCSStorage (pip install "jqueue[gcs]")

Custom adapters only need to implement the two-method ObjectStoragePort:
    async def read() -> tuple[bytes, str | None]
    async def write(content, if_match=None) -> str

Architecture
------------
Follows the Ports & Adapters pattern:
    domain/   — pure value types (Job, QueueState, JobStatus)
    ports/    — Protocol interfaces (ObjectStoragePort)
    core/     — business logic (DirectQueue, BrokerQueue, GroupCommitLoop)
    adapters/ — concrete storage implementations
"""

from __future__ import annotations

from jqueue.adapters.storage.filesystem import LocalFileSystemStorage
from jqueue.adapters.storage.memory import InMemoryStorage
from jqueue.core.broker import BrokerQueue
from jqueue.core.direct import DirectQueue
from jqueue.core.heartbeat import HeartbeatManager
from jqueue.domain.errors import (
    CASConflictError,
    JobNotFoundError,
    JQueueError,
    StorageError,
)
from jqueue.domain.models import Job, JobStatus, QueueState
from jqueue.ports.storage import ObjectStoragePort

__all__ = [
    # Domain models
    "Job",
    "JobStatus",
    "QueueState",
    # Errors
    "JQueueError",
    "CASConflictError",
    "JobNotFoundError",
    "StorageError",
    # Port (for typing custom adapters)
    "ObjectStoragePort",
    # High-level queue API
    "BrokerQueue",
    "DirectQueue",
    "HeartbeatManager",
    # Built-in storage adapters
    "InMemoryStorage",
    "LocalFileSystemStorage",
]
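
The "lose the race, retry" behaviour this docstring describes reduces to a small read-modify-write loop over the two-method port. A minimal sketch of that loop, assuming a caller-supplied mutate function and reusing the package's own codec module (illustrative only, not part of the wheel):

    from typing import Callable

    from jqueue import CASConflictError, ObjectStoragePort, QueueState
    from jqueue.core import codec

    async def cas_update(
        storage: ObjectStoragePort,
        mutate: Callable[[QueueState], QueueState],
    ) -> QueueState:
        # Optimistic concurrency: re-read and retry until our etag
        # survives from read() to write().
        while True:
            content, etag = await storage.read()
            state = codec.decode(content)      # b"" decodes to an empty state
            new_state = mutate(state)
            try:
                await storage.write(codec.encode(new_state), if_match=etag)
                return new_state
            except CASConflictError:
                continue                       # lost the race; retry with fresh state
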
jqueue/adapters/__init__.py
ADDED
File without changes

jqueue/adapters/storage/__init__.py
ADDED
File without changes
jqueue/adapters/storage/filesystem.py
ADDED
@@ -0,0 +1,108 @@
"""
LocalFileSystemStorage — fcntl.flock-based CAS for POSIX systems.

Suitable for local development, single-machine deployments, or integration
tests that need a persistent file rather than in-memory state.

NOT suitable for multi-machine deployments — use S3Storage or GCSStorage
for distributed workloads.

Etag strategy
-------------
The etag is a SHA-256 hex digest of the file contents. This is stable,
deterministic, and always changes when content changes — unlike mtime, which
can be identical across rapid successive writes on fast machines.
A file that is absent or empty is treated as non-existent; its etag is None.
The jqueue codec always produces non-empty JSON, so a 0-byte file only occurs
transiently before the first write completes.

CAS semantics
-------------
write(content, if_match) acquires an exclusive flock, re-reads the current
etag while holding the lock, and raises CASConflictError if it differs from
if_match. The write is performed atomically within the same lock scope.

POSIX-only (Linux, macOS). Not compatible with NFS or distributed filesystems.
"""

from __future__ import annotations

import asyncio
import dataclasses
import fcntl
import hashlib
import os
from pathlib import Path

from jqueue.domain.errors import CASConflictError


@dataclasses.dataclass
class LocalFileSystemStorage:
    """
    Stores the queue state in a local file.

    Parameters
    ----------
    path : path to the JSON state file (parent directory created if absent)
    """

    path: Path

    def __init__(self, path: str | Path) -> None:
        self.path = Path(path)

    async def read(self) -> tuple[bytes, str | None]:
        """Return (content, etag). Returns (b"", None) if the file does not exist."""
        return await asyncio.to_thread(self._sync_read)

    async def write(
        self,
        content: bytes,
        if_match: str | None = None,
    ) -> str:
        """CAS write. Raises CASConflictError on etag mismatch."""
        return await asyncio.to_thread(self._sync_write, content, if_match)

    # ------------------------------------------------------------------ #
    # Synchronous implementations (executed in a thread-pool worker)     #
    # ------------------------------------------------------------------ #

    @staticmethod
    def _etag(data: bytes) -> str:
        return hashlib.sha256(data).hexdigest()

    def _sync_read(self) -> tuple[bytes, str | None]:
        if not self.path.exists():
            return b"", None
        with open(self.path, "rb") as fh:
            fcntl.flock(fh, fcntl.LOCK_SH)
            try:
                content = fh.read()
            finally:
                fcntl.flock(fh, fcntl.LOCK_UN)
        etag: str | None = self._etag(content) if content else None
        return content, etag

    def _sync_write(self, content: bytes, if_match: str | None) -> str:
        self.path.parent.mkdir(parents=True, exist_ok=True)
        fd = os.open(str(self.path), os.O_RDWR | os.O_CREAT, 0o644)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX)

            existing = os.read(fd, os.fstat(fd).st_size)
            real_etag: str | None = self._etag(existing) if existing else None

            if real_etag != if_match:
                raise CASConflictError(
                    f"ETag mismatch: expected {if_match!r}, got {real_etag!r}"
                )

            os.ftruncate(fd, 0)
            os.lseek(fd, 0, os.SEEK_SET)
            os.write(fd, content)
        finally:
            fcntl.flock(fd, fcntl.LOCK_UN)
            os.close(fd)

        return self._etag(content)
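
A short usage sketch for this adapter (the path is illustrative and assumed to start absent): the first write passes if_match=None, later writes must present the current SHA-256 etag, and a stale etag raises CASConflictError.

    import asyncio

    from jqueue import CASConflictError, LocalFileSystemStorage

    async def demo() -> None:
        storage = LocalFileSystemStorage("/tmp/jqueue-demo/state.json")

        etag1 = await storage.write(b'{"jobs": []}')          # first write: if_match=None
        content, etag2 = await storage.read()
        assert etag1 == etag2                                  # both are SHA-256 of the content

        await storage.write(b'{"jobs": [1]}', if_match=etag1)  # conditional write succeeds

        try:
            await storage.write(b"stale", if_match=etag1)      # etag1 is now stale
        except CASConflictError:
            print("lost the CAS race, as expected")

    asyncio.run(demo())
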
jqueue/adapters/storage/gcs.py
ADDED
@@ -0,0 +1,130 @@
"""
GCSStorage — Google Cloud Storage adapter using google-cloud-storage.

Install extras: pip install "jqueue[gcs]"

CAS semantics
-------------
GCS supports conditional writes via object generation numbers.

read()  → returns (content, generation_string) where generation is the
          integer GCS object generation, stringified to match the etag type
write() → uses if_generation_match=int(etag); GCS raises PreconditionFailed
          on mismatch → CASConflictError

First write (if_match=None):
    Uses if_generation_match=0 — GCS convention for "blob must not exist yet".

Note: google-cloud-storage is synchronous. All operations are wrapped in
asyncio.to_thread to avoid blocking the event loop.
"""

from __future__ import annotations

import asyncio
import dataclasses
from typing import TYPE_CHECKING

from jqueue.domain.errors import CASConflictError, StorageError

if TYPE_CHECKING:
    from google.cloud.storage import Client as GCSClient


@dataclasses.dataclass
class GCSStorage:
    """
    Google Cloud Storage adapter.

    Parameters
    ----------
    bucket_name : GCS bucket name
    blob_name   : blob path (e.g. "queues/my-queue/state.json")
    client      : google.cloud.storage.Client — created lazily if omitted
    """

    bucket_name: str
    blob_name: str
    client: GCSClient | None = None

    def _get_client(self) -> GCSClient:
        if self.client is not None:
            return self.client
        try:
            from google.cloud import storage
        except ImportError as exc:
            raise ImportError(
                "GCSStorage requires google-cloud-storage. "
                "Install with: pip install 'jqueue[gcs]'"
            ) from exc
        return storage.Client()

    async def read(self) -> tuple[bytes, str | None]:
        """Read the state blob. Returns (b"", None) if the blob does not exist."""
        try:
            return await asyncio.to_thread(self._sync_read)
        except (CASConflictError, StorageError):
            raise
        except Exception as exc:
            raise StorageError("GCS read failed", exc) from exc

    async def write(
        self,
        content: bytes,
        if_match: str | None = None,
    ) -> str:
        """CAS write. Raises CASConflictError on generation mismatch."""
        try:
            return await asyncio.to_thread(self._sync_write, content, if_match)
        except (CASConflictError, StorageError):
            raise
        except Exception as exc:
            raise StorageError("GCS write failed", exc) from exc

    # ------------------------------------------------------------------ #
    # Synchronous implementations (executed in a thread-pool worker)     #
    # ------------------------------------------------------------------ #

    def _sync_read(self) -> tuple[bytes, str | None]:
        try:
            from google.api_core import exceptions as gapi_exc
        except ImportError as exc:
            raise ImportError(
                "GCSStorage requires google-cloud-storage. "
                "Install with: pip install 'jqueue[gcs]'"
            ) from exc

        client = self._get_client()
        blob = client.bucket(self.bucket_name).blob(self.blob_name)
        try:
            content: bytes = blob.download_as_bytes()
            return content, str(blob.generation)
        except gapi_exc.NotFound:
            return b"", None

    def _sync_write(self, content: bytes, if_match: str | None) -> str:
        try:
            from google.api_core import exceptions as gapi_exc
        except ImportError as exc:
            raise ImportError(
                "GCSStorage requires google-cloud-storage. "
                "Install with: pip install 'jqueue[gcs]'"
            ) from exc

        client = self._get_client()
        blob = client.bucket(self.bucket_name).blob(self.blob_name)

        # if_generation_match=0 → "blob must not exist yet"
        gen_match: int = 0 if if_match is None else int(if_match)

        try:
            blob.upload_from_string(
                content,
                content_type="application/json",
                if_generation_match=gen_match,
            )
        except gapi_exc.PreconditionFailed as exc:
            raise CASConflictError("GCS generation mismatch") from exc

        blob.reload()
        return str(blob.generation)
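
A sketch of wiring GCSStorage into BrokerQueue, assuming pip install "jqueue[gcs]" and ambient Google application credentials; the bucket and blob names are illustrative:

    import asyncio

    from jqueue import BrokerQueue
    from jqueue.adapters.storage.gcs import GCSStorage

    async def main() -> None:
        storage = GCSStorage(
            bucket_name="my-bucket",                 # illustrative names
            blob_name="queues/emails/state.json",
        )
        async with BrokerQueue(storage) as q:
            await q.enqueue("send_email", b'{"to": "user@example.com"}')

    asyncio.run(main())
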
jqueue/adapters/storage/memory.py
ADDED
@@ -0,0 +1,62 @@
"""
InMemoryStorage — asyncio.Lock-based CAS for testing and development.

Stores the queue state as bytes in memory. Uses an asyncio.Lock to serialize
reads and writes, faithfully simulating the CAS semantics of real object
storage backends.

The etag is a simple monotonic integer counter (stringified) that increments
on every successful write.

Zero external dependencies. Safe for multiple concurrent coroutines in a
single event loop. NOT safe across processes or threads.
"""

from __future__ import annotations

import asyncio
import dataclasses

from jqueue.domain.errors import CASConflictError


@dataclasses.dataclass
class InMemoryStorage:
    """
    In-process object storage backed by a bytes buffer.

    Parameters
    ----------
    initial_content : optional pre-populated bytes (useful for test setup)
    """

    initial_content: bytes = b""

    def __post_init__(self) -> None:
        self._content: bytes = self.initial_content
        self._etag: str | None = "0" if self.initial_content else None
        self._counter: int = 0
        self._lock: asyncio.Lock = asyncio.Lock()

    async def read(self) -> tuple[bytes, str | None]:
        """Return (content, etag). etag is None until the first write."""
        async with self._lock:
            return self._content, self._etag

    async def write(
        self,
        content: bytes,
        if_match: str | None = None,
    ) -> str:
        """
        CAS write. Raises CASConflictError if if_match differs from the current etag.
        """
        async with self._lock:
            if if_match != self._etag:
                raise CASConflictError(
                    f"ETag mismatch: expected {if_match!r}, got {self._etag!r}"
                )
            self._counter += 1
            self._etag = str(self._counter)
            self._content = content
            return self._etag
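
Because the etag is just a stringified write counter, the CAS behaviour is easy to exercise in a test. A sketch (the assertion reflects the counter semantics above):

    import asyncio

    from jqueue import CASConflictError, InMemoryStorage

    async def test_second_writer_loses_the_race() -> None:
        storage = InMemoryStorage()

        etag0 = await storage.write(b"v1")          # first write: if_match=None
        assert etag0 == "1"                          # counter-based etag

        # Two writers read the same etag, then both try to commit against it.
        _, etag = await storage.read()
        await storage.write(b"v2", if_match=etag)    # first commit wins
        try:
            await storage.write(b"v3", if_match=etag)
            raise AssertionError("expected CASConflictError")
        except CASConflictError:
            pass

    asyncio.run(test_second_writer_loses_the_race())
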
jqueue/adapters/storage/s3.py
ADDED
@@ -0,0 +1,135 @@
"""
S3Storage — AWS S3 adapter using aioboto3 and If-Match conditional writes.

Install extras: pip install "jqueue[s3]"

CAS semantics
-------------
S3 supports conditional PutObject via the IfMatch parameter (added Aug 2024).

read()  → returns (content, ETag) where ETag is the S3 object's entity tag
write() → passes IfMatch=etag; S3 raises PreconditionFailed on mismatch
          → CASConflictError

First write (if_match=None):
    IfMatch is omitted — unconditional put.

Compatible with S3-compatible storage that supports conditional writes:
MinIO, Cloudflare R2, Tigris, etc.
"""

from __future__ import annotations

import dataclasses
from typing import TYPE_CHECKING

from jqueue.domain.errors import CASConflictError, StorageError

if TYPE_CHECKING:
    from aioboto3 import Session as AioBoto3Session


@dataclasses.dataclass
class S3Storage:
    """
    AWS S3 storage adapter.

    Parameters
    ----------
    bucket       : S3 bucket name
    key          : object key (e.g. "queues/my-queue/state.json")
    session      : aioboto3.Session — created lazily from env vars if omitted
    region_name  : AWS region passed to the S3 client
    endpoint_url : custom endpoint for S3-compatible backends (e.g. MinIO)
    """

    bucket: str
    key: str
    session: AioBoto3Session | None = None
    region_name: str | None = None
    endpoint_url: str | None = None

    def _get_session(self) -> AioBoto3Session:
        if self.session is not None:
            return self.session
        try:
            import aioboto3
        except ImportError as exc:
            raise ImportError(
                "S3Storage requires aioboto3. Install with: pip install 'jqueue[s3]'"
            ) from exc
        return aioboto3.Session()

    def _client_kwargs(self) -> dict[str, str]:
        """Build kwargs forwarded to the S3 client constructor."""
        kwargs: dict[str, str] = {}
        if self.region_name:
            kwargs["region_name"] = self.region_name
        if self.endpoint_url:
            kwargs["endpoint_url"] = self.endpoint_url
        return kwargs

    async def read(self) -> tuple[bytes, str | None]:
        """Read the state object. Returns (b"", None) if the key does not exist."""
        session = self._get_session()
        try:
            async with session.client("s3", **self._client_kwargs()) as s3:
                try:
                    response = await s3.get_object(Bucket=self.bucket, Key=self.key)
                    content: bytes = await response["Body"].read()
                    etag: str = response["ETag"]
                    return content, etag
                except Exception as exc:
                    if _s3_error_code(exc) in ("NoSuchKey", "404"):
                        return b"", None
                    raise
        except (CASConflictError, StorageError):
            raise
        except Exception as exc:
            raise StorageError("S3 read failed", exc) from exc

    async def write(
        self,
        content: bytes,
        if_match: str | None = None,
    ) -> str:
        """CAS write. Raises CASConflictError on ETag mismatch (PreconditionFailed)."""
        session = self._get_session()
        try:
            async with session.client("s3", **self._client_kwargs()) as s3:
                put_kwargs: dict[str, str | bytes] = {
                    "Bucket": self.bucket,
                    "Key": self.key,
                    "Body": content,
                    "ContentType": "application/json",
                }
                if if_match is not None:
                    put_kwargs["IfMatch"] = if_match

                try:
                    response = await s3.put_object(**put_kwargs)
                    return str(response["ETag"])
                except Exception as exc:
                    if _s3_error_code(exc) == "PreconditionFailed":
                        raise CASConflictError(
                            "S3 ETag mismatch (PreconditionFailed)"
                        ) from exc
                    raise
        except (CASConflictError, StorageError):
            raise
        except Exception as exc:
            raise StorageError("S3 write failed", exc) from exc


def _s3_error_code(exc: Exception) -> str:
    """Extract the error code from a botocore ClientError, or return ''."""
    try:
        response = getattr(exc, "response", None)
        if isinstance(response, dict):
            error = response.get("Error", {})
            if isinstance(error, dict):
                code = error.get("Code", "")
                return str(code) if code else ""
    except Exception:  # noqa: BLE001
        pass
    return ""
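
A sketch pointing S3Storage at an S3-compatible endpoint, assuming pip install "jqueue[s3]" and AWS credentials in the environment; the endpoint, bucket, and key are illustrative, and the backend must support conditional writes:

    import asyncio

    from jqueue import BrokerQueue
    from jqueue.adapters.storage.s3 import S3Storage

    async def main() -> None:
        storage = S3Storage(
            bucket="jobs",                            # illustrative
            key="queues/emails/state.json",
            endpoint_url="http://localhost:9000",     # e.g. a local MinIO
            region_name="us-east-1",
        )
        async with BrokerQueue(storage) as q:
            await q.enqueue("send_email", b'{"to": "user@example.com"}')

    asyncio.run(main())
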
jqueue/core/__init__.py
ADDED
File without changes
jqueue/core/broker.py
ADDED
@@ -0,0 +1,109 @@
"""
BrokerQueue — high-throughput queue backed by GroupCommitLoop.

BrokerQueue is an async context manager that starts a GroupCommitLoop on
__aenter__ and performs a clean shutdown (drain pending ops) on __aexit__.

Usage
-----
    from jqueue import BrokerQueue, HeartbeatManager, InMemoryStorage

    async with BrokerQueue(InMemoryStorage()) as q:
        job = await q.enqueue("send_email", b'{"to": "user@example.com"}')

        [job] = await q.dequeue("send_email")
        async with HeartbeatManager(q, job.id):
            process(job.payload)
            await q.ack(job.id)

Throughput
----------
All concurrent callers share a single writer task. While a CAS write is
in-flight (typically 50–300 ms against real object storage), every enqueue
and dequeue call that arrives is buffered and committed in the *next* write —
collapsing N concurrent writes down to O(1) storage round-trips.
"""

from __future__ import annotations

import dataclasses
from datetime import timedelta
from types import TracebackType

from jqueue.core.group_commit import GroupCommitLoop
from jqueue.domain.models import Job, QueueState
from jqueue.ports.storage import ObjectStoragePort


@dataclasses.dataclass
class BrokerQueue:
    """
    Async context manager wrapping a GroupCommitLoop.

    Parameters
    ----------
    storage       : any ObjectStoragePort implementation
    stale_timeout : IN_PROGRESS jobs with a heartbeat older than this are
                    automatically re-queued on each write cycle (default 5 min)
    """

    storage: ObjectStoragePort
    stale_timeout: timedelta = timedelta(minutes=5)

    _loop: GroupCommitLoop = dataclasses.field(init=False, repr=False)

    def __post_init__(self) -> None:
        self._loop = GroupCommitLoop(
            storage=self.storage,
            stale_timeout=self.stale_timeout,
        )

    async def __aenter__(self) -> BrokerQueue:
        await self._loop.start()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        await self._loop.stop()

    # ------------------------------------------------------------------ #
    # Queue operations (delegated to GroupCommitLoop)                    #
    # ------------------------------------------------------------------ #

    async def enqueue(
        self,
        entrypoint: str,
        payload: bytes,
        priority: int = 0,
    ) -> Job:
        """Add a new job. Returns the committed Job."""
        return await self._loop.enqueue(entrypoint, payload, priority)

    async def dequeue(
        self,
        entrypoint: str | None = None,
        *,
        batch_size: int = 1,
    ) -> list[Job]:
        """Claim up to batch_size QUEUED jobs and mark them IN_PROGRESS."""
        return await self._loop.dequeue(entrypoint, batch_size=batch_size)

    async def ack(self, job_id: str) -> None:
        """Remove a completed job from the queue."""
        await self._loop.ack(job_id)

    async def nack(self, job_id: str) -> None:
        """Return a job to QUEUED status."""
        await self._loop.nack(job_id)

    async def heartbeat(self, job_id: str) -> None:
        """Refresh the heartbeat timestamp for an IN_PROGRESS job."""
        await self._loop.heartbeat(job_id)

    async def read_state(self) -> QueueState:
        """Read-only snapshot of the current queue state."""
        return await self._loop.read_state()
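
The throughput claim is observable with InMemoryStorage, whose etag doubles as a write counter. A sketch that fires 100 concurrent enqueues and then inspects how many CAS writes the broker actually issued (the exact count depends on timing and is typically far below 100):

    import asyncio

    from jqueue import BrokerQueue, InMemoryStorage

    async def main() -> None:
        storage = InMemoryStorage()
        async with BrokerQueue(storage) as q:
            # 100 concurrent enqueues are buffered behind the single writer task.
            await asyncio.gather(
                *(q.enqueue("send_email", f'{{"n": {i}}}'.encode()) for i in range(100))
            )
            jobs = await q.dequeue("send_email", batch_size=100)
            print(f"claimed {len(jobs)} jobs")

        # InMemoryStorage's etag is a write counter, so it doubles as a rough
        # measure of the CAS round-trips the broker made.
        _, etag = await storage.read()
        print(f"storage writes: {etag}")

    asyncio.run(main())
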
jqueue/core/codec.py
ADDED
@@ -0,0 +1,42 @@
"""
Codec — serialize and deserialize QueueState to/from bytes using Pydantic v2.

Pydantic v2 handles the full wire format automatically:
- bytes fields are encoded as base64 strings in JSON mode
- datetime fields are serialized as ISO-8601 strings with UTC offset
- Enum values are serialized as their string values
- Nested models (Job inside QueueState) are recursively serialized

Wire format (produced by model_dump_json):
------------------------------------------
    {
      "version": 3,
      "jobs": [
        {
          "id": "550e8400-...",
          "entrypoint": "send_email",
          "payload": "SGVsbG8gV29ybGQ=",   <-- base64-encoded bytes
          "status": "queued",
          "priority": 0,
          "created_at": "2024-01-01T00:00:00+00:00",
          "heartbeat_at": null
        }
      ]
    }
"""

from __future__ import annotations

from jqueue.domain.models import QueueState


def encode(state: QueueState) -> bytes:
    """Serialize QueueState to UTF-8 JSON bytes."""
    return state.model_dump_json(indent=2).encode("utf-8")


def decode(data: bytes) -> QueueState:
    """Deserialize UTF-8 JSON bytes to QueueState. Empty bytes → empty state."""
    if not data:
        return QueueState()
    return QueueState.model_validate_json(data)
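
A round-trip sketch of the codec, assuming QueueState's no-argument constructor produces an empty state (as decode's empty-bytes branch implies) and Pydantic's structural model equality:

    from jqueue import QueueState
    from jqueue.core import codec

    state = codec.decode(b"")             # empty bytes -> empty QueueState
    data = codec.encode(state)            # pretty-printed UTF-8 JSON bytes
    assert codec.decode(data) == state    # round-trip is lossless
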