queue-max 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {queue_max-0.1.0 → queue_max-0.1.1}/LICENSE +1 -1
- queue_max-0.1.1/PKG-INFO +679 -0
- queue_max-0.1.1/README.md +636 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/examples/ai_rate_limited.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/examples/basic_usage.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/examples/webhook_processor.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/pyproject.toml +3 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/cli.py +14 -15
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_purge.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_stats.py +3 -3
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_worker.py +2 -2
- queue_max-0.1.1/src/queue_max/contrib/events.py +313 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/fastapi/__init__.py +7 -7
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/flask/__init__.py +4 -4
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/circuit_breaker.py +21 -5
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/database.py +113 -31
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/decorator.py +16 -16
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/queue.py +143 -55
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/rate_limiter.py +2 -2
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/worker.py +54 -39
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/exceptions.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/models/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/models/job.py +11 -13
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/utils/__init__.py +1 -1
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/utils/helpers.py +3 -3
- queue_max-0.1.1/src/queue_max.egg-info/PKG-INFO +679 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/SOURCES.txt +1 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/requires.txt +8 -0
- queue_max-0.1.0/PKG-INFO +0 -233
- queue_max-0.1.0/README.md +0 -193
- queue_max-0.1.0/src/queue_max.egg-info/PKG-INFO +0 -233
- {queue_max-0.1.0 → queue_max-0.1.1}/MANIFEST.in +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/setup.cfg +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/__init__.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/__init__.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/py.typed +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/dependency_links.txt +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/entry_points.txt +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/top_level.txt +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/__init__.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/conftest.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_circuit_breaker.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_concurrency.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_helpers.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_integration.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_queue.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_rate_limiter.py +0 -0
- {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_worker.py +0 -0
queue_max-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,679 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: queue-max
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Task queue with SQLite sharding, rate limiting, and circuit breaker
|
|
5
|
+
Author: Alexandre All
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/all451/queue-max
|
|
8
|
+
Project-URL: repository, https://github.com/all451/queue-max
|
|
9
|
+
Keywords: queue,task-queue,sqlite,background-tasks,worker,sharding
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Operating System :: OS Independent
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: typing-extensions>=4.5.0
|
|
26
|
+
Provides-Extra: events
|
|
27
|
+
Requires-Dist: bubus>=1.5.0; python_version >= "3.11" and extra == "events"
|
|
28
|
+
Provides-Extra: webhook
|
|
29
|
+
Requires-Dist: requests>=2.31.0; extra == "webhook"
|
|
30
|
+
Provides-Extra: django
|
|
31
|
+
Requires-Dist: Django>=3.2; extra == "django"
|
|
32
|
+
Provides-Extra: fastapi
|
|
33
|
+
Requires-Dist: fastapi>=0.100.0; extra == "fastapi"
|
|
34
|
+
Provides-Extra: flask
|
|
35
|
+
Requires-Dist: Flask>=2.0; extra == "flask"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: bubus>=1.5.0; python_version >= "3.11" and extra == "all"
|
|
38
|
+
Requires-Dist: requests>=2.31.0; extra == "all"
|
|
39
|
+
Requires-Dist: Django>=3.2; extra == "all"
|
|
40
|
+
Requires-Dist: fastapi>=0.100.0; extra == "all"
|
|
41
|
+
Requires-Dist: Flask>=2.0; extra == "all"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# Queue Max
|
|
45
|
+
|
|
46
|
+
Task queue library with SQLite persistence, sharding, rate limiting, and circuit breaker.
|
|
47
|
+
|
|
48
|
+
No Redis or RabbitMQ required. **No external services** — the only runtime dependency is `typing-extensions`.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from queue_max import Queue, Worker
|
|
52
|
+
|
|
53
|
+
queue = Queue()
|
|
54
|
+
queue.enqueue({"task": "send_email", "to": "user@example.com"})
|
|
55
|
+
|
|
56
|
+
Worker("worker-1", lambda p: print(p), queue).start()
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install queue-max
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Optional extras:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install queue-max[events] # Typed events with bubus + Pydantic (requires Python 3.11+)
|
|
69
|
+
pip install queue-max[django] # Django management commands
|
|
70
|
+
pip install queue-max[fastapi] # FastAPI middleware
|
|
71
|
+
pip install queue-max[flask] # Flask extension
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Architecture
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
┌──────────────────────────────────────────────┐
|
|
78
|
+
│ Queue │
|
|
79
|
+
│ ┌────────────┐ ┌──────────┐ ┌──────────┐ │
|
|
80
|
+
│ │ Shard Mgr │ │RateLimit │ │ Circuit │ │
|
|
81
|
+
│ │ (SQLite) │ │ Token │ │ Breaker │ │
|
|
82
|
+
│ └─────┬──────┘ └──────────┘ └──────────┘ │
|
|
83
|
+
└────────┼─────────────────────────────────────┘
|
|
84
|
+
│
|
|
85
|
+
┌───────────────────┼───────────────────┐
|
|
86
|
+
▼ ▼ ▼
|
|
87
|
+
┌──────────┐ ┌──────────┐ ┌──────────┐
|
|
88
|
+
│ shard_0 │ │ shard_1 │ │ shard_N │
|
|
89
|
+
│ fila.db │ │ fila.db │ │ fila.db │
|
|
90
|
+
│ WAL │ │ WAL │ │ WAL │
|
|
91
|
+
└──────────┘ └──────────┘ └──────────┘
|
|
92
|
+
│ │ │
|
|
93
|
+
└───────────┬───────┴───────┬───────────┘
|
|
94
|
+
▼ ▼
|
|
95
|
+
┌──────────┐ ┌──────────┐
|
|
96
|
+
│ Worker │ │ Worker │ ...
|
|
97
|
+
│ (thread) │ │ (thread) │
|
|
98
|
+
└──────────┘ └──────────┘
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Each shard is an independent SQLite file in WAL mode. Workers scan shard groups in random order to distribute load.
|
|
102
|
+
|
|
103
|
+
## Features
|
|
104
|
+
|
|
105
|
+
**SQLite Persistence** — Jobs are stored in SQLite with WAL mode. No external services needed.
|
|
106
|
+
|
|
107
|
+
**Physical Sharding** — Multiple `.db` files (default 6) allow true concurrent write access. Each shard is an independent database with its own lock.
|
|
108
|
+
|
|
109
|
+
**Rate Limiting** — Token bucket algorithm shared across all workers. Configurable per minute/second/hour.
|
|
110
|
+
|
|
111
|
+
**Circuit Breaker** — Three-state (CLOSED → OPEN → HALF_OPEN → CLOSED). Automatically tracks job failures from `Queue.fail_job()` and resets on `Queue.complete_job()`.
|
|
112
|
+
|
|
113
|
+
**Retry with Backoff** — Exponential backoff (base × 2^(n-1)) with ±20% jitter. Transient errors (5xx, timeout, 429) retry automatically. Client errors (4xx except 429) are permanent.
|
|
114
|
+
|
|
115
|
+
**Dead Letter Queue** — Jobs that exhaust all retries are moved to a DLQ table for inspection.
|
|
116
|
+
|
|
117
|
+
**Heartbeat and Orphan Recovery** — Workers send heartbeats every 5s. Jobs with stale heartbeats (>30s) are recovered as pending.
|
|
118
|
+
|
|
119
|
+
**Priority Queues** — Three levels: low (0), medium (1), high (2). Ordering is per-shard.
|
|
120
|
+
|
|
121
|
+
**Event System** — Simple callbacks built-in. Optional typed events with bubus + Pydantic (`queue-max[events]`).
|
|
122
|
+
|
|
123
|
+
**CLI** — Built-in commands for stats, workers, enqueue, retry, purge, and listing.
|
|
124
|
+
|
|
125
|
+
## Quick Start
|
|
126
|
+
|
|
127
|
+
### Basic enqueue + worker
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from queue_max import Queue, Worker
|
|
131
|
+
import time
|
|
132
|
+
|
|
133
|
+
queue = Queue(shards=3, rate_limit=100)
|
|
134
|
+
|
|
135
|
+
def process(payload: dict) -> str:
|
|
136
|
+
print(f"Processing: {payload}")
|
|
137
|
+
return "done"
|
|
138
|
+
|
|
139
|
+
# Enqueue jobs
|
|
140
|
+
for i in range(5):
|
|
141
|
+
queue.enqueue({"task": f"job-{i}"}, priority=i % 3)
|
|
142
|
+
|
|
143
|
+
# Process with a worker
|
|
144
|
+
worker = Worker("example", process, queue, poll_interval=0.1)
|
|
145
|
+
worker.start()
|
|
146
|
+
time.sleep(3)
|
|
147
|
+
worker.stop()
|
|
148
|
+
|
|
149
|
+
stats = queue.get_stats()
|
|
150
|
+
print(f"Stats: {stats}")
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### With decorator
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
from queue_max import task
|
|
157
|
+
|
|
158
|
+
@task(priority=2, max_retries=3)
|
|
159
|
+
def send_email(to: str, subject: str):
|
|
160
|
+
return send(to, subject)
|
|
161
|
+
|
|
162
|
+
# Enqueue for background processing
|
|
163
|
+
send_email.delay("user@example.com", "Hello")
|
|
164
|
+
|
|
165
|
+
# Schedule for later
|
|
166
|
+
from datetime import datetime, timezone, timedelta
|
|
167
|
+
future = datetime.now(timezone.utc) + timedelta(minutes=5)
|
|
168
|
+
send_email.schedule_at(future, "user@example.com", "Scheduled!")
|
|
169
|
+
|
|
170
|
+
# Parallel processing
|
|
171
|
+
send_email.map(["a@b.com", "c@d.com"], "Welcome!")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Configuration
|
|
175
|
+
|
|
176
|
+
| Variable | Default | Description |
|
|
177
|
+
|----------|---------|-------------|
|
|
178
|
+
| `NUM_SHARDS` | 6 | Number of shard databases |
|
|
179
|
+
| `RATE_LIMIT_MAX` | 160 | Max requests per minute |
|
|
180
|
+
| `QUEUE_MAX_RETRIES` | 3 | Max retry attempts after first failure |
|
|
181
|
+
| `CIRCUIT_FAILURE_THRESHOLD` | 5 | Consecutive failures before circuit opens |
|
|
182
|
+
| `CIRCUIT_TIMEOUT` | 60 | Seconds before circuit recovery attempt |
|
|
183
|
+
| `DB_BUSY_TIMEOUT` | 30000 | SQLite busy timeout (ms) |
|
|
184
|
+
| `DATA_DIR` | ./data | Directory for shard databases |
|
|
185
|
+
| `CACHE_SIZE` | 10000 | SQLite cache size (pages) |
|
|
186
|
+
| `MMAP_SIZE` | 268435456 | Memory-mapped I/O size (bytes) |
|
|
187
|
+
| `HEARTBEAT_INTERVAL` | 5000 | Worker heartbeat interval (ms) |
|
|
188
|
+
| `STUCK_TIMEOUT` | 30000 | Orphan job timeout (ms) |
|
|
189
|
+
| `RECOVERY_INTERVAL` | 10000 | Orphan recovery check interval (ms) |
|
|
190
|
+
| `CLEANUP_DAYS` | 7 | Age threshold for job cleanup |
|
|
191
|
+
| `QUEUE_ALERT_THRESHOLD` | 1000 | Pending jobs before alert event |
|
|
192
|
+
| `QUEUE_MAX_LOG_LEVEL` | WARNING | Log level |
|
|
193
|
+
|
|
194
|
+
Full list in [.env.example](.env.example).
|
|
195
|
+
|
|
196
|
+
## API Reference
|
|
197
|
+
|
|
198
|
+
### Queue
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from queue_max import Queue
|
|
202
|
+
|
|
203
|
+
queue = Queue(
|
|
204
|
+
shards=None, # Number of shards (default: NUM_SHARDS or 6)
|
|
205
|
+
rate_limit=None, # Requests per minute (default: RATE_LIMIT_MAX or 160)
|
|
206
|
+
max_retries=None, # Max retries after first failure (default: QUEUE_MAX_RETRIES or 3)
|
|
207
|
+
data_dir=None, # Directory for shard files (default: DATA_DIR or ./data)
|
|
208
|
+
circuit_breaker_threshold=None, # Failures before circuit opens (default: 5)
|
|
209
|
+
circuit_breaker_timeout=None, # Seconds before recovery (default: 60)
|
|
210
|
+
rate_limiter_timeout=5.0, # Seconds to wait for rate limit token
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# ── Enqueue ──
|
|
214
|
+
queue.enqueue(payload, pagina_id=None, priority=0, max_retries=None)
|
|
215
|
+
queue.enqueue_batch([{"payload": {...}, "pagina_id": 1, "priority": 2}, ...])
|
|
216
|
+
queue.enqueue_from_file("jobs.jsonl", fmt="jsonl")
|
|
217
|
+
|
|
218
|
+
# ── Process ──
|
|
219
|
+
job = queue.pop_job(worker_id) # Returns Job or None
|
|
220
|
+
queue.complete_job(job_id, shard_id)
|
|
221
|
+
queue.fail_job(job_id, shard_id, error, permanent=False)
|
|
222
|
+
|
|
223
|
+
# ── Management ──
|
|
224
|
+
queue.retry_failed_jobs(shard_id=None)
|
|
225
|
+
queue.cleanup_old_jobs(days=7)
|
|
226
|
+
queue.purge_queue(status=None) # "pending", "failed", "processing", or None (all)
|
|
227
|
+
queue.recover_orphans()
|
|
228
|
+
queue.heartbeat(shard_id, worker_id)
|
|
229
|
+
queue.wait_until_empty(timeout=None)
|
|
230
|
+
queue.get_failed_jobs(limit=100)
|
|
231
|
+
queue.get_processing_jobs()
|
|
232
|
+
stats = queue.get_stats()
|
|
233
|
+
|
|
234
|
+
# ── Events ──
|
|
235
|
+
queue.on("job_completed", lambda job_id, shard_id: print(f"Done: {job_id}"))
|
|
236
|
+
queue.on("alert", lambda type, pending, threshold: print(f"Alert: {pending}"))
|
|
237
|
+
|
|
238
|
+
# ── Context manager (auto-close) ──
|
|
239
|
+
with Queue() as queue:
|
|
240
|
+
queue.enqueue({"task": "example"})
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### Queue Stats
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
stats = queue.get_stats()
|
|
247
|
+
# {
|
|
248
|
+
# "pending": int,
|
|
249
|
+
# "processing": int,
|
|
250
|
+
# "failed": int,
|
|
251
|
+
# "num_shards": int,
|
|
252
|
+
# "rate_limit": int,
|
|
253
|
+
# "max_retries": int,
|
|
254
|
+
# "circuit_state": "closed" | "open" | "half_open",
|
|
255
|
+
# "circuit_failures": int,
|
|
256
|
+
# "tokens_available": float,
|
|
257
|
+
# "uptime_seconds": float,
|
|
258
|
+
# "is_healthy": bool,
|
|
259
|
+
# }
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Job
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
from queue_max import Job, JobStatus, JobPriority
|
|
266
|
+
|
|
267
|
+
# Job properties
|
|
268
|
+
job.id # int — job ID (unique per shard)
|
|
269
|
+
job.payload # dict — job data
|
|
270
|
+
job.status # JobStatus — PENDING, PROCESSING, COMPLETED, FAILED, CANCELLED
|
|
271
|
+
job.priority_int # int — 0, 1, or 2
|
|
272
|
+
job.shard_id # int — which shard holds this job
|
|
273
|
+
job.tentativas # int — attempt count
|
|
274
|
+
job.max_tentativas # int — max attempts allowed = max_retries + 1
|
|
275
|
+
job.last_error # str or None
|
|
276
|
+
job.error_type # str or None
|
|
277
|
+
job.worker_id # str or None
|
|
278
|
+
job.created_at # ISO timestamp
|
|
279
|
+
job.next_retry_at # ISO timestamp or None (retry scheduled for the future)
|
|
280
|
+
job.started_at # ISO timestamp or None
|
|
281
|
+
job.completed_at # ISO timestamp or None
|
|
282
|
+
|
|
283
|
+
# Convenience checks
|
|
284
|
+
job.is_pending # bool
|
|
285
|
+
job.is_processing # bool
|
|
286
|
+
job.is_completed # bool
|
|
287
|
+
job.is_failed # bool
|
|
288
|
+
job.is_cancelled # bool
|
|
289
|
+
job.is_terminal # bool — completed, failed, or cancelled
|
|
290
|
+
job.can_retry # bool — tentativas < max_tentativas
|
|
291
|
+
job.remaining_retries # int
|
|
292
|
+
job.age_seconds # float or None
|
|
293
|
+
job.processing_time_seconds # float or None
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Worker
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
from queue_max import Worker, AsyncWorker, WorkerPool
|
|
300
|
+
|
|
301
|
+
# Basic worker
|
|
302
|
+
worker = Worker(
|
|
303
|
+
worker_id="worker-1",
|
|
304
|
+
process_function=my_func, # Callable[[dict], Any]
|
|
305
|
+
queue=queue,
|
|
306
|
+
poll_interval=1.0, # Seconds between polls when queue is empty
|
|
307
|
+
job_timeout=None, # Max seconds per job execution
|
|
308
|
+
on_job_start=None, # Callback(worker_id, job_id, payload)
|
|
309
|
+
on_job_complete=None, # Callback(worker_id, job_id, result)
|
|
310
|
+
on_job_error=None, # Callback(worker_id, job_id, error, permanent)
|
|
311
|
+
)
|
|
312
|
+
worker.start()
|
|
313
|
+
worker.stop(timeout=10.0)
|
|
314
|
+
|
|
315
|
+
stats = worker.get_stats()
|
|
316
|
+
# {
|
|
317
|
+
# "worker_id": str,
|
|
318
|
+
# "state": "running" | "stopped" | ...,
|
|
319
|
+
# "is_running": bool,
|
|
320
|
+
# "processed": int,
|
|
321
|
+
# "failed": int,
|
|
322
|
+
# "retried": int,
|
|
323
|
+
# "throughput_jobs_per_hour": float,
|
|
324
|
+
# "uptime_seconds": float,
|
|
325
|
+
# "current_job_id": int or None,
|
|
326
|
+
# }
|
|
327
|
+
|
|
328
|
+
# Async worker (for coroutines)
|
|
329
|
+
async def async_process(payload):
|
|
330
|
+
await some_api(payload)
|
|
331
|
+
return "ok"
|
|
332
|
+
|
|
333
|
+
async_worker = AsyncWorker("async-1", async_process, queue)
|
|
334
|
+
async_worker.start()
|
|
335
|
+
|
|
336
|
+
# Worker pool with auto-scaling
|
|
337
|
+
pool = WorkerPool(
|
|
338
|
+
workers=[worker1, worker2],
|
|
339
|
+
auto_scale=True,
|
|
340
|
+
min_workers=2,
|
|
341
|
+
max_workers=20,
|
|
342
|
+
scale_up_threshold=100, # Add worker when pending > 100
|
|
343
|
+
scale_down_threshold=10, # Remove worker when pending < 10
|
|
344
|
+
scale_check_interval=60, # Check every 60s
|
|
345
|
+
)
|
|
346
|
+
pool.start_all()
|
|
347
|
+
pool.stop_all()
|
|
348
|
+
pool.wait_for_idle(timeout=30)
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
### Rate Limiter
|
|
352
|
+
|
|
353
|
+
```python
|
|
354
|
+
from queue_max import RateLimiter, RateLimitUnit
|
|
355
|
+
|
|
356
|
+
limiter = RateLimiter(rate_limit=10, unit=RateLimitUnit.PER_SECOND)
|
|
357
|
+
limiter.acquire(timeout=5.0) # Blocks until a token is available; raises RateLimitError on timeout
|
|
358
|
+
limiter.try_acquire() # Non-blocking, returns bool
|
|
359
|
+
limiter.get_remaining_tokens() # float
|
|
360
|
+
limiter.get_retry_after() # float — seconds until next token
|
|
361
|
+
limiter.update_rate_limit(20) # Change limit dynamically
|
|
362
|
+
limiter.reset()
|
|
363
|
+
limiter.get_stats()
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
### Circuit Breaker
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
from queue_max import CircuitBreaker
|
|
370
|
+
|
|
371
|
+
cb = CircuitBreaker(failure_threshold=5, recovery_timeout=60.0)
|
|
372
|
+
cb.is_allowed() # Check if request can pass through
|
|
373
|
+
cb.call(func) # Execute with CB protection (raises CircuitBreakerOpenError)
|
|
374
|
+
cb.record_success() # Reset failure count (called automatically by Queue.complete_job)
|
|
375
|
+
cb.record_failure() # Increment failure count (called automatically by Queue.fail_job)
|
|
376
|
+
cb.state # CircuitState: CLOSED, OPEN, or HALF_OPEN
|
|
377
|
+
cb.reset()
|
|
378
|
+
cb.get_stats()
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
### Decorators
|
|
382
|
+
|
|
383
|
+
```python
|
|
384
|
+
from queue_max import task, periodic_task, retryable_task
|
|
385
|
+
|
|
386
|
+
@task(priority=2, max_retries=5, timeout=30)
|
|
387
|
+
def process_order(order_id: int):
|
|
388
|
+
""".delay() enqueues, direct call executes synchronously."""
|
|
389
|
+
...
|
|
390
|
+
|
|
391
|
+
process_order.delay(42) # Background
|
|
392
|
+
process_order.schedule_in(300, 42) # In 5 minutes
|
|
393
|
+
process_order.schedule_at(datetime(...), 42) # At specific time
|
|
394
|
+
process_order.map([101, 102, 103], coupon=1) # Parallel
|
|
395
|
+
process_order.get_stats() # Task statistics
|
|
396
|
+
|
|
397
|
+
@periodic_task(interval=3600, priority=1)
|
|
398
|
+
def cleanup_old_data():
|
|
399
|
+
"""Auto-runs every hour."""
|
|
400
|
+
...
|
|
401
|
+
|
|
402
|
+
cleanup_old_data.start_scheduler() # Starts daemon thread
|
|
403
|
+
|
|
404
|
+
@retryable_task(max_retries=5, retry_on=[TimeoutError])
|
|
405
|
+
def fetch_external_data(url: str):
|
|
406
|
+
"""Sync retry wrapper. .delay() still enqueues async."""
|
|
407
|
+
...
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
## Retry & Failure Flow
|
|
411
|
+
|
|
412
|
+
### Error Classification
|
|
413
|
+
|
|
414
|
+
When a job fails, `is_retryable_error()` classifies the error:
|
|
415
|
+
|
|
416
|
+
| Error | Retryable? | Behavior |
|
|
417
|
+
|-------|-----------|----------|
|
|
418
|
+
| 4xx (except 429) | No | Permanent → Dead Letter Queue |
|
|
419
|
+
| 429 Too Many Requests | Yes | Retry with backoff |
|
|
420
|
+
| 5xx Server Error | Yes | Retry with backoff |
|
|
421
|
+
| Timeout | Yes | Retry with backoff |
|
|
422
|
+
| ConnectionError | Yes | Retry with backoff |
|
|
423
|
+
|
|
424
|
+
### Retry Mechanics
|
|
425
|
+
|
|
426
|
+
- **Backoff formula**: `base × 2^(attempt-1)` ± 20% jitter, capped at 3600s
|
|
427
|
+
- `max_retries=N` means N retries = N+1 total attempts
|
|
428
|
+
- Each attempt increments `tentativas` in the database
|
|
429
|
+
- After exhausting retries, the job moves to the Dead Letter Queue
|
|
430
|
+
|
|
431
|
+
### Circuit Breaker Integration
|
|
432
|
+
|
|
433
|
+
The circuit breaker is wired into `Queue.complete_job()` and `Queue.fail_job()`:
|
|
434
|
+
|
|
435
|
+
1. `fail_job()` — calls `circuit_breaker.record_failure()` (increments counter)
|
|
436
|
+
2. `complete_job()` — calls `circuit_breaker.record_success()` (resets counter)
|
|
437
|
+
3. After N consecutive failures (default 5), circuit opens → `pop_job()` returns `None`
|
|
438
|
+
4. After the recovery timeout, the next `pop_job()` call transitions the circuit to HALF_OPEN
|
|
439
|
+
5. If the next job succeeds → CLOSED. If it fails → OPEN again.
|
|
440
|
+
|
|
441
|
+
```python
|
|
442
|
+
queue = Queue(max_retries=2, circuit_breaker_threshold=3)
|
|
443
|
+
|
|
444
|
+
# Retry flow (max_retries=2 → 3 total attempts):
|
|
445
|
+
# Attempt 1 → fail → retry (backoff ~120s)
|
|
446
|
+
# Attempt 2 → fail → retry (backoff ~240s)
|
|
447
|
+
# Attempt 3 → fail → Dead Letter Queue
|
|
448
|
+
|
|
449
|
+
# Circuit breaker flow (threshold=3):
|
|
450
|
+
# After 3 consecutive failures → CIRCUIT OPENS
|
|
451
|
+
# pop_job() returns None for all workers
|
|
452
|
+
# After timeout → HALF_OPEN → one job passes
|
|
453
|
+
# If success → CLOSED. If fail → OPEN again.
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
## Dead Letter Queue
|
|
457
|
+
|
|
458
|
+
Jobs that exhaust all retries or fail with `permanent=True` are moved to the `dead_letter_queue` table:
|
|
459
|
+
|
|
460
|
+
```python
|
|
461
|
+
# Inspect DLQ for a specific shard
|
|
462
|
+
dlq = queue.shard_manager.get_dead_letter_queue(shard_id=0)
|
|
463
|
+
|
|
464
|
+
for entry in dlq:
|
|
465
|
+
print(f"Job {entry['original_job_id']}: {entry['error']}")
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
## Orphan Recovery
|
|
469
|
+
|
|
470
|
+
A job stuck in `processing` status (worker died without completing or failing) is recovered:
|
|
471
|
+
|
|
472
|
+
```python
|
|
473
|
+
orphans = queue.recover_orphans()
|
|
474
|
+
print(f"Recovered {orphans} stuck jobs")
|
|
475
|
+
```
|
|
476
|
+
|
|
477
|
+
The recovery checks for jobs whose `heartbeat` is older than `STUCK_TIMEOUT` (default 30000 ms, i.e. 30 s).
|
|
478
|
+
Recovered jobs are reset to `pending` status with `next_retry_at` set to now.
|
|
479
|
+
|
|
480
|
+
## Event System
|
|
481
|
+
|
|
482
|
+
### Built-in Callbacks (no extra dependencies)
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
queue.on("job_enqueued", lambda job_id, shard_id: print(f"Enqueued {job_id}"))
|
|
486
|
+
queue.on("job_completed", lambda job_id, shard_id: update_metrics())
|
|
487
|
+
queue.on("job_failed", lambda job_id, shard_id, error: alert(error))
|
|
488
|
+
queue.on("job_retried", lambda job_id, shard_id, error: log_retry(error))
|
|
489
|
+
queue.on("alert", lambda type, pending, threshold: notify(pending))
|
|
490
|
+
|
|
491
|
+
# Suppress events during batch
|
|
492
|
+
with queue.batch():
|
|
493
|
+
for item in items:
|
|
494
|
+
queue.enqueue(item)
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
Available events:
|
|
498
|
+
|
|
499
|
+
| Event | Payload | Trigger |
|
|
500
|
+
|-------|---------|--------|
|
|
501
|
+
| `job_enqueued` | `job_id`, `shard_id` | `enqueue()` |
|
|
502
|
+
| `job_completed` | `job_id`, `shard_id` | `complete_job()` |
|
|
503
|
+
| `job_failed` | `job_id`, `shard_id`, `error` | `fail_job(permanent=True)` |
|
|
504
|
+
| `job_retried` | `job_id`, `shard_id`, `error` | `fail_job()` with retry |
|
|
505
|
+
| `alert` | `type`, `pending`, `threshold` | Pending > `QUEUE_ALERT_THRESHOLD` |
|
|
506
|
+
|
|
507
|
+
### Typed Events with bubus (optional)
|
|
508
|
+
|
|
509
|
+
Requires: `pip install queue-max[events]` (Python 3.11+; the `bubus` dependency is only installed on Python 3.11 or newer)
|
|
510
|
+
|
|
511
|
+
This extra provides typed Pydantic events with async dispatch, event tree debugging, `expect()` for waiting,
|
|
512
|
+
and pattern matching:
|
|
513
|
+
|
|
514
|
+
```python
|
|
515
|
+
from queue_max.contrib.events import QueueEventBus, JobCompleted, JobFailed
|
|
516
|
+
|
|
517
|
+
queue = Queue(shards=3)
|
|
518
|
+
events = QueueEventBus(queue)
|
|
519
|
+
|
|
520
|
+
# Typed handler
|
|
521
|
+
@events.on(JobCompleted)
|
|
522
|
+
def handle(event: JobCompleted) -> None:
|
|
523
|
+
print(f"Job {event.job_id} done in shard {event.shard_id}")
|
|
524
|
+
|
|
525
|
+
# Wildcard handler
|
|
526
|
+
@events.on("job_*")
|
|
527
|
+
def log_all(event):
|
|
528
|
+
print(f"{type(event).__name__}: {event.model_dump()}")
|
|
529
|
+
|
|
530
|
+
# Wait for a specific event (blocks until received)
|
|
531
|
+
failed = events.expect(JobFailed, timeout=30)
|
|
532
|
+
print(f"Job {failed.job_id} failed: {failed.error}")
|
|
533
|
+
|
|
534
|
+
# Event tree for debugging causality
|
|
535
|
+
print(events.log_tree())
|
|
536
|
+
|
|
537
|
+
# Metrics
|
|
538
|
+
metrics = events.get_metrics()
|
|
539
|
+
|
|
540
|
+
# Context manager
|
|
541
|
+
with QueueEventBus(queue) as events:
|
|
542
|
+
@events.on(JobCompleted)
|
|
543
|
+
def handler(event): ...
|
|
544
|
+
|
|
545
|
+
# Available typed events:
|
|
546
|
+
# JobEnqueued — job_id, shard_id
|
|
547
|
+
# JobCompleted — job_id, shard_id
|
|
548
|
+
# JobFailed — job_id, shard_id, error
|
|
549
|
+
# JobRetried — job_id, shard_id, error
|
|
550
|
+
# Alert — alert_type, pending, threshold
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
## CLI
|
|
554
|
+
|
|
555
|
+
```bash
|
|
556
|
+
# Stats
|
|
557
|
+
queue-max stats
|
|
558
|
+
queue-max stats --json --shard 0
|
|
559
|
+
|
|
560
|
+
# Start workers
|
|
561
|
+
queue-max worker --function mymodule:myfunction --workers 4
|
|
562
|
+
|
|
563
|
+
# Enqueue
|
|
564
|
+
queue-max enqueue --payload '{"task":"test"}' --priority 2 --json
|
|
565
|
+
|
|
566
|
+
# List & manage
|
|
567
|
+
queue-max list --status failed --limit 20
|
|
568
|
+
queue-max list --status processing
|
|
569
|
+
queue-max retry
|
|
570
|
+
queue-max retry --shard 0 --job-id 42
|
|
571
|
+
queue-max purge --days 7
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
## Framework Integrations
|
|
575
|
+
|
|
576
|
+
### Django
|
|
577
|
+
|
|
578
|
+
```python
|
|
579
|
+
# settings.py
|
|
580
|
+
INSTALLED_APPS = ["queue_max.contrib.django", ...]
|
|
581
|
+
QUEUE_MAX = {"SHARDS": 4, "RATE_LIMIT": 160}
|
|
582
|
+
|
|
583
|
+
# tasks.py
|
|
584
|
+
from queue_max.contrib.django import task
|
|
585
|
+
|
|
586
|
+
@task
|
|
587
|
+
def my_task(user_id):
|
|
588
|
+
...
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
Management commands: `python manage.py queue_worker`, `queue_stats`, `queue_purge`.
|
|
592
|
+
|
|
593
|
+
### FastAPI
|
|
594
|
+
|
|
595
|
+
```python
|
|
596
|
+
from fastapi import FastAPI
|
|
597
|
+
from queue_max.contrib.fastapi import QueueMiddleware
|
|
598
|
+
|
|
599
|
+
app = FastAPI()
|
|
600
|
+
app.add_middleware(QueueMiddleware, max_workers=4)
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
### Flask
|
|
604
|
+
|
|
605
|
+
```python
|
|
606
|
+
from flask import Flask
|
|
607
|
+
from queue_max.contrib.flask import QueueExtension
|
|
608
|
+
|
|
609
|
+
app = Flask(__name__)
|
|
610
|
+
queue = QueueExtension(app)
|
|
611
|
+
|
|
612
|
+
@queue.task
|
|
613
|
+
def my_task():
|
|
614
|
+
...
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
## Database Schema
|
|
618
|
+
|
|
619
|
+
Each shard is a separate `.db` file (`data/shard_0.db`, `data/shard_1.db`, ...) with WAL mode.
|
|
620
|
+
|
|
621
|
+
### `fila` table (the queue)
|
|
622
|
+
|
|
623
|
+
| Column | Type | Description |
|
|
624
|
+
|--------|------|-------------|
|
|
625
|
+
| `id` | INTEGER PK | Auto-increment job ID |
|
|
626
|
+
| `pagina_id` | INTEGER NULL | Optional ID for consistent shard routing |
|
|
627
|
+
| `payload` | TEXT | JSON-serialized job data |
|
|
628
|
+
| `status` | TEXT | pending, processing, completed, failed, cancelled, scheduled |
|
|
629
|
+
| `priority` | INTEGER | 0=low, 1=medium, 2=high |
|
|
630
|
+
| `tentativas` | INTEGER | Attempt counter |
|
|
631
|
+
| `max_tentativas` | INTEGER | Max attempts allowed = max_retries + 1 |
|
|
632
|
+
| `last_error` | TEXT NULL | Error message |
|
|
633
|
+
| `error_type` | TEXT NULL | Exception class name |
|
|
634
|
+
| `error_stack` | TEXT NULL | Full traceback |
|
|
635
|
+
| `worker_id` | TEXT NULL | Currently processing worker |
|
|
636
|
+
| `heartbeat` | TEXT NULL | ISO timestamp of last activity |
|
|
637
|
+
| `created_at` | TEXT | Creation timestamp |
|
|
638
|
+
| `started_at` | TEXT NULL | Processing start timestamp |
|
|
639
|
+
| `completed_at` | TEXT NULL | Completion/failure timestamp |
|
|
640
|
+
| `next_retry_at` | TEXT NULL | Scheduled retry timestamp |
|
|
641
|
+
|
|
642
|
+
### `dead_letter_queue` table
|
|
643
|
+
|
|
644
|
+
| Column | Type | Description |
|
|
645
|
+
|--------|------|-------------|
|
|
646
|
+
| `id` | INTEGER PK | Row ID |
|
|
647
|
+
| `original_job_id` | INTEGER | Reference to original fila.id |
|
|
648
|
+
| `payload` | TEXT | Original job payload |
|
|
649
|
+
| `error` | TEXT | Error message |
|
|
650
|
+
| `error_type` | TEXT | Exception type |
|
|
651
|
+
| `failed_at` | TEXT | Failure timestamp |
|
|
652
|
+
| `shard_id` | INTEGER | Originating shard |
|
|
653
|
+
|
|
654
|
+
### `shard_metadata` table
|
|
655
|
+
|
|
656
|
+
Per-shard statistics: version, created_at, last_vacuum, total_jobs_processed, total_jobs_failed.
|
|
657
|
+
|
|
658
|
+
## Performance
|
|
659
|
+
|
|
660
|
+
| Scenario | Config | Throughput |
|
|
661
|
+
|----------|--------|-----------|
|
|
662
|
+
| Burst | 20 workers, 10 shards | **~3,300 jobs/sec** |
|
|
663
|
+
| Contention | 10 workers, 1 shard | **~1,660 jobs/sec** |
|
|
664
|
+
| 30% failure | 8 workers, max_retries=2 | **Stable** |
|
|
665
|
+
| 50k jobs | 12 workers, 12 shards | **219 jobs/sec** (enqueue bottleneck) |
|
|
666
|
+
|
|
667
|
+
- Max queue size: 1M+ jobs per shard
|
|
668
|
+
- [Detailed stress test results](docs/stress-test.md)
|
|
669
|
+
|
|
670
|
+
## Running Tests
|
|
671
|
+
|
|
672
|
+
```bash
|
|
673
|
+
pip install -e ".[all]"
|
|
674
|
+
PYTHONPATH=src pytest tests/ -v
|
|
675
|
+
```
|
|
676
|
+
|
|
677
|
+
## License
|
|
678
|
+
|
|
679
|
+
MIT
|