queue-max 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {queue_max-0.1.0 → queue_max-0.1.1}/LICENSE +1 -1
  2. queue_max-0.1.1/PKG-INFO +679 -0
  3. queue_max-0.1.1/README.md +636 -0
  4. {queue_max-0.1.0 → queue_max-0.1.1}/examples/ai_rate_limited.py +1 -1
  5. {queue_max-0.1.0 → queue_max-0.1.1}/examples/basic_usage.py +1 -1
  6. {queue_max-0.1.0 → queue_max-0.1.1}/examples/webhook_processor.py +1 -1
  7. {queue_max-0.1.0 → queue_max-0.1.1}/pyproject.toml +3 -1
  8. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/__init__.py +1 -1
  9. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/cli.py +14 -15
  10. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/__init__.py +1 -1
  11. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/__init__.py +1 -1
  12. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_purge.py +1 -1
  13. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_stats.py +3 -3
  14. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/queue_worker.py +2 -2
  15. queue_max-0.1.1/src/queue_max/contrib/events.py +313 -0
  16. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/fastapi/__init__.py +7 -7
  17. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/flask/__init__.py +4 -4
  18. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/__init__.py +1 -1
  19. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/circuit_breaker.py +21 -5
  20. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/database.py +113 -31
  21. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/decorator.py +16 -16
  22. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/queue.py +143 -55
  23. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/rate_limiter.py +2 -2
  24. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/core/worker.py +54 -39
  25. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/exceptions.py +1 -1
  26. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/models/__init__.py +1 -1
  27. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/models/job.py +11 -13
  28. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/utils/__init__.py +1 -1
  29. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/utils/helpers.py +3 -3
  30. queue_max-0.1.1/src/queue_max.egg-info/PKG-INFO +679 -0
  31. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/SOURCES.txt +1 -0
  32. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/requires.txt +8 -0
  33. queue_max-0.1.0/PKG-INFO +0 -233
  34. queue_max-0.1.0/README.md +0 -193
  35. queue_max-0.1.0/src/queue_max.egg-info/PKG-INFO +0 -233
  36. {queue_max-0.1.0 → queue_max-0.1.1}/MANIFEST.in +0 -0
  37. {queue_max-0.1.0 → queue_max-0.1.1}/setup.cfg +0 -0
  38. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/__init__.py +0 -0
  39. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/contrib/django/management/commands/__init__.py +0 -0
  40. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max/py.typed +0 -0
  41. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/dependency_links.txt +0 -0
  42. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/entry_points.txt +0 -0
  43. {queue_max-0.1.0 → queue_max-0.1.1}/src/queue_max.egg-info/top_level.txt +0 -0
  44. {queue_max-0.1.0 → queue_max-0.1.1}/tests/__init__.py +0 -0
  45. {queue_max-0.1.0 → queue_max-0.1.1}/tests/conftest.py +0 -0
  46. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_circuit_breaker.py +0 -0
  47. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_concurrency.py +0 -0
  48. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_helpers.py +0 -0
  49. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_integration.py +0 -0
  50. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_queue.py +0 -0
  51. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_rate_limiter.py +0 -0
  52. {queue_max-0.1.0 → queue_max-0.1.1}/tests/test_worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 Robusta Queue Contributors
3
+ Copyright (c) 2024 All451
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,679 @@
1
+ Metadata-Version: 2.4
2
+ Name: queue-max
3
+ Version: 0.1.1
4
+ Summary: Task queue with SQLite sharding, rate limiting, and circuit breaker
5
+ Author: Alexandre All
6
+ License: MIT
7
+ Project-URL: homepage, https://github.com/all451/queue-max
8
+ Project-URL: repository, https://github.com/all451/queue-max
9
+ Keywords: queue,task-queue,sqlite,background-tasks,worker,sharding
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: System :: Distributed Computing
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: typing-extensions>=4.5.0
26
+ Provides-Extra: events
27
+ Requires-Dist: bubus>=1.5.0; python_version >= "3.11" and extra == "events"
28
+ Provides-Extra: webhook
29
+ Requires-Dist: requests>=2.31.0; extra == "webhook"
30
+ Provides-Extra: django
31
+ Requires-Dist: Django>=3.2; extra == "django"
32
+ Provides-Extra: fastapi
33
+ Requires-Dist: fastapi>=0.100.0; extra == "fastapi"
34
+ Provides-Extra: flask
35
+ Requires-Dist: Flask>=2.0; extra == "flask"
36
+ Provides-Extra: all
37
+ Requires-Dist: bubus>=1.5.0; python_version >= "3.11" and extra == "all"
38
+ Requires-Dist: requests>=2.31.0; extra == "all"
39
+ Requires-Dist: Django>=3.2; extra == "all"
40
+ Requires-Dist: fastapi>=0.100.0; extra == "all"
41
+ Requires-Dist: Flask>=2.0; extra == "all"
42
+ Dynamic: license-file
43
+
44
+ # Queue Max
45
+
46
+ Task queue library with SQLite persistence, sharding, rate limiting, and circuit breaker.
47
+
48
+ No Redis or RabbitMQ required. The only required dependency is `typing-extensions`; framework and event integrations are optional extras.
49
+
50
+ ```python
51
+ from queue_max import Queue, Worker
52
+
53
+ queue = Queue()
54
+ queue.enqueue({"task": "send_email", "to": "user@example.com"})
55
+
56
+ Worker("worker-1", lambda p: print(p), queue).start()
57
+ ```
58
+
59
+ ## Installation
60
+
61
+ ```bash
62
+ pip install queue-max
63
+ ```
64
+
65
+ Optional extras:
66
+
67
+ ```bash
68
+ pip install queue-max[events] # Typed events with bubus + Pydantic
69
+ pip install queue-max[django] # Django management commands
70
+ pip install queue-max[fastapi] # FastAPI middleware
71
+ pip install queue-max[flask] # Flask extension
72
+ ```
73
+
74
+ ## Architecture
75
+
76
+ ```
77
+ ┌──────────────────────────────────────────────┐
78
+ │ Queue │
79
+ │ ┌────────────┐ ┌──────────┐ ┌──────────┐ │
80
+ │ │ Shard Mgr │ │RateLimit │ │ Circuit │ │
81
+ │ │ (SQLite) │ │ Token │ │ Breaker │ │
82
+ │ └─────┬──────┘ └──────────┘ └──────────┘ │
83
+ └────────┼─────────────────────────────────────┘
84
+
85
+ ┌───────────────────┼───────────────────┐
86
+ ▼ ▼ ▼
87
+ ┌──────────┐ ┌──────────┐ ┌──────────┐
88
+ │ shard_0 │ │ shard_1 │ │ shard_N │
89
+ │ fila.db │ │ fila.db │ │ fila.db │
90
+ │ WAL │ │ WAL │ │ WAL │
91
+ └──────────┘ └──────────┘ └──────────┘
92
+ │ │ │
93
+ └───────────┬───────┴───────┬───────────┘
94
+ ▼ ▼
95
+ ┌──────────┐ ┌──────────┐
96
+ │ Worker │ │ Worker │ ...
97
+ │ (thread) │ │ (thread) │
98
+ └──────────┘ └──────────┘
99
+ ```
100
+
101
+ Each shard is an independent SQLite file in WAL mode. Workers scan shard groups in random order to distribute load.
102
+
103
+ ## Features
104
+
105
+ **SQLite Persistence** — Jobs are stored in SQLite with WAL mode. No external services needed.
106
+
107
+ **Physical Sharding** — Multiple `.db` files (default 6) allow true concurrent write access. Each shard is an independent database with its own lock.
108
+
109
+ **Rate Limiting** — Token bucket algorithm shared across all workers. Configurable per minute/second/hour.
110
+
111
+ **Circuit Breaker** — Three-state (CLOSED → OPEN → HALF_OPEN → CLOSED). Automatically tracks job failures from `Queue.fail_job()` and resets on `Queue.complete_job()`.
112
+
113
+ **Retry with Backoff** — Exponential backoff (base × 2^(n-1)) with ±20% jitter. Transient errors (5xx, timeout, 429) retry automatically. Client errors (4xx except 429) are permanent.
114
+
115
+ **Dead Letter Queue** — Jobs that exhaust all retries are moved to a DLQ table for inspection.
116
+
117
+ **Heartbeat and Orphan Recovery** — Workers send heartbeats every 5s. Jobs with stale heartbeats (>30s) are recovered as pending.
118
+
119
+ **Priority Queues** — Three levels: low (0), medium (1), high (2). Ordering is per-shard.
120
+
121
+ **Event System** — Simple callbacks built-in. Optional typed events with bubus + Pydantic (`queue-max[events]`).
122
+
123
+ **CLI** — Built-in commands for stats, workers, enqueue, retry, purge, and listing.
124
+
125
+ ## Quick Start
126
+
127
+ ### Basic enqueue + worker
128
+
129
+ ```python
130
+ from queue_max import Queue, Worker
131
+ import time
132
+
133
+ queue = Queue(shards=3, rate_limit=100)
134
+
135
+ def process(payload: dict) -> str:
136
+ print(f"Processing: {payload}")
137
+ return "done"
138
+
139
+ # Enqueue jobs
140
+ for i in range(5):
141
+ queue.enqueue({"task": f"job-{i}"}, priority=i % 3)
142
+
143
+ # Process with a worker
144
+ worker = Worker("example", process, queue, poll_interval=0.1)
145
+ worker.start()
146
+ time.sleep(3)
147
+ worker.stop()
148
+
149
+ stats = queue.get_stats()
150
+ print(f"Stats: {stats}")
151
+ ```
152
+
153
+ ### With decorator
154
+
155
+ ```python
156
+ from queue_max import task
157
+
158
+ @task(priority=2, max_retries=3)
159
+ def send_email(to: str, subject: str):
160
+ return send(to, subject)
161
+
162
+ # Enqueue for background processing
163
+ send_email.delay("user@example.com", "Hello")
164
+
165
+ # Schedule for later
166
+ from datetime import datetime, timezone, timedelta
167
+ future = datetime.now(timezone.utc) + timedelta(minutes=5)
168
+ send_email.schedule_at(future, "user@example.com", "Scheduled!")
169
+
170
+ # Parallel processing
171
+ send_email.map(["a@b.com", "c@d.com"], "Welcome!")
172
+ ```
173
+
174
+ ## Configuration
175
+
176
+ | Variable | Default | Description |
177
+ |----------|---------|-------------|
178
+ | `NUM_SHARDS` | 6 | Number of shard databases |
179
+ | `RATE_LIMIT_MAX` | 160 | Max requests per minute |
180
+ | `QUEUE_MAX_RETRIES` | 3 | Max retry attempts after first failure |
181
+ | `CIRCUIT_FAILURE_THRESHOLD` | 5 | Consecutive failures before circuit opens |
182
+ | `CIRCUIT_TIMEOUT` | 60 | Seconds before circuit recovery attempt |
183
+ | `DB_BUSY_TIMEOUT` | 30000 | SQLite busy timeout (ms) |
184
+ | `DATA_DIR` | ./data | Directory for shard databases |
185
+ | `CACHE_SIZE` | 10000 | SQLite cache size (pages) |
186
+ | `MMAP_SIZE` | 268435456 | Memory-mapped I/O size (bytes) |
187
+ | `HEARTBEAT_INTERVAL` | 5000 | Worker heartbeat interval (ms) |
188
+ | `STUCK_TIMEOUT` | 30000 | Orphan job timeout (ms) |
189
+ | `RECOVERY_INTERVAL` | 10000 | Orphan recovery check interval (ms) |
190
+ | `CLEANUP_DAYS` | 7 | Age threshold for job cleanup |
191
+ | `QUEUE_ALERT_THRESHOLD` | 1000 | Pending jobs before alert event |
192
+ | `QUEUE_MAX_LOG_LEVEL` | WARNING | Log level |
193
+
194
+ Full list in [.env.example](.env.example).
195
+
196
+ ## API Reference
197
+
198
+ ### Queue
199
+
200
+ ```python
201
+ from queue_max import Queue
202
+
203
+ queue = Queue(
204
+ shards=None, # Number of shards (default: NUM_SHARDS or 6)
205
+ rate_limit=None, # Requests per minute (default: RATE_LIMIT_MAX or 160)
206
+ max_retries=None, # Max retries after first failure (default: QUEUE_MAX_RETRIES or 3)
207
+ data_dir=None, # Directory for shard files (default: DATA_DIR or ./data)
208
+ circuit_breaker_threshold=None, # Failures before circuit opens (default: 5)
209
+ circuit_breaker_timeout=None, # Seconds before recovery (default: 60)
210
+ rate_limiter_timeout=5.0, # Seconds to wait for rate limit token
211
+ )
212
+
213
+ # ── Enqueue ──
214
+ queue.enqueue(payload, pagina_id=None, priority=0, max_retries=None)
215
+ queue.enqueue_batch([{"payload": {...}, "pagina_id": 1, "priority": 2}, ...])
216
+ queue.enqueue_from_file("jobs.jsonl", fmt="jsonl")
217
+
218
+ # ── Process ──
219
+ job = queue.pop_job(worker_id) # Returns Job or None
220
+ queue.complete_job(job_id, shard_id)
221
+ queue.fail_job(job_id, shard_id, error, permanent=False)
222
+
223
+ # ── Management ──
224
+ queue.retry_failed_jobs(shard_id=None)
225
+ queue.cleanup_old_jobs(days=7)
226
+ queue.purge_queue(status=None) # "pending", "failed", "processing", or None (all)
227
+ queue.recover_orphans()
228
+ queue.heartbeat(shard_id, worker_id)
229
+ queue.wait_until_empty(timeout=None)
230
+ queue.get_failed_jobs(limit=100)
231
+ queue.get_processing_jobs()
232
+ stats = queue.get_stats()
233
+
234
+ # ── Events ──
235
+ queue.on("job_completed", lambda job_id, shard_id: print(f"Done: {job_id}"))
236
+ queue.on("alert", lambda type, pending, threshold: print(f"Alert: {pending}"))
237
+
238
+ # ── Context manager (auto-close) ──
239
+ with Queue() as queue:
240
+ queue.enqueue({"task": "example"})
241
+ ```
242
+
243
+ ### Queue Stats
244
+
245
+ ```python
246
+ stats = queue.get_stats()
247
+ # {
248
+ # "pending": int,
249
+ # "processing": int,
250
+ # "failed": int,
251
+ # "num_shards": int,
252
+ # "rate_limit": int,
253
+ # "max_retries": int,
254
+ # "circuit_state": "closed" | "open" | "half_open",
255
+ # "circuit_failures": int,
256
+ # "tokens_available": float,
257
+ # "uptime_seconds": float,
258
+ # "is_healthy": bool,
259
+ # }
260
+ ```
261
+
262
+ ### Job
263
+
264
+ ```python
265
+ from queue_max import Job, JobStatus, JobPriority
266
+
267
+ # Job properties
268
+ job.id # int — job ID (unique per shard)
269
+ job.payload # dict — job data
270
+ job.status # JobStatus — PENDING, PROCESSING, COMPLETED, FAILED, CANCELLED
271
+ job.priority_int # int — 0, 1, or 2
272
+ job.shard_id # int — which shard holds this job
273
+ job.tentativas # int — attempt count
274
+ job.max_tentativas # int — max attempts allowed = max_retries + 1
275
+ job.last_error # str or None
276
+ job.error_type # str or None
277
+ job.worker_id # str or None
278
+ job.created_at # ISO timestamp
279
+ job.next_retry_at # ISO timestamp or None (retry scheduled for the future)
280
+ job.started_at # ISO timestamp or None
281
+ job.completed_at # ISO timestamp or None
282
+
283
+ # Convenience checks
284
+ job.is_pending # bool
285
+ job.is_processing # bool
286
+ job.is_completed # bool
287
+ job.is_failed # bool
288
+ job.is_cancelled # bool
289
+ job.is_terminal # bool — completed, failed, or cancelled
290
+ job.can_retry # bool — tentativas < max_tentativas
291
+ job.remaining_retries # int
292
+ job.age_seconds # float or None
293
+ job.processing_time_seconds # float or None
294
+ ```
295
+
296
+ ### Worker
297
+
298
+ ```python
299
+ from queue_max import Worker, AsyncWorker, WorkerPool
300
+
301
+ # Basic worker
302
+ worker = Worker(
303
+ worker_id="worker-1",
304
+ process_function=my_func, # Callable[[dict], Any]
305
+ queue=queue,
306
+ poll_interval=1.0, # Seconds between polls when queue is empty
307
+ job_timeout=None, # Max seconds per job execution
308
+ on_job_start=None, # Callback(worker_id, job_id, payload)
309
+ on_job_complete=None, # Callback(worker_id, job_id, result)
310
+ on_job_error=None, # Callback(worker_id, job_id, error, permanent)
311
+ )
312
+ worker.start()
313
+ worker.stop(timeout=10.0)
314
+
315
+ stats = worker.get_stats()
316
+ # {
317
+ # "worker_id": str,
318
+ # "state": "running" | "stopped" | ...,
319
+ # "is_running": bool,
320
+ # "processed": int,
321
+ # "failed": int,
322
+ # "retried": int,
323
+ # "throughput_jobs_per_hour": float,
324
+ # "uptime_seconds": float,
325
+ # "current_job_id": int or None,
326
+ # }
327
+
328
+ # Async worker (for coroutines)
329
+ async def async_process(payload):
330
+ await some_api(payload)
331
+ return "ok"
332
+
333
+ async_worker = AsyncWorker("async-1", async_process, queue)
334
+ async_worker.start()
335
+
336
+ # Worker pool with auto-scaling
337
+ pool = WorkerPool(
338
+ workers=[worker1, worker2],
339
+ auto_scale=True,
340
+ min_workers=2,
341
+ max_workers=20,
342
+ scale_up_threshold=100, # Add worker when pending > 100
343
+ scale_down_threshold=10, # Remove worker when pending < 10
344
+ scale_check_interval=60, # Check every 60s
345
+ )
346
+ pool.start_all()
347
+ pool.stop_all()
348
+ pool.wait_for_idle(timeout=30)
349
+ ```
350
+
351
+ ### Rate Limiter
352
+
353
+ ```python
354
+ from queue_max import RateLimiter, RateLimitUnit
355
+
356
+ limiter = RateLimiter(rate_limit=10, unit=RateLimitUnit.PER_SECOND)
357
+ limiter.acquire(timeout=5.0) # Blocks until token available, raises RateLimitError
358
+ limiter.try_acquire() # Non-blocking, returns bool
359
+ limiter.get_remaining_tokens() # float
360
+ limiter.get_retry_after() # float — seconds until next token
361
+ limiter.update_rate_limit(20) # Change limit dynamically
362
+ limiter.reset()
363
+ limiter.get_stats()
364
+ ```
365
+
366
+ ### Circuit Breaker
367
+
368
+ ```python
369
+ from queue_max import CircuitBreaker
370
+
371
+ cb = CircuitBreaker(failure_threshold=5, recovery_timeout=60.0)
372
+ cb.is_allowed() # Check if request can pass through
373
+ cb.call(func) # Execute with CB protection (raises CircuitBreakerOpenError)
374
+ cb.record_success() # Reset failure count (called automatically by Queue.complete_job)
375
+ cb.record_failure() # Increment failure count (called automatically by Queue.fail_job)
376
+ cb.state # CircuitState: CLOSED, OPEN, or HALF_OPEN
377
+ cb.reset()
378
+ cb.get_stats()
379
+ ```
380
+
381
+ ### Decorators
382
+
383
+ ```python
384
+ from queue_max import task, periodic_task, retryable_task
385
+
386
+ @task(priority=2, max_retries=5, timeout=30)
387
+ def process_order(order_id: int):
388
+ """.delay() enqueues, direct call executes synchronously."""
389
+ ...
390
+
391
+ process_order.delay(42) # Background
392
+ process_order.schedule_in(300, 42) # In 5 minutes
393
+ process_order.schedule_at(datetime(...), 42) # At specific time
394
+ process_order.map([101, 102, 103], coupon=1) # Parallel
395
+ process_order.get_stats() # Task statistics
396
+
397
+ @periodic_task(interval=3600, priority=1)
398
+ def cleanup_old_data():
399
+ """Auto-runs every hour."""
400
+ ...
401
+
402
+ cleanup_old_data.start_scheduler() # Starts daemon thread
403
+
404
+ @retryable_task(max_retries=5, retry_on=[TimeoutError])
405
+ def fetch_external_data(url: str):
406
+ """Sync retry wrapper. .delay() still enqueues async."""
407
+ ...
408
+ ```
409
+
410
+ ## Retry & Failure Flow
411
+
412
+ ### Error Classification
413
+
414
+ When a job fails, `is_retryable_error()` classifies the error:
415
+
416
+ | Error | Retryable? | Behavior |
417
+ |-------|-----------|----------|
418
+ | 4xx (except 429) | No | Permanent → Dead Letter Queue |
419
+ | 429 Too Many Requests | Yes | Retry with backoff |
420
+ | 5xx Server Error | Yes | Retry with backoff |
421
+ | Timeout | Yes | Retry with backoff |
422
+ | ConnectionError | Yes | Retry with backoff |
423
+
424
+ ### Retry Mechanics
425
+
426
+ - **Backoff formula**: `base × 2^(attempt-1)` ± 20% jitter, capped at 3600s
427
+ - `max_retries=N` means N retries = N+1 total attempts
428
+ - Each attempt increments `tentativas` in the database
429
+ - After exhausting retries, the job moves to the Dead Letter Queue
430
+
431
+ ### Circuit Breaker Integration
432
+
433
+ The circuit breaker is wired into `Queue.complete_job()` and `Queue.fail_job()`:
434
+
435
+ 1. `fail_job()` — calls `circuit_breaker.record_failure()` (increments counter)
436
+ 2. `complete_job()` — calls `circuit_breaker.record_success()` (resets counter)
437
+ 3. After N consecutive failures (default 5), circuit opens → `pop_job()` returns `None`
438
+ 4. After recovery timeout, next `pop_job()` transitions to HALF_OPEN
439
+ 5. If the next job succeeds → CLOSED. If it fails → OPEN again.
440
+
441
+ ```python
442
+ queue = Queue(max_retries=2, circuit_breaker_threshold=3)
443
+
444
+ # Retry flow (max_retries=2 → 3 total attempts):
445
+ # Attempt 1 → fail → retry (backoff ~120s)
446
+ # Attempt 2 → fail → retry (backoff ~240s)
447
+ # Attempt 3 → fail → Dead Letter Queue
448
+
449
+ # Circuit breaker flow (threshold=3):
450
+ # After 3 consecutive failures → CIRCUIT OPENS
451
+ # pop_job() returns None for all workers
452
+ # After timeout → HALF_OPEN → one job passes
453
+ # If success → CLOSED. If fail → OPEN again.
454
+ ```
455
+
456
+ ## Dead Letter Queue
457
+
458
+ Jobs that exhaust all retries or fail with `permanent=True` are moved to the `dead_letter_queue` table:
459
+
460
+ ```python
461
+ # Inspect DLQ for a specific shard
462
+ dlq = queue.shard_manager.get_dead_letter_queue(shard_id=0)
463
+
464
+ for entry in dlq:
465
+ print(f"Job {entry['original_job_id']}: {entry['error']}")
466
+ ```
467
+
468
+ ## Orphan Recovery
469
+
470
+ A job left in `processing` status because its worker died without completing or failing it can be recovered with `recover_orphans()`:
471
+
472
+ ```python
473
+ orphans = queue.recover_orphans()
474
+ print(f"Recovered {orphans} stuck jobs")
475
+ ```
476
+
477
+ Recovery looks for jobs whose `heartbeat` is older than `STUCK_TIMEOUT` (default 30000 ms, i.e. 30s).
478
+ Recovered jobs are reset to `pending` status with `next_retry_at` set to now.
479
+
480
+ ## Event System
481
+
482
+ ### Built-in Callbacks (no extra dependencies)
483
+
484
+ ```python
485
+ queue.on("job_enqueued", lambda job_id, shard_id: print(f"Enqueued {job_id}"))
486
+ queue.on("job_completed", lambda job_id, shard_id: update_metrics())
487
+ queue.on("job_failed", lambda job_id, shard_id, error: alert(error))
488
+ queue.on("job_retried", lambda job_id, shard_id, error: log_retry(error))
489
+ queue.on("alert", lambda type, pending, threshold: notify(pending))
490
+
491
+ # Suppress events during batch
492
+ with queue.batch():
493
+ for item in items:
494
+ queue.enqueue(item)
495
+ ```
496
+
497
+ Available events:
498
+
499
+ | Event | Payload | Trigger |
500
+ |-------|---------|--------|
501
+ | `job_enqueued` | `job_id`, `shard_id` | `enqueue()` |
502
+ | `job_completed` | `job_id`, `shard_id` | `complete_job()` |
503
+ | `job_failed` | `job_id`, `shard_id`, `error` | `fail_job(permanent=True)` |
504
+ | `job_retried` | `job_id`, `shard_id`, `error` | `fail_job()` with retry |
505
+ | `alert` | `type`, `pending`, `threshold` | Pending > `QUEUE_ALERT_THRESHOLD` |
506
+
507
+ ### Typed Events with bubus (optional)
508
+
509
+ Requires: `pip install queue-max[events]` (the `bubus` dependency is only installed on Python 3.11+)
510
+
511
+ Typed Pydantic events with async dispatch, event tree debugging, `expect()` for waiting,
512
+ and pattern matching:
513
+
514
+ ```python
515
+ from queue_max.contrib.events import QueueEventBus, JobCompleted, JobFailed
516
+
517
+ queue = Queue(shards=3)
518
+ events = QueueEventBus(queue)
519
+
520
+ # Typed handler
521
+ @events.on(JobCompleted)
522
+ def handle(event: JobCompleted) -> None:
523
+ print(f"Job {event.job_id} done in shard {event.shard_id}")
524
+
525
+ # Wildcard handler
526
+ @events.on("job_*")
527
+ def log_all(event):
528
+ print(f"{type(event).__name__}: {event.model_dump()}")
529
+
530
+ # Wait for a specific event (blocks until received)
531
+ failed = events.expect(JobFailed, timeout=30)
532
+ print(f"Job {failed.job_id} failed: {failed.error}")
533
+
534
+ # Event tree for debugging causality
535
+ print(events.log_tree())
536
+
537
+ # Metrics
538
+ metrics = events.get_metrics()
539
+
540
+ # Context manager
541
+ with QueueEventBus(queue) as events:
542
+ @events.on(JobCompleted)
543
+ def handler(event): ...
544
+
545
+ # Available typed events:
546
+ # JobEnqueued — job_id, shard_id
547
+ # JobCompleted — job_id, shard_id
548
+ # JobFailed — job_id, shard_id, error
549
+ # JobRetried — job_id, shard_id, error
550
+ # Alert — alert_type, pending, threshold
551
+ ```
552
+
553
+ ## CLI
554
+
555
+ ```bash
556
+ # Stats
557
+ queue-max stats
558
+ queue-max stats --json --shard 0
559
+
560
+ # Start workers
561
+ queue-max worker --function mymodule:myfunction --workers 4
562
+
563
+ # Enqueue
564
+ queue-max enqueue --payload '{"task":"test"}' --priority 2 --json
565
+
566
+ # List & manage
567
+ queue-max list --status failed --limit 20
568
+ queue-max list --status processing
569
+ queue-max retry
570
+ queue-max retry --shard 0 --job-id 42
571
+ queue-max purge --days 7
572
+ ```
573
+
574
+ ## Framework Integrations
575
+
576
+ ### Django
577
+
578
+ ```python
579
+ # settings.py
580
+ INSTALLED_APPS = ["queue_max.contrib.django", ...]
581
+ QUEUE_MAX = {"SHARDS": 4, "RATE_LIMIT": 160}
582
+
583
+ # tasks.py
584
+ from queue_max.contrib.django import task
585
+
586
+ @task
587
+ def my_task(user_id):
588
+ ...
589
+ ```
590
+
591
+ Management commands: `python manage.py queue_worker`, `queue_stats`, `queue_purge`.
592
+
593
+ ### FastAPI
594
+
595
+ ```python
596
+ from fastapi import FastAPI
597
+ from queue_max.contrib.fastapi import QueueMiddleware
598
+
599
+ app = FastAPI()
600
+ app.add_middleware(QueueMiddleware, max_workers=4)
601
+ ```
602
+
603
+ ### Flask
604
+
605
+ ```python
606
+ from flask import Flask
607
+ from queue_max.contrib.flask import QueueExtension
608
+
609
+ app = Flask(__name__)
610
+ queue = QueueExtension(app)
611
+
612
+ @queue.task
613
+ def my_task():
614
+ ...
615
+ ```
616
+
617
+ ## Database Schema
618
+
619
+ Each shard is a separate `.db` file (`data/shard_0.db`, `data/shard_1.db`, ...) with WAL mode.
620
+
621
+ ### `fila` table (the queue)
622
+
623
+ | Column | Type | Description |
624
+ |--------|------|-------------|
625
+ | `id` | INTEGER PK | Auto-increment job ID |
626
+ | `pagina_id` | INTEGER NULL | Optional ID for consistent shard routing |
627
+ | `payload` | TEXT | JSON-serialized job data |
628
+ | `status` | TEXT | pending, processing, completed, failed, cancelled, scheduled |
629
+ | `priority` | INTEGER | 0=low, 1=medium, 2=high |
630
+ | `tentativas` | INTEGER | Attempt counter |
631
+ | `max_tentativas` | INTEGER | Max attempts allowed = max_retries + 1 |
632
+ | `last_error` | TEXT NULL | Error message |
633
+ | `error_type` | TEXT NULL | Exception class name |
634
+ | `error_stack` | TEXT NULL | Full traceback |
635
+ | `worker_id` | TEXT NULL | Currently processing worker |
636
+ | `heartbeat` | TEXT NULL | ISO timestamp of last activity |
637
+ | `created_at` | TEXT | Creation timestamp |
638
+ | `started_at` | TEXT NULL | Processing start timestamp |
639
+ | `completed_at` | TEXT NULL | Completion/failure timestamp |
640
+ | `next_retry_at` | TEXT NULL | Scheduled retry timestamp |
641
+
642
+ ### `dead_letter_queue` table
643
+
644
+ | Column | Type | Description |
645
+ |--------|------|-------------|
646
+ | `id` | INTEGER PK | Row ID |
647
+ | `original_job_id` | INTEGER | Reference to original fila.id |
648
+ | `payload` | TEXT | Original job payload |
649
+ | `error` | TEXT | Error message |
650
+ | `error_type` | TEXT | Exception type |
651
+ | `failed_at` | TEXT | Failure timestamp |
652
+ | `shard_id` | INTEGER | Originating shard |
653
+
654
+ ### `shard_metadata` table
655
+
656
+ Per-shard statistics: version, created_at, last_vacuum, total_jobs_processed, total_jobs_failed.
657
+
658
+ ## Performance
659
+
660
+ | Scenario | Config | Throughput |
661
+ |----------|--------|-----------|
662
+ | Burst | 20 workers, 10 shards | **~3,300 jobs/sec** |
663
+ | Contention | 10 workers, 1 shard | **~1,660 jobs/sec** |
664
+ | 30% failure | 8 workers, max_retries=2 | **Stable** |
665
+ | 50k jobs | 12 workers, 12 shards | **219 jobs/sec** (enqueue bottleneck) |
666
+
667
+ - Max queue size: 1M+ jobs per shard
668
+ - [Detailed stress test results](docs/stress-test.md)
669
+
670
+ ## Running Tests
671
+
672
+ ```bash
673
+ pip install -e ".[all]"
674
+ PYTHONPATH=src pytest tests/ -v
675
+ ```
676
+
677
+ ## License
678
+
679
+ MIT