rrq 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {rrq-0.5.0 → rrq-0.7.0}/.claude/settings.local.json +6 -1
  2. rrq-0.7.0/.github/workflows/ci.yml +37 -0
  3. {rrq-0.5.0 → rrq-0.7.0}/PKG-INFO +208 -25
  4. {rrq-0.5.0 → rrq-0.7.0}/README.md +204 -23
  5. rrq-0.7.0/docs/CLI_REFERENCE.md +240 -0
  6. {rrq-0.5.0 → rrq-0.7.0}/pyproject.toml +7 -2
  7. {rrq-0.5.0 → rrq-0.7.0}/rrq/cli.py +39 -64
  8. rrq-0.7.0/rrq/cli_commands/__init__.py +1 -0
  9. rrq-0.7.0/rrq/cli_commands/base.py +102 -0
  10. rrq-0.7.0/rrq/cli_commands/commands/__init__.py +1 -0
  11. rrq-0.7.0/rrq/cli_commands/commands/debug.py +551 -0
  12. rrq-0.7.0/rrq/cli_commands/commands/dlq.py +853 -0
  13. rrq-0.7.0/rrq/cli_commands/commands/jobs.py +516 -0
  14. rrq-0.7.0/rrq/cli_commands/commands/monitor.py +776 -0
  15. rrq-0.7.0/rrq/cli_commands/commands/queues.py +539 -0
  16. rrq-0.7.0/rrq/cli_commands/utils.py +161 -0
  17. {rrq-0.5.0 → rrq-0.7.0}/rrq/client.py +39 -35
  18. {rrq-0.5.0 → rrq-0.7.0}/rrq/constants.py +10 -0
  19. {rrq-0.5.0 → rrq-0.7.0}/rrq/cron.py +67 -8
  20. rrq-0.7.0/rrq/hooks.py +217 -0
  21. {rrq-0.5.0 → rrq-0.7.0}/rrq/job.py +5 -5
  22. {rrq-0.5.0 → rrq-0.7.0}/rrq/registry.py +0 -3
  23. {rrq-0.5.0 → rrq-0.7.0}/rrq/settings.py +13 -1
  24. {rrq-0.5.0 → rrq-0.7.0}/rrq/store.py +211 -53
  25. {rrq-0.5.0 → rrq-0.7.0}/rrq/worker.py +6 -6
  26. rrq-0.7.0/tests/CLAUDE.md +115 -0
  27. rrq-0.7.0/tests/cli_commands/__init__.py +1 -0
  28. rrq-0.7.0/tests/cli_commands/conftest.py +343 -0
  29. rrq-0.7.0/tests/cli_commands/test_debug_commands.py +501 -0
  30. rrq-0.7.0/tests/cli_commands/test_dlq_commands.py +721 -0
  31. rrq-0.7.0/tests/cli_commands/test_integration.py +436 -0
  32. rrq-0.7.0/tests/cli_commands/test_job_commands.py +565 -0
  33. rrq-0.7.0/tests/cli_commands/test_monitor_commands.py +811 -0
  34. rrq-0.7.0/tests/cli_commands/test_monitor_dlq_integration.py +372 -0
  35. rrq-0.7.0/tests/cli_commands/test_queue_commands.py +390 -0
  36. rrq-0.7.0/tests/cli_commands/test_queue_dlq_integration.py +461 -0
  37. {rrq-0.5.0 → rrq-0.7.0}/tests/test_cli.py +2 -61
  38. {rrq-0.5.0 → rrq-0.7.0}/tests/test_client.py +46 -30
  39. {rrq-0.5.0 → rrq-0.7.0}/tests/test_cron.py +22 -22
  40. {rrq-0.5.0 → rrq-0.7.0}/tests/test_store.py +317 -24
  41. {rrq-0.5.0 → rrq-0.7.0}/tests/test_worker.py +26 -276
  42. {rrq-0.5.0 → rrq-0.7.0}/uv.lock +127 -3
  43. {rrq-0.5.0 → rrq-0.7.0}/.coverage +0 -0
  44. {rrq-0.5.0 → rrq-0.7.0}/.gitignore +0 -0
  45. {rrq-0.5.0 → rrq-0.7.0}/CLAUDE.md +0 -0
  46. {rrq-0.5.0 → rrq-0.7.0}/LICENSE +0 -0
  47. {rrq-0.5.0 → rrq-0.7.0}/MANIFEST.in +0 -0
  48. {rrq-0.5.0 → rrq-0.7.0}/example/example_rrq_settings.py +0 -0
  49. {rrq-0.5.0 → rrq-0.7.0}/example/rrq_example.py +0 -0
  50. {rrq-0.5.0 → rrq-0.7.0}/rrq/__init__.py +0 -0
  51. {rrq-0.5.0 → rrq-0.7.0}/rrq/exc.py +0 -0
  52. {rrq-0.5.0 → rrq-0.7.0}/tests/__init__.py +0 -0
  53. {rrq-0.5.0 → rrq-0.7.0}/tests/test_registry.py +0 -0
@@ -10,7 +10,12 @@
10
10
  "Bash(rg:*)",
11
11
  "Bash(uv run:*)",
12
12
  "Bash(ruff format:*)",
13
- "Bash(ruff check:*)"
13
+ "Bash(ruff check:*)",
14
+ "Bash(uv add:*)",
15
+ "Bash(sed:*)",
16
+ "Bash(rm:*)",
17
+ "Bash(python:*)",
18
+ "Bash(true)"
14
19
  ],
15
20
  "deny": []
16
21
  }
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ jobs:
7
+ test:
8
+ runs-on: ubuntu-latest
9
+ services:
10
+ redis:
11
+ image: redis:7
12
+ ports:
13
+ - 6379:6379
14
+ options: >-
15
+ --health-cmd "redis-cli ping"
16
+ --health-interval 10s
17
+ --health-timeout 5s
18
+ --health-retries 5
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+
23
+ - name: Set up Python
24
+ uses: actions/setup-python@v4
25
+ with:
26
+ python-version: '3.11'
27
+ cache: 'pip'
28
+
29
+ - name: Install uv CLI
30
+ run: |
31
+ python -m pip install --upgrade pip uv
32
+
33
+ - name: Sync dependencies
34
+ run: uv sync --extra dev
35
+
36
+ - name: Run tests
37
+ run: uv run pytest --disable-warnings -q --maxfail=1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rrq
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: RRQ is a Python library for creating reliable job queues using Redis and asyncio
5
5
  Project-URL: Homepage, https://github.com/getresq/rrq
6
6
  Project-URL: Bug Tracker, https://github.com/getresq/rrq/issues
@@ -8,16 +8,18 @@ Author-email: Mazdak Rezvani <mazdak@me.com>
8
8
  License-File: LICENSE
9
9
  Classifier: Intended Audience :: Developers
10
10
  Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
11
12
  Classifier: Programming Language :: Python :: 3.11
12
13
  Classifier: Programming Language :: Python :: 3.12
13
14
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
15
  Classifier: Topic :: System :: Distributed Computing
15
16
  Classifier: Topic :: System :: Monitoring
16
- Requires-Python: >=3.11
17
+ Requires-Python: >=3.10
17
18
  Requires-Dist: click>=8.1.3
18
19
  Requires-Dist: pydantic-settings>=2.9.1
19
20
  Requires-Dist: pydantic>=2.11.4
20
21
  Requires-Dist: redis[hiredis]<6,>=4.2.0
22
+ Requires-Dist: rich>=14.0.0
21
23
  Requires-Dist: watchfiles>=0.19.0
22
24
  Provides-Extra: dev
23
25
  Requires-Dist: pytest-asyncio>=1.0.0; extra == 'dev'
@@ -29,18 +31,34 @@ Description-Content-Type: text/markdown
29
31
 
30
32
  RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
31
33
 
34
+ ## 🆕 What's New in v0.7.0
35
+
36
+ - **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
37
+ - **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
38
+ - **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
39
+ - **Python 3.10 Support**: Expanded compatibility from Python 3.11+ to 3.10+
40
+ - **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
41
+
42
+ ## Requirements
43
+
44
+ - Python 3.10 or higher
45
+ - Redis 5.0 or higher
46
+ - asyncio-compatible environment
47
+
32
48
  ## Key Features
33
49
 
34
50
  * **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
35
51
  * **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
36
52
  * **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
37
53
  * **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
38
- * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
54
+ * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
39
55
  * **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
40
56
  * **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
41
57
  * **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
42
58
  * **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
43
59
  * **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
60
+ * **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
61
+ * **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
44
62
 
45
63
  - Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
46
64
 
@@ -196,21 +214,21 @@ cron_jobs = [
196
214
  args=["temp_files"],
197
215
  kwargs={"max_age_days": 7}
198
216
  ),
199
-
217
+
200
218
  # Weekly report every Monday at 9 AM
201
219
  CronJob(
202
220
  function_name="generate_weekly_report",
203
221
  schedule="0 9 * * mon",
204
222
  unique=True # Prevent duplicate reports if worker restarts
205
223
  ),
206
-
224
+
207
225
  # Health check every 15 minutes on a specific queue
208
226
  CronJob(
209
227
  function_name="system_health_check",
210
228
  schedule="*/15 * * * *",
211
229
  queue_name="monitoring"
212
230
  ),
213
-
231
+
214
232
  # Backup database every night at 1 AM
215
233
  CronJob(
216
234
  function_name="backup_database",
@@ -258,26 +276,149 @@ rrq_settings.job_registry = job_registry
258
276
 
259
277
  **Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
260
278
 
279
+ ## Dead Letter Queue (DLQ) Management
280
+
281
+ RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
282
+
283
+ ### DLQ Structure
284
+
285
+ - **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
286
+ - **Queue Preservation**: Each failed job remembers its original queue name
287
+ - **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
288
+ - **Inspection**: Full job details including arguments, errors, and execution timeline
289
+
290
+ ### Common DLQ Workflows
291
+
292
+ #### Investigating Failures
293
+ ```bash
294
+ # Get overall DLQ statistics
295
+ rrq dlq stats
296
+
297
+ # List recent failures from a specific queue
298
+ rrq dlq list --queue urgent --limit 10
299
+
300
+ # Group failures by function
301
+ rrq dlq list --function send_email
302
+
303
+ # Inspect a specific failed job
304
+ rrq dlq inspect job_abc123
305
+ ```
306
+
307
+ #### Requeuing Failed Jobs
308
+ ```bash
309
+ # Preview what would be requeued (dry run)
310
+ rrq dlq requeue --queue urgent --dry-run
311
+
312
+ # Requeue all failures from urgent queue
313
+ rrq dlq requeue --queue urgent --all
314
+
315
+ # Requeue specific function failures with limit
316
+ rrq dlq requeue --function send_email --limit 10
317
+
318
+ # Requeue single job to different queue
319
+ rrq dlq requeue --job-id abc123 --target-queue retry_queue
320
+ ```
321
+
322
+ #### Monitoring DLQ in Real-time
323
+ ```bash
324
+ # Monitor includes DLQ statistics panel
325
+ rrq monitor
326
+
327
+ # Queue stats show DLQ count per original queue
328
+ rrq queue stats
329
+ ```
330
+
261
331
  ## Command Line Interface
262
332
 
263
- RRQ provides a command-line interface (CLI) for managing workers and performing health checks:
264
-
265
- - **`rrq worker run`** - Run an RRQ worker process.
266
- - `--settings` (optional): Specify the Python path to your settings object (e.g., `myapp.worker_config.rrq_settings`). If not provided, it will use the `RRQ_SETTINGS` environment variable or default to a basic `RRQSettings` object.
267
- - `--queue` (optional, multiple): Specify queue(s) to poll. Defaults to the `default_queue_name` in settings.
268
- - `--burst` (flag): Run the worker in burst mode to process one job or batch and then exit. Cannot be used with `--num-workers > 1`.
269
- - `--num-workers` (optional, integer): Number of parallel worker processes to start. Defaults to the number of CPU cores available on the machine. Cannot be used with `--burst` mode.
270
- - **`rrq worker watch`** - Run an RRQ worker with auto-restart on file changes.
271
- - `--path` (optional): Directory path to watch for changes. Defaults to the current directory.
272
- - `--settings` (optional): Same as above.
273
- - `--queue` (optional, multiple): Same as above.
274
- - **`rrq check`** - Perform a health check on active RRQ workers.
275
- - `--settings` (optional): Same as above.
276
- - **`rrq dlq requeue`** - Requeue jobs from the dead letter queue back into a live queue.
277
- - `--settings` (optional): Same as above.
278
- - `--dlq-name` (optional): Name of the DLQ (without prefix). Defaults to `default_dlq_name` in settings.
279
- - `--queue` (optional): Target queue name (without prefix). Defaults to `default_queue_name` in settings.
280
- - `--limit` (optional): Maximum number of DLQ jobs to requeue; all if not set.
333
+ RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
334
+
335
+ 📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
336
+
337
+ ### Quick Examples
338
+ ```bash
339
+ # Use default settings (localhost Redis)
340
+ rrq queue list
341
+
342
+ # Use custom settings
343
+ rrq queue list --settings myapp.config.rrq_settings
344
+
345
+ # Use environment variable
346
+ export RRQ_SETTINGS=myapp.config.rrq_settings
347
+ rrq monitor
348
+
349
+ # Debug workflow
350
+ rrq debug generate-jobs --count 100 --queue urgent
351
+ rrq queue inspect urgent --limit 10
352
+ rrq monitor --queues urgent --refresh 0.5
353
+
354
+ # DLQ management workflow
355
+ rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
356
+ rrq dlq stats # Show DLQ statistics and error patterns
357
+ rrq dlq inspect <job_id> # Inspect specific failed job
358
+ rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
359
+ rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
360
+
361
+ # Advanced DLQ filtering and management
362
+ rrq dlq list --function send_email --limit 20 # List failed email jobs
363
+ rrq dlq list --queue urgent --function process_data # Filter by queue AND function
364
+ rrq dlq requeue --function send_email --all # Requeue all failed email jobs
365
+ rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
366
+ ```
367
+
368
+ ## Performance and Limitations
369
+
370
+ ### Monitoring Performance Considerations
371
+
372
+ RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
373
+
374
+ #### Queue Statistics (`rrq queue stats`)
375
+ - **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
376
+ - **Active/Completed/Failed Counts**: Requires scanning job records in Redis, which can be slow for large datasets
377
+ - **Optimization**: Use the `--max-scan` parameter to limit scanning (default: 1,000 jobs; `0` performs a complete scan)
378
+ ```bash
379
+ # Fast scan for quick overview
380
+ rrq queue stats --max-scan 500
381
+
382
+ # Complete scan (may be slow)
383
+ rrq queue stats --max-scan 0
384
+ ```
385
+
386
+ #### DLQ Operations (`rrq dlq`)
387
+ - **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
388
+ - **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
389
+ ```bash
390
+ # Smaller batches for memory-constrained environments
391
+ rrq dlq list --batch-size 50
392
+
393
+ # Larger batches for better performance
394
+ rrq dlq list --batch-size 200
395
+ ```
396
+
397
+ #### Real-time Monitoring (`rrq monitor`)
398
+ - **Error Message Truncation**: Both the newest error messages and the error patterns are truncated to 50 characters for display consistency
399
+ - **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
400
+
401
+ ### Full Metrics Requirements
402
+
403
+ For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
404
+
405
+ 1. **Job History Tracking**:
406
+ - Store completed/failed job summaries in a separate Redis structure or external database
407
+ - Implement job completion event logging for time-series analytics
408
+
409
+ 2. **Active Job Monitoring**:
410
+ - Enhanced worker health tracking with job-level visibility
411
+ - Real-time active job registry for immediate status reporting
412
+
413
+ 3. **Throughput Calculation**:
414
+ - Time-series data collection for accurate throughput metrics
415
+ - Queue-specific performance trend tracking
416
+
417
+ 4. **Scalable Statistics**:
418
+ - Consider Redis Streams or time-series databases for high-frequency job event tracking
419
+ - Implement sampling strategies for large-scale deployments
420
+
421
+ The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
281
422
 
282
423
  ## Configuration
283
424
 
@@ -291,6 +432,48 @@ RRQ can be configured in several ways, with the following precedence:
291
432
 
292
433
  **Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
293
434
 
435
+ ### Comprehensive CLI Command System
436
+ - **New modular CLI architecture** with dedicated command modules for better organization
437
+ - **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
438
+ - **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
439
+ - **Job lifecycle management** with detailed inspection and control commands
440
+ - **Queue management** with statistics, purging, and migration capabilities
441
+ - **Debug utilities** for development and testing including stress testing and data generation
442
+
443
+ ## 📚 New CLI Commands
444
+
445
+ ### Monitor Commands
446
+ - `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
447
+ - `rrq monitor workers` - Detailed worker status and health monitoring
448
+ - `rrq monitor jobs` - Active job tracking and monitoring
449
+
450
+ ### DLQ Commands
451
+ - `rrq dlq list` - List failed jobs with filtering by queue, function, and time
452
+ - `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
453
+ - `rrq dlq inspect` - Detailed inspection of failed jobs
454
+ - `rrq dlq requeue` - Requeue failed jobs with dry-run support
455
+ - `rrq dlq purge` - Clean up old failed jobs
456
+
457
+ ### Queue Commands
458
+ - `rrq queue list` - List all queues with job counts
459
+ - `rrq queue stats` - Detailed queue statistics and throughput metrics
460
+ - `rrq queue inspect` - Inspect pending jobs in queues
461
+ - `rrq queue purge` - Purge jobs from queues with safety confirmations
462
+ - `rrq queue migrate` - Move jobs between queues
463
+
464
+ ### Job Commands
465
+ - `rrq job list` - List jobs with status filtering
466
+ - `rrq job inspect` - Detailed job information including timeline
467
+ - `rrq job result` - Retrieve job results
468
+ - `rrq job cancel` - Cancel active jobs
469
+ - `rrq job retry` - Manually retry failed jobs
470
+ - `rrq job delete` - Delete job records
471
+
472
+ ### Debug Commands
473
+ - `rrq debug generate-jobs` - Generate test jobs for development
474
+ - `rrq debug stress-test` - Stress test the system
475
+ - `rrq debug cleanup` - Clean up test data
476
+ - `rrq debug redis-info` - Redis server information and diagnostics
294
477
 
295
478
  ## Core Components
296
479
 
@@ -299,4 +482,4 @@ RRQ can be configured in several ways, with the following precedence:
299
482
  * **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
300
483
  * **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
301
484
  * **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
302
- * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
485
+ * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`).
@@ -2,18 +2,34 @@
2
2
 
3
3
  RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
4
4
 
5
+ ## 🆕 What's New in v0.7.0
6
+
7
+ - **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
8
+ - **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
9
+ - **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
10
+ - **Python 3.10 Support**: Expanded compatibility from Python 3.11+ to 3.10+
11
+ - **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
12
+
13
+ ## Requirements
14
+
15
+ - Python 3.10 or higher
16
+ - Redis 5.0 or higher
17
+ - asyncio-compatible environment
18
+
5
19
  ## Key Features
6
20
 
7
21
  * **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
8
22
  * **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
9
23
  * **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
10
24
  * **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
11
- * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
25
+ * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
12
26
  * **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
13
27
  * **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
14
28
  * **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
15
29
  * **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
16
30
  * **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
31
+ * **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
32
+ * **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
17
33
 
18
34
  - Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
19
35
 
@@ -169,21 +185,21 @@ cron_jobs = [
169
185
  args=["temp_files"],
170
186
  kwargs={"max_age_days": 7}
171
187
  ),
172
-
188
+
173
189
  # Weekly report every Monday at 9 AM
174
190
  CronJob(
175
191
  function_name="generate_weekly_report",
176
192
  schedule="0 9 * * mon",
177
193
  unique=True # Prevent duplicate reports if worker restarts
178
194
  ),
179
-
195
+
180
196
  # Health check every 15 minutes on a specific queue
181
197
  CronJob(
182
198
  function_name="system_health_check",
183
199
  schedule="*/15 * * * *",
184
200
  queue_name="monitoring"
185
201
  ),
186
-
202
+
187
203
  # Backup database every night at 1 AM
188
204
  CronJob(
189
205
  function_name="backup_database",
@@ -231,26 +247,149 @@ rrq_settings.job_registry = job_registry
231
247
 
232
248
  **Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
233
249
 
250
+ ## Dead Letter Queue (DLQ) Management
251
+
252
+ RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
253
+
254
+ ### DLQ Structure
255
+
256
+ - **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
257
+ - **Queue Preservation**: Each failed job remembers its original queue name
258
+ - **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
259
+ - **Inspection**: Full job details including arguments, errors, and execution timeline
260
+
261
+ ### Common DLQ Workflows
262
+
263
+ #### Investigating Failures
264
+ ```bash
265
+ # Get overall DLQ statistics
266
+ rrq dlq stats
267
+
268
+ # List recent failures from a specific queue
269
+ rrq dlq list --queue urgent --limit 10
270
+
271
+ # List failures for a specific function
272
+ rrq dlq list --function send_email
273
+
274
+ # Inspect a specific failed job
275
+ rrq dlq inspect job_abc123
276
+ ```
277
+
278
+ #### Requeuing Failed Jobs
279
+ ```bash
280
+ # Preview what would be requeued (dry run)
281
+ rrq dlq requeue --queue urgent --dry-run
282
+
283
+ # Requeue all failures from urgent queue
284
+ rrq dlq requeue --queue urgent --all
285
+
286
+ # Requeue specific function failures with limit
287
+ rrq dlq requeue --function send_email --limit 10
288
+
289
+ # Requeue single job to different queue
290
+ rrq dlq requeue --job-id abc123 --target-queue retry_queue
291
+ ```
292
+
293
+ #### Monitoring DLQ in Real-time
294
+ ```bash
295
+ # Monitor includes DLQ statistics panel
296
+ rrq monitor
297
+
298
+ # Queue stats show DLQ count per original queue
299
+ rrq queue stats
300
+ ```
301
+
234
302
  ## Command Line Interface
235
303
 
236
- RRQ provides a command-line interface (CLI) for managing workers and performing health checks:
237
-
238
- - **`rrq worker run`** - Run an RRQ worker process.
239
- - `--settings` (optional): Specify the Python path to your settings object (e.g., `myapp.worker_config.rrq_settings`). If not provided, it will use the `RRQ_SETTINGS` environment variable or default to a basic `RRQSettings` object.
240
- - `--queue` (optional, multiple): Specify queue(s) to poll. Defaults to the `default_queue_name` in settings.
241
- - `--burst` (flag): Run the worker in burst mode to process one job or batch and then exit. Cannot be used with `--num-workers > 1`.
242
- - `--num-workers` (optional, integer): Number of parallel worker processes to start. Defaults to the number of CPU cores available on the machine. Cannot be used with `--burst` mode.
243
- - **`rrq worker watch`** - Run an RRQ worker with auto-restart on file changes.
244
- - `--path` (optional): Directory path to watch for changes. Defaults to the current directory.
245
- - `--settings` (optional): Same as above.
246
- - `--queue` (optional, multiple): Same as above.
247
- - **`rrq check`** - Perform a health check on active RRQ workers.
248
- - `--settings` (optional): Same as above.
249
- - **`rrq dlq requeue`** - Requeue jobs from the dead letter queue back into a live queue.
250
- - `--settings` (optional): Same as above.
251
- - `--dlq-name` (optional): Name of the DLQ (without prefix). Defaults to `default_dlq_name` in settings.
252
- - `--queue` (optional): Target queue name (without prefix). Defaults to `default_queue_name` in settings.
253
- - `--limit` (optional): Maximum number of DLQ jobs to requeue; all if not set.
304
+ RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
305
+
306
+ 📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
307
+
308
+ ### Quick Examples
309
+ ```bash
310
+ # Use default settings (localhost Redis)
311
+ rrq queue list
312
+
313
+ # Use custom settings
314
+ rrq queue list --settings myapp.config.rrq_settings
315
+
316
+ # Use environment variable
317
+ export RRQ_SETTINGS=myapp.config.rrq_settings
318
+ rrq monitor
319
+
320
+ # Debug workflow
321
+ rrq debug generate-jobs --count 100 --queue urgent
322
+ rrq queue inspect urgent --limit 10
323
+ rrq monitor --queues urgent --refresh 0.5
324
+
325
+ # DLQ management workflow
326
+ rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
327
+ rrq dlq stats # Show DLQ statistics and error patterns
328
+ rrq dlq inspect <job_id> # Inspect specific failed job
329
+ rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
330
+ rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
331
+
332
+ # Advanced DLQ filtering and management
333
+ rrq dlq list --function send_email --limit 20 # List failed email jobs
334
+ rrq dlq list --queue urgent --function process_data # Filter by queue AND function
335
+ rrq dlq requeue --function send_email --all # Requeue all failed email jobs
336
+ rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
337
+ ```
338
+
339
+ ## Performance and Limitations
340
+
341
+ ### Monitoring Performance Considerations
342
+
343
+ RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
344
+
345
+ #### Queue Statistics (`rrq queue stats`)
346
+ - **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
347
+ - **Active/Completed/Failed Counts**: Requires scanning job records in Redis, which can be slow for large datasets
348
+ - **Optimization**: Use the `--max-scan` parameter to limit scanning (default: 1,000 jobs; `0` performs a complete scan)
349
+ ```bash
350
+ # Fast scan for quick overview
351
+ rrq queue stats --max-scan 500
352
+
353
+ # Complete scan (may be slow)
354
+ rrq queue stats --max-scan 0
355
+ ```
356
+
357
+ #### DLQ Operations (`rrq dlq`)
358
+ - **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
359
+ - **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
360
+ ```bash
361
+ # Smaller batches for memory-constrained environments
362
+ rrq dlq list --batch-size 50
363
+
364
+ # Larger batches for better performance
365
+ rrq dlq list --batch-size 200
366
+ ```
367
+
368
+ #### Real-time Monitoring (`rrq monitor`)
369
+ - **Error Message Truncation**: Both the newest error messages and the error patterns are truncated to 50 characters for display consistency
370
+ - **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
371
+
372
+ ### Full Metrics Requirements
373
+
374
+ For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
375
+
376
+ 1. **Job History Tracking**:
377
+ - Store completed/failed job summaries in a separate Redis structure or external database
378
+ - Implement job completion event logging for time-series analytics
379
+
380
+ 2. **Active Job Monitoring**:
381
+ - Enhanced worker health tracking with job-level visibility
382
+ - Real-time active job registry for immediate status reporting
383
+
384
+ 3. **Throughput Calculation**:
385
+ - Time-series data collection for accurate throughput metrics
386
+ - Queue-specific performance trend tracking
387
+
388
+ 4. **Scalable Statistics**:
389
+ - Consider Redis Streams or time-series databases for high-frequency job event tracking
390
+ - Implement sampling strategies for large-scale deployments
391
+
392
+ The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
254
393
 
255
394
  ## Configuration
256
395
 
@@ -264,6 +403,48 @@ RRQ can be configured in several ways, with the following precedence:
264
403
 
265
404
  **Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
266
405
 
406
+ ### Comprehensive CLI Command System
407
+ - **New modular CLI architecture** with dedicated command modules for better organization
408
+ - **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
409
+ - **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
410
+ - **Job lifecycle management** with detailed inspection and control commands
411
+ - **Queue management** with statistics, purging, and migration capabilities
412
+ - **Debug utilities** for development and testing including stress testing and data generation
413
+
414
+ ## 📚 New CLI Commands
415
+
416
+ ### Monitor Commands
417
+ - `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
418
+ - `rrq monitor workers` - Detailed worker status and health monitoring
419
+ - `rrq monitor jobs` - Active job tracking and monitoring
420
+
421
+ ### DLQ Commands
422
+ - `rrq dlq list` - List failed jobs with filtering by queue, function, and time
423
+ - `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
424
+ - `rrq dlq inspect` - Detailed inspection of failed jobs
425
+ - `rrq dlq requeue` - Requeue failed jobs with dry-run support
426
+ - `rrq dlq purge` - Clean up old failed jobs
427
+
428
+ ### Queue Commands
429
+ - `rrq queue list` - List all queues with job counts
430
+ - `rrq queue stats` - Detailed queue statistics and throughput metrics
431
+ - `rrq queue inspect` - Inspect pending jobs in queues
432
+ - `rrq queue purge` - Purge jobs from queues with safety confirmations
433
+ - `rrq queue migrate` - Move jobs between queues
434
+
435
+ ### Job Commands
436
+ - `rrq job list` - List jobs with status filtering
437
+ - `rrq job inspect` - Detailed job information including timeline
438
+ - `rrq job result` - Retrieve job results
439
+ - `rrq job cancel` - Cancel active jobs
440
+ - `rrq job retry` - Manually retry failed jobs
441
+ - `rrq job delete` - Delete job records
442
+
443
+ ### Debug Commands
444
+ - `rrq debug generate-jobs` - Generate test jobs for development
445
+ - `rrq debug stress-test` - Stress test the system
446
+ - `rrq debug cleanup` - Clean up test data
447
+ - `rrq debug redis-info` - Redis server information and diagnostics
267
448
 
268
449
  ## Core Components
269
450
 
@@ -272,4 +453,4 @@ RRQ can be configured in several ways, with the following precedence:
272
453
  * **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
273
454
  * **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
274
455
  * **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
275
- * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
456
+ * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`). `