rrq 0.5.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. rrq-0.7.1/.github/workflows/ci.yml +37 -0
  2. {rrq-0.5.0 → rrq-0.7.1}/PKG-INFO +209 -25
  3. {rrq-0.5.0 → rrq-0.7.1}/README.md +204 -23
  4. rrq-0.7.1/docs/CLI_REFERENCE.md +240 -0
  5. {rrq-0.5.0 → rrq-0.7.1}/pyproject.toml +8 -2
  6. {rrq-0.5.0 → rrq-0.7.1}/rrq/cli.py +39 -64
  7. rrq-0.7.1/rrq/cli_commands/__init__.py +1 -0
  8. rrq-0.7.1/rrq/cli_commands/base.py +102 -0
  9. rrq-0.7.1/rrq/cli_commands/commands/__init__.py +1 -0
  10. rrq-0.7.1/rrq/cli_commands/commands/debug.py +551 -0
  11. rrq-0.7.1/rrq/cli_commands/commands/dlq.py +853 -0
  12. rrq-0.7.1/rrq/cli_commands/commands/jobs.py +516 -0
  13. rrq-0.7.1/rrq/cli_commands/commands/monitor.py +776 -0
  14. rrq-0.7.1/rrq/cli_commands/commands/queues.py +539 -0
  15. rrq-0.7.1/rrq/cli_commands/utils.py +161 -0
  16. {rrq-0.5.0 → rrq-0.7.1}/rrq/client.py +39 -35
  17. {rrq-0.5.0 → rrq-0.7.1}/rrq/constants.py +10 -0
  18. {rrq-0.5.0 → rrq-0.7.1}/rrq/cron.py +67 -8
  19. rrq-0.7.1/rrq/hooks.py +217 -0
  20. {rrq-0.5.0 → rrq-0.7.1}/rrq/job.py +5 -5
  21. {rrq-0.5.0 → rrq-0.7.1}/rrq/registry.py +0 -3
  22. {rrq-0.5.0 → rrq-0.7.1}/rrq/settings.py +13 -1
  23. {rrq-0.5.0 → rrq-0.7.1}/rrq/store.py +211 -53
  24. {rrq-0.5.0 → rrq-0.7.1}/rrq/worker.py +6 -6
  25. rrq-0.7.1/tests/CLAUDE.md +115 -0
  26. rrq-0.7.1/tests/cli_commands/__init__.py +1 -0
  27. rrq-0.7.1/tests/cli_commands/conftest.py +343 -0
  28. rrq-0.7.1/tests/cli_commands/test_debug_commands.py +501 -0
  29. rrq-0.7.1/tests/cli_commands/test_dlq_commands.py +721 -0
  30. rrq-0.7.1/tests/cli_commands/test_integration.py +436 -0
  31. rrq-0.7.1/tests/cli_commands/test_job_commands.py +565 -0
  32. rrq-0.7.1/tests/cli_commands/test_monitor_commands.py +811 -0
  33. rrq-0.7.1/tests/cli_commands/test_monitor_dlq_integration.py +372 -0
  34. rrq-0.7.1/tests/cli_commands/test_queue_commands.py +390 -0
  35. rrq-0.7.1/tests/cli_commands/test_queue_dlq_integration.py +461 -0
  36. {rrq-0.5.0 → rrq-0.7.1}/tests/test_cli.py +2 -61
  37. {rrq-0.5.0 → rrq-0.7.1}/tests/test_client.py +46 -30
  38. {rrq-0.5.0 → rrq-0.7.1}/tests/test_cron.py +22 -22
  39. {rrq-0.5.0 → rrq-0.7.1}/tests/test_store.py +317 -24
  40. {rrq-0.5.0 → rrq-0.7.1}/tests/test_worker.py +26 -276
  41. {rrq-0.5.0 → rrq-0.7.1}/uv.lock +217 -63
  42. rrq-0.5.0/.claude/settings.local.json +0 -17
  43. {rrq-0.5.0 → rrq-0.7.1}/.coverage +0 -0
  44. {rrq-0.5.0 → rrq-0.7.1}/.gitignore +0 -0
  45. {rrq-0.5.0 → rrq-0.7.1}/CLAUDE.md +0 -0
  46. {rrq-0.5.0 → rrq-0.7.1}/LICENSE +0 -0
  47. {rrq-0.5.0 → rrq-0.7.1}/MANIFEST.in +0 -0
  48. {rrq-0.5.0 → rrq-0.7.1}/example/example_rrq_settings.py +0 -0
  49. {rrq-0.5.0 → rrq-0.7.1}/example/rrq_example.py +0 -0
  50. {rrq-0.5.0 → rrq-0.7.1}/rrq/__init__.py +0 -0
  51. {rrq-0.5.0 → rrq-0.7.1}/rrq/exc.py +0 -0
  52. {rrq-0.5.0 → rrq-0.7.1}/tests/__init__.py +0 -0
  53. {rrq-0.5.0 → rrq-0.7.1}/tests/test_registry.py +0 -0
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ jobs:
7
+ test:
8
+ runs-on: ubuntu-latest
9
+ services:
10
+ redis:
11
+ image: redis:7
12
+ ports:
13
+ - 6379:6379
14
+ options: >-
15
+ --health-cmd "redis-cli ping"
16
+ --health-interval 10s
17
+ --health-timeout 5s
18
+ --health-retries 5
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+
23
+ - name: Set up Python
24
+ uses: actions/setup-python@v4
25
+ with:
26
+ python-version: '3.11'
27
+ cache: 'pip'
28
+
29
+ - name: Install uv CLI
30
+ run: |
31
+ python -m pip install --upgrade pip uv
32
+
33
+ - name: Sync dependencies
34
+ run: uv sync --extra dev
35
+
36
+ - name: Run tests
37
+ run: uv run pytest --disable-warnings -q --maxfail=1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rrq
3
- Version: 0.5.0
3
+ Version: 0.7.1
4
4
  Summary: RRQ is a Python library for creating reliable job queues using Redis and asyncio
5
5
  Project-URL: Homepage, https://github.com/getresq/rrq
6
6
  Project-URL: Bug Tracker, https://github.com/getresq/rrq/issues
@@ -10,6 +10,8 @@ Classifier: Intended Audience :: Developers
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
13
15
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
16
  Classifier: Topic :: System :: Distributed Computing
15
17
  Classifier: Topic :: System :: Monitoring
@@ -17,7 +19,8 @@ Requires-Python: >=3.11
17
19
  Requires-Dist: click>=8.1.3
18
20
  Requires-Dist: pydantic-settings>=2.9.1
19
21
  Requires-Dist: pydantic>=2.11.4
20
- Requires-Dist: redis[hiredis]<6,>=4.2.0
22
+ Requires-Dist: redis[hiredis]>=4.2.0
23
+ Requires-Dist: rich>=14.0.0
21
24
  Requires-Dist: watchfiles>=0.19.0
22
25
  Provides-Extra: dev
23
26
  Requires-Dist: pytest-asyncio>=1.0.0; extra == 'dev'
@@ -29,18 +32,34 @@ Description-Content-Type: text/markdown
29
32
 
30
33
  RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
31
34
 
35
+ ## 🆕 What's New in v0.7.0
36
+
37
+ - **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
38
+ - **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
39
+ - **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
40
+ - **Python 3.13 and 3.14 Support**: Added classifiers for Python 3.13 and 3.14 (Python 3.11+ is still required)
41
+ - **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
42
+
43
+ ## Requirements
44
+
45
+ - Python 3.11 or higher
46
+ - Redis 5.0 or higher
47
+ - asyncio-compatible environment
48
+
32
49
  ## Key Features
33
50
 
34
51
  * **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
35
52
  * **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
36
53
  * **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
37
54
  * **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
38
- * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
55
+ * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
39
56
  * **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
40
57
  * **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
41
58
  * **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
42
59
  * **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
43
60
  * **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
61
+ * **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
62
+ * **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
44
63
 
45
64
  - Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
46
65
 
@@ -196,21 +215,21 @@ cron_jobs = [
196
215
  args=["temp_files"],
197
216
  kwargs={"max_age_days": 7}
198
217
  ),
199
-
218
+
200
219
  # Weekly report every Monday at 9 AM
201
220
  CronJob(
202
221
  function_name="generate_weekly_report",
203
222
  schedule="0 9 * * mon",
204
223
  unique=True # Prevent duplicate reports if worker restarts
205
224
  ),
206
-
225
+
207
226
  # Health check every 15 minutes on a specific queue
208
227
  CronJob(
209
228
  function_name="system_health_check",
210
229
  schedule="*/15 * * * *",
211
230
  queue_name="monitoring"
212
231
  ),
213
-
232
+
214
233
  # Backup database every night at 1 AM
215
234
  CronJob(
216
235
  function_name="backup_database",
@@ -258,26 +277,149 @@ rrq_settings.job_registry = job_registry
258
277
 
259
278
  **Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
260
279
 
280
+ ## Dead Letter Queue (DLQ) Management
281
+
282
+ RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
283
+
284
+ ### DLQ Structure
285
+
286
+ - **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
287
+ - **Queue Preservation**: Each failed job remembers its original queue name
288
+ - **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
289
+ - **Inspection**: Full job details including arguments, errors, and execution timeline
290
+
291
+ ### Common DLQ Workflows
292
+
293
+ #### Investigating Failures
294
+ ```bash
295
+ # Get overall DLQ statistics
296
+ rrq dlq stats
297
+
298
+ # List recent failures from a specific queue
299
+ rrq dlq list --queue urgent --limit 10
300
+
301
+ # List failures for a specific function
302
+ rrq dlq list --function send_email
303
+
304
+ # Inspect a specific failed job
305
+ rrq dlq inspect job_abc123
306
+ ```
307
+
308
+ #### Requeuing Failed Jobs
309
+ ```bash
310
+ # Preview what would be requeued (dry run)
311
+ rrq dlq requeue --queue urgent --dry-run
312
+
313
+ # Requeue all failures from urgent queue
314
+ rrq dlq requeue --queue urgent --all
315
+
316
+ # Requeue specific function failures with limit
317
+ rrq dlq requeue --function send_email --limit 10
318
+
319
+ # Requeue single job to different queue
320
+ rrq dlq requeue --job-id abc123 --target-queue retry_queue
321
+ ```
322
+
323
+ #### Monitoring DLQ in Real-time
324
+ ```bash
325
+ # Monitor includes DLQ statistics panel
326
+ rrq monitor
327
+
328
+ # Queue stats show DLQ count per original queue
329
+ rrq queue stats
330
+ ```
331
+
261
332
  ## Command Line Interface
262
333
 
263
- RRQ provides a command-line interface (CLI) for managing workers and performing health checks:
264
-
265
- - **`rrq worker run`** - Run an RRQ worker process.
266
- - `--settings` (optional): Specify the Python path to your settings object (e.g., `myapp.worker_config.rrq_settings`). If not provided, it will use the `RRQ_SETTINGS` environment variable or default to a basic `RRQSettings` object.
267
- - `--queue` (optional, multiple): Specify queue(s) to poll. Defaults to the `default_queue_name` in settings.
268
- - `--burst` (flag): Run the worker in burst mode to process one job or batch and then exit. Cannot be used with `--num-workers > 1`.
269
- - `--num-workers` (optional, integer): Number of parallel worker processes to start. Defaults to the number of CPU cores available on the machine. Cannot be used with `--burst` mode.
270
- - **`rrq worker watch`** - Run an RRQ worker with auto-restart on file changes.
271
- - `--path` (optional): Directory path to watch for changes. Defaults to the current directory.
272
- - `--settings` (optional): Same as above.
273
- - `--queue` (optional, multiple): Same as above.
274
- - **`rrq check`** - Perform a health check on active RRQ workers.
275
- - `--settings` (optional): Same as above.
276
- - **`rrq dlq requeue`** - Requeue jobs from the dead letter queue back into a live queue.
277
- - `--settings` (optional): Same as above.
278
- - `--dlq-name` (optional): Name of the DLQ (without prefix). Defaults to `default_dlq_name` in settings.
279
- - `--queue` (optional): Target queue name (without prefix). Defaults to `default_queue_name` in settings.
280
- - `--limit` (optional): Maximum number of DLQ jobs to requeue; all if not set.
334
+ RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
335
+
336
+ 📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
337
+
338
+ ### Quick Examples
339
+ ```bash
340
+ # Use default settings (localhost Redis)
341
+ rrq queue list
342
+
343
+ # Use custom settings
344
+ rrq queue list --settings myapp.config.rrq_settings
345
+
346
+ # Use environment variable
347
+ export RRQ_SETTINGS=myapp.config.rrq_settings
348
+ rrq monitor
349
+
350
+ # Debug workflow
351
+ rrq debug generate-jobs --count 100 --queue urgent
352
+ rrq queue inspect urgent --limit 10
353
+ rrq monitor --queues urgent --refresh 0.5
354
+
355
+ # DLQ management workflow
356
+ rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
357
+ rrq dlq stats # Show DLQ statistics and error patterns
358
+ rrq dlq inspect <job_id> # Inspect specific failed job
359
+ rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
360
+ rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
361
+
362
+ # Advanced DLQ filtering and management
363
+ rrq dlq list --function send_email --limit 20 # List failed email jobs
364
+ rrq dlq list --queue urgent --function process_data # Filter by queue AND function
365
+ rrq dlq requeue --function send_email --all # Requeue all failed email jobs
366
+ rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
367
+ ```
368
+
369
+ ## Performance and Limitations
370
+
371
+ ### Monitoring Performance Considerations
372
+
373
+ RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
374
+
375
+ #### Queue Statistics (`rrq queue stats`)
376
+ - **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
377
+ - **Active/Completed/Failed Counts**: Requires scanning job records in Redis which can be slow for large datasets
378
+ - **Optimization**: Use `--max-scan` parameter to limit scanning (default: 1,000 jobs)
379
+ ```bash
380
+ # Fast scan for quick overview
381
+ rrq queue stats --max-scan 500
382
+
383
+ # Complete scan (may be slow)
384
+ rrq queue stats --max-scan 0
385
+ ```
386
+
387
+ #### DLQ Operations (`rrq dlq`)
388
+ - **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
389
+ - **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
390
+ ```bash
391
+ # Smaller batches for memory-constrained environments
392
+ rrq dlq list --batch-size 50
393
+
394
+ # Larger batches for better performance
395
+ rrq dlq list --batch-size 200
396
+ ```
397
+
398
+ #### Real-time Monitoring (`rrq monitor`)
399
+ - **Error Message Truncation**: Newest errors and error patterns are both truncated to 50 characters for display consistency
400
+ - **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
401
+
402
+ ### Full Metrics Requirements
403
+
404
+ For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
405
+
406
+ 1. **Job History Tracking**:
407
+ - Store completed/failed job summaries in a separate Redis structure or external database
408
+ - Implement job completion event logging for time-series analytics
409
+
410
+ 2. **Active Job Monitoring**:
411
+ - Enhanced worker health tracking with job-level visibility
412
+ - Real-time active job registry for immediate status reporting
413
+
414
+ 3. **Throughput Calculation**:
415
+ - Time-series data collection for accurate throughput metrics
416
+ - Queue-specific performance trend tracking
417
+
418
+ 4. **Scalable Statistics**:
419
+ - Consider Redis Streams or time-series databases for high-frequency job event tracking
420
+ - Implement sampling strategies for large-scale deployments
421
+
422
+ The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
281
423
 
282
424
  ## Configuration
283
425
 
@@ -291,6 +433,48 @@ RRQ can be configured in several ways, with the following precedence:
291
433
 
292
434
  **Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
293
435
 
436
+ ### Comprehensive CLI Command System
437
+ - **New modular CLI architecture** with dedicated command modules for better organization
438
+ - **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
439
+ - **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
440
+ - **Job lifecycle management** with detailed inspection and control commands
441
+ - **Queue management** with statistics, purging, and migration capabilities
442
+ - **Debug utilities** for development and testing including stress testing and data generation
443
+
444
+ ## 📚 New CLI Commands
445
+
446
+ ### Monitor Commands
447
+ - `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
448
+ - `rrq monitor workers` - Detailed worker status and health monitoring
449
+ - `rrq monitor jobs` - Active job tracking and monitoring
450
+
451
+ ### DLQ Commands
452
+ - `rrq dlq list` - List failed jobs with filtering by queue, function, and time
453
+ - `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
454
+ - `rrq dlq inspect` - Detailed inspection of failed jobs
455
+ - `rrq dlq requeue` - Requeue failed jobs with dry-run support
456
+ - `rrq dlq purge` - Clean up old failed jobs
457
+
458
+ ### Queue Commands
459
+ - `rrq queue list` - List all queues with job counts
460
+ - `rrq queue stats` - Detailed queue statistics and throughput metrics
461
+ - `rrq queue inspect` - Inspect pending jobs in queues
462
+ - `rrq queue purge` - Purge jobs from queues with safety confirmations
463
+ - `rrq queue migrate` - Move jobs between queues
464
+
465
+ ### Job Commands
466
+ - `rrq job list` - List jobs with status filtering
467
+ - `rrq job inspect` - Detailed job information including timeline
468
+ - `rrq job result` - Retrieve job results
469
+ - `rrq job cancel` - Cancel active jobs
470
+ - `rrq job retry` - Manually retry failed jobs
471
+ - `rrq job delete` - Delete job records
472
+
473
+ ### Debug Commands
474
+ - `rrq debug generate-jobs` - Generate test jobs for development
475
+ - `rrq debug stress-test` - Stress test the system
476
+ - `rrq debug cleanup` - Clean up test data
477
+ - `rrq debug redis-info` - Redis server information and diagnostics
294
478
 
295
479
  ## Core Components
296
480
 
@@ -299,4 +483,4 @@ RRQ can be configured in several ways, with the following precedence:
299
483
  * **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
300
484
  * **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
301
485
  * **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
302
- * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
486
+ * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`). `
@@ -2,18 +2,34 @@
2
2
 
3
3
  RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
4
4
 
5
+ ## 🆕 What's New in v0.7.0
6
+
7
+ - **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
8
+ - **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
9
+ - **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
10
+ - **Python 3.13 and 3.14 Support**: Added classifiers for Python 3.13 and 3.14 (Python 3.11+ is still required)
11
+ - **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
12
+
13
+ ## Requirements
14
+
15
+ - Python 3.11 or higher
16
+ - Redis 5.0 or higher
17
+ - asyncio-compatible environment
18
+
5
19
  ## Key Features
6
20
 
7
21
  * **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
8
22
  * **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
9
23
  * **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
10
24
  * **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
11
- * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
25
+ * **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
12
26
  * **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
13
27
  * **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
14
28
  * **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
15
29
  * **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
16
30
  * **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
31
+ * **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
32
+ * **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
17
33
 
18
34
  - Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
19
35
 
@@ -169,21 +185,21 @@ cron_jobs = [
169
185
  args=["temp_files"],
170
186
  kwargs={"max_age_days": 7}
171
187
  ),
172
-
188
+
173
189
  # Weekly report every Monday at 9 AM
174
190
  CronJob(
175
191
  function_name="generate_weekly_report",
176
192
  schedule="0 9 * * mon",
177
193
  unique=True # Prevent duplicate reports if worker restarts
178
194
  ),
179
-
195
+
180
196
  # Health check every 15 minutes on a specific queue
181
197
  CronJob(
182
198
  function_name="system_health_check",
183
199
  schedule="*/15 * * * *",
184
200
  queue_name="monitoring"
185
201
  ),
186
-
202
+
187
203
  # Backup database every night at 1 AM
188
204
  CronJob(
189
205
  function_name="backup_database",
@@ -231,26 +247,149 @@ rrq_settings.job_registry = job_registry
231
247
 
232
248
  **Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
233
249
 
250
+ ## Dead Letter Queue (DLQ) Management
251
+
252
+ RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
253
+
254
+ ### DLQ Structure
255
+
256
+ - **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
257
+ - **Queue Preservation**: Each failed job remembers its original queue name
258
+ - **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
259
+ - **Inspection**: Full job details including arguments, errors, and execution timeline
260
+
261
+ ### Common DLQ Workflows
262
+
263
+ #### Investigating Failures
264
+ ```bash
265
+ # Get overall DLQ statistics
266
+ rrq dlq stats
267
+
268
+ # List recent failures from a specific queue
269
+ rrq dlq list --queue urgent --limit 10
270
+
271
+ # List failures for a specific function
272
+ rrq dlq list --function send_email
273
+
274
+ # Inspect a specific failed job
275
+ rrq dlq inspect job_abc123
276
+ ```
277
+
278
+ #### Requeuing Failed Jobs
279
+ ```bash
280
+ # Preview what would be requeued (dry run)
281
+ rrq dlq requeue --queue urgent --dry-run
282
+
283
+ # Requeue all failures from urgent queue
284
+ rrq dlq requeue --queue urgent --all
285
+
286
+ # Requeue specific function failures with limit
287
+ rrq dlq requeue --function send_email --limit 10
288
+
289
+ # Requeue single job to different queue
290
+ rrq dlq requeue --job-id abc123 --target-queue retry_queue
291
+ ```
292
+
293
+ #### Monitoring DLQ in Real-time
294
+ ```bash
295
+ # Monitor includes DLQ statistics panel
296
+ rrq monitor
297
+
298
+ # Queue stats show DLQ count per original queue
299
+ rrq queue stats
300
+ ```
301
+
234
302
  ## Command Line Interface
235
303
 
236
- RRQ provides a command-line interface (CLI) for managing workers and performing health checks:
237
-
238
- - **`rrq worker run`** - Run an RRQ worker process.
239
- - `--settings` (optional): Specify the Python path to your settings object (e.g., `myapp.worker_config.rrq_settings`). If not provided, it will use the `RRQ_SETTINGS` environment variable or default to a basic `RRQSettings` object.
240
- - `--queue` (optional, multiple): Specify queue(s) to poll. Defaults to the `default_queue_name` in settings.
241
- - `--burst` (flag): Run the worker in burst mode to process one job or batch and then exit. Cannot be used with `--num-workers > 1`.
242
- - `--num-workers` (optional, integer): Number of parallel worker processes to start. Defaults to the number of CPU cores available on the machine. Cannot be used with `--burst` mode.
243
- - **`rrq worker watch`** - Run an RRQ worker with auto-restart on file changes.
244
- - `--path` (optional): Directory path to watch for changes. Defaults to the current directory.
245
- - `--settings` (optional): Same as above.
246
- - `--queue` (optional, multiple): Same as above.
247
- - **`rrq check`** - Perform a health check on active RRQ workers.
248
- - `--settings` (optional): Same as above.
249
- - **`rrq dlq requeue`** - Requeue jobs from the dead letter queue back into a live queue.
250
- - `--settings` (optional): Same as above.
251
- - `--dlq-name` (optional): Name of the DLQ (without prefix). Defaults to `default_dlq_name` in settings.
252
- - `--queue` (optional): Target queue name (without prefix). Defaults to `default_queue_name` in settings.
253
- - `--limit` (optional): Maximum number of DLQ jobs to requeue; all if not set.
304
+ RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
305
+
306
+ 📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
307
+
308
+ ### Quick Examples
309
+ ```bash
310
+ # Use default settings (localhost Redis)
311
+ rrq queue list
312
+
313
+ # Use custom settings
314
+ rrq queue list --settings myapp.config.rrq_settings
315
+
316
+ # Use environment variable
317
+ export RRQ_SETTINGS=myapp.config.rrq_settings
318
+ rrq monitor
319
+
320
+ # Debug workflow
321
+ rrq debug generate-jobs --count 100 --queue urgent
322
+ rrq queue inspect urgent --limit 10
323
+ rrq monitor --queues urgent --refresh 0.5
324
+
325
+ # DLQ management workflow
326
+ rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
327
+ rrq dlq stats # Show DLQ statistics and error patterns
328
+ rrq dlq inspect <job_id> # Inspect specific failed job
329
+ rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
330
+ rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
331
+
332
+ # Advanced DLQ filtering and management
333
+ rrq dlq list --function send_email --limit 20 # List failed email jobs
334
+ rrq dlq list --queue urgent --function process_data # Filter by queue AND function
335
+ rrq dlq requeue --function send_email --all # Requeue all failed email jobs
336
+ rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
337
+ ```
338
+
339
+ ## Performance and Limitations
340
+
341
+ ### Monitoring Performance Considerations
342
+
343
+ RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
344
+
345
+ #### Queue Statistics (`rrq queue stats`)
346
+ - **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
347
+ - **Active/Completed/Failed Counts**: Requires scanning job records in Redis which can be slow for large datasets
348
+ - **Optimization**: Use `--max-scan` parameter to limit scanning (default: 1,000 jobs)
349
+ ```bash
350
+ # Fast scan for quick overview
351
+ rrq queue stats --max-scan 500
352
+
353
+ # Complete scan (may be slow)
354
+ rrq queue stats --max-scan 0
355
+ ```
356
+
357
+ #### DLQ Operations (`rrq dlq`)
358
+ - **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
359
+ - **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
360
+ ```bash
361
+ # Smaller batches for memory-constrained environments
362
+ rrq dlq list --batch-size 50
363
+
364
+ # Larger batches for better performance
365
+ rrq dlq list --batch-size 200
366
+ ```
367
+
368
+ #### Real-time Monitoring (`rrq monitor`)
369
+ - **Error Message Truncation**: Newest errors and error patterns are both truncated to 50 characters for display consistency
370
+ - **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
371
+
372
+ ### Full Metrics Requirements
373
+
374
+ For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
375
+
376
+ 1. **Job History Tracking**:
377
+ - Store completed/failed job summaries in a separate Redis structure or external database
378
+ - Implement job completion event logging for time-series analytics
379
+
380
+ 2. **Active Job Monitoring**:
381
+ - Enhanced worker health tracking with job-level visibility
382
+ - Real-time active job registry for immediate status reporting
383
+
384
+ 3. **Throughput Calculation**:
385
+ - Time-series data collection for accurate throughput metrics
386
+ - Queue-specific performance trend tracking
387
+
388
+ 4. **Scalable Statistics**:
389
+ - Consider Redis Streams or time-series databases for high-frequency job event tracking
390
+ - Implement sampling strategies for large-scale deployments
391
+
392
+ The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
254
393
 
255
394
  ## Configuration
256
395
 
@@ -264,6 +403,48 @@ RRQ can be configured in several ways, with the following precedence:
264
403
 
265
404
  **Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
266
405
 
406
+ ### Comprehensive CLI Command System
407
+ - **New modular CLI architecture** with dedicated command modules for better organization
408
+ - **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
409
+ - **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
410
+ - **Job lifecycle management** with detailed inspection and control commands
411
+ - **Queue management** with statistics, purging, and migration capabilities
412
+ - **Debug utilities** for development and testing including stress testing and data generation
413
+
414
+ ## 📚 New CLI Commands
415
+
416
+ ### Monitor Commands
417
+ - `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
418
+ - `rrq monitor workers` - Detailed worker status and health monitoring
419
+ - `rrq monitor jobs` - Active job tracking and monitoring
420
+
421
+ ### DLQ Commands
422
+ - `rrq dlq list` - List failed jobs with filtering by queue, function, and time
423
+ - `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
424
+ - `rrq dlq inspect` - Detailed inspection of failed jobs
425
+ - `rrq dlq requeue` - Requeue failed jobs with dry-run support
426
+ - `rrq dlq purge` - Clean up old failed jobs
427
+
428
+ ### Queue Commands
429
+ - `rrq queue list` - List all queues with job counts
430
+ - `rrq queue stats` - Detailed queue statistics and throughput metrics
431
+ - `rrq queue inspect` - Inspect pending jobs in queues
432
+ - `rrq queue purge` - Purge jobs from queues with safety confirmations
433
+ - `rrq queue migrate` - Move jobs between queues
434
+
435
+ ### Job Commands
436
+ - `rrq job list` - List jobs with status filtering
437
+ - `rrq job inspect` - Detailed job information including timeline
438
+ - `rrq job result` - Retrieve job results
439
+ - `rrq job cancel` - Cancel active jobs
440
+ - `rrq job retry` - Manually retry failed jobs
441
+ - `rrq job delete` - Delete job records
442
+
443
+ ### Debug Commands
444
+ - `rrq debug generate-jobs` - Generate test jobs for development
445
+ - `rrq debug stress-test` - Stress test the system
446
+ - `rrq debug cleanup` - Clean up test data
447
+ - `rrq debug redis-info` - Redis server information and diagnostics
267
448
 
268
449
  ## Core Components
269
450
 
@@ -272,4 +453,4 @@ RRQ can be configured in several ways, with the following precedence:
272
453
  * **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
273
454
  * **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
274
455
  * **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
275
- * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
456
+ * **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`). `