rrq 0.5.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rrq-0.5.0 → rrq-0.7.0}/.claude/settings.local.json +6 -1
- rrq-0.7.0/.github/workflows/ci.yml +37 -0
- {rrq-0.5.0 → rrq-0.7.0}/PKG-INFO +208 -25
- {rrq-0.5.0 → rrq-0.7.0}/README.md +204 -23
- rrq-0.7.0/docs/CLI_REFERENCE.md +240 -0
- {rrq-0.5.0 → rrq-0.7.0}/pyproject.toml +7 -2
- {rrq-0.5.0 → rrq-0.7.0}/rrq/cli.py +39 -64
- rrq-0.7.0/rrq/cli_commands/__init__.py +1 -0
- rrq-0.7.0/rrq/cli_commands/base.py +102 -0
- rrq-0.7.0/rrq/cli_commands/commands/__init__.py +1 -0
- rrq-0.7.0/rrq/cli_commands/commands/debug.py +551 -0
- rrq-0.7.0/rrq/cli_commands/commands/dlq.py +853 -0
- rrq-0.7.0/rrq/cli_commands/commands/jobs.py +516 -0
- rrq-0.7.0/rrq/cli_commands/commands/monitor.py +776 -0
- rrq-0.7.0/rrq/cli_commands/commands/queues.py +539 -0
- rrq-0.7.0/rrq/cli_commands/utils.py +161 -0
- {rrq-0.5.0 → rrq-0.7.0}/rrq/client.py +39 -35
- {rrq-0.5.0 → rrq-0.7.0}/rrq/constants.py +10 -0
- {rrq-0.5.0 → rrq-0.7.0}/rrq/cron.py +67 -8
- rrq-0.7.0/rrq/hooks.py +217 -0
- {rrq-0.5.0 → rrq-0.7.0}/rrq/job.py +5 -5
- {rrq-0.5.0 → rrq-0.7.0}/rrq/registry.py +0 -3
- {rrq-0.5.0 → rrq-0.7.0}/rrq/settings.py +13 -1
- {rrq-0.5.0 → rrq-0.7.0}/rrq/store.py +211 -53
- {rrq-0.5.0 → rrq-0.7.0}/rrq/worker.py +6 -6
- rrq-0.7.0/tests/CLAUDE.md +115 -0
- rrq-0.7.0/tests/cli_commands/__init__.py +1 -0
- rrq-0.7.0/tests/cli_commands/conftest.py +343 -0
- rrq-0.7.0/tests/cli_commands/test_debug_commands.py +501 -0
- rrq-0.7.0/tests/cli_commands/test_dlq_commands.py +721 -0
- rrq-0.7.0/tests/cli_commands/test_integration.py +436 -0
- rrq-0.7.0/tests/cli_commands/test_job_commands.py +565 -0
- rrq-0.7.0/tests/cli_commands/test_monitor_commands.py +811 -0
- rrq-0.7.0/tests/cli_commands/test_monitor_dlq_integration.py +372 -0
- rrq-0.7.0/tests/cli_commands/test_queue_commands.py +390 -0
- rrq-0.7.0/tests/cli_commands/test_queue_dlq_integration.py +461 -0
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_cli.py +2 -61
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_client.py +46 -30
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_cron.py +22 -22
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_store.py +317 -24
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_worker.py +26 -276
- {rrq-0.5.0 → rrq-0.7.0}/uv.lock +127 -3
- {rrq-0.5.0 → rrq-0.7.0}/.coverage +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/.gitignore +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/CLAUDE.md +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/LICENSE +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/MANIFEST.in +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/example/example_rrq_settings.py +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/example/rrq_example.py +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/rrq/__init__.py +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/rrq/exc.py +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/tests/__init__.py +0 -0
- {rrq-0.5.0 → rrq-0.7.0}/tests/test_registry.py +0 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
|
|
6
|
+
jobs:
|
|
7
|
+
test:
|
|
8
|
+
runs-on: ubuntu-latest
|
|
9
|
+
services:
|
|
10
|
+
redis:
|
|
11
|
+
image: redis:7
|
|
12
|
+
ports:
|
|
13
|
+
- 6379:6379
|
|
14
|
+
options: >-
|
|
15
|
+
--health-cmd "redis-cli ping"
|
|
16
|
+
--health-interval 10s
|
|
17
|
+
--health-timeout 5s
|
|
18
|
+
--health-retries 5
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout code
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Set up Python
|
|
24
|
+
uses: actions/setup-python@v4
|
|
25
|
+
with:
|
|
26
|
+
python-version: '3.11'
|
|
27
|
+
cache: 'pip'
|
|
28
|
+
|
|
29
|
+
- name: Install uv CLI
|
|
30
|
+
run: |
|
|
31
|
+
python -m pip install --upgrade pip uv
|
|
32
|
+
|
|
33
|
+
- name: Sync dependencies
|
|
34
|
+
run: uv sync --extra dev
|
|
35
|
+
|
|
36
|
+
- name: Run tests
|
|
37
|
+
run: uv run pytest --disable-warnings -q --maxfail=1
|
{rrq-0.5.0 → rrq-0.7.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rrq
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: RRQ is a Python library for creating reliable job queues using Redis and asyncio
|
|
5
5
|
Project-URL: Homepage, https://github.com/getresq/rrq
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/getresq/rrq/issues
|
|
@@ -8,16 +8,18 @@ Author-email: Mazdak Rezvani <mazdak@me.com>
|
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Classifier: Intended Audience :: Developers
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
14
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
14
15
|
Classifier: Topic :: System :: Distributed Computing
|
|
15
16
|
Classifier: Topic :: System :: Monitoring
|
|
16
|
-
Requires-Python: >=3.11
|
|
17
|
+
Requires-Python: >=3.10
|
|
17
18
|
Requires-Dist: click>=8.1.3
|
|
18
19
|
Requires-Dist: pydantic-settings>=2.9.1
|
|
19
20
|
Requires-Dist: pydantic>=2.11.4
|
|
20
21
|
Requires-Dist: redis[hiredis]<6,>=4.2.0
|
|
22
|
+
Requires-Dist: rich>=14.0.0
|
|
21
23
|
Requires-Dist: watchfiles>=0.19.0
|
|
22
24
|
Provides-Extra: dev
|
|
23
25
|
Requires-Dist: pytest-asyncio>=1.0.0; extra == 'dev'
|
|
@@ -29,18 +31,34 @@ Description-Content-Type: text/markdown
|
|
|
29
31
|
|
|
30
32
|
RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
|
|
31
33
|
|
|
34
|
+
## 🆕 What's New in v0.7.0
|
|
35
|
+
|
|
36
|
+
- **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
|
|
37
|
+
- **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
|
|
38
|
+
- **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
|
|
39
|
+
- **Python 3.10 Support**: Expanded compatibility from Python 3.11+ to 3.10+
|
|
40
|
+
- **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
|
|
41
|
+
|
|
42
|
+
## Requirements
|
|
43
|
+
|
|
44
|
+
- Python 3.10 or higher
|
|
45
|
+
- Redis 5.0 or higher
|
|
46
|
+
- asyncio-compatible environment
|
|
47
|
+
|
|
32
48
|
## Key Features
|
|
33
49
|
|
|
34
50
|
* **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
|
|
35
51
|
* **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
|
|
36
52
|
* **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
|
|
37
53
|
* **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
|
|
38
|
-
* **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
|
|
54
|
+
* **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
|
|
39
55
|
* **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
|
|
40
56
|
* **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
|
|
41
57
|
* **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
|
|
42
58
|
* **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
|
|
43
59
|
* **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
|
|
60
|
+
* **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
|
|
61
|
+
* **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
|
|
44
62
|
|
|
45
63
|
- Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
|
|
46
64
|
|
|
@@ -196,21 +214,21 @@ cron_jobs = [
|
|
|
196
214
|
args=["temp_files"],
|
|
197
215
|
kwargs={"max_age_days": 7}
|
|
198
216
|
),
|
|
199
|
-
|
|
217
|
+
|
|
200
218
|
# Weekly report every Monday at 9 AM
|
|
201
219
|
CronJob(
|
|
202
220
|
function_name="generate_weekly_report",
|
|
203
221
|
schedule="0 9 * * mon",
|
|
204
222
|
unique=True # Prevent duplicate reports if worker restarts
|
|
205
223
|
),
|
|
206
|
-
|
|
224
|
+
|
|
207
225
|
# Health check every 15 minutes on a specific queue
|
|
208
226
|
CronJob(
|
|
209
227
|
function_name="system_health_check",
|
|
210
228
|
schedule="*/15 * * * *",
|
|
211
229
|
queue_name="monitoring"
|
|
212
230
|
),
|
|
213
|
-
|
|
231
|
+
|
|
214
232
|
# Backup database every night at 1 AM
|
|
215
233
|
CronJob(
|
|
216
234
|
function_name="backup_database",
|
|
@@ -258,26 +276,149 @@ rrq_settings.job_registry = job_registry
|
|
|
258
276
|
|
|
259
277
|
**Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
|
|
260
278
|
|
|
279
|
+
## Dead Letter Queue (DLQ) Management
|
|
280
|
+
|
|
281
|
+
RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
|
|
282
|
+
|
|
283
|
+
### DLQ Structure
|
|
284
|
+
|
|
285
|
+
- **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
|
|
286
|
+
- **Queue Preservation**: Each failed job remembers its original queue name
|
|
287
|
+
- **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
|
|
288
|
+
- **Inspection**: Full job details including arguments, errors, and execution timeline
|
|
289
|
+
|
|
290
|
+
### Common DLQ Workflows
|
|
291
|
+
|
|
292
|
+
#### Investigating Failures
|
|
293
|
+
```bash
|
|
294
|
+
# Get overall DLQ statistics
|
|
295
|
+
rrq dlq stats
|
|
296
|
+
|
|
297
|
+
# List recent failures from a specific queue
|
|
298
|
+
rrq dlq list --queue urgent --limit 10
|
|
299
|
+
|
|
300
|
+
# Group failures by function
|
|
301
|
+
rrq dlq list --function send_email
|
|
302
|
+
|
|
303
|
+
# Inspect a specific failed job
|
|
304
|
+
rrq dlq inspect job_abc123
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
#### Requeuing Failed Jobs
|
|
308
|
+
```bash
|
|
309
|
+
# Preview what would be requeued (dry run)
|
|
310
|
+
rrq dlq requeue --queue urgent --dry-run
|
|
311
|
+
|
|
312
|
+
# Requeue all failures from urgent queue
|
|
313
|
+
rrq dlq requeue --queue urgent --all
|
|
314
|
+
|
|
315
|
+
# Requeue specific function failures with limit
|
|
316
|
+
rrq dlq requeue --function send_email --limit 10
|
|
317
|
+
|
|
318
|
+
# Requeue single job to different queue
|
|
319
|
+
rrq dlq requeue --job-id abc123 --target-queue retry_queue
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
#### Monitoring DLQ in Real-time
|
|
323
|
+
```bash
|
|
324
|
+
# Monitor includes DLQ statistics panel
|
|
325
|
+
rrq monitor
|
|
326
|
+
|
|
327
|
+
# Queue stats show DLQ count per original queue
|
|
328
|
+
rrq queue stats
|
|
329
|
+
```
|
|
330
|
+
|
|
261
331
|
## Command Line Interface
|
|
262
332
|
|
|
263
|
-
RRQ provides a command-line interface (CLI) for managing workers
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
333
|
+
RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
|
|
334
|
+
|
|
335
|
+
📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
|
|
336
|
+
|
|
337
|
+
### Quick Examples
|
|
338
|
+
```bash
|
|
339
|
+
# Use default settings (localhost Redis)
|
|
340
|
+
rrq queue list
|
|
341
|
+
|
|
342
|
+
# Use custom settings
|
|
343
|
+
rrq queue list --settings myapp.config.rrq_settings
|
|
344
|
+
|
|
345
|
+
# Use environment variable
|
|
346
|
+
export RRQ_SETTINGS=myapp.config.rrq_settings
|
|
347
|
+
rrq monitor
|
|
348
|
+
|
|
349
|
+
# Debug workflow
|
|
350
|
+
rrq debug generate-jobs --count 100 --queue urgent
|
|
351
|
+
rrq queue inspect urgent --limit 10
|
|
352
|
+
rrq monitor --queues urgent --refresh 0.5
|
|
353
|
+
|
|
354
|
+
# DLQ management workflow
|
|
355
|
+
rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
|
|
356
|
+
rrq dlq stats # Show DLQ statistics and error patterns
|
|
357
|
+
rrq dlq inspect <job_id> # Inspect specific failed job
|
|
358
|
+
rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
|
|
359
|
+
rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
|
|
360
|
+
|
|
361
|
+
# Advanced DLQ filtering and management
|
|
362
|
+
rrq dlq list --function send_email --limit 20 # List failed email jobs
|
|
363
|
+
rrq dlq list --queue urgent --function process_data # Filter by queue AND function
|
|
364
|
+
rrq dlq requeue --function send_email --all # Requeue all failed email jobs
|
|
365
|
+
rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
## Performance and Limitations
|
|
369
|
+
|
|
370
|
+
### Monitoring Performance Considerations
|
|
371
|
+
|
|
372
|
+
RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
|
|
373
|
+
|
|
374
|
+
#### Queue Statistics (`rrq queue stats`)
|
|
375
|
+
- **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
|
|
376
|
+
- **Active/Completed/Failed Counts**: Requires scanning job records in Redis which can be slow for large datasets
|
|
377
|
+
- **Optimization**: Use `--max-scan` parameter to limit scanning (default: 1,000 jobs)
|
|
378
|
+
```bash
|
|
379
|
+
# Fast scan for quick overview
|
|
380
|
+
rrq queue stats --max-scan 500
|
|
381
|
+
|
|
382
|
+
# Complete scan (may be slow)
|
|
383
|
+
rrq queue stats --max-scan 0
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
#### DLQ Operations (`rrq dlq`)
|
|
387
|
+
- **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
|
|
388
|
+
- **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
|
|
389
|
+
```bash
|
|
390
|
+
# Smaller batches for memory-constrained environments
|
|
391
|
+
rrq dlq list --batch-size 50
|
|
392
|
+
|
|
393
|
+
# Larger batches for better performance
|
|
394
|
+
rrq dlq list --batch-size 200
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
#### Real-time Monitoring (`rrq monitor`)
|
|
398
|
+
- **Error Message Truncation**: Both the newest error messages and the error patterns are truncated to 50 characters for display consistency
|
|
399
|
+
- **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
|
|
400
|
+
|
|
401
|
+
### Full Metrics Requirements
|
|
402
|
+
|
|
403
|
+
For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
|
|
404
|
+
|
|
405
|
+
1. **Job History Tracking**:
|
|
406
|
+
- Store completed/failed job summaries in a separate Redis structure or external database
|
|
407
|
+
- Implement job completion event logging for time-series analytics
|
|
408
|
+
|
|
409
|
+
2. **Active Job Monitoring**:
|
|
410
|
+
- Enhanced worker health tracking with job-level visibility
|
|
411
|
+
- Real-time active job registry for immediate status reporting
|
|
412
|
+
|
|
413
|
+
3. **Throughput Calculation**:
|
|
414
|
+
- Time-series data collection for accurate throughput metrics
|
|
415
|
+
- Queue-specific performance trend tracking
|
|
416
|
+
|
|
417
|
+
4. **Scalable Statistics**:
|
|
418
|
+
- Consider Redis Streams or time-series databases for high-frequency job event tracking
|
|
419
|
+
- Implement sampling strategies for large-scale deployments
|
|
420
|
+
|
|
421
|
+
The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
|
|
281
422
|
|
|
282
423
|
## Configuration
|
|
283
424
|
|
|
@@ -291,6 +432,48 @@ RRQ can be configured in several ways, with the following precedence:
|
|
|
291
432
|
|
|
292
433
|
**Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
|
|
293
434
|
|
|
435
|
+
### Comprehensive CLI Command System
|
|
436
|
+
- **New modular CLI architecture** with dedicated command modules for better organization
|
|
437
|
+
- **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
|
|
438
|
+
- **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
|
|
439
|
+
- **Job lifecycle management** with detailed inspection and control commands
|
|
440
|
+
- **Queue management** with statistics, purging, and migration capabilities
|
|
441
|
+
- **Debug utilities** for development and testing including stress testing and data generation
|
|
442
|
+
|
|
443
|
+
## 📚 New CLI Commands
|
|
444
|
+
|
|
445
|
+
### Monitor Commands
|
|
446
|
+
- `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
|
|
447
|
+
- `rrq monitor workers` - Detailed worker status and health monitoring
|
|
448
|
+
- `rrq monitor jobs` - Active job tracking and monitoring
|
|
449
|
+
|
|
450
|
+
### DLQ Commands
|
|
451
|
+
- `rrq dlq list` - List failed jobs with filtering by queue, function, and time
|
|
452
|
+
- `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
|
|
453
|
+
- `rrq dlq inspect` - Detailed inspection of failed jobs
|
|
454
|
+
- `rrq dlq requeue` - Requeue failed jobs with dry-run support
|
|
455
|
+
- `rrq dlq purge` - Clean up old failed jobs
|
|
456
|
+
|
|
457
|
+
### Queue Commands
|
|
458
|
+
- `rrq queue list` - List all queues with job counts
|
|
459
|
+
- `rrq queue stats` - Detailed queue statistics and throughput metrics
|
|
460
|
+
- `rrq queue inspect` - Inspect pending jobs in queues
|
|
461
|
+
- `rrq queue purge` - Purge jobs from queues with safety confirmations
|
|
462
|
+
- `rrq queue migrate` - Move jobs between queues
|
|
463
|
+
|
|
464
|
+
### Job Commands
|
|
465
|
+
- `rrq job list` - List jobs with status filtering
|
|
466
|
+
- `rrq job inspect` - Detailed job information including timeline
|
|
467
|
+
- `rrq job result` - Retrieve job results
|
|
468
|
+
- `rrq job cancel` - Cancel active jobs
|
|
469
|
+
- `rrq job retry` - Manually retry failed jobs
|
|
470
|
+
- `rrq job delete` - Delete job records
|
|
471
|
+
|
|
472
|
+
### Debug Commands
|
|
473
|
+
- `rrq debug generate-jobs` - Generate test jobs for development
|
|
474
|
+
- `rrq debug stress-test` - Stress test the system
|
|
475
|
+
- `rrq debug cleanup` - Clean up test data
|
|
476
|
+
- `rrq debug redis-info` - Redis server information and diagnostics
|
|
294
477
|
|
|
295
478
|
## Core Components
|
|
296
479
|
|
|
@@ -299,4 +482,4 @@ RRQ can be configured in several ways, with the following precedence:
|
|
|
299
482
|
* **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
|
|
300
483
|
* **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
|
|
301
484
|
* **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
|
|
302
|
-
* **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
|
|
485
|
+
* **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`).
|
|
@@ -2,18 +2,34 @@
|
|
|
2
2
|
|
|
3
3
|
RRQ is a Python library for creating reliable job queues using Redis and `asyncio`, inspired by [ARQ (Async Redis Queue)](https://github.com/samuelcolvin/arq). It focuses on providing at-least-once job processing semantics with features like automatic retries, job timeouts, dead-letter queues, and graceful worker shutdown.
|
|
4
4
|
|
|
5
|
+
## 🆕 What's New in v0.7.0
|
|
6
|
+
|
|
7
|
+
- **Comprehensive CLI Tools**: 15+ new commands for monitoring, debugging, and management
|
|
8
|
+
- **Real-time Monitoring Dashboard**: Interactive dashboard with `rrq monitor`
|
|
9
|
+
- **Enhanced DLQ Management**: Sophisticated filtering and requeuing capabilities
|
|
10
|
+
- **Python 3.10 Support**: Expanded compatibility from Python 3.11+ to 3.10+
|
|
11
|
+
- **Bug Fixes**: Critical fix for unique job enqueue failures with proper deferral
|
|
12
|
+
|
|
13
|
+
## Requirements
|
|
14
|
+
|
|
15
|
+
- Python 3.10 or higher
|
|
16
|
+
- Redis 5.0 or higher
|
|
17
|
+
- asyncio-compatible environment
|
|
18
|
+
|
|
5
19
|
## Key Features
|
|
6
20
|
|
|
7
21
|
* **At-Least-Once Semantics**: Uses Redis locks to ensure a job is processed by only one worker at a time. If a worker crashes or shuts down mid-processing, the lock expires, and the job *should* be re-processed (though re-queueing on unclean shutdown isn't implemented here yet - graceful shutdown *does* re-queue).
|
|
8
22
|
* **Automatic Retries with Backoff**: Jobs that fail with standard exceptions are automatically retried based on `max_retries` settings, using exponential backoff for delays.
|
|
9
23
|
* **Explicit Retries**: Handlers can raise `RetryJob` to control retry attempts and delays.
|
|
10
24
|
* **Job Timeouts**: Jobs exceeding their configured timeout (`job_timeout_seconds` or `default_job_timeout_seconds`) are terminated and moved to the DLQ.
|
|
11
|
-
* **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a DLQ list in Redis for inspection.
|
|
25
|
+
* **Dead Letter Queue (DLQ)**: Jobs that fail permanently (max retries reached, fatal error, timeout) are moved to a single global DLQ list in Redis. Each failed job retains its original queue information, allowing for filtered inspection and selective requeuing.
|
|
12
26
|
* **Job Uniqueness**: The `_unique_key` parameter in `enqueue` prevents duplicate jobs based on a custom key within a specified TTL.
|
|
13
27
|
* **Graceful Shutdown**: Workers listen for SIGINT/SIGTERM and attempt to finish active jobs within a grace period before exiting. Interrupted jobs are re-queued.
|
|
14
28
|
* **Worker Health Checks**: Workers periodically update a health key in Redis with a TTL, allowing monitoring systems to track active workers.
|
|
15
29
|
* **Deferred Execution**: Jobs can be scheduled to run at a future time using `_defer_by` or `_defer_until`.
|
|
16
30
|
* **Cron Jobs**: Periodic jobs can be defined in `RRQSettings.cron_jobs` using a simple cron syntax.
|
|
31
|
+
* **Comprehensive Monitoring**: Built-in CLI tools for monitoring queues, inspecting jobs, and debugging with real-time dashboards and beautiful table output.
|
|
32
|
+
* **Development Tools**: Debug commands for generating test data, stress testing, and cleaning up development environments.
|
|
17
33
|
|
|
18
34
|
- Using deferral with a specific `_job_id` will effectively reschedule the job associated with that ID to the new time, overwriting its previous definition and score. It does not create multiple distinct scheduled jobs with the same ID.
|
|
19
35
|
|
|
@@ -169,21 +185,21 @@ cron_jobs = [
|
|
|
169
185
|
args=["temp_files"],
|
|
170
186
|
kwargs={"max_age_days": 7}
|
|
171
187
|
),
|
|
172
|
-
|
|
188
|
+
|
|
173
189
|
# Weekly report every Monday at 9 AM
|
|
174
190
|
CronJob(
|
|
175
191
|
function_name="generate_weekly_report",
|
|
176
192
|
schedule="0 9 * * mon",
|
|
177
193
|
unique=True # Prevent duplicate reports if worker restarts
|
|
178
194
|
),
|
|
179
|
-
|
|
195
|
+
|
|
180
196
|
# Health check every 15 minutes on a specific queue
|
|
181
197
|
CronJob(
|
|
182
198
|
function_name="system_health_check",
|
|
183
199
|
schedule="*/15 * * * *",
|
|
184
200
|
queue_name="monitoring"
|
|
185
201
|
),
|
|
186
|
-
|
|
202
|
+
|
|
187
203
|
# Backup database every night at 1 AM
|
|
188
204
|
CronJob(
|
|
189
205
|
function_name="backup_database",
|
|
@@ -231,26 +247,149 @@ rrq_settings.job_registry = job_registry
|
|
|
231
247
|
|
|
232
248
|
**Note:** Cron jobs are automatically enqueued by the worker when they become due. The worker checks for due cron jobs every 30 seconds and enqueues them as regular jobs to be processed.
|
|
233
249
|
|
|
250
|
+
## Dead Letter Queue (DLQ) Management
|
|
251
|
+
|
|
252
|
+
RRQ uses a single global Dead Letter Queue to store jobs that have failed permanently. Jobs in the DLQ retain their original queue information, allowing for sophisticated filtering and management.
|
|
253
|
+
|
|
254
|
+
### DLQ Structure
|
|
255
|
+
|
|
256
|
+
- **Global DLQ**: One DLQ per RRQ instance (configurable via `default_dlq_name`)
|
|
257
|
+
- **Queue Preservation**: Each failed job remembers its original queue name
|
|
258
|
+
- **Filtering**: Jobs can be filtered by original queue, function name, error patterns, and time ranges
|
|
259
|
+
- **Inspection**: Full job details including arguments, errors, and execution timeline
|
|
260
|
+
|
|
261
|
+
### Common DLQ Workflows
|
|
262
|
+
|
|
263
|
+
#### Investigating Failures
|
|
264
|
+
```bash
|
|
265
|
+
# Get overall DLQ statistics
|
|
266
|
+
rrq dlq stats
|
|
267
|
+
|
|
268
|
+
# List recent failures from a specific queue
|
|
269
|
+
rrq dlq list --queue urgent --limit 10
|
|
270
|
+
|
|
271
|
+
# Group failures by function
|
|
272
|
+
rrq dlq list --function send_email
|
|
273
|
+
|
|
274
|
+
# Inspect a specific failed job
|
|
275
|
+
rrq dlq inspect job_abc123
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
#### Requeuing Failed Jobs
|
|
279
|
+
```bash
|
|
280
|
+
# Preview what would be requeued (dry run)
|
|
281
|
+
rrq dlq requeue --queue urgent --dry-run
|
|
282
|
+
|
|
283
|
+
# Requeue all failures from urgent queue
|
|
284
|
+
rrq dlq requeue --queue urgent --all
|
|
285
|
+
|
|
286
|
+
# Requeue specific function failures with limit
|
|
287
|
+
rrq dlq requeue --function send_email --limit 10
|
|
288
|
+
|
|
289
|
+
# Requeue single job to different queue
|
|
290
|
+
rrq dlq requeue --job-id abc123 --target-queue retry_queue
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
#### Monitoring DLQ in Real-time
|
|
294
|
+
```bash
|
|
295
|
+
# Monitor includes DLQ statistics panel
|
|
296
|
+
rrq monitor
|
|
297
|
+
|
|
298
|
+
# Queue stats show DLQ count per original queue
|
|
299
|
+
rrq queue stats
|
|
300
|
+
```
|
|
301
|
+
|
|
234
302
|
## Command Line Interface
|
|
235
303
|
|
|
236
|
-
RRQ provides a command-line interface (CLI) for managing workers
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
304
|
+
RRQ provides a comprehensive command-line interface (CLI) for managing workers, monitoring queues, and debugging.
|
|
305
|
+
|
|
306
|
+
📖 **[Full CLI Reference Documentation](docs/CLI_REFERENCE.md)**
|
|
307
|
+
|
|
308
|
+
### Quick Examples
|
|
309
|
+
```bash
|
|
310
|
+
# Use default settings (localhost Redis)
|
|
311
|
+
rrq queue list
|
|
312
|
+
|
|
313
|
+
# Use custom settings
|
|
314
|
+
rrq queue list --settings myapp.config.rrq_settings
|
|
315
|
+
|
|
316
|
+
# Use environment variable
|
|
317
|
+
export RRQ_SETTINGS=myapp.config.rrq_settings
|
|
318
|
+
rrq monitor
|
|
319
|
+
|
|
320
|
+
# Debug workflow
|
|
321
|
+
rrq debug generate-jobs --count 100 --queue urgent
|
|
322
|
+
rrq queue inspect urgent --limit 10
|
|
323
|
+
rrq monitor --queues urgent --refresh 0.5
|
|
324
|
+
|
|
325
|
+
# DLQ management workflow
|
|
326
|
+
rrq dlq list --queue urgent --limit 10 # List failed jobs from urgent queue
|
|
327
|
+
rrq dlq stats # Show DLQ statistics and error patterns
|
|
328
|
+
rrq dlq inspect <job_id> # Inspect specific failed job
|
|
329
|
+
rrq dlq requeue --queue urgent --dry-run # Preview requeue of urgent queue jobs
|
|
330
|
+
rrq dlq requeue --queue urgent --limit 5 # Requeue 5 jobs from urgent queue
|
|
331
|
+
|
|
332
|
+
# Advanced DLQ filtering and management
|
|
333
|
+
rrq dlq list --function send_email --limit 20 # List failed email jobs
|
|
334
|
+
rrq dlq list --queue urgent --function process_data # Filter by queue AND function
|
|
335
|
+
rrq dlq requeue --function send_email --all # Requeue all failed email jobs
|
|
336
|
+
rrq dlq requeue --job-id abc123 --target-queue retry # Requeue specific job to retry queue
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Performance and Limitations
|
|
340
|
+
|
|
341
|
+
### Monitoring Performance Considerations
|
|
342
|
+
|
|
343
|
+
RRQ's monitoring and statistics commands are designed for operational visibility but have some performance considerations for large-scale deployments:
|
|
344
|
+
|
|
345
|
+
#### Queue Statistics (`rrq queue stats`)
|
|
346
|
+
- **Pending Job Counts**: Very fast, uses Redis `ZCARD` operation
|
|
347
|
+
- **Active/Completed/Failed Counts**: Requires scanning job records in Redis which can be slow for large datasets
|
|
348
|
+
- **Optimization**: Use `--max-scan` parameter to limit scanning (default: 1,000 jobs)
|
|
349
|
+
```bash
|
|
350
|
+
# Fast scan for quick overview
|
|
351
|
+
rrq queue stats --max-scan 500
|
|
352
|
+
|
|
353
|
+
# Complete scan (may be slow)
|
|
354
|
+
rrq queue stats --max-scan 0
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
#### DLQ Operations (`rrq dlq`)
|
|
358
|
+
- **Job Listing**: Uses batch fetching with Redis pipelines for efficiency
|
|
359
|
+
- **Optimization**: Use `--batch-size` parameter to control memory vs. performance trade-offs
|
|
360
|
+
```bash
|
|
361
|
+
# Smaller batches for memory-constrained environments
|
|
362
|
+
rrq dlq list --batch-size 50
|
|
363
|
+
|
|
364
|
+
# Larger batches for better performance
|
|
365
|
+
rrq dlq list --batch-size 200
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
#### Real-time Monitoring (`rrq monitor`)
|
|
369
|
+
- **Error Message Truncation**: Both the newest error messages and the error patterns are truncated to 50 characters for display consistency
|
|
370
|
+
- **DLQ Statistics**: Updates in real-time but may impact Redis performance with very large DLQs
|
|
371
|
+
|
|
372
|
+
### Full Metrics Requirements
|
|
373
|
+
|
|
374
|
+
For comprehensive job lifecycle tracking and historical analytics, consider these architectural additions:
|
|
375
|
+
|
|
376
|
+
1. **Job History Tracking**:
|
|
377
|
+
- Store completed/failed job summaries in a separate Redis structure or external database
|
|
378
|
+
- Implement job completion event logging for time-series analytics
|
|
379
|
+
|
|
380
|
+
2. **Active Job Monitoring**:
|
|
381
|
+
- Enhanced worker health tracking with job-level visibility
|
|
382
|
+
- Real-time active job registry for immediate status reporting
|
|
383
|
+
|
|
384
|
+
3. **Throughput Calculation**:
|
|
385
|
+
- Time-series data collection for accurate throughput metrics
|
|
386
|
+
- Queue-specific performance trend tracking
|
|
387
|
+
|
|
388
|
+
4. **Scalable Statistics**:
|
|
389
|
+
- Consider Redis Streams or time-series databases for high-frequency job event tracking
|
|
390
|
+
- Implement sampling strategies for large-scale deployments
|
|
391
|
+
|
|
392
|
+
The current implementation prioritizes operational simplicity and immediate visibility over comprehensive historical analytics. For production monitoring at scale, complement RRQ's built-in tools with external monitoring systems.
|
|
254
393
|
|
|
255
394
|
## Configuration
|
|
256
395
|
|
|
@@ -264,6 +403,48 @@ RRQ can be configured in several ways, with the following precedence:
|
|
|
264
403
|
|
|
265
404
|
**Important Note on `job_registry`**: The `job_registry` attribute in your `RRQSettings` object is **critical** for RRQ to function. It must be an instance of `JobRegistry` and is used to register job handlers. Without a properly configured `job_registry`, workers will not know how to process jobs, and most operations will fail. Ensure it is set in your settings object to map job names to their respective handler functions.
|
|
266
405
|
|
|
406
|
+
### Comprehensive CLI Command System
|
|
407
|
+
- **New modular CLI architecture** with dedicated command modules for better organization
|
|
408
|
+
- **Enhanced monitoring capabilities** with real-time dashboards and beautiful table output
|
|
409
|
+
- **Extensive DLQ management** commands for inspecting, filtering, and requeuing failed jobs
|
|
410
|
+
- **Job lifecycle management** with detailed inspection and control commands
|
|
411
|
+
- **Queue management** with statistics, purging, and migration capabilities
|
|
412
|
+
- **Debug utilities** for development and testing including stress testing and data generation
|
|
413
|
+
|
|
414
|
+
## 📚 New CLI Commands
|
|
415
|
+
|
|
416
|
+
### Monitor Commands
|
|
417
|
+
- `rrq monitor` - Real-time dashboard with queue stats, worker health, and DLQ monitoring
|
|
418
|
+
- `rrq monitor workers` - Detailed worker status and health monitoring
|
|
419
|
+
- `rrq monitor jobs` - Active job tracking and monitoring
|
|
420
|
+
|
|
421
|
+
### DLQ Commands
|
|
422
|
+
- `rrq dlq list` - List failed jobs with filtering by queue, function, and time
|
|
423
|
+
- `rrq dlq stats` - DLQ statistics including error patterns and queue distribution
|
|
424
|
+
- `rrq dlq inspect` - Detailed inspection of failed jobs
|
|
425
|
+
- `rrq dlq requeue` - Requeue failed jobs with dry-run support
|
|
426
|
+
- `rrq dlq purge` - Clean up old failed jobs
|
|
427
|
+
|
|
428
|
+
### Queue Commands
|
|
429
|
+
- `rrq queue list` - List all queues with job counts
|
|
430
|
+
- `rrq queue stats` - Detailed queue statistics and throughput metrics
|
|
431
|
+
- `rrq queue inspect` - Inspect pending jobs in queues
|
|
432
|
+
- `rrq queue purge` - Purge jobs from queues with safety confirmations
|
|
433
|
+
- `rrq queue migrate` - Move jobs between queues
|
|
434
|
+
|
|
435
|
+
### Job Commands
|
|
436
|
+
- `rrq job list` - List jobs with status filtering
|
|
437
|
+
- `rrq job inspect` - Detailed job information including timeline
|
|
438
|
+
- `rrq job result` - Retrieve job results
|
|
439
|
+
- `rrq job cancel` - Cancel active jobs
|
|
440
|
+
- `rrq job retry` - Manually retry failed jobs
|
|
441
|
+
- `rrq job delete` - Delete job records
|
|
442
|
+
|
|
443
|
+
### Debug Commands
|
|
444
|
+
- `rrq debug generate-jobs` - Generate test jobs for development
|
|
445
|
+
- `rrq debug stress-test` - Stress test the system
|
|
446
|
+
- `rrq debug cleanup` - Clean up test data
|
|
447
|
+
- `rrq debug redis-info` - Redis server information and diagnostics
|
|
267
448
|
|
|
268
449
|
## Core Components
|
|
269
450
|
|
|
@@ -272,4 +453,4 @@ RRQ can be configured in several ways, with the following precedence:
|
|
|
272
453
|
* **`JobRegistry` (`registry.py`)**: A simple registry to map string function names (used when enqueuing) to the actual asynchronous handler functions the worker should execute.
|
|
273
454
|
* **`JobStore` (`store.py`)**: An abstraction layer handling all direct interactions with Redis. It manages job definitions (Hashes), queues (Sorted Sets), processing locks (Strings with TTL), unique job locks, and worker health checks.
|
|
274
455
|
* **`Job` (`job.py`)**: A Pydantic model representing a job, containing its ID, handler name, arguments, status, retry counts, timestamps, results, etc.
|
|
275
|
-
* **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `
|
|
456
|
+
* **`JobStatus` (`job.py`)**: An Enum defining the possible states of a job (`PENDING`, `ACTIVE`, `COMPLETED`, `FAILED`, `DEFERRED`). `
|