baqueue 1.0.1__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {baqueue-1.0.1/baqueue.egg-info → baqueue-1.1.0}/PKG-INFO +18 -1
- {baqueue-1.0.1 → baqueue-1.1.0}/README.md +16 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/__init__.py +1 -1
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/cli.py +31 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/config.py +8 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/api.py +8 -1
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/server.py +5 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/app.js +38 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/index.html +62 -18
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/style.css +17 -1
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/base.py +58 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/memory_driver.py +15 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/postgres_driver.py +18 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/redis_driver.py +225 -24
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/sqlite_driver.py +17 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/pruner.py +18 -9
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/queue.py +2 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/serializer.py +13 -3
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/worker.py +42 -1
- {baqueue-1.0.1 → baqueue-1.1.0/baqueue.egg-info}/PKG-INFO +18 -1
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/requires.txt +1 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/pyproject.toml +1 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/LICENSE +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/MANIFEST.in +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/balancer.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/batch.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/__init__.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/__init__.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/events.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/job.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/retry.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/scheduler.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/supervisor.py +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/SOURCES.txt +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/dependency_links.txt +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/entry_points.txt +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/top_level.txt +0 -0
- {baqueue-1.0.1 → baqueue-1.1.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: baqueue
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: A powerful Python queue management package inspired by Laravel Horizon
|
|
5
5
|
Author: Basalam, BaQueue Contributors
|
|
6
6
|
License: MIT
|
|
@@ -45,6 +45,7 @@ Provides-Extra: dev
|
|
|
45
45
|
Requires-Dist: baqueue[all]; extra == "dev"
|
|
46
46
|
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
47
47
|
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
48
|
+
Requires-Dist: fakeredis>=2.21; extra == "dev"
|
|
48
49
|
Requires-Dist: build>=1.0; extra == "dev"
|
|
49
50
|
Requires-Dist: twine>=5.0; extra == "dev"
|
|
50
51
|
Dynamic: license-file
|
|
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
|
|
|
274
275
|
await Queue.prune(tag="batch:newsletter")
|
|
275
276
|
```
|
|
276
277
|
|
|
278
|
+
#### Redis index health
|
|
279
|
+
|
|
280
|
+
The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
|
|
281
|
+
count jobs by queue/status efficiently. All deletes go through an index-consistent path
|
|
282
|
+
that removes the job hash *and* every index entry in one atomic step, so the indexes stay
|
|
283
|
+
bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
|
|
284
|
+
`redis-cli`), pruning reaps them automatically, and you can force a full repair:
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
|
|
291
|
+
to keep connect fast on large datasets).
|
|
292
|
+
|
|
277
293
|
### Retry Failed Jobs
|
|
278
294
|
|
|
279
295
|
Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
|
|
@@ -508,6 +524,7 @@ baqueue schedule Start the job scheduler
|
|
|
508
524
|
baqueue dashboard Launch the monitoring dashboard
|
|
509
525
|
baqueue prune Prune old jobs
|
|
510
526
|
baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
|
|
527
|
+
baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
|
|
511
528
|
baqueue status Show queue status
|
|
512
529
|
baqueue test Run the test suite
|
|
513
530
|
```
|
|
@@ -223,6 +223,21 @@ await Queue.prune(status="completed", hours=24)
|
|
|
223
223
|
await Queue.prune(tag="batch:newsletter")
|
|
224
224
|
```
|
|
225
225
|
|
|
226
|
+
#### Redis index health
|
|
227
|
+
|
|
228
|
+
The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
|
|
229
|
+
count jobs by queue/status efficiently. All deletes go through an index-consistent path
|
|
230
|
+
that removes the job hash *and* every index entry in one atomic step, so the indexes stay
|
|
231
|
+
bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
|
|
232
|
+
`redis-cli`), pruning reaps them automatically, and you can force a full repair:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
|
|
239
|
+
to keep connect fast on large datasets).
|
|
240
|
+
|
|
226
241
|
### Retry Failed Jobs
|
|
227
242
|
|
|
228
243
|
Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
|
|
@@ -457,6 +472,7 @@ baqueue schedule Start the job scheduler
|
|
|
457
472
|
baqueue dashboard Launch the monitoring dashboard
|
|
458
473
|
baqueue prune Prune old jobs
|
|
459
474
|
baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
|
|
475
|
+
baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
|
|
460
476
|
baqueue status Show queue status
|
|
461
477
|
baqueue test Run the test suite
|
|
462
478
|
```
|
|
@@ -365,6 +365,37 @@ async def _run_retry_failed(
|
|
|
365
365
|
await Queue.disconnect()
|
|
366
366
|
|
|
367
367
|
|
|
368
|
+
@cli.command(name="reconcile-indexes")
|
|
369
|
+
@click.option("--batch", default=500, type=int, help="Index entries scanned per batch.")
|
|
370
|
+
@click.option("--driver", "-d", default="redis", help="Driver name (sqlite, memory, redis, postgres).")
|
|
371
|
+
@click.option("--driver-url", default=None, help="Driver connection URL.")
|
|
372
|
+
@click.pass_context
|
|
373
|
+
def reconcile_indexes(
|
|
374
|
+
ctx: click.Context,
|
|
375
|
+
batch: int,
|
|
376
|
+
driver: str,
|
|
377
|
+
driver_url: str | None,
|
|
378
|
+
) -> None:
|
|
379
|
+
"""Repair secondary indexes: remove entries pointing at jobs that no longer exist.
|
|
380
|
+
|
|
381
|
+
Only the Redis driver maintains secondary indexes; this is a no-op elsewhere."""
|
|
382
|
+
_validate_driver(driver)
|
|
383
|
+
config: BaQueueConfig = ctx.obj["config"]
|
|
384
|
+
config.driver = DriverConfig(name=driver, url=driver_url or "")
|
|
385
|
+
|
|
386
|
+
removed = _run_async(_run_reconcile_indexes, config, batch)
|
|
387
|
+
click.echo(f"Removed {removed or 0} stale index entr{'y' if removed == 1 else 'ies'}.")
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
async def _run_reconcile_indexes(config: BaQueueConfig, batch: int) -> int:
|
|
391
|
+
Queue.configure(config)
|
|
392
|
+
await Queue.connect()
|
|
393
|
+
try:
|
|
394
|
+
return await Queue.get_driver().reconcile_indexes(batch=batch)
|
|
395
|
+
finally:
|
|
396
|
+
await Queue.disconnect()
|
|
397
|
+
|
|
398
|
+
|
|
368
399
|
@cli.command()
|
|
369
400
|
@click.option("--driver", "-d", default="sqlite", help="Driver name (sqlite, memory, redis, postgres).")
|
|
370
401
|
@click.option("--driver-url", default=None, help="Driver connection URL.")
|
|
@@ -59,6 +59,14 @@ class BaQueueConfig(BaseModel):
|
|
|
59
59
|
prune_completed_seconds: int = 5 # delete completed jobs ~5s after completion
|
|
60
60
|
prune_other_seconds: int = 86400 # 1 day — applies to failed + cancelled
|
|
61
61
|
prune_metrics_seconds: int = 604800 # 7 days
|
|
62
|
+
# Per-call cap for index-consistent bulk deletes; the pruner loops to drain.
|
|
63
|
+
prune_batch_size: int = 1000
|
|
64
|
+
|
|
65
|
+
# ── Secondary-index reconciliation (Redis) ─────────────────
|
|
66
|
+
# When True, connect() runs a one-shot reconcile pass that removes index
|
|
67
|
+
# entries pointing at jobs that no longer exist. Off by default — run on
|
|
68
|
+
# demand via `baqueue reconcile-indexes` to keep startup fast.
|
|
69
|
+
reconcile_on_connect: bool = False
|
|
62
70
|
|
|
63
71
|
# ── Legacy hour-based overrides (kept for back-compat) ──────
|
|
64
72
|
# When > 0, these take precedence over the seconds fields above for the
|
|
@@ -117,7 +117,10 @@ class DashboardAPI:
|
|
|
117
117
|
created_from=created_from, created_to=created_to,
|
|
118
118
|
)
|
|
119
119
|
return {
|
|
120
|
-
|
|
120
|
+
# The list view never renders per-attempt history (the modal fetches
|
|
121
|
+
# job_detail for that), so omit it to keep the list and the live
|
|
122
|
+
# /ws/jobs push lean.
|
|
123
|
+
"jobs": [j.to_dict(include_history=False) for j in jobs],
|
|
121
124
|
"page": page,
|
|
122
125
|
"per_page": per_page,
|
|
123
126
|
"count": len(jobs),
|
|
@@ -128,6 +131,10 @@ class DashboardAPI:
|
|
|
128
131
|
job = await self.driver.get_job(job_id)
|
|
129
132
|
return job.to_dict() if job else None
|
|
130
133
|
|
|
134
|
+
async def promote_job(self, job_id: str) -> bool:
|
|
135
|
+
"""Make a scheduled/pending job runnable immediately. Returns True on success."""
|
|
136
|
+
return await self.driver.promote(job_id)
|
|
137
|
+
|
|
131
138
|
async def retry_job(self, job_id: str) -> bool:
|
|
132
139
|
job = await self.driver.get_job(job_id)
|
|
133
140
|
if not job or job.status != "failed":
|
|
@@ -150,6 +150,11 @@ def create_app(driver: BaseDriver, config: Optional[BaQueueConfig] = None) -> An
|
|
|
150
150
|
ok = await api.retry_job(job_id)
|
|
151
151
|
return JSONResponse({"success": ok})
|
|
152
152
|
|
|
153
|
+
@app.post("/api/jobs/{job_id}/execute")
|
|
154
|
+
async def execute_job(job_id: str):
|
|
155
|
+
ok = await api.promote_job(job_id)
|
|
156
|
+
return JSONResponse({"success": ok})
|
|
157
|
+
|
|
153
158
|
@app.delete("/api/jobs/{job_id}")
|
|
154
159
|
async def delete_job(job_id: str):
|
|
155
160
|
ok = await api.delete_job(job_id)
|
|
@@ -322,6 +322,14 @@ document.addEventListener("alpine:init", () => {
|
|
|
322
322
|
this.fetchOverview();
|
|
323
323
|
},
|
|
324
324
|
|
|
325
|
+
async executeJob(jobId) {
|
|
326
|
+
// Promote a scheduled/pending job so it runs immediately.
|
|
327
|
+
await fetch(`/api/jobs/${jobId}/execute`, { method: "POST" });
|
|
328
|
+
this.closeModal();
|
|
329
|
+
this.fetchJobs();
|
|
330
|
+
this.fetchOverview();
|
|
331
|
+
},
|
|
332
|
+
|
|
325
333
|
async retryAllFailed() {
|
|
326
334
|
const parts = [];
|
|
327
335
|
if (this.jobsFilter.queue) parts.push(`queue "${this.jobsFilter.queue}"`);
|
|
@@ -437,6 +445,36 @@ document.addEventListener("alpine:init", () => {
|
|
|
437
445
|
return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
|
|
438
446
|
},
|
|
439
447
|
|
|
448
|
+
// ── Per-attempt timeline ────────────────────────────────
|
|
449
|
+
|
|
450
|
+
attemptHistory(job) {
|
|
451
|
+
return job && Array.isArray(job.history) ? job.history : [];
|
|
452
|
+
},
|
|
453
|
+
|
|
454
|
+
hasHistory(job) {
|
|
455
|
+
return this.attemptHistory(job).length > 0;
|
|
456
|
+
},
|
|
457
|
+
|
|
458
|
+
// A job currently processing has an in-flight attempt that isn't recorded in
|
|
459
|
+
// history yet (entries are appended only when an attempt concludes).
|
|
460
|
+
inFlightAttempt(job) {
|
|
461
|
+
return !!(job && job.status === "processing" && job.started_at);
|
|
462
|
+
},
|
|
463
|
+
|
|
464
|
+
attemptDotClass(entry) {
|
|
465
|
+
return entry && entry.status === "completed" ? "completed" : "failed";
|
|
466
|
+
},
|
|
467
|
+
|
|
468
|
+
attemptDuration(entry) {
|
|
469
|
+
if (!entry || !entry.started_at || !entry.finished_at) return "";
|
|
470
|
+
const diff = entry.finished_at - entry.started_at;
|
|
471
|
+
if (diff < 0) return "";
|
|
472
|
+
if (diff < 0.001) return "<1ms";
|
|
473
|
+
if (diff < 1) return Math.round(diff * 1000) + "ms";
|
|
474
|
+
if (diff < 60) return diff.toFixed(1) + "s";
|
|
475
|
+
return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
|
|
476
|
+
},
|
|
477
|
+
|
|
440
478
|
shortId(id) {
|
|
441
479
|
return id ? id.substring(0, 12) : "-";
|
|
442
480
|
},
|
|
@@ -511,27 +511,67 @@
|
|
|
511
511
|
<span class="tl-time" x-text="formatTimeFull(selectedJob.delay_until)"></span>
|
|
512
512
|
</div>
|
|
513
513
|
</div>
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
514
|
+
<!-- Per-attempt history (jobs that ran at least once on a
|
|
515
|
+
driver that persists history). Each backoff retry is its
|
|
516
|
+
own entry. -->
|
|
517
|
+
<template x-for="(entry, idx) in attemptHistory(selectedJob)" :key="idx">
|
|
518
|
+
<div class="tl-item">
|
|
519
|
+
<div class="tl-dot" :class="attemptDotClass(entry)"></div>
|
|
520
|
+
<div class="tl-content">
|
|
521
|
+
<span class="tl-label">
|
|
522
|
+
Attempt <span x-text="entry.attempt"></span> ·
|
|
523
|
+
<span x-text="entry.status"></span>
|
|
524
|
+
<span class="tl-dur" x-show="attemptDuration(entry)" x-text="'(' + attemptDuration(entry) + ')'"></span>
|
|
525
|
+
</span>
|
|
526
|
+
<span class="tl-time" x-text="formatTimeFull(entry.started_at) + (entry.finished_at ? ' → ' + formatTimeFull(entry.finished_at) : '')"></span>
|
|
527
|
+
<span class="tl-retry" x-show="entry.will_retry">
|
|
528
|
+
Retry scheduled <span x-text="entry.next_retry_at ? scheduledIn(entry.next_retry_at) : ''"></span>
|
|
529
|
+
</span>
|
|
530
|
+
<pre class="tl-error" x-show="entry.error" x-text="entry.error"></pre>
|
|
531
|
+
</div>
|
|
519
532
|
</div>
|
|
520
|
-
</
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
<
|
|
533
|
+
</template>
|
|
534
|
+
<!-- The currently-running attempt is not recorded in history
|
|
535
|
+
until it concludes, so surface it live. -->
|
|
536
|
+
<template x-if="inFlightAttempt(selectedJob)">
|
|
537
|
+
<div class="tl-item">
|
|
538
|
+
<div class="tl-dot processing"></div>
|
|
539
|
+
<div class="tl-content">
|
|
540
|
+
<span class="tl-label">Attempt <span x-text="selectedJob.attempts"></span> · running…</span>
|
|
541
|
+
<span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
|
|
542
|
+
</div>
|
|
526
543
|
</div>
|
|
527
|
-
</
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
544
|
+
</template>
|
|
545
|
+
|
|
546
|
+
<!-- Legacy single-attempt timeline: jobs created before history
|
|
547
|
+
tracking, or on drivers that don't persist history. -->
|
|
548
|
+
<template x-if="!hasHistory(selectedJob) && !inFlightAttempt(selectedJob) && selectedJob.started_at">
|
|
549
|
+
<div class="tl-item">
|
|
550
|
+
<div class="tl-dot processing"></div>
|
|
551
|
+
<div class="tl-content">
|
|
552
|
+
<span class="tl-label">Started</span>
|
|
553
|
+
<span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
|
|
554
|
+
</div>
|
|
533
555
|
</div>
|
|
534
|
-
</
|
|
556
|
+
</template>
|
|
557
|
+
<template x-if="!hasHistory(selectedJob) && selectedJob.completed_at">
|
|
558
|
+
<div class="tl-item">
|
|
559
|
+
<div class="tl-dot completed"></div>
|
|
560
|
+
<div class="tl-content">
|
|
561
|
+
<span class="tl-label">Completed</span>
|
|
562
|
+
<span class="tl-time" x-text="formatTimeFull(selectedJob.completed_at)"></span>
|
|
563
|
+
</div>
|
|
564
|
+
</div>
|
|
565
|
+
</template>
|
|
566
|
+
<template x-if="!hasHistory(selectedJob) && selectedJob.failed_at">
|
|
567
|
+
<div class="tl-item">
|
|
568
|
+
<div class="tl-dot failed"></div>
|
|
569
|
+
<div class="tl-content">
|
|
570
|
+
<span class="tl-label">Failed</span>
|
|
571
|
+
<span class="tl-time" x-text="formatTimeFull(selectedJob.failed_at)"></span>
|
|
572
|
+
</div>
|
|
573
|
+
</div>
|
|
574
|
+
</template>
|
|
535
575
|
</div>
|
|
536
576
|
</div>
|
|
537
577
|
|
|
@@ -563,6 +603,10 @@
|
|
|
563
603
|
</div>
|
|
564
604
|
|
|
565
605
|
<div class="modal-actions">
|
|
606
|
+
<button class="btn-primary" x-show="isScheduled(selectedJob)" @click="executeJob(selectedJob.id)">
|
|
607
|
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polygon points="5 3 19 12 5 21 5 3"/></svg>
|
|
608
|
+
Execute Now
|
|
609
|
+
</button>
|
|
566
610
|
<button class="btn-primary" x-show="selectedJob.status === 'failed'" @click="retryJob(selectedJob.id)">
|
|
567
611
|
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 11-2.12-9.36L23 10"/></svg>
|
|
568
612
|
Retry Job
|
|
@@ -1267,8 +1267,24 @@ body {
|
|
|
1267
1267
|
.tl-dot.failed { border-color: var(--red); background: var(--red); }
|
|
1268
1268
|
|
|
1269
1269
|
.tl-content { display: flex; flex-direction: column; gap: 1px; }
|
|
1270
|
-
.tl-label { font-size: 13px; font-weight: 600; }
|
|
1270
|
+
.tl-label { font-size: 13px; font-weight: 600; text-transform: capitalize; }
|
|
1271
1271
|
.tl-time { font-size: 12px; color: var(--text-muted); font-family: 'JetBrains Mono', monospace; }
|
|
1272
|
+
.tl-dur { font-weight: 400; color: var(--text-muted); }
|
|
1273
|
+
.tl-retry { font-size: 12px; color: var(--amber); }
|
|
1274
|
+
.tl-error {
|
|
1275
|
+
margin: 4px 0 0;
|
|
1276
|
+
padding: 6px 8px;
|
|
1277
|
+
font-size: 11px;
|
|
1278
|
+
font-family: 'JetBrains Mono', monospace;
|
|
1279
|
+
color: var(--red);
|
|
1280
|
+
background: var(--bg-surface);
|
|
1281
|
+
border: 1px solid var(--border);
|
|
1282
|
+
border-radius: 6px;
|
|
1283
|
+
white-space: pre-wrap;
|
|
1284
|
+
word-break: break-word;
|
|
1285
|
+
max-height: 140px;
|
|
1286
|
+
overflow: auto;
|
|
1287
|
+
}
|
|
1272
1288
|
|
|
1273
1289
|
/* ── Tags ───────────────────────────────────────────────── */
|
|
1274
1290
|
|
|
@@ -10,6 +10,10 @@ from baqueue.serializer import JobPayload
|
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger("baqueue.driver")
|
|
12
12
|
|
|
13
|
+
# Default per-call cap for batched bulk-delete / prune operations. Keeps a single
|
|
14
|
+
# call from blocking the backend on very large datasets; callers loop to drain.
|
|
15
|
+
DEFAULT_PRUNE_BATCH = 1000
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class BaseDriver(ABC):
|
|
15
19
|
"""Every BaQueue driver must implement this interface."""
|
|
@@ -18,6 +22,11 @@ class BaseDriver(ABC):
|
|
|
18
22
|
# an emergency cleanup and one retry. Wired from BaQueueConfig in queue.py.
|
|
19
23
|
auto_cleanup_on_disk_full: bool = True
|
|
20
24
|
|
|
25
|
+
# When True, connect() runs a one-shot reconcile_indexes() pass to heal any
|
|
26
|
+
# secondary-index drift accumulated while offline. Off by default so connect
|
|
27
|
+
# stays fast on large datasets. Wired from BaQueueConfig in queue.py.
|
|
28
|
+
reconcile_on_connect: bool = False
|
|
29
|
+
|
|
21
30
|
# Re-entrancy guard so emergency_cleanup() doesn't recurse if its own
|
|
22
31
|
# prune calls also hit disk-full.
|
|
23
32
|
_in_emergency_cleanup: bool = False
|
|
@@ -106,6 +115,20 @@ class BaseDriver(ABC):
|
|
|
106
115
|
@abstractmethod
|
|
107
116
|
async def delete(self, job_id: str) -> None: ...
|
|
108
117
|
|
|
118
|
+
async def promote(self, job_id: str) -> bool:
|
|
119
|
+
"""Make a scheduled/pending job runnable immediately (clear its delay).
|
|
120
|
+
|
|
121
|
+
Returns True if the job was promoted, False if it does not exist or is not
|
|
122
|
+
in the ``pending`` state. Concrete (non-abstract) so existing third-party
|
|
123
|
+
drivers keep working; the built-in drivers override it with a race-safe,
|
|
124
|
+
index-aware version. The default relies on ``release(delay=0)`` to enqueue
|
|
125
|
+
the job for immediate processing."""
|
|
126
|
+
job = await self.get_job(job_id)
|
|
127
|
+
if job is None or job.status != "pending":
|
|
128
|
+
return False
|
|
129
|
+
await self.release(job, delay=0)
|
|
130
|
+
return True
|
|
131
|
+
|
|
109
132
|
# ── Query ───────────────────────────────────────────────────
|
|
110
133
|
|
|
111
134
|
@abstractmethod
|
|
@@ -193,6 +216,41 @@ class BaseDriver(ABC):
|
|
|
193
216
|
"""Delete matching jobs. Returns count of pruned jobs."""
|
|
194
217
|
...
|
|
195
218
|
|
|
219
|
+
async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
|
|
220
|
+
"""Delete an explicit list of jobs, keeping any secondary indexes consistent.
|
|
221
|
+
|
|
222
|
+
Default implementation deletes one id at a time via ``delete``; drivers with
|
|
223
|
+
secondary indexes (Redis) override this with an atomic, batched version that
|
|
224
|
+
also reaps orphaned index entries. Returns the count of ids processed."""
|
|
225
|
+
if limit is not None:
|
|
226
|
+
job_ids = job_ids[:limit]
|
|
227
|
+
for job_id in job_ids:
|
|
228
|
+
await self.delete(job_id)
|
|
229
|
+
return len(job_ids)
|
|
230
|
+
|
|
231
|
+
async def prune_terminal_jobs(
|
|
232
|
+
self,
|
|
233
|
+
queue: str | None = None,
|
|
234
|
+
status: str | None = None,
|
|
235
|
+
*,
|
|
236
|
+
older_than: float | None = None,
|
|
237
|
+
limit: int = DEFAULT_PRUNE_BATCH,
|
|
238
|
+
) -> int:
|
|
239
|
+
"""Index-consistent bulk delete of terminal jobs, capped at ``limit`` per call.
|
|
240
|
+
|
|
241
|
+
Default implementation delegates to ``prune``; the Redis driver overrides it to
|
|
242
|
+
use its status index as the work source, reap orphaned index entries, and bound
|
|
243
|
+
the per-call cost. Callers loop until a pass returns fewer than ``limit``."""
|
|
244
|
+
return await self.prune(status=status, queue=queue, older_than_seconds=older_than)
|
|
245
|
+
|
|
246
|
+
async def reconcile_indexes(self, batch: int = 500) -> int:
|
|
247
|
+
"""Repair secondary indexes by removing entries whose job no longer exists.
|
|
248
|
+
|
|
249
|
+
No-op for drivers without secondary indexes (memory/sqlite/postgres). The Redis
|
|
250
|
+
driver overrides this to walk its index ZSETs and ZREM orphaned ids. Returns the
|
|
251
|
+
number of stale index entries removed."""
|
|
252
|
+
return 0
|
|
253
|
+
|
|
196
254
|
@abstractmethod
|
|
197
255
|
async def flush(self, queue: str | None = None) -> None:
|
|
198
256
|
"""Remove all jobs (optionally for a specific queue)."""
|
|
@@ -116,6 +116,21 @@ class MemoryDriver(BaseDriver):
|
|
|
116
116
|
if job_id in self._delayed:
|
|
117
117
|
self._delayed.remove(job_id)
|
|
118
118
|
|
|
119
|
+
async def promote(self, job_id: str) -> bool:
|
|
120
|
+
async with self._lock:
|
|
121
|
+
payload = self._jobs.get(job_id)
|
|
122
|
+
if payload is None or payload.status != "pending":
|
|
123
|
+
return False
|
|
124
|
+
payload.delay_until = None
|
|
125
|
+
payload.updated_at = _now_ts()
|
|
126
|
+
if job_id in self._delayed:
|
|
127
|
+
self._delayed.remove(job_id)
|
|
128
|
+
# Only enqueue if it isn't already ready, so promoting a non-delayed
|
|
129
|
+
# pending job can never duplicate it in the ready list.
|
|
130
|
+
if job_id not in self._queues[payload.queue]:
|
|
131
|
+
self._queues[payload.queue].append(job_id)
|
|
132
|
+
return True
|
|
133
|
+
|
|
119
134
|
# ── Query ───────────────────────────────────────────────────
|
|
120
135
|
|
|
121
136
|
async def get_job(self, job_id: str) -> JobPayload | None:
|
|
@@ -324,6 +324,24 @@ class PostgresDriver(BaseDriver):
|
|
|
324
324
|
|
|
325
325
|
await self._with_disk_full_recovery(_do)
|
|
326
326
|
|
|
327
|
+
async def promote(self, job_id: str) -> bool:
|
|
328
|
+
now = _now_ts()
|
|
329
|
+
|
|
330
|
+
async def _do():
|
|
331
|
+
async with self._pool.acquire() as conn:
|
|
332
|
+
# Clearing delay_until is enough: pop() already accepts a pending
|
|
333
|
+
# row whose delay_until IS NULL or has elapsed.
|
|
334
|
+
return await conn.fetchrow(
|
|
335
|
+
f"""UPDATE {self._jobs_table}
|
|
336
|
+
SET delay_until=NULL, updated_at=$1
|
|
337
|
+
WHERE id=$2 AND status='pending'
|
|
338
|
+
RETURNING id""",
|
|
339
|
+
now, job_id,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
row = await self._with_disk_full_recovery(_do)
|
|
343
|
+
return row is not None
|
|
344
|
+
|
|
327
345
|
# ── Query ───────────────────────────────────────────────────
|
|
328
346
|
|
|
329
347
|
async def get_job(self, job_id: str) -> JobPayload | None:
|
|
@@ -6,11 +6,16 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from baqueue.drivers.base import BaseDriver
|
|
9
|
+
from baqueue.drivers.base import DEFAULT_PRUNE_BATCH, BaseDriver
|
|
10
10
|
from baqueue.serializer import JobPayload, _now_ts
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger("baqueue.redis")
|
|
13
13
|
|
|
14
|
+
# Every status a job hash can carry. Used when reaping orphaned index entries:
|
|
15
|
+
# the job hash is gone, so we can't read its status — we ZREM from every global
|
|
16
|
+
# status index to be sure the stale id is cleared.
|
|
17
|
+
_ALL_STATUSES = ("pending", "processing", "completed", "failed", "cancelled")
|
|
18
|
+
|
|
14
19
|
|
|
15
20
|
class RedisDriver(BaseDriver):
|
|
16
21
|
"""Redis-backed driver using sorted sets for indexed pagination.
|
|
@@ -88,6 +93,10 @@ class RedisDriver(BaseDriver):
|
|
|
88
93
|
self._redis = aioredis.from_url(self._url, decode_responses=True, **self._kwargs)
|
|
89
94
|
await self._redis.ping()
|
|
90
95
|
await self._backfill_indexes_if_needed()
|
|
96
|
+
if self.reconcile_on_connect:
|
|
97
|
+
removed = await self.reconcile_indexes()
|
|
98
|
+
if removed:
|
|
99
|
+
logger.info("reconcile_on_connect removed %d stale index entr(ies)", removed)
|
|
91
100
|
|
|
92
101
|
async def disconnect(self) -> None:
|
|
93
102
|
if self._redis:
|
|
@@ -97,7 +106,11 @@ class RedisDriver(BaseDriver):
|
|
|
97
106
|
async def _backfill_indexes_if_needed(self) -> None:
|
|
98
107
|
"""One-time rebuild of secondary ZSETs for upgrades from a version
|
|
99
108
|
that didn't maintain them. Safe to call on every connect — exits fast
|
|
100
|
-
when the global index is non-empty.
|
|
109
|
+
when the global index is non-empty.
|
|
110
|
+
|
|
111
|
+
This is *add-only*: it inserts index entries for existing job hashes. It
|
|
112
|
+
cannot remove drift (index entries whose hash is gone) — that is the job
|
|
113
|
+
of reconcile_indexes(). Together they fully heal the indexes."""
|
|
101
114
|
if await self._redis.exists(self._idx_all()):
|
|
102
115
|
return
|
|
103
116
|
cursor: Any = "0"
|
|
@@ -297,6 +310,32 @@ class RedisDriver(BaseDriver):
|
|
|
297
310
|
await pipe.execute()
|
|
298
311
|
await self._with_disk_full_recovery(_do)
|
|
299
312
|
|
|
313
|
+
async def promote(self, job_id: str) -> bool:
|
|
314
|
+
raw = await self._redis.hget(self._key("job", job_id), "data")
|
|
315
|
+
if not raw:
|
|
316
|
+
return False
|
|
317
|
+
payload = JobPayload.from_json(raw)
|
|
318
|
+
if payload.status != "pending":
|
|
319
|
+
return False
|
|
320
|
+
now = _now_ts()
|
|
321
|
+
# Only a job actually sitting in the delayed ZSET needs to be moved into
|
|
322
|
+
# its ready list. A pending job that is already ready (delay_until None or
|
|
323
|
+
# in the past) must NOT be re-pushed, or Redis pop — which does not
|
|
324
|
+
# re-check status — would process it twice.
|
|
325
|
+
was_scheduled = payload.delay_until is not None and payload.delay_until > now
|
|
326
|
+
payload.delay_until = None
|
|
327
|
+
payload.updated_at = now
|
|
328
|
+
|
|
329
|
+
async def _do():
|
|
330
|
+
pipe = self._redis.pipeline()
|
|
331
|
+
pipe.hset(self._key("job", job_id), mapping={"data": payload.to_json()})
|
|
332
|
+
if was_scheduled:
|
|
333
|
+
pipe.zrem(self._key("delayed"), job_id)
|
|
334
|
+
pipe.rpush(self._key("queue", payload.queue), job_id)
|
|
335
|
+
await pipe.execute()
|
|
336
|
+
await self._with_disk_full_recovery(_do)
|
|
337
|
+
return True
|
|
338
|
+
|
|
300
339
|
# ── Query ───────────────────────────────────────────────────
|
|
301
340
|
|
|
302
341
|
async def get_job(self, job_id: str) -> JobPayload | None:
|
|
@@ -518,20 +557,41 @@ class RedisDriver(BaseDriver):
|
|
|
518
557
|
|
|
519
558
|
# ── Pruning ─────────────────────────────────────────────────
|
|
520
559
|
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
status
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
560
|
+
def _index_remove_orphan(self, pipe: Any, job_id: str, queue: str | None, status: str | None) -> None:
|
|
561
|
+
"""ZREM a stale id whose job hash is gone. We can't read the job's real
|
|
562
|
+
queue/status, so we clear every index family we can infer from the call:
|
|
563
|
+
always jobs:all + every global status index, plus the queue-scoped families
|
|
564
|
+
when the caller knows the queue/status it was iterating."""
|
|
565
|
+
pipe.zrem(self._idx_all(), job_id)
|
|
566
|
+
for st in _ALL_STATUSES:
|
|
567
|
+
pipe.zrem(self._idx_status(st), job_id)
|
|
568
|
+
if queue:
|
|
569
|
+
pipe.zrem(self._idx_queue(queue), job_id)
|
|
570
|
+
for st in _ALL_STATUSES:
|
|
571
|
+
pipe.zrem(self._idx_queue_status(queue, st), job_id)
|
|
530
572
|
|
|
531
|
-
|
|
532
|
-
|
|
573
|
+
async def _prune_index_batch(
|
|
574
|
+
self,
|
|
575
|
+
index: str,
|
|
576
|
+
queue: str | None,
|
|
577
|
+
status: str | None,
|
|
578
|
+
tag: str | None,
|
|
579
|
+
older_than_seconds: float | None,
|
|
580
|
+
offset: int,
|
|
581
|
+
limit: int,
|
|
582
|
+
) -> tuple[int, int, int]:
|
|
583
|
+
"""Process one window ``[offset, offset+limit)`` of an index in a single
|
|
584
|
+
atomic pass.
|
|
585
|
+
|
|
586
|
+
Live jobs matching the filters are fully deleted (hash + all four index
|
|
587
|
+
families). Orphaned ids (hash already gone) are reaped from the indexes so
|
|
588
|
+
they can never accumulate. Non-matching live jobs are left in place. Returns
|
|
589
|
+
``(removed, scanned, skipped)``: removed = deleted + reaped, scanned = window
|
|
590
|
+
size actually read, skipped = live jobs left in place (so the caller can step
|
|
591
|
+
its offset past them)."""
|
|
592
|
+
candidate_ids: list[str] = await self._redis.zrange(index, offset, offset + limit - 1)
|
|
533
593
|
if not candidate_ids:
|
|
534
|
-
return 0
|
|
594
|
+
return 0, 0, 0
|
|
535
595
|
|
|
536
596
|
pipe = self._redis.pipeline()
|
|
537
597
|
for jid in candidate_ids:
|
|
@@ -540,29 +600,170 @@ class RedisDriver(BaseDriver):
|
|
|
540
600
|
|
|
541
601
|
now = _now_ts()
|
|
542
602
|
to_delete: list[JobPayload] = []
|
|
543
|
-
|
|
603
|
+
orphans: list[str] = []
|
|
604
|
+
skipped = 0
|
|
605
|
+
for jid, raw in zip(candidate_ids, raws):
|
|
544
606
|
if not raw:
|
|
607
|
+
orphans.append(jid)
|
|
545
608
|
continue
|
|
546
609
|
job = JobPayload.from_json(raw)
|
|
547
610
|
if tag and tag not in job.tags:
|
|
611
|
+
skipped += 1
|
|
548
612
|
continue
|
|
549
613
|
if older_than_seconds and (now - job.updated_at) < older_than_seconds:
|
|
614
|
+
skipped += 1
|
|
550
615
|
continue
|
|
551
616
|
to_delete.append(job)
|
|
552
617
|
|
|
553
|
-
if
|
|
618
|
+
if to_delete or orphans:
|
|
619
|
+
async def _do():
|
|
620
|
+
pipe = self._redis.pipeline()
|
|
621
|
+
for job in to_delete:
|
|
622
|
+
pipe.lrem(self._key("queue", job.queue), 0, job.id)
|
|
623
|
+
pipe.zrem(self._key("delayed"), job.id)
|
|
624
|
+
pipe.unlink(self._key("job", job.id))
|
|
625
|
+
self._index_remove(pipe, job.id, job.queue, job.status)
|
|
626
|
+
for jid in orphans:
|
|
627
|
+
self._index_remove_orphan(pipe, jid, queue, status)
|
|
628
|
+
await pipe.execute()
|
|
629
|
+
await self._with_disk_full_recovery(_do)
|
|
630
|
+
|
|
631
|
+
return len(to_delete) + len(orphans), len(candidate_ids), skipped
|
|
632
|
+
|
|
633
|
+
async def _drain_index(
|
|
634
|
+
self,
|
|
635
|
+
index: str,
|
|
636
|
+
queue: str | None,
|
|
637
|
+
status: str | None,
|
|
638
|
+
tag: str | None,
|
|
639
|
+
older_than_seconds: float | None,
|
|
640
|
+
batch: int,
|
|
641
|
+
) -> int:
|
|
642
|
+
"""Page through an index in ``batch``-sized windows, deleting matches and
|
|
643
|
+
reaping orphans, until the whole index has been scanned.
|
|
644
|
+
|
|
645
|
+
Each Redis round-trip handles at most ``batch`` ids, so a huge (possibly
|
|
646
|
+
orphan-laden) index never blocks the server on one giant zrange + delete —
|
|
647
|
+
while every entry is still examined. Entries a filter skips stay in the index,
|
|
648
|
+
so the offset is advanced past them; that is what keeps matches deeper than
|
|
649
|
+
the first window from being missed (re-reading ``zrange(0, batch)`` forever
|
|
650
|
+
would stop early)."""
|
|
651
|
+
batch = max(1, batch)
|
|
652
|
+
offset = 0
|
|
653
|
+
total = 0
|
|
654
|
+
while True:
|
|
655
|
+
removed, scanned, skipped = await self._prune_index_batch(
|
|
656
|
+
index, queue, status, tag, older_than_seconds, offset, batch,
|
|
657
|
+
)
|
|
658
|
+
total += removed
|
|
659
|
+
offset += skipped # kept entries remain; step past them next round
|
|
660
|
+
if scanned < batch:
|
|
661
|
+
break
|
|
662
|
+
return total
|
|
663
|
+
|
|
664
|
+
async def prune(
|
|
665
|
+
self,
|
|
666
|
+
status: str | None = None,
|
|
667
|
+
tag: str | None = None,
|
|
668
|
+
older_than_seconds: float | None = None,
|
|
669
|
+
queue: str | None = None,
|
|
670
|
+
) -> int:
|
|
671
|
+
if not (status or tag or older_than_seconds or queue):
|
|
554
672
|
return 0
|
|
673
|
+
index = self._index_key(queue, status)
|
|
674
|
+
return await self._drain_index(
|
|
675
|
+
index, queue, status, tag, older_than_seconds, DEFAULT_PRUNE_BATCH,
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
async def prune_terminal_jobs(
|
|
679
|
+
self,
|
|
680
|
+
queue: str | None = None,
|
|
681
|
+
status: str | None = None,
|
|
682
|
+
*,
|
|
683
|
+
older_than: float | None = None,
|
|
684
|
+
limit: int = DEFAULT_PRUNE_BATCH,
|
|
685
|
+
) -> int:
|
|
686
|
+
"""Index-consistent bulk delete from a status index, draining fully in
|
|
687
|
+
``limit``-sized batches (each Redis round-trip handles at most ``limit`` ids).
|
|
688
|
+
|
|
689
|
+
Uses the secondary index itself as the work source — no SCAN of every job
|
|
690
|
+
hash — and reaps orphaned index entries in the same pass."""
|
|
691
|
+
index = self._index_key(queue, status)
|
|
692
|
+
return await self._drain_index(index, queue, status, None, older_than, limit)
|
|
693
|
+
|
|
694
|
+
async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
|
|
695
|
+
"""Delete an explicit list of jobs atomically, keeping all four index
|
|
696
|
+
families consistent. Live jobs are removed precisely (real queue/status from
|
|
697
|
+
the hash); ids whose hash is already gone are reaped from jobs:all and every
|
|
698
|
+
global status index (per-queue orphans are caught by reconcile_indexes)."""
|
|
699
|
+
if limit is not None:
|
|
700
|
+
job_ids = job_ids[:limit]
|
|
701
|
+
if not job_ids:
|
|
702
|
+
return 0
|
|
703
|
+
|
|
704
|
+
pipe = self._redis.pipeline()
|
|
705
|
+
for jid in job_ids:
|
|
706
|
+
pipe.hget(self._key("job", jid), "data")
|
|
707
|
+
raws = await pipe.execute()
|
|
555
708
|
|
|
556
709
|
async def _do():
|
|
557
710
|
pipe = self._redis.pipeline()
|
|
558
|
-
for
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
711
|
+
for jid, raw in zip(job_ids, raws):
|
|
712
|
+
if raw:
|
|
713
|
+
job = JobPayload.from_json(raw)
|
|
714
|
+
pipe.lrem(self._key("queue", job.queue), 0, jid)
|
|
715
|
+
pipe.zrem(self._key("delayed"), jid)
|
|
716
|
+
pipe.unlink(self._key("job", jid))
|
|
717
|
+
self._index_remove(pipe, jid, job.queue, job.status)
|
|
718
|
+
else:
|
|
719
|
+
self._index_remove_orphan(pipe, jid, None, None)
|
|
563
720
|
await pipe.execute()
|
|
564
721
|
await self._with_disk_full_recovery(_do)
|
|
565
|
-
return len(
|
|
722
|
+
return len(job_ids)
|
|
723
|
+
|
|
724
|
+
async def reconcile_indexes(self, batch: int = 500) -> int:
|
|
725
|
+
"""Walk every secondary-index ZSET and ZREM ids whose job hash is gone.
|
|
726
|
+
|
|
727
|
+
Self-healing repair for index drift (e.g. job hashes deleted out-of-band).
|
|
728
|
+
Index keys are discovered by SCAN (every ``baqueue:jobs:*`` key — jobs:all,
|
|
729
|
+
jobs:status:*, jobs:queue:* and jobs:queue:*:status:*) so the repair reaches
|
|
730
|
+
families for queues no longer in the queues set, and never wastes a round-trip
|
|
731
|
+
on an index combination that does not exist. Each index is then walked with
|
|
732
|
+
ZSCAN — never loading a huge set at once — checking hash existence in pipelined
|
|
733
|
+
batches. Returns the number of stale entries removed."""
|
|
734
|
+
# Job hashes are baqueue:job:* (singular); the index ZSETs are baqueue:jobs:*.
|
|
735
|
+
index_keys: list[str] = []
|
|
736
|
+
cursor: Any = "0"
|
|
737
|
+
pattern = self._key("jobs", "*")
|
|
738
|
+
while True:
|
|
739
|
+
cursor, keys = await self._redis.scan(cursor=cursor, match=pattern, count=batch)
|
|
740
|
+
index_keys.extend(keys)
|
|
741
|
+
if cursor == "0" or cursor == 0:
|
|
742
|
+
break
|
|
743
|
+
|
|
744
|
+
removed = 0
|
|
745
|
+
for index in index_keys:
|
|
746
|
+
zcursor: Any = 0
|
|
747
|
+
while True:
|
|
748
|
+
zcursor, members = await self._redis.zscan(index, cursor=zcursor, count=batch)
|
|
749
|
+
ids = [m[0] if isinstance(m, (tuple, list)) else m for m in members]
|
|
750
|
+
if ids:
|
|
751
|
+
pipe = self._redis.pipeline()
|
|
752
|
+
for jid in ids:
|
|
753
|
+
pipe.exists(self._key("job", jid))
|
|
754
|
+
exists_flags = await pipe.execute()
|
|
755
|
+
stale = [jid for jid, ok in zip(ids, exists_flags) if not ok]
|
|
756
|
+
if stale:
|
|
757
|
+
async def _do(index=index, stale=stale):
|
|
758
|
+
pipe = self._redis.pipeline()
|
|
759
|
+
for jid in stale:
|
|
760
|
+
pipe.zrem(index, jid)
|
|
761
|
+
await pipe.execute()
|
|
762
|
+
await self._with_disk_full_recovery(_do)
|
|
763
|
+
removed += len(stale)
|
|
764
|
+
if zcursor == 0 or zcursor == "0":
|
|
765
|
+
break
|
|
766
|
+
return removed
|
|
566
767
|
|
|
567
768
|
async def prune_metrics(self, older_than_seconds: float) -> int:
|
|
568
769
|
cutoff = _now_ts() - older_than_seconds
|
|
@@ -635,11 +836,11 @@ class RedisDriver(BaseDriver):
|
|
|
635
836
|
pipe.delete(self._key("job", jid))
|
|
636
837
|
pipe.zrem(self._idx_all(), jid)
|
|
637
838
|
pipe.zrem(self._key("delayed"), jid)
|
|
638
|
-
for st in
|
|
839
|
+
for st in _ALL_STATUSES:
|
|
639
840
|
pipe.zrem(self._idx_status(st), jid)
|
|
640
841
|
# Drop all per-queue and per-(queue,status) indexes
|
|
641
842
|
pipe.delete(self._idx_queue(queue))
|
|
642
|
-
for st in
|
|
843
|
+
for st in _ALL_STATUSES:
|
|
643
844
|
pipe.delete(self._idx_queue_status(queue, st))
|
|
644
845
|
pipe.srem(self._key("queues"), queue)
|
|
645
846
|
await pipe.execute()
|
|
@@ -377,6 +377,23 @@ class SqliteDriver(BaseDriver):
|
|
|
377
377
|
c.commit()
|
|
378
378
|
await self._execute_with_retry(_do)
|
|
379
379
|
|
|
380
|
+
async def promote(self, job_id: str) -> bool:
|
|
381
|
+
now = _now_ts()
|
|
382
|
+
async with self._lock:
|
|
383
|
+
result = [False]
|
|
384
|
+
def _do():
|
|
385
|
+
c = self._get_conn()
|
|
386
|
+
# Clearing delay_until is enough: pop() already accepts a pending
|
|
387
|
+
# row whose delay_until IS NULL or has elapsed.
|
|
388
|
+
cur = c.execute(
|
|
389
|
+
"UPDATE jobs SET delay_until=NULL, updated_at=? WHERE id=? AND status='pending'",
|
|
390
|
+
(now, job_id),
|
|
391
|
+
)
|
|
392
|
+
c.commit()
|
|
393
|
+
result[0] = cur.rowcount == 1
|
|
394
|
+
await self._execute_with_retry(_do)
|
|
395
|
+
return result[0]
|
|
396
|
+
|
|
380
397
|
# ── Query ───────────────────────────────────────────────────
|
|
381
398
|
|
|
382
399
|
async def get_job(self, job_id: str) -> JobPayload | None:
|
|
@@ -57,26 +57,35 @@ class Pruner:
|
|
|
57
57
|
return self.config.prune_metrics_hours * 3600
|
|
58
58
|
return float(self.config.prune_metrics_seconds)
|
|
59
59
|
|
|
60
|
+
async def _prune_terminal(self, status: str, older_than: float) -> int:
|
|
61
|
+
"""Prune a terminal status via the driver's index-consistent bulk delete.
|
|
62
|
+
|
|
63
|
+
The driver drains the whole backlog in capped batches (so a large or
|
|
64
|
+
orphan-laden index never blocks the backend) and reaps orphaned index entries
|
|
65
|
+
in the same pass, returning the total removed."""
|
|
66
|
+
return await self.driver.prune_terminal_jobs(
|
|
67
|
+
status=status,
|
|
68
|
+
older_than=older_than,
|
|
69
|
+
limit=max(1, int(self.config.prune_batch_size)),
|
|
70
|
+
)
|
|
71
|
+
|
|
60
72
|
async def prune_once(self) -> dict[str, int]:
|
|
61
73
|
"""Run a single prune pass based on config."""
|
|
62
74
|
results: dict[str, int] = {}
|
|
63
75
|
|
|
64
76
|
if self.completed_threshold > 0:
|
|
65
|
-
results["completed"] = await self.
|
|
66
|
-
|
|
67
|
-
older_than_seconds=self.completed_threshold,
|
|
77
|
+
results["completed"] = await self._prune_terminal(
|
|
78
|
+
"completed", self.completed_threshold,
|
|
68
79
|
)
|
|
69
80
|
|
|
70
81
|
if self.failed_threshold > 0:
|
|
71
|
-
results["failed"] = await self.
|
|
72
|
-
|
|
73
|
-
older_than_seconds=self.failed_threshold,
|
|
82
|
+
results["failed"] = await self._prune_terminal(
|
|
83
|
+
"failed", self.failed_threshold,
|
|
74
84
|
)
|
|
75
85
|
|
|
76
86
|
if self.cancelled_threshold > 0:
|
|
77
|
-
results["cancelled"] = await self.
|
|
78
|
-
|
|
79
|
-
older_than_seconds=self.cancelled_threshold,
|
|
87
|
+
results["cancelled"] = await self._prune_terminal(
|
|
88
|
+
"cancelled", self.cancelled_threshold,
|
|
80
89
|
)
|
|
81
90
|
|
|
82
91
|
if self.metrics_threshold > 0:
|
|
@@ -30,6 +30,7 @@ class Queue:
|
|
|
30
30
|
cls._config = config or BaQueueConfig()
|
|
31
31
|
if driver is not None:
|
|
32
32
|
driver.auto_cleanup_on_disk_full = cls._config.auto_cleanup_on_disk_full
|
|
33
|
+
driver.reconcile_on_connect = cls._config.reconcile_on_connect
|
|
33
34
|
cls._driver = driver
|
|
34
35
|
cls._events = EventBus.default()
|
|
35
36
|
|
|
@@ -222,4 +223,5 @@ def _create_driver(config: BaQueueConfig) -> BaseDriver:
|
|
|
222
223
|
else:
|
|
223
224
|
raise ValueError(f"Unknown driver: {name}")
|
|
224
225
|
driver.auto_cleanup_on_disk_full = config.auto_cleanup_on_disk_full
|
|
226
|
+
driver.reconcile_on_connect = config.reconcile_on_connect
|
|
225
227
|
return driver
|
|
@@ -35,6 +35,7 @@ class JobPayload:
|
|
|
35
35
|
"failed_at",
|
|
36
36
|
"status",
|
|
37
37
|
"error",
|
|
38
|
+
"history",
|
|
38
39
|
)
|
|
39
40
|
|
|
40
41
|
def __init__(
|
|
@@ -58,6 +59,7 @@ class JobPayload:
|
|
|
58
59
|
failed_at: float | None = None,
|
|
59
60
|
status: str = "pending",
|
|
60
61
|
error: str | None = None,
|
|
62
|
+
history: list[dict[str, Any]] | None = None,
|
|
61
63
|
):
|
|
62
64
|
self.id = id or uuid4().hex
|
|
63
65
|
self.job_class = job_class
|
|
@@ -77,9 +79,14 @@ class JobPayload:
|
|
|
77
79
|
self.failed_at = failed_at
|
|
78
80
|
self.status = status
|
|
79
81
|
self.error = error
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
82
|
+
# Per-attempt execution history (one record per processing attempt).
|
|
83
|
+
# Bounded by the number of attempts; persisted only by drivers that store
|
|
84
|
+
# the full payload (memory, redis). Older payloads without this key load
|
|
85
|
+
# as an empty list, so the field is fully backward compatible.
|
|
86
|
+
self.history = history or []
|
|
87
|
+
|
|
88
|
+
def to_dict(self, *, include_history: bool = True) -> dict[str, Any]:
|
|
89
|
+
d = {
|
|
83
90
|
"id": self.id,
|
|
84
91
|
"job_class": self.job_class,
|
|
85
92
|
"data": self.data,
|
|
@@ -99,6 +106,9 @@ class JobPayload:
|
|
|
99
106
|
"status": self.status,
|
|
100
107
|
"error": self.error,
|
|
101
108
|
}
|
|
109
|
+
if include_history:
|
|
110
|
+
d["history"] = self.history
|
|
111
|
+
return d
|
|
102
112
|
|
|
103
113
|
def to_json(self) -> str:
|
|
104
114
|
return json.dumps(self.to_dict())
|
|
@@ -11,10 +11,16 @@ from baqueue.drivers.base import BaseDriver
|
|
|
11
11
|
from baqueue.events import EventBus
|
|
12
12
|
from baqueue.job import Job, FunctionJob
|
|
13
13
|
from baqueue.retry import compute_delay, should_retry
|
|
14
|
-
from baqueue.serializer import JobPayload, resolve_job_class
|
|
14
|
+
from baqueue.serializer import JobPayload, resolve_job_class, _now_ts
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger("baqueue.worker")
|
|
17
17
|
|
|
18
|
+
# Per-attempt errors stored in JobPayload.history are truncated to this many
|
|
19
|
+
# characters. The job's top-level `error` field keeps the full latest traceback;
|
|
20
|
+
# this bound keeps the history (and therefore the stored payload) from growing
|
|
21
|
+
# large across retries.
|
|
22
|
+
_HISTORY_ERROR_MAXLEN = 1000
|
|
23
|
+
|
|
18
24
|
|
|
19
25
|
class Worker:
|
|
20
26
|
"""Pulls and executes jobs from one or more queues."""
|
|
@@ -84,6 +90,33 @@ class Worker:
|
|
|
84
90
|
return job
|
|
85
91
|
return None
|
|
86
92
|
|
|
93
|
+
@staticmethod
|
|
94
|
+
def _record_attempt(
|
|
95
|
+
payload: JobPayload,
|
|
96
|
+
*,
|
|
97
|
+
status: str,
|
|
98
|
+
finished_at: float,
|
|
99
|
+
error: str | None = None,
|
|
100
|
+
will_retry: bool = False,
|
|
101
|
+
next_retry_at: float | None = None,
|
|
102
|
+
) -> None:
|
|
103
|
+
"""Append one bounded record describing the attempt that just concluded.
|
|
104
|
+
|
|
105
|
+
Called once per attempt, right before the driver persists the new state, so
|
|
106
|
+
drivers that store the whole payload (memory, redis) keep the full history.
|
|
107
|
+
The list is bounded by the number of attempts and the error is truncated."""
|
|
108
|
+
if error is not None and len(error) > _HISTORY_ERROR_MAXLEN:
|
|
109
|
+
error = error[:_HISTORY_ERROR_MAXLEN] + "…"
|
|
110
|
+
payload.history.append({
|
|
111
|
+
"attempt": payload.attempts,
|
|
112
|
+
"started_at": payload.started_at,
|
|
113
|
+
"finished_at": finished_at,
|
|
114
|
+
"status": status,
|
|
115
|
+
"error": error,
|
|
116
|
+
"will_retry": will_retry,
|
|
117
|
+
"next_retry_at": next_retry_at,
|
|
118
|
+
})
|
|
119
|
+
|
|
87
120
|
async def _process(self, payload: JobPayload) -> None:
|
|
88
121
|
self._current_job = payload
|
|
89
122
|
job_timeout = payload.timeout or self.timeout
|
|
@@ -99,6 +132,7 @@ class Worker:
|
|
|
99
132
|
timeout=job_timeout,
|
|
100
133
|
)
|
|
101
134
|
|
|
135
|
+
self._record_attempt(payload, status="completed", finished_at=_now_ts())
|
|
102
136
|
await self.driver.complete(payload)
|
|
103
137
|
await self.driver.record_metric(payload.queue, "completed", 1)
|
|
104
138
|
await self.events.emit("job.completed", payload=payload, result=result, worker=self.name)
|
|
@@ -118,9 +152,16 @@ class Worker:
|
|
|
118
152
|
|
|
119
153
|
if should_retry(payload.attempts, payload.max_attempts):
|
|
120
154
|
delay = compute_delay(payload.backoff, payload.attempts)
|
|
155
|
+
self._record_attempt(
|
|
156
|
+
payload, status="failed", finished_at=_now_ts(),
|
|
157
|
+
error=error_msg, will_retry=True, next_retry_at=_now_ts() + delay,
|
|
158
|
+
)
|
|
121
159
|
await self.driver.release(payload, delay=delay)
|
|
122
160
|
await self.events.emit("job.retrying", payload=payload, error=error_msg, delay=delay)
|
|
123
161
|
else:
|
|
162
|
+
self._record_attempt(
|
|
163
|
+
payload, status="failed", finished_at=_now_ts(), error=error_msg,
|
|
164
|
+
)
|
|
124
165
|
await self.driver.fail(payload, error_msg)
|
|
125
166
|
await self.driver.record_metric(payload.queue, "failed", 1)
|
|
126
167
|
await self.events.emit("job.failed", payload=payload, error=error_msg, worker=self.name)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: baqueue
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: A powerful Python queue management package inspired by Laravel Horizon
|
|
5
5
|
Author: Basalam, BaQueue Contributors
|
|
6
6
|
License: MIT
|
|
@@ -45,6 +45,7 @@ Provides-Extra: dev
|
|
|
45
45
|
Requires-Dist: baqueue[all]; extra == "dev"
|
|
46
46
|
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
47
47
|
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
48
|
+
Requires-Dist: fakeredis>=2.21; extra == "dev"
|
|
48
49
|
Requires-Dist: build>=1.0; extra == "dev"
|
|
49
50
|
Requires-Dist: twine>=5.0; extra == "dev"
|
|
50
51
|
Dynamic: license-file
|
|
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
|
|
|
274
275
|
await Queue.prune(tag="batch:newsletter")
|
|
275
276
|
```
|
|
276
277
|
|
|
278
|
+
#### Redis index health
|
|
279
|
+
|
|
280
|
+
The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
|
|
281
|
+
count jobs by queue/status efficiently. All deletes go through an index-consistent path
|
|
282
|
+
that removes the job hash *and* every index entry in one atomic step, so the indexes stay
|
|
283
|
+
bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
|
|
284
|
+
`redis-cli`), pruning reaps them automatically, and you can force a full repair:
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
|
|
291
|
+
to keep connect fast on large datasets).
|
|
292
|
+
|
|
277
293
|
### Retry Failed Jobs
|
|
278
294
|
|
|
279
295
|
Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
|
|
@@ -508,6 +524,7 @@ baqueue schedule Start the job scheduler
|
|
|
508
524
|
baqueue dashboard Launch the monitoring dashboard
|
|
509
525
|
baqueue prune Prune old jobs
|
|
510
526
|
baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
|
|
527
|
+
baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
|
|
511
528
|
baqueue status Show queue status
|
|
512
529
|
baqueue test Run the test suite
|
|
513
530
|
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|