baqueue 1.0.1__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {baqueue-1.0.1/baqueue.egg-info → baqueue-1.1.0}/PKG-INFO +18 -1
  2. {baqueue-1.0.1 → baqueue-1.1.0}/README.md +16 -0
  3. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/__init__.py +1 -1
  4. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/cli.py +31 -0
  5. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/config.py +8 -0
  6. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/api.py +8 -1
  7. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/server.py +5 -0
  8. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/app.js +38 -0
  9. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/index.html +62 -18
  10. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/style.css +17 -1
  11. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/base.py +58 -0
  12. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/memory_driver.py +15 -0
  13. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/postgres_driver.py +18 -0
  14. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/redis_driver.py +225 -24
  15. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/sqlite_driver.py +17 -0
  16. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/pruner.py +18 -9
  17. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/queue.py +2 -0
  18. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/serializer.py +13 -3
  19. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/worker.py +42 -1
  20. {baqueue-1.0.1 → baqueue-1.1.0/baqueue.egg-info}/PKG-INFO +18 -1
  21. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/requires.txt +1 -0
  22. {baqueue-1.0.1 → baqueue-1.1.0}/pyproject.toml +1 -0
  23. {baqueue-1.0.1 → baqueue-1.1.0}/LICENSE +0 -0
  24. {baqueue-1.0.1 → baqueue-1.1.0}/MANIFEST.in +0 -0
  25. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/balancer.py +0 -0
  26. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/batch.py +0 -0
  27. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/__init__.py +0 -0
  28. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/__init__.py +0 -0
  29. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/events.py +0 -0
  30. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/job.py +0 -0
  31. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/retry.py +0 -0
  32. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/scheduler.py +0 -0
  33. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue/supervisor.py +0 -0
  34. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/SOURCES.txt +0 -0
  35. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/dependency_links.txt +0 -0
  36. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/entry_points.txt +0 -0
  37. {baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/top_level.txt +0 -0
  38. {baqueue-1.0.1 → baqueue-1.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: baqueue
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: A powerful Python queue management package inspired by Laravel Horizon
5
5
  Author: Basalam, BaQueue Contributors
6
6
  License: MIT
@@ -45,6 +45,7 @@ Provides-Extra: dev
45
45
  Requires-Dist: baqueue[all]; extra == "dev"
46
46
  Requires-Dist: pytest>=8.0; extra == "dev"
47
47
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
48
+ Requires-Dist: fakeredis>=2.21; extra == "dev"
48
49
  Requires-Dist: build>=1.0; extra == "dev"
49
50
  Requires-Dist: twine>=5.0; extra == "dev"
50
51
  Dynamic: license-file
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
274
275
  await Queue.prune(tag="batch:newsletter")
275
276
  ```
276
277
 
278
+ #### Redis index health
279
+
280
+ The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
281
+ count jobs by queue/status efficiently. All deletes go through an index-consistent path
282
+ that removes the job hash *and* every index entry in one atomic step, so the indexes stay
283
+ bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
284
+ `redis-cli`), pruning reaps them automatically, and you can force a full repair:
285
+
286
+ ```bash
287
+ baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
288
+ ```
289
+
290
+ Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
291
+ to keep connect fast on large datasets).
292
+
277
293
  ### Retry Failed Jobs
278
294
 
279
295
  Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -508,6 +524,7 @@ baqueue schedule Start the job scheduler
508
524
  baqueue dashboard Launch the monitoring dashboard
509
525
  baqueue prune Prune old jobs
510
526
  baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
527
+ baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
511
528
  baqueue status Show queue status
512
529
  baqueue test Run the test suite
513
530
  ```
@@ -223,6 +223,21 @@ await Queue.prune(status="completed", hours=24)
223
223
  await Queue.prune(tag="batch:newsletter")
224
224
  ```
225
225
 
226
+ #### Redis index health
227
+
228
+ The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
229
+ count jobs by queue/status efficiently. All deletes go through an index-consistent path
230
+ that removes the job hash *and* every index entry in one atomic step, so the indexes stay
231
+ bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
232
+ `redis-cli`), pruning reaps them automatically, and you can force a full repair:
233
+
234
+ ```bash
235
+ baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
236
+ ```
237
+
238
+ Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
239
+ to keep connect fast on large datasets).
240
+
226
241
  ### Retry Failed Jobs
227
242
 
228
243
  Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -457,6 +472,7 @@ baqueue schedule Start the job scheduler
457
472
  baqueue dashboard Launch the monitoring dashboard
458
473
  baqueue prune Prune old jobs
459
474
  baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
475
+ baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
460
476
  baqueue status Show queue status
461
477
  baqueue test Run the test suite
462
478
  ```
@@ -7,7 +7,7 @@ from baqueue.batch import Batch
7
7
  from baqueue.events import EventBus
8
8
  from baqueue.retry import BackoffStrategy
9
9
 
10
- __version__ = "1.0.1"
10
+ __version__ = "1.1.0"
11
11
 
12
12
  __all__ = [
13
13
  "BaQueueConfig",
@@ -365,6 +365,37 @@ async def _run_retry_failed(
365
365
  await Queue.disconnect()
366
366
 
367
367
 
368
+ @cli.command(name="reconcile-indexes")
369
+ @click.option("--batch", default=500, type=int, help="Index entries scanned per batch.")
370
+ @click.option("--driver", "-d", default="redis", help="Driver name (sqlite, memory, redis, postgres).")
371
+ @click.option("--driver-url", default=None, help="Driver connection URL.")
372
+ @click.pass_context
373
+ def reconcile_indexes(
374
+ ctx: click.Context,
375
+ batch: int,
376
+ driver: str,
377
+ driver_url: str | None,
378
+ ) -> None:
379
+ """Repair secondary indexes: remove entries pointing at jobs that no longer exist.
380
+
381
+ Only the Redis driver maintains secondary indexes; this is a no-op elsewhere."""
382
+ _validate_driver(driver)
383
+ config: BaQueueConfig = ctx.obj["config"]
384
+ config.driver = DriverConfig(name=driver, url=driver_url or "")
385
+
386
+ removed = _run_async(_run_reconcile_indexes, config, batch)
387
+ click.echo(f"Removed {removed or 0} stale index entr{'y' if removed == 1 else 'ies'}.")
388
+
389
+
390
+ async def _run_reconcile_indexes(config: BaQueueConfig, batch: int) -> int:
391
+ Queue.configure(config)
392
+ await Queue.connect()
393
+ try:
394
+ return await Queue.get_driver().reconcile_indexes(batch=batch)
395
+ finally:
396
+ await Queue.disconnect()
397
+
398
+
368
399
  @cli.command()
369
400
  @click.option("--driver", "-d", default="sqlite", help="Driver name (sqlite, memory, redis, postgres).")
370
401
  @click.option("--driver-url", default=None, help="Driver connection URL.")
@@ -59,6 +59,14 @@ class BaQueueConfig(BaseModel):
59
59
  prune_completed_seconds: int = 5 # delete completed jobs ~5s after completion
60
60
  prune_other_seconds: int = 86400 # 1 day — applies to failed + cancelled
61
61
  prune_metrics_seconds: int = 604800 # 7 days
62
+ # Per-call cap for index-consistent bulk deletes; the pruner loops to drain.
63
+ prune_batch_size: int = 1000
64
+
65
+ # ── Secondary-index reconciliation (Redis) ─────────────────
66
+ # When True, connect() runs a one-shot reconcile pass that removes index
67
+ # entries pointing at jobs that no longer exist. Off by default — run on
68
+ # demand via `baqueue reconcile-indexes` to keep startup fast.
69
+ reconcile_on_connect: bool = False
62
70
 
63
71
  # ── Legacy hour-based overrides (kept for back-compat) ──────
64
72
  # When > 0, these take precedence over the seconds fields above for the
@@ -117,7 +117,10 @@ class DashboardAPI:
117
117
  created_from=created_from, created_to=created_to,
118
118
  )
119
119
  return {
120
- "jobs": [j.to_dict() for j in jobs],
120
+ # The list view never renders per-attempt history (the modal fetches
121
+ # job_detail for that), so omit it to keep the list and the live
122
+ # /ws/jobs push lean.
123
+ "jobs": [j.to_dict(include_history=False) for j in jobs],
121
124
  "page": page,
122
125
  "per_page": per_page,
123
126
  "count": len(jobs),
@@ -128,6 +131,10 @@ class DashboardAPI:
128
131
  job = await self.driver.get_job(job_id)
129
132
  return job.to_dict() if job else None
130
133
 
134
+ async def promote_job(self, job_id: str) -> bool:
135
+ """Make a scheduled/pending job runnable immediately. Returns True on success."""
136
+ return await self.driver.promote(job_id)
137
+
131
138
  async def retry_job(self, job_id: str) -> bool:
132
139
  job = await self.driver.get_job(job_id)
133
140
  if not job or job.status != "failed":
@@ -150,6 +150,11 @@ def create_app(driver: BaseDriver, config: Optional[BaQueueConfig] = None) -> An
150
150
  ok = await api.retry_job(job_id)
151
151
  return JSONResponse({"success": ok})
152
152
 
153
+ @app.post("/api/jobs/{job_id}/execute")
154
+ async def execute_job(job_id: str):
155
+ ok = await api.promote_job(job_id)
156
+ return JSONResponse({"success": ok})
157
+
153
158
  @app.delete("/api/jobs/{job_id}")
154
159
  async def delete_job(job_id: str):
155
160
  ok = await api.delete_job(job_id)
@@ -322,6 +322,14 @@ document.addEventListener("alpine:init", () => {
322
322
  this.fetchOverview();
323
323
  },
324
324
 
325
+ async executeJob(jobId) {
326
+ // Promote a scheduled/pending job so it runs immediately.
327
+ await fetch(`/api/jobs/${jobId}/execute`, { method: "POST" });
328
+ this.closeModal();
329
+ this.fetchJobs();
330
+ this.fetchOverview();
331
+ },
332
+
325
333
  async retryAllFailed() {
326
334
  const parts = [];
327
335
  if (this.jobsFilter.queue) parts.push(`queue "${this.jobsFilter.queue}"`);
@@ -437,6 +445,36 @@ document.addEventListener("alpine:init", () => {
437
445
  return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
438
446
  },
439
447
 
448
+ // ── Per-attempt timeline ────────────────────────────────
449
+
450
+ attemptHistory(job) {
451
+ return job && Array.isArray(job.history) ? job.history : [];
452
+ },
453
+
454
+ hasHistory(job) {
455
+ return this.attemptHistory(job).length > 0;
456
+ },
457
+
458
+ // A job currently processing has an in-flight attempt that isn't recorded in
459
+ // history yet (entries are appended only when an attempt concludes).
460
+ inFlightAttempt(job) {
461
+ return !!(job && job.status === "processing" && job.started_at);
462
+ },
463
+
464
+ attemptDotClass(entry) {
465
+ return entry && entry.status === "completed" ? "completed" : "failed";
466
+ },
467
+
468
+ attemptDuration(entry) {
469
+ if (!entry || !entry.started_at || !entry.finished_at) return "";
470
+ const diff = entry.finished_at - entry.started_at;
471
+ if (diff < 0) return "";
472
+ if (diff < 0.001) return "<1ms";
473
+ if (diff < 1) return Math.round(diff * 1000) + "ms";
474
+ if (diff < 60) return diff.toFixed(1) + "s";
475
+ return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
476
+ },
477
+
440
478
  shortId(id) {
441
479
  return id ? id.substring(0, 12) : "-";
442
480
  },
@@ -511,27 +511,67 @@
511
511
  <span class="tl-time" x-text="formatTimeFull(selectedJob.delay_until)"></span>
512
512
  </div>
513
513
  </div>
514
- <div class="tl-item" x-show="selectedJob.started_at">
515
- <div class="tl-dot processing"></div>
516
- <div class="tl-content">
517
- <span class="tl-label">Started</span>
518
- <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
514
+ <!-- Per-attempt history (jobs that ran at least once on a
515
+ driver that persists history). Each backoff retry is its
516
+ own entry. -->
517
+ <template x-for="(entry, idx) in attemptHistory(selectedJob)" :key="idx">
518
+ <div class="tl-item">
519
+ <div class="tl-dot" :class="attemptDotClass(entry)"></div>
520
+ <div class="tl-content">
521
+ <span class="tl-label">
522
+ Attempt <span x-text="entry.attempt"></span> &middot;
523
+ <span x-text="entry.status"></span>
524
+ <span class="tl-dur" x-show="attemptDuration(entry)" x-text="'(' + attemptDuration(entry) + ')'"></span>
525
+ </span>
526
+ <span class="tl-time" x-text="formatTimeFull(entry.started_at) + (entry.finished_at ? ' → ' + formatTimeFull(entry.finished_at) : '')"></span>
527
+ <span class="tl-retry" x-show="entry.will_retry">
528
+ Retry scheduled <span x-text="entry.next_retry_at ? scheduledIn(entry.next_retry_at) : ''"></span>
529
+ </span>
530
+ <pre class="tl-error" x-show="entry.error" x-text="entry.error"></pre>
531
+ </div>
519
532
  </div>
520
- </div>
521
- <div class="tl-item" x-show="selectedJob.completed_at">
522
- <div class="tl-dot completed"></div>
523
- <div class="tl-content">
524
- <span class="tl-label">Completed</span>
525
- <span class="tl-time" x-text="formatTimeFull(selectedJob.completed_at)"></span>
533
+ </template>
534
+ <!-- The currently-running attempt is not recorded in history
535
+ until it concludes, so surface it live. -->
536
+ <template x-if="inFlightAttempt(selectedJob)">
537
+ <div class="tl-item">
538
+ <div class="tl-dot processing"></div>
539
+ <div class="tl-content">
540
+ <span class="tl-label">Attempt <span x-text="selectedJob.attempts"></span> &middot; running&hellip;</span>
541
+ <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
542
+ </div>
526
543
  </div>
527
- </div>
528
- <div class="tl-item" x-show="selectedJob.failed_at">
529
- <div class="tl-dot failed"></div>
530
- <div class="tl-content">
531
- <span class="tl-label">Failed</span>
532
- <span class="tl-time" x-text="formatTimeFull(selectedJob.failed_at)"></span>
544
+ </template>
545
+
546
+ <!-- Legacy single-attempt timeline: jobs created before history
547
+ tracking, or on drivers that don't persist history. -->
548
+ <template x-if="!hasHistory(selectedJob) && !inFlightAttempt(selectedJob) && selectedJob.started_at">
549
+ <div class="tl-item">
550
+ <div class="tl-dot processing"></div>
551
+ <div class="tl-content">
552
+ <span class="tl-label">Started</span>
553
+ <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
554
+ </div>
533
555
  </div>
534
- </div>
556
+ </template>
557
+ <template x-if="!hasHistory(selectedJob) && selectedJob.completed_at">
558
+ <div class="tl-item">
559
+ <div class="tl-dot completed"></div>
560
+ <div class="tl-content">
561
+ <span class="tl-label">Completed</span>
562
+ <span class="tl-time" x-text="formatTimeFull(selectedJob.completed_at)"></span>
563
+ </div>
564
+ </div>
565
+ </template>
566
+ <template x-if="!hasHistory(selectedJob) && selectedJob.failed_at">
567
+ <div class="tl-item">
568
+ <div class="tl-dot failed"></div>
569
+ <div class="tl-content">
570
+ <span class="tl-label">Failed</span>
571
+ <span class="tl-time" x-text="formatTimeFull(selectedJob.failed_at)"></span>
572
+ </div>
573
+ </div>
574
+ </template>
535
575
  </div>
536
576
  </div>
537
577
 
@@ -563,6 +603,10 @@
563
603
  </div>
564
604
 
565
605
  <div class="modal-actions">
606
+ <button class="btn-primary" x-show="isScheduled(selectedJob)" @click="executeJob(selectedJob.id)">
607
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polygon points="5 3 19 12 5 21 5 3"/></svg>
608
+ Execute Now
609
+ </button>
566
610
  <button class="btn-primary" x-show="selectedJob.status === 'failed'" @click="retryJob(selectedJob.id)">
567
611
  <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 11-2.12-9.36L23 10"/></svg>
568
612
  Retry Job
@@ -1267,8 +1267,24 @@ body {
1267
1267
  .tl-dot.failed { border-color: var(--red); background: var(--red); }
1268
1268
 
1269
1269
  .tl-content { display: flex; flex-direction: column; gap: 1px; }
1270
- .tl-label { font-size: 13px; font-weight: 600; }
1270
+ .tl-label { font-size: 13px; font-weight: 600; text-transform: capitalize; }
1271
1271
  .tl-time { font-size: 12px; color: var(--text-muted); font-family: 'JetBrains Mono', monospace; }
1272
+ .tl-dur { font-weight: 400; color: var(--text-muted); }
1273
+ .tl-retry { font-size: 12px; color: var(--amber); }
1274
+ .tl-error {
1275
+ margin: 4px 0 0;
1276
+ padding: 6px 8px;
1277
+ font-size: 11px;
1278
+ font-family: 'JetBrains Mono', monospace;
1279
+ color: var(--red);
1280
+ background: var(--bg-surface);
1281
+ border: 1px solid var(--border);
1282
+ border-radius: 6px;
1283
+ white-space: pre-wrap;
1284
+ word-break: break-word;
1285
+ max-height: 140px;
1286
+ overflow: auto;
1287
+ }
1272
1288
 
1273
1289
  /* ── Tags ───────────────────────────────────────────────── */
1274
1290
 
@@ -10,6 +10,10 @@ from baqueue.serializer import JobPayload
10
10
 
11
11
  logger = logging.getLogger("baqueue.driver")
12
12
 
13
+ # Default per-call cap for batched bulk-delete / prune operations. Keeps a single
14
+ # call from blocking the backend on very large datasets; callers loop to drain.
15
+ DEFAULT_PRUNE_BATCH = 1000
16
+
13
17
 
14
18
  class BaseDriver(ABC):
15
19
  """Every BaQueue driver must implement this interface."""
@@ -18,6 +22,11 @@ class BaseDriver(ABC):
18
22
  # an emergency cleanup and one retry. Wired from BaQueueConfig in queue.py.
19
23
  auto_cleanup_on_disk_full: bool = True
20
24
 
25
+ # When True, connect() runs a one-shot reconcile_indexes() pass to heal any
26
+ # secondary-index drift accumulated while offline. Off by default so connect
27
+ # stays fast on large datasets. Wired from BaQueueConfig in queue.py.
28
+ reconcile_on_connect: bool = False
29
+
21
30
  # Re-entrancy guard so emergency_cleanup() doesn't recurse if its own
22
31
  # prune calls also hit disk-full.
23
32
  _in_emergency_cleanup: bool = False
@@ -106,6 +115,20 @@ class BaseDriver(ABC):
106
115
  @abstractmethod
107
116
  async def delete(self, job_id: str) -> None: ...
108
117
 
118
+ async def promote(self, job_id: str) -> bool:
119
+ """Make a scheduled/pending job runnable immediately (clear its delay).
120
+
121
+ Returns True if the job was promoted, False if it does not exist or is not
122
+ in the ``pending`` state. Concrete (non-abstract) so existing third-party
123
+ drivers keep working; the built-in drivers override it with a race-safe,
124
+ index-aware version. The default relies on ``release(delay=0)`` to enqueue
125
+ the job for immediate processing."""
126
+ job = await self.get_job(job_id)
127
+ if job is None or job.status != "pending":
128
+ return False
129
+ await self.release(job, delay=0)
130
+ return True
131
+
109
132
  # ── Query ───────────────────────────────────────────────────
110
133
 
111
134
  @abstractmethod
@@ -193,6 +216,41 @@ class BaseDriver(ABC):
193
216
  """Delete matching jobs. Returns count of pruned jobs."""
194
217
  ...
195
218
 
219
+ async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
220
+ """Delete an explicit list of jobs, keeping any secondary indexes consistent.
221
+
222
+ Default implementation deletes one id at a time via ``delete``; drivers with
223
+ secondary indexes (Redis) override this with an atomic, batched version that
224
+ also reaps orphaned index entries. Returns the count of ids processed."""
225
+ if limit is not None:
226
+ job_ids = job_ids[:limit]
227
+ for job_id in job_ids:
228
+ await self.delete(job_id)
229
+ return len(job_ids)
230
+
231
+ async def prune_terminal_jobs(
232
+ self,
233
+ queue: str | None = None,
234
+ status: str | None = None,
235
+ *,
236
+ older_than: float | None = None,
237
+ limit: int = DEFAULT_PRUNE_BATCH,
238
+ ) -> int:
239
+ """Index-consistent bulk delete of terminal jobs, capped at ``limit`` per call.
240
+
241
+ Default implementation delegates to ``prune``; the Redis driver overrides it to
242
+ use its status index as the work source, reap orphaned index entries, and bound
243
+ the per-call cost. Callers loop until a pass returns fewer than ``limit``."""
244
+ return await self.prune(status=status, queue=queue, older_than_seconds=older_than)
245
+
246
+ async def reconcile_indexes(self, batch: int = 500) -> int:
247
+ """Repair secondary indexes by removing entries whose job no longer exists.
248
+
249
+ No-op for drivers without secondary indexes (memory/sqlite/postgres). The Redis
250
+ driver overrides this to walk its index ZSETs and ZREM orphaned ids. Returns the
251
+ number of stale index entries removed."""
252
+ return 0
253
+
196
254
  @abstractmethod
197
255
  async def flush(self, queue: str | None = None) -> None:
198
256
  """Remove all jobs (optionally for a specific queue)."""
@@ -116,6 +116,21 @@ class MemoryDriver(BaseDriver):
116
116
  if job_id in self._delayed:
117
117
  self._delayed.remove(job_id)
118
118
 
119
+ async def promote(self, job_id: str) -> bool:
120
+ async with self._lock:
121
+ payload = self._jobs.get(job_id)
122
+ if payload is None or payload.status != "pending":
123
+ return False
124
+ payload.delay_until = None
125
+ payload.updated_at = _now_ts()
126
+ if job_id in self._delayed:
127
+ self._delayed.remove(job_id)
128
+ # Only enqueue if it isn't already ready, so promoting a non-delayed
129
+ # pending job can never duplicate it in the ready list.
130
+ if job_id not in self._queues[payload.queue]:
131
+ self._queues[payload.queue].append(job_id)
132
+ return True
133
+
119
134
  # ── Query ───────────────────────────────────────────────────
120
135
 
121
136
  async def get_job(self, job_id: str) -> JobPayload | None:
@@ -324,6 +324,24 @@ class PostgresDriver(BaseDriver):
324
324
 
325
325
  await self._with_disk_full_recovery(_do)
326
326
 
327
+ async def promote(self, job_id: str) -> bool:
328
+ now = _now_ts()
329
+
330
+ async def _do():
331
+ async with self._pool.acquire() as conn:
332
+ # Clearing delay_until is enough: pop() already accepts a pending
333
+ # row whose delay_until IS NULL or has elapsed.
334
+ return await conn.fetchrow(
335
+ f"""UPDATE {self._jobs_table}
336
+ SET delay_until=NULL, updated_at=$1
337
+ WHERE id=$2 AND status='pending'
338
+ RETURNING id""",
339
+ now, job_id,
340
+ )
341
+
342
+ row = await self._with_disk_full_recovery(_do)
343
+ return row is not None
344
+
327
345
  # ── Query ───────────────────────────────────────────────────
328
346
 
329
347
  async def get_job(self, job_id: str) -> JobPayload | None:
@@ -6,11 +6,16 @@ import json
6
6
  import logging
7
7
  from typing import Any
8
8
 
9
- from baqueue.drivers.base import BaseDriver
9
+ from baqueue.drivers.base import DEFAULT_PRUNE_BATCH, BaseDriver
10
10
  from baqueue.serializer import JobPayload, _now_ts
11
11
 
12
12
  logger = logging.getLogger("baqueue.redis")
13
13
 
14
+ # Every status a job hash can carry. Used when reaping orphaned index entries:
15
+ # the job hash is gone, so we can't read its status — we ZREM from every global
16
+ # status index to be sure the stale id is cleared.
17
+ _ALL_STATUSES = ("pending", "processing", "completed", "failed", "cancelled")
18
+
14
19
 
15
20
  class RedisDriver(BaseDriver):
16
21
  """Redis-backed driver using sorted sets for indexed pagination.
@@ -88,6 +93,10 @@ class RedisDriver(BaseDriver):
88
93
  self._redis = aioredis.from_url(self._url, decode_responses=True, **self._kwargs)
89
94
  await self._redis.ping()
90
95
  await self._backfill_indexes_if_needed()
96
+ if self.reconcile_on_connect:
97
+ removed = await self.reconcile_indexes()
98
+ if removed:
99
+ logger.info("reconcile_on_connect removed %d stale index entr(ies)", removed)
91
100
 
92
101
  async def disconnect(self) -> None:
93
102
  if self._redis:
@@ -97,7 +106,11 @@ class RedisDriver(BaseDriver):
97
106
  async def _backfill_indexes_if_needed(self) -> None:
98
107
  """One-time rebuild of secondary ZSETs for upgrades from a version
99
108
  that didn't maintain them. Safe to call on every connect — exits fast
100
- when the global index is non-empty."""
109
+ when the global index is non-empty.
110
+
111
+ This is *add-only*: it inserts index entries for existing job hashes. It
112
+ cannot remove drift (index entries whose hash is gone) — that is the job
113
+ of reconcile_indexes(). Together they fully heal the indexes."""
101
114
  if await self._redis.exists(self._idx_all()):
102
115
  return
103
116
  cursor: Any = "0"
@@ -297,6 +310,32 @@ class RedisDriver(BaseDriver):
297
310
  await pipe.execute()
298
311
  await self._with_disk_full_recovery(_do)
299
312
 
313
+ async def promote(self, job_id: str) -> bool:
314
+ raw = await self._redis.hget(self._key("job", job_id), "data")
315
+ if not raw:
316
+ return False
317
+ payload = JobPayload.from_json(raw)
318
+ if payload.status != "pending":
319
+ return False
320
+ now = _now_ts()
321
+ # Only a job actually sitting in the delayed ZSET needs to be moved into
322
+ # its ready list. A pending job that is already ready (delay_until None or
323
+ # in the past) must NOT be re-pushed, or Redis pop — which does not
324
+ # re-check status — would process it twice.
325
+ was_scheduled = payload.delay_until is not None and payload.delay_until > now
326
+ payload.delay_until = None
327
+ payload.updated_at = now
328
+
329
+ async def _do():
330
+ pipe = self._redis.pipeline()
331
+ pipe.hset(self._key("job", job_id), mapping={"data": payload.to_json()})
332
+ if was_scheduled:
333
+ pipe.zrem(self._key("delayed"), job_id)
334
+ pipe.rpush(self._key("queue", payload.queue), job_id)
335
+ await pipe.execute()
336
+ await self._with_disk_full_recovery(_do)
337
+ return True
338
+
300
339
  # ── Query ───────────────────────────────────────────────────
301
340
 
302
341
  async def get_job(self, job_id: str) -> JobPayload | None:
@@ -518,20 +557,41 @@ class RedisDriver(BaseDriver):
518
557
 
519
558
  # ── Pruning ─────────────────────────────────────────────────
520
559
 
521
- async def prune(
522
- self,
523
- status: str | None = None,
524
- tag: str | None = None,
525
- older_than_seconds: float | None = None,
526
- queue: str | None = None,
527
- ) -> int:
528
- if not (status or tag or older_than_seconds or queue):
529
- return 0
560
+ def _index_remove_orphan(self, pipe: Any, job_id: str, queue: str | None, status: str | None) -> None:
561
+ """ZREM a stale id whose job hash is gone. We can't read the job's real
562
+ queue/status, so we clear every index family we can infer from the call:
563
+ always jobs:all + every global status index, plus the queue-scoped families
564
+ when the caller knows the queue/status it was iterating."""
565
+ pipe.zrem(self._idx_all(), job_id)
566
+ for st in _ALL_STATUSES:
567
+ pipe.zrem(self._idx_status(st), job_id)
568
+ if queue:
569
+ pipe.zrem(self._idx_queue(queue), job_id)
570
+ for st in _ALL_STATUSES:
571
+ pipe.zrem(self._idx_queue_status(queue, st), job_id)
530
572
 
531
- index = self._index_key(queue, status)
532
- candidate_ids: list[str] = await self._redis.zrange(index, 0, -1)
573
+ async def _prune_index_batch(
574
+ self,
575
+ index: str,
576
+ queue: str | None,
577
+ status: str | None,
578
+ tag: str | None,
579
+ older_than_seconds: float | None,
580
+ offset: int,
581
+ limit: int,
582
+ ) -> tuple[int, int, int]:
583
+ """Process one window ``[offset, offset+limit)`` of an index in a single
584
+ atomic pass.
585
+
586
+ Live jobs matching the filters are fully deleted (hash + all four index
587
+ families). Orphaned ids (hash already gone) are reaped from the indexes so
588
+ they can never accumulate. Non-matching live jobs are left in place. Returns
589
+ ``(removed, scanned, skipped)``: removed = deleted + reaped, scanned = window
590
+ size actually read, skipped = live jobs left in place (so the caller can step
591
+ its offset past them)."""
592
+ candidate_ids: list[str] = await self._redis.zrange(index, offset, offset + limit - 1)
533
593
  if not candidate_ids:
534
- return 0
594
+ return 0, 0, 0
535
595
 
536
596
  pipe = self._redis.pipeline()
537
597
  for jid in candidate_ids:
@@ -540,29 +600,170 @@ class RedisDriver(BaseDriver):
540
600
 
541
601
  now = _now_ts()
542
602
  to_delete: list[JobPayload] = []
543
- for raw in raws:
603
+ orphans: list[str] = []
604
+ skipped = 0
605
+ for jid, raw in zip(candidate_ids, raws):
544
606
  if not raw:
607
+ orphans.append(jid)
545
608
  continue
546
609
  job = JobPayload.from_json(raw)
547
610
  if tag and tag not in job.tags:
611
+ skipped += 1
548
612
  continue
549
613
  if older_than_seconds and (now - job.updated_at) < older_than_seconds:
614
+ skipped += 1
550
615
  continue
551
616
  to_delete.append(job)
552
617
 
553
- if not to_delete:
618
+ if to_delete or orphans:
619
+ async def _do():
620
+ pipe = self._redis.pipeline()
621
+ for job in to_delete:
622
+ pipe.lrem(self._key("queue", job.queue), 0, job.id)
623
+ pipe.zrem(self._key("delayed"), job.id)
624
+ pipe.unlink(self._key("job", job.id))
625
+ self._index_remove(pipe, job.id, job.queue, job.status)
626
+ for jid in orphans:
627
+ self._index_remove_orphan(pipe, jid, queue, status)
628
+ await pipe.execute()
629
+ await self._with_disk_full_recovery(_do)
630
+
631
+ return len(to_delete) + len(orphans), len(candidate_ids), skipped
632
+
633
+ async def _drain_index(
634
+ self,
635
+ index: str,
636
+ queue: str | None,
637
+ status: str | None,
638
+ tag: str | None,
639
+ older_than_seconds: float | None,
640
+ batch: int,
641
+ ) -> int:
642
+ """Page through an index in ``batch``-sized windows, deleting matches and
643
+ reaping orphans, until the whole index has been scanned.
644
+
645
+ Each Redis round-trip handles at most ``batch`` ids, so a huge (possibly
646
+ orphan-laden) index never blocks the server on one giant zrange + delete —
647
+ while every entry is still examined. Entries a filter skips stay in the index,
648
+ so the offset is advanced past them; that is what keeps matches deeper than
649
+ the first window from being missed (re-reading ``zrange(0, batch)`` forever
650
+ would stop early)."""
651
+ batch = max(1, batch)
652
+ offset = 0
653
+ total = 0
654
+ while True:
655
+ removed, scanned, skipped = await self._prune_index_batch(
656
+ index, queue, status, tag, older_than_seconds, offset, batch,
657
+ )
658
+ total += removed
659
+ offset += skipped # kept entries remain; step past them next round
660
+ if scanned < batch:
661
+ break
662
+ return total
663
+
664
+ async def prune(
665
+ self,
666
+ status: str | None = None,
667
+ tag: str | None = None,
668
+ older_than_seconds: float | None = None,
669
+ queue: str | None = None,
670
+ ) -> int:
671
+ if not (status or tag or older_than_seconds or queue):
554
672
  return 0
673
+ index = self._index_key(queue, status)
674
+ return await self._drain_index(
675
+ index, queue, status, tag, older_than_seconds, DEFAULT_PRUNE_BATCH,
676
+ )
677
+
678
async def prune_terminal_jobs(
    self,
    queue: str | None = None,
    status: str | None = None,
    *,
    older_than: float | None = None,
    limit: int = DEFAULT_PRUNE_BATCH,
) -> int:
    """Bulk-delete jobs from the index chosen by ``queue`` / ``status``.

    The secondary index returned by ``_index_key(queue, status)`` is the
    work source, and it is drained completely through ``_drain_index`` in
    windows of at most ``limit`` ids. No tag filter is applied;
    ``older_than`` is forwarded as the age filter in seconds. Orphaned
    index entries are reaped by the same pass. Returns the total number of
    entries removed.

    NOTE(review): unlike ``prune`` there is no "no filter given" guard
    here, so calling this with every argument left at its default drains
    whatever index ``_index_key(None, None)`` resolves to — confirm that
    callers always pass a status.
    """
    return await self._drain_index(
        self._index_key(queue, status),
        queue,
        status,
        None,  # no tag filter on this path
        older_than,
        limit,
    )
693
+
694
+ async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
695
+ """Delete an explicit list of jobs atomically, keeping all four index
696
+ families consistent. Live jobs are removed precisely (real queue/status from
697
+ the hash); ids whose hash is already gone are reaped from jobs:all and every
698
+ global status index (per-queue orphans are caught by reconcile_indexes)."""
699
+ if limit is not None:
700
+ job_ids = job_ids[:limit]
701
+ if not job_ids:
702
+ return 0
703
+
704
+ pipe = self._redis.pipeline()
705
+ for jid in job_ids:
706
+ pipe.hget(self._key("job", jid), "data")
707
+ raws = await pipe.execute()
555
708
 
556
709
  async def _do():
557
710
  pipe = self._redis.pipeline()
558
- for job in to_delete:
559
- pipe.lrem(self._key("queue", job.queue), 0, job.id)
560
- pipe.zrem(self._key("delayed"), job.id)
561
- pipe.delete(self._key("job", job.id))
562
- self._index_remove(pipe, job.id, job.queue, job.status)
711
+ for jid, raw in zip(job_ids, raws):
712
+ if raw:
713
+ job = JobPayload.from_json(raw)
714
+ pipe.lrem(self._key("queue", job.queue), 0, jid)
715
+ pipe.zrem(self._key("delayed"), jid)
716
+ pipe.unlink(self._key("job", jid))
717
+ self._index_remove(pipe, jid, job.queue, job.status)
718
+ else:
719
+ self._index_remove_orphan(pipe, jid, None, None)
563
720
  await pipe.execute()
564
721
  await self._with_disk_full_recovery(_do)
565
- return len(to_delete)
722
+ return len(job_ids)
723
+
724
async def reconcile_indexes(self, batch: int = 500) -> int:
    """Remove index entries whose job hash no longer exists.

    Self-healing repair for index drift (e.g. job hashes deleted
    out-of-band). Index keys are discovered with SCAN over the
    ``jobs:*`` key pattern (job hashes live under the singular ``job:*``
    prefix, so they are not matched). Each discovered index is then walked
    with ZSCAN, and hash existence is checked in pipelined batches, so a
    large set is never loaded at once.

    SCAN and ZSCAN may report the same element more than once, so index
    keys and per-batch ids are deduplicated, and the result is computed
    from the ZREM replies rather than from the number of ids submitted.

    Args:
        batch: COUNT hint for SCAN/ZSCAN; values below 1 are raised to 1.

    Returns:
        The number of stale index entries actually removed.
    """
    batch = max(1, batch)

    # Ordered, duplicate-free collection of index keys.
    index_keys: dict[Any, None] = {}
    cursor: Any = "0"
    pattern = self._key("jobs", "*")
    while True:
        cursor, keys = await self._redis.scan(cursor=cursor, match=pattern, count=batch)
        index_keys.update(dict.fromkeys(keys))
        if cursor == "0" or cursor == 0:
            break

    removed = 0
    for index in index_keys:
        zcursor: Any = 0
        while True:
            zcursor, members = await self._redis.zscan(index, cursor=zcursor, count=batch)
            # ZSCAN yields (member, score) pairs; tolerate bare members too.
            ids = list(dict.fromkeys(
                m[0] if isinstance(m, (tuple, list)) else m for m in members
            ))
            if ids:
                pipe = self._redis.pipeline()
                for jid in ids:
                    pipe.exists(self._key("job", jid))
                exists_flags = await pipe.execute()
                stale = [jid for jid, ok in zip(ids, exists_flags) if not ok]
                if stale:
                    replies: list[Any] = []

                    # Bind loop variables as defaults so a retried call
                    # still targets this index and this batch.
                    async def _do(index=index, stale=stale, replies=replies):
                        pipe = self._redis.pipeline()
                        for jid in stale:
                            pipe.zrem(index, jid)
                        replies[:] = await pipe.execute()

                    await self._with_disk_full_recovery(_do)
                    # Count only entries ZREM really removed; an id that
                    # ZSCAN reports again later yields a 0 reply.
                    removed += sum(1 for reply in replies if reply)
            if zcursor == 0 or zcursor == "0":
                break
    return removed
566
767
 
567
768
  async def prune_metrics(self, older_than_seconds: float) -> int:
568
769
  cutoff = _now_ts() - older_than_seconds
@@ -635,11 +836,11 @@ class RedisDriver(BaseDriver):
635
836
  pipe.delete(self._key("job", jid))
636
837
  pipe.zrem(self._idx_all(), jid)
637
838
  pipe.zrem(self._key("delayed"), jid)
638
- for st in ("pending", "processing", "completed", "failed"):
839
+ for st in _ALL_STATUSES:
639
840
  pipe.zrem(self._idx_status(st), jid)
640
841
  # Drop all per-queue and per-(queue,status) indexes
641
842
  pipe.delete(self._idx_queue(queue))
642
- for st in ("pending", "processing", "completed", "failed"):
843
+ for st in _ALL_STATUSES:
643
844
  pipe.delete(self._idx_queue_status(queue, st))
644
845
  pipe.srem(self._key("queues"), queue)
645
846
  await pipe.execute()
@@ -377,6 +377,23 @@ class SqliteDriver(BaseDriver):
377
377
  c.commit()
378
378
  await self._execute_with_retry(_do)
379
379
 
380
async def promote(self, job_id: str) -> bool:
    """Make a pending job immediately eligible by clearing its delay.

    Sets ``delay_until`` to NULL and refreshes ``updated_at`` on the row
    with the given id, but only while its status is ``'pending'``.

    Returns:
        True when exactly one row was updated, False otherwise (unknown id
        or a job that is not pending).
    """
    now = _now_ts()
    promoted = False

    def _clear_delay() -> None:
        nonlocal promoted
        conn = self._get_conn()
        # Clearing delay_until is enough: pop() already accepts a pending
        # row whose delay_until IS NULL or has elapsed.
        cursor = conn.execute(
            "UPDATE jobs SET delay_until=NULL, updated_at=? WHERE id=? AND status='pending'",
            (now, job_id),
        )
        conn.commit()
        promoted = cursor.rowcount == 1

    async with self._lock:
        await self._execute_with_retry(_clear_delay)
    return promoted
396
+
380
397
  # ── Query ───────────────────────────────────────────────────
381
398
 
382
399
  async def get_job(self, job_id: str) -> JobPayload | None:
@@ -57,26 +57,35 @@ class Pruner:
57
57
  return self.config.prune_metrics_hours * 3600
58
58
  return float(self.config.prune_metrics_seconds)
59
59
 
60
async def _prune_terminal(self, status: str, older_than: float) -> int:
    """Prune jobs of one status through the driver's bulk-delete entry point.

    Forwards to ``driver.prune_terminal_jobs`` with the configured
    ``prune_batch_size`` (coerced to ``int`` and raised to at least 1) as
    the per-batch limit, and returns the driver's removal count unchanged.

    Args:
        status: job status to prune.
        older_than: age threshold forwarded to the driver.
    """
    batch_limit = max(1, int(self.config.prune_batch_size))
    removed = await self.driver.prune_terminal_jobs(
        status=status,
        older_than=older_than,
        limit=batch_limit,
    )
    return removed
71
+
60
72
  async def prune_once(self) -> dict[str, int]:
61
73
  """Run a single prune pass based on config."""
62
74
  results: dict[str, int] = {}
63
75
 
64
76
  if self.completed_threshold > 0:
65
- results["completed"] = await self.driver.prune(
66
- status="completed",
67
- older_than_seconds=self.completed_threshold,
77
+ results["completed"] = await self._prune_terminal(
78
+ "completed", self.completed_threshold,
68
79
  )
69
80
 
70
81
  if self.failed_threshold > 0:
71
- results["failed"] = await self.driver.prune(
72
- status="failed",
73
- older_than_seconds=self.failed_threshold,
82
+ results["failed"] = await self._prune_terminal(
83
+ "failed", self.failed_threshold,
74
84
  )
75
85
 
76
86
  if self.cancelled_threshold > 0:
77
- results["cancelled"] = await self.driver.prune(
78
- status="cancelled",
79
- older_than_seconds=self.cancelled_threshold,
87
+ results["cancelled"] = await self._prune_terminal(
88
+ "cancelled", self.cancelled_threshold,
80
89
  )
81
90
 
82
91
  if self.metrics_threshold > 0:
@@ -30,6 +30,7 @@ class Queue:
30
30
  cls._config = config or BaQueueConfig()
31
31
  if driver is not None:
32
32
  driver.auto_cleanup_on_disk_full = cls._config.auto_cleanup_on_disk_full
33
+ driver.reconcile_on_connect = cls._config.reconcile_on_connect
33
34
  cls._driver = driver
34
35
  cls._events = EventBus.default()
35
36
 
@@ -222,4 +223,5 @@ def _create_driver(config: BaQueueConfig) -> BaseDriver:
222
223
  else:
223
224
  raise ValueError(f"Unknown driver: {name}")
224
225
  driver.auto_cleanup_on_disk_full = config.auto_cleanup_on_disk_full
226
+ driver.reconcile_on_connect = config.reconcile_on_connect
225
227
  return driver
@@ -35,6 +35,7 @@ class JobPayload:
35
35
  "failed_at",
36
36
  "status",
37
37
  "error",
38
+ "history",
38
39
  )
39
40
 
40
41
  def __init__(
@@ -58,6 +59,7 @@ class JobPayload:
58
59
  failed_at: float | None = None,
59
60
  status: str = "pending",
60
61
  error: str | None = None,
62
+ history: list[dict[str, Any]] | None = None,
61
63
  ):
62
64
  self.id = id or uuid4().hex
63
65
  self.job_class = job_class
@@ -77,9 +79,14 @@ class JobPayload:
77
79
  self.failed_at = failed_at
78
80
  self.status = status
79
81
  self.error = error
80
-
81
- def to_dict(self) -> dict[str, Any]:
82
- return {
82
+ # Per-attempt execution history (one record per processing attempt).
83
+ # Bounded by the number of attempts; persisted only by drivers that store
84
+ # the full payload (memory, redis). Older payloads without this key load
85
+ # as an empty list, so the field is fully backward compatible.
86
+ self.history = history or []
87
+
88
+ def to_dict(self, *, include_history: bool = True) -> dict[str, Any]:
89
+ d = {
83
90
  "id": self.id,
84
91
  "job_class": self.job_class,
85
92
  "data": self.data,
@@ -99,6 +106,9 @@ class JobPayload:
99
106
  "status": self.status,
100
107
  "error": self.error,
101
108
  }
109
+ if include_history:
110
+ d["history"] = self.history
111
+ return d
102
112
 
103
113
  def to_json(self) -> str:
104
114
  return json.dumps(self.to_dict())
@@ -11,10 +11,16 @@ from baqueue.drivers.base import BaseDriver
11
11
  from baqueue.events import EventBus
12
12
  from baqueue.job import Job, FunctionJob
13
13
  from baqueue.retry import compute_delay, should_retry
14
- from baqueue.serializer import JobPayload, resolve_job_class
14
+ from baqueue.serializer import JobPayload, resolve_job_class, _now_ts
15
15
 
16
16
  logger = logging.getLogger("baqueue.worker")
17
17
 
18
+ # Per-attempt errors stored in JobPayload.history are truncated to this many
19
+ # characters. The job's top-level `error` field keeps the full latest traceback;
20
+ # this bound keeps the history (and therefore the stored payload) from growing
21
+ # large across retries.
22
+ _HISTORY_ERROR_MAXLEN = 1000
23
+
18
24
 
19
25
  class Worker:
20
26
  """Pulls and executes jobs from one or more queues."""
@@ -84,6 +90,33 @@ class Worker:
84
90
  return job
85
91
  return None
86
92
 
93
+ @staticmethod
94
+ def _record_attempt(
95
+ payload: JobPayload,
96
+ *,
97
+ status: str,
98
+ finished_at: float,
99
+ error: str | None = None,
100
+ will_retry: bool = False,
101
+ next_retry_at: float | None = None,
102
+ ) -> None:
103
+ """Append one bounded record describing the attempt that just concluded.
104
+
105
+ Called once per attempt, right before the driver persists the new state, so
106
+ drivers that store the whole payload (memory, redis) keep the full history.
107
+ The list is bounded by the number of attempts and the error is truncated."""
108
+ if error is not None and len(error) > _HISTORY_ERROR_MAXLEN:
109
+ error = error[:_HISTORY_ERROR_MAXLEN] + "…"
110
+ payload.history.append({
111
+ "attempt": payload.attempts,
112
+ "started_at": payload.started_at,
113
+ "finished_at": finished_at,
114
+ "status": status,
115
+ "error": error,
116
+ "will_retry": will_retry,
117
+ "next_retry_at": next_retry_at,
118
+ })
119
+
87
120
  async def _process(self, payload: JobPayload) -> None:
88
121
  self._current_job = payload
89
122
  job_timeout = payload.timeout or self.timeout
@@ -99,6 +132,7 @@ class Worker:
99
132
  timeout=job_timeout,
100
133
  )
101
134
 
135
+ self._record_attempt(payload, status="completed", finished_at=_now_ts())
102
136
  await self.driver.complete(payload)
103
137
  await self.driver.record_metric(payload.queue, "completed", 1)
104
138
  await self.events.emit("job.completed", payload=payload, result=result, worker=self.name)
@@ -118,9 +152,16 @@ class Worker:
118
152
 
119
153
  if should_retry(payload.attempts, payload.max_attempts):
120
154
  delay = compute_delay(payload.backoff, payload.attempts)
155
+ self._record_attempt(
156
+ payload, status="failed", finished_at=_now_ts(),
157
+ error=error_msg, will_retry=True, next_retry_at=_now_ts() + delay,
158
+ )
121
159
  await self.driver.release(payload, delay=delay)
122
160
  await self.events.emit("job.retrying", payload=payload, error=error_msg, delay=delay)
123
161
  else:
162
+ self._record_attempt(
163
+ payload, status="failed", finished_at=_now_ts(), error=error_msg,
164
+ )
124
165
  await self.driver.fail(payload, error_msg)
125
166
  await self.driver.record_metric(payload.queue, "failed", 1)
126
167
  await self.events.emit("job.failed", payload=payload, error=error_msg, worker=self.name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: baqueue
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: A powerful Python queue management package inspired by Laravel Horizon
5
5
  Author: Basalam, BaQueue Contributors
6
6
  License: MIT
@@ -45,6 +45,7 @@ Provides-Extra: dev
45
45
  Requires-Dist: baqueue[all]; extra == "dev"
46
46
  Requires-Dist: pytest>=8.0; extra == "dev"
47
47
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
48
+ Requires-Dist: fakeredis>=2.21; extra == "dev"
48
49
  Requires-Dist: build>=1.0; extra == "dev"
49
50
  Requires-Dist: twine>=5.0; extra == "dev"
50
51
  Dynamic: license-file
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
274
275
  await Queue.prune(tag="batch:newsletter")
275
276
  ```
276
277
 
278
+ #### Redis index health
279
+
280
+ The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
281
+ count jobs by queue/status efficiently. All deletes go through an index-consistent path
282
+ that removes the job hash *and* every index entry in one atomic step, so the indexes stay
283
+ bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
284
+ `redis-cli`), pruning reaps them automatically, and you can force a full repair:
285
+
286
+ ```bash
287
+ baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
288
+ ```
289
+
290
+ Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
291
+ to keep connect fast on large datasets).
292
+
277
293
  ### Retry Failed Jobs
278
294
 
279
295
  Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -508,6 +524,7 @@ baqueue schedule Start the job scheduler
508
524
  baqueue dashboard Launch the monitoring dashboard
509
525
  baqueue prune Prune old jobs
510
526
  baqueue retry-failed Retry all failed jobs (filter by queue/tag/age)
527
+ baqueue reconcile-indexes Repair Redis secondary indexes (drop stale entries)
511
528
  baqueue status Show queue status
512
529
  baqueue test Run the test suite
513
530
  ```
@@ -18,6 +18,7 @@ websockets>=12.0
18
18
  baqueue[all]
19
19
  pytest>=8.0
20
20
  pytest-asyncio>=0.23
21
+ fakeredis>=2.21
21
22
  build>=1.0
22
23
  twine>=5.0
23
24
 
@@ -53,6 +53,7 @@ dev = [
53
53
  "baqueue[all]",
54
54
  "pytest>=8.0",
55
55
  "pytest-asyncio>=0.23",
56
+ "fakeredis>=2.21",
56
57
  "build>=1.0",
57
58
  "twine>=5.0",
58
59
  ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes