PyPI - baqueue - Versions diffs - 1.0.1__tar.gz → 1.1.0__tar.gz - Mend

baqueue 1.0.1.tar.gz → 1.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

{baqueue-1.0.1/baqueue.egg-info → baqueue-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: baqueue
-Version: 1.0.1
+Version: 1.1.0
 Summary: A powerful Python queue management package inspired by Laravel Horizon
 Author: Basalam, BaQueue Contributors
 License: MIT
@@ -45,6 +45,7 @@ Provides-Extra: dev
 Requires-Dist: baqueue[all]; extra == "dev"
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+Requires-Dist: fakeredis>=2.21; extra == "dev"
 Requires-Dist: build>=1.0; extra == "dev"
 Requires-Dist: twine>=5.0; extra == "dev"
 Dynamic: license-file
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
 await Queue.prune(tag="batch:newsletter")
 ```
+#### Redis index health
+The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
+count jobs by queue/status efficiently. All deletes go through an index-consistent path
+that removes the job hash *and* every index entry in one atomic step, so the indexes stay
+bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
+`redis-cli`), pruning reaps them automatically, and you can force a full repair:
+```bash
+baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
+```
+Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
+to keep connect fast on large datasets).
 ### Retry Failed Jobs
 Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -508,6 +524,7 @@ baqueue schedule      Start the job scheduler
 baqueue dashboard     Launch the monitoring dashboard
 baqueue prune         Prune old jobs
 baqueue retry-failed  Retry all failed jobs (filter by queue/tag/age)
+baqueue reconcile-indexes  Repair Redis secondary indexes (drop stale entries)
 baqueue status        Show queue status
 baqueue test          Run the test suite
 ```

{baqueue-1.0.1 → baqueue-1.1.0}/README.md RENAMED Viewed

@@ -223,6 +223,21 @@ await Queue.prune(status="completed", hours=24)
 await Queue.prune(tag="batch:newsletter")
 ```
+#### Redis index health
+The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
+count jobs by queue/status efficiently. All deletes go through an index-consistent path
+that removes the job hash *and* every index entry in one atomic step, so the indexes stay
+bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
+`redis-cli`), pruning reaps them automatically, and you can force a full repair:
+```bash
+baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
+```
+Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
+to keep connect fast on large datasets).
 ### Retry Failed Jobs
 Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -457,6 +472,7 @@ baqueue schedule      Start the job scheduler
 baqueue dashboard     Launch the monitoring dashboard
 baqueue prune         Prune old jobs
 baqueue retry-failed  Retry all failed jobs (filter by queue/tag/age)
+baqueue reconcile-indexes  Repair Redis secondary indexes (drop stale entries)
 baqueue status        Show queue status
 baqueue test          Run the test suite
 ```

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/__init__.py RENAMED Viewed

@@ -7,7 +7,7 @@ from baqueue.batch import Batch
 from baqueue.events import EventBus
 from baqueue.retry import BackoffStrategy
-__version__ = "1.0.1"
+__version__ = "1.1.0"
 __all__ = [
     "BaQueueConfig",

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/cli.py RENAMED Viewed

@@ -365,6 +365,37 @@ async def _run_retry_failed(
         await Queue.disconnect()
+@cli.command(name="reconcile-indexes")
+@click.option("--batch", default=500, type=int, help="Index entries scanned per batch.")
+@click.option("--driver", "-d", default="redis", help="Driver name (sqlite, memory, redis, postgres).")
+@click.option("--driver-url", default=None, help="Driver connection URL.")
+@click.pass_context
+def reconcile_indexes(
+    ctx: click.Context,
+    batch: int,
+    driver: str,
+    driver_url: str | None,
+) -> None:
+    """Repair secondary indexes: remove entries pointing at jobs that no longer exist.
+    Only the Redis driver maintains secondary indexes; this is a no-op elsewhere."""
+    _validate_driver(driver)
+    config: BaQueueConfig = ctx.obj["config"]
+    config.driver = DriverConfig(name=driver, url=driver_url or "")
+    removed = _run_async(_run_reconcile_indexes, config, batch)
+    click.echo(f"Removed {removed or 0} stale index entr{'y' if removed == 1 else 'ies'}.")
+async def _run_reconcile_indexes(config: BaQueueConfig, batch: int) -> int:
+    Queue.configure(config)
+    await Queue.connect()
+    try:
+        return await Queue.get_driver().reconcile_indexes(batch=batch)
+    finally:
+        await Queue.disconnect()
 @cli.command()
 @click.option("--driver", "-d", default="sqlite", help="Driver name (sqlite, memory, redis, postgres).")
 @click.option("--driver-url", default=None, help="Driver connection URL.")

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/config.py RENAMED Viewed

@@ -59,6 +59,14 @@ class BaQueueConfig(BaseModel):
     prune_completed_seconds: int = 5  # delete completed jobs ~5s after completion
     prune_other_seconds: int = 86400  # 1 day — applies to failed + cancelled
     prune_metrics_seconds: int = 604800  # 7 days
+    # Per-call cap for index-consistent bulk deletes; the pruner loops to drain.
+    prune_batch_size: int = 1000
+    # ── Secondary-index reconciliation (Redis) ─────────────────
+    # When True, connect() runs a one-shot reconcile pass that removes index
+    # entries pointing at jobs that no longer exist. Off by default — run on
+    # demand via `baqueue reconcile-indexes` to keep startup fast.
+    reconcile_on_connect: bool = False
     # ── Legacy hour-based overrides (kept for back-compat) ──────
     # When > 0, these take precedence over the seconds fields above for the

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/api.py RENAMED Viewed

@@ -117,7 +117,10 @@ class DashboardAPI:
             created_from=created_from, created_to=created_to,
         )
         return {
-            "jobs": [j.to_dict() for j in jobs],
+            # The list view never renders per-attempt history (the modal fetches
+            # job_detail for that), so omit it to keep the list and the live
+            # /ws/jobs push lean.
+            "jobs": [j.to_dict(include_history=False) for j in jobs],
             "page": page,
             "per_page": per_page,
             "count": len(jobs),
@@ -128,6 +131,10 @@ class DashboardAPI:
         job = await self.driver.get_job(job_id)
         return job.to_dict() if job else None
+    async def promote_job(self, job_id: str) -> bool:
+        """Make a scheduled/pending job runnable immediately. Returns True on success."""
+        return await self.driver.promote(job_id)
     async def retry_job(self, job_id: str) -> bool:
         job = await self.driver.get_job(job_id)
         if not job or job.status != "failed":

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/server.py RENAMED Viewed

@@ -150,6 +150,11 @@ def create_app(driver: BaseDriver, config: Optional[BaQueueConfig] = None) -> An
         ok = await api.retry_job(job_id)
         return JSONResponse({"success": ok})
+    @app.post("/api/jobs/{job_id}/execute")
+    async def execute_job(job_id: str):
+        ok = await api.promote_job(job_id)
+        return JSONResponse({"success": ok})
     @app.delete("/api/jobs/{job_id}")
     async def delete_job(job_id: str):
         ok = await api.delete_job(job_id)

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/app.js RENAMED Viewed

@@ -322,6 +322,14 @@ document.addEventListener("alpine:init", () => {
       this.fetchOverview();
     },
+    async executeJob(jobId) {
+      // Promote a scheduled/pending job so it runs immediately.
+      await fetch(`/api/jobs/${jobId}/execute`, { method: "POST" });
+      this.closeModal();
+      this.fetchJobs();
+      this.fetchOverview();
+    },
     async retryAllFailed() {
       const parts = [];
       if (this.jobsFilter.queue) parts.push(`queue "${this.jobsFilter.queue}"`);
@@ -437,6 +445,36 @@ document.addEventListener("alpine:init", () => {
       return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
     },
+    // ── Per-attempt timeline ────────────────────────────────
+    attemptHistory(job) {
+      return job && Array.isArray(job.history) ? job.history : [];
+    },
+    hasHistory(job) {
+      return this.attemptHistory(job).length > 0;
+    },
+    // A job currently processing has an in-flight attempt that isn't recorded in
+    // history yet (entries are appended only when an attempt concludes).
+    inFlightAttempt(job) {
+      return !!(job && job.status === "processing" && job.started_at);
+    },
+    attemptDotClass(entry) {
+      return entry && entry.status === "completed" ? "completed" : "failed";
+    },
+    attemptDuration(entry) {
+      if (!entry || !entry.started_at || !entry.finished_at) return "";
+      const diff = entry.finished_at - entry.started_at;
+      if (diff < 0) return "";
+      if (diff < 0.001) return "<1ms";
+      if (diff < 1) return Math.round(diff * 1000) + "ms";
+      if (diff < 60) return diff.toFixed(1) + "s";
+      return Math.floor(diff / 60) + "m " + Math.floor(diff % 60) + "s";
+    },
     shortId(id) {
       return id ? id.substring(0, 12) : "-";
     },

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/index.html RENAMED Viewed

@@ -511,27 +511,67 @@
                                     <span class="tl-time" x-text="formatTimeFull(selectedJob.delay_until)"></span>
                                 </div>
                             </div>
-                            <div class="tl-item" x-show="selectedJob.started_at">
-                                <div class="tl-dot processing"></div>
-                                <div class="tl-content">
-                                    <span class="tl-label">Started</span>
-                                    <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
+                            <!-- Per-attempt history (jobs that ran at least once on a
+                                 driver that persists history). Each backoff retry is its
+                                 own entry. -->
+                            <template x-for="(entry, idx) in attemptHistory(selectedJob)" :key="idx">
+                                <div class="tl-item">
+                                    <div class="tl-dot" :class="attemptDotClass(entry)"></div>
+                                    <div class="tl-content">
+                                        <span class="tl-label">
+                                            Attempt <span x-text="entry.attempt"></span> &middot;
+                                            <span x-text="entry.status"></span>
+                                            <span class="tl-dur" x-show="attemptDuration(entry)" x-text="'(' + attemptDuration(entry) + ')'"></span>
+                                        </span>
+                                        <span class="tl-time" x-text="formatTimeFull(entry.started_at) + (entry.finished_at ? ' → ' + formatTimeFull(entry.finished_at) : '')"></span>
+                                        <span class="tl-retry" x-show="entry.will_retry">
+                                            Retry scheduled <span x-text="entry.next_retry_at ? scheduledIn(entry.next_retry_at) : ''"></span>
+                                        </span>
+                                        <pre class="tl-error" x-show="entry.error" x-text="entry.error"></pre>
+                                    </div>
                                 </div>
-                            </div>
-                            <div class="tl-item" x-show="selectedJob.completed_at">
-                                <div class="tl-dot completed"></div>
-                                <div class="tl-content">
-                                    <span class="tl-label">Completed</span>
-                                    <span class="tl-time" x-text="formatTimeFull(selectedJob.completed_at)"></span>
+                            </template>
+                            <!-- The currently-running attempt is not recorded in history
+                                 until it concludes, so surface it live. -->
+                            <template x-if="inFlightAttempt(selectedJob)">
+                                <div class="tl-item">
+                                    <div class="tl-dot processing"></div>
+                                    <div class="tl-content">
+                                        <span class="tl-label">Attempt <span x-text="selectedJob.attempts"></span> &middot; running&hellip;</span>
+                                        <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
+                                    </div>
                                 </div>
-                            </div>
-                            <div class="tl-item" x-show="selectedJob.failed_at">
-                                <div class="tl-dot failed"></div>
-                                <div class="tl-content">
-                                    <span class="tl-label">Failed</span>
-                                    <span class="tl-time" x-text="formatTimeFull(selectedJob.failed_at)"></span>
+                            </template>
+                            <!-- Legacy single-attempt timeline: jobs created before history
+                                 tracking, or on drivers that don't persist history. -->
+                            <template x-if="!hasHistory(selectedJob) && !inFlightAttempt(selectedJob) && selectedJob.started_at">
+                                <div class="tl-item">
+                                    <div class="tl-dot processing"></div>
+                                    <div class="tl-content">
+                                        <span class="tl-label">Started</span>
+                                        <span class="tl-time" x-text="formatTimeFull(selectedJob.started_at)"></span>
+                                    </div>
                                 </div>
-                            </div>
+                            </template>
+                            <template x-if="!hasHistory(selectedJob) && selectedJob.completed_at">
+                                <div class="tl-item">
+                                    <div class="tl-dot completed"></div>
+                                    <div class="tl-content">
+                                        <span class="tl-label">Completed</span>
+                                        <span class="tl-time" x-text="formatTimeFull(selectedJob.completed_at)"></span>
+                                    </div>
+                                </div>
+                            </template>
+                            <template x-if="!hasHistory(selectedJob) && selectedJob.failed_at">
+                                <div class="tl-item">
+                                    <div class="tl-dot failed"></div>
+                                    <div class="tl-content">
+                                        <span class="tl-label">Failed</span>
+                                        <span class="tl-time" x-text="formatTimeFull(selectedJob.failed_at)"></span>
+                                    </div>
+                                </div>
+                            </template>
                         </div>
                     </div>
@@ -563,6 +603,10 @@
                 </div>
                 <div class="modal-actions">
+                    <button class="btn-primary" x-show="isScheduled(selectedJob)" @click="executeJob(selectedJob.id)">
+                        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polygon points="5 3 19 12 5 21 5 3"/></svg>
+                        Execute Now
+                    </button>
                     <button class="btn-primary" x-show="selectedJob.status === 'failed'" @click="retryJob(selectedJob.id)">
                         <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 11-2.12-9.36L23 10"/></svg>
                         Retry Job

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/dashboard/static/style.css RENAMED Viewed

@@ -1267,8 +1267,24 @@ body {
 .tl-dot.failed { border-color: var(--red); background: var(--red); }
 .tl-content { display: flex; flex-direction: column; gap: 1px; }
-.tl-label { font-size: 13px; font-weight: 600; }
+.tl-label { font-size: 13px; font-weight: 600; text-transform: capitalize; }
 .tl-time { font-size: 12px; color: var(--text-muted); font-family: 'JetBrains Mono', monospace; }
+.tl-dur { font-weight: 400; color: var(--text-muted); }
+.tl-retry { font-size: 12px; color: var(--amber); }
+.tl-error {
+    margin: 4px 0 0;
+    padding: 6px 8px;
+    font-size: 11px;
+    font-family: 'JetBrains Mono', monospace;
+    color: var(--red);
+    background: var(--bg-surface);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    white-space: pre-wrap;
+    word-break: break-word;
+    max-height: 140px;
+    overflow: auto;
+}
 /* ── Tags ───────────────────────────────────────────────── */

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/base.py RENAMED Viewed

@@ -10,6 +10,10 @@ from baqueue.serializer import JobPayload
 logger = logging.getLogger("baqueue.driver")
+# Default per-call cap for batched bulk-delete / prune operations. Keeps a single
+# call from blocking the backend on very large datasets; callers loop to drain.
+DEFAULT_PRUNE_BATCH = 1000
 class BaseDriver(ABC):
     """Every BaQueue driver must implement this interface."""
@@ -18,6 +22,11 @@ class BaseDriver(ABC):
     # an emergency cleanup and one retry. Wired from BaQueueConfig in queue.py.
     auto_cleanup_on_disk_full: bool = True
+    # When True, connect() runs a one-shot reconcile_indexes() pass to heal any
+    # secondary-index drift accumulated while offline. Off by default so connect
+    # stays fast on large datasets. Wired from BaQueueConfig in queue.py.
+    reconcile_on_connect: bool = False
     # Re-entrancy guard so emergency_cleanup() doesn't recurse if its own
     # prune calls also hit disk-full.
     _in_emergency_cleanup: bool = False
@@ -106,6 +115,20 @@ class BaseDriver(ABC):
     @abstractmethod
     async def delete(self, job_id: str) -> None: ...
+    async def promote(self, job_id: str) -> bool:
+        """Make a scheduled/pending job runnable immediately (clear its delay).
+        Returns True if the job was promoted, False if it does not exist or is not
+        in the ``pending`` state. Concrete (non-abstract) so existing third-party
+        drivers keep working; the built-in drivers override it with a race-safe,
+        index-aware version. The default relies on ``release(delay=0)`` to enqueue
+        the job for immediate processing."""
+        job = await self.get_job(job_id)
+        if job is None or job.status != "pending":
+            return False
+        await self.release(job, delay=0)
+        return True
     # ── Query ───────────────────────────────────────────────────
     @abstractmethod
@@ -193,6 +216,41 @@ class BaseDriver(ABC):
         """Delete matching jobs. Returns count of pruned jobs."""
         ...
+    async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
+        """Delete an explicit list of jobs, keeping any secondary indexes consistent.
+        Default implementation deletes one id at a time via ``delete``; drivers with
+        secondary indexes (Redis) override this with an atomic, batched version that
+        also reaps orphaned index entries. Returns the count of ids processed."""
+        if limit is not None:
+            job_ids = job_ids[:limit]
+        for job_id in job_ids:
+            await self.delete(job_id)
+        return len(job_ids)
+    async def prune_terminal_jobs(
+        self,
+        queue: str | None = None,
+        status: str | None = None,
+        *,
+        older_than: float | None = None,
+        limit: int = DEFAULT_PRUNE_BATCH,
+    ) -> int:
+        """Index-consistent bulk delete of terminal jobs, capped at ``limit`` per call.
+        Default implementation delegates to ``prune``; the Redis driver overrides it to
+        use its status index as the work source, reap orphaned index entries, and bound
+        the per-call cost. Callers loop until a pass returns fewer than ``limit``."""
+        return await self.prune(status=status, queue=queue, older_than_seconds=older_than)
+    async def reconcile_indexes(self, batch: int = 500) -> int:
+        """Repair secondary indexes by removing entries whose job no longer exists.
+        No-op for drivers without secondary indexes (memory/sqlite/postgres). The Redis
+        driver overrides this to walk its index ZSETs and ZREM orphaned ids. Returns the
+        number of stale index entries removed."""
+        return 0
     @abstractmethod
     async def flush(self, queue: str | None = None) -> None:
         """Remove all jobs (optionally for a specific queue)."""

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/memory_driver.py RENAMED Viewed

@@ -116,6 +116,21 @@ class MemoryDriver(BaseDriver):
             if job_id in self._delayed:
                 self._delayed.remove(job_id)
+    async def promote(self, job_id: str) -> bool:
+        async with self._lock:
+            payload = self._jobs.get(job_id)
+            if payload is None or payload.status != "pending":
+                return False
+            payload.delay_until = None
+            payload.updated_at = _now_ts()
+            if job_id in self._delayed:
+                self._delayed.remove(job_id)
+            # Only enqueue if it isn't already ready, so promoting a non-delayed
+            # pending job can never duplicate it in the ready list.
+            if job_id not in self._queues[payload.queue]:
+                self._queues[payload.queue].append(job_id)
+            return True
     # ── Query ───────────────────────────────────────────────────
     async def get_job(self, job_id: str) -> JobPayload | None:

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/postgres_driver.py RENAMED Viewed

@@ -324,6 +324,24 @@ class PostgresDriver(BaseDriver):
         await self._with_disk_full_recovery(_do)
+    async def promote(self, job_id: str) -> bool:
+        now = _now_ts()
+        async def _do():
+            async with self._pool.acquire() as conn:
+                # Clearing delay_until is enough: pop() already accepts a pending
+                # row whose delay_until IS NULL or has elapsed.
+                return await conn.fetchrow(
+                    f"""UPDATE {self._jobs_table}
+                        SET delay_until=NULL, updated_at=$1
+                        WHERE id=$2 AND status='pending'
+                        RETURNING id""",
+                    now, job_id,
+                )
+        row = await self._with_disk_full_recovery(_do)
+        return row is not None
     # ── Query ───────────────────────────────────────────────────
     async def get_job(self, job_id: str) -> JobPayload | None:

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/redis_driver.py RENAMED Viewed

@@ -6,11 +6,16 @@ import json
 import logging
 from typing import Any
-from baqueue.drivers.base import BaseDriver
+from baqueue.drivers.base import DEFAULT_PRUNE_BATCH, BaseDriver
 from baqueue.serializer import JobPayload, _now_ts
 logger = logging.getLogger("baqueue.redis")
+# Every status a job hash can carry. Used when reaping orphaned index entries:
+# the job hash is gone, so we can't read its status — we ZREM from every global
+# status index to be sure the stale id is cleared.
+_ALL_STATUSES = ("pending", "processing", "completed", "failed", "cancelled")
 class RedisDriver(BaseDriver):
     """Redis-backed driver using sorted sets for indexed pagination.
@@ -88,6 +93,10 @@ class RedisDriver(BaseDriver):
         self._redis = aioredis.from_url(self._url, decode_responses=True, **self._kwargs)
         await self._redis.ping()
         await self._backfill_indexes_if_needed()
+        if self.reconcile_on_connect:
+            removed = await self.reconcile_indexes()
+            if removed:
+                logger.info("reconcile_on_connect removed %d stale index entr(ies)", removed)
     async def disconnect(self) -> None:
         if self._redis:
@@ -97,7 +106,11 @@ class RedisDriver(BaseDriver):
     async def _backfill_indexes_if_needed(self) -> None:
         """One-time rebuild of secondary ZSETs for upgrades from a version
         that didn't maintain them. Safe to call on every connect — exits fast
-        when the global index is non-empty."""
+        when the global index is non-empty.
+        This is *add-only*: it inserts index entries for existing job hashes. It
+        cannot remove drift (index entries whose hash is gone) — that is the job
+        of reconcile_indexes(). Together they fully heal the indexes."""
         if await self._redis.exists(self._idx_all()):
             return
         cursor: Any = "0"
@@ -297,6 +310,32 @@ class RedisDriver(BaseDriver):
             await pipe.execute()
         await self._with_disk_full_recovery(_do)
+    async def promote(self, job_id: str) -> bool:
+        raw = await self._redis.hget(self._key("job", job_id), "data")
+        if not raw:
+            return False
+        payload = JobPayload.from_json(raw)
+        if payload.status != "pending":
+            return False
+        now = _now_ts()
+        # Only a job actually sitting in the delayed ZSET needs to be moved into
+        # its ready list. A pending job that is already ready (delay_until None or
+        # in the past) must NOT be re-pushed, or Redis pop — which does not
+        # re-check status — would process it twice.
+        was_scheduled = payload.delay_until is not None and payload.delay_until > now
+        payload.delay_until = None
+        payload.updated_at = now
+        async def _do():
+            pipe = self._redis.pipeline()
+            pipe.hset(self._key("job", job_id), mapping={"data": payload.to_json()})
+            if was_scheduled:
+                pipe.zrem(self._key("delayed"), job_id)
+                pipe.rpush(self._key("queue", payload.queue), job_id)
+            await pipe.execute()
+        await self._with_disk_full_recovery(_do)
+        return True
     # ── Query ───────────────────────────────────────────────────
     async def get_job(self, job_id: str) -> JobPayload | None:
@@ -518,20 +557,41 @@ class RedisDriver(BaseDriver):
     # ── Pruning ─────────────────────────────────────────────────
-    async def prune(
-        self,
-        status: str | None = None,
-        tag: str | None = None,
-        older_than_seconds: float | None = None,
-        queue: str | None = None,
-    ) -> int:
-        if not (status or tag or older_than_seconds or queue):
-            return 0
+    def _index_remove_orphan(self, pipe: Any, job_id: str, queue: str | None, status: str | None) -> None:
+        """ZREM a stale id whose job hash is gone. We can't read the job's real
+        queue/status, so we clear every index family we can infer from the call:
+        always jobs:all + every global status index, plus the queue-scoped families
+        when the caller knows the queue/status it was iterating."""
+        pipe.zrem(self._idx_all(), job_id)
+        for st in _ALL_STATUSES:
+            pipe.zrem(self._idx_status(st), job_id)
+        if queue:
+            pipe.zrem(self._idx_queue(queue), job_id)
+            for st in _ALL_STATUSES:
+                pipe.zrem(self._idx_queue_status(queue, st), job_id)
-        index = self._index_key(queue, status)
-        candidate_ids: list[str] = await self._redis.zrange(index, 0, -1)
+    async def _prune_index_batch(
+        self,
+        index: str,
+        queue: str | None,
+        status: str | None,
+        tag: str | None,
+        older_than_seconds: float | None,
+        offset: int,
+        limit: int,
+    ) -> tuple[int, int, int]:
+        """Process one window ``[offset, offset+limit)`` of an index in a single
+        atomic pass.
+        Live jobs matching the filters are fully deleted (hash + all four index
+        families). Orphaned ids (hash already gone) are reaped from the indexes so
+        they can never accumulate. Non-matching live jobs are left in place. Returns
+        ``(removed, scanned, skipped)``: removed = deleted + reaped, scanned = window
+        size actually read, skipped = live jobs left in place (so the caller can step
+        its offset past them)."""
+        candidate_ids: list[str] = await self._redis.zrange(index, offset, offset + limit - 1)
         if not candidate_ids:
-            return 0
+            return 0, 0, 0
         pipe = self._redis.pipeline()
         for jid in candidate_ids:
@@ -540,29 +600,170 @@ class RedisDriver(BaseDriver):
         now = _now_ts()
         to_delete: list[JobPayload] = []
-        for raw in raws:
+        orphans: list[str] = []
+        skipped = 0
+        for jid, raw in zip(candidate_ids, raws):
             if not raw:
+                orphans.append(jid)
                 continue
             job = JobPayload.from_json(raw)
             if tag and tag not in job.tags:
+                skipped += 1
                 continue
             if older_than_seconds and (now - job.updated_at) < older_than_seconds:
+                skipped += 1
                 continue
             to_delete.append(job)
-        if not to_delete:
+        if to_delete or orphans:
+            async def _do():
+                pipe = self._redis.pipeline()
+                for job in to_delete:
+                    pipe.lrem(self._key("queue", job.queue), 0, job.id)
+                    pipe.zrem(self._key("delayed"), job.id)
+                    pipe.unlink(self._key("job", job.id))
+                    self._index_remove(pipe, job.id, job.queue, job.status)
+                for jid in orphans:
+                    self._index_remove_orphan(pipe, jid, queue, status)
+                await pipe.execute()
+            await self._with_disk_full_recovery(_do)
+        return len(to_delete) + len(orphans), len(candidate_ids), skipped
+    async def _drain_index(
+        self,
+        index: str,
+        queue: str | None,
+        status: str | None,
+        tag: str | None,
+        older_than_seconds: float | None,
+        batch: int,
+    ) -> int:
+        """Page through an index in ``batch``-sized windows, deleting matches and
+        reaping orphans, until the whole index has been scanned.
+        Each Redis round-trip handles at most ``batch`` ids, so a huge (possibly
+        orphan-laden) index never blocks the server on one giant zrange + delete —
+        while every entry is still examined. Entries a filter skips stay in the index,
+        so the offset is advanced past them; that is what keeps matches deeper than
+        the first window from being missed (re-reading ``zrange(0, batch)`` forever
+        would stop early)."""
+        batch = max(1, batch)
+        offset = 0
+        total = 0
+        while True:
+            removed, scanned, skipped = await self._prune_index_batch(
+                index, queue, status, tag, older_than_seconds, offset, batch,
+            )
+            total += removed
+            offset += skipped  # kept entries remain; step past them next round
+            if scanned < batch:
+                break
+        return total
+    async def prune(
+        self,
+        status: str | None = None,
+        tag: str | None = None,
+        older_than_seconds: float | None = None,
+        queue: str | None = None,
+    ) -> int:
+        if not (status or tag or older_than_seconds or queue):
             return 0
+        index = self._index_key(queue, status)
+        return await self._drain_index(
+            index, queue, status, tag, older_than_seconds, DEFAULT_PRUNE_BATCH,
+        )
+    async def prune_terminal_jobs(
+        self,
+        queue: str | None = None,
+        status: str | None = None,
+        *,
+        older_than: float | None = None,
+        limit: int = DEFAULT_PRUNE_BATCH,
+    ) -> int:
+        """Index-consistent bulk delete from a status index, draining fully in
+        ``limit``-sized batches (each Redis round-trip handles at most ``limit`` ids).
+        Uses the secondary index itself as the work source — no SCAN of every job
+        hash — and reaps orphaned index entries in the same pass."""
+        index = self._index_key(queue, status)
+        return await self._drain_index(index, queue, status, None, older_than, limit)
+    async def bulk_delete_jobs(self, job_ids: list[str], *, limit: int | None = None) -> int:
+        """Delete an explicit list of jobs atomically, keeping all four index
+        families consistent. Live jobs are removed precisely (real queue/status from
+        the hash); ids whose hash is already gone are reaped from jobs:all and every
+        global status index (per-queue orphans are caught by reconcile_indexes)."""
+        if limit is not None:
+            job_ids = job_ids[:limit]
+        if not job_ids:
+            return 0
+        pipe = self._redis.pipeline()
+        for jid in job_ids:
+            pipe.hget(self._key("job", jid), "data")
+        raws = await pipe.execute()
         async def _do():
             pipe = self._redis.pipeline()
-            for job in to_delete:
-                pipe.lrem(self._key("queue", job.queue), 0, job.id)
-                pipe.zrem(self._key("delayed"), job.id)
-                pipe.delete(self._key("job", job.id))
-                self._index_remove(pipe, job.id, job.queue, job.status)
+            for jid, raw in zip(job_ids, raws):
+                if raw:
+                    job = JobPayload.from_json(raw)
+                    pipe.lrem(self._key("queue", job.queue), 0, jid)
+                    pipe.zrem(self._key("delayed"), jid)
+                    pipe.unlink(self._key("job", jid))
+                    self._index_remove(pipe, jid, job.queue, job.status)
+                else:
+                    self._index_remove_orphan(pipe, jid, None, None)
             await pipe.execute()
         await self._with_disk_full_recovery(_do)
-        return len(to_delete)
+        return len(job_ids)
+    async def reconcile_indexes(self, batch: int = 500) -> int:
+        """Walk every secondary-index ZSET and ZREM ids whose job hash is gone.
+        Self-healing repair for index drift (e.g. job hashes deleted out-of-band).
+        Index keys are discovered by SCAN (every ``baqueue:jobs:*`` key — jobs:all,
+        jobs:status:*, jobs:queue:* and jobs:queue:*:status:*) so the repair reaches
+        families for queues no longer in the queues set, and never wastes a round-trip
+        on an index combination that does not exist. Each index is then walked with
+        ZSCAN — never loading a huge set at once — checking hash existence in pipelined
+        batches. Returns the number of stale entries removed."""
+        # Job hashes are baqueue:job:* (singular); the index ZSETs are baqueue:jobs:*.
+        index_keys: list[str] = []
+        cursor: Any = "0"
+        pattern = self._key("jobs", "*")
+        while True:
+            cursor, keys = await self._redis.scan(cursor=cursor, match=pattern, count=batch)
+            index_keys.extend(keys)
+            if cursor == "0" or cursor == 0:
+                break
+        removed = 0
+        for index in index_keys:
+            zcursor: Any = 0
+            while True:
+                zcursor, members = await self._redis.zscan(index, cursor=zcursor, count=batch)
+                ids = [m[0] if isinstance(m, (tuple, list)) else m for m in members]
+                if ids:
+                    pipe = self._redis.pipeline()
+                    for jid in ids:
+                        pipe.exists(self._key("job", jid))
+                    exists_flags = await pipe.execute()
+                    stale = [jid for jid, ok in zip(ids, exists_flags) if not ok]
+                    if stale:
+                        async def _do(index=index, stale=stale):
+                            pipe = self._redis.pipeline()
+                            for jid in stale:
+                                pipe.zrem(index, jid)
+                            await pipe.execute()
+                        await self._with_disk_full_recovery(_do)
+                        removed += len(stale)
+                if zcursor == 0 or zcursor == "0":
+                    break
+        return removed
     async def prune_metrics(self, older_than_seconds: float) -> int:
         cutoff = _now_ts() - older_than_seconds
@@ -635,11 +836,11 @@ class RedisDriver(BaseDriver):
                     pipe.delete(self._key("job", jid))
                     pipe.zrem(self._idx_all(), jid)
                     pipe.zrem(self._key("delayed"), jid)
-                    for st in ("pending", "processing", "completed", "failed"):
+                    for st in _ALL_STATUSES:
                         pipe.zrem(self._idx_status(st), jid)
                 # Drop all per-queue and per-(queue,status) indexes
                 pipe.delete(self._idx_queue(queue))
-                for st in ("pending", "processing", "completed", "failed"):
+                for st in _ALL_STATUSES:
                     pipe.delete(self._idx_queue_status(queue, st))
                 pipe.srem(self._key("queues"), queue)
                 await pipe.execute()

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/drivers/sqlite_driver.py RENAMED Viewed

@@ -377,6 +377,23 @@ class SqliteDriver(BaseDriver):
                 c.commit()
             await self._execute_with_retry(_do)
+    async def promote(self, job_id: str) -> bool:
+        now = _now_ts()
+        async with self._lock:
+            result = [False]
+            def _do():
+                c = self._get_conn()
+                # Clearing delay_until is enough: pop() already accepts a pending
+                # row whose delay_until IS NULL or has elapsed.
+                cur = c.execute(
+                    "UPDATE jobs SET delay_until=NULL, updated_at=? WHERE id=? AND status='pending'",
+                    (now, job_id),
+                )
+                c.commit()
+                result[0] = cur.rowcount == 1
+            await self._execute_with_retry(_do)
+            return result[0]
     # ── Query ───────────────────────────────────────────────────
     async def get_job(self, job_id: str) -> JobPayload | None:

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/pruner.py RENAMED Viewed

@@ -57,26 +57,35 @@ class Pruner:
             return self.config.prune_metrics_hours * 3600
         return float(self.config.prune_metrics_seconds)
+    async def _prune_terminal(self, status: str, older_than: float) -> int:
+        """Prune a terminal status via the driver's index-consistent bulk delete.
+        The driver drains the whole backlog in capped batches (so a large or
+        orphan-laden index never blocks the backend) and reaps orphaned index entries
+        in the same pass, returning the total removed."""
+        return await self.driver.prune_terminal_jobs(
+            status=status,
+            older_than=older_than,
+            limit=max(1, int(self.config.prune_batch_size)),
+        )
     async def prune_once(self) -> dict[str, int]:
         """Run a single prune pass based on config."""
         results: dict[str, int] = {}
         if self.completed_threshold > 0:
-            results["completed"] = await self.driver.prune(
-                status="completed",
-                older_than_seconds=self.completed_threshold,
+            results["completed"] = await self._prune_terminal(
+                "completed", self.completed_threshold,
             )
         if self.failed_threshold > 0:
-            results["failed"] = await self.driver.prune(
-                status="failed",
-                older_than_seconds=self.failed_threshold,
+            results["failed"] = await self._prune_terminal(
+                "failed", self.failed_threshold,
             )
         if self.cancelled_threshold > 0:
-            results["cancelled"] = await self.driver.prune(
-                status="cancelled",
-                older_than_seconds=self.cancelled_threshold,
+            results["cancelled"] = await self._prune_terminal(
+                "cancelled", self.cancelled_threshold,
             )
         if self.metrics_threshold > 0:

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/queue.py RENAMED Viewed

@@ -30,6 +30,7 @@ class Queue:
         cls._config = config or BaQueueConfig()
         if driver is not None:
             driver.auto_cleanup_on_disk_full = cls._config.auto_cleanup_on_disk_full
+            driver.reconcile_on_connect = cls._config.reconcile_on_connect
         cls._driver = driver
         cls._events = EventBus.default()
@@ -222,4 +223,5 @@ def _create_driver(config: BaQueueConfig) -> BaseDriver:
     else:
         raise ValueError(f"Unknown driver: {name}")
     driver.auto_cleanup_on_disk_full = config.auto_cleanup_on_disk_full
+    driver.reconcile_on_connect = config.reconcile_on_connect
     return driver

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/serializer.py RENAMED Viewed

@@ -35,6 +35,7 @@ class JobPayload:
         "failed_at",
         "status",
         "error",
+        "history",
     )
     def __init__(
@@ -58,6 +59,7 @@ class JobPayload:
         failed_at: float | None = None,
         status: str = "pending",
         error: str | None = None,
+        history: list[dict[str, Any]] | None = None,
     ):
         self.id = id or uuid4().hex
         self.job_class = job_class
@@ -77,9 +79,14 @@ class JobPayload:
         self.failed_at = failed_at
         self.status = status
         self.error = error
-    def to_dict(self) -> dict[str, Any]:
-        return {
+        # Per-attempt execution history (one record per processing attempt).
+        # Bounded by the number of attempts; persisted only by drivers that store
+        # the full payload (memory, redis). Older payloads without this key load
+        # as an empty list, so the field is fully backward compatible.
+        self.history = history or []
+    def to_dict(self, *, include_history: bool = True) -> dict[str, Any]:
+        d = {
             "id": self.id,
             "job_class": self.job_class,
             "data": self.data,
@@ -99,6 +106,9 @@ class JobPayload:
             "status": self.status,
             "error": self.error,
         }
+        if include_history:
+            d["history"] = self.history
+        return d
     def to_json(self) -> str:
         return json.dumps(self.to_dict())

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue/worker.py RENAMED Viewed

@@ -11,10 +11,16 @@ from baqueue.drivers.base import BaseDriver
 from baqueue.events import EventBus
 from baqueue.job import Job, FunctionJob
 from baqueue.retry import compute_delay, should_retry
-from baqueue.serializer import JobPayload, resolve_job_class
+from baqueue.serializer import JobPayload, resolve_job_class, _now_ts
 logger = logging.getLogger("baqueue.worker")
+# Per-attempt errors stored in JobPayload.history are truncated to this many
+# characters. The job's top-level `error` field keeps the full latest traceback;
+# this bound keeps the history (and therefore the stored payload) from growing
+# large across retries.
+_HISTORY_ERROR_MAXLEN = 1000
 class Worker:
     """Pulls and executes jobs from one or more queues."""
@@ -84,6 +90,33 @@ class Worker:
                 return job
         return None
+    @staticmethod
+    def _record_attempt(
+        payload: JobPayload,
+        *,
+        status: str,
+        finished_at: float,
+        error: str | None = None,
+        will_retry: bool = False,
+        next_retry_at: float | None = None,
+    ) -> None:
+        """Append one bounded record describing the attempt that just concluded.
+        Called once per attempt, right before the driver persists the new state, so
+        drivers that store the whole payload (memory, redis) keep the full history.
+        The list is bounded by the number of attempts and the error is truncated."""
+        if error is not None and len(error) > _HISTORY_ERROR_MAXLEN:
+            error = error[:_HISTORY_ERROR_MAXLEN] + "…"
+        payload.history.append({
+            "attempt": payload.attempts,
+            "started_at": payload.started_at,
+            "finished_at": finished_at,
+            "status": status,
+            "error": error,
+            "will_retry": will_retry,
+            "next_retry_at": next_retry_at,
+        })
     async def _process(self, payload: JobPayload) -> None:
         self._current_job = payload
         job_timeout = payload.timeout or self.timeout
@@ -99,6 +132,7 @@ class Worker:
                 timeout=job_timeout,
             )
+            self._record_attempt(payload, status="completed", finished_at=_now_ts())
             await self.driver.complete(payload)
             await self.driver.record_metric(payload.queue, "completed", 1)
             await self.events.emit("job.completed", payload=payload, result=result, worker=self.name)
@@ -118,9 +152,16 @@ class Worker:
             if should_retry(payload.attempts, payload.max_attempts):
                 delay = compute_delay(payload.backoff, payload.attempts)
+                self._record_attempt(
+                    payload, status="failed", finished_at=_now_ts(),
+                    error=error_msg, will_retry=True, next_retry_at=_now_ts() + delay,
+                )
                 await self.driver.release(payload, delay=delay)
                 await self.events.emit("job.retrying", payload=payload, error=error_msg, delay=delay)
             else:
+                self._record_attempt(
+                    payload, status="failed", finished_at=_now_ts(), error=error_msg,
+                )
                 await self.driver.fail(payload, error_msg)
                 await self.driver.record_metric(payload.queue, "failed", 1)
                 await self.events.emit("job.failed", payload=payload, error=error_msg, worker=self.name)

{baqueue-1.0.1 → baqueue-1.1.0/baqueue.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: baqueue
-Version: 1.0.1
+Version: 1.1.0
 Summary: A powerful Python queue management package inspired by Laravel Horizon
 Author: Basalam, BaQueue Contributors
 License: MIT
@@ -45,6 +45,7 @@ Provides-Extra: dev
 Requires-Dist: baqueue[all]; extra == "dev"
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+Requires-Dist: fakeredis>=2.21; extra == "dev"
 Requires-Dist: build>=1.0; extra == "dev"
 Requires-Dist: twine>=5.0; extra == "dev"
 Dynamic: license-file
@@ -274,6 +275,21 @@ await Queue.prune(status="completed", hours=24)
 await Queue.prune(tag="batch:newsletter")
 ```
+#### Redis index health
+The Redis driver keeps secondary indexes (sorted sets) so the dashboard can list and
+count jobs by queue/status efficiently. All deletes go through an index-consistent path
+that removes the job hash *and* every index entry in one atomic step, so the indexes stay
+bounded. If entries are ever orphaned out-of-band (e.g. job hashes deleted directly via
+`redis-cli`), pruning reaps them automatically, and you can force a full repair:
+```bash
+baqueue reconcile-indexes -d redis --driver-url redis://localhost:6379/0
+```
+Set `reconcile_on_connect=True` to run that repair once on every startup (off by default
+to keep connect fast on large datasets).
 ### Retry Failed Jobs
 Bulk-retry failed jobs from the CLI, from Python, or from the dashboard.
@@ -508,6 +524,7 @@ baqueue schedule      Start the job scheduler
 baqueue dashboard     Launch the monitoring dashboard
 baqueue prune         Prune old jobs
 baqueue retry-failed  Retry all failed jobs (filter by queue/tag/age)
+baqueue reconcile-indexes  Repair Redis secondary indexes (drop stale entries)
 baqueue status        Show queue status
 baqueue test          Run the test suite
 ```

{baqueue-1.0.1 → baqueue-1.1.0}/baqueue.egg-info/requires.txt RENAMED Viewed

@@ -18,6 +18,7 @@ websockets>=12.0
 baqueue[all]
 pytest>=8.0
 pytest-asyncio>=0.23
+fakeredis>=2.21
 build>=1.0
 twine>=5.0

{baqueue-1.0.1 → baqueue-1.1.0}/pyproject.toml RENAMED Viewed

@@ -53,6 +53,7 @@ dev = [
     "baqueue[all]",
     "pytest>=8.0",
     "pytest-asyncio>=0.23",
+    "fakeredis>=2.21",
     "build>=1.0",
     "twine>=5.0",
 ]