PyPI - claude-sql - Versions diffs - 0.4.0__py3-none-any.whl - Mend

claude-sql 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

claude_sql/__init__.py +5 -0
claude_sql/binding.py +740 -0
claude_sql/blind_handover.py +155 -0
claude_sql/checkpointer.py +202 -0
claude_sql/cli.py +2344 -0
claude_sql/cluster_worker.py +208 -0
claude_sql/community_worker.py +306 -0
claude_sql/config.py +380 -0
claude_sql/embed_worker.py +482 -0
claude_sql/freeze.py +189 -0
claude_sql/friction_worker.py +561 -0
claude_sql/install_source.py +77 -0
claude_sql/judge_worker.py +459 -0
claude_sql/judges.py +239 -0
claude_sql/kappa_worker.py +257 -0
claude_sql/llm_worker.py +1760 -0
claude_sql/logging_setup.py +95 -0
claude_sql/output.py +248 -0
claude_sql/parquet_shards.py +172 -0
claude_sql/retry_queue.py +180 -0
claude_sql/review_sheet_render.py +167 -0
claude_sql/review_sheet_worker.py +463 -0
claude_sql/schemas.py +454 -0
claude_sql/session_text.py +387 -0
claude_sql/skills_catalog.py +354 -0
claude_sql/sql_views.py +1751 -0
claude_sql/terms_worker.py +145 -0
claude_sql/ungrounded_worker.py +190 -0
claude_sql-0.4.0.dist-info/METADATA +530 -0
claude_sql-0.4.0.dist-info/RECORD +32 -0
claude_sql-0.4.0.dist-info/WHEEL +4 -0
claude_sql-0.4.0.dist-info/entry_points.txt +3 -0

claude_sql/logging_setup.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Loguru configuration for claude-sql.
+Single ``configure_logging()`` helper that removes the default loguru handler
+and re-installs a stderr handler honoring ``--verbose`` / ``--quiet`` flags and
+``LOGURU_LEVEL``.
+Also provides ``loguru_before_sleep`` so workers can pass tenacity a
+loguru-native callback (instead of stdlib ``logging.getLogger`` +
+``before_sleep_log``). Keeps the package single-logger across the board.
+"""
+from __future__ import annotations
+import os
+import sys
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+from loguru import logger
+if TYPE_CHECKING:
+    from tenacity import RetryCallState
+# Line layout passed as ``format=`` to the stderr handler: HH:mm:ss time
+# wrapped in a green tag, the level name left-aligned to 7 characters, the
+# record's ``extra`` mapping, then the message.
+_FORMAT = "<green>{time:HH:mm:ss}</green> <level>{level:<7}</level> {extra} {message}"
+def configure_logging(verbose: bool = False, quiet: bool = False) -> None:  # noqa: FBT001, FBT002 — CLI flag pass-through
+    """Install the stderr loguru handler for claude-sql.
+    Parameters
+    ----------
+    verbose
+        If true, set level to ``DEBUG`` (takes precedence over ``quiet``).
+    quiet
+        If true, set level to ``ERROR``.
+    """
+    logger.remove()
+    if verbose:
+        level = "DEBUG"
+    elif quiet:
+        level = "ERROR"
+    else:
+        level = os.getenv("LOGURU_LEVEL", "INFO")
+    logger.add(
+        sys.stderr,
+        level=level,
+        format=_FORMAT,
+        backtrace=False,
+        diagnose=False,
+    )
+def loguru_before_sleep(level: str = "WARNING") -> Callable[[RetryCallState], None]:
+    """Return a tenacity ``before_sleep`` callback that logs via loguru.
+    Replaces the historical ``before_sleep_log(stdlib_logger, level)`` shape
+    that pulled stdlib ``logging`` into otherwise loguru-only modules just
+    to satisfy tenacity's API. The format string mirrors tenacity's own
+    :func:`tenacity.before_sleep_log`: function name, sleep seconds, and
+    the exception or returned value that triggered the retry.
+    Parameters
+    ----------
+    level
+        Loguru level name (``"DEBUG" | "INFO" | "WARNING" | …``).
+    Returns
+    -------
+    Callable
+        A function with the tenacity ``before_sleep`` signature
+        ``(RetryCallState) -> None``.
+    """
+    def _before_sleep(retry_state: RetryCallState) -> None:
+        if retry_state.outcome is None or retry_state.next_action is None:
+            return
+        if retry_state.fn is None:
+            fn_name = "<unknown>"
+        else:
+            fn_name = getattr(retry_state.fn, "__qualname__", repr(retry_state.fn))
+        if retry_state.outcome.failed:
+            exc = retry_state.outcome.exception()
+            verb, value = "raised", f"{exc.__class__.__name__}: {exc}"
+        else:
+            verb, value = "returned", retry_state.outcome.result()
+        logger.log(
+            level,
+            "Retrying {} in {:.3g} seconds as it {} {}.",
+            fn_name,
+            retry_state.next_action.sleep,
+            verb,
+            value,
+        )
+    return _before_sleep

claude_sql/output.py ADDED Viewed

@@ -0,0 +1,248 @@
+"""Agent-friendly output formatting and error handling for claude-sql.
+Every CLI subcommand emits results through :func:`emit_dataframe` (for tabular
+output) or :func:`emit_json` (for structured non-tabular output).  The caller
+picks a :class:`OutputFormat`; when it is :data:`OutputFormat.AUTO` the
+formatter picks ``TABLE`` on a TTY and ``JSON`` otherwise so pipes and agent
+subprocesses get machine-readable output without a flag.
+Errors from DuckDB get classified and mapped to stable exit codes so agents
+can distinguish *parse failed* from *unknown view* from *runtime error*
+without pattern-matching tracebacks.  See :data:`EXIT_CODES`.
+"""
+from __future__ import annotations
+import json
+import sys
+from dataclasses import dataclass
+from enum import StrEnum
+from typing import Any
+import duckdb
+import polars as pl
+class OutputFormat(StrEnum):
+    """Output formats a CLI subcommand can be asked to emit.
+    ``AUTO`` is a placeholder that :func:`resolve_format` turns into ``TABLE``
+    when stdout is a TTY and ``JSON`` otherwise.  Keeping it a string Enum
+    lets cyclopts parse ``--format json`` without any custom converter.
+    """
+    AUTO = "auto"
+    TABLE = "table"
+    JSON = "json"
+    NDJSON = "ndjson"
+    CSV = "csv"
+    MARKDOWN = "markdown"
+# Exit codes that agents can rely on.  Keep them stable -- wire protocols
+# always rot fastest at the boundary.  Note that "invalid_input" and
+# "parse_error" share code 64, so the exit code alone cannot tell them apart;
+# the error payload's ``kind`` field can.
+EXIT_CODES: dict[str, int] = {
+    "ok": 0,
+    "no_embeddings": 2,
+    "invalid_input": 64,  # malformed user-supplied flags (e.g. --glob)
+    "parse_error": 64,  # malformed SQL
+    "catalog_error": 65,  # unknown view/macro/column
+    "runtime_error": 70,  # everything else from duckdb.Error
+    "duckdb_missing": 127,  # system `duckdb` binary not on PATH
+}
+def resolve_format(fmt: OutputFormat | str) -> OutputFormat:
+    """Resolve ``AUTO`` against the current stdout.  No-op for explicit formats."""
+    resolved = OutputFormat(fmt) if isinstance(fmt, str) else fmt
+    if resolved is not OutputFormat.AUTO:
+        return resolved
+    return OutputFormat.TABLE if sys.stdout.isatty() else OutputFormat.JSON
+def emit_dataframe(
+    df: pl.DataFrame,
+    fmt: OutputFormat | str = OutputFormat.AUTO,
+    *,
+    table_rows: int = 100,
+    table_str_len: int = 120,
+) -> None:
+    """Write a polars DataFrame to stdout in the requested format.
+    Parameters
+    ----------
+    df
+        The frame to emit.
+    fmt
+        One of :class:`OutputFormat`.  ``AUTO`` resolves per :func:`resolve_format`.
+    table_rows
+        Row cap for the pretty-printed table format only.  JSON / NDJSON / CSV
+        always emit every row; rely on SQL ``LIMIT`` to cap upstream.
+    table_str_len
+        Column-cell string truncation for the table format only.
+    """
+    resolved = resolve_format(fmt)
+    if resolved is OutputFormat.TABLE:
+        with pl.Config(tbl_rows=table_rows, tbl_cols=20, fmt_str_lengths=table_str_len):
+            print(df)
+        return
+    if resolved is OutputFormat.JSON:
+        # ``write_json`` emits a JSON array of row objects -- the exact shape
+        # agents expect for tabular results.
+        sys.stdout.write(df.write_json())
+        sys.stdout.write("\n")
+        return
+    if resolved is OutputFormat.NDJSON:
+        df.write_ndjson(sys.stdout)
+        return
+    if resolved is OutputFormat.CSV:
+        df.write_csv(sys.stdout)
+        return
+    # unreachable if OutputFormat stays closed-set
+    raise ValueError(f"Unsupported format: {resolved}")
+def emit_json(payload: Any, fmt: OutputFormat | str = OutputFormat.AUTO) -> None:
+    """Write a non-tabular payload as JSON (for schema, list-cache, errors).
+    The ``TABLE`` path is handled by the caller (schema has a custom human
+    layout); this helper is purely for machine-readable formats.
+    """
+    resolved = resolve_format(fmt)
+    # JSON / NDJSON / CSV all reduce to a JSON document for non-tabular data.
+    # CSV over a nested dict is meaningless, so fall through to JSON.
+    sys.stdout.write(json.dumps(payload, indent=2, default=str))
+    sys.stdout.write("\n")
+    del resolved
+@dataclass(frozen=True, slots=True)
+class ClassifiedError:
+    """The structured shape of a CLI error after classification."""
+    kind: str  # "parse_error" | "catalog_error" | "runtime_error"
+    exit_code: int
+    message: str
+    hint: str | None = None
+    def to_payload(self) -> dict[str, Any]:
+        return {
+            "error": {
+                "kind": self.kind,
+                "message": self.message,
+                "hint": self.hint,
+            }
+        }
+class InputValidationError(ValueError):
+    """Raised when a user-supplied flag (e.g. ``--glob``) is malformed.
+    Carries its own ``hint`` so ``run_or_die`` can surface the fix alongside
+    the failure. Maps to exit code 64 (``invalid_input``).
+    """
+    def __init__(self, message: str, *, hint: str | None = None) -> None:
+        super().__init__(message)
+        self.hint = hint
+def validate_glob(pattern: str | None, *, flag: str = "--glob") -> None:
+    """Reject glob patterns DuckDB's ``read_json`` cannot accept.
+    DuckDB raises ``IO Error: Cannot use multiple '**' in one path`` when a
+    glob contains more than one recursive segment. We catch that up front so
+    the failure surfaces with a useful hint instead of a raw traceback.
+    Pass-through for ``None`` / empty strings: the caller will fall back to
+    its default glob.
+    """
+    if not pattern:
+        return
+    if pattern.count("**") > 1:
+        raise InputValidationError(
+            f"{flag} pattern {pattern!r} contains more than one '**' segment; "
+            "DuckDB's read_json rejects it.",
+            hint=(
+                "use at most one '**' recursive wildcard -- e.g. "
+                "'/home/you/.claude/projects/**/*.jsonl' or "
+                "'/home/you/.claude/projects/<project>/*.jsonl'"
+            ),
+        )
+def classify_duckdb_error(exc: duckdb.Error) -> ClassifiedError:
+    """Map a ``duckdb.Error`` to a stable kind, exit code, and hint.
+    :class:`duckdb.ParserException` becomes ``parse_error`` and
+    :class:`duckdb.CatalogException` becomes ``catalog_error``; any other
+    :class:`duckdb.Error` is reported as ``runtime_error`` with no hint.
+    """
+    hint: str | None
+    if isinstance(exc, duckdb.ParserException):
+        kind = "parse_error"
+        hint = "check SQL syntax; try `claude-sql schema --format json` for view/macro names"
+    elif isinstance(exc, duckdb.CatalogException):
+        kind = "catalog_error"
+        hint = "unknown view or column; run `claude-sql schema --format json` for the catalog"
+    else:
+        kind = "runtime_error"
+        hint = None
+    return ClassifiedError(
+        kind=kind,
+        exit_code=EXIT_CODES[kind],
+        message=str(exc),
+        hint=hint,
+    )
+def emit_error(err: ClassifiedError, fmt: OutputFormat | str = OutputFormat.AUTO) -> None:
+    """Write a classified error to stderr in the requested format.
+    Agents running with ``--format json`` get the structured payload; humans on
+    a TTY get a single readable line with the hint.  Either way the process
+    exits with :attr:`ClassifiedError.exit_code`.
+    """
+    resolved = resolve_format(fmt)
+    if resolved is OutputFormat.TABLE:
+        prefix = f"[{err.kind}]"
+        sys.stderr.write(f"{prefix} {err.message}\n")
+        if err.hint:
+            sys.stderr.write(f"hint: {err.hint}\n")
+    else:
+        sys.stderr.write(json.dumps(err.to_payload(), default=str))
+        sys.stderr.write("\n")
+def run_or_die(
+    fn: Any,
+    *args: Any,
+    fmt: OutputFormat | str = OutputFormat.AUTO,
+    **kwargs: Any,
+) -> Any:
+    """Invoke ``fn(*args, **kwargs)`` and translate DuckDB errors to exit codes.
+    Keeps every subcommand's body clean of try/except bookkeeping.  On success
+    returns ``fn``'s result; on :class:`duckdb.Error` writes a classified error
+    and calls :func:`sys.exit` with the matching code.
+    """
+    try:
+        return fn(*args, **kwargs)
+    except InputValidationError as exc:
+        err = ClassifiedError(
+            kind="invalid_input",
+            exit_code=EXIT_CODES["invalid_input"],
+            message=str(exc),
+            hint=exc.hint,
+        )
+        emit_error(err, fmt)
+        sys.exit(err.exit_code)
+    except duckdb.Error as exc:
+        err = classify_duckdb_error(exc)
+        emit_error(err, fmt)
+        sys.exit(err.exit_code)

claude_sql/parquet_shards.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""Sharded parquet I/O helpers for the five worker-append caches.
+Background
+----------
+Workers ``embed_worker``, ``llm_worker`` (classify / trajectory / conflicts),
+and ``friction_worker`` previously used a "read whole parquet → concat →
+rewrite whole parquet" pattern on every chunk. At ~50 MB this is roughly
+1.5 s of pure IO per chunk × 100 chunks per backfill = ~150 s wasted on a
+single full backfill. This module replaces that with a directory-of-parts
+pattern: each chunk writes ``<dir>/part-<ts_ns>.parquet`` and readers glob
+the directory.
+Design (intentional deviation from the original plan)
+-----------------------------------------------------
+The plan called for renaming five ``Settings.*_parquet_path`` fields to
+``*_dir_path``. That would cascade across ~30 call sites in the CLI, SQL
+views, and tests for no semantic gain. Instead we keep the field names and
+overload their meaning:
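+* If the path is a *directory* (or doesn't yet exist on disk and has no
+  ``.parquet`` suffix), it's a sharded cache — :func:`write_part` drops a
+  fresh ``part-<ts_ns>.parquet`` into it, and :func:`read_all` /
+  :func:`iter_part_files` glob the directory. A missing path that *does* end
+  in ``.parquet`` is treated as a legacy single file.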
+* If the path is a *file*, the legacy single-file behavior kicks in —
+  :func:`write_part` does the read-then-rewrite, :func:`read_all` reads the
+  one file, etc.
+New installs get directories (the field default factories in ``config.py``
+were updated). Existing single-file caches keep working until migrated; see
+``claude-sql cache migrate``.
+Public API
+----------
+* :func:`is_sharded_dir` — does this path point at a sharded directory?
+* :func:`write_part` — append by writing a fresh part (or legacy rewrite).
+* :func:`read_all` — load the union of all parts (or the legacy single file).
+* :func:`iter_part_files` — sorted list of part files (or ``[target]``).
+* :func:`count_rows` — sum of row counts across parts (or single file).
+"""
+from __future__ import annotations
+import time
+from pathlib import Path
+from typing import Any
+import polars as pl
+#: Glob pattern for shard part files within a sharded cache directory.
+PART_GLOB: str = "part-*.parquet"
+def is_sharded_dir(path: Path) -> bool:
+    """Return True iff ``path`` is (or should be) treated as a sharded cache directory.
+    Two cases qualify:
+    1. ``path`` exists and is a directory.
+    2. ``path`` does not exist yet — new caches default to directory layout.
+    A path that points at an existing *file* is the legacy single-file shape.
+    """
+    if path.exists():
+        return path.is_dir()
+    # Heuristic: if the path has a parquet suffix, treat it as legacy single
+    # file even when missing (so brand-new tests using ``tmp_path/"x.parquet"``
+    # still take the legacy code path). Otherwise assume sharded directory.
+    return path.suffix != ".parquet"
+def iter_part_files(target: Path) -> list[Path]:
+    """Return a sorted list of parquet files backing ``target``.
+    For a sharded directory: every ``part-*.parquet`` under it, sorted by
+    name (which is timestamp-keyed so the order is also chronological).
+    For a legacy single-file path: ``[target]`` if it exists, else ``[]``.
+    """
+    if not target.exists():
+        return []
+    if target.is_dir():
+        return sorted(target.glob(PART_GLOB))
+    return [target]
+def write_part(target: Path, df: pl.DataFrame) -> Path:
+    """Write ``df`` as a new shard (or rewrite the legacy single file).
+    Sharded directory branch:
+        Ensure ``target`` exists as a directory and drop a brand-new
+        ``part-<ns>.parquet`` into it. No read-then-rewrite — append cost is
+        proportional to ``len(df)`` only.
+    Legacy single-file branch:
+        Load the existing parquet (if non-empty), concat ``df`` onto the
+        tail, and rewrite the whole file. Preserves the historical behavior
+        for users who haven't migrated yet.
+    Parameters
+    ----------
+    target
+        Either a sharded cache directory or a legacy ``*.parquet`` file path.
+    df
+        The polars DataFrame to persist.
+    Returns
+    -------
+    Path
+        The path that was actually written (a part file in the sharded case,
+        or ``target`` itself in the legacy case).
+    """
+    if is_sharded_dir(target):
+        target.mkdir(parents=True, exist_ok=True)
+        # Nanosecond-resolution timestamps are sortable, monotonic on Linux,
+        # and avoid filename collisions even when two part-writes land in the
+        # same millisecond (chunk_size=256 and concurrency=8 can race).
+        part_path = target / f"part-{time.time_ns()}.parquet"
+        df.write_parquet(part_path)
+        return part_path
+    # Legacy single-file branch.
+    target.parent.mkdir(parents=True, exist_ok=True)
+    if target.exists() and target.stat().st_size > 16:
+        existing = pl.read_parquet(target)
+        df = pl.concat([existing, df], how="diagonal_relaxed")
+    df.write_parquet(target)
+    return target
+def read_all(target: Path, *, dtypes: dict[str, Any] | None = None) -> pl.DataFrame | None:
+    """Return the union of all part files (or the legacy single file).
+    Returns ``None`` when the cache is empty or missing. ``dtypes`` is
+    accepted for forward-compatibility but currently unused — the caches we
+    own all carry self-describing schemas, so an explicit dtype map only
+    matters once we hit a parquet whose schema has drifted.
+    """
+    del dtypes  # reserved for future schema-pinning; not needed today
+    parts = iter_part_files(target)
+    if not parts:
+        return None
+    # ``pl.read_parquet`` accepts a list of paths and concatenates with
+    # ``how='vertical_relaxed'`` semantics, which matches the historical
+    # ``pl.concat(..., how='diagonal_relaxed')`` used by the workers.
+    return pl.read_parquet([str(p) for p in parts])
+def count_rows(target: Path) -> int:
+    """Return the total row count across every part file under ``target``.
+    Uses ``pyarrow.parquet.ParquetFile.metadata`` so we read parquet footers
+    only — no row-group materialization.  Returns 0 when the cache is empty
+    or missing.  Pyarrow ships as a polars dependency, so this is free.
+    """
+    parts = iter_part_files(target)
+    if not parts:
+        return 0
+    import pyarrow.parquet as pq
+    total = 0
+    for p in parts:
+        total += int(pq.ParquetFile(str(p)).metadata.num_rows)
+    return total
+# Names exported by ``from claude_sql.parquet_shards import *``; mirrors the
+# "Public API" list in the module docstring plus the ``PART_GLOB`` constant.
+__all__ = [
+    "PART_GLOB",
+    "count_rows",
+    "is_sharded_dir",
+    "iter_part_files",
+    "read_all",
+    "write_part",
+]

claude_sql/retry_queue.py ADDED Viewed

@@ -0,0 +1,180 @@
+"""Durable retry queue backed by the persistent claude-sql DuckDB file.
+When a Bedrock call fails in a way that's worth retrying (parse failure,
+throttle that outlived tenacity's stop-after-attempt, transient model
+error), the unit of work gets enqueued here. A later run drains the
+queue before starting fresh work, so a mid-run crash never costs us the
+rows we already paid for.
+One row per ``(pipeline, unit_id)``. ``unit_id`` is ``session_id`` for
+``classify`` / ``conflicts`` and the message ``uuid`` for ``trajectory``.
+Semantics are "upsert with attempt counter":
+- First failure  → insert with attempts=1, next_attempt_at = now + 2 min.
+- Retry failure  → update attempts += 1, next_attempt_at = now + 2^attempts min (cap 60).
+- Retry success  → ``completed_at`` stamped; row stays as audit trail.
+Lives in the same ``~/.claude/claude_sql.duckdb`` as the checkpoint
+table so a single file holds all durable worker state.
+"""
+from __future__ import annotations
+import time
+from collections.abc import Iterable
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+import duckdb
+from claude_sql.checkpointer import PIPELINE_NAMES
+# Default for ``drain``'s ``max_attempts``: rows whose ``attempts`` has
+# reached this value are no longer returned for retry.
+MAX_ATTEMPTS_DEFAULT: int = 5
+# Upper bound, in minutes, on the delay computed by ``_backoff_delta``.
+_BACKOFF_CAP_MIN: int = 60
+# DDL executed on every ``_connect``; ``IF NOT EXISTS`` makes it safe to
+# repeat.  The primary key enforces one row per ``(pipeline, unit_id)``, and
+# ``completed_at`` stays NULL until ``mark_done`` stamps it.
+_CREATE_TABLE_SQL = """
+CREATE TABLE IF NOT EXISTS retry_queue (
+    pipeline        VARCHAR   NOT NULL,
+    unit_id         VARCHAR   NOT NULL,
+    error           VARCHAR   NOT NULL,
+    attempts        INTEGER   NOT NULL DEFAULT 0,
+    next_attempt_at TIMESTAMP NOT NULL,
+    created_at      TIMESTAMP NOT NULL,
+    completed_at    TIMESTAMP,
+    PRIMARY KEY (pipeline, unit_id)
+);
+"""
+def _connect(path: Path, *, max_attempts: int = 20) -> duckdb.DuckDBPyConnection:
+    """Open the queue DB, retrying on lock contention.
+    The same DB file backs both ``session_checkpoint`` and ``retry_queue``;
+    three pipelines running in parallel will occasionally collide on the
+    file lock. Retry with exponential backoff so concurrent callers
+    serialize instead of crashing.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    delay = 0.05
+    last_err: duckdb.IOException | None = None
+    for _ in range(max_attempts):
+        try:
+            con = duckdb.connect(str(path))
+            con.execute(_CREATE_TABLE_SQL)
+            return con
+        except duckdb.IOException as exc:
+            last_err = exc
+            time.sleep(delay)
+            delay = min(delay * 1.5, 1.6)
+    assert last_err is not None  # noqa: S101 — loop-postcondition invariant
+    raise last_err
+def _backoff_delta(attempts: int) -> timedelta:
+    """Exponential backoff in minutes: 2, 4, 8, 16, 32, capped at 60."""
+    minutes = min(2**attempts, _BACKOFF_CAP_MIN)
+    return timedelta(minutes=minutes)
+def enqueue(
+    db_path: Path,
+    *,
+    pipeline: str,
+    unit_id: str,
+    error: str,
+    now: datetime | None = None,
+) -> int:
+    """Record a failure.  Increments ``attempts`` on repeat calls.
+    Returns the resulting ``attempts`` value for logging.
+    """
+    if pipeline not in PIPELINE_NAMES:
+        raise ValueError(f"unknown pipeline: {pipeline!r}")
+    cur = (now or datetime.now(UTC)).replace(tzinfo=None)
+    con = _connect(db_path)
+    try:
+        row = con.execute(
+            "SELECT attempts FROM retry_queue WHERE pipeline = ? AND unit_id = ?",
+            [pipeline, unit_id],
+        ).fetchone()
+        prev = int(row[0]) if row else 0
+        attempts = prev + 1
+        next_at = cur + _backoff_delta(attempts)
+        con.execute(
+            "INSERT OR REPLACE INTO retry_queue "
+            "(pipeline, unit_id, error, attempts, next_attempt_at, created_at, completed_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, NULL)",
+            [pipeline, unit_id, error[:2000], attempts, next_at, cur],
+        )
+    finally:
+        con.close()
+    return attempts
+def drain(
+    db_path: Path,
+    *,
+    pipeline: str,
+    now: datetime | None = None,
+    max_attempts: int = MAX_ATTEMPTS_DEFAULT,
+    limit: int | None = None,
+) -> list[str]:
+    """Return unit_ids eligible for retry (not completed, attempts<max, due now)."""
+    if not db_path.exists():
+        return []
+    cur = (now or datetime.now(UTC)).replace(tzinfo=None)
+    con = _connect(db_path)
+    sql = (
+        "SELECT unit_id FROM retry_queue "
+        "WHERE pipeline = ? AND completed_at IS NULL "
+        "  AND attempts < ? AND next_attempt_at <= ? "
+        "ORDER BY next_attempt_at"
+    )
+    params: list[object] = [pipeline, max_attempts, cur]
+    if limit is not None:
+        sql += " LIMIT ?"
+        params.append(int(limit))
+    try:
+        rows = con.execute(sql, params).fetchall()
+    finally:
+        con.close()
+    return [str(r[0]) for r in rows]
+def mark_done(
+    db_path: Path,
+    *,
+    pipeline: str,
+    unit_ids: Iterable[str],
+    now: datetime | None = None,
+) -> int:
+    """Mark the given unit_ids as completed. No-op if unknown."""
+    ids = list(unit_ids)
+    if not ids:
+        return 0
+    cur = (now or datetime.now(UTC)).replace(tzinfo=None)
+    con = _connect(db_path)
+    try:
+        con.executemany(
+            "UPDATE retry_queue SET completed_at = ? "
+            "WHERE pipeline = ? AND unit_id = ? AND completed_at IS NULL",
+            [(cur, pipeline, uid) for uid in ids],
+        )
+    finally:
+        con.close()
+    return len(ids)
+def pending_count(db_path: Path, *, pipeline: str) -> int:
+    """Count not-yet-completed rows for one pipeline."""
+    if not db_path.exists():
+        return 0
+    con = _connect(db_path)
+    try:
+        row = con.execute(
+            "SELECT count(*) FROM retry_queue WHERE pipeline = ? AND completed_at IS NULL",
+            [pipeline],
+        ).fetchone()
+    finally:
+        con.close()
+    return int(row[0]) if row else 0