claude-sql 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ """Loguru configuration for claude-sql.
2
+
3
+ Single ``configure_logging()`` helper that removes the default loguru handler
4
+ and re-installs a stderr handler honoring ``--verbose`` / ``--quiet`` flags and
5
+ ``LOGURU_LEVEL``.
6
+
7
+ Also provides ``loguru_before_sleep`` so workers can pass tenacity a
8
+ loguru-native callback (instead of stdlib ``logging.getLogger`` +
9
+ ``before_sleep_log``). Keeps the package single-logger across the board.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import sys
16
+ from collections.abc import Callable
17
+ from typing import TYPE_CHECKING
18
+
19
+ from loguru import logger
20
+
21
+ if TYPE_CHECKING:
22
+ from tenacity import RetryCallState
23
+
24
# Loguru handler format: HH:mm:ss timestamp, level name left-aligned to seven
# characters, the record's ``extra`` mapping, then the message itself.
_FORMAT = "<green>{time:HH:mm:ss}</green> <level>{level:<7}</level> {extra} {message}"
25
+
26
+
27
def configure_logging(verbose: bool = False, quiet: bool = False) -> None:  # noqa: FBT001, FBT002 — CLI flag pass-through
    """Replace every loguru handler with a single stderr handler.

    The level is chosen in priority order: ``verbose`` wins, then ``quiet``,
    and otherwise the ``LOGURU_LEVEL`` environment variable (``INFO`` when it
    is unset).

    Parameters
    ----------
    verbose
        When true the handler logs at ``DEBUG``, regardless of ``quiet``.
    quiet
        When true (and ``verbose`` is false) the handler logs at ``ERROR``.
    """
    # Drop whatever handlers are currently installed, including loguru's default.
    logger.remove()

    chosen_level = (
        "DEBUG"
        if verbose
        else "ERROR"
        if quiet
        else os.getenv("LOGURU_LEVEL", "INFO")
    )

    handler_options = {
        "level": chosen_level,
        "format": _FORMAT,
        "backtrace": False,
        "diagnose": False,
    }
    logger.add(sys.stderr, **handler_options)
51
+
52
+
53
def loguru_before_sleep(level: str = "WARNING") -> Callable[[RetryCallState], None]:
    """Build a tenacity ``before_sleep`` hook that reports retries through loguru.

    The emitted message names the retried function, the upcoming sleep in
    seconds, and either the exception that was raised or the value that was
    returned by the attempt that triggered the retry.

    Parameters
    ----------
    level
        Loguru level name passed to ``logger.log`` for every retry message.

    Returns
    -------
    Callable
        A callback taking a ``RetryCallState`` and returning ``None``.
    """

    def _before_sleep(retry_state: RetryCallState) -> None:
        # Nothing to report unless there is both an attempt outcome and a
        # scheduled next action.
        outcome = retry_state.outcome
        if outcome is None:
            return
        upcoming = retry_state.next_action
        if upcoming is None:
            return

        wrapped = retry_state.fn
        fn_name = (
            "<unknown>"
            if wrapped is None
            else getattr(wrapped, "__qualname__", repr(wrapped))
        )

        if not outcome.failed:
            verb = "returned"
            value = outcome.result()
        else:
            error = outcome.exception()
            verb = "raised"
            value = f"{error.__class__.__name__}: {error}"

        logger.log(
            level,
            "Retrying {} in {:.3g} seconds as it {} {}.",
            fn_name,
            upcoming.sleep,
            verb,
            value,
        )

    return _before_sleep
claude_sql/output.py ADDED
@@ -0,0 +1,248 @@
1
+ """Agent-friendly output formatting and error handling for claude-sql.
2
+
3
+ Every CLI subcommand emits results through :func:`emit_dataframe` (for tabular
4
+ output) or :func:`emit_json` (for structured non-tabular output). The caller
5
+ picks a :class:`OutputFormat`; when it is :data:`OutputFormat.AUTO` the
6
+ formatter picks ``TABLE`` on a TTY and ``JSON`` otherwise so pipes and agent
7
+ subprocesses get machine-readable output without a flag.
8
+
9
+ Errors from DuckDB get classified and mapped to stable exit codes so agents
10
+ can distinguish *parse failed* from *unknown view* from *runtime error*
11
+ without pattern-matching tracebacks. See :data:`EXIT_CODES`.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import sys
18
+ from dataclasses import dataclass
19
+ from enum import StrEnum
20
+ from typing import Any
21
+
22
+ import duckdb
23
+ import polars as pl
24
+
25
+
26
+ class OutputFormat(StrEnum):
27
+ """Supported output formats.
28
+
29
+ ``AUTO`` resolves to ``TABLE`` when stdout is a TTY and ``JSON`` otherwise.
30
+ Keeping it a string Enum lets cyclopts parse ``--format json`` without any
31
+ custom converter.
32
+ """
33
+
34
+ AUTO = "auto"
35
+ TABLE = "table"
36
+ JSON = "json"
37
+ NDJSON = "ndjson"
38
+ CSV = "csv"
39
+ MARKDOWN = "markdown"
40
+
41
+
42
# Process exit codes keyed by error kind. Agents depend on these values, so
# treat them as a stable contract and do not renumber them.
EXIT_CODES: dict[str, int] = {
    "ok": 0,
    "no_embeddings": 2,
    # malformed user-supplied flags (e.g. --glob)
    "invalid_input": 64,
    # malformed SQL
    "parse_error": 64,
    # unknown view/macro/column
    "catalog_error": 65,
    # everything else from duckdb.Error
    "runtime_error": 70,
    # system `duckdb` binary not on PATH
    "duckdb_missing": 127,
}
53
+
54
+
55
def resolve_format(fmt: OutputFormat | str) -> OutputFormat:
    """Turn ``fmt`` into a concrete :class:`OutputFormat`.

    Strings are converted through the enum constructor (so an unknown name
    raises ``ValueError``). Any explicit format is returned unchanged; only
    ``AUTO`` is replaced, by ``TABLE`` when stdout is a TTY and ``JSON``
    otherwise.
    """
    chosen = fmt
    if isinstance(chosen, str):
        chosen = OutputFormat(chosen)

    if chosen is OutputFormat.AUTO:
        if sys.stdout.isatty():
            return OutputFormat.TABLE
        return OutputFormat.JSON
    return chosen
61
+
62
+
63
def _markdown_cell(value: Any) -> str:
    """Render one value as the text of a Markdown pipe-table cell."""
    if value is None:
        return ""
    text = str(value)
    # A raw pipe would start a new column and a raw newline would end the row.
    return text.replace("|", "\\|").replace("\r\n", "<br>").replace("\n", "<br>")


def emit_dataframe(
    df: pl.DataFrame,
    fmt: OutputFormat | str = OutputFormat.AUTO,
    *,
    table_rows: int = 100,
    table_str_len: int = 120,
) -> None:
    """Write a polars DataFrame to stdout in the requested format.

    Parameters
    ----------
    df
        The frame to emit.
    fmt
        One of :class:`OutputFormat`. ``AUTO`` resolves per :func:`resolve_format`.
    table_rows
        Row cap for the pretty-printed table format only. JSON / NDJSON / CSV /
        Markdown always emit every row; rely on SQL ``LIMIT`` to cap upstream.
    table_str_len
        Column-cell string truncation for the table format only.

    Raises
    ------
    ValueError
        If ``fmt`` is a string that is not an :class:`OutputFormat` value, or
        resolves to a member this function has no branch for.
    """
    resolved = resolve_format(fmt)
    if resolved is OutputFormat.TABLE:
        with pl.Config(tbl_rows=table_rows, tbl_cols=20, fmt_str_lengths=table_str_len):
            print(df)
        return
    if resolved is OutputFormat.JSON:
        # ``write_json`` emits a JSON array of row objects -- the exact shape
        # agents expect for tabular results.
        sys.stdout.write(df.write_json())
        sys.stdout.write("\n")
        return
    if resolved is OutputFormat.NDJSON:
        df.write_ndjson(sys.stdout)
        return
    if resolved is OutputFormat.CSV:
        df.write_csv(sys.stdout)
        return
    if resolved is OutputFormat.MARKDOWN:
        # ``MARKDOWN`` is a declared member, so it must not fall through to the
        # error below. Render a plain pipe table: header, separator, all rows.
        columns = df.columns
        if columns:
            lines = [
                "| " + " | ".join(_markdown_cell(name) for name in columns) + " |",
                "| " + " | ".join("---" for _ in columns) + " |",
            ]
            lines.extend(
                "| " + " | ".join(_markdown_cell(cell) for cell in row) + " |"
                for row in df.iter_rows()
            )
            sys.stdout.write("\n".join(lines) + "\n")
        return
    # Defensive: only reachable if a new OutputFormat member is added without
    # a matching branch above.
    raise ValueError(f"Unsupported format: {resolved}")
103
+
104
+
105
def emit_json(payload: Any, fmt: OutputFormat | str = OutputFormat.AUTO) -> None:
    """Dump a non-tabular payload to stdout as indented JSON.

    ``fmt`` is run through :func:`resolve_format` (so an invalid format name
    still raises) but does not otherwise influence the output: every format
    produces the same JSON document, because CSV or NDJSON over a nested
    mapping has no sensible meaning. Callers that want a human ``TABLE``
    layout render it themselves.

    Values that ``json`` cannot serialize natively are stringified via
    ``default=str``.
    """
    # Validation only; the resolved value is intentionally not used.
    resolve_format(fmt)
    document = json.dumps(payload, indent=2, default=str)
    sys.stdout.write(document)
    sys.stdout.write("\n")
117
+
118
+
119
+ @dataclass(frozen=True, slots=True)
120
+ class ClassifiedError:
121
+ """The structured shape of a CLI error after classification."""
122
+
123
+ kind: str # "parse_error" | "catalog_error" | "runtime_error"
124
+ exit_code: int
125
+ message: str
126
+ hint: str | None = None
127
+
128
+ def to_payload(self) -> dict[str, Any]:
129
+ return {
130
+ "error": {
131
+ "kind": self.kind,
132
+ "message": self.message,
133
+ "hint": self.hint,
134
+ }
135
+ }
136
+
137
+
138
class InputValidationError(ValueError):
    """Signal that a user-supplied flag value (such as ``--glob``) is malformed.

    Besides the message, the exception stores an optional ``hint`` attribute
    describing the fix, which ``run_or_die`` forwards into the reported error
    under the ``invalid_input`` kind (exit code 64).
    """

    def __init__(self, message: str, *, hint: str | None = None) -> None:
        # Store the hint first; the base class only needs the message.
        self.hint = hint
        super().__init__(message)
148
+
149
+
150
def validate_glob(pattern: str | None, *, flag: str = "--glob") -> None:
    """Raise early for glob patterns with more than one ``**`` segment.

    DuckDB fails with ``IO Error: Cannot use multiple '**' in one path`` on
    such globs; checking up front lets the failure carry a helpful hint.

    ``None`` and the empty string pass through untouched, leaving the caller
    free to substitute its default glob.

    Raises
    ------
    InputValidationError
        If ``pattern`` contains ``**`` more than once.
    """
    recursive_segments = pattern.count("**") if pattern else 0
    if recursive_segments <= 1:
        return

    problem = (
        f"{flag} pattern {pattern!r} contains more than one '**' segment; "
        "DuckDB's read_json rejects it."
    )
    remedy = (
        "use at most one '**' recursive wildcard -- e.g. "
        "'/home/you/.claude/projects/**/*.jsonl' or "
        "'/home/you/.claude/projects/<project>/*.jsonl'"
    )
    raise InputValidationError(problem, hint=remedy)
172
+
173
+
174
def classify_duckdb_error(exc: duckdb.Error) -> ClassifiedError:
    """Map a ``duckdb.Error`` onto a stable kind, exit code and optional hint.

    ``duckdb.ParserException`` becomes ``parse_error`` and
    ``duckdb.CatalogException`` becomes ``catalog_error``; any other
    ``duckdb.Error`` is reported as ``runtime_error`` without a hint. The
    message is always ``str(exc)``.
    """
    hint: str | None
    if isinstance(exc, duckdb.ParserException):
        kind = "parse_error"
        hint = "check SQL syntax; try `claude-sql schema --format json` for view/macro names"
    elif isinstance(exc, duckdb.CatalogException):
        kind = "catalog_error"
        hint = "unknown view or column; run `claude-sql schema --format json` for the catalog"
    else:
        kind = "runtime_error"
        hint = None

    return ClassifiedError(
        kind=kind,
        exit_code=EXIT_CODES[kind],
        message=str(exc),
        hint=hint,
    )
202
+
203
+
204
def emit_error(err: ClassifiedError, fmt: OutputFormat | str = OutputFormat.AUTO) -> None:
    """Write a classified error to stderr; the caller decides whether to exit.

    With the ``TABLE`` format the error is printed as ``[kind] message``
    followed by an optional ``hint: ...`` line. Every other format gets the
    single-line JSON form of :meth:`ClassifiedError.to_payload`.

    ``AUTO`` is resolved by :func:`resolve_format`, which inspects stdout
    even though the text is written to stderr.
    """
    if resolve_format(fmt) is not OutputFormat.TABLE:
        sys.stderr.write(json.dumps(err.to_payload(), default=str))
        sys.stderr.write("\n")
        return

    tag = f"[{err.kind}]"
    sys.stderr.write(f"{tag} {err.message}\n")
    if err.hint:
        sys.stderr.write(f"hint: {err.hint}\n")
220
+
221
+
222
def run_or_die(
    fn: Any,
    *args: Any,
    fmt: OutputFormat | str = OutputFormat.AUTO,
    **kwargs: Any,
) -> Any:
    """Call ``fn(*args, **kwargs)``, converting known failures into exit codes.

    The result of ``fn`` is returned on success. An
    :class:`InputValidationError` is reported as ``invalid_input`` (carrying
    the exception's hint) and a :class:`duckdb.Error` is reported via
    :func:`classify_duckdb_error`; in both cases the error is written with
    :func:`emit_error` and the process ends through :func:`sys.exit` with the
    matching code. Any other exception propagates unchanged.
    """

    def _report_and_exit(failure: ClassifiedError) -> None:
        # Shared tail of both handlers: print the error, then terminate.
        emit_error(failure, fmt)
        sys.exit(failure.exit_code)

    try:
        return fn(*args, **kwargs)
    except InputValidationError as bad_input:
        _report_and_exit(
            ClassifiedError(
                kind="invalid_input",
                exit_code=EXIT_CODES["invalid_input"],
                message=str(bad_input),
                hint=bad_input.hint,
            )
        )
    except duckdb.Error as db_error:
        _report_and_exit(classify_duckdb_error(db_error))
@@ -0,0 +1,172 @@
1
+ """Sharded parquet I/O helpers for the five worker-append caches.
2
+
3
+ Background
4
+ ----------
5
+ Workers ``embed_worker``, ``llm_worker`` (classify / trajectory / conflicts),
6
+ and ``friction_worker`` previously used a "read whole parquet → concat →
7
+ rewrite whole parquet" pattern on every chunk. At ~50 MB this is roughly
8
+ 1.5 s of pure IO per chunk × 100 chunks per backfill = ~150 s wasted on a
9
+ single full backfill. This module replaces that with a directory-of-parts
10
+ pattern: each chunk writes ``<dir>/part-<ts_ns>.parquet`` and readers glob
11
+ the directory.
12
+
13
+ Design (intentional deviation from the original plan)
14
+ -----------------------------------------------------
15
+ The plan called for renaming five ``Settings.*_parquet_path`` fields to
16
+ ``*_dir_path``. That would cascade across ~30 call sites in the CLI, SQL
17
+ views, and tests for no semantic gain. Instead we keep the field names and
18
+ overload their meaning:
19
+
20
+ * If the path is a *directory* (or doesn't yet exist on disk and has no ``.parquet`` suffix), it's a sharded
21
+ cache — :func:`write_part` drops a fresh ``part-<ts_ns>.parquet`` into it,
22
+ and :func:`read_all` / :func:`iter_part_files` glob the directory.
23
+ * If the path is a *file*, the legacy single-file behavior kicks in —
24
+ :func:`write_part` does the read-then-rewrite, :func:`read_all` reads the
25
+ one file, etc.
26
+
27
+ New installs get directories (the field default factories in ``config.py``
28
+ were updated). Existing single-file caches keep working until migrated; see
29
+ ``claude-sql cache migrate``.
30
+
31
+ Public API
32
+ ----------
33
+ * :func:`is_sharded_dir` — does this path point at a sharded directory?
34
+ * :func:`write_part` — append by writing a fresh part (or legacy rewrite).
35
+ * :func:`read_all` — load the union of all parts (or the legacy single file).
36
+ * :func:`iter_part_files` — sorted list of part files (or ``[target]``).
37
+ * :func:`count_rows` — sum of row counts across parts (or single file).
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import time
43
+ from pathlib import Path
44
+ from typing import Any
45
+
46
+ import polars as pl
47
+
48
#: Filename glob matching the individual shard files inside a sharded cache
#: directory.
PART_GLOB: str = "part-*.parquet"
50
+
51
+
52
def is_sharded_dir(path: Path) -> bool:
    """Decide whether ``path`` should be handled as a sharded cache directory.

    * An existing directory is sharded.
    * An existing non-directory (a file) is the legacy single-file layout.
    * A path that does not exist is sharded unless its suffix is
      ``.parquet``; that suffix keeps not-yet-created ``x.parquet`` targets on
      the legacy code path.
    """
    if path.is_dir():
        return True
    if path.exists():
        # Present on disk but not a directory: legacy single file.
        return False
    # Missing path: fall back to the suffix heuristic.
    looks_like_legacy_file = path.suffix == ".parquet"
    return not looks_like_legacy_file
68
+
69
+
70
def iter_part_files(target: Path) -> list[Path]:
    """List the parquet files that make up the cache at ``target``.

    * Directory: every entry matching :data:`PART_GLOB`, sorted by path.
    * Existing file: a one-element list holding ``target`` itself.
    * Missing path: an empty list.
    """
    if target.is_dir():
        shard_files = list(target.glob(PART_GLOB))
        shard_files.sort()
        return shard_files
    return [target] if target.exists() else []
83
+
84
+
85
def _atomic_write_parquet(df: pl.DataFrame, dest: Path) -> None:
    """Write ``df`` to a hidden temp file beside ``dest``, then rename it into place."""
    # The temp name starts with "." and ends with ".tmp", so it matches neither
    # ``PART_GLOB`` nor a ``*.parquet`` glob while the write is in progress.
    scratch = dest.with_name(f".{dest.name}.tmp")
    try:
        df.write_parquet(scratch)
        # A same-directory rename means readers see either the complete file
        # or no file at all, never a truncated one.
        scratch.replace(dest)
    except BaseException:
        scratch.unlink(missing_ok=True)
        raise


def write_part(target: Path, df: pl.DataFrame) -> Path:
    """Write ``df`` as a new shard (or rewrite the legacy single file).

    Sharded directory branch:
        Ensure ``target`` exists as a directory and drop a brand-new
        ``part-<ns>.parquet`` into it. No read-then-rewrite — append cost is
        proportional to ``len(df)`` only.

    Legacy single-file branch:
        Load the existing parquet (if larger than 16 bytes), concat ``df``
        onto the tail, and rewrite the whole file. Preserves the historical
        behavior for users who haven't migrated yet.

    In both branches the data goes to a temporary sibling file first and is
    then renamed into place. Concurrent readers therefore never see a
    half-written parquet, and a crash mid-write leaves any previous file
    intact.

    Parameters
    ----------
    target
        Either a sharded cache directory or a legacy ``*.parquet`` file path.
    df
        The polars DataFrame to persist.

    Returns
    -------
    Path
        The path that was actually written (a part file in the sharded case,
        or ``target`` itself in the legacy case).
    """
    if is_sharded_dir(target):
        target.mkdir(parents=True, exist_ok=True)
        # Wall-clock nanoseconds keep shard names sortable in roughly
        # chronological order. The clock is not guaranteed to be unique or
        # monotonic, so bump the stamp rather than overwrite an existing shard.
        stamp = time.time_ns()
        part_path = target / f"part-{stamp}.parquet"
        while part_path.exists():
            stamp += 1
            part_path = target / f"part-{stamp}.parquet"
        _atomic_write_parquet(df, part_path)
        return part_path

    # Legacy single-file branch.
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists() and target.stat().st_size > 16:
        existing = pl.read_parquet(target)
        df = pl.concat([existing, df], how="diagonal_relaxed")
    _atomic_write_parquet(df, target)
    return target
127
+
128
+
129
def read_all(target: Path, *, dtypes: dict[str, Any] | None = None) -> pl.DataFrame | None:
    """Return the union of all part files (or the legacy single file).

    Parts are read one by one and combined with
    ``pl.concat(..., how="diagonal_relaxed")``, the same strategy the legacy
    branch of ``write_part`` uses. Shards whose schemas differ (a column
    missing from one part, or a column with a different dtype in one part)
    are reconciled instead of raising a schema error.

    Parameters
    ----------
    target
        Sharded cache directory or legacy single parquet file.
    dtypes
        Accepted for forward-compatibility but currently unused.

    Returns
    -------
    pl.DataFrame | None
        The combined frame, or ``None`` when the cache is empty or missing.
    """
    del dtypes  # reserved for future schema-pinning; not needed today
    parts = iter_part_files(target)
    if not parts:
        return None
    frames = [pl.read_parquet(part) for part in parts]
    if len(frames) == 1:
        # Single shard or legacy file: nothing to reconcile.
        return frames[0]
    # ``parts`` is sorted by name, so rows keep shard order.
    return pl.concat(frames, how="diagonal_relaxed")
145
+
146
+
147
def count_rows(target: Path) -> int:
    """Sum the row counts recorded in the metadata of every part file.

    Row counts come from ``pyarrow.parquet.ParquetFile(...).metadata.num_rows``
    for each file returned by :func:`iter_part_files`. ``pyarrow`` is imported
    lazily, and only when there is at least one file to inspect, so it must
    be importable in that case.

    Returns
    -------
    int
        Total number of rows, or ``0`` when the cache is empty or missing.
    """
    shard_files = iter_part_files(target)
    if not shard_files:
        return 0

    import pyarrow.parquet as pq

    return sum(
        int(pq.ParquetFile(str(shard)).metadata.num_rows) for shard in shard_files
    )
163
+
164
+
165
# Names exported by ``from <module> import *``; kept in sorted order.
__all__ = [
    "PART_GLOB",
    "count_rows",
    "is_sharded_dir",
    "iter_part_files",
    "read_all",
    "write_part",
]
@@ -0,0 +1,180 @@
1
+ """Durable retry queue backed by the persistent claude-sql DuckDB file.
2
+
3
+ When a Bedrock call fails in a way that's worth retrying (parse failure,
4
+ throttle that outlived tenacity's stop-after-attempt, transient model
5
+ error), the unit of work gets enqueued here. A later run drains the
6
+ queue before starting fresh work, so a mid-run crash never costs us the
7
+ rows we already paid for.
8
+
9
+ One row per ``(pipeline, unit_id)``. ``unit_id`` is ``session_id`` for
10
+ ``classify`` / ``conflicts`` and the message ``uuid`` for ``trajectory``.
11
+ Semantics are "upsert with attempt counter":
12
+
13
+ - First failure → insert with attempts=1, next_attempt_at = now + 2 min.
14
+ - Retry failure → update attempts += 1, next_attempt_at = now + 2^attempts min (cap 60).
15
+ - Retry success → ``completed_at`` stamped; row stays as audit trail.
16
+
17
+ Lives in the same ``~/.claude/claude_sql.duckdb`` as the checkpoint
18
+ table so a single file holds all durable worker state.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import time
24
+ from collections.abc import Iterable
25
+ from datetime import UTC, datetime, timedelta
26
+ from pathlib import Path
27
+
28
+ import duckdb
29
+
30
+ from claude_sql.checkpointer import PIPELINE_NAMES
31
+
32
# Default attempt ceiling used by ``drain``: rows whose ``attempts`` has
# reached this value are no longer returned for retry.
MAX_ATTEMPTS_DEFAULT: int = 5
# Upper bound, in minutes, on the delay computed by ``_backoff_delta``.
_BACKOFF_CAP_MIN: int = 60

# DDL executed by ``_connect`` on every successful open. ``IF NOT EXISTS``
# makes it safe to repeat; the primary key enforces one row per
# ``(pipeline, unit_id)``.
_CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS retry_queue (
    pipeline VARCHAR NOT NULL,
    unit_id VARCHAR NOT NULL,
    error VARCHAR NOT NULL,
    attempts INTEGER NOT NULL DEFAULT 0,
    next_attempt_at TIMESTAMP NOT NULL,
    created_at TIMESTAMP NOT NULL,
    completed_at TIMESTAMP,
    PRIMARY KEY (pipeline, unit_id)
);
"""
47
+
48
+
49
def _connect(path: Path, *, max_attempts: int = 20) -> duckdb.DuckDBPyConnection:
    """Open the queue DB, retrying on lock contention.

    The same DB file backs both ``session_checkpoint`` and ``retry_queue``;
    pipelines running in parallel will occasionally collide on the file
    lock. Retry with a growing delay (starting at 0.05 s, multiplied by 1.5
    each time, capped at 1.6 s) so concurrent callers serialize instead of
    crashing.

    Parameters
    ----------
    path
        Location of the DuckDB file; its parent directory is created if needed.
    max_attempts
        How many times to try opening the DB before giving up. Must be >= 1.

    Returns
    -------
    duckdb.DuckDBPyConnection
        An open connection on which ``retry_queue`` is guaranteed to exist.

    Raises
    ------
    ValueError
        If ``max_attempts`` is less than 1.
    duckdb.IOException
        If the final attempt still fails with an I/O (lock) error.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts!r}")
    path.parent.mkdir(parents=True, exist_ok=True)
    delay = 0.05
    for attempt in range(1, max_attempts + 1):
        con: duckdb.DuckDBPyConnection | None = None
        try:
            con = duckdb.connect(str(path))
            con.execute(_CREATE_TABLE_SQL)
        except duckdb.IOException:
            # Do not leak a connection that opened but then failed on the DDL.
            if con is not None:
                con.close()
            if attempt == max_attempts:
                # Out of retries: surface the last error without another sleep.
                raise
            time.sleep(delay)
            delay = min(delay * 1.5, 1.6)
        except BaseException:
            # Non-retryable failure: release the handle and propagate.
            if con is not None:
                con.close()
            raise
        else:
            return con
    # Every iteration either returns or raises; this line satisfies type checkers.
    raise RuntimeError("unreachable: retry loop exited without a result")
71
+
72
+
73
def _backoff_delta(attempts: int) -> timedelta:
    """Return the retry delay: ``2 ** attempts`` minutes, capped at ``_BACKOFF_CAP_MIN``."""
    return timedelta(minutes=min(2**attempts, _BACKOFF_CAP_MIN))
77
+
78
+
79
def enqueue(
    db_path: Path,
    *,
    pipeline: str,
    unit_id: str,
    error: str,
    now: datetime | None = None,
) -> int:
    """Record a failure. Increments ``attempts`` on repeat calls.

    The row for ``(pipeline, unit_id)`` is inserted on the first failure and
    replaced on later ones: ``attempts`` goes up by one, ``next_attempt_at``
    is pushed out by the backoff for the new attempt count, ``completed_at``
    is cleared, and the original ``created_at`` is kept so the row still
    records when the unit first failed.

    Parameters
    ----------
    db_path
        Path to the DuckDB file holding ``retry_queue``.
    pipeline
        Pipeline name; must be a member of ``PIPELINE_NAMES``.
    unit_id
        Identifier of the failed unit of work.
    error
        Error description; only the first 2000 characters are stored.
    now
        Reference time. Defaults to the current UTC time. A timezone-aware
        value is converted to UTC; a naive value is used as-is.

    Returns
    -------
    int
        The resulting ``attempts`` value, for logging.

    Raises
    ------
    ValueError
        If ``pipeline`` is not a known pipeline name.
    """
    if pipeline not in PIPELINE_NAMES:
        raise ValueError(f"unknown pipeline: {pipeline!r}")
    moment = datetime.now(UTC) if now is None else now
    if moment.tzinfo is not None:
        # Convert before dropping tzinfo so a non-UTC aware value is not
        # stored as if its local wall-clock time were UTC.
        moment = moment.astimezone(UTC)
    cur = moment.replace(tzinfo=None)
    con = _connect(db_path)
    try:
        row = con.execute(
            "SELECT attempts, created_at FROM retry_queue WHERE pipeline = ? AND unit_id = ?",
            [pipeline, unit_id],
        ).fetchone()
        if row is None:
            attempts = 1
            created_at = cur
        else:
            attempts = int(row[0]) + 1
            # Keep the first-failure timestamp across the REPLACE.
            created_at = row[1]
        next_at = cur + _backoff_delta(attempts)
        con.execute(
            "INSERT OR REPLACE INTO retry_queue "
            "(pipeline, unit_id, error, attempts, next_attempt_at, created_at, completed_at) "
            "VALUES (?, ?, ?, ?, ?, ?, NULL)",
            [pipeline, unit_id, error[:2000], attempts, next_at, created_at],
        )
    finally:
        con.close()
    return attempts
112
+
113
+
114
def drain(
    db_path: Path,
    *,
    pipeline: str,
    now: datetime | None = None,
    max_attempts: int = MAX_ATTEMPTS_DEFAULT,
    limit: int | None = None,
) -> list[str]:
    """Return unit_ids eligible for retry (not completed, attempts<max, due now).

    Parameters
    ----------
    db_path
        Path to the DuckDB file; when it does not exist an empty list is
        returned without creating it.
    pipeline
        Pipeline whose rows are considered.
    now
        Reference time for the "due" comparison. Defaults to the current UTC
        time. A timezone-aware value is converted to UTC; a naive value is
        used as-is.
    max_attempts
        Rows with ``attempts`` at or above this value are skipped.
    limit
        Optional cap on the number of ids returned.

    Returns
    -------
    list[str]
        Matching ``unit_id`` values ordered by ``next_attempt_at``.
    """
    if not db_path.exists():
        return []
    moment = datetime.now(UTC) if now is None else now
    if moment.tzinfo is not None:
        # Convert before dropping tzinfo so a non-UTC aware value compares
        # correctly against the stored naive-UTC timestamps.
        moment = moment.astimezone(UTC)
    cur = moment.replace(tzinfo=None)
    sql = (
        "SELECT unit_id FROM retry_queue "
        "WHERE pipeline = ? AND completed_at IS NULL "
        "  AND attempts < ? AND next_attempt_at <= ? "
        "ORDER BY next_attempt_at"
    )
    params: list[object] = [pipeline, max_attempts, cur]
    if limit is not None:
        sql += " LIMIT ?"
        params.append(int(limit))
    # Open the connection only once the query is fully built, so a failure
    # above (e.g. a non-numeric ``limit``) cannot leak an open handle.
    con = _connect(db_path)
    try:
        rows = con.execute(sql, params).fetchall()
    finally:
        con.close()
    return [str(r[0]) for r in rows]
142
+
143
+
144
def mark_done(
    db_path: Path,
    *,
    pipeline: str,
    unit_ids: Iterable[str],
    now: datetime | None = None,
) -> int:
    """Mark the given unit_ids as completed. No-op if unknown.

    Only rows whose ``completed_at`` is still NULL are stamped, so an earlier
    completion time is never overwritten.

    Parameters
    ----------
    db_path
        Path to the DuckDB file holding ``retry_queue``.
    pipeline
        Pipeline the ids belong to.
    unit_ids
        Identifiers to mark; consumed once into a list.
    now
        Completion time to record. Defaults to the current UTC time. A
        timezone-aware value is converted to UTC; a naive value is used as-is.

    Returns
    -------
    int
        The number of ids supplied (not the number of rows actually
        updated); ``0`` when ``unit_ids`` is empty, in which case the DB is
        not opened.
    """
    ids = list(unit_ids)
    if not ids:
        return 0
    moment = datetime.now(UTC) if now is None else now
    if moment.tzinfo is not None:
        # Convert before dropping tzinfo so a non-UTC aware value is not
        # stored as if its local wall-clock time were UTC.
        moment = moment.astimezone(UTC)
    cur = moment.replace(tzinfo=None)
    con = _connect(db_path)
    try:
        con.executemany(
            "UPDATE retry_queue SET completed_at = ? "
            "WHERE pipeline = ? AND unit_id = ? AND completed_at IS NULL",
            [(cur, pipeline, uid) for uid in ids],
        )
    finally:
        con.close()
    return len(ids)
166
+
167
+
168
def pending_count(db_path: Path, *, pipeline: str) -> int:
    """Return how many ``retry_queue`` rows for ``pipeline`` lack a ``completed_at``.

    A missing database file yields ``0`` and is not created.
    """
    if not db_path.exists():
        return 0

    query = "SELECT count(*) FROM retry_queue WHERE pipeline = ? AND completed_at IS NULL"
    con = _connect(db_path)
    try:
        result = con.execute(query, [pipeline]).fetchone()
    finally:
        con.close()

    if not result:
        return 0
    return int(result[0])