claude-sql 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_sql/__init__.py +5 -0
- claude_sql/binding.py +740 -0
- claude_sql/blind_handover.py +155 -0
- claude_sql/checkpointer.py +202 -0
- claude_sql/cli.py +2344 -0
- claude_sql/cluster_worker.py +208 -0
- claude_sql/community_worker.py +306 -0
- claude_sql/config.py +380 -0
- claude_sql/embed_worker.py +482 -0
- claude_sql/freeze.py +189 -0
- claude_sql/friction_worker.py +561 -0
- claude_sql/install_source.py +77 -0
- claude_sql/judge_worker.py +459 -0
- claude_sql/judges.py +239 -0
- claude_sql/kappa_worker.py +257 -0
- claude_sql/llm_worker.py +1760 -0
- claude_sql/logging_setup.py +95 -0
- claude_sql/output.py +248 -0
- claude_sql/parquet_shards.py +172 -0
- claude_sql/retry_queue.py +180 -0
- claude_sql/review_sheet_render.py +167 -0
- claude_sql/review_sheet_worker.py +463 -0
- claude_sql/schemas.py +454 -0
- claude_sql/session_text.py +387 -0
- claude_sql/skills_catalog.py +354 -0
- claude_sql/sql_views.py +1751 -0
- claude_sql/terms_worker.py +145 -0
- claude_sql/ungrounded_worker.py +190 -0
- claude_sql-0.4.0.dist-info/METADATA +530 -0
- claude_sql-0.4.0.dist-info/RECORD +32 -0
- claude_sql-0.4.0.dist-info/WHEEL +4 -0
- claude_sql-0.4.0.dist-info/entry_points.txt +3 -0
claude_sql/cli.py
ADDED
|
@@ -0,0 +1,2344 @@
|
|
|
1
|
+
"""Cyclopts CLI entry point for ``claude-sql``.
|
|
2
|
+
|
|
3
|
+
Wires the ``claude-sql`` console script to its thirteen subcommands. Shared
|
|
4
|
+
flags -- ``--verbose`` / ``--quiet``, ``--glob``, ``--subagent-glob``,
|
|
5
|
+
``--format`` -- live on a flattened :class:`Common` dataclass so callers write
|
|
6
|
+
``claude-sql query ... --format json`` instead of ``--common.format json``.
|
|
7
|
+
|
|
8
|
+
Agent-friendly defaults
|
|
9
|
+
-----------------------
|
|
10
|
+
* ``--format auto`` emits a human table on a TTY and machine-readable JSON
|
|
11
|
+
when stdout is a pipe, so agents do not have to set a flag.
|
|
12
|
+
* DuckDB errors are classified into parse / catalog / runtime and mapped to
|
|
13
|
+
stable exit codes (64 / 65 / 70) with a JSON error payload on non-TTY.
|
|
14
|
+
* ``--quiet`` is honored by every subcommand; view registration goes to DEBUG
|
|
15
|
+
so the default stderr stays quiet for routine reads.
|
|
16
|
+
|
|
17
|
+
``asyncio`` and subprocess imports are performed lazily inside the relevant
|
|
18
|
+
commands so that the fast path (``schema``, ``query``, ``explain``) does not
|
|
19
|
+
drag extra modules into startup.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import json
|
|
25
|
+
import os
|
|
26
|
+
import re
|
|
27
|
+
import subprocess
|
|
28
|
+
import sys
|
|
29
|
+
import tempfile
|
|
30
|
+
import time
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
from datetime import UTC, datetime
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Annotated
|
|
35
|
+
|
|
36
|
+
import duckdb
|
|
37
|
+
import polars as pl
|
|
38
|
+
from cyclopts import App, Parameter
|
|
39
|
+
from loguru import logger
|
|
40
|
+
|
|
41
|
+
from claude_sql import (
|
|
42
|
+
binding as _binding,
|
|
43
|
+
blind_handover as _blind_handover,
|
|
44
|
+
checkpointer,
|
|
45
|
+
freeze as _freeze,
|
|
46
|
+
judge_worker as _judge_worker,
|
|
47
|
+
judges as _judge_catalog,
|
|
48
|
+
kappa_worker as _kappa_worker,
|
|
49
|
+
skills_catalog as _skills_catalog,
|
|
50
|
+
ungrounded_worker as _ungrounded_worker,
|
|
51
|
+
)
|
|
52
|
+
from claude_sql.cluster_worker import run_clustering
|
|
53
|
+
from claude_sql.community_worker import run_communities
|
|
54
|
+
from claude_sql.config import Settings
|
|
55
|
+
from claude_sql.embed_worker import embed_query, run_backfill
|
|
56
|
+
from claude_sql.friction_worker import detect_user_friction
|
|
57
|
+
from claude_sql.install_source import format_version
|
|
58
|
+
from claude_sql.llm_worker import classify_sessions, detect_conflicts, trajectory_messages
|
|
59
|
+
from claude_sql.logging_setup import configure_logging
|
|
60
|
+
from claude_sql.output import (
|
|
61
|
+
EXIT_CODES,
|
|
62
|
+
ClassifiedError,
|
|
63
|
+
InputValidationError,
|
|
64
|
+
OutputFormat,
|
|
65
|
+
emit_dataframe,
|
|
66
|
+
emit_error,
|
|
67
|
+
emit_json,
|
|
68
|
+
resolve_format,
|
|
69
|
+
run_or_die,
|
|
70
|
+
validate_glob,
|
|
71
|
+
)
|
|
72
|
+
from claude_sql.parquet_shards import (
|
|
73
|
+
count_rows,
|
|
74
|
+
is_sharded_dir,
|
|
75
|
+
iter_part_files,
|
|
76
|
+
)
|
|
77
|
+
from claude_sql.review_sheet_render import render_markdown, render_refusal_markdown
|
|
78
|
+
from claude_sql.review_sheet_worker import generate_review_sheet
|
|
79
|
+
from claude_sql.sql_views import (
|
|
80
|
+
describe_all,
|
|
81
|
+
list_macros,
|
|
82
|
+
register_all,
|
|
83
|
+
register_raw,
|
|
84
|
+
register_views,
|
|
85
|
+
)
|
|
86
|
+
from claude_sql.terms_worker import run_terms
|
|
87
|
+
|
|
88
|
+
_APP_HELP = """\
|
|
89
|
+
Zero-copy SQL + Cohere Embed v4 semantic search + Sonnet 4.6 analytics over
|
|
90
|
+
~/.claude/ JSONL transcripts (and their subagent sidecars).
|
|
91
|
+
|
|
92
|
+
Surfaces at a glance
|
|
93
|
+
--------------------
|
|
94
|
+
schema / list-cache / explain introspection (read-only, zero cost)
|
|
95
|
+
query / shell run SQL against 18 views + 14 macros
|
|
96
|
+
embed / search Cohere Embed v4 + HNSW cosine search
|
|
97
|
+
classify / trajectory / Sonnet 4.6 analytics -- each defaults to
|
|
98
|
+
conflicts / friction --dry-run; pass --no-dry-run to spend
|
|
99
|
+
cluster / terms / community UMAP+HDBSCAN, c-TF-IDF, Louvain
|
|
100
|
+
analyze composite pipeline over every stage above
|
|
101
|
+
|
|
102
|
+
Flag placement (important for agents)
|
|
103
|
+
-------------------------------------
|
|
104
|
+
All flags attach to a SUBCOMMAND, not the top-level binary. Correct:
|
|
105
|
+
claude-sql query --format json "SELECT 1"
|
|
106
|
+
claude-sql classify --no-dry-run --limit 5
|
|
107
|
+
Incorrect (flag gets swallowed as the subcommand argument):
|
|
108
|
+
claude-sql --format json query "SELECT 1"
|
|
109
|
+
|
|
110
|
+
Output & exit codes
|
|
111
|
+
-------------------
|
|
112
|
+
* --format {auto,table,json,ndjson,csv} on every subcommand. auto = table on
|
|
113
|
+
TTY / json on pipe, so `claude-sql <cmd> | jq` works without a flag.
|
|
114
|
+
* 0 success
|
|
115
|
+
* 2 missing embeddings parquet (run: claude-sql embed --since-days N --no-dry-run)
|
|
116
|
+
* 64 invalid input -- malformed --glob, unparseable SQL, bad flag
|
|
117
|
+
* 65 catalog error -- unknown view/column; run `claude-sql schema` for the catalog
|
|
118
|
+
* 70 runtime error -- everything else DuckDB raises (check --format json stderr)
|
|
119
|
+
* 127 system `duckdb` binary not on PATH (only affects `shell`)
|
|
120
|
+
|
|
121
|
+
Cost guard
|
|
122
|
+
----------
|
|
123
|
+
Every command that calls Bedrock (embed, classify, trajectory, conflicts,
|
|
124
|
+
friction, analyze) defaults to --dry-run. Dry-run emits a plan JSON to stdout
|
|
125
|
+
with candidate counts, estimated tokens, and dollar estimate -- agents can
|
|
126
|
+
parse that to decide whether to proceed. Real spend requires --no-dry-run.
|
|
127
|
+
|
|
128
|
+
Glob scoping (cheaper workers)
|
|
129
|
+
------------------------------
|
|
130
|
+
Narrow to one project with --glob to cut worker budget:
|
|
131
|
+
--glob "/home/you/.claude/projects/-efs-you-workplace-bonk/*.jsonl"
|
|
132
|
+
At most one '**' segment is allowed per pattern (DuckDB limitation) -- the
|
|
133
|
+
CLI rejects multi-star globs with a clear hint before DuckDB sees them.
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Single cyclopts application object; every subcommand below attaches to it
# via ``@app.command``. ``version`` is a callable so the string is computed
# lazily; ``help`` is the long agent-oriented help text defined above.
app = App(
    name="claude-sql",
    version=format_version,
    help=_APP_HELP,
)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@Parameter(name="*")
@dataclass
class Common:
    """Shared CLI flags flattened onto every subcommand.

    ``verbose`` and its paired ``--quiet`` negation both map to this single
    bool (cyclopts uses the ``negative=`` argument to wire the "opposite"
    flag onto the same field). ``quiet`` is the one extra concept the
    dataclass needs to carry: it cannot piggyback on ``verbose`` because
    the two states are not symmetric (verbose forces DEBUG, quiet forces
    ERROR, and the default is INFO).
    """

    # Force DEBUG-level logging (see _configure / configure_logging).
    verbose: bool = False
    # Force ERROR-level logging; deliberately not the inverse of ``verbose``
    # (three states: default INFO, verbose DEBUG, quiet ERROR).
    quiet: bool = False
    # Override the JSONL transcript glob consumed by the registered views.
    glob: str | None = None
    # Override the subagent sidecar glob, validated the same way as ``glob``.
    subagent_glob: str | None = None
    # Explicit flag name via Parameter — presumably because ``format``
    # shadows a builtin and the derived name needs pinning; TODO confirm.
    format: Annotated[OutputFormat, Parameter(name="--format")] = OutputFormat.AUTO
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _configure(common: Common | None) -> None:
    """Install loguru sinks from the shared ``--verbose`` / ``--quiet`` flags.

    A missing ``common`` (subcommand invoked without shared flags) behaves
    as if both flags were left at their defaults.
    """
    if common is None:
        configure_logging(verbose=False, quiet=False)
    else:
        configure_logging(verbose=common.verbose, quiet=common.quiet)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _fmt(common: Common | None) -> OutputFormat:
    """Return the output format a subcommand should honor.

    Falls back to ``AUTO`` (table on TTY, JSON on pipe) when no shared
    flags were supplied.
    """
    if common is None:
        return OutputFormat.AUTO
    return common.format
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
# Helpers
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _resolve_settings(common: Common | None) -> Settings:
    """Build :class:`Settings` from the environment, then layer CLI overrides.

    ``--glob`` / ``--subagent-glob`` are validated before DuckDB ever sees
    them (it cannot consume patterns like ``**/.../**``). A bad pattern is
    reported as a classified error and the process exits with code 64, so
    every subcommand gets identical treatment without per-call wrapping.
    """
    base = Settings()
    if common is None:
        return base

    # Validate both patterns in the same order the original flags appear;
    # the first failure wins and terminates the process.
    for pattern, flag in ((common.glob, "--glob"), (common.subagent_glob, "--subagent-glob")):
        try:
            validate_glob(pattern, flag=flag)
        except InputValidationError as exc:
            failure = ClassifiedError(
                kind="invalid_input",
                exit_code=EXIT_CODES["invalid_input"],
                message=str(exc),
                hint=exc.hint,
            )
            emit_error(failure, _fmt(common))
            sys.exit(failure.exit_code)

    overrides: dict[str, str] = {}
    if common.glob is not None:
        overrides["default_glob"] = common.glob
    if common.subagent_glob is not None:
        overrides["subagent_glob"] = common.subagent_glob
    return base.model_copy(update=overrides) if overrides else base
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
_PERCENT_LIMIT_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*%\s*$")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _resolve_memory_limit(limit: str) -> str:
|
|
219
|
+
"""Translate ``"<n>%"`` into an absolute size DuckDB accepts.
|
|
220
|
+
|
|
221
|
+
DuckDB's ``memory_limit`` parser only knows ``KB / MB / GB / TB`` and the
|
|
222
|
+
binary variants. Percentage strings are rejected, so we resolve them
|
|
223
|
+
against the host's reported total memory before the PRAGMA fires. Any
|
|
224
|
+
other form passes through unchanged so the env var can still pin an
|
|
225
|
+
absolute size like ``"4GB"`` directly.
|
|
226
|
+
"""
|
|
227
|
+
match = _PERCENT_LIMIT_RE.match(limit)
|
|
228
|
+
if match is None:
|
|
229
|
+
return limit.strip()
|
|
230
|
+
fraction = float(match.group(1)) / 100.0
|
|
231
|
+
try:
|
|
232
|
+
page_size = os.sysconf("SC_PAGE_SIZE")
|
|
233
|
+
phys_pages = os.sysconf("SC_PHYS_PAGES")
|
|
234
|
+
except (AttributeError, ValueError, OSError):
|
|
235
|
+
# Non-POSIX or restricted host — fall back to a conservative 4 GiB.
|
|
236
|
+
total_bytes = 4 * 1024**3
|
|
237
|
+
else:
|
|
238
|
+
total_bytes = page_size * phys_pages
|
|
239
|
+
target_mib = max(1, int((total_bytes * fraction) // (1024 * 1024)))
|
|
240
|
+
return f"{target_mib}MiB"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _open_connection(settings: Settings) -> duckdb.DuckDBPyConnection:
    """Open an in-memory DuckDB connection with every claude-sql object wired.

    Tuning PRAGMAs run before view registration so the registration queries
    themselves benefit from the higher thread count and from a spill
    directory on real disk (Amazon devboxes ship ``/tmp`` as a 4 GB tmpfs
    that thrashes once a clustering run starts spilling).
    """
    con = duckdb.connect(":memory:")
    settings.duckdb_temp_dir.mkdir(parents=True, exist_ok=True)
    tuning = (
        f"SET threads = {int(settings.duckdb_threads)}",
        f"SET memory_limit = '{_resolve_memory_limit(settings.duckdb_memory_limit)}'",
        f"SET temp_directory = '{settings.duckdb_temp_dir}'",
        "SET enable_object_cache = true",
        "SET preserve_insertion_order = false",
    )
    for pragma in tuning:
        con.execute(pragma)
    register_all(con, settings=settings)
    return con
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _emit_worker_result(result: int | dict, common: Common | None, pipeline: str) -> None:
    """Normalize a worker's return value into machine-readable stdout.

    Workers hand back either a plan ``dict`` (``--dry-run``) or an ``int``
    row count (real work). Agents parse stdout JSON, so an int is wrapped
    into a compact summary dict before emission; a dict passes through
    untouched.
    """
    fmt = _fmt(common)
    if not isinstance(result, dict):
        result = {"pipeline": pipeline, "rows_processed": int(result), "dry_run": False}
    emit_json(result, fmt)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# EXPLAIN plan markers that indicate pushdown or noteworthy physical ops.
# Plan lines containing any of these substrings are highlighted (ANSI green)
# by the ``explain`` subcommand's table output.
_EXPLAIN_MARKERS: tuple[str, ...] = (
    "READ_JSON",
    "Filters:",
    "Projection",
    "Filter",
    "HASH_JOIN",
    "HNSW_INDEX_SCAN",
    "HASH_GROUP_BY",
)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _describe_checkpoint_entry(path: Path) -> dict[str, object]:
|
|
291
|
+
"""Report the persistent DuckDB checkpoint file alongside the parquet caches.
|
|
292
|
+
|
|
293
|
+
Keeps the same ``{name, path, exists[, bytes, mtime, rows]}`` shape as
|
|
294
|
+
:func:`_describe_cache_entry` so ``list-cache`` stays homogeneous. Row
|
|
295
|
+
count is queried via :func:`checkpointer.count_rows`.
|
|
296
|
+
"""
|
|
297
|
+
exists = path.exists() and path.is_file()
|
|
298
|
+
entry: dict[str, object] = {"name": "session_checkpoint", "path": str(path), "exists": exists}
|
|
299
|
+
if not exists:
|
|
300
|
+
return entry
|
|
301
|
+
stat = path.stat()
|
|
302
|
+
entry["bytes"] = stat.st_size
|
|
303
|
+
entry["mtime"] = datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat()
|
|
304
|
+
try:
|
|
305
|
+
entry["rows"] = checkpointer.count_rows(path)
|
|
306
|
+
except duckdb.Error:
|
|
307
|
+
entry["rows"] = None
|
|
308
|
+
return entry
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _describe_cache_entry(name: str, path: Path) -> dict[str, object]:
    """Collect filesystem metadata about one parquet cache entry.

    Works for both legacy single-file caches and the sharded directory
    layout (``<dir>/part-*.parquet``). For a sharded directory, ``bytes``
    sums across parts, ``mtime`` is the newest part's modification time,
    and ``rows`` is the union row count.

    Row counts come from :func:`count_rows` (footer-only ``scan_parquet``)
    so even huge caches are cheap to describe. A zero-byte / truncated
    part yields ``rows=None`` rather than aborting the whole listing.
    """
    parts = iter_part_files(path)
    present = bool(parts) or path.exists()
    info: dict[str, object] = {"name": name, "path": str(path), "exists": present}
    if not present:
        return info

    if not parts:
        # Path exists (e.g. an empty directory) but holds no part files;
        # surface the bare directory mtime so "we made the dir" is visible.
        meta = path.stat()
        info["bytes"] = 0
        info["mtime"] = datetime.fromtimestamp(meta.st_mtime, tz=UTC).isoformat()
        info["rows"] = 0
        return info

    stats = [part.stat() for part in parts]
    info["bytes"] = sum(s.st_size for s in stats)
    info["mtime"] = datetime.fromtimestamp(max(s.st_mtime for s in stats), tz=UTC).isoformat()

    # A part at or under 16 bytes cannot hold a valid parquet footer.
    if any(s.st_size <= 16 for s in stats):
        info["rows"] = None
        return info
    try:
        info["rows"] = count_rows(path)
    except (OSError, ValueError):
        # ``count_rows`` is a polars scan; an unreadable footer lands here.
        info["rows"] = None
    return info
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# ---------------------------------------------------------------------------
|
|
359
|
+
# Subcommands
|
|
360
|
+
# ---------------------------------------------------------------------------
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@app.command
def shell(*, common: Common | None = None) -> None:
    """Launch the interactive duckdb REPL with the full catalog pre-registered.

    Creates a temporary on-disk DuckDB file, runs ``register_all`` against
    it (18 views + 14 macros + VSS), then execs the system ``duckdb``
    binary on that file. Prefer ``query`` for single-shot use; ``shell``
    is for genuinely interactive sessions.

    The temp DB path is logged on startup and deliberately NOT cleaned up
    on exit, so a long-running session can be reopened later.

    Exit codes
    ----------
    * 127 — ``duckdb`` binary not on PATH.
    """
    _configure(common)
    settings = _resolve_settings(common)

    # mkstemp over NamedTemporaryFile: we never write through the handle,
    # we only need a unique path that already exists on disk so duckdb can
    # open it. Close the descriptor right away; duckdb opens its own.
    descriptor, repl_db = tempfile.mkstemp(suffix=".duckdb")
    os.close(descriptor)

    con = duckdb.connect(repl_db)
    try:
        register_all(con, settings=settings)
    finally:
        con.close()

    logger.info("Opening DuckDB REPL with pre-registered views + macros + HNSW index")
    logger.info("(Exit with .quit; DB persists at {})", repl_db)
    try:
        subprocess.run(["duckdb", repl_db], check=False)
    except FileNotFoundError:
        logger.error(
            "`duckdb` binary not found on PATH. Install it or run queries via "
            "`claude-sql query '<sql>'`. DB persists at {}",
            repl_db,
        )
        sys.exit(EXIT_CODES["duckdb_missing"])
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _profile_path_for(label: str) -> Path:
|
|
421
|
+
"""Build the destination path used by ``--profile-json``.
|
|
422
|
+
|
|
423
|
+
Splits filename composition out of the writer so callers can configure
|
|
424
|
+
DuckDB's ``profiling_output`` PRAGMA before the profiled query runs
|
|
425
|
+
(DuckDB writes the JSON itself; we just read it back to confirm the
|
|
426
|
+
file landed and surface its location to the user).
|
|
427
|
+
"""
|
|
428
|
+
profiling_dir = Path(os.path.expanduser("~/.claude/profiling/"))
|
|
429
|
+
profiling_dir.mkdir(parents=True, exist_ok=True)
|
|
430
|
+
safe_label = re.sub(r"[^A-Za-z0-9_-]+", "-", label).strip("-") or "profile"
|
|
431
|
+
return profiling_dir / f"{safe_label}-{int(time.time() * 1000)}.json"
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _capture_profile(con: duckdb.DuckDBPyConnection, label: str) -> Path:
    """Arm JSON profiling on ``con`` and return the path DuckDB will write.

    Sets ``enable_profiling = 'json'`` and ``profiling_output`` so that the
    *next* statement executed on this connection is profiled to the
    returned path. The caller is responsible for executing exactly one
    statement after this returns.
    """
    destination = _profile_path_for(label)
    # Double up single quotes for the SQL string literal; tmp paths can
    # contain unusual characters under pytest.
    escaped = str(destination).replace("'", "''")
    con.execute("SET enable_profiling = 'json'")
    con.execute(f"SET profiling_output = '{escaped}'")
    return destination
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
@app.command
def query(
    sql: str,
    /,
    *,
    profile_json: bool = False,
    common: Common | None = None,
) -> None:
    """Run one SQL statement against the claude-sql catalog and emit results.

    The catalog (18 views + macros) is free to query — no Bedrock, no LLM
    cost — so use this liberally for read-only exploration. Multi-statement
    scripts are rejected by the single-exec path; use ``shell`` for those.

    Key flags
    ---------
    --glob / --subagent-glob
        Narrow the JSONL universe (at most one '**' segment per pattern).
    --format {auto,table,json,ndjson,csv}
        auto = table on TTY, JSON on pipe.
    --profile-json
        Also write a DuckDB JSON profile; its path is logged.

    Exit codes
    ----------
    * 64 parse error (malformed SQL)
    * 65 catalog error (unknown view/macro/column — try ``schema``)
    * 70 runtime error (everything else DuckDB raises)

    Example
    -------
    claude-sql query "SELECT COUNT(*) FROM sessions"
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = _fmt(common)
    con = _open_connection(settings)
    try:
        profile_path = _capture_profile(con, label="query") if profile_json else None
        frame = run_or_die(lambda: con.execute(sql).pl(), fmt=fmt)
        emit_dataframe(frame, fmt)
        if profile_path is not None:
            logger.info("Wrote profile JSON: {}", profile_path)
    finally:
        con.close()
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
@app.command
def explain(
    sql: str,
    /,
    *,
    analyze: bool = False,
    profile_json: bool = False,
    common: Common | None = None,
) -> None:
    """Show the DuckDB query plan, highlighting pushdown / notable operators.

    Use before a ``query`` that might scan many JSONLs: confirm filter
    pushdown, spot accidental full scans, and verify HNSW_INDEX_SCAN kicks
    in for vector searches.

    Flags
    -----
    --analyze
        Run ``EXPLAIN ANALYZE`` (executes the query for real timings).
        Off by default so probing slow queries stays free.
    --format
        TTY table highlights interesting operators in green; JSON emits
        ``{"plan": "<text>"}``.

    Exit codes match ``query``: 64 parse / 65 catalog / 70 runtime.
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))
    con = _open_connection(settings)
    try:
        profile_path = _capture_profile(con, label="explain") if profile_json else None
        statement = ("EXPLAIN ANALYZE " if analyze else "EXPLAIN ") + sql
        rows = run_or_die(lambda: con.execute(statement).fetchall(), fmt=fmt)
        # EXPLAIN rows are (type, plan_text) tuples; the plan text sits in
        # the last column regardless of row shape.
        plan = "\n".join(str(row[-1]) for row in rows)
        if fmt is not OutputFormat.TABLE:
            emit_json({"plan": plan}, fmt)
        else:
            for line in plan.splitlines():
                interesting = any(marker in line for marker in _EXPLAIN_MARKERS)
                print(f"\033[92m{line}\033[0m" if interesting else line)
        if profile_path is not None:
            logger.info("Wrote profile JSON: {}", profile_path)
    finally:
        con.close()
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
@app.command
def schema(*, common: Common | None = None) -> None:
    """List every registered view (with columns) and every macro in one pass.

    The canonical catalog — the first thing an agent should call after
    ``--help``. Use it to discover column names before composing ``query``
    calls. Missing analytics parquets are silently omitted from the view
    list; use ``list-cache`` to see which generators still need to run.

    JSON shape (non-TTY)::

        {
          "views": {"<view>": [{"column": "...", "type": "..."}, ...], ...},
          "macros": ["...", ...]
        }
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))
    con = _open_connection(settings)
    try:
        views = describe_all(con)
        macros = list_macros(con)
        if fmt is not OutputFormat.TABLE:
            emit_json(
                {
                    "views": {
                        view: [{"column": col, "type": col_type} for col, col_type in cols]
                        for view, cols in views.items()
                    },
                    "macros": list(macros),
                },
                fmt,
            )
            return
        for view, cols in views.items():
            print(f"\n\033[1m{view}\033[0m ({len(cols)} cols)")
            for col, col_type in cols:
                print(f" {col:<28} {col_type}")
        print(f"\n\033[1mMacros\033[0m ({len(macros)})")
        for macro in macros:
            print(f" {macro}")
    finally:
        con.close()
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
@app.command(name="list-cache")
def list_cache(*, common: Common | None = None) -> None:
    """Report each parquet cache's presence, size, freshness, and row count.

    When to use
    -----------
    Before running ``search`` (which requires the ``embeddings`` cache) or
    composing analytics queries (which require ``session_classifications``
    / ``message_trajectory`` / ``session_conflicts`` / ``message_clusters``
    / ``cluster_terms`` / ``session_communities`` / ``user_friction``).

    What it reports
    ---------------
    One entry per cache (plus the persistent checkpointer DB):
    ``{name, path, exists, bytes, mtime, rows}``. When ``exists`` is
    false, ``bytes`` / ``mtime`` / ``rows`` are omitted.

    How to populate each cache
    --------------------------
    * embeddings              -> ``claude-sql embed --no-dry-run``
    * session_classifications -> ``claude-sql classify --no-dry-run``
    * message_trajectory      -> ``claude-sql trajectory --no-dry-run``
    * session_conflicts       -> ``claude-sql conflicts --no-dry-run``
    * message_clusters        -> ``claude-sql cluster``
    * cluster_terms           -> ``claude-sql terms``
    * session_communities     -> ``claude-sql community``
    * user_friction           -> ``claude-sql friction --no-dry-run``
    * skills_catalog          -> ``claude-sql skills sync``
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))

    # One describe call per cache in a stable display order; the persistent
    # checkpoint DB is always reported last.
    cache_paths = [
        ("embeddings", settings.embeddings_parquet_path),
        ("session_classifications", settings.classifications_parquet_path),
        ("message_trajectory", settings.trajectory_parquet_path),
        ("session_conflicts", settings.conflicts_parquet_path),
        ("message_clusters", settings.clusters_parquet_path),
        ("cluster_terms", settings.cluster_terms_parquet_path),
        ("session_communities", settings.communities_parquet_path),
        ("user_friction", settings.user_friction_parquet_path),
        ("skills_catalog", settings.skills_catalog_parquet_path),
    ]
    entries = [_describe_cache_entry(cache, p) for cache, p in cache_paths]
    entries.append(_describe_checkpoint_entry(settings.checkpoint_db_path))

    if fmt is OutputFormat.TABLE:
        emit_dataframe(pl.DataFrame(entries), OutputFormat.TABLE)
        return
    # JSON / NDJSON / CSV -- emit the list directly so downstream tooling
    # doesn't have to unwrap a wrapper object.
    if fmt is OutputFormat.NDJSON:
        for entry in entries:
            sys.stdout.write(json.dumps(entry, default=str) + "\n")
        return
    if fmt is OutputFormat.CSV:
        emit_dataframe(pl.DataFrame(entries), OutputFormat.CSV)
        return
    emit_json(entries, fmt)
|
|
703
|
+
# ---------------------------------------------------------------------------
# ``cache`` sub-app — compact / migrate the sharded worker-output parquets.
# ---------------------------------------------------------------------------
#
# Workers (embed, classify, trajectory, conflicts, friction) write each
# chunk as a fresh ``part-<ts_ns>.parquet`` under their cache directory.
# Over time many small parts accumulate; ``cache compact`` consolidates
# them into a single ``part-compacted-<ts>.parquet`` and removes the
# originals. ``cache migrate`` walks legacy single-file caches that
# pre-date this layout and moves each one into a sibling directory with
# its existing mtime preserved so the HNSW persistence and cluster-mtime
# sidecar logic stay valid.
#
# Both commands honour ``--dry-run`` (default ``True``) the same way every
# Bedrock-bearing command does in this codebase: nothing happens until you
# pass ``--no-dry-run``.

# Sub-app registered on the root ``app`` below; its ``help`` string is the
# text Cyclopts shows for ``claude-sql cache --help``.
cache_app = App(
    name="cache",
    help=(
        "Manage the sharded worker-output parquet caches.\n\n"
        " cache compact consolidates many ``part-*.parquet`` shards into one.\n"
        " cache migrate moves a legacy single-file cache into the new dir layout.\n\n"
        "Both commands default to --dry-run; pass --no-dry-run to act."
    ),
)
app.command(cache_app)
|
732
|
+
def _resolve_cache_paths(settings: Settings) -> dict[str, Path]:
    """Return ``{cache_name: path}`` for every worker-append cache.

    Only the five caches with sharded-write semantics appear here: their
    writers append by dropping fresh part files, so shards accumulate and
    benefit from ``compact``. The four single-write caches (``clusters``,
    ``cluster_terms``, ``communities``, ``skills_catalog``) and the
    checkpoint DB don't follow the append pattern and are intentionally
    excluded.
    """
    # Keyword-style construction keeps the cache names and insertion order
    # identical to the display order used by the ``cache`` subcommands.
    return dict(
        embeddings=settings.embeddings_parquet_path,
        session_classifications=settings.classifications_parquet_path,
        message_trajectory=settings.trajectory_parquet_path,
        session_conflicts=settings.conflicts_parquet_path,
        user_friction=settings.user_friction_parquet_path,
    )
|
|
750
|
+
@cache_app.command(name="compact")
def cache_compact(
    *,
    name: str | None = None,
    dry_run: bool = True,
    common: Common | None = None,
) -> None:
    """Consolidate ``part-*.parquet`` shards into a single compacted part file.

    Walks each sharded cache directory, reads every part, writes a fresh
    ``part-compacted-<ts_ns>.parquet`` containing the union, and only after
    that write succeeds removes the originals. Legacy single-file caches
    and caches with zero or one parts are left alone — there is nothing to
    consolidate.

    Flags
    -----
    --name <cache>  Restrict to one of: embeddings, session_classifications,
                    message_trajectory, session_conflicts, user_friction.
                    Default is "all five".
    --dry-run       Default True. Pass ``--no-dry-run`` to actually rewrite.
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))

    targets = _resolve_cache_paths(settings)
    if name is not None:
        if name not in targets:
            error = ClassifiedError(
                kind="invalid_input",
                exit_code=EXIT_CODES["invalid_input"],
                message=f"Unknown cache name: {name!r}",
                hint=f"Pick one of: {', '.join(sorted(targets))}",
            )
            emit_error(error, _fmt(common))
            sys.exit(error.exit_code)
        targets = {name: targets[name]}

    summaries: list[dict[str, object]] = []
    for cache_name, path in targets.items():
        parts = iter_part_files(path)
        # Nothing to do for legacy single-file caches or <=1 shard.
        if len(parts) <= 1 or not is_sharded_dir(path):
            summaries.append(
                {
                    "name": cache_name,
                    "path": str(path),
                    "parts": len(parts),
                    "action": "skip",
                    "reason": "no_compaction_needed",
                }
            )
            continue
        if dry_run:
            summaries.append(
                {
                    "name": cache_name,
                    "path": str(path),
                    "parts": len(parts),
                    "bytes": sum(part.stat().st_size for part in parts),
                    "action": "would_compact",
                }
            )
            continue
        # Read the union via polars, write a fresh compacted shard, delete
        # the originals only after the write succeeds. Any IO error here
        # leaves the directory intact so a retry does not lose data.
        union = pl.read_parquet([str(part) for part in parts])
        compacted = path / f"part-compacted-{time.time_ns()}.parquet"
        union.write_parquet(compacted)
        for part in parts:
            part.unlink()
        summaries.append(
            {
                "name": cache_name,
                "path": str(path),
                "parts": len(parts),
                "rows": int(union.height),
                "compacted_to": str(compacted),
                "action": "compacted",
            }
        )

    if fmt is OutputFormat.TABLE:
        emit_dataframe(pl.DataFrame(summaries), OutputFormat.TABLE)
        return
    if fmt is OutputFormat.NDJSON:
        for summary in summaries:
            sys.stdout.write(json.dumps(summary, default=str) + "\n")
        return
    if fmt is OutputFormat.CSV:
        emit_dataframe(pl.DataFrame(summaries), OutputFormat.CSV)
        return
    emit_json(summaries, fmt)
|
|
848
|
+
@cache_app.command(name="migrate")
def cache_migrate(
    *,
    dry_run: bool = True,
    common: Common | None = None,
) -> None:
    """Move legacy single-file caches into the sharded directory layout.

    For each of the five worker-append caches, looks for the historical
    ``~/.claude/<name>.parquet`` file alongside the new
    ``~/.claude/<name>/`` directory. When a single-file cache exists, the
    file is moved (not copied) into the directory as
    ``part-<original_mtime_ns>.parquet`` so subsequent runs treat it as
    just another shard. The original mtime is preserved on the new file so
    HNSW-persistence freshness checks behave identically.

    Flags
    -----
    --dry-run  Default True. Pass ``--no-dry-run`` to actually move files.
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))

    targets = _resolve_cache_paths(settings)
    summaries: list[dict[str, object]] = []
    for cache_name, dir_path in targets.items():
        # Legacy single-file path is the same parent directory + the cache
        # name + ".parquet" — that's what ``_default_*_parquet`` returned
        # before this PR. Some users may have customised the cache path
        # explicitly; we only touch the canonical sibling, never an
        # arbitrary user file.
        legacy = dir_path.with_suffix(".parquet")
        if not legacy.is_file():
            summaries.append(
                {
                    "name": cache_name,
                    "from": str(legacy),
                    "to": str(dir_path),
                    "action": "skip",
                    "reason": "no_legacy_file",
                }
            )
            continue
        mtime_ns = legacy.stat().st_mtime_ns
        destination = dir_path / f"part-{mtime_ns}.parquet"
        if dry_run:
            summaries.append(
                {
                    "name": cache_name,
                    "from": str(legacy),
                    "to": str(destination),
                    "bytes": legacy.stat().st_size,
                    "action": "would_move",
                }
            )
            continue
        dir_path.mkdir(parents=True, exist_ok=True)
        # ``rename`` preserves contents and mtime when both paths live on
        # the same filesystem — for the canonical ``~/.claude/`` layout
        # they always do. ``os.utime`` is a defensive belt+suspenders.
        legacy.rename(destination)
        os.utime(destination, ns=(mtime_ns, mtime_ns))
        summaries.append(
            {
                "name": cache_name,
                "from": str(legacy),
                "to": str(destination),
                "action": "migrated",
            }
        )

    if fmt is OutputFormat.TABLE:
        emit_dataframe(pl.DataFrame(summaries), OutputFormat.TABLE)
        return
    if fmt is OutputFormat.NDJSON:
        for summary in summaries:
            sys.stdout.write(json.dumps(summary, default=str) + "\n")
        return
    if fmt is OutputFormat.CSV:
        emit_dataframe(pl.DataFrame(summaries), OutputFormat.CSV)
        return
    emit_json(summaries, fmt)
|
|
934
|
+
# ---------------------------------------------------------------------------
# ``skills`` sub-app — catalog of locally-available Skills and slash commands.
# ---------------------------------------------------------------------------

# Sub-app registered on the root ``app`` below; its ``help`` string is the
# text Cyclopts shows for ``claude-sql skills --help``. Seeding is a pure
# filesystem walk, so these commands never touch Bedrock.
skills_app = App(
    name="skills",
    help=(
        "Seed and inspect the local Skills catalog.\n\n"
        "The catalog binds skill_id (e.g. 'erpaval', 'personal-plugins:erpaval') "
        "to its human description, source plugin, and version so skill_usage can "
        "enrich raw invocations. Seeded from ~/.claude/skills/ and "
        "~/.claude/plugins/cache/**; no Bedrock cost."
    ),
)
app.command(skills_app)
|
|
951
|
+
@skills_app.command(name="sync")
def skills_sync(
    *,
    dry_run: bool = False,
    common: Common | None = None,
) -> None:
    """Walk ``~/.claude/skills`` and ``~/.claude/plugins/cache`` → skills_catalog.parquet.

    Sources
    -------
    * ``~/.claude/skills/<name>/SKILL.md`` → ``user-skill``
    * ``<plugins_cache>/<owner>/<plugin>/<v>/skills/<n>/SKILL.md``
      → ``plugin-skill`` (bare + ``<plugin>:<n>``)
    * ``<plugins_cache>/<owner>/<plugin>/<v>/commands/<n>.md``
      → ``plugin-command`` (bare + ``<plugin>:<n>``)
    * Built-in slash commands (``/clear``, ``/compact``, …) → ``builtin``

    Cost: zero (pure filesystem walk). Run whenever you install or
    upgrade a plugin; ``claude-sql analyze`` runs it automatically.

    Flags
    -----
    --dry-run  Count rows without writing the parquet. Useful for
               previewing how many skills will be catalogued.
    """
    _configure(common)
    settings = _resolve_settings(common)
    stats = _skills_catalog.sync(settings, dry_run=dry_run)
    # Pick the verb up front so the log line reads naturally in both modes.
    verb = "would write" if dry_run else "wrote"
    logger.info(
        "skills sync: {} {} rows to {} ({} skills, {} commands, {} builtins)",
        verb,
        stats["rows"],
        settings.skills_catalog_parquet_path,
        stats["skills"],
        stats["commands"],
        stats["builtins"],
    )
|
991
|
+
@skills_app.command(name="ls")
def skills_ls(
    *,
    kind: str | None = None,
    plugin: str | None = None,
    common: Common | None = None,
) -> None:
    """List entries from the skills catalog parquet.

    Run ``claude-sql skills sync`` first. Emits the catalog in the
    shared ``--format`` shape (table on TTY, JSON on pipe).

    Flags
    -----
    --kind <value>    Filter by ``source_kind`` (``user-skill``,
                      ``plugin-skill``, ``plugin-command``, ``builtin``).
    --plugin <value>  Filter by plugin name (exact match).
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = resolve_format(_fmt(common))
    path = settings.skills_catalog_parquet_path
    if not path.exists():
        logger.error(
            "skills catalog parquet missing at {}. Run `claude-sql skills sync` first.",
            path,
        )
        sys.exit(EXIT_CODES["no_embeddings"])

    catalog = pl.read_parquet(path)
    if kind is not None:
        catalog = catalog.filter(pl.col("source_kind") == kind)
    if plugin is not None:
        catalog = catalog.filter(pl.col("plugin") == plugin)
    catalog = catalog.sort(["source_kind", "plugin", "name"], nulls_last=True)

    if fmt in (OutputFormat.TABLE, OutputFormat.CSV):
        emit_dataframe(catalog, fmt)
        return
    if fmt is OutputFormat.NDJSON:
        for row in catalog.iter_rows(named=True):
            sys.stdout.write(json.dumps(row, default=str) + "\n")
        return
    emit_json(catalog.to_dicts(), fmt)
|
|
1039
|
+
@app.command
def embed(
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = False,
    common: Common | None = None,
) -> None:
    """Embed new messages with Cohere Embed v4 and append to the embeddings parquet.

    Cost
    ----
    Calls Bedrock (``global.cohere.embed-v4:0``) on every unembedded
    message. ``--dry-run`` is OFF by default here (unlike LLM workers);
    pass it if you only want to see the plan.

    Flags
    -----
    --since-days N  Only consider messages newer than N days.
    --limit N       Cap the number of messages embedded this run.
    --dry-run       Preview only; emit plan JSON, no Bedrock calls.
    --glob PATTERN  Narrow the universe (see top-level --help).

    Dry-run output (stdout JSON)
    ----------------------------
    ``{"pipeline": "embed", "candidates": N, "batches": B,
    "batch_size": 96, "concurrency": 2, "model": "...",
    "since_days": null, "limit": null, "dry_run": true}``

    Real-run output
    ---------------
    ``{"pipeline": "embed", "rows_processed": N, "dry_run": false}``

    Exit codes: 0 success, 70 runtime (Bedrock / DuckDB failure).
    """
    import asyncio

    _configure(common)
    settings = _resolve_settings(common)
    # Unlike the analytics commands this opens a bare in-memory DuckDB and
    # registers only the raw views the backfill needs.
    con = duckdb.connect(":memory:")
    try:
        register_raw(
            con,
            glob=settings.default_glob,
            subagent_glob=settings.subagent_glob,
            subagent_meta_glob=settings.subagent_meta_glob,
        )
        register_views(con)
        backfill = run_backfill(
            con=con,
            settings=settings,
            since_days=since_days,
            limit=limit,
            dry_run=dry_run,
        )
        result = asyncio.run(backfill)
        logger.info("Embedded {} messages (dry_run={})", result, dry_run)
        _emit_worker_result(result, common, pipeline="embed")
    finally:
        con.close()
|
|
1112
|
+
@app.command
def search(
    query_text: str,
    /,
    *,
    k: int = 10,
    common: Common | None = None,
) -> None:
    """Semantic top-k nearest-neighbor search over message embeddings via HNSW.

    Pipeline
    --------
    1. Embed ``query_text`` with Cohere Embed v4 ``search_query`` mode.
    2. DuckDB VSS HNSW cosine lookup against the existing embeddings parquet.
    3. Join back to ``messages_text`` for a 200-char snippet.

    Prereq
    ------
    The embeddings parquet must exist. If it's empty or missing, the
    command exits with code 2 and a hint. Run
    ``claude-sql embed --since-days 7 --no-dry-run`` to populate.

    Positional args
    ---------------
    QUERY_TEXT  A single natural-language query string.

    Flags
    -----
    --k N           Top-k (default 10).
    --glob PATTERN  Narrow the messages_text view before the HNSW join.
    --format ...    See top-level --help.

    Output columns
    --------------
    uuid, session_id, role, sim (cosine similarity in [-1, 1]), snippet.
    Sorted by cosine distance ascending -- highest sim first.

    When to prefer ``query`` instead
    --------------------------------
    Semantic search is good at recall but bad at tie-breaking when the
    topic is over-represented in the corpus. If you are pinpointing a
    single known session (not a theme) and the subject is frequent, a
    literal ILIKE on a distinctive token (exact CLI flags, dollar
    amounts, precise error strings, the exact command the user ran)
    finds it in one hop::

        claude-sql query "SELECT DISTINCT session_id FROM messages_text
                          WHERE text_content ILIKE '%--since-days 30%'"

    If the first search returns >3 plausible sessions at similar ``sim``,
    stop rephrasing and switch modality.

    Exit codes: 0 success, 2 no_embeddings, 70 runtime.
    """
    _configure(common)
    settings = _resolve_settings(common)
    fmt = _fmt(common)
    con = _open_connection(settings)
    try:
        row = con.execute("SELECT count(*) FROM message_embeddings").fetchone()
        embedded = int(row[0]) if row else 0
        if embedded == 0:
            logger.error("No embeddings yet. Run: claude-sql embed --since-days 7")
            sys.exit(EXIT_CODES["no_embeddings"])

        query_vector = embed_query(query_text, settings=settings)
        dim = int(settings.output_dimension)
        # Rank by cosine similarity descending. The HNSW index was built with
        # metric='cosine', so ORDER BY array_cosine_distance (== 1 - sim) ASC
        # is what triggers the index lookup. Using array_distance here (L2)
        # would silently bypass the index AND give wrong ranks because the
        # raw int8-cast-to-float document vectors have magnitudes in the
        # thousands while the query vector is unit-normalized.
        results = run_or_die(
            lambda: con.execute(
                f"""
                WITH qv AS (SELECT CAST(? AS FLOAT[{dim}]) AS v)
                SELECT CAST(mt.uuid AS VARCHAR) AS uuid,
                       CAST(mt.session_id AS VARCHAR) AS session_id,
                       mt.role,
                       array_cosine_similarity(me.embedding, (SELECT v FROM qv)) AS sim,
                       substr(mt.text_content, 1, 200) AS snippet
                FROM message_embeddings me
                JOIN messages_text mt ON CAST(mt.uuid AS VARCHAR) = me.uuid
                ORDER BY array_cosine_distance(me.embedding, (SELECT v FROM qv)) ASC
                LIMIT ?
                """,
                [query_vector, k],
            ).pl(),
            fmt=fmt,
        )
        emit_dataframe(results, fmt, table_rows=k, table_str_len=200)
    finally:
        con.close()
|
|
1210
|
+
@app.command
def classify(
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
    common: Common | None = None,
) -> None:
    """Classify sessions with Sonnet 4.6: autonomy tier, work category, success, goal.

    Output columns (``session_classifications`` view)
    -------------------------------------------------
    session_id, autonomy_tier in {autonomous, assisted, manual},
    work_category (sde/admin/strategy_business/thought_leadership/other),
    success in {success, partial, failure, unknown}, goal (string),
    confidence in [0, 1], classified_at.
    Alias columns added by the view layer: ``autonomy``,
    ``success_outcome``, ``category`` (same values as above).

    Cost (defaults to --dry-run)
    ----------------------------
    Back-of-envelope ~8K input + ~300 output tokens per session. With
    Sonnet 4.6 pricing, 1,000 sessions is roughly $25-30. Always start
    with ``--dry-run`` (default) to see the plan JSON, then confirm with
    ``--no-dry-run``.

    Flags
    -----
    --since-days N  Only classify sessions newer than N days.
    --limit N       Cap at N sessions this run.
    --dry-run       (DEFAULT) emit plan JSON, no Bedrock calls.
    --no-dry-run    Spend real money.
    --no-thinking   Disable Sonnet adaptive thinking (cheaper, less precise).
    --glob PATTERN  Narrow the corpus (recommended for first runs).

    Dry-run stdout JSON
    -------------------
    ``{"pipeline":"classify","candidates":N,"llm_calls":N,
    "avg_input_tokens":8000,"avg_output_tokens":300,
    "estimated_cost_usd":X,"model":"...","thinking":"adaptive",
    "since_days":null,"limit":null,"dry_run":true}``

    Checkpointing
    -------------
    Session-level checkpoint in ``~/.claude/claude_sql.duckdb`` means
    reruns on unchanged sessions are free -- only sessions whose JSONL
    mtime changed are re-processed.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        processed = classify_sessions(
            con,
            settings,
            since_days=since_days,
            limit=limit,
            dry_run=dry_run,
            no_thinking=no_thinking,
        )
        logger.info("classify: {} sessions processed (dry_run={})", processed, dry_run)
        _emit_worker_result(processed, common, pipeline="classify")
    finally:
        con.close()
|
|
1277
|
+
@app.command
def trajectory(
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
    common: Common | None = None,
) -> None:
    """Per-message sentiment + topic-transition classification (regex prefilter → Sonnet 4.6).

    Output columns (``message_trajectory`` view)
    --------------------------------------------
    uuid, sentiment_delta in {positive, neutral, negative},
    is_transition (boolean -- does this message mark a topic shift?),
    confidence in [0, 1], classified_at.
    Alias columns: ``sentiment`` (same as sentiment_delta),
    ``transition`` (same as is_transition).

    Pipeline
    --------
    1. Regex prefilter catches ~50% of obvious transitions for free.
    2. Sonnet 4.6 classifies the remainder with structured output.

    Cost: defaults to ``--dry-run``. ~500 input / 50 output tokens per LLM
    call.

    Flags / exit codes identical to ``classify``. See its help for the
    dry-run JSON schema.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        processed = trajectory_messages(
            con,
            settings,
            since_days=since_days,
            limit=limit,
            dry_run=dry_run,
            no_thinking=no_thinking,
        )
        logger.info("trajectory: {} messages processed (dry_run={})", processed, dry_run)
        _emit_worker_result(processed, common, pipeline="trajectory")
    finally:
        con.close()
|
|
1325
|
+
@app.command
def conflicts(
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
    common: Common | None = None,
) -> None:
    """Per-session stance-conflict detection via Sonnet 4.6.

    What it finds
    -------------
    Places where the user and the agent disagreed on approach or scope,
    or where the agent contradicted itself. Each conflict gets two stance
    snippets (``stance_a`` / ``stance_b``), a resolution label
    in {resolved, unresolved, abandoned, null}, and a detected_at timestamp.

    Output columns (``session_conflicts`` view)
    -------------------------------------------
    session_id, conflict_idx, stance_a, stance_b, resolution,
    detected_at, empty. Alias: ``conflict_resolution`` = resolution.

    Cost: defaults to ``--dry-run``. ~6K input / 400 output tokens / session.
    Flags / exit codes identical to ``classify``.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        processed = detect_conflicts(
            con,
            settings,
            since_days=since_days,
            limit=limit,
            dry_run=dry_run,
            no_thinking=no_thinking,
        )
        logger.info("conflicts: {} sessions processed (dry_run={})", processed, dry_run)
        _emit_worker_result(processed, common, pipeline="conflicts")
    finally:
        con.close()
|
|
1369
|
+
@app.command
def friction(
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
    common: Common | None = None,
) -> None:
    """Classify short user messages (<=300 chars) for friction signals.

    Labels
    ------
    status_ping / unmet_expectation / confusion / interruption /
    correction / frustration / none.

    Pipeline
    --------
    1. Pull user-role messages <= ``CLAUDE_SQL_FRICTION_MAX_CHARS`` (300).
    2. Regex fast-path catches ``status_ping`` / ``interruption`` /
       ``correction`` at 0.9 confidence.
    3. Everything else → Sonnet 4.6 with the USER_FRICTION_SCHEMA.

    Output columns (``user_friction`` view)
    ---------------------------------------
    uuid, session_id, ts, label, source in {regex, llm, refused},
    confidence, rationale, text (the original user message).

    Cost: defaults to ``--dry-run``. Short prompts (~200 in / 60 out),
    so even 10K candidates cost roughly $3-4.
    Flags / exit codes identical to ``classify``.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        rows_written = detect_user_friction(
            con,
            settings,
            since_days=since_days,
            limit=limit,
            dry_run=dry_run,
            no_thinking=no_thinking,
        )
        logger.info("friction: {} rows written (dry_run={})", rows_written, dry_run)
        _emit_worker_result(rows_written, common, pipeline="friction")
    finally:
        con.close()
|
|
1419
|
+
@app.command
def cluster(*, force: bool = False, common: Common | None = None) -> None:
    """Cluster message embeddings with UMAP (8D) + HDBSCAN. Writes clusters.parquet.

    Prereq
    ------
    The embeddings parquet must exist. Run ``embed --no-dry-run`` first.

    Output columns (``message_clusters`` view)
    ------------------------------------------
    uuid, cluster_id (int; -1 = noise), probability (HDBSCAN soft label).

    Cost: zero (CPU-only, no Bedrock). Seeded by ``CLAUDE_SQL_SEED=42`` so
    cluster IDs are stable across reruns unless the embedding set changes.

    Flags
    -----
    --force    Re-cluster even if clusters.parquet already exists.
    """
    _configure(common)
    settings = _resolve_settings(common)
    stats = run_clustering(settings, force=force)
    total = stats["total"]
    noise = stats["noise"]
    # Guard the ratio against an empty embedding set (total == 0).
    noise_ratio = noise / total if total else 0
    logger.info(
        "cluster: {} messages, {} clusters, {} noise ({:.1%})",
        total,
        stats["clusters"],
        noise,
        noise_ratio,
    )
|
|
1448
|
+
|
|
1449
|
+
|
|
1450
|
+
@app.command
def terms(*, force: bool = False, common: Common | None = None) -> None:
    """Compute c-TF-IDF per-cluster term labels; writes cluster_terms.parquet.

    Prereq: ``cluster`` (i.e., clusters.parquet must exist).

    Output columns (``cluster_terms`` view)
    ---------------------------------------
    cluster_id (int), term (unigram or bigram), weight (float),
    rank (int, 1 = strongest term in that cluster).

    Math: per-class TF → IDF → L1 normalize, ngram (1,2), min_df=2.
    Cost: zero (sklearn CountVectorizer). See CLAUDE.md for design rationale.

    Flags
    -----
    --force    Recompute even if cluster_terms.parquet already exists.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        term_stats = run_terms(con, settings, force=force)
        logger.info(
            "terms: {} clusters, {} term-rows",
            term_stats["clusters"],
            term_stats["terms"],
        )
    finally:
        # Always release the DuckDB handle.
        con.close()
|
|
1480
|
+
|
|
1481
|
+
|
|
1482
|
+
@app.command
def community(*, force: bool = False, common: Common | None = None) -> None:
    """Session-level Louvain community detection over a cosine-similarity graph.

    Prereq: ``embed`` (needs the embeddings parquet).

    Output columns (``session_communities`` view)
    ---------------------------------------------
    session_id, community_id (int; -1 = isolated).

    Method: build a session-centroid-cosine KNN graph, then run
    ``networkx.community.louvain_communities`` (networkx ≥3.4).
    Cost: zero. Seeded by ``CLAUDE_SQL_SEED=42``.

    Flags
    -----
    --force    Re-detect even if session_communities.parquet exists.
    """
    _configure(common)
    settings = _resolve_settings(common)
    con = _open_connection(settings)
    try:
        community_stats = run_communities(con, settings, force=force)
        logger.info(
            "community: {} sessions grouped into {} communities",
            community_stats["sessions"],
            community_stats["communities"],
        )
    finally:
        # Always release the DuckDB handle.
        con.close()
|
|
1512
|
+
|
|
1513
|
+
|
|
1514
|
+
@app.command
def analyze(
    *,
    since_days: int | None = 30,
    limit: int | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
    skip_embed: bool = False,
    skip_classify: bool = False,
    skip_trajectory: bool = False,
    skip_conflicts: bool = False,
    skip_friction: bool = False,
    skip_cluster: bool = False,
    skip_community: bool = False,
    skip_skills_sync: bool = False,
    force_cluster: bool = False,
    force_community: bool = False,
    common: Common | None = None,
) -> None:
    """Run the full analytics pipeline end-to-end: embed → structure → LLM analytics.

    Stages (in order)
    -----------------
    0. skills sync (filesystem walk; zero-cost; produces skills_catalog.parquet)
    1. embed (Bedrock Cohere Embed v4; honors --dry-run)
    2. cluster (UMAP+HDBSCAN; zero-cost; --force_cluster to rebuild)
    3. terms (c-TF-IDF labels for clusters; zero-cost)
    4. community (Louvain; zero-cost; --force_community to rebuild)
    5. classify (Sonnet 4.6; honors --dry-run)
    6. trajectory (Sonnet 4.6; honors --dry-run)
    7. conflicts (Sonnet 4.6; honors --dry-run)
    8. friction (Sonnet 4.6; honors --dry-run)

    Cost
    ----
    Every LLM-touching stage defaults to ``--dry-run`` -- stdout logs the
    plan per stage. Pass ``--no-dry-run`` to execute for real.

    Flags
    -----
    --since-days N     Scope all stages to the last N days (default 30).
    --limit N          Cap each LLM stage at N items.
    --dry-run / --no-dry-run   (default --dry-run)
    --no-thinking      Disable Sonnet adaptive thinking across all stages.
    --skip-<stage>     Drop a stage:
                       embed, cluster, community, classify, trajectory,
                       conflicts, friction. Terms is bound to cluster.
    --force-cluster    Rebuild clusters.parquet (+ terms) even if present.
    --force-community  Rebuild session_communities.parquet even if present.
    --glob / --subagent-glob   Narrow the corpus (applies to every stage).

    Typical recipes
    ---------------
    Preview spend over the last week::

        claude-sql analyze --since-days 7

    Run the non-LLM stages only (cluster + terms + community)::

        claude-sql analyze --skip-embed --skip-classify \
            --skip-trajectory --skip-conflicts --skip-friction \
            --force-cluster --force-community
    """
    # Local import: only this subcommand drives an async worker.
    import asyncio

    _configure(common)
    settings = _resolve_settings(common)

    # 0. Skills catalog sync (filesystem walk, zero cost). Runs even in
    # --dry-run because it does not hit Bedrock; opt out via
    # --skip-skills-sync if you want to keep the parquet frozen.
    if not skip_skills_sync:
        stats = _skills_catalog.sync(settings)
        logger.info(
            "analyze/skills: wrote {} rows to {} ({} skills, {} commands, {} builtins)",
            stats["rows"],
            settings.skills_catalog_parquet_path,
            stats["skills"],
            stats["commands"],
            stats["builtins"],
        )

    # 1. Embed (reuses embed_worker). Silently skipped if the parquet is up to date.
    # Each stage opens and closes its own connection so a failure in one
    # stage never leaks a handle into the next.
    if not skip_embed:
        con = _open_connection(settings)
        try:
            n = asyncio.run(
                run_backfill(
                    con=con,
                    settings=settings,
                    since_days=since_days,
                    limit=limit,
                    dry_run=dry_run,
                )
            )
            logger.info("analyze/embed: {} new embeddings (dry_run={})", n, dry_run)
        finally:
            con.close()

    # 2. Cluster (reads embeddings parquet, writes clusters.parquet). Non-LLM.
    # Terms is intentionally bound to this stage: fresh clusters invalidate
    # the old term labels, so both share --force-cluster.
    if not skip_cluster:
        stats = run_clustering(settings, force=force_cluster)
        logger.info(
            "analyze/cluster: {} messages, {} clusters, {} noise",
            stats["total"],
            stats["clusters"],
            stats["noise"],
        )
        con = _open_connection(settings)
        try:
            tstats = run_terms(con, settings, force=force_cluster)
            logger.info(
                "analyze/terms: {} clusters, {} term-rows",
                tstats["clusters"],
                tstats["terms"],
            )
        finally:
            con.close()

    # 3. Community detection (non-LLM, runs in parallel conceptually with cluster).
    if not skip_community:
        con = _open_connection(settings)
        try:
            cstats = run_communities(con, settings, force=force_community)
            logger.info(
                "analyze/community: {} sessions, {} communities",
                cstats["sessions"],
                cstats["communities"],
            )
        finally:
            con.close()

    # 4. Session classification (LLM).
    if not skip_classify:
        con = _open_connection(settings)
        try:
            n = classify_sessions(
                con,
                settings,
                since_days=since_days,
                limit=limit,
                dry_run=dry_run,
                no_thinking=no_thinking,
            )
            logger.info("analyze/classify: {} sessions (dry_run={})", n, dry_run)
        finally:
            con.close()

    # 5. Trajectory (LLM).
    if not skip_trajectory:
        con = _open_connection(settings)
        try:
            n = trajectory_messages(
                con,
                settings,
                since_days=since_days,
                limit=limit,
                dry_run=dry_run,
                no_thinking=no_thinking,
            )
            logger.info("analyze/trajectory: {} messages (dry_run={})", n, dry_run)
        finally:
            con.close()

    # 6. Conflicts (LLM, requires full session context).
    if not skip_conflicts:
        con = _open_connection(settings)
        try:
            n = detect_conflicts(
                con,
                settings,
                since_days=since_days,
                limit=limit,
                dry_run=dry_run,
                no_thinking=no_thinking,
            )
            logger.info("analyze/conflicts: {} sessions (dry_run={})", n, dry_run)
        finally:
            con.close()

    # 7. Friction (LLM, short-message scope).
    if not skip_friction:
        con = _open_connection(settings)
        try:
            n = detect_user_friction(
                con,
                settings,
                since_days=since_days,
                limit=limit,
                dry_run=dry_run,
                no_thinking=no_thinking,
            )
            logger.info("analyze/friction: {} rows (dry_run={})", n, dry_run)
        finally:
            con.close()

    logger.info("analyze: done")
|
|
1711
|
+
|
|
1712
|
+
|
|
1713
|
+
@app.command(name="judges")
def judges_cmd(*, common: Common | None = None) -> None:
    """List the cross-provider Bedrock judge catalog (shortname, model ID, family, notes)."""
    _configure(common)
    fmt = _fmt(common)
    # One output row per catalog entry; the tuple fixes both the set of
    # attributes exposed and the column order of the emitted frame.
    columns = ("shortname", "model_id", "provider", "family", "role", "notes")
    rows = [
        {name: getattr(judge, name) for name in columns}
        for judge in _judge_catalog.catalog()
    ]
    emit_dataframe(pl.DataFrame(rows), fmt=fmt)
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
@app.command(name="freeze")
def freeze_cmd(
    rubric: Path,
    /,
    *,
    panel: str,
    embed_model: str = "global.cohere.embed-v4:0",
    seed: int = 42,
    min_turns: int = 10,
    max_turns: int = 40,
    common: Common | None = None,
) -> None:
    """Pre-register a study: write an immutable manifest under ~/.claude/studies/<sha>/.

    ``panel`` is a comma-separated list of judge shortnames (see ``claude-sql
    judges``). The returned SHA is what every downstream worker consumes.
    """
    _configure(common)
    fmt = _fmt(common)
    # Split the comma-separated panel spec, dropping blank entries so a
    # trailing comma or doubled separator is harmless.
    shortnames: list[str] = []
    for raw in panel.split(","):
        name = raw.strip()
        if name:
            shortnames.append(name)
    if not shortnames:
        raise InputValidationError("--panel must have at least one shortname")
    study = _freeze.freeze(
        rubric_path=rubric,
        panel_shortnames=tuple(shortnames),
        embed_model_id=embed_model,
        session_scope=_freeze.SessionScope(min_turns=min_turns, max_turns=max_turns),
        seed=seed,
    )
    # Echo the frozen manifest's identifying fields back to the caller.
    payload = {
        "manifest_sha": study.manifest_sha,
        "rubric_path": study.rubric_path,
        "panel_shortnames": list(study.panel_shortnames),
        "commit_sha": study.commit_sha,
        "created_at_utc": study.created_at_utc,
    }
    emit_json(payload, fmt=fmt)
|
|
1773
|
+
|
|
1774
|
+
|
|
1775
|
+
@app.command(name="replay")
def replay_cmd(manifest_sha: str, /, *, common: Common | None = None) -> None:
    """Load and echo a frozen study manifest by SHA."""
    _configure(common)
    fmt = _fmt(common)
    # Pure read path: resolve the manifest and dump it unchanged.
    emit_json(_freeze.replay(manifest_sha).to_dict(), fmt=fmt)
|
|
1782
|
+
|
|
1783
|
+
|
|
1784
|
+
@app.command(name="blind-handover")
def blind_handover_cmd(
    input_path: Path,
    /,
    output_path: Path,
    *,
    common: Common | None = None,
) -> None:
    """Strip identity markers from a parquet of sessions for grader-safe handover.

    Input parquet must have (session_id, text) columns. Writes the same
    parquet with text stripped and an ``original_hash`` column added.
    """
    _configure(common)
    df = pl.read_parquet(input_path)
    missing = {"session_id", "text"} - set(df.columns)
    if missing:
        raise InputValidationError(f"input parquet missing columns: {sorted(missing)}")
    # Scrub each session body; keep only the stripped text.
    scrubbed_texts = []
    for original in df["text"].to_list():
        scrubbed_texts.append(_blind_handover.strip_text(original).text)
    # Hash the session IDs so graders can be de-blinded later.
    hashes = [_blind_handover.original_hash(sid) for sid in df["session_id"].to_list()]
    out = df.with_columns(
        pl.Series("text", scrubbed_texts),
        pl.Series("original_hash", hashes),
    )
    out.write_parquet(output_path)
    logger.info("blind-handover: wrote {} stripped rows to {}", out.height, output_path)
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
@app.command(name="judge")
def judge_cmd(
    manifest_sha: str,
    /,
    *,
    sessions_parquet: Path,
    output_parquet: Path,
    dry_run: bool = True,
    concurrency: int = 4,
    region: str = "us-east-1",
    common: Common | None = None,
) -> None:
    """Dispatch a frozen study's judge panel over a sessions parquet.

    ``sessions_parquet`` must have (session_id, text) columns. Defaults to
    ``--dry-run`` per the project cost-guard convention.
    """
    _configure(common)
    fmt = _fmt(common)
    study = _freeze.replay(manifest_sha)
    df = pl.read_parquet(sessions_parquet)
    missing = {"session_id", "text"} - set(df.columns)
    if missing:
        raise InputValidationError(f"sessions parquet missing columns: {sorted(missing)}")
    # strict=True guards against a column-length mismatch in the parquet.
    sessions = list(zip(df["session_id"].to_list(), df["text"].to_list(), strict=True))
    result = _judge_worker.run(
        sessions=sessions,
        panel_shortnames=list(study.panel_shortnames),
        rubric_yaml_path=Path(study.rubric_path),
        freeze_sha=study.manifest_sha,
        out_parquet=output_parquet,
        dry_run=dry_run,
        concurrency=concurrency,
        region=region,
    )
    # A GradePlan return means nothing was dispatched: report the estimate.
    # Otherwise `result` is the list of written scores.
    if not isinstance(result, _judge_worker.GradePlan):
        emit_json({"dry_run": False, "n_scores": len(result), "out": str(output_parquet)}, fmt=fmt)
        return
    plan_fields = (
        "n_sessions",
        "n_judges",
        "n_axes",
        "n_calls",
        "est_input_tokens",
        "est_output_tokens",
        "est_usd",
    )
    plan_payload: dict = {"dry_run": True}
    for field in plan_fields:
        plan_payload[field] = getattr(result, field)
    emit_json(plan_payload, fmt=fmt)
|
|
1867
|
+
|
|
1868
|
+
|
|
1869
|
+
@app.command(name="ungrounded-claim")
def ungrounded_cmd(
    manifest_sha: str,
    /,
    *,
    turns_parquet: Path,
    output_parquet: Path,
    common: Common | None = None,
) -> None:
    """Run the ungrounded-claim detector over a turns parquet.

    ``turns_parquet`` needs (session_id, turn_idx, assistant_text,
    tool_output_text) columns. Writes per-claim grounded flags.
    """
    _configure(common)
    fmt = _fmt(common)
    study = _freeze.replay(manifest_sha)
    df = pl.read_parquet(turns_parquet)
    missing = {"session_id", "turn_idx", "assistant_text", "tool_output_text"} - set(df.columns)
    if missing:
        raise InputValidationError(f"turns parquet missing columns: {sorted(missing)}")
    # Materialize one Turn per parquet row for the detector.
    turns = []
    for row in df.iter_rows(named=True):
        turns.append(
            _ungrounded_worker.Turn(
                session_id=row["session_id"],
                turn_idx=int(row["turn_idx"]),
                assistant_text=row["assistant_text"],
                tool_output_text=row["tool_output_text"],
            )
        )
    claims = _ungrounded_worker.detect(turns, freeze_sha=study.manifest_sha)
    _ungrounded_worker.to_parquet(claims, output_parquet)
    # Print the roll-up; the per-claim detail lives in the parquet.
    emit_dataframe(_ungrounded_worker.summarize(claims), fmt=fmt)
|
|
1904
|
+
|
|
1905
|
+
|
|
1906
|
+
@app.command(name="kappa")
def kappa_cmd(
    scores_parquet: Path,
    /,
    *,
    bootstrap: int = 1000,
    floor: float = 0.6,
    delta_gate: Path | None = None,
    common: Common | None = None,
) -> None:
    """Compute Cohen's + Fleiss' kappa with bootstrapped 95% CI.

    Exits non-zero (66) if any axis has Fleiss kappa below ``--floor`` OR
    if ``--delta-gate <prior.parquet>`` is set and the delta-kappa CI
    excludes zero on any axis (pre-registered stopping rule).
    """
    _configure(common)
    fmt = _fmt(common)
    df = _kappa_worker.load_scores(scores_parquet)
    pairs = _kappa_worker.compute_pairwise(df, n_bootstrap=bootstrap)
    fleiss = _kappa_worker.compute_fleiss(df, n_bootstrap=bootstrap)
    # Rounded to 4 dp purely for the emitted report; gate decisions below
    # use the unrounded values.
    report = {
        "pairs": [
            {
                "axis": p.axis,
                "judge_a": p.judge_a,
                "judge_b": p.judge_b,
                "n_items": p.n_items,
                "kappa": round(p.kappa, 4),
                "ci_low": round(p.ci_low, 4),
                "ci_high": round(p.ci_high, 4),
            }
            for p in pairs
        ],
        "fleiss": [
            {
                "axis": f.axis,
                "n_judges": f.n_judges,
                "n_items": f.n_items,
                "kappa": round(f.kappa, 4),
                "ci_low": round(f.ci_low, 4),
                "ci_high": round(f.ci_high, 4),
                "below_floor": f.kappa < floor,
            }
            for f in fleiss
        ],
        "floor": floor,
    }
    # Gate 1: hard floor on per-axis Fleiss kappa.
    any_gate_tripped = any(row["below_floor"] for row in report["fleiss"])
    if delta_gate is not None:
        # Gate 2: delta against a prior run; axes absent from the prior
        # run are skipped rather than treated as trips.
        prior_df = _kappa_worker.load_scores(delta_gate)
        prior_fleiss = {
            f.axis: f for f in _kappa_worker.compute_fleiss(prior_df, n_bootstrap=bootstrap)
        }
        delta_rows = []
        for cur in fleiss:
            prior = prior_fleiss.get(cur.axis)
            if prior is None:
                continue
            tripped = _kappa_worker.delta_gate_excludes_zero(cur, prior, n_bootstrap=bootstrap)
            delta_rows.append(
                {
                    "axis": cur.axis,
                    "delta_excludes_zero": tripped,
                    "current_kappa": cur.kappa,
                    "prior_kappa": prior.kappa,
                }
            )
            any_gate_tripped = any_gate_tripped or tripped
        report["delta_gate"] = delta_rows
    # Emit the full report first so the caller sees the numbers even when
    # the stopping rule then forces a non-zero exit.
    emit_json(report, fmt=fmt)
    if any_gate_tripped:
        sys.exit(66)
|
|
1979
|
+
|
|
1980
|
+
|
|
1981
|
+
@app.command(name="bind")
def bind_cmd(
    *,
    repo: Path | None = None,
    commit_msg: Path | None = None,
    dry_run: bool = False,
    common: Common | None = None,
) -> None:
    """Attach the transcript-PR binding (trailers + git-notes JSON) to a commit.

    Pre-commit-hook entry point per RFC 0001 (see
    ``docs/rfc/0001-transcript-pr-binding.md``). Wires into a
    ``prepare-commit-msg`` lefthook job so the trailer lands in the
    user's editor before they confirm the message.

    Discovery order for the commit-message file:

    1. ``--commit-msg PATH`` flag if set.
    2. ``GIT_PARAMS`` / ``$1`` from the hook -- we re-read it from
       the ``CLAUDE_SQL_BIND_COMMIT_MSG`` env var, which is the
       lefthook-friendly way to pass the hook's ``{0}`` arg through.
    3. ``<repo>/.git/COMMIT_EDITMSG`` as a last-ditch fallback.

    Resolves the active transcript via
    :func:`claude_sql.binding.find_active_transcript` (latest mtime
    under ``~/.claude/projects/<projectified-cwd>/*.jsonl``); when no
    transcript is found the command exits 0 cleanly without touching
    the message — bind is best-effort by design.

    With ``--dry-run`` (default ``False``), prints the planned
    binding as JSON and writes nothing. Off ``--dry-run``, writes
    the three trailers in place and a JSON note under
    ``refs/notes/transcripts``.
    """
    _configure(common)
    fmt = _fmt(common)
    repo_path = repo.resolve() if repo is not None else _binding._resolve_repo(None)
    cwd = Path.cwd()
    transcript = _binding.find_active_transcript(cwd)
    if transcript is None:
        # Best-effort contract: no transcript → report and exit 0.
        emit_json(
            {
                "bound": False,
                "reason": "no-active-transcript",
                "cwd": str(cwd),
                "projects_dir": f"~/.claude/projects/{_binding.projectify(cwd)}",
            },
            fmt=fmt,
        )
        return
    binding = _binding.build_binding(transcript_path=transcript)

    # Commit-message file discovery, in the documented precedence order:
    # explicit flag → hook env var → .git/COMMIT_EDITMSG fallback.
    msg_path: Path | None = commit_msg
    if msg_path is None:
        env_path = os.environ.get("CLAUDE_SQL_BIND_COMMIT_MSG")
        if env_path:
            msg_path = Path(env_path)
    if msg_path is None:
        candidate = repo_path / ".git" / "COMMIT_EDITMSG"
        if candidate.exists():
            msg_path = candidate

    if dry_run:
        # Dry-run tolerates a missing message file; the plan is still useful.
        emit_json(
            {
                "bound": False,
                "dry_run": True,
                "transcript_path": str(transcript),
                "binding": binding.to_dict(),
                "note_payload": binding.to_note_payload(),
                "commit_msg_path": str(msg_path) if msg_path else None,
                "repo": str(repo_path),
            },
            fmt=fmt,
        )
        return

    if msg_path is None:
        err = ClassifiedError(
            kind="invalid_input",
            exit_code=EXIT_CODES["invalid_input"],
            message="no commit-message file found; pass --commit-msg or run from a prepare-commit-msg hook",
            hint="set --commit-msg PATH or CLAUDE_SQL_BIND_COMMIT_MSG=$1 in your hook",
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)

    try:
        _binding.write_trailer(msg_path, binding)
    except _binding.GitInvocationError as exc:
        # Trailer write failure is fatal: without it there is no binding.
        err = ClassifiedError(
            kind="runtime_error",
            exit_code=EXIT_CODES["runtime_error"],
            message=f"git interpret-trailers failed: {exc.stderr.strip()}",
            hint=None,
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)

    # Note write is best-effort: we have a HEAD commit only when bind
    # runs *after* the commit (e.g., post-commit hook). In a
    # prepare-commit-msg flow the commit doesn't exist yet, so we skip
    # the note here and the integration relies on a separate
    # post-commit step. When the caller is invoking us with --commit
    # already created (e.g., backfill), they pass --no-dry-run with a
    # repo containing HEAD.
    head_cp = _binding._run_git(
        ["git", "-C", str(repo_path), "rev-parse", "HEAD"],
    )
    if head_cp.returncode == 0:
        commit_sha = head_cp.stdout.strip()
        try:
            _binding.write_note(repo_path, commit_sha, binding)
        except _binding.GitInvocationError as exc:
            # Non-fatal; commit_sha is blanked so the output reflects
            # that no note landed on that commit.
            logger.warning("git notes write failed (non-fatal): {}", exc.stderr.strip())
            commit_sha = ""
    else:
        commit_sha = ""

    emit_json(
        {
            "bound": True,
            "dry_run": False,
            "transcript_path": str(transcript),
            "binding": binding.to_dict(),
            "commit_msg_path": str(msg_path),
            "repo": str(repo_path),
            "commit_sha": commit_sha,
        },
        fmt=fmt,
    )
|
|
2112
|
+
|
|
2113
|
+
|
|
2114
|
+
@app.command(name="resolve")
def resolve_cmd(
    commit_sha: str,
    /,
    *,
    repo: Path | None = None,
    all_sources: bool = False,
    common: Common | None = None,
) -> None:
    """Resolve a commit's bound transcript per RFC 0001 §Resolution precedence.

    Reads the ``Claude-Transcript-*`` trailers first; falls back to
    the JSON note under ``refs/notes/transcripts``; raises a loud
    error (exit 70) when both surfaces disagree on the digest.
    Returns the parsed binding as JSON.

    Flags
    -----
    --repo PATH
        Repository root. Defaults to ``git rev-parse --show-toplevel``
        from the current cwd.
    --all-sources
        Return ``{"trailer": ..., "note": ...}`` instead of merging.
        Diagnostic flow for investigating mismatches; never raises on
        disagreement.

    Exit codes
    ----------
    * 0  binding resolved cleanly (or ``--all-sources`` returned both)
    * 2  commit has no binding (no trailer, no note)
    * 65 commit not found / git invocation failed
    * 70 trailer and note disagree on digest
    """
    _configure(common)
    fmt = _fmt(common)
    # None lets the binding layer discover the repo root itself.
    repo_path = repo.resolve() if repo is not None else None
    try:
        if all_sources:
            # Diagnostic path: show both surfaces raw, never merge/raise.
            sources = _binding.resolve_all_sources(commit_sha, repo=repo_path)
            payload: dict[str, dict[str, str] | None] = {
                "trailer": sources["trailer"].to_dict() if sources["trailer"] is not None else None,
                "note": sources["note"].to_dict() if sources["note"] is not None else None,
            }
            emit_json(payload, fmt=fmt)
            return
        binding = _binding.resolve_commit_to_transcript(commit_sha, repo=repo_path)
    except _binding.BindingMismatchError as exc:
        # Trailer and note disagree → exit 70 per docstring.
        err = ClassifiedError(
            kind="runtime_error",
            exit_code=EXIT_CODES["runtime_error"],
            message=str(exc),
            hint="run `claude-sql resolve <sha> --all-sources` to see both surfaces",
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)
    except LookupError as exc:
        # No binding at all on this commit → exit 2.
        err = ClassifiedError(
            kind="no_embeddings",  # re-uses the "absent-but-not-broken" kind
            exit_code=EXIT_CODES["no_embeddings"],
            message=str(exc),
            hint="commit has no Claude-Transcript-* trailer and no refs/notes/transcripts entry",
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)
    except _binding.GitInvocationError as exc:
        # git itself failed (bad SHA, bad repo) → exit 65.
        err = ClassifiedError(
            kind="catalog_error",
            exit_code=EXIT_CODES["catalog_error"],
            message=f"git invocation failed: {exc.stderr.strip()}",
            hint="check that the commit SHA exists in --repo",
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)

    emit_json(binding.to_dict(), fmt=fmt)
|
|
2189
|
+
|
|
2190
|
+
|
|
2191
|
+
def _review_sheet_format(common: Common | None) -> OutputFormat:
    """Resolve the effective output format for the review-sheet command.

    Unlike every other subcommand (whose ``AUTO`` default is ``TABLE``),
    review-sheet output is human-first prose: ``AUTO`` becomes
    ``MARKDOWN`` when stdout is a TTY and ``JSON`` otherwise, so agents
    reading a pipe get machine-readable output without passing a flag.
    An explicit ``--format`` choice is returned untouched regardless of
    TTY state.
    """
    requested = _fmt(common)
    if requested is OutputFormat.AUTO:
        # AUTO: choose by TTY -- prose for humans, JSON for pipes.
        return OutputFormat.MARKDOWN if sys.stdout.isatty() else OutputFormat.JSON
    return requested
|
2204
|
+
|
|
2205
|
+
|
|
2206
|
+
@app.command(name="review-sheet")
def review_sheet_cmd(
    commit_sha: str,
    /,
    *,
    repo: Path | None = None,
    no_thinking: bool = False,
    dry_run: bool = True,
    common: Common | None = None,
) -> None:
    """Render a compressed PR review sheet for a merged commit.

    The commit's bound transcript is resolved through
    :func:`claude_sql.binding.resolve_commit_to_transcript` following
    RFC 0001 precedence (trailer wins, note is the fallback, and a
    trailer/note digest disagreement fails loudly). The transcript JSONL
    is flattened into one review text and Sonnet 4.6 fills the
    :class:`PRReviewSheet` schema via ``output_config.format`` structured
    output.

    ``--dry-run`` is the default, per the project cost-guard convention:
    it prints a plan dict (commit_sha, transcript_uri, transcript_digest,
    model_id, prompt_chars_estimate) and never calls Bedrock.

    Output format
    -------------
    With ``--format auto``, a TTY gets ``markdown`` (the human-readable
    sheet) and a non-TTY gets ``json`` so agents receive structured
    output without extra flags. Pass ``--format json`` /
    ``--format markdown`` explicitly to override. Dry-run plans are
    always emitted as JSON regardless of the chosen format -- plan
    output is structured by design.

    Exit codes
    ----------
    * 0 review sheet rendered (or refused; refusal still exits 0 with
      ``{"refused": true}`` in the payload).
    * 2 commit has no binding (no trailer, no note).
    * 65 commit not found / git invocation failed.
    * 70 trailer and note disagree on digest.
    """
    _configure(common)
    fmt = _review_sheet_format(common)
    settings = _resolve_settings(common)
    repo_path = None if repo is None else repo.resolve()

    def _die(kind: str, message: str, hint: str) -> None:
        # Emit one classified error and exit with its canonical code.
        err = ClassifiedError(
            kind=kind,
            exit_code=EXIT_CODES[kind],
            message=message,
            hint=hint,
        )
        emit_error(err, fmt)
        sys.exit(err.exit_code)

    try:
        # Pre-resolve the binding here (the worker would otherwise re-run
        # resolve internally when ``transcript_uri_override`` is unset) so
        # LookupError / mismatch / git failures map onto the canonical CLI
        # exit codes before any DuckDB connection is opened.
        binding = _binding.resolve_commit_to_transcript(commit_sha, repo=repo_path)
    except _binding.BindingMismatchError as exc:
        _die(
            "runtime_error",
            str(exc),
            "run `claude-sql resolve <sha> --all-sources` to see both surfaces",
        )
    except LookupError as exc:
        _die(
            "no_embeddings",
            str(exc),
            "commit has no Claude-Transcript-* trailer and no refs/notes/transcripts entry",
        )
    except _binding.GitInvocationError as exc:
        _die(
            "catalog_error",
            f"git invocation failed: {exc.stderr.strip()}",
            "check that the commit SHA exists in --repo",
        )

    # Pass the pre-resolved URI through the override so the worker skips a
    # second round-trip to git (and stays testable without a repo).
    result = generate_review_sheet(
        None,
        settings,
        commit_sha=commit_sha,
        transcript_uri_override=binding.uri,
        dry_run=dry_run,
        no_thinking=no_thinking,
    )

    if dry_run:
        # The plan is structured by design: emit JSON even when the user
        # asked for markdown, because there is no narrative to render yet.
        emit_json(result.get("plan", result), fmt=OutputFormat.JSON)
        return

    if result.get("refused"):
        if fmt is OutputFormat.MARKDOWN:
            metadata = result.get("metadata") or {"commit_sha": commit_sha}
            print(render_refusal_markdown(str(result.get("reason", "")), metadata))
        else:
            emit_json(result, fmt=fmt)
        return

    sheet = result.get("sheet") or {}
    metadata = result.get("metadata") or {}
    if fmt is OutputFormat.MARKDOWN:
        print(render_markdown(sheet, metadata))
    else:
        emit_json({"sheet": sheet, "metadata": metadata}, fmt=fmt)
|
|
2320
|
+
|
|
2321
|
+
@app.default
def _default(*, common: Common | None = None) -> None:
    """Print a usage hint when ``claude-sql`` runs with no subcommand."""
    del common  # accepted only so the shared flags still parse; unused here
    hint_lines = (
        "claude-sql - pass a subcommand or --help",
        "  schema | query | explain | shell | list-cache",
        "  embed | search",
        "  classify | trajectory | conflicts | friction | cluster | terms | community | analyze",
        "  judges | freeze | replay | judge | ungrounded-claim | kappa | blind-handover",
        "  bind | resolve | review-sheet",
    )
    print("\n".join(hint_lines))
|
|
2331
|
+
|
|
2332
|
+
|
|
2333
|
+
# ---------------------------------------------------------------------------
|
|
2334
|
+
# Entry point
|
|
2335
|
+
# ---------------------------------------------------------------------------
|
|
2336
|
+
|
|
2337
|
+
|
|
2338
|
+
def main() -> None:
    """Console-script entry point (``[project.scripts]`` in ``pyproject.toml``)."""
    app()
|
|
2341
|
+
|
|
2342
|
+
|
|
2343
|
+
if __name__ == "__main__":
    # Support running the module directly during development, in addition
    # to the installed ``claude-sql`` console script.
    main()
|