claude-sql 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {claude_sql-0.5.0 → claude_sql-0.6.0}/PKG-INFO +2 -1
- {claude_sql-0.5.0 → claude_sql-0.6.0}/README.md +1 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/pyproject.toml +1 -1
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/cli.py +203 -74
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/config.py +2 -1
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/output.py +10 -4
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/sql_views.py +570 -257
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/__init__.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/binding.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/blind_handover.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/checkpointer.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/cluster_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/community_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/embed_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/freeze.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/friction_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/install_source.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/judge_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/judges.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/kappa_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/llm_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/logging_setup.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/parquet_shards.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/retry_queue.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_render.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/schemas.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/session_text.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/skills_catalog.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/terms_worker.py +0 -0
- {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/ungrounded_worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: claude-sql
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts.
|
|
5
5
|
Keywords: claude,claude-code,anthropic,duckdb,sql,semantic-search,embeddings,bedrock,transcripts,analytics,observability
|
|
6
6
|
Author: Laith Al-Saadoon
|
|
@@ -336,6 +336,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
336
336
|
|
|
337
337
|
| Macro | Signature | What it does |
|
|
338
338
|
|---|---|---|
|
|
339
|
+
| `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
|
|
339
340
|
| `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
|
|
340
341
|
| `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
|
|
341
342
|
| `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
|
|
@@ -290,6 +290,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
290
290
|
|
|
291
291
|
| Macro | Signature | What it does |
|
|
292
292
|
|---|---|---|
|
|
293
|
+
| `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
|
|
293
294
|
| `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
|
|
294
295
|
| `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
|
|
295
296
|
| `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
|
|
@@ -30,6 +30,7 @@ import tempfile
|
|
|
30
30
|
import time
|
|
31
31
|
from dataclasses import dataclass
|
|
32
32
|
from datetime import UTC, datetime
|
|
33
|
+
from enum import StrEnum
|
|
33
34
|
from pathlib import Path
|
|
34
35
|
from typing import Annotated
|
|
35
36
|
|
|
@@ -81,8 +82,11 @@ from claude_sql.parquet_shards import (
|
|
|
81
82
|
from claude_sql.review_sheet_render import render_markdown, render_refusal_markdown
|
|
82
83
|
from claude_sql.review_sheet_worker import generate_review_sheet
|
|
83
84
|
from claude_sql.sql_views import (
|
|
84
|
-
|
|
85
|
-
|
|
85
|
+
MACRO_NAMES,
|
|
86
|
+
MACRO_SIGNATURES,
|
|
87
|
+
VIEW_NAMES,
|
|
88
|
+
VIEW_SCHEMA,
|
|
89
|
+
_parquet_is_populated,
|
|
86
90
|
register_all,
|
|
87
91
|
register_raw,
|
|
88
92
|
register_views,
|
|
@@ -103,12 +107,19 @@ Surfaces at a glance
|
|
|
103
107
|
cluster / terms / community UMAP+HDBSCAN, c-TF-IDF, Leiden+CPM
|
|
104
108
|
analyze composite pipeline over every stage above
|
|
105
109
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
110
|
+
!! FLAG PLACEMENT — flags attach to a SUBCOMMAND, not the binary !!
|
|
111
|
+
-------------------------------------------------------------------
|
|
112
|
+
Every flag (--format, --quiet, --verbose, --glob, --subagent-glob, and
|
|
113
|
+
every per-command flag) goes AFTER the subcommand name. This applies
|
|
114
|
+
to global-feeling flags too: `--quiet` must come after the subcommand.
|
|
115
|
+
|
|
116
|
+
OK:
|
|
109
117
|
claude-sql query --format json "SELECT 1"
|
|
118
|
+
claude-sql schema --quiet --format json
|
|
110
119
|
claude-sql classify --no-dry-run --limit 5
|
|
111
|
-
|
|
120
|
+
FAIL (cyclopts: "Unused Tokens: ['schema']"):
|
|
121
|
+
claude-sql --quiet schema --format json
|
|
122
|
+
FAIL (flag swallowed as subcommand arg):
|
|
112
123
|
claude-sql --format json query "SELECT 1"
|
|
113
124
|
|
|
114
125
|
Output & exit codes
|
|
@@ -244,15 +255,15 @@ def _resolve_memory_limit(limit: str) -> str:
|
|
|
244
255
|
return f"{target_mib}MiB"
|
|
245
256
|
|
|
246
257
|
|
|
247
|
-
def _open_connection(settings: Settings) -> duckdb.DuckDBPyConnection:
|
|
248
|
-
"""
|
|
258
|
+
def _apply_duckdb_pragmas(con: duckdb.DuckDBPyConnection, settings: Settings) -> None:
|
|
259
|
+
"""Set the tuning PRAGMAs both connection helpers share.
|
|
249
260
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
261
|
+
Centralized so :func:`_open_connection_full` and
|
|
262
|
+
:func:`_open_connection_introspect` stay in sync. Threads, memory_limit,
|
|
263
|
+
and temp_directory all come from ``settings``; the spill directory is
|
|
264
|
+
materialized on disk before DuckDB sees the path because DuckDB will
|
|
265
|
+
happily fail later when it tries to write a spill file.
|
|
254
266
|
"""
|
|
255
|
-
con = duckdb.connect(":memory:")
|
|
256
267
|
settings.duckdb_temp_dir.mkdir(parents=True, exist_ok=True)
|
|
257
268
|
memory_limit = _resolve_memory_limit(settings.duckdb_memory_limit)
|
|
258
269
|
con.execute(f"SET threads = {int(settings.duckdb_threads)}")
|
|
@@ -260,10 +271,48 @@ def _open_connection(settings: Settings) -> duckdb.DuckDBPyConnection:
|
|
|
260
271
|
con.execute(f"SET temp_directory = '{settings.duckdb_temp_dir}'")
|
|
261
272
|
con.execute("SET enable_object_cache = true")
|
|
262
273
|
con.execute("SET preserve_insertion_order = false")
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _open_connection_full(settings: Settings) -> duckdb.DuckDBPyConnection:
|
|
277
|
+
"""Open an in-memory DuckDB connection with every claude-sql object wired.
|
|
278
|
+
|
|
279
|
+
Tuning PRAGMAs are set before view registration so the registration
|
|
280
|
+
queries themselves benefit from the higher thread count and the spill
|
|
281
|
+
directory pointed at real disk (Amazon devboxes ship ``/tmp`` as a
|
|
282
|
+
4 GB tmpfs that thrashes once a clustering run starts spilling).
|
|
283
|
+
"""
|
|
284
|
+
con = duckdb.connect(":memory:")
|
|
285
|
+
_apply_duckdb_pragmas(con, settings)
|
|
263
286
|
register_all(con, settings=settings)
|
|
264
287
|
return con
|
|
265
288
|
|
|
266
289
|
|
|
290
|
+
def _open_connection_introspect(settings: Settings) -> duckdb.DuckDBPyConnection:
|
|
291
|
+
"""Bare DuckDB connection — PRAGMAs only, no view/macro registration.
|
|
292
|
+
|
|
293
|
+
For commands that don't need the catalog (``schema`` reads the static
|
|
294
|
+
:data:`VIEW_SCHEMA` dict; trivial scalar queries like ``SELECT 1`` or
|
|
295
|
+
``SELECT current_timestamp`` don't reference any view). Returning a
|
|
296
|
+
bare connection avoids the ~25 s :func:`register_all` chain entirely.
|
|
297
|
+
"""
|
|
298
|
+
con = duckdb.connect(":memory:")
|
|
299
|
+
_apply_duckdb_pragmas(con, settings)
|
|
300
|
+
return con
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _sql_uses_catalog(sql: str) -> bool:
|
|
304
|
+
"""Cheap pre-flight: does ``sql`` reference any registered view/macro?
|
|
305
|
+
|
|
306
|
+
Substring-matches against ``VIEW_NAMES + MACRO_NAMES`` (case-insensitive).
|
|
307
|
+
False positives (a string literal containing ``'sessions'``) just trigger
|
|
308
|
+
the slow path — no correctness regression. False negatives can't happen
|
|
309
|
+
if the user genuinely references a view: the name has to appear in the
|
|
310
|
+
SQL text.
|
|
311
|
+
"""
|
|
312
|
+
lowered = sql.lower()
|
|
313
|
+
return any(name.lower() in lowered for name in (*VIEW_NAMES, *MACRO_NAMES))
|
|
314
|
+
|
|
315
|
+
|
|
267
316
|
def _emit_worker_result(result: int | dict, common: Common | None, pipeline: str) -> None:
|
|
268
317
|
"""Normalize worker results for stdout.
|
|
269
318
|
|
|
@@ -515,7 +564,11 @@ def query(
|
|
|
515
564
|
_configure(common)
|
|
516
565
|
settings = _resolve_settings(common)
|
|
517
566
|
fmt = _fmt(common)
|
|
518
|
-
con = _open_connection(settings)
|
|
567
|
+
con = (
|
|
568
|
+
_open_connection_full(settings)
|
|
569
|
+
if _sql_uses_catalog(sql)
|
|
570
|
+
else _open_connection_introspect(settings)
|
|
571
|
+
)
|
|
519
572
|
try:
|
|
520
573
|
profile_path: Path | None = None
|
|
521
574
|
if profile_json:
|
|
@@ -561,7 +614,11 @@ def explain(
|
|
|
561
614
|
_configure(common)
|
|
562
615
|
settings = _resolve_settings(common)
|
|
563
616
|
fmt = resolve_format(_fmt(common))
|
|
564
|
-
con = _open_connection(settings)
|
|
617
|
+
con = (
|
|
618
|
+
_open_connection_full(settings)
|
|
619
|
+
if _sql_uses_catalog(sql)
|
|
620
|
+
else _open_connection_introspect(settings)
|
|
621
|
+
)
|
|
565
622
|
try:
|
|
566
623
|
profile_path: Path | None = None
|
|
567
624
|
if profile_json:
|
|
@@ -585,9 +642,58 @@ def explain(
|
|
|
585
642
|
con.close()
|
|
586
643
|
|
|
587
644
|
|
|
645
|
+
def _compute_cached_map(settings: Settings) -> dict[str, bool]:
|
|
646
|
+
"""Map analytics view + analytics macro names to parquet-existence.
|
|
647
|
+
|
|
648
|
+
A name appears in this map when its data lives in a parquet that may
|
|
649
|
+
or may not exist on disk (the v2 analytics surface). v1 views always
|
|
650
|
+
have data (the transcript globs are always present), so they don't
|
|
651
|
+
appear here. Use ``list-cache`` for byte counts / mtimes / row counts.
|
|
652
|
+
"""
|
|
653
|
+
# Analytics view → backing parquet path on Settings.
|
|
654
|
+
view_paths: dict[str, Path] = {
|
|
655
|
+
"session_classifications": settings.classifications_parquet_path,
|
|
656
|
+
"session_goals": settings.classifications_parquet_path,
|
|
657
|
+
"message_trajectory": settings.trajectory_parquet_path,
|
|
658
|
+
"session_conflicts": settings.conflicts_parquet_path,
|
|
659
|
+
"message_clusters": settings.clusters_parquet_path,
|
|
660
|
+
"cluster_terms": settings.cluster_terms_parquet_path,
|
|
661
|
+
"session_communities": settings.communities_parquet_path,
|
|
662
|
+
"community_profile": settings.community_profile_parquet_path,
|
|
663
|
+
"user_friction": settings.user_friction_parquet_path,
|
|
664
|
+
"skills_catalog": settings.skills_catalog_parquet_path,
|
|
665
|
+
"skill_usage": settings.skills_catalog_parquet_path,
|
|
666
|
+
}
|
|
667
|
+
cached: dict[str, bool] = {
|
|
668
|
+
name: _parquet_is_populated(path) for name, path in view_paths.items()
|
|
669
|
+
}
|
|
670
|
+
# Analytics macros depend on the same parquets as their backing views;
|
|
671
|
+
# surface them too so an agent asking "is friction_rate ready?" gets a
|
|
672
|
+
# direct yes/no without re-deriving the dependency.
|
|
673
|
+
macro_paths: dict[str, tuple[Path, ...]] = {
|
|
674
|
+
"autonomy_trend": (settings.classifications_parquet_path,),
|
|
675
|
+
"work_mix": (settings.classifications_parquet_path,),
|
|
676
|
+
"success_rate_by_work": (settings.classifications_parquet_path,),
|
|
677
|
+
"cluster_top_terms": (settings.cluster_terms_parquet_path,),
|
|
678
|
+
"community_top_topics": (
|
|
679
|
+
settings.cluster_terms_parquet_path,
|
|
680
|
+
settings.communities_parquet_path,
|
|
681
|
+
settings.clusters_parquet_path,
|
|
682
|
+
),
|
|
683
|
+
"sentiment_arc": (settings.trajectory_parquet_path,),
|
|
684
|
+
"friction_counts": (settings.user_friction_parquet_path,),
|
|
685
|
+
"friction_rate": (settings.user_friction_parquet_path,),
|
|
686
|
+
"friction_examples": (settings.user_friction_parquet_path,),
|
|
687
|
+
"unused_skills": (settings.skills_catalog_parquet_path,),
|
|
688
|
+
}
|
|
689
|
+
for macro_name, paths in macro_paths.items():
|
|
690
|
+
cached[macro_name] = all(_parquet_is_populated(p) for p in paths)
|
|
691
|
+
return cached
|
|
692
|
+
|
|
693
|
+
|
|
588
694
|
@app.command
|
|
589
695
|
def schema(*, common: Common | None = None) -> None:
|
|
590
|
-
"""List every registered view (with columns) and every macro
|
|
696
|
+
"""List every registered view (with columns) and every macro signature.
|
|
591
697
|
|
|
592
698
|
When to use
|
|
593
699
|
-----------
|
|
@@ -596,6 +702,15 @@ def schema(*, common: Common | None = None) -> None:
|
|
|
596
702
|
calls -- e.g., ``session_classifications`` uses both ``autonomy_tier``
|
|
597
703
|
(canonical) and ``autonomy`` (alias), and the schema lists both.
|
|
598
704
|
|
|
705
|
+
Implementation
|
|
706
|
+
--------------
|
|
707
|
+
Reads the static :data:`VIEW_SCHEMA` and :data:`MACRO_SIGNATURES`
|
|
708
|
+
dicts -- no DuckDB connection, no JSON schema inference, no view
|
|
709
|
+
registration. Sub-50ms even on large corpora. The ``cached`` map is
|
|
710
|
+
keyed by analytics view + analytics macro names so an agent can tell
|
|
711
|
+
which parquet-backed entries are populated; use ``list-cache`` for
|
|
712
|
+
byte counts and mtimes.
|
|
713
|
+
|
|
599
714
|
Output shape (non-TTY / JSON)
|
|
600
715
|
-----------------------------
|
|
601
716
|
::
|
|
@@ -604,42 +719,39 @@ def schema(*, common: Common | None = None) -> None:
|
|
|
604
719
|
"views": {
|
|
605
720
|
"sessions": [{"column": "session_id", "type": "VARCHAR"}, ...],
|
|
606
721
|
"messages": [...],
|
|
607
|
-
"session_classifications": [...], // only if parquet exists
|
|
608
722
|
...
|
|
609
723
|
},
|
|
610
|
-
"macros": ["
|
|
724
|
+
"macros": [{"name": "ago", "params": ["interval_text"]}, ...],
|
|
725
|
+
"cached": {"user_friction": false, "friction_rate": false, ...}
|
|
611
726
|
}
|
|
612
727
|
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
728
|
+
Only v1 (transcript-derived) views appear under ``views`` -- v2
|
|
729
|
+
analytics views are parquet-backed; their schema source-of-truth is
|
|
730
|
+
the parquet metadata. Use ``cached`` to see which v2 views can be
|
|
731
|
+
queried right now.
|
|
616
732
|
"""
|
|
617
733
|
_configure(common)
|
|
618
734
|
settings = _resolve_settings(common)
|
|
619
735
|
fmt = resolve_format(_fmt(common))
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
views
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
}
|
|
640
|
-
emit_json(payload, fmt)
|
|
641
|
-
finally:
|
|
642
|
-
con.close()
|
|
736
|
+
cached = _compute_cached_map(settings)
|
|
737
|
+
payload = {
|
|
738
|
+
"views": {
|
|
739
|
+
name: [{"column": c, "type": t} for c, t in cols] for name, cols in VIEW_SCHEMA.items()
|
|
740
|
+
},
|
|
741
|
+
"macros": [{"name": n, "params": list(p)} for n, p in MACRO_SIGNATURES.items()],
|
|
742
|
+
"cached": cached,
|
|
743
|
+
}
|
|
744
|
+
if fmt is OutputFormat.TABLE:
|
|
745
|
+
for name, cols in VIEW_SCHEMA.items():
|
|
746
|
+
print(f"\n\033[1m{name}\033[0m ({len(cols)} cols)")
|
|
747
|
+
for col, col_type in cols:
|
|
748
|
+
print(f" {col:<28} {col_type}")
|
|
749
|
+
print(f"\n\033[1mMacros\033[0m ({len(MACRO_SIGNATURES)})")
|
|
750
|
+
for n, params in MACRO_SIGNATURES.items():
|
|
751
|
+
tag = "" if cached.get(n, True) else " [cache empty]"
|
|
752
|
+
print(f" {n}({', '.join(params)}){tag}")
|
|
753
|
+
return
|
|
754
|
+
emit_json(payload, fmt)
|
|
643
755
|
|
|
644
756
|
|
|
645
757
|
@app.command(name="list-cache")
|
|
@@ -1173,7 +1285,7 @@ def search(
|
|
|
1173
1285
|
_configure(common)
|
|
1174
1286
|
settings = _resolve_settings(common)
|
|
1175
1287
|
fmt = _fmt(common)
|
|
1176
|
-
con = _open_connection(settings)
|
|
1288
|
+
con = _open_connection_full(settings)
|
|
1177
1289
|
try:
|
|
1178
1290
|
row = con.execute("SELECT count(*) FROM message_embeddings").fetchone()
|
|
1179
1291
|
count = int(row[0]) if row else 0
|
|
@@ -1263,7 +1375,7 @@ def classify(
|
|
|
1263
1375
|
"""
|
|
1264
1376
|
_configure(common)
|
|
1265
1377
|
settings = _resolve_settings(common)
|
|
1266
|
-
con = _open_connection(settings)
|
|
1378
|
+
con = _open_connection_full(settings)
|
|
1267
1379
|
try:
|
|
1268
1380
|
result = classify_sessions(
|
|
1269
1381
|
con,
|
|
@@ -1311,7 +1423,7 @@ def trajectory(
|
|
|
1311
1423
|
"""
|
|
1312
1424
|
_configure(common)
|
|
1313
1425
|
settings = _resolve_settings(common)
|
|
1314
|
-
con = _open_connection(settings)
|
|
1426
|
+
con = _open_connection_full(settings)
|
|
1315
1427
|
try:
|
|
1316
1428
|
result = trajectory_messages(
|
|
1317
1429
|
con,
|
|
@@ -1355,7 +1467,7 @@ def conflicts(
|
|
|
1355
1467
|
"""
|
|
1356
1468
|
_configure(common)
|
|
1357
1469
|
settings = _resolve_settings(common)
|
|
1358
|
-
con = _open_connection(settings)
|
|
1470
|
+
con = _open_connection_full(settings)
|
|
1359
1471
|
try:
|
|
1360
1472
|
result = detect_conflicts(
|
|
1361
1473
|
con,
|
|
@@ -1405,7 +1517,7 @@ def friction(
|
|
|
1405
1517
|
"""
|
|
1406
1518
|
_configure(common)
|
|
1407
1519
|
settings = _resolve_settings(common)
|
|
1408
|
-
con = _open_connection(settings)
|
|
1520
|
+
con = _open_connection_full(settings)
|
|
1409
1521
|
try:
|
|
1410
1522
|
result = detect_user_friction(
|
|
1411
1523
|
con,
|
|
@@ -1472,7 +1584,7 @@ def terms(*, force: bool = False, common: Common | None = None) -> None:
|
|
|
1472
1584
|
"""
|
|
1473
1585
|
_configure(common)
|
|
1474
1586
|
settings = _resolve_settings(common)
|
|
1475
|
-
con = _open_connection(settings)
|
|
1587
|
+
con = _open_connection_full(settings)
|
|
1476
1588
|
try:
|
|
1477
1589
|
tstats = run_terms(con, settings, force=force)
|
|
1478
1590
|
logger.info(
|
|
@@ -1560,7 +1672,7 @@ def community(
|
|
|
1560
1672
|
emit_error(err, fmt)
|
|
1561
1673
|
sys.exit(err.exit_code)
|
|
1562
1674
|
|
|
1563
|
-
con = _open_connection(settings)
|
|
1675
|
+
con = _open_connection_full(settings)
|
|
1564
1676
|
try:
|
|
1565
1677
|
if neighbors_of_session is not None:
|
|
1566
1678
|
df = neighbors_of(con, settings, neighbors_of_session, top_k=top_k)
|
|
@@ -1704,7 +1816,7 @@ def analyze(
|
|
|
1704
1816
|
|
|
1705
1817
|
# 1. Embed (reuses embed_worker). Silently skipped if the parquet is up to date.
|
|
1706
1818
|
if not skip_embed:
|
|
1707
|
-
con = _open_connection(settings)
|
|
1819
|
+
con = _open_connection_full(settings)
|
|
1708
1820
|
try:
|
|
1709
1821
|
n = asyncio.run(
|
|
1710
1822
|
run_backfill(
|
|
@@ -1728,7 +1840,7 @@ def analyze(
|
|
|
1728
1840
|
stats["clusters"],
|
|
1729
1841
|
stats["noise"],
|
|
1730
1842
|
)
|
|
1731
|
-
con = _open_connection(settings)
|
|
1843
|
+
con = _open_connection_full(settings)
|
|
1732
1844
|
try:
|
|
1733
1845
|
tstats = run_terms(con, settings, force=force_cluster)
|
|
1734
1846
|
logger.info(
|
|
@@ -1741,7 +1853,7 @@ def analyze(
|
|
|
1741
1853
|
|
|
1742
1854
|
# 3. Community detection (non-LLM, runs in parallel conceptually with cluster).
|
|
1743
1855
|
if not skip_community:
|
|
1744
|
-
con = _open_connection(settings)
|
|
1856
|
+
con = _open_connection_full(settings)
|
|
1745
1857
|
try:
|
|
1746
1858
|
cstats = run_communities(con, settings, force=force_community)
|
|
1747
1859
|
logger.info(
|
|
@@ -1754,7 +1866,7 @@ def analyze(
|
|
|
1754
1866
|
|
|
1755
1867
|
# 4. Session classification (LLM).
|
|
1756
1868
|
if not skip_classify:
|
|
1757
|
-
con = _open_connection(settings)
|
|
1869
|
+
con = _open_connection_full(settings)
|
|
1758
1870
|
try:
|
|
1759
1871
|
n = classify_sessions(
|
|
1760
1872
|
con,
|
|
@@ -1770,7 +1882,7 @@ def analyze(
|
|
|
1770
1882
|
|
|
1771
1883
|
# 5. Trajectory (LLM).
|
|
1772
1884
|
if not skip_trajectory:
|
|
1773
|
-
con = _open_connection(settings)
|
|
1885
|
+
con = _open_connection_full(settings)
|
|
1774
1886
|
try:
|
|
1775
1887
|
n = trajectory_messages(
|
|
1776
1888
|
con,
|
|
@@ -1786,7 +1898,7 @@ def analyze(
|
|
|
1786
1898
|
|
|
1787
1899
|
# 6. Conflicts (LLM, requires full session context).
|
|
1788
1900
|
if not skip_conflicts:
|
|
1789
|
-
con = _open_connection(settings)
|
|
1901
|
+
con = _open_connection_full(settings)
|
|
1790
1902
|
try:
|
|
1791
1903
|
n = detect_conflicts(
|
|
1792
1904
|
con,
|
|
@@ -1802,7 +1914,7 @@ def analyze(
|
|
|
1802
1914
|
|
|
1803
1915
|
# 7. Friction (LLM, short-message scope).
|
|
1804
1916
|
if not skip_friction:
|
|
1805
|
-
con = _open_connection(settings)
|
|
1917
|
+
con = _open_connection_full(settings)
|
|
1806
1918
|
try:
|
|
1807
1919
|
n = detect_user_friction(
|
|
1808
1920
|
con,
|
|
@@ -2297,19 +2409,33 @@ def resolve_cmd(
|
|
|
2297
2409
|
emit_json(binding.to_dict(), fmt=fmt)
|
|
2298
2410
|
|
|
2299
2411
|
|
|
2300
|
-
|
|
2301
|
-
"""
|
|
2412
|
+
class RenderFormat(StrEnum):
|
|
2413
|
+
"""``review-sheet`` render targets.
|
|
2414
|
+
|
|
2415
|
+
Local to ``review-sheet`` because no other subcommand emits human prose.
|
|
2416
|
+
Keeping markdown out of the global :class:`OutputFormat` keeps
|
|
2417
|
+
``--format`` honest on every other subcommand (only renderers that
|
|
2418
|
+
actually support markdown get to advertise it).
|
|
2419
|
+
"""
|
|
2420
|
+
|
|
2421
|
+
MARKDOWN = "markdown"
|
|
2422
|
+
JSON = "json"
|
|
2423
|
+
|
|
2424
|
+
|
|
2425
|
+
def _review_sheet_format(common: Common | None) -> RenderFormat:
|
|
2426
|
+
"""Pick the review-sheet effective render format.
|
|
2302
2427
|
|
|
2303
2428
|
Default policy diverges from every other subcommand: review-sheet
|
|
2304
|
-
output is human-first prose, so ``
|
|
2305
|
-
a TTY (override of the global ``TABLE`` default) and
|
|
2306
|
-
off-TTY.
|
|
2307
|
-
|
|
2429
|
+
output is human-first prose, so ``--format auto`` resolves to
|
|
2430
|
+
``MARKDOWN`` on a TTY (override of the global ``TABLE`` default) and
|
|
2431
|
+
``JSON`` off-TTY. ``--format json`` always pins JSON; every other
|
|
2432
|
+
``OutputFormat`` value resolves to ``MARKDOWN`` on a TTY and ``JSON``
|
|
2433
|
+
off-TTY (table/ndjson/csv are not meaningful for the prose shape).
|
|
2308
2434
|
"""
|
|
2309
2435
|
fmt = _fmt(common)
|
|
2310
|
-
if fmt is
|
|
2311
|
-
return
|
|
2312
|
-
return
|
|
2436
|
+
if fmt is OutputFormat.JSON:
|
|
2437
|
+
return RenderFormat.JSON
|
|
2438
|
+
return RenderFormat.MARKDOWN if sys.stdout.isatty() else RenderFormat.JSON
|
|
2313
2439
|
|
|
2314
2440
|
|
|
2315
2441
|
@app.command(name="review-sheet")
|
|
@@ -2355,6 +2481,9 @@ def review_sheet_cmd(
|
|
|
2355
2481
|
"""
|
|
2356
2482
|
_configure(common)
|
|
2357
2483
|
fmt = _review_sheet_format(common)
|
|
2484
|
+
# Error output follows the global rule (TABLE on TTY, JSON off-TTY) — the
|
|
2485
|
+
# render format is only meaningful for the success-path narrative.
|
|
2486
|
+
error_fmt = _fmt(common)
|
|
2358
2487
|
settings = _resolve_settings(common)
|
|
2359
2488
|
repo_path = repo.resolve() if repo is not None else None
|
|
2360
2489
|
|
|
@@ -2371,7 +2500,7 @@ def review_sheet_cmd(
|
|
|
2371
2500
|
message=str(exc),
|
|
2372
2501
|
hint="run `claude-sql resolve <sha> --all-sources` to see both surfaces",
|
|
2373
2502
|
)
|
|
2374
|
-
emit_error(err, fmt)
|
|
2503
|
+
emit_error(err, error_fmt)
|
|
2375
2504
|
sys.exit(err.exit_code)
|
|
2376
2505
|
except LookupError as exc:
|
|
2377
2506
|
err = ClassifiedError(
|
|
@@ -2380,7 +2509,7 @@ def review_sheet_cmd(
|
|
|
2380
2509
|
message=str(exc),
|
|
2381
2510
|
hint="commit has no Claude-Transcript-* trailer and no refs/notes/transcripts entry",
|
|
2382
2511
|
)
|
|
2383
|
-
emit_error(err, fmt)
|
|
2512
|
+
emit_error(err, error_fmt)
|
|
2384
2513
|
sys.exit(err.exit_code)
|
|
2385
2514
|
except _binding.GitInvocationError as exc:
|
|
2386
2515
|
err = ClassifiedError(
|
|
@@ -2389,7 +2518,7 @@ def review_sheet_cmd(
|
|
|
2389
2518
|
message=f"git invocation failed: {exc.stderr.strip()}",
|
|
2390
2519
|
hint="check that the commit SHA exists in --repo",
|
|
2391
2520
|
)
|
|
2392
|
-
emit_error(err, fmt)
|
|
2521
|
+
emit_error(err, error_fmt)
|
|
2393
2522
|
sys.exit(err.exit_code)
|
|
2394
2523
|
|
|
2395
2524
|
# Hand the resolved URI through the override so the worker doesn't
|
|
@@ -2412,19 +2541,19 @@ def review_sheet_cmd(
|
|
|
2412
2541
|
return
|
|
2413
2542
|
|
|
2414
2543
|
if result.get("refused"):
|
|
2415
|
-
if fmt is OutputFormat.MARKDOWN:
|
|
2544
|
+
if fmt is RenderFormat.MARKDOWN:
|
|
2416
2545
|
metadata = result.get("metadata") or {"commit_sha": commit_sha}
|
|
2417
2546
|
print(render_refusal_markdown(str(result.get("reason", "")), metadata))
|
|
2418
2547
|
return
|
|
2419
|
-
emit_json(result, fmt=fmt)
|
|
2548
|
+
emit_json(result, fmt=OutputFormat.JSON)
|
|
2420
2549
|
return
|
|
2421
2550
|
|
|
2422
2551
|
sheet = result.get("sheet") or {}
|
|
2423
2552
|
metadata = result.get("metadata") or {}
|
|
2424
|
-
if fmt is OutputFormat.MARKDOWN:
|
|
2553
|
+
if fmt is RenderFormat.MARKDOWN:
|
|
2425
2554
|
print(render_markdown(sheet, metadata))
|
|
2426
2555
|
return
|
|
2427
|
-
emit_json({"sheet": sheet, "metadata": metadata}, fmt=fmt)
|
|
2556
|
+
emit_json({"sheet": sheet, "metadata": metadata}, fmt=OutputFormat.JSON)
|
|
2428
2557
|
|
|
2429
2558
|
|
|
2430
2559
|
@app.default
|
|
@@ -312,7 +312,8 @@ class Settings(BaseSettings):
|
|
|
312
312
|
tfidf_top_n_terms: int = 10
|
|
313
313
|
|
|
314
314
|
# ------------------------------------------------------------------
|
|
315
|
-
# DuckDB engine tuning — applied as PRAGMAs in cli._open_connection.
|
|
315
|
+
# DuckDB engine tuning — applied as PRAGMAs in cli._open_connection_full
|
|
316
|
+
# and cli._open_connection_introspect.
|
|
316
317
|
# ------------------------------------------------------------------
|
|
317
318
|
#: Worker threads. Defaults to ``os.cpu_count()`` so DuckDB uses every
|
|
318
319
|
#: core; agents and CI runners with limited parallelism can override.
|
|
@@ -24,11 +24,17 @@ import polars as pl
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class OutputFormat(StrEnum):
|
|
27
|
-
"""Supported output formats.
|
|
27
|
+
"""Supported output formats for tabular and structured CLI output.
|
|
28
28
|
|
|
29
29
|
``AUTO`` resolves to ``TABLE`` when stdout is a TTY and ``JSON`` otherwise.
|
|
30
30
|
Keeping it a string Enum lets cyclopts parse ``--format json`` without any
|
|
31
31
|
custom converter.
|
|
32
|
+
|
|
33
|
+
Markdown rendering is intentionally absent: only ``review-sheet`` emits
|
|
34
|
+
human prose, and it owns its own ``--render`` flag (see
|
|
35
|
+
:class:`claude_sql.cli.RenderFormat`). Pulling markdown into this enum
|
|
36
|
+
advertised the format on every subcommand even though no other command
|
|
37
|
+
knows how to produce it.
|
|
32
38
|
"""
|
|
33
39
|
|
|
34
40
|
AUTO = "auto"
|
|
@@ -36,7 +42,6 @@ class OutputFormat(StrEnum):
|
|
|
36
42
|
JSON = "json"
|
|
37
43
|
NDJSON = "ndjson"
|
|
38
44
|
CSV = "csv"
|
|
39
|
-
MARKDOWN = "markdown"
|
|
40
45
|
|
|
41
46
|
|
|
42
47
|
# Exit codes that agents can rely on. Keep them stable -- wire protocols
|
|
@@ -98,8 +103,9 @@ def emit_dataframe(
|
|
|
98
103
|
if resolved is OutputFormat.CSV:
|
|
99
104
|
df.write_csv(sys.stdout)
|
|
100
105
|
return
|
|
101
|
-
# unreachable
|
|
102
|
-
|
|
106
|
+
# Defensive: unreachable while OutputFormat stays closed-set
|
|
107
|
+
# (auto/table/json/ndjson/csv). Kept as a guard for future enum additions.
|
|
108
|
+
raise ValueError(f"Unsupported format: {resolved}") # pragma: no cover
|
|
103
109
|
|
|
104
110
|
|
|
105
111
|
def emit_json(payload: Any, fmt: OutputFormat | str = OutputFormat.AUTO) -> None:
|