claude-sql 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {claude_sql-0.5.0 → claude_sql-0.6.0}/PKG-INFO +2 -1
  2. {claude_sql-0.5.0 → claude_sql-0.6.0}/README.md +1 -0
  3. {claude_sql-0.5.0 → claude_sql-0.6.0}/pyproject.toml +1 -1
  4. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/cli.py +203 -74
  5. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/config.py +2 -1
  6. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/output.py +10 -4
  7. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/sql_views.py +570 -257
  8. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/__init__.py +0 -0
  9. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/binding.py +0 -0
  10. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/blind_handover.py +0 -0
  11. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/checkpointer.py +0 -0
  12. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/cluster_worker.py +0 -0
  13. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/community_worker.py +0 -0
  14. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/embed_worker.py +0 -0
  15. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/freeze.py +0 -0
  16. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/friction_worker.py +0 -0
  17. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/install_source.py +0 -0
  18. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/judge_worker.py +0 -0
  19. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/judges.py +0 -0
  20. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/kappa_worker.py +0 -0
  21. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/llm_worker.py +0 -0
  22. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/logging_setup.py +0 -0
  23. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/parquet_shards.py +0 -0
  24. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/retry_queue.py +0 -0
  25. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_render.py +0 -0
  26. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_worker.py +0 -0
  27. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/schemas.py +0 -0
  28. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/session_text.py +0 -0
  29. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/skills_catalog.py +0 -0
  30. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/terms_worker.py +0 -0
  31. {claude_sql-0.5.0 → claude_sql-0.6.0}/src/claude_sql/ungrounded_worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: claude-sql
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts.
5
5
  Keywords: claude,claude-code,anthropic,duckdb,sql,semantic-search,embeddings,bedrock,transcripts,analytics,observability
6
6
  Author: Laith Al-Saadoon
@@ -336,6 +336,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
336
336
 
337
337
  | Macro | Signature | What it does |
338
338
  |---|---|---|
339
+ | `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
339
340
  | `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
340
341
  | `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
341
342
  | `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
@@ -290,6 +290,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
290
290
 
291
291
  | Macro | Signature | What it does |
292
292
  |---|---|---|
293
+ | `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
293
294
  | `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
294
295
  | `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
295
296
  | `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "claude-sql"
3
- version = "0.5.0"
3
+ version = "0.6.0"
4
4
  description = "Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts."
5
5
  readme = "README.md"
6
6
  license = { text = "Apache-2.0" }
@@ -30,6 +30,7 @@ import tempfile
30
30
  import time
31
31
  from dataclasses import dataclass
32
32
  from datetime import UTC, datetime
33
+ from enum import StrEnum
33
34
  from pathlib import Path
34
35
  from typing import Annotated
35
36
 
@@ -81,8 +82,11 @@ from claude_sql.parquet_shards import (
81
82
  from claude_sql.review_sheet_render import render_markdown, render_refusal_markdown
82
83
  from claude_sql.review_sheet_worker import generate_review_sheet
83
84
  from claude_sql.sql_views import (
84
- describe_all,
85
- list_macros,
85
+ MACRO_NAMES,
86
+ MACRO_SIGNATURES,
87
+ VIEW_NAMES,
88
+ VIEW_SCHEMA,
89
+ _parquet_is_populated,
86
90
  register_all,
87
91
  register_raw,
88
92
  register_views,
@@ -103,12 +107,19 @@ Surfaces at a glance
103
107
  cluster / terms / community UMAP+HDBSCAN, c-TF-IDF, Leiden+CPM
104
108
  analyze composite pipeline over every stage above
105
109
 
106
- Flag placement (important for agents)
107
- -------------------------------------
108
- All flags attach to a SUBCOMMAND, not the top-level binary. Correct:
110
+ !! FLAG PLACEMENT flags attach to a SUBCOMMAND, not the binary !!
111
+ -------------------------------------------------------------------
112
+ Every flag (--format, --quiet, --verbose, --glob, --subagent-glob, and
113
+ every per-command flag) goes AFTER the subcommand name. This applies
114
+ to global-feeling flags too: `--quiet` must come after the subcommand.
115
+
116
+ OK:
109
117
  claude-sql query --format json "SELECT 1"
118
+ claude-sql schema --quiet --format json
110
119
  claude-sql classify --no-dry-run --limit 5
111
- Incorrect (flag gets swallowed as the subcommand argument):
120
+ FAIL (cyclopts: "Unused Tokens: ['schema']"):
121
+ claude-sql --quiet schema --format json
122
+ FAIL (flag swallowed as subcommand arg):
112
123
  claude-sql --format json query "SELECT 1"
113
124
 
114
125
  Output & exit codes
@@ -244,15 +255,15 @@ def _resolve_memory_limit(limit: str) -> str:
244
255
  return f"{target_mib}MiB"
245
256
 
246
257
 
247
- def _open_connection(settings: Settings) -> duckdb.DuckDBPyConnection:
248
- """Open an in-memory DuckDB connection with every claude-sql object wired.
258
+ def _apply_duckdb_pragmas(con: duckdb.DuckDBPyConnection, settings: Settings) -> None:
259
+ """Set the tuning PRAGMAs both connection helpers share.
249
260
 
250
- Tuning PRAGMAs are set before view registration so the registration
251
- queries themselves benefit from the higher thread count and the spill
252
- directory pointed at real disk (Amazon devboxes ship ``/tmp`` as a
253
- 4 GB tmpfs that thrashes once a clustering run starts spilling).
261
+ Centralized so :func:`_open_connection_full` and
262
+ :func:`_open_connection_introspect` stay in sync. Threads, memory_limit,
263
+ and temp_directory all come from ``settings``; the spill directory is
264
+ materialized on disk before DuckDB sees the path because DuckDB will
265
+ happily fail later when it tries to write a spill file.
254
266
  """
255
- con = duckdb.connect(":memory:")
256
267
  settings.duckdb_temp_dir.mkdir(parents=True, exist_ok=True)
257
268
  memory_limit = _resolve_memory_limit(settings.duckdb_memory_limit)
258
269
  con.execute(f"SET threads = {int(settings.duckdb_threads)}")
@@ -260,10 +271,48 @@ def _open_connection(settings: Settings) -> duckdb.DuckDBPyConnection:
260
271
  con.execute(f"SET temp_directory = '{settings.duckdb_temp_dir}'")
261
272
  con.execute("SET enable_object_cache = true")
262
273
  con.execute("SET preserve_insertion_order = false")
274
+
275
+
276
+ def _open_connection_full(settings: Settings) -> duckdb.DuckDBPyConnection:
277
+ """Open an in-memory DuckDB connection with every claude-sql object wired.
278
+
279
+ Tuning PRAGMAs are set before view registration so the registration
280
+ queries themselves benefit from the higher thread count and the spill
281
+ directory pointed at real disk (Amazon devboxes ship ``/tmp`` as a
282
+ 4 GB tmpfs that thrashes once a clustering run starts spilling).
283
+ """
284
+ con = duckdb.connect(":memory:")
285
+ _apply_duckdb_pragmas(con, settings)
263
286
  register_all(con, settings=settings)
264
287
  return con
265
288
 
266
289
 
290
+ def _open_connection_introspect(settings: Settings) -> duckdb.DuckDBPyConnection:
291
+ """Bare DuckDB connection — PRAGMAs only, no view/macro registration.
292
+
293
+ For commands that don't need the catalog (``schema`` reads the static
294
+ :data:`VIEW_SCHEMA` dict; trivial scalar queries like ``SELECT 1`` or
295
+ ``SELECT current_timestamp`` don't reference any view). Returning a
296
+ bare connection avoids the ~25 s :func:`register_all` chain entirely.
297
+ """
298
+ con = duckdb.connect(":memory:")
299
+ _apply_duckdb_pragmas(con, settings)
300
+ return con
301
+
302
+
303
+ def _sql_uses_catalog(sql: str) -> bool:
304
+ """Cheap pre-flight: does ``sql`` reference any registered view/macro?
305
+
306
+ Substring-matches against ``VIEW_NAMES + MACRO_NAMES`` (case-insensitive).
307
+ False positives (a string literal containing ``'sessions'``) just trigger
308
+ the slow path — no correctness regression. False negatives can't happen
309
+ if the user genuinely references a view: the name has to appear in the
310
+ SQL text.
311
+ """
312
+ lowered = sql.lower()
313
+ return any(name.lower() in lowered for name in (*VIEW_NAMES, *MACRO_NAMES))
314
+
315
+
267
316
  def _emit_worker_result(result: int | dict, common: Common | None, pipeline: str) -> None:
268
317
  """Normalize worker results for stdout.
269
318
 
@@ -515,7 +564,11 @@ def query(
515
564
  _configure(common)
516
565
  settings = _resolve_settings(common)
517
566
  fmt = _fmt(common)
518
- con = _open_connection(settings)
567
+ con = (
568
+ _open_connection_full(settings)
569
+ if _sql_uses_catalog(sql)
570
+ else _open_connection_introspect(settings)
571
+ )
519
572
  try:
520
573
  profile_path: Path | None = None
521
574
  if profile_json:
@@ -561,7 +614,11 @@ def explain(
561
614
  _configure(common)
562
615
  settings = _resolve_settings(common)
563
616
  fmt = resolve_format(_fmt(common))
564
- con = _open_connection(settings)
617
+ con = (
618
+ _open_connection_full(settings)
619
+ if _sql_uses_catalog(sql)
620
+ else _open_connection_introspect(settings)
621
+ )
565
622
  try:
566
623
  profile_path: Path | None = None
567
624
  if profile_json:
@@ -585,9 +642,58 @@ def explain(
585
642
  con.close()
586
643
 
587
644
 
645
+ def _compute_cached_map(settings: Settings) -> dict[str, bool]:
646
+ """Map analytics view + analytics macro names to parquet-existence.
647
+
648
+ A name appears in this map when its data lives in a parquet that may
649
+ or may not exist on disk (the v2 analytics surface). v1 views always
650
+ have data (the transcript globs are always present), so they don't
651
+ appear here. Use ``list-cache`` for byte counts / mtimes / row counts.
652
+ """
653
+ # Analytics view → backing parquet path on Settings.
654
+ view_paths: dict[str, Path] = {
655
+ "session_classifications": settings.classifications_parquet_path,
656
+ "session_goals": settings.classifications_parquet_path,
657
+ "message_trajectory": settings.trajectory_parquet_path,
658
+ "session_conflicts": settings.conflicts_parquet_path,
659
+ "message_clusters": settings.clusters_parquet_path,
660
+ "cluster_terms": settings.cluster_terms_parquet_path,
661
+ "session_communities": settings.communities_parquet_path,
662
+ "community_profile": settings.community_profile_parquet_path,
663
+ "user_friction": settings.user_friction_parquet_path,
664
+ "skills_catalog": settings.skills_catalog_parquet_path,
665
+ "skill_usage": settings.skills_catalog_parquet_path,
666
+ }
667
+ cached: dict[str, bool] = {
668
+ name: _parquet_is_populated(path) for name, path in view_paths.items()
669
+ }
670
+ # Analytics macros depend on the same parquets as their backing views;
671
+ # surface them too so an agent asking "is friction_rate ready?" gets a
672
+ # direct yes/no without re-deriving the dependency.
673
+ macro_paths: dict[str, tuple[Path, ...]] = {
674
+ "autonomy_trend": (settings.classifications_parquet_path,),
675
+ "work_mix": (settings.classifications_parquet_path,),
676
+ "success_rate_by_work": (settings.classifications_parquet_path,),
677
+ "cluster_top_terms": (settings.cluster_terms_parquet_path,),
678
+ "community_top_topics": (
679
+ settings.cluster_terms_parquet_path,
680
+ settings.communities_parquet_path,
681
+ settings.clusters_parquet_path,
682
+ ),
683
+ "sentiment_arc": (settings.trajectory_parquet_path,),
684
+ "friction_counts": (settings.user_friction_parquet_path,),
685
+ "friction_rate": (settings.user_friction_parquet_path,),
686
+ "friction_examples": (settings.user_friction_parquet_path,),
687
+ "unused_skills": (settings.skills_catalog_parquet_path,),
688
+ }
689
+ for macro_name, paths in macro_paths.items():
690
+ cached[macro_name] = all(_parquet_is_populated(p) for p in paths)
691
+ return cached
692
+
693
+
588
694
  @app.command
589
695
  def schema(*, common: Common | None = None) -> None:
590
- """List every registered view (with columns) and every macro in one pass.
696
+ """List every registered view (with columns) and every macro signature.
591
697
 
592
698
  When to use
593
699
  -----------
@@ -596,6 +702,15 @@ def schema(*, common: Common | None = None) -> None:
596
702
  calls -- e.g., ``session_classifications`` uses both ``autonomy_tier``
597
703
  (canonical) and ``autonomy`` (alias), and the schema lists both.
598
704
 
705
+ Implementation
706
+ --------------
707
+ Reads the static :data:`VIEW_SCHEMA` and :data:`MACRO_SIGNATURES`
708
+ dicts -- no DuckDB connection, no JSON schema inference, no view
709
+ registration. Sub-50ms even on large corpora. The ``cached`` map is
710
+ keyed by analytics view + analytics macro names so an agent can tell
711
+ which parquet-backed entries are populated; use ``list-cache`` for
712
+ byte counts and mtimes.
713
+
599
714
  Output shape (non-TTY / JSON)
600
715
  -----------------------------
601
716
  ::
@@ -604,42 +719,39 @@ def schema(*, common: Common | None = None) -> None:
604
719
  "views": {
605
720
  "sessions": [{"column": "session_id", "type": "VARCHAR"}, ...],
606
721
  "messages": [...],
607
- "session_classifications": [...], // only if parquet exists
608
722
  ...
609
723
  },
610
- "macros": ["autonomy_trend", "conflict_rate", ...]
724
+ "macros": [{"name": "ago", "params": ["interval_text"]}, ...],
725
+ "cached": {"user_friction": false, "friction_rate": false, ...}
611
726
  }
612
727
 
613
- Missing analytics parquets are silently omitted (register_analytics
614
- skips them). Use ``list-cache`` to see which generators still need to
615
- run.
728
+ Only v1 (transcript-derived) views appear under ``views`` -- v2
729
+ analytics views are parquet-backed; their schema source-of-truth is
730
+ the parquet metadata. Use ``cached`` to see which v2 views can be
731
+ queried right now.
616
732
  """
617
733
  _configure(common)
618
734
  settings = _resolve_settings(common)
619
735
  fmt = resolve_format(_fmt(common))
620
- con = _open_connection(settings)
621
- try:
622
- views = describe_all(con)
623
- macros = list_macros(con)
624
- if fmt is OutputFormat.TABLE:
625
- for name, cols in views.items():
626
- print(f"\n\033[1m{name}\033[0m ({len(cols)} cols)")
627
- for col, col_type in cols:
628
- print(f" {col:<28} {col_type}")
629
- print(f"\n\033[1mMacros\033[0m ({len(macros)})")
630
- for macro in macros:
631
- print(f" {macro}")
632
- else:
633
- payload = {
634
- "views": {
635
- name: [{"column": c, "type": t} for c, t in cols]
636
- for name, cols in views.items()
637
- },
638
- "macros": list(macros),
639
- }
640
- emit_json(payload, fmt)
641
- finally:
642
- con.close()
736
+ cached = _compute_cached_map(settings)
737
+ payload = {
738
+ "views": {
739
+ name: [{"column": c, "type": t} for c, t in cols] for name, cols in VIEW_SCHEMA.items()
740
+ },
741
+ "macros": [{"name": n, "params": list(p)} for n, p in MACRO_SIGNATURES.items()],
742
+ "cached": cached,
743
+ }
744
+ if fmt is OutputFormat.TABLE:
745
+ for name, cols in VIEW_SCHEMA.items():
746
+ print(f"\n\033[1m{name}\033[0m ({len(cols)} cols)")
747
+ for col, col_type in cols:
748
+ print(f" {col:<28} {col_type}")
749
+ print(f"\n\033[1mMacros\033[0m ({len(MACRO_SIGNATURES)})")
750
+ for n, params in MACRO_SIGNATURES.items():
751
+ tag = "" if cached.get(n, True) else " [cache empty]"
752
+ print(f" {n}({', '.join(params)}){tag}")
753
+ return
754
+ emit_json(payload, fmt)
643
755
 
644
756
 
645
757
  @app.command(name="list-cache")
@@ -1173,7 +1285,7 @@ def search(
1173
1285
  _configure(common)
1174
1286
  settings = _resolve_settings(common)
1175
1287
  fmt = _fmt(common)
1176
- con = _open_connection(settings)
1288
+ con = _open_connection_full(settings)
1177
1289
  try:
1178
1290
  row = con.execute("SELECT count(*) FROM message_embeddings").fetchone()
1179
1291
  count = int(row[0]) if row else 0
@@ -1263,7 +1375,7 @@ def classify(
1263
1375
  """
1264
1376
  _configure(common)
1265
1377
  settings = _resolve_settings(common)
1266
- con = _open_connection(settings)
1378
+ con = _open_connection_full(settings)
1267
1379
  try:
1268
1380
  result = classify_sessions(
1269
1381
  con,
@@ -1311,7 +1423,7 @@ def trajectory(
1311
1423
  """
1312
1424
  _configure(common)
1313
1425
  settings = _resolve_settings(common)
1314
- con = _open_connection(settings)
1426
+ con = _open_connection_full(settings)
1315
1427
  try:
1316
1428
  result = trajectory_messages(
1317
1429
  con,
@@ -1355,7 +1467,7 @@ def conflicts(
1355
1467
  """
1356
1468
  _configure(common)
1357
1469
  settings = _resolve_settings(common)
1358
- con = _open_connection(settings)
1470
+ con = _open_connection_full(settings)
1359
1471
  try:
1360
1472
  result = detect_conflicts(
1361
1473
  con,
@@ -1405,7 +1517,7 @@ def friction(
1405
1517
  """
1406
1518
  _configure(common)
1407
1519
  settings = _resolve_settings(common)
1408
- con = _open_connection(settings)
1520
+ con = _open_connection_full(settings)
1409
1521
  try:
1410
1522
  result = detect_user_friction(
1411
1523
  con,
@@ -1472,7 +1584,7 @@ def terms(*, force: bool = False, common: Common | None = None) -> None:
1472
1584
  """
1473
1585
  _configure(common)
1474
1586
  settings = _resolve_settings(common)
1475
- con = _open_connection(settings)
1587
+ con = _open_connection_full(settings)
1476
1588
  try:
1477
1589
  tstats = run_terms(con, settings, force=force)
1478
1590
  logger.info(
@@ -1560,7 +1672,7 @@ def community(
1560
1672
  emit_error(err, fmt)
1561
1673
  sys.exit(err.exit_code)
1562
1674
 
1563
- con = _open_connection(settings)
1675
+ con = _open_connection_full(settings)
1564
1676
  try:
1565
1677
  if neighbors_of_session is not None:
1566
1678
  df = neighbors_of(con, settings, neighbors_of_session, top_k=top_k)
@@ -1704,7 +1816,7 @@ def analyze(
1704
1816
 
1705
1817
  # 1. Embed (reuses embed_worker). Silently skipped if the parquet is up to date.
1706
1818
  if not skip_embed:
1707
- con = _open_connection(settings)
1819
+ con = _open_connection_full(settings)
1708
1820
  try:
1709
1821
  n = asyncio.run(
1710
1822
  run_backfill(
@@ -1728,7 +1840,7 @@ def analyze(
1728
1840
  stats["clusters"],
1729
1841
  stats["noise"],
1730
1842
  )
1731
- con = _open_connection(settings)
1843
+ con = _open_connection_full(settings)
1732
1844
  try:
1733
1845
  tstats = run_terms(con, settings, force=force_cluster)
1734
1846
  logger.info(
@@ -1741,7 +1853,7 @@ def analyze(
1741
1853
 
1742
1854
  # 3. Community detection (non-LLM, runs in parallel conceptually with cluster).
1743
1855
  if not skip_community:
1744
- con = _open_connection(settings)
1856
+ con = _open_connection_full(settings)
1745
1857
  try:
1746
1858
  cstats = run_communities(con, settings, force=force_community)
1747
1859
  logger.info(
@@ -1754,7 +1866,7 @@ def analyze(
1754
1866
 
1755
1867
  # 4. Session classification (LLM).
1756
1868
  if not skip_classify:
1757
- con = _open_connection(settings)
1869
+ con = _open_connection_full(settings)
1758
1870
  try:
1759
1871
  n = classify_sessions(
1760
1872
  con,
@@ -1770,7 +1882,7 @@ def analyze(
1770
1882
 
1771
1883
  # 5. Trajectory (LLM).
1772
1884
  if not skip_trajectory:
1773
- con = _open_connection(settings)
1885
+ con = _open_connection_full(settings)
1774
1886
  try:
1775
1887
  n = trajectory_messages(
1776
1888
  con,
@@ -1786,7 +1898,7 @@ def analyze(
1786
1898
 
1787
1899
  # 6. Conflicts (LLM, requires full session context).
1788
1900
  if not skip_conflicts:
1789
- con = _open_connection(settings)
1901
+ con = _open_connection_full(settings)
1790
1902
  try:
1791
1903
  n = detect_conflicts(
1792
1904
  con,
@@ -1802,7 +1914,7 @@ def analyze(
1802
1914
 
1803
1915
  # 7. Friction (LLM, short-message scope).
1804
1916
  if not skip_friction:
1805
- con = _open_connection(settings)
1917
+ con = _open_connection_full(settings)
1806
1918
  try:
1807
1919
  n = detect_user_friction(
1808
1920
  con,
@@ -2297,19 +2409,33 @@ def resolve_cmd(
2297
2409
  emit_json(binding.to_dict(), fmt=fmt)
2298
2410
 
2299
2411
 
2300
- def _review_sheet_format(common: Common | None) -> OutputFormat:
2301
- """Pick the review-sheet effective format.
2412
+ class RenderFormat(StrEnum):
2413
+ """``review-sheet`` render targets.
2414
+
2415
+ Local to ``review-sheet`` because no other subcommand emits human prose.
2416
+ Keeping markdown out of the global :class:`OutputFormat` keeps
2417
+ ``--format`` honest on every other subcommand (only renderers that
2418
+ actually support markdown get to advertise it).
2419
+ """
2420
+
2421
+ MARKDOWN = "markdown"
2422
+ JSON = "json"
2423
+
2424
+
2425
+ def _review_sheet_format(common: Common | None) -> RenderFormat:
2426
+ """Pick the review-sheet effective render format.
2302
2427
 
2303
2428
  Default policy diverges from every other subcommand: review-sheet
2304
- output is human-first prose, so ``AUTO`` resolves to ``MARKDOWN`` on
2305
- a TTY (override of the global ``TABLE`` default) and ``JSON``
2306
- off-TTY. Explicit ``--format`` flags pass through unchanged so
2307
- agents can still pin ``--format json`` regardless of TTY state.
2429
+ output is human-first prose, so ``--format auto`` resolves to
2430
+ ``MARKDOWN`` on a TTY (override of the global ``TABLE`` default) and
2431
+ ``JSON`` off-TTY. ``--format json`` always pins JSON; every other
2432
+ ``OutputFormat`` value resolves to ``MARKDOWN`` on a TTY and ``JSON``
2433
+ off-TTY (table/ndjson/csv are not meaningful for the prose shape).
2308
2434
  """
2309
2435
  fmt = _fmt(common)
2310
- if fmt is not OutputFormat.AUTO:
2311
- return fmt
2312
- return OutputFormat.MARKDOWN if sys.stdout.isatty() else OutputFormat.JSON
2436
+ if fmt is OutputFormat.JSON:
2437
+ return RenderFormat.JSON
2438
+ return RenderFormat.MARKDOWN if sys.stdout.isatty() else RenderFormat.JSON
2313
2439
 
2314
2440
 
2315
2441
  @app.command(name="review-sheet")
@@ -2355,6 +2481,9 @@ def review_sheet_cmd(
2355
2481
  """
2356
2482
  _configure(common)
2357
2483
  fmt = _review_sheet_format(common)
2484
+ # Error output follows the global rule (TABLE on TTY, JSON off-TTY) — the
2485
+ # render format is only meaningful for the success-path narrative.
2486
+ error_fmt = _fmt(common)
2358
2487
  settings = _resolve_settings(common)
2359
2488
  repo_path = repo.resolve() if repo is not None else None
2360
2489
 
@@ -2371,7 +2500,7 @@ def review_sheet_cmd(
2371
2500
  message=str(exc),
2372
2501
  hint="run `claude-sql resolve <sha> --all-sources` to see both surfaces",
2373
2502
  )
2374
- emit_error(err, fmt)
2503
+ emit_error(err, error_fmt)
2375
2504
  sys.exit(err.exit_code)
2376
2505
  except LookupError as exc:
2377
2506
  err = ClassifiedError(
@@ -2380,7 +2509,7 @@ def review_sheet_cmd(
2380
2509
  message=str(exc),
2381
2510
  hint="commit has no Claude-Transcript-* trailer and no refs/notes/transcripts entry",
2382
2511
  )
2383
- emit_error(err, fmt)
2512
+ emit_error(err, error_fmt)
2384
2513
  sys.exit(err.exit_code)
2385
2514
  except _binding.GitInvocationError as exc:
2386
2515
  err = ClassifiedError(
@@ -2389,7 +2518,7 @@ def review_sheet_cmd(
2389
2518
  message=f"git invocation failed: {exc.stderr.strip()}",
2390
2519
  hint="check that the commit SHA exists in --repo",
2391
2520
  )
2392
- emit_error(err, fmt)
2521
+ emit_error(err, error_fmt)
2393
2522
  sys.exit(err.exit_code)
2394
2523
 
2395
2524
  # Hand the resolved URI through the override so the worker doesn't
@@ -2412,19 +2541,19 @@ def review_sheet_cmd(
2412
2541
  return
2413
2542
 
2414
2543
  if result.get("refused"):
2415
- if fmt is OutputFormat.MARKDOWN:
2544
+ if fmt is RenderFormat.MARKDOWN:
2416
2545
  metadata = result.get("metadata") or {"commit_sha": commit_sha}
2417
2546
  print(render_refusal_markdown(str(result.get("reason", "")), metadata))
2418
2547
  return
2419
- emit_json(result, fmt=fmt)
2548
+ emit_json(result, fmt=OutputFormat.JSON)
2420
2549
  return
2421
2550
 
2422
2551
  sheet = result.get("sheet") or {}
2423
2552
  metadata = result.get("metadata") or {}
2424
- if fmt is OutputFormat.MARKDOWN:
2553
+ if fmt is RenderFormat.MARKDOWN:
2425
2554
  print(render_markdown(sheet, metadata))
2426
2555
  return
2427
- emit_json({"sheet": sheet, "metadata": metadata}, fmt=fmt)
2556
+ emit_json({"sheet": sheet, "metadata": metadata}, fmt=OutputFormat.JSON)
2428
2557
 
2429
2558
 
2430
2559
  @app.default
@@ -312,7 +312,8 @@ class Settings(BaseSettings):
312
312
  tfidf_top_n_terms: int = 10
313
313
 
314
314
  # ------------------------------------------------------------------
315
- # DuckDB engine tuning — applied as PRAGMAs in cli._open_connection.
315
+ # DuckDB engine tuning — applied as PRAGMAs in cli._open_connection_full
316
+ # and cli._open_connection_introspect.
316
317
  # ------------------------------------------------------------------
317
318
  #: Worker threads. Defaults to ``os.cpu_count()`` so DuckDB uses every
318
319
  #: core; agents and CI runners with limited parallelism can override.
@@ -24,11 +24,17 @@ import polars as pl
24
24
 
25
25
 
26
26
  class OutputFormat(StrEnum):
27
- """Supported output formats.
27
+ """Supported output formats for tabular and structured CLI output.
28
28
 
29
29
  ``AUTO`` resolves to ``TABLE`` when stdout is a TTY and ``JSON`` otherwise.
30
30
  Keeping it a string Enum lets cyclopts parse ``--format json`` without any
31
31
  custom converter.
32
+
33
+ Markdown rendering is intentionally absent: only ``review-sheet`` emits
34
+ human prose, and it owns its own ``--render`` flag (see
35
+ :class:`claude_sql.cli.RenderFormat`). Pulling markdown into this enum
36
+ advertised the format on every subcommand even though no other command
37
+ knows how to produce it.
32
38
  """
33
39
 
34
40
  AUTO = "auto"
@@ -36,7 +42,6 @@ class OutputFormat(StrEnum):
36
42
  JSON = "json"
37
43
  NDJSON = "ndjson"
38
44
  CSV = "csv"
39
- MARKDOWN = "markdown"
40
45
 
41
46
 
42
47
  # Exit codes that agents can rely on. Keep them stable -- wire protocols
@@ -98,8 +103,9 @@ def emit_dataframe(
98
103
  if resolved is OutputFormat.CSV:
99
104
  df.write_csv(sys.stdout)
100
105
  return
101
- # unreachable if OutputFormat stays closed-set
102
- raise ValueError(f"Unsupported format: {resolved}")
106
+ # Defensive: unreachable while OutputFormat stays closed-set
107
+ # (auto/table/json/ndjson/csv). Kept as a guard for future enum additions.
108
+ raise ValueError(f"Unsupported format: {resolved}") # pragma: no cover
103
109
 
104
110
 
105
111
  def emit_json(payload: Any, fmt: OutputFormat | str = OutputFormat.AUTO) -> None: