claude-sql 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {claude_sql-0.4.0 → claude_sql-0.5.0}/PKG-INFO +34 -11
  2. {claude_sql-0.4.0 → claude_sql-0.5.0}/README.md +31 -9
  3. {claude_sql-0.4.0 → claude_sql-0.5.0}/pyproject.toml +3 -2
  4. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/cli.py +122 -13
  5. claude_sql-0.5.0/src/claude_sql/community_worker.py +662 -0
  6. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/config.py +34 -17
  7. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/sql_views.py +6 -0
  8. claude_sql-0.4.0/src/claude_sql/community_worker.py +0 -306
  9. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/__init__.py +0 -0
  10. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/binding.py +0 -0
  11. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/blind_handover.py +0 -0
  12. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/checkpointer.py +0 -0
  13. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/cluster_worker.py +0 -0
  14. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/embed_worker.py +0 -0
  15. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/freeze.py +0 -0
  16. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/friction_worker.py +0 -0
  17. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/install_source.py +0 -0
  18. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/judge_worker.py +0 -0
  19. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/judges.py +0 -0
  20. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/kappa_worker.py +0 -0
  21. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/llm_worker.py +0 -0
  22. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/logging_setup.py +0 -0
  23. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/output.py +0 -0
  24. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/parquet_shards.py +0 -0
  25. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/retry_queue.py +0 -0
  26. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/review_sheet_render.py +0 -0
  27. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/review_sheet_worker.py +0 -0
  28. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/schemas.py +0 -0
  29. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/session_text.py +0 -0
  30. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/skills_catalog.py +0 -0
  31. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/terms_worker.py +0 -0
  32. {claude_sql-0.4.0 → claude_sql-0.5.0}/src/claude_sql/ungrounded_worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: claude-sql
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts.
5
5
  Keywords: claude,claude-code,anthropic,duckdb,sql,semantic-search,embeddings,bedrock,transcripts,analytics,observability
6
6
  Author: Laith Al-Saadoon
@@ -23,8 +23,9 @@ Requires-Dist: boto3>=1.42.91
23
23
  Requires-Dist: cyclopts>=4.10.2
24
24
  Requires-Dist: duckdb>=1.5.2
25
25
  Requires-Dist: hdbscan>=0.8.40
26
+ Requires-Dist: igraph>=1.0.0,<2.0
27
+ Requires-Dist: leidenalg>=0.11.0,<0.12
26
28
  Requires-Dist: loguru>=0.7.3
27
- Requires-Dist: networkx>=3.4
28
29
  Requires-Dist: numpy>=2.4.4
29
30
  Requires-Dist: packaging>=26.2
30
31
  Requires-Dist: polars>=1.40.0
@@ -122,7 +123,7 @@ flowchart LR
122
123
  L --> PA["session_classifications/, message_trajectory/,<br/>session_conflicts/, user_friction/<br/>(sharded part-*.parquet)"]
123
124
  P --> C["claude-sql cluster<br/>(UMAP + HDBSCAN)"]
124
125
  C --> PC["clusters + cluster_terms<br/>(c-TF-IDF)"]
125
- P --> CM["claude-sql community<br/>(Louvain over centroids)"]
126
+ P --> CM["claude-sql community<br/>(Leiden + CPM over mutual-kNN centroids)"]
126
127
  CM --> PM["session_communities<br/>parquet"]
127
128
  PA --> AV[analytics views + macros]
128
129
  PC --> AV
@@ -272,7 +273,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
272
273
  | `conflicts` | Per-session stance-conflict detection |
273
274
  | `friction` | Regex + Sonnet 4.6 → status pings, unmet expectations, confusion, etc. |
274
275
  | `cluster` | UMAP → HDBSCAN → c-TF-IDF over message embeddings |
275
- | `community` | Louvain over session centroids |
276
+ | `community` | Leiden + CPM over mutual-kNN session centroids; emits medoid + coherence + resolution profile + `--neighbors-of` |
276
277
  | `skills sync` | Walk `~/.claude/skills/` + `~/.claude/plugins/cache/` → seedable skills catalog |
277
278
  | `skills ls` | List catalog entries, filterable by `--kind` and `--plugin` |
278
279
  | `analyze` | Run the whole pipeline in dependency order |
@@ -325,7 +326,8 @@ Commands that spend real Bedrock money default to `--dry-run`.
325
326
  | `session_conflicts` | per-session stance conflicts | `stance_a`, `stance_b`, `resolution` |
326
327
  | `message_clusters` | cluster id + 2d viz coords | `cluster_id`, `x`, `y`, `is_noise` |
327
328
  | `cluster_terms` | c-TF-IDF top terms per cluster | `cluster_id`, `term`, `weight`, `rank` |
328
- | `session_communities` | Louvain community per session | `community_id`, `size` |
329
+ | `session_communities` | Leiden+CPM community per session | `community_id`, `size`, `is_medoid`, `coherence`, `gamma_used` |
330
+ | `community_profile` | Resolution-profile sidecar (auto-γ runs only) | `gamma`, `n_communities`, `quality`, `plateau_length` |
329
331
  | `user_friction` | one row per classified short user message | `label` (7-way), `rationale`, `source` (`regex` / `llm` / `refused`), `confidence` |
330
332
  | `skills_catalog` | one row per known skill / slash command (seed by `claude-sql skills sync`) | `skill_id`, `name`, `plugin`, `plugin_version`, `source_kind` (`user-skill` / `plugin-skill` / `plugin-command` / `builtin`), `description` |
331
333
  | `skill_usage` | `skill_invocations` ⟕ `skills_catalog` | `source`, `skill_id`, `skill_name`, `plugin`, `is_builtin`, `description` |
@@ -380,7 +382,15 @@ Every option is configurable via `CLAUDE_SQL_*`:
380
382
  | `CLAUDE_SQL_SKILLS_CATALOG_PARQUET_PATH` | `~/.claude/skills_catalog.parquet` | Skills catalog parquet |
381
383
  | `CLAUDE_SQL_USER_SKILLS_DIR` | `~/.claude/skills` | Root scanned for user-installed skills |
382
384
  | `CLAUDE_SQL_PLUGINS_CACHE_DIR` | `~/.claude/plugins/cache` | Root scanned for plugin skills + commands |
383
- | `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Louvain determinism |
385
+ | `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Leiden determinism |
386
+ | `CLAUDE_SQL_LEIDEN_KNN_K` | `15` | Mutual-kNN k for the session-centroid graph |
387
+ | `CLAUDE_SQL_LEIDEN_EDGE_FLOOR` | `0.3` | Cosine floor below which edges are dropped |
388
+ | `CLAUDE_SQL_LEIDEN_MIN_COMMUNITY_SIZE` | `3` | Communities below this collapse to noise (`-1`) |
389
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION` | unset (auto) | Explicit CPM γ; skips the resolution profile |
390
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_LO` | `0.05` | Lower bound for `Optimiser.resolution_profile` bisection |
391
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_HI` | `0.95` | Upper bound for the bisection |
392
+ | `CLAUDE_SQL_LEIDEN_N_ITERATIONS` | `-1` | Iterate to convergence; `2` is leidenalg's default |
393
+ | `CLAUDE_SQL_COMMUNITY_PROFILE_PARQUET_PATH` | `~/.claude/community_profile.parquet` | Resolution-profile sidecar path |
384
394
 
385
395
  ## Development
386
396
 
@@ -497,12 +507,25 @@ See `docs/adr/0015-stack-modernization.md` and
497
507
  with adaptive thinking on. Pydantic v2 schemas are flattened (inline
498
508
  `$ref`, inject `additionalProperties: false`, strip the numeric /
499
509
  string constraints the validator rejects from Draft 2020-12).
500
- - **Determinism.** UMAP, HDBSCAN, and Louvain all seed from
510
+ - **Determinism.** UMAP, HDBSCAN, and Leiden all seed from
501
511
  `CLAUDE_SQL_SEED=42` (default) so cluster IDs and community IDs are
502
- stable across reruns.
503
- - **Louvain = `networkx`.** `networkx.community.louvain_communities`,
504
- built into `networkx >= 3.4`. The abandoned `python-louvain` package
505
- is not used.
512
+ stable across reruns. The Leiden seed flows into both
513
+ `leidenalg.find_partition(seed=...)` and `Optimiser.set_rng_seed(...)`
514
+ for the resolution-profile bisection; same seed + same input ⇒
515
+ byte-equal parquets across runs.
516
+ - **Communities = `leidenalg` + CPM.** Reference Leiden implementation
517
+ (`leidenalg>=0.11.0`) over an `igraph>=1.0` mutual-kNN cosine graph
518
+ (k=15, edge floor 0.3) of session centroids. CPM γ has direct cosine
519
+ semantics (internal density ≥ γ, external ≤ γ); auto-γ via
520
+ `Optimiser.resolution_profile` + longest-plateau picker; warn-only
521
+ connectivity check. Output is signal-rich: `is_medoid`, `coherence`,
522
+ `gamma_used` per row, plus a `community_profile` sidecar with one row
523
+ per γ tested, so an LLM agent can ask "what γ would yield 50
524
+ communities?" without rerunning Leiden. Top terms per community come
525
+ from the live `community_top_topics(cid, n)` macro composed from
526
+ `cluster_terms`, not a frozen column. See
527
+ [`docs/research_notes.md`](docs/research_notes.md) for the
528
+ Louvain → Leiden+CPM swap rationale.
506
529
  - **Hybrid friction pipeline.** A hand-curated regex bank catches the
507
530
  unambiguous `status_ping` / `interruption` / `correction` cases at
508
531
  zero Bedrock cost; the ambiguous class — especially
@@ -77,7 +77,7 @@ flowchart LR
77
77
  L --> PA["session_classifications/, message_trajectory/,<br/>session_conflicts/, user_friction/<br/>(sharded part-*.parquet)"]
78
78
  P --> C["claude-sql cluster<br/>(UMAP + HDBSCAN)"]
79
79
  C --> PC["clusters + cluster_terms<br/>(c-TF-IDF)"]
80
- P --> CM["claude-sql community<br/>(Louvain over centroids)"]
80
+ P --> CM["claude-sql community<br/>(Leiden + CPM over mutual-kNN centroids)"]
81
81
  CM --> PM["session_communities<br/>parquet"]
82
82
  PA --> AV[analytics views + macros]
83
83
  PC --> AV
@@ -227,7 +227,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
227
227
  | `conflicts` | Per-session stance-conflict detection |
228
228
  | `friction` | Regex + Sonnet 4.6 → status pings, unmet expectations, confusion, etc. |
229
229
  | `cluster` | UMAP → HDBSCAN → c-TF-IDF over message embeddings |
230
- | `community` | Louvain over session centroids |
230
+ | `community` | Leiden + CPM over mutual-kNN session centroids; emits medoid + coherence + resolution profile + `--neighbors-of` |
231
231
  | `skills sync` | Walk `~/.claude/skills/` + `~/.claude/plugins/cache/` → seedable skills catalog |
232
232
  | `skills ls` | List catalog entries, filterable by `--kind` and `--plugin` |
233
233
  | `analyze` | Run the whole pipeline in dependency order |
@@ -280,7 +280,8 @@ Commands that spend real Bedrock money default to `--dry-run`.
280
280
  | `session_conflicts` | per-session stance conflicts | `stance_a`, `stance_b`, `resolution` |
281
281
  | `message_clusters` | cluster id + 2d viz coords | `cluster_id`, `x`, `y`, `is_noise` |
282
282
  | `cluster_terms` | c-TF-IDF top terms per cluster | `cluster_id`, `term`, `weight`, `rank` |
283
- | `session_communities` | Louvain community per session | `community_id`, `size` |
283
+ | `session_communities` | Leiden+CPM community per session | `community_id`, `size`, `is_medoid`, `coherence`, `gamma_used` |
284
+ | `community_profile` | Resolution-profile sidecar (auto-γ runs only) | `gamma`, `n_communities`, `quality`, `plateau_length` |
284
285
  | `user_friction` | one row per classified short user message | `label` (7-way), `rationale`, `source` (`regex` / `llm` / `refused`), `confidence` |
285
286
  | `skills_catalog` | one row per known skill / slash command (seed by `claude-sql skills sync`) | `skill_id`, `name`, `plugin`, `plugin_version`, `source_kind` (`user-skill` / `plugin-skill` / `plugin-command` / `builtin`), `description` |
286
287
  | `skill_usage` | `skill_invocations` ⟕ `skills_catalog` | `source`, `skill_id`, `skill_name`, `plugin`, `is_builtin`, `description` |
@@ -335,7 +336,15 @@ Every option is configurable via `CLAUDE_SQL_*`:
335
336
  | `CLAUDE_SQL_SKILLS_CATALOG_PARQUET_PATH` | `~/.claude/skills_catalog.parquet` | Skills catalog parquet |
336
337
  | `CLAUDE_SQL_USER_SKILLS_DIR` | `~/.claude/skills` | Root scanned for user-installed skills |
337
338
  | `CLAUDE_SQL_PLUGINS_CACHE_DIR` | `~/.claude/plugins/cache` | Root scanned for plugin skills + commands |
338
- | `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Louvain determinism |
339
+ | `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Leiden determinism |
340
+ | `CLAUDE_SQL_LEIDEN_KNN_K` | `15` | Mutual-kNN k for the session-centroid graph |
341
+ | `CLAUDE_SQL_LEIDEN_EDGE_FLOOR` | `0.3` | Cosine floor below which edges are dropped |
342
+ | `CLAUDE_SQL_LEIDEN_MIN_COMMUNITY_SIZE` | `3` | Communities below this collapse to noise (`-1`) |
343
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION` | unset (auto) | Explicit CPM γ; skips the resolution profile |
344
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_LO` | `0.05` | Lower bound for `Optimiser.resolution_profile` bisection |
345
+ | `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_HI` | `0.95` | Upper bound for the bisection |
346
+ | `CLAUDE_SQL_LEIDEN_N_ITERATIONS` | `-1` | Iterate to convergence; `2` is leidenalg's default |
347
+ | `CLAUDE_SQL_COMMUNITY_PROFILE_PARQUET_PATH` | `~/.claude/community_profile.parquet` | Resolution-profile sidecar path |
339
348
 
340
349
  ## Development
341
350
 
@@ -452,12 +461,25 @@ See `docs/adr/0015-stack-modernization.md` and
452
461
  with adaptive thinking on. Pydantic v2 schemas are flattened (inline
453
462
  `$ref`, inject `additionalProperties: false`, strip the numeric /
454
463
  string constraints the validator rejects from Draft 2020-12).
455
- - **Determinism.** UMAP, HDBSCAN, and Louvain all seed from
464
+ - **Determinism.** UMAP, HDBSCAN, and Leiden all seed from
456
465
  `CLAUDE_SQL_SEED=42` (default) so cluster IDs and community IDs are
457
- stable across reruns.
458
- - **Louvain = `networkx`.** `networkx.community.louvain_communities`,
459
- built into `networkx >= 3.4`. The abandoned `python-louvain` package
460
- is not used.
466
+ stable across reruns. The Leiden seed flows into both
467
+ `leidenalg.find_partition(seed=...)` and `Optimiser.set_rng_seed(...)`
468
+ for the resolution-profile bisection; same seed + same input ⇒
469
+ byte-equal parquets across runs.
470
+ - **Communities = `leidenalg` + CPM.** Reference Leiden implementation
471
+ (`leidenalg>=0.11.0`) over an `igraph>=1.0` mutual-kNN cosine graph
472
+ (k=15, edge floor 0.3) of session centroids. CPM γ has direct cosine
473
+ semantics (internal density ≥ γ, external ≤ γ); auto-γ via
474
+ `Optimiser.resolution_profile` + longest-plateau picker; warn-only
475
+ connectivity check. Output is signal-rich: `is_medoid`, `coherence`,
476
+ `gamma_used` per row, plus a `community_profile` sidecar with one row
477
+ per γ tested, so an LLM agent can ask "what γ would yield 50
478
+ communities?" without rerunning Leiden. Top terms per community come
479
+ from the live `community_top_topics(cid, n)` macro composed from
480
+ `cluster_terms`, not a frozen column. See
481
+ [`docs/research_notes.md`](docs/research_notes.md) for the
482
+ Louvain → Leiden+CPM swap rationale.
461
483
  - **Hybrid friction pipeline.** A hand-curated regex bank catches the
462
484
  unambiguous `status_ping` / `interruption` / `correction` cases at
463
485
  zero Bedrock cost; the ambiguous class — especially
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "claude-sql"
3
- version = "0.4.0"
3
+ version = "0.5.0"
4
4
  description = "Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts."
5
5
  readme = "README.md"
6
6
  license = { text = "Apache-2.0" }
@@ -33,8 +33,9 @@ dependencies = [
33
33
  "cyclopts>=4.10.2",
34
34
  "duckdb>=1.5.2",
35
35
  "hdbscan>=0.8.40",
36
+ "igraph>=1.0.0,<2.0",
37
+ "leidenalg>=0.11.0,<0.12",
36
38
  "loguru>=0.7.3",
37
- "networkx>=3.4",
38
39
  "numpy>=2.4.4",
39
40
  "packaging>=26.2",
40
41
  "polars>=1.40.0",
@@ -50,7 +50,11 @@ from claude_sql import (
50
50
  ungrounded_worker as _ungrounded_worker,
51
51
  )
52
52
  from claude_sql.cluster_worker import run_clustering
53
- from claude_sql.community_worker import run_communities
53
+ from claude_sql.community_worker import (
54
+ ResolutionLevel,
55
+ neighbors_of,
56
+ run_communities,
57
+ )
54
58
  from claude_sql.config import Settings
55
59
  from claude_sql.embed_worker import embed_query, run_backfill
56
60
  from claude_sql.friction_worker import detect_user_friction
@@ -96,7 +100,7 @@ Surfaces at a glance
96
100
  embed / search Cohere Embed v4 + HNSW cosine search
97
101
  classify / trajectory / Sonnet 4.6 analytics -- each defaults to
98
102
  conflicts / friction --dry-run; pass --no-dry-run to spend
99
- cluster / terms / community UMAP+HDBSCAN, c-TF-IDF, Louvain
103
+ cluster / terms / community UMAP+HDBSCAN, c-TF-IDF, Leiden+CPM
100
104
  analyze composite pipeline over every stage above
101
105
 
102
106
  Flag placement (important for agents)
@@ -678,6 +682,7 @@ def list_cache(*, common: Common | None = None) -> None:
678
682
  _describe_cache_entry("message_clusters", settings.clusters_parquet_path),
679
683
  _describe_cache_entry("cluster_terms", settings.cluster_terms_parquet_path),
680
684
  _describe_cache_entry("session_communities", settings.communities_parquet_path),
685
+ _describe_cache_entry("community_profile", settings.community_profile_parquet_path),
681
686
  _describe_cache_entry("user_friction", settings.user_friction_parquet_path),
682
687
  _describe_cache_entry("skills_catalog", settings.skills_catalog_parquet_path),
683
688
  _describe_checkpoint_entry(settings.checkpoint_db_path),
@@ -1480,33 +1485,137 @@ def terms(*, force: bool = False, common: Common | None = None) -> None:
1480
1485
 
1481
1486
 
1482
1487
  @app.command
1483
- def community(*, force: bool = False, common: Common | None = None) -> None:
1484
- """Session-level Louvain community detection over a cosine-similarity graph.
1488
+ def community(
1489
+ *,
1490
+ force: bool = False,
1491
+ gamma: float | None = None,
1492
+ resolution: ResolutionLevel = "medium",
1493
+ neighbors_of_session: Annotated[str | None, Parameter(name=["--neighbors-of"])] = None,
1494
+ top_k: int = 15,
1495
+ dry_run: bool = False,
1496
+ common: Common | None = None,
1497
+ ) -> None:
1498
+ """Session-level Leiden+CPM community detection over a mutual-kNN cosine graph.
1485
1499
 
1486
1500
  Prereq: ``embed`` (needs the embeddings parquet).
1487
1501
 
1488
1502
  Output columns (``session_communities`` view)
1489
1503
  ---------------------------------------------
1490
- session_id, community_id (int; -1 = isolated).
1491
-
1492
- Method: build a session-centroid-cosine KNN graph, then run
1493
- ``networkx.community.louvain_communities`` (networkx ≥3.4).
1494
- Cost: zero. Seeded by ``CLAUDE_SQL_SEED=42``.
1504
+ session_id, community_id (int; -1 = noise / sub-min-size),
1505
+ size (int), is_medoid (bool — best representative session of its
1506
+ community), coherence (float — mean intra-community cosine),
1507
+ gamma_used (float — the CPM γ used at run time).
1508
+
1509
+ Sidecar (``community_profile`` view, written when auto-γ runs)
1510
+ --------------------------------------------------------------
1511
+ gamma, n_communities, quality, plateau_length — one row per γ tested
1512
+ by ``leidenalg.Optimiser.resolution_profile``. Lets the agent ask
1513
+ "what γ would yield 50 communities?" without rerunning Leiden.
1514
+
1515
+ Method: build a session-centroid mutual-kNN graph (k=15, edge floor 0.3
1516
+ by default), then ``leidenalg.find_partition`` with
1517
+ ``CPMVertexPartition``. CPM γ is auto-picked from the resolution
1518
+ profile via the longest-plateau heuristic (Traag et al.); the
1519
+ ``--resolution {coarse, medium, fine}`` flag picks alternate plateaus
1520
+ of the same profile (no extra Leiden runs).
1521
+
1522
+ Cost: zero (CPU only). Seeded by ``CLAUDE_SQL_SEED=42``. For top
1523
+ terms per community, run
1524
+ ``claude-sql query "SELECT * FROM community_top_topics(<cid>, 10)"``.
1495
1525
 
1496
1526
  Flags
1497
1527
  -----
1498
- --force Re-detect even if session_communities.parquet exists.
1528
+ --force Re-detect even if session_communities.parquet exists.
1529
+ --gamma FLOAT Explicit CPM γ; skips the resolution profile + sidecar.
1530
+ Rejected together with --neighbors-of; overrides --resolution.
1531
+ --resolution {coarse,medium,fine}
1532
+ Pick a γ plateau without specifying a value.
1533
+ Default 'medium' = longest plateau. Ignored if --gamma set.
1534
+ --neighbors-of SID Early-return path: skip Leiden, return top-k cosine
1535
+ neighbors of SID. Reads centroids on the fly +
1536
+ joins session_communities.parquet if it exists.
1537
+ --top-k N Used with --neighbors-of (default 15).
1538
+ --dry-run Plan-only: count candidate sessions via SQL, do not
1539
+ run Leiden. Honors agent JSON output for free.
1540
+
1541
+ Exit codes
1542
+ ----------
1543
+ 0 success
1544
+ 64 invalid input (e.g., --neighbors-of combined with --gamma, --force, or --dry-run)
1499
1545
  """
1500
1546
  _configure(common)
1501
1547
  settings = _resolve_settings(common)
1548
+ fmt = _fmt(common)
1549
+
1550
+ if neighbors_of_session is not None and (gamma is not None or force or dry_run):
1551
+ err = ClassifiedError(
1552
+ kind="invalid_input",
1553
+ exit_code=EXIT_CODES["invalid_input"],
1554
+ message=(
1555
+ "--neighbors-of is mutually exclusive with --gamma, --force, "
1556
+ "and --dry-run; pass only --neighbors-of and --top-k."
1557
+ ),
1558
+ hint="Run `claude-sql community --neighbors-of <sid> --top-k 15` alone.",
1559
+ )
1560
+ emit_error(err, fmt)
1561
+ sys.exit(err.exit_code)
1562
+
1502
1563
  con = _open_connection(settings)
1503
1564
  try:
1504
- stats = run_communities(con, settings, force=force)
1565
+ if neighbors_of_session is not None:
1566
+ df = neighbors_of(con, settings, neighbors_of_session, top_k=top_k)
1567
+ emit_dataframe(df, fmt, table_rows=top_k)
1568
+ return
1569
+
1570
+ if dry_run:
1571
+ row = con.execute(
1572
+ """
1573
+ SELECT COUNT(DISTINCT m.session_id) AS candidate_sessions
1574
+ FROM read_parquet(?) e
1575
+ JOIN messages m
1576
+ ON CAST(m.uuid AS VARCHAR) = e.uuid
1577
+ """,
1578
+ [str(settings.embeddings_parquet_path)],
1579
+ ).fetchone()
1580
+ n = int(row[0]) if row else 0
1581
+ plan: dict[str, object] = {
1582
+ "pipeline": "community",
1583
+ "candidate_sessions": n,
1584
+ "knn_k": settings.leiden_knn_k,
1585
+ "edge_floor": settings.leiden_edge_floor,
1586
+ "min_community_size": settings.leiden_min_community_size,
1587
+ "gamma": gamma if gamma is not None else "auto",
1588
+ "resolution": resolution,
1589
+ "would_write": [
1590
+ str(settings.communities_parquet_path),
1591
+ ]
1592
+ + ([] if gamma is not None else [str(settings.community_profile_parquet_path)]),
1593
+ "dry_run": True,
1594
+ }
1595
+ emit_json(plan, fmt)
1596
+ return
1597
+
1598
+ stats = run_communities(
1599
+ con,
1600
+ settings,
1601
+ force=force,
1602
+ gamma=gamma,
1603
+ resolution=resolution,
1604
+ )
1605
+ import math
1606
+
1607
+ quality_val = stats["quality"]
1608
+ quality_log = (
1609
+ quality_val if isinstance(quality_val, float) and not math.isnan(quality_val) else 0.0
1610
+ )
1505
1611
  logger.info(
1506
- "community: {} sessions grouped into {} communities",
1612
+ "community: {} sessions, {} communities (γ={:.4f}, quality={:.4f})",
1507
1613
  stats["sessions"],
1508
1614
  stats["communities"],
1615
+ stats["gamma_used"],
1616
+ quality_log,
1509
1617
  )
1618
+ _emit_worker_result(stats, common, pipeline="community")
1510
1619
  finally:
1511
1620
  con.close()
1512
1621
 
@@ -1538,7 +1647,7 @@ def analyze(
1538
1647
  1. embed (Bedrock Cohere Embed v4; honors --dry-run)
1539
1648
  2. cluster (UMAP+HDBSCAN; zero-cost; --force-cluster to rebuild)
1540
1649
  3. terms (c-TF-IDF labels for clusters; zero-cost)
1541
- 4. community (Louvain; zero-cost; --force_community to rebuild)
1650
+ 4. community (Leiden+CPM; zero-cost; --force-community to rebuild)
1542
1651
  5. classify (Sonnet 4.6; honors --dry-run)
1543
1652
  6. trajectory (Sonnet 4.6; honors --dry-run)
1544
1653
  7. conflicts (Sonnet 4.6; honors --dry-run)