claude-sql 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {claude_sql-0.4.0 → claude_sql-0.6.0}/PKG-INFO +35 -11
- {claude_sql-0.4.0 → claude_sql-0.6.0}/README.md +32 -9
- {claude_sql-0.4.0 → claude_sql-0.6.0}/pyproject.toml +3 -2
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/cli.py +325 -87
- claude_sql-0.6.0/src/claude_sql/community_worker.py +662 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/config.py +36 -18
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/output.py +10 -4
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/sql_views.py +576 -257
- claude_sql-0.4.0/src/claude_sql/community_worker.py +0 -306
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/__init__.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/binding.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/blind_handover.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/checkpointer.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/cluster_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/embed_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/freeze.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/friction_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/install_source.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/judge_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/judges.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/kappa_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/llm_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/logging_setup.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/parquet_shards.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/retry_queue.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_render.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/review_sheet_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/schemas.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/session_text.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/skills_catalog.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/terms_worker.py +0 -0
- {claude_sql-0.4.0 → claude_sql-0.6.0}/src/claude_sql/ungrounded_worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: claude-sql
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts.
|
|
5
5
|
Keywords: claude,claude-code,anthropic,duckdb,sql,semantic-search,embeddings,bedrock,transcripts,analytics,observability
|
|
6
6
|
Author: Laith Al-Saadoon
|
|
@@ -23,8 +23,9 @@ Requires-Dist: boto3>=1.42.91
|
|
|
23
23
|
Requires-Dist: cyclopts>=4.10.2
|
|
24
24
|
Requires-Dist: duckdb>=1.5.2
|
|
25
25
|
Requires-Dist: hdbscan>=0.8.40
|
|
26
|
+
Requires-Dist: igraph>=1.0.0,<2.0
|
|
27
|
+
Requires-Dist: leidenalg>=0.11.0,<0.12
|
|
26
28
|
Requires-Dist: loguru>=0.7.3
|
|
27
|
-
Requires-Dist: networkx>=3.4
|
|
28
29
|
Requires-Dist: numpy>=2.4.4
|
|
29
30
|
Requires-Dist: packaging>=26.2
|
|
30
31
|
Requires-Dist: polars>=1.40.0
|
|
@@ -122,7 +123,7 @@ flowchart LR
|
|
|
122
123
|
L --> PA["session_classifications/, message_trajectory/,<br/>session_conflicts/, user_friction/<br/>(sharded part-*.parquet)"]
|
|
123
124
|
P --> C["claude-sql cluster<br/>(UMAP + HDBSCAN)"]
|
|
124
125
|
C --> PC["clusters + cluster_terms<br/>(c-TF-IDF)"]
|
|
125
|
-
P --> CM["claude-sql community<br/>(
|
|
126
|
+
P --> CM["claude-sql community<br/>(Leiden + CPM over mutual-kNN centroids)"]
|
|
126
127
|
CM --> PM["session_communities<br/>parquet"]
|
|
127
128
|
PA --> AV[analytics views + macros]
|
|
128
129
|
PC --> AV
|
|
@@ -272,7 +273,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
272
273
|
| `conflicts` | Per-session stance-conflict detection |
|
|
273
274
|
| `friction` | Regex + Sonnet 4.6 → status pings, unmet expectations, confusion, etc. |
|
|
274
275
|
| `cluster` | UMAP → HDBSCAN → c-TF-IDF over message embeddings |
|
|
275
|
-
| `community` |
|
|
276
|
+
| `community` | Leiden + CPM over mutual-kNN session centroids; emits medoid + coherence + resolution profile + `--neighbors-of` |
|
|
276
277
|
| `skills sync` | Walk `~/.claude/skills/` + `~/.claude/plugins/cache/` → seedable skills catalog |
|
|
277
278
|
| `skills ls` | List catalog entries, filterable by `--kind` and `--plugin` |
|
|
278
279
|
| `analyze` | Run the whole pipeline in dependency order |
|
|
@@ -325,7 +326,8 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
325
326
|
| `session_conflicts` | per-session stance conflicts | `stance_a`, `stance_b`, `resolution` |
|
|
326
327
|
| `message_clusters` | cluster id + 2d viz coords | `cluster_id`, `x`, `y`, `is_noise` |
|
|
327
328
|
| `cluster_terms` | c-TF-IDF top terms per cluster | `cluster_id`, `term`, `weight`, `rank` |
|
|
328
|
-
| `session_communities` |
|
|
329
|
+
| `session_communities` | Leiden+CPM community per session | `community_id`, `size`, `is_medoid`, `coherence`, `gamma_used` |
|
|
330
|
+
| `community_profile` | Resolution-profile sidecar (auto-γ runs only) | `gamma`, `n_communities`, `quality`, `plateau_length` |
|
|
329
331
|
| `user_friction` | one row per classified short user message | `label` (7-way), `rationale`, `source` (`regex` / `llm` / `refused`), `confidence` |
|
|
330
332
|
| `skills_catalog` | one row per known skill / slash command (seed by `claude-sql skills sync`) | `skill_id`, `name`, `plugin`, `plugin_version`, `source_kind` (`user-skill` / `plugin-skill` / `plugin-command` / `builtin`), `description` |
|
|
331
333
|
| `skill_usage` | `skill_invocations` ⟕ `skills_catalog` | `source`, `skill_id`, `skill_name`, `plugin`, `is_builtin`, `description` |
|
|
@@ -334,6 +336,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
334
336
|
|
|
335
337
|
| Macro | Signature | What it does |
|
|
336
338
|
|---|---|---|
|
|
339
|
+
| `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
|
|
337
340
|
| `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
|
|
338
341
|
| `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
|
|
339
342
|
| `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
|
|
@@ -380,7 +383,15 @@ Every option is configurable via `CLAUDE_SQL_*`:
|
|
|
380
383
|
| `CLAUDE_SQL_SKILLS_CATALOG_PARQUET_PATH` | `~/.claude/skills_catalog.parquet` | Skills catalog parquet |
|
|
381
384
|
| `CLAUDE_SQL_USER_SKILLS_DIR` | `~/.claude/skills` | Root scanned for user-installed skills |
|
|
382
385
|
| `CLAUDE_SQL_PLUGINS_CACHE_DIR` | `~/.claude/plugins/cache` | Root scanned for plugin skills + commands |
|
|
383
|
-
| `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Louvain determinism |
|
|
386
|
+
| `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Leiden determinism |
|
|
387
|
+
| `CLAUDE_SQL_LEIDEN_KNN_K` | `15` | Mutual-kNN k for the session-centroid graph |
|
|
388
|
+
| `CLAUDE_SQL_LEIDEN_EDGE_FLOOR` | `0.3` | Cosine floor below which edges are dropped |
|
|
389
|
+
| `CLAUDE_SQL_LEIDEN_MIN_COMMUNITY_SIZE` | `3` | Communities below this collapse to noise (`-1`) |
|
|
390
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION` | unset (auto) | Explicit CPM γ; skips the resolution profile |
|
|
391
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_LO` | `0.05` | Lower bound for `Optimiser.resolution_profile` bisection |
|
|
392
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_HI` | `0.95` | Upper bound for the bisection |
|
|
393
|
+
| `CLAUDE_SQL_LEIDEN_N_ITERATIONS` | `-1` | Iterate to convergence; `2` is leidenalg's default |
|
|
394
|
+
| `CLAUDE_SQL_COMMUNITY_PROFILE_PARQUET_PATH` | `~/.claude/community_profile.parquet` | Resolution-profile sidecar path |
|
|
384
395
|
|
|
385
396
|
## Development
|
|
386
397
|
|
|
@@ -497,12 +508,25 @@ See `docs/adr/0015-stack-modernization.md` and
|
|
|
497
508
|
with adaptive thinking on. Pydantic v2 schemas are flattened (inline
|
|
498
509
|
`$ref`, inject `additionalProperties: false`, strip the numeric /
|
|
499
510
|
string constraints the validator rejects from Draft 2020-12).
|
|
500
|
-
- **Determinism.** UMAP, HDBSCAN, and Louvain all seed from
|
|
511
|
+
- **Determinism.** UMAP, HDBSCAN, and Leiden all seed from
|
|
501
512
|
`CLAUDE_SQL_SEED=42` (default) so cluster IDs and community IDs are
|
|
502
|
-
stable across reruns.
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
513
|
+
stable across reruns. The Leiden seed flows into both
|
|
514
|
+
`leidenalg.find_partition(seed=...)` and `Optimiser.set_rng_seed(...)`
|
|
515
|
+
for the resolution-profile bisection — same seed + same input ⇒
|
|
516
|
+
byte-equal parquets across runs.
|
|
517
|
+
- **Communities = `leidenalg` + CPM.** Reference Leiden implementation
|
|
518
|
+
(`leidenalg>=0.11.0`) over an `igraph>=1.0` mutual-kNN cosine graph
|
|
519
|
+
(k=15, edge floor 0.3) of session centroids. CPM γ has direct cosine
|
|
520
|
+
semantics (internal density ≥ γ, external ≤ γ); auto-γ via
|
|
521
|
+
`Optimiser.resolution_profile` + longest-plateau picker; warn-only
|
|
522
|
+
connectivity check. Output is signal-rich: `is_medoid`, `coherence`,
|
|
523
|
+
`gamma_used` per row, plus a `community_profile` sidecar with one row
|
|
524
|
+
per γ tested, so an LLM agent can ask "what γ would yield 50
|
|
525
|
+
communities?" without rerunning Leiden. Top terms per community come
|
|
526
|
+
from the live `community_top_topics(cid, n)` macro composed from
|
|
527
|
+
`cluster_terms`, not a frozen column. See
|
|
528
|
+
[`docs/research_notes.md`](docs/research_notes.md) for the
|
|
529
|
+
Louvain → Leiden+CPM swap rationale.
|
|
506
530
|
- **Hybrid friction pipeline.** A hand-curated regex bank catches the
|
|
507
531
|
unambiguous `status_ping` / `interruption` / `correction` cases at
|
|
508
532
|
zero Bedrock cost; the ambiguous class — especially
|
|
@@ -77,7 +77,7 @@ flowchart LR
|
|
|
77
77
|
L --> PA["session_classifications/, message_trajectory/,<br/>session_conflicts/, user_friction/<br/>(sharded part-*.parquet)"]
|
|
78
78
|
P --> C["claude-sql cluster<br/>(UMAP + HDBSCAN)"]
|
|
79
79
|
C --> PC["clusters + cluster_terms<br/>(c-TF-IDF)"]
|
|
80
|
-
P --> CM["claude-sql community<br/>(
|
|
80
|
+
P --> CM["claude-sql community<br/>(Leiden + CPM over mutual-kNN centroids)"]
|
|
81
81
|
CM --> PM["session_communities<br/>parquet"]
|
|
82
82
|
PA --> AV[analytics views + macros]
|
|
83
83
|
PC --> AV
|
|
@@ -227,7 +227,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
227
227
|
| `conflicts` | Per-session stance-conflict detection |
|
|
228
228
|
| `friction` | Regex + Sonnet 4.6 → status pings, unmet expectations, confusion, etc. |
|
|
229
229
|
| `cluster` | UMAP → HDBSCAN → c-TF-IDF over message embeddings |
|
|
230
|
-
| `community` |
|
|
230
|
+
| `community` | Leiden + CPM over mutual-kNN session centroids; emits medoid + coherence + resolution profile + `--neighbors-of` |
|
|
231
231
|
| `skills sync` | Walk `~/.claude/skills/` + `~/.claude/plugins/cache/` → seedable skills catalog |
|
|
232
232
|
| `skills ls` | List catalog entries, filterable by `--kind` and `--plugin` |
|
|
233
233
|
| `analyze` | Run the whole pipeline in dependency order |
|
|
@@ -280,7 +280,8 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
280
280
|
| `session_conflicts` | per-session stance conflicts | `stance_a`, `stance_b`, `resolution` |
|
|
281
281
|
| `message_clusters` | cluster id + 2d viz coords | `cluster_id`, `x`, `y`, `is_noise` |
|
|
282
282
|
| `cluster_terms` | c-TF-IDF top terms per cluster | `cluster_id`, `term`, `weight`, `rank` |
|
|
283
|
-
| `session_communities` |
|
|
283
|
+
| `session_communities` | Leiden+CPM community per session | `community_id`, `size`, `is_medoid`, `coherence`, `gamma_used` |
|
|
284
|
+
| `community_profile` | Resolution-profile sidecar (auto-γ runs only) | `gamma`, `n_communities`, `quality`, `plateau_length` |
|
|
284
285
|
| `user_friction` | one row per classified short user message | `label` (7-way), `rationale`, `source` (`regex` / `llm` / `refused`), `confidence` |
|
|
285
286
|
| `skills_catalog` | one row per known skill / slash command (seed by `claude-sql skills sync`) | `skill_id`, `name`, `plugin`, `plugin_version`, `source_kind` (`user-skill` / `plugin-skill` / `plugin-command` / `builtin`), `description` |
|
|
286
287
|
| `skill_usage` | `skill_invocations` ⟕ `skills_catalog` | `source`, `skill_id`, `skill_name`, `plugin`, `is_builtin`, `description` |
|
|
@@ -289,6 +290,7 @@ Commands that spend real Bedrock money default to `--dry-run`.
|
|
|
289
290
|
|
|
290
291
|
| Macro | Signature | What it does |
|
|
291
292
|
|---|---|---|
|
|
293
|
+
| `ago(interval_text)` | scalar → `TIMESTAMP` | `current_timestamp - INTERVAL <text>` -- e.g. `WHERE ts >= ago('30 days')` |
|
|
292
294
|
| `model_used(sid)` | scalar → `VARCHAR` | Latest `model` observed in the session |
|
|
293
295
|
| `cost_estimate(sid)` | scalar → `DOUBLE` | USD spend (dated model IDs prefix-matched) |
|
|
294
296
|
| `tool_rank(last_n_days)` | table | Tool-use leaderboard over a window |
|
|
@@ -335,7 +337,15 @@ Every option is configurable via `CLAUDE_SQL_*`:
|
|
|
335
337
|
| `CLAUDE_SQL_SKILLS_CATALOG_PARQUET_PATH` | `~/.claude/skills_catalog.parquet` | Skills catalog parquet |
|
|
336
338
|
| `CLAUDE_SQL_USER_SKILLS_DIR` | `~/.claude/skills` | Root scanned for user-installed skills |
|
|
337
339
|
| `CLAUDE_SQL_PLUGINS_CACHE_DIR` | `~/.claude/plugins/cache` | Root scanned for plugin skills + commands |
|
|
338
|
-
| `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Louvain determinism |
|
|
340
|
+
| `CLAUDE_SQL_SEED` | `42` | UMAP / HDBSCAN / Leiden determinism |
|
|
341
|
+
| `CLAUDE_SQL_LEIDEN_KNN_K` | `15` | Mutual-kNN k for the session-centroid graph |
|
|
342
|
+
| `CLAUDE_SQL_LEIDEN_EDGE_FLOOR` | `0.3` | Cosine floor below which edges are dropped |
|
|
343
|
+
| `CLAUDE_SQL_LEIDEN_MIN_COMMUNITY_SIZE` | `3` | Communities below this collapse to noise (`-1`) |
|
|
344
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION` | unset (auto) | Explicit CPM γ; skips the resolution profile |
|
|
345
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_LO` | `0.05` | Lower bound for `Optimiser.resolution_profile` bisection |
|
|
346
|
+
| `CLAUDE_SQL_LEIDEN_RESOLUTION_RANGE_HI` | `0.95` | Upper bound for the bisection |
|
|
347
|
+
| `CLAUDE_SQL_LEIDEN_N_ITERATIONS` | `-1` | Iterate to convergence; `2` is leidenalg's default |
|
|
348
|
+
| `CLAUDE_SQL_COMMUNITY_PROFILE_PARQUET_PATH` | `~/.claude/community_profile.parquet` | Resolution-profile sidecar path |
|
|
339
349
|
|
|
340
350
|
## Development
|
|
341
351
|
|
|
@@ -452,12 +462,25 @@ See `docs/adr/0015-stack-modernization.md` and
|
|
|
452
462
|
with adaptive thinking on. Pydantic v2 schemas are flattened (inline
|
|
453
463
|
`$ref`, inject `additionalProperties: false`, strip the numeric /
|
|
454
464
|
string constraints the validator rejects from Draft 2020-12).
|
|
455
|
-
- **Determinism.** UMAP, HDBSCAN, and Louvain all seed from
|
|
465
|
+
- **Determinism.** UMAP, HDBSCAN, and Leiden all seed from
|
|
456
466
|
`CLAUDE_SQL_SEED=42` (default) so cluster IDs and community IDs are
|
|
457
|
-
stable across reruns.
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
467
|
+
stable across reruns. The Leiden seed flows into both
|
|
468
|
+
`leidenalg.find_partition(seed=...)` and `Optimiser.set_rng_seed(...)`
|
|
469
|
+
for the resolution-profile bisection — same seed + same input ⇒
|
|
470
|
+
byte-equal parquets across runs.
|
|
471
|
+
- **Communities = `leidenalg` + CPM.** Reference Leiden implementation
|
|
472
|
+
(`leidenalg>=0.11.0`) over an `igraph>=1.0` mutual-kNN cosine graph
|
|
473
|
+
(k=15, edge floor 0.3) of session centroids. CPM γ has direct cosine
|
|
474
|
+
semantics (internal density ≥ γ, external ≤ γ); auto-γ via
|
|
475
|
+
`Optimiser.resolution_profile` + longest-plateau picker; warn-only
|
|
476
|
+
connectivity check. Output is signal-rich: `is_medoid`, `coherence`,
|
|
477
|
+
`gamma_used` per row, plus a `community_profile` sidecar with one row
|
|
478
|
+
per γ tested, so an LLM agent can ask "what γ would yield 50
|
|
479
|
+
communities?" without rerunning Leiden. Top terms per community come
|
|
480
|
+
from the live `community_top_topics(cid, n)` macro composed from
|
|
481
|
+
`cluster_terms`, not a frozen column. See
|
|
482
|
+
[`docs/research_notes.md`](docs/research_notes.md) for the
|
|
483
|
+
Louvain → Leiden+CPM swap rationale.
|
|
461
484
|
- **Hybrid friction pipeline.** A hand-curated regex bank catches the
|
|
462
485
|
unambiguous `status_ping` / `interruption` / `correction` cases at
|
|
463
486
|
zero Bedrock cost; the ambiguous class — especially
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "claude-sql"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.6.0"
|
|
4
4
|
description = "Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "Apache-2.0" }
|
|
@@ -33,8 +33,9 @@ dependencies = [
|
|
|
33
33
|
"cyclopts>=4.10.2",
|
|
34
34
|
"duckdb>=1.5.2",
|
|
35
35
|
"hdbscan>=0.8.40",
|
|
36
|
+
"igraph>=1.0.0,<2.0",
|
|
37
|
+
"leidenalg>=0.11.0,<0.12",
|
|
36
38
|
"loguru>=0.7.3",
|
|
37
|
-
"networkx>=3.4",
|
|
38
39
|
"numpy>=2.4.4",
|
|
39
40
|
"packaging>=26.2",
|
|
40
41
|
"polars>=1.40.0",
|