claude-sql 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {claude_sql-1.0.0 → claude_sql-1.0.1}/PKG-INFO +1 -2
- {claude_sql-1.0.0 → claude_sql-1.0.1}/README.md +0 -1
- {claude_sql-1.0.0 → claude_sql-1.0.1}/pyproject.toml +1 -1
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/checkpointer.py +2 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/config.py +0 -31
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/embed_worker.py +1 -27
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/parquet_shards.py +81 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/sql_views.py +11 -88
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/trajectory_worker.py +13 -1
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/__init__.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/binding.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/blind_handover.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/classify_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/cli.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/cluster_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/community_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/conflicts_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/freeze.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/friction_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/home.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/ingest.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/install_source.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/judge_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/judges.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/kappa_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/lance_store.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/llm_shared.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/logging_setup.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/output.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/retry_queue.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/review_sheet_render.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/review_sheet_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/schemas.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/session_text.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/skills_catalog.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/terms_worker.py +0 -0
- {claude_sql-1.0.0 → claude_sql-1.0.1}/src/claude_sql/ungrounded_worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: claude-sql
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Zero-copy SQL + semantic search + LLM analytics over ~/.claude/ transcripts.
|
|
5
5
|
Keywords: claude,claude-code,anthropic,duckdb,sql,semantic-search,embeddings,bedrock,transcripts,analytics,observability
|
|
6
6
|
Author: Laith Al-Saadoon
|
|
@@ -373,7 +373,6 @@ Every option is configurable via `CLAUDE_SQL_*`:
|
|
|
373
373
|
| `CLAUDE_SQL_OUTPUT_DIMENSION` | `1024` | Matryoshka embedding dimension |
|
|
374
374
|
| `CLAUDE_SQL_EMBED_CONCURRENCY` | `8` | Parallel Cohere Embed v4 calls (global CRIS) |
|
|
375
375
|
| `CLAUDE_SQL_LLM_CONCURRENCY` | `2` | Parallel Sonnet 4.6 calls (global CRIS) |
|
|
376
|
-
| `CLAUDE_SQL_CONCURRENCY` | `None` | DEPRECATED single knob — aliases onto both pipelines with a warning |
|
|
377
376
|
| `CLAUDE_SQL_BATCH_SIZE` | `96` | Cohere batch size |
|
|
378
377
|
| `CLAUDE_SQL_EMBEDDINGS_PARQUET_PATH` | `~/.claude/embeddings/` | Embeddings cache (sharded directory of `part-*.parquet`) |
|
|
379
378
|
| `CLAUDE_SQL_USER_FRICTION_PARQUET_PATH` | `~/.claude/user_friction/` | Friction cache (sharded) |
|
|
@@ -325,7 +325,6 @@ Every option is configurable via `CLAUDE_SQL_*`:
|
|
|
325
325
|
| `CLAUDE_SQL_OUTPUT_DIMENSION` | `1024` | Matryoshka embedding dimension |
|
|
326
326
|
| `CLAUDE_SQL_EMBED_CONCURRENCY` | `8` | Parallel Cohere Embed v4 calls (global CRIS) |
|
|
327
327
|
| `CLAUDE_SQL_LLM_CONCURRENCY` | `2` | Parallel Sonnet 4.6 calls (global CRIS) |
|
|
328
|
-
| `CLAUDE_SQL_CONCURRENCY` | `None` | DEPRECATED single knob — aliases onto both pipelines with a warning |
|
|
329
328
|
| `CLAUDE_SQL_BATCH_SIZE` | `96` | Cohere batch size |
|
|
330
329
|
| `CLAUDE_SQL_EMBEDDINGS_PARQUET_PATH` | `~/.claude/embeddings/` | Embeddings cache (sharded directory of `part-*.parquet`) |
|
|
331
330
|
| `CLAUDE_SQL_USER_FRICTION_PARQUET_PATH` | `~/.claude/user_friction/` | Friction cache (sharded) |
|
|
@@ -207,6 +207,8 @@ def _migrate_from_duckdb_if_present(new_path: Path) -> None:
|
|
|
207
207
|
)
|
|
208
208
|
con.execute("COMMIT")
|
|
209
209
|
except Exception:
|
|
210
|
+
# Rollback-and-reraise: any mid-bulk-INSERT failure must abort
|
|
211
|
+
# the txn cleanly and surface to the caller.
|
|
210
212
|
con.execute("ROLLBACK")
|
|
211
213
|
raise
|
|
212
214
|
finally:
|
|
@@ -8,7 +8,6 @@ Defaults are picked for a single-user devbox install pointing at
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import os
|
|
11
|
-
import warnings
|
|
12
11
|
from pathlib import Path
|
|
13
12
|
from typing import Literal, Self
|
|
14
13
|
|
|
@@ -179,10 +178,6 @@ class Settings(BaseSettings):
|
|
|
179
178
|
#: concurrency=16 scales that linearly with negligible throttle.
|
|
180
179
|
#: Drop to 2–4 if a future model has a smaller TPM bucket.
|
|
181
180
|
llm_concurrency: int = 16
|
|
182
|
-
#: DEPRECATED: use ``embed_concurrency`` / ``llm_concurrency``. Kept for
|
|
183
|
-
#: one release as a back-compat alias — when set explicitly (env or
|
|
184
|
-
#: kwarg), it overrides both. Removed once downstream callers migrate.
|
|
185
|
-
concurrency: int | None = None
|
|
186
181
|
batch_size: int = 96
|
|
187
182
|
|
|
188
183
|
embeddings_parquet_path: Path = Field(default_factory=_default_embeddings_parquet)
|
|
@@ -381,32 +376,6 @@ class Settings(BaseSettings):
|
|
|
381
376
|
)
|
|
382
377
|
return self
|
|
383
378
|
|
|
384
|
-
@model_validator(mode="after")
|
|
385
|
-
def _resolve_concurrency_alias(self) -> Settings:
|
|
386
|
-
"""Honor the deprecated ``concurrency`` field as an alias for both pipelines.
|
|
387
|
-
|
|
388
|
-
When ``concurrency`` is set explicitly (env or kwarg) and the modern
|
|
389
|
-
per-pipeline fields are at their defaults, mirror it onto both. We
|
|
390
|
-
only override when the user clearly didn't set the new fields, so
|
|
391
|
-
``embed_concurrency=8, concurrency=4`` keeps the explicit 8.
|
|
392
|
-
"""
|
|
393
|
-
if self.concurrency is None:
|
|
394
|
-
return self
|
|
395
|
-
warnings.warn(
|
|
396
|
-
"CLAUDE_SQL_CONCURRENCY / Settings.concurrency is deprecated. "
|
|
397
|
-
"Use CLAUDE_SQL_EMBED_CONCURRENCY (default 8) and "
|
|
398
|
-
"CLAUDE_SQL_LLM_CONCURRENCY (default 2) instead. The single "
|
|
399
|
-
"knob will be removed in the next release.",
|
|
400
|
-
DeprecationWarning,
|
|
401
|
-
stacklevel=2,
|
|
402
|
-
)
|
|
403
|
-
# Only apply the alias to fields still at their default value.
|
|
404
|
-
if self.embed_concurrency == 8:
|
|
405
|
-
object.__setattr__(self, "embed_concurrency", self.concurrency)
|
|
406
|
-
if self.llm_concurrency == 16:
|
|
407
|
-
object.__setattr__(self, "llm_concurrency", self.concurrency)
|
|
408
|
-
return self
|
|
409
|
-
|
|
410
379
|
@property
|
|
411
380
|
def active_model_id(self) -> str:
|
|
412
381
|
"""Return the Bedrock embedding model ID (kept as a property for call-site stability)."""
|
|
@@ -22,9 +22,7 @@ from datetime import UTC, datetime
|
|
|
22
22
|
from pathlib import Path
|
|
23
23
|
from typing import TYPE_CHECKING, Any
|
|
24
24
|
|
|
25
|
-
import boto3
|
|
26
25
|
import polars as pl
|
|
27
|
-
from botocore.config import Config as BotoConfig
|
|
28
26
|
from botocore.exceptions import (
|
|
29
27
|
ClientError,
|
|
30
28
|
ConnectionError as BotoConnectionError,
|
|
@@ -42,6 +40,7 @@ from tenacity import (
|
|
|
42
40
|
|
|
43
41
|
from claude_sql import lance_store
|
|
44
42
|
from claude_sql.config import Settings
|
|
43
|
+
from claude_sql.llm_shared import _build_bedrock_client
|
|
45
44
|
from claude_sql.logging_setup import loguru_before_sleep
|
|
46
45
|
|
|
47
46
|
if TYPE_CHECKING:
|
|
@@ -179,31 +178,6 @@ def discover_unembedded(
|
|
|
179
178
|
return pairs
|
|
180
179
|
|
|
181
180
|
|
|
182
|
-
def _build_bedrock_client(settings: Settings) -> Any:
|
|
183
|
-
"""Construct a boto3 ``bedrock-runtime`` client from settings.
|
|
184
|
-
|
|
185
|
-
Parameters
|
|
186
|
-
----------
|
|
187
|
-
settings
|
|
188
|
-
Application settings providing the target AWS region.
|
|
189
|
-
|
|
190
|
-
Returns
|
|
191
|
-
-------
|
|
192
|
-
botocore client
|
|
193
|
-
A low-level ``bedrock-runtime`` client.
|
|
194
|
-
"""
|
|
195
|
-
# Disable botocore's internal retry layer so tenacity sees throttling
|
|
196
|
-
# immediately — otherwise botocore silently absorbs 4 retries and our
|
|
197
|
-
# retry policy never kicks in. Also bump read_timeout for large batches.
|
|
198
|
-
boto_cfg = BotoConfig(
|
|
199
|
-
region_name=settings.region,
|
|
200
|
-
retries={"max_attempts": 0, "mode": "standard"},
|
|
201
|
-
read_timeout=60,
|
|
202
|
-
connect_timeout=10,
|
|
203
|
-
)
|
|
204
|
-
return boto3.client("bedrock-runtime", config=boto_cfg)
|
|
205
|
-
|
|
206
|
-
|
|
207
181
|
@retry(
|
|
208
182
|
# Cohere Embed v4 on Bedrock has a strict TPM bucket that replenishes over
|
|
209
183
|
# tens of seconds; wait up to 60s between attempts and try up to 10 times
|
|
@@ -40,10 +40,12 @@ Public API
|
|
|
40
40
|
from __future__ import annotations
|
|
41
41
|
|
|
42
42
|
import time
|
|
43
|
+
from collections.abc import Iterable
|
|
43
44
|
from pathlib import Path
|
|
44
45
|
from typing import Any
|
|
45
46
|
|
|
46
47
|
import polars as pl
|
|
48
|
+
from loguru import logger
|
|
47
49
|
|
|
48
50
|
#: Glob pattern for shard part files within a sharded cache directory.
|
|
49
51
|
PART_GLOB: str = "part-*.parquet"
|
|
@@ -162,11 +164,90 @@ def count_rows(target: Path) -> int:
|
|
|
162
164
|
return total
|
|
163
165
|
|
|
164
166
|
|
|
167
|
+
def replace_sessions(
|
|
168
|
+
target: Path,
|
|
169
|
+
*,
|
|
170
|
+
key_column: str,
|
|
171
|
+
session_ids: Iterable[str],
|
|
172
|
+
) -> int:
|
|
173
|
+
"""Drop rows whose ``key_column`` is in ``session_ids`` across every shard.
|
|
174
|
+
|
|
175
|
+
Context
|
|
176
|
+
-------
|
|
177
|
+
Workers like :mod:`claude_sql.trajectory_worker` gate computation on a
|
|
178
|
+
``(session_id, latest_ts, message_count)`` checkpoint that advances when
|
|
179
|
+
a session grows. When the checkpoint admits a session for re-scoring,
|
|
180
|
+
the pipeline rewrites rows it may have already produced under an earlier
|
|
181
|
+
shard — without this helper those prior rows accumulate and every
|
|
182
|
+
``(session_id, prev_uuid, curr_uuid)`` pair duplicates on rerun.
|
|
183
|
+
|
|
184
|
+
Behavior
|
|
185
|
+
--------
|
|
186
|
+
* Shards containing *some* rows for ``session_ids`` are rewritten in place
|
|
187
|
+
with those rows filtered out; other sessions' rows are preserved.
|
|
188
|
+
* Shards that become empty are unlinked — leaving empty part files causes
|
|
189
|
+
DuckDB's ``read_parquet`` glob to bind a zero-row parquet and surfaces
|
|
190
|
+
no other harm, but the file is unreachable as data so we remove it.
|
|
191
|
+
* A shard with no matching rows is left untouched (cheap footer read).
|
|
192
|
+
* The legacy single-file branch mirrors the same shape: filter → rewrite
|
|
193
|
+
(or unlink when the filter empties the file).
|
|
194
|
+
|
|
195
|
+
Returns the total number of rows removed across the cache. Returns 0 on
|
|
196
|
+
an empty cache, a missing file, or an empty ``session_ids``.
|
|
197
|
+
"""
|
|
198
|
+
ids = set(session_ids)
|
|
199
|
+
if not ids:
|
|
200
|
+
return 0
|
|
201
|
+
parts = iter_part_files(target)
|
|
202
|
+
if not parts:
|
|
203
|
+
return 0
|
|
204
|
+
removed_total = 0
|
|
205
|
+
for part in parts:
|
|
206
|
+
try:
|
|
207
|
+
df = pl.read_parquet(part)
|
|
208
|
+
except (OSError, pl.exceptions.ComputeError) as exc:
|
|
209
|
+
# A truncated or unreadable shard is worth flagging — don't
|
|
210
|
+
# silently let it block the replace. Leave it on disk; the
|
|
211
|
+
# caller's next write still lands, and the analytics view will
|
|
212
|
+
# surface the unreadable file the next time it binds.
|
|
213
|
+
logger.warning("replace_sessions: unreadable shard {} ({}); skipping", part, exc)
|
|
214
|
+
continue
|
|
215
|
+
if key_column not in df.columns or df.height == 0:
|
|
216
|
+
continue
|
|
217
|
+
mask = df[key_column].is_in(list(ids))
|
|
218
|
+
hit_count = int(mask.sum())
|
|
219
|
+
if hit_count == 0:
|
|
220
|
+
continue
|
|
221
|
+
removed_total += hit_count
|
|
222
|
+
kept = df.filter(~mask)
|
|
223
|
+
if kept.height == 0:
|
|
224
|
+
# Nothing left in this shard — remove it so the cache doesn't
|
|
225
|
+
# accumulate empty part files across reruns.
|
|
226
|
+
try:
|
|
227
|
+
part.unlink()
|
|
228
|
+
except OSError as exc:
|
|
229
|
+
logger.warning("replace_sessions: failed to unlink empty shard {}: {}", part, exc)
|
|
230
|
+
continue
|
|
231
|
+
# Rewrite in place. The legacy single-file branch lands here too
|
|
232
|
+
# (``iter_part_files`` returns ``[target]`` in that case), which is
|
|
233
|
+
# correct — we want to overwrite the same file.
|
|
234
|
+
kept.write_parquet(part)
|
|
235
|
+
if removed_total:
|
|
236
|
+
logger.info(
|
|
237
|
+
"replace_sessions: dropped {} row(s) for {} session(s) under {}",
|
|
238
|
+
removed_total,
|
|
239
|
+
len(ids),
|
|
240
|
+
target,
|
|
241
|
+
)
|
|
242
|
+
return removed_total
|
|
243
|
+
|
|
244
|
+
|
|
165
245
|
__all__ = [
|
|
166
246
|
"PART_GLOB",
|
|
167
247
|
"count_rows",
|
|
168
248
|
"is_sharded_dir",
|
|
169
249
|
"iter_part_files",
|
|
170
250
|
"read_all",
|
|
251
|
+
"replace_sessions",
|
|
171
252
|
"write_part",
|
|
172
253
|
]
|
|
@@ -57,8 +57,8 @@ SUBAGENT_META_GLOB: str = os.path.expanduser("~/.claude/projects/*/*/subagents/a
|
|
|
57
57
|
|
|
58
58
|
# Business-level views emitted by ``register_views``. Used by the
|
|
59
59
|
# ``claude-sql schema`` subcommand for schema dumps. Includes the v2
|
|
60
|
-
# analytics view names at the tail
|
|
61
|
-
#
|
|
60
|
+
# analytics view names at the tail; the schema dump materializes only
|
|
61
|
+
# rows where :func:`register_analytics` has populated the matching parquets.
|
|
62
62
|
VIEW_NAMES: tuple[str, ...] = (
|
|
63
63
|
"sessions",
|
|
64
64
|
"messages",
|
|
@@ -73,7 +73,6 @@ VIEW_NAMES: tuple[str, ...] = (
|
|
|
73
73
|
"task_creations",
|
|
74
74
|
"task_updates",
|
|
75
75
|
"tasks_state_current",
|
|
76
|
-
"task_spawns",
|
|
77
76
|
"skill_invocations",
|
|
78
77
|
"subagent_sessions",
|
|
79
78
|
"subagent_messages",
|
|
@@ -109,11 +108,11 @@ VIEW_NAMES: tuple[str, ...] = (
|
|
|
109
108
|
# they're correctly omitted because the source of truth for those views
|
|
110
109
|
# is the parquet, not this dict.
|
|
111
110
|
#
|
|
112
|
-
# Drift is caught by :func:`tests.test_sql_views.
|
|
113
|
-
# which registers the v1 views over the fixture corpus, runs
|
|
114
|
-
#
|
|
115
|
-
#
|
|
116
|
-
#
|
|
111
|
+
# Drift is caught by :func:`tests.test_sql_views.test_view_schema_matches_describe_inline`,
|
|
112
|
+
# which registers the v1 views over the fixture corpus, runs ``DESCRIBE``
|
|
113
|
+
# inline per view, and asserts column-level equality with this dict. A
|
|
114
|
+
# contributor who edits view DDL without updating ``VIEW_SCHEMA`` gets a
|
|
115
|
+
# hard CI failure rather than a runtime mystery.
|
|
117
116
|
VIEW_SCHEMA: dict[str, tuple[tuple[str, str], ...]] = {
|
|
118
117
|
"sessions": (
|
|
119
118
|
("session_id", "VARCHAR"),
|
|
@@ -251,16 +250,6 @@ VIEW_SCHEMA: dict[str, tuple[tuple[str, str], ...]] = {
|
|
|
251
250
|
("created_at", "TIMESTAMP"),
|
|
252
251
|
("last_updated_at", "TIMESTAMP"),
|
|
253
252
|
),
|
|
254
|
-
"task_spawns": (
|
|
255
|
-
("session_id", "VARCHAR"),
|
|
256
|
-
("spawned_at", "TIMESTAMP"),
|
|
257
|
-
("message_uuid", "VARCHAR"),
|
|
258
|
-
("tool_use_id", "VARCHAR"),
|
|
259
|
-
("spawn_tool", "VARCHAR"),
|
|
260
|
-
("subagent_type", "VARCHAR"),
|
|
261
|
-
("description", "VARCHAR"),
|
|
262
|
-
("prompt", "VARCHAR"),
|
|
263
|
-
),
|
|
264
253
|
"skill_invocations": (
|
|
265
254
|
("session_id", "VARCHAR"),
|
|
266
255
|
("ts", "TIMESTAMP"),
|
|
@@ -421,7 +410,7 @@ def _sql_str(value: str) -> str:
|
|
|
421
410
|
# Drift discipline: when a downstream view in :func:`register_views` adds a
|
|
422
411
|
# new top-level field reference, add it here too — otherwise the view will
|
|
423
412
|
# silently return NULL for that column. The
|
|
424
|
-
# ``
|
|
413
|
+
# ``test_view_schema_matches_describe_inline`` drift test catches the column
|
|
425
414
|
# disappearing from any of the 18 v1 views.
|
|
426
415
|
_MESSAGE_STRUCT_TYPE: str = (
|
|
427
416
|
"STRUCT("
|
|
@@ -610,6 +599,7 @@ def register_raw(
|
|
|
610
599
|
)
|
|
611
600
|
logger.debug("Registered v_raw_subagent_meta from glob {}", subagent_meta_glob)
|
|
612
601
|
except Exception:
|
|
602
|
+
# register-or-fail-loud — any DuckDB error must surface to the caller.
|
|
613
603
|
logger.exception("Failed to register raw views")
|
|
614
604
|
raise
|
|
615
605
|
|
|
@@ -626,8 +616,7 @@ def register_views(con: duckdb.DuckDBPyConnection) -> None:
|
|
|
626
616
|
``sessions``, ``messages``, ``content_blocks``, ``messages_text``,
|
|
627
617
|
``tool_calls``, ``tool_results``, ``todo_events``, ``todo_state_current``,
|
|
628
618
|
``subagent_spawns``, ``task_creations``, ``task_updates``,
|
|
629
|
-
``tasks_state_current``, ``
|
|
630
|
-
``subagent_sessions``, ``subagent_messages``.
|
|
619
|
+
``tasks_state_current``, ``subagent_sessions``, ``subagent_messages``.
|
|
631
620
|
|
|
632
621
|
The split between ``subagent_spawns`` and ``task_creations`` reflects
|
|
633
622
|
the Claude Code v2.1.63 ``Task``→``Agent`` rename and the v2.1.16
|
|
@@ -973,30 +962,6 @@ def register_views(con: duckdb.DuckDBPyConnection) -> None:
|
|
|
973
962
|
)
|
|
974
963
|
logger.debug("Registered view: tasks_state_current")
|
|
975
964
|
|
|
976
|
-
# DEPRECATED: ``task_spawns`` predates the Task→Agent rename (v2.1.63)
|
|
977
|
-
# and the TodoWrite→TaskCreate split (v2.1.16). It conflated subagent
|
|
978
|
-
# launchers with task-tracker creation. Kept as a UNION ALL alias for
|
|
979
|
-
# one release; new analytics should use ``subagent_spawns`` or
|
|
980
|
-
# ``task_creations`` directly. Removed in the next minor release.
|
|
981
|
-
con.execute(
|
|
982
|
-
"""
|
|
983
|
-
CREATE OR REPLACE VIEW task_spawns AS
|
|
984
|
-
SELECT
|
|
985
|
-
session_id, spawned_at, message_uuid, tool_use_id,
|
|
986
|
-
spawn_tool, subagent_type, description, prompt
|
|
987
|
-
FROM subagent_spawns
|
|
988
|
-
UNION ALL
|
|
989
|
-
SELECT
|
|
990
|
-
session_id, created_at AS spawned_at, message_uuid, tool_use_id,
|
|
991
|
-
create_tool AS spawn_tool,
|
|
992
|
-
NULL AS subagent_type,
|
|
993
|
-
description,
|
|
994
|
-
NULL AS prompt
|
|
995
|
-
FROM task_creations;
|
|
996
|
-
"""
|
|
997
|
-
)
|
|
998
|
-
logger.debug("Registered view: task_spawns (deprecated)")
|
|
999
|
-
|
|
1000
965
|
# Every Skill / slash-command invocation observable in the transcripts,
|
|
1001
966
|
# unioned across the two shapes they take:
|
|
1002
967
|
#
|
|
@@ -1108,6 +1073,7 @@ def register_views(con: duckdb.DuckDBPyConnection) -> None:
|
|
|
1108
1073
|
)
|
|
1109
1074
|
logger.debug("Registered view: subagent_messages")
|
|
1110
1075
|
except Exception:
|
|
1076
|
+
# register-or-fail-loud — any DuckDB error must surface to the caller.
|
|
1111
1077
|
logger.exception("Failed to register derived views")
|
|
1112
1078
|
raise
|
|
1113
1079
|
|
|
@@ -2148,49 +2114,6 @@ def register_all(
|
|
|
2148
2114
|
# ---------------------------------------------------------------------------
|
|
2149
2115
|
|
|
2150
2116
|
|
|
2151
|
-
def describe_all(con: duckdb.DuckDBPyConnection) -> dict[str, list[tuple[str, str]]]:
|
|
2152
|
-
"""Return the column schema of every business-level view.
|
|
2153
|
-
|
|
2154
|
-
.. deprecated::
|
|
2155
|
-
Use :data:`VIEW_SCHEMA` for static introspection. ``describe_all``
|
|
2156
|
-
opens a DuckDB connection and runs ``DESCRIBE`` per view, which on
|
|
2157
|
-
the live corpus takes ~14 s -- prohibitive for the agent-facing
|
|
2158
|
-
``schema`` command. Kept for one release as a fallback and as the
|
|
2159
|
-
ground truth used by the ``test_view_schema_matches_describe_all``
|
|
2160
|
-
drift test; it will be removed once that test moves to a different
|
|
2161
|
-
introspection path.
|
|
2162
|
-
|
|
2163
|
-
Parameters
|
|
2164
|
-
----------
|
|
2165
|
-
con
|
|
2166
|
-
Open DuckDB connection with views registered.
|
|
2167
|
-
|
|
2168
|
-
Returns
|
|
2169
|
-
-------
|
|
2170
|
-
dict
|
|
2171
|
-
``{view_name: [(column_name, column_type), ...]}``. Views that fail to
|
|
2172
|
-
describe (e.g. missing because ``register_views`` was not called) map
|
|
2173
|
-
to an empty list and emit a warning.
|
|
2174
|
-
"""
|
|
2175
|
-
import warnings
|
|
2176
|
-
|
|
2177
|
-
warnings.warn(
|
|
2178
|
-
"describe_all is deprecated; use VIEW_SCHEMA for static "
|
|
2179
|
-
"introspection. Will be removed in a future release.",
|
|
2180
|
-
DeprecationWarning,
|
|
2181
|
-
stacklevel=2,
|
|
2182
|
-
)
|
|
2183
|
-
out: dict[str, list[tuple[str, str]]] = {}
|
|
2184
|
-
for name in VIEW_NAMES:
|
|
2185
|
-
try:
|
|
2186
|
-
rows = con.execute(f"DESCRIBE {name}").fetchall()
|
|
2187
|
-
out[name] = [(str(r[0]), str(r[1])) for r in rows]
|
|
2188
|
-
except duckdb.Error as exc:
|
|
2189
|
-
logger.warning("Could not describe {}: {}", name, exc)
|
|
2190
|
-
out[name] = []
|
|
2191
|
-
return out
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
2117
|
def list_macros(con: duckdb.DuckDBPyConnection) -> list[tuple[str, tuple[str, ...]]]:
|
|
2195
2118
|
"""Return ``(name, params)`` for every registered macro.
|
|
2196
2119
|
|
|
@@ -51,7 +51,7 @@ from claude_sql.llm_shared import (
|
|
|
51
51
|
classify_one,
|
|
52
52
|
pipeline_cache_stats,
|
|
53
53
|
)
|
|
54
|
-
from claude_sql.parquet_shards import iter_part_files, write_part
|
|
54
|
+
from claude_sql.parquet_shards import iter_part_files, replace_sessions, write_part
|
|
55
55
|
from claude_sql.schemas import TRAJECTORY_ARRAY_SCHEMA
|
|
56
56
|
from claude_sql.session_text import session_bounds
|
|
57
57
|
|
|
@@ -886,8 +886,20 @@ async def _trajectory_async(
|
|
|
886
886
|
# don't collide on filenames — but we still keep the lock so the
|
|
887
887
|
# in-memory ``written_box`` / ``processed_sessions`` set updates
|
|
888
888
|
# in lockstep with the on-disk write.
|
|
889
|
+
#
|
|
890
|
+
# replace_sessions drops any prior rows for ``sid`` still sitting
|
|
891
|
+
# in the cache from earlier runs. The checkpointer gates
|
|
892
|
+
# computation on advancing (latest_ts, message_count) bounds but
|
|
893
|
+
# does NOT touch the parquet cache; without this step a growing
|
|
894
|
+
# active session duplicates its (prev_uuid, curr_uuid) pairs
|
|
895
|
+
# on every rerun. See GH #45.
|
|
889
896
|
df = pl.DataFrame(all_rows, schema=_PARQUET_SCHEMA)
|
|
890
897
|
async with write_lock:
|
|
898
|
+
replace_sessions(
|
|
899
|
+
settings.trajectory_parquet_path,
|
|
900
|
+
key_column="session_id",
|
|
901
|
+
session_ids=[sid],
|
|
902
|
+
)
|
|
891
903
|
write_part(settings.trajectory_parquet_path, df)
|
|
892
904
|
written_box[0] += len(all_rows)
|
|
893
905
|
processed_sessions.add(sid)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|