claude-sql 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_sql/__init__.py +5 -0
- claude_sql/binding.py +740 -0
- claude_sql/blind_handover.py +155 -0
- claude_sql/checkpointer.py +202 -0
- claude_sql/cli.py +2344 -0
- claude_sql/cluster_worker.py +208 -0
- claude_sql/community_worker.py +306 -0
- claude_sql/config.py +380 -0
- claude_sql/embed_worker.py +482 -0
- claude_sql/freeze.py +189 -0
- claude_sql/friction_worker.py +561 -0
- claude_sql/install_source.py +77 -0
- claude_sql/judge_worker.py +459 -0
- claude_sql/judges.py +239 -0
- claude_sql/kappa_worker.py +257 -0
- claude_sql/llm_worker.py +1760 -0
- claude_sql/logging_setup.py +95 -0
- claude_sql/output.py +248 -0
- claude_sql/parquet_shards.py +172 -0
- claude_sql/retry_queue.py +180 -0
- claude_sql/review_sheet_render.py +167 -0
- claude_sql/review_sheet_worker.py +463 -0
- claude_sql/schemas.py +454 -0
- claude_sql/session_text.py +387 -0
- claude_sql/skills_catalog.py +354 -0
- claude_sql/sql_views.py +1751 -0
- claude_sql/terms_worker.py +145 -0
- claude_sql/ungrounded_worker.py +190 -0
- claude_sql-0.4.0.dist-info/METADATA +530 -0
- claude_sql-0.4.0.dist-info/RECORD +32 -0
- claude_sql-0.4.0.dist-info/WHEEL +4 -0
- claude_sql-0.4.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
"""Detect user-friction signals in short user-role messages.
|
|
2
|
+
|
|
3
|
+
A *friction signal* is anything in a user message that implies the agent's
|
|
4
|
+
last turn fell short of what the user expected:
|
|
5
|
+
|
|
6
|
+
- ``status_ping`` -- "how's it going?", "any update?", "status?"
|
|
7
|
+
- ``unmet_expectation`` -- "screenshot?", "tests?", "link?" (the agent
|
|
8
|
+
should have produced this proactively)
|
|
9
|
+
- ``confusion`` -- "what does that mean?", "why?", "I don't get it"
|
|
10
|
+
- ``interruption`` -- "wait", "stop", "actually...", "hold on"
|
|
11
|
+
- ``correction`` -- "no, not that", "that's wrong", "nope"
|
|
12
|
+
- ``frustration`` -- "ugh", "seriously?", terse annoyance
|
|
13
|
+
- ``none`` -- ordinary task instruction (majority)
|
|
14
|
+
|
|
15
|
+
Pipeline shape
|
|
16
|
+
--------------
|
|
17
|
+
1. Pre-filter to user-role messages below ``settings.friction_max_chars``
|
|
18
|
+
(default 300). Long turns are almost always genuine instructions.
|
|
19
|
+
2. Regex fast-path for strong, unambiguous patterns (status pings, obvious
|
|
20
|
+
interruption keywords). Confidence 0.9 and skips the LLM.
|
|
21
|
+
3. Everything else goes to Sonnet 4.6 via ``invoke_model`` with
|
|
22
|
+
``output_config.format`` using :data:`USER_FRICTION_SCHEMA`.
|
|
23
|
+
4. Per-session checkpoint + per-uuid anti-join so reruns are free on
|
|
24
|
+
untouched sessions.
|
|
25
|
+
|
|
26
|
+
Outputs ``user_friction.parquet`` with one row per analysed user message:
|
|
27
|
+
|
|
28
|
+
{uuid, session_id, ts, text_snippet, label, rationale, source,
|
|
29
|
+
confidence, classified_at}
|
|
30
|
+
|
|
31
|
+
``source`` is ``'regex'`` or ``'llm'`` so downstream queries can filter to
|
|
32
|
+
the high-recall LLM rows or audit the fast-path separately.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import asyncio
|
|
38
|
+
import re
|
|
39
|
+
import time
|
|
40
|
+
from datetime import UTC, datetime
|
|
41
|
+
from typing import TYPE_CHECKING, Any
|
|
42
|
+
|
|
43
|
+
import anyio
|
|
44
|
+
import polars as pl
|
|
45
|
+
from loguru import logger
|
|
46
|
+
|
|
47
|
+
from claude_sql import checkpointer, retry_queue
|
|
48
|
+
from claude_sql.llm_worker import (
|
|
49
|
+
USER_FRICTION_SYSTEM_PROMPT,
|
|
50
|
+
BedrockRefusalError,
|
|
51
|
+
_build_bedrock_client,
|
|
52
|
+
_classify_one,
|
|
53
|
+
_estimate_cost,
|
|
54
|
+
)
|
|
55
|
+
from claude_sql.parquet_shards import read_all, write_part
|
|
56
|
+
from claude_sql.schemas import USER_FRICTION_SCHEMA
|
|
57
|
+
from claude_sql.session_text import session_bounds
|
|
58
|
+
|
|
59
|
+
if TYPE_CHECKING:
|
|
60
|
+
import duckdb
|
|
61
|
+
|
|
62
|
+
from claude_sql.config import Settings
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
# Regex fast-path
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
#
|
|
69
|
+
# These patterns catch the unambiguous cases so we don't pay Bedrock for them.
|
|
70
|
+
# Everything ambiguous falls through to the LLM, which is where the
|
|
71
|
+
# "screenshot?" / "tests?" class lives (those need semantic context to
|
|
72
|
+
# distinguish from genuine topic questions like "can you write tests?").
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
_REGEX_BANK: tuple[tuple[str, re.Pattern[str]], ...] = (
|
|
76
|
+
# Status pings — only multi-word phrasings that unambiguously ask about
|
|
77
|
+
# progress. Anything shorter or even slightly ambiguous ("status?",
|
|
78
|
+
# "what's the status column called?") falls through to the LLM so it
|
|
79
|
+
# can disambiguate via context. The trailing-context guards below
|
|
80
|
+
# require a question mark, end-of-string, or a progress-related word
|
|
81
|
+
# to avoid matching "what's the status column called".
|
|
82
|
+
(
|
|
83
|
+
"status_ping",
|
|
84
|
+
re.compile(
|
|
85
|
+
r"""
|
|
86
|
+
\bhow(?:'s|\s+is|\s+are|\s+it)?\s+(?:it|we|things|progress)\s+
|
|
87
|
+
(?:going|coming|doing|looking|progressing|holding\s+up)\b
|
|
88
|
+
| \bhow'?s?\s+progress\b(?=\s*[?.!]?\s*$)
|
|
89
|
+
| \bany\s+update(?:s)?\b(?=\s*[?.!]?\s*$)
|
|
90
|
+
| \bstatus\s+update\b
|
|
91
|
+
| \bwhere\s+(?:are\s+we|we['\u2019]re)\s+(?:at|with)\b
|
|
92
|
+
| \b(?:are\s+you\s+)?still\s+(?:working|going|running|on\s+it)\b
|
|
93
|
+
| \bwhat'?s?\s+(?:the|your)\s+eta\b
|
|
94
|
+
| \bhow\s+(?:much\s+)?long\s+(?:until|till|more|left)\b
|
|
95
|
+
""",
|
|
96
|
+
re.IGNORECASE | re.VERBOSE,
|
|
97
|
+
),
|
|
98
|
+
),
|
|
99
|
+
# Hard interruption keywords at the start of a message.
|
|
100
|
+
(
|
|
101
|
+
"interruption",
|
|
102
|
+
re.compile(
|
|
103
|
+
r"""
|
|
104
|
+
^\s*
|
|
105
|
+
(?:
|
|
106
|
+
wait(?:\s*[.,!]|\s+a\s+(?:sec|second|moment|minute)|[\s$])
|
|
107
|
+
| stop(?:\s*[.,!]|\s+(?:right\s+)?there|[\s$])
|
|
108
|
+
| hold\s+on
|
|
109
|
+
| hold\s+up
|
|
110
|
+
| hang\s+on
|
|
111
|
+
| actually[,\s]
|
|
112
|
+
| before\s+you\s+(?:do|go)
|
|
113
|
+
| pause\b
|
|
114
|
+
| nvm\b
|
|
115
|
+
| never\s*mind\b
|
|
116
|
+
)
|
|
117
|
+
""",
|
|
118
|
+
re.IGNORECASE | re.VERBOSE,
|
|
119
|
+
),
|
|
120
|
+
),
|
|
121
|
+
# Explicit corrections.
|
|
122
|
+
(
|
|
123
|
+
"correction",
|
|
124
|
+
re.compile(
|
|
125
|
+
r"""
|
|
126
|
+
^\s*
|
|
127
|
+
(?:
|
|
128
|
+
no[,\s.!]
|
|
129
|
+
| nope[,\s.!]?
|
|
130
|
+
| nah[,\s.!]?
|
|
131
|
+
| that'?s\s+(?:wrong|not\s+(?:right|it|what)|incorrect)
|
|
132
|
+
| not\s+(?:that|what\s+i)
|
|
133
|
+
| try\s+again
|
|
134
|
+
| wrong\b
|
|
135
|
+
| that'?s\s+not\s+
|
|
136
|
+
)
|
|
137
|
+
""",
|
|
138
|
+
re.IGNORECASE | re.VERBOSE,
|
|
139
|
+
),
|
|
140
|
+
),
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def regex_fast_path(text: str) -> tuple[str, float] | None:
    """Return ``(label, confidence)`` for a regex hit or ``None``.

    Regex hits carry a flat 0.9 confidence: the bank only contains
    hand-picked, unambiguous phrasings.  Ambiguous shapes deliberately
    fall through to the LLM so that a single bad pattern in the bank
    cannot poison the corpus.
    """
    if not text:
        return None
    # Probe only the head of the message; the bank targets message openers.
    probe = text[:512]
    return next(
        ((label, 0.9) for label, pattern in _REGEX_BANK if pattern.search(probe)),
        None,
    )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Parquet helpers
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Polars schema for ``user_friction.parquet``.  Passed as ``schema=`` to every
# ``pl.DataFrame(...)`` built in this module so regex-path and LLM-path rows
# always agree on column names and dtypes.
_SCHEMA: dict[str, Any] = {
    "uuid": pl.Utf8,  # message uuid; anti-join key that makes reruns cheap
    "session_id": pl.Utf8,
    "ts": pl.Datetime("us", "UTC"),  # message timestamp
    "text_snippet": pl.Utf8,  # first 200 chars of the user message
    "label": pl.Utf8,  # friction category (see module docstring)
    "rationale": pl.Utf8,  # short justification, truncated to 200 chars
    "source": pl.Utf8,  # 'regex' | 'llm' | 'refused'
    "confidence": pl.Float32,
    "classified_at": pl.Datetime("us", "UTC"),
}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Candidate SQL
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
#: Claude Code injects these strings as user-role messages even though
|
|
185
|
+
#: they're system-generated bookkeeping. An audit of the live
|
|
186
|
+
#: ``user_friction.parquet`` showed they accounted for 279 of 298
|
|
187
|
+
#: LLM-classified rows (~94% of friction Bedrock calls). Filter at the
|
|
188
|
+
#: SQL boundary so they never reach Sonnet.
|
|
189
|
+
_SYSTEM_MARKER_TEXTS: tuple[str, ...] = (
|
|
190
|
+
"Continue from where you left off.",
|
|
191
|
+
"[Request interrupted by user for tool use]",
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _candidate_sql(max_chars: int, since_days: int | None) -> tuple[str, list[Any]]:
|
|
196
|
+
"""SQL pulling user-role messages under the char cutoff.
|
|
197
|
+
|
|
198
|
+
Claude Code system markers (see ``_SYSTEM_MARKER_TEXTS``) are
|
|
199
|
+
excluded here because they're CLI bookkeeping, not user-typed
|
|
200
|
+
friction signals. Single-quotes inside markers are escaped per SQL
|
|
201
|
+
rules.
|
|
202
|
+
"""
|
|
203
|
+
quoted = ", ".join(f"'{m.replace(chr(39), chr(39) * 2)}'" for m in _SYSTEM_MARKER_TEXTS)
|
|
204
|
+
where = [
|
|
205
|
+
"mt.text_content IS NOT NULL",
|
|
206
|
+
"length(mt.text_content) >= 1",
|
|
207
|
+
f"length(mt.text_content) <= {int(max_chars)}",
|
|
208
|
+
"mt.role = 'user'",
|
|
209
|
+
f"trim(mt.text_content) NOT IN ({quoted})",
|
|
210
|
+
]
|
|
211
|
+
if since_days is not None:
|
|
212
|
+
where.append(f"mt.ts >= current_timestamp - INTERVAL {int(since_days)} DAY")
|
|
213
|
+
sql = f"""
|
|
214
|
+
SELECT CAST(mt.uuid AS VARCHAR) AS uuid,
|
|
215
|
+
CAST(mt.session_id AS VARCHAR) AS session_id,
|
|
216
|
+
mt.ts AS ts,
|
|
217
|
+
mt.text_content AS text_content
|
|
218
|
+
FROM messages_text mt
|
|
219
|
+
WHERE {" AND ".join(where)}
|
|
220
|
+
ORDER BY mt.ts
|
|
221
|
+
"""
|
|
222
|
+
return sql, []
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
# Prompt wrapper
# ---------------------------------------------------------------------------
#
# Friction detection is context-free per message: we hand the LLM a single
# user utterance and a brief framing paragraph, and let the schema's
# field-level descriptions do the rest of the work. No session history --
# that's what makes this cheap (hundreds of input tokens, not thousands).


#: Per-message user prompt.  ``{text}`` is filled via ``str.format`` in
#: ``_classify_async``; everything else is a fixed framing paragraph plus
#: worked examples of the non-obvious label assignments.
_USER_PROMPT_TEMPLATE = """\
Classify the following SHORT USER MESSAGE from a Claude Code coding session.

You're looking for FRICTION SIGNALS — cues that the human is impatient,
confused, interrupting the agent, correcting it, or asking for something
the agent should have provided proactively but didn't.

Examples of NON-obvious friction:
- "screenshot?" → unmet_expectation (agent should have shared a screenshot)
- "tests?" → unmet_expectation (agent didn't run tests)
- "link?" → unmet_expectation (agent referenced a resource without linking)
- "why?" / "why did you do that?" → confusion
- "wait" / "actually..." → interruption
- "no not that" / "nope" → correction
- "are you there?" / "you alive?" → status_ping

The MAJORITY of short messages are ordinary task instructions and should
get label=none. Only flag a friction signal when the cue is clear.

USER MESSAGE:
```
{text}
```
"""
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
# Main pipeline
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def _classify_async(
    con: duckdb.DuckDBPyConnection,
    settings: Settings,
    *,
    since_days: int | None,
    limit: int | None,
    thinking_mode: str,
) -> int:
    """Async body behind :func:`detect_user_friction`.

    Flow: load already-classified uuids from the parquet, drop sessions the
    checkpointer says are unchanged, drain the retry queue, run the regex
    fast-path, then fan the remainder out to Bedrock in chunks.  Returns the
    number of rows written this run (regex + LLM).
    """
    out_path = settings.user_friction_parquet_path
    already: set[str] = set()
    done_df = read_all(out_path)
    if done_df is not None and done_df.height > 0:
        # Per-uuid anti-join: anything already in the parquet is skipped.
        already = set(done_df["uuid"].to_list())

    # Session-level checkpoint: skip sessions unchanged since last run.
    bounds = session_bounds(con, since_days=since_days, limit=limit)
    unchanged_pending, skipped_sessions = checkpointer.filter_unchanged(
        ((sid, lt, mt) for sid, (lt, mt) in bounds.items()),
        pipeline="user_friction",
        checkpoint_db_path=settings.checkpoint_db_path,
    )
    active_sessions: set[str] = set(unchanged_pending)

    retry_uuids = set(retry_queue.drain(settings.checkpoint_db_path, pipeline="user_friction"))
    if retry_uuids:
        logger.info("user_friction: draining {} retry-queue entries", len(retry_uuids))
        # Removing retried uuids from `already` lets them re-enter the
        # candidate set below.
        # NOTE(review): a retried uuid may gain a second parquet row for the
        # same uuid — presumably deduplicated downstream; confirm.
        already -= retry_uuids

    sql, _ = _candidate_sql(settings.friction_max_chars, since_days)
    if active_sessions:
        # Inject a session filter right after WHERE; the unnest(?) list is
        # bound positionally via `params` below.
        sql = sql.replace(
            " WHERE ",
            " WHERE CAST(mt.session_id AS VARCHAR) IN (SELECT unnest(?)) AND ",
            1,
        )
    if limit is not None:
        sql += f"\nLIMIT {int(limit)}"

    params = [list(active_sessions)] if active_sessions else []
    # Only hit DuckDB when there is something to scan: either some sessions
    # are pending, or there are no sessions at all (bounds empty) and the
    # unfiltered query is trivially cheap.
    rows_raw = con.execute(sql, params).fetchall() if active_sessions or not bounds else []
    candidates = [(r[0], r[1], r[2], r[3]) for r in rows_raw if r[0] not in already]
    session_for_uuid = {r[0]: r[1] for r in rows_raw if r[0] not in already}
    if skipped_sessions:
        logger.info("user_friction: skipped {} sessions via checkpoint", skipped_sessions)
    logger.info("user_friction: {} candidate user messages", len(candidates))

    if not candidates:
        logger.info("user_friction: nothing pending")
        return 0

    # 1. Regex fast-path.
    fast_rows: list[dict[str, Any]] = []
    llm_pending: list[tuple[str, str, Any, str]] = []  # (uuid, session_id, ts, text)
    now = datetime.now(UTC)
    for uuid, session_id, ts, text in candidates:
        hit = regex_fast_path(text or "")
        if hit is not None:
            label, conf = hit
            fast_rows.append(
                {
                    "uuid": uuid,
                    "session_id": session_id,
                    "ts": ts,
                    "text_snippet": (text or "")[:200],
                    "label": label,
                    "rationale": "regex match",
                    "source": "regex",
                    "confidence": conf,
                    "classified_at": now,
                }
            )
        else:
            llm_pending.append((uuid, session_id, ts, text or ""))

    logger.info(
        "user_friction: {} regex fast-path, {} pending LLM",
        len(fast_rows),
        len(llm_pending),
    )

    if fast_rows:
        write_part(out_path, pl.DataFrame(fast_rows, schema=_SCHEMA))

    processed_sessions: set[str] = {r["session_id"] for r in fast_rows}

    if not llm_pending:
        # Regex-only run: checkpoint the touched sessions and return early.
        if processed_sessions:
            checkpointer.mark_completed(
                settings.checkpoint_db_path,
                pipeline="user_friction",
                rows=[(sid, *bounds.get(sid, (None, None))) for sid in processed_sessions],
            )
        logger.info("user_friction: wrote {} total rows (regex only)", len(fast_rows))
        return len(fast_rows)

    # 2. LLM path.
    client = _build_bedrock_client(settings)
    sem = anyio.CapacityLimiter(settings.llm_concurrency)
    # Chunk several batches at once so each write_part amortizes its cost.
    chunk_size = max(settings.batch_size * 4, 256)
    written = len(fast_rows)

    for i in range(0, len(llm_pending), chunk_size):
        chunk = llm_pending[i : i + chunk_size]
        t0 = time.monotonic()
        prompts = [_USER_PROMPT_TEMPLATE.format(text=text) for _, _, _, text in chunk]
        coros = [
            _classify_one(
                client,
                settings.sonnet_model_id,
                USER_FRICTION_SCHEMA,
                prompt,
                max_tokens=settings.classify_max_tokens,
                thinking_mode=thinking_mode,
                sem=sem,
                system=USER_FRICTION_SYSTEM_PROMPT,
            )
            for prompt in prompts
        ]
        # return_exceptions=True keeps one failure from sinking the chunk;
        # exceptions are sorted out per-row below.
        results = await asyncio.gather(*coros, return_exceptions=True)
        now = datetime.now(UTC)

        ok_rows: list[dict[str, Any]] = []
        ok_uuids: list[str] = []
        refused_uuids: list[str] = []
        errors = 0
        for (uuid, session_id, ts, text), res in zip(chunk, results, strict=True):
            if isinstance(res, BedrockRefusalError):
                # Refusals are terminal: record them as label=none with
                # source='refused' so reruns don't retry them forever.
                logger.info("user_friction: {} refused by Bedrock — marking none", uuid)
                ok_rows.append(
                    {
                        "uuid": uuid,
                        "session_id": session_id,
                        "ts": ts,
                        "text_snippet": text[:200],
                        "label": "none",
                        "rationale": "refused by bedrock",
                        "source": "refused",
                        "confidence": 0.0,
                        "classified_at": now,
                    }
                )
                refused_uuids.append(uuid)
                continue
            if isinstance(res, BaseException):
                # Transient failure: queue for the next run's drain.
                errors += 1
                logger.warning("user_friction: {} failed (queued for retry): {}", uuid, res)
                retry_queue.enqueue(
                    settings.checkpoint_db_path,
                    pipeline="user_friction",
                    unit_id=uuid,
                    error=str(res),
                )
                continue
            res_dict: dict[str, Any] = res
            ok_rows.append(
                {
                    "uuid": uuid,
                    "session_id": session_id,
                    "ts": ts,
                    "text_snippet": text[:200],
                    "label": res_dict.get("label", "none"),
                    "rationale": (res_dict.get("rationale") or "")[:200],
                    "source": "llm",
                    "confidence": float(res_dict.get("confidence", 0.0)),
                    "classified_at": now,
                }
            )
            ok_uuids.append(uuid)
            processed_sessions.add(session_id)

        if ok_rows:
            write_part(out_path, pl.DataFrame(ok_rows, schema=_SCHEMA))
        done_uuids = ok_uuids + refused_uuids
        if done_uuids:
            retry_queue.mark_done(
                settings.checkpoint_db_path,
                pipeline="user_friction",
                unit_ids=done_uuids,
            )
        # Checkpoint per chunk so a crash mid-run loses at most one chunk.
        chunk_sessions = {
            session_for_uuid[u] for u in ok_uuids + refused_uuids if u in session_for_uuid
        }
        if chunk_sessions:
            checkpointer.mark_completed(
                settings.checkpoint_db_path,
                pipeline="user_friction",
                rows=[(sid, *bounds.get(sid, (None, None))) for sid in chunk_sessions],
            )

        written += len(ok_rows)
        logger.info(
            "user_friction chunk {}/{}: {} ok, {} errors, {:.1f}s",
            i // chunk_size + 1,
            (len(llm_pending) + chunk_size - 1) // chunk_size,
            len(ok_rows),
            errors,
            time.monotonic() - t0,
        )

    # Final sweep also covers regex-only sessions that had no LLM rows.
    if processed_sessions:
        checkpointer.mark_completed(
            settings.checkpoint_db_path,
            pipeline="user_friction",
            rows=[(sid, *bounds.get(sid, (None, None))) for sid in processed_sessions],
        )
    logger.info("user_friction: wrote {} total rows", written)
    return written
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def detect_user_friction(
    con: duckdb.DuckDBPyConnection,
    settings: Settings,
    *,
    since_days: int | None = None,
    limit: int | None = None,
    dry_run: bool = False,
    no_thinking: bool = False,
) -> int | dict[str, Any]:
    """Classify short user messages for friction signals.

    See the module docstring for the label taxonomy and pipeline shape.

    Parameters
    ----------
    con
        DuckDB connection with ``messages_text`` registered.
    settings
        :class:`Settings` driving parquet path, char cutoff, concurrency.
    since_days
        Restrict to messages whose ``ts`` falls within the last N days;
        ``None`` scans the full corpus.
    limit
        Optional hard cap on candidate count.
    dry_run
        Count candidate messages and estimate LLM cost without calling
        Bedrock.
    no_thinking
        Force ``thinking_mode='disabled'``.  The default (``adaptive``)
        gives better labels on edge cases like a bare "screenshot?" where
        the model must reason about what the agent just did.

    Returns
    -------
    int | dict
        Under ``dry_run=True`` a plan dict (``{pipeline, candidates,
        llm_calls, estimated_cost_usd, ...}``); otherwise the count of
        rows written to the parquet.
    """
    thinking_mode = "disabled" if no_thinking else settings.friction_thinking

    if not dry_run:
        return asyncio.run(
            _classify_async(
                con,
                settings,
                since_days=since_days,
                limit=limit,
                thinking_mode=thinking_mode,
            )
        )

    # Dry run: count candidates and project the Bedrock spend.
    sql, _ = _candidate_sql(settings.friction_max_chars, since_days)
    count_row = con.execute(f"SELECT count(*) FROM ({sql}) q").fetchone()
    candidates = int(count_row[0]) if count_row is not None else 0
    if limit is not None:
        candidates = min(candidates, int(limit))
    # Roughly half of short user messages survive the regex fast-path,
    # consistent with the trajectory pipeline's pre-filter survival rate.
    # Short-message prompt is ~200 input tokens (template + message),
    # output is ~60 tokens (label + rationale + confidence).
    llm_calls = candidates // 2
    cost = _estimate_cost(llm_calls, 200, 60, settings.sonnet_pricing)
    logger.info(
        "user_friction --dry-run: {} candidates (~{} hit LLM), estimated cost ~${:.2f}",
        candidates,
        llm_calls,
        cost,
    )
    return {
        "pipeline": "friction",
        "candidates": candidates,
        "llm_calls": llm_calls,
        "avg_input_tokens": 200,
        "avg_output_tokens": 60,
        "estimated_cost_usd": round(cost, 4),
        "model": settings.sonnet_model_id,
        "thinking": thinking_mode,
        "since_days": since_days,
        "limit": limit,
        "friction_max_chars": settings.friction_max_chars,
        "dry_run": True,
    }
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
__all__ = ["detect_user_friction", "regex_fast_path"]
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Discover where ``claude-sql`` was installed from.
|
|
2
|
+
|
|
3
|
+
The tool is not published to PyPI — users install from a local checkout via
|
|
4
|
+
``uv tool install --from . claude-sql``. ``uv`` records the source of every
|
|
5
|
+
tool install in ``$UV_TOOL_DIR/<tool>/uv-receipt.toml``. This module reads
|
|
6
|
+
that receipt so ``claude-sql --version`` (and the ``version`` subcommand) can
|
|
7
|
+
tell the user whether the binary on their ``PATH`` came from a directory
|
|
8
|
+
checkout, a git URL, or (fallback) this project's own venv.
|
|
9
|
+
|
|
10
|
+
The receipt schema is not a public contract — uv has changed it between
|
|
11
|
+
releases. Every read is wrapped in ``try/except`` so a future schema change
|
|
12
|
+
degrades to "source unknown" instead of crashing the CLI.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import os
|
|
18
|
+
import tomllib
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from claude_sql import __version__
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _tool_dir() -> Path:
|
|
25
|
+
"""Return the uv tool-install root, respecting ``$UV_TOOL_DIR`` / XDG."""
|
|
26
|
+
if override := os.environ.get("UV_TOOL_DIR"):
|
|
27
|
+
return Path(override)
|
|
28
|
+
if xdg := os.environ.get("XDG_DATA_HOME"):
|
|
29
|
+
return Path(xdg) / "uv" / "tools"
|
|
30
|
+
return Path.home() / ".local" / "share" / "uv" / "tools"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def read_install_source(tool: str = "claude-sql") -> dict[str, str] | None:
    """Parse ``uv-receipt.toml`` for an installed uv tool.

    Returns a dict with keys ``source_kind`` (``"directory"`` / ``"url"`` /
    ``"git"``), ``source`` (the value), and optionally ``install_path``
    (the resolved entrypoint).  Returns ``None`` when the receipt is
    missing or the TOML is unreadable — the schema is not a public uv
    contract, so all failures degrade to "unknown".
    """
    receipt_path = _tool_dir() / tool / "uv-receipt.toml"
    try:
        receipt = tomllib.loads(receipt_path.read_text())
    except (OSError, tomllib.TOMLDecodeError):
        return None

    tool_table = receipt.get("tool") or {}
    result: dict[str, str] = {}

    # First requirement matching the tool name decides the source.
    for requirement in tool_table.get("requirements") or []:
        if isinstance(requirement, dict) and requirement.get("name") == tool:
            for source_key in ("directory", "url", "git"):
                value = requirement.get(source_key)
                if value:
                    result["source_kind"] = source_key
                    result["source"] = str(value)
                    break
            break

    # First entrypoint matching the tool name supplies the binary path.
    for entrypoint in tool_table.get("entrypoints") or []:
        if isinstance(entrypoint, dict) and entrypoint.get("name") == tool:
            install_path = entrypoint.get("install-path")
            if install_path:
                result["install_path"] = str(install_path)
            break

    return result or None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def format_version() -> str:
    """Return ``"claude-sql X.Y.Z"`` plus an install-source line when known."""
    out = [f"claude-sql {__version__}"]
    source = read_install_source()
    if source is None:
        # No uv receipt: the binary is running straight out of the repo venv.
        out.append("installed from: project venv (not via `uv tool install`)")
        return "\n".join(out)
    out.append(
        f"installed from {source.get('source_kind', 'source')}: {source.get('source', '?')}"
    )
    entrypoint = source.get("install_path")
    if entrypoint:
        out.append(f"entrypoint: {entrypoint}")
    return "\n".join(out)
|