cctally 1.27.0 → 1.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/bin/_cctally_alerts.py +26 -1
- package/bin/_cctally_cache.py +278 -6
- package/bin/_cctally_config.py +153 -11
- package/bin/_cctally_core.py +230 -41
- package/bin/_cctally_dashboard.py +399 -37
- package/bin/_cctally_db.py +594 -163
- package/bin/_cctally_doctor.py +11 -0
- package/bin/_cctally_forecast.py +700 -57
- package/bin/_cctally_milestones.py +273 -28
- package/bin/_cctally_parser.py +44 -4
- package/bin/_cctally_record.py +328 -50
- package/bin/_cctally_setup.py +7 -3
- package/bin/_cctally_statusline.py +8 -0
- package/bin/_cctally_update.py +3 -3
- package/bin/_cctally_weekrefs.py +30 -6
- package/bin/_lib_alert_axes.py +8 -1
- package/bin/_lib_alerts_payload.py +95 -3
- package/bin/_lib_budget.py +48 -0
- package/bin/_lib_conversation.py +162 -0
- package/bin/_lib_conversation_query.py +524 -0
- package/bin/_lib_doctor.py +60 -1
- package/bin/_lib_transcript_access.py +80 -0
- package/bin/cctally +40 -1
- package/dashboard/static/assets/{index-D34qf0LE.css → index-Bj5ckRUE.css} +1 -1
- package/dashboard/static/assets/index-Dw4G5FD9.js +18 -0
- package/dashboard/static/dashboard.html +2 -2
- package/package.json +4 -1
- package/dashboard/static/assets/index-C2F1_Mxt.js +0 -18
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
"""Pure query kernel for the conversation viewer endpoints (Plan 2, spec §3).
|
|
2
|
+
|
|
3
|
+
Takes a sqlite3.Connection over a cache.db that already holds Plan 1's
|
|
4
|
+
conversation_messages (+ FTS) and session_entries. No clock, no network, no
|
|
5
|
+
global mutation — unit-tested against an in-memory cache.db seeded by
|
|
6
|
+
_apply_cache_schema. Three entry points back the three GET routes:
|
|
7
|
+
list_conversations (rail), get_conversation (reader), search_conversations.
|
|
8
|
+
|
|
9
|
+
Cost is joined ONCE per logical assistant turn (msg_id, req_id) to the single
|
|
10
|
+
deduped session_entries row (idx_entries_dedup), via the shared pricing helper
|
|
11
|
+
— never per physical fragment and never from cost_usd_raw (often NULL).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
import json as _json
|
|
15
|
+
import os
|
|
16
|
+
import sqlite3
|
|
17
|
+
|
|
18
|
+
# Public surface (Plan 2): shipped in the npm tarball + brew formula + public
|
|
19
|
+
# mirror — imported by the dashboard's conversation endpoints at runtime.
|
|
20
|
+
|
|
21
|
+
from _lib_pricing import _calculate_entry_cost
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _project_label(cwd) -> str:
|
|
25
|
+
"""Basename of the project cwd (dashboard label posture — no reveal). Falls
|
|
26
|
+
back to the raw path for root-ish cwds, '' when absent."""
|
|
27
|
+
if not cwd:
|
|
28
|
+
return ""
|
|
29
|
+
return os.path.basename(cwd.rstrip("/")) or cwd
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _entry_cost(model, inp, out, cc, cr, cost_usd_raw) -> float:
|
|
33
|
+
"""Cost for one session_entries row via the shared pricing helper. Tokens →
|
|
34
|
+
the helper's usage dict. cost_usd_raw is passed as the optional override the
|
|
35
|
+
helper already understands (it is often NULL — never the primary source)."""
|
|
36
|
+
usage = {
|
|
37
|
+
"input_tokens": inp or 0,
|
|
38
|
+
"output_tokens": out or 0,
|
|
39
|
+
"cache_creation_input_tokens": cc or 0,
|
|
40
|
+
"cache_read_input_tokens": cr or 0,
|
|
41
|
+
}
|
|
42
|
+
return _calculate_entry_cost(model or "", usage, cost_usd=cost_usd_raw)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _session_cost_map(conn, session_ids):
|
|
46
|
+
"""{session_id: total_cost_usd} for the given sessions. Joins
|
|
47
|
+
conversation_messages turn keys to the single deduped session_entries row
|
|
48
|
+
per (msg_id, req_id), so a turn replayed across files contributes once.
|
|
49
|
+
(msg_id, req_id) is globally unique in session_entries and maps to exactly
|
|
50
|
+
one session_id, so per-session sums are clean."""
|
|
51
|
+
costs = {sid: 0.0 for sid in session_ids}
|
|
52
|
+
if not session_ids:
|
|
53
|
+
return costs
|
|
54
|
+
placeholders = ",".join("?" for _ in session_ids)
|
|
55
|
+
sql = (
|
|
56
|
+
"SELECT cm.session_id, se.model, se.input_tokens, se.output_tokens, "
|
|
57
|
+
" se.cache_create_tokens, se.cache_read_tokens, se.cost_usd_raw "
|
|
58
|
+
"FROM (SELECT DISTINCT session_id, msg_id, req_id "
|
|
59
|
+
" FROM conversation_messages "
|
|
60
|
+
" WHERE session_id IN (%s) AND msg_id IS NOT NULL AND req_id IS NOT NULL) cm "
|
|
61
|
+
"JOIN session_entries se ON se.msg_id = cm.msg_id AND se.req_id = cm.req_id"
|
|
62
|
+
% placeholders
|
|
63
|
+
)
|
|
64
|
+
for sid, model, inp, out, cc, cr, raw in conn.execute(sql, list(session_ids)):
|
|
65
|
+
costs[sid] = costs.get(sid, 0.0) + _entry_cost(model, inp, out, cc, cr, raw)
|
|
66
|
+
return costs
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _session_models_map(conn, session_ids):
|
|
70
|
+
"""{session_id: sorted distinct non-null models}."""
|
|
71
|
+
out = {sid: [] for sid in session_ids}
|
|
72
|
+
if not session_ids:
|
|
73
|
+
return out
|
|
74
|
+
placeholders = ",".join("?" for _ in session_ids)
|
|
75
|
+
sql = (
|
|
76
|
+
"SELECT DISTINCT session_id, model FROM conversation_messages "
|
|
77
|
+
"WHERE session_id IN (%s) AND model IS NOT NULL AND model != '' "
|
|
78
|
+
"ORDER BY model" % placeholders
|
|
79
|
+
)
|
|
80
|
+
for sid, model in conn.execute(sql, list(session_ids)):
|
|
81
|
+
out.setdefault(sid, []).append(model)
|
|
82
|
+
return out
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _session_latest_meta_map(conn, session_ids):
|
|
86
|
+
"""{session_id: (cwd, git_branch)} using the most-recent NON-NULL value per
|
|
87
|
+
column — the SAME posture as get_conversation's _latest, so the rail and the
|
|
88
|
+
reader agree on a session whose cwd/branch changed over its lifetime (a plain
|
|
89
|
+
MAX() picks the lexical max, not the latest). Bounded to the page's sessions
|
|
90
|
+
via per-session correlated lookups over idx (session_id, timestamp_utc, id),
|
|
91
|
+
mirroring _session_cost_map / _session_models_map."""
|
|
92
|
+
meta = {sid: (None, None) for sid in session_ids}
|
|
93
|
+
if not session_ids:
|
|
94
|
+
return meta
|
|
95
|
+
placeholders = ",".join("?" for _ in session_ids)
|
|
96
|
+
sql = (
|
|
97
|
+
"SELECT s.session_id, "
|
|
98
|
+
" (SELECT c.cwd FROM conversation_messages c "
|
|
99
|
+
" WHERE c.session_id = s.session_id AND c.cwd IS NOT NULL "
|
|
100
|
+
" ORDER BY c.timestamp_utc DESC, c.id DESC LIMIT 1), "
|
|
101
|
+
" (SELECT b.git_branch FROM conversation_messages b "
|
|
102
|
+
" WHERE b.session_id = s.session_id AND b.git_branch IS NOT NULL "
|
|
103
|
+
" ORDER BY b.timestamp_utc DESC, b.id DESC LIMIT 1) "
|
|
104
|
+
"FROM (SELECT DISTINCT session_id FROM conversation_messages "
|
|
105
|
+
" WHERE session_id IN (%s)) s" % placeholders
|
|
106
|
+
)
|
|
107
|
+
for sid, cwd, branch in conn.execute(sql, list(session_ids)):
|
|
108
|
+
meta[sid] = (cwd, branch)
|
|
109
|
+
return meta
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
_SORTS = {
|
|
113
|
+
"recent": "MAX(timestamp_utc) DESC, session_id DESC",
|
|
114
|
+
"oldest": "MIN(timestamp_utc) ASC, session_id ASC",
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def list_conversations(conn, *, sort="recent", limit=50, offset=0) -> dict:
|
|
119
|
+
"""All-history per-session browse rows (spec §3.1). NOT 365-day bounded."""
|
|
120
|
+
order = _SORTS.get(sort, _SORTS["recent"])
|
|
121
|
+
limit = max(1, min(int(limit), 200))
|
|
122
|
+
offset = max(0, int(offset))
|
|
123
|
+
rows = conn.execute(
|
|
124
|
+
"SELECT session_id, COUNT(*) AS msg_count, "
|
|
125
|
+
" MIN(timestamp_utc) AS started, MAX(timestamp_utc) AS last_activity "
|
|
126
|
+
"FROM conversation_messages "
|
|
127
|
+
"WHERE session_id IS NOT NULL "
|
|
128
|
+
"GROUP BY session_id "
|
|
129
|
+
"ORDER BY " + order + " LIMIT ? OFFSET ?",
|
|
130
|
+
(limit + 1, offset),
|
|
131
|
+
).fetchall()
|
|
132
|
+
has_more = len(rows) > limit
|
|
133
|
+
rows = rows[:limit]
|
|
134
|
+
session_ids = [r[0] for r in rows]
|
|
135
|
+
costs = _session_cost_map(conn, session_ids)
|
|
136
|
+
models = _session_models_map(conn, session_ids)
|
|
137
|
+
# cwd/git_branch as the latest non-null (reader posture), NOT a lexical MAX().
|
|
138
|
+
meta = _session_latest_meta_map(conn, session_ids)
|
|
139
|
+
conversations = [
|
|
140
|
+
{
|
|
141
|
+
"session_id": sid,
|
|
142
|
+
"project_label": _project_label(meta.get(sid, (None, None))[0]),
|
|
143
|
+
"git_branch": meta.get(sid, (None, None))[1],
|
|
144
|
+
"started_utc": started,
|
|
145
|
+
"last_activity_utc": last_activity,
|
|
146
|
+
"msg_count": msg_count,
|
|
147
|
+
"cost_usd": round(costs.get(sid, 0.0), 6),
|
|
148
|
+
"models": models.get(sid, []),
|
|
149
|
+
}
|
|
150
|
+
for (sid, msg_count, started, last_activity) in rows
|
|
151
|
+
]
|
|
152
|
+
return {
|
|
153
|
+
"conversations": conversations,
|
|
154
|
+
"page": {
|
|
155
|
+
"next_offset": offset + len(conversations) if has_more else None,
|
|
156
|
+
"has_more": has_more,
|
|
157
|
+
},
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _turn_cost_map(conn, turn_keys):
|
|
162
|
+
"""{(msg_id, req_id): cost_usd} for the given non-null turn keys, joined ONCE
|
|
163
|
+
to the deduped session_entries row. Keys absent from session_entries (e.g.
|
|
164
|
+
<synthetic> walker-skipped rows) are simply not present → cost 0 by omission."""
|
|
165
|
+
costs = {}
|
|
166
|
+
keys = [(m, r) for (m, r) in turn_keys if m is not None and r is not None]
|
|
167
|
+
if not keys:
|
|
168
|
+
return costs
|
|
169
|
+
# Chunk the OR-of-pairs to stay well under SQLite's variable limit.
|
|
170
|
+
for i in range(0, len(keys), 400):
|
|
171
|
+
chunk = keys[i:i + 400]
|
|
172
|
+
cond = " OR ".join("(msg_id=? AND req_id=?)" for _ in chunk)
|
|
173
|
+
params = [v for pair in chunk for v in pair]
|
|
174
|
+
sql = ("SELECT msg_id, req_id, model, input_tokens, output_tokens, "
|
|
175
|
+
"cache_create_tokens, cache_read_tokens, cost_usd_raw "
|
|
176
|
+
"FROM session_entries WHERE " + cond)
|
|
177
|
+
for m, r, model, inp, out, cc, cr, raw in conn.execute(sql, params):
|
|
178
|
+
costs[(m, r)] = _entry_cost(model, inp, out, cc, cr, raw)
|
|
179
|
+
return costs
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def get_conversation(conn, session_id, *, after=None, limit=500):
|
|
183
|
+
"""Reader payload for one session (spec §3.2). Returns None for an unknown
|
|
184
|
+
session. Dedups logical messages by (session_id, uuid) (canonical = earliest
|
|
185
|
+
timestamp), groups assistant fragments into turn items by (msg_id, req_id),
|
|
186
|
+
joins cost once, anchors a turn on its prose-bearing fragment, and exposes
|
|
187
|
+
every member fragment uuid for jump resolution. Cursor over (timestamp_utc,
|
|
188
|
+
id); ~500 items/page."""
|
|
189
|
+
limit = max(1, min(int(limit), 1000))
|
|
190
|
+
exists = conn.execute(
|
|
191
|
+
"SELECT 1 FROM conversation_messages WHERE session_id=? LIMIT 1",
|
|
192
|
+
(session_id,)).fetchone()
|
|
193
|
+
if exists is None:
|
|
194
|
+
return None
|
|
195
|
+
|
|
196
|
+
# Pull the session ordered; dedup logical messages by (session_id, uuid),
|
|
197
|
+
# canonical row = earliest (timestamp_utc, id). Replays carry the original
|
|
198
|
+
# uuid, so the first occurrence in ascending order is canonical.
|
|
199
|
+
raw = conn.execute(
|
|
200
|
+
"SELECT id, uuid, timestamp_utc, entry_type, text, blocks_json, model, "
|
|
201
|
+
" msg_id, req_id, is_sidechain, cwd, git_branch "
|
|
202
|
+
"FROM conversation_messages WHERE session_id=? "
|
|
203
|
+
"ORDER BY timestamp_utc, id", (session_id,)).fetchall()
|
|
204
|
+
|
|
205
|
+
seen_uuid = set()
|
|
206
|
+
logical = [] # canonical physical rows, in order
|
|
207
|
+
for row in raw:
|
|
208
|
+
u = row[1]
|
|
209
|
+
if u in seen_uuid:
|
|
210
|
+
continue
|
|
211
|
+
seen_uuid.add(u)
|
|
212
|
+
logical.append(row)
|
|
213
|
+
|
|
214
|
+
# Group assistant fragments sharing (msg_id, req_id) into one turn item over
|
|
215
|
+
# the WHOLE logical list — NOT by adjacency. Real tool-using transcripts
|
|
216
|
+
# interleave a tool_result (a `user`/tool_result item) between fragments of
|
|
217
|
+
# the SAME turn, so the same key recurs non-consecutively. We keep a turn-key
|
|
218
|
+
# → item-index map: first occurrence emits the turn item AT THIS POSITION;
|
|
219
|
+
# later same-key fragments fold their blocks/prose/uuids into the existing
|
|
220
|
+
# item. A turn → exactly ONE item → cost counted exactly once. Humans,
|
|
221
|
+
# tool_results, and assistant rows with a null msg_id emit as simple items at
|
|
222
|
+
# their own position.
|
|
223
|
+
items = []
|
|
224
|
+
turn_index = {} # (msg_id, req_id) -> index into items
|
|
225
|
+
for row in logical:
|
|
226
|
+
(rid, u, ts, etype, text, blocks, model, msg_id, req_id,
|
|
227
|
+
is_sc, cwd, branch) = row
|
|
228
|
+
if etype == "assistant" and msg_id is not None:
|
|
229
|
+
key = (msg_id, req_id)
|
|
230
|
+
idx = turn_index.get(key)
|
|
231
|
+
if idx is None:
|
|
232
|
+
turn_index[key] = len(items)
|
|
233
|
+
items.append(_build_turn([row]))
|
|
234
|
+
else:
|
|
235
|
+
_extend_turn(items[idx], row)
|
|
236
|
+
else:
|
|
237
|
+
items.append(_build_simple(row))
|
|
238
|
+
|
|
239
|
+
costs = _turn_cost_map(conn, list(turn_index))
|
|
240
|
+
# Stamp per-item cost first, then derive the header from the SUM of the
|
|
241
|
+
# ROUNDED per-item assistant costs (M2) — so the §6.5 invariant
|
|
242
|
+
# sum(items.cost_usd) == header cost_usd holds EXACTLY to 1e-9 by
|
|
243
|
+
# construction OVER THE FULL ITEM LIST. 6dp is the deliberate JSON display
|
|
244
|
+
# precision. NOTE: the header is the whole-session total; the returned
|
|
245
|
+
# ``items`` is a page subset, so on page 2+ sum(page) < header by design.
|
|
246
|
+
header_cost = 0.0
|
|
247
|
+
for it in items:
|
|
248
|
+
if it["kind"] == "assistant" and "_msg_id" in it:
|
|
249
|
+
turn_cost = round(costs.get((it["_msg_id"], it["_req_id"]), 0.0), 6)
|
|
250
|
+
it["cost_usd"] = turn_cost
|
|
251
|
+
header_cost += turn_cost
|
|
252
|
+
del it["_msg_id"]
|
|
253
|
+
del it["_req_id"]
|
|
254
|
+
it.pop("_has_prose", None)
|
|
255
|
+
header_cost = round(header_cost, 6)
|
|
256
|
+
|
|
257
|
+
# Cursor pagination over the item list (anchored to each item's canonical id).
|
|
258
|
+
# A non-None `after` that matches no item's anchor (stale/deleted cursor)
|
|
259
|
+
# yields an EMPTY page — never silently re-serves the head (M1).
|
|
260
|
+
start = 0
|
|
261
|
+
if after is not None:
|
|
262
|
+
start = None
|
|
263
|
+
for k, it in enumerate(items):
|
|
264
|
+
if str(it["anchor"]["id"]) == str(after):
|
|
265
|
+
start = k + 1
|
|
266
|
+
break
|
|
267
|
+
if start is None:
|
|
268
|
+
return {
|
|
269
|
+
"session_id": session_id,
|
|
270
|
+
"project_label": _project_label(_latest(logical, 10)),
|
|
271
|
+
"git_branch": _latest(logical, 11),
|
|
272
|
+
"started_utc": logical[0][2],
|
|
273
|
+
"last_activity_utc": logical[-1][2],
|
|
274
|
+
"cost_usd": header_cost,
|
|
275
|
+
"models": sorted({r[6] for r in logical if r[6]}),
|
|
276
|
+
"items": [],
|
|
277
|
+
"page": {"next_after": None, "has_more": False},
|
|
278
|
+
}
|
|
279
|
+
page = items[start:start + limit]
|
|
280
|
+
has_more = start + limit < len(items)
|
|
281
|
+
next_after = page[-1]["anchor"]["id"] if (page and has_more) else None
|
|
282
|
+
|
|
283
|
+
# Stamp the session_id into each anchor (spec anchor is (session_id, uuid);
|
|
284
|
+
# the dict literals are built session-agnostic, so fill it here where the
|
|
285
|
+
# session id is known). NOT a no-op — the endpoint/clients rely on it.
|
|
286
|
+
for it in page:
|
|
287
|
+
it["anchor"]["session_id"] = session_id
|
|
288
|
+
|
|
289
|
+
first = logical[0]
|
|
290
|
+
last = logical[-1]
|
|
291
|
+
models = sorted({r[6] for r in logical if r[6]})
|
|
292
|
+
return {
|
|
293
|
+
"session_id": session_id,
|
|
294
|
+
"project_label": _project_label(_latest(logical, 10)),
|
|
295
|
+
"git_branch": _latest(logical, 11),
|
|
296
|
+
"started_utc": first[2],
|
|
297
|
+
"last_activity_utc": last[2],
|
|
298
|
+
"cost_usd": header_cost,
|
|
299
|
+
"models": models,
|
|
300
|
+
"items": page,
|
|
301
|
+
"page": {"next_after": next_after, "has_more": has_more},
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _latest(logical, col):
|
|
306
|
+
"""Most-recent non-null value in a column across the session (project/branch
|
|
307
|
+
show the latest, matching the dashboard's session posture)."""
|
|
308
|
+
for row in reversed(logical):
|
|
309
|
+
if row[col]:
|
|
310
|
+
return row[col]
|
|
311
|
+
return "" if col == 10 else None
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _build_turn(members):
|
|
315
|
+
"""Seed a turn item from its first fragment(s). Prose = joined non-empty
|
|
316
|
+
fragment text; anchor/model = the prose-bearing fragment (empirically exactly
|
|
317
|
+
one per turn); member_uuids = all fragment uuids. Fragments arriving later
|
|
318
|
+
(possibly non-consecutive — interleaved with a tool_result) fold in via
|
|
319
|
+
_extend_turn, which re-promotes the anchor/model once a prose fragment lands."""
|
|
320
|
+
first = members[0]
|
|
321
|
+
item = {
|
|
322
|
+
"kind": "assistant",
|
|
323
|
+
"anchor": {"session_id": None, "uuid": first[1], "id": first[0]},
|
|
324
|
+
"member_uuids": [first[1]],
|
|
325
|
+
"ts": first[2],
|
|
326
|
+
"text": "",
|
|
327
|
+
"blocks": [],
|
|
328
|
+
"model": first[6],
|
|
329
|
+
"is_sidechain": bool(first[9]),
|
|
330
|
+
"_msg_id": first[7],
|
|
331
|
+
"_req_id": first[8],
|
|
332
|
+
"_has_prose": False,
|
|
333
|
+
}
|
|
334
|
+
_fold_fragment(item, first)
|
|
335
|
+
for m in members[1:]:
|
|
336
|
+
_extend_turn(item, m)
|
|
337
|
+
return item
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _extend_turn(item, row):
|
|
341
|
+
"""Fold one more same-turn assistant fragment into an existing turn item:
|
|
342
|
+
append its uuid + blocks + non-empty prose. The FIRST fragment carrying prose
|
|
343
|
+
promotes the anchor/model to itself (the prose-bearing fragment is the
|
|
344
|
+
canonical anchor); subsequent prose fragments only extend the joined text."""
|
|
345
|
+
item["member_uuids"].append(row[1])
|
|
346
|
+
_fold_fragment(item, row)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _fold_fragment(item, row):
|
|
350
|
+
blocks = item["blocks"]
|
|
351
|
+
try:
|
|
352
|
+
blocks.extend(_json.loads(row[5] or "[]"))
|
|
353
|
+
except (ValueError, TypeError):
|
|
354
|
+
pass
|
|
355
|
+
frag_text = (row[4] or "").strip()
|
|
356
|
+
if frag_text:
|
|
357
|
+
if not item["_has_prose"]:
|
|
358
|
+
# First prose fragment becomes the canonical anchor / model.
|
|
359
|
+
item["anchor"]["uuid"] = row[1]
|
|
360
|
+
item["anchor"]["id"] = row[0]
|
|
361
|
+
item["model"] = row[6]
|
|
362
|
+
item["is_sidechain"] = bool(row[9])
|
|
363
|
+
item["_msg_id"] = row[7]
|
|
364
|
+
item["_req_id"] = row[8]
|
|
365
|
+
item["_has_prose"] = True
|
|
366
|
+
item["text"] = frag_text
|
|
367
|
+
else:
|
|
368
|
+
item["text"] = item["text"] + "\n" + frag_text
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _build_simple(row):
|
|
372
|
+
"""A human, tool_result, or assistant-with-null-msg_id item (no turn grouping,
|
|
373
|
+
no cost). An assistant row routes here only when its msg_id is NULL (no turn
|
|
374
|
+
key → no session_entries join); it carries an explicit cost_usd of 0.0 and NO
|
|
375
|
+
internal _msg_id/_req_id keys, so the cost loop's KeyError path can never fire
|
|
376
|
+
(I2). The model is preserved for assistant rows."""
|
|
377
|
+
(rid, u, ts, etype, text, blocks, model, msg_id, req_id, is_sc, cwd, branch) = row
|
|
378
|
+
try:
|
|
379
|
+
parsed = _json.loads(blocks or "[]")
|
|
380
|
+
except (ValueError, TypeError):
|
|
381
|
+
parsed = []
|
|
382
|
+
item = {
|
|
383
|
+
"kind": etype,
|
|
384
|
+
"anchor": {"session_id": None, "uuid": u, "id": rid},
|
|
385
|
+
"member_uuids": [u],
|
|
386
|
+
"ts": ts,
|
|
387
|
+
"text": text,
|
|
388
|
+
"blocks": parsed,
|
|
389
|
+
"is_sidechain": bool(is_sc),
|
|
390
|
+
}
|
|
391
|
+
if etype == "assistant":
|
|
392
|
+
item["model"] = model
|
|
393
|
+
item["cost_usd"] = 0.0
|
|
394
|
+
return item
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _fts_flag_unavailable(conn) -> bool:
|
|
398
|
+
try:
|
|
399
|
+
row = conn.execute(
|
|
400
|
+
"SELECT value FROM cache_meta WHERE key='fts5_unavailable'").fetchone()
|
|
401
|
+
except sqlite3.OperationalError:
|
|
402
|
+
return False
|
|
403
|
+
return bool(row and row[0])
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def search_conversations(conn, query, *, limit=50, offset=0,
|
|
407
|
+
fts_available=None) -> dict:
|
|
408
|
+
"""Cross-session search (spec §3.3). Uses FTS5 when available (bm25 rank +
|
|
409
|
+
snippet); else a LIKE scan with a manual snippet. Hits deduped by
|
|
410
|
+
(session_id, uuid); each carries the turn's cost. `fts_available` overrides
|
|
411
|
+
detection (test seam / explicit LIKE)."""
|
|
412
|
+
q = (query or "").strip()
|
|
413
|
+
limit = max(1, min(int(limit), 200))
|
|
414
|
+
offset = max(0, int(offset))
|
|
415
|
+
if fts_available is None:
|
|
416
|
+
fts_available = not _fts_flag_unavailable(conn)
|
|
417
|
+
if not q:
|
|
418
|
+
return {"query": q, "mode": "fts" if fts_available else "like",
|
|
419
|
+
"hits": [], "total": 0}
|
|
420
|
+
if fts_available:
|
|
421
|
+
try:
|
|
422
|
+
return _search_fts(conn, q, limit, offset)
|
|
423
|
+
except sqlite3.OperationalError:
|
|
424
|
+
pass # corrupt/missing FTS at query time → fall through to LIKE
|
|
425
|
+
return _search_like(conn, q, limit, offset)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _row_to_hit(uuid_, sid, ts, cwd, snippet, msg_id, req_id):
|
|
429
|
+
"""Build one hit WITHOUT cost — cost is batched onto the FINAL page in
|
|
430
|
+
_attach_costs (I1: no per-hit _turn_cost_map round-trip). The turn key rides
|
|
431
|
+
on the private `_turn_key` field until the batch maps it to `cost_usd`."""
|
|
432
|
+
return {
|
|
433
|
+
"session_id": sid,
|
|
434
|
+
"uuid": uuid_,
|
|
435
|
+
"project_label": _project_label(cwd),
|
|
436
|
+
"ts": ts,
|
|
437
|
+
"snippet": snippet,
|
|
438
|
+
"_turn_key": (msg_id, req_id) if msg_id is not None and req_id is not None
|
|
439
|
+
else None,
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def _dedup_hits(hits, limit, offset):
|
|
444
|
+
seen = set()
|
|
445
|
+
out = []
|
|
446
|
+
for h in hits:
|
|
447
|
+
key = (h["session_id"], h["uuid"])
|
|
448
|
+
if key in seen:
|
|
449
|
+
continue
|
|
450
|
+
seen.add(key)
|
|
451
|
+
out.append(h)
|
|
452
|
+
total = len(out)
|
|
453
|
+
return out[offset:offset + limit], total
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def _attach_costs(conn, page):
|
|
457
|
+
"""Compute turn cost for the FINAL page's hits in ONE _turn_cost_map call,
|
|
458
|
+
then map it onto each hit and drop the private `_turn_key`. Off-page and
|
|
459
|
+
duplicate hits never reach here, so we never compute cost for them (I1)."""
|
|
460
|
+
keys = [h["_turn_key"] for h in page if h.get("_turn_key") is not None]
|
|
461
|
+
costs = _turn_cost_map(conn, keys) if keys else {}
|
|
462
|
+
for h in page:
|
|
463
|
+
tk = h.pop("_turn_key", None)
|
|
464
|
+
h["cost_usd"] = round(costs.get(tk, 0.0), 6) if tk is not None else 0.0
|
|
465
|
+
return page
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _search_fts(conn, q, limit, offset):
|
|
469
|
+
sql = (
|
|
470
|
+
"SELECT cm.session_id, cm.uuid, cm.timestamp_utc, cm.cwd, "
|
|
471
|
+
" cm.msg_id, cm.req_id, "
|
|
472
|
+
" snippet(conversation_fts, 0, '[', ']', ' … ', 12) AS snip "
|
|
473
|
+
"FROM conversation_fts "
|
|
474
|
+
"JOIN conversation_messages cm ON cm.id = conversation_fts.rowid "
|
|
475
|
+
"WHERE conversation_fts MATCH ? "
|
|
476
|
+
# cm.id is the final tiebreaker so equal (rank, timestamp) hits order
|
|
477
|
+
# deterministically — _dedup_hits keeps the FIRST occurrence, so without
|
|
478
|
+
# it the surviving snippet/cost (and page boundary) would flip run-to-run.
|
|
479
|
+
"ORDER BY bm25(conversation_fts), cm.timestamp_utc DESC, cm.id DESC"
|
|
480
|
+
)
|
|
481
|
+
raw = conn.execute(sql, (_fts_query(q),)).fetchall()
|
|
482
|
+
hits = [_row_to_hit(u, sid, ts, cwd, snip, mid, rqd)
|
|
483
|
+
for (sid, u, ts, cwd, mid, rqd, snip) in raw]
|
|
484
|
+
page, total = _dedup_hits(hits, limit, offset)
|
|
485
|
+
return {"query": q, "mode": "fts", "hits": _attach_costs(conn, page),
|
|
486
|
+
"total": total}
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _search_like(conn, q, limit, offset):
|
|
490
|
+
# Escape the ESCAPE char (\) FIRST, then the wildcards — otherwise a query
|
|
491
|
+
# containing a backslash (incl. a trailing one) mis-escapes the appended
|
|
492
|
+
# '%' and the LIKE silently matches nothing (ESCAPE '\' below).
|
|
493
|
+
like = ("%" + q.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
|
|
494
|
+
+ "%")
|
|
495
|
+
sql = (
|
|
496
|
+
"SELECT session_id, uuid, timestamp_utc, cwd, msg_id, req_id, text "
|
|
497
|
+
"FROM conversation_messages "
|
|
498
|
+
"WHERE text LIKE ? ESCAPE '\\' AND text != '' "
|
|
499
|
+
"ORDER BY timestamp_utc DESC, id DESC"
|
|
500
|
+
)
|
|
501
|
+
hits = []
|
|
502
|
+
for sid, u, ts, cwd, mid, rqd, text in conn.execute(sql, (like,)):
|
|
503
|
+
hits.append(_row_to_hit(u, sid, ts, cwd,
|
|
504
|
+
_manual_snippet(text, q), mid, rqd))
|
|
505
|
+
page, total = _dedup_hits(hits, limit, offset)
|
|
506
|
+
return {"query": q, "mode": "like", "hits": _attach_costs(conn, page),
|
|
507
|
+
"total": total}
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _fts_query(q):
|
|
511
|
+
"""Quote each whitespace term as an FTS5 string literal so punctuation /
|
|
512
|
+
operators in user input can't error the MATCH or inject FTS syntax."""
|
|
513
|
+
terms = [t for t in q.split() if t]
|
|
514
|
+
return " ".join('"' + t.replace('"', '""') + '"' for t in terms) or '""'
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _manual_snippet(text, q, width=80):
|
|
518
|
+
lo = text.lower().find(q.lower())
|
|
519
|
+
if lo < 0:
|
|
520
|
+
return text[:width]
|
|
521
|
+
start = max(0, lo - width // 2)
|
|
522
|
+
end = min(len(text), lo + len(q) + width // 2)
|
|
523
|
+
s = text[start:end]
|
|
524
|
+
return ("… " if start else "") + s + (" …" if end < len(text) else "")
|
package/bin/_lib_doctor.py
CHANGED
|
@@ -140,6 +140,14 @@ class DoctorState:
|
|
|
140
140
|
# token_total); the kernel only reads `.kind`/`.model`/`.entry_count`/
|
|
141
141
|
# `.token_total`, so any duck-typed equivalent works for tests.
|
|
142
142
|
pricing_coverage: Optional[list] = None
|
|
143
|
+
# Conversation viewer (Plan 2, spec §5): the resolved
|
|
144
|
+
# `dashboard.expose_transcripts` opt-in. Only consequential when the bind
|
|
145
|
+
# is LAN — `_check_safety_dashboard_bind` then surfaces an extra
|
|
146
|
+
# "transcripts exposed on LAN" detail on top of the existing LAN-bind
|
|
147
|
+
# WARN. Defaulted False (placed last after the other defaulted fields) so
|
|
148
|
+
# existing constructors stay valid and a loopback bind is byte-identical
|
|
149
|
+
# whether or not expose is set.
|
|
150
|
+
expose_transcripts: bool = False
|
|
143
151
|
|
|
144
152
|
|
|
145
153
|
@dataclasses.dataclass(frozen=True)
|
|
@@ -556,6 +564,45 @@ def _check_db_migrations_pending(s: DoctorState) -> CheckResult:
|
|
|
556
564
|
)
|
|
557
565
|
|
|
558
566
|
|
|
567
|
+
def _check_db_version_ahead(s: DoctorState) -> CheckResult:
|
|
568
|
+
"""FAIL/WARN when a DB's user_version exceeds the running binary's
|
|
569
|
+
registry head (issue #145). stats.db ahead bricks commands (FAIL);
|
|
570
|
+
cache.db ahead auto-heals on the next open (WARN). doctor reads raw
|
|
571
|
+
user_version (no dispatcher), so it can report without healing/bricking.
|
|
572
|
+
"""
|
|
573
|
+
def _eval(status):
|
|
574
|
+
if not status:
|
|
575
|
+
return None
|
|
576
|
+
uv = status.get("user_version", 0) or 0
|
|
577
|
+
rs = status.get("registry_size", 0) or 0
|
|
578
|
+
return {"user_version": uv, "registry_size": rs, "ahead": uv > rs}
|
|
579
|
+
|
|
580
|
+
stats = _eval(s.stats_db_status)
|
|
581
|
+
cache = _eval(s.cache_db_status)
|
|
582
|
+
details = {"stats.db": stats, "cache.db": cache}
|
|
583
|
+
stats_ahead = bool(stats and stats["ahead"])
|
|
584
|
+
cache_ahead = bool(cache and cache["ahead"])
|
|
585
|
+
|
|
586
|
+
if stats_ahead:
|
|
587
|
+
return CheckResult(
|
|
588
|
+
id="db.version_ahead", title="Version ahead", severity="fail",
|
|
589
|
+
summary=f"stats.db ahead (v{stats['user_version']} > known v{stats['registry_size']})",
|
|
590
|
+
remediation="Run `cctally db recover --db stats --yes` (or restore from backup)",
|
|
591
|
+
details=details,
|
|
592
|
+
)
|
|
593
|
+
if cache_ahead:
|
|
594
|
+
return CheckResult(
|
|
595
|
+
id="db.version_ahead", title="Version ahead", severity="warn",
|
|
596
|
+
summary=f"cache.db ahead (v{cache['user_version']} > known v{cache['registry_size']}) — auto-heals",
|
|
597
|
+
remediation="Auto-heals on next command, or run `cctally db recover --db cache`",
|
|
598
|
+
details=details,
|
|
599
|
+
)
|
|
600
|
+
return CheckResult(
|
|
601
|
+
id="db.version_ahead", title="Version ahead", severity="ok",
|
|
602
|
+
summary="none ahead", remediation=None, details=details,
|
|
603
|
+
)
|
|
604
|
+
|
|
605
|
+
|
|
559
606
|
def _check_data_latest_snapshot_age(s: DoctorState) -> CheckResult:
|
|
560
607
|
if s.latest_snapshot_at is None:
|
|
561
608
|
return CheckResult(
|
|
@@ -859,13 +906,24 @@ def _check_safety_dashboard_bind(s: DoctorState) -> CheckResult:
|
|
|
859
906
|
rem += "."
|
|
860
907
|
note = ("A separate running dashboard process may have overridden via --host; "
|
|
861
908
|
"the CLI sees config only.") if s.runtime_bind is None else None
|
|
909
|
+
# Conversation viewer (Plan 2, spec §5): a LAN bind WITH the
|
|
910
|
+
# `dashboard.expose_transcripts` opt-in serves raw conversation prose to
|
|
911
|
+
# the LAN. Surface that ONLY here (the bind already WARNs and is
|
|
912
|
+
# non-loopback by construction), additively — a loopback bind never
|
|
913
|
+
# reaches this branch, so the loopback report stays byte-identical
|
|
914
|
+
# regardless of the expose flag.
|
|
915
|
+
extra = {}
|
|
916
|
+
if s.expose_transcripts:
|
|
917
|
+
notes.append("transcripts exposed on LAN")
|
|
918
|
+
extra["transcripts_exposed_on_lan"] = True
|
|
862
919
|
return CheckResult(
|
|
863
920
|
id="safety.dashboard_bind", title="Dashboard bind",
|
|
864
921
|
severity="warn", summary="; ".join(notes),
|
|
865
922
|
remediation=rem,
|
|
866
923
|
details={"config": s.dashboard_bind_stored,
|
|
867
924
|
"runtime_bind": s.runtime_bind,
|
|
868
|
-
**({"note": note} if note else {})
|
|
925
|
+
**({"note": note} if note else {}),
|
|
926
|
+
**extra},
|
|
869
927
|
)
|
|
870
928
|
|
|
871
929
|
|
|
@@ -1052,6 +1110,7 @@ _CATEGORY_DEFINITIONS: tuple[tuple[str, str, tuple[tuple[str, str], ...]], ...]
|
|
|
1052
1110
|
("db", "Database", (
|
|
1053
1111
|
("db.stats.file", "_check_db_stats_file"),
|
|
1054
1112
|
("db.cache.file", "_check_db_cache_file"),
|
|
1113
|
+
("db.version_ahead", "_check_db_version_ahead"),
|
|
1055
1114
|
("db.migrations.applied", "_check_db_migrations_applied"),
|
|
1056
1115
|
("db.migrations.pending", "_check_db_migrations_pending"),
|
|
1057
1116
|
)),
|