cctally 1.27.0 → 1.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ """Pure query kernel for the conversation viewer endpoints (Plan 2, spec §3).
2
+
3
+ Takes a sqlite3.Connection over a cache.db that already holds Plan 1's
4
+ conversation_messages (+ FTS) and session_entries. No clock, no network, no
5
+ global mutation — unit-tested against an in-memory cache.db seeded by
6
+ _apply_cache_schema. Three entry points back the three GET routes:
7
+ list_conversations (rail), get_conversation (reader), search_conversations.
8
+
9
+ Cost is joined ONCE per logical assistant turn (msg_id, req_id) to the single
10
+ deduped session_entries row (idx_entries_dedup), via the shared pricing helper
11
+ — never per physical fragment and never from cost_usd_raw (often NULL).
12
+ """
13
+ from __future__ import annotations
14
+ import json as _json
15
+ import os
16
+ import sqlite3
17
+
18
+ # Public surface (Plan 2): shipped in the npm tarball + brew formula + public
19
+ # mirror — imported by the dashboard's conversation endpoints at runtime.
20
+
21
+ from _lib_pricing import _calculate_entry_cost
22
+
23
+
24
+ def _project_label(cwd) -> str:
25
+ """Basename of the project cwd (dashboard label posture — no reveal). Falls
26
+ back to the raw path for root-ish cwds, '' when absent."""
27
+ if not cwd:
28
+ return ""
29
+ return os.path.basename(cwd.rstrip("/")) or cwd
30
+
31
+
32
+ def _entry_cost(model, inp, out, cc, cr, cost_usd_raw) -> float:
33
+ """Cost for one session_entries row via the shared pricing helper. Tokens →
34
+ the helper's usage dict. cost_usd_raw is passed as the optional override the
35
+ helper already understands (it is often NULL — never the primary source)."""
36
+ usage = {
37
+ "input_tokens": inp or 0,
38
+ "output_tokens": out or 0,
39
+ "cache_creation_input_tokens": cc or 0,
40
+ "cache_read_input_tokens": cr or 0,
41
+ }
42
+ return _calculate_entry_cost(model or "", usage, cost_usd=cost_usd_raw)
43
+
44
+
45
+ def _session_cost_map(conn, session_ids):
46
+ """{session_id: total_cost_usd} for the given sessions. Joins
47
+ conversation_messages turn keys to the single deduped session_entries row
48
+ per (msg_id, req_id), so a turn replayed across files contributes once.
49
+ (msg_id, req_id) is globally unique in session_entries and maps to exactly
50
+ one session_id, so per-session sums are clean."""
51
+ costs = {sid: 0.0 for sid in session_ids}
52
+ if not session_ids:
53
+ return costs
54
+ placeholders = ",".join("?" for _ in session_ids)
55
+ sql = (
56
+ "SELECT cm.session_id, se.model, se.input_tokens, se.output_tokens, "
57
+ " se.cache_create_tokens, se.cache_read_tokens, se.cost_usd_raw "
58
+ "FROM (SELECT DISTINCT session_id, msg_id, req_id "
59
+ " FROM conversation_messages "
60
+ " WHERE session_id IN (%s) AND msg_id IS NOT NULL AND req_id IS NOT NULL) cm "
61
+ "JOIN session_entries se ON se.msg_id = cm.msg_id AND se.req_id = cm.req_id"
62
+ % placeholders
63
+ )
64
+ for sid, model, inp, out, cc, cr, raw in conn.execute(sql, list(session_ids)):
65
+ costs[sid] = costs.get(sid, 0.0) + _entry_cost(model, inp, out, cc, cr, raw)
66
+ return costs
67
+
68
+
69
+ def _session_models_map(conn, session_ids):
70
+ """{session_id: sorted distinct non-null models}."""
71
+ out = {sid: [] for sid in session_ids}
72
+ if not session_ids:
73
+ return out
74
+ placeholders = ",".join("?" for _ in session_ids)
75
+ sql = (
76
+ "SELECT DISTINCT session_id, model FROM conversation_messages "
77
+ "WHERE session_id IN (%s) AND model IS NOT NULL AND model != '' "
78
+ "ORDER BY model" % placeholders
79
+ )
80
+ for sid, model in conn.execute(sql, list(session_ids)):
81
+ out.setdefault(sid, []).append(model)
82
+ return out
83
+
84
+
85
+ def _session_latest_meta_map(conn, session_ids):
86
+ """{session_id: (cwd, git_branch)} using the most-recent NON-NULL value per
87
+ column — the SAME posture as get_conversation's _latest, so the rail and the
88
+ reader agree on a session whose cwd/branch changed over its lifetime (a plain
89
+ MAX() picks the lexical max, not the latest). Bounded to the page's sessions
90
+ via per-session correlated lookups over idx (session_id, timestamp_utc, id),
91
+ mirroring _session_cost_map / _session_models_map."""
92
+ meta = {sid: (None, None) for sid in session_ids}
93
+ if not session_ids:
94
+ return meta
95
+ placeholders = ",".join("?" for _ in session_ids)
96
+ sql = (
97
+ "SELECT s.session_id, "
98
+ " (SELECT c.cwd FROM conversation_messages c "
99
+ " WHERE c.session_id = s.session_id AND c.cwd IS NOT NULL "
100
+ " ORDER BY c.timestamp_utc DESC, c.id DESC LIMIT 1), "
101
+ " (SELECT b.git_branch FROM conversation_messages b "
102
+ " WHERE b.session_id = s.session_id AND b.git_branch IS NOT NULL "
103
+ " ORDER BY b.timestamp_utc DESC, b.id DESC LIMIT 1) "
104
+ "FROM (SELECT DISTINCT session_id FROM conversation_messages "
105
+ " WHERE session_id IN (%s)) s" % placeholders
106
+ )
107
+ for sid, cwd, branch in conn.execute(sql, list(session_ids)):
108
+ meta[sid] = (cwd, branch)
109
+ return meta
110
+
111
+
112
+ _SORTS = {
113
+ "recent": "MAX(timestamp_utc) DESC, session_id DESC",
114
+ "oldest": "MIN(timestamp_utc) ASC, session_id ASC",
115
+ }
116
+
117
+
118
+ def list_conversations(conn, *, sort="recent", limit=50, offset=0) -> dict:
119
+ """All-history per-session browse rows (spec §3.1). NOT 365-day bounded."""
120
+ order = _SORTS.get(sort, _SORTS["recent"])
121
+ limit = max(1, min(int(limit), 200))
122
+ offset = max(0, int(offset))
123
+ rows = conn.execute(
124
+ "SELECT session_id, COUNT(*) AS msg_count, "
125
+ " MIN(timestamp_utc) AS started, MAX(timestamp_utc) AS last_activity "
126
+ "FROM conversation_messages "
127
+ "WHERE session_id IS NOT NULL "
128
+ "GROUP BY session_id "
129
+ "ORDER BY " + order + " LIMIT ? OFFSET ?",
130
+ (limit + 1, offset),
131
+ ).fetchall()
132
+ has_more = len(rows) > limit
133
+ rows = rows[:limit]
134
+ session_ids = [r[0] for r in rows]
135
+ costs = _session_cost_map(conn, session_ids)
136
+ models = _session_models_map(conn, session_ids)
137
+ # cwd/git_branch as the latest non-null (reader posture), NOT a lexical MAX().
138
+ meta = _session_latest_meta_map(conn, session_ids)
139
+ conversations = [
140
+ {
141
+ "session_id": sid,
142
+ "project_label": _project_label(meta.get(sid, (None, None))[0]),
143
+ "git_branch": meta.get(sid, (None, None))[1],
144
+ "started_utc": started,
145
+ "last_activity_utc": last_activity,
146
+ "msg_count": msg_count,
147
+ "cost_usd": round(costs.get(sid, 0.0), 6),
148
+ "models": models.get(sid, []),
149
+ }
150
+ for (sid, msg_count, started, last_activity) in rows
151
+ ]
152
+ return {
153
+ "conversations": conversations,
154
+ "page": {
155
+ "next_offset": offset + len(conversations) if has_more else None,
156
+ "has_more": has_more,
157
+ },
158
+ }
159
+
160
+
161
+ def _turn_cost_map(conn, turn_keys):
162
+ """{(msg_id, req_id): cost_usd} for the given non-null turn keys, joined ONCE
163
+ to the deduped session_entries row. Keys absent from session_entries (e.g.
164
+ <synthetic> walker-skipped rows) are simply not present → cost 0 by omission."""
165
+ costs = {}
166
+ keys = [(m, r) for (m, r) in turn_keys if m is not None and r is not None]
167
+ if not keys:
168
+ return costs
169
+ # Chunk the OR-of-pairs to stay well under SQLite's variable limit.
170
+ for i in range(0, len(keys), 400):
171
+ chunk = keys[i:i + 400]
172
+ cond = " OR ".join("(msg_id=? AND req_id=?)" for _ in chunk)
173
+ params = [v for pair in chunk for v in pair]
174
+ sql = ("SELECT msg_id, req_id, model, input_tokens, output_tokens, "
175
+ "cache_create_tokens, cache_read_tokens, cost_usd_raw "
176
+ "FROM session_entries WHERE " + cond)
177
+ for m, r, model, inp, out, cc, cr, raw in conn.execute(sql, params):
178
+ costs[(m, r)] = _entry_cost(model, inp, out, cc, cr, raw)
179
+ return costs
180
+
181
+
182
+ def get_conversation(conn, session_id, *, after=None, limit=500):
183
+ """Reader payload for one session (spec §3.2). Returns None for an unknown
184
+ session. Dedups logical messages by (session_id, uuid) (canonical = earliest
185
+ timestamp), groups assistant fragments into turn items by (msg_id, req_id),
186
+ joins cost once, anchors a turn on its prose-bearing fragment, and exposes
187
+ every member fragment uuid for jump resolution. Cursor over (timestamp_utc,
188
+ id); ~500 items/page."""
189
+ limit = max(1, min(int(limit), 1000))
190
+ exists = conn.execute(
191
+ "SELECT 1 FROM conversation_messages WHERE session_id=? LIMIT 1",
192
+ (session_id,)).fetchone()
193
+ if exists is None:
194
+ return None
195
+
196
+ # Pull the session ordered; dedup logical messages by (session_id, uuid),
197
+ # canonical row = earliest (timestamp_utc, id). Replays carry the original
198
+ # uuid, so the first occurrence in ascending order is canonical.
199
+ raw = conn.execute(
200
+ "SELECT id, uuid, timestamp_utc, entry_type, text, blocks_json, model, "
201
+ " msg_id, req_id, is_sidechain, cwd, git_branch "
202
+ "FROM conversation_messages WHERE session_id=? "
203
+ "ORDER BY timestamp_utc, id", (session_id,)).fetchall()
204
+
205
+ seen_uuid = set()
206
+ logical = [] # canonical physical rows, in order
207
+ for row in raw:
208
+ u = row[1]
209
+ if u in seen_uuid:
210
+ continue
211
+ seen_uuid.add(u)
212
+ logical.append(row)
213
+
214
+ # Group assistant fragments sharing (msg_id, req_id) into one turn item over
215
+ # the WHOLE logical list — NOT by adjacency. Real tool-using transcripts
216
+ # interleave a tool_result (a `user`/tool_result item) between fragments of
217
+ # the SAME turn, so the same key recurs non-consecutively. We keep a turn-key
218
+ # → item-index map: first occurrence emits the turn item AT THIS POSITION;
219
+ # later same-key fragments fold their blocks/prose/uuids into the existing
220
+ # item. A turn → exactly ONE item → cost counted exactly once. Humans,
221
+ # tool_results, and assistant rows with a null msg_id emit as simple items at
222
+ # their own position.
223
+ items = []
224
+ turn_index = {} # (msg_id, req_id) -> index into items
225
+ for row in logical:
226
+ (rid, u, ts, etype, text, blocks, model, msg_id, req_id,
227
+ is_sc, cwd, branch) = row
228
+ if etype == "assistant" and msg_id is not None:
229
+ key = (msg_id, req_id)
230
+ idx = turn_index.get(key)
231
+ if idx is None:
232
+ turn_index[key] = len(items)
233
+ items.append(_build_turn([row]))
234
+ else:
235
+ _extend_turn(items[idx], row)
236
+ else:
237
+ items.append(_build_simple(row))
238
+
239
+ costs = _turn_cost_map(conn, list(turn_index))
240
+ # Stamp per-item cost first, then derive the header from the SUM of the
241
+ # ROUNDED per-item assistant costs (M2) — so the §6.5 invariant
242
+ # sum(items.cost_usd) == header cost_usd holds EXACTLY to 1e-9 by
243
+ # construction OVER THE FULL ITEM LIST. 6dp is the deliberate JSON display
244
+ # precision. NOTE: the header is the whole-session total; the returned
245
+ # ``items`` is a page subset, so on page 2+ sum(page) < header by design.
246
+ header_cost = 0.0
247
+ for it in items:
248
+ if it["kind"] == "assistant" and "_msg_id" in it:
249
+ turn_cost = round(costs.get((it["_msg_id"], it["_req_id"]), 0.0), 6)
250
+ it["cost_usd"] = turn_cost
251
+ header_cost += turn_cost
252
+ del it["_msg_id"]
253
+ del it["_req_id"]
254
+ it.pop("_has_prose", None)
255
+ header_cost = round(header_cost, 6)
256
+
257
+ # Cursor pagination over the item list (anchored to each item's canonical id).
258
+ # A non-None `after` that matches no item's anchor (stale/deleted cursor)
259
+ # yields an EMPTY page — never silently re-serves the head (M1).
260
+ start = 0
261
+ if after is not None:
262
+ start = None
263
+ for k, it in enumerate(items):
264
+ if str(it["anchor"]["id"]) == str(after):
265
+ start = k + 1
266
+ break
267
+ if start is None:
268
+ return {
269
+ "session_id": session_id,
270
+ "project_label": _project_label(_latest(logical, 10)),
271
+ "git_branch": _latest(logical, 11),
272
+ "started_utc": logical[0][2],
273
+ "last_activity_utc": logical[-1][2],
274
+ "cost_usd": header_cost,
275
+ "models": sorted({r[6] for r in logical if r[6]}),
276
+ "items": [],
277
+ "page": {"next_after": None, "has_more": False},
278
+ }
279
+ page = items[start:start + limit]
280
+ has_more = start + limit < len(items)
281
+ next_after = page[-1]["anchor"]["id"] if (page and has_more) else None
282
+
283
+ # Stamp the session_id into each anchor (spec anchor is (session_id, uuid);
284
+ # the dict literals are built session-agnostic, so fill it here where the
285
+ # session id is known). NOT a no-op — the endpoint/clients rely on it.
286
+ for it in page:
287
+ it["anchor"]["session_id"] = session_id
288
+
289
+ first = logical[0]
290
+ last = logical[-1]
291
+ models = sorted({r[6] for r in logical if r[6]})
292
+ return {
293
+ "session_id": session_id,
294
+ "project_label": _project_label(_latest(logical, 10)),
295
+ "git_branch": _latest(logical, 11),
296
+ "started_utc": first[2],
297
+ "last_activity_utc": last[2],
298
+ "cost_usd": header_cost,
299
+ "models": models,
300
+ "items": page,
301
+ "page": {"next_after": next_after, "has_more": has_more},
302
+ }
303
+
304
+
305
+ def _latest(logical, col):
306
+ """Most-recent non-null value in a column across the session (project/branch
307
+ show the latest, matching the dashboard's session posture)."""
308
+ for row in reversed(logical):
309
+ if row[col]:
310
+ return row[col]
311
+ return "" if col == 10 else None
312
+
313
+
314
+ def _build_turn(members):
315
+ """Seed a turn item from its first fragment(s). Prose = joined non-empty
316
+ fragment text; anchor/model = the prose-bearing fragment (empirically exactly
317
+ one per turn); member_uuids = all fragment uuids. Fragments arriving later
318
+ (possibly non-consecutive — interleaved with a tool_result) fold in via
319
+ _extend_turn, which re-promotes the anchor/model once a prose fragment lands."""
320
+ first = members[0]
321
+ item = {
322
+ "kind": "assistant",
323
+ "anchor": {"session_id": None, "uuid": first[1], "id": first[0]},
324
+ "member_uuids": [first[1]],
325
+ "ts": first[2],
326
+ "text": "",
327
+ "blocks": [],
328
+ "model": first[6],
329
+ "is_sidechain": bool(first[9]),
330
+ "_msg_id": first[7],
331
+ "_req_id": first[8],
332
+ "_has_prose": False,
333
+ }
334
+ _fold_fragment(item, first)
335
+ for m in members[1:]:
336
+ _extend_turn(item, m)
337
+ return item
338
+
339
+
340
+ def _extend_turn(item, row):
341
+ """Fold one more same-turn assistant fragment into an existing turn item:
342
+ append its uuid + blocks + non-empty prose. The FIRST fragment carrying prose
343
+ promotes the anchor/model to itself (the prose-bearing fragment is the
344
+ canonical anchor); subsequent prose fragments only extend the joined text."""
345
+ item["member_uuids"].append(row[1])
346
+ _fold_fragment(item, row)
347
+
348
+
349
+ def _fold_fragment(item, row):
350
+ blocks = item["blocks"]
351
+ try:
352
+ blocks.extend(_json.loads(row[5] or "[]"))
353
+ except (ValueError, TypeError):
354
+ pass
355
+ frag_text = (row[4] or "").strip()
356
+ if frag_text:
357
+ if not item["_has_prose"]:
358
+ # First prose fragment becomes the canonical anchor / model.
359
+ item["anchor"]["uuid"] = row[1]
360
+ item["anchor"]["id"] = row[0]
361
+ item["model"] = row[6]
362
+ item["is_sidechain"] = bool(row[9])
363
+ item["_msg_id"] = row[7]
364
+ item["_req_id"] = row[8]
365
+ item["_has_prose"] = True
366
+ item["text"] = frag_text
367
+ else:
368
+ item["text"] = item["text"] + "\n" + frag_text
369
+
370
+
371
+ def _build_simple(row):
372
+ """A human, tool_result, or assistant-with-null-msg_id item (no turn grouping,
373
+ no cost). An assistant row routes here only when its msg_id is NULL (no turn
374
+ key → no session_entries join); it carries an explicit cost_usd of 0.0 and NO
375
+ internal _msg_id/_req_id keys, so the cost loop's KeyError path can never fire
376
+ (I2). The model is preserved for assistant rows."""
377
+ (rid, u, ts, etype, text, blocks, model, msg_id, req_id, is_sc, cwd, branch) = row
378
+ try:
379
+ parsed = _json.loads(blocks or "[]")
380
+ except (ValueError, TypeError):
381
+ parsed = []
382
+ item = {
383
+ "kind": etype,
384
+ "anchor": {"session_id": None, "uuid": u, "id": rid},
385
+ "member_uuids": [u],
386
+ "ts": ts,
387
+ "text": text,
388
+ "blocks": parsed,
389
+ "is_sidechain": bool(is_sc),
390
+ }
391
+ if etype == "assistant":
392
+ item["model"] = model
393
+ item["cost_usd"] = 0.0
394
+ return item
395
+
396
+
397
+ def _fts_flag_unavailable(conn) -> bool:
398
+ try:
399
+ row = conn.execute(
400
+ "SELECT value FROM cache_meta WHERE key='fts5_unavailable'").fetchone()
401
+ except sqlite3.OperationalError:
402
+ return False
403
+ return bool(row and row[0])
404
+
405
+
406
+ def search_conversations(conn, query, *, limit=50, offset=0,
407
+ fts_available=None) -> dict:
408
+ """Cross-session search (spec §3.3). Uses FTS5 when available (bm25 rank +
409
+ snippet); else a LIKE scan with a manual snippet. Hits deduped by
410
+ (session_id, uuid); each carries the turn's cost. `fts_available` overrides
411
+ detection (test seam / explicit LIKE)."""
412
+ q = (query or "").strip()
413
+ limit = max(1, min(int(limit), 200))
414
+ offset = max(0, int(offset))
415
+ if fts_available is None:
416
+ fts_available = not _fts_flag_unavailable(conn)
417
+ if not q:
418
+ return {"query": q, "mode": "fts" if fts_available else "like",
419
+ "hits": [], "total": 0}
420
+ if fts_available:
421
+ try:
422
+ return _search_fts(conn, q, limit, offset)
423
+ except sqlite3.OperationalError:
424
+ pass # corrupt/missing FTS at query time → fall through to LIKE
425
+ return _search_like(conn, q, limit, offset)
426
+
427
+
428
+ def _row_to_hit(uuid_, sid, ts, cwd, snippet, msg_id, req_id):
429
+ """Build one hit WITHOUT cost — cost is batched onto the FINAL page in
430
+ _attach_costs (I1: no per-hit _turn_cost_map round-trip). The turn key rides
431
+ on the private `_turn_key` field until the batch maps it to `cost_usd`."""
432
+ return {
433
+ "session_id": sid,
434
+ "uuid": uuid_,
435
+ "project_label": _project_label(cwd),
436
+ "ts": ts,
437
+ "snippet": snippet,
438
+ "_turn_key": (msg_id, req_id) if msg_id is not None and req_id is not None
439
+ else None,
440
+ }
441
+
442
+
443
+ def _dedup_hits(hits, limit, offset):
444
+ seen = set()
445
+ out = []
446
+ for h in hits:
447
+ key = (h["session_id"], h["uuid"])
448
+ if key in seen:
449
+ continue
450
+ seen.add(key)
451
+ out.append(h)
452
+ total = len(out)
453
+ return out[offset:offset + limit], total
454
+
455
+
456
+ def _attach_costs(conn, page):
457
+ """Compute turn cost for the FINAL page's hits in ONE _turn_cost_map call,
458
+ then map it onto each hit and drop the private `_turn_key`. Off-page and
459
+ duplicate hits never reach here, so we never compute cost for them (I1)."""
460
+ keys = [h["_turn_key"] for h in page if h.get("_turn_key") is not None]
461
+ costs = _turn_cost_map(conn, keys) if keys else {}
462
+ for h in page:
463
+ tk = h.pop("_turn_key", None)
464
+ h["cost_usd"] = round(costs.get(tk, 0.0), 6) if tk is not None else 0.0
465
+ return page
466
+
467
+
468
+ def _search_fts(conn, q, limit, offset):
469
+ sql = (
470
+ "SELECT cm.session_id, cm.uuid, cm.timestamp_utc, cm.cwd, "
471
+ " cm.msg_id, cm.req_id, "
472
+ " snippet(conversation_fts, 0, '[', ']', ' … ', 12) AS snip "
473
+ "FROM conversation_fts "
474
+ "JOIN conversation_messages cm ON cm.id = conversation_fts.rowid "
475
+ "WHERE conversation_fts MATCH ? "
476
+ # cm.id is the final tiebreaker so equal (rank, timestamp) hits order
477
+ # deterministically — _dedup_hits keeps the FIRST occurrence, so without
478
+ # it the surviving snippet/cost (and page boundary) would flip run-to-run.
479
+ "ORDER BY bm25(conversation_fts), cm.timestamp_utc DESC, cm.id DESC"
480
+ )
481
+ raw = conn.execute(sql, (_fts_query(q),)).fetchall()
482
+ hits = [_row_to_hit(u, sid, ts, cwd, snip, mid, rqd)
483
+ for (sid, u, ts, cwd, mid, rqd, snip) in raw]
484
+ page, total = _dedup_hits(hits, limit, offset)
485
+ return {"query": q, "mode": "fts", "hits": _attach_costs(conn, page),
486
+ "total": total}
487
+
488
+
489
+ def _search_like(conn, q, limit, offset):
490
+ # Escape the ESCAPE char (\) FIRST, then the wildcards — otherwise a query
491
+ # containing a backslash (incl. a trailing one) mis-escapes the appended
492
+ # '%' and the LIKE silently matches nothing (ESCAPE '\' below).
493
+ like = ("%" + q.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
494
+ + "%")
495
+ sql = (
496
+ "SELECT session_id, uuid, timestamp_utc, cwd, msg_id, req_id, text "
497
+ "FROM conversation_messages "
498
+ "WHERE text LIKE ? ESCAPE '\\' AND text != '' "
499
+ "ORDER BY timestamp_utc DESC, id DESC"
500
+ )
501
+ hits = []
502
+ for sid, u, ts, cwd, mid, rqd, text in conn.execute(sql, (like,)):
503
+ hits.append(_row_to_hit(u, sid, ts, cwd,
504
+ _manual_snippet(text, q), mid, rqd))
505
+ page, total = _dedup_hits(hits, limit, offset)
506
+ return {"query": q, "mode": "like", "hits": _attach_costs(conn, page),
507
+ "total": total}
508
+
509
+
510
+ def _fts_query(q):
511
+ """Quote each whitespace term as an FTS5 string literal so punctuation /
512
+ operators in user input can't error the MATCH or inject FTS syntax."""
513
+ terms = [t for t in q.split() if t]
514
+ return " ".join('"' + t.replace('"', '""') + '"' for t in terms) or '""'
515
+
516
+
517
+ def _manual_snippet(text, q, width=80):
518
+ lo = text.lower().find(q.lower())
519
+ if lo < 0:
520
+ return text[:width]
521
+ start = max(0, lo - width // 2)
522
+ end = min(len(text), lo + len(q) + width // 2)
523
+ s = text[start:end]
524
+ return ("… " if start else "") + s + (" …" if end < len(text) else "")
@@ -140,6 +140,14 @@ class DoctorState:
140
140
  # token_total); the kernel only reads `.kind`/`.model`/`.entry_count`/
141
141
  # `.token_total`, so any duck-typed equivalent works for tests.
142
142
  pricing_coverage: Optional[list] = None
143
+ # Conversation viewer (Plan 2, spec §5): the resolved
144
+ # `dashboard.expose_transcripts` opt-in. Only consequential when the bind
145
+ # is LAN — `_check_safety_dashboard_bind` then surfaces an extra
146
+ # "transcripts exposed on LAN" detail on top of the existing LAN-bind
147
+ # WARN. Defaulted False (placed last after the other defaulted fields) so
148
+ # existing constructors stay valid and a loopback bind is byte-identical
149
+ # whether or not expose is set.
150
+ expose_transcripts: bool = False
143
151
 
144
152
 
145
153
  @dataclasses.dataclass(frozen=True)
@@ -556,6 +564,45 @@ def _check_db_migrations_pending(s: DoctorState) -> CheckResult:
556
564
  )
557
565
 
558
566
 
567
+ def _check_db_version_ahead(s: DoctorState) -> CheckResult:
568
+ """FAIL/WARN when a DB's user_version exceeds the running binary's
569
+ registry head (issue #145). stats.db ahead bricks commands (FAIL);
570
+ cache.db ahead auto-heals on the next open (WARN). doctor reads raw
571
+ user_version (no dispatcher), so it can report without healing/bricking.
572
+ """
573
+ def _eval(status):
574
+ if not status:
575
+ return None
576
+ uv = status.get("user_version", 0) or 0
577
+ rs = status.get("registry_size", 0) or 0
578
+ return {"user_version": uv, "registry_size": rs, "ahead": uv > rs}
579
+
580
+ stats = _eval(s.stats_db_status)
581
+ cache = _eval(s.cache_db_status)
582
+ details = {"stats.db": stats, "cache.db": cache}
583
+ stats_ahead = bool(stats and stats["ahead"])
584
+ cache_ahead = bool(cache and cache["ahead"])
585
+
586
+ if stats_ahead:
587
+ return CheckResult(
588
+ id="db.version_ahead", title="Version ahead", severity="fail",
589
+ summary=f"stats.db ahead (v{stats['user_version']} > known v{stats['registry_size']})",
590
+ remediation="Run `cctally db recover --db stats --yes` (or restore from backup)",
591
+ details=details,
592
+ )
593
+ if cache_ahead:
594
+ return CheckResult(
595
+ id="db.version_ahead", title="Version ahead", severity="warn",
596
+ summary=f"cache.db ahead (v{cache['user_version']} > known v{cache['registry_size']}) — auto-heals",
597
+ remediation="Auto-heals on next command, or run `cctally db recover --db cache`",
598
+ details=details,
599
+ )
600
+ return CheckResult(
601
+ id="db.version_ahead", title="Version ahead", severity="ok",
602
+ summary="none ahead", remediation=None, details=details,
603
+ )
604
+
605
+
559
606
  def _check_data_latest_snapshot_age(s: DoctorState) -> CheckResult:
560
607
  if s.latest_snapshot_at is None:
561
608
  return CheckResult(
@@ -859,13 +906,24 @@ def _check_safety_dashboard_bind(s: DoctorState) -> CheckResult:
859
906
  rem += "."
860
907
  note = ("A separate running dashboard process may have overridden via --host; "
861
908
  "the CLI sees config only.") if s.runtime_bind is None else None
909
+ # Conversation viewer (Plan 2, spec §5): a LAN bind WITH the
910
+ # `dashboard.expose_transcripts` opt-in serves raw conversation prose to
911
+ # the LAN. Surface that ONLY here (the bind already WARNs and is
912
+ # non-loopback by construction), additively — a loopback bind never
913
+ # reaches this branch, so the loopback report stays byte-identical
914
+ # regardless of the expose flag.
915
+ extra = {}
916
+ if s.expose_transcripts:
917
+ notes.append("transcripts exposed on LAN")
918
+ extra["transcripts_exposed_on_lan"] = True
862
919
  return CheckResult(
863
920
  id="safety.dashboard_bind", title="Dashboard bind",
864
921
  severity="warn", summary="; ".join(notes),
865
922
  remediation=rem,
866
923
  details={"config": s.dashboard_bind_stored,
867
924
  "runtime_bind": s.runtime_bind,
868
- **({"note": note} if note else {})},
925
+ **({"note": note} if note else {}),
926
+ **extra},
869
927
  )
870
928
 
871
929
 
@@ -1052,6 +1110,7 @@ _CATEGORY_DEFINITIONS: tuple[tuple[str, str, tuple[tuple[str, str], ...]], ...]
1052
1110
  ("db", "Database", (
1053
1111
  ("db.stats.file", "_check_db_stats_file"),
1054
1112
  ("db.cache.file", "_check_db_cache_file"),
1113
+ ("db.version_ahead", "_check_db_version_ahead"),
1055
1114
  ("db.migrations.applied", "_check_db_migrations_applied"),
1056
1115
  ("db.migrations.pending", "_check_db_migrations_pending"),
1057
1116
  )),