cctally 1.27.0 → 1.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/bin/_cctally_alerts.py +26 -1
- package/bin/_cctally_cache.py +278 -6
- package/bin/_cctally_config.py +153 -11
- package/bin/_cctally_core.py +230 -41
- package/bin/_cctally_dashboard.py +399 -37
- package/bin/_cctally_db.py +594 -163
- package/bin/_cctally_doctor.py +11 -0
- package/bin/_cctally_forecast.py +700 -57
- package/bin/_cctally_milestones.py +273 -28
- package/bin/_cctally_parser.py +44 -4
- package/bin/_cctally_record.py +328 -50
- package/bin/_cctally_setup.py +7 -3
- package/bin/_cctally_statusline.py +8 -0
- package/bin/_cctally_update.py +3 -3
- package/bin/_cctally_weekrefs.py +30 -6
- package/bin/_lib_alert_axes.py +8 -1
- package/bin/_lib_alerts_payload.py +95 -3
- package/bin/_lib_budget.py +48 -0
- package/bin/_lib_conversation.py +162 -0
- package/bin/_lib_conversation_query.py +524 -0
- package/bin/_lib_doctor.py +60 -1
- package/bin/_lib_transcript_access.py +80 -0
- package/bin/cctally +40 -1
- package/dashboard/static/assets/{index-D34qf0LE.css → index-Bj5ckRUE.css} +1 -1
- package/dashboard/static/assets/index-Dw4G5FD9.js +18 -0
- package/dashboard/static/dashboard.html +2 -2
- package/package.json +4 -1
- package/dashboard/static/assets/index-C2F1_Mxt.js +0 -18
package/bin/_cctally_weekrefs.py
CHANGED
|
@@ -233,6 +233,15 @@ def _backfill_week_reset_events(conn: sqlite3.Connection) -> None:
|
|
|
233
233
|
effective reset moment is floored to the hour via `_floor_to_hour`
|
|
234
234
|
so minute/second-level Anthropic jitter ("in X hr Y min" relative-text
|
|
235
235
|
drift) doesn't masquerade as a reset.
|
|
236
|
+
|
|
237
|
+
ONE deliberate divergence from the live rule: backfill passes
|
|
238
|
+
``allow_reset_to_zero=False`` to ``_is_reset_drop``, so it fires only on
|
|
239
|
+
the unambiguous ``>=25pp`` drop. The lenient reset-to-zero signal is
|
|
240
|
+
live-only — the live path debounces a transient API zero (issue #128),
|
|
241
|
+
but this one-shot historical scan has no debounce and would otherwise
|
|
242
|
+
mis-read a stale-replica 0% blip (``6% → 0% → 1%`` on a still-future
|
|
243
|
+
week_end) as a credit, segmenting the week into a degenerate zero-width
|
|
244
|
+
window. See ``_is_reset_drop`` for the full rationale.
|
|
236
245
|
"""
|
|
237
246
|
c = _cctally()
|
|
238
247
|
try:
|
|
@@ -278,7 +287,7 @@ def _backfill_week_reset_events(conn: sqlite3.Connection) -> None:
|
|
|
278
287
|
if (
|
|
279
288
|
captured_dt < prior_end_dt
|
|
280
289
|
and prior_pct is not None and cur_pct is not None
|
|
281
|
-
and _is_reset_drop(prior_pct, cur_pct)
|
|
290
|
+
and _is_reset_drop(prior_pct, cur_pct, allow_reset_to_zero=False)
|
|
282
291
|
):
|
|
283
292
|
# Floor to the hour so the display boundary lands on the
|
|
284
293
|
# natural hour mark (Anthropic's reset times are always
|
|
@@ -309,7 +318,7 @@ def _backfill_week_reset_events(conn: sqlite3.Connection) -> None:
|
|
|
309
318
|
if (
|
|
310
319
|
captured_dt < prior_end_dt
|
|
311
320
|
and prior_pct is not None and cur_pct is not None
|
|
312
|
-
and _is_reset_drop(prior_pct, cur_pct)
|
|
321
|
+
and _is_reset_drop(prior_pct, cur_pct, allow_reset_to_zero=False)
|
|
313
322
|
):
|
|
314
323
|
# Pre-check on ``new_week_end_at`` (mirrors the live
|
|
315
324
|
# detection path's pre-check). Necessary because the
|
|
@@ -392,7 +401,9 @@ _RESET_ZERO_FLOOR_PCT = 1.0
|
|
|
392
401
|
_RESET_ZERO_MIN_DROP_PCT = 3.0
|
|
393
402
|
|
|
394
403
|
|
|
395
|
-
def _is_reset_drop(
|
|
404
|
+
def _is_reset_drop(
|
|
405
|
+
prior_pct: float, cur_pct: float, *, allow_reset_to_zero: bool = True
|
|
406
|
+
) -> bool:
|
|
396
407
|
"""True when ``prior_pct → cur_pct`` is a genuine weekly reset/credit.
|
|
397
408
|
|
|
398
409
|
Two independent percent-shape signals (OR):
|
|
@@ -400,17 +411,30 @@ def _is_reset_drop(prior_pct: float, cur_pct: float) -> bool:
|
|
|
400
411
|
* **Partial credit** — drop ``>= _RESET_PCT_DROP_THRESHOLD`` (25pp).
|
|
401
412
|
* **Reset-to-zero** — ``cur_pct`` collapses to ~0
|
|
402
413
|
(``<= _RESET_ZERO_FLOOR_PCT``) with a drop clearing
|
|
403
|
-
``_RESET_ZERO_MIN_DROP_PCT``.
|
|
414
|
+
``_RESET_ZERO_MIN_DROP_PCT``. Gated on ``allow_reset_to_zero``.
|
|
415
|
+
|
|
416
|
+
``allow_reset_to_zero`` scopes the lenient reset-to-zero signal to the
|
|
417
|
+
sites that can afford it. **Live** current-week detection passes the
|
|
418
|
+
default ``True``: the live in-place path debounces a transient API zero
|
|
419
|
+
(issue #128 — arm on the first ~0, confirm only if it stays low, clear
|
|
420
|
+
on recovery). The **historical backfill**
|
|
421
|
+
(``_backfill_week_reset_events``) passes ``False`` — it is a one-shot
|
|
422
|
+
scan with NO debounce, so a single stale-replica 0% reading on a
|
|
423
|
+
still-future ``week_end`` (e.g. a ``6% → 0% → 1%`` blip) would otherwise
|
|
424
|
+
be mis-read as a goodwill credit and segment the week into a degenerate
|
|
425
|
+
zero-width window. Backfill therefore fires only on the unambiguous
|
|
426
|
+
``>=25pp`` drop and defers sub-25pp reset-to-zero to the live path.
|
|
404
427
|
|
|
405
428
|
Callers retain the boundary predicates (same/advanced ``week_end_at``
|
|
406
429
|
AND ``prior_end_dt > now``); this helper owns ONLY the percent-shape
|
|
407
|
-
discrimination
|
|
408
|
-
in-place, backfill advance, backfill in-place) stay byte-identical.
|
|
430
|
+
discrimination.
|
|
409
431
|
"""
|
|
410
432
|
cur = float(cur_pct)
|
|
411
433
|
drop = float(prior_pct) - cur
|
|
412
434
|
if drop >= _RESET_PCT_DROP_THRESHOLD:
|
|
413
435
|
return True
|
|
436
|
+
if not allow_reset_to_zero:
|
|
437
|
+
return False
|
|
414
438
|
return cur <= _RESET_ZERO_FLOOR_PCT and drop >= _RESET_ZERO_MIN_DROP_PCT
|
|
415
439
|
|
|
416
440
|
|
package/bin/_lib_alert_axes.py
CHANGED
|
@@ -37,7 +37,7 @@ def severity_for(threshold: int) -> str:
|
|
|
37
37
|
class AlertAxisDescriptor:
|
|
38
38
|
"""Axis-agnostic metadata shared by the record path + dashboard envelope."""
|
|
39
39
|
|
|
40
|
-
id: str # 'weekly' | 'five_hour' | 'budget' | 'projected' | 'project_budget'
|
|
40
|
+
id: str # 'weekly' | 'five_hour' | 'budget' | 'projected' | 'project_budget' | 'codex_budget'
|
|
41
41
|
chip_label: str # SHOUT form, byte-identical with alertAxis.ts AXIS_CHIP_LABEL
|
|
42
42
|
title_label: str # sentence-case form, byte-identical with AXIS_TITLE_LABEL
|
|
43
43
|
milestone_table: str # SQLite table the dashboard envelope SELECTs from
|
|
@@ -53,6 +53,13 @@ AXIS_REGISTRY: "tuple[AlertAxisDescriptor, ...]" = (
|
|
|
53
53
|
AlertAxisDescriptor(
|
|
54
54
|
"project_budget", "PROJECT", "Project budget", "project_budget_milestones"
|
|
55
55
|
),
|
|
56
|
+
# Per-vendor Codex budget alerts (calendar-period; calendar-period-codex-budgets
|
|
57
|
+
# feature). Distinct "CODEX" chip vs the global "BUDGET" / per-project
|
|
58
|
+
# "PROJECT" chips; its own forward-only `codex_budget_milestones` table keyed
|
|
59
|
+
# on the resolved period-window start instant (period_start_at, threshold).
|
|
60
|
+
AlertAxisDescriptor(
|
|
61
|
+
"codex_budget", "CODEX", "Codex budget", "codex_budget_milestones"
|
|
62
|
+
),
|
|
56
63
|
)
|
|
57
64
|
|
|
58
65
|
AXIS_BY_ID: "dict[str, AlertAxisDescriptor]" = {d.id: d for d in AXIS_REGISTRY}
|
|
@@ -222,13 +222,23 @@ def _build_alert_payload_budget(
|
|
|
222
222
|
budget_usd: float,
|
|
223
223
|
spent_usd: float,
|
|
224
224
|
consumption_pct: float,
|
|
225
|
+
period: str = "subscription-week",
|
|
225
226
|
) -> dict:
|
|
226
227
|
"""Build the alert payload for an equiv-$ budget threshold crossing.
|
|
227
228
|
|
|
228
229
|
See ``_build_alert_payload_weekly`` for the ``alerted_at == crossed_at``
|
|
229
230
|
rationale (set-then-dispatch invariant). ``axis: "budget"`` is the third
|
|
230
231
|
alert axis (Task 4 surfaces it in the dashboard Recent-alerts panel).
|
|
231
|
-
|
|
232
|
+
|
|
233
|
+
``period`` defaults to ``subscription-week`` (the existing behavior — a
|
|
234
|
+
calendar-period-codex-budgets generalization, spec §6). The ``week_start_at``
|
|
235
|
+
key column carries the resolved PERIOD-start instant for a calendar period
|
|
236
|
+
(the name stays a back-compat misnomer, like ``weekly_usd``); the
|
|
237
|
+
additive ``period`` + ``period_start_at`` context fields let the dashboard
|
|
238
|
+
(Task 4) label "Month" / "Calendar week" instead of the hardcoded "Week".
|
|
239
|
+
The legacy subscription-week case is byte-stable on the rendered text — the
|
|
240
|
+
new context keys are purely additive and consumed only by the period-aware
|
|
241
|
+
label fix."""
|
|
232
242
|
return {
|
|
233
243
|
"id": f"budget:{week_start_at}:{threshold}",
|
|
234
244
|
"axis": "budget",
|
|
@@ -237,6 +247,8 @@ def _build_alert_payload_budget(
|
|
|
237
247
|
"alerted_at": crossed_at_utc, # set-then-dispatch
|
|
238
248
|
"context": {
|
|
239
249
|
"week_start_at": week_start_at,
|
|
250
|
+
"period": str(period),
|
|
251
|
+
"period_start_at": week_start_at,
|
|
240
252
|
"budget_usd": float(budget_usd),
|
|
241
253
|
"spent_usd": float(spent_usd),
|
|
242
254
|
"consumption_pct": float(consumption_pct),
|
|
@@ -314,6 +326,78 @@ def _build_alert_payload_project_budget(
|
|
|
314
326
|
}
|
|
315
327
|
|
|
316
328
|
|
|
329
|
+
def _alert_text_codex_budget(
|
|
330
|
+
payload: dict, tz: "ZoneInfo | None"
|
|
331
|
+
) -> tuple[str, str, str]:
|
|
332
|
+
"""Build (title, subtitle, body) for a Codex budget threshold alert (axis
|
|
333
|
+
``codex_budget``, the sixth alert axis; calendar-period-codex-budgets spec
|
|
334
|
+
§6).
|
|
335
|
+
|
|
336
|
+
Mirrors :func:`_alert_text_budget` but labels the vendor (Codex) and the
|
|
337
|
+
civil period (Month / Calendar week) read from the period context so the
|
|
338
|
+
notification reads apart from a Claude budget alert. The rendered numbers
|
|
339
|
+
come from the payload (snapshotted at crossing), never live config that may
|
|
340
|
+
have changed since. ``period_start_at`` is an instant but the text doesn't
|
|
341
|
+
render it as a clock time, so no ``format_display_dt`` call is needed; ``tz``
|
|
342
|
+
is accepted for signature parity with peer ``_alert_text_*`` builders and
|
|
343
|
+
intentionally unused (same as ``_alert_text_budget``)."""
|
|
344
|
+
threshold = int(payload["threshold"])
|
|
345
|
+
ctx = payload.get("context") or {}
|
|
346
|
+
period = ctx.get("period")
|
|
347
|
+
period_label = {
|
|
348
|
+
"calendar-month": "this month",
|
|
349
|
+
"calendar-week": "this week",
|
|
350
|
+
}.get(period, "this period")
|
|
351
|
+
title = "cctally - Codex budget"
|
|
352
|
+
subtitle = f"{threshold}% of Codex budget ({period_label})"
|
|
353
|
+
spent = float(ctx.get("spent_usd") or 0.0)
|
|
354
|
+
budget = float(ctx.get("budget_usd") or 0.0)
|
|
355
|
+
consumption = float(ctx.get("consumption_pct") or 0.0)
|
|
356
|
+
body = (
|
|
357
|
+
f"Codex - ${spent:,.2f} of ${budget:,.2f} "
|
|
358
|
+
f"({consumption:.0f}% of budget)"
|
|
359
|
+
)
|
|
360
|
+
return title, subtitle, body
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _build_alert_payload_codex_budget(
|
|
364
|
+
*,
|
|
365
|
+
threshold: int,
|
|
366
|
+
crossed_at_utc: str,
|
|
367
|
+
period_start_at: str,
|
|
368
|
+
period: str,
|
|
369
|
+
budget_usd: float,
|
|
370
|
+
spent_usd: float,
|
|
371
|
+
consumption_pct: float,
|
|
372
|
+
) -> dict:
|
|
373
|
+
"""Build the alert payload for a Codex budget threshold crossing (axis
|
|
374
|
+
``codex_budget``, the sixth alert axis; spec §6).
|
|
375
|
+
|
|
376
|
+
Mirrors :func:`_build_alert_payload_budget` but keyed on the resolved
|
|
377
|
+
CALENDAR-period window (``period_start_at`` in place of ``week_start_at``)
|
|
378
|
+
and carrying the period DISCRIMINATOR (``period`` = calendar-week /
|
|
379
|
+
calendar-month) in the context so the dashboard (Task 4) labels Month /
|
|
380
|
+
Calendar week instead of the hardcoded "Week". See
|
|
381
|
+
:func:`_build_alert_payload_weekly` for the ``alerted_at == crossed_at``
|
|
382
|
+
rationale (set-then-dispatch invariant). The dashboard envelope (Task 4)
|
|
383
|
+
surfaces this axis in the Recent-alerts panel from the row-sourced context.
|
|
384
|
+
"""
|
|
385
|
+
return {
|
|
386
|
+
"id": f"codex_budget:{period_start_at}:{threshold}",
|
|
387
|
+
"axis": "codex_budget",
|
|
388
|
+
"threshold": int(threshold),
|
|
389
|
+
"crossed_at": crossed_at_utc,
|
|
390
|
+
"alerted_at": crossed_at_utc, # set-then-dispatch
|
|
391
|
+
"context": {
|
|
392
|
+
"period": str(period),
|
|
393
|
+
"period_start_at": period_start_at,
|
|
394
|
+
"budget_usd": float(budget_usd),
|
|
395
|
+
"spent_usd": float(spent_usd),
|
|
396
|
+
"consumption_pct": float(consumption_pct),
|
|
397
|
+
},
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
|
|
317
401
|
def _alert_text_projected(payload: dict, tz: "ZoneInfo | None") -> tuple[str, str, str]:
|
|
318
402
|
"""Build (title, subtitle, body) for a projected-pace alert (#121).
|
|
319
403
|
|
|
@@ -334,6 +418,13 @@ def _alert_text_projected(payload: dict, tz: "ZoneInfo | None") -> tuple[str, st
|
|
|
334
418
|
title = f"cctally - projected to reach {t}% this week"
|
|
335
419
|
subtitle = "On current pace (projection)"
|
|
336
420
|
body = f"Projected ~{proj:.0f}% of cap by reset (week-average pace)"
|
|
421
|
+
elif metric == "codex_budget_usd":
|
|
422
|
+
title = "cctally - Codex projected to exceed budget"
|
|
423
|
+
subtitle = f"On current pace (projection) - {t}% of Codex budget"
|
|
424
|
+
body = (
|
|
425
|
+
f"Projected ${proj:,.2f} of ${denom:,.2f} Codex budget "
|
|
426
|
+
f"(period-average pace)"
|
|
427
|
+
)
|
|
337
428
|
else: # budget_usd
|
|
338
429
|
title = "cctally - projected to exceed budget"
|
|
339
430
|
subtitle = f"On current pace (projection) - {t}% of budget"
|
|
@@ -355,8 +446,9 @@ def _build_alert_payload_projected(
|
|
|
355
446
|
"""Build the alert payload for a projected-pace threshold crossing (#121).
|
|
356
447
|
|
|
357
448
|
``axis: "projected"`` is the fourth alert axis; ``metric`` discriminates
|
|
358
|
-
``weekly_pct`` (denominator 100.0, "% of cap") from ``budget_usd``
|
|
359
|
-
(denominator = target_usd, "$ of budget"
|
|
449
|
+
``weekly_pct`` (denominator 100.0, "% of cap") from ``budget_usd`` and
|
|
450
|
+
``codex_budget_usd`` (denominator = target_usd, "$ of budget"; the codex
|
|
451
|
+
variant renders Codex-flavored text). The frontend renders context
|
|
360
452
|
FROM these row-sourced fields (``metric`` / ``projected_value`` /
|
|
361
453
|
``denominator``), not from live config that may have changed since crossing
|
|
362
454
|
(Codex P0-4). No ``crossed_at``/``alerted_at`` keys here: the projected
|
package/bin/_lib_budget.py
CHANGED
|
@@ -33,6 +33,54 @@ def project_linear(
|
|
|
33
33
|
return (current + rate_low * remaining, current + rate_high * remaining)
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
def calendar_month_window(
    now: dt.datetime, tz: dt.tzinfo
) -> tuple[dt.datetime, dt.datetime]:
    """Civil month window in ``tz``, returned as UTC-normalized instants.

    Pure; no I/O.

    Args:
        now: tz-aware datetime identifying the month of interest.
        tz: tzinfo in which the civil month is evaluated.

    Returns:
        ``(start_utc, end_utc)`` where ``start`` is the 1st of ``now``'s civil
        month at 00:00 local and ``end`` is the 1st of the *next* month at
        00:00 local, both converted to UTC. The rollover is done on the
        ``(year, month)`` fields with a year carry for December, never with a
        fixed day count, so 28/29/30/31-day months and Dec->Jan are exact.

    The window depends only on which civil month ``now`` falls in. ``fold`` is
    forced to 0 on the start so that, if local midnight on the 1st is
    ambiguous (a DST fall-back that repeats 00:00), the first occurrence is
    always chosen instead of whichever ``fold`` ``now`` happened to carry.
    """
    local = now.astimezone(tz)
    # ``replace`` would otherwise copy ``now``'s fold onto the boundary,
    # making the returned instant vary with the moment of the call.
    start_local = local.replace(
        day=1, hour=0, minute=0, second=0, microsecond=0, fold=0
    )
    if start_local.month == 12:
        # December rolls into January of the following year.
        end_local = start_local.replace(year=start_local.year + 1, month=1)
    else:
        end_local = start_local.replace(month=start_local.month + 1)
    return (
        start_local.astimezone(dt.timezone.utc),
        end_local.astimezone(dt.timezone.utc),
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def calendar_week_window(
    now: dt.datetime, tz: dt.tzinfo, week_start_idx: int
) -> tuple[dt.datetime, dt.datetime]:
    """Civil week window in ``tz``, returned as UTC-normalized instants.

    Pure; no I/O.

    Args:
        now: tz-aware datetime identifying the week of interest.
        tz: tzinfo in which the civil week is evaluated.
        week_start_idx: weekday the week opens on (Mon=0..Sun=6).

    Returns:
        ``(start_utc, end_utc)``. The start is 00:00 local on the most recent
        ``week_start_idx`` weekday on or before ``now``'s local date; the end
        is seven calendar days later. Both offsets are applied to the aware
        local datetime before the conversion to UTC.
    """
    local_now = now.astimezone(tz)
    # Days to step back to reach the configured first weekday (0 when today
    # already is that weekday).
    days_into_week = (local_now.weekday() - week_start_idx) % 7
    local_midnight = local_now.replace(hour=0, minute=0, second=0, microsecond=0)
    week_open = local_midnight - dt.timedelta(days=days_into_week)
    week_close = week_open + dt.timedelta(weeks=1)
    return (
        week_open.astimezone(dt.timezone.utc),
        week_close.astimezone(dt.timezone.utc),
    )
|
|
82
|
+
|
|
83
|
+
|
|
36
84
|
@dataclass(frozen=True)
|
|
37
85
|
class BudgetInputs:
|
|
38
86
|
target_usd: float
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Pure parser kernel for the conversation viewer (Plan 1).
|
|
2
|
+
|
|
3
|
+
Turns Claude Code transcript JSONL lines into normalized conversation_messages
|
|
4
|
+
rows. No DB, no clock, no I/O beyond the passed text-mode file handle — directly
|
|
5
|
+
unit-testable. Mirrors _lib_jsonl.py's readline()+tell() byte-offset discipline
|
|
6
|
+
so the message walk can share sync_cache's per-file cursor and rewind a partial
|
|
7
|
+
mid-write tail line. Spec §1, §2.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
# Values assigned to ``MessageRow.entry_type`` by ``_normalize``.
HUMAN = "human"
ASSISTANT = "assistant"
TOOL_RESULT = "tool_result"

# Upper bound on the text kept for a tool_result block in ``blocks_json``;
# longer text is cut to this length and the block is flagged ``truncated``.
_TOOL_RESULT_CAP = 4000  # chars; full text always re-derivable from JSONL
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class MessageRow:
    """One normalized user/assistant transcript line, as built by ``_normalize``."""

    # Handle position (``fh.tell()``) taken just before the line was read.
    byte_offset: int
    # Copied from the line's ``sessionId`` / ``uuid`` / ``parentUuid`` /
    # ``timestamp`` keys (``timestamp`` is stored as found, not re-parsed).
    session_id: "str | None"
    uuid: "str | None"
    parent_uuid: "str | None"
    timestamp_utc: "str | None"
    # One of HUMAN / ASSISTANT / TOOL_RESULT.
    entry_type: str
    # Joined ``text`` blocks only; forced to "" for TOOL_RESULT rows.
    text: str
    # Compact JSON dump of the normalized block list.
    blocks_json: str
    # ``model`` / ``msg_id`` / ``req_id`` are populated for assistant lines
    # only and are None otherwise.
    model: "str | None"
    msg_id: "str | None"
    req_id: "str | None"
    # Copied from the line's ``cwd`` / ``gitBranch`` keys.
    cwd: "str | None"
    git_branch: "str | None"
    # 1 when the line's ``isSidechain`` is truthy, else 0.
    is_sidechain: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def iter_message_rows(fh, path_str):
    """Yield one MessageRow per user/assistant JSONL line from fh's current
    position.

    Skipped lines (the handle still advances past them):

    * blank lines;
    * lines that are not valid JSON;
    * lines that are valid JSON but not an object (array, string, number,
      ``null``) — these have no ``type`` / ``uuid`` to inspect;
    * objects whose ``type`` is not ``user`` / ``assistant`` (e.g. summary,
      file-history-snapshot);
    * objects without a truthy ``uuid``.

    A partial tail line (no trailing newline) rewinds the handle to the start
    of that line and stops, so the next sync re-reads it once complete.

    ``fh`` must support ``tell()``, ``readline()`` and ``seek()``; each row's
    ``byte_offset`` is the ``tell()`` value taken before its line was read.

    ``path_str`` is accepted for caller symmetry — the sync ingest threads
    ``source_path`` into each row at write time — but the kernel itself does
    not use it (the returned MessageRow carries only ``byte_offset``)."""
    while True:
        offset = fh.tell()
        line = fh.readline()
        if not line:
            # Clean EOF.
            return
        if not line.endswith("\n"):
            # Writer is mid-line: leave the cursor at the line start.
            fh.seek(offset)
            return
        s = line.strip()
        if not s:
            continue
        try:
            obj = json.loads(s)
        except json.JSONDecodeError:
            continue
        if not isinstance(obj, dict):
            # Parsed fine but is not a JSON object; ``.get`` below would
            # raise AttributeError and abort the whole walk.
            continue
        t = obj.get("type")
        if t not in ("user", "assistant"):
            continue
        if not obj.get("uuid"):
            continue
        yield _normalize(obj, t, offset)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _normalize(obj, t, offset):
    """Build the MessageRow for one parsed transcript object.

    ``obj`` is the decoded JSON object, ``t`` its ``type`` (``"user"`` or
    ``"assistant"``) and ``offset`` the handle position of the line. A
    ``message`` value that is not a dict is treated as empty.

    Classification: assistant lines are ASSISTANT; a user line containing any
    ``tool_result`` block is TOOL_RESULT; every other user line is HUMAN.
    ``model`` / ``msg_id`` / ``req_id`` are filled for assistant lines only.
    """
    message = obj.get("message")
    if not isinstance(message, dict):
        message = {}
    blocks, prose = _blocks_and_text(message.get("content"))

    from_assistant = t == "assistant"
    if from_assistant:
        kind = ASSISTANT
    elif any(block["kind"] == "tool_result" for block in blocks):
        kind = TOOL_RESULT
        # tool_result rows are stored but NOT indexed as prose (spec §2): a
        # user line that mixes a text block with a tool_result block must not
        # leak that text into the FTS index. The full content stays in
        # blocks_json for rendering.
        prose = ""
    else:
        kind = HUMAN

    return MessageRow(
        byte_offset=offset,
        session_id=obj.get("sessionId"),
        uuid=obj.get("uuid"),
        parent_uuid=obj.get("parentUuid"),
        timestamp_utc=obj.get("timestamp"),
        entry_type=kind,
        text=prose,
        blocks_json=json.dumps(blocks, separators=(",", ":")),
        model=message.get("model") if from_assistant else None,
        msg_id=message.get("id") if from_assistant else None,
        req_id=obj.get("requestId") if from_assistant else None,
        cwd=obj.get("cwd"),
        git_branch=obj.get("gitBranch"),
        is_sidechain=1 if obj.get("isSidechain") else 0,
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _blocks_and_text(content):
|
|
106
|
+
"""Return (normalized blocks list, indexed-prose string). Prose = joined
|
|
107
|
+
`text` blocks only (thinking / tool_use / tool_result excluded)."""
|
|
108
|
+
if isinstance(content, str):
|
|
109
|
+
return ([{"kind": "text", "text": content}] if content else []), content
|
|
110
|
+
blocks, texts = [], []
|
|
111
|
+
if isinstance(content, list):
|
|
112
|
+
for b in content:
|
|
113
|
+
if not isinstance(b, dict):
|
|
114
|
+
continue
|
|
115
|
+
bt = b.get("type")
|
|
116
|
+
if bt == "text":
|
|
117
|
+
txt = b.get("text", "") or ""
|
|
118
|
+
blocks.append({"kind": "text", "text": txt})
|
|
119
|
+
texts.append(txt)
|
|
120
|
+
elif bt == "thinking":
|
|
121
|
+
blocks.append({"kind": "thinking", "text": b.get("thinking", "") or ""})
|
|
122
|
+
elif bt == "tool_use":
|
|
123
|
+
blocks.append({"kind": "tool_use", "name": b.get("name"),
|
|
124
|
+
"input_summary": _summarize(b.get("input"))})
|
|
125
|
+
elif bt == "tool_result":
|
|
126
|
+
raw = _stringify(b.get("content"))
|
|
127
|
+
blocks.append({"kind": "tool_result", "text": raw[:_TOOL_RESULT_CAP],
|
|
128
|
+
"truncated": len(raw) > _TOOL_RESULT_CAP,
|
|
129
|
+
"is_error": bool(b.get("is_error"))})
|
|
130
|
+
elif bt in ("image", "document"):
|
|
131
|
+
blocks.append({"kind": bt, **_media(b.get("source"))})
|
|
132
|
+
elif bt == "tool_reference":
|
|
133
|
+
blocks.append({"kind": "tool_reference", "name": b.get("name")})
|
|
134
|
+
return blocks, "\n".join(t for t in texts if t)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _stringify(c):
|
|
138
|
+
if isinstance(c, str):
|
|
139
|
+
return c
|
|
140
|
+
if isinstance(c, list):
|
|
141
|
+
out = []
|
|
142
|
+
for b in c:
|
|
143
|
+
if isinstance(b, dict) and b.get("type") == "text":
|
|
144
|
+
out.append(b.get("text", "") or "")
|
|
145
|
+
elif isinstance(b, str):
|
|
146
|
+
out.append(b)
|
|
147
|
+
return "\n".join(out)
|
|
148
|
+
return "" if c is None else json.dumps(c, separators=(",", ":"))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _summarize(inp):
|
|
152
|
+
if not isinstance(inp, dict):
|
|
153
|
+
return ""
|
|
154
|
+
s = json.dumps(inp, separators=(",", ":"))
|
|
155
|
+
return s[:200]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _media(source):
|
|
159
|
+
if not isinstance(source, dict):
|
|
160
|
+
return {"media_type": None, "bytes": 0}
|
|
161
|
+
data = source.get("data") or ""
|
|
162
|
+
return {"media_type": source.get("media_type"), "bytes": len(data)}
|