octarin-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,685 @@
1
+ """Claude Code -> Octarin capture hook (pure stdlib, fail-open).
2
+
3
+ Registered as a Claude Code ``Stop`` hook. On each turn-end Claude Code pipes a
4
+ small JSON payload on stdin (``session_id``, ``transcript_path``, ``cwd``, ...).
5
+ This hook:
6
+
7
+ 1. reads that payload and locates the session transcript JSONL;
8
+ 2. parses user/assistant turns, tool calls, token usage, and model;
9
+ 3. builds a single canonical ``IngestEvent`` (full ``spans`` form) covering the
10
+ turns produced since the last run (tracked via a per-session offset file);
11
+ 4. POSTs it to ``${OCTARIN_INGEST_URL:-$OCTARIN_API_BASE/v1/ingest}`` with
12
+ ``Authorization: Bearer $OCTARIN_API_KEY``.
13
+
14
+ It is deliberately tiny and dependency-free (stdlib only). Every failure path
15
+ exits 0 so the host tool is never blocked, and the network call has a hard
16
+ timeout. The canonical shape is defined in ``backend/app/schema/canonical.py``.
17
+
18
+ Why this file disables a few ruff rules at the module level:
19
+
20
+ * ``BLE001`` (bare ``except Exception``) and ``S110``/``S112`` (try/except/
21
+ pass / continue) are EXPLICITLY the design — a capture hook that raises
22
+ or logs into stderr breaks the host tool's UX. We swallow everything and
23
+ exit 0.
24
+ * ``S310`` (unaudited URL scheme on ``urllib.request.urlopen``) — the URL
25
+ comes from our own ``OCTARIN_INGEST_URL`` / ``OCTARIN_API_BASE`` env, not
26
+ user input.
27
+
28
+ The hook is invoked by ``run.sh`` via ``exec python3 hook.py``, so no shebang
29
+ is needed — and dropping it sidesteps EXE001 (shebang on non-executable file)
30
+ in every consuming repo.
31
+ """
32
+ # ruff: noqa: BLE001, S110, S112, S310, INP001
33
+ # (INP001: this is a standalone Claude Code hook script — `.claude/octarin/` is
34
+ # a config directory that happens to contain a .py; it's not a Python package
35
+ # and adding __init__.py would falsely advertise importability from elsewhere
36
+ # in the host repo.)
37
+
38
+ from __future__ import annotations
39
+
40
+ import base64
41
+ import getpass
42
+ import hashlib
43
+ import json
44
+ import os
45
+ import subprocess
46
+ import sys
47
+ import time
48
+ import urllib.error
49
+ import urllib.request
50
+ import uuid
51
+ from datetime import datetime, timezone
52
+ from pathlib import Path
53
+
54
# Value sent as the event's ``source`` field and mixed into the trace-id seed.
SOURCE = "claude-code"
# Per-user directory for the hook's on-disk state (offset file, auth-hint markers).
STATE_DIR = Path.home() / ".octarin"
# JSON file mapping a per-session key to the transcript byte offset already read.
STATE_FILE = STATE_DIR / "claude_code_state.json"
MAX_TEXT = 20_000  # cap stored input/output text so payloads stay small
# Timeout (seconds) passed to ``urlopen`` for the ingest POST.
HTTP_TIMEOUT_S = 5.0
# Named HTTP status codes — keeps the response-handling logic free of magic
# numbers and matches the ``PLR2004`` lint contract in strict ruff profiles.
HTTP_OK = 200
HTTP_MULTIPLE_CHOICES = 300
HTTP_UNAUTHORIZED = 401
# Cap per-attachment base64 payload we ship inline. Larger items are recorded
# metadata-only (no b64) so a giant paste never bloats the POST or the backend.
MAX_ATTACHMENT_BYTES = 5 * 1024 * 1024  # ~5MB of raw bytes
# Map common file extensions -> mime for file refs that lack one.
_EXT_MIME = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    ".pdf": "application/pdf",
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".json": "application/json",
    ".csv": "text/csv",
}
# Same UUID5 namespace as backend deterministic_trace_id so retries de-dupe.
_TRACE_NAMESPACE = uuid.UUID("6f8d2c1e-9a3b-4f5e-8c7d-1a2b3c4d5e6f")
83
+
84
+
85
+ def _now_iso() -> str:
86
+ return datetime.now(timezone.utc).isoformat()
87
+
88
+
89
def _truncate(text: str) -> str:
    """Clip *text* to at most ``MAX_TEXT`` characters.

    Falsy input (``None``, ``""``) is normalised to the empty string.
    """
    if not text:
        return ""
    if len(text) > MAX_TEXT:
        return text[:MAX_TEXT]
    return text
93
+
94
+
95
def read_payload() -> dict:
    """Parse the hook's JSON payload from stdin.

    Returns the decoded value when it is a JSON object. Blank input, JSON of
    any other type, and every read/parse failure all yield ``{}``.
    """
    try:
        text = sys.stdin.read()
        if text.strip():
            decoded = json.loads(text)
            if isinstance(decoded, dict):
                return decoded
    except Exception:
        pass
    return {}
105
+
106
+
107
def locate_transcript(payload: dict) -> tuple[str | None, Path | None, str | None]:
    """Pull ``(session_id, transcript_path, cwd)`` from the hook payload.

    Each value is looked up under its snake_case key, then its camelCase key,
    then a nested form (``session.id`` / ``transcript.path``). The nested
    lookup is only attempted when the container really is a dict, so a payload
    such as ``{"session": "abc"}`` yields ``None`` instead of raising.

    Returns:
        ``(session_id, path, cwd)``. ``path`` is a user-expanded ``Path`` or
        ``None`` when no path was supplied or it could not be built; ``cwd``
        falls back to ``workspace`` and is ``None`` when neither is set.
    """

    def _nested(container_key: str, field: str):
        container = payload.get(container_key)
        return container.get(field) if isinstance(container, dict) else None

    session_id = (
        payload.get("session_id")
        or payload.get("sessionId")
        or _nested("session", "id")
    )
    raw_path = (
        payload.get("transcript_path")
        or payload.get("transcriptPath")
        or _nested("transcript", "path")
    )
    cwd = payload.get("cwd") or payload.get("workspace") or None

    path: Path | None = None
    if raw_path:
        try:
            path = Path(raw_path).expanduser()
        except Exception:
            # Non-path-like value or unresolvable ``~``: treat as "no transcript".
            path = None
    return session_id, path, cwd
127
+
128
+
129
+ # ── transcript helpers (mirror Claude Code's JSONL shape) ──
130
+
131
+
132
+ def _msg(entry: dict) -> dict:
133
+ m = entry.get("message")
134
+ return m if isinstance(m, dict) else {}
135
+
136
+
137
def _role(entry: dict) -> str | None:
    """Return ``"user"`` or ``"assistant"`` for a transcript entry, else ``None``.

    The top-level ``type`` field wins; the nested ``message.role`` is the
    fallback when ``type`` is something else.
    """
    speakers = ("user", "assistant")
    kind = entry.get("type")
    if kind in speakers:
        return kind
    nested = _msg(entry).get("role")
    if nested in speakers:
        return nested
    return None
143
+
144
+
145
def _content(entry: dict):
    """Return the entry's content payload.

    When the entry has a ``message`` key the content is read from that message
    (``None`` if the message is not a dict); otherwise the top-level
    ``content`` is used.
    """
    if "message" in entry:
        return _msg(entry).get("content")
    return entry.get("content")
148
+
149
+
150
+ def _text(content) -> str:
151
+ if isinstance(content, str):
152
+ return content
153
+ if isinstance(content, list):
154
+ parts = []
155
+ for x in content:
156
+ if isinstance(x, dict) and x.get("type") == "text":
157
+ parts.append(x.get("text", ""))
158
+ elif isinstance(x, str):
159
+ parts.append(x)
160
+ return "\n".join(p for p in parts if p)
161
+ return ""
162
+
163
+
164
+ def _blocks(content, block_type: str) -> list[dict]:
165
+ if not isinstance(content, list):
166
+ return []
167
+ return [x for x in content if isinstance(x, dict) and x.get("type") == block_type]
168
+
169
+
170
def _attachment_from_image_block(block: dict) -> dict | None:
    """Convert a Claude ``image`` content block into an attachment dict.

    Base64 sources are decoded once to measure their real size; the encoded
    payload is kept inline only when that size is within
    ``MAX_ATTACHMENT_BYTES`` (otherwise ``b64`` is ``None``). URL sources are
    recorded as metadata only, with ``bytes`` set to 0. Returns ``None`` when
    the block has no dict ``source``, no usable data, or undecodable data.
    """
    source = block.get("source")
    if not isinstance(source, dict):
        return None

    media_type = source.get("media_type") or "image/png"
    label = block.get("name") or block.get("filename") or "pasted-image"
    source_kind = source.get("type")

    if source_kind == "base64":
        encoded = source.get("data")
        if not (isinstance(encoded, str) and encoded):
            return None
        # Decode to learn the true byte size rather than estimating from length.
        try:
            size = len(base64.b64decode(encoded, validate=False))
        except Exception:
            return None
        return {
            "kind": "image",
            "mime": media_type,
            "name": str(label),
            "bytes": size,
            "b64": encoded if size <= MAX_ATTACHMENT_BYTES else None,
        }

    # URL-backed image: nothing to inline, keep the metadata.
    if source_kind == "url" and source.get("url"):
        return {
            "kind": "image",
            "mime": media_type,
            "name": str(label),
            "bytes": 0,
            "b64": None,
        }
    return None
206
+
207
+
208
def _mime_for_name(name: str) -> str:
    """Guess a mime type from the filename's extension (case-insensitive).

    Falls back to ``application/octet-stream`` when no known extension matches.
    """
    lowered = name.lower()
    return next(
        (mime for suffix, mime in _EXT_MIME.items() if lowered.endswith(suffix)),
        "application/octet-stream",
    )
215
+
216
+
217
def _attachment_from_file_block(block: dict) -> dict | None:
    """Convert a ``document``/file content block into an attachment dict.

    The display name comes from ``name``, ``title`` or ``filename`` (default
    ``"attached-file"``). When the block carries a base64 ``source`` the bytes
    are measured and inlined if within ``MAX_ATTACHMENT_BYTES``. A block with
    only a name is recorded metadata-only. Returns ``None`` for undecodable
    data or a block with neither inline data nor a name.
    """
    explicit_name = block.get("name") or block.get("title") or block.get("filename")
    label = str(explicit_name or "attached-file")
    source = block.get("source")

    has_inline_bytes = (
        isinstance(source, dict)
        and source.get("type") == "base64"
        and isinstance(source.get("data"), str)
    )
    if has_inline_bytes:
        encoded = source["data"]
        media_type = source.get("media_type") or _mime_for_name(label)
        try:
            size = len(base64.b64decode(encoded, validate=False))
        except Exception:
            return None
        return {
            "kind": "file",
            "mime": media_type,
            "name": label,
            "bytes": size,
            "b64": encoded if size <= MAX_ATTACHMENT_BYTES else None,
        }

    # No inline bytes: only worth recording if the block actually named a file.
    if not explicit_name:
        return None
    return {
        "kind": "file",
        "mime": _mime_for_name(label),
        "name": label,
        "bytes": 0,
        "b64": None,
    }
262
+
263
+
264
def _extract_attachments(content) -> list[dict]:
    """Collect image/file attachments from a content list.

    ``image`` blocks and ``document``/``file`` blocks are converted by their
    respective builders. Non-list content yields ``[]``; non-dict items and
    any block whose conversion raises are skipped, so this never propagates
    an error to the caller.
    """
    found: list[dict] = []
    if not isinstance(content, list):
        return found
    for item in content:
        if not isinstance(item, dict):
            continue
        try:
            kind = item.get("type")
            attachment = None
            if kind == "image":
                attachment = _attachment_from_image_block(item)
            elif kind in ("document", "file"):
                attachment = _attachment_from_file_block(item)
            if attachment:
                found.append(attachment)
        except Exception:
            continue
    return found
290
+
291
+
292
def _is_tool_result(entry: dict) -> bool:
    """Return True for a user-role entry carrying at least one ``tool_result`` block."""
    if _role(entry) != "user":
        return False
    return bool(_blocks(_content(entry), "tool_result"))
294
+
295
+
296
def _usage(entry: dict) -> dict:
    """Return the entry's token usage under short keys.

    Reads ``message.usage`` and maps it to ``input``, ``output``,
    ``cache_read`` and ``cache_write`` integer counts (missing or falsy values
    count as 0). Returns ``{}`` when there is no usage dict.
    """
    raw = _msg(entry).get("usage")
    if not isinstance(raw, dict):
        return {}
    field_map = (
        ("input", "input_tokens"),
        ("output", "output_tokens"),
        ("cache_read", "cache_read_input_tokens"),
        ("cache_write", "cache_creation_input_tokens"),
    )
    return {short: int(raw.get(source) or 0) for short, source in field_map}
306
+
307
+
308
+ def _ts(entry: dict) -> str | None:
309
+ v = entry.get("timestamp")
310
+ return v if isinstance(v, str) and v else None
311
+
312
+
313
def read_new_entries(path: Path, state: dict, key: str) -> list[dict]:
    """Return transcript entries appended since the last processed byte offset.

    The per-session offset lives in ``state[key]["offset"]`` and is updated in
    place (the caller persists ``state``). If the file is now smaller than the
    stored offset it is treated as rotated/truncated and re-read from the
    start.

    Lines are split on ``\\n`` only, because JSONL uses that delimiter and
    characters such as U+2028 may legally appear raw inside a JSON string.
    A trailing fragment with no newline is consumed only if it already parses
    as a JSON object; otherwise the offset stops before it so a half-written
    line is picked up on the next run instead of being lost.

    Args:
        path: Transcript JSONL file.
        state: Mutable offset state, keyed by session key.
        key: This session's key in ``state``.

    Returns:
        The newly read JSON objects, in file order. Lines that are blank, not
        valid JSON, or not JSON objects are skipped. Returns ``[]`` (leaving
        ``state`` untouched) when the file is missing or unreadable.
    """
    session = state.get(key)
    if not isinstance(session, dict):
        session = {}
    try:
        offset = max(int(session.get("offset", 0)), 0)
    except (TypeError, ValueError):
        # Corrupt state entry: start over rather than crash.
        offset = 0

    try:
        if path.stat().st_size < offset:  # transcript rotated/truncated
            offset = 0
        with path.open("rb") as fh:
            fh.seek(offset)
            chunk = fh.read()
    except Exception:
        return []

    def _parse(raw: bytes):
        """Decode one line; return the JSON object or None if unusable."""
        text = raw.decode("utf-8", errors="replace").strip()
        if not text:
            return None
        try:
            obj = json.loads(text)
        except Exception:
            return None
        return obj if isinstance(obj, dict) else None

    *complete, tail = chunk.split(b"\n")
    entries = [obj for obj in map(_parse, complete) if obj is not None]

    # Everything up to and including the last newline is always consumed.
    consumed = len(chunk) - len(tail)
    if not tail.strip():
        consumed = len(chunk)
    else:
        tail_obj = _parse(tail)
        if tail_obj is not None:
            entries.append(tail_obj)
            consumed = len(chunk)

    session["offset"] = offset + consumed
    state[key] = session
    return entries
343
+
344
+
345
def build_spans(  # noqa: PLR0915 - top-down transcript parser; splitting it
    entries: list[dict],  # would scatter the local span-bookkeeping state.
) -> tuple[list[dict], dict, list[str], str | None]:
    """Turn transcript entries into canonical spans + rolled-up totals.

    Each assistant message becomes one ``llm`` span (model + token usage); each
    ``tool_use`` inside it becomes a child ``tool`` span. Returns
    ``(spans, totals, models, repo)``.

    Works in two passes over ``entries``:

    1. Index every ``tool_result`` block by its ``tool_use_id`` (result text,
       attachments, and the timestamp of the entry that carried it).
    2. Walk the entries in order, remembering the latest plain user prompt and
       emitting spans for each assistant entry.

    Notes grounded in this function's body:

    * ``models`` lists distinct model names in first-seen order.
    * ``totals["cost_usd"]`` is initialised to ``0.0`` and never updated here.
    * Cache-write tokens are stored per span but not rolled into ``totals``.
    * The fourth element of the returned tuple is always ``None``.
    """
    # Map tool_use_id -> tool_result text for output enrichment, and
    # tool_use_id -> attachments for any images a tool returned.
    results_by_id: dict[str, str] = {}
    attachments_by_tool_id: dict[str, list[dict]] = {}
    # tool_use_id -> ts of the message that returned the result. Gives tool spans
    # a real (assistant_ts -> result_ts) duration instead of zero.
    result_ts_by_id: dict[str, str] = {}
    for entry in entries:
        if _is_tool_result(entry):
            entry_ts = _ts(entry)
            for tr in _blocks(_content(entry), "tool_result"):
                tid = tr.get("tool_use_id")
                if tid:
                    out = tr.get("content")
                    # Non-string result content is stored as its JSON encoding.
                    results_by_id[str(tid)] = (
                        out
                        if isinstance(out, str)
                        else json.dumps(out, ensure_ascii=False)
                    )
                    atts = _extract_attachments(out)
                    if atts:
                        attachments_by_tool_id[str(tid)] = atts
                    if entry_ts:
                        result_ts_by_id[str(tid)] = entry_ts

    spans: list[dict] = []
    models: list[str] = []
    totals = {
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
        "cache_read_tokens": 0,
        "cost_usd": 0.0,
        "span_count": 0,
        "tool_call_count": 0,
    }

    pending_user_text = ""
    pending_user_attachments: list[dict] = []
    # ts of the previous transcript entry; the LLM call started when the user
    # prompt / tool result landed, finished when the assistant message appears.
    prev_ts: str | None = None
    for entry in entries:
        role = _role(entry)
        if role == "user" and not _is_tool_result(entry):
            # NOTE: a later plain user message replaces the pending text, while
            # attachments (below) accumulate.
            pending_user_text = _truncate(_text(_content(entry)))
            # Images/files the user pasted into this turn ride along to the
            # assistant span they prompted (accumulate across consecutive user
            # messages until the next assistant generation consumes them).
            pending_user_attachments.extend(_extract_attachments(_content(entry)))
            prev_ts = _ts(entry) or prev_ts
            continue
        if role != "assistant":
            # Tool results and role-less entries only advance the clock.
            prev_ts = _ts(entry) or prev_ts
            continue

        content = _content(entry)
        usage = _usage(entry)
        model = _msg(entry).get("model")
        if model and model not in models:
            models.append(model)
        # Fall back to "now" / a random id when the entry lacks them.
        ts = _ts(entry) or _now_iso()
        span_id = _msg(entry).get("id") or uuid.uuid4().hex
        out_text = _truncate(_text(content))

        in_tok = usage.get("input", 0)
        out_tok = usage.get("output", 0)
        cache_r = usage.get("cache_read", 0)
        cache_w = usage.get("cache_write", 0)
        llm_span = {
            "span_id": str(span_id),
            "parent_span_id": None,
            "name": f"Claude generation ({model})" if model else "Claude generation",
            "span_type": "llm",
            "start_time": prev_ts or ts,
            "end_time": ts,
            "model": model,
            "provider": "anthropic",
            "input": pending_user_text or None,
            "output": out_text or None,
            "input_tokens": in_tok,
            "output_tokens": out_tok,
            "total_tokens": in_tok + out_tok,
            "cache_read_tokens": cache_r,
            "cache_write_tokens": cache_w,
            "status": "ok",
            "attributes": {"turn_role": "assistant"},
        }
        if pending_user_attachments:
            llm_span["attachments"] = pending_user_attachments
        spans.append(llm_span)
        pending_user_text = ""  # consumed by this generation
        pending_user_attachments = []  # consumed by this generation

        totals["input_tokens"] += in_tok
        totals["output_tokens"] += out_tok
        totals["cache_read_tokens"] += cache_r
        totals["total_tokens"] += in_tok + out_tok

        # One child span per tool call made by this assistant message.
        for tu in _blocks(content, "tool_use"):
            tid = str(tu.get("id") or uuid.uuid4().hex)
            tname = tu.get("name") or "unknown"
            tu_input = tu.get("input")
            input_str = (
                tu_input
                if isinstance(tu_input, str)
                else json.dumps(tu_input, ensure_ascii=False)
            )
            tool_span = {
                "span_id": tid,
                "parent_span_id": str(span_id),
                "name": f"Tool: {tname}",
                "span_type": "tool",
                "start_time": ts,
                # Zero-length span when no matching result timestamp was indexed.
                "end_time": result_ts_by_id.get(tid, ts),
                "input": _truncate(input_str),
                "output": _truncate(results_by_id.get(tid, "")) or None,
                "status": "ok",
                "attributes": {"tool_name": tname, "tool_id": tid},
            }
            tool_atts = attachments_by_tool_id.get(tid)
            if tool_atts:
                tool_span["attachments"] = tool_atts
            spans.append(tool_span)
            totals["tool_call_count"] += 1

        prev_ts = ts

    totals["span_count"] = len(spans)
    return spans, totals, models, None
484
+
485
+
486
def user_ref() -> str:
    """Resolve the engineer's real identity for attribution.

    Priority: an explicit ``OCTARIN_USER`` override → the Claude Code account
    email (``~/.claude.json`` ``oauthAccount.emailAddress`` — the signed-in user)
    → the git ``user.email`` → the OS username. We attribute to a real person
    (matching ``backfill.py`` and the per-user ingest key) rather than an opaque
    per-machine hash, so the dashboard shows who actually did the work. When the
    request carries a per-user key the server overrides this with the key owner
    anyway; a real identity here is what ANONYMOUS (slug-only) sends rely on.

    Every lookup is best-effort: a failing source is skipped and the next one
    is tried. The git lookup is bounded by a short timeout so a stuck ``git``
    process cannot stall the host tool.

    Returns:
        The first non-empty identity found, or ``"unknown"`` if all fail.
    """
    git_timeout_s = 2.0

    override = (os.environ.get("OCTARIN_USER") or "").strip()
    if override:
        return override

    try:
        with open(Path.home() / ".claude.json", encoding="utf-8") as fh:
            account = json.load(fh).get("oauthAccount") or {}
        email = (account.get("emailAddress") or "").strip()
        if email:
            return email
    except Exception:
        pass

    try:
        out = subprocess.check_output(
            ["git", "config", "user.email"],
            cwd=os.environ.get("CLAUDE_PROJECT_DIR") or os.getcwd(),
            stderr=subprocess.DEVNULL,
            timeout=git_timeout_s,  # TimeoutExpired is swallowed below
        )
        email = out.decode().strip()
        if email:
            return email
    except Exception:
        pass

    try:
        return getpass.getuser()
    except Exception:
        return "unknown"
523
+
524
+
525
def _notify_auth_required_once(project: str) -> None:
    """Write the ``login.sh`` hint to stderr, suppressed by a per-project marker.

    A marker file named after a short SHA-256 prefix of *project* is created
    under ``STATE_DIR`` before the hint is printed; when that marker already
    exists the function returns silently. If the marker cannot be checked or
    written, the error is swallowed and the hint is still printed.
    """
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        digest = hashlib.sha256(project.encode()).hexdigest()[:12]
        marker = STATE_DIR / f"auth_hint.{digest}"
        if marker.exists():
            return
        marker.write_text("", encoding="utf-8")
    except Exception:
        pass
    hint = (
        f"[octarin] project {project!r} now requires per-user auth. "
        "Run once to authorize:\n"
        "[octarin] curl -fsSL https://octarin.ai/hooks/login.sh | bash\n"
    )
    sys.stderr.write(hint)
548
+
549
+
550
def post_event(event: dict) -> bool:
    """POST the IngestEvent. Returns True on 2xx, False otherwise (fail-open).

    The target is ``OCTARIN_INGEST_URL`` or, failing that,
    ``$OCTARIN_API_BASE/v1/ingest``. Only ``http``/``https`` URLs are sent;
    anything else (or no URL at all) returns False without touching the
    network.

    Two auth modes:
    * Bearer key (``OCTARIN_API_KEY`` set) — per-user, minted by ``login.sh``.
    * Slug-only — no key, but ``OCTARIN_PROJECT`` is set. Hook adds
      ``X-Octarin-Project: <slug>`` AND embeds ``project`` in the body so
      the backend can match either way. Server enforces the project's
      ``allow_anonymous_ingest`` policy and rate-limits per IP.

    On a 401 with the ``auth_required`` server code (project flipped strict),
    we print the one-time ``login.sh`` hint so the user knows what to do next.

    Args:
        event: The IngestEvent; it is copied, never mutated.

    Returns:
        True only for a 2xx response. Serialization errors, network errors,
        timeouts and non-2xx responses all return False; nothing is raised.
    """
    url = (os.environ.get("OCTARIN_INGEST_URL") or "").strip()
    if not url:
        base = (os.environ.get("OCTARIN_API_BASE") or "").strip().rstrip("/")
        if not base:
            return False
        url = f"{base}/v1/ingest"
    # urlopen also understands file:/ftp: — never hand it those.
    if not url.lower().startswith(("http://", "https://")):
        return False

    # Stray whitespace/newlines from a sourced env file would corrupt headers.
    api_key = (os.environ.get("OCTARIN_API_KEY") or "").strip()
    project = (os.environ.get("OCTARIN_PROJECT") or "").strip()

    # Embed `project` in the body for slug-auth (server reads it from the body
    # OR an X-Octarin-Project header). Always safe to include — server ignores
    # it when a valid Bearer is present.
    payload = dict(event)
    if project and "project" not in payload:
        payload["project"] = project

    try:
        body = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=body, method="POST")
        req.add_header("Content-Type", "application/json")
        if api_key:
            req.add_header("Authorization", f"Bearer {api_key}")
        elif project:
            req.add_header("X-Octarin-Project", project)
        with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_S) as resp:
            return HTTP_OK <= resp.status < HTTP_MULTIPLE_CHOICES
    except urllib.error.HTTPError as exc:
        # Strict-auth signal from the server: print the login.sh hint, once.
        if exc.code == HTTP_UNAUTHORIZED and project and not api_key:
            try:
                envelope = json.loads(exc.read().decode("utf-8") or "{}")
            except Exception:
                envelope = {}
            # The error body is untrusted: tolerate any JSON shape.
            error = envelope.get("error") if isinstance(envelope, dict) else None
            code = error.get("code") if isinstance(error, dict) else None
            if code == "auth_required":
                try:
                    _notify_auth_required_once(project)
                except Exception:
                    pass
        return False
    except Exception:
        return False
602
+
603
+
604
def load_state() -> dict:
    """Load the persisted offset state from ``STATE_FILE``.

    Returns ``{}`` when the file is missing, unreadable, not valid JSON, or
    holds JSON that is not an object — callers index the result with
    ``.get``, so anything but a dict would break every later run.
    """
    try:
        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return loaded if isinstance(loaded, dict) else {}
613
+
614
+
615
def save_state(state: dict) -> None:
    """Persist *state* to ``STATE_FILE`` atomically; never raises.

    The JSON is written to a temp file next to the target and then renamed
    over it. The temp name includes the process id so two hook runs finishing
    at the same time do not write into the same temp file. On any failure the
    temp file is removed (best-effort) and the previous state file is left as
    it was.
    """
    tmp = None
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        tmp = STATE_FILE.with_name(f"{STATE_FILE.name}.{os.getpid()}.tmp")
        tmp.write_text(json.dumps(state, sort_keys=True), encoding="utf-8")
        tmp.replace(STATE_FILE)
    except Exception:
        if tmp is not None:
            try:
                tmp.unlink()
            except Exception:
                pass
623
+
624
+
625
def build_event(payload: dict) -> dict | None:
    """Assemble the canonical IngestEvent from a hook payload (or None to skip).

    Steps: locate the transcript, read the entries appended since the last
    run, convert them to spans, and wrap those in an event dict.

    The advanced read offset is saved *before* the event is returned (and so
    before it is posted); a failed POST therefore does not cause the same
    entries to be re-sent on the next run.

    Returns:
        The event dict, or ``None`` when the payload lacks a session id or
        transcript path, when there are no new entries, or when the new
        entries produced no spans.
    """
    session_id, path, cwd = locate_transcript(payload)
    if not session_id or path is None:
        return None

    state = load_state()
    key = hashlib.sha256(f"{session_id}::{path}".encode()).hexdigest()
    entries = read_new_entries(path, state, key)
    save_state(state)
    if not entries:
        return None

    spans, totals, models, _ = build_spans(entries)
    if not spans:
        return None

    # Only a string cwd can be turned into a repo name.
    repo = Path(cwd).name if isinstance(cwd, str) and cwd else None
    src_trace = f"{session_id}:{int(time.time())}"
    trace_id = str(uuid.uuid5(_TRACE_NAMESPACE, f"{SOURCE}:{src_trace}"))
    start_times = [s["start_time"] for s in spans]
    # The event ends when its last span ends, not when its last span starts.
    end_times = [s["end_time"] for s in spans]

    return {
        "trace_id": trace_id,
        "source": SOURCE,
        "session_id": session_id,
        "user_ref": user_ref(),
        "repo": repo,
        "model": models[0] if models else None,
        "spans": spans,
        "start_time": min(start_times),
        "end_time": max(end_times),
        "total_tokens": totals["total_tokens"],
        "input_tokens": totals["input_tokens"],
        "output_tokens": totals["output_tokens"],
        "cache_read_tokens": totals["cache_read_tokens"],
        # extra (extra="allow"): handy for the backend rollup/audit
        "totals": totals,
        "models": models,
    }
665
+
666
+
667
def main() -> int:
    """Run the hook once: read the payload, build the event, post it.

    Always returns 0 — any exception is swallowed so the host tool is never
    blocked by the hook.
    """
    try:
        event = build_event(read_payload())
        if event is not None:
            # post_event picks the auth mode itself (Bearer when a per-user
            # key is set, slug-only otherwise) and prints the login.sh hint
            # only if the server refuses with auth_required.
            post_event(event)
    except Exception:
        # Absolutely never let the hook break the host tool.
        pass
    return 0


if __name__ == "__main__":
    sys.exit(main())