octarin-cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +202 -0
- package/assets/backfill.py +1113 -0
- package/assets/claude_code/hook.py +573 -0
- package/assets/codex/hook.mjs +487 -0
- package/assets/cursor/hook-handler.js +41 -0
- package/assets/cursor/lib/canonical.js +240 -0
- package/assets/cursor/lib/utils.js +138 -0
- package/assets/repo-template/dot-claude/octarin/hook.py +685 -0
- package/assets/repo-template/dot-claude/octarin/run.sh +41 -0
- package/assets/repo-template/dot-claude/settings.json +15 -0
- package/assets/repo-template/dot-codex/config.toml +6 -0
- package/assets/repo-template/dot-codex/hooks/hook.mjs +531 -0
- package/assets/repo-template/dot-codex/hooks/run.sh +38 -0
- package/assets/repo-template/dot-cursor/hooks/hook-handler.js +41 -0
- package/assets/repo-template/dot-cursor/hooks/lib/canonical.js +240 -0
- package/assets/repo-template/dot-cursor/hooks/lib/utils.js +196 -0
- package/assets/repo-template/dot-cursor/hooks/run.sh +41 -0
- package/assets/repo-template/dot-cursor/hooks.json +13 -0
- package/dist/args.js +85 -0
- package/dist/assets.js +28 -0
- package/dist/client.js +105 -0
- package/dist/envfile.js +94 -0
- package/dist/index.js +192 -0
- package/dist/init.js +314 -0
- package/dist/init_repo.js +348 -0
- package/dist/login.js +209 -0
- package/dist/output.js +56 -0
- package/package.json +37 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
"""Claude Code -> Octarin capture hook (pure stdlib, fail-open).
|
|
2
|
+
|
|
3
|
+
Registered as a Claude Code ``Stop`` hook. On each turn-end Claude Code pipes a
|
|
4
|
+
small JSON payload on stdin (``session_id``, ``transcript_path``, ``cwd``, ...).
|
|
5
|
+
This hook:
|
|
6
|
+
|
|
7
|
+
1. reads that payload and locates the session transcript JSONL;
|
|
8
|
+
2. parses user/assistant turns, tool calls, token usage, and model;
|
|
9
|
+
3. builds a single canonical ``IngestEvent`` (full ``spans`` form) covering the
|
|
10
|
+
turns produced since the last run (tracked via a per-session offset file);
|
|
11
|
+
4. POSTs it to ``${OCTARIN_INGEST_URL:-$OCTARIN_API_BASE/v1/ingest}`` with
|
|
12
|
+
``Authorization: Bearer $OCTARIN_API_KEY``.
|
|
13
|
+
|
|
14
|
+
It is deliberately tiny and dependency-free (stdlib only). Every failure path
|
|
15
|
+
exits 0 so the host tool is never blocked, and the network call has a hard
|
|
16
|
+
timeout. The canonical shape is defined in ``backend/app/schema/canonical.py``.
|
|
17
|
+
|
|
18
|
+
Why this file disables a few ruff rules at the module level:
|
|
19
|
+
|
|
20
|
+
* ``BLE001`` (blind ``except Exception``) and ``S110``/``S112`` (try/except/
|
|
21
|
+
pass / continue) are EXPLICITLY the design — a capture hook that raises
|
|
22
|
+
or logs into stderr breaks the host tool's UX. We swallow everything and
|
|
23
|
+
exit 0.
|
|
24
|
+
* ``S310`` (unaudited URL scheme on ``urllib.request.urlopen``) — the URL
|
|
25
|
+
comes from our own ``OCTARIN_INGEST_URL`` / ``OCTARIN_API_BASE`` env, not
|
|
26
|
+
user input.
|
|
27
|
+
|
|
28
|
+
The hook is invoked by ``run.sh`` via ``exec python3 hook.py``, so no shebang
|
|
29
|
+
is needed — and dropping it sidesteps EXE001 (shebang on non-executable file)
|
|
30
|
+
in every consuming repo.
|
|
31
|
+
"""
|
|
32
|
+
# ruff: noqa: BLE001, S110, S112, S310, INP001
|
|
33
|
+
# (INP001: this is a standalone Claude Code hook script — `.claude/octarin/` is
|
|
34
|
+
# a config directory that happens to contain a .py; it's not a Python package
|
|
35
|
+
# and adding __init__.py would falsely advertise importability from elsewhere
|
|
36
|
+
# in the host repo.)
|
|
37
|
+
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
import base64
|
|
41
|
+
import getpass
|
|
42
|
+
import hashlib
|
|
43
|
+
import json
|
|
44
|
+
import os
|
|
45
|
+
import subprocess
|
|
46
|
+
import sys
|
|
47
|
+
import time
|
|
48
|
+
import urllib.error
|
|
49
|
+
import urllib.request
|
|
50
|
+
import uuid
|
|
51
|
+
from datetime import datetime, timezone
|
|
52
|
+
from pathlib import Path
|
|
53
|
+
|
|
54
|
+
SOURCE = "claude-code"
|
|
55
|
+
STATE_DIR = Path.home() / ".octarin"
|
|
56
|
+
STATE_FILE = STATE_DIR / "claude_code_state.json"
|
|
57
|
+
MAX_TEXT = 20_000 # cap stored input/output text so payloads stay small
|
|
58
|
+
HTTP_TIMEOUT_S = 5.0
|
|
59
|
+
# Named HTTP status codes — keeps the response-handling logic free of magic
|
|
60
|
+
# numbers and matches the ``PLR2004`` lint contract in strict ruff profiles.
|
|
61
|
+
HTTP_OK = 200
|
|
62
|
+
HTTP_MULTIPLE_CHOICES = 300
|
|
63
|
+
HTTP_UNAUTHORIZED = 401
|
|
64
|
+
# Cap per-attachment base64 payload we ship inline. Larger items are recorded
|
|
65
|
+
# metadata-only (no b64) so a giant paste never bloats the POST or the backend.
|
|
66
|
+
MAX_ATTACHMENT_BYTES = 5 * 1024 * 1024 # ~5MB of raw bytes
|
|
67
|
+
# Map common file extensions -> mime for file refs that lack one.
|
|
68
|
+
_EXT_MIME = {
|
|
69
|
+
".png": "image/png",
|
|
70
|
+
".jpg": "image/jpeg",
|
|
71
|
+
".jpeg": "image/jpeg",
|
|
72
|
+
".gif": "image/gif",
|
|
73
|
+
".webp": "image/webp",
|
|
74
|
+
".svg": "image/svg+xml",
|
|
75
|
+
".pdf": "application/pdf",
|
|
76
|
+
".txt": "text/plain",
|
|
77
|
+
".md": "text/markdown",
|
|
78
|
+
".json": "application/json",
|
|
79
|
+
".csv": "text/csv",
|
|
80
|
+
}
|
|
81
|
+
# Same UUID5 namespace as backend deterministic_trace_id so retries de-dupe.
|
|
82
|
+
_TRACE_NAMESPACE = uuid.UUID("6f8d2c1e-9a3b-4f5e-8c7d-1a2b3c4d5e6f")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _now_iso() -> str:
|
|
86
|
+
return datetime.now(timezone.utc).isoformat()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _truncate(text: str) -> str:
|
|
90
|
+
if not text:
|
|
91
|
+
return ""
|
|
92
|
+
return text if len(text) <= MAX_TEXT else text[:MAX_TEXT]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def read_payload() -> dict:
    """Parse the hook's JSON payload from stdin.

    Returns the decoded object when it is a JSON object; returns ``{}`` for
    blank input, malformed JSON, a non-object top-level value, or any read
    error.
    """
    try:
        text = sys.stdin.read()
        decoded = json.loads(text) if text.strip() else {}
    except Exception:
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def locate_transcript(payload: dict) -> tuple[str | None, Path | None, str | None]:
    """Pull ``(session_id, transcript_path, cwd)`` from the hook payload.

    Each field is looked up under several spellings, in order:

    * session id: ``session_id``, ``sessionId``, then ``session.id``;
    * transcript path: ``transcript_path``, ``transcriptPath``, then
      ``transcript.path``;
    * working directory: ``cwd``, then ``workspace``.

    Missing fields come back as ``None``. The nested lookups tolerate a
    non-dict container (e.g. ``"session": "abc"``) instead of raising, so a
    malformed payload degrades to "nothing found".
    """

    def _nested(container_key: str, field: str):
        # Only descend when the container really is an object.
        container = payload.get(container_key)
        return container.get(field) if isinstance(container, dict) else None

    session_id = (
        payload.get("session_id")
        or payload.get("sessionId")
        or _nested("session", "id")
    )
    raw_path = (
        payload.get("transcript_path")
        or payload.get("transcriptPath")
        or _nested("transcript", "path")
    )
    cwd = payload.get("cwd") or payload.get("workspace") or None

    path: Path | None = None
    if raw_path:
        try:
            path = Path(raw_path).expanduser()
        except Exception:
            # Non-path-like value or unresolvable ``~``: treat as absent.
            path = None
    return session_id, path, cwd
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ── transcript helpers (mirror Claude Code's JSONL shape) ──
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _msg(entry: dict) -> dict:
|
|
133
|
+
m = entry.get("message")
|
|
134
|
+
return m if isinstance(m, dict) else {}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _role(entry: dict) -> str | None:
|
|
138
|
+
t = entry.get("type")
|
|
139
|
+
if t in ("user", "assistant"):
|
|
140
|
+
return t
|
|
141
|
+
r = _msg(entry).get("role")
|
|
142
|
+
return r if r in ("user", "assistant") else None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _content(entry: dict):
    """Return the entry's content payload.

    When the entry has a ``message`` key the content is read from that
    message (``None`` if the message is not a dict or has no content);
    otherwise the entry's own top-level ``content`` is used.
    """
    if "message" in entry:
        return _msg(entry).get("content")
    return entry.get("content")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _text(content) -> str:
|
|
151
|
+
if isinstance(content, str):
|
|
152
|
+
return content
|
|
153
|
+
if isinstance(content, list):
|
|
154
|
+
parts = []
|
|
155
|
+
for x in content:
|
|
156
|
+
if isinstance(x, dict) and x.get("type") == "text":
|
|
157
|
+
parts.append(x.get("text", ""))
|
|
158
|
+
elif isinstance(x, str):
|
|
159
|
+
parts.append(x)
|
|
160
|
+
return "\n".join(p for p in parts if p)
|
|
161
|
+
return ""
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _blocks(content, block_type: str) -> list[dict]:
|
|
165
|
+
if not isinstance(content, list):
|
|
166
|
+
return []
|
|
167
|
+
return [x for x in content if isinstance(x, dict) and x.get("type") == block_type]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _attachment_from_image_block(block: dict) -> dict | None:
|
|
171
|
+
"""Build an attachment dict from a Claude ``image`` content block.
|
|
172
|
+
|
|
173
|
+
Claude carries pasted images as ``{"type":"image","source":{"type":"base64",
|
|
174
|
+
"media_type":"image/png","data":"..."}}``. We capture the base64 bytes inline
|
|
175
|
+
when within the size cap; larger images are recorded metadata-only (no b64).
|
|
176
|
+
Returns ``None`` if the block carries no usable image data.
|
|
177
|
+
"""
|
|
178
|
+
src = block.get("source")
|
|
179
|
+
if not isinstance(src, dict):
|
|
180
|
+
return None
|
|
181
|
+
mime = src.get("media_type") or "image/png"
|
|
182
|
+
name = block.get("name") or block.get("filename") or "pasted-image"
|
|
183
|
+
if src.get("type") == "base64":
|
|
184
|
+
data = src.get("data")
|
|
185
|
+
if not isinstance(data, str) or not data:
|
|
186
|
+
return None
|
|
187
|
+
# Authoritative size: decode once (cheap vs. the network cost we save).
|
|
188
|
+
try:
|
|
189
|
+
raw = base64.b64decode(data, validate=False)
|
|
190
|
+
except Exception:
|
|
191
|
+
return None
|
|
192
|
+
nbytes = len(raw)
|
|
193
|
+
att = {"kind": "image", "mime": mime, "name": str(name), "bytes": nbytes}
|
|
194
|
+
att["b64"] = data if nbytes <= MAX_ATTACHMENT_BYTES else None
|
|
195
|
+
return att
|
|
196
|
+
# URL-backed image (rare in transcripts): record metadata only.
|
|
197
|
+
if src.get("type") == "url" and src.get("url"):
|
|
198
|
+
return {
|
|
199
|
+
"kind": "image",
|
|
200
|
+
"mime": mime,
|
|
201
|
+
"name": str(name),
|
|
202
|
+
"bytes": 0,
|
|
203
|
+
"b64": None,
|
|
204
|
+
}
|
|
205
|
+
return None
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _mime_for_name(name: str) -> str:
    """Guess a mime type from a filename's extension.

    The lower-cased name is matched against the suffixes in ``_EXT_MIME``
    (first match in table order wins); unknown names map to
    ``application/octet-stream``.
    """
    lowered = name.lower()
    candidates = (mime for ext, mime in _EXT_MIME.items() if lowered.endswith(ext))
    return next(candidates, "application/octet-stream")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _attachment_from_file_block(block: dict) -> dict | None:
|
|
218
|
+
"""Build a metadata attachment from a ``document``/file-ref content block.
|
|
219
|
+
|
|
220
|
+
Claude can carry document blocks (``{"type":"document","source":{...}}``) and
|
|
221
|
+
tool results sometimes reference files. We capture base64 ``document`` bytes
|
|
222
|
+
when present (within the cap); otherwise record the file name as metadata so
|
|
223
|
+
the trace at least shows that a file was attached.
|
|
224
|
+
"""
|
|
225
|
+
src = block.get("source")
|
|
226
|
+
name = (
|
|
227
|
+
block.get("name")
|
|
228
|
+
or block.get("title")
|
|
229
|
+
or block.get("filename")
|
|
230
|
+
or "attached-file"
|
|
231
|
+
)
|
|
232
|
+
name = str(name)
|
|
233
|
+
if (
|
|
234
|
+
isinstance(src, dict)
|
|
235
|
+
and src.get("type") == "base64"
|
|
236
|
+
and isinstance(src.get("data"), str)
|
|
237
|
+
):
|
|
238
|
+
data = src["data"]
|
|
239
|
+
mime = src.get("media_type") or _mime_for_name(name)
|
|
240
|
+
try:
|
|
241
|
+
raw = base64.b64decode(data, validate=False)
|
|
242
|
+
except Exception:
|
|
243
|
+
return None
|
|
244
|
+
nbytes = len(raw)
|
|
245
|
+
return {
|
|
246
|
+
"kind": "file",
|
|
247
|
+
"mime": mime,
|
|
248
|
+
"name": name,
|
|
249
|
+
"bytes": nbytes,
|
|
250
|
+
"b64": data if nbytes <= MAX_ATTACHMENT_BYTES else None,
|
|
251
|
+
}
|
|
252
|
+
# Bare reference with a name/path but no inline bytes: metadata only.
|
|
253
|
+
if block.get("name") or block.get("title") or block.get("filename"):
|
|
254
|
+
return {
|
|
255
|
+
"kind": "file",
|
|
256
|
+
"mime": _mime_for_name(name),
|
|
257
|
+
"name": name,
|
|
258
|
+
"bytes": 0,
|
|
259
|
+
"b64": None,
|
|
260
|
+
}
|
|
261
|
+
return None
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _extract_attachments(content) -> list[dict]:
|
|
265
|
+
"""Pull image/file attachments from a message/tool-result content list.
|
|
266
|
+
|
|
267
|
+
Walks ``image`` and ``document`` content blocks (Claude's pasted-binary
|
|
268
|
+
shapes). Pure + fail-open: any malformed block is skipped, never raised, so
|
|
269
|
+
attachment capture can NEVER break the hook's core span extraction.
|
|
270
|
+
"""
|
|
271
|
+
out: list[dict] = []
|
|
272
|
+
if not isinstance(content, list):
|
|
273
|
+
return out
|
|
274
|
+
for block in content:
|
|
275
|
+
if not isinstance(block, dict):
|
|
276
|
+
continue
|
|
277
|
+
try:
|
|
278
|
+
btype = block.get("type")
|
|
279
|
+
if btype == "image":
|
|
280
|
+
att = _attachment_from_image_block(block)
|
|
281
|
+
elif btype in ("document", "file"):
|
|
282
|
+
att = _attachment_from_file_block(block)
|
|
283
|
+
else:
|
|
284
|
+
att = None
|
|
285
|
+
if att:
|
|
286
|
+
out.append(att)
|
|
287
|
+
except Exception:
|
|
288
|
+
continue
|
|
289
|
+
return out
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _is_tool_result(entry: dict) -> bool:
    """Tell whether a user-role entry carries at least one ``tool_result`` block."""
    if _role(entry) != "user":
        return False
    return len(_blocks(_content(entry), "tool_result")) > 0
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _usage(entry: dict) -> dict:
    """Extract token counts from ``message.usage``.

    Returns ``{}`` when the entry has no usage dict; otherwise a dict with
    integer ``input``, ``output``, ``cache_read`` and ``cache_write`` counts,
    where missing or falsy source values become 0.
    """
    raw = _msg(entry).get("usage")
    if not isinstance(raw, dict):
        return {}
    # (result key, key in the transcript's usage object)
    field_map = (
        ("input", "input_tokens"),
        ("output", "output_tokens"),
        ("cache_read", "cache_read_input_tokens"),
        ("cache_write", "cache_creation_input_tokens"),
    )
    return {short: int(raw.get(source_key) or 0) for short, source_key in field_map}
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _ts(entry: dict) -> str | None:
|
|
309
|
+
v = entry.get("timestamp")
|
|
310
|
+
return v if isinstance(v, str) and v else None
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def read_new_entries(path: Path, state: dict, key: str) -> list[dict]:
    """Return transcript entries appended since the last processed byte offset.

    Reads ``path`` from the byte offset stored in ``state[key]["offset"]``,
    parses each JSONL line, and records the new offset back into ``state``
    (the caller is responsible for persisting it).

    * Lines are split on ``\\n`` only. ``str.splitlines()`` also breaks on
      characters such as U+2028/U+2029, which may appear unescaped inside
      JSON strings and would cut an entry in half.
    * A trailing segment with no newline is consumed only if it already
      parses as a JSON object (or is blank). Otherwise the offset stays just
      before it, so a line that is still being written is picked up whole on
      the next run instead of being lost.
    * A file smaller than the stored offset is treated as rotated/truncated
      and re-read from the start.
    * A missing file or any read error yields ``[]`` and leaves ``state``
      untouched; a malformed state entry is treated as offset 0.

    Lines that are blank, not valid JSON, or not JSON objects are skipped.
    """
    if not path.exists():
        return []

    sess = state.get(key)
    if not isinstance(sess, dict):
        sess = {}
    try:
        offset = max(int(sess.get("offset", 0)), 0)
    except (TypeError, ValueError):
        offset = 0

    try:
        if path.stat().st_size < offset:  # rotated/truncated -> start over
            offset = 0
        with path.open("rb") as fh:
            fh.seek(offset)
            chunk = fh.read()
    except Exception:
        return []

    def _parse(raw: bytes) -> dict | None:
        line = raw.decode("utf-8", errors="replace").strip()
        if not line:
            return None
        try:
            obj = json.loads(line)
        except Exception:
            return None
        return obj if isinstance(obj, dict) else None

    # Everything before the last b"\n" is a complete line; ``tail`` is what
    # follows it (empty when the chunk ends with a newline).
    *complete, tail = chunk.split(b"\n")
    out: list[dict] = []
    for raw_line in complete:
        obj = _parse(raw_line)
        if obj is not None:
            out.append(obj)

    consumed = len(chunk) - len(tail)
    if tail:
        tail_obj = _parse(tail)
        if tail_obj is not None:
            # Complete object that merely lacks a final newline.
            out.append(tail_obj)
            consumed = len(chunk)
        elif not tail.strip():
            consumed = len(chunk)
        # else: partial line still being written -> leave it for next run.

    sess["offset"] = offset + consumed
    state[key] = sess
    return out
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def build_spans(  # noqa: PLR0915 - top-down transcript parser; splitting it
    entries: list[dict],  # would scatter the local span-bookkeeping state.
) -> tuple[list[dict], dict, list[str], str | None]:
    """Convert transcript entries into canonical spans plus rolled-up totals.

    Every assistant entry produces one ``llm`` span carrying the model, token
    usage, the pending user prompt as ``input`` and the assistant text as
    ``output``. Every ``tool_use`` block inside that entry produces a child
    ``tool`` span whose output, end time and attachments come from the
    matching ``tool_result`` (looked up by tool-use id) when one is present
    in ``entries``.

    Returns ``(spans, totals, models, repo)``; ``models`` lists distinct
    model names in first-seen order and ``repo`` is always ``None`` here.
    """
    # Pass 1: index tool results by tool_use_id so tool spans can be enriched
    # with their output text, returned attachments and completion timestamp.
    results_by_id: dict[str, str] = {}
    attachments_by_tool_id: dict[str, list[dict]] = {}
    result_ts_by_id: dict[str, str] = {}
    for entry in entries:
        if not _is_tool_result(entry):
            continue
        returned_at = _ts(entry)
        for result in _blocks(_content(entry), "tool_result"):
            raw_id = result.get("tool_use_id")
            if not raw_id:
                continue
            tool_key = str(raw_id)
            body = result.get("content")
            if isinstance(body, str):
                results_by_id[tool_key] = body
            else:
                results_by_id[tool_key] = json.dumps(body, ensure_ascii=False)
            found = _extract_attachments(body)
            if found:
                attachments_by_tool_id[tool_key] = found
            if returned_at:
                result_ts_by_id[tool_key] = returned_at

    spans: list[dict] = []
    models: list[str] = []
    totals = {
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
        "cache_read_tokens": 0,
        "cost_usd": 0.0,
        "span_count": 0,
        "tool_call_count": 0,
    }

    # Pass 2: walk the transcript in order. ``last_ts`` is the timestamp of
    # the previous entry and serves as the start time of the next generation.
    prompt_text = ""
    prompt_attachments: list[dict] = []
    last_ts: str | None = None
    for entry in entries:
        role = _role(entry)

        if role != "assistant":
            if role == "user" and not _is_tool_result(entry):
                # A real user prompt: its text replaces any earlier pending
                # text, while its attachments accumulate until consumed.
                body = _content(entry)
                prompt_text = _truncate(_text(body))
                prompt_attachments.extend(_extract_attachments(body))
            last_ts = _ts(entry) or last_ts
            continue

        content = _content(entry)
        usage = _usage(entry)
        model = _msg(entry).get("model")
        if model and model not in models:
            models.append(model)
        ts = _ts(entry) or _now_iso()
        span_id = _msg(entry).get("id") or uuid.uuid4().hex
        reply_text = _truncate(_text(content))

        tokens_in = usage.get("input", 0)
        tokens_out = usage.get("output", 0)
        cache_read = usage.get("cache_read", 0)
        cache_write = usage.get("cache_write", 0)

        if model:
            span_name = f"Claude generation ({model})"
        else:
            span_name = "Claude generation"
        llm_span = {
            "span_id": str(span_id),
            "parent_span_id": None,
            "name": span_name,
            "span_type": "llm",
            "start_time": last_ts or ts,
            "end_time": ts,
            "model": model,
            "provider": "anthropic",
            "input": prompt_text or None,
            "output": reply_text or None,
            "input_tokens": tokens_in,
            "output_tokens": tokens_out,
            "total_tokens": tokens_in + tokens_out,
            "cache_read_tokens": cache_read,
            "cache_write_tokens": cache_write,
            "status": "ok",
            "attributes": {"turn_role": "assistant"},
        }
        if prompt_attachments:
            llm_span["attachments"] = prompt_attachments
        spans.append(llm_span)
        # The prompt is consumed by this generation. Rebind (do not clear) the
        # list, because the span above still references it.
        prompt_text = ""
        prompt_attachments = []

        totals["input_tokens"] += tokens_in
        totals["output_tokens"] += tokens_out
        totals["cache_read_tokens"] += cache_read
        totals["total_tokens"] += tokens_in + tokens_out

        for call in _blocks(content, "tool_use"):
            call_id = str(call.get("id") or uuid.uuid4().hex)
            tool_name = call.get("name") or "unknown"
            arguments = call.get("input")
            if not isinstance(arguments, str):
                arguments = json.dumps(arguments, ensure_ascii=False)
            tool_span = {
                "span_id": call_id,
                "parent_span_id": str(span_id),
                "name": f"Tool: {tool_name}",
                "span_type": "tool",
                "start_time": ts,
                "end_time": result_ts_by_id.get(call_id, ts),
                "input": _truncate(arguments),
                "output": _truncate(results_by_id.get(call_id, "")) or None,
                "status": "ok",
                "attributes": {"tool_name": tool_name, "tool_id": call_id},
            }
            returned = attachments_by_tool_id.get(call_id)
            if returned:
                tool_span["attachments"] = returned
            spans.append(tool_span)
            totals["tool_call_count"] += 1

        last_ts = ts

    totals["span_count"] = len(spans)
    return spans, totals, models, None
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def user_ref() -> str:
    """Resolve the engineer's identity for attribution.

    Sources are tried in order and the first non-empty one wins:

    1. the ``OCTARIN_USER`` environment variable (explicit override);
    2. ``oauthAccount.emailAddress`` from ``~/.claude.json``;
    3. ``git config user.email``, run in ``$CLAUDE_PROJECT_DIR`` or the
       current directory;
    4. the OS username;
    5. the literal ``"unknown"``.

    Every step is best-effort: a failure simply falls through to the next
    source. The git lookup is bounded by a short timeout so a stuck ``git``
    process cannot stall the host tool.
    """
    git_timeout_s = 2.0

    ref = (os.environ.get("OCTARIN_USER") or "").strip()
    if ref:
        return ref

    try:
        with open(Path.home() / ".claude.json", encoding="utf-8") as fh:
            account = json.load(fh).get("oauthAccount") or {}
        email = (account.get("emailAddress") or "").strip()
        if email:
            return email
    except Exception:
        pass

    try:
        out = subprocess.check_output(
            ["git", "config", "user.email"],
            cwd=os.environ.get("CLAUDE_PROJECT_DIR") or os.getcwd(),
            stderr=subprocess.DEVNULL,
            timeout=git_timeout_s,  # TimeoutExpired is swallowed below
        )
        email = out.decode().strip()
        if email:
            return email
    except Exception:
        pass

    try:
        return getpass.getuser()
    except Exception:
        return "unknown"
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def _notify_auth_required_once(project: str) -> None:
    """Write the ``login.sh`` hint to stderr, at most once per project.

    A marker file named after a short SHA-256 prefix of ``project`` is
    created under ``STATE_DIR``; when it already exists the function returns
    without printing. If the marker cannot be checked or created, the hint is
    still printed.
    """
    hint = (
        f"[octarin] project {project!r} now requires per-user auth. "
        "Run once to authorize:\n"
        "[octarin] curl -fsSL https://octarin.ai/hooks/login.sh | bash\n"
    )
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        digest = hashlib.sha256(project.encode()).hexdigest()
        marker = STATE_DIR / f"auth_hint.{digest[:12]}"
        if marker.exists():
            return
        marker.write_text("", encoding="utf-8")
    except Exception:
        # Marker bookkeeping is best-effort; fall through and print anyway.
        pass
    sys.stderr.write(hint)
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def post_event(event: dict) -> bool:
    """POST the IngestEvent. Returns True on 2xx, False otherwise (fail-open).

    The target is ``$OCTARIN_INGEST_URL``, or ``$OCTARIN_API_BASE/v1/ingest``
    when that is unset; with neither configured nothing is sent and the
    result is ``False``.

    Two auth modes:

    * Bearer key (``OCTARIN_API_KEY`` set) — sent as
      ``Authorization: Bearer <key>``. Surrounding whitespace is stripped so
      a stray newline from an env file cannot produce an invalid header.
    * Slug-only — no key, but ``OCTARIN_PROJECT`` is set. The hook adds
      ``X-Octarin-Project: <slug>``.

    In both modes the project slug is embedded in the body as ``project``
    unless the event already has that key.

    On a 401 in slug-only mode whose JSON body carries
    ``error.code == "auth_required"``, the one-time ``login.sh`` hint is
    printed. A malformed URL, an unserialisable event, an unexpected error
    envelope shape or any network failure all yield ``False`` rather than an
    exception.
    """
    url = os.environ.get("OCTARIN_INGEST_URL")
    if not url:
        base = (os.environ.get("OCTARIN_API_BASE") or "").rstrip("/")
        if not base:
            return False
        url = f"{base}/v1/ingest"

    api_key = os.environ.get("OCTARIN_API_KEY", "").strip()
    project = os.environ.get("OCTARIN_PROJECT", "").strip()

    # Copy so the caller's event is never mutated.
    payload = dict(event)
    if project and "project" not in payload:
        payload["project"] = project

    try:
        # Request construction is inside the try: ``Request`` raises
        # ValueError for a URL without a scheme, and ``json.dumps`` raises on
        # unserialisable values.
        body = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=body, method="POST")
        req.add_header("Content-Type", "application/json")
        if api_key:
            req.add_header("Authorization", f"Bearer {api_key}")
        elif project:
            req.add_header("X-Octarin-Project", project)
        with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_S) as resp:
            return HTTP_OK <= resp.status < HTTP_MULTIPLE_CHOICES
    except urllib.error.HTTPError as exc:
        # Strict-auth signal from the server: print the login.sh hint, once.
        if exc.code == HTTP_UNAUTHORIZED and project and not api_key:
            try:
                envelope = json.loads(exc.read().decode("utf-8") or "{}")
            except Exception:
                envelope = {}
            # The envelope is server-controlled; tolerate any JSON shape.
            error = envelope.get("error") if isinstance(envelope, dict) else None
            if isinstance(error, dict) and error.get("code") == "auth_required":
                _notify_auth_required_once(project)
        return False
    except Exception:
        return False
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def load_state() -> dict:
    """Load the persisted per-session state from ``STATE_FILE``.

    Returns ``{}`` when the file is missing, unreadable, not valid JSON, or
    holds a JSON value that is not an object. The last case matters because
    callers use ``state.get(...)``: a state file containing e.g. ``[]`` would
    otherwise make every later run fail.
    """
    try:
        if not STATE_FILE.exists():
            return {}
        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return loaded if isinstance(loaded, dict) else {}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def save_state(state: dict) -> None:
    """Persist ``state`` to ``STATE_FILE`` via write-then-rename; never raises.

    The temp file name includes the process id, so two hook processes
    finishing at the same time do not write to (or rename away) each other's
    temp file. On failure the temp file is removed on a best-effort basis
    and the error is swallowed.

    NOTE(review): this only protects the write itself. Two processes that
    each load, modify and save the state can still overwrite one another's
    updates (last writer wins).
    """
    tmp = None
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        tmp = STATE_FILE.with_name(f"{STATE_FILE.name}.{os.getpid()}.tmp")
        tmp.write_text(json.dumps(state, sort_keys=True), encoding="utf-8")
        tmp.replace(STATE_FILE)
    except Exception:
        if tmp is not None:
            try:
                tmp.unlink()
            except Exception:
                pass
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def build_event(payload: dict) -> dict | None:
    """Assemble the canonical IngestEvent from a hook payload (or None to skip).

    Returns ``None`` when the payload lacks a session id or transcript path,
    when the transcript has no new entries since the stored offset, or when
    those entries produce no spans.

    Side effect: the per-session read offset is advanced and persisted
    before the event is returned.

    NOTE(review): because the offset is saved here, before the caller POSTs
    the event, entries from a failed POST are not re-sent on the next run.
    NOTE(review): the trace id is derived from the current wall-clock second,
    so re-running over the same entries would produce a different id.
    """
    session_id, path, cwd = locate_transcript(payload)
    if not session_id or path is None:
        return None

    state = load_state()
    key = hashlib.sha256(f"{session_id}::{path}".encode()).hexdigest()
    entries = read_new_entries(path, state, key)
    save_state(state)
    if not entries:
        return None

    spans, totals, models, _ = build_spans(entries)
    if not spans:
        return None

    repo = Path(cwd).name if cwd else None
    src_trace = f"{session_id}:{int(time.time())}"
    trace_id = str(uuid.uuid5(_TRACE_NAMESPACE, f"{SOURCE}:{src_trace}"))
    # The trace spans from the earliest span start to the latest span END.
    # (Taking max() over start times would cut the trace short of its last
    # span's duration.)
    start_times = [s["start_time"] for s in spans]
    end_times = [s["end_time"] for s in spans]

    return {
        "trace_id": trace_id,
        "source": SOURCE,
        "session_id": session_id,
        "user_ref": user_ref(),
        "repo": repo,
        "model": models[0] if models else None,
        "spans": spans,
        "start_time": min(start_times),
        "end_time": max(end_times),
        "total_tokens": totals["total_tokens"],
        "input_tokens": totals["input_tokens"],
        "output_tokens": totals["output_tokens"],
        "cache_read_tokens": totals["cache_read_tokens"],
        # extra (extra="allow"): handy for the backend rollup/audit
        "totals": totals,
        "models": models,
    }
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def main() -> int:
    """Run the hook once: read stdin, build the event, POST it.

    Always returns 0. Every failure is swallowed so the host tool is never
    blocked by the capture hook. ``post_event`` chooses the auth mode itself
    (Bearer key when set, project slug otherwise).
    """
    try:
        event = build_event(read_payload())
        if event is not None:
            post_event(event)
    except Exception:
        # Absolutely never let the hook break the host tool.
        pass
    return 0
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
# Script entry point: exit with main()'s status code.
if __name__ == "__main__":
    raise SystemExit(main())
|