npm - octarin-cli - Versions diffs - 0.2.0 - Mend

octarin-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +202 -0
package/assets/backfill.py +1113 -0
package/assets/claude_code/hook.py +573 -0
package/assets/codex/hook.mjs +487 -0
package/assets/cursor/hook-handler.js +41 -0
package/assets/cursor/lib/canonical.js +240 -0
package/assets/cursor/lib/utils.js +138 -0
package/assets/repo-template/dot-claude/octarin/hook.py +685 -0
package/assets/repo-template/dot-claude/octarin/run.sh +41 -0
package/assets/repo-template/dot-claude/settings.json +15 -0
package/assets/repo-template/dot-codex/config.toml +6 -0
package/assets/repo-template/dot-codex/hooks/hook.mjs +531 -0
package/assets/repo-template/dot-codex/hooks/run.sh +38 -0
package/assets/repo-template/dot-cursor/hooks/hook-handler.js +41 -0
package/assets/repo-template/dot-cursor/hooks/lib/canonical.js +240 -0
package/assets/repo-template/dot-cursor/hooks/lib/utils.js +196 -0
package/assets/repo-template/dot-cursor/hooks/run.sh +41 -0
package/assets/repo-template/dot-cursor/hooks.json +13 -0
package/dist/args.js +85 -0
package/dist/assets.js +28 -0
package/dist/client.js +105 -0
package/dist/envfile.js +94 -0
package/dist/index.js +192 -0
package/dist/init.js +314 -0
package/dist/init_repo.js +348 -0
package/dist/login.js +209 -0
package/dist/output.js +56 -0
package/package.json +37 -0

package/assets/claude_code/hook.py ADDED Viewed

@@ -0,0 +1,573 @@
+#!/usr/bin/env python3
+"""Claude Code -> Octarin capture hook (pure stdlib, fail-open).
+Registered as a Claude Code ``Stop`` hook. On each turn-end Claude Code pipes a
+small JSON payload on stdin (``session_id``, ``transcript_path``, ``cwd``, ...).
+This hook:
+  1. reads that payload and locates the session transcript JSONL;
+  2. parses user/assistant turns, tool calls, token usage, and model;
+  3. builds a single canonical ``IngestEvent`` (full ``spans`` form) covering the
+     turns produced since the last run (tracked via a per-session offset file);
+  4. POSTs it to ``${OCTARIN_INGEST_URL:-$OCTARIN_API_BASE/v1/ingest}`` with
+     ``Authorization: Bearer $OCTARIN_API_KEY``.
+It is deliberately tiny and dependency-free (stdlib only). Every failure path
+exits 0 so the host tool is never blocked, and the network call has a hard
+timeout. The canonical shape is defined in ``backend/app/schema/canonical.py``.
+"""
+from __future__ import annotations
+import base64
+import getpass
+import hashlib
+import json
+import os
+import subprocess
+import sys
+import time
+import urllib.request
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
# Value reported as the event ``source`` and mixed into the trace-id seed.
SOURCE = "claude-code"

# Per-session transcript offsets are persisted under the user's home directory.
STATE_DIR = Path.home() / ".octarin"
STATE_FILE = STATE_DIR / "claude_code_state.json"

# Character cap applied to stored input/output text so payloads stay small.
MAX_TEXT = 20000

# Timeout (seconds) handed to the ingest POST.
HTTP_TIMEOUT_S = 5.0

# Largest decoded attachment (~5MB of raw bytes) whose base64 is shipped
# inline. Larger items are recorded metadata-only (no b64) so a giant paste
# never bloats the POST or the backend.
MAX_ATTACHMENT_BYTES = 5 * 1024**2

# File extension -> mime, used for file refs that lack an explicit mime.
_EXT_MIME = {
    # images
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    # documents and text
    ".pdf": "application/pdf",
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".json": "application/json",
    ".csv": "text/csv",
}

# Same UUID5 namespace as backend deterministic_trace_id so retries de-dupe.
_TRACE_NAMESPACE = uuid.UUID("6f8d2c1e-9a3b-4f5e-8c7d-1a2b3c4d5e6f")
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _truncate(text: str) -> str:
    """Clip ``text`` to at most ``MAX_TEXT`` characters.

    Falsy input (``""`` or ``None``) is normalised to the empty string.
    """
    if text:
        # Slicing is a no-op for text already within the cap.
        return text[:MAX_TEXT]
    return ""
def read_payload() -> dict:
    """Parse the hook's JSON payload from stdin.

    Returns the decoded object when stdin holds a JSON object. Empty input,
    invalid JSON, a non-object top-level value, or any read error all yield
    ``{}``.
    """
    try:
        text = sys.stdin.read()
        if text.strip():
            decoded = json.loads(text)
            if isinstance(decoded, dict):
                return decoded
    except Exception:
        pass
    return {}
def locate_transcript(payload: dict) -> tuple[str | None, Path | None, str | None]:
    """Pull ``(session_id, transcript_path, cwd)`` from the hook payload.

    Each field is looked up under its snake_case key, then its camelCase key,
    then a nested form (``session.id`` / ``transcript.path``). ``cwd`` falls
    back to ``workspace``. Missing fields come back as ``None``.

    The transcript path is returned as a ``Path`` with ``~`` expanded, or
    ``None`` when it is absent or cannot be turned into a path.
    """

    def _nested(container_key: str, field: str):
        # A nested container that is not a dict (e.g. a bare string) is
        # ignored instead of raising AttributeError and aborting the hook.
        container = payload.get(container_key)
        return container.get(field) if isinstance(container, dict) else None

    session_id = (
        payload.get("session_id")
        or payload.get("sessionId")
        or _nested("session", "id")
    )
    raw_path = (
        payload.get("transcript_path")
        or payload.get("transcriptPath")
        or _nested("transcript", "path")
    )
    cwd = payload.get("cwd") or payload.get("workspace") or None

    path: Path | None = None
    if raw_path:
        try:
            path = Path(raw_path).expanduser()
        except Exception:
            # Non-path-like value or unresolvable home: treat as "no transcript".
            path = None
    return session_id, path, cwd
+# ── transcript helpers (mirror Claude Code's JSONL shape) ──
def _msg(entry: dict) -> dict:
    """Return the entry's ``message`` mapping, or ``{}`` if absent or not a dict."""
    message = entry.get("message")
    if isinstance(message, dict):
        return message
    return {}
def _role(entry: dict) -> str | None:
    """Return ``"user"`` or ``"assistant"`` for an entry, else ``None``.

    The entry-level ``type`` wins; otherwise the nested ``message.role`` is
    consulted.
    """
    known = ("user", "assistant")
    kind = entry.get("type")
    if kind in known:
        return kind
    nested = _msg(entry).get("role")
    if nested in known:
        return nested
    return None
def _content(entry: dict):
    """Return the entry's content payload.

    When the entry has a ``message`` key the content is read from that message
    (``None`` if the message is not a dict); otherwise the entry's own
    ``content`` key is used.
    """
    if "message" in entry:
        return _msg(entry).get("content")
    return entry.get("content")
def _text(content) -> str:
    """Flatten message content into plain text.

    A string is returned unchanged. For a list, bare strings and the ``text``
    of ``{"type": "text"}`` blocks are joined with newlines, skipping empty
    pieces. Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    pieces = []
    for item in content:
        if isinstance(item, str):
            piece = item
        elif isinstance(item, dict) and item.get("type") == "text":
            piece = item.get("text", "")
        else:
            continue
        if piece:
            pieces.append(piece)
    return "\n".join(pieces)
def _blocks(content, block_type: str) -> list[dict]:
    """Return the dict blocks in ``content`` whose ``type`` equals ``block_type``.

    Non-list content yields an empty list; original order is preserved.
    """
    matches: list[dict] = []
    if isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") == block_type:
                matches.append(item)
    return matches
def _attachment_from_image_block(block: dict) -> dict | None:
    """Turn a Claude ``image`` content block into an attachment dict.

    Pasted images arrive as ``{"type":"image","source":{"type":"base64",
    "media_type":"image/png","data":"..."}}``. The base64 text is kept inline
    (``b64``) when the decoded size is within ``MAX_ATTACHMENT_BYTES``; larger
    images keep their metadata with ``b64`` set to ``None``. URL-backed images
    are recorded metadata-only with ``bytes`` of 0.

    Returns ``None`` when the block has no usable image source.
    """
    source = block.get("source")
    if not isinstance(source, dict):
        return None

    mime = source.get("media_type") or "image/png"
    label = block.get("name") or block.get("filename") or "pasted-image"
    source_kind = source.get("type")

    if source_kind == "base64":
        encoded = source.get("data")
        if not (isinstance(encoded, str) and encoded):
            return None
        # Decode once to learn the true byte size; undecodable data is dropped.
        try:
            size = len(base64.b64decode(encoded, validate=False))
        except Exception:
            return None
        inline = encoded if size <= MAX_ATTACHMENT_BYTES else None
        return {"kind": "image", "mime": mime, "name": str(label), "bytes": size, "b64": inline}

    # URL-backed image (rare in transcripts): metadata only.
    if source_kind == "url" and source.get("url"):
        return {"kind": "image", "mime": mime, "name": str(label), "bytes": 0, "b64": None}

    return None
def _mime_for_name(name: str) -> str:
    """Guess a mime type from the filename's extension (case-insensitive).

    Falls back to ``application/octet-stream`` when no known extension matches.
    """
    lowered = name.lower()
    candidates = (mime for ext, mime in _EXT_MIME.items() if lowered.endswith(ext))
    return next(candidates, "application/octet-stream")
def _attachment_from_file_block(block: dict) -> dict | None:
    """Turn a ``document``/file-ref content block into an attachment dict.

    When the block carries a base64 ``source`` the bytes are sized by decoding
    them, and the base64 text is kept inline only if it fits within
    ``MAX_ATTACHMENT_BYTES``. A block that merely names a file (``name`` /
    ``title`` / ``filename``) is recorded as metadata with ``bytes`` of 0.

    Returns ``None`` when the block has neither inline data nor a name, or
    when the inline data cannot be decoded.
    """
    source = block.get("source")
    given_name = block.get("name") or block.get("title") or block.get("filename")
    label = str(given_name or "attached-file")

    has_inline_data = (
        isinstance(source, dict)
        and source.get("type") == "base64"
        and isinstance(source.get("data"), str)
    )
    if has_inline_data:
        encoded = source["data"]
        mime = source.get("media_type") or _mime_for_name(label)
        try:
            size = len(base64.b64decode(encoded, validate=False))
        except Exception:
            return None
        inline = encoded if size <= MAX_ATTACHMENT_BYTES else None
        return {"kind": "file", "mime": mime, "name": label, "bytes": size, "b64": inline}

    # Bare reference with a name/path but no inline bytes: metadata only.
    if given_name:
        return {"kind": "file", "mime": _mime_for_name(label), "name": label, "bytes": 0, "b64": None}

    return None
def _extract_attachments(content) -> list[dict]:
    """Collect image/file attachments from a message or tool-result content list.

    ``image`` blocks go through the image builder and ``document``/``file``
    blocks through the file builder; every other block is ignored. Fail-open:
    a block whose processing raises is skipped, so attachment capture can
    never break span extraction. Non-list content yields ``[]``.
    """
    found: list[dict] = []
    if not isinstance(content, list):
        return found
    for item in content:
        if not isinstance(item, dict):
            continue
        try:
            kind = item.get("type")
            if kind == "image":
                attachment = _attachment_from_image_block(item)
            elif kind in ("document", "file"):
                attachment = _attachment_from_file_block(item)
            else:
                attachment = None
        except Exception:
            continue
        else:
            if attachment:
                found.append(attachment)
    return found
def _is_tool_result(entry: dict) -> bool:
    """Return True for a user-role entry carrying at least one ``tool_result`` block."""
    if _role(entry) != "user":
        return False
    return len(_blocks(_content(entry), "tool_result")) > 0
def _usage(entry: dict) -> dict:
    """Extract token counters from an entry's ``message.usage``.

    Returns ``{}`` when there is no usage mapping. Otherwise returns a dict
    with integer ``input``, ``output``, ``cache_read`` and ``cache_write``
    counts read from ``input_tokens``, ``output_tokens``,
    ``cache_read_input_tokens`` and ``cache_creation_input_tokens``.

    Missing, null or malformed counters count as 0, so one bad field cannot
    raise and cost the whole event.
    """

    def _count(value) -> int:
        # int() raises on junk such as "n/a", a list, NaN or infinity.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    message = entry.get("message")
    usage = message.get("usage") if isinstance(message, dict) else None
    if not isinstance(usage, dict):
        return {}
    return {
        "input": _count(usage.get("input_tokens")),
        "output": _count(usage.get("output_tokens")),
        "cache_read": _count(usage.get("cache_read_input_tokens")),
        "cache_write": _count(usage.get("cache_creation_input_tokens")),
    }
def _ts(entry: dict) -> str | None:
    """Return the entry's ``timestamp`` when it is a non-empty string, else ``None``."""
    stamp = entry.get("timestamp")
    if isinstance(stamp, str) and stamp:
        return stamp
    return None
def read_new_entries(path: Path, state: dict, key: str) -> list[dict]:
    """Return transcript entries appended since the last processed byte offset.

    Reads ``path`` from the offset stored in ``state[key]["offset"]`` to EOF,
    parses each line as JSON and returns the objects that are dicts. The new
    offset is written back into ``state[key]``.

    * A transcript shorter than the stored offset (rotated/truncated) is
      re-read from the start.
    * Lines are split on ``\\n`` at the byte level. Splitting the decoded text
      with ``str.splitlines()`` would also break on characters such as U+2028
      or U+0085, which may appear unescaped inside JSON strings.
    * A trailing line without a newline is consumed only if it already parses
      as JSON. Otherwise it is assumed to be mid-write and the offset stops
      before it, so the next run picks it up instead of losing it.
    * A missing file or a read error yields ``[]`` and leaves ``state`` as is.
    * A corrupt state slot (non-dict, or a non-integer offset) restarts at 0.
    """
    if not path.exists():
        return []

    session = state.get(key)
    if not isinstance(session, dict):
        session = {}
    try:
        offset = max(int(session.get("offset", 0)), 0)
    except (TypeError, ValueError):
        offset = 0

    try:
        if path.stat().st_size < offset:  # transcript rotated/truncated
            offset = 0
        with open(path, "rb") as fh:
            fh.seek(offset)
            chunk = fh.read()
    except Exception:
        return []

    # Everything up to the last newline is complete; the tail may be partial.
    head, newline, tail = chunk.rpartition(b"\n")
    raw_lines = head.split(b"\n") if newline else []
    consumed = len(head) + len(newline)
    if not tail.strip():
        consumed = len(chunk)
    else:
        try:
            json.loads(tail.decode("utf-8", errors="replace"))
        except Exception:
            pass  # incomplete line: leave it for the next run
        else:
            raw_lines.append(tail)
            consumed = len(chunk)

    session["offset"] = offset + consumed
    state[key] = session

    entries: list[dict] = []
    for raw in raw_lines:
        text = raw.decode("utf-8", errors="replace").strip()
        if not text:
            continue
        try:
            parsed = json.loads(text)
        except Exception:
            continue
        if isinstance(parsed, dict):
            entries.append(parsed)
    return entries
def build_spans(entries: list[dict]) -> tuple[list[dict], dict, list[str], str | None]:
    """Convert transcript entries into canonical spans plus rolled-up totals.

    Every assistant message yields one ``llm`` span (model + token usage), and
    every ``tool_use`` block inside it yields a child ``tool`` span. Returns
    ``(spans, totals, models, repo)``; ``repo`` is always ``None`` here.
    """
    # Pass 1: index tool results by tool_use_id so tool spans can be enriched.
    result_text: dict[str, str] = {}  # result content as text
    result_failed: dict[str, bool] = {}  # ``is_error`` flag -> span status
    result_atts: dict[str, list[dict]] = {}  # images/files a tool returned
    result_ts: dict[str, str] = {}  # ts of the message carrying the result
    for entry in entries:
        if not _is_tool_result(entry):
            continue
        returned_at = _ts(entry)
        for result in _blocks(_content(entry), "tool_result"):
            raw_id = result.get("tool_use_id")
            if not raw_id:
                continue
            key = str(raw_id)
            body = result.get("content")
            if isinstance(body, str):
                result_text[key] = body
            else:
                result_text[key] = json.dumps(body, ensure_ascii=False)
            result_failed[key] = bool(result.get("is_error"))
            found = _extract_attachments(body)
            if found:
                result_atts[key] = found
            if returned_at:
                result_ts[key] = returned_at

    spans: list[dict] = []
    models: list[str] = []
    totals = {
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
        "cache_read_tokens": 0,
        "cost_usd": 0.0,
        "span_count": 0,
        "tool_call_count": 0,
    }

    # Pass 2: walk the transcript in order. The latest user prompt text and the
    # attachments accumulated since the last generation are attached to the
    # next assistant span.
    queued_text = ""
    queued_atts: list[dict] = []
    # Timestamp of the previous entry; used as the llm span's start time.
    last_ts: str | None = None
    for entry in entries:
        role = _role(entry)
        if role != "assistant":
            if role == "user" and not _is_tool_result(entry):
                prompt = _content(entry)
                queued_text = _truncate(_text(prompt))
                queued_atts.extend(_extract_attachments(prompt))
            last_ts = _ts(entry) or last_ts
            continue

        message = _msg(entry)
        body = _content(entry)
        usage = _usage(entry)
        model = message.get("model")
        if model and model not in models:
            models.append(model)
        ended_at = _ts(entry) or _now_iso()
        llm_id = str(message.get("id") or uuid.uuid4().hex)
        reply_text = _truncate(_text(body))
        tokens_in = usage.get("input", 0)
        tokens_out = usage.get("output", 0)
        cache_read = usage.get("cache_read", 0)
        cache_write = usage.get("cache_write", 0)
        span_name = f"Claude generation ({model})" if model else "Claude generation"

        llm_span = {
            "span_id": llm_id,
            "parent_span_id": None,
            "name": span_name,
            "span_type": "llm",
            "start_time": last_ts or ended_at,
            "end_time": ended_at,
            "model": model,
            "provider": "anthropic",
            "input": queued_text or None,
            "output": reply_text or None,
            "input_tokens": tokens_in,
            "output_tokens": tokens_out,
            "total_tokens": tokens_in + tokens_out,
            "cache_read_tokens": cache_read,
            "cache_write_tokens": cache_write,
            "status": "ok",
            "attributes": {"turn_role": "assistant"},
        }
        if queued_atts:
            llm_span["attachments"] = queued_atts
        spans.append(llm_span)

        # The prompt and its attachments are consumed by this generation.
        queued_text = ""
        queued_atts = []

        totals["input_tokens"] += tokens_in
        totals["output_tokens"] += tokens_out
        totals["cache_read_tokens"] += cache_read
        totals["total_tokens"] += tokens_in + tokens_out

        for call in _blocks(body, "tool_use"):
            call_id = str(call.get("id") or uuid.uuid4().hex)
            tool_name = call.get("name") or "unknown"
            args = call.get("input")
            if isinstance(args, str):
                args_text = args
            else:
                args_text = json.dumps(args, ensure_ascii=False)
            tool_span = {
                "span_id": call_id,
                "parent_span_id": llm_id,
                "name": f"Tool: {tool_name}",
                "span_type": "tool",
                "start_time": ended_at,
                # Ends when its result arrived, or zero-length if none was seen.
                "end_time": result_ts.get(call_id, ended_at),
                "input": _truncate(args_text),
                "output": _truncate(result_text.get(call_id, "")) or None,
                "status": "error" if result_failed.get(call_id) else "ok",
                "attributes": {"tool_name": tool_name, "tool_id": call_id},
            }
            returned_atts = result_atts.get(call_id)
            if returned_atts:
                tool_span["attachments"] = returned_atts
            spans.append(tool_span)
            totals["tool_call_count"] += 1

        last_ts = ended_at

    totals["span_count"] = len(spans)
    return spans, totals, models, None
def user_ref() -> str:
    """Resolve the engineer's identity for attribution.

    Resolution order, first non-empty value wins:

    1. the ``OCTARIN_USER`` environment variable (explicit override);
    2. ``oauthAccount.emailAddress`` from ``~/.claude.json``;
    3. ``git config user.email``, run in ``CLAUDE_PROJECT_DIR`` or the cwd;
    4. the OS username;
    5. the literal ``"unknown"``.

    Every step is best-effort: a failure falls through to the next source.
    The git lookup carries a short timeout so a stuck ``git`` process cannot
    hold up the host tool.
    """
    override = (os.environ.get("OCTARIN_USER") or "").strip()
    if override:
        return override

    try:
        with open(Path.home() / ".claude.json", encoding="utf-8") as fh:
            account = json.load(fh).get("oauthAccount") or {}
        email = (account.get("emailAddress") or "").strip()
        if email:
            return email
    except Exception:
        pass  # unreadable or unexpected config: try the next source

    try:
        out = subprocess.check_output(
            ["git", "config", "user.email"],
            cwd=os.environ.get("CLAUDE_PROJECT_DIR") or os.getcwd(),
            stderr=subprocess.DEVNULL,
            timeout=2.0,  # TimeoutExpired is caught below like any other failure
        )
        email = out.decode().strip()
        if email:
            return email
    except Exception:
        pass  # git missing, not configured, or timed out

    try:
        return getpass.getuser()
    except Exception:
        return "unknown"
def post_event(event: dict) -> bool:
    """POST the IngestEvent as JSON; True on a 2xx response, False otherwise.

    The endpoint is ``OCTARIN_INGEST_URL`` or, failing that,
    ``<OCTARIN_API_BASE>/v1/ingest``; with neither set nothing is sent. A
    bearer token is attached only when ``OCTARIN_API_KEY`` is non-empty.
    Network and HTTP errors are swallowed (fail-open) and reported as False.
    """
    endpoint = os.environ.get("OCTARIN_INGEST_URL")
    if not endpoint:
        api_base = (os.environ.get("OCTARIN_API_BASE") or "").rstrip("/")
        if not api_base:
            return False
        endpoint = f"{api_base}/v1/ingest"

    headers = {"Content-Type": "application/json"}
    token = os.environ.get("OCTARIN_API_KEY", "")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    request = urllib.request.Request(
        endpoint,
        data=json.dumps(event).encode("utf-8"),
        headers=headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT_S) as response:
            return 200 <= response.status < 300
    except Exception:
        return False
def load_state() -> dict:
    """Load the persisted per-session state from ``STATE_FILE``.

    Returns ``{}`` when the file is missing, unreadable, not valid JSON, or
    holds JSON that is not an object. Without that last check a state file
    containing e.g. ``[]`` would be handed to callers that expect a dict and
    break every later run.
    """
    try:
        if not STATE_FILE.exists():
            return {}
        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return loaded if isinstance(loaded, dict) else {}
def save_state(state: dict) -> None:
    """Persist ``state`` to ``STATE_FILE``, best-effort.

    The JSON is written to a sibling ``.tmp`` file and then moved over the
    real file with ``os.replace``. Any failure is swallowed so a state-write
    problem never surfaces to the host tool.
    """
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        scratch = STATE_FILE.with_suffix(".tmp")
        serialized = json.dumps(state, sort_keys=True)
        scratch.write_text(serialized, encoding="utf-8")
        os.replace(scratch, STATE_FILE)
    except Exception:
        return
def build_event(payload: dict) -> dict | None:
    """Assemble the canonical IngestEvent from a hook payload.

    Returns ``None`` (nothing to send) when the payload lacks a session id or
    transcript path, when no new transcript entries exist, or when the new
    entries produce no spans.

    The event's ``start_time`` is the earliest span start and its ``end_time``
    the latest span end.
    """
    session_id, path, cwd = locate_transcript(payload)
    if not session_id or path is None:
        return None

    state = load_state()
    key = hashlib.sha256(f"{session_id}::{path}".encode()).hexdigest()
    entries = read_new_entries(path, state, key)
    # NOTE(review): the offset is persisted before the event is posted, so
    # entries from a failed POST are not retried on the next run.
    save_state(state)
    if not entries:
        return None

    spans, totals, models, _ = build_spans(entries)
    if not spans:
        return None

    repo = Path(cwd).name if cwd else None
    src_trace = f"{session_id}:{int(time.time())}"
    trace_id = str(uuid.uuid5(_TRACE_NAMESPACE, f"{SOURCE}:{src_trace}"))
    starts = [span["start_time"] for span in spans]
    # Use span END times for the trace end; taking the max of start times
    # under-reports the trace whenever the last span has a real duration.
    ends = [span["end_time"] for span in spans]
    return {
        "trace_id": trace_id,
        "source": SOURCE,
        "session_id": session_id,
        "user_ref": user_ref(),
        "repo": repo,
        "model": models[0] if models else None,
        "spans": spans,
        "start_time": min(starts),
        "end_time": max(ends),
        "total_tokens": totals["total_tokens"],
        "input_tokens": totals["input_tokens"],
        "output_tokens": totals["output_tokens"],
        "cache_read_tokens": totals["cache_read_tokens"],
        # extra (extra="allow"): handy for the backend rollup/audit
        "totals": totals,
        "models": models,
    }
def main() -> int:
    """Run the hook once: read the payload, build the event, POST it.

    Always returns 0. Any exception is swallowed so the hook can never break
    the host tool.
    """
    try:
        event = build_event(read_payload())
        if event is not None:
            post_event(event)
    except Exception:
        pass
    return 0


if __name__ == "__main__":
    sys.exit(main())