npm - octarin-cli - Versions diffs - 0.2.0 - Mend

octarin-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +202 -0
package/assets/backfill.py +1113 -0
package/assets/claude_code/hook.py +573 -0
package/assets/codex/hook.mjs +487 -0
package/assets/cursor/hook-handler.js +41 -0
package/assets/cursor/lib/canonical.js +240 -0
package/assets/cursor/lib/utils.js +138 -0
package/assets/repo-template/dot-claude/octarin/hook.py +685 -0
package/assets/repo-template/dot-claude/octarin/run.sh +41 -0
package/assets/repo-template/dot-claude/settings.json +15 -0
package/assets/repo-template/dot-codex/config.toml +6 -0
package/assets/repo-template/dot-codex/hooks/hook.mjs +531 -0
package/assets/repo-template/dot-codex/hooks/run.sh +38 -0
package/assets/repo-template/dot-cursor/hooks/hook-handler.js +41 -0
package/assets/repo-template/dot-cursor/hooks/lib/canonical.js +240 -0
package/assets/repo-template/dot-cursor/hooks/lib/utils.js +196 -0
package/assets/repo-template/dot-cursor/hooks/run.sh +41 -0
package/assets/repo-template/dot-cursor/hooks.json +13 -0
package/dist/args.js +85 -0
package/dist/assets.js +28 -0
package/dist/client.js +105 -0
package/dist/envfile.js +94 -0
package/dist/index.js +192 -0
package/dist/init.js +314 -0
package/dist/init_repo.js +348 -0
package/dist/login.js +209 -0
package/dist/output.js +56 -0
package/package.json +37 -0

package/assets/cursor/lib/canonical.js ADDED Viewed

@@ -0,0 +1,240 @@
+/**
+ * canonical.js — map Cursor hook payloads to Octarin canonical IngestEvents.
+ *
+ * Cursor fires a separate process per hook event (beforeSubmitPrompt,
+ * afterAgentResponse, afterFileEdit, stop, ...), so each event becomes its own
+ * IngestEvent carrying one span. All events for a conversation share a
+ * deterministic trace_id (derived from `conversation_id`) so the backend rolls
+ * them into a single trace. Shape: backend/app/schema/canonical.py::IngestEvent.
+ */
+import { SOURCE, truncate, nowIso, userRef, deterministicTraceId } from "./utils.js";
+function repoFromRoots(roots) {
+  if (!Array.isArray(roots) || roots.length === 0) return null;
+  const r = String(roots[0]);
+  return r.split("/").filter(Boolean).pop() || null;
+}
+/** Wrap one span into a full IngestEvent for the given conversation. */
+function envelope(input, span, model) {
+  const conv = input.conversation_id || input.generation_id || "cursor-session";
+  return {
+    trace_id: deterministicTraceId(conv),
+    source: SOURCE,
+    session_id: input.conversation_id || null,
+    // Prefer the signed-in email Cursor puts on the event; else resolve a real
+    // identity (git / OS user) rather than an opaque machine hash.
+    user_ref: (input.user_email || "").trim() || userRef(),
+    repo: repoFromRoots(input.workspace_roots),
+    model: model || input.model || null,
+    spans: [span],
+    start_time: span.start_time,
+    end_time: span.end_time,
+  };
+}
+function baseSpan(spanId, name, spanType) {
+  const ts = nowIso();
+  return {
+    span_id: spanId,
+    parent_span_id: null,
+    name,
+    span_type: spanType,
+    start_time: ts,
+    end_time: ts,
+    status: "ok",
+    attributes: {},
+  };
+}
+function spanId(input, suffix) {
+  const conv = input.conversation_id || "conv";
+  const gen = input.generation_id || Date.now();
+  return `${conv}:${gen}:${suffix}`;
+}
+/**
+ * Cursor `afterAgentResponse` carries the model output.
+ *
+ * TOKEN USAGE: as of Cursor 1.7 NO hook event exposes per-turn token usage —
+ * the documented `afterAgentResponse` fields are only `text` (+ the shared
+ * envelope: conversation_id, generation_id, model, workspace_roots,
+ * transcript_path, ...). There is no `usage`, `input_tokens`, or `output_tokens`
+ * anywhere in the hook payload, so these spans legitimately carry 0 tokens (not
+ * a capture bug). The lookup below stays defensive against several possible
+ * field shapes so we transparently pick usage up IF a future Cursor version
+ * starts emitting it — but we never fabricate counts when it is absent.
+ * (`preCompact.context_tokens` is context-window utilisation, not billable
+ * usage, so we deliberately do not treat it as tokens.)
+ */
+function fromAfterAgentResponse(input) {
+  const span = baseSpan(spanId(input, "gen"), "Cursor agent response", "llm");
+  span.model = input.model || null;
+  span.input = truncate(input.prompt || "");
+  span.output = truncate(input.text || "");
+  const usage = input.usage || input.token_usage || input.tokens || {};
+  span.input_tokens = Number(usage.input_tokens || usage.prompt_tokens || 0) || 0;
+  span.output_tokens = Number(usage.output_tokens || usage.completion_tokens || 0) || 0;
+  span.cache_read_tokens =
+    Number(usage.cache_read_input_tokens || usage.cached_input_tokens || 0) || 0;
+  span.cache_write_tokens =
+    Number(usage.cache_creation_input_tokens || usage.cache_write_tokens || 0) || 0;
+  span.total_tokens =
+    Number(usage.total_tokens || 0) || span.input_tokens + span.output_tokens;
+  span.attributes = {
+    generation_id: input.generation_id,
+    hook: "afterAgentResponse",
+    // Flag when Cursor supplied no usage so the gap is visible downstream
+    // rather than looking like a silently-dropped count.
+    usage_available: Object.keys(usage).length > 0,
+  };
+  return envelope(input, span, input.model);
+}
+/** `beforeSubmitPrompt` records the user turn (no tokens yet). */
+function fromBeforeSubmitPrompt(input) {
+  const span = baseSpan(spanId(input, "prompt"), "Cursor user prompt", "agent");
+  span.input = truncate(input.prompt || "");
+  span.attributes = {
+    generation_id: input.generation_id,
+    attachment_count: (input.attachments || []).length,
+    hook: "beforeSubmitPrompt",
+  };
+  return envelope(input, span, input.model);
+}
+function asText(v) {
+  if (v == null) return "";
+  return typeof v === "string" ? v : JSON.stringify(v);
+}
+/**
+ * Tool span id keyed on Cursor's `tool_use_id` (unique per call) so multiple
+ * tools in ONE turn don't collide on the same id and get deduped to one (the
+ * old `afterFileEdit` keyed on conversation:generation, so 3 edits in a turn
+ * overwrote each other). Falls back to a per-call unique when absent.
+ */
+function toolSpanId(input) {
+  const conv = input.conversation_id || "conv";
+  return `${conv}:tool:${input.tool_use_id || input.generation_id || Date.now()}`;
+}
+/**
+ * `postToolUse` — the GENERIC post-tool hook; fires for EVERY tool (edit, shell,
+ * read, MCP, ...) with its result. Replaces the per-tool afterFileEdit /
+ * afterShellExecution / afterMCPExecution (which would double-count).
+ */
+function fromPostToolUse(input) {
+  const tool = input.tool_name || "tool";
+  const span = baseSpan(toolSpanId(input), tool, "tool");
+  span.input = truncate(asText(input.tool_input));
+  span.output = truncate(asText(input.tool_output));
+  span.attributes = {
+    tool_name: tool,
+    tool_use_id: input.tool_use_id,
+    duration_ms: input.duration,
+    hook: "postToolUse",
+  };
+  return envelope(input, span, input.model);
+}
+/** `postToolUseFailure` — a tool that errored; this is what powers a real
+ *  Cursor error rate (status=error, failure_type, is_interrupt). */
+function fromPostToolUseFailure(input) {
+  const tool = input.tool_name || "tool";
+  const span = baseSpan(toolSpanId(input), tool, "tool");
+  span.status = "error";
+  span.error_message = input.error_message || input.failure_type || "tool failed";
+  span.input = truncate(asText(input.tool_input));
+  span.attributes = {
+    tool_name: tool,
+    tool_use_id: input.tool_use_id,
+    failure_type: input.failure_type,
+    is_interrupt: input.is_interrupt,
+    duration_ms: input.duration,
+    hook: "postToolUseFailure",
+  };
+  return envelope(input, span, input.model);
+}
+/** `sessionStart` — explicit session boundary (identity rides the envelope). */
+function fromSessionStart(input) {
+  const span = baseSpan(spanId(input, "session-start"), "Cursor session start", "agent");
+  span.attributes = {
+    session_id: input.session_id,
+    is_background_agent: input.is_background_agent,
+    composer_mode: input.composer_mode,
+    hook: "sessionStart",
+  };
+  return envelope(input, span, input.model);
+}
+/** `sessionEnd` — richer close than `stop`: final status + reason + duration. */
+function fromSessionEnd(input) {
+  const span = baseSpan(spanId(input, "session-end"), "Cursor session end", "agent");
+  span.status = input.final_status === "error" ? "error" : "ok";
+  if (span.status === "error") span.error_message = input.error_message || "session error";
+  span.attributes = {
+    session_id: input.session_id,
+    reason: input.reason,
+    final_status: input.final_status,
+    duration_ms: input.duration_ms,
+    hook: "sessionEnd",
+  };
+  return envelope(input, span, input.model);
+}
+/**
+ * `preCompact` — context-window compaction signal. NOTE: `context_tokens` here
+ * is context UTILISATION, not billable usage, so it is deliberately NOT mapped
+ * to span tokens (no Cursor hook exposes real usage).
+ */
+function fromPreCompact(input) {
+  const span = baseSpan(
+    spanId(input, `compact-${input.message_count || 0}`),
+    "Context compaction",
+    "agent",
+  );
+  span.attributes = {
+    hook: "preCompact",
+    trigger: input.trigger,
+    context_usage_percent: input.context_usage_percent,
+    context_tokens: input.context_tokens,
+    context_window_size: input.context_window_size,
+    message_count: input.message_count,
+    messages_to_compact: input.messages_to_compact,
+    is_first_compaction: input.is_first_compaction,
+  };
+  return envelope(input, span, input.model);
+}
+/** `stop` records task completion + status. */
+function fromStop(input) {
+  const span = baseSpan(spanId(input, "stop"), "Cursor agent stopped", "agent");
+  span.status = input.status === "error" ? "error" : "ok";
+  if (input.status === "error") span.error_message = "agent error";
+  span.attributes = { status: input.status, loop_count: input.loop_count, hook: "stop" };
+  return envelope(input, span, input.model);
+}
+const BUILDERS = {
+  sessionStart: fromSessionStart,
+  beforeSubmitPrompt: fromBeforeSubmitPrompt,
+  afterAgentResponse: fromAfterAgentResponse,
+  postToolUse: fromPostToolUse,
+  postToolUseFailure: fromPostToolUseFailure,
+  preCompact: fromPreCompact,
+  stop: fromStop,
+  sessionEnd: fromSessionEnd,
+};
+/**
+ * Build an IngestEvent for a Cursor hook event, or null if this event carries
+ * nothing worth sending. `hookName` falls back to `input.hook_event_name`.
+ */
+export function buildEvent(hookName, input) {
+  const name = hookName || input.hook_event_name;
+  const builder = BUILDERS[name];
+  return builder ? builder(input) : null;
+}

package/assets/cursor/lib/utils.js ADDED Viewed

@@ -0,0 +1,138 @@
+/**
+ * utils.js — tiny stdlib helpers for the Cursor -> Octarin capture hook.
+ *
+ * Zero npm dependencies: stdin reading, a raw `https`/`http` POST with a hard
+ * timeout, a real-identity user_ref (git email / OS user), and a deterministic UUID5 (matching the
+ * backend's trace-id namespace). Everything here is fail-open friendly — the
+ * caller decides what to do on rejection.
+ */
+import https from "node:https";
+import http from "node:http";
+import crypto from "node:crypto";
+import os from "node:os";
+import { execFileSync } from "node:child_process";
+export const SOURCE = "cursor";
+export const MAX_TEXT = 20000;
+export const HTTP_TIMEOUT_MS = 5000;
+// Same namespace as backend deterministic_trace_id so retries de-duplicate.
+const TRACE_NAMESPACE = "6f8d2c1e-9a3b-4f5e-8c7d-1a2b3c4d5e6f";
+export function readStdin() {
+  return new Promise((resolve) => {
+    let data = "";
+    process.stdin.setEncoding("utf8");
+    process.stdin.on("data", (chunk) => (data += chunk));
+    process.stdin.on("end", () => {
+      try {
+        resolve(data.trim() ? JSON.parse(data) : {});
+      } catch {
+        resolve({});
+      }
+    });
+    process.stdin.on("error", () => resolve({}));
+  });
+}
+export function truncate(text) {
+  if (typeof text !== "string") return text == null ? "" : String(text);
+  return text.length <= MAX_TEXT ? text : text.slice(0, MAX_TEXT);
+}
+export function nowIso() {
+  return new Date().toISOString();
+}
+/** The committing git identity, or "" if git isn't configured here. */
+function gitEmail() {
+  try {
+    return execFileSync("git", ["config", "user.email"], {
+      stdio: ["ignore", "pipe", "ignore"],
+    })
+      .toString()
+      .trim();
+  } catch {
+    return "";
+  }
+}
+/**
+ * Resolve the engineer's real identity for attribution.
+ *
+ * Priority: an explicit OCTARIN_USER override → the git user.email → the OS
+ * username. We attribute to a real person (matching backfill.py + the per-user
+ * ingest key) rather than an opaque per-machine hash. When a per-user key is
+ * present the server overrides this with the key owner anyway; a real identity
+ * here is what ANONYMOUS (slug-only) sends rely on.
+ */
+export function userRef() {
+  const env = (process.env.OCTARIN_USER || "").trim();
+  if (env) return env;
+  const email = gitEmail();
+  if (email) return email;
+  return os.userInfo().username || "unknown";
+}
+/** RFC-4122 v5 UUID from (namespace, name) — matches Python's uuid.uuid5. */
+export function uuid5(name) {
+  const ns = Buffer.from(TRACE_NAMESPACE.replace(/-/g, ""), "hex");
+  const hash = crypto.createHash("sha1").update(Buffer.concat([ns, Buffer.from(name, "utf8")])).digest();
+  const bytes = hash.subarray(0, 16);
+  bytes[6] = (bytes[6] & 0x0f) | 0x50; // version 5
+  bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant
+  const hex = bytes.toString("hex");
+  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
+}
+export function deterministicTraceId(sourceTraceId) {
+  return uuid5(`${SOURCE}:${sourceTraceId}`);
+}
+/**
+ * Fire-and-forget POST of an IngestEvent. Resolves true on 2xx, false otherwise.
+ * Never throws — the hook must stay fail-open.
+ */
+export function postEvent(event) {
+  return new Promise((resolve) => {
+    let url = process.env.OCTARIN_INGEST_URL;
+    if (!url) {
+      const base = (process.env.OCTARIN_API_BASE || "").replace(/\/+$/, "");
+      if (!base) return resolve(false);
+      url = `${base}/v1/ingest`;
+    }
+    let parsed;
+    try {
+      parsed = new URL(url);
+    } catch {
+      return resolve(false);
+    }
+    const body = Buffer.from(JSON.stringify(event), "utf8");
+    const headers = { "Content-Type": "application/json", "Content-Length": body.length };
+    if (process.env.OCTARIN_API_KEY) {
+      headers.Authorization = `Bearer ${process.env.OCTARIN_API_KEY}`;
+    }
+    const lib = parsed.protocol === "http:" ? http : https;
+    const req = lib.request(
+      {
+        method: "POST",
+        hostname: parsed.hostname,
+        port: parsed.port || (parsed.protocol === "http:" ? 80 : 443),
+        path: parsed.pathname + parsed.search,
+        headers,
+        timeout: HTTP_TIMEOUT_MS,
+      },
+      (res) => {
+        res.on("data", () => {});
+        res.on("end", () => resolve(res.statusCode >= 200 && res.statusCode < 300));
+      },
+    );
+    req.on("error", () => resolve(false));
+    req.on("timeout", () => {
+      req.destroy();
+      resolve(false);
+    });
+    req.write(body);
+    req.end();
+  });
+}