octarin-cli 0.3.4 → 0.4.0

This diff shows the differences between the contents of two publicly available versions of a package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -27,7 +27,7 @@ import os
27
27
  import ssl
28
28
  import subprocess
29
29
  import sys
30
- import time
30
+ import urllib.parse
31
31
  import urllib.request
32
32
  import uuid
33
33
  from datetime import datetime, timezone
@@ -593,19 +593,42 @@ def post_event(event: dict) -> bool:
593
593
  return False
594
594
 
595
595
 
596
- def load_state() -> dict:
596
+ def _state_file(key: str) -> Path:
597
+ """Per-session state file.
598
+
599
+ One file per session key, NOT one shared JSON: with the shared file two
600
+ concurrent sessions raced load→save and the last writer clobbered the other
601
+ session's offset back, so its next fire re-read (and re-sent) transcript
602
+ chunks it had already shipped.
603
+ """
604
+ return STATE_DIR / f"claude_code_state.{key[:32]}.json"
605
+
606
+
607
+ def load_state(key: str) -> dict:
608
+ f = _state_file(key)
597
609
  try:
598
- return json.loads(STATE_FILE.read_text(encoding="utf-8")) if STATE_FILE.exists() else {}
610
+ if f.exists():
611
+ return {key: json.loads(f.read_text(encoding="utf-8"))}
599
612
  except Exception:
600
613
  return {}
614
+ # One-time migration: pick this session's entry out of the legacy shared file.
615
+ try:
616
+ if STATE_FILE.exists():
617
+ legacy = json.loads(STATE_FILE.read_text(encoding="utf-8"))
618
+ if key in legacy:
619
+ return {key: legacy[key]}
620
+ except Exception:
621
+ pass
622
+ return {}
601
623
 
602
624
 
603
- def save_state(state: dict) -> None:
625
+ def save_state(state: dict, key: str) -> None:
604
626
  try:
605
627
  STATE_DIR.mkdir(parents=True, exist_ok=True)
606
- tmp = STATE_FILE.with_suffix(".tmp")
607
- tmp.write_text(json.dumps(state, sort_keys=True), encoding="utf-8")
608
- os.replace(tmp, STATE_FILE)
628
+ f = _state_file(key)
629
+ tmp = f.with_suffix(".tmp")
630
+ tmp.write_text(json.dumps(state.get(key) or {}, sort_keys=True), encoding="utf-8")
631
+ os.replace(tmp, f)
609
632
  except Exception:
610
633
  pass
611
634
 
@@ -616,10 +639,10 @@ def build_event(payload: dict) -> dict | None:
616
639
  if not session_id or path is None:
617
640
  return None
618
641
 
619
- state = load_state()
620
642
  key = hashlib.sha256(f"{session_id}::{path}".encode()).hexdigest()
643
+ state = load_state(key)
621
644
  entries = read_new_entries(path, state, key)
622
- save_state(state)
645
+ save_state(state, key)
623
646
  if not entries:
624
647
  return None
625
648
 
@@ -628,7 +651,12 @@ def build_event(payload: dict) -> dict | None:
628
651
  return None
629
652
 
630
653
  repo = Path(cwd).name if cwd else None
631
- src_trace = f"{session_id}:{int(time.time())}"
654
+ # Day-stable trace id (like the Cursor hook): every fire for a session
655
+ # upserts the SAME trace, so a re-sent chunk (offset race, retry) replaces
656
+ # its span rows in the ReplacingMergeTree instead of minting a new trace
657
+ # per second and double-counting. The date suffix splits multi-day sessions.
658
+ day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
659
+ src_trace = f"{session_id}:{day}"
632
660
  trace_id = str(uuid.uuid5(_TRACE_NAMESPACE, f"{SOURCE}:{src_trace}"))
633
661
  times = [s["start_time"] for s in spans]
634
662
 
@@ -652,9 +680,70 @@ def build_event(payload: dict) -> dict | None:
652
680
  }
653
681
 
654
682
 
683
+ def _api_base() -> str:
684
+ """The API base URL (no trailing slash), derived from the capture env.
685
+
686
+ Prefers OCTARIN_API_BASE; otherwise strips ``/v1/ingest`` off OCTARIN_INGEST_URL.
687
+ """
688
+ base = (os.environ.get("OCTARIN_API_BASE") or "").rstrip("/")
689
+ if base:
690
+ return base
691
+ url = (os.environ.get("OCTARIN_INGEST_URL") or "").rstrip("/")
692
+ if url.endswith("/v1/ingest"):
693
+ return url[: -len("/v1/ingest")]
694
+ return ""
695
+
696
+
697
+ def inject_context(payload: dict) -> None:
698
+ """SessionStart: fetch the org's Memory pack and emit it as additionalContext.
699
+
700
+ Octarin Memory is the team's durable, shared knowledge (decisions, conventions,
701
+ gotchas). On a new session we pull the pack for this repo and hand it to Claude
702
+ Code as ``hookSpecificOutput.additionalContext`` so the agent starts with the
703
+ team's hard-won context instead of rediscovering it. Fail-open and silent: any
704
+ problem (no key, network, empty) prints nothing and the session proceeds.
705
+ """
706
+ base = _api_base()
707
+ api_key = os.environ.get("OCTARIN_API_KEY", "")
708
+ if not base or not api_key:
709
+ return
710
+ cwd = payload.get("cwd") or payload.get("workspace") or ""
711
+ repo = Path(cwd).name if cwd else ""
712
+ qs = urllib.parse.urlencode({"repo": repo, "limit": "8"})
713
+ req = urllib.request.Request(f"{base}/v1/memory/agent-context?{qs}", method="GET")
714
+ req.add_header("Authorization", f"Bearer {api_key}")
715
+ try:
716
+ with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_S, context=_ssl_context()) as resp:
717
+ if not (200 <= resp.status < 300):
718
+ return
719
+ data = json.loads(resp.read().decode("utf-8"))
720
+ except Exception:
721
+ return
722
+ context = ((data or {}).get("context") or "").strip()
723
+ if not context:
724
+ return
725
+ header = (
726
+ "# Octarin Memory — durable decisions, conventions & gotchas your team has "
727
+ "recorded (shared across everyone). Treat as authoritative context:\n"
728
+ )
729
+ out = {
730
+ "hookSpecificOutput": {
731
+ "hookEventName": "SessionStart",
732
+ "additionalContext": header + context,
733
+ }
734
+ }
735
+ sys.stdout.write(json.dumps(out))
736
+
737
+
655
738
  def main() -> int:
656
739
  try:
657
740
  payload = read_payload()
741
+ # One script, two Claude Code hooks: SessionStart injects Memory context;
742
+ # Stop (the default) captures the finished turn.
743
+ event_name = str(payload.get("hook_event_name") or payload.get("hookEventName") or "")
744
+ if event_name == "SessionStart":
745
+ inject_context(payload)
746
+ return 0
658
747
  event = build_event(payload)
659
748
  if event is None:
660
749
  return 0
@@ -391,6 +391,11 @@ def build_spans( # noqa: PLR0915 - top-down transcript parser; splitting it
391
391
 
392
392
  pending_user_text = ""
393
393
  pending_user_attachments: list[dict] = []
394
+ # One API generation streams as SEVERAL transcript entries (one per content
395
+ # block — text, then each tool_use) that share the same message id and each
396
+ # repeat the generation's FULL usage. Merge them into ONE span keyed by that
397
+ # id: usage/cost counted once, outputs concatenated, tool children attached.
398
+ llm_span_by_id: dict[str, dict] = {}
394
399
  # ts of the previous transcript entry; the LLM call started when the user
395
400
  # prompt / tool result landed, finished when the assistant message appears.
396
401
  prev_ts: str | None = None
@@ -417,6 +422,32 @@ def build_spans( # noqa: PLR0915 - top-down transcript parser; splitting it
417
422
  span_id = _msg(entry).get("id") or uuid.uuid4().hex
418
423
  out_text = _truncate(_text(content))
419
424
 
425
+ existing = llm_span_by_id.get(str(span_id))
426
+ if existing is not None:
427
+ # Continuation entry of an already-seen generation: extend the span,
428
+ # never re-count its usage (each entry repeats the full totals).
429
+ existing["end_time"] = ts
430
+ if out_text:
431
+ joined = (
432
+ f"{existing['output']}\n{out_text}"
433
+ if existing["output"]
434
+ else out_text
435
+ )
436
+ existing["output"] = _truncate(joined)
437
+ for tu in _blocks(content, "tool_use"):
438
+ _append_tool_span(
439
+ spans,
440
+ totals,
441
+ tu,
442
+ parent_span_id=str(span_id),
443
+ ts=ts,
444
+ results_by_id=results_by_id,
445
+ result_ts_by_id=result_ts_by_id,
446
+ attachments_by_tool_id=attachments_by_tool_id,
447
+ )
448
+ prev_ts = ts
449
+ continue
450
+
420
451
  in_tok = usage.get("input", 0)
421
452
  out_tok = usage.get("output", 0)
422
453
  cache_r = usage.get("cache_read", 0)
@@ -443,6 +474,7 @@ def build_spans( # noqa: PLR0915 - top-down transcript parser; splitting it
443
474
  if pending_user_attachments:
444
475
  llm_span["attachments"] = pending_user_attachments
445
476
  spans.append(llm_span)
477
+ llm_span_by_id[str(span_id)] = llm_span
446
478
  pending_user_text = "" # consumed by this generation
447
479
  pending_user_attachments = [] # consumed by this generation
448
480
 
@@ -452,31 +484,16 @@ def build_spans( # noqa: PLR0915 - top-down transcript parser; splitting it
452
484
  totals["total_tokens"] += in_tok + out_tok
453
485
 
454
486
  for tu in _blocks(content, "tool_use"):
455
- tid = str(tu.get("id") or uuid.uuid4().hex)
456
- tname = tu.get("name") or "unknown"
457
- tu_input = tu.get("input")
458
- input_str = (
459
- tu_input
460
- if isinstance(tu_input, str)
461
- else json.dumps(tu_input, ensure_ascii=False)
487
+ _append_tool_span(
488
+ spans,
489
+ totals,
490
+ tu,
491
+ parent_span_id=str(span_id),
492
+ ts=ts,
493
+ results_by_id=results_by_id,
494
+ result_ts_by_id=result_ts_by_id,
495
+ attachments_by_tool_id=attachments_by_tool_id,
462
496
  )
463
- tool_span = {
464
- "span_id": tid,
465
- "parent_span_id": str(span_id),
466
- "name": f"Tool: {tname}",
467
- "span_type": "tool",
468
- "start_time": ts,
469
- "end_time": result_ts_by_id.get(tid, ts),
470
- "input": _truncate(input_str),
471
- "output": _truncate(results_by_id.get(tid, "")) or None,
472
- "status": "ok",
473
- "attributes": {"tool_name": tname, "tool_id": tid},
474
- }
475
- tool_atts = attachments_by_tool_id.get(tid)
476
- if tool_atts:
477
- tool_span["attachments"] = tool_atts
478
- spans.append(tool_span)
479
- totals["tool_call_count"] += 1
480
497
 
481
498
  prev_ts = ts
482
499
 
@@ -484,6 +501,43 @@ def build_spans( # noqa: PLR0915 - top-down transcript parser; splitting it
484
501
  return spans, totals, models, None
485
502
 
486
503
 
504
+ def _append_tool_span(
505
+ spans: list[dict],
506
+ totals: dict,
507
+ tu: dict,
508
+ *,
509
+ parent_span_id: str,
510
+ ts: str,
511
+ results_by_id: dict[str, str],
512
+ result_ts_by_id: dict[str, str],
513
+ attachments_by_tool_id: dict[str, list[dict]],
514
+ ) -> None:
515
+ """Append one ``tool`` child span for a ``tool_use`` block to ``spans``."""
516
+ tid = str(tu.get("id") or uuid.uuid4().hex)
517
+ tname = tu.get("name") or "unknown"
518
+ tu_input = tu.get("input")
519
+ input_str = (
520
+ tu_input if isinstance(tu_input, str) else json.dumps(tu_input, ensure_ascii=False)
521
+ )
522
+ tool_span = {
523
+ "span_id": tid,
524
+ "parent_span_id": parent_span_id,
525
+ "name": f"Tool: {tname}",
526
+ "span_type": "tool",
527
+ "start_time": ts,
528
+ "end_time": result_ts_by_id.get(tid, ts),
529
+ "input": _truncate(input_str),
530
+ "output": _truncate(results_by_id.get(tid, "")) or None,
531
+ "status": "ok",
532
+ "attributes": {"tool_name": tname, "tool_id": tid},
533
+ }
534
+ tool_atts = attachments_by_tool_id.get(tid)
535
+ if tool_atts:
536
+ tool_span["attachments"] = tool_atts
537
+ spans.append(tool_span)
538
+ totals["tool_call_count"] += 1
539
+
540
+
487
541
  def user_ref() -> str:
488
542
  """Resolve the engineer's real identity for attribution.
489
543
 
package/dist/index.js CHANGED
File without changes
package/dist/init.js CHANGED
@@ -129,13 +129,22 @@ async function readJson(path) {
129
129
  function hasOctarin(value) {
130
130
  return JSON.stringify(value ?? "").toLowerCase().includes("octarin");
131
131
  }
132
- /** Merge the Stop hook into ~/.claude/settings.json, preserving other settings. */
132
+ /**
133
+ * Merge Octarin's Claude Code hooks into ~/.claude/settings.json, preserving
134
+ * other settings. Two events, ONE wrapper command (hook.py branches on the
135
+ * hook event): **Stop** captures the finished turn; **SessionStart** injects the
136
+ * team's shared Octarin Memory pack as context so a new session starts with the
137
+ * org's durable decisions/conventions/gotchas. Idempotent (deduped by the
138
+ * "octarin" marker in the command).
139
+ */
133
140
  async function mergeClaudeSettings(path, command) {
134
141
  const json = await readJson(path);
135
142
  const hooks = (json.hooks ??= {});
136
- const stop = (Array.isArray(hooks.Stop) ? hooks.Stop : (hooks.Stop = []));
137
- if (!stop.some(hasOctarin)) {
138
- stop.push({ hooks: [{ type: "command", command }] });
143
+ for (const event of ["Stop", "SessionStart"]) {
144
+ const arr = (Array.isArray(hooks[event]) ? hooks[event] : (hooks[event] = []));
145
+ if (!arr.some(hasOctarin)) {
146
+ arr.push({ hooks: [{ type: "command", command }] });
147
+ }
139
148
  }
140
149
  await fs.mkdir(dirname(path), { recursive: true });
141
150
  await fs.writeFile(path, JSON.stringify(json, null, 2) + "\n");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "octarin-cli",
3
- "version": "0.3.4",
3
+ "version": "0.4.0",
4
4
  "description": "Octarin's per-user CLI: install AI-coding capture (`octarin init` / `init-repo`) and authorize a machine (`octarin login`). Streams your Claude Code / Cursor / Codex usage to your Octarin workspace.",
5
5
  "keywords": [
6
6
  "octarin",