@os-eco/overstory-cli 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,18 +1,21 @@
1
1
  # Overstory
2
2
 
3
- Multi-agent orchestration for Claude Code.
3
+ Multi-agent orchestration for AI coding agents.
4
4
 
5
5
  [![npm](https://img.shields.io/npm/v/@os-eco/overstory-cli)](https://www.npmjs.com/package/@os-eco/overstory-cli)
6
6
  [![CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml/badge.svg)](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
7
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
8
 
9
- Overstory turns a single Claude Code session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution.
9
+ Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/nichochar/pi-coding-agent), or your own adapter.
10
10
 
11
11
  > **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
12
12
 
13
13
  ## Install
14
14
 
15
- Requires [Bun](https://bun.sh) v1.0+, [Claude Code](https://docs.anthropic.com/en/docs/claude-code), git, and tmux.
15
+ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agent runtime must be installed:
16
+
17
+ - [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (`claude` CLI)
18
+ - [Pi](https://github.com/nichochar/pi-coding-agent) (`pi` CLI)
16
19
 
17
20
  ```bash
18
21
  bun install -g @os-eco/overstory-cli
@@ -158,11 +161,20 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
158
161
 
159
162
  ## Architecture
160
163
 
161
- Overstory uses CLAUDE.md overlays and PreToolUse hooks to turn Claude Code sessions into orchestrated agents. Each agent runs in an isolated git worktree via tmux. Inter-agent messaging is handled by a custom SQLite mail system (WAL mode, ~1-5ms per query) with typed protocol messages and broadcast support. A FIFO merge queue with 4-tier conflict resolution merges agent branches back to canonical. A tiered watchdog system (Tier 0 mechanical daemon, Tier 1 AI-assisted triage, Tier 2 monitor agent) ensures fleet health. See [CLAUDE.md](CLAUDE.md) for full technical details.
164
+ Overstory uses instruction overlays and tool-call guards to turn agent sessions into orchestrated workers. Each agent runs in an isolated git worktree via tmux. Inter-agent messaging is handled by a custom SQLite mail system (WAL mode, ~1-5ms per query) with typed protocol messages and broadcast support. A FIFO merge queue with 4-tier conflict resolution merges agent branches back to canonical. A tiered watchdog system (Tier 0 mechanical daemon, Tier 1 AI-assisted triage, Tier 2 monitor agent) ensures fleet health. See [CLAUDE.md](CLAUDE.md) for full technical details.
165
+
166
+ ### Runtime Adapters
167
+
168
+ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types.ts`) defines the contract — each adapter handles spawning, config deployment, guard enforcement, readiness detection, and transcript parsing for its runtime. Set the default in `config.yaml` or override per-agent with `ov sling --runtime <name>`.
169
+
170
+ | Runtime | CLI | Guard Mechanism | Status |
171
+ |---------|-----|-----------------|--------|
172
+ | Claude Code | `claude` | `settings.local.json` hooks | Stable |
173
+ | Pi | `pi` | `.pi/extensions/` guard extension | Active development |
162
174
 
163
175
  ## How It Works
164
176
 
165
- CLAUDE.md + hooks + the `ov` CLI turn your Claude Code session into a multi-agent orchestrator. A persistent coordinator agent manages task decomposition and dispatch, while a mechanical watchdog daemon monitors agent health in the background.
177
+ Instruction overlays + tool-call guards + the `ov` CLI turn your coding session into a multi-agent orchestrator. A persistent coordinator agent manages task decomposition and dispatch, while a mechanical watchdog daemon monitors agent health in the background.
166
178
 
167
179
  ```
168
180
  Coordinator (persistent orchestrator at project root)
@@ -190,10 +202,10 @@ Coordinator (persistent orchestrator at project root)
190
202
  - **Worktrees**: Each agent gets an isolated git worktree — no file conflicts between agents
191
203
  - **Merge**: FIFO merge queue (SQLite-backed) with 4-tier conflict resolution
192
204
  - **Watchdog**: Tiered health monitoring — Tier 0 mechanical daemon (tmux/pid liveness), Tier 1 AI-assisted failure triage, Tier 2 monitor agent for continuous fleet patrol
193
- - **Tool Enforcement**: PreToolUse hooks mechanically block file modifications for non-implementation agents and dangerous git operations for all agents
205
+ - **Tool Enforcement**: Runtime-specific guards (hooks for Claude Code, extensions for Pi) mechanically block file modifications for non-implementation agents and dangerous git operations for all agents
194
206
  - **Task Groups**: Batch coordination with auto-close when all member issues complete
195
207
  - **Session Lifecycle**: Checkpoint save/restore for compaction survivability, handoff orchestration for crash recovery
196
- - **Token Instrumentation**: Session metrics extracted from Claude Code transcript JSONL files
208
+ - **Token Instrumentation**: Session metrics extracted from runtime transcript files (JSONL)
197
209
 
198
210
  ## Project Structure
199
211
 
@@ -252,7 +264,7 @@ overstory/
252
264
  merge/ FIFO queue + conflict resolution
253
265
  watchdog/ Tiered health monitoring (daemon, triage, health)
254
266
  logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
255
- metrics/ SQLite metrics + transcript parsing
267
+ metrics/ SQLite metrics + pricing + transcript parsing
256
268
  doctor/ Health check modules (10 checks)
257
269
  insights/ Session insight analyzer for auto-expertise
258
270
  runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi)
package/agents/builder.md CHANGED
@@ -54,8 +54,10 @@ Your task-specific context (task ID, file scope, spec path, branch name, parent
54
54
  5. **Record mulch learnings** -- review your work for insights worth preserving (conventions discovered, patterns applied, failures encountered, decisions made) and record them with outcome data:
55
55
  ```bash
56
56
  ml record <domain> --type <convention|pattern|failure|decision> --description "..." \
57
+ --classification <foundational|tactical|observational> \
57
58
  --outcome-status success --outcome-agent $OVERSTORY_AGENT_NAME
58
59
  ```
60
+ Classification guide: use `foundational` for stable conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for unverified one-off findings.
59
61
  This is a required gate, not optional. Every implementation session produces learnings. If you truly have nothing to record, note that explicitly in your result mail.
60
62
  6. Send `worker_done` mail to your parent with structured payload:
61
63
  ```bash
@@ -99,6 +101,10 @@ You are an implementation specialist. Given a spec and a set of files you own, y
99
101
  ### Expertise
100
102
  - **Load context:** `ml prime [domain]` to load domain expertise before implementing
101
103
  - **Record patterns:** `ml record <domain>` to capture useful patterns you discover
104
+ - **Classify records:** Always pass `--classification` when recording:
105
+ - `foundational` — core conventions confirmed across multiple sessions (e.g., "all SQLite DBs use WAL mode")
106
+ - `tactical` — session-specific patterns useful for similar tasks (default if omitted)
107
+ - `observational` — one-off findings or unverified hypotheses worth noting
102
108
 
103
109
  ## workflow
104
110
 
@@ -145,7 +145,7 @@ Coordinator (you, depth 0)
145
145
 
146
146
  ### Expertise
147
147
  - **Load context:** `ml prime [domain]` to understand the problem space before planning
148
- - **Record insights:** `ml record <domain> --type <type> --description "<insight>"` to capture orchestration patterns, dispatch decisions, and failure learnings
148
+ - **Record insights:** `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"` to capture orchestration patterns, dispatch decisions, and failure learnings. Use `foundational` for stable conventions, `tactical` for session-specific patterns, `observational` for unverified findings.
149
149
  - **Search knowledge:** `ml search <query>` to find relevant past decisions
150
150
 
151
151
  ## workflow
@@ -243,7 +243,7 @@ When a batch is complete (task group auto-closed, all issues resolved):
243
243
  1. Verify all issues are closed: run `{{TRACKER_CLI}} show <id>` for each issue in the group.
244
244
  2. Verify all branches are merged: check `ov status` for unmerged branches.
245
245
  3. Clean up worktrees: `ov worktree clean --completed`.
246
- 4. Record orchestration insights: `ml record <domain> --type <type> --description "<insight>"`.
246
+ 4. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
247
247
  5. Report to the human operator: summarize what was accomplished, what was merged, any issues encountered.
248
248
  6. Check for follow-up work: `{{TRACKER_CLI}} ready` to see if new issues surfaced during the batch.
249
249
 
package/agents/lead.md CHANGED
@@ -121,6 +121,7 @@ ov sling <task-id> \
121
121
  - **Load domain context:** `ml prime [domain]` to understand the problem space before decomposing
122
122
  - **Record patterns:** `ml record <domain>` to capture orchestration insights
123
123
  - **Record worker insights:** When worker result mails contain notable findings, record them via `ml record` if they represent reusable patterns or conventions.
124
+ - **Classify records:** Always pass `--classification` when recording. Use `foundational` for core conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for one-off findings.
124
125
 
125
126
  ## task-complexity-assessment
126
127
 
@@ -297,8 +298,10 @@ Good decomposition follows these principles:
297
298
  3. Run integration tests if applicable: {{QUALITY_GATE_INLINE}}.
298
299
  4. **Record mulch learnings** -- review your orchestration work for insights (decomposition strategies, worker coordination patterns, failures encountered, decisions made) and record them:
299
300
  ```bash
300
- ml record <domain> --type <convention|pattern|failure|decision> --description "..."
301
+ ml record <domain> --type <convention|pattern|failure|decision> --description "..." \
302
+ --classification <foundational|tactical|observational>
301
303
  ```
304
+ Classification guide: use `foundational` for stable conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for unverified one-off findings.
302
305
  This is required. Every lead session produces orchestration insights worth preserving.
303
306
  5. Run `{{TRACKER_CLI}} close <task-id> --reason "<summary of what was accomplished>"`.
304
307
  6. Send a `status` mail to the coordinator confirming all subtasks are complete.
package/agents/merger.md CHANGED
@@ -51,7 +51,8 @@ Your task-specific context (task ID, branches to merge, target branch, merge ord
51
51
  {{QUALITY_GATE_STEPS}}
52
52
  4. **Record mulch learnings** -- capture merge resolution insights (conflict patterns, resolution strategies, branch integration issues):
53
53
  ```bash
54
- ml record <domain> --type <convention|pattern|failure> --description "..."
54
+ ml record <domain> --type <convention|pattern|failure> --description "..." \
55
+ --classification <foundational|tactical|observational>
55
56
  ```
56
57
  This is required for non-trivial merges (Tier 2+). Merge resolution patterns are highly reusable knowledge for future mergers. Skip for clean Tier 1 merges with no conflicts.
57
58
  5. Send a `result` mail to your parent with: tier used, conflicts resolved (if any), test status.
@@ -92,7 +93,7 @@ You are a branch integration specialist. When workers complete their tasks on se
92
93
 
93
94
  ### Expertise
94
95
  - **Load context:** `ml prime [domain]` to understand the code being merged
95
- - **Record patterns:** `ml record <domain>` to capture merge resolution insights
96
+ - **Record patterns:** `ml record <domain> --classification <foundational|tactical|observational>` to capture merge resolution insights. Use `foundational` for stable merge conventions, `tactical` for resolution strategies, `observational` for one-off conflict patterns.
96
97
 
97
98
  ## workflow
98
99
 
package/agents/monitor.md CHANGED
@@ -72,7 +72,7 @@ You are the watchdog's brain. While Tier 0 (mechanical daemon) checks tmux/pid l
72
72
 
73
73
  ### Expertise
74
74
  - **Load context:** `ml prime [domain]` to understand project patterns
75
- - **Record insights:** `ml record <domain> --type <type> --description "<insight>"` to capture monitoring patterns, failure signatures, and recovery strategies
75
+ - **Record insights:** `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"` to capture monitoring patterns, failure signatures, and recovery strategies. Use `foundational` for stable monitoring conventions, `tactical` for incident-specific patterns, `observational` for unverified anomaly observations.
76
76
  - **Search knowledge:** `ml search <query>` to find relevant past incidents
77
77
 
78
78
  ## workflow
@@ -91,6 +91,7 @@ You are a validation specialist. Given code to review, you check it for correctn
91
91
  ### Expertise
92
92
  - **Load conventions:** `ml prime [domain]` to understand project standards
93
93
  - **Surface insights:** Include notable findings (convention violations, code quality patterns) in your result mail so your parent has full context.
94
+ - **Classification guidance for parents:** When including notable findings in your result mail, indicate suggested classification: `foundational` (confirmed stable convention), `tactical` (task-specific pattern), or `observational` (unverified finding). This helps your parent record accurately.
94
95
 
95
96
  ## workflow
96
97
 
package/agents/scout.md CHANGED
@@ -93,6 +93,7 @@ You perform reconnaissance. Given a research question, exploration target, or an
93
93
  ### Expertise
94
94
  - **Query expertise:** `ml prime [domain]` to load relevant context
95
95
  - **Surface insights:** Include notable findings (patterns, conventions, gotchas) in your result mail so your parent has full context for spec writing.
96
+ - **Classification guidance for parents:** When including notable findings in your result mail, indicate suggested classification: `foundational` (confirmed stable convention), `tactical` (task-specific pattern), or `observational` (unverified finding). This helps your parent record accurately.
96
97
 
97
98
  ## workflow
98
99
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@os-eco/overstory-cli",
3
- "version": "0.7.3",
4
- "description": "Multi-agent orchestration for Claude Code — spawn worker agents in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution",
3
+ "version": "0.7.5",
4
+ "description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
5
5
  "author": "Jaymin West",
6
6
  "license": "MIT",
7
7
  "type": "module",
@@ -29,9 +29,15 @@ export interface DiscoveredAgent {
29
29
  lastActivity: string;
30
30
  }
31
31
 
32
+ /** Known instruction file paths, tried in order until one exists. */
33
+ const KNOWN_INSTRUCTION_PATHS = [
34
+ join(".claude", "CLAUDE.md"), // Claude Code, Pi
35
+ "AGENTS.md", // Codex (future)
36
+ ];
37
+
32
38
  /**
33
- * Extract file scope from an agent's overlay CLAUDE.md.
34
- * Returns empty array if overlay doesn't exist, has no file scope restrictions,
39
+ * Extract file scope from an agent's overlay instruction file.
40
+ * Returns an empty array if no overlay exists, the overlay has no file scope restrictions,
35
41
  * or can't be read.
36
42
  *
37
43
  * @param worktreePath - Absolute path to the agent's worktree
@@ -39,15 +45,19 @@ export interface DiscoveredAgent {
39
45
  */
40
46
  export async function extractFileScope(worktreePath: string): Promise<string[]> {
41
47
  try {
42
- const overlayPath = join(worktreePath, ".claude", "CLAUDE.md");
43
- const overlayFile = Bun.file(overlayPath);
44
-
45
- if (!(await overlayFile.exists())) {
48
+ let content: string | null = null;
49
+ for (const relPath of KNOWN_INSTRUCTION_PATHS) {
50
+ const overlayPath = join(worktreePath, relPath);
51
+ const overlayFile = Bun.file(overlayPath);
52
+ if (await overlayFile.exists()) {
53
+ content = await overlayFile.text();
54
+ break;
55
+ }
56
+ }
57
+ if (content === null) {
46
58
  return [];
47
59
  }
48
60
 
49
- const content = await overlayFile.text();
50
-
51
61
  // Find the section between "## File Scope (exclusive ownership)" and "## Expertise"
52
62
  const startMarker = "## File Scope (exclusive ownership)";
53
63
  const endMarker = "## Expertise";
@@ -540,7 +540,9 @@ describe("startCoordinator", () => {
540
540
  expect(calls.createSession).toHaveLength(1);
541
541
  const cmd = calls.createSession[0]?.command ?? "";
542
542
  expect(cmd).toContain("--append-system-prompt");
543
- expect(cmd).toContain("# Coordinator Agent");
543
+ // File path is passed via $(cat ...) instead of inlining content (overstory#45)
544
+ expect(cmd).toContain("$(cat '");
545
+ expect(cmd).toContain("agent-defs/coordinator.md");
544
546
  });
545
547
 
546
548
  test("reads model from manifest instead of hardcoding", async () => {
@@ -363,17 +363,20 @@ async function startCoordinator(
363
363
  // Inject the coordinator base definition via --append-system-prompt so the
364
364
  // coordinator knows its role, hierarchy rules, and delegation patterns
365
365
  // (overstory-gaio, overstory-0kwf).
366
+ // Pass the file path (not content) so the shell inside the tmux pane reads
367
+ // it via $(cat ...) — avoids tmux IPC "command too long" errors with large
368
+ // agent definitions (overstory#45).
366
369
  const agentDefPath = join(projectRoot, ".overstory", "agent-defs", "coordinator.md");
367
370
  const agentDefFile = Bun.file(agentDefPath);
368
- let appendSystemPrompt: string | undefined;
371
+ let appendSystemPromptFile: string | undefined;
369
372
  if (await agentDefFile.exists()) {
370
- appendSystemPrompt = await agentDefFile.text();
373
+ appendSystemPromptFile = agentDefPath;
371
374
  }
372
375
  const spawnCmd = runtime.buildSpawnCommand({
373
376
  model: resolvedModel.model,
374
377
  permissionMode: "bypass",
375
378
  cwd: projectRoot,
376
- appendSystemPrompt,
379
+ appendSystemPromptFile,
377
380
  env: {
378
381
  ...runtime.buildEnv(resolvedModel),
379
382
  OVERSTORY_AGENT_NAME: COORDINATOR_NAME,
@@ -142,17 +142,18 @@ async function startMonitor(opts: { json: boolean; attach: boolean }): Promise<v
142
142
  }
143
143
 
144
144
  // Spawn tmux session at project root with Claude Code (interactive mode).
145
+ // Pass file path (not content) to avoid tmux "command too long" (overstory#45).
145
146
  const agentDefPath = join(projectRoot, ".overstory", "agent-defs", "monitor.md");
146
147
  const agentDefFile = Bun.file(agentDefPath);
147
- let appendSystemPrompt: string | undefined;
148
+ let appendSystemPromptFile: string | undefined;
148
149
  if (await agentDefFile.exists()) {
149
- appendSystemPrompt = await agentDefFile.text();
150
+ appendSystemPromptFile = agentDefPath;
150
151
  }
151
152
  const spawnCmd = runtime.buildSpawnCommand({
152
153
  model: resolvedModel.model,
153
154
  permissionMode: "bypass",
154
155
  cwd: projectRoot,
155
- appendSystemPrompt,
156
+ appendSystemPromptFile,
156
157
  env: {
157
158
  ...runtime.buildEnv(resolvedModel),
158
159
  OVERSTORY_AGENT_NAME: MONITOR_NAME,
@@ -366,6 +366,7 @@ recentTasks: []
366
366
  !hooks.json
367
367
  !groups.json
368
368
  !agent-defs/
369
+ !README.md
369
370
  `;
370
371
 
371
372
  test("creates .overstory/.gitignore if missing", async () => {
@@ -18,22 +18,7 @@ import { createMulchClient } from "../mulch/client.ts";
18
18
  import { openSessionStore } from "../sessions/compat.ts";
19
19
  import type { AgentIdentity, AgentManifest, SessionCheckpoint, SessionMetrics } from "../types.ts";
20
20
  import { getCurrentSessionName } from "../worktree/tmux.ts";
21
-
22
- /**
23
- * Gitignore content for .overstory/.gitignore.
24
- * TODO: Import from init.ts once it's exported (parallel branch change).
25
- * Wildcard+whitelist pattern: ignore everything except tracked config files.
26
- */
27
- const OVERSTORY_GITIGNORE = `# Wildcard+whitelist: ignore everything, whitelist tracked files
28
- # Auto-healed by ov prime on each session start
29
- *
30
- !.gitignore
31
- !config.yaml
32
- !agent-manifest.json
33
- !hooks.json
34
- !groups.json
35
- !agent-defs/
36
- `;
21
+ import { OVERSTORY_GITIGNORE } from "./init.ts";
37
22
 
38
23
  export interface PrimeOptions {
39
24
  agent?: string;
@@ -169,18 +169,19 @@ async function startSupervisor(opts: {
169
169
 
170
170
  // Spawn tmux session at project root with Claude Code (interactive mode).
171
171
  // Inject the supervisor base definition via --append-system-prompt.
172
+ // Pass file path (not content) to avoid tmux "command too long" (overstory#45).
172
173
  const tmuxSession = `overstory-${config.project.name}-supervisor-${opts.name}`;
173
174
  const agentDefPath = join(projectRoot, ".overstory", "agent-defs", "supervisor.md");
174
175
  const agentDefFile = Bun.file(agentDefPath);
175
- let appendSystemPrompt: string | undefined;
176
+ let appendSystemPromptFile: string | undefined;
176
177
  if (await agentDefFile.exists()) {
177
- appendSystemPrompt = await agentDefFile.text();
178
+ appendSystemPromptFile = agentDefPath;
178
179
  }
179
180
  const spawnCmd = runtime.buildSpawnCommand({
180
181
  model: resolvedModel.model,
181
182
  permissionMode: "bypass",
182
183
  cwd: projectRoot,
183
- appendSystemPrompt,
184
+ appendSystemPromptFile,
184
185
  env: {
185
186
  ...runtime.buildEnv(resolvedModel),
186
187
  OVERSTORY_AGENT_NAME: opts.name,
package/src/index.ts CHANGED
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
45
45
  import { jsonError } from "./json.ts";
46
46
  import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
47
47
 
48
- export const VERSION = "0.7.3";
48
+ export const VERSION = "0.7.5";
49
49
 
50
50
  const rawArgs = process.argv.slice(2);
51
51
 
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Runtime-agnostic pricing and cost estimation for AI models.
3
+ *
4
+ * Extracted from transcript.ts so any runtime can use cost estimation
5
+ * without pulling in Claude Code-specific JSONL parsing logic.
6
+ *
7
+ * To add support for a new provider model, add an entry to MODEL_PRICING
8
+ * using a lowercase substring that uniquely identifies the model tier
9
+ * (e.g. "opus", "sonnet", "haiku") and add a matching substring check in getPricingForModel().
10
+ */
11
+
12
+ /** Canonical token usage representation shared across all runtimes. */
13
+ export interface TokenUsage {
14
+ inputTokens: number;
15
+ outputTokens: number;
16
+ cacheReadTokens: number;
17
+ cacheCreationTokens: number;
18
+ modelUsed: string | null;
19
+ }
20
+
21
+ /** Pricing per million tokens (USD). */
22
+ export interface ModelPricing {
23
+ inputPerMTok: number;
24
+ outputPerMTok: number;
25
+ cacheReadPerMTok: number;
26
+ cacheCreationPerMTok: number;
27
+ }
28
+
29
+ /** Hardcoded pricing for known Claude models. */
30
+ const MODEL_PRICING: Record<string, ModelPricing> = {
31
+ opus: {
32
+ inputPerMTok: 15,
33
+ outputPerMTok: 75,
34
+ cacheReadPerMTok: 1.5, // 10% of input
35
+ cacheCreationPerMTok: 3.75, // 25% of input
36
+ },
37
+ sonnet: {
38
+ inputPerMTok: 3,
39
+ outputPerMTok: 15,
40
+ cacheReadPerMTok: 0.3, // 10% of input
41
+ cacheCreationPerMTok: 0.75, // 25% of input
42
+ },
43
+ haiku: {
44
+ inputPerMTok: 0.8,
45
+ outputPerMTok: 4,
46
+ cacheReadPerMTok: 0.08, // 10% of input
47
+ cacheCreationPerMTok: 0.2, // 25% of input
48
+ },
49
+ };
50
+
51
+ /**
52
+ * Determine the pricing tier for a given model string.
53
+ * Matches on substring: "opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku.
54
+ * Returns null if unrecognized.
55
+ */
56
+ export function getPricingForModel(model: string): ModelPricing | null {
57
+ const lower = model.toLowerCase();
58
+ if (lower.includes("opus")) return MODEL_PRICING.opus ?? null;
59
+ if (lower.includes("sonnet")) return MODEL_PRICING.sonnet ?? null;
60
+ if (lower.includes("haiku")) return MODEL_PRICING.haiku ?? null;
61
+ return null;
62
+ }
63
+
64
+ /**
65
+ * Calculate the estimated cost in USD for a given usage and model.
66
+ * Returns null if `usage.modelUsed` is null or the model is unrecognized.
67
+ */
68
+ export function estimateCost(usage: TokenUsage): number | null {
69
+ if (usage.modelUsed === null) return null;
70
+
71
+ const pricing = getPricingForModel(usage.modelUsed);
72
+ if (pricing === null) return null;
73
+
74
+ const inputCost = (usage.inputTokens / 1_000_000) * pricing.inputPerMTok;
75
+ const outputCost = (usage.outputTokens / 1_000_000) * pricing.outputPerMTok;
76
+ const cacheReadCost = (usage.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMTok;
77
+ const cacheCreationCost = (usage.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMTok;
78
+
79
+ return inputCost + outputCost + cacheReadCost + cacheCreationCost;
80
+ }
@@ -1,8 +1,13 @@
1
1
  /**
2
- * Tests for Claude Code transcript JSONL parser.
2
+ * Tests for Claude Code transcript JSONL parser and pricing.ts module.
3
3
  *
4
4
  * Uses temp files with real-format JSONL data. No mocks.
5
5
  * Philosophy: "never mock what you can use for real" (mx-252b16).
6
+ *
7
+ * Coverage:
8
+ * - parseTranscriptUsage (transcript.ts)
9
+ * - estimateCost re-export (transcript.ts -> pricing.ts)
10
+ * - getPricingForModel (pricing.ts)
6
11
  */
7
12
 
8
13
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
@@ -10,6 +15,7 @@ import { mkdtemp } from "node:fs/promises";
10
15
  import { tmpdir } from "node:os";
11
16
  import { join } from "node:path";
12
17
  import { cleanupTempDir } from "../test-helpers.ts";
18
+ import { getPricingForModel, estimateCost as pricingEstimateCost } from "./pricing.ts";
13
19
  import { estimateCost, parseTranscriptUsage } from "./transcript.ts";
14
20
 
15
21
  let tempDir: string;
@@ -354,3 +360,54 @@ describe("estimateCost", () => {
354
360
  }
355
361
  });
356
362
  });
363
+
364
+ // === getPricingForModel (pricing.ts) ===
365
+
366
+ describe("getPricingForModel", () => {
367
+ test("matches opus substring", () => {
368
+ const pricing = getPricingForModel("claude-opus-4-6");
369
+ expect(pricing).not.toBeNull();
370
+ if (pricing !== null) {
371
+ expect(pricing.inputPerMTok).toBe(15);
372
+ expect(pricing.outputPerMTok).toBe(75);
373
+ }
374
+ });
375
+
376
+ test("matches sonnet substring", () => {
377
+ const pricing = getPricingForModel("claude-sonnet-4-20250514");
378
+ expect(pricing).not.toBeNull();
379
+ if (pricing !== null) {
380
+ expect(pricing.inputPerMTok).toBe(3);
381
+ expect(pricing.outputPerMTok).toBe(15);
382
+ }
383
+ });
384
+
385
+ test("matches haiku substring", () => {
386
+ const pricing = getPricingForModel("claude-haiku-3-5-20241022");
387
+ expect(pricing).not.toBeNull();
388
+ if (pricing !== null) {
389
+ expect(pricing.inputPerMTok).toBe(0.8);
390
+ expect(pricing.outputPerMTok).toBe(4);
391
+ }
392
+ });
393
+
394
+ test("returns null for unknown model", () => {
395
+ const pricing = getPricingForModel("gpt-4o");
396
+ expect(pricing).toBeNull();
397
+ });
398
+ });
399
+
400
+ // === re-export parity ===
401
+
402
+ describe("estimateCost re-export parity", () => {
403
+ test("transcript.estimateCost and pricing.estimateCost produce same result", () => {
404
+ const usage = {
405
+ inputTokens: 1_000_000,
406
+ outputTokens: 1_000_000,
407
+ cacheReadTokens: 1_000_000,
408
+ cacheCreationTokens: 1_000_000,
409
+ modelUsed: "claude-opus-4-6",
410
+ };
411
+ expect(estimateCost(usage)).toBe(pricingEstimateCost(usage));
412
+ });
413
+ });
@@ -1,8 +1,12 @@
1
1
  /**
2
2
  * Parser for Claude Code transcript JSONL files.
3
3
  *
4
- * Extracts token usage data from assistant-type entries in transcript files
5
- * at ~/.claude/projects/{project-slug}/{session-id}.jsonl.
4
+ * This is a Claude Code-specific JSONL parser that extracts token usage data
5
+ * from assistant-type entries in transcript files at
6
+ * ~/.claude/projects/{project-slug}/{session-id}.jsonl.
7
+ *
8
+ * Runtime-agnostic pricing logic lives in ./pricing.ts. Other runtimes
9
+ * implement their own transcript parsing via AgentRuntime.parseTranscript().
6
10
  *
7
11
  * Each assistant entry contains per-turn usage:
8
12
  * {
@@ -19,74 +23,11 @@
19
23
  * }
20
24
  */
21
25
 
22
- export interface TranscriptUsage {
23
- inputTokens: number;
24
- outputTokens: number;
25
- cacheReadTokens: number;
26
- cacheCreationTokens: number;
27
- modelUsed: string | null;
28
- }
29
-
30
- /** Pricing per million tokens (USD). */
31
- interface ModelPricing {
32
- inputPerMTok: number;
33
- outputPerMTok: number;
34
- cacheReadPerMTok: number;
35
- cacheCreationPerMTok: number;
36
- }
37
-
38
- /** Hardcoded pricing for known Claude models. */
39
- const MODEL_PRICING: Record<string, ModelPricing> = {
40
- opus: {
41
- inputPerMTok: 15,
42
- outputPerMTok: 75,
43
- cacheReadPerMTok: 1.5, // 10% of input
44
- cacheCreationPerMTok: 3.75, // 25% of input
45
- },
46
- sonnet: {
47
- inputPerMTok: 3,
48
- outputPerMTok: 15,
49
- cacheReadPerMTok: 0.3, // 10% of input
50
- cacheCreationPerMTok: 0.75, // 25% of input
51
- },
52
- haiku: {
53
- inputPerMTok: 0.8,
54
- outputPerMTok: 4,
55
- cacheReadPerMTok: 0.08, // 10% of input
56
- cacheCreationPerMTok: 0.2, // 25% of input
57
- },
58
- };
59
-
60
- /**
61
- * Determine the pricing tier for a given model string.
62
- * Matches on substring: "opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku.
63
- * Returns null if unrecognized.
64
- */
65
- function getPricingForModel(model: string): ModelPricing | null {
66
- const lower = model.toLowerCase();
67
- if (lower.includes("opus")) return MODEL_PRICING.opus ?? null;
68
- if (lower.includes("sonnet")) return MODEL_PRICING.sonnet ?? null;
69
- if (lower.includes("haiku")) return MODEL_PRICING.haiku ?? null;
70
- return null;
71
- }
72
-
73
- /**
74
- * Calculate the estimated cost in USD for a given usage and model.
75
- * Returns null if the model is unrecognized.
76
- */
77
- export function estimateCost(usage: TranscriptUsage): number | null {
78
- if (usage.modelUsed === null) return null;
26
+ import type { TokenUsage } from "./pricing.ts";
79
27
 
80
- const pricing = getPricingForModel(usage.modelUsed);
81
- if (pricing === null) return null;
28
+ export type TranscriptUsage = TokenUsage;
82
29
 
83
- const inputCost = (usage.inputTokens / 1_000_000) * pricing.inputPerMTok;
84
- const outputCost = (usage.outputTokens / 1_000_000) * pricing.outputPerMTok;
85
- const cacheReadCost = (usage.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMTok;
86
- const cacheCreationCost = (usage.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMTok;
87
-
88
- return inputCost + outputCost + cacheReadCost + cacheCreationCost;
89
- }
30
+ export { estimateCost } from "./pricing.ts";
90
31
 
91
32
  /**
92
33
  * Narrow an unknown value to determine if it looks like a transcript assistant entry.