@os-eco/overstory-cli 0.7.4 → 0.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -8
- package/package.json +1 -1
- package/src/commands/agents.ts +21 -3
- package/src/commands/completions.ts +7 -1
- package/src/commands/coordinator.test.ts +3 -1
- package/src/commands/coordinator.ts +6 -3
- package/src/commands/costs.test.ts +45 -2
- package/src/commands/costs.ts +42 -13
- package/src/commands/doctor.ts +3 -1
- package/src/commands/init.test.ts +366 -27
- package/src/commands/init.ts +194 -2
- package/src/commands/monitor.ts +4 -3
- package/src/commands/supervisor.ts +4 -3
- package/src/doctor/providers.test.ts +373 -0
- package/src/doctor/providers.ts +250 -0
- package/src/doctor/types.ts +2 -1
- package/src/e2e/init-sling-lifecycle.test.ts +12 -7
- package/src/index.ts +11 -2
- package/src/metrics/pricing.ts +57 -2
- package/src/metrics/store.test.ts +38 -0
- package/src/metrics/store.ts +10 -0
- package/src/metrics/transcript.test.ts +84 -2
- package/src/metrics/transcript.ts +1 -1
- package/src/runtimes/claude.test.ts +40 -0
- package/src/runtimes/claude.ts +8 -1
- package/src/runtimes/copilot.test.ts +507 -0
- package/src/runtimes/copilot.ts +226 -0
- package/src/runtimes/pi.test.ts +28 -0
- package/src/runtimes/pi.ts +5 -1
- package/src/runtimes/registry.test.ts +20 -0
- package/src/runtimes/registry.ts +2 -0
- package/src/runtimes/types.ts +2 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import type { OverstoryConfig, ProviderConfig } from "../types.ts";
|
|
2
|
+
import type { DoctorCheck, DoctorCheckFn } from "./types.ts";
|
|
3
|
+
|
|
4
|
+
/** Roles that rely heavily on tool-use (function calling). */
|
|
5
|
+
const TOOL_HEAVY_ROLES = new Set(["builder", "scout", "merger"]);
|
|
6
|
+
|
|
7
|
+
/**
 * Provider and multi-runtime configuration checks.
 *
 * Validates gateway provider reachability, auth tokens, model-provider references,
 * and tool-use compatibility across configured runtimes.
 *
 * Results are emitted in a fixed order:
 *   1. providers-configured
 *   2. provider-reachable-{name}   (one per gateway provider with a baseUrl)
 *   3. provider-auth-token-{name}  (one per gateway provider with an authTokenEnv)
 *   4. tool-use-compat
 *   5. model-provider-ref / model-provider-refs
 *   6. gateway-api-key-reminder    (only when at least one gateway provider exists)
 *
 * @param config - Loaded configuration; `providers` and `models` are inspected.
 * @param _overstoryDir - Not used by this check.
 * @returns The list of provider-related doctor checks in the order above.
 */
export const checkProviders: DoctorCheckFn = async (
	config,
	_overstoryDir,
): Promise<DoctorCheck[]> => {
	// Base check: at least one provider configured
	const checks: DoctorCheck[] = [buildProvidersConfigured(config)];

	// Identify gateway providers
	const gatewayEntries = Object.entries(config.providers).filter(([, p]) => p.type === "gateway");

	// Check 1: provider-reachable-{name} — one per gateway provider with baseUrl.
	// The probes are independent and each may wait for its full timeout, so they
	// are started together instead of one after another. Promise.all keeps the
	// results in gateway order, and checkProviderReachable turns every network
	// failure into a "warn" result rather than rejecting.
	const reachability = await Promise.all(
		gatewayEntries
			.filter(([, provider]) => provider.baseUrl)
			.map(([name, provider]) => checkProviderReachable(name, provider)),
	);
	checks.push(...reachability);

	// Check 2: provider-auth-token-{name} — one per gateway provider with authTokenEnv
	for (const [name, provider] of gatewayEntries) {
		if (provider.authTokenEnv) {
			checks.push(buildProviderAuthToken(name, provider));
		}
	}

	// Check 3: tool-use-compat — one warn per tool-heavy role using a provider-prefixed model
	checks.push(...buildToolUseCompat(config));

	// Check 4: model-provider-ref(s) — one per provider-prefixed model, or single pass
	checks.push(...buildModelProviderRefs(config));

	// Check 5: gateway-api-key-reminder — only when gateway providers exist
	if (gatewayEntries.length > 0) {
		checks.push(buildGatewayApiKeyReminder());
	}

	return checks;
};
|
|
52
|
+
|
|
53
|
+
/**
 * Base check: reports whether the configuration declares any provider at all.
 *
 * @param config - Configuration whose `providers` map is counted.
 * @returns A "pass" check listing every provider as `name (type)`, or a "warn"
 *   check when the providers map is empty.
 */
function buildProvidersConfigured(config: OverstoryConfig): DoctorCheck {
	const providerList = Object.entries(config.providers);
	const total = providerList.length;

	// Guard clause: nothing configured.
	if (total === 0) {
		return {
			name: "providers-configured",
			category: "providers",
			status: "warn",
			message: "No providers configured — add providers to config.yaml",
			details: ["At least one native or gateway provider should be configured."],
		};
	}

	const pluralSuffix = total === 1 ? "" : "s";
	const summaries = providerList.map(([providerName, entry]) => `${providerName} (${entry.type})`);

	return {
		name: "providers-configured",
		category: "providers",
		status: "pass",
		message: `${total} provider${pluralSuffix} configured`,
		details: summaries,
	};
}
|
|
77
|
+
|
|
78
|
+
/**
 * Check 1: probes a gateway provider's baseUrl over HTTP.
 *
 * Sends a HEAD request that is aborted after 5 seconds. The response itself is
 * not inspected: if the request settles without throwing, the provider counts
 * as reachable. Any thrown error (network failure, abort on timeout, ...) is
 * converted into a "warn" result carrying the error text.
 *
 * @param name - Provider key, used in the check name and message.
 * @param provider - Provider entry; the caller guarantees `baseUrl` is set.
 * @returns A "pass" or "warn" check named `provider-reachable-{name}`.
 */
async function checkProviderReachable(
	name: string,
	provider: ProviderConfig,
): Promise<DoctorCheck> {
	const url = provider.baseUrl as string; // caller guards baseUrl is defined
	const checkName = `provider-reachable-${name}`;

	try {
		const timeoutSignal = AbortSignal.timeout(5000);
		await fetch(url, { method: "HEAD", signal: timeoutSignal });
	} catch (cause) {
		// Non-Error throwables are stringified so details always holds text.
		const reason = cause instanceof Error ? cause.message : String(cause);
		return {
			name: checkName,
			category: "providers",
			status: "warn",
			message: `Gateway provider '${name}' is unreachable`,
			details: [url, reason],
		};
	}

	return {
		name: checkName,
		category: "providers",
		status: "pass",
		message: `Gateway provider '${name}' is reachable`,
		details: [url],
	};
}
|
|
114
|
+
|
|
115
|
+
/**
 * Check 2: verifies that the environment variable holding a gateway provider's
 * auth token is set to a non-empty value.
 *
 * Only the variable NAME is ever placed in the result — never its value.
 *
 * @param name - Provider key, used in the check name and message.
 * @param provider - Provider entry; the caller guarantees `authTokenEnv` is set.
 * @returns A "pass" or "warn" check named `provider-auth-token-{name}`.
 */
function buildProviderAuthToken(name: string, provider: ProviderConfig): DoctorCheck {
	const envVar = provider.authTokenEnv as string; // caller guards authTokenEnv is defined
	const token = process.env[envVar];
	const tokenPresent = token !== undefined && token !== "";

	const checkName = `provider-auth-token-${name}`;
	const envLine = `Env var: ${envVar}`;

	return tokenPresent
		? {
				name: checkName,
				category: "providers",
				status: "pass",
				message: `Auth token for provider '${name}' is set`,
				details: [envLine],
			}
		: {
				name: checkName,
				category: "providers",
				status: "warn",
				message: `Auth token for provider '${name}' is missing`,
				details: [envLine, `Set ${envVar} to authenticate with this provider.`],
			};
}
|
|
142
|
+
|
|
143
|
+
/**
 * Check 3: flags tool-heavy roles whose model is provider-prefixed.
 *
 * A role is flagged when it is listed in TOOL_HEAVY_ROLES and its configured
 * model string contains a "/" (the provider-qualified form). Each flagged role
 * yields one "warn" check; when no role is flagged a single "pass" check is
 * returned instead.
 *
 * @param config - Configuration whose `models` map is scanned.
 * @returns One warn per affected role, or a single pass.
 */
function buildToolUseCompat(config: OverstoryConfig): DoctorCheck[] {
	const warnings = Object.entries(config.models)
		.filter(
			([role, model]) => TOOL_HEAVY_ROLES.has(role) && model !== undefined && model.includes("/"),
		)
		.map(
			([role, model]): DoctorCheck => ({
				name: "tool-use-compat",
				category: "providers",
				status: "warn",
				message: `models.${role} uses non-Anthropic model — tool-use compatibility not guaranteed`,
				details: [
					`Model: ${model}`,
					"Tool use (function calling) behavior varies across providers.",
					"Test agent behavior thoroughly before using in production.",
				],
			}),
		);

	if (warnings.length > 0) {
		return warnings;
	}

	// Nothing flagged: report a single passing check.
	return [
		{
			name: "tool-use-compat",
			category: "providers",
			status: "pass",
			message: "No tool-heavy roles use non-Anthropic models",
		},
	];
}
|
|
182
|
+
|
|
183
|
+
/**
 * Check 4: Validate that provider-prefixed model references point to configured providers.
 *
 * For each config.models entry containing '/' (provider-qualified), the text
 * before the first '/' is taken as the provider name and looked up in
 * config.providers. Emits one check per provider-prefixed model ("pass" when
 * the provider is defined, "fail" otherwise), or a single pass if no such
 * models exist. Entries that are undefined, contain no '/', or have an empty
 * provider part (e.g. "/model") are skipped.
 *
 * @param config - Configuration whose `models` and `providers` maps are read.
 * @returns The per-model reference checks, or one "model-provider-refs" pass.
 */
function buildModelProviderRefs(config: OverstoryConfig): DoctorCheck[] {
	const checks: DoctorCheck[] = [];

	for (const [role, model] of Object.entries(config.models)) {
		if (model === undefined) continue;
		if (!model.includes("/")) continue;

		const providerName = model.split("/")[0];
		if (!providerName) continue;

		// Require an OWN property: a bare `config.providers[providerName]` lookup
		// also resolves members inherited from Object.prototype, so a model such as
		// "constructor/foo" or "toString/foo" would wrongly count as defined.
		// The truthiness test is kept so an entry explicitly set to null/undefined
		// is still treated as missing.
		const providerDefined =
			Object.prototype.hasOwnProperty.call(config.providers, providerName) &&
			Boolean(config.providers[providerName]);

		if (providerDefined) {
			checks.push({
				name: "model-provider-ref",
				category: "providers",
				status: "pass",
				message: `models.${role} references defined provider '${providerName}'`,
				details: [`Model: ${model}`],
			});
		} else {
			checks.push({
				name: "model-provider-ref",
				category: "providers",
				status: "fail",
				message: `models.${role} references undefined provider '${providerName}'`,
				details: [
					`Model: ${model}`,
					`Provider '${providerName}' is not defined in config.yaml providers section.`,
					`Add it: providers:\n ${providerName}:\n type: gateway\n baseUrl: https://...`,
				],
			});
		}
	}

	if (checks.length === 0) {
		checks.push({
			name: "model-provider-refs",
			category: "providers",
			status: "pass",
			message: "No provider-prefixed model references",
		});
	}

	return checks;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Check 5: static reminder emitted when gateway providers are configured.
 *
 * Always returns a "warn" check whose message explains that agents using
 * gateway routes get an empty ANTHROPIC_API_KEY, so anything that calls the
 * Anthropic API directly needs its own key.
 *
 * @returns The fixed "gateway-api-key-reminder" warn check.
 */
function buildGatewayApiKeyReminder(): DoctorCheck {
	const reminder =
		"Gateway providers configured — agents using gateway routes will have ANTHROPIC_API_KEY set to empty string. Direct Anthropic API calls require a separate key.";

	const check: DoctorCheck = {
		name: "gateway-api-key-reminder",
		category: "providers",
		status: "warn",
		message: reminder,
	};
	return check;
}
|
package/src/doctor/types.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { readdir, stat } from "node:fs/promises";
|
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import { createManifestLoader } from "../agents/manifest.ts";
|
|
5
5
|
import { writeOverlay } from "../agents/overlay.ts";
|
|
6
|
+
import type { Spawner } from "../commands/init.ts";
|
|
6
7
|
import { initCommand } from "../commands/init.ts";
|
|
7
8
|
import { loadConfig } from "../config.ts";
|
|
8
9
|
import { cleanupTempDir, createTempGitRepo } from "../test-helpers.ts";
|
|
@@ -15,10 +16,14 @@ import type { OverlayConfig } from "../types.ts";
|
|
|
15
16
|
* fresh temp git repo (NOT the overstory repo itself), then verifying all
|
|
16
17
|
* artifacts, loading config + manifest via real APIs, and generating an overlay.
|
|
17
18
|
*
|
|
18
|
-
* Uses real filesystem and real git repos.
|
|
19
|
+
* Uses real filesystem and real git repos.
|
|
20
|
+
* Uses a no-op spawner so ecosystem CLIs (ml/sd/cn) don't need to be installed in CI.
|
|
19
21
|
* Suppresses stdout because initCommand prints status lines.
|
|
20
22
|
*/
|
|
21
23
|
|
|
24
|
+
/** No-op spawner that treats all ecosystem tools as "not installed". */
|
|
25
|
+
const noopSpawner: Spawner = async () => ({ exitCode: 1, stdout: "", stderr: "not found" });
|
|
26
|
+
|
|
22
27
|
const EXPECTED_AGENT_DEFS = [
|
|
23
28
|
"builder.md",
|
|
24
29
|
"coordinator.md",
|
|
@@ -51,7 +56,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
51
56
|
});
|
|
52
57
|
|
|
53
58
|
test("init creates all expected artifacts", async () => {
|
|
54
|
-
await initCommand({});
|
|
59
|
+
await initCommand({ _spawner: noopSpawner });
|
|
55
60
|
|
|
56
61
|
const overstoryDir = join(tempDir, ".overstory");
|
|
57
62
|
|
|
@@ -91,7 +96,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
91
96
|
});
|
|
92
97
|
|
|
93
98
|
test("loadConfig returns valid config pointing to temp dir", async () => {
|
|
94
|
-
await initCommand({});
|
|
99
|
+
await initCommand({ _spawner: noopSpawner });
|
|
95
100
|
|
|
96
101
|
const config = await loadConfig(tempDir);
|
|
97
102
|
|
|
@@ -109,7 +114,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
109
114
|
});
|
|
110
115
|
|
|
111
116
|
test("manifest loads successfully with all 7 agents (supervisor deprecated)", async () => {
|
|
112
|
-
await initCommand({});
|
|
117
|
+
await initCommand({ _spawner: noopSpawner });
|
|
113
118
|
|
|
114
119
|
const manifestPath = join(tempDir, ".overstory", "agent-manifest.json");
|
|
115
120
|
const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
|
|
@@ -143,7 +148,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
143
148
|
});
|
|
144
149
|
|
|
145
150
|
test("manifest capability index is consistent", async () => {
|
|
146
|
-
await initCommand({});
|
|
151
|
+
await initCommand({ _spawner: noopSpawner });
|
|
147
152
|
|
|
148
153
|
const manifestPath = join(tempDir, ".overstory", "agent-manifest.json");
|
|
149
154
|
const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
|
|
@@ -165,7 +170,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
165
170
|
});
|
|
166
171
|
|
|
167
172
|
test("overlay generation works for external project", async () => {
|
|
168
|
-
await initCommand({});
|
|
173
|
+
await initCommand({ _spawner: noopSpawner });
|
|
169
174
|
|
|
170
175
|
const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
|
|
171
176
|
const baseDefinition = await Bun.file(join(agentDefsDir, "builder.md")).text();
|
|
@@ -213,7 +218,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
|
|
|
213
218
|
// init → load config → load manifest → generate overlay
|
|
214
219
|
|
|
215
220
|
// Step 1: Init
|
|
216
|
-
await initCommand({});
|
|
221
|
+
await initCommand({ _spawner: noopSpawner });
|
|
217
222
|
|
|
218
223
|
// Step 2: Load config
|
|
219
224
|
const config = await loadConfig(tempDir);
|
package/src/index.ts
CHANGED
|
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
|
|
|
45
45
|
import { jsonError } from "./json.ts";
|
|
46
46
|
import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
|
|
47
47
|
|
|
48
|
-
export const VERSION = "0.7.
|
|
48
|
+
export const VERSION = "0.7.6";
|
|
49
49
|
|
|
50
50
|
const rawArgs = process.argv.slice(2);
|
|
51
51
|
|
|
@@ -228,10 +228,19 @@ program.addCommand(createCompletionsCommand());
|
|
|
228
228
|
// Unmigrated commands — passthrough pattern
|
|
229
229
|
program
|
|
230
230
|
.command("init")
|
|
231
|
-
.description("Initialize .overstory/
|
|
231
|
+
.description("Initialize .overstory/ and bootstrap os-eco ecosystem tools")
|
|
232
232
|
.option("--force", "Reinitialize even if .overstory/ already exists")
|
|
233
233
|
.option("-y, --yes", "Accept all defaults without prompting (non-interactive mode)")
|
|
234
234
|
.option("--name <name>", "Project name (skips auto-detection)")
|
|
235
|
+
.option(
|
|
236
|
+
"--tools <list>",
|
|
237
|
+
"Comma-separated list of ecosystem tools to bootstrap (default: mulch,seeds,canopy)",
|
|
238
|
+
)
|
|
239
|
+
.option("--skip-mulch", "Skip mulch bootstrap")
|
|
240
|
+
.option("--skip-seeds", "Skip seeds bootstrap")
|
|
241
|
+
.option("--skip-canopy", "Skip canopy bootstrap")
|
|
242
|
+
.option("--skip-onboard", "Skip CLAUDE.md onboarding step for ecosystem tools")
|
|
243
|
+
.option("--json", "Output result as JSON")
|
|
235
244
|
.action(async (opts) => {
|
|
236
245
|
await initCommand(opts);
|
|
237
246
|
});
|
package/src/metrics/pricing.ts
CHANGED
|
@@ -26,8 +26,9 @@ export interface ModelPricing {
|
|
|
26
26
|
cacheCreationPerMTok: number;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
/**
|
|
29
|
+
/** Pricing for known AI models across providers. */
|
|
30
30
|
const MODEL_PRICING: Record<string, ModelPricing> = {
|
|
31
|
+
// --- Claude ---
|
|
31
32
|
opus: {
|
|
32
33
|
inputPerMTok: 15,
|
|
33
34
|
outputPerMTok: 75,
|
|
@@ -46,18 +47,72 @@ const MODEL_PRICING: Record<string, ModelPricing> = {
|
|
|
46
47
|
cacheReadPerMTok: 0.08, // 10% of input
|
|
47
48
|
cacheCreationPerMTok: 0.2, // 25% of input
|
|
48
49
|
},
|
|
50
|
+
// --- OpenAI GPT ---
|
|
51
|
+
"gpt-4o-mini": {
|
|
52
|
+
inputPerMTok: 0.15,
|
|
53
|
+
outputPerMTok: 0.6,
|
|
54
|
+
cacheReadPerMTok: 0.075, // 50% of input
|
|
55
|
+
cacheCreationPerMTok: 0.15,
|
|
56
|
+
},
|
|
57
|
+
"gpt-4o": {
|
|
58
|
+
inputPerMTok: 2.5,
|
|
59
|
+
outputPerMTok: 10,
|
|
60
|
+
cacheReadPerMTok: 1.25,
|
|
61
|
+
cacheCreationPerMTok: 2.5,
|
|
62
|
+
},
|
|
63
|
+
"gpt-5": {
|
|
64
|
+
inputPerMTok: 10,
|
|
65
|
+
outputPerMTok: 40,
|
|
66
|
+
cacheReadPerMTok: 5,
|
|
67
|
+
cacheCreationPerMTok: 10,
|
|
68
|
+
},
|
|
69
|
+
o1: {
|
|
70
|
+
inputPerMTok: 15,
|
|
71
|
+
outputPerMTok: 60,
|
|
72
|
+
cacheReadPerMTok: 7.5,
|
|
73
|
+
cacheCreationPerMTok: 15,
|
|
74
|
+
},
|
|
75
|
+
o3: {
|
|
76
|
+
inputPerMTok: 10,
|
|
77
|
+
outputPerMTok: 40,
|
|
78
|
+
cacheReadPerMTok: 5,
|
|
79
|
+
cacheCreationPerMTok: 10,
|
|
80
|
+
},
|
|
81
|
+
// --- Google Gemini ---
|
|
82
|
+
"gemini-flash": {
|
|
83
|
+
inputPerMTok: 0.1,
|
|
84
|
+
outputPerMTok: 0.4,
|
|
85
|
+
cacheReadPerMTok: 0.025,
|
|
86
|
+
cacheCreationPerMTok: 0.1,
|
|
87
|
+
},
|
|
88
|
+
"gemini-pro": {
|
|
89
|
+
inputPerMTok: 1.25,
|
|
90
|
+
outputPerMTok: 5,
|
|
91
|
+
cacheReadPerMTok: 0.3125,
|
|
92
|
+
cacheCreationPerMTok: 1.25,
|
|
93
|
+
},
|
|
49
94
|
};
|
|
50
95
|
|
|
51
96
|
/**
|
|
52
97
|
* Determine the pricing tier for a given model string.
|
|
53
|
-
* Matches on substring
|
|
98
|
+
* Matches on substring in priority order to avoid ambiguous overlaps.
|
|
54
99
|
* Returns null if unrecognized.
|
|
55
100
|
*/
|
|
56
101
|
export function getPricingForModel(model: string): ModelPricing | null {
	const name = model.toLowerCase();
	const mentions = (fragment: string): boolean => name.includes(fragment);

	// Case-insensitive substring rules, evaluated top to bottom; the first rule
	// that applies decides the pricing tier. Order matters where one pattern is
	// a substring of another (gpt-4o-mini must be tested before gpt-4o).
	const rules: Array<{ tier: string; applies: () => boolean }> = [
		// --- Claude ---
		{ tier: "opus", applies: () => mentions("opus") },
		{ tier: "sonnet", applies: () => mentions("sonnet") },
		{ tier: "haiku", applies: () => mentions("haiku") },
		// --- OpenAI GPT --- (gpt-4o-mini before gpt-4o; o3 before o1)
		{ tier: "gpt-4o-mini", applies: () => mentions("gpt-4o-mini") },
		{ tier: "gpt-4o", applies: () => mentions("gpt-4o") },
		{ tier: "gpt-5", applies: () => mentions("gpt-5") },
		{ tier: "o3", applies: () => mentions("o3") },
		{ tier: "o1", applies: () => mentions("o1") },
		// --- Google Gemini --- (flash before generic gemini+pro check)
		{ tier: "gemini-flash", applies: () => mentions("flash") },
		{ tier: "gemini-pro", applies: () => mentions("gemini") && mentions("pro") },
	];

	const matched = rules.find((rule) => rule.applies());
	if (matched === undefined) {
		// Unrecognized model string.
		return null;
	}
	return MODEL_PRICING[matched.tier] ?? null;
}
|
|
63
118
|
|
|
@@ -224,6 +224,44 @@ describe("getSessionsByAgent", () => {
|
|
|
224
224
|
});
|
|
225
225
|
});
|
|
226
226
|
|
|
227
|
+
// === getSessionsByTask ===
|
|
228
|
+
|
|
229
|
+
describe("getSessionsByTask", () => {
|
|
230
|
+
test("returns sessions matching task_id", () => {
|
|
231
|
+
store.recordSession(makeSession({ agentName: "agent-1", taskId: "task-A" }));
|
|
232
|
+
store.recordSession(makeSession({ agentName: "agent-2", taskId: "task-A" }));
|
|
233
|
+
store.recordSession(makeSession({ agentName: "agent-3", taskId: "task-B" }));
|
|
234
|
+
|
|
235
|
+
const sessions = store.getSessionsByTask("task-A");
|
|
236
|
+
expect(sessions).toHaveLength(2);
|
|
237
|
+
expect(sessions.every((s) => s.taskId === "task-A")).toBe(true);
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
test("returns empty array for unknown task_id", () => {
|
|
241
|
+
store.recordSession(makeSession({ agentName: "agent-1", taskId: "task-A" }));
|
|
242
|
+
|
|
243
|
+
expect(store.getSessionsByTask("nonexistent")).toEqual([]);
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
test("returns sessions ordered by started_at DESC", () => {
|
|
247
|
+
store.recordSession(
|
|
248
|
+
makeSession({ agentName: "agent-1", taskId: "task-X", startedAt: "2026-01-01T10:00:00Z" }),
|
|
249
|
+
);
|
|
250
|
+
store.recordSession(
|
|
251
|
+
makeSession({ agentName: "agent-2", taskId: "task-X", startedAt: "2026-01-01T12:00:00Z" }),
|
|
252
|
+
);
|
|
253
|
+
store.recordSession(
|
|
254
|
+
makeSession({ agentName: "agent-3", taskId: "task-X", startedAt: "2026-01-01T11:00:00Z" }),
|
|
255
|
+
);
|
|
256
|
+
|
|
257
|
+
const sessions = store.getSessionsByTask("task-X");
|
|
258
|
+
expect(sessions).toHaveLength(3);
|
|
259
|
+
expect(sessions[0]?.startedAt).toBe("2026-01-01T12:00:00Z"); // most recent first
|
|
260
|
+
expect(sessions[1]?.startedAt).toBe("2026-01-01T11:00:00Z");
|
|
261
|
+
expect(sessions[2]?.startedAt).toBe("2026-01-01T10:00:00Z");
|
|
262
|
+
});
|
|
263
|
+
});
|
|
264
|
+
|
|
227
265
|
// === getAverageDuration ===
|
|
228
266
|
|
|
229
267
|
describe("getAverageDuration", () => {
|
package/src/metrics/store.ts
CHANGED
|
@@ -13,6 +13,7 @@ export interface MetricsStore {
|
|
|
13
13
|
getRecentSessions(limit?: number): SessionMetrics[];
|
|
14
14
|
getSessionsByAgent(agentName: string): SessionMetrics[];
|
|
15
15
|
getSessionsByRun(runId: string): SessionMetrics[];
|
|
16
|
+
getSessionsByTask(taskId: string): SessionMetrics[];
|
|
16
17
|
getAverageDuration(capability?: string): number;
|
|
17
18
|
/** Count the total number of sessions in the database (no limit cap). */
|
|
18
19
|
countSessions(): number;
|
|
@@ -250,6 +251,10 @@ export function createMetricsStore(dbPath: string): MetricsStore {
|
|
|
250
251
|
SELECT * FROM sessions WHERE run_id = $run_id ORDER BY started_at DESC
|
|
251
252
|
`);
|
|
252
253
|
|
|
254
|
+
const byTaskStmt = db.prepare<SessionRow, { $task_id: string }>(`
|
|
255
|
+
SELECT * FROM sessions WHERE task_id = $task_id ORDER BY started_at DESC
|
|
256
|
+
`);
|
|
257
|
+
|
|
253
258
|
const avgDurationAllStmt = db.prepare<{ avg_duration: number | null }, Record<string, never>>(`
|
|
254
259
|
SELECT AVG(duration_ms) AS avg_duration FROM sessions WHERE completed_at IS NOT NULL
|
|
255
260
|
`);
|
|
@@ -342,6 +347,11 @@ export function createMetricsStore(dbPath: string): MetricsStore {
|
|
|
342
347
|
return rows.map(rowToMetrics);
|
|
343
348
|
},
|
|
344
349
|
|
|
350
|
+
getSessionsByTask(taskId: string): SessionMetrics[] {
	// Run the prepared by-task query with the id bound to $task_id, then
	// convert every returned row with rowToMetrics.
	return byTaskStmt.all({ $task_id: taskId }).map(rowToMetrics);
},
|
|
354
|
+
|
|
345
355
|
getAverageDuration(capability?: string): number {
|
|
346
356
|
if (capability !== undefined) {
|
|
347
357
|
const row = avgDurationByCapStmt.get({ $capability: capability });
|
|
@@ -311,12 +311,38 @@ describe("estimateCost", () => {
|
|
|
311
311
|
outputTokens: 1_000_000,
|
|
312
312
|
cacheReadTokens: 0,
|
|
313
313
|
cacheCreationTokens: 0,
|
|
314
|
-
modelUsed: "
|
|
314
|
+
modelUsed: "unknown-model-xyz",
|
|
315
315
|
});
|
|
316
316
|
|
|
317
317
|
expect(cost).toBeNull();
|
|
318
318
|
});
|
|
319
319
|
|
|
320
|
+
test("calculates cost for gpt-4o", () => {
|
|
321
|
+
const cost = estimateCost({
|
|
322
|
+
inputTokens: 1_000_000,
|
|
323
|
+
outputTokens: 1_000_000,
|
|
324
|
+
cacheReadTokens: 1_000_000,
|
|
325
|
+
cacheCreationTokens: 1_000_000,
|
|
326
|
+
modelUsed: "gpt-4o",
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
// gpt-4o: input=2.5, output=10, cacheRead=1.25, cacheCreation=2.5 => total=16.25
|
|
330
|
+
expect(cost).toBeCloseTo(16.25, 2);
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
test("calculates cost for gemini flash", () => {
|
|
334
|
+
const cost = estimateCost({
|
|
335
|
+
inputTokens: 1_000_000,
|
|
336
|
+
outputTokens: 1_000_000,
|
|
337
|
+
cacheReadTokens: 1_000_000,
|
|
338
|
+
cacheCreationTokens: 1_000_000,
|
|
339
|
+
modelUsed: "gemini-2.5-flash",
|
|
340
|
+
});
|
|
341
|
+
|
|
342
|
+
// gemini-flash: input=0.1, output=0.4, cacheRead=0.025, cacheCreation=0.1 => total=0.625
|
|
343
|
+
expect(cost).toBeCloseTo(0.625, 3);
|
|
344
|
+
});
|
|
345
|
+
|
|
320
346
|
test("returns null when modelUsed is null", () => {
|
|
321
347
|
const cost = estimateCost({
|
|
322
348
|
inputTokens: 1_000_000,
|
|
@@ -392,9 +418,65 @@ describe("getPricingForModel", () => {
|
|
|
392
418
|
});
|
|
393
419
|
|
|
394
420
|
test("returns null for unknown model", () => {
|
|
395
|
-
const pricing = getPricingForModel("
|
|
421
|
+
const pricing = getPricingForModel("unknown-model-xyz");
|
|
396
422
|
expect(pricing).toBeNull();
|
|
397
423
|
});
|
|
424
|
+
|
|
425
|
+
test("matches gpt-4o", () => {
|
|
426
|
+
const pricing = getPricingForModel("gpt-4o");
|
|
427
|
+
expect(pricing).not.toBeNull();
|
|
428
|
+
if (pricing !== null) {
|
|
429
|
+
expect(pricing.inputPerMTok).toBe(2.5);
|
|
430
|
+
}
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
test("matches gpt-4o-mini", () => {
|
|
434
|
+
const pricing = getPricingForModel("gpt-4o-mini");
|
|
435
|
+
expect(pricing).not.toBeNull();
|
|
436
|
+
if (pricing !== null) {
|
|
437
|
+
expect(pricing.inputPerMTok).toBe(0.15);
|
|
438
|
+
}
|
|
439
|
+
});
|
|
440
|
+
|
|
441
|
+
test("matches gpt-5", () => {
|
|
442
|
+
const pricing = getPricingForModel("gpt-5");
|
|
443
|
+
expect(pricing).not.toBeNull();
|
|
444
|
+
if (pricing !== null) {
|
|
445
|
+
expect(pricing.inputPerMTok).toBe(10);
|
|
446
|
+
}
|
|
447
|
+
});
|
|
448
|
+
|
|
449
|
+
test("matches o1", () => {
|
|
450
|
+
const pricing = getPricingForModel("o1");
|
|
451
|
+
expect(pricing).not.toBeNull();
|
|
452
|
+
if (pricing !== null) {
|
|
453
|
+
expect(pricing.inputPerMTok).toBe(15);
|
|
454
|
+
}
|
|
455
|
+
});
|
|
456
|
+
|
|
457
|
+
test("matches o3", () => {
|
|
458
|
+
const pricing = getPricingForModel("o3");
|
|
459
|
+
expect(pricing).not.toBeNull();
|
|
460
|
+
if (pricing !== null) {
|
|
461
|
+
expect(pricing.inputPerMTok).toBe(10);
|
|
462
|
+
}
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
test("matches gemini flash", () => {
|
|
466
|
+
const pricing = getPricingForModel("gemini-2.5-flash");
|
|
467
|
+
expect(pricing).not.toBeNull();
|
|
468
|
+
if (pricing !== null) {
|
|
469
|
+
expect(pricing.inputPerMTok).toBe(0.1);
|
|
470
|
+
}
|
|
471
|
+
});
|
|
472
|
+
|
|
473
|
+
test("matches gemini pro", () => {
|
|
474
|
+
const pricing = getPricingForModel("gemini-2.5-pro");
|
|
475
|
+
expect(pricing).not.toBeNull();
|
|
476
|
+
if (pricing !== null) {
|
|
477
|
+
expect(pricing.inputPerMTok).toBe(1.25);
|
|
478
|
+
}
|
|
479
|
+
});
|
|
398
480
|
});
|
|
399
481
|
|
|
400
482
|
// === re-export parity ===
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* This is a Claude Code-specific JSONL parser that extracts token usage data
|
|
5
5
|
* from assistant-type entries in transcript files at
|
|
6
|
-
* ~/.claude/projects/
|
|
6
|
+
* the runtime-specific transcript directory (e.g. ~/.claude/projects/ for Claude Code).
|
|
7
7
|
*
|
|
8
8
|
* Runtime-agnostic pricing logic lives in ./pricing.ts. Other runtimes
|
|
9
9
|
* implement their own transcript parsing via AgentRuntime.parseTranscript().
|
|
@@ -73,6 +73,46 @@ describe("ClaudeRuntime", () => {
|
|
|
73
73
|
);
|
|
74
74
|
});
|
|
75
75
|
|
|
76
|
+
test("with appendSystemPromptFile uses $(cat ...) expansion", () => {
|
|
77
|
+
const opts: SpawnOpts = {
|
|
78
|
+
model: "opus",
|
|
79
|
+
permissionMode: "bypass",
|
|
80
|
+
cwd: "/project",
|
|
81
|
+
env: {},
|
|
82
|
+
appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
|
|
83
|
+
};
|
|
84
|
+
const cmd = runtime.buildSpawnCommand(opts);
|
|
85
|
+
expect(cmd).toBe(
|
|
86
|
+
`claude --model opus --permission-mode bypassPermissions --append-system-prompt "$(cat '/project/.overstory/agent-defs/coordinator.md')"`,
|
|
87
|
+
);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
test("appendSystemPromptFile with single quotes in path", () => {
|
|
91
|
+
const opts: SpawnOpts = {
|
|
92
|
+
model: "opus",
|
|
93
|
+
permissionMode: "bypass",
|
|
94
|
+
cwd: "/project",
|
|
95
|
+
env: {},
|
|
96
|
+
appendSystemPromptFile: "/project/it's a path/agent.md",
|
|
97
|
+
};
|
|
98
|
+
const cmd = runtime.buildSpawnCommand(opts);
|
|
99
|
+
expect(cmd).toContain("$(cat '/project/it'\\''s a path/agent.md')");
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test("appendSystemPromptFile takes precedence over appendSystemPrompt", () => {
|
|
103
|
+
const opts: SpawnOpts = {
|
|
104
|
+
model: "opus",
|
|
105
|
+
permissionMode: "bypass",
|
|
106
|
+
cwd: "/project",
|
|
107
|
+
env: {},
|
|
108
|
+
appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
|
|
109
|
+
appendSystemPrompt: "This inline content should be ignored",
|
|
110
|
+
};
|
|
111
|
+
const cmd = runtime.buildSpawnCommand(opts);
|
|
112
|
+
expect(cmd).toContain("$(cat ");
|
|
113
|
+
expect(cmd).not.toContain("This inline content should be ignored");
|
|
114
|
+
});
|
|
115
|
+
|
|
76
116
|
test("without appendSystemPrompt omits the flag", () => {
|
|
77
117
|
const opts: SpawnOpts = {
|
|
78
118
|
model: "haiku",
|