@os-eco/overstory-cli 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -9
- package/agents/builder.md +6 -0
- package/agents/coordinator.md +2 -2
- package/agents/lead.md +4 -1
- package/agents/merger.md +3 -2
- package/agents/monitor.md +1 -1
- package/agents/reviewer.md +1 -0
- package/agents/scout.md +1 -0
- package/package.json +2 -2
- package/src/agents/hooks-deployer.test.ts +6 -5
- package/src/agents/identity.test.ts +3 -2
- package/src/agents/manifest.test.ts +4 -3
- package/src/agents/overlay.test.ts +3 -2
- package/src/commands/agents.test.ts +5 -4
- package/src/commands/agents.ts +18 -8
- package/src/commands/completions.test.ts +8 -5
- package/src/commands/completions.ts +37 -1
- package/src/commands/costs.test.ts +4 -3
- package/src/commands/dashboard.test.ts +265 -6
- package/src/commands/dashboard.ts +367 -64
- package/src/commands/doctor.test.ts +3 -2
- package/src/commands/errors.test.ts +3 -2
- package/src/commands/feed.test.ts +3 -2
- package/src/commands/feed.ts +2 -29
- package/src/commands/inspect.test.ts +3 -2
- package/src/commands/log.test.ts +248 -8
- package/src/commands/log.ts +193 -110
- package/src/commands/logs.test.ts +3 -2
- package/src/commands/mail.test.ts +3 -2
- package/src/commands/metrics.test.ts +4 -3
- package/src/commands/nudge.test.ts +3 -2
- package/src/commands/prime.test.ts +3 -2
- package/src/commands/prime.ts +1 -16
- package/src/commands/replay.test.ts +3 -2
- package/src/commands/run.test.ts +2 -1
- package/src/commands/sling.test.ts +127 -0
- package/src/commands/sling.ts +101 -3
- package/src/commands/status.test.ts +8 -8
- package/src/commands/trace.test.ts +3 -2
- package/src/commands/watch.test.ts +3 -2
- package/src/config.test.ts +3 -3
- package/src/doctor/agents.test.ts +3 -2
- package/src/doctor/logs.test.ts +3 -2
- package/src/doctor/structure.test.ts +3 -2
- package/src/index.ts +3 -1
- package/src/logging/color.ts +1 -1
- package/src/logging/format.test.ts +110 -0
- package/src/logging/format.ts +42 -1
- package/src/logging/logger.test.ts +3 -2
- package/src/mail/client.test.ts +3 -2
- package/src/mail/store.test.ts +3 -2
- package/src/merge/queue.test.ts +3 -2
- package/src/merge/resolver.test.ts +39 -0
- package/src/merge/resolver.ts +1 -1
- package/src/metrics/pricing.ts +80 -0
- package/src/metrics/transcript.test.ts +58 -1
- package/src/metrics/transcript.ts +9 -68
- package/src/mulch/client.test.ts +63 -2
- package/src/mulch/client.ts +62 -1
- package/src/runtimes/claude.test.ts +4 -3
- package/src/runtimes/pi-guards.test.ts +55 -2
- package/src/runtimes/pi-guards.ts +26 -9
- package/src/schema-consistency.test.ts +4 -2
- package/src/sessions/compat.test.ts +3 -2
- package/src/sessions/store.test.ts +3 -2
- package/src/test-helpers.ts +20 -1
- package/src/tracker/beads.test.ts +454 -0
- package/src/tracker/seeds.test.ts +461 -0
- package/src/watchdog/daemon.test.ts +4 -3
- package/src/watchdog/triage.test.ts +3 -2
|
@@ -203,6 +203,9 @@ function createMockMulchClient(
|
|
|
203
203
|
action: "analyze",
|
|
204
204
|
};
|
|
205
205
|
},
|
|
206
|
+
async appendOutcome() {
|
|
207
|
+
// No-op stub: resolver tests don't exercise outcome appending
|
|
208
|
+
},
|
|
206
209
|
};
|
|
207
210
|
}
|
|
208
211
|
|
|
@@ -1440,6 +1443,42 @@ describe("createMergeResolver", () => {
|
|
|
1440
1443
|
});
|
|
1441
1444
|
});
|
|
1442
1445
|
|
|
1446
|
+
describe("queryConflictHistory uses sortByScore", () => {
|
|
1447
|
+
test("passes sortByScore: true to mulch search when querying conflict history", async () => {
|
|
1448
|
+
const repoDir = await createTempGitRepo();
|
|
1449
|
+
try {
|
|
1450
|
+
const defaultBranch = await getDefaultBranch(repoDir);
|
|
1451
|
+
await setupContentConflict(repoDir, defaultBranch);
|
|
1452
|
+
|
|
1453
|
+
const entry = makeTestEntry({
|
|
1454
|
+
branchName: "feature-branch",
|
|
1455
|
+
filesModified: ["src/test.ts"],
|
|
1456
|
+
});
|
|
1457
|
+
|
|
1458
|
+
// Capture search call options
|
|
1459
|
+
let capturedSearchOptions: unknown;
|
|
1460
|
+
const mockMulchClient = createMockMulchClient();
|
|
1461
|
+
mockMulchClient.search = async (_query, options) => {
|
|
1462
|
+
capturedSearchOptions = options;
|
|
1463
|
+
return "";
|
|
1464
|
+
};
|
|
1465
|
+
|
|
1466
|
+
const resolver = createMergeResolver({
|
|
1467
|
+
aiResolveEnabled: false,
|
|
1468
|
+
reimagineEnabled: false,
|
|
1469
|
+
mulchClient: mockMulchClient,
|
|
1470
|
+
});
|
|
1471
|
+
|
|
1472
|
+
await resolver.resolve(entry, defaultBranch, repoDir);
|
|
1473
|
+
|
|
1474
|
+
// Verify sortByScore was passed to search
|
|
1475
|
+
expect(capturedSearchOptions).toMatchObject({ sortByScore: true });
|
|
1476
|
+
} finally {
|
|
1477
|
+
await cleanupTempDir(repoDir);
|
|
1478
|
+
}
|
|
1479
|
+
});
|
|
1480
|
+
});
|
|
1481
|
+
|
|
1443
1482
|
describe("AI-resolve with history context", () => {
|
|
1444
1483
|
test("includes historical context in AI prompt when available", async () => {
|
|
1445
1484
|
const repoDir = await createTempGitRepo();
|
package/src/merge/resolver.ts
CHANGED
|
@@ -514,7 +514,7 @@ async function queryConflictHistory(
|
|
|
514
514
|
entry: MergeEntry,
|
|
515
515
|
): Promise<ConflictHistory> {
|
|
516
516
|
try {
|
|
517
|
-
const searchOutput = await mulchClient.search("merge-conflict");
|
|
517
|
+
const searchOutput = await mulchClient.search("merge-conflict", { sortByScore: true });
|
|
518
518
|
const patterns = parseConflictPatterns(searchOutput);
|
|
519
519
|
return buildConflictHistory(patterns, entry.filesModified);
|
|
520
520
|
} catch {
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime-agnostic pricing and cost estimation for AI models.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from transcript.ts so any runtime can use cost estimation
|
|
5
|
+
* without pulling in Claude Code-specific JSONL parsing logic.
|
|
6
|
+
*
|
|
7
|
+
* To add support for a new provider model, add an entry to MODEL_PRICING
|
|
8
|
+
* using a lowercase substring that uniquely identifies the model tier
|
|
9
|
+
* (e.g. "opus", "sonnet", "haiku").
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Canonical token usage representation shared across all runtimes.
 *
 * All counts are raw token totals (not millions); estimateCost() divides by
 * 1,000,000 before applying per-million-token rates.
 */
export interface TokenUsage {
	/** Input tokens, billed at the model's `inputPerMTok` rate. */
	inputTokens: number;
	/** Output tokens, billed at the model's `outputPerMTok` rate. */
	outputTokens: number;
	/** Tokens read from cache, billed at the model's `cacheReadPerMTok` rate. */
	cacheReadTokens: number;
	/** Tokens written to cache, billed at the model's `cacheCreationPerMTok` rate. */
	cacheCreationTokens: number;
	/** Model identifier used for the pricing lookup; `null` makes estimateCost() return `null`. */
	modelUsed: string | null;
}
|
|
20
|
+
|
|
21
|
+
/** Pricing per million tokens (USD). */
export interface ModelPricing {
	/** USD per million input tokens. */
	inputPerMTok: number;
	/** USD per million output tokens. */
	outputPerMTok: number;
	/** USD per million tokens read from cache. */
	cacheReadPerMTok: number;
	/** USD per million tokens written to cache. */
	cacheCreationPerMTok: number;
}

/**
 * Hardcoded pricing for known Claude models.
 *
 * Each key is the lowercase substring that identifies a model tier inside a
 * model id. Insertion order is the match precedence used by
 * getPricingForModel(), so keep more specific substrings before broader ones.
 *
 * NOTE(review): cacheCreationPerMTok is set to 25% of the input rate here.
 * Anthropic's published cache-write price is believed to be 1.25x the base
 * input rate (a 25% premium), which would make these values too low — confirm
 * against the current pricing page before relying on cache-heavy estimates.
 */
const MODEL_PRICING: Record<string, ModelPricing> = {
	opus: {
		inputPerMTok: 15,
		outputPerMTok: 75,
		cacheReadPerMTok: 1.5, // 10% of input
		cacheCreationPerMTok: 3.75, // 25% of input
	},
	sonnet: {
		inputPerMTok: 3,
		outputPerMTok: 15,
		cacheReadPerMTok: 0.3, // 10% of input
		cacheCreationPerMTok: 0.75, // 25% of input
	},
	haiku: {
		inputPerMTok: 0.8,
		outputPerMTok: 4,
		cacheReadPerMTok: 0.08, // 10% of input
		cacheCreationPerMTok: 0.2, // 25% of input
	},
};

/**
 * Determine the pricing tier for a given model string.
 *
 * The lookup is table-driven: the model id is lowercased and compared against
 * every key of MODEL_PRICING in insertion order, and the first key contained
 * in the id wins ("opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku).
 * Adding a new entry to MODEL_PRICING is therefore sufficient to support a new
 * tier; no change to this function is needed.
 *
 * @param model - Model identifier, matched case-insensitively.
 * @returns The pricing for the first matching tier, or null if unrecognized.
 */
export function getPricingForModel(model: string): ModelPricing | null {
	const lower = model.toLowerCase();
	for (const [tier, pricing] of Object.entries(MODEL_PRICING)) {
		if (lower.includes(tier)) return pricing;
	}
	return null;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Estimate the USD cost of a usage record.
 *
 * Each token count is converted to millions of tokens and multiplied by the
 * matching per-million rate for the model's pricing tier; the four partial
 * costs (input, output, cache read, cache creation) are then summed.
 *
 * @param usage - Token counts plus the model identifier they were billed under.
 * @returns The estimated cost in USD, or null when `usage.modelUsed` is null
 *   or getPricingForModel() does not recognize the model.
 */
export function estimateCost(usage: TokenUsage): number | null {
	const { modelUsed } = usage;
	if (modelUsed === null) {
		return null;
	}

	const rates = getPricingForModel(modelUsed);
	if (rates === null) {
		return null;
	}

	// Rates are quoted per million tokens, so scale each count before pricing it.
	const costOf = (tokens: number, perMTok: number): number => (tokens / 1_000_000) * perMTok;

	const inputCost = costOf(usage.inputTokens, rates.inputPerMTok);
	const outputCost = costOf(usage.outputTokens, rates.outputPerMTok);
	const cacheReadCost = costOf(usage.cacheReadTokens, rates.cacheReadPerMTok);
	const cacheCreationCost = costOf(usage.cacheCreationTokens, rates.cacheCreationPerMTok);

	return inputCost + outputCost + cacheReadCost + cacheCreationCost;
}
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Tests for Claude Code transcript JSONL parser.
|
|
2
|
+
* Tests for Claude Code transcript JSONL parser and pricing.ts module.
|
|
3
3
|
*
|
|
4
4
|
* Uses temp files with real-format JSONL data. No mocks.
|
|
5
5
|
* Philosophy: "never mock what you can use for real" (mx-252b16).
|
|
6
|
+
*
|
|
7
|
+
* Coverage:
|
|
8
|
+
* - parseTranscriptUsage (transcript.ts)
|
|
9
|
+
* - estimateCost re-export (transcript.ts -> pricing.ts)
|
|
10
|
+
* - getPricingForModel (pricing.ts)
|
|
6
11
|
*/
|
|
7
12
|
|
|
8
13
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
@@ -10,6 +15,7 @@ import { mkdtemp } from "node:fs/promises";
|
|
|
10
15
|
import { tmpdir } from "node:os";
|
|
11
16
|
import { join } from "node:path";
|
|
12
17
|
import { cleanupTempDir } from "../test-helpers.ts";
|
|
18
|
+
import { getPricingForModel, estimateCost as pricingEstimateCost } from "./pricing.ts";
|
|
13
19
|
import { estimateCost, parseTranscriptUsage } from "./transcript.ts";
|
|
14
20
|
|
|
15
21
|
let tempDir: string;
|
|
@@ -354,3 +360,54 @@ describe("estimateCost", () => {
|
|
|
354
360
|
}
|
|
355
361
|
});
|
|
356
362
|
});
|
|
363
|
+
|
|
364
|
+
// === getPricingForModel (pricing.ts) ===
|
|
365
|
+
|
|
366
|
+
describe("getPricingForModel", () => {
|
|
367
|
+
test("matches opus substring", () => {
|
|
368
|
+
const pricing = getPricingForModel("claude-opus-4-6");
|
|
369
|
+
expect(pricing).not.toBeNull();
|
|
370
|
+
if (pricing !== null) {
|
|
371
|
+
expect(pricing.inputPerMTok).toBe(15);
|
|
372
|
+
expect(pricing.outputPerMTok).toBe(75);
|
|
373
|
+
}
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
test("matches sonnet substring", () => {
|
|
377
|
+
const pricing = getPricingForModel("claude-sonnet-4-20250514");
|
|
378
|
+
expect(pricing).not.toBeNull();
|
|
379
|
+
if (pricing !== null) {
|
|
380
|
+
expect(pricing.inputPerMTok).toBe(3);
|
|
381
|
+
expect(pricing.outputPerMTok).toBe(15);
|
|
382
|
+
}
|
|
383
|
+
});
|
|
384
|
+
|
|
385
|
+
test("matches haiku substring", () => {
|
|
386
|
+
const pricing = getPricingForModel("claude-haiku-3-5-20241022");
|
|
387
|
+
expect(pricing).not.toBeNull();
|
|
388
|
+
if (pricing !== null) {
|
|
389
|
+
expect(pricing.inputPerMTok).toBe(0.8);
|
|
390
|
+
expect(pricing.outputPerMTok).toBe(4);
|
|
391
|
+
}
|
|
392
|
+
});
|
|
393
|
+
|
|
394
|
+
test("returns null for unknown model", () => {
|
|
395
|
+
const pricing = getPricingForModel("gpt-4o");
|
|
396
|
+
expect(pricing).toBeNull();
|
|
397
|
+
});
|
|
398
|
+
});
|
|
399
|
+
|
|
400
|
+
// === re-export parity ===
|
|
401
|
+
|
|
402
|
+
describe("estimateCost re-export parity", () => {
|
|
403
|
+
test("transcript.estimateCost and pricing.estimateCost produce same result", () => {
|
|
404
|
+
const usage = {
|
|
405
|
+
inputTokens: 1_000_000,
|
|
406
|
+
outputTokens: 1_000_000,
|
|
407
|
+
cacheReadTokens: 1_000_000,
|
|
408
|
+
cacheCreationTokens: 1_000_000,
|
|
409
|
+
modelUsed: "claude-opus-4-6",
|
|
410
|
+
};
|
|
411
|
+
expect(estimateCost(usage)).toBe(pricingEstimateCost(usage));
|
|
412
|
+
});
|
|
413
|
+
});
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Parser for Claude Code transcript JSONL files.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* This is a Claude Code-specific JSONL parser that extracts token usage data
|
|
5
|
+
* from assistant-type entries in transcript files at
|
|
6
|
+
* ~/.claude/projects/{project-slug}/{session-id}.jsonl.
|
|
7
|
+
*
|
|
8
|
+
* Runtime-agnostic pricing logic lives in ./pricing.ts. Other runtimes
|
|
9
|
+
* implement their own transcript parsing via AgentRuntime.parseTranscript().
|
|
6
10
|
*
|
|
7
11
|
* Each assistant entry contains per-turn usage:
|
|
8
12
|
* {
|
|
@@ -19,74 +23,11 @@
|
|
|
19
23
|
* }
|
|
20
24
|
*/
|
|
21
25
|
|
|
22
|
-
|
|
23
|
-
inputTokens: number;
|
|
24
|
-
outputTokens: number;
|
|
25
|
-
cacheReadTokens: number;
|
|
26
|
-
cacheCreationTokens: number;
|
|
27
|
-
modelUsed: string | null;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
/** Pricing per million tokens (USD). */
|
|
31
|
-
interface ModelPricing {
|
|
32
|
-
inputPerMTok: number;
|
|
33
|
-
outputPerMTok: number;
|
|
34
|
-
cacheReadPerMTok: number;
|
|
35
|
-
cacheCreationPerMTok: number;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
/** Hardcoded pricing for known Claude models. */
|
|
39
|
-
const MODEL_PRICING: Record<string, ModelPricing> = {
|
|
40
|
-
opus: {
|
|
41
|
-
inputPerMTok: 15,
|
|
42
|
-
outputPerMTok: 75,
|
|
43
|
-
cacheReadPerMTok: 1.5, // 10% of input
|
|
44
|
-
cacheCreationPerMTok: 3.75, // 25% of input
|
|
45
|
-
},
|
|
46
|
-
sonnet: {
|
|
47
|
-
inputPerMTok: 3,
|
|
48
|
-
outputPerMTok: 15,
|
|
49
|
-
cacheReadPerMTok: 0.3, // 10% of input
|
|
50
|
-
cacheCreationPerMTok: 0.75, // 25% of input
|
|
51
|
-
},
|
|
52
|
-
haiku: {
|
|
53
|
-
inputPerMTok: 0.8,
|
|
54
|
-
outputPerMTok: 4,
|
|
55
|
-
cacheReadPerMTok: 0.08, // 10% of input
|
|
56
|
-
cacheCreationPerMTok: 0.2, // 25% of input
|
|
57
|
-
},
|
|
58
|
-
};
|
|
59
|
-
|
|
60
|
-
/**
|
|
61
|
-
* Determine the pricing tier for a given model string.
|
|
62
|
-
* Matches on substring: "opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku.
|
|
63
|
-
* Returns null if unrecognized.
|
|
64
|
-
*/
|
|
65
|
-
function getPricingForModel(model: string): ModelPricing | null {
|
|
66
|
-
const lower = model.toLowerCase();
|
|
67
|
-
if (lower.includes("opus")) return MODEL_PRICING.opus ?? null;
|
|
68
|
-
if (lower.includes("sonnet")) return MODEL_PRICING.sonnet ?? null;
|
|
69
|
-
if (lower.includes("haiku")) return MODEL_PRICING.haiku ?? null;
|
|
70
|
-
return null;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* Calculate the estimated cost in USD for a given usage and model.
|
|
75
|
-
* Returns null if the model is unrecognized.
|
|
76
|
-
*/
|
|
77
|
-
export function estimateCost(usage: TranscriptUsage): number | null {
|
|
78
|
-
if (usage.modelUsed === null) return null;
|
|
26
|
+
import type { TokenUsage } from "./pricing.ts";
|
|
79
27
|
|
|
80
|
-
|
|
81
|
-
if (pricing === null) return null;
|
|
28
|
+
export type TranscriptUsage = TokenUsage;
|
|
82
29
|
|
|
83
|
-
|
|
84
|
-
const outputCost = (usage.outputTokens / 1_000_000) * pricing.outputPerMTok;
|
|
85
|
-
const cacheReadCost = (usage.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMTok;
|
|
86
|
-
const cacheCreationCost = (usage.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMTok;
|
|
87
|
-
|
|
88
|
-
return inputCost + outputCost + cacheReadCost + cacheCreationCost;
|
|
89
|
-
}
|
|
30
|
+
export { estimateCost } from "./pricing.ts";
|
|
90
31
|
|
|
91
32
|
/**
|
|
92
33
|
* Narrow an unknown value to determine if it looks like a transcript assistant entry.
|
package/src/mulch/client.test.ts
CHANGED
|
@@ -6,10 +6,11 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
9
|
-
import { mkdtemp
|
|
9
|
+
import { mkdtemp } from "node:fs/promises";
|
|
10
10
|
import { tmpdir } from "node:os";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import { AgentError } from "../errors.ts";
|
|
13
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
13
14
|
import { createMulchClient } from "./client.ts";
|
|
14
15
|
|
|
15
16
|
// Check if mulch is available
|
|
@@ -30,7 +31,7 @@ describe("createMulchClient", () => {
|
|
|
30
31
|
});
|
|
31
32
|
|
|
32
33
|
afterEach(async () => {
|
|
33
|
-
await
|
|
34
|
+
await cleanupTempDir(tempDir);
|
|
34
35
|
});
|
|
35
36
|
|
|
36
37
|
/**
|
|
@@ -162,6 +163,33 @@ describe("createMulchClient", () => {
|
|
|
162
163
|
});
|
|
163
164
|
expect(typeof result).toBe("string");
|
|
164
165
|
});
|
|
166
|
+
|
|
167
|
+
test.skipIf(!hasMulch)("passes --sort-by-score flag in prime options", async () => {
|
|
168
|
+
await initMulch();
|
|
169
|
+
const client = createMulchClient(tempDir);
|
|
170
|
+
// mulch prime --sort-by-score may not be supported in older mulch versions;
|
|
171
|
+
// the interface and impl are forward-looking — test accepts both outcomes.
|
|
172
|
+
try {
|
|
173
|
+
const result = await client.prime([], "markdown", { sortByScore: true });
|
|
174
|
+
expect(typeof result).toBe("string");
|
|
175
|
+
} catch (error) {
|
|
176
|
+
expect(error).toBeInstanceOf(AgentError);
|
|
177
|
+
}
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test.skipIf(!hasMulch)("passes --sort-by-score with --files together", async () => {
|
|
181
|
+
await initMulch();
|
|
182
|
+
const client = createMulchClient(tempDir);
|
|
183
|
+
try {
|
|
184
|
+
const result = await client.prime([], "markdown", {
|
|
185
|
+
files: ["src/config.ts"],
|
|
186
|
+
sortByScore: true,
|
|
187
|
+
});
|
|
188
|
+
expect(typeof result).toBe("string");
|
|
189
|
+
} catch (error) {
|
|
190
|
+
expect(error).toBeInstanceOf(AgentError);
|
|
191
|
+
}
|
|
192
|
+
});
|
|
165
193
|
});
|
|
166
194
|
|
|
167
195
|
describe("status", () => {
|
|
@@ -452,6 +480,39 @@ describe("createMulchClient", () => {
|
|
|
452
480
|
expect(typeof result).toBe("string");
|
|
453
481
|
});
|
|
454
482
|
|
|
483
|
+
test.skipIf(!hasMulch)("passes --classification flag when provided", async () => {
|
|
484
|
+
await initMulch();
|
|
485
|
+
const client = createMulchClient(tempDir);
|
|
486
|
+
const result = await client.search("test", { classification: "foundational" });
|
|
487
|
+
expect(typeof result).toBe("string");
|
|
488
|
+
});
|
|
489
|
+
|
|
490
|
+
test.skipIf(!hasMulch)("passes --outcome-status flag when provided (success)", async () => {
|
|
491
|
+
await initMulch();
|
|
492
|
+
const client = createMulchClient(tempDir);
|
|
493
|
+
const result = await client.search("test", { outcomeStatus: "success" });
|
|
494
|
+
expect(typeof result).toBe("string");
|
|
495
|
+
});
|
|
496
|
+
|
|
497
|
+
test.skipIf(!hasMulch)("passes --outcome-status flag when provided (failure)", async () => {
|
|
498
|
+
await initMulch();
|
|
499
|
+
const client = createMulchClient(tempDir);
|
|
500
|
+
const result = await client.search("test", { outcomeStatus: "failure" });
|
|
501
|
+
expect(typeof result).toBe("string");
|
|
502
|
+
});
|
|
503
|
+
|
|
504
|
+
test.skipIf(!hasMulch)("passes all search filters together", async () => {
|
|
505
|
+
await initMulch();
|
|
506
|
+
const client = createMulchClient(tempDir);
|
|
507
|
+
const result = await client.search("test", {
|
|
508
|
+
classification: "tactical",
|
|
509
|
+
outcomeStatus: "success",
|
|
510
|
+
sortByScore: true,
|
|
511
|
+
file: "src/config.ts",
|
|
512
|
+
});
|
|
513
|
+
expect(typeof result).toBe("string");
|
|
514
|
+
});
|
|
515
|
+
|
|
455
516
|
test.skipIf(!hasMulch)("roundtrip: record via API then search and find it", async () => {
|
|
456
517
|
await initMulch();
|
|
457
518
|
const addProc = Bun.spawn(["ml", "add", "roundtrip"], {
|
package/src/mulch/client.ts
CHANGED
|
@@ -28,9 +28,22 @@ export interface MulchClient {
|
|
|
28
28
|
options?: {
|
|
29
29
|
files?: string[];
|
|
30
30
|
excludeDomain?: string[];
|
|
31
|
+
sortByScore?: boolean;
|
|
31
32
|
},
|
|
32
33
|
): Promise<string>;
|
|
33
34
|
|
|
35
|
+
/** Append an outcome entry to an existing record by ID in the given domain. */
|
|
36
|
+
appendOutcome(
|
|
37
|
+
domain: string,
|
|
38
|
+
id: string,
|
|
39
|
+
outcome: {
|
|
40
|
+
status: "success" | "failure" | "partial";
|
|
41
|
+
agent?: string;
|
|
42
|
+
notes?: string;
|
|
43
|
+
duration?: number;
|
|
44
|
+
},
|
|
45
|
+
): Promise<void>;
|
|
46
|
+
|
|
34
47
|
/** Show domain statistics. */
|
|
35
48
|
status(): Promise<MulchStatus>;
|
|
36
49
|
|
|
@@ -58,7 +71,15 @@ export interface MulchClient {
|
|
|
58
71
|
query(domain?: string): Promise<string>;
|
|
59
72
|
|
|
60
73
|
/** Search records across all domains. */
|
|
61
|
-
search(
|
|
74
|
+
search(
|
|
75
|
+
query: string,
|
|
76
|
+
options?: {
|
|
77
|
+
file?: string;
|
|
78
|
+
sortByScore?: boolean;
|
|
79
|
+
classification?: string;
|
|
80
|
+
outcomeStatus?: "success" | "failure";
|
|
81
|
+
},
|
|
82
|
+
): Promise<string>;
|
|
62
83
|
|
|
63
84
|
/** Show expertise record changes since a git ref. */
|
|
64
85
|
diff(options?: { since?: string }): Promise<MulchDiffResult>;
|
|
@@ -214,6 +235,8 @@ interface MulchProgrammaticApi {
|
|
|
214
235
|
type?: string;
|
|
215
236
|
tag?: string;
|
|
216
237
|
classification?: string;
|
|
238
|
+
outcomeStatus?: "success" | "failure";
|
|
239
|
+
sortByScore?: boolean;
|
|
217
240
|
file?: string;
|
|
218
241
|
cwd?: string;
|
|
219
242
|
},
|
|
@@ -222,6 +245,22 @@ interface MulchProgrammaticApi {
|
|
|
222
245
|
domain: string,
|
|
223
246
|
options?: { type?: string; classification?: string; file?: string; cwd?: string },
|
|
224
247
|
): Promise<MulchExpertiseRecord[]>;
|
|
248
|
+
appendOutcome(
|
|
249
|
+
domain: string,
|
|
250
|
+
id: string,
|
|
251
|
+
outcome: {
|
|
252
|
+
status: "success" | "failure" | "partial";
|
|
253
|
+
agent?: string;
|
|
254
|
+
notes?: string;
|
|
255
|
+
duration?: number;
|
|
256
|
+
recorded_at?: string;
|
|
257
|
+
},
|
|
258
|
+
options?: { cwd?: string },
|
|
259
|
+
): Promise<{
|
|
260
|
+
record: MulchExpertiseRecord;
|
|
261
|
+
outcome: { status: string; agent?: string; notes?: string; recorded_at?: string };
|
|
262
|
+
total_outcomes: number;
|
|
263
|
+
}>;
|
|
225
264
|
}
|
|
226
265
|
|
|
227
266
|
const MULCH_PKG = "@os-eco/mulch-cli";
|
|
@@ -406,6 +445,9 @@ export function createMulchClient(cwd: string): MulchClient {
|
|
|
406
445
|
if (options?.excludeDomain && options.excludeDomain.length > 0) {
|
|
407
446
|
args.push("--exclude-domain", ...options.excludeDomain);
|
|
408
447
|
}
|
|
448
|
+
if (options?.sortByScore) {
|
|
449
|
+
args.push("--sort-by-score");
|
|
450
|
+
}
|
|
409
451
|
const { stdout } = await runMulch(args, "prime");
|
|
410
452
|
return stdout;
|
|
411
453
|
},
|
|
@@ -472,6 +514,9 @@ export function createMulchClient(cwd: string): MulchClient {
|
|
|
472
514
|
const api = await loadMulchApi();
|
|
473
515
|
const results = await api.searchExpertise(query, {
|
|
474
516
|
file: options?.file,
|
|
517
|
+
classification: options?.classification,
|
|
518
|
+
outcomeStatus: options?.outcomeStatus,
|
|
519
|
+
sortByScore: options?.sortByScore,
|
|
475
520
|
cwd,
|
|
476
521
|
});
|
|
477
522
|
return formatSearchResults(results);
|
|
@@ -595,5 +640,21 @@ export function createMulchClient(cwd: string): MulchClient {
|
|
|
595
640
|
throw new AgentError(`Failed to parse JSON from mulch compact: ${trimmed.slice(0, 200)}`);
|
|
596
641
|
}
|
|
597
642
|
},
|
|
643
|
+
|
|
644
|
+
/**
 * Append an outcome to the record `id` in `domain` via the mulch
 * programmatic API, stamping it with the current time as `recorded_at`.
 *
 * The API's return value is discarded. Any error raised by the API call is
 * rethrown as an AgentError whose message names the domain/id and carries
 * the underlying reason.
 */
async appendOutcome(domain, id, outcome) {
	// The API is loaded outside the try block, so a load failure propagates
	// unchanged instead of being re-labelled as an appendOutcome failure.
	const mulchApi = await loadMulchApi();
	try {
		const stampedOutcome = { ...outcome, recorded_at: new Date().toISOString() };
		await mulchApi.appendOutcome(domain, id, stampedOutcome, { cwd });
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		throw new AgentError(`mulch appendOutcome ${domain}/${id} failed: ${reason}`);
	}
},
|
|
598
659
|
};
|
|
599
660
|
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdtemp
|
|
2
|
+
import { mkdtemp } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
5
6
|
import type { ResolvedModel } from "../types.ts";
|
|
6
7
|
import { ClaudeRuntime } from "./claude.ts";
|
|
7
8
|
import type { SpawnOpts } from "./types.ts";
|
|
@@ -239,7 +240,7 @@ describe("ClaudeRuntime", () => {
|
|
|
239
240
|
});
|
|
240
241
|
|
|
241
242
|
afterEach(async () => {
|
|
242
|
-
await
|
|
243
|
+
await cleanupTempDir(tempDir);
|
|
243
244
|
});
|
|
244
245
|
|
|
245
246
|
test("writes overlay to .claude/CLAUDE.md when overlay is provided", async () => {
|
|
@@ -373,7 +374,7 @@ describe("ClaudeRuntime", () => {
|
|
|
373
374
|
});
|
|
374
375
|
|
|
375
376
|
afterEach(async () => {
|
|
376
|
-
await
|
|
377
|
+
await cleanupTempDir(tempDir);
|
|
377
378
|
});
|
|
378
379
|
|
|
379
380
|
test("returns null for non-existent file", async () => {
|
|
@@ -349,7 +349,9 @@ describe("generatePiGuardExtension", () => {
|
|
|
349
349
|
|
|
350
350
|
test("generated code contains pi.exec ov log tool-start in tool_call handler", () => {
|
|
351
351
|
const generated = generatePiGuardExtension(builderHooks());
|
|
352
|
-
expect(generated).toContain(
|
|
352
|
+
expect(generated).toContain(
|
|
353
|
+
'pi.exec("ov", ["log", "tool-start", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
|
|
354
|
+
);
|
|
353
355
|
});
|
|
354
356
|
|
|
355
357
|
test('generated code contains pi.on("tool_execution_end", ...)', () => {
|
|
@@ -359,7 +361,9 @@ describe("generatePiGuardExtension", () => {
|
|
|
359
361
|
|
|
360
362
|
test("generated code contains pi.exec ov log tool-end in tool_execution_end handler", () => {
|
|
361
363
|
const generated = generatePiGuardExtension(builderHooks());
|
|
362
|
-
expect(generated).toContain(
|
|
364
|
+
expect(generated).toContain(
|
|
365
|
+
'pi.exec("ov", ["log", "tool-end", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
|
|
366
|
+
);
|
|
363
367
|
});
|
|
364
368
|
|
|
365
369
|
test('generated code contains pi.on("session_shutdown", ...)', () => {
|
|
@@ -373,6 +377,55 @@ describe("generatePiGuardExtension", () => {
|
|
|
373
377
|
'await pi.exec("ov", ["log", "session-end", "--agent", AGENT_NAME])',
|
|
374
378
|
);
|
|
375
379
|
});
|
|
380
|
+
|
|
381
|
+
test("tool_call handler passes --tool-name event.toolName to tool-start", () => {
|
|
382
|
+
const generated = generatePiGuardExtension(builderHooks());
|
|
383
|
+
expect(generated).toContain(
|
|
384
|
+
'pi.exec("ov", ["log", "tool-start", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
|
|
385
|
+
);
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
test("tool_execution_end handler passes --tool-name event.toolName to tool-end", () => {
|
|
389
|
+
const generated = generatePiGuardExtension(builderHooks());
|
|
390
|
+
expect(generated).toContain(
|
|
391
|
+
'pi.exec("ov", ["log", "tool-end", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
|
|
392
|
+
);
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
test("tool_execution_end handler uses named event parameter (not _event)", () => {
|
|
396
|
+
const generated = generatePiGuardExtension(builderHooks());
|
|
397
|
+
expect(generated).toContain('pi.on("tool_execution_end", async (event) => {');
|
|
398
|
+
expect(generated).not.toContain('pi.on("tool_execution_end", async (_event) => {');
|
|
399
|
+
});
|
|
400
|
+
|
|
401
|
+
test('generated code contains pi.on("agent_end", ...)', () => {
|
|
402
|
+
const generated = generatePiGuardExtension(builderHooks());
|
|
403
|
+
expect(generated).toContain('pi.on("agent_end",');
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
test("generated code awaits pi.exec ov log session-end in agent_end handler", () => {
|
|
407
|
+
const generated = generatePiGuardExtension(builderHooks());
|
|
408
|
+
// agent_end handler must await (not fire-and-forget) so it completes
|
|
409
|
+
// before Pi moves on, ensuring the SessionStore is updated.
|
|
410
|
+
const agentEndIdx = generated.indexOf('pi.on("agent_end"');
|
|
411
|
+
const sessionShutdownIdx = generated.indexOf('pi.on("session_shutdown"');
|
|
412
|
+
expect(agentEndIdx).toBeGreaterThan(-1);
|
|
413
|
+
expect(sessionShutdownIdx).toBeGreaterThan(-1);
|
|
414
|
+
// agent_end must come before session_shutdown
|
|
415
|
+
expect(agentEndIdx).toBeLessThan(sessionShutdownIdx);
|
|
416
|
+
// Extract the agent_end handler body
|
|
417
|
+
const handlerBody = generated.slice(agentEndIdx, sessionShutdownIdx);
|
|
418
|
+
expect(handlerBody).toContain(
|
|
419
|
+
'await pi.exec("ov", ["log", "session-end", "--agent", AGENT_NAME])',
|
|
420
|
+
);
|
|
421
|
+
});
|
|
422
|
+
|
|
423
|
+
test("agent_end handler is present for all capabilities", () => {
|
|
424
|
+
for (const hooks of [builderHooks(), scoutHooks(), coordinatorHooks()]) {
|
|
425
|
+
const generated = generatePiGuardExtension(hooks);
|
|
426
|
+
expect(generated).toContain('pi.on("agent_end",');
|
|
427
|
+
}
|
|
428
|
+
});
|
|
376
429
|
});
|
|
377
430
|
|
|
378
431
|
describe("PiRuntime integration", () => {
|