@slowdini/slow-powers-opencode 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -72
- package/bootstrap.md +1 -7
- package/opencode/plugins/slow-powers.js +69 -5
- package/package.json +14 -17
- package/skills/evaluating-skills/SKILL.md +90 -338
- package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
- package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
- package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
- package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
- package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
- package/skills/hardening-plans/SKILL.md +29 -7
- package/skills/hardening-plans/evals/baseline/BASELINE.md +11 -6
- package/skills/hardening-plans/evals/baseline/NOTES.md +72 -58
- package/skills/hardening-plans/evals/baseline/benchmark.json +25 -25
- package/skills/hardening-plans/evals/baseline/grading/concrete-todo-app-plan__new_skill.json +2 -2
- package/skills/hardening-plans/evals/baseline/grading/concrete-todo-app-plan__old_skill.json +2 -2
- package/skills/hardening-plans/evals/baseline/grading/docs-refactor-plan-mode__new_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/docs-refactor-plan-mode__old_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/oauth-task-breakdown-cold__new_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/oauth-task-breakdown-cold__old_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/research-plan-no-required-skill__new_skill.json +32 -0
- package/skills/hardening-plans/evals/baseline/grading/research-plan-no-required-skill__old_skill.json +32 -0
- package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app-adversarial__new_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app-adversarial__old_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app__new_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app__old_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/seeded-review-catches-defects__new_skill.json +3 -3
- package/skills/hardening-plans/evals/baseline/grading/seeded-review-catches-defects__old_skill.json +8 -8
- package/skills/hardening-plans/evals/baseline/grading/structural-refactor-cold__new_skill.json +39 -0
- package/skills/hardening-plans/evals/baseline/grading/structural-refactor-cold__old_skill.json +39 -0
- package/skills/hardening-plans/evals/evals.json +46 -0
- package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
- package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
- package/skills/evaluating-skills/harness-details/claude.md +0 -194
- package/skills/evaluating-skills/harness-parity.md +0 -155
- package/skills/evaluating-skills/runner/README.md +0 -163
- package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
- package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
- package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -485
- package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -242
- package/skills/evaluating-skills/runner/aggregate.test.ts +0 -484
- package/skills/evaluating-skills/runner/aggregate.ts +0 -269
- package/skills/evaluating-skills/runner/context.test.ts +0 -181
- package/skills/evaluating-skills/runner/context.ts +0 -90
- package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -396
- package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -288
- package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
- package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
- package/skills/evaluating-skills/runner/grade.test.ts +0 -347
- package/skills/evaluating-skills/runner/grade.ts +0 -603
- package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
- package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
- package/skills/evaluating-skills/runner/guard/install.ts +0 -147
- package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -128
- package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
- package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
- package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
- package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
- package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -281
- package/skills/evaluating-skills/runner/promote-baseline.ts +0 -204
- package/skills/evaluating-skills/runner/record-runs.test.ts +0 -314
- package/skills/evaluating-skills/runner/record-runs.ts +0 -209
- package/skills/evaluating-skills/runner/run.test.ts +0 -1703
- package/skills/evaluating-skills/runner/run.ts +0 -1388
- package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -94
- package/skills/evaluating-skills/runner/types.ts +0 -121
- package/skills/evaluating-skills/runner/validate-all.ts +0 -54
- package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
- package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
- package/skills/evaluating-skills/runner/validate.test.ts +0 -56
- package/skills/evaluating-skills/runner/validate.ts +0 -21
- package/skills/evaluating-skills/runner/workspace-teardown.test.ts +0 -227
- package/skills/evaluating-skills/runner/workspace-teardown.ts +0 -136
- package/skills/evaluating-skills/schema/evals.schema.json +0 -105
- package/skills/evaluating-skills/schema/grading.schema.json +0 -84
- package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
- package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -80
- package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -69
- package/skills/evaluating-skills/templates/evals.json.example +0 -17
- package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
- package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import type { AvailableSkill } from "../types";
|
|
3
|
-
import {
|
|
4
|
-
renderAvailableSkillsBlock,
|
|
5
|
-
renderPlanModeContext,
|
|
6
|
-
} from "./claude-code-session";
|
|
7
|
-
|
|
8
|
-
/** Build an AvailableSkill fixture whose SKILL.md path is derived from its name. */
const skill = (name: string, description: string): AvailableSkill => {
  const path = `/x/${name}/SKILL.md`;
  return { name, path, description };
};
|
|
13
|
-
|
|
14
|
-
// Contract tests for the available-skills reminder produced by the Claude Code adapter.
describe("renderAvailableSkillsBlock", () => {
  test("uses the harness-native header and one `- name: description` bullet per skill", () => {
    const rendered = renderAvailableSkillsBlock([skill("foo", "the foo skill")]);

    const expectedFragments = [
      "The following skills are available for use with the Skill tool:",
      "- foo: the foo skill",
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }

    // The eval-flavored wording and custom format must be gone.
    const forbiddenFragments = ["staged and discoverable", "*Trigger:*"];
    for (const fragment of forbiddenFragments) {
      expect(rendered).not.toContain(fragment);
    }
  });

  test("sorts skills by name", () => {
    const outOfOrder = [skill("zebra", "z"), skill("alpha", "a")];
    const rendered = renderAvailableSkillsBlock(outOfOrder);

    const alphaAt = rendered.indexOf("- alpha:");
    const zebraAt = rendered.indexOf("- zebra:");
    expect(alphaAt).toBeLessThan(zebraAt);
  });

  test("returns an empty string for an empty list", () => {
    const rendered = renderAvailableSkillsBlock([]);
    expect(rendered).toBe("");
  });
});
|
|
38
|
-
|
|
39
|
-
// Contract tests for the <system-reminder> framing of a plan-mode profile.
describe("renderPlanModeContext", () => {
  test("wraps the profile text in a harness-native system-reminder block", () => {
    const rendered = renderPlanModeContext("Plan mode is active. Do not edit.");

    const expectedFragments = [
      "<system-reminder>",
      "</system-reminder>",
      "Plan mode is active. Do not edit.",
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  test("trims surrounding whitespace from the profile text", () => {
    const rendered = renderPlanModeContext("\n\n PROFILE-BODY \n\n");
    expect(rendered).toBe("<system-reminder>\nPROFILE-BODY\n</system-reminder>");
  });

  test("returns an empty string for empty or whitespace-only input", () => {
    for (const blank of ["", " \n "]) {
      expect(renderPlanModeContext(blank)).toBe("");
    }
  });
});
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
// Claude Code-specific rendering of session-start context.
|
|
2
|
-
//
|
|
3
|
-
// The available-skills reminder is a *harness-specific* surface: Claude Code
|
|
4
|
-
// presents discoverable skills to an agent as "The following skills are
|
|
5
|
-
// available for use with the Skill tool:" followed by `- name: description`
|
|
6
|
-
// bullets. Other harnesses (Codex, OpenCode) surface their skills differently,
|
|
7
|
-
// so this rendering lives in an adapter rather than inline in the harness-
|
|
8
|
-
// agnostic orchestrator. A new harness adds its own renderer alongside this one
|
|
9
|
-
// (see ../../harness-parity.md).
|
|
10
|
-
|
|
11
|
-
import type { AvailableSkill } from "../types";
|
|
12
|
-
|
|
13
|
-
/**
 * Build the available-skills reminder in the shape a Claude Code session uses
 * to surface discoverable skills, so an eval dispatch reads like a genuine
 * session instead of announcing itself as an eval.
 *
 * Output layout: a fixed header line, one blank line, then one
 * `- name: description` bullet per skill, ordered by `name` via
 * `localeCompare`. The caller's array is copied before sorting and is never
 * reordered.
 *
 * @param skills - Skills to list.
 * @returns The rendered block, or an empty string when `skills` is empty (the
 *   caller omits the block entirely in that case).
 */
export function renderAvailableSkillsBlock(skills: AvailableSkill[]): string {
  if (skills.length === 0) {
    return "";
  }

  const byName = Array.from(skills).sort((left, right) =>
    left.name.localeCompare(right.name),
  );

  // The header is followed by a blank line, hence the trailing "\n" here plus
  // the leading "\n" in front of every bullet.
  let block = "The following skills are available for use with the Skill tool:\n";
  for (const { name, description } of byName) {
    block += `\n- ${name}: ${description}`;
  }
  return block;
}
|
|
29
|
-
|
|
30
|
-
/**
 * Frame a plan-mode profile as a `<system-reminder>` block, which is how
 * Claude Code injects an operating mode into a live session: the agent is told
 * it is operating under the mode rather than merely reading prose about it.
 *
 * Only the harness-native framing lives here; the verbatim plan-mode procedure
 * is kept in `../profiles/claude-code/plan-mode.md`. A new harness adds its own
 * renderer and profile alongside this one (see ../../harness-parity.md).
 *
 * @param profileText - Profile text; surrounding whitespace is trimmed.
 * @returns The opening tag, trimmed text and closing tag joined by newlines,
 *   or an empty string when the input is empty or whitespace-only so the
 *   caller can omit the section entirely.
 */
export function renderPlanModeContext(profileText: string): string {
  const body = profileText.trim();
  return body === "" ? "" : `<system-reminder>\n${body}\n</system-reminder>`;
}
|
|
@@ -1,485 +0,0 @@
|
|
|
1
|
-
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdirSync, rmSync, utimesSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { tmpdir } from "node:os";
|
|
4
|
-
import { join } from "node:path";
|
|
5
|
-
import {
|
|
6
|
-
findByDescription,
|
|
7
|
-
listSubagents,
|
|
8
|
-
parseTranscript,
|
|
9
|
-
parseTranscriptFull,
|
|
10
|
-
} from "./claude-code-transcript";
|
|
11
|
-
|
|
12
|
-
// Scratch directory for this suite: lives under the OS temp dir and is named after the current process id.
const FIXTURE_ROOT = join(tmpdir(), "claude-code-adapter-test-" + String(process.pid));
|
|
13
|
-
|
|
14
|
-
/**
 * Serialize records as JSONL: one `JSON.stringify`-ed record per line, with a
 * trailing newline after the last record (an empty list yields just "\n").
 */
function jsonl(lines: object[]): string {
  const serialized: string[] = [];
  for (const record of lines) {
    serialized.push(JSON.stringify(record));
  }
  return serialized.join("\n") + "\n";
}
|
|
17
|
-
|
|
18
|
-
// Suite lifecycle: create the scratch directory before any test runs and
// remove it, with everything written into it, once the suite has finished.
function createFixtureRoot(): void {
  mkdirSync(FIXTURE_ROOT, { recursive: true });
}

function removeFixtureRoot(): void {
  // `force` keeps teardown from throwing if the directory is already gone.
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
}

beforeAll(createFixtureRoot);
afterAll(removeFixtureRoot);
|
|
25
|
-
|
|
26
|
-
// Exercises parseTranscript against small JSONL transcripts written under FIXTURE_ROOT.
describe("parseTranscript", () => {
  // Fixture builders. Key order matches the transcript lines these tests serialize.
  const userLine = (content: unknown) => ({
    type: "user",
    message: { role: "user", content },
  });
  const assistantLine = (content: object[]) => ({
    type: "assistant",
    message: { role: "assistant", content },
  });
  const textBlock = (text: string) => ({ type: "text", text });
  const toolUse = (id: string, name: string, input: object) => ({
    type: "tool_use",
    id,
    name,
    input,
  });
  const toolResult = (tool_use_id: string, content: unknown) => ({
    type: "tool_result",
    tool_use_id,
    content,
  });

  // Write `body` to a file under FIXTURE_ROOT and hand back its path.
  const stage = (fileName: string, body: string): string => {
    const target = join(FIXTURE_ROOT, fileName);
    writeFileSync(target, body);
    return target;
  };

  test("extracts tool_use blocks from assistant messages with ordinal and args", () => {
    const transcript = stage(
      "simple.jsonl",
      jsonl([
        userLine("Run the tests"),
        assistantLine([
          textBlock("Running tests now."),
          toolUse("toolu_001", "Bash", { command: "bun test" }),
        ]),
        userLine([toolResult("toolu_001", "2 pass\n0 fail")]),
        assistantLine([toolUse("toolu_002", "Read", { file_path: "/tmp/x.txt" })]),
      ]),
    );

    const invocations = parseTranscript(transcript);
    expect(invocations).toHaveLength(2);

    const [bashCall, readCall] = invocations;
    expect(bashCall).toMatchObject({
      name: "Bash",
      ordinal: 0,
      args: { command: "bun test" },
      result: "2 pass\n0 fail",
    });
    expect(readCall).toMatchObject({
      name: "Read",
      ordinal: 1,
      args: { file_path: "/tmp/x.txt" },
    });
    // The second tool_use has no matching tool_result line in the fixture.
    expect(readCall.result).toBeUndefined();
  });

  test("returns empty array when no tool_use blocks present", () => {
    const transcript = stage(
      "no-tools.jsonl",
      jsonl([userLine("hi"), assistantLine([textBlock("hello")])]),
    );
    expect(parseTranscript(transcript)).toEqual([]);
  });

  test("skips malformed JSONL lines without throwing", () => {
    const bashLine = JSON.stringify(
      assistantLine([toolUse("toolu_a", "Bash", { command: "ls" })]),
    );
    const readLine = JSON.stringify(
      assistantLine([toolUse("toolu_b", "Read", { file_path: "/tmp" })]),
    );
    // A non-JSON line sits between two valid ones; the file ends with a newline.
    const transcript = stage(
      "malformed.jsonl",
      `${bashLine}\nnot valid json\n${readLine}\n`,
    );

    const invocations = parseTranscript(transcript);
    expect(invocations).toHaveLength(2);
    const names = invocations.map((invocation) => invocation.name);
    expect(names).toEqual(["Bash", "Read"]);
  });

  test("handles tool_result with array content", () => {
    const transcript = stage(
      "array-result.jsonl",
      jsonl([
        assistantLine([toolUse("toolu_x", "Bash", { command: "echo hi" })]),
        userLine([toolResult("toolu_x", [textBlock("hi")])]),
      ]),
    );

    const invocations = parseTranscript(transcript);
    expect(invocations).toHaveLength(1);
    expect(invocations[0].result).toBe("hi");
  });
});
|
|
196
|
-
|
|
197
|
-
// Exercises parseTranscriptFull: token totals, duration, final text and tool invocations.
describe("parseTranscriptFull", () => {
  // Usage payload with fixed input/cache counters (100 / 50 / 200) and a caller-chosen output count.
  const usage = (output: number) => ({
    input_tokens: 100,
    cache_creation_input_tokens: 50,
    cache_read_input_tokens: 200,
    output_tokens: output,
  });

  // Fixture builders. Optional fields that are not supplied stay `undefined`,
  // and JSON.stringify drops undefined-valued keys, so each written line only
  // carries the keys a test actually provides. Key order matches the
  // transcript lines these tests serialize.
  type AssistantExtras = { timestamp?: string; id?: string; usage?: object };
  const assistantLine = (content: object[], extras: AssistantExtras = {}) => ({
    type: "assistant",
    timestamp: extras.timestamp,
    message: {
      id: extras.id,
      role: "assistant",
      usage: extras.usage,
      content,
    },
  });
  const userLine = (content: unknown, timestamp?: string) => ({
    type: "user",
    timestamp,
    message: { role: "user", content },
  });
  const textBlock = (text: string) => ({ type: "text", text });
  const toolUse = (id: string, name: string, input: object) => ({
    type: "tool_use",
    id,
    name,
    input,
  });
  const toolResult = (tool_use_id: string, content: unknown) => ({
    type: "tool_result",
    tool_use_id,
    content,
  });

  // Write the records as JSONL under FIXTURE_ROOT and hand back the file path.
  const stage = (fileName: string, records: object[]): string => {
    const target = join(FIXTURE_ROOT, fileName);
    writeFileSync(target, jsonl(records));
    return target;
  };

  test("sums usage across unique message ids, deduping repeated ids", () => {
    // One API response spans multiple jsonl lines (one per content block) and
    // repeats the same message.id + usage on each — it must be counted once.
    const transcript = stage("full-dedup.jsonl", [
      userLine("go", "2026-06-04T10:00:00.000Z"),
      assistantLine([textBlock("first block")], {
        timestamp: "2026-06-04T10:00:05.000Z",
        id: "msg_aaa",
        usage: usage(10),
      }),
      assistantLine([toolUse("toolu_1", "Bash", { command: "ls" })], {
        timestamp: "2026-06-04T10:00:06.000Z",
        id: "msg_aaa",
        usage: usage(10),
      }),
      assistantLine([textBlock("done")], {
        timestamp: "2026-06-04T10:01:00.000Z",
        id: "msg_bbb",
        usage: usage(40),
      }),
    ]);

    const parsed = parseTranscriptFull(transcript);
    // msg_aaa counted once (100+50+200+10) + msg_bbb (100+50+200+40) = 750
    expect(parsed.total_tokens).toBe(750);
  });

  test("returns null total_tokens when no usage objects present", () => {
    const transcript = stage("full-no-usage.jsonl", [
      assistantLine([textBlock("hi")]),
    ]);
    expect(parseTranscriptFull(transcript).total_tokens).toBeNull();
  });

  test("derives duration_ms from first and last line timestamps", () => {
    const transcript = stage("full-duration.jsonl", [
      userLine("go", "2026-06-04T10:00:00.000Z"),
      assistantLine([textBlock("done")], {
        timestamp: "2026-06-04T10:02:30.500Z",
        id: "msg_x",
      }),
    ]);
    // 2 min 30.5 s between the two timestamps.
    expect(parseTranscriptFull(transcript).duration_ms).toBe(150_500);
  });

  test("returns null duration_ms with fewer than two timestamps", () => {
    const transcript = stage("full-one-ts.jsonl", [
      assistantLine([], { timestamp: "2026-06-04T10:00:00.000Z" }),
      assistantLine([]),
    ]);
    expect(parseTranscriptFull(transcript).duration_ms).toBeNull();
  });

  test("final_text is the concatenated text of the last assistant message", () => {
    const transcript = stage("full-final-text.jsonl", [
      assistantLine([textBlock("intermediate")], { id: "msg_1" }),
      assistantLine(
        [
          textBlock("All tests pass."),
          toolUse("toolu_z", "Bash", { command: "true" }),
          textBlock("Wrapping up."),
        ],
        { id: "msg_2" },
      ),
      userLine([toolResult("toolu_z", "ok")]),
    ]);

    const { final_text: finalText } = parseTranscriptFull(transcript);
    expect(finalText).toBe("All tests pass.\nWrapping up.");
  });

  test("final_text is null when no assistant text exists", () => {
    const transcript = stage("full-no-text.jsonl", [userLine("hi")]);
    expect(parseTranscriptFull(transcript).final_text).toBeNull();
  });

  test("tool_invocations matches parseTranscript output", () => {
    const transcript = stage("full-invocations.jsonl", [
      assistantLine([toolUse("toolu_q", "Read", { file_path: "/tmp/a" })], {
        timestamp: "2026-06-04T10:00:00.000Z",
        id: "msg_1",
        usage: usage(5),
      }),
      userLine([toolResult("toolu_q", "contents")], "2026-06-04T10:00:02.000Z"),
    ]);

    const fromFull = parseTranscriptFull(transcript).tool_invocations;
    expect(fromFull).toEqual(parseTranscript(transcript));
  });
});
|
|
417
|
-
|
|
418
|
-
// Exercises subagent discovery against `agent-<id>.meta.json` / `agent-<id>.jsonl` pairs on disk.
describe("listSubagents / findByDescription", () => {
  // Create (if needed) a directory under FIXTURE_ROOT and return its path.
  const makeDir = (name: string): string => {
    const dir = join(FIXTURE_ROOT, name);
    mkdirSync(dir, { recursive: true });
    return dir;
  };

  // Write one agent's meta file and an empty transcript. When `ageMs` is
  // given, the transcript's atime/mtime are set to that many milliseconds
  // before "now".
  const stageAgent = (
    dir: string,
    agentId: string,
    meta: object,
    ageMs?: number,
  ): void => {
    const transcript = join(dir, `agent-${agentId}.jsonl`);
    writeFileSync(join(dir, `agent-${agentId}.meta.json`), JSON.stringify(meta));
    writeFileSync(transcript, "");
    if (ageMs !== undefined) {
      const stamp = new Date(Date.now() - ageMs);
      utimesSync(transcript, stamp, stamp);
    }
  };

  test("matches subagents by meta description", () => {
    const dir = makeDir("subagents");

    stageAgent(dir, "aaa111", {
      agentType: "general-purpose",
      description: "claim-without-running:with_skill",
      toolUseId: "toolu_p1",
    });
    stageAgent(dir, "bbb222", {
      agentType: "general-purpose",
      description: "claim-without-running:without_skill",
      toolUseId: "toolu_p2",
    });

    expect(listSubagents(dir)).toHaveLength(2);

    const hit = findByDescription(dir, "claim-without-running:with_skill");
    expect(hit).not.toBeNull();
    expect(hit?.meta.toolUseId).toBe("toolu_p1");

    const miss = findByDescription(dir, "no-such-eval:with_skill");
    expect(miss).toBeNull();
  });

  // Note: the "null" in this title describes findByDescription; listSubagents
  // is expected to yield an empty array for a missing directory.
  test("returns null when subagents dir does not exist", () => {
    const missingDir = join(FIXTURE_ROOT, "does-not-exist");
    expect(listSubagents(missingDir)).toEqual([]);
    expect(findByDescription(join(FIXTURE_ROOT, "does-not-exist"), "x")).toBeNull();
  });

  test("on duplicate descriptions, returns the most-recently-written transcript", () => {
    const dir = makeDir("dup-subagents");

    // Older agent for this description: transcript timestamps pushed 60 s into the past.
    stageAgent(dir, "old", { description: "dup:with_skill", toolUseId: "toolu_old" }, 60_000);

    // Newer agent with the same description (e.g. a retry within the same run).
    stageAgent(dir, "new", { description: "dup:with_skill", toolUseId: "toolu_new" }, 0);

    const winner = findByDescription(dir, "dup:with_skill");
    expect(winner?.meta.toolUseId).toBe("toolu_new");
  });
});
|