@bluecopa/harness 0.1.0-snapshot.99 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +18 -0
- package/README.md +117 -212
- package/docs/guides/observability.md +32 -0
- package/docs/guides/providers.md +51 -0
- package/docs/guides/skills.md +25 -0
- package/docs/security/skill-sandbox-threat-model.md +20 -0
- package/package.json +1 -29
- package/src/agent/create-agent.ts +884 -0
- package/src/agent/create-tools.ts +33 -0
- package/src/agent/step-executor.ts +15 -0
- package/src/agent/types.ts +57 -0
- package/src/context/llm-compaction-strategy.ts +37 -0
- package/src/context/prepare-step.ts +65 -0
- package/src/context/token-tracker.ts +26 -0
- package/src/extracted/manifest.json +10 -0
- package/src/extracted/prompts/compaction.md +5 -0
- package/src/extracted/prompts/system.md +5 -0
- package/src/extracted/tools.json +82 -0
- package/src/hooks/hook-runner.ts +22 -0
- package/src/hooks/tool-wrappers.ts +64 -0
- package/src/interfaces/compaction-strategy.ts +18 -0
- package/src/interfaces/hooks.ts +24 -0
- package/src/interfaces/sandbox-provider.ts +29 -0
- package/src/interfaces/session-store.ts +48 -0
- package/src/interfaces/tool-provider.ts +70 -0
- package/src/loop/bridge.ts +363 -0
- package/src/loop/context-store.ts +207 -0
- package/src/loop/lcm-tool-loop.ts +163 -0
- package/src/loop/vercel-agent-loop.ts +279 -0
- package/src/observability/context.ts +17 -0
- package/src/observability/metrics.ts +27 -0
- package/src/observability/otel.ts +105 -0
- package/src/observability/tracing.ts +13 -0
- package/src/optimization/agent-evaluator.ts +40 -0
- package/src/optimization/config-serializer.ts +16 -0
- package/src/optimization/optimization-runner.ts +39 -0
- package/src/optimization/trace-collector.ts +33 -0
- package/src/permissions/permission-manager.ts +34 -0
- package/src/providers/composite-tool-provider.ts +72 -0
- package/src/providers/control-plane-e2b-executor.ts +218 -0
- package/src/providers/e2b-tool-provider.ts +68 -0
- package/src/providers/local-tool-provider.ts +190 -0
- package/src/providers/skill-sandbox-provider.ts +46 -0
- package/src/sessions/file-session-store.ts +61 -0
- package/src/sessions/in-memory-session-store.ts +39 -0
- package/src/sessions/session-manager.ts +44 -0
- package/src/skills/skill-loader.ts +52 -0
- package/src/skills/skill-manager.ts +175 -0
- package/src/skills/skill-router.ts +99 -0
- package/src/skills/skill-types.ts +26 -0
- package/src/subagents/subagent-manager.ts +22 -0
- package/src/subagents/task-tool.ts +13 -0
- package/tests/integration/agent-loop-basic.spec.ts +56 -0
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +66 -0
- package/tests/integration/concurrency-single-turn.spec.ts +35 -0
- package/tests/integration/otel-metrics-emission.spec.ts +62 -0
- package/tests/integration/otel-trace-propagation.spec.ts +48 -0
- package/tests/integration/parity-benchmark.spec.ts +45 -0
- package/tests/integration/provider-local-smoke.spec.ts +63 -0
- package/tests/integration/session-resume.spec.ts +30 -0
- package/tests/integration/skill-install-rollback.spec.ts +64 -0
- package/tests/integration/skill-sandbox-file-blob.spec.ts +54 -0
- package/tests/integration/skills-progressive-disclosure.spec.ts +61 -0
- package/tests/integration/streaming-compaction-boundary.spec.ts +43 -0
- package/tests/integration/structured-messages-agent.spec.ts +265 -0
- package/tests/integration/subagent-isolation.spec.ts +24 -0
- package/tests/security/skill-sandbox-isolation.spec.ts +51 -0
- package/tests/unit/create-tools-schema-parity.spec.ts +22 -0
- package/tests/unit/extracted-manifest.spec.ts +41 -0
- package/tests/unit/interfaces-contract.spec.ts +101 -0
- package/tests/unit/structured-messages.spec.ts +176 -0
- package/tests/unit/token-tracker.spec.ts +22 -0
- package/tsconfig.json +14 -0
- package/vitest.config.ts +7 -0
- package/dist/arc/app-adapter.d.ts +0 -101
- package/dist/arc/app-adapter.js +0 -312
- package/dist/arc/app-adapter.js.map +0 -1
- package/dist/arc/create-arc-agent.d.ts +0 -50
- package/dist/arc/create-arc-agent.js +0 -2926
- package/dist/arc/create-arc-agent.js.map +0 -1
- package/dist/arc/profile-builder.d.ts +0 -49
- package/dist/arc/profile-builder.js +0 -163
- package/dist/arc/profile-builder.js.map +0 -1
- package/dist/loop/vercel-agent-loop.d.ts +0 -99
- package/dist/loop/vercel-agent-loop.js +0 -308
- package/dist/loop/vercel-agent-loop.js.map +0 -1
- package/dist/types-g-3DvSSE.d.ts +0 -745
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import { mkdtemp, rm } from 'node:fs/promises';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { afterEach, describe, expect, it } from 'vitest';
|
|
5
|
+
|
|
6
|
+
import { createAgent } from '../../src/agent/create-agent';
|
|
7
|
+
import { LocalToolProvider } from '../../src/providers/local-tool-provider';
|
|
8
|
+
import type { AgentMessage, AgentAction, AgentStreamEvent } from '../../src/agent/types';
|
|
9
|
+
|
|
10
|
+
const tempDirs: string[] = [];
|
|
11
|
+
|
|
12
|
+
// Delete every temp directory registered by the test that just ran.
// The registry is reset in `finally` so that a failed removal cannot leave
// stale paths behind for the cleanup of later tests; the removal error itself
// still propagates to the test runner.
afterEach(async () => {
  try {
    await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
  } finally {
    tempDirs.length = 0;
  }
});
|
|
16
|
+
|
|
17
|
+
describe('structured messages in agent run()', () => {
|
|
18
|
+
// A lone `tool` action should surface as one assistant message carrying the
// call and one tool message carrying its result.
it('single tool call produces assistant with toolCalls and tool with toolResults', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-struct-'));
  tempDirs.push(workDir);

  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    async nextAction() {
      invocation += 1;
      // Only the first invocation requests a tool; every later one finishes.
      if (invocation !== 1) {
        return { type: 'final', content: 'done' };
      }
      return {
        type: 'tool',
        name: 'Write',
        args: { path: 'test.txt', content: 'structured' },
      };
    },
  });

  const { messages } = await agent.run('write a file');

  // Assistant side: exactly one recorded call with name, id and args.
  const assistantMsg = messages.find(
    (m) => m.role === 'assistant' && (m.toolCalls?.length ?? 0) > 0
  );
  expect(assistantMsg).toBeDefined();
  expect(assistantMsg!.toolCalls).toHaveLength(1);
  const recordedCall = assistantMsg!.toolCalls![0]!;
  expect(recordedCall.toolName).toBe('Write');
  expect(recordedCall.toolCallId).toBeTruthy();
  expect(recordedCall.args).toEqual({ path: 'test.txt', content: 'structured' });

  // Tool side: exactly one non-error result for the same tool.
  const toolMsg = messages.find(
    (m) => m.role === 'tool' && (m.toolResults?.length ?? 0) > 0
  );
  expect(toolMsg).toBeDefined();
  expect(toolMsg!.toolResults).toHaveLength(1);
  const recordedResult = toolMsg!.toolResults![0]!;
  expect(recordedResult.toolName).toBe('Write');
  expect(recordedResult.isError).toBe(false);
  // The human-readable content string is still populated.
  expect(toolMsg!.content).toContain('Write(test.txt)');
});
|
|
63
|
+
|
|
64
|
+
// A `tool_batch` action should yield one assistant message listing both calls
// (with distinct ids) and one tool message per result.
it('tool batch produces assistant with multiple toolCalls', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-batch-'));
  tempDirs.push(workDir);

  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    async nextAction() {
      invocation += 1;
      if (invocation !== 1) {
        return { type: 'final', content: 'done' };
      }
      return {
        type: 'tool_batch',
        calls: [
          { type: 'tool', name: 'Write', args: { path: 'a.txt', content: 'aaa' } },
          { type: 'tool', name: 'Write', args: { path: 'b.txt', content: 'bbb' } },
        ],
      };
    },
  });

  const { messages } = await agent.run('write two files');

  // The assistant message must list both calls.
  const batchMsg = messages.find(
    (m) => m.role === 'assistant' && m.toolCalls?.length === 2
  );
  expect(batchMsg).toBeDefined();
  const [firstCall, secondCall] = batchMsg!.toolCalls!;
  expect(firstCall!.toolName).toBe('Write');
  expect(secondCall!.toolName).toBe('Write');
  // Ids must differ between the two calls.
  expect(firstCall!.toolCallId).not.toBe(secondCall!.toolCallId);

  // Each call gets its own tool message.
  const resultMsgs = messages.filter(
    (m) => m.role === 'tool' && (m.toolResults?.length ?? 0) > 0
  );
  expect(resultMsgs).toHaveLength(2);
  for (const msg of resultMsgs) {
    expect(msg.toolResults![0]!.toolName).toBe('Write');
  }
});
|
|
110
|
+
|
|
111
|
+
// Reading a file that was never created should be reported as an error result.
it('failed tool call sets isError on toolResults', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-err-'));
  tempDirs.push(workDir);

  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    async nextAction() {
      invocation += 1;
      if (invocation !== 1) {
        return { type: 'final', content: 'done' };
      }
      // Nothing has been written to the fresh temp directory, so this path is missing.
      return {
        type: 'tool',
        name: 'Read',
        args: { path: 'nonexistent.txt' },
      };
    },
  });

  const { messages } = await agent.run('read missing file');

  const toolMsg = messages.find(
    (m) => m.role === 'tool' && (m.toolResults?.length ?? 0) > 0
  );
  expect(toolMsg).toBeDefined();
  const outcome = toolMsg!.toolResults![0]!;
  expect(outcome.isError).toBe(true);
  expect(outcome.result).toContain('ERROR');
});
|
|
143
|
+
|
|
144
|
+
// A toolCallId supplied by the loop action must appear unchanged on both the
// assistant message and the matching tool message.
it('toolCallId from loop action is preserved through to messages', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-id-'));
  tempDirs.push(workDir);

  const knownId = 'toolu_test_abc123';
  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    async nextAction() {
      invocation += 1;
      if (invocation !== 1) {
        return { type: 'final', content: 'done' };
      }
      return {
        type: 'tool',
        name: 'Write',
        args: { path: 'id-test.txt', content: 'hi' },
        toolCallId: knownId,
      };
    },
  });

  const { messages } = await agent.run('test toolCallId passthrough');

  const callCarrier = messages.find(
    (m) => m.role === 'assistant' && m.toolCalls?.some((tc) => tc.toolCallId === knownId)
  );
  expect(callCarrier).toBeDefined();

  const resultCarrier = messages.find(
    (m) => m.role === 'tool' && m.toolResults?.some((tr) => tr.toolCallId === knownId)
  );
  expect(resultCarrier).toBeDefined();
});
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
describe('structured messages in agent stream()', () => {
|
|
183
|
+
// Drives agent.stream() with a loop that implements streamAction and checks
// that a Write `tool_start` event and a `done` event are emitted.
// NOTE(review): the toolCallId passed into the stream is not asserted on the
// emitted event; only the event type and tool name are checked.
it('streaming yields tool_start events and builds structured messages', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-stream-'));
  tempDirs.push(workDir);

  const toolCallId = 'stream-call-1';
  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    loop: {
      async nextAction() {
        invocation += 1;
        return { type: 'final', content: 'done' };
      },
      async *streamAction(_messages: AgentMessage[]): AsyncGenerator<AgentStreamEvent> {
        invocation += 1;
        // Only the first loop invocation emits events; later ones yield nothing.
        if (invocation !== 1) {
          return;
        }
        yield { type: 'text_delta', text: 'Creating file...' };
        yield {
          type: 'tool_start',
          name: 'Write',
          args: { path: 'stream.txt', content: 'streamed' },
          toolCallId,
        };
      },
    },
  });

  const seen: AgentStreamEvent[] = [];
  for await (const event of agent.stream('stream test')) {
    seen.push(event);
  }

  // A tool_start event for Write must have been emitted.
  const writeStart = seen.find((e) => e.type === 'tool_start' && e.name === 'Write');
  expect(writeStart).toBeDefined();

  // The stream must terminate with a done event.
  const finished = seen.find((e) => e.type === 'done');
  expect(finished).toBeDefined();
});
|
|
229
|
+
|
|
230
|
+
// The loop here defines only nextAction (no streamAction), so agent.stream()
// has to derive its events from nextAction results.
it('streaming non-streaming fallback builds structured messages', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-fallback-'));
  tempDirs.push(workDir);

  let invocation = 0;
  const agent = createAgent({
    toolProvider: new LocalToolProvider(workDir),
    loop: {
      async nextAction(): Promise<AgentAction> {
        invocation += 1;
        if (invocation !== 1) {
          return { type: 'final', content: 'complete' };
        }
        return {
          type: 'tool',
          name: 'Write',
          args: { path: 'fallback.txt', content: 'via-nextAction' },
          toolCallId: 'fallback-id',
        };
      },
    },
  });

  const seen: AgentStreamEvent[] = [];
  for await (const event of agent.stream('fallback test')) {
    seen.push(event);
  }

  const sawWriteStart = seen.some((e) => e.type === 'tool_start' && e.name === 'Write');
  const sawWriteEnd = seen.some((e) => e.type === 'tool_end' && e.name === 'Write');
  const sawDone = seen.some((e) => e.type === 'done');

  expect(sawWriteStart).toBe(true);
  expect(sawWriteEnd).toBe(true);
  expect(sawDone).toBe(true);
});
|
|
265
|
+
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import { createSubagentManager } from '../../src/subagents/subagent-manager';
|
|
4
|
+
import { LocalToolProvider } from '../../src/providers/local-tool-provider';
|
|
5
|
+
|
|
6
|
+
describe('subagent isolation', () => {
|
|
7
|
+
// Runs two child tasks one after the other through the subagent manager and
// checks each child's final output.
// NOTE(review): the counter lives in the test closure and is shared by both
// children, so the expected suffixes reflect invocation order.
it('runs child tasks with isolated runtime state', async () => {
  let invocations = 0;

  const buildChildConfig = () => ({
    toolProvider: new LocalToolProvider(),
    async nextAction() {
      invocations += 1;
      return { type: 'final', content: `child-finished-${invocations}` };
    }
  });
  const manager = createSubagentManager(buildChildConfig);

  // Sequential on purpose: the second run must observe the first run's increment.
  const firstRun = await manager.runIsolated('child task 1');
  const secondRun = await manager.runIsolated('child task 2');

  expect(firstRun.output).toBe('child-finished-1');
  expect(secondRun.output).toBe('child-finished-2');
});
|
|
24
|
+
});
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { afterEach, describe, expect, it } from 'vitest';
|
|
5
|
+
|
|
6
|
+
import type { SandboxProvider } from '../../src/interfaces/sandbox-provider';
|
|
7
|
+
import { SkillManager } from '../../src/skills/skill-manager';
|
|
8
|
+
|
|
9
|
+
const tempDirs: string[] = [];
|
|
10
|
+
|
|
11
|
+
// Delete every temp directory registered by the test that just ran.
// The registry is reset in `finally` so that a failed removal cannot leave
// stale paths behind for the cleanup of later tests; the removal error itself
// still propagates to the test runner.
afterEach(async () => {
  try {
    await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
  } finally {
    tempDirs.length = 0;
  }
});
|
|
15
|
+
|
|
16
|
+
/**
 * Inert SandboxProvider stub for tests: commands report exit code 0 with
 * empty output, file reads return zero bytes, and writes are discarded.
 */
class NoopSandbox implements SandboxProvider {
  /** Pretends the command ran successfully and printed nothing. */
  async exec() {
    const exitCode = 0;
    return { exitCode, stdout: '', stderr: '' };
  }

  /** Returns an empty byte buffer regardless of the requested path. */
  async readSandboxFile() {
    const data = new Uint8Array(0);
    return { data };
  }

  /** Accepts the write and does nothing with it. */
  async writeSandboxFile() {
    return;
  }
}
|
|
27
|
+
|
|
28
|
+
describe('skill sandbox isolation', () => {
|
|
29
|
+
// A skill index entry whose path contains a `..` segment must be refused at
// invoke time with an 'unsafe skill path' error.
it('rejects unsafe skill paths containing traversal sequences', async () => {
  const workDir = await mkdtemp(join(tmpdir(), 'harness-skill-security-'));
  tempDirs.push(workDir);

  const skillIndex = [
    {
      name: 'unsafe',
      description: 'unsafe path',
      path: '../outside/SKILL.md'
    }
  ];
  const indexPath = join(workDir, 'skills.json');
  await writeFile(indexPath, JSON.stringify(skillIndex), 'utf8');

  const manager = new SkillManager(new NoopSandbox());
  await manager.discover(indexPath);

  // Discovery is expected to succeed; the rejection is asserted on invoke.
  const invocation = manager.invoke('unsafe');
  await expect(invocation).rejects.toThrow('unsafe skill path');
});
|
|
51
|
+
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
import { describe, expect, it } from 'vitest';
|
|
4
|
+
|
|
5
|
+
import { createToolRegistry, extractedToolNames, loadExtractedTools } from '../../src/agent/create-tools';
|
|
6
|
+
|
|
7
|
+
describe('createTools parity', () => {
|
|
8
|
+
// Both the extracted name list and the registry keys must match the canonical
// tool names, in this exact order.
it('loads extracted tools and preserves canonical names', () => {
  const canonical = ['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep'];

  const extracted = extractedToolNames();
  expect(extracted).toEqual(canonical);

  const registryNames = Array.from(createToolRegistry().keys());
  expect(registryNames).toEqual(canonical);
});
|
|
15
|
+
|
|
16
|
+
// loadExtractedTools() must return exactly what is stored in
// src/extracted/tools.json on disk.
it('matches extracted snapshot exactly', () => {
  const snapshotPath = resolve(import.meta.dirname, '../../src/extracted/tools.json');
  const snapshotText = readFileSync(snapshotPath, 'utf8');
  const snapshot = JSON.parse(snapshotText) as ReturnType<typeof loadExtractedTools>;

  expect(loadExtractedTools()).toEqual(snapshot);
});
|
|
22
|
+
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { readFileSync } from 'node:fs';
|
|
3
|
+
import { resolve } from 'node:path';
|
|
4
|
+
import { describe, expect, it } from 'vitest';
|
|
5
|
+
|
|
6
|
+
/**
 * Shape of `src/extracted/manifest.json` as read by the spec in this file.
 */
interface ExtractedManifest {
  /** Version string; the spec expects three dot-separated numbers. */
  schemaVersion: string;
  /** Source commit identifier; the spec only requires it to be non-empty. */
  sourceCommit: string;
  /** Extraction timestamp, stored as a string. */
  extractedAt: string;
  /** Hex SHA-256 digests that the spec compares against the extracted files. */
  checksums: Record<'tools' | 'systemPrompt' | 'compactionPrompt', string>;
}
|
|
16
|
+
|
|
17
|
+
const root = resolve(__dirname, '..', '..');
|
|
18
|
+
|
|
19
|
+
/**
 * Computes the SHA-256 digest of a file's raw bytes.
 *
 * @param filePath - Path of the file to hash; it is read synchronously.
 * @returns The digest as a lowercase hex string.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function sha256(filePath: string): string {
  const hasher = createHash('sha256');
  hasher.update(readFileSync(filePath));
  return hasher.digest('hex');
}
|
|
23
|
+
|
|
24
|
+
describe('extracted artifact manifest', () => {
|
|
25
|
+
// Validates the manifest's metadata fields and recomputes the SHA-256 of each
// extracted artifact to compare against the recorded checksums.
it('contains required metadata and valid checksums', () => {
  const manifestPath = resolve(root, 'src/extracted/manifest.json');
  const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')) as ExtractedManifest;

  expect(manifest.schemaVersion).toMatch(/^\d+\.\d+\.\d+$/);
  expect(manifest.sourceCommit.length).toBeGreaterThan(0);
  // `new Date(value)` never throws on a malformed string (it produces an
  // Invalid Date), so a not.toThrow() check can never fail. Assert that the
  // timestamp actually parses instead.
  expect(Number.isNaN(Date.parse(manifest.extractedAt))).toBe(false);

  const expectedTools = sha256(resolve(root, 'src/extracted/tools.json'));
  const expectedSystem = sha256(resolve(root, 'src/extracted/prompts/system.md'));
  const expectedCompaction = sha256(resolve(root, 'src/extracted/prompts/compaction.md'));

  expect(manifest.checksums.tools).toBe(expectedTools);
  expect(manifest.checksums.systemPrompt).toBe(expectedSystem);
  expect(manifest.checksums.compactionPrompt).toBe(expectedCompaction);
});
|
|
41
|
+
});
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import type { HookCallback, HookContext } from '../../src/interfaces/hooks';
|
|
4
|
+
import type { SessionStore } from '../../src/interfaces/session-store';
|
|
5
|
+
import type { SandboxProvider } from '../../src/interfaces/sandbox-provider';
|
|
6
|
+
import type { ToolProvider } from '../../src/interfaces/tool-provider';
|
|
7
|
+
|
|
8
|
+
describe('interfaces contract smoke checks', () => {
|
|
9
|
+
// Type-level smoke check: an object literal with these members must satisfy
// ToolProvider, and capabilities() must expose exactly the six expected flags.
it('tool provider capabilities expose required flags', () => {
  const provider: ToolProvider = {
    bash: async () => ({ success: true, output: '' }),
    readFile: async () => ({ success: true, output: '' }),
    writeFile: async () => ({ success: true, output: '' }),
    editFile: async () => ({ success: true, output: '' }),
    glob: async () => ({ success: true, output: '' }),
    grep: async () => ({ success: true, output: '' }),
    capabilities: () => ({
      bash: true,
      fileSystem: true,
      webFetch: false,
      webSearch: false,
      codeExecution: false,
      sandboxed: false
    })
  };

  // Sorted so the comparison does not depend on property declaration order.
  const flagNames = Object.keys(provider.capabilities()).sort();
  expect(flagNames).toEqual([
    'bash',
    'codeExecution',
    'fileSystem',
    'sandboxed',
    'webFetch',
    'webSearch'
  ]);
});
|
|
51
|
+
|
|
52
|
+
// Type-level smoke check: a literal with exec / readSandboxFile /
// writeSandboxFile must satisfy SandboxProvider, and its results must be
// usable as an exit code and as bytes.
it('sandbox provider contract supports exec and file io', async () => {
  const sandbox: SandboxProvider = {
    exec: async () => ({ exitCode: 0, stdout: 'ok', stderr: '' }),
    readSandboxFile: async () => ({
      data: Buffer.from('content', 'utf8'),
      mimeType: 'text/plain',
      filename: 'x'
    }),
    writeSandboxFile: async () => {}
  };

  const { exitCode } = await sandbox.exec('echo ok');
  expect(exitCode).toBe(0);

  const { data } = await sandbox.readSandboxFile('x');
  const decoded = Buffer.from(data).toString('utf8');
  expect(decoded).toBe('content');
});
|
|
72
|
+
|
|
73
|
+
// Type-level smoke check: SessionStore permits `null` from get() and an empty
// array from list().
it('session store contract returns null for missing sessions', async () => {
  const store: SessionStore = {
    save: async () => {},
    get: async () => null,
    list: async () => [],
    delete: async () => {}
  };

  const missing = await store.get('missing');
  expect(missing).toBeNull();

  const sessions = await store.list();
  expect(sessions).toEqual([]);
});
|
|
88
|
+
|
|
89
|
+
// A HookCallback may veto a tool call by returning allow: false together with
// a reason.
it('hook callback can deny execution with reason', async () => {
  const hook: HookCallback = async (context: HookContext) => {
    // Anything other than a PreToolUse event for Bash is let through.
    if (context.event !== 'PreToolUse' || context.toolName !== 'Bash') {
      return { allow: true };
    }
    return { allow: false, reason: 'blocked by policy' };
  };

  const verdict = await hook({ event: 'PreToolUse', toolName: 'Bash' });
  expect(verdict?.allow).toBe(false);
  expect(verdict?.reason).toBe('blocked by policy');
});
|
|
101
|
+
});
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import type { AgentMessage, ToolCallInfo, ToolResultInfo } from '../../src/agent/types';
|
|
4
|
+
import { LosslessContextStore } from '../../src/loop/context-store';
|
|
5
|
+
|
|
6
|
+
describe('AgentMessage structured fields', () => {
|
|
7
|
+
// An assistant AgentMessage can hold structured toolCalls next to its text.
it('assistant message carries toolCalls alongside content', () => {
  const calls: ToolCallInfo[] = [
    { toolCallId: 'call-1', toolName: 'Write', args: { path: '/tmp/a.txt', content: 'hello' } },
    { toolCallId: 'call-2', toolName: 'Bash', args: { command: 'echo ok' } },
  ];
  const message: AgentMessage = {
    role: 'assistant',
    content: 'I will create a file and run a command.',
    toolCalls: calls,
  };

  expect(message.toolCalls).toHaveLength(2);
  const [writeCall, bashCall] = message.toolCalls!;
  expect(writeCall!.toolCallId).toBe('call-1');
  expect(bashCall!.toolName).toBe('Bash');
  // The plain-text content remains available for display and logging.
  expect(message.content).toContain('create a file');
});
|
|
25
|
+
|
|
26
|
+
// A tool AgentMessage can hold structured toolResults next to its text;
// isError stays undefined when it is not set.
it('tool message carries toolResults alongside content', () => {
  const results: ToolResultInfo[] = [
    { toolCallId: 'call-1', toolName: 'Write', result: 'ok' },
  ];
  const message: AgentMessage = {
    role: 'tool',
    content: 'Write(/tmp/a.txt): ok',
    toolResults: results,
  };

  expect(message.toolResults).toHaveLength(1);
  const onlyResult = message.toolResults![0]!;
  expect(onlyResult.toolCallId).toBe('call-1');
  expect(onlyResult.isError).toBeUndefined();
  // The plain-text content remains available for display.
  expect(message.content).toContain('Write');
});
|
|
43
|
+
|
|
44
|
+
// An explicit isError: true on a tool result is readable from the message.
it('tool result with isError flag', () => {
  const failedResult = {
    toolCallId: 'call-x',
    toolName: 'Bash',
    result: 'command not found',
    isError: true,
  };
  const message: AgentMessage = {
    role: 'tool',
    content: 'Bash: ERROR: command not found',
    toolResults: [failedResult],
  };

  const [first] = message.toolResults!;
  expect(first!.isError).toBe(true);
});
|
|
58
|
+
|
|
59
|
+
// A message built with only role and content has neither structured field.
it('messages without structured fields remain backward-compatible', () => {
  const plain: AgentMessage = { role: 'user', content: 'hello' };
  const { toolCalls, toolResults } = plain;

  expect(toolCalls).toBeUndefined();
  expect(toolResults).toBeUndefined();
});
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
describe('LosslessContextStore preserves structured fields', () => {
|
|
67
|
+
// With a budget far larger than the conversation, getView() should return all
// four messages with their structured fields still attached.
it('getView returns toolCalls and toolResults when under budget', () => {
  const store = new LosslessContextStore({ maxTokenBudget: 100_000 });

  const conversation: AgentMessage[] = [
    { role: 'user', content: 'write a file' },
    {
      role: 'assistant',
      content: 'Writing file.',
      toolCalls: [{ toolCallId: 'c1', toolName: 'Write', args: { path: '/tmp/f.txt', content: 'data' } }],
    },
    {
      role: 'tool',
      content: 'Write(/tmp/f.txt): ok',
      toolResults: [{ toolCallId: 'c1', toolName: 'Write', result: 'ok' }],
    },
    { role: 'assistant', content: 'Done.' },
  ];
  store.ingest(conversation);

  const view = store.getView();
  expect(view).toHaveLength(4);

  // Index 1 is the assistant turn, index 2 is the tool turn.
  const [, assistantTurn, toolTurn] = view;
  expect(assistantTurn!.toolCalls).toHaveLength(1);
  expect(assistantTurn!.toolCalls![0]!.toolCallId).toBe('c1');
  expect(toolTurn!.toolResults).toHaveLength(1);
  expect(toolTurn!.toolResults![0]!.toolCallId).toBe('c1');
});
|
|
94
|
+
|
|
95
|
+
// Ingests an early Read exchange followed by twenty filler messages into a
// store with a deliberately tiny budget, then checks that any tool message
// still present in the view kept its toolResults.
// NOTE(review): both checks are conditional; if the view no longer contains
// the early tool message at all, the test passes without asserting anything.
it('cold zone trimming preserves toolResults on tool messages', () => {
  const store = new LosslessContextStore({
    maxTokenBudget: 200,
    trimThreshold: 0.5,
    stubThreshold: 50,
  });

  // Early exchange that the filler below is meant to age out of the recent window.
  const opening: AgentMessage[] = [
    { role: 'user', content: 'initial request' },
    {
      role: 'assistant',
      content: 'step 1',
      toolCalls: [{ toolCallId: 'old-1', toolName: 'Read', args: { path: '/a' } }],
    },
    {
      role: 'tool',
      content: 'Read(/a): short',
      toolResults: [{ toolCallId: 'old-1', toolName: 'Read', result: 'short' }],
    },
  ];

  // Ten user/assistant pairs of padding.
  const filler = Array.from({ length: 10 }, (_, i) => i).flatMap((i): AgentMessage[] => [
    { role: 'user', content: `follow up message ${i} with some filler text to consume budget` },
    { role: 'assistant', content: `response ${i} with additional filler text to fill the budget` },
  ]);

  store.ingest([...opening, ...filler]);
  const view = store.getView();

  // If a tool message with results is still present, it must be the original one.
  const survivingTool = view.find((m) => m.role === 'tool' && Boolean(m.toolResults));
  if (survivingTool) {
    expect(survivingTool.toolResults).toBeDefined();
    expect(survivingTool.toolResults![0]!.toolCallId).toBe('old-1');
  }

  // No Read tool message may keep its content while losing its toolResults.
  view
    .filter((m) => m.role === 'tool' && m.content.startsWith('Read('))
    .forEach((m) => {
      expect(m.toolResults).toBeDefined();
    });
});
|
|
141
|
+
|
|
142
|
+
// Same setup idea as the toolResults case, but checks the assistant side: an
// early assistant message with toolCalls should keep them in the view.
// NOTE(review): the assertion is conditional; if no assistant message with
// toolCalls is present in the view, the test passes without asserting anything.
it('assistant toolCalls survive in cold zone (pushed as-is)', () => {
  const store = new LosslessContextStore({
    maxTokenBudget: 200,
    trimThreshold: 0.5,
  });

  const opening: AgentMessage[] = [
    { role: 'user', content: 'do something' },
    {
      role: 'assistant',
      content: 'calling tool',
      toolCalls: [{ toolCallId: 'tc-1', toolName: 'Bash', args: { command: 'ls' } }],
    },
    {
      role: 'tool',
      content: 'Bash("ls"): file1 file2',
      toolResults: [{ toolCallId: 'tc-1', toolName: 'Bash', result: 'file1 file2' }],
    },
  ];

  // Ten user/assistant pairs of padding appended after the tool exchange.
  const filler = Array.from({ length: 10 }, (_, i) => i).flatMap((i): AgentMessage[] => [
    { role: 'user', content: `later message ${i} padding text to push budget` },
    { role: 'assistant', content: `later reply ${i} with extra padding text here` },
  ]);

  store.ingest([...opening, ...filler]);
  const view = store.getView();

  const survivingAssistant = view.find((m) => m.role === 'assistant' && Boolean(m.toolCalls));
  if (survivingAssistant) {
    expect(survivingAssistant.toolCalls![0]!.toolCallId).toBe('tc-1');
  }
});
|
|
176
|
+
});
|