@purista/harness 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/index.d.ts +1 -0
- package/dist/agents/index.js +276 -141
- package/dist/errors/catalog.d.ts +4 -3
- package/dist/harness/defineHarness.d.ts +26 -2
- package/dist/harness/defineHarness.js +51 -2
- package/dist/index.d.ts +1 -1
- package/dist/memory/sandbox/index.js +7 -1
- package/dist/models/registry.js +45 -3
- package/dist/ports/base-model-provider.js +2 -0
- package/dist/ports/capabilities.d.ts +2 -0
- package/dist/ports/harness-context.d.ts +1 -0
- package/dist/ports/model-provider.d.ts +4 -0
- package/dist/ports/state.d.ts +6 -0
- package/dist/runtime/abort.d.ts +5 -0
- package/dist/runtime/abort.js +33 -0
- package/dist/runtime/durable.d.ts +2 -0
- package/dist/runtime/durable.js +6 -2
- package/dist/runtime/sessionDurable.d.ts +49 -0
- package/dist/runtime/sessionDurable.js +135 -0
- package/dist/runtime/steps.d.ts +19 -1
- package/dist/runtime/steps.js +21 -3
- package/dist/sandbox/index.d.ts +34 -0
- package/dist/sandbox/index.js +40 -3
- package/dist/sessions/index.d.ts +15 -2
- package/dist/sessions/index.js +212 -99
- package/dist/skills/index.js +19 -6
- package/dist/state/in-memory.d.ts +1 -0
- package/dist/state/in-memory.js +15 -0
- package/dist/telemetry/shim.js +9 -4
- package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
- package/dist/testing/durableWorkspaceStoreContract.js +64 -28
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +15 -1
- package/dist/tools/mcp/runner.js +11 -6
- package/dist/tools/mcp/stdio.js +170 -1
- package/dist/ulid/index.d.ts +6 -1
- package/dist/ulid/index.js +31 -13
- package/dist/version.d.ts +2 -0
- package/dist/version.js +2 -0
- package/dist/workflows/index.js +7 -1
- package/dist/workspace/in-memory.d.ts +9 -10
- package/dist/workspace/in-memory.js +191 -48
- package/package.json +1 -1
- package/dist/harness/errors.d.ts +0 -62
- package/dist/harness/errors.js +0 -67
package/dist/agents/index.d.ts
CHANGED
package/dist/agents/index.js
CHANGED
|
@@ -1,31 +1,92 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
|
|
3
|
-
import { AgentLoopBudgetError,
|
|
3
|
+
import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, SkillManifestError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
|
|
4
4
|
import { createMetrics } from '../telemetry/index.js';
|
|
5
5
|
import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
|
|
6
6
|
import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
|
|
7
7
|
import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
|
|
8
8
|
import { ulid } from '../ulid/index.js';
|
|
9
|
+
import { abortError, withAbortSignal } from '../runtime/abort.js';
|
|
9
10
|
function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
|
|
10
11
|
function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
|
|
11
12
|
async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
|
|
12
13
|
if (isReadonlyBuiltin(toolName))
|
|
13
|
-
return 'allow';
|
|
14
|
+
return { decision: 'allow' };
|
|
14
15
|
const perm = def.permissions?.[toolName];
|
|
15
|
-
const
|
|
16
|
+
const policy = normalizePermissionPolicy(perm);
|
|
17
|
+
const mode = policy.mode;
|
|
18
|
+
const target = permissionTarget(toolName, input);
|
|
19
|
+
if (target && matchesAnyPattern(target, policy.deny))
|
|
20
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
21
|
+
if (policy.allow && policy.allow.length > 0 && (!target || !matchesAnyPattern(target, policy.allow))) {
|
|
22
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
23
|
+
}
|
|
16
24
|
if (mode === 'allow')
|
|
17
|
-
return 'allow';
|
|
25
|
+
return { decision: 'allow' };
|
|
18
26
|
if (mode === 'deny')
|
|
19
|
-
return 'deny';
|
|
27
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
20
28
|
if (!def.onPermission)
|
|
21
|
-
return 'deny';
|
|
29
|
+
return { decision: 'deny', reason: 'hook_deny' };
|
|
22
30
|
try {
|
|
23
|
-
|
|
31
|
+
const decision = await def.onPermission({ toolName, input, agentId, runId, sessionId });
|
|
32
|
+
return decision === 'allow' ? { decision } : { decision, reason: 'hook_deny' };
|
|
24
33
|
}
|
|
25
34
|
catch {
|
|
26
35
|
throw new PermissionDeniedError('Permission hook failed.', { tool_name: toolName, agent_id: agentId, reason: 'hook_failed' });
|
|
27
36
|
}
|
|
28
37
|
}
|
|
38
|
+
function normalizePermissionPolicy(perm) {
|
|
39
|
+
if (perm === 'allow' || perm === 'ask' || perm === 'deny')
|
|
40
|
+
return { mode: perm };
|
|
41
|
+
if (perm && typeof perm === 'object' && 'mode' in perm) {
|
|
42
|
+
const candidate = perm;
|
|
43
|
+
if (candidate.mode === 'allow' || candidate.mode === 'ask' || candidate.mode === 'deny') {
|
|
44
|
+
return {
|
|
45
|
+
mode: candidate.mode,
|
|
46
|
+
...(Array.isArray(candidate.allow) ? { allow: candidate.allow.filter(isString) } : {}),
|
|
47
|
+
...(Array.isArray(candidate.deny) ? { deny: candidate.deny.filter(isString) } : {})
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
return { mode: 'allow' };
|
|
52
|
+
}
|
|
53
|
+
function isString(value) {
|
|
54
|
+
return typeof value === 'string';
|
|
55
|
+
}
|
|
56
|
+
function permissionTarget(toolName, input) {
|
|
57
|
+
if (!input || typeof input !== 'object')
|
|
58
|
+
return undefined;
|
|
59
|
+
const record = input;
|
|
60
|
+
if (toolName === 'bash')
|
|
61
|
+
return typeof record['command'] === 'string' ? record['command'] : undefined;
|
|
62
|
+
if (toolName === 'write' || toolName === 'edit')
|
|
63
|
+
return typeof record['path'] === 'string' ? record['path'] : undefined;
|
|
64
|
+
return undefined;
|
|
65
|
+
}
|
|
66
|
+
function matchesAnyPattern(value, patterns) {
|
|
67
|
+
return patterns?.some((pattern) => globPatternToRegExp(pattern).test(value)) ?? false;
|
|
68
|
+
}
|
|
69
|
+
function globPatternToRegExp(pattern) {
|
|
70
|
+
let source = '^';
|
|
71
|
+
for (let index = 0; index < pattern.length; index += 1) {
|
|
72
|
+
const char = pattern[index];
|
|
73
|
+
if (char === '*') {
|
|
74
|
+
if (pattern[index + 1] === '*') {
|
|
75
|
+
source += '.*';
|
|
76
|
+
index += 1;
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
source += '[^/]*';
|
|
80
|
+
}
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
source += escapeRegExp(char ?? '');
|
|
84
|
+
}
|
|
85
|
+
return new RegExp(`${source}$`);
|
|
86
|
+
}
|
|
87
|
+
function escapeRegExp(value) {
|
|
88
|
+
return value.replace(/[\\^$+?.()|[\]{}]/g, '\\$&');
|
|
89
|
+
}
|
|
29
90
|
export async function runDefaultAgent(args) {
|
|
30
91
|
const agentAttrs = {
|
|
31
92
|
'harness.name': args.harnessName,
|
|
@@ -68,7 +129,8 @@ function metadataSpanAttrs(metadata) {
|
|
|
68
129
|
return attrs;
|
|
69
130
|
}
|
|
70
131
|
async function runDefaultAgentInner(args) {
|
|
71
|
-
args.signal.
|
|
132
|
+
if (args.signal.aborted)
|
|
133
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
72
134
|
const inputSchema = args.agent.input ?? z.string();
|
|
73
135
|
const outputSchema = args.agent.output ?? z.string();
|
|
74
136
|
const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
|
|
@@ -79,7 +141,8 @@ async function runDefaultAgentInner(args) {
|
|
|
79
141
|
await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
|
|
80
142
|
const activatedSkills = new Set();
|
|
81
143
|
if (args.agent.handler) {
|
|
82
|
-
const
|
|
144
|
+
const handler = args.agent.handler;
|
|
145
|
+
const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
|
|
83
146
|
input: parsedInput,
|
|
84
147
|
signal: args.signal,
|
|
85
148
|
models: args.models,
|
|
@@ -89,7 +152,7 @@ async function runDefaultAgentInner(args) {
|
|
|
89
152
|
memory: args.memory,
|
|
90
153
|
metadata: args.metadata ?? {},
|
|
91
154
|
metrics: args.metrics
|
|
92
|
-
});
|
|
155
|
+
}));
|
|
93
156
|
const validated = parseAgentSchema(outputSchema, output, 'agent_output');
|
|
94
157
|
return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
|
|
95
158
|
}
|
|
@@ -99,10 +162,9 @@ async function runDefaultAgentInner(args) {
|
|
|
99
162
|
const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
|
|
100
163
|
const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
|
|
101
164
|
if (skillIds.length > 0 && !enabledBuiltins.includes('read')) {
|
|
102
|
-
throw new
|
|
165
|
+
throw new SkillManifestError('Agents with skills require the read built-in tool for skill activation.', {
|
|
103
166
|
reason: 'skill_read_tool_missing',
|
|
104
|
-
|
|
105
|
-
id: args.agentId
|
|
167
|
+
agent_id: args.agentId
|
|
106
168
|
});
|
|
107
169
|
}
|
|
108
170
|
const builtinSpecs = getBuiltinToolSpecs(enabledBuiltins, args.session);
|
|
@@ -131,117 +193,169 @@ async function runDefaultAgentInner(args) {
|
|
|
131
193
|
const emitted = [];
|
|
132
194
|
const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
|
|
133
195
|
let steps = 0;
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
runId: args.runId,
|
|
151
|
-
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
152
|
-
agentId: args.agentId
|
|
153
|
-
});
|
|
154
|
-
const toolCalls = response.toolCalls ?? [];
|
|
155
|
-
if (toolCalls.length === 0) {
|
|
156
|
-
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
157
|
-
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
158
|
-
await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
|
|
159
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
160
|
-
return { output: validated, emitted };
|
|
161
|
-
}
|
|
162
|
-
const assistantMsg = {
|
|
163
|
-
id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
|
|
164
|
-
timestamp: new Date().toISOString()
|
|
165
|
-
};
|
|
166
|
-
emitted.push(assistantMsg);
|
|
167
|
-
modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
|
|
168
|
-
for (const call of toolCalls) {
|
|
169
|
-
const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
|
|
170
|
-
const input = call.arguments;
|
|
171
|
-
let result;
|
|
172
|
-
try {
|
|
173
|
-
args.signal.throwIfAborted();
|
|
174
|
-
await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
|
|
175
|
-
const tool = args.customTools[canonical];
|
|
176
|
-
const toolKind = canonical in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
|
|
177
|
-
result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
|
|
178
|
-
const decision = await checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input);
|
|
179
|
-
if (decision === 'deny') {
|
|
180
|
-
throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: 'hook_deny' });
|
|
181
|
-
}
|
|
182
|
-
if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
|
|
183
|
-
const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
|
|
184
|
-
if (canonical === 'read')
|
|
185
|
-
markSkillActivation(input, args.skills, activatedSkills);
|
|
186
|
-
return { output };
|
|
187
|
-
}
|
|
188
|
-
if (!enabledCustomTools.has(canonical)) {
|
|
189
|
-
throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
|
|
190
|
-
}
|
|
191
|
-
if (!tool)
|
|
192
|
-
throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
|
|
193
|
-
if (isMcpToolDefinition(tool)) {
|
|
194
|
-
if (!args.mcpRegistry)
|
|
195
|
-
throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
|
|
196
|
-
const registry = args.mcpRegistry;
|
|
197
|
-
return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
|
|
198
|
-
}
|
|
199
|
-
if (tool.kind && tool.kind !== 'ts') {
|
|
200
|
-
throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
|
|
201
|
-
}
|
|
202
|
-
const tsTool = tool;
|
|
203
|
-
const parsed = tsTool.input.parse(input);
|
|
204
|
-
const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
|
|
205
|
-
signal,
|
|
206
|
-
sandbox: withSandboxTelemetry(args, canonical),
|
|
207
|
-
logger: args.logger,
|
|
208
|
-
telemetry: args.telemetry,
|
|
209
|
-
metrics: createMetrics(args.telemetry, {
|
|
210
|
-
'harness.name': args.harnessName,
|
|
211
|
-
'harness.session.id': args.sessionId,
|
|
212
|
-
'harness.run.id': args.runId,
|
|
213
|
-
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
214
|
-
'harness.agent.id': args.agentId,
|
|
215
|
-
'harness.tool.id': canonical
|
|
216
|
-
}),
|
|
217
|
-
memory: args.memory,
|
|
218
|
-
runId: args.runId,
|
|
219
|
-
sessionId: args.sessionId,
|
|
220
|
-
agentId: args.agentId,
|
|
221
|
-
toolId: canonical
|
|
222
|
-
}, parsed));
|
|
223
|
-
return { output: tsTool.output.parse(out) };
|
|
224
|
-
});
|
|
225
|
-
}
|
|
226
|
-
catch (error) {
|
|
227
|
-
result = { error: serializeError(normalizeToolFailure(canonical, error)) };
|
|
228
|
-
}
|
|
229
|
-
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
|
|
230
|
-
const toolMessage = {
|
|
231
|
-
id: `msg_${ulid()}_${call.id}`,
|
|
196
|
+
await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
|
|
197
|
+
try {
|
|
198
|
+
while (true) {
|
|
199
|
+
if (args.signal.aborted)
|
|
200
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
201
|
+
if (steps >= maxSteps)
|
|
202
|
+
throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
|
|
203
|
+
const response = await model.object({
|
|
204
|
+
messages: [
|
|
205
|
+
{ role: 'system', content: instructions },
|
|
206
|
+
...modelMessages
|
|
207
|
+
],
|
|
208
|
+
tools: [...builtinSpecs, ...customSpecs],
|
|
209
|
+
schema: z.toJSONSchema(outputSchema)
|
|
210
|
+
}, args.signal, {
|
|
211
|
+
harnessName: args.harnessName,
|
|
232
212
|
sessionId: args.sessionId,
|
|
233
213
|
runId: args.runId,
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
214
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
215
|
+
agentId: args.agentId
|
|
216
|
+
});
|
|
217
|
+
// Emit one usage-bearing model event per model round-trip (including
|
|
218
|
+
// tool-call steps) so run-summary modelCalls and tokenTotals are accurate
|
|
219
|
+
// for multi-step runs.
|
|
220
|
+
await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
|
|
221
|
+
const toolCalls = (response.toolCalls ?? []);
|
|
222
|
+
if (toolCalls.length === 0) {
|
|
223
|
+
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
224
|
+
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
225
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
226
|
+
return { output: validated, emitted };
|
|
227
|
+
}
|
|
228
|
+
const assistantMsg = {
|
|
229
|
+
id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
|
|
237
230
|
timestamp: new Date().toISOString()
|
|
238
231
|
};
|
|
239
|
-
emitted.push(
|
|
240
|
-
modelMessages.push({ role: '
|
|
232
|
+
emitted.push(assistantMsg);
|
|
233
|
+
modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
|
|
234
|
+
args.metrics.histogram('harness.agent.tool_batch.size', toolCalls.length, {
|
|
235
|
+
'harness.agent.tool_batch.max_parallel': args.maxParallelToolCalls
|
|
236
|
+
});
|
|
237
|
+
const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
|
|
238
|
+
...args,
|
|
239
|
+
enabledCustomTools,
|
|
240
|
+
activatedSkills
|
|
241
|
+
}, call));
|
|
242
|
+
for (const outcome of outcomes) {
|
|
243
|
+
emitted.push(outcome.emitted);
|
|
244
|
+
modelMessages.push(outcome.modelMessage);
|
|
245
|
+
}
|
|
246
|
+
steps += 1;
|
|
241
247
|
}
|
|
242
|
-
|
|
248
|
+
}
|
|
249
|
+
catch (error) {
|
|
250
|
+
// Pair every agent.started with an agent.finished, even on error/cancel/budget.
|
|
251
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
|
|
252
|
+
throw error;
|
|
243
253
|
}
|
|
244
254
|
}
|
|
255
|
+
async function runLimited(items, limit, fn) {
|
|
256
|
+
const concurrency = Math.max(1, Math.min(limit, items.length));
|
|
257
|
+
const results = new Array(items.length);
|
|
258
|
+
let next = 0;
|
|
259
|
+
async function worker() {
|
|
260
|
+
while (true) {
|
|
261
|
+
const index = next;
|
|
262
|
+
next += 1;
|
|
263
|
+
const item = items[index];
|
|
264
|
+
if (item === undefined)
|
|
265
|
+
return;
|
|
266
|
+
results[index] = await fn(item);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
await Promise.all(Array.from({ length: concurrency }, () => worker()));
|
|
270
|
+
return results;
|
|
271
|
+
}
|
|
272
|
+
async function executeToolCall(args, call) {
|
|
273
|
+
const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
|
|
274
|
+
const input = call.arguments;
|
|
275
|
+
const tool = args.customTools[canonical];
|
|
276
|
+
const toolKind = resolveToolKind(canonical, tool);
|
|
277
|
+
let result;
|
|
278
|
+
try {
|
|
279
|
+
if (args.signal.aborted)
|
|
280
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
281
|
+
await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
|
|
282
|
+
result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
|
|
283
|
+
const permission = await withToolSignal(args.signal, args.toolTimeoutMs, () => checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input));
|
|
284
|
+
if (permission.decision === 'deny') {
|
|
285
|
+
throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: permission.reason });
|
|
286
|
+
}
|
|
287
|
+
if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
|
|
288
|
+
const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
|
|
289
|
+
if (canonical === 'read')
|
|
290
|
+
markSkillActivation(input, args.skills, args.activatedSkills);
|
|
291
|
+
return { output };
|
|
292
|
+
}
|
|
293
|
+
if (!args.enabledCustomTools.has(canonical)) {
|
|
294
|
+
throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
|
|
295
|
+
}
|
|
296
|
+
if (!tool)
|
|
297
|
+
throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
|
|
298
|
+
if (isMcpToolDefinition(tool)) {
|
|
299
|
+
if (!args.mcpRegistry)
|
|
300
|
+
throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
|
|
301
|
+
const registry = args.mcpRegistry;
|
|
302
|
+
return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
|
|
303
|
+
}
|
|
304
|
+
if (tool.kind && tool.kind !== 'ts') {
|
|
305
|
+
throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
|
|
306
|
+
}
|
|
307
|
+
const parsed = tool.input.parse(input);
|
|
308
|
+
const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tool.handler({
|
|
309
|
+
signal,
|
|
310
|
+
sandbox: withSandboxTelemetry(args, canonical),
|
|
311
|
+
logger: args.logger,
|
|
312
|
+
telemetry: args.telemetry,
|
|
313
|
+
metrics: createMetrics(args.telemetry, {
|
|
314
|
+
'harness.name': args.harnessName,
|
|
315
|
+
'harness.session.id': args.sessionId,
|
|
316
|
+
'harness.run.id': args.runId,
|
|
317
|
+
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
318
|
+
'harness.agent.id': args.agentId,
|
|
319
|
+
'harness.tool.id': canonical
|
|
320
|
+
}),
|
|
321
|
+
memory: args.memory,
|
|
322
|
+
runId: args.runId,
|
|
323
|
+
sessionId: args.sessionId,
|
|
324
|
+
agentId: args.agentId,
|
|
325
|
+
toolId: canonical
|
|
326
|
+
}, parsed));
|
|
327
|
+
return { output: tool.output.parse(out) };
|
|
328
|
+
});
|
|
329
|
+
}
|
|
330
|
+
catch (error) {
|
|
331
|
+
const failure = normalizeToolFailure(canonical, error, toolKind);
|
|
332
|
+
if (failure instanceof OperationCancelledError) {
|
|
333
|
+
if (args.signal.aborted)
|
|
334
|
+
throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
|
|
335
|
+
throw failure;
|
|
336
|
+
}
|
|
337
|
+
result = { error: serializeError(failure) };
|
|
338
|
+
}
|
|
339
|
+
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
|
|
340
|
+
const toolMessage = {
|
|
341
|
+
id: `msg_${ulid()}_${call.id}`,
|
|
342
|
+
sessionId: args.sessionId,
|
|
343
|
+
runId: args.runId,
|
|
344
|
+
role: 'tool',
|
|
345
|
+
content: '',
|
|
346
|
+
toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
|
|
347
|
+
timestamp: new Date().toISOString()
|
|
348
|
+
};
|
|
349
|
+
return {
|
|
350
|
+
emitted: toolMessage,
|
|
351
|
+
modelMessage: { role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) }
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
function resolveToolKind(toolId, tool) {
|
|
355
|
+
if (toolId in BUILTIN_ALIAS_TO_CANONICAL)
|
|
356
|
+
return 'builtin';
|
|
357
|
+
return tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
|
|
358
|
+
}
|
|
245
359
|
function markSkillActivation(input, skills, activated) {
|
|
246
360
|
if (!input || typeof input !== 'object')
|
|
247
361
|
return;
|
|
@@ -256,31 +370,35 @@ function markSkillActivation(input, skills, activated) {
|
|
|
256
370
|
}
|
|
257
371
|
}
|
|
258
372
|
async function withToolSignal(parent, timeoutMs, fn) {
|
|
259
|
-
parent.
|
|
373
|
+
if (parent.aborted)
|
|
374
|
+
throw abortError(parent, 'run', 'Run was cancelled.');
|
|
260
375
|
const controller = new AbortController();
|
|
261
376
|
const relay = () => controller.abort(parent.reason);
|
|
262
377
|
parent.addEventListener('abort', relay, { once: true });
|
|
378
|
+
if (parent.aborted)
|
|
379
|
+
relay();
|
|
380
|
+
let abortListener;
|
|
263
381
|
const timeout = timeoutMs > 0
|
|
264
382
|
? setTimeout(() => controller.abort(new OperationTimeoutError('Tool execution timed out.', { scope: 'tool', timeout_ms: timeoutMs })), timeoutMs)
|
|
265
383
|
: undefined;
|
|
266
|
-
const
|
|
267
|
-
|
|
268
|
-
const
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
})
|
|
274
|
-
: undefined;
|
|
384
|
+
const abortPromise = new Promise((_, reject) => {
|
|
385
|
+
abortListener = () => {
|
|
386
|
+
const reason = controller.signal.reason;
|
|
387
|
+
reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
|
|
388
|
+
};
|
|
389
|
+
controller.signal.addEventListener('abort', abortListener, { once: true });
|
|
390
|
+
});
|
|
275
391
|
try {
|
|
276
392
|
const operation = fn(controller.signal);
|
|
277
|
-
return await
|
|
393
|
+
return await Promise.race([operation, abortPromise]);
|
|
278
394
|
}
|
|
279
395
|
catch (error) {
|
|
280
396
|
if (controller.signal.aborted) {
|
|
281
397
|
const reason = controller.signal.reason;
|
|
282
398
|
if (reason instanceof OperationTimeoutError)
|
|
283
399
|
throw reason;
|
|
400
|
+
if (reason instanceof OperationCancelledError)
|
|
401
|
+
throw reason;
|
|
284
402
|
throw new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason ?? error);
|
|
285
403
|
}
|
|
286
404
|
throw error;
|
|
@@ -288,6 +406,8 @@ async function withToolSignal(parent, timeoutMs, fn) {
|
|
|
288
406
|
finally {
|
|
289
407
|
if (timeout)
|
|
290
408
|
clearTimeout(timeout);
|
|
409
|
+
if (abortListener)
|
|
410
|
+
controller.signal.removeEventListener('abort', abortListener);
|
|
291
411
|
parent.removeEventListener('abort', relay);
|
|
292
412
|
}
|
|
293
413
|
}
|
|
@@ -313,25 +433,34 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
|
|
|
313
433
|
} : {})
|
|
314
434
|
};
|
|
315
435
|
const started = Date.now();
|
|
436
|
+
let durationAttrs = {};
|
|
316
437
|
const execute = async () => {
|
|
317
438
|
try {
|
|
318
439
|
const result = await fn();
|
|
319
|
-
args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, attrs);
|
|
320
440
|
return result;
|
|
321
441
|
}
|
|
322
442
|
catch (error) {
|
|
323
|
-
|
|
443
|
+
const normalized = normalizeToolFailure(toolId, error, toolKind);
|
|
444
|
+
durationAttrs = {
|
|
445
|
+
'harness.error.code': normalized.code,
|
|
446
|
+
'harness.error.category': normalized.category,
|
|
447
|
+
'harness.error.retriable': normalized.retriable
|
|
448
|
+
};
|
|
449
|
+
throw normalized;
|
|
450
|
+
}
|
|
451
|
+
finally {
|
|
452
|
+
args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, { ...attrs, ...durationAttrs });
|
|
324
453
|
}
|
|
325
454
|
};
|
|
326
455
|
return args.telemetry ? args.telemetry.span(`execute_tool ${toolId}`, attrs, execute) : execute();
|
|
327
456
|
}
|
|
328
|
-
function normalizeToolFailure(toolId, error) {
|
|
457
|
+
function normalizeToolFailure(toolId, error, toolKind = toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts') {
|
|
329
458
|
if (error instanceof z.ZodError) {
|
|
330
459
|
return new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
|
|
331
460
|
}
|
|
332
461
|
if (error instanceof HarnessError)
|
|
333
462
|
return error;
|
|
334
|
-
return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind:
|
|
463
|
+
return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolKind }, error);
|
|
335
464
|
}
|
|
336
465
|
function parseAgentSchema(schema, value, where) {
|
|
337
466
|
try {
|
|
@@ -347,7 +476,15 @@ function parseAgentSchema(schema, value, where) {
|
|
|
347
476
|
function withSandboxTelemetry(args, toolId) {
|
|
348
477
|
if (!args.telemetry || args.session.executor === 'unavailable')
|
|
349
478
|
return args.session;
|
|
350
|
-
|
|
479
|
+
const attrs = {
|
|
480
|
+
'harness.name': args.harnessName,
|
|
481
|
+
'harness.session.id': args.sessionId,
|
|
482
|
+
'harness.run.id': args.runId,
|
|
483
|
+
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
484
|
+
'harness.agent.id': args.agentId,
|
|
485
|
+
'harness.tool.id': toolId
|
|
486
|
+
};
|
|
487
|
+
const wrapped = {
|
|
351
488
|
...args.session,
|
|
352
489
|
executor: args.session.executor,
|
|
353
490
|
read: args.session.read.bind(args.session),
|
|
@@ -359,14 +496,7 @@ function withSandboxTelemetry(args, toolId) {
|
|
|
359
496
|
exists: args.session.exists.bind(args.session),
|
|
360
497
|
mount: args.session.mount.bind(args.session),
|
|
361
498
|
close: args.session.close.bind(args.session),
|
|
362
|
-
exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', {
|
|
363
|
-
'harness.name': args.harnessName,
|
|
364
|
-
'harness.session.id': args.sessionId,
|
|
365
|
-
'harness.run.id': args.runId,
|
|
366
|
-
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
367
|
-
'harness.agent.id': args.agentId,
|
|
368
|
-
'harness.tool.id': toolId
|
|
369
|
-
}, async (span) => {
|
|
499
|
+
exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', attrs, async (span) => {
|
|
370
500
|
const result = await args.session.exec(command, opts);
|
|
371
501
|
span.setAttributes({
|
|
372
502
|
'harness.exec.exit_code': result.exitCode,
|
|
@@ -375,4 +505,9 @@ function withSandboxTelemetry(args, toolId) {
|
|
|
375
505
|
return result;
|
|
376
506
|
})
|
|
377
507
|
};
|
|
508
|
+
const spawn = args.session.spawn;
|
|
509
|
+
if (typeof spawn === 'function') {
|
|
510
|
+
wrapped.spawn = async (command, opts) => args.telemetry.span('harness.sandbox.spawn', attrs, async () => spawn.call(args.session, command, opts));
|
|
511
|
+
}
|
|
512
|
+
return wrapped;
|
|
378
513
|
}
|
package/dist/errors/catalog.d.ts
CHANGED
|
@@ -62,7 +62,7 @@ export declare class ModelError extends HarnessError {
|
|
|
62
62
|
model: string;
|
|
63
63
|
method: string;
|
|
64
64
|
status?: number;
|
|
65
|
-
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded';
|
|
65
|
+
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
66
66
|
providerCode?: string;
|
|
67
67
|
providerType?: string;
|
|
68
68
|
providerParam?: string;
|
|
@@ -103,10 +103,11 @@ export declare class SkillNotFoundError extends HarnessError {
|
|
|
103
103
|
/** Skill manifest/frontmatter/config validation failure. */
|
|
104
104
|
export declare class SkillManifestError extends HarnessError {
|
|
105
105
|
constructor(message: string, meta: {
|
|
106
|
-
directory
|
|
107
|
-
reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | '
|
|
106
|
+
directory?: string;
|
|
107
|
+
reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'reserved_name' | 'skill_not_declared' | 'skill_read_tool_missing' | 'skill_sandbox_unsupported' | 'untrusted_project_skill' | 'collision_shadowed' | 'scan_limit_reached';
|
|
108
108
|
skill_id?: string;
|
|
109
109
|
source?: string;
|
|
110
|
+
agent_id?: string;
|
|
110
111
|
}, cause?: unknown);
|
|
111
112
|
}
|
|
112
113
|
/** Workflow referenced an unknown agent id. */
|
|
@@ -14,7 +14,7 @@ import { type Sandbox } from '../sandbox/index.js';
|
|
|
14
14
|
import type { ModelHandle } from '../models/registry.js';
|
|
15
15
|
import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
|
|
16
16
|
/** Stable harness version string for diagnostics and generated documentation. */
|
|
17
|
-
export
|
|
17
|
+
export { HARNESS_VERSION } from '../version.js';
|
|
18
18
|
/** OpenTelemetry capture controls used by the harness. */
|
|
19
19
|
export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
|
|
20
20
|
export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
|
|
@@ -36,6 +36,8 @@ export interface HarnessDefaults {
|
|
|
36
36
|
skillTimeoutMs?: number;
|
|
37
37
|
/** Per-model timeout in milliseconds. Default: `300_000`. */
|
|
38
38
|
modelTimeoutMs?: number;
|
|
39
|
+
/** Maximum tool calls from one model response executed at the same time. Default: `8`. */
|
|
40
|
+
maxParallelToolCalls?: number;
|
|
39
41
|
/**
|
|
40
42
|
* Max non-system messages forwarded into model calls.
|
|
41
43
|
* `undefined` keeps all history, `0` keeps only system messages.
|
|
@@ -47,6 +49,17 @@ export interface HarnessOptions {
|
|
|
47
49
|
/** Optional harness name for logs, telemetry, and diagnostics. Default: `agent-harness`. */
|
|
48
50
|
name?: string;
|
|
49
51
|
}
|
|
52
|
+
/** Durable execution opt-in for a single workflow call. */
|
|
53
|
+
export interface DurableInvokeOptions {
|
|
54
|
+
/** Stable run id reused across resumes/retries. Matches `/^[A-Za-z0-9_.:-]{1,200}$/`. */
|
|
55
|
+
runId: string;
|
|
56
|
+
/** Worker/process id owning the durable lease. Defaults to the harness worker id. */
|
|
57
|
+
workerId?: string;
|
|
58
|
+
/** Initial durable step id label. Defaults to the workflow id. */
|
|
59
|
+
stepId?: string;
|
|
60
|
+
/** Optional attempt hint; the runtime may raise it on retry. */
|
|
61
|
+
attempt?: number;
|
|
62
|
+
}
|
|
50
63
|
/** Shared invoke options for workflow and agent execution. */
|
|
51
64
|
export interface InvokeOptions {
|
|
52
65
|
/** Abort signal used to cooperatively cancel the call. */
|
|
@@ -61,6 +74,12 @@ export interface InvokeOptions {
|
|
|
61
74
|
tracestate?: string;
|
|
62
75
|
/** Scalar metadata exposed to handlers and telemetry sanitizers. */
|
|
63
76
|
metadata?: Record<string, JsonValue>;
|
|
77
|
+
/**
|
|
78
|
+
* Opt a workflow run into durable execution against the configured
|
|
79
|
+
* `.runtime(...)` (and optional `.workspaceStore(...)`). Workflow-only;
|
|
80
|
+
* supplying it on an agent run throws `ValidationError`.
|
|
81
|
+
*/
|
|
82
|
+
durable?: DurableInvokeOptions;
|
|
64
83
|
}
|
|
65
84
|
/** Canonical built-in tool names provided by the harness. */
|
|
66
85
|
export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
|
|
@@ -330,6 +349,12 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
|
|
|
330
349
|
metadata: Readonly<Record<string, JsonValue>>;
|
|
331
350
|
memory: MemoryFacade;
|
|
332
351
|
metrics: Metrics;
|
|
352
|
+
/**
|
|
353
|
+
* Runs `fn` as a durable step. Under a durable invocation the output is
|
|
354
|
+
* checkpointed and replayed on resume without re-running `fn`; otherwise it is
|
|
355
|
+
* a transparent pass-through. See spec 10 "Durable steps".
|
|
356
|
+
*/
|
|
357
|
+
step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
|
|
333
358
|
output?: O;
|
|
334
359
|
}
|
|
335
360
|
/** Full context passed to custom agent handlers. */
|
|
@@ -713,4 +738,3 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
|
|
|
713
738
|
* ```
|
|
714
739
|
*/
|
|
715
740
|
export declare function defineHarness(opts?: HarnessOptions): HarnessBuilder<{}>;
|
|
716
|
-
export {};
|