@purista/harness 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/index.d.ts +1 -0
- package/dist/agents/index.js +278 -142
- package/dist/errors/catalog.d.ts +4 -3
- package/dist/harness/defineHarness.d.ts +26 -2
- package/dist/harness/defineHarness.js +51 -2
- package/dist/index.d.ts +1 -1
- package/dist/memory/sandbox/index.js +7 -1
- package/dist/models/registry.js +45 -3
- package/dist/ports/base-model-provider.js +2 -0
- package/dist/ports/capabilities.d.ts +2 -0
- package/dist/ports/harness-context.d.ts +1 -0
- package/dist/ports/model-provider.d.ts +4 -0
- package/dist/ports/state.d.ts +6 -0
- package/dist/runtime/abort.d.ts +5 -0
- package/dist/runtime/abort.js +33 -0
- package/dist/runtime/durable.d.ts +2 -0
- package/dist/runtime/durable.js +6 -2
- package/dist/runtime/sessionDurable.d.ts +49 -0
- package/dist/runtime/sessionDurable.js +135 -0
- package/dist/runtime/steps.d.ts +19 -1
- package/dist/runtime/steps.js +21 -3
- package/dist/sandbox/index.d.ts +34 -0
- package/dist/sandbox/index.js +40 -3
- package/dist/sessions/index.d.ts +15 -2
- package/dist/sessions/index.js +212 -99
- package/dist/skills/index.js +19 -6
- package/dist/state/in-memory.d.ts +1 -0
- package/dist/state/in-memory.js +15 -0
- package/dist/telemetry/shim.js +9 -4
- package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
- package/dist/testing/durableWorkspaceStoreContract.js +64 -28
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +17 -2
- package/dist/tools/mcp/runner.js +11 -6
- package/dist/tools/mcp/stdio.js +170 -1
- package/dist/ulid/index.d.ts +6 -1
- package/dist/ulid/index.js +31 -13
- package/dist/version.d.ts +2 -0
- package/dist/version.js +2 -0
- package/dist/workflows/index.js +7 -1
- package/dist/workspace/in-memory.d.ts +9 -10
- package/dist/workspace/in-memory.js +191 -48
- package/package.json +1 -1
- package/dist/harness/errors.d.ts +0 -62
- package/dist/harness/errors.js +0 -67
package/dist/agents/index.d.ts
CHANGED
package/dist/agents/index.js
CHANGED
|
@@ -1,30 +1,92 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
|
|
3
|
-
import { AgentLoopBudgetError,
|
|
3
|
+
import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, SkillManifestError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
|
|
4
4
|
import { createMetrics } from '../telemetry/index.js';
|
|
5
5
|
import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
|
|
6
6
|
import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
|
|
7
7
|
import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
|
|
8
|
+
import { ulid } from '../ulid/index.js';
|
|
9
|
+
import { abortError, withAbortSignal } from '../runtime/abort.js';
|
|
8
10
|
function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
|
|
9
11
|
function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
|
|
10
12
|
async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
|
|
11
13
|
if (isReadonlyBuiltin(toolName))
|
|
12
|
-
return 'allow';
|
|
14
|
+
return { decision: 'allow' };
|
|
13
15
|
const perm = def.permissions?.[toolName];
|
|
14
|
-
const
|
|
16
|
+
const policy = normalizePermissionPolicy(perm);
|
|
17
|
+
const mode = policy.mode;
|
|
18
|
+
const target = permissionTarget(toolName, input);
|
|
19
|
+
if (target && matchesAnyPattern(target, policy.deny))
|
|
20
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
21
|
+
if (policy.allow && policy.allow.length > 0 && (!target || !matchesAnyPattern(target, policy.allow))) {
|
|
22
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
23
|
+
}
|
|
15
24
|
if (mode === 'allow')
|
|
16
|
-
return 'allow';
|
|
25
|
+
return { decision: 'allow' };
|
|
17
26
|
if (mode === 'deny')
|
|
18
|
-
return 'deny';
|
|
27
|
+
return { decision: 'deny', reason: 'mode_deny' };
|
|
19
28
|
if (!def.onPermission)
|
|
20
|
-
return 'deny';
|
|
29
|
+
return { decision: 'deny', reason: 'hook_deny' };
|
|
21
30
|
try {
|
|
22
|
-
|
|
31
|
+
const decision = await def.onPermission({ toolName, input, agentId, runId, sessionId });
|
|
32
|
+
return decision === 'allow' ? { decision } : { decision, reason: 'hook_deny' };
|
|
23
33
|
}
|
|
24
34
|
catch {
|
|
25
35
|
throw new PermissionDeniedError('Permission hook failed.', { tool_name: toolName, agent_id: agentId, reason: 'hook_failed' });
|
|
26
36
|
}
|
|
27
37
|
}
|
|
38
|
+
function normalizePermissionPolicy(perm) {
|
|
39
|
+
if (perm === 'allow' || perm === 'ask' || perm === 'deny')
|
|
40
|
+
return { mode: perm };
|
|
41
|
+
if (perm && typeof perm === 'object' && 'mode' in perm) {
|
|
42
|
+
const candidate = perm;
|
|
43
|
+
if (candidate.mode === 'allow' || candidate.mode === 'ask' || candidate.mode === 'deny') {
|
|
44
|
+
return {
|
|
45
|
+
mode: candidate.mode,
|
|
46
|
+
...(Array.isArray(candidate.allow) ? { allow: candidate.allow.filter(isString) } : {}),
|
|
47
|
+
...(Array.isArray(candidate.deny) ? { deny: candidate.deny.filter(isString) } : {})
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
return { mode: 'allow' };
|
|
52
|
+
}
|
|
53
|
+
function isString(value) {
|
|
54
|
+
return typeof value === 'string';
|
|
55
|
+
}
|
|
56
|
+
function permissionTarget(toolName, input) {
|
|
57
|
+
if (!input || typeof input !== 'object')
|
|
58
|
+
return undefined;
|
|
59
|
+
const record = input;
|
|
60
|
+
if (toolName === 'bash')
|
|
61
|
+
return typeof record['command'] === 'string' ? record['command'] : undefined;
|
|
62
|
+
if (toolName === 'write' || toolName === 'edit')
|
|
63
|
+
return typeof record['path'] === 'string' ? record['path'] : undefined;
|
|
64
|
+
return undefined;
|
|
65
|
+
}
|
|
66
|
+
function matchesAnyPattern(value, patterns) {
|
|
67
|
+
return patterns?.some((pattern) => globPatternToRegExp(pattern).test(value)) ?? false;
|
|
68
|
+
}
|
|
69
|
+
function globPatternToRegExp(pattern) {
|
|
70
|
+
let source = '^';
|
|
71
|
+
for (let index = 0; index < pattern.length; index += 1) {
|
|
72
|
+
const char = pattern[index];
|
|
73
|
+
if (char === '*') {
|
|
74
|
+
if (pattern[index + 1] === '*') {
|
|
75
|
+
source += '.*';
|
|
76
|
+
index += 1;
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
source += '[^/]*';
|
|
80
|
+
}
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
source += escapeRegExp(char ?? '');
|
|
84
|
+
}
|
|
85
|
+
return new RegExp(`${source}$`);
|
|
86
|
+
}
|
|
87
|
+
function escapeRegExp(value) {
|
|
88
|
+
return value.replace(/[\\^$+?.()|[\]{}]/g, '\\$&');
|
|
89
|
+
}
|
|
28
90
|
export async function runDefaultAgent(args) {
|
|
29
91
|
const agentAttrs = {
|
|
30
92
|
'harness.name': args.harnessName,
|
|
@@ -67,7 +129,8 @@ function metadataSpanAttrs(metadata) {
|
|
|
67
129
|
return attrs;
|
|
68
130
|
}
|
|
69
131
|
async function runDefaultAgentInner(args) {
|
|
70
|
-
args.signal.
|
|
132
|
+
if (args.signal.aborted)
|
|
133
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
71
134
|
const inputSchema = args.agent.input ?? z.string();
|
|
72
135
|
const outputSchema = args.agent.output ?? z.string();
|
|
73
136
|
const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
|
|
@@ -78,7 +141,8 @@ async function runDefaultAgentInner(args) {
|
|
|
78
141
|
await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
|
|
79
142
|
const activatedSkills = new Set();
|
|
80
143
|
if (args.agent.handler) {
|
|
81
|
-
const
|
|
144
|
+
const handler = args.agent.handler;
|
|
145
|
+
const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
|
|
82
146
|
input: parsedInput,
|
|
83
147
|
signal: args.signal,
|
|
84
148
|
models: args.models,
|
|
@@ -88,9 +152,9 @@ async function runDefaultAgentInner(args) {
|
|
|
88
152
|
memory: args.memory,
|
|
89
153
|
metadata: args.metadata ?? {},
|
|
90
154
|
metrics: args.metrics
|
|
91
|
-
});
|
|
155
|
+
}));
|
|
92
156
|
const validated = parseAgentSchema(outputSchema, output, 'agent_output');
|
|
93
|
-
return { output: validated, emitted: [{ id: `msg_${
|
|
157
|
+
return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
|
|
94
158
|
}
|
|
95
159
|
const baseInstructions = typeof args.agent.instructions === 'function'
|
|
96
160
|
? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
|
|
@@ -98,10 +162,9 @@ async function runDefaultAgentInner(args) {
|
|
|
98
162
|
const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
|
|
99
163
|
const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
|
|
100
164
|
if (skillIds.length > 0 && !enabledBuiltins.includes('read')) {
|
|
101
|
-
throw new
|
|
165
|
+
throw new SkillManifestError('Agents with skills require the read built-in tool for skill activation.', {
|
|
102
166
|
reason: 'skill_read_tool_missing',
|
|
103
|
-
|
|
104
|
-
id: args.agentId
|
|
167
|
+
agent_id: args.agentId
|
|
105
168
|
});
|
|
106
169
|
}
|
|
107
170
|
const builtinSpecs = getBuiltinToolSpecs(enabledBuiltins, args.session);
|
|
@@ -130,117 +193,169 @@ async function runDefaultAgentInner(args) {
|
|
|
130
193
|
const emitted = [];
|
|
131
194
|
const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
|
|
132
195
|
let steps = 0;
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
runId: args.runId,
|
|
150
|
-
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
151
|
-
agentId: args.agentId
|
|
152
|
-
});
|
|
153
|
-
const toolCalls = response.toolCalls ?? [];
|
|
154
|
-
if (toolCalls.length === 0) {
|
|
155
|
-
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
156
|
-
emitted.push({ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
157
|
-
await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
|
|
158
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
159
|
-
return { output: validated, emitted };
|
|
160
|
-
}
|
|
161
|
-
const assistantMsg = {
|
|
162
|
-
id: `msg_${Date.now()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
|
|
163
|
-
timestamp: new Date().toISOString()
|
|
164
|
-
};
|
|
165
|
-
emitted.push(assistantMsg);
|
|
166
|
-
modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
|
|
167
|
-
for (const call of toolCalls) {
|
|
168
|
-
const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
|
|
169
|
-
const input = call.arguments;
|
|
170
|
-
let result;
|
|
171
|
-
try {
|
|
172
|
-
args.signal.throwIfAborted();
|
|
173
|
-
await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
|
|
174
|
-
const tool = args.customTools[canonical];
|
|
175
|
-
const toolKind = canonical in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
|
|
176
|
-
result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
|
|
177
|
-
const decision = await checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input);
|
|
178
|
-
if (decision === 'deny') {
|
|
179
|
-
throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: 'hook_deny' });
|
|
180
|
-
}
|
|
181
|
-
if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
|
|
182
|
-
const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
|
|
183
|
-
if (canonical === 'read')
|
|
184
|
-
markSkillActivation(input, args.skills, activatedSkills);
|
|
185
|
-
return { output };
|
|
186
|
-
}
|
|
187
|
-
if (!enabledCustomTools.has(canonical)) {
|
|
188
|
-
throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
|
|
189
|
-
}
|
|
190
|
-
if (!tool)
|
|
191
|
-
throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
|
|
192
|
-
if (isMcpToolDefinition(tool)) {
|
|
193
|
-
if (!args.mcpRegistry)
|
|
194
|
-
throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
|
|
195
|
-
const registry = args.mcpRegistry;
|
|
196
|
-
return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
|
|
197
|
-
}
|
|
198
|
-
if (tool.kind && tool.kind !== 'ts') {
|
|
199
|
-
throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
|
|
200
|
-
}
|
|
201
|
-
const tsTool = tool;
|
|
202
|
-
const parsed = tsTool.input.parse(input);
|
|
203
|
-
const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
|
|
204
|
-
signal,
|
|
205
|
-
sandbox: withSandboxTelemetry(args, canonical),
|
|
206
|
-
logger: args.logger,
|
|
207
|
-
telemetry: args.telemetry,
|
|
208
|
-
metrics: createMetrics(args.telemetry, {
|
|
209
|
-
'harness.name': args.harnessName,
|
|
210
|
-
'harness.session.id': args.sessionId,
|
|
211
|
-
'harness.run.id': args.runId,
|
|
212
|
-
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
213
|
-
'harness.agent.id': args.agentId,
|
|
214
|
-
'harness.tool.id': canonical
|
|
215
|
-
}),
|
|
216
|
-
memory: args.memory,
|
|
217
|
-
runId: args.runId,
|
|
218
|
-
sessionId: args.sessionId,
|
|
219
|
-
agentId: args.agentId,
|
|
220
|
-
toolId: canonical
|
|
221
|
-
}, parsed));
|
|
222
|
-
return { output: tsTool.output.parse(out) };
|
|
223
|
-
});
|
|
224
|
-
}
|
|
225
|
-
catch (error) {
|
|
226
|
-
result = { error: serializeError(normalizeToolFailure(canonical, error)) };
|
|
227
|
-
}
|
|
228
|
-
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
|
|
229
|
-
const toolMessage = {
|
|
230
|
-
id: `msg_${Date.now()}_${call.id}`,
|
|
196
|
+
await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
|
|
197
|
+
try {
|
|
198
|
+
while (true) {
|
|
199
|
+
if (args.signal.aborted)
|
|
200
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
201
|
+
if (steps >= maxSteps)
|
|
202
|
+
throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
|
|
203
|
+
const response = await model.object({
|
|
204
|
+
messages: [
|
|
205
|
+
{ role: 'system', content: instructions },
|
|
206
|
+
...modelMessages
|
|
207
|
+
],
|
|
208
|
+
tools: [...builtinSpecs, ...customSpecs],
|
|
209
|
+
schema: z.toJSONSchema(outputSchema)
|
|
210
|
+
}, args.signal, {
|
|
211
|
+
harnessName: args.harnessName,
|
|
231
212
|
sessionId: args.sessionId,
|
|
232
213
|
runId: args.runId,
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
214
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
215
|
+
agentId: args.agentId
|
|
216
|
+
});
|
|
217
|
+
// Emit one usage-bearing model event per model round-trip (including
|
|
218
|
+
// tool-call steps) so run-summary modelCalls and tokenTotals are accurate
|
|
219
|
+
// for multi-step runs.
|
|
220
|
+
await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
|
|
221
|
+
const toolCalls = (response.toolCalls ?? []);
|
|
222
|
+
if (toolCalls.length === 0) {
|
|
223
|
+
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
224
|
+
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
225
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
226
|
+
return { output: validated, emitted };
|
|
227
|
+
}
|
|
228
|
+
const assistantMsg = {
|
|
229
|
+
id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
|
|
236
230
|
timestamp: new Date().toISOString()
|
|
237
231
|
};
|
|
238
|
-
emitted.push(
|
|
239
|
-
modelMessages.push({ role: '
|
|
232
|
+
emitted.push(assistantMsg);
|
|
233
|
+
modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
|
|
234
|
+
args.metrics.histogram('harness.agent.tool_batch.size', toolCalls.length, {
|
|
235
|
+
'harness.agent.tool_batch.max_parallel': args.maxParallelToolCalls
|
|
236
|
+
});
|
|
237
|
+
const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
|
|
238
|
+
...args,
|
|
239
|
+
enabledCustomTools,
|
|
240
|
+
activatedSkills
|
|
241
|
+
}, call));
|
|
242
|
+
for (const outcome of outcomes) {
|
|
243
|
+
emitted.push(outcome.emitted);
|
|
244
|
+
modelMessages.push(outcome.modelMessage);
|
|
245
|
+
}
|
|
246
|
+
steps += 1;
|
|
240
247
|
}
|
|
241
|
-
|
|
248
|
+
}
|
|
249
|
+
catch (error) {
|
|
250
|
+
// Pair every agent.started with an agent.finished, even on error/cancel/budget.
|
|
251
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
|
|
252
|
+
throw error;
|
|
242
253
|
}
|
|
243
254
|
}
|
|
255
|
+
async function runLimited(items, limit, fn) {
|
|
256
|
+
const concurrency = Math.max(1, Math.min(limit, items.length));
|
|
257
|
+
const results = new Array(items.length);
|
|
258
|
+
let next = 0;
|
|
259
|
+
async function worker() {
|
|
260
|
+
while (true) {
|
|
261
|
+
const index = next;
|
|
262
|
+
next += 1;
|
|
263
|
+
const item = items[index];
|
|
264
|
+
if (item === undefined)
|
|
265
|
+
return;
|
|
266
|
+
results[index] = await fn(item);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
await Promise.all(Array.from({ length: concurrency }, () => worker()));
|
|
270
|
+
return results;
|
|
271
|
+
}
|
|
272
|
+
async function executeToolCall(args, call) {
|
|
273
|
+
const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
|
|
274
|
+
const input = call.arguments;
|
|
275
|
+
const tool = args.customTools[canonical];
|
|
276
|
+
const toolKind = resolveToolKind(canonical, tool);
|
|
277
|
+
let result;
|
|
278
|
+
try {
|
|
279
|
+
if (args.signal.aborted)
|
|
280
|
+
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
281
|
+
await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
|
|
282
|
+
result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
|
|
283
|
+
const permission = await withToolSignal(args.signal, args.toolTimeoutMs, () => checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input));
|
|
284
|
+
if (permission.decision === 'deny') {
|
|
285
|
+
throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: permission.reason });
|
|
286
|
+
}
|
|
287
|
+
if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
|
|
288
|
+
const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
|
|
289
|
+
if (canonical === 'read')
|
|
290
|
+
markSkillActivation(input, args.skills, args.activatedSkills);
|
|
291
|
+
return { output };
|
|
292
|
+
}
|
|
293
|
+
if (!args.enabledCustomTools.has(canonical)) {
|
|
294
|
+
throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
|
|
295
|
+
}
|
|
296
|
+
if (!tool)
|
|
297
|
+
throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
|
|
298
|
+
if (isMcpToolDefinition(tool)) {
|
|
299
|
+
if (!args.mcpRegistry)
|
|
300
|
+
throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
|
|
301
|
+
const registry = args.mcpRegistry;
|
|
302
|
+
return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
|
|
303
|
+
}
|
|
304
|
+
if (tool.kind && tool.kind !== 'ts') {
|
|
305
|
+
throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
|
|
306
|
+
}
|
|
307
|
+
const parsed = tool.input.parse(input);
|
|
308
|
+
const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tool.handler({
|
|
309
|
+
signal,
|
|
310
|
+
sandbox: withSandboxTelemetry(args, canonical),
|
|
311
|
+
logger: args.logger,
|
|
312
|
+
telemetry: args.telemetry,
|
|
313
|
+
metrics: createMetrics(args.telemetry, {
|
|
314
|
+
'harness.name': args.harnessName,
|
|
315
|
+
'harness.session.id': args.sessionId,
|
|
316
|
+
'harness.run.id': args.runId,
|
|
317
|
+
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
318
|
+
'harness.agent.id': args.agentId,
|
|
319
|
+
'harness.tool.id': canonical
|
|
320
|
+
}),
|
|
321
|
+
memory: args.memory,
|
|
322
|
+
runId: args.runId,
|
|
323
|
+
sessionId: args.sessionId,
|
|
324
|
+
agentId: args.agentId,
|
|
325
|
+
toolId: canonical
|
|
326
|
+
}, parsed));
|
|
327
|
+
return { output: tool.output.parse(out) };
|
|
328
|
+
});
|
|
329
|
+
}
|
|
330
|
+
catch (error) {
|
|
331
|
+
const failure = normalizeToolFailure(canonical, error, toolKind);
|
|
332
|
+
if (failure instanceof OperationCancelledError) {
|
|
333
|
+
if (args.signal.aborted)
|
|
334
|
+
throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
|
|
335
|
+
throw failure;
|
|
336
|
+
}
|
|
337
|
+
result = { error: serializeError(failure) };
|
|
338
|
+
}
|
|
339
|
+
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
|
|
340
|
+
const toolMessage = {
|
|
341
|
+
id: `msg_${ulid()}_${call.id}`,
|
|
342
|
+
sessionId: args.sessionId,
|
|
343
|
+
runId: args.runId,
|
|
344
|
+
role: 'tool',
|
|
345
|
+
content: '',
|
|
346
|
+
toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
|
|
347
|
+
timestamp: new Date().toISOString()
|
|
348
|
+
};
|
|
349
|
+
return {
|
|
350
|
+
emitted: toolMessage,
|
|
351
|
+
modelMessage: { role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) }
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
function resolveToolKind(toolId, tool) {
|
|
355
|
+
if (toolId in BUILTIN_ALIAS_TO_CANONICAL)
|
|
356
|
+
return 'builtin';
|
|
357
|
+
return tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
|
|
358
|
+
}
|
|
244
359
|
function markSkillActivation(input, skills, activated) {
|
|
245
360
|
if (!input || typeof input !== 'object')
|
|
246
361
|
return;
|
|
@@ -255,31 +370,35 @@ function markSkillActivation(input, skills, activated) {
|
|
|
255
370
|
}
|
|
256
371
|
}
|
|
257
372
|
async function withToolSignal(parent, timeoutMs, fn) {
|
|
258
|
-
parent.
|
|
373
|
+
if (parent.aborted)
|
|
374
|
+
throw abortError(parent, 'run', 'Run was cancelled.');
|
|
259
375
|
const controller = new AbortController();
|
|
260
376
|
const relay = () => controller.abort(parent.reason);
|
|
261
377
|
parent.addEventListener('abort', relay, { once: true });
|
|
378
|
+
if (parent.aborted)
|
|
379
|
+
relay();
|
|
380
|
+
let abortListener;
|
|
262
381
|
const timeout = timeoutMs > 0
|
|
263
382
|
? setTimeout(() => controller.abort(new OperationTimeoutError('Tool execution timed out.', { scope: 'tool', timeout_ms: timeoutMs })), timeoutMs)
|
|
264
383
|
: undefined;
|
|
265
|
-
const
|
|
266
|
-
|
|
267
|
-
const
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
})
|
|
273
|
-
: undefined;
|
|
384
|
+
const abortPromise = new Promise((_, reject) => {
|
|
385
|
+
abortListener = () => {
|
|
386
|
+
const reason = controller.signal.reason;
|
|
387
|
+
reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
|
|
388
|
+
};
|
|
389
|
+
controller.signal.addEventListener('abort', abortListener, { once: true });
|
|
390
|
+
});
|
|
274
391
|
try {
|
|
275
392
|
const operation = fn(controller.signal);
|
|
276
|
-
return await
|
|
393
|
+
return await Promise.race([operation, abortPromise]);
|
|
277
394
|
}
|
|
278
395
|
catch (error) {
|
|
279
396
|
if (controller.signal.aborted) {
|
|
280
397
|
const reason = controller.signal.reason;
|
|
281
398
|
if (reason instanceof OperationTimeoutError)
|
|
282
399
|
throw reason;
|
|
400
|
+
if (reason instanceof OperationCancelledError)
|
|
401
|
+
throw reason;
|
|
283
402
|
throw new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason ?? error);
|
|
284
403
|
}
|
|
285
404
|
throw error;
|
|
@@ -287,6 +406,8 @@ async function withToolSignal(parent, timeoutMs, fn) {
|
|
|
287
406
|
finally {
|
|
288
407
|
if (timeout)
|
|
289
408
|
clearTimeout(timeout);
|
|
409
|
+
if (abortListener)
|
|
410
|
+
controller.signal.removeEventListener('abort', abortListener);
|
|
290
411
|
parent.removeEventListener('abort', relay);
|
|
291
412
|
}
|
|
292
413
|
}
|
|
@@ -312,25 +433,34 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
|
|
|
312
433
|
} : {})
|
|
313
434
|
};
|
|
314
435
|
const started = Date.now();
|
|
436
|
+
let durationAttrs = {};
|
|
315
437
|
const execute = async () => {
|
|
316
438
|
try {
|
|
317
439
|
const result = await fn();
|
|
318
|
-
args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, attrs);
|
|
319
440
|
return result;
|
|
320
441
|
}
|
|
321
442
|
catch (error) {
|
|
322
|
-
|
|
443
|
+
const normalized = normalizeToolFailure(toolId, error, toolKind);
|
|
444
|
+
durationAttrs = {
|
|
445
|
+
'harness.error.code': normalized.code,
|
|
446
|
+
'harness.error.category': normalized.category,
|
|
447
|
+
'harness.error.retriable': normalized.retriable
|
|
448
|
+
};
|
|
449
|
+
throw normalized;
|
|
450
|
+
}
|
|
451
|
+
finally {
|
|
452
|
+
args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, { ...attrs, ...durationAttrs });
|
|
323
453
|
}
|
|
324
454
|
};
|
|
325
455
|
return args.telemetry ? args.telemetry.span(`execute_tool ${toolId}`, attrs, execute) : execute();
|
|
326
456
|
}
|
|
327
|
-
function normalizeToolFailure(toolId, error) {
|
|
457
|
+
function normalizeToolFailure(toolId, error, toolKind = toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts') {
|
|
328
458
|
if (error instanceof z.ZodError) {
|
|
329
459
|
return new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
|
|
330
460
|
}
|
|
331
461
|
if (error instanceof HarnessError)
|
|
332
462
|
return error;
|
|
333
|
-
return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind:
|
|
463
|
+
return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolKind }, error);
|
|
334
464
|
}
|
|
335
465
|
function parseAgentSchema(schema, value, where) {
|
|
336
466
|
try {
|
|
@@ -346,7 +476,15 @@ function parseAgentSchema(schema, value, where) {
|
|
|
346
476
|
function withSandboxTelemetry(args, toolId) {
|
|
347
477
|
if (!args.telemetry || args.session.executor === 'unavailable')
|
|
348
478
|
return args.session;
|
|
349
|
-
|
|
479
|
+
const attrs = {
|
|
480
|
+
'harness.name': args.harnessName,
|
|
481
|
+
'harness.session.id': args.sessionId,
|
|
482
|
+
'harness.run.id': args.runId,
|
|
483
|
+
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
484
|
+
'harness.agent.id': args.agentId,
|
|
485
|
+
'harness.tool.id': toolId
|
|
486
|
+
};
|
|
487
|
+
const wrapped = {
|
|
350
488
|
...args.session,
|
|
351
489
|
executor: args.session.executor,
|
|
352
490
|
read: args.session.read.bind(args.session),
|
|
@@ -358,14 +496,7 @@ function withSandboxTelemetry(args, toolId) {
|
|
|
358
496
|
exists: args.session.exists.bind(args.session),
|
|
359
497
|
mount: args.session.mount.bind(args.session),
|
|
360
498
|
close: args.session.close.bind(args.session),
|
|
361
|
-
exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', {
|
|
362
|
-
'harness.name': args.harnessName,
|
|
363
|
-
'harness.session.id': args.sessionId,
|
|
364
|
-
'harness.run.id': args.runId,
|
|
365
|
-
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
366
|
-
'harness.agent.id': args.agentId,
|
|
367
|
-
'harness.tool.id': toolId
|
|
368
|
-
}, async (span) => {
|
|
499
|
+
exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', attrs, async (span) => {
|
|
369
500
|
const result = await args.session.exec(command, opts);
|
|
370
501
|
span.setAttributes({
|
|
371
502
|
'harness.exec.exit_code': result.exitCode,
|
|
@@ -374,4 +505,9 @@ function withSandboxTelemetry(args, toolId) {
|
|
|
374
505
|
return result;
|
|
375
506
|
})
|
|
376
507
|
};
|
|
508
|
+
const spawn = args.session.spawn;
|
|
509
|
+
if (typeof spawn === 'function') {
|
|
510
|
+
wrapped.spawn = async (command, opts) => args.telemetry.span('harness.sandbox.spawn', attrs, async () => spawn.call(args.session, command, opts));
|
|
511
|
+
}
|
|
512
|
+
return wrapped;
|
|
377
513
|
}
|
package/dist/errors/catalog.d.ts
CHANGED
|
@@ -62,7 +62,7 @@ export declare class ModelError extends HarnessError {
|
|
|
62
62
|
model: string;
|
|
63
63
|
method: string;
|
|
64
64
|
status?: number;
|
|
65
|
-
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded';
|
|
65
|
+
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
66
66
|
providerCode?: string;
|
|
67
67
|
providerType?: string;
|
|
68
68
|
providerParam?: string;
|
|
@@ -103,10 +103,11 @@ export declare class SkillNotFoundError extends HarnessError {
|
|
|
103
103
|
/** Skill manifest/frontmatter/config validation failure. */
|
|
104
104
|
export declare class SkillManifestError extends HarnessError {
|
|
105
105
|
constructor(message: string, meta: {
|
|
106
|
-
directory
|
|
107
|
-
reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | '
|
|
106
|
+
directory?: string;
|
|
107
|
+
reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'reserved_name' | 'skill_not_declared' | 'skill_read_tool_missing' | 'skill_sandbox_unsupported' | 'untrusted_project_skill' | 'collision_shadowed' | 'scan_limit_reached';
|
|
108
108
|
skill_id?: string;
|
|
109
109
|
source?: string;
|
|
110
|
+
agent_id?: string;
|
|
110
111
|
}, cause?: unknown);
|
|
111
112
|
}
|
|
112
113
|
/** Workflow referenced an unknown agent id. */
|
|
@@ -14,7 +14,7 @@ import { type Sandbox } from '../sandbox/index.js';
|
|
|
14
14
|
import type { ModelHandle } from '../models/registry.js';
|
|
15
15
|
import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
|
|
16
16
|
/** Stable harness version string for diagnostics and generated documentation. */
|
|
17
|
-
export
|
|
17
|
+
export { HARNESS_VERSION } from '../version.js';
|
|
18
18
|
/** OpenTelemetry capture controls used by the harness. */
|
|
19
19
|
export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
|
|
20
20
|
export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
|
|
@@ -36,6 +36,8 @@ export interface HarnessDefaults {
|
|
|
36
36
|
skillTimeoutMs?: number;
|
|
37
37
|
/** Per-model timeout in milliseconds. Default: `300_000`. */
|
|
38
38
|
modelTimeoutMs?: number;
|
|
39
|
+
/** Maximum tool calls from one model response executed at the same time. Default: `8`. */
|
|
40
|
+
maxParallelToolCalls?: number;
|
|
39
41
|
/**
|
|
40
42
|
* Max non-system messages forwarded into model calls.
|
|
41
43
|
* `undefined` keeps all history, `0` keeps only system messages.
|
|
@@ -47,6 +49,17 @@ export interface HarnessOptions {
|
|
|
47
49
|
/** Optional harness name for logs, telemetry, and diagnostics. Default: `agent-harness`. */
|
|
48
50
|
name?: string;
|
|
49
51
|
}
|
|
52
|
+
/** Durable execution opt-in for a single workflow call. */
|
|
53
|
+
export interface DurableInvokeOptions {
|
|
54
|
+
/** Stable run id reused across resumes/retries. Matches `/^[A-Za-z0-9_.:-]{1,200}$/`. */
|
|
55
|
+
runId: string;
|
|
56
|
+
/** Worker/process id owning the durable lease. Defaults to the harness worker id. */
|
|
57
|
+
workerId?: string;
|
|
58
|
+
/** Initial durable step id label. Defaults to the workflow id. */
|
|
59
|
+
stepId?: string;
|
|
60
|
+
/** Optional attempt hint; the runtime may raise it on retry. */
|
|
61
|
+
attempt?: number;
|
|
62
|
+
}
|
|
50
63
|
/** Shared invoke options for workflow and agent execution. */
|
|
51
64
|
export interface InvokeOptions {
|
|
52
65
|
/** Abort signal used to cooperatively cancel the call. */
|
|
@@ -61,6 +74,12 @@ export interface InvokeOptions {
|
|
|
61
74
|
tracestate?: string;
|
|
62
75
|
/** Scalar metadata exposed to handlers and telemetry sanitizers. */
|
|
63
76
|
metadata?: Record<string, JsonValue>;
|
|
77
|
+
/**
|
|
78
|
+
* Opt a workflow run into durable execution against the configured
|
|
79
|
+
* `.runtime(...)` (and optional `.workspaceStore(...)`). Workflow-only;
|
|
80
|
+
* supplying it on an agent run throws `ValidationError`.
|
|
81
|
+
*/
|
|
82
|
+
durable?: DurableInvokeOptions;
|
|
64
83
|
}
|
|
65
84
|
/** Canonical built-in tool names provided by the harness. */
|
|
66
85
|
export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
|
|
@@ -330,6 +349,12 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
|
|
|
330
349
|
metadata: Readonly<Record<string, JsonValue>>;
|
|
331
350
|
memory: MemoryFacade;
|
|
332
351
|
metrics: Metrics;
|
|
352
|
+
/**
|
|
353
|
+
* Runs `fn` as a durable step. Under a durable invocation the output is
|
|
354
|
+
* checkpointed and replayed on resume without re-running `fn`; otherwise it is
|
|
355
|
+
* a transparent pass-through. See spec 10 "Durable steps".
|
|
356
|
+
*/
|
|
357
|
+
step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
|
|
333
358
|
output?: O;
|
|
334
359
|
}
|
|
335
360
|
/** Full context passed to custom agent handlers. */
|
|
@@ -713,4 +738,3 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
|
|
|
713
738
|
* ```
|
|
714
739
|
*/
|
|
715
740
|
export declare function defineHarness(opts?: HarnessOptions): HarnessBuilder<{}>;
|
|
716
|
-
export {};
|