@purista/harness 1.2.0 → 1.2.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/agents/index.d.ts +1 -0
  2. package/dist/agents/index.js +278 -142
  3. package/dist/errors/catalog.d.ts +4 -3
  4. package/dist/harness/defineHarness.d.ts +26 -2
  5. package/dist/harness/defineHarness.js +51 -2
  6. package/dist/index.d.ts +1 -1
  7. package/dist/memory/sandbox/index.js +7 -1
  8. package/dist/models/registry.js +45 -3
  9. package/dist/ports/base-model-provider.js +2 -0
  10. package/dist/ports/capabilities.d.ts +2 -0
  11. package/dist/ports/harness-context.d.ts +1 -0
  12. package/dist/ports/model-provider.d.ts +4 -0
  13. package/dist/ports/state.d.ts +6 -0
  14. package/dist/runtime/abort.d.ts +5 -0
  15. package/dist/runtime/abort.js +33 -0
  16. package/dist/runtime/durable.d.ts +2 -0
  17. package/dist/runtime/durable.js +6 -2
  18. package/dist/runtime/sessionDurable.d.ts +49 -0
  19. package/dist/runtime/sessionDurable.js +135 -0
  20. package/dist/runtime/steps.d.ts +19 -1
  21. package/dist/runtime/steps.js +21 -3
  22. package/dist/sandbox/index.d.ts +34 -0
  23. package/dist/sandbox/index.js +40 -3
  24. package/dist/sessions/index.d.ts +15 -2
  25. package/dist/sessions/index.js +212 -99
  26. package/dist/skills/index.js +19 -6
  27. package/dist/state/in-memory.d.ts +1 -0
  28. package/dist/state/in-memory.js +15 -0
  29. package/dist/telemetry/shim.js +9 -4
  30. package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
  31. package/dist/testing/durableWorkspaceStoreContract.js +64 -28
  32. package/dist/tools/index.d.ts +2 -0
  33. package/dist/tools/index.js +17 -2
  34. package/dist/tools/mcp/runner.js +11 -6
  35. package/dist/tools/mcp/stdio.js +170 -1
  36. package/dist/ulid/index.d.ts +6 -1
  37. package/dist/ulid/index.js +31 -13
  38. package/dist/version.d.ts +2 -0
  39. package/dist/version.js +2 -0
  40. package/dist/workflows/index.js +7 -1
  41. package/dist/workspace/in-memory.d.ts +9 -10
  42. package/dist/workspace/in-memory.js +191 -48
  43. package/package.json +1 -1
  44. package/dist/harness/errors.d.ts +0 -62
  45. package/dist/harness/errors.js +0 -67
@@ -26,6 +26,7 @@ export declare function runDefaultAgent(args: {
26
26
  maxSteps: number;
27
27
  signal: AbortSignal;
28
28
  toolTimeoutMs: number;
29
+ maxParallelToolCalls: number;
29
30
  logger: Logger;
30
31
  telemetry: TelemetryShim;
31
32
  emitEvent?: (event: RunEvent) => Promise<void>;
@@ -1,30 +1,92 @@
1
1
  import { z } from 'zod';
2
2
  import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
3
- import { AgentLoopBudgetError, HarnessConfigError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
3
+ import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, SkillManifestError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
4
4
  import { createMetrics } from '../telemetry/index.js';
5
5
  import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
6
6
  import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
7
7
  import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
8
+ import { ulid } from '../ulid/index.js';
9
+ import { abortError, withAbortSignal } from '../runtime/abort.js';
8
10
  function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
9
11
  function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
10
12
  async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
11
13
  if (isReadonlyBuiltin(toolName))
12
- return 'allow';
14
+ return { decision: 'allow' };
13
15
  const perm = def.permissions?.[toolName];
14
- const mode = typeof perm === 'string' ? perm : (perm && typeof perm === 'object' && 'mode' in perm ? perm.mode : 'allow');
16
+ const policy = normalizePermissionPolicy(perm);
17
+ const mode = policy.mode;
18
+ const target = permissionTarget(toolName, input);
19
+ if (target && matchesAnyPattern(target, policy.deny))
20
+ return { decision: 'deny', reason: 'mode_deny' };
21
+ if (policy.allow && policy.allow.length > 0 && (!target || !matchesAnyPattern(target, policy.allow))) {
22
+ return { decision: 'deny', reason: 'mode_deny' };
23
+ }
15
24
  if (mode === 'allow')
16
- return 'allow';
25
+ return { decision: 'allow' };
17
26
  if (mode === 'deny')
18
- return 'deny';
27
+ return { decision: 'deny', reason: 'mode_deny' };
19
28
  if (!def.onPermission)
20
- return 'deny';
29
+ return { decision: 'deny', reason: 'hook_deny' };
21
30
  try {
22
- return await def.onPermission({ toolName, input, agentId, runId, sessionId });
31
+ const decision = await def.onPermission({ toolName, input, agentId, runId, sessionId });
32
+ return decision === 'allow' ? { decision } : { decision, reason: 'hook_deny' };
23
33
  }
24
34
  catch {
25
35
  throw new PermissionDeniedError('Permission hook failed.', { tool_name: toolName, agent_id: agentId, reason: 'hook_failed' });
26
36
  }
27
37
  }
38
+ function normalizePermissionPolicy(perm) {
39
+ if (perm === 'allow' || perm === 'ask' || perm === 'deny')
40
+ return { mode: perm };
41
+ if (perm && typeof perm === 'object' && 'mode' in perm) {
42
+ const candidate = perm;
43
+ if (candidate.mode === 'allow' || candidate.mode === 'ask' || candidate.mode === 'deny') {
44
+ return {
45
+ mode: candidate.mode,
46
+ ...(Array.isArray(candidate.allow) ? { allow: candidate.allow.filter(isString) } : {}),
47
+ ...(Array.isArray(candidate.deny) ? { deny: candidate.deny.filter(isString) } : {})
48
+ };
49
+ }
50
+ }
51
+ return { mode: 'allow' };
52
+ }
53
+ function isString(value) {
54
+ return typeof value === 'string';
55
+ }
56
+ function permissionTarget(toolName, input) {
57
+ if (!input || typeof input !== 'object')
58
+ return undefined;
59
+ const record = input;
60
+ if (toolName === 'bash')
61
+ return typeof record['command'] === 'string' ? record['command'] : undefined;
62
+ if (toolName === 'write' || toolName === 'edit')
63
+ return typeof record['path'] === 'string' ? record['path'] : undefined;
64
+ return undefined;
65
+ }
66
+ function matchesAnyPattern(value, patterns) {
67
+ return patterns?.some((pattern) => globPatternToRegExp(pattern).test(value)) ?? false;
68
+ }
69
+ function globPatternToRegExp(pattern) {
70
+ let source = '^';
71
+ for (let index = 0; index < pattern.length; index += 1) {
72
+ const char = pattern[index];
73
+ if (char === '*') {
74
+ if (pattern[index + 1] === '*') {
75
+ source += '.*';
76
+ index += 1;
77
+ }
78
+ else {
79
+ source += '[^/]*';
80
+ }
81
+ continue;
82
+ }
83
+ source += escapeRegExp(char ?? '');
84
+ }
85
+ return new RegExp(`${source}$`);
86
+ }
87
+ function escapeRegExp(value) {
88
+ return value.replace(/[\\^$+?.()|[\]{}]/g, '\\$&');
89
+ }
28
90
  export async function runDefaultAgent(args) {
29
91
  const agentAttrs = {
30
92
  'harness.name': args.harnessName,
@@ -67,7 +129,8 @@ function metadataSpanAttrs(metadata) {
67
129
  return attrs;
68
130
  }
69
131
  async function runDefaultAgentInner(args) {
70
- args.signal.throwIfAborted();
132
+ if (args.signal.aborted)
133
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
71
134
  const inputSchema = args.agent.input ?? z.string();
72
135
  const outputSchema = args.agent.output ?? z.string();
73
136
  const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
@@ -78,7 +141,8 @@ async function runDefaultAgentInner(args) {
78
141
  await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
79
142
  const activatedSkills = new Set();
80
143
  if (args.agent.handler) {
81
- const output = await args.agent.handler({
144
+ const handler = args.agent.handler;
145
+ const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
82
146
  input: parsedInput,
83
147
  signal: args.signal,
84
148
  models: args.models,
@@ -88,9 +152,9 @@ async function runDefaultAgentInner(args) {
88
152
  memory: args.memory,
89
153
  metadata: args.metadata ?? {},
90
154
  metrics: args.metrics
91
- });
155
+ }));
92
156
  const validated = parseAgentSchema(outputSchema, output, 'agent_output');
93
- return { output: validated, emitted: [{ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
157
+ return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
94
158
  }
95
159
  const baseInstructions = typeof args.agent.instructions === 'function'
96
160
  ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
@@ -98,10 +162,9 @@ async function runDefaultAgentInner(args) {
98
162
  const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
99
163
  const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
100
164
  if (skillIds.length > 0 && !enabledBuiltins.includes('read')) {
101
- throw new HarnessConfigError('Agents with skills require the read built-in tool for skill activation.', {
165
+ throw new SkillManifestError('Agents with skills require the read built-in tool for skill activation.', {
102
166
  reason: 'skill_read_tool_missing',
103
- path: `agents.${args.agentId}.builtinTools`,
104
- id: args.agentId
167
+ agent_id: args.agentId
105
168
  });
106
169
  }
107
170
  const builtinSpecs = getBuiltinToolSpecs(enabledBuiltins, args.session);
@@ -130,117 +193,169 @@ async function runDefaultAgentInner(args) {
130
193
  const emitted = [];
131
194
  const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
132
195
  let steps = 0;
133
- while (true) {
134
- args.signal.throwIfAborted();
135
- if (steps >= maxSteps)
136
- throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
137
- if (steps === 0)
138
- await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
139
- const response = await model.object({
140
- messages: [
141
- { role: 'system', content: instructions },
142
- ...modelMessages
143
- ],
144
- tools: [...builtinSpecs, ...customSpecs],
145
- schema: z.toJSONSchema(outputSchema)
146
- }, args.signal, {
147
- harnessName: args.harnessName,
148
- sessionId: args.sessionId,
149
- runId: args.runId,
150
- ...(args.workflowId ? { workflowId: args.workflowId } : {}),
151
- agentId: args.agentId
152
- });
153
- const toolCalls = response.toolCalls ?? [];
154
- if (toolCalls.length === 0) {
155
- const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
156
- emitted.push({ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
157
- await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
158
- await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
159
- return { output: validated, emitted };
160
- }
161
- const assistantMsg = {
162
- id: `msg_${Date.now()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
163
- timestamp: new Date().toISOString()
164
- };
165
- emitted.push(assistantMsg);
166
- modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
167
- for (const call of toolCalls) {
168
- const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
169
- const input = call.arguments;
170
- let result;
171
- try {
172
- args.signal.throwIfAborted();
173
- await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
174
- const tool = args.customTools[canonical];
175
- const toolKind = canonical in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
176
- result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
177
- const decision = await checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input);
178
- if (decision === 'deny') {
179
- throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: 'hook_deny' });
180
- }
181
- if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
182
- const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
183
- if (canonical === 'read')
184
- markSkillActivation(input, args.skills, activatedSkills);
185
- return { output };
186
- }
187
- if (!enabledCustomTools.has(canonical)) {
188
- throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
189
- }
190
- if (!tool)
191
- throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
192
- if (isMcpToolDefinition(tool)) {
193
- if (!args.mcpRegistry)
194
- throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
195
- const registry = args.mcpRegistry;
196
- return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
197
- }
198
- if (tool.kind && tool.kind !== 'ts') {
199
- throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
200
- }
201
- const tsTool = tool;
202
- const parsed = tsTool.input.parse(input);
203
- const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
204
- signal,
205
- sandbox: withSandboxTelemetry(args, canonical),
206
- logger: args.logger,
207
- telemetry: args.telemetry,
208
- metrics: createMetrics(args.telemetry, {
209
- 'harness.name': args.harnessName,
210
- 'harness.session.id': args.sessionId,
211
- 'harness.run.id': args.runId,
212
- ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
213
- 'harness.agent.id': args.agentId,
214
- 'harness.tool.id': canonical
215
- }),
216
- memory: args.memory,
217
- runId: args.runId,
218
- sessionId: args.sessionId,
219
- agentId: args.agentId,
220
- toolId: canonical
221
- }, parsed));
222
- return { output: tsTool.output.parse(out) };
223
- });
224
- }
225
- catch (error) {
226
- result = { error: serializeError(normalizeToolFailure(canonical, error)) };
227
- }
228
- await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
229
- const toolMessage = {
230
- id: `msg_${Date.now()}_${call.id}`,
196
+ await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
197
+ try {
198
+ while (true) {
199
+ if (args.signal.aborted)
200
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
201
+ if (steps >= maxSteps)
202
+ throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
203
+ const response = await model.object({
204
+ messages: [
205
+ { role: 'system', content: instructions },
206
+ ...modelMessages
207
+ ],
208
+ tools: [...builtinSpecs, ...customSpecs],
209
+ schema: z.toJSONSchema(outputSchema)
210
+ }, args.signal, {
211
+ harnessName: args.harnessName,
231
212
  sessionId: args.sessionId,
232
213
  runId: args.runId,
233
- role: 'tool',
234
- content: '',
235
- toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
214
+ ...(args.workflowId ? { workflowId: args.workflowId } : {}),
215
+ agentId: args.agentId
216
+ });
217
+ // Emit one usage-bearing model event per model round-trip (including
218
+ // tool-call steps) so run-summary modelCalls and tokenTotals are accurate
219
+ // for multi-step runs.
220
+ await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
221
+ const toolCalls = (response.toolCalls ?? []);
222
+ if (toolCalls.length === 0) {
223
+ const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
224
+ emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
225
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
226
+ return { output: validated, emitted };
227
+ }
228
+ const assistantMsg = {
229
+ id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
236
230
  timestamp: new Date().toISOString()
237
231
  };
238
- emitted.push(toolMessage);
239
- modelMessages.push({ role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) });
232
+ emitted.push(assistantMsg);
233
+ modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
234
+ args.metrics.histogram('harness.agent.tool_batch.size', toolCalls.length, {
235
+ 'harness.agent.tool_batch.max_parallel': args.maxParallelToolCalls
236
+ });
237
+ const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
238
+ ...args,
239
+ enabledCustomTools,
240
+ activatedSkills
241
+ }, call));
242
+ for (const outcome of outcomes) {
243
+ emitted.push(outcome.emitted);
244
+ modelMessages.push(outcome.modelMessage);
245
+ }
246
+ steps += 1;
240
247
  }
241
- steps += 1;
248
+ }
249
+ catch (error) {
250
+ // Pair every agent.started with an agent.finished, even on error/cancel/budget.
251
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
252
+ throw error;
242
253
  }
243
254
  }
255
+ async function runLimited(items, limit, fn) {
256
+ const concurrency = Math.max(1, Math.min(limit, items.length));
257
+ const results = new Array(items.length);
258
+ let next = 0;
259
+ async function worker() {
260
+ while (true) {
261
+ const index = next;
262
+ next += 1;
263
+ const item = items[index];
264
+ if (item === undefined)
265
+ return;
266
+ results[index] = await fn(item);
267
+ }
268
+ }
269
+ await Promise.all(Array.from({ length: concurrency }, () => worker()));
270
+ return results;
271
+ }
272
+ async function executeToolCall(args, call) {
273
+ const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
274
+ const input = call.arguments;
275
+ const tool = args.customTools[canonical];
276
+ const toolKind = resolveToolKind(canonical, tool);
277
+ let result;
278
+ try {
279
+ if (args.signal.aborted)
280
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
281
+ await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
282
+ result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
283
+ const permission = await withToolSignal(args.signal, args.toolTimeoutMs, () => checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input));
284
+ if (permission.decision === 'deny') {
285
+ throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: permission.reason });
286
+ }
287
+ if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
288
+ const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
289
+ if (canonical === 'read')
290
+ markSkillActivation(input, args.skills, args.activatedSkills);
291
+ return { output };
292
+ }
293
+ if (!args.enabledCustomTools.has(canonical)) {
294
+ throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
295
+ }
296
+ if (!tool)
297
+ throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
298
+ if (isMcpToolDefinition(tool)) {
299
+ if (!args.mcpRegistry)
300
+ throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
301
+ const registry = args.mcpRegistry;
302
+ return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
303
+ }
304
+ if (tool.kind && tool.kind !== 'ts') {
305
+ throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
306
+ }
307
+ const parsed = tool.input.parse(input);
308
+ const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tool.handler({
309
+ signal,
310
+ sandbox: withSandboxTelemetry(args, canonical),
311
+ logger: args.logger,
312
+ telemetry: args.telemetry,
313
+ metrics: createMetrics(args.telemetry, {
314
+ 'harness.name': args.harnessName,
315
+ 'harness.session.id': args.sessionId,
316
+ 'harness.run.id': args.runId,
317
+ ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
318
+ 'harness.agent.id': args.agentId,
319
+ 'harness.tool.id': canonical
320
+ }),
321
+ memory: args.memory,
322
+ runId: args.runId,
323
+ sessionId: args.sessionId,
324
+ agentId: args.agentId,
325
+ toolId: canonical
326
+ }, parsed));
327
+ return { output: tool.output.parse(out) };
328
+ });
329
+ }
330
+ catch (error) {
331
+ const failure = normalizeToolFailure(canonical, error, toolKind);
332
+ if (failure instanceof OperationCancelledError) {
333
+ if (args.signal.aborted)
334
+ throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
335
+ throw failure;
336
+ }
337
+ result = { error: serializeError(failure) };
338
+ }
339
+ await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
340
+ const toolMessage = {
341
+ id: `msg_${ulid()}_${call.id}`,
342
+ sessionId: args.sessionId,
343
+ runId: args.runId,
344
+ role: 'tool',
345
+ content: '',
346
+ toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
347
+ timestamp: new Date().toISOString()
348
+ };
349
+ return {
350
+ emitted: toolMessage,
351
+ modelMessage: { role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) }
352
+ };
353
+ }
354
+ function resolveToolKind(toolId, tool) {
355
+ if (toolId in BUILTIN_ALIAS_TO_CANONICAL)
356
+ return 'builtin';
357
+ return tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
358
+ }
244
359
  function markSkillActivation(input, skills, activated) {
245
360
  if (!input || typeof input !== 'object')
246
361
  return;
@@ -255,31 +370,35 @@ function markSkillActivation(input, skills, activated) {
255
370
  }
256
371
  }
257
372
  async function withToolSignal(parent, timeoutMs, fn) {
258
- parent.throwIfAborted();
373
+ if (parent.aborted)
374
+ throw abortError(parent, 'run', 'Run was cancelled.');
259
375
  const controller = new AbortController();
260
376
  const relay = () => controller.abort(parent.reason);
261
377
  parent.addEventListener('abort', relay, { once: true });
378
+ if (parent.aborted)
379
+ relay();
380
+ let abortListener;
262
381
  const timeout = timeoutMs > 0
263
382
  ? setTimeout(() => controller.abort(new OperationTimeoutError('Tool execution timed out.', { scope: 'tool', timeout_ms: timeoutMs })), timeoutMs)
264
383
  : undefined;
265
- const timeoutPromise = timeoutMs > 0
266
- ? new Promise((_, reject) => {
267
- const check = () => {
268
- const reason = controller.signal.reason;
269
- reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
270
- };
271
- controller.signal.addEventListener('abort', check, { once: true });
272
- })
273
- : undefined;
384
+ const abortPromise = new Promise((_, reject) => {
385
+ abortListener = () => {
386
+ const reason = controller.signal.reason;
387
+ reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
388
+ };
389
+ controller.signal.addEventListener('abort', abortListener, { once: true });
390
+ });
274
391
  try {
275
392
  const operation = fn(controller.signal);
276
- return await (timeoutPromise ? Promise.race([operation, timeoutPromise]) : operation);
393
+ return await Promise.race([operation, abortPromise]);
277
394
  }
278
395
  catch (error) {
279
396
  if (controller.signal.aborted) {
280
397
  const reason = controller.signal.reason;
281
398
  if (reason instanceof OperationTimeoutError)
282
399
  throw reason;
400
+ if (reason instanceof OperationCancelledError)
401
+ throw reason;
283
402
  throw new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason ?? error);
284
403
  }
285
404
  throw error;
@@ -287,6 +406,8 @@ async function withToolSignal(parent, timeoutMs, fn) {
287
406
  finally {
288
407
  if (timeout)
289
408
  clearTimeout(timeout);
409
+ if (abortListener)
410
+ controller.signal.removeEventListener('abort', abortListener);
290
411
  parent.removeEventListener('abort', relay);
291
412
  }
292
413
  }
@@ -312,25 +433,34 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
312
433
  } : {})
313
434
  };
314
435
  const started = Date.now();
436
+ let durationAttrs = {};
315
437
  const execute = async () => {
316
438
  try {
317
439
  const result = await fn();
318
- args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, attrs);
319
440
  return result;
320
441
  }
321
442
  catch (error) {
322
- throw normalizeToolFailure(toolId, error);
443
+ const normalized = normalizeToolFailure(toolId, error, toolKind);
444
+ durationAttrs = {
445
+ 'harness.error.code': normalized.code,
446
+ 'harness.error.category': normalized.category,
447
+ 'harness.error.retriable': normalized.retriable
448
+ };
449
+ throw normalized;
450
+ }
451
+ finally {
452
+ args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, { ...attrs, ...durationAttrs });
323
453
  }
324
454
  };
325
455
  return args.telemetry ? args.telemetry.span(`execute_tool ${toolId}`, attrs, execute) : execute();
326
456
  }
327
- function normalizeToolFailure(toolId, error) {
457
+ function normalizeToolFailure(toolId, error, toolKind = toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts') {
328
458
  if (error instanceof z.ZodError) {
329
459
  return new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
330
460
  }
331
461
  if (error instanceof HarnessError)
332
462
  return error;
333
- return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts' }, error);
463
+ return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolKind }, error);
334
464
  }
335
465
  function parseAgentSchema(schema, value, where) {
336
466
  try {
@@ -346,7 +476,15 @@ function parseAgentSchema(schema, value, where) {
346
476
  function withSandboxTelemetry(args, toolId) {
347
477
  if (!args.telemetry || args.session.executor === 'unavailable')
348
478
  return args.session;
349
- return {
479
+ const attrs = {
480
+ 'harness.name': args.harnessName,
481
+ 'harness.session.id': args.sessionId,
482
+ 'harness.run.id': args.runId,
483
+ ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
484
+ 'harness.agent.id': args.agentId,
485
+ 'harness.tool.id': toolId
486
+ };
487
+ const wrapped = {
350
488
  ...args.session,
351
489
  executor: args.session.executor,
352
490
  read: args.session.read.bind(args.session),
@@ -358,14 +496,7 @@ function withSandboxTelemetry(args, toolId) {
358
496
  exists: args.session.exists.bind(args.session),
359
497
  mount: args.session.mount.bind(args.session),
360
498
  close: args.session.close.bind(args.session),
361
- exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', {
362
- 'harness.name': args.harnessName,
363
- 'harness.session.id': args.sessionId,
364
- 'harness.run.id': args.runId,
365
- ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
366
- 'harness.agent.id': args.agentId,
367
- 'harness.tool.id': toolId
368
- }, async (span) => {
499
+ exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', attrs, async (span) => {
369
500
  const result = await args.session.exec(command, opts);
370
501
  span.setAttributes({
371
502
  'harness.exec.exit_code': result.exitCode,
@@ -374,4 +505,9 @@ function withSandboxTelemetry(args, toolId) {
374
505
  return result;
375
506
  })
376
507
  };
508
+ const spawn = args.session.spawn;
509
+ if (typeof spawn === 'function') {
510
+ wrapped.spawn = async (command, opts) => args.telemetry.span('harness.sandbox.spawn', attrs, async () => spawn.call(args.session, command, opts));
511
+ }
512
+ return wrapped;
377
513
  }
@@ -62,7 +62,7 @@ export declare class ModelError extends HarnessError {
62
62
  model: string;
63
63
  method: string;
64
64
  status?: number;
65
- reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded';
65
+ reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
66
66
  providerCode?: string;
67
67
  providerType?: string;
68
68
  providerParam?: string;
@@ -103,10 +103,11 @@ export declare class SkillNotFoundError extends HarnessError {
103
103
  /** Skill manifest/frontmatter/config validation failure. */
104
104
  export declare class SkillManifestError extends HarnessError {
105
105
  constructor(message: string, meta: {
106
- directory: string;
107
- reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'collision_shadowed' | 'untrusted_project_skill' | 'scan_limit_reached' | 'reserved_name';
106
+ directory?: string;
107
+ reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'reserved_name' | 'skill_not_declared' | 'skill_read_tool_missing' | 'skill_sandbox_unsupported' | 'untrusted_project_skill' | 'collision_shadowed' | 'scan_limit_reached';
108
108
  skill_id?: string;
109
109
  source?: string;
110
+ agent_id?: string;
110
111
  }, cause?: unknown);
111
112
  }
112
113
  /** Workflow referenced an unknown agent id. */
@@ -14,7 +14,7 @@ import { type Sandbox } from '../sandbox/index.js';
14
14
  import type { ModelHandle } from '../models/registry.js';
15
15
  import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
16
16
  /** Stable harness version string for diagnostics and generated documentation. */
17
- export declare const HARNESS_VERSION = "0.0.0";
17
+ export { HARNESS_VERSION } from '../version.js';
18
18
  /** OpenTelemetry capture controls used by the harness. */
19
19
  export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
20
20
  export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
@@ -36,6 +36,8 @@ export interface HarnessDefaults {
36
36
  skillTimeoutMs?: number;
37
37
  /** Per-model timeout in milliseconds. Default: `300_000`. */
38
38
  modelTimeoutMs?: number;
39
+ /** Maximum tool calls from one model response executed at the same time. Default: `8`. */
40
+ maxParallelToolCalls?: number;
39
41
  /**
40
42
  * Max non-system messages forwarded into model calls.
41
43
  * `undefined` keeps all history, `0` keeps only system messages.
@@ -47,6 +49,17 @@ export interface HarnessOptions {
47
49
  /** Optional harness name for logs, telemetry, and diagnostics. Default: `agent-harness`. */
48
50
  name?: string;
49
51
  }
52
+ /** Durable execution opt-in for a single workflow call. */
53
+ export interface DurableInvokeOptions {
54
+ /** Stable run id reused across resumes/retries. Matches `/^[A-Za-z0-9_.:-]{1,200}$/`. */
55
+ runId: string;
56
+ /** Worker/process id owning the durable lease. Defaults to the harness worker id. */
57
+ workerId?: string;
58
+ /** Initial durable step id label. Defaults to the workflow id. */
59
+ stepId?: string;
60
+ /** Optional attempt hint; the runtime may raise it on retry. */
61
+ attempt?: number;
62
+ }
50
63
  /** Shared invoke options for workflow and agent execution. */
51
64
  export interface InvokeOptions {
52
65
  /** Abort signal used to cooperatively cancel the call. */
@@ -61,6 +74,12 @@ export interface InvokeOptions {
61
74
  tracestate?: string;
62
75
  /** Scalar metadata exposed to handlers and telemetry sanitizers. */
63
76
  metadata?: Record<string, JsonValue>;
77
+ /**
78
+ * Opt a workflow run into durable execution against the configured
79
+ * `.runtime(...)` (and optional `.workspaceStore(...)`). Workflow-only;
80
+ * supplying it on an agent run throws `ValidationError`.
81
+ */
82
+ durable?: DurableInvokeOptions;
64
83
  }
65
84
  /** Canonical built-in tool names provided by the harness. */
66
85
  export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
@@ -330,6 +349,12 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
330
349
  metadata: Readonly<Record<string, JsonValue>>;
331
350
  memory: MemoryFacade;
332
351
  metrics: Metrics;
352
+ /**
353
+ * Runs `fn` as a durable step. Under a durable invocation the output is
354
+ * checkpointed and replayed on resume without re-running `fn`; otherwise it is
355
+ * a transparent pass-through. See spec 10 "Durable steps".
356
+ */
357
+ step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
333
358
  output?: O;
334
359
  }
335
360
  /** Full context passed to custom agent handlers. */
@@ -713,4 +738,3 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
713
738
  * ```
714
739
  */
715
740
  export declare function defineHarness(opts?: HarnessOptions): HarnessBuilder<{}>;
716
- export {};