@purista/harness 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/agents/index.d.ts +1 -0
  2. package/dist/agents/index.js +276 -141
  3. package/dist/errors/catalog.d.ts +4 -3
  4. package/dist/harness/defineHarness.d.ts +26 -2
  5. package/dist/harness/defineHarness.js +51 -2
  6. package/dist/index.d.ts +1 -1
  7. package/dist/memory/sandbox/index.js +7 -1
  8. package/dist/models/registry.js +45 -3
  9. package/dist/ports/base-model-provider.js +2 -0
  10. package/dist/ports/capabilities.d.ts +2 -0
  11. package/dist/ports/harness-context.d.ts +1 -0
  12. package/dist/ports/model-provider.d.ts +4 -0
  13. package/dist/ports/state.d.ts +6 -0
  14. package/dist/runtime/abort.d.ts +5 -0
  15. package/dist/runtime/abort.js +33 -0
  16. package/dist/runtime/durable.d.ts +2 -0
  17. package/dist/runtime/durable.js +6 -2
  18. package/dist/runtime/sessionDurable.d.ts +49 -0
  19. package/dist/runtime/sessionDurable.js +135 -0
  20. package/dist/runtime/steps.d.ts +19 -1
  21. package/dist/runtime/steps.js +21 -3
  22. package/dist/sandbox/index.d.ts +34 -0
  23. package/dist/sandbox/index.js +40 -3
  24. package/dist/sessions/index.d.ts +15 -2
  25. package/dist/sessions/index.js +212 -99
  26. package/dist/skills/index.js +19 -6
  27. package/dist/state/in-memory.d.ts +1 -0
  28. package/dist/state/in-memory.js +15 -0
  29. package/dist/telemetry/shim.js +9 -4
  30. package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
  31. package/dist/testing/durableWorkspaceStoreContract.js +64 -28
  32. package/dist/tools/index.d.ts +2 -0
  33. package/dist/tools/index.js +15 -1
  34. package/dist/tools/mcp/runner.js +11 -6
  35. package/dist/tools/mcp/stdio.js +170 -1
  36. package/dist/ulid/index.d.ts +6 -1
  37. package/dist/ulid/index.js +31 -13
  38. package/dist/version.d.ts +2 -0
  39. package/dist/version.js +2 -0
  40. package/dist/workflows/index.js +7 -1
  41. package/dist/workspace/in-memory.d.ts +9 -10
  42. package/dist/workspace/in-memory.js +191 -48
  43. package/package.json +1 -1
  44. package/dist/harness/errors.d.ts +0 -62
  45. package/dist/harness/errors.js +0 -67
@@ -26,6 +26,7 @@ export declare function runDefaultAgent(args: {
26
26
  maxSteps: number;
27
27
  signal: AbortSignal;
28
28
  toolTimeoutMs: number;
29
+ maxParallelToolCalls: number;
29
30
  logger: Logger;
30
31
  telemetry: TelemetryShim;
31
32
  emitEvent?: (event: RunEvent) => Promise<void>;
@@ -1,31 +1,92 @@
1
1
  import { z } from 'zod';
2
2
  import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
3
- import { AgentLoopBudgetError, HarnessConfigError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
3
+ import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, SkillManifestError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
4
4
  import { createMetrics } from '../telemetry/index.js';
5
5
  import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
6
6
  import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
7
7
  import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
8
8
  import { ulid } from '../ulid/index.js';
9
+ import { abortError, withAbortSignal } from '../runtime/abort.js';
9
10
  function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
10
11
  function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
11
12
  async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
12
13
  if (isReadonlyBuiltin(toolName))
13
- return 'allow';
14
+ return { decision: 'allow' };
14
15
  const perm = def.permissions?.[toolName];
15
- const mode = typeof perm === 'string' ? perm : (perm && typeof perm === 'object' && 'mode' in perm ? perm.mode : 'allow');
16
+ const policy = normalizePermissionPolicy(perm);
17
+ const mode = policy.mode;
18
+ const target = permissionTarget(toolName, input);
19
+ if (target && matchesAnyPattern(target, policy.deny))
20
+ return { decision: 'deny', reason: 'mode_deny' };
21
+ if (policy.allow && policy.allow.length > 0 && (!target || !matchesAnyPattern(target, policy.allow))) {
22
+ return { decision: 'deny', reason: 'mode_deny' };
23
+ }
16
24
  if (mode === 'allow')
17
- return 'allow';
25
+ return { decision: 'allow' };
18
26
  if (mode === 'deny')
19
- return 'deny';
27
+ return { decision: 'deny', reason: 'mode_deny' };
20
28
  if (!def.onPermission)
21
- return 'deny';
29
+ return { decision: 'deny', reason: 'hook_deny' };
22
30
  try {
23
- return await def.onPermission({ toolName, input, agentId, runId, sessionId });
31
+ const decision = await def.onPermission({ toolName, input, agentId, runId, sessionId });
32
+ return decision === 'allow' ? { decision } : { decision, reason: 'hook_deny' };
24
33
  }
25
34
  catch {
26
35
  throw new PermissionDeniedError('Permission hook failed.', { tool_name: toolName, agent_id: agentId, reason: 'hook_failed' });
27
36
  }
28
37
  }
38
+ function normalizePermissionPolicy(perm) {
39
+ if (perm === 'allow' || perm === 'ask' || perm === 'deny')
40
+ return { mode: perm };
41
+ if (perm && typeof perm === 'object' && 'mode' in perm) {
42
+ const candidate = perm;
43
+ if (candidate.mode === 'allow' || candidate.mode === 'ask' || candidate.mode === 'deny') {
44
+ return {
45
+ mode: candidate.mode,
46
+ ...(Array.isArray(candidate.allow) ? { allow: candidate.allow.filter(isString) } : {}),
47
+ ...(Array.isArray(candidate.deny) ? { deny: candidate.deny.filter(isString) } : {})
48
+ };
49
+ }
50
+ }
51
+ return { mode: 'allow' };
52
+ }
53
+ function isString(value) {
54
+ return typeof value === 'string';
55
+ }
56
+ function permissionTarget(toolName, input) {
57
+ if (!input || typeof input !== 'object')
58
+ return undefined;
59
+ const record = input;
60
+ if (toolName === 'bash')
61
+ return typeof record['command'] === 'string' ? record['command'] : undefined;
62
+ if (toolName === 'write' || toolName === 'edit')
63
+ return typeof record['path'] === 'string' ? record['path'] : undefined;
64
+ return undefined;
65
+ }
66
+ function matchesAnyPattern(value, patterns) {
67
+ return patterns?.some((pattern) => globPatternToRegExp(pattern).test(value)) ?? false;
68
+ }
69
+ function globPatternToRegExp(pattern) {
70
+ let source = '^';
71
+ for (let index = 0; index < pattern.length; index += 1) {
72
+ const char = pattern[index];
73
+ if (char === '*') {
74
+ if (pattern[index + 1] === '*') {
75
+ source += '.*';
76
+ index += 1;
77
+ }
78
+ else {
79
+ source += '[^/]*';
80
+ }
81
+ continue;
82
+ }
83
+ source += escapeRegExp(char ?? '');
84
+ }
85
+ return new RegExp(`${source}$`);
86
+ }
87
+ function escapeRegExp(value) {
88
+ return value.replace(/[\\^$+?.()|[\]{}]/g, '\\$&');
89
+ }
29
90
  export async function runDefaultAgent(args) {
30
91
  const agentAttrs = {
31
92
  'harness.name': args.harnessName,
@@ -68,7 +129,8 @@ function metadataSpanAttrs(metadata) {
68
129
  return attrs;
69
130
  }
70
131
  async function runDefaultAgentInner(args) {
71
- args.signal.throwIfAborted();
132
+ if (args.signal.aborted)
133
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
72
134
  const inputSchema = args.agent.input ?? z.string();
73
135
  const outputSchema = args.agent.output ?? z.string();
74
136
  const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
@@ -79,7 +141,8 @@ async function runDefaultAgentInner(args) {
79
141
  await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
80
142
  const activatedSkills = new Set();
81
143
  if (args.agent.handler) {
82
- const output = await args.agent.handler({
144
+ const handler = args.agent.handler;
145
+ const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
83
146
  input: parsedInput,
84
147
  signal: args.signal,
85
148
  models: args.models,
@@ -89,7 +152,7 @@ async function runDefaultAgentInner(args) {
89
152
  memory: args.memory,
90
153
  metadata: args.metadata ?? {},
91
154
  metrics: args.metrics
92
- });
155
+ }));
93
156
  const validated = parseAgentSchema(outputSchema, output, 'agent_output');
94
157
  return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
95
158
  }
@@ -99,10 +162,9 @@ async function runDefaultAgentInner(args) {
99
162
  const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
100
163
  const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
101
164
  if (skillIds.length > 0 && !enabledBuiltins.includes('read')) {
102
- throw new HarnessConfigError('Agents with skills require the read built-in tool for skill activation.', {
165
+ throw new SkillManifestError('Agents with skills require the read built-in tool for skill activation.', {
103
166
  reason: 'skill_read_tool_missing',
104
- path: `agents.${args.agentId}.builtinTools`,
105
- id: args.agentId
167
+ agent_id: args.agentId
106
168
  });
107
169
  }
108
170
  const builtinSpecs = getBuiltinToolSpecs(enabledBuiltins, args.session);
@@ -131,117 +193,169 @@ async function runDefaultAgentInner(args) {
131
193
  const emitted = [];
132
194
  const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
133
195
  let steps = 0;
134
- while (true) {
135
- args.signal.throwIfAborted();
136
- if (steps >= maxSteps)
137
- throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
138
- if (steps === 0)
139
- await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
140
- const response = await model.object({
141
- messages: [
142
- { role: 'system', content: instructions },
143
- ...modelMessages
144
- ],
145
- tools: [...builtinSpecs, ...customSpecs],
146
- schema: z.toJSONSchema(outputSchema)
147
- }, args.signal, {
148
- harnessName: args.harnessName,
149
- sessionId: args.sessionId,
150
- runId: args.runId,
151
- ...(args.workflowId ? { workflowId: args.workflowId } : {}),
152
- agentId: args.agentId
153
- });
154
- const toolCalls = response.toolCalls ?? [];
155
- if (toolCalls.length === 0) {
156
- const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
157
- emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
158
- await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
159
- await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
160
- return { output: validated, emitted };
161
- }
162
- const assistantMsg = {
163
- id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
164
- timestamp: new Date().toISOString()
165
- };
166
- emitted.push(assistantMsg);
167
- modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
168
- for (const call of toolCalls) {
169
- const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
170
- const input = call.arguments;
171
- let result;
172
- try {
173
- args.signal.throwIfAborted();
174
- await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
175
- const tool = args.customTools[canonical];
176
- const toolKind = canonical in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
177
- result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
178
- const decision = await checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input);
179
- if (decision === 'deny') {
180
- throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: 'hook_deny' });
181
- }
182
- if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
183
- const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
184
- if (canonical === 'read')
185
- markSkillActivation(input, args.skills, activatedSkills);
186
- return { output };
187
- }
188
- if (!enabledCustomTools.has(canonical)) {
189
- throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
190
- }
191
- if (!tool)
192
- throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
193
- if (isMcpToolDefinition(tool)) {
194
- if (!args.mcpRegistry)
195
- throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
196
- const registry = args.mcpRegistry;
197
- return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
198
- }
199
- if (tool.kind && tool.kind !== 'ts') {
200
- throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
201
- }
202
- const tsTool = tool;
203
- const parsed = tsTool.input.parse(input);
204
- const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
205
- signal,
206
- sandbox: withSandboxTelemetry(args, canonical),
207
- logger: args.logger,
208
- telemetry: args.telemetry,
209
- metrics: createMetrics(args.telemetry, {
210
- 'harness.name': args.harnessName,
211
- 'harness.session.id': args.sessionId,
212
- 'harness.run.id': args.runId,
213
- ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
214
- 'harness.agent.id': args.agentId,
215
- 'harness.tool.id': canonical
216
- }),
217
- memory: args.memory,
218
- runId: args.runId,
219
- sessionId: args.sessionId,
220
- agentId: args.agentId,
221
- toolId: canonical
222
- }, parsed));
223
- return { output: tsTool.output.parse(out) };
224
- });
225
- }
226
- catch (error) {
227
- result = { error: serializeError(normalizeToolFailure(canonical, error)) };
228
- }
229
- await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
230
- const toolMessage = {
231
- id: `msg_${ulid()}_${call.id}`,
196
+ await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
197
+ try {
198
+ while (true) {
199
+ if (args.signal.aborted)
200
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
201
+ if (steps >= maxSteps)
202
+ throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
203
+ const response = await model.object({
204
+ messages: [
205
+ { role: 'system', content: instructions },
206
+ ...modelMessages
207
+ ],
208
+ tools: [...builtinSpecs, ...customSpecs],
209
+ schema: z.toJSONSchema(outputSchema)
210
+ }, args.signal, {
211
+ harnessName: args.harnessName,
232
212
  sessionId: args.sessionId,
233
213
  runId: args.runId,
234
- role: 'tool',
235
- content: '',
236
- toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
214
+ ...(args.workflowId ? { workflowId: args.workflowId } : {}),
215
+ agentId: args.agentId
216
+ });
217
+ // Emit one usage-bearing model event per model round-trip (including
218
+ // tool-call steps) so run-summary modelCalls and tokenTotals are accurate
219
+ // for multi-step runs.
220
+ await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
221
+ const toolCalls = (response.toolCalls ?? []);
222
+ if (toolCalls.length === 0) {
223
+ const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
224
+ emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
225
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
226
+ return { output: validated, emitted };
227
+ }
228
+ const assistantMsg = {
229
+ id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
237
230
  timestamp: new Date().toISOString()
238
231
  };
239
- emitted.push(toolMessage);
240
- modelMessages.push({ role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) });
232
+ emitted.push(assistantMsg);
233
+ modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
234
+ args.metrics.histogram('harness.agent.tool_batch.size', toolCalls.length, {
235
+ 'harness.agent.tool_batch.max_parallel': args.maxParallelToolCalls
236
+ });
237
+ const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
238
+ ...args,
239
+ enabledCustomTools,
240
+ activatedSkills
241
+ }, call));
242
+ for (const outcome of outcomes) {
243
+ emitted.push(outcome.emitted);
244
+ modelMessages.push(outcome.modelMessage);
245
+ }
246
+ steps += 1;
241
247
  }
242
- steps += 1;
248
+ }
249
+ catch (error) {
250
+ // Pair every agent.started with an agent.finished, even on error/cancel/budget.
251
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
252
+ throw error;
243
253
  }
244
254
  }
255
+ async function runLimited(items, limit, fn) {
256
+ const concurrency = Math.max(1, Math.min(limit, items.length));
257
+ const results = new Array(items.length);
258
+ let next = 0;
259
+ async function worker() {
260
+ while (true) {
261
+ const index = next;
262
+ next += 1;
263
+ const item = items[index];
264
+ if (item === undefined)
265
+ return;
266
+ results[index] = await fn(item);
267
+ }
268
+ }
269
+ await Promise.all(Array.from({ length: concurrency }, () => worker()));
270
+ return results;
271
+ }
272
+ async function executeToolCall(args, call) {
273
+ const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
274
+ const input = call.arguments;
275
+ const tool = args.customTools[canonical];
276
+ const toolKind = resolveToolKind(canonical, tool);
277
+ let result;
278
+ try {
279
+ if (args.signal.aborted)
280
+ throw abortError(args.signal, 'run', 'Run was cancelled.');
281
+ await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
282
+ result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
283
+ const permission = await withToolSignal(args.signal, args.toolTimeoutMs, () => checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input));
284
+ if (permission.decision === 'deny') {
285
+ throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: permission.reason });
286
+ }
287
+ if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
288
+ const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
289
+ if (canonical === 'read')
290
+ markSkillActivation(input, args.skills, args.activatedSkills);
291
+ return { output };
292
+ }
293
+ if (!args.enabledCustomTools.has(canonical)) {
294
+ throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
295
+ }
296
+ if (!tool)
297
+ throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
298
+ if (isMcpToolDefinition(tool)) {
299
+ if (!args.mcpRegistry)
300
+ throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
301
+ const registry = args.mcpRegistry;
302
+ return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
303
+ }
304
+ if (tool.kind && tool.kind !== 'ts') {
305
+ throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
306
+ }
307
+ const parsed = tool.input.parse(input);
308
+ const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tool.handler({
309
+ signal,
310
+ sandbox: withSandboxTelemetry(args, canonical),
311
+ logger: args.logger,
312
+ telemetry: args.telemetry,
313
+ metrics: createMetrics(args.telemetry, {
314
+ 'harness.name': args.harnessName,
315
+ 'harness.session.id': args.sessionId,
316
+ 'harness.run.id': args.runId,
317
+ ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
318
+ 'harness.agent.id': args.agentId,
319
+ 'harness.tool.id': canonical
320
+ }),
321
+ memory: args.memory,
322
+ runId: args.runId,
323
+ sessionId: args.sessionId,
324
+ agentId: args.agentId,
325
+ toolId: canonical
326
+ }, parsed));
327
+ return { output: tool.output.parse(out) };
328
+ });
329
+ }
330
+ catch (error) {
331
+ const failure = normalizeToolFailure(canonical, error, toolKind);
332
+ if (failure instanceof OperationCancelledError) {
333
+ if (args.signal.aborted)
334
+ throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
335
+ throw failure;
336
+ }
337
+ result = { error: serializeError(failure) };
338
+ }
339
+ await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
340
+ const toolMessage = {
341
+ id: `msg_${ulid()}_${call.id}`,
342
+ sessionId: args.sessionId,
343
+ runId: args.runId,
344
+ role: 'tool',
345
+ content: '',
346
+ toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
347
+ timestamp: new Date().toISOString()
348
+ };
349
+ return {
350
+ emitted: toolMessage,
351
+ modelMessage: { role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) }
352
+ };
353
+ }
354
+ function resolveToolKind(toolId, tool) {
355
+ if (toolId in BUILTIN_ALIAS_TO_CANONICAL)
356
+ return 'builtin';
357
+ return tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
358
+ }
245
359
  function markSkillActivation(input, skills, activated) {
246
360
  if (!input || typeof input !== 'object')
247
361
  return;
@@ -256,31 +370,35 @@ function markSkillActivation(input, skills, activated) {
256
370
  }
257
371
  }
258
372
  async function withToolSignal(parent, timeoutMs, fn) {
259
- parent.throwIfAborted();
373
+ if (parent.aborted)
374
+ throw abortError(parent, 'run', 'Run was cancelled.');
260
375
  const controller = new AbortController();
261
376
  const relay = () => controller.abort(parent.reason);
262
377
  parent.addEventListener('abort', relay, { once: true });
378
+ if (parent.aborted)
379
+ relay();
380
+ let abortListener;
263
381
  const timeout = timeoutMs > 0
264
382
  ? setTimeout(() => controller.abort(new OperationTimeoutError('Tool execution timed out.', { scope: 'tool', timeout_ms: timeoutMs })), timeoutMs)
265
383
  : undefined;
266
- const timeoutPromise = timeoutMs > 0
267
- ? new Promise((_, reject) => {
268
- const check = () => {
269
- const reason = controller.signal.reason;
270
- reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
271
- };
272
- controller.signal.addEventListener('abort', check, { once: true });
273
- })
274
- : undefined;
384
+ const abortPromise = new Promise((_, reject) => {
385
+ abortListener = () => {
386
+ const reason = controller.signal.reason;
387
+ reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
388
+ };
389
+ controller.signal.addEventListener('abort', abortListener, { once: true });
390
+ });
275
391
  try {
276
392
  const operation = fn(controller.signal);
277
- return await (timeoutPromise ? Promise.race([operation, timeoutPromise]) : operation);
393
+ return await Promise.race([operation, abortPromise]);
278
394
  }
279
395
  catch (error) {
280
396
  if (controller.signal.aborted) {
281
397
  const reason = controller.signal.reason;
282
398
  if (reason instanceof OperationTimeoutError)
283
399
  throw reason;
400
+ if (reason instanceof OperationCancelledError)
401
+ throw reason;
284
402
  throw new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason ?? error);
285
403
  }
286
404
  throw error;
@@ -288,6 +406,8 @@ async function withToolSignal(parent, timeoutMs, fn) {
288
406
  finally {
289
407
  if (timeout)
290
408
  clearTimeout(timeout);
409
+ if (abortListener)
410
+ controller.signal.removeEventListener('abort', abortListener);
291
411
  parent.removeEventListener('abort', relay);
292
412
  }
293
413
  }
@@ -313,25 +433,34 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
313
433
  } : {})
314
434
  };
315
435
  const started = Date.now();
436
+ let durationAttrs = {};
316
437
  const execute = async () => {
317
438
  try {
318
439
  const result = await fn();
319
- args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, attrs);
320
440
  return result;
321
441
  }
322
442
  catch (error) {
323
- throw normalizeToolFailure(toolId, error);
443
+ const normalized = normalizeToolFailure(toolId, error, toolKind);
444
+ durationAttrs = {
445
+ 'harness.error.code': normalized.code,
446
+ 'harness.error.category': normalized.category,
447
+ 'harness.error.retriable': normalized.retriable
448
+ };
449
+ throw normalized;
450
+ }
451
+ finally {
452
+ args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, { ...attrs, ...durationAttrs });
324
453
  }
325
454
  };
326
455
  return args.telemetry ? args.telemetry.span(`execute_tool ${toolId}`, attrs, execute) : execute();
327
456
  }
328
- function normalizeToolFailure(toolId, error) {
457
+ function normalizeToolFailure(toolId, error, toolKind = toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts') {
329
458
  if (error instanceof z.ZodError) {
330
459
  return new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
331
460
  }
332
461
  if (error instanceof HarnessError)
333
462
  return error;
334
- return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts' }, error);
463
+ return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolKind }, error);
335
464
  }
336
465
  function parseAgentSchema(schema, value, where) {
337
466
  try {
@@ -347,7 +476,15 @@ function parseAgentSchema(schema, value, where) {
347
476
  function withSandboxTelemetry(args, toolId) {
348
477
  if (!args.telemetry || args.session.executor === 'unavailable')
349
478
  return args.session;
350
- return {
479
+ const attrs = {
480
+ 'harness.name': args.harnessName,
481
+ 'harness.session.id': args.sessionId,
482
+ 'harness.run.id': args.runId,
483
+ ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
484
+ 'harness.agent.id': args.agentId,
485
+ 'harness.tool.id': toolId
486
+ };
487
+ const wrapped = {
351
488
  ...args.session,
352
489
  executor: args.session.executor,
353
490
  read: args.session.read.bind(args.session),
@@ -359,14 +496,7 @@ function withSandboxTelemetry(args, toolId) {
359
496
  exists: args.session.exists.bind(args.session),
360
497
  mount: args.session.mount.bind(args.session),
361
498
  close: args.session.close.bind(args.session),
362
- exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', {
363
- 'harness.name': args.harnessName,
364
- 'harness.session.id': args.sessionId,
365
- 'harness.run.id': args.runId,
366
- ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
367
- 'harness.agent.id': args.agentId,
368
- 'harness.tool.id': toolId
369
- }, async (span) => {
499
+ exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', attrs, async (span) => {
370
500
  const result = await args.session.exec(command, opts);
371
501
  span.setAttributes({
372
502
  'harness.exec.exit_code': result.exitCode,
@@ -375,4 +505,9 @@ function withSandboxTelemetry(args, toolId) {
375
505
  return result;
376
506
  })
377
507
  };
508
+ const spawn = args.session.spawn;
509
+ if (typeof spawn === 'function') {
510
+ wrapped.spawn = async (command, opts) => args.telemetry.span('harness.sandbox.spawn', attrs, async () => spawn.call(args.session, command, opts));
511
+ }
512
+ return wrapped;
378
513
  }
@@ -62,7 +62,7 @@ export declare class ModelError extends HarnessError {
62
62
  model: string;
63
63
  method: string;
64
64
  status?: number;
65
- reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded';
65
+ reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
66
66
  providerCode?: string;
67
67
  providerType?: string;
68
68
  providerParam?: string;
@@ -103,10 +103,11 @@ export declare class SkillNotFoundError extends HarnessError {
103
103
  /** Skill manifest/frontmatter/config validation failure. */
104
104
  export declare class SkillManifestError extends HarnessError {
105
105
  constructor(message: string, meta: {
106
- directory: string;
107
- reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'collision_shadowed' | 'untrusted_project_skill' | 'scan_limit_reached' | 'reserved_name';
106
+ directory?: string;
107
+ reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'reserved_name' | 'skill_not_declared' | 'skill_read_tool_missing' | 'skill_sandbox_unsupported' | 'untrusted_project_skill' | 'collision_shadowed' | 'scan_limit_reached';
108
108
  skill_id?: string;
109
109
  source?: string;
110
+ agent_id?: string;
110
111
  }, cause?: unknown);
111
112
  }
112
113
  /** Workflow referenced an unknown agent id. */
@@ -14,7 +14,7 @@ import { type Sandbox } from '../sandbox/index.js';
14
14
  import type { ModelHandle } from '../models/registry.js';
15
15
  import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
16
16
  /** Stable harness version string for diagnostics and generated documentation. */
17
- export declare const HARNESS_VERSION = "0.0.0";
17
+ export { HARNESS_VERSION } from '../version.js';
18
18
  /** OpenTelemetry capture controls used by the harness. */
19
19
  export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
20
20
  export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
@@ -36,6 +36,8 @@ export interface HarnessDefaults {
36
36
  skillTimeoutMs?: number;
37
37
  /** Per-model timeout in milliseconds. Default: `300_000`. */
38
38
  modelTimeoutMs?: number;
39
+ /** Maximum tool calls from one model response executed at the same time. Default: `8`. */
40
+ maxParallelToolCalls?: number;
39
41
  /**
40
42
  * Max non-system messages forwarded into model calls.
41
43
  * `undefined` keeps all history, `0` keeps only system messages.
@@ -47,6 +49,17 @@ export interface HarnessOptions {
47
49
  /** Optional harness name for logs, telemetry, and diagnostics. Default: `agent-harness`. */
48
50
  name?: string;
49
51
  }
52
+ /** Durable execution opt-in for a single workflow call. */
53
+ export interface DurableInvokeOptions {
54
+ /** Stable run id reused across resumes/retries. Matches `/^[A-Za-z0-9_.:-]{1,200}$/`. */
55
+ runId: string;
56
+ /** Worker/process id owning the durable lease. Defaults to the harness worker id. */
57
+ workerId?: string;
58
+ /** Initial durable step id label. Defaults to the workflow id. */
59
+ stepId?: string;
60
+ /** Optional attempt hint; the runtime may raise it on retry. */
61
+ attempt?: number;
62
+ }
50
63
  /** Shared invoke options for workflow and agent execution. */
51
64
  export interface InvokeOptions {
52
65
  /** Abort signal used to cooperatively cancel the call. */
@@ -61,6 +74,12 @@ export interface InvokeOptions {
61
74
  tracestate?: string;
62
75
  /** Scalar metadata exposed to handlers and telemetry sanitizers. */
63
76
  metadata?: Record<string, JsonValue>;
77
+ /**
78
+ * Opt a workflow run into durable execution against the configured
79
+ * `.runtime(...)` (and optional `.workspaceStore(...)`). Workflow-only;
80
+ * supplying it on an agent run throws `ValidationError`.
81
+ */
82
+ durable?: DurableInvokeOptions;
64
83
  }
65
84
  /** Canonical built-in tool names provided by the harness. */
66
85
  export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
@@ -330,6 +349,12 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
330
349
  metadata: Readonly<Record<string, JsonValue>>;
331
350
  memory: MemoryFacade;
332
351
  metrics: Metrics;
352
+ /**
353
+ * Runs `fn` as a durable step. Under a durable invocation the output is
354
+ * checkpointed and replayed on resume without re-running `fn`; otherwise it is
355
+ * a transparent pass-through. See spec 10 "Durable steps".
356
+ */
357
+ step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
333
358
  output?: O;
334
359
  }
335
360
  /** Full context passed to custom agent handlers. */
@@ -713,4 +738,3 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
713
738
  * ```
714
739
  */
715
740
  export declare function defineHarness(opts?: HarnessOptions): HarnessBuilder<{}>;
716
- export {};