npm - @purista/harness - Versions diffs - 1.2.0 → 1.2.2 - Mend

@purista/harness 1.2.0 → 1.2.2

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/dist/agents/index.d.ts +1 -0
package/dist/agents/index.js +278 -142
package/dist/errors/catalog.d.ts +4 -3
package/dist/harness/defineHarness.d.ts +26 -2
package/dist/harness/defineHarness.js +51 -2
package/dist/index.d.ts +1 -1
package/dist/memory/sandbox/index.js +7 -1
package/dist/models/registry.js +45 -3
package/dist/ports/base-model-provider.js +2 -0
package/dist/ports/capabilities.d.ts +2 -0
package/dist/ports/harness-context.d.ts +1 -0
package/dist/ports/model-provider.d.ts +4 -0
package/dist/ports/state.d.ts +6 -0
package/dist/runtime/abort.d.ts +5 -0
package/dist/runtime/abort.js +33 -0
package/dist/runtime/durable.d.ts +2 -0
package/dist/runtime/durable.js +6 -2
package/dist/runtime/sessionDurable.d.ts +49 -0
package/dist/runtime/sessionDurable.js +135 -0
package/dist/runtime/steps.d.ts +19 -1
package/dist/runtime/steps.js +21 -3
package/dist/sandbox/index.d.ts +34 -0
package/dist/sandbox/index.js +40 -3
package/dist/sessions/index.d.ts +15 -2
package/dist/sessions/index.js +212 -99
package/dist/skills/index.js +19 -6
package/dist/state/in-memory.d.ts +1 -0
package/dist/state/in-memory.js +15 -0
package/dist/telemetry/shim.js +9 -4
package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
package/dist/testing/durableWorkspaceStoreContract.js +64 -28
package/dist/tools/index.d.ts +2 -0
package/dist/tools/index.js +17 -2
package/dist/tools/mcp/runner.js +11 -6
package/dist/tools/mcp/stdio.js +170 -1
package/dist/ulid/index.d.ts +6 -1
package/dist/ulid/index.js +31 -13
package/dist/version.d.ts +2 -0
package/dist/version.js +2 -0
package/dist/workflows/index.js +7 -1
package/dist/workspace/in-memory.d.ts +9 -10
package/dist/workspace/in-memory.js +191 -48
package/package.json +1 -1
package/dist/harness/errors.d.ts +0 -62
package/dist/harness/errors.js +0 -67

package/dist/agents/index.d.ts CHANGED Viewed

@@ -26,6 +26,7 @@ export declare function runDefaultAgent(args: {
     maxSteps: number;
     signal: AbortSignal;
     toolTimeoutMs: number;
+    maxParallelToolCalls: number;
     logger: Logger;
     telemetry: TelemetryShim;
     emitEvent?: (event: RunEvent) => Promise<void>;

package/dist/agents/index.js CHANGED Viewed

@@ -1,30 +1,92 @@
 import { z } from 'zod';
 import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
-import { AgentLoopBudgetError, HarnessConfigError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
+import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, SkillManifestError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
 import { createMetrics } from '../telemetry/index.js';
 import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
 import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
 import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
+import { ulid } from '../ulid/index.js';
+import { abortError, withAbortSignal } from '../runtime/abort.js';
 function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
 function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
 async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
     if (isReadonlyBuiltin(toolName))
-        return 'allow';
+        return { decision: 'allow' };
     const perm = def.permissions?.[toolName];
-    const mode = typeof perm === 'string' ? perm : (perm && typeof perm === 'object' && 'mode' in perm ? perm.mode : 'allow');
+    const policy = normalizePermissionPolicy(perm);
+    const mode = policy.mode;
+    const target = permissionTarget(toolName, input);
+    if (target && matchesAnyPattern(target, policy.deny))
+        return { decision: 'deny', reason: 'mode_deny' };
+    if (policy.allow && policy.allow.length > 0 && (!target || !matchesAnyPattern(target, policy.allow))) {
+        return { decision: 'deny', reason: 'mode_deny' };
+    }
     if (mode === 'allow')
-        return 'allow';
+        return { decision: 'allow' };
     if (mode === 'deny')
-        return 'deny';
+        return { decision: 'deny', reason: 'mode_deny' };
     if (!def.onPermission)
-        return 'deny';
+        return { decision: 'deny', reason: 'hook_deny' };
     try {
-        return await def.onPermission({ toolName, input, agentId, runId, sessionId });
+        const decision = await def.onPermission({ toolName, input, agentId, runId, sessionId });
+        return decision === 'allow' ? { decision } : { decision, reason: 'hook_deny' };
     }
     catch {
         throw new PermissionDeniedError('Permission hook failed.', { tool_name: toolName, agent_id: agentId, reason: 'hook_failed' });
     }
 }
+function normalizePermissionPolicy(perm) {
+    if (perm === 'allow' || perm === 'ask' || perm === 'deny')
+        return { mode: perm };
+    if (perm && typeof perm === 'object' && 'mode' in perm) {
+        const candidate = perm;
+        if (candidate.mode === 'allow' || candidate.mode === 'ask' || candidate.mode === 'deny') {
+            return {
+                mode: candidate.mode,
+                ...(Array.isArray(candidate.allow) ? { allow: candidate.allow.filter(isString) } : {}),
+                ...(Array.isArray(candidate.deny) ? { deny: candidate.deny.filter(isString) } : {})
+            };
+        }
+    }
+    return { mode: 'allow' };
+}
+function isString(value) {
+    return typeof value === 'string';
+}
+function permissionTarget(toolName, input) {
+    if (!input || typeof input !== 'object')
+        return undefined;
+    const record = input;
+    if (toolName === 'bash')
+        return typeof record['command'] === 'string' ? record['command'] : undefined;
+    if (toolName === 'write' || toolName === 'edit')
+        return typeof record['path'] === 'string' ? record['path'] : undefined;
+    return undefined;
+}
+function matchesAnyPattern(value, patterns) {
+    return patterns?.some((pattern) => globPatternToRegExp(pattern).test(value)) ?? false;
+}
+function globPatternToRegExp(pattern) {
+    let source = '^';
+    for (let index = 0; index < pattern.length; index += 1) {
+        const char = pattern[index];
+        if (char === '*') {
+            if (pattern[index + 1] === '*') {
+                source += '.*';
+                index += 1;
+            }
+            else {
+                source += '[^/]*';
+            }
+            continue;
+        }
+        source += escapeRegExp(char ?? '');
+    }
+    return new RegExp(`${source}$`);
+}
+function escapeRegExp(value) {
+    return value.replace(/[\\^$+?.()|[\]{}]/g, '\\$&');
+}
 export async function runDefaultAgent(args) {
     const agentAttrs = {
         'harness.name': args.harnessName,
@@ -67,7 +129,8 @@ function metadataSpanAttrs(metadata) {
     return attrs;
 }
 async function runDefaultAgentInner(args) {
-    args.signal.throwIfAborted();
+    if (args.signal.aborted)
+        throw abortError(args.signal, 'run', 'Run was cancelled.');
     const inputSchema = args.agent.input ?? z.string();
     const outputSchema = args.agent.output ?? z.string();
     const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
@@ -78,7 +141,8 @@ async function runDefaultAgentInner(args) {
     await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
     const activatedSkills = new Set();
     if (args.agent.handler) {
-        const output = await args.agent.handler({
+        const handler = args.agent.handler;
+        const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
             input: parsedInput,
             signal: args.signal,
             models: args.models,
@@ -88,9 +152,9 @@ async function runDefaultAgentInner(args) {
             memory: args.memory,
             metadata: args.metadata ?? {},
             metrics: args.metrics
-        });
+        }));
         const validated = parseAgentSchema(outputSchema, output, 'agent_output');
-        return { output: validated, emitted: [{ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
+        return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
     }
     const baseInstructions = typeof args.agent.instructions === 'function'
         ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
@@ -98,10 +162,9 @@ async function runDefaultAgentInner(args) {
     const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
     const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
     if (skillIds.length > 0 && !enabledBuiltins.includes('read')) {
-        throw new HarnessConfigError('Agents with skills require the read built-in tool for skill activation.', {
+        throw new SkillManifestError('Agents with skills require the read built-in tool for skill activation.', {
             reason: 'skill_read_tool_missing',
-            path: `agents.${args.agentId}.builtinTools`,
-            id: args.agentId
+            agent_id: args.agentId
         });
     }
     const builtinSpecs = getBuiltinToolSpecs(enabledBuiltins, args.session);
@@ -130,117 +193,169 @@ async function runDefaultAgentInner(args) {
     const emitted = [];
     const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
     let steps = 0;
-    while (true) {
-        args.signal.throwIfAborted();
-        if (steps >= maxSteps)
-            throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
-        if (steps === 0)
-            await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
-        const response = await model.object({
-            messages: [
-                { role: 'system', content: instructions },
-                ...modelMessages
-            ],
-            tools: [...builtinSpecs, ...customSpecs],
-            schema: z.toJSONSchema(outputSchema)
-        }, args.signal, {
-            harnessName: args.harnessName,
-            sessionId: args.sessionId,
-            runId: args.runId,
-            ...(args.workflowId ? { workflowId: args.workflowId } : {}),
-            agentId: args.agentId
-        });
-        const toolCalls = response.toolCalls ?? [];
-        if (toolCalls.length === 0) {
-            const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
-            emitted.push({ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
-            await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
-            await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
-            return { output: validated, emitted };
-        }
-        const assistantMsg = {
-            id: `msg_${Date.now()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
-            timestamp: new Date().toISOString()
-        };
-        emitted.push(assistantMsg);
-        modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
-        for (const call of toolCalls) {
-            const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
-            const input = call.arguments;
-            let result;
-            try {
-                args.signal.throwIfAborted();
-                await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
-                const tool = args.customTools[canonical];
-                const toolKind = canonical in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
-                result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
-                    const decision = await checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input);
-                    if (decision === 'deny') {
-                        throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: 'hook_deny' });
-                    }
-                    if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
-                        const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
-                        if (canonical === 'read')
-                            markSkillActivation(input, args.skills, activatedSkills);
-                        return { output };
-                    }
-                    if (!enabledCustomTools.has(canonical)) {
-                        throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
-                    }
-                    if (!tool)
-                        throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
-                    if (isMcpToolDefinition(tool)) {
-                        if (!args.mcpRegistry)
-                            throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
-                        const registry = args.mcpRegistry;
-                        return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
-                    }
-                    if (tool.kind && tool.kind !== 'ts') {
-                        throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
-                    }
-                    const tsTool = tool;
-                    const parsed = tsTool.input.parse(input);
-                    const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
-                        signal,
-                        sandbox: withSandboxTelemetry(args, canonical),
-                        logger: args.logger,
-                        telemetry: args.telemetry,
-                        metrics: createMetrics(args.telemetry, {
-                            'harness.name': args.harnessName,
-                            'harness.session.id': args.sessionId,
-                            'harness.run.id': args.runId,
-                            ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
-                            'harness.agent.id': args.agentId,
-                            'harness.tool.id': canonical
-                        }),
-                        memory: args.memory,
-                        runId: args.runId,
-                        sessionId: args.sessionId,
-                        agentId: args.agentId,
-                        toolId: canonical
-                    }, parsed));
-                    return { output: tsTool.output.parse(out) };
-                });
-            }
-            catch (error) {
-                result = { error: serializeError(normalizeToolFailure(canonical, error)) };
-            }
-            await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
-            const toolMessage = {
-                id: `msg_${Date.now()}_${call.id}`,
+    await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
+    try {
+        while (true) {
+            if (args.signal.aborted)
+                throw abortError(args.signal, 'run', 'Run was cancelled.');
+            if (steps >= maxSteps)
+                throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
+            const response = await model.object({
+                messages: [
+                    { role: 'system', content: instructions },
+                    ...modelMessages
+                ],
+                tools: [...builtinSpecs, ...customSpecs],
+                schema: z.toJSONSchema(outputSchema)
+            }, args.signal, {
+                harnessName: args.harnessName,
                 sessionId: args.sessionId,
                 runId: args.runId,
-                role: 'tool',
-                content: '',
-                toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
+                ...(args.workflowId ? { workflowId: args.workflowId } : {}),
+                agentId: args.agentId
+            });
+            // Emit one usage-bearing model event per model round-trip (including
+            // tool-call steps) so run-summary modelCalls and tokenTotals are accurate
+            // for multi-step runs.
+            await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
+            const toolCalls = (response.toolCalls ?? []);
+            if (toolCalls.length === 0) {
+                const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
+                emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
+                await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
+                return { output: validated, emitted };
+            }
+            const assistantMsg = {
+                id: `msg_${ulid()}_assistant`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: '', toolCalls,
                 timestamp: new Date().toISOString()
             };
-            emitted.push(toolMessage);
-            modelMessages.push({ role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) });
+            emitted.push(assistantMsg);
+            modelMessages.push({ role: 'assistant', content: assistantMsg.content, toolCalls });
+            args.metrics.histogram('harness.agent.tool_batch.size', toolCalls.length, {
+                'harness.agent.tool_batch.max_parallel': args.maxParallelToolCalls
+            });
+            const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
+                ...args,
+                enabledCustomTools,
+                activatedSkills
+            }, call));
+            for (const outcome of outcomes) {
+                emitted.push(outcome.emitted);
+                modelMessages.push(outcome.modelMessage);
+            }
+            steps += 1;
         }
-        steps += 1;
+    }
+    catch (error) {
+        // Pair every agent.started with an agent.finished, even on error/cancel/budget.
+        await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
+        throw error;
     }
 }
+async function runLimited(items, limit, fn) {
+    const concurrency = Math.max(1, Math.min(limit, items.length));
+    const results = new Array(items.length);
+    let next = 0;
+    async function worker() {
+        while (true) {
+            const index = next;
+            next += 1;
+            const item = items[index];
+            if (item === undefined)
+                return;
+            results[index] = await fn(item);
+        }
+    }
+    await Promise.all(Array.from({ length: concurrency }, () => worker()));
+    return results;
+}
+async function executeToolCall(args, call) {
+    const canonical = BUILTIN_ALIAS_TO_CANONICAL[call.name] ?? call.name;
+    const input = call.arguments;
+    const tool = args.customTools[canonical];
+    const toolKind = resolveToolKind(canonical, tool);
+    let result;
+    try {
+        if (args.signal.aborted)
+            throw abortError(args.signal, 'run', 'Run was cancelled.');
+        await args.emitEvent?.({ type: 'tool.started', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, input: input });
+        result = await withToolSpan(args, canonical, call.id, toolKind, tool && isMcpToolDefinition(tool) ? { server: canonical, upstreamTool: tool.tool, transport: tool.kind === 'mcp_stdio' ? 'stdio' : 'http' } : undefined, async () => {
+            const permission = await withToolSignal(args.signal, args.toolTimeoutMs, () => checkPermission(args.agentId, args.runId, args.sessionId, args.agent, canonical, input));
+            if (permission.decision === 'deny') {
+                throw new PermissionDeniedError('Permission denied.', { tool_name: canonical, agent_id: args.agentId, reason: permission.reason });
+            }
+            if (canonical in BUILTIN_ALIAS_TO_CANONICAL) {
+                const output = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeBuiltinTool(canonical, input, withSandboxTelemetry(args, canonical), signal));
+                if (canonical === 'read')
+                    markSkillActivation(input, args.skills, args.activatedSkills);
+                return { output };
+            }
+            if (!args.enabledCustomTools.has(canonical)) {
+                throw new ToolNotFoundError('Tool is not allowed for this agent.', { tool_id: canonical, where: 'agent_allowlist' });
+            }
+            if (!tool)
+                throw new ToolNotFoundError('Tool was not found.', { tool_id: canonical, where: 'registry' });
+            if (isMcpToolDefinition(tool)) {
+                if (!args.mcpRegistry)
+                    throw new ToolNotFoundError('MCP registry is not available.', { tool_id: canonical, where: 'registry' });
+                const registry = args.mcpRegistry;
+                return { output: await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => invokeMcpTool(canonical, tool, input, { registry, signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: withSandboxTelemetry(args, canonical), sandboxKey: args.sessionId })) };
+            }
+            if (tool.kind && tool.kind !== 'ts') {
+                throw new ValidationError('Unsupported tool kind.', { where: 'tool_input', issues: { toolId: canonical, kind: tool.kind } });
+            }
+            const parsed = tool.input.parse(input);
+            const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tool.handler({
+                signal,
+                sandbox: withSandboxTelemetry(args, canonical),
+                logger: args.logger,
+                telemetry: args.telemetry,
+                metrics: createMetrics(args.telemetry, {
+                    'harness.name': args.harnessName,
+                    'harness.session.id': args.sessionId,
+                    'harness.run.id': args.runId,
+                    ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
+                    'harness.agent.id': args.agentId,
+                    'harness.tool.id': canonical
+                }),
+                memory: args.memory,
+                runId: args.runId,
+                sessionId: args.sessionId,
+                agentId: args.agentId,
+                toolId: canonical
+            }, parsed));
+            return { output: tool.output.parse(out) };
+        });
+    }
+    catch (error) {
+        const failure = normalizeToolFailure(canonical, error, toolKind);
+        if (failure instanceof OperationCancelledError) {
+            if (args.signal.aborted)
+                throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
+            throw failure;
+        }
+        result = { error: serializeError(failure) };
+    }
+    await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) });
+    const toolMessage = {
+        id: `msg_${ulid()}_${call.id}`,
+        sessionId: args.sessionId,
+        runId: args.runId,
+        role: 'tool',
+        content: '',
+        toolResults: [{ toolCallId: call.id, ...(result.output !== undefined ? { output: result.output } : {}), ...(result.error ? { error: result.error } : {}) }],
+        timestamp: new Date().toISOString()
+    };
+    return {
+        emitted: toolMessage,
+        modelMessage: { role: 'tool', toolCallId: call.id, content: JSON.stringify(result.output ?? result.error ?? {}) }
+    };
+}
+function resolveToolKind(toolId, tool) {
+    if (toolId in BUILTIN_ALIAS_TO_CANONICAL)
+        return 'builtin';
+    return tool && isMcpToolDefinition(tool) ? tool.kind : 'ts';
+}
 function markSkillActivation(input, skills, activated) {
     if (!input || typeof input !== 'object')
         return;
@@ -255,31 +370,35 @@ function markSkillActivation(input, skills, activated) {
     }
 }
 async function withToolSignal(parent, timeoutMs, fn) {
-    parent.throwIfAborted();
+    if (parent.aborted)
+        throw abortError(parent, 'run', 'Run was cancelled.');
     const controller = new AbortController();
     const relay = () => controller.abort(parent.reason);
     parent.addEventListener('abort', relay, { once: true });
+    if (parent.aborted)
+        relay();
+    let abortListener;
     const timeout = timeoutMs > 0
         ? setTimeout(() => controller.abort(new OperationTimeoutError('Tool execution timed out.', { scope: 'tool', timeout_ms: timeoutMs })), timeoutMs)
         : undefined;
-    const timeoutPromise = timeoutMs > 0
-        ? new Promise((_, reject) => {
-            const check = () => {
-                const reason = controller.signal.reason;
-                reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
-            };
-            controller.signal.addEventListener('abort', check, { once: true });
-        })
-        : undefined;
+    const abortPromise = new Promise((_, reject) => {
+        abortListener = () => {
+            const reason = controller.signal.reason;
+            reject(reason instanceof Error ? reason : new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason));
+        };
+        controller.signal.addEventListener('abort', abortListener, { once: true });
+    });
     try {
         const operation = fn(controller.signal);
-        return await (timeoutPromise ? Promise.race([operation, timeoutPromise]) : operation);
+        return await Promise.race([operation, abortPromise]);
     }
     catch (error) {
         if (controller.signal.aborted) {
             const reason = controller.signal.reason;
             if (reason instanceof OperationTimeoutError)
                 throw reason;
+            if (reason instanceof OperationCancelledError)
+                throw reason;
             throw new OperationCancelledError('Tool execution was cancelled.', { scope: 'tool' }, reason ?? error);
         }
         throw error;
@@ -287,6 +406,8 @@ async function withToolSignal(parent, timeoutMs, fn) {
     finally {
         if (timeout)
             clearTimeout(timeout);
+        if (abortListener)
+            controller.signal.removeEventListener('abort', abortListener);
         parent.removeEventListener('abort', relay);
     }
 }
@@ -312,25 +433,34 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
         } : {})
     };
     const started = Date.now();
+    let durationAttrs = {};
     const execute = async () => {
         try {
             const result = await fn();
-            args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, attrs);
             return result;
         }
         catch (error) {
-            throw normalizeToolFailure(toolId, error);
+            const normalized = normalizeToolFailure(toolId, error, toolKind);
+            durationAttrs = {
+                'harness.error.code': normalized.code,
+                'harness.error.category': normalized.category,
+                'harness.error.retriable': normalized.retriable
+            };
+            throw normalized;
+        }
+        finally {
+            args.telemetry?.recordHistogram('harness.tool.duration', (Date.now() - started) / 1000, { ...attrs, ...durationAttrs });
         }
     };
     return args.telemetry ? args.telemetry.span(`execute_tool ${toolId}`, attrs, execute) : execute();
 }
-function normalizeToolFailure(toolId, error) {
+function normalizeToolFailure(toolId, error, toolKind = toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts') {
     if (error instanceof z.ZodError) {
         return new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
     }
     if (error instanceof HarnessError)
         return error;
-    return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolId in BUILTIN_ALIAS_TO_CANONICAL ? 'builtin' : 'ts' }, error);
+    return new ToolError('Tool execution failed.', { tool_id: toolId, tool_kind: toolKind }, error);
 }
 function parseAgentSchema(schema, value, where) {
     try {
@@ -346,7 +476,15 @@ function parseAgentSchema(schema, value, where) {
 function withSandboxTelemetry(args, toolId) {
     if (!args.telemetry || args.session.executor === 'unavailable')
         return args.session;
-    return {
+    const attrs = {
+        'harness.name': args.harnessName,
+        'harness.session.id': args.sessionId,
+        'harness.run.id': args.runId,
+        ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
+        'harness.agent.id': args.agentId,
+        'harness.tool.id': toolId
+    };
+    const wrapped = {
         ...args.session,
         executor: args.session.executor,
         read: args.session.read.bind(args.session),
@@ -358,14 +496,7 @@ function withSandboxTelemetry(args, toolId) {
         exists: args.session.exists.bind(args.session),
         mount: args.session.mount.bind(args.session),
         close: args.session.close.bind(args.session),
-        exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', {
-            'harness.name': args.harnessName,
-            'harness.session.id': args.sessionId,
-            'harness.run.id': args.runId,
-            ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
-            'harness.agent.id': args.agentId,
-            'harness.tool.id': toolId
-        }, async (span) => {
+        exec: async (command, opts) => args.telemetry.span('harness.sandbox.exec', attrs, async (span) => {
             const result = await args.session.exec(command, opts);
             span.setAttributes({
                 'harness.exec.exit_code': result.exitCode,
@@ -374,4 +505,9 @@ function withSandboxTelemetry(args, toolId) {
             return result;
         })
     };
+    const spawn = args.session.spawn;
+    if (typeof spawn === 'function') {
+        wrapped.spawn = async (command, opts) => args.telemetry.span('harness.sandbox.spawn', attrs, async () => spawn.call(args.session, command, opts));
+    }
+    return wrapped;
 }

package/dist/errors/catalog.d.ts CHANGED Viewed

@@ -62,7 +62,7 @@ export declare class ModelError extends HarnessError {
         model: string;
         method: string;
         status?: number;
-        reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded';
+        reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
         providerCode?: string;
         providerType?: string;
         providerParam?: string;
@@ -103,10 +103,11 @@ export declare class SkillNotFoundError extends HarnessError {
 /** Skill manifest/frontmatter/config validation failure. */
 export declare class SkillManifestError extends HarnessError {
     constructor(message: string, meta: {
-        directory: string;
-        reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'collision_shadowed' | 'untrusted_project_skill' | 'scan_limit_reached' | 'reserved_name';
+        directory?: string;
+        reason: 'missing_skill_md' | 'invalid_frontmatter' | 'missing_description' | 'invalid_name' | 'name_mismatch' | 'directory_missing' | 'reserved_name' | 'skill_not_declared' | 'skill_read_tool_missing' | 'skill_sandbox_unsupported' | 'untrusted_project_skill' | 'collision_shadowed' | 'scan_limit_reached';
         skill_id?: string;
         source?: string;
+        agent_id?: string;
     }, cause?: unknown);
 }
 /** Workflow referenced an unknown agent id. */

package/dist/harness/defineHarness.d.ts CHANGED Viewed

@@ -14,7 +14,7 @@ import { type Sandbox } from '../sandbox/index.js';
 import type { ModelHandle } from '../models/registry.js';
 import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
 /** Stable harness version string for diagnostics and generated documentation. */
-export declare const HARNESS_VERSION = "0.0.0";
+export { HARNESS_VERSION } from '../version.js';
 /** OpenTelemetry capture controls used by the harness. */
 export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
 export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
@@ -36,6 +36,8 @@ export interface HarnessDefaults {
     skillTimeoutMs?: number;
     /** Per-model timeout in milliseconds. Default: `300_000`. */
     modelTimeoutMs?: number;
+    /** Maximum tool calls from one model response executed at the same time. Default: `8`. */
+    maxParallelToolCalls?: number;
     /**
      * Max non-system messages forwarded into model calls.
      * `undefined` keeps all history, `0` keeps only system messages.
@@ -47,6 +49,17 @@ export interface HarnessOptions {
     /** Optional harness name for logs, telemetry, and diagnostics. Default: `agent-harness`. */
     name?: string;
 }
+/** Durable execution opt-in for a single workflow call. */
+export interface DurableInvokeOptions {
+    /** Stable run id reused across resumes/retries. Matches `/^[A-Za-z0-9_.:-]{1,200}$/`. */
+    runId: string;
+    /** Worker/process id owning the durable lease. Defaults to the harness worker id. */
+    workerId?: string;
+    /** Initial durable step id label. Defaults to the workflow id. */
+    stepId?: string;
+    /** Optional attempt hint; the runtime may raise it on retry. */
+    attempt?: number;
+}
 /** Shared invoke options for workflow and agent execution. */
 export interface InvokeOptions {
     /** Abort signal used to cooperatively cancel the call. */
@@ -61,6 +74,12 @@ export interface InvokeOptions {
     tracestate?: string;
     /** Scalar metadata exposed to handlers and telemetry sanitizers. */
     metadata?: Record<string, JsonValue>;
+    /**
+     * Opt a workflow run into durable execution against the configured
+     * `.runtime(...)` (and optional `.workspaceStore(...)`). Workflow-only;
+     * supplying it on an agent run throws `ValidationError`.
+     */
+    durable?: DurableInvokeOptions;
 }
 /** Canonical built-in tool names provided by the harness. */
 export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
@@ -330,6 +349,12 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
     metadata: Readonly<Record<string, JsonValue>>;
     memory: MemoryFacade;
     metrics: Metrics;
+    /**
+     * Runs `fn` as a durable step. Under a durable invocation the output is
+     * checkpointed and replayed on resume without re-running `fn`; otherwise it is
+     * a transparent pass-through. See spec 10 "Durable steps".
+     */
+    step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
     output?: O;
 }
 /** Full context passed to custom agent handlers. */
@@ -713,4 +738,3 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
  * ```
  */
 export declare function defineHarness(opts?: HarnessOptions): HarnessBuilder<{}>;
-export {};