npm - @botbotgo/agent-harness - Versions diffs - 0.0.346 → 0.0.347 - Mend

@botbotgo/agent-harness 0.0.346 → 0.0.347

This diff shows the contents of package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (23)

package/dist/contracts/runtime-requests.d.ts +1 -0
package/dist/contracts/workspace.d.ts +4 -0
package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/resources/prompts/runtime/delegated-task-failure-recovery.md +8 -0
package/dist/runtime/adapter/flow/stream-runtime.js +50 -17
package/dist/runtime/adapter/invocation-result.js +49 -5
package/dist/runtime/adapter/local-tool-invocation.js +5 -0
package/dist/runtime/adapter/stream-event-projection.js +3 -1
package/dist/runtime/adapter/terminal-status.d.ts +4 -0
package/dist/runtime/adapter/terminal-status.js +67 -0
package/dist/runtime/agent-runtime-adapter.js +51 -37
package/dist/runtime/agent-runtime-assembly.d.ts +10 -0
package/dist/runtime/agent-runtime-assembly.js +68 -0
package/dist/runtime/harness/run/stream-run.js +17 -31
package/dist/runtime/parsing/output-recovery.d.ts +2 -1
package/dist/runtime/parsing/output-recovery.js +2 -25
package/dist/runtime/prompts/runtime-prompts.d.ts +1 -0
package/dist/runtime/prompts/runtime-prompts.js +1 -0
package/dist/workspace/agent-binding-compiler.js +11 -0
package/dist/workspace/framework-contract-validation.js +122 -26
package/dist/workspace/object-loader.js +3 -0
package/package.json +1 -1

package/dist/contracts/runtime-requests.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export type RequestResult = {
     artifacts?: ArtifactRecord[];
     metadata?: Record<string, unknown>;
 };
+export type TerminalExecutionStatus = "completed" | "blocked" | "failed" | "refused"; // the four status strings recognized by runtime/adapter/terminal-status.js
 export type UpstreamRuntimeEvent = unknown;
 export type UpstreamRuntimeEventItem = {
     sessionId: string;

package/dist/contracts/workspace.d.ts CHANGED Viewed

@@ -203,6 +203,9 @@ export type CompiledBuiltinToolsConfig = {
     todos?: boolean;
     modelExposed?: boolean | string[];
 };
+export type CompiledExecutionContract = { // Per-binding execution rules; surfaced on the binding as `executionContract`.
+    requiresPlan?: boolean; // the runtime's requiresPlanEvidence helpers treat only an explicit `true` as "plan required"
+};
 export type LangChainAgentParams = {
     model: CompiledModel;
     tools: CompiledTool[];
@@ -287,6 +290,7 @@ export type CompiledAgentBinding = {
         resilience?: Record<string, unknown>;
         governance?: Record<string, unknown>;
         observability?: Record<string, unknown>;
+        executionContract?: CompiledExecutionContract;
         deepagent?: {
             description?: string;
             passthrough?: Record<string, unknown>;

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.346";
+export declare const AGENT_HARNESS_VERSION = "0.0.347";
 export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-24";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.346";
+export const AGENT_HARNESS_VERSION = "0.0.347";
 export const AGENT_HARNESS_RELEASE_DATE = "2026-04-24";

package/dist/resources/prompts/runtime/delegated-task-failure-recovery.md ADDED Viewed

@@ -0,0 +1,8 @@
+The delegated task failed. You are the routing/delegation parent agent, so you must not switch into local execution or start a new local plan.
+Your next response has only two valid forms:
+1. Call the `task` tool again, preserving the user's original request and delegating to the same specialist or another explicit specialist whose configured responsibility clearly matches the original request.
+2. Return a final blocker report to the user explaining that delegated execution failed.
+Do not call local execution tools, repository tools, web tools, shell tools, or `write_todos` from the parent agent after this delegated failure. Do not invent a new topic or downgrade the original request. If you continue execution, it must be through `task`.

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
+import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION } from "../../prompts/runtime-prompts.js";
 import { buildInvocationRequest } from "../model/invocation-request.js";
 import { buildRawModelMessages } from "../model/message-assembly.js";
 import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
@@ -55,6 +56,18 @@ function hasSuccessfulNonTodoToolEvidence(executedToolResults) {
 function hasSuccessfulTaskToolEvidence(executedToolResults) {
     return executedToolResults.some((item) => item.isError !== true && item.toolName === "task");
 }
+function requiresPlanEvidence(binding) { // True only when the binding's execution contract explicitly sets requiresPlan to true.
+    return binding.harnessRuntime?.executionContract?.requiresPlan === true; // a missing harnessRuntime or executionContract yields false rather than throwing
+}
+function hasParentLocalToolExecutionAfterDelegationFailure(originalEvidence, executedToolResults) { // Detects a recovery attempt that ran any tool other than `task` after the original evidence recorded a failed task delegation.
+    return originalEvidence.hasFailedTaskDelegation // when this flag is falsy the flag itself is returned (short-circuit)
+        && executedToolResults.some((item) => item.toolName !== "task"); // every non-"task" result counts, whether or not it errored
+}
+function isDelegationFailureFinalReport(originalEvidence, executedToolResults, visibleOutput) { // Recognizes a text-only answer given after a failed delegation: no tools ran and some visible output exists.
+    return originalEvidence.hasFailedTaskDelegation // only applies when the original attempt recorded a failed task delegation
+        && executedToolResults.length === 0 // the follow-up attempt executed no tools at all
+        && visibleOutput.trim().length > 0; // whitespace-only output does not count as a report
+}
 function buildExecutionRecoveryEvidence(params) {
     const { projectionState, executedToolResults = [] } = params;
     return {
@@ -65,7 +78,8 @@ function buildExecutionRecoveryEvidence(params) {
         hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
         hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
         hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
-        hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation,
+        hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
+            || executedToolResults.some((item) => item.toolName === "task" && item.isError === true),
         hasDelegatedAgentWithConfiguredTools: projectionState.sawDelegatedAgentWithConfiguredTools,
         hasDelegatedExecutionToolEvidence: projectionState.emittedDelegatedExecutionToolResult,
         hasOnlyPlaceholderTaskCompletion: projectionState.emittedSuccessfulTaskResult
@@ -98,6 +112,9 @@ function resolveStreamedRuntimeFailureRecoveryInstruction(output, evidence) {
     return hasExecutionEvidence ? null : EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
 }
 function resolveDelegatedExecutionRecoveryInstruction(evidence) {
+    if (evidence.hasFailedTaskDelegation) {
+        return DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION;
+    }
     if (hasMissingDelegatedFindings(evidence)
         || (evidence.hasOpenTaskDelegation
             && evidence.hasDelegatedAgentWithConfiguredTools
@@ -400,6 +417,7 @@ export async function* streamRuntimeExecution(options) {
                             ? resolveMissingPlanRecoveryInstruction({
                                 request,
                                 assistantText: terminalVisibleOutput,
+                                requiresPlan: requiresPlanEvidence(options.binding),
                                 hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
                                 hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
                                 hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
@@ -443,8 +461,9 @@ export async function* streamRuntimeExecution(options) {
                 throw error;
             }
             const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
-            const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects
-                ? resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence)
+            const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
+            const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
+                ? streamedDelegatedRecoveryInstruction
                 : null;
             if (hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction) {
                 throw createUnresolvedExecutionError(streamedExecutionEvidence);
@@ -453,6 +472,7 @@ export async function* streamRuntimeExecution(options) {
                 ? resolveExecutionWithoutToolEvidenceTextInstruction(request, projectionState.emittedOutput, false, {
                     ...streamedExecutionEvidence,
                     hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(streamedExecutionEvidence),
+                    requiresPlan: requiresPlanEvidence(options.binding),
                 })
                 : null;
             const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
@@ -462,6 +482,7 @@ export async function* streamRuntimeExecution(options) {
                 ? resolveMissingPlanRecoveryInstruction({
                     request,
                     assistantText: projectionState.emittedOutput,
+                    requiresPlan: requiresPlanEvidence(options.binding),
                     hasPlanStateEvidence: streamedExecutionEvidence.hasPlanStateEvidence,
                     hasWriteTodosEvidence: streamedExecutionEvidence.hasPlanStateEvidence,
                     hasToolResultEvidence: streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
@@ -484,17 +505,22 @@ export async function* streamRuntimeExecution(options) {
                     projectionState: createStreamEventProjectionState(),
                     executedToolResults,
                 });
+                if (hasParentLocalToolExecutionAfterDelegationFailure(originalExecutionEvidence, executedToolResults)) {
+                    throw new ExecutionReconciliationError("Agent attempted parent-local tool execution after delegated task failure; it must report a blocker or re-delegate with task.");
+                }
                 const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
+                const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
                 const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
                     || retriedExecutionEvidence.hasOpenTaskDelegation
                     || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
-                const retriedHasUnresolvedExecution = hasUnresolvedExecution(retriedExecutionEvidence)
-                    || hasMissingDelegatedExecutionEvidence(retriedExecutionEvidence)
-                    || hasMissingDelegatedFindings(retriedExecutionEvidence)
-                    || (!retriedCarriesExecutionEvidence
-                        && (hasUnresolvedExecution(originalExecutionEvidence)
-                            || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
-                            || hasMissingDelegatedFindings(originalExecutionEvidence)));
+                const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
+                    && (hasUnresolvedExecution(retriedExecutionEvidence)
+                        || hasMissingDelegatedExecutionEvidence(retriedExecutionEvidence)
+                        || hasMissingDelegatedFindings(retriedExecutionEvidence)
+                        || (!retriedCarriesExecutionEvidence
+                            && (hasUnresolvedExecution(originalExecutionEvidence)
+                                || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
+                                || hasMissingDelegatedFindings(originalExecutionEvidence))));
                 const effectiveRecoveryEvidence = retriedCarriesExecutionEvidence
                     ? retriedExecutionEvidence
                     : {
@@ -666,12 +692,14 @@ export async function* streamRuntimeExecution(options) {
             ? resolveExecutionWithoutToolEvidenceTextInstruction(request, result.output, false, {
                 ...invokeExecutionEvidence,
                 hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
+                requiresPlan: requiresPlanEvidence(options.binding),
             })
             : resolveDelegatedExecutionRecoveryInstruction(invokeExecutionEvidence);
         const invokeFallbackMissingPlanRecoveryInstruction = !hasUnresolvedExecution(invokeExecutionEvidence) && !invokeFallbackRecoveryInstruction
             ? resolveMissingPlanRecoveryInstruction({
                 request,
                 assistantText: typeof result.output === "string" ? result.output : "",
+                requiresPlan: requiresPlanEvidence(options.binding),
                 hasPlanStateEvidence: invokeExecutionEvidence.hasPlanStateEvidence,
                 hasWriteTodosEvidence: invokeExecutionEvidence.hasPlanStateEvidence,
                 hasToolResultEvidence: invokeExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
@@ -688,17 +716,22 @@ export async function* streamRuntimeExecution(options) {
                 projectionState: createStreamEventProjectionState(),
                 executedToolResults: recoveredToolResults,
             });
+            if (hasParentLocalToolExecutionAfterDelegationFailure(originalExecutionEvidence, recoveredToolResults)) {
+                throw new ExecutionReconciliationError("Agent attempted parent-local tool execution after delegated task failure; it must report a blocker or re-delegate with task.");
+            }
             const recoveredVisibleOutput = recovered.output ? toVisibleContent(recovered.output) : "";
+            const recoveredIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, recoveredToolResults, recoveredVisibleOutput);
             const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasToolResultEvidence
                 || recoveredExecutionEvidence.hasOpenTaskDelegation
                 || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
-            const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
-                || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
-                || hasMissingDelegatedFindings(recoveredExecutionEvidence)
-                || (!recoveredCarriesExecutionEvidence
-                    && (hasUnresolvedExecution(originalExecutionEvidence)
-                        || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
-                        || hasMissingDelegatedFindings(originalExecutionEvidence)));
+            const recoveredHasUnresolvedExecution = !recoveredIsDelegationFailureFinalReport
+                && (hasUnresolvedExecution(recoveredExecutionEvidence)
+                    || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
+                    || hasMissingDelegatedFindings(recoveredExecutionEvidence)
+                    || (!recoveredCarriesExecutionEvidence
+                        && (hasUnresolvedExecution(originalExecutionEvidence)
+                            || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
+                            || hasMissingDelegatedFindings(originalExecutionEvidence))));
             const effectiveRecoveredEvidence = recoveredCarriesExecutionEvidence
                 ? recoveredExecutionEvidence
                 : {

package/dist/runtime/adapter/invocation-result.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
 import { buildStateSnapshot } from "./model/message-assembly.js";
 import { asRecord } from "./tool/resolved-tool.js";
 import { renderToolFailure } from "../support/harness-support.js";
+import { mapTerminalStatusToRequestState, readTerminalExecutionStatus } from "./terminal-status.js";
 function looksLikeLeakedToolCallText(value) {
     const normalized = sanitizeVisibleText(value).trim();
     if (!normalized) {
@@ -168,6 +169,29 @@ function extractDeterministicToolFailureReport(executedToolResults) {
         "- none",
     ].join("\n");
 }
+function hasEmptyFinalMessage(result) { // True when the last entry of result.messages carries an empty-string content.
+    const messages = Array.isArray(result.messages) ? result.messages : []; // a non-array `messages` is treated as no messages
+    const lastMessage = messages.at(-1); // undefined for an empty list
+    if (!lastMessage || typeof lastMessage !== "object") {
+        return false; // no final message object to inspect
+    }
+    const direct = lastMessage;
+    return direct.content === "" || direct.kwargs?.content === "" || direct.lc_kwargs?.content === ""; // probes content directly and under kwargs / lc_kwargs; only the exact empty string matches (not undefined or [])
+}
+function hasFinalMessageToolCalls(result) { // True when the last entry of result.messages lists at least one tool call, valid or invalid.
+    const messages = Array.isArray(result.messages) ? result.messages : []; // a non-array `messages` is treated as no messages
+    const lastMessage = messages.at(-1); // undefined for an empty list
+    if (!lastMessage || typeof lastMessage !== "object") {
+        return false; // no final message object to inspect
+    }
+    const direct = lastMessage;
+    return Array.isArray(direct.tool_calls) && direct.tool_calls.length > 0 // the same pair of fields is probed on the message itself...
+        || Array.isArray(direct.invalid_tool_calls) && direct.invalid_tool_calls.length > 0
+        || Array.isArray(direct.kwargs?.tool_calls) && direct.kwargs.tool_calls.length > 0 // ...under `kwargs`...
+        || Array.isArray(direct.kwargs?.invalid_tool_calls) && direct.kwargs.invalid_tool_calls.length > 0
+        || Array.isArray(direct.lc_kwargs?.tool_calls) && direct.lc_kwargs.tool_calls.length > 0 // ...and under `lc_kwargs`
+        || Array.isArray(direct.lc_kwargs?.invalid_tool_calls) && direct.lc_kwargs.invalid_tool_calls.length > 0;
+}
 export function resolveDeterministicFinalOutput(params) {
     const visibleOutput = params.visibleOutput ?? "";
     const toolFallback = params.toolFallback ?? "";
@@ -178,6 +202,9 @@ export function resolveDeterministicFinalOutput(params) {
     const deterministicFailureReport = extractDeterministicToolFailureReport(executedToolResults);
     const delegatedTaskOutput = extractLatestSuccessfulTaskResultText(executedToolResults);
     const successfulToolOutput = extractLatestSuccessfulNonTodoToolResultText(executedToolResults);
+    if (sanitizedVisibleOutput && deterministicFailureReport && hasDelegationBlocker(executedToolResults) && !successfulToolOutput) {
+        return deterministicFailureReport;
+    }
     if (sanitizedVisibleOutput && successfulToolOutput && hasDelegationBlocker(executedToolResults)) {
         return deterministicFailureReport || delegatedTaskOutput || successfulToolOutput;
     }
@@ -215,9 +242,26 @@ export function finalizeRequestResult(params) {
     const visibleOutput = extractedOutput && !isLikelyToolArgsObject(tryParseJson(extractedOutput)) ? extractedOutput : "";
     const emptyAssistantMessageFailure = extractEmptyAssistantMessageFailure(result);
     const toolFallback = extractToolFallbackContext(result);
+    const outputContent = extractOutputContent(result);
+    const contentBlocks = extractContentBlocks(result);
+    const structuredResponse = result.structuredResponse;
+    const structuredTerminalStatus = readTerminalExecutionStatus(structuredResponse) ?? readTerminalExecutionStatus(result);
+    const files = asRecord(result.files);
     if (!visibleOutput && !toolFallback && emptyAssistantMessageFailure) {
         throw new Error(emptyAssistantMessageFailure);
     }
+    if (!visibleOutput
+        && !toolFallback
+        && interruptContent === undefined
+        && outputContent === undefined
+        && contentBlocks.length === 0
+        && structuredResponse === undefined
+        && !files
+        && executedToolResults.length === 0
+        && hasEmptyFinalMessage(result)
+        && !hasFinalMessageToolCalls(result)) {
+        throw new Error("empty_final_output");
+    }
     const serializedResult = JSON.stringify(result, null, 2);
     const output = resolveDeterministicFinalOutput({
         visibleOutput,
@@ -226,17 +270,16 @@ export function finalizeRequestResult(params) {
     })
         || (containsLikelySkillDocument(result) ? "" : serializedResult);
     const finalMessageText = sanitizeVisibleText(output);
-    const outputContent = extractOutputContent(result);
-    const contentBlocks = extractContentBlocks(result);
-    const structuredResponse = result.structuredResponse;
-    const files = asRecord(result.files);
+    const terminalStatus = structuredTerminalStatus ?? readTerminalExecutionStatus(finalMessageText);
     const stateSnapshot = buildStateSnapshot(result);
     const memoryCandidates = executedToolResults.flatMap((toolResult) => toolResult.memoryCandidates ?? []);
     return {
         sessionId,
         requestId,
         agentId: bindingAgentId,
-        state: Array.isArray(result.__interrupt__) && result.__interrupt__.length > 0 ? "waiting_for_approval" : "completed",
+        state: Array.isArray(result.__interrupt__) && result.__interrupt__.length > 0
+            ? "waiting_for_approval"
+            : mapTerminalStatusToRequestState(terminalStatus),
         interruptContent,
         output: finalMessageText,
         finalMessageText,
@@ -247,6 +290,7 @@ export function finalizeRequestResult(params) {
             ...(executedToolResults.length > 0 ? { executedToolResults } : {}),
             ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
             ...(structuredResponse !== undefined ? { structuredResponse } : {}),
+            ...(terminalStatus ? { terminalStatus } : {}),
             ...(outputContent !== undefined ? { outputContent } : {}),
             ...(contentBlocks.length > 0 ? { contentBlocks } : {}),
             ...(files ? { files } : {}),

package/dist/runtime/adapter/local-tool-invocation.js CHANGED Viewed

@@ -43,6 +43,9 @@ function hasNonTodoToolEvidence(executedToolResults) {
 function hasPlanStateEvidence(executedToolResults) {
     return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos" || readPlanStateSummary(item.output) !== null);
 }
+function requiresPlanEvidence(binding) { // True only when the binding's execution contract explicitly sets requiresPlan to true.
+    return binding.harnessRuntime.executionContract?.requiresPlan === true; // NOTE(review): harnessRuntime is dereferenced without `?.` here, unlike the same-named helper in flow/stream-runtime.js; this throws if harnessRuntime is undefined - confirm it is always set
+}
 function extractLatestUserInput(request) {
     const typedRequest = request;
     const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
@@ -87,6 +90,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
                     hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
                     hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
                     hasIncompletePlanState: hasExecutionBeyondTodoPlanning && hasIncompletePlanState,
+                    requiresPlan: requiresPlanEvidence(binding),
                 })
                 : hasIncompletePlanState && hasExecutionBeyondTodoPlanning
                     ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
@@ -102,6 +106,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
         }
         const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
             request: activeRequest,
+            requiresPlan: requiresPlanEvidence(binding),
             hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
             hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
             hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,

package/dist/runtime/adapter/stream-event-projection.js CHANGED Viewed

@@ -350,7 +350,9 @@ export function projectRuntimeStreamEvent(params) {
             ? state.lastCompletedTaskDelegationFindings
             : "";
         const effectiveToolOutput = salvagedTaskErrorFindings || toolResult.output;
-        const effectiveToolIsError = salvagedTaskErrorFindings ? false : toolResult.isError;
+        const effectiveToolIsError = salvagedTaskErrorFindings
+            ? false
+            : toolResult.isError === true;
         const isSuccessfulTaskResult = toolResult.toolName === "task" && effectiveToolIsError !== true;
         const isDelegatedExecutionTool = (isDelegatedAgentEvent || state.openToolCapableTaskDelegations > 0)
             && toolResult.toolName !== "write_todos"

package/dist/runtime/adapter/terminal-status.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import type { RequestState, TerminalExecutionStatus } from "../../contracts/types.js";
+export declare function readTerminalExecutionStatus(value: unknown): TerminalExecutionStatus | null; // null when no recognized status is found in `value`
+export declare function mapTerminalStatusToRequestState(status: TerminalExecutionStatus | null): RequestState; // accepts null, i.e. "no status detected"
+export declare function mapTerminalStatusToPlanItemStatus(status: TerminalExecutionStatus): "completed" | "failed"; // collapses the four statuses to two plan-item outcomes

package/dist/runtime/adapter/terminal-status.js ADDED Viewed

@@ -0,0 +1,67 @@
+const TERMINAL_STATUSES = new Set(["completed", "blocked", "failed", "refused"]); // the only strings accepted as a terminal status
+function normalizeTerminalStatus(value) { // Returns the trimmed, lowercased status when `value` is a recognized status string; otherwise null.
+    if (typeof value !== "string") {
+        return null; // non-strings are never a status by themselves
+    }
+    const normalized = value.trim().toLowerCase(); // matching ignores case and surrounding whitespace
+    return TERMINAL_STATUSES.has(normalized)
+        ? normalized
+        : null;
+}
+function readStatusLine(value) { // Scans text line by line for a `status: <word>` entry and returns the first recognized status, else null.
+    for (const line of value.split("\n")) {
+        const [key, ...rest] = line.split(":"); // key is the text before the first colon
+        if (key?.trim().toLowerCase() !== "status") {
+            continue; // the key must be exactly "status" after trimming, case-insensitively
+        }
+        const statusValue = rest.join(":").trim().split(/\s+/)[0]; // first whitespace-delimited token after the colon
+        const status = normalizeTerminalStatus(statusValue);
+        if (status) {
+            return status;
+        }
+    } // a status line with an unrecognized value is skipped and the scan continues
+    return null;
+}
+export function readTerminalExecutionStatus(value) { // Recursively searches a string, array, or object for a terminal status; returns the status string or null.
+    const direct = normalizeTerminalStatus(value); // a bare status string such as "failed" matches immediately
+    if (direct) {
+        return direct;
+    }
+    if (typeof value === "string") {
+        try {
+            return readTerminalExecutionStatus(JSON.parse(value)); // NOTE(review): when the text parses as JSON, the status-line scan below is never attempted, even if the parsed value yields null - confirm intended
+        }
+        catch {
+            return readStatusLine(value); // not JSON: fall back to scanning for a `status: ...` line
+        }
+    }
+    if (typeof value !== "object" || value === null) {
+        return null; // numbers, booleans, undefined and null carry no status
+    }
+    if (Array.isArray(value)) {
+        for (let index = value.length - 1; index >= 0; index -= 1) { // walk from the last element backwards so the latest entry wins
+            const status = readTerminalExecutionStatus(value[index]);
+            if (status) {
+                return status;
+            }
+        }
+        return null;
+    }
+    const typed = value;
+    return (readTerminalExecutionStatus(typed.status) // fields are probed in this fixed order; the first non-null result wins
+        ?? readTerminalExecutionStatus(typed.structuredResponse)
+        ?? readTerminalExecutionStatus(typed.messages)
+        ?? readTerminalExecutionStatus(typed.content)
+        ?? readTerminalExecutionStatus(typed.kwargs?.content)
+        ?? readTerminalExecutionStatus(typed.lc_kwargs?.content)
+        ?? readTerminalExecutionStatus(typed.output)
+        ?? readTerminalExecutionStatus(typed.data)); // NOTE(review): no depth limit or cycle guard - presumably inputs are acyclic; verify against callers
+}
+export function mapTerminalStatusToRequestState(status) { // Maps a terminal status to a request state of "failed" or "completed".
+    return status === "blocked" || status === "failed" || status === "refused"
+        ? "failed"
+        : "completed"; // any other value, including null (no status detected), maps to "completed"
+}
+export function mapTerminalStatusToPlanItemStatus(status) { // Maps a terminal status to a plan-item status of "completed" or "failed".
+    return status === "completed" ? "completed" : "failed"; // everything except the exact string "completed" maps to "failed"
+}

package/dist/runtime/agent-runtime-adapter.js CHANGED Viewed

@@ -1,9 +1,10 @@
 import path from "node:path";
-import { GENERAL_PURPOSE_SUBAGENT, createAsyncSubAgentMiddleware, createDeepAgent, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
+import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
 import { createAgent, humanInTheLoopMiddleware, todoListMiddleware } from "langchain";
 import { wrapResolvedModel, } from "./parsing/output-parsing.js";
-import { AGENT_INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, shouldAttachDeepAgentBackend, shouldAttachDeepAgentCheckpointer, shouldAttachDeepAgentStore, } from "./agent-runtime-assembly.js";
+import { AGENT_INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildDeepAgentSystemPromptWithCapabilityHierarchy, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, shouldAttachDeepAgentBackend, shouldAttachDeepAgentCheckpointer, shouldAttachDeepAgentStore, } from "./agent-runtime-assembly.js";
 import { resolveDeepAgentSkillSourcePaths, } from "./adapter/compat/deepagent-compat.js";
+import { EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION } from "./prompts/runtime-prompts.js";
 import { buildToolNameMapping, } from "./adapter/tool/tool-name-mapping.js";
 import { executeRequestInvocation } from "./adapter/flow/invocation-flow.js";
 import { streamRuntimeExecution } from "./adapter/flow/stream-runtime.js";
@@ -21,8 +22,22 @@ export { buildAuthOmittingFetch, normalizeOpenAICompatibleInit } from "./adapter
 export { buildToolNameMapping, createModelFacingToolNameCandidates, createModelFacingToolNameLookupCandidates, resolveModelFacingToolName, sanitizeToolNameForModel, } from "./adapter/tool/tool-name-mapping.js";
 export { computeRemainingTimeoutMs, isRetryableProviderError, resolveBindingTimeout, resolveProviderRetryPolicy, resolveStreamIdleTimeout, resolveTimeoutMs, } from "./adapter/resilience.js";
 import { getBindingAdapterKind, getBindingBuiltinToolsConfig, getBindingDeepAgentSubagents, getBindingExecutionParams, getBindingExecutionKind, getBindingFilesystemConfig, getBindingMemorySources, getBindingPrimaryModel, getBindingSkills, getBindingToolCount, getBindingPrimaryTools, getBindingSystemPrompt, isDeepAgentBinding, isLangChainBinding, } from "./support/compiled-binding.js";
+class DelegatedExecutionNoToolEvidenceError extends Error { // Error for a delegated agent that finished without any tool execution evidence.
+    constructor(agentId) {
+        super(`Delegated agent ${agentId} completed without tool execution evidence.`); // agentId is interpolated into the message as-is
+        this.name = "DelegatedExecutionNoToolEvidenceError"; // explicit name so the error is identifiable by `name`
+    }
+}
+function hasDelegatedExecutionToolEvidence(result) { // True when the result's metadata records at least one non-error tool result other than the todo tools.
+    const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
+        ? result.metadata.executedToolResults
+        : []; // missing metadata or a non-array value is treated as no tool results
+    return executedToolResults.some((toolResult) => (toolResult.isError !== true // only an explicit `true` marks a result as failed
+        && toolResult.toolName !== "write_todos" // the todo tools are excluded from counting as evidence
+        && toolResult.toolName !== "read_todos"));
+}
 function shouldUseConfigurableDeepAgentAssembly(binding) {
-    return getBindingBuiltinToolsConfig(binding) !== undefined;
+    return getBindingExecutionKind(binding) === "deepagent";
 }
 export class AgentRuntimeAdapter {
     options;
@@ -319,9 +334,18 @@ export class AgentRuntimeAdapter {
                         const childSessionId = `${sessionId}:delegated:${resolvedSubagent.name}`;
                         const childRequestId = `${requestId}:delegated:${resolvedSubagent.name}:${Date.now().toString(36)}`;
                         try {
-                            const result = await this.invoke(targetBinding, requestText, childSessionId, childRequestId, undefined, [], {
+                            const invokeOptions = {
                                 ...(typeof config?.context === "object" && config.context ? { context: config.context } : {}),
-                            });
+                            };
+                            const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(targetBinding, text, childSessionId, `${childRequestId}${requestSuffix}`, undefined, [], invokeOptions);
+                            let result = await runDelegatedRequest(requestText);
+                            const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(targetBinding).length > 0;
+                            if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(result)) {
+                                result = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
+                                if (!hasDelegatedExecutionToolEvidence(result)) {
+                                    throw new DelegatedExecutionNoToolEvidenceError(targetBinding.agent.id);
+                                }
+                            }
                             return wrapRequestResultAsSubagentResponse({
                                 output: result.output,
                                 structuredResponse: result.structuredResponse,
@@ -331,9 +355,7 @@ export class AgentRuntimeAdapter {
                             const message = error instanceof Error && error.message.trim().length > 0
                                 ? error.message.trim()
                                 : "delegated execution failed";
-                            return wrapRequestResultAsSubagentResponse({
-                                output: `Blocked: ${message}`,
-                            });
+                            throw new Error(message);
                         }
                     },
                 },
@@ -422,18 +444,6 @@ export class AgentRuntimeAdapter {
             ownerId: binding.agent.id,
             skillPaths: getBindingSkills(binding),
         }) ?? [];
-        const deepAgentConfig = buildDeepAgentCreateParams({
-            binding,
-            resolvedModel,
-            resolvedTools: [...resolvedTools, ...builtinMiddlewareTools],
-            resolvedMiddleware,
-            resolvedSubagents,
-            resolvedCheckpointer,
-            resolvedStore,
-            resolvedBackend,
-            resolvedInterruptOn,
-            resolvedSkills,
-        });
         if (shouldUseConfigurableDeepAgentAssembly(binding)) {
             return this.createConfigurableDeepAgentRunnable(binding, {
                 resolvedModel,
@@ -441,36 +451,33 @@ export class AgentRuntimeAdapter {
                 resolvedMiddleware,
                 resolvedSubagents,
                 resolvedInterruptOn,
+                resolvedCheckpointer,
+                resolvedStore,
                 resolvedBackend,
                 resolvedSkills,
             });
         }
-        return createDeepAgent(deepAgentConfig);
+        throw new Error(`Agent ${binding.agent.id} has no supported deepagent assembly path`);
     }
     createConfigurableDeepAgentRunnable(binding, input) {
         const builtinTools = getBindingBuiltinToolsConfig(binding) ?? {};
         const backend = (input.resolvedBackend ?? new StateBackend({}));
         const inlineSubagents = input.resolvedSubagents.filter((subagent) => !("graphId" in subagent));
         const asyncSubagents = input.resolvedSubagents.filter((subagent) => "graphId" in subagent);
-        const subagents = inlineSubagents.some((subagent) => subagent.name === GENERAL_PURPOSE_SUBAGENT.name)
-            ? inlineSubagents
-            : [{
-                    ...GENERAL_PURPOSE_SUBAGENT,
-                    model: input.resolvedModel,
-                    tools: input.resolvedTools,
-                    skills: input.resolvedSkills,
-                }, ...inlineSubagents];
+        const subagents = inlineSubagents;
         const middleware = [
             ...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
             ...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({ backend, sources: input.resolvedSkills })] : []),
             ...(builtinTools.filesystem === false ? [] : [createFilesystemMiddleware({ backend })]),
-            createSubAgentMiddleware({
-                defaultModel: input.resolvedModel,
-                defaultTools: input.resolvedTools,
-                defaultInterruptOn: input.resolvedInterruptOn,
-                subagents: subagents,
-                generalPurposeAgent: false,
-            }),
+            ...(subagents.length > 0
+                ? [createSubAgentMiddleware({
+                        defaultModel: input.resolvedModel,
+                        defaultTools: input.resolvedTools,
+                        defaultInterruptOn: input.resolvedInterruptOn,
+                        subagents: subagents,
+                        generalPurposeAgent: false,
+                    })]
+                : []),
             createSummarizationMiddleware({
                 model: input.resolvedModel,
                 backend,
@@ -487,10 +494,17 @@ export class AgentRuntimeAdapter {
             : undefined;
         return createAgent({
             model: input.resolvedModel,
-            systemPrompt: getBindingSystemPrompt(binding),
+            systemPrompt: buildDeepAgentSystemPromptWithCapabilityHierarchy({
+                systemPrompt: getBindingSystemPrompt(binding),
+                subagents: input.resolvedSubagents,
+                skills: input.resolvedSkills,
+                tools: getBindingPrimaryTools(binding),
+            }),
             tools: input.resolvedTools,
             middleware: middleware,
             name: binding.agent.id,
+            ...(input.resolvedCheckpointer !== undefined ? { checkpointer: input.resolvedCheckpointer } : {}),
+            ...(input.resolvedStore !== undefined ? { store: input.resolvedStore } : {}),
             ...(responseFormat !== undefined ? { responseFormat: responseFormat } : {}),
         });
     }

package/dist/runtime/agent-runtime-assembly.d.ts CHANGED Viewed

@@ -8,6 +8,16 @@ export declare function materializeModelExposedBuiltinMiddlewareTools(input: {
     explicitToolNames?: string[];
     modelExposed?: boolean | string[];
 }): unknown[];
+export declare function buildDeepAgentSystemPromptWithCapabilityHierarchy(input: {
+    systemPrompt?: unknown;
+    subagents: Array<Pick<UpstreamSubagentConfig, "name" | "description"> | Pick<CompiledAsyncSubAgent, "name" | "description">>;
+    skills?: string[];
+    tools?: Array<{
+        name: string;
+        description?: string;
+    }>;
+}): unknown;
+export declare const buildDeepAgentSystemPromptWithSubagentCatalog: typeof buildDeepAgentSystemPromptWithCapabilityHierarchy;
 export declare function resolveRunnableCheckpointer(options: RuntimeAdapterOptions, binding: CompiledAgentBinding): unknown;
 export declare function resolveRunnableInterruptOn(binding: CompiledAgentBinding): Record<string, {
     allowedDecisions: import("./adapter/tool/interrupt-policy.js").InterruptDecision[];

package/dist/runtime/agent-runtime-assembly.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { MemorySaver } from "@langchain/langgraph";
 import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
 import { asStructuredExecutableTool } from "./adapter/tool/resolved-tool.js";
 import { compileInterruptOn } from "./adapter/tool/interrupt-policy.js";
+import { readSkillMetadata } from "./skills/skill-metadata.js";
 import { getBindingBackendConfig, getBindingExecutionKind, getBindingExecutionParams, getBindingInterruptCompatibilityRules, getBindingMemorySources, getBindingMiddlewareConfigs, getBindingPrimaryTools, getBindingSkills, getBindingStoreConfig, } from "./support/compiled-binding.js";
 export const AGENT_INTERRUPT_SENTINEL_PREFIX = "__agent_harness_interrupt__:";
 export const DEFAULT_DEEPAGENT_RECURSION_LIMIT = 100;
@@ -37,6 +38,67 @@ export function materializeModelExposedBuiltinMiddlewareTools(input) {
     }
     return tools;
 }
+function formatCapabilityLine(item) {
+    const description = typeof item.description === "string" && item.description.length > 0
+        ? `: ${item.description}`
+        : "";
+    return `- ${JSON.stringify(item.name)}${description}`;
+}
+function buildSkillCatalog(skillPaths) {
+    return skillPaths.map((skillPath) => {
+        const metadata = readSkillMetadata(skillPath);
+        return {
+            name: metadata.name,
+            ...(metadata.description ? { description: metadata.description } : {}),
+        };
+    });
+}
+export function buildDeepAgentSystemPromptWithCapabilityHierarchy(input) {
+    const basePrompt = typeof input.systemPrompt === "string" ? input.systemPrompt : undefined;
+    const skills = buildSkillCatalog(input.skills ?? []);
+    const tools = input.tools ?? [];
+    if (input.subagents.length === 0 && skills.length === 0 && tools.length === 0) {
+        return input.systemPrompt;
+    }
+    const catalogPrompt = [
+        "Capability selection hierarchy:",
+        "1. If the current request fits an available subagent, delegate with the task tool before using local skills or raw tools.",
+        "2. If you are the selected agent and an available skill fits the request, read and follow that skill before calling raw tools.",
+        "3. Use raw tools as execution primitives for the selected agent or skill, or when no listed skill applies.",
+        "Keep each selection inside the selected capability's described responsibility boundary.",
+        "If no listed subagent, skill, or tool can responsibly handle the request, do not invent a path. Return a terminal response with status \"refused\" and explain the missing capability.",
+        "If a selected capability cannot complete after using its available tools, return a terminal response with status \"blocked\" or \"failed\" and include the blocker evidence.",
+        ...(input.subagents.length > 0
+            ? [
+                "",
+                "Available subagents for task delegation:",
+                ...input.subagents.map(formatCapabilityLine),
+                "",
+                "When using the task tool, set subagent_type to exactly one of the listed subagent names. Do not create, translate, alias, or modify subagent names.",
+            ]
+            : [
+                "",
+                "No configured specialist subagents are available for this agent. Do not use task delegation for role-internal work; select a skill first, then tools.",
+            ]),
+        ...(skills.length > 0
+            ? [
+                "",
+                "Available skills for this agent:",
+                ...skills.map(formatCapabilityLine),
+            ]
+            : []),
+        ...(tools.length > 0
+            ? [
+                "",
+                "Raw tools available to this agent:",
+                ...tools.map(formatCapabilityLine),
+            ]
+            : []),
+        "",
+    ].join("\n");
+    return [basePrompt, catalogPrompt].filter((part) => typeof part === "string" && part.length > 0).join("\n\n");
+}
+export const buildDeepAgentSystemPromptWithSubagentCatalog = buildDeepAgentSystemPromptWithCapabilityHierarchy;
 export function resolveRunnableCheckpointer(options, binding) {
     return options.checkpointerResolver ? options.checkpointerResolver(binding) : new MemorySaver();
 }
@@ -146,6 +208,12 @@ export function buildDeepAgentCreateParams(input) {
     ]);
     return {
         ...upstreamParams,
+        systemPrompt: buildDeepAgentSystemPromptWithSubagentCatalog({
+            systemPrompt: upstreamParams.systemPrompt,
+            subagents: input.resolvedSubagents,
+            skills: input.resolvedSkills,
+            tools: getBindingPrimaryTools(input.binding),
+        }),
         skills: input.resolvedSkills,
         model: input.resolvedModel,
         tools: input.resolvedTools,

package/dist/runtime/harness/run/stream-run.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { resolveDeterministicFinalOutput, } from "../../adapter/invocation-resul
 import { AGENT_INTERRUPT_SENTINEL_PREFIX, RuntimeOperationTimeoutError } from "../../agent-runtime-adapter.js";
 import { ExecutionReconciliationError } from "../../adapter/flow/stream-runtime.js";
 import { buildRequestPlanState, summarizeBuiltinWriteTodosArgs } from "../../adapter/runtime-adapter-support.js";
+import { mapTerminalStatusToPlanItemStatus, mapTerminalStatusToRequestState, readTerminalExecutionStatus, } from "../../adapter/terminal-status.js";
 import { sanitizeVisibleText } from "../../parsing/output-parsing.js";
 import { describeRuntimeError, renderRuntimeFailure, renderToolFailure } from "../../support/harness-support.js";
 import { getBindingPrimaryModel } from "../../support/compiled-binding.js";
@@ -37,27 +38,6 @@ function planStateHasActiveItems(planState) {
     }
     return planState.summary.pending > 0 || planState.summary.inProgress > 0;
 }
-function readTerminalStructuredStatus(value) {
-    if (typeof value === "string") {
-        try {
-            return readTerminalStructuredStatus(JSON.parse(value));
-        }
-        catch {
-            return /^\s*Status:\s*completed\b/im.test(value) ? "completed" : null;
-        }
-    }
-    if (typeof value !== "object" || value === null) {
-        return null;
-    }
-    const typed = value;
-    if (typed.status === "completed") {
-        return typed.status;
-    }
-    return (readTerminalStructuredStatus(typed.structuredResponse)
-        ?? readTerminalStructuredStatus(typed.content)
-        ?? readTerminalStructuredStatus(typed.output)
-        ?? readTerminalStructuredStatus(typed.data));
-}
 function isSubstantiveTerminalAssistantOutput(value) {
     const normalized = sanitizeVisibleText(value).trim();
     if (normalized.length < 80) {
@@ -898,10 +878,10 @@ export async function* streamHarnessRun(options) {
                     }
                 }
                 const terminalStructuredStatus = normalizedChunk.toolName === "task"
-                    ? readTerminalStructuredStatus(normalizedChunk.output)
+                    ? readTerminalExecutionStatus(normalizedChunk.output)
                     : null;
                 if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
-                    const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalStructuredStatus, new Date().toISOString());
+                    const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
                     const signature = buildPlanStateSignature(reconciledPlanState);
                     if (signature !== lastPlanStateSignature) {
                         const previousPlanState = currentPlanState;
@@ -1005,9 +985,9 @@ export async function* streamHarnessRun(options) {
                 }
             }
             currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
-            const terminalStructuredStatus = readTerminalStructuredStatus(actual.structuredResponse);
+            const terminalStructuredStatus = readTerminalExecutionStatus(actual.structuredResponse);
             if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
-                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalStructuredStatus, new Date().toISOString());
+                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
                 const signature = buildPlanStateSignature(reconciledPlanState);
                 if (signature !== lastPlanStateSignature) {
                     const previousPlanState = currentPlanState;
@@ -1075,8 +1055,10 @@ export async function* streamHarnessRun(options) {
                 content: assistantOutput,
             };
         }
+        const terminalStatus = readTerminalExecutionStatus(assistantOutput);
+        const terminalRequestState = mapTerminalStatusToRequestState(terminalStatus);
         await options.appendAssistantMessage(options.sessionId, options.requestId, assistantOutput);
-        const completedEvent = await options.setRequestStateAndEmit(options.sessionId, options.requestId, 6, "completed", {
+        const completedEvent = await options.setRequestStateAndEmit(options.sessionId, options.requestId, 6, terminalRequestState, {
             previousState: "running",
         });
         yield {
@@ -1089,9 +1071,10 @@ export async function* streamHarnessRun(options) {
                 sessionId: options.sessionId,
                 requestId: options.requestId,
                 agentId: currentAgentId,
-                state: "completed",
+                state: terminalRequestState,
                 output: assistantOutput,
                 finalMessageText: assistantOutput,
+                ...(terminalStatus ? { metadata: { terminalStatus } } : {}),
             },
         };
     }
@@ -1101,9 +1084,9 @@ export async function* streamHarnessRun(options) {
             executedToolResults,
         });
         if (!assistantOutput && sawSuccessfulToolResult && deterministicToolEvidenceOutput) {
-            const terminalStructuredStatus = readTerminalStructuredStatus(deterministicToolEvidenceOutput);
+            const terminalStructuredStatus = readTerminalExecutionStatus(deterministicToolEvidenceOutput);
             if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
-                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalStructuredStatus, new Date().toISOString());
+                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
                 const signature = buildPlanStateSignature(reconciledPlanState);
                 if (signature !== lastPlanStateSignature) {
                     const previousPlanState = currentPlanState;
@@ -1137,7 +1120,9 @@ export async function* streamHarnessRun(options) {
                 agentId: currentAgentId,
                 content: deterministicToolEvidenceOutput,
             };
-            const completedEvent = await options.setRequestStateAndEmit(options.sessionId, options.requestId, 6, "completed", {
+            const terminalStatus = readTerminalExecutionStatus(deterministicToolEvidenceOutput);
+            const terminalRequestState = mapTerminalStatusToRequestState(terminalStatus);
+            const completedEvent = await options.setRequestStateAndEmit(options.sessionId, options.requestId, 6, terminalRequestState, {
                 previousState: "running",
             });
             yield {
@@ -1150,11 +1135,12 @@ export async function* streamHarnessRun(options) {
                     sessionId: options.sessionId,
                     requestId: options.requestId,
                     agentId: currentAgentId,
-                    state: "completed",
+                    state: terminalRequestState,
                     output: deterministicToolEvidenceOutput,
                     finalMessageText: deterministicToolEvidenceOutput,
                     metadata: {
                         executedToolResults,
+                        ...(terminalStatus ? { terminalStatus } : {}),
                     },
                 },
             };

package/dist/runtime/parsing/output-recovery.d.ts CHANGED Viewed

@@ -7,10 +7,10 @@ export declare function isRepairableWriteTodosEmptyFailure(error: unknown): bool
 export declare function isToolCallRecoveryFailure(error: unknown): boolean;
 export declare function isRetrySafeInvalidToolSelectionError(value: unknown): boolean;
 export declare function shouldValidateExecutionWithoutToolEvidence(request: unknown): boolean;
-export declare function shouldRequireVisibleTodoPlan(request: unknown): boolean;
 export declare function resolveMissingPlanRecoveryInstruction(params: {
     request: unknown;
     assistantText?: string;
+    requiresPlan?: boolean;
     hasPlanStateEvidence?: boolean;
     hasWriteTodosEvidence?: boolean;
     hasToolResultEvidence?: boolean;
@@ -23,6 +23,7 @@ export declare function resolveExecutionWithoutToolEvidenceTextInstruction(reque
     hasPlanStateEvidence?: boolean;
     hasOpenTaskDelegation?: boolean;
     hasMissingDelegatedExecutionEvidence?: boolean;
+    requiresPlan?: boolean;
 }): string | null;
 export declare function resolveToolCallRecoveryInstruction(error: unknown): string | null;
 export declare function appendToolRecoveryInstruction(input: unknown, instruction: string): unknown;

package/dist/runtime/parsing/output-recovery.js CHANGED Viewed

@@ -111,34 +111,10 @@ export function shouldValidateExecutionWithoutToolEvidence(request) {
     }
     return readSystemInstructionText(request).length > 0;
 }
-export function shouldRequireVisibleTodoPlan(request) {
-    const userText = readLatestUserRequestText(request).toLowerCase();
-    if (!userText) {
-        return false;
-    }
-    return [
-        "investigate",
-        "investigation",
-        "issue",
-        "issues",
-        "rca",
-        "root cause",
-        "go deeper",
-        "deep research",
-        "debug",
-        "排查",
-        "调查",
-        "问题",
-        "根因",
-        "故障",
-        "集群",
-        "cluster",
-    ].some((keyword) => userText.includes(keyword));
-}
 export function resolveMissingPlanRecoveryInstruction(params) {
     const hasPlanEvidence = params.hasWriteTodosEvidence === true
         || params.hasPlanStateEvidence === true;
-    if (!shouldRequireVisibleTodoPlan(params.request) || hasPlanEvidence) {
+    if (params.requiresPlan !== true || hasPlanEvidence) {
         return null;
     }
     if (params.hasToolResultEvidence === true) {
@@ -158,6 +134,7 @@ export function resolveExecutionWithoutToolEvidenceTextInstruction(request, assi
     const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
         request,
         assistantText: normalizedText,
+        requiresPlan: resultEvidence.requiresPlan,
         hasWriteTodosEvidence: resultEvidence.hasWriteTodosEvidence,
         hasPlanStateEvidence: resultEvidence.hasIncompletePlanState === true || resultEvidence.hasPlanStateEvidence === true,
         hasToolResultEvidence: resultEvidence.hasToolResultEvidence,

package/dist/runtime/prompts/runtime-prompts.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export declare const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION: string;
 export declare const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION: string;
 export declare const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION: string;
 export declare const AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION: string;
+export declare const DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION: string;
 export declare const INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION: string;
 export declare const WORKSPACE_RELATIVE_PATH_INSTRUCTION: string;
 export declare function renderDurableMemoryContextPrompt(memoryContext: string): string;

package/dist/runtime/prompts/runtime-prompts.js CHANGED Viewed

@@ -14,6 +14,7 @@ export const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION = readRuntimePrompt("write-to
 export const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence");
 export const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence-retry");
 export const AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION = readRuntimePrompt("autonomous-investigation-recovery");
+export const DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION = readRuntimePrompt("delegated-task-failure-recovery");
 export const INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION = readRuntimePrompt("internal-runtime-spill-path");
 export const WORKSPACE_RELATIVE_PATH_INSTRUCTION = readRuntimePrompt("workspace-relative-path");
 export function renderDurableMemoryContextPrompt(memoryContext) {

package/dist/workspace/agent-binding-compiler.js CHANGED Viewed

@@ -199,6 +199,15 @@ function resolveResponseFormat(agent) {
 function resolveContextSchema(agent) {
     return getAgentExecutionConfigValue(agent, "contextSchema");
 }
+function resolveExecutionContract(agent) {
+    const value = getAgentExecutionObject(agent, "executionContract");
+    if (!value) {
+        return undefined;
+    }
+    return {
+        ...(value.requiresPlan === true ? { requiresPlan: true } : {}),
+    };
+}
 function resolveCompiledMiddleware(agent, models) {
     const middleware = getAgentExecutionConfigValue(agent, "middleware");
     return compileMiddlewareConfigs(middleware, models, agent.id);
@@ -399,6 +408,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
         : undefined;
     const runtimeGovernanceDefaults = asObject(runtimeDefaults?.governance);
     const runtimeObservabilityDefaults = asObject(runtimeDefaults?.observability);
+    const executionContract = resolveExecutionContract(agent);
     const compiledFilesystemConfig = agent.executionMode === "langchain-v1"
         ? mergeConfigObjects(runtimeFilesystemDefaults, getAgentExecutionObject(agent, "filesystem", { executionMode: "langchain-v1" }))
         : undefined;
@@ -417,6 +427,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
             resilience,
             ...(runtimeGovernanceDefaults ? { governance: runtimeGovernanceDefaults } : {}),
             ...(runtimeObservabilityDefaults ? { observability: runtimeObservabilityDefaults } : {}),
+            ...(executionContract ? { executionContract } : {}),
             ...(agent.executionMode === "deepagent"
                 ? {
                     deepagent: {

package/dist/workspace/framework-contract-validation.js CHANGED Viewed

@@ -1,8 +1,11 @@
-import { readFileSync } from "node:fs";
 import path from "node:path";
 import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
 import { getAgentExecutionConfigValue } from "./support/agent-execution-config.js";
 import { resolvePromptValue } from "./support/workspace-ref-utils.js";
+const FORBIDDEN_GENERAL_PURPOSE_SUBAGENT_NAME = "general-purpose";
+const FRAMEWORK_AGENT_TOOL_NAMES = new Set(["task"]);
+const FRAMEWORK_EXECUTION_TOOL_NAMES = new Set(["write_todos", "read_todos"]);
+const TERMINAL_STATUS_VALUES = new Set(["completed", "blocked", "failed", "refused"]);
 function normalizeMode(mode) {
     if (mode === "warn" || mode === "error") {
         return mode;
@@ -29,7 +32,76 @@ function isWorkspaceOwnedPath(candidate, roots) {
 function addIssue(issues, code, message) {
     issues.push({ code, message });
 }
-function validateAgentContract(agent, referencedSubagentIds, issues) {
+function stripRefPrefix(value, prefix) {
+    return value.startsWith(prefix) ? value.slice(prefix.length) : value;
+}
+function resolveRefId(value) {
+    return stripRefPrefix(stripRefPrefix(value, "agent/"), "tool/");
+}
+function readBuiltinToolsConfig(agent) {
+    const value = getAgentExecutionConfigValue(agent, "builtinTools");
+    return typeof value === "object" && value && !Array.isArray(value)
+        ? value
+        : undefined;
+}
+function readExecutionContractConfig(agent) {
+    const value = getAgentExecutionConfigValue(agent, "executionContract");
+    return typeof value === "object" && value && !Array.isArray(value)
+        ? value
+        : undefined;
+}
+function collectAgentToolNames(agent, tools, ownsDelegation) {
+    const names = new Set(FRAMEWORK_EXECUTION_TOOL_NAMES);
+    if (ownsDelegation) {
+        for (const toolName of FRAMEWORK_AGENT_TOOL_NAMES) {
+            names.add(toolName);
+        }
+    }
+    for (const ref of agent.toolRefs) {
+        const tool = tools.get(resolveRefId(ref));
+        if (tool) {
+            names.add(tool.id);
+            names.add(tool.name);
+        }
+        names.add(resolveRefId(ref));
+    }
+    for (const binding of agent.toolBindings ?? []) {
+        const tool = tools.get(resolveRefId(binding.ref));
+        if (tool) {
+            names.add(tool.id);
+            names.add(tool.name);
+        }
+        names.add(resolveRefId(binding.ref));
+    }
+    for (const tool of agent.inlineTools ?? []) {
+        names.add(tool.id);
+        names.add(tool.name);
+    }
+    return names;
+}
+function hasDuplicateValues(values) {
+    return new Set(values).size !== values.length;
+}
+function readObject(value) {
+    return typeof value === "object" && value !== null && !Array.isArray(value)
+        ? value
+        : undefined;
+}
+function validateResponseFormatTerminalStatus(agent, responseFormat, issues) {
+    const schema = readObject(responseFormat);
+    const properties = readObject(schema?.properties);
+    const statusProperty = readObject(properties?.status);
+    const required = Array.isArray(schema?.required) ? schema.required : [];
+    if (!statusProperty || !required.includes("status")) {
+        addIssue(issues, "agent.response_format.missing_terminal_status", `Agent ${agent.id} responseFormat must require a status field so parents can distinguish completed, blocked, failed, and refused terminal states.`);
+        return;
+    }
+    const statusEnum = Array.isArray(statusProperty.enum) ? statusProperty.enum : [];
+    if (!Array.from(TERMINAL_STATUS_VALUES).every((value) => statusEnum.includes(value))) {
+        addIssue(issues, "agent.response_format.incomplete_terminal_status_enum", `Agent ${agent.id} responseFormat status enum must include completed, blocked, failed, and refused.`);
+    }
+}
+function validateAgentContract(agent, referencedSubagentIds, tools, issues) {
     const description = agent.description.trim();
     const systemPrompt = resolvePromptValue(getAgentExecutionConfigValue(agent, "systemPrompt"), path.dirname(agent.sourcePath));
     const ownsDelegation = agent.subagentRefs.length > 0 || agent.subagentPathRefs.length > 0 || (agent.asyncSubagents?.length ?? 0) > 0;
@@ -38,45 +110,72 @@ function validateAgentContract(agent, referencedSubagentIds, issues) {
         || (agent.toolBindings?.length ?? 0) > 0
         || (agent.inlineTools?.length ?? 0) > 0;
     const responseFormat = getAgentExecutionConfigValue(agent, "responseFormat");
+    const builtinTools = readBuiltinToolsConfig(agent);
+    const executionContract = readExecutionContractConfig(agent);
+    const localSubagentNames = [
+        ...agent.subagentRefs.map(resolveRefId),
+        ...(agent.asyncSubagents ?? []).map((subagent) => subagent.name),
+    ];
+    if (agent.id === FORBIDDEN_GENERAL_PURPOSE_SUBAGENT_NAME) {
+        addIssue(issues, "agent.general_purpose.forbidden", `Agent ${agent.id} uses the reserved general-purpose subagent name. Define explicit specialists with narrow responsibilities instead.`);
+    }
+    for (const asyncSubagent of agent.asyncSubagents ?? []) {
+        if (asyncSubagent.name === FORBIDDEN_GENERAL_PURPOSE_SUBAGENT_NAME) {
+            addIssue(issues, "agent.general_purpose.forbidden", `Agent ${agent.id} defines async subagent ${asyncSubagent.name}. Define explicit specialists with narrow responsibilities instead.`);
+        }
+    }
+    if (localSubagentNames.includes(FORBIDDEN_GENERAL_PURPOSE_SUBAGENT_NAME)) {
+        addIssue(issues, "agent.general_purpose.forbidden", `Agent ${agent.id} references reserved subagent name ${FORBIDDEN_GENERAL_PURPOSE_SUBAGENT_NAME}. Define explicit specialists with narrow responsibilities instead.`);
+    }
+    if (hasDuplicateValues(localSubagentNames)) {
+        addIssue(issues, "agent.subagent.duplicate_name", `Agent ${agent.id} exposes duplicate subagent names. Each delegated capability must have one stable owner.`);
+    }
     if (description.length < 24) {
         addIssue(issues, "agent.description.too_short", `Agent ${agent.id} should use a more specific description that explains when it should be used.`);
     }
+    if (executionContract?.requiresPlan === true && builtinTools?.todos === false) {
+        addIssue(issues, "agent.execution_contract.plan_without_todos", `Agent ${agent.id} requires plan evidence but disables todo tools. Enable todo tools or remove config.executionContract.requiresPlan.`);
+    }
     if (ownsDelegation) {
+        if (hasTools) {
+            addIssue(issues, "agent.orchestrator.mixed_tool_surface", `Delegating agent ${agent.id} defines both subagents and direct tools. Keep routing agents focused on delegation, and move execution tools to specialist agents.`);
+        }
+        if (builtinTools?.modelExposed !== false) {
+            addIssue(issues, "agent.orchestrator.model_exposed_builtins", `Delegating agent ${agent.id} should set config.builtinTools.modelExposed: false so raw built-in tools do not compete with specialist routing.`);
+        }
         if (!systemPrompt?.trim()) {
             addIssue(issues, "agent.orchestrator.missing_prompt", `Delegating agent ${agent.id} should define a systemPrompt that explains decomposition, delegation, synthesis, and stop conditions.`);
         }
-        if (!/(delegate|delegation|subagent|decompose|synthesi|answer directly|parallel)/i.test(description)) {
-            addIssue(issues, "agent.orchestrator.description_boundary", `Delegating agent ${agent.id} description should make its delegation boundary explicit, for example when it should answer directly versus delegate.`);
-        }
     }
     if (isSubagent) {
         if (!systemPrompt?.trim()) {
             addIssue(issues, "agent.subagent.missing_prompt", `Subagent ${agent.id} should define a systemPrompt that makes its operating boundary and output contract explicit.`);
         }
-        if (!/(use this when|when the task|for .*?(analysis|research|search|debug|review|triage|inspection|extraction|comparison|validation|implementation))/i.test(description)) {
-            addIssue(issues, "agent.subagent.description_trigger", `Subagent ${agent.id} description should clarify when it should be delegated to and what narrow task class it owns.`);
-        }
         if (agent.executionMode === "deepagent" && hasTools && responseFormat === undefined) {
             addIssue(issues, "agent.subagent.deepagent.missing_response_format", `DeepAgents subagent ${agent.id} exposes tools, so it should define config.responseFormat to guarantee a stable task result for its parent agent.`);
         }
+        if (agent.executionMode === "deepagent" && hasTools && responseFormat !== undefined) {
+            validateResponseFormatTerminalStatus(agent, responseFormat, issues);
+        }
+        if (hasTools && agent.skillPathRefs.length === 0) {
+            addIssue(issues, "agent.subagent.tools_without_skills", `Subagent ${agent.id} exposes execution tools but no skills. Add skills that describe tool-selection workflows and boundaries.`);
+        }
+    }
+    const toolNames = collectAgentToolNames(agent, tools, ownsDelegation);
+    for (const skillPath of agent.skillPathRefs) {
+        const metadata = validateSkillMetadata(skillPath);
+        for (const allowedTool of metadata.allowedTools ?? []) {
+            if (!toolNames.has(allowedTool)) {
+                addIssue(issues, "agent.skill.allowed_tool_unavailable", `Agent ${agent.id} attaches skill ${metadata.name}, but that skill allows tool ${allowedTool} which is not available to the agent.`);
+            }
+        }
     }
-}
-function stripFrontmatter(document) {
-    return document.replace(/^---\s*\n[\s\S]*?\n---\s*(?:\n|$)/, "");
 }
 function validateSkillContract(skillRoot, issues) {
     const metadata = validateSkillMetadata(skillRoot);
-    const document = readFileSync(path.join(skillRoot, "SKILL.md"), "utf8");
-    const body = stripFrontmatter(document);
     const skillName = metadata.name || path.basename(skillRoot);
-    if (!/(Use this skill when|Use this when)/i.test(body)) {
-        addIssue(issues, "skill.missing_trigger", `Skill ${skillName} should explain when it should be used, preferably with a clear "Use this skill when..." trigger.`);
-    }
-    if (!/(## Workflow|^## Workflow|^\d+\.\s)/m.test(body)) {
-        addIssue(issues, "skill.missing_workflow", `Skill ${skillName} should define an explicit workflow instead of only background prose.`);
-    }
-    if (!/(## Rules|Do not|Output|Caveat|Caveats)/i.test(body)) {
-        addIssue(issues, "skill.missing_boundaries", `Skill ${skillName} should include execution boundaries such as rules, non-goals, caveats, or output expectations.`);
+    if (!metadata.description?.trim()) {
+        addIssue(issues, "skill.description.missing", `Skill ${skillName} must define a frontmatter description so agents can compare its boundary without reading the whole document.`);
     }
 }
 function validateToolContract(tool, issues) {
@@ -85,9 +184,6 @@ function validateToolContract(tool, issues) {
         addIssue(issues, "tool.description.too_short", `Tool ${tool.id} should use a more specific description that explains invocation boundaries and argument expectations.`);
         return;
     }
-    if (!/(Use this when|Do not use|Before calling)/i.test(description)) {
-        addIssue(issues, "tool.description.missing_boundary", `Tool ${tool.id} description should describe when to call it and, ideally, when not to call it or what must be true before calling it.`);
-    }
 }
 export function validateFrameworkContracts(input) {
     const mode = normalizeMode(input.mode);
@@ -95,12 +191,12 @@ export function validateFrameworkContracts(input) {
         return;
     }
     const issues = [];
-    const referencedSubagentIds = new Set(input.agents.flatMap((agent) => agent.subagentRefs.map((ref) => ref.replace(/^agent\//, ""))));
+    const referencedSubagentIds = new Set(input.agents.flatMap((agent) => agent.subagentRefs.map(resolveRefId)));
     for (const agent of input.agents) {
         if (!isWorkspaceOwnedPath(agent.sourcePath, input.ownedRoots)) {
             continue;
         }
-        validateAgentContract(agent, referencedSubagentIds, issues);
+        validateAgentContract(agent, referencedSubagentIds, input.tools, issues);
     }
     for (const [skillName, skillRoot] of input.skillRegistry) {
         if (!isWorkspaceOwnedPath(skillRoot, input.ownedRoots)) {

package/dist/workspace/object-loader.js CHANGED Viewed

@@ -29,6 +29,7 @@ const CONSUMED_AGENT_CONFIG_KEYS = [
     "filesystem",
     "builtinTools",
     "interactionMode",
+    "executionContract",
 ];
 const NON_AGENT_CONFIG_ITEM_KEYS = [
     "id",
@@ -65,6 +66,7 @@ const MIGRATED_AGENT_CONFIG_KEYS = [
     "filesystem",
     "builtinTools",
     "interactionMode",
+    "executionContract",
 ];
 function normalizeAgentItemForMerge(item) {
     const normalized = { ...item };
@@ -267,6 +269,7 @@ function readSharedAgentConfig(config) {
         ...(config.includeAgentName === "inline" ? { includeAgentName: "inline" } : {}),
         ...(config.version === "v1" || config.version === "v2" ? { version: config.version } : {}),
         ...(typeof config.filesystem === "object" && config.filesystem ? { filesystem: config.filesystem } : {}),
+        ...(typeof config.executionContract === "object" && config.executionContract ? { executionContract: config.executionContract } : {}),
         ...(backend ? { backend } : {}),
         ...(store ? { store } : {}),
         ...(middleware ? { middleware } : {}),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@botbotgo/agent-harness",
-  "version": "0.0.346",
+  "version": "0.0.347",
   "description": "Workspace runtime for multi-agent applications",
   "license": "MIT",
   "type": "module",