@botbotgo/agent-harness 0.0.327 → 0.0.328

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/cli/main.js +30 -3
  2. package/dist/contracts/runtime-requests.d.ts +1 -2
  3. package/dist/contracts/runtime-scheduling.d.ts +1 -1
  4. package/dist/flow/flow-graph-upstream.js +3 -7
  5. package/dist/package-version.d.ts +1 -1
  6. package/dist/package-version.js +1 -1
  7. package/dist/projections/request-events.js +0 -1
  8. package/dist/resource/isolation.js +51 -10
  9. package/dist/resources/toolkit.mjs +183 -0
  10. package/dist/resources/tools/cancel_request.mjs +1 -1
  11. package/dist/resources/tools/fetch_url.mjs +1 -1
  12. package/dist/resources/tools/http_request.mjs +1 -1
  13. package/dist/resources/tools/inspect_approvals.mjs +1 -1
  14. package/dist/resources/tools/inspect_artifacts.mjs +1 -1
  15. package/dist/resources/tools/inspect_events.mjs +1 -1
  16. package/dist/resources/tools/inspect_requests.mjs +1 -1
  17. package/dist/resources/tools/inspect_sessions.mjs +1 -1
  18. package/dist/resources/tools/list_files.mjs +1 -1
  19. package/dist/resources/tools/read_artifact.mjs +1 -1
  20. package/dist/resources/tools/request_approval.mjs +1 -1
  21. package/dist/resources/tools/run_command.mjs +1 -1
  22. package/dist/resources/tools/schedule_task.mjs +1 -1
  23. package/dist/resources/tools/search_files.mjs +1 -1
  24. package/dist/resources/tools/send_message.mjs +1 -1
  25. package/dist/runtime/adapter/compat/deepagent-compat.d.ts +0 -9
  26. package/dist/runtime/adapter/compat/deepagent-compat.js +0 -22
  27. package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
  28. package/dist/runtime/adapter/flow/stream-runtime.js +239 -8
  29. package/dist/runtime/adapter/local-tool-invocation.js +53 -0
  30. package/dist/runtime/adapter/middleware-assembly.js +174 -29
  31. package/dist/runtime/adapter/runtime-adapter-support.js +1 -2
  32. package/dist/runtime/adapter/stream-event-projection.d.ts +17 -0
  33. package/dist/runtime/adapter/stream-event-projection.js +217 -4
  34. package/dist/runtime/adapter/tool/builtin-middleware-tools.d.ts +0 -3
  35. package/dist/runtime/adapter/tool/builtin-middleware-tools.js +37 -17
  36. package/dist/runtime/adapter/tool/resolved-tool.js +29 -3
  37. package/dist/runtime/agent-runtime-adapter.d.ts +3 -3
  38. package/dist/runtime/agent-runtime-adapter.js +12 -33
  39. package/dist/runtime/agent-runtime-assembly.d.ts +3 -21
  40. package/dist/runtime/agent-runtime-assembly.js +4 -56
  41. package/dist/runtime/harness/run/inspection.js +21 -5
  42. package/dist/runtime/harness/run/run-operations.js +2 -1
  43. package/dist/runtime/harness/run/stream-run.d.ts +3 -1
  44. package/dist/runtime/harness/run/stream-run.js +205 -29
  45. package/dist/runtime/harness.js +3 -0
  46. package/dist/runtime/parsing/output-content.js +11 -4
  47. package/dist/runtime/parsing/output-recovery.d.ts +3 -0
  48. package/dist/runtime/parsing/output-recovery.js +57 -11
  49. package/dist/runtime/parsing/output-tool-args.d.ts +4 -0
  50. package/dist/runtime/parsing/output-tool-args.js +122 -0
  51. package/dist/runtime/parsing/stream-event-parsing.js +37 -3
  52. package/dist/runtime/support/harness-support.d.ts +1 -0
  53. package/dist/runtime/support/harness-support.js +44 -2
  54. package/dist/tools.js +34 -4
  55. package/package.json +8 -8
@@ -1,14 +1,109 @@
1
- import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
1
+ import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
2
2
  import { buildInvocationRequest } from "../model/invocation-request.js";
3
3
  import { buildRawModelMessages } from "../model/message-assembly.js";
4
4
  import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
5
5
  import { projectTextStreamChunks } from "../stream-text-consumption.js";
6
6
  import { computeRemainingTimeoutMs } from "../resilience.js";
7
7
  import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../upstream-configurable-keys.js";
8
+ export class ExecutionReconciliationError extends Error {
9
+ constructor(message) {
10
+ super(message);
11
+ this.name = "ExecutionReconciliationError";
12
+ }
13
+ }
8
14
  function toVisibleContent(value) {
9
15
  const extracted = extractVisibleOutput(value);
10
16
  return extracted ? sanitizeVisibleText(extracted) : "";
11
17
  }
18
+ function readTerminalEventVisibleOutput(event) {
19
+ if (typeof event !== "object" || event === null) {
20
+ return "";
21
+ }
22
+ const typed = event;
23
+ const eventName = typeof typed.event === "string" ? typed.event : "";
24
+ if (eventName !== "on_chat_model_end" && eventName !== "on_chain_end") {
25
+ return "";
26
+ }
27
+ return toVisibleContent(typed.data?.output);
28
+ }
29
+ function hasIncompletePlanStateInExecutedToolResults(executedToolResults) {
30
+ for (const latest of [...executedToolResults].reverse()) {
31
+ if (typeof latest.output !== "object" || latest.output === null) {
32
+ continue;
33
+ }
34
+ const summaryContainer = latest.output.summary;
35
+ if (typeof summaryContainer !== "object" || summaryContainer === null) {
36
+ continue;
37
+ }
38
+ const summary = summaryContainer.summary;
39
+ if (typeof summary !== "object" || summary === null) {
40
+ continue;
41
+ }
42
+ const typedSummary = summary;
43
+ const pending = typeof typedSummary.pending === "number" ? typedSummary.pending : 0;
44
+ const inProgress = typeof typedSummary.inProgress === "number" ? typedSummary.inProgress : 0;
45
+ return pending > 0 || inProgress > 0;
46
+ }
47
+ return false;
48
+ }
49
+ function hasNonTodoToolEvidence(executedToolResults) {
50
+ return executedToolResults.some((item) => item.toolName !== "write_todos" && item.toolName !== "read_todos");
51
+ }
52
+ function hasSuccessfulNonTodoToolEvidence(executedToolResults) {
53
+ return executedToolResults.some((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos");
54
+ }
55
+ function hasSuccessfulTaskToolEvidence(executedToolResults) {
56
+ return executedToolResults.some((item) => item.isError !== true && item.toolName === "task");
57
+ }
58
+ function buildExecutionRecoveryEvidence(params) {
59
+ const { projectionState, executedToolResults = [] } = params;
60
+ return {
61
+ hasToolResultEvidence: executedToolResults.length > 0 || projectionState.emittedToolResult || projectionState.emittedToolError,
62
+ hasSuccessfulToolResultEvidence: executedToolResults.some((item) => item.isError !== true) || projectionState.emittedSuccessfulToolResult,
63
+ hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
64
+ hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
65
+ hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
66
+ hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
67
+ hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
68
+ hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation,
69
+ hasDelegatedAgentWithConfiguredTools: projectionState.sawDelegatedAgentWithConfiguredTools,
70
+ hasDelegatedExecutionToolEvidence: projectionState.emittedDelegatedExecutionToolResult,
71
+ hasOnlyPlaceholderTaskCompletion: projectionState.emittedSuccessfulTaskResult
72
+ && projectionState.emittedPlaceholderTaskResult
73
+ && !projectionState.emittedDelegatedTerminalOutput,
74
+ };
75
+ }
76
+ function hasUnresolvedExecution(evidence) {
77
+ return (evidence.hasIncompletePlanState
78
+ || evidence.hasFailedTaskDelegation
79
+ || evidence.hasOpenTaskDelegation);
80
+ }
81
+ function hasMissingDelegatedExecutionEvidence(evidence) {
82
+ return evidence.hasDelegatedAgentWithConfiguredTools && !evidence.hasDelegatedExecutionToolEvidence;
83
+ }
84
+ function hasMissingDelegatedFindings(evidence) {
85
+ return evidence.hasDelegatedAgentWithConfiguredTools && evidence.hasOnlyPlaceholderTaskCompletion;
86
+ }
87
+ function createUnresolvedExecutionError(evidence) {
88
+ const reasons = [];
89
+ if (evidence.hasIncompletePlanState) {
90
+ reasons.push("plan state still has unfinished work");
91
+ }
92
+ if (evidence.hasFailedTaskDelegation) {
93
+ reasons.push("delegated task failed before surfacing final findings");
94
+ }
95
+ if (evidence.hasOpenTaskDelegation) {
96
+ reasons.push("delegated task has not finished");
97
+ }
98
+ if (hasMissingDelegatedExecutionEvidence(evidence)) {
99
+ reasons.push("delegated agent ended without surfacing any real tool execution evidence");
100
+ }
101
+ if (hasMissingDelegatedFindings(evidence)) {
102
+ reasons.push("delegated task returned only the upstream placeholder result without surfaced final findings");
103
+ }
104
+ const detail = reasons.length > 0 ? `: ${reasons.join("; ")}` : "";
105
+ return new ExecutionReconciliationError(`Agent ended before execution was fully reconciled${detail}.`);
106
+ }
12
107
  function createProfileStep(id, kind, name, action, status, detail) {
13
108
  return {
14
109
  kind: "profile",
@@ -49,6 +144,7 @@ export async function* streamRuntimeExecution(options) {
49
144
  const shouldValidateStreamOutput = shouldValidateExecutionWithoutToolEvidence(request);
50
145
  const deferredStreamContent = [];
51
146
  let sawRetrySafeInvalidToolSelectionError = false;
147
+ const projectionState = createStreamEventProjectionState();
52
148
  const shouldDeferStreamContent = () => shouldValidateStreamOutput && !emittedUnsafeStreamSideEffects;
53
149
  const flushDeferredStreamContent = async function* () {
54
150
  while (deferredStreamContent.length > 0) {
@@ -212,7 +308,6 @@ export async function* streamRuntimeExecution(options) {
212
308
  });
213
309
  throw error;
214
310
  }
215
- const projectionState = createStreamEventProjectionState();
216
311
  const streamEventsConsume = startProfileStep({
217
312
  id: "profile:agent:stream-events-consume",
218
313
  kind: "agent",
@@ -225,8 +320,12 @@ export async function* streamRuntimeExecution(options) {
225
320
  for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
226
321
  const projectedChunks = projectRuntimeStreamEvent({
227
322
  event,
228
- allowVisibleStreamDeltas: options.isLangChainBinding(options.binding),
323
+ allowVisibleStreamDeltas: true,
229
324
  includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
325
+ rootAgentId: typeof options.binding.agent?.id === "string"
326
+ ? options.binding.agent.id
327
+ : undefined,
328
+ countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
230
329
  toolNameMapping: options.toolNameMapping,
231
330
  primaryTools: options.primaryTools,
232
331
  state: projectionState,
@@ -248,12 +347,27 @@ export async function* streamRuntimeExecution(options) {
248
347
  if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
249
348
  emittedUnsafeStreamSideEffects = true;
250
349
  }
251
- if (chunk.kind === "content" && shouldDeferStreamContent()) {
350
+ if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
252
351
  deferredStreamContent.push(chunk);
253
352
  continue;
254
353
  }
255
354
  yield chunk;
256
355
  }
356
+ const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
357
+ if (terminalVisibleOutput) {
358
+ const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
359
+ if (!shouldDeferStreamContent()
360
+ && !terminalExecutionEvidence.hasIncompletePlanState
361
+ && !terminalExecutionEvidence.hasFailedTaskDelegation
362
+ && !terminalExecutionEvidence.hasOpenTaskDelegation
363
+ && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
364
+ && !hasMissingDelegatedFindings(terminalExecutionEvidence)) {
365
+ if (deferredStreamContent.length > 0) {
366
+ yield* flushDeferredStreamContent();
367
+ }
368
+ return;
369
+ }
370
+ }
257
371
  }
258
372
  if (shouldProfile)
259
373
  yield finishProfileStep({
@@ -278,13 +392,55 @@ export async function* streamRuntimeExecution(options) {
278
392
  });
279
393
  throw error;
280
394
  }
281
- const terminalRecoveryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION : null;
282
- if (terminalRecoveryInstruction) {
395
+ const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
396
+ if (hasUnresolvedExecution(streamedExecutionEvidence)) {
397
+ throw createUnresolvedExecutionError(streamedExecutionEvidence);
398
+ }
399
+ const executionWithoutToolEvidenceInstruction = projectionState.emittedOutput
400
+ ? resolveExecutionWithoutToolEvidenceTextInstruction(request, projectionState.emittedOutput, false, {
401
+ ...streamedExecutionEvidence,
402
+ hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(streamedExecutionEvidence),
403
+ })
404
+ : null;
405
+ const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
406
+ ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
407
+ : executionWithoutToolEvidenceInstruction;
408
+ if (retryInstruction) {
283
409
  let retried;
284
- retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, terminalRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
410
+ retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
285
411
  const executedToolResults = Array.isArray(retried.metadata?.executedToolResults)
286
412
  ? retried.metadata.executedToolResults
287
413
  : [];
414
+ const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
415
+ const retriedExecutionEvidence = buildExecutionRecoveryEvidence({
416
+ projectionState: createStreamEventProjectionState(),
417
+ executedToolResults,
418
+ });
419
+ const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
420
+ const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
421
+ || retriedExecutionEvidence.hasOpenTaskDelegation
422
+ || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
423
+ const retriedHasUnresolvedExecution = hasUnresolvedExecution(retriedExecutionEvidence)
424
+ || hasMissingDelegatedExecutionEvidence(retriedExecutionEvidence)
425
+ || hasMissingDelegatedFindings(retriedExecutionEvidence)
426
+ || (!retriedCarriesExecutionEvidence
427
+ && (hasUnresolvedExecution(originalExecutionEvidence)
428
+ || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
429
+ || hasMissingDelegatedFindings(originalExecutionEvidence)));
430
+ const effectiveRecoveryEvidence = retriedCarriesExecutionEvidence
431
+ ? retriedExecutionEvidence
432
+ : {
433
+ ...retriedExecutionEvidence,
434
+ hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
435
+ hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
436
+ hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
437
+ hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
438
+ hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
439
+ };
440
+ if (retriedHasUnresolvedExecution
441
+ || (retriedHasUnresolvedExecution && retriedExecutionEvidence.hasToolResultEvidence && !retriedVisibleOutput)) {
442
+ throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
443
+ }
288
444
  for (const toolResult of executedToolResults) {
289
445
  yield {
290
446
  kind: "tool-result",
@@ -303,7 +459,15 @@ export async function* streamRuntimeExecution(options) {
303
459
  if (deferredStreamContent.length > 0) {
304
460
  yield* flushDeferredStreamContent();
305
461
  }
306
- if (projectionState.emittedOutput || projectionState.emittedToolResult || projectionState.emittedToolError) {
462
+ if (hasMissingDelegatedExecutionEvidence(streamedExecutionEvidence)) {
463
+ throw createUnresolvedExecutionError(streamedExecutionEvidence);
464
+ }
465
+ if (hasMissingDelegatedFindings(streamedExecutionEvidence)) {
466
+ throw createUnresolvedExecutionError(streamedExecutionEvidence);
467
+ }
468
+ const hasUnresolvedStreamExecution = hasUnresolvedExecution(streamedExecutionEvidence);
469
+ if (projectionState.emittedOutput
470
+ || ((projectionState.emittedToolResult || projectionState.emittedToolError) && !hasUnresolvedStreamExecution)) {
307
471
  return;
308
472
  }
309
473
  }
@@ -429,6 +593,73 @@ export async function* streamRuntimeExecution(options) {
429
593
  const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
430
594
  ? result.metadata.executedToolResults
431
595
  : [];
596
+ const invokeExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
597
+ if (hasUnresolvedExecution(invokeExecutionEvidence)) {
598
+ throw createUnresolvedExecutionError(invokeExecutionEvidence);
599
+ }
600
+ const invokeFallbackRecoveryInstruction = result.output
601
+ ? resolveExecutionWithoutToolEvidenceTextInstruction(request, result.output, false, {
602
+ ...invokeExecutionEvidence,
603
+ hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
604
+ })
605
+ : null;
606
+ if (invokeFallbackRecoveryInstruction) {
607
+ const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, invokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
608
+ const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
609
+ ? recovered.metadata.executedToolResults
610
+ : [];
611
+ const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
612
+ const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
613
+ projectionState: createStreamEventProjectionState(),
614
+ executedToolResults: recoveredToolResults,
615
+ });
616
+ const recoveredVisibleOutput = recovered.output ? toVisibleContent(recovered.output) : "";
617
+ const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasToolResultEvidence
618
+ || recoveredExecutionEvidence.hasOpenTaskDelegation
619
+ || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
620
+ const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
621
+ || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
622
+ || hasMissingDelegatedFindings(recoveredExecutionEvidence)
623
+ || (!recoveredCarriesExecutionEvidence
624
+ && (hasUnresolvedExecution(originalExecutionEvidence)
625
+ || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
626
+ || hasMissingDelegatedFindings(originalExecutionEvidence)));
627
+ const effectiveRecoveredEvidence = recoveredCarriesExecutionEvidence
628
+ ? recoveredExecutionEvidence
629
+ : {
630
+ ...recoveredExecutionEvidence,
631
+ hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
632
+ hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
633
+ hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
634
+ hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
635
+ hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
636
+ };
637
+ if (recoveredHasUnresolvedExecution
638
+ || (recoveredHasUnresolvedExecution && recoveredExecutionEvidence.hasToolResultEvidence && !recoveredVisibleOutput)) {
639
+ throw createUnresolvedExecutionError(effectiveRecoveredEvidence);
640
+ }
641
+ for (const toolResult of recoveredToolResults) {
642
+ yield {
643
+ kind: "tool-result",
644
+ toolName: toolResult.toolName,
645
+ output: toolResult.output,
646
+ isError: toolResult.isError,
647
+ };
648
+ }
649
+ if (recovered.output) {
650
+ const visible = toVisibleContent(recovered.output);
651
+ if (visible) {
652
+ yield { kind: "content", content: visible };
653
+ }
654
+ }
655
+ return;
656
+ }
657
+ if (hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence)) {
658
+ throw createUnresolvedExecutionError(invokeExecutionEvidence);
659
+ }
660
+ if (hasMissingDelegatedFindings(invokeExecutionEvidence)) {
661
+ throw createUnresolvedExecutionError(invokeExecutionEvidence);
662
+ }
432
663
  for (const toolResult of executedToolResults) {
433
664
  yield {
434
665
  kind: "tool-result",
@@ -4,7 +4,42 @@ import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
4
4
  import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
5
5
  import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
6
6
  import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
7
+ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveExecutionWithoutToolEvidenceTextInstruction, sanitizeVisibleText, } from "../parsing/output-parsing.js";
8
+ import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
7
9
  const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
10
+ function readPlanStateSummary(output) {
11
+ if (typeof output !== "object" || output === null) {
12
+ return null;
13
+ }
14
+ const typed = output;
15
+ const summaryContainer = typed.summary;
16
+ if (typeof summaryContainer !== "object" || summaryContainer === null) {
17
+ return null;
18
+ }
19
+ const nested = summaryContainer;
20
+ const counts = nested.summary;
21
+ if (typeof counts !== "object" || counts === null) {
22
+ return null;
23
+ }
24
+ const typedCounts = counts;
25
+ return {
26
+ pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
27
+ inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
28
+ };
29
+ }
30
+ function hasIncompleteExecutedPlan(executedToolResults) {
31
+ for (const latest of [...executedToolResults].reverse()) {
32
+ const summary = readPlanStateSummary(latest.output);
33
+ if (!summary) {
34
+ continue;
35
+ }
36
+ return summary.pending > 0 || summary.inProgress > 0;
37
+ }
38
+ return false;
39
+ }
40
+ function hasNonTodoToolEvidence(executedToolResults) {
41
+ return executedToolResults.some((item) => item.toolName !== "write_todos" && item.toolName !== "read_todos");
42
+ }
8
43
  function extractLatestUserInput(request) {
9
44
  const typedRequest = request;
10
45
  const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
@@ -40,6 +75,24 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
40
75
  pendingResult = undefined;
41
76
  const toolCalls = extractToolCallsFromResult(result);
42
77
  if (toolCalls.length === 0) {
78
+ const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
79
+ const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
80
+ const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
81
+ const recoveryInstruction = terminalText
82
+ ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
83
+ hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
84
+ hasIncompletePlanState: hasExecutionBeyondTodoPlanning && hasIncompletePlanState,
85
+ })
86
+ : hasIncompletePlanState && hasExecutionBeyondTodoPlanning
87
+ ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
88
+ : null;
89
+ if (recoveryInstruction) {
90
+ if (iteration + 1 === maxToolIterations) {
91
+ throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
92
+ }
93
+ activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
94
+ continue;
95
+ }
43
96
  break;
44
97
  }
45
98
  if (!canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools)) {
@@ -1,4 +1,4 @@
1
- import { HumanMessage } from "@langchain/core/messages";
1
+ import { HumanMessage, SystemMessage } from "@langchain/core/messages";
2
2
  import { createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSubAgentMiddleware, createSummarizationMiddleware, StateBackend, } from "deepagents";
3
3
  import { createAgent, humanInTheLoopMiddleware } from "langchain";
4
4
  import { createBuiltinMiddlewareTools } from "./tool/builtin-middleware-tools.js";
@@ -7,9 +7,98 @@ import { extractToolFallbackContext, extractVisibleOutput } from "../parsing/out
7
7
  import { isRecord } from "../../utils/object.js";
8
8
  import { resolveDeclaredMiddleware } from "./tool/declared-middleware.js";
9
9
  import { UPSTREAM_SESSION_CONFIG_KEY } from "./upstream-configurable-keys.js";
10
- import { bindingHasLangChainSubagentSupport, bindingHasMiddlewareKind, getBindingExecutionKind, getBindingGeneralPurposeAgent, getBindingInterruptCompatibilityRules, getBindingMiddlewareConfigs, getBindingMemorySources, getBindingPrimaryModel, getBindingPrimaryTools, getBindingSkills, getBindingSubagents, getBindingTaskDescription, isDeepAgentBinding, isLangChainBinding, } from "../support/compiled-binding.js";
11
- import { applyDeepAgentDelegationPromptCompatibility, materializeDeepAgentSkillSourcePaths } from "./compat/deepagent-compat.js";
10
+ import { bindingHasLangChainSubagentSupport, bindingHasMiddlewareKind, getBindingExecutionKind, getBindingGeneralPurposeAgent, getBindingDeepAgentSubagents, getBindingInterruptCompatibilityRules, getBindingMiddlewareConfigs, getBindingMemorySources, getBindingPrimaryModel, getBindingPrimaryTools, getBindingSkills, getBindingSubagents, getBindingTaskDescription, isDeepAgentBinding, isLangChainBinding, } from "../support/compiled-binding.js";
11
+ import { materializeDeepAgentSkillSourcePaths } from "./compat/deepagent-compat.js";
12
12
  import { DEFAULT_SUBAGENT_PROMPT } from "../prompts/runtime-prompts.js";
13
+ import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
14
+ import { createStreamEventProjectionState, projectRuntimeStreamEvent } from "./stream-event-projection.js";
15
+ const EMPTY_TOOL_NAME_MAPPING = {
16
+ originalToModelFacing: new Map(),
17
+ modelFacingToOriginal: new Map(),
18
+ };
19
+ function readPlanStateSummaryCounts(summary) {
20
+ if (typeof summary !== "object" || summary === null) {
21
+ return null;
22
+ }
23
+ const typed = summary;
24
+ const hasAny = typeof typed.pending === "number"
25
+ || typeof typed.inProgress === "number";
26
+ if (!hasAny) {
27
+ return null;
28
+ }
29
+ return {
30
+ pending: typeof typed.pending === "number" ? typed.pending : 0,
31
+ inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
32
+ };
33
+ }
34
+ function hasIncompletePlanTodos(value) {
35
+ if (!Array.isArray(value)) {
36
+ return null;
37
+ }
38
+ return value.some((item) => {
39
+ if (typeof item !== "object" || item === null) {
40
+ return false;
41
+ }
42
+ const status = item.status;
43
+ return status === "pending" || status === "in_progress";
44
+ });
45
+ }
46
+ function hasIncompletePlanStateInValue(value) {
47
+ if (Array.isArray(value)) {
48
+ return value.some((item) => hasIncompletePlanStateInValue(item));
49
+ }
50
+ if (typeof value !== "object" || value === null) {
51
+ return false;
52
+ }
53
+ const typed = value;
54
+ const summaryCounts = readPlanStateSummaryCounts(typed.summary);
55
+ if (summaryCounts) {
56
+ return summaryCounts.pending > 0 || summaryCounts.inProgress > 0;
57
+ }
58
+ const todoCompleteness = hasIncompletePlanTodos(typed.todos);
59
+ if (todoCompleteness !== null) {
60
+ return todoCompleteness;
61
+ }
62
+ const nestedCandidates = [
63
+ typed.summary,
64
+ typed.output,
65
+ typed.content,
66
+ typed.update,
67
+ typed.data,
68
+ typed.messages,
69
+ ];
70
+ return nestedCandidates.some((candidate) => hasIncompletePlanStateInValue(candidate));
71
+ }
72
+ function hasUnresolvedDelegatedExecution(state) {
73
+ return state.hasIncompletePlanState || state.openTaskDelegations > 0;
74
+ }
75
+ function hasMissingDelegatedToolExecutionEvidence(state, subagentHasTools) {
76
+ return subagentHasTools && !state.emittedToolResult && !state.emittedToolError;
77
+ }
78
+ function formatDelegatedExecutionBlocker(state) {
79
+ const summary = state.emittedOutput.trim();
80
+ if (summary) {
81
+ return summary;
82
+ }
83
+ if (state.emittedToolError) {
84
+ return "Delegated investigation encountered a tool failure before the plan completed.";
85
+ }
86
+ if (state.openTaskDelegations > 0) {
87
+ return "Delegated investigation did not finish before control returned to the parent agent.";
88
+ }
89
+ return "Delegated investigation ended before the plan was completed.";
90
+ }
91
+ function requiresDelegatedExecutionRecovery(state) {
92
+ return hasUnresolvedDelegatedExecution(state) || (state.emittedToolResult && !state.emittedOutput.trim());
93
+ }
94
+ const DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION = [
95
+ "Your previous attempt ended with a tool failure while the todo board still had unfinished work.",
96
+ "Do not continue broad investigation from here.",
97
+ "If the failed command had malformed arguments you can correct locally, retry that same command once.",
98
+ "Otherwise, if a todo board already exists, call write_todos again and keep only the tasks that were actually completed in this session.",
99
+ "Remove the unfinished tasks that cannot proceed until the blocker is resolved.",
100
+ "Then return a concise blocker report.",
101
+ ].join(" ");
13
102
  export function buildBuiltinTaskSubagentMiddleware(input) {
14
103
  const { selectedSubagent, builtinBackend, summarizationModel } = input;
15
104
  const defaultSubagentMiddleware = [
@@ -57,13 +146,6 @@ function buildLangChainContextMiddleware(params) {
57
146
  }
58
147
  return middleware;
59
148
  }
60
- function resolveLangChainDelegationCompatibility(params) {
61
- return applyDeepAgentDelegationPromptCompatibility(params.model, {
62
- subagents: params.subagents,
63
- generalPurposeAgent: params.generalPurposeAgent,
64
- taskDescription: params.taskDescription,
65
- });
66
- }
67
149
  export function planLangChainRuntimeExtensions(binding) {
68
150
  const primaryModel = getBindingPrimaryModel(binding);
69
151
  if (!isLangChainBinding(binding) || !primaryModel) {
@@ -71,12 +153,6 @@ export function planLangChainRuntimeExtensions(binding) {
71
153
  }
72
154
  const skills = getBindingSkills(binding);
73
155
  const memory = getBindingMemorySources(binding);
74
- const delegationCompatibility = resolveLangChainDelegationCompatibility({
75
- model: primaryModel,
76
- subagents: getBindingSubagents(binding),
77
- generalPurposeAgent: getBindingGeneralPurposeAgent(binding),
78
- taskDescription: getBindingTaskDescription(binding),
79
- });
80
156
  return {
81
157
  includePatchToolCalls: !bindingHasMiddlewareKind(binding, "patchToolCalls"),
82
158
  includeAutomaticSummarization: !bindingHasMiddlewareKind(binding, "summarization"),
@@ -86,9 +162,9 @@ export function planLangChainRuntimeExtensions(binding) {
86
162
  },
87
163
  delegation: bindingHasLangChainSubagentSupport(binding)
88
164
  ? {
89
- subagents: delegationCompatibility.subagents ?? [],
90
- generalPurposeAgent: delegationCompatibility.generalPurposeAgent,
91
- taskDescription: delegationCompatibility.taskDescription,
165
+ subagents: getBindingSubagents(binding),
166
+ generalPurposeAgent: getBindingGeneralPurposeAgent(binding),
167
+ taskDescription: getBindingTaskDescription(binding),
92
168
  interruptOn: getBindingInterruptCompatibilityRules(binding),
93
169
  }
94
170
  : undefined,
@@ -142,13 +218,18 @@ export async function invokeBuiltinTaskTool(input) {
142
218
  }
143
219
  const primaryModel = getBindingPrimaryModel(input.binding);
144
220
  const primaryTools = getBindingPrimaryTools(input.binding);
145
- const compiledSubagents = getBindingSubagents(input.binding);
221
+ const compiledSubagents = getBindingDeepAgentSubagents(input.binding)
222
+ .filter((subagent) => !("graphId" in subagent));
146
223
  if (!primaryModel) {
147
224
  throw new Error(`Agent ${input.binding.agent.id} has no deepagent params`);
148
225
  }
149
226
  const typedInput = isRecord(input.toolInput) ? input.toolInput : {};
150
- const description = typeof typedInput.description === "string" ? typedInput.description : "";
151
- const subagentType = typeof typedInput.subagent_type === "string" ? typedInput.subagent_type : "";
227
+ const description = typeof typedInput.description === "string"
228
+ ? typedInput.description
229
+ : "";
230
+ const subagentType = typeof typedInput.subagent_type === "string"
231
+ ? typedInput.subagent_type
232
+ : "";
152
233
  const builtinBackend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
153
234
  const resolvedSubagents = await input.resolveSubagents(compiledSubagents, input.binding);
154
235
  const selectedSubagent = resolvedSubagents.find((subagent) => subagent.name === subagentType);
@@ -163,16 +244,82 @@ export async function invokeBuiltinTaskTool(input) {
163
244
  builtinBackend,
164
245
  summarizationModel,
165
246
  });
247
+ const resolvedSubagentTools = selectedSubagent.tools ?? input.resolveTools(primaryTools, input.binding);
248
+ const subagentHasTools = (resolvedSubagentTools?.length ?? 0) > 0;
166
249
  const runnable = createAgent({
167
250
  model: (selectedSubagent.model ?? resolvedHostModel),
168
- tools: (selectedSubagent.tools ?? input.resolveTools(primaryTools, input.binding)),
251
+ tools: resolvedSubagentTools,
169
252
  systemPrompt: selectedSubagent.systemPrompt ?? DEFAULT_SUBAGENT_PROMPT,
170
253
  middleware: middleware,
171
254
  responseFormat: selectedSubagent.responseFormat,
172
255
  name: selectedSubagent.name,
173
256
  description: selectedSubagent.description,
174
257
  });
175
- const result = await runnable.invoke({ messages: [new HumanMessage({ content: description })] }, { configurable: { [UPSTREAM_SESSION_CONFIG_KEY]: `${input.binding.agent.id}:builtin-task` }, ...(input.options?.context ? { context: input.options.context } : {}) });
258
+ const invokeConfig = {
259
+ configurable: { [UPSTREAM_SESSION_CONFIG_KEY]: `${input.binding.agent.id}:builtin-task` },
260
+ ...(input.options?.context ? { context: input.options.context } : {}),
261
+ };
262
+ const buildMessages = (recoveryInstruction) => ({
263
+ messages: [
264
+ ...(recoveryInstruction ? [new SystemMessage({ content: recoveryInstruction })] : []),
265
+ new HumanMessage({ content: description }),
266
+ ],
267
+ });
268
+ if (typeof runnable.streamEvents === "function") {
269
+ const runWithStreamInspection = async (recoveryInstruction) => {
270
+ const projectionState = createStreamEventProjectionState();
271
+ const events = await runnable.streamEvents(buildMessages(recoveryInstruction), invokeConfig);
272
+ for await (const event of events) {
273
+ projectRuntimeStreamEvent({
274
+ event,
275
+ allowVisibleStreamDeltas: false,
276
+ includeStateStreamOutput: false,
277
+ rootAgentId: input.binding.agent.id,
278
+ countConfiguredToolsForAgentId: (agentId) => agentId === selectedSubagent.name ? resolvedSubagentTools.length : 0,
279
+ toolNameMapping: EMPTY_TOOL_NAME_MAPPING,
280
+ primaryTools: [],
281
+ state: projectionState,
282
+ });
283
+ }
284
+ return projectionState;
285
+ };
286
+ let projectionState = await runWithStreamInspection();
287
+ if (requiresDelegatedExecutionRecovery(projectionState)) {
288
+ const recoveryInstruction = projectionState.hasIncompletePlanState && projectionState.emittedToolError
289
+ ? `${AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION}\n\n${DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION}`
290
+ : AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
291
+ projectionState = await runWithStreamInspection(recoveryInstruction);
292
+ }
293
+ if (requiresDelegatedExecutionRecovery(projectionState)) {
294
+ throw new Error(formatDelegatedExecutionBlocker(projectionState));
295
+ }
296
+ if (hasMissingDelegatedToolExecutionEvidence(projectionState, subagentHasTools)) {
297
+ throw new Error("Delegated investigation ended without any real tool execution evidence.");
298
+ }
299
+ if (projectionState.emittedToolError) {
300
+ const blockerMessage = projectionState.emittedOutput.trim()
301
+ || formatDelegatedExecutionBlocker(projectionState);
302
+ if (hasUnresolvedDelegatedExecution(projectionState) || !projectionState.emittedSuccessfulToolResult) {
303
+ throw new Error(blockerMessage);
304
+ }
305
+ }
306
+ if (projectionState.emittedOutput.trim()) {
307
+ return projectionState.emittedOutput.trim();
308
+ }
309
+ if (projectionState.emittedToolResult) {
310
+ throw new Error("Delegated investigation performed tool work but did not return surfaced findings.");
311
+ }
312
+ }
313
+ let result = await runnable.invoke(buildMessages(), invokeConfig);
314
+ if (hasIncompletePlanStateInValue(result)) {
315
+ result = await runnable.invoke(buildMessages(AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION), invokeConfig);
316
+ }
317
+ if (hasIncompletePlanStateInValue(result)) {
318
+ throw new Error(extractVisibleOutput(result) || extractToolFallbackContext(result) || "Delegated investigation ended before the plan was completed.");
319
+ }
320
+ if (subagentHasTools) {
321
+ throw new Error("Delegated investigation ended without any real tool execution evidence.");
322
+ }
176
323
  const visibleOutput = extractVisibleOutput(result);
177
324
  const fallbackOutput = extractToolFallbackContext(result);
178
325
  return visibleOutput || fallbackOutput || JSON.stringify(result);
@@ -180,12 +327,10 @@ export async function invokeBuiltinTaskTool(input) {
180
327
  export async function resolveBuiltinMiddlewareTools(input) {
181
328
  const backend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
182
329
  return createBuiltinMiddlewareTools(backend, {
183
- includeTaskTool: isDeepAgentBinding(input.binding),
330
+ includeTaskTool: false,
184
331
  workspaceRoot: input.binding.harnessRuntime.workspaceRoot,
185
332
  toolRuntimeContext: input.options?.toolRuntimeContext,
186
- invokeTaskTool: isDeepAgentBinding(input.binding)
187
- ? async (toolInput) => input.invokeBuiltinTaskTool(input.binding, toolInput, input.options)
188
- : undefined,
333
+ invokeTaskTool: undefined,
189
334
  });
190
335
  }
191
336
  export async function materializeAutomaticSummarizationMiddleware(input) {
@@ -214,7 +359,7 @@ export async function materializeLangChainRuntimeMiddleware(input) {
214
359
  memory: input.plan.context.memory,
215
360
  resolveFilesystemBackend: input.resolveFilesystemBackend,
216
361
  }));
217
- if (input.plan.delegation) {
362
+ if (input.plan.delegation && !isDeepAgentBinding(input.binding)) {
218
363
  runtimeMiddleware.push(createSubAgentMiddleware({
219
364
  defaultModel: (await input.resolveModel(primaryModel)),
220
365
  defaultTools: input.resolveTools(primaryTools, input.binding),