@botbotgo/agent-harness 0.0.333 → 0.0.337

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -534,10 +534,19 @@ import { createAgentHarness } from "@botbotgo/agent-harness";
534
534
  const runtime = await createAgentHarness("/path/to/workspace", {
535
535
  load: {
536
536
  overlayRoots: ["/path/to/framework-defaults", "/path/to/product-overrides"],
537
+ frameworkContractValidation: "warn",
537
538
  },
538
539
  });
539
540
  ```
540
541
 
542
+ Framework contract validation modes:
543
+
544
+ - `off` keeps startup behavior unchanged
545
+ - `warn` loads the workspace and emits contract-quality warnings for workspace-owned agents, skills, and tools
546
+ - `error` fails startup when those workspace-owned definitions drift away from the framework contract
547
+
548
+ You can also control the same startup behavior with `AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION=warn|error|off`.
549
+
541
550
  ### Run A Request
542
551
 
543
552
  ```ts
@@ -314,4 +314,5 @@ export type WorkspaceLoadOptions = {
314
314
  */
315
315
  overlayRoots?: string[];
316
316
  resources?: string[];
317
+ frameworkContractValidation?: "off" | "warn" | "error";
317
318
  };
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.332";
2
- export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-22";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.336";
2
+ export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.332";
2
- export const AGENT_HARNESS_RELEASE_DATE = "2026-04-22";
1
+ export const AGENT_HARNESS_VERSION = "0.0.336";
2
+ export const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
@@ -1,4 +1,4 @@
1
- import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
1
+ import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
2
2
  import { buildInvocationRequest } from "../model/invocation-request.js";
3
3
  import { buildRawModelMessages } from "../model/message-assembly.js";
4
4
  import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
@@ -79,11 +79,21 @@ function hasUnresolvedExecution(evidence) {
79
79
  || evidence.hasOpenTaskDelegation);
80
80
  }
81
81
  function hasMissingDelegatedExecutionEvidence(evidence) {
82
- return evidence.hasDelegatedAgentWithConfiguredTools && !evidence.hasDelegatedExecutionToolEvidence;
82
+ return false;
83
83
  }
84
84
  function hasMissingDelegatedFindings(evidence) {
85
85
  return evidence.hasDelegatedAgentWithConfiguredTools && evidence.hasOnlyPlaceholderTaskCompletion;
86
86
  }
87
+ function resolveDelegatedExecutionRecoveryInstruction(evidence) {
88
+ if (hasMissingDelegatedFindings(evidence)
89
+ || (evidence.hasOpenTaskDelegation
90
+ && evidence.hasDelegatedAgentWithConfiguredTools
91
+ && !evidence.hasPlanStateEvidence
92
+ && !evidence.hasFailedTaskDelegation)) {
93
+ return EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
94
+ }
95
+ return null;
96
+ }
87
97
  function createUnresolvedExecutionError(evidence) {
88
98
  const reasons = [];
89
99
  if (evidence.hasIncompletePlanState) {
@@ -95,9 +105,6 @@ function createUnresolvedExecutionError(evidence) {
95
105
  if (evidence.hasOpenTaskDelegation) {
96
106
  reasons.push("delegated task has not finished");
97
107
  }
98
- if (hasMissingDelegatedExecutionEvidence(evidence)) {
99
- reasons.push("delegated agent ended without surfacing any real tool execution evidence");
100
- }
101
108
  if (hasMissingDelegatedFindings(evidence)) {
102
109
  reasons.push("delegated task returned only the upstream placeholder result without surfaced final findings");
103
110
  }
@@ -400,7 +407,10 @@ export async function* streamRuntimeExecution(options) {
400
407
  throw error;
401
408
  }
402
409
  const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
403
- if (hasUnresolvedExecution(streamedExecutionEvidence)) {
410
+ const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects
411
+ ? resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence)
412
+ : null;
413
+ if (hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction) {
404
414
  throw createUnresolvedExecutionError(streamedExecutionEvidence);
405
415
  }
406
416
  const executionWithoutToolEvidenceInstruction = projectionState.emittedOutput
@@ -411,7 +421,7 @@ export async function* streamRuntimeExecution(options) {
411
421
  : null;
412
422
  const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
413
423
  ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
414
- : executionWithoutToolEvidenceInstruction;
424
+ : delegatedExecutionRecoveryInstruction ?? executionWithoutToolEvidenceInstruction;
415
425
  if (retryInstruction) {
416
426
  let retried;
417
427
  retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
@@ -609,7 +619,7 @@ export async function* streamRuntimeExecution(options) {
609
619
  ...invokeExecutionEvidence,
610
620
  hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
611
621
  })
612
- : null;
622
+ : resolveDelegatedExecutionRecoveryInstruction(invokeExecutionEvidence);
613
623
  if (invokeFallbackRecoveryInstruction) {
614
624
  const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, invokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
615
625
  const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
@@ -17,19 +17,35 @@ function isPlaceholderTaskCompletion(value) {
17
17
  const normalized = sanitizeVisibleText(value).trim();
18
18
  return normalized === "Task completed";
19
19
  }
20
+ function isLowSignalStructuredCompletion(value) {
21
+ const normalized = sanitizeVisibleText(value).trim();
22
+ if (!normalized) {
23
+ return false;
24
+ }
25
+ return /Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i.test(normalized);
26
+ }
20
27
  function normalizeToolOutputText(output) {
21
28
  const directText = typeof output === "string"
22
29
  ? sanitizeVisibleText(output).trim()
23
30
  : "";
24
- if (directText && !looksLikeLeakedToolCallText(directText) && !isPlaceholderTaskCompletion(directText)) {
31
+ if (directText
32
+ && !looksLikeLeakedToolCallText(directText)
33
+ && !isPlaceholderTaskCompletion(directText)
34
+ && !isLowSignalStructuredCompletion(directText)) {
25
35
  return directText;
26
36
  }
27
37
  const visibleOutput = sanitizeVisibleText(extractVisibleOutput(output)).trim();
28
- if (visibleOutput && !looksLikeLeakedToolCallText(visibleOutput) && !isPlaceholderTaskCompletion(visibleOutput)) {
38
+ if (visibleOutput
39
+ && !looksLikeLeakedToolCallText(visibleOutput)
40
+ && !isPlaceholderTaskCompletion(visibleOutput)
41
+ && !isLowSignalStructuredCompletion(visibleOutput)) {
29
42
  return visibleOutput;
30
43
  }
31
44
  const fallbackContext = sanitizeVisibleText(extractToolFallbackContext(output)).trim();
32
- if (fallbackContext && !looksLikeLeakedToolCallText(fallbackContext) && !isPlaceholderTaskCompletion(fallbackContext)) {
45
+ if (fallbackContext
46
+ && !looksLikeLeakedToolCallText(fallbackContext)
47
+ && !isPlaceholderTaskCompletion(fallbackContext)
48
+ && !isLowSignalStructuredCompletion(fallbackContext)) {
33
49
  return fallbackContext;
34
50
  }
35
51
  return "";
@@ -72,7 +88,7 @@ export function resolveDeterministicFinalOutput(params) {
72
88
  const sanitizedVisibleOutput = visibleOutput && !looksLikeLeakedToolCallText(visibleOutput)
73
89
  ? sanitizeVisibleText(visibleOutput).trim()
74
90
  : "";
75
- if (sanitizedVisibleOutput) {
91
+ if (sanitizedVisibleOutput && !isLowSignalStructuredCompletion(sanitizedVisibleOutput)) {
76
92
  return sanitizedVisibleOutput;
77
93
  }
78
94
  const successfulToolOutput = extractLatestSuccessfulNonTodoToolResultText(executedToolResults);
@@ -82,7 +98,7 @@ export function resolveDeterministicFinalOutput(params) {
82
98
  const sanitizedToolFallback = toolFallback && !looksLikeLeakedToolCallText(toolFallback)
83
99
  ? sanitizeVisibleText(toolFallback).trim()
84
100
  : "";
85
- return sanitizedToolFallback;
101
+ return isLowSignalStructuredCompletion(sanitizedToolFallback) ? "" : sanitizedToolFallback;
86
102
  }
87
103
  export function extractDelegatedFindingsText(executedToolResults) {
88
104
  return extractLatestSuccessfulTaskResultText(executedToolResults);
@@ -73,9 +73,6 @@ function hasIncompletePlanStateInValue(value) {
73
73
  function hasUnresolvedDelegatedExecution(state) {
74
74
  return state.hasIncompletePlanState || state.openTaskDelegations > 0;
75
75
  }
76
- function hasMissingDelegatedToolExecutionEvidence(state, subagentHasTools) {
77
- return subagentHasTools && !state.emittedToolResult && !state.emittedToolError;
78
- }
79
76
  function formatDelegatedExecutionBlocker(state) {
80
77
  const summary = state.emittedOutput.trim();
81
78
  if (summary) {
@@ -90,7 +87,7 @@ function formatDelegatedExecutionBlocker(state) {
90
87
  return "Delegated investigation ended before the plan was completed.";
91
88
  }
92
89
  function requiresDelegatedExecutionRecovery(state) {
93
- return hasUnresolvedDelegatedExecution(state) || (state.emittedToolResult && !state.emittedOutput.trim());
90
+ return hasUnresolvedDelegatedExecution(state);
94
91
  }
95
92
  const DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION = [
96
93
  "Your previous attempt ended with a tool failure while the todo board still had unfinished work.",
@@ -246,7 +243,6 @@ export async function invokeBuiltinTaskTool(input) {
246
243
  summarizationModel,
247
244
  });
248
245
  const resolvedSubagentTools = selectedSubagent.tools ?? input.resolveTools(primaryTools, input.binding);
249
- const subagentHasTools = (resolvedSubagentTools?.length ?? 0) > 0;
250
246
  const runnable = createAgent({
251
247
  model: (selectedSubagent.model ?? resolvedHostModel),
252
248
  tools: resolvedSubagentTools,
@@ -297,17 +293,39 @@ export async function invokeBuiltinTaskTool(input) {
297
293
  };
298
294
  let { projectionState, executedToolResults } = await runWithStreamInspection();
299
295
  if (requiresDelegatedExecutionRecovery(projectionState)) {
296
+ const initialProjectionState = projectionState;
297
+ const initialExecutedToolResults = executedToolResults;
298
+ const initialDeterministicOutput = resolveDeterministicFinalOutput({
299
+ visibleOutput: initialProjectionState.emittedOutput.trim(),
300
+ executedToolResults: initialExecutedToolResults,
301
+ });
300
302
  const recoveryInstruction = projectionState.hasIncompletePlanState && projectionState.emittedToolError
301
303
  ? `${AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION}\n\n${DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION}`
302
304
  : AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
303
- ({ projectionState, executedToolResults } = await runWithStreamInspection(recoveryInstruction));
305
+ const recovered = await runWithStreamInspection(recoveryInstruction);
306
+ const recoveredDeterministicOutput = resolveDeterministicFinalOutput({
307
+ visibleOutput: recovered.projectionState.emittedOutput.trim(),
308
+ executedToolResults: recovered.executedToolResults,
309
+ });
310
+ const recoveredHasSubstantiveExecution = recoveredDeterministicOutput.length > 0;
311
+ if (recoveredHasSubstantiveExecution) {
312
+ projectionState = recovered.projectionState;
313
+ executedToolResults = recovered.executedToolResults;
314
+ }
315
+ else {
316
+ projectionState = initialProjectionState;
317
+ executedToolResults = initialExecutedToolResults;
318
+ if (initialDeterministicOutput) {
319
+ projectionState = {
320
+ ...projectionState,
321
+ emittedOutput: initialDeterministicOutput,
322
+ };
323
+ }
324
+ }
304
325
  }
305
326
  if (requiresDelegatedExecutionRecovery(projectionState)) {
306
327
  throw new Error(formatDelegatedExecutionBlocker(projectionState));
307
328
  }
308
- if (hasMissingDelegatedToolExecutionEvidence(projectionState, subagentHasTools)) {
309
- throw new Error("Delegated investigation ended without any real tool execution evidence.");
310
- }
311
329
  if (projectionState.emittedToolError) {
312
330
  const blockerMessage = resolveDeterministicFinalOutput({
313
331
  visibleOutput: projectionState.emittedOutput.trim(),
@@ -335,12 +353,12 @@ export async function invokeBuiltinTaskTool(input) {
335
353
  if (hasIncompletePlanStateInValue(result)) {
336
354
  throw new Error(extractVisibleOutput(result) || extractToolFallbackContext(result) || "Delegated investigation ended before the plan was completed.");
337
355
  }
338
- if (subagentHasTools) {
339
- throw new Error("Delegated investigation ended without any real tool execution evidence.");
340
- }
341
356
  const visibleOutput = extractVisibleOutput(result);
342
357
  const fallbackOutput = extractToolFallbackContext(result);
343
- return visibleOutput || fallbackOutput || JSON.stringify(result);
358
+ const structuredResponse = typeof result === "object" && result !== null && "structuredResponse" in result
359
+ ? result.structuredResponse
360
+ : undefined;
361
+ return visibleOutput || fallbackOutput || (structuredResponse !== undefined ? JSON.stringify(structuredResponse) : "") || JSON.stringify(result);
344
362
  }
345
363
  export async function resolveBuiltinMiddlewareTools(input) {
346
364
  const backend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
@@ -20,6 +20,8 @@ export type StreamEventProjectionState = {
20
20
  openTaskDelegations: number;
21
21
  openToolCapableTaskDelegations: number;
22
22
  taskDelegationHasToolsStack: boolean[];
23
+ taskDelegationFindingsStack: string[];
24
+ lastCompletedTaskDelegationFindings: string;
23
25
  seenTerminalOutputs: Set<string>;
24
26
  };
25
27
  export declare function createStreamEventProjectionState(): StreamEventProjectionState;
@@ -1,4 +1,4 @@
1
- import { sanitizeVisibleText } from "../parsing/output-parsing.js";
1
+ import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
2
2
  import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
3
3
  import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
4
4
  import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
@@ -22,6 +22,8 @@ export function createStreamEventProjectionState() {
22
22
  openTaskDelegations: 0,
23
23
  openToolCapableTaskDelegations: 0,
24
24
  taskDelegationHasToolsStack: [],
25
+ taskDelegationFindingsStack: [],
26
+ lastCompletedTaskDelegationFindings: "",
25
27
  seenTerminalOutputs: new Set(),
26
28
  };
27
29
  }
@@ -163,6 +165,90 @@ function isUpstreamPlaceholderTaskResult(value) {
163
165
  && typeof message?.tool_call_id === "string"
164
166
  && message?.content === "Task completed";
165
167
  }
168
+ function extractDelegatedFindingsCandidateText(value, depth = 0) {
169
+ if (depth > 6 || value === null || value === undefined) {
170
+ return "";
171
+ }
172
+ if (typeof value === "string") {
173
+ return value;
174
+ }
175
+ const visibleOutput = extractVisibleOutput(value);
176
+ if (visibleOutput) {
177
+ return visibleOutput;
178
+ }
179
+ const toolFallback = extractToolFallbackContext(value);
180
+ if (toolFallback) {
181
+ return toolFallback;
182
+ }
183
+ const directText = readTextContent(value);
184
+ if (directText) {
185
+ return directText;
186
+ }
187
+ if (Array.isArray(value)) {
188
+ for (let index = value.length - 1; index >= 0; index -= 1) {
189
+ const nested = extractDelegatedFindingsCandidateText(value[index], depth + 1);
190
+ if (nested) {
191
+ return nested;
192
+ }
193
+ }
194
+ return "";
195
+ }
196
+ if (typeof value === "object") {
197
+ const typed = value;
198
+ for (const key of ["messages", "update", "output", "content", "data", "summary"]) {
199
+ const nested = extractDelegatedFindingsCandidateText(typed[key], depth + 1);
200
+ if (nested) {
201
+ return nested;
202
+ }
203
+ }
204
+ }
205
+ return "";
206
+ }
207
+ function normalizeDelegatedFindingsText(value) {
208
+ const directText = extractDelegatedFindingsCandidateText(value);
209
+ if (!directText) {
210
+ return "";
211
+ }
212
+ const normalized = sanitizeVisibleText(directText).trim();
213
+ if (normalized === "Task completed") {
214
+ return "";
215
+ }
216
+ if (/Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i.test(normalized)) {
217
+ return "";
218
+ }
219
+ return normalized;
220
+ }
221
+ function isInternalRuntimeSpillPathErrorValue(value) {
222
+ const directText = typeof value === "string"
223
+ ? value
224
+ : extractToolFallbackContext(value) || extractVisibleOutput(value) || readTextContent(value);
225
+ if (directText && /\/large_tool_results\/|internal runtime spill path/i.test(directText)) {
226
+ return true;
227
+ }
228
+ if (typeof value === "object" && value !== null) {
229
+ try {
230
+ return /\/large_tool_results\/|internal runtime spill path/i.test(JSON.stringify(value));
231
+ }
232
+ catch {
233
+ return false;
234
+ }
235
+ }
236
+ return false;
237
+ }
238
+ function recordDelegatedFindings(state, value, source = "tool") {
239
+ if (state.taskDelegationFindingsStack.length === 0) {
240
+ return;
241
+ }
242
+ const normalized = normalizeDelegatedFindingsText(value);
243
+ if (!normalized) {
244
+ return;
245
+ }
246
+ const current = state.taskDelegationFindingsStack[state.taskDelegationFindingsStack.length - 1] ?? "";
247
+ if (source === "terminal" && current) {
248
+ return;
249
+ }
250
+ state.taskDelegationFindingsStack[state.taskDelegationFindingsStack.length - 1] = normalized;
251
+ }
166
252
  function updateDelegationState(state, event, countConfiguredToolsForAgentId) {
167
253
  if (typeof event !== "object" || event === null) {
168
254
  return;
@@ -185,11 +271,13 @@ function updateDelegationState(state, event, countConfiguredToolsForAgentId) {
185
271
  state.sawDelegatedAgentWithConfiguredTools = true;
186
272
  state.openToolCapableTaskDelegations += 1;
187
273
  }
274
+ state.taskDelegationFindingsStack.push("");
188
275
  return;
189
276
  }
190
277
  if (isTaskEnd || isTaskError) {
191
278
  state.openTaskDelegations = Math.max(0, state.openTaskDelegations - 1);
192
279
  const delegatedTaskHadTools = state.taskDelegationHasToolsStack.pop() === true;
280
+ state.lastCompletedTaskDelegationFindings = state.taskDelegationFindingsStack.pop() ?? "";
193
281
  if (delegatedTaskHadTools) {
194
282
  state.openToolCapableTaskDelegations = Math.max(0, state.openToolCapableTaskDelegations - 1);
195
283
  }
@@ -254,34 +342,58 @@ export function projectRuntimeStreamEvent(params) {
254
342
  const toolResult = extractToolResult(event);
255
343
  if (toolResult) {
256
344
  const isTodoTool = toolResult.toolName === "write_todos" || toolResult.toolName === "read_todos";
257
- const isSuccessfulTaskResult = toolResult.toolName === "task" && toolResult.isError !== true;
258
- const isPlaceholderTaskResult = isSuccessfulTaskResult && isUpstreamPlaceholderTaskResult(toolResult.output);
345
+ const salvagedTaskErrorFindings = toolResult.toolName === "task"
346
+ && toolResult.isError === true
347
+ && !!state.lastCompletedTaskDelegationFindings
348
+ && isInternalRuntimeSpillPathErrorValue(toolResult.output)
349
+ ? state.lastCompletedTaskDelegationFindings
350
+ : "";
351
+ const effectiveToolOutput = salvagedTaskErrorFindings || toolResult.output;
352
+ const effectiveToolIsError = salvagedTaskErrorFindings ? false : toolResult.isError;
353
+ const isSuccessfulTaskResult = toolResult.toolName === "task" && effectiveToolIsError !== true;
259
354
  const isDelegatedExecutionTool = (isDelegatedAgentEvent || state.openToolCapableTaskDelegations > 0)
260
355
  && toolResult.toolName !== "write_todos"
261
356
  && toolResult.toolName !== "read_todos"
262
357
  && toolResult.toolName !== "task";
358
+ if (isDelegatedExecutionTool && toolResult.isError !== true) {
359
+ recordDelegatedFindings(state, toolResult.output, "tool");
360
+ }
361
+ const delegatedTaskFindings = isSuccessfulTaskResult && state.lastCompletedTaskDelegationFindings
362
+ ? state.lastCompletedTaskDelegationFindings
363
+ : "";
364
+ const resolvedToolOutput = delegatedTaskFindings || effectiveToolOutput;
365
+ const isPlaceholderTaskResult = isSuccessfulTaskResult
366
+ && !delegatedTaskFindings
367
+ && isUpstreamPlaceholderTaskResult(toolResult.output);
263
368
  state.emittedToolResult = true;
264
- state.emittedToolError = state.emittedToolError || toolResult.isError === true;
265
- state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult || toolResult.isError !== true;
369
+ state.emittedToolError = state.emittedToolError || effectiveToolIsError === true;
370
+ state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult || effectiveToolIsError !== true;
266
371
  state.emittedSuccessfulTaskResult = state.emittedSuccessfulTaskResult || isSuccessfulTaskResult;
267
372
  state.emittedPlaceholderTaskResult = state.emittedPlaceholderTaskResult || isPlaceholderTaskResult;
268
373
  state.emittedNonTodoToolResult = state.emittedNonTodoToolResult || !isTodoTool;
269
- state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool && toolResult.isError !== true);
270
- state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool;
374
+ state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool && effectiveToolIsError !== true);
375
+ state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool || !!salvagedTaskErrorFindings;
271
376
  state.emittedSuccessfulDelegatedExecutionToolResult =
272
- state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool && toolResult.isError !== true);
377
+ state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool && effectiveToolIsError !== true) || !!salvagedTaskErrorFindings;
378
+ if (salvagedTaskErrorFindings) {
379
+ state.hasFailedTaskDelegation = false;
380
+ }
273
381
  chunks.push({
274
382
  kind: "tool-result",
275
383
  toolName: resolveModelFacingToolName(toolResult.toolName, toolNameMapping, primaryTools),
276
- output: toolResult.output,
277
- isError: toolResult.isError,
384
+ output: resolvedToolOutput,
385
+ isError: effectiveToolIsError,
278
386
  });
387
+ if (toolResult.toolName === "task") {
388
+ state.lastCompletedTaskDelegationFindings = "";
389
+ }
279
390
  }
280
391
  const output = allowVisibleContent ? extractTerminalStreamOutput(event) : "";
281
392
  if (!allowVisibleContent) {
282
393
  const delegatedTerminalOutput = extractTerminalStreamOutput(event);
283
394
  if (delegatedTerminalOutput) {
284
395
  state.emittedDelegatedTerminalOutput = true;
396
+ recordDelegatedFindings(state, delegatedTerminalOutput, "terminal");
285
397
  }
286
398
  }
287
399
  if (output && !shouldSuppressVisibleToolCallText(output)) {
@@ -406,7 +406,7 @@ kind: ToolSets
406
406
  spec:
407
407
  - name: web-search
408
408
  type: provider
409
- description: ${options.provider} web search tool for current research.
409
+ description: Use this when the runtime needs current web discovery for research tasks. Do not use this as a substitute for synthesis or comparison.
410
410
  providerTool:
411
411
  provider: ${options.provider}
412
412
  tool: webSearch
@@ -421,7 +421,7 @@ function renderResearchAgentYaml(options) {
421
421
  kind: Agent
422
422
  metadata:
423
423
  name: research
424
- description: Host-facing research agent for investigating and synthesizing answers.
424
+ description: Use this when a research request needs synthesis, bounded tool use, or specialist delegation. Answer directly when one pass is enough; delegate only when the sub-task boundary is clear.
425
425
  spec:
426
426
  runtime:
427
427
  runtimeMemory: default
@@ -447,7 +447,7 @@ function renderResearchAnalystYaml(options) {
447
447
  kind: Agent
448
448
  metadata:
449
449
  name: research-analyst
450
- description: Analyst subagent for source gathering, comparison, and evidence extraction.
450
+ description: Use this when the task needs source gathering, comparison, and evidence extraction for a bounded research sub-task. Do not use this for final answer ownership.
451
451
  spec:
452
452
  runtime:
453
453
  runtimeMemory: default
@@ -494,6 +494,12 @@ ${stepTwo}
494
494
  3. Compare evidence instead of trusting a single source.
495
495
  4. Separate verified facts from inference.
496
496
  5. End with a concise synthesis, explicit caveats, and source links when available.
497
+
498
+ ## Rules
499
+
500
+ - Do not rely on one source when the question requires comparison.
501
+ - Do not present inference as a verified fact.
502
+ - If current information is required, prefer fresh sources over model memory.
497
503
  `;
498
504
  }
499
505
  function renderStarterSkill(name) {
@@ -9,6 +9,7 @@ import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
9
9
  import { parseEmbeddingModelObject, parseMcpServerObject, parseModelObject, parseToolObject, parseVectorStoreObject, validateEmbeddingModelObject, validateMcpServerObject, validateModelObject, validateToolObject, validateVectorStoreObject, } from "./resource-compilers.js";
10
10
  import { validateAgent, validateTopology } from "./validate.js";
11
11
  import { compileBinding } from "./agent-binding-compiler.js";
12
+ import { resolveFrameworkContractValidationMode, validateFrameworkContracts, } from "./framework-contract-validation.js";
12
13
  import { discoverSubagents, ensureDiscoverySources } from "./support/discovery.js";
13
14
  import { collectAgentDiscoverySourceRefs, collectToolSourceRefs } from "./support/source-collectors.js";
14
15
  import { getRoutingDefaultAgentId, getRuntimeSources, getRuntimeResources, getRuntimeStorageRoots, getToolModuleDiscoveryConfig, getRoutingRules, resolveRefId, } from "./support/workspace-ref-utils.js";
@@ -412,6 +413,24 @@ export async function loadWorkspace(workspaceRoot, options = {}) {
412
413
  externalResourceCount: externalResources.length,
413
414
  });
414
415
  validateToolNameConflicts(tools);
416
+ const frameworkContractValidation = resolveFrameworkContractValidationMode(options.frameworkContractValidation);
417
+ const contractOwnedRoots = Array.from(new Set([
418
+ workspaceRoot,
419
+ ...(localResourceRoot ? [localResourceRoot] : []),
420
+ ...resolvedConfiguredResources.map((resource) => resource.root),
421
+ ]));
422
+ await traceStartupStage("workspace.validate.frameworkContracts", async () => {
423
+ validateFrameworkContracts({
424
+ agents: loaded.agents,
425
+ tools,
426
+ skillRegistry,
427
+ ownedRoots: contractOwnedRoots,
428
+ mode: frameworkContractValidation,
429
+ });
430
+ }, {
431
+ workspaceRoot,
432
+ mode: frameworkContractValidation,
433
+ });
415
434
  const resources = Array.from(new Set([
416
435
  ...(localResourceRoot ? [localResourceRoot] : []),
417
436
  ...runtimeSources.tools.filter((source) => isNpmSourceUri(source)),
@@ -0,0 +1,10 @@
1
+ import type { ParsedAgentObject, ParsedToolObject } from "../contracts/types.js";
2
+ export type FrameworkContractValidationMode = "off" | "warn" | "error";
3
+ export declare function resolveFrameworkContractValidationMode(mode: FrameworkContractValidationMode | undefined): FrameworkContractValidationMode;
4
+ export declare function validateFrameworkContracts(input: {
5
+ agents: ParsedAgentObject[];
6
+ tools: Map<string, ParsedToolObject>;
7
+ skillRegistry: Map<string, string>;
8
+ ownedRoots: string[];
9
+ mode?: FrameworkContractValidationMode;
10
+ }): void;
@@ -0,0 +1,133 @@
1
+ import { readFileSync } from "node:fs";
2
+ import path from "node:path";
3
+ import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
4
+ import { getAgentExecutionConfigValue } from "./support/agent-execution-config.js";
5
+ import { resolvePromptValue } from "./support/workspace-ref-utils.js";
6
/**
 * Coerces an arbitrary value into a validation mode.
 *
 * Only the active modes ("warn" and "error") pass through; every other value,
 * including "off" and undefined, collapses to "off".
 *
 * @param {unknown} mode - Candidate mode value.
 * @returns {"off" | "warn" | "error"} The normalized mode.
 */
function normalizeMode(mode) {
    switch (mode) {
        case "warn":
        case "error":
            return mode;
        default:
            return "off";
    }
}
12
/**
 * Determines the framework contract validation mode to use at startup.
 *
 * Precedence: an explicit, recognized `mode` wins; otherwise the
 * AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION environment variable is read
 * (whitespace-trimmed and lower-cased); otherwise the result is "off".
 *
 * @param {"off" | "warn" | "error" | undefined} mode - Mode supplied by the caller, if any.
 * @returns {"off" | "warn" | "error"} The effective mode.
 */
export function resolveFrameworkContractValidationMode(mode) {
    const recognizedModes = ["warn", "error", "off"];
    if (recognizedModes.includes(mode)) {
        return mode;
    }
    const fromEnvironment = process.env.AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION?.trim().toLowerCase();
    return recognizedModes.includes(fromEnvironment) ? fromEnvironment : "off";
}
22
/**
 * Reports whether `candidate` is `root` itself or lies somewhere beneath it.
 *
 * Both paths are resolved to absolute form first, so relative inputs and
 * embedded `..` segments are handled.
 *
 * @param {string} candidate - Path to classify.
 * @param {string} root - Directory that may contain `candidate`.
 * @returns {boolean} True when `candidate` equals or is nested inside `root`.
 */
function isPathWithinRoot(candidate, root) {
    const relative = path.relative(path.resolve(root), path.resolve(candidate));
    if (relative === "") {
        return true;
    }
    // An absolute result means the two paths share no common base (e.g. different Windows drives).
    if (path.isAbsolute(relative)) {
        return false;
    }
    // Only a leading ".." *segment* escapes the root. A plain startsWith("..") check would
    // also reject in-root entries whose name merely begins with two dots (e.g. "..drafts").
    return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
26
/**
 * Reports whether `candidate` falls inside at least one of the given roots.
 *
 * @param {string} candidate - Path to classify.
 * @param {string[]} roots - Directories considered workspace-owned.
 * @returns {boolean} True as soon as one root contains `candidate`; false when none do.
 */
function isWorkspaceOwnedPath(candidate, roots) {
    for (const ownedRoot of roots) {
        if (isPathWithinRoot(candidate, ownedRoot)) {
            return true;
        }
    }
    return false;
}
29
/**
 * Appends a `{ code, message }` record to the shared issue list.
 *
 * @param {Array<{ code: string, message: string }>} issues - Accumulator, mutated in place.
 * @param {string} code - Machine-readable issue identifier.
 * @param {string} message - Human-readable explanation.
 * @returns {void}
 */
function addIssue(issues, code, message) {
    const entry = { code, message };
    issues.push(entry);
}
32
/**
 * Applies the contract heuristics to a single agent and records any findings.
 *
 * Every agent is checked for a sufficiently long description. Agents that
 * declare subagents get the orchestrator checks; agents whose id appears in
 * `referencedSubagentIds` get the subagent checks. An agent can be both.
 *
 * @param {object} agent - Parsed agent definition.
 * @param {Set<string>} referencedSubagentIds - Ids of agents referenced as subagents by other agents.
 * @param {Array<{ code: string, message: string }>} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateAgentContract(agent, referencedSubagentIds, issues) {
    const summary = agent.description.trim();
    const rawPrompt = getAgentExecutionConfigValue(agent, "systemPrompt");
    const prompt = resolvePromptValue(rawPrompt, path.dirname(agent.sourcePath));
    const delegates = agent.subagentRefs.length > 0
        || agent.subagentPathRefs.length > 0
        || (agent.asyncSubagents?.length ?? 0) > 0;
    const actsAsSubagent = referencedSubagentIds.has(agent.id);
    const exposesTools = agent.toolRefs.length > 0
        || (agent.toolBindings?.length ?? 0) > 0
        || (agent.inlineTools?.length ?? 0) > 0;
    const responseFormat = getAgentExecutionConfigValue(agent, "responseFormat");
    // Evaluated lazily so the prompt is only inspected for roles that require one.
    const lacksPrompt = () => !prompt?.trim();
    const report = (code, message) => {
        addIssue(issues, code, message);
    };

    if (summary.length < 24) {
        report("agent.description.too_short", `Agent ${agent.id} should use a more specific description that explains when it should be used.`);
    }

    if (delegates) {
        if (lacksPrompt()) {
            report("agent.orchestrator.missing_prompt", `Delegating agent ${agent.id} should define a systemPrompt that explains decomposition, delegation, synthesis, and stop conditions.`);
        }
        const statesDelegationBoundary = /(delegate|delegation|subagent|decompose|synthesi|answer directly|parallel)/i.test(summary);
        if (!statesDelegationBoundary) {
            report("agent.orchestrator.description_boundary", `Delegating agent ${agent.id} description should make its delegation boundary explicit, for example when it should answer directly versus delegate.`);
        }
    }

    if (!actsAsSubagent) {
        return;
    }
    if (lacksPrompt()) {
        report("agent.subagent.missing_prompt", `Subagent ${agent.id} should define a systemPrompt that makes its operating boundary and output contract explicit.`);
    }
    const statesTrigger = /(use this when|when the task|for .*?(analysis|research|search|debug|review|triage|inspection|extraction|comparison|validation|implementation))/i.test(summary);
    if (!statesTrigger) {
        report("agent.subagent.description_trigger", `Subagent ${agent.id} description should clarify when it should be delegated to and what narrow task class it owns.`);
    }
    if (agent.executionMode === "deepagent" && exposesTools && responseFormat === undefined) {
        report("agent.subagent.deepagent.missing_response_format", `DeepAgents subagent ${agent.id} exposes tools, so it should define config.responseFormat to guarantee a stable task result for its parent agent.`);
    }
}
64
/**
 * Removes a leading `---`-delimited frontmatter block from a document.
 *
 * The input is returned unchanged when it does not start with such a block.
 *
 * @param {string} document - Full file contents.
 * @returns {string} The document body without the leading frontmatter.
 */
function stripFrontmatter(document) {
    const leadingFrontmatter = /^---\s*\n[\s\S]*?\n---\s*(?:\n|$)/;
    return document.replace(leadingFrontmatter, "");
}
67
/**
 * Checks one skill's SKILL.md body for a usage trigger, a workflow, and
 * execution boundaries, recording an issue for each element that is missing.
 *
 * The skill is named in messages by its metadata name, falling back to the
 * directory name when the metadata name is empty.
 *
 * @param {string} skillRoot - Directory containing the skill's SKILL.md.
 * @param {Array<{ code: string, message: string }>} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateSkillContract(skillRoot, issues) {
    const metadata = validateSkillMetadata(skillRoot);
    const skillFile = path.join(skillRoot, "SKILL.md");
    const body = stripFrontmatter(readFileSync(skillFile, "utf8"));
    const skillName = metadata.name || path.basename(skillRoot);
    // Each entry pairs the pattern the body must contain with the issue raised when it is absent.
    const requirements = [
        {
            pattern: /(Use this skill when|Use this when)/i,
            code: "skill.missing_trigger",
            message: `Skill ${skillName} should explain when it should be used, preferably with a clear "Use this skill when..." trigger.`,
        },
        {
            pattern: /(## Workflow|^## Workflow|^\d+\.\s)/m,
            code: "skill.missing_workflow",
            message: `Skill ${skillName} should define an explicit workflow instead of only background prose.`,
        },
        {
            pattern: /(## Rules|Do not|Output|Caveat|Caveats)/i,
            code: "skill.missing_boundaries",
            message: `Skill ${skillName} should include execution boundaries such as rules, non-goals, caveats, or output expectations.`,
        },
    ];
    for (const { pattern, code, message } of requirements) {
        if (!pattern.test(body)) {
            addIssue(issues, code, message);
        }
    }
}
82
/**
 * Checks a tool's description, recording at most one issue.
 *
 * A description shorter than 20 characters (after trimming) is reported as too
 * short. Otherwise it is reported when it contains none of the boundary
 * phrases "Use this when", "Do not use", or "Before calling".
 *
 * @param {object} tool - Parsed tool definition.
 * @param {Array<{ code: string, message: string }>} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateToolContract(tool, issues) {
    const summary = tool.description.trim();
    const isTooShort = summary.length < 20;
    if (isTooShort) {
        addIssue(issues, "tool.description.too_short", `Tool ${tool.id} should use a more specific description that explains invocation boundaries and argument expectations.`);
    }
    else if (!/(Use this when|Do not use|Before calling)/i.test(summary)) {
        addIssue(issues, "tool.description.missing_boundary", `Tool ${tool.id} description should describe when to call it and, ideally, when not to call it or what must be true before calling it.`);
    }
}
92
/**
 * Validates workspace-owned agents, skills, and tools against the framework
 * contract heuristics and reports the combined findings.
 *
 * Definitions whose source path is outside `input.ownedRoots` are ignored.
 *
 * @param {object} input
 * @param {object[]} input.agents - Parsed agents.
 * @param {Map<string, object>} input.tools - Parsed tools keyed by name.
 * @param {Map<string, string>} input.skillRegistry - Skill name to skill root directory.
 * @param {string[]} input.ownedRoots - Roots whose definitions are subject to validation.
 * @param {"off" | "warn" | "error"} [input.mode] - Omitted or "off" skips validation entirely.
 * @returns {void}
 * @throws {Error} In "error" mode when at least one issue was found.
 */
export function validateFrameworkContracts(input) {
    const mode = normalizeMode(input.mode);
    if (mode === "off") {
        return;
    }
    const issues = [];

    // Ids of every agent that some other agent lists as a subagent, without the "agent/" prefix.
    const referencedSubagentIds = new Set();
    for (const agent of input.agents) {
        for (const ref of agent.subagentRefs) {
            referencedSubagentIds.add(ref.replace(/^agent\//, ""));
        }
    }

    const isOwned = (sourcePath) => isWorkspaceOwnedPath(sourcePath, input.ownedRoots);

    for (const agent of input.agents) {
        if (isOwned(agent.sourcePath)) {
            validateAgentContract(agent, referencedSubagentIds, issues);
        }
    }
    for (const [skillName, skillRoot] of input.skillRegistry) {
        if (isOwned(skillRoot)) {
            validateSkillContract(skillRoot, issues);
            if (!skillName.trim()) {
                addIssue(issues, "skill.name.empty", `Skill ${skillRoot} must define a stable name in frontmatter.`);
            }
        }
    }
    for (const tool of input.tools.values()) {
        if (isOwned(tool.sourcePath)) {
            validateToolContract(tool, issues);
        }
    }

    if (issues.length === 0) {
        return;
    }
    const reportLines = [
        "Framework contract validation failed.",
        "The workspace should follow the agent-harness contract-writing guidance for agents, skills, and tools.",
    ];
    for (const { code, message: detail } of issues) {
        reportLines.push(`- [${code}] ${detail}`);
    }
    const message = reportLines.join("\n");
    // Only "warn" and "error" can reach this point; anything but "warn" is fatal.
    if (mode !== "warn") {
        throw new Error(message);
    }
    console.warn(`[agent-harness] ${message}`);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.333",
3
+ "version": "0.0.337",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",