@botbotgo/agent-harness 0.0.335 → 0.0.337
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/contracts/workspace.d.ts +1 -0
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/adapter/flow/stream-runtime.js +18 -8
- package/dist/runtime/adapter/invocation-result.js +21 -5
- package/dist/runtime/adapter/middleware-assembly.js +31 -13
- package/dist/runtime/adapter/stream-event-projection.js +52 -14
- package/dist/scaffold/init-project.js +9 -3
- package/dist/workspace/compile.js +19 -0
- package/dist/workspace/framework-contract-validation.d.ts +10 -0
- package/dist/workspace/framework-contract-validation.js +133 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -534,10 +534,19 @@ import { createAgentHarness } from "@botbotgo/agent-harness";
|
|
|
534
534
|
const runtime = await createAgentHarness("/path/to/workspace", {
|
|
535
535
|
load: {
|
|
536
536
|
overlayRoots: ["/path/to/framework-defaults", "/path/to/product-overrides"],
|
|
537
|
+
frameworkContractValidation: "warn",
|
|
537
538
|
},
|
|
538
539
|
});
|
|
539
540
|
```
|
|
540
541
|
|
|
542
|
+
Framework contract validation modes:
|
|
543
|
+
|
|
544
|
+
- `off` keeps startup behavior unchanged
|
|
545
|
+
- `warn` loads the workspace and emits contract-quality warnings for workspace-owned agents, skills, and tools
|
|
546
|
+
- `error` fails startup when those workspace-owned definitions drift away from the framework contract
|
|
547
|
+
|
|
548
|
+
You can also control the same startup behavior with `AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION=warn|error|off`.
|
|
549
|
+
|
|
541
550
|
### Run A Request
|
|
542
551
|
|
|
543
552
|
```ts
|
package/dist/package-version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.336";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.336";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
|
package/dist/runtime/adapter/flow/stream-runtime.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
1
|
+
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
2
2
|
import { buildInvocationRequest } from "../model/invocation-request.js";
|
|
3
3
|
import { buildRawModelMessages } from "../model/message-assembly.js";
|
|
4
4
|
import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
|
|
@@ -79,11 +79,21 @@ function hasUnresolvedExecution(evidence) {
|
|
|
79
79
|
|| evidence.hasOpenTaskDelegation);
|
|
80
80
|
}
|
|
81
81
|
function hasMissingDelegatedExecutionEvidence(evidence) {
|
|
82
|
-
return
|
|
82
|
+
return false;
|
|
83
83
|
}
|
|
84
84
|
function hasMissingDelegatedFindings(evidence) {
|
|
85
85
|
return evidence.hasDelegatedAgentWithConfiguredTools && evidence.hasOnlyPlaceholderTaskCompletion;
|
|
86
86
|
}
|
|
87
|
+
function resolveDelegatedExecutionRecoveryInstruction(evidence) {
|
|
88
|
+
if (hasMissingDelegatedFindings(evidence)
|
|
89
|
+
|| (evidence.hasOpenTaskDelegation
|
|
90
|
+
&& evidence.hasDelegatedAgentWithConfiguredTools
|
|
91
|
+
&& !evidence.hasPlanStateEvidence
|
|
92
|
+
&& !evidence.hasFailedTaskDelegation)) {
|
|
93
|
+
return EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
|
|
94
|
+
}
|
|
95
|
+
return null;
|
|
96
|
+
}
|
|
87
97
|
function createUnresolvedExecutionError(evidence) {
|
|
88
98
|
const reasons = [];
|
|
89
99
|
if (evidence.hasIncompletePlanState) {
|
|
@@ -95,9 +105,6 @@ function createUnresolvedExecutionError(evidence) {
|
|
|
95
105
|
if (evidence.hasOpenTaskDelegation) {
|
|
96
106
|
reasons.push("delegated task has not finished");
|
|
97
107
|
}
|
|
98
|
-
if (hasMissingDelegatedExecutionEvidence(evidence)) {
|
|
99
|
-
reasons.push("delegated agent ended without surfacing any real tool execution evidence");
|
|
100
|
-
}
|
|
101
108
|
if (hasMissingDelegatedFindings(evidence)) {
|
|
102
109
|
reasons.push("delegated task returned only the upstream placeholder result without surfaced final findings");
|
|
103
110
|
}
|
|
@@ -400,7 +407,10 @@ export async function* streamRuntimeExecution(options) {
|
|
|
400
407
|
throw error;
|
|
401
408
|
}
|
|
402
409
|
const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
403
|
-
|
|
410
|
+
const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects
|
|
411
|
+
? resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence)
|
|
412
|
+
: null;
|
|
413
|
+
if (hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction) {
|
|
404
414
|
throw createUnresolvedExecutionError(streamedExecutionEvidence);
|
|
405
415
|
}
|
|
406
416
|
const executionWithoutToolEvidenceInstruction = projectionState.emittedOutput
|
|
@@ -411,7 +421,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
411
421
|
: null;
|
|
412
422
|
const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
|
|
413
423
|
? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
|
|
414
|
-
: executionWithoutToolEvidenceInstruction;
|
|
424
|
+
: delegatedExecutionRecoveryInstruction ?? executionWithoutToolEvidenceInstruction;
|
|
415
425
|
if (retryInstruction) {
|
|
416
426
|
let retried;
|
|
417
427
|
retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
|
|
@@ -609,7 +619,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
609
619
|
...invokeExecutionEvidence,
|
|
610
620
|
hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
|
|
611
621
|
})
|
|
612
|
-
:
|
|
622
|
+
: resolveDelegatedExecutionRecoveryInstruction(invokeExecutionEvidence);
|
|
613
623
|
if (invokeFallbackRecoveryInstruction) {
|
|
614
624
|
const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, invokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
|
|
615
625
|
const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
|
package/dist/runtime/adapter/invocation-result.js
CHANGED
|
@@ -17,19 +17,35 @@ function isPlaceholderTaskCompletion(value) {
|
|
|
17
17
|
const normalized = sanitizeVisibleText(value).trim();
|
|
18
18
|
return normalized === "Task completed";
|
|
19
19
|
}
|
|
20
|
+
function isLowSignalStructuredCompletion(value) {
|
|
21
|
+
const normalized = sanitizeVisibleText(value).trim();
|
|
22
|
+
if (!normalized) {
|
|
23
|
+
return false;
|
|
24
|
+
}
|
|
25
|
+
return /Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i.test(normalized);
|
|
26
|
+
}
|
|
20
27
|
function normalizeToolOutputText(output) {
|
|
21
28
|
const directText = typeof output === "string"
|
|
22
29
|
? sanitizeVisibleText(output).trim()
|
|
23
30
|
: "";
|
|
24
|
-
if (directText
|
|
31
|
+
if (directText
|
|
32
|
+
&& !looksLikeLeakedToolCallText(directText)
|
|
33
|
+
&& !isPlaceholderTaskCompletion(directText)
|
|
34
|
+
&& !isLowSignalStructuredCompletion(directText)) {
|
|
25
35
|
return directText;
|
|
26
36
|
}
|
|
27
37
|
const visibleOutput = sanitizeVisibleText(extractVisibleOutput(output)).trim();
|
|
28
|
-
if (visibleOutput
|
|
38
|
+
if (visibleOutput
|
|
39
|
+
&& !looksLikeLeakedToolCallText(visibleOutput)
|
|
40
|
+
&& !isPlaceholderTaskCompletion(visibleOutput)
|
|
41
|
+
&& !isLowSignalStructuredCompletion(visibleOutput)) {
|
|
29
42
|
return visibleOutput;
|
|
30
43
|
}
|
|
31
44
|
const fallbackContext = sanitizeVisibleText(extractToolFallbackContext(output)).trim();
|
|
32
|
-
if (fallbackContext
|
|
45
|
+
if (fallbackContext
|
|
46
|
+
&& !looksLikeLeakedToolCallText(fallbackContext)
|
|
47
|
+
&& !isPlaceholderTaskCompletion(fallbackContext)
|
|
48
|
+
&& !isLowSignalStructuredCompletion(fallbackContext)) {
|
|
33
49
|
return fallbackContext;
|
|
34
50
|
}
|
|
35
51
|
return "";
|
|
@@ -72,7 +88,7 @@ export function resolveDeterministicFinalOutput(params) {
|
|
|
72
88
|
const sanitizedVisibleOutput = visibleOutput && !looksLikeLeakedToolCallText(visibleOutput)
|
|
73
89
|
? sanitizeVisibleText(visibleOutput).trim()
|
|
74
90
|
: "";
|
|
75
|
-
if (sanitizedVisibleOutput) {
|
|
91
|
+
if (sanitizedVisibleOutput && !isLowSignalStructuredCompletion(sanitizedVisibleOutput)) {
|
|
76
92
|
return sanitizedVisibleOutput;
|
|
77
93
|
}
|
|
78
94
|
const successfulToolOutput = extractLatestSuccessfulNonTodoToolResultText(executedToolResults);
|
|
@@ -82,7 +98,7 @@ export function resolveDeterministicFinalOutput(params) {
|
|
|
82
98
|
const sanitizedToolFallback = toolFallback && !looksLikeLeakedToolCallText(toolFallback)
|
|
83
99
|
? sanitizeVisibleText(toolFallback).trim()
|
|
84
100
|
: "";
|
|
85
|
-
return sanitizedToolFallback;
|
|
101
|
+
return isLowSignalStructuredCompletion(sanitizedToolFallback) ? "" : sanitizedToolFallback;
|
|
86
102
|
}
|
|
87
103
|
export function extractDelegatedFindingsText(executedToolResults) {
|
|
88
104
|
return extractLatestSuccessfulTaskResultText(executedToolResults);
|
package/dist/runtime/adapter/middleware-assembly.js
CHANGED
|
@@ -73,9 +73,6 @@ function hasIncompletePlanStateInValue(value) {
|
|
|
73
73
|
function hasUnresolvedDelegatedExecution(state) {
|
|
74
74
|
return state.hasIncompletePlanState || state.openTaskDelegations > 0;
|
|
75
75
|
}
|
|
76
|
-
function hasMissingDelegatedToolExecutionEvidence(state, subagentHasTools) {
|
|
77
|
-
return subagentHasTools && !state.emittedToolResult && !state.emittedToolError;
|
|
78
|
-
}
|
|
79
76
|
function formatDelegatedExecutionBlocker(state) {
|
|
80
77
|
const summary = state.emittedOutput.trim();
|
|
81
78
|
if (summary) {
|
|
@@ -90,7 +87,7 @@ function formatDelegatedExecutionBlocker(state) {
|
|
|
90
87
|
return "Delegated investigation ended before the plan was completed.";
|
|
91
88
|
}
|
|
92
89
|
function requiresDelegatedExecutionRecovery(state) {
|
|
93
|
-
return hasUnresolvedDelegatedExecution(state)
|
|
90
|
+
return hasUnresolvedDelegatedExecution(state);
|
|
94
91
|
}
|
|
95
92
|
const DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION = [
|
|
96
93
|
"Your previous attempt ended with a tool failure while the todo board still had unfinished work.",
|
|
@@ -246,7 +243,6 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
246
243
|
summarizationModel,
|
|
247
244
|
});
|
|
248
245
|
const resolvedSubagentTools = selectedSubagent.tools ?? input.resolveTools(primaryTools, input.binding);
|
|
249
|
-
const subagentHasTools = (resolvedSubagentTools?.length ?? 0) > 0;
|
|
250
246
|
const runnable = createAgent({
|
|
251
247
|
model: (selectedSubagent.model ?? resolvedHostModel),
|
|
252
248
|
tools: resolvedSubagentTools,
|
|
@@ -297,17 +293,39 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
297
293
|
};
|
|
298
294
|
let { projectionState, executedToolResults } = await runWithStreamInspection();
|
|
299
295
|
if (requiresDelegatedExecutionRecovery(projectionState)) {
|
|
296
|
+
const initialProjectionState = projectionState;
|
|
297
|
+
const initialExecutedToolResults = executedToolResults;
|
|
298
|
+
const initialDeterministicOutput = resolveDeterministicFinalOutput({
|
|
299
|
+
visibleOutput: initialProjectionState.emittedOutput.trim(),
|
|
300
|
+
executedToolResults: initialExecutedToolResults,
|
|
301
|
+
});
|
|
300
302
|
const recoveryInstruction = projectionState.hasIncompletePlanState && projectionState.emittedToolError
|
|
301
303
|
? `${AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION}\n\n${DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION}`
|
|
302
304
|
: AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
|
|
303
|
-
|
|
305
|
+
const recovered = await runWithStreamInspection(recoveryInstruction);
|
|
306
|
+
const recoveredDeterministicOutput = resolveDeterministicFinalOutput({
|
|
307
|
+
visibleOutput: recovered.projectionState.emittedOutput.trim(),
|
|
308
|
+
executedToolResults: recovered.executedToolResults,
|
|
309
|
+
});
|
|
310
|
+
const recoveredHasSubstantiveExecution = recoveredDeterministicOutput.length > 0;
|
|
311
|
+
if (recoveredHasSubstantiveExecution) {
|
|
312
|
+
projectionState = recovered.projectionState;
|
|
313
|
+
executedToolResults = recovered.executedToolResults;
|
|
314
|
+
}
|
|
315
|
+
else {
|
|
316
|
+
projectionState = initialProjectionState;
|
|
317
|
+
executedToolResults = initialExecutedToolResults;
|
|
318
|
+
if (initialDeterministicOutput) {
|
|
319
|
+
projectionState = {
|
|
320
|
+
...projectionState,
|
|
321
|
+
emittedOutput: initialDeterministicOutput,
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
}
|
|
304
325
|
}
|
|
305
326
|
if (requiresDelegatedExecutionRecovery(projectionState)) {
|
|
306
327
|
throw new Error(formatDelegatedExecutionBlocker(projectionState));
|
|
307
328
|
}
|
|
308
|
-
if (hasMissingDelegatedToolExecutionEvidence(projectionState, subagentHasTools)) {
|
|
309
|
-
throw new Error("Delegated investigation ended without any real tool execution evidence.");
|
|
310
|
-
}
|
|
311
329
|
if (projectionState.emittedToolError) {
|
|
312
330
|
const blockerMessage = resolveDeterministicFinalOutput({
|
|
313
331
|
visibleOutput: projectionState.emittedOutput.trim(),
|
|
@@ -335,12 +353,12 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
335
353
|
if (hasIncompletePlanStateInValue(result)) {
|
|
336
354
|
throw new Error(extractVisibleOutput(result) || extractToolFallbackContext(result) || "Delegated investigation ended before the plan was completed.");
|
|
337
355
|
}
|
|
338
|
-
if (subagentHasTools) {
|
|
339
|
-
throw new Error("Delegated investigation ended without any real tool execution evidence.");
|
|
340
|
-
}
|
|
341
356
|
const visibleOutput = extractVisibleOutput(result);
|
|
342
357
|
const fallbackOutput = extractToolFallbackContext(result);
|
|
343
|
-
|
|
358
|
+
const structuredResponse = typeof result === "object" && result !== null && "structuredResponse" in result
|
|
359
|
+
? result.structuredResponse
|
|
360
|
+
: undefined;
|
|
361
|
+
return visibleOutput || fallbackOutput || (structuredResponse !== undefined ? JSON.stringify(structuredResponse) : "") || JSON.stringify(result);
|
|
344
362
|
}
|
|
345
363
|
export async function resolveBuiltinMiddlewareTools(input) {
|
|
346
364
|
const backend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
|
package/dist/runtime/adapter/stream-event-projection.js
CHANGED
|
@@ -210,9 +210,32 @@ function normalizeDelegatedFindingsText(value) {
|
|
|
210
210
|
return "";
|
|
211
211
|
}
|
|
212
212
|
const normalized = sanitizeVisibleText(directText).trim();
|
|
213
|
-
|
|
213
|
+
if (normalized === "Task completed") {
|
|
214
|
+
return "";
|
|
215
|
+
}
|
|
216
|
+
if (/Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i.test(normalized)) {
|
|
217
|
+
return "";
|
|
218
|
+
}
|
|
219
|
+
return normalized;
|
|
220
|
+
}
|
|
221
|
+
function isInternalRuntimeSpillPathErrorValue(value) {
|
|
222
|
+
const directText = typeof value === "string"
|
|
223
|
+
? value
|
|
224
|
+
: extractToolFallbackContext(value) || extractVisibleOutput(value) || readTextContent(value);
|
|
225
|
+
if (directText && /\/large_tool_results\/|internal runtime spill path/i.test(directText)) {
|
|
226
|
+
return true;
|
|
227
|
+
}
|
|
228
|
+
if (typeof value === "object" && value !== null) {
|
|
229
|
+
try {
|
|
230
|
+
return /\/large_tool_results\/|internal runtime spill path/i.test(JSON.stringify(value));
|
|
231
|
+
}
|
|
232
|
+
catch {
|
|
233
|
+
return false;
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
return false;
|
|
214
237
|
}
|
|
215
|
-
function recordDelegatedFindings(state, value) {
|
|
238
|
+
function recordDelegatedFindings(state, value, source = "tool") {
|
|
216
239
|
if (state.taskDelegationFindingsStack.length === 0) {
|
|
217
240
|
return;
|
|
218
241
|
}
|
|
@@ -220,6 +243,10 @@ function recordDelegatedFindings(state, value) {
|
|
|
220
243
|
if (!normalized) {
|
|
221
244
|
return;
|
|
222
245
|
}
|
|
246
|
+
const current = state.taskDelegationFindingsStack[state.taskDelegationFindingsStack.length - 1] ?? "";
|
|
247
|
+
if (source === "terminal" && current) {
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
223
250
|
state.taskDelegationFindingsStack[state.taskDelegationFindingsStack.length - 1] = normalized;
|
|
224
251
|
}
|
|
225
252
|
function updateDelegationState(state, event, countConfiguredToolsForAgentId) {
|
|
@@ -315,36 +342,47 @@ export function projectRuntimeStreamEvent(params) {
|
|
|
315
342
|
const toolResult = extractToolResult(event);
|
|
316
343
|
if (toolResult) {
|
|
317
344
|
const isTodoTool = toolResult.toolName === "write_todos" || toolResult.toolName === "read_todos";
|
|
318
|
-
const
|
|
345
|
+
const salvagedTaskErrorFindings = toolResult.toolName === "task"
|
|
346
|
+
&& toolResult.isError === true
|
|
347
|
+
&& !!state.lastCompletedTaskDelegationFindings
|
|
348
|
+
&& isInternalRuntimeSpillPathErrorValue(toolResult.output)
|
|
349
|
+
? state.lastCompletedTaskDelegationFindings
|
|
350
|
+
: "";
|
|
351
|
+
const effectiveToolOutput = salvagedTaskErrorFindings || toolResult.output;
|
|
352
|
+
const effectiveToolIsError = salvagedTaskErrorFindings ? false : toolResult.isError;
|
|
353
|
+
const isSuccessfulTaskResult = toolResult.toolName === "task" && effectiveToolIsError !== true;
|
|
319
354
|
const isDelegatedExecutionTool = (isDelegatedAgentEvent || state.openToolCapableTaskDelegations > 0)
|
|
320
355
|
&& toolResult.toolName !== "write_todos"
|
|
321
356
|
&& toolResult.toolName !== "read_todos"
|
|
322
357
|
&& toolResult.toolName !== "task";
|
|
323
358
|
if (isDelegatedExecutionTool && toolResult.isError !== true) {
|
|
324
|
-
recordDelegatedFindings(state, toolResult.output);
|
|
359
|
+
recordDelegatedFindings(state, toolResult.output, "tool");
|
|
325
360
|
}
|
|
326
|
-
const delegatedTaskFindings = isSuccessfulTaskResult &&
|
|
361
|
+
const delegatedTaskFindings = isSuccessfulTaskResult && state.lastCompletedTaskDelegationFindings
|
|
327
362
|
? state.lastCompletedTaskDelegationFindings
|
|
328
363
|
: "";
|
|
329
|
-
const
|
|
364
|
+
const resolvedToolOutput = delegatedTaskFindings || effectiveToolOutput;
|
|
330
365
|
const isPlaceholderTaskResult = isSuccessfulTaskResult
|
|
331
366
|
&& !delegatedTaskFindings
|
|
332
367
|
&& isUpstreamPlaceholderTaskResult(toolResult.output);
|
|
333
368
|
state.emittedToolResult = true;
|
|
334
|
-
state.emittedToolError = state.emittedToolError ||
|
|
335
|
-
state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult ||
|
|
369
|
+
state.emittedToolError = state.emittedToolError || effectiveToolIsError === true;
|
|
370
|
+
state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult || effectiveToolIsError !== true;
|
|
336
371
|
state.emittedSuccessfulTaskResult = state.emittedSuccessfulTaskResult || isSuccessfulTaskResult;
|
|
337
372
|
state.emittedPlaceholderTaskResult = state.emittedPlaceholderTaskResult || isPlaceholderTaskResult;
|
|
338
373
|
state.emittedNonTodoToolResult = state.emittedNonTodoToolResult || !isTodoTool;
|
|
339
|
-
state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool &&
|
|
340
|
-
state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool;
|
|
374
|
+
state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool && effectiveToolIsError !== true);
|
|
375
|
+
state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool || !!salvagedTaskErrorFindings;
|
|
341
376
|
state.emittedSuccessfulDelegatedExecutionToolResult =
|
|
342
|
-
state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool &&
|
|
377
|
+
state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool && effectiveToolIsError !== true) || !!salvagedTaskErrorFindings;
|
|
378
|
+
if (salvagedTaskErrorFindings) {
|
|
379
|
+
state.hasFailedTaskDelegation = false;
|
|
380
|
+
}
|
|
343
381
|
chunks.push({
|
|
344
382
|
kind: "tool-result",
|
|
345
383
|
toolName: resolveModelFacingToolName(toolResult.toolName, toolNameMapping, primaryTools),
|
|
346
|
-
output:
|
|
347
|
-
isError:
|
|
384
|
+
output: resolvedToolOutput,
|
|
385
|
+
isError: effectiveToolIsError,
|
|
348
386
|
});
|
|
349
387
|
if (toolResult.toolName === "task") {
|
|
350
388
|
state.lastCompletedTaskDelegationFindings = "";
|
|
@@ -355,7 +393,7 @@ export function projectRuntimeStreamEvent(params) {
|
|
|
355
393
|
const delegatedTerminalOutput = extractTerminalStreamOutput(event);
|
|
356
394
|
if (delegatedTerminalOutput) {
|
|
357
395
|
state.emittedDelegatedTerminalOutput = true;
|
|
358
|
-
recordDelegatedFindings(state, delegatedTerminalOutput);
|
|
396
|
+
recordDelegatedFindings(state, delegatedTerminalOutput, "terminal");
|
|
359
397
|
}
|
|
360
398
|
}
|
|
361
399
|
if (output && !shouldSuppressVisibleToolCallText(output)) {
|
package/dist/scaffold/init-project.js
CHANGED
|
@@ -406,7 +406,7 @@ kind: ToolSets
|
|
|
406
406
|
spec:
|
|
407
407
|
- name: web-search
|
|
408
408
|
type: provider
|
|
409
|
-
description:
|
|
409
|
+
description: Use this when the runtime needs current web discovery for research tasks. Do not use this as a substitute for synthesis or comparison.
|
|
410
410
|
providerTool:
|
|
411
411
|
provider: ${options.provider}
|
|
412
412
|
tool: webSearch
|
|
@@ -421,7 +421,7 @@ function renderResearchAgentYaml(options) {
|
|
|
421
421
|
kind: Agent
|
|
422
422
|
metadata:
|
|
423
423
|
name: research
|
|
424
|
-
description:
|
|
424
|
+
description: Use this when a research request needs synthesis, bounded tool use, or specialist delegation. Answer directly when one pass is enough; delegate only when the sub-task boundary is clear.
|
|
425
425
|
spec:
|
|
426
426
|
runtime:
|
|
427
427
|
runtimeMemory: default
|
|
@@ -447,7 +447,7 @@ function renderResearchAnalystYaml(options) {
|
|
|
447
447
|
kind: Agent
|
|
448
448
|
metadata:
|
|
449
449
|
name: research-analyst
|
|
450
|
-
description:
|
|
450
|
+
description: Use this when the task needs source gathering, comparison, and evidence extraction for a bounded research sub-task. Do not use this for final answer ownership.
|
|
451
451
|
spec:
|
|
452
452
|
runtime:
|
|
453
453
|
runtimeMemory: default
|
|
@@ -494,6 +494,12 @@ ${stepTwo}
|
|
|
494
494
|
3. Compare evidence instead of trusting a single source.
|
|
495
495
|
4. Separate verified facts from inference.
|
|
496
496
|
5. End with a concise synthesis, explicit caveats, and source links when available.
|
|
497
|
+
|
|
498
|
+
## Rules
|
|
499
|
+
|
|
500
|
+
- Do not rely on one source when the question requires comparison.
|
|
501
|
+
- Do not present inference as a verified fact.
|
|
502
|
+
- If current information is required, prefer fresh sources over model memory.
|
|
497
503
|
`;
|
|
498
504
|
}
|
|
499
505
|
function renderStarterSkill(name) {
|
package/dist/workspace/compile.js
CHANGED
|
@@ -9,6 +9,7 @@ import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
|
|
|
9
9
|
import { parseEmbeddingModelObject, parseMcpServerObject, parseModelObject, parseToolObject, parseVectorStoreObject, validateEmbeddingModelObject, validateMcpServerObject, validateModelObject, validateToolObject, validateVectorStoreObject, } from "./resource-compilers.js";
|
|
10
10
|
import { validateAgent, validateTopology } from "./validate.js";
|
|
11
11
|
import { compileBinding } from "./agent-binding-compiler.js";
|
|
12
|
+
import { resolveFrameworkContractValidationMode, validateFrameworkContracts, } from "./framework-contract-validation.js";
|
|
12
13
|
import { discoverSubagents, ensureDiscoverySources } from "./support/discovery.js";
|
|
13
14
|
import { collectAgentDiscoverySourceRefs, collectToolSourceRefs } from "./support/source-collectors.js";
|
|
14
15
|
import { getRoutingDefaultAgentId, getRuntimeSources, getRuntimeResources, getRuntimeStorageRoots, getToolModuleDiscoveryConfig, getRoutingRules, resolveRefId, } from "./support/workspace-ref-utils.js";
|
|
@@ -412,6 +413,24 @@ export async function loadWorkspace(workspaceRoot, options = {}) {
|
|
|
412
413
|
externalResourceCount: externalResources.length,
|
|
413
414
|
});
|
|
414
415
|
validateToolNameConflicts(tools);
|
|
416
|
+
const frameworkContractValidation = resolveFrameworkContractValidationMode(options.frameworkContractValidation);
|
|
417
|
+
const contractOwnedRoots = Array.from(new Set([
|
|
418
|
+
workspaceRoot,
|
|
419
|
+
...(localResourceRoot ? [localResourceRoot] : []),
|
|
420
|
+
...resolvedConfiguredResources.map((resource) => resource.root),
|
|
421
|
+
]));
|
|
422
|
+
await traceStartupStage("workspace.validate.frameworkContracts", async () => {
|
|
423
|
+
validateFrameworkContracts({
|
|
424
|
+
agents: loaded.agents,
|
|
425
|
+
tools,
|
|
426
|
+
skillRegistry,
|
|
427
|
+
ownedRoots: contractOwnedRoots,
|
|
428
|
+
mode: frameworkContractValidation,
|
|
429
|
+
});
|
|
430
|
+
}, {
|
|
431
|
+
workspaceRoot,
|
|
432
|
+
mode: frameworkContractValidation,
|
|
433
|
+
});
|
|
415
434
|
const resources = Array.from(new Set([
|
|
416
435
|
...(localResourceRoot ? [localResourceRoot] : []),
|
|
417
436
|
...runtimeSources.tools.filter((source) => isNpmSourceUri(source)),
|
package/dist/workspace/framework-contract-validation.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ParsedAgentObject, ParsedToolObject } from "../contracts/types.js";
|
|
2
|
+
export type FrameworkContractValidationMode = "off" | "warn" | "error";
|
|
3
|
+
export declare function resolveFrameworkContractValidationMode(mode: FrameworkContractValidationMode | undefined): FrameworkContractValidationMode;
|
|
4
|
+
export declare function validateFrameworkContracts(input: {
|
|
5
|
+
agents: ParsedAgentObject[];
|
|
6
|
+
tools: Map<string, ParsedToolObject>;
|
|
7
|
+
skillRegistry: Map<string, string>;
|
|
8
|
+
ownedRoots: string[];
|
|
9
|
+
mode?: FrameworkContractValidationMode;
|
|
10
|
+
}): void;
|
package/dist/workspace/framework-contract-validation.js
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
|
|
4
|
+
import { getAgentExecutionConfigValue } from "./support/agent-execution-config.js";
|
|
5
|
+
import { resolvePromptValue } from "./support/workspace-ref-utils.js";
|
|
6
|
+
function normalizeMode(mode) {
|
|
7
|
+
if (mode === "warn" || mode === "error") {
|
|
8
|
+
return mode;
|
|
9
|
+
}
|
|
10
|
+
return "off";
|
|
11
|
+
}
|
|
12
|
+
export function resolveFrameworkContractValidationMode(mode) {
|
|
13
|
+
if (mode === "warn" || mode === "error" || mode === "off") {
|
|
14
|
+
return mode;
|
|
15
|
+
}
|
|
16
|
+
const envValue = process.env.AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION?.trim().toLowerCase();
|
|
17
|
+
if (envValue === "warn" || envValue === "error" || envValue === "off") {
|
|
18
|
+
return envValue;
|
|
19
|
+
}
|
|
20
|
+
return "off";
|
|
21
|
+
}
|
|
22
|
+
function isPathWithinRoot(candidate, root) {
|
|
23
|
+
const relative = path.relative(path.resolve(root), path.resolve(candidate));
|
|
24
|
+
return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative));
|
|
25
|
+
}
|
|
26
|
+
function isWorkspaceOwnedPath(candidate, roots) {
|
|
27
|
+
return roots.some((root) => isPathWithinRoot(candidate, root));
|
|
28
|
+
}
|
|
29
|
+
function addIssue(issues, code, message) {
|
|
30
|
+
issues.push({ code, message });
|
|
31
|
+
}
|
|
32
|
+
function validateAgentContract(agent, referencedSubagentIds, issues) {
|
|
33
|
+
const description = agent.description.trim();
|
|
34
|
+
const systemPrompt = resolvePromptValue(getAgentExecutionConfigValue(agent, "systemPrompt"), path.dirname(agent.sourcePath));
|
|
35
|
+
const ownsDelegation = agent.subagentRefs.length > 0 || agent.subagentPathRefs.length > 0 || (agent.asyncSubagents?.length ?? 0) > 0;
|
|
36
|
+
const isSubagent = referencedSubagentIds.has(agent.id);
|
|
37
|
+
const hasTools = agent.toolRefs.length > 0
|
|
38
|
+
|| (agent.toolBindings?.length ?? 0) > 0
|
|
39
|
+
|| (agent.inlineTools?.length ?? 0) > 0;
|
|
40
|
+
const responseFormat = getAgentExecutionConfigValue(agent, "responseFormat");
|
|
41
|
+
if (description.length < 24) {
|
|
42
|
+
addIssue(issues, "agent.description.too_short", `Agent ${agent.id} should use a more specific description that explains when it should be used.`);
|
|
43
|
+
}
|
|
44
|
+
if (ownsDelegation) {
|
|
45
|
+
if (!systemPrompt?.trim()) {
|
|
46
|
+
addIssue(issues, "agent.orchestrator.missing_prompt", `Delegating agent ${agent.id} should define a systemPrompt that explains decomposition, delegation, synthesis, and stop conditions.`);
|
|
47
|
+
}
|
|
48
|
+
if (!/(delegate|delegation|subagent|decompose|synthesi|answer directly|parallel)/i.test(description)) {
|
|
49
|
+
addIssue(issues, "agent.orchestrator.description_boundary", `Delegating agent ${agent.id} description should make its delegation boundary explicit, for example when it should answer directly versus delegate.`);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
if (isSubagent) {
|
|
53
|
+
if (!systemPrompt?.trim()) {
|
|
54
|
+
addIssue(issues, "agent.subagent.missing_prompt", `Subagent ${agent.id} should define a systemPrompt that makes its operating boundary and output contract explicit.`);
|
|
55
|
+
}
|
|
56
|
+
if (!/(use this when|when the task|for .*?(analysis|research|search|debug|review|triage|inspection|extraction|comparison|validation|implementation))/i.test(description)) {
|
|
57
|
+
addIssue(issues, "agent.subagent.description_trigger", `Subagent ${agent.id} description should clarify when it should be delegated to and what narrow task class it owns.`);
|
|
58
|
+
}
|
|
59
|
+
if (agent.executionMode === "deepagent" && hasTools && responseFormat === undefined) {
|
|
60
|
+
addIssue(issues, "agent.subagent.deepagent.missing_response_format", `DeepAgents subagent ${agent.id} exposes tools, so it should define config.responseFormat to guarantee a stable task result for its parent agent.`);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
function stripFrontmatter(document) {
|
|
65
|
+
return document.replace(/^---\s*\n[\s\S]*?\n---\s*(?:\n|$)/, "");
|
|
66
|
+
}
|
|
67
|
+
function validateSkillContract(skillRoot, issues) {
|
|
68
|
+
const metadata = validateSkillMetadata(skillRoot);
|
|
69
|
+
const document = readFileSync(path.join(skillRoot, "SKILL.md"), "utf8");
|
|
70
|
+
const body = stripFrontmatter(document);
|
|
71
|
+
const skillName = metadata.name || path.basename(skillRoot);
|
|
72
|
+
if (!/(Use this skill when|Use this when)/i.test(body)) {
|
|
73
|
+
addIssue(issues, "skill.missing_trigger", `Skill ${skillName} should explain when it should be used, preferably with a clear "Use this skill when..." trigger.`);
|
|
74
|
+
}
|
|
75
|
+
if (!/(## Workflow|^## Workflow|^\d+\.\s)/m.test(body)) {
|
|
76
|
+
addIssue(issues, "skill.missing_workflow", `Skill ${skillName} should define an explicit workflow instead of only background prose.`);
|
|
77
|
+
}
|
|
78
|
+
if (!/(## Rules|Do not|Output|Caveat|Caveats)/i.test(body)) {
|
|
79
|
+
addIssue(issues, "skill.missing_boundaries", `Skill ${skillName} should include execution boundaries such as rules, non-goals, caveats, or output expectations.`);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
function validateToolContract(tool, issues) {
|
|
83
|
+
const description = tool.description.trim();
|
|
84
|
+
if (description.length < 20) {
|
|
85
|
+
addIssue(issues, "tool.description.too_short", `Tool ${tool.id} should use a more specific description that explains invocation boundaries and argument expectations.`);
|
|
86
|
+
return;
|
|
87
|
+
}
|
|
88
|
+
if (!/(Use this when|Do not use|Before calling)/i.test(description)) {
|
|
89
|
+
addIssue(issues, "tool.description.missing_boundary", `Tool ${tool.id} description should describe when to call it and, ideally, when not to call it or what must be true before calling it.`);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
export function validateFrameworkContracts(input) {
|
|
93
|
+
const mode = normalizeMode(input.mode);
|
|
94
|
+
if (mode === "off") {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
const issues = [];
|
|
98
|
+
const referencedSubagentIds = new Set(input.agents.flatMap((agent) => agent.subagentRefs.map((ref) => ref.replace(/^agent\//, ""))));
|
|
99
|
+
for (const agent of input.agents) {
|
|
100
|
+
if (!isWorkspaceOwnedPath(agent.sourcePath, input.ownedRoots)) {
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
103
|
+
validateAgentContract(agent, referencedSubagentIds, issues);
|
|
104
|
+
}
|
|
105
|
+
for (const [skillName, skillRoot] of input.skillRegistry) {
|
|
106
|
+
if (!isWorkspaceOwnedPath(skillRoot, input.ownedRoots)) {
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
validateSkillContract(skillRoot, issues);
|
|
110
|
+
if (!skillName.trim()) {
|
|
111
|
+
addIssue(issues, "skill.name.empty", `Skill ${skillRoot} must define a stable name in frontmatter.`);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
for (const tool of input.tools.values()) {
|
|
115
|
+
if (!isWorkspaceOwnedPath(tool.sourcePath, input.ownedRoots)) {
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
validateToolContract(tool, issues);
|
|
119
|
+
}
|
|
120
|
+
if (issues.length === 0) {
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
123
|
+
const message = [
|
|
124
|
+
"Framework contract validation failed.",
|
|
125
|
+
"The workspace should follow the agent-harness contract-writing guidance for agents, skills, and tools.",
|
|
126
|
+
...issues.map((issue) => `- [${issue.code}] ${issue.message}`),
|
|
127
|
+
].join("\n");
|
|
128
|
+
if (mode === "warn") {
|
|
129
|
+
console.warn(`[agent-harness] ${message}`);
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
throw new Error(message);
|
|
133
|
+
}
|