@botbotgo/agent-harness 0.0.333 → 0.0.337
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/contracts/workspace.d.ts +1 -0
- package/dist/package-version.d.ts +2 -2
- package/dist/package-version.js +2 -2
- package/dist/runtime/adapter/flow/stream-runtime.js +18 -8
- package/dist/runtime/adapter/invocation-result.js +21 -5
- package/dist/runtime/adapter/middleware-assembly.js +31 -13
- package/dist/runtime/adapter/stream-event-projection.d.ts +2 -0
- package/dist/runtime/adapter/stream-event-projection.js +122 -10
- package/dist/scaffold/init-project.js +9 -3
- package/dist/workspace/compile.js +19 -0
- package/dist/workspace/framework-contract-validation.d.ts +10 -0
- package/dist/workspace/framework-contract-validation.js +133 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -534,10 +534,19 @@ import { createAgentHarness } from "@botbotgo/agent-harness";
|
|
|
534
534
|
const runtime = await createAgentHarness("/path/to/workspace", {
|
|
535
535
|
load: {
|
|
536
536
|
overlayRoots: ["/path/to/framework-defaults", "/path/to/product-overrides"],
|
|
537
|
+
frameworkContractValidation: "warn",
|
|
537
538
|
},
|
|
538
539
|
});
|
|
539
540
|
```
|
|
540
541
|
|
|
542
|
+
Framework contract validation modes:
|
|
543
|
+
|
|
544
|
+
- `off` keeps startup behavior unchanged
|
|
545
|
+
- `warn` loads the workspace and emits contract-quality warnings for workspace-owned agents, skills, and tools
|
|
546
|
+
- `error` fails startup when those workspace-owned definitions drift away from the framework contract
|
|
547
|
+
|
|
548
|
+
You can also control the same startup behavior with `AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION=warn|error|off`.
|
|
549
|
+
|
|
541
550
|
### Run A Request
|
|
542
551
|
|
|
543
552
|
```ts
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
2
|
-
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.336";
|
|
2
|
+
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
2
|
-
export const AGENT_HARNESS_RELEASE_DATE = "2026-04-
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.336";
|
|
2
|
+
export const AGENT_HARNESS_RELEASE_DATE = "2026-04-23";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
1
|
+
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
2
2
|
import { buildInvocationRequest } from "../model/invocation-request.js";
|
|
3
3
|
import { buildRawModelMessages } from "../model/message-assembly.js";
|
|
4
4
|
import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
|
|
@@ -79,11 +79,21 @@ function hasUnresolvedExecution(evidence) {
|
|
|
79
79
|
|| evidence.hasOpenTaskDelegation);
|
|
80
80
|
}
|
|
81
81
|
function hasMissingDelegatedExecutionEvidence(evidence) {
|
|
82
|
-
return
|
|
82
|
+
return false;
|
|
83
83
|
}
|
|
84
84
|
/**
 * Combines two evidence flags: the run involved a delegated agent with
 * configured tools, and the only task completion seen was the placeholder one.
 *
 * @param {object} evidence - Execution evidence carrying
 *   `hasDelegatedAgentWithConfiguredTools` and `hasOnlyPlaceholderTaskCompletion`.
 * @returns {*} The first flag when it is falsy, otherwise the second flag
 *   (same short-circuit result as `a && b`).
 */
function hasMissingDelegatedFindings(evidence) {
    const delegatedWithTools = evidence.hasDelegatedAgentWithConfiguredTools;
    if (!delegatedWithTools) {
        return delegatedWithTools;
    }
    return evidence.hasOnlyPlaceholderTaskCompletion;
}
|
|
87
|
+
/**
 * Chooses the retry instruction for a delegated execution that produced no
 * usable findings.
 *
 * The retry instruction is returned in two situations:
 *  - `hasMissingDelegatedFindings(evidence)` is truthy, or
 *  - a task delegation is still open for an agent with configured tools while
 *    no plan-state evidence exists and no task delegation has failed.
 *
 * @param {object} evidence - Execution evidence flags.
 * @returns {string|null} `EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION`, or
 *   `null` when neither situation applies.
 */
function resolveDelegatedExecutionRecoveryInstruction(evidence) {
    if (hasMissingDelegatedFindings(evidence)) {
        return EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
    }
    const openToolCapableDelegationWithoutOutcome = evidence.hasOpenTaskDelegation
        && evidence.hasDelegatedAgentWithConfiguredTools
        && !evidence.hasPlanStateEvidence
        && !evidence.hasFailedTaskDelegation;
    return openToolCapableDelegationWithoutOutcome
        ? EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION
        : null;
}
|
|
87
97
|
function createUnresolvedExecutionError(evidence) {
|
|
88
98
|
const reasons = [];
|
|
89
99
|
if (evidence.hasIncompletePlanState) {
|
|
@@ -95,9 +105,6 @@ function createUnresolvedExecutionError(evidence) {
|
|
|
95
105
|
if (evidence.hasOpenTaskDelegation) {
|
|
96
106
|
reasons.push("delegated task has not finished");
|
|
97
107
|
}
|
|
98
|
-
if (hasMissingDelegatedExecutionEvidence(evidence)) {
|
|
99
|
-
reasons.push("delegated agent ended without surfacing any real tool execution evidence");
|
|
100
|
-
}
|
|
101
108
|
if (hasMissingDelegatedFindings(evidence)) {
|
|
102
109
|
reasons.push("delegated task returned only the upstream placeholder result without surfaced final findings");
|
|
103
110
|
}
|
|
@@ -400,7 +407,10 @@ export async function* streamRuntimeExecution(options) {
|
|
|
400
407
|
throw error;
|
|
401
408
|
}
|
|
402
409
|
const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
403
|
-
|
|
410
|
+
const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects
|
|
411
|
+
? resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence)
|
|
412
|
+
: null;
|
|
413
|
+
if (hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction) {
|
|
404
414
|
throw createUnresolvedExecutionError(streamedExecutionEvidence);
|
|
405
415
|
}
|
|
406
416
|
const executionWithoutToolEvidenceInstruction = projectionState.emittedOutput
|
|
@@ -411,7 +421,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
411
421
|
: null;
|
|
412
422
|
const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
|
|
413
423
|
? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
|
|
414
|
-
: executionWithoutToolEvidenceInstruction;
|
|
424
|
+
: delegatedExecutionRecoveryInstruction ?? executionWithoutToolEvidenceInstruction;
|
|
415
425
|
if (retryInstruction) {
|
|
416
426
|
let retried;
|
|
417
427
|
retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
|
|
@@ -609,7 +619,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
609
619
|
...invokeExecutionEvidence,
|
|
610
620
|
hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
|
|
611
621
|
})
|
|
612
|
-
:
|
|
622
|
+
: resolveDelegatedExecutionRecoveryInstruction(invokeExecutionEvidence);
|
|
613
623
|
if (invokeFallbackRecoveryInstruction) {
|
|
614
624
|
const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, invokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
|
|
615
625
|
const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
|
|
@@ -17,19 +17,35 @@ function isPlaceholderTaskCompletion(value) {
|
|
|
17
17
|
const normalized = sanitizeVisibleText(value).trim();
|
|
18
18
|
return normalized === "Task completed";
|
|
19
19
|
}
|
|
20
|
+
/**
 * Detects a structured completion report that carries no information.
 *
 * The input is passed through `sanitizeVisibleText` and trimmed. It counts as
 * low-signal when it contains, in order, `Status: completed` followed by
 * `Summary`, `Likely Causes`, `Blockers`, and `Next Commands` sections that
 * are each `- none` (case-insensitive).
 *
 * @param {*} value - Candidate output text.
 * @returns {boolean} `true` for the empty structured report, `false` otherwise
 *   (including when the sanitized text is empty).
 */
function isLowSignalStructuredCompletion(value) {
    const emptyReportPattern = /Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i;
    const text = sanitizeVisibleText(value).trim();
    return text.length > 0 && emptyReportPattern.test(text);
}
|
|
20
27
|
function normalizeToolOutputText(output) {
|
|
21
28
|
const directText = typeof output === "string"
|
|
22
29
|
? sanitizeVisibleText(output).trim()
|
|
23
30
|
: "";
|
|
24
|
-
if (directText
|
|
31
|
+
if (directText
|
|
32
|
+
&& !looksLikeLeakedToolCallText(directText)
|
|
33
|
+
&& !isPlaceholderTaskCompletion(directText)
|
|
34
|
+
&& !isLowSignalStructuredCompletion(directText)) {
|
|
25
35
|
return directText;
|
|
26
36
|
}
|
|
27
37
|
const visibleOutput = sanitizeVisibleText(extractVisibleOutput(output)).trim();
|
|
28
|
-
if (visibleOutput
|
|
38
|
+
if (visibleOutput
|
|
39
|
+
&& !looksLikeLeakedToolCallText(visibleOutput)
|
|
40
|
+
&& !isPlaceholderTaskCompletion(visibleOutput)
|
|
41
|
+
&& !isLowSignalStructuredCompletion(visibleOutput)) {
|
|
29
42
|
return visibleOutput;
|
|
30
43
|
}
|
|
31
44
|
const fallbackContext = sanitizeVisibleText(extractToolFallbackContext(output)).trim();
|
|
32
|
-
if (fallbackContext
|
|
45
|
+
if (fallbackContext
|
|
46
|
+
&& !looksLikeLeakedToolCallText(fallbackContext)
|
|
47
|
+
&& !isPlaceholderTaskCompletion(fallbackContext)
|
|
48
|
+
&& !isLowSignalStructuredCompletion(fallbackContext)) {
|
|
33
49
|
return fallbackContext;
|
|
34
50
|
}
|
|
35
51
|
return "";
|
|
@@ -72,7 +88,7 @@ export function resolveDeterministicFinalOutput(params) {
|
|
|
72
88
|
const sanitizedVisibleOutput = visibleOutput && !looksLikeLeakedToolCallText(visibleOutput)
|
|
73
89
|
? sanitizeVisibleText(visibleOutput).trim()
|
|
74
90
|
: "";
|
|
75
|
-
if (sanitizedVisibleOutput) {
|
|
91
|
+
if (sanitizedVisibleOutput && !isLowSignalStructuredCompletion(sanitizedVisibleOutput)) {
|
|
76
92
|
return sanitizedVisibleOutput;
|
|
77
93
|
}
|
|
78
94
|
const successfulToolOutput = extractLatestSuccessfulNonTodoToolResultText(executedToolResults);
|
|
@@ -82,7 +98,7 @@ export function resolveDeterministicFinalOutput(params) {
|
|
|
82
98
|
const sanitizedToolFallback = toolFallback && !looksLikeLeakedToolCallText(toolFallback)
|
|
83
99
|
? sanitizeVisibleText(toolFallback).trim()
|
|
84
100
|
: "";
|
|
85
|
-
return sanitizedToolFallback;
|
|
101
|
+
return isLowSignalStructuredCompletion(sanitizedToolFallback) ? "" : sanitizedToolFallback;
|
|
86
102
|
}
|
|
87
103
|
export function extractDelegatedFindingsText(executedToolResults) {
|
|
88
104
|
return extractLatestSuccessfulTaskResultText(executedToolResults);
|
|
@@ -73,9 +73,6 @@ function hasIncompletePlanStateInValue(value) {
|
|
|
73
73
|
/**
 * Reports whether the projection state still shows unfinished delegated work.
 *
 * @param {object} state - Projection state with `hasIncompletePlanState` and
 *   `openTaskDelegations`.
 * @returns {*} `state.hasIncompletePlanState` when it is truthy; otherwise a
 *   boolean telling whether `openTaskDelegations` is greater than zero.
 */
function hasUnresolvedDelegatedExecution(state) {
    const incompletePlan = state.hasIncompletePlanState;
    if (incompletePlan) {
        return incompletePlan;
    }
    return state.openTaskDelegations > 0;
}
|
|
76
|
-
function hasMissingDelegatedToolExecutionEvidence(state, subagentHasTools) {
|
|
77
|
-
return subagentHasTools && !state.emittedToolResult && !state.emittedToolError;
|
|
78
|
-
}
|
|
79
76
|
function formatDelegatedExecutionBlocker(state) {
|
|
80
77
|
const summary = state.emittedOutput.trim();
|
|
81
78
|
if (summary) {
|
|
@@ -90,7 +87,7 @@ function formatDelegatedExecutionBlocker(state) {
|
|
|
90
87
|
return "Delegated investigation ended before the plan was completed.";
|
|
91
88
|
}
|
|
92
89
|
function requiresDelegatedExecutionRecovery(state) {
|
|
93
|
-
return hasUnresolvedDelegatedExecution(state)
|
|
90
|
+
return hasUnresolvedDelegatedExecution(state);
|
|
94
91
|
}
|
|
95
92
|
const DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION = [
|
|
96
93
|
"Your previous attempt ended with a tool failure while the todo board still had unfinished work.",
|
|
@@ -246,7 +243,6 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
246
243
|
summarizationModel,
|
|
247
244
|
});
|
|
248
245
|
const resolvedSubagentTools = selectedSubagent.tools ?? input.resolveTools(primaryTools, input.binding);
|
|
249
|
-
const subagentHasTools = (resolvedSubagentTools?.length ?? 0) > 0;
|
|
250
246
|
const runnable = createAgent({
|
|
251
247
|
model: (selectedSubagent.model ?? resolvedHostModel),
|
|
252
248
|
tools: resolvedSubagentTools,
|
|
@@ -297,17 +293,39 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
297
293
|
};
|
|
298
294
|
let { projectionState, executedToolResults } = await runWithStreamInspection();
|
|
299
295
|
if (requiresDelegatedExecutionRecovery(projectionState)) {
|
|
296
|
+
const initialProjectionState = projectionState;
|
|
297
|
+
const initialExecutedToolResults = executedToolResults;
|
|
298
|
+
const initialDeterministicOutput = resolveDeterministicFinalOutput({
|
|
299
|
+
visibleOutput: initialProjectionState.emittedOutput.trim(),
|
|
300
|
+
executedToolResults: initialExecutedToolResults,
|
|
301
|
+
});
|
|
300
302
|
const recoveryInstruction = projectionState.hasIncompletePlanState && projectionState.emittedToolError
|
|
301
303
|
? `${AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION}\n\n${DELEGATED_FAILURE_PLAN_RECONCILIATION_INSTRUCTION}`
|
|
302
304
|
: AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
|
|
303
|
-
|
|
305
|
+
const recovered = await runWithStreamInspection(recoveryInstruction);
|
|
306
|
+
const recoveredDeterministicOutput = resolveDeterministicFinalOutput({
|
|
307
|
+
visibleOutput: recovered.projectionState.emittedOutput.trim(),
|
|
308
|
+
executedToolResults: recovered.executedToolResults,
|
|
309
|
+
});
|
|
310
|
+
const recoveredHasSubstantiveExecution = recoveredDeterministicOutput.length > 0;
|
|
311
|
+
if (recoveredHasSubstantiveExecution) {
|
|
312
|
+
projectionState = recovered.projectionState;
|
|
313
|
+
executedToolResults = recovered.executedToolResults;
|
|
314
|
+
}
|
|
315
|
+
else {
|
|
316
|
+
projectionState = initialProjectionState;
|
|
317
|
+
executedToolResults = initialExecutedToolResults;
|
|
318
|
+
if (initialDeterministicOutput) {
|
|
319
|
+
projectionState = {
|
|
320
|
+
...projectionState,
|
|
321
|
+
emittedOutput: initialDeterministicOutput,
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
}
|
|
304
325
|
}
|
|
305
326
|
if (requiresDelegatedExecutionRecovery(projectionState)) {
|
|
306
327
|
throw new Error(formatDelegatedExecutionBlocker(projectionState));
|
|
307
328
|
}
|
|
308
|
-
if (hasMissingDelegatedToolExecutionEvidence(projectionState, subagentHasTools)) {
|
|
309
|
-
throw new Error("Delegated investigation ended without any real tool execution evidence.");
|
|
310
|
-
}
|
|
311
329
|
if (projectionState.emittedToolError) {
|
|
312
330
|
const blockerMessage = resolveDeterministicFinalOutput({
|
|
313
331
|
visibleOutput: projectionState.emittedOutput.trim(),
|
|
@@ -335,12 +353,12 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
335
353
|
if (hasIncompletePlanStateInValue(result)) {
|
|
336
354
|
throw new Error(extractVisibleOutput(result) || extractToolFallbackContext(result) || "Delegated investigation ended before the plan was completed.");
|
|
337
355
|
}
|
|
338
|
-
if (subagentHasTools) {
|
|
339
|
-
throw new Error("Delegated investigation ended without any real tool execution evidence.");
|
|
340
|
-
}
|
|
341
356
|
const visibleOutput = extractVisibleOutput(result);
|
|
342
357
|
const fallbackOutput = extractToolFallbackContext(result);
|
|
343
|
-
|
|
358
|
+
const structuredResponse = typeof result === "object" && result !== null && "structuredResponse" in result
|
|
359
|
+
? result.structuredResponse
|
|
360
|
+
: undefined;
|
|
361
|
+
return visibleOutput || fallbackOutput || (structuredResponse !== undefined ? JSON.stringify(structuredResponse) : "") || JSON.stringify(result);
|
|
344
362
|
}
|
|
345
363
|
export async function resolveBuiltinMiddlewareTools(input) {
|
|
346
364
|
const backend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
|
|
@@ -20,6 +20,8 @@ export type StreamEventProjectionState = {
|
|
|
20
20
|
openTaskDelegations: number;
|
|
21
21
|
openToolCapableTaskDelegations: number;
|
|
22
22
|
taskDelegationHasToolsStack: boolean[];
|
|
23
|
+
taskDelegationFindingsStack: string[];
|
|
24
|
+
lastCompletedTaskDelegationFindings: string;
|
|
23
25
|
seenTerminalOutputs: Set<string>;
|
|
24
26
|
};
|
|
25
27
|
export declare function createStreamEventProjectionState(): StreamEventProjectionState;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { sanitizeVisibleText } from "../parsing/output-parsing.js";
|
|
1
|
+
import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
|
|
2
2
|
import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
|
|
3
3
|
import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
|
|
4
4
|
import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
|
|
@@ -22,6 +22,8 @@ export function createStreamEventProjectionState() {
|
|
|
22
22
|
openTaskDelegations: 0,
|
|
23
23
|
openToolCapableTaskDelegations: 0,
|
|
24
24
|
taskDelegationHasToolsStack: [],
|
|
25
|
+
taskDelegationFindingsStack: [],
|
|
26
|
+
lastCompletedTaskDelegationFindings: "",
|
|
25
27
|
seenTerminalOutputs: new Set(),
|
|
26
28
|
};
|
|
27
29
|
}
|
|
@@ -163,6 +165,90 @@ function isUpstreamPlaceholderTaskResult(value) {
|
|
|
163
165
|
&& typeof message?.tool_call_id === "string"
|
|
164
166
|
&& message?.content === "Task completed";
|
|
165
167
|
}
|
|
168
|
+
/**
 * Searches a value for the first non-empty text that could serve as delegated
 * findings.
 *
 * Lookup order:
 *  1. `null`/`undefined`, or a recursion depth above 6, yields `""`.
 *  2. A string is returned unchanged.
 *  3. `extractVisibleOutput`, `extractToolFallbackContext`, then
 *     `readTextContent` are tried on the value itself.
 *  4. Arrays are searched from the last element to the first.
 *  5. Other objects are searched through the keys `messages`, `update`,
 *     `output`, `content`, `data`, `summary`, in that order.
 *
 * @param {*} value - Tool output, stream event payload, or nested fragment.
 * @param {number} [depth=0] - Current recursion depth.
 * @returns {string} The first text found, or `""`.
 */
function extractDelegatedFindingsCandidateText(value, depth = 0) {
    if (value === null || value === undefined || depth > 6) {
        return "";
    }
    if (typeof value === "string") {
        return value;
    }
    for (const extract of [extractVisibleOutput, extractToolFallbackContext, readTextContent]) {
        const text = extract(value);
        if (text) {
            return text;
        }
    }
    if (Array.isArray(value)) {
        // Newest entries sit at the end, so walk backwards.
        let index = value.length;
        while (index > 0) {
            index -= 1;
            const found = extractDelegatedFindingsCandidateText(value[index], depth + 1);
            if (found) {
                return found;
            }
        }
        return "";
    }
    if (typeof value !== "object") {
        return "";
    }
    for (const key of ["messages", "update", "output", "content", "data", "summary"]) {
        const found = extractDelegatedFindingsCandidateText(value[key], depth + 1);
        if (found) {
            return found;
        }
    }
    return "";
}
|
|
207
|
+
/**
 * Turns a raw value into sanitized findings text, discarding results that
 * carry no information.
 *
 * Returns `""` when no candidate text is found, when the sanitized text is
 * exactly `Task completed`, or when it matches the all-`none` structured
 * report (`Status: completed` with `Summary`, `Likely Causes`, `Blockers`,
 * and `Next Commands` each `- none`).
 *
 * @param {*} value - Raw tool output or terminal output.
 * @returns {string} Sanitized, trimmed findings text, or `""`.
 */
function normalizeDelegatedFindingsText(value) {
    const candidate = extractDelegatedFindingsCandidateText(value);
    if (!candidate) {
        return "";
    }
    const text = sanitizeVisibleText(candidate).trim();
    const isPlaceholder = text === "Task completed";
    const isEmptyReport = !isPlaceholder
        && /Status:\s*completed[\s\S]*Summary:\s*-\s*none[\s\S]*Likely Causes:\s*-\s*none[\s\S]*Blockers:\s*-\s*none[\s\S]*Next Commands:\s*-\s*none/i.test(text);
    return isPlaceholder || isEmptyReport ? "" : text;
}
|
|
221
|
+
/**
 * Checks whether a value mentions `/large_tool_results/` or the phrase
 * `internal runtime spill path` (case-insensitive).
 *
 * Strings are tested directly. For other values the text comes from
 * `extractToolFallbackContext`, `extractVisibleOutput`, or `readTextContent`,
 * whichever first yields something. Non-null objects are additionally tested
 * through their JSON serialization; a serialization failure counts as no match.
 *
 * @param {*} value - Tool output to inspect.
 * @returns {boolean} `true` when either marker is present.
 */
function isInternalRuntimeSpillPathErrorValue(value) {
    const spillPathPattern = /\/large_tool_results\/|internal runtime spill path/i;
    let text;
    if (typeof value === "string") {
        text = value;
    }
    else {
        text = extractToolFallbackContext(value) || extractVisibleOutput(value) || readTextContent(value);
    }
    if (text && spillPathPattern.test(text)) {
        return true;
    }
    if (value === null || typeof value !== "object") {
        return false;
    }
    try {
        return spillPathPattern.test(JSON.stringify(value));
    }
    catch {
        // Unserializable values (e.g. cyclic) are treated as not matching.
        return false;
    }
}
|
|
238
|
+
/**
 * Stores normalized findings on the top entry of
 * `state.taskDelegationFindingsStack`.
 *
 * Nothing happens when the stack is empty or when the value normalizes to an
 * empty string. Findings from a `"terminal"` source never overwrite an entry
 * that already holds text; any other source replaces the entry.
 *
 * @param {object} state - Projection state owning `taskDelegationFindingsStack`.
 * @param {*} value - Raw tool output or terminal output.
 * @param {string} [source="tool"] - Origin of the value.
 * @returns {void}
 */
function recordDelegatedFindings(state, value, source = "tool") {
    const stack = state.taskDelegationFindingsStack;
    const topIndex = stack.length - 1;
    if (topIndex < 0) {
        return;
    }
    const findings = normalizeDelegatedFindingsText(value);
    if (!findings) {
        return;
    }
    const existing = stack[topIndex] ?? "";
    const terminalWouldOverwrite = source === "terminal" && Boolean(existing);
    if (!terminalWouldOverwrite) {
        stack[topIndex] = findings;
    }
}
|
|
166
252
|
function updateDelegationState(state, event, countConfiguredToolsForAgentId) {
|
|
167
253
|
if (typeof event !== "object" || event === null) {
|
|
168
254
|
return;
|
|
@@ -185,11 +271,13 @@ function updateDelegationState(state, event, countConfiguredToolsForAgentId) {
|
|
|
185
271
|
state.sawDelegatedAgentWithConfiguredTools = true;
|
|
186
272
|
state.openToolCapableTaskDelegations += 1;
|
|
187
273
|
}
|
|
274
|
+
state.taskDelegationFindingsStack.push("");
|
|
188
275
|
return;
|
|
189
276
|
}
|
|
190
277
|
if (isTaskEnd || isTaskError) {
|
|
191
278
|
state.openTaskDelegations = Math.max(0, state.openTaskDelegations - 1);
|
|
192
279
|
const delegatedTaskHadTools = state.taskDelegationHasToolsStack.pop() === true;
|
|
280
|
+
state.lastCompletedTaskDelegationFindings = state.taskDelegationFindingsStack.pop() ?? "";
|
|
193
281
|
if (delegatedTaskHadTools) {
|
|
194
282
|
state.openToolCapableTaskDelegations = Math.max(0, state.openToolCapableTaskDelegations - 1);
|
|
195
283
|
}
|
|
@@ -254,34 +342,58 @@ export function projectRuntimeStreamEvent(params) {
|
|
|
254
342
|
const toolResult = extractToolResult(event);
|
|
255
343
|
if (toolResult) {
|
|
256
344
|
const isTodoTool = toolResult.toolName === "write_todos" || toolResult.toolName === "read_todos";
|
|
257
|
-
const
|
|
258
|
-
|
|
345
|
+
const salvagedTaskErrorFindings = toolResult.toolName === "task"
|
|
346
|
+
&& toolResult.isError === true
|
|
347
|
+
&& !!state.lastCompletedTaskDelegationFindings
|
|
348
|
+
&& isInternalRuntimeSpillPathErrorValue(toolResult.output)
|
|
349
|
+
? state.lastCompletedTaskDelegationFindings
|
|
350
|
+
: "";
|
|
351
|
+
const effectiveToolOutput = salvagedTaskErrorFindings || toolResult.output;
|
|
352
|
+
const effectiveToolIsError = salvagedTaskErrorFindings ? false : toolResult.isError;
|
|
353
|
+
const isSuccessfulTaskResult = toolResult.toolName === "task" && effectiveToolIsError !== true;
|
|
259
354
|
const isDelegatedExecutionTool = (isDelegatedAgentEvent || state.openToolCapableTaskDelegations > 0)
|
|
260
355
|
&& toolResult.toolName !== "write_todos"
|
|
261
356
|
&& toolResult.toolName !== "read_todos"
|
|
262
357
|
&& toolResult.toolName !== "task";
|
|
358
|
+
if (isDelegatedExecutionTool && toolResult.isError !== true) {
|
|
359
|
+
recordDelegatedFindings(state, toolResult.output, "tool");
|
|
360
|
+
}
|
|
361
|
+
const delegatedTaskFindings = isSuccessfulTaskResult && state.lastCompletedTaskDelegationFindings
|
|
362
|
+
? state.lastCompletedTaskDelegationFindings
|
|
363
|
+
: "";
|
|
364
|
+
const resolvedToolOutput = delegatedTaskFindings || effectiveToolOutput;
|
|
365
|
+
const isPlaceholderTaskResult = isSuccessfulTaskResult
|
|
366
|
+
&& !delegatedTaskFindings
|
|
367
|
+
&& isUpstreamPlaceholderTaskResult(toolResult.output);
|
|
263
368
|
state.emittedToolResult = true;
|
|
264
|
-
state.emittedToolError = state.emittedToolError ||
|
|
265
|
-
state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult ||
|
|
369
|
+
state.emittedToolError = state.emittedToolError || effectiveToolIsError === true;
|
|
370
|
+
state.emittedSuccessfulToolResult = state.emittedSuccessfulToolResult || effectiveToolIsError !== true;
|
|
266
371
|
state.emittedSuccessfulTaskResult = state.emittedSuccessfulTaskResult || isSuccessfulTaskResult;
|
|
267
372
|
state.emittedPlaceholderTaskResult = state.emittedPlaceholderTaskResult || isPlaceholderTaskResult;
|
|
268
373
|
state.emittedNonTodoToolResult = state.emittedNonTodoToolResult || !isTodoTool;
|
|
269
|
-
state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool &&
|
|
270
|
-
state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool;
|
|
374
|
+
state.emittedSuccessfulNonTodoToolResult = state.emittedSuccessfulNonTodoToolResult || (!isTodoTool && effectiveToolIsError !== true);
|
|
375
|
+
state.emittedDelegatedExecutionToolResult = state.emittedDelegatedExecutionToolResult || isDelegatedExecutionTool || !!salvagedTaskErrorFindings;
|
|
271
376
|
state.emittedSuccessfulDelegatedExecutionToolResult =
|
|
272
|
-
state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool &&
|
|
377
|
+
state.emittedSuccessfulDelegatedExecutionToolResult || (isDelegatedExecutionTool && effectiveToolIsError !== true) || !!salvagedTaskErrorFindings;
|
|
378
|
+
if (salvagedTaskErrorFindings) {
|
|
379
|
+
state.hasFailedTaskDelegation = false;
|
|
380
|
+
}
|
|
273
381
|
chunks.push({
|
|
274
382
|
kind: "tool-result",
|
|
275
383
|
toolName: resolveModelFacingToolName(toolResult.toolName, toolNameMapping, primaryTools),
|
|
276
|
-
output:
|
|
277
|
-
isError:
|
|
384
|
+
output: resolvedToolOutput,
|
|
385
|
+
isError: effectiveToolIsError,
|
|
278
386
|
});
|
|
387
|
+
if (toolResult.toolName === "task") {
|
|
388
|
+
state.lastCompletedTaskDelegationFindings = "";
|
|
389
|
+
}
|
|
279
390
|
}
|
|
280
391
|
const output = allowVisibleContent ? extractTerminalStreamOutput(event) : "";
|
|
281
392
|
if (!allowVisibleContent) {
|
|
282
393
|
const delegatedTerminalOutput = extractTerminalStreamOutput(event);
|
|
283
394
|
if (delegatedTerminalOutput) {
|
|
284
395
|
state.emittedDelegatedTerminalOutput = true;
|
|
396
|
+
recordDelegatedFindings(state, delegatedTerminalOutput, "terminal");
|
|
285
397
|
}
|
|
286
398
|
}
|
|
287
399
|
if (output && !shouldSuppressVisibleToolCallText(output)) {
|
|
@@ -406,7 +406,7 @@ kind: ToolSets
|
|
|
406
406
|
spec:
|
|
407
407
|
- name: web-search
|
|
408
408
|
type: provider
|
|
409
|
-
description:
|
|
409
|
+
description: Use this when the runtime needs current web discovery for research tasks. Do not use this as a substitute for synthesis or comparison.
|
|
410
410
|
providerTool:
|
|
411
411
|
provider: ${options.provider}
|
|
412
412
|
tool: webSearch
|
|
@@ -421,7 +421,7 @@ function renderResearchAgentYaml(options) {
|
|
|
421
421
|
kind: Agent
|
|
422
422
|
metadata:
|
|
423
423
|
name: research
|
|
424
|
-
description:
|
|
424
|
+
description: Use this when a research request needs synthesis, bounded tool use, or specialist delegation. Answer directly when one pass is enough; delegate only when the sub-task boundary is clear.
|
|
425
425
|
spec:
|
|
426
426
|
runtime:
|
|
427
427
|
runtimeMemory: default
|
|
@@ -447,7 +447,7 @@ function renderResearchAnalystYaml(options) {
|
|
|
447
447
|
kind: Agent
|
|
448
448
|
metadata:
|
|
449
449
|
name: research-analyst
|
|
450
|
-
description:
|
|
450
|
+
description: Use this when the task needs source gathering, comparison, and evidence extraction for a bounded research sub-task. Do not use this for final answer ownership.
|
|
451
451
|
spec:
|
|
452
452
|
runtime:
|
|
453
453
|
runtimeMemory: default
|
|
@@ -494,6 +494,12 @@ ${stepTwo}
|
|
|
494
494
|
3. Compare evidence instead of trusting a single source.
|
|
495
495
|
4. Separate verified facts from inference.
|
|
496
496
|
5. End with a concise synthesis, explicit caveats, and source links when available.
|
|
497
|
+
|
|
498
|
+
## Rules
|
|
499
|
+
|
|
500
|
+
- Do not rely on one source when the question requires comparison.
|
|
501
|
+
- Do not present inference as a verified fact.
|
|
502
|
+
- If current information is required, prefer fresh sources over model memory.
|
|
497
503
|
`;
|
|
498
504
|
}
|
|
499
505
|
function renderStarterSkill(name) {
|
|
@@ -9,6 +9,7 @@ import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
|
|
|
9
9
|
import { parseEmbeddingModelObject, parseMcpServerObject, parseModelObject, parseToolObject, parseVectorStoreObject, validateEmbeddingModelObject, validateMcpServerObject, validateModelObject, validateToolObject, validateVectorStoreObject, } from "./resource-compilers.js";
|
|
10
10
|
import { validateAgent, validateTopology } from "./validate.js";
|
|
11
11
|
import { compileBinding } from "./agent-binding-compiler.js";
|
|
12
|
+
import { resolveFrameworkContractValidationMode, validateFrameworkContracts, } from "./framework-contract-validation.js";
|
|
12
13
|
import { discoverSubagents, ensureDiscoverySources } from "./support/discovery.js";
|
|
13
14
|
import { collectAgentDiscoverySourceRefs, collectToolSourceRefs } from "./support/source-collectors.js";
|
|
14
15
|
import { getRoutingDefaultAgentId, getRuntimeSources, getRuntimeResources, getRuntimeStorageRoots, getToolModuleDiscoveryConfig, getRoutingRules, resolveRefId, } from "./support/workspace-ref-utils.js";
|
|
@@ -412,6 +413,24 @@ export async function loadWorkspace(workspaceRoot, options = {}) {
|
|
|
412
413
|
externalResourceCount: externalResources.length,
|
|
413
414
|
});
|
|
414
415
|
validateToolNameConflicts(tools);
|
|
416
|
+
const frameworkContractValidation = resolveFrameworkContractValidationMode(options.frameworkContractValidation);
|
|
417
|
+
const contractOwnedRoots = Array.from(new Set([
|
|
418
|
+
workspaceRoot,
|
|
419
|
+
...(localResourceRoot ? [localResourceRoot] : []),
|
|
420
|
+
...resolvedConfiguredResources.map((resource) => resource.root),
|
|
421
|
+
]));
|
|
422
|
+
await traceStartupStage("workspace.validate.frameworkContracts", async () => {
|
|
423
|
+
validateFrameworkContracts({
|
|
424
|
+
agents: loaded.agents,
|
|
425
|
+
tools,
|
|
426
|
+
skillRegistry,
|
|
427
|
+
ownedRoots: contractOwnedRoots,
|
|
428
|
+
mode: frameworkContractValidation,
|
|
429
|
+
});
|
|
430
|
+
}, {
|
|
431
|
+
workspaceRoot,
|
|
432
|
+
mode: frameworkContractValidation,
|
|
433
|
+
});
|
|
415
434
|
const resources = Array.from(new Set([
|
|
416
435
|
...(localResourceRoot ? [localResourceRoot] : []),
|
|
417
436
|
...runtimeSources.tools.filter((source) => isNpmSourceUri(source)),
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ParsedAgentObject, ParsedToolObject } from "../contracts/types.js";
|
|
2
|
+
export type FrameworkContractValidationMode = "off" | "warn" | "error";
|
|
3
|
+
export declare function resolveFrameworkContractValidationMode(mode: FrameworkContractValidationMode | undefined): FrameworkContractValidationMode;
|
|
4
|
+
export declare function validateFrameworkContracts(input: {
|
|
5
|
+
agents: ParsedAgentObject[];
|
|
6
|
+
tools: Map<string, ParsedToolObject>;
|
|
7
|
+
skillRegistry: Map<string, string>;
|
|
8
|
+
ownedRoots: string[];
|
|
9
|
+
mode?: FrameworkContractValidationMode;
|
|
10
|
+
}): void;
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { validateSkillMetadata } from "../runtime/skills/skill-metadata.js";
|
|
4
|
+
import { getAgentExecutionConfigValue } from "./support/agent-execution-config.js";
|
|
5
|
+
import { resolvePromptValue } from "./support/workspace-ref-utils.js";
|
|
6
|
+
/**
 * Maps any input to a validation mode, treating everything except `"warn"`
 * and `"error"` as `"off"`.
 *
 * @param {*} mode - Requested mode.
 * @returns {"off"|"warn"|"error"} The mode itself when it is `"warn"` or
 *   `"error"`, otherwise `"off"`.
 */
function normalizeMode(mode) {
    const activeModes = new Set(["warn", "error"]);
    return activeModes.has(mode) ? mode : "off";
}
|
|
12
|
+
/**
 * Determine the effective framework contract validation mode.
 *
 * Precedence: an explicit supported `mode`, then the
 * `AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION` environment variable
 * (trimmed and lower-cased), then `"off"`.
 *
 * @param {"off" | "warn" | "error" | undefined} mode - Mode supplied by the caller, if any.
 * @returns {"off" | "warn" | "error"} The resolved mode.
 */
export function resolveFrameworkContractValidationMode(mode) {
    const supportedModes = ["warn", "error", "off"];
    if (supportedModes.includes(mode)) {
        return mode;
    }
    const fromEnvironment = process.env.AGENT_HARNESS_FRAMEWORK_CONTRACT_VALIDATION?.trim().toLowerCase();
    return supportedModes.includes(fromEnvironment) ? fromEnvironment : "off";
}
|
|
22
|
+
/**
 * Report whether `candidate` is `root` itself or lies somewhere beneath it.
 *
 * Both paths are resolved to absolute form before comparison.
 *
 * @param {string} candidate - Path to test.
 * @param {string} root - Directory that may contain `candidate`.
 * @returns {boolean} `true` when `candidate` equals or is nested under `root`.
 */
function isPathWithinRoot(candidate, root) {
    const relative = path.relative(path.resolve(root), path.resolve(candidate));
    if (relative === "") {
        return true;
    }
    // Only a leading ".." *segment* escapes the root. A plain startsWith("..")
    // would also reject real children such as "<root>/..cache/file".
    const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
    // path.relative returns an absolute path when no relative route exists
    // (for example, a different drive on Windows).
    return !escapesRoot && !path.isAbsolute(relative);
}
|
|
26
|
+
/**
 * Report whether `candidate` falls inside at least one of the given roots.
 *
 * @param {string} candidate - Path to test.
 * @param {string[]} roots - Owned root directories.
 * @returns {boolean} `true` as soon as one root contains `candidate`; `false` when none do.
 */
function isWorkspaceOwnedPath(candidate, roots) {
    for (const ownedRoot of roots) {
        if (isPathWithinRoot(candidate, ownedRoot)) {
            return true;
        }
    }
    return false;
}
|
|
29
|
+
/**
 * Append one validation finding to the shared issue list.
 *
 * @param {{ code: string, message: string }[]} issues - Accumulator, mutated in place.
 * @param {string} code - Issue identifier.
 * @param {string} message - Human-readable explanation.
 * @returns {void}
 */
function addIssue(issues, code, message) {
    const entry = { code, message };
    issues.push(entry);
}
|
|
32
|
+
/**
 * Run the agent-level contract heuristics and record any findings.
 *
 * Checks performed:
 * - every agent: description length of at least 24 characters;
 * - agents that declare subagents: a non-blank system prompt and a description
 *   that mentions delegation-related wording;
 * - agents referenced as a subagent: a non-blank system prompt, a description
 *   with a usage trigger, and (for `deepagent` execution mode with tools) a
 *   configured `responseFormat`.
 *
 * @param {object} agent - Parsed agent definition.
 * @param {Set<string>} referencedSubagentIds - Ids of agents referenced as subagents.
 * @param {{ code: string, message: string }[]} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateAgentContract(agent, referencedSubagentIds, issues) {
    const delegationWording = /(delegate|delegation|subagent|decompose|synthesi|answer directly|parallel)/i;
    const triggerWording = /(use this when|when the task|for .*?(analysis|research|search|debug|review|triage|inspection|extraction|comparison|validation|implementation))/i;
    const summary = agent.description.trim();
    const prompt = resolvePromptValue(getAgentExecutionConfigValue(agent, "systemPrompt"), path.dirname(agent.sourcePath));
    const delegates = agent.subagentRefs.length > 0
        || agent.subagentPathRefs.length > 0
        || (agent.asyncSubagents?.length ?? 0) > 0;
    const isDelegatedTo = referencedSubagentIds.has(agent.id);
    const exposesTools = agent.toolRefs.length > 0
        || (agent.toolBindings?.length ?? 0) > 0
        || (agent.inlineTools?.length ?? 0) > 0;
    const responseFormat = getAgentExecutionConfigValue(agent, "responseFormat");
    // Evaluated lazily so the prompt is only inspected by the branches that need it.
    const promptIsBlank = () => !prompt?.trim();
    const report = (code, message) => addIssue(issues, code, message);

    if (summary.length < 24) {
        report("agent.description.too_short", `Agent ${agent.id} should use a more specific description that explains when it should be used.`);
    }

    if (delegates) {
        if (promptIsBlank()) {
            report("agent.orchestrator.missing_prompt", `Delegating agent ${agent.id} should define a systemPrompt that explains decomposition, delegation, synthesis, and stop conditions.`);
        }
        if (!delegationWording.test(summary)) {
            report("agent.orchestrator.description_boundary", `Delegating agent ${agent.id} description should make its delegation boundary explicit, for example when it should answer directly versus delegate.`);
        }
    }

    if (!isDelegatedTo) {
        return;
    }
    if (promptIsBlank()) {
        report("agent.subagent.missing_prompt", `Subagent ${agent.id} should define a systemPrompt that makes its operating boundary and output contract explicit.`);
    }
    if (!triggerWording.test(summary)) {
        report("agent.subagent.description_trigger", `Subagent ${agent.id} description should clarify when it should be delegated to and what narrow task class it owns.`);
    }
    if (agent.executionMode === "deepagent" && exposesTools && responseFormat === undefined) {
        report("agent.subagent.deepagent.missing_response_format", `DeepAgents subagent ${agent.id} exposes tools, so it should define config.responseFormat to guarantee a stable task result for its parent agent.`);
    }
}
|
|
64
|
+
/**
 * Remove a leading `---`-delimited frontmatter block from a document.
 *
 * Handles an optional UTF-8 byte-order mark before the opening delimiter and
 * an empty frontmatter block (opening delimiter immediately followed by the
 * closing one). Documents that do not start with frontmatter are returned
 * unchanged.
 *
 * @param {string} document - Full document text.
 * @returns {string} The document without its frontmatter block.
 */
function stripFrontmatter(document) {
    // The content group is optional and lazy (`??`), so the first `---` line after
    // the opening delimiter closes the block even when there is no content
    // in between.
    return document.replace(/^\uFEFF?---\s*\n(?:[\s\S]*?\n)??---\s*(?:\n|$)/, "");
}
|
|
67
|
+
/**
 * Run the skill-level contract heuristics against `<skillRoot>/SKILL.md`.
 *
 * The document body (frontmatter removed) is matched against three patterns:
 * a usage trigger, a workflow section or numbered steps, and boundary wording
 * such as rules, caveats, or output expectations. Each miss is recorded.
 *
 * @param {string} skillRoot - Directory containing the skill's `SKILL.md`.
 * @param {{ code: string, message: string }[]} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateSkillContract(skillRoot, issues) {
    const metadata = validateSkillMetadata(skillRoot);
    const skillFile = path.join(skillRoot, "SKILL.md");
    const body = stripFrontmatter(readFileSync(skillFile, "utf8"));
    // Fall back to the directory name when the metadata carries no usable name.
    const skillName = metadata.name || path.basename(skillRoot);
    const expectations = [
        {
            code: "skill.missing_trigger",
            pattern: /(Use this skill when|Use this when)/i,
            message: `Skill ${skillName} should explain when it should be used, preferably with a clear "Use this skill when..." trigger.`,
        },
        {
            code: "skill.missing_workflow",
            pattern: /(## Workflow|^## Workflow|^\d+\.\s)/m,
            message: `Skill ${skillName} should define an explicit workflow instead of only background prose.`,
        },
        {
            code: "skill.missing_boundaries",
            pattern: /(## Rules|Do not|Output|Caveat|Caveats)/i,
            message: `Skill ${skillName} should include execution boundaries such as rules, non-goals, caveats, or output expectations.`,
        },
    ];
    for (const { code, pattern, message } of expectations) {
        if (!pattern.test(body)) {
            addIssue(issues, code, message);
        }
    }
}
|
|
82
|
+
/**
 * Run the tool-level contract heuristics and record any findings.
 *
 * A description shorter than 20 characters is reported and ends the check.
 * Otherwise the description must contain boundary wording ("Use this when",
 * "Do not use", or "Before calling", case-insensitive).
 *
 * @param {object} tool - Parsed tool definition.
 * @param {{ code: string, message: string }[]} issues - Accumulator, mutated in place.
 * @returns {void}
 */
function validateToolContract(tool, issues) {
    const summary = tool.description.trim();
    const isTooShort = summary.length < 20;
    if (isTooShort) {
        addIssue(issues, "tool.description.too_short", `Tool ${tool.id} should use a more specific description that explains invocation boundaries and argument expectations.`);
        return;
    }
    const statesBoundary = /(Use this when|Do not use|Before calling)/i.test(summary);
    if (statesBoundary) {
        return;
    }
    addIssue(issues, "tool.description.missing_boundary", `Tool ${tool.id} description should describe when to call it and, ideally, when not to call it or what must be true before calling it.`);
}
|
|
92
|
+
/**
 * Validate agents, skills, and tools located under the owned roots against
 * the framework contract heuristics.
 *
 * Does nothing unless the mode is `"warn"` or `"error"`. Findings are merged
 * into one report: `"warn"` prints it with `console.warn`, `"error"` throws.
 *
 * @param {object} input
 * @param {object[]} input.agents - Parsed agents.
 * @param {Map<string, object>} input.tools - Parsed tools.
 * @param {Map<string, string>} input.skillRegistry - Skill name mapped to skill root directory.
 * @param {string[]} input.ownedRoots - Directories whose definitions are validated.
 * @param {"off" | "warn" | "error"} [input.mode] - Validation mode.
 * @returns {void}
 * @throws {Error} In `"error"` mode when at least one issue was found.
 */
export function validateFrameworkContracts(input) {
    const mode = normalizeMode(input.mode);
    if (mode === "off") {
        return;
    }
    const issues = [];
    const isOwned = (location) => isWorkspaceOwnedPath(location, input.ownedRoots);

    // Collect every agent id that some agent references as a subagent,
    // dropping the optional "agent/" prefix from the reference.
    const referencedSubagentIds = new Set();
    for (const agent of input.agents) {
        for (const ref of agent.subagentRefs) {
            referencedSubagentIds.add(ref.replace(/^agent\//, ""));
        }
    }

    for (const agent of input.agents) {
        if (isOwned(agent.sourcePath)) {
            validateAgentContract(agent, referencedSubagentIds, issues);
        }
    }

    for (const [skillName, skillRoot] of input.skillRegistry) {
        if (!isOwned(skillRoot)) {
            continue;
        }
        validateSkillContract(skillRoot, issues);
        if (!skillName.trim()) {
            addIssue(issues, "skill.name.empty", `Skill ${skillRoot} must define a stable name in frontmatter.`);
        }
    }

    for (const tool of input.tools.values()) {
        if (isOwned(tool.sourcePath)) {
            validateToolContract(tool, issues);
        }
    }

    if (issues.length === 0) {
        return;
    }

    const header = [
        "Framework contract validation failed.",
        "The workspace should follow the agent-harness contract-writing guidance for agents, skills, and tools.",
    ];
    const bullets = issues.map((issue) => `- [${issue.code}] ${issue.message}`);
    const message = header.concat(bullets).join("\n");

    if (mode !== "warn") {
        throw new Error(message);
    }
    console.warn(`[agent-harness] ${message}`);
}
|