@botbotgo/agent-harness 0.0.324 → 0.0.326
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/README.zh.md +2 -1
- package/dist/cli/main.js +84 -2
- package/dist/cli/managed-service.d.ts +2 -1
- package/dist/cli/managed-service.js +78 -2
- package/dist/cli/options-serve.d.ts +13 -0
- package/dist/cli/options-serve.js +116 -0
- package/dist/cli/options.d.ts +1 -1
- package/dist/cli/options.js +7 -1
- package/dist/cli/server-commands.d.ts +1 -1
- package/dist/cli/server-commands.js +28 -8
- package/dist/config/knowledge/knowledge-runtime.yaml +3 -2
- package/dist/config/prompts/orchestra-system.md +4 -0
- package/dist/config/runtime/runtime-memory.yaml +3 -2
- package/dist/knowledge/module.js +2 -1
- package/dist/package-version.d.ts +2 -2
- package/dist/package-version.js +2 -2
- package/dist/resource/isolation.js +28 -0
- package/dist/resources/prompts/runtime/autonomous-investigation-recovery.md +1 -1
- package/dist/resources/prompts/runtime/durable-memory-context.md +2 -0
- package/dist/resources/prompts/runtime/execution-with-tool-evidence-retry.md +1 -1
- package/dist/resources/prompts/runtime/write-todos-required-plan.md +1 -0
- package/dist/runtime/adapter/flow/stream-runtime.js +9 -141
- package/dist/runtime/adapter/model/invocation-request.js +1 -1
- package/dist/runtime/adapter/tool/builtin-middleware-tools.js +22 -8
- package/dist/runtime/harness/run/stream-run.js +17 -0
- package/dist/runtime/harness/system/runtime-memory-manager.js +4 -0
- package/dist/runtime/harness/system/runtime-memory-policy.d.ts +14 -0
- package/dist/runtime/harness/system/runtime-memory-policy.js +19 -0
- package/dist/runtime/parsing/output-parsing.d.ts +2 -1
- package/dist/runtime/parsing/output-parsing.js +2 -1
- package/dist/runtime/parsing/output-recovery.d.ts +4 -2
- package/dist/runtime/parsing/output-recovery.js +30 -264
- package/dist/runtime/parsing/stream-event-parsing.js +10 -1
- package/dist/runtime/prompts/runtime-prompts.d.ts +1 -0
- package/dist/runtime/prompts/runtime-prompts.js +1 -0
- package/dist/scaffold/init-project.js +6 -4
- package/dist/tools.js +25 -2
- package/package.json +1 -1
|
@@ -1,14 +1,11 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION, STRICT_TOOL_JSON_INSTRUCTION, WORKSPACE_RELATIVE_PATH_INSTRUCTION, WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION, WRITE_TODOS_FULL_ENTRY_INSTRUCTION, WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION, } from "../prompts/runtime-prompts.js";
|
|
2
|
+
import { wrapNormalizedMessage, readTextContent } from "./output-content.js";
|
|
3
3
|
function isToolCallParseFailure(error) {
|
|
4
|
-
|
|
5
|
-
return false;
|
|
6
|
-
return /error parsing tool call:/i.test(error.message);
|
|
4
|
+
return error instanceof Error && /error parsing tool call:/i.test(error.message);
|
|
7
5
|
}
|
|
8
6
|
function isStructuredValidationIssue(value) {
|
|
9
|
-
if (typeof value !== "object" || !value || Array.isArray(value))
|
|
7
|
+
if (typeof value !== "object" || !value || Array.isArray(value))
|
|
10
8
|
return false;
|
|
11
|
-
}
|
|
12
9
|
const typed = value;
|
|
13
10
|
return typeof typed.code === "string" && Array.isArray(typed.path) && (typed.message === undefined || typeof typed.message === "string");
|
|
14
11
|
}
|
|
@@ -31,18 +28,16 @@ export function isToolCallValidationFailure(error) {
|
|
|
31
28
|
return /Received tool input did not match expected schema/i.test(message) && /(?:→\s*at|at)\s+[\w[\].]+/i.test(message);
|
|
32
29
|
}
|
|
33
30
|
function collectErrorMessages(error) {
|
|
34
|
-
if (!error)
|
|
31
|
+
if (!error)
|
|
35
32
|
return [];
|
|
36
|
-
}
|
|
37
33
|
if (error instanceof Error) {
|
|
38
34
|
const nested = Array.isArray(error.errors)
|
|
39
35
|
? (error.errors ?? []).flatMap((item) => collectErrorMessages(item))
|
|
40
36
|
: [];
|
|
41
37
|
return [error.message, ...nested].filter((value) => typeof value === "string" && value.trim().length > 0);
|
|
42
38
|
}
|
|
43
|
-
if (typeof error === "string" && error.trim().length > 0)
|
|
39
|
+
if (typeof error === "string" && error.trim().length > 0)
|
|
44
40
|
return [error];
|
|
45
|
-
}
|
|
46
41
|
if (typeof error === "object") {
|
|
47
42
|
const record = error;
|
|
48
43
|
return [record.message, record.error, record.details].flatMap((item) => collectErrorMessages(item));
|
|
@@ -56,28 +51,13 @@ export function isInternalRuntimeSpillPathFailure(error) {
|
|
|
56
51
|
return collectErrorMessages(error).some((message) => /\/large_tool_results\/|internal runtime spill path/i.test(message));
|
|
57
52
|
}
|
|
58
53
|
export function isRepairableWriteTodosPlaceholderFailure(error) {
|
|
59
|
-
|
|
60
|
-
return false;
|
|
61
|
-
const message = error.message.trim();
|
|
62
|
-
if (!message)
|
|
63
|
-
return false;
|
|
64
|
-
return /write_todos/i.test(message) && /descriptive task content|placeholder entries/i.test(message);
|
|
54
|
+
return error instanceof Error && /write_todos/i.test(error.message) && /descriptive task content|placeholder entries/i.test(error.message);
|
|
65
55
|
}
|
|
66
56
|
export function isRepairableWriteTodosContentFailure(error) {
|
|
67
|
-
|
|
68
|
-
return false;
|
|
69
|
-
const message = error.message.trim();
|
|
70
|
-
if (!message)
|
|
71
|
-
return false;
|
|
72
|
-
return /write_todos/i.test(message) && /todos\[\d+\]\.content/i.test(message);
|
|
57
|
+
return error instanceof Error && /write_todos/i.test(error.message) && /todos\[\d+\]\.content/i.test(error.message);
|
|
73
58
|
}
|
|
74
59
|
export function isRepairableWriteTodosEmptyFailure(error) {
|
|
75
|
-
|
|
76
|
-
return false;
|
|
77
|
-
const message = error.message.trim();
|
|
78
|
-
if (!message)
|
|
79
|
-
return false;
|
|
80
|
-
return /write_todos/i.test(message) && /Initial write_todos call cannot use an empty todo list/i.test(message);
|
|
60
|
+
return error instanceof Error && /write_todos/i.test(error.message) && /Initial write_todos call cannot use an empty todo list/i.test(error.message);
|
|
81
61
|
}
|
|
82
62
|
export function isToolCallRecoveryFailure(error) {
|
|
83
63
|
return (isToolCallParseFailure(error) ||
|
|
@@ -87,237 +67,39 @@ export function isToolCallRecoveryFailure(error) {
|
|
|
87
67
|
isInternalRuntimeSpillPathFailure(error) ||
|
|
88
68
|
isWorkspacePathScopeFailure(error));
|
|
89
69
|
}
|
|
90
|
-
function extractMessageContent(message) {
|
|
91
|
-
if (typeof message !== "object" || !message)
|
|
92
|
-
return "";
|
|
93
|
-
const typed = message;
|
|
94
|
-
if (typed.content !== undefined)
|
|
95
|
-
return readTextContent(typed.content);
|
|
96
|
-
if (typeof typed.kwargs === "object" && typed.kwargs) {
|
|
97
|
-
return readTextContent(typed.kwargs.content);
|
|
98
|
-
}
|
|
99
|
-
return "";
|
|
100
|
-
}
|
|
101
|
-
function extractLastUserMessageText(input) {
|
|
102
|
-
if (!Array.isArray(input) && !(typeof input === "object" && input && Array.isArray(input.messages))) {
|
|
103
|
-
return "";
|
|
104
|
-
}
|
|
105
|
-
const messages = Array.isArray(input) ? input : input.messages;
|
|
106
|
-
for (let index = messages.length - 1; index >= 0; index -= 1) {
|
|
107
|
-
const message = messages[index];
|
|
108
|
-
if (typeof message !== "object" || !message)
|
|
109
|
-
continue;
|
|
110
|
-
const typed = message;
|
|
111
|
-
if (typed.role === "user") {
|
|
112
|
-
return extractMessageContent(message).trim();
|
|
113
|
-
}
|
|
114
|
-
const ids = Array.isArray(typed.id) ? typed.id.filter((item) => typeof item === "string") : [];
|
|
115
|
-
const typeName = ids.at(-1);
|
|
116
|
-
const runtimeType = typeof message._getType === "function"
|
|
117
|
-
? message._getType()
|
|
118
|
-
: typeof message.getType === "function"
|
|
119
|
-
? message.getType()
|
|
120
|
-
: undefined;
|
|
121
|
-
if (typeName === "HumanMessage" || runtimeType === "human") {
|
|
122
|
-
return extractMessageContent(message).trim();
|
|
123
|
-
}
|
|
124
|
-
if (typeof typed.kwargs === "object" && typed.kwargs && typed.kwargs.role === "user") {
|
|
125
|
-
return extractMessageContent(message).trim();
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
return "";
|
|
129
|
-
}
|
|
130
|
-
function extractAllUserMessageText(input) {
|
|
131
|
-
if (!Array.isArray(input) && !(typeof input === "object" && input && Array.isArray(input.messages))) {
|
|
132
|
-
return "";
|
|
133
|
-
}
|
|
134
|
-
const messages = Array.isArray(input) ? input : input.messages;
|
|
135
|
-
const values = [];
|
|
136
|
-
for (const message of messages) {
|
|
137
|
-
if (typeof message !== "object" || !message)
|
|
138
|
-
continue;
|
|
139
|
-
const typed = message;
|
|
140
|
-
if (typed.role === "user") {
|
|
141
|
-
const text = extractMessageContent(message).trim();
|
|
142
|
-
if (text)
|
|
143
|
-
values.push(text);
|
|
144
|
-
continue;
|
|
145
|
-
}
|
|
146
|
-
const ids = Array.isArray(typed.id) ? typed.id.filter((item) => typeof item === "string") : [];
|
|
147
|
-
const typeName = ids.at(-1);
|
|
148
|
-
const runtimeType = typeof message._getType === "function"
|
|
149
|
-
? message._getType()
|
|
150
|
-
: typeof message.getType === "function"
|
|
151
|
-
? message.getType()
|
|
152
|
-
: undefined;
|
|
153
|
-
if (typeName === "HumanMessage" || runtimeType === "human") {
|
|
154
|
-
const text = extractMessageContent(message).trim();
|
|
155
|
-
if (text)
|
|
156
|
-
values.push(text);
|
|
157
|
-
continue;
|
|
158
|
-
}
|
|
159
|
-
if (typeof typed.kwargs === "object" && typed.kwargs && typed.kwargs.role === "user") {
|
|
160
|
-
const text = extractMessageContent(message).trim();
|
|
161
|
-
if (text)
|
|
162
|
-
values.push(text);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
return values.join("\n").trim();
|
|
166
|
-
}
|
|
167
|
-
function hasToolResultEvidence(input) {
|
|
168
|
-
if (!Array.isArray(input) && !(typeof input === "object" && input && Array.isArray(input.messages))) {
|
|
169
|
-
return false;
|
|
170
|
-
}
|
|
171
|
-
const messages = Array.isArray(input) ? input : input.messages;
|
|
172
|
-
for (const message of messages) {
|
|
173
|
-
if (typeof message !== "object" || !message)
|
|
174
|
-
continue;
|
|
175
|
-
const typed = message;
|
|
176
|
-
if (typed.role === "tool") {
|
|
177
|
-
return true;
|
|
178
|
-
}
|
|
179
|
-
const ids = Array.isArray(typed.id) ? typed.id.filter((item) => typeof item === "string") : [];
|
|
180
|
-
const typeName = ids.at(-1);
|
|
181
|
-
const runtimeType = typeof message._getType === "function"
|
|
182
|
-
? message._getType()
|
|
183
|
-
: typeof message.getType === "function"
|
|
184
|
-
? message.getType()
|
|
185
|
-
: undefined;
|
|
186
|
-
if (typeName === "ToolMessage" || runtimeType === "tool") {
|
|
187
|
-
return true;
|
|
188
|
-
}
|
|
189
|
-
if (typeof typed.kwargs === "object" && typed.kwargs && typed.kwargs.role === "tool") {
|
|
190
|
-
return true;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
return false;
|
|
194
|
-
}
|
|
195
|
-
function isToolRequiredExecutionPrompt(text) {
|
|
196
|
-
if (!text)
|
|
197
|
-
return false;
|
|
198
|
-
return (/write_todos/i.test(text) ||
|
|
199
|
-
/\b(create|write|edit|read|show|display|append|run|execute|wait|sleep|repeat|loop|file|files|command)\b/i.test(text) ||
|
|
200
|
-
/(创建|写入|读取|显示|追加|执行|运行|等待|重复|文件|命令|步骤)/.test(text));
|
|
201
|
-
}
|
|
202
|
-
function isAutonomousInvestigationPrompt(text) {
|
|
203
|
-
if (!text)
|
|
204
|
-
return false;
|
|
205
|
-
return (/\b(root cause|rca|deep investigation|investigate deeply|investigate fully|step by step|find all (?:issues|problems|causes)|continue investigating|keep digging|until complete|until you find)\b/i.test(text) ||
|
|
206
|
-
/(根因分析|深度调查|深入调查|一步一步|逐步分析|逐个分析|继续排查|排查到底|找到所有问题|查出所有问题|直到完成|直到找到.*原因)/.test(text));
|
|
207
|
-
}
|
|
208
|
-
function isBrowserOrUrlTask(text) {
|
|
209
|
-
if (!text)
|
|
210
|
-
return false;
|
|
211
|
-
return (/https?:\/\/\S+/i.test(text) ||
|
|
212
|
-
/\b(url|urls|website|websites|web page|webpage|browser|browse|page summary|summari[sz]e.*page|open.*page)\b/i.test(text) ||
|
|
213
|
-
/(网址|链接|网页|网站|页面|浏览器|总结.*网页|总结.*页面|打开.*网页)/.test(text));
|
|
214
|
-
}
|
|
215
|
-
function claimsExecutionWithoutToolEvidence(text) {
|
|
216
|
-
if (!text)
|
|
217
|
-
return false;
|
|
218
|
-
return (/\b(completed|done|created|wrote|written|read|displayed|appended|executed|waited|result|results)\b/i.test(text) ||
|
|
219
|
-
/(已创建|已写入|已读取|已显示|已追加|已执行|已等待|执行结果|状态更新|步骤\s*\d+|文件内容)/.test(text));
|
|
220
|
-
}
|
|
221
|
-
function claimsMissingBrowserCapability(text) {
|
|
222
|
-
if (!text)
|
|
223
|
-
return false;
|
|
224
|
-
return (/\b(i do not have the capability to access external websites|i do not have access to external websites|i cannot access external websites|i can(?:not|'t) browse|i do not have (?:direct )?(?:web|browser|browsing) access|i do not have .*tool.*(?:website|url|browser)|i cannot summarize .* because .*tool.*failed|please provide (?:the )?text content.*i (?:will|can) summarize)\b/i.test(text) ||
|
|
225
|
-
/(我没有直接访问外部网站|我无法直接访问外部网站|我无法访问外部网站|我不能访问外部网站|我无法直接访问外部网站内容|我无法访问网页|请提供文本内容.*我将为您进行总结|我没有.*浏览.*工具|我没有.*网页抓取.*工具|我没有.*访问.*网站.*工具|我无法总结该网页内容,因为.*工具执行失败)/.test(text));
|
|
226
|
-
}
|
|
227
|
-
function asksUserToChooseObviousNextDiagnosticStep(text) {
|
|
228
|
-
if (!text)
|
|
229
|
-
return false;
|
|
230
|
-
return (/\b(which (?:direction|aspect|node|namespace|part)|what would you like me to check|please choose|please tell me (?:which|what|where) .* (?:check|inspect|investigate)|which .* should we start with)\b/i.test(text) ||
|
|
231
|
-
/(请告诉我.*(哪个|哪一个|什么方向)|请选择.*(方向|项目|节点)|您希望我.*(检查|查看).*(哪个|哪一个|方向)|从哪个方向开始)/.test(text));
|
|
232
|
-
}
|
|
233
|
-
function claimsFutureExecutionWithoutToolEvidence(text) {
|
|
234
|
-
if (!text)
|
|
235
|
-
return false;
|
|
236
|
-
return (/\b(i will|i'll|we will|next i(?:'ll| will)|i am going to)\b.*\b(run|execute|inspect|check|investigate|continue|start)\b/i.test(text) ||
|
|
237
|
-
/(我将|我会|接下来我会|现在我将|我将继续).*(执行|检查|查看|调查|排查|开始|继续)/.test(text));
|
|
238
|
-
}
|
|
239
70
|
export function isRetrySafeInvalidToolSelectionError(value) {
|
|
240
71
|
const text = readTextContent(value).trim();
|
|
241
|
-
|
|
242
|
-
return false;
|
|
243
|
-
return /is not a valid tool, try one of \[/i.test(text);
|
|
72
|
+
return !!text && /is not a valid tool, try one of \[/i.test(text);
|
|
244
73
|
}
|
|
245
74
|
export function shouldValidateExecutionWithoutToolEvidence(request) {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
return browserOrUrlTask || isToolRequiredExecutionPrompt(userText) || isAutonomousInvestigationPrompt(userText);
|
|
249
|
-
}
|
|
250
|
-
function extractAssistantTextFromResult(result) {
|
|
251
|
-
if (typeof result === "object" && result && "messages" in result) {
|
|
252
|
-
const messages = result.messages;
|
|
253
|
-
if (Array.isArray(messages)) {
|
|
254
|
-
for (let index = messages.length - 1; index >= 0; index -= 1) {
|
|
255
|
-
const message = messages[index];
|
|
256
|
-
if (typeof message !== "object" || !message)
|
|
257
|
-
continue;
|
|
258
|
-
const ids = Array.isArray(message.id)
|
|
259
|
-
? message.id.filter((item) => typeof item === "string")
|
|
260
|
-
: [];
|
|
261
|
-
const typeName = ids.at(-1);
|
|
262
|
-
const runtimeType = typeof message._getType === "function"
|
|
263
|
-
? message._getType()
|
|
264
|
-
: typeof message.getType === "function"
|
|
265
|
-
? message.getType()
|
|
266
|
-
: undefined;
|
|
267
|
-
if (typeName === "AIMessage" || runtimeType === "ai") {
|
|
268
|
-
return extractMessageContent(message).trim();
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
return "";
|
|
75
|
+
void request;
|
|
76
|
+
return false;
|
|
274
77
|
}
|
|
275
78
|
export function resolveExecutionWithoutToolEvidenceInstruction(request, result) {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
return resolveExecutionWithoutToolEvidenceTextInstruction(request, assistantText, hasToolCalls(result));
|
|
79
|
+
void request;
|
|
80
|
+
void result;
|
|
81
|
+
return null;
|
|
280
82
|
}
|
|
281
|
-
export function resolveExecutionWithoutToolEvidenceTextInstruction(request, assistantText, toolCallEvidence = false) {
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
return null;
|
|
288
|
-
}
|
|
289
|
-
if (!assistantText || toolCallEvidence) {
|
|
290
|
-
return null;
|
|
291
|
-
}
|
|
292
|
-
if (autonomousInvestigationTask &&
|
|
293
|
-
toolResultEvidence &&
|
|
294
|
-
(asksUserToChooseObviousNextDiagnosticStep(assistantText) || claimsFutureExecutionWithoutToolEvidence(assistantText))) {
|
|
295
|
-
return AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
|
|
296
|
-
}
|
|
297
|
-
if (browserOrUrlTask && claimsMissingBrowserCapability(assistantText)) {
|
|
298
|
-
return BROWSER_CAPABILITY_DISCLAIMER_RECOVERY_INSTRUCTION;
|
|
299
|
-
}
|
|
300
|
-
return claimsExecutionWithoutToolEvidence(assistantText) ? EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION : null;
|
|
83
|
+
export function resolveExecutionWithoutToolEvidenceTextInstruction(request, assistantText, toolCallEvidence = false, resultEvidence = {}) {
|
|
84
|
+
void request;
|
|
85
|
+
void assistantText;
|
|
86
|
+
void toolCallEvidence;
|
|
87
|
+
void resultEvidence;
|
|
88
|
+
return null;
|
|
301
89
|
}
|
|
302
90
|
export function resolveToolCallRecoveryInstruction(error) {
|
|
303
|
-
if (isRepairableWriteTodosEmptyFailure(error))
|
|
91
|
+
if (isRepairableWriteTodosEmptyFailure(error))
|
|
304
92
|
return WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION;
|
|
305
|
-
|
|
306
|
-
if (isRepairableWriteTodosContentFailure(error)) {
|
|
93
|
+
if (isRepairableWriteTodosContentFailure(error))
|
|
307
94
|
return WRITE_TODOS_FULL_ENTRY_INSTRUCTION;
|
|
308
|
-
|
|
309
|
-
if (isRepairableWriteTodosPlaceholderFailure(error)) {
|
|
95
|
+
if (isRepairableWriteTodosPlaceholderFailure(error))
|
|
310
96
|
return WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION;
|
|
311
|
-
|
|
312
|
-
if (isInternalRuntimeSpillPathFailure(error)) {
|
|
97
|
+
if (isInternalRuntimeSpillPathFailure(error))
|
|
313
98
|
return INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION;
|
|
314
|
-
|
|
315
|
-
if (isWorkspacePathScopeFailure(error)) {
|
|
99
|
+
if (isWorkspacePathScopeFailure(error))
|
|
316
100
|
return WORKSPACE_RELATIVE_PATH_INSTRUCTION;
|
|
317
|
-
|
|
318
|
-
if (isToolCallRecoveryFailure(error)) {
|
|
101
|
+
if (isToolCallRecoveryFailure(error))
|
|
319
102
|
return STRICT_TOOL_JSON_INSTRUCTION;
|
|
320
|
-
}
|
|
321
103
|
return null;
|
|
322
104
|
}
|
|
323
105
|
export function appendToolRecoveryInstruction(input, instruction) {
|
|
@@ -344,30 +126,15 @@ export function wrapResolvedModel(value) {
|
|
|
344
126
|
let activeArgs = [...args];
|
|
345
127
|
for (let attempt = 0; attempt < 3; attempt += 1) {
|
|
346
128
|
try {
|
|
347
|
-
|
|
348
|
-
const executionRecoveryInstruction = resolveExecutionWithoutToolEvidenceInstruction(activeArgs[0], normalized);
|
|
349
|
-
if (!executionRecoveryInstruction) {
|
|
350
|
-
return normalized;
|
|
351
|
-
}
|
|
352
|
-
if (attempt === 2) {
|
|
353
|
-
throw new Error("Model claimed task execution without any tool calls after repeated recovery attempts. Refusing to treat that response as a successful execution.");
|
|
354
|
-
}
|
|
355
|
-
const nextInstruction = attempt === 0
|
|
356
|
-
? executionRecoveryInstruction
|
|
357
|
-
: EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
|
|
358
|
-
activeArgs = [...activeArgs];
|
|
359
|
-
activeArgs[0] = appendToolRecoveryInstruction(activeArgs[0], nextInstruction);
|
|
129
|
+
return wrapNormalizedMessage(await member.apply(currentTarget, activeArgs));
|
|
360
130
|
}
|
|
361
131
|
catch (error) {
|
|
362
132
|
const recoveryInstruction = resolveToolCallRecoveryInstruction(error);
|
|
363
|
-
if (!recoveryInstruction) {
|
|
364
|
-
throw error;
|
|
365
|
-
}
|
|
366
|
-
if (attempt === 2) {
|
|
133
|
+
if (!recoveryInstruction || attempt === 2) {
|
|
367
134
|
throw error;
|
|
368
135
|
}
|
|
369
136
|
activeArgs = [...activeArgs];
|
|
370
|
-
activeArgs[0] = appendToolRecoveryInstruction(activeArgs[0], recoveryInstruction);
|
|
137
|
+
activeArgs[0] = appendToolRecoveryInstruction(activeArgs[0], attempt === 0 ? recoveryInstruction : EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION);
|
|
371
138
|
}
|
|
372
139
|
}
|
|
373
140
|
throw new Error("Model invocation retry loop exited unexpectedly.");
|
|
@@ -382,4 +149,3 @@ export function wrapResolvedModel(value) {
|
|
|
382
149
|
},
|
|
383
150
|
});
|
|
384
151
|
}
|
|
385
|
-
export { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION, BROWSER_CAPABILITY_DISCLAIMER_RECOVERY_INSTRUCTION, EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, STRICT_TOOL_JSON_INSTRUCTION, WORKSPACE_RELATIVE_PATH_INSTRUCTION, WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION, WRITE_TODOS_FULL_ENTRY_INSTRUCTION, WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION } from "../prompts/runtime-prompts.js";
|
|
@@ -196,7 +196,16 @@ function isErrorLikeToolOutput(value) {
|
|
|
196
196
|
if (!message) {
|
|
197
197
|
return false;
|
|
198
198
|
}
|
|
199
|
-
|
|
199
|
+
const firstNonEmptyLine = message
|
|
200
|
+
.split("\n")
|
|
201
|
+
.map((line) => line.trim())
|
|
202
|
+
.find((line) => line.length > 0) ?? "";
|
|
203
|
+
if (!firstNonEmptyLine) {
|
|
204
|
+
return false;
|
|
205
|
+
}
|
|
206
|
+
return /^(error|exception|failed|failure|denied|timed out|timeout|not permitted|eperm|eacces)\b:?/i.test(firstNonEmptyLine)
|
|
207
|
+
|| /^command failed:/i.test(firstNonEmptyLine)
|
|
208
|
+
|| /^stderr:/i.test(firstNonEmptyLine);
|
|
200
209
|
}
|
|
201
210
|
function isEmptyInitialWriteTodosResult(value) {
|
|
202
211
|
if (typeof value !== "object" || !value || Array.isArray(value)) {
|
|
@@ -7,6 +7,7 @@ export declare const INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION: string;
|
|
|
7
7
|
export declare const WRITE_TODOS_FULL_ENTRY_INSTRUCTION: string;
|
|
8
8
|
export declare const WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION: string;
|
|
9
9
|
export declare const WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION: string;
|
|
10
|
+
export declare const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION: string;
|
|
10
11
|
export declare const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION: string;
|
|
11
12
|
export declare const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION: string;
|
|
12
13
|
export declare const AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION: string;
|
|
@@ -10,6 +10,7 @@ export const INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION = readRuntimePrompt("in
|
|
|
10
10
|
export const WRITE_TODOS_FULL_ENTRY_INSTRUCTION = readRuntimePrompt("write-todos-full-entry");
|
|
11
11
|
export const WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION = readRuntimePrompt("write-todos-non-empty-initial-list");
|
|
12
12
|
export const WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION = readRuntimePrompt("write-todos-descriptive-content");
|
|
13
|
+
export const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION = readRuntimePrompt("write-todos-required-plan");
|
|
13
14
|
export const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence");
|
|
14
15
|
export const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence-retry");
|
|
15
16
|
export const AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION = readRuntimePrompt("autonomous-investigation-recovery");
|
|
@@ -205,7 +205,8 @@ spec:
|
|
|
205
205
|
Rules:
|
|
206
206
|
- Store only durable reusable knowledge. Reject transient chatter, scratchpad, or duplication without added value.
|
|
207
207
|
- Reject raw request/session summaries, source-specific page/news recaps, and generic "we learned how to use the tools/workflow" reflections unless they clearly contain reusable preferences, facts, decisions, or procedures.
|
|
208
|
-
-
|
|
208
|
+
- Never store assistant- or system-authored workflow instructions inferred from a single transcript reflection, such as "how the system should investigate", "what the assistant should ask first", or other generic control-flow recipes for future turns.
|
|
209
|
+
- If transcript evidence shows the user explicitly asked the system to remember or follow a future instruction and the assistant confirmed that intent, store the durable user instruction instead of rejecting it as a generic summary.
|
|
209
210
|
- Treat durable knowledge as generic mutable records with database-like operations over the same underlying knowledge item.
|
|
210
211
|
- One candidate may yield zero, one, or multiple durable knowledge items. Split it only when the input clearly contains multiple independently mutable knowledge points.
|
|
211
212
|
- When storing a knowledge item, always return a \`knowledgeMutation\` object with a stable \`identity\` and an \`operation\` of \`create\`, \`update\`, or \`delete\`.
|
|
@@ -214,7 +215,7 @@ spec:
|
|
|
214
215
|
- If an existing relevant record already represents the same underlying knowledge item, reuse that record's \`knowledge_identity\` instead of inventing a new one.
|
|
215
216
|
- Do not invent a second identity just because the new statement negates, revokes, deletes, or replaces the old wording. That is usually the same knowledge item with a different mutation operation.
|
|
216
217
|
- The stored \`content\` must be canonical knowledge text, not an assistant acknowledgement such as "已记住" or "I will remember".
|
|
217
|
-
- You may optionally include \`operationalRule\` when the knowledge is
|
|
218
|
+
- You may optionally include \`operationalRule\` when the knowledge is a durable user-approved rule, instruction, or recurring procedure. Do not use it for assistant-only workflow guidance inferred from one conversation. Treat it as structured metadata, not as the primary identity mechanism.
|
|
218
219
|
- Prefer semantic/episodic/procedural kinds only.
|
|
219
220
|
- Prefer scopes session/agent/workspace/user/project only.
|
|
220
221
|
- If the candidate should not be stored, return {"store": false, "reason": "..."}
|
|
@@ -283,7 +284,8 @@ spec:
|
|
|
283
284
|
Rules:
|
|
284
285
|
- Store only durable reusable knowledge. Reject transient chatter, scratchpad, or duplication without added value.
|
|
285
286
|
- Reject raw request/session summaries, source-specific page/news recaps, and generic "we learned how to use the tools/workflow" reflections unless they clearly contain reusable preferences, facts, decisions, or procedures.
|
|
286
|
-
-
|
|
287
|
+
- Never store assistant- or system-authored workflow instructions inferred from a single transcript reflection, such as "how the system should investigate", "what the assistant should ask first", or other generic control-flow recipes for future turns.
|
|
288
|
+
- If transcript evidence shows the user explicitly asked the system to remember or follow a future instruction and the assistant confirmed that intent, store the durable user instruction instead of rejecting it as a generic summary.
|
|
287
289
|
- Treat durable knowledge as generic mutable records with database-like operations over the same underlying knowledge item.
|
|
288
290
|
- One candidate may yield zero, one, or multiple durable knowledge items. Split it only when the input clearly contains multiple independently mutable knowledge points.
|
|
289
291
|
- When storing a knowledge item, always return a \`knowledgeMutation\` object with a stable \`identity\` and an \`operation\` of \`create\`, \`update\`, or \`delete\`.
|
|
@@ -292,7 +294,7 @@ spec:
|
|
|
292
294
|
- If an existing relevant record already represents the same underlying knowledge item, reuse that record's \`knowledge_identity\` instead of inventing a new one.
|
|
293
295
|
- Do not invent a second identity just because the new statement negates, revokes, deletes, or replaces the old wording. That is usually the same knowledge item with a different mutation operation.
|
|
294
296
|
- The stored \`content\` must be canonical knowledge text, not an assistant acknowledgement such as "已记住" or "I will remember".
|
|
295
|
-
- You may optionally include \`operationalRule\` when the knowledge is
|
|
297
|
+
- You may optionally include \`operationalRule\` when the knowledge is a durable user-approved rule, instruction, or recurring procedure. Do not use it for assistant-only workflow guidance inferred from one conversation. Treat it as structured metadata, not as the primary identity mechanism.
|
|
296
298
|
- Prefer semantic/episodic/procedural kinds only.
|
|
297
299
|
- Prefer scopes session/agent/workspace/user/project only.
|
|
298
300
|
- If the candidate should not be stored, return {"store": false, "reason": "..."}
|
package/dist/tools.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
1
|
export const TOOL_DEFINITION_MARKER = "__agent_harness_tool_definition__";
|
|
3
2
|
function isZodSchema(value) {
|
|
4
3
|
return typeof value === "object" && value !== null && typeof value.parse === "function";
|
|
5
4
|
}
|
|
5
|
+
function isParseableSchema(value) {
|
|
6
|
+
return typeof value === "object" && value !== null && typeof value.parse === "function";
|
|
7
|
+
}
|
|
6
8
|
function getZodLikeTypeName(value) {
|
|
7
9
|
if (typeof value !== "object" || value === null) {
|
|
8
10
|
return undefined;
|
|
@@ -113,7 +115,28 @@ export function normalizeToolSchema(schema) {
|
|
|
113
115
|
if (isZodSchema(schema)) {
|
|
114
116
|
return schema;
|
|
115
117
|
}
|
|
116
|
-
|
|
118
|
+
const rawShape = schema;
|
|
119
|
+
return {
|
|
120
|
+
shape: rawShape,
|
|
121
|
+
parse(input) {
|
|
122
|
+
const source = typeof input === "object" && input !== null && !Array.isArray(input)
|
|
123
|
+
? input
|
|
124
|
+
: {};
|
|
125
|
+
const parsed = {};
|
|
126
|
+
for (const [key, validator] of Object.entries(rawShape)) {
|
|
127
|
+
if (!isParseableSchema(validator)) {
|
|
128
|
+
throw new TypeError(`Tool schema field '${key}' is not parseable.`);
|
|
129
|
+
}
|
|
130
|
+
const hasValue = Object.prototype.hasOwnProperty.call(source, key);
|
|
131
|
+
const value = hasValue ? source[key] : undefined;
|
|
132
|
+
const parsedValue = validator.parse(value);
|
|
133
|
+
if (parsedValue !== undefined) {
|
|
134
|
+
parsed[key] = parsedValue;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
return parsed;
|
|
138
|
+
},
|
|
139
|
+
};
|
|
117
140
|
}
|
|
118
141
|
export function tool(definition) {
|
|
119
142
|
return {
|