open-agents-ai 0.187.477 → 0.187.479
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/index.js +15 -33
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
- package/prompts/agentic/system-large.md +7 -7
- package/prompts/agentic/system-medium.md +8 -13
package/dist/index.js
CHANGED
|
@@ -512194,17 +512194,18 @@ function buildStagnationDiagnostic(signals) {
|
|
|
512194
512194
|
``,
|
|
512195
512195
|
`2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
|
|
512196
512196
|
``,
|
|
512197
|
-
`3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
|
|
512198
|
-
` •
|
|
512199
|
-
` •
|
|
512200
|
-
` •
|
|
512201
|
-
` •
|
|
512197
|
+
`3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command native to whatever ecosystem you're in. Examples of the *shape* (not the exact commands):`,
|
|
512198
|
+
` • Is this artifact present on disk? (one read of the path)`,
|
|
512199
|
+
` • Does this import / reference resolve? (read 5 lines around it)`,
|
|
512200
|
+
` • Is this environment value set? (one query)`,
|
|
512201
|
+
` • Is this binary on PATH? (one which/where)`,
|
|
512202
|
+
` • Don't know what an error means? web_search("<exact error string>")`,
|
|
512202
512203
|
``,
|
|
512203
|
-
`4. CHECK SILENT FAILURES —
|
|
512204
|
+
`4. CHECK SILENT FAILURES — package managers and build systems frequently report "success" while silently dropping artifacts you needed. Don't trust summary output ("added N", "build complete") without verifying the SPECIFIC artifact exists.`,
|
|
512204
512205
|
``,
|
|
512205
512206
|
`DO NOT in your next response:`,
|
|
512206
512207
|
` • Try another version, flag, or variant of any command in the list above`,
|
|
512207
|
-
` • Wipe
|
|
512208
|
+
` • Wipe caches / re-install / re-build — that hides the original error`,
|
|
512208
512209
|
` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
|
|
512209
512210
|
``,
|
|
512210
512211
|
`task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
|
|
@@ -518337,10 +518338,8 @@ function detectTaskMode(task) {
|
|
|
518337
518338
|
return true;
|
|
518338
518339
|
if (/(\/[\w.-]+){2,}/.test(task.slice(0, 2e3)))
|
|
518339
518340
|
return true;
|
|
518340
|
-
if (/\b(implement|build|create|refactor|
|
|
518341
|
-
|
|
518342
|
-
return true;
|
|
518343
|
-
}
|
|
518341
|
+
if (/\b(implement|build|create|refactor|rewrite|fix|migrate|deploy|generate|setup|set up|develop|design|integrate|configure|install|debug|port|extend|add)\b/.test(head)) {
|
|
518342
|
+
return true;
|
|
518344
518343
|
}
|
|
518345
518344
|
return false;
|
|
518346
518345
|
}
|
|
@@ -518353,23 +518352,11 @@ function slimSystemPromptForTaskMode(prompt) {
|
|
|
518353
518352
|
/^##\s*Self-Awareness( & Introspection)?\s*$/im,
|
|
518354
518353
|
/^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
|
|
518355
518354
|
];
|
|
518356
|
-
const TOOL_LINES_TO_REMOVE = [
|
|
518357
|
-
/^- nexus:.*$/im,
|
|
518358
|
-
/^- background_run.*task_status.*task_output.*task_stop:.*$/im,
|
|
518359
|
-
/^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/im,
|
|
518360
|
-
/^Voice\/TTS:.*$/im,
|
|
518361
|
-
/^- Voice\/TTS:.*$/im,
|
|
518362
|
-
/^- Desktop\/Vision:.*$/im,
|
|
518363
|
-
/^- P2P:.*$/im
|
|
518364
|
-
];
|
|
518365
518355
|
const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
|
|
518366
518356
|
let out = prompt;
|
|
518367
518357
|
for (const re of SECTION_HEADERS_TO_REMOVE) {
|
|
518368
518358
|
out = out.replace(new RegExp(re.source + "[\\s\\S]*?(?=^##\\s|\\Z)", "im"), "");
|
|
518369
518359
|
}
|
|
518370
|
-
for (const re of TOOL_LINES_TO_REMOVE) {
|
|
518371
|
-
out = out.replace(re, "");
|
|
518372
|
-
}
|
|
518373
518360
|
out = out.replace(CHAT_MODE_BLOCK, "");
|
|
518374
518361
|
out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
|
|
518375
518362
|
out = out.replace(/\n{3,}/g, "\n\n");
|
|
@@ -525012,7 +524999,11 @@ ${transcript}`
|
|
|
525012
524999
|
/\buse\s+(\w+)/g,
|
|
525013
525000
|
/\bcall\s+(\w+)/g,
|
|
525014
525001
|
/`([a-z_]+)`/g,
|
|
525015
|
-
/\btool[:\s]+(\w+)/g
|
|
525002
|
+
/\btool[:\s]+(\w+)/g,
|
|
525003
|
+
// Function-call syntax: `name(args)` is the strongest call-site signal
|
|
525004
|
+
// a name can have. A bare identifier mention isn't enough — that catches
|
|
525005
|
+
// filesystem nouns like `node_modules` or `package_lock`.
|
|
525006
|
+
/\b([a-z][a-z0-9_]*[a-z0-9])\s*\(/g
|
|
525016
525007
|
];
|
|
525017
525008
|
const contextualNames = /* @__PURE__ */ new Set();
|
|
525018
525009
|
for (const pat of TOOL_CONTEXT_PATTERNS) {
|
|
@@ -525023,14 +525014,6 @@ ${transcript}`
|
|
|
525023
525014
|
contextualNames.add(name10);
|
|
525024
525015
|
}
|
|
525025
525016
|
}
|
|
525026
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
525027
|
-
for (const name10 of referenced) {
|
|
525028
|
-
const escaped = name10.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
525029
|
-
const occurrences = (systemPrompt.match(new RegExp(`\\b${escaped}\\b`, "g")) ?? []).length;
|
|
525030
|
-
nameCounts.set(name10, occurrences);
|
|
525031
|
-
if (occurrences >= 2)
|
|
525032
|
-
contextualNames.add(name10);
|
|
525033
|
-
}
|
|
525034
525017
|
const IGNORE_LIST = /* @__PURE__ */ new Set([
|
|
525035
525018
|
"tool_use",
|
|
525036
525019
|
"tool_call",
|
|
@@ -525075,7 +525058,6 @@ ${transcript}`
|
|
|
525075
525058
|
// reserved status for partial-done todos
|
|
525076
525059
|
"not_started",
|
|
525077
525060
|
// alternative status phrasing
|
|
525078
|
-
// Shell/bash idioms that look like snake_case
|
|
525079
525061
|
"ctrl_c",
|
|
525080
525062
|
"ctrl_d"
|
|
525081
525063
|
]);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.477",
|
|
3
|
+
"version": "0.187.479",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.477",
|
|
9
|
+
"version": "0.187.479",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED
|
package/prompts/agentic/system-large.md
CHANGED
|
@@ -169,17 +169,17 @@ If you have tried 2+ approaches to the same blocker and both failed, **STOP atte
|
|
|
169
169
|
|
|
170
170
|
**The diagnostic loop (one cycle per turn, NOT batched):**
|
|
171
171
|
|
|
172
|
-
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY.
|
|
173
|
-
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command
|
|
174
|
-
3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that
|
|
172
|
+
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. If it's in a log packet, query `op="errors"` then `op="lines"` for context.
|
|
173
|
+
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command native to your ecosystem. Examples of the shape: "is this artifact present?", "does this import resolve?", "is this env var set?". One read, one fact verified.
|
|
174
|
+
3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that CONFIRMS or REFUTES it — verify, do NOT fix yet.
|
|
175
175
|
4. **WEB SEARCH the exact error message** if you don't know what it means. A 30-second lookup beats 10 retry attempts.
|
|
176
|
-
5. **CHECK THE OBVIOUS** —
|
|
176
|
+
5. **CHECK THE OBVIOUS** — package managers and build systems frequently report "success" while silently dropping artifacts. Don't trust summary output ("added N", "build complete") without verifying the SPECIFIC artifact you needed actually exists.
|
|
177
177
|
6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
|
|
178
178
|
|
|
179
179
|
**What diagnostic mode is NOT:**
|
|
180
|
-
- Trying another version
|
|
181
|
-
- Adding
|
|
182
|
-
- Wiping
|
|
180
|
+
- Trying another version of the same dependency after one failed — variant-fatigue, not diagnosis.
|
|
181
|
+
- Adding force/override flags that suppress warnings — masks root causes.
|
|
182
|
+
- Wiping caches/dependencies and reinstalling — hides the original error.
|
|
183
183
|
- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
|
|
184
184
|
- Use grep_search and find_files for efficient exploration (don't dump entire directories)
|
|
185
185
|
- Use file_edit for small changes instead of rewriting entire files
|
|
package/prompts/agentic/system-medium.md
CHANGED
|
@@ -102,27 +102,22 @@ If you have tried 2+ approaches to the same blocker and both failed, **STOP atte
|
|
|
102
102
|
|
|
103
103
|
**The diagnostic loop (one cycle per turn, NOT batched):**
|
|
104
104
|
|
|
105
|
-
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet,
|
|
105
|
+
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet, query it with `op="errors"` then `op="lines"` for surrounding context.
|
|
106
106
|
|
|
107
|
-
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command:
|
|
108
|
-
- "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json` (one line)
|
|
109
|
-
- "I think the import path is right" → `cat src/lib/x.ts | head -5`
|
|
110
|
-
- "I think the env var is set" → `printenv VAR_NAME`
|
|
107
|
+
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command native to whatever ecosystem you're in. Examples of the *shape* (not the exact commands): "is this artifact present on disk?", "does this import resolve?", "is this environment variable set?", "does this binary exist on PATH?". One read, one fact verified.
|
|
111
108
|
|
|
112
|
-
3. **STATE A HYPOTHESIS in writing** before your next action
|
|
113
|
-
- "Hypothesis: tailwindcss didn't install because @tailwindcss/postcss has a peer-dep conflict with autoprefixer."
|
|
114
|
-
- Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
|
|
109
|
+
3. **STATE A HYPOTHESIS in writing** before your next action — "I think X is failing because Y." Be concrete. Then design ONE experiment that would CONFIRM or REFUTE it (verify it first; do NOT fix yet).
|
|
115
110
|
|
|
116
|
-
4. **WEB SEARCH the exact error message** if you don't know what it means.
|
|
111
|
+
4. **WEB SEARCH the exact error message** if you don't know what it means. Quote the exact error string. A 30-second lookup beats 10 retry attempts.
|
|
117
112
|
|
|
118
|
-
5. **CHECK THE OBVIOUS** —
|
|
113
|
+
5. **CHECK THE OBVIOUS** — package managers and build systems frequently report "success" while silently dropping artifacts. Don't trust a summary like "added N packages" or "build complete" without verifying the SPECIFIC artifact you needed actually exists. Check each expected output explicitly.
|
|
119
114
|
|
|
120
115
|
6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
|
|
121
116
|
|
|
122
117
|
**What diagnostic mode is NOT:**
|
|
123
|
-
- Trying
|
|
124
|
-
- Adding
|
|
125
|
-
- Wiping
|
|
118
|
+
- Trying a different version of the same dependency after one failed — that's variant-fatigue, not diagnosis.
|
|
119
|
+
- Adding force/override flags that suppress warnings — those mask root causes, they don't reveal them.
|
|
120
|
+
- Wiping caches/dependencies and reinstalling — that hides the original error.
|
|
126
121
|
- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
|
|
127
122
|
- Do NOT output long explanations. Focus on tool calls.
|
|
128
123
|
- If file_read/list_directory returns ENOENT, use list_directory on the project root — do NOT guess parent paths
|