open-agents-ai 0.187.474 → 0.187.476

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -518229,6 +518229,52 @@ function getSystemPromptForTier(tier) {
518229
518229
  return SYSTEM_PROMPT;
518230
518230
  }
518231
518231
  }
518232
/**
 * Heuristically decide whether a request looks like a build/implementation
 * task (as opposed to a conversational message).
 *
 * A request is classified as a task when ANY of the following holds:
 *   - it is longer than 2000 characters;
 *   - it contains a path-like token with at least two `/segment` parts;
 *   - it contains both an action verb (implement, build, fix, ...) and a
 *     technical noun (file, api, schema, react, ...), matched case-insensitively.
 *
 * @param {unknown} task - The raw user request. Anything that is not a
 *   non-empty string is classified as "not a task".
 * @returns {boolean} true when the request looks like a task.
 */
function detectTaskMode(task) {
  // Guard on the type, not just truthiness: a truthy non-string (for example
  // an array of content parts) would otherwise throw on the string methods
  // used below.
  if (typeof task !== "string" || task.length === 0) {
    return false;
  }
  if (task.length > 2e3) {
    return true;
  }
  // From here on the task is at most 2000 characters, so the whole string is
  // inspected; no further slicing is needed.
  if (/(\/[\w.-]+){2,}/.test(task)) {
    return true;
  }
  const lowered = task.toLowerCase();
  const hasActionVerb = /\b(implement|build|create|refactor|write|fix|migrate|deploy|generate|setup|set up|develop|design|integrate)\b/.test(lowered);
  if (!hasActionVerb) {
    return false;
  }
  return /\b(spec|file|module|component|api|endpoint|database|schema|test|build|next\.js|typescript|react|prisma|tailwind|sql|python|rust|go)\b/.test(lowered);
}
518247
/**
 * Produce a reduced copy of a system prompt for task-mode runs.
 *
 * The following are stripped from `prompt`:
 *   - each listed `## ...` section, from its header line up to (not including)
 *     the next `## ` header, or to the end of the prompt if it is the last one;
 *   - every tool-description line matching one of the listed patterns;
 *   - the `**CHAT MODE**` block up to the `**TASK MODE**` marker, and then the
 *     `**TASK MODE**` header line itself.
 * Runs of three or more newlines are collapsed to a single blank line and the
 * result is trimmed and terminated with exactly one newline.
 *
 * @param {string} prompt - The full system prompt.
 * @returns {string} The slimmed prompt.
 */
function slimSystemPromptForTaskMode(prompt) {
  const SECTION_HEADERS_TO_REMOVE = [
    /^##\s*Interactive\s*\/\s*Long-?Running Sessions\s*$/im,
    /^##\s*Document Generation Strategy\s*$/im,
    /^##\s*Calculations\s*[—-]\s*Always Execute, Never Guess\s*$/im,
    /^##\s*Knowledge Gaps\s*[—-]\s*Search, Don't Hallucinate\s*$/im,
    /^##\s*Self-Awareness( & Introspection)?\s*$/im,
    /^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
  ];
  const TOOL_LINES_TO_REMOVE = [
    /^- nexus:.*$/im,
    /^- background_run.*task_status.*task_output.*task_stop:.*$/im,
    /^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/im,
    /^Voice\/TTS:.*$/im,
    /^- Voice\/TTS:.*$/im,
    /^- Desktop\/Vision:.*$/im,
    /^- P2P:.*$/im
  ];
  const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
  // A section ends at the next "## " header or at the end of the input.
  // JavaScript has no `\Z` anchor: without the `u` flag `\Z` is just the
  // letter "Z" (and "z" under the `i` flag), which would cut the match short
  // at the first "z" in the section body. `(?![\s\S])` is the portable way to
  // say "no character follows".
  const SECTION_END = "(?=^##\\s|(?![\\s\\S]))";

  let out = prompt;
  for (const header of SECTION_HEADERS_TO_REMOVE) {
    out = out.replace(new RegExp(header.source + "[\\s\\S]*?" + SECTION_END, "im"), "");
  }
  for (const toolLine of TOOL_LINES_TO_REMOVE) {
    // Global flag: several lines can match one pattern (the multi-tool
    // alternation in particular), and all of them must go.
    out = out.replace(new RegExp(toolLine.source, "gim"), "");
  }
  out = out.replace(CHAT_MODE_BLOCK, "");
  out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
  // Removed lines leave their newline behind; squeeze the resulting gaps.
  out = out.replace(/\n{3,}/g, "\n\n");
  return out.trim() + "\n";
}
518232
518278
  function computeTodoReminder(input) {
518233
518279
  const turnsSinceWriteThreshold = input.turnsSinceWriteThreshold ?? 10;
518234
518280
  const turnsBetweenReminders = input.turnsBetweenReminders ?? 10;
@@ -518638,7 +518684,17 @@ var init_agenticRunner = __esm({
518638
518684
  async assembleContext(task, context2) {
518639
518685
  const sections = [];
518640
518686
  const pressureCue = pressureCheck(task);
518641
- const basePrompt = getSystemPromptForTier(this.options.modelTier) + pressureCue;
518687
+ const rawPrompt = getSystemPromptForTier(this.options.modelTier);
518688
+ const taskModeOn = detectTaskMode(task);
518689
+ const slimmedPrompt = taskModeOn ? slimSystemPromptForTaskMode(rawPrompt) : rawPrompt;
518690
+ const basePrompt = slimmedPrompt + pressureCue;
518691
+ if (taskModeOn) {
518692
+ this.emit({
518693
+ type: "status",
518694
+ content: `REG-19: TASK MODE detected — system prompt slimmed ${rawPrompt.length}→${slimmedPrompt.length} bytes`,
518695
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
518696
+ });
518697
+ }
518642
518698
  const _BATCH_GUIDANCE = {
518643
518699
  small: "\n\n## Response batching\n\nEmit AT MOST 2 tool calls per response. After observing their results, plan the next 2 in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
518644
518700
  medium: "\n\n## Response batching\n\nEmit AT MOST 4 tool calls per response. After observing their results, plan the next batch in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
@@ -520548,6 +520604,28 @@ TASK: ${task}` : task;
520548
520604
  for (const [tool, budget] of Object.entries(toolBudgets)) {
520549
520605
  toolCallBudget.set(tool, budget);
520550
520606
  }
520607
+ const stagnationWindow = [];
520608
+ let stagnationCooldownUntilTurn = -1;
520609
+ const STAG_WINDOW_TURNS = 40;
520610
+ const STAG_WINDOW_MS = 10 * 60 * 1e3;
520611
+ const STAG_MIN_SAMPLES = 30;
520612
+ const STAG_FAILURE_THRESHOLD = 5;
520613
+ const STAG_VARIANT_THRESHOLD = 4;
520614
+ const STAG_FILES_DELTA_MIN = 3;
520615
+ let injectionsThisTurn = 0;
520616
+ const INJECTION_BUDGET_SOFT = 2;
520617
+ const deferredSoftInjections = [];
520618
// Append an advisory message to the conversation, subject to the per-turn
// soft-injection budget. Returns true when the message was appended now and
// false when it was not. Over-budget messages are parked in the deferred
// queue while that queue holds fewer than 6 entries; otherwise they are
// discarded.
const pushSoftInjection = (role, content) => {
  const overBudget = injectionsThisTurn >= INJECTION_BUDGET_SOFT;
  if (overBudget) {
    const queueHasRoom = deferredSoftInjections.length < 6;
    if (queueHasRoom) {
      deferredSoftInjections.push({ role, content });
    }
    return false;
  }
  messages2.push({ role, content });
  injectionsThisTurn += 1;
  return true;
};
520551
520629
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
520552
520630
  clearTurnState(this._appState);
520553
520631
  this._maybeApplyThinkGuard();
@@ -520562,6 +520640,76 @@ TASK: ${task}` : task;
520562
520640
  this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
520563
520641
  break;
520564
520642
  }
520643
+ injectionsThisTurn = 0;
520644
+ while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
520645
+ const next = deferredSoftInjections.shift();
520646
+ messages2.push({ role: next.role, content: next.content });
520647
+ injectionsThisTurn++;
520648
+ }
520649
+ if (turn > stagnationCooldownUntilTurn && stagnationWindow.length >= STAG_MIN_SAMPLES) {
520650
+ const cutoffTurn = turn - STAG_WINDOW_TURNS;
520651
+ const cutoffTs = Date.now() - STAG_WINDOW_MS;
520652
+ while (stagnationWindow.length && (stagnationWindow[0].turn < cutoffTurn || stagnationWindow[0].ts < cutoffTs)) {
520653
+ stagnationWindow.shift();
520654
+ }
520655
+ if (stagnationWindow.length >= STAG_MIN_SAMPLES) {
520656
+ const completedDelta = stagnationWindow[stagnationWindow.length - 1].completedTodos - stagnationWindow[0].completedTodos;
520657
+ const fileSet = /* @__PURE__ */ new Set();
520658
+ for (const s2 of stagnationWindow)
520659
+ for (const p2 of s2.filesTouchedThisTurn)
520660
+ fileSet.add(p2);
520661
+ const filesDelta = fileSet.size;
520662
+ const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
520663
+ const variantSet = /* @__PURE__ */ new Set();
520664
+ for (const s2 of stagnationWindow)
520665
+ for (const p2 of s2.shellPrefixesThisTurn)
520666
+ variantSet.add(p2);
520667
+ const variantCount = variantSet.size;
520668
+ if (completedDelta === 0 && filesDelta < STAG_FILES_DELTA_MIN && failureSum >= STAG_FAILURE_THRESHOLD && variantCount >= STAG_VARIANT_THRESHOLD) {
520669
+ const variantList = [...variantSet].slice(0, 8).map((v) => ` • ${v}`).join("\n");
520670
+ const stagMsg = [
520671
+ `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
520672
+ ``,
520673
+ `Over the last ${stagnationWindow.length} turns you have:`,
520674
+ ` • Completed 0 new todos`,
520675
+ ` • Written/edited only ${filesDelta} unique file(s) (need ≥${STAG_FILES_DELTA_MIN} for healthy progress)`,
520676
+ ` • Accumulated ${failureSum} failures`,
520677
+ ` • Tried ${variantCount} different shell-command variants:`,
520678
+ variantList,
520679
+ ``,
520680
+ `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
520681
+ ``,
520682
+ `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
520683
+ ``,
520684
+ `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
520685
+ ``,
520686
+ `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
520687
+ ``,
520688
+ `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
520689
+ ` • If you think a package is installed: ls node_modules/<name>/package.json`,
520690
+ ` • If you think an env var is set: printenv <NAME>`,
520691
+ ` • If you think a file imports correctly: head -5 <file>`,
520692
+ ` • If you don't know what an error means: web_search("<exact error string>")`,
520693
+ ``,
520694
+ `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
520695
+ ``,
520696
+ `DO NOT in your next response:`,
520697
+ ` • Try another version, flag, or variant of any command in the list above`,
520698
+ ` • Wipe node_modules / re-install — that hides the original error`,
520699
+ ` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
520700
+ ``,
520701
+ `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
520702
+ ].join("\n");
520703
+ messages2.push({ role: "system", content: stagMsg });
520704
+ stagnationCooldownUntilTurn = turn + 5;
520705
+ this.emit({
520706
+ type: "status",
520707
+ content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${variantCount} variants, ${failureSum} failures, ${filesDelta} files in window)`,
520708
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
520709
+ });
520710
+ }
520711
+ }
520712
+ }
520565
520713
  if (pendingConstraintWarnings.length > 0) {
520566
520714
  const warningMsg = "<constraint-recall>\n" + pendingConstraintWarnings.join("\n") + "\n</constraint-recall>";
520567
520715
  messages2.push({ role: "system", content: warningMsg });
@@ -520745,11 +520893,8 @@ Now call file_write with YOUR skeleton for this task.`
520745
520893
  if (toolHints.length > 0) {
520746
520894
  toolHints.sort((a2, b) => b.score - a2.score);
520747
520895
  const top = toolHints.slice(0, 5);
520748
- messages2.push({
520749
- role: "system",
520750
- content: `[Relevant tools for this task]
520751
- ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
520752
- });
520896
+ pushSoftInjection("system", `[Relevant tools for this task]
520897
+ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
520753
520898
  }
520754
520899
  }
520755
520900
  if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520773,11 +520918,8 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
520773
520918
  }
520774
520919
  hints.push("EFFICIENCY: Aim for 3-5 tool calls total. Each call should make measurable progress. Do not repeat a tool call with the same arguments.");
520775
520920
  if (hints.length > 0) {
520776
- messages2.push({
520777
- role: "system",
520778
- content: `[Efficiency Guide]
520779
- ${hints.join("\n")}`
520780
- });
520921
+ pushSoftInjection("system", `[Efficiency Guide]
520922
+ ${hints.join("\n")}`);
520781
520923
  }
520782
520924
  }
520783
520925
  if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520787,21 +520929,18 @@ ${hints.join("\n")}`
520787
520929
  const hasMultiStepRequirement = taskGoal.length > 200 && (taskGoal.match(/\d\./g) || []).length >= 2;
520788
520930
  const isAnalysisTask = (taskGoal.match(/\banalyze\b|\baudit\b|\breview\b|\bdiagnose\b|\binvestigate\b|\bcompare\b|\bevaluate\b/gi) || []).length >= 1;
520789
520931
  if (hasMultiplePremises || hasConditionalLogic || hasMultiStepRequirement || isAnalysisTask) {
520790
- messages2.push({
520791
- role: "system",
520792
- content: [
520793
- "[Structured Reasoning Guide]",
520794
- "This task requires multi-step reasoning. Follow this structure:",
520795
- "",
520796
- "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
520797
- "2. For each sub-question:",
520798
- " a. State what you KNOW (verified from evidence/tool output)",
520799
- " b. State what you ASSUME (hypotheses not yet confirmed)",
520800
- " c. Derive your conclusion using ONLY verified facts",
520801
- "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
520802
- "4. Before your final answer, verify: does each conclusion follow from the evidence?"
520803
- ].join("\n")
520804
- });
520932
+ pushSoftInjection("system", [
520933
+ "[Structured Reasoning Guide]",
520934
+ "This task requires multi-step reasoning. Follow this structure:",
520935
+ "",
520936
+ "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
520937
+ "2. For each sub-question:",
520938
+ " a. State what you KNOW (verified from evidence/tool output)",
520939
+ " b. State what you ASSUME (hypotheses not yet confirmed)",
520940
+ " c. Derive your conclusion using ONLY verified facts",
520941
+ "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
520942
+ "4. Before your final answer, verify: does each conclusion follow from the evidence?"
520943
+ ].join("\n"));
520805
520944
  }
520806
520945
  }
520807
520946
  const turnBudget = turnTier === "small" ? 5 : turnTier === "medium" ? 8 : 0;
@@ -522354,6 +522493,39 @@ Your most recent tool calls SUCCEEDED. If the task is complete, call task_comple
522354
522493
  });
522355
522494
  }
522356
522495
  }
522496
+ try {
522497
+ const turnLogTail = toolCallLog.filter((t2) => t2.turn === turn || t2.turn === void 0);
522498
+ const filesTouched = /* @__PURE__ */ new Set();
522499
+ const shellPrefixes = /* @__PURE__ */ new Set();
522500
+ let failuresThisTurn = 0;
522501
+ for (const tc of turnLogTail) {
522502
+ if (tc.success === false)
522503
+ failuresThisTurn++;
522504
+ if (["file_write", "file_edit", "batch_edit", "file_patch"].includes(tc.name)) {
522505
+ const m2 = tc.argsKey?.match(/path=([^,]+)/);
522506
+ if (m2 && m2[1])
522507
+ filesTouched.add(m2[1]);
522508
+ }
522509
+ if (tc.name === "shell") {
522510
+ const cmdMatch = tc.argsKey?.match(/command=([^,]{0,200})/);
522511
+ const cmd = cmdMatch?.[1] ?? "";
522512
+ const prefix = cmd.replace(/^cd\s+\S+\s*&&\s*/, "").split(/\s+/).slice(0, 3).join(" ");
522513
+ if (prefix)
522514
+ shellPrefixes.add(prefix);
522515
+ }
522516
+ }
522517
+ const todosNow = this.readSessionTodos() || [];
522518
+ const completedNow = todosNow.filter((t2) => t2.status === "completed").length;
522519
+ stagnationWindow.push({
522520
+ turn,
522521
+ ts: Date.now(),
522522
+ completedTodos: completedNow,
522523
+ filesTouchedThisTurn: filesTouched,
522524
+ failuresThisTurn,
522525
+ shellPrefixesThisTurn: shellPrefixes
522526
+ });
522527
+ } catch {
522528
+ }
522357
522529
  }
522358
522530
  let prevCycleToolCalls = toolCallCount;
522359
522531
  while (!completed && !this.aborted && this.options.bruteForce && bruteForceCycle < this.options.bruteForceMaxCycles) {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.474",
3
+ "version": "0.187.476",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.474",
9
+ "version": "0.187.476",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.474",
3
+ "version": "0.187.476",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -161,6 +161,26 @@ When you discover image files (png, jpg, gif, svg, webp, bmp) during codebase ex
161
161
  - ALWAYS run validation (tests, build, lint) after making changes
162
162
  - If tests fail, read the FULL error output. Fix the exact failing assertion or error.
163
163
  - Do NOT give up after a failure. Iterate: fix → test → fix → test until it passes.
164
+ - task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. Being stuck on a code/config problem is NEVER grounds for task_complete — use DIAGNOSTIC MODE below.
165
+
166
+ ### DIAGNOSTIC MODE — When You ARE Stuck, Slow Down and Investigate
167
+
168
+ If you have tried 2+ approaches to the same blocker and every one of them failed, **STOP attempting fixes** and enter diagnostic mode. Repeating fix-attempts on a misunderstood problem just wastes turns. Diagnose ROOT CAUSE first.
169
+
170
+ **The diagnostic loop (one cycle per turn, NOT batched):**
171
+
172
+ 1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If output is in a log packet, use `log_explore` with `op="errors"`, then `op="lines"` for context.
173
+ 2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command (e.g. "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json`).
174
+ 3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
175
+ 4. **WEB SEARCH the exact error message** if you don't know what it means. A 30-second lookup beats 10 retry attempts.
176
+ 5. **CHECK THE OBVIOUS** — silent failures are common. `npm install` reporting "added 141 packages" doesn't mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep with `ls node_modules/<name>/package.json`.
177
+ 6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
178
+
179
+ **What diagnostic mode is NOT:**
180
+ - Trying another version (`tailwindcss@3.4.19` after `tailwindcss@4.0.0`) — that's variant-fatigue, not diagnosis.
181
+ - Adding `--force` or `--legacy-peer-deps` — those mask root causes.
182
+ - Wiping node_modules and re-installing — hides the original error.
183
+ - Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
164
184
  - Use grep_search and find_files for efficient exploration (don't dump entire directories)
165
185
  - Use file_edit for small changes instead of rewriting entire files
166
186
  - Keep tool calls focused — read only what you need
@@ -94,6 +94,36 @@ NEVER write the entire document in ONE file_write call. DECOMPOSE:
94
94
  - Do NOT give up after failure. Iterate until it passes.
95
95
  - Use file_edit for small changes, not full file rewrites
96
96
  - You MUST call task_complete when done — when you have enough information from web tools, STOP fetching and call task_complete with a summary. Do not keep browsing after you have the answer.
97
+ - task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. Being stuck on a code/config problem is NEVER grounds for task_complete — use DIAGNOSTIC MODE below.
98
+
99
+ ### DIAGNOSTIC MODE — When You ARE Stuck, Slow Down and Investigate
100
+
101
+ If you have tried 2+ approaches to the same blocker and every one of them failed, **STOP attempting fixes** and enter diagnostic mode. Repeating fix-attempts on a misunderstood problem just wastes turns. Diagnose ROOT CAUSE first.
102
+
103
+ **The diagnostic loop (one cycle per turn, NOT batched):**
104
+
105
+ 1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet, use `log_explore` with `op="errors"` to see every marker, then `op="lines"` for surrounding context.
106
+
107
+ 2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command:
108
+ - "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json` (one line)
109
+ - "I think the import path is right" → `cat src/lib/x.ts | head -5`
110
+ - "I think the env var is set" → `printenv VAR_NAME`
111
+
112
+ 3. **STATE A HYPOTHESIS in writing** before your next action:
113
+ - "Hypothesis: tailwindcss didn't install because @tailwindcss/postcss has a peer-dep conflict with autoprefixer."
114
+ - Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
115
+
116
+ 4. **WEB SEARCH the exact error message** if you don't know what it means. `web_search("exact error string from terminal")`. A 30-second lookup beats 10 retry attempts.
117
+
118
+ 5. **CHECK THE OBVIOUS** — silent failures are common. `npm install` saying "added 141 packages" doesn't mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep with `ls node_modules/<name>/package.json`.
119
+
120
+ 6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
121
+
122
+ **What diagnostic mode is NOT:**
123
+ - Trying another version (`tailwindcss@3.4.19` after `tailwindcss@4.0.0` failed) — that's variant-fatigue, not diagnosis.
124
+ - Adding `--force` or `--legacy-peer-deps` — those mask root causes, they don't reveal them.
125
+ - Wiping node_modules and re-installing — that just hides the original error.
126
+ - Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
97
127
  - Do NOT output long explanations. Focus on tool calls.
98
128
  - If file_read/list_directory returns ENOENT, use list_directory on the project root — do NOT guess parent paths
99
129
  - Directory listing entries are RELATIVE to the listed directory. If you list "parent/" and see "child", the full path is "parent/child" — NOT ".child" or just "child"
@@ -99,10 +99,12 @@ Complex tasks (5+ steps) — DECOMPOSE before acting:
99
99
  1. Call todo_write with the checklist. Mark item 1 "in_progress".
100
100
  2. Execute ONE STEP AT A TIME. After each, update todo_write status.
101
101
  3. After each file edit, VERIFY: file_read or shell test.
102
- 4. If stuck after 2 attempts, try a DIFFERENT approach do not repeat the same tool call.
102
+ 4. If stuck after 2 attempts: STOP. Enter DIAGNOSTIC MODE — read the FULL error output, state a hypothesis in writing, verify ONE assumption with the smallest test command, web_search the exact error string. Only fix AFTER you've confirmed root cause. Do NOT keep trying variants of the same approach.
103
103
  5. For multi-file changes: read ALL relevant files first, then edit in dependency order.
104
104
  6. Final todo_write marks all items "completed", then call task_complete.
105
105
 
106
+ task_complete is ONLY for ACTUAL completion. Being stuck on a code/config problem is NEVER grounds for task_complete — diagnose, do not exit.
107
+
106
108
  CRITICAL — NEVER repeat a tool call with the same arguments. If you already read a file, use the data you have. If you already ran a command, use the output. Calling the same tool twice with identical arguments wastes turns and produces the same result.
107
109
 
108
110
  Long document generation (reports, SOWs, proposals, contracts):