open-agents-ai 0.187.475 → 0.187.477

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -282094,14 +282094,14 @@ ${lanes.join("\n")}
282094
282094
  return { value: value2, isSyntacticallyString, resolvedOtherFiles, hasExternalReferences };
282095
282095
  }
282096
282096
  function createEvaluator({ evaluateElementAccessExpression, evaluateEntityNameExpression }) {
282097
- function evaluate2(expr, location) {
282097
+ function evaluate3(expr, location) {
282098
282098
  let isSyntacticallyString = false;
282099
282099
  let resolvedOtherFiles = false;
282100
282100
  let hasExternalReferences = false;
282101
282101
  expr = skipParentheses(expr);
282102
282102
  switch (expr.kind) {
282103
282103
  case 225:
282104
- const result = evaluate2(expr.operand, location);
282104
+ const result = evaluate3(expr.operand, location);
282105
282105
  resolvedOtherFiles = result.resolvedOtherFiles;
282106
282106
  hasExternalReferences = result.hasExternalReferences;
282107
282107
  if (typeof result.value === "number") {
@@ -282116,8 +282116,8 @@ ${lanes.join("\n")}
282116
282116
  }
282117
282117
  break;
282118
282118
  case 227: {
282119
- const left = evaluate2(expr.left, location);
282120
- const right = evaluate2(expr.right, location);
282119
+ const left = evaluate3(expr.left, location);
282120
+ const right = evaluate3(expr.right, location);
282121
282121
  isSyntacticallyString = (left.isSyntacticallyString || right.isSyntacticallyString) && expr.operatorToken.kind === 40;
282122
282122
  resolvedOtherFiles = left.resolvedOtherFiles || right.resolvedOtherFiles;
282123
282123
  hasExternalReferences = left.hasExternalReferences || right.hasExternalReferences;
@@ -282192,7 +282192,7 @@ ${lanes.join("\n")}
282192
282192
  let resolvedOtherFiles = false;
282193
282193
  let hasExternalReferences = false;
282194
282194
  for (const span of expr.templateSpans) {
282195
- const spanResult = evaluate2(span.expression, location);
282195
+ const spanResult = evaluate3(span.expression, location);
282196
282196
  if (spanResult.value === void 0) {
282197
282197
  return evaluatorResult(
282198
282198
  /*value*/
@@ -282214,7 +282214,7 @@ ${lanes.join("\n")}
282214
282214
  hasExternalReferences
282215
282215
  );
282216
282216
  }
282217
- return evaluate2;
282217
+ return evaluate3;
282218
282218
  }
282219
282219
  function isConstAssertion(location) {
282220
282220
  return isAssertionExpression(location) && isConstTypeReference(location.type) || isJSDocTypeTag(location) && isConstTypeReference(location.typeExpression);
@@ -312752,7 +312752,7 @@ ${lanes.join("\n")}
312752
312752
  var emitResolver = createResolver();
312753
312753
  var nodeBuilder = createNodeBuilder();
312754
312754
  var syntacticNodeBuilder = createSyntacticTypeNodeBuilder(compilerOptions, nodeBuilder.syntacticBuilderResolver);
312755
- var evaluate2 = createEvaluator({
312755
+ var evaluate3 = createEvaluator({
312756
312756
  evaluateElementAccessExpression,
312757
312757
  evaluateEntityNameExpression
312758
312758
  });
@@ -348722,7 +348722,7 @@ ${lanes.join("\n")}
348722
348722
  case 72:
348723
348723
  case 50:
348724
348724
  case 73:
348725
- const rhsEval = evaluate2(right);
348725
+ const rhsEval = evaluate3(right);
348726
348726
  if (typeof rhsEval.value === "number" && Math.abs(rhsEval.value) >= 32) {
348727
348727
  errorOrSuggestion(
348728
348728
  isEnumMember(walkUpParenthesizedExpressions(right.parent.parent)),
@@ -349225,7 +349225,7 @@ ${lanes.join("\n")}
349225
349225
  texts.push(span.literal.text);
349226
349226
  types2.push(isTypeAssignableTo(type, templateConstraintType) ? type : stringType);
349227
349227
  }
349228
- const evaluated = node.parent.kind !== 216 && evaluate2(node).value;
349228
+ const evaluated = node.parent.kind !== 216 && evaluate3(node).value;
349229
349229
  if (evaluated) {
349230
349230
  return getFreshTypeOfLiteralType(getStringLiteralType(evaluated));
349231
349231
  }
@@ -354706,7 +354706,7 @@ ${lanes.join("\n")}
354706
354706
  function computeConstantEnumMemberValue(member) {
354707
354707
  const isConstEnum = isEnumConst(member.parent);
354708
354708
  const initializer = member.initializer;
354709
- const result = evaluate2(initializer, member);
354709
+ const result = evaluate3(initializer, member);
354710
354710
  if (result.value !== void 0) {
354711
354711
  if (isConstEnum && typeof result.value === "number" && !isFinite(result.value)) {
354712
354712
  error2(
@@ -354761,7 +354761,7 @@ ${lanes.join("\n")}
354761
354761
  if (isConstantVariable(symbol3)) {
354762
354762
  const declaration = symbol3.valueDeclaration;
354763
354763
  if (declaration && isVariableDeclaration(declaration) && !declaration.type && declaration.initializer && (!location || declaration !== location && isBlockScopedNameDeclaredBeforeUse(declaration, location))) {
354764
- const result = evaluate2(declaration.initializer, declaration);
354764
+ const result = evaluate3(declaration.initializer, declaration);
354765
354765
  if (location && getSourceFileOfNode(location) !== getSourceFileOfNode(declaration)) {
354766
354766
  return evaluatorResult(
354767
354767
  result.value,
@@ -512128,6 +512128,106 @@ var init_personality = __esm({
512128
512128
  }
512129
512129
  });
512130
512130
 
512131
+ // packages/orchestrator/dist/critic.js
512132
/**
 * Build the hard "forced progress" message returned to the model in place of a
 * tool result once the same read-like call has been repeated too often.
 *
 * @param {{ tool: string, args?: unknown }} call - The proposed tool call; `args`
 *   is previewed as JSON, truncated to 200 UTF-16 code units.
 * @param {number} hits - How many times this exact call has now been made.
 * @returns {string} The block message (single bracketed notice).
 */
function buildForceProgressBlockMessage(call, hits) {
  const PREVIEW_LIMIT = 200;

  // JSON.stringify throws on cyclic structures / BigInt and yields `undefined`
  // for functions; a diagnostic message must never crash the dispatch path.
  let serialized;
  try {
    serialized = JSON.stringify(call.args ?? {});
  } catch {
    serialized = void 0;
  }
  if (typeof serialized !== "string") {
    serialized = String(call.args);
  }

  let argPreview = serialized.slice(0, PREVIEW_LIMIT);
  if (serialized.length > PREVIEW_LIMIT) {
    // Cutting at a fixed code-unit offset can split a surrogate pair; drop the
    // dangling high surrogate so the message stays well-formed Unicode.
    const lastUnit = argPreview.charCodeAt(argPreview.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      argPreview = argPreview.slice(0, -1);
    }
  }

  return [
    `[FORCED PROGRESS BLOCK — you have called ${call.tool}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.`,
    ``,
    `REQUIRED before this tool will run again with these arguments:`,
    `• file_write or file_edit, OR`,
    `• todo_write that advances the plan, OR`,
    `• task_complete (if all phases are done).`,
    ``,
    `If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`
  ].join("\n");
}
512143
/**
 * Critic gate consulted before a proposed tool call is executed.
 *
 * Reads only `proposedCall`, `fingerprint`, `isReadLike`, `recentToolResults`,
 * `dedupHitCount` and `observerRedundantBlock` from `inputs`; it never mutates
 * the maps (the caller records the returned `hitNumber`).
 *
 * @param {object} inputs - Decision inputs described above.
 * @returns {object} One of:
 *   - `{ decision: "observer_block", reason, cachedResult }` (cachedResult may be null)
 *   - `{ decision: "force_progress_block", reason, hitNumber, blockMessage }`
 *   - `{ decision: "serve_cached", reason, cachedResult, compacted, hitNumber }`
 *   - `{ decision: "pass" }`
 */
function evaluate(inputs) {
  const {
    proposedCall: call,
    fingerprint: key,
    isReadLike: readOnly,
    recentToolResults: resultCache,
    dedupHitCount: hitCounts,
    observerRedundantBlock: flaggedByObserver
  } = inputs;

  // An observer flag wins over everything else, cached result or not.
  if (flaggedByObserver) {
    const entry = resultCache.get(key);
    let cachedResult = null;
    if (entry) {
      cachedResult = entry.result;
    }
    return {
      decision: "observer_block",
      reason: "Littleman observer flagged this fingerprint as redundant",
      cachedResult
    };
  }

  // Only read-like calls are deduplicated.
  if (!readOnly) {
    return { decision: "pass" };
  }

  const entry = resultCache.get(key);
  if (entry === void 0) {
    return { decision: "pass" };
  }

  // This would be duplicate number `hitNumber`; shell calls use their own gate.
  const hitNumber = (hitCounts.get(key) ?? 0) + 1;
  const gate = call.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;

  if (hitNumber >= gate) {
    return {
      decision: "force_progress_block",
      reason: `${call.tool} fingerprint hit count ${hitNumber} >= ${gate}`,
      hitNumber,
      blockMessage: buildForceProgressBlockMessage(call, hitNumber)
    };
  }

  const reason = entry.compacted
    ? "post-compaction cache re-serve"
    : `duplicate call #${hitNumber} (still under ${gate}-hit gate)`;
  return {
    decision: "serve_cached",
    reason,
    cachedResult: entry.result,
    compacted: entry.compacted,
    hitNumber
  };
}
512177
/**
 * Render the "diagnostic mode" system message injected when the agent loop is
 * judged stagnant.
 *
 * @param {object} signals - Window statistics. Reads `windowSamples`,
 *   `filesDelta`, `failureSum`, `variantCount` and `variantList` (an array of
 *   shell-command variants; a missing list is treated as empty).
 * @param {object} [opts] - Optional display settings.
 * @param {number} [opts.filesDeltaMin=3] - File-count floor quoted in the text;
 *   pass the same value given to `isStagnant` so the message matches the gate.
 * @param {number} [opts.maxVariants=8] - Maximum number of variants listed.
 * @returns {string} The multi-line diagnostic message.
 */
function buildStagnationDiagnostic(signals, opts) {
  const filesDeltaMin = opts?.filesDeltaMin ?? 3;
  const maxVariants = opts?.maxVariants ?? 8;
  // Tolerate a missing variant list rather than crashing the turn loop.
  const variants = Array.isArray(signals.variantList) ? signals.variantList : [];
  const variantList = variants.slice(0, maxVariants).map((v) => ` • ${v}`).join("\n");
  return [
    `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
    ``,
    `Over the last ${signals.windowSamples} turns you have:`,
    ` • Completed 0 new todos`,
    ` • Written/edited only ${signals.filesDelta} unique file(s) (need ≥${filesDeltaMin} for healthy progress)`,
    ` • Accumulated ${signals.failureSum} failures`,
    ` • Tried ${signals.variantCount} different shell-command variants:`,
    variantList,
    ``,
    `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
    ``,
    `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
    ``,
    `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
    ``,
    `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
    ``,
    `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
    ` • If you think a package is installed: ls node_modules/<name>/package.json`,
    ` • If you think an env var is set: printenv <NAME>`,
    ` • If you think a file imports correctly: head -5 <file>`,
    ` • If you don't know what an error means: web_search("<exact error string>")`,
    ``,
    `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
    ``,
    `DO NOT in your next response:`,
    ` • Try another version, flag, or variant of any command in the list above`,
    ` • Wipe node_modules / re-install — that hides the original error`,
    ` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
    ``,
    `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
  ].join("\n");
}
512213
/**
 * Decide whether the sampled window shows stagnation.
 *
 * Stagnant means: enough samples, no completed todos, fewer touched files than
 * the floor, and both the failure count and the shell-variant count at or
 * above their thresholds.
 *
 * @param {object} signals - Reads `windowSamples`, `completedDelta`,
 *   `filesDelta`, `failureSum` and `variantCount`.
 * @param {object} [opts] - Overrides; null/undefined fields use the defaults
 *   `failureThreshold` 5, `variantThreshold` 4, `filesDeltaMin` 3, `minSamples` 30.
 * @returns {boolean} True when every stagnation condition holds.
 */
function isStagnant(signals, opts) {
  const limits = {
    failures: opts?.failureThreshold ?? 5,
    variants: opts?.variantThreshold ?? 4,
    files: opts?.filesDeltaMin ?? 3,
    samples: opts?.minSamples ?? 30
  };

  // Too little data to judge.
  if (signals.windowSamples < limits.samples) {
    return false;
  }
  // Any completed todo counts as progress.
  if (signals.completedDelta !== 0) {
    return false;
  }
  if (!(signals.filesDelta < limits.files)) {
    return false;
  }
  if (!(signals.failureSum >= limits.failures)) {
    return false;
  }
  return signals.variantCount >= limits.variants;
}
512222
// Duplicate-call gates read by the critic's evaluate(): the shell gate applies
// when the proposed tool is "shell", the other gate to every other read-like tool.
// Both are assigned when the module initializer below runs.
var SHELL_THRESHOLD;
var FS_THRESHOLD;

// NOTE(review): __esm is defined outside this view — presumably the bundler's
// run-once lazy module initializer; confirm.
var init_critic = __esm({
  "packages/orchestrator/dist/critic.js"() {
    "use strict";
    SHELL_THRESHOLD = 2;
    FS_THRESHOLD = 3;
  }
});
512230
+
512131
512231
  // packages/orchestrator/dist/pressure-gate.js
512132
512232
  function detectPressure(message2) {
512133
512233
  const hasProfanity = PRESSURE_SIGNALS.test(message2);
@@ -514455,7 +514555,7 @@ function h10(t2) {
514455
514555
  function h11(t2) {
514456
514556
  return t2 * t2 * (t2 - 1);
514457
514557
  }
514458
- function evaluate(controls, r2) {
514558
+ function evaluate2(controls, r2) {
514459
514559
  const { K: K2, N, P: P2, E: Eb, Pd, Ed, expansion, epsSigma, deltaMax } = controls;
514460
514560
  const eps2 = epsSigma * epsSigma;
514461
514561
  const u = new Float64Array(K2);
@@ -514731,7 +514831,7 @@ function deserialize(obj) {
514731
514831
  function rmse(controls, samples) {
514732
514832
  let sumSq = 0;
514733
514833
  for (const s2 of samples) {
514734
- const pred = evaluate(controls, s2.input);
514834
+ const pred = evaluate2(controls, s2.input);
514735
514835
  for (let n2 = 0; n2 < controls.N; n2++) {
514736
514836
  const diff = pred[n2] - s2.output[n2];
514737
514837
  sumSq += diff * diff;
@@ -514906,7 +515006,7 @@ var init_embeddingAligner = __esm({
514906
515006
  const range = max - min;
514907
515007
  normalized[k] = range > 1e-10 ? Math.max(0, Math.min(1, (projected[k] - min) / range)) : 0.5;
514908
515008
  }
514909
- const aligned64 = evaluate(this.state.controls, normalized);
515009
+ const aligned64 = evaluate2(this.state.controls, normalized);
514910
515010
  const result = new Float32Array(this.state.dstDim);
514911
515011
  for (let n2 = 0; n2 < this.state.dstDim; n2++)
514912
515012
  result[n2] = aligned64[n2];
@@ -514993,7 +515093,7 @@ var init_embeddingAligner = __esm({
514993
515093
  let avgCosine = 0;
514994
515094
  if (testSamples.length > 0) {
514995
515095
  for (const s2 of testSamples) {
514996
- const pred = evaluate(controls, s2.input);
515096
+ const pred = evaluate2(controls, s2.input);
514997
515097
  avgCosine += cosine(pred, s2.output);
514998
515098
  }
514999
515099
  avgCosine /= testSamples.length;
@@ -516500,7 +516600,7 @@ __export(dist_exports2, {
516500
516600
  retrieveByPPR: () => retrieveByPPR,
516501
516601
  splanifoldCosine: () => cosine,
516502
516602
  splanifoldDeserialize: () => deserialize,
516503
- splanifoldEvaluate: () => evaluate,
516603
+ splanifoldEvaluate: () => evaluate2,
516504
516604
  splanifoldFit: () => fit,
516505
516605
  splanifoldRmse: () => rmse,
516506
516606
  splanifoldSerialize: () => serialize
@@ -518229,6 +518329,52 @@ function getSystemPromptForTier(tier) {
518229
518329
  return SYSTEM_PROMPT;
518230
518330
  }
518231
518331
  }
518332
/**
 * Heuristically decide whether a user request is a build/engineering task
 * (as opposed to a short chat message).
 *
 * Returns true when the text is longer than 2000 characters, when it contains
 * something shaped like a multi-segment path (`/a/b`), or when it combines an
 * action verb (implement, build, fix, ...) with a technical noun (api, file,
 * schema, ...).
 *
 * @param {unknown} task - The raw task text. Anything that is not a non-empty
 *   string yields false instead of throwing.
 * @returns {boolean} True when task mode should be enabled.
 */
function detectTaskMode(task) {
  // Guard: truthy non-strings (arrays, objects, numbers) used to throw a
  // TypeError on .slice/.toLowerCase.
  if (typeof task !== "string" || task.length === 0) {
    return false;
  }
  // Cheap check first; past this point the text is at most 2000 chars, so no
  // further truncation is needed before running the regexes.
  if (task.length > 2e3) {
    return true;
  }
  // NOTE(review): this also matches URLs with a path and strings like
  // "and/or/both" — confirm whether that is intended.
  if (/(\/[\w.-]+){2,}/.test(task)) {
    return true;
  }
  const head = task.toLowerCase();
  const hasActionVerb = /\b(implement|build|create|refactor|write|fix|migrate|deploy|generate|setup|set up|develop|design|integrate)\b/.test(head);
  if (!hasActionVerb) {
    return false;
  }
  return /\b(spec|file|module|component|api|endpoint|database|schema|test|build|next\.js|typescript|react|prisma|tailwind|sql|python|rust|go)\b/.test(head);
}
518347
/**
 * Produce a reduced system prompt for task mode by stripping chat-oriented
 * sections, peripheral tool descriptions and the CHAT MODE block.
 *
 * @param {string} prompt - The full system prompt.
 * @returns {string} The slimmed prompt, trimmed, with runs of 3+ newlines
 *   collapsed to one blank line and exactly one trailing newline.
 */
function slimSystemPromptForTaskMode(prompt) {
  const SECTION_HEADERS_TO_REMOVE = [
    /^##\s*Interactive\s*\/\s*Long-?Running Sessions\s*$/im,
    /^##\s*Document Generation Strategy\s*$/im,
    /^##\s*Calculations\s*[—-]\s*Always Execute, Never Guess\s*$/im,
    /^##\s*Knowledge Gaps\s*[—-]\s*Search, Don't Hallucinate\s*$/im,
    /^##\s*Self-Awareness( & Introspection)?\s*$/im,
    /^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
  ];
  // Global flag: several tool lines can match one pattern (the alternation
  // below covers eight tools) and every one of them must be stripped.
  const TOOL_LINES_TO_REMOVE = [
    /^- nexus:.*$/gim,
    /^- background_run.*task_status.*task_output.*task_stop:.*$/gim,
    /^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/gim,
    /^Voice\/TTS:.*$/gim,
    /^- Voice\/TTS:.*$/gim,
    /^- Desktop\/Vision:.*$/gim,
    /^- P2P:.*$/gim
  ];
  const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
  // A section runs to the next "## " heading or to the end of the prompt.
  // JavaScript has no \Z anchor (it is a literal "Z", and "z" under the i
  // flag), and `$` is per-line under the m flag, so end-of-input is written
  // as "no character follows".
  const SECTION_BODY = "[\\s\\S]*?(?=^##\\s|(?![\\s\\S]))";

  let out = prompt;
  for (const header of SECTION_HEADERS_TO_REMOVE) {
    out = out.replace(new RegExp(header.source + SECTION_BODY, "im"), "");
  }
  for (const toolLine of TOOL_LINES_TO_REMOVE) {
    out = out.replace(toolLine, "");
  }
  out = out.replace(CHAT_MODE_BLOCK, "");
  out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
  out = out.replace(/\n{3,}/g, "\n\n");
  return out.trim() + "\n";
}
518232
518378
  function computeTodoReminder(input) {
518233
518379
  const turnsSinceWriteThreshold = input.turnsSinceWriteThreshold ?? 10;
518234
518380
  const turnsBetweenReminders = input.turnsBetweenReminders ?? 10;
@@ -518326,6 +518472,7 @@ var init_agenticRunner = __esm({
518326
518472
  init_dist();
518327
518473
  init_personality();
518328
518474
  init_promptLoader();
518475
+ init_critic();
518329
518476
  init_pressure_gate();
518330
518477
  init_dist5();
518331
518478
  init_dist7();
@@ -518638,7 +518785,17 @@ var init_agenticRunner = __esm({
518638
518785
  async assembleContext(task, context2) {
518639
518786
  const sections = [];
518640
518787
  const pressureCue = pressureCheck(task);
518641
- const basePrompt = getSystemPromptForTier(this.options.modelTier) + pressureCue;
518788
+ const rawPrompt = getSystemPromptForTier(this.options.modelTier);
518789
+ const taskModeOn = detectTaskMode(task);
518790
+ const slimmedPrompt = taskModeOn ? slimSystemPromptForTaskMode(rawPrompt) : rawPrompt;
518791
+ const basePrompt = slimmedPrompt + pressureCue;
518792
+ if (taskModeOn) {
518793
+ this.emit({
518794
+ type: "status",
518795
+ content: `REG-19: TASK MODE detected — system prompt slimmed ${rawPrompt.length}→${slimmedPrompt.length} bytes`,
518796
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
518797
+ });
518798
+ }
518642
518799
  const _BATCH_GUIDANCE = {
518643
518800
  small: "\n\n## Response batching\n\nEmit AT MOST 2 tool calls per response. After observing their results, plan the next 2 in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
518644
518801
  medium: "\n\n## Response batching\n\nEmit AT MOST 4 tool calls per response. After observing their results, plan the next batch in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
@@ -520556,6 +520713,20 @@ TASK: ${task}` : task;
520556
520713
  const STAG_FAILURE_THRESHOLD = 5;
520557
520714
  const STAG_VARIANT_THRESHOLD = 4;
520558
520715
  const STAG_FILES_DELTA_MIN = 3;
520716
+ let injectionsThisTurn = 0;
520717
+ const INJECTION_BUDGET_SOFT = 2;
520718
+ const deferredSoftInjections = [];
520719
+ const pushSoftInjection = (role, content) => {
520720
+ if (injectionsThisTurn < INJECTION_BUDGET_SOFT) {
520721
+ messages2.push({ role, content });
520722
+ injectionsThisTurn++;
520723
+ return true;
520724
+ }
520725
+ if (deferredSoftInjections.length < 6) {
520726
+ deferredSoftInjections.push({ role, content });
520727
+ }
520728
+ return false;
520729
+ };
520559
520730
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
520560
520731
  clearTurnState(this._appState);
520561
520732
  this._maybeApplyThinkGuard();
@@ -520570,6 +520741,12 @@ TASK: ${task}` : task;
520570
520741
  this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
520571
520742
  break;
520572
520743
  }
520744
+ injectionsThisTurn = 0;
520745
+ while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
520746
+ const next = deferredSoftInjections.shift();
520747
+ messages2.push({ role: next.role, content: next.content });
520748
+ injectionsThisTurn++;
520749
+ }
520573
520750
  if (turn > stagnationCooldownUntilTurn && stagnationWindow.length >= STAG_MIN_SAMPLES) {
520574
520751
  const cutoffTurn = turn - STAG_WINDOW_TURNS;
520575
520752
  const cutoffTs = Date.now() - STAG_WINDOW_MS;
@@ -520582,53 +520759,30 @@ TASK: ${task}` : task;
520582
520759
  for (const s2 of stagnationWindow)
520583
520760
  for (const p2 of s2.filesTouchedThisTurn)
520584
520761
  fileSet.add(p2);
520585
- const filesDelta = fileSet.size;
520586
- const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
520587
520762
  const variantSet = /* @__PURE__ */ new Set();
520588
520763
  for (const s2 of stagnationWindow)
520589
520764
  for (const p2 of s2.shellPrefixesThisTurn)
520590
520765
  variantSet.add(p2);
520591
- const variantCount = variantSet.size;
520592
- if (completedDelta === 0 && filesDelta < STAG_FILES_DELTA_MIN && failureSum >= STAG_FAILURE_THRESHOLD && variantCount >= STAG_VARIANT_THRESHOLD) {
520593
- const variantList = [...variantSet].slice(0, 8).map((v) => ` • ${v}`).join("\n");
520594
- const stagMsg = [
520595
- `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
520596
- ``,
520597
- `Over the last ${stagnationWindow.length} turns you have:`,
520598
- ` • Completed 0 new todos`,
520599
- ` • Written/edited only ${filesDelta} unique file(s) (need ≥${STAG_FILES_DELTA_MIN} for healthy progress)`,
520600
- ` • Accumulated ${failureSum} failures`,
520601
- ` • Tried ${variantCount} different shell-command variants:`,
520602
- variantList,
520603
- ``,
520604
- `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
520605
- ``,
520606
- `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
520607
- ``,
520608
- `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
520609
- ``,
520610
- `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
520611
- ``,
520612
- `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
520613
- ` • If you think a package is installed: ls node_modules/<name>/package.json`,
520614
- ` • If you think an env var is set: printenv <NAME>`,
520615
- ` • If you think a file imports correctly: head -5 <file>`,
520616
- ` • If you don't know what an error means: web_search("<exact error string>")`,
520617
- ``,
520618
- `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
520619
- ``,
520620
- `DO NOT in your next response:`,
520621
- ` • Try another version, flag, or variant of any command in the list above`,
520622
- ` • Wipe node_modules / re-install — that hides the original error`,
520623
- ` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
520624
- ``,
520625
- `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
520626
- ].join("\n");
520627
- messages2.push({ role: "system", content: stagMsg });
520766
+ const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
520767
+ const signals = {
520768
+ completedDelta,
520769
+ filesDelta: fileSet.size,
520770
+ failureSum,
520771
+ variantCount: variantSet.size,
520772
+ windowSamples: stagnationWindow.length,
520773
+ variantList: [...variantSet]
520774
+ };
520775
+ if (isStagnant(signals, {
520776
+ failureThreshold: STAG_FAILURE_THRESHOLD,
520777
+ variantThreshold: STAG_VARIANT_THRESHOLD,
520778
+ filesDeltaMin: STAG_FILES_DELTA_MIN,
520779
+ minSamples: STAG_MIN_SAMPLES
520780
+ })) {
520781
+ messages2.push({ role: "system", content: buildStagnationDiagnostic(signals) });
520628
520782
  stagnationCooldownUntilTurn = turn + 5;
520629
520783
  this.emit({
520630
520784
  type: "status",
520631
- content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${variantCount} variants, ${failureSum} failures, ${filesDelta} files in window)`,
520785
+ content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${signals.variantCount} variants, ${signals.failureSum} failures, ${signals.filesDelta} files in window)`,
520632
520786
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
520633
520787
  });
520634
520788
  }
@@ -520817,11 +520971,8 @@ Now call file_write with YOUR skeleton for this task.`
520817
520971
  if (toolHints.length > 0) {
520818
520972
  toolHints.sort((a2, b) => b.score - a2.score);
520819
520973
  const top = toolHints.slice(0, 5);
520820
- messages2.push({
520821
- role: "system",
520822
- content: `[Relevant tools for this task]
520823
- ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
520824
- });
520974
+ pushSoftInjection("system", `[Relevant tools for this task]
520975
+ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
520825
520976
  }
520826
520977
  }
520827
520978
  if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520845,11 +520996,8 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
520845
520996
  }
520846
520997
  hints.push("EFFICIENCY: Aim for 3-5 tool calls total. Each call should make measurable progress. Do not repeat a tool call with the same arguments.");
520847
520998
  if (hints.length > 0) {
520848
- messages2.push({
520849
- role: "system",
520850
- content: `[Efficiency Guide]
520851
- ${hints.join("\n")}`
520852
- });
520999
+ pushSoftInjection("system", `[Efficiency Guide]
521000
+ ${hints.join("\n")}`);
520853
521001
  }
520854
521002
  }
520855
521003
  if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520859,21 +521007,18 @@ ${hints.join("\n")}`
520859
521007
  const hasMultiStepRequirement = taskGoal.length > 200 && (taskGoal.match(/\d\./g) || []).length >= 2;
520860
521008
  const isAnalysisTask = (taskGoal.match(/\banalyze\b|\baudit\b|\breview\b|\bdiagnose\b|\binvestigate\b|\bcompare\b|\bevaluate\b/gi) || []).length >= 1;
520861
521009
  if (hasMultiplePremises || hasConditionalLogic || hasMultiStepRequirement || isAnalysisTask) {
520862
- messages2.push({
520863
- role: "system",
520864
- content: [
520865
- "[Structured Reasoning Guide]",
520866
- "This task requires multi-step reasoning. Follow this structure:",
520867
- "",
520868
- "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
520869
- "2. For each sub-question:",
520870
- " a. State what you KNOW (verified from evidence/tool output)",
520871
- " b. State what you ASSUME (hypotheses not yet confirmed)",
520872
- " c. Derive your conclusion using ONLY verified facts",
520873
- "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
520874
- "4. Before your final answer, verify: does each conclusion follow from the evidence?"
520875
- ].join("\n")
520876
- });
521010
+ pushSoftInjection("system", [
521011
+ "[Structured Reasoning Guide]",
521012
+ "This task requires multi-step reasoning. Follow this structure:",
521013
+ "",
521014
+ "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
521015
+ "2. For each sub-question:",
521016
+ " a. State what you KNOW (verified from evidence/tool output)",
521017
+ " b. State what you ASSUME (hypotheses not yet confirmed)",
521018
+ " c. Derive your conclusion using ONLY verified facts",
521019
+ "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
521020
+ "4. Before your final answer, verify: does each conclusion follow from the evidence?"
521021
+ ].join("\n"));
520877
521022
  }
520878
521023
  }
520879
521024
  const turnBudget = turnTier === "small" ? 5 : turnTier === "medium" ? 8 : 0;
@@ -521442,16 +521587,6 @@ ${memoryLines.join("\n")}`
521442
521587
  toolCallBudget.set(tc.name, budgetRemaining - 1);
521443
521588
  }
521444
521589
  const toolFingerprint = `${tc.name}:${argsKey}`;
521445
- if (this._littlemanRedundantBlocks.has(toolFingerprint)) {
521446
- this._littlemanRedundantBlocks.delete(toolFingerprint);
521447
- const cachedEntry2 = recentToolResults.get(toolFingerprint);
521448
- this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
521449
- const blockMsg = cachedEntry2 ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
521450
-
521451
- ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
521452
- this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
521453
- return { tc, output: blockMsg };
521454
- }
521455
521590
  const baseIsReadLike = ![
521456
521591
  "file_write",
521457
521592
  "file_edit",
@@ -521468,22 +521603,53 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
521468
521603
  "sub_agent",
521469
521604
  "priority_delegate",
521470
521605
  "ask_user",
521471
- // WO-TASK-02 — todo_write is a state-write tool. Calling it twice
521472
- // with the same args is idempotent (it just re-stores the same
521473
- // list) but the dedup detector was flagging it as a wasted call
521474
- // and blocking the planning workflow. The agent uses todo_write
521475
- // as its primary checkpoint mechanism so it MUST always execute.
521476
521606
  "todo_write",
521477
- // nexus is also a state tool — connect is idempotent but the
521478
- // dedup warning was causing confused agents to bail out to
521479
- // shell workarounds (npm install, find /bin, etc.) when they
521480
- // saw "DUPLICATE CALL" after their first connect. Let the
521481
- // tool see every call and return the cached state itself.
521482
521607
  "nexus"
521483
521608
  ].includes(tc.name);
521484
521609
  const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
521485
- const cachedEntry = recentToolResults.get(toolFingerprint);
521486
- if (isReadLike && cachedEntry !== void 0) {
521610
+ const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
521611
+ if (observerRedundantBlock) {
521612
+ this._littlemanRedundantBlocks.delete(toolFingerprint);
521613
+ }
521614
+ const criticDecision = evaluate({
521615
+ proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
521616
+ fingerprint: toolFingerprint,
521617
+ isReadLike,
521618
+ recentToolResults,
521619
+ dedupHitCount,
521620
+ recentFailures: this._recentFailures.map((f2) => ({
521621
+ fingerprint: f2.fingerprint,
521622
+ toolName: f2.tool,
521623
+ errorPreview: (f2.error || f2.output || "").slice(0, 200)
521624
+ })),
521625
+ stagnationSignals: null,
521626
+ // stagnation gate handled at top-of-turn
521627
+ stagnationGateActive: false,
521628
+ observerRedundantBlock
521629
+ });
521630
+ if (criticDecision.decision === "observer_block") {
521631
+ this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
521632
+ const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
521633
+
521634
+ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
521635
+ this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
521636
+ return { tc, output: blockMsg };
521637
+ }
521638
+ if (criticDecision.decision === "force_progress_block") {
521639
+ dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
521640
+ this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
521641
+ this.emit({
521642
+ type: "tool_result",
521643
+ toolName: tc.name,
521644
+ success: false,
521645
+ content: criticDecision.blockMessage.slice(0, 120),
521646
+ turn,
521647
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
521648
+ });
521649
+ return { tc, output: criticDecision.blockMessage };
521650
+ }
521651
+ if (criticDecision.decision === "serve_cached") {
521652
+ dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
521487
521653
  this.emit({
521488
521654
  type: "tool_call",
521489
521655
  toolName: tc.name,
@@ -521491,36 +521657,13 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
521491
521657
  turn,
521492
521658
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
521493
521659
  });
521494
- const hits = (dedupHitCount.get(toolFingerprint) ?? 0) + 1;
521495
- dedupHitCount.set(toolFingerprint, hits);
521496
- const threshold = tc.name === "shell" ? 2 : DEDUP_ESCALATION_THRESHOLD;
521497
- if (hits >= threshold) {
521498
- const argPreview = JSON.stringify(tc.arguments ?? {}).slice(0, 200);
521499
- const blockMsg = `[FORCED PROGRESS BLOCK — you have called ${tc.name}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.
521500
-
521501
- REQUIRED before this tool will run again with these arguments:
521502
- • file_write or file_edit, OR
521503
- • todo_write that advances the plan, OR
521504
- • task_complete (if all phases are done).
521505
-
521506
- If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`;
521507
- this.emit({
521508
- type: "tool_result",
521509
- toolName: tc.name,
521510
- success: false,
521511
- content: blockMsg.slice(0, 120),
521512
- turn,
521513
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
521514
- });
521515
- return { tc, output: blockMsg };
521516
- }
521517
- const header = cachedEntry.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
521660
+ const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
521518
521661
 
521519
- ` : `[DUPLICATE CALL #${hits} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
521662
+ ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
521520
521663
 
521521
521664
  `;
521522
- const truncatedCache = cachedEntry.result.length > 500 ? cachedEntry.result.slice(0, 500) + `
521523
- ... [${cachedEntry.result.length - 500} chars omitted — same as before]` : cachedEntry.result;
521665
+ const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
521666
+ ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
521524
521667
  const dedupOutput = header + truncatedCache;
521525
521668
  this.emit({
521526
521669
  type: "tool_result",
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.475",
3
+ "version": "0.187.477",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.475",
9
+ "version": "0.187.477",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.475",
3
+ "version": "0.187.477",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",