open-agents-ai 0.187.475 → 0.187.477
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +274 -131
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -282094,14 +282094,14 @@ ${lanes.join("\n")}
|
|
|
282094
282094
|
return { value: value2, isSyntacticallyString, resolvedOtherFiles, hasExternalReferences };
|
|
282095
282095
|
}
|
|
282096
282096
|
function createEvaluator({ evaluateElementAccessExpression, evaluateEntityNameExpression }) {
|
|
282097
|
-
function
|
|
282097
|
+
function evaluate3(expr, location) {
|
|
282098
282098
|
let isSyntacticallyString = false;
|
|
282099
282099
|
let resolvedOtherFiles = false;
|
|
282100
282100
|
let hasExternalReferences = false;
|
|
282101
282101
|
expr = skipParentheses(expr);
|
|
282102
282102
|
switch (expr.kind) {
|
|
282103
282103
|
case 225:
|
|
282104
|
-
const result =
|
|
282104
|
+
const result = evaluate3(expr.operand, location);
|
|
282105
282105
|
resolvedOtherFiles = result.resolvedOtherFiles;
|
|
282106
282106
|
hasExternalReferences = result.hasExternalReferences;
|
|
282107
282107
|
if (typeof result.value === "number") {
|
|
@@ -282116,8 +282116,8 @@ ${lanes.join("\n")}
|
|
|
282116
282116
|
}
|
|
282117
282117
|
break;
|
|
282118
282118
|
case 227: {
|
|
282119
|
-
const left =
|
|
282120
|
-
const right =
|
|
282119
|
+
const left = evaluate3(expr.left, location);
|
|
282120
|
+
const right = evaluate3(expr.right, location);
|
|
282121
282121
|
isSyntacticallyString = (left.isSyntacticallyString || right.isSyntacticallyString) && expr.operatorToken.kind === 40;
|
|
282122
282122
|
resolvedOtherFiles = left.resolvedOtherFiles || right.resolvedOtherFiles;
|
|
282123
282123
|
hasExternalReferences = left.hasExternalReferences || right.hasExternalReferences;
|
|
@@ -282192,7 +282192,7 @@ ${lanes.join("\n")}
|
|
|
282192
282192
|
let resolvedOtherFiles = false;
|
|
282193
282193
|
let hasExternalReferences = false;
|
|
282194
282194
|
for (const span of expr.templateSpans) {
|
|
282195
|
-
const spanResult =
|
|
282195
|
+
const spanResult = evaluate3(span.expression, location);
|
|
282196
282196
|
if (spanResult.value === void 0) {
|
|
282197
282197
|
return evaluatorResult(
|
|
282198
282198
|
/*value*/
|
|
@@ -282214,7 +282214,7 @@ ${lanes.join("\n")}
|
|
|
282214
282214
|
hasExternalReferences
|
|
282215
282215
|
);
|
|
282216
282216
|
}
|
|
282217
|
-
return
|
|
282217
|
+
return evaluate3;
|
|
282218
282218
|
}
|
|
282219
282219
|
function isConstAssertion(location) {
|
|
282220
282220
|
return isAssertionExpression(location) && isConstTypeReference(location.type) || isJSDocTypeTag(location) && isConstTypeReference(location.typeExpression);
|
|
@@ -312752,7 +312752,7 @@ ${lanes.join("\n")}
|
|
|
312752
312752
|
var emitResolver = createResolver();
|
|
312753
312753
|
var nodeBuilder = createNodeBuilder();
|
|
312754
312754
|
var syntacticNodeBuilder = createSyntacticTypeNodeBuilder(compilerOptions, nodeBuilder.syntacticBuilderResolver);
|
|
312755
|
-
var
|
|
312755
|
+
var evaluate3 = createEvaluator({
|
|
312756
312756
|
evaluateElementAccessExpression,
|
|
312757
312757
|
evaluateEntityNameExpression
|
|
312758
312758
|
});
|
|
@@ -348722,7 +348722,7 @@ ${lanes.join("\n")}
|
|
|
348722
348722
|
case 72:
|
|
348723
348723
|
case 50:
|
|
348724
348724
|
case 73:
|
|
348725
|
-
const rhsEval =
|
|
348725
|
+
const rhsEval = evaluate3(right);
|
|
348726
348726
|
if (typeof rhsEval.value === "number" && Math.abs(rhsEval.value) >= 32) {
|
|
348727
348727
|
errorOrSuggestion(
|
|
348728
348728
|
isEnumMember(walkUpParenthesizedExpressions(right.parent.parent)),
|
|
@@ -349225,7 +349225,7 @@ ${lanes.join("\n")}
|
|
|
349225
349225
|
texts.push(span.literal.text);
|
|
349226
349226
|
types2.push(isTypeAssignableTo(type, templateConstraintType) ? type : stringType);
|
|
349227
349227
|
}
|
|
349228
|
-
const evaluated = node.parent.kind !== 216 &&
|
|
349228
|
+
const evaluated = node.parent.kind !== 216 && evaluate3(node).value;
|
|
349229
349229
|
if (evaluated) {
|
|
349230
349230
|
return getFreshTypeOfLiteralType(getStringLiteralType(evaluated));
|
|
349231
349231
|
}
|
|
@@ -354706,7 +354706,7 @@ ${lanes.join("\n")}
|
|
|
354706
354706
|
function computeConstantEnumMemberValue(member) {
|
|
354707
354707
|
const isConstEnum = isEnumConst(member.parent);
|
|
354708
354708
|
const initializer = member.initializer;
|
|
354709
|
-
const result =
|
|
354709
|
+
const result = evaluate3(initializer, member);
|
|
354710
354710
|
if (result.value !== void 0) {
|
|
354711
354711
|
if (isConstEnum && typeof result.value === "number" && !isFinite(result.value)) {
|
|
354712
354712
|
error2(
|
|
@@ -354761,7 +354761,7 @@ ${lanes.join("\n")}
|
|
|
354761
354761
|
if (isConstantVariable(symbol3)) {
|
|
354762
354762
|
const declaration = symbol3.valueDeclaration;
|
|
354763
354763
|
if (declaration && isVariableDeclaration(declaration) && !declaration.type && declaration.initializer && (!location || declaration !== location && isBlockScopedNameDeclaredBeforeUse(declaration, location))) {
|
|
354764
|
-
const result =
|
|
354764
|
+
const result = evaluate3(declaration.initializer, declaration);
|
|
354765
354765
|
if (location && getSourceFileOfNode(location) !== getSourceFileOfNode(declaration)) {
|
|
354766
354766
|
return evaluatorResult(
|
|
354767
354767
|
result.value,
|
|
@@ -512128,6 +512128,106 @@ var init_personality = __esm({
|
|
|
512128
512128
|
}
|
|
512129
512129
|
});
|
|
512130
512130
|
|
|
512131
|
+
// packages/orchestrator/dist/critic.js
|
|
512132
|
+
/**
 * Build the message returned to the model when a duplicate read-like call is
 * refused until it makes progress.
 *
 * @param {{tool: string, args?: object}} call - The proposed tool call; a
 *   missing or null `args` is rendered as `{}`.
 * @param {number} hits - How many times this exact call has been repeated.
 * @returns {string} The bracketed block notice naming the tool, a preview of
 *   its arguments, and the actions that lift the block.
 */
function buildForceProgressBlockMessage(call, hits) {
  // Only the first 200 characters of the serialized arguments are shown so a
  // large payload cannot dominate the message.
  const serializedArgs = JSON.stringify(call.args ?? {});
  const argPreview = serializedArgs.slice(0, 200);
  const message = `[FORCED PROGRESS BLOCK — you have called ${call.tool}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.

REQUIRED before this tool will run again with these arguments:
• file_write or file_edit, OR
• todo_write that advances the plan, OR
• task_complete (if all phases are done).

If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`;
  return message;
}
|
|
512143
|
+
/**
 * Decide what to do with a proposed tool call before it is executed.
 *
 * Only `proposedCall`, `fingerprint`, `isReadLike`, `recentToolResults`,
 * `dedupHitCount` and `observerRedundantBlock` are read from `inputs`; any
 * other properties the caller supplies are ignored here.
 *
 * @param {object} inputs
 * @param {{tool: string, args: object}} inputs.proposedCall - The call under review.
 * @param {string} inputs.fingerprint - Key identifying this tool+arguments pair.
 * @param {boolean} inputs.isReadLike - Whether the call is eligible for cache de-duplication.
 * @param {{get: Function}} inputs.recentToolResults - Lookup from fingerprint to
 *   a cached `{ result, compacted }` entry.
 * @param {{get: Function}} inputs.dedupHitCount - Lookup from fingerprint to the
 *   number of duplicate hits recorded so far.
 * @param {boolean} inputs.observerRedundantBlock - True when the observer has
 *   flagged this fingerprint as redundant.
 * @returns {object} One of:
 *   `{decision: "observer_block", reason, cachedResult}`,
 *   `{decision: "force_progress_block", reason, hitNumber, blockMessage}`,
 *   `{decision: "serve_cached", reason, cachedResult, compacted, hitNumber}`,
 *   or `{decision: "pass"}`.
 */
function evaluate(inputs) {
  const { proposedCall, fingerprint, recentToolResults, dedupHitCount } = inputs;

  // An observer flag wins over everything else, read-like or not.
  if (inputs.observerRedundantBlock) {
    const entry = recentToolResults.get(fingerprint);
    return {
      decision: "observer_block",
      reason: "Littleman observer flagged this fingerprint as redundant",
      cachedResult: entry ? entry.result : null
    };
  }

  // Calls that are not read-like are never de-duplicated.
  if (!inputs.isReadLike) {
    return { decision: "pass" };
  }

  const cached = recentToolResults.get(fingerprint);
  if (cached === undefined) {
    return { decision: "pass" };
  }

  // This call counts as one more hit on top of what was already recorded.
  // The caller is responsible for persisting `hitNumber`; nothing is mutated here.
  const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
  const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;

  if (hits >= threshold) {
    return {
      decision: "force_progress_block",
      reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
      hitNumber: hits,
      blockMessage: buildForceProgressBlockMessage(proposedCall, hits)
    };
  }

  let reason = `duplicate call #${hits} (still under ${threshold}-hit gate)`;
  if (cached.compacted) {
    reason = "post-compaction cache re-serve";
  }
  return {
    decision: "serve_cached",
    reason,
    cachedResult: cached.result,
    compacted: cached.compacted,
    hitNumber: hits
  };
}
|
|
512177
|
+
/**
 * Render the "diagnostic mode" message sent to the model when the run has
 * been classified as stagnant.
 *
 * @param {object} signals
 * @param {number} signals.windowSamples - Number of turns summarized.
 * @param {number} signals.filesDelta - Unique files written/edited in the window.
 * @param {number} signals.failureSum - Failures accumulated in the window.
 * @param {number} signals.variantCount - Distinct shell-command variants tried.
 * @param {string[]} signals.variantList - The variants; at most the first 8 are listed.
 * @returns {string} Newline-joined diagnostic text.
 */
function buildStagnationDiagnostic(signals) {
  // Cap the listing at eight variants so the message stays bounded.
  const bullets = [];
  for (const v of signals.variantList.slice(0, 8)) {
    bullets.push(` • ${v}`);
  }
  const variantList = bullets.join("\n");

  const summary = [
    `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
    ``,
    `Over the last ${signals.windowSamples} turns you have:`,
    ` • Completed 0 new todos`,
    ` • Written/edited only ${signals.filesDelta} unique file(s) (need ≥3 for healthy progress)`,
    ` • Accumulated ${signals.failureSum} failures`,
    ` • Tried ${signals.variantCount} different shell-command variants:`,
    variantList,
    ``,
    `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
    ``
  ];

  const requiredActions = [
    `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
    ``,
    `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
    ``,
    `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
    ``,
    `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
    ` • If you think a package is installed: ls node_modules/<name>/package.json`,
    ` • If you think an env var is set: printenv <NAME>`,
    ` • If you think a file imports correctly: head -5 <file>`,
    ` • If you don't know what an error means: web_search("<exact error string>")`,
    ``,
    `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
    ``
  ];

  const prohibitions = [
    `DO NOT in your next response:`,
    ` • Try another version, flag, or variant of any command in the list above`,
    ` • Wipe node_modules / re-install — that hides the original error`,
    ` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
    ``,
    `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
  ];

  return [...summary, ...requiredActions, ...prohibitions].join("\n");
}
|
|
512213
|
+
/**
 * Report whether the summarized window of turns counts as stagnation.
 *
 * The run is stagnant only when the window is large enough AND no todos were
 * completed AND too few files were touched AND enough failures accumulated AND
 * enough distinct shell-command variants were tried.
 *
 * @param {object} signals
 * @param {number} signals.windowSamples - Number of turns in the window.
 * @param {number} signals.completedDelta - Todos completed in the window.
 * @param {number} signals.filesDelta - Unique files touched in the window.
 * @param {number} signals.failureSum - Failures accumulated in the window.
 * @param {number} signals.variantCount - Distinct shell-command variants tried.
 * @param {object} [opts] - Optional overrides; a null/undefined entry falls
 *   back to its default.
 * @param {number} [opts.failureThreshold=5]
 * @param {number} [opts.variantThreshold=4]
 * @param {number} [opts.filesDeltaMin=3]
 * @param {number} [opts.minSamples=30]
 * @returns {boolean} True when every stagnation condition holds.
 */
function isStagnant(signals, opts) {
  const overrides = opts ?? {};
  const minSamples = overrides.minSamples ?? 30;

  // Too little history to judge.
  if (signals.windowSamples < minSamples) {
    return false;
  }

  const failureThreshold = overrides.failureThreshold ?? 5;
  const variantThreshold = overrides.variantThreshold ?? 4;
  const filesDeltaMin = overrides.filesDeltaMin ?? 3;

  const noTodosCompleted = signals.completedDelta === 0;
  const fewFilesTouched = signals.filesDelta < filesDeltaMin;
  const manyFailures = signals.failureSum >= failureThreshold;
  const manyVariants = signals.variantCount >= variantThreshold;

  return noTodosCompleted && fewFilesTouched && manyFailures && manyVariants;
}
|
|
512222
|
+
// Duplicate-hit limits read by evaluate(): SHELL_THRESHOLD applies when the
// proposed tool is "shell", FS_THRESHOLD applies to every other tool. They
// stay undefined until init_critic's body has run.
var SHELL_THRESHOLD;
var FS_THRESHOLD;

// Module initializer for critic.js, wrapped by the bundle's __esm helper
// (defined elsewhere; presumably it runs the body once on first call — confirm).
var init_critic = __esm({
  "packages/orchestrator/dist/critic.js"() {
    "use strict";
    FS_THRESHOLD = 3;
    SHELL_THRESHOLD = 2;
  }
});
|
|
512230
|
+
|
|
512131
512231
|
// packages/orchestrator/dist/pressure-gate.js
|
|
512132
512232
|
function detectPressure(message2) {
|
|
512133
512233
|
const hasProfanity = PRESSURE_SIGNALS.test(message2);
|
|
@@ -514455,7 +514555,7 @@ function h10(t2) {
|
|
|
514455
514555
|
function h11(t2) {
|
|
514456
514556
|
return t2 * t2 * (t2 - 1);
|
|
514457
514557
|
}
|
|
514458
|
-
function
|
|
514558
|
+
function evaluate2(controls, r2) {
|
|
514459
514559
|
const { K: K2, N, P: P2, E: Eb, Pd, Ed, expansion, epsSigma, deltaMax } = controls;
|
|
514460
514560
|
const eps2 = epsSigma * epsSigma;
|
|
514461
514561
|
const u = new Float64Array(K2);
|
|
@@ -514731,7 +514831,7 @@ function deserialize(obj) {
|
|
|
514731
514831
|
function rmse(controls, samples) {
|
|
514732
514832
|
let sumSq = 0;
|
|
514733
514833
|
for (const s2 of samples) {
|
|
514734
|
-
const pred =
|
|
514834
|
+
const pred = evaluate2(controls, s2.input);
|
|
514735
514835
|
for (let n2 = 0; n2 < controls.N; n2++) {
|
|
514736
514836
|
const diff = pred[n2] - s2.output[n2];
|
|
514737
514837
|
sumSq += diff * diff;
|
|
@@ -514906,7 +515006,7 @@ var init_embeddingAligner = __esm({
|
|
|
514906
515006
|
const range = max - min;
|
|
514907
515007
|
normalized[k] = range > 1e-10 ? Math.max(0, Math.min(1, (projected[k] - min) / range)) : 0.5;
|
|
514908
515008
|
}
|
|
514909
|
-
const aligned64 =
|
|
515009
|
+
const aligned64 = evaluate2(this.state.controls, normalized);
|
|
514910
515010
|
const result = new Float32Array(this.state.dstDim);
|
|
514911
515011
|
for (let n2 = 0; n2 < this.state.dstDim; n2++)
|
|
514912
515012
|
result[n2] = aligned64[n2];
|
|
@@ -514993,7 +515093,7 @@ var init_embeddingAligner = __esm({
|
|
|
514993
515093
|
let avgCosine = 0;
|
|
514994
515094
|
if (testSamples.length > 0) {
|
|
514995
515095
|
for (const s2 of testSamples) {
|
|
514996
|
-
const pred =
|
|
515096
|
+
const pred = evaluate2(controls, s2.input);
|
|
514997
515097
|
avgCosine += cosine(pred, s2.output);
|
|
514998
515098
|
}
|
|
514999
515099
|
avgCosine /= testSamples.length;
|
|
@@ -516500,7 +516600,7 @@ __export(dist_exports2, {
|
|
|
516500
516600
|
retrieveByPPR: () => retrieveByPPR,
|
|
516501
516601
|
splanifoldCosine: () => cosine,
|
|
516502
516602
|
splanifoldDeserialize: () => deserialize,
|
|
516503
|
-
splanifoldEvaluate: () =>
|
|
516603
|
+
splanifoldEvaluate: () => evaluate2,
|
|
516504
516604
|
splanifoldFit: () => fit,
|
|
516505
516605
|
splanifoldRmse: () => rmse,
|
|
516506
516606
|
splanifoldSerialize: () => serialize
|
|
@@ -518229,6 +518329,52 @@ function getSystemPromptForTier(tier) {
|
|
|
518229
518329
|
return SYSTEM_PROMPT;
|
|
518230
518330
|
}
|
|
518231
518331
|
}
|
|
518332
|
+
/**
 * Heuristically decide whether a user request is a "task" (as opposed to
 * chat), which the caller uses to choose a slimmed system prompt.
 *
 * A request is treated as a task when any of these hold:
 *   - it is longer than 2000 characters;
 *   - it contains something shaped like a path with two or more
 *     `/segment` components;
 *   - it contains both an action verb (implement, build, fix, ...) and a
 *     technical noun (file, api, schema, ...), matched case-insensitively.
 *
 * @param {string} task - The raw request text; empty/null/undefined yields false.
 * @returns {boolean} True when the request looks like a task.
 */
function detectTaskMode(task) {
  if (!task) {
    return false;
  }

  // Keyword matching is case-insensitive and limited to the first 4000 chars.
  const head = task.slice(0, 4e3).toLowerCase();

  const isLong = task.length > 2e3;
  if (isLong) {
    return true;
  }

  const mentionsPath = /(\/[\w.-]+){2,}/.test(task.slice(0, 2e3));
  if (mentionsPath) {
    return true;
  }

  const hasActionVerb = /\b(implement|build|create|refactor|write|fix|migrate|deploy|generate|setup|set up|develop|design|integrate)\b/.test(head);
  if (!hasActionVerb) {
    return false;
  }

  // "build" is in both lists, so that word alone satisfies both checks.
  return /\b(spec|file|module|component|api|endpoint|database|schema|test|build|next\.js|typescript|react|prisma|tailwind|sql|python|rust|go)\b/.test(head);
}
|
|
518347
|
+
/**
 * Strip chat-oriented material from a system prompt for use in task mode.
 *
 * Removes, in order:
 *   1. whole `## ` sections whose header matches a known chat-only topic,
 *      each running up to the next `## ` header or the end of the prompt;
 *   2. individual tool-listing lines for tools not needed in task mode;
 *   3. the `**CHAT MODE**` block up to `**TASK MODE**`, and the
 *      `**TASK MODE**` header line itself.
 * Runs of three or more newlines are then collapsed to one blank line, and
 * the result is trimmed and terminated with a single newline.
 *
 * @param {string} prompt - The full system prompt.
 * @returns {string} The slimmed prompt, ending in exactly one "\n".
 */
function slimSystemPromptForTaskMode(prompt) {
  const SECTION_HEADERS_TO_REMOVE = [
    /^##\s*Interactive\s*\/\s*Long-?Running Sessions\s*$/im,
    /^##\s*Document Generation Strategy\s*$/im,
    /^##\s*Calculations\s*[—-]\s*Always Execute, Never Guess\s*$/im,
    /^##\s*Knowledge Gaps\s*[—-]\s*Search, Don't Hallucinate\s*$/im,
    /^##\s*Self-Awareness( & Introspection)?\s*$/im,
    /^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
  ];
  // Global flag: several lines can match one pattern (e.g. the alternation of
  // device tools), and every matching line has to go, not just the first.
  const TOOL_LINES_TO_REMOVE = [
    /^- nexus:.*$/gim,
    /^- background_run.*task_status.*task_output.*task_stop:.*$/gim,
    /^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/gim,
    /^Voice\/TTS:.*$/gim,
    /^- Voice\/TTS:.*$/gim,
    /^- Desktop\/Vision:.*$/gim,
    /^- P2P:.*$/gim
  ];
  const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
  // A section body runs lazily to the next "## " header or to end of input.
  // JavaScript has no \Z anchor (it would match a literal "Z"/"z" here), and
  // `$` means end-of-line under the `m` flag, so end of input is written as
  // the negative lookahead (?![\s\S]).
  const SECTION_BODY = "[\\s\\S]*?(?=^##\\s|(?![\\s\\S]))";

  let out = prompt;
  for (const header of SECTION_HEADERS_TO_REMOVE) {
    out = out.replace(new RegExp(header.source + SECTION_BODY, "im"), "");
  }
  for (const toolLine of TOOL_LINES_TO_REMOVE) {
    out = out.replace(toolLine, "");
  }
  out = out.replace(CHAT_MODE_BLOCK, "");
  out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
  // Removed lines leave blank lines behind; squeeze them to one blank line.
  out = out.replace(/\n{3,}/g, "\n\n");
  return `${out.trim()}\n`;
}
|
|
518232
518378
|
function computeTodoReminder(input) {
|
|
518233
518379
|
const turnsSinceWriteThreshold = input.turnsSinceWriteThreshold ?? 10;
|
|
518234
518380
|
const turnsBetweenReminders = input.turnsBetweenReminders ?? 10;
|
|
@@ -518326,6 +518472,7 @@ var init_agenticRunner = __esm({
|
|
|
518326
518472
|
init_dist();
|
|
518327
518473
|
init_personality();
|
|
518328
518474
|
init_promptLoader();
|
|
518475
|
+
init_critic();
|
|
518329
518476
|
init_pressure_gate();
|
|
518330
518477
|
init_dist5();
|
|
518331
518478
|
init_dist7();
|
|
@@ -518638,7 +518785,17 @@ var init_agenticRunner = __esm({
|
|
|
518638
518785
|
async assembleContext(task, context2) {
|
|
518639
518786
|
const sections = [];
|
|
518640
518787
|
const pressureCue = pressureCheck(task);
|
|
518641
|
-
const
|
|
518788
|
+
const rawPrompt = getSystemPromptForTier(this.options.modelTier);
|
|
518789
|
+
const taskModeOn = detectTaskMode(task);
|
|
518790
|
+
const slimmedPrompt = taskModeOn ? slimSystemPromptForTaskMode(rawPrompt) : rawPrompt;
|
|
518791
|
+
const basePrompt = slimmedPrompt + pressureCue;
|
|
518792
|
+
if (taskModeOn) {
|
|
518793
|
+
this.emit({
|
|
518794
|
+
type: "status",
|
|
518795
|
+
content: `REG-19: TASK MODE detected — system prompt slimmed ${rawPrompt.length}→${slimmedPrompt.length} bytes`,
|
|
518796
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
518797
|
+
});
|
|
518798
|
+
}
|
|
518642
518799
|
const _BATCH_GUIDANCE = {
|
|
518643
518800
|
small: "\n\n## Response batching\n\nEmit AT MOST 2 tool calls per response. After observing their results, plan the next 2 in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
|
|
518644
518801
|
medium: "\n\n## Response batching\n\nEmit AT MOST 4 tool calls per response. After observing their results, plan the next batch in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
|
|
@@ -520556,6 +520713,20 @@ TASK: ${task}` : task;
|
|
|
520556
520713
|
const STAG_FAILURE_THRESHOLD = 5;
|
|
520557
520714
|
const STAG_VARIANT_THRESHOLD = 4;
|
|
520558
520715
|
const STAG_FILES_DELTA_MIN = 3;
|
|
520716
|
+
let injectionsThisTurn = 0;
|
|
520717
|
+
const INJECTION_BUDGET_SOFT = 2;
|
|
520718
|
+
const deferredSoftInjections = [];
|
|
520719
|
+
// Append an advisory message to the conversation if this turn's soft budget
// allows it; otherwise park it in the deferred queue (at most 6 entries —
// anything beyond that is dropped). Returns true only when the message was
// appended immediately.
const pushSoftInjection = (role, content) => {
  const entry = { role, content };
  const withinBudget = injectionsThisTurn < INJECTION_BUDGET_SOFT;

  if (!withinBudget) {
    const queueHasRoom = deferredSoftInjections.length < 6;
    if (queueHasRoom) {
      deferredSoftInjections.push(entry);
    }
    return false;
  }

  messages2.push(entry);
  injectionsThisTurn++;
  return true;
};
|
|
520559
520730
|
for (let turn = 0; turn < this.options.maxTurns; turn++) {
|
|
520560
520731
|
clearTurnState(this._appState);
|
|
520561
520732
|
this._maybeApplyThinkGuard();
|
|
@@ -520570,6 +520741,12 @@ TASK: ${task}` : task;
|
|
|
520570
520741
|
this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
520571
520742
|
break;
|
|
520572
520743
|
}
|
|
520744
|
+
injectionsThisTurn = 0;
|
|
520745
|
+
while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
|
|
520746
|
+
const next = deferredSoftInjections.shift();
|
|
520747
|
+
messages2.push({ role: next.role, content: next.content });
|
|
520748
|
+
injectionsThisTurn++;
|
|
520749
|
+
}
|
|
520573
520750
|
if (turn > stagnationCooldownUntilTurn && stagnationWindow.length >= STAG_MIN_SAMPLES) {
|
|
520574
520751
|
const cutoffTurn = turn - STAG_WINDOW_TURNS;
|
|
520575
520752
|
const cutoffTs = Date.now() - STAG_WINDOW_MS;
|
|
@@ -520582,53 +520759,30 @@ TASK: ${task}` : task;
|
|
|
520582
520759
|
for (const s2 of stagnationWindow)
|
|
520583
520760
|
for (const p2 of s2.filesTouchedThisTurn)
|
|
520584
520761
|
fileSet.add(p2);
|
|
520585
|
-
const filesDelta = fileSet.size;
|
|
520586
|
-
const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
|
|
520587
520762
|
const variantSet = /* @__PURE__ */ new Set();
|
|
520588
520763
|
for (const s2 of stagnationWindow)
|
|
520589
520764
|
for (const p2 of s2.shellPrefixesThisTurn)
|
|
520590
520765
|
variantSet.add(p2);
|
|
520591
|
-
const
|
|
520592
|
-
|
|
520593
|
-
|
|
520594
|
-
|
|
520595
|
-
|
|
520596
|
-
|
|
520597
|
-
|
|
520598
|
-
|
|
520599
|
-
|
|
520600
|
-
|
|
520601
|
-
|
|
520602
|
-
|
|
520603
|
-
|
|
520604
|
-
|
|
520605
|
-
|
|
520606
|
-
|
|
520607
|
-
``,
|
|
520608
|
-
`1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
|
|
520609
|
-
``,
|
|
520610
|
-
`2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
|
|
520611
|
-
``,
|
|
520612
|
-
`3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
|
|
520613
|
-
` • If you think a package is installed: ls node_modules/<name>/package.json`,
|
|
520614
|
-
` • If you think an env var is set: printenv <NAME>`,
|
|
520615
|
-
` • If you think a file imports correctly: head -5 <file>`,
|
|
520616
|
-
` • If you don't know what an error means: web_search("<exact error string>")`,
|
|
520617
|
-
``,
|
|
520618
|
-
`4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
|
|
520619
|
-
``,
|
|
520620
|
-
`DO NOT in your next response:`,
|
|
520621
|
-
` • Try another version, flag, or variant of any command in the list above`,
|
|
520622
|
-
` • Wipe node_modules / re-install — that hides the original error`,
|
|
520623
|
-
` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
|
|
520624
|
-
``,
|
|
520625
|
-
`task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
|
|
520626
|
-
].join("\n");
|
|
520627
|
-
messages2.push({ role: "system", content: stagMsg });
|
|
520766
|
+
const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
|
|
520767
|
+
const signals = {
|
|
520768
|
+
completedDelta,
|
|
520769
|
+
filesDelta: fileSet.size,
|
|
520770
|
+
failureSum,
|
|
520771
|
+
variantCount: variantSet.size,
|
|
520772
|
+
windowSamples: stagnationWindow.length,
|
|
520773
|
+
variantList: [...variantSet]
|
|
520774
|
+
};
|
|
520775
|
+
if (isStagnant(signals, {
|
|
520776
|
+
failureThreshold: STAG_FAILURE_THRESHOLD,
|
|
520777
|
+
variantThreshold: STAG_VARIANT_THRESHOLD,
|
|
520778
|
+
filesDeltaMin: STAG_FILES_DELTA_MIN,
|
|
520779
|
+
minSamples: STAG_MIN_SAMPLES
|
|
520780
|
+
})) {
|
|
520781
|
+
messages2.push({ role: "system", content: buildStagnationDiagnostic(signals) });
|
|
520628
520782
|
stagnationCooldownUntilTurn = turn + 5;
|
|
520629
520783
|
this.emit({
|
|
520630
520784
|
type: "status",
|
|
520631
|
-
content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${variantCount} variants, ${failureSum} failures, ${filesDelta} files in window)`,
|
|
520785
|
+
content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${signals.variantCount} variants, ${signals.failureSum} failures, ${signals.filesDelta} files in window)`,
|
|
520632
520786
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
520633
520787
|
});
|
|
520634
520788
|
}
|
|
@@ -520817,11 +520971,8 @@ Now call file_write with YOUR skeleton for this task.`
|
|
|
520817
520971
|
if (toolHints.length > 0) {
|
|
520818
520972
|
toolHints.sort((a2, b) => b.score - a2.score);
|
|
520819
520973
|
const top = toolHints.slice(0, 5);
|
|
520820
|
-
|
|
520821
|
-
|
|
520822
|
-
content: `[Relevant tools for this task]
|
|
520823
|
-
${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
|
|
520824
|
-
});
|
|
520974
|
+
pushSoftInjection("system", `[Relevant tools for this task]
|
|
520975
|
+
${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
|
|
520825
520976
|
}
|
|
520826
520977
|
}
|
|
520827
520978
|
if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
|
|
@@ -520845,11 +520996,8 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
|
|
|
520845
520996
|
}
|
|
520846
520997
|
hints.push("EFFICIENCY: Aim for 3-5 tool calls total. Each call should make measurable progress. Do not repeat a tool call with the same arguments.");
|
|
520847
520998
|
if (hints.length > 0) {
|
|
520848
|
-
|
|
520849
|
-
|
|
520850
|
-
content: `[Efficiency Guide]
|
|
520851
|
-
${hints.join("\n")}`
|
|
520852
|
-
});
|
|
520999
|
+
pushSoftInjection("system", `[Efficiency Guide]
|
|
521000
|
+
${hints.join("\n")}`);
|
|
520853
521001
|
}
|
|
520854
521002
|
}
|
|
520855
521003
|
if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
|
|
@@ -520859,21 +521007,18 @@ ${hints.join("\n")}`
|
|
|
520859
521007
|
const hasMultiStepRequirement = taskGoal.length > 200 && (taskGoal.match(/\d\./g) || []).length >= 2;
|
|
520860
521008
|
const isAnalysisTask = (taskGoal.match(/\banalyze\b|\baudit\b|\breview\b|\bdiagnose\b|\binvestigate\b|\bcompare\b|\bevaluate\b/gi) || []).length >= 1;
|
|
520861
521009
|
if (hasMultiplePremises || hasConditionalLogic || hasMultiStepRequirement || isAnalysisTask) {
|
|
520862
|
-
|
|
520863
|
-
|
|
520864
|
-
|
|
520865
|
-
|
|
520866
|
-
|
|
520867
|
-
|
|
520868
|
-
|
|
520869
|
-
|
|
520870
|
-
|
|
520871
|
-
|
|
520872
|
-
|
|
520873
|
-
|
|
520874
|
-
"4. Before your final answer, verify: does each conclusion follow from the evidence?"
|
|
520875
|
-
].join("\n")
|
|
520876
|
-
});
|
|
521010
|
+
pushSoftInjection("system", [
|
|
521011
|
+
"[Structured Reasoning Guide]",
|
|
521012
|
+
"This task requires multi-step reasoning. Follow this structure:",
|
|
521013
|
+
"",
|
|
521014
|
+
"1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
|
|
521015
|
+
"2. For each sub-question:",
|
|
521016
|
+
" a. State what you KNOW (verified from evidence/tool output)",
|
|
521017
|
+
" b. State what you ASSUME (hypotheses not yet confirmed)",
|
|
521018
|
+
" c. Derive your conclusion using ONLY verified facts",
|
|
521019
|
+
"3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
|
|
521020
|
+
"4. Before your final answer, verify: does each conclusion follow from the evidence?"
|
|
521021
|
+
].join("\n"));
|
|
520877
521022
|
}
|
|
520878
521023
|
}
|
|
520879
521024
|
const turnBudget = turnTier === "small" ? 5 : turnTier === "medium" ? 8 : 0;
|
|
@@ -521442,16 +521587,6 @@ ${memoryLines.join("\n")}`
|
|
|
521442
521587
|
toolCallBudget.set(tc.name, budgetRemaining - 1);
|
|
521443
521588
|
}
|
|
521444
521589
|
const toolFingerprint = `${tc.name}:${argsKey}`;
|
|
521445
|
-
if (this._littlemanRedundantBlocks.has(toolFingerprint)) {
|
|
521446
|
-
this._littlemanRedundantBlocks.delete(toolFingerprint);
|
|
521447
|
-
const cachedEntry2 = recentToolResults.get(toolFingerprint);
|
|
521448
|
-
this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521449
|
-
const blockMsg = cachedEntry2 ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
521450
|
-
|
|
521451
|
-
${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
|
|
521452
|
-
this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521453
|
-
return { tc, output: blockMsg };
|
|
521454
|
-
}
|
|
521455
521590
|
const baseIsReadLike = ![
|
|
521456
521591
|
"file_write",
|
|
521457
521592
|
"file_edit",
|
|
@@ -521468,22 +521603,53 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
|
|
|
521468
521603
|
"sub_agent",
|
|
521469
521604
|
"priority_delegate",
|
|
521470
521605
|
"ask_user",
|
|
521471
|
-
// WO-TASK-02 — todo_write is a state-write tool. Calling it twice
|
|
521472
|
-
// with the same args is idempotent (it just re-stores the same
|
|
521473
|
-
// list) but the dedup detector was flagging it as a wasted call
|
|
521474
|
-
// and blocking the planning workflow. The agent uses todo_write
|
|
521475
|
-
// as its primary checkpoint mechanism so it MUST always execute.
|
|
521476
521606
|
"todo_write",
|
|
521477
|
-
// nexus is also a state tool — connect is idempotent but the
|
|
521478
|
-
// dedup warning was causing confused agents to bail out to
|
|
521479
|
-
// shell workarounds (npm install, find /bin, etc.) when they
|
|
521480
|
-
// saw "DUPLICATE CALL" after their first connect. Let the
|
|
521481
|
-
// tool see every call and return the cached state itself.
|
|
521482
521607
|
"nexus"
|
|
521483
521608
|
].includes(tc.name);
|
|
521484
521609
|
const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
|
|
521485
|
-
const
|
|
521486
|
-
if (
|
|
521610
|
+
const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
|
|
521611
|
+
if (observerRedundantBlock) {
|
|
521612
|
+
this._littlemanRedundantBlocks.delete(toolFingerprint);
|
|
521613
|
+
}
|
|
521614
|
+
const criticDecision = evaluate({
|
|
521615
|
+
proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
|
|
521616
|
+
fingerprint: toolFingerprint,
|
|
521617
|
+
isReadLike,
|
|
521618
|
+
recentToolResults,
|
|
521619
|
+
dedupHitCount,
|
|
521620
|
+
recentFailures: this._recentFailures.map((f2) => ({
|
|
521621
|
+
fingerprint: f2.fingerprint,
|
|
521622
|
+
toolName: f2.tool,
|
|
521623
|
+
errorPreview: (f2.error || f2.output || "").slice(0, 200)
|
|
521624
|
+
})),
|
|
521625
|
+
stagnationSignals: null,
|
|
521626
|
+
// stagnation gate handled at top-of-turn
|
|
521627
|
+
stagnationGateActive: false,
|
|
521628
|
+
observerRedundantBlock
|
|
521629
|
+
});
|
|
521630
|
+
if (criticDecision.decision === "observer_block") {
|
|
521631
|
+
this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521632
|
+
const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
521633
|
+
|
|
521634
|
+
${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
|
|
521635
|
+
this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521636
|
+
return { tc, output: blockMsg };
|
|
521637
|
+
}
|
|
521638
|
+
if (criticDecision.decision === "force_progress_block") {
|
|
521639
|
+
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
521640
|
+
this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521641
|
+
this.emit({
|
|
521642
|
+
type: "tool_result",
|
|
521643
|
+
toolName: tc.name,
|
|
521644
|
+
success: false,
|
|
521645
|
+
content: criticDecision.blockMessage.slice(0, 120),
|
|
521646
|
+
turn,
|
|
521647
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521648
|
+
});
|
|
521649
|
+
return { tc, output: criticDecision.blockMessage };
|
|
521650
|
+
}
|
|
521651
|
+
if (criticDecision.decision === "serve_cached") {
|
|
521652
|
+
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
521487
521653
|
this.emit({
|
|
521488
521654
|
type: "tool_call",
|
|
521489
521655
|
toolName: tc.name,
|
|
@@ -521491,36 +521657,13 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
|
|
|
521491
521657
|
turn,
|
|
521492
521658
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521493
521659
|
});
|
|
521494
|
-
const
|
|
521495
|
-
dedupHitCount.set(toolFingerprint, hits);
|
|
521496
|
-
const threshold = tc.name === "shell" ? 2 : DEDUP_ESCALATION_THRESHOLD;
|
|
521497
|
-
if (hits >= threshold) {
|
|
521498
|
-
const argPreview = JSON.stringify(tc.arguments ?? {}).slice(0, 200);
|
|
521499
|
-
const blockMsg = `[FORCED PROGRESS BLOCK — you have called ${tc.name}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.
|
|
521500
|
-
|
|
521501
|
-
REQUIRED before this tool will run again with these arguments:
|
|
521502
|
-
• file_write or file_edit, OR
|
|
521503
|
-
• todo_write that advances the plan, OR
|
|
521504
|
-
• task_complete (if all phases are done).
|
|
521505
|
-
|
|
521506
|
-
If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`;
|
|
521507
|
-
this.emit({
|
|
521508
|
-
type: "tool_result",
|
|
521509
|
-
toolName: tc.name,
|
|
521510
|
-
success: false,
|
|
521511
|
-
content: blockMsg.slice(0, 120),
|
|
521512
|
-
turn,
|
|
521513
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521514
|
-
});
|
|
521515
|
-
return { tc, output: blockMsg };
|
|
521516
|
-
}
|
|
521517
|
-
const header = cachedEntry.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
521660
|
+
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
521518
521661
|
|
|
521519
|
-
` : `[DUPLICATE CALL #${
|
|
521662
|
+
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
|
|
521520
521663
|
|
|
521521
521664
|
`;
|
|
521522
|
-
const truncatedCache =
|
|
521523
|
-
... [${
|
|
521665
|
+
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
521666
|
+
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
521524
521667
|
const dedupOutput = header + truncatedCache;
|
|
521525
521668
|
this.emit({
|
|
521526
521669
|
type: "tool_result",
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.475",
|
|
3
|
+
"version": "0.187.477",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.475",
|
|
9
|
+
"version": "0.187.477",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED