open-agents-ai 0.187.574 → 0.187.576

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -515299,26 +515299,10 @@ function summarizeMAST(tags) {
515299
515299
  }
515300
515300
  return { byMode, byCategory, total: tags.length };
515301
515301
  }
515302
- var MAST_CATEGORY;
515303
515302
  var init_mast_tagger = __esm({
515304
515303
  "packages/orchestrator/dist/mast-tagger.js"() {
515305
515304
  "use strict";
515306
515305
  init_reflection();
515307
- MAST_CATEGORY = {
515308
- spec_disobedience: "specification_design",
515309
- step_repetition: "specification_design",
515310
- history_loss: "specification_design",
515311
- completion_unrecognized: "specification_design",
515312
- input_ignored: "inter_agent_misalignment",
515313
- proceeded_without_clarify: "inter_agent_misalignment",
515314
- conversation_reset: "inter_agent_misalignment",
515315
- reasoning_action_mismatch: "inter_agent_misalignment",
515316
- premature_termination: "task_verification_termination",
515317
- validation_skipped: "task_verification_termination",
515318
- shallow_check_accepted: "task_verification_termination",
515319
- premature_task_complete: "task_verification_termination",
515320
- other: "specification_design"
515321
- };
515322
515306
  }
515323
515307
  });
515324
515308
 
@@ -528102,15 +528086,16 @@ var init_agenticRunner = __esm({
528102
528086
  // a phase's worth of work without recording progress — and on the next
528103
528087
  // turn will replay the same plan. Surface a nudge before that happens.
528104
528088
  _writesSinceLastTodoWrite = 0;
528105
- // REG-12: Progress gate (root-cause enforcement). When ≥6 file writes
528106
- // have happened without a todo_write call, this latch flips ON. While
528107
- // the latch is on, every tool call EXCEPT todo_write/todo_read/
528108
- // task_complete/ask_user is intercepted with a synthetic '[PROGRESS GATE]'
528109
- // result that forces the agent to update its plan before continuing.
528110
- // Released when todo_write fires successfully. Without this, the agent
528111
- // can re-emit the same plan a second time (plan-replay) and execute
528112
- // duplicate work because PROGRESS NUDGE alone is informational.
528089
+ // REG-12: Progress advisory latch. When ≥6 file writes have happened
528090
+ // without a todo_write call, this latch flips ON. While active, non-todo
528091
+ // tool calls receive model-visible system guidance, but still execute.
528092
+ // Released when todo_write fires successfully. Without this, the agent can
528093
+ // re-emit the same plan a second time (plan-replay) and duplicate work.
528113
528094
  _progressGateActive = false;
528095
+ // Consecutive advisory count. When the model ignores stale progress state,
528096
+ // this counter increments. ≥2 triggers a system message escalation to break
528097
+ // pattern-lock loops.
528098
+ _consecutiveProgressAdvisories = 0;
528114
528099
  // REG-5: Rolling buffer of recent tool failures with their error output.
528115
528100
  // Surfaced before every LLM call so the agent can't ignore "I just ran this
528116
528101
  // and it errored". Detects same-fingerprint failure repetition and escalates
@@ -528340,19 +528325,12 @@ var init_agenticRunner = __esm({
528340
528325
  // explicitly excludes todo_write/memory_write/list_directory.
528341
528326
  _reg61CooldownUntilTurn = -1;
528342
528327
  // BFC-61.G (root-cause from batch529-midi-coerce, 2026-05-03): REG-61
528343
- // PERPETUAL gate until obeyed. Replaces BFC-61.E's one-shot counter,
528344
- // which empirically warned but did not change behavior agents took
528345
- // the [BLOCKED] tool_result and on the very next turn issued ANOTHER
528346
- // read (counter cleared, gate inert). batch529 measured 6 blocks across
528347
- // 7 fires; ZERO of those blocks were followed by a creative edit on
528348
- // the agent's next turn.
528349
- //
528350
- // Semantics: when REG-61 fires, this latch goes true. While true, every
528351
- // non-bypass tool call gets BLOCKED with a synthetic error result. The
528352
- // bypass set includes the 4 creative-edit tools plus task_complete /
528353
- // ask_user / explicit web-task escape hatches. Any creative edit dispatch
528328
+ // persistent advisory until obeyed. Replaces BFC-61.E's one-shot nudge,
528329
+ // which empirically warned but did not change behavior. When REG-61 fires,
528330
+ // this latch goes true. While true, every non-bypass tool call receives
528331
+ // model-visible guidance and still executes. Any creative edit dispatch
528354
528332
  // clears the latch ("directive satisfied"). Shell, file_read, todo_*,
528355
- // grep_search, list_directory etc. are NOT in bypass — those are the
528333
+ // grep_search, list_directory etc. are NOT in bypass because those are the
528356
528334
  // exact patterns batch528/529 agents used to ignore REG-61.
528357
528335
  //
528358
528336
  // Kill switch: OA_DISABLE_REG61_COERCE=1 disables BOTH set and enforce.
@@ -528360,15 +528338,18 @@ var init_agenticRunner = __esm({
528360
528338
  // DECOMP-2 (root-cause from batch531-midi-decomp, 2026-05-03): compelling
528361
528339
  // sub_agent delegation. DECOMP-1's informational directive was ignored
528362
528340
  // (0 sub_agent calls in 466 tool-call run despite directive at turn 1).
528363
- // Mirrors the BFC-61.G escalation arc: when the agent has edited
528364
- // ≥THRESHOLD distinct files in main context WITHOUT successful sub_agent,
528365
- // the dispatcher BLOCKS edits to NEW files (paths not yet edited) until
528366
- // sub_agent succeeds. Edits to already-touched files are still allowed
528367
- // (current-module finishing work). Failed or malformed delegation does
528368
- // not clear the gate.
528341
+ // Mirrors the BFC-61.G escalation arc, but must not deadlock delivery:
528342
+ // when the agent has edited adaptive-threshold distinct files in main
528343
+ // context WITHOUT successful sub_agent, the dispatcher injects guidance
528344
+ // for NEW files (paths not yet edited) while still allowing the edit.
528345
+ // Repeated failed delegation attempts unlock a main-context fallback so the
528346
+ // guardrail cannot become a hard write-deadlock when sub_agent itself is
528347
+ // broken or unavailable.
528369
528348
  // Kill switch: OA_DISABLE_DECOMP2=1.
528370
528349
  _decomp2MainContextFiles = /* @__PURE__ */ new Set();
528371
528350
  _decomp2SubAgentCalls = 0;
528351
+ _decomp2FailedDelegationCalls = 0;
528352
+ _decomp2FallbackAllowed = false;
528372
528353
  _decomp2GateActive = false;
528373
528354
  // MEM_PATH item #9: adaptive retrieval cache. When the (goalHash, recent-tool-sig)
528374
528355
  // hasn't changed since last retrieval, skip the PPR call entirely and reuse
@@ -528893,15 +528874,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528893
528874
  });
528894
528875
  }
528895
528876
  /**
528896
- * DECOMP-2 PRE-dispatch enforcement check. Returns a synthetic block-message
528897
- * string when the gate should reject this dispatch; null to proceed normally.
528877
+ * DECOMP-2 PRE-dispatch advisory check. Returns a model-visible guidance
528878
+ * string when the file-spread advisory should surface; null to proceed
528879
+ * without extra guidance. Callers inject the string as system guidance and
528880
+ * then run the requested tool normally.
528898
528881
  *
528899
- * Caller logic differs between main-loop and brute-force dispatchers:
528900
- * - Main loop returns the block as a synthetic { tc, output } result.
528901
- * - Brute-force emits tool_result + pushes tool message + `continue`.
528902
- * Both invoke this method to compute the decision.
528903
- *
528904
- * Conditions for blocking:
528882
+ * Conditions for surfacing guidance:
528905
528883
  * - `_decomp2GateActive` is true (set by `_trackDecomp2` when threshold crossed)
528906
528884
  * - tool is one of the 4 creative-edit tools
528907
528885
  * - the path is NOT in `_decomp2MainContextFiles` (i.e. it's a NEW file)
@@ -528910,9 +528888,11 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528910
528888
  * Already-touched paths pass through (current-module finishing work allowed).
528911
528889
  * sub_agent / task_complete / explicit web-task tools pass through (not creative-edit tools).
528912
528890
  */
528913
- _maybeDecomp2Block(tc, turn) {
528891
+ _maybeDecomp2Advisory(tc, turn) {
528914
528892
  if (!this._decomp2GateActive)
528915
528893
  return null;
528894
+ if (this._decomp2FallbackAllowed)
528895
+ return null;
528916
528896
  if (process.env["OA_DISABLE_DECOMP2"] === "1")
528917
528897
  return null;
528918
528898
  const _editTools = /* @__PURE__ */ new Set([
@@ -528931,9 +528911,9 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528931
528911
  const _moreFiles = this._decomp2MainContextFiles.size > 8 ? `
528932
528912
  ... +${this._decomp2MainContextFiles.size - 8} more` : "";
528933
528913
  const decomp2Msg = [
528934
- `[BLOCKED — DECOMP-2 main-context exhaustion]`,
528914
+ `[DECOMP-2 advisory — main-context file spread]`,
528935
528915
  ``,
528936
- `You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without invoking sub_agent. Continuing to edit ANOTHER new file ('${_editPath}') will keep your context window saturated and trigger compaction thrashing.`,
528916
+ `You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without a successful sub_agent. Continuing to edit another new file ('${_editPath}') may keep your context window saturated and trigger compaction thrashing.`,
528937
528917
  ``,
528938
528918
  `Files you've already edited (will accept further edits to these):`,
528939
528919
  _filesList,
@@ -528949,36 +528929,40 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528949
528929
  ` })`,
528950
528930
  ` 3. After sub_agent returns, mark the todo completed.`,
528951
528931
  ``,
528932
+ `If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will keep this as advisory-only fallback so file writes can continue.`,
528933
+ ``,
528952
528934
  `Why this matters: spreading edits across N files in main context burns ~N × file_size tokens. sub_agent gives the next module a focused context window.`,
528953
528935
  ``,
528954
528936
  `If you have ALREADY edited '${_editPath}' (this is a continuation), the orchestrator's set must have missed it — call file_read to verify, then re-edit. Otherwise, dispatch sub_agent now.`
528955
528937
  ].join("\n");
528956
528938
  this.emit({
528957
528939
  type: "status",
528958
- content: `DECOMP-2 NEW-FILE BLOCK rejected ${tc.name}('${_editPath}') at turn ${turn}; gate stays active until sub_agent succeeds`,
528940
+ content: `DECOMP-2 ADVISORY — ${tc.name}('${_editPath}') at turn ${turn}; tool allowed, consider sub_agent for the next module`,
528959
528941
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
528960
528942
  });
528961
- this._tagSyntheticFailure({
528962
- mode: "step_repetition",
528963
- rationale: `DECOMP-2 new-file block on '${tc.name}'(${_editPath}) — agent has spread edits across ${this._decomp2MainContextFiles.size} files without sub_agent`
528964
- });
528965
528943
  return decomp2Msg;
528966
528944
  }
528945
+ _decomp2FileSpreadThreshold() {
528946
+ const ctx3 = this.options.contextWindowSize ?? 0;
528947
+ if (ctx3 <= 0)
528948
+ return 5;
528949
+ return Math.max(5, Math.min(30, Math.round(ctx3 / 6400)));
528950
+ }
528967
528951
  /**
528968
528952
  * DECOMP-2 post-dispatch tracking. Refactored from inline so both the
528969
528953
  * main turn loop AND the brute-force re-engagement inner loop record
528970
528954
  * edits / sub_agent calls and check the gate-activation threshold.
528971
528955
  * Without this method on both paths, batch532 measured 14 distinct
528972
528956
  * files edited but DECOMP-2 never activated — because the main loop
528973
- * exited via blocked task_complete and all subsequent edits flowed
528974
- * through the brute-force dispatch which had no tracking.
528957
+ * entered brute-force after task_complete review and all subsequent edits
528958
+ * flowed through the brute-force dispatch which had no tracking.
528975
528959
  *
528976
528960
  * Side effects when fired:
528977
528961
  * - On successful creative edit: adds path to `_decomp2MainContextFiles`,
528978
- * possibly activates `_decomp2GateActive` (emits status).
528962
+ * possibly activates `_decomp2GateActive` (emits advisory status).
528979
528963
  * - On successful sub_agent / priority_delegate / background_run:
528980
- * increments counter, clears gate (emits status). Failed or malformed
528981
- * delegation attempts do not satisfy the gate.
528964
+ * increments counter, clears advisory (emits status). Failed or
528965
+ * malformed delegation attempts do not satisfy the advisory.
528982
528966
  *
528983
528967
  * Pure post-dispatch: caller invokes AFTER the tool result is in hand,
528984
528968
  * regardless of which loop the dispatch happened in.
@@ -528990,12 +528974,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528990
528974
  const _editPaths = this._extractToolTargetPaths(tc.name, tc.arguments, result);
528991
528975
  for (const _editPath of _editPaths) {
528992
528976
  this._decomp2MainContextFiles.add(_editPath);
528993
- const DECOMP2_FILE_SPREAD_THRESHOLD = 5;
528994
- if (!this._decomp2GateActive && this._decomp2MainContextFiles.size >= DECOMP2_FILE_SPREAD_THRESHOLD && this._decomp2SubAgentCalls === 0) {
528977
+ const DECOMP2_FILE_SPREAD_THRESHOLD = this._decomp2FileSpreadThreshold();
528978
+ if (!this._decomp2GateActive && !this._decomp2FallbackAllowed && this._decomp2MainContextFiles.size >= DECOMP2_FILE_SPREAD_THRESHOLD && this._decomp2SubAgentCalls === 0) {
528995
528979
  this._decomp2GateActive = true;
528996
528980
  this.emit({
528997
528981
  type: "status",
528998
- content: `DECOMP-2 NEW-FILE GATE ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls; further edits to NEW files will be blocked until sub_agent succeeds`,
528982
+ content: `DECOMP-2 ADVISORY ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls, threshold=${DECOMP2_FILE_SPREAD_THRESHOLD}; further edits to NEW files remain allowed, but sub_agent is recommended for the next module`,
528999
528983
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
529000
528984
  });
529001
528985
  }
@@ -529004,20 +528988,32 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529004
528988
  if (tc.name === "sub_agent" || tc.name === "priority_delegate" || tc.name === "background_run") {
529005
528989
  if (result?.success !== true) {
529006
528990
  if (this._decomp2GateActive) {
528991
+ this._decomp2FailedDelegationCalls++;
529007
528992
  this.emit({
529008
528993
  type: "status",
529009
528994
  content: `DECOMP-2 DELEGATION FAILED — '${tc.name}' did not clear gate at turn ${turn}; fix delegation arguments/result before editing another new file`,
529010
528995
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
529011
528996
  });
528997
+ if (this._decomp2FailedDelegationCalls >= 2) {
528998
+ this._decomp2FallbackAllowed = true;
528999
+ this._decomp2GateActive = false;
529000
+ this.emit({
529001
+ type: "status",
529002
+ content: `DECOMP-2 FALLBACK UNLOCKED — ${this._decomp2FailedDelegationCalls} failed delegation attempts while gate was active; allowing main-context new-file edits so work can continue`,
529003
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
529004
+ });
529005
+ }
529012
529006
  }
529013
529007
  return;
529014
529008
  }
529015
529009
  this._decomp2SubAgentCalls++;
529010
+ this._decomp2FailedDelegationCalls = 0;
529011
+ this._decomp2FallbackAllowed = false;
529016
529012
  if (this._decomp2GateActive) {
529017
529013
  this._decomp2GateActive = false;
529018
529014
  this.emit({
529019
529015
  type: "status",
529020
- content: `DECOMP-2 GATE CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
529016
+ content: `DECOMP-2 ADVISORY CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
529021
529017
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
529022
529018
  });
529023
529019
  }
@@ -529027,7 +529023,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529027
529023
  * REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
529028
529024
  *
529029
529025
  * Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
529030
- * 5-6 times each + 22 BFC-61.G coercion BLOCKS — and ZERO of those blocks
529026
+ * 5-6 times each + 22 BFC-61.G coercion advisories — and ZERO of those
529031
529027
  * resulted in a creative edit. The agent was rationally stuck: it
529032
529028
  * believed it needed to read more to debug, the build command kept
529033
529029
  * giving the same error, and the standard "issue an edit" directive
@@ -529040,7 +529036,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529040
529036
  * different output. Without this signal we'd just keep telling the
529041
529037
  * agent to "make an edit" — which is exactly what it can't think of.
529042
529038
  *
529043
- * When detected, the BFC-61.G block message swaps to a PERTURB-strategy
529039
+ * When detected, the BFC-61.G advisory message swaps to a PERTURB-strategy
529044
529040
  * directive: stop reading, change ONE thing in the most-likely-culprit
529045
529041
  * file even if you're uncertain, and let the new error signal guide
529046
529042
  * the next iteration. This is real human debugging strategy ("perturb
@@ -529177,27 +529173,6 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529177
529173
  * name with objective evidence, complete remaining items in order, update the
529178
529174
  * checklist via todo_write, and only then call task_complete.
529179
529175
  */
529180
- /**
529181
- * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
529182
- * block / budget exhausted). These paths return early from
529183
- * executeSingle BEFORE the main result-handling code, so the normal
529184
- * MAST tagging miss them. This helper lets each return-early site
529185
- * record a tag directly. Push-only — keeps the tag buffer bounded
529186
- * to 200 entries.
529187
- */
529188
- _tagSyntheticFailure(args) {
529189
- try {
529190
- this._mastTags.push({
529191
- mode: args.mode,
529192
- category: MAST_CATEGORY[args.mode],
529193
- rationale: args.rationale
529194
- });
529195
- if (this._mastTags.length > 200) {
529196
- this._mastTags = this._mastTags.slice(-200);
529197
- }
529198
- } catch {
529199
- }
529200
- }
529201
529176
  /**
529202
529177
  * REG-39b: emit a MAST taxonomy summary as a status event. Called both
529203
529178
  * mid-run (every N turns, so SIGTERM kills don't lose the data) and at
@@ -531097,9 +531072,14 @@ Respond with your assessment, then take action.`;
531097
531072
  this._fileWriteTimestamps = [];
531098
531073
  this._aborting = false;
531099
531074
  this._reg61CooldownUntilTurn = -1;
531075
+ this._writesSinceLastTodoWrite = 0;
531076
+ this._progressGateActive = false;
531077
+ this._consecutiveProgressAdvisories = 0;
531100
531078
  this._reg61PerpetualGateActive = false;
531101
531079
  this._decomp2MainContextFiles = /* @__PURE__ */ new Set();
531102
531080
  this._decomp2SubAgentCalls = 0;
531081
+ this._decomp2FailedDelegationCalls = 0;
531082
+ this._decomp2FallbackAllowed = false;
531103
531083
  this._decomp2GateActive = false;
531104
531084
  if (!globalThis.__oa_rca1_sigterm_installed) {
531105
531085
  globalThis.__oa_rca1_sigterm_installed = true;
@@ -533396,24 +533376,9 @@ ${memoryLines.join("\n")}`
533396
533376
  });
533397
533377
  }
533398
533378
  {
533399
- const _decomp2Block = this._maybeDecomp2Block(tc, turn);
533400
- if (_decomp2Block) {
533401
- this.emit({
533402
- type: "tool_call",
533403
- toolName: tc.name,
533404
- toolArgs: tc.arguments,
533405
- turn,
533406
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533407
- });
533408
- this.emit({
533409
- type: "tool_result",
533410
- toolName: tc.name,
533411
- success: false,
533412
- content: _decomp2Block.slice(0, 120),
533413
- turn,
533414
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533415
- });
533416
- return { tc, output: _decomp2Block };
533379
+ const _decomp2Advisory = this._maybeDecomp2Advisory(tc, turn);
533380
+ if (_decomp2Advisory) {
533381
+ pushSoftInjection("system", _decomp2Advisory);
533417
533382
  }
533418
533383
  }
533419
533384
  const PROGRESS_GATE_BYPASS_TOOLS = /* @__PURE__ */ new Set([
@@ -533425,74 +533390,55 @@ ${memoryLines.join("\n")}`
533425
533390
  // useful for the agent to consult prior phase state before updating
533426
533391
  ]);
533427
533392
  if (this._progressGateActive && !PROGRESS_GATE_BYPASS_TOOLS.has(tc.name)) {
533428
- this.emit({
533429
- type: "tool_call",
533430
- toolName: tc.name,
533431
- toolArgs: tc.arguments,
533432
- turn,
533433
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533434
- });
533435
533393
  const recentWrites = [];
533436
533394
  for (const [path11, info] of this._worldFacts.files) {
533437
533395
  if ((info.writeCount ?? 0) > 0 && (info.lastWriteTurn ?? -1) >= 0 && turn - (info.lastWriteTurn ?? 0) <= 8) {
533438
533396
  recentWrites.push({ path: path11, turn: info.lastWriteTurn ?? 0 });
533439
533397
  }
533440
533398
  }
533399
+ this._consecutiveProgressAdvisories++;
533441
533400
  recentWrites.sort((a2, b) => b.turn - a2.turn);
533442
533401
  const showWrites = recentWrites.slice(0, 16);
533402
+ const isRepeat = this._consecutiveProgressAdvisories >= 2;
533443
533403
  const gateMsg = [
533444
- `[PROGRESS GATE — call todo_write FIRST before any other tool]`,
533404
+ `[PROGRESS ADVISORY — todo update recommended]`,
533445
533405
  ``,
533446
- `You have completed ${this._writesSinceLastTodoWrite} file modification${this._writesSinceLastTodoWrite === 1 ? "" : "s"} since your last todo_write call.`,
533447
- `The next tool call MUST be todo_write to mark progress. This is enforced — non-todo tool calls are intercepted until plan state is updated.`,
533406
+ `CAUSE: ${this._writesSinceLastTodoWrite} file writes since last todo_write call. Without progress tracking, the next turn re-plans the same work (plan-replay).`,
533407
+ `EFFECT: The requested tool is still allowed. Update todos soon so progress state stays accurate.`,
533408
+ `ACTION RECOMMENDED: Call todo_write with updated progress after this tool call if the recent writes satisfy todo items.`,
533409
+ isRepeat ? `
533410
+ [ESCALATION: This is advisory #${this._consecutiveProgressAdvisories}. You are continuing without todo_write; progress replay risk is increasing.]` : "",
533448
533411
  ``,
533449
533412
  `Recent file modifications (use these to decide what's done):`,
533450
533413
  ...showWrites.map((w) => ` • ${w.path} (turn ${w.turn})`),
533451
533414
  recentWrites.length > showWrites.length ? ` • ... +${recentWrites.length - showWrites.length} more` : "",
533452
533415
  ``,
533453
- `Required action: call todo_write with the updated todo array — mark anything completed that these writes satisfy, advance the next item to in_progress, keep the rest pending.`,
533454
- `After todo_write succeeds, this gate releases and you can continue normal work.`,
533455
- ``,
533456
- `Why this exists: without the explicit progress update, your next turn will see the same in_progress todo, re-plan the same work, and re-emit identical tool calls (the "plan replay" failure mode that causes byte-identical writes to appear twice).`
533416
+ `Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress.`
533457
533417
  ].filter(Boolean).join("\n");
533418
+ pushSoftInjection("system", gateMsg);
533458
533419
  this.emit({
533459
- type: "tool_result",
533420
+ type: "status",
533460
533421
  toolName: tc.name,
533461
- success: false,
533462
- content: gateMsg.slice(0, 120),
533422
+ content: `PROGRESS ADVISORY — ${this._writesSinceLastTodoWrite} writes since todo_write; '${tc.name}' allowed`,
533463
533423
  turn,
533464
533424
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533465
533425
  });
533466
- return { tc, output: gateMsg };
533467
533426
  }
533468
533427
  const _argsKeyForBudget = `${tc.name}:${argsKey}`;
533469
533428
  const _isCachedHit = recentToolResults.has(_argsKeyForBudget);
533470
533429
  const budgetRemaining = toolCallBudget.get(tc.name);
533471
533430
  if (budgetRemaining !== void 0 && !_isCachedHit) {
533472
533431
  if (budgetRemaining <= 0) {
533432
+ const budgetMsg = `[BUDGET ADVISORY] You have used all ${toolBudgets[tc.name]} recommended ${tc.name} calls for the current phase. The requested tool call is still allowed, but repeated use may be low-value. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
533433
+ pushSoftInjection("system", budgetMsg);
533473
533434
  this.emit({
533474
- type: "tool_call",
533475
- toolName: tc.name,
533476
- toolArgs: tc.arguments,
533477
- turn,
533478
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533479
- });
533480
- const budgetMsg = `[BUDGET EXHAUSTED] You have used all ${toolBudgets[tc.name]} allowed ${tc.name} calls for the current phase. You ALREADY have enough information from previous calls. DO NOT try to call ${tc.name} again — it will be blocked. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
533481
- this.emit({
533482
- type: "tool_result",
533483
- toolName: tc.name,
533484
- success: false,
533485
- content: budgetMsg.slice(0, 120),
533486
- turn,
533435
+ type: "status",
533436
+ content: `BUDGET ADVISORY — ${tc.name} exceeded recommended per-phase budget; tool allowed`,
533487
533437
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533488
533438
  });
533489
- this._tagSyntheticFailure({
533490
- mode: "step_repetition",
533491
- rationale: `${tc.name} exhausted per-phase budget of ${toolBudgets[tc.name]}`
533492
- });
533493
- return { tc, output: budgetMsg };
533439
+ } else {
533440
+ toolCallBudget.set(tc.name, budgetRemaining - 1);
533494
533441
  }
533495
- toolCallBudget.set(tc.name, budgetRemaining - 1);
533496
533442
  }
533497
533443
  const toolFingerprint = this._buildToolFingerprint(tc.name, tc.arguments ?? {});
533498
533444
  const baseIsReadLike = ![
@@ -533577,29 +533523,15 @@ ${memoryLines.join("\n")}`
533577
533523
  observerRedundantBlock
533578
533524
  });
533579
533525
  if (criticDecision.decision === "observer_block") {
533580
- this.emit({
533581
- type: "tool_call",
533582
- toolName: tc.name,
533583
- toolArgs: tc.arguments,
533584
- turn,
533585
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533586
- });
533587
- const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
533526
+ const advisoryMsg = criticDecision.cachedResult ? `[DUPLICATE-CALL ADVISORY — this tool+args already succeeded. Prior result preview:]
533588
533527
 
533589
- ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
533528
+ ${criticDecision.cachedResult.slice(0, 500)}` : `[DUPLICATE-CALL ADVISORY — the observer confirmed this tool already succeeded with these arguments on a prior turn. The call is still allowed, but using prior findings may be faster.]`;
533529
+ pushSoftInjection("system", advisoryMsg);
533590
533530
  this.emit({
533591
- type: "tool_result",
533592
- toolName: tc.name,
533593
- success: true,
533594
- content: blockMsg.slice(0, 100),
533595
- turn,
533531
+ type: "status",
533532
+ content: `DUPLICATE-CALL ADVISORY — ${tc.name} allowed despite observer redundancy`,
533596
533533
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533597
533534
  });
533598
- this._tagSyntheticFailure({
533599
- mode: "step_repetition",
533600
- rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
533601
- });
533602
- return { tc, output: blockMsg };
533603
533535
  }
533604
533536
  if (criticDecision.decision === "force_progress_block") {
533605
533537
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -533608,26 +533540,12 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533608
533540
  recentToolResults.delete(toolFingerprint);
533609
533541
  recentToolResults.set(toolFingerprint, _existingFp);
533610
533542
  }
533543
+ pushSoftInjection("system", criticDecision.blockMessage.replace(/\[FORCED PROGRESS BLOCK[^\]]*\]|\[BLOCKED[^\]]*\]/gi, "[REPETITION ADVISORY]"));
533611
533544
  this.emit({
533612
- type: "tool_call",
533613
- toolName: tc.name,
533614
- toolArgs: tc.arguments,
533615
- turn,
533616
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533617
- });
533618
- this.emit({
533619
- type: "tool_result",
533620
- toolName: tc.name,
533621
- success: false,
533622
- content: criticDecision.blockMessage.slice(0, 120),
533623
- turn,
533545
+ type: "status",
533546
+ content: `REPETITION ADVISORY — ${tc.name} repeated ${criticDecision.hitNumber} times; tool allowed`,
533624
533547
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533625
533548
  });
533626
- this._tagSyntheticFailure({
533627
- mode: "step_repetition",
533628
- rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
533629
- });
533630
- return { tc, output: criticDecision.blockMessage };
533631
533549
  }
533632
533550
  if (criticDecision.decision === "serve_cached") {
533633
533551
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -533636,30 +533554,20 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533636
533554
  recentToolResults.delete(toolFingerprint);
533637
533555
  recentToolResults.set(toolFingerprint, _existingFp);
533638
533556
  }
533639
- this.emit({
533640
- type: "tool_call",
533641
- toolName: tc.name,
533642
- toolArgs: tc.arguments,
533643
- turn,
533644
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533645
- });
533646
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
533557
+ const header = criticDecision.compacted ? `[CACHE ADVISORY — the original result was compacted from context. Prior data preview follows; the tool call is still allowed.]
533647
533558
 
533648
- ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
533559
+ ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result may be identical. The tool call is still allowed, but using existing data may be faster.]
533649
533560
 
533650
533561
  `;
533651
533562
  const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
533652
533563
  ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
533653
533564
  const dedupOutput = header + truncatedCache;
533565
+ pushSoftInjection("system", dedupOutput);
533654
533566
  this.emit({
533655
- type: "tool_result",
533656
- toolName: tc.name,
533657
- success: true,
533658
- content: header.slice(0, 100),
533659
- turn,
533567
+ type: "status",
533568
+ content: `CACHE ADVISORY — ${tc.name} duplicate call #${criticDecision.hitNumber}; tool allowed`,
533660
533569
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533661
533570
  });
533662
- return { tc, output: dedupOutput };
533663
533571
  }
533664
533572
  this.emit({
533665
533573
  type: "tool_call",
@@ -533709,6 +533617,18 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533709
533617
  const violations = checkConstraints(tc.name, tc.arguments);
533710
533618
  const blockViolations = violations.filter((v) => v.constraint.action === "block");
533711
533619
  if (blockViolations.length > 0) {
533620
+ const warning = formatViolationWarning(blockViolations);
533621
+ pushSoftInjection("system", `[CONSTRAINT ADVISORY]
533622
+ ${warning}
533623
+ The tool call is still allowed by the no-hard-block policy. Treat this as risk guidance, not a runtime denial.`);
533624
+ this.emit({
533625
+ type: "status",
533626
+ content: `Constraint advisory: ${blockViolations[0].constraint.message}`,
533627
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
533628
+ });
533629
+ pendingConstraintWarnings.push(warning);
533630
+ }
533631
+ if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && blockViolations.length > 0) {
533712
533632
  result = {
533713
533633
  success: false,
533714
533634
  output: "",
@@ -533732,6 +533652,16 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533732
533652
  }
533733
533653
  const hookCheck = this._hookManager.runPreToolUse(tc.name, tc.arguments, this._sessionId);
533734
533654
  if (!hookCheck.allowed) {
533655
+ pushSoftInjection("system", `[HOOK ADVISORY]
533656
+ Tool "${tc.name}" matched a pre-tool hook warning: ${hookCheck.reason ?? "hook denied"}.
533657
+ The tool call is still allowed by the no-hard-block policy. Treat this as guidance and proceed deliberately.`);
533658
+ this.emit({
533659
+ type: "status",
533660
+ content: `Hook advisory for ${tc.name}: ${hookCheck.reason ?? "hook denied"}. Tool allowed by no-hard-block policy.`,
533661
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
533662
+ });
533663
+ }
533664
+ if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && !hookCheck.allowed) {
533735
533665
  result = {
533736
533666
  success: false,
533737
533667
  output: "",
@@ -533888,7 +533818,7 @@ Respond with EXACTLY this structure before your next tool call:
533888
533818
  this._reg61PerpetualGateActive = false;
533889
533819
  this.emit({
533890
533820
  type: "status",
533891
- content: `REG-61 GATE CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
533821
+ content: `REG-61 ADVISORY CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
533892
533822
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533893
533823
  });
533894
533824
  }
@@ -533923,7 +533853,7 @@ Respond with EXACTLY this structure before your next tool call:
533923
533853
  this._progressGateActive = true;
533924
533854
  this.emit({
533925
533855
  type: "status",
533926
- content: `Progress gate engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools will be blocked until plan is updated`,
533856
+ content: `Progress advisory engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools remain allowed, todo update recommended`,
533927
533857
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533928
533858
  });
533929
533859
  }
@@ -533939,6 +533869,7 @@ Respond with EXACTLY this structure before your next tool call:
533939
533869
  }
533940
533870
  this._writesSinceLastTodoWrite = 0;
533941
533871
  this._progressGateActive = false;
533872
+ this._consecutiveProgressAdvisories = 0;
533942
533873
  }
533943
533874
  if (tc.name === "file_read") {
533944
533875
  const p2 = String(tc.arguments?.["path"] ?? tc.arguments?.["file"] ?? "");
@@ -534947,6 +534878,12 @@ Then use file_read on individual FILES inside it.`);
534947
534878
  const output = sr.result.success ? sr.result.output : `Error: ${sr.result.error || "unknown"}
534948
534879
  ${sr.result.output}`;
534949
534880
  messages2.push(this.buildToolMessage(output, matchTc.id, matchTc.name));
534881
+ if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
534882
+ messages2.push({
534883
+ role: "system",
534884
+ content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
534885
+ });
534886
+ }
534950
534887
  if (matchTc.name === "task_complete") {
534951
534888
  const open2 = this.getOpenTodoItems();
534952
534889
  if (open2.length > 0) {
@@ -534954,28 +534891,32 @@ ${sr.result.output}`;
534954
534891
  messages2.push({ role: "system", content: guard });
534955
534892
  this.emit({
534956
534893
  type: "status",
534957
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
534894
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
534958
534895
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
534959
534896
  });
534960
- } else {
534961
- const _bp1 = await this._runBackwardPassReview(turn);
534962
- if (_bp1 && !_bp1.proceed && _bp1.feedback) {
534963
- messages2.push({ role: "system", content: _bp1.feedback });
534964
- } else {
534965
- completed = true;
534966
- summary = extractTaskCompleteSummary(matchTc.arguments);
534967
- if (summary && !this._assistantTextEmitted) {
534968
- this.emit({
534969
- type: "assistant_text",
534970
- content: summary,
534971
- turn,
534972
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
534973
- });
534974
- this._assistantTextEmitted = true;
534975
- }
534976
- break;
534977
- }
534978
534897
  }
534898
+ const _bp1 = await this._runBackwardPassReview(turn);
534899
+ if (_bp1 && !_bp1.proceed && _bp1.feedback) {
534900
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
534901
+ ${_bp1.feedback}` });
534902
+ this.emit({
534903
+ type: "status",
534904
+ content: "completion review advisory surfaced; task_complete allowed",
534905
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534906
+ });
534907
+ }
534908
+ completed = true;
534909
+ summary = extractTaskCompleteSummary(matchTc.arguments);
534910
+ if (summary && !this._assistantTextEmitted) {
534911
+ this.emit({
534912
+ type: "assistant_text",
534913
+ content: summary,
534914
+ turn,
534915
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534916
+ });
534917
+ this._assistantTextEmitted = true;
534918
+ }
534919
+ break;
534979
534920
  }
534980
534921
  }
534981
534922
  }
@@ -534987,6 +534928,12 @@ ${sr.result.output}`;
534987
534928
  const r2 = await executeSingle(tc);
534988
534929
  if (r2) {
534989
534930
  messages2.push(this.buildToolMessage(r2.output, r2.tc.id, r2.tc.name));
534931
+ if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
534932
+ messages2.push({
534933
+ role: "system",
534934
+ content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
534935
+ });
534936
+ }
534990
534937
  if (r2.tc.name === "task_complete") {
534991
534938
  const open2 = this.getOpenTodoItems();
534992
534939
  if (open2.length > 0) {
@@ -534994,28 +534941,32 @@ ${sr.result.output}`;
534994
534941
  messages2.push({ role: "system", content: guard });
534995
534942
  this.emit({
534996
534943
  type: "status",
534997
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
534944
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
534998
534945
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
534999
534946
  });
535000
- } else {
535001
- const _bp2 = await this._runBackwardPassReview(turn);
535002
- if (_bp2 && !_bp2.proceed && _bp2.feedback) {
535003
- messages2.push({ role: "system", content: _bp2.feedback });
535004
- } else {
535005
- completed = true;
535006
- summary = extractTaskCompleteSummary(r2.tc.arguments);
535007
- if (summary && !this._assistantTextEmitted) {
535008
- this.emit({
535009
- type: "assistant_text",
535010
- content: summary,
535011
- turn,
535012
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535013
- });
535014
- this._assistantTextEmitted = true;
535015
- }
535016
- break;
535017
- }
535018
534947
  }
534948
+ const _bp2 = await this._runBackwardPassReview(turn);
534949
+ if (_bp2 && !_bp2.proceed && _bp2.feedback) {
534950
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
534951
+ ${_bp2.feedback}` });
534952
+ this.emit({
534953
+ type: "status",
534954
+ content: "completion review advisory surfaced; task_complete allowed",
534955
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534956
+ });
534957
+ }
534958
+ completed = true;
534959
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
534960
+ if (summary && !this._assistantTextEmitted) {
534961
+ this.emit({
534962
+ type: "assistant_text",
534963
+ content: summary,
534964
+ turn,
534965
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534966
+ });
534967
+ this._assistantTextEmitted = true;
534968
+ }
534969
+ break;
535019
534970
  }
535020
534971
  }
535021
534972
  }
@@ -535069,36 +535020,40 @@ ${sr.result.output}`;
535069
535020
  messages2.push({ role: "system", content: guard });
535070
535021
  this.emit({
535071
535022
  type: "status",
535072
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
535023
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535073
535024
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535074
535025
  });
535075
- } else {
535076
- const _bp3 = await this._runBackwardPassReview(turn);
535077
- if (_bp3 && !_bp3.proceed && _bp3.feedback) {
535078
- messages2.push({ role: "system", content: _bp3.feedback });
535079
- } else {
535080
- completed = true;
535081
- summary = extractTaskCompleteSummary(r2.tc.arguments);
535082
- for (const tool of this.tools.values()) {
535083
- if (tool.cleanup) {
535084
- try {
535085
- await tool.cleanup();
535086
- } catch {
535087
- }
535088
- }
535089
- }
535090
- if (summary && !this._assistantTextEmitted) {
535091
- this.emit({
535092
- type: "assistant_text",
535093
- content: summary,
535094
- turn,
535095
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535096
- });
535097
- this._assistantTextEmitted = true;
535026
+ }
535027
+ const _bp3 = await this._runBackwardPassReview(turn);
535028
+ if (_bp3 && !_bp3.proceed && _bp3.feedback) {
535029
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
535030
+ ${_bp3.feedback}` });
535031
+ this.emit({
535032
+ type: "status",
535033
+ content: "completion review advisory surfaced; task_complete allowed",
535034
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535035
+ });
535036
+ }
535037
+ completed = true;
535038
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
535039
+ for (const tool of this.tools.values()) {
535040
+ if (tool.cleanup) {
535041
+ try {
535042
+ await tool.cleanup();
535043
+ } catch {
535098
535044
  }
535099
- break;
535100
535045
  }
535101
535046
  }
535047
+ if (summary && !this._assistantTextEmitted) {
535048
+ this.emit({
535049
+ type: "assistant_text",
535050
+ content: summary,
535051
+ turn,
535052
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535053
+ });
535054
+ this._assistantTextEmitted = true;
535055
+ }
535056
+ break;
535102
535057
  }
535103
535058
  }
535104
535059
  }
@@ -535711,18 +535666,9 @@ Integrate this guidance into your current approach. Continue working on the task
535711
535666
  turn,
535712
535667
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535713
535668
  });
535714
- const _decomp2BFBlock = this._maybeDecomp2Block(tc, turn);
535715
- if (_decomp2BFBlock) {
535716
- this.emit({
535717
- type: "tool_result",
535718
- toolName: tc.name,
535719
- content: _decomp2BFBlock.slice(0, 200),
535720
- success: false,
535721
- turn,
535722
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535723
- });
535724
- messages2.push(this.buildToolMessage(_decomp2BFBlock, tc.id, tc.name));
535725
- continue;
535669
+ const _decomp2BFAdvisory = this._maybeDecomp2Advisory(tc, turn);
535670
+ if (_decomp2BFAdvisory) {
535671
+ messages2.push({ role: "system", content: _decomp2BFAdvisory });
535726
535672
  }
535727
535673
  const tool = this.tools.get(tc.name);
535728
535674
  let result;
@@ -535823,28 +535769,32 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
535823
535769
  messages2.push({ role: "system", content: guard });
535824
535770
  this.emit({
535825
535771
  type: "status",
535826
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
535772
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535827
535773
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535828
535774
  });
535829
- } else {
535830
- const _bp4 = await this._runBackwardPassReview(turn);
535831
- if (_bp4 && !_bp4.proceed && _bp4.feedback) {
535832
- messages2.push({ role: "system", content: _bp4.feedback });
535833
- continue;
535834
- }
535835
- completed = true;
535836
- summary = extractTaskCompleteSummary(tc.arguments);
535837
- if (summary && !this._assistantTextEmitted) {
535838
- this.emit({
535839
- type: "assistant_text",
535840
- content: summary,
535841
- turn,
535842
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535843
- });
535844
- this._assistantTextEmitted = true;
535845
- }
535846
- break;
535847
535775
  }
535776
+ const _bp4 = await this._runBackwardPassReview(turn);
535777
+ if (_bp4 && !_bp4.proceed && _bp4.feedback) {
535778
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
535779
+ ${_bp4.feedback}` });
535780
+ this.emit({
535781
+ type: "status",
535782
+ content: "completion review advisory surfaced; task_complete allowed",
535783
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535784
+ });
535785
+ }
535786
+ completed = true;
535787
+ summary = extractTaskCompleteSummary(tc.arguments);
535788
+ if (summary && !this._assistantTextEmitted) {
535789
+ this.emit({
535790
+ type: "assistant_text",
535791
+ content: summary,
535792
+ turn,
535793
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535794
+ });
535795
+ this._assistantTextEmitted = true;
535796
+ }
535797
+ break;
535848
535798
  }
535849
535799
  }
535850
535800
  if (completed)
@@ -535872,14 +535822,13 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
535872
535822
  messages2.push({ role: "system", content: guard });
535873
535823
  this.emit({
535874
535824
  type: "status",
535875
- content: `task_complete text detected but intercepted — ${open2.length} open todo(s) remain`,
535825
+ content: `task_complete text advisory — ${open2.length} open todo(s) remain; completion allowed`,
535876
535826
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535877
535827
  });
535878
- } else {
535879
- completed = true;
535880
- summary = content;
535881
- break;
535882
535828
  }
535829
+ completed = true;
535830
+ summary = content;
535831
+ break;
535883
535832
  }
535884
535833
  const finalVarMatch = content.match(/FINAL_VAR\s*\(\s*["']?(\w+)["']?\s*\)/);
535885
535834
  if (finalVarMatch && this.options.finalVarResolver) {
@@ -614072,7 +614021,7 @@ function createTaskCompleteTool(modelTier) {
614072
614021
  const summaryDesc = modelTier === "small" || modelTier === "medium" ? "Your complete response to the user. For questions/chat: put your FULL answer here (this is what the user will see). For coding tasks: brief summary of what was accomplished." : "Brief summary of what was accomplished";
614073
614022
  return {
614074
614023
  name: "task_complete",
614075
- description: "Signal that the task is complete. GUARDED: cannot fire while the active todo list (todo_write) has pending, in_progress, or blocked items. If you're truly done, first call todo_write to mark every remaining item completed. If you're not done, continue working down the list and call this only after the last item flips to completed.",
614024
+ description: "Signal that the task is complete. ADVISORY: if active todos, interactive sessions, or build checks indicate risk, the tool result will include model-visible guidance, but the tool is not hard-blocked.",
614076
614025
  parameters: {
614077
614026
  type: "object",
614078
614027
  properties: {
@@ -614081,11 +614030,14 @@ function createTaskCompleteTool(modelTier) {
614081
614030
  required: ["summary"]
614082
614031
  },
614083
614032
  async execute(args) {
614033
+ const summary = args["summary"] || "Task completed.";
614084
614034
  if (_interactiveSessionActive) {
614085
614035
  return {
614086
- success: false,
614087
- output: `SESSION STILL ACTIVE. Call your next interaction tool NOW. Do NOT produce text — call a tool immediately to continue the session.`,
614088
- error: `task_complete BLOCKED — interactive session still active. ${_interactiveSessionReason} You MUST continue the interaction loop until the session ends. Do NOT call task_complete until you receive a termination signal (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). YOUR NEXT ACTION: call the listening/polling tool to continue the session.`
614036
+ success: true,
614037
+ output: `[TASK_COMPLETE ADVISORY — interactive session still active]
614038
+ ${_interactiveSessionReason} You should continue the interaction loop until the session ends (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). The completion is allowed by no-hard-block policy.
614039
+
614040
+ ${summary}`
614089
614041
  };
614090
614042
  }
614091
614043
  try {
@@ -614111,14 +614063,17 @@ Respond concisely in this shape:
614111
614063
  - verify: [{ name: "<exact item text>", completed: true|false, evidence: "<objective proof>" }, ...]
614112
614064
  - next: "what you will do next OR the exact todo_write(...) call to update statuses"`;
614113
614065
  return {
614114
- success: false,
614115
- output: "",
614116
- error: `task_complete BLOCKED — ${incomplete.length} todo item(s) still incomplete.
614066
+ success: true,
614067
+ output: `[TASK_COMPLETE ADVISORY — ${incomplete.length} todo item(s) still incomplete]
614117
614068
 
614118
614069
  Incomplete items:
614119
614070
  ${incompleteList}${more}
614120
614071
 
614121
- ` + guidance
614072
+ ${guidance}
614073
+
614074
+ Completion is allowed by no-hard-block policy.
614075
+
614076
+ ${summary}`
614122
614077
  };
614123
614078
  }
614124
614079
  try {
@@ -614128,7 +614083,6 @@ ${incompleteList}${more}
614128
614083
  }
614129
614084
  } catch {
614130
614085
  }
614131
- const summary = args["summary"] || "Task completed.";
614132
614086
  const buildGuardSkip = process.env["OA_DISABLE_TASK_COMPLETE_BUILD_GUARD"] === "1" || /^\s*BLOCKED\b/i.test(summary);
614133
614087
  if (!buildGuardSkip) {
614134
614088
  try {
@@ -614152,16 +614106,15 @@ ${incompleteList}${more}
614152
614106
  } catch (e2) {
614153
614107
  const out = ((e2?.stdout || "") + (e2?.stderr || "")).toString().slice(0, 2e3);
614154
614108
  return {
614155
- success: false,
614156
- output: "",
614157
- error: `task_complete BLOCKED — \`npm run ${checkScript}\` is failing in ${cwd4}.
614109
+ success: true,
614110
+ output: `[TASK_COMPLETE ADVISORY — \`npm run ${checkScript}\` is failing in ${cwd4}]
614158
614111
 
614159
614112
  Error output (last 2KB):
614160
614113
  ${out || "<empty stdout/stderr — likely timeout or non-zero exit>"}
614161
614114
 
614162
- Fix the build errors before calling task_complete. If you genuinely cannot fix it (e.g. missing tool, env issue), call task_complete with a summary that STARTS with "BLOCKED: " and explains why.
614115
+ Recommended: fix the build errors before considering the work done. Completion is still allowed by no-hard-block policy.
614163
614116
 
614164
- Bypass for special cases: set env OA_DISABLE_TASK_COMPLETE_BUILD_GUARD=1 (not recommended).`
614117
+ ${summary}`
614165
614118
  };
614166
614119
  }
614167
614120
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.574",
3
+ "version": "0.187.576",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.574",
9
+ "version": "0.187.576",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.574",
3
+ "version": "0.187.576",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",