open-agents-ai 0.187.575 → 0.187.576

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -515299,26 +515299,10 @@ function summarizeMAST(tags) {
515299
515299
  }
515300
515300
  return { byMode, byCategory, total: tags.length };
515301
515301
  }
515302
- var MAST_CATEGORY;
515303
515302
  var init_mast_tagger = __esm({
515304
515303
  "packages/orchestrator/dist/mast-tagger.js"() {
515305
515304
  "use strict";
515306
515305
  init_reflection();
515307
- MAST_CATEGORY = {
515308
- spec_disobedience: "specification_design",
515309
- step_repetition: "specification_design",
515310
- history_loss: "specification_design",
515311
- completion_unrecognized: "specification_design",
515312
- input_ignored: "inter_agent_misalignment",
515313
- proceeded_without_clarify: "inter_agent_misalignment",
515314
- conversation_reset: "inter_agent_misalignment",
515315
- reasoning_action_mismatch: "inter_agent_misalignment",
515316
- premature_termination: "task_verification_termination",
515317
- validation_skipped: "task_verification_termination",
515318
- shallow_check_accepted: "task_verification_termination",
515319
- premature_task_complete: "task_verification_termination",
515320
- other: "specification_design"
515321
- };
515322
515306
  }
515323
515307
  });
515324
515308
 
@@ -528102,19 +528086,16 @@ var init_agenticRunner = __esm({
528102
528086
  // a phase's worth of work without recording progress — and on the next
528103
528087
  // turn will replay the same plan. Surface a nudge before that happens.
528104
528088
  _writesSinceLastTodoWrite = 0;
528105
- // REG-12: Progress gate (root-cause enforcement). When ≥6 file writes
528106
- // have happened without a todo_write call, this latch flips ON. While
528107
- // the latch is on, every tool call EXCEPT todo_write/todo_read/
528108
- // task_complete/ask_user is intercepted with a synthetic '[PROGRESS GATE]'
528109
- // result that forces the agent to update its plan before continuing.
528110
- // Released when todo_write fires successfully. Without this, the agent
528111
- // can re-emit the same plan a second time (plan-replay) and execute
528112
- // duplicate work because PROGRESS NUDGE alone is informational.
528089
+ // REG-12: Progress advisory latch. When ≥6 file writes have happened
528090
+ // without a todo_write call, this latch flips ON. While active, non-todo
528091
+ // tool calls receive model-visible system guidance, but still execute.
528092
+ // Released when todo_write fires successfully. Without this, the agent can
528093
+ // re-emit the same plan a second time (plan-replay) and duplicate work.
528113
528094
  _progressGateActive = false;
528114
- // Consecutive gate blocks count. When the model ignores the gate and
528115
- // retries a blocked tool, this counter increments. ≥2 triggers a system
528116
- // message escalation to break pattern-lock loops.
528117
- _consecutiveGateBlocks = 0;
528095
+ // Consecutive advisory count. Incremented each time a non-bypass tool call
528096
+ // proceeds while the progress latch is on; reset to 0 when the latch is
528097
+ // released. ≥2 triggers an escalation note to break pattern-lock loops.
528098
+ _consecutiveProgressAdvisories = 0;
528118
528099
  // REG-5: Rolling buffer of recent tool failures with their error output.
528119
528100
  // Surfaced before every LLM call so the agent can't ignore "I just ran this
528120
528101
  // and it errored". Detects same-fingerprint failure repetition and escalates
@@ -528344,19 +528325,12 @@ var init_agenticRunner = __esm({
528344
528325
  // explicitly excludes todo_write/memory_write/list_directory.
528345
528326
  _reg61CooldownUntilTurn = -1;
528346
528327
  // BFC-61.G (root-cause from batch529-midi-coerce, 2026-05-03): REG-61
528347
- // PERPETUAL gate until obeyed. Replaces BFC-61.E's one-shot counter,
528348
- // which empirically warned but did not change behavior agents took
528349
- // the [BLOCKED] tool_result and on the very next turn issued ANOTHER
528350
- // read (counter cleared, gate inert). batch529 measured 6 blocks across
528351
- // 7 fires; ZERO of those blocks were followed by a creative edit on
528352
- // the agent's next turn.
528353
- //
528354
- // Semantics: when REG-61 fires, this latch goes true. While true, every
528355
- // non-bypass tool call gets BLOCKED with a synthetic error result. The
528356
- // bypass set includes the 4 creative-edit tools plus task_complete /
528357
- // ask_user / explicit web-task escape hatches. Any creative edit dispatch
528328
+ // persistent advisory until obeyed. Replaces BFC-61.E's one-shot nudge,
528329
+ // which empirically warned but did not change behavior. When REG-61 fires,
528330
+ // this latch goes true. While true, every non-bypass tool call receives
528331
+ // model-visible guidance and still executes. Any creative edit dispatch
528358
528332
  // clears the latch ("directive satisfied"). Shell, file_read, todo_*,
528359
- // grep_search, list_directory etc. are NOT in bypass those are the
528333
+ // grep_search, list_directory etc. are NOT in bypass because those are the
528360
528334
  // exact patterns batch528/529 agents used to ignore REG-61.
528361
528335
  //
528362
528336
  // Kill switch: OA_DISABLE_REG61_COERCE=1 disables BOTH set and enforce.
@@ -528365,13 +528339,12 @@ var init_agenticRunner = __esm({
528365
528339
  // sub_agent delegation. DECOMP-1's informational directive was ignored
528366
528340
  // (0 sub_agent calls in 466 tool-call run despite directive at turn 1).
528367
528341
  // Mirrors the BFC-61.G escalation arc, but must not deadlock delivery:
528368
- // when the agent has edited ≥adaptive-threshold distinct files in main context
528369
- // WITHOUT successful sub_agent,
528370
- // the dispatcher BLOCKS edits to NEW files (paths not yet edited) until
528371
- // sub_agent succeeds. Edits to already-touched files are still allowed
528372
- // (current-module finishing work). Repeated failed delegation attempts
528373
- // unlock a main-context fallback so the guardrail cannot become a hard
528374
- // write-deadlock when sub_agent itself is broken or unavailable.
528342
+ // when the agent has edited ≥adaptive-threshold distinct files in main
528343
+ // context WITHOUT successful sub_agent, the dispatcher injects guidance
528344
+ // for NEW files (paths not yet edited) while still allowing the edit.
528345
+ // Repeated failed delegation attempts unlock a main-context fallback so the
528346
+ // guardrail cannot become a hard write-deadlock when sub_agent itself is
528347
+ // broken or unavailable.
528375
528348
  // Kill switch: OA_DISABLE_DECOMP2=1.
528376
528349
  _decomp2MainContextFiles = /* @__PURE__ */ new Set();
528377
528350
  _decomp2SubAgentCalls = 0;
@@ -528901,15 +528874,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528901
528874
  });
528902
528875
  }
528903
528876
  /**
528904
- * DECOMP-2 PRE-dispatch enforcement check. Returns a synthetic block-message
528905
- * string when the gate should reject this dispatch; null to proceed normally.
528877
+ * DECOMP-2 PRE-dispatch advisory check. Returns a model-visible guidance
528878
+ * string when the file-spread advisory should surface; null to proceed
528879
+ * without extra guidance. Callers inject the string as system guidance and
528880
+ * then run the requested tool normally.
528906
528881
  *
528907
- * Caller logic differs between main-loop and brute-force dispatchers:
528908
- * - Main loop returns the block as a synthetic { tc, output } result.
528909
- * - Brute-force emits tool_result + pushes tool message + `continue`.
528910
- * Both invoke this method to compute the decision.
528911
- *
528912
- * Conditions for blocking:
528882
+ * Conditions for surfacing guidance:
528913
528883
  * - `_decomp2GateActive` is true (set by `_trackDecomp2` when threshold crossed)
528914
528884
  * - tool is one of the 4 creative-edit tools
528915
528885
  * - the path is NOT in `_decomp2MainContextFiles` (i.e. it's a NEW file)
@@ -528918,7 +528888,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528918
528888
  * Already-touched paths pass through (current-module finishing work allowed).
528919
528889
  * sub_agent / task_complete / explicit web-task tools pass through (not creative-edit tools).
528920
528890
  */
528921
- _maybeDecomp2Block(tc, turn) {
528891
+ _maybeDecomp2Advisory(tc, turn) {
528922
528892
  if (!this._decomp2GateActive)
528923
528893
  return null;
528924
528894
  if (this._decomp2FallbackAllowed)
@@ -528941,7 +528911,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528941
528911
  const _moreFiles = this._decomp2MainContextFiles.size > 8 ? `
528942
528912
  ... +${this._decomp2MainContextFiles.size - 8} more` : "";
528943
528913
  const decomp2Msg = [
528944
- `[BLOCKED — DECOMP-2 main-context exhaustion]`,
528914
+ `[DECOMP-2 advisory — main-context file spread]`,
528945
528915
  ``,
528946
528916
  `You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without a successful sub_agent. Continuing to edit another new file ('${_editPath}') may keep your context window saturated and trigger compaction thrashing.`,
528947
528917
  ``,
@@ -528959,7 +528929,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528959
528929
  ` })`,
528960
528930
  ` 3. After sub_agent returns, mark the todo completed.`,
528961
528931
  ``,
528962
- `If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will downgrade this from a hard block to an advisory fallback so file writes can continue.`,
528932
+ `If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will keep this as advisory-only fallback so file writes can continue.`,
528963
528933
  ``,
528964
528934
  `Why this matters: spreading edits across N files in main context burns ~N × file_size tokens. sub_agent gives the next module a focused context window.`,
528965
528935
  ``,
@@ -528967,13 +528937,9 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528967
528937
  ].join("\n");
528968
528938
  this.emit({
528969
528939
  type: "status",
528970
- content: `DECOMP-2 NEW-FILE BLOCK rejected ${tc.name}('${_editPath}') at turn ${turn}; gate stays active until sub_agent succeeds or repeated delegation failure unlocks fallback`,
528940
+ content: `DECOMP-2 ADVISORY — ${tc.name}('${_editPath}') at turn ${turn}; tool allowed, consider sub_agent for the next module`,
528971
528941
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
528972
528942
  });
528973
- this._tagSyntheticFailure({
528974
- mode: "step_repetition",
528975
- rationale: `DECOMP-2 new-file block on '${tc.name}'(${_editPath}) — agent has spread edits across ${this._decomp2MainContextFiles.size} files without sub_agent`
528976
- });
528977
528943
  return decomp2Msg;
528978
528944
  }
528979
528945
  _decomp2FileSpreadThreshold() {
@@ -528988,15 +528954,15 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
528988
528954
  * edits / sub_agent calls and check the gate-activation threshold.
528989
528955
  * Without this method on both paths, batch532 measured 14 distinct
528990
528956
  * files edited but DECOMP-2 never activated — because the main loop
528991
- * exited via blocked task_complete and all subsequent edits flowed
528992
- * through the brute-force dispatch which had no tracking.
528957
+ * entered brute-force after task_complete review and all subsequent edits
528958
+ * flowed through the brute-force dispatch which had no tracking.
528993
528959
  *
528994
528960
  * Side effects when fired:
528995
528961
  * - On successful creative edit: adds path to `_decomp2MainContextFiles`,
528996
- * possibly activates `_decomp2GateActive` (emits status).
528962
+ * possibly activates `_decomp2GateActive` (emits advisory status).
528997
528963
  * - On successful sub_agent / priority_delegate / background_run:
528998
- * increments counter, clears gate (emits status). Failed or malformed
528999
- * delegation attempts do not satisfy the gate.
528964
+ * increments counter, clears advisory (emits status). Failed or
528965
+ * malformed delegation attempts do not satisfy the advisory.
529000
528966
  *
529001
528967
  * Pure post-dispatch: caller invokes AFTER the tool result is in hand,
529002
528968
  * regardless of which loop the dispatch happened in.
@@ -529013,7 +528979,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529013
528979
  this._decomp2GateActive = true;
529014
528980
  this.emit({
529015
528981
  type: "status",
529016
- content: `DECOMP-2 NEW-FILE GATE ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls, threshold=${DECOMP2_FILE_SPREAD_THRESHOLD}; further edits to NEW files will be blocked until sub_agent succeeds or repeated delegation failure unlocks fallback`,
528982
+ content: `DECOMP-2 ADVISORY ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls, threshold=${DECOMP2_FILE_SPREAD_THRESHOLD}; further edits to NEW files remain allowed, but sub_agent is recommended for the next module`,
529017
528983
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
529018
528984
  });
529019
528985
  }
@@ -529047,7 +529013,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529047
529013
  this._decomp2GateActive = false;
529048
529014
  this.emit({
529049
529015
  type: "status",
529050
- content: `DECOMP-2 GATE CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
529016
+ content: `DECOMP-2 ADVISORY CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
529051
529017
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
529052
529018
  });
529053
529019
  }
@@ -529057,7 +529023,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529057
529023
  * REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
529058
529024
  *
529059
529025
  * Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
529060
- * 5-6 times each + 22 BFC-61.G coercion BLOCKS — and ZERO of those blocks
529026
+ * 5-6 times each + 22 BFC-61.G coercion advisories — and ZERO of those
529061
529027
  * resulted in a creative edit. The agent was rationally stuck: it
529062
529028
  * believed it needed to read more to debug, the build command kept
529063
529029
  * giving the same error, and the standard "issue an edit" directive
@@ -529070,7 +529036,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529070
529036
  * different output. Without this signal we'd just keep telling the
529071
529037
  * agent to "make an edit" — which is exactly what it can't think of.
529072
529038
  *
529073
- * When detected, the BFC-61.G block message swaps to a PERTURB-strategy
529039
+ * When detected, the BFC-61.G advisory message swaps to a PERTURB-strategy
529074
529040
  * directive: stop reading, change ONE thing in the most-likely-culprit
529075
529041
  * file even if you're uncertain, and let the new error signal guide
529076
529042
  * the next iteration. This is real human debugging strategy ("perturb
@@ -529207,27 +529173,6 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
529207
529173
  * name with objective evidence, complete remaining items in order, update the
529208
529174
  * checklist via todo_write, and only then call task_complete.
529209
529175
  */
529210
- /**
529211
- * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
529212
- * block / budget exhausted). These paths return early from
529213
- * executeSingle BEFORE the main result-handling code, so the normal
529214
- * MAST tagging miss them. This helper lets each return-early site
529215
- * record a tag directly. Push-only — keeps the tag buffer bounded
529216
- * to 200 entries.
529217
- */
529218
- _tagSyntheticFailure(args) {
529219
- try {
529220
- this._mastTags.push({
529221
- mode: args.mode,
529222
- category: MAST_CATEGORY[args.mode],
529223
- rationale: args.rationale
529224
- });
529225
- if (this._mastTags.length > 200) {
529226
- this._mastTags = this._mastTags.slice(-200);
529227
- }
529228
- } catch {
529229
- }
529230
- }
529231
529176
  /**
529232
529177
  * REG-39b: emit a MAST taxonomy summary as a status event. Called both
529233
529178
  * mid-run (every N turns, so SIGTERM kills don't lose the data) and at
@@ -531127,6 +531072,9 @@ Respond with your assessment, then take action.`;
531127
531072
  this._fileWriteTimestamps = [];
531128
531073
  this._aborting = false;
531129
531074
  this._reg61CooldownUntilTurn = -1;
531075
+ this._writesSinceLastTodoWrite = 0;
531076
+ this._progressGateActive = false;
531077
+ this._consecutiveProgressAdvisories = 0;
531130
531078
  this._reg61PerpetualGateActive = false;
531131
531079
  this._decomp2MainContextFiles = /* @__PURE__ */ new Set();
531132
531080
  this._decomp2SubAgentCalls = 0;
@@ -533428,24 +533376,9 @@ ${memoryLines.join("\n")}`
533428
533376
  });
533429
533377
  }
533430
533378
  {
533431
- const _decomp2Block = this._maybeDecomp2Block(tc, turn);
533432
- if (_decomp2Block) {
533433
- this.emit({
533434
- type: "tool_call",
533435
- toolName: tc.name,
533436
- toolArgs: tc.arguments,
533437
- turn,
533438
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533439
- });
533440
- this.emit({
533441
- type: "tool_result",
533442
- toolName: tc.name,
533443
- success: false,
533444
- content: _decomp2Block.slice(0, 120),
533445
- turn,
533446
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533447
- });
533448
- return { tc, output: _decomp2Block };
533379
+ const _decomp2Advisory = this._maybeDecomp2Advisory(tc, turn);
533380
+ if (_decomp2Advisory) {
533381
+ pushSoftInjection("system", _decomp2Advisory);
533449
533382
  }
533450
533383
  }
533451
533384
  const PROGRESS_GATE_BYPASS_TOOLS = /* @__PURE__ */ new Set([
@@ -533457,77 +533390,55 @@ ${memoryLines.join("\n")}`
533457
533390
  // useful for the agent to consult prior phase state before updating
533458
533391
  ]);
533459
533392
  if (this._progressGateActive && !PROGRESS_GATE_BYPASS_TOOLS.has(tc.name)) {
533460
- this.emit({
533461
- type: "tool_call",
533462
- toolName: tc.name,
533463
- toolArgs: tc.arguments,
533464
- turn,
533465
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533466
- });
533467
533393
  const recentWrites = [];
533468
533394
  for (const [path11, info] of this._worldFacts.files) {
533469
533395
  if ((info.writeCount ?? 0) > 0 && (info.lastWriteTurn ?? -1) >= 0 && turn - (info.lastWriteTurn ?? 0) <= 8) {
533470
533396
  recentWrites.push({ path: path11, turn: info.lastWriteTurn ?? 0 });
533471
533397
  }
533472
533398
  }
533473
- this._consecutiveGateBlocks++;
533399
+ this._consecutiveProgressAdvisories++;
533474
533400
  recentWrites.sort((a2, b) => b.turn - a2.turn);
533475
533401
  const showWrites = recentWrites.slice(0, 16);
533476
- const isRepeat = this._consecutiveGateBlocks >= 2;
533402
+ const isRepeat = this._consecutiveProgressAdvisories >= 2;
533477
533403
  const gateMsg = [
533478
- `[BLOCKEDPROGRESS GATE active]`,
533404
+ `[PROGRESS ADVISORY todo update recommended]`,
533479
533405
  ``,
533480
533406
  `CAUSE: ${this._writesSinceLastTodoWrite} file writes since last todo_write call. Without progress tracking, the next turn re-plans the same work (plan-replay).`,
533481
- `EFFECT: All non-todo tool calls are now blocked at the runtime level.`,
533482
- `ACTION REQUIRED: Call todo_write with updated progress to release the gate.`,
533483
- `CONSEQUENCE OF IGNORING: Retrying blocked tools does NOT work — only todo_write is accepted while the gate is active.`,
533407
+ `EFFECT: The requested tool is still allowed. Update todos soon so progress state stays accurate.`,
533408
+ `ACTION RECOMMENDED: Call todo_write with updated progress after this tool call if the recent writes satisfy todo items.`,
533484
533409
  isRepeat ? `
533485
- [ESCALATION: This is block #${this._consecutiveGateBlocks}. You keep calling blocked tools instead of todo_write. The gate cannot be bypassed. You MUST call todo_write next.]` : "",
533410
+ [ESCALATION: This is advisory #${this._consecutiveProgressAdvisories}. You are continuing without todo_write; progress replay risk is increasing.]` : "",
533486
533411
  ``,
533487
533412
  `Recent file modifications (use these to decide what's done):`,
533488
533413
  ...showWrites.map((w) => ` • ${w.path} (turn ${w.turn})`),
533489
533414
  recentWrites.length > showWrites.length ? ` • ... +${recentWrites.length - showWrites.length} more` : "",
533490
533415
  ``,
533491
- `Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress. After todo_write succeeds, normal tools resume.`
533416
+ `Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress.`
533492
533417
  ].filter(Boolean).join("\n");
533418
+ pushSoftInjection("system", gateMsg);
533493
533419
  this.emit({
533494
- type: "tool_result",
533420
+ type: "status",
533495
533421
  toolName: tc.name,
533496
- success: false,
533497
- content: gateMsg.slice(0, 120),
533422
+ content: `PROGRESS ADVISORY — ${this._writesSinceLastTodoWrite} writes since todo_write; '${tc.name}' allowed`,
533498
533423
  turn,
533499
533424
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533500
533425
  });
533501
- return { tc, output: gateMsg };
533502
533426
  }
533503
533427
  const _argsKeyForBudget = `${tc.name}:${argsKey}`;
533504
533428
  const _isCachedHit = recentToolResults.has(_argsKeyForBudget);
533505
533429
  const budgetRemaining = toolCallBudget.get(tc.name);
533506
533430
  if (budgetRemaining !== void 0 && !_isCachedHit) {
533507
533431
  if (budgetRemaining <= 0) {
533432
+ const budgetMsg = `[BUDGET ADVISORY] You have used all ${toolBudgets[tc.name]} recommended ${tc.name} calls for the current phase. The requested tool call is still allowed, but repeated use may be low-value. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
533433
+ pushSoftInjection("system", budgetMsg);
533508
533434
  this.emit({
533509
- type: "tool_call",
533510
- toolName: tc.name,
533511
- toolArgs: tc.arguments,
533512
- turn,
533513
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533514
- });
533515
- const budgetMsg = `[BUDGET EXHAUSTED] You have used all ${toolBudgets[tc.name]} allowed ${tc.name} calls for the current phase. You ALREADY have enough information from previous calls. DO NOT try to call ${tc.name} again — it will be blocked. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
533516
- this.emit({
533517
- type: "tool_result",
533518
- toolName: tc.name,
533519
- success: false,
533520
- content: budgetMsg.slice(0, 120),
533521
- turn,
533435
+ type: "status",
533436
+ content: `BUDGET ADVISORY — ${tc.name} exceeded recommended per-phase budget; tool allowed`,
533522
533437
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533523
533438
  });
533524
- this._tagSyntheticFailure({
533525
- mode: "step_repetition",
533526
- rationale: `${tc.name} exhausted per-phase budget of ${toolBudgets[tc.name]}`
533527
- });
533528
- return { tc, output: budgetMsg };
533439
+ } else {
533440
+ toolCallBudget.set(tc.name, budgetRemaining - 1);
533529
533441
  }
533530
- toolCallBudget.set(tc.name, budgetRemaining - 1);
533531
533442
  }
533532
533443
  const toolFingerprint = this._buildToolFingerprint(tc.name, tc.arguments ?? {});
533533
533444
  const baseIsReadLike = ![
@@ -533612,29 +533523,15 @@ ${memoryLines.join("\n")}`
533612
533523
  observerRedundantBlock
533613
533524
  });
533614
533525
  if (criticDecision.decision === "observer_block") {
533615
- this.emit({
533616
- type: "tool_call",
533617
- toolName: tc.name,
533618
- toolArgs: tc.arguments,
533619
- turn,
533620
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533621
- });
533622
- const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
533526
+ const advisoryMsg = criticDecision.cachedResult ? `[DUPLICATE-CALL ADVISORY — this tool+args already succeeded. Prior result preview:]
533623
533527
 
533624
- ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
533528
+ ${criticDecision.cachedResult.slice(0, 500)}` : `[DUPLICATE-CALL ADVISORY — the observer confirmed this tool already succeeded with these arguments on a prior turn. The call is still allowed, but using prior findings may be faster.]`;
533529
+ pushSoftInjection("system", advisoryMsg);
533625
533530
  this.emit({
533626
- type: "tool_result",
533627
- toolName: tc.name,
533628
- success: true,
533629
- content: blockMsg.slice(0, 100),
533630
- turn,
533531
+ type: "status",
533532
+ content: `DUPLICATE-CALL ADVISORY — ${tc.name} allowed despite observer redundancy`,
533631
533533
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533632
533534
  });
533633
- this._tagSyntheticFailure({
533634
- mode: "step_repetition",
533635
- rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
533636
- });
533637
- return { tc, output: blockMsg };
533638
533535
  }
533639
533536
  if (criticDecision.decision === "force_progress_block") {
533640
533537
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -533643,26 +533540,12 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533643
533540
  recentToolResults.delete(toolFingerprint);
533644
533541
  recentToolResults.set(toolFingerprint, _existingFp);
533645
533542
  }
533543
+ pushSoftInjection("system", criticDecision.blockMessage.replace(/\[FORCED PROGRESS BLOCK[^\]]*\]|\[BLOCKED[^\]]*\]/gi, "[REPETITION ADVISORY]"));
533646
533544
  this.emit({
533647
- type: "tool_call",
533648
- toolName: tc.name,
533649
- toolArgs: tc.arguments,
533650
- turn,
533651
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533652
- });
533653
- this.emit({
533654
- type: "tool_result",
533655
- toolName: tc.name,
533656
- success: false,
533657
- content: criticDecision.blockMessage.slice(0, 120),
533658
- turn,
533545
+ type: "status",
533546
+ content: `REPETITION ADVISORY — ${tc.name} repeated ${criticDecision.hitNumber} times; tool allowed`,
533659
533547
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533660
533548
  });
533661
- this._tagSyntheticFailure({
533662
- mode: "step_repetition",
533663
- rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
533664
- });
533665
- return { tc, output: criticDecision.blockMessage };
533666
533549
  }
533667
533550
  if (criticDecision.decision === "serve_cached") {
533668
533551
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -533671,30 +533554,20 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533671
533554
  recentToolResults.delete(toolFingerprint);
533672
533555
  recentToolResults.set(toolFingerprint, _existingFp);
533673
533556
  }
533674
- this.emit({
533675
- type: "tool_call",
533676
- toolName: tc.name,
533677
- toolArgs: tc.arguments,
533678
- turn,
533679
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
533680
- });
533681
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
533557
+ const header = criticDecision.compacted ? `[CACHE ADVISORY — the original result was compacted from context. Prior data preview follows; the tool call is still allowed.]
533682
533558
 
533683
- ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
533559
+ ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result may be identical. The tool call is still allowed, but using existing data may be faster.]
533684
533560
 
533685
533561
  `;
533686
533562
  const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
533687
533563
  ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
533688
533564
  const dedupOutput = header + truncatedCache;
533565
+ pushSoftInjection("system", dedupOutput);
533689
533566
  this.emit({
533690
- type: "tool_result",
533691
- toolName: tc.name,
533692
- success: true,
533693
- content: header.slice(0, 100),
533694
- turn,
533567
+ type: "status",
533568
+ content: `CACHE ADVISORY — ${tc.name} duplicate call #${criticDecision.hitNumber}; tool allowed`,
533695
533569
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533696
533570
  });
533697
- return { tc, output: dedupOutput };
533698
533571
  }
533699
533572
  this.emit({
533700
533573
  type: "tool_call",
@@ -533744,6 +533617,18 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533744
533617
  const violations = checkConstraints(tc.name, tc.arguments);
533745
533618
  const blockViolations = violations.filter((v) => v.constraint.action === "block");
533746
533619
  if (blockViolations.length > 0) {
533620
+ const warning = formatViolationWarning(blockViolations);
533621
+ pushSoftInjection("system", `[CONSTRAINT ADVISORY]
533622
+ ${warning}
533623
+ The tool call is still allowed by the no-hard-block policy. Treat this as risk guidance, not a runtime denial.`);
533624
+ this.emit({
533625
+ type: "status",
533626
+ content: `Constraint advisory: ${blockViolations[0].constraint.message}`,
533627
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
533628
+ });
533629
+ pendingConstraintWarnings.push(warning);
533630
+ }
533631
+ if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && blockViolations.length > 0) {
533747
533632
  result = {
533748
533633
  success: false,
533749
533634
  output: "",
@@ -533767,6 +533652,16 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
533767
533652
  }
533768
533653
  const hookCheck = this._hookManager.runPreToolUse(tc.name, tc.arguments, this._sessionId);
533769
533654
  if (!hookCheck.allowed) {
533655
+ pushSoftInjection("system", `[HOOK ADVISORY]
533656
+ Tool "${tc.name}" matched a pre-tool hook warning: ${hookCheck.reason ?? "hook denied"}.
533657
+ The tool call is still allowed by the no-hard-block policy. Treat this as guidance and proceed deliberately.`);
533658
+ this.emit({
533659
+ type: "status",
533660
+ content: `Hook advisory for ${tc.name}: ${hookCheck.reason ?? "hook denied"}. Tool allowed by no-hard-block policy.`,
533661
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
533662
+ });
533663
+ }
533664
+ if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && !hookCheck.allowed) {
533770
533665
  result = {
533771
533666
  success: false,
533772
533667
  output: "",
@@ -533923,7 +533818,7 @@ Respond with EXACTLY this structure before your next tool call:
533923
533818
  this._reg61PerpetualGateActive = false;
533924
533819
  this.emit({
533925
533820
  type: "status",
533926
- content: `REG-61 GATE CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
533821
+ content: `REG-61 ADVISORY CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
533927
533822
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533928
533823
  });
533929
533824
  }
@@ -533958,7 +533853,7 @@ Respond with EXACTLY this structure before your next tool call:
533958
533853
  this._progressGateActive = true;
533959
533854
  this.emit({
533960
533855
  type: "status",
533961
- content: `Progress gate engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools will be blocked until plan is updated`,
533856
+ content: `Progress advisory engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools remain allowed, todo update recommended`,
533962
533857
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
533963
533858
  });
533964
533859
  }
@@ -533974,7 +533869,7 @@ Respond with EXACTLY this structure before your next tool call:
533974
533869
  }
533975
533870
  this._writesSinceLastTodoWrite = 0;
533976
533871
  this._progressGateActive = false;
533977
- this._consecutiveGateBlocks = 0;
533872
+ this._consecutiveProgressAdvisories = 0;
533978
533873
  }
533979
533874
  if (tc.name === "file_read") {
533980
533875
  const p2 = String(tc.arguments?.["path"] ?? tc.arguments?.["file"] ?? "");
@@ -534983,10 +534878,10 @@ Then use file_read on individual FILES inside it.`);
534983
534878
  const output = sr.result.success ? sr.result.output : `Error: ${sr.result.error || "unknown"}
534984
534879
  ${sr.result.output}`;
534985
534880
  messages2.push(this.buildToolMessage(output, matchTc.id, matchTc.name));
534986
- if (this._consecutiveGateBlocks >= 2 && this._progressGateActive) {
534881
+ if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
534987
534882
  messages2.push({
534988
534883
  role: "system",
534989
- content: `[PROGRESS GATE ESCALATION] You have made ${this._consecutiveGateBlocks} consecutive blocked tool calls without calling todo_write. The gate is enforced at the runtime level retrying the same blocked tool will never work. Your NEXT call MUST be todo_write(todos=[...]) with updated progress. No other tool will be accepted until the gate is released.`
534884
+ content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
534990
534885
  });
534991
534886
  }
534992
534887
  if (matchTc.name === "task_complete") {
@@ -534996,28 +534891,32 @@ ${sr.result.output}`;
534996
534891
  messages2.push({ role: "system", content: guard });
534997
534892
  this.emit({
534998
534893
  type: "status",
534999
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
534894
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535000
534895
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535001
534896
  });
535002
- } else {
535003
- const _bp1 = await this._runBackwardPassReview(turn);
535004
- if (_bp1 && !_bp1.proceed && _bp1.feedback) {
535005
- messages2.push({ role: "system", content: _bp1.feedback });
535006
- } else {
535007
- completed = true;
535008
- summary = extractTaskCompleteSummary(matchTc.arguments);
535009
- if (summary && !this._assistantTextEmitted) {
535010
- this.emit({
535011
- type: "assistant_text",
535012
- content: summary,
535013
- turn,
535014
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535015
- });
535016
- this._assistantTextEmitted = true;
535017
- }
535018
- break;
535019
- }
535020
534897
  }
534898
+ const _bp1 = await this._runBackwardPassReview(turn);
534899
+ if (_bp1 && !_bp1.proceed && _bp1.feedback) {
534900
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
534901
+ ${_bp1.feedback}` });
534902
+ this.emit({
534903
+ type: "status",
534904
+ content: "completion review advisory surfaced; task_complete allowed",
534905
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534906
+ });
534907
+ }
534908
+ completed = true;
534909
+ summary = extractTaskCompleteSummary(matchTc.arguments);
534910
+ if (summary && !this._assistantTextEmitted) {
534911
+ this.emit({
534912
+ type: "assistant_text",
534913
+ content: summary,
534914
+ turn,
534915
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534916
+ });
534917
+ this._assistantTextEmitted = true;
534918
+ }
534919
+ break;
535021
534920
  }
535022
534921
  }
535023
534922
  }
@@ -535029,10 +534928,10 @@ ${sr.result.output}`;
535029
534928
  const r2 = await executeSingle(tc);
535030
534929
  if (r2) {
535031
534930
  messages2.push(this.buildToolMessage(r2.output, r2.tc.id, r2.tc.name));
535032
- if (this._consecutiveGateBlocks >= 2 && this._progressGateActive) {
534931
+ if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
535033
534932
  messages2.push({
535034
534933
  role: "system",
535035
- content: `[PROGRESS GATE ESCALATION] You have made ${this._consecutiveGateBlocks} consecutive blocked tool calls without calling todo_write. The gate is enforced at the runtime level retrying the same blocked tool will never work. Your NEXT call MUST be todo_write(todos=[...]) with updated progress. No other tool will be accepted until the gate is released.`
534934
+ content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
535036
534935
  });
535037
534936
  }
535038
534937
  if (r2.tc.name === "task_complete") {
@@ -535042,28 +534941,32 @@ ${sr.result.output}`;
535042
534941
  messages2.push({ role: "system", content: guard });
535043
534942
  this.emit({
535044
534943
  type: "status",
535045
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
534944
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535046
534945
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535047
534946
  });
535048
- } else {
535049
- const _bp2 = await this._runBackwardPassReview(turn);
535050
- if (_bp2 && !_bp2.proceed && _bp2.feedback) {
535051
- messages2.push({ role: "system", content: _bp2.feedback });
535052
- } else {
535053
- completed = true;
535054
- summary = extractTaskCompleteSummary(r2.tc.arguments);
535055
- if (summary && !this._assistantTextEmitted) {
535056
- this.emit({
535057
- type: "assistant_text",
535058
- content: summary,
535059
- turn,
535060
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535061
- });
535062
- this._assistantTextEmitted = true;
535063
- }
535064
- break;
535065
- }
535066
534947
  }
534948
+ const _bp2 = await this._runBackwardPassReview(turn);
534949
+ if (_bp2 && !_bp2.proceed && _bp2.feedback) {
534950
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
534951
+ ${_bp2.feedback}` });
534952
+ this.emit({
534953
+ type: "status",
534954
+ content: "completion review advisory surfaced; task_complete allowed",
534955
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534956
+ });
534957
+ }
534958
+ completed = true;
534959
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
534960
+ if (summary && !this._assistantTextEmitted) {
534961
+ this.emit({
534962
+ type: "assistant_text",
534963
+ content: summary,
534964
+ turn,
534965
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
534966
+ });
534967
+ this._assistantTextEmitted = true;
534968
+ }
534969
+ break;
535067
534970
  }
535068
534971
  }
535069
534972
  }
@@ -535117,36 +535020,40 @@ ${sr.result.output}`;
535117
535020
  messages2.push({ role: "system", content: guard });
535118
535021
  this.emit({
535119
535022
  type: "status",
535120
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
535023
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535121
535024
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535122
535025
  });
535123
- } else {
535124
- const _bp3 = await this._runBackwardPassReview(turn);
535125
- if (_bp3 && !_bp3.proceed && _bp3.feedback) {
535126
- messages2.push({ role: "system", content: _bp3.feedback });
535127
- } else {
535128
- completed = true;
535129
- summary = extractTaskCompleteSummary(r2.tc.arguments);
535130
- for (const tool of this.tools.values()) {
535131
- if (tool.cleanup) {
535132
- try {
535133
- await tool.cleanup();
535134
- } catch {
535135
- }
535136
- }
535137
- }
535138
- if (summary && !this._assistantTextEmitted) {
535139
- this.emit({
535140
- type: "assistant_text",
535141
- content: summary,
535142
- turn,
535143
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535144
- });
535145
- this._assistantTextEmitted = true;
535026
+ }
535027
+ const _bp3 = await this._runBackwardPassReview(turn);
535028
+ if (_bp3 && !_bp3.proceed && _bp3.feedback) {
535029
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
535030
+ ${_bp3.feedback}` });
535031
+ this.emit({
535032
+ type: "status",
535033
+ content: "completion review advisory surfaced; task_complete allowed",
535034
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535035
+ });
535036
+ }
535037
+ completed = true;
535038
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
535039
+ for (const tool of this.tools.values()) {
535040
+ if (tool.cleanup) {
535041
+ try {
535042
+ await tool.cleanup();
535043
+ } catch {
535146
535044
  }
535147
- break;
535148
535045
  }
535149
535046
  }
535047
+ if (summary && !this._assistantTextEmitted) {
535048
+ this.emit({
535049
+ type: "assistant_text",
535050
+ content: summary,
535051
+ turn,
535052
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535053
+ });
535054
+ this._assistantTextEmitted = true;
535055
+ }
535056
+ break;
535150
535057
  }
535151
535058
  }
535152
535059
  }
@@ -535759,18 +535666,9 @@ Integrate this guidance into your current approach. Continue working on the task
535759
535666
  turn,
535760
535667
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535761
535668
  });
535762
- const _decomp2BFBlock = this._maybeDecomp2Block(tc, turn);
535763
- if (_decomp2BFBlock) {
535764
- this.emit({
535765
- type: "tool_result",
535766
- toolName: tc.name,
535767
- content: _decomp2BFBlock.slice(0, 200),
535768
- success: false,
535769
- turn,
535770
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535771
- });
535772
- messages2.push(this.buildToolMessage(_decomp2BFBlock, tc.id, tc.name));
535773
- continue;
535669
+ const _decomp2BFAdvisory = this._maybeDecomp2Advisory(tc, turn);
535670
+ if (_decomp2BFAdvisory) {
535671
+ messages2.push({ role: "system", content: _decomp2BFAdvisory });
535774
535672
  }
535775
535673
  const tool = this.tools.get(tc.name);
535776
535674
  let result;
@@ -535871,28 +535769,32 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
535871
535769
  messages2.push({ role: "system", content: guard });
535872
535770
  this.emit({
535873
535771
  type: "status",
535874
- content: `task_complete intercepted — ${open2.length} open todo(s) remain`,
535772
+ content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
535875
535773
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535876
535774
  });
535877
- } else {
535878
- const _bp4 = await this._runBackwardPassReview(turn);
535879
- if (_bp4 && !_bp4.proceed && _bp4.feedback) {
535880
- messages2.push({ role: "system", content: _bp4.feedback });
535881
- continue;
535882
- }
535883
- completed = true;
535884
- summary = extractTaskCompleteSummary(tc.arguments);
535885
- if (summary && !this._assistantTextEmitted) {
535886
- this.emit({
535887
- type: "assistant_text",
535888
- content: summary,
535889
- turn,
535890
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535891
- });
535892
- this._assistantTextEmitted = true;
535893
- }
535894
- break;
535895
535775
  }
535776
+ const _bp4 = await this._runBackwardPassReview(turn);
535777
+ if (_bp4 && !_bp4.proceed && _bp4.feedback) {
535778
+ messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
535779
+ ${_bp4.feedback}` });
535780
+ this.emit({
535781
+ type: "status",
535782
+ content: "completion review advisory surfaced; task_complete allowed",
535783
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535784
+ });
535785
+ }
535786
+ completed = true;
535787
+ summary = extractTaskCompleteSummary(tc.arguments);
535788
+ if (summary && !this._assistantTextEmitted) {
535789
+ this.emit({
535790
+ type: "assistant_text",
535791
+ content: summary,
535792
+ turn,
535793
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
535794
+ });
535795
+ this._assistantTextEmitted = true;
535796
+ }
535797
+ break;
535896
535798
  }
535897
535799
  }
535898
535800
  if (completed)
@@ -535920,14 +535822,13 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
535920
535822
  messages2.push({ role: "system", content: guard });
535921
535823
  this.emit({
535922
535824
  type: "status",
535923
- content: `task_complete text detected but intercepted — ${open2.length} open todo(s) remain`,
535825
+ content: `task_complete text advisory — ${open2.length} open todo(s) remain; completion allowed`,
535924
535826
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
535925
535827
  });
535926
- } else {
535927
- completed = true;
535928
- summary = content;
535929
- break;
535930
535828
  }
535829
+ completed = true;
535830
+ summary = content;
535831
+ break;
535931
535832
  }
535932
535833
  const finalVarMatch = content.match(/FINAL_VAR\s*\(\s*["']?(\w+)["']?\s*\)/);
535933
535834
  if (finalVarMatch && this.options.finalVarResolver) {
@@ -614120,7 +614021,7 @@ function createTaskCompleteTool(modelTier) {
614120
614021
  const summaryDesc = modelTier === "small" || modelTier === "medium" ? "Your complete response to the user. For questions/chat: put your FULL answer here (this is what the user will see). For coding tasks: brief summary of what was accomplished." : "Brief summary of what was accomplished";
614121
614022
  return {
614122
614023
  name: "task_complete",
614123
- description: "Signal that the task is complete. GUARDED: cannot fire while the active todo list (todo_write) has pending, in_progress, or blocked items. If you're truly done, first call todo_write to mark every remaining item completed. If you're not done, continue working down the list and call this only after the last item flips to completed.",
614024
+ description: "Signal that the task is complete. ADVISORY: if active todos, interactive sessions, or build checks indicate risk, the tool result will include model-visible guidance, but the tool is not hard-blocked.",
614124
614025
  parameters: {
614125
614026
  type: "object",
614126
614027
  properties: {
@@ -614129,11 +614030,14 @@ function createTaskCompleteTool(modelTier) {
614129
614030
  required: ["summary"]
614130
614031
  },
614131
614032
  async execute(args) {
614033
+ const summary = args["summary"] || "Task completed.";
614132
614034
  if (_interactiveSessionActive) {
614133
614035
  return {
614134
- success: false,
614135
- output: `SESSION STILL ACTIVE. Call your next interaction tool NOW. Do NOT produce text call a tool immediately to continue the session.`,
614136
- error: `task_complete BLOCKED — interactive session still active. ${_interactiveSessionReason} You MUST continue the interaction loop until the session ends. Do NOT call task_complete until you receive a termination signal (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). YOUR NEXT ACTION: call the listening/polling tool to continue the session.`
614036
+ success: true,
614037
+ output: `[TASK_COMPLETE ADVISORYinteractive session still active]
614038
+ ${_interactiveSessionReason} You should continue the interaction loop until the session ends (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). The completion is allowed by no-hard-block policy.
614039
+
614040
+ ${summary}`
614137
614041
  };
614138
614042
  }
614139
614043
  try {
@@ -614159,14 +614063,17 @@ Respond concisely in this shape:
614159
614063
  - verify: [{ name: "<exact item text>", completed: true|false, evidence: "<objective proof>" }, ...]
614160
614064
  - next: "what you will do next OR the exact todo_write(...) call to update statuses"`;
614161
614065
  return {
614162
- success: false,
614163
- output: "",
614164
- error: `task_complete BLOCKED — ${incomplete.length} todo item(s) still incomplete.
614066
+ success: true,
614067
+ output: `[TASK_COMPLETE ADVISORY — ${incomplete.length} todo item(s) still incomplete]
614165
614068
 
614166
614069
  Incomplete items:
614167
614070
  ${incompleteList}${more}
614168
614071
 
614169
- ` + guidance
614072
+ ${guidance}
614073
+
614074
+ Completion is allowed by no-hard-block policy.
614075
+
614076
+ ${summary}`
614170
614077
  };
614171
614078
  }
614172
614079
  try {
@@ -614176,7 +614083,6 @@ ${incompleteList}${more}
614176
614083
  }
614177
614084
  } catch {
614178
614085
  }
614179
- const summary = args["summary"] || "Task completed.";
614180
614086
  const buildGuardSkip = process.env["OA_DISABLE_TASK_COMPLETE_BUILD_GUARD"] === "1" || /^\s*BLOCKED\b/i.test(summary);
614181
614087
  if (!buildGuardSkip) {
614182
614088
  try {
@@ -614200,16 +614106,15 @@ ${incompleteList}${more}
614200
614106
  } catch (e2) {
614201
614107
  const out = ((e2?.stdout || "") + (e2?.stderr || "")).toString().slice(0, 2e3);
614202
614108
  return {
614203
- success: false,
614204
- output: "",
614205
- error: `task_complete BLOCKED — \`npm run ${checkScript}\` is failing in ${cwd4}.
614109
+ success: true,
614110
+ output: `[TASK_COMPLETE ADVISORY — \`npm run ${checkScript}\` is failing in ${cwd4}]
614206
614111
 
614207
614112
  Error output (last 2KB):
614208
614113
  ${out || "<empty stdout/stderr — likely timeout or non-zero exit>"}
614209
614114
 
614210
- Fix the build errors before calling task_complete. If you genuinely cannot fix it (e.g. missing tool, env issue), call task_complete with a summary that STARTS with "BLOCKED: " and explains why.
614115
+ Recommended: fix the build errors before considering the work done. Completion is still allowed by no-hard-block policy.
614211
614116
 
614212
- Bypass for special cases: set env OA_DISABLE_TASK_COMPLETE_BUILD_GUARD=1 (not recommended).`
614117
+ ${summary}`
614213
614118
  };
614214
614119
  }
614215
614120
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.575",
3
+ "version": "0.187.576",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.575",
9
+ "version": "0.187.576",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.575",
3
+ "version": "0.187.576",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",