open-agents-ai 0.187.575 → 0.187.576
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +225 -320
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -515299,26 +515299,10 @@ function summarizeMAST(tags) {
|
|
|
515299
515299
|
}
|
|
515300
515300
|
return { byMode, byCategory, total: tags.length };
|
|
515301
515301
|
}
|
|
515302
|
-
var MAST_CATEGORY;
|
|
515303
515302
|
var init_mast_tagger = __esm({
|
|
515304
515303
|
"packages/orchestrator/dist/mast-tagger.js"() {
|
|
515305
515304
|
"use strict";
|
|
515306
515305
|
init_reflection();
|
|
515307
|
-
MAST_CATEGORY = {
|
|
515308
|
-
spec_disobedience: "specification_design",
|
|
515309
|
-
step_repetition: "specification_design",
|
|
515310
|
-
history_loss: "specification_design",
|
|
515311
|
-
completion_unrecognized: "specification_design",
|
|
515312
|
-
input_ignored: "inter_agent_misalignment",
|
|
515313
|
-
proceeded_without_clarify: "inter_agent_misalignment",
|
|
515314
|
-
conversation_reset: "inter_agent_misalignment",
|
|
515315
|
-
reasoning_action_mismatch: "inter_agent_misalignment",
|
|
515316
|
-
premature_termination: "task_verification_termination",
|
|
515317
|
-
validation_skipped: "task_verification_termination",
|
|
515318
|
-
shallow_check_accepted: "task_verification_termination",
|
|
515319
|
-
premature_task_complete: "task_verification_termination",
|
|
515320
|
-
other: "specification_design"
|
|
515321
|
-
};
|
|
515322
515306
|
}
|
|
515323
515307
|
});
|
|
515324
515308
|
|
|
@@ -528102,19 +528086,16 @@ var init_agenticRunner = __esm({
|
|
|
528102
528086
|
// a phase's worth of work without recording progress — and on the next
|
|
528103
528087
|
// turn will replay the same plan. Surface a nudge before that happens.
|
|
528104
528088
|
_writesSinceLastTodoWrite = 0;
|
|
528105
|
-
// REG-12: Progress
|
|
528106
|
-
//
|
|
528107
|
-
//
|
|
528108
|
-
//
|
|
528109
|
-
//
|
|
528110
|
-
// Released when todo_write fires successfully. Without this, the agent
|
|
528111
|
-
// can re-emit the same plan a second time (plan-replay) and execute
|
|
528112
|
-
// duplicate work because PROGRESS NUDGE alone is informational.
|
|
528089
|
+
// REG-12: Progress advisory latch. When ≥6 file writes have happened
|
|
528090
|
+
// without a todo_write call, this latch flips ON. While active, non-todo
|
|
528091
|
+
// tool calls receive model-visible system guidance, but still execute.
|
|
528092
|
+
// Released when todo_write fires successfully. Without this, the agent can
|
|
528093
|
+
// re-emit the same plan a second time (plan-replay) and duplicate work.
|
|
528113
528094
|
_progressGateActive = false;
|
|
528114
|
-
// Consecutive
|
|
528115
|
-
//
|
|
528116
|
-
//
|
|
528117
|
-
|
|
528095
|
+
// Consecutive advisory count. When the model ignores stale progress state,
|
|
528096
|
+
// this counter increments. ≥2 triggers a system message escalation to break
|
|
528097
|
+
// pattern-lock loops.
|
|
528098
|
+
_consecutiveProgressAdvisories = 0;
|
|
528118
528099
|
// REG-5: Rolling buffer of recent tool failures with their error output.
|
|
528119
528100
|
// Surfaced before every LLM call so the agent can't ignore "I just ran this
|
|
528120
528101
|
// and it errored". Detects same-fingerprint failure repetition and escalates
|
|
@@ -528344,19 +528325,12 @@ var init_agenticRunner = __esm({
|
|
|
528344
528325
|
// explicitly excludes todo_write/memory_write/list_directory.
|
|
528345
528326
|
_reg61CooldownUntilTurn = -1;
|
|
528346
528327
|
// BFC-61.G (root-cause from batch529-midi-coerce, 2026-05-03): REG-61
|
|
528347
|
-
//
|
|
528348
|
-
// which empirically warned but did not change behavior
|
|
528349
|
-
//
|
|
528350
|
-
//
|
|
528351
|
-
// 7 fires; ZERO of those blocks were followed by a creative edit on
|
|
528352
|
-
// the agent's next turn.
|
|
528353
|
-
//
|
|
528354
|
-
// Semantics: when REG-61 fires, this latch goes true. While true, every
|
|
528355
|
-
// non-bypass tool call gets BLOCKED with a synthetic error result. The
|
|
528356
|
-
// bypass set includes the 4 creative-edit tools plus task_complete /
|
|
528357
|
-
// ask_user / explicit web-task escape hatches. Any creative edit dispatch
|
|
528328
|
+
// persistent advisory until obeyed. Replaces BFC-61.E's one-shot nudge,
|
|
528329
|
+
// which empirically warned but did not change behavior. When REG-61 fires,
|
|
528330
|
+
// this latch goes true. While true, every non-bypass tool call receives
|
|
528331
|
+
// model-visible guidance and still executes. Any creative edit dispatch
|
|
528358
528332
|
// clears the latch ("directive satisfied"). Shell, file_read, todo_*,
|
|
528359
|
-
// grep_search, list_directory etc. are NOT in bypass
|
|
528333
|
+
// grep_search, list_directory etc. are NOT in bypass because those are the
|
|
528360
528334
|
// exact patterns batch528/529 agents used to ignore REG-61.
|
|
528361
528335
|
//
|
|
528362
528336
|
// Kill switch: OA_DISABLE_REG61_COERCE=1 disables BOTH set and enforce.
|
|
@@ -528365,13 +528339,12 @@ var init_agenticRunner = __esm({
|
|
|
528365
528339
|
// sub_agent delegation. DECOMP-1's informational directive was ignored
|
|
528366
528340
|
// (0 sub_agent calls in 466 tool-call run despite directive at turn 1).
|
|
528367
528341
|
// Mirrors the BFC-61.G escalation arc, but must not deadlock delivery:
|
|
528368
|
-
// when the agent has edited ≥adaptive-threshold distinct files in main
|
|
528369
|
-
// WITHOUT successful sub_agent,
|
|
528370
|
-
//
|
|
528371
|
-
//
|
|
528372
|
-
//
|
|
528373
|
-
//
|
|
528374
|
-
// write-deadlock when sub_agent itself is broken or unavailable.
|
|
528342
|
+
// when the agent has edited ≥adaptive-threshold distinct files in main
|
|
528343
|
+
// context WITHOUT successful sub_agent, the dispatcher injects guidance
|
|
528344
|
+
// for NEW files (paths not yet edited) while still allowing the edit.
|
|
528345
|
+
// Repeated failed delegation attempts unlock a main-context fallback so the
|
|
528346
|
+
// guardrail cannot become a hard write-deadlock when sub_agent itself is
|
|
528347
|
+
// broken or unavailable.
|
|
528375
528348
|
// Kill switch: OA_DISABLE_DECOMP2=1.
|
|
528376
528349
|
_decomp2MainContextFiles = /* @__PURE__ */ new Set();
|
|
528377
528350
|
_decomp2SubAgentCalls = 0;
|
|
@@ -528901,15 +528874,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528901
528874
|
});
|
|
528902
528875
|
}
|
|
528903
528876
|
/**
|
|
528904
|
-
* DECOMP-2 PRE-dispatch
|
|
528905
|
-
* string when the
|
|
528877
|
+
* DECOMP-2 PRE-dispatch advisory check. Returns a model-visible guidance
|
|
528878
|
+
* string when the file-spread advisory should surface; null to proceed
|
|
528879
|
+
* without extra guidance. Callers inject the string as system guidance and
|
|
528880
|
+
* then run the requested tool normally.
|
|
528906
528881
|
*
|
|
528907
|
-
*
|
|
528908
|
-
* - Main loop returns the block as a synthetic { tc, output } result.
|
|
528909
|
-
* - Brute-force emits tool_result + pushes tool message + `continue`.
|
|
528910
|
-
* Both invoke this method to compute the decision.
|
|
528911
|
-
*
|
|
528912
|
-
* Conditions for blocking:
|
|
528882
|
+
* Conditions for surfacing guidance:
|
|
528913
528883
|
* - `_decomp2GateActive` is true (set by `_trackDecomp2` when threshold crossed)
|
|
528914
528884
|
* - tool is one of the 4 creative-edit tools
|
|
528915
528885
|
* - the path is NOT in `_decomp2MainContextFiles` (i.e. it's a NEW file)
|
|
@@ -528918,7 +528888,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528918
528888
|
* Already-touched paths pass through (current-module finishing work allowed).
|
|
528919
528889
|
* sub_agent / task_complete / explicit web-task tools pass through (not creative-edit tools).
|
|
528920
528890
|
*/
|
|
528921
|
-
|
|
528891
|
+
_maybeDecomp2Advisory(tc, turn) {
|
|
528922
528892
|
if (!this._decomp2GateActive)
|
|
528923
528893
|
return null;
|
|
528924
528894
|
if (this._decomp2FallbackAllowed)
|
|
@@ -528941,7 +528911,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528941
528911
|
const _moreFiles = this._decomp2MainContextFiles.size > 8 ? `
|
|
528942
528912
|
... +${this._decomp2MainContextFiles.size - 8} more` : "";
|
|
528943
528913
|
const decomp2Msg = [
|
|
528944
|
-
`[
|
|
528914
|
+
`[DECOMP-2 advisory — main-context file spread]`,
|
|
528945
528915
|
``,
|
|
528946
528916
|
`You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without a successful sub_agent. Continuing to edit another new file ('${_editPath}') may keep your context window saturated and trigger compaction thrashing.`,
|
|
528947
528917
|
``,
|
|
@@ -528959,7 +528929,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528959
528929
|
` })`,
|
|
528960
528930
|
` 3. After sub_agent returns, mark the todo completed.`,
|
|
528961
528931
|
``,
|
|
528962
|
-
`If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will
|
|
528932
|
+
`If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will keep this as advisory-only fallback so file writes can continue.`,
|
|
528963
528933
|
``,
|
|
528964
528934
|
`Why this matters: spreading edits across N files in main context burns ~N × file_size tokens. sub_agent gives the next module a focused context window.`,
|
|
528965
528935
|
``,
|
|
@@ -528967,13 +528937,9 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528967
528937
|
].join("\n");
|
|
528968
528938
|
this.emit({
|
|
528969
528939
|
type: "status",
|
|
528970
|
-
content: `DECOMP-2
|
|
528940
|
+
content: `DECOMP-2 ADVISORY — ${tc.name}('${_editPath}') at turn ${turn}; tool allowed, consider sub_agent for the next module`,
|
|
528971
528941
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
528972
528942
|
});
|
|
528973
|
-
this._tagSyntheticFailure({
|
|
528974
|
-
mode: "step_repetition",
|
|
528975
|
-
rationale: `DECOMP-2 new-file block on '${tc.name}'(${_editPath}) — agent has spread edits across ${this._decomp2MainContextFiles.size} files without sub_agent`
|
|
528976
|
-
});
|
|
528977
528943
|
return decomp2Msg;
|
|
528978
528944
|
}
|
|
528979
528945
|
_decomp2FileSpreadThreshold() {
|
|
@@ -528988,15 +528954,15 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528988
528954
|
* edits / sub_agent calls and check the gate-activation threshold.
|
|
528989
528955
|
* Without this method on both paths, batch532 measured 14 distinct
|
|
528990
528956
|
* files edited but DECOMP-2 never activated — because the main loop
|
|
528991
|
-
*
|
|
528992
|
-
* through the brute-force dispatch which had no tracking.
|
|
528957
|
+
* entered brute-force after task_complete review and all subsequent edits
|
|
528958
|
+
* flowed through the brute-force dispatch which had no tracking.
|
|
528993
528959
|
*
|
|
528994
528960
|
* Side effects when fired:
|
|
528995
528961
|
* - On successful creative edit: adds path to `_decomp2MainContextFiles`,
|
|
528996
|
-
* possibly activates `_decomp2GateActive` (emits status).
|
|
528962
|
+
* possibly activates `_decomp2GateActive` (emits advisory status).
|
|
528997
528963
|
* - On successful sub_agent / priority_delegate / background_run:
|
|
528998
|
-
* increments counter, clears
|
|
528999
|
-
* delegation attempts do not satisfy the
|
|
528964
|
+
* increments counter, clears advisory (emits status). Failed or
|
|
528965
|
+
* malformed delegation attempts do not satisfy the advisory.
|
|
529000
528966
|
*
|
|
529001
528967
|
* Pure post-dispatch: caller invokes AFTER the tool result is in hand,
|
|
529002
528968
|
* regardless of which loop the dispatch happened in.
|
|
@@ -529013,7 +528979,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529013
528979
|
this._decomp2GateActive = true;
|
|
529014
528980
|
this.emit({
|
|
529015
528981
|
type: "status",
|
|
529016
|
-
content: `DECOMP-2
|
|
528982
|
+
content: `DECOMP-2 ADVISORY ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls, threshold=${DECOMP2_FILE_SPREAD_THRESHOLD}; further edits to NEW files remain allowed, but sub_agent is recommended for the next module`,
|
|
529017
528983
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529018
528984
|
});
|
|
529019
528985
|
}
|
|
@@ -529047,7 +529013,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529047
529013
|
this._decomp2GateActive = false;
|
|
529048
529014
|
this.emit({
|
|
529049
529015
|
type: "status",
|
|
529050
|
-
content: `DECOMP-2
|
|
529016
|
+
content: `DECOMP-2 ADVISORY CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
|
|
529051
529017
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529052
529018
|
});
|
|
529053
529019
|
}
|
|
@@ -529057,7 +529023,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529057
529023
|
* REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
|
|
529058
529024
|
*
|
|
529059
529025
|
* Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
|
|
529060
|
-
* 5-6 times each + 22 BFC-61.G coercion
|
|
529026
|
+
* 5-6 times each + 22 BFC-61.G coercion advisories — and ZERO of those
|
|
529061
529027
|
* resulted in a creative edit. The agent was rationally stuck: it
|
|
529062
529028
|
* believed it needed to read more to debug, the build command kept
|
|
529063
529029
|
* giving the same error, and the standard "issue an edit" directive
|
|
@@ -529070,7 +529036,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529070
529036
|
* different output. Without this signal we'd just keep telling the
|
|
529071
529037
|
* agent to "make an edit" — which is exactly what it can't think of.
|
|
529072
529038
|
*
|
|
529073
|
-
* When detected, the BFC-61.G
|
|
529039
|
+
* When detected, the BFC-61.G advisory message swaps to a PERTURB-strategy
|
|
529074
529040
|
* directive: stop reading, change ONE thing in the most-likely-culprit
|
|
529075
529041
|
* file even if you're uncertain, and let the new error signal guide
|
|
529076
529042
|
* the next iteration. This is a real human debugging strategy ("perturb
|
|
@@ -529207,27 +529173,6 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529207
529173
|
* name with objective evidence, complete remaining items in order, update the
|
|
529208
529174
|
* checklist via todo_write, and only then call task_complete.
|
|
529209
529175
|
*/
|
|
529210
|
-
/**
|
|
529211
|
-
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
|
|
529212
|
-
* block / budget exhausted). These paths return early from
|
|
529213
|
-
* executeSingle BEFORE the main result-handling code, so the normal
|
|
529214
|
-
* MAST tagging misses them. This helper lets each return-early site
|
|
529215
|
-
* record a tag directly. Push-only — keeps the tag buffer bounded
|
|
529216
|
-
* to 200 entries.
|
|
529217
|
-
*/
|
|
529218
|
-
_tagSyntheticFailure(args) {
|
|
529219
|
-
try {
|
|
529220
|
-
this._mastTags.push({
|
|
529221
|
-
mode: args.mode,
|
|
529222
|
-
category: MAST_CATEGORY[args.mode],
|
|
529223
|
-
rationale: args.rationale
|
|
529224
|
-
});
|
|
529225
|
-
if (this._mastTags.length > 200) {
|
|
529226
|
-
this._mastTags = this._mastTags.slice(-200);
|
|
529227
|
-
}
|
|
529228
|
-
} catch {
|
|
529229
|
-
}
|
|
529230
|
-
}
|
|
529231
529176
|
/**
|
|
529232
529177
|
* REG-39b: emit a MAST taxonomy summary as a status event. Called both
|
|
529233
529178
|
* mid-run (every N turns, so SIGTERM kills don't lose the data) and at
|
|
@@ -531127,6 +531072,9 @@ Respond with your assessment, then take action.`;
|
|
|
531127
531072
|
this._fileWriteTimestamps = [];
|
|
531128
531073
|
this._aborting = false;
|
|
531129
531074
|
this._reg61CooldownUntilTurn = -1;
|
|
531075
|
+
this._writesSinceLastTodoWrite = 0;
|
|
531076
|
+
this._progressGateActive = false;
|
|
531077
|
+
this._consecutiveProgressAdvisories = 0;
|
|
531130
531078
|
this._reg61PerpetualGateActive = false;
|
|
531131
531079
|
this._decomp2MainContextFiles = /* @__PURE__ */ new Set();
|
|
531132
531080
|
this._decomp2SubAgentCalls = 0;
|
|
@@ -533428,24 +533376,9 @@ ${memoryLines.join("\n")}`
|
|
|
533428
533376
|
});
|
|
533429
533377
|
}
|
|
533430
533378
|
{
|
|
533431
|
-
const
|
|
533432
|
-
if (
|
|
533433
|
-
|
|
533434
|
-
type: "tool_call",
|
|
533435
|
-
toolName: tc.name,
|
|
533436
|
-
toolArgs: tc.arguments,
|
|
533437
|
-
turn,
|
|
533438
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533439
|
-
});
|
|
533440
|
-
this.emit({
|
|
533441
|
-
type: "tool_result",
|
|
533442
|
-
toolName: tc.name,
|
|
533443
|
-
success: false,
|
|
533444
|
-
content: _decomp2Block.slice(0, 120),
|
|
533445
|
-
turn,
|
|
533446
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533447
|
-
});
|
|
533448
|
-
return { tc, output: _decomp2Block };
|
|
533379
|
+
const _decomp2Advisory = this._maybeDecomp2Advisory(tc, turn);
|
|
533380
|
+
if (_decomp2Advisory) {
|
|
533381
|
+
pushSoftInjection("system", _decomp2Advisory);
|
|
533449
533382
|
}
|
|
533450
533383
|
}
|
|
533451
533384
|
const PROGRESS_GATE_BYPASS_TOOLS = /* @__PURE__ */ new Set([
|
|
@@ -533457,77 +533390,55 @@ ${memoryLines.join("\n")}`
|
|
|
533457
533390
|
// useful for the agent to consult prior phase state before updating
|
|
533458
533391
|
]);
|
|
533459
533392
|
if (this._progressGateActive && !PROGRESS_GATE_BYPASS_TOOLS.has(tc.name)) {
|
|
533460
|
-
this.emit({
|
|
533461
|
-
type: "tool_call",
|
|
533462
|
-
toolName: tc.name,
|
|
533463
|
-
toolArgs: tc.arguments,
|
|
533464
|
-
turn,
|
|
533465
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533466
|
-
});
|
|
533467
533393
|
const recentWrites = [];
|
|
533468
533394
|
for (const [path11, info] of this._worldFacts.files) {
|
|
533469
533395
|
if ((info.writeCount ?? 0) > 0 && (info.lastWriteTurn ?? -1) >= 0 && turn - (info.lastWriteTurn ?? 0) <= 8) {
|
|
533470
533396
|
recentWrites.push({ path: path11, turn: info.lastWriteTurn ?? 0 });
|
|
533471
533397
|
}
|
|
533472
533398
|
}
|
|
533473
|
-
this.
|
|
533399
|
+
this._consecutiveProgressAdvisories++;
|
|
533474
533400
|
recentWrites.sort((a2, b) => b.turn - a2.turn);
|
|
533475
533401
|
const showWrites = recentWrites.slice(0, 16);
|
|
533476
|
-
const isRepeat = this.
|
|
533402
|
+
const isRepeat = this._consecutiveProgressAdvisories >= 2;
|
|
533477
533403
|
const gateMsg = [
|
|
533478
|
-
`[
|
|
533404
|
+
`[PROGRESS ADVISORY — todo update recommended]`,
|
|
533479
533405
|
``,
|
|
533480
533406
|
`CAUSE: ${this._writesSinceLastTodoWrite} file writes since last todo_write call. Without progress tracking, the next turn re-plans the same work (plan-replay).`,
|
|
533481
|
-
`EFFECT:
|
|
533482
|
-
`ACTION
|
|
533483
|
-
`CONSEQUENCE OF IGNORING: Retrying blocked tools does NOT work — only todo_write is accepted while the gate is active.`,
|
|
533407
|
+
`EFFECT: The requested tool is still allowed. Update todos soon so progress state stays accurate.`,
|
|
533408
|
+
`ACTION RECOMMENDED: Call todo_write with updated progress after this tool call if the recent writes satisfy todo items.`,
|
|
533484
533409
|
isRepeat ? `
|
|
533485
|
-
[ESCALATION: This is
|
|
533410
|
+
[ESCALATION: This is advisory #${this._consecutiveProgressAdvisories}. You are continuing without todo_write; progress replay risk is increasing.]` : "",
|
|
533486
533411
|
``,
|
|
533487
533412
|
`Recent file modifications (use these to decide what's done):`,
|
|
533488
533413
|
...showWrites.map((w) => ` • ${w.path} (turn ${w.turn})`),
|
|
533489
533414
|
recentWrites.length > showWrites.length ? ` • ... +${recentWrites.length - showWrites.length} more` : "",
|
|
533490
533415
|
``,
|
|
533491
|
-
`Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress
|
|
533416
|
+
`Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress.`
|
|
533492
533417
|
].filter(Boolean).join("\n");
|
|
533418
|
+
pushSoftInjection("system", gateMsg);
|
|
533493
533419
|
this.emit({
|
|
533494
|
-
type: "
|
|
533420
|
+
type: "status",
|
|
533495
533421
|
toolName: tc.name,
|
|
533496
|
-
|
|
533497
|
-
content: gateMsg.slice(0, 120),
|
|
533422
|
+
content: `PROGRESS ADVISORY — ${this._writesSinceLastTodoWrite} writes since todo_write; '${tc.name}' allowed`,
|
|
533498
533423
|
turn,
|
|
533499
533424
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533500
533425
|
});
|
|
533501
|
-
return { tc, output: gateMsg };
|
|
533502
533426
|
}
|
|
533503
533427
|
const _argsKeyForBudget = `${tc.name}:${argsKey}`;
|
|
533504
533428
|
const _isCachedHit = recentToolResults.has(_argsKeyForBudget);
|
|
533505
533429
|
const budgetRemaining = toolCallBudget.get(tc.name);
|
|
533506
533430
|
if (budgetRemaining !== void 0 && !_isCachedHit) {
|
|
533507
533431
|
if (budgetRemaining <= 0) {
|
|
533432
|
+
const budgetMsg = `[BUDGET ADVISORY] You have used all ${toolBudgets[tc.name]} recommended ${tc.name} calls for the current phase. The requested tool call is still allowed, but repeated use may be low-value. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
|
|
533433
|
+
pushSoftInjection("system", budgetMsg);
|
|
533508
533434
|
this.emit({
|
|
533509
|
-
type: "
|
|
533510
|
-
|
|
533511
|
-
toolArgs: tc.arguments,
|
|
533512
|
-
turn,
|
|
533513
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533514
|
-
});
|
|
533515
|
-
const budgetMsg = `[BUDGET EXHAUSTED] You have used all ${toolBudgets[tc.name]} allowed ${tc.name} calls for the current phase. You ALREADY have enough information from previous calls. DO NOT try to call ${tc.name} again — it will be blocked. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
|
|
533516
|
-
this.emit({
|
|
533517
|
-
type: "tool_result",
|
|
533518
|
-
toolName: tc.name,
|
|
533519
|
-
success: false,
|
|
533520
|
-
content: budgetMsg.slice(0, 120),
|
|
533521
|
-
turn,
|
|
533435
|
+
type: "status",
|
|
533436
|
+
content: `BUDGET ADVISORY — ${tc.name} exceeded recommended per-phase budget; tool allowed`,
|
|
533522
533437
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533523
533438
|
});
|
|
533524
|
-
|
|
533525
|
-
|
|
533526
|
-
rationale: `${tc.name} exhausted per-phase budget of ${toolBudgets[tc.name]}`
|
|
533527
|
-
});
|
|
533528
|
-
return { tc, output: budgetMsg };
|
|
533439
|
+
} else {
|
|
533440
|
+
toolCallBudget.set(tc.name, budgetRemaining - 1);
|
|
533529
533441
|
}
|
|
533530
|
-
toolCallBudget.set(tc.name, budgetRemaining - 1);
|
|
533531
533442
|
}
|
|
533532
533443
|
const toolFingerprint = this._buildToolFingerprint(tc.name, tc.arguments ?? {});
|
|
533533
533444
|
const baseIsReadLike = ![
|
|
@@ -533612,29 +533523,15 @@ ${memoryLines.join("\n")}`
|
|
|
533612
533523
|
observerRedundantBlock
|
|
533613
533524
|
});
|
|
533614
533525
|
if (criticDecision.decision === "observer_block") {
|
|
533615
|
-
this.
|
|
533616
|
-
type: "tool_call",
|
|
533617
|
-
toolName: tc.name,
|
|
533618
|
-
toolArgs: tc.arguments,
|
|
533619
|
-
turn,
|
|
533620
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533621
|
-
});
|
|
533622
|
-
const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
533526
|
+
const advisoryMsg = criticDecision.cachedResult ? `[DUPLICATE-CALL ADVISORY — this tool+args already succeeded. Prior result preview:]
|
|
533623
533527
|
|
|
533624
|
-
${criticDecision.cachedResult.slice(0, 500)}` : `[
|
|
533528
|
+
${criticDecision.cachedResult.slice(0, 500)}` : `[DUPLICATE-CALL ADVISORY — the observer confirmed this tool already succeeded with these arguments on a prior turn. The call is still allowed, but using prior findings may be faster.]`;
|
|
533529
|
+
pushSoftInjection("system", advisoryMsg);
|
|
533625
533530
|
this.emit({
|
|
533626
|
-
type: "
|
|
533627
|
-
|
|
533628
|
-
success: true,
|
|
533629
|
-
content: blockMsg.slice(0, 100),
|
|
533630
|
-
turn,
|
|
533531
|
+
type: "status",
|
|
533532
|
+
content: `DUPLICATE-CALL ADVISORY — ${tc.name} allowed despite observer redundancy`,
|
|
533631
533533
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533632
533534
|
});
|
|
533633
|
-
this._tagSyntheticFailure({
|
|
533634
|
-
mode: "step_repetition",
|
|
533635
|
-
rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
|
|
533636
|
-
});
|
|
533637
|
-
return { tc, output: blockMsg };
|
|
533638
533535
|
}
|
|
533639
533536
|
if (criticDecision.decision === "force_progress_block") {
|
|
533640
533537
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
@@ -533643,26 +533540,12 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533643
533540
|
recentToolResults.delete(toolFingerprint);
|
|
533644
533541
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
533645
533542
|
}
|
|
533543
|
+
pushSoftInjection("system", criticDecision.blockMessage.replace(/\[FORCED PROGRESS BLOCK[^\]]*\]|\[BLOCKED[^\]]*\]/gi, "[REPETITION ADVISORY]"));
|
|
533646
533544
|
this.emit({
|
|
533647
|
-
type: "
|
|
533648
|
-
|
|
533649
|
-
toolArgs: tc.arguments,
|
|
533650
|
-
turn,
|
|
533651
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533652
|
-
});
|
|
533653
|
-
this.emit({
|
|
533654
|
-
type: "tool_result",
|
|
533655
|
-
toolName: tc.name,
|
|
533656
|
-
success: false,
|
|
533657
|
-
content: criticDecision.blockMessage.slice(0, 120),
|
|
533658
|
-
turn,
|
|
533545
|
+
type: "status",
|
|
533546
|
+
content: `REPETITION ADVISORY — ${tc.name} repeated ${criticDecision.hitNumber} times; tool allowed`,
|
|
533659
533547
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533660
533548
|
});
|
|
533661
|
-
this._tagSyntheticFailure({
|
|
533662
|
-
mode: "step_repetition",
|
|
533663
|
-
rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
|
|
533664
|
-
});
|
|
533665
|
-
return { tc, output: criticDecision.blockMessage };
|
|
533666
533549
|
}
|
|
533667
533550
|
if (criticDecision.decision === "serve_cached") {
|
|
533668
533551
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
@@ -533671,30 +533554,20 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533671
533554
|
recentToolResults.delete(toolFingerprint);
|
|
533672
533555
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
533673
533556
|
}
|
|
533674
|
-
|
|
533675
|
-
type: "tool_call",
|
|
533676
|
-
toolName: tc.name,
|
|
533677
|
-
toolArgs: tc.arguments,
|
|
533678
|
-
turn,
|
|
533679
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533680
|
-
});
|
|
533681
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
533557
|
+
const header = criticDecision.compacted ? `[CACHE ADVISORY — the original result was compacted from context. Prior data preview follows; the tool call is still allowed.]
|
|
533682
533558
|
|
|
533683
|
-
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result
|
|
533559
|
+
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result may be identical. The tool call is still allowed, but using existing data may be faster.]
|
|
533684
533560
|
|
|
533685
533561
|
`;
|
|
533686
533562
|
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
533687
533563
|
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
533688
533564
|
const dedupOutput = header + truncatedCache;
|
|
533565
|
+
pushSoftInjection("system", dedupOutput);
|
|
533689
533566
|
this.emit({
|
|
533690
|
-
type: "
|
|
533691
|
-
|
|
533692
|
-
success: true,
|
|
533693
|
-
content: header.slice(0, 100),
|
|
533694
|
-
turn,
|
|
533567
|
+
type: "status",
|
|
533568
|
+
content: `CACHE ADVISORY — ${tc.name} duplicate call #${criticDecision.hitNumber}; tool allowed`,
|
|
533695
533569
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533696
533570
|
});
|
|
533697
|
-
return { tc, output: dedupOutput };
|
|
533698
533571
|
}
|
|
533699
533572
|
this.emit({
|
|
533700
533573
|
type: "tool_call",
|
|
@@ -533744,6 +533617,18 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533744
533617
|
const violations = checkConstraints(tc.name, tc.arguments);
|
|
533745
533618
|
const blockViolations = violations.filter((v) => v.constraint.action === "block");
|
|
533746
533619
|
if (blockViolations.length > 0) {
|
|
533620
|
+
const warning = formatViolationWarning(blockViolations);
|
|
533621
|
+
pushSoftInjection("system", `[CONSTRAINT ADVISORY]
|
|
533622
|
+
${warning}
|
|
533623
|
+
The tool call is still allowed by the no-hard-block policy. Treat this as risk guidance, not a runtime denial.`);
|
|
533624
|
+
this.emit({
|
|
533625
|
+
type: "status",
|
|
533626
|
+
content: `Constraint advisory: ${blockViolations[0].constraint.message}`,
|
|
533627
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533628
|
+
});
|
|
533629
|
+
pendingConstraintWarnings.push(warning);
|
|
533630
|
+
}
|
|
533631
|
+
if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && blockViolations.length > 0) {
|
|
533747
533632
|
result = {
|
|
533748
533633
|
success: false,
|
|
533749
533634
|
output: "",
|
|
@@ -533767,6 +533652,16 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533767
533652
|
}
|
|
533768
533653
|
const hookCheck = this._hookManager.runPreToolUse(tc.name, tc.arguments, this._sessionId);
|
|
533769
533654
|
if (!hookCheck.allowed) {
|
|
533655
|
+
pushSoftInjection("system", `[HOOK ADVISORY]
|
|
533656
|
+
Tool "${tc.name}" matched a pre-tool hook warning: ${hookCheck.reason ?? "hook denied"}.
|
|
533657
|
+
The tool call is still allowed by the no-hard-block policy. Treat this as guidance and proceed deliberately.`);
|
|
533658
|
+
this.emit({
|
|
533659
|
+
type: "status",
|
|
533660
|
+
content: `Hook advisory for ${tc.name}: ${hookCheck.reason ?? "hook denied"}. Tool allowed by no-hard-block policy.`,
|
|
533661
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533662
|
+
});
|
|
533663
|
+
}
|
|
533664
|
+
if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && !hookCheck.allowed) {
|
|
533770
533665
|
result = {
|
|
533771
533666
|
success: false,
|
|
533772
533667
|
output: "",
|
|
@@ -533923,7 +533818,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533923
533818
|
this._reg61PerpetualGateActive = false;
|
|
533924
533819
|
this.emit({
|
|
533925
533820
|
type: "status",
|
|
533926
|
-
content: `REG-61
|
|
533821
|
+
content: `REG-61 ADVISORY CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
|
|
533927
533822
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533928
533823
|
});
|
|
533929
533824
|
}
|
|
@@ -533958,7 +533853,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533958
533853
|
this._progressGateActive = true;
|
|
533959
533854
|
this.emit({
|
|
533960
533855
|
type: "status",
|
|
533961
|
-
content: `Progress
|
|
533856
|
+
content: `Progress advisory engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools remain allowed, todo update recommended`,
|
|
533962
533857
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533963
533858
|
});
|
|
533964
533859
|
}
|
|
@@ -533974,7 +533869,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533974
533869
|
}
|
|
533975
533870
|
this._writesSinceLastTodoWrite = 0;
|
|
533976
533871
|
this._progressGateActive = false;
|
|
533977
|
-
this.
|
|
533872
|
+
this._consecutiveProgressAdvisories = 0;
|
|
533978
533873
|
}
|
|
533979
533874
|
if (tc.name === "file_read") {
|
|
533980
533875
|
const p2 = String(tc.arguments?.["path"] ?? tc.arguments?.["file"] ?? "");
|
|
@@ -534983,10 +534878,10 @@ Then use file_read on individual FILES inside it.`);
|
|
|
534983
534878
|
const output = sr.result.success ? sr.result.output : `Error: ${sr.result.error || "unknown"}
|
|
534984
534879
|
${sr.result.output}`;
|
|
534985
534880
|
messages2.push(this.buildToolMessage(output, matchTc.id, matchTc.name));
|
|
534986
|
-
if (this.
|
|
534881
|
+
if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
|
|
534987
534882
|
messages2.push({
|
|
534988
534883
|
role: "system",
|
|
534989
|
-
content: `[PROGRESS
|
|
534884
|
+
content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
|
|
534990
534885
|
});
|
|
534991
534886
|
}
|
|
534992
534887
|
if (matchTc.name === "task_complete") {
|
|
@@ -534996,28 +534891,32 @@ ${sr.result.output}`;
|
|
|
534996
534891
|
messages2.push({ role: "system", content: guard });
|
|
534997
534892
|
this.emit({
|
|
534998
534893
|
type: "status",
|
|
534999
|
-
content: `task_complete
|
|
534894
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535000
534895
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535001
534896
|
});
|
|
535002
|
-
} else {
|
|
535003
|
-
const _bp1 = await this._runBackwardPassReview(turn);
|
|
535004
|
-
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
535005
|
-
messages2.push({ role: "system", content: _bp1.feedback });
|
|
535006
|
-
} else {
|
|
535007
|
-
completed = true;
|
|
535008
|
-
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
535009
|
-
if (summary && !this._assistantTextEmitted) {
|
|
535010
|
-
this.emit({
|
|
535011
|
-
type: "assistant_text",
|
|
535012
|
-
content: summary,
|
|
535013
|
-
turn,
|
|
535014
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535015
|
-
});
|
|
535016
|
-
this._assistantTextEmitted = true;
|
|
535017
|
-
}
|
|
535018
|
-
break;
|
|
535019
|
-
}
|
|
535020
534897
|
}
|
|
534898
|
+
const _bp1 = await this._runBackwardPassReview(turn);
|
|
534899
|
+
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
534900
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
534901
|
+
${_bp1.feedback}` });
|
|
534902
|
+
this.emit({
|
|
534903
|
+
type: "status",
|
|
534904
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
534905
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534906
|
+
});
|
|
534907
|
+
}
|
|
534908
|
+
completed = true;
|
|
534909
|
+
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
534910
|
+
if (summary && !this._assistantTextEmitted) {
|
|
534911
|
+
this.emit({
|
|
534912
|
+
type: "assistant_text",
|
|
534913
|
+
content: summary,
|
|
534914
|
+
turn,
|
|
534915
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534916
|
+
});
|
|
534917
|
+
this._assistantTextEmitted = true;
|
|
534918
|
+
}
|
|
534919
|
+
break;
|
|
535021
534920
|
}
|
|
535022
534921
|
}
|
|
535023
534922
|
}
|
|
@@ -535029,10 +534928,10 @@ ${sr.result.output}`;
|
|
|
535029
534928
|
const r2 = await executeSingle(tc);
|
|
535030
534929
|
if (r2) {
|
|
535031
534930
|
messages2.push(this.buildToolMessage(r2.output, r2.tc.id, r2.tc.name));
|
|
535032
|
-
if (this.
|
|
534931
|
+
if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
|
|
535033
534932
|
messages2.push({
|
|
535034
534933
|
role: "system",
|
|
535035
|
-
content: `[PROGRESS
|
|
534934
|
+
content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
|
|
535036
534935
|
});
|
|
535037
534936
|
}
|
|
535038
534937
|
if (r2.tc.name === "task_complete") {
|
|
@@ -535042,28 +534941,32 @@ ${sr.result.output}`;
|
|
|
535042
534941
|
messages2.push({ role: "system", content: guard });
|
|
535043
534942
|
this.emit({
|
|
535044
534943
|
type: "status",
|
|
535045
|
-
content: `task_complete
|
|
534944
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535046
534945
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535047
534946
|
});
|
|
535048
|
-
} else {
|
|
535049
|
-
const _bp2 = await this._runBackwardPassReview(turn);
|
|
535050
|
-
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
535051
|
-
messages2.push({ role: "system", content: _bp2.feedback });
|
|
535052
|
-
} else {
|
|
535053
|
-
completed = true;
|
|
535054
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
535055
|
-
if (summary && !this._assistantTextEmitted) {
|
|
535056
|
-
this.emit({
|
|
535057
|
-
type: "assistant_text",
|
|
535058
|
-
content: summary,
|
|
535059
|
-
turn,
|
|
535060
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535061
|
-
});
|
|
535062
|
-
this._assistantTextEmitted = true;
|
|
535063
|
-
}
|
|
535064
|
-
break;
|
|
535065
|
-
}
|
|
535066
534947
|
}
|
|
534948
|
+
const _bp2 = await this._runBackwardPassReview(turn);
|
|
534949
|
+
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
534950
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
534951
|
+
${_bp2.feedback}` });
|
|
534952
|
+
this.emit({
|
|
534953
|
+
type: "status",
|
|
534954
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
534955
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534956
|
+
});
|
|
534957
|
+
}
|
|
534958
|
+
completed = true;
|
|
534959
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
534960
|
+
if (summary && !this._assistantTextEmitted) {
|
|
534961
|
+
this.emit({
|
|
534962
|
+
type: "assistant_text",
|
|
534963
|
+
content: summary,
|
|
534964
|
+
turn,
|
|
534965
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534966
|
+
});
|
|
534967
|
+
this._assistantTextEmitted = true;
|
|
534968
|
+
}
|
|
534969
|
+
break;
|
|
535067
534970
|
}
|
|
535068
534971
|
}
|
|
535069
534972
|
}
|
|
@@ -535117,36 +535020,40 @@ ${sr.result.output}`;
|
|
|
535117
535020
|
messages2.push({ role: "system", content: guard });
|
|
535118
535021
|
this.emit({
|
|
535119
535022
|
type: "status",
|
|
535120
|
-
content: `task_complete
|
|
535023
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535121
535024
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535122
535025
|
});
|
|
535123
|
-
}
|
|
535124
|
-
|
|
535125
|
-
|
|
535126
|
-
|
|
535127
|
-
|
|
535128
|
-
|
|
535129
|
-
|
|
535130
|
-
|
|
535131
|
-
|
|
535132
|
-
|
|
535133
|
-
|
|
535134
|
-
|
|
535135
|
-
|
|
535136
|
-
|
|
535137
|
-
|
|
535138
|
-
|
|
535139
|
-
|
|
535140
|
-
|
|
535141
|
-
content: summary,
|
|
535142
|
-
turn,
|
|
535143
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535144
|
-
});
|
|
535145
|
-
this._assistantTextEmitted = true;
|
|
535026
|
+
}
|
|
535027
|
+
const _bp3 = await this._runBackwardPassReview(turn);
|
|
535028
|
+
if (_bp3 && !_bp3.proceed && _bp3.feedback) {
|
|
535029
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
535030
|
+
${_bp3.feedback}` });
|
|
535031
|
+
this.emit({
|
|
535032
|
+
type: "status",
|
|
535033
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
535034
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535035
|
+
});
|
|
535036
|
+
}
|
|
535037
|
+
completed = true;
|
|
535038
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
535039
|
+
for (const tool of this.tools.values()) {
|
|
535040
|
+
if (tool.cleanup) {
|
|
535041
|
+
try {
|
|
535042
|
+
await tool.cleanup();
|
|
535043
|
+
} catch {
|
|
535146
535044
|
}
|
|
535147
|
-
break;
|
|
535148
535045
|
}
|
|
535149
535046
|
}
|
|
535047
|
+
if (summary && !this._assistantTextEmitted) {
|
|
535048
|
+
this.emit({
|
|
535049
|
+
type: "assistant_text",
|
|
535050
|
+
content: summary,
|
|
535051
|
+
turn,
|
|
535052
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535053
|
+
});
|
|
535054
|
+
this._assistantTextEmitted = true;
|
|
535055
|
+
}
|
|
535056
|
+
break;
|
|
535150
535057
|
}
|
|
535151
535058
|
}
|
|
535152
535059
|
}
|
|
@@ -535759,18 +535666,9 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
535759
535666
|
turn,
|
|
535760
535667
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535761
535668
|
});
|
|
535762
|
-
const
|
|
535763
|
-
if (
|
|
535764
|
-
|
|
535765
|
-
type: "tool_result",
|
|
535766
|
-
toolName: tc.name,
|
|
535767
|
-
content: _decomp2BFBlock.slice(0, 200),
|
|
535768
|
-
success: false,
|
|
535769
|
-
turn,
|
|
535770
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535771
|
-
});
|
|
535772
|
-
messages2.push(this.buildToolMessage(_decomp2BFBlock, tc.id, tc.name));
|
|
535773
|
-
continue;
|
|
535669
|
+
const _decomp2BFAdvisory = this._maybeDecomp2Advisory(tc, turn);
|
|
535670
|
+
if (_decomp2BFAdvisory) {
|
|
535671
|
+
messages2.push({ role: "system", content: _decomp2BFAdvisory });
|
|
535774
535672
|
}
|
|
535775
535673
|
const tool = this.tools.get(tc.name);
|
|
535776
535674
|
let result;
|
|
@@ -535871,28 +535769,32 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
535871
535769
|
messages2.push({ role: "system", content: guard });
|
|
535872
535770
|
this.emit({
|
|
535873
535771
|
type: "status",
|
|
535874
|
-
content: `task_complete
|
|
535772
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535875
535773
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535876
535774
|
});
|
|
535877
|
-
} else {
|
|
535878
|
-
const _bp4 = await this._runBackwardPassReview(turn);
|
|
535879
|
-
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
535880
|
-
messages2.push({ role: "system", content: _bp4.feedback });
|
|
535881
|
-
continue;
|
|
535882
|
-
}
|
|
535883
|
-
completed = true;
|
|
535884
|
-
summary = extractTaskCompleteSummary(tc.arguments);
|
|
535885
|
-
if (summary && !this._assistantTextEmitted) {
|
|
535886
|
-
this.emit({
|
|
535887
|
-
type: "assistant_text",
|
|
535888
|
-
content: summary,
|
|
535889
|
-
turn,
|
|
535890
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535891
|
-
});
|
|
535892
|
-
this._assistantTextEmitted = true;
|
|
535893
|
-
}
|
|
535894
|
-
break;
|
|
535895
535775
|
}
|
|
535776
|
+
const _bp4 = await this._runBackwardPassReview(turn);
|
|
535777
|
+
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
535778
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
535779
|
+
${_bp4.feedback}` });
|
|
535780
|
+
this.emit({
|
|
535781
|
+
type: "status",
|
|
535782
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
535783
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535784
|
+
});
|
|
535785
|
+
}
|
|
535786
|
+
completed = true;
|
|
535787
|
+
summary = extractTaskCompleteSummary(tc.arguments);
|
|
535788
|
+
if (summary && !this._assistantTextEmitted) {
|
|
535789
|
+
this.emit({
|
|
535790
|
+
type: "assistant_text",
|
|
535791
|
+
content: summary,
|
|
535792
|
+
turn,
|
|
535793
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535794
|
+
});
|
|
535795
|
+
this._assistantTextEmitted = true;
|
|
535796
|
+
}
|
|
535797
|
+
break;
|
|
535896
535798
|
}
|
|
535897
535799
|
}
|
|
535898
535800
|
if (completed)
|
|
@@ -535920,14 +535822,13 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
535920
535822
|
messages2.push({ role: "system", content: guard });
|
|
535921
535823
|
this.emit({
|
|
535922
535824
|
type: "status",
|
|
535923
|
-
content: `task_complete text
|
|
535825
|
+
content: `task_complete text advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535924
535826
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535925
535827
|
});
|
|
535926
|
-
} else {
|
|
535927
|
-
completed = true;
|
|
535928
|
-
summary = content;
|
|
535929
|
-
break;
|
|
535930
535828
|
}
|
|
535829
|
+
completed = true;
|
|
535830
|
+
summary = content;
|
|
535831
|
+
break;
|
|
535931
535832
|
}
|
|
535932
535833
|
const finalVarMatch = content.match(/FINAL_VAR\s*\(\s*["']?(\w+)["']?\s*\)/);
|
|
535933
535834
|
if (finalVarMatch && this.options.finalVarResolver) {
|
|
@@ -614120,7 +614021,7 @@ function createTaskCompleteTool(modelTier) {
|
|
|
614120
614021
|
const summaryDesc = modelTier === "small" || modelTier === "medium" ? "Your complete response to the user. For questions/chat: put your FULL answer here (this is what the user will see). For coding tasks: brief summary of what was accomplished." : "Brief summary of what was accomplished";
|
|
614121
614022
|
return {
|
|
614122
614023
|
name: "task_complete",
|
|
614123
|
-
description: "Signal that the task is complete.
|
|
614024
|
+
description: "Signal that the task is complete. ADVISORY: if active todos, interactive sessions, or build checks indicate risk, the tool result will include model-visible guidance, but the tool is not hard-blocked.",
|
|
614124
614025
|
parameters: {
|
|
614125
614026
|
type: "object",
|
|
614126
614027
|
properties: {
|
|
@@ -614129,11 +614030,14 @@ function createTaskCompleteTool(modelTier) {
|
|
|
614129
614030
|
required: ["summary"]
|
|
614130
614031
|
},
|
|
614131
614032
|
async execute(args) {
|
|
614033
|
+
const summary = args["summary"] || "Task completed.";
|
|
614132
614034
|
if (_interactiveSessionActive) {
|
|
614133
614035
|
return {
|
|
614134
|
-
success:
|
|
614135
|
-
output: `
|
|
614136
|
-
|
|
614036
|
+
success: true,
|
|
614037
|
+
output: `[TASK_COMPLETE ADVISORY — interactive session still active]
|
|
614038
|
+
${_interactiveSessionReason} You should continue the interaction loop until the session ends (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). The completion is allowed by no-hard-block policy.
|
|
614039
|
+
|
|
614040
|
+
${summary}`
|
|
614137
614041
|
};
|
|
614138
614042
|
}
|
|
614139
614043
|
try {
|
|
@@ -614159,14 +614063,17 @@ Respond concisely in this shape:
|
|
|
614159
614063
|
- verify: [{ name: "<exact item text>", completed: true|false, evidence: "<objective proof>" }, ...]
|
|
614160
614064
|
- next: "what you will do next OR the exact todo_write(...) call to update statuses"`;
|
|
614161
614065
|
return {
|
|
614162
|
-
success:
|
|
614163
|
-
output:
|
|
614164
|
-
error: `task_complete BLOCKED — ${incomplete.length} todo item(s) still incomplete.
|
|
614066
|
+
success: true,
|
|
614067
|
+
output: `[TASK_COMPLETE ADVISORY — ${incomplete.length} todo item(s) still incomplete]
|
|
614165
614068
|
|
|
614166
614069
|
Incomplete items:
|
|
614167
614070
|
${incompleteList}${more}
|
|
614168
614071
|
|
|
614169
|
-
|
|
614072
|
+
${guidance}
|
|
614073
|
+
|
|
614074
|
+
Completion is allowed by no-hard-block policy.
|
|
614075
|
+
|
|
614076
|
+
${summary}`
|
|
614170
614077
|
};
|
|
614171
614078
|
}
|
|
614172
614079
|
try {
|
|
@@ -614176,7 +614083,6 @@ ${incompleteList}${more}
|
|
|
614176
614083
|
}
|
|
614177
614084
|
} catch {
|
|
614178
614085
|
}
|
|
614179
|
-
const summary = args["summary"] || "Task completed.";
|
|
614180
614086
|
const buildGuardSkip = process.env["OA_DISABLE_TASK_COMPLETE_BUILD_GUARD"] === "1" || /^\s*BLOCKED\b/i.test(summary);
|
|
614181
614087
|
if (!buildGuardSkip) {
|
|
614182
614088
|
try {
|
|
@@ -614200,16 +614106,15 @@ ${incompleteList}${more}
|
|
|
614200
614106
|
} catch (e2) {
|
|
614201
614107
|
const out = ((e2?.stdout || "") + (e2?.stderr || "")).toString().slice(0, 2e3);
|
|
614202
614108
|
return {
|
|
614203
|
-
success:
|
|
614204
|
-
output:
|
|
614205
|
-
error: `task_complete BLOCKED — \`npm run ${checkScript}\` is failing in ${cwd4}.
|
|
614109
|
+
success: true,
|
|
614110
|
+
output: `[TASK_COMPLETE ADVISORY — \`npm run ${checkScript}\` is failing in ${cwd4}]
|
|
614206
614111
|
|
|
614207
614112
|
Error output (last 2KB):
|
|
614208
614113
|
${out || "<empty stdout/stderr — likely timeout or non-zero exit>"}
|
|
614209
614114
|
|
|
614210
|
-
|
|
614115
|
+
Recommended: fix the build errors before considering the work done. Completion is still allowed by no-hard-block policy.
|
|
614211
614116
|
|
|
614212
|
-
|
|
614117
|
+
${summary}`
|
|
614213
614118
|
};
|
|
614214
614119
|
}
|
|
614215
614120
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.
|
|
3
|
+
"version": "0.187.576",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.
|
|
9
|
+
"version": "0.187.576",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED