open-agents-ai 0.187.574 → 0.187.576
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +264 -311
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -515299,26 +515299,10 @@ function summarizeMAST(tags) {
|
|
|
515299
515299
|
}
|
|
515300
515300
|
return { byMode, byCategory, total: tags.length };
|
|
515301
515301
|
}
|
|
515302
|
-
var MAST_CATEGORY;
|
|
515303
515302
|
var init_mast_tagger = __esm({
|
|
515304
515303
|
"packages/orchestrator/dist/mast-tagger.js"() {
|
|
515305
515304
|
"use strict";
|
|
515306
515305
|
init_reflection();
|
|
515307
|
-
MAST_CATEGORY = {
|
|
515308
|
-
spec_disobedience: "specification_design",
|
|
515309
|
-
step_repetition: "specification_design",
|
|
515310
|
-
history_loss: "specification_design",
|
|
515311
|
-
completion_unrecognized: "specification_design",
|
|
515312
|
-
input_ignored: "inter_agent_misalignment",
|
|
515313
|
-
proceeded_without_clarify: "inter_agent_misalignment",
|
|
515314
|
-
conversation_reset: "inter_agent_misalignment",
|
|
515315
|
-
reasoning_action_mismatch: "inter_agent_misalignment",
|
|
515316
|
-
premature_termination: "task_verification_termination",
|
|
515317
|
-
validation_skipped: "task_verification_termination",
|
|
515318
|
-
shallow_check_accepted: "task_verification_termination",
|
|
515319
|
-
premature_task_complete: "task_verification_termination",
|
|
515320
|
-
other: "specification_design"
|
|
515321
|
-
};
|
|
515322
515306
|
}
|
|
515323
515307
|
});
|
|
515324
515308
|
|
|
@@ -528102,15 +528086,16 @@ var init_agenticRunner = __esm({
|
|
|
528102
528086
|
// a phase's worth of work without recording progress — and on the next
|
|
528103
528087
|
// turn will replay the same plan. Surface a nudge before that happens.
|
|
528104
528088
|
_writesSinceLastTodoWrite = 0;
|
|
528105
|
-
// REG-12: Progress
|
|
528106
|
-
//
|
|
528107
|
-
//
|
|
528108
|
-
//
|
|
528109
|
-
//
|
|
528110
|
-
// Released when todo_write fires successfully. Without this, the agent
|
|
528111
|
-
// can re-emit the same plan a second time (plan-replay) and execute
|
|
528112
|
-
// duplicate work because PROGRESS NUDGE alone is informational.
|
|
528089
|
+
// REG-12: Progress advisory latch. When ≥6 file writes have happened
|
|
528090
|
+
// without a todo_write call, this latch flips ON. While active, non-todo
|
|
528091
|
+
// tool calls receive model-visible system guidance, but still execute.
|
|
528092
|
+
// Released when todo_write fires successfully. Without this, the agent can
|
|
528093
|
+
// re-emit the same plan a second time (plan-replay) and duplicate work.
|
|
528113
528094
|
_progressGateActive = false;
|
|
528095
|
+
// Consecutive advisory count. When the model ignores stale progress state,
|
|
528096
|
+
// this counter increments. ≥2 triggers a system message escalation to break
|
|
528097
|
+
// pattern-lock loops.
|
|
528098
|
+
_consecutiveProgressAdvisories = 0;
|
|
528114
528099
|
// REG-5: Rolling buffer of recent tool failures with their error output.
|
|
528115
528100
|
// Surfaced before every LLM call so the agent can't ignore "I just ran this
|
|
528116
528101
|
// and it errored". Detects same-fingerprint failure repetition and escalates
|
|
@@ -528340,19 +528325,12 @@ var init_agenticRunner = __esm({
|
|
|
528340
528325
|
// explicitly excludes todo_write/memory_write/list_directory.
|
|
528341
528326
|
_reg61CooldownUntilTurn = -1;
|
|
528342
528327
|
// BFC-61.G (root-cause from batch529-midi-coerce, 2026-05-03): REG-61
|
|
528343
|
-
//
|
|
528344
|
-
// which empirically warned but did not change behavior
|
|
528345
|
-
//
|
|
528346
|
-
//
|
|
528347
|
-
// 7 fires; ZERO of those blocks were followed by a creative edit on
|
|
528348
|
-
// the agent's next turn.
|
|
528349
|
-
//
|
|
528350
|
-
// Semantics: when REG-61 fires, this latch goes true. While true, every
|
|
528351
|
-
// non-bypass tool call gets BLOCKED with a synthetic error result. The
|
|
528352
|
-
// bypass set includes the 4 creative-edit tools plus task_complete /
|
|
528353
|
-
// ask_user / explicit web-task escape hatches. Any creative edit dispatch
|
|
528328
|
+
// persistent advisory until obeyed. Replaces BFC-61.E's one-shot nudge,
|
|
528329
|
+
// which empirically warned but did not change behavior. When REG-61 fires,
|
|
528330
|
+
// this latch goes true. While true, every non-bypass tool call receives
|
|
528331
|
+
// model-visible guidance and still executes. Any creative edit dispatch
|
|
528354
528332
|
// clears the latch ("directive satisfied"). Shell, file_read, todo_*,
|
|
528355
|
-
// grep_search, list_directory etc. are NOT in bypass
|
|
528333
|
+
// grep_search, list_directory etc. are NOT in bypass because those are the
|
|
528356
528334
|
// exact patterns batch528/529 agents used to ignore REG-61.
|
|
528357
528335
|
//
|
|
528358
528336
|
// Kill switch: OA_DISABLE_REG61_COERCE=1 disables BOTH set and enforce.
|
|
@@ -528360,15 +528338,18 @@ var init_agenticRunner = __esm({
|
|
|
528360
528338
|
// DECOMP-2 (root-cause from batch531-midi-decomp, 2026-05-03): compelling
|
|
528361
528339
|
// sub_agent delegation. DECOMP-1's informational directive was ignored
|
|
528362
528340
|
// (0 sub_agent calls in 466 tool-call run despite directive at turn 1).
|
|
528363
|
-
// Mirrors the BFC-61.G escalation arc
|
|
528364
|
-
// ≥
|
|
528365
|
-
//
|
|
528366
|
-
//
|
|
528367
|
-
//
|
|
528368
|
-
//
|
|
528341
|
+
// Mirrors the BFC-61.G escalation arc, but must not deadlock delivery:
|
|
528342
|
+
// when the agent has edited ≥adaptive-threshold distinct files in main
|
|
528343
|
+
// context WITHOUT successful sub_agent, the dispatcher injects guidance
|
|
528344
|
+
// for NEW files (paths not yet edited) while still allowing the edit.
|
|
528345
|
+
// Repeated failed delegation attempts unlock a main-context fallback so the
|
|
528346
|
+
// guardrail cannot become a hard write-deadlock when sub_agent itself is
|
|
528347
|
+
// broken or unavailable.
|
|
528369
528348
|
// Kill switch: OA_DISABLE_DECOMP2=1.
|
|
528370
528349
|
_decomp2MainContextFiles = /* @__PURE__ */ new Set();
|
|
528371
528350
|
_decomp2SubAgentCalls = 0;
|
|
528351
|
+
_decomp2FailedDelegationCalls = 0;
|
|
528352
|
+
_decomp2FallbackAllowed = false;
|
|
528372
528353
|
_decomp2GateActive = false;
|
|
528373
528354
|
// MEM_PATH item #9: adaptive retrieval cache. When the (goalHash, recent-tool-sig)
|
|
528374
528355
|
// hasn't changed since last retrieval, skip the PPR call entirely and reuse
|
|
@@ -528893,15 +528874,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528893
528874
|
});
|
|
528894
528875
|
}
|
|
528895
528876
|
/**
|
|
528896
|
-
* DECOMP-2 PRE-dispatch
|
|
528897
|
-
* string when the
|
|
528877
|
+
* DECOMP-2 PRE-dispatch advisory check. Returns a model-visible guidance
|
|
528878
|
+
* string when the file-spread advisory should surface; null to proceed
|
|
528879
|
+
* without extra guidance. Callers inject the string as system guidance and
|
|
528880
|
+
* then run the requested tool normally.
|
|
528898
528881
|
*
|
|
528899
|
-
*
|
|
528900
|
-
* - Main loop returns the block as a synthetic { tc, output } result.
|
|
528901
|
-
* - Brute-force emits tool_result + pushes tool message + `continue`.
|
|
528902
|
-
* Both invoke this method to compute the decision.
|
|
528903
|
-
*
|
|
528904
|
-
* Conditions for blocking:
|
|
528882
|
+
* Conditions for surfacing guidance:
|
|
528905
528883
|
* - `_decomp2GateActive` is true (set by `_trackDecomp2` when threshold crossed)
|
|
528906
528884
|
* - tool is one of the 4 creative-edit tools
|
|
528907
528885
|
* - the path is NOT in `_decomp2MainContextFiles` (i.e. it's a NEW file)
|
|
@@ -528910,9 +528888,11 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528910
528888
|
* Already-touched paths pass through (current-module finishing work allowed).
|
|
528911
528889
|
* sub_agent / task_complete / explicit web-task tools pass through (not creative-edit tools).
|
|
528912
528890
|
*/
|
|
528913
|
-
|
|
528891
|
+
_maybeDecomp2Advisory(tc, turn) {
|
|
528914
528892
|
if (!this._decomp2GateActive)
|
|
528915
528893
|
return null;
|
|
528894
|
+
if (this._decomp2FallbackAllowed)
|
|
528895
|
+
return null;
|
|
528916
528896
|
if (process.env["OA_DISABLE_DECOMP2"] === "1")
|
|
528917
528897
|
return null;
|
|
528918
528898
|
const _editTools = /* @__PURE__ */ new Set([
|
|
@@ -528931,9 +528911,9 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528931
528911
|
const _moreFiles = this._decomp2MainContextFiles.size > 8 ? `
|
|
528932
528912
|
... +${this._decomp2MainContextFiles.size - 8} more` : "";
|
|
528933
528913
|
const decomp2Msg = [
|
|
528934
|
-
`[
|
|
528914
|
+
`[DECOMP-2 advisory — main-context file spread]`,
|
|
528935
528915
|
``,
|
|
528936
|
-
`You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without
|
|
528916
|
+
`You have already edited ${this._decomp2MainContextFiles.size} distinct files in main context without a successful sub_agent. Continuing to edit another new file ('${_editPath}') may keep your context window saturated and trigger compaction thrashing.`,
|
|
528937
528917
|
``,
|
|
528938
528918
|
`Files you've already edited (will accept further edits to these):`,
|
|
528939
528919
|
_filesList,
|
|
@@ -528949,36 +528929,40 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528949
528929
|
` })`,
|
|
528950
528930
|
` 3. After sub_agent returns, mark the todo completed.`,
|
|
528951
528931
|
``,
|
|
528932
|
+
`If sub_agent keeps failing for reasons outside the module work, retry it once with corrected arguments. After repeated failed delegation attempts, OA will keep this as advisory-only fallback so file writes can continue.`,
|
|
528933
|
+
``,
|
|
528952
528934
|
`Why this matters: spreading edits across N files in main context burns ~N × file_size tokens. sub_agent gives the next module a focused context window.`,
|
|
528953
528935
|
``,
|
|
528954
528936
|
`If you have ALREADY edited '${_editPath}' (this is a continuation), the orchestrator's set must have missed it — call file_read to verify, then re-edit. Otherwise, dispatch sub_agent now.`
|
|
528955
528937
|
].join("\n");
|
|
528956
528938
|
this.emit({
|
|
528957
528939
|
type: "status",
|
|
528958
|
-
content: `DECOMP-2
|
|
528940
|
+
content: `DECOMP-2 ADVISORY — ${tc.name}('${_editPath}') at turn ${turn}; tool allowed, consider sub_agent for the next module`,
|
|
528959
528941
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
528960
528942
|
});
|
|
528961
|
-
this._tagSyntheticFailure({
|
|
528962
|
-
mode: "step_repetition",
|
|
528963
|
-
rationale: `DECOMP-2 new-file block on '${tc.name}'(${_editPath}) — agent has spread edits across ${this._decomp2MainContextFiles.size} files without sub_agent`
|
|
528964
|
-
});
|
|
528965
528943
|
return decomp2Msg;
|
|
528966
528944
|
}
|
|
528945
|
+
_decomp2FileSpreadThreshold() {
|
|
528946
|
+
const ctx3 = this.options.contextWindowSize ?? 0;
|
|
528947
|
+
if (ctx3 <= 0)
|
|
528948
|
+
return 5;
|
|
528949
|
+
return Math.max(5, Math.min(30, Math.round(ctx3 / 6400)));
|
|
528950
|
+
}
|
|
528967
528951
|
/**
|
|
528968
528952
|
* DECOMP-2 post-dispatch tracking. Refactored from inline so both the
|
|
528969
528953
|
* main turn loop AND the brute-force re-engagement inner loop record
|
|
528970
528954
|
* edits / sub_agent calls and check the gate-activation threshold.
|
|
528971
528955
|
* Without this method on both paths, batch532 measured 14 distinct
|
|
528972
528956
|
* files edited but DECOMP-2 never activated — because the main loop
|
|
528973
|
-
*
|
|
528974
|
-
* through the brute-force dispatch which had no tracking.
|
|
528957
|
+
* entered brute-force after task_complete review and all subsequent edits
|
|
528958
|
+
* flowed through the brute-force dispatch which had no tracking.
|
|
528975
528959
|
*
|
|
528976
528960
|
* Side effects when fired:
|
|
528977
528961
|
* - On successful creative edit: adds path to `_decomp2MainContextFiles`,
|
|
528978
|
-
* possibly activates `_decomp2GateActive` (emits status).
|
|
528962
|
+
* possibly activates `_decomp2GateActive` (emits advisory status).
|
|
528979
528963
|
* - On successful sub_agent / priority_delegate / background_run:
|
|
528980
|
-
* increments counter, clears
|
|
528981
|
-
* delegation attempts do not satisfy the
|
|
528964
|
+
* increments counter, clears advisory (emits status). Failed or
|
|
528965
|
+
* malformed delegation attempts do not satisfy the advisory.
|
|
528982
528966
|
*
|
|
528983
528967
|
* Pure post-dispatch: caller invokes AFTER the tool result is in hand,
|
|
528984
528968
|
* regardless of which loop the dispatch happened in.
|
|
@@ -528990,12 +528974,12 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
528990
528974
|
const _editPaths = this._extractToolTargetPaths(tc.name, tc.arguments, result);
|
|
528991
528975
|
for (const _editPath of _editPaths) {
|
|
528992
528976
|
this._decomp2MainContextFiles.add(_editPath);
|
|
528993
|
-
const DECOMP2_FILE_SPREAD_THRESHOLD =
|
|
528994
|
-
if (!this._decomp2GateActive && this._decomp2MainContextFiles.size >= DECOMP2_FILE_SPREAD_THRESHOLD && this._decomp2SubAgentCalls === 0) {
|
|
528977
|
+
const DECOMP2_FILE_SPREAD_THRESHOLD = this._decomp2FileSpreadThreshold();
|
|
528978
|
+
if (!this._decomp2GateActive && !this._decomp2FallbackAllowed && this._decomp2MainContextFiles.size >= DECOMP2_FILE_SPREAD_THRESHOLD && this._decomp2SubAgentCalls === 0) {
|
|
528995
528979
|
this._decomp2GateActive = true;
|
|
528996
528980
|
this.emit({
|
|
528997
528981
|
type: "status",
|
|
528998
|
-
content: `DECOMP-2
|
|
528982
|
+
content: `DECOMP-2 ADVISORY ACTIVATED — ${this._decomp2MainContextFiles.size} distinct files edited in main context, 0 successful sub_agent calls, threshold=${DECOMP2_FILE_SPREAD_THRESHOLD}; further edits to NEW files remain allowed, but sub_agent is recommended for the next module`,
|
|
528999
528983
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529000
528984
|
});
|
|
529001
528985
|
}
|
|
@@ -529004,20 +528988,32 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529004
528988
|
if (tc.name === "sub_agent" || tc.name === "priority_delegate" || tc.name === "background_run") {
|
|
529005
528989
|
if (result?.success !== true) {
|
|
529006
528990
|
if (this._decomp2GateActive) {
|
|
528991
|
+
this._decomp2FailedDelegationCalls++;
|
|
529007
528992
|
this.emit({
|
|
529008
528993
|
type: "status",
|
|
529009
528994
|
content: `DECOMP-2 DELEGATION FAILED — '${tc.name}' did not clear gate at turn ${turn}; fix delegation arguments/result before editing another new file`,
|
|
529010
528995
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529011
528996
|
});
|
|
528997
|
+
if (this._decomp2FailedDelegationCalls >= 2) {
|
|
528998
|
+
this._decomp2FallbackAllowed = true;
|
|
528999
|
+
this._decomp2GateActive = false;
|
|
529000
|
+
this.emit({
|
|
529001
|
+
type: "status",
|
|
529002
|
+
content: `DECOMP-2 FALLBACK UNLOCKED — ${this._decomp2FailedDelegationCalls} failed delegation attempts while gate was active; allowing main-context new-file edits so work can continue`,
|
|
529003
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529004
|
+
});
|
|
529005
|
+
}
|
|
529012
529006
|
}
|
|
529013
529007
|
return;
|
|
529014
529008
|
}
|
|
529015
529009
|
this._decomp2SubAgentCalls++;
|
|
529010
|
+
this._decomp2FailedDelegationCalls = 0;
|
|
529011
|
+
this._decomp2FallbackAllowed = false;
|
|
529016
529012
|
if (this._decomp2GateActive) {
|
|
529017
529013
|
this._decomp2GateActive = false;
|
|
529018
529014
|
this.emit({
|
|
529019
529015
|
type: "status",
|
|
529020
|
-
content: `DECOMP-2
|
|
529016
|
+
content: `DECOMP-2 ADVISORY CLEARED — '${tc.name}' satisfied delegation directive at turn ${turn}`,
|
|
529021
529017
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
529022
529018
|
});
|
|
529023
529019
|
}
|
|
@@ -529027,7 +529023,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529027
529023
|
* REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
|
|
529028
529024
|
*
|
|
529029
529025
|
* Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
|
|
529030
|
-
* 5-6 times each + 22 BFC-61.G coercion
|
|
529026
|
+
* 5-6 times each + 22 BFC-61.G coercion advisories — and ZERO of those
|
|
529031
529027
|
* resulted in a creative edit. The agent was rationally stuck: it
|
|
529032
529028
|
* believed it needed to read more to debug, the build command kept
|
|
529033
529029
|
* giving the same error, and the standard "issue an edit" directive
|
|
@@ -529040,7 +529036,7 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529040
529036
|
* different output. Without this signal we'd just keep telling the
|
|
529041
529037
|
* agent to "make an edit" — which is exactly what it can't think of.
|
|
529042
529038
|
*
|
|
529043
|
-
* When detected, the BFC-61.G
|
|
529039
|
+
* When detected, the BFC-61.G advisory message swaps to a PERTURB-strategy
|
|
529044
529040
|
* directive: stop reading, change ONE thing in the most-likely-culprit
|
|
529045
529041
|
* file even if you're uncertain, and let the new error signal guide
|
|
529046
529042
|
* the next iteration. This is real human debugging strategy ("perturb
|
|
@@ -529177,27 +529173,6 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
529177
529173
|
* name with objective evidence, complete remaining items in order, update the
|
|
529178
529174
|
* checklist via todo_write, and only then call task_complete.
|
|
529179
529175
|
*/
|
|
529180
|
-
/**
|
|
529181
|
-
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
|
|
529182
|
-
* block / budget exhausted). These paths return early from
|
|
529183
|
-
* executeSingle BEFORE the main result-handling code, so the normal
|
|
529184
|
-
* MAST tagging miss them. This helper lets each return-early site
|
|
529185
|
-
* record a tag directly. Push-only — keeps the tag buffer bounded
|
|
529186
|
-
* to 200 entries.
|
|
529187
|
-
*/
|
|
529188
|
-
_tagSyntheticFailure(args) {
|
|
529189
|
-
try {
|
|
529190
|
-
this._mastTags.push({
|
|
529191
|
-
mode: args.mode,
|
|
529192
|
-
category: MAST_CATEGORY[args.mode],
|
|
529193
|
-
rationale: args.rationale
|
|
529194
|
-
});
|
|
529195
|
-
if (this._mastTags.length > 200) {
|
|
529196
|
-
this._mastTags = this._mastTags.slice(-200);
|
|
529197
|
-
}
|
|
529198
|
-
} catch {
|
|
529199
|
-
}
|
|
529200
|
-
}
|
|
529201
529176
|
/**
|
|
529202
529177
|
* REG-39b: emit a MAST taxonomy summary as a status event. Called both
|
|
529203
529178
|
* mid-run (every N turns, so SIGTERM kills don't lose the data) and at
|
|
@@ -531097,9 +531072,14 @@ Respond with your assessment, then take action.`;
|
|
|
531097
531072
|
this._fileWriteTimestamps = [];
|
|
531098
531073
|
this._aborting = false;
|
|
531099
531074
|
this._reg61CooldownUntilTurn = -1;
|
|
531075
|
+
this._writesSinceLastTodoWrite = 0;
|
|
531076
|
+
this._progressGateActive = false;
|
|
531077
|
+
this._consecutiveProgressAdvisories = 0;
|
|
531100
531078
|
this._reg61PerpetualGateActive = false;
|
|
531101
531079
|
this._decomp2MainContextFiles = /* @__PURE__ */ new Set();
|
|
531102
531080
|
this._decomp2SubAgentCalls = 0;
|
|
531081
|
+
this._decomp2FailedDelegationCalls = 0;
|
|
531082
|
+
this._decomp2FallbackAllowed = false;
|
|
531103
531083
|
this._decomp2GateActive = false;
|
|
531104
531084
|
if (!globalThis.__oa_rca1_sigterm_installed) {
|
|
531105
531085
|
globalThis.__oa_rca1_sigterm_installed = true;
|
|
@@ -533396,24 +533376,9 @@ ${memoryLines.join("\n")}`
|
|
|
533396
533376
|
});
|
|
533397
533377
|
}
|
|
533398
533378
|
{
|
|
533399
|
-
const
|
|
533400
|
-
if (
|
|
533401
|
-
|
|
533402
|
-
type: "tool_call",
|
|
533403
|
-
toolName: tc.name,
|
|
533404
|
-
toolArgs: tc.arguments,
|
|
533405
|
-
turn,
|
|
533406
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533407
|
-
});
|
|
533408
|
-
this.emit({
|
|
533409
|
-
type: "tool_result",
|
|
533410
|
-
toolName: tc.name,
|
|
533411
|
-
success: false,
|
|
533412
|
-
content: _decomp2Block.slice(0, 120),
|
|
533413
|
-
turn,
|
|
533414
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533415
|
-
});
|
|
533416
|
-
return { tc, output: _decomp2Block };
|
|
533379
|
+
const _decomp2Advisory = this._maybeDecomp2Advisory(tc, turn);
|
|
533380
|
+
if (_decomp2Advisory) {
|
|
533381
|
+
pushSoftInjection("system", _decomp2Advisory);
|
|
533417
533382
|
}
|
|
533418
533383
|
}
|
|
533419
533384
|
const PROGRESS_GATE_BYPASS_TOOLS = /* @__PURE__ */ new Set([
|
|
@@ -533425,74 +533390,55 @@ ${memoryLines.join("\n")}`
|
|
|
533425
533390
|
// useful for the agent to consult prior phase state before updating
|
|
533426
533391
|
]);
|
|
533427
533392
|
if (this._progressGateActive && !PROGRESS_GATE_BYPASS_TOOLS.has(tc.name)) {
|
|
533428
|
-
this.emit({
|
|
533429
|
-
type: "tool_call",
|
|
533430
|
-
toolName: tc.name,
|
|
533431
|
-
toolArgs: tc.arguments,
|
|
533432
|
-
turn,
|
|
533433
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533434
|
-
});
|
|
533435
533393
|
const recentWrites = [];
|
|
533436
533394
|
for (const [path11, info] of this._worldFacts.files) {
|
|
533437
533395
|
if ((info.writeCount ?? 0) > 0 && (info.lastWriteTurn ?? -1) >= 0 && turn - (info.lastWriteTurn ?? 0) <= 8) {
|
|
533438
533396
|
recentWrites.push({ path: path11, turn: info.lastWriteTurn ?? 0 });
|
|
533439
533397
|
}
|
|
533440
533398
|
}
|
|
533399
|
+
this._consecutiveProgressAdvisories++;
|
|
533441
533400
|
recentWrites.sort((a2, b) => b.turn - a2.turn);
|
|
533442
533401
|
const showWrites = recentWrites.slice(0, 16);
|
|
533402
|
+
const isRepeat = this._consecutiveProgressAdvisories >= 2;
|
|
533443
533403
|
const gateMsg = [
|
|
533444
|
-
`[PROGRESS
|
|
533404
|
+
`[PROGRESS ADVISORY — todo update recommended]`,
|
|
533445
533405
|
``,
|
|
533446
|
-
`
|
|
533447
|
-
`The
|
|
533406
|
+
`CAUSE: ${this._writesSinceLastTodoWrite} file writes since last todo_write call. Without progress tracking, the next turn re-plans the same work (plan-replay).`,
|
|
533407
|
+
`EFFECT: The requested tool is still allowed. Update todos soon so progress state stays accurate.`,
|
|
533408
|
+
`ACTION RECOMMENDED: Call todo_write with updated progress after this tool call if the recent writes satisfy todo items.`,
|
|
533409
|
+
isRepeat ? `
|
|
533410
|
+
[ESCALATION: This is advisory #${this._consecutiveProgressAdvisories}. You are continuing without todo_write; progress replay risk is increasing.]` : "",
|
|
533448
533411
|
``,
|
|
533449
533412
|
`Recent file modifications (use these to decide what's done):`,
|
|
533450
533413
|
...showWrites.map((w) => ` • ${w.path} (turn ${w.turn})`),
|
|
533451
533414
|
recentWrites.length > showWrites.length ? ` • ... +${recentWrites.length - showWrites.length} more` : "",
|
|
533452
533415
|
``,
|
|
533453
|
-
`
|
|
533454
|
-
`After todo_write succeeds, this gate releases and you can continue normal work.`,
|
|
533455
|
-
``,
|
|
533456
|
-
`Why this exists: without the explicit progress update, your next turn will see the same in_progress todo, re-plan the same work, and re-emit identical tool calls (the "plan replay" failure mode that causes byte-identical writes to appear twice).`
|
|
533416
|
+
`Format: todo_write with todos array — mark items completed that these writes satisfy, advance next to in_progress.`
|
|
533457
533417
|
].filter(Boolean).join("\n");
|
|
533418
|
+
pushSoftInjection("system", gateMsg);
|
|
533458
533419
|
this.emit({
|
|
533459
|
-
type: "
|
|
533420
|
+
type: "status",
|
|
533460
533421
|
toolName: tc.name,
|
|
533461
|
-
|
|
533462
|
-
content: gateMsg.slice(0, 120),
|
|
533422
|
+
content: `PROGRESS ADVISORY — ${this._writesSinceLastTodoWrite} writes since todo_write; '${tc.name}' allowed`,
|
|
533463
533423
|
turn,
|
|
533464
533424
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533465
533425
|
});
|
|
533466
|
-
return { tc, output: gateMsg };
|
|
533467
533426
|
}
|
|
533468
533427
|
const _argsKeyForBudget = `${tc.name}:${argsKey}`;
|
|
533469
533428
|
const _isCachedHit = recentToolResults.has(_argsKeyForBudget);
|
|
533470
533429
|
const budgetRemaining = toolCallBudget.get(tc.name);
|
|
533471
533430
|
if (budgetRemaining !== void 0 && !_isCachedHit) {
|
|
533472
533431
|
if (budgetRemaining <= 0) {
|
|
533432
|
+
const budgetMsg = `[BUDGET ADVISORY] You have used all ${toolBudgets[tc.name]} recommended ${tc.name} calls for the current phase. The requested tool call is still allowed, but repeated use may be low-value. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
|
|
533433
|
+
pushSoftInjection("system", budgetMsg);
|
|
533473
533434
|
this.emit({
|
|
533474
|
-
type: "
|
|
533475
|
-
|
|
533476
|
-
toolArgs: tc.arguments,
|
|
533477
|
-
turn,
|
|
533478
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533479
|
-
});
|
|
533480
|
-
const budgetMsg = `[BUDGET EXHAUSTED] You have used all ${toolBudgets[tc.name]} allowed ${tc.name} calls for the current phase. You ALREADY have enough information from previous calls. DO NOT try to call ${tc.name} again — it will be blocked. If your todo list shows more phases pending: mark the current phase completed via todo_write so a new budget allowance kicks in. If all phases are done: call task_complete with your final summary.`;
|
|
533481
|
-
this.emit({
|
|
533482
|
-
type: "tool_result",
|
|
533483
|
-
toolName: tc.name,
|
|
533484
|
-
success: false,
|
|
533485
|
-
content: budgetMsg.slice(0, 120),
|
|
533486
|
-
turn,
|
|
533435
|
+
type: "status",
|
|
533436
|
+
content: `BUDGET ADVISORY — ${tc.name} exceeded recommended per-phase budget; tool allowed`,
|
|
533487
533437
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533488
533438
|
});
|
|
533489
|
-
|
|
533490
|
-
|
|
533491
|
-
rationale: `${tc.name} exhausted per-phase budget of ${toolBudgets[tc.name]}`
|
|
533492
|
-
});
|
|
533493
|
-
return { tc, output: budgetMsg };
|
|
533439
|
+
} else {
|
|
533440
|
+
toolCallBudget.set(tc.name, budgetRemaining - 1);
|
|
533494
533441
|
}
|
|
533495
|
-
toolCallBudget.set(tc.name, budgetRemaining - 1);
|
|
533496
533442
|
}
|
|
533497
533443
|
const toolFingerprint = this._buildToolFingerprint(tc.name, tc.arguments ?? {});
|
|
533498
533444
|
const baseIsReadLike = ![
|
|
@@ -533577,29 +533523,15 @@ ${memoryLines.join("\n")}`
|
|
|
533577
533523
|
observerRedundantBlock
|
|
533578
533524
|
});
|
|
533579
533525
|
if (criticDecision.decision === "observer_block") {
|
|
533580
|
-
this.
|
|
533581
|
-
type: "tool_call",
|
|
533582
|
-
toolName: tc.name,
|
|
533583
|
-
toolArgs: tc.arguments,
|
|
533584
|
-
turn,
|
|
533585
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533586
|
-
});
|
|
533587
|
-
const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
533526
|
+
const advisoryMsg = criticDecision.cachedResult ? `[DUPLICATE-CALL ADVISORY — this tool+args already succeeded. Prior result preview:]
|
|
533588
533527
|
|
|
533589
|
-
${criticDecision.cachedResult.slice(0, 500)}` : `[
|
|
533528
|
+
${criticDecision.cachedResult.slice(0, 500)}` : `[DUPLICATE-CALL ADVISORY — the observer confirmed this tool already succeeded with these arguments on a prior turn. The call is still allowed, but using prior findings may be faster.]`;
|
|
533529
|
+
pushSoftInjection("system", advisoryMsg);
|
|
533590
533530
|
this.emit({
|
|
533591
|
-
type: "
|
|
533592
|
-
|
|
533593
|
-
success: true,
|
|
533594
|
-
content: blockMsg.slice(0, 100),
|
|
533595
|
-
turn,
|
|
533531
|
+
type: "status",
|
|
533532
|
+
content: `DUPLICATE-CALL ADVISORY — ${tc.name} allowed despite observer redundancy`,
|
|
533596
533533
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533597
533534
|
});
|
|
533598
|
-
this._tagSyntheticFailure({
|
|
533599
|
-
mode: "step_repetition",
|
|
533600
|
-
rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
|
|
533601
|
-
});
|
|
533602
|
-
return { tc, output: blockMsg };
|
|
533603
533535
|
}
|
|
533604
533536
|
if (criticDecision.decision === "force_progress_block") {
|
|
533605
533537
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
@@ -533608,26 +533540,12 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533608
533540
|
recentToolResults.delete(toolFingerprint);
|
|
533609
533541
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
533610
533542
|
}
|
|
533543
|
+
pushSoftInjection("system", criticDecision.blockMessage.replace(/\[FORCED PROGRESS BLOCK[^\]]*\]|\[BLOCKED[^\]]*\]/gi, "[REPETITION ADVISORY]"));
|
|
533611
533544
|
this.emit({
|
|
533612
|
-
type: "
|
|
533613
|
-
|
|
533614
|
-
toolArgs: tc.arguments,
|
|
533615
|
-
turn,
|
|
533616
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533617
|
-
});
|
|
533618
|
-
this.emit({
|
|
533619
|
-
type: "tool_result",
|
|
533620
|
-
toolName: tc.name,
|
|
533621
|
-
success: false,
|
|
533622
|
-
content: criticDecision.blockMessage.slice(0, 120),
|
|
533623
|
-
turn,
|
|
533545
|
+
type: "status",
|
|
533546
|
+
content: `REPETITION ADVISORY — ${tc.name} repeated ${criticDecision.hitNumber} times; tool allowed`,
|
|
533624
533547
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533625
533548
|
});
|
|
533626
|
-
this._tagSyntheticFailure({
|
|
533627
|
-
mode: "step_repetition",
|
|
533628
|
-
rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
|
|
533629
|
-
});
|
|
533630
|
-
return { tc, output: criticDecision.blockMessage };
|
|
533631
533549
|
}
|
|
533632
533550
|
if (criticDecision.decision === "serve_cached") {
|
|
533633
533551
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
@@ -533636,30 +533554,20 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533636
533554
|
recentToolResults.delete(toolFingerprint);
|
|
533637
533555
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
533638
533556
|
}
|
|
533639
|
-
|
|
533640
|
-
type: "tool_call",
|
|
533641
|
-
toolName: tc.name,
|
|
533642
|
-
toolArgs: tc.arguments,
|
|
533643
|
-
turn,
|
|
533644
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533645
|
-
});
|
|
533646
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
533557
|
+
const header = criticDecision.compacted ? `[CACHE ADVISORY — the original result was compacted from context. Prior data preview follows; the tool call is still allowed.]
|
|
533647
533558
|
|
|
533648
|
-
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result
|
|
533559
|
+
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result may be identical. The tool call is still allowed, but using existing data may be faster.]
|
|
533649
533560
|
|
|
533650
533561
|
`;
|
|
533651
533562
|
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
533652
533563
|
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
533653
533564
|
const dedupOutput = header + truncatedCache;
|
|
533565
|
+
pushSoftInjection("system", dedupOutput);
|
|
533654
533566
|
this.emit({
|
|
533655
|
-
type: "
|
|
533656
|
-
|
|
533657
|
-
success: true,
|
|
533658
|
-
content: header.slice(0, 100),
|
|
533659
|
-
turn,
|
|
533567
|
+
type: "status",
|
|
533568
|
+
content: `CACHE ADVISORY — ${tc.name} duplicate call #${criticDecision.hitNumber}; tool allowed`,
|
|
533660
533569
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533661
533570
|
});
|
|
533662
|
-
return { tc, output: dedupOutput };
|
|
533663
533571
|
}
|
|
533664
533572
|
this.emit({
|
|
533665
533573
|
type: "tool_call",
|
|
@@ -533709,6 +533617,18 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533709
533617
|
const violations = checkConstraints(tc.name, tc.arguments);
|
|
533710
533618
|
const blockViolations = violations.filter((v) => v.constraint.action === "block");
|
|
533711
533619
|
if (blockViolations.length > 0) {
|
|
533620
|
+
const warning = formatViolationWarning(blockViolations);
|
|
533621
|
+
pushSoftInjection("system", `[CONSTRAINT ADVISORY]
|
|
533622
|
+
${warning}
|
|
533623
|
+
The tool call is still allowed by the no-hard-block policy. Treat this as risk guidance, not a runtime denial.`);
|
|
533624
|
+
this.emit({
|
|
533625
|
+
type: "status",
|
|
533626
|
+
content: `Constraint advisory: ${blockViolations[0].constraint.message}`,
|
|
533627
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533628
|
+
});
|
|
533629
|
+
pendingConstraintWarnings.push(warning);
|
|
533630
|
+
}
|
|
533631
|
+
if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && blockViolations.length > 0) {
|
|
533712
533632
|
result = {
|
|
533713
533633
|
success: false,
|
|
533714
533634
|
output: "",
|
|
@@ -533732,6 +533652,16 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
533732
533652
|
}
|
|
533733
533653
|
const hookCheck = this._hookManager.runPreToolUse(tc.name, tc.arguments, this._sessionId);
|
|
533734
533654
|
if (!hookCheck.allowed) {
|
|
533655
|
+
pushSoftInjection("system", `[HOOK ADVISORY]
|
|
533656
|
+
Tool "${tc.name}" matched a pre-tool hook warning: ${hookCheck.reason ?? "hook denied"}.
|
|
533657
|
+
The tool call is still allowed by the no-hard-block policy. Treat this as guidance and proceed deliberately.`);
|
|
533658
|
+
this.emit({
|
|
533659
|
+
type: "status",
|
|
533660
|
+
content: `Hook advisory for ${tc.name}: ${hookCheck.reason ?? "hook denied"}. Tool allowed by no-hard-block policy.`,
|
|
533661
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533662
|
+
});
|
|
533663
|
+
}
|
|
533664
|
+
if (process.env["OA_ENABLE_HARD_TOOL_BLOCKS"] === "1" && !hookCheck.allowed) {
|
|
533735
533665
|
result = {
|
|
533736
533666
|
success: false,
|
|
533737
533667
|
output: "",
|
|
@@ -533888,7 +533818,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533888
533818
|
this._reg61PerpetualGateActive = false;
|
|
533889
533819
|
this.emit({
|
|
533890
533820
|
type: "status",
|
|
533891
|
-
content: `REG-61
|
|
533821
|
+
content: `REG-61 ADVISORY CLEARED — '${tc.name}' landed real file mutation at turn ${turn}`,
|
|
533892
533822
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533893
533823
|
});
|
|
533894
533824
|
}
|
|
@@ -533923,7 +533853,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533923
533853
|
this._progressGateActive = true;
|
|
533924
533854
|
this.emit({
|
|
533925
533855
|
type: "status",
|
|
533926
|
-
content: `Progress
|
|
533856
|
+
content: `Progress advisory engaged at ${this._writesSinceLastTodoWrite} writes without todo_write — non-todo tools remain allowed, todo update recommended`,
|
|
533927
533857
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
533928
533858
|
});
|
|
533929
533859
|
}
|
|
@@ -533939,6 +533869,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
533939
533869
|
}
|
|
533940
533870
|
this._writesSinceLastTodoWrite = 0;
|
|
533941
533871
|
this._progressGateActive = false;
|
|
533872
|
+
this._consecutiveProgressAdvisories = 0;
|
|
533942
533873
|
}
|
|
533943
533874
|
if (tc.name === "file_read") {
|
|
533944
533875
|
const p2 = String(tc.arguments?.["path"] ?? tc.arguments?.["file"] ?? "");
|
|
@@ -534947,6 +534878,12 @@ Then use file_read on individual FILES inside it.`);
|
|
|
534947
534878
|
const output = sr.result.success ? sr.result.output : `Error: ${sr.result.error || "unknown"}
|
|
534948
534879
|
${sr.result.output}`;
|
|
534949
534880
|
messages2.push(this.buildToolMessage(output, matchTc.id, matchTc.name));
|
|
534881
|
+
if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
|
|
534882
|
+
messages2.push({
|
|
534883
|
+
role: "system",
|
|
534884
|
+
content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
|
|
534885
|
+
});
|
|
534886
|
+
}
|
|
534950
534887
|
if (matchTc.name === "task_complete") {
|
|
534951
534888
|
const open2 = this.getOpenTodoItems();
|
|
534952
534889
|
if (open2.length > 0) {
|
|
@@ -534954,28 +534891,32 @@ ${sr.result.output}`;
|
|
|
534954
534891
|
messages2.push({ role: "system", content: guard });
|
|
534955
534892
|
this.emit({
|
|
534956
534893
|
type: "status",
|
|
534957
|
-
content: `task_complete
|
|
534894
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
534958
534895
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534959
534896
|
});
|
|
534960
|
-
} else {
|
|
534961
|
-
const _bp1 = await this._runBackwardPassReview(turn);
|
|
534962
|
-
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
534963
|
-
messages2.push({ role: "system", content: _bp1.feedback });
|
|
534964
|
-
} else {
|
|
534965
|
-
completed = true;
|
|
534966
|
-
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
534967
|
-
if (summary && !this._assistantTextEmitted) {
|
|
534968
|
-
this.emit({
|
|
534969
|
-
type: "assistant_text",
|
|
534970
|
-
content: summary,
|
|
534971
|
-
turn,
|
|
534972
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534973
|
-
});
|
|
534974
|
-
this._assistantTextEmitted = true;
|
|
534975
|
-
}
|
|
534976
|
-
break;
|
|
534977
|
-
}
|
|
534978
534897
|
}
|
|
534898
|
+
const _bp1 = await this._runBackwardPassReview(turn);
|
|
534899
|
+
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
534900
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
534901
|
+
${_bp1.feedback}` });
|
|
534902
|
+
this.emit({
|
|
534903
|
+
type: "status",
|
|
534904
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
534905
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534906
|
+
});
|
|
534907
|
+
}
|
|
534908
|
+
completed = true;
|
|
534909
|
+
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
534910
|
+
if (summary && !this._assistantTextEmitted) {
|
|
534911
|
+
this.emit({
|
|
534912
|
+
type: "assistant_text",
|
|
534913
|
+
content: summary,
|
|
534914
|
+
turn,
|
|
534915
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534916
|
+
});
|
|
534917
|
+
this._assistantTextEmitted = true;
|
|
534918
|
+
}
|
|
534919
|
+
break;
|
|
534979
534920
|
}
|
|
534980
534921
|
}
|
|
534981
534922
|
}
|
|
@@ -534987,6 +534928,12 @@ ${sr.result.output}`;
|
|
|
534987
534928
|
const r2 = await executeSingle(tc);
|
|
534988
534929
|
if (r2) {
|
|
534989
534930
|
messages2.push(this.buildToolMessage(r2.output, r2.tc.id, r2.tc.name));
|
|
534931
|
+
if (this._consecutiveProgressAdvisories >= 2 && this._progressGateActive) {
|
|
534932
|
+
messages2.push({
|
|
534933
|
+
role: "system",
|
|
534934
|
+
content: `[PROGRESS ADVISORY ESCALATION] You have made ${this._consecutiveProgressAdvisories} consecutive tool calls while progress tracking is stale. The last tool was allowed to run, but your NEXT call should be todo_write(todos=[...]) with updated progress so the plan and completed work stay synchronized.`
|
|
534935
|
+
});
|
|
534936
|
+
}
|
|
534990
534937
|
if (r2.tc.name === "task_complete") {
|
|
534991
534938
|
const open2 = this.getOpenTodoItems();
|
|
534992
534939
|
if (open2.length > 0) {
|
|
@@ -534994,28 +534941,32 @@ ${sr.result.output}`;
|
|
|
534994
534941
|
messages2.push({ role: "system", content: guard });
|
|
534995
534942
|
this.emit({
|
|
534996
534943
|
type: "status",
|
|
534997
|
-
content: `task_complete
|
|
534944
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
534998
534945
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534999
534946
|
});
|
|
535000
|
-
} else {
|
|
535001
|
-
const _bp2 = await this._runBackwardPassReview(turn);
|
|
535002
|
-
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
535003
|
-
messages2.push({ role: "system", content: _bp2.feedback });
|
|
535004
|
-
} else {
|
|
535005
|
-
completed = true;
|
|
535006
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
535007
|
-
if (summary && !this._assistantTextEmitted) {
|
|
535008
|
-
this.emit({
|
|
535009
|
-
type: "assistant_text",
|
|
535010
|
-
content: summary,
|
|
535011
|
-
turn,
|
|
535012
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535013
|
-
});
|
|
535014
|
-
this._assistantTextEmitted = true;
|
|
535015
|
-
}
|
|
535016
|
-
break;
|
|
535017
|
-
}
|
|
535018
534947
|
}
|
|
534948
|
+
const _bp2 = await this._runBackwardPassReview(turn);
|
|
534949
|
+
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
534950
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
534951
|
+
${_bp2.feedback}` });
|
|
534952
|
+
this.emit({
|
|
534953
|
+
type: "status",
|
|
534954
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
534955
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534956
|
+
});
|
|
534957
|
+
}
|
|
534958
|
+
completed = true;
|
|
534959
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
534960
|
+
if (summary && !this._assistantTextEmitted) {
|
|
534961
|
+
this.emit({
|
|
534962
|
+
type: "assistant_text",
|
|
534963
|
+
content: summary,
|
|
534964
|
+
turn,
|
|
534965
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
534966
|
+
});
|
|
534967
|
+
this._assistantTextEmitted = true;
|
|
534968
|
+
}
|
|
534969
|
+
break;
|
|
535019
534970
|
}
|
|
535020
534971
|
}
|
|
535021
534972
|
}
|
|
@@ -535069,36 +535020,40 @@ ${sr.result.output}`;
|
|
|
535069
535020
|
messages2.push({ role: "system", content: guard });
|
|
535070
535021
|
this.emit({
|
|
535071
535022
|
type: "status",
|
|
535072
|
-
content: `task_complete
|
|
535023
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535073
535024
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535074
535025
|
});
|
|
535075
|
-
}
|
|
535076
|
-
|
|
535077
|
-
|
|
535078
|
-
|
|
535079
|
-
|
|
535080
|
-
|
|
535081
|
-
|
|
535082
|
-
|
|
535083
|
-
|
|
535084
|
-
|
|
535085
|
-
|
|
535086
|
-
|
|
535087
|
-
|
|
535088
|
-
|
|
535089
|
-
|
|
535090
|
-
|
|
535091
|
-
|
|
535092
|
-
|
|
535093
|
-
content: summary,
|
|
535094
|
-
turn,
|
|
535095
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535096
|
-
});
|
|
535097
|
-
this._assistantTextEmitted = true;
|
|
535026
|
+
}
|
|
535027
|
+
const _bp3 = await this._runBackwardPassReview(turn);
|
|
535028
|
+
if (_bp3 && !_bp3.proceed && _bp3.feedback) {
|
|
535029
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
535030
|
+
${_bp3.feedback}` });
|
|
535031
|
+
this.emit({
|
|
535032
|
+
type: "status",
|
|
535033
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
535034
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535035
|
+
});
|
|
535036
|
+
}
|
|
535037
|
+
completed = true;
|
|
535038
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
535039
|
+
for (const tool of this.tools.values()) {
|
|
535040
|
+
if (tool.cleanup) {
|
|
535041
|
+
try {
|
|
535042
|
+
await tool.cleanup();
|
|
535043
|
+
} catch {
|
|
535098
535044
|
}
|
|
535099
|
-
break;
|
|
535100
535045
|
}
|
|
535101
535046
|
}
|
|
535047
|
+
if (summary && !this._assistantTextEmitted) {
|
|
535048
|
+
this.emit({
|
|
535049
|
+
type: "assistant_text",
|
|
535050
|
+
content: summary,
|
|
535051
|
+
turn,
|
|
535052
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535053
|
+
});
|
|
535054
|
+
this._assistantTextEmitted = true;
|
|
535055
|
+
}
|
|
535056
|
+
break;
|
|
535102
535057
|
}
|
|
535103
535058
|
}
|
|
535104
535059
|
}
|
|
@@ -535711,18 +535666,9 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
535711
535666
|
turn,
|
|
535712
535667
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535713
535668
|
});
|
|
535714
|
-
const
|
|
535715
|
-
if (
|
|
535716
|
-
|
|
535717
|
-
type: "tool_result",
|
|
535718
|
-
toolName: tc.name,
|
|
535719
|
-
content: _decomp2BFBlock.slice(0, 200),
|
|
535720
|
-
success: false,
|
|
535721
|
-
turn,
|
|
535722
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535723
|
-
});
|
|
535724
|
-
messages2.push(this.buildToolMessage(_decomp2BFBlock, tc.id, tc.name));
|
|
535725
|
-
continue;
|
|
535669
|
+
const _decomp2BFAdvisory = this._maybeDecomp2Advisory(tc, turn);
|
|
535670
|
+
if (_decomp2BFAdvisory) {
|
|
535671
|
+
messages2.push({ role: "system", content: _decomp2BFAdvisory });
|
|
535726
535672
|
}
|
|
535727
535673
|
const tool = this.tools.get(tc.name);
|
|
535728
535674
|
let result;
|
|
@@ -535823,28 +535769,32 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
535823
535769
|
messages2.push({ role: "system", content: guard });
|
|
535824
535770
|
this.emit({
|
|
535825
535771
|
type: "status",
|
|
535826
|
-
content: `task_complete
|
|
535772
|
+
content: `task_complete advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535827
535773
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535828
535774
|
});
|
|
535829
|
-
} else {
|
|
535830
|
-
const _bp4 = await this._runBackwardPassReview(turn);
|
|
535831
|
-
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
535832
|
-
messages2.push({ role: "system", content: _bp4.feedback });
|
|
535833
|
-
continue;
|
|
535834
|
-
}
|
|
535835
|
-
completed = true;
|
|
535836
|
-
summary = extractTaskCompleteSummary(tc.arguments);
|
|
535837
|
-
if (summary && !this._assistantTextEmitted) {
|
|
535838
|
-
this.emit({
|
|
535839
|
-
type: "assistant_text",
|
|
535840
|
-
content: summary,
|
|
535841
|
-
turn,
|
|
535842
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535843
|
-
});
|
|
535844
|
-
this._assistantTextEmitted = true;
|
|
535845
|
-
}
|
|
535846
|
-
break;
|
|
535847
535775
|
}
|
|
535776
|
+
const _bp4 = await this._runBackwardPassReview(turn);
|
|
535777
|
+
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
535778
|
+
messages2.push({ role: "system", content: `[COMPLETION REVIEW ADVISORY]
|
|
535779
|
+
${_bp4.feedback}` });
|
|
535780
|
+
this.emit({
|
|
535781
|
+
type: "status",
|
|
535782
|
+
content: "completion review advisory surfaced; task_complete allowed",
|
|
535783
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535784
|
+
});
|
|
535785
|
+
}
|
|
535786
|
+
completed = true;
|
|
535787
|
+
summary = extractTaskCompleteSummary(tc.arguments);
|
|
535788
|
+
if (summary && !this._assistantTextEmitted) {
|
|
535789
|
+
this.emit({
|
|
535790
|
+
type: "assistant_text",
|
|
535791
|
+
content: summary,
|
|
535792
|
+
turn,
|
|
535793
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535794
|
+
});
|
|
535795
|
+
this._assistantTextEmitted = true;
|
|
535796
|
+
}
|
|
535797
|
+
break;
|
|
535848
535798
|
}
|
|
535849
535799
|
}
|
|
535850
535800
|
if (completed)
|
|
@@ -535872,14 +535822,13 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
535872
535822
|
messages2.push({ role: "system", content: guard });
|
|
535873
535823
|
this.emit({
|
|
535874
535824
|
type: "status",
|
|
535875
|
-
content: `task_complete text
|
|
535825
|
+
content: `task_complete text advisory — ${open2.length} open todo(s) remain; completion allowed`,
|
|
535876
535826
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
535877
535827
|
});
|
|
535878
|
-
} else {
|
|
535879
|
-
completed = true;
|
|
535880
|
-
summary = content;
|
|
535881
|
-
break;
|
|
535882
535828
|
}
|
|
535829
|
+
completed = true;
|
|
535830
|
+
summary = content;
|
|
535831
|
+
break;
|
|
535883
535832
|
}
|
|
535884
535833
|
const finalVarMatch = content.match(/FINAL_VAR\s*\(\s*["']?(\w+)["']?\s*\)/);
|
|
535885
535834
|
if (finalVarMatch && this.options.finalVarResolver) {
|
|
@@ -614072,7 +614021,7 @@ function createTaskCompleteTool(modelTier) {
|
|
|
614072
614021
|
const summaryDesc = modelTier === "small" || modelTier === "medium" ? "Your complete response to the user. For questions/chat: put your FULL answer here (this is what the user will see). For coding tasks: brief summary of what was accomplished." : "Brief summary of what was accomplished";
|
|
614073
614022
|
return {
|
|
614074
614023
|
name: "task_complete",
|
|
614075
|
-
description: "Signal that the task is complete.
|
|
614024
|
+
description: "Signal that the task is complete. ADVISORY: if active todos, interactive sessions, or build checks indicate risk, the tool result will include model-visible guidance, but the tool is not hard-blocked.",
|
|
614076
614025
|
parameters: {
|
|
614077
614026
|
type: "object",
|
|
614078
614027
|
properties: {
|
|
@@ -614081,11 +614030,14 @@ function createTaskCompleteTool(modelTier) {
|
|
|
614081
614030
|
required: ["summary"]
|
|
614082
614031
|
},
|
|
614083
614032
|
async execute(args) {
|
|
614033
|
+
const summary = args["summary"] || "Task completed.";
|
|
614084
614034
|
if (_interactiveSessionActive) {
|
|
614085
614035
|
return {
|
|
614086
|
-
success:
|
|
614087
|
-
output: `
|
|
614088
|
-
|
|
614036
|
+
success: true,
|
|
614037
|
+
output: `[TASK_COMPLETE ADVISORY — interactive session still active]
|
|
614038
|
+
${_interactiveSessionReason} You should continue the interaction loop until the session ends (e.g. "ended", "disconnected", "closed", SESSION_ACTIVE=false). The completion is allowed by no-hard-block policy.
|
|
614039
|
+
|
|
614040
|
+
${summary}`
|
|
614089
614041
|
};
|
|
614090
614042
|
}
|
|
614091
614043
|
try {
|
|
@@ -614111,14 +614063,17 @@ Respond concisely in this shape:
|
|
|
614111
614063
|
- verify: [{ name: "<exact item text>", completed: true|false, evidence: "<objective proof>" }, ...]
|
|
614112
614064
|
- next: "what you will do next OR the exact todo_write(...) call to update statuses"`;
|
|
614113
614065
|
return {
|
|
614114
|
-
success:
|
|
614115
|
-
output:
|
|
614116
|
-
error: `task_complete BLOCKED — ${incomplete.length} todo item(s) still incomplete.
|
|
614066
|
+
success: true,
|
|
614067
|
+
output: `[TASK_COMPLETE ADVISORY — ${incomplete.length} todo item(s) still incomplete]
|
|
614117
614068
|
|
|
614118
614069
|
Incomplete items:
|
|
614119
614070
|
${incompleteList}${more}
|
|
614120
614071
|
|
|
614121
|
-
|
|
614072
|
+
${guidance}
|
|
614073
|
+
|
|
614074
|
+
Completion is allowed by no-hard-block policy.
|
|
614075
|
+
|
|
614076
|
+
${summary}`
|
|
614122
614077
|
};
|
|
614123
614078
|
}
|
|
614124
614079
|
try {
|
|
@@ -614128,7 +614083,6 @@ ${incompleteList}${more}
|
|
|
614128
614083
|
}
|
|
614129
614084
|
} catch {
|
|
614130
614085
|
}
|
|
614131
|
-
const summary = args["summary"] || "Task completed.";
|
|
614132
614086
|
const buildGuardSkip = process.env["OA_DISABLE_TASK_COMPLETE_BUILD_GUARD"] === "1" || /^\s*BLOCKED\b/i.test(summary);
|
|
614133
614087
|
if (!buildGuardSkip) {
|
|
614134
614088
|
try {
|
|
@@ -614152,16 +614106,15 @@ ${incompleteList}${more}
|
|
|
614152
614106
|
} catch (e2) {
|
|
614153
614107
|
const out = ((e2?.stdout || "") + (e2?.stderr || "")).toString().slice(0, 2e3);
|
|
614154
614108
|
return {
|
|
614155
|
-
success:
|
|
614156
|
-
output:
|
|
614157
|
-
error: `task_complete BLOCKED — \`npm run ${checkScript}\` is failing in ${cwd4}.
|
|
614109
|
+
success: true,
|
|
614110
|
+
output: `[TASK_COMPLETE ADVISORY — \`npm run ${checkScript}\` is failing in ${cwd4}]
|
|
614158
614111
|
|
|
614159
614112
|
Error output (last 2KB):
|
|
614160
614113
|
${out || "<empty stdout/stderr — likely timeout or non-zero exit>"}
|
|
614161
614114
|
|
|
614162
|
-
|
|
614115
|
+
Recommended: fix the build errors before considering the work done. Completion is still allowed by no-hard-block policy.
|
|
614163
614116
|
|
|
614164
|
-
|
|
614117
|
+
${summary}`
|
|
614165
614118
|
};
|
|
614166
614119
|
}
|
|
614167
614120
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.574",
|
|
3
|
+
"version": "0.187.576",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.574",
|
|
9
|
+
"version": "0.187.576",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED