omnius 1.0.212 → 1.0.213
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +322 -293
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -551582,28 +551582,38 @@ var init_personality = __esm({
|
|
|
551582
551582
|
});
|
|
551583
551583
|
|
|
551584
551584
|
// packages/orchestrator/dist/critic.js
|
|
551585
|
-
function
|
|
551585
|
+
function buildCriticGuidanceMessage(call, hits, opts = {}) {
|
|
551586
551586
|
const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
|
|
551587
|
-
|
|
551588
|
-
|
|
551589
|
-
|
|
551590
|
-
|
|
551591
|
-
|
|
551592
|
-
|
|
551593
|
-
|
|
551587
|
+
const cached = opts.cachedResult ? `
|
|
551588
|
+
Prior evidence preview:
|
|
551589
|
+
${opts.cachedResult.slice(0, 700)}` : "";
|
|
551590
|
+
const source = opts.adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
|
|
551591
|
+
return `[ADVERSARY GUIDANCE — non-blocking]
|
|
551592
|
+
Observation: ${source}
|
|
551593
|
+
Call: ${call.tool}(${argPreview})
|
|
551594
|
+
Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
|
|
551595
|
+
Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
|
|
551596
|
+
Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
|
|
551594
551597
|
}
|
|
551595
551598
|
function buildCachedResultEnvelope(result) {
|
|
551596
|
-
return `[
|
|
551599
|
+
return `[PRIOR RESULT — already observed by a prior identical call]
|
|
551597
551600
|
${result}`;
|
|
551598
551601
|
}
|
|
551599
551602
|
function evaluate2(inputs) {
|
|
551600
|
-
const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount,
|
|
551601
|
-
if (
|
|
551603
|
+
const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
|
|
551604
|
+
if (adversaryRedundantSignal) {
|
|
551602
551605
|
const cached = recentToolResults.get(fingerprint);
|
|
551606
|
+
const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
|
|
551603
551607
|
return {
|
|
551604
|
-
decision: "
|
|
551605
|
-
reason: "
|
|
551606
|
-
|
|
551608
|
+
decision: "guidance",
|
|
551609
|
+
reason: "Adversary flagged this fingerprint as redundant",
|
|
551610
|
+
hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
|
|
551611
|
+
guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
|
|
551612
|
+
cachedResult,
|
|
551613
|
+
adversaryFlag: true
|
|
551614
|
+
}),
|
|
551615
|
+
cachedResult,
|
|
551616
|
+
compacted: cached?.compacted
|
|
551607
551617
|
};
|
|
551608
551618
|
}
|
|
551609
551619
|
const cacheEligible = isReadLike || proposedCall.tool === "shell";
|
|
@@ -551611,24 +551621,16 @@ function evaluate2(inputs) {
|
|
|
551611
551621
|
const cached = recentToolResults.get(fingerprint);
|
|
551612
551622
|
if (cached !== void 0) {
|
|
551613
551623
|
const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
|
|
551614
|
-
const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
|
|
551615
|
-
if (hits >= threshold) {
|
|
551616
|
-
return {
|
|
551617
|
-
decision: "force_progress_block",
|
|
551618
|
-
reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
|
|
551619
|
-
hitNumber: hits,
|
|
551620
|
-
blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
|
|
551621
|
-
cachedResult: buildCachedResultEnvelope(cached.result),
|
|
551622
|
-
compacted: cached.compacted
|
|
551623
|
-
};
|
|
551624
|
-
}
|
|
551625
551624
|
const cachedEnvelope = buildCachedResultEnvelope(cached.result);
|
|
551626
551625
|
return {
|
|
551627
|
-
decision: "
|
|
551628
|
-
reason: cached.compacted ? "post-compaction
|
|
551626
|
+
decision: "guidance",
|
|
551627
|
+
reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
|
|
551629
551628
|
cachedResult: cachedEnvelope,
|
|
551630
551629
|
compacted: cached.compacted,
|
|
551631
|
-
hitNumber: hits
|
|
551630
|
+
hitNumber: hits,
|
|
551631
|
+
guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
|
|
551632
|
+
cachedResult: cachedEnvelope
|
|
551633
|
+
})
|
|
551632
551634
|
};
|
|
551633
551635
|
}
|
|
551634
551636
|
}
|
|
@@ -551680,12 +551682,9 @@ function isStagnant(signals, opts) {
|
|
|
551680
551682
|
return false;
|
|
551681
551683
|
return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
|
|
551682
551684
|
}
|
|
551683
|
-
var SHELL_THRESHOLD, FS_THRESHOLD;
|
|
551684
551685
|
var init_critic = __esm({
|
|
551685
551686
|
"packages/orchestrator/dist/critic.js"() {
|
|
551686
551687
|
"use strict";
|
|
551687
|
-
SHELL_THRESHOLD = 2;
|
|
551688
|
-
FS_THRESHOLD = 3;
|
|
551689
551688
|
}
|
|
551690
551689
|
});
|
|
551691
551690
|
|
|
@@ -558656,8 +558655,8 @@ var init_agenticRunner = __esm({
|
|
|
558656
558655
|
// WO-KG-15
|
|
558657
558656
|
_retrievalContextCache = null;
|
|
558658
558657
|
// WO-KG-15: cache per-run
|
|
558659
|
-
//
|
|
558660
|
-
|
|
558658
|
+
// Adversary world-model and cohort stats
|
|
558659
|
+
_adversaryMode = "both";
|
|
558661
558660
|
_worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
|
|
558662
558661
|
// REG-7-root: Track file writes since last todo_write call. When this
|
|
558663
558662
|
// counter climbs without a todo update, the agent has likely batched
|
|
@@ -559006,6 +559005,8 @@ var init_agenticRunner = __esm({
|
|
|
559006
559005
|
_sessionId = `session-${Date.now()}`;
|
|
559007
559006
|
_workingDirectory = "";
|
|
559008
559007
|
constructor(backend, options2) {
|
|
559008
|
+
const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
|
|
559009
|
+
const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
|
|
559009
559010
|
this.backend = backend;
|
|
559010
559011
|
this.options = {
|
|
559011
559012
|
maxTurns: options2?.maxTurns ?? 60,
|
|
@@ -559030,19 +559031,22 @@ var init_agenticRunner = __esm({
|
|
|
559030
559031
|
bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
|
|
559031
559032
|
allowTurnExtension: options2?.allowTurnExtension ?? true,
|
|
559032
559033
|
completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
|
|
559034
|
+
disableAdversaryCritic,
|
|
559035
|
+
disableStepCritic: disableAdversaryCritic,
|
|
559033
559036
|
modelTier: options2?.modelTier ?? "large",
|
|
559034
559037
|
contextWindowSize: options2?.contextWindowSize ?? 0,
|
|
559035
559038
|
personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
|
|
559036
559039
|
personalityName: options2?.personalityName ?? "",
|
|
559037
559040
|
finalVarResolver: options2?.finalVarResolver ?? void 0,
|
|
559038
|
-
|
|
559041
|
+
adversaryMode,
|
|
559042
|
+
observerMode: adversaryMode,
|
|
559039
559043
|
// Phase 4 — sub-agent isolation flag (defaults false). When true, this
|
|
559040
559044
|
// runner skips cross-task handoff inheritance from the parent's
|
|
559041
559045
|
// session.
|
|
559042
559046
|
subAgent: options2?.subAgent ?? false,
|
|
559043
559047
|
skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
|
|
559044
559048
|
};
|
|
559045
|
-
this.
|
|
559049
|
+
this._adversaryMode = this.options.adversaryMode;
|
|
559046
559050
|
}
|
|
559047
559051
|
/** Update context window size (e.g. after querying Ollama /api/show) */
|
|
559048
559052
|
setContextWindowSize(size) {
|
|
@@ -559050,7 +559054,10 @@ var init_agenticRunner = __esm({
|
|
|
559050
559054
|
}
|
|
559051
559055
|
/** Set the working directory for session checkpointing */
|
|
559052
559056
|
setWorkingDirectory(dir) {
|
|
559053
|
-
this._workingDirectory = dir;
|
|
559057
|
+
this._workingDirectory = _pathResolve(dir);
|
|
559058
|
+
}
|
|
559059
|
+
authoritativeWorkingDirectory() {
|
|
559060
|
+
return _pathResolve(this._workingDirectory || process.cwd());
|
|
559054
559061
|
}
|
|
559055
559062
|
/** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
|
|
559056
559063
|
omniusStateDir() {
|
|
@@ -559823,7 +559830,7 @@ ${result.output ?? ""}`;
|
|
|
559823
559830
|
* checklist via todo_write, and only then call task_complete.
|
|
559824
559831
|
*/
|
|
559825
559832
|
/**
|
|
559826
|
-
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK /
|
|
559833
|
+
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
|
|
559827
559834
|
* block / budget exhausted). These paths return early from
|
|
559828
559835
|
* executeSingle BEFORE the main result-handling code, so the normal
|
|
559829
559836
|
* MAST tagging miss them. This helper lets each return-early site
|
|
@@ -561367,7 +561374,7 @@ ${latest.output || ""}`.trim();
|
|
|
561367
561374
|
}
|
|
561368
561375
|
}
|
|
561369
561376
|
const sections = [
|
|
561370
|
-
"[KNOWLEDGE — cached tool results already known to the runtime.
|
|
561377
|
+
"[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
|
|
561371
561378
|
];
|
|
561372
561379
|
if (compactedCount > 0) {
|
|
561373
561380
|
sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
|
|
@@ -561379,6 +561386,7 @@ ${latest.output || ""}`.trim();
|
|
|
561379
561386
|
if (dirsListed.length > 0) {
|
|
561380
561387
|
const unique2 = [...new Set(dirsListed)].slice(0, 15);
|
|
561381
561388
|
sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
|
|
561389
|
+
sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
|
|
561382
561390
|
}
|
|
561383
561391
|
if (searches.length > 0) {
|
|
561384
561392
|
const unique2 = [...new Set(searches)].slice(0, 15);
|
|
@@ -561392,6 +561400,23 @@ ${latest.output || ""}`.trim();
|
|
|
561392
561400
|
return null;
|
|
561393
561401
|
return sections.join("\n");
|
|
561394
561402
|
}
|
|
561403
|
+
_renderRuntimeRootBlock() {
|
|
561404
|
+
const authoritative = this.authoritativeWorkingDirectory();
|
|
561405
|
+
const proc = _pathResolve(process.cwd());
|
|
561406
|
+
const lines = [
|
|
561407
|
+
`[RUNTIME ROOT — authoritative]`,
|
|
561408
|
+
`Current working directory for this run: ${authoritative}`,
|
|
561409
|
+
`All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
|
|
561410
|
+
`Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
|
|
561411
|
+
];
|
|
561412
|
+
if (proc !== authoritative) {
|
|
561413
|
+
lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
|
|
561414
|
+
}
|
|
561415
|
+
if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
|
|
561416
|
+
lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
|
|
561417
|
+
}
|
|
561418
|
+
return lines.join("\n");
|
|
561419
|
+
}
|
|
561395
561420
|
_insertContextFrame(messages2, frame) {
|
|
561396
561421
|
if (!frame)
|
|
561397
561422
|
return;
|
|
@@ -561429,7 +561454,7 @@ ${latest.output || ""}`.trim();
|
|
|
561429
561454
|
add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
|
|
561430
561455
|
add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
|
|
561431
561456
|
add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
|
|
561432
|
-
add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock,
|
|
561457
|
+
add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
|
|
561433
561458
|
add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
|
|
561434
561459
|
add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
|
|
561435
561460
|
if (this._lastPprMemoryLines.length > 0) {
|
|
@@ -561684,7 +561709,10 @@ ${chunk.content}`, {
|
|
|
561684
561709
|
async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
|
|
561685
561710
|
this._contextLedger.clearSources("turn.");
|
|
561686
561711
|
this._contextLedger.prune(turn);
|
|
561687
|
-
const goalBlock =
|
|
561712
|
+
const goalBlock = [
|
|
561713
|
+
this._renderRuntimeRootBlock(),
|
|
561714
|
+
this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
|
|
561715
|
+
].filter(Boolean).join("\n\n");
|
|
561688
561716
|
const filesystemBlock = this._renderFilesystemStateBlock(turn);
|
|
561689
561717
|
const todoBlock = this._renderTodoStateBlock(turn);
|
|
561690
561718
|
const failureBlock = this._renderRecentFailuresBlock(turn);
|
|
@@ -561750,7 +561778,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
|
|
|
561750
561778
|
signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
|
|
561751
561779
|
id: "tool-cache",
|
|
561752
561780
|
dedupeKey: "turn.tool-cache",
|
|
561753
|
-
priority:
|
|
561781
|
+
priority: 92,
|
|
561754
561782
|
createdTurn: turn,
|
|
561755
561783
|
ttlTurns: 1
|
|
561756
561784
|
}),
|
|
@@ -562602,8 +562630,8 @@ ${notice}`;
|
|
|
562602
562630
|
const window2 = recentToolCalls.slice(-repetitionWindow);
|
|
562603
562631
|
const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
|
|
562604
562632
|
const ratio = 1 - uniqueKeys.size / window2.length;
|
|
562605
|
-
if (ratio > 0.4 && this.
|
|
562606
|
-
const recentOutcomes = this.
|
|
562633
|
+
if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
|
|
562634
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
|
|
562607
562635
|
const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
|
|
562608
562636
|
if (uniquePreviews.size >= 3) {
|
|
562609
562637
|
return Math.max(0, ratio - 0.4);
|
|
@@ -562701,6 +562729,9 @@ Respond with your assessment, then take action.`;
|
|
|
562701
562729
|
this._lastActiveForgettingReport = null;
|
|
562702
562730
|
this._lastContextConsolidationTurn = -1e3;
|
|
562703
562731
|
this._contextFrameBuilder = new ContextFrameBuilder();
|
|
562732
|
+
if (!this._workingDirectory) {
|
|
562733
|
+
this._workingDirectory = _pathResolve(process.cwd());
|
|
562734
|
+
}
|
|
562704
562735
|
if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
|
|
562705
562736
|
try {
|
|
562706
562737
|
const path12 = await import("node:path");
|
|
@@ -563134,10 +563165,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
|
|
|
563134
563165
|
this._hookDenyHintCount = 0;
|
|
563135
563166
|
this._selfConsistencyVotes = 0;
|
|
563136
563167
|
this._retrievalContextCache = null;
|
|
563137
|
-
this.
|
|
563168
|
+
this._adversaryMode = this.options.adversaryMode ?? "both";
|
|
563138
563169
|
this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
|
|
563139
563170
|
this._argCohorts.clear();
|
|
563140
|
-
this.
|
|
563171
|
+
this._adversaryRedundantSignals.clear();
|
|
563141
563172
|
this._lastTodoWriteTurn = -1;
|
|
563142
563173
|
this._lastTodoReminderTurn = -1;
|
|
563143
563174
|
let pendingConstraintWarnings = [];
|
|
@@ -563237,14 +563268,44 @@ TASK: ${scrubbedTask}` : scrubbedTask;
|
|
|
563237
563268
|
});
|
|
563238
563269
|
if (gate.proceed)
|
|
563239
563270
|
return false;
|
|
563240
|
-
messages2.push({
|
|
563271
|
+
messages2.push({
|
|
563272
|
+
role: "system",
|
|
563273
|
+
content: `${gate.feedback}
|
|
563274
|
+
|
|
563275
|
+
[ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
|
|
563276
|
+
});
|
|
563241
563277
|
this.emit({
|
|
563242
563278
|
type: "status",
|
|
563243
|
-
content: `
|
|
563279
|
+
content: `completion provenance critique emitted without blocking: ${gate.reason}`,
|
|
563280
|
+
turn,
|
|
563281
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563282
|
+
});
|
|
563283
|
+
this.emit({
|
|
563284
|
+
type: "adversary_reaction",
|
|
563285
|
+
adversary: {
|
|
563286
|
+
class: "guidance",
|
|
563287
|
+
shortText: "Completion provenance critique emitted",
|
|
563288
|
+
confidence: 0.9,
|
|
563289
|
+
details: gate.reason
|
|
563290
|
+
},
|
|
563291
|
+
turn,
|
|
563292
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563293
|
+
});
|
|
563294
|
+
return false;
|
|
563295
|
+
};
|
|
563296
|
+
const emitBackwardPassAdvisory = (feedback, turn) => {
|
|
563297
|
+
messages2.push({
|
|
563298
|
+
role: "system",
|
|
563299
|
+
content: `${feedback}
|
|
563300
|
+
|
|
563301
|
+
[ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
|
|
563302
|
+
});
|
|
563303
|
+
this.emit({
|
|
563304
|
+
type: "status",
|
|
563305
|
+
content: "backward-pass critique emitted without blocking completion",
|
|
563244
563306
|
turn,
|
|
563245
563307
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563246
563308
|
});
|
|
563247
|
-
return true;
|
|
563248
563309
|
};
|
|
563249
563310
|
const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
|
|
563250
563311
|
for (let turn = 0; turn < turnCap; turn++) {
|
|
@@ -564230,8 +564291,8 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564230
564291
|
nextSelfEval = now + selfEvalInterval;
|
|
564231
564292
|
}
|
|
564232
564293
|
const turnsRemaining = this.options.maxTurns - turn;
|
|
564233
|
-
if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this.
|
|
564234
|
-
const recentOutcomes = this.
|
|
564294
|
+
if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
|
|
564295
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
|
|
564235
564296
|
const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
|
|
564236
564297
|
const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
|
|
564237
564298
|
const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
|
|
@@ -564240,16 +564301,16 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564240
564301
|
this.options.maxTurns += extension3;
|
|
564241
564302
|
this.emit({
|
|
564242
564303
|
type: "status",
|
|
564243
|
-
content: `
|
|
564304
|
+
content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
|
|
564244
564305
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
564245
564306
|
});
|
|
564246
564307
|
const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
|
|
564247
564308
|
this.emit({
|
|
564248
|
-
type: "
|
|
564309
|
+
type: "debug_adversary",
|
|
564249
564310
|
turn,
|
|
564250
564311
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
564251
564312
|
content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
|
|
564252
|
-
|
|
564313
|
+
adversaryAction: {
|
|
564253
564314
|
detection: "none",
|
|
564254
564315
|
recentSuccesses,
|
|
564255
564316
|
recentFailures: recentOutcomes.length - recentSuccesses,
|
|
@@ -564582,6 +564643,9 @@ ${memoryLines.join("\n")}`
|
|
|
564582
564643
|
maxTokens: effectiveMaxTokens,
|
|
564583
564644
|
timeoutMs: this.options.requestTimeoutMs
|
|
564584
564645
|
};
|
|
564646
|
+
if ((this.options.contextWindowSize ?? 0) > 0) {
|
|
564647
|
+
chatRequest.numCtx = this.options.contextWindowSize;
|
|
564648
|
+
}
|
|
564585
564649
|
if (this.options.memoryPrefix)
|
|
564586
564650
|
chatRequest.memoryPrefix = this.options.memoryPrefix;
|
|
564587
564651
|
if (this.options.memoryPrefixHash)
|
|
@@ -564623,7 +564687,7 @@ ${memoryLines.join("\n")}`
|
|
|
564623
564687
|
compactionThreshold: limits.compactionThreshold,
|
|
564624
564688
|
toolCallCount,
|
|
564625
564689
|
keepRecent: limits.keepRecent,
|
|
564626
|
-
|
|
564690
|
+
adversaryOutcomes: this._adversaryToolOutcomes.length,
|
|
564627
564691
|
headroom: limits.compactionThreshold - estTokens
|
|
564628
564692
|
}
|
|
564629
564693
|
});
|
|
@@ -564986,16 +565050,19 @@ ${memoryLines.join("\n")}`
|
|
|
564986
565050
|
const cohort = this._argCohorts.get(cohortKey);
|
|
564987
565051
|
if (cohort && cohort.failure >= 3 && cohort.success === 0) {
|
|
564988
565052
|
this.emit({
|
|
564989
|
-
type: "
|
|
565053
|
+
type: "adversary_reaction",
|
|
564990
565054
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
564991
|
-
|
|
565055
|
+
adversary: {
|
|
564992
565056
|
class: "arg_cohort_risk",
|
|
564993
565057
|
shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
|
|
564994
565058
|
confidence: 0.85
|
|
564995
565059
|
}
|
|
564996
565060
|
});
|
|
564997
|
-
if (this.
|
|
564998
|
-
this.pendingUserMessages.push(
|
|
565061
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
565062
|
+
this.pendingUserMessages.push(`[ADVERSARY CRITIQUE — non-blocking]
|
|
565063
|
+
Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
|
|
565064
|
+
Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
|
|
565065
|
+
Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
|
|
564999
565066
|
}
|
|
565000
565067
|
}
|
|
565001
565068
|
if (this._errorPatterns.size > 0) {
|
|
@@ -565277,19 +565344,11 @@ ${memoryLines.join("\n")}`
|
|
|
565277
565344
|
].includes(tc.name);
|
|
565278
565345
|
const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
|
|
565279
565346
|
const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
|
|
565280
|
-
const
|
|
565281
|
-
if (
|
|
565282
|
-
this.
|
|
565347
|
+
const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
|
|
565348
|
+
if (adversaryRedundantSignal) {
|
|
565349
|
+
this._adversaryRedundantSignals.delete(toolFingerprint);
|
|
565283
565350
|
}
|
|
565284
|
-
|
|
565285
|
-
const lastLog = toolCallLog[_toolLogTailIdx];
|
|
565286
|
-
if (!lastLog)
|
|
565287
|
-
return;
|
|
565288
|
-
lastLog.success = true;
|
|
565289
|
-
lastLog.mutated = false;
|
|
565290
|
-
lastLog.mutatedFiles = [];
|
|
565291
|
-
lastLog.outputPreview = outputPreview.slice(0, 100);
|
|
565292
|
-
};
|
|
565351
|
+
let criticGuidance = null;
|
|
565293
565352
|
{
|
|
565294
565353
|
const _reflStem = buildStem(tc.name, tc.arguments ?? {});
|
|
565295
565354
|
if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
|
|
@@ -565331,7 +565390,10 @@ ${memoryLines.join("\n")}`
|
|
|
565331
565390
|
}
|
|
565332
565391
|
}
|
|
565333
565392
|
}
|
|
565334
|
-
const criticDecision =
|
|
565393
|
+
const criticDecision = this.options.disableAdversaryCritic === true ? {
|
|
565394
|
+
decision: "pass",
|
|
565395
|
+
reason: "adversary critic disabled for isolated evaluation"
|
|
565396
|
+
} : evaluate2({
|
|
565335
565397
|
proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
|
|
565336
565398
|
fingerprint: toolFingerprint,
|
|
565337
565399
|
isReadLike,
|
|
@@ -565345,116 +565407,33 @@ ${memoryLines.join("\n")}`
|
|
|
565345
565407
|
stagnationSignals: null,
|
|
565346
565408
|
// stagnation gate handled at top-of-turn
|
|
565347
565409
|
stagnationGateActive: false,
|
|
565348
|
-
|
|
565410
|
+
adversaryRedundantSignal
|
|
565349
565411
|
});
|
|
565350
|
-
if (criticDecision.decision === "
|
|
565351
|
-
this.emit({
|
|
565352
|
-
type: "tool_call",
|
|
565353
|
-
toolName: tc.name,
|
|
565354
|
-
toolArgs: tc.arguments,
|
|
565355
|
-
turn,
|
|
565356
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565357
|
-
});
|
|
565358
|
-
const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
565359
|
-
|
|
565360
|
-
${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
|
|
565361
|
-
markSyntheticToolLog(blockMsg);
|
|
565362
|
-
this.emit({
|
|
565363
|
-
type: "tool_result",
|
|
565364
|
-
toolName: tc.name,
|
|
565365
|
-
success: true,
|
|
565366
|
-
content: blockMsg.slice(0, 100),
|
|
565367
|
-
turn,
|
|
565368
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565369
|
-
});
|
|
565370
|
-
this._tagSyntheticFailure({
|
|
565371
|
-
mode: "step_repetition",
|
|
565372
|
-
rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
|
|
565373
|
-
});
|
|
565374
|
-
return { tc, output: blockMsg, success: true };
|
|
565375
|
-
}
|
|
565376
|
-
if (criticDecision.decision === "force_progress_block") {
|
|
565412
|
+
if (criticDecision.decision === "guidance") {
|
|
565377
565413
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
565378
565414
|
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
565379
565415
|
if (_existingFp !== void 0) {
|
|
565380
565416
|
recentToolResults.delete(toolFingerprint);
|
|
565381
565417
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
565382
565418
|
}
|
|
565419
|
+
criticGuidance = criticDecision.guidanceMessage;
|
|
565383
565420
|
this.emit({
|
|
565384
|
-
type: "
|
|
565385
|
-
|
|
565386
|
-
|
|
565387
|
-
|
|
565421
|
+
type: "adversary_reaction",
|
|
565422
|
+
adversary: {
|
|
565423
|
+
class: "guidance",
|
|
565424
|
+
shortText: `Adversary guidance for repeated ${tc.name} call`,
|
|
565425
|
+
confidence: 0.9,
|
|
565426
|
+
details: criticDecision.reason
|
|
565427
|
+
},
|
|
565388
565428
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565389
565429
|
});
|
|
565390
565430
|
this.emit({
|
|
565391
|
-
type: "
|
|
565392
|
-
toolName: tc.name,
|
|
565393
|
-
success: true,
|
|
565394
|
-
content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
|
|
565395
|
-
turn,
|
|
565396
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565397
|
-
});
|
|
565398
|
-
this._tagSyntheticFailure({
|
|
565399
|
-
mode: "step_repetition",
|
|
565400
|
-
rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
|
|
565401
|
-
});
|
|
565402
|
-
const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
|
|
565403
|
-
|
|
565404
|
-
[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
|
|
565405
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
|
|
565406
|
-
|
|
565407
|
-
` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
|
|
565408
|
-
|
|
565409
|
-
`;
|
|
565410
|
-
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
565411
|
-
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
565412
|
-
markSyntheticToolLog(`${criticDecision.blockMessage}
|
|
565413
|
-
|
|
565414
|
-
${truncatedCache}`);
|
|
565415
|
-
return {
|
|
565416
|
-
tc,
|
|
565417
|
-
output: `${criticDecision.blockMessage}
|
|
565418
|
-
|
|
565419
|
-
${header}${truncatedCache}${generationCompletionHint}`,
|
|
565420
|
-
success: true
|
|
565421
|
-
};
|
|
565422
|
-
}
|
|
565423
|
-
if (criticDecision.decision === "serve_cached") {
|
|
565424
|
-
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
565425
|
-
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
565426
|
-
if (_existingFp !== void 0) {
|
|
565427
|
-
recentToolResults.delete(toolFingerprint);
|
|
565428
|
-
recentToolResults.set(toolFingerprint, _existingFp);
|
|
565429
|
-
}
|
|
565430
|
-
this.emit({
|
|
565431
|
-
type: "tool_call",
|
|
565432
|
-
toolName: tc.name,
|
|
565433
|
-
toolArgs: tc.arguments,
|
|
565434
|
-
turn,
|
|
565435
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565436
|
-
});
|
|
565437
|
-
const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
|
|
565438
|
-
|
|
565439
|
-
[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
|
|
565440
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
565441
|
-
|
|
565442
|
-
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
|
|
565443
|
-
|
|
565444
|
-
`;
|
|
565445
|
-
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
565446
|
-
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
565447
|
-
const dedupOutput = header + truncatedCache + generationCompletionHint;
|
|
565448
|
-
markSyntheticToolLog(dedupOutput);
|
|
565449
|
-
this.emit({
|
|
565450
|
-
type: "tool_result",
|
|
565431
|
+
type: "status",
|
|
565451
565432
|
toolName: tc.name,
|
|
565452
|
-
|
|
565453
|
-
content: header.slice(0, 100),
|
|
565433
|
+
content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
|
|
565454
565434
|
turn,
|
|
565455
565435
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565456
565436
|
});
|
|
565457
|
-
return { tc, output: dedupOutput, success: true };
|
|
565458
565437
|
}
|
|
565459
565438
|
this.emit({
|
|
565460
565439
|
type: "tool_call",
|
|
@@ -566455,6 +566434,11 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
566455
566434
|
result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
|
|
566456
566435
|
}
|
|
566457
566436
|
let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
|
|
566437
|
+
if (criticGuidance) {
|
|
566438
|
+
output += `
|
|
566439
|
+
|
|
566440
|
+
${criticGuidance}`;
|
|
566441
|
+
}
|
|
566458
566442
|
if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
|
|
566459
566443
|
const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
|
|
566460
566444
|
if (recovery)
|
|
@@ -566865,22 +566849,21 @@ ${sr.result.output}`;
|
|
|
566865
566849
|
}
|
|
566866
566850
|
const _bp1 = await this._runBackwardPassReview(turn);
|
|
566867
566851
|
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
566868
|
-
|
|
566869
|
-
}
|
|
566870
|
-
|
|
566871
|
-
|
|
566872
|
-
|
|
566873
|
-
|
|
566874
|
-
|
|
566875
|
-
|
|
566876
|
-
|
|
566877
|
-
|
|
566878
|
-
|
|
566879
|
-
|
|
566880
|
-
|
|
566881
|
-
}
|
|
566882
|
-
break;
|
|
566852
|
+
emitBackwardPassAdvisory(_bp1.feedback, turn);
|
|
566853
|
+
}
|
|
566854
|
+
completed = true;
|
|
566855
|
+
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
566856
|
+
if (summary && !this._assistantTextEmitted) {
|
|
566857
|
+
this.emit({
|
|
566858
|
+
type: "assistant_text",
|
|
566859
|
+
content: summary,
|
|
566860
|
+
source: "task_complete_summary",
|
|
566861
|
+
turn,
|
|
566862
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
566863
|
+
});
|
|
566864
|
+
this._assistantTextEmitted = true;
|
|
566883
566865
|
}
|
|
566866
|
+
break;
|
|
566884
566867
|
}
|
|
566885
566868
|
}
|
|
566886
566869
|
}
|
|
@@ -566921,22 +566904,21 @@ ${sr.result.output}`;
|
|
|
566921
566904
|
}
|
|
566922
566905
|
const _bp2 = await this._runBackwardPassReview(turn);
|
|
566923
566906
|
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
566924
|
-
|
|
566925
|
-
} else {
|
|
566926
|
-
completed = true;
|
|
566927
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
566928
|
-
if (summary && !this._assistantTextEmitted) {
|
|
566929
|
-
this.emit({
|
|
566930
|
-
type: "assistant_text",
|
|
566931
|
-
content: summary,
|
|
566932
|
-
source: "task_complete_summary",
|
|
566933
|
-
turn,
|
|
566934
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
566935
|
-
});
|
|
566936
|
-
this._assistantTextEmitted = true;
|
|
566937
|
-
}
|
|
566938
|
-
break;
|
|
566907
|
+
emitBackwardPassAdvisory(_bp2.feedback, turn);
|
|
566939
566908
|
}
|
|
566909
|
+
completed = true;
|
|
566910
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
566911
|
+
if (summary && !this._assistantTextEmitted) {
|
|
566912
|
+
this.emit({
|
|
566913
|
+
type: "assistant_text",
|
|
566914
|
+
content: summary,
|
|
566915
|
+
source: "task_complete_summary",
|
|
566916
|
+
turn,
|
|
566917
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
566918
|
+
});
|
|
566919
|
+
this._assistantTextEmitted = true;
|
|
566920
|
+
}
|
|
566921
|
+
break;
|
|
566940
566922
|
}
|
|
566941
566923
|
}
|
|
566942
566924
|
}
|
|
@@ -567013,22 +566995,21 @@ ${sr.result.output}`;
|
|
|
567013
566995
|
}
|
|
567014
566996
|
const _bp3 = await this._runBackwardPassReview(turn);
|
|
567015
566997
|
if (_bp3 && !_bp3.proceed && _bp3.feedback) {
|
|
567016
|
-
|
|
567017
|
-
} else {
|
|
567018
|
-
completed = true;
|
|
567019
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
567020
|
-
if (summary && !this._assistantTextEmitted) {
|
|
567021
|
-
this.emit({
|
|
567022
|
-
type: "assistant_text",
|
|
567023
|
-
content: summary,
|
|
567024
|
-
source: "task_complete_summary",
|
|
567025
|
-
turn,
|
|
567026
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567027
|
-
});
|
|
567028
|
-
this._assistantTextEmitted = true;
|
|
567029
|
-
}
|
|
567030
|
-
break;
|
|
566998
|
+
emitBackwardPassAdvisory(_bp3.feedback, turn);
|
|
567031
566999
|
}
|
|
567000
|
+
completed = true;
|
|
567001
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
567002
|
+
if (summary && !this._assistantTextEmitted) {
|
|
567003
|
+
this.emit({
|
|
567004
|
+
type: "assistant_text",
|
|
567005
|
+
content: summary,
|
|
567006
|
+
source: "task_complete_summary",
|
|
567007
|
+
turn,
|
|
567008
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567009
|
+
});
|
|
567010
|
+
this._assistantTextEmitted = true;
|
|
567011
|
+
}
|
|
567012
|
+
break;
|
|
567032
567013
|
}
|
|
567033
567014
|
}
|
|
567034
567015
|
}
|
|
@@ -567039,7 +567020,7 @@ ${sr.result.output}`;
|
|
|
567039
567020
|
}
|
|
567040
567021
|
if (completed)
|
|
567041
567022
|
break;
|
|
567042
|
-
this.
|
|
567023
|
+
this.adversaryObserve(messages2, turn);
|
|
567043
567024
|
const currentRepScore = this.detectRepetition(toolCallLog);
|
|
567044
567025
|
if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
|
|
567045
567026
|
const { repetitionWindow } = this.contextLimits();
|
|
@@ -567236,7 +567217,7 @@ Call task_complete(summary="...") NOW with whatever you have.`
|
|
|
567236
567217
|
}
|
|
567237
567218
|
if (isThinkOnly) {
|
|
567238
567219
|
if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
|
|
567239
|
-
const recentSuccesses = this.
|
|
567220
|
+
const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
|
|
567240
567221
|
const hasRecentSuccess = recentSuccesses.length > 0;
|
|
567241
567222
|
const successHint = hasRecentSuccess ? `
|
|
567242
567223
|
|
|
@@ -567487,7 +567468,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
|
|
|
567487
567468
|
tools: toolDefs,
|
|
567488
567469
|
temperature: this.options.temperature,
|
|
567489
567470
|
maxTokens: this.options.maxTokens,
|
|
567490
|
-
timeoutMs: this.options.requestTimeoutMs
|
|
567471
|
+
timeoutMs: this.options.requestTimeoutMs,
|
|
567472
|
+
numCtx: this.options.contextWindowSize || void 0
|
|
567491
567473
|
};
|
|
567492
567474
|
let response;
|
|
567493
567475
|
try {
|
|
@@ -567797,8 +567779,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
567797
567779
|
}
|
|
567798
567780
|
const _bp4 = await this._runBackwardPassReview(turn);
|
|
567799
567781
|
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
567800
|
-
|
|
567801
|
-
continue;
|
|
567782
|
+
emitBackwardPassAdvisory(_bp4.feedback, turn);
|
|
567802
567783
|
}
|
|
567803
567784
|
completed = true;
|
|
567804
567785
|
summary = extractTaskCompleteSummary(tc.arguments);
|
|
@@ -567871,7 +567852,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
567871
567852
|
}
|
|
567872
567853
|
if (isThinkOnlyBF) {
|
|
567873
567854
|
if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
|
|
567874
|
-
const recentSucc = this.
|
|
567855
|
+
const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
|
|
567875
567856
|
const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
|
|
567876
567857
|
messages2.push({
|
|
567877
567858
|
role: "user",
|
|
@@ -569959,36 +569940,35 @@ ${newerSummary}`;
|
|
|
569959
569940
|
${trimmedNew}`;
|
|
569960
569941
|
}
|
|
569961
569942
|
// -------------------------------------------------------------------------
|
|
569962
|
-
//
|
|
569943
|
+
// Adversary — parallel meta-analysis of the main loop
|
|
569963
569944
|
// -------------------------------------------------------------------------
|
|
569964
|
-
// Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
|
|
569965
569945
|
// Runs after each tool turn to detect when the model has lost track of
|
|
569966
569946
|
// what happened and inject corrections before the next inference.
|
|
569967
569947
|
//
|
|
569968
569948
|
// This is the architectural fix for the "I see both tools have been failing"
|
|
569969
569949
|
// regression: instead of only fixing the data the model sees (mask/summary),
|
|
569970
569950
|
// we add a second analysis path that catches mismatches in real-time.
|
|
569971
|
-
/** Track recent tool outcomes for the
|
|
569972
|
-
|
|
569973
|
-
/** WO-FIX-C: Tool fingerprints the
|
|
569974
|
-
* Checked in executeSingle to
|
|
569975
|
-
|
|
569951
|
+
/** Track recent tool outcomes for the adversary */
|
|
569952
|
+
_adversaryToolOutcomes = [];
|
|
569953
|
+
/** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
|
|
569954
|
+
* Checked in executeSingle to attach advisory guidance before dispatch. */
|
|
569955
|
+
_adversaryRedundantSignals = /* @__PURE__ */ new Set();
|
|
569976
569956
|
/** Reflexion pattern: task-local failure-indexed reflection buffer.
|
|
569977
569957
|
* Generates typed self-reflections on task failure and injects them
|
|
569978
569958
|
* into the next attempt's context for active learning. */
|
|
569979
569959
|
_reflectionBuffer = null;
|
|
569980
569960
|
/**
|
|
569981
|
-
*
|
|
569961
|
+
* Adversary: post-turn meta-analysis.
|
|
569982
569962
|
*
|
|
569983
569963
|
* Examines the last few messages looking for contradictions between
|
|
569984
569964
|
* actual tool outcomes and the model's stated understanding. When it
|
|
569985
569965
|
* detects the model claiming failure after success (or vice versa),
|
|
569986
|
-
* it injects a corrective
|
|
569966
|
+
* it injects a corrective non-blocking critique.
|
|
569987
569967
|
*
|
|
569988
569968
|
* Also detects repeated actions — when the model re-does something
|
|
569989
|
-
* that already succeeded, the
|
|
569969
|
+
* that already succeeded, the adversary nudges it to move on.
|
|
569990
569970
|
*/
|
|
569991
|
-
|
|
569971
|
+
adversaryObserve(messages2, turn) {
|
|
569992
569972
|
if (this.options.modelTier === "large")
|
|
569993
569973
|
return;
|
|
569994
569974
|
const recent = messages2.slice(-6);
|
|
@@ -570017,8 +569997,8 @@ ${trimmedNew}`;
|
|
|
570017
569997
|
}
|
|
570018
569998
|
const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
|
|
570019
569999
|
const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
|
|
570020
|
-
if (!this.
|
|
570021
|
-
this.
|
|
570000
|
+
if (!this._adversaryToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
|
|
570001
|
+
this._adversaryToolOutcomes.push({
|
|
570022
570002
|
turn,
|
|
570023
570003
|
tool: toolName,
|
|
570024
570004
|
argsKey,
|
|
@@ -570029,27 +570009,47 @@ ${trimmedNew}`;
|
|
|
570029
570009
|
}
|
|
570030
570010
|
}
|
|
570031
570011
|
}
|
|
570032
|
-
while (this.
|
|
570033
|
-
this.
|
|
570012
|
+
while (this._adversaryToolOutcomes.length > 20)
|
|
570013
|
+
this._adversaryToolOutcomes.shift();
|
|
570034
570014
|
const emitReaction = (cls, shortText, confidence2, details2) => {
|
|
570035
570015
|
this.emit({
|
|
570036
|
-
type: "
|
|
570016
|
+
type: "adversary_reaction",
|
|
570037
570017
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
570038
|
-
|
|
570018
|
+
adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
|
|
570039
570019
|
});
|
|
570040
570020
|
};
|
|
570021
|
+
const buildAdversaryCritique = (input) => {
|
|
570022
|
+
const alternatives = input.alternatives && input.alternatives.length > 0 ? `
|
|
570023
|
+
Alternatives:
|
|
570024
|
+
${input.alternatives.map((item) => `- ${item}`).join("\n")}` : "";
|
|
570025
|
+
return [
|
|
570026
|
+
`[ADVERSARY CRITIQUE — non-blocking]`,
|
|
570027
|
+
`Evidence: ${input.evidence}`,
|
|
570028
|
+
`Root cause hypothesis: ${input.hypothesis}`,
|
|
570029
|
+
`Corrective action: ${input.correctiveAction}${alternatives}`
|
|
570030
|
+
].join("\n");
|
|
570031
|
+
};
|
|
570041
570032
|
const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
570042
570033
|
if (lastAssistant && typeof lastAssistant.content === "string") {
|
|
570043
570034
|
const text = lastAssistant.content.toLowerCase();
|
|
570044
570035
|
const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
|
|
570045
570036
|
if (claimsFailure) {
|
|
570046
|
-
const recentOutcomes = this.
|
|
570037
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
|
|
570047
570038
|
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
570048
570039
|
if (successes.length >= 1) {
|
|
570049
570040
|
const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
570050
570041
|
emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
|
|
570051
|
-
if (this.
|
|
570052
|
-
this.pendingUserMessages.push(
|
|
570042
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570043
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570044
|
+
evidence: `Recent tools succeeded: ${successList}.`,
|
|
570045
|
+
hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
|
|
570046
|
+
correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
|
|
570047
|
+
alternatives: [
|
|
570048
|
+
"Edit or run the next verification step that follows from the successful output.",
|
|
570049
|
+
"Read a different targeted file if the successful result exposed a new path or symbol.",
|
|
570050
|
+
"Complete only if the successful output is sufficient evidence for the user's request."
|
|
570051
|
+
]
|
|
570052
|
+
}));
|
|
570053
570053
|
}
|
|
570054
570054
|
this.emit({
|
|
570055
570055
|
type: "status",
|
|
@@ -570063,47 +570063,67 @@ ${trimmedNew}`;
|
|
|
570063
570063
|
const text = lastAssistant.content.toLowerCase();
|
|
570064
570064
|
const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
|
|
570065
570065
|
if (claimsSuccess) {
|
|
570066
|
-
const recentOutcomes = this.
|
|
570066
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
|
|
570067
570067
|
const failures = recentOutcomes.filter((o2) => !o2.succeeded);
|
|
570068
570068
|
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
570069
570069
|
if (failures.length > 0 && successes.length === 0) {
|
|
570070
570070
|
const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
570071
570071
|
emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
|
|
570072
|
-
if (this.
|
|
570073
|
-
this.pendingUserMessages.push(
|
|
570072
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570073
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570074
|
+
evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
|
|
570075
|
+
hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
|
|
570076
|
+
correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
|
|
570077
|
+
alternatives: [
|
|
570078
|
+
"Read the smallest relevant source region around the failed symbol.",
|
|
570079
|
+
"Patch the implicated code or configuration.",
|
|
570080
|
+
"Run the same verifier only after a state-changing fix."
|
|
570081
|
+
]
|
|
570082
|
+
}));
|
|
570074
570083
|
}
|
|
570075
570084
|
}
|
|
570076
570085
|
}
|
|
570077
570086
|
}
|
|
570078
|
-
|
|
570079
|
-
|
|
570080
|
-
const
|
|
570081
|
-
|
|
570082
|
-
|
|
570083
|
-
|
|
570084
|
-
|
|
570085
|
-
|
|
570086
|
-
|
|
570087
|
-
|
|
570088
|
-
|
|
570089
|
-
|
|
570090
|
-
|
|
570091
|
-
|
|
570092
|
-
|
|
570093
|
-
|
|
570094
|
-
|
|
570095
|
-
this.
|
|
570087
|
+
if (this.options.disableAdversaryCritic !== true) {
|
|
570088
|
+
const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
|
|
570089
|
+
for (const tc of lastToolCalls) {
|
|
570090
|
+
const name10 = tc.function.name;
|
|
570091
|
+
if (this._isStatefulBrowserTool(name10))
|
|
570092
|
+
continue;
|
|
570093
|
+
let args = {};
|
|
570094
|
+
try {
|
|
570095
|
+
args = JSON.parse(tc.function.arguments);
|
|
570096
|
+
} catch {
|
|
570097
|
+
}
|
|
570098
|
+
const argsKey = this._buildExactArgsKey(args);
|
|
570099
|
+
const fingerprint = this._buildToolFingerprint(name10, args);
|
|
570100
|
+
const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
|
|
570101
|
+
if (prior) {
|
|
570102
|
+
this._adversaryRedundantSignals.add(fingerprint);
|
|
570103
|
+
emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
|
|
570104
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570105
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570106
|
+
evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
|
|
570107
|
+
hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
|
|
570108
|
+
correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
|
|
570109
|
+
alternatives: [
|
|
570110
|
+
"Use the prior result to edit/write, verify, or finish with evidence.",
|
|
570111
|
+
"Read a different specific file or selector if the current evidence is insufficient.",
|
|
570112
|
+
"Repeat exact arguments only when filesystem, browser, or page state changed."
|
|
570113
|
+
]
|
|
570114
|
+
}));
|
|
570115
|
+
}
|
|
570116
|
+
this.emit({
|
|
570117
|
+
type: "status",
|
|
570118
|
+
content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
|
|
570119
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570120
|
+
});
|
|
570121
|
+
break;
|
|
570096
570122
|
}
|
|
570097
|
-
this.emit({
|
|
570098
|
-
type: "status",
|
|
570099
|
-
content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
|
|
570100
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570101
|
-
});
|
|
570102
|
-
break;
|
|
570103
570123
|
}
|
|
570104
570124
|
}
|
|
570105
570125
|
{
|
|
570106
|
-
const recentCalls = this.
|
|
570126
|
+
const recentCalls = this._adversaryToolOutcomes.slice(-5);
|
|
570107
570127
|
if (recentCalls.length >= 3) {
|
|
570108
570128
|
let consecutiveShortResults = 0;
|
|
570109
570129
|
for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
|
|
@@ -570116,30 +570136,39 @@ ${trimmedNew}`;
|
|
|
570116
570136
|
}
|
|
570117
570137
|
if (consecutiveShortResults >= 3) {
|
|
570118
570138
|
emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
|
|
570119
|
-
if (this.
|
|
570120
|
-
this.pendingUserMessages.push(
|
|
570139
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570140
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570141
|
+
evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
|
|
570142
|
+
hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
|
|
570143
|
+
correctiveAction: "Take one input/observation step before another output step.",
|
|
570144
|
+
alternatives: [
|
|
570145
|
+
"Call the input/listen/poll tool for the current environment.",
|
|
570146
|
+
"Read the current UI/page state before clicking or typing again.",
|
|
570147
|
+
"If the task is already complete, finish with the concrete evidence already observed."
|
|
570148
|
+
]
|
|
570149
|
+
}));
|
|
570121
570150
|
}
|
|
570122
570151
|
this.emit({
|
|
570123
570152
|
type: "status",
|
|
570124
|
-
content: `\x1B[38;5;178m⚠
|
|
570153
|
+
content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
|
|
570125
570154
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570126
570155
|
});
|
|
570127
570156
|
}
|
|
570128
570157
|
}
|
|
570129
570158
|
}
|
|
570130
|
-
const succCount = this.
|
|
570131
|
-
const failCount = this.
|
|
570132
|
-
const lastFour = this.
|
|
570159
|
+
const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
|
|
570160
|
+
const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
|
|
570161
|
+
const lastFour = this._adversaryToolOutcomes.slice(-4);
|
|
570133
570162
|
const details = [
|
|
570134
570163
|
`Recent tool outcomes:`,
|
|
570135
570164
|
...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
|
|
570136
570165
|
].join("\n");
|
|
570137
570166
|
this.emit({
|
|
570138
|
-
type: "
|
|
570167
|
+
type: "debug_adversary",
|
|
570139
570168
|
turn,
|
|
570140
570169
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
570141
|
-
content: `
|
|
570142
|
-
|
|
570170
|
+
content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
|
|
570171
|
+
adversaryAction: {
|
|
570143
570172
|
detection: "none",
|
|
570144
570173
|
recentSuccesses: succCount,
|
|
570145
570174
|
recentFailures: failCount,
|
|
@@ -651081,7 +651110,7 @@ ${conversationStream}`
|
|
|
651081
651110
|
// off default rather than the global config's value.
|
|
651082
651111
|
thinking: false,
|
|
651083
651112
|
// Telegram sub-agent runs must be bounded. Brute-force re-engagement and
|
|
651084
|
-
// the
|
|
651113
|
+
// the Adversary near-cap turn extension are appropriate for the full TUI
|
|
651085
651114
|
// session but cause Telegram to silently outgrow its nominal maxTurns,
|
|
651086
651115
|
// which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
|
|
651087
651116
|
...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
|
|
@@ -683444,8 +683473,8 @@ ${entry.fullContent}`
|
|
|
683444
683473
|
let streamTextBuffer = "";
|
|
683445
683474
|
let lastAssistantText = "";
|
|
683446
683475
|
let lastProvenancePath = null;
|
|
683447
|
-
let
|
|
683448
|
-
const
|
|
683476
|
+
let showAdversary = false;
|
|
683477
|
+
const adversaryBuffer = [];
|
|
683449
683478
|
const contentWrite = (fn) => {
|
|
683450
683479
|
if (isNeovimActive()) {
|
|
683451
683480
|
const origWrite = process.stdout.write;
|
|
@@ -683929,24 +683958,24 @@ ${entry.fullContent}`
|
|
|
683929
683958
|
if (snap) {
|
|
683930
683959
|
contentWrite(
|
|
683931
683960
|
() => renderInfo(
|
|
683932
|
-
`\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} |
|
|
683961
|
+
`\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
|
|
683933
683962
|
)
|
|
683934
683963
|
);
|
|
683935
683964
|
}
|
|
683936
683965
|
}
|
|
683937
683966
|
break;
|
|
683938
|
-
case "
|
|
683939
|
-
if (event.
|
|
683940
|
-
const lm = event.
|
|
683967
|
+
case "debug_adversary":
|
|
683968
|
+
if (event.adversaryAction) {
|
|
683969
|
+
const lm = event.adversaryAction;
|
|
683941
683970
|
if (lm.intervention) {
|
|
683942
683971
|
const simple = `⚠ ${lm.intervention}`;
|
|
683943
683972
|
contentWrite(() => renderInfo(simple));
|
|
683944
683973
|
}
|
|
683945
683974
|
if (lm.details) {
|
|
683946
|
-
|
|
683947
|
-
if (
|
|
683948
|
-
|
|
683949
|
-
if (
|
|
683975
|
+
adversaryBuffer.push(lm.details);
|
|
683976
|
+
if (adversaryBuffer.length > 50)
|
|
683977
|
+
adversaryBuffer.splice(0, adversaryBuffer.length - 50);
|
|
683978
|
+
if (showAdversary) {
|
|
683950
683979
|
const det = String(lm.details);
|
|
683951
683980
|
contentWrite(() => {
|
|
683952
683981
|
process.stdout.write(c3.dim(det) + "\n");
|
|
@@ -685688,8 +685717,8 @@ This is an independent background session started from /background.`
|
|
|
685688
685717
|
origTtyWriteRef = null;
|
|
685689
685718
|
statusBar.setNeovimFocusChecker(() => isNeovimFocused());
|
|
685690
685719
|
let _escapeHandler = null;
|
|
685691
|
-
let
|
|
685692
|
-
const
|
|
685720
|
+
let showAdversary = false;
|
|
685721
|
+
const adversaryBuffer = [];
|
|
685693
685722
|
statusBar.hookDirectInput(
|
|
685694
685723
|
rl,
|
|
685695
685724
|
() => {
|
|
@@ -685722,26 +685751,26 @@ This is an independent background session started from /background.`
|
|
|
685722
685751
|
}
|
|
685723
685752
|
},
|
|
685724
685753
|
() => {
|
|
685725
|
-
|
|
685754
|
+
showAdversary = !showAdversary;
|
|
685726
685755
|
if (statusBar.isActive) {
|
|
685727
685756
|
try {
|
|
685728
685757
|
statusBar.jumpToLive();
|
|
685729
685758
|
} catch {
|
|
685730
685759
|
}
|
|
685731
685760
|
statusBar.beginContentWrite();
|
|
685732
|
-
if (
|
|
685733
|
-
renderInfo("
|
|
685734
|
-
const dump =
|
|
685761
|
+
if (showAdversary) {
|
|
685762
|
+
renderInfo("Adversary details: shown");
|
|
685763
|
+
const dump = adversaryBuffer.slice(-10).join("\n");
|
|
685735
685764
|
if (dump.trim()) {
|
|
685736
685765
|
process.stdout.write(`
|
|
685737
|
-
${c3.dim("[
|
|
685766
|
+
${c3.dim("[adversary recap]")}
|
|
685738
685767
|
`);
|
|
685739
685768
|
for (const line of dump.split("\n")) {
|
|
685740
685769
|
process.stdout.write(" " + c3.dim(line) + "\n");
|
|
685741
685770
|
}
|
|
685742
685771
|
}
|
|
685743
685772
|
} else {
|
|
685744
|
-
renderInfo("
|
|
685773
|
+
renderInfo("Adversary details: hidden");
|
|
685745
685774
|
}
|
|
685746
685775
|
statusBar.endContentWrite();
|
|
685747
685776
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.213",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.213",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED