omnius 1.0.212 → 1.0.214
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +541 -360
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -4896,6 +4896,29 @@ var init_shell = __esm({
|
|
|
4896
4896
|
hasSudoPassword() {
|
|
4897
4897
|
return this._sudoPassword !== null;
|
|
4898
4898
|
}
|
|
4899
|
+
isConcurrencySafe(args) {
|
|
4900
|
+
return this.isReadOnly(args);
|
|
4901
|
+
}
|
|
4902
|
+
isReadOnly(args) {
|
|
4903
|
+
const command = String(args["command"] ?? "").trim();
|
|
4904
|
+
if (!command || args["stdin"] !== void 0)
|
|
4905
|
+
return false;
|
|
4906
|
+
const normalized = command.replace(/\s+/g, " ");
|
|
4907
|
+
if (/\bcd\b/.test(normalized))
|
|
4908
|
+
return false;
|
|
4909
|
+
if (/[<>]|\|\s*(?:tee|xargs)\b|\btee\b/.test(normalized))
|
|
4910
|
+
return false;
|
|
4911
|
+
if (/\b(?:sudo|su|rm|mv|cp|touch|mkdir|rmdir|chmod|chown|ln|truncate|dd|patch|apply_patch)\b/.test(normalized)) {
|
|
4912
|
+
return false;
|
|
4913
|
+
}
|
|
4914
|
+
if (/\bgit\s+(?:add|commit|checkout|switch|reset|clean|push|pull|merge|rebase|apply|am|stash|tag)\b/.test(normalized)) {
|
|
4915
|
+
return false;
|
|
4916
|
+
}
|
|
4917
|
+
if (/\b(?:npm|pnpm|yarn|bun)\s+(?:install|i|ci|add|remove|run|test|build)\b/.test(normalized)) {
|
|
4918
|
+
return false;
|
|
4919
|
+
}
|
|
4920
|
+
return /^(?:pwd|ls\b|find\b|rg\b|grep\b|cat\b|head\b|tail\b|wc\b|sed\s+-n\b|awk\b|git\s+(?:status|diff|log|show|branch|rev-parse|ls-files|grep)\b|which\b|command\s+-v\b|node\s+--version\b|node\s+-v\b|python(?:3)?\s+--version\b)/.test(normalized);
|
|
4921
|
+
}
|
|
4899
4922
|
async execute(args) {
|
|
4900
4923
|
const start2 = performance.now();
|
|
4901
4924
|
const command = args["command"];
|
|
@@ -6112,6 +6135,7 @@ var init_file_read = __esm({
|
|
|
6112
6135
|
];
|
|
6113
6136
|
FileReadTool = class {
|
|
6114
6137
|
name = "file_read";
|
|
6138
|
+
aliases = ["read_file", "read", "cat"];
|
|
6115
6139
|
description = "Read the contents of a file. For large files (200+ lines), returns a structural preview with signatures — use offset/limit to read specific sections.";
|
|
6116
6140
|
parameters = {
|
|
6117
6141
|
type: "object",
|
|
@@ -6127,6 +6151,12 @@ var init_file_read = __esm({
|
|
|
6127
6151
|
constructor(workingDir) {
|
|
6128
6152
|
this.workingDir = workingDir;
|
|
6129
6153
|
}
|
|
6154
|
+
isConcurrencySafe() {
|
|
6155
|
+
return true;
|
|
6156
|
+
}
|
|
6157
|
+
isReadOnly() {
|
|
6158
|
+
return true;
|
|
6159
|
+
}
|
|
6130
6160
|
/** Set actual context window size for proportional auto-windowing */
|
|
6131
6161
|
setContextWindowSize(size) {
|
|
6132
6162
|
this._contextWindowSize = size;
|
|
@@ -6662,6 +6692,7 @@ var init_grep_search = __esm({
|
|
|
6662
6692
|
MAX_OUTPUT_LINES = 100;
|
|
6663
6693
|
GrepSearchTool = class {
|
|
6664
6694
|
name = "grep_search";
|
|
6695
|
+
aliases = ["grep", "ripgrep", "search_text"];
|
|
6665
6696
|
description = "Search file contents using regex patterns. Returns matching lines with file paths and line numbers.";
|
|
6666
6697
|
parameters = {
|
|
6667
6698
|
type: "object",
|
|
@@ -6685,6 +6716,12 @@ var init_grep_search = __esm({
|
|
|
6685
6716
|
constructor(workingDir) {
|
|
6686
6717
|
this.workingDir = workingDir;
|
|
6687
6718
|
}
|
|
6719
|
+
isConcurrencySafe() {
|
|
6720
|
+
return true;
|
|
6721
|
+
}
|
|
6722
|
+
isReadOnly() {
|
|
6723
|
+
return true;
|
|
6724
|
+
}
|
|
6688
6725
|
async execute(args) {
|
|
6689
6726
|
const pattern = args["pattern"];
|
|
6690
6727
|
const searchPath = resolve4(this.workingDir, args["path"] ?? ".");
|
|
@@ -6767,6 +6804,7 @@ var init_glob_find = __esm({
|
|
|
6767
6804
|
MAX_RESULTS = 50;
|
|
6768
6805
|
GlobFindTool = class {
|
|
6769
6806
|
name = "find_files";
|
|
6807
|
+
aliases = ["glob", "find"];
|
|
6770
6808
|
description = "Find files matching a glob pattern. Returns list of matching file paths.";
|
|
6771
6809
|
parameters = {
|
|
6772
6810
|
type: "object",
|
|
@@ -6786,6 +6824,12 @@ var init_glob_find = __esm({
|
|
|
6786
6824
|
constructor(workingDir) {
|
|
6787
6825
|
this.workingDir = workingDir;
|
|
6788
6826
|
}
|
|
6827
|
+
isConcurrencySafe() {
|
|
6828
|
+
return true;
|
|
6829
|
+
}
|
|
6830
|
+
isReadOnly() {
|
|
6831
|
+
return true;
|
|
6832
|
+
}
|
|
6789
6833
|
async execute(args) {
|
|
6790
6834
|
const pattern = args["pattern"];
|
|
6791
6835
|
const searchPath = resolve5(this.workingDir, args["path"] ?? ".");
|
|
@@ -24373,6 +24417,7 @@ var init_list_directory = __esm({
|
|
|
24373
24417
|
MAX_ENTRIES = 100;
|
|
24374
24418
|
ListDirectoryTool = class {
|
|
24375
24419
|
name = "list_directory";
|
|
24420
|
+
aliases = ["ls", "dir"];
|
|
24376
24421
|
description = "List files and directories at a given path. Shows file sizes and types. Output includes full relative paths you can use directly in subsequent tool calls.";
|
|
24377
24422
|
parameters = {
|
|
24378
24423
|
type: "object",
|
|
@@ -24388,6 +24433,12 @@ var init_list_directory = __esm({
|
|
|
24388
24433
|
constructor(workingDir) {
|
|
24389
24434
|
this.workingDir = workingDir;
|
|
24390
24435
|
}
|
|
24436
|
+
isConcurrencySafe() {
|
|
24437
|
+
return true;
|
|
24438
|
+
}
|
|
24439
|
+
isReadOnly() {
|
|
24440
|
+
return true;
|
|
24441
|
+
}
|
|
24391
24442
|
async execute(args) {
|
|
24392
24443
|
const rawPath = args["path"];
|
|
24393
24444
|
const dirPath = typeof rawPath === "string" && rawPath.trim() ? rawPath : ".";
|
|
@@ -551582,28 +551633,38 @@ var init_personality = __esm({
|
|
|
551582
551633
|
});
|
|
551583
551634
|
|
|
551584
551635
|
// packages/orchestrator/dist/critic.js
|
|
551585
|
-
function
|
|
551636
|
+
function buildCriticGuidanceMessage(call, hits, opts = {}) {
|
|
551586
551637
|
const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
|
|
551587
|
-
|
|
551588
|
-
|
|
551589
|
-
|
|
551590
|
-
|
|
551591
|
-
|
|
551592
|
-
|
|
551593
|
-
|
|
551638
|
+
const cached = opts.cachedResult ? `
|
|
551639
|
+
Prior evidence preview:
|
|
551640
|
+
${opts.cachedResult.slice(0, 700)}` : "";
|
|
551641
|
+
const source = opts.adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
|
|
551642
|
+
return `[ADVERSARY GUIDANCE — non-blocking]
|
|
551643
|
+
Observation: ${source}
|
|
551644
|
+
Call: ${call.tool}(${argPreview})
|
|
551645
|
+
Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
|
|
551646
|
+
Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
|
|
551647
|
+
Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
|
|
551594
551648
|
}
|
|
551595
551649
|
function buildCachedResultEnvelope(result) {
|
|
551596
|
-
return `[
|
|
551650
|
+
return `[PRIOR RESULT — already observed by a prior identical call]
|
|
551597
551651
|
${result}`;
|
|
551598
551652
|
}
|
|
551599
551653
|
function evaluate2(inputs) {
|
|
551600
|
-
const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount,
|
|
551601
|
-
if (
|
|
551654
|
+
const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
|
|
551655
|
+
if (adversaryRedundantSignal) {
|
|
551602
551656
|
const cached = recentToolResults.get(fingerprint);
|
|
551657
|
+
const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
|
|
551603
551658
|
return {
|
|
551604
|
-
decision: "
|
|
551605
|
-
reason: "
|
|
551606
|
-
|
|
551659
|
+
decision: "guidance",
|
|
551660
|
+
reason: "Adversary flagged this fingerprint as redundant",
|
|
551661
|
+
hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
|
|
551662
|
+
guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
|
|
551663
|
+
cachedResult,
|
|
551664
|
+
adversaryFlag: true
|
|
551665
|
+
}),
|
|
551666
|
+
cachedResult,
|
|
551667
|
+
compacted: cached?.compacted
|
|
551607
551668
|
};
|
|
551608
551669
|
}
|
|
551609
551670
|
const cacheEligible = isReadLike || proposedCall.tool === "shell";
|
|
@@ -551611,24 +551672,16 @@ function evaluate2(inputs) {
|
|
|
551611
551672
|
const cached = recentToolResults.get(fingerprint);
|
|
551612
551673
|
if (cached !== void 0) {
|
|
551613
551674
|
const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
|
|
551614
|
-
const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
|
|
551615
|
-
if (hits >= threshold) {
|
|
551616
|
-
return {
|
|
551617
|
-
decision: "force_progress_block",
|
|
551618
|
-
reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
|
|
551619
|
-
hitNumber: hits,
|
|
551620
|
-
blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
|
|
551621
|
-
cachedResult: buildCachedResultEnvelope(cached.result),
|
|
551622
|
-
compacted: cached.compacted
|
|
551623
|
-
};
|
|
551624
|
-
}
|
|
551625
551675
|
const cachedEnvelope = buildCachedResultEnvelope(cached.result);
|
|
551626
551676
|
return {
|
|
551627
|
-
decision: "
|
|
551628
|
-
reason: cached.compacted ? "post-compaction
|
|
551677
|
+
decision: "guidance",
|
|
551678
|
+
reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
|
|
551629
551679
|
cachedResult: cachedEnvelope,
|
|
551630
551680
|
compacted: cached.compacted,
|
|
551631
|
-
hitNumber: hits
|
|
551681
|
+
hitNumber: hits,
|
|
551682
|
+
guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
|
|
551683
|
+
cachedResult: cachedEnvelope
|
|
551684
|
+
})
|
|
551632
551685
|
};
|
|
551633
551686
|
}
|
|
551634
551687
|
}
|
|
@@ -551680,12 +551733,9 @@ function isStagnant(signals, opts) {
|
|
|
551680
551733
|
return false;
|
|
551681
551734
|
return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
|
|
551682
551735
|
}
|
|
551683
|
-
var SHELL_THRESHOLD, FS_THRESHOLD;
|
|
551684
551736
|
var init_critic = __esm({
|
|
551685
551737
|
"packages/orchestrator/dist/critic.js"() {
|
|
551686
551738
|
"use strict";
|
|
551687
|
-
SHELL_THRESHOLD = 2;
|
|
551688
|
-
FS_THRESHOLD = 3;
|
|
551689
551739
|
}
|
|
551690
551740
|
});
|
|
551691
551741
|
|
|
@@ -555234,7 +555284,8 @@ function partitionToolCalls(calls, readOnlyHints) {
|
|
|
555234
555284
|
const batches = [];
|
|
555235
555285
|
let currentConcurrent = [];
|
|
555236
555286
|
for (const call of calls) {
|
|
555237
|
-
|
|
555287
|
+
const safe = typeof call.concurrencySafe === "boolean" ? call.concurrencySafe : isConcurrencySafe(call.name, readOnlyHints);
|
|
555288
|
+
if (safe) {
|
|
555238
555289
|
currentConcurrent.push(call);
|
|
555239
555290
|
} else {
|
|
555240
555291
|
if (currentConcurrent.length > 0) {
|
|
@@ -555892,13 +555943,18 @@ var init_streaming_executor = __esm({
|
|
|
555892
555943
|
executeFn = null;
|
|
555893
555944
|
constructor(config) {
|
|
555894
555945
|
this.config = {
|
|
555895
|
-
maxConcurrent: config?.maxConcurrent ?? 5
|
|
555946
|
+
maxConcurrent: config?.maxConcurrent ?? 5,
|
|
555947
|
+
concurrencyResolver: config?.concurrencyResolver
|
|
555896
555948
|
};
|
|
555897
555949
|
}
|
|
555898
555950
|
/** Set the tool execution function */
|
|
555899
555951
|
setExecutor(fn) {
|
|
555900
555952
|
this.executeFn = fn;
|
|
555901
555953
|
}
|
|
555954
|
+
/** Update the parsed-input concurrency classifier. */
|
|
555955
|
+
setConcurrencyResolver(fn) {
|
|
555956
|
+
this.config.concurrencyResolver = fn;
|
|
555957
|
+
}
|
|
555902
555958
|
/** Number of tools tracked */
|
|
555903
555959
|
get size() {
|
|
555904
555960
|
return this.tools.size;
|
|
@@ -555919,7 +555975,7 @@ var init_streaming_executor = __esm({
|
|
|
555919
555975
|
name: name10,
|
|
555920
555976
|
args: partialArgs ?? {},
|
|
555921
555977
|
state: "queued",
|
|
555922
|
-
concurrencySafe:
|
|
555978
|
+
concurrencySafe: this.resolveConcurrencySafe(name10, partialArgs ?? {}),
|
|
555923
555979
|
finalized: false,
|
|
555924
555980
|
queuedAt: Date.now()
|
|
555925
555981
|
});
|
|
@@ -555934,6 +555990,7 @@ var init_streaming_executor = __esm({
|
|
|
555934
555990
|
if (!entry)
|
|
555935
555991
|
return;
|
|
555936
555992
|
entry.args = args;
|
|
555993
|
+
entry.concurrencySafe = this.resolveConcurrencySafe(entry.name, args);
|
|
555937
555994
|
entry.finalized = true;
|
|
555938
555995
|
if (entry.state === "queued") {
|
|
555939
555996
|
this.processQueue();
|
|
@@ -556031,6 +556088,15 @@ var init_streaming_executor = __esm({
|
|
|
556031
556088
|
return true;
|
|
556032
556089
|
return false;
|
|
556033
556090
|
}
|
|
556091
|
+
resolveConcurrencySafe(name10, args) {
|
|
556092
|
+
try {
|
|
556093
|
+
const resolved = this.config.concurrencyResolver?.(name10, args);
|
|
556094
|
+
if (typeof resolved === "boolean")
|
|
556095
|
+
return resolved;
|
|
556096
|
+
} catch {
|
|
556097
|
+
}
|
|
556098
|
+
return isConcurrencySafe(name10);
|
|
556099
|
+
}
|
|
556034
556100
|
entryFingerprint(entry) {
|
|
556035
556101
|
return `${entry.name}:${stableValueKey(entry.args)}`;
|
|
556036
556102
|
}
|
|
@@ -558656,8 +558722,8 @@ var init_agenticRunner = __esm({
|
|
|
558656
558722
|
// WO-KG-15
|
|
558657
558723
|
_retrievalContextCache = null;
|
|
558658
558724
|
// WO-KG-15: cache per-run
|
|
558659
|
-
//
|
|
558660
|
-
|
|
558725
|
+
// Adversary world-model and cohort stats
|
|
558726
|
+
_adversaryMode = "both";
|
|
558661
558727
|
_worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
|
|
558662
558728
|
// REG-7-root: Track file writes since last todo_write call. When this
|
|
558663
558729
|
// counter climbs without a todo update, the agent has likely batched
|
|
@@ -559006,6 +559072,8 @@ var init_agenticRunner = __esm({
|
|
|
559006
559072
|
_sessionId = `session-${Date.now()}`;
|
|
559007
559073
|
_workingDirectory = "";
|
|
559008
559074
|
constructor(backend, options2) {
|
|
559075
|
+
const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
|
|
559076
|
+
const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
|
|
559009
559077
|
this.backend = backend;
|
|
559010
559078
|
this.options = {
|
|
559011
559079
|
maxTurns: options2?.maxTurns ?? 60,
|
|
@@ -559030,19 +559098,23 @@ var init_agenticRunner = __esm({
|
|
|
559030
559098
|
bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
|
|
559031
559099
|
allowTurnExtension: options2?.allowTurnExtension ?? true,
|
|
559032
559100
|
completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
|
|
559101
|
+
disableAdversaryCritic,
|
|
559102
|
+
disableStepCritic: disableAdversaryCritic,
|
|
559033
559103
|
modelTier: options2?.modelTier ?? "large",
|
|
559034
559104
|
contextWindowSize: options2?.contextWindowSize ?? 0,
|
|
559035
559105
|
personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
|
|
559036
559106
|
personalityName: options2?.personalityName ?? "",
|
|
559037
559107
|
finalVarResolver: options2?.finalVarResolver ?? void 0,
|
|
559038
|
-
|
|
559108
|
+
adversaryMode,
|
|
559109
|
+
observerMode: adversaryMode,
|
|
559039
559110
|
// Phase 4 — sub-agent isolation flag (defaults false). When true, this
|
|
559040
559111
|
// runner skips cross-task handoff inheritance from the parent's
|
|
559041
559112
|
// session.
|
|
559042
559113
|
subAgent: options2?.subAgent ?? false,
|
|
559043
559114
|
skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
|
|
559044
559115
|
};
|
|
559045
|
-
this.
|
|
559116
|
+
this._adversaryMode = this.options.adversaryMode;
|
|
559117
|
+
this._streamingExecutor.setConcurrencyResolver((name10, args) => this.resolveToolConcurrencySafe(name10, args));
|
|
559046
559118
|
}
|
|
559047
559119
|
/** Update context window size (e.g. after querying Ollama /api/show) */
|
|
559048
559120
|
setContextWindowSize(size) {
|
|
@@ -559050,7 +559122,10 @@ var init_agenticRunner = __esm({
|
|
|
559050
559122
|
}
|
|
559051
559123
|
/** Set the working directory for session checkpointing */
|
|
559052
559124
|
setWorkingDirectory(dir) {
|
|
559053
|
-
this._workingDirectory = dir;
|
|
559125
|
+
this._workingDirectory = _pathResolve(dir);
|
|
559126
|
+
}
|
|
559127
|
+
authoritativeWorkingDirectory() {
|
|
559128
|
+
return _pathResolve(this._workingDirectory || process.cwd());
|
|
559054
559129
|
}
|
|
559055
559130
|
/** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
|
|
559056
559131
|
omniusStateDir() {
|
|
@@ -559823,7 +559898,7 @@ ${result.output ?? ""}`;
|
|
|
559823
559898
|
* checklist via todo_write, and only then call task_complete.
|
|
559824
559899
|
*/
|
|
559825
559900
|
/**
|
|
559826
|
-
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK /
|
|
559901
|
+
* REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
|
|
559827
559902
|
* block / budget exhausted). These paths return early from
|
|
559828
559903
|
* executeSingle BEFORE the main result-handling code, so the normal
|
|
559829
559904
|
* MAST tagging miss them. This helper lets each return-early site
|
|
@@ -561367,7 +561442,7 @@ ${latest.output || ""}`.trim();
|
|
|
561367
561442
|
}
|
|
561368
561443
|
}
|
|
561369
561444
|
const sections = [
|
|
561370
|
-
"[KNOWLEDGE — cached tool results already known to the runtime.
|
|
561445
|
+
"[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
|
|
561371
561446
|
];
|
|
561372
561447
|
if (compactedCount > 0) {
|
|
561373
561448
|
sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
|
|
@@ -561379,6 +561454,7 @@ ${latest.output || ""}`.trim();
|
|
|
561379
561454
|
if (dirsListed.length > 0) {
|
|
561380
561455
|
const unique2 = [...new Set(dirsListed)].slice(0, 15);
|
|
561381
561456
|
sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
|
|
561457
|
+
sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
|
|
561382
561458
|
}
|
|
561383
561459
|
if (searches.length > 0) {
|
|
561384
561460
|
const unique2 = [...new Set(searches)].slice(0, 15);
|
|
@@ -561392,6 +561468,23 @@ ${latest.output || ""}`.trim();
|
|
|
561392
561468
|
return null;
|
|
561393
561469
|
return sections.join("\n");
|
|
561394
561470
|
}
|
|
561471
|
+
_renderRuntimeRootBlock() {
|
|
561472
|
+
const authoritative = this.authoritativeWorkingDirectory();
|
|
561473
|
+
const proc = _pathResolve(process.cwd());
|
|
561474
|
+
const lines = [
|
|
561475
|
+
`[RUNTIME ROOT — authoritative]`,
|
|
561476
|
+
`Current working directory for this run: ${authoritative}`,
|
|
561477
|
+
`All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
|
|
561478
|
+
`Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
|
|
561479
|
+
];
|
|
561480
|
+
if (proc !== authoritative) {
|
|
561481
|
+
lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
|
|
561482
|
+
}
|
|
561483
|
+
if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
|
|
561484
|
+
lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
|
|
561485
|
+
}
|
|
561486
|
+
return lines.join("\n");
|
|
561487
|
+
}
|
|
561395
561488
|
_insertContextFrame(messages2, frame) {
|
|
561396
561489
|
if (!frame)
|
|
561397
561490
|
return;
|
|
@@ -561429,7 +561522,7 @@ ${latest.output || ""}`.trim();
|
|
|
561429
561522
|
add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
|
|
561430
561523
|
add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
|
|
561431
561524
|
add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
|
|
561432
|
-
add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock,
|
|
561525
|
+
add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
|
|
561433
561526
|
add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
|
|
561434
561527
|
add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
|
|
561435
561528
|
if (this._lastPprMemoryLines.length > 0) {
|
|
@@ -561684,7 +561777,10 @@ ${chunk.content}`, {
|
|
|
561684
561777
|
async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
|
|
561685
561778
|
this._contextLedger.clearSources("turn.");
|
|
561686
561779
|
this._contextLedger.prune(turn);
|
|
561687
|
-
const goalBlock =
|
|
561780
|
+
const goalBlock = [
|
|
561781
|
+
this._renderRuntimeRootBlock(),
|
|
561782
|
+
this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
|
|
561783
|
+
].filter(Boolean).join("\n\n");
|
|
561688
561784
|
const filesystemBlock = this._renderFilesystemStateBlock(turn);
|
|
561689
561785
|
const todoBlock = this._renderTodoStateBlock(turn);
|
|
561690
561786
|
const failureBlock = this._renderRecentFailuresBlock(turn);
|
|
@@ -561750,7 +561846,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
|
|
|
561750
561846
|
signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
|
|
561751
561847
|
id: "tool-cache",
|
|
561752
561848
|
dedupeKey: "turn.tool-cache",
|
|
561753
|
-
priority:
|
|
561849
|
+
priority: 92,
|
|
561754
561850
|
createdTurn: turn,
|
|
561755
561851
|
ttlTurns: 1
|
|
561756
561852
|
}),
|
|
@@ -562020,7 +562116,8 @@ ${blob}
|
|
|
562020
562116
|
return Object.entries(args ?? {}).sort(([a2], [b]) => a2.localeCompare(b)).map(([k, v]) => `${k}=${this._formatExactArgValue(v)}`).join(",");
|
|
562021
562117
|
}
|
|
562022
562118
|
_buildToolFingerprint(name10, args) {
|
|
562023
|
-
|
|
562119
|
+
const canonical = this.lookupRegisteredTool(name10)?.name ?? name10;
|
|
562120
|
+
return `${canonical}:${this._buildExactArgsKey(args)}`;
|
|
562024
562121
|
}
|
|
562025
562122
|
_dedupeToolCallsForResponse(toolCalls, turn) {
|
|
562026
562123
|
if (toolCalls.length <= 1)
|
|
@@ -562226,32 +562323,45 @@ ${blob}
|
|
|
562226
562323
|
}
|
|
562227
562324
|
/** Register a tool for the agent to use */
|
|
562228
562325
|
registerTool(tool) {
|
|
562229
|
-
if (!this.isToolAllowedByProfile(tool.name))
|
|
562326
|
+
if (!this.isToolAllowedByProfile(tool.name, tool.aliases))
|
|
562230
562327
|
return;
|
|
562231
562328
|
this.tools.set(tool.name, tool);
|
|
562232
562329
|
if (tool.name === "generate_image") {
|
|
562233
562330
|
this.maybeInstallImagePromptExpander(tool);
|
|
562234
562331
|
}
|
|
562235
562332
|
}
|
|
562236
|
-
|
|
562237
|
-
const profile = this.options.toolProfile;
|
|
562238
|
-
if (!profile)
|
|
562239
|
-
return true;
|
|
562333
|
+
toolNameVariants(name10) {
|
|
562240
562334
|
const raw = String(name10 ?? "").trim();
|
|
562335
|
+
if (!raw)
|
|
562336
|
+
return [];
|
|
562241
562337
|
const lastSegment = raw.split(/[.:/]/).filter(Boolean).pop() ?? raw;
|
|
562242
|
-
|
|
562338
|
+
return Array.from(new Set([
|
|
562243
562339
|
raw,
|
|
562244
562340
|
raw.toLowerCase(),
|
|
562341
|
+
raw.replace(/[-\s]+/g, "_"),
|
|
562245
562342
|
raw.replace(/^functions[._:-]/i, ""),
|
|
562246
562343
|
raw.replace(/^tools[._:-]/i, ""),
|
|
562247
562344
|
lastSegment,
|
|
562248
|
-
lastSegment.toLowerCase()
|
|
562249
|
-
|
|
562345
|
+
lastSegment.toLowerCase(),
|
|
562346
|
+
lastSegment.replace(/[-\s]+/g, "_")
|
|
562347
|
+
].filter(Boolean)));
|
|
562348
|
+
}
|
|
562349
|
+
isToolAllowedByProfile(name10, aliases) {
|
|
562350
|
+
const profile = this.options.toolProfile;
|
|
562351
|
+
if (!profile)
|
|
562352
|
+
return true;
|
|
562353
|
+
const candidates = /* @__PURE__ */ new Set();
|
|
562354
|
+
for (const value2 of [name10, ...aliases ?? []]) {
|
|
562355
|
+
for (const variant of this.toolNameVariants(value2)) {
|
|
562356
|
+
candidates.add(variant);
|
|
562357
|
+
}
|
|
562358
|
+
}
|
|
562250
562359
|
const allow = Array.isArray(profile.allow) ? profile.allow.filter(Boolean) : [];
|
|
562251
|
-
if (allow.length > 0)
|
|
562252
|
-
return allow.some((toolName) => candidates.has(
|
|
562360
|
+
if (allow.length > 0) {
|
|
562361
|
+
return allow.some((toolName) => this.toolNameVariants(toolName).some((candidate) => candidates.has(candidate)));
|
|
562362
|
+
}
|
|
562253
562363
|
const deny = Array.isArray(profile.deny) ? profile.deny.filter(Boolean) : [];
|
|
562254
|
-
return !deny.some((toolName) => candidates.has(
|
|
562364
|
+
return !deny.some((toolName) => this.toolNameVariants(toolName).some((candidate) => candidates.has(candidate)));
|
|
562255
562365
|
}
|
|
562256
562366
|
toolProfileDenial(name10) {
|
|
562257
562367
|
const profileName = this.options.toolProfile?.name || "active tool profile";
|
|
@@ -562329,20 +562439,14 @@ Rewrite it now for ${ctx3.model}.`;
|
|
|
562329
562439
|
const direct = this.tools.get(raw);
|
|
562330
562440
|
if (direct)
|
|
562331
562441
|
return { name: raw, tool: direct };
|
|
562332
|
-
const
|
|
562333
|
-
const candidates = /* @__PURE__ */ new Set([
|
|
562334
|
-
raw,
|
|
562335
|
-
raw.toLowerCase(),
|
|
562336
|
-
raw.replace(/[-\s]+/g, "_"),
|
|
562337
|
-
raw.replace(/^functions[._:-]/i, ""),
|
|
562338
|
-
raw.replace(/^tools[._:-]/i, ""),
|
|
562339
|
-
lastSegment,
|
|
562340
|
-
lastSegment.toLowerCase(),
|
|
562341
|
-
lastSegment.replace(/[-\s]+/g, "_")
|
|
562342
|
-
]);
|
|
562442
|
+
const candidates = new Set(this.toolNameVariants(raw));
|
|
562343
562443
|
const lowerIndex = /* @__PURE__ */ new Map();
|
|
562344
|
-
for (const registeredName of this.tools
|
|
562345
|
-
|
|
562444
|
+
for (const [registeredName, tool] of this.tools) {
|
|
562445
|
+
for (const value2 of [registeredName, ...tool.aliases ?? []]) {
|
|
562446
|
+
for (const variant of this.toolNameVariants(value2)) {
|
|
562447
|
+
lowerIndex.set(variant.toLowerCase(), registeredName);
|
|
562448
|
+
}
|
|
562449
|
+
}
|
|
562346
562450
|
}
|
|
562347
562451
|
for (const candidate of candidates) {
|
|
562348
562452
|
const exact = this.tools.get(candidate);
|
|
@@ -562354,8 +562458,47 @@ Rewrite it now for ${ctx3.model}.`;
|
|
|
562354
562458
|
}
|
|
562355
562459
|
return null;
|
|
562356
562460
|
}
|
|
562461
|
+
resolveToolConcurrencySafe(name10, args) {
|
|
562462
|
+
const resolved = this.lookupRegisteredTool(name10);
|
|
562463
|
+
const tool = resolved?.tool;
|
|
562464
|
+
try {
|
|
562465
|
+
if (typeof tool?.isConcurrencySafe === "function") {
|
|
562466
|
+
return !!tool.isConcurrencySafe(args);
|
|
562467
|
+
}
|
|
562468
|
+
if (typeof tool?.isReadOnly === "function") {
|
|
562469
|
+
return !!tool.isReadOnly(args);
|
|
562470
|
+
}
|
|
562471
|
+
} catch {
|
|
562472
|
+
}
|
|
562473
|
+
return isConcurrencySafe(resolved?.name ?? name10);
|
|
562474
|
+
}
|
|
562475
|
+
toolResultMaxSize(tool) {
|
|
562476
|
+
const max = tool?.maxResultSizeChars;
|
|
562477
|
+
return typeof max === "number" && Number.isFinite(max) && max > 0 ? max : void 0;
|
|
562478
|
+
}
|
|
562479
|
+
applyRegisteredToolResultTriage(result, toolName, tool) {
|
|
562480
|
+
return applyToolResultTriage(result, {
|
|
562481
|
+
workingDir: this._workingDirectory || process.cwd(),
|
|
562482
|
+
toolName,
|
|
562483
|
+
maxOutputSize: this.toolResultMaxSize(tool)
|
|
562484
|
+
});
|
|
562485
|
+
}
|
|
562486
|
+
async validateToolInput(tool, args, toolName) {
|
|
562487
|
+
if (typeof tool.validateInput !== "function")
|
|
562488
|
+
return null;
|
|
562489
|
+
const validation = await tool.validateInput(args, {
|
|
562490
|
+
toolName,
|
|
562491
|
+
workingDir: this._workingDirectory || process.cwd()
|
|
562492
|
+
});
|
|
562493
|
+
if (validation?.result === true)
|
|
562494
|
+
return null;
|
|
562495
|
+
if (validation?.result === false) {
|
|
562496
|
+
return validation.message || "custom validation failed";
|
|
562497
|
+
}
|
|
562498
|
+
return null;
|
|
562499
|
+
}
|
|
562357
562500
|
unknownToolError(name10) {
|
|
562358
|
-
const names = Array.from(this.tools.
|
|
562501
|
+
const names = Array.from(this.tools.values()).map((tool) => tool.aliases?.length ? `${tool.name} (aliases: ${tool.aliases.join("|")})` : tool.name).sort();
|
|
562359
562502
|
const preview = names.slice(0, 80).join(", ");
|
|
562360
562503
|
const suffix = names.length > 80 ? `, ... ${names.length - 80} more` : "";
|
|
562361
562504
|
return `Unknown tool: ${name10}. Registered tools (${names.length}): ${preview}${suffix}`;
|
|
@@ -562369,6 +562512,7 @@ Rewrite it now for ${ctx3.model}.`;
|
|
|
562369
562512
|
for (const t2 of this.tools.values()) {
|
|
562370
562513
|
list.push({
|
|
562371
562514
|
name: t2.name,
|
|
562515
|
+
...t2.aliases?.length ? { aliases: t2.aliases } : {},
|
|
562372
562516
|
description: t2.description,
|
|
562373
562517
|
parameters: t2.parameters
|
|
562374
562518
|
});
|
|
@@ -562380,10 +562524,10 @@ Rewrite it now for ${ctx3.model}.`;
|
|
|
562380
562524
|
* Validates against inputSchema if present and returns the tool result.
|
|
562381
562525
|
*/
|
|
562382
562526
|
async runToolByName(name10, args) {
|
|
562383
|
-
|
|
562527
|
+
const resolved = this.lookupRegisteredTool(name10);
|
|
562528
|
+
if (!this.isToolAllowedByProfile(resolved?.name ?? name10, resolved?.tool.aliases)) {
|
|
562384
562529
|
return this.toolProfileDenial(name10);
|
|
562385
562530
|
}
|
|
562386
|
-
const resolved = this.lookupRegisteredTool(name10);
|
|
562387
562531
|
if (!resolved) {
|
|
562388
562532
|
return { success: false, output: "", error: this.unknownToolError(name10) };
|
|
562389
562533
|
}
|
|
@@ -562399,12 +562543,17 @@ Rewrite it now for ${ctx3.model}.`;
|
|
|
562399
562543
|
error: `Invalid args for ${resolved.name}: ${e2?.message || String(e2)}`
|
|
562400
562544
|
};
|
|
562401
562545
|
}
|
|
562546
|
+
const validationError = await this.validateToolInput(tool, args, resolved.name);
|
|
562547
|
+
if (validationError) {
|
|
562548
|
+
return {
|
|
562549
|
+
success: false,
|
|
562550
|
+
output: "",
|
|
562551
|
+
error: `Invalid args for ${resolved.name}: ${validationError}. Check the parameter values and try again.`
|
|
562552
|
+
};
|
|
562553
|
+
}
|
|
562402
562554
|
try {
|
|
562403
562555
|
const result = await tool.execute(args);
|
|
562404
|
-
return
|
|
562405
|
-
workingDir: this._workingDirectory || process.cwd(),
|
|
562406
|
-
toolName: tool.name ?? "tool"
|
|
562407
|
-
});
|
|
562556
|
+
return this.applyRegisteredToolResultTriage(result, resolved.name, tool);
|
|
562408
562557
|
} catch (e2) {
|
|
562409
562558
|
return { success: false, output: "", error: e2?.message || String(e2) };
|
|
562410
562559
|
}
|
|
@@ -562602,8 +562751,8 @@ ${notice}`;
|
|
|
562602
562751
|
const window2 = recentToolCalls.slice(-repetitionWindow);
|
|
562603
562752
|
const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
|
|
562604
562753
|
const ratio = 1 - uniqueKeys.size / window2.length;
|
|
562605
|
-
if (ratio > 0.4 && this.
|
|
562606
|
-
const recentOutcomes = this.
|
|
562754
|
+
if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
|
|
562755
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
|
|
562607
562756
|
const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
|
|
562608
562757
|
if (uniquePreviews.size >= 3) {
|
|
562609
562758
|
return Math.max(0, ratio - 0.4);
|
|
@@ -562701,6 +562850,9 @@ Respond with your assessment, then take action.`;
|
|
|
562701
562850
|
this._lastActiveForgettingReport = null;
|
|
562702
562851
|
this._lastContextConsolidationTurn = -1e3;
|
|
562703
562852
|
this._contextFrameBuilder = new ContextFrameBuilder();
|
|
562853
|
+
if (!this._workingDirectory) {
|
|
562854
|
+
this._workingDirectory = _pathResolve(process.cwd());
|
|
562855
|
+
}
|
|
562704
562856
|
if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
|
|
562705
562857
|
try {
|
|
562706
562858
|
const path12 = await import("node:path");
|
|
@@ -563134,10 +563286,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
|
|
|
563134
563286
|
this._hookDenyHintCount = 0;
|
|
563135
563287
|
this._selfConsistencyVotes = 0;
|
|
563136
563288
|
this._retrievalContextCache = null;
|
|
563137
|
-
this.
|
|
563289
|
+
this._adversaryMode = this.options.adversaryMode ?? "both";
|
|
563138
563290
|
this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
|
|
563139
563291
|
this._argCohorts.clear();
|
|
563140
|
-
this.
|
|
563292
|
+
this._adversaryRedundantSignals.clear();
|
|
563141
563293
|
this._lastTodoWriteTurn = -1;
|
|
563142
563294
|
this._lastTodoReminderTurn = -1;
|
|
563143
563295
|
let pendingConstraintWarnings = [];
|
|
@@ -563237,14 +563389,44 @@ TASK: ${scrubbedTask}` : scrubbedTask;
|
|
|
563237
563389
|
});
|
|
563238
563390
|
if (gate.proceed)
|
|
563239
563391
|
return false;
|
|
563240
|
-
messages2.push({
|
|
563392
|
+
messages2.push({
|
|
563393
|
+
role: "system",
|
|
563394
|
+
content: `${gate.feedback}
|
|
563395
|
+
|
|
563396
|
+
[ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
|
|
563397
|
+
});
|
|
563241
563398
|
this.emit({
|
|
563242
563399
|
type: "status",
|
|
563243
|
-
content: `
|
|
563400
|
+
content: `completion provenance critique emitted without blocking: ${gate.reason}`,
|
|
563401
|
+
turn,
|
|
563402
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563403
|
+
});
|
|
563404
|
+
this.emit({
|
|
563405
|
+
type: "adversary_reaction",
|
|
563406
|
+
adversary: {
|
|
563407
|
+
class: "guidance",
|
|
563408
|
+
shortText: "Completion provenance critique emitted",
|
|
563409
|
+
confidence: 0.9,
|
|
563410
|
+
details: gate.reason
|
|
563411
|
+
},
|
|
563412
|
+
turn,
|
|
563413
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563414
|
+
});
|
|
563415
|
+
return false;
|
|
563416
|
+
};
|
|
563417
|
+
const emitBackwardPassAdvisory = (feedback, turn) => {
|
|
563418
|
+
messages2.push({
|
|
563419
|
+
role: "system",
|
|
563420
|
+
content: `${feedback}
|
|
563421
|
+
|
|
563422
|
+
[ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
|
|
563423
|
+
});
|
|
563424
|
+
this.emit({
|
|
563425
|
+
type: "status",
|
|
563426
|
+
content: "backward-pass critique emitted without blocking completion",
|
|
563244
563427
|
turn,
|
|
563245
563428
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
563246
563429
|
});
|
|
563247
|
-
return true;
|
|
563248
563430
|
};
|
|
563249
563431
|
const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
|
|
563250
563432
|
for (let turn = 0; turn < turnCap; turn++) {
|
|
@@ -564230,8 +564412,8 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564230
564412
|
nextSelfEval = now + selfEvalInterval;
|
|
564231
564413
|
}
|
|
564232
564414
|
const turnsRemaining = this.options.maxTurns - turn;
|
|
564233
|
-
if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this.
|
|
564234
|
-
const recentOutcomes = this.
|
|
564415
|
+
if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
|
|
564416
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
|
|
564235
564417
|
const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
|
|
564236
564418
|
const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
|
|
564237
564419
|
const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
|
|
@@ -564240,16 +564422,16 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564240
564422
|
this.options.maxTurns += extension3;
|
|
564241
564423
|
this.emit({
|
|
564242
564424
|
type: "status",
|
|
564243
|
-
content: `
|
|
564425
|
+
content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
|
|
564244
564426
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
564245
564427
|
});
|
|
564246
564428
|
const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
|
|
564247
564429
|
this.emit({
|
|
564248
|
-
type: "
|
|
564430
|
+
type: "debug_adversary",
|
|
564249
564431
|
turn,
|
|
564250
564432
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
564251
564433
|
content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
|
|
564252
|
-
|
|
564434
|
+
adversaryAction: {
|
|
564253
564435
|
detection: "none",
|
|
564254
564436
|
recentSuccesses,
|
|
564255
564437
|
recentFailures: recentOutcomes.length - recentSuccesses,
|
|
@@ -564582,6 +564764,9 @@ ${memoryLines.join("\n")}`
|
|
|
564582
564764
|
maxTokens: effectiveMaxTokens,
|
|
564583
564765
|
timeoutMs: this.options.requestTimeoutMs
|
|
564584
564766
|
};
|
|
564767
|
+
if ((this.options.contextWindowSize ?? 0) > 0) {
|
|
564768
|
+
chatRequest.numCtx = this.options.contextWindowSize;
|
|
564769
|
+
}
|
|
564585
564770
|
if (this.options.memoryPrefix)
|
|
564586
564771
|
chatRequest.memoryPrefix = this.options.memoryPrefix;
|
|
564587
564772
|
if (this.options.memoryPrefixHash)
|
|
@@ -564623,7 +564808,7 @@ ${memoryLines.join("\n")}`
|
|
|
564623
564808
|
compactionThreshold: limits.compactionThreshold,
|
|
564624
564809
|
toolCallCount,
|
|
564625
564810
|
keepRecent: limits.keepRecent,
|
|
564626
|
-
|
|
564811
|
+
adversaryOutcomes: this._adversaryToolOutcomes.length,
|
|
564627
564812
|
headroom: limits.compactionThreshold - estTokens
|
|
564628
564813
|
}
|
|
564629
564814
|
});
|
|
@@ -564740,19 +564925,17 @@ ${memoryLines.join("\n")}`
|
|
|
564740
564925
|
if (jsonMatch) {
|
|
564741
564926
|
try {
|
|
564742
564927
|
const parsed = JSON.parse(jsonMatch[1]);
|
|
564743
|
-
|
|
564744
|
-
|
|
564928
|
+
const resolvedParsedTool = parsed.tool ? this.lookupRegisteredTool(parsed.tool) : null;
|
|
564929
|
+
if (parsed.tool && resolvedParsedTool) {
|
|
564930
|
+
const tool = resolvedParsedTool.tool;
|
|
564745
564931
|
const rawResult = await tool.execute(parsed.args ?? {});
|
|
564746
|
-
const result =
|
|
564747
|
-
workingDir: this._workingDirectory || process.cwd(),
|
|
564748
|
-
toolName: parsed.tool
|
|
564749
|
-
});
|
|
564932
|
+
const result = this.applyRegisteredToolResultTriage(rawResult, resolvedParsedTool.name, tool);
|
|
564750
564933
|
messages2.push({ role: "assistant", content });
|
|
564751
564934
|
messages2.push({
|
|
564752
564935
|
role: "user",
|
|
564753
564936
|
content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}`
|
|
564754
564937
|
});
|
|
564755
|
-
if (
|
|
564938
|
+
if (resolvedParsedTool.name === "task_complete") {
|
|
564756
564939
|
completed = true;
|
|
564757
564940
|
summary = String(parsed.args?.summary ?? content);
|
|
564758
564941
|
}
|
|
@@ -564986,16 +565169,19 @@ ${memoryLines.join("\n")}`
|
|
|
564986
565169
|
const cohort = this._argCohorts.get(cohortKey);
|
|
564987
565170
|
if (cohort && cohort.failure >= 3 && cohort.success === 0) {
|
|
564988
565171
|
this.emit({
|
|
564989
|
-
type: "
|
|
565172
|
+
type: "adversary_reaction",
|
|
564990
565173
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
564991
|
-
|
|
565174
|
+
adversary: {
|
|
564992
565175
|
class: "arg_cohort_risk",
|
|
564993
565176
|
shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
|
|
564994
565177
|
confidence: 0.85
|
|
564995
565178
|
}
|
|
564996
565179
|
});
|
|
564997
|
-
if (this.
|
|
564998
|
-
this.pendingUserMessages.push(
|
|
565180
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
565181
|
+
this.pendingUserMessages.push(`[ADVERSARY CRITIQUE — non-blocking]
|
|
565182
|
+
Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
|
|
565183
|
+
Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
|
|
565184
|
+
Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
|
|
564999
565185
|
}
|
|
565000
565186
|
}
|
|
565001
565187
|
if (this._errorPatterns.size > 0) {
|
|
@@ -565277,19 +565463,11 @@ ${memoryLines.join("\n")}`
|
|
|
565277
565463
|
].includes(tc.name);
|
|
565278
565464
|
const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
|
|
565279
565465
|
const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
|
|
565280
|
-
const
|
|
565281
|
-
if (
|
|
565282
|
-
this.
|
|
565466
|
+
const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
|
|
565467
|
+
if (adversaryRedundantSignal) {
|
|
565468
|
+
this._adversaryRedundantSignals.delete(toolFingerprint);
|
|
565283
565469
|
}
|
|
565284
|
-
|
|
565285
|
-
const lastLog = toolCallLog[_toolLogTailIdx];
|
|
565286
|
-
if (!lastLog)
|
|
565287
|
-
return;
|
|
565288
|
-
lastLog.success = true;
|
|
565289
|
-
lastLog.mutated = false;
|
|
565290
|
-
lastLog.mutatedFiles = [];
|
|
565291
|
-
lastLog.outputPreview = outputPreview.slice(0, 100);
|
|
565292
|
-
};
|
|
565470
|
+
let criticGuidance = null;
|
|
565293
565471
|
{
|
|
565294
565472
|
const _reflStem = buildStem(tc.name, tc.arguments ?? {});
|
|
565295
565473
|
if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
|
|
@@ -565331,7 +565509,10 @@ ${memoryLines.join("\n")}`
|
|
|
565331
565509
|
}
|
|
565332
565510
|
}
|
|
565333
565511
|
}
|
|
565334
|
-
const criticDecision =
|
|
565512
|
+
const criticDecision = this.options.disableAdversaryCritic === true ? {
|
|
565513
|
+
decision: "pass",
|
|
565514
|
+
reason: "adversary critic disabled for isolated evaluation"
|
|
565515
|
+
} : evaluate2({
|
|
565335
565516
|
proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
|
|
565336
565517
|
fingerprint: toolFingerprint,
|
|
565337
565518
|
isReadLike,
|
|
@@ -565345,116 +565526,33 @@ ${memoryLines.join("\n")}`
|
|
|
565345
565526
|
stagnationSignals: null,
|
|
565346
565527
|
// stagnation gate handled at top-of-turn
|
|
565347
565528
|
stagnationGateActive: false,
|
|
565348
|
-
|
|
565529
|
+
adversaryRedundantSignal
|
|
565349
565530
|
});
|
|
565350
|
-
if (criticDecision.decision === "
|
|
565351
|
-
this.emit({
|
|
565352
|
-
type: "tool_call",
|
|
565353
|
-
toolName: tc.name,
|
|
565354
|
-
toolArgs: tc.arguments,
|
|
565355
|
-
turn,
|
|
565356
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565357
|
-
});
|
|
565358
|
-
const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
|
|
565359
|
-
|
|
565360
|
-
${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
|
|
565361
|
-
markSyntheticToolLog(blockMsg);
|
|
565362
|
-
this.emit({
|
|
565363
|
-
type: "tool_result",
|
|
565364
|
-
toolName: tc.name,
|
|
565365
|
-
success: true,
|
|
565366
|
-
content: blockMsg.slice(0, 100),
|
|
565367
|
-
turn,
|
|
565368
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565369
|
-
});
|
|
565370
|
-
this._tagSyntheticFailure({
|
|
565371
|
-
mode: "step_repetition",
|
|
565372
|
-
rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
|
|
565373
|
-
});
|
|
565374
|
-
return { tc, output: blockMsg, success: true };
|
|
565375
|
-
}
|
|
565376
|
-
if (criticDecision.decision === "force_progress_block") {
|
|
565531
|
+
if (criticDecision.decision === "guidance") {
|
|
565377
565532
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
565378
565533
|
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
565379
565534
|
if (_existingFp !== void 0) {
|
|
565380
565535
|
recentToolResults.delete(toolFingerprint);
|
|
565381
565536
|
recentToolResults.set(toolFingerprint, _existingFp);
|
|
565382
565537
|
}
|
|
565538
|
+
criticGuidance = criticDecision.guidanceMessage;
|
|
565383
565539
|
this.emit({
|
|
565384
|
-
type: "
|
|
565385
|
-
|
|
565386
|
-
|
|
565387
|
-
|
|
565540
|
+
type: "adversary_reaction",
|
|
565541
|
+
adversary: {
|
|
565542
|
+
class: "guidance",
|
|
565543
|
+
shortText: `Adversary guidance for repeated ${tc.name} call`,
|
|
565544
|
+
confidence: 0.9,
|
|
565545
|
+
details: criticDecision.reason
|
|
565546
|
+
},
|
|
565388
565547
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565389
565548
|
});
|
|
565390
565549
|
this.emit({
|
|
565391
|
-
type: "
|
|
565392
|
-
toolName: tc.name,
|
|
565393
|
-
success: true,
|
|
565394
|
-
content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
|
|
565395
|
-
turn,
|
|
565396
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565397
|
-
});
|
|
565398
|
-
this._tagSyntheticFailure({
|
|
565399
|
-
mode: "step_repetition",
|
|
565400
|
-
rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
|
|
565401
|
-
});
|
|
565402
|
-
const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
|
|
565403
|
-
|
|
565404
|
-
[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
|
|
565405
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
|
|
565406
|
-
|
|
565407
|
-
` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
|
|
565408
|
-
|
|
565409
|
-
`;
|
|
565410
|
-
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
565411
|
-
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
565412
|
-
markSyntheticToolLog(`${criticDecision.blockMessage}
|
|
565413
|
-
|
|
565414
|
-
${truncatedCache}`);
|
|
565415
|
-
return {
|
|
565416
|
-
tc,
|
|
565417
|
-
output: `${criticDecision.blockMessage}
|
|
565418
|
-
|
|
565419
|
-
${header}${truncatedCache}${generationCompletionHint}`,
|
|
565420
|
-
success: true
|
|
565421
|
-
};
|
|
565422
|
-
}
|
|
565423
|
-
if (criticDecision.decision === "serve_cached") {
|
|
565424
|
-
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
565425
|
-
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
565426
|
-
if (_existingFp !== void 0) {
|
|
565427
|
-
recentToolResults.delete(toolFingerprint);
|
|
565428
|
-
recentToolResults.set(toolFingerprint, _existingFp);
|
|
565429
|
-
}
|
|
565430
|
-
this.emit({
|
|
565431
|
-
type: "tool_call",
|
|
565432
|
-
toolName: tc.name,
|
|
565433
|
-
toolArgs: tc.arguments,
|
|
565434
|
-
turn,
|
|
565435
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565436
|
-
});
|
|
565437
|
-
const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
|
|
565438
|
-
|
|
565439
|
-
[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
|
|
565440
|
-
const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
|
|
565441
|
-
|
|
565442
|
-
` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
|
|
565443
|
-
|
|
565444
|
-
`;
|
|
565445
|
-
const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
|
|
565446
|
-
... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
|
|
565447
|
-
const dedupOutput = header + truncatedCache + generationCompletionHint;
|
|
565448
|
-
markSyntheticToolLog(dedupOutput);
|
|
565449
|
-
this.emit({
|
|
565450
|
-
type: "tool_result",
|
|
565550
|
+
type: "status",
|
|
565451
565551
|
toolName: tc.name,
|
|
565452
|
-
|
|
565453
|
-
content: header.slice(0, 100),
|
|
565552
|
+
content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
|
|
565454
565553
|
turn,
|
|
565455
565554
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
565456
565555
|
});
|
|
565457
|
-
return { tc, output: dedupOutput, success: true };
|
|
565458
565556
|
}
|
|
565459
565557
|
this.emit({
|
|
565460
565558
|
type: "tool_call",
|
|
@@ -565494,6 +565592,9 @@ ${header}${truncatedCache}${generationCompletionHint}`,
|
|
|
565494
565592
|
}
|
|
565495
565593
|
}
|
|
565496
565594
|
}
|
|
565595
|
+
if (!validationError) {
|
|
565596
|
+
validationError = await this.validateToolInput(tool, tc.arguments, resolvedTool?.name ?? tc.name);
|
|
565597
|
+
}
|
|
565497
565598
|
if (validationError) {
|
|
565498
565599
|
result = {
|
|
565499
565600
|
success: false,
|
|
@@ -565582,10 +565683,7 @@ ${header}${truncatedCache}${generationCompletionHint}`,
|
|
|
565582
565683
|
} else {
|
|
565583
565684
|
result = await tool.execute(finalArgs);
|
|
565584
565685
|
}
|
|
565585
|
-
result =
|
|
565586
|
-
workingDir: this._workingDirectory || process.cwd(),
|
|
565587
|
-
toolName: tc.name
|
|
565588
|
-
});
|
|
565686
|
+
result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
|
|
565589
565687
|
if (tc.name === "shell" && result.success === true) {
|
|
565590
565688
|
const semanticErr = this._detectSemanticShellFailure(result.output ?? "");
|
|
565591
565689
|
if (semanticErr) {
|
|
@@ -566455,6 +566553,11 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
566455
566553
|
result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
|
|
566456
566554
|
}
|
|
566457
566555
|
let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
|
|
566556
|
+
if (criticGuidance) {
|
|
566557
|
+
output += `
|
|
566558
|
+
|
|
566559
|
+
${criticGuidance}`;
|
|
566560
|
+
}
|
|
566458
566561
|
if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
|
|
566459
566562
|
const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
|
|
566460
566563
|
if (recovery)
|
|
@@ -566824,10 +566927,8 @@ Then use file_read on individual FILES inside it.`);
|
|
|
566824
566927
|
await this._streamingExecutor.waitAll();
|
|
566825
566928
|
const streamResults = this._streamingExecutor.drainCompleted();
|
|
566826
566929
|
for (const sr of streamResults) {
|
|
566827
|
-
|
|
566828
|
-
|
|
566829
|
-
toolName: sr.name
|
|
566830
|
-
});
|
|
566930
|
+
const resolvedStreamTool = this.lookupRegisteredTool(sr.name);
|
|
566931
|
+
sr.result = this.applyRegisteredToolResultTriage(sr.result, resolvedStreamTool?.name ?? sr.name, resolvedStreamTool?.tool);
|
|
566831
566932
|
}
|
|
566832
566933
|
const handledIds = /* @__PURE__ */ new Set();
|
|
566833
566934
|
for (const sr of streamResults) {
|
|
@@ -566865,22 +566966,21 @@ ${sr.result.output}`;
|
|
|
566865
566966
|
}
|
|
566866
566967
|
const _bp1 = await this._runBackwardPassReview(turn);
|
|
566867
566968
|
if (_bp1 && !_bp1.proceed && _bp1.feedback) {
|
|
566868
|
-
|
|
566869
|
-
}
|
|
566870
|
-
|
|
566871
|
-
|
|
566872
|
-
|
|
566873
|
-
|
|
566874
|
-
|
|
566875
|
-
|
|
566876
|
-
|
|
566877
|
-
|
|
566878
|
-
|
|
566879
|
-
|
|
566880
|
-
|
|
566881
|
-
}
|
|
566882
|
-
break;
|
|
566969
|
+
emitBackwardPassAdvisory(_bp1.feedback, turn);
|
|
566970
|
+
}
|
|
566971
|
+
completed = true;
|
|
566972
|
+
summary = extractTaskCompleteSummary(matchTc.arguments);
|
|
566973
|
+
if (summary && !this._assistantTextEmitted) {
|
|
566974
|
+
this.emit({
|
|
566975
|
+
type: "assistant_text",
|
|
566976
|
+
content: summary,
|
|
566977
|
+
source: "task_complete_summary",
|
|
566978
|
+
turn,
|
|
566979
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
566980
|
+
});
|
|
566981
|
+
this._assistantTextEmitted = true;
|
|
566883
566982
|
}
|
|
566983
|
+
break;
|
|
566884
566984
|
}
|
|
566885
566985
|
}
|
|
566886
566986
|
}
|
|
@@ -566921,22 +567021,21 @@ ${sr.result.output}`;
|
|
|
566921
567021
|
}
|
|
566922
567022
|
const _bp2 = await this._runBackwardPassReview(turn);
|
|
566923
567023
|
if (_bp2 && !_bp2.proceed && _bp2.feedback) {
|
|
566924
|
-
|
|
566925
|
-
} else {
|
|
566926
|
-
completed = true;
|
|
566927
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
566928
|
-
if (summary && !this._assistantTextEmitted) {
|
|
566929
|
-
this.emit({
|
|
566930
|
-
type: "assistant_text",
|
|
566931
|
-
content: summary,
|
|
566932
|
-
source: "task_complete_summary",
|
|
566933
|
-
turn,
|
|
566934
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
566935
|
-
});
|
|
566936
|
-
this._assistantTextEmitted = true;
|
|
566937
|
-
}
|
|
566938
|
-
break;
|
|
567024
|
+
emitBackwardPassAdvisory(_bp2.feedback, turn);
|
|
566939
567025
|
}
|
|
567026
|
+
completed = true;
|
|
567027
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
567028
|
+
if (summary && !this._assistantTextEmitted) {
|
|
567029
|
+
this.emit({
|
|
567030
|
+
type: "assistant_text",
|
|
567031
|
+
content: summary,
|
|
567032
|
+
source: "task_complete_summary",
|
|
567033
|
+
turn,
|
|
567034
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567035
|
+
});
|
|
567036
|
+
this._assistantTextEmitted = true;
|
|
567037
|
+
}
|
|
567038
|
+
break;
|
|
566940
567039
|
}
|
|
566941
567040
|
}
|
|
566942
567041
|
}
|
|
@@ -566946,7 +567045,8 @@ ${sr.result.output}`;
|
|
|
566946
567045
|
const batchToolCalls = rawToolCalls.map((tc) => ({
|
|
566947
567046
|
name: tc.name,
|
|
566948
567047
|
args: tc.arguments,
|
|
566949
|
-
id: tc.id
|
|
567048
|
+
id: tc.id,
|
|
567049
|
+
concurrencySafe: this.resolveToolConcurrencySafe(tc.name, tc.arguments)
|
|
566950
567050
|
}));
|
|
566951
567051
|
const batches = partitionToolCalls(batchToolCalls);
|
|
566952
567052
|
for (const batch2 of batches) {
|
|
@@ -567013,22 +567113,21 @@ ${sr.result.output}`;
|
|
|
567013
567113
|
}
|
|
567014
567114
|
const _bp3 = await this._runBackwardPassReview(turn);
|
|
567015
567115
|
if (_bp3 && !_bp3.proceed && _bp3.feedback) {
|
|
567016
|
-
|
|
567017
|
-
} else {
|
|
567018
|
-
completed = true;
|
|
567019
|
-
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
567020
|
-
if (summary && !this._assistantTextEmitted) {
|
|
567021
|
-
this.emit({
|
|
567022
|
-
type: "assistant_text",
|
|
567023
|
-
content: summary,
|
|
567024
|
-
source: "task_complete_summary",
|
|
567025
|
-
turn,
|
|
567026
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567027
|
-
});
|
|
567028
|
-
this._assistantTextEmitted = true;
|
|
567029
|
-
}
|
|
567030
|
-
break;
|
|
567116
|
+
emitBackwardPassAdvisory(_bp3.feedback, turn);
|
|
567031
567117
|
}
|
|
567118
|
+
completed = true;
|
|
567119
|
+
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
567120
|
+
if (summary && !this._assistantTextEmitted) {
|
|
567121
|
+
this.emit({
|
|
567122
|
+
type: "assistant_text",
|
|
567123
|
+
content: summary,
|
|
567124
|
+
source: "task_complete_summary",
|
|
567125
|
+
turn,
|
|
567126
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567127
|
+
});
|
|
567128
|
+
this._assistantTextEmitted = true;
|
|
567129
|
+
}
|
|
567130
|
+
break;
|
|
567032
567131
|
}
|
|
567033
567132
|
}
|
|
567034
567133
|
}
|
|
@@ -567039,7 +567138,7 @@ ${sr.result.output}`;
|
|
|
567039
567138
|
}
|
|
567040
567139
|
if (completed)
|
|
567041
567140
|
break;
|
|
567042
|
-
this.
|
|
567141
|
+
this.adversaryObserve(messages2, turn);
|
|
567043
567142
|
const currentRepScore = this.detectRepetition(toolCallLog);
|
|
567044
567143
|
if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
|
|
567045
567144
|
const { repetitionWindow } = this.contextLimits();
|
|
@@ -567225,6 +567324,9 @@ Call task_complete(summary="...") NOW with whatever you have.`
|
|
|
567225
567324
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
567226
567325
|
});
|
|
567227
567326
|
}
|
|
567327
|
+
const pendingBeforeAdversary = this.pendingUserMessages.length;
|
|
567328
|
+
this.adversaryObserve(messages2, turn);
|
|
567329
|
+
const adversaryAddedGuidance = this.pendingUserMessages.length > pendingBeforeAdversary;
|
|
567228
567330
|
if (/task.?complete|all tests pass/i.test(content)) {
|
|
567229
567331
|
const completionArgs = { summary: content };
|
|
567230
567332
|
if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
|
|
@@ -567236,7 +567338,7 @@ Call task_complete(summary="...") NOW with whatever you have.`
|
|
|
567236
567338
|
}
|
|
567237
567339
|
if (isThinkOnly) {
|
|
567238
567340
|
if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
|
|
567239
|
-
const recentSuccesses = this.
|
|
567341
|
+
const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
|
|
567240
567342
|
const hasRecentSuccess = recentSuccesses.length > 0;
|
|
567241
567343
|
const successHint = hasRecentSuccess ? `
|
|
567242
567344
|
|
|
@@ -567340,6 +567442,12 @@ Your most recent tool calls SUCCEEDED. If the task is complete, call task_comple
|
|
|
567340
567442
|
content: "Continue working. Use tools to read files, make changes, and run validation. Call task_complete when done."
|
|
567341
567443
|
});
|
|
567342
567444
|
}
|
|
567445
|
+
if (adversaryAddedGuidance) {
|
|
567446
|
+
while (this.pendingUserMessages.length > 0) {
|
|
567447
|
+
const userMsg = this.pendingUserMessages.shift();
|
|
567448
|
+
await this.appendInjectedUserMessage(userMsg, messages2, turn);
|
|
567449
|
+
}
|
|
567450
|
+
}
|
|
567343
567451
|
}
|
|
567344
567452
|
try {
|
|
567345
567453
|
const turnLogTail = toolCallLog.filter((t2) => t2.turn === turn || t2.turn === void 0);
|
|
@@ -567487,7 +567595,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
|
|
|
567487
567595
|
tools: toolDefs,
|
|
567488
567596
|
temperature: this.options.temperature,
|
|
567489
567597
|
maxTokens: this.options.maxTokens,
|
|
567490
|
-
timeoutMs: this.options.requestTimeoutMs
|
|
567598
|
+
timeoutMs: this.options.requestTimeoutMs,
|
|
567599
|
+
numCtx: this.options.contextWindowSize || void 0
|
|
567491
567600
|
};
|
|
567492
567601
|
let response;
|
|
567493
567602
|
try {
|
|
@@ -567677,10 +567786,7 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
|
|
|
567677
567786
|
} else {
|
|
567678
567787
|
try {
|
|
567679
567788
|
result = await tool.execute(tc.arguments);
|
|
567680
|
-
result =
|
|
567681
|
-
workingDir: this._workingDirectory || process.cwd(),
|
|
567682
|
-
toolName: tc.name
|
|
567683
|
-
});
|
|
567789
|
+
result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
|
|
567684
567790
|
} catch (err) {
|
|
567685
567791
|
result = {
|
|
567686
567792
|
success: false,
|
|
@@ -567699,10 +567805,7 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
|
|
|
567699
567805
|
if (pw2 && tool) {
|
|
567700
567806
|
try {
|
|
567701
567807
|
result = await tool.execute(tc.arguments);
|
|
567702
|
-
result =
|
|
567703
|
-
workingDir: this._workingDirectory || process.cwd(),
|
|
567704
|
-
toolName: tc.name
|
|
567705
|
-
});
|
|
567808
|
+
result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
|
|
567706
567809
|
} catch (err) {
|
|
567707
567810
|
result = {
|
|
567708
567811
|
success: false,
|
|
@@ -567797,8 +567900,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
567797
567900
|
}
|
|
567798
567901
|
const _bp4 = await this._runBackwardPassReview(turn);
|
|
567799
567902
|
if (_bp4 && !_bp4.proceed && _bp4.feedback) {
|
|
567800
|
-
|
|
567801
|
-
continue;
|
|
567903
|
+
emitBackwardPassAdvisory(_bp4.feedback, turn);
|
|
567802
567904
|
}
|
|
567803
567905
|
completed = true;
|
|
567804
567906
|
summary = extractTaskCompleteSummary(tc.arguments);
|
|
@@ -567871,7 +567973,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
567871
567973
|
}
|
|
567872
567974
|
if (isThinkOnlyBF) {
|
|
567873
567975
|
if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
|
|
567874
|
-
const recentSucc = this.
|
|
567976
|
+
const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
|
|
567875
567977
|
const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
|
|
567876
567978
|
messages2.push({
|
|
567877
567979
|
role: "user",
|
|
@@ -569959,38 +570061,35 @@ ${newerSummary}`;
|
|
|
569959
570061
|
${trimmedNew}`;
|
|
569960
570062
|
}
|
|
569961
570063
|
// -------------------------------------------------------------------------
|
|
569962
|
-
//
|
|
570064
|
+
// Adversary — parallel meta-analysis of the main loop
|
|
569963
570065
|
// -------------------------------------------------------------------------
|
|
569964
|
-
// Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
|
|
569965
570066
|
// Runs after each tool turn to detect when the model has lost track of
|
|
569966
570067
|
// what happened and inject corrections before the next inference.
|
|
569967
570068
|
//
|
|
569968
570069
|
// This is the architectural fix for the "I see both tools have been failing"
|
|
569969
570070
|
// regression: instead of only fixing the data the model sees (mask/summary),
|
|
569970
570071
|
// we add a second analysis path that catches mismatches in real-time.
|
|
569971
|
-
/** Track recent tool outcomes for the
|
|
569972
|
-
|
|
569973
|
-
/** WO-FIX-C: Tool fingerprints the
|
|
569974
|
-
* Checked in executeSingle to
|
|
569975
|
-
|
|
570072
|
+
/** Track recent tool outcomes for the adversary */
|
|
570073
|
+
_adversaryToolOutcomes = [];
|
|
570074
|
+
/** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
|
|
570075
|
+
* Checked in executeSingle to attach advisory guidance before dispatch. */
|
|
570076
|
+
_adversaryRedundantSignals = /* @__PURE__ */ new Set();
|
|
569976
570077
|
/** Reflexion pattern: task-local failure-indexed reflection buffer.
|
|
569977
570078
|
* Generates typed self-reflections on task failure and injects them
|
|
569978
570079
|
* into the next attempt's context for active learning. */
|
|
569979
570080
|
_reflectionBuffer = null;
|
|
569980
570081
|
/**
|
|
569981
|
-
*
|
|
570082
|
+
* Adversary: post-turn meta-analysis.
|
|
569982
570083
|
*
|
|
569983
570084
|
* Examines the last few messages looking for contradictions between
|
|
569984
570085
|
* actual tool outcomes and the model's stated understanding. When it
|
|
569985
570086
|
* detects the model claiming failure after success (or vice versa),
|
|
569986
|
-
* it injects a corrective
|
|
570087
|
+
* it injects a corrective non-blocking critique.
|
|
569987
570088
|
*
|
|
569988
570089
|
* Also detects repeated actions — when the model re-does something
|
|
569989
|
-
* that already succeeded, the
|
|
570090
|
+
* that already succeeded, the adversary nudges it to move on.
|
|
569990
570091
|
*/
|
|
569991
|
-
|
|
569992
|
-
if (this.options.modelTier === "large")
|
|
569993
|
-
return;
|
|
570092
|
+
adversaryObserve(messages2, turn) {
|
|
569994
570093
|
const recent = messages2.slice(-6);
|
|
569995
570094
|
for (const msg of recent) {
|
|
569996
570095
|
if (msg.role === "tool" && typeof msg.content === "string") {
|
|
@@ -570017,10 +570116,16 @@ ${trimmedNew}`;
|
|
|
570017
570116
|
}
|
|
570018
570117
|
const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
|
|
570019
570118
|
const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
|
|
570020
|
-
|
|
570021
|
-
|
|
570119
|
+
const alreadySeen = this._adversaryToolOutcomes.some((o2) => {
|
|
570120
|
+
if (msg.tool_call_id && o2.toolCallId === msg.tool_call_id)
|
|
570121
|
+
return true;
|
|
570122
|
+
return o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint;
|
|
570123
|
+
});
|
|
570124
|
+
if (!alreadySeen) {
|
|
570125
|
+
this._adversaryToolOutcomes.push({
|
|
570022
570126
|
turn,
|
|
570023
570127
|
tool: toolName,
|
|
570128
|
+
toolCallId: msg.tool_call_id,
|
|
570024
570129
|
argsKey,
|
|
570025
570130
|
fingerprint,
|
|
570026
570131
|
succeeded,
|
|
@@ -570029,27 +570134,47 @@ ${trimmedNew}`;
|
|
|
570029
570134
|
}
|
|
570030
570135
|
}
|
|
570031
570136
|
}
|
|
570032
|
-
while (this.
|
|
570033
|
-
this.
|
|
570137
|
+
while (this._adversaryToolOutcomes.length > 20)
|
|
570138
|
+
this._adversaryToolOutcomes.shift();
|
|
570034
570139
|
const emitReaction = (cls, shortText, confidence2, details2) => {
|
|
570035
570140
|
this.emit({
|
|
570036
|
-
type: "
|
|
570141
|
+
type: "adversary_reaction",
|
|
570037
570142
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
570038
|
-
|
|
570143
|
+
adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
|
|
570039
570144
|
});
|
|
570040
570145
|
};
|
|
570146
|
+
const buildAdversaryCritique = (input) => {
|
|
570147
|
+
const alternatives = input.alternatives && input.alternatives.length > 0 ? `
|
|
570148
|
+
Alternatives:
|
|
570149
|
+
${input.alternatives.map((item) => `- ${item}`).join("\n")}` : "";
|
|
570150
|
+
return [
|
|
570151
|
+
`[ADVERSARY CRITIQUE — non-blocking]`,
|
|
570152
|
+
`Evidence: ${input.evidence}`,
|
|
570153
|
+
`Root cause hypothesis: ${input.hypothesis}`,
|
|
570154
|
+
`Corrective action: ${input.correctiveAction}${alternatives}`
|
|
570155
|
+
].join("\n");
|
|
570156
|
+
};
|
|
570041
570157
|
const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
570042
570158
|
if (lastAssistant && typeof lastAssistant.content === "string") {
|
|
570043
570159
|
const text = lastAssistant.content.toLowerCase();
|
|
570044
570160
|
const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
|
|
570045
570161
|
if (claimsFailure) {
|
|
570046
|
-
const recentOutcomes = this.
|
|
570162
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
|
|
570047
570163
|
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
570048
570164
|
if (successes.length >= 1) {
|
|
570049
570165
|
const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
570050
570166
|
emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
|
|
570051
|
-
if (this.
|
|
570052
|
-
this.pendingUserMessages.push(
|
|
570167
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570168
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570169
|
+
evidence: `Recent tools succeeded: ${successList}.`,
|
|
570170
|
+
hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
|
|
570171
|
+
correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
|
|
570172
|
+
alternatives: [
|
|
570173
|
+
"Edit or run the next verification step that follows from the successful output.",
|
|
570174
|
+
"Read a different targeted file if the successful result exposed a new path or symbol.",
|
|
570175
|
+
"Complete only if the successful output is sufficient evidence for the user's request."
|
|
570176
|
+
]
|
|
570177
|
+
}));
|
|
570053
570178
|
}
|
|
570054
570179
|
this.emit({
|
|
570055
570180
|
type: "status",
|
|
@@ -570063,47 +570188,68 @@ ${trimmedNew}`;
|
|
|
570063
570188
|
const text = lastAssistant.content.toLowerCase();
|
|
570064
570189
|
const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
|
|
570065
570190
|
if (claimsSuccess) {
|
|
570066
|
-
const recentOutcomes = this.
|
|
570191
|
+
const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
|
|
570067
570192
|
const failures = recentOutcomes.filter((o2) => !o2.succeeded);
|
|
570068
570193
|
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
570069
570194
|
if (failures.length > 0 && successes.length === 0) {
|
|
570070
570195
|
const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
570071
570196
|
emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
|
|
570072
|
-
if (this.
|
|
570073
|
-
this.pendingUserMessages.push(
|
|
570197
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570198
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570199
|
+
evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
|
|
570200
|
+
hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
|
|
570201
|
+
correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
|
|
570202
|
+
alternatives: [
|
|
570203
|
+
"Read the smallest relevant source region around the failed symbol.",
|
|
570204
|
+
"Patch the implicated code or configuration.",
|
|
570205
|
+
"Run the same verifier only after a state-changing fix."
|
|
570206
|
+
]
|
|
570207
|
+
}));
|
|
570074
570208
|
}
|
|
570075
570209
|
}
|
|
570076
570210
|
}
|
|
570077
570211
|
}
|
|
570078
|
-
|
|
570079
|
-
|
|
570080
|
-
const
|
|
570081
|
-
|
|
570082
|
-
|
|
570083
|
-
|
|
570084
|
-
|
|
570085
|
-
args =
|
|
570086
|
-
|
|
570087
|
-
|
|
570088
|
-
|
|
570089
|
-
|
|
570090
|
-
|
|
570091
|
-
|
|
570092
|
-
this.
|
|
570093
|
-
|
|
570094
|
-
|
|
570095
|
-
|
|
570212
|
+
if (this.options.disableAdversaryCritic !== true) {
|
|
570213
|
+
const newestAssistant = [...recent].reverse().find((m2) => m2.role === "assistant");
|
|
570214
|
+
const lastToolCalls = newestAssistant?.tool_calls ?? [];
|
|
570215
|
+
for (const tc of lastToolCalls) {
|
|
570216
|
+
const name10 = tc.function.name;
|
|
570217
|
+
if (this._isStatefulBrowserTool(name10))
|
|
570218
|
+
continue;
|
|
570219
|
+
let args = {};
|
|
570220
|
+
try {
|
|
570221
|
+
args = JSON.parse(tc.function.arguments);
|
|
570222
|
+
} catch {
|
|
570223
|
+
}
|
|
570224
|
+
const argsKey = this._buildExactArgsKey(args);
|
|
570225
|
+
const fingerprint = this._buildToolFingerprint(name10, args);
|
|
570226
|
+
const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
|
|
570227
|
+
if (prior) {
|
|
570228
|
+
this._adversaryRedundantSignals.add(fingerprint);
|
|
570229
|
+
emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
|
|
570230
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570231
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570232
|
+
evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
|
|
570233
|
+
hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
|
|
570234
|
+
correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
|
|
570235
|
+
alternatives: [
|
|
570236
|
+
"Use the prior result to edit/write, verify, or finish with evidence.",
|
|
570237
|
+
"Read a different specific file or selector if the current evidence is insufficient.",
|
|
570238
|
+
"Repeat exact arguments only when filesystem, browser, or page state changed."
|
|
570239
|
+
]
|
|
570240
|
+
}));
|
|
570241
|
+
}
|
|
570242
|
+
this.emit({
|
|
570243
|
+
type: "status",
|
|
570244
|
+
content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
|
|
570245
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570246
|
+
});
|
|
570247
|
+
break;
|
|
570096
570248
|
}
|
|
570097
|
-
this.emit({
|
|
570098
|
-
type: "status",
|
|
570099
|
-
content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
|
|
570100
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570101
|
-
});
|
|
570102
|
-
break;
|
|
570103
570249
|
}
|
|
570104
570250
|
}
|
|
570105
570251
|
{
|
|
570106
|
-
const recentCalls = this.
|
|
570252
|
+
const recentCalls = this._adversaryToolOutcomes.slice(-5);
|
|
570107
570253
|
if (recentCalls.length >= 3) {
|
|
570108
570254
|
let consecutiveShortResults = 0;
|
|
570109
570255
|
for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
|
|
@@ -570116,30 +570262,39 @@ ${trimmedNew}`;
|
|
|
570116
570262
|
}
|
|
570117
570263
|
if (consecutiveShortResults >= 3) {
|
|
570118
570264
|
emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
|
|
570119
|
-
if (this.
|
|
570120
|
-
this.pendingUserMessages.push(
|
|
570265
|
+
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
570266
|
+
this.pendingUserMessages.push(buildAdversaryCritique({
|
|
570267
|
+
evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
|
|
570268
|
+
hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
|
|
570269
|
+
correctiveAction: "Take one input/observation step before another output step.",
|
|
570270
|
+
alternatives: [
|
|
570271
|
+
"Call the input/listen/poll tool for the current environment.",
|
|
570272
|
+
"Read the current UI/page state before clicking or typing again.",
|
|
570273
|
+
"If the task is already complete, finish with the concrete evidence already observed."
|
|
570274
|
+
]
|
|
570275
|
+
}));
|
|
570121
570276
|
}
|
|
570122
570277
|
this.emit({
|
|
570123
570278
|
type: "status",
|
|
570124
|
-
content: `\x1B[38;5;178m⚠
|
|
570279
|
+
content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
|
|
570125
570280
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
570126
570281
|
});
|
|
570127
570282
|
}
|
|
570128
570283
|
}
|
|
570129
570284
|
}
|
|
570130
|
-
const succCount = this.
|
|
570131
|
-
const failCount = this.
|
|
570132
|
-
const lastFour = this.
|
|
570285
|
+
const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
|
|
570286
|
+
const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
|
|
570287
|
+
const lastFour = this._adversaryToolOutcomes.slice(-4);
|
|
570133
570288
|
const details = [
|
|
570134
570289
|
`Recent tool outcomes:`,
|
|
570135
570290
|
...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
|
|
570136
570291
|
].join("\n");
|
|
570137
570292
|
this.emit({
|
|
570138
|
-
type: "
|
|
570293
|
+
type: "debug_adversary",
|
|
570139
570294
|
turn,
|
|
570140
570295
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
570141
|
-
content: `
|
|
570142
|
-
|
|
570296
|
+
content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
|
|
570297
|
+
adversaryAction: {
|
|
570143
570298
|
detection: "none",
|
|
570144
570299
|
recentSuccesses: succCount,
|
|
570145
570300
|
recentFailures: failCount,
|
|
@@ -571164,6 +571319,7 @@ ${transcript}`
|
|
|
571164
571319
|
}
|
|
571165
571320
|
}
|
|
571166
571321
|
const getDesc = (tool) => dynamicDescs.get(tool.name) ?? tool.description;
|
|
571322
|
+
const aliasText = (tool) => Array.isArray(tool.aliases) && tool.aliases.length > 0 ? ` aliases:${tool.aliases.join(",")}` : "";
|
|
571167
571323
|
const getCustomToolMetadata = (tool) => {
|
|
571168
571324
|
const meta = tool.customToolMetadata;
|
|
571169
571325
|
return meta?.isCustomTool === true ? meta : void 0;
|
|
@@ -571200,7 +571356,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
|
|
|
571200
571356
|
};
|
|
571201
571357
|
const getIndexLabel = (tool) => {
|
|
571202
571358
|
const meta = getCustomToolMetadata(tool);
|
|
571203
|
-
const desc = `${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase().replace(/[`"'()[\]{}:;,.!?/\\|-]+/g, " ");
|
|
571359
|
+
const desc = `${getDesc(tool)} ${aliasText(tool)} ${customToolSearchText(tool)}`.toLowerCase().replace(/[`"'()[\]{}:;,.!?/\\|-]+/g, " ");
|
|
571204
571360
|
const keywords2 = Array.from(new Set(desc.split(/\s+/).filter((word2) => word2.length > 2 && !STOPWORDS3.has(word2) && !tool.name.toLowerCase().includes(word2)))).slice(0, 4);
|
|
571205
571361
|
const base3 = keywords2.length > 0 ? `${tool.name}(${keywords2.join(",")})` : tool.name;
|
|
571206
571362
|
if (!meta)
|
|
@@ -571234,7 +571390,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
|
|
|
571234
571390
|
if (CORE_TOOLS3.has(tool.name))
|
|
571235
571391
|
continue;
|
|
571236
571392
|
const customMeta = getCustomToolMetadata(tool);
|
|
571237
|
-
const toolText = `${tool.name} ${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase();
|
|
571393
|
+
const toolText = `${tool.name} ${aliasText(tool)} ${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase();
|
|
571238
571394
|
const toolWords = toolText.split(/\s+/).filter((w) => w.length > 2);
|
|
571239
571395
|
let score = 0;
|
|
571240
571396
|
for (const tw of toolWords) {
|
|
@@ -571245,7 +571401,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
|
|
|
571245
571401
|
score += 1;
|
|
571246
571402
|
}
|
|
571247
571403
|
}
|
|
571248
|
-
if (taskText.includes(tool.name.replace(/_/g, " ")) || taskText.includes(tool.name)) {
|
|
571404
|
+
if (taskText.includes(tool.name.replace(/_/g, " ")) || taskText.includes(tool.name) || (tool.aliases ?? []).some((alias) => taskText.includes(alias.toLowerCase()))) {
|
|
571249
571405
|
score += customMeta ? 16 : 10;
|
|
571250
571406
|
}
|
|
571251
571407
|
if (wants3dModelGeneration) {
|
|
@@ -571398,6 +571554,9 @@ ${catalog}`,
|
|
|
571398
571554
|
continue;
|
|
571399
571555
|
lines.push("");
|
|
571400
571556
|
lines.push(`## ${tool.name}`);
|
|
571557
|
+
if (tool.aliases?.length) {
|
|
571558
|
+
lines.push(`Aliases: ${tool.aliases.join(", ")}`);
|
|
571559
|
+
}
|
|
571401
571560
|
lines.push(`${getDesc(tool)}${customToolDetails(tool)}`);
|
|
571402
571561
|
lines.push(`Parameters: ${JSON.stringify(tool.parameters)}`);
|
|
571403
571562
|
}
|
|
@@ -571410,7 +571569,7 @@ ${catalog}`,
|
|
|
571410
571569
|
}
|
|
571411
571570
|
return { success: true, output: lines.join("\n") };
|
|
571412
571571
|
}
|
|
571413
|
-
const matches = deferred.filter((t2) => t2.name.toLowerCase().includes(query) || getDesc(t2).toLowerCase().includes(query) || customToolSearchText(t2).toLowerCase().includes(query)).sort((a2, b) => {
|
|
571572
|
+
const matches = deferred.filter((t2) => t2.name.toLowerCase().includes(query) || (t2.aliases ?? []).some((alias) => alias.toLowerCase().includes(query)) || getDesc(t2).toLowerCase().includes(query) || customToolSearchText(t2).toLowerCase().includes(query)).sort((a2, b) => {
|
|
571414
571573
|
const scoreTool = (tool) => {
|
|
571415
571574
|
const meta = getCustomToolMetadata(tool);
|
|
571416
571575
|
let score = 0;
|
|
@@ -571418,6 +571577,10 @@ ${catalog}`,
|
|
|
571418
571577
|
score += 30;
|
|
571419
571578
|
if (tool.name.toLowerCase().includes(query))
|
|
571420
571579
|
score += 10;
|
|
571580
|
+
if ((tool.aliases ?? []).some((alias) => alias.toLowerCase() === query))
|
|
571581
|
+
score += 24;
|
|
571582
|
+
if ((tool.aliases ?? []).some((alias) => alias.toLowerCase().includes(query)))
|
|
571583
|
+
score += 8;
|
|
571421
571584
|
if (getDesc(tool).toLowerCase().includes(query))
|
|
571422
571585
|
score += 4;
|
|
571423
571586
|
if (customToolSearchText(tool).toLowerCase().includes(query))
|
|
@@ -571448,7 +571611,9 @@ ${catalog}`,
|
|
|
571448
571611
|
activatedToolsRef.add(t2.name);
|
|
571449
571612
|
const result = matches.map((t2) => {
|
|
571450
571613
|
const paramsStr = JSON.stringify(t2.parameters, null, 2);
|
|
571451
|
-
|
|
571614
|
+
const aliases = t2.aliases?.length ? `
|
|
571615
|
+
Aliases: ${t2.aliases.join(", ")}` : "";
|
|
571616
|
+
return `## ${t2.name}${aliases}
|
|
571452
571617
|
${getDesc(t2)}${customToolDetails(t2)}
|
|
571453
571618
|
|
|
571454
571619
|
Parameters:
|
|
@@ -651081,7 +651246,7 @@ ${conversationStream}`
|
|
|
651081
651246
|
// off default rather than the global config's value.
|
|
651082
651247
|
thinking: false,
|
|
651083
651248
|
// Telegram sub-agent runs must be bounded. Brute-force re-engagement and
|
|
651084
|
-
// the
|
|
651249
|
+
// the Adversary near-cap turn extension are appropriate for the full TUI
|
|
651085
651250
|
// session but cause Telegram to silently outgrow its nominal maxTurns,
|
|
651086
651251
|
// which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
|
|
651087
651252
|
...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
|
|
@@ -681052,15 +681217,31 @@ function adaptTool6(tool) {
|
|
|
681052
681217
|
}
|
|
681053
681218
|
return {
|
|
681054
681219
|
name: tool.name,
|
|
681220
|
+
aliases: tool.aliases,
|
|
681055
681221
|
description: tool.description,
|
|
681056
681222
|
parameters: tool.parameters,
|
|
681223
|
+
inputSchema: tool.inputSchema,
|
|
681224
|
+
prompt: tool.prompt,
|
|
681225
|
+
executeStream: tool.executeStream,
|
|
681226
|
+
validateInput: tool.validateInput,
|
|
681227
|
+
isConcurrencySafe: tool.isConcurrencySafe,
|
|
681228
|
+
isReadOnly: tool.isReadOnly,
|
|
681229
|
+
maxResultSizeChars: tool.maxResultSizeChars,
|
|
681057
681230
|
async execute(args) {
|
|
681058
681231
|
const result = await tool.execute(args);
|
|
681059
681232
|
return {
|
|
681060
681233
|
success: result.success,
|
|
681061
681234
|
output: result.output,
|
|
681062
681235
|
error: result.error,
|
|
681063
|
-
llmContent: result.llmContent
|
|
681236
|
+
llmContent: result.llmContent,
|
|
681237
|
+
mutated: result.mutated,
|
|
681238
|
+
mutatedFiles: result.mutatedFiles,
|
|
681239
|
+
diff: result.diff,
|
|
681240
|
+
dryRun: result.dryRun,
|
|
681241
|
+
noop: result.noop,
|
|
681242
|
+
partial: result.partial,
|
|
681243
|
+
beforeHash: result.beforeHash,
|
|
681244
|
+
afterHash: result.afterHash
|
|
681064
681245
|
};
|
|
681065
681246
|
}
|
|
681066
681247
|
};
|
|
@@ -683444,8 +683625,8 @@ ${entry.fullContent}`
|
|
|
683444
683625
|
let streamTextBuffer = "";
|
|
683445
683626
|
let lastAssistantText = "";
|
|
683446
683627
|
let lastProvenancePath = null;
|
|
683447
|
-
let
|
|
683448
|
-
const
|
|
683628
|
+
let showAdversary = false;
|
|
683629
|
+
const adversaryBuffer = [];
|
|
683449
683630
|
const contentWrite = (fn) => {
|
|
683450
683631
|
if (isNeovimActive()) {
|
|
683451
683632
|
const origWrite = process.stdout.write;
|
|
@@ -683929,24 +684110,24 @@ ${entry.fullContent}`
|
|
|
683929
684110
|
if (snap) {
|
|
683930
684111
|
contentWrite(
|
|
683931
684112
|
() => renderInfo(
|
|
683932
|
-
`\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} |
|
|
684113
|
+
`\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
|
|
683933
684114
|
)
|
|
683934
684115
|
);
|
|
683935
684116
|
}
|
|
683936
684117
|
}
|
|
683937
684118
|
break;
|
|
683938
|
-
case "
|
|
683939
|
-
if (event.
|
|
683940
|
-
const lm = event.
|
|
684119
|
+
case "debug_adversary":
|
|
684120
|
+
if (event.adversaryAction) {
|
|
684121
|
+
const lm = event.adversaryAction;
|
|
683941
684122
|
if (lm.intervention) {
|
|
683942
684123
|
const simple = `⚠ ${lm.intervention}`;
|
|
683943
684124
|
contentWrite(() => renderInfo(simple));
|
|
683944
684125
|
}
|
|
683945
684126
|
if (lm.details) {
|
|
683946
|
-
|
|
683947
|
-
if (
|
|
683948
|
-
|
|
683949
|
-
if (
|
|
684127
|
+
adversaryBuffer.push(lm.details);
|
|
684128
|
+
if (adversaryBuffer.length > 50)
|
|
684129
|
+
adversaryBuffer.splice(0, adversaryBuffer.length - 50);
|
|
684130
|
+
if (showAdversary) {
|
|
683950
684131
|
const det = String(lm.details);
|
|
683951
684132
|
contentWrite(() => {
|
|
683952
684133
|
process.stdout.write(c3.dim(det) + "\n");
|
|
@@ -685688,8 +685869,8 @@ This is an independent background session started from /background.`
|
|
|
685688
685869
|
origTtyWriteRef = null;
|
|
685689
685870
|
statusBar.setNeovimFocusChecker(() => isNeovimFocused());
|
|
685690
685871
|
let _escapeHandler = null;
|
|
685691
|
-
let
|
|
685692
|
-
const
|
|
685872
|
+
let showAdversary = false;
|
|
685873
|
+
const adversaryBuffer = [];
|
|
685693
685874
|
statusBar.hookDirectInput(
|
|
685694
685875
|
rl,
|
|
685695
685876
|
() => {
|
|
@@ -685722,26 +685903,26 @@ This is an independent background session started from /background.`
|
|
|
685722
685903
|
}
|
|
685723
685904
|
},
|
|
685724
685905
|
() => {
|
|
685725
|
-
|
|
685906
|
+
showAdversary = !showAdversary;
|
|
685726
685907
|
if (statusBar.isActive) {
|
|
685727
685908
|
try {
|
|
685728
685909
|
statusBar.jumpToLive();
|
|
685729
685910
|
} catch {
|
|
685730
685911
|
}
|
|
685731
685912
|
statusBar.beginContentWrite();
|
|
685732
|
-
if (
|
|
685733
|
-
renderInfo("
|
|
685734
|
-
const dump =
|
|
685913
|
+
if (showAdversary) {
|
|
685914
|
+
renderInfo("Adversary details: shown");
|
|
685915
|
+
const dump = adversaryBuffer.slice(-10).join("\n");
|
|
685735
685916
|
if (dump.trim()) {
|
|
685736
685917
|
process.stdout.write(`
|
|
685737
|
-
${c3.dim("[
|
|
685918
|
+
${c3.dim("[adversary recap]")}
|
|
685738
685919
|
`);
|
|
685739
685920
|
for (const line of dump.split("\n")) {
|
|
685740
685921
|
process.stdout.write(" " + c3.dim(line) + "\n");
|
|
685741
685922
|
}
|
|
685742
685923
|
}
|
|
685743
685924
|
} else {
|
|
685744
|
-
renderInfo("
|
|
685925
|
+
renderInfo("Adversary details: hidden");
|
|
685745
685926
|
}
|
|
685746
685927
|
statusBar.endContentWrite();
|
|
685747
685928
|
}
|