agenr 0.13.2 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/cli-main.js +285 -5
- package/dist/modules/surgeon/adapters/prompts/system.md +5 -3
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.13.4] - 2026-03-23
|
|
4
|
+
|
|
5
|
+
### Surgeon
|
|
6
|
+
|
|
7
|
+
- **Tightened completion gating thresholds.** Final completion now requires 75% budget usage (was 25%). Phase completion requires 75% (was 50%). Safety valve raised to 5 rejections (was 3). Continuation attempts raised to 5 (was 3). These changes force the surgeon to work through substantially more of the corpus before accepting completion.
|
|
8
|
+
|
|
9
|
+
## [0.13.3] - 2026-03-23
|
|
10
|
+
|
|
11
|
+
### Surgeon
|
|
12
|
+
|
|
13
|
+
- **`complete_pass` gating rejects premature completion.** The tool now validates budget utilization and candidate coverage before accepting completion. Final completion rejected if <25% budget used. Dedup phase rejected if <50% of clusters processed with budget remaining. Retirement phase rejected if <40 candidates evaluated with budget remaining. Safety valve accepts after 3 rejections per phase. Rejection messages tell the surgeon exactly what to do next.
|
|
14
|
+
- **System prompt tightened.** Budget Awareness section now explicitly states that `complete_pass` will reject premature attempts, and that efficiency means spending budget on the right candidates, not spending less budget overall.
|
|
15
|
+
|
|
3
16
|
## [0.13.2] - 2026-03-23
|
|
4
17
|
|
|
5
18
|
### Surgeon
|
package/dist/cli-main.js
CHANGED
|
@@ -22929,6 +22929,53 @@ async function updateEntryFieldsById(db, entryId, fields) {
|
|
|
22929
22929
|
};
|
|
22930
22930
|
}
|
|
22931
22931
|
|
|
22932
|
+
// src/modules/surgeon/application/completion-guard.ts
|
|
22933
|
+
/**
 * Builds the zeroed progress record that a paginated-query tracker starts
 * from (and returns to on reset).
 *
 * @returns {{queryCalls: number, maxWindowEnd: number, totalCount: (number|null), sawExhaustedPage: boolean}}
 *   A fresh object on every call, so callers never share state.
 */
function createEmptyProgress() {
  const blank = {};
  blank.queryCalls = 0;
  blank.maxWindowEnd = 0;
  blank.totalCount = null;
  blank.sawExhaustedPage = false;
  return blank;
}
|
|
22941
|
+
/**
 * Creates a tracker that accumulates how far a paginated query tool has been
 * paged. State lives in a closure and is replaced (never mutated) on every
 * update, so snapshots handed out earlier stay stable.
 *
 * @returns {{reset: function(): void, recordPage: function(object): void, snapshot: function(): object}}
 *   - `reset()` discards all recorded progress.
 *   - `recordPage({ offset, returnedCount, totalCount, exhausted })` records one page.
 *   - `snapshot()` returns a shallow copy of the current progress.
 */
function createPaginatedQueryTracker() {
  // Non-finite input (undefined, NaN, Infinity) maps to null so each caller
  // can pick its own fallback; finite input is floored and clamped at zero.
  const toCount = (value) => Number.isFinite(value) ? Math.max(0, Math.floor(value)) : null;
  let progress = createEmptyProgress();
  return {
    reset() {
      progress = createEmptyProgress();
    },
    recordPage(input) {
      const offset = toCount(input.offset) ?? 0;
      const returnedCount = toCount(input.returnedCount) ?? 0;
      const totalCount = toCount(input.totalCount);
      progress = {
        queryCalls: progress.queryCalls + 1,
        // Furthest position reached so far, regardless of paging order.
        maxWindowEnd: Math.max(progress.maxWindowEnd, offset + returnedCount),
        // Keep the last known total when this page did not report one.
        totalCount: totalCount ?? progress.totalCount,
        // Coerce so an omitted `exhausted` cannot turn the flag into `undefined`.
        sawExhaustedPage: progress.sawExhaustedPage || Boolean(input.exhausted)
      };
    },
    snapshot() {
      return { ...progress };
    }
  };
}
|
|
22963
|
+
/**
 * Builds the per-run state consulted when deciding whether a completion
 * attempt should be accepted.
 *
 * @param {object} input
 * @param {number} input.totalEntries - Entry count captured at run start.
 * @param {number} input.retirementCandidates - Retirement candidate count captured at run start.
 * @param {number} [input.dedupClusters] - Dedup cluster count, when known.
 * @param {number} [input.pendingConflicts] - Pending conflict count, when known.
 * @returns {object} State holding a `rejectionCounts` Map, the normalized
 *   `initialHealth` counts, and one paginated-query tracker each for
 *   `retirement`, `dedup`, `pendingConflicts` and `contradictionScan`.
 */
function createSurgeonCompletionGuardState(input) {
  // Required counts: floor and clamp at zero. A missing or NaN input would
  // otherwise propagate NaN, which is neither `> 0` nor `=== 0` and so makes
  // every comparison against the count false.
  const toRequiredCount = (value) => {
    const count = Math.max(0, Math.floor(value));
    return Number.isNaN(count) ? 0 : count;
  };
  // Optional counts stay `undefined` when the caller could not supply them.
  const toOptionalCount = (value) => Number.isFinite(value) ? Math.max(0, Math.floor(value)) : void 0;
  return {
    rejectionCounts: /* @__PURE__ */ new Map(),
    initialHealth: {
      totalEntries: toRequiredCount(input.totalEntries),
      retirementCandidates: toRequiredCount(input.retirementCandidates),
      dedupClusters: toOptionalCount(input.dedupClusters),
      pendingConflicts: toOptionalCount(input.pendingConflicts)
    },
    retirement: createPaginatedQueryTracker(),
    dedup: createPaginatedQueryTracker(),
    pendingConflicts: createPaginatedQueryTracker(),
    contradictionScan: createPaginatedQueryTracker()
  };
}
|
|
22978
|
+
|
|
22932
22979
|
// src/modules/surgeon/adapters/prompts/index.ts
|
|
22933
22980
|
import fs21 from "fs/promises";
|
|
22934
22981
|
import path24 from "path";
|
|
@@ -23149,6 +23196,72 @@ var COMPLETE_PASS_SCHEMA = Type2.Object({
|
|
|
23149
23196
|
observations: Type2.Array(Type2.String()),
|
|
23150
23197
|
recommendations: Type2.Array(Type2.String())
|
|
23151
23198
|
});
|
|
23199
|
+
// Final "auto" completion is only gated while the used-budget fraction is below this.
var FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75;
// Retirement and dedup phase completions are only gated while the used-budget fraction is below this.
var PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75;
// Once this many rejections are recorded for a completion key, gating is skipped.
var SAFETY_VALVE_REJECTION_LIMIT = 5;
// From this many total entries upward, final completion also expects a contradiction scan to have run.
var LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD = 200;
|
|
23203
|
+
/**
 * Tells whether a value names one of the three individually completable phases.
 *
 * @param {*} value - Candidate phase name.
 * @returns {boolean} True only for "contradictions", "dedup" or "retirement".
 */
function isCompletionPhase(value) {
  switch (value) {
    case "contradictions":
    case "dedup":
    case "retirement":
      return true;
    default:
      return false;
  }
}
|
|
23206
|
+
/**
 * Picks the key under which completion rejections are counted.
 *
 * @param {string|null|undefined} passType - Pass type supplied with the completion call.
 * @param {string} currentPass - Pass the run was started with.
 * @returns {string} The trimmed `passType` when it is non-empty, otherwise `currentPass`.
 */
function normalizeCompletionKey(passType, currentPass) {
  const trimmed = passType?.trim();
  return trimmed ? trimmed : currentPass;
}
|
|
23213
|
+
/**
 * Tells whether a completion call closes a single phase inside an "auto" run.
 *
 * @param {string} currentPass - Pass the run was started with.
 * @param {string|undefined} passType - Pass type supplied with the completion call.
 * @returns {boolean} True when the run is "auto" and `passType` is a completion phase.
 */
function isAutoPhaseTransition(currentPass, passType) {
  if (currentPass !== "auto") {
    return false;
  }
  return isCompletionPhase(passType);
}
|
|
23216
|
+
/**
 * Computes how much of the run budget has been consumed, as a fraction in [0, 1].
 *
 * Usage is measured separately against the token budget and the cost cap, and
 * the larger fraction wins. A dimension counts only when its cap is a finite
 * positive number and its remaining value is finite. Previously an
 * unconfigured cap was scored as fully used, which pinned the result to 1
 * whenever only one of the two caps was set.
 *
 * @param {object} deps
 * @param {{remaining: function(): {tokens: number, costUsd: number}}} [deps.budgetTracker]
 * @param {number} [deps.tokenBudget] - Token cap for the run.
 * @param {number} [deps.costCap] - Cost cap for the run, in USD.
 * @returns {{budgetUsedPct: number, remainingTokens: number, remainingCostUsd: number}|null}
 *   Null when no budget tracker is available. `budgetUsedPct` is 1 when
 *   neither dimension can be measured.
 */
function calculateBudgetUsedPct(deps) {
  if (!deps.budgetTracker) {
    return null;
  }
  const remaining = deps.budgetTracker.remaining();
  // Returns null for a dimension that cannot be measured.
  const usedFraction = (cap, left) => {
    if (!Number.isFinite(cap) || cap <= 0 || !Number.isFinite(left)) {
      return null;
    }
    return 1 - left / cap;
  };
  const measured = [
    usedFraction(deps.tokenBudget, remaining.tokens),
    usedFraction(deps.costCap, remaining.costUsd)
  ].filter((fraction) => fraction !== null);
  // With nothing measurable, report the budget as fully used (the original fallback value).
  const budgetUsedPct = measured.length > 0 ? Math.max(0, Math.min(1, Math.max(...measured))) : 1;
  return {
    budgetUsedPct,
    remainingTokens: remaining.tokens,
    remainingCostUsd: remaining.costUsd
  };
}
|
|
23231
|
+
/**
 * Converts a used-budget fraction into a whole-number percentage.
 *
 * @param {number} value - Fraction of the budget used (for example 0.42).
 * @returns {number} The percentage rounded with `Math.round` (for example 42).
 */
function formatBudgetUsedPct(value) {
  const percent = value * 100;
  return Math.round(percent);
}
|
|
23234
|
+
/**
 * Records one more rejection for a completion key and builds the tool result
 * reporting the rejection.
 *
 * @param {object} deps - Tool dependencies; `deps.completionGuards` may be absent.
 * @param {string} rejectionKey - Key under which rejections are counted.
 * @param {number} priorRejections - Rejections already recorded for the key.
 * @param {*} summary - Summary passed through to the result payload.
 * @param {object} details - Extra payload fields; spread last, so they override same-named fields.
 * @param {string} message - Message passed to `toolResult` alongside the payload.
 * @returns {*} Whatever `toolResult(payload, message)` returns.
 */
function rejectCompletionAttempt(deps, rejectionKey, priorRejections, summary, details, message) {
  const rejectionCount = priorRejections + 1;
  deps.completionGuards?.rejectionCounts.set(rejectionKey, rejectionCount);
  const payload = {
    completed: false,
    rejected: true,
    rejectionCount,
    summary,
    ...details
  };
  return toolResult(payload, message);
}
|
|
23247
|
+
/**
 * Produces the human-readable fragment describing retirement paging progress
 * for a rejection message.
 *
 * @param {{queryCalls: number, maxWindowEnd: number}} progress - Tracker snapshot.
 * @param {number} knownCandidates - Candidate count known up front; 0 when unknown.
 * @returns {string} Sentence fragment describing what has (not) been paged.
 */
function describeRetirementProgress(progress, knownCandidates) {
  const hasKnownTotal = knownCandidates > 0;
  if (progress.queryCalls === 0) {
    if (hasKnownTotal) {
      return `about ${knownCandidates} retirement candidates were available before the pass started, but query_candidates has not been called yet`;
    }
    return "query_candidates has not been called yet";
  }
  return hasKnownTotal
    ? `only ${progress.maxWindowEnd} of about ${knownCandidates} retirement candidates have been paged so far`
    : `only ${progress.maxWindowEnd} retirement candidates have been paged so far and query_candidates has not been exhausted`;
}
|
|
23256
|
+
/**
 * Produces the human-readable fragment describing dedup-cluster paging
 * progress for a rejection message.
 *
 * @param {{queryCalls: number, maxWindowEnd: number}} progress - Tracker snapshot.
 * @param {number} totalClusters - Known cluster count; 0 when unknown.
 * @returns {string} Sentence fragment describing what has (not) been paged.
 */
function describeDedupProgress(progress, totalClusters) {
  const hasKnownTotal = totalClusters > 0;
  if (progress.queryCalls === 0) {
    if (hasKnownTotal) {
      return `${totalClusters} dedup clusters were cached for this run, but query_dedup_clusters has not been called yet`;
    }
    return "query_dedup_clusters has not been called yet";
  }
  return hasKnownTotal
    ? `only ${progress.maxWindowEnd} of ${totalClusters} dedup clusters have been paged so far`
    : `only ${progress.maxWindowEnd} dedup clusters have been paged so far and query_dedup_clusters has not been exhausted`;
}
|
|
23152
23265
|
function createCompletePassTool(deps) {
|
|
23153
23266
|
return {
|
|
23154
23267
|
name: "complete_pass",
|
|
@@ -23178,12 +23291,111 @@ function createCompletePassTool(deps) {
|
|
|
23178
23291
|
recommendations: params.recommendations
|
|
23179
23292
|
};
|
|
23180
23293
|
const passType = params.pass_type?.trim();
|
|
23294
|
+
const rejectionKey = normalizeCompletionKey(passType, deps.pass);
|
|
23295
|
+
const priorRejections = deps.completionGuards?.rejectionCounts.get(rejectionKey) ?? 0;
|
|
23296
|
+
const budgetUsage = calculateBudgetUsedPct(deps);
|
|
23297
|
+
const budgetUsedPct = budgetUsage?.budgetUsedPct ?? 1;
|
|
23298
|
+
const budgetUsedLabel = formatBudgetUsedPct(budgetUsedPct);
|
|
23299
|
+
const handledCount = Math.max(0, params.actions_taken + params.entries_skipped.length);
|
|
23300
|
+
if (priorRejections < SAFETY_VALVE_REJECTION_LIMIT && budgetUsage && deps.completionGuards) {
|
|
23301
|
+
const isPhaseTransition = isAutoPhaseTransition(deps.pass, passType);
|
|
23302
|
+
const guardedPhase = isPhaseTransition ? passType : isCompletionPhase(deps.pass) ? deps.pass : null;
|
|
23303
|
+
if (guardedPhase === "retirement" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
|
|
23304
|
+
const progress = deps.completionGuards.retirement.snapshot();
|
|
23305
|
+
const knownCandidates = deps.completionGuards.initialHealth.retirementCandidates;
|
|
23306
|
+
const hasKnownRetirementWork = knownCandidates > 0 || progress.queryCalls > 0;
|
|
23307
|
+
const shouldReject = hasKnownRetirementWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && knownCandidates > handledCount || progress.queryCalls > 0 && (knownCandidates === 0 || progress.maxWindowEnd < knownCandidates));
|
|
23308
|
+
if (shouldReject) {
|
|
23309
|
+
return rejectCompletionAttempt(
|
|
23310
|
+
deps,
|
|
23311
|
+
rejectionKey,
|
|
23312
|
+
priorRejections,
|
|
23313
|
+
summary,
|
|
23314
|
+
{
|
|
23315
|
+
phase: "retirement",
|
|
23316
|
+
budgetUsedPct: budgetUsedLabel,
|
|
23317
|
+
pagedCandidates: progress.maxWindowEnd,
|
|
23318
|
+
knownCandidates: knownCandidates || null,
|
|
23319
|
+
remainingTokens: budgetUsage.remainingTokens,
|
|
23320
|
+
remainingCostUsd: budgetUsage.remainingCostUsd
|
|
23321
|
+
},
|
|
23322
|
+
`Retirement completion rejected: ${describeRetirementProgress(progress, knownCandidates)} with ${budgetUsedLabel}% of budget used. Continue calling query_candidates with a higher offset until it returns no more candidates or your budget is genuinely low.`
|
|
23323
|
+
);
|
|
23324
|
+
}
|
|
23325
|
+
}
|
|
23326
|
+
if (guardedPhase === "dedup" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
|
|
23327
|
+
const progress = deps.completionGuards.dedup.snapshot();
|
|
23328
|
+
const totalClusters = progress.totalCount ?? deps.completionGuards.initialHealth.dedupClusters ?? 0;
|
|
23329
|
+
const halfClusters = totalClusters > 0 ? Math.ceil(totalClusters * 0.5) : 0;
|
|
23330
|
+
const hasKnownDedupWork = totalClusters > 0 || progress.queryCalls > 0;
|
|
23331
|
+
const shouldReject = hasKnownDedupWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && totalClusters > handledCount || progress.queryCalls > 0 && (totalClusters === 0 || progress.maxWindowEnd < halfClusters));
|
|
23332
|
+
if (shouldReject) {
|
|
23333
|
+
return rejectCompletionAttempt(
|
|
23334
|
+
deps,
|
|
23335
|
+
rejectionKey,
|
|
23336
|
+
priorRejections,
|
|
23337
|
+
summary,
|
|
23338
|
+
{
|
|
23339
|
+
phase: "dedup",
|
|
23340
|
+
budgetUsedPct: budgetUsedLabel,
|
|
23341
|
+
pagedClusters: progress.maxWindowEnd,
|
|
23342
|
+
totalClusters: totalClusters || null,
|
|
23343
|
+
remainingTokens: budgetUsage.remainingTokens,
|
|
23344
|
+
remainingCostUsd: budgetUsage.remainingCostUsd
|
|
23345
|
+
},
|
|
23346
|
+
`Dedup completion rejected: ${describeDedupProgress(progress, totalClusters)} with ${budgetUsedLabel}% of budget used. Continue paging query_dedup_clusters before completing the dedup phase.`
|
|
23347
|
+
);
|
|
23348
|
+
}
|
|
23349
|
+
}
|
|
23350
|
+
const isFinalAutoCompletion = deps.pass === "auto" && (!passType || passType === "auto");
|
|
23351
|
+
if (isFinalAutoCompletion && budgetUsedPct < FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION) {
|
|
23352
|
+
const reasons = [];
|
|
23353
|
+
const pendingConflicts = deps.completionGuards.pendingConflicts.snapshot();
|
|
23354
|
+
const dedup = deps.completionGuards.dedup.snapshot();
|
|
23355
|
+
const retirement = deps.completionGuards.retirement.snapshot();
|
|
23356
|
+
const contradictionScan = deps.completionGuards.contradictionScan.snapshot();
|
|
23357
|
+
const initialPendingConflicts = deps.completionGuards.initialHealth.pendingConflicts ?? 0;
|
|
23358
|
+
const initialDedupClusters = deps.completionGuards.initialHealth.dedupClusters ?? 0;
|
|
23359
|
+
const initialRetirementCandidates = deps.completionGuards.initialHealth.retirementCandidates;
|
|
23360
|
+
if (initialPendingConflicts > 0 && !pendingConflicts.sawExhaustedPage && (pendingConflicts.queryCalls === 0 || pendingConflicts.maxWindowEnd < initialPendingConflicts)) {
|
|
23361
|
+
reasons.push(
|
|
23362
|
+
pendingConflicts.queryCalls === 0 ? `${initialPendingConflicts} pending conflicts were available and query_conflicts has not been paged` : `only ${pendingConflicts.maxWindowEnd} of ${initialPendingConflicts} pending conflicts have been paged`
|
|
23363
|
+
);
|
|
23364
|
+
}
|
|
23365
|
+
if (initialDedupClusters > 0 && !dedup.sawExhaustedPage && (dedup.queryCalls === 0 || dedup.maxWindowEnd < initialDedupClusters)) {
|
|
23366
|
+
reasons.push(describeDedupProgress(dedup, initialDedupClusters));
|
|
23367
|
+
}
|
|
23368
|
+
if (initialRetirementCandidates > 0 && !retirement.sawExhaustedPage && (retirement.queryCalls === 0 || retirement.maxWindowEnd < initialRetirementCandidates)) {
|
|
23369
|
+
reasons.push(describeRetirementProgress(retirement, initialRetirementCandidates));
|
|
23370
|
+
}
|
|
23371
|
+
if (deps.completionGuards.initialHealth.totalEntries >= LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD && contradictionScan.queryCalls === 0) {
|
|
23372
|
+
reasons.push("the proactive contradiction scan has not run yet");
|
|
23373
|
+
}
|
|
23374
|
+
if (reasons.length > 0) {
|
|
23375
|
+
return rejectCompletionAttempt(
|
|
23376
|
+
deps,
|
|
23377
|
+
rejectionKey,
|
|
23378
|
+
priorRejections,
|
|
23379
|
+
summary,
|
|
23380
|
+
{
|
|
23381
|
+
phase: "auto",
|
|
23382
|
+
budgetUsedPct: budgetUsedLabel,
|
|
23383
|
+
remainingTokens: budgetUsage.remainingTokens,
|
|
23384
|
+
remainingCostUsd: budgetUsage.remainingCostUsd,
|
|
23385
|
+
reasons
|
|
23386
|
+
},
|
|
23387
|
+
`Completion rejected: only ${budgetUsedLabel}% of budget used and the sweep still looks incomplete because ${reasons.join("; ")}. Continue paging candidates and only call complete_pass with pass_type="auto" when the remaining phases are genuinely exhausted or budget is low.`
|
|
23388
|
+
);
|
|
23389
|
+
}
|
|
23390
|
+
}
|
|
23391
|
+
}
|
|
23181
23392
|
if (passType && passType !== "auto" && deps.pass === "auto") {
|
|
23182
23393
|
deps.completionState.completePhase(passType, summary);
|
|
23183
23394
|
return toolResult(
|
|
23184
23395
|
{
|
|
23185
23396
|
completed: false,
|
|
23186
23397
|
phaseComplete: passType,
|
|
23398
|
+
safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
|
|
23187
23399
|
summary
|
|
23188
23400
|
},
|
|
23189
23401
|
`${passType} phase complete. Continue with the next pass. Call complete_pass with pass_type="auto" when all passes are done.`
|
|
@@ -23193,6 +23405,7 @@ function createCompletePassTool(deps) {
|
|
|
23193
23405
|
return toolResult(
|
|
23194
23406
|
{
|
|
23195
23407
|
completed: true,
|
|
23408
|
+
safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
|
|
23196
23409
|
summary
|
|
23197
23410
|
},
|
|
23198
23411
|
"Pass marked complete. Do not call more tools. Respond with a brief final acknowledgment."
|
|
@@ -23450,6 +23663,7 @@ function createQueryContradictionCandidatesTool(deps) {
|
|
|
23450
23663
|
async execute(_toolCallId, params) {
|
|
23451
23664
|
if (params.reset === true) {
|
|
23452
23665
|
cached = null;
|
|
23666
|
+
deps.completionGuards?.contradictionScan.reset();
|
|
23453
23667
|
}
|
|
23454
23668
|
const query = buildQuery(params, deps);
|
|
23455
23669
|
const offset = normalizeOffset2(params.offset);
|
|
@@ -23477,6 +23691,12 @@ function createQueryContradictionCandidatesTool(deps) {
|
|
|
23477
23691
|
}
|
|
23478
23692
|
const totalCount = cached?.pairs.length ?? 0;
|
|
23479
23693
|
if (offset >= totalCount) {
|
|
23694
|
+
deps.completionGuards?.contradictionScan.recordPage({
|
|
23695
|
+
offset,
|
|
23696
|
+
returnedCount: 0,
|
|
23697
|
+
totalCount,
|
|
23698
|
+
exhausted: true
|
|
23699
|
+
});
|
|
23480
23700
|
return toolResult({
|
|
23481
23701
|
pairs: [],
|
|
23482
23702
|
count: 0,
|
|
@@ -23488,6 +23708,12 @@ function createQueryContradictionCandidatesTool(deps) {
|
|
|
23488
23708
|
});
|
|
23489
23709
|
}
|
|
23490
23710
|
const pairs = (cached?.pairs ?? []).slice(offset, offset + limit);
|
|
23711
|
+
deps.completionGuards?.contradictionScan.recordPage({
|
|
23712
|
+
offset,
|
|
23713
|
+
returnedCount: pairs.length,
|
|
23714
|
+
totalCount,
|
|
23715
|
+
exhausted: offset + pairs.length >= totalCount
|
|
23716
|
+
});
|
|
23491
23717
|
return toolResult({
|
|
23492
23718
|
pairs,
|
|
23493
23719
|
count: pairs.length,
|
|
@@ -23627,6 +23853,12 @@ function createQueryConflictsTool(deps) {
|
|
|
23627
23853
|
now: deps.now()
|
|
23628
23854
|
});
|
|
23629
23855
|
if (offset >= conflicts.length) {
|
|
23856
|
+
deps.completionGuards?.pendingConflicts.recordPage({
|
|
23857
|
+
offset,
|
|
23858
|
+
returnedCount: 0,
|
|
23859
|
+
totalCount: conflicts.length,
|
|
23860
|
+
exhausted: true
|
|
23861
|
+
});
|
|
23630
23862
|
return toolResult({
|
|
23631
23863
|
conflicts: [],
|
|
23632
23864
|
count: 0,
|
|
@@ -23637,6 +23869,12 @@ function createQueryConflictsTool(deps) {
|
|
|
23637
23869
|
});
|
|
23638
23870
|
}
|
|
23639
23871
|
const page = conflicts.slice(offset, offset + limit).filter((conflict) => !deps.conflictCache?.consumedConflictIds.has(conflict.id)).map((conflict) => summarizeConflict(conflict));
|
|
23872
|
+
deps.completionGuards?.pendingConflicts.recordPage({
|
|
23873
|
+
offset,
|
|
23874
|
+
returnedCount: page.length,
|
|
23875
|
+
totalCount: conflicts.length,
|
|
23876
|
+
exhausted: offset + page.length >= conflicts.length
|
|
23877
|
+
});
|
|
23640
23878
|
return toolResult({
|
|
23641
23879
|
conflicts: page,
|
|
23642
23880
|
count: page.length,
|
|
@@ -24074,6 +24312,7 @@ function createQueryDedupClustersTool(deps) {
|
|
|
24074
24312
|
}
|
|
24075
24313
|
if (params.reset === true) {
|
|
24076
24314
|
resetDedupClusterCache(deps.clusterCache);
|
|
24315
|
+
deps.completionGuards?.dedup.reset();
|
|
24077
24316
|
}
|
|
24078
24317
|
const query = normalizeDedupClusterQuery(
|
|
24079
24318
|
{
|
|
@@ -24093,6 +24332,12 @@ function createQueryDedupClustersTool(deps) {
|
|
|
24093
24332
|
now: deps.now()
|
|
24094
24333
|
});
|
|
24095
24334
|
if (offset >= clusters.length) {
|
|
24335
|
+
deps.completionGuards?.dedup.recordPage({
|
|
24336
|
+
offset,
|
|
24337
|
+
returnedCount: 0,
|
|
24338
|
+
totalCount: clusters.length,
|
|
24339
|
+
exhausted: true
|
|
24340
|
+
});
|
|
24096
24341
|
return toolResult({
|
|
24097
24342
|
clusters: [],
|
|
24098
24343
|
count: 0,
|
|
@@ -24104,6 +24349,12 @@ function createQueryDedupClustersTool(deps) {
|
|
|
24104
24349
|
});
|
|
24105
24350
|
}
|
|
24106
24351
|
const page = clusters.slice(offset, offset + limit).map((cluster, index) => summarizeDedupCluster(cluster, offset + index, query.project));
|
|
24352
|
+
deps.completionGuards?.dedup.recordPage({
|
|
24353
|
+
offset,
|
|
24354
|
+
returnedCount: page.length,
|
|
24355
|
+
totalCount: clusters.length,
|
|
24356
|
+
exhausted: offset + page.length >= clusters.length
|
|
24357
|
+
});
|
|
24107
24358
|
return toolResult({
|
|
24108
24359
|
clusters: page,
|
|
24109
24360
|
count: page.length,
|
|
@@ -24269,6 +24520,18 @@ var QUERY_CANDIDATES_SCHEMA = Type12.Object({
|
|
|
24269
24520
|
limit: Type12.Optional(Type12.Integer({ minimum: 1, maximum: 100 })),
|
|
24270
24521
|
offset: Type12.Optional(Type12.Integer({ minimum: 0 }))
|
|
24271
24522
|
});
|
|
24523
|
+
/**
 * Normalizes a caller-supplied page size to a positive whole number.
 *
 * The value is floored before it is validated, so a fractional input between
 * 0 and 1 falls back to the default instead of flooring to a zero limit.
 *
 * @param {number|null|undefined} value - Requested page size.
 * @returns {number} The floored value when it is at least 1, otherwise 20.
 */
function normalizeLimit5(value) {
  const whole = Number.isFinite(value) ? Math.floor(value) : 0;
  return whole >= 1 ? whole : 20;
}
|
|
24529
|
+
/**
 * Normalizes a caller-supplied paging offset to a non-negative whole number.
 *
 * @param {number|null|undefined} value - Requested offset.
 * @returns {number} The floored value when it is finite and not negative, otherwise 0.
 */
function normalizeOffset5(value) {
  const usable = Number.isFinite(value) && (value ?? 0) >= 0;
  return usable ? Math.floor(value) : 0;
}
|
|
24272
24535
|
function createQueryCandidatesTool(deps) {
|
|
24273
24536
|
return {
|
|
24274
24537
|
name: "query_candidates",
|
|
@@ -24276,6 +24539,8 @@ function createQueryCandidatesTool(deps) {
|
|
|
24276
24539
|
description: "List active entries that look stale enough to inspect for retirement.",
|
|
24277
24540
|
parameters: QUERY_CANDIDATES_SCHEMA,
|
|
24278
24541
|
async execute(_toolCallId, params) {
|
|
24542
|
+
const limit = normalizeLimit5(params.limit);
|
|
24543
|
+
const offset = normalizeOffset5(params.offset);
|
|
24279
24544
|
const candidates = await listRetirementCandidates(deps.db, {
|
|
24280
24545
|
project: params.project?.trim() || deps.project,
|
|
24281
24546
|
type: params.type?.trim() || void 0,
|
|
@@ -24289,6 +24554,11 @@ function createQueryCandidatesTool(deps) {
|
|
|
24289
24554
|
runId: deps.runId,
|
|
24290
24555
|
now: deps.now()
|
|
24291
24556
|
});
|
|
24557
|
+
deps.completionGuards?.retirement.recordPage({
|
|
24558
|
+
offset,
|
|
24559
|
+
returnedCount: candidates.length,
|
|
24560
|
+
exhausted: candidates.length < limit
|
|
24561
|
+
});
|
|
24292
24562
|
if (candidates.length === 0) {
|
|
24293
24563
|
return toolResult({
|
|
24294
24564
|
candidates: [],
|
|
@@ -24492,13 +24762,13 @@ var QUERY_SUPERSESSION_SCHEMA = Type15.Object({
|
|
|
24492
24762
|
limit: Type15.Optional(Type15.Integer({ minimum: 1, maximum: 50, default: 20 })),
|
|
24493
24763
|
offset: Type15.Optional(Type15.Integer({ minimum: 0 }))
|
|
24494
24764
|
});
|
|
24495
|
-
function normalizeOffset5(value) {
|
|
24765
|
+
/**
 * Normalizes a caller-supplied paging offset to a non-negative whole number.
 *
 * @param {number|null|undefined} value - Requested offset.
 * @returns {number} The floored value when it is finite and not negative, otherwise 0.
 */
function normalizeOffset6(value) {
  // Number.isFinite is false for every non-number, so `value` is a number here.
  if (Number.isFinite(value) && value >= 0) {
    return Math.floor(value);
  }
  return 0;
}
|
|
24501
|
-
function normalizeLimit5(value) {
|
|
24771
|
+
function normalizeLimit6(value) {
|
|
24502
24772
|
if (!Number.isFinite(value) || (value ?? 0) <= 0) {
|
|
24503
24773
|
return 20;
|
|
24504
24774
|
}
|
|
@@ -24522,8 +24792,8 @@ function createQuerySupersessionCandidatesTool(deps) {
|
|
|
24522
24792
|
},
|
|
24523
24793
|
deps.project
|
|
24524
24794
|
);
|
|
24525
|
-
const offset = normalizeOffset5(params.offset);
|
|
24526
|
-
const limit = normalizeLimit5(params.limit);
|
|
24795
|
+
const offset = normalizeOffset6(params.offset);
|
|
24796
|
+
const limit = normalizeLimit6(params.limit);
|
|
24527
24797
|
const { groups } = await loadEligibleSupersessionGroups(deps.db, {
|
|
24528
24798
|
cache: deps.supersessionCache,
|
|
24529
24799
|
query,
|
|
@@ -24946,7 +25216,7 @@ async function captureBrainHealthSnapshot(db) {
|
|
|
24946
25216
|
// src/modules/surgeon/application/workflow.ts
|
|
24947
25217
|
var USER_ABORT_ERROR = "Run aborted by user (SIGINT).";
|
|
24948
25218
|
var USER_ABORT_SUMMARY = "Run aborted by user.";
|
|
24949
|
-
var MAX_CONTINUATION_ATTEMPTS = 3;
|
|
25219
|
+
var MAX_CONTINUATION_ATTEMPTS = 5;
|
|
24950
25220
|
var LOW_BUDGET_FRACTION = 0.1;
|
|
24951
25221
|
var SHALLOW_RUN_WARNING_BUDGET_USED_FRACTION = 0.5;
|
|
24952
25222
|
function resolveRunBudget(options, config) {
|
|
@@ -25463,6 +25733,12 @@ async function runSurgeon(options, deps) {
|
|
|
25463
25733
|
skipRecentlyEvaluatedDays: protection.contradictionSkipRecentlyEvaluatedDays,
|
|
25464
25734
|
now
|
|
25465
25735
|
})).length : void 0;
|
|
25736
|
+
const completionGuards = createSurgeonCompletionGuardState({
|
|
25737
|
+
totalEntries: initialStatus.health.total,
|
|
25738
|
+
retirementCandidates: initialStatus.health.forgetting.candidates,
|
|
25739
|
+
dedupClusters: initialDedupClusterCount ?? initialAutoDedupClusterCount,
|
|
25740
|
+
pendingConflicts: initialPendingConflictCount
|
|
25741
|
+
});
|
|
25466
25742
|
const tools = createToolRegistryFn({
|
|
25467
25743
|
db: deps.db,
|
|
25468
25744
|
config: deps.config,
|
|
@@ -25493,6 +25769,10 @@ async function runSurgeon(options, deps) {
|
|
|
25493
25769
|
await logSurgeonAction(deps.db, action);
|
|
25494
25770
|
traceLogger.logAction(action);
|
|
25495
25771
|
},
|
|
25772
|
+
budgetTracker,
|
|
25773
|
+
tokenBudget,
|
|
25774
|
+
costCap: runCostCap,
|
|
25775
|
+
completionGuards,
|
|
25496
25776
|
getHealthStats: (statusDeps) => loadStatusFn(
|
|
25497
25777
|
{
|
|
25498
25778
|
db: deps.db,
|
|
@@ -43,7 +43,7 @@ You are working through the full candidate pool, not just one batch. After proce
|
|
|
43
43
|
- Your budget is running low - check the budget warnings from blocked tool calls
|
|
44
44
|
- You have exhausted the actionable candidates
|
|
45
45
|
|
|
46
|
-
Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass.
|
|
46
|
+
Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass. `complete_pass` will reject your request if significant budget remains and candidates have not been exhausted. If your completion is rejected, continue paging through candidates.
|
|
47
47
|
|
|
48
48
|
When `query_candidates` returns zero candidates, that is your signal that no more candidates match the current filters and it is appropriate to call `complete_pass`.
|
|
49
49
|
|
|
@@ -97,13 +97,15 @@ This is your core competency - the judgment that mechanical rules cannot make.
|
|
|
97
97
|
|
|
98
98
|
## Budget Awareness
|
|
99
99
|
|
|
100
|
-
You have a token budget.
|
|
100
|
+
You have a token budget for this run. Use it wisely:
|
|
101
101
|
|
|
102
102
|
- Don't waste budget inspecting entries that are obviously protected or clearly fine from their summary.
|
|
103
103
|
- Don't inspect every candidate - scan summaries, pick the most promising ones.
|
|
104
|
-
- When you have enough evidence, act or
|
|
104
|
+
- When you have enough evidence, act or skip. Don't over-investigate a single entry.
|
|
105
105
|
- Flag borderline cases for review rather than spending budget trying to reach certainty.
|
|
106
106
|
|
|
107
|
+
**But do not stop early.** Efficiency means spending budget on the right candidates, not spending less budget overall. If candidates remain and budget is available, keep working. The `complete_pass` tool will reject premature completion if you have not used enough of your budget.
|
|
108
|
+
|
|
107
109
|
## Scope
|
|
108
110
|
|
|
109
111
|
- When a project scope is provided, focus on entries in that project plus universal (unscoped) entries.
|