agenr 0.13.2 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.13.4] - 2026-03-23
4
+
5
+ ### Surgeon
6
+
7
+ - **Tightened completion gating thresholds.** Final completion now requires 75% budget usage (was 25%). Phase completion requires 75% (was 50%). Safety valve raised to 5 rejections (was 3). Continuation attempts raised to 5 (was 3). These changes force the surgeon to work through substantially more of the corpus before accepting completion.
8
+
9
+ ## [0.13.3] - 2026-03-23
10
+
11
+ ### Surgeon
12
+
13
+ - **`complete_pass` gating rejects premature completion.** The tool now validates budget utilization and candidate coverage before accepting completion. Final completion rejected if <25% budget used. Dedup phase rejected if <50% of clusters processed with budget remaining. Retirement phase rejected if <40 candidates evaluated with budget remaining. Safety valve accepts after 3 rejections per phase. Rejection messages tell the surgeon exactly what to do next.
14
+ - **System prompt tightened.** Budget Awareness section now explicitly states that `complete_pass` will reject premature attempts, and that efficiency means spending budget on the right candidates, not spending less budget overall.
15
+
3
16
  ## [0.13.2] - 2026-03-23
4
17
 
5
18
  ### Surgeon
package/dist/cli-main.js CHANGED
@@ -22929,6 +22929,53 @@ async function updateEntryFieldsById(db, entryId, fields) {
22929
22929
  };
22930
22930
  }
22931
22931
 
22932
+ // src/modules/surgeon/application/completion-guard.ts
22933
/**
 * Build the zeroed pagination-progress record a tracker starts from.
 *
 * @returns {{queryCalls: number, maxWindowEnd: number, totalCount: (number|null), sawExhaustedPage: boolean}}
 *   A brand-new object on every call, so trackers never share state.
 */
function createEmptyProgress() {
  const blank = {};
  blank.queryCalls = 0;
  blank.maxWindowEnd = 0;
  blank.totalCount = null;
  blank.sawExhaustedPage = false;
  return blank;
}
22941
/**
 * Create a tracker that records how far a paginated query has been paged.
 *
 * The returned object exposes:
 * - `reset()`      — discard all recorded progress.
 * - `recordPage()` — fold one page result into the running progress.
 * - `snapshot()`   — return a shallow copy of the current progress.
 *
 * @returns {{reset: function(): void, recordPage: function(Object): void, snapshot: function(): Object}}
 */
function createPaginatedQueryTracker() {
  let progress = createEmptyProgress();

  // Coerce to a non-negative integer; anything non-finite becomes `fallback`.
  const toCount = (value, fallback) =>
    Number.isFinite(value) ? Math.max(0, Math.floor(value)) : fallback;

  return {
    reset() {
      progress = createEmptyProgress();
    },

    /**
     * @param {{offset?: number, returnedCount?: number, totalCount?: number, exhausted?: boolean}} input
     *   Description of the page that was just served.
     */
    recordPage(input) {
      const offset = toCount(input.offset, 0);
      const returnedCount = toCount(input.returnedCount, 0);
      const totalCount = toCount(input.totalCount, null);
      progress = {
        queryCalls: progress.queryCalls + 1,
        // High-water mark of the furthest row index seen so far.
        maxWindowEnd: Math.max(progress.maxWindowEnd, offset + returnedCount),
        // Keep the last known total when this page did not report one.
        totalCount: totalCount ?? progress.totalCount,
        // Coerce so the flag stays a boolean even when `exhausted` is omitted.
        sawExhaustedPage: progress.sawExhaustedPage || Boolean(input.exhausted)
      };
    },

    snapshot() {
      return { ...progress };
    }
  };
}
22963
/**
 * Build the per-run state consulted when deciding whether a completion
 * attempt should be accepted.
 *
 * All four initial-health counts are normalised the same way: finite numbers
 * are floored and clamped to be non-negative. A non-finite `totalEntries` or
 * `retirementCandidates` becomes 0 (previously it leaked `NaN` into the
 * state); a non-finite `dedupClusters` or `pendingConflicts` stays `undefined`
 * so callers can tell "unknown" from "zero".
 *
 * @param {{totalEntries?: number, retirementCandidates?: number, dedupClusters?: number, pendingConflicts?: number}} input
 *   Counts captured before the run starts.
 * @returns {Object} State holding `rejectionCounts` (a Map), `initialHealth`,
 *   and one pagination tracker each for `retirement`, `dedup`,
 *   `pendingConflicts` and `contradictionScan`.
 */
function createSurgeonCompletionGuardState(input) {
  // Coerce to a non-negative integer; anything non-finite becomes `fallback`.
  const toCount = (value, fallback) =>
    Number.isFinite(value) ? Math.max(0, Math.floor(value)) : fallback;

  return {
    rejectionCounts: /* @__PURE__ */ new Map(),
    initialHealth: {
      totalEntries: toCount(input.totalEntries, 0),
      retirementCandidates: toCount(input.retirementCandidates, 0),
      dedupClusters: toCount(input.dedupClusters, void 0),
      pendingConflicts: toCount(input.pendingConflicts, void 0)
    },
    retirement: createPaginatedQueryTracker(),
    dedup: createPaginatedQueryTracker(),
    pendingConflicts: createPaginatedQueryTracker(),
    contradictionScan: createPaginatedQueryTracker()
  };
}
22978
+
22932
22979
  // src/modules/surgeon/adapters/prompts/index.ts
22933
22980
  import fs21 from "fs/promises";
22934
22981
  import path24 from "path";
@@ -23149,6 +23196,72 @@ var COMPLETE_PASS_SCHEMA = Type2.Object({
23149
23196
  observations: Type2.Array(Type2.String()),
23150
23197
  recommendations: Type2.Array(Type2.String())
23151
23198
  });
23199
+ var FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75; // final "auto" completion is only checked for rejection while budget used is below this fraction
23200
+ var PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75; // retirement/dedup phase completion is only checked for rejection while budget used is below this fraction
23201
+ var SAFETY_VALVE_REJECTION_LIMIT = 5; // once a completion key has this many prior rejections, the guards are bypassed
23202
+ var LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD = 200; // at or above this many total entries, final completion expects the contradiction scan to have been queried
23203
/**
 * Tell whether `value` names one of the phases that completion gating knows about.
 *
 * @param {*} value - Candidate phase name.
 * @returns {boolean} True only for "contradictions", "dedup" or "retirement".
 */
function isCompletionPhase(value) {
  const guardedPhases = ["contradictions", "dedup", "retirement"];
  return guardedPhases.includes(value);
}
23206
/**
 * Pick the key under which completion rejections are counted.
 *
 * @param {(string|null|undefined)} passType - Pass type supplied with the completion call.
 * @param {string} currentPass - Pass the run was started with.
 * @returns {string} The trimmed `passType` when it is non-empty, otherwise `currentPass`.
 */
function normalizeCompletionKey(passType, currentPass) {
  const trimmed = passType?.trim();
  return trimmed || currentPass;
}
23213
/**
 * Tell whether a completion call is an "auto" run closing out one of its phases.
 *
 * @param {string} currentPass - Pass the run was started with.
 * @param {(string|undefined)} passType - Pass type supplied with the completion call.
 * @returns {boolean} True when the run is "auto" and `passType` is a completion phase.
 */
function isAutoPhaseTransition(currentPass, passType) {
  if (currentPass !== "auto") {
    return false;
  }
  return isCompletionPhase(passType);
}
23216
/**
 * Compute how much of the run budget has been consumed.
 *
 * Usage is measured separately for tokens and for cost, and the larger of the
 * two is reported, clamped to [0, 1]. A dimension with no positive cap counts
 * as fully used (1). A dimension whose remaining amount is missing or NaN now
 * also counts as fully used, instead of turning the whole result into NaN.
 *
 * NOTE(review): because the larger value wins, a run with only one cap
 * configured always reports 1 here — confirm that is intended.
 *
 * @param {{budgetTracker?: {remaining: function(): {tokens: number, costUsd: number}}, tokenBudget?: number, costCap?: number}} deps
 * @returns {({budgetUsedPct: number, remainingTokens: number, remainingCostUsd: number}|null)}
 *   `null` when no budget tracker is available.
 */
function calculateBudgetUsedPct(deps) {
  if (!deps.budgetTracker) {
    return null;
  }
  const remaining = deps.budgetTracker.remaining();

  // Fraction of one budget dimension already spent; 1 when it cannot be computed.
  const usedFraction = (cap, left) => {
    const limit = Number.isFinite(cap) ? Math.max(0, cap) : 0;
    if (limit <= 0) {
      return 1;
    }
    const used = 1 - left / limit;
    return Number.isNaN(used) ? 1 : used;
  };

  const tokenUsedPct = usedFraction(deps.tokenBudget, remaining.tokens);
  const costUsedPct = usedFraction(deps.costCap, remaining.costUsd);
  return {
    budgetUsedPct: Math.max(0, Math.min(1, Math.max(tokenUsedPct, costUsedPct))),
    remainingTokens: remaining.tokens,
    remainingCostUsd: remaining.costUsd
  };
}
23231
/**
 * Convert a used-budget fraction into a whole-number percentage.
 *
 * @param {number} value - Fraction of budget used (e.g. 0.5).
 * @returns {number} `value` scaled by 100 and rounded with `Math.round`.
 */
function formatBudgetUsedPct(value) {
  const percent = value * 100;
  return Math.round(percent);
}
23234
/**
 * Record one more rejection for `rejectionKey` and build the rejection tool result.
 *
 * @param {Object} deps - Tool dependencies; `deps.completionGuards` is optional.
 * @param {string} rejectionKey - Key under which the rejection count is stored.
 * @param {number} priorRejections - Rejections recorded for this key before this attempt.
 * @param {*} summary - Summary payload echoed back in the result.
 * @param {Object} details - Extra fields merged into the result after the fixed ones.
 * @param {string} message - Text passed to `toolResult` alongside the payload.
 * @returns {*} Whatever `toolResult(payload, message)` returns.
 */
function rejectCompletionAttempt(deps, rejectionKey, priorRejections, summary, details, message) {
  const rejectionCount = priorRejections + 1;
  deps.completionGuards?.rejectionCounts.set(rejectionKey, rejectionCount);

  // `details` is spread last so its fields keep overriding the fixed ones.
  const payload = {
    completed: false,
    rejected: true,
    rejectionCount,
    summary,
    ...details
  };
  return toolResult(payload, message);
}
23247
/**
 * Describe, in one clause, how far retirement candidates have been paged.
 *
 * @param {{queryCalls: number, maxWindowEnd: number}} progress - Tracker snapshot for retirement queries.
 * @param {number} knownCandidates - Candidate count known before the pass started (0 when unknown).
 * @returns {string} Clause suitable for embedding in a rejection message.
 */
function describeRetirementProgress(progress, knownCandidates) {
  const hasKnownCount = knownCandidates > 0;

  if (progress.queryCalls === 0) {
    if (hasKnownCount) {
      return `about ${knownCandidates} retirement candidates were available before the pass started, but query_candidates has not been called yet`;
    }
    return "query_candidates has not been called yet";
  }

  const paged = progress.maxWindowEnd;
  if (hasKnownCount) {
    return `only ${paged} of about ${knownCandidates} retirement candidates have been paged so far`;
  }
  return `only ${paged} retirement candidates have been paged so far and query_candidates has not been exhausted`;
}
23256
/**
 * Describe, in one clause, how far dedup clusters have been paged.
 *
 * @param {{queryCalls: number, maxWindowEnd: number}} progress - Tracker snapshot for dedup queries.
 * @param {number} totalClusters - Known cluster count (0 when unknown).
 * @returns {string} Clause suitable for embedding in a rejection message.
 */
function describeDedupProgress(progress, totalClusters) {
  const hasKnownTotal = totalClusters > 0;

  if (progress.queryCalls === 0) {
    if (hasKnownTotal) {
      return `${totalClusters} dedup clusters were cached for this run, but query_dedup_clusters has not been called yet`;
    }
    return "query_dedup_clusters has not been called yet";
  }

  const paged = progress.maxWindowEnd;
  if (hasKnownTotal) {
    return `only ${paged} of ${totalClusters} dedup clusters have been paged so far`;
  }
  return `only ${paged} dedup clusters have been paged so far and query_dedup_clusters has not been exhausted`;
}
23152
23265
  function createCompletePassTool(deps) {
23153
23266
  return {
23154
23267
  name: "complete_pass",
@@ -23178,12 +23291,111 @@ function createCompletePassTool(deps) {
23178
23291
  recommendations: params.recommendations
23179
23292
  };
23180
23293
  const passType = params.pass_type?.trim();
23294
+ const rejectionKey = normalizeCompletionKey(passType, deps.pass);
23295
+ const priorRejections = deps.completionGuards?.rejectionCounts.get(rejectionKey) ?? 0;
23296
+ const budgetUsage = calculateBudgetUsedPct(deps);
23297
+ const budgetUsedPct = budgetUsage?.budgetUsedPct ?? 1;
23298
+ const budgetUsedLabel = formatBudgetUsedPct(budgetUsedPct);
23299
+ const handledCount = Math.max(0, params.actions_taken + params.entries_skipped.length);
23300
+ if (priorRejections < SAFETY_VALVE_REJECTION_LIMIT && budgetUsage && deps.completionGuards) {
23301
+ const isPhaseTransition = isAutoPhaseTransition(deps.pass, passType);
23302
+ const guardedPhase = isPhaseTransition ? passType : isCompletionPhase(deps.pass) ? deps.pass : null;
23303
+ if (guardedPhase === "retirement" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
23304
+ const progress = deps.completionGuards.retirement.snapshot();
23305
+ const knownCandidates = deps.completionGuards.initialHealth.retirementCandidates;
23306
+ const hasKnownRetirementWork = knownCandidates > 0 || progress.queryCalls > 0;
23307
+ const shouldReject = hasKnownRetirementWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && knownCandidates > handledCount || progress.queryCalls > 0 && (knownCandidates === 0 || progress.maxWindowEnd < knownCandidates));
23308
+ if (shouldReject) {
23309
+ return rejectCompletionAttempt(
23310
+ deps,
23311
+ rejectionKey,
23312
+ priorRejections,
23313
+ summary,
23314
+ {
23315
+ phase: "retirement",
23316
+ budgetUsedPct: budgetUsedLabel,
23317
+ pagedCandidates: progress.maxWindowEnd,
23318
+ knownCandidates: knownCandidates || null,
23319
+ remainingTokens: budgetUsage.remainingTokens,
23320
+ remainingCostUsd: budgetUsage.remainingCostUsd
23321
+ },
23322
+ `Retirement completion rejected: ${describeRetirementProgress(progress, knownCandidates)} with ${budgetUsedLabel}% of budget used. Continue calling query_candidates with a higher offset until it returns no more candidates or your budget is genuinely low.`
23323
+ );
23324
+ }
23325
+ }
23326
+ if (guardedPhase === "dedup" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
23327
+ const progress = deps.completionGuards.dedup.snapshot();
23328
+ const totalClusters = progress.totalCount ?? deps.completionGuards.initialHealth.dedupClusters ?? 0;
23329
+ const halfClusters = totalClusters > 0 ? Math.ceil(totalClusters * 0.5) : 0;
23330
+ const hasKnownDedupWork = totalClusters > 0 || progress.queryCalls > 0;
23331
+ const shouldReject = hasKnownDedupWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && totalClusters > handledCount || progress.queryCalls > 0 && (totalClusters === 0 || progress.maxWindowEnd < halfClusters));
23332
+ if (shouldReject) {
23333
+ return rejectCompletionAttempt(
23334
+ deps,
23335
+ rejectionKey,
23336
+ priorRejections,
23337
+ summary,
23338
+ {
23339
+ phase: "dedup",
23340
+ budgetUsedPct: budgetUsedLabel,
23341
+ pagedClusters: progress.maxWindowEnd,
23342
+ totalClusters: totalClusters || null,
23343
+ remainingTokens: budgetUsage.remainingTokens,
23344
+ remainingCostUsd: budgetUsage.remainingCostUsd
23345
+ },
23346
+ `Dedup completion rejected: ${describeDedupProgress(progress, totalClusters)} with ${budgetUsedLabel}% of budget used. Continue paging query_dedup_clusters before completing the dedup phase.`
23347
+ );
23348
+ }
23349
+ }
23350
+ const isFinalAutoCompletion = deps.pass === "auto" && (!passType || passType === "auto");
23351
+ if (isFinalAutoCompletion && budgetUsedPct < FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION) {
23352
+ const reasons = [];
23353
+ const pendingConflicts = deps.completionGuards.pendingConflicts.snapshot();
23354
+ const dedup = deps.completionGuards.dedup.snapshot();
23355
+ const retirement = deps.completionGuards.retirement.snapshot();
23356
+ const contradictionScan = deps.completionGuards.contradictionScan.snapshot();
23357
+ const initialPendingConflicts = deps.completionGuards.initialHealth.pendingConflicts ?? 0;
23358
+ const initialDedupClusters = deps.completionGuards.initialHealth.dedupClusters ?? 0;
23359
+ const initialRetirementCandidates = deps.completionGuards.initialHealth.retirementCandidates;
23360
+ if (initialPendingConflicts > 0 && !pendingConflicts.sawExhaustedPage && (pendingConflicts.queryCalls === 0 || pendingConflicts.maxWindowEnd < initialPendingConflicts)) {
23361
+ reasons.push(
23362
+ pendingConflicts.queryCalls === 0 ? `${initialPendingConflicts} pending conflicts were available and query_conflicts has not been paged` : `only ${pendingConflicts.maxWindowEnd} of ${initialPendingConflicts} pending conflicts have been paged`
23363
+ );
23364
+ }
23365
+ if (initialDedupClusters > 0 && !dedup.sawExhaustedPage && (dedup.queryCalls === 0 || dedup.maxWindowEnd < initialDedupClusters)) {
23366
+ reasons.push(describeDedupProgress(dedup, initialDedupClusters));
23367
+ }
23368
+ if (initialRetirementCandidates > 0 && !retirement.sawExhaustedPage && (retirement.queryCalls === 0 || retirement.maxWindowEnd < initialRetirementCandidates)) {
23369
+ reasons.push(describeRetirementProgress(retirement, initialRetirementCandidates));
23370
+ }
23371
+ if (deps.completionGuards.initialHealth.totalEntries >= LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD && contradictionScan.queryCalls === 0) {
23372
+ reasons.push("the proactive contradiction scan has not run yet");
23373
+ }
23374
+ if (reasons.length > 0) {
23375
+ return rejectCompletionAttempt(
23376
+ deps,
23377
+ rejectionKey,
23378
+ priorRejections,
23379
+ summary,
23380
+ {
23381
+ phase: "auto",
23382
+ budgetUsedPct: budgetUsedLabel,
23383
+ remainingTokens: budgetUsage.remainingTokens,
23384
+ remainingCostUsd: budgetUsage.remainingCostUsd,
23385
+ reasons
23386
+ },
23387
+ `Completion rejected: only ${budgetUsedLabel}% of budget used and the sweep still looks incomplete because ${reasons.join("; ")}. Continue paging candidates and only call complete_pass with pass_type="auto" when the remaining phases are genuinely exhausted or budget is low.`
23388
+ );
23389
+ }
23390
+ }
23391
+ }
23181
23392
  if (passType && passType !== "auto" && deps.pass === "auto") {
23182
23393
  deps.completionState.completePhase(passType, summary);
23183
23394
  return toolResult(
23184
23395
  {
23185
23396
  completed: false,
23186
23397
  phaseComplete: passType,
23398
+ safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
23187
23399
  summary
23188
23400
  },
23189
23401
  `${passType} phase complete. Continue with the next pass. Call complete_pass with pass_type="auto" when all passes are done.`
@@ -23193,6 +23405,7 @@ function createCompletePassTool(deps) {
23193
23405
  return toolResult(
23194
23406
  {
23195
23407
  completed: true,
23408
+ safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
23196
23409
  summary
23197
23410
  },
23198
23411
  "Pass marked complete. Do not call more tools. Respond with a brief final acknowledgment."
@@ -23450,6 +23663,7 @@ function createQueryContradictionCandidatesTool(deps) {
23450
23663
  async execute(_toolCallId, params) {
23451
23664
  if (params.reset === true) {
23452
23665
  cached = null;
23666
+ deps.completionGuards?.contradictionScan.reset();
23453
23667
  }
23454
23668
  const query = buildQuery(params, deps);
23455
23669
  const offset = normalizeOffset2(params.offset);
@@ -23477,6 +23691,12 @@ function createQueryContradictionCandidatesTool(deps) {
23477
23691
  }
23478
23692
  const totalCount = cached?.pairs.length ?? 0;
23479
23693
  if (offset >= totalCount) {
23694
+ deps.completionGuards?.contradictionScan.recordPage({
23695
+ offset,
23696
+ returnedCount: 0,
23697
+ totalCount,
23698
+ exhausted: true
23699
+ });
23480
23700
  return toolResult({
23481
23701
  pairs: [],
23482
23702
  count: 0,
@@ -23488,6 +23708,12 @@ function createQueryContradictionCandidatesTool(deps) {
23488
23708
  });
23489
23709
  }
23490
23710
  const pairs = (cached?.pairs ?? []).slice(offset, offset + limit);
23711
+ deps.completionGuards?.contradictionScan.recordPage({
23712
+ offset,
23713
+ returnedCount: pairs.length,
23714
+ totalCount,
23715
+ exhausted: offset + pairs.length >= totalCount
23716
+ });
23491
23717
  return toolResult({
23492
23718
  pairs,
23493
23719
  count: pairs.length,
@@ -23627,6 +23853,12 @@ function createQueryConflictsTool(deps) {
23627
23853
  now: deps.now()
23628
23854
  });
23629
23855
  if (offset >= conflicts.length) {
23856
+ deps.completionGuards?.pendingConflicts.recordPage({
23857
+ offset,
23858
+ returnedCount: 0,
23859
+ totalCount: conflicts.length,
23860
+ exhausted: true
23861
+ });
23630
23862
  return toolResult({
23631
23863
  conflicts: [],
23632
23864
  count: 0,
@@ -23637,6 +23869,12 @@ function createQueryConflictsTool(deps) {
23637
23869
  });
23638
23870
  }
23639
23871
  const page = conflicts.slice(offset, offset + limit).filter((conflict) => !deps.conflictCache?.consumedConflictIds.has(conflict.id)).map((conflict) => summarizeConflict(conflict));
23872
+ deps.completionGuards?.pendingConflicts.recordPage({
23873
+ offset,
23874
+ returnedCount: page.length,
23875
+ totalCount: conflicts.length,
23876
+ exhausted: offset + page.length >= conflicts.length
23877
+ });
23640
23878
  return toolResult({
23641
23879
  conflicts: page,
23642
23880
  count: page.length,
@@ -24074,6 +24312,7 @@ function createQueryDedupClustersTool(deps) {
24074
24312
  }
24075
24313
  if (params.reset === true) {
24076
24314
  resetDedupClusterCache(deps.clusterCache);
24315
+ deps.completionGuards?.dedup.reset();
24077
24316
  }
24078
24317
  const query = normalizeDedupClusterQuery(
24079
24318
  {
@@ -24093,6 +24332,12 @@ function createQueryDedupClustersTool(deps) {
24093
24332
  now: deps.now()
24094
24333
  });
24095
24334
  if (offset >= clusters.length) {
24335
+ deps.completionGuards?.dedup.recordPage({
24336
+ offset,
24337
+ returnedCount: 0,
24338
+ totalCount: clusters.length,
24339
+ exhausted: true
24340
+ });
24096
24341
  return toolResult({
24097
24342
  clusters: [],
24098
24343
  count: 0,
@@ -24104,6 +24349,12 @@ function createQueryDedupClustersTool(deps) {
24104
24349
  });
24105
24350
  }
24106
24351
  const page = clusters.slice(offset, offset + limit).map((cluster, index) => summarizeDedupCluster(cluster, offset + index, query.project));
24352
+ deps.completionGuards?.dedup.recordPage({
24353
+ offset,
24354
+ returnedCount: page.length,
24355
+ totalCount: clusters.length,
24356
+ exhausted: offset + page.length >= clusters.length
24357
+ });
24107
24358
  return toolResult({
24108
24359
  clusters: page,
24109
24360
  count: page.length,
@@ -24269,6 +24520,18 @@ var QUERY_CANDIDATES_SCHEMA = Type12.Object({
24269
24520
  limit: Type12.Optional(Type12.Integer({ minimum: 1, maximum: 100 })),
24270
24521
  offset: Type12.Optional(Type12.Integer({ minimum: 0 }))
24271
24522
  });
24523
/**
 * Normalise a caller-supplied page size.
 *
 * @param {(number|undefined)} value - Requested limit.
 * @returns {number} 20 when `value` is missing, non-finite, or not positive;
 *   otherwise `value` floored, but never less than 1. The clamp stops a
 *   positive fraction below 1 from becoming a page size of 0.
 */
function normalizeLimit5(value) {
  if (!Number.isFinite(value) || value <= 0) {
    return 20;
  }
  return Math.max(1, Math.floor(value));
}
24529
/**
 * Normalise a caller-supplied page offset.
 *
 * @param {(number|undefined)} value - Requested offset.
 * @returns {number} `value` floored when it is a finite, non-negative number; otherwise 0.
 */
function normalizeOffset5(value) {
  const usable = Number.isFinite(value) && value >= 0;
  return usable ? Math.floor(value) : 0;
}
24272
24535
  function createQueryCandidatesTool(deps) {
24273
24536
  return {
24274
24537
  name: "query_candidates",
@@ -24276,6 +24539,8 @@ function createQueryCandidatesTool(deps) {
24276
24539
  description: "List active entries that look stale enough to inspect for retirement.",
24277
24540
  parameters: QUERY_CANDIDATES_SCHEMA,
24278
24541
  async execute(_toolCallId, params) {
24542
+ const limit = normalizeLimit5(params.limit);
24543
+ const offset = normalizeOffset5(params.offset);
24279
24544
  const candidates = await listRetirementCandidates(deps.db, {
24280
24545
  project: params.project?.trim() || deps.project,
24281
24546
  type: params.type?.trim() || void 0,
@@ -24289,6 +24554,11 @@ function createQueryCandidatesTool(deps) {
24289
24554
  runId: deps.runId,
24290
24555
  now: deps.now()
24291
24556
  });
24557
+ deps.completionGuards?.retirement.recordPage({
24558
+ offset,
24559
+ returnedCount: candidates.length,
24560
+ exhausted: candidates.length < limit
24561
+ });
24292
24562
  if (candidates.length === 0) {
24293
24563
  return toolResult({
24294
24564
  candidates: [],
@@ -24492,13 +24762,13 @@ var QUERY_SUPERSESSION_SCHEMA = Type15.Object({
24492
24762
  limit: Type15.Optional(Type15.Integer({ minimum: 1, maximum: 50, default: 20 })),
24493
24763
  offset: Type15.Optional(Type15.Integer({ minimum: 0 }))
24494
24764
  });
24495
- function normalizeOffset5(value) {
24765
+ function normalizeOffset6(value) {
24496
24766
  if (!Number.isFinite(value) || (value ?? 0) < 0) {
24497
24767
  return 0;
24498
24768
  }
24499
24769
  return Math.floor(value);
24500
24770
  }
24501
- function normalizeLimit5(value) {
24771
+ function normalizeLimit6(value) {
24502
24772
  if (!Number.isFinite(value) || (value ?? 0) <= 0) {
24503
24773
  return 20;
24504
24774
  }
@@ -24522,8 +24792,8 @@ function createQuerySupersessionCandidatesTool(deps) {
24522
24792
  },
24523
24793
  deps.project
24524
24794
  );
24525
- const offset = normalizeOffset5(params.offset);
24526
- const limit = normalizeLimit5(params.limit);
24795
+ const offset = normalizeOffset6(params.offset);
24796
+ const limit = normalizeLimit6(params.limit);
24527
24797
  const { groups } = await loadEligibleSupersessionGroups(deps.db, {
24528
24798
  cache: deps.supersessionCache,
24529
24799
  query,
@@ -24946,7 +25216,7 @@ async function captureBrainHealthSnapshot(db) {
24946
25216
  // src/modules/surgeon/application/workflow.ts
24947
25217
  var USER_ABORT_ERROR = "Run aborted by user (SIGINT).";
24948
25218
  var USER_ABORT_SUMMARY = "Run aborted by user.";
24949
- var MAX_CONTINUATION_ATTEMPTS = 3;
25219
+ var MAX_CONTINUATION_ATTEMPTS = 5;
24950
25220
  var LOW_BUDGET_FRACTION = 0.1;
24951
25221
  var SHALLOW_RUN_WARNING_BUDGET_USED_FRACTION = 0.5;
24952
25222
  function resolveRunBudget(options, config) {
@@ -25463,6 +25733,12 @@ async function runSurgeon(options, deps) {
25463
25733
  skipRecentlyEvaluatedDays: protection.contradictionSkipRecentlyEvaluatedDays,
25464
25734
  now
25465
25735
  })).length : void 0;
25736
+ const completionGuards = createSurgeonCompletionGuardState({
25737
+ totalEntries: initialStatus.health.total,
25738
+ retirementCandidates: initialStatus.health.forgetting.candidates,
25739
+ dedupClusters: initialDedupClusterCount ?? initialAutoDedupClusterCount,
25740
+ pendingConflicts: initialPendingConflictCount
25741
+ });
25466
25742
  const tools = createToolRegistryFn({
25467
25743
  db: deps.db,
25468
25744
  config: deps.config,
@@ -25493,6 +25769,10 @@ async function runSurgeon(options, deps) {
25493
25769
  await logSurgeonAction(deps.db, action);
25494
25770
  traceLogger.logAction(action);
25495
25771
  },
25772
+ budgetTracker,
25773
+ tokenBudget,
25774
+ costCap: runCostCap,
25775
+ completionGuards,
25496
25776
  getHealthStats: (statusDeps) => loadStatusFn(
25497
25777
  {
25498
25778
  db: deps.db,
@@ -43,7 +43,7 @@ You are working through the full candidate pool, not just one batch. After proce
43
43
  - Your budget is running low - check the budget warnings from blocked tool calls
44
44
  - You have exhausted the actionable candidates
45
45
 
46
- Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass.
46
+ Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass. `complete_pass` will reject your request if significant budget remains and candidates have not been exhausted. If your completion is rejected, continue paging through candidates.
47
47
 
48
48
  When `query_candidates` returns zero candidates, that is your signal that no more candidates match the current filters and it is appropriate to call `complete_pass`.
49
49
 
@@ -97,13 +97,15 @@ This is your core competency - the judgment that mechanical rules cannot make.
97
97
 
98
98
  ## Budget Awareness
99
99
 
100
- You have a token budget. Prioritize high-value actions:
100
+ You have a token budget for this run. Use it wisely:
101
101
 
102
102
  - Don't waste budget inspecting entries that are obviously protected or clearly fine from their summary.
103
103
  - Don't inspect every candidate - scan summaries, pick the most promising ones.
104
- - When you have enough evidence, act or finish. Don't over-investigate.
104
+ - When you have enough evidence, act or skip. Don't over-investigate a single entry.
105
105
  - Flag borderline cases for review rather than spending budget trying to reach certainty.
106
106
 
107
+ **But do not stop early.** Efficiency means spending budget on the right candidates, not spending less budget overall. If candidates remain and budget is available, keep working. The `complete_pass` tool will reject premature completion if you have not used enough of your budget.
108
+
107
109
  ## Scope
108
110
 
109
111
  - When a project scope is provided, focus on entries in that project plus universal (unscoped) entries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agenr",
3
- "version": "0.13.2",
3
+ "version": "0.13.4",
4
4
  "openclaw": {
5
5
  "extensions": [
6
6
  "dist/edge/openclaw/index.js"