npm - agenr - Versions diffs - 0.13.2 → 0.13.4 - Mend

agenr 0.13.2 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md +13 -0
package/dist/cli-main.js +285 -5
package/dist/modules/surgeon/adapters/prompts/system.md +5 -3
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Changelog
+## [0.13.4] - 2026-03-23
+### Surgeon
+- **Tightened completion gating thresholds.** Final completion now requires 75% budget usage (was 25%). Phase completion requires 75% (was 50%). Safety valve raised to 5 rejections (was 3). Continuation attempts raised to 5 (was 3). These changes force the surgeon to work through substantially more of the corpus before accepting completion.
+## [0.13.3] - 2026-03-23
+### Surgeon
+- **`complete_pass` gating rejects premature completion.** The tool now validates budget utilization and candidate coverage before accepting completion. Final completion rejected if <25% budget used. Dedup phase rejected if <50% of clusters processed with budget remaining. Retirement phase rejected if <40 candidates evaluated with budget remaining. Safety valve accepts after 3 rejections per phase. Rejection messages tell the surgeon exactly what to do next.
+- **System prompt tightened.** Budget Awareness section now explicitly states that `complete_pass` will reject premature attempts, and that efficiency means spending budget on the right candidates, not spending less budget overall.
 ## [0.13.2] - 2026-03-23
 ### Surgeon

package/dist/cli-main.js CHANGED Viewed

@@ -22929,6 +22929,53 @@ async function updateEntryFieldsById(db, entryId, fields) {
   };
 }
+// src/modules/surgeon/application/completion-guard.ts
+/**
+ * Creates the starting progress record for one paginated query tool.
+ *
+ * @returns {{queryCalls: number, maxWindowEnd: number, totalCount: (number|null), sawExhaustedPage: boolean}}
+ *   A new object with no calls recorded, a zero-length paged window,
+ *   an unknown total (`null`), and no exhausted page seen.
+ */
+function createEmptyProgress() {
+  const blankProgress = {};
+  blankProgress.queryCalls = 0;
+  blankProgress.maxWindowEnd = 0;
+  blankProgress.totalCount = null;
+  blankProgress.sawExhaustedPage = false;
+  return blankProgress;
+}
+/**
+ * Creates a tracker that accumulates how far a paginated query tool has been paged.
+ *
+ * The returned object exposes:
+ * - `reset()`: discards everything recorded so far.
+ * - `recordPage(input)`: folds one page (`offset`, `returnedCount`, optional
+ *   `totalCount`, `exhausted`) into the running progress.
+ * - `snapshot()`: returns a shallow copy of the current progress record.
+ *
+ * @returns {{reset: Function, recordPage: Function, snapshot: Function}}
+ */
+function createPaginatedQueryTracker() {
+  // Non-finite values (including undefined) collapse to `fallback`;
+  // finite ones are floored and clamped at zero.
+  const toCount = (raw, fallback) => Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : fallback;
+  let state = createEmptyProgress();
+  function reset() {
+    state = createEmptyProgress();
+  }
+  function recordPage(input) {
+    const windowEnd = toCount(input.offset, 0) + toCount(input.returnedCount, 0);
+    const reportedTotal = toCount(input.totalCount, null);
+    state = {
+      queryCalls: state.queryCalls + 1,
+      // Track the furthest position reached, regardless of paging order.
+      maxWindowEnd: Math.max(state.maxWindowEnd, windowEnd),
+      // Keep the last known total when this page did not report one.
+      totalCount: reportedTotal ?? state.totalCount,
+      sawExhaustedPage: state.sawExhaustedPage || input.exhausted
+    };
+  }
+  function snapshot() {
+    return { ...state };
+  }
+  return { reset, recordPage, snapshot };
+}
+/**
+ * Builds the per-run state that `complete_pass` consults before accepting completion.
+ *
+ * @param {object} input
+ * @param {number} [input.totalEntries] Entry count captured before the run; non-finite values become 0.
+ * @param {number} [input.retirementCandidates] Retirement candidate count captured before the run; non-finite values become 0.
+ * @param {number} [input.dedupClusters] Dedup cluster count, or non-finite/undefined when unknown (stored as undefined).
+ * @param {number} [input.pendingConflicts] Pending conflict count, or non-finite/undefined when unknown (stored as undefined).
+ * @returns {object} State holding a rejection counter map, the normalized initial health
+ *   counts, and one paginated-query tracker per guarded tool.
+ */
+function createSurgeonCompletionGuardState(input) {
+  // Every count goes through the same guard. Without it a missing totalEntries or
+  // retirementCandidates would be stored as NaN, and every later comparison against
+  // NaN is false, which silently switches the corresponding guard off.
+  const toCount = (raw, fallback) => Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : fallback;
+  return {
+    rejectionCounts: /* @__PURE__ */ new Map(),
+    initialHealth: {
+      totalEntries: toCount(input.totalEntries, 0),
+      retirementCandidates: toCount(input.retirementCandidates, 0),
+      dedupClusters: toCount(input.dedupClusters, void 0),
+      pendingConflicts: toCount(input.pendingConflicts, void 0)
+    },
+    retirement: createPaginatedQueryTracker(),
+    dedup: createPaginatedQueryTracker(),
+    pendingConflicts: createPaginatedQueryTracker(),
+    contradictionScan: createPaginatedQueryTracker()
+  };
+}
 // src/modules/surgeon/adapters/prompts/index.ts
 import fs21 from "fs/promises";
 import path24 from "path";
@@ -23149,6 +23196,72 @@ var COMPLETE_PASS_SCHEMA = Type2.Object({
   observations: Type2.Array(Type2.String()),
   recommendations: Type2.Array(Type2.String())
 });
+// Thresholds used by the complete_pass completion guard.
+// Budget-used fraction (0..1) below which a final pass_type="auto" completion is checked for unfinished work.
+var FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75;
+// Budget-used fraction (0..1) below which a retirement or dedup phase completion is checked for unpaged candidates.
+var PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION = 0.75;
+// Once a completion key has been rejected this many times, the guard checks are skipped and completion is accepted.
+var SAFETY_VALVE_REJECTION_LIMIT = 5;
+// Total-entry count at or above which a final completion also expects the contradiction scan to have been queried.
+var LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD = 200;
+/**
+ * Reports whether `value` names one of the phases that can be completed individually.
+ *
+ * @param {*} value Candidate phase name.
+ * @returns {boolean} True only for "contradictions", "dedup", or "retirement".
+ */
+function isCompletionPhase(value) {
+  const completionPhases = ["contradictions", "dedup", "retirement"];
+  return completionPhases.includes(value);
+}
+/**
+ * Picks the key under which completion rejections are counted.
+ *
+ * @param {string} [passType] Pass type supplied with the completion call; surrounding whitespace is ignored.
+ * @param {string} currentPass Pass the run was started with; used when `passType` is missing or blank.
+ * @returns {string} The trimmed `passType` when it is non-empty, otherwise `currentPass`.
+ */
+function normalizeCompletionKey(passType, currentPass) {
+  const trimmedPassType = passType?.trim();
+  return trimmedPassType ? trimmedPassType : currentPass;
+}
+/**
+ * Reports whether a completion call closes a single phase inside an "auto" run.
+ *
+ * @param {string} currentPass Pass the run was started with.
+ * @param {string} [passType] Pass type supplied with the completion call.
+ * @returns {boolean} True when the run is "auto" and `passType` is a completion phase.
+ */
+function isAutoPhaseTransition(currentPass, passType) {
+  if (currentPass !== "auto") {
+    return false;
+  }
+  return isCompletionPhase(passType);
+}
+/**
+ * Computes how much of the run budget has been consumed.
+ *
+ * @param {object} deps
+ * @param {{remaining: function(): {tokens: number, costUsd: number}}} [deps.budgetTracker]
+ * @param {number} [deps.tokenBudget] Token limit for the run.
+ * @param {number} [deps.costCap] Cost limit for the run.
+ * @returns {({budgetUsedPct: number, remainingTokens: number, remainingCostUsd: number}|null)}
+ *   `null` when no budget tracker is present. Otherwise the larger of the token and
+ *   cost usage fractions, clamped to the 0..1 range, plus the raw remaining amounts.
+ */
+function calculateBudgetUsedPct(deps) {
+  const tracker = deps.budgetTracker;
+  if (!tracker) {
+    return null;
+  }
+  const { tokens: remainingTokens, costUsd: remainingCostUsd } = tracker.remaining();
+  // A limit that is missing, non-finite, or negative is treated as zero.
+  const limitOf = (raw) => Number.isFinite(raw) ? Math.max(0, raw) : 0;
+  // A zero limit counts as fully used, so the overall result becomes 1 in that case.
+  const usedFraction = (left, limit) => limit > 0 ? 1 - left / limit : 1;
+  const tokenUsed = usedFraction(remainingTokens, limitOf(deps.tokenBudget));
+  const costUsed = usedFraction(remainingCostUsd, limitOf(deps.costCap));
+  const highestUsage = Math.max(tokenUsed, costUsed);
+  return {
+    budgetUsedPct: Math.max(0, Math.min(1, highestUsage)),
+    remainingTokens,
+    remainingCostUsd
+  };
+}
+/**
+ * Converts a usage fraction into a whole-number percentage.
+ *
+ * @param {number} value Usage fraction, where 1 means 100%.
+ * @returns {number} `value` multiplied by 100 and rounded to the nearest integer.
+ */
+function formatBudgetUsedPct(value) {
+  const percent = value * 100;
+  return Math.round(percent);
+}
+/**
+ * Records one more rejection for `rejectionKey` and builds the tool result that reports it.
+ *
+ * @param {object} deps Tool dependencies; the counter is only updated when `deps.completionGuards` exists.
+ * @param {string} rejectionKey Key under which rejections are counted.
+ * @param {number} priorRejections Rejections already recorded for this key.
+ * @param {*} summary Completion summary echoed back in the payload.
+ * @param {object} [details] Extra payload fields; spread last, so they win on key clashes.
+ * @param {string} message Text passed to `toolResult` alongside the payload.
+ * @returns {*} Whatever `toolResult` returns for the rejection payload and message.
+ */
+function rejectCompletionAttempt(deps, rejectionKey, priorRejections, summary, details, message) {
+  const rejectionCount = priorRejections + 1;
+  deps.completionGuards?.rejectionCounts.set(rejectionKey, rejectionCount);
+  const payload = {
+    completed: false,
+    rejected: true,
+    rejectionCount,
+    summary,
+    ...details
+  };
+  return toolResult(payload, message);
+}
+/**
+ * Produces the sentence fragment that explains how far retirement paging has got.
+ *
+ * @param {{queryCalls: number, maxWindowEnd: number}} progress Snapshot of the retirement tracker.
+ * @param {number} knownCandidates Candidate count known before the pass started; 0 when unknown.
+ * @returns {string} A description used inside completion rejection messages.
+ */
+function describeRetirementProgress(progress, knownCandidates) {
+  const hasKnownCandidates = knownCandidates > 0;
+  const neverQueried = progress.queryCalls === 0;
+  if (neverQueried && hasKnownCandidates) {
+    return `about ${knownCandidates} retirement candidates were available before the pass started, but query_candidates has not been called yet`;
+  }
+  if (neverQueried) {
+    return "query_candidates has not been called yet";
+  }
+  if (hasKnownCandidates) {
+    return `only ${progress.maxWindowEnd} of about ${knownCandidates} retirement candidates have been paged so far`;
+  }
+  return `only ${progress.maxWindowEnd} retirement candidates have been paged so far and query_candidates has not been exhausted`;
+}
+/**
+ * Produces the sentence fragment that explains how far dedup cluster paging has got.
+ *
+ * @param {{queryCalls: number, maxWindowEnd: number}} progress Snapshot of the dedup tracker.
+ * @param {number} totalClusters Known cluster count; 0 when unknown.
+ * @returns {string} A description used inside completion rejection messages.
+ */
+function describeDedupProgress(progress, totalClusters) {
+  const hasKnownClusters = totalClusters > 0;
+  const neverQueried = progress.queryCalls === 0;
+  if (neverQueried && hasKnownClusters) {
+    return `${totalClusters} dedup clusters were cached for this run, but query_dedup_clusters has not been called yet`;
+  }
+  if (neverQueried) {
+    return "query_dedup_clusters has not been called yet";
+  }
+  if (hasKnownClusters) {
+    return `only ${progress.maxWindowEnd} of ${totalClusters} dedup clusters have been paged so far`;
+  }
+  return `only ${progress.maxWindowEnd} dedup clusters have been paged so far and query_dedup_clusters has not been exhausted`;
+}
 function createCompletePassTool(deps) {
   return {
     name: "complete_pass",
@@ -23178,12 +23291,111 @@ function createCompletePassTool(deps) {
         recommendations: params.recommendations
       };
       const passType = params.pass_type?.trim();
+      const rejectionKey = normalizeCompletionKey(passType, deps.pass);
+      const priorRejections = deps.completionGuards?.rejectionCounts.get(rejectionKey) ?? 0;
+      const budgetUsage = calculateBudgetUsedPct(deps);
+      const budgetUsedPct = budgetUsage?.budgetUsedPct ?? 1;
+      const budgetUsedLabel = formatBudgetUsedPct(budgetUsedPct);
+      const handledCount = Math.max(0, params.actions_taken + params.entries_skipped.length);
+      if (priorRejections < SAFETY_VALVE_REJECTION_LIMIT && budgetUsage && deps.completionGuards) {
+        const isPhaseTransition = isAutoPhaseTransition(deps.pass, passType);
+        const guardedPhase = isPhaseTransition ? passType : isCompletionPhase(deps.pass) ? deps.pass : null;
+        if (guardedPhase === "retirement" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
+          const progress = deps.completionGuards.retirement.snapshot();
+          const knownCandidates = deps.completionGuards.initialHealth.retirementCandidates;
+          const hasKnownRetirementWork = knownCandidates > 0 || progress.queryCalls > 0;
+          const shouldReject = hasKnownRetirementWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && knownCandidates > handledCount || progress.queryCalls > 0 && (knownCandidates === 0 || progress.maxWindowEnd < knownCandidates));
+          if (shouldReject) {
+            return rejectCompletionAttempt(
+              deps,
+              rejectionKey,
+              priorRejections,
+              summary,
+              {
+                phase: "retirement",
+                budgetUsedPct: budgetUsedLabel,
+                pagedCandidates: progress.maxWindowEnd,
+                knownCandidates: knownCandidates || null,
+                remainingTokens: budgetUsage.remainingTokens,
+                remainingCostUsd: budgetUsage.remainingCostUsd
+              },
+              `Retirement completion rejected: ${describeRetirementProgress(progress, knownCandidates)} with ${budgetUsedLabel}% of budget used. Continue calling query_candidates with a higher offset until it returns no more candidates or your budget is genuinely low.`
+            );
+          }
+        }
+        if (guardedPhase === "dedup" && budgetUsedPct < PHASE_COMPLETION_MIN_BUDGET_USED_FRACTION) {
+          const progress = deps.completionGuards.dedup.snapshot();
+          const totalClusters = progress.totalCount ?? deps.completionGuards.initialHealth.dedupClusters ?? 0;
+          const halfClusters = totalClusters > 0 ? Math.ceil(totalClusters * 0.5) : 0;
+          const hasKnownDedupWork = totalClusters > 0 || progress.queryCalls > 0;
+          const shouldReject = hasKnownDedupWork && !progress.sawExhaustedPage && (progress.queryCalls === 0 && totalClusters > handledCount || progress.queryCalls > 0 && (totalClusters === 0 || progress.maxWindowEnd < halfClusters));
+          if (shouldReject) {
+            return rejectCompletionAttempt(
+              deps,
+              rejectionKey,
+              priorRejections,
+              summary,
+              {
+                phase: "dedup",
+                budgetUsedPct: budgetUsedLabel,
+                pagedClusters: progress.maxWindowEnd,
+                totalClusters: totalClusters || null,
+                remainingTokens: budgetUsage.remainingTokens,
+                remainingCostUsd: budgetUsage.remainingCostUsd
+              },
+              `Dedup completion rejected: ${describeDedupProgress(progress, totalClusters)} with ${budgetUsedLabel}% of budget used. Continue paging query_dedup_clusters before completing the dedup phase.`
+            );
+          }
+        }
+        const isFinalAutoCompletion = deps.pass === "auto" && (!passType || passType === "auto");
+        if (isFinalAutoCompletion && budgetUsedPct < FINAL_COMPLETION_MIN_BUDGET_USED_FRACTION) {
+          const reasons = [];
+          const pendingConflicts = deps.completionGuards.pendingConflicts.snapshot();
+          const dedup = deps.completionGuards.dedup.snapshot();
+          const retirement = deps.completionGuards.retirement.snapshot();
+          const contradictionScan = deps.completionGuards.contradictionScan.snapshot();
+          const initialPendingConflicts = deps.completionGuards.initialHealth.pendingConflicts ?? 0;
+          const initialDedupClusters = deps.completionGuards.initialHealth.dedupClusters ?? 0;
+          const initialRetirementCandidates = deps.completionGuards.initialHealth.retirementCandidates;
+          if (initialPendingConflicts > 0 && !pendingConflicts.sawExhaustedPage && (pendingConflicts.queryCalls === 0 || pendingConflicts.maxWindowEnd < initialPendingConflicts)) {
+            reasons.push(
+              pendingConflicts.queryCalls === 0 ? `${initialPendingConflicts} pending conflicts were available and query_conflicts has not been paged` : `only ${pendingConflicts.maxWindowEnd} of ${initialPendingConflicts} pending conflicts have been paged`
+            );
+          }
+          if (initialDedupClusters > 0 && !dedup.sawExhaustedPage && (dedup.queryCalls === 0 || dedup.maxWindowEnd < initialDedupClusters)) {
+            reasons.push(describeDedupProgress(dedup, initialDedupClusters));
+          }
+          if (initialRetirementCandidates > 0 && !retirement.sawExhaustedPage && (retirement.queryCalls === 0 || retirement.maxWindowEnd < initialRetirementCandidates)) {
+            reasons.push(describeRetirementProgress(retirement, initialRetirementCandidates));
+          }
+          if (deps.completionGuards.initialHealth.totalEntries >= LARGE_CORPUS_PROACTIVE_SCAN_THRESHOLD && contradictionScan.queryCalls === 0) {
+            reasons.push("the proactive contradiction scan has not run yet");
+          }
+          if (reasons.length > 0) {
+            return rejectCompletionAttempt(
+              deps,
+              rejectionKey,
+              priorRejections,
+              summary,
+              {
+                phase: "auto",
+                budgetUsedPct: budgetUsedLabel,
+                remainingTokens: budgetUsage.remainingTokens,
+                remainingCostUsd: budgetUsage.remainingCostUsd,
+                reasons
+              },
+              `Completion rejected: only ${budgetUsedLabel}% of budget used and the sweep still looks incomplete because ${reasons.join("; ")}. Continue paging candidates and only call complete_pass with pass_type="auto" when the remaining phases are genuinely exhausted or budget is low.`
+            );
+          }
+        }
+      }
       if (passType && passType !== "auto" && deps.pass === "auto") {
         deps.completionState.completePhase(passType, summary);
         return toolResult(
           {
             completed: false,
             phaseComplete: passType,
+            safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
             summary
           },
           `${passType} phase complete. Continue with the next pass. Call complete_pass with pass_type="auto" when all passes are done.`
@@ -23193,6 +23405,7 @@ function createCompletePassTool(deps) {
       return toolResult(
         {
           completed: true,
+          safetyValveUsed: priorRejections >= SAFETY_VALVE_REJECTION_LIMIT,
           summary
         },
         "Pass marked complete. Do not call more tools. Respond with a brief final acknowledgment."
@@ -23450,6 +23663,7 @@ function createQueryContradictionCandidatesTool(deps) {
     async execute(_toolCallId, params) {
       if (params.reset === true) {
         cached = null;
+        deps.completionGuards?.contradictionScan.reset();
       }
       const query = buildQuery(params, deps);
       const offset = normalizeOffset2(params.offset);
@@ -23477,6 +23691,12 @@ function createQueryContradictionCandidatesTool(deps) {
       }
       const totalCount = cached?.pairs.length ?? 0;
       if (offset >= totalCount) {
+        deps.completionGuards?.contradictionScan.recordPage({
+          offset,
+          returnedCount: 0,
+          totalCount,
+          exhausted: true
+        });
         return toolResult({
           pairs: [],
           count: 0,
@@ -23488,6 +23708,12 @@ function createQueryContradictionCandidatesTool(deps) {
         });
       }
       const pairs = (cached?.pairs ?? []).slice(offset, offset + limit);
+      deps.completionGuards?.contradictionScan.recordPage({
+        offset,
+        returnedCount: pairs.length,
+        totalCount,
+        exhausted: offset + pairs.length >= totalCount
+      });
       return toolResult({
         pairs,
         count: pairs.length,
@@ -23627,6 +23853,12 @@ function createQueryConflictsTool(deps) {
         now: deps.now()
       });
       if (offset >= conflicts.length) {
+        deps.completionGuards?.pendingConflicts.recordPage({
+          offset,
+          returnedCount: 0,
+          totalCount: conflicts.length,
+          exhausted: true
+        });
         return toolResult({
           conflicts: [],
           count: 0,
@@ -23637,6 +23869,12 @@ function createQueryConflictsTool(deps) {
         });
       }
       const page = conflicts.slice(offset, offset + limit).filter((conflict) => !deps.conflictCache?.consumedConflictIds.has(conflict.id)).map((conflict) => summarizeConflict(conflict));
+      deps.completionGuards?.pendingConflicts.recordPage({
+        offset,
+        returnedCount: page.length,
+        totalCount: conflicts.length,
+        exhausted: offset + page.length >= conflicts.length
+      });
       return toolResult({
         conflicts: page,
         count: page.length,
@@ -24074,6 +24312,7 @@ function createQueryDedupClustersTool(deps) {
       }
       if (params.reset === true) {
         resetDedupClusterCache(deps.clusterCache);
+        deps.completionGuards?.dedup.reset();
       }
       const query = normalizeDedupClusterQuery(
         {
@@ -24093,6 +24332,12 @@ function createQueryDedupClustersTool(deps) {
         now: deps.now()
       });
       if (offset >= clusters.length) {
+        deps.completionGuards?.dedup.recordPage({
+          offset,
+          returnedCount: 0,
+          totalCount: clusters.length,
+          exhausted: true
+        });
         return toolResult({
           clusters: [],
           count: 0,
@@ -24104,6 +24349,12 @@ function createQueryDedupClustersTool(deps) {
         });
       }
       const page = clusters.slice(offset, offset + limit).map((cluster, index) => summarizeDedupCluster(cluster, offset + index, query.project));
+      deps.completionGuards?.dedup.recordPage({
+        offset,
+        returnedCount: page.length,
+        totalCount: clusters.length,
+        exhausted: offset + page.length >= clusters.length
+      });
       return toolResult({
         clusters: page,
         count: page.length,
@@ -24269,6 +24520,18 @@ var QUERY_CANDIDATES_SCHEMA = Type12.Object({
   limit: Type12.Optional(Type12.Integer({ minimum: 1, maximum: 100 })),
   offset: Type12.Optional(Type12.Integer({ minimum: 0 }))
 });
+/**
+ * Normalizes a requested page size.
+ *
+ * @param {*} value Requested limit.
+ * @returns {number} The floored limit when `value` is a finite number above zero, otherwise 20.
+ */
+function normalizeLimit5(value) {
+  const isUsable = Number.isFinite(value) && value > 0;
+  return isUsable ? Math.floor(value) : 20;
+}
+/**
+ * Normalizes a requested page offset.
+ *
+ * @param {*} value Requested offset.
+ * @returns {number} The floored offset when `value` is a finite number that is not negative, otherwise 0.
+ */
+function normalizeOffset5(value) {
+  const isUsable = Number.isFinite(value) && value >= 0;
+  return isUsable ? Math.floor(value) : 0;
+}
 function createQueryCandidatesTool(deps) {
   return {
     name: "query_candidates",
@@ -24276,6 +24539,8 @@ function createQueryCandidatesTool(deps) {
     description: "List active entries that look stale enough to inspect for retirement.",
     parameters: QUERY_CANDIDATES_SCHEMA,
     async execute(_toolCallId, params) {
+      const limit = normalizeLimit5(params.limit);
+      const offset = normalizeOffset5(params.offset);
       const candidates = await listRetirementCandidates(deps.db, {
         project: params.project?.trim() || deps.project,
         type: params.type?.trim() || void 0,
@@ -24289,6 +24554,11 @@ function createQueryCandidatesTool(deps) {
         runId: deps.runId,
         now: deps.now()
       });
+      deps.completionGuards?.retirement.recordPage({
+        offset,
+        returnedCount: candidates.length,
+        exhausted: candidates.length < limit
+      });
       if (candidates.length === 0) {
         return toolResult({
           candidates: [],
@@ -24492,13 +24762,13 @@ var QUERY_SUPERSESSION_SCHEMA = Type15.Object({
   limit: Type15.Optional(Type15.Integer({ minimum: 1, maximum: 50, default: 20 })),
   offset: Type15.Optional(Type15.Integer({ minimum: 0 }))
 });
-function normalizeOffset5(value) {
+function normalizeOffset6(value) {
   if (!Number.isFinite(value) || (value ?? 0) < 0) {
     return 0;
   }
   return Math.floor(value);
 }
-function normalizeLimit5(value) {
+function normalizeLimit6(value) {
   if (!Number.isFinite(value) || (value ?? 0) <= 0) {
     return 20;
   }
@@ -24522,8 +24792,8 @@ function createQuerySupersessionCandidatesTool(deps) {
         },
         deps.project
       );
-      const offset = normalizeOffset5(params.offset);
-      const limit = normalizeLimit5(params.limit);
+      const offset = normalizeOffset6(params.offset);
+      const limit = normalizeLimit6(params.limit);
       const { groups } = await loadEligibleSupersessionGroups(deps.db, {
         cache: deps.supersessionCache,
         query,
@@ -24946,7 +25216,7 @@ async function captureBrainHealthSnapshot(db) {
 // src/modules/surgeon/application/workflow.ts
 var USER_ABORT_ERROR = "Run aborted by user (SIGINT).";
 var USER_ABORT_SUMMARY = "Run aborted by user.";
-var MAX_CONTINUATION_ATTEMPTS = 3;
+var MAX_CONTINUATION_ATTEMPTS = 5;
 var LOW_BUDGET_FRACTION = 0.1;
 var SHALLOW_RUN_WARNING_BUDGET_USED_FRACTION = 0.5;
 function resolveRunBudget(options, config) {
@@ -25463,6 +25733,12 @@ async function runSurgeon(options, deps) {
       skipRecentlyEvaluatedDays: protection.contradictionSkipRecentlyEvaluatedDays,
       now
     })).length : void 0;
+    const completionGuards = createSurgeonCompletionGuardState({
+      totalEntries: initialStatus.health.total,
+      retirementCandidates: initialStatus.health.forgetting.candidates,
+      dedupClusters: initialDedupClusterCount ?? initialAutoDedupClusterCount,
+      pendingConflicts: initialPendingConflictCount
+    });
     const tools = createToolRegistryFn({
       db: deps.db,
       config: deps.config,
@@ -25493,6 +25769,10 @@ async function runSurgeon(options, deps) {
         await logSurgeonAction(deps.db, action);
         traceLogger.logAction(action);
       },
+      budgetTracker,
+      tokenBudget,
+      costCap: runCostCap,
+      completionGuards,
       getHealthStats: (statusDeps) => loadStatusFn(
         {
           db: deps.db,

package/dist/modules/surgeon/adapters/prompts/system.md CHANGED Viewed

@@ -43,7 +43,7 @@ You are working through the full candidate pool, not just one batch. After proce
 - Your budget is running low - check the budget warnings from blocked tool calls
 - You have exhausted the actionable candidates
-Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass.
+Only call `complete_pass` when you have genuinely finished working through available candidates or your budget is exhausted. Processing a single batch and stopping is not completing the pass. `complete_pass` will reject your request if significant budget remains and candidates have not been exhausted. If your completion is rejected, continue paging through candidates.
 When `query_candidates` returns zero candidates, that is your signal that no more candidates match the current filters and it is appropriate to call `complete_pass`.
@@ -97,13 +97,15 @@ This is your core competency - the judgment that mechanical rules cannot make.
 ## Budget Awareness
-You have a token budget. Prioritize high-value actions:
+You have a token budget for this run. Use it wisely:
 - Don't waste budget inspecting entries that are obviously protected or clearly fine from their summary.
 - Don't inspect every candidate - scan summaries, pick the most promising ones.
-- When you have enough evidence, act or finish. Don't over-investigate.
+- When you have enough evidence, act or skip. Don't over-investigate a single entry.
 - Flag borderline cases for review rather than spending budget trying to reach certainty.
+**But do not stop early.** Efficiency means spending budget on the right candidates, not spending less budget overall. If candidates remain and budget is available, keep working. The `complete_pass` tool will reject premature completion if you have not used enough of your budget.
 ## Scope
 - When a project scope is provided, focus on entries in that project plus universal (unscoped) entries.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agenr",
-  "version": "0.13.2",
+  "version": "0.13.4",
   "openclaw": {
     "extensions": [
       "dist/edge/openclaw/index.js"