npm - @blockrun/franklin - Versions diffs - 3.15.29 → 3.15.31 - Mend

@blockrun/franklin 3.15.29 → 3.15.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/agent/loop.js +56 -27
package/dist/storage/hygiene.d.ts +17 -2
package/dist/storage/hygiene.js +36 -13
package/package.json +1 -1

package/dist/agent/loop.js CHANGED

@@ -448,7 +448,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         persistSessionMeta();
     };
     pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
-    runDataHygiene(); // Trim ~/.blockrun/data + cost_log + remove legacy files
+    // Trim ~/.blockrun/data + cost_log + remove legacy files + sweep
+    // orphan tool-results dirs. Logs a summary if anything was actually
+    // touched — pre-3.15.31 hygiene was completely silent and the only
+    // way to verify it was running was poking disk yourself.
+    const hygieneReport = runDataHygiene();
+    const totalCleaned = hygieneReport.legacyFilesRemoved +
+        hygieneReport.dataFilesTrimmed +
+        hygieneReport.costLogRowsTrimmed +
+        hygieneReport.orphanToolResultsRemoved;
+    if (totalCleaned > 0) {
+        logger.info(`[franklin] Data hygiene: ${hygieneReport.legacyFilesRemoved} legacy, ${hygieneReport.dataFilesTrimmed} data files, ${hygieneReport.costLogRowsTrimmed} cost_log rows, ${hygieneReport.orphanToolResultsRemoved} orphan tool-results dirs cleaned`);
+    }
     persistSessionMeta();
     // Flush session meta on SIGINT/SIGTERM so mid-stream Ctrl+C doesn't
     // leave a stale .meta.json (wrong turnCount/messageCount/cost).
@@ -615,19 +626,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
         let toolCapWarned = false; // Log + inject only once per turn
         const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
-        // Hard stop at 2× the warn threshold. The previous loop injected
-        // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
-        // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
-        // anyway). Strong models read the system tool_result, briefly
-        // acknowledge, then call the same tool again — the soft injection
-        // doesn't actually constrain behavior. Hard stop matches what
-        // HARD_TOOL_CAP already does for total tool count.
-        const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
+        // Repetition-based hard stop. 3.15.28 used a count-based threshold
+        // (Bash called 6× → break) which incorrectly killed legitimate
+        // exploratory data work — verified 2026-05-04 in a real Opus session
+        // running data-engineering on GCS logs: 15 distinct gsutil/bq calls,
+        // each producing new insights, would have been cut off at call 6.
+        // 3.15.30 detects ACTUAL loops by tracking the (tool, input)
+        // signature: only break when the model calls the SAME signature 3
+        // times in one turn. Different inputs → exploration, allowed.
+        const SAME_SIGNATURE_HARD_STOP = 3;
         // Tracks which tool names have already had a warn injected this turn.
         // Without it, every call past threshold pushes another [SYSTEM] STOP
         // tool_result into the model's context — same shape bug as the cap
         // spam fixed in 3.15.24, just in a sibling guardrail.
         const sameToolWarned = new Set();
+        // Tracks how many times each (tool, input)-signature has been called
+        // this turn. Different inputs → different signatures → exploration.
+        const turnSignatureCounts = new Map();
         // ── No-progress guardrail: kill infinite tiny-response loops ──
         let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
         const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1495,6 +1510,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             for (const [inv] of results) {
                 const name = inv.name;
                 turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
+                // Track (tool, input)-signature for the loop detector below.
+                // Identical signatures → real loop. Different inputs → exploration.
+                const sig = toolCallSignature(name, inv.input);
+                turnSignatureCounts.set(sig, (turnSignatureCounts.get(sig) || 0) + 1);
                 // Session-scope aggregate (drives telemetry opt-in export).
                 sessionToolCounts.set(name, (sessionToolCounts.get(name) || 0) + 1);
                 // Read file dedup: track paths already read
@@ -1555,14 +1574,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
             // just spammed the model's context: Opus-4.7 verified to ignore 4
             // sequential "STOP" messages and keep calling Bash. Cleaner contract:
-            // one nudge at the threshold, then if the model ignores it past
-            // SAME_TOOL_HARD_STOP, break the turn.
-            let sameToolHardStopHit = null;
+            // one nudge at the threshold, and the loop detector below catches
+            // genuine stuck loops via input-signature repetition (3.15.30
+            // replaced 3.15.28's count-based hard stop — that broke legitimate
+            // exploratory data work where 15 distinct gsutil/bq calls were
+            // each producing new insights).
             for (const [name, count] of turnToolCounts) {
-                if (count >= SAME_TOOL_HARD_STOP) {
-                    sameToolHardStopHit = name;
-                    continue;
-                }
                 if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
                     sameToolWarned.add(name);
                     outcomeContent.push({
@@ -1573,6 +1590,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     });
                 }
             }
+            // True loop detector: same (tool, input) signature repeated.
+            // Catches the actual failure mode (model retrying the exact same
+            // call hoping for a different result) without misfiring on
+            // legitimate exploration where each call has different input.
+            let stuckSignature = null;
+            for (const [sig, count] of turnSignatureCounts) {
+                if (count >= SAME_SIGNATURE_HARD_STOP) {
+                    stuckSignature = { sig, count };
+                    break;
+                }
+            }
             // Hard cap: nudge the model to stop. Inject once per turn —
             // re-injecting on every iteration past the cap is just noise
             // and clutters the model's context with repeated stop signals.
@@ -1634,19 +1662,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
                 break;
             }
-            // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
-            // read the warn injection, briefly acknowledge it, and call the
-            // same tool again — the soft signal is ineffective. Break the
-            // turn here when one tool name crosses the hard threshold to
-            // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
-            // Bash calls past 3 nags before this break would have triggered
-            // (at 6).
-            if (sameToolHardStopHit) {
-                const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
-                logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
+            // Signature-based hard stop (3.15.30). The original 3.15.28 fired
+            // on count alone (Bash 6× → break), which incorrectly killed
+            // legitimate data-engineering work — the same Opus-4.7 session
+            // verified at 2026-05-04 13:36 was making 15 distinct gsutil/bq
+            // calls, each producing new insights. Now we only break when the
+            // SAME (tool, input) signature has been called 3× — the actual
+            // failure mode of "model retrying the exact same call hoping
+            // something changes". Different inputs = exploration, allowed.
+            if (stuckSignature) {
+                const toolName = stuckSignature.sig.split('::')[0];
+                logger.error(`[franklin] Signature-loop hard stop: \`${toolName}\` called with identical input ${stuckSignature.count} times this turn — ending turn`);
                 onEvent({
                     kind: 'text_delta',
-                    text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
+                    text: `\n\n⚠️ ${toolName} called ${stuckSignature.count}× with the same input this turn — that's a real loop, not exploration. Ending turn. Rephrase what you actually need, or try \`/model\` to switch.\n`,
                 });
                 onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
                 break;

package/dist/storage/hygiene.d.ts CHANGED

@@ -21,8 +21,23 @@
  * unlinkSync). Best-effort: every operation is wrapped so a single failure
  * never breaks agent boot.
  */
+/**
+ * Summary of what hygiene removed/trimmed in one pass. Returned so the
+ * caller (agent loop) can log it — silent hygiene is hard to verify
+ * without poking at disk yourself, which is exactly the kind of thing
+ * users shouldn't have to do.
+ */
+export interface HygieneReport {
+    legacyFilesRemoved: number;
+    dataFilesTrimmed: number;
+    costLogRowsTrimmed: number;
+    orphanToolResultsRemoved: number;
+}
 /**
  * Top-level entry. Call once at agent session start. Catches its own
- * errors so a bad disk never blocks startup.
+ * errors so a bad disk never blocks startup. Returns counts so callers
+ * can log a one-line summary — verified 2026-05-04 from a real session
+ * where hygiene was running silently for hours and there was no way to
+ * tell from the log whether anything was being cleaned.
  */
-export declare function runDataHygiene(): void;
+export declare function runDataHygiene(): HygieneReport;

package/dist/storage/hygiene.js CHANGED

@@ -44,35 +44,46 @@ const LEGACY_FILENAMES = [
     '0xcode-stats.json',
     'runcode-debug.log',
 ];
+const ZERO_REPORT = {
+    legacyFilesRemoved: 0,
+    dataFilesTrimmed: 0,
+    costLogRowsTrimmed: 0,
+    orphanToolResultsRemoved: 0,
+};
 /**
  * Top-level entry. Call once at agent session start. Catches its own
- * errors so a bad disk never blocks startup.
+ * errors so a bad disk never blocks startup. Returns counts so callers
+ * can log a one-line summary — verified 2026-05-04 from a real session
+ * where hygiene was running silently for hours and there was no way to
+ * tell from the log whether anything was being cleaned.
  */
 export function runDataHygiene() {
+    const report = { ...ZERO_REPORT };
     try {
-        trimDataDir();
+        report.dataFilesTrimmed = trimDataDir();
     }
     catch { /* best effort */ }
     try {
-        trimCostLog();
+        report.costLogRowsTrimmed = trimCostLog();
     }
     catch { /* best effort */ }
     try {
-        removeLegacyFiles();
+        report.legacyFilesRemoved = removeLegacyFiles();
     }
     catch { /* best effort */ }
     try {
-        sweepOrphanToolResults();
+        report.orphanToolResultsRemoved = sweepOrphanToolResults();
     }
     catch { /* best effort */ }
+    return report;
 }
 function trimDataDir() {
     const dir = path.join(BLOCKRUN_DIR, 'data');
     if (!fs.existsSync(dir))
-        return;
+        return 0;
     const entries = fs.readdirSync(dir);
     if (entries.length === 0)
-        return;
+        return 0;
     const cutoff = Date.now() - DATA_DIR_MAX_AGE_MS;
     const stats = [];
     for (const name of entries) {
@@ -86,11 +97,13 @@ function trimDataDir() {
             // Best effort — skip unreadable entries.
         }
     }
+    let removed = 0;
     // Pass 1: age-based delete.
     for (const e of stats) {
         if (e.mtime < cutoff) {
             try {
                 fs.unlinkSync(path.join(dir, e.name));
+                removed++;
             }
             catch { /* ok */ }
         }
@@ -106,35 +119,42 @@ function trimDataDir() {
         for (let i = 0; i < excess; i++) {
             try {
                 fs.unlinkSync(path.join(dir, survivors[i].name));
+                removed++;
             }
             catch { /* ok */ }
         }
     }
+    return removed;
 }
 function trimCostLog() {
     const file = path.join(BLOCKRUN_DIR, 'cost_log.jsonl');
     if (!fs.existsSync(file))
-        return;
+        return 0;
     // Cheap probe — skip the full read+rewrite when the file is small.
     const stat = fs.statSync(file);
     if (stat.size < COST_LOG_PROBE_BYTES)
-        return;
+        return 0;
     const lines = fs.readFileSync(file, 'utf-8').split('\n').filter(Boolean);
     if (lines.length <= COST_LOG_MAX_ENTRIES)
-        return;
+        return 0;
+    const dropped = lines.length - COST_LOG_MAX_ENTRIES;
     const kept = lines.slice(lines.length - COST_LOG_MAX_ENTRIES);
     fs.writeFileSync(file, kept.join('\n') + '\n');
+    return dropped;
 }
 function removeLegacyFiles() {
+    let removed = 0;
     for (const name of LEGACY_FILENAMES) {
         const p = path.join(BLOCKRUN_DIR, name);
         if (!fs.existsSync(p))
             continue;
         try {
             fs.unlinkSync(p);
+            removed++;
         }
         catch { /* ok */ }
     }
+    return removed;
 }
 /**
  * `streaming-executor` writes large tool outputs to
@@ -151,7 +171,7 @@ function sweepOrphanToolResults() {
     const toolResultsDir = path.join(BLOCKRUN_DIR, 'tool-results');
     const sessionsDir = path.join(BLOCKRUN_DIR, 'sessions');
     if (!fs.existsSync(toolResultsDir))
-        return;
+        return 0;
     const knownSessionIds = new Set();
     if (fs.existsSync(sessionsDir)) {
         try {
@@ -165,7 +185,7 @@ function sweepOrphanToolResults() {
             // Best-effort — if we can't read sessions/, skip the sweep so
             // we never delete tool-results that might still belong to a
             // live session.
-            return;
+            return 0;
         }
     }
     let entries;
@@ -173,8 +193,9 @@ function sweepOrphanToolResults() {
         entries = fs.readdirSync(toolResultsDir);
     }
     catch {
-        return;
+        return 0;
     }
+    let removed = 0;
     for (const name of entries) {
         if (knownSessionIds.has(name))
             continue;
@@ -184,9 +205,11 @@ function sweepOrphanToolResults() {
             if (!stat.isDirectory())
                 continue;
             fs.rmSync(dir, { recursive: true, force: true });
+            removed++;
         }
         catch {
             // Skip — best-effort cleanup.
         }
     }
+    return removed;
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.29",
+  "version": "3.15.31",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {