npm - @blockrun/franklin - Versions diffs - 3.15.23 → 3.15.25 - Mend

@blockrun/franklin 3.15.23 → 3.15.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/agent/loop.js +28 -7
package/dist/agent/types.d.ts +1 -1
package/dist/proxy/server.js +10 -2
package/dist/stats/audit.js +9 -0
package/dist/stats/test-fixture.js +14 -0
package/dist/stats/tracker.js +6 -0
package/package.json +1 -1

package/dist/agent/loop.js CHANGED Viewed

@@ -606,6 +606,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         const turnToolCounts = new Map(); // Per-tool-name counts this turn
         const readFileCache = new Set(); // Files already read (dedup)
         const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
+        // Hard break threshold for runaways. The cap above is soft — we
+        // inject a "limit reached" tool_result once and let the model
+        // close out. If it ignores that signal and keeps calling tools,
+        // we force end the turn to prevent unbounded billing. Verified
+        // on a real user log: one turn went 25 → 100 tool calls before
+        // the loop ended via maxTurns (much later, much more expensive).
+        const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
+        let toolCapWarned = false; // Log + inject only once per turn
         const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
         // ── No-progress guardrail: kill infinite tiny-response loops ──
         let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
@@ -1527,8 +1535,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     });
                 }
             }
-            // Hard cap: stop the turn if too many tool calls
-            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
+            // Soft cap: nudge the model to stop. Inject once per turn —
+            // re-injecting on every iteration past the cap is just noise
+            // and clutters the model's context with repeated stop signals.
+            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
                 outcomeContent.push({
                     type: 'tool_result',
                     tool_use_id: 'guardrail-cap',
@@ -1569,11 +1579,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     }
                 }
             }
-            // Hard stop: if cap exceeded, force end this agent loop iteration
-            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
-                logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
-                // Don't break — let the model respond one more time to summarize,
-                // but inject the stop signal above so it knows to finish up.
+            // Cap signaling: warn once per turn (was firing every iteration
+            // past the cap — verified on a real user log, one turn produced
+            // 76 sequential warnings 25→100). Hard break at 2× cap stops a
+            // runaway model that ignores the soft stop signal above.
+            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
+                toolCapWarned = true;
+                logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn (soft cap ${MAX_TOOL_CALLS_PER_TURN}, hard cap ${HARD_TOOL_CAP})`);
+            }
+            if (turnToolCalls >= HARD_TOOL_CAP) {
+                logger.error(`[franklin] Hard tool cap exceeded (${turnToolCalls}) — ending turn to prevent runaway`);
+                onEvent({
+                    kind: 'text_delta',
+                    text: `\n\n⚠️ Tool call limit exceeded (${turnToolCalls}/${HARD_TOOL_CAP}). Ending turn to prevent runaway loop. Try rephrasing or use \`/model\` to switch.\n`,
+                });
+                onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
+                break;
             }
         }
         if (loopCount >= maxTurns) {

package/dist/agent/types.d.ts CHANGED Viewed

@@ -125,7 +125,7 @@ export interface StreamCapabilityDone {
 }
 export interface StreamTurnDone {
     kind: 'turn_done';
-    reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress';
+    reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress' | 'cap_exceeded';
     error?: string;
 }
 export interface StreamUsageInfo {

package/dist/proxy/server.js CHANGED Viewed

@@ -16,6 +16,7 @@ const X_FRANKLIN_VERSION = VERSION;
 // pattern!), and timestamp format — bug fixes never propagated. They
 // were the last holdouts after the agent loop was migrated.
 import { logger, setDebugMode } from '../logger.js';
+import { isTestFixtureModel } from '../stats/test-fixture.js';
 const DEFAULT_MAX_TOKENS = 4096;
 // 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
 // nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
@@ -439,6 +440,11 @@ export function createProxy(options) {
                         solanaWallet,
                         timeoutMs: requestTimeoutMs,
                     }, (failedModel, status, nextModel) => {
+                        // Skip test-fixture model names (slow/, mock/, test/, local/test*)
+                        // — these come from in-process proxy tests with mock servers and
+                        // would otherwise pollute the user's real franklin-debug.log.
+                        if (isTestFixtureModel(failedModel) || isTestFixtureModel(nextModel))
+                            return;
                         logger.warn(`[franklin] ⚠️  ${failedModel} returned ${status}, falling back to ${nextModel}`);
                     });
                     response = result.response;
@@ -446,7 +452,7 @@ export function createProxy(options) {
                     // Use the body with the correct fallback model for payment
                     body = result.bodyUsed;
                     usedFallback = result.fallbackUsed;
-                    if (usedFallback) {
+                    if (usedFallback && !isTestFixtureModel(finalModel)) {
                         logger.info(`[franklin] ↺ Fallback successful: using ${finalModel}`);
                     }
                 }
@@ -678,7 +684,9 @@ async function fetchWithPaymentFallback(url, init, originalBody, config, payment
             if (nextModel && onFallback) {
                 onFallback(model, 0, nextModel);
             }
-            logger.warn(`[franklin] [fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
+            if (!isTestFixtureModel(model)) {
+                logger.warn(`[franklin] [fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
+            }
             if (i < config.chain.length - 1) {
                 await sleep(config.retryDelayMs);
             }

package/dist/stats/audit.js CHANGED Viewed

@@ -33,6 +33,15 @@ export function appendAudit(entry) {
     // remember to redirect HOME.
     if (isTestFixtureModel(entry.model))
         return;
+    // Belt-and-braces: when 3.15.17 renamed several test fixtures from
+    // local/test-model to zai/glm-5.1 (a real-looking model, so
+    // persistence tests can verify the write path), the model-name gate
+    // stopped catching them. Verified on a real machine: 310 of 370
+    // recent zai/glm-5.1 audit entries had output_tokens < 10 — clearly
+    // mock responses. The env-var lets tests opt out at file level
+    // without renaming fixtures back.
+    if (process.env.FRANKLIN_NO_AUDIT === '1')
+        return;
     try {
         fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
         const safe = {

package/dist/stats/test-fixture.js CHANGED Viewed

@@ -17,12 +17,26 @@
  * `local/lmstudio`, etc.) are intentionally NOT filtered — only the
  * `local/test` prefix.
  */
+// Prefixes test files use to mark "this isn't a real model name". The
+// list grew by inspection of real franklin-debug.log pollution after
+// 3.15.16 — each new convention surfaced as a writes-to-user-home leak:
+//   `local/test*`  — agent loop in-process tests (test/local.mjs:567 etc.)
+//   `slow/`        — proxy timeout test (test/local.mjs:380)
+//   `mock/`        — generic mock-server fixtures (defensive)
+//   `test/`        — e.g. `test/model` used in some test paths
 const TEST_FIXTURE_PREFIXES = [
     'local/test',
+    'slow/',
+    'mock/',
+    'test/',
 ];
+// Exact-match fixtures (model is literally "test" without a slash).
+const TEST_FIXTURE_EXACT = new Set(['test']);
 export function isTestFixtureModel(model) {
     if (!model)
         return false;
+    if (TEST_FIXTURE_EXACT.has(model))
+        return true;
     for (const prefix of TEST_FIXTURE_PREFIXES) {
         if (model.startsWith(prefix))
             return true;

package/dist/stats/tracker.js CHANGED Viewed

@@ -163,6 +163,12 @@ export function recordUsage(model, inputTokens, outputTokens, costUsd, latencyMs
     // test fixtures before this gate).
     if (isTestFixtureModel(model))
         return;
+    // Test fixtures using real model names (`zai/glm-5.1` after 3.15.17's
+    // rename) escape the prefix gate. Env-var override lets tests opt
+    // out at file level. Mirrors the audit.ts guard; same env var so
+    // tests flip a single switch.
+    if (process.env.FRANKLIN_NO_AUDIT === '1')
+        return;
     const stats = getCachedStats();
     const now = Date.now();
     // Update totals

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.23",
+  "version": "3.15.25",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {