@blockrun/franklin 3.15.23 → 3.15.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -606,6 +606,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
606
606
  const turnToolCounts = new Map(); // Per-tool-name counts this turn
607
607
  const readFileCache = new Set(); // Files already read (dedup)
608
608
  const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
609
+ // Hard break threshold for runaways. The cap above is soft — we
610
+ // inject a "limit reached" tool_result once and let the model
611
+ // close out. If it ignores that signal and keeps calling tools,
612
+ // we force end the turn to prevent unbounded billing. Verified
613
+ // on a real user log: one turn went 25 → 100 tool calls before
614
+ // the loop ended via maxTurns (much later, much more expensive).
615
+ const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
616
+ let toolCapWarned = false; // Log + inject only once per turn
609
617
  const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
610
618
  // ── No-progress guardrail: kill infinite tiny-response loops ──
611
619
  let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
@@ -1527,8 +1535,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1527
1535
  });
1528
1536
  }
1529
1537
  }
1530
- // Hard cap: stop the turn if too many tool calls
1531
- if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
1538
+ // Soft cap: nudge the model to stop. Inject once per turn —
1539
+ // re-injecting on every iteration past the cap is just noise
1540
+ // and clutters the model's context with repeated stop signals.
1541
+ if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
1532
1542
  outcomeContent.push({
1533
1543
  type: 'tool_result',
1534
1544
  tool_use_id: 'guardrail-cap',
@@ -1569,11 +1579,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1569
1579
  }
1570
1580
  }
1571
1581
  }
1572
- // Hard stop: if cap exceeded, force end this agent loop iteration
1573
- if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
1574
- logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
1575
- // Don't break let the model respond one more time to summarize,
1576
- // but inject the stop signal above so it knows to finish up.
1582
+ // Cap signaling: warn once per turn (was firing every iteration
1583
+ // past the cap — verified on a real user log, one turn produced
1584
+ // 76 sequential warnings 25→100). Hard break at HARD_TOOL_CAP stops a
1585
+ // runaway model that ignores the soft stop signal above.
1586
+ if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
1587
+ toolCapWarned = true;
1588
+ logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn (soft cap ${MAX_TOOL_CALLS_PER_TURN}, hard cap ${HARD_TOOL_CAP})`);
1589
+ }
1590
+ if (turnToolCalls >= HARD_TOOL_CAP) {
1591
+ logger.error(`[franklin] Hard tool cap exceeded (${turnToolCalls}) — ending turn to prevent runaway`);
1592
+ onEvent({
1593
+ kind: 'text_delta',
1594
+ text: `\n\n⚠️ Tool call limit exceeded (${turnToolCalls}/${HARD_TOOL_CAP}). Ending turn to prevent runaway loop. Try rephrasing or use \`/model\` to switch.\n`,
1595
+ });
1596
+ onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1597
+ break;
1577
1598
  }
1578
1599
  }
1579
1600
  if (loopCount >= maxTurns) {
@@ -125,7 +125,7 @@ export interface StreamCapabilityDone {
125
125
  }
126
126
  export interface StreamTurnDone {
127
127
  kind: 'turn_done';
128
- reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress';
128
+ reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress' | 'cap_exceeded';
129
129
  error?: string;
130
130
  }
131
131
  export interface StreamUsageInfo {
@@ -16,6 +16,7 @@ const X_FRANKLIN_VERSION = VERSION;
16
16
  // pattern!), and timestamp format — bug fixes never propagated. They
17
17
  // were the last holdouts after the agent loop was migrated.
18
18
  import { logger, setDebugMode } from '../logger.js';
19
+ import { isTestFixtureModel } from '../stats/test-fixture.js';
19
20
  const DEFAULT_MAX_TOKENS = 4096;
20
21
  // 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
21
22
  // nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
@@ -439,6 +440,11 @@ export function createProxy(options) {
439
440
  solanaWallet,
440
441
  timeoutMs: requestTimeoutMs,
441
442
  }, (failedModel, status, nextModel) => {
443
+ // Skip test-fixture model names (slow/, mock/, test/, local/test*)
444
+ // — these come from in-process proxy tests with mock servers and
445
+ // would otherwise pollute the user's real franklin-debug.log.
446
+ if (isTestFixtureModel(failedModel) || isTestFixtureModel(nextModel))
447
+ return;
442
448
  logger.warn(`[franklin] ⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
443
449
  });
444
450
  response = result.response;
@@ -446,7 +452,7 @@ export function createProxy(options) {
446
452
  // Use the body with the correct fallback model for payment
447
453
  body = result.bodyUsed;
448
454
  usedFallback = result.fallbackUsed;
449
- if (usedFallback) {
455
+ if (usedFallback && !isTestFixtureModel(finalModel)) {
450
456
  logger.info(`[franklin] ↺ Fallback successful: using ${finalModel}`);
451
457
  }
452
458
  }
@@ -678,7 +684,9 @@ async function fetchWithPaymentFallback(url, init, originalBody, config, payment
678
684
  if (nextModel && onFallback) {
679
685
  onFallback(model, 0, nextModel);
680
686
  }
681
- logger.warn(`[franklin] [fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
687
+ if (!isTestFixtureModel(model)) {
688
+ logger.warn(`[franklin] [fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
689
+ }
682
690
  if (i < config.chain.length - 1) {
683
691
  await sleep(config.retryDelayMs);
684
692
  }
@@ -33,6 +33,15 @@ export function appendAudit(entry) {
33
33
  // remember to redirect HOME.
34
34
  if (isTestFixtureModel(entry.model))
35
35
  return;
36
+ // Belt-and-braces: when 3.15.17 renamed several test fixtures from
37
+ // local/test-model to zai/glm-5.1 (a real-looking model, so
38
+ // persistence tests can verify the write path), the model-name gate
39
+ // stopped catching them. Verified on a real machine: 310 of 370
40
+ // recent zai/glm-5.1 audit entries had output_tokens < 10 — clearly
41
+ // mock responses. The env-var lets tests opt out at file level
42
+ // without renaming fixtures back.
43
+ if (process.env.FRANKLIN_NO_AUDIT === '1')
44
+ return;
36
45
  try {
37
46
  fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
38
47
  const safe = {
@@ -17,12 +17,26 @@
17
17
  * `local/lmstudio`, etc.) are intentionally NOT filtered — only the
18
18
  * `local/test` prefix.
19
19
  */
20
+ // Prefixes test files use to mark "this isn't a real model name". The
21
+ // list grew by inspection of real franklin-debug.log pollution after
22
+ // 3.15.16 — each new convention surfaced as a writes-to-user-home leak:
23
+ // `local/test*` — agent loop in-process tests (test/local.mjs:567 etc.)
24
+ // `slow/` — proxy timeout test (test/local.mjs:380)
25
+ // `mock/` — generic mock-server fixtures (defensive)
26
+ // `test/` — e.g. `test/model` used in some test paths
20
27
  const TEST_FIXTURE_PREFIXES = [
21
28
  'local/test',
29
+ 'slow/',
30
+ 'mock/',
31
+ 'test/',
22
32
  ];
33
+ // Exact-match fixtures (model is literally "test" without a slash).
34
+ const TEST_FIXTURE_EXACT = new Set(['test']);
23
35
  export function isTestFixtureModel(model) {
24
36
  if (!model)
25
37
  return false;
38
+ if (TEST_FIXTURE_EXACT.has(model))
39
+ return true;
26
40
  for (const prefix of TEST_FIXTURE_PREFIXES) {
27
41
  if (model.startsWith(prefix))
28
42
  return true;
@@ -163,6 +163,12 @@ export function recordUsage(model, inputTokens, outputTokens, costUsd, latencyMs
163
163
  // test fixtures before this gate).
164
164
  if (isTestFixtureModel(model))
165
165
  return;
166
+ // Test fixtures using real model names (`zai/glm-5.1` after 3.15.17's
167
+ // rename) escape the prefix gate. Env-var override lets tests opt
168
+ // out at file level. Mirrors the audit.ts guard; same env var so
169
+ // tests flip a single switch.
170
+ if (process.env.FRANKLIN_NO_AUDIT === '1')
171
+ return;
166
172
  const stats = getCachedStats();
167
173
  const now = Date.now();
168
174
  // Update totals
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.23",
3
+ "version": "3.15.25",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {