@blockrun/franklin 3.15.23 → 3.15.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +28 -7
- package/dist/agent/types.d.ts +1 -1
- package/dist/proxy/server.js +10 -2
- package/dist/stats/audit.js +9 -0
- package/dist/stats/test-fixture.js +14 -0
- package/dist/stats/tracker.js +6 -0
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -606,6 +606,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
606
606
|
const turnToolCounts = new Map(); // Per-tool-name counts this turn
|
|
607
607
|
const readFileCache = new Set(); // Files already read (dedup)
|
|
608
608
|
const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
|
|
609
|
+
// Hard break threshold for runaways. The cap above is soft — we
|
|
610
|
+
// inject a "limit reached" tool_result once and let the model
|
|
611
|
+
// close out. If it ignores that signal and keeps calling tools,
|
|
612
|
+
// we force end the turn to prevent unbounded billing. Verified
|
|
613
|
+
// on a real user log: one turn went 25 → 100 tool calls before
|
|
614
|
+
// the loop ended via maxTurns (much later, much more expensive).
|
|
615
|
+
const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
|
|
616
|
+
let toolCapWarned = false; // Log + inject only once per turn
|
|
609
617
|
const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
|
|
610
618
|
// ── No-progress guardrail: kill infinite tiny-response loops ──
|
|
611
619
|
let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
|
|
@@ -1527,8 +1535,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1527
1535
|
});
|
|
1528
1536
|
}
|
|
1529
1537
|
}
|
|
1530
|
-
// Hard cap:
|
|
1531
|
-
|
|
1538
|
+
// Hard cap: nudge the model to stop. Inject once per turn —
|
|
1539
|
+
// re-injecting on every iteration past the cap is just noise
|
|
1540
|
+
// and clutters the model's context with repeated stop signals.
|
|
1541
|
+
if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
|
|
1532
1542
|
outcomeContent.push({
|
|
1533
1543
|
type: 'tool_result',
|
|
1534
1544
|
tool_use_id: 'guardrail-cap',
|
|
@@ -1569,11 +1579,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1569
1579
|
}
|
|
1570
1580
|
}
|
|
1571
1581
|
}
|
|
1572
|
-
//
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1582
|
+
// Cap signaling: warn once per turn (was firing every iteration
|
|
1583
|
+
// past the cap — verified on a real user log, one turn produced
|
|
1584
|
+
// 76 sequential warnings 25→100). Hard break at 2× cap stops a
|
|
1585
|
+
// runaway model that ignores the soft stop signal above.
|
|
1586
|
+
if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN && !toolCapWarned) {
|
|
1587
|
+
toolCapWarned = true;
|
|
1588
|
+
logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn (soft cap ${MAX_TOOL_CALLS_PER_TURN}, hard cap ${HARD_TOOL_CAP})`);
|
|
1589
|
+
}
|
|
1590
|
+
if (turnToolCalls >= HARD_TOOL_CAP) {
|
|
1591
|
+
logger.error(`[franklin] Hard tool cap exceeded (${turnToolCalls}) — ending turn to prevent runaway`);
|
|
1592
|
+
onEvent({
|
|
1593
|
+
kind: 'text_delta',
|
|
1594
|
+
text: `\n\n⚠️ Tool call limit exceeded (${turnToolCalls}/${HARD_TOOL_CAP}). Ending turn to prevent runaway loop. Try rephrasing or use \`/model\` to switch.\n`,
|
|
1595
|
+
});
|
|
1596
|
+
onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
|
|
1597
|
+
break;
|
|
1577
1598
|
}
|
|
1578
1599
|
}
|
|
1579
1600
|
if (loopCount >= maxTurns) {
|
package/dist/agent/types.d.ts
CHANGED
|
@@ -125,7 +125,7 @@ export interface StreamCapabilityDone {
|
|
|
125
125
|
}
|
|
126
126
|
export interface StreamTurnDone {
|
|
127
127
|
kind: 'turn_done';
|
|
128
|
-
reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress';
|
|
128
|
+
reason: 'completed' | 'max_turns' | 'aborted' | 'error' | 'budget' | 'no_progress' | 'cap_exceeded';
|
|
129
129
|
error?: string;
|
|
130
130
|
}
|
|
131
131
|
export interface StreamUsageInfo {
|
package/dist/proxy/server.js
CHANGED
|
@@ -16,6 +16,7 @@ const X_FRANKLIN_VERSION = VERSION;
|
|
|
16
16
|
// pattern!), and timestamp format — bug fixes never propagated. They
|
|
17
17
|
// were the last holdouts after the agent loop was migrated.
|
|
18
18
|
import { logger, setDebugMode } from '../logger.js';
|
|
19
|
+
import { isTestFixtureModel } from '../stats/test-fixture.js';
|
|
19
20
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
20
21
|
// 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
|
|
21
22
|
// nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
|
|
@@ -439,6 +440,11 @@ export function createProxy(options) {
|
|
|
439
440
|
solanaWallet,
|
|
440
441
|
timeoutMs: requestTimeoutMs,
|
|
441
442
|
}, (failedModel, status, nextModel) => {
|
|
443
|
+
// Skip test-fixture model names (slow/, mock/, test/, local/test*)
|
|
444
|
+
// — these come from in-process proxy tests with mock servers and
|
|
445
|
+
// would otherwise pollute the user's real franklin-debug.log.
|
|
446
|
+
if (isTestFixtureModel(failedModel) || isTestFixtureModel(nextModel))
|
|
447
|
+
return;
|
|
442
448
|
logger.warn(`[franklin] ⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
|
|
443
449
|
});
|
|
444
450
|
response = result.response;
|
|
@@ -446,7 +452,7 @@ export function createProxy(options) {
|
|
|
446
452
|
// Use the body with the correct fallback model for payment
|
|
447
453
|
body = result.bodyUsed;
|
|
448
454
|
usedFallback = result.fallbackUsed;
|
|
449
|
-
if (usedFallback) {
|
|
455
|
+
if (usedFallback && !isTestFixtureModel(finalModel)) {
|
|
450
456
|
logger.info(`[franklin] ↺ Fallback successful: using ${finalModel}`);
|
|
451
457
|
}
|
|
452
458
|
}
|
|
@@ -678,7 +684,9 @@ async function fetchWithPaymentFallback(url, init, originalBody, config, payment
|
|
|
678
684
|
if (nextModel && onFallback) {
|
|
679
685
|
onFallback(model, 0, nextModel);
|
|
680
686
|
}
|
|
681
|
-
|
|
687
|
+
if (!isTestFixtureModel(model)) {
|
|
688
|
+
logger.warn(`[franklin] [fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
|
|
689
|
+
}
|
|
682
690
|
if (i < config.chain.length - 1) {
|
|
683
691
|
await sleep(config.retryDelayMs);
|
|
684
692
|
}
|
package/dist/stats/audit.js
CHANGED
|
@@ -33,6 +33,15 @@ export function appendAudit(entry) {
|
|
|
33
33
|
// remember to redirect HOME.
|
|
34
34
|
if (isTestFixtureModel(entry.model))
|
|
35
35
|
return;
|
|
36
|
+
// Belt-and-braces: when 3.15.17 renamed several test fixtures from
|
|
37
|
+
// local/test-model to zai/glm-5.1 (a real-looking model, so
|
|
38
|
+
// persistence tests can verify the write path), the model-name gate
|
|
39
|
+
// stopped catching them. Verified on a real machine: 310 of 370
|
|
40
|
+
// recent zai/glm-5.1 audit entries had output_tokens < 10 — clearly
|
|
41
|
+
// mock responses. The env-var lets tests opt out at file level
|
|
42
|
+
// without renaming fixtures back.
|
|
43
|
+
if (process.env.FRANKLIN_NO_AUDIT === '1')
|
|
44
|
+
return;
|
|
36
45
|
try {
|
|
37
46
|
fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
|
|
38
47
|
const safe = {
|
|
package/dist/stats/test-fixture.js
CHANGED
|
@@ -17,12 +17,26 @@
|
|
|
17
17
|
* `local/lmstudio`, etc.) are intentionally NOT filtered — only the
|
|
18
18
|
* `local/test` prefix.
|
|
19
19
|
*/
|
|
20
|
+
// Prefixes test files use to mark "this isn't a real model name". The
|
|
21
|
+
// list grew by inspection of real franklin-debug.log pollution after
|
|
22
|
+
// 3.15.16 — each new convention surfaced as a writes-to-user-home leak:
|
|
23
|
+
// `local/test*` — agent loop in-process tests (test/local.mjs:567 etc.)
|
|
24
|
+
// `slow/` — proxy timeout test (test/local.mjs:380)
|
|
25
|
+
// `mock/` — generic mock-server fixtures (defensive)
|
|
26
|
+
// `test/` — e.g. `test/model` used in some test paths
|
|
20
27
|
const TEST_FIXTURE_PREFIXES = [
|
|
21
28
|
'local/test',
|
|
29
|
+
'slow/',
|
|
30
|
+
'mock/',
|
|
31
|
+
'test/',
|
|
22
32
|
];
|
|
33
|
+
// Exact-match fixtures (model is literally "test" without a slash).
|
|
34
|
+
const TEST_FIXTURE_EXACT = new Set(['test']);
|
|
23
35
|
export function isTestFixtureModel(model) {
|
|
24
36
|
if (!model)
|
|
25
37
|
return false;
|
|
38
|
+
if (TEST_FIXTURE_EXACT.has(model))
|
|
39
|
+
return true;
|
|
26
40
|
for (const prefix of TEST_FIXTURE_PREFIXES) {
|
|
27
41
|
if (model.startsWith(prefix))
|
|
28
42
|
return true;
|
package/dist/stats/tracker.js
CHANGED
|
@@ -163,6 +163,12 @@ export function recordUsage(model, inputTokens, outputTokens, costUsd, latencyMs
|
|
|
163
163
|
// test fixtures before this gate).
|
|
164
164
|
if (isTestFixtureModel(model))
|
|
165
165
|
return;
|
|
166
|
+
// Test fixtures using real model names (`zai/glm-5.1` after 3.15.17's
|
|
167
|
+
// rename) escape the prefix gate. Env-var override lets tests opt
|
|
168
|
+
// out at file level. Mirrors the audit.ts guard; same env var so
|
|
169
|
+
// tests flip a single switch.
|
|
170
|
+
if (process.env.FRANKLIN_NO_AUDIT === '1')
|
|
171
|
+
return;
|
|
166
172
|
const stats = getCachedStats();
|
|
167
173
|
const now = Date.now();
|
|
168
174
|
// Update totals
|
package/package.json
CHANGED