@blockrun/franklin 3.15.28 → 3.15.30
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +61 -26
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -615,19 +615,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
615
615
|
const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
|
|
616
616
|
let toolCapWarned = false; // Log + inject only once per turn
|
|
617
617
|
const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
|
|
618
|
-
//
|
|
619
|
-
//
|
|
620
|
-
//
|
|
621
|
-
//
|
|
622
|
-
//
|
|
623
|
-
//
|
|
624
|
-
//
|
|
625
|
-
|
|
618
|
+
// Repetition-based hard stop. 3.15.28 used a count-based threshold
|
|
619
|
+
// (Bash called 6× → break) which incorrectly killed legitimate
|
|
620
|
+
// exploratory data work — verified 2026-05-04 in a real Opus session
|
|
621
|
+
// running data-engineering on GCS logs: 15 distinct gsutil/bq calls,
|
|
622
|
+
// each producing new insights, would have been cut off at call 6.
|
|
623
|
+
// 3.15.30 detects ACTUAL loops by tracking the (tool, input)
|
|
624
|
+
// signature: only break when the model calls the SAME signature 3
|
|
625
|
+
// times in one turn. Different inputs → exploration, allowed.
|
|
626
|
+
const SAME_SIGNATURE_HARD_STOP = 3;
|
|
626
627
|
// Tracks which tool names have already had a warn injected this turn.
|
|
627
628
|
// Without it, every call past threshold pushes another [SYSTEM] STOP
|
|
628
629
|
// tool_result into the model's context — same shape bug as the cap
|
|
629
630
|
// spam fixed in 3.15.24, just in a sibling guardrail.
|
|
630
631
|
const sameToolWarned = new Set();
|
|
632
|
+
// Tracks how many times each (tool, input)-signature has been called
|
|
633
|
+
// this turn. Different inputs → different signatures → exploration.
|
|
634
|
+
const turnSignatureCounts = new Map();
|
|
631
635
|
// ── No-progress guardrail: kill infinite tiny-response loops ──
|
|
632
636
|
let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
|
|
633
637
|
const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
|
|
@@ -1225,6 +1229,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1225
1229
|
consecutiveTinyResponses = 0;
|
|
1226
1230
|
}
|
|
1227
1231
|
recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
|
|
1232
|
+
// Capture tool names invoked in this assistant turn. The AuditEntry
|
|
1233
|
+
// interface has had a `toolCalls?: string[]` slot since 3.15.11, but
|
|
1234
|
+
// nothing populated it — verified 2026-05-04 in a real Opus session
|
|
1235
|
+
// where 14 audit rows showed `tools=[]` despite Bash being called
|
|
1236
|
+
// every turn (the session jsonl had the tool_use blocks; the audit
|
|
1237
|
+
// just lost them). Now we pull names off responseParts so post-hoc
|
|
1238
|
+
// analytics can answer "what tools fired most often last week" from
|
|
1239
|
+
// ~/.blockrun/franklin-audit.jsonl alone.
|
|
1240
|
+
const turnToolNames = [];
|
|
1241
|
+
for (const p of responseParts) {
|
|
1242
|
+
if (p.type === 'tool_use') {
|
|
1243
|
+
const name = p.name;
|
|
1244
|
+
if (typeof name === 'string')
|
|
1245
|
+
turnToolNames.push(name);
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1228
1248
|
appendAudit({
|
|
1229
1249
|
ts: Date.now(),
|
|
1230
1250
|
sessionId,
|
|
@@ -1240,6 +1260,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1240
1260
|
source: 'agent',
|
|
1241
1261
|
workDir,
|
|
1242
1262
|
prompt: extractLastUserPrompt(history),
|
|
1263
|
+
toolCalls: turnToolNames.length > 0 ? turnToolNames : undefined,
|
|
1243
1264
|
routingTier,
|
|
1244
1265
|
});
|
|
1245
1266
|
// Accumulate session-level totals for session meta
|
|
@@ -1478,6 +1499,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1478
1499
|
for (const [inv] of results) {
|
|
1479
1500
|
const name = inv.name;
|
|
1480
1501
|
turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
|
|
1502
|
+
// Track (tool, input)-signature for the loop detector below.
|
|
1503
|
+
// Identical signatures → real loop. Different inputs → exploration.
|
|
1504
|
+
const sig = toolCallSignature(name, inv.input);
|
|
1505
|
+
turnSignatureCounts.set(sig, (turnSignatureCounts.get(sig) || 0) + 1);
|
|
1481
1506
|
// Session-scope aggregate (drives telemetry opt-in export).
|
|
1482
1507
|
sessionToolCounts.set(name, (sessionToolCounts.get(name) || 0) + 1);
|
|
1483
1508
|
// Read file dedup: track paths already read
|
|
@@ -1538,14 +1563,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1538
1563
|
// Re-injecting on every subsequent call (the pre-3.15.28 behavior)
|
|
1539
1564
|
// just spammed the model's context: Opus-4.7 verified to ignore 4
|
|
1540
1565
|
// sequential "STOP" messages and keep calling Bash. Cleaner contract:
|
|
1541
|
-
// one nudge at the threshold,
|
|
1542
|
-
//
|
|
1543
|
-
|
|
1566
|
+
// one nudge at the threshold, and the loop detector below catches
|
|
1567
|
+
// genuine stuck loops via input-signature repetition (3.15.30
|
|
1568
|
+
// replaced 3.15.28's count-based hard stop — that broke legitimate
|
|
1569
|
+
// exploratory data work where 15 distinct gsutil/bq calls were
|
|
1570
|
+
// each producing new insights).
|
|
1544
1571
|
for (const [name, count] of turnToolCounts) {
|
|
1545
|
-
if (count >= SAME_TOOL_HARD_STOP) {
|
|
1546
|
-
sameToolHardStopHit = name;
|
|
1547
|
-
continue;
|
|
1548
|
-
}
|
|
1549
1572
|
if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
|
|
1550
1573
|
sameToolWarned.add(name);
|
|
1551
1574
|
outcomeContent.push({
|
|
@@ -1556,6 +1579,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1556
1579
|
});
|
|
1557
1580
|
}
|
|
1558
1581
|
}
|
|
1582
|
+
// True loop detector: same (tool, input) signature repeated.
|
|
1583
|
+
// Catches the actual failure mode (model retrying the exact same
|
|
1584
|
+
// call hoping for a different result) without misfiring on
|
|
1585
|
+
// legitimate exploration where each call has different input.
|
|
1586
|
+
let stuckSignature = null;
|
|
1587
|
+
for (const [sig, count] of turnSignatureCounts) {
|
|
1588
|
+
if (count >= SAME_SIGNATURE_HARD_STOP) {
|
|
1589
|
+
stuckSignature = { sig, count };
|
|
1590
|
+
break;
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1559
1593
|
// Hard cap: nudge the model to stop. Inject once per turn —
|
|
1560
1594
|
// re-injecting on every iteration past the cap is just noise
|
|
1561
1595
|
// and clutters the model's context with repeated stop signals.
|
|
@@ -1617,19 +1651,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1617
1651
|
onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
|
|
1618
1652
|
break;
|
|
1619
1653
|
}
|
|
1620
|
-
//
|
|
1621
|
-
//
|
|
1622
|
-
//
|
|
1623
|
-
//
|
|
1624
|
-
//
|
|
1625
|
-
//
|
|
1626
|
-
//
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1654
|
+
// Signature-based hard stop (3.15.30). The original 3.15.28 fired
|
|
1655
|
+
// on count alone (Bash 6× → break), which incorrectly killed
|
|
1656
|
+
// legitimate data-engineering work — the same Opus-4.7 session
|
|
1657
|
+
// verified at 2026-05-04 13:36 was making 15 distinct gsutil/bq
|
|
1658
|
+
// calls, each producing new insights. Now we only break when the
|
|
1659
|
+
// SAME (tool, input) signature has been called 3× — the actual
|
|
1660
|
+
// failure mode of "model retrying the exact same call hoping
|
|
1661
|
+
// something changes". Different inputs = exploration, allowed.
|
|
1662
|
+
if (stuckSignature) {
|
|
1663
|
+
const toolName = stuckSignature.sig.split('::')[0];
|
|
1664
|
+
logger.error(`[franklin] Signature-loop hard stop: \`${toolName}\` called with identical input ${stuckSignature.count} times this turn — ending turn`);
|
|
1630
1665
|
onEvent({
|
|
1631
1666
|
kind: 'text_delta',
|
|
1632
|
-
text: `\n\n⚠️ ${
|
|
1667
|
+
text: `\n\n⚠️ ${toolName} called ${stuckSignature.count}× with the same input this turn — that's a real loop, not exploration. Ending turn. Rephrase what you actually need, or try \`/model\` to switch.\n`,
|
|
1633
1668
|
});
|
|
1634
1669
|
onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
|
|
1635
1670
|
break;
|
package/package.json
CHANGED