@blockrun/franklin 3.15.28 → 3.15.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/agent/loop.js +61 -26
  2. package/package.json +1 -1
@@ -615,19 +615,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
615
615
  const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
616
616
  let toolCapWarned = false; // Log + inject only once per turn
617
617
  const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
618
- // Hard stop at the warn threshold. The previous loop injected
619
- // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
620
- // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
621
- // anyway). Strong models read the system tool_result, briefly
622
- // acknowledge, then call the same tool again — the soft injection
623
- // doesn't actually constrain behavior. Hard stop matches what
624
- // HARD_TOOL_CAP already does for total tool count.
625
- const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
618
+ // Repetition-based hard stop. 3.15.28 used a count-based threshold
619
+ // (Bash called ≥6× → break) which incorrectly killed legitimate
620
+ // exploratory data work — verified 2026-05-04 in a real Opus session
621
+ // running data-engineering on GCS logs: 15 distinct gsutil/bq calls,
622
+ // each producing new insights, would have been cut off at call 6.
623
+ // 3.15.30 detects ACTUAL loops by tracking the (tool, input)
624
+ // signature: only break when the model calls the SAME signature 3
625
+ // times in one turn. Different inputs → exploration, allowed.
626
+ const SAME_SIGNATURE_HARD_STOP = 3;
626
627
  // Tracks which tool names have already had a warn injected this turn.
627
628
  // Without it, every call past threshold pushes another [SYSTEM] STOP
628
629
  // tool_result into the model's context — same shape bug as the cap
629
630
  // spam fixed in 3.15.24, just in a sibling guardrail.
630
631
  const sameToolWarned = new Set();
632
+ // Tracks how many times each (tool, input)-signature has been called
633
+ // this turn. Different inputs → different signatures → exploration.
634
+ const turnSignatureCounts = new Map();
631
635
  // ── No-progress guardrail: kill infinite tiny-response loops ──
632
636
  let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
633
637
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1225,6 +1229,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1225
1229
  consecutiveTinyResponses = 0;
1226
1230
  }
1227
1231
  recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
1232
+ // Capture tool names invoked in this assistant turn. The AuditEntry
1233
+ // interface has had a `toolCalls?: string[]` slot since 3.15.11, but
1234
+ // nothing populated it — verified 2026-05-04 in a real Opus session
1235
+ // where 14 audit rows showed `tools=[]` despite Bash being called
1236
+ // every turn (the session jsonl had the tool_use blocks; the audit
1237
+ // just lost them). Now we pull names off responseParts so post-hoc
1238
+ // analytics can answer "what tools fired most often last week" from
1239
+ // ~/.blockrun/franklin-audit.jsonl alone.
1240
+ const turnToolNames = [];
1241
+ for (const p of responseParts) {
1242
+ if (p.type === 'tool_use') {
1243
+ const name = p.name;
1244
+ if (typeof name === 'string')
1245
+ turnToolNames.push(name);
1246
+ }
1247
+ }
1228
1248
  appendAudit({
1229
1249
  ts: Date.now(),
1230
1250
  sessionId,
@@ -1240,6 +1260,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1240
1260
  source: 'agent',
1241
1261
  workDir,
1242
1262
  prompt: extractLastUserPrompt(history),
1263
+ toolCalls: turnToolNames.length > 0 ? turnToolNames : undefined,
1243
1264
  routingTier,
1244
1265
  });
1245
1266
  // Accumulate session-level totals for session meta
@@ -1478,6 +1499,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1478
1499
  for (const [inv] of results) {
1479
1500
  const name = inv.name;
1480
1501
  turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
1502
+ // Track (tool, input)-signature for the loop detector below.
1503
+ // Identical signatures → real loop. Different inputs → exploration.
1504
+ const sig = toolCallSignature(name, inv.input);
1505
+ turnSignatureCounts.set(sig, (turnSignatureCounts.get(sig) || 0) + 1);
1481
1506
  // Session-scope aggregate (drives telemetry opt-in export).
1482
1507
  sessionToolCounts.set(name, (sessionToolCounts.get(name) || 0) + 1);
1483
1508
  // Read file dedup: track paths already read
@@ -1538,14 +1563,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1538
1563
  // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
1539
1564
  // just spammed the model's context: Opus-4.7 verified to ignore 4
1540
1565
  // sequential "STOP" messages and keep calling Bash. Cleaner contract:
1541
- // one nudge at the threshold, then if the model ignores it past
1542
- // SAME_TOOL_HARD_STOP, break the turn.
1543
- let sameToolHardStopHit = null;
1566
+ // one nudge at the threshold, and the loop detector below catches
1567
+ // genuine stuck loops via input-signature repetition (3.15.30
1568
+ // replaced 3.15.28's count-based hard stop — that broke legitimate
1569
+ // exploratory data work where 15 distinct gsutil/bq calls were
1570
+ // each producing new insights).
1544
1571
  for (const [name, count] of turnToolCounts) {
1545
- if (count >= SAME_TOOL_HARD_STOP) {
1546
- sameToolHardStopHit = name;
1547
- continue;
1548
- }
1549
1572
  if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
1550
1573
  sameToolWarned.add(name);
1551
1574
  outcomeContent.push({
@@ -1556,6 +1579,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1556
1579
  });
1557
1580
  }
1558
1581
  }
1582
+ // True loop detector: same (tool, input) signature repeated.
1583
+ // Catches the actual failure mode (model retrying the exact same
1584
+ // call hoping for a different result) without misfiring on
1585
+ // legitimate exploration where each call has different input.
1586
+ let stuckSignature = null;
1587
+ for (const [sig, count] of turnSignatureCounts) {
1588
+ if (count >= SAME_SIGNATURE_HARD_STOP) {
1589
+ stuckSignature = { sig, count };
1590
+ break;
1591
+ }
1592
+ }
1559
1593
  // Hard cap: nudge the model to stop. Inject once per turn —
1560
1594
  // re-injecting on every iteration past the cap is just noise
1561
1595
  // and clutters the model's context with repeated stop signals.
@@ -1617,19 +1651,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1617
1651
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1618
1652
  break;
1619
1653
  }
1620
- // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
1621
- // read the warn injection, briefly acknowledge it, and call the
1622
- // same tool again — the soft signal is ineffective. Break the
1623
- // turn here when one tool name crosses the hard threshold to
1624
- // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
1625
- // Bash calls past 3 nags before this break would have triggered
1626
- // (at 6).
1627
- if (sameToolHardStopHit) {
1628
- const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
1629
- logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
1654
+ // Signature-based hard stop (3.15.30). The original 3.15.28 fired
1655
+ // on count alone (Bash ≥6× → break), which incorrectly killed
1656
+ // legitimate data-engineering work — the same Opus-4.7 session
1657
+ // verified at 2026-05-04 13:36 was making 15 distinct gsutil/bq
1658
+ // calls, each producing new insights. Now we only break when the
1659
+ // SAME (tool, input) signature has been called ≥3× — the actual
1660
+ // failure mode of "model retrying the exact same call hoping
1661
+ // something changes". Different inputs = exploration, allowed.
1662
+ if (stuckSignature) {
1663
+ const toolName = stuckSignature.sig.split('::')[0];
1664
+ logger.error(`[franklin] Signature-loop hard stop: \`${toolName}\` called with identical input ${stuckSignature.count} times this turn — ending turn`);
1630
1665
  onEvent({
1631
1666
  kind: 'text_delta',
1632
- text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
1667
+ text: `\n\n⚠️ ${toolName} called ${stuckSignature.count}× with the same input this turn — that's a real loop, not exploration. Ending turn. Rephrase what you actually need, or try \`/model\` to switch.\n`,
1633
1668
  });
1634
1669
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1635
1670
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.28",
3
+ "version": "3.15.30",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {