@blockrun/franklin 3.15.29 → 3.15.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/agent/loop.js +44 -26
  2. package/package.json +1 -1
@@ -615,19 +615,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
615
615
  const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
616
616
  let toolCapWarned = false; // Log + inject only once per turn
617
617
  const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
618
- // Hard stop at the warn threshold. The previous loop injected
619
- // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
620
- // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
621
- // anyway). Strong models read the system tool_result, briefly
622
- // acknowledge, then call the same tool again — the soft injection
623
- // doesn't actually constrain behavior. Hard stop matches what
624
- // HARD_TOOL_CAP already does for total tool count.
625
- const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
618
+ // Repetition-based hard stop. 3.15.28 used a count-based threshold
619
+ // (Bash called >= 6 times → break) which incorrectly killed legitimate
620
+ // exploratory data work — verified 2026-05-04 in a real Opus session
621
+ // running data-engineering on GCS logs: 15 distinct gsutil/bq calls,
622
+ // each producing new insights, would have been cut off at call 6.
623
+ // 3.15.30 detects ACTUAL loops by tracking the (tool, input)
624
+ // signature: only break when the model calls the SAME signature 3
625
+ // times in one turn. Different inputs → exploration, allowed.
626
+ const SAME_SIGNATURE_HARD_STOP = 3;
626
627
  // Tracks which tool names have already had a warn injected this turn.
627
628
  // Without it, every call past threshold pushes another [SYSTEM] STOP
628
629
  // tool_result into the model's context — same shape bug as the cap
629
630
  // spam fixed in 3.15.24, just in a sibling guardrail.
630
631
  const sameToolWarned = new Set();
632
+ // Tracks how many times each (tool, input)-signature has been called
633
+ // this turn. Different inputs → different signatures → exploration.
634
+ const turnSignatureCounts = new Map();
631
635
  // ── No-progress guardrail: kill infinite tiny-response loops ──
632
636
  let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
633
637
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1495,6 +1499,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1495
1499
  for (const [inv] of results) {
1496
1500
  const name = inv.name;
1497
1501
  turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
1502
+ // Track (tool, input)-signature for the loop detector below.
1503
+ // Identical signatures → real loop. Different inputs → exploration.
1504
+ const sig = toolCallSignature(name, inv.input);
1505
+ turnSignatureCounts.set(sig, (turnSignatureCounts.get(sig) || 0) + 1);
1498
1506
  // Session-scope aggregate (drives telemetry opt-in export).
1499
1507
  sessionToolCounts.set(name, (sessionToolCounts.get(name) || 0) + 1);
1500
1508
  // Read file dedup: track paths already read
@@ -1555,14 +1563,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1555
1563
  // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
1556
1564
  // just spammed the model's context: Opus-4.7 verified to ignore 4
1557
1565
  // sequential "STOP" messages and keep calling Bash. Cleaner contract:
1558
- // one nudge at the threshold, then if the model ignores it past
1559
- // SAME_TOOL_HARD_STOP, break the turn.
1560
- let sameToolHardStopHit = null;
1566
+ // one nudge at the threshold, and the loop detector below catches
1567
+ // genuine stuck loops via input-signature repetition (3.15.30
1568
+ // replaced 3.15.28's count-based hard stop — that broke legitimate
1569
+ // exploratory data work where 15 distinct gsutil/bq calls were
1570
+ // each producing new insights).
1561
1571
  for (const [name, count] of turnToolCounts) {
1562
- if (count >= SAME_TOOL_HARD_STOP) {
1563
- sameToolHardStopHit = name;
1564
- continue;
1565
- }
1566
1572
  if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
1567
1573
  sameToolWarned.add(name);
1568
1574
  outcomeContent.push({
@@ -1573,6 +1579,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1573
1579
  });
1574
1580
  }
1575
1581
  }
1582
+ // True loop detector: same (tool, input) signature repeated.
1583
+ // Catches the actual failure mode (model retrying the exact same
1584
+ // call hoping for a different result) without misfiring on
1585
+ // legitimate exploration where each call has different input.
1586
+ let stuckSignature = null;
1587
+ for (const [sig, count] of turnSignatureCounts) {
1588
+ if (count >= SAME_SIGNATURE_HARD_STOP) {
1589
+ stuckSignature = { sig, count };
1590
+ break;
1591
+ }
1592
+ }
1576
1593
  // Hard cap: nudge the model to stop. Inject once per turn —
1577
1594
  // re-injecting on every iteration past the cap is just noise
1578
1595
  // and clutters the model's context with repeated stop signals.
@@ -1634,19 +1651,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1634
1651
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1635
1652
  break;
1636
1653
  }
1637
- // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
1638
- // read the warn injection, briefly acknowledge it, and call the
1639
- // same tool again — the soft signal is ineffective. Break the
1640
- // turn here when one tool name crosses the hard threshold to
1641
- // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
1642
- // Bash calls past 3 nags before this break would have triggered
1643
- // (at 6).
1644
- if (sameToolHardStopHit) {
1645
- const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
1646
- logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
1654
+ // Signature-based hard stop (3.15.30). The original 3.15.28 fired
1655
+ // on count alone (Bash called >= 6 times → break), which incorrectly killed
1656
+ // legitimate data-engineering work — the same Opus-4.7 session
1657
+ // verified at 2026-05-04 13:36 was making 15 distinct gsutil/bq
1658
+ // calls, each producing new insights. Now we only break when the
1659
+ // SAME (tool, input) signature has been called >= 3 times — the actual
1660
+ // failure mode of "model retrying the exact same call hoping
1661
+ // something changes". Different inputs = exploration, allowed.
1662
+ if (stuckSignature) {
1663
+ const toolName = stuckSignature.sig.split('::')[0];
1664
+ logger.error(`[franklin] Signature-loop hard stop: \`${toolName}\` called with identical input ${stuckSignature.count} times this turn — ending turn`);
1647
1665
  onEvent({
1648
1666
  kind: 'text_delta',
1649
- text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
1667
+ text: `\n\n⚠️ ${toolName} called ${stuckSignature.count}× with the same input this turn — that's a real loop, not exploration. Ending turn. Rephrase what you actually need, or try \`/model\` to switch.\n`,
1650
1668
  });
1651
1669
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1652
1670
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.29",
3
+ "version": "3.15.30",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {