@blockrun/franklin 3.15.29 → 3.15.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -448,7 +448,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
448
448
  persistSessionMeta();
449
449
  };
450
450
  pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
451
- runDataHygiene(); // Trim ~/.blockrun/data + cost_log + remove legacy files
451
+ // Trim ~/.blockrun/data + cost_log + remove legacy files + sweep
452
+ // orphan tool-results dirs. Logs a summary if anything was actually
453
+ // touched — pre-3.15.31 hygiene was completely silent and the only
454
+ // way to verify it was running was poking disk yourself.
455
+ const hygieneReport = runDataHygiene();
456
+ const totalCleaned = hygieneReport.legacyFilesRemoved +
457
+ hygieneReport.dataFilesTrimmed +
458
+ hygieneReport.costLogRowsTrimmed +
459
+ hygieneReport.orphanToolResultsRemoved;
460
+ if (totalCleaned > 0) {
461
+ logger.info(`[franklin] Data hygiene: ${hygieneReport.legacyFilesRemoved} legacy, ${hygieneReport.dataFilesTrimmed} data files, ${hygieneReport.costLogRowsTrimmed} cost_log rows, ${hygieneReport.orphanToolResultsRemoved} orphan tool-results dirs cleaned`);
462
+ }
452
463
  persistSessionMeta();
453
464
  // Flush session meta on SIGINT/SIGTERM so mid-stream Ctrl+C doesn't
454
465
  // leave a stale .meta.json (wrong turnCount/messageCount/cost).
@@ -615,19 +626,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
615
626
  const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
616
627
  let toolCapWarned = false; // Log + inject only once per turn
617
628
  const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
618
- // Hard stop at the warn threshold. The previous loop injected
619
- // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
620
- // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
621
- // anyway). Strong models read the system tool_result, briefly
622
- // acknowledge, then call the same tool again — the soft injection
623
- // doesn't actually constrain behavior. Hard stop matches what
624
- // HARD_TOOL_CAP already does for total tool count.
625
- const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
629
+ // Repetition-based hard stop. 3.15.28 used a count-based threshold
630
+ // (Bash called ≥6× → break) which incorrectly killed legitimate
631
+ // exploratory data work — verified 2026-05-04 in a real Opus session
632
+ // running data-engineering on GCS logs: 15 distinct gsutil/bq calls,
633
+ // each producing new insights, would have been cut off at call 6.
634
+ // 3.15.30 detects ACTUAL loops by tracking the (tool, input)
635
+ // signature: only break when the model calls the SAME signature 3
636
+ // times in one turn. Different inputs → exploration, allowed.
637
+ const SAME_SIGNATURE_HARD_STOP = 3;
626
638
  // Tracks which tool names have already had a warn injected this turn.
627
639
  // Without it, every call past threshold pushes another [SYSTEM] STOP
628
640
  // tool_result into the model's context — same shape bug as the cap
629
641
  // spam fixed in 3.15.24, just in a sibling guardrail.
630
642
  const sameToolWarned = new Set();
643
+ // Tracks how many times each (tool, input)-signature has been called
644
+ // this turn. Different inputs → different signatures → exploration.
645
+ const turnSignatureCounts = new Map();
631
646
  // ── No-progress guardrail: kill infinite tiny-response loops ──
632
647
  let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
633
648
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1495,6 +1510,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1495
1510
  for (const [inv] of results) {
1496
1511
  const name = inv.name;
1497
1512
  turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
1513
+ // Track (tool, input)-signature for the loop detector below.
1514
+ // Identical signatures → real loop. Different inputs → exploration.
1515
+ const sig = toolCallSignature(name, inv.input);
1516
+ turnSignatureCounts.set(sig, (turnSignatureCounts.get(sig) || 0) + 1);
1498
1517
  // Session-scope aggregate (drives telemetry opt-in export).
1499
1518
  sessionToolCounts.set(name, (sessionToolCounts.get(name) || 0) + 1);
1500
1519
  // Read file dedup: track paths already read
@@ -1555,14 +1574,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1555
1574
  // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
1556
1575
  // just spammed the model's context: Opus-4.7 verified to ignore 4
1557
1576
  // sequential "STOP" messages and keep calling Bash. Cleaner contract:
1558
- // one nudge at the threshold, then if the model ignores it past
1559
- // SAME_TOOL_HARD_STOP, break the turn.
1560
- let sameToolHardStopHit = null;
1577
+ // one nudge at the threshold, and the loop detector below catches
1578
+ // genuine stuck loops via input-signature repetition (3.15.30
1579
+ // replaced 3.15.28's count-based hard stop — that broke legitimate
1580
+ // exploratory data work where 15 distinct gsutil/bq calls were
1581
+ // each producing new insights).
1561
1582
  for (const [name, count] of turnToolCounts) {
1562
- if (count >= SAME_TOOL_HARD_STOP) {
1563
- sameToolHardStopHit = name;
1564
- continue;
1565
- }
1566
1583
  if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
1567
1584
  sameToolWarned.add(name);
1568
1585
  outcomeContent.push({
@@ -1573,6 +1590,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1573
1590
  });
1574
1591
  }
1575
1592
  }
1593
+ // True loop detector: same (tool, input) signature repeated.
1594
+ // Catches the actual failure mode (model retrying the exact same
1595
+ // call hoping for a different result) without misfiring on
1596
+ // legitimate exploration where each call has different input.
1597
+ let stuckSignature = null;
1598
+ for (const [sig, count] of turnSignatureCounts) {
1599
+ if (count >= SAME_SIGNATURE_HARD_STOP) {
1600
+ stuckSignature = { sig, count };
1601
+ break;
1602
+ }
1603
+ }
1576
1604
  // Hard cap: nudge the model to stop. Inject once per turn —
1577
1605
  // re-injecting on every iteration past the cap is just noise
1578
1606
  // and clutters the model's context with repeated stop signals.
@@ -1634,19 +1662,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1634
1662
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1635
1663
  break;
1636
1664
  }
1637
- // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
1638
- // read the warn injection, briefly acknowledge it, and call the
1639
- // same tool again — the soft signal is ineffective. Break the
1640
- // turn here when one tool name crosses the hard threshold to
1641
- // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
1642
- // Bash calls past 3 nags before this break would have triggered
1643
- // (at 6).
1644
- if (sameToolHardStopHit) {
1645
- const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
1646
- logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
1665
+ // Signature-based hard stop (3.15.30). The original 3.15.28 fired
1666
+ // on count alone (Bash ≥6× → break), which incorrectly killed
1667
+ // legitimate data-engineering work — the same Opus-4.7 session
1668
+ // verified at 2026-05-04 13:36 was making 15 distinct gsutil/bq
1669
+ // calls, each producing new insights. Now we only break when the
1670
+ // SAME (tool, input) signature has been called ≥3× — the actual
1671
+ // failure mode of "model retrying the exact same call hoping
1672
+ // something changes". Different inputs = exploration, allowed.
1673
+ if (stuckSignature) {
1674
+ const toolName = stuckSignature.sig.split('::')[0];
1675
+ logger.error(`[franklin] Signature-loop hard stop: \`${toolName}\` called with identical input ${stuckSignature.count} times this turn — ending turn`);
1647
1676
  onEvent({
1648
1677
  kind: 'text_delta',
1649
- text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
1678
+ text: `\n\n⚠️ ${toolName} called ${stuckSignature.count}× with the same input this turn — that's a real loop, not exploration. Ending turn. Rephrase what you actually need, or try \`/model\` to switch.\n`,
1650
1679
  });
1651
1680
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1652
1681
  break;
@@ -21,8 +21,23 @@
21
21
  * unlinkSync). Best-effort: every operation is wrapped so a single failure
22
22
  * never breaks agent boot.
23
23
  */
24
+ /**
25
+ * Summary of what hygiene removed/trimmed in one pass. Returned so the
26
+ * caller (agent loop) can log it — silent hygiene is hard to verify
27
+ * without poking at disk yourself, which is exactly the kind of thing
28
+ * users shouldn't have to do.
29
+ */
30
+ export interface HygieneReport {
31
+ legacyFilesRemoved: number;
32
+ dataFilesTrimmed: number;
33
+ costLogRowsTrimmed: number;
34
+ orphanToolResultsRemoved: number;
35
+ }
24
36
  /**
25
37
  * Top-level entry. Call once at agent session start. Catches its own
26
- * errors so a bad disk never blocks startup.
38
+ * errors so a bad disk never blocks startup. Returns counts so callers
39
+ * can log a one-line summary — verified 2026-05-04 from a real session
40
+ * where hygiene was running silently for hours and there was no way to
41
+ * tell from the log whether anything was being cleaned.
27
42
  */
28
- export declare function runDataHygiene(): void;
43
+ export declare function runDataHygiene(): HygieneReport;
@@ -44,35 +44,46 @@ const LEGACY_FILENAMES = [
44
44
  '0xcode-stats.json',
45
45
  'runcode-debug.log',
46
46
  ];
47
+ const ZERO_REPORT = {
48
+ legacyFilesRemoved: 0,
49
+ dataFilesTrimmed: 0,
50
+ costLogRowsTrimmed: 0,
51
+ orphanToolResultsRemoved: 0,
52
+ };
47
53
  /**
48
54
  * Top-level entry. Call once at agent session start. Catches its own
49
- * errors so a bad disk never blocks startup.
55
+ * errors so a bad disk never blocks startup. Returns counts so callers
56
+ * can log a one-line summary — verified 2026-05-04 from a real session
57
+ * where hygiene was running silently for hours and there was no way to
58
+ * tell from the log whether anything was being cleaned.
50
59
  */
51
60
  export function runDataHygiene() {
61
+ const report = { ...ZERO_REPORT };
52
62
  try {
53
- trimDataDir();
63
+ report.dataFilesTrimmed = trimDataDir();
54
64
  }
55
65
  catch { /* best effort */ }
56
66
  try {
57
- trimCostLog();
67
+ report.costLogRowsTrimmed = trimCostLog();
58
68
  }
59
69
  catch { /* best effort */ }
60
70
  try {
61
- removeLegacyFiles();
71
+ report.legacyFilesRemoved = removeLegacyFiles();
62
72
  }
63
73
  catch { /* best effort */ }
64
74
  try {
65
- sweepOrphanToolResults();
75
+ report.orphanToolResultsRemoved = sweepOrphanToolResults();
66
76
  }
67
77
  catch { /* best effort */ }
78
+ return report;
68
79
  }
69
80
  function trimDataDir() {
70
81
  const dir = path.join(BLOCKRUN_DIR, 'data');
71
82
  if (!fs.existsSync(dir))
72
- return;
83
+ return 0;
73
84
  const entries = fs.readdirSync(dir);
74
85
  if (entries.length === 0)
75
- return;
86
+ return 0;
76
87
  const cutoff = Date.now() - DATA_DIR_MAX_AGE_MS;
77
88
  const stats = [];
78
89
  for (const name of entries) {
@@ -86,11 +97,13 @@ function trimDataDir() {
86
97
  // Best effort — skip unreadable entries.
87
98
  }
88
99
  }
100
+ let removed = 0;
89
101
  // Pass 1: age-based delete.
90
102
  for (const e of stats) {
91
103
  if (e.mtime < cutoff) {
92
104
  try {
93
105
  fs.unlinkSync(path.join(dir, e.name));
106
+ removed++;
94
107
  }
95
108
  catch { /* ok */ }
96
109
  }
@@ -106,35 +119,42 @@ function trimDataDir() {
106
119
  for (let i = 0; i < excess; i++) {
107
120
  try {
108
121
  fs.unlinkSync(path.join(dir, survivors[i].name));
122
+ removed++;
109
123
  }
110
124
  catch { /* ok */ }
111
125
  }
112
126
  }
127
+ return removed;
113
128
  }
114
129
  function trimCostLog() {
115
130
  const file = path.join(BLOCKRUN_DIR, 'cost_log.jsonl');
116
131
  if (!fs.existsSync(file))
117
- return;
132
+ return 0;
118
133
  // Cheap probe — skip the full read+rewrite when the file is small.
119
134
  const stat = fs.statSync(file);
120
135
  if (stat.size < COST_LOG_PROBE_BYTES)
121
- return;
136
+ return 0;
122
137
  const lines = fs.readFileSync(file, 'utf-8').split('\n').filter(Boolean);
123
138
  if (lines.length <= COST_LOG_MAX_ENTRIES)
124
- return;
139
+ return 0;
140
+ const dropped = lines.length - COST_LOG_MAX_ENTRIES;
125
141
  const kept = lines.slice(lines.length - COST_LOG_MAX_ENTRIES);
126
142
  fs.writeFileSync(file, kept.join('\n') + '\n');
143
+ return dropped;
127
144
  }
128
145
  function removeLegacyFiles() {
146
+ let removed = 0;
129
147
  for (const name of LEGACY_FILENAMES) {
130
148
  const p = path.join(BLOCKRUN_DIR, name);
131
149
  if (!fs.existsSync(p))
132
150
  continue;
133
151
  try {
134
152
  fs.unlinkSync(p);
153
+ removed++;
135
154
  }
136
155
  catch { /* ok */ }
137
156
  }
157
+ return removed;
138
158
  }
139
159
  /**
140
160
  * `streaming-executor` writes large tool outputs to
@@ -151,7 +171,7 @@ function sweepOrphanToolResults() {
151
171
  const toolResultsDir = path.join(BLOCKRUN_DIR, 'tool-results');
152
172
  const sessionsDir = path.join(BLOCKRUN_DIR, 'sessions');
153
173
  if (!fs.existsSync(toolResultsDir))
154
- return;
174
+ return 0;
155
175
  const knownSessionIds = new Set();
156
176
  if (fs.existsSync(sessionsDir)) {
157
177
  try {
@@ -165,7 +185,7 @@ function sweepOrphanToolResults() {
165
185
  // Best-effort — if we can't read sessions/, skip the sweep so
166
186
  // we never delete tool-results that might still belong to a
167
187
  // live session.
168
- return;
188
+ return 0;
169
189
  }
170
190
  }
171
191
  let entries;
@@ -173,8 +193,9 @@ function sweepOrphanToolResults() {
173
193
  entries = fs.readdirSync(toolResultsDir);
174
194
  }
175
195
  catch {
176
- return;
196
+ return 0;
177
197
  }
198
+ let removed = 0;
178
199
  for (const name of entries) {
179
200
  if (knownSessionIds.has(name))
180
201
  continue;
@@ -184,9 +205,11 @@ function sweepOrphanToolResults() {
184
205
  if (!stat.isDirectory())
185
206
  continue;
186
207
  fs.rmSync(dir, { recursive: true, force: true });
208
+ removed++;
187
209
  }
188
210
  catch {
189
211
  // Skip — best-effort cleanup.
190
212
  }
191
213
  }
214
+ return removed;
192
215
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.29",
3
+ "version": "3.15.31",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {