@blockrun/franklin 3.15.27 → 3.15.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -615,6 +615,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
615
615
  const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
616
616
  let toolCapWarned = false; // Log + inject only once per turn
617
617
  const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
618
+ // Hard stop at 2× the warn threshold. The previous loop injected
619
+ // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
620
+ // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
621
+ // anyway). Strong models read the system tool_result, briefly
622
+ // acknowledge, then call the same tool again — the soft injection
623
+ // doesn't actually constrain behavior. Hard stop matches what
624
+ // HARD_TOOL_CAP already does for total tool count.
625
+ const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
626
+ // Tracks which tool names have already had a warn injected this turn.
627
+ // Without it, every call past threshold pushes another [SYSTEM] STOP
628
+ // tool_result into the model's context — same shape bug as the cap
629
+ // spam fixed in 3.15.24, just in a sibling guardrail.
630
+ const sameToolWarned = new Set();
618
631
  // ── No-progress guardrail: kill infinite tiny-response loops ──
619
632
  let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
620
633
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1521,16 +1534,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1521
1534
  };
1522
1535
  });
1523
1536
  // ── Guardrail injections ──
1524
- // Warn about same-tool repetition — escalate on every call past threshold
1537
+ // Warn about same-tool repetition — fire once per tool name per turn.
1538
+ // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
1539
+ // just spammed the model's context: Opus-4.7 verified to ignore 4
1540
+ // sequential "STOP" messages and keep calling Bash. Cleaner contract:
1541
+ // one nudge at the threshold, then if the model ignores it past
1542
+ // SAME_TOOL_HARD_STOP, break the turn.
1543
+ let sameToolHardStopHit = null;
1525
1544
  for (const [name, count] of turnToolCounts) {
1526
- if (count >= SAME_TOOL_WARN_THRESHOLD) {
1527
- const escalation = count === SAME_TOOL_WARN_THRESHOLD
1528
- ? `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls.`
1529
- : `[SYSTEM] STOP. You have now called ${name} ${count} times — more searching is not producing new information. Answer the user with what you already have. If the answer truly requires a different approach, use a DIFFERENT tool or ask the user.`;
1545
+ if (count >= SAME_TOOL_HARD_STOP) {
1546
+ sameToolHardStopHit = name;
1547
+ continue;
1548
+ }
1549
+ if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
1550
+ sameToolWarned.add(name);
1530
1551
  outcomeContent.push({
1531
1552
  type: 'tool_result',
1532
- tool_use_id: `guardrail-warn-${name}-${count}`,
1533
- content: escalation,
1553
+ tool_use_id: `guardrail-warn-${name}`,
1554
+ content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls — if you need different data, switch tools or ask the user.`,
1534
1555
  is_error: true,
1535
1556
  });
1536
1557
  }
@@ -1596,6 +1617,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1596
1617
  onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1597
1618
  break;
1598
1619
  }
1620
+ // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
1621
+ // read the warn injection, briefly acknowledge it, and call the
1622
+ // same tool again — the soft signal is ineffective. Break the
1623
+ // turn here when one tool name crosses the hard threshold to
1624
+ // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
1625
+ // more Bash calls after the warn threshold (3); with this break
1626
+ // the turn now ends once the same tool reaches 6 calls.
1627
+ if (sameToolHardStopHit) {
1628
+ const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
1629
+ logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
1630
+ onEvent({
1631
+ kind: 'text_delta',
1632
+ text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
1633
+ });
1634
+ onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
1635
+ break;
1636
+ }
1599
1637
  }
1600
1638
  if (loopCount >= maxTurns) {
1601
1639
  lastSessionActivity = Date.now();
@@ -7,6 +7,8 @@ export interface ProxyOptions {
7
7
  modelOverride?: string;
8
8
  debug?: boolean;
9
9
  fallbackEnabled?: boolean;
10
+ requestTimeoutMs?: number;
11
+ streamTimeoutMs?: number;
10
12
  }
11
13
  export declare function createProxy(options: ProxyOptions): http.Server;
12
14
  type RequestCategory = 'simple' | 'code' | 'default';
@@ -233,6 +233,11 @@ export function createProxy(options) {
233
233
  const chain = options.chain || 'base';
234
234
  let currentModel = options.modelOverride || DEFAULT_MODEL;
235
235
  const fallbackEnabled = options.fallbackEnabled !== false; // Default true
236
+ // Resolve timeouts once at construction. The option wins over the env var
237
+ // so callers (esp. tests) can configure a single proxy without polluting
238
+ // process.env for the rest of the process — and for any sibling proxy.
239
+ const effectiveRequestTimeoutMs = options.requestTimeoutMs ?? getProxyRequestTimeoutMs();
240
+ const effectiveStreamTimeoutMs = options.streamTimeoutMs ?? getProxyStreamTimeoutMs();
236
241
  let baseWallet = null;
237
242
  let solanaWallet = null;
238
243
  if (chain === 'base') {
@@ -425,7 +430,7 @@ export function createProxy(options) {
425
430
  };
426
431
  let response;
427
432
  let finalModel = requestModel;
428
- const requestTimeoutMs = getProxyRequestTimeoutMs();
433
+ const requestTimeoutMs = effectiveRequestTimeoutMs;
429
434
  // Use fallback chain if enabled
430
435
  if (fallbackEnabled && body && requestPath.includes('messages')) {
431
436
  const fallbackConfig = {
@@ -526,7 +531,7 @@ export function createProxy(options) {
526
531
  const decoder = new TextDecoder();
527
532
  let fullResponse = '';
528
533
  const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
529
- const STREAM_TIMEOUT_MS = getProxyStreamTimeoutMs();
534
+ const STREAM_TIMEOUT_MS = effectiveStreamTimeoutMs;
530
535
  const streamDeadline = Date.now() + STREAM_TIMEOUT_MS;
531
536
  const pump = async () => {
532
537
  while (true) {
@@ -9,6 +9,12 @@
9
9
  * EPERM means the pid exists but we don't have permission to signal it —
10
10
  * treat that as alive. ESRCH (or anything else) means dead.
11
11
  *
12
+ * Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
13
+ * with status=queued and no pid means the runner subprocess crashed during
14
+ * module import (cliPath wrong, syntax error in dist) before it could record
15
+ * itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
16
+ * `franklin task list` doesn't show them as eternally pending.
17
+ *
12
18
  * Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
13
19
  * on `task list`"; v3.11 may add a pidStartTime cross-check.
14
20
  */
@@ -9,10 +9,17 @@
9
9
  * EPERM means the pid exists but we don't have permission to signal it —
10
10
  * treat that as alive. ESRCH (or anything else) means dead.
11
11
  *
12
+ * Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
13
+ * with status=queued and no pid means the runner subprocess crashed during
14
+ * module import (cliPath wrong, syntax error in dist) before it could record
15
+ * itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
16
+ * `franklin task list` doesn't show them as eternally pending.
17
+ *
12
18
  * Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
13
19
  * on `task list`"; v3.11 may add a pidStartTime cross-check.
14
20
  */
15
21
  import { listTasks, applyEvent } from './store.js';
22
+ const QUEUED_NO_PID_TIMEOUT_MS = 5 * 60 * 1000; // 5 min
16
23
  function isPidAlive(pid) {
17
24
  try {
18
25
  process.kill(pid, 0);
@@ -28,16 +35,25 @@ export function reconcileLostTasks(now = Date.now()) {
28
35
  for (const t of listTasks()) {
29
36
  if (t.status !== 'running' && t.status !== 'queued')
30
37
  continue;
31
- if (typeof t.pid !== 'number')
32
- continue;
33
- if (isPidAlive(t.pid))
34
- continue;
38
+ let summary = null;
39
+ if (typeof t.pid !== 'number') {
40
+ // Only reap pid-less tasks that have been queued long enough that the
41
+ // runner can't plausibly still be importing. On slow networks or cold
42
+ // caches Franklin's startup can take 30+ seconds — 5 minutes leaves
43
+ // generous headroom for legitimate slow starts.
44
+ if (t.status !== 'queued')
45
+ continue;
46
+ if (now - t.createdAt < QUEUED_NO_PID_TIMEOUT_MS)
47
+ continue;
48
+ summary = 'Runner never registered a pid — likely crashed during module import.';
49
+ }
50
+ else {
51
+ if (isPidAlive(t.pid))
52
+ continue;
53
+ summary = 'Backing process not found — task may have been killed externally.';
54
+ }
35
55
  try {
36
- applyEvent(t.runId, {
37
- at: now,
38
- kind: 'lost',
39
- summary: 'Backing process not found — task may have been killed externally.',
40
- });
56
+ applyEvent(t.runId, { at: now, kind: 'lost', summary });
41
57
  n++;
42
58
  }
43
59
  catch (err) {
@@ -16,7 +16,8 @@
16
16
  *
17
17
  * CLI path resolution (in priority order):
18
18
  * 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
19
- * 2. <cwd>/dist/index.js — the published bundle's entry point.
19
+ * 2. process.argv[1] — the script Node is currently executing, i.e. the
20
+ * running franklin bundle. Works regardless of the user's cwd.
20
21
  */
21
22
  export interface StartDetachedTaskInput {
22
23
  label: string;
@@ -16,11 +16,11 @@
16
16
  *
17
17
  * CLI path resolution (in priority order):
18
18
  * 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
19
- * 2. <cwd>/dist/index.js — the published bundle's entry point.
19
+ * 2. process.argv[1] — the script Node is currently executing, i.e. the
20
+ * running franklin bundle. Works regardless of the user's cwd.
20
21
  */
21
22
  import { spawn } from 'node:child_process';
22
23
  import fs from 'node:fs';
23
- import path from 'node:path';
24
24
  import { randomUUID } from 'node:crypto';
25
25
  import { writeTaskMeta } from './store.js';
26
26
  import { taskLogPath, ensureTaskDir } from './paths.js';
@@ -28,7 +28,10 @@ function resolveCliPath() {
28
28
  const fromEnv = process.env.FRANKLIN_CLI_PATH;
29
29
  if (fromEnv && fromEnv.length > 0)
30
30
  return fromEnv;
31
- return path.resolve(process.cwd(), 'dist', 'index.js');
31
+ // Resolving from process.cwd() breaks whenever Franklin is launched outside
32
+ // the source tree (npm global install, brew, or just `cd /elsewhere &&
33
+ // franklin`). process.argv[1] is the actual entry script Node loaded.
34
+ return process.argv[1];
32
35
  }
33
36
  function generateRunId() {
34
37
  return `t_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;
@@ -286,12 +286,31 @@ function executeCommand(command, timeoutMs, ctx) {
286
286
  RUNCODE_WORKDIR: ctx.workingDir,
287
287
  },
288
288
  stdio: ['ignore', 'pipe', 'pipe'],
289
+ // Put the shell in its own process group (pgid = pid) so a timeout
290
+ // can SIGTERM the entire tree. Without this, signalling only the
291
+ // immediate bash leaves grandchildren (e.g. `gsutil -m cp` and its
292
+ // python helpers) running as orphans — observed in the wild as
293
+ // 18-day-old leaked gsutil processes after a 30-min Bash timeout.
294
+ detached: true,
289
295
  });
290
296
  }
291
297
  catch (spawnErr) {
292
298
  resolve({ output: `Error spawning shell: ${spawnErr.message}`, isError: true });
293
299
  return;
294
300
  }
301
+ // Signal the whole process group (negative pid). ESRCH means the group
302
+ // is already gone — fine. Any other failure we swallow because the close
303
+ // handler will still resolve the promise on its own.
304
+ const killTree = (signal) => {
305
+ if (typeof child.pid !== 'number')
306
+ return;
307
+ try {
308
+ process.kill(-child.pid, signal);
309
+ }
310
+ catch {
311
+ /* group already dead */
312
+ }
313
+ };
295
314
  let stdout = '';
296
315
  let stderr = '';
297
316
  let outputBytes = 0;
@@ -300,19 +319,14 @@ function executeCommand(command, timeoutMs, ctx) {
300
319
  let abortedByUser = false;
301
320
  const timer = setTimeout(() => {
302
321
  killed = true;
303
- child.kill('SIGTERM');
304
- setTimeout(() => {
305
- try {
306
- child.kill('SIGKILL');
307
- }
308
- catch { /* already dead */ }
309
- }, 5000); // Give 5s for graceful shutdown before SIGKILL
322
+ killTree('SIGTERM');
323
+ setTimeout(() => killTree('SIGKILL'), 5000); // 5s grace before SIGKILL
310
324
  }, timeoutMs);
311
325
  // Handle abort signal
312
326
  const onAbort = () => {
313
327
  killed = true;
314
328
  abortedByUser = true;
315
- child.kill('SIGTERM');
329
+ killTree('SIGTERM');
316
330
  };
317
331
  ctx.abortSignal.addEventListener('abort', onAbort, { once: true });
318
332
  // Emit last non-empty line to UI progress (throttled to avoid flooding)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.27",
3
+ "version": "3.15.28",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {