npm - @blockrun/franklin - Versions diffs - 3.15.27 → 3.15.28 - Mend

@blockrun/franklin 3.15.27 → 3.15.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/agent/loop.js +45 -7
package/dist/proxy/server.d.ts +2 -0
package/dist/proxy/server.js +7 -2
package/dist/tasks/lost-detection.d.ts +6 -0
package/dist/tasks/lost-detection.js +25 -9
package/dist/tasks/spawn.d.ts +2 -1
package/dist/tasks/spawn.js +6 -3
package/dist/tools/bash.js +22 -8
package/package.json +1 -1

package/dist/agent/loop.js CHANGED Viewed

@@ -615,6 +615,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
         let toolCapWarned = false; // Log + inject only once per turn
         const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
+        // Hard stop at 2× the warn threshold. The previous loop injected
+        // "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
+        // Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
+        // anyway). Strong models read the system tool_result, briefly
+        // acknowledge, then call the same tool again — the soft injection
+        // doesn't actually constrain behavior. Hard stop matches what
+        // HARD_TOOL_CAP already does for total tool count.
+        const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
+        // Tracks which tool names have already had a warn injected this turn.
+        // Without it, every call past threshold pushes another [SYSTEM] STOP
+        // tool_result into the model's context — same shape bug as the cap
+        // spam fixed in 3.15.24, just in a sibling guardrail.
+        const sameToolWarned = new Set();
         // ── No-progress guardrail: kill infinite tiny-response loops ──
         let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
         const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
@@ -1521,16 +1534,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 };
             });
             // ── Guardrail injections ──
-            // Warn about same-tool repetition — escalate on every call past threshold
+            // Warn about same-tool repetition — fire once per tool name per turn.
+            // Re-injecting on every subsequent call (the pre-3.15.28 behavior)
+            // just spammed the model's context: Opus-4.7 verified to ignore 4
+            // sequential "STOP" messages and keep calling Bash. Cleaner contract:
+            // one nudge at the threshold, then if the model ignores it past
+            // SAME_TOOL_HARD_STOP, break the turn.
+            let sameToolHardStopHit = null;
             for (const [name, count] of turnToolCounts) {
-                if (count >= SAME_TOOL_WARN_THRESHOLD) {
-                    const escalation = count === SAME_TOOL_WARN_THRESHOLD
-                        ? `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls.`
-                        : `[SYSTEM] STOP. You have now called ${name} ${count} times — more searching is not producing new information. Answer the user with what you already have. If the answer truly requires a different approach, use a DIFFERENT tool or ask the user.`;
+                if (count >= SAME_TOOL_HARD_STOP) {
+                    sameToolHardStopHit = name;
+                    continue;
+                }
+                if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
+                    sameToolWarned.add(name);
                     outcomeContent.push({
                         type: 'tool_result',
-                        tool_use_id: `guardrail-warn-${name}-${count}`,
-                        content: escalation,
+                        tool_use_id: `guardrail-warn-${name}`,
+                        content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls — if you need different data, switch tools or ask the user.`,
                         is_error: true,
                     });
                 }
@@ -1596,6 +1617,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
                 break;
             }
+            // Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
+            // read the warn injection, briefly acknowledge it, and call the
+            // same tool again — the soft signal is ineffective. Break the
+            // turn here when one tool name crosses the hard threshold to
+            // stop the search loop. Verified 2026-05-04: Opus-4.7 made 4 more
+            // Bash calls after the warn threshold (3) despite repeated nags;
+            // this break would have ended that turn at call 6.
+            if (sameToolHardStopHit) {
+                const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
+                logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
+                onEvent({
+                    kind: 'text_delta',
+                    text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
+                });
+                onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
+                break;
+            }
         }
         if (loopCount >= maxTurns) {
             lastSessionActivity = Date.now();

package/dist/proxy/server.d.ts CHANGED Viewed

@@ -7,6 +7,8 @@ export interface ProxyOptions {
     modelOverride?: string;
     debug?: boolean;
     fallbackEnabled?: boolean;
+    requestTimeoutMs?: number;
+    streamTimeoutMs?: number;
 }
 export declare function createProxy(options: ProxyOptions): http.Server;
 type RequestCategory = 'simple' | 'code' | 'default';

package/dist/proxy/server.js CHANGED Viewed

@@ -233,6 +233,11 @@ export function createProxy(options) {
     const chain = options.chain || 'base';
     let currentModel = options.modelOverride || DEFAULT_MODEL;
     const fallbackEnabled = options.fallbackEnabled !== false; // Default true
+    // Resolve timeouts once at construction. The option wins over the env var
+    // so callers (esp. tests) can configure a single proxy without polluting
+    // process.env for the rest of the process — and for any sibling proxy.
+    const effectiveRequestTimeoutMs = options.requestTimeoutMs ?? getProxyRequestTimeoutMs();
+    const effectiveStreamTimeoutMs = options.streamTimeoutMs ?? getProxyStreamTimeoutMs();
     let baseWallet = null;
     let solanaWallet = null;
     if (chain === 'base') {
@@ -425,7 +430,7 @@ export function createProxy(options) {
                 };
                 let response;
                 let finalModel = requestModel;
-                const requestTimeoutMs = getProxyRequestTimeoutMs();
+                const requestTimeoutMs = effectiveRequestTimeoutMs;
                 // Use fallback chain if enabled
                 if (fallbackEnabled && body && requestPath.includes('messages')) {
                     const fallbackConfig = {
@@ -526,7 +531,7 @@ export function createProxy(options) {
                     const decoder = new TextDecoder();
                     let fullResponse = '';
                     const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
-                    const STREAM_TIMEOUT_MS = getProxyStreamTimeoutMs();
+                    const STREAM_TIMEOUT_MS = effectiveStreamTimeoutMs;
                     const streamDeadline = Date.now() + STREAM_TIMEOUT_MS;
                     const pump = async () => {
                         while (true) {

package/dist/tasks/lost-detection.d.ts CHANGED Viewed

@@ -9,6 +9,12 @@
  * EPERM means the pid exists but we don't have permission to signal it —
  * treat that as alive. ESRCH (or anything else) means dead.
  *
+ * Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
+ * with status=queued and no pid means the runner subprocess crashed during
+ * module import (cliPath wrong, syntax error in dist) before it could record
+ * itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
+ * `franklin task list` doesn't show them as eternally pending.
+ *
  * Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
  * on `task list`"; v3.11 may add a pidStartTime cross-check.
  */

package/dist/tasks/lost-detection.js CHANGED Viewed

@@ -9,10 +9,17 @@
  * EPERM means the pid exists but we don't have permission to signal it —
  * treat that as alive. ESRCH (or anything else) means dead.
  *
+ * Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
+ * with status=queued and no pid means the runner subprocess crashed during
+ * module import (cliPath wrong, syntax error in dist) before it could record
+ * itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
+ * `franklin task list` doesn't show them as eternally pending.
+ *
  * Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
  * on `task list`"; v3.11 may add a pidStartTime cross-check.
  */
 import { listTasks, applyEvent } from './store.js';
+const QUEUED_NO_PID_TIMEOUT_MS = 5 * 60 * 1000; // 5 min
 function isPidAlive(pid) {
     try {
         process.kill(pid, 0);
@@ -28,16 +35,25 @@ export function reconcileLostTasks(now = Date.now()) {
     for (const t of listTasks()) {
         if (t.status !== 'running' && t.status !== 'queued')
             continue;
-        if (typeof t.pid !== 'number')
-            continue;
-        if (isPidAlive(t.pid))
-            continue;
+        let summary = null;
+        if (typeof t.pid !== 'number') {
+            // Only reap pid-less tasks that have been queued long enough that the
+            // runner can't plausibly still be importing. On slow networks or cold
+            // caches Franklin's startup can take 30+ seconds — 5 minutes leaves
+            // generous headroom for legitimate slow starts.
+            if (t.status !== 'queued')
+                continue;
+            if (now - t.createdAt < QUEUED_NO_PID_TIMEOUT_MS)
+                continue;
+            summary = 'Runner never registered a pid — likely crashed during module import.';
+        }
+        else {
+            if (isPidAlive(t.pid))
+                continue;
+            summary = 'Backing process not found — task may have been killed externally.';
+        }
         try {
-            applyEvent(t.runId, {
-                at: now,
-                kind: 'lost',
-                summary: 'Backing process not found — task may have been killed externally.',
-            });
+            applyEvent(t.runId, { at: now, kind: 'lost', summary });
             n++;
         }
         catch (err) {

package/dist/tasks/spawn.d.ts CHANGED Viewed

@@ -16,7 +16,8 @@
  *
  * CLI path resolution (in priority order):
  *   1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
- *   2. <cwd>/dist/index.js — the published bundle's entry point.
+ *   2. process.argv[1] — the script Node is currently executing, i.e. the
+ *      running franklin bundle. Works regardless of the user's cwd.
  */
 export interface StartDetachedTaskInput {
     label: string;

package/dist/tasks/spawn.js CHANGED Viewed

@@ -16,11 +16,11 @@
  *
  * CLI path resolution (in priority order):
  *   1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
- *   2. <cwd>/dist/index.js — the published bundle's entry point.
+ *   2. process.argv[1] — the script Node is currently executing, i.e. the
+ *      running franklin bundle. Works regardless of the user's cwd.
  */
 import { spawn } from 'node:child_process';
 import fs from 'node:fs';
-import path from 'node:path';
 import { randomUUID } from 'node:crypto';
 import { writeTaskMeta } from './store.js';
 import { taskLogPath, ensureTaskDir } from './paths.js';
@@ -28,7 +28,10 @@ function resolveCliPath() {
     const fromEnv = process.env.FRANKLIN_CLI_PATH;
     if (fromEnv && fromEnv.length > 0)
         return fromEnv;
-    return path.resolve(process.cwd(), 'dist', 'index.js');
+    // Resolving from process.cwd() breaks whenever Franklin is launched outside
+    // the source tree (npm global install, brew, or just `cd /elsewhere &&
+    // franklin`). process.argv[1] is the actual entry script Node loaded.
+    return process.argv[1];
 }
 function generateRunId() {
     return `t_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;

package/dist/tools/bash.js CHANGED Viewed

@@ -286,12 +286,31 @@ function executeCommand(command, timeoutMs, ctx) {
                     RUNCODE_WORKDIR: ctx.workingDir,
                 },
                 stdio: ['ignore', 'pipe', 'pipe'],
+                // Put the shell in its own process group (pgid = pid) so a timeout
+                // can SIGTERM the entire tree. Without this, signalling only the
+                // immediate bash leaves grandchildren (e.g. `gsutil -m cp` and its
+                // python helpers) running as orphans — observed in the wild as
+                // 18-day-old leaked gsutil processes after a 30-min Bash timeout.
+                detached: true,
             });
         }
         catch (spawnErr) {
             resolve({ output: `Error spawning shell: ${spawnErr.message}`, isError: true });
             return;
         }
+        // Signal the whole process group (negative pid). ESRCH means the group
+        // is already gone — fine. Any other failure we swallow because the close
+        // handler will still resolve the promise on its own.
+        const killTree = (signal) => {
+            if (typeof child.pid !== 'number')
+                return;
+            try {
+                process.kill(-child.pid, signal);
+            }
+            catch {
+                /* group already dead */
+            }
+        };
         let stdout = '';
         let stderr = '';
         let outputBytes = 0;
@@ -300,19 +319,14 @@ function executeCommand(command, timeoutMs, ctx) {
         let abortedByUser = false;
         const timer = setTimeout(() => {
             killed = true;
-            child.kill('SIGTERM');
-            setTimeout(() => {
-                try {
-                    child.kill('SIGKILL');
-                }
-                catch { /* already dead */ }
-            }, 5000); // Give 5s for graceful shutdown before SIGKILL
+            killTree('SIGTERM');
+            setTimeout(() => killTree('SIGKILL'), 5000); // 5s grace before SIGKILL
         }, timeoutMs);
         // Handle abort signal
         const onAbort = () => {
             killed = true;
             abortedByUser = true;
-            child.kill('SIGTERM');
+            killTree('SIGTERM');
         };
         ctx.abortSignal.addEventListener('abort', onAbort, { once: true });
         // Emit last non-empty line to UI progress (throttled to avoid flooding)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.27",
+  "version": "3.15.28",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {