npm - @blockrun/franklin - Versions diffs - 3.15.0 → 3.15.2 - Mend

@blockrun/franklin 3.15.0 → 3.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/agent/tool-guard.d.ts +2 -0
package/dist/agent/tool-guard.js +67 -2
package/dist/tools/bash.js +4 -0
package/dist/tools/detach.js +10 -7
package/package.json +1 -1

package/dist/agent/tool-guard.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
 import type { CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
+export declare const BLOCKING_POLL_LOOP_RE: RegExp;
 export declare function normalizeSearchQuery(query: string): {
     normalized: string;
     tokens: string[];
 };
+export declare function isToolClassFailure(name: string, result: CapabilityResult): boolean;
 export declare class SessionToolGuard {
     private turn;
     private webSearchesThisTurn;

package/dist/agent/tool-guard.js CHANGED Viewed

@@ -38,6 +38,13 @@ function globKey(invocation) {
     const path = normalizePath(String(invocation.input.path ?? ''));
     return `${pattern}::${path}`;
 }
+// Detect a blocking poll-loop in a foreground bash command:
+// any `for|while|until` loop containing a `sleep` of ≥1 second. This is
+// the canonical antipattern that makes the agent feel frozen — see
+// beforeBash() for the full rationale and the recommended alternatives.
+// Use [\s\S] for cross-line match so we catch multi-line scripts; require
+// `sleep [1-9]` so trivial `sleep 0` / `sleep 0.1` micro-pauses don't trip.
+export const BLOCKING_POLL_LOOP_RE = /\b(?:for|while|until)\b[\s\S]*?\bsleep\s+[1-9]/;
 const WRITE_KEYWORDS = (() => {
     const words = [
         'rm', 'mv', 'cp', 'mkdir', 'touch', 'chmod', 'chown', 'ln',
@@ -128,6 +135,35 @@ function readKey(resolved, offset, limit) {
 function fetchKey(url, maxLength) {
     return `${url}::${maxLength ?? 12288}`;
 }
+// Circuit-breaker classifier for the per-tool kill switch.
+//
+// `isError: true` covers everything from "tool itself broke" (network, parse,
+// timeout) to "agent fed me a bad input" (404 on a guessed URL, malformed URL).
+// Only the first category should count toward disabling the tool — otherwise
+// three hallucinated URLs in one prompt permanently kill WebFetch for the
+// session, even though the tool worked correctly each time.
+export function isToolClassFailure(name, result) {
+    if (!result.isError)
+        return false;
+    const out = String(result.output ?? '');
+    if (name === 'WebFetch') {
+        // HTTP 4xx/5xx — the URL was real-but-wrong or the upstream had issues.
+        // Either way, the tool worked; the agent should pick a different URL.
+        if (/^HTTP \d{3}\b/.test(out))
+            return false;
+        // Bad URL syntax / unsupported protocol / missing arg — agent input error.
+        if (out.startsWith('Error: invalid URL'))
+            return false;
+        if (out.startsWith('Error: only http'))
+            return false;
+        if (out.startsWith('Error: url is required'))
+            return false;
+        // User interrupt — not a tool failure.
+        if (out.startsWith('Error: request aborted'))
+            return false;
+    }
+    return true;
+}
 export class SessionToolGuard {
     turn = 0;
     webSearchesThisTurn = 0;
@@ -183,6 +219,30 @@ export class SessionToolGuard {
         const cmd = String(invocation.input.command ?? '').trim();
         if (!cmd)
             return null;
+        // Reject blocking poll-loops in foreground bash. A single bash call with
+        // `sleep N` inside a for/while/until loop blocks the agent for the full
+        // duration — the UI repeats the same status line and the user almost
+        // always cancels before it finishes. The right pattern is `Detach`
+        // (persistent background task) or `run_in_background: true`.
+        const runInBackground = Boolean(invocation.input.run_in_background);
+        if (!runInBackground && BLOCKING_POLL_LOOP_RE.test(cmd)) {
+            return {
+                output: 'Blocked: this Bash command runs `sleep` inside a for/while/until loop in the ' +
+                    'foreground. That blocks the agent for the full poll duration and looks frozen ' +
+                    'to the user — they almost always cancel before it finishes.\n\n' +
+                    'Use the `Detach` tool for polling-style work (waiting for an Apify run, video ' +
+                    'generation, deploy, build, or any external async job to complete). It returns ' +
+                    'a runId immediately and the polling continues persistently. Check status later ' +
+                    'with `franklin task wait <runId>` or `franklin task tail <runId>` via a ' +
+                    'separate Bash call.\n\n' +
+                    'If you need the result inline, break the loop into discrete single-poll Bash ' +
+                    'calls — poll once, reason about the status, then decide whether to poll again. ' +
+                    'Or, if the upstream API has a sync variant (e.g. Apify\'s ' +
+                    '`run-sync-get-dataset-items`), use that with a `timeout` of 300000–600000 ms ' +
+                    'instead of orchestrating async + poll yourself.',
+                isError: true,
+            };
+        }
         // Only dedup deterministic read-only commands. Skip anything writing/network/long-running.
         if (WRITE_KEYWORDS.test(cmd))
             return null;
@@ -227,10 +287,15 @@ export class SessionToolGuard {
         return null;
     }
     afterExecute(invocation, result) {
-        // Track per-tool error counts across the session
-        if (result.isError) {
+        // Per-tool circuit breaker: count consecutive tool-class failures, reset on
+        // any success. Agent-input errors (e.g. WebFetch 404 on a guessed URL) are
+        // not tool failures and must not trip the breaker.
+        if (isToolClassFailure(invocation.name, result)) {
             this.toolErrorCounts.set(invocation.name, (this.toolErrorCounts.get(invocation.name) ?? 0) + 1);
         }
+        else if (!result.isError) {
+            this.toolErrorCounts.delete(invocation.name);
+        }
         switch (invocation.name) {
             case 'WebSearch':
             case 'SearchX':

package/dist/tools/bash.js CHANGED Viewed

@@ -511,6 +511,10 @@ IMPORTANT: Avoid using this tool to run \`find\`, \`grep\`, \`cat\`, \`head\`, \
 - Avoid unnecessary \`sleep\` commands:
   - Do not sleep between commands that can run immediately — just run them.
   - Do not retry failing commands in a sleep loop — diagnose the root cause.
+  - Do NOT write \`sleep\` inside a for/while/until loop in a single foreground Bash call to poll an external async job. That blocks the agent for the whole poll duration and looks frozen to the user; they will cancel before it finishes. Pick one:
+    1. Use the \`Detach\` tool for polling-style work (waiting for an Apify run, video generation, deploy, or build to complete). It returns a runId immediately and the polling runs persistently; check status later with \`franklin task wait/tail <runId>\`.
+    2. Use the upstream sync endpoint when one exists (e.g. Apify's \`run-sync-get-dataset-items\`) with an explicit \`timeout\` up to 600000ms — usually simpler than orchestrating async + poll yourself.
+    3. Break the poll into discrete single-call polls — one poll per Bash call, reason about the status between calls, decide whether to poll again. The user can then see progress and course-correct.
 Output is capped at 512KB capture / 32KB return.`,
         input_schema: {

package/dist/tools/detach.js CHANGED Viewed

@@ -32,13 +32,16 @@ export const detachCapability = {
         description: "Run a Bash command as a detached background job. Returns immediately " +
             "with a runId. The command continues even if Franklin exits or the user " +
             "closes their terminal. Use this for any iteration over more than ~20 " +
-            "items, large data fetches, paginated API loops, or anything you'd " +
-            "otherwise loop on turn-by-turn (which would burn turns and trip " +
-            "timeouts). The agent's job is to design and orchestrate, not to be " +
-            "the for-loop. Pair with a script that writes a checkpoint file so " +
-            "progress survives restarts. Tail logs with `franklin task tail " +
-            "<runId> --follow` and check completion with `franklin task wait " +
-            "<runId>`.",
+            "items, large data fetches, paginated API loops, polling external async " +
+            "jobs (waiting for an Apify run / video generation / deploy / build to " +
+            "complete), or anything you'd otherwise loop on turn-by-turn (which " +
+            "would burn turns and trip timeouts). The agent's job is to design and " +
+            "orchestrate, not to be the for-loop. Pair with a script that writes a " +
+            "checkpoint file so progress survives restarts. Tail logs with " +
+            "`franklin task tail <runId> --follow` and check completion with " +
+            "`franklin task wait <runId>`. ALWAYS prefer Detach over a single " +
+            "foreground Bash call with `sleep` inside a for/while/until loop — that " +
+            "antipattern blocks the agent for the full duration and looks frozen.",
         input_schema: {
             type: 'object',
             properties: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.0",
+  "version": "3.15.2",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {