@blockrun/franklin 3.15.0 → 3.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
1
  import type { CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
2
+ export declare const BLOCKING_POLL_LOOP_RE: RegExp;
2
3
  export declare function normalizeSearchQuery(query: string): {
3
4
  normalized: string;
4
5
  tokens: string[];
5
6
  };
7
+ export declare function isToolClassFailure(name: string, result: CapabilityResult): boolean;
6
8
  export declare class SessionToolGuard {
7
9
  private turn;
8
10
  private webSearchesThisTurn;
@@ -38,6 +38,13 @@ function globKey(invocation) {
38
38
  const path = normalizePath(String(invocation.input.path ?? ''));
39
39
  return `${pattern}::${path}`;
40
40
  }
41
+ // Detect a blocking poll-loop in a foreground bash command:
42
+ // a `for|while|until` keyword followed later by `sleep` and a digit 1-9 (a heuristic: loop nesting is not parsed). This is
43
+ // the canonical antipattern that makes the agent feel frozen — see
44
+ // beforeBash() for the full rationale and the recommended alternatives.
45
+ // Use [\s\S] for cross-line match so we catch multi-line scripts; require
46
+ // `sleep [1-9]` so trivial `sleep 0` / `sleep 0.1` micro-pauses don't trip.
47
+ export const BLOCKING_POLL_LOOP_RE = /\b(?:for|while|until)\b[\s\S]*?\bsleep\s+[1-9]/;
41
48
  const WRITE_KEYWORDS = (() => {
42
49
  const words = [
43
50
  'rm', 'mv', 'cp', 'mkdir', 'touch', 'chmod', 'chown', 'ln',
@@ -128,6 +135,35 @@ function readKey(resolved, offset, limit) {
128
135
  function fetchKey(url, maxLength) {
129
136
  return `${url}::${maxLength ?? 12288}`;
130
137
  }
138
+ // Circuit-breaker classifier for the per-tool kill switch.
139
+ //
140
+ // `isError: true` covers everything from "tool itself broke" (network, parse,
141
+ // timeout) to "agent fed me a bad input" (404 on a guessed URL, malformed URL).
142
+ // Only the first category should count toward disabling the tool — otherwise
143
+ // three hallucinated URLs in one prompt permanently kill WebFetch for the
144
+ // session, even though the tool worked correctly each time.
145
+ export function isToolClassFailure(name, result) {
146
+ if (!result.isError)
147
+ return false;
148
+ const out = String(result.output ?? '');
149
+ if (name === 'WebFetch') {
150
+ // HTTP 4xx/5xx — the URL was real-but-wrong or the upstream had issues.
151
+ // Either way, the tool worked; the agent should pick a different URL.
152
+ if (/^HTTP \d{3}\b/.test(out))
153
+ return false;
154
+ // Bad URL syntax / unsupported protocol / missing arg — agent input error.
155
+ if (out.startsWith('Error: invalid URL'))
156
+ return false;
157
+ if (out.startsWith('Error: only http'))
158
+ return false;
159
+ if (out.startsWith('Error: url is required'))
160
+ return false;
161
+ // User interrupt — not a tool failure.
162
+ if (out.startsWith('Error: request aborted'))
163
+ return false;
164
+ }
165
+ return true;
166
+ }
131
167
  export class SessionToolGuard {
132
168
  turn = 0;
133
169
  webSearchesThisTurn = 0;
@@ -183,6 +219,30 @@ export class SessionToolGuard {
183
219
  const cmd = String(invocation.input.command ?? '').trim();
184
220
  if (!cmd)
185
221
  return null;
222
+ // Reject blocking poll-loops in foreground bash. A single bash call with
223
+ // `sleep N` inside a for/while/until loop blocks the agent for the full
224
+ // duration — the UI repeats the same status line and the user almost
225
+ // always cancels before it finishes. The right pattern is `Detach`
226
+ // (persistent background task) or `run_in_background: true`.
227
+ const runInBackground = Boolean(invocation.input.run_in_background);
228
+ if (!runInBackground && BLOCKING_POLL_LOOP_RE.test(cmd)) {
229
+ return {
230
+ output: 'Blocked: this Bash command runs `sleep` inside a for/while/until loop in the ' +
231
+ 'foreground. That blocks the agent for the full poll duration and looks frozen ' +
232
+ 'to the user — they almost always cancel before it finishes.\n\n' +
233
+ 'Use the `Detach` tool for polling-style work (waiting for an Apify run, video ' +
234
+ 'generation, deploy, build, or any external async job to complete). It returns ' +
235
+ 'a runId immediately and the polling continues persistently. Check status later ' +
236
+ 'with `franklin task wait <runId>` or `franklin task tail <runId>` via a ' +
237
+ 'separate Bash call.\n\n' +
238
+ 'If you need the result inline, break the loop into discrete single-poll Bash ' +
239
+ 'calls — poll once, reason about the status, then decide whether to poll again. ' +
240
+ 'Or, if the upstream API has a sync variant (e.g. Apify\'s ' +
241
+ '`run-sync-get-dataset-items`), use that with a `timeout` of 300000–600000 ms ' +
242
+ 'instead of orchestrating async + poll yourself.',
243
+ isError: true,
244
+ };
245
+ }
186
246
  // Only dedup deterministic read-only commands. Skip anything writing/network/long-running.
187
247
  if (WRITE_KEYWORDS.test(cmd))
188
248
  return null;
@@ -227,10 +287,15 @@ export class SessionToolGuard {
227
287
  return null;
228
288
  }
229
289
  afterExecute(invocation, result) {
230
- // Track per-tool error counts across the session
231
- if (result.isError) {
290
+ // Per-tool circuit breaker: count consecutive tool-class failures, reset on
291
+ // any success. Agent-input errors (e.g. WebFetch 404 on a guessed URL) are
292
+ // not tool failures and must not trip the breaker.
293
+ if (isToolClassFailure(invocation.name, result)) {
232
294
  this.toolErrorCounts.set(invocation.name, (this.toolErrorCounts.get(invocation.name) ?? 0) + 1);
233
295
  }
296
+ else if (!result.isError) {
297
+ this.toolErrorCounts.delete(invocation.name);
298
+ }
234
299
  switch (invocation.name) {
235
300
  case 'WebSearch':
236
301
  case 'SearchX':
@@ -511,6 +511,10 @@ IMPORTANT: Avoid using this tool to run \`find\`, \`grep\`, \`cat\`, \`head\`, \
511
511
  - Avoid unnecessary \`sleep\` commands:
512
512
  - Do not sleep between commands that can run immediately — just run them.
513
513
  - Do not retry failing commands in a sleep loop — diagnose the root cause.
514
+ - Do NOT write \`sleep\` inside a for/while/until loop in a single foreground Bash call to poll an external async job. That blocks the agent for the whole poll duration and looks frozen to the user; they will cancel before it finishes. Pick one:
515
+ 1. Use the \`Detach\` tool for polling-style work (waiting for an Apify run, video generation, deploy, or build to complete). It returns a runId immediately and the polling runs persistently; check status later with \`franklin task wait/tail <runId>\`.
516
+ 2. Use the upstream sync endpoint when one exists (e.g. Apify's \`run-sync-get-dataset-items\`) with an explicit \`timeout\` up to 600000ms — usually simpler than orchestrating async + poll yourself.
517
+ 3. Break the poll into discrete single-call polls — one poll per Bash call, reason about the status between calls, decide whether to poll again. The user can then see progress and course-correct.
514
518
 
515
519
  Output is capped at 512KB capture / 32KB return.`,
516
520
  input_schema: {
@@ -32,13 +32,16 @@ export const detachCapability = {
32
32
  description: "Run a Bash command as a detached background job. Returns immediately " +
33
33
  "with a runId. The command continues even if Franklin exits or the user " +
34
34
  "closes their terminal. Use this for any iteration over more than ~20 " +
35
- "items, large data fetches, paginated API loops, or anything you'd " +
36
- "otherwise loop on turn-by-turn (which would burn turns and trip " +
37
- "timeouts). The agent's job is to design and orchestrate, not to be " +
38
- "the for-loop. Pair with a script that writes a checkpoint file so " +
39
- "progress survives restarts. Tail logs with `franklin task tail " +
40
- "<runId> --follow` and check completion with `franklin task wait " +
41
- "<runId>`.",
35
+ "items, large data fetches, paginated API loops, polling external async " +
36
+ "jobs (waiting for an Apify run / video generation / deploy / build to " +
37
+ "complete), or anything you'd otherwise loop on turn-by-turn (which " +
38
+ "would burn turns and trip timeouts). The agent's job is to design and " +
39
+ "orchestrate, not to be the for-loop. Pair with a script that writes a " +
40
+ "checkpoint file so progress survives restarts. Tail logs with " +
41
+ "`franklin task tail <runId> --follow` and check completion with " +
42
+ "`franklin task wait <runId>`. ALWAYS prefer Detach over a single " +
43
+ "foreground Bash call with `sleep` inside a for/while/until loop — that " +
44
+ "antipattern blocks the agent for the full duration and looks frozen.",
42
45
  input_schema: {
43
46
  type: 'object',
44
47
  properties: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.0",
3
+ "version": "3.15.2",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {