@blockrun/franklin 3.15.27 → 3.15.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +62 -7
- package/dist/proxy/server.d.ts +2 -0
- package/dist/proxy/server.js +7 -2
- package/dist/tasks/lost-detection.d.ts +6 -0
- package/dist/tasks/lost-detection.js +25 -9
- package/dist/tasks/spawn.d.ts +2 -1
- package/dist/tasks/spawn.js +6 -3
- package/dist/tools/bash.js +22 -8
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -615,6 +615,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
615
615
|
const HARD_TOOL_CAP = MAX_TOOL_CALLS_PER_TURN * 2;
|
|
616
616
|
let toolCapWarned = false; // Log + inject only once per turn
|
|
617
617
|
const SAME_TOOL_WARN_THRESHOLD = 3; // Warn after N calls to same tool (lowered from 5 — search loops were wasting turns)
|
|
618
|
+
// Hard stop at 2× the warn threshold. The previous loop injected
|
|
619
|
+
// "[SYSTEM] STOP" on every call past 3 (verified 2026-05-04 in a real
|
|
620
|
+
// Opus-4.7 session: Opus saw 4 STOP messages, made 4 more Bash calls
|
|
621
|
+
// anyway). Strong models read the system tool_result, briefly
|
|
622
|
+
// acknowledge, then call the same tool again — the soft injection
|
|
623
|
+
// doesn't actually constrain behavior. Hard stop matches what
|
|
624
|
+
// HARD_TOOL_CAP already does for total tool count.
|
|
625
|
+
const SAME_TOOL_HARD_STOP = SAME_TOOL_WARN_THRESHOLD * 2;
|
|
626
|
+
// Tracks which tool names have already had a warn injected this turn.
|
|
627
|
+
// Without it, every call past threshold pushes another [SYSTEM] STOP
|
|
628
|
+
// tool_result into the model's context — same shape bug as the cap
|
|
629
|
+
// spam fixed in 3.15.24, just in a sibling guardrail.
|
|
630
|
+
const sameToolWarned = new Set();
|
|
618
631
|
// ── No-progress guardrail: kill infinite tiny-response loops ──
|
|
619
632
|
let consecutiveTinyResponses = 0; // Count of consecutive calls with <10 output tokens
|
|
620
633
|
const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
|
|
@@ -1212,6 +1225,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1212
1225
|
consecutiveTinyResponses = 0;
|
|
1213
1226
|
}
|
|
1214
1227
|
recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
|
|
1228
|
+
// Capture tool names invoked in this assistant turn. The AuditEntry
|
|
1229
|
+
// interface has had a `toolCalls?: string[]` slot since 3.15.11, but
|
|
1230
|
+
// nothing populated it — verified 2026-05-04 in a real Opus session
|
|
1231
|
+
// where 14 audit rows showed `tools=[]` despite Bash being called
|
|
1232
|
+
// every turn (the session jsonl had the tool_use blocks; the audit
|
|
1233
|
+
// just lost them). Now we pull names off responseParts so post-hoc
|
|
1234
|
+
// analytics can answer "what tools fired most often last week" from
|
|
1235
|
+
// ~/.blockrun/franklin-audit.jsonl alone.
|
|
1236
|
+
const turnToolNames = [];
|
|
1237
|
+
for (const p of responseParts) {
|
|
1238
|
+
if (p.type === 'tool_use') {
|
|
1239
|
+
const name = p.name;
|
|
1240
|
+
if (typeof name === 'string')
|
|
1241
|
+
turnToolNames.push(name);
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1215
1244
|
appendAudit({
|
|
1216
1245
|
ts: Date.now(),
|
|
1217
1246
|
sessionId,
|
|
@@ -1227,6 +1256,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1227
1256
|
source: 'agent',
|
|
1228
1257
|
workDir,
|
|
1229
1258
|
prompt: extractLastUserPrompt(history),
|
|
1259
|
+
toolCalls: turnToolNames.length > 0 ? turnToolNames : undefined,
|
|
1230
1260
|
routingTier,
|
|
1231
1261
|
});
|
|
1232
1262
|
// Accumulate session-level totals for session meta
|
|
@@ -1521,16 +1551,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1521
1551
|
};
|
|
1522
1552
|
});
|
|
1523
1553
|
// ── Guardrail injections ──
|
|
1524
|
-
// Warn about same-tool repetition —
|
|
1554
|
+
// Warn about same-tool repetition — fire once per tool name per turn.
|
|
1555
|
+
// Re-injecting on every subsequent call (the pre-3.15.28 behavior)
|
|
1556
|
+
// just spammed the model's context: Opus-4.7 verified to ignore 4
|
|
1557
|
+
// sequential "STOP" messages and keep calling Bash. Cleaner contract:
|
|
1558
|
+
// one nudge at the threshold, then if the model ignores it past
|
|
1559
|
+
// SAME_TOOL_HARD_STOP, break the turn.
|
|
1560
|
+
let sameToolHardStopHit = null;
|
|
1525
1561
|
for (const [name, count] of turnToolCounts) {
|
|
1526
|
-
if (count >=
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1562
|
+
if (count >= SAME_TOOL_HARD_STOP) {
|
|
1563
|
+
sameToolHardStopHit = name;
|
|
1564
|
+
continue;
|
|
1565
|
+
}
|
|
1566
|
+
if (count === SAME_TOOL_WARN_THRESHOLD && !sameToolWarned.has(name)) {
|
|
1567
|
+
sameToolWarned.add(name);
|
|
1530
1568
|
outcomeContent.push({
|
|
1531
1569
|
type: 'tool_result',
|
|
1532
|
-
tool_use_id: `guardrail-warn-${name}
|
|
1533
|
-
content:
|
|
1570
|
+
tool_use_id: `guardrail-warn-${name}`,
|
|
1571
|
+
content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls — if you need different data, switch tools or ask the user.`,
|
|
1534
1572
|
is_error: true,
|
|
1535
1573
|
});
|
|
1536
1574
|
}
|
|
@@ -1596,6 +1634,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1596
1634
|
onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
|
|
1597
1635
|
break;
|
|
1598
1636
|
}
|
|
1637
|
+
// Same-tool hard stop. Strong models (Opus, GPT-5.5) sometimes
|
|
1638
|
+
// read the warn injection, briefly acknowledge it, and call the
|
|
1639
|
+
// same tool again — the soft signal is ineffective. Break the
|
|
1640
|
+
// turn here when one tool name crosses the hard threshold to
|
|
1641
|
+
// stop the search loop. Verified 2026-05-04: Opus-4.7 made 4
|
|
1642
|
+
// Bash calls past 3 nags before this break would have triggered
|
|
1643
|
+
// (at 6).
|
|
1644
|
+
if (sameToolHardStopHit) {
|
|
1645
|
+
const count = turnToolCounts.get(sameToolHardStopHit) ?? 0;
|
|
1646
|
+
logger.error(`[franklin] Same-tool hard stop: ${sameToolHardStopHit} called ${count} times this turn — model ignoring soft warn, ending turn`);
|
|
1647
|
+
onEvent({
|
|
1648
|
+
kind: 'text_delta',
|
|
1649
|
+
text: `\n\n⚠️ ${sameToolHardStopHit} called ${count}× in one turn — that's a search loop. Ending turn so you don't burn through credits. Rephrase what you actually need, or try a different model with \`/model\`.\n`,
|
|
1650
|
+
});
|
|
1651
|
+
onEvent({ kind: 'turn_done', reason: 'cap_exceeded' });
|
|
1652
|
+
break;
|
|
1653
|
+
}
|
|
1599
1654
|
}
|
|
1600
1655
|
if (loopCount >= maxTurns) {
|
|
1601
1656
|
lastSessionActivity = Date.now();
|
package/dist/proxy/server.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface ProxyOptions {
|
|
|
7
7
|
modelOverride?: string;
|
|
8
8
|
debug?: boolean;
|
|
9
9
|
fallbackEnabled?: boolean;
|
|
10
|
+
requestTimeoutMs?: number;
|
|
11
|
+
streamTimeoutMs?: number;
|
|
10
12
|
}
|
|
11
13
|
export declare function createProxy(options: ProxyOptions): http.Server;
|
|
12
14
|
type RequestCategory = 'simple' | 'code' | 'default';
|
package/dist/proxy/server.js
CHANGED
|
@@ -233,6 +233,11 @@ export function createProxy(options) {
|
|
|
233
233
|
const chain = options.chain || 'base';
|
|
234
234
|
let currentModel = options.modelOverride || DEFAULT_MODEL;
|
|
235
235
|
const fallbackEnabled = options.fallbackEnabled !== false; // Default true
|
|
236
|
+
// Resolve timeouts once at construction. The option wins over the env var
|
|
237
|
+
// so callers (esp. tests) can configure a single proxy without polluting
|
|
238
|
+
// process.env for the rest of the process — and for any sibling proxy.
|
|
239
|
+
const effectiveRequestTimeoutMs = options.requestTimeoutMs ?? getProxyRequestTimeoutMs();
|
|
240
|
+
const effectiveStreamTimeoutMs = options.streamTimeoutMs ?? getProxyStreamTimeoutMs();
|
|
236
241
|
let baseWallet = null;
|
|
237
242
|
let solanaWallet = null;
|
|
238
243
|
if (chain === 'base') {
|
|
@@ -425,7 +430,7 @@ export function createProxy(options) {
|
|
|
425
430
|
};
|
|
426
431
|
let response;
|
|
427
432
|
let finalModel = requestModel;
|
|
428
|
-
const requestTimeoutMs =
|
|
433
|
+
const requestTimeoutMs = effectiveRequestTimeoutMs;
|
|
429
434
|
// Use fallback chain if enabled
|
|
430
435
|
if (fallbackEnabled && body && requestPath.includes('messages')) {
|
|
431
436
|
const fallbackConfig = {
|
|
@@ -526,7 +531,7 @@ export function createProxy(options) {
|
|
|
526
531
|
const decoder = new TextDecoder();
|
|
527
532
|
let fullResponse = '';
|
|
528
533
|
const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
|
|
529
|
-
const STREAM_TIMEOUT_MS =
|
|
534
|
+
const STREAM_TIMEOUT_MS = effectiveStreamTimeoutMs;
|
|
530
535
|
const streamDeadline = Date.now() + STREAM_TIMEOUT_MS;
|
|
531
536
|
const pump = async () => {
|
|
532
537
|
while (true) {
|
package/dist/tasks/lost-detection.d.ts
CHANGED
|
@@ -9,6 +9,12 @@
|
|
|
9
9
|
* EPERM means the pid exists but we don't have permission to signal it —
|
|
10
10
|
* treat that as alive. ESRCH (or anything else) means dead.
|
|
11
11
|
*
|
|
12
|
+
* Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
|
|
13
|
+
* with status=queued and no pid means the runner subprocess crashed during
|
|
14
|
+
* module import (cliPath wrong, syntax error in dist) before it could record
|
|
15
|
+
* itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
|
|
16
|
+
* `franklin task list` doesn't show them as eternally pending.
|
|
17
|
+
*
|
|
12
18
|
* Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
|
|
13
19
|
* on `task list`"; v3.11 may add a pidStartTime cross-check.
|
|
14
20
|
*/
|
package/dist/tasks/lost-detection.js
CHANGED
|
@@ -9,10 +9,17 @@
|
|
|
9
9
|
* EPERM means the pid exists but we don't have permission to signal it —
|
|
10
10
|
* treat that as alive. ESRCH (or anything else) means dead.
|
|
11
11
|
*
|
|
12
|
+
* Pid-less queued tasks: runner.ts writes its own pid on entry, so a task
|
|
13
|
+
* with status=queued and no pid means the runner subprocess crashed during
|
|
14
|
+
* module import (cliPath wrong, syntax error in dist) before it could record
|
|
15
|
+
* itself. We reap these once they're older than QUEUED_NO_PID_TIMEOUT_MS so
|
|
16
|
+
* `franklin task list` doesn't show them as eternally pending.
|
|
17
|
+
*
|
|
12
18
|
* Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
|
|
13
19
|
* on `task list`"; v3.11 may add a pidStartTime cross-check.
|
|
14
20
|
*/
|
|
15
21
|
import { listTasks, applyEvent } from './store.js';
|
|
22
|
+
const QUEUED_NO_PID_TIMEOUT_MS = 5 * 60 * 1000; // 5 min
|
|
16
23
|
function isPidAlive(pid) {
|
|
17
24
|
try {
|
|
18
25
|
process.kill(pid, 0);
|
|
@@ -28,16 +35,25 @@ export function reconcileLostTasks(now = Date.now()) {
|
|
|
28
35
|
for (const t of listTasks()) {
|
|
29
36
|
if (t.status !== 'running' && t.status !== 'queued')
|
|
30
37
|
continue;
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
38
|
+
let summary = null;
|
|
39
|
+
if (typeof t.pid !== 'number') {
|
|
40
|
+
// Only reap pid-less tasks that have been queued long enough that the
|
|
41
|
+
// runner can't plausibly still be importing. On slow networks or cold
|
|
42
|
+
// caches Franklin's startup can take 30+ seconds — 5 minutes leaves
|
|
43
|
+
// generous headroom for legitimate slow starts.
|
|
44
|
+
if (t.status !== 'queued')
|
|
45
|
+
continue;
|
|
46
|
+
if (now - t.createdAt < QUEUED_NO_PID_TIMEOUT_MS)
|
|
47
|
+
continue;
|
|
48
|
+
summary = 'Runner never registered a pid — likely crashed during module import.';
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
if (isPidAlive(t.pid))
|
|
52
|
+
continue;
|
|
53
|
+
summary = 'Backing process not found — task may have been killed externally.';
|
|
54
|
+
}
|
|
35
55
|
try {
|
|
36
|
-
applyEvent(t.runId, {
|
|
37
|
-
at: now,
|
|
38
|
-
kind: 'lost',
|
|
39
|
-
summary: 'Backing process not found — task may have been killed externally.',
|
|
40
|
-
});
|
|
56
|
+
applyEvent(t.runId, { at: now, kind: 'lost', summary });
|
|
41
57
|
n++;
|
|
42
58
|
}
|
|
43
59
|
catch (err) {
|
package/dist/tasks/spawn.d.ts
CHANGED
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
*
|
|
17
17
|
* CLI path resolution (in priority order):
|
|
18
18
|
* 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
|
|
19
|
-
* 2.
|
|
19
|
+
* 2. process.argv[1] — the script Node is currently executing, i.e. the
|
|
20
|
+
* running franklin bundle. Works regardless of the user's cwd.
|
|
20
21
|
*/
|
|
21
22
|
export interface StartDetachedTaskInput {
|
|
22
23
|
label: string;
|
package/dist/tasks/spawn.js
CHANGED
|
@@ -16,11 +16,11 @@
|
|
|
16
16
|
*
|
|
17
17
|
* CLI path resolution (in priority order):
|
|
18
18
|
* 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
|
|
19
|
-
* 2.
|
|
19
|
+
* 2. process.argv[1] — the script Node is currently executing, i.e. the
|
|
20
|
+
* running franklin bundle. Works regardless of the user's cwd.
|
|
20
21
|
*/
|
|
21
22
|
import { spawn } from 'node:child_process';
|
|
22
23
|
import fs from 'node:fs';
|
|
23
|
-
import path from 'node:path';
|
|
24
24
|
import { randomUUID } from 'node:crypto';
|
|
25
25
|
import { writeTaskMeta } from './store.js';
|
|
26
26
|
import { taskLogPath, ensureTaskDir } from './paths.js';
|
|
@@ -28,7 +28,10 @@ function resolveCliPath() {
|
|
|
28
28
|
const fromEnv = process.env.FRANKLIN_CLI_PATH;
|
|
29
29
|
if (fromEnv && fromEnv.length > 0)
|
|
30
30
|
return fromEnv;
|
|
31
|
-
|
|
31
|
+
// Resolving from process.cwd() breaks whenever Franklin is launched outside
|
|
32
|
+
// the source tree (npm global install, brew, or just `cd /elsewhere &&
|
|
33
|
+
// franklin`). process.argv[1] is the actual entry script Node loaded.
|
|
34
|
+
return process.argv[1];
|
|
32
35
|
}
|
|
33
36
|
function generateRunId() {
|
|
34
37
|
return `t_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;
|
package/dist/tools/bash.js
CHANGED
|
@@ -286,12 +286,31 @@ function executeCommand(command, timeoutMs, ctx) {
|
|
|
286
286
|
RUNCODE_WORKDIR: ctx.workingDir,
|
|
287
287
|
},
|
|
288
288
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
289
|
+
// Put the shell in its own process group (pgid = pid) so a timeout
|
|
290
|
+
// can SIGTERM the entire tree. Without this, signalling only the
|
|
291
|
+
// immediate bash leaves grandchildren (e.g. `gsutil -m cp` and its
|
|
292
|
+
// python helpers) running as orphans — observed in the wild as
|
|
293
|
+
// 18-day-old leaked gsutil processes after a 30-min Bash timeout.
|
|
294
|
+
detached: true,
|
|
289
295
|
});
|
|
290
296
|
}
|
|
291
297
|
catch (spawnErr) {
|
|
292
298
|
resolve({ output: `Error spawning shell: ${spawnErr.message}`, isError: true });
|
|
293
299
|
return;
|
|
294
300
|
}
|
|
301
|
+
// Signal the whole process group (negative pid). ESRCH means the group
|
|
302
|
+
// is already gone — fine. Any other failure we swallow because the close
|
|
303
|
+
// handler will still resolve the promise on its own.
|
|
304
|
+
const killTree = (signal) => {
|
|
305
|
+
if (typeof child.pid !== 'number')
|
|
306
|
+
return;
|
|
307
|
+
try {
|
|
308
|
+
process.kill(-child.pid, signal);
|
|
309
|
+
}
|
|
310
|
+
catch {
|
|
311
|
+
try { child.kill(signal); } catch { /* already dead */ } // group signal failed (group gone, or no process-group support, e.g. Windows): fall back to signalling the shell directly
|
|
312
|
+
}
|
|
313
|
+
};
|
|
295
314
|
let stdout = '';
|
|
296
315
|
let stderr = '';
|
|
297
316
|
let outputBytes = 0;
|
|
@@ -300,19 +319,14 @@ function executeCommand(command, timeoutMs, ctx) {
|
|
|
300
319
|
let abortedByUser = false;
|
|
301
320
|
const timer = setTimeout(() => {
|
|
302
321
|
killed = true;
|
|
303
|
-
|
|
304
|
-
setTimeout(() =>
|
|
305
|
-
try {
|
|
306
|
-
child.kill('SIGKILL');
|
|
307
|
-
}
|
|
308
|
-
catch { /* already dead */ }
|
|
309
|
-
}, 5000); // Give 5s for graceful shutdown before SIGKILL
|
|
322
|
+
killTree('SIGTERM');
|
|
323
|
+
setTimeout(() => killTree('SIGKILL'), 5000); // 5s grace before SIGKILL
|
|
310
324
|
}, timeoutMs);
|
|
311
325
|
// Handle abort signal
|
|
312
326
|
const onAbort = () => {
|
|
313
327
|
killed = true;
|
|
314
328
|
abortedByUser = true;
|
|
315
|
-
|
|
329
|
+
killTree('SIGTERM');
|
|
316
330
|
};
|
|
317
331
|
ctx.abortSignal.addEventListener('abort', onAbort, { once: true });
|
|
318
332
|
// Emit last non-empty line to UI progress (throttled to avoid flooding)
|
package/package.json
CHANGED