@blockrun/franklin 3.22.0 → 3.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Scavenge tool calls that the model emitted as text instead of via the
3
+ * structured tool_use channel. Ported from reasonix (MIT) and adapted to
4
+ * Franklin's Anthropic-shape `CapabilityInvocation` (parsed `input` object
5
+ * + synthetic id).
6
+ *
7
+ * Triggers we've actually seen:
8
+ * - DeepSeek R1 leaks tool-call JSON into `reasoning_content` and forgets
9
+ * to populate `tool_calls`. The text channel ends up with raw JSON like
10
+ * `{"name":"Read","arguments":{...}}`.
11
+ * - DeepSeek V3.1 sometimes emits its chat-template DSML markup
12
+ * (`<|DSML|invoke …>`) in the content channel.
13
+ * - Smaller OpenAI-compatible models (some Qwen / Llama variants behind
14
+ * the BlockRun gateway) leak the OpenAI tool-call shape inline.
15
+ *
16
+ * All three are recoverable. This module turns the leaked text back into
17
+ * a `CapabilityInvocation` so the agent loop doesn't waste a turn telling
18
+ * the model "you forgot to call a tool".
19
+ */
20
+ import { randomBytes } from 'node:crypto';
21
+ /** Bounds regex input — DSML matchers are O(n²) on adversarial input. */
22
+ const MAX_SCAVENGE_INPUT = 100 * 1024;
23
/**
 * Recover tool calls that a model wrote into its text output instead of the
 * structured tool_use channel.
 *
 * DSML `invoke` blocks are collected first, then raw JSON objects found in
 * the text with all DSML markup removed. Only tool names present in
 * `opts.allowedNames` are accepted.
 *
 * @param {string} text - Model text to scan. Falsy input yields an empty result.
 * @param {{ allowedNames: Set<string>, maxCalls?: number }} opts - Accepted
 *   tool names, plus an optional cap on recovered calls (default 4).
 * @returns {{ calls: object[], notes: string[] }} Recovered invocations and
 *   one human-readable note per recovery (or a single note when skipped).
 */
export function scavengeToolCalls(text, opts) {
    const calls = [];
    const notes = [];
    if (!text) {
        return { calls, notes };
    }
    // Oversized input is skipped outright rather than scanned.
    if (text.length > MAX_SCAVENGE_INPUT) {
        notes.push(`scavenge skipped: input too large (${text.length} chars)`);
        return { calls, notes };
    }
    const limit = opts.maxCalls ?? 4;
    const { allowedNames } = opts;
    const isFull = () => calls.length >= limit;
    // Pass 1 — DSML invoke blocks leaked into the content channel.
    for (const { name, args } of iterateDsmlInvokes(text)) {
        if (isFull()) {
            break;
        }
        if (allowedNames.has(name)) {
            calls.push(makeInvocation(name, args));
            notes.push(`scavenged DSML call: ${name}`);
        }
    }
    // Pass 2 — bare JSON objects. DSML blocks are removed beforehand so their
    // parameter payloads are not picked up a second time as standalone calls.
    for (const candidate of iterateJsonObjects(stripDsmlBlocks(text))) {
        if (isFull()) {
            break;
        }
        const call = coerceToInvocation(candidate, allowedNames);
        if (!call) {
            continue;
        }
        calls.push(call);
        notes.push(`scavenged call: ${call.name}`);
    }
    return { calls, notes };
}
60
/**
 * Remove DSML markup from `text` so that JSON payloads nested inside DSML
 * blocks are not seen by a later raw-JSON scan.
 *
 * Both delimiter variants are recognised: the fullwidth vertical line
 * (U+FF5C) and the ASCII pipe. The fullwidth character is written as a
 * `\uFF5C` escape so the pattern survives any encoding normalisation of
 * this file.
 *
 * @param {string} text - Text that may contain DSML blocks.
 * @returns {string} `text` with `function_calls` and `invoke` blocks removed.
 */
function stripDsmlBlocks(text) {
    // The slash of the closing `function_calls` tag is optional (`<\/?`), so a
    // block terminated by a second opening-style tag is stripped as well.
    const FUNCTION_CALLS_RE = /<[|\uFF5C]DSML[|\uFF5C]function_calls>[\s\S]*?<\/?[|\uFF5C]DSML[|\uFF5C]function_calls>/g;
    // Stand-alone invoke blocks that were not wrapped in `function_calls`.
    const INVOKE_RE = /<[|\uFF5C]DSML[|\uFF5C]invoke\s+[^>]*>[\s\S]*?<\/[|\uFF5C]DSML[|\uFF5C]invoke>/g;
    return text.replace(FUNCTION_CALLS_RE, '').replace(INVOKE_RE, '');
}
66
/**
 * Yield every DSML `invoke` block found in `text`.
 *
 * The tag delimiter is matched in both forms: the fullwidth vertical line
 * (U+FF5C, written as an escape so it cannot be lost to encoding
 * normalisation) and the ASCII pipe as a fallback variant.
 *
 * @param {string} text - Text to scan for invoke blocks.
 * @yields {{ name: string, args: object }} The invoked tool name and the
 *   parameters parsed from the block body.
 */
function* iterateDsmlInvokes(text) {
    const INVOKE_RE = /<[|\uFF5C]DSML[|\uFF5C]invoke\s+name="([^"]+)">([\s\S]*?)<\/[|\uFF5C]DSML[|\uFF5C]invoke>/g;
    for (const [, name, body] of text.matchAll(INVOKE_RE)) {
        // Defensive: skip a match whose capture groups are missing.
        if (!name || body === undefined) {
            continue;
        }
        yield { name, args: parseDsmlParameters(body) };
    }
}
77
/**
 * Parse the `parameter` elements inside a DSML invoke body into an
 * arguments object.
 *
 * The tag delimiter is matched as either the fullwidth vertical line
 * (U+FF5C, written as an escape so it cannot be lost to encoding
 * normalisation) or the ASCII pipe.
 *
 * @param {string} body - Inner text of a DSML invoke block.
 * @returns {object} Map of parameter name to value. Values are trimmed
 *   strings, except parameters flagged `string="false"`, which are
 *   JSON-parsed when possible.
 */
function parseDsmlParameters(body) {
    const PARAM_RE = /<[|\uFF5C]DSML[|\uFF5C]parameter\s+name="([^"]+)"(?:\s+string="(true|false)")?\s*>([\s\S]*?)<\/[|\uFF5C]DSML[|\uFF5C]parameter>/g;
    const args = {};
    for (const [, key, stringFlag, value] of body.matchAll(PARAM_RE)) {
        if (!key) {
            continue;
        }
        const raw = (value ?? '').trim();
        if (stringFlag === 'false') {
            try {
                args[key] = JSON.parse(raw);
                continue;
            }
            catch {
                // Not valid JSON — fall through and keep the literal text so
                // the information is not lost.
            }
        }
        args[key] = raw;
    }
    return args;
}
99
/**
 * Yield each top-level brace-balanced `{ … }` substring of `text`.
 *
 * Braces inside double-quoted strings are ignored and backslash escapes are
 * honoured. Once a balanced object is yielded, scanning resumes after its
 * closing brace, so nested objects are not yielded separately. An opening
 * brace that never balances is skipped and the scan moves on by one
 * character.
 *
 * @param {string} text - Text to scan.
 * @yields {string} A candidate JSON object substring (not yet validated).
 */
function* iterateJsonObjects(text) {
    // Index of the `}` that balances the `{` at `start`, or -1 if none does.
    const findClosingBrace = (start) => {
        let depth = 0;
        let inString = false;
        let skipNext = false;
        for (let pos = start; pos < text.length; pos += 1) {
            const ch = text[pos];
            if (skipNext) {
                // Character after a backslash inside a string: consume it.
                skipNext = false;
            }
            else if (inString) {
                if (ch === '\\') {
                    skipNext = true;
                }
                else if (ch === '"') {
                    inString = false;
                }
            }
            else if (ch === '"') {
                inString = true;
            }
            else if (ch === '{') {
                depth += 1;
            }
            else if (ch === '}') {
                depth -= 1;
                if (depth === 0) {
                    return pos;
                }
            }
        }
        return -1;
    };
    let cursor = 0;
    while (cursor < text.length) {
        const end = text[cursor] === '{' ? findClosingBrace(cursor) : -1;
        if (end < 0) {
            cursor += 1;
            continue;
        }
        yield text.slice(cursor, end + 1);
        cursor = end + 1;
    }
}
136
/**
 * Try to interpret a JSON substring as a tool call in one of three shapes,
 * checked in this order:
 *   1. `{ name, arguments }`
 *   2. `{ type: "function", function: { name, arguments } }`
 *   3. `{ tool_name, tool_args }`
 *
 * @param {string} candidateJson - Candidate JSON text.
 * @param {Set<string>} allowedNames - Tool names that may be recovered.
 * @returns {object | null} An invocation for the first matching shape whose
 *   name is allowed, or `null` when the text is not JSON, is not an object,
 *   or matches no shape.
 */
function coerceToInvocation(candidateJson, allowedNames) {
    let obj;
    try {
        obj = JSON.parse(candidateJson);
    }
    catch {
        return null;
    }
    if (obj === null || typeof obj !== 'object') {
        return null;
    }
    const isAllowed = (name) => typeof name === 'string' && allowedNames.has(name);
    // Shape 1 — flat { name, arguments }.
    if (isAllowed(obj.name)) {
        return makeInvocation(obj.name, normalizeArgs(obj.arguments));
    }
    // Shape 2 — OpenAI-style wrapper around a nested `function` object.
    const fn = obj.type === 'function' ? obj.function : null;
    if (fn && typeof fn === 'object' && isAllowed(fn.name)) {
        return makeInvocation(fn.name, normalizeArgs(fn.arguments));
    }
    // Shape 3 — { tool_name, tool_args }.
    if (isAllowed(obj.tool_name)) {
        return makeInvocation(obj.tool_name, normalizeArgs(obj.tool_args));
    }
    return null;
}
166
/**
 * Normalise a tool call's raw arguments into a plain object.
 *
 * A string is JSON-parsed once; any other value is used as is. Anything that
 * does not end up as a non-null, non-array object — including unparseable
 * strings — becomes an empty object.
 *
 * @param {unknown} raw - Arguments as found in the scavenged call.
 * @returns {object} The arguments object (the same reference when `raw` was
 *   already an object), or `{}`.
 */
function normalizeArgs(raw) {
    let value = raw;
    if (typeof value === 'string') {
        try {
            value = JSON.parse(value);
        }
        catch {
            return {};
        }
    }
    const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
    return isPlainObject ? value : {};
}
186
/**
 * Build a `tool_use` invocation record with a synthetic id.
 *
 * @param {string} name - Tool name.
 * @param {object} input - Parsed tool arguments, stored by reference.
 * @returns {{ type: 'tool_use', id: string, name: string, input: object }}
 *   The invocation; `id` is `toolu_repair_` followed by 12 random hex chars.
 */
function makeInvocation(name, input) {
    // 6 random bytes render as 12 hex characters.
    const suffix = randomBytes(6).toString('hex');
    const id = `toolu_repair_${suffix}`;
    return { type: 'tool_use', id, name, input };
}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Truncated-JSON repair — ported from reasonix (MIT). Format-agnostic:
3
+ * works on any raw JSON argument string. Common trigger: model hits
4
+ * max_tokens mid-structure; the last useful argument is half-emitted.
5
+ *
6
+ * Local-only — never makes a continuation call. The agent loop owns
7
+ * budgets; this just patches what we have so the dispatcher can either
8
+ * parse it or report a clean fallback.
9
+ */
10
+ export interface TruncationRepairResult {
11
+ repaired: string;
12
+ changed: boolean;
13
+ notes: string[];
14
+ /** True when all repair attempts failed and the result fell back to "{}". */
15
+ fallback: boolean;
16
+ }
17
+ export declare function repairTruncatedJson(input: string): TruncationRepairResult;
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Truncated-JSON repair — ported from reasonix (MIT). Format-agnostic:
3
+ * works on any raw JSON argument string. Common trigger: model hits
4
+ * max_tokens mid-structure; the last useful argument is half-emitted.
5
+ *
6
+ * Local-only — never makes a continuation call. The agent loop owns
7
+ * budgets; this just patches what we have so the dispatcher can either
8
+ * parse it or report a clean fallback.
9
+ */
10
/**
 * Best-effort repair of a JSON string that was cut off mid-structure.
 *
 * Strategy: return valid input untouched; otherwise scan once to learn which
 * containers are still open and whether the cut landed inside a string,
 * drop trailing whitespace, patch the tail, and append the missing closers.
 * If the patched text still does not parse, fall back to `"{}"`.
 *
 * Tail patching depends on where the cut landed:
 *   - outside a string: a trailing comma is removed and a dangling `"key":`
 *     is completed with `null`;
 *   - inside a string: the content is left alone (a trailing `,` or `":` is
 *     string data there, not structure), a lone trailing backslash is
 *     dropped so the closing quote is not escaped, and the string is closed.
 *
 * @param {string} input - Raw, possibly truncated JSON text.
 * @returns {{ repaired: string, changed: boolean, notes: string[], fallback: boolean }}
 *   `repaired` always parses as JSON; `changed` tells whether it differs from
 *   `input`; `notes` lists the patches applied; `fallback` is true only when
 *   repair failed and `"{}"` was substituted.
 */
export function repairTruncatedJson(input) {
    const notes = [];
    if (!input || !input.trim()) {
        return {
            repaired: '{}',
            changed: input !== '{}',
            notes: ['empty input → {}'],
            fallback: false,
        };
    }
    // Fast path: already valid JSON.
    try {
        JSON.parse(input);
        return { repaired: input, changed: false, notes: [], fallback: false };
    }
    catch {
        /* fall through to repair */
    }
    // Open containers ('{' / '['), innermost last. String state is tracked by
    // `inString` alone, so the stack never holds quote markers.
    const stack = [];
    let escaped = false;
    let inString = false;
    let lastSignificant = -1;
    for (let i = 0; i < input.length; i++) {
        const c = input[i];
        if (!/\s/.test(c)) {
            lastSignificant = i;
        }
        if (escaped) {
            escaped = false;
            continue;
        }
        if (inString) {
            if (c === '\\') {
                escaped = true;
            }
            else if (c === '"') {
                inString = false;
            }
            continue;
        }
        if (c === '"') {
            inString = true;
        }
        else if (c === '{' || c === '[') {
            stack.push(c);
        }
        else if (c === '}' || c === ']') {
            stack.pop();
        }
    }
    // Trailing whitespace is dropped in every case; raw newlines or tabs at
    // the end of an unterminated string would make it unparseable.
    let s = input.slice(0, lastSignificant + 1);
    if (inString) {
        // An odd run of trailing backslashes means the last one is an
        // incomplete escape; keeping it would escape the quote added below.
        let backslashes = 0;
        while (backslashes < s.length && s[s.length - 1 - backslashes] === '\\') {
            backslashes += 1;
        }
        if (backslashes % 2 === 1) {
            s = s.slice(0, -1);
            notes.push('dropped dangling escape backslash');
        }
        s += '"';
        notes.push('closed unterminated string');
    }
    else {
        if (s.endsWith(',')) {
            s = s.slice(0, -1);
            notes.push('trimmed trailing comma');
        }
        if (/"\s*:\s*$/.test(s)) {
            s += ' null';
            notes.push('filled dangling key with null');
        }
    }
    // Close whatever is still open, innermost first.
    while (stack.length > 0) {
        s += stack.pop() === '{' ? '}' : ']';
    }
    try {
        JSON.parse(s);
        return { repaired: s, changed: s !== input, notes, fallback: false };
    }
    catch (err) {
        const preview = input.length <= 500 ? input : `${input.slice(0, 500)} …[+${input.length - 500} chars]`;
        notes.push(`fallback to {}: ${err.message}`);
        notes.push(`unrecoverable truncation — original args preview: ${preview}`);
        return { repaired: '{}', changed: true, notes, fallback: true };
    }
}
@@ -191,10 +191,11 @@ export function estimateHistoryTokens(history) {
191
191
  */
192
192
  const MODEL_CONTEXT_WINDOWS = {
193
193
  // Anthropic. The BlockRun gateway model entry advertises 1M context for
194
- // Opus 4.7, but the 1M beta header may not be enabled at the gateway
194
+ // Opus 4.8 / 4.7, but the 1M beta header may not be enabled at the gateway
195
195
  // edge yet — sending more than 200k without it 413s. Keep 200k as the
196
196
  // safe Franklin baseline; bump to 1_000_000 in a separate commit once
197
197
  // a real >200k call has been verified end-to-end.
198
+ 'anthropic/claude-opus-4.8': 200_000,
198
199
  'anthropic/claude-opus-4.7': 200_000,
199
200
  'anthropic/claude-opus-4.6': 200_000,
200
201
  'anthropic/claude-sonnet-4.6': 200_000,
@@ -29,7 +29,7 @@ export async function initCommand(options) {
29
29
  ANTHROPIC_AUTH_TOKEN: 'x402-proxy-handles-auth',
30
30
  ANTHROPIC_MODEL: 'blockrun/auto',
31
31
  ANTHROPIC_DEFAULT_SONNET_MODEL: 'anthropic/claude-sonnet-4.6',
32
- ANTHROPIC_DEFAULT_OPUS_MODEL: 'anthropic/claude-opus-4.7',
32
+ ANTHROPIC_DEFAULT_OPUS_MODEL: 'anthropic/claude-opus-4.8',
33
33
  ANTHROPIC_DEFAULT_HAIKU_MODEL: 'anthropic/claude-haiku-4.5-20251001',
34
34
  };
35
35
  fs.mkdirSync(path.dirname(CLAUDE_SETTINGS_FILE), { recursive: true });
@@ -516,7 +516,15 @@ async function runWithInkUI(agentConfig, model, workDir, version, walletInfo, on
516
516
  if (process.env.FRANKLIN_EXTRACT_ON_EXIT === '1') {
517
517
  runExitBackgroundTasks(sessionHistory, agentConfig).catch(() => { });
518
518
  }
519
- disconnectMcpServers().catch(() => { });
519
+ // Await MCP shutdown with a bounded timeout — previously fire-and-forget,
520
+ // which left stdio child processes alive and (combined with no explicit
521
+ // process.exit() below) was the root cause of the "I quit but the
522
+ // process is still running" report (audited 2026-05-28). A misbehaving
523
+ // MCP server must not be able to pin shutdown, so cap the wait at 2s.
524
+ await Promise.race([
525
+ disconnectMcpServers().catch(() => { }),
526
+ new Promise((r) => setTimeout(r, 2000)),
527
+ ]);
520
528
  // Session summary — delta vs. snapshot at session start
521
529
  try {
522
530
  const delta = statsDelta(startSnapshot);
@@ -547,6 +555,14 @@ async function runWithInkUI(agentConfig, model, workDir, version, walletInfo, on
547
555
  console.log(chalk.dim(' Latest: franklin --continue'));
548
556
  }
549
557
  console.log(chalk.dim('\nGoodbye.\n'));
558
+ // Explicit exit. Without this, lingering keep-alive sockets (bootstrap
559
+ // learnings importer, panel HTTP server, gateway client agents) and any
560
+ // FRANKLIN_EXTRACT_ON_EXIT background promise can hold the event loop
561
+ // open for seconds-to-minutes after the UI tears down — the user sees
562
+ // "Goodbye." but `ps` still shows the process, and a subsequent
563
+ // `franklin` invocation races with the zombie. Force a clean exit. Any
564
+ // explicit error paths above set process.exitCode = 1 — preserve it.
565
+ process.exit(process.exitCode ?? 0);
550
566
  }
551
567
  async function runExitBackgroundTasks(sessionHistory, agentConfig) {
552
568
  if (!sessionHistory || sessionHistory.length < 4)
@@ -631,6 +647,13 @@ async function runWithBasicUI(agentConfig, model, workDir, initialInput) {
631
647
  catch { /* stats unavailable */ }
632
648
  ui.printGoodbye();
633
649
  flushStats();
650
+ // Same explicit-exit reasoning as runWithInkUI — bounded MCP shutdown
651
+ // then hard exit so background promises can't pin the process alive.
652
+ await Promise.race([
653
+ disconnectMcpServers().catch(() => { }),
654
+ new Promise((r) => setTimeout(r, 2000)),
655
+ ]);
656
+ process.exit(process.exitCode ?? 0);
634
657
  }
635
658
  // ─── Panel auto-start ──────────────────────────────────────────────────────
636
659
  async function startPanelBackground(startPort) {
package/dist/pricing.js CHANGED
@@ -27,6 +27,7 @@ export const MODEL_PRICING = {
27
27
  'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
28
28
  // Anthropic
29
29
  'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
30
+ 'anthropic/claude-opus-4.8': { input: 5.0, output: 25.0 },
30
31
  'anthropic/claude-opus-4.7': { input: 5.0, output: 25.0 },
31
32
  'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
32
33
  'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
@@ -90,7 +91,7 @@ export const MODEL_PRICING = {
90
91
  'zai/glm-5.1-turbo': { input: 0, output: 0, perCall: 0.001 }, // client alias for zai/glm-5-turbo
91
92
  };
92
93
  /** Opus pricing for savings calculations — tracks the current flagship. */
93
- export const OPUS_PRICING = MODEL_PRICING['anthropic/claude-opus-4.7'];
94
+ export const OPUS_PRICING = MODEL_PRICING['anthropic/claude-opus-4.8'];
94
95
  /**
95
96
  * Estimate cost in USD for a request.
96
97
  * Falls back to $2/$10 per 1M for unknown models.
@@ -100,7 +100,8 @@ const MODEL_SHORTCUTS = {
100
100
  sonnet: 'anthropic/claude-sonnet-4.6',
101
101
  claude: 'anthropic/claude-sonnet-4.6',
102
102
  'sonnet-4.6': 'anthropic/claude-sonnet-4.6',
103
- opus: 'anthropic/claude-opus-4.7',
103
+ opus: 'anthropic/claude-opus-4.8',
104
+ 'opus-4.8': 'anthropic/claude-opus-4.8',
104
105
  'opus-4.7': 'anthropic/claude-opus-4.7',
105
106
  'opus-4.6': 'anthropic/claude-opus-4.6',
106
107
  haiku: 'anthropic/claude-haiku-4.5-20251001',
@@ -55,14 +55,15 @@ const AUTO_TIERS = {
55
55
  // Hard tasks — multi-file refactors, ambiguous specs, dense reasoning
56
56
  // chains — still go to Opus. V4 Pro is great but not a Sonnet/Opus
57
57
  // replacement at the high end of difficulty per recent agent-bench runs.
58
- primary: 'anthropic/claude-opus-4.7',
59
- fallback: ['openai/gpt-5.5', 'anthropic/claude-sonnet-4.6', 'deepseek/deepseek-v4-pro'],
58
+ primary: 'anthropic/claude-opus-4.8',
59
+ fallback: ['anthropic/claude-opus-4.7', 'openai/gpt-5.5', 'anthropic/claude-sonnet-4.6', 'deepseek/deepseek-v4-pro'],
60
60
  },
61
61
  REASONING: {
62
- // Opus 4.7: step-change improvement in agentic coding over 4.6 per
63
- // Anthropic. 4.6 stays in the fallback chain in case of rollout delays.
64
- primary: 'anthropic/claude-opus-4.7',
62
+ // Opus 4.8: latest flagship, most capable for agentic coding. 4.7 and 4.6
63
+ // stay in the fallback chain in case of rollout delays.
64
+ primary: 'anthropic/claude-opus-4.8',
65
65
  fallback: [
66
+ 'anthropic/claude-opus-4.7',
66
67
  'anthropic/claude-opus-4.6',
67
68
  'openai/o3',
68
69
  'deepseek/deepseek-v4-pro',
@@ -20,6 +20,7 @@
20
20
  */
21
21
  const VISION_MODELS = new Set([
22
22
  // Anthropic — native vision across the line
23
+ 'anthropic/claude-opus-4.8',
23
24
  'anthropic/claude-opus-4.7',
24
25
  'anthropic/claude-opus-4.6',
25
26
  'anthropic/claude-sonnet-4.6',
@@ -19,7 +19,8 @@ export const MODEL_SHORTCUTS = {
19
19
  sonnet: 'anthropic/claude-sonnet-4.6',
20
20
  claude: 'anthropic/claude-sonnet-4.6',
21
21
  'sonnet-4.6': 'anthropic/claude-sonnet-4.6',
22
- opus: 'anthropic/claude-opus-4.7',
22
+ opus: 'anthropic/claude-opus-4.8',
23
+ 'opus-4.8': 'anthropic/claude-opus-4.8',
23
24
  'opus-4.7': 'anthropic/claude-opus-4.7',
24
25
  'opus-4.6': 'anthropic/claude-opus-4.6',
25
26
  haiku: 'anthropic/claude-haiku-4.5-20251001',
@@ -149,7 +150,7 @@ export const PICKER_CATEGORIES = [
149
150
  // free-tier entries and v3.9.2 used to retire Kimi K2.5.
150
151
  category: '✨ Premium frontier',
151
152
  models: [
152
- { id: 'anthropic/claude-opus-4.7', shortcut: 'opus', label: 'Claude Opus 4.7', price: '$5/$25', highlight: true },
153
+ { id: 'anthropic/claude-opus-4.8', shortcut: 'opus', label: 'Claude Opus 4.8', price: '$5/$25', highlight: true },
153
154
  { id: 'anthropic/claude-sonnet-4.6', shortcut: 'sonnet', label: 'Claude Sonnet 4.6', price: '$3/$15' },
154
155
  { id: 'openai/gpt-5.5', shortcut: 'gpt', label: 'GPT-5.5', price: '$5/$30', highlight: true },
155
156
  { id: 'google/gemini-3.1-pro', shortcut: 'gemini-3', label: 'Gemini 3.1 Pro', price: '$2/$12' },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.22.0",
3
+ "version": "3.23.1",
4
4
  "description": "Franklin Agent — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -31,7 +31,7 @@
31
31
  "build": "tsc && node scripts/copy-plugin-assets.mjs",
32
32
  "dev": "tsc --watch",
33
33
  "start": "node dist/index.js",
34
- "test": "npm run build && node --test --test-reporter=spec test/local.mjs test/skills.local.mjs",
34
+ "test": "npm run build && node --test --test-reporter=spec test/local.mjs test/skills.local.mjs test/repair.mjs",
35
35
  "test:e2e": "npm run build && node --test --test-reporter=spec test/e2e.mjs",
36
36
  "test:free-models": "npm run build && node --test --test-reporter=spec test/free-model-matrix.mjs",
37
37
  "test:all": "npm run test && npm run test:e2e",