@blockrun/franklin 3.8.37 → 3.8.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,5 +71,17 @@ export declare function renderGroundingFollowup(result: GroundingResult): string
71
71
  * Intentionally terse: the agent already has the original question in
72
72
  * history; we only need to name the gap + the tools to use.
73
73
  */
74
+ /**
75
+ * Pull the tool names the evaluator suggested out of its issue lines.
76
+ * Issue lines look like:
77
+ * Claim: "..." → missing tool: WebSearch
78
+ * Refusal: "..." → should have called: TradingMarket
79
+ * ... → missing tool: WebSearch (or any distance calculation tool)
80
+ *
81
+ * Returns the first token of each comma-, pipe-, or slash-separated segment, deduplicated.
82
+ * Used by both the retry instruction (to name them in prose) and the
83
+ * loop's tool_choice selection (to pin the next request to a tool).
84
+ */
85
+ export declare function extractMissingToolNames(result: GroundingResult): string[];
74
86
  export declare function buildGroundingRetryInstruction(result: GroundingResult, originalUserQuestion: string): string;
75
87
  export type { CapabilityHandler };
@@ -307,24 +307,35 @@ export function renderGroundingFollowup(result) {
307
307
  * Intentionally terse: the agent already has the original question in
308
308
  * history; we only need to name the gap + the tools to use.
309
309
  */
310
- export function buildGroundingRetryInstruction(result, originalUserQuestion) {
311
- // Pull the named missing tools out of the evaluator's issue list so we
312
- // can name them in the imperative. The evaluator outputs lines like
313
- // Claim: "..." → missing tool: WebSearch
314
- // grab the bit after "missing tool:" / "should have called:".
315
- const namedTools = new Set();
310
+ /**
311
+ * Pull the tool names the evaluator suggested out of its issue lines.
312
+ * Issue lines look like:
313
+ * Claim: "..." → missing tool: WebSearch
314
+ * Refusal: "..." → should have called: TradingMarket
315
+ * ... → missing tool: WebSearch (or any distance calculation tool)
316
+ *
317
+ * Returns the first token of each comma-, pipe-, or slash-separated segment, deduplicated.
318
+ * Used by both the retry instruction (to name them in prose) and the
319
+ * loop's tool_choice selection (to pin the next request to a tool).
320
+ */
321
+ export function extractMissingToolNames(result) {
322
+ const names = new Set();
316
323
  for (const issue of result.issues) {
317
324
  const m = issue.match(/(?:missing tool|should have called):\s*([A-Za-z][\w| ,/-]*)/i);
318
- if (m) {
319
- for (const tok of m[1].split(/[|,/]/)) {
320
- const t = tok.trim().split(/\s+/)[0];
321
- if (t && t !== '...' && t !== '(or')
322
- namedTools.add(t);
323
- }
325
+ if (!m)
326
+ continue;
327
+ for (const tok of m[1].split(/[|,/]/)) {
328
+ const t = tok.trim().split(/\s+/)[0];
329
+ if (t && t !== '...' && t !== '(or' && t !== '(any')
330
+ names.add(t);
324
331
  }
325
332
  }
326
- const toolList = namedTools.size > 0
327
- ? Array.from(namedTools).join(', ')
333
+ return Array.from(names);
334
+ }
335
+ export function buildGroundingRetryInstruction(result, originalUserQuestion) {
336
+ const namedTools = extractMissingToolNames(result);
337
+ const toolList = namedTools.length > 0
338
+ ? namedTools.join(', ')
328
339
  : '(see the missing-tool fields in the issues above)';
329
340
  const lines = [
330
341
  '[GROUNDING CHECK FAILED — RETRY ROUND]',
@@ -5,6 +5,30 @@
5
5
  */
6
6
  import { type Chain } from '../config.js';
7
7
  import type { Dialogue, CapabilityDefinition, ContentPart, CapabilityInvocation } from './types.js';
8
+ /**
9
+ * Anthropic-compatible tool_choice. Forwarded as-is through the proxy and on
10
+ * to the backend (Anthropic / OpenAI / Gemini gateways translate as needed).
11
+ *
12
+ * - `auto` — model decides (default if omitted)
13
+ * - `any` — must call SOME tool, model picks which
14
+ * - `tool` — must call the specifically named tool
15
+ * - `none` — must not call any tool
16
+ *
17
+ * Used by the grounding-retry path in `loop.ts`: when the evaluator catches
18
+ * an ungrounded answer that should have invoked tools, the next round sets
19
+ * `tool_choice` to force tool use rather than relying on a soft instruction
20
+ * the model can defy by fabricating citations.
21
+ */
22
+ export type ToolChoice = {
23
+ type: 'auto';
24
+ } | {
25
+ type: 'any';
26
+ } | {
27
+ type: 'tool';
28
+ name: string;
29
+ } | {
30
+ type: 'none';
31
+ };
8
32
  export interface ModelRequest {
9
33
  model: string;
10
34
  messages: Dialogue[];
@@ -13,6 +37,7 @@ export interface ModelRequest {
13
37
  max_tokens?: number;
14
38
  stream?: boolean;
15
39
  temperature?: number;
40
+ tool_choice?: ToolChoice;
16
41
  }
17
42
  export interface StreamChunk {
18
43
  kind: 'content_block_start' | 'content_block_delta' | 'content_block_stop' | 'message_start' | 'message_delta' | 'message_stop' | 'ping' | 'error';
package/dist/agent/llm.js CHANGED
@@ -15,12 +15,12 @@ function parseTimeoutEnv(name) {
15
15
  function getModelRequestTimeoutMs() {
16
16
  return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
17
17
  parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
18
- 45_000);
18
+ 8_000);
19
19
  }
20
20
  function getModelStreamIdleTimeoutMs() {
21
21
  return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
22
22
  parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
23
- 90_000);
23
+ 25_000);
24
24
  }
25
25
  function linkAbortSignal(parent, child) {
26
26
  if (!parent)
@@ -273,6 +273,12 @@ export class ModelClient {
273
273
  const isGLM = request.model.startsWith('zai/') || request.model.includes('glm');
274
274
  // Build the request payload, injecting model-specific optimizations
275
275
  let requestPayload = { ...request, stream: true };
276
+ // Safety: tool_choice without tools causes upstream 400. Strip rather
277
+ // than reject so callers don't have to coordinate the two fields.
278
+ if (requestPayload['tool_choice'] !== undefined &&
279
+ (!Array.isArray(requestPayload['tools']) || requestPayload['tools'].length === 0)) {
280
+ delete requestPayload['tool_choice'];
281
+ }
276
282
  // ── GLM-specific optimizations ───────────────────────────────────────────
277
283
  // GLM models work best with temperature=0.8 per official zai spec.
278
284
  // Enable thinking mode only for explicit reasoning variants (-thinking-).
@@ -25,7 +25,7 @@ import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../r
25
25
  import { recordOutcome } from '../router/local-elo.js';
26
26
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
27
27
  import { shouldVerify, runVerification } from './verification.js';
28
- import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
28
+ import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, extractMissingToolNames, } from './evaluator.js';
29
29
  import { augmentUserMessage, prefetchForIntent } from './intent-prefetch.js';
30
30
  import { analyzeTurn } from './turn-analyzer.js';
31
31
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
@@ -464,6 +464,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
464
464
  // decide — avoids pathological loops, caps wall-clock cost.
465
465
  let groundingRetryCount = 0;
466
466
  const MAX_GROUNDING_RETRIES = 1;
467
+ // When the previous round failed grounding and we're retrying, force the
468
+ // model to actually call a tool this round instead of trusting it to
469
+ // comply with a soft instruction. Single-shot — cleared once it is attached.
470
+ // Set to `{ type: "tool", name: "X" }` if the evaluator named exactly
471
+ // one available tool, else `{ type: "any" }` so the model picks.
472
+ let forceToolChoiceNextRound = null;
467
473
  // ── Plan-then-execute state (per turn) ──
468
474
  let planActive = false;
469
475
  let planPlannerModel = '';
@@ -767,6 +773,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
767
773
  if (sanitized.length !== history.length) {
768
774
  replaceHistory(history, sanitized);
769
775
  }
776
+ // Consume any pending forced tool_choice from the previous round's
777
+ // grounding-retry decision. `tool_choice` is dropped automatically in
778
+ // llm.ts if `tools` ended up empty, so it's safe to attach here.
779
+ const callToolChoice = forceToolChoiceNextRound;
780
+ forceToolChoiceNextRound = null;
770
781
  try {
771
782
  const result = await client.complete({
772
783
  model: resolvedModel,
@@ -775,6 +786,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
775
786
  tools: callToolDefs,
776
787
  max_tokens: callMaxTokens,
777
788
  stream: true,
789
+ ...(callToolChoice ? { tool_choice: callToolChoice } : {}),
778
790
  }, abort.signal,
779
791
  // Start concurrent tools as soon as their input is fully received
780
792
  (tool) => streamExec.onToolReceived(tool),
@@ -1144,9 +1156,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1144
1156
  const feedbackMsg = { role: 'user', content: retryMsg };
1145
1157
  history.push(feedbackMsg);
1146
1158
  persistSessionMessage(feedbackMsg);
1159
+ // Hard enforcement: set tool_choice so the model can't fabricate
1160
+ // citations in lieu of running tools (the round-2 failure mode
1161
+ // from the Tampa→Miami log). If the evaluator named exactly one
1162
+ // available tool, pin to it; otherwise force "any" tool use.
1163
+ const namedTools = extractMissingToolNames(gResult);
1164
+ const availableNames = new Set(buildCallToolDefs().map(t => t.name));
1165
+ const matched = namedTools.filter(n => availableNames.has(n));
1166
+ if (matched.length === 1) {
1167
+ forceToolChoiceNextRound = { type: 'tool', name: matched[0] };
1168
+ }
1169
+ else if (availableNames.size > 0) {
1170
+ forceToolChoiceNextRound = { type: 'any' };
1171
+ }
1147
1172
  onEvent({
1148
1173
  kind: 'text_delta',
1149
- text: '\n\n*Ungrounded claims detected — retrying with required tool calls...*\n\n',
1174
+ text: forceToolChoiceNextRound
1175
+ ? `\n\n*Ungrounded claims detected — forcing tool use (${forceToolChoiceNextRound.type === 'tool' ? forceToolChoiceNextRound.name : 'any'}) and retrying...*\n\n`
1176
+ : '\n\n*Ungrounded claims detected — retrying with required tool calls...*\n\n',
1150
1177
  });
1151
1178
  continue; // Re-enter outer loop — generator will produce a new response.
1152
1179
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.37",
3
+ "version": "3.8.38",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {