npm - @blockrun/franklin - Versions diffs - 3.8.37 → 3.8.38 - Mend

@blockrun/franklin 3.8.37 → 3.8.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/agent/evaluator.d.ts +12 -0
package/dist/agent/evaluator.js +25 -14
package/dist/agent/llm.d.ts +25 -0
package/dist/agent/llm.js +8 -2
package/dist/agent/loop.js +29 -2
package/package.json +1 -1

package/dist/agent/evaluator.d.ts CHANGED

@@ -71,5 +71,17 @@ export declare function renderGroundingFollowup(result: GroundingResult): string
  * Intentionally terse: the agent already has the original question in
  * history; we only need to name the gap + the tools to use.
  */
+/**
+ * Pull the tool names the evaluator suggested out of its issue lines.
+ * Issue lines look like:
+ *   Claim: "..." → missing tool: WebSearch
+ *   Refusal: "..." → should have called: TradingMarket
+ *   ... → missing tool: WebSearch (or any distance calculation tool)
+ *
+ * Returns first-token-of-each-comma/pipe-segment names, deduplicated.
+ * Used by both the retry instruction (to name them in prose) and the
+ * loop's tool_choice selection (to pin the next request to a tool).
+ */
+export declare function extractMissingToolNames(result: GroundingResult): string[];
 export declare function buildGroundingRetryInstruction(result: GroundingResult, originalUserQuestion: string): string;
 export type { CapabilityHandler };

package/dist/agent/evaluator.js CHANGED

@@ -307,24 +307,35 @@ export function renderGroundingFollowup(result) {
  * Intentionally terse: the agent already has the original question in
  * history; we only need to name the gap + the tools to use.
  */
-export function buildGroundingRetryInstruction(result, originalUserQuestion) {
-    // Pull the named missing tools out of the evaluator's issue list so we
-    // can name them in the imperative. The evaluator outputs lines like
-    //   Claim: "..." → missing tool: WebSearch
-    // grab the bit after "missing tool:" / "should have called:".
-    const namedTools = new Set();
+/**
+ * Pull the tool names the evaluator suggested out of its issue lines.
+ * Issue lines look like:
+ *   Claim: "..." → missing tool: WebSearch
+ *   Refusal: "..." → should have called: TradingMarket
+ *   ... → missing tool: WebSearch (or any distance calculation tool)
+ *
+ * Returns first-token-of-each-comma/pipe-segment names, deduplicated.
+ * Used by both the retry instruction (to name them in prose) and the
+ * loop's tool_choice selection (to pin the next request to a tool).
+ */
+export function extractMissingToolNames(result) {
+    const names = new Set();
     for (const issue of result.issues) {
         const m = issue.match(/(?:missing tool|should have called):\s*([A-Za-z][\w| ,/-]*)/i);
-        if (m) {
-            for (const tok of m[1].split(/[|,/]/)) {
-                const t = tok.trim().split(/\s+/)[0];
-                if (t && t !== '...' && t !== '(or')
-                    namedTools.add(t);
-            }
+        if (!m)
+            continue;
+        for (const tok of m[1].split(/[|,/]/)) {
+            const t = tok.trim().split(/\s+/)[0];
+            if (t && t !== '...' && t !== '(or' && t !== '(any')
+                names.add(t);
         }
     }
-    const toolList = namedTools.size > 0
-        ? Array.from(namedTools).join(', ')
+    return Array.from(names);
+}
+export function buildGroundingRetryInstruction(result, originalUserQuestion) {
+    const namedTools = extractMissingToolNames(result);
+    const toolList = namedTools.length > 0
+        ? namedTools.join(', ')
         : '(see the missing-tool fields in the issues above)';
     const lines = [
         '[GROUNDING CHECK FAILED — RETRY ROUND]',

package/dist/agent/llm.d.ts CHANGED

@@ -5,6 +5,30 @@
  */
 import { type Chain } from '../config.js';
 import type { Dialogue, CapabilityDefinition, ContentPart, CapabilityInvocation } from './types.js';
+/**
+ * Anthropic-compatible tool_choice. Forwarded as-is through the proxy and on
+ * to the backend (Anthropic / OpenAI / Gemini gateways translate as needed).
+ *
+ * - `auto`  — model decides (default if omitted)
+ * - `any`   — must call SOME tool, model picks which
+ * - `tool`  — must call the specifically named tool
+ * - `none`  — must not call any tool
+ *
+ * Used by the grounding-retry path in `loop.ts`: when the evaluator catches
+ * an ungrounded answer that should have invoked tools, the next round sets
+ * `tool_choice` to force tool use rather than relying on a soft instruction
+ * the model can defy by fabricating citations.
+ */
+export type ToolChoice = {
+    type: 'auto';
+} | {
+    type: 'any';
+} | {
+    type: 'tool';
+    name: string;
+} | {
+    type: 'none';
+};
 export interface ModelRequest {
     model: string;
     messages: Dialogue[];
@@ -13,6 +37,7 @@ export interface ModelRequest {
     max_tokens?: number;
     stream?: boolean;
     temperature?: number;
+    tool_choice?: ToolChoice;
 }
 export interface StreamChunk {
     kind: 'content_block_start' | 'content_block_delta' | 'content_block_stop' | 'message_start' | 'message_delta' | 'message_stop' | 'ping' | 'error';

package/dist/agent/llm.js CHANGED

@@ -15,12 +15,12 @@ function parseTimeoutEnv(name) {
 function getModelRequestTimeoutMs() {
     return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
         parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
-        45_000);
+        8_000);
 }
 function getModelStreamIdleTimeoutMs() {
     return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
         parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
-        90_000);
+        25_000);
 }
 function linkAbortSignal(parent, child) {
     if (!parent)
@@ -273,6 +273,12 @@ export class ModelClient {
         const isGLM = request.model.startsWith('zai/') || request.model.includes('glm');
         // Build the request payload, injecting model-specific optimizations
         let requestPayload = { ...request, stream: true };
+        // Safety: tool_choice without tools causes upstream 400. Strip rather
+        // than reject so callers don't have to coordinate the two fields.
+        if (requestPayload['tool_choice'] !== undefined &&
+            (!Array.isArray(requestPayload['tools']) || requestPayload['tools'].length === 0)) {
+            delete requestPayload['tool_choice'];
+        }
         // ── GLM-specific optimizations ───────────────────────────────────────────
         // GLM models work best with temperature=0.8 per official zai spec.
         // Enable thinking mode only for explicit reasoning variants (-thinking-).

package/dist/agent/loop.js CHANGED

@@ -25,7 +25,7 @@ import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../r
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { shouldVerify, runVerification } from './verification.js';
-import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
+import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, extractMissingToolNames, } from './evaluator.js';
 import { augmentUserMessage, prefetchForIntent } from './intent-prefetch.js';
 import { analyzeTurn } from './turn-analyzer.js';
 import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
@@ -464,6 +464,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         // decide — avoids pathological loops, caps wall-clock cost.
         let groundingRetryCount = 0;
         const MAX_GROUNDING_RETRIES = 1;
+        // When the previous round failed grounding and we're retrying, force the
+        // model to actually call a tool this round instead of trusting it to
+        // comply with a soft instruction. Single-shot — cleared after attached.
+        // Set to `{ type: "tool", name: "X" }` if the evaluator named exactly
+        // one available tool, else `{ type: "any" }` so the model picks.
+        let forceToolChoiceNextRound = null;
         // ── Plan-then-execute state (per turn) ──
         let planActive = false;
         let planPlannerModel = '';
@@ -767,6 +773,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             if (sanitized.length !== history.length) {
                 replaceHistory(history, sanitized);
             }
+            // Consume any pending forced tool_choice from the previous round's
+            // grounding-retry decision. `tool_choice` is dropped automatically in
+            // llm.ts if `tools` ended up empty, so it's safe to attach here.
+            const callToolChoice = forceToolChoiceNextRound;
+            forceToolChoiceNextRound = null;
             try {
                 const result = await client.complete({
                     model: resolvedModel,
@@ -775,6 +786,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     tools: callToolDefs,
                     max_tokens: callMaxTokens,
                     stream: true,
+                    ...(callToolChoice ? { tool_choice: callToolChoice } : {}),
                 }, abort.signal,
                 // Start concurrent tools as soon as their input is fully received
                 (tool) => streamExec.onToolReceived(tool),
@@ -1144,9 +1156,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                             const feedbackMsg = { role: 'user', content: retryMsg };
                             history.push(feedbackMsg);
                             persistSessionMessage(feedbackMsg);
+                            // Hard enforcement: set tool_choice so the model can't fabricate
+                            // citations in lieu of running tools (the round-2 failure mode
+                            // from the Tampa→Miami log). If the evaluator named exactly one
+                            // available tool, pin to it; otherwise force "any" tool use.
+                            const namedTools = extractMissingToolNames(gResult);
+                            const availableNames = new Set(buildCallToolDefs().map(t => t.name));
+                            const matched = namedTools.filter(n => availableNames.has(n));
+                            if (matched.length === 1) {
+                                forceToolChoiceNextRound = { type: 'tool', name: matched[0] };
+                            }
+                            else if (availableNames.size > 0) {
+                                forceToolChoiceNextRound = { type: 'any' };
+                            }
                             onEvent({
                                 kind: 'text_delta',
-                                text: '\n\n*Ungrounded claims detected — retrying with required tool calls...*\n\n',
+                                text: forceToolChoiceNextRound
+                                    ? `\n\n*Ungrounded claims detected — forcing tool use (${forceToolChoiceNextRound.type === 'tool' ? forceToolChoiceNextRound.name : 'any'}) and retrying...*\n\n`
+                                    : '\n\n*Ungrounded claims detected — retrying with required tool calls...*\n\n',
                             });
                             continue; // Re-enter outer loop — generator will produce a new response.
                         }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.8.37",
+  "version": "3.8.38",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {