@blockrun/franklin 3.8.37 → 3.8.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/evaluator.d.ts +12 -0
- package/dist/agent/evaluator.js +25 -14
- package/dist/agent/llm.d.ts +25 -0
- package/dist/agent/llm.js +8 -2
- package/dist/agent/loop.js +29 -2
- package/package.json +1 -1
|
@@ -71,5 +71,17 @@ export declare function renderGroundingFollowup(result: GroundingResult): string
|
|
|
71
71
|
* Intentionally terse: the agent already has the original question in
|
|
72
72
|
* history; we only need to name the gap + the tools to use.
|
|
73
73
|
*/
|
|
74
|
+
/**
|
|
75
|
+
* Pull the tool names the evaluator suggested out of its issue lines.
|
|
76
|
+
* Issue lines look like:
|
|
77
|
+
* Claim: "..." → missing tool: WebSearch
|
|
78
|
+
* Refusal: "..." → should have called: TradingMarket
|
|
79
|
+
* ... → missing tool: WebSearch (or any distance calculation tool)
|
|
80
|
+
*
|
|
81
|
+
* Returns the first token of each comma-, pipe-, or slash-separated segment, deduplicated.
|
|
82
|
+
* Used by both the retry instruction (to name them in prose) and the
|
|
83
|
+
* loop's tool_choice selection (to pin the next request to a tool).
|
|
84
|
+
*/
|
|
85
|
+
export declare function extractMissingToolNames(result: GroundingResult): string[];
|
|
74
86
|
export declare function buildGroundingRetryInstruction(result: GroundingResult, originalUserQuestion: string): string;
|
|
75
87
|
export type { CapabilityHandler };
|
package/dist/agent/evaluator.js
CHANGED
|
@@ -307,24 +307,35 @@ export function renderGroundingFollowup(result) {
|
|
|
307
307
|
* Intentionally terse: the agent already has the original question in
|
|
308
308
|
* history; we only need to name the gap + the tools to use.
|
|
309
309
|
*/
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
310
|
+
/**
|
|
311
|
+
* Pull the tool names the evaluator suggested out of its issue lines.
|
|
312
|
+
* Issue lines look like:
|
|
313
|
+
* Claim: "..." → missing tool: WebSearch
|
|
314
|
+
* Refusal: "..." → should have called: TradingMarket
|
|
315
|
+
* ... → missing tool: WebSearch (or any distance calculation tool)
|
|
316
|
+
*
|
|
317
|
+
* Returns the first token of each comma-, pipe-, or slash-separated segment, deduplicated.
|
|
318
|
+
* Used by both the retry instruction (to name them in prose) and the
|
|
319
|
+
* loop's tool_choice selection (to pin the next request to a tool).
|
|
320
|
+
*/
|
|
321
|
+
export function extractMissingToolNames(result) {
|
|
322
|
+
const names = new Set();
|
|
316
323
|
for (const issue of result.issues) {
|
|
317
324
|
const m = issue.match(/(?:missing tool|should have called):\s*([A-Za-z][\w| ,/-]*)/i);
|
|
318
|
-
if (m)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
325
|
+
if (!m)
|
|
326
|
+
continue;
|
|
327
|
+
for (const tok of m[1].split(/[|,/]/)) {
|
|
328
|
+
const t = tok.trim().split(/\s+/)[0];
|
|
329
|
+
if (t && t !== '...' && t !== '(or' && t !== '(any')
|
|
330
|
+
names.add(t);
|
|
324
331
|
}
|
|
325
332
|
}
|
|
326
|
-
|
|
327
|
-
|
|
333
|
+
return Array.from(names);
|
|
334
|
+
}
|
|
335
|
+
export function buildGroundingRetryInstruction(result, originalUserQuestion) {
|
|
336
|
+
const namedTools = extractMissingToolNames(result);
|
|
337
|
+
const toolList = namedTools.length > 0
|
|
338
|
+
? namedTools.join(', ')
|
|
328
339
|
: '(see the missing-tool fields in the issues above)';
|
|
329
340
|
const lines = [
|
|
330
341
|
'[GROUNDING CHECK FAILED — RETRY ROUND]',
|
package/dist/agent/llm.d.ts
CHANGED
|
@@ -5,6 +5,30 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { type Chain } from '../config.js';
|
|
7
7
|
import type { Dialogue, CapabilityDefinition, ContentPart, CapabilityInvocation } from './types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Anthropic-compatible tool_choice. Forwarded as-is through the proxy and on
|
|
10
|
+
* to the backend (Anthropic / OpenAI / Gemini gateways translate as needed).
|
|
11
|
+
*
|
|
12
|
+
* - `auto` — model decides (default if omitted)
|
|
13
|
+
* - `any` — must call SOME tool, model picks which
|
|
14
|
+
* - `tool` — must call the specifically named tool
|
|
15
|
+
* - `none` — must not call any tool
|
|
16
|
+
*
|
|
17
|
+
* Used by the grounding-retry path in `loop.ts`: when the evaluator catches
|
|
18
|
+
* an ungrounded answer that should have invoked tools, the next round sets
|
|
19
|
+
* `tool_choice` to force tool use rather than relying on a soft instruction
|
|
20
|
+
* the model can defy by fabricating citations.
|
|
21
|
+
*/
|
|
22
|
+
export type ToolChoice = {
|
|
23
|
+
type: 'auto';
|
|
24
|
+
} | {
|
|
25
|
+
type: 'any';
|
|
26
|
+
} | {
|
|
27
|
+
type: 'tool';
|
|
28
|
+
name: string;
|
|
29
|
+
} | {
|
|
30
|
+
type: 'none';
|
|
31
|
+
};
|
|
8
32
|
export interface ModelRequest {
|
|
9
33
|
model: string;
|
|
10
34
|
messages: Dialogue[];
|
|
@@ -13,6 +37,7 @@ export interface ModelRequest {
|
|
|
13
37
|
max_tokens?: number;
|
|
14
38
|
stream?: boolean;
|
|
15
39
|
temperature?: number;
|
|
40
|
+
tool_choice?: ToolChoice;
|
|
16
41
|
}
|
|
17
42
|
export interface StreamChunk {
|
|
18
43
|
kind: 'content_block_start' | 'content_block_delta' | 'content_block_stop' | 'message_start' | 'message_delta' | 'message_stop' | 'ping' | 'error';
|
package/dist/agent/llm.js
CHANGED
|
@@ -15,12 +15,12 @@ function parseTimeoutEnv(name) {
|
|
|
15
15
|
function getModelRequestTimeoutMs() {
|
|
16
16
|
return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
|
|
17
17
|
parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
|
|
18
|
-
|
|
18
|
+
8_000);
|
|
19
19
|
}
|
|
20
20
|
function getModelStreamIdleTimeoutMs() {
|
|
21
21
|
return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
|
|
22
22
|
parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
|
|
23
|
-
|
|
23
|
+
25_000);
|
|
24
24
|
}
|
|
25
25
|
function linkAbortSignal(parent, child) {
|
|
26
26
|
if (!parent)
|
|
@@ -273,6 +273,12 @@ export class ModelClient {
|
|
|
273
273
|
const isGLM = request.model.startsWith('zai/') || request.model.includes('glm');
|
|
274
274
|
// Build the request payload, injecting model-specific optimizations
|
|
275
275
|
let requestPayload = { ...request, stream: true };
|
|
276
|
+
// Safety: tool_choice without tools causes upstream 400. Strip rather
|
|
277
|
+
// than reject so callers don't have to coordinate the two fields.
|
|
278
|
+
if (requestPayload['tool_choice'] !== undefined &&
|
|
279
|
+
(!Array.isArray(requestPayload['tools']) || requestPayload['tools'].length === 0)) {
|
|
280
|
+
delete requestPayload['tool_choice'];
|
|
281
|
+
}
|
|
276
282
|
// ── GLM-specific optimizations ───────────────────────────────────────────
|
|
277
283
|
// GLM models work best with temperature=0.8 per official zai spec.
|
|
278
284
|
// Enable thinking mode only for explicit reasoning variants (-thinking-).
|
package/dist/agent/loop.js
CHANGED
|
@@ -25,7 +25,7 @@ import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../r
|
|
|
25
25
|
import { recordOutcome } from '../router/local-elo.js';
|
|
26
26
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
27
27
|
import { shouldVerify, runVerification } from './verification.js';
|
|
28
|
-
import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
|
|
28
|
+
import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, extractMissingToolNames, } from './evaluator.js';
|
|
29
29
|
import { augmentUserMessage, prefetchForIntent } from './intent-prefetch.js';
|
|
30
30
|
import { analyzeTurn } from './turn-analyzer.js';
|
|
31
31
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
|
|
@@ -464,6 +464,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
464
464
|
// decide — avoids pathological loops, caps wall-clock cost.
|
|
465
465
|
let groundingRetryCount = 0;
|
|
466
466
|
const MAX_GROUNDING_RETRIES = 1;
|
|
467
|
+
// When the previous round failed grounding and we're retrying, force the
|
|
468
|
+
// model to actually call a tool this round instead of trusting it to
|
|
469
|
+
// comply with a soft instruction. Single-shot — cleared once it has been attached to a request.
|
|
470
|
+
// Set to `{ type: "tool", name: "X" }` if the evaluator named exactly
|
|
471
|
+
// one available tool, else `{ type: "any" }` so the model picks (stays null when no tools are available).
|
|
472
|
+
let forceToolChoiceNextRound = null;
|
|
467
473
|
// ── Plan-then-execute state (per turn) ──
|
|
468
474
|
let planActive = false;
|
|
469
475
|
let planPlannerModel = '';
|
|
@@ -767,6 +773,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
767
773
|
if (sanitized.length !== history.length) {
|
|
768
774
|
replaceHistory(history, sanitized);
|
|
769
775
|
}
|
|
776
|
+
// Consume any pending forced tool_choice from the previous round's
|
|
777
|
+
// grounding-retry decision. `tool_choice` is dropped automatically in
|
|
778
|
+
// llm.ts if `tools` is missing or empty, so it's safe to attach here.
|
|
779
|
+
const callToolChoice = forceToolChoiceNextRound;
|
|
780
|
+
forceToolChoiceNextRound = null;
|
|
770
781
|
try {
|
|
771
782
|
const result = await client.complete({
|
|
772
783
|
model: resolvedModel,
|
|
@@ -775,6 +786,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
775
786
|
tools: callToolDefs,
|
|
776
787
|
max_tokens: callMaxTokens,
|
|
777
788
|
stream: true,
|
|
789
|
+
...(callToolChoice ? { tool_choice: callToolChoice } : {}),
|
|
778
790
|
}, abort.signal,
|
|
779
791
|
// Start concurrent tools as soon as their input is fully received
|
|
780
792
|
(tool) => streamExec.onToolReceived(tool),
|
|
@@ -1144,9 +1156,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1144
1156
|
const feedbackMsg = { role: 'user', content: retryMsg };
|
|
1145
1157
|
history.push(feedbackMsg);
|
|
1146
1158
|
persistSessionMessage(feedbackMsg);
|
|
1159
|
+
// Hard enforcement: set tool_choice so the model can't fabricate
|
|
1160
|
+
// citations in lieu of running tools (the round-2 failure mode
|
|
1161
|
+
// from the Tampa→Miami log). If the evaluator named exactly one
|
|
1162
|
+
// available tool, pin to it; otherwise force "any" tool use (only when at least one tool is available).
|
|
1163
|
+
const namedTools = extractMissingToolNames(gResult);
|
|
1164
|
+
const availableNames = new Set(buildCallToolDefs().map(t => t.name));
|
|
1165
|
+
const matched = namedTools.filter(n => availableNames.has(n));
|
|
1166
|
+
if (matched.length === 1) {
|
|
1167
|
+
forceToolChoiceNextRound = { type: 'tool', name: matched[0] };
|
|
1168
|
+
}
|
|
1169
|
+
else if (availableNames.size > 0) {
|
|
1170
|
+
forceToolChoiceNextRound = { type: 'any' };
|
|
1171
|
+
}
|
|
1147
1172
|
onEvent({
|
|
1148
1173
|
kind: 'text_delta',
|
|
1149
|
-
text:
|
|
1174
|
+
text: forceToolChoiceNextRound
|
|
1175
|
+
? `\n\n*Ungrounded claims detected — forcing tool use (${forceToolChoiceNextRound.type === 'tool' ? forceToolChoiceNextRound.name : 'any'}) and retrying...*\n\n`
|
|
1176
|
+
: '\n\n*Ungrounded claims detected — retrying with required tool calls...*\n\n',
|
|
1150
1177
|
});
|
|
1151
1178
|
continue; // Re-enter outer loop — generator will produce a new response.
|
|
1152
1179
|
}
|
package/package.json
CHANGED