@blockrun/franklin 3.8.24 → 3.8.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,12 @@ export interface GroundingResult {
38
38
  * of this module is to cover read-heavy turns the code verifier misses.
39
39
  */
40
40
  export declare function shouldCheckGrounding(userInput: string, assistantText: string): boolean;
41
+ /**
42
+ * Find the `[FRANKLIN HARNESS PREFETCH]` block in the most recent user
43
+ * message (that's where intent-prefetch injects it). Returns the inner
44
+ * payload or null if no prefetch happened this turn.
45
+ */
46
+ export declare function extractPrefetchBlock(history: Dialogue[]): string | null;
41
47
  export declare function parseGroundingResponse(raw: string): GroundingResult;
42
48
  /** Cheap model for grading. Default matches existing verification.ts
43
49
  * choice so both quality gates have the same cost profile. Override via
@@ -37,11 +37,13 @@ const EVALUATOR_PROMPT = `You are a GROUNDING CHECK agent. Your job is to verify
37
37
 
38
38
  ### A. Ungrounded claims
39
39
  Every **factual claim** in the answer must trace to ONE of:
40
- (a) A successful tool call result from this turn, OR
40
+ (a) A tool call result from this turn (model-initiated OR listed under "Pre-fetched by Franklin harness"), OR
41
41
  (b) Explicit acknowledgment of uncertainty ("I'm not sure", "based on older data")
42
42
 
43
+ **Harness-prefetched data is evidence.** When the turn includes a "Pre-fetched by Franklin harness" section, the data listed there was fetched live from tools on the assistant's behalf (TradingMarket, ExaAnswer, etc). Treat it identically to a model-initiated tool call — claims that reference prefetched prices, numbers, or news snippets are GROUNDED.
44
+
43
45
  Flag as ungrounded:
44
- - Specific current-world facts stated with confidence but not backed by any tool call this turn
46
+ - Specific current-world facts stated with confidence but not backed by any tool call this turn (including prefetch)
45
47
  - Recommendations or conclusions that depend on unstated data (e.g. "you should sell" without a price lookup)
46
48
  - Invented specifics — names, numbers, dates the model produced without a tool call supporting them
47
49
 
@@ -102,7 +104,20 @@ function summarizeTurn(userInput, history, assistantText) {
102
104
  lines.push(`## User question`);
103
105
  lines.push(userInput.trim().slice(0, 800));
104
106
  lines.push('');
105
- lines.push(`## Tool calls this turn`);
107
+ // ── Harness prefetch (treated as synthetic tool calls) ──
108
+ // When intent-prefetch fires, it prepends a [FRANKLIN HARNESS PREFETCH]
109
+ // block to the user message. The LLM answers based on that data, but
110
+ // the evaluator previously only looked for tool_use/tool_result pairs
111
+ // and missed the injection — flagging answers that were actually
112
+ // grounded in live data as UNGROUNDED. Surface the block explicitly so
113
+ // the evaluator counts it as evidence.
114
+ const prefetchBlock = extractPrefetchBlock(history);
115
+ if (prefetchBlock) {
116
+ lines.push(`## Pre-fetched by Franklin harness (counts as tool evidence)`);
117
+ lines.push(prefetchBlock.slice(0, 1200));
118
+ lines.push('');
119
+ }
120
+ lines.push(`## Tool calls this turn (model-initiated)`);
106
121
  // Walk from the end of history back to (but not including) the user message.
107
122
  // Each assistant tool_use and each user tool_result get condensed to one line.
108
123
  let found = 0;
@@ -136,7 +151,7 @@ function summarizeTurn(userInput, history, assistantText) {
136
151
  }
137
152
  }
138
153
  if (toolLines.length === 0) {
139
- lines.push(' (none)');
154
+ lines.push(prefetchBlock ? ' (none — but harness pre-fetched data above)' : ' (none)');
140
155
  }
141
156
  else {
142
157
  lines.push(...toolLines);
@@ -146,6 +161,31 @@ function summarizeTurn(userInput, history, assistantText) {
146
161
  lines.push(assistantText.trim().slice(0, 2400));
147
162
  return lines.join('\n');
148
163
  }
164
/**
 * Find the `[FRANKLIN HARNESS PREFETCH]` block in the most recent user
 * message that carries the user's own text (that's where intent-prefetch
 * injects it). Returns the block — from the marker up to, but not
 * including, the "Original user message:" divider — or null if that
 * message has no prefetch block.
 *
 * Both content shapes are inspected:
 *   - plain string content, and
 *   - content-part arrays, whose `type: 'text'` parts are joined with
 *     newlines before searching.
 *
 * User messages that are skipped (the walk continues to older messages):
 *   - arrays containing a `tool_result` part (presumably the tool-return
 *     messages of the current turn — confirm against the Dialogue type),
 *   - messages with no text at all.
 *
 * Stopping at the first text-bearing user message matters: without it a
 * multi-part (e.g. text + image) question would be skipped and a stale
 * prefetch block from an EARLIER turn could be reported as evidence for
 * the current one.
 *
 * @param history Conversation so far, oldest first.
 * @returns The trimmed prefetch block, or null.
 */
export function extractPrefetchBlock(history) {
    const startMarker = '[FRANKLIN HARNESS PREFETCH]';
    const endMarker = '\nOriginal user message:';
    if (!Array.isArray(history))
        return null;
    for (let i = history.length - 1; i >= 0; i--) {
        const msg = history[i];
        if (!msg || msg.role !== 'user')
            continue;
        let content = null;
        if (typeof msg.content === 'string') {
            content = msg.content;
        }
        else if (Array.isArray(msg.content)) {
            // Tool-return messages are not the user's question; keep walking back.
            const carriesToolResult = msg.content.some((part) => part && part.type === 'tool_result');
            if (carriesToolResult)
                continue;
            const textParts = msg.content
                .filter((part) => part && part.type === 'text' && typeof part.text === 'string')
                .map((part) => part.text);
            if (textParts.length > 0)
                content = textParts.join('\n');
        }
        if (!content)
            continue;
        const startIdx = content.indexOf(startMarker);
        if (startIdx < 0)
            return null; // Most recent user message has no prefetch — we're done
        // Capture from the marker up to (but not including) the divider;
        // if the divider is missing, take everything after the marker.
        const endIdx = content.indexOf(endMarker, startIdx);
        const block = endIdx < 0 ? content.slice(startIdx) : content.slice(startIdx, endIdx);
        return block.trim();
    }
    return null;
}
149
189
  // ─── Verdict parser ──────────────────────────────────────────────────────
150
190
  export function parseGroundingResponse(raw) {
151
191
  const text = raw.trim();
@@ -717,32 +717,43 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
717
717
  let routingConfidence;
718
718
  let routingSavings;
719
719
  if (routingProfile) {
720
- // Extract latest user text for classification
721
- const lastUser = [...history].reverse().find((m) => m.role === 'user');
722
- const userText = typeof lastUser?.content === 'string'
723
- ? lastUser.content
724
- : Array.isArray(lastUser?.content)
720
+ if (groundingRetryCount > 0 && lastRoutedModel) {
721
+ // Grounding retry re-enters the loop with a `[GROUNDING CHECK
722
+ // FAILED]` user message that the router would classify as
723
+ // SIMPLE on its own — which drops the turn onto a weak model
724
+ // mid-task (observed in the CRCL log on 2026-04-22). Pin the
725
+ // model the router picked on the first iteration so retries
726
+ // stay on the same tier.
727
+ resolvedModel = lastRoutedModel;
728
+ }
729
+ else {
730
+ // Extract latest user text for classification
731
+ const lastUser = [...history].reverse().find((m) => m.role === 'user');
732
+ const userText = typeof lastUser?.content === 'string'
725
733
  ? lastUser.content
726
- .filter(p => p.type === 'text')
727
- .map(p => p.text ?? '')
728
- .join(' ')
729
- : '';
730
- const routing = await routeRequestAsync(userText, routingProfile);
731
- resolvedModel = routing.model;
732
- routingTier = routing.tier;
733
- routingConfidence = routing.confidence;
734
- routingSavings = routing.savings;
735
- lastRoutedModel = routing.model;
736
- lastRoutedCategory = routing.signals[0] || '';
737
- // Surface the routing decision so users know which concrete model
738
- // just got picked. Without this the status bar reads "auto" and
739
- // users have no idea what's actually running or worse, they
740
- // believe they're stuck on the last-seen concrete name.
741
- if (loopCount === 1) {
742
- onEvent({
743
- kind: 'text_delta',
744
- text: `*Auto → ${routing.model}*\n\n`,
745
- });
734
+ : Array.isArray(lastUser?.content)
735
+ ? lastUser.content
736
+ .filter(p => p.type === 'text')
737
+ .map(p => p.text ?? '')
738
+ .join(' ')
739
+ : '';
740
+ const routing = await routeRequestAsync(userText, routingProfile);
741
+ resolvedModel = routing.model;
742
+ routingTier = routing.tier;
743
+ routingConfidence = routing.confidence;
744
+ routingSavings = routing.savings;
745
+ lastRoutedModel = routing.model;
746
+ lastRoutedCategory = routing.signals[0] || '';
747
+ // Surface the routing decision so users know which concrete model
748
+ // just got picked. Without this the status bar reads "auto" and
749
+ // users have no idea what's actually running — or worse, they
750
+ // believe they're stuck on the last-seen concrete name.
751
+ if (loopCount === 1) {
752
+ onEvent({
753
+ kind: 'text_delta',
754
+ text: `*Auto → ${routing.model}*\n\n`,
755
+ });
756
+ }
746
757
  }
747
758
  }
748
759
  // Update token estimation model for more accurate byte-per-token ratio
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.24",
3
+ "version": "3.8.25",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {