@blockrun/franklin 3.8.25 → 3.8.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,12 +21,13 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
21
21
  import { estimateCost, OPUS_PRICING } from '../pricing.js';
22
22
  import { maybeMidSessionExtract } from '../learnings/extractor.js';
23
23
  import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
24
- import { routeRequestAsync, parseRoutingProfile } from '../router/index.js';
24
+ import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
25
25
  import { recordOutcome } from '../router/local-elo.js';
26
26
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
27
27
  import { shouldVerify, runVerification } from './verification.js';
28
28
  import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
29
- import { augmentUserMessage, classifyIntent, prefetchForIntent } from './intent-prefetch.js';
29
+ import { augmentUserMessage, prefetchForIntent } from './intent-prefetch.js';
30
+ import { analyzeTurn } from './turn-analyzer.js';
30
31
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
31
32
  /**
32
33
  * Atomically replace all elements in a history array.
@@ -552,24 +553,58 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
552
553
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
553
554
  let turnSpend = 0; // Cost spent this user turn (USD)
554
555
  const MAX_TURN_SPEND_USD = 0.25; // Hard circuit breaker per user message (lowered — user wallets are real money)
556
+ // ── Turn analysis (one classifier call, drives routing + prefetch) ──
557
+ // Single LLM pass that answers every routing-adjacent question the
558
+ // harness needs BEFORE the main model runs: tier, ticker intent,
559
+ // pushback, planning need, live-data signal. Replaces what used to be
560
+ // two separate classifier calls (router + prefetch) plus keyword rule
561
+ // engines for pushback / shouldPlan. Safe-defaults on any failure so
562
+ // the main flow never blocks on it.
563
+ let turnAnalysis = null;
564
+ try {
565
+ // Anchor 1: the user's current message (already in lastUserInput).
566
+ // Anchor 2: first chunk of the previous assistant reply — gives the
567
+ // analyzer enough context to resolve deictic follow-ups like "那 AAPL 呢".
568
+ const lastAssistantText = (() => {
569
+ const prior = [...history.slice(0, -1)].reverse()
570
+ .find((m) => m.role === 'assistant');
571
+ if (!prior)
572
+ return '';
573
+ if (typeof prior.content === 'string')
574
+ return prior.content;
575
+ if (!Array.isArray(prior.content))
576
+ return '';
577
+ return prior.content
578
+ .filter(p => p.type === 'text')
579
+ .map(p => p.text ?? '')
580
+ .join(' ');
581
+ })();
582
+ // Anchor 3: the very first user message in this session (session goal).
583
+ const sessionGoal = (() => {
584
+ const first = history.find((m) => m.role === 'user');
585
+ if (!first)
586
+ return '';
587
+ return typeof first.content === 'string' ? first.content : '';
588
+ })();
589
+ turnAnalysis = await analyzeTurn(input, {
590
+ lastAssistantText,
591
+ sessionGoal,
592
+ client,
593
+ });
594
+ }
595
+ catch {
596
+ // Analyzer is best-effort; ignore.
597
+ }
555
598
  // ── Proactive prefetch ────────────────────────────────────────────
556
- // Before the main model gets a chance to answer a live-world question
557
- // from stale training data, the harness detects ticker / price / news
558
- // intent and fetches the data itself. Result is prepended to the user's
559
- // message so the model sees it as ground truth for this turn. This
560
- // makes the answer tool-grounded regardless of the model's willingness
561
- // to call tools on its own — important for models with strong
562
- // refusal priors on financial data.
599
+ // Uses the intent the analyzer already extracted. Skips the separate
600
+ // prefetch-classifier call that previously ran here.
563
601
  try {
564
- const intent = await classifyIntent(input, client);
565
- if (intent) {
566
- const prefetch = await prefetchForIntent(intent, client);
602
+ if (turnAnalysis?.intent) {
603
+ const prefetch = await prefetchForIntent(turnAnalysis.intent, client);
567
604
  if (prefetch && prefetch.anyOk) {
568
605
  if (config.showPrefetchStatus !== false) {
569
606
  onEvent({ kind: 'text_delta', text: `\n${prefetch.statusLine}\n\n` });
570
607
  }
571
- // Augment the last user message in history (NOT lastUserInput,
572
- // which /retry restores — that should remain the user's original).
573
608
  const lastIdx = history.length - 1;
574
609
  const last = history[lastIdx];
575
610
  if (last && last.role === 'user' && typeof last.content === 'string') {
@@ -579,8 +614,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
579
614
  }
580
615
  }
581
616
  catch {
582
- // Prefetch is best-effort — if the classifier or any fetch trips,
583
- // fall through and let the main loop do its own thing.
617
+ // Prefetch is best-effort — never block the main loop.
584
618
  }
585
619
  // Agent loop for this user message
586
620
  while (loopCount < maxTurns) {
@@ -711,49 +745,29 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
711
745
  sessionId,
712
746
  });
713
747
  // ── Router: resolve routing profiles to concrete models ──
748
+ // Uses the tier already decided by the turn-analyzer — one LLM call
749
+ // up-front rather than a separate classifier here. Fallback to the
750
+ // stand-alone classifier if analyzer wasn't available.
714
751
  const routingProfile = parseRoutingProfile(config.model);
715
752
  let resolvedModel = config.model;
716
753
  let routingTier;
717
754
  let routingConfidence;
718
755
  let routingSavings;
719
756
  if (routingProfile) {
720
- if (groundingRetryCount > 0 && lastRoutedModel) {
721
- // Grounding retry re-enters the loop with a `[GROUNDING CHECK
722
- // FAILED]` user message that the router would classify as
723
- // SIMPLE on its own — which drops the turn onto a weak model
724
- // mid-task (observed in the CRCL log on 2026-04-22). Pin the
725
- // model the router picked on the first iteration so retries
726
- // stay on the same tier.
727
- resolvedModel = lastRoutedModel;
728
- }
729
- else {
730
- // Extract latest user text for classification
731
- const lastUser = [...history].reverse().find((m) => m.role === 'user');
732
- const userText = typeof lastUser?.content === 'string'
733
- ? lastUser.content
734
- : Array.isArray(lastUser?.content)
735
- ? lastUser.content
736
- .filter(p => p.type === 'text')
737
- .map(p => p.text ?? '')
738
- .join(' ')
739
- : '';
740
- const routing = await routeRequestAsync(userText, routingProfile);
741
- resolvedModel = routing.model;
742
- routingTier = routing.tier;
743
- routingConfidence = routing.confidence;
744
- routingSavings = routing.savings;
745
- lastRoutedModel = routing.model;
746
- lastRoutedCategory = routing.signals[0] || '';
747
- // Surface the routing decision so users know which concrete model
748
- // just got picked. Without this the status bar reads "auto" and
749
- // users have no idea what's actually running — or worse, they
750
- // believe they're stuck on the last-seen concrete name.
751
- if (loopCount === 1) {
752
- onEvent({
753
- kind: 'text_delta',
754
- text: `*Auto → ${routing.model}*\n\n`,
755
- });
756
- }
757
+ const routing = turnAnalysis
758
+ ? resolveTierToModel(turnAnalysis.tier, routingProfile)
759
+ : await routeRequestAsync(lastUserInput || '', routingProfile);
760
+ resolvedModel = routing.model;
761
+ routingTier = routing.tier;
762
+ routingConfidence = routing.confidence;
763
+ routingSavings = routing.savings;
764
+ lastRoutedModel = routing.model;
765
+ lastRoutedCategory = routing.signals[0] || '';
766
+ if (loopCount === 1) {
767
+ onEvent({
768
+ kind: 'text_delta',
769
+ text: `*Auto ${routing.model}*\n\n`,
770
+ });
757
771
  }
758
772
  }
759
773
  // Update token estimation model for more accurate byte-per-token ratio
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Turn analyzer — one LLM call per turn that answers every routing-adjacent
3
+ * question the harness needs to make BEFORE the main model runs.
4
+ *
5
+ * Why this exists:
6
+ * Prior versions called separate classifiers for routing (what tier?) and
7
+ * prefetch (is there a ticker?). Each additional harness decision tempted
8
+ * us to add yet another classifier call (pushback? plan? needs-grounding?).
9
+ * Each call adds ~500-800ms of serial latency; stack six of them and the
10
+ * user waits multiple seconds before the main model even starts.
11
+ *
12
+ * This consolidates every LLM-decidable pre-turn question into a single
13
+ * call with a structured JSON response. Net result: 1 classifier call per
14
+ * turn (was 2), replacing multiple keyword rule engines (pushback regex,
15
+ * shouldPlan keyword list, shouldCheckGrounding length gates).
16
+ *
17
+ * Principle: harness orchestrates, models decide. No keyword allowlists,
18
+ * no length thresholds, no regex heuristics encoded in TypeScript.
19
+ *
20
+ * Budget discipline:
21
+ * - Input capped at ~1500 chars across three anchors (current, prev reply,
22
+ * session goal). Never the full history.
23
+ * - Output capped at 128 tokens (compact single-line JSON).
24
+ * - 2.5s hard timeout; on any failure, conservative default returned so
25
+ * the main flow never blocks.
26
+ * - 30s in-memory cache keyed on the three anchors so back-to-back near-
27
+ * identical turns don't re-pay the latency.
28
+ */
29
+ import type { ModelClient } from './llm.js';
30
+ import type { MarketCode } from '../trading/providers/standard-models.js';
31
+ import type { Tier } from '../router/index.js';
32
+ export interface TurnIntent {
33
+ kind: 'ticker';
34
+ symbol: string;
35
+ assetClass: 'stock' | 'crypto';
36
+ market?: MarketCode;
37
+ wantNews: boolean;
38
+ }
39
+ export interface TurnAnalysis {
40
+ tier: Tier;
41
+ intent: TurnIntent | null;
42
+ /** True for substantive multi-step engineering tasks worth a plan-then-execute split. */
43
+ needsPlanning: boolean;
44
+ /** True when the user is correcting the previous assistant turn. */
45
+ isPushback: boolean;
46
+ /** True when the user asks for current prices / today's state / recent news. */
47
+ asksForLiveData: boolean;
48
+ }
49
+ /** Test / reset helper. */
50
+ export declare function clearAnalyzerCache(): void;
51
+ /**
52
+ * Parse the analyzer's JSON output. Returns null on any structural issue;
53
+ * caller falls back to conservative defaults.
54
+ */
55
+ export declare function parseAnalysis(raw: string): TurnAnalysis | null;
56
+ export interface AnalyzeOpts {
57
+ lastAssistantText?: string;
58
+ sessionGoal?: string;
59
+ client: ModelClient;
60
+ model?: string;
61
+ signal?: AbortSignal;
62
+ }
63
+ /**
64
+ * Analyze one turn. Always returns a TurnAnalysis — never throws. On any
65
+ * failure path (timeout, parse error, empty response, gateway down) the
66
+ * conservative default is returned so the main flow proceeds without the
67
+ * harness's pre-decisions. The analyzer is a quality booster, not a
68
+ * correctness requirement.
69
+ */
70
+ export declare function analyzeTurn(userInput: string, opts: AnalyzeOpts): Promise<TurnAnalysis>;
@@ -0,0 +1,297 @@
1
+ /**
2
+ * Turn analyzer — one LLM call per turn that answers every routing-adjacent
3
+ * question the harness needs to make BEFORE the main model runs.
4
+ *
5
+ * Why this exists:
6
+ * Prior versions called separate classifiers for routing (what tier?) and
7
+ * prefetch (is there a ticker?). Each additional harness decision tempted
8
+ * us to add yet another classifier call (pushback? plan? needs-grounding?).
9
+ * Each call adds ~500-800ms of serial latency; stack six of them and the
10
+ * user waits multiple seconds before the main model even starts.
11
+ *
12
+ * This consolidates every LLM-decidable pre-turn question into a single
13
+ * call with a structured JSON response. Net result: 1 classifier call per
14
+ * turn (was 2), replacing multiple keyword rule engines (pushback regex,
15
+ * shouldPlan keyword list, shouldCheckGrounding length gates).
16
+ *
17
+ * Principle: harness orchestrates, models decide. No keyword allowlists,
18
+ * no length thresholds, no regex heuristics encoded in TypeScript.
19
+ *
20
+ * Budget discipline:
21
+ * - Input capped at ~1500 chars across three anchors (current, prev reply,
22
+ * session goal). Never the full history.
23
+ * - Output capped at 128 tokens (compact single-line JSON).
24
+ * - 2.5s hard timeout; on any failure, conservative default returned so
25
+ * the main flow never blocks.
26
+ * - 30s in-memory cache keyed on the three anchors so back-to-back near-
27
+ * identical turns don't re-pay the latency.
28
+ */
29
+ /**
30
+ * Safe default returned when the analyzer call fails (timeout, parse error,
31
+ * gateway down). Chosen to be neutral:
32
+ * - MEDIUM tier → router picks a capable mid-tier model, not the cheapest
33
+ * - no intent → prefetch skips
34
+ * - all booleans false → downstream gates don't fire speculatively
35
+ * The main-flow still runs; the harness just loses its per-turn pre-decisions.
36
+ */
37
+ const CONSERVATIVE_DEFAULT = {
38
+ tier: 'MEDIUM',
39
+ intent: null,
40
+ needsPlanning: false,
41
+ isPushback: false,
42
+ asksForLiveData: false,
43
+ };
44
+ // ─── Input budget ───────────────────────────────────────────────────────
45
+ const MAX_CURRENT_CHARS = 800;
46
+ const MAX_PREV_REPLY_CHARS = 300;
47
+ const MAX_GOAL_CHARS = 200;
48
+ const TIMEOUT_MS = 2_500;
49
+ const MAX_ANALYZER_TOKENS = 128;
50
+ const CACHE_TTL_MS = 30_000;
51
+ const CACHE_MAX_SIZE = 64;
52
+ // ─── Analyzer prompt ────────────────────────────────────────────────────
53
+ //
54
+ // Design: one compact prompt, a few precise examples, instruct the model to
55
+ // emit a single-line JSON. Maverick (the classifier backbone since v3.8.23)
56
+ // reliably produces plain-text structured output under tight max_tokens,
57
+ // unlike thinking-first models that leave text empty.
58
+ const ANALYZER_MODEL_DEFAULT = process.env.FRANKLIN_ANALYZER_MODEL || 'nvidia/llama-4-maverick';
59
+ const ANALYZER_SYSTEM = `You analyze ONE user message for Franklin's routing + prefetch harness. Output ONE LINE of compact JSON — no explanation, no markdown, no code fences.
60
+
61
+ ## Fields
62
+
63
+ tier: "SIMPLE" | "MEDIUM" | "COMPLEX" | "REASONING"
64
+ SIMPLE — greetings, arithmetic, trivia, short factual Q
65
+ MEDIUM — targeted code edits, simple lookups, summaries, single-tool tasks
66
+ COMPLEX — analysis, recommendations, research questions needing live data, multi-step tool use
67
+ REASONING — formal proofs, derivations, deep logic, multi-variable optimization
68
+ NEVER route ticker / price / stock / "should I" / "why did" questions below COMPLEX.
69
+
70
+ intent: null OR {"kind":"ticker","symbol":"...","assetClass":"stock"|"crypto","market":"us"|"hk"|"jp"|"kr"|"gb"|"de"|"fr"|"nl"|"ie"|"lu"|"cn"|"ca","wantNews":true|false}
71
+ Set when the user names a ticker, a publicly-traded company, or a cryptocurrency.
72
+ Omit "market" for crypto; default "us" for stocks if unclear.
73
+ wantNews: true if the user asks why / what happened / analyze. false for plain price lookup.
74
+
75
+ needsPlanning: true | false
76
+ true only for substantive multi-step engineering tasks (build X, refactor Y across many files).
77
+
78
+ isPushback: true | false
79
+ true when the user is correcting / disagreeing with the previous assistant turn.
80
+
81
+ asksForLiveData: true | false
82
+ true when the user asks for a current price, today's news, or any live-world state.
83
+
84
+ ## Context anchors in input
85
+
86
+ [CURRENT] user's message this turn (primary signal)
87
+ [PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "那 AAPL 呢", "and that one?", "the other ticker")
88
+ [GOAL] original session prompt, first ~200 chars
89
+
90
+ If [CURRENT] uses a deictic ("it", "that", "那", "这个"), resolve intent/tier from [PREV_REPLY] or [GOAL].
91
+
92
+ ## Examples
93
+
94
+ Input:
95
+ [CURRENT] hi
96
+ Output: {"tier":"SIMPLE","intent":null,"needsPlanning":false,"isPushback":false,"asksForLiveData":false}
97
+
98
+ Input:
99
+ [CURRENT] should I sell CRCL and why did it drop
100
+ Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"CRCL","assetClass":"stock","market":"us","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
101
+
102
+ Input:
103
+ [CURRENT] 那 AAPL 呢
104
+ [PREV_REPLY] CRCL 当前价格 $96.18,最近因 Drift 诉讼下跌...
105
+ Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"AAPL","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
106
+
107
+ Input:
108
+ [CURRENT] BTC 为什么跌了
109
+ Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"BTC","assetClass":"crypto","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
110
+
111
+ Input:
112
+ [CURRENT] 不对,你应该看 NVDA 不是 AAPL
113
+ [PREV_REPLY] AAPL 当前价格 $186.42
114
+ Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"NVDA","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":true,"asksForLiveData":true}
115
+
116
+ Input:
117
+ [CURRENT] refactor the wallet module to use typed errors across all call sites
118
+ Output: {"tier":"MEDIUM","intent":null,"needsPlanning":true,"isPushback":false,"asksForLiveData":false}
119
+
120
+ Input:
121
+ [CURRENT] prove that sqrt(2) is irrational
122
+ Output: {"tier":"REASONING","intent":null,"needsPlanning":false,"isPushback":false,"asksForLiveData":false}
123
+
124
+ Output the JSON only. One line. No trailing text.`;
125
+ const cache = new Map();
126
/**
 * Deterministic, non-cryptographic string hash used to build cache keys.
 *
 * The parts are serialized with JSON.stringify before hashing so the
 * boundaries between parts contribute to the key. A bare join('') would make
 * ['ab', 'c'] and ['a', 'bc'] hash identically, letting one turn's cached
 * analysis be served for a different turn.
 *
 * @param parts Ordered list of strings identifying the cached value.
 * @returns The hash, truncated to a signed 32-bit integer, as a decimal string.
 */
function hashKey(parts) {
    const joined = JSON.stringify(parts);
    let h = 0;
    for (let i = 0; i < joined.length; i++) {
        // h * 31 + charCode; `| 0` truncates to a signed 32-bit integer.
        h = ((h << 5) - h + joined.charCodeAt(i)) | 0;
    }
    return String(h);
}
135
+ function cacheGet(key) {
136
+ const hit = cache.get(key);
137
+ if (!hit)
138
+ return null;
139
+ if (Date.now() > hit.expiresAt) {
140
+ cache.delete(key);
141
+ return null;
142
+ }
143
+ return hit.value;
144
+ }
145
+ function cacheSet(key, value) {
146
+ if (cache.size >= CACHE_MAX_SIZE) {
147
+ // Evict oldest by insertion order (Map preserves it).
148
+ const firstKey = cache.keys().next().value;
149
+ if (firstKey)
150
+ cache.delete(firstKey);
151
+ }
152
+ cache.set(key, { value, expiresAt: Date.now() + CACHE_TTL_MS });
153
+ }
154
+ /** Test / reset helper. */
155
+ export function clearAnalyzerCache() {
156
+ cache.clear();
157
+ }
158
+ // ─── Parsing ────────────────────────────────────────────────────────────
159
+ const VALID_TIERS = new Set(['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING']);
160
+ const VALID_MARKETS = new Set([
161
+ 'us', 'hk', 'jp', 'kr', 'gb', 'de', 'fr', 'nl', 'ie', 'lu', 'cn', 'ca',
162
+ ]);
163
/**
 * Validate and normalize the `intent` field of the analyzer's JSON output.
 *
 * @param raw Untrusted value parsed from the model's response.
 * @returns A normalized ticker intent (upper-cased symbol, a market only for
 *   stocks, boolean `wantNews`), or null when `raw` is not a usable intent.
 */
function validateIntent(raw) {
    if (!raw || typeof raw !== 'object')
        return null;
    const o = raw;
    if (o.kind !== 'ticker')
        return null;
    const symbol = typeof o.symbol === 'string' ? o.symbol.trim().toUpperCase() : '';
    // Require a leading letter or digit. The analyzer prompt's schema line
    // uses "..." as the symbol placeholder; a model that echoes it back must
    // not be treated as a ticker. Forms such as BRK.B or BTC-USD still pass.
    if (!/^[A-Z0-9][A-Z0-9.\-]*$/.test(symbol))
        return null;
    const assetClass = o.assetClass === 'stock' || o.assetClass === 'crypto' ? o.assetClass : null;
    if (!assetClass)
        return null;
    let market;
    if (assetClass === 'stock') {
        // Missing or unrecognized markets fall back to 'us'; crypto carries no market.
        const m = typeof o.market === 'string' ? o.market.toLowerCase() : 'us';
        market = VALID_MARKETS.has(m) ? m : 'us';
    }
    return {
        kind: 'ticker',
        symbol,
        assetClass,
        ...(market ? { market } : {}),
        wantNews: Boolean(o.wantNews),
    };
}
188
+ /**
189
+ * Parse the analyzer's JSON output. Returns null on any structural issue;
190
+ * caller falls back to conservative defaults.
191
+ */
192
+ export function parseAnalysis(raw) {
193
+ const jsonMatch = raw.match(/\{[\s\S]*\}/);
194
+ if (!jsonMatch)
195
+ return null;
196
+ try {
197
+ const parsed = JSON.parse(jsonMatch[0]);
198
+ const tier = typeof parsed.tier === 'string' && VALID_TIERS.has(parsed.tier)
199
+ ? parsed.tier
200
+ : null;
201
+ if (!tier)
202
+ return null;
203
+ return {
204
+ tier,
205
+ intent: validateIntent(parsed.intent),
206
+ needsPlanning: Boolean(parsed.needsPlanning),
207
+ isPushback: Boolean(parsed.isPushback),
208
+ asksForLiveData: Boolean(parsed.asksForLiveData),
209
+ };
210
+ }
211
+ catch {
212
+ return null;
213
+ }
214
+ }
215
+ // ─── Input assembly ─────────────────────────────────────────────────────
216
/**
 * Assemble the bounded text the analyzer model sees. Raw history is never sent.
 *
 * Output is up to three tagged sections separated by a blank line:
 * [CURRENT] (always), [PREV_REPLY] (when a non-blank previous reply is given),
 * and [GOAL] (when a non-blank goal is given that differs from the current
 * message). Each section body is truncated to its MAX_*_CHARS budget.
 *
 * @param userInput The user's message for this turn.
 * @param lastAssistantText Previous assistant reply, if any.
 * @param sessionGoal First user message of the session, if any.
 * @returns The newline-joined analyzer input.
 */
function buildAnalyzerInput(userInput, lastAssistantText, sessionGoal) {
    const current = userInput.trim();
    const sections = [`[CURRENT]\n${current.slice(0, MAX_CURRENT_CHARS)}`];
    const reply = lastAssistantText ? lastAssistantText.trim() : '';
    if (reply.length > 0) {
        // Strip markdown heading markers and bold markers before truncating.
        const plain = reply.replace(/^#+\s+/gm, '').replace(/\*\*/g, '');
        sections.push(`[PREV_REPLY]\n${plain.slice(0, MAX_PREV_REPLY_CHARS)}`);
    }
    const goal = sessionGoal ? sessionGoal.trim() : '';
    if (goal.length > 0 && goal !== current) {
        sections.push(`[GOAL]\n${goal.slice(0, MAX_GOAL_CHARS)}`);
    }
    return sections.join('\n\n');
}
237
/**
 * Run the turn analyzer for one user message.
 *
 * Resolves to CONSERVATIVE_DEFAULT when the analyzer is disabled via
 * FRANKLIN_NO_ANALYZER=1, when the input is blank, when the model call fails
 * or is aborted (caller signal or the TIMEOUT_MS deadline), or when the
 * response cannot be parsed. Successfully parsed results are cached under a
 * key derived from the three truncated anchors; defaults are never cached.
 *
 * @param userInput The user's message for this turn.
 * @param opts Client plus optional context anchors, model override and abort signal.
 * @returns The analysis for this turn.
 */
export async function analyzeTurn(userInput, opts) {
    if (process.env.FRANKLIN_NO_ANALYZER === '1') {
        return CONSERVATIVE_DEFAULT;
    }
    const current = userInput.trim();
    if (!current) {
        return CONSERVATIVE_DEFAULT;
    }
    const replyAnchor = opts.lastAssistantText?.trim().slice(0, MAX_PREV_REPLY_CHARS) || '';
    const goalAnchor = opts.sessionGoal?.trim().slice(0, MAX_GOAL_CHARS) || '';
    const cacheKey = hashKey([current.slice(0, MAX_CURRENT_CHARS), replyAnchor, goalAnchor]);
    const hit = cacheGet(cacheKey);
    if (hit) {
        return hit;
    }
    const prompt = buildAnalyzerInput(current, replyAnchor || undefined, goalAnchor || undefined);
    // Hard deadline: abort the model call after TIMEOUT_MS.
    const deadline = new AbortController();
    const deadlineTimer = setTimeout(() => deadline.abort(), TIMEOUT_MS);
    const abortSignal = opts.signal
        ? anySignal([opts.signal, deadline.signal])
        : deadline.signal;
    try {
        const request = {
            model: opts.model || ANALYZER_MODEL_DEFAULT,
            system: ANALYZER_SYSTEM,
            messages: [{ role: 'user', content: prompt }],
            tools: [],
            max_tokens: MAX_ANALYZER_TOKENS,
        };
        const reply = await opts.client.complete(request, abortSignal);
        // Concatenate only the text parts of the response.
        const chunks = [];
        for (const part of reply.content) {
            if (typeof part === 'object' && part.type === 'text' && part.text) {
                chunks.push(part.text);
            }
        }
        const analysis = parseAnalysis(chunks.join(''));
        if (!analysis) {
            return CONSERVATIVE_DEFAULT;
        }
        cacheSet(cacheKey, analysis);
        return analysis;
    }
    catch {
        // Any failure in the call or in reading the response yields the default.
        return CONSERVATIVE_DEFAULT;
    }
    finally {
        clearTimeout(deadlineTimer);
    }
}
286
+ /** Compose two AbortSignals into one — aborts when either source aborts. */
287
+ function anySignal(signals) {
288
+ const ctrl = new AbortController();
289
+ for (const s of signals) {
290
+ if (s.aborted) {
291
+ ctrl.abort();
292
+ break;
293
+ }
294
+ s.addEventListener('abort', () => ctrl.abort(), { once: true });
295
+ }
296
+ return ctrl.signal;
297
+ }
@@ -32,6 +32,16 @@ export declare function llmClassifyRequest(prompt: string): Promise<Tier | null>
32
32
  * the concrete model; the classifier only picks the TIER.
33
33
  */
34
34
  export declare function routeRequestAsync(prompt: string, profile?: RoutingProfile, classify?: TierClassifier): Promise<RoutingResult>;
35
+ /**
36
+ * Map a pre-classified tier to a concrete model + savings using the profile's
37
+ * tier table. No classifier call — assumes the caller already decided the
38
+ * tier (typically via the turn-analyzer, which rolls tier classification in
39
+ * with intent / pushback / planning decisions in one LLM call).
40
+ *
41
+ * Use this when you have a tier already. Use `routeRequestAsync` when you
42
+ * need the classifier to produce the tier.
43
+ */
44
+ export declare function resolveTierToModel(tier: Tier, profile?: RoutingProfile): RoutingResult;
35
45
  export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;
36
46
  /**
37
47
  * Get fallback models for a tier
@@ -393,6 +393,45 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
393
393
  savings: computeSavings(model),
394
394
  };
395
395
  }
396
+ /**
397
+ * Map a pre-classified tier to a concrete model + savings using the profile's
398
+ * tier table. No classifier call — assumes the caller already decided the
399
+ * tier (typically via the turn-analyzer, which rolls tier classification in
400
+ * with intent / pushback / planning decisions in one LLM call).
401
+ *
402
+ * Use this when you have a tier already. Use `routeRequestAsync` when you
403
+ * need the classifier to produce the tier.
404
+ */
405
+ export function resolveTierToModel(tier, profile = 'auto') {
406
+ // Free profile short-circuits — everything routes to a single free model.
407
+ if (profile === 'free') {
408
+ return {
409
+ model: 'nvidia/glm-4.7',
410
+ tier: 'SIMPLE',
411
+ confidence: 1.0,
412
+ signals: ['free-profile'],
413
+ savings: 1.0,
414
+ };
415
+ }
416
+ let tierConfigs;
417
+ switch (profile) {
418
+ case 'eco':
419
+ tierConfigs = ECO_TIERS;
420
+ break;
421
+ case 'premium':
422
+ tierConfigs = PREMIUM_TIERS;
423
+ break;
424
+ default: tierConfigs = AUTO_TIERS;
425
+ }
426
+ const model = tierConfigs[tier].primary;
427
+ return {
428
+ model,
429
+ tier,
430
+ confidence: 0.85,
431
+ signals: ['pre-classified'],
432
+ savings: computeSavings(model),
433
+ };
434
+ }
396
435
  // ─── Main Router ───
397
436
  export function routeRequest(prompt, profile = 'auto') {
398
437
  // Free profile — always use free model
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.25",
3
+ "version": "3.8.27",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {