@blockrun/franklin 3.8.26 → 3.8.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +88 -109
- package/dist/agent/planner.d.ts +10 -4
- package/dist/agent/planner.js +19 -34
- package/dist/agent/turn-analyzer.d.ts +70 -0
- package/dist/agent/turn-analyzer.js +297 -0
- package/dist/router/index.d.ts +10 -0
- package/dist/router/index.js +39 -0
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -21,12 +21,13 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
|
21
21
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
22
22
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
23
23
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
24
|
-
import { routeRequestAsync, parseRoutingProfile } from '../router/index.js';
|
|
24
|
+
import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
|
|
25
25
|
import { recordOutcome } from '../router/local-elo.js';
|
|
26
26
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
27
27
|
import { shouldVerify, runVerification } from './verification.js';
|
|
28
28
|
import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
|
|
29
|
-
import { augmentUserMessage,
|
|
29
|
+
import { augmentUserMessage, prefetchForIntent } from './intent-prefetch.js';
|
|
30
|
+
import { analyzeTurn } from './turn-analyzer.js';
|
|
30
31
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
|
|
31
32
|
/**
|
|
32
33
|
* Atomically replace all elements in a history array.
|
|
@@ -38,77 +39,12 @@ function replaceHistory(target, replacement) {
|
|
|
38
39
|
target.splice(0, target.length, ...replacement);
|
|
39
40
|
}
|
|
40
41
|
// ─── Pushback detection ───────────────────────────────────────────────────
|
|
41
|
-
//
|
|
42
|
-
//
|
|
43
|
-
//
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
//
|
|
47
|
-
// STRONG patterns: high-precision correction language. Fires even on short input.
|
|
48
|
-
const PUSHBACK_STRONG = [
|
|
49
|
-
/\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
|
|
50
|
-
/\b(i\s+(said|told\s+you)|not\s+what\s+i)\b/i,
|
|
51
|
-
/^(stop|wrong|incorrect|try\s+again)\b/i,
|
|
52
|
-
/^(不对|不是|错了|再试|重来)/,
|
|
53
|
-
];
|
|
54
|
-
// WEAK patterns: common correction starters that also appear in casual speech.
|
|
55
|
-
// Require a corroborating signal (see detectPushback) to count as pushback.
|
|
56
|
-
const PUSHBACK_WEAK = [
|
|
57
|
-
/^(but|however|actually|wait|no+\b|hmm)\b/i,
|
|
58
|
-
/\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
|
|
59
|
-
/^(但是|其实|等等|停)/,
|
|
60
|
-
];
|
|
61
|
-
/**
|
|
62
|
-
* True if the last assistant turn made a concrete claim worth pushing back
|
|
63
|
-
* against: executed a tool, wrote code, or produced a non-trivial answer.
|
|
64
|
-
* Casual assistant chatter doesn't warrant treating a "but" as a correction.
|
|
65
|
-
*/
|
|
66
|
-
function lastAssistantHasClaim(history) {
|
|
67
|
-
for (let i = history.length - 1; i >= 0; i--) {
|
|
68
|
-
const msg = history[i];
|
|
69
|
-
if (msg.role !== 'assistant')
|
|
70
|
-
continue;
|
|
71
|
-
if (Array.isArray(msg.content)) {
|
|
72
|
-
for (const part of msg.content) {
|
|
73
|
-
const p = part;
|
|
74
|
-
if (p.type === 'tool_use')
|
|
75
|
-
return true;
|
|
76
|
-
if (p.type === 'text' && typeof p.text === 'string' && p.text.trim().length >= 40) {
|
|
77
|
-
return true;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
return false;
|
|
81
|
-
}
|
|
82
|
-
if (typeof msg.content === 'string' && msg.content.trim().length >= 40)
|
|
83
|
-
return true;
|
|
84
|
-
return false;
|
|
85
|
-
}
|
|
86
|
-
return false;
|
|
87
|
-
}
|
|
88
|
-
function detectPushback(input, history) {
|
|
89
|
-
// Only count as pushback if there's a prior assistant turn to push back against.
|
|
90
|
-
if (history.length === 0)
|
|
91
|
-
return false;
|
|
92
|
-
if (!lastAssistantHasClaim(history))
|
|
93
|
-
return false;
|
|
94
|
-
const trimmed = input.trim();
|
|
95
|
-
if (trimmed.length === 0 || trimmed.length > 500)
|
|
96
|
-
return false;
|
|
97
|
-
// Strong patterns: direct correction language — fire immediately.
|
|
98
|
-
if (PUSHBACK_STRONG.some((re) => re.test(trimmed)))
|
|
99
|
-
return true;
|
|
100
|
-
// Weak patterns: only count if the message is short (< 120 chars) AND doesn't
|
|
101
|
-
// also contain a fresh request. A weak starter followed by "can you also X"
|
|
102
|
-
// or "please do Y" is scope addition, not correction.
|
|
103
|
-
if (PUSHBACK_WEAK.some((re) => re.test(trimmed))) {
|
|
104
|
-
if (trimmed.length > 120)
|
|
105
|
-
return false;
|
|
106
|
-
if (/\b(can you|could you|please|also|add|include)\b/i.test(trimmed))
|
|
107
|
-
return false;
|
|
108
|
-
return true;
|
|
109
|
-
}
|
|
110
|
-
return false;
|
|
111
|
-
}
|
|
42
|
+
// Formerly a pair of regex lists (PUSHBACK_STRONG / PUSHBACK_WEAK) plus a
|
|
43
|
+
// claim-on-prior-turn check — ~70 lines of keyword heuristics. Replaced by
|
|
44
|
+
// `turnAnalysis.isPushback` from `turn-analyzer.ts` (v3.8.27): the free
|
|
45
|
+
// classifier reads the user's actual phrasing AND the prior assistant
|
|
46
|
+
// reply and decides whether this turn is a correction. Zero keyword
|
|
47
|
+
// allowlist, works across languages and phrasings the regex never covered.
|
|
112
48
|
/**
|
|
113
49
|
* Sanitize history: fix orphaned tool results AND inject missing results.
|
|
114
50
|
*
|
|
@@ -455,20 +391,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
455
391
|
input = cmdResult.rewritten;
|
|
456
392
|
}
|
|
457
393
|
}
|
|
458
|
-
// ── Pushback detection ──
|
|
459
|
-
// When the user corrects us ("no", "but", "actually", "wrong"), we must throw
|
|
460
|
-
// away the previous plan and reconsider — not continue the failing approach.
|
|
461
|
-
// Without this signal, cheap models tend to plough forward with the same bad idea.
|
|
462
|
-
const pushbackSignal = detectPushback(input, history);
|
|
463
|
-
const effectiveInput = pushbackSignal
|
|
464
|
-
? `${input}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`
|
|
465
|
-
: input;
|
|
466
394
|
lastUserInput = input;
|
|
467
|
-
|
|
395
|
+
// Push the user's clean message; any harness-injected annotations
|
|
396
|
+
// (pushback SYSTEM NOTE, prefetch context block) are applied AFTER
|
|
397
|
+
// the turn analyzer runs so they get driven by model-decided flags
|
|
398
|
+
// instead of keyword regex.
|
|
399
|
+
history.push({ role: 'user', content: input });
|
|
468
400
|
turnCount++;
|
|
469
401
|
toolGuard.startTurn();
|
|
470
|
-
// Persist the user's original message, not the injected SYSTEM NOTE scaffold.
|
|
471
|
-
// Resumed sessions should show what the user typed, not our internal prompt engineering.
|
|
472
402
|
persistSessionMessage({ role: 'user', content: input });
|
|
473
403
|
// ── Model recovery: try original model at the start of each new turn ──
|
|
474
404
|
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
@@ -552,24 +482,74 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
552
482
|
const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
|
|
553
483
|
let turnSpend = 0; // Cost spent this user turn (USD)
|
|
554
484
|
const MAX_TURN_SPEND_USD = 0.25; // Hard circuit breaker per user message (lowered — user wallets are real money)
|
|
485
|
+
// ── Turn analysis (one classifier call, drives routing + prefetch) ──
|
|
486
|
+
// Single LLM pass that answers every routing-adjacent question the
|
|
487
|
+
// harness needs BEFORE the main model runs: tier, ticker intent,
|
|
488
|
+
// pushback, planning need, live-data signal. Replaces what used to be
|
|
489
|
+
// two separate classifier calls (router + prefetch) plus keyword rule
|
|
490
|
+
// engines for pushback / shouldPlan. Safe-defaults on any failure so
|
|
491
|
+
// the main flow never blocks on it.
|
|
492
|
+
let turnAnalysis = null;
|
|
493
|
+
try {
|
|
494
|
+
// Anchor 1: the user's current message (already in lastUserInput).
|
|
495
|
+
// Anchor 2: first chunk of the previous assistant reply — gives the
|
|
496
|
+
// analyzer enough context to resolve deictic follow-ups like "那 AAPL 呢".
|
|
497
|
+
const lastAssistantText = (() => {
|
|
498
|
+
const prior = [...history.slice(0, -1)].reverse()
|
|
499
|
+
.find((m) => m.role === 'assistant');
|
|
500
|
+
if (!prior)
|
|
501
|
+
return '';
|
|
502
|
+
if (typeof prior.content === 'string')
|
|
503
|
+
return prior.content;
|
|
504
|
+
if (!Array.isArray(prior.content))
|
|
505
|
+
return '';
|
|
506
|
+
return prior.content
|
|
507
|
+
.filter(p => p.type === 'text')
|
|
508
|
+
.map(p => p.text ?? '')
|
|
509
|
+
.join(' ');
|
|
510
|
+
})();
|
|
511
|
+
// Anchor 3: the very first user message in this session (session goal).
|
|
512
|
+
const sessionGoal = (() => {
|
|
513
|
+
const first = history.find((m) => m.role === 'user');
|
|
514
|
+
if (!first)
|
|
515
|
+
return '';
|
|
516
|
+
return typeof first.content === 'string' ? first.content : '';
|
|
517
|
+
})();
|
|
518
|
+
turnAnalysis = await analyzeTurn(input, {
|
|
519
|
+
lastAssistantText,
|
|
520
|
+
sessionGoal,
|
|
521
|
+
client,
|
|
522
|
+
});
|
|
523
|
+
}
|
|
524
|
+
catch {
|
|
525
|
+
// Analyzer is best-effort; ignore.
|
|
526
|
+
}
|
|
527
|
+
// ── Pushback annotation ─────────────────────────────────────────
|
|
528
|
+
// If the analyzer judged this turn as a user correction of the
|
|
529
|
+
// previous answer, inject a SYSTEM NOTE into the user message so the
|
|
530
|
+
// model resets its approach rather than doubling down. Replaces the
|
|
531
|
+
// former PUSHBACK_STRONG / PUSHBACK_WEAK regex lists — model-decided,
|
|
532
|
+
// no keyword allowlist to rot.
|
|
533
|
+
if (turnAnalysis?.isPushback) {
|
|
534
|
+
const lastIdx = history.length - 1;
|
|
535
|
+
const last = history[lastIdx];
|
|
536
|
+
if (last && last.role === 'user' && typeof last.content === 'string') {
|
|
537
|
+
history[lastIdx] = {
|
|
538
|
+
role: 'user',
|
|
539
|
+
content: `${last.content}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`,
|
|
540
|
+
};
|
|
541
|
+
}
|
|
542
|
+
}
|
|
555
543
|
// ── Proactive prefetch ────────────────────────────────────────────
|
|
556
|
-
//
|
|
557
|
-
//
|
|
558
|
-
// intent and fetches the data itself. Result is prepended to the user's
|
|
559
|
-
// message so the model sees it as ground truth for this turn. This
|
|
560
|
-
// makes the answer tool-grounded regardless of the model's willingness
|
|
561
|
-
// to call tools on its own — important for models with strong
|
|
562
|
-
// refusal priors on financial data.
|
|
544
|
+
// Uses the intent the analyzer already extracted. Skips the separate
|
|
545
|
+
// prefetch-classifier call that previously ran here.
|
|
563
546
|
try {
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
const prefetch = await prefetchForIntent(intent, client);
|
|
547
|
+
if (turnAnalysis?.intent) {
|
|
548
|
+
const prefetch = await prefetchForIntent(turnAnalysis.intent, client);
|
|
567
549
|
if (prefetch && prefetch.anyOk) {
|
|
568
550
|
if (config.showPrefetchStatus !== false) {
|
|
569
551
|
onEvent({ kind: 'text_delta', text: `\n${prefetch.statusLine}\n\n` });
|
|
570
552
|
}
|
|
571
|
-
// Augment the last user message in history (NOT lastUserInput,
|
|
572
|
-
// which /retry restores — that should remain the user's original).
|
|
573
553
|
const lastIdx = history.length - 1;
|
|
574
554
|
const last = history[lastIdx];
|
|
575
555
|
if (last && last.role === 'user' && typeof last.content === 'string') {
|
|
@@ -579,8 +559,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
579
559
|
}
|
|
580
560
|
}
|
|
581
561
|
catch {
|
|
582
|
-
// Prefetch is best-effort —
|
|
583
|
-
// fall through and let the main loop do its own thing.
|
|
562
|
+
// Prefetch is best-effort — never block the main loop.
|
|
584
563
|
}
|
|
585
564
|
// Agent loop for this user message
|
|
586
565
|
while (loopCount < maxTurns) {
|
|
@@ -711,28 +690,24 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
711
690
|
sessionId,
|
|
712
691
|
});
|
|
713
692
|
// ── Router: resolve routing profiles to concrete models ──
|
|
714
|
-
//
|
|
715
|
-
//
|
|
716
|
-
//
|
|
717
|
-
// across iterations → same tier → stable resolved model. Stops the
|
|
718
|
-
// failure mode where a retry message classified as SIMPLE dropped
|
|
719
|
-
// a COMPLEX task down to gemini mid-way.
|
|
693
|
+
// Uses the tier already decided by the turn-analyzer — one LLM call
|
|
694
|
+
// up-front rather than a separate classifier here. Fallback to the
|
|
695
|
+
// stand-alone classifier if analyzer wasn't available.
|
|
720
696
|
const routingProfile = parseRoutingProfile(config.model);
|
|
721
697
|
let resolvedModel = config.model;
|
|
722
698
|
let routingTier;
|
|
723
699
|
let routingConfidence;
|
|
724
700
|
let routingSavings;
|
|
725
701
|
if (routingProfile) {
|
|
726
|
-
const
|
|
727
|
-
|
|
702
|
+
const routing = turnAnalysis
|
|
703
|
+
? resolveTierToModel(turnAnalysis.tier, routingProfile)
|
|
704
|
+
: await routeRequestAsync(lastUserInput || '', routingProfile);
|
|
728
705
|
resolvedModel = routing.model;
|
|
729
706
|
routingTier = routing.tier;
|
|
730
707
|
routingConfidence = routing.confidence;
|
|
731
708
|
routingSavings = routing.savings;
|
|
732
709
|
lastRoutedModel = routing.model;
|
|
733
710
|
lastRoutedCategory = routing.signals[0] || '';
|
|
734
|
-
// Surface the routing decision on the first iteration so the user
|
|
735
|
-
// sees which concrete model got picked, not just "auto".
|
|
736
711
|
if (loopCount === 1) {
|
|
737
712
|
onEvent({
|
|
738
713
|
kind: 'text_delta',
|
|
@@ -743,8 +718,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
743
718
|
// Update token estimation model for more accurate byte-per-token ratio
|
|
744
719
|
setEstimationModel(resolvedModel);
|
|
745
720
|
// ── Plan-then-execute: detect and activate ──
|
|
721
|
+
// `needsPlanning` flag comes from turn-analyzer (one-word LLM decision
|
|
722
|
+
// on the user's original prompt). shouldPlan still guards env / profile /
|
|
723
|
+
// ultrathink / per-session overrides — those are operator policy, not
|
|
724
|
+
// model decisions.
|
|
746
725
|
if (loopCount === 1 && !planActive && routingProfile &&
|
|
747
|
-
shouldPlan(
|
|
726
|
+
shouldPlan(routingProfile, !!config.ultrathink, !!config.planDisabled, turnAnalysis?.needsPlanning ?? false)) {
|
|
748
727
|
planActive = true;
|
|
749
728
|
planPlannerModel = resolvedModel;
|
|
750
729
|
planExecutorModel = getExecutorModel(routingProfile);
|
package/dist/agent/planner.d.ts
CHANGED
|
@@ -7,13 +7,19 @@
|
|
|
7
7
|
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
8
|
* → escalate back to strong model if executor gets stuck
|
|
9
9
|
*/
|
|
10
|
-
import type {
|
|
10
|
+
import type { RoutingProfile } from '../router/index.js';
|
|
11
11
|
/**
|
|
12
12
|
* Should this task use plan-then-execute?
|
|
13
|
-
*
|
|
14
|
-
* the
|
|
13
|
+
*
|
|
14
|
+
* Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
|
|
15
|
+
* with a single read of `turnAnalysis.needsPlanning`. The free model judged
|
|
16
|
+
* whether the task is substantive-multi-step from the user's actual phrasing,
|
|
17
|
+
* no keyword allowlist to maintain.
|
|
18
|
+
*
|
|
19
|
+
* Environment gates (opt-in / opt-out / profile / ultrathink / session
|
|
20
|
+
* override) remain — those are operator decisions, not model decisions.
|
|
15
21
|
*/
|
|
16
|
-
export declare function shouldPlan(
|
|
22
|
+
export declare function shouldPlan(profile: RoutingProfile | undefined, ultrathink: boolean, planDisabled: boolean, analyzerSaysNeedsPlanning: boolean): boolean;
|
|
17
23
|
/**
|
|
18
24
|
* Returns the planning system prompt section.
|
|
19
25
|
* Injected alongside the normal system prompt during the planning call.
|
package/dist/agent/planner.js
CHANGED
|
@@ -7,53 +7,38 @@
|
|
|
7
7
|
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
8
|
* → escalate back to strong model if executor gets stuck
|
|
9
9
|
*/
|
|
10
|
-
// ─── Agentic keywords that suggest multi-step work ───────────────────────
|
|
11
|
-
const AGENTIC_KEYWORDS = /\b(implement|refactor|build|fix|debug|migrate|deploy|create|add|remove|update|restructure|extract|rewrite|optimize|convert|integrate|setup|configure)\b/i;
|
|
12
|
-
const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+that|next\s*,|finally\b/i;
|
|
13
10
|
// ─── Detection ───────────────────────────────────────────────────────────
|
|
14
11
|
/**
|
|
15
12
|
* Should this task use plan-then-execute?
|
|
16
|
-
*
|
|
17
|
-
* the
|
|
13
|
+
*
|
|
14
|
+
* Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
|
|
15
|
+
* with a single read of `turnAnalysis.needsPlanning`. The free model judged
|
|
16
|
+
* whether the task is substantive-multi-step from the user's actual phrasing,
|
|
17
|
+
* no keyword allowlist to maintain.
|
|
18
|
+
*
|
|
19
|
+
* Environment gates (opt-in / opt-out / profile / ultrathink / session
|
|
20
|
+
* override) remain — those are operator decisions, not model decisions.
|
|
18
21
|
*/
|
|
19
|
-
export function shouldPlan(
|
|
20
|
-
// Default: plan-then-execute is OFF (v3.8.18).
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
// The cheap-executor pattern was load-bearing for Sonnet 4.0-era models;
|
|
25
|
-
// Opus 4.7 / Sonnet 4.6 handle multi-step tool use coherently in a
|
|
26
|
-
// single pass, so the two-call path is pure overhead — and it actively
|
|
27
|
-
// hurts when the executor is weaker than the planner.
|
|
28
|
-
// Opt back in with FRANKLIN_PLAN=1 (for experiments / ablation).
|
|
22
|
+
export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsPlanning) {
|
|
23
|
+
// Default: plan-then-execute is OFF (since v3.8.18). The cheap-executor
|
|
24
|
+
// pattern was load-bearing for Sonnet-4.0-era models but Opus 4.7 /
|
|
25
|
+
// Sonnet 4.6 handle multi-step tool use in a single pass. Opt in with
|
|
26
|
+
// FRANKLIN_PLAN=1 for ablation / experiments.
|
|
29
27
|
if (process.env.FRANKLIN_PLAN !== '1')
|
|
30
28
|
return false;
|
|
31
|
-
// Legacy env opt-out
|
|
29
|
+
// Legacy env opt-out still honored for users who set it previously.
|
|
32
30
|
if (process.env.FRANKLIN_NOPLAN === '1')
|
|
33
31
|
return false;
|
|
34
|
-
//
|
|
32
|
+
// Per-session / per-turn overrides from the agent surface.
|
|
35
33
|
if (planDisabled)
|
|
36
34
|
return false;
|
|
37
|
-
// Ultrathink already provides deep reasoning
|
|
38
35
|
if (ultrathink)
|
|
39
|
-
return false;
|
|
40
|
-
// Only auto
|
|
36
|
+
return false; // ultrathink already provides deep reasoning
|
|
37
|
+
// Only auto / premium profiles — eco / free are cost-constrained.
|
|
41
38
|
if (profile !== 'auto' && profile !== 'premium')
|
|
42
39
|
return false;
|
|
43
|
-
//
|
|
44
|
-
|
|
45
|
-
if (MULTI_STEP_PATTERN.test(userText))
|
|
46
|
-
return true;
|
|
47
|
-
// Planning is high-ROI on COMPLEX / REASONING tiers for agentic verbs,
|
|
48
|
-
// even when the prompt is short ("refactor the wallet module", "migrate to TS")
|
|
49
|
-
if (tier === 'COMPLEX' || tier === 'REASONING') {
|
|
50
|
-
return AGENTIC_KEYWORDS.test(userText) || userText.length >= 60;
|
|
51
|
-
}
|
|
52
|
-
// On MEDIUM tier: plan only if long AND agentic
|
|
53
|
-
if (tier === 'MEDIUM' && userText.length >= 120 && AGENTIC_KEYWORDS.test(userText)) {
|
|
54
|
-
return true;
|
|
55
|
-
}
|
|
56
|
-
return false;
|
|
40
|
+
// Final decision comes from the turn analyzer's boolean flag.
|
|
41
|
+
return analyzerSaysNeedsPlanning;
|
|
57
42
|
}
|
|
58
43
|
// ─── Planning Prompt ─────────────────────────────────────────────────────
|
|
59
44
|
/**
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Turn analyzer — one LLM call per turn that answers every routing-adjacent
|
|
3
|
+
* question the harness needs to make BEFORE the main model runs.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists:
|
|
6
|
+
* Prior versions called separate classifiers for routing (what tier?) and
|
|
7
|
+
* prefetch (is there a ticker?). Each additional harness decision tempted
|
|
8
|
+
* us to add yet another classifier call (pushback? plan? needs-grounding?).
|
|
9
|
+
* Each call adds ~500-800ms of serial latency; stack six of them and the
|
|
10
|
+
* user waits multiple seconds before the main model even starts.
|
|
11
|
+
*
|
|
12
|
+
* This consolidates every LLM-decidable pre-turn question into a single
|
|
13
|
+
* call with a structured JSON response. Net result: 1 classifier call per
|
|
14
|
+
* turn (was 2), replacing multiple keyword rule engines (pushback regex,
|
|
15
|
+
* shouldPlan keyword list, shouldCheckGrounding length gates).
|
|
16
|
+
*
|
|
17
|
+
* Principle: harness orchestrates, models decide. No keyword allowlists,
|
|
18
|
+
* no length thresholds, no regex heuristics encoded in TypeScript.
|
|
19
|
+
*
|
|
20
|
+
* Budget discipline:
|
|
21
|
+
* - Input capped at ~1500 chars across three anchors (current, prev reply,
|
|
22
|
+
* session goal). Never the full history.
|
|
23
|
+
* - Output capped at 128 tokens (compact single-line JSON).
|
|
24
|
+
* - 2.5s hard timeout; on any failure, conservative default returned so
|
|
25
|
+
* the main flow never blocks.
|
|
26
|
+
* - 30s in-memory cache keyed on the three anchors so back-to-back near-
|
|
27
|
+
* identical turns don't re-pay the latency.
|
|
28
|
+
*/
|
|
29
|
+
import type { ModelClient } from './llm.js';
|
|
30
|
+
import type { MarketCode } from '../trading/providers/standard-models.js';
|
|
31
|
+
import type { Tier } from '../router/index.js';
|
|
32
|
+
export interface TurnIntent {
|
|
33
|
+
kind: 'ticker';
|
|
34
|
+
symbol: string;
|
|
35
|
+
assetClass: 'stock' | 'crypto';
|
|
36
|
+
market?: MarketCode;
|
|
37
|
+
wantNews: boolean;
|
|
38
|
+
}
|
|
39
|
+
export interface TurnAnalysis {
|
|
40
|
+
tier: Tier;
|
|
41
|
+
intent: TurnIntent | null;
|
|
42
|
+
/** True for substantive multi-step engineering tasks worth a plan-then-execute split. */
|
|
43
|
+
needsPlanning: boolean;
|
|
44
|
+
/** True when the user is correcting the previous assistant turn. */
|
|
45
|
+
isPushback: boolean;
|
|
46
|
+
/** True when the user asks for current prices / today's state / recent news. */
|
|
47
|
+
asksForLiveData: boolean;
|
|
48
|
+
}
|
|
49
|
+
/** Test / reset helper. */
|
|
50
|
+
export declare function clearAnalyzerCache(): void;
|
|
51
|
+
/**
|
|
52
|
+
* Parse the analyzer's JSON output. Returns null on any structural issue;
|
|
53
|
+
* caller falls back to conservative defaults.
|
|
54
|
+
*/
|
|
55
|
+
export declare function parseAnalysis(raw: string): TurnAnalysis | null;
|
|
56
|
+
export interface AnalyzeOpts {
|
|
57
|
+
lastAssistantText?: string;
|
|
58
|
+
sessionGoal?: string;
|
|
59
|
+
client: ModelClient;
|
|
60
|
+
model?: string;
|
|
61
|
+
signal?: AbortSignal;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Analyze one turn. Always returns a TurnAnalysis — never throws. On any
|
|
65
|
+
* failure path (timeout, parse error, empty response, gateway down) the
|
|
66
|
+
* conservative default is returned so the main flow proceeds without the
|
|
67
|
+
* harness's pre-decisions. The analyzer is a quality booster, not a
|
|
68
|
+
* correctness requirement.
|
|
69
|
+
*/
|
|
70
|
+
export declare function analyzeTurn(userInput: string, opts: AnalyzeOpts): Promise<TurnAnalysis>;
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Turn analyzer — one LLM call per turn that answers every routing-adjacent
|
|
3
|
+
* question the harness needs to make BEFORE the main model runs.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists:
|
|
6
|
+
* Prior versions called separate classifiers for routing (what tier?) and
|
|
7
|
+
* prefetch (is there a ticker?). Each additional harness decision tempted
|
|
8
|
+
* us to add yet another classifier call (pushback? plan? needs-grounding?).
|
|
9
|
+
* Each call adds ~500-800ms of serial latency; stack six of them and the
|
|
10
|
+
* user waits multiple seconds before the main model even starts.
|
|
11
|
+
*
|
|
12
|
+
* This consolidates every LLM-decidable pre-turn question into a single
|
|
13
|
+
* call with a structured JSON response. Net result: 1 classifier call per
|
|
14
|
+
* turn (was 2), replacing multiple keyword rule engines (pushback regex,
|
|
15
|
+
* shouldPlan keyword list, shouldCheckGrounding length gates).
|
|
16
|
+
*
|
|
17
|
+
* Principle: harness orchestrates, models decide. No keyword allowlists,
|
|
18
|
+
* no length thresholds, no regex heuristics encoded in TypeScript.
|
|
19
|
+
*
|
|
20
|
+
* Budget discipline:
|
|
21
|
+
* - Input capped at ~1500 chars across three anchors (current, prev reply,
|
|
22
|
+
* session goal). Never the full history.
|
|
23
|
+
* - Output capped at 128 tokens (compact single-line JSON).
|
|
24
|
+
* - 2.5s hard timeout; on any failure, conservative default returned so
|
|
25
|
+
* the main flow never blocks.
|
|
26
|
+
* - 30s in-memory cache keyed on the three anchors so back-to-back near-
|
|
27
|
+
* identical turns don't re-pay the latency.
|
|
28
|
+
*/
|
|
29
|
+
/**
|
|
30
|
+
* Safe default returned when the analyzer call fails (timeout, parse error,
|
|
31
|
+
* gateway down). Chosen to be neutral:
|
|
32
|
+
* - MEDIUM tier → router picks a capable mid-tier model, not the cheapest
|
|
33
|
+
* - no intent → prefetch skips
|
|
34
|
+
* - all booleans false → downstream gates don't fire speculatively
|
|
35
|
+
* The main-flow still runs; the harness just loses its per-turn pre-decisions.
|
|
36
|
+
*/
|
|
37
|
+
const CONSERVATIVE_DEFAULT = {
|
|
38
|
+
tier: 'MEDIUM',
|
|
39
|
+
intent: null,
|
|
40
|
+
needsPlanning: false,
|
|
41
|
+
isPushback: false,
|
|
42
|
+
asksForLiveData: false,
|
|
43
|
+
};
|
|
44
|
+
// ─── Input budget ───────────────────────────────────────────────────────
|
|
45
|
+
const MAX_CURRENT_CHARS = 800;
|
|
46
|
+
const MAX_PREV_REPLY_CHARS = 300;
|
|
47
|
+
const MAX_GOAL_CHARS = 200;
|
|
48
|
+
const TIMEOUT_MS = 2_500;
|
|
49
|
+
const MAX_ANALYZER_TOKENS = 128;
|
|
50
|
+
const CACHE_TTL_MS = 30_000;
|
|
51
|
+
const CACHE_MAX_SIZE = 64;
|
|
52
|
+
// ─── Analyzer prompt ────────────────────────────────────────────────────
|
|
53
|
+
//
|
|
54
|
+
// Design: one compact prompt, a few precise examples, instruct the model to
|
|
55
|
+
// emit a single-line JSON. Maverick (the classifier backbone since v3.8.23)
|
|
56
|
+
// reliably produces plain-text structured output under tight max_tokens,
|
|
57
|
+
// unlike thinking-first models that leave text empty.
|
|
58
|
+
const ANALYZER_MODEL_DEFAULT = process.env.FRANKLIN_ANALYZER_MODEL || 'nvidia/llama-4-maverick';
|
|
59
|
+
const ANALYZER_SYSTEM = `You analyze ONE user message for Franklin's routing + prefetch harness. Output ONE LINE of compact JSON — no explanation, no markdown, no code fences.
|
|
60
|
+
|
|
61
|
+
## Fields
|
|
62
|
+
|
|
63
|
+
tier: "SIMPLE" | "MEDIUM" | "COMPLEX" | "REASONING"
|
|
64
|
+
SIMPLE — greetings, arithmetic, trivia, short factual Q
|
|
65
|
+
MEDIUM — targeted code edits, simple lookups, summaries, single-tool tasks
|
|
66
|
+
COMPLEX — analysis, recommendations, research questions needing live data, multi-step tool use
|
|
67
|
+
REASONING — formal proofs, derivations, deep logic, multi-variable optimization
|
|
68
|
+
NEVER route ticker / price / stock / "should I" / "why did" questions below COMPLEX.
|
|
69
|
+
|
|
70
|
+
intent: null OR {"kind":"ticker","symbol":"...","assetClass":"stock"|"crypto","market":"us"|"hk"|"jp"|"kr"|"gb"|"de"|"fr"|"nl"|"ie"|"lu"|"cn"|"ca","wantNews":true|false}
|
|
71
|
+
Set when the user names a ticker, a publicly-traded company, or a cryptocurrency.
|
|
72
|
+
Omit "market" for crypto; default "us" for stocks if unclear.
|
|
73
|
+
wantNews: true if the user asks why / what happened / analyze. false for plain price lookup.
|
|
74
|
+
|
|
75
|
+
needsPlanning: true | false
|
|
76
|
+
true only for substantive multi-step engineering tasks (build X, refactor Y across many files).
|
|
77
|
+
|
|
78
|
+
isPushback: true | false
|
|
79
|
+
true when the user is correcting / disagreeing with the previous assistant turn.
|
|
80
|
+
|
|
81
|
+
asksForLiveData: true | false
|
|
82
|
+
true when the user asks for a current price, today's news, or any live-world state.
|
|
83
|
+
|
|
84
|
+
## Context anchors in input
|
|
85
|
+
|
|
86
|
+
[CURRENT] user's message this turn (primary signal)
|
|
87
|
+
[PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "那 AAPL 呢", "and that one?", "the other ticker")
|
|
88
|
+
[GOAL] original session prompt, first ~200 chars
|
|
89
|
+
|
|
90
|
+
If [CURRENT] uses a deictic ("it", "that", "那", "这个"), resolve intent/tier from [PREV_REPLY] or [GOAL].
|
|
91
|
+
|
|
92
|
+
## Examples
|
|
93
|
+
|
|
94
|
+
Input:
|
|
95
|
+
[CURRENT] hi
|
|
96
|
+
Output: {"tier":"SIMPLE","intent":null,"needsPlanning":false,"isPushback":false,"asksForLiveData":false}
|
|
97
|
+
|
|
98
|
+
Input:
|
|
99
|
+
[CURRENT] should I sell CRCL and why did it drop
|
|
100
|
+
Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"CRCL","assetClass":"stock","market":"us","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
|
|
101
|
+
|
|
102
|
+
Input:
|
|
103
|
+
[CURRENT] 那 AAPL 呢
|
|
104
|
+
[PREV_REPLY] CRCL 当前价格 $96.18,最近因 Drift 诉讼下跌...
|
|
105
|
+
Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"AAPL","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
|
|
106
|
+
|
|
107
|
+
Input:
|
|
108
|
+
[CURRENT] BTC 为什么跌了
|
|
109
|
+
Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"BTC","assetClass":"crypto","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
|
|
110
|
+
|
|
111
|
+
Input:
|
|
112
|
+
[CURRENT] 不对,你应该看 NVDA 不是 AAPL
|
|
113
|
+
[PREV_REPLY] AAPL 当前价格 $186.42
|
|
114
|
+
Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"NVDA","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":true,"asksForLiveData":true}
|
|
115
|
+
|
|
116
|
+
Input:
|
|
117
|
+
[CURRENT] refactor the wallet module to use typed errors across all call sites
|
|
118
|
+
Output: {"tier":"MEDIUM","intent":null,"needsPlanning":true,"isPushback":false,"asksForLiveData":false}
|
|
119
|
+
|
|
120
|
+
Input:
|
|
121
|
+
[CURRENT] prove that sqrt(2) is irrational
|
|
122
|
+
Output: {"tier":"REASONING","intent":null,"needsPlanning":false,"isPushback":false,"asksForLiveData":false}
|
|
123
|
+
|
|
124
|
+
Output the JSON only. One line. No trailing text.`;
|
|
125
|
+
// Module-level memo of analyzer results. Keys are the strings produced by
// hashKey(); each entry is stored as `{ value, expiresAt }` by cacheSet() and
// is dropped lazily by cacheGet() once its expiry time has passed.
const cache = new Map();
|
|
126
|
+
/**
 * Simple deterministic string hash for cache keys — no crypto, just bucketing.
 *
 * The parts are joined with a NUL separator before hashing so that the field
 * boundaries take part in the key. Without a separator, `['ab', 'c']` and
 * `['a', 'bc']` would collapse to the same string and therefore the same key.
 *
 * @param {string[]} parts - Ordered key components.
 * @returns {string} Decimal rendering of a signed 32-bit rolling hash.
 */
function hashKey(parts) {
    const joined = parts.join('\u0000');
    let h = 0;
    for (let i = 0; i < joined.length; i++) {
        // h * 31 + charCode, truncated to a signed 32-bit integer by `| 0`.
        h = ((h << 5) - h + joined.charCodeAt(i)) | 0;
    }
    return String(h);
}
|
|
135
|
+
/**
 * Fetch a cached value by key.
 *
 * @param {string} key - Cache key.
 * @returns {*} The stored value, or null when the key is absent or its entry
 *   has passed `expiresAt` (an expired entry is removed on the way out).
 */
function cacheGet(key) {
    const entry = cache.get(key);
    if (entry) {
        const isStale = Date.now() > entry.expiresAt;
        if (!isStale) {
            return entry.value;
        }
        // Lazy expiry: stale entries are only removed when somebody asks for them.
        cache.delete(key);
    }
    return null;
}
|
|
145
|
+
/**
 * Store a value under `key`, stamped with an expiry of now + CACHE_TTL_MS.
 * When the cache already holds CACHE_MAX_SIZE entries (or more), the entry
 * that was inserted first is removed to make room.
 *
 * @param {string} key - Cache key.
 * @param {*} value - Value to remember.
 */
function cacheSet(key, value) {
    const isFull = cache.size >= CACHE_MAX_SIZE;
    if (isFull) {
        // A Map iterates in insertion order, so the first key is the oldest one.
        const [oldest] = cache.keys();
        if (oldest) {
            cache.delete(oldest);
        }
    }
    const expiresAt = Date.now() + CACHE_TTL_MS;
    cache.set(key, { value, expiresAt });
}
|
|
154
|
+
/**
 * Drop every memoized analyzer result.
 * Exposed as a test / reset helper; it returns nothing.
 */
export function clearAnalyzerCache() {
    cache.clear();
}
|
|
158
|
+
// ─── Parsing ────────────────────────────────────────────────────────────
|
|
159
|
+
/** Tier labels accepted from the analyzer output. */
const VALID_TIERS = new Set(['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING']);
/** Lower-case market codes accepted for stock intents. */
const VALID_MARKETS = new Set([
    'us', 'hk', 'jp', 'kr', 'gb', 'de', 'fr', 'nl', 'ie', 'lu', 'cn', 'ca',
]);
/**
 * Normalize the `intent` field of an analyzer response.
 *
 * Only `kind: 'ticker'` intents are accepted. The symbol is trimmed and
 * upper-cased and must consist of A-Z, 0-9, '.' or '-'. The asset class must
 * be 'stock' or 'crypto'. Stocks always carry a market (an unknown or missing
 * market becomes 'us'); crypto intents carry none.
 *
 * @param {*} raw - Untrusted value taken from parsed JSON.
 * @returns {object|null} A clean intent object, or null when `raw` is unusable.
 */
function validateIntent(raw) {
    if (raw === null || typeof raw !== 'object') {
        return null;
    }
    if (raw.kind !== 'ticker') {
        return null;
    }

    const symbol = typeof raw.symbol === 'string' ? raw.symbol.trim().toUpperCase() : '';
    const isWellFormed = /^[A-Z0-9.\-]+$/.test(symbol);
    if (!isWellFormed) {
        return null;
    }

    const assetClass = raw.assetClass;
    if (assetClass !== 'stock' && assetClass !== 'crypto') {
        return null;
    }

    const intent = { kind: 'ticker', symbol, assetClass };
    if (assetClass === 'stock') {
        const requested = typeof raw.market === 'string' ? raw.market.toLowerCase() : 'us';
        intent.market = VALID_MARKETS.has(requested) ? requested : 'us';
    }
    intent.wantNews = Boolean(raw.wantNews);
    return intent;
}
|
|
188
|
+
/**
 * Parse the analyzer's JSON output. Returns null on any structural issue;
 * caller falls back to conservative defaults.
 *
 * The JSON object is taken as the span from the first `{` to the last `}` in
 * `raw`, so surrounding prose without braces is tolerated. The `tier` must be
 * one of VALID_TIERS; `intent` is normalized through validateIntent(); the
 * three flags are coerced to booleans.
 *
 * @param {*} raw - Text returned by the analyzer model. Anything that is not
 *   a string is treated as a structural issue and yields null.
 * @returns {{tier: string, intent: (object|null), needsPlanning: boolean,
 *   isPushback: boolean, asksForLiveData: boolean}|null}
 */
export function parseAnalysis(raw) {
    // This function is exported, so guard against non-string input here:
    // `raw.match` would otherwise throw before the try block is reached.
    if (typeof raw !== 'string')
        return null;
    const jsonMatch = raw.match(/\{[\s\S]*\}/);
    if (!jsonMatch)
        return null;
    try {
        const parsed = JSON.parse(jsonMatch[0]);
        const tier = typeof parsed.tier === 'string' && VALID_TIERS.has(parsed.tier)
            ? parsed.tier
            : null;
        if (!tier)
            return null;
        return {
            tier,
            intent: validateIntent(parsed.intent),
            needsPlanning: Boolean(parsed.needsPlanning),
            isPushback: Boolean(parsed.isPushback),
            asksForLiveData: Boolean(parsed.asksForLiveData),
        };
    }
    catch {
        // Malformed JSON (or any failure while reading it) is a structural issue.
        return null;
    }
}
|
|
215
|
+
// ─── Input assembly ─────────────────────────────────────────────────────
|
|
216
|
+
/**
 * Assemble the bounded, tagged text the analyzer model receives. Raw history
 * is never sent — only the current message plus, optionally, a trimmed copy of
 * the previous reply and the session goal.
 *
 * Sections are joined with newlines and appear in this order:
 *   [CURRENT]    always; the trimmed user input, capped at MAX_CURRENT_CHARS
 *   [PREV_REPLY] only when the previous reply is non-blank; heading markers
 *                and `**` are stripped, then capped at MAX_PREV_REPLY_CHARS
 *   [GOAL]       only when the goal is non-blank and differs from the trimmed
 *                user input; capped at MAX_GOAL_CHARS
 *
 * @param {string} userInput - Current user message.
 * @param {string} [lastAssistantText] - Previous assistant reply, if any.
 * @param {string} [sessionGoal] - Session goal, if any.
 * @returns {string} The analyzer prompt body.
 */
function buildAnalyzerInput(userInput, lastAssistantText, sessionGoal) {
    const current = userInput.trim();
    const lines = ['[CURRENT]', current.slice(0, MAX_CURRENT_CHARS)];

    const reply = lastAssistantText ? lastAssistantText.trim() : '';
    if (reply.length > 0) {
        // Strip markdown chrome so the character budget is spent on content.
        const plain = reply
            .replace(/^#+\s+/gm, '')
            .replace(/\*\*/g, '');
        lines.push('', '[PREV_REPLY]', plain.slice(0, MAX_PREV_REPLY_CHARS));
    }

    const goal = sessionGoal ? sessionGoal.trim() : '';
    if (goal.length > 0 && goal !== current) {
        lines.push('', '[GOAL]', goal.slice(0, MAX_GOAL_CHARS));
    }

    return lines.join('\n');
}
|
|
237
|
+
/**
 * Classify a single user turn with one small model call.
 *
 * CONSERVATIVE_DEFAULT is returned when the analyzer is switched off through
 * FRANKLIN_NO_ANALYZER=1, when the input is blank, when the model reply cannot
 * be parsed, and when the request fails or is aborted (by `opts.signal` or by
 * the internal TIMEOUT_MS timer). Successfully parsed results are memoized,
 * keyed on the bounded current input, previous reply and goal.
 *
 * @param {string} userInput - Current user message.
 * @param {object} opts
 * @param {object} opts.client - Object whose `complete(request, signal)` performs the model call.
 * @param {string} [opts.model] - Model override; ANALYZER_MODEL_DEFAULT otherwise.
 * @param {string} [opts.lastAssistantText] - Previous assistant reply.
 * @param {string} [opts.sessionGoal] - Session goal.
 * @param {AbortSignal} [opts.signal] - Caller-side cancellation.
 * @returns {Promise<object>} The parsed analysis, or CONSERVATIVE_DEFAULT.
 */
export async function analyzeTurn(userInput, opts) {
    const analyzerDisabled = process.env.FRANKLIN_NO_ANALYZER === '1';
    if (analyzerDisabled) {
        return CONSERVATIVE_DEFAULT;
    }
    const current = userInput.trim();
    if (!current) {
        return CONSERVATIVE_DEFAULT;
    }

    // Bound the context fields first so the cache key and the prompt agree.
    const prevReply = opts.lastAssistantText?.trim().slice(0, MAX_PREV_REPLY_CHARS) || '';
    const goal = opts.sessionGoal?.trim().slice(0, MAX_GOAL_CHARS) || '';
    const key = hashKey([current.slice(0, MAX_CURRENT_CHARS), prevReply, goal]);

    const memoized = cacheGet(key);
    if (memoized) {
        return memoized;
    }

    const input = buildAnalyzerInput(current, prevReply || undefined, goal || undefined);

    // The call is cancelled by whichever fires first: the caller or the timer.
    const deadline = new AbortController();
    const timer = setTimeout(() => deadline.abort(), TIMEOUT_MS);
    let signal = deadline.signal;
    if (opts.signal) {
        signal = anySignal([opts.signal, deadline.signal]);
    }

    try {
        const request = {
            model: opts.model || ANALYZER_MODEL_DEFAULT,
            system: ANALYZER_SYSTEM,
            messages: [{ role: 'user', content: input }],
            tools: [],
            max_tokens: MAX_ANALYZER_TOKENS,
        };
        const response = await opts.client.complete(request, signal);

        // Concatenate the text parts of the reply; other part types are ignored.
        let text = '';
        for (const block of response.content) {
            const isText = typeof block === 'object' && block.type === 'text' && block.text;
            if (isText) {
                text += block.text;
            }
        }

        const analysis = parseAnalysis(text);
        if (!analysis) {
            // Unparseable replies are not cached.
            return CONSERVATIVE_DEFAULT;
        }
        cacheSet(key, analysis);
        return analysis;
    }
    catch {
        return CONSERVATIVE_DEFAULT;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
286
|
+
/**
 * Compose several AbortSignals into one — the result aborts as soon as any
 * source aborts, and carries that source's abort reason.
 *
 * Uses the platform's `AbortSignal.any` when the runtime provides it, so no
 * listeners of our own are left on a long-lived caller signal. On runtimes
 * without it (or for signal-like objects it rejects), a manual fallback wires
 * listeners and detaches them from every source once the first one fires.
 *
 * @param {AbortSignal[]} signals - Source signals.
 * @returns {AbortSignal} Signal that follows whichever source aborts first.
 */
function anySignal(signals) {
    if (typeof AbortSignal.any === 'function') {
        try {
            return AbortSignal.any(signals);
        }
        catch {
            // Non-native signal objects are rejected by the built-in; use the
            // listener-based fallback below, which only needs the event API.
        }
    }

    const ctrl = new AbortController();
    const sources = [...signals];

    const alreadyAborted = sources.find((s) => s.aborted);
    if (alreadyAborted) {
        // Return before attaching any listener.
        ctrl.abort(alreadyAborted.reason);
        return ctrl.signal;
    }

    const attached = [];
    const detachAll = () => {
        for (const [source, listener] of attached) {
            source.removeEventListener('abort', listener);
        }
    };
    for (const source of sources) {
        const listener = () => {
            detachAll();
            ctrl.abort(source.reason);
        };
        attached.push([source, listener]);
        source.addEventListener('abort', listener, { once: true });
    }
    return ctrl.signal;
}
|
package/dist/router/index.d.ts
CHANGED
|
@@ -32,6 +32,16 @@ export declare function llmClassifyRequest(prompt: string): Promise<Tier | null>
|
|
|
32
32
|
* the concrete model; the classifier only picks the TIER.
|
|
33
33
|
*/
|
|
34
34
|
export declare function routeRequestAsync(prompt: string, profile?: RoutingProfile, classify?: TierClassifier): Promise<RoutingResult>;
|
|
35
|
+
/**
|
|
36
|
+
* Map a pre-classified tier to a concrete model + savings using the profile's
|
|
37
|
+
* tier table. No classifier call — assumes the caller already decided the
|
|
38
|
+
* tier (typically via the turn-analyzer, which rolls tier classification in
|
|
39
|
+
* with intent / pushback / planning decisions in one LLM call).
|
|
40
|
+
*
|
|
41
|
+
* Use this when you have a tier already. Use `routeRequestAsync` when you
|
|
42
|
+
* need the classifier to produce the tier.
|
|
43
|
+
*/
|
|
44
|
+
export declare function resolveTierToModel(tier: Tier, profile?: RoutingProfile): RoutingResult;
|
|
35
45
|
export declare function routeRequest(prompt: string, profile?: RoutingProfile): RoutingResult;
|
|
36
46
|
/**
|
|
37
47
|
* Get fallback models for a tier
|
package/dist/router/index.js
CHANGED
|
@@ -393,6 +393,45 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
|
|
|
393
393
|
savings: computeSavings(model),
|
|
394
394
|
};
|
|
395
395
|
}
|
|
396
|
+
/**
 * Turn an already-decided tier into a routing result without running any
 * classifier. Intended for callers that obtained the tier elsewhere (for
 * example from the turn-analyzer); use `routeRequestAsync` when the tier still
 * has to be classified.
 *
 * The 'free' profile ignores `tier` and always yields the fixed free model,
 * reported as tier 'SIMPLE'. Otherwise the model is the `primary` entry for
 * `tier` in the profile's table: ECO_TIERS for 'eco', PREMIUM_TIERS for
 * 'premium', and AUTO_TIERS for anything else.
 *
 * @param {string} tier - Pre-classified tier; must be a key of the tier table.
 * @param {string} [profile='auto'] - Routing profile.
 * @returns {{model: string, tier: string, confidence: number, signals: string[], savings: number}}
 */
export function resolveTierToModel(tier, profile = 'auto') {
    if (profile === 'free') {
        // Everything on the free profile goes to one model.
        return {
            model: 'nvidia/glm-4.7',
            tier: 'SIMPLE',
            confidence: 1.0,
            signals: ['free-profile'],
            savings: 1.0,
        };
    }

    let table = AUTO_TIERS;
    if (profile === 'eco') {
        table = ECO_TIERS;
    }
    else if (profile === 'premium') {
        table = PREMIUM_TIERS;
    }

    const { primary } = table[tier];
    return {
        model: primary,
        tier,
        confidence: 0.85,
        signals: ['pre-classified'],
        savings: computeSavings(primary),
    };
}
|
|
396
435
|
// ─── Main Router ───
|
|
397
436
|
export function routeRequest(prompt, profile = 'auto') {
|
|
398
437
|
// Free profile — always use free model
|
package/package.json
CHANGED