@blockrun/franklin 3.8.27 → 3.8.29
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/dist/agent/intent-prefetch.d.ts +0 -3
- package/dist/agent/intent-prefetch.js +10 -105
- package/dist/agent/loop.js +33 -84
- package/dist/agent/planner.d.ts +10 -4
- package/dist/agent/planner.js +19 -34
- package/dist/router/index.d.ts +2 -0
- package/dist/router/index.js +5 -1
- package/package.json +1 -1
|
@@ -51,9 +51,6 @@ export interface PrefetchResult {
|
|
|
51
51
|
* decide to skip injection entirely and let the model try its own way. */
|
|
52
52
|
anyOk: boolean;
|
|
53
53
|
}
|
|
54
|
-
/** Parse the classifier's one-line reply. Very strict — any junk → null. */
|
|
55
|
-
export declare function parseIntentReply(reply: string): Intent;
|
|
56
|
-
export declare function classifyIntent(userInput: string, client: ModelClient): Promise<Intent>;
|
|
57
54
|
/** Run the prefetch for an intent. Concurrent fan-out for price + news. */
|
|
58
55
|
export declare function prefetchForIntent(intent: Intent, client: ModelClient): Promise<PrefetchResult | null>;
|
|
59
56
|
/**
|
|
@@ -26,111 +26,16 @@
|
|
|
26
26
|
* coordination gap (harness fetches, model synthesizes)."
|
|
27
27
|
*/
|
|
28
28
|
import { getStockPrice, getPrice } from '../trading/data.js';
|
|
29
|
-
// ───
|
|
30
|
-
//
|
|
31
|
-
//
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
1. STOCK <TICKER> <MARKET> <NEWS>
|
|
41
|
-
When the user asks about a specific publicly-traded equity — by ticker (CRCL, AAPL, NVDA, 7203, 0005) or by company name that maps to one (Circle → CRCL, Apple → AAPL, Toyota → 7203, HSBC → 0005).
|
|
42
|
-
MARKET: us | hk | jp | kr | gb | de | fr | nl | ie | lu | cn | ca
|
|
43
|
-
NEWS: yes if the user also asks "why / what happened / analysis"; no otherwise.
|
|
44
|
-
Default market: us.
|
|
45
|
-
|
|
46
|
-
2. CRYPTO <SYMBOL> <NEWS>
|
|
47
|
-
When the user asks about a cryptocurrency by symbol or name (BTC, ETH, Bitcoin, Ethereum, SOL, Solana).
|
|
48
|
-
NEWS: yes if asks why / recent news.
|
|
49
|
-
|
|
50
|
-
3. NONE
|
|
51
|
-
Any other message: greetings, coding questions, general chat, questions about non-traded entities.
|
|
52
|
-
|
|
53
|
-
Rules:
|
|
54
|
-
- If the company could be either public or private and you're unsure, assume PUBLIC and emit STOCK with your best ticker guess. The tool will 404 gracefully if wrong.
|
|
55
|
-
- One output line only. No explanation. No punctuation beyond what's shown.
|
|
56
|
-
- Ticker in UPPERCASE.
|
|
57
|
-
|
|
58
|
-
Examples:
|
|
59
|
-
User: 帮我看看 CRCL 股票 → STOCK CRCL us no
|
|
60
|
-
User: should I sell Circle stock? → STOCK CRCL us no
|
|
61
|
-
User: why did CRCL drop this week → STOCK CRCL us yes
|
|
62
|
-
User: BTC 现在价格 → CRYPTO BTC no
|
|
63
|
-
User: 为什么以太坊跌了 → CRYPTO ETH yes
|
|
64
|
-
User: Toyota 股价 → STOCK 7203 jp no
|
|
65
|
-
User: hi how are you → NONE
|
|
66
|
-
User: fix the bug in foo.ts → NONE
|
|
67
|
-
|
|
68
|
-
Answer with just the one-line directive.`;
|
|
69
|
-
/** Parse the classifier's one-line reply. Very strict — any junk → null. */
|
|
70
|
-
export function parseIntentReply(reply) {
|
|
71
|
-
const line = reply.trim().split('\n')[0].trim().toUpperCase();
|
|
72
|
-
if (!line || line.startsWith('NONE'))
|
|
73
|
-
return null;
|
|
74
|
-
const stockMatch = line.match(/^STOCK\s+([A-Z0-9.\-]+)\s+([A-Z]{2})\s+(YES|NO)\b/);
|
|
75
|
-
if (stockMatch) {
|
|
76
|
-
const market = stockMatch[2].toLowerCase();
|
|
77
|
-
const validMarkets = ['us', 'hk', 'jp', 'kr', 'gb', 'de', 'fr', 'nl', 'ie', 'lu', 'cn', 'ca'];
|
|
78
|
-
if (!validMarkets.includes(market))
|
|
79
|
-
return null;
|
|
80
|
-
return {
|
|
81
|
-
kind: 'ticker',
|
|
82
|
-
symbol: stockMatch[1],
|
|
83
|
-
market: market,
|
|
84
|
-
assetClass: 'stock',
|
|
85
|
-
wantNews: stockMatch[3] === 'YES',
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
const cryptoMatch = line.match(/^CRYPTO\s+([A-Z0-9.\-]+)\s+(YES|NO)\b/);
|
|
89
|
-
if (cryptoMatch) {
|
|
90
|
-
return {
|
|
91
|
-
kind: 'ticker',
|
|
92
|
-
symbol: cryptoMatch[1],
|
|
93
|
-
assetClass: 'crypto',
|
|
94
|
-
wantNews: cryptoMatch[2] === 'YES',
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
|
-
return null;
|
|
98
|
-
}
|
|
99
|
-
export async function classifyIntent(userInput, client) {
|
|
100
|
-
if (process.env.FRANKLIN_NO_PREFETCH === '1')
|
|
101
|
-
return null;
|
|
102
|
-
const trimmed = userInput.trim();
|
|
103
|
-
// Only the cheapest gate — skip very short inputs that can't be a real
|
|
104
|
-
// market question ("hi", "ok", "thanks"). 6 chars covers those while
|
|
105
|
-
// still letting short-form Chinese / ticker prompts through, e.g.
|
|
106
|
-
// "BTC 价格" (6), "CRCL 多少" (7). Longer prompts all route to the LLM
|
|
107
|
-
// classifier, which decides NONE cheaply when not market-related.
|
|
108
|
-
if (trimmed.length < 6)
|
|
109
|
-
return null;
|
|
110
|
-
const ctrl = new AbortController();
|
|
111
|
-
const timer = setTimeout(() => ctrl.abort(), CLASSIFIER_TIMEOUT_MS);
|
|
112
|
-
try {
|
|
113
|
-
const result = await client.complete({
|
|
114
|
-
model: CLASSIFIER_MODEL,
|
|
115
|
-
system: CLASSIFIER_PROMPT,
|
|
116
|
-
messages: [{ role: 'user', content: trimmed.slice(0, 800) }],
|
|
117
|
-
tools: [],
|
|
118
|
-
max_tokens: 24,
|
|
119
|
-
}, ctrl.signal);
|
|
120
|
-
let raw = '';
|
|
121
|
-
for (const part of result.content) {
|
|
122
|
-
if (typeof part === 'object' && part.type === 'text' && part.text)
|
|
123
|
-
raw += part.text;
|
|
124
|
-
}
|
|
125
|
-
return parseIntentReply(raw);
|
|
126
|
-
}
|
|
127
|
-
catch {
|
|
128
|
-
return null;
|
|
129
|
-
}
|
|
130
|
-
finally {
|
|
131
|
-
clearTimeout(timer);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
29
|
+
// ─── Intent source ──────────────────────────────────────────────────────
|
|
30
|
+
//
|
|
31
|
+
// Historical note: this file used to host its own LLM classifier
|
|
32
|
+
// (`classifyIntent` + `parseIntentReply` + a ~40-line STOCK/CRYPTO/NONE
|
|
33
|
+
// prompt). Since v3.8.27 the unified `turn-analyzer.ts` produces intent
|
|
34
|
+
// as part of a single pre-turn call, and `loop.ts` reads
|
|
35
|
+
// `turnAnalysis.intent` directly — the standalone classifier was dead
|
|
36
|
+
// code with no remaining callers. Removed in v3.8.29. The TurnIntent
|
|
37
|
+
// shape lives in turn-analyzer and is consumed by `prefetchForIntent`
|
|
38
|
+
// below.
|
|
134
39
|
// ─── Prefetch dispatcher ─────────────────────────────────────────────────
|
|
135
40
|
function formatUsd(n) {
|
|
136
41
|
if (!Number.isFinite(n))
|
package/dist/agent/loop.js
CHANGED
|
@@ -39,77 +39,12 @@ function replaceHistory(target, replacement) {
|
|
|
39
39
|
target.splice(0, target.length, ...replacement);
|
|
40
40
|
}
|
|
41
41
|
// ─── Pushback detection ───────────────────────────────────────────────────
|
|
42
|
-
//
|
|
43
|
-
//
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
//
|
|
47
|
-
//
|
|
48
|
-
// STRONG patterns: high-precision correction language. Fires even on short input.
|
|
49
|
-
const PUSHBACK_STRONG = [
|
|
50
|
-
/\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
|
|
51
|
-
/\b(i\s+(said|told\s+you)|not\s+what\s+i)\b/i,
|
|
52
|
-
/^(stop|wrong|incorrect|try\s+again)\b/i,
|
|
53
|
-
/^(不对|不是|错了|再试|重来)/,
|
|
54
|
-
];
|
|
55
|
-
// WEAK patterns: common correction starters that also appear in casual speech.
|
|
56
|
-
// Require a corroborating signal (see detectPushback) to count as pushback.
|
|
57
|
-
const PUSHBACK_WEAK = [
|
|
58
|
-
/^(but|however|actually|wait|no+\b|hmm)\b/i,
|
|
59
|
-
/\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
|
|
60
|
-
/^(但是|其实|等等|停)/,
|
|
61
|
-
];
|
|
62
|
-
/**
|
|
63
|
-
* True if the last assistant turn made a concrete claim worth pushing back
|
|
64
|
-
* against: executed a tool, wrote code, or produced a non-trivial answer.
|
|
65
|
-
* Casual assistant chatter doesn't warrant treating a "but" as a correction.
|
|
66
|
-
*/
|
|
67
|
-
function lastAssistantHasClaim(history) {
|
|
68
|
-
for (let i = history.length - 1; i >= 0; i--) {
|
|
69
|
-
const msg = history[i];
|
|
70
|
-
if (msg.role !== 'assistant')
|
|
71
|
-
continue;
|
|
72
|
-
if (Array.isArray(msg.content)) {
|
|
73
|
-
for (const part of msg.content) {
|
|
74
|
-
const p = part;
|
|
75
|
-
if (p.type === 'tool_use')
|
|
76
|
-
return true;
|
|
77
|
-
if (p.type === 'text' && typeof p.text === 'string' && p.text.trim().length >= 40) {
|
|
78
|
-
return true;
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
return false;
|
|
82
|
-
}
|
|
83
|
-
if (typeof msg.content === 'string' && msg.content.trim().length >= 40)
|
|
84
|
-
return true;
|
|
85
|
-
return false;
|
|
86
|
-
}
|
|
87
|
-
return false;
|
|
88
|
-
}
|
|
89
|
-
function detectPushback(input, history) {
|
|
90
|
-
// Only count as pushback if there's a prior assistant turn to push back against.
|
|
91
|
-
if (history.length === 0)
|
|
92
|
-
return false;
|
|
93
|
-
if (!lastAssistantHasClaim(history))
|
|
94
|
-
return false;
|
|
95
|
-
const trimmed = input.trim();
|
|
96
|
-
if (trimmed.length === 0 || trimmed.length > 500)
|
|
97
|
-
return false;
|
|
98
|
-
// Strong patterns: direct correction language — fire immediately.
|
|
99
|
-
if (PUSHBACK_STRONG.some((re) => re.test(trimmed)))
|
|
100
|
-
return true;
|
|
101
|
-
// Weak patterns: only count if the message is short (< 120 chars) AND doesn't
|
|
102
|
-
// also contain a fresh request. A weak starter followed by "can you also X"
|
|
103
|
-
// or "please do Y" is scope addition, not correction.
|
|
104
|
-
if (PUSHBACK_WEAK.some((re) => re.test(trimmed))) {
|
|
105
|
-
if (trimmed.length > 120)
|
|
106
|
-
return false;
|
|
107
|
-
if (/\b(can you|could you|please|also|add|include)\b/i.test(trimmed))
|
|
108
|
-
return false;
|
|
109
|
-
return true;
|
|
110
|
-
}
|
|
111
|
-
return false;
|
|
112
|
-
}
|
|
42
|
+
// Formerly a pair of regex lists (PUSHBACK_STRONG / PUSHBACK_WEAK) plus a
|
|
43
|
+
// claim-on-prior-turn check — ~70 lines of keyword heuristics. Replaced by
|
|
44
|
+
// `turnAnalysis.isPushback` from `turn-analyzer.ts` (v3.8.27): the free
|
|
45
|
+
// classifier reads the user's actual phrasing AND the prior assistant
|
|
46
|
+
// reply and decides whether this turn is a correction. Zero keyword
|
|
47
|
+
// allowlist, works across languages and phrasings the regex never covered.
|
|
113
48
|
/**
|
|
114
49
|
* Sanitize history: fix orphaned tool results AND inject missing results.
|
|
115
50
|
*
|
|
@@ -456,20 +391,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
456
391
|
input = cmdResult.rewritten;
|
|
457
392
|
}
|
|
458
393
|
}
|
|
459
|
-
// ── Pushback detection ──
|
|
460
|
-
// When the user corrects us ("no", "but", "actually", "wrong"), we must throw
|
|
461
|
-
// away the previous plan and reconsider — not continue the failing approach.
|
|
462
|
-
// Without this signal, cheap models tend to plough forward with the same bad idea.
|
|
463
|
-
const pushbackSignal = detectPushback(input, history);
|
|
464
|
-
const effectiveInput = pushbackSignal
|
|
465
|
-
? `${input}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`
|
|
466
|
-
: input;
|
|
467
394
|
lastUserInput = input;
|
|
468
|
-
|
|
395
|
+
// Push the user's clean message; any harness-injected annotations
|
|
396
|
+
// (pushback SYSTEM NOTE, prefetch context block) are applied AFTER
|
|
397
|
+
// the turn analyzer runs so they get driven by model-decided flags
|
|
398
|
+
// instead of keyword regex.
|
|
399
|
+
history.push({ role: 'user', content: input });
|
|
469
400
|
turnCount++;
|
|
470
401
|
toolGuard.startTurn();
|
|
471
|
-
// Persist the user's original message, not the injected SYSTEM NOTE scaffold.
|
|
472
|
-
// Resumed sessions should show what the user typed, not our internal prompt engineering.
|
|
473
402
|
persistSessionMessage({ role: 'user', content: input });
|
|
474
403
|
// ── Model recovery: try original model at the start of each new turn ──
|
|
475
404
|
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
@@ -595,6 +524,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
595
524
|
catch {
|
|
596
525
|
// Analyzer is best-effort; ignore.
|
|
597
526
|
}
|
|
527
|
+
// ── Pushback annotation ─────────────────────────────────────────
|
|
528
|
+
// If the analyzer judged this turn as a user correction of the
|
|
529
|
+
// previous answer, inject a SYSTEM NOTE into the user message so the
|
|
530
|
+
// model resets its approach rather than doubling down. Replaces the
|
|
531
|
+
// former PUSHBACK_STRONG / PUSHBACK_WEAK regex lists — model-decided,
|
|
532
|
+
// no keyword allowlist to rot.
|
|
533
|
+
if (turnAnalysis?.isPushback) {
|
|
534
|
+
const lastIdx = history.length - 1;
|
|
535
|
+
const last = history[lastIdx];
|
|
536
|
+
if (last && last.role === 'user' && typeof last.content === 'string') {
|
|
537
|
+
history[lastIdx] = {
|
|
538
|
+
role: 'user',
|
|
539
|
+
content: `${last.content}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`,
|
|
540
|
+
};
|
|
541
|
+
}
|
|
542
|
+
}
|
|
598
543
|
// ── Proactive prefetch ────────────────────────────────────────────
|
|
599
544
|
// Uses the intent the analyzer already extracted. Skips the separate
|
|
600
545
|
// prefetch-classifier call that previously ran here.
|
|
@@ -762,7 +707,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
762
707
|
routingConfidence = routing.confidence;
|
|
763
708
|
routingSavings = routing.savings;
|
|
764
709
|
lastRoutedModel = routing.model;
|
|
765
|
-
lastRoutedCategory = routing.
|
|
710
|
+
lastRoutedCategory = routing.category || '';
|
|
766
711
|
if (loopCount === 1) {
|
|
767
712
|
onEvent({
|
|
768
713
|
kind: 'text_delta',
|
|
@@ -773,8 +718,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
773
718
|
// Update token estimation model for more accurate byte-per-token ratio
|
|
774
719
|
setEstimationModel(resolvedModel);
|
|
775
720
|
// ── Plan-then-execute: detect and activate ──
|
|
721
|
+
// `needsPlanning` flag comes from turn-analyzer (one-word LLM decision
|
|
722
|
+
// on the user's original prompt). shouldPlan still guards env / profile /
|
|
723
|
+
// ultrathink / per-session overrides — those are operator policy, not
|
|
724
|
+
// model decisions.
|
|
776
725
|
if (loopCount === 1 && !planActive && routingProfile &&
|
|
777
|
-
shouldPlan(
|
|
726
|
+
shouldPlan(routingProfile, !!config.ultrathink, !!config.planDisabled, turnAnalysis?.needsPlanning ?? false)) {
|
|
778
727
|
planActive = true;
|
|
779
728
|
planPlannerModel = resolvedModel;
|
|
780
729
|
planExecutorModel = getExecutorModel(routingProfile);
|
package/dist/agent/planner.d.ts
CHANGED
|
@@ -7,13 +7,19 @@
|
|
|
7
7
|
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
8
|
* → escalate back to strong model if executor gets stuck
|
|
9
9
|
*/
|
|
10
|
-
import type {
|
|
10
|
+
import type { RoutingProfile } from '../router/index.js';
|
|
11
11
|
/**
|
|
12
12
|
* Should this task use plan-then-execute?
|
|
13
|
-
*
|
|
14
|
-
* the
|
|
13
|
+
*
|
|
14
|
+
* Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
|
|
15
|
+
* with a single read of `turnAnalysis.needsPlanning`. The free model judged
|
|
16
|
+
* whether the task is substantive-multi-step from the user's actual phrasing,
|
|
17
|
+
* no keyword allowlist to maintain.
|
|
18
|
+
*
|
|
19
|
+
* Environment gates (opt-in / opt-out / profile / ultrathink / session
|
|
20
|
+
* override) remain — those are operator decisions, not model decisions.
|
|
15
21
|
*/
|
|
16
|
-
export declare function shouldPlan(
|
|
22
|
+
export declare function shouldPlan(profile: RoutingProfile | undefined, ultrathink: boolean, planDisabled: boolean, analyzerSaysNeedsPlanning: boolean): boolean;
|
|
17
23
|
/**
|
|
18
24
|
* Returns the planning system prompt section.
|
|
19
25
|
* Injected alongside the normal system prompt during the planning call.
|
package/dist/agent/planner.js
CHANGED
|
@@ -7,53 +7,38 @@
|
|
|
7
7
|
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
8
|
* → escalate back to strong model if executor gets stuck
|
|
9
9
|
*/
|
|
10
|
-
// ─── Agentic keywords that suggest multi-step work ───────────────────────
|
|
11
|
-
const AGENTIC_KEYWORDS = /\b(implement|refactor|build|fix|debug|migrate|deploy|create|add|remove|update|restructure|extract|rewrite|optimize|convert|integrate|setup|configure)\b/i;
|
|
12
|
-
const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+that|next\s*,|finally\b/i;
|
|
13
10
|
// ─── Detection ───────────────────────────────────────────────────────────
|
|
14
11
|
/**
|
|
15
12
|
* Should this task use plan-then-execute?
|
|
16
|
-
*
|
|
17
|
-
* the
|
|
13
|
+
*
|
|
14
|
+
* Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
|
|
15
|
+
* with a single read of `turnAnalysis.needsPlanning`. The free model judged
|
|
16
|
+
* whether the task is substantive-multi-step from the user's actual phrasing,
|
|
17
|
+
* no keyword allowlist to maintain.
|
|
18
|
+
*
|
|
19
|
+
* Environment gates (opt-in / opt-out / profile / ultrathink / session
|
|
20
|
+
* override) remain — those are operator decisions, not model decisions.
|
|
18
21
|
*/
|
|
19
|
-
export function shouldPlan(
|
|
20
|
-
// Default: plan-then-execute is OFF (v3.8.18).
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
// The cheap-executor pattern was load-bearing for Sonnet 4.0-era models;
|
|
25
|
-
// Opus 4.7 / Sonnet 4.6 handle multi-step tool use coherently in a
|
|
26
|
-
// single pass, so the two-call path is pure overhead — and it actively
|
|
27
|
-
// hurts when the executor is weaker than the planner.
|
|
28
|
-
// Opt back in with FRANKLIN_PLAN=1 (for experiments / ablation).
|
|
22
|
+
export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsPlanning) {
|
|
23
|
+
// Default: plan-then-execute is OFF (since v3.8.18). The cheap-executor
|
|
24
|
+
// pattern was load-bearing for Sonnet-4.0-era models but Opus 4.7 /
|
|
25
|
+
// Sonnet 4.6 handle multi-step tool use in a single pass. Opt in with
|
|
26
|
+
// FRANKLIN_PLAN=1 for ablation / experiments.
|
|
29
27
|
if (process.env.FRANKLIN_PLAN !== '1')
|
|
30
28
|
return false;
|
|
31
|
-
// Legacy env opt-out
|
|
29
|
+
// Legacy env opt-out still honored for users who set it previously.
|
|
32
30
|
if (process.env.FRANKLIN_NOPLAN === '1')
|
|
33
31
|
return false;
|
|
34
|
-
//
|
|
32
|
+
// Per-session / per-turn overrides from the agent surface.
|
|
35
33
|
if (planDisabled)
|
|
36
34
|
return false;
|
|
37
|
-
// Ultrathink already provides deep reasoning
|
|
38
35
|
if (ultrathink)
|
|
39
|
-
return false;
|
|
40
|
-
// Only auto
|
|
36
|
+
return false; // ultrathink already provides deep reasoning
|
|
37
|
+
// Only auto / premium profiles — eco / free are cost-constrained.
|
|
41
38
|
if (profile !== 'auto' && profile !== 'premium')
|
|
42
39
|
return false;
|
|
43
|
-
//
|
|
44
|
-
|
|
45
|
-
if (MULTI_STEP_PATTERN.test(userText))
|
|
46
|
-
return true;
|
|
47
|
-
// Planning is high-ROI on COMPLEX / REASONING tiers for agentic verbs,
|
|
48
|
-
// even when the prompt is short ("refactor the wallet module", "migrate to TS")
|
|
49
|
-
if (tier === 'COMPLEX' || tier === 'REASONING') {
|
|
50
|
-
return AGENTIC_KEYWORDS.test(userText) || userText.length >= 60;
|
|
51
|
-
}
|
|
52
|
-
// On MEDIUM tier: plan only if long AND agentic
|
|
53
|
-
if (tier === 'MEDIUM' && userText.length >= 120 && AGENTIC_KEYWORDS.test(userText)) {
|
|
54
|
-
return true;
|
|
55
|
-
}
|
|
56
|
-
return false;
|
|
40
|
+
// Final decision comes from the turn analyzer's boolean flag.
|
|
41
|
+
return analyzerSaysNeedsPlanning;
|
|
57
42
|
}
|
|
58
43
|
// ─── Planning Prompt ─────────────────────────────────────────────────────
|
|
59
44
|
/**
|
package/dist/router/index.d.ts
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* and picks the model with the best quality-to-cost ratio for that category.
|
|
10
10
|
* Local Elo adjustments personalize routing per user over time.
|
|
11
11
|
*/
|
|
12
|
+
import { type Category } from './categories.js';
|
|
12
13
|
export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
|
|
13
14
|
export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';
|
|
14
15
|
export interface RoutingResult {
|
|
@@ -17,6 +18,7 @@ export interface RoutingResult {
|
|
|
17
18
|
confidence: number;
|
|
18
19
|
signals: string[];
|
|
19
20
|
savings: number;
|
|
21
|
+
category?: Category;
|
|
20
22
|
}
|
|
21
23
|
export type TierClassifier = (prompt: string) => Promise<Tier | null>;
|
|
22
24
|
/**
|
package/dist/router/index.js
CHANGED
|
@@ -265,7 +265,8 @@ function classicRouteRequest(prompt, profile) {
|
|
|
265
265
|
}
|
|
266
266
|
const model = tierConfigs[tier].primary;
|
|
267
267
|
const savings = computeSavings(model);
|
|
268
|
-
|
|
268
|
+
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
269
|
+
return { model, tier, confidence, signals, savings, category };
|
|
269
270
|
}
|
|
270
271
|
// ─── LLM-based classifier ───
|
|
271
272
|
//
|
|
@@ -385,12 +386,14 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
|
|
|
385
386
|
default: tierConfigs = AUTO_TIERS;
|
|
386
387
|
}
|
|
387
388
|
const model = tierConfigs[tier].primary;
|
|
389
|
+
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
388
390
|
return {
|
|
389
391
|
model,
|
|
390
392
|
tier,
|
|
391
393
|
confidence: 0.85, // LLM classification — medium-high confidence
|
|
392
394
|
signals: ['llm-classified'],
|
|
393
395
|
savings: computeSavings(model),
|
|
396
|
+
category,
|
|
394
397
|
};
|
|
395
398
|
}
|
|
396
399
|
/**
|
|
@@ -481,6 +484,7 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
481
484
|
confidence,
|
|
482
485
|
signals: [category],
|
|
483
486
|
savings,
|
|
487
|
+
category,
|
|
484
488
|
};
|
|
485
489
|
}
|
|
486
490
|
// Fall through to classic if selectModel returns null (no candidates for category)
|
package/package.json
CHANGED