lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Preflight Checks
|
|
3
|
+
*
|
|
4
|
+
* Runs user-supplied commands before invoking the model. If they all
|
|
5
|
+
* exit 0, the work is already done — we skip the LLM call entirely
|
|
6
|
+
* and return a synthetic "preflight_satisfied" response at zero cost.
|
|
7
|
+
*
|
|
8
|
+
* Typical use case: a fix-the-failing-test request that arrives after
|
|
9
|
+
* the test already passes (CI lag, retry-after-fix, idempotent agent
|
|
10
|
+
* retries).
|
|
11
|
+
*
|
|
12
|
+
* The request opts in by including a top-level `preflight_commands`
|
|
13
|
+
* array on the Anthropic-format payload, e.g.:
|
|
14
|
+
*
|
|
15
|
+
* {
|
|
16
|
+
* "model": "...",
|
|
17
|
+
* "messages": [...],
|
|
18
|
+
* "preflight_commands": ["pnpm test -- user-service"]
|
|
19
|
+
* }
|
|
20
|
+
*
|
|
21
|
+
* Disabled by default — gated on LYNKR_PREFLIGHT_ENABLED=true. The
|
|
22
|
+
* commands run with the same permissions as the Lynkr server, so
|
|
23
|
+
* operators should only enable this on workspaces where that is OK.
|
|
24
|
+
*
|
|
25
|
+
* @module orchestrator/preflight
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
const { spawnSync } = require('child_process');
|
|
29
|
+
const path = require('path');
|
|
30
|
+
const config = require('../config');
|
|
31
|
+
const logger = require('../logger');
|
|
32
|
+
|
|
33
|
+
const MAX_COMMANDS = 10;
|
|
34
|
+
const MAX_OUTPUT_BYTES = 4000;
|
|
35
|
+
|
|
36
|
+
/**
 * Pull the list of preflight commands out of a request payload.
 *
 * The top-level `preflight_commands` field wins when it is truthy; otherwise
 * `metadata.lynkr_preflight_commands` is consulted (for clients that strip
 * unknown top-level fields). A value that is not an array yields an empty
 * list. Entries that are not non-blank strings are dropped, and at most
 * MAX_COMMANDS entries are returned. Kept entries are returned untrimmed.
 *
 * @param {object} payload
 * @returns {string[]}
 */
function extractCommands(payload) {
  if (!payload) return [];

  let candidates = payload.preflight_commands;
  if (!candidates) {
    candidates = payload.metadata?.lynkr_preflight_commands;
  }
  if (!candidates || !Array.isArray(candidates)) return [];

  const accepted = [];
  for (const entry of candidates) {
    if (typeof entry !== 'string') continue;
    if (entry.trim().length === 0) continue;
    accepted.push(entry);
  }
  return accepted.slice(0, MAX_COMMANDS);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Validate the workspace directory in which preflight commands will run.
 *
 * Only a non-empty string holding an absolute path is accepted, and it is
 * returned unchanged. There is no fallback to process.cwd(): any other
 * input yields null. The path is not checked for existence here.
 *
 * @param {string|null|undefined} cwd
 * @returns {string|null} absolute path, or null if invalid
 */
function resolveCwd(cwd) {
  const usable =
    typeof cwd === 'string' && cwd.length > 0 && path.isAbsolute(cwd);
  return usable ? cwd : null;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Run a single command synchronously through the shell and return a
 * structured result.
 *
 * Only the trailing MAX_OUTPUT_BYTES characters of stdout/stderr are kept
 * (the limit is applied to the decoded string, so it counts UTF-16 code
 * units rather than raw bytes).
 *
 * `timed_out` is derived from the ETIMEDOUT error that spawnSync reports
 * when its `timeout` elapses. The kill signal alone is not a reliable
 * indicator: a process terminated by an unrelated SIGTERM (or killed for
 * exceeding `maxBuffer`) also carries that signal.
 *
 * @param {string} command
 * @param {string} cwd
 * @param {number} timeoutMs
 * @returns {{ command: string, exit_code: number|null, stdout: string, stderr: string, timed_out: boolean }}
 */
function runCommand(command, cwd, timeoutMs) {
  const result = spawnSync(command, {
    cwd,
    shell: true,
    encoding: 'utf8',
    timeout: timeoutMs,
    maxBuffer: 10 * 1024 * 1024,
  });
  // stdout/stderr may be null when the process could not be spawned.
  const tail = (text) => (text || '').slice(-MAX_OUTPUT_BYTES);
  return {
    command,
    exit_code: result.status,
    stdout: tail(result.stdout),
    stderr: tail(result.stderr),
    timed_out: result.error?.code === 'ETIMEDOUT',
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Attempt the preflight pass for a request.
 *
 * Yields null when preflight does not apply: the feature flag
 * (`config.routing.preflightEnabled`) is off, the payload carries no
 * commands, or `cwd` is not a valid absolute path. Otherwise the commands
 * are executed one after another in the workspace; execution stops at the
 * first command whose exit code is not 0.
 *
 * @param {object} args
 * @param {object} args.payload - Anthropic-format request payload
 * @param {string} [args.cwd] - Workspace cwd (absolute path)
 * @returns {null | {
 *   satisfied: boolean,
 *   results: object[],
 *   failedCommand: string|null,
 *   reason: string,
 * }}
 */
function tryPreflight({ payload, cwd }) {
  const routing = config.routing;
  if (!routing?.preflightEnabled) return null;

  const commands = extractCommands(payload);
  if (commands.length === 0) return null;

  const workspaceCwd = resolveCwd(cwd);
  if (!workspaceCwd) {
    logger.debug({ cwd }, '[Preflight] No valid cwd, skipping');
    return null;
  }

  // A missing or zero timeout falls back to two minutes per command.
  const timeoutMs = routing.preflightTimeoutMs || 120000;
  const results = [];

  for (let i = 0; i < commands.length; i += 1) {
    const command = commands[i];
    const outcome = runCommand(command, workspaceCwd, timeoutMs);
    results.push(outcome);
    if (outcome.exit_code === 0) continue;

    const reason = outcome.timed_out
      ? `Preflight command timed out: ${command}`
      : `Preflight command exited ${outcome.exit_code}: ${command}`;
    return { satisfied: false, results, failedCommand: command, reason };
  }

  return {
    satisfied: true,
    results,
    failedCommand: null,
    reason: 'All preflight commands passed.',
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Assemble the synthetic Anthropic-style Message returned when every
 * preflight command passed, so the caller can answer without invoking a
 * model. Token usage is reported as zero and the preflight details are
 * attached under `lynkr_preflight`.
 *
 * @param {object} args
 * @param {string} args.model
 * @param {object} args.preflightResult
 * @returns {object} The full processMessage return value.
 */
function buildSatisfiedResponse({ model, preflightResult }) {
  const passedCount = preflightResult.results.length;
  const pluralSuffix = passedCount === 1 ? '' : 's';
  const summary = `Preflight satisfied — work appears already complete (${passedCount} command${pluralSuffix} passed).`;

  const message = {
    id: `msg_preflight_${Date.now()}`,
    type: 'message',
    role: 'assistant',
    content: [{ type: 'text', text: summary }],
    model,
    stop_reason: 'end_turn',
    stop_sequence: null,
    usage: { input_tokens: 0, output_tokens: 0 },
    lynkr_preflight: {
      satisfied: true,
      reason: preflightResult.reason,
      results: preflightResult.results,
    },
  };

  return {
    response: { json: message, ok: true, status: 200 },
    steps: 0,
    durationMs: 0,
    terminationReason: 'preflight_satisfied',
  };
}
|
|
181
|
+
|
|
182
|
+
module.exports = {
|
|
183
|
+
tryPreflight,
|
|
184
|
+
buildSatisfiedResponse,
|
|
185
|
+
extractCommands,
|
|
186
|
+
// Exposed for tests
|
|
187
|
+
resolveCwd,
|
|
188
|
+
};
|
package/src/prompts/system.js
CHANGED
|
@@ -70,13 +70,41 @@ function compressToolDescriptions(tools, mode = null) {
|
|
|
70
70
|
return tools; // Return unmodified if not in minimal mode
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
const validTools = tools.filter(tool => {
|
|
74
|
+
// Handle both Anthropic format (name + input_schema) and OpenAI format (function.name)
|
|
75
|
+
const hasAnthropicFormat = tool && tool.name && tool.input_schema;
|
|
76
|
+
const hasOpenAIFormat = tool && tool.function && tool.function.name;
|
|
77
|
+
const isValid = hasAnthropicFormat || hasOpenAIFormat;
|
|
78
|
+
|
|
79
|
+
if (!isValid) {
|
|
80
|
+
logger.debug({
|
|
81
|
+
hasName: !!tool?.name,
|
|
82
|
+
hasSchema: !!tool?.input_schema,
|
|
83
|
+
hasFunctionName: !!tool?.function?.name,
|
|
84
|
+
toolType: typeof tool
|
|
85
|
+
}, 'Filtered out malformed tool');
|
|
86
|
+
}
|
|
87
|
+
return isValid;
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
if (validTools.length === 0 && tools.length > 0) {
|
|
91
|
+
logger.warn({ originalCount: tools.length }, 'All tools filtered out as malformed - returning original');
|
|
92
|
+
return tools;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return validTools.map(tool => {
|
|
96
|
+
// If already in OpenAI format, return as-is (no compression for OpenAI format)
|
|
97
|
+
if (tool.function && !tool.input_schema) {
|
|
98
|
+
return tool;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Compress Anthropic format
|
|
74
102
|
const compressed = {
|
|
75
103
|
name: tool.name,
|
|
76
104
|
input_schema: {
|
|
77
|
-
type: tool.input_schema
|
|
105
|
+
type: tool.input_schema?.type || "object",
|
|
78
106
|
properties: {},
|
|
79
|
-
required: tool.input_schema
|
|
107
|
+
required: tool.input_schema?.required || [],
|
|
80
108
|
}
|
|
81
109
|
};
|
|
82
110
|
|
|
@@ -190,7 +218,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
190
218
|
|
|
191
219
|
// 2. Remove file operation guidelines if no file tools
|
|
192
220
|
const hasFileTools = context.tools?.some(t =>
|
|
193
|
-
['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
|
|
221
|
+
t?.name && ['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
|
|
194
222
|
);
|
|
195
223
|
if (!hasFileTools) {
|
|
196
224
|
text = removeSection(text, /# File Operations?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'file operations');
|
|
@@ -198,7 +226,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
198
226
|
|
|
199
227
|
// 3. Remove git guidelines if no git tools
|
|
200
228
|
const hasGitTools = context.tools?.some(t =>
|
|
201
|
-
t.name.toLowerCase().includes('git')
|
|
229
|
+
t?.name && t.name.toLowerCase().includes('git')
|
|
202
230
|
);
|
|
203
231
|
if (!hasGitTools) {
|
|
204
232
|
text = removeSection(text, /# Git.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'git guidelines');
|
|
@@ -207,7 +235,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
207
235
|
|
|
208
236
|
// 4. Remove web search guidelines if no web tools
|
|
209
237
|
const hasWebTools = context.tools?.some(t =>
|
|
210
|
-
['WebSearch', 'WebFetch'].includes(t.name)
|
|
238
|
+
t?.name && ['WebSearch', 'WebFetch'].includes(t.name)
|
|
211
239
|
);
|
|
212
240
|
if (!hasWebTools) {
|
|
213
241
|
text = removeSection(text, /# Web.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'web guidelines');
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LinUCB contextual bandit for intra-tier model selection (Phase 4.1).
|
|
3
|
+
*
|
|
4
|
+
* Standard LinUCB-with-disjoint-models algorithm (Li et al. 2010).
|
|
5
|
+
* - One arm per (provider, model) pair in a tier
|
|
6
|
+
* - Context = numerical feature vector for the request
|
|
7
|
+
* - Reward = quality_score - λ·norm_cost - μ·norm_latency
|
|
8
|
+
* - Per-arm A (d×d ridge-regression matrix) and b (d-vector) stored to disk
|
|
9
|
+
*
|
|
10
|
+
* State persists to data/bandit-state.json. Loaded on startup; saved on
|
|
11
|
+
* every 25th `update()` (to limit disk I/O) and on graceful shutdown.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const fs = require('fs');
|
|
15
|
+
const path = require('path');
|
|
16
|
+
const logger = require('../logger');
|
|
17
|
+
|
|
18
|
+
const STATE_PATH = path.join(__dirname, '../../data/bandit-state.json');
|
|
19
|
+
const DEFAULT_ALPHA = 1.5;
|
|
20
|
+
const DEFAULT_LAMBDA = 0.3; // cost penalty weight
|
|
21
|
+
const DEFAULT_MU = 0.1; // latency penalty weight
|
|
22
|
+
const FEATURE_DIM = 12;
|
|
23
|
+
const EXPLORATION_RATE = 0.05;
|
|
24
|
+
|
|
25
|
+
/** Build a d×d identity matrix as an array of row arrays. */
function _identity(d) {
  return Array.from({ length: d }, (_row, r) =>
    Array.from({ length: d }, (_col, c) => (r === c ? 1 : 0)));
}
|
|
33
|
+
|
|
34
|
+
/** Build a length-d vector filled with zeros. */
function _zeros(d) {
  return Array.from({ length: d }, () => 0);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Multiply matrix M by vector v. Both dimensions are taken from v.length,
 * so M is expected to be square with at least that many rows and columns.
 */
function _matVec(M, v) {
  const size = v.length;
  return Array.from({ length: size }, (_unused, row) => {
    let acc = 0;
    for (let col = 0; col < size; col += 1) {
      acc += M[row][col] * v[col];
    }
    return acc;
  });
}
|
|
46
|
+
|
|
47
|
+
/** Dot product of a and b, iterating over the length of a. */
function _dot(a, b) {
  let total = 0;
  for (const [index, value] of a.entries()) {
    total += value * b[index];
  }
  return total;
}
|
|
52
|
+
|
|
53
|
+
/** Outer product a·bᵀ as an a.length × b.length matrix. */
function _outer(a, b) {
  return Array.from(a, (left) => Array.from(b, (right) => left * right));
}
|
|
61
|
+
|
|
62
|
+
/** In-place matrix addition: A += B. A is mutated; nothing is returned. */
function _addMat(A, B) {
  for (const [r, row] of A.entries()) {
    for (const c of row.keys()) {
      row[c] += B[r][c];
    }
  }
}
|
|
67
|
+
|
|
68
|
+
/** In-place vector addition: a += b. a is mutated; nothing is returned. */
function _addVec(a, b) {
  for (const index of a.keys()) {
    a[index] += b[index];
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Invert a small dense square matrix via Gauss-Jordan elimination with
 * partial pivoting. For d=12 this is plenty fast and saves us a dependency
 * on a linear algebra library.
 *
 * For every column the row with the largest-magnitude entry is used as the
 * pivot. Dividing by a tiny-but-nonzero pivot would amplify rounding error,
 * so always picking the largest candidate keeps the result accurate.
 *
 * @param {number[][]} M - square matrix; not modified
 * @returns {number[][]} a new matrix holding the inverse of M
 * @throws {Error} 'matrix singular' when no usable pivot exists for a column
 */
function _inv(M) {
  const SINGULAR_EPS = 1e-12;
  const d = M.length;
  // Augment each row with the matching identity row: [ M | I ].
  const aug = M.map((row, i) => {
    const r = row.slice();
    for (let j = 0; j < d; j++) r.push(i === j ? 1 : 0);
    return r;
  });

  for (let col = 0; col < d; col++) {
    // Pick the row (at or below the diagonal) with the largest entry.
    let pivotRow = col;
    for (let k = col + 1; k < d; k++) {
      if (Math.abs(aug[k][col]) > Math.abs(aug[pivotRow][col])) pivotRow = k;
    }
    if (Math.abs(aug[pivotRow][col]) < SINGULAR_EPS) {
      throw new Error('matrix singular');
    }
    if (pivotRow !== col) {
      [aug[col], aug[pivotRow]] = [aug[pivotRow], aug[col]];
    }

    // Normalize the pivot row, then clear the column in every other row.
    const pivot = aug[col][col];
    for (let j = 0; j < 2 * d; j++) aug[col][j] /= pivot;
    for (let k = 0; k < d; k++) {
      if (k === col) continue;
      const factor = aug[k][col];
      if (factor === 0) continue;
      for (let j = 0; j < 2 * d; j++) aug[k][j] -= factor * aug[col][j];
    }
  }
  // The right half of the augmented matrix now holds the inverse.
  return aug.map(row => row.slice(d));
}
|
|
103
|
+
|
|
104
|
+
/**
 * LinUCB contextual bandit with one disjoint linear model per arm.
 *
 * Each arm keeps a d×d matrix `A` (initialised to the identity) and a
 * d-vector `b`. `pick` scores every candidate with
 * `θᵀx + α·sqrt(xᵀA⁻¹x)` where `θ = A⁻¹b`; `update` folds an observed reward
 * into the chosen arm. State is loaded from STATE_PATH in the constructor
 * and written back every 25 updates.
 *
 * `lambda` and `mu` are stored and persisted but are not read by any method
 * of this class.
 */
class LinUCBBandit {
  /**
   * @param {object} [options]
   * @param {number} [options.alpha] - exploration weight on the confidence term
   * @param {number} [options.lambda] - cost penalty weight (stored only)
   * @param {number} [options.mu] - latency penalty weight (stored only)
   * @param {number} [options.dim] - feature vector dimension
   */
  constructor({ alpha = DEFAULT_ALPHA, lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU, dim = FEATURE_DIM } = {}) {
    this.alpha = alpha;
    this.lambda = lambda;
    this.mu = mu;
    this.dim = dim;
    /** arms: Map<armKey, { A: number[][], b: number[], count: number }> */
    this.arms = new Map();
    this.steps = 0;
    this._load();
  }

  /** Compose the map key that identifies one arm within a tier. */
  _armKey(tier, provider, model) {
    return `${tier}|${provider}:${model}`;
  }

  /** Return the arm stored under armKey, creating a fresh one if absent. */
  _ensureArm(armKey) {
    if (!this.arms.has(armKey)) {
      this.arms.set(armKey, { A: _identity(this.dim), b: _zeros(this.dim), count: 0 });
    }
    return this.arms.get(armKey);
  }

  /**
   * Return a copy of `context` truncated or zero-padded to exactly
   * `this.dim` entries. The caller's array is never modified.
   */
  _fitContext(context) {
    const fitted = context.slice(0, this.dim);
    while (fitted.length < this.dim) fitted.push(0);
    return fitted;
  }

  /** True when `arm` has a dim×dim numeric A and a dim-length numeric b. */
  _isValidArm(arm) {
    if (!arm || !Array.isArray(arm.A) || !Array.isArray(arm.b)) return false;
    if (arm.A.length !== this.dim || arm.b.length !== this.dim) return false;
    const isNumber = (x) => Number.isFinite(x);
    return arm.b.every(isNumber) &&
      arm.A.every(row => Array.isArray(row) && row.length === this.dim && row.every(isNumber));
  }

  /**
   * Pick an arm for a given tier and context.
   * @param {string} tier
   * @param {Array<{ provider: string, model: string }>} candidates — qualifying arms
   * @param {number[]} context — feature vector (padded/truncated to dim)
   * @returns {{ provider, model, ucb, explored }|null} chosen arm; null when
   *   there are no candidates or no candidate could be scored
   */
  pick(tier, candidates, context) {
    if (!candidates || candidates.length === 0) return null;
    const ctx = this._fitContext(context);

    // ε-greedy: 5% pure exploration
    if (Math.random() < EXPLORATION_RATE) {
      const random = candidates[Math.floor(Math.random() * candidates.length)];
      return { ...random, ucb: null, explored: true };
    }

    let best = null;
    let bestUcb = -Infinity;
    for (const c of candidates) {
      const key = this._armKey(tier, c.provider, c.model);
      const arm = this._ensureArm(key);
      let Ainv;
      try {
        Ainv = _inv(arm.A);
      } catch (err) {
        // An arm whose matrix cannot be inverted cannot be scored; skip it.
        logger.debug({ arm: key, err: err.message }, '[Bandit] Skipping arm with non-invertible matrix');
        continue;
      }
      const theta = _matVec(Ainv, arm.b);
      const mean = _dot(theta, ctx);
      const variance = _dot(ctx, _matVec(Ainv, ctx));
      // Clamp tiny negative variances caused by rounding before the sqrt.
      const ucb = mean + this.alpha * Math.sqrt(Math.max(0, variance));
      if (ucb > bestUcb) {
        bestUcb = ucb;
        best = { ...c, ucb, explored: false };
      }
    }
    return best;
  }

  /**
   * Update the chosen arm with the observed reward.
   *
   * Observations with a non-finite reward or context entry are ignored:
   * adding NaN/Infinity into A or b would corrupt the arm permanently.
   *
   * @param {string} tier
   * @param {string} provider
   * @param {string} model
   * @param {number[]} context
   * @param {number} reward — typically in [0, 100]; will be rescaled to [0, 1] internally
   */
  update(tier, provider, model, context, reward) {
    const ctx = this._fitContext(context);
    if (!Number.isFinite(reward) || !ctx.every(x => Number.isFinite(x))) {
      logger.debug({ tier, provider, model }, '[Bandit] Ignoring update with non-finite reward or context');
      return;
    }
    const arm = this._ensureArm(this._armKey(tier, provider, model));
    const r = Math.max(0, Math.min(1, reward / 100));
    _addMat(arm.A, _outer(ctx, ctx));
    _addVec(arm.b, ctx.map(x => x * r));
    arm.count++;
    this.steps++;
    // Save periodically (not every step to limit IO)
    if (this.steps % 25 === 0) this._save();
  }

  /**
   * Persist the bandit state to STATE_PATH. The JSON is written to a
   * sibling temp file and renamed into place so an interrupted write
   * cannot leave a truncated state file. Failures are logged, not thrown.
   */
  _save() {
    try {
      fs.mkdirSync(path.dirname(STATE_PATH), { recursive: true });
      const tmpPath = `${STATE_PATH}.tmp`;
      fs.writeFileSync(tmpPath, JSON.stringify({
        savedAt: Date.now(),
        steps: this.steps,
        alpha: this.alpha,
        lambda: this.lambda,
        mu: this.mu,
        dim: this.dim,
        arms: Object.fromEntries(this.arms),
      }, null, 0));
      fs.renameSync(tmpPath, STATE_PATH);
    } catch (err) {
      logger.debug({ err: err.message }, '[Bandit] State save failed');
    }
  }

  /**
   * Restore state from STATE_PATH when the file exists and was saved with
   * the same feature dimension. Arms whose A/b do not have the expected
   * shape are skipped. Failures are logged, not thrown.
   */
  _load() {
    try {
      if (!fs.existsSync(STATE_PATH)) return;
      const raw = JSON.parse(fs.readFileSync(STATE_PATH, 'utf8'));
      if (raw.dim && raw.dim === this.dim) {
        for (const [k, v] of Object.entries(raw.arms || {})) {
          if (!this._isValidArm(v)) {
            logger.debug({ arm: k }, '[Bandit] Skipping malformed arm in saved state');
            continue;
          }
          this.arms.set(k, { A: v.A, b: v.b, count: Number.isInteger(v.count) ? v.count : 0 });
        }
        this.steps = raw.steps || 0;
        logger.info({ arms: this.arms.size, steps: this.steps }, '[Bandit] State loaded');
      }
    } catch (err) {
      logger.debug({ err: err.message }, '[Bandit] State load failed');
    }
  }

  /** Summarise the total step count, per-arm update counts, and alpha. */
  getStats() {
    const armStats = {};
    for (const [k, v] of this.arms) {
      armStats[k] = { count: v.count };
    }
    return { steps: this.steps, arms: armStats, alpha: this.alpha };
  }
}
|
|
239
|
+
|
|
240
|
+
let _instance = null;

/**
 * Return the shared LinUCBBandit, constructing it with default options on
 * the first call and reusing that same object afterwards.
 */
function getBandit() {
  if (_instance === null) {
    _instance = new LinUCBBandit();
  }
  return _instance;
}
|
|
245
|
+
|
|
246
|
+
module.exports = { LinUCBBandit, getBandit, FEATURE_DIM };
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Small-first cascade with confidence-based deferral (Phase 3.3).
|
|
3
|
+
*
|
|
4
|
+
* For tier-MEDIUM/COMPLEX requests, optionally try a smaller model first.
|
|
5
|
+
* If the response confidence (from confidence-scorer) ≥ threshold, accept it.
|
|
6
|
+
* Otherwise, escalate to the originally-routed tier model.
|
|
7
|
+
*
|
|
8
|
+
* Off by default for streaming (can't retry mid-stream cleanly).
|
|
9
|
+
* Opt-in via LYNKR_CASCADE_ENABLED=true.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const logger = require('../logger');
|
|
13
|
+
const confidenceScorer = require('./confidence-scorer');
|
|
14
|
+
|
|
15
|
+
const DEFAULT_THRESHOLD = 0.85;
|
|
16
|
+
const TIERS_ELIGIBLE = ['MEDIUM', 'COMPLEX'];
|
|
17
|
+
|
|
18
|
+
/** True only when LYNKR_CASCADE_ENABLED is exactly the string 'true'. */
function isEnabled() {
  const { LYNKR_CASCADE_ENABLED: flag } = process.env;
  return flag === 'true';
}
|
|
21
|
+
|
|
22
|
+
/**
 * Decide whether a request may go through the small-first cascade.
 *
 * The cascade applies only when the feature is enabled, the request is
 * neither streaming nor carrying tools, and the selected tier is one of
 * TIERS_ELIGIBLE.
 *
 * @param {object} args
 * @param {string} args.tier — the originally selected tier
 * @param {boolean} args.streaming — true if the request is streaming
 * @param {boolean} args.hasTools — true if tools are present
 * @returns {boolean}
 */
function shouldCascade(args) {
  if (!isEnabled()) return false;

  // Streaming responses can't be retried cleanly, and tool calls have side
  // effects that must not be run twice.
  const blocked = Boolean(args.streaming) || Boolean(args.hasTools);
  return !blocked && TIERS_ELIGIBLE.includes(args.tier);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Run a small-first cascade.
 *
 * The small model is tried first. Its response is kept when the confidence
 * score reaches the threshold; otherwise the request is re-sent to the big
 * model. The big model is also used when the small model call throws or
 * when confidence scoring itself throws — a scorer failure must not fail a
 * request that the big model can still answer. Errors from the big model
 * call propagate to the caller.
 *
 * `cascadeStats.reason` is 'small_failed' or 'scoring_failed' for those two
 * fallback paths; a failed small call reports `smallLatency` 0.
 *
 * @param {object} args
 * @param {object} args.payload — the request payload
 * @param {object} args.smallModel — { provider, model }
 * @param {object} args.bigModel — { provider, model }
 * @param {function} args.invoke — async (provider, model, payload) → response
 * @param {string} args.taskType — used by confidence scorer
 * @param {number} args.threshold — confidence threshold, defaults to 0.85
 * @param {function} args.judge — optional judge LLM for reasoning tasks
 * @returns {Promise<{ response, usedModel, cascadeStats }>}
 */
async function run(args) {
  const threshold = args.threshold ?? DEFAULT_THRESHOLD;
  const start = Date.now();

  // Invoke one model and measure how long the call took.
  const timedInvoke = async (target) => {
    const t0 = Date.now();
    const response = await args.invoke(target.provider, target.model, args.payload);
    return { response, latency: Date.now() - t0 };
  };

  // Fall through to the big model, completing the supplied stats.
  const escalate = async (stats) => {
    const big = await timedInvoke(args.bigModel);
    return {
      response: big.response,
      usedModel: args.bigModel,
      cascadeStats: { ...stats, bigLatency: big.latency, totalLatency: Date.now() - start },
    };
  };

  // Try small model
  let small;
  try {
    small = await timedInvoke(args.smallModel);
  } catch (err) {
    logger.debug({ err: err.message }, '[Cascade] Small model failed, escalating');
    return escalate({ accepted: false, reason: 'small_failed', smallLatency: 0 });
  }

  let confidence;
  try {
    confidence = await confidenceScorer.score(small.response, {
      taskType: args.taskType,
      question: args.payload?.messages?.[args.payload.messages.length - 1]?.content,
      judge: args.judge,
    });
  } catch (err) {
    logger.debug({ err: err.message }, '[Cascade] Confidence scoring failed, escalating');
    return escalate({ accepted: false, reason: 'scoring_failed', smallLatency: small.latency });
  }

  if (confidence >= threshold) {
    return {
      response: small.response,
      usedModel: args.smallModel,
      cascadeStats: {
        accepted: true,
        confidence,
        smallLatency: small.latency,
        bigLatency: 0,
        totalLatency: Date.now() - start,
      },
    };
  }

  // Escalate
  return escalate({ accepted: false, confidence, threshold, smallLatency: small.latency });
}
|
|
105
|
+
|
|
106
|
+
module.exports = { run, shouldCascade, isEnabled, DEFAULT_THRESHOLD };
|
|
@@ -395,24 +395,16 @@ function extractContent(payload) {
|
|
|
395
395
|
}
|
|
396
396
|
|
|
397
397
|
/**
|
|
398
|
-
* Estimate token count
|
|
398
|
+
* Estimate token count.
|
|
399
|
+
*
|
|
400
|
+
* Phase 1.1: delegates to the tiktoken-backed tokenizer (graceful fallback to
|
|
401
|
+
* chars/4 if js-tiktoken is unavailable).
|
|
399
402
|
*/
|
|
403
|
+
const { countPayloadTokens } = require('./tokenizer');
|
|
404
|
+
|
|
400
405
|
function estimateTokens(payload) {
|
|
401
406
|
if (!payload?.messages) return 0;
|
|
402
|
-
|
|
403
|
-
let totalChars = 0;
|
|
404
|
-
for (const msg of payload.messages) {
|
|
405
|
-
if (typeof msg.content === 'string') {
|
|
406
|
-
totalChars += msg.content.length;
|
|
407
|
-
} else if (Array.isArray(msg.content)) {
|
|
408
|
-
for (const block of msg.content) {
|
|
409
|
-
if (block?.text) totalChars += block.text.length;
|
|
410
|
-
}
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
// Rough approximation: 4 chars per token
|
|
415
|
-
return Math.ceil(totalChars / 4);
|
|
407
|
+
return countPayloadTokens(payload, payload?.model);
|
|
416
408
|
}
|
|
417
409
|
|
|
418
410
|
/**
|