lynkr 9.4.6 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -14
- package/install.sh +21 -5
- package/package.json +4 -2
- package/public/dashboard.html +13 -1
- package/scripts/check-native.js +97 -0
- package/src/clients/databricks.js +80 -3
- package/src/clients/openrouter-utils.js +15 -0
- package/src/config/index.js +9 -0
- package/src/context/caveman.js +94 -0
- package/src/context/tool-dedup.js +95 -0
- package/src/context/tool-result-compressor.js +106 -0
- package/src/dashboard/api.js +69 -18
- package/src/orchestrator/bypass.js +135 -0
- package/src/orchestrator/index.js +33 -2
- package/src/routing/index.js +39 -0
- package/src/routing/model-registry.js +89 -26
- package/src/routing/risk-analyzer.js +7 -2
- package/src/routing/session-affinity.js +96 -0
- package/src/routing/telemetry.js +16 -3
- package/.impeccable/live/config.json +0 -8
|
@@ -54,9 +54,41 @@ const DATABRICKS_FALLBACK = {
|
|
|
54
54
|
'databricks-bge-large-en': { input: 0.02, output: 0, context: 512 },
|
|
55
55
|
};
|
|
56
56
|
|
|
57
|
-
// Default cost for unknown models
|
|
57
|
+
// Default cost for unknown models. Returned with `unknown: true` so callers can
|
|
58
|
+
// distinguish a real price from a fabricated guess.
|
|
58
59
|
const DEFAULT_COST = { input: 1.0, output: 3.0, context: 128000 };
|
|
59
60
|
|
|
61
|
+
// Curated name aliases (exact, one-directional). Maps a name a caller might use
// to the canonical key likely present in the pricing data. Misses are harmless
// (resolution simply continues down the ladder).
//
// The table is a frozen, null-prototype object: a bracket lookup with an
// arbitrary caller-supplied model name can then only ever hit a curated key,
// never an inherited member such as `constructor` or `toString`, and the
// table cannot be modified at runtime.
const MODEL_ALIASES = Object.freeze(Object.assign(Object.create(null), {
  'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929',
  'claude-opus-4-1': 'claude-opus-4-1-20250805',
  'claude-3-5-sonnet': 'claude-3-5-sonnet-20241022',
}));
|
|
69
|
+
|
|
70
|
+
/**
 * Parse the MODEL_PRICE_OVERRIDES environment variable into a lookup map.
 *
 * Expected value is a JSON object of
 *   { "<model>": { "input": <usd/1M>, "output": <usd/1M>, "context"?: N } }
 * and lets operators pin correct prices for models the registry doesn't know.
 *
 * Keys are lowercased AND trimmed, matching the normalization getCost()
 * applies to the names it looks up, so an override can't be silently
 * unreachable because of stray whitespace. Entries whose `input`/`output`
 * are not finite, non-negative numbers are skipped; a missing or invalid
 * `context` falls back to 128000.
 *
 * @returns {Map<string, Object>} normalized model name → price info
 *   (empty when the variable is unset, malformed, or not a JSON object)
 */
function _loadOverrides() {
  const out = new Map();
  const raw = process.env.MODEL_PRICE_OVERRIDES;
  if (!raw) return out;

  // JSON can still yield Infinity (e.g. 1e999), so typeof alone is not enough.
  const isPrice = (v) => typeof v === 'number' && Number.isFinite(v) && v >= 0;

  try {
    const parsed = JSON.parse(raw);
    // Arrays and scalars parse fine but are not a model → price table; route
    // them through the same warning as a syntax error instead of ignoring them.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new TypeError('expected a JSON object keyed by model name');
    }

    for (const [name, info] of Object.entries(parsed)) {
      const key = name.toLowerCase().trim();
      if (!key || !info || !isPrice(info.input) || !isPrice(info.output)) continue;

      const hasValidContext = Number.isFinite(info.context) && info.context > 0;
      out.set(key, { ...info, context: hasValidContext ? info.context : 128000 });
    }
  } catch (err) {
    logger.warn({ err: err.message }, '[ModelRegistry] Failed to parse MODEL_PRICE_OVERRIDES');
  }
  return out;
}
|
|
91
|
+
|
|
60
92
|
class ModelRegistry {
|
|
61
93
|
constructor() {
|
|
62
94
|
this.litellmPrices = {};
|
|
@@ -64,6 +96,7 @@ class ModelRegistry {
|
|
|
64
96
|
this.loaded = false;
|
|
65
97
|
this.lastFetch = 0;
|
|
66
98
|
this.modelIndex = new Map();
|
|
99
|
+
this.overrides = _loadOverrides();
|
|
67
100
|
}
|
|
68
101
|
|
|
69
102
|
/**
|
|
@@ -255,40 +288,70 @@ class ModelRegistry {
|
|
|
255
288
|
* @returns {Object} Cost info { input, output, context, ... }
|
|
256
289
|
*/
|
|
257
290
|
getCost(modelName) {
|
|
258
|
-
if (!modelName) return { ...DEFAULT_COST, source: 'default' };
|
|
291
|
+
if (!modelName) return { ...DEFAULT_COST, source: 'default', unknown: true };
|
|
259
292
|
|
|
260
|
-
const
|
|
293
|
+
const name = String(modelName).toLowerCase().trim();
|
|
294
|
+
const hit = this._resolveCost(name);
|
|
295
|
+
if (hit) return hit;
|
|
261
296
|
|
|
262
|
-
//
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
297
|
+
// Nothing matched — report unknown rather than silently fabricating a price.
|
|
298
|
+
logger.debug({ model: modelName }, '[ModelRegistry] Model not found — cost unknown');
|
|
299
|
+
return { ...DEFAULT_COST, source: 'default', unknown: true };
|
|
300
|
+
}
|
|
266
301
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
302
|
+
/**
|
|
303
|
+
* Deterministic price resolution. Each step is exact (no bidirectional
|
|
304
|
+
* substring matching), and the only loose step (longest-prefix) is
|
|
305
|
+
* one-directional and length-bounded, so unrelated names can't false-match.
|
|
306
|
+
* Returns a cost object with a `resolution` tag, or null if nothing matched.
|
|
307
|
+
* @param {string} name - already lowercased/trimmed
|
|
308
|
+
*/
|
|
309
|
+
_resolveCost(name) {
|
|
310
|
+
const tag = (value, resolution, matchedAs) => ({
|
|
311
|
+
...value,
|
|
312
|
+
resolution,
|
|
313
|
+
...(matchedAs && matchedAs !== name ? { matchedAs } : {}),
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
// 1. Operator overrides (exact) — ground truth.
|
|
317
|
+
if (this.overrides.has(name)) return tag({ ...this.overrides.get(name), source: 'override' }, 'override');
|
|
318
|
+
|
|
319
|
+
// 2. Exact registry hit.
|
|
320
|
+
if (this.modelIndex.has(name)) return tag(this.modelIndex.get(name), 'exact');
|
|
321
|
+
|
|
322
|
+
// 3. Provider-prefix strip (exact).
|
|
323
|
+
const stripped = [
|
|
324
|
+
name.replace(/^databricks-/, ''),
|
|
325
|
+
name.replace(/^azure\//, ''),
|
|
326
|
+
name.replace(/^bedrock\//, ''),
|
|
327
|
+
name.replace(/^anthropic\./, ''),
|
|
328
|
+
name.replace(/^openai\//, ''),
|
|
329
|
+
name.includes('/') ? name.split('/').pop() : null,
|
|
330
|
+
].filter((v) => v && v !== name);
|
|
331
|
+
for (const v of stripped) {
|
|
332
|
+
if (this.overrides.has(v)) return tag({ ...this.overrides.get(v), source: 'override' }, 'prefix-strip', v);
|
|
333
|
+
if (this.modelIndex.has(v)) return tag(this.modelIndex.get(v), 'prefix-strip', v);
|
|
281
334
|
}
|
|
282
335
|
|
|
283
|
-
//
|
|
336
|
+
// 4. Curated alias (exact).
|
|
337
|
+
const alias = MODEL_ALIASES[name];
|
|
338
|
+
if (alias && this.modelIndex.has(alias)) return tag(this.modelIndex.get(alias), 'alias', alias);
|
|
339
|
+
|
|
340
|
+
// 5. Date/version-suffix normalization (e.g. -20250929, -2025-09-29, -v2).
|
|
341
|
+
const dateless = name.replace(/[-@](\d{8}|\d{4}-\d{2}-\d{2}|v\d+)$/, '');
|
|
342
|
+
if (dateless !== name && this.modelIndex.has(dateless)) return tag(this.modelIndex.get(dateless), 'date-normalize', dateless);
|
|
343
|
+
|
|
344
|
+
// 6. Longest registry key that is a prefix of the requested name. Bounded so
|
|
345
|
+
// short keys can't grab unrelated names (e.g. "gpt-5.2-chat-2026" → "gpt-5.2-chat").
|
|
346
|
+
let best = null;
|
|
284
347
|
for (const [key, value] of this.modelIndex.entries()) {
|
|
285
|
-
if (key.
|
|
286
|
-
|
|
348
|
+
if (key.length >= 6 && name.startsWith(key) && (!best || key.length > best.key.length)) {
|
|
349
|
+
best = { key, value };
|
|
287
350
|
}
|
|
288
351
|
}
|
|
352
|
+
if (best) return tag(best.value, 'longest-prefix', best.key);
|
|
289
353
|
|
|
290
|
-
|
|
291
|
-
return { ...DEFAULT_COST, source: 'default' };
|
|
354
|
+
return null;
|
|
292
355
|
}
|
|
293
356
|
|
|
294
357
|
/**
|
|
@@ -13,13 +13,18 @@ const { extractContent } = require('./complexity-analyzer');
|
|
|
13
13
|
// Substring keywords found in file paths or instruction text.
|
|
14
14
|
// Matched case-insensitively as raw substrings, so "auth" hits
|
|
15
15
|
// "src/auth/login.ts" and "authentication".
|
|
16
|
+
// NOTE: keywords are matched as case-insensitive *substrings* against file
|
|
17
|
+
// paths, so overly generic terms cause false positives. 'session' and 'token'
|
|
18
|
+
// were removed because they match benign paths (src/sessions/*, tokenizer.js,
|
|
19
|
+
// token-budget.js) and were force-escalating ordinary requests to COMPLEX —
|
|
20
|
+
// real secrets/credentials are still covered by the keywords below.
|
|
16
21
|
const PROTECTED_PATH_KEYWORDS = [
|
|
17
|
-
'auth', 'oauth', 'jwt', '
|
|
22
|
+
'auth', 'oauth', 'jwt', 'security', 'permission', 'rbac',
|
|
18
23
|
'payment', 'payments', 'billing', 'invoice', 'subscription',
|
|
19
24
|
'migration', 'migrations', 'schema',
|
|
20
25
|
'infra', 'terraform', 'kustomize', 'helm', 'kubernetes',
|
|
21
26
|
'.github/workflows', '.env', 'secret', 'credential',
|
|
22
|
-
'api-key', 'api_key', 'apikey',
|
|
27
|
+
'api-key', 'api_key', 'apikey',
|
|
23
28
|
'webhook', 'admin',
|
|
24
29
|
];
|
|
25
30
|
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session → Provider Affinity
|
|
3
|
+
*
|
|
4
|
+
* A multi-turn agentic conversation builds up tool_use / tool_result history
|
|
5
|
+
* whose tool-call IDs are formatted for the provider that produced them. If a
|
|
6
|
+
* later turn re-routes to a *different* provider (because per-turn complexity
|
|
7
|
+
* or risk changed), that provider rejects the orphaned tool linkage:
|
|
8
|
+
*
|
|
9
|
+
* Azure: 400 "No tool call found for function call output with call_id …"
|
|
10
|
+
* Moonshot: 400 "Invalid request: tool_call_id is not found"
|
|
11
|
+
*
|
|
12
|
+
* To prevent that, once a session has chosen a provider we keep subsequent
|
|
13
|
+
* turns on it *while the payload carries tool history*. Fresh turns (no tool
|
|
14
|
+
* state) still route normally, so per-turn tier routing is preserved.
|
|
15
|
+
*
|
|
16
|
+
* @module routing/session-affinity
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
// Upper bound on the number of sessions tracked at once.
const MAX_ENTRIES = 2000;

// Age (in milliseconds) after which a pin counts as expired: one hour.
const TTL_MS = 1000 * 60 * 60;

/**
 * Session id → most recent routing decision recorded for that session.
 * @type {Map<string, {provider:string, model:string|null, tier:string|null, ts:number}>}
 */
const pins = new Map();
|
|
24
|
+
|
|
25
|
+
/**
 * Trim the pin table back down to at most MAX_ENTRIES sessions.
 *
 * Removes as many entries as necessary, so the size bound holds no matter how
 * far over the limit the table is when this runs (not only when it is called
 * after every single insert).
 */
function _evictIfNeeded() {
  // A Map iterates in insertion order, so the keys come out oldest-first.
  // Deleting entries while iterating a Map is well-defined in JavaScript.
  for (const sessionId of pins.keys()) {
    if (pins.size <= MAX_ENTRIES) break;
    pins.delete(sessionId);
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * True when the payload contains an in-flight tool exchange — i.e. a prior
 * tool call or tool result. These are the turns whose tool-call IDs break if
 * the provider changes.
 *
 * Two message shapes are recognized:
 *   - Anthropic-style: `content` is an array holding a block whose `type` is
 *     "tool_use" or "tool_result".
 *   - OpenAI-style: an assistant message with a non-empty `tool_calls` array,
 *     or a tool-output message (`role: "tool"` or carrying a `tool_call_id`).
 *
 * @param {object} payload - request payload; only `payload.messages` is read
 * @returns {boolean} false when `messages` is missing or not an array
 */
function payloadHasToolHistory(payload) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return false;

  const isToolBlock = (block) => block?.type === "tool_use" || block?.type === "tool_result";

  return messages.some((msg) => {
    if (!msg || typeof msg !== "object") return false;

    // OpenAI-style tool output or assistant tool calls.
    if (msg.role === "tool" || msg.tool_call_id != null) return true;
    if (Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) return true;

    // Anthropic-style content blocks. String content carries no tool linkage.
    return Array.isArray(msg.content) && msg.content.some(isToolBlock);
  });
}
|
|
52
|
+
|
|
53
|
+
/**
 * Look up the routing decision pinned to a session.
 *
 * A pin older than TTL_MS is removed from the table as a side effect and
 * reported as absent.
 *
 * @param {string} sessionId
 * @returns {{provider:string, model:string|null, tier:string|null, ts:number}|null}
 *   the stored entry, or null when the id is empty, unknown, or expired
 */
function getPinned(sessionId) {
  const entry = sessionId ? pins.get(sessionId) : undefined;
  if (!entry) return null;

  const expired = Date.now() - entry.ts > TTL_MS;
  if (!expired) return entry;

  pins.delete(sessionId);
  return null;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Remember which provider a session was routed to so later tool-bearing turns
 * can reuse it. Does nothing when the session id or the provider is missing.
 *
 * @param {string} sessionId
 * @param {{provider:string, model?:string|null, tier?:string|null}} decision
 */
function setPinned(sessionId, decision) {
  if (!sessionId || !decision?.provider) return;

  // Absent model/tier are stored as null.
  const { provider, model = null, tier = null } = decision;
  const record = { provider, model, tier, ts: Date.now() };

  // Delete before set: re-inserting moves the session to the newest position
  // in the Map's insertion order, so active sessions aren't the ones evicted.
  pins.delete(sessionId);
  pins.set(sessionId, record);

  _evictIfNeeded();
}
|
|
85
|
+
|
|
86
|
+
/**
 * Test/maintenance helper: forget every session pin at once.
 */
function _clear() {
  pins.clear();
}
|
|
90
|
+
|
|
91
|
+
module.exports = {
|
|
92
|
+
payloadHasToolHistory,
|
|
93
|
+
getPinned,
|
|
94
|
+
setPinned,
|
|
95
|
+
_clear,
|
|
96
|
+
};
|
package/src/routing/telemetry.js
CHANGED
|
@@ -94,7 +94,9 @@ function init() {
|
|
|
94
94
|
circuit_breaker_state TEXT,
|
|
95
95
|
quality_score REAL,
|
|
96
96
|
tokens_per_second REAL,
|
|
97
|
-
cost_efficiency REAL
|
|
97
|
+
cost_efficiency REAL,
|
|
98
|
+
request_text TEXT,
|
|
99
|
+
response_text TEXT
|
|
98
100
|
);
|
|
99
101
|
|
|
100
102
|
CREATE INDEX IF NOT EXISTS idx_telemetry_provider
|
|
@@ -110,6 +112,15 @@ function init() {
|
|
|
110
112
|
ON routing_telemetry(session_id, timestamp);
|
|
111
113
|
`);
|
|
112
114
|
|
|
115
|
+
// Migration: add columns to pre-existing tables (CREATE TABLE IF NOT EXISTS
|
|
116
|
+
// won't add them to a DB created before these columns existed).
|
|
117
|
+
const existingCols = new Set(db.prepare("PRAGMA table_info(routing_telemetry)").all().map((c) => c.name));
|
|
118
|
+
for (const col of ["request_text", "response_text"]) {
|
|
119
|
+
if (!existingCols.has(col)) {
|
|
120
|
+
db.exec(`ALTER TABLE routing_telemetry ADD COLUMN ${col} TEXT`);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
113
124
|
logger.info({ dbPath }, "Routing telemetry database initialised");
|
|
114
125
|
return true;
|
|
115
126
|
} catch (err) {
|
|
@@ -163,14 +174,14 @@ function record(data) {
|
|
|
163
174
|
provider, model, routing_method, was_fallback, output_tokens,
|
|
164
175
|
latency_ms, status_code, error_type, cost_usd, tool_calls_made,
|
|
165
176
|
retry_count, circuit_breaker_state, quality_score, tokens_per_second,
|
|
166
|
-
cost_efficiency
|
|
177
|
+
cost_efficiency, request_text, response_text
|
|
167
178
|
) VALUES (
|
|
168
179
|
@request_id, @session_id, @timestamp, @complexity_score, @tier,
|
|
169
180
|
@agentic_type, @tool_count, @input_tokens, @message_count, @request_type,
|
|
170
181
|
@provider, @model, @routing_method, @was_fallback, @output_tokens,
|
|
171
182
|
@latency_ms, @status_code, @error_type, @cost_usd, @tool_calls_made,
|
|
172
183
|
@retry_count, @circuit_breaker_state, @quality_score, @tokens_per_second,
|
|
173
|
-
@cost_efficiency
|
|
184
|
+
@cost_efficiency, @request_text, @response_text
|
|
174
185
|
)`
|
|
175
186
|
);
|
|
176
187
|
if (!insert) return;
|
|
@@ -201,6 +212,8 @@ function record(data) {
|
|
|
201
212
|
quality_score: data.quality_score ?? null,
|
|
202
213
|
tokens_per_second: data.tokens_per_second ?? null,
|
|
203
214
|
cost_efficiency: data.cost_efficiency ?? null,
|
|
215
|
+
request_text: data.request_text ?? null,
|
|
216
|
+
response_text: data.response_text ?? null,
|
|
204
217
|
});
|
|
205
218
|
} catch (err) {
|
|
206
219
|
logger.debug({ err: err.message }, "Telemetry record failed");
|