lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
const config = require('../config');
|
|
2
|
+
const telemetry = require('../routing/telemetry');
|
|
3
|
+
const { getUsage } = require('../usage/aggregator');
|
|
4
|
+
const metrics = require('../metrics');
|
|
5
|
+
const { getMetricsCollector } = require('../observability/metrics');
|
|
6
|
+
const { TIER_DEFINITIONS } = require('../routing/model-tiers');
|
|
7
|
+
|
|
8
|
+
/**
 * List the providers whose required settings are present in `config`.
 *
 * A provider is reported when the config fields checked for it are truthy;
 * nothing is contacted, so this reflects configuration only.
 *
 * @returns {Array<{name: string, type: string}>} Configured providers in a
 *   fixed order: cloud providers first, then local ones.
 */
function getConfiguredProviders() {
  // [name, type, "is configured" flag] — order here is the order returned.
  const candidates = [
    ['databricks', 'cloud', config.databricks?.url && config.databricks?.apiKey],
    ['azure-anthropic', 'cloud', config.azureAnthropic?.endpoint && config.azureAnthropic?.apiKey],
    ['bedrock', 'cloud', config.bedrock?.apiKey],
    ['openrouter', 'cloud', config.openrouter?.apiKey],
    ['openai', 'cloud', config.openai?.apiKey],
    ['azure-openai', 'cloud', config.azureOpenAI?.endpoint && config.azureOpenAI?.apiKey],
    ['vertex', 'cloud', config.vertex?.projectId],
    ['moonshot', 'cloud', config.moonshot?.apiKey],
    ['ollama', 'local', config.ollama?.endpoint],
    ['llamacpp', 'local', config.llamacpp?.endpoint],
    ['lmstudio', 'local', config.lmstudio?.endpoint],
  ];

  const configured = [];
  for (const [name, type, enabled] of candidates) {
    if (enabled) {
      configured.push({ name, type });
    }
  }
  return configured;
}
|
|
27
|
+
|
|
28
|
+
// Noise provider names injected by unit tests — filter them out of UI
|
|
29
|
+
const TEST_PROVIDER_RE = /^(accuracy-|stats-|provider-stats-|roundtrip-|latency-)/;
|
|
30
|
+
|
|
31
|
+
/**
 * Choose the stats window for the dashboard from the age of the newest
 * telemetry row, so panels are not empty just because the last 24 hours
 * were quiet.
 *
 * The narrowest preset window (24h, 7d, 30d) that still reaches the newest
 * row is used; anything older falls back to "all time". With no rows at all
 * the default 24h window is returned.
 *
 * @returns {{since: number, label: string}} `since` is an epoch-ms lower
 *   bound (0 for "all time"); `label` is '24h', '7d', '30d' or 'all time'.
 */
function findActiveWindow() {
  const DAY_MS = 86400000;
  const presets = [
    [1, '24h'],
    [7, '7d'],
    [30, '30d'],
  ];

  const latest = telemetry.query({ limit: 1 });
  if (latest.length) {
    const ageMs = Date.now() - latest[0].timestamp;
    for (const [spanDays, label] of presets) {
      if (ageMs <= spanDays * DAY_MS) {
        return { since: Date.now() - spanDays * DAY_MS, label };
      }
    }
    return { since: 0, label: 'all time' };
  }

  return { since: Date.now() - DAY_MS, label: '24h' };
}
|
|
43
|
+
|
|
44
|
+
function getCircuitBreakerStates() {
|
|
45
|
+
try {
|
|
46
|
+
const { getCircuitBreakerRegistry } = require('../clients/circuit-breaker');
|
|
47
|
+
const reg = getCircuitBreakerRegistry();
|
|
48
|
+
return reg.getAll();
|
|
49
|
+
} catch {
|
|
50
|
+
return {};
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
 * Group telemetry rows by UTC calendar day.
 *
 * Returns one bucket per day for the last `days` days, oldest first; the
 * final bucket is the current UTC day (midnight to midnight). Rows outside
 * that range, or whose `timestamp` is not numeric, are ignored.
 *
 * @param {Array<object>} rows - Telemetry rows; `timestamp` is read as epoch
 *   milliseconds, `tier` and `cost_usd` are optional.
 * @param {number} [days=7] - Number of daily buckets to return.
 * @param {number} [now=Date.now()] - Reference time in epoch ms (injectable
 *   so callers and tests can pin "today").
 * @returns {Array<{label: string, total: number, byTier: Object<string, number>, cost: number}>}
 *   `label` is the bucket's UTC date (e.g. "Mar 10"); `cost` is rounded to
 *   four decimal places.
 */
function dailyBreakdown(rows, days = 7, now = Date.now()) {
  const DAY = 86400000;
  const bucketCount = Math.max(0, Math.floor(Number(days)) || 0);

  // Epoch milliseconds are UTC-based, so flooring to a whole day yields
  // midnight UTC of the current day.
  const todayStart = Math.floor(now / DAY) * DAY;
  const firstStart = todayStart - (bucketCount - 1) * DAY;

  const buckets = Array.from({ length: bucketCount }, (_, i) => ({
    start: firstStart + i * DAY,
    total: 0,
    byTier: {},
    cost: 0,
  }));

  // Single pass: compute each row's bucket index directly instead of
  // re-filtering the whole row set once per day.
  for (const row of Array.isArray(rows) ? rows : []) {
    const index = Math.floor((Number(row?.timestamp) - firstStart) / DAY);
    // Written as a positive range test so a NaN index is rejected too.
    if (!(index >= 0 && index < bucketCount)) continue;

    const bucket = buckets[index];
    const tier = row.tier || 'UNKNOWN';
    bucket.total += 1;
    bucket.byTier[tier] = (bucket.byTier[tier] || 0) + 1;
    bucket.cost += Number(row.cost_usd) || 0;
  }

  return buckets.map(({ start, total, byTier, cost }) => ({
    // Format in UTC so the label names the same day the bucket covers,
    // whatever the server's local time zone is.
    label: new Date(start).toLocaleDateString('en-US', { month: 'short', day: 'numeric', timeZone: 'UTC' }),
    total,
    byTier,
    cost: Math.round(cost * 10000) / 10000,
  }));
}
|
|
82
|
+
|
|
83
|
+
function overview(req, res) {
|
|
84
|
+
const win = findActiveWindow();
|
|
85
|
+
const todayUsage = getUsage({ window: win.label === '24h' ? '1d' : win.label === 'all time' ? 'all' : win.label });
|
|
86
|
+
const recentRows = telemetry.query({ limit: 10 });
|
|
87
|
+
const todayStats = telemetry.getStats({ since: win.since });
|
|
88
|
+
const snap = metrics.snapshot();
|
|
89
|
+
|
|
90
|
+
res.json({
|
|
91
|
+
uptime: Math.floor(process.uptime()),
|
|
92
|
+
port: config.port,
|
|
93
|
+
version: process.env.npm_package_version || '9.0.2',
|
|
94
|
+
modelProvider: config.modelProvider?.type || 'unknown',
|
|
95
|
+
providers: getConfiguredProviders(),
|
|
96
|
+
statsWindow: win.label,
|
|
97
|
+
metrics: {
|
|
98
|
+
requestsTotal: snap.requestsTotal,
|
|
99
|
+
responsesSuccess: snap.responses?.success || 0,
|
|
100
|
+
responsesError: snap.responses?.error || 0,
|
|
101
|
+
},
|
|
102
|
+
today: {
|
|
103
|
+
requests: todayUsage.totals?.requests || 0,
|
|
104
|
+
totalTokens: todayUsage.totals?.totalTokens || 0,
|
|
105
|
+
cost: todayUsage.totals?.actualCost || 0,
|
|
106
|
+
saved: todayUsage.totals?.saved || 0,
|
|
107
|
+
savedPercent: todayUsage.totals?.savedPercent || 0,
|
|
108
|
+
},
|
|
109
|
+
stats: todayStats,
|
|
110
|
+
recentRequests: recentRows,
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
 * GET handler for the dashboard usage panel.
 *
 * Responds with the aggregate from `getUsage()` for the requested window
 * (optionally filtered by provider/model) plus a `daily` series for the
 * chart.
 *
 * Query parameters: `window` (defaults to '7d'), `provider`, `model`.
 * The chart spans 1 day for '1d', 30 days for '30d' and 7 days for every
 * other window, including 'all'.
 *
 * @param {object} req - Express request; reads `req.query`.
 * @param {object} res - Express response; receives the JSON payload.
 * @returns {void}
 */
function usage(req, res) {
  const DAY_MS = 86400000;
  const windowKey = req.query.window || '7d';
  const provider = req.query.provider || undefined;
  const model = req.query.model || undefined;

  const data = getUsage({ window: windowKey, provider, model });

  // Chart length in days; unrecognised windows use the 7-day chart.
  const days = windowKey === '1d' ? 1 : windowKey === '30d' ? 30 : 7;

  // Only rows inside the charted span can land in a bucket, so always bound
  // the query by it — even for window=all. Fetching the full history would
  // load rows the chart discards and lets the row cap crowd out recent ones.
  const rawRows = telemetry.query({ since: Date.now() - days * DAY_MS, limit: 50000 });

  data.daily = dailyBreakdown(rawRows, days);

  res.json(data);
}
|
|
132
|
+
|
|
133
|
+
/**
 * GET handler for the dashboard routing panel.
 *
 * Responds with the tier definitions, routing accuracy and stats for the
 * active window, per-provider stats, and circuit-breaker states.
 *
 * @param {object} req - Express request (unused).
 * @param {object} res - Express response; receives the JSON payload.
 * @returns {void}
 */
function routing(req, res) {
  const activeWindow = findActiveWindow();
  const since = activeWindow.since;

  const accuracy = telemetry.getRoutingAccuracy({ since });
  const stats = telemetry.getStats({ since });
  const circuitBreakers = getCircuitBreakerStates();

  // Providers come from the recorded rows rather than from config, so one
  // that handled traffic but is no longer configured still shows up.
  // Names matching TEST_PROVIDER_RE are left out.
  const seenProviders = new Set();
  for (const row of telemetry.query({ limit: 100000, since })) {
    const name = row.provider;
    if (name && !TEST_PROVIDER_RE.test(name)) {
      seenProviders.add(name);
    }
  }

  const providerStats = {};
  seenProviders.forEach((name) => {
    const entry = telemetry.getProviderStats(name, { since });
    if (entry) {
      providerStats[name] = entry;
    }
  });

  res.json({
    tierDefinitions: TIER_DEFINITIONS,
    accuracy,
    stats,
    providerStats,
    circuitBreakers,
    window: activeWindow.label,
  });
}
|
|
155
|
+
|
|
156
|
+
/**
 * GET handler for the dashboard request log.
 *
 * Query parameters (all optional):
 *   - `limit`: rows to return; defaults to 100, capped at 500. Missing,
 *     non-numeric, zero or negative values fall back to the default.
 *   - `provider`, `tier`: exact-match filters; applied only when given as a
 *     single non-empty string.
 *   - `since`: epoch-ms lower bound; ignored when not numeric.
 *   - `error=true`: keep only rows that have an `error_type`. This is
 *     applied after the telemetry query, so fewer than `limit` rows may be
 *     returned.
 *
 * @param {object} req - Express request; reads `req.query`.
 * @param {object} res - Express response; receives the JSON array of rows.
 * @returns {void}
 */
function logs(req, res) {
  const DEFAULT_LIMIT = 100;
  const MAX_LIMIT = 500;
  const query = req.query || {};

  // Math.min alone lets NaN and negative numbers through, which would void
  // the cap in a store that treats a negative limit as "no limit".
  const requestedLimit = Number.parseInt(query.limit, 10);
  const limit = Number.isInteger(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, MAX_LIMIT)
    : DEFAULT_LIMIT;
  const filters = { limit };

  // A repeated query parameter is parsed as an array; forward plain
  // strings only.
  const isFilterString = (value) => typeof value === 'string' && value.length > 0;
  if (isFilterString(query.provider)) filters.provider = query.provider;
  if (isFilterString(query.tier)) filters.tier = query.tier;

  const since = Number.parseInt(query.since, 10);
  if (Number.isFinite(since)) filters.since = since;

  let rows = telemetry.query(filters);
  if (query.error === 'true') rows = rows.filter((r) => r.error_type);

  res.json(rows);
}
|
|
169
|
+
|
|
170
|
+
module.exports = { overview, usage, routing, logs };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
const express = require('express');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const api = require('./api');
|
|
4
|
+
|
|
5
|
+
// Dashboard router: serves the single-page dashboard at "/" and its JSON
// API under "/api/*".
const router = express.Router();

// Static dashboard page shipped in the package's public/ directory.
const dashboardPage = path.join(__dirname, '../../public/dashboard.html');
router.get('/', (_req, res) => res.sendFile(dashboardPage));

// JSON endpoints, one GET route per handler exported by ./api.
const apiRoutes = [
  ['/api/overview', api.overview],
  ['/api/usage', api.usage],
  ['/api/routing', api.routing],
  ['/api/logs', api.logs],
];
for (const [routePath, handler] of apiRoutes) {
  router.get(routePath, handler);
}

module.exports = router;
|
package/src/headroom/client.js
CHANGED
|
@@ -58,6 +58,7 @@ async function checkHealth() {
|
|
|
58
58
|
return {
|
|
59
59
|
available: data.headroom_loaded === true,
|
|
60
60
|
status: data.status,
|
|
61
|
+
version: data.headroom_version,
|
|
61
62
|
ccrEnabled: data.ccr_enabled,
|
|
62
63
|
llmlinguaEnabled: data.llmlingua_enabled,
|
|
63
64
|
entriesCached: data.entries_cached,
|
|
@@ -154,8 +155,10 @@ async function compressMessages(messages, tools = [], options = {}) {
|
|
|
154
155
|
tokensBefore: result.stats?.tokens_before,
|
|
155
156
|
tokensAfter: result.stats?.tokens_after,
|
|
156
157
|
savingsPercent: result.stats?.savings_percent,
|
|
158
|
+
compressionRatio: result.stats?.compression_ratio,
|
|
157
159
|
latencyMs: result.stats?.latency_ms,
|
|
158
160
|
transforms: result.stats?.transforms_applied,
|
|
161
|
+
headroomVersion: result.stats?.headroom_version,
|
|
159
162
|
},
|
|
160
163
|
"Headroom compression applied"
|
|
161
164
|
);
|
|
@@ -245,112 +248,6 @@ async function ccrRetrieve(hash, query = null, maxResults = 20) {
|
|
|
245
248
|
}
|
|
246
249
|
}
|
|
247
250
|
|
|
248
|
-
/**
|
|
249
|
-
* Track compression for proactive CCR expansion
|
|
250
|
-
*/
|
|
251
|
-
async function ccrTrack(hashKey, turnNumber, toolName, sample) {
|
|
252
|
-
const headroomConfig = getConfig();
|
|
253
|
-
|
|
254
|
-
if (!isEnabled()) {
|
|
255
|
-
return { tracked: false };
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
try {
|
|
259
|
-
const params = new URLSearchParams({
|
|
260
|
-
hash_key: hashKey,
|
|
261
|
-
turn_number: String(turnNumber),
|
|
262
|
-
tool_name: toolName,
|
|
263
|
-
sample: sample.substring(0, 500),
|
|
264
|
-
});
|
|
265
|
-
|
|
266
|
-
const response = await fetch(`${headroomConfig.endpoint}/ccr/track?${params}`, {
|
|
267
|
-
method: "POST",
|
|
268
|
-
signal: AbortSignal.timeout(2000),
|
|
269
|
-
});
|
|
270
|
-
|
|
271
|
-
if (response.ok) {
|
|
272
|
-
return await response.json();
|
|
273
|
-
}
|
|
274
|
-
return { tracked: false };
|
|
275
|
-
} catch (err) {
|
|
276
|
-
logger.debug({ error: err.message }, "CCR tracking failed");
|
|
277
|
-
return { tracked: false };
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* Analyze query for proactive CCR expansion
|
|
283
|
-
*/
|
|
284
|
-
async function ccrAnalyze(query, turnNumber) {
|
|
285
|
-
const headroomConfig = getConfig();
|
|
286
|
-
|
|
287
|
-
if (!isEnabled()) {
|
|
288
|
-
return { expansions: [] };
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
try {
|
|
292
|
-
const response = await fetch(`${headroomConfig.endpoint}/ccr/analyze`, {
|
|
293
|
-
method: "POST",
|
|
294
|
-
headers: { "Content-Type": "application/json" },
|
|
295
|
-
body: JSON.stringify({ query, turn_number: turnNumber }),
|
|
296
|
-
signal: AbortSignal.timeout(2000),
|
|
297
|
-
});
|
|
298
|
-
|
|
299
|
-
if (response.ok) {
|
|
300
|
-
return await response.json();
|
|
301
|
-
}
|
|
302
|
-
return { expansions: [] };
|
|
303
|
-
} catch (err) {
|
|
304
|
-
logger.debug({ error: err.message }, "CCR analysis failed");
|
|
305
|
-
return { expansions: [] };
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
/**
|
|
310
|
-
* Compress text using LLMLingua-2 ML compression
|
|
311
|
-
* (Optional - requires LLMLingua enabled in sidecar)
|
|
312
|
-
*/
|
|
313
|
-
async function llmlinguaCompress(text, targetRatio = 0.5, forceTokens = null) {
|
|
314
|
-
const headroomConfig = getConfig();
|
|
315
|
-
|
|
316
|
-
if (!isEnabled()) {
|
|
317
|
-
return { success: false, error: "Headroom disabled" };
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
try {
|
|
321
|
-
const params = new URLSearchParams({
|
|
322
|
-
text,
|
|
323
|
-
target_ratio: String(targetRatio),
|
|
324
|
-
});
|
|
325
|
-
|
|
326
|
-
if (forceTokens && Array.isArray(forceTokens)) {
|
|
327
|
-
params.append("force_tokens", JSON.stringify(forceTokens));
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
const response = await fetch(`${headroomConfig.endpoint}/compress/llmlingua?${params}`, {
|
|
331
|
-
method: "POST",
|
|
332
|
-
signal: AbortSignal.timeout(30000), // LLMLingua can be slow
|
|
333
|
-
});
|
|
334
|
-
|
|
335
|
-
if (!response.ok) {
|
|
336
|
-
const error = await response.text();
|
|
337
|
-
return { success: false, error };
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
const result = await response.json();
|
|
341
|
-
return {
|
|
342
|
-
success: true,
|
|
343
|
-
compressed: result.compressed,
|
|
344
|
-
originalTokens: result.original_tokens,
|
|
345
|
-
compressedTokens: result.compressed_tokens,
|
|
346
|
-
ratio: result.ratio,
|
|
347
|
-
};
|
|
348
|
-
} catch (err) {
|
|
349
|
-
logger.error({ error: err.message }, "LLMLingua compression failed");
|
|
350
|
-
return { success: false, error: err.message };
|
|
351
|
-
}
|
|
352
|
-
}
|
|
353
|
-
|
|
354
251
|
/**
|
|
355
252
|
* Get client-side metrics
|
|
356
253
|
*/
|
|
@@ -424,9 +321,6 @@ module.exports = {
|
|
|
424
321
|
checkHealth,
|
|
425
322
|
compressMessages,
|
|
426
323
|
ccrRetrieve,
|
|
427
|
-
ccrTrack,
|
|
428
|
-
ccrAnalyze,
|
|
429
|
-
llmlinguaCompress,
|
|
430
324
|
getMetrics,
|
|
431
325
|
getServerMetrics,
|
|
432
326
|
getCombinedMetrics,
|
package/src/headroom/index.js
CHANGED
|
@@ -125,20 +125,6 @@ class HeadroomManager {
|
|
|
125
125
|
return client.ccrRetrieve(hash, query, maxResults);
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
-
/**
|
|
129
|
-
* Track compression for proactive expansion
|
|
130
|
-
*/
|
|
131
|
-
async ccrTrack(hashKey, turnNumber, toolName, sample) {
|
|
132
|
-
return client.ccrTrack(hashKey, turnNumber, toolName, sample);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
/**
|
|
136
|
-
* Analyze query for proactive CCR expansion
|
|
137
|
-
*/
|
|
138
|
-
async ccrAnalyze(query, turnNumber) {
|
|
139
|
-
return client.ccrAnalyze(query, turnNumber);
|
|
140
|
-
}
|
|
141
|
-
|
|
142
128
|
/**
|
|
143
129
|
* Check if Headroom is enabled
|
|
144
130
|
*/
|
package/src/memory/search.js
CHANGED
|
@@ -258,58 +258,8 @@ function searchMemories(options) {
|
|
|
258
258
|
}
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
-
/**
|
|
262
|
-
|
|
263
|
-
* Search with keyword expansion (UPDATED - now uses sanitized keywords)
|
|
264
|
-
=======
|
|
265
|
-
* Prepare FTS5 query - handle special characters and phrases
|
|
266
|
-
*/
|
|
267
|
-
function prepareFTS5Query(query) {
|
|
268
|
-
// FTS5 special characters: " * ( ) < > - : AND OR NOT
|
|
269
|
-
// Strategy: Strip XML/HTML tags, then sanitize remaining text
|
|
270
|
-
let cleaned = query.trim();
|
|
271
|
-
|
|
272
|
-
// Step 1: Remove XML/HTML tags (common in error messages)
|
|
273
|
-
// Matches: <tag>, </tag>, <tag attr="value">
|
|
274
|
-
cleaned = cleaned.replace(/<[^>]+>/g, ' ');
|
|
275
|
-
|
|
276
|
-
// Step 2: Remove excess whitespace from tag removal
|
|
277
|
-
cleaned = cleaned.replace(/\s+/g, ' ').trim();
|
|
278
|
-
|
|
279
|
-
if (!cleaned) {
|
|
280
|
-
// Query was all tags, return safe fallback
|
|
281
|
-
return '"empty query"';
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
// Step 3: Check if query contains FTS5 operators (AND, OR, NOT)
|
|
285
|
-
const hasFTS5Operators = /\b(AND|OR|NOT)\b/i.test(cleaned);
|
|
286
|
-
|
|
287
|
-
// Step 4: ENHANCED - Remove ALL special characters that could break FTS5
|
|
288
|
-
// Keep only: letters, numbers, spaces
|
|
289
|
-
// Remove: * ( ) < > - : [ ] | , + = ? ! ; / \ @ # $ % ^ & { }
|
|
290
|
-
cleaned = cleaned.replace(/[*()<>\-:\[\]|,+=?!;\/\\@#$%^&{}]/g, ' ');
|
|
291
|
-
cleaned = cleaned.replace(/\s+/g, ' ').trim();
|
|
292
|
-
|
|
293
|
-
// Step 5: Escape double quotes (FTS5 uses "" for literal quote)
|
|
294
|
-
cleaned = cleaned.replace(/"/g, '""');
|
|
295
|
-
|
|
296
|
-
// Step 6: Additional safety - remove any remaining non-alphanumeric except spaces
|
|
297
|
-
cleaned = cleaned.replace(/[^\w\s""]/g, ' ');
|
|
298
|
-
cleaned = cleaned.replace(/\s+/g, ' ').trim();
|
|
299
|
-
|
|
300
|
-
// Step 7: Wrap in quotes for phrase search (safest approach)
|
|
301
|
-
if (!hasFTS5Operators) {
|
|
302
|
-
// Treat as literal phrase search
|
|
303
|
-
cleaned = `"${cleaned}"`;
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
// If query has FTS5 operators, let FTS5 parse them (advanced users)
|
|
307
|
-
return cleaned;
|
|
308
|
-
}
|
|
309
|
-
|
|
310
261
|
/**
|
|
311
262
|
* Search with keyword expansion (extract key terms)
|
|
312
|
-
|
|
313
263
|
*/
|
|
314
264
|
function searchWithExpansion(options) {
|
|
315
265
|
const { query, limit = 10 } = options;
|
|
@@ -17,6 +17,7 @@ const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = req
|
|
|
17
17
|
const { createAuditLogger } = require("../logger/audit-logger");
|
|
18
18
|
const { getResolvedIp, runWithDnsContext } = require("../clients/dns-logger");
|
|
19
19
|
const { getShuttingDown } = require("../api/health");
|
|
20
|
+
const { tryPreflight, buildSatisfiedResponse: buildPreflightResponse } = require("./preflight");
|
|
20
21
|
const crypto = require("crypto");
|
|
21
22
|
const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
|
|
22
23
|
const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
|
|
@@ -1100,7 +1101,7 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
|
|
|
1100
1101
|
};
|
|
1101
1102
|
}
|
|
1102
1103
|
|
|
1103
|
-
function sanitizePayload(payload) {
|
|
1104
|
+
async function sanitizePayload(payload) {
|
|
1104
1105
|
const { clonePayloadSmart } = require("../utils/payload");
|
|
1105
1106
|
const providerType = config.modelProvider?.type ?? "databricks";
|
|
1106
1107
|
const willFlatten = providerType !== "azure-anthropic";
|
|
@@ -1383,7 +1384,9 @@ function sanitizePayload(payload) {
|
|
|
1383
1384
|
clean.tools = selectedTools.length > 0 ? selectedTools : undefined;
|
|
1384
1385
|
}
|
|
1385
1386
|
|
|
1386
|
-
|
|
1387
|
+
// Always false: the agent loop needs buffered JSON to parse tool calls.
|
|
1388
|
+
// Lynkr synthesises SSE back to the client from the buffered response.
|
|
1389
|
+
clean.stream = false;
|
|
1387
1390
|
|
|
1388
1391
|
if (
|
|
1389
1392
|
config.modelProvider?.type === "azure-anthropic" &&
|
|
@@ -1415,7 +1418,7 @@ function sanitizePayload(payload) {
|
|
|
1415
1418
|
|
|
1416
1419
|
// Optional TOON conversion for large JSON message payloads (prompt context only).
|
|
1417
1420
|
// Run this BEFORE message coalescing to preserve parseable JSON boundaries.
|
|
1418
|
-
applyToonCompression(clean, config.toon, { logger });
|
|
1421
|
+
await applyToonCompression(clean, config.toon, { logger });
|
|
1419
1422
|
|
|
1420
1423
|
// FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
|
|
1421
1424
|
// Strategy: Merge consecutive same-role messages, but NEVER merge messages
|
|
@@ -1526,12 +1529,35 @@ function getToolCallSignature(toolCall) {
|
|
|
1526
1529
|
}
|
|
1527
1530
|
|
|
1528
1531
|
function buildNonJsonResponse(databricksResponse) {
|
|
1532
|
+
// Convert plain text response to Anthropic message format
|
|
1533
|
+
// so SSE handler can properly render it
|
|
1534
|
+
const textContent = databricksResponse.text || "";
|
|
1535
|
+
|
|
1529
1536
|
return {
|
|
1530
1537
|
status: databricksResponse.status,
|
|
1531
1538
|
headers: {
|
|
1532
|
-
"Content-Type":
|
|
1539
|
+
"Content-Type": "application/json", // Changed from text/plain
|
|
1540
|
+
},
|
|
1541
|
+
body: {
|
|
1542
|
+
id: `msg_${Date.now()}`,
|
|
1543
|
+
type: "message",
|
|
1544
|
+
role: "assistant",
|
|
1545
|
+
model: "unknown",
|
|
1546
|
+
content: [
|
|
1547
|
+
{
|
|
1548
|
+
type: "text",
|
|
1549
|
+
text: textContent
|
|
1550
|
+
}
|
|
1551
|
+
],
|
|
1552
|
+
stop_reason: "end_turn",
|
|
1553
|
+
stop_sequence: null,
|
|
1554
|
+
usage: {
|
|
1555
|
+
input_tokens: 0,
|
|
1556
|
+
output_tokens: 0,
|
|
1557
|
+
cache_creation_input_tokens: 0,
|
|
1558
|
+
cache_read_input_tokens: 0,
|
|
1559
|
+
}
|
|
1533
1560
|
},
|
|
1534
|
-
body: databricksResponse.text,
|
|
1535
1561
|
terminationReason: "non_json_response",
|
|
1536
1562
|
};
|
|
1537
1563
|
}
|
|
@@ -1799,9 +1825,11 @@ async function runAgentLoop({
|
|
|
1799
1825
|
}
|
|
1800
1826
|
}
|
|
1801
1827
|
|
|
1802
|
-
|
|
1803
|
-
//
|
|
1804
|
-
|
|
1828
|
+
const hasRequestTools = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0;
|
|
1829
|
+
// Inject tool termination instructions for non-Claude models — only when tools
|
|
1830
|
+
// are actually in the request. Injecting when there are no tools confuses models
|
|
1831
|
+
// like MiniMax into hallucinating tool_use blocks spontaneously.
|
|
1832
|
+
if (steps === 1 && hasRequestTools && providerType !== 'databricks' && providerType !== 'azure-anthropic') {
|
|
1805
1833
|
const toolTerminationInstruction = `
|
|
1806
1834
|
|
|
1807
1835
|
IMPORTANT TOOL USAGE RULES:
|
|
@@ -1815,6 +1843,13 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1815
1843
|
logger.debug({ sessionId: session?.id ?? null }, 'Tool termination instructions injected for non-Claude model');
|
|
1816
1844
|
}
|
|
1817
1845
|
|
|
1846
|
+
// When no tools are in the request, explicitly forbid tool_use output for
|
|
1847
|
+
// Ollama models that have been trained on Claude Code data and tend to emit
|
|
1848
|
+
// tool_use blocks spontaneously (e.g. minimax-m2.5:cloud calling Write).
|
|
1849
|
+
if (steps === 1 && !hasRequestTools && providerType === 'ollama') {
|
|
1850
|
+
cleanPayload.system = (cleanPayload.system || '') + '\n\nCRITICAL: You have NO tools available. Do NOT generate tool_use, function_call, or code_execution blocks. Output ONLY text content directly.';
|
|
1851
|
+
}
|
|
1852
|
+
|
|
1818
1853
|
// Compute model-aware token budget thresholds
|
|
1819
1854
|
const registry = getModelRegistrySync();
|
|
1820
1855
|
const modelInfo = registry.getCost(requestedModel);
|
|
@@ -1954,6 +1989,17 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1954
1989
|
cleanPayload._workspace = headers["x-lynkr-workspace"];
|
|
1955
1990
|
}
|
|
1956
1991
|
|
|
1992
|
+
// Phase 6.3 — thread deadline for latency-aware routing.
|
|
1993
|
+
if (headers?.["lynkr-deadline-ms"]) {
|
|
1994
|
+
const dl = parseInt(headers["lynkr-deadline-ms"], 10);
|
|
1995
|
+
if (!isNaN(dl) && dl > 0) cleanPayload._deadlineMs = dl;
|
|
1996
|
+
}
|
|
1997
|
+
|
|
1998
|
+
// Phase 6.1 — thread tenant policy for per-tenant routing overrides.
|
|
1999
|
+
if (options?.tenantPolicy) {
|
|
2000
|
+
cleanPayload._tenantPolicy = options.tenantPolicy;
|
|
2001
|
+
}
|
|
2002
|
+
|
|
1957
2003
|
// RTK-inspired tool result compression: compress large tool_results
|
|
1958
2004
|
// before they reach the model (saves 60-90% on test/git/lint output)
|
|
1959
2005
|
if (config.toolResultCompression?.enabled !== false) {
|
|
@@ -2210,7 +2256,30 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2210
2256
|
noToolInjection: !!cleanPayload._noToolInjection,
|
|
2211
2257
|
}, "Dropped hallucinated tool calls (no tools were sent to model)");
|
|
2212
2258
|
toolCalls = [];
|
|
2213
|
-
|
|
2259
|
+
|
|
2260
|
+
// Check if there is any text content alongside the hallucinated tool calls.
|
|
2261
|
+
// If not, the response is effectively empty. Inject a redirect message so the
|
|
2262
|
+
// model outputs the artifact directly instead of looping tool-call attempts.
|
|
2263
|
+
const hasTextContent = isAnthropicFormat
|
|
2264
|
+
? (databricksResponse.json?.content ?? []).some(b => b?.type === "text" && String(b.text || "").trim().length > 0)
|
|
2265
|
+
: (typeof message.content === "string" && message.content.trim().length > 0);
|
|
2266
|
+
|
|
2267
|
+
if (!hasTextContent && steps < settings.maxSteps - 1) {
|
|
2268
|
+
logger.info({
|
|
2269
|
+
sessionId: session?.id ?? null,
|
|
2270
|
+
step: steps,
|
|
2271
|
+
}, "Hallucinated tool calls with no text content — injecting redirect to force direct output");
|
|
2272
|
+
|
|
2273
|
+
// Push a phantom assistant turn (thinking only, no tool_use) then a user
|
|
2274
|
+
// redirect message so the model outputs the artifact directly.
|
|
2275
|
+
const redirectUser = {
|
|
2276
|
+
role: "user",
|
|
2277
|
+
content: "You don't have any tools available in this context. Please output the result directly as an <artifact identifier=\"design.html\" type=\"text/html\" title=\"Design\"> block containing complete HTML. Do not attempt to call any tools.",
|
|
2278
|
+
};
|
|
2279
|
+
cleanPayload.messages.push(redirectUser);
|
|
2280
|
+
steps++;
|
|
2281
|
+
continue;
|
|
2282
|
+
}
|
|
2214
2283
|
}
|
|
2215
2284
|
|
|
2216
2285
|
if (toolCalls.length > 0) {
|
|
@@ -3689,6 +3758,28 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3689
3758
|
};
|
|
3690
3759
|
}
|
|
3691
3760
|
|
|
3761
|
+
// === PREFLIGHT CHECK ===
|
|
3762
|
+
// If the request supplied preflight_commands and they all pass in
|
|
3763
|
+
// the workspace, the work is already done — short-circuit with a
|
|
3764
|
+
// synthetic response and never touch the model. No-op when the
|
|
3765
|
+
// feature is disabled or the request didn't opt in.
|
|
3766
|
+
const preflightResult = tryPreflight({ payload, cwd });
|
|
3767
|
+
if (preflightResult?.satisfied) {
|
|
3768
|
+
logger.info({
|
|
3769
|
+
commands: preflightResult.results.length,
|
|
3770
|
+
reason: preflightResult.reason,
|
|
3771
|
+
}, '[Preflight] Satisfied — skipping model call');
|
|
3772
|
+
return buildPreflightResponse({
|
|
3773
|
+
model: requestedModel,
|
|
3774
|
+
preflightResult,
|
|
3775
|
+
});
|
|
3776
|
+
}
|
|
3777
|
+
if (preflightResult && !preflightResult.satisfied) {
|
|
3778
|
+
logger.debug({
|
|
3779
|
+
failedCommand: preflightResult.failedCommand,
|
|
3780
|
+
}, '[Preflight] Not satisfied — proceeding with model call');
|
|
3781
|
+
}
|
|
3782
|
+
|
|
3692
3783
|
// === TOOL LOOP GUARD (EARLY CHECK) ===
|
|
3693
3784
|
// Check BEFORE sanitization since sanitizePayload removes conversation history
|
|
3694
3785
|
// All providers use threshold 2 to catch loops early
|
|
@@ -3838,7 +3929,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3838
3929
|
const { createTimer } = require("../utils/perf-timer");
|
|
3839
3930
|
const pTimer = createTimer("processMessage");
|
|
3840
3931
|
|
|
3841
|
-
const cleanPayload = sanitizePayload(payload);
|
|
3932
|
+
const cleanPayload = await sanitizePayload(payload);
|
|
3842
3933
|
pTimer.mark("sanitizePayload");
|
|
3843
3934
|
|
|
3844
3935
|
// Proactively load tools based on prompt content (lazy loading)
|
|
@@ -3976,7 +4067,11 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3976
4067
|
if (semanticCache.isEnabled() && semanticLookupResult && !semanticLookupResult.hit) {
|
|
3977
4068
|
if (loopResult.response?.status === 200 && loopResult.response?.body) {
|
|
3978
4069
|
try {
|
|
3979
|
-
|
|
4070
|
+
// Only cache valid JSON responses, not HTML error pages
|
|
4071
|
+
const body = loopResult.response.body;
|
|
4072
|
+
if (typeof body === 'object' || (typeof body === 'string' && body.trim().startsWith('{'))) {
|
|
4073
|
+
await semanticCache.store(semanticLookupResult, body);
|
|
4074
|
+
}
|
|
3980
4075
|
} catch (err) {
|
|
3981
4076
|
logger.debug({ error: err.message }, "Semantic cache store failed");
|
|
3982
4077
|
}
|