lynkr 8.0.1 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,464 @@
1
+ /**
2
+ * Routing Telemetry Module
3
+ *
4
+ * Persists per-request routing telemetry into a dedicated SQLite database
5
+ * at .lynkr/telemetry.db. Provides query helpers for dashboards, accuracy
6
+ * analysis, and automated routing feedback loops.
7
+ *
8
+ * Uses lazy initialisation so the proxy starts even when better-sqlite3 is
9
+ * not installed (it is an optionalDependency).
10
+ *
11
+ * @module routing/telemetry
12
+ */
13
+
14
+ const fs = require("fs");
15
+ const path = require("path");
16
+ const logger = require("../logger");
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Lazy database initialisation
20
+ // ---------------------------------------------------------------------------
21
+
22
/**
 * Try to load better-sqlite3.
 *
 * The package is an optionalDependency, so a failing require is an expected
 * outcome and is reported as null rather than thrown.
 *
 * @returns {Function|null} the better-sqlite3 constructor, or null when the
 *   require fails
 */
function loadSqliteDriver() {
  try {
    return require("better-sqlite3");
  } catch {
    return null;
  }
}

/** better-sqlite3 constructor, or null when it could not be required. */
const Database = loadSqliteDriver();

/**
 * Open telemetry connection; null unless init() has opened it successfully.
 * @type {import('better-sqlite3').Database|null}
 */
let db = null;

/**
 * True once init() has been attempted, whether or not it succeeded.
 * @type {boolean}
 */
let initialised = false;

/** Default retention window in milliseconds (30 days). */
const DEFAULT_RETENTION_MS = 1000 * 60 * 60 * 24 * 30;
37
+
38
/**
 * Initialise the telemetry database (singleton, idempotent).
 *
 * Only the first call does any work: it creates `.lynkr/` under the current
 * working directory, opens `telemetry.db`, applies the performance pragmas
 * and creates the `routing_telemetry` table and its indexes. Every later
 * call just reports the outcome of that first attempt; a failed attempt is
 * not retried.
 *
 * The connection is published to the module-level `db` only after the whole
 * setup succeeded. If any step after opening the file throws, the handle is
 * closed so a half-initialised connection is never leaked.
 *
 * @returns {boolean} true if the DB is usable
 */
function init() {
  if (initialised) return db !== null;
  initialised = true;

  if (!Database) {
    logger.debug("Telemetry: better-sqlite3 not available, telemetry disabled");
    return false;
  }

  /** @type {import('better-sqlite3').Database|null} */
  let handle = null;

  try {
    const dbDir = path.resolve(process.cwd(), ".lynkr");
    // With `recursive: true` this is a no-op when the directory already exists.
    fs.mkdirSync(dbDir, { recursive: true });

    const dbPath = path.join(dbDir, "telemetry.db");
    handle = new Database(dbPath, {
      verbose: process.env.DEBUG_SQL ? console.log : null,
      fileMustExist: false,
    });

    // Performance pragmas (same pattern as src/db/index.js)
    handle.pragma("journal_mode = WAL");
    handle.pragma("synchronous = NORMAL");
    handle.pragma("cache_size = -16000");
    handle.pragma("temp_store = MEMORY");
    handle.pragma("busy_timeout = 3000");

    handle.exec(`
      CREATE TABLE IF NOT EXISTS routing_telemetry (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        request_id TEXT NOT NULL,
        session_id TEXT,
        timestamp INTEGER NOT NULL,
        complexity_score REAL,
        tier TEXT,
        agentic_type TEXT,
        tool_count INTEGER,
        input_tokens INTEGER,
        message_count INTEGER,
        request_type TEXT,
        provider TEXT NOT NULL,
        model TEXT,
        routing_method TEXT,
        was_fallback INTEGER DEFAULT 0,
        output_tokens INTEGER,
        latency_ms INTEGER,
        status_code INTEGER,
        error_type TEXT,
        cost_usd REAL,
        tool_calls_made INTEGER,
        retry_count INTEGER DEFAULT 0,
        circuit_breaker_state TEXT,
        quality_score REAL,
        tokens_per_second REAL,
        cost_efficiency REAL
      );

      CREATE INDEX IF NOT EXISTS idx_telemetry_provider
        ON routing_telemetry(provider);

      CREATE INDEX IF NOT EXISTS idx_telemetry_tier
        ON routing_telemetry(tier);

      CREATE INDEX IF NOT EXISTS idx_telemetry_timestamp
        ON routing_telemetry(timestamp);
    `);

    db = handle;
    logger.info({ dbPath }, "Routing telemetry database initialised");
    return true;
  } catch (err) {
    logger.warn({ err: err.message }, "Failed to initialise telemetry database");
    if (handle) {
      // The file was opened but setup failed: release the connection.
      try {
        handle.close();
      } catch (closeErr) {
        logger.debug({ err: closeErr.message }, "Telemetry: failed to close database after init error");
      }
    }
    db = null;
    return false;
  }
}
118
+
119
+ // ---------------------------------------------------------------------------
120
+ // Prepared statements (created lazily after init)
121
+ // ---------------------------------------------------------------------------
122
+
123
/**
 * Cache of prepared statements, keyed by the name passed to stmt().
 * @type {Map<string, import('better-sqlite3').Statement>}
 */
const stmts = new Map();

/**
 * Return the prepared statement registered under `key`, preparing it from
 * `sql` the first time the key is requested.
 *
 * @param {string} key - Cache key for the statement
 * @param {string} sql - SQL text; only used when `key` is not cached yet
 * @returns {import('better-sqlite3').Statement|null} null when no database
 *   connection is open
 */
function stmt(key, sql) {
  if (!db) {
    return null;
  }

  if (stmts.has(key)) {
    return stmts.get(key);
  }

  const prepared = db.prepare(sql);
  stmts.set(key, prepared);
  return prepared;
}
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Public API
142
+ // ---------------------------------------------------------------------------
143
+
144
/** INSERT statement covering every non-id column of routing_telemetry. */
const TELEMETRY_INSERT_SQL = `INSERT INTO routing_telemetry (
  request_id, session_id, timestamp, complexity_score, tier,
  agentic_type, tool_count, input_tokens, message_count, request_type,
  provider, model, routing_method, was_fallback, output_tokens,
  latency_ms, status_code, error_type, cost_usd, tool_calls_made,
  retry_count, circuit_breaker_state, quality_score, tokens_per_second,
  cost_efficiency
) VALUES (
  @request_id, @session_id, @timestamp, @complexity_score, @tier,
  @agentic_type, @tool_count, @input_tokens, @message_count, @request_type,
  @provider, @model, @routing_method, @was_fallback, @output_tokens,
  @latency_ms, @status_code, @error_type, @cost_usd, @tool_calls_made,
  @retry_count, @circuit_breaker_state, @quality_score, @tokens_per_second,
  @cost_efficiency
)`;

/**
 * Build the named-parameter object for TELEMETRY_INSERT_SQL.
 *
 * Missing optional fields become null, `retry_count` defaults to 0 and
 * `was_fallback` is coerced to 0/1. `provider` is passed through untouched.
 *
 * @param {Object} data - Telemetry fields supplied by the caller
 * @param {number} now - Timestamp (ms) used when `data.timestamp` is absent
 * @returns {Object} one value per named parameter of the INSERT
 */
function toTelemetryRow(data, now) {
  return {
    request_id: data.request_id ?? null,
    session_id: data.session_id ?? null,
    timestamp: data.timestamp ?? now,
    complexity_score: data.complexity_score ?? null,
    tier: data.tier ?? null,
    agentic_type: data.agentic_type ?? null,
    tool_count: data.tool_count ?? null,
    input_tokens: data.input_tokens ?? null,
    message_count: data.message_count ?? null,
    request_type: data.request_type ?? null,
    provider: data.provider,
    model: data.model ?? null,
    routing_method: data.routing_method ?? null,
    was_fallback: data.was_fallback ? 1 : 0,
    output_tokens: data.output_tokens ?? null,
    latency_ms: data.latency_ms ?? null,
    status_code: data.status_code ?? null,
    error_type: data.error_type ?? null,
    cost_usd: data.cost_usd ?? null,
    tool_calls_made: data.tool_calls_made ?? null,
    retry_count: data.retry_count ?? 0,
    circuit_breaker_state: data.circuit_breaker_state ?? null,
    quality_score: data.quality_score ?? null,
    tokens_per_second: data.tokens_per_second ?? null,
    cost_efficiency: data.cost_efficiency ?? null,
  };
}

/**
 * Record a telemetry data point.
 *
 * The row is snapshotted immediately, so the default timestamp reflects when
 * the event was reported and later mutation of `data` by the caller cannot
 * change what is stored. The INSERT itself is deferred with setImmediate so
 * it runs on a later event-loop turn instead of inside the calling request
 * handler. Failures are logged at debug level and never thrown to the caller.
 *
 * @param {Object} data - Telemetry fields (see table schema)
 */
function record(data) {
  if (!init()) return;

  let row;
  try {
    row = toTelemetryRow(data, Date.now());
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry record failed");
    return;
  }

  setImmediate(() => {
    try {
      const insert = stmt("insert", TELEMETRY_INSERT_SQL);
      if (!insert) return;
      insert.run(row);
    } catch (err) {
      logger.debug({ err: err.message }, "Telemetry record failed");
    }
  });
}
207
+
208
/** Row cap applied by query() when no usable limit is supplied. */
const QUERY_DEFAULT_LIMIT = 100;

/**
 * Coerce a caller-supplied limit into a non-negative safe integer.
 *
 * Negative, non-numeric and non-finite values fall back to the default
 * instead of reaching SQLite, where a negative LIMIT means "no limit" and a
 * non-integer LIMIT is an error. Fractions are rounded down.
 *
 * @param {*} limit - Raw `filters.limit` value
 * @returns {number} integer >= 0
 */
function normaliseQueryLimit(limit) {
  const parsed = Math.floor(Number(limit ?? QUERY_DEFAULT_LIMIT));
  return Number.isSafeInteger(parsed) && parsed >= 0 ? parsed : QUERY_DEFAULT_LIMIT;
}

/**
 * Query telemetry records with optional filters, newest first.
 *
 * All values, including the row limit, are bound as named parameters. The
 * SQL text only varies with which filters are present, so each variant is
 * prepared once through the shared statement cache.
 *
 * @param {Object} [filters]
 * @param {string} [filters.provider] - Filter by provider name
 * @param {string} [filters.tier] - Filter by tier
 * @param {number} [filters.since] - Only records at or after this timestamp (ms)
 * @param {number} [filters.limit] - Max rows to return (default 100); invalid
 *   or negative values fall back to the default
 * @returns {Object[]} Matching telemetry rows; empty when telemetry is
 *   disabled or the query fails
 */
function query(filters = {}) {
  if (!init()) return [];

  // The default parameter only covers `undefined`; tolerate an explicit null.
  const { provider, tier, since, limit } = filters ?? {};

  const clauses = [];
  const params = {};

  if (provider) {
    clauses.push("provider = @provider");
    params.provider = provider;
  }
  if (tier) {
    clauses.push("tier = @tier");
    params.tier = tier;
  }
  if (since) {
    clauses.push("timestamp >= @since");
    params.since = since;
  }
  params.limit = normaliseQueryLimit(limit);

  const where = clauses.length > 0 ? `WHERE ${clauses.join(" AND ")}` : "";

  try {
    const sql = `SELECT * FROM routing_telemetry ${where} ORDER BY timestamp DESC LIMIT @limit`;
    // Key by the active clauses so each distinct SQL text is prepared once.
    const select = stmt(`query:${clauses.join("|")}`, sql);
    if (!select) return [];
    return select.all(params);
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry query failed");
    return [];
  }
}
248
+
249
/**
 * Summarise routing telemetry for a time window.
 *
 * Returns the request count, mean latency per provider (rounded to whole
 * ms), mean quality score per tier (one decimal), the share of requests
 * with an error_type (percent, one decimal) and the over/under-provisioned
 * percentages reported by getRoutingAccuracy().
 *
 * @param {Object} [timeRange]
 * @param {number} [timeRange.since] - Start timestamp (ms). Defaults to 24 hours ago.
 * @param {number} [timeRange.until] - End timestamp (ms). Defaults to now.
 * @returns {Object|null} Aggregated statistics, or null when telemetry is
 *   disabled, the window holds no rows, or a query fails
 */
function getStats(timeRange = {}) {
  if (!init()) {
    return null;
  }

  const since = timeRange.since ?? Date.now() - 24 * 60 * 60 * 1000;
  const until = timeRange.until ?? Date.now();

  try {
    // Total requests
    const countStmt = db.prepare(
      "SELECT COUNT(*) as cnt FROM routing_telemetry WHERE timestamp BETWEEN ? AND ?"
    );
    const totalRow = countStmt.get(since, until);
    if (!totalRow || totalRow.cnt === 0) {
      return null;
    }

    // Average latency per provider
    const latencyStmt = db.prepare(
      `SELECT provider, AVG(latency_ms) as avg_latency, COUNT(*) as cnt
       FROM routing_telemetry
       WHERE timestamp BETWEEN ? AND ? AND latency_ms IS NOT NULL
       GROUP BY provider`
    );
    const avgLatencyByProvider = {};
    for (const { provider, avg_latency: meanLatency } of latencyStmt.all(since, until)) {
      avgLatencyByProvider[provider] = Math.round(meanLatency);
    }

    // Average quality per tier
    const qualityStmt = db.prepare(
      `SELECT tier, AVG(quality_score) as avg_quality, COUNT(*) as cnt
       FROM routing_telemetry
       WHERE timestamp BETWEEN ? AND ? AND quality_score IS NOT NULL AND tier IS NOT NULL
       GROUP BY tier`
    );
    const avgQualityByTier = {};
    for (const { tier, avg_quality: meanQuality } of qualityStmt.all(since, until)) {
      avgQualityByTier[tier] = Math.round(meanQuality * 10) / 10;
    }

    // Error rate
    const errorStmt = db.prepare(
      "SELECT COUNT(*) as cnt FROM routing_telemetry WHERE timestamp BETWEEN ? AND ? AND error_type IS NOT NULL"
    );
    const errorRow = errorStmt.get(since, until);
    const errorRate = Math.round((errorRow.cnt / totalRow.cnt) * 1000) / 10; // one decimal %

    // Over/under provisioned percentages; fall back to 0 when unavailable
    const { overProvisionedPct, underProvisionedPct } = getRoutingAccuracy({ since, until }) || {
      overProvisionedPct: 0,
      underProvisionedPct: 0,
    };

    return {
      totalRequests: totalRow.cnt,
      avgLatencyByProvider,
      avgQualityByTier,
      errorRate,
      overProvisionedPct,
      underProvisionedPct,
    };
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry getStats failed");
    return null;
  }
}
326
+
327
/**
 * Round an aggregate to a fixed precision while keeping "no data" distinct
 * from zero.
 *
 * @param {number|null|undefined} value - Aggregate returned by SQLite
 * @param {number} [scale=1] - 1 for integers, 10 for one decimal, and so on
 * @returns {number|null} rounded value, or null only when `value` is null/undefined
 */
function roundAggregate(value, scale = 1) {
  return value == null ? null : Math.round(value * scale) / scale;
}

/**
 * Get aggregated statistics for a specific provider.
 *
 * Averages and the cost total are null only when SQLite returned NULL for
 * them (no row had a value). A genuine zero, such as a total cost of 0 for
 * a free provider or an average quality score of 0, is reported as 0.
 * Rates are percentages with one decimal.
 *
 * @param {string} provider - Provider name
 * @param {Object} [timeRange]
 * @param {number} [timeRange.since] - Start timestamp (ms). Defaults to 24 hours ago.
 * @param {number} [timeRange.until] - End timestamp (ms). Defaults to now.
 * @returns {Object|null} null when telemetry is disabled, the provider has
 *   no rows in the window, or the query fails
 */
function getProviderStats(provider, timeRange = {}) {
  if (!init()) return null;

  const since = timeRange.since ?? Date.now() - 24 * 60 * 60 * 1000;
  const until = timeRange.until ?? Date.now();

  try {
    const row = db
      .prepare(
        `SELECT
           COUNT(*) as total,
           AVG(latency_ms) as avg_latency,
           AVG(quality_score) as avg_quality,
           AVG(output_tokens) as avg_output_tokens,
           SUM(CASE WHEN error_type IS NOT NULL THEN 1 ELSE 0 END) as errors,
           SUM(CASE WHEN was_fallback = 1 THEN 1 ELSE 0 END) as fallbacks,
           AVG(tokens_per_second) as avg_tps,
           SUM(cost_usd) as total_cost
         FROM routing_telemetry
         WHERE provider = ? AND timestamp BETWEEN ? AND ?`
      )
      .get(provider, since, until);

    if (!row || row.total === 0) return null;

    return {
      total: row.total,
      avgLatency: roundAggregate(row.avg_latency),
      avgQuality: roundAggregate(row.avg_quality, 10),
      avgOutputTokens: roundAggregate(row.avg_output_tokens),
      errorRate: Math.round((row.errors / row.total) * 1000) / 10,
      fallbackRate: Math.round((row.fallbacks / row.total) * 1000) / 10,
      avgTokensPerSecond: roundAggregate(row.avg_tps, 10),
      totalCost: roundAggregate(row.total_cost, 10000),
    };
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry getProviderStats failed");
    return null;
  }
}
376
+
377
/**
 * Measure how often routing picked a tier that was too strong or too weak.
 *
 * Over-provisioned: quality_score > 80 AND output_tokens < 50 on the
 * REASONING or COMPLEX tier.
 * Under-provisioned: quality_score < 45 on the SIMPLE tier.
 * Percentages are relative to all requests in the window, one decimal.
 *
 * @param {Object} [timeRange]
 * @param {number} [timeRange.since] - Start timestamp (ms). Defaults to 24 hours ago.
 * @param {number} [timeRange.until] - End timestamp (ms). Defaults to now.
 * @returns {Object|null} counts and percentages, or null when telemetry is
 *   disabled, the window holds no rows, or a query fails
 */
function getRoutingAccuracy(timeRange = {}) {
  if (!init()) {
    return null;
  }

  const since = timeRange.since ?? Date.now() - 24 * 60 * 60 * 1000;
  const until = timeRange.until ?? Date.now();

  try {
    const allStmt = db.prepare(
      "SELECT COUNT(*) as cnt FROM routing_telemetry WHERE timestamp BETWEEN ? AND ?"
    );
    const allRow = allStmt.get(since, until);
    if (!allRow || allRow.cnt === 0) {
      return null;
    }

    const overStmt = db.prepare(
      `SELECT COUNT(*) as cnt FROM routing_telemetry
       WHERE timestamp BETWEEN ? AND ?
       AND quality_score > 80
       AND output_tokens < 50
       AND tier IN ('REASONING', 'COMPLEX')`
    );
    const overRow = overStmt.get(since, until);

    const underStmt = db.prepare(
      `SELECT COUNT(*) as cnt FROM routing_telemetry
       WHERE timestamp BETWEEN ? AND ?
       AND quality_score < 45
       AND tier = 'SIMPLE'`
    );
    const underRow = underStmt.get(since, until);

    const requestCount = allRow.cnt;
    const overCount = overRow.cnt;
    const underCount = underRow.cnt;

    return {
      totalRequests: requestCount,
      overProvisioned: overCount,
      underProvisioned: underCount,
      overProvisionedPct: Math.round((overCount / requestCount) * 1000) / 10,
      underProvisionedPct: Math.round((underCount / requestCount) * 1000) / 10,
    };
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry getRoutingAccuracy failed");
    return null;
  }
}
433
+
434
/**
 * Purge telemetry rows whose timestamp is older than the retention window.
 *
 * @param {number} [olderThanMs] - Age threshold in ms. Defaults to 30 days.
 * @returns {number} Number of rows deleted; 0 when telemetry is disabled or
 *   the delete fails
 */
function cleanup(olderThanMs) {
  if (!init()) {
    return 0;
  }

  // Rows strictly older than this cutoff are removed.
  const cutoff = Date.now() - (olderThanMs ?? DEFAULT_RETENTION_MS);

  try {
    const purge = stmt("cleanup", "DELETE FROM routing_telemetry WHERE timestamp < ?");
    if (!purge) {
      return 0;
    }

    const { changes } = purge.run(cutoff);
    logger.debug({ deleted: changes }, "Telemetry cleanup complete");
    return changes;
  } catch (err) {
    logger.debug({ err: err.message }, "Telemetry cleanup failed");
    return 0;
  }
}
456
+
457
// Public surface of the routing telemetry module.
module.exports = { record, query, getStats, getProviderStats, getRoutingAccuracy, cleanup };
package/src/server.js CHANGED
@@ -228,6 +228,17 @@ async function start() {
228
228
  });
229
229
  }
230
230
 
231
+ // Register Codex process shutdown callback
232
+ shutdownManager.onShutdown(async () => {
233
+ try {
234
+ const { getCodexProcess } = require("./clients/codex-process");
235
+ const codex = getCodexProcess();
236
+ if (codex.child) {
237
+ await codex.shutdown();
238
+ }
239
+ } catch { /* ignore if codex never started */ }
240
+ });
241
+
231
242
  // Initialize hot reload config watcher
232
243
  if (config.hotReload?.enabled !== false) {
233
244
  const watcher = initConfigWatcher({