@relayplane/proxy 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,2881 +1,45 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
// esbuild CommonJS <-> ESM interop runtime.
// Short aliases for the reflection primitives used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines a live getter on `to` for every own property of `from` that `to`
 * does not already own, skipping the key named by `except`.
 *
 * @param to - Object receiving the forwarding getters.
 * @param from - Object (or function) whose own properties are mirrored.
 * @param except - Optional property name to leave out.
 * @param desc - Scratch slot for the current property descriptor.
 * @returns `to`
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = from && typeof from === "object" || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Mirror the source property's enumerability (enumerable when no descriptor is found).
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wraps a required module in a namespace-like object.
 *
 * If the importer is in node compatibility mode, or the module is not an ESM
 * file converted to CommonJS by a Babel-compatible transform (i.e.
 * "__esModule" has not been set), "default" is set to the CommonJS
 * "module.exports" value for node compatibility.
 *
 * @param mod - The value returned by `require`.
 * @param isNodeMode - Truthy to always attach `default`.
 * @param target - Scratch slot for the namespace object being built.
 * @returns The namespace object.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
25
-
26
- // src/proxy.ts
27
- var http = __toESM(require("http"));
28
- var url = __toESM(require("url"));
29
-
30
- // src/storage/store.ts
31
- var import_better_sqlite3 = __toESM(require("better-sqlite3"));
32
-
33
- // node_modules/nanoid/index.js
34
- var import_crypto = __toESM(require("crypto"), 1);
35
-
36
- // node_modules/nanoid/url-alphabet/index.js
37
// 64-character, URL-safe alphabet used to render random bytes as id characters.
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";

// node_modules/nanoid/index.js
// Random bytes are generated in bulk and handed out slice by slice.
var POOL_SIZE_MULTIPLIER = 128;
var pool;
var poolOffset;

/**
 * Reserves `bytes` random bytes in the shared pool, (re)allocating or
 * refilling it when it is missing, too small, or exhausted. After the call the
 * reserved bytes are `pool[poolOffset - bytes .. poolOffset)`.
 */
var fillPool = (bytes) => {
  const mustAllocate = !pool || pool.length < bytes;
  if (mustAllocate) {
    pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
  }
  if (mustAllocate || poolOffset + bytes > pool.length) {
    import_crypto.default.randomFillSync(pool);
    poolOffset = 0;
  }
  poolOffset += bytes;
};

/**
 * Generates a random id of `size` characters (default 21) drawn from
 * `urlAlphabet`.
 */
var nanoid = (size = 21) => {
  // `|= 0` coerces the requested size to a 32-bit integer before reserving bytes.
  fillPool(size |= 0);
  const start = poolOffset - size;
  let id = "";
  for (const byte of pool.subarray(start, poolOffset)) {
    // Keep the low 6 bits so the index always falls inside the 64-char alphabet.
    id += urlAlphabet[byte & 63];
  }
  return id;
};
62
-
63
- // src/storage/store.ts
64
- var fs = __toESM(require("fs"));
65
- var path = __toESM(require("path"));
66
- var os = __toESM(require("os"));
67
-
68
- // src/storage/schema.ts
69
- var SCHEMA_SQL = `
70
- -- Runs table: stores all LLM invocations
71
- CREATE TABLE IF NOT EXISTS runs (
72
- id TEXT PRIMARY KEY,
73
- prompt TEXT NOT NULL,
74
- system_prompt TEXT,
75
- task_type TEXT NOT NULL,
76
- model TEXT NOT NULL,
77
- success INTEGER NOT NULL,
78
- output TEXT,
79
- error TEXT,
80
- duration_ms INTEGER NOT NULL,
81
- tokens_in INTEGER,
82
- tokens_out INTEGER,
83
- cost_usd REAL,
84
- metadata TEXT,
85
- created_at TEXT NOT NULL DEFAULT (datetime('now'))
86
- );
87
-
88
- -- Index for task type queries
89
- CREATE INDEX IF NOT EXISTS idx_runs_task_type ON runs(task_type);
90
-
91
- -- Index for model queries
92
- CREATE INDEX IF NOT EXISTS idx_runs_model ON runs(model);
93
-
94
- -- Index for time-based queries
95
- CREATE INDEX IF NOT EXISTS idx_runs_created_at ON runs(created_at);
96
-
97
- -- Outcomes table: stores user feedback on runs
98
- CREATE TABLE IF NOT EXISTS outcomes (
99
- id TEXT PRIMARY KEY,
100
- run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
101
- success INTEGER NOT NULL,
102
- quality TEXT,
103
- latency_satisfactory INTEGER,
104
- cost_satisfactory INTEGER,
105
- feedback TEXT,
106
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
107
- UNIQUE(run_id)
108
- );
109
-
110
- -- Index for run lookups
111
- CREATE INDEX IF NOT EXISTS idx_outcomes_run_id ON outcomes(run_id);
112
-
113
- -- Routing rules table: stores routing preferences
114
- CREATE TABLE IF NOT EXISTS routing_rules (
115
- id TEXT PRIMARY KEY,
116
- task_type TEXT NOT NULL UNIQUE,
117
- preferred_model TEXT NOT NULL,
118
- source TEXT NOT NULL DEFAULT 'default',
119
- confidence REAL,
120
- sample_count INTEGER,
121
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
122
- updated_at TEXT NOT NULL DEFAULT (datetime('now'))
123
- );
124
-
125
- -- Index for task type lookups
126
- CREATE INDEX IF NOT EXISTS idx_routing_rules_task_type ON routing_rules(task_type);
127
-
128
- -- Suggestions table: stores routing improvement suggestions
129
- CREATE TABLE IF NOT EXISTS suggestions (
130
- id TEXT PRIMARY KEY,
131
- task_type TEXT NOT NULL,
132
- current_model TEXT NOT NULL,
133
- suggested_model TEXT NOT NULL,
134
- reason TEXT NOT NULL,
135
- confidence REAL NOT NULL,
136
- expected_improvement TEXT NOT NULL,
137
- sample_count INTEGER NOT NULL,
138
- accepted INTEGER,
139
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
140
- accepted_at TEXT
141
- );
142
-
143
- -- Index for task type lookups
144
- CREATE INDEX IF NOT EXISTS idx_suggestions_task_type ON suggestions(task_type);
145
-
146
- -- Index for pending suggestions
147
- CREATE INDEX IF NOT EXISTS idx_suggestions_accepted ON suggestions(accepted);
148
-
149
- -- Schema version table for migrations
150
- CREATE TABLE IF NOT EXISTS schema_version (
151
- version INTEGER PRIMARY KEY,
152
- applied_at TEXT NOT NULL DEFAULT (datetime('now'))
153
- );
154
-
155
- -- Insert initial schema version
156
- INSERT OR IGNORE INTO schema_version (version) VALUES (1);
157
- `;
158
// Built-in routing rules seeded into a fresh database.
var DEFAULT_ROUTING_RULES = [
  // Complex tasks → Sonnet (need reasoning & quality)
  { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
  { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
  { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
  { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
  // Simple tasks → Haiku (cost efficient)
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "general", preferredModel: "anthropic:claude-3-5-haiku-latest" }
];

/**
 * Builds the `INSERT OR IGNORE` statement that seeds `routing_rules` with one
 * row per entry of `DEFAULT_ROUTING_RULES` (id `default-<taskType>`, source
 * `'default'`, NULL confidence and sample count).
 *
 * @returns The seed SQL text.
 */
function generateSeedSQL() {
  // The statement is assembled as text, so embedded single quotes are doubled
  // to keep every value a well-formed SQL string literal.
  const quote = (text) => `'${String(text).replace(/'/g, "''")}'`;
  const values = DEFAULT_ROUTING_RULES.map(({ taskType, preferredModel }) => {
    const id = `default-${taskType}`;
    return `(${quote(id)}, ${quote(taskType)}, ${quote(preferredModel)}, 'default', NULL, NULL, datetime('now'), datetime('now'))`;
  }).join(",\n ");
  return `
 INSERT OR IGNORE INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
 VALUES
 ${values};
 `;
}
182
-
183
- // src/storage/store.ts
184
/**
 * Returns the default database location: `.relayplane/data.db` inside the
 * current user's home directory.
 */
function getDefaultDbPath() {
  const dataDir = path.join(os.homedir(), ".relayplane");
  return path.join(dataDir, "data.db");
}
187
/**
 * SQLite-backed persistence for runs, outcomes, routing rules and suggestions.
 *
 * Integer flag columns (`success`, `accepted`, `*_satisfactory`) are converted
 * to booleans on read and back to 0/1 on write; absent optional values are
 * written as NULL.
 */
var Store = class {
  /** Open database connection. */
  db;
  /** Path of the database file backing this store. */
  dbPath;

  /**
   * Creates a new Store instance, creating the parent directory and the
   * schema when they do not exist yet.
   *
   * @param dbPath - Path to the SQLite database file. Defaults to ~/.relayplane/data.db
   */
  constructor(dbPath) {
    this.dbPath = dbPath ?? getDefaultDbPath();
    const dir = path.dirname(this.dbPath);
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
    this.db = new import_better_sqlite3.default(this.dbPath);
    this.db.pragma("journal_mode = WAL");
    this.db.pragma("foreign_keys = ON");
    this.initializeSchema();
  }

  /**
   * Initializes the database schema and seeds the default routing rules.
   */
  initializeSchema() {
    this.db.exec(SCHEMA_SQL);
    this.db.exec(generateSeedSQL());
  }

  /**
   * Closes the database connection.
   */
  close() {
    this.db.close();
  }

  /**
   * Gets the database path.
   */
  getDbPath() {
    return this.dbPath;
  }

  // ============================================================================
  // Internal helpers
  // ============================================================================

  /**
   * Builds a parameterized WHERE clause from the truthy entries of `options`.
   *
   * @param options - Caller-supplied filter object (may be undefined).
   * @param filters - Ordered `[optionName, sqlCondition]` pairs; each condition
   *   holds exactly one `?` placeholder.
   * @returns `{ whereClause, params }`; `whereClause` is "" when nothing applies.
   */
  #buildWhere(options, filters) {
    const conditions = [];
    const params = [];
    for (const [optionName, condition] of filters) {
      const value = options?.[optionName];
      if (value) {
        conditions.push(condition);
        params.push(value);
      }
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    return { whereClause, params };
  }

  /** Converts a nullable 0/1 column value to `true`/`false`/`null`. */
  #toNullableBoolean(value) {
    return value != null ? Boolean(value) : null;
  }

  /** Converts a nullable boolean to the 0/1/NULL value stored in SQLite. */
  #toNullableFlag(value) {
    if (value == null) return null;
    return value ? 1 : 0;
  }

  // ============================================================================
  // Runs
  // ============================================================================

  /**
   * Records a new run.
   *
   * @param run - Run fields; `systemPrompt`, `output`, `error`, `tokensIn`,
   *   `tokensOut`, `costUsd` and `metadata` are optional and stored as NULL
   *   when absent.
   * @returns The generated run id.
   */
  recordRun(run) {
    const id = nanoid();
    const stmt = this.db.prepare(`
      INSERT INTO runs (id, prompt, system_prompt, task_type, model, success, output, error, duration_ms, tokens_in, tokens_out, cost_usd, metadata, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
    `);
    stmt.run(
      id,
      run.prompt,
      run.systemPrompt ?? null,
      run.taskType,
      run.model,
      run.success ? 1 : 0,
      run.output ?? null,
      run.error ?? null,
      run.durationMs,
      run.tokensIn ?? null,
      run.tokensOut ?? null,
      run.costUsd ?? null,
      run.metadata ?? null
    );
    return id;
  }

  /**
   * Gets a run by ID.
   *
   * @returns The run with `success` as a boolean, or null when not found.
   */
  getRun(id) {
    const row = this.db.prepare(`
      SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
      FROM runs
      WHERE id = ?
    `).get(id);
    if (!row) return null;
    return { ...row, success: Boolean(row.success) };
  }

  /**
   * Gets runs with optional filters, newest first.
   *
   * @param options - Optional `taskType`, `model`, `from`, `to`, `limit`
   *   (default 100) and `offset` (default 0).
   */
  getRuns(options) {
    const { whereClause, params } = this.#buildWhere(options, [
      ["taskType", "task_type = ?"],
      ["model", "model = ?"],
      ["from", "created_at >= ?"],
      ["to", "created_at <= ?"]
    ]);
    const limit = options?.limit ?? 100;
    const offset = options?.offset ?? 0;
    const rows = this.db.prepare(`
      SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
      FROM runs
      ${whereClause}
      ORDER BY created_at DESC
      LIMIT ? OFFSET ?
    `).all(...params, limit, offset);
    return rows.map((row) => ({ ...row, success: Boolean(row.success) }));
  }

  /**
   * Counts runs with optional filters (`taskType`, `model`, `from`, `to`).
   */
  countRuns(options) {
    const { whereClause, params } = this.#buildWhere(options, [
      ["taskType", "task_type = ?"],
      ["model", "model = ?"],
      ["from", "created_at >= ?"],
      ["to", "created_at <= ?"]
    ]);
    const row = this.db.prepare(`
      SELECT COUNT(*) as count
      FROM runs
      ${whereClause}
    `).get(...params);
    return row.count;
  }

  // ============================================================================
  // Outcomes
  // ============================================================================

  /**
   * Records an outcome for a run, replacing any outcome already stored for
   * the same run.
   *
   * @returns The id of the stored outcome row. When the run already had an
   *   outcome this is the existing row's id, because the upsert keeps it.
   */
  recordOutcome(outcome) {
    const candidateId = nanoid();
    this.db.prepare(`
      INSERT INTO outcomes (id, run_id, success, quality, latency_satisfactory, cost_satisfactory, feedback, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
      ON CONFLICT(run_id) DO UPDATE SET
        success = excluded.success,
        quality = excluded.quality,
        latency_satisfactory = excluded.latency_satisfactory,
        cost_satisfactory = excluded.cost_satisfactory,
        feedback = excluded.feedback,
        created_at = datetime('now')
    `).run(
      candidateId,
      outcome.runId,
      outcome.success ? 1 : 0,
      outcome.quality ?? null,
      this.#toNullableFlag(outcome.latencySatisfactory),
      this.#toNullableFlag(outcome.costSatisfactory),
      outcome.feedback ?? null
    );
    // The conflict branch does not touch `id`, so read back the id that is
    // actually stored instead of assuming the candidate was inserted.
    const stored = this.db.prepare("SELECT id FROM outcomes WHERE run_id = ?").get(outcome.runId);
    return stored?.id ?? candidateId;
  }

  /**
   * Gets an outcome for a run.
   *
   * @returns The outcome with flag columns as booleans, or null when not found.
   */
  getOutcome(runId) {
    const row = this.db.prepare(`
      SELECT id, run_id as runId, success, quality, latency_satisfactory as latencySatisfactory, cost_satisfactory as costSatisfactory, feedback, created_at as createdAt
      FROM outcomes
      WHERE run_id = ?
    `).get(runId);
    if (!row) return null;
    return {
      ...row,
      success: Boolean(row.success),
      latencySatisfactory: this.#toNullableBoolean(row.latencySatisfactory),
      costSatisfactory: this.#toNullableBoolean(row.costSatisfactory)
    };
  }

  /**
   * Gets outcomes joined with their runs, newest first.
   *
   * @param options - Optional `taskType`, `model`, `from`, `to` and `limit`
   *   (default 100). `from`/`to` apply to the outcome's creation time.
   */
  getOutcomes(options) {
    const { whereClause, params } = this.#buildWhere(options, [
      ["taskType", "r.task_type = ?"],
      ["model", "r.model = ?"],
      ["from", "o.created_at >= ?"],
      ["to", "o.created_at <= ?"]
    ]);
    const limit = options?.limit ?? 100;
    const rows = this.db.prepare(`
      SELECT o.id, o.run_id as runId, o.success, o.quality, o.latency_satisfactory as latencySatisfactory, o.cost_satisfactory as costSatisfactory, o.feedback, o.created_at as createdAt, r.task_type as taskType, r.model
      FROM outcomes o
      JOIN runs r ON o.run_id = r.id
      ${whereClause}
      ORDER BY o.created_at DESC
      LIMIT ?
    `).all(...params, limit);
    return rows.map((row) => ({
      ...row,
      success: Boolean(row.success),
      latencySatisfactory: this.#toNullableBoolean(row.latencySatisfactory),
      costSatisfactory: this.#toNullableBoolean(row.costSatisfactory)
    }));
  }

  // ============================================================================
  // Routing Rules
  // ============================================================================

  /**
   * Gets a routing rule for a task type.
   *
   * @returns The rule record, or null when none exists.
   */
  getRule(taskType) {
    const stmt = this.db.prepare(`
      SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
      FROM routing_rules
      WHERE task_type = ?
    `);
    return stmt.get(taskType) ?? null;
  }

  /**
   * Sets a routing rule for a task type, updating the existing rule when one
   * is already stored for that task type.
   *
   * @returns The rule id (the existing id when the rule was updated).
   */
  setRule(taskType, preferredModel, source, confidence, sampleCount) {
    // Reuse the stored id so the returned value matches the row after an update.
    const id = this.getRule(taskType)?.id ?? nanoid();
    this.db.prepare(`
      INSERT INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
      ON CONFLICT(task_type) DO UPDATE SET
        preferred_model = excluded.preferred_model,
        source = excluded.source,
        confidence = excluded.confidence,
        sample_count = excluded.sample_count,
        updated_at = datetime('now')
    `).run(id, taskType, preferredModel, source, confidence ?? null, sampleCount ?? null);
    return id;
  }

  /**
   * Lists all routing rules, ordered by task type.
   */
  listRules() {
    return this.db.prepare(`
      SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
      FROM routing_rules
      ORDER BY task_type
    `).all();
  }

  /**
   * Resets the routing rule for a task type to its built-in default.
   *
   * @returns False when the task type has no default rule, true otherwise.
   */
  deleteRule(taskType) {
    const defaultRule = DEFAULT_ROUTING_RULES.find((rule) => rule.taskType === taskType);
    if (!defaultRule) return false;
    this.setRule(taskType, defaultRule.preferredModel, "default");
    return true;
  }

  // ============================================================================
  // Suggestions
  // ============================================================================

  /**
   * Records a suggestion.
   *
   * @returns The generated suggestion id.
   */
  recordSuggestion(suggestion) {
    const id = nanoid();
    this.db.prepare(`
      INSERT INTO suggestions (id, task_type, current_model, suggested_model, reason, confidence, expected_improvement, sample_count, accepted, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
    `).run(
      id,
      suggestion.taskType,
      suggestion.currentModel,
      suggestion.suggestedModel,
      suggestion.reason,
      suggestion.confidence,
      suggestion.expectedImprovement,
      suggestion.sampleCount,
      suggestion.accepted ?? null
    );
    return id;
  }

  /**
   * Gets a suggestion by ID.
   *
   * @returns The suggestion with `accepted` as true/false/null, or null when
   *   not found.
   */
  getSuggestion(id) {
    const row = this.db.prepare(`
      SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
      FROM suggestions
      WHERE id = ?
    `).get(id);
    if (!row) return null;
    return { ...row, accepted: this.#toNullableBoolean(row.accepted) };
  }

  /**
   * Gets pending (neither accepted nor rejected) suggestions, highest
   * confidence first.
   */
  getPendingSuggestions() {
    const rows = this.db.prepare(`
      SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
      FROM suggestions
      WHERE accepted IS NULL
      ORDER BY confidence DESC
    `).all();
    return rows.map((row) => ({ ...row, accepted: this.#toNullableBoolean(row.accepted) }));
  }

  /**
   * Accepts a suggestion: marks it accepted and installs its suggested model
   * as a "learned" routing rule.
   *
   * @returns False when the suggestion does not exist, true otherwise.
   */
  acceptSuggestion(id) {
    const suggestion = this.getSuggestion(id);
    if (!suggestion) return false;
    const markAccepted = this.db.prepare(`
      UPDATE suggestions
      SET accepted = 1, accepted_at = datetime('now')
      WHERE id = ?
    `);
    // Both writes run in one transaction so a failure while writing the rule
    // cannot leave the suggestion flagged as accepted without its rule.
    const applyAcceptance = this.db.transaction(() => {
      markAccepted.run(id);
      this.setRule(
        suggestion.taskType,
        suggestion.suggestedModel,
        "learned",
        suggestion.confidence,
        suggestion.sampleCount
      );
    });
    applyAcceptance();
    return true;
  }

  /**
   * Rejects a suggestion. The decision time is stored in `accepted_at`.
   *
   * @returns True when a row was updated.
   */
  rejectSuggestion(id) {
    const result = this.db.prepare(`
      UPDATE suggestions
      SET accepted = 0, accepted_at = datetime('now')
      WHERE id = ?
    `).run(id);
    return result.changes > 0;
  }

  // ============================================================================
  // Statistics
  // ============================================================================

  /**
   * Gets aggregated run statistics, overall and grouped by task type and by
   * model.
   *
   * @param options - Optional `from` / `to` bounds on `created_at`.
   */
  getStats(options) {
    const { whereClause, params } = this.#buildWhere(options, [
      ["from", "created_at >= ?"],
      ["to", "created_at <= ?"]
    ]);
    const overall = this.db.prepare(`
      SELECT
        COUNT(*) as totalRuns,
        SUM(success) as successfulRuns,
        AVG(duration_ms) as avgDurationMs
      FROM runs
      ${whereClause}
    `).get(...params);

    // `column` and `alias` are fixed identifiers chosen below, never caller input.
    const groupedBy = (column, alias) => {
      const rows = this.db.prepare(`
        SELECT
          ${column} as ${alias},
          COUNT(*) as runs,
          AVG(success) as successRate,
          AVG(duration_ms) as avgDurationMs
        FROM runs
        ${whereClause}
        GROUP BY ${column}
      `).all(...params);
      const grouped = {};
      for (const row of rows) {
        grouped[row[alias]] = {
          runs: row.runs,
          successRate: row.successRate,
          avgDurationMs: row.avgDurationMs
        };
      }
      return grouped;
    };

    return {
      totalRuns: overall.totalRuns,
      // SUM/AVG yield NULL over zero rows; report 0 instead.
      successfulRuns: overall.successfulRuns ?? 0,
      avgDurationMs: overall.avgDurationMs ?? 0,
      byTaskType: groupedBy("task_type", "taskType"),
      byModel: groupedBy("model", "model")
    };
  }

  /**
   * Gets per-model statistics for one task type (runs left-joined with
   * outcomes). Only models with at least 5 runs are returned; when a run has
   * an outcome, the outcome's success value replaces the run's own in
   * `outcomeSuccessRate`.
   */
  getLearningStats(taskType) {
    return this.db.prepare(`
      SELECT
        r.model,
        COUNT(*) as runs,
        AVG(r.success) as successRate,
        AVG(r.duration_ms) as avgDurationMs,
        AVG(CASE WHEN o.success IS NOT NULL THEN o.success ELSE r.success END) as outcomeSuccessRate
      FROM runs r
      LEFT JOIN outcomes o ON r.id = o.run_id
      WHERE r.task_type = ?
      GROUP BY r.model
      HAVING runs >= 5
    `).all(taskType);
  }
};
665
-
666
- // src/routing/inference.ts
667
// Weighted regular expressions per task type. Every pattern that matches a
// prompt adds its weight to that task type's score.
var TASK_PATTERNS = {
  code_generation: [
    { pattern: /\b(write|create|generate|implement|build|code|develop|make)\b.{0,50}\b(function|class|code|script|program|method|module|api|endpoint|component)\b/i, weight: 10 },
    { pattern: /\b(write|create|generate)\b.{0,30}\b(python|javascript|typescript|java|go|rust|c\+\+|ruby|php|swift)\b/i, weight: 10 },
    { pattern: /\bcreate a.{0,30}(that|which|to)\b/i, weight: 5 },
    { pattern: /\bimplement\b.{0,50}\b(algorithm|logic|feature)\b/i, weight: 8 },
    { pattern: /\bcode\s+for\b/i, weight: 7 },
    { pattern: /\bwrite me\b.{0,30}\b(code|script|function)\b/i, weight: 9 },
    // Code blocks suggest code context
    { pattern: /```[\w]*\n/i, weight: 3 }
  ],
  code_review: [
    { pattern: /\b(review|analyze|check|audit|inspect|evaluate|assess|critique)\b.{0,30}\b(code|function|class|script|implementation|pull request|pr|diff)\b/i, weight: 10 },
    { pattern: /\b(what'?s? wrong|find\s+(bugs?|issues?|problems?|errors?))\b.{0,30}\b(code|function|this)\b/i, weight: 9 },
    { pattern: /\b(improve|optimize|refactor)\b.{0,30}\b(code|function|this)\b/i, weight: 7 },
    { pattern: /\blook\s+(at|over)\s+(this|my)\s+code\b/i, weight: 8 },
    { pattern: /\bcode\s+review\b/i, weight: 10 },
    { pattern: /\bcan you (check|review)\b/i, weight: 5 }
  ],
  summarization: [
    { pattern: /\b(summarize|summarise|summary|tldr|tl;dr|recap|condense|brief|overview)\b/i, weight: 10 },
    { pattern: /\b(give|provide|write)\s+(me\s+)?(a\s+)?(brief|short|quick|concise)\s+(summary|overview)\b/i, weight: 9 },
    { pattern: /\bshorten\s+(this|the)\b/i, weight: 6 },
    { pattern: /\bin\s+(brief|short|a nutshell)\b/i, weight: 7 },
    { pattern: /\bkey\s+(points?|takeaways?)\b/i, weight: 8 },
    { pattern: /\bmain\s+(ideas?|points?)\b/i, weight: 7 }
  ],
  analysis: [
    { pattern: /\b(analyze|analyse|analysis|examine|investigate|assess|evaluate|study)\b/i, weight: 8 },
    { pattern: /\b(compare|contrast|differentiate|distinguish)\b.{0,30}\b(between|and)\b/i, weight: 9 },
    { pattern: /\b(pros?\s+and\s+cons?|advantages?\s+and\s+disadvantages?|strengths?\s+and\s+weaknesses?)\b/i, weight: 9 },
    { pattern: /\b(what\s+are|explain)\s+(the\s+)?(implications?|consequences?|effects?|impacts?)\b/i, weight: 8 },
    { pattern: /\bbreak\s*down\b/i, weight: 6 },
    { pattern: /\bdeep\s*dive\b/i, weight: 7 },
    { pattern: /\bcritical(ly)?\s+(analysis|evaluation|assessment)\b/i, weight: 9 }
  ],
  creative_writing: [
    { pattern: /\b(write|create|compose|craft|author)\b.{0,30}\b(story|poem|essay|article|blog|post|narrative|fiction|novel|song|lyrics)\b/i, weight: 10 },
    { pattern: /\b(creative|imaginative|fictional)\s+(writing|story|piece)\b/i, weight: 10 },
    { pattern: /\bonce upon a time\b/i, weight: 8 },
    { pattern: /\b(write|tell)\s+(me\s+)?(a\s+)?(short\s+)?story\b/i, weight: 9 },
    { pattern: /\b(brainstorm|ideate)\b.{0,30}\b(ideas?|concepts?|themes?)\b/i, weight: 7 },
    { pattern: /\bwrite\s+(in|like)\s+(the\s+)?style\s+of\b/i, weight: 8 },
    { pattern: /\b(catchy|creative|engaging)\s+(title|headline|tagline|slogan)\b/i, weight: 7 }
  ],
  data_extraction: [
    { pattern: /\b(extract|parse|pull|get|retrieve|find|identify)\b.{0,30}\b(data|information|details?|values?|fields?|entities?|names?|numbers?|dates?|emails?|phones?|addresses?)\b/i, weight: 10 },
    { pattern: /\b(convert|transform)\b.{0,30}\b(to|into)\s+(json|csv|xml|yaml|table|structured)\b/i, weight: 9 },
    { pattern: /\bstructured\s+(data|output|format)\b/i, weight: 8 },
    { pattern: /\bnamed\s+entity\s+(recognition|extraction)\b/i, weight: 10 },
    { pattern: /\b(scrape|crawl)\b/i, weight: 6 },
    { pattern: /\bjson\s+(output|format|schema)\b/i, weight: 7 }
  ],
  translation: [
    { pattern: /\b(translate|translation|translator)\b/i, weight: 10 },
    { pattern: /\b(convert|change)\b.{0,20}\b(to|into)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 9 },
    { pattern: /\b(in|to)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 6 },
    { pattern: /\bfrom\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\s+to\b/i, weight: 10 },
    // The alternation is grouped so both word boundaries apply to every
    // alternative; ungrouped, "localization" had no leading boundary and
    // matched inside longer words such as "delocalization".
    { pattern: /\b(localize[ds]?|localization)\b/i, weight: 7 }
  ],
  question_answering: [
    { pattern: /^(what|who|where|when|why|how|which|is|are|does|do|can|could|would|should|will|did)\s/i, weight: 7 },
    { pattern: /\?$/i, weight: 5 },
    { pattern: /\b(explain|describe|define|what\s+is|what\s+are|tell\s+me\s+about)\b/i, weight: 8 },
    { pattern: /\b(answer|respond|reply)\b.{0,20}\b(question|query)\b/i, weight: 9 },
    { pattern: /\bfaq\b/i, weight: 8 },
    { pattern: /\bi\s+(want|need)\s+to\s+know\b/i, weight: 6 },
    { pattern: /\bcan\s+you\s+(tell|explain|help)\b/i, weight: 5 }
  ],
  general: [
    // Catch-all patterns with low weights
    { pattern: /./i, weight: 1 }
  ]
};
741
/**
 * Infers the task type of a prompt by summing the weights of every matching
 * pattern in `TASK_PATTERNS` and picking the highest-scoring type.
 *
 * @param prompt - The prompt text.
 * @returns The best-scoring task type, or "general" when no type scores above
 *   1. On a tie the type listed first wins.
 */
function inferTaskType(prompt) {
  // Patterns anchored with ^ or $ must see the prompt without surrounding
  // whitespace, otherwise a padded prompt or one ending in a newline never
  // matches them.
  const normalizedPrompt = prompt.trim();
  const scores = {
    code_generation: 0,
    code_review: 0,
    summarization: 0,
    analysis: 0,
    creative_writing: 0,
    data_extraction: 0,
    translation: 0,
    question_answering: 0,
    general: 0
  };
  for (const [taskType, patterns] of Object.entries(TASK_PATTERNS)) {
    for (const { pattern, weight } of patterns) {
      if (pattern.test(normalizedPrompt)) {
        scores[taskType] += weight;
      }
    }
  }
  let maxScore = 0;
  let inferredType = "general";
  for (const [taskType, score] of Object.entries(scores)) {
    // Strict comparison keeps the earliest task type on ties.
    if (score > maxScore) {
      maxScore = score;
      inferredType = taskType;
    }
  }
  // A score of 1 can only come from the catch-all pattern.
  if (maxScore <= 1) {
    return "general";
  }
  return inferredType;
}
774
/**
 * Scores how strongly a prompt matches one task type: the summed weight of
 * that type's matching patterns divided by the summed weight of all its
 * patterns, capped at 0.95.
 *
 * @param prompt - The prompt text.
 * @param taskType - Key into `TASK_PATTERNS`.
 * @returns A value in [0, 0.95]; 0 for an unknown task type or when the
 *   type's weights sum to zero.
 */
function getInferenceConfidence(prompt, taskType) {
  const rules = TASK_PATTERNS[taskType];
  if (!rules) {
    return 0;
  }
  const totals = rules.reduce(
    (acc, rule) => {
      acc.possible += rule.weight;
      if (rule.pattern.test(prompt)) {
        acc.matched += rule.weight;
      }
      return acc;
    },
    { matched: 0, possible: 0 }
  );
  if (totals.possible === 0) {
    return 0;
  }
  return Math.min(totals.matched / totals.possible, 0.95);
}
788
-
789
- // src/routing/engine.ts
790
- var RoutingEngine = class {
791
- store;
792
- /**
793
- * Creates a new RoutingEngine.
794
- *
795
- * @param store - The storage instance to use
796
- */
797
- constructor(store) {
798
- this.store = store;
799
- }
800
- /**
801
- * Infers the task type from a prompt.
802
- *
803
- * @param prompt - The prompt to analyze
804
- * @returns The inferred task type
805
- */
806
- inferTaskType(prompt) {
807
- return inferTaskType(prompt);
808
- }
809
- /**
810
- * Gets the inference confidence for a task type.
811
- *
812
- * @param prompt - The prompt to analyze
813
- * @param taskType - The task type to check
814
- * @returns Confidence score (0-1)
815
- */
816
- getInferenceConfidence(prompt, taskType) {
817
- return getInferenceConfidence(prompt, taskType);
818
- }
819
- /**
820
- * Gets the routing rule for a task type.
821
- *
822
- * @param taskType - The task type to get the rule for
823
- * @returns The routing rule, or null if not found
824
- */
825
- get(taskType) {
826
- const record = this.store.getRule(taskType);
827
- if (!record) return null;
828
- return {
829
- id: record.id,
830
- taskType: record.taskType,
831
- preferredModel: record.preferredModel,
832
- source: record.source,
833
- confidence: record.confidence ?? void 0,
834
- sampleCount: record.sampleCount ?? void 0,
835
- createdAt: record.createdAt,
836
- updatedAt: record.updatedAt
837
- };
838
- }
839
- /**
840
- * Sets a routing rule for a task type.
841
- *
842
- * @param taskType - The task type to set the rule for
843
- * @param preferredModel - The preferred model (format: "provider:model")
844
- * @param source - How the rule was created
845
- * @param options - Optional confidence and sample count
846
- * @returns The rule ID
847
- */
848
- set(taskType, preferredModel, source = "user", options) {
849
- return this.store.setRule(
850
- taskType,
851
- preferredModel,
852
- source,
853
- options?.confidence,
854
- options?.sampleCount
855
- );
856
- }
857
- /**
858
- * Lists all routing rules.
859
- *
860
- * @returns Array of all routing rules
861
- */
862
- list() {
863
- const records = this.store.listRules();
864
- return records.map((record) => ({
865
- id: record.id,
866
- taskType: record.taskType,
867
- preferredModel: record.preferredModel,
868
- source: record.source,
869
- confidence: record.confidence ?? void 0,
870
- sampleCount: record.sampleCount ?? void 0,
871
- createdAt: record.createdAt,
872
- updatedAt: record.updatedAt
873
- }));
874
- }
875
- /**
876
- * Deletes a routing rule and resets to default.
877
- *
878
- * @param taskType - The task type to reset
879
- * @returns True if the rule was reset
880
- */
881
- delete(taskType) {
882
- return this.store.deleteRule(taskType);
883
- }
884
- /**
885
- * Gets the preferred model for a task type.
886
- *
887
- * @param taskType - The task type
888
- * @returns The preferred model string, or a default
889
- */
890
- getPreferredModel(taskType) {
891
- const rule = this.get(taskType);
892
- return rule?.preferredModel ?? "local:llama3.2";
893
- }
894
- /**
895
- * Parses a model string into provider and model name.
896
- *
897
- * @param modelString - The model string (format: "provider:model")
898
- * @returns Object with provider and model
899
- */
900
- parseModel(modelString) {
901
- const parts = modelString.split(":");
902
- if (parts.length < 2) {
903
- return { provider: "local", model: modelString };
904
- }
905
- return { provider: parts[0], model: parts.slice(1).join(":") };
906
- }
907
- /**
908
- * Resolves the model to use for a prompt.
909
- *
910
- * @param prompt - The prompt to analyze
911
- * @param overrideTaskType - Optional task type override
912
- * @param overrideModel - Optional model override
913
- * @returns Object with resolved taskType, model, provider, and confidence
914
- */
915
- resolve(prompt, overrideTaskType, overrideModel) {
916
- const taskType = overrideTaskType ?? this.inferTaskType(prompt);
917
- const confidence = this.getInferenceConfidence(prompt, taskType);
918
- let model;
919
- if (overrideModel) {
920
- model = overrideModel;
921
- } else {
922
- model = this.getPreferredModel(taskType);
923
- }
924
- const { provider, model: modelName } = this.parseModel(model);
925
- return {
926
- taskType,
927
- model,
928
- provider,
929
- modelName,
930
- confidence
931
- };
932
- }
933
- };
934
-
935
- // src/learning/outcomes.ts
936
var OutcomeRecorder = class {
  store;
  /**
   * Creates a new OutcomeRecorder.
   *
   * @param store - The storage instance to use
   */
  constructor(store) {
    this.store = store;
  }
  /**
   * Records an outcome for a run and returns it as read back from the store.
   *
   * @param input - The outcome input (`runId`, `success`, and optional
   *                `quality`, `latencySatisfactory`, `costSatisfactory`, `feedback`)
   * @returns The recorded outcome
   * @throws If the run ID is not found, or the outcome cannot be read back
   */
  record(input) {
    const run = this.store.getRun(input.runId);
    if (!run) {
      throw new Error(`Run not found: ${input.runId}`);
    }
    // Absent optional ratings are stored as explicit nulls.
    this.store.recordOutcome({
      runId: input.runId,
      success: input.success,
      quality: input.quality ?? null,
      latencySatisfactory: input.latencySatisfactory ?? null,
      costSatisfactory: input.costSatisfactory ?? null,
      feedback: input.feedback ?? null
    });
    // Re-read through get() so both entry points share one record -> outcome mapping.
    const outcome = this.get(input.runId);
    if (!outcome) {
      throw new Error("Failed to record outcome");
    }
    return outcome;
  }
  /**
   * Gets an outcome for a run.
   *
   * @param runId - The run ID
   * @returns The outcome, or null if not found
   */
  get(runId) {
    const outcome = this.store.getOutcome(runId);
    if (!outcome) return null;
    return {
      id: outcome.id,
      runId: outcome.runId,
      success: outcome.success,
      // Stored nulls are surfaced as undefined.
      quality: outcome.quality ?? void 0,
      latencySatisfactory: outcome.latencySatisfactory ?? void 0,
      costSatisfactory: outcome.costSatisfactory ?? void 0,
      feedback: outcome.feedback ?? void 0,
      recordedAt: outcome.createdAt
    };
  }
  /**
   * Gets outcome statistics for a task type.
   *
   * Satisfaction rates are computed only over outcomes that actually carry
   * the corresponding rating; they are 0 when nothing was rated.
   *
   * @param taskType - The task type to get stats for
   * @param limit - Maximum number of outcomes to request from the store (default: 1000)
   * @returns Outcome statistics
   */
  getTaskStats(taskType, limit = 1e3) {
    const outcomes = this.store.getOutcomes({ taskType, limit });
    if (outcomes.length === 0) {
      return {
        totalOutcomes: 0,
        successRate: 0,
        qualityDistribution: {},
        latencySatisfactionRate: 0,
        costSatisfactionRate: 0
      };
    }
    let successCount = 0;
    let latencySatisfiedCount = 0;
    let latencyRatedCount = 0;
    let costSatisfiedCount = 0;
    let costRatedCount = 0;
    const qualityDistribution = {};
    for (const outcome of outcomes) {
      if (outcome.success) successCount++;
      if (outcome.quality) {
        qualityDistribution[outcome.quality] = (qualityDistribution[outcome.quality] ?? 0) + 1;
      }
      if (outcome.latencySatisfactory != null) {
        latencyRatedCount++;
        if (outcome.latencySatisfactory) latencySatisfiedCount++;
      }
      if (outcome.costSatisfactory != null) {
        costRatedCount++;
        if (outcome.costSatisfactory) costSatisfiedCount++;
      }
    }
    return {
      totalOutcomes: outcomes.length,
      successRate: successCount / outcomes.length,
      qualityDistribution,
      latencySatisfactionRate: latencyRatedCount > 0 ? latencySatisfiedCount / latencyRatedCount : 0,
      costSatisfactionRate: costRatedCount > 0 ? costSatisfiedCount / costRatedCount : 0
    };
  }
};
1047
-
1048
- // src/learning/savings.ts
1049
// Per-model price table, keyed by bare model name (no "provider:" prefix).
// calculateCost multiplies `input` / `output` by token counts divided by 1e6.
// Each row is [model name, input price, output price].
var MODEL_PRICING = Object.fromEntries(
  [
    // Anthropic models
    ["claude-3-5-haiku-latest", 0.25, 1.25],
    ["claude-3-5-haiku-20241022", 0.25, 1.25],
    ["claude-3-5-sonnet-latest", 3, 15],
    ["claude-3-5-sonnet-20241022", 3, 15],
    ["claude-sonnet-4-20250514", 3, 15],
    ["claude-3-opus-latest", 15, 75],
    ["claude-3-opus-20240229", 15, 75],
    ["claude-opus-4-5-20250514", 15, 75],
    // OpenAI models
    ["gpt-4o", 2.5, 10],
    ["gpt-4o-mini", 0.15, 0.6],
    ["gpt-4.1", 2, 8],
    ["gpt-4-turbo", 10, 30],
    // Google models
    ["gemini-1.5-flash", 0.075, 0.3],
    ["gemini-1.5-pro", 1.25, 5],
    ["gemini-2.0-flash", 0.1, 0.4],
    // xAI models
    ["grok-2", 2, 10],
    ["grok-2-latest", 2, 10],
    // Moonshot models
    ["moonshot-v1-8k", 0.1, 0.1],
    ["moonshot-v1-32k", 0.2, 0.2]
  ].map(([name, input, output]) => [name, { input, output }])
);
// Model whose pricing calculateSavings uses as the "what it would have cost" baseline.
var BASELINE_MODEL = "claude-3-opus-latest";
1076
/**
 * Computes the cost of one call from its token counts.
 *
 * Pricing is looked up in MODEL_PRICING under, in order: everything after
 * the first ":" of `model`, the segment between the first and second ":",
 * and the full `model` string. Only own keys of the table are considered,
 * so names such as "constructor" cannot resolve to inherited properties.
 * Unknown models are priced at { input: 1, output: 3 }.
 *
 * @param model - Model string, with or without a "provider:" prefix
 * @param tokensIn - Number of input tokens
 * @param tokensOut - Number of output tokens
 * @returns Cost, with prices applied per 1e6 tokens
 */
function calculateCost(model, tokensIn, tokensOut) {
  const candidates = [];
  const separatorAt = model.indexOf(":");
  if (separatorAt !== -1) {
    // Full remainder first, so model names that themselves contain ":" can match.
    candidates.push(model.slice(separatorAt + 1));
    // Then only the segment directly after the provider prefix.
    candidates.push(model.split(":")[1]);
  }
  candidates.push(model);
  const known = candidates.find((name) => Object.hasOwn(MODEL_PRICING, name));
  const pricing = known === void 0 ? { input: 1, output: 3 } : MODEL_PRICING[known];
  const inputCost = tokensIn / 1e6 * pricing.input;
  const outputCost = tokensOut / 1e6 * pricing.output;
  return inputCost + outputCost;
}
1083
/**
 * Builds a savings report for the runs of the last `days` days.
 *
 * Actual cost comes from calculateCost per run; baseline cost prices the
 * same tokens at BASELINE_MODEL rates (falling back to 15 / 75 if that
 * model is missing from MODEL_PRICING).
 *
 * @param store - Storage instance exposing `getRuns({ from, to, limit })`
 * @param days - Number of days to look back (default: 30)
 * @returns Report with totals, savings (never negative), and per-model /
 *          per-task-type breakdowns
 */
function calculateSavings(store, days = 30) {
  const to = new Date();
  const from = new Date(to);
  from.setDate(from.getDate() - days);
  const fromStr = from.toISOString();
  const toStr = to.toISOString();
  const runs = store.getRuns({
    from: fromStr,
    to: toStr,
    // Large cap intended to fetch every run in the window.
    limit: 1e5
  });
  const baselinePricing = MODEL_PRICING[BASELINE_MODEL] ?? { input: 15, output: 75 };
  const byModel = {};
  const taskTotals = {};
  let totalTokensIn = 0;
  let totalTokensOut = 0;
  let actualCost = 0;
  let baselineCost = 0;
  for (const run of runs) {
    const tokensIn = run.tokensIn ?? 0;
    const tokensOut = run.tokensOut ?? 0;
    // Strip only the provider prefix; later ":" characters belong to the model name,
    // so "local:llama3:8b" and "local:llama3:70b" stay separate.
    const separatorAt = run.model.indexOf(":");
    const modelName = separatorAt === -1 ? run.model : run.model.slice(separatorAt + 1);
    const runCost = calculateCost(run.model, tokensIn, tokensOut);
    actualCost += runCost;
    baselineCost += tokensIn / 1e6 * baselinePricing.input + tokensOut / 1e6 * baselinePricing.output;
    totalTokensIn += tokensIn;
    totalTokensOut += tokensOut;
    if (!byModel[modelName]) {
      byModel[modelName] = {
        runs: 0,
        tokensIn: 0,
        tokensOut: 0,
        cost: 0,
        successRate: 0,
        avgLatencyMs: 0
      };
    }
    const modelStats = byModel[modelName];
    modelStats.runs++;
    modelStats.tokensIn += tokensIn;
    modelStats.tokensOut += tokensOut;
    modelStats.cost += runCost;
    // successRate / avgLatencyMs hold running sums here and are turned into
    // averages after the loop. A missing duration counts as 0 rather than
    // poisoning the sum with NaN.
    modelStats.avgLatencyMs += run.durationMs ?? 0;
    if (run.success) {
      modelStats.successRate++;
    }
    if (!taskTotals[run.taskType]) {
      taskTotals[run.taskType] = { runs: 0, totalCost: 0 };
    }
    const taskStats = taskTotals[run.taskType];
    taskStats.runs++;
    taskStats.totalCost += runCost;
  }
  for (const stats of Object.values(byModel)) {
    stats.successRate = stats.runs > 0 ? stats.successRate / stats.runs : 0;
    stats.avgLatencyMs = stats.runs > 0 ? stats.avgLatencyMs / stats.runs : 0;
  }
  const byTaskType = {};
  for (const [taskType, stats] of Object.entries(taskTotals)) {
    byTaskType[taskType] = {
      runs: stats.runs,
      cost: stats.totalCost,
      avgCostPerRun: stats.runs > 0 ? stats.totalCost / stats.runs : 0
    };
  }
  const savings = baselineCost - actualCost;
  const savingsPercent = baselineCost > 0 ? savings / baselineCost * 100 : 0;
  return {
    periodDays: days,
    period: {
      from: fromStr,
      to: toStr
    },
    totalRuns: runs.length,
    totalTokensIn,
    totalTokensOut,
    actualCost,
    baselineCost,
    // Don't report negative savings
    savings: Math.max(0, savings),
    savingsPercent: Math.max(0, savingsPercent),
    byModel,
    byTaskType
  };
}
1171
-
1172
- // src/learning/patterns.ts
1173
// Thresholds used by PatternDetector.analyzeTaskType.

// A candidate model needs at least this many runs before it can be suggested.
var MIN_RUNS_FOR_SUGGESTION = 10;
// Suggestions whose combined confidence falls below this value are dropped.
var MIN_CONFIDENCE_THRESHOLD = 0.6;
// Minimum gain in success rate or relative latency that counts as significant.
var MIN_IMPROVEMENT_THRESHOLD = 0.1;
// Minimum relative price reduction that counts as significant.
var MIN_COST_IMPROVEMENT_THRESHOLD = 0.2;
1177
var PatternDetector = class {
  store;
  /**
   * Creates a new PatternDetector.
   *
   * @param store - The storage instance to use
   */
  constructor(store) {
    this.store = store;
  }
  /**
   * Finds the MODEL_PRICING entry for a model string.
   *
   * Tries everything after the first ":" and then only the segment after
   * the provider prefix; a string without ":" is looked up as-is. Only own
   * keys of the table are considered.
   *
   * @param model - Model string, with or without a "provider:" prefix
   * @returns The pricing entry, or undefined when the model is not priced
   */
  #pricingFor(model) {
    const separatorAt = model.indexOf(":");
    const candidates = separatorAt === -1 ? [model] : [model.slice(separatorAt + 1), model.split(":")[1]];
    const known = candidates.find((name) => Object.hasOwn(MODEL_PRICING, name));
    return known === void 0 ? void 0 : MODEL_PRICING[known];
  }
  /**
   * Analyzes a task type and generates suggestions if appropriate.
   *
   * Every model other than the current rule's preferred model is compared
   * against it on outcome success rate, average duration and list price.
   * Nothing is suggested when there are fewer than two models with stats,
   * no rule for the task type, or no stats for the current model.
   *
   * @param taskType - The task type to analyze
   * @returns Array of suggestions, highest confidence first
   */
  analyzeTaskType(taskType) {
    const stats = this.store.getLearningStats(taskType);
    if (stats.length < 2) {
      return [];
    }
    const currentRule = this.store.getRule(taskType);
    if (!currentRule) return [];
    const currentModel = currentRule.preferredModel;
    const currentStats = stats.find((s) => s.model === currentModel);
    if (!currentStats) return [];
    const currentPricing = this.#pricingFor(currentModel);
    const suggestions = [];
    for (const modelStats of stats) {
      if (modelStats.model === currentModel) continue;
      if (modelStats.runs < MIN_RUNS_FOR_SUGGESTION) continue;
      const successImprovement = modelStats.outcomeSuccessRate - currentStats.outcomeSuccessRate;
      // Relative speed-up. Without a positive current duration the ratio is
      // undefined (NaN / Infinity), so treat it as "no measurable change".
      const latencyImprovement = currentStats.avgDurationMs > 0 ? (currentStats.avgDurationMs - modelStats.avgDurationMs) / currentStats.avgDurationMs : 0;
      const suggestedPricing = this.#pricingFor(modelStats.model);
      let costImprovement = 0;
      if (currentPricing && suggestedPricing) {
        // Compare the plain average of input and output price.
        const currentAvgCost = (currentPricing.input + currentPricing.output) / 2;
        const suggestedAvgCost = (suggestedPricing.input + suggestedPricing.output) / 2;
        if (currentAvgCost > 0) {
          costImprovement = (currentAvgCost - suggestedAvgCost) / currentAvgCost;
        }
      }
      // Better when: clearly more successful; or no less successful and clearly
      // faster; or at most 5 points less successful and clearly cheaper.
      const isSignificantlyBetter = successImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= 0 && latencyImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= -0.05 && costImprovement > MIN_COST_IMPROVEMENT_THRESHOLD;
      if (!isSignificantlyBetter) continue;
      // Sample confidence saturates at 50 runs.
      const sampleConfidence = Math.min(modelStats.runs / 50, 1);
      // NOTE(review): absolute values mean a regression on one axis also raises
      // this score; confirm that is intended.
      const improvementConfidence = Math.min(
        Math.abs(successImprovement) / 0.3 + Math.abs(latencyImprovement) / 0.5 + Math.abs(costImprovement) / 0.5,
        1
      );
      const confidence = (sampleConfidence + improvementConfidence) / 2;
      if (confidence < MIN_CONFIDENCE_THRESHOLD) continue;
      const reasons = [];
      if (successImprovement > 0) {
        reasons.push(`${(successImprovement * 100).toFixed(0)}% higher success rate`);
      }
      if (latencyImprovement > 0) {
        reasons.push(`${(latencyImprovement * 100).toFixed(0)}% faster`);
      }
      if (costImprovement > 0) {
        reasons.push(`${(costImprovement * 100).toFixed(0)}% cheaper`);
      }
      suggestions.push({
        id: nanoid(),
        taskType,
        currentModel,
        suggestedModel: modelStats.model,
        reason: reasons.join(", "),
        confidence,
        expectedImprovement: {
          successRate: successImprovement > 0 ? successImprovement : void 0,
          latency: latencyImprovement > 0 ? latencyImprovement : void 0,
          cost: costImprovement > 0 ? costImprovement : void 0
        },
        sampleCount: modelStats.runs,
        createdAt: (/* @__PURE__ */ new Date()).toISOString()
      });
    }
    suggestions.sort((a, b) => b.confidence - a.confidence);
    return suggestions;
  }
  /**
   * Analyzes all task types and generates suggestions.
   *
   * @returns Array of all suggestions across task types, highest confidence first
   */
  analyzeAll() {
    const taskTypes = [
      "code_generation",
      "code_review",
      "summarization",
      "analysis",
      "creative_writing",
      "data_extraction",
      "translation",
      "question_answering",
      "general"
    ];
    const allSuggestions = taskTypes.flatMap((taskType) => this.analyzeTaskType(taskType));
    allSuggestions.sort((a, b) => b.confidence - a.confidence);
    return allSuggestions;
  }
  /**
   * Stores suggestions in the database.
   *
   * The suggestion's own `id` is not passed to the store; the store's
   * return value is collected instead.
   *
   * @param suggestions - The suggestions to store
   * @returns Array of suggestion IDs, in the same order as `suggestions`
   */
  storeSuggestions(suggestions) {
    return suggestions.map((suggestion) => this.store.recordSuggestion({
      taskType: suggestion.taskType,
      currentModel: suggestion.currentModel,
      suggestedModel: suggestion.suggestedModel,
      reason: suggestion.reason,
      confidence: suggestion.confidence,
      expectedImprovement: JSON.stringify(suggestion.expectedImprovement),
      sampleCount: suggestion.sampleCount,
      accepted: null
    }));
  }
  /**
   * Generates and stores new suggestions, returning only new ones.
   *
   * A suggestion is new when no pending suggestion exists for the same
   * task type and suggested model. Returned suggestions carry the ID the
   * store assigned, so they can be passed on to accept/reject calls.
   *
   * @returns Array of new suggestions
   */
  generateSuggestions() {
    const suggestions = this.analyzeAll();
    const pending = this.store.getPendingSuggestions();
    const existingKeys = new Set(
      pending.map((s) => `${s.taskType}:${s.suggestedModel}`)
    );
    const newSuggestions = suggestions.filter(
      (s) => !existingKeys.has(`${s.taskType}:${s.suggestedModel}`)
    );
    const storedIds = this.storeSuggestions(newSuggestions);
    // Swap the temporary analysis ID for the persisted one when the store returned one.
    return newSuggestions.map(
      (s, index) => storedIds[index] == null ? s : { ...s, id: storedIds[index] }
    );
  }
};
1324
-
1325
- // src/relay.ts
1326
var RelayPlane = class {
  store;
  _routing;
  outcomeRecorder;
  patternDetector;
  config;
  adapterRegistry = null;
  /**
   * Creates a new RelayPlane instance.
   *
   * Opens the store at the configured database path and wires the routing
   * engine, outcome recorder and pattern detector to it.
   *
   * @param config - Configuration options (`dbPath`, `defaultProvider`,
   *                 `defaultModel`, `providers`); all optional
   */
  constructor(config = {}) {
    this.config = {
      dbPath: config.dbPath ?? getDefaultDbPath(),
      defaultProvider: config.defaultProvider ?? "local",
      defaultModel: config.defaultModel ?? "llama3.2",
      providers: config.providers ?? {}
    };
    this.store = new Store(this.config.dbPath);
    this._routing = new RoutingEngine(this.store);
    this.outcomeRecorder = new OutcomeRecorder(this.store);
    this.patternDetector = new PatternDetector(this.store);
  }
  /**
   * Gets the routing engine for direct access.
   */
  get routing() {
    return this._routing;
  }
  /**
   * Runs a prompt through the appropriate model.
   *
   * The run is always recorded in the store. When no adapter exists for
   * the resolved provider, a failed run is recorded and returned instead
   * of throwing. In this package getAdapter() always resolves to null, so
   * that is the path taken.
   *
   * @param input - The run input (`prompt`, optional `systemPrompt`,
   *                `taskType`, `model`, `metadata`)
   * @returns The run result
   */
  async run(input) {
    const startTime = Date.now();
    const resolved = this._routing.resolve(input.prompt, input.taskType, input.model);
    const adapter = await this.getAdapter(resolved.provider);
    const metadata = input.metadata ? JSON.stringify(input.metadata) : null;
    if (!adapter) {
      const error = `No adapter configured for provider: ${resolved.provider}`;
      // Measured once so the stored and the returned duration agree.
      const failedAfterMs = Date.now() - startTime;
      const failedRunId = this.store.recordRun({
        prompt: input.prompt,
        systemPrompt: input.systemPrompt ?? null,
        taskType: resolved.taskType,
        model: resolved.model,
        success: false,
        output: null,
        error,
        durationMs: failedAfterMs,
        tokensIn: null,
        tokensOut: null,
        costUsd: null,
        metadata
      });
      return {
        runId: failedRunId,
        success: false,
        error,
        taskType: resolved.taskType,
        model: resolved.model,
        durationMs: failedAfterMs,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      };
    }
    const providerConfig = this.config.providers?.[resolved.provider];
    const apiKey = providerConfig?.apiKey ?? this.getEnvApiKey(resolved.provider);
    // The system prompt, when present, is prepended and separated by a blank line.
    const fullInput = input.systemPrompt ? `${input.systemPrompt}\n\n${input.prompt}` : input.prompt;
    const result = await adapter.execute({
      model: resolved.modelName,
      input: fullInput,
      apiKey: apiKey ?? "",
      baseUrl: providerConfig?.baseUrl
    });
    const durationMs = Date.now() - startTime;
    const costUsd = calculateCost(resolved.model, result.tokensIn ?? 0, result.tokensOut ?? 0);
    const runId = this.store.recordRun({
      prompt: input.prompt,
      systemPrompt: input.systemPrompt ?? null,
      taskType: resolved.taskType,
      model: resolved.model,
      success: result.success,
      output: result.output ?? null,
      error: result.error?.message ?? null,
      durationMs,
      tokensIn: result.tokensIn ?? null,
      tokensOut: result.tokensOut ?? null,
      // A zero cost is stored as "unknown" (null).
      costUsd: costUsd > 0 ? costUsd : null,
      metadata
    });
    return {
      runId,
      success: result.success,
      output: result.output,
      error: result.error?.message,
      taskType: resolved.taskType,
      model: resolved.model,
      durationMs,
      tokensIn: result.tokensIn,
      tokensOut: result.tokensOut,
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    };
  }
  /**
   * Gets an adapter for a provider.
   * Note: In the standalone proxy package, adapters are not used.
   * The proxy handles provider calls directly via HTTP.
   */
  async getAdapter(_provider) {
    return null;
  }
  /**
   * Gets an API key from environment variables.
   *
   * For "google", GOOGLE_API_KEY is tried first and GEMINI_API_KEY second,
   * the latter being the variable named in the proxy's DEFAULT_ENDPOINTS
   * table. Unset and empty variables are skipped.
   *
   * @param provider - Provider name (e.g. "openai", "anthropic")
   * @returns The first non-empty key, or undefined for unknown providers,
   *          "local", or when no variable is set
   */
  getEnvApiKey(provider) {
    const envVars = {
      openai: ["OPENAI_API_KEY"],
      anthropic: ["ANTHROPIC_API_KEY"],
      google: ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
      xai: ["XAI_API_KEY"],
      moonshot: ["MOONSHOT_API_KEY"],
      local: []
    };
    // Own-key check keeps names like "constructor" from hitting Object.prototype.
    if (!Object.hasOwn(envVars, provider)) return void 0;
    for (const name of envVars[provider]) {
      const value = process.env[name];
      if (value) return value;
    }
    return void 0;
  }
  /**
   * Records an outcome for a run.
   *
   * @param runId - The run ID
   * @param outcome - The outcome details
   * @returns The recorded outcome
   */
  recordOutcome(runId, outcome) {
    return this.outcomeRecorder.record({
      runId,
      ...outcome
    });
  }
  /**
   * Gets an outcome for a run.
   *
   * @param runId - The run ID
   * @returns The outcome, or null if not found
   */
  getOutcome(runId) {
    return this.outcomeRecorder.get(runId);
  }
  /**
   * Gets statistics for runs.
   *
   * Every known task type gets an entry, zero-filled when the store has no
   * data for it. Each entry's `byModel` lists every model the store
   * reported, always with zeroed numbers: the raw stats read here carry no
   * per-task, per-model figures.
   *
   * @param options - Optional filters, passed through to the store
   * @returns Statistics object
   */
  stats(options) {
    const raw = this.store.getStats(options);
    const taskTypes = [
      "code_generation",
      "code_review",
      "summarization",
      "analysis",
      "creative_writing",
      "data_extraction",
      "translation",
      "question_answering",
      "general"
    ];
    const modelNames = Object.keys(raw.byModel);
    const byTaskType = {};
    for (const taskType of taskTypes) {
      const taskStats = raw.byTaskType[taskType];
      const totalRuns = taskStats?.runs ?? 0;
      const successRate = taskStats?.successRate ?? 0;
      const byModel = {};
      for (const model of modelNames) {
        byModel[model] = { runs: 0, successRate: 0, avgDurationMs: 0 };
      }
      byTaskType[taskType] = {
        taskType,
        totalRuns,
        // Reconstructed from the rate, hence the rounding.
        successfulRuns: Math.round(totalRuns * successRate),
        successRate,
        avgDurationMs: taskStats?.avgDurationMs ?? 0,
        byModel
      };
    }
    return {
      totalRuns: raw.totalRuns,
      overallSuccessRate: raw.totalRuns > 0 ? raw.successfulRuns / raw.totalRuns : 0,
      byTaskType,
      period: {
        from: options?.from ?? "",
        to: options?.to ?? (/* @__PURE__ */ new Date()).toISOString()
      }
    };
  }
  /**
   * Gets a savings report.
   *
   * @param days - Number of days to include (default: 30)
   * @returns Savings report
   */
  savingsReport(days = 30) {
    return calculateSavings(this.store, days);
  }
  /**
   * Gets routing improvement suggestions that are still pending.
   *
   * A record whose stored `expectedImprovement` is not valid JSON is
   * returned with an empty object there instead of failing the whole list.
   *
   * @returns Array of suggestions
   */
  getSuggestions() {
    const pending = this.store.getPendingSuggestions();
    return pending.map((record) => {
      let expectedImprovement;
      try {
        expectedImprovement = JSON.parse(record.expectedImprovement);
      } catch {
        expectedImprovement = {};
      }
      return {
        id: record.id,
        taskType: record.taskType,
        currentModel: record.currentModel,
        suggestedModel: record.suggestedModel,
        reason: record.reason,
        confidence: record.confidence,
        expectedImprovement,
        sampleCount: record.sampleCount,
        createdAt: record.createdAt,
        accepted: record.accepted ?? void 0,
        acceptedAt: record.acceptedAt ?? void 0
      };
    });
  }
  /**
   * Generates new suggestions based on current data.
   *
   * @returns Array of newly generated suggestions
   */
  generateSuggestions() {
    return this.patternDetector.generateSuggestions();
  }
  /**
   * Accepts a suggestion by delegating to the store.
   *
   * @param suggestionId - The suggestion ID to accept
   * @returns True if successful
   */
  acceptSuggestion(suggestionId) {
    return this.store.acceptSuggestion(suggestionId);
  }
  /**
   * Rejects a suggestion by delegating to the store.
   *
   * @param suggestionId - The suggestion ID to reject
   * @returns True if successful
   */
  rejectSuggestion(suggestionId) {
    return this.store.rejectSuggestion(suggestionId);
  }
  /**
   * Closes the RelayPlane instance and releases resources.
   */
  close() {
    this.store.close();
  }
};
1593
-
1594
- // src/config.ts
1595
- var fs2 = __toESM(require("fs"));
1596
- var path2 = __toESM(require("path"));
1597
- var os2 = __toESM(require("os"));
1598
- var import_zod = require("zod");
1599
// Per-task routing strategy: a required model string, plus an optional
// confidence floor between 0 and 1 and an optional fallback model string.
var StrategySchema = (({ z }) => z.object({
  model: z.string(),
  minConfidence: z.number().min(0).max(1).optional(),
  fallback: z.string().optional()
}))(import_zod);
// Anthropic credentials. The whole section, and every field in it, is optional.
var AuthSchema = (({ z }) => z.object({
  anthropicApiKey: z.string().optional(),
  anthropicMaxToken: z.string().optional(),
  // Default: ['opus']
  useMaxForModels: z.array(z.string()).optional()
}).optional())(import_zod);
// Shape of the config file: strategies keyed by a string, default models, and auth.
var ConfigSchema = (({ z }) => z.object({
  strategies: z.record(z.string(), StrategySchema).optional(),
  defaults: z.object({
    qualityModel: z.string().optional(),
    costModel: z.string().optional()
  }).optional(),
  auth: AuthSchema
}))(import_zod);
1618
// Built-in configuration: written to disk by writeDefaultConfig and returned
// by loadConfig when the config file cannot be used.
var DEFAULT_CONFIG = (() => {
  const sonnet = "claude-sonnet-4-20250514";
  const haiku = "claude-3-5-haiku-latest";
  // Each strategy gets its own object so entries never share a reference.
  const viaAnthropic = (modelName) => ({ model: `anthropic:${modelName}` });
  return {
    strategies: {
      code_review: viaAnthropic(sonnet),
      code_generation: viaAnthropic(haiku),
      analysis: viaAnthropic(sonnet),
      summarization: viaAnthropic(haiku),
      creative_writing: viaAnthropic(sonnet),
      data_extraction: viaAnthropic(haiku),
      translation: viaAnthropic(haiku),
      question_answering: viaAnthropic(haiku),
      general: viaAnthropic(haiku)
    },
    defaults: {
      qualityModel: sonnet,
      costModel: haiku
    }
  };
})();
1635
/**
 * Returns the location of the config file: `.relayplane/config.json`
 * inside the current user's home directory.
 */
function getConfigPath() {
  const relayplaneDir = path2.join(os2.homedir(), ".relayplane");
  return path2.join(relayplaneDir, "config.json");
}
1638
/**
 * Writes DEFAULT_CONFIG to the config path unless a file already exists there.
 *
 * The parent directory is created as needed. The file is opened with the
 * exclusive "wx" flag, so an existing config is never overwritten, even if
 * it appears between a check and the write.
 *
 * @throws Any filesystem error other than "file already exists"
 */
function writeDefaultConfig() {
  const configPath = getConfigPath();
  // recursive: true makes this a no-op when the directory is already there.
  fs2.mkdirSync(path2.dirname(configPath), { recursive: true });
  try {
    fs2.writeFileSync(
      configPath,
      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
      { encoding: "utf-8", flag: "wx" }
    );
  } catch (err) {
    // An existing config is the normal case after first start; leave it untouched.
    if (err?.code === "EEXIST") return;
    throw err;
  }
  console.log(`[relayplane] Created default config at ${configPath}`);
}
1653
/**
 * Loads and validates the config file, creating it from DEFAULT_CONFIG
 * first if it does not exist.
 *
 * Never throws: any failure (the default file cannot be created, the file
 * cannot be read, the JSON is malformed, or validation fails) is logged
 * and DEFAULT_CONFIG is returned instead. This function runs while the
 * module is loading, so an exception here would abort startup.
 *
 * @returns The validated config, or DEFAULT_CONFIG on any failure
 */
function loadConfig() {
  try {
    const configPath = getConfigPath();
    // Inside the try on purpose: an unwritable home directory must fall
    // back to the defaults rather than crash.
    writeDefaultConfig();
    const raw = fs2.readFileSync(configPath, "utf-8");
    return ConfigSchema.parse(JSON.parse(raw));
  } catch (err) {
    if (err instanceof import_zod.z.ZodError) {
      console.error(`[relayplane] Invalid config: ${err.message}`);
    } else if (err instanceof SyntaxError) {
      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
    } else {
      console.error(`[relayplane] Failed to load config: ${err}`);
    }
    console.log("[relayplane] Using default config");
    return DEFAULT_CONFIG;
  }
}
1673
/**
 * Returns the configured strategy for a task type.
 *
 * Only strategies defined directly on `config.strategies` count; inherited
 * properties such as "toString" or "constructor" are never returned.
 *
 * @param config - Loaded configuration (may lack a `strategies` section)
 * @param taskType - Task type to look up
 * @returns The strategy object, or null when none is configured
 */
function getStrategy(config, taskType) {
  const strategies = config?.strategies;
  if (strategies == null || !Object.hasOwn(strategies, taskType)) {
    return null;
  }
  return strategies[taskType] ?? null;
}
1676
/**
 * Picks the Anthropic credential to use for a model.
 *
 * The max token is used when the model name contains (case-insensitively)
 * one of `auth.useMaxForModels` (default: ["opus"]) and a max token is
 * configured. Otherwise the API key from config is used, falling back to
 * the ANTHROPIC_API_KEY environment variable when the configured key is
 * missing or empty.
 *
 * @param config - Loaded configuration (its `auth` section is optional)
 * @param model - Model name the request targets
 * @returns `{ type: "max" | "apiKey", value }`, or null when no credential is available
 */
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const maxModelPatterns = auth?.useMaxForModels ?? ["opus"];
  // A missing model name simply matches no pattern.
  const modelId = String(model ?? "").toLowerCase();
  const shouldUseMax = maxModelPatterns.some((pattern) => modelId.includes(pattern.toLowerCase()));
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  // `||` rather than `??`: an empty string in the config must not hide the env var.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  return apiKey ? { type: "apiKey", value: apiKey } : null;
}
1689
/**
 * Watches the config file and reloads it when it changes.
 *
 * The containing directory is watched (and created if needed) and events
 * are filtered by file name. Bursts of events are debounced: the reload
 * runs 100 ms after the last event.
 *
 * @param onChange - Called with the freshly loaded config after each change
 * @returns The value returned by `fs.watch`, so callers can close the watcher
 */
function watchConfig(onChange) {
  const configPath = getConfigPath();
  const dir = path2.dirname(configPath);
  const configFile = path2.basename(configPath);
  // recursive: true makes this a no-op when the directory is already there.
  fs2.mkdirSync(dir, { recursive: true });
  let debounceTimer = null;
  const watcher = fs2.watch(dir, (_eventType, filename) => {
    if (filename !== configFile) return;
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      console.log("[relayplane] Config file changed, reloading...");
      onChange(loadConfig());
    }, 100);
  });
  // Without a listener an "error" event from the watcher would be thrown as
  // an unhandled emitter error.
  watcher.on("error", (err) => {
    console.error(`[relayplane] Config watcher error: ${err}`);
  });
  return watcher;
}
1707
-
1708
- // src/proxy.ts
1709
// Module-level state of the proxy. How these are used is not visible in
// this excerpt; the notes below describe only what is declared here.

// Version string of this proxy build.
var VERSION = "0.1.9";
// Starts empty; presumably a rolling log of recent runs capped at
// MAX_RECENT_RUNS (TODO confirm against the request handler).
var recentRuns = [];
var MAX_RECENT_RUNS = 100;
// Starts empty; presumably per-model counters (TODO confirm).
var modelCounts = {};
// Starts at 0; presumably set when the server starts (TODO confirm).
var serverStartTime = 0;
// Loaded while the module is evaluated; loadConfig() may create the
// default config file as a side effect.
var currentConfig = loadConfig();
1715
// Base URL and API-key environment variable for each upstream provider.
// Each row is [provider, baseUrl, apiKeyEnv].
var DEFAULT_ENDPOINTS = Object.fromEntries(
  [
    ["anthropic", "https://api.anthropic.com/v1", "ANTHROPIC_API_KEY"],
    ["openai", "https://api.openai.com/v1", "OPENAI_API_KEY"],
    ["google", "https://generativelanguage.googleapis.com/v1beta", "GEMINI_API_KEY"],
    ["xai", "https://api.x.ai/v1", "XAI_API_KEY"],
    ["moonshot", "https://api.moonshot.cn/v1", "MOONSHOT_API_KEY"]
  ].map(([provider, baseUrl, apiKeyEnv]) => [provider, { baseUrl, apiKeyEnv }])
);
// Short model aliases mapped to a provider and the model ID sent to it.
// Each row is [alias, provider, model].
var MODEL_MAPPING = Object.fromEntries(
  [
    // Anthropic models (using correct API model IDs)
    ["claude-opus-4-5", "anthropic", "claude-opus-4-5-20250514"],
    ["claude-sonnet-4", "anthropic", "claude-sonnet-4-20250514"],
    ["claude-3-5-sonnet", "anthropic", "claude-3-5-sonnet-20241022"],
    ["claude-3-5-haiku", "anthropic", "claude-3-5-haiku-20241022"],
    ["haiku", "anthropic", "claude-3-5-haiku-20241022"],
    ["sonnet", "anthropic", "claude-3-5-sonnet-20241022"],
    ["opus", "anthropic", "claude-3-opus-20240229"],
    // OpenAI models
    ["gpt-4o", "openai", "gpt-4o"],
    ["gpt-4o-mini", "openai", "gpt-4o-mini"],
    ["gpt-4.1", "openai", "gpt-4.1"]
  ].map(([alias, provider, model]) => [alias, { provider, model }])
);
// Built-in task type -> model routing; every entry targets Anthropic.
// Each row is [task type, model].
var DEFAULT_ROUTING = Object.fromEntries(
  [
    // Complex tasks → Sonnet (need reasoning & quality)
    ["code_review", "claude-sonnet-4-20250514"],
    ["analysis", "claude-sonnet-4-20250514"],
    ["creative_writing", "claude-sonnet-4-20250514"],
    // Medium tasks → Sonnet (benefit from better model)
    ["code_generation", "claude-sonnet-4-20250514"],
    // Simple tasks → Haiku (cost efficient)
    ["summarization", "claude-3-5-haiku-latest"],
    ["data_extraction", "claude-3-5-haiku-latest"],
    ["translation", "claude-3-5-haiku-latest"],
    ["question_answering", "claude-3-5-haiku-latest"],
    ["general", "claude-3-5-haiku-latest"]
  ].map(([taskType, model]) => [taskType, { provider: "anthropic", model }])
);
1765
/**
 * Flattens a chat message list into one plain-text string.
 *
 * String contents are used verbatim; array contents contribute the `text` of
 * their "text" parts (other parts contribute an empty string) joined by a
 * space; any other content contributes an empty string. Messages are joined
 * by newlines.
 *
 * @param {Array<{content: unknown}>} messages
 * @returns {string}
 */
function extractPromptText(messages) {
  const perMessage = [];
  for (const msg of messages) {
    const body = msg.content;
    if (typeof body === "string") {
      perMessage.push(body);
      continue;
    }
    if (!Array.isArray(body)) {
      perMessage.push("");
      continue;
    }
    const fragments = [];
    for (const part of body) {
      fragments.push(part.type === "text" ? part.text ?? "" : "");
    }
    perMessage.push(fragments.join(" "));
  }
  return perMessage.join("\n");
}
1777
/**
 * Sends a non-streaming request to the Anthropic Messages endpoint.
 *
 * @param {object} request - incoming chat request, converted by buildAnthropicBody
 * @param {string} targetModel - model ID to request
 * @param {{type: string, value: string}} auth - "max" uses a Bearer token, anything else an x-api-key header
 * @param {string} [betaHeaders] - forwarded as `anthropic-beta` when truthy
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
  const payload = buildAnthropicBody(request, targetModel, false);
  const credentialHeader = auth.type === "max"
    ? { "Authorization": `Bearer ${auth.value}` }
    : { "x-api-key": auth.value };
  const headers = {
    "Content-Type": "application/json",
    "anthropic-version": "2023-06-01",
    ...credentialHeader,
    ...(betaHeaders ? { "anthropic-beta": betaHeaders } : {})
  };
  return fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
}
1798
/**
 * Sends a streaming request to the Anthropic Messages endpoint. Identical to
 * the non-streaming call except that the body is built with `stream` set.
 *
 * @param {object} request - incoming chat request, converted by buildAnthropicBody
 * @param {string} targetModel - model ID to request
 * @param {{type: string, value: string}} auth - "max" uses a Bearer token, anything else an x-api-key header
 * @param {string} [betaHeaders] - forwarded as `anthropic-beta` when truthy
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
  const headers = {
    "Content-Type": "application/json",
    "anthropic-version": "2023-06-01"
  };
  const usesMaxToken = auth.type === "max";
  const authHeaderName = usesMaxToken ? "Authorization" : "x-api-key";
  headers[authHeaderName] = usesMaxToken ? `Bearer ${auth.value}` : auth.value;
  if (betaHeaders) {
    headers["anthropic-beta"] = betaHeaders;
  }
  const serializedBody = JSON.stringify(buildAnthropicBody(request, targetModel, true));
  return fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers,
    body: serializedBody
  });
}
1819
/**
 * Converts OpenAI-style chat messages to Anthropic Messages format.
 *
 * - "system" messages are skipped (the caller sends them separately).
 * - "tool" messages become a user turn holding one `tool_result` block.
 * - Assistant messages with `tool_calls` become an assistant turn made of an
 *   optional text block followed by one `tool_use` block per call.
 * - Everything else is passed through with the role narrowed to
 *   "assistant" or "user".
 *
 * Tool-call arguments are parsed defensively: malformed JSON, an empty
 * string, or a JSON `null` yield `{}` instead of throwing, and arguments that
 * are already an object are used as-is.
 *
 * @param {Array<object>} messages - OpenAI-format messages
 * @returns {Array<{role: string, content: unknown}>}
 */
function convertMessagesToAnthropic(messages) {
  const parseToolArguments = (rawArguments) => {
    if (rawArguments !== null && typeof rawArguments === "object") return rawArguments;
    if (typeof rawArguments !== "string" || rawArguments.trim() === "") return {};
    try {
      return JSON.parse(rawArguments) ?? {};
    } catch {
      // A truncated or invalid argument string must not abort the whole request.
      return {};
    }
  };
  const result = [];
  for (const m of messages) {
    if (m.role === "system") continue;
    if (m.role === "tool") {
      result.push({
        role: "user",
        content: [
          {
            type: "tool_result",
            tool_use_id: m.tool_call_id,
            content: typeof m.content === "string" ? m.content : JSON.stringify(m.content)
          }
        ]
      });
      continue;
    }
    if (m.role === "assistant" && m.tool_calls && m.tool_calls.length > 0) {
      const content = [];
      if (m.content && typeof m.content === "string") {
        content.push({ type: "text", text: m.content });
      } else if (Array.isArray(m.content)) {
        // Keep text parts of array-form content instead of dropping them.
        for (const part of m.content) {
          if (part?.type === "text" && part.text) {
            content.push({ type: "text", text: part.text });
          }
        }
      }
      for (const tc of m.tool_calls) {
        content.push({
          type: "tool_use",
          id: tc.id,
          name: tc.function.name,
          input: parseToolArguments(tc.function.arguments)
        });
      }
      result.push({ role: "assistant", content });
      continue;
    }
    result.push({
      role: m.role === "assistant" ? "assistant" : "user",
      content: m.content
    });
  }
  return result;
}
1860
/**
 * Builds the JSON body for an Anthropic Messages request from an
 * OpenAI-style chat request.
 *
 * All "system" messages are forwarded through the top-level `system` field:
 * a single one is passed through unchanged; several string prompts are joined
 * with a blank line; if any of them is array-form, they are flattened into a
 * list of text blocks. `max_tokens` defaults to 4096. `temperature`, `tools`
 * and `tool_choice` are copied/converted only when present on the request.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {boolean} stream - whether to ask for a streamed response
 * @returns {object} Anthropic request body
 */
function buildAnthropicBody(request, targetModel, stream) {
  const anthropicMessages = convertMessagesToAnthropic(request.messages);
  const systemMessages = request.messages.filter((m) => m.role === "system");
  const anthropicBody = {
    model: targetModel,
    messages: anthropicMessages,
    max_tokens: request.max_tokens ?? 4096,
    stream
  };
  if (systemMessages.length === 1) {
    anthropicBody["system"] = systemMessages[0].content;
  } else if (systemMessages.length > 1) {
    // The message converter drops every system message, so all of them must
    // be carried here or the later ones would be lost.
    const contents = systemMessages.map((m) => m.content);
    if (contents.every((c) => typeof c === "string")) {
      anthropicBody["system"] = contents.join("\n\n");
    } else {
      anthropicBody["system"] = contents.flatMap((c) => {
        if (typeof c === "string") return c === "" ? [] : [{ type: "text", text: c }];
        return Array.isArray(c) ? c : [];
      });
    }
  }
  if (request.temperature !== void 0) {
    anthropicBody["temperature"] = request.temperature;
  }
  if (request.tools && Array.isArray(request.tools)) {
    anthropicBody["tools"] = convertToolsToAnthropic(request.tools);
  }
  if (request.tool_choice) {
    anthropicBody["tool_choice"] = convertToolChoiceToAnthropic(request.tool_choice);
  }
  return anthropicBody;
}
1883
/**
 * Converts OpenAI tool definitions to Anthropic tool definitions.
 *
 * Entries of type "function" with a `function` object are mapped to
 * `{ name, description, input_schema }`, with an empty object schema when no
 * parameters are given. Any other entry is returned unchanged.
 *
 * @param {Array<object>} tools
 * @returns {Array<object>}
 */
function convertToolsToAnthropic(tools) {
  const converted = [];
  for (const tool of tools) {
    const isFunctionTool = tool.type === "function" && tool.function;
    if (!isFunctionTool) {
      converted.push(tool);
      continue;
    }
    const { name, description, parameters } = tool.function;
    converted.push({
      name,
      description,
      input_schema: parameters || { type: "object", properties: {} }
    });
  }
  return converted;
}
1896
/**
 * Converts an OpenAI `tool_choice` value to its Anthropic form.
 *
 * "auto" and "none" keep their name, "required" becomes "any", and a
 * `{ type: "function", function: { name } }` object becomes
 * `{ type: "tool", name }`. Anything else is returned unchanged.
 *
 * @param {string|object} toolChoice
 * @returns {object|string}
 */
function convertToolChoiceToAnthropic(toolChoice) {
  switch (toolChoice) {
    case "auto":
      return { type: "auto" };
    case "none":
      return { type: "none" };
    case "required":
      return { type: "any" };
    default:
      break;
  }
  const functionName = toolChoice.type === "function" ? toolChoice.function?.name : void 0;
  return functionName ? { type: "tool", name: functionName } : toolChoice;
}
1906
/**
 * Sends a non-streaming chat completion request to OpenAI.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to false.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToOpenAI(request, targetModel, apiKey) {
  const payload = JSON.stringify({ ...request, model: targetModel, stream: false });
  const requestHeaders = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`
  };
  return fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: requestHeaders,
    body: payload
  });
}
1922
/**
 * Sends a streaming chat completion request to OpenAI.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to true.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToOpenAIStream(request, targetModel, apiKey) {
  const init = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({ ...request, model: targetModel, stream: true })
  };
  return fetch("https://api.openai.com/v1/chat/completions", init);
}
1938
/**
 * Sends a non-streaming chat completion request to xAI.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to false.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToXAI(request, targetModel, apiKey) {
  const outgoing = { ...request, model: targetModel, stream: false };
  const bearer = `Bearer ${apiKey}`;
  return fetch("https://api.x.ai/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: bearer },
    body: JSON.stringify(outgoing)
  });
}
1954
/**
 * Sends a streaming chat completion request to xAI.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to true.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToXAIStream(request, targetModel, apiKey) {
  const endpoint = "https://api.x.ai/v1/chat/completions";
  const serialized = JSON.stringify({ ...request, model: targetModel, stream: true });
  return fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: serialized
  });
}
1970
/**
 * Sends a non-streaming chat completion request to Moonshot.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to false.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToMoonshot(request, targetModel, apiKey) {
  const headers = { "Content-Type": "application/json" };
  headers.Authorization = `Bearer ${apiKey}`;
  const body = JSON.stringify({ ...request, model: targetModel, stream: false });
  return fetch("https://api.moonshot.cn/v1/chat/completions", { method: "POST", headers, body });
}
1986
/**
 * Sends a streaming chat completion request to Moonshot.
 * The incoming request is forwarded as-is with `model` replaced by the
 * target model and `stream` forced to true.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - model ID to request
 * @param {string} apiKey - sent as a Bearer token
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToMoonshotStream(request, targetModel, apiKey) {
  const streamingRequest = { ...request, model: targetModel, stream: true };
  const fetchOptions = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(streamingRequest)
  };
  return fetch("https://api.moonshot.cn/v1/chat/completions", fetchOptions);
}
2002
/**
 * Converts OpenAI-style chat messages to Gemini `contents`.
 *
 * "system" messages are skipped, "assistant" becomes the "model" role and
 * every other role becomes "user". String content yields a single text part.
 * Array content is mapped part by part: text parts are copied, base64 data-URL
 * images become `inline_data`, other image URLs become a `[Image: url]` text
 * placeholder, and anything else becomes an empty text part. Messages whose
 * content is neither a string nor an array are omitted.
 *
 * @param {Array<object>} messages
 * @returns {Array<{role: string, parts: Array<object>}>}
 */
function convertMessagesToGemini(messages) {
  const toGeminiPart = (part) => {
    if (part.type === "text") {
      return { text: part.text };
    }
    const imageUrl = part.type === "image_url" ? part.image_url?.url : void 0;
    if (!imageUrl) {
      return { text: "" };
    }
    const dataUrlMatch = imageUrl.startsWith("data:")
      ? imageUrl.match(/^data:([^;]+);base64,(.+)$/)
      : null;
    if (!dataUrlMatch) {
      return { text: `[Image: ${imageUrl}]` };
    }
    const [, mimeType, base64Data] = dataUrlMatch;
    return { inline_data: { mime_type: mimeType, data: base64Data } };
  };
  const contents = [];
  for (const { role: sourceRole, content } of messages) {
    if (sourceRole === "system") continue;
    const role = sourceRole === "assistant" ? "model" : "user";
    if (typeof content === "string") {
      contents.push({ role, parts: [{ text: content }] });
    } else if (Array.isArray(content)) {
      contents.push({ role, parts: content.map((part) => toGeminiPart(part)) });
    }
  }
  return contents;
}
2040
/**
 * Sends a non-streaming `generateContent` request to the Gemini API.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the URL
 * query string so that it does not end up in request URLs that may be logged.
 * The model name is URL-encoded before being placed in the path.
 *
 * `max_tokens` defaults to 4096; `temperature` is forwarded only when set;
 * the first system message is forwarded as `systemInstruction` when its
 * content is a string.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - Gemini model name
 * @param {string} apiKey - Gemini API key
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToGemini(request, targetModel, apiKey) {
  const systemMessage = request.messages.find((m) => m.role === "system");
  const geminiContents = convertMessagesToGemini(request.messages);
  const geminiBody = {
    contents: geminiContents,
    generationConfig: {
      maxOutputTokens: request.max_tokens ?? 4096
    }
  };
  if (request.temperature !== void 0) {
    geminiBody["generationConfig"]["temperature"] = request.temperature;
  }
  if (systemMessage && typeof systemMessage.content === "string") {
    geminiBody["systemInstruction"] = {
      parts: [{ text: systemMessage.content }]
    };
  }
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(targetModel)}:generateContent`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    },
    body: JSON.stringify(geminiBody)
  });
  return response;
}
2069
/**
 * Sends a streaming `streamGenerateContent` request (SSE framing via
 * `alt=sse`) to the Gemini API.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the URL
 * query string so that it does not end up in request URLs that may be logged.
 * The model name is URL-encoded before being placed in the path.
 *
 * `max_tokens` defaults to 4096; `temperature` is forwarded only when set;
 * the first system message is forwarded as `systemInstruction` when its
 * content is a string.
 *
 * @param {object} request - OpenAI-format chat request
 * @param {string} targetModel - Gemini model name
 * @param {string} apiKey - Gemini API key
 * @returns {Promise<Response>} the raw fetch response
 */
async function forwardToGeminiStream(request, targetModel, apiKey) {
  const systemMessage = request.messages.find((m) => m.role === "system");
  const geminiContents = convertMessagesToGemini(request.messages);
  const geminiBody = {
    contents: geminiContents,
    generationConfig: {
      maxOutputTokens: request.max_tokens ?? 4096
    }
  };
  if (request.temperature !== void 0) {
    geminiBody["generationConfig"]["temperature"] = request.temperature;
  }
  if (systemMessage && typeof systemMessage.content === "string") {
    geminiBody["systemInstruction"] = {
      parts: [{ text: systemMessage.content }]
    };
  }
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(targetModel)}:streamGenerateContent?alt=sse`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    },
    body: JSON.stringify(geminiBody)
  });
  return response;
}
2098
/**
 * Converts a Gemini `generateContent` response into an OpenAI-style
 * `chat.completion` object.
 *
 * Only the first candidate is used; its parts' text is concatenated.
 * "MAX_TOKENS" maps to "length", "SAFETY" to "content_filter", and anything
 * else to "stop". Missing usage counts default to 0.
 *
 * @param {object} geminiData - parsed Gemini response
 * @param {string} model - model name to report
 * @returns {object} OpenAI-format completion
 */
function convertGeminiResponse(geminiData, model) {
  const firstCandidate = geminiData.candidates?.[0];
  const content = firstCandidate?.content?.parts?.map((p) => p.text ?? "").join("") ?? "";
  const upstreamReason = firstCandidate?.finishReason;
  const finish_reason =
    upstreamReason === "MAX_TOKENS" ? "length" : upstreamReason === "SAFETY" ? "content_filter" : "stop";
  const promptTokens = geminiData.usageMetadata?.promptTokenCount ?? 0;
  const completionTokens = geminiData.usageMetadata?.candidatesTokenCount ?? 0;
  return {
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [
      {
        index: 0,
        message: { role: "assistant", content },
        finish_reason
      }
    ],
    usage: {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens
    }
  };
}
2129
/**
 * Converts one Gemini streaming event into an OpenAI-style
 * `chat.completion.chunk` SSE frame.
 *
 * The first event carries the assistant role together with its text; later
 * events carry only non-empty text. A finish reason, when present, maps
 * "MAX_TOKENS" to "length", "SAFETY" to "content_filter", and anything else
 * to "stop".
 *
 * @param {object} eventData - parsed Gemini event
 * @param {string} messageId - id reported on every chunk
 * @param {string} model - model name to report
 * @param {boolean} isFirst - whether this is the first chunk of the stream
 * @returns {string} an SSE `data:` frame terminated by a blank line
 */
function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
  const firstCandidate = eventData.candidates?.[0];
  const text = firstCandidate?.content?.parts?.map((p) => p.text ?? "").join("") ?? "";
  let delta = {};
  if (isFirst) {
    delta = { role: "assistant", content: text };
  } else if (text) {
    delta = { content: text };
  }
  let finish_reason = null;
  const upstreamReason = firstCandidate?.finishReason;
  if (upstreamReason) {
    finish_reason =
      upstreamReason === "MAX_TOKENS" ? "length" : upstreamReason === "SAFETY" ? "content_filter" : "stop";
  }
  const payload = JSON.stringify({
    id: messageId,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [{ index: 0, delta, finish_reason }]
  });
  return `data: ${payload}\n\n`;
}
2162
/**
 * Re-emits a Gemini SSE response as OpenAI-style SSE frames.
 *
 * Each `data: ` line is parsed as JSON and converted with
 * convertGeminiStreamEvent; lines that are not valid JSON are skipped.
 * The stream always ends with exactly one `data: [DONE]` frame: an upstream
 * `[DONE]` marker is consumed rather than forwarded. A final line that is not
 * newline-terminated is still processed when the upstream body ends.
 *
 * @param {Response} response - upstream streaming response
 * @param {string} model - model name to report on every chunk
 * @yields {string} SSE frames
 * @throws {Error} when the response has no body
 */
async function* convertGeminiStream(response, model) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  const messageId = `chatcmpl-${Date.now()}`;
  let buffer = "";
  let isFirst = true;
  // Converts one raw SSE line; returns the frame to emit or null.
  const convertLine = (rawLine) => {
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    if (!line.startsWith("data: ")) return null;
    const jsonStr = line.slice(6);
    // The terminator is emitted once, after the upstream body ends.
    if (jsonStr.trim() === "[DONE]") return null;
    try {
      const converted = convertGeminiStreamEvent(JSON.parse(jsonStr), messageId, model, isFirst);
      if (!converted) return null;
      isFirst = false;
      return converted;
    } catch {
      // Best effort: a malformed event is skipped so the stream can continue.
      return null;
    }
  };
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        const frame = convertLine(line);
        if (frame) yield frame;
      }
    }
    // Flush bytes still held by the decoder and any unterminated last line.
    buffer += decoder.decode();
    if (buffer) {
      const frame = convertLine(buffer);
      if (frame) yield frame;
    }
    yield "data: [DONE]\n\n";
  } finally {
    reader.releaseLock();
  }
}
2202
/**
 * Converts an Anthropic Messages response into an OpenAI-style
 * `chat.completion` object.
 *
 * Text blocks are concatenated into `message.content` (null when empty);
 * `tool_use` blocks become `message.tool_calls` with JSON-string arguments.
 * Stop reasons are mapped to OpenAI finish reasons: `tool_use` to
 * "tool_calls", `max_tokens` to "length", `refusal` to "content_filter",
 * `end_turn`/`stop_sequence` to "stop". Unrecognised reasons are passed
 * through unchanged, and a missing reason yields "stop".
 *
 * @param {object} anthropicData - parsed Anthropic response
 * @returns {object} OpenAI-format completion
 */
function convertAnthropicResponse(anthropicData) {
  const textBlocks = anthropicData.content?.filter((c) => c.type === "text") ?? [];
  const toolBlocks = anthropicData.content?.filter((c) => c.type === "tool_use") ?? [];
  const textContent = textBlocks.map((c) => c.text ?? "").join("");
  const message = {
    role: "assistant",
    content: textContent || null
  };
  if (toolBlocks.length > 0) {
    message["tool_calls"] = toolBlocks.map((block, position) => ({
      // The position suffix keeps fallback ids distinct within one response.
      id: block.id || `call_${Date.now()}_${position}`,
      type: "function",
      function: {
        name: block.name,
        arguments: typeof block.input === "string" ? block.input : JSON.stringify(block.input ?? {})
      }
    }));
  }
  let finishReason = "stop";
  switch (anthropicData.stop_reason) {
    case "tool_use":
      finishReason = "tool_calls";
      break;
    case "max_tokens":
      finishReason = "length";
      break;
    case "refusal":
      finishReason = "content_filter";
      break;
    case "end_turn":
    case "stop_sequence":
      finishReason = "stop";
      break;
    default:
      if (anthropicData.stop_reason) {
        finishReason = anthropicData.stop_reason;
      }
  }
  const promptTokens = anthropicData.usage?.input_tokens ?? 0;
  const completionTokens = anthropicData.usage?.output_tokens ?? 0;
  return {
    id: anthropicData.id || `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model: anthropicData.model,
    choices: [
      {
        index: 0,
        message,
        finish_reason: finishReason
      }
    ],
    usage: {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens
    }
  };
}
2247
/**
 * Converts one Anthropic streaming event into an OpenAI-style
 * `chat.completion.chunk` SSE frame, or null when the event has no OpenAI
 * equivalent.
 *
 * State kept on `toolState` across calls:
 * - `tools`: Map from Anthropic content-block index to the tool call being
 *   assembled. Each entry also records `toolCallIndex`, the zero-based
 *   position among tool calls, which is what is reported as
 *   `tool_calls[].index` (the content-block index also counts text blocks).
 * - `currentToolIndex`: content-block index of the most recent tool call.
 * - `messageId`: upstream message id captured at `message_start`, reused as
 *   the chunk id for the remaining chunks so the id stays constant.
 *
 * Stop reasons are mapped to OpenAI finish reasons: `tool_use` to
 * "tool_calls", `max_tokens` to "length", `refusal` to "content_filter",
 * `end_turn`/`stop_sequence` to "stop"; others pass through and a missing
 * reason yields "stop".
 *
 * @param {string} eventType - SSE event name
 * @param {object} eventData - parsed SSE data payload
 * @param {string} messageId - fallback chunk id
 * @param {string} model - model name to report
 * @param {{currentToolIndex: number, tools: Map<number, object>, messageId?: string}} toolState
 * @returns {string|null} an SSE frame, or null when nothing should be emitted
 */
function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
  const choice = { index: 0, delta: {}, finish_reason: null };
  const baseChunk = {
    id: toolState?.messageId ?? messageId,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [choice]
  };
  const frame = () => `data: ${JSON.stringify(baseChunk)}\n\n`;
  switch (eventType) {
    case "message_start": {
      const msg = eventData["message"];
      if (toolState && msg?.["id"]) {
        // Remember the upstream id so later chunks report the same one.
        toolState.messageId = msg["id"];
      }
      baseChunk.id = msg?.["id"] || messageId;
      choice.delta = { role: "assistant", content: "" };
      return frame();
    }
    case "content_block_start": {
      const contentBlock = eventData["content_block"];
      if (contentBlock?.["type"] !== "tool_use") {
        return null;
      }
      const blockKey = eventData["index"] ?? toolState.currentToolIndex;
      const toolId = contentBlock["id"];
      const toolName = contentBlock["name"];
      const toolCallIndex = toolState.tools.get(blockKey)?.toolCallIndex ?? toolState.tools.size;
      toolState.tools.set(blockKey, {
        id: toolId,
        name: toolName,
        arguments: "",
        toolCallIndex
      });
      toolState.currentToolIndex = blockKey;
      choice.delta = {
        tool_calls: [{
          index: toolCallIndex,
          id: toolId,
          type: "function",
          function: { name: toolName, arguments: "" }
        }]
      };
      return frame();
    }
    case "content_block_delta": {
      const delta = eventData["delta"];
      const blockIndex = eventData["index"];
      if (delta?.["type"] === "text_delta") {
        choice.delta = { content: delta["text"] };
        return frame();
      }
      if (delta?.["type"] === "input_json_delta") {
        const partialJson = delta["partial_json"] || "";
        const tool = toolState.tools.get(blockIndex ?? toolState.currentToolIndex);
        if (tool) {
          tool.arguments += partialJson;
        }
        choice.delta = {
          tool_calls: [{
            index: tool?.toolCallIndex ?? blockIndex ?? 0,
            function: { arguments: partialJson }
          }]
        };
        return frame();
      }
      return null;
    }
    case "message_delta": {
      const stopReason = eventData["delta"]?.["stop_reason"];
      switch (stopReason) {
        case "tool_use":
          choice.finish_reason = "tool_calls";
          break;
        case "max_tokens":
          choice.finish_reason = "length";
          break;
        case "refusal":
          choice.finish_reason = "content_filter";
          break;
        case "end_turn":
        case "stop_sequence":
          choice.finish_reason = "stop";
          break;
        default:
          choice.finish_reason = stopReason || "stop";
      }
      choice.delta = {};
      return frame();
    }
    case "message_stop": {
      return "data: [DONE]\n\n";
    }
    default:
      return null;
  }
}
2340
/**
 * Re-emits an Anthropic SSE response as OpenAI-style SSE frames.
 *
 * Lines are accumulated into (event, data) pairs; a blank line dispatches the
 * pair through convertAnthropicStreamEvent. The pending pair is kept across
 * reads, so an event whose `event:` and `data:` lines arrive in different
 * network chunks is still delivered. Events whose data is not valid JSON are
 * skipped. An event left pending when the upstream body ends is dispatched
 * as well.
 *
 * @param {Response} response - upstream streaming response
 * @param {string} model - model name to report on every chunk
 * @yields {string} SSE frames
 * @throws {Error} when the response has no body
 */
async function* convertAnthropicStream(response, model) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  const messageId = `chatcmpl-${Date.now()}`;
  const toolState = {
    currentToolIndex: 0,
    tools: /* @__PURE__ */ new Map()
  };
  let buffer = "";
  // Parser state lives outside the read loop so it survives chunk boundaries.
  let eventType = "";
  let eventData = "";
  // Converts the pending event (if complete) and resets the parser state.
  const dispatchPending = () => {
    let converted = null;
    if (eventType && eventData) {
      try {
        converted = convertAnthropicStreamEvent(eventType, JSON.parse(eventData), messageId, model, toolState);
      } catch {
        // Best effort: a malformed event is skipped so the stream can continue.
        converted = null;
      }
    }
    eventType = "";
    eventData = "";
    return converted || null;
  };
  // Feeds one raw line to the parser; returns a frame when an event completes.
  const consumeLine = (rawLine) => {
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    if (line.startsWith("event: ")) {
      eventType = line.slice(7).trim();
      return null;
    }
    if (line.startsWith("data: ")) {
      eventData = line.slice(6);
      return null;
    }
    return line === "" ? dispatchPending() : null;
  };
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        const frame = consumeLine(line);
        if (frame) yield frame;
      }
    }
    // Flush the decoder, the unterminated last line, and any pending event.
    buffer += decoder.decode();
    if (buffer) {
      const frame = consumeLine(buffer);
      if (frame) yield frame;
    }
    const trailing = dispatchPending();
    if (trailing) yield trailing;
  } finally {
    reader.releaseLock();
  }
}
2384
/**
 * Passes an OpenAI-compatible SSE response through unchanged, yielding each
 * network chunk decoded as text.
 *
 * @param {Response} response - upstream streaming response
 * @yields {string} decoded chunk text
 * @throws {Error} when the response has no body
 */
async function* pipeOpenAIStream(response) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  try {
    for (let next = await reader.read(); !next.done; next = await reader.read()) {
      yield decoder.decode(next.value, { stream: true });
    }
  } finally {
    reader.releaseLock();
  }
}
2400
/**
 * Parses a `provider:model` string.
 *
 * Only the first colon separates provider from model, so model names that
 * themselves contain colons (for example `local:llama3:8b`) are kept intact.
 *
 * @param {string} preferredModel - value of the form `provider:model`
 * @returns {{provider: string, model: string}|null} null when the value is not
 *   a string, either side is empty, or the provider is not recognised
 */
function parsePreferredModel(preferredModel) {
  if (typeof preferredModel !== "string") return null;
  const separatorAt = preferredModel.indexOf(":");
  if (separatorAt <= 0) return null;
  const provider = preferredModel.slice(0, separatorAt);
  const model = preferredModel.slice(separatorAt + 1);
  if (!model) return null;
  const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
  if (!validProviders.includes(provider)) return null;
  return { provider, model };
}
2407
/**
 * Resolves an explicitly requested model name to a provider and model ID.
 *
 * Resolution order: an own entry of MODEL_MAPPING; a known name prefix
 * (`claude-`, OpenAI families including `o<digit>` reasoning models,
 * `gemini-`/`palm-`, `grok-`, `moonshot-`); finally `provider/model`, where
 * only the first slash separates the two so model names containing slashes
 * are kept intact.
 *
 * @param {string} modelName
 * @returns {{provider: string, model: string}|null} null when the name is not
 *   a non-empty string or cannot be attributed to a provider
 */
function resolveExplicitModel(modelName) {
  if (typeof modelName !== "string" || modelName === "") return null;
  // Own-property check: names such as "constructor" must not resolve to
  // members inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(MODEL_MAPPING, modelName)) {
    return MODEL_MAPPING[modelName];
  }
  if (modelName.startsWith("claude-")) {
    return { provider: "anthropic", model: modelName };
  }
  const openaiPrefixes = ["gpt-", "chatgpt-", "text-", "dall-e", "whisper", "tts-"];
  const isOpenAIReasoningModel = /^o\d+(?:-|$)/.test(modelName);
  if (isOpenAIReasoningModel || openaiPrefixes.some((prefix) => modelName.startsWith(prefix))) {
    return { provider: "openai", model: modelName };
  }
  if (modelName.startsWith("gemini-") || modelName.startsWith("palm-")) {
    return { provider: "google", model: modelName };
  }
  if (modelName.startsWith("grok-")) {
    return { provider: "xai", model: modelName };
  }
  if (modelName.startsWith("moonshot-")) {
    return { provider: "moonshot", model: modelName };
  }
  const separatorAt = modelName.indexOf("/");
  if (separatorAt > 0) {
    const provider = modelName.slice(0, separatorAt);
    const model = modelName.slice(separatorAt + 1);
    const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
    if (model && validProviders.includes(provider)) {
      return { provider, model };
    }
  }
  return null;
}
2435
/**
 * Starts the RelayPlane HTTP proxy and resolves with the listening server.
 *
 * Routes served:
 * - `OPTIONS *`: CORS preflight (204).
 * - `GET /health`: uptime, version and which provider API keys are set.
 * - `GET /stats`: run counts, savings report and model distribution.
 * - `GET /runs?limit=N`: most recent runs (default 20, capped at MAX_RECENT_RUNS).
 * - `GET …/models`: the three virtual `relayplane:*` models.
 * - `POST …/chat/completions`: routes the request to a provider.
 *
 * Routing for chat completions: a `relayplane:*` model selects auto, cost or
 * quality mode; any other model is resolved explicitly and passed through.
 * In non-passthrough modes the target is chosen from the config strategy,
 * then a learned rule, then DEFAULT_ROUTING (falling back to its `general`
 * entry for unknown task types). Cost and quality modes then override the
 * target with the configured Anthropic model.
 *
 * Malformed requests (invalid JSON, non-object body, missing `model` or
 * `messages`) are answered with 400. Any error escaping a handler is answered
 * with 500 when headers have not been sent yet; otherwise the response is
 * ended.
 *
 * @param {{port?: number, host?: string, verbose?: boolean, dbPath?: string}} [config]
 * @returns {Promise<import("http").Server>} resolves once the server is listening;
 *   rejects on a server "error" event
 */
async function startProxy(config = {}) {
  const port = config.port ?? 3001;
  const host = config.host ?? "127.0.0.1";
  const verbose = config.verbose ?? false;
  const relay = new RelayPlane({ dbPath: config.dbPath });
  const log = (msg) => {
    if (verbose) console.log(`[relayplane] ${msg}`);
  };
  const sendJson = (res, status, payload) => {
    res.writeHead(status, { "Content-Type": "application/json" });
    res.end(JSON.stringify(payload));
  };
  // Unknown task types fall back to the generic route instead of crashing.
  const defaultRouteFor = (taskType) => DEFAULT_ROUTING[taskType] ?? DEFAULT_ROUTING["general"];
  const handleRequest = async (req, res) => {
    res.setHeader("Access-Control-Allow-Origin", "*");
    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
    res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
    if (req.method === "OPTIONS") {
      res.writeHead(204);
      res.end();
      return;
    }
    const parsedUrl = url.parse(req.url || "", true);
    const pathname = parsedUrl.pathname || "";
    if (req.method === "GET" && pathname === "/health") {
      const uptimeMs = Date.now() - serverStartTime;
      const uptimeSecs = Math.floor(uptimeMs / 1e3);
      const hours = Math.floor(uptimeSecs / 3600);
      const mins = Math.floor(uptimeSecs % 3600 / 60);
      const secs = uptimeSecs % 60;
      const providers = {};
      for (const [name, endpoint] of Object.entries(DEFAULT_ENDPOINTS)) {
        providers[name] = !!process.env[endpoint.apiKeyEnv];
      }
      sendJson(res, 200, {
        status: "ok",
        version: VERSION,
        uptime: `${hours}h ${mins}m ${secs}s`,
        uptimeMs,
        providers,
        totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
      });
      return;
    }
    if (req.method === "GET" && pathname === "/stats") {
      const stats = relay.stats();
      const savings = relay.savingsReport(30);
      const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
      const modelDistribution = {};
      for (const [model, count] of Object.entries(modelCounts)) {
        modelDistribution[model] = {
          count,
          percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
        };
      }
      sendJson(res, 200, {
        totalRuns,
        savings: {
          estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
          actualCostUsd: savings.actualCost.toFixed(4),
          baselineCostUsd: savings.baselineCost.toFixed(4),
          savedUsd: savings.savings.toFixed(4)
        },
        modelDistribution,
        byTaskType: stats.byTaskType,
        period: stats.period
      });
      return;
    }
    if (req.method === "GET" && pathname === "/runs") {
      const limitParam = parsedUrl.query["limit"];
      const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
      // Clamp at 0: a negative value would make slice() count from the end.
      const limit = Math.max(0, Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS));
      sendJson(res, 200, {
        runs: recentRuns.slice(0, limit),
        total: recentRuns.length
      });
      return;
    }
    if (req.method === "GET" && pathname.includes("/models")) {
      sendJson(res, 200, {
        object: "list",
        data: [
          { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
          { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
          { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
        ]
      });
      return;
    }
    if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
      sendJson(res, 404, { error: "Not found" });
      return;
    }
    // Collect raw bytes and decode once, so a multi-byte character split
    // across two chunks is not corrupted.
    const bodyChunks = [];
    for await (const chunk of req) {
      bodyChunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    const body = Buffer.concat(bodyChunks).toString("utf8");
    let request;
    try {
      request = JSON.parse(body);
    } catch {
      sendJson(res, 400, { error: "Invalid JSON" });
      return;
    }
    if (request === null || typeof request !== "object" || Array.isArray(request)) {
      sendJson(res, 400, { error: "Request body must be a JSON object" });
      return;
    }
    if (typeof request.model !== "string" || request.model === "") {
      sendJson(res, 400, { error: "Missing or invalid \"model\" field" });
      return;
    }
    if (!Array.isArray(request.messages)) {
      sendJson(res, 400, { error: "Missing or invalid \"messages\" field" });
      return;
    }
    const isStreaming = request.stream === true;
    const requestedModel = request.model;
    let routingMode = "auto";
    let targetModel = "";
    let targetProvider = "anthropic";
    if (requestedModel.startsWith("relayplane:")) {
      if (requestedModel.includes(":cost")) {
        routingMode = "cost";
      } else if (requestedModel.includes(":quality")) {
        routingMode = "quality";
      }
    } else {
      routingMode = "passthrough";
      const resolved = resolveExplicitModel(requestedModel);
      if (resolved) {
        targetProvider = resolved.provider;
        targetModel = resolved.model;
        log(`Pass-through mode: ${requestedModel} \u2192 ${targetProvider}/${targetModel}`);
      } else {
        sendJson(res, 400, { error: `Unknown model: ${requestedModel}` });
        return;
      }
    }
    log(`Received request for model: ${requestedModel} (mode: ${routingMode}, stream: ${isStreaming})`);
    const promptText = extractPromptText(request.messages);
    const taskType = inferTaskType(promptText);
    const confidence = getInferenceConfidence(promptText, taskType);
    log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
    if (routingMode !== "passthrough") {
      let route = null;
      const configStrategy = getStrategy(currentConfig, taskType);
      if (configStrategy) {
        route = parsePreferredModel(configStrategy.model);
        if (route) {
          log(`Using config strategy: ${configStrategy.model}`);
        }
      }
      if (!route) {
        // No usable config strategy (absent or unparseable): try the learned
        // rule, then the built-in default, so the target is never left empty.
        const rule = relay.routing.get(taskType);
        const learned = rule && rule.preferredModel ? parsePreferredModel(rule.preferredModel) : null;
        if (learned) {
          route = learned;
          log(`Using learned rule: ${rule.preferredModel}`);
        } else {
          route = defaultRouteFor(taskType);
        }
      }
      targetProvider = route.provider;
      targetModel = route.model;
      if (routingMode === "cost") {
        const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
        targetModel = costModel;
        targetProvider = "anthropic";
        log(`Cost mode: using ${costModel}`);
      } else if (routingMode === "quality") {
        const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
        targetModel = qualityModel;
        targetProvider = "anthropic";
        log(`Quality mode: using ${qualityModel}`);
      }
    }
    log(`Routing to: ${targetProvider}/${targetModel}`);
    let apiKey;
    let anthropicAuth = null;
    if (targetProvider === "anthropic") {
      anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
      if (!anthropicAuth) {
        sendJson(res, 500, { error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" });
        return;
      }
      log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
    } else {
      const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
      apiKey = process.env[apiKeyEnv];
      if (!apiKey) {
        sendJson(res, 500, { error: `Missing ${apiKeyEnv} environment variable` });
        return;
      }
    }
    const startTime = Date.now();
    const betaHeaders = req.headers["anthropic-beta"];
    const forward = isStreaming ? handleStreamingRequest : handleNonStreamingRequest;
    await forward(
      res,
      request,
      targetProvider,
      targetModel,
      apiKey,
      anthropicAuth,
      relay,
      promptText,
      taskType,
      confidence,
      routingMode,
      startTime,
      log,
      betaHeaders
    );
  };
  const server = http.createServer(async (req, res) => {
    try {
      await handleRequest(req, res);
    } catch (err) {
      // Without this, a throw inside the async handler becomes an unhandled
      // rejection and the client never receives a response.
      const errorMsg = err instanceof Error ? err.message : String(err);
      log(`Unhandled request error: ${errorMsg}`);
      if (!res.headersSent) {
        sendJson(res, 500, { error: `Internal proxy error: ${errorMsg}` });
      } else {
        res.end();
      }
    }
  });
  watchConfig((newConfig) => {
    currentConfig = newConfig;
    console.log("[relayplane] Config reloaded");
  });
  return new Promise((resolve, reject) => {
    server.on("error", reject);
    server.listen(port, host, () => {
      serverStartTime = Date.now();
      console.log(`RelayPlane proxy listening on http://${host}:${port}`);
      console.log(`  Models: relayplane:auto, relayplane:cost, relayplane:quality`);
      console.log(`  Endpoint: POST /v1/chat/completions`);
      console.log(`  Stats: GET /stats, /runs, /health`);
      console.log(`  Config: ~/.relayplane/config.json (hot-reload enabled)`);
      console.log(`  Streaming: \u2705 Enabled`);
      resolve(server);
    });
  });
}
2689
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2690
- let providerResponse;
2691
- try {
2692
- switch (targetProvider) {
2693
- case "anthropic":
2694
- if (!anthropicAuth) throw new Error("No Anthropic auth");
2695
- providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2696
- break;
2697
- case "google":
2698
- providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
2699
- break;
2700
- case "xai":
2701
- providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
2702
- break;
2703
- case "moonshot":
2704
- providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
2705
- break;
2706
- default:
2707
- providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
2708
- }
2709
- if (!providerResponse.ok) {
2710
- const errorData = await providerResponse.json();
2711
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2712
- res.end(JSON.stringify(errorData));
2713
- return;
2714
- }
2715
- } catch (err) {
2716
- const errorMsg = err instanceof Error ? err.message : String(err);
2717
- res.writeHead(500, { "Content-Type": "application/json" });
2718
- res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2719
- return;
2720
- }
2721
- res.writeHead(200, {
2722
- "Content-Type": "text/event-stream",
2723
- "Cache-Control": "no-cache",
2724
- "Connection": "keep-alive"
2725
- });
2726
- try {
2727
- switch (targetProvider) {
2728
- case "anthropic":
2729
- for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
2730
- res.write(chunk);
2731
- }
2732
- break;
2733
- case "google":
2734
- for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
2735
- res.write(chunk);
2736
- }
2737
- break;
2738
- default:
2739
- for await (const chunk of pipeOpenAIStream(providerResponse)) {
2740
- res.write(chunk);
2741
- }
2742
- }
2743
- } catch (err) {
2744
- log(`Streaming error: ${err}`);
2745
- }
2746
- const durationMs = Date.now() - startTime;
2747
- const modelKey = `${targetProvider}/${targetModel}`;
2748
- modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2749
- relay.run({
2750
- prompt: promptText.slice(0, 500),
2751
- taskType,
2752
- model: `${targetProvider}:${targetModel}`
2753
- }).then((runResult) => {
2754
- recentRuns.unshift({
2755
- runId: runResult.runId,
2756
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2757
- model: modelKey,
2758
- taskType,
2759
- confidence,
2760
- mode: routingMode,
2761
- durationMs,
2762
- promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2763
- });
2764
- if (recentRuns.length > MAX_RECENT_RUNS) {
2765
- recentRuns.pop();
2766
- }
2767
- log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2768
- }).catch((err) => {
2769
- log(`Failed to record run: ${err}`);
2770
- });
2771
- res.end();
2772
- }
2773
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2774
- let providerResponse;
2775
- let responseData;
2776
- try {
2777
- switch (targetProvider) {
2778
- case "anthropic": {
2779
- if (!anthropicAuth) throw new Error("No Anthropic auth");
2780
- providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2781
- const rawData = await providerResponse.json();
2782
- if (!providerResponse.ok) {
2783
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2784
- res.end(JSON.stringify(rawData));
2785
- return;
2786
- }
2787
- responseData = convertAnthropicResponse(rawData);
2788
- break;
2789
- }
2790
- case "google": {
2791
- providerResponse = await forwardToGemini(request, targetModel, apiKey);
2792
- const rawData = await providerResponse.json();
2793
- if (!providerResponse.ok) {
2794
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2795
- res.end(JSON.stringify(rawData));
2796
- return;
2797
- }
2798
- responseData = convertGeminiResponse(rawData, targetModel);
2799
- break;
2800
- }
2801
- case "xai": {
2802
- providerResponse = await forwardToXAI(request, targetModel, apiKey);
2803
- responseData = await providerResponse.json();
2804
- if (!providerResponse.ok) {
2805
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2806
- res.end(JSON.stringify(responseData));
2807
- return;
2808
- }
2809
- break;
2810
- }
2811
- case "moonshot": {
2812
- providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
2813
- responseData = await providerResponse.json();
2814
- if (!providerResponse.ok) {
2815
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2816
- res.end(JSON.stringify(responseData));
2817
- return;
2818
- }
2819
- break;
2820
- }
2821
- default: {
2822
- providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
2823
- responseData = await providerResponse.json();
2824
- if (!providerResponse.ok) {
2825
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2826
- res.end(JSON.stringify(responseData));
2827
- return;
2828
- }
2829
- }
2830
- }
2831
- } catch (err) {
2832
- const errorMsg = err instanceof Error ? err.message : String(err);
2833
- res.writeHead(500, { "Content-Type": "application/json" });
2834
- res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2835
- return;
2836
- }
2837
- const durationMs = Date.now() - startTime;
2838
- const modelKey = `${targetProvider}/${targetModel}`;
2839
- modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2840
- try {
2841
- const runResult = await relay.run({
2842
- prompt: promptText.slice(0, 500),
2843
- taskType,
2844
- model: `${targetProvider}:${targetModel}`
2845
- });
2846
- recentRuns.unshift({
2847
- runId: runResult.runId,
2848
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2849
- model: modelKey,
2850
- taskType,
2851
- confidence,
2852
- mode: routingMode,
2853
- durationMs,
2854
- promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2855
- });
2856
- if (recentRuns.length > MAX_RECENT_RUNS) {
2857
- recentRuns.pop();
2858
- }
2859
- responseData["_relayplane"] = {
2860
- runId: runResult.runId,
2861
- routedTo: modelKey,
2862
- taskType,
2863
- confidence,
2864
- durationMs,
2865
- mode: routingMode
2866
- };
2867
- log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
2868
- } catch (err) {
2869
- log(`Failed to record run: ${err}`);
2870
- }
2871
- res.writeHead(200, { "Content-Type": "application/json" });
2872
- res.end(JSON.stringify(responseData));
2873
- }
2874
-
2875
- // src/cli.ts
2876
- var import_better_sqlite32 = __toESM(require("better-sqlite3"));
3
+ /**
4
+ * RelayPlane Proxy CLI
5
+ *
6
+ * Intelligent AI model routing proxy server.
7
+ *
8
+ * Usage:
9
+ * npx @relayplane/proxy [command] [options]
10
+ * relayplane-proxy [command] [options]
11
+ *
12
+ * Commands:
13
+ * (default) Start the proxy server
14
+ * telemetry [on|off|status] Manage telemetry settings
15
+ * stats Show usage statistics
16
+ * config Show configuration
17
+ *
18
+ * Options:
19
+ * --port <number> Port to listen on (default: 3001)
20
+ * --host <string> Host to bind to (default: 127.0.0.1)
21
+ * --offline Disable all network calls except LLM endpoints
22
+ * --audit Show telemetry payloads before sending
23
+ * -v, --verbose Enable verbose logging
24
+ * -h, --help Show this help message
25
+ * --version Show version
26
+ *
27
+ * Environment Variables:
28
+ * ANTHROPIC_API_KEY Anthropic API key
29
+ * OPENAI_API_KEY OpenAI API key
30
+ * GEMINI_API_KEY Google Gemini API key
31
+ * XAI_API_KEY xAI/Grok API key
32
+ * MOONSHOT_API_KEY Moonshot API key
33
+ *
34
+ * @packageDocumentation
35
+ */
36
+ Object.defineProperty(exports, "__esModule", { value: true });
37
+ const openclaw_1 = require("@relayplane/openclaw");
38
+ const config_js_1 = require("./config.js");
39
+ const telemetry_js_1 = require("./telemetry.js");
40
+ const VERSION = '0.2.0';
2877
41
  function printHelp() {
2878
- console.log(`
42
+ console.log(`
2879
43
  RelayPlane Proxy - Intelligent AI Model Routing
2880
44
 
2881
45
  Usage:
@@ -2883,19 +47,19 @@ Usage:
2883
47
  relayplane-proxy [command] [options]
2884
48
 
2885
49
  Commands:
2886
- (default) Start the proxy server
2887
- stats Show routing statistics
50
+ (default) Start the proxy server
51
+ telemetry [on|off|status] Manage telemetry settings
52
+ stats Show usage statistics
53
+ config Show configuration
2888
54
 
2889
- Server Options:
55
+ Options:
2890
56
  --port <number> Port to listen on (default: 3001)
2891
57
  --host <string> Host to bind to (default: 127.0.0.1)
58
+ --offline Disable all network calls except LLM endpoints
59
+ --audit Show telemetry payloads before sending
2892
60
  -v, --verbose Enable verbose logging
2893
-
2894
- Stats Options:
2895
- --days <number> Days of history to show (default: 7)
2896
-
2897
- General:
2898
61
  -h, --help Show this help message
62
+ --version Show version
2899
63
 
2900
64
  Environment Variables:
2901
65
  ANTHROPIC_API_KEY Anthropic API key
@@ -2904,163 +68,237 @@ Environment Variables:
2904
68
  XAI_API_KEY xAI/Grok API key (optional)
2905
69
  MOONSHOT_API_KEY Moonshot API key (optional)
2906
70
 
2907
- Examples:
71
+ Example:
2908
72
  # Start proxy on default port
2909
73
  npx @relayplane/proxy
2910
74
 
2911
- # Start on custom port with verbose logging
2912
- npx @relayplane/proxy --port 8080 -v
75
+ # Start with audit mode (see telemetry before it's sent)
76
+ npx @relayplane/proxy --audit
2913
77
 
2914
- # View routing stats for last 7 days
2915
- npx @relayplane/proxy stats
78
+ # Start in offline mode (no telemetry transmission)
79
+ npx @relayplane/proxy --offline
2916
80
 
2917
- # View stats for last 30 days
2918
- npx @relayplane/proxy stats --days 30
81
+ # Disable telemetry completely
82
+ npx @relayplane/proxy telemetry off
2919
83
 
2920
- Learn more: https://relayplane.com/integrations/openclaw
84
+ # Then point your SDKs to the proxy
85
+ export ANTHROPIC_BASE_URL=http://localhost:3001
86
+ export OPENAI_BASE_URL=http://localhost:3001
87
+
88
+ Learn more: https://relayplane.com/docs
2921
89
  `);
2922
90
  }
2923
- function showStats(days) {
2924
- const dbPath = getDefaultDbPath();
2925
- try {
2926
- const db = new import_better_sqlite32.default(dbPath, { readonly: true });
2927
- const cutoff = new Date(Date.now() - days * 24 * 60 * 60 * 1e3).toISOString();
2928
- const runs = db.prepare(`
2929
- SELECT
2930
- model,
2931
- task_type,
2932
- COUNT(*) as count,
2933
- SUM(tokens_in) as total_in,
2934
- SUM(tokens_out) as total_out,
2935
- AVG(duration_ms) as avg_duration,
2936
- SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successes
2937
- FROM runs
2938
- WHERE created_at >= ?
2939
- GROUP BY model
2940
- ORDER BY count DESC
2941
- `).all(cutoff);
2942
- const totalRuns = runs.reduce((sum, r) => sum + r.count, 0);
2943
- const totalTokensIn = runs.reduce((sum, r) => sum + (r.total_in || 0), 0);
2944
- const totalTokensOut = runs.reduce((sum, r) => sum + (r.total_out || 0), 0);
2945
- console.log("");
2946
- console.log(` \u256D\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u256E`);
2947
- console.log(` \u2502 RelayPlane Routing Stats \u2502`);
2948
- console.log(` \u2502 Last ${String(days).padStart(2)} days \u2502`);
2949
- console.log(` \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u256F`);
2950
- console.log("");
2951
- if (totalRuns === 0) {
2952
- console.log(" No routing data found for this period.");
2953
- console.log(" Start using the proxy to collect stats!");
2954
- console.log("");
2955
- return;
91
+ function printVersion() {
92
+ console.log(`RelayPlane Proxy v${VERSION}`);
93
+ }
94
+ function handleTelemetryCommand(args) {
95
+ const subcommand = args[0];
96
+ switch (subcommand) {
97
+ case 'on':
98
+ (0, config_js_1.enableTelemetry)();
99
+ console.log('✅ Telemetry enabled');
100
+ console.log(' Anonymous usage data will be collected to improve routing.');
101
+ console.log(' Run with --audit to see exactly what\'s collected.');
102
+ break;
103
+ case 'off':
104
+ (0, config_js_1.disableTelemetry)();
105
+ console.log('✅ Telemetry disabled');
106
+ console.log(' No usage data will be collected.');
107
+ console.log(' The proxy will continue to work normally.');
108
+ break;
109
+ case 'status':
110
+ default:
111
+ const enabled = (0, config_js_1.isTelemetryEnabled)();
112
+ console.log('');
113
+ console.log('📊 Telemetry Status');
114
+ console.log('───────────────────');
115
+ console.log(` Enabled: ${enabled ? '✅ Yes' : '❌ No'}`);
116
+ console.log(` Data file: ${(0, telemetry_js_1.getTelemetryPath)()}`);
117
+ console.log('');
118
+ console.log(' To enable: relayplane-proxy telemetry on');
119
+ console.log(' To disable: relayplane-proxy telemetry off');
120
+ console.log(' To audit: relayplane-proxy --audit');
121
+ console.log('');
122
+ break;
123
+ }
124
+ }
125
+ function handleStatsCommand() {
126
+ const stats = (0, telemetry_js_1.getTelemetryStats)();
127
+ console.log('');
128
+ console.log('📊 Usage Statistics');
129
+ console.log('═══════════════════');
130
+ console.log('');
131
+ console.log(` Total requests: ${stats.totalEvents}`);
132
+ console.log(` Total cost: $${stats.totalCost.toFixed(2)}`);
133
+ console.log(` Success rate: ${(stats.successRate * 100).toFixed(1)}%`);
134
+ console.log('');
135
+ if (Object.keys(stats.byModel).length > 0) {
136
+ console.log(' By Model:');
137
+ for (const [model, data] of Object.entries(stats.byModel)) {
138
+ console.log(` ${model}: ${data.count} requests, $${data.cost.toFixed(2)}`);
139
+ }
140
+ console.log('');
141
+ }
142
+ if (Object.keys(stats.byTaskType).length > 0) {
143
+ console.log(' By Task Type:');
144
+ for (const [taskType, data] of Object.entries(stats.byTaskType)) {
145
+ console.log(` ${taskType}: ${data.count} requests, $${data.cost.toFixed(2)}`);
146
+ }
147
+ console.log('');
2956
148
  }
2957
- console.log(" Summary:");
2958
- console.log(` Total requests: ${totalRuns.toLocaleString()}`);
2959
- console.log(` Total tokens in: ${totalTokensIn.toLocaleString()}`);
2960
- console.log(` Total tokens out: ${totalTokensOut.toLocaleString()}`);
2961
- console.log("");
2962
- console.log(" By Model:");
2963
- console.log(" \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
2964
- for (const row of runs) {
2965
- const pct = (row.count / totalRuns * 100).toFixed(1);
2966
- const successRate = row.count > 0 ? (row.successes / row.count * 100).toFixed(0) : "0";
2967
- console.log(` ${row.model.padEnd(35)} ${String(row.count).padStart(6)} (${pct.padStart(5)}%) ${successRate}% ok`);
149
+ if (stats.totalEvents === 0) {
150
+ console.log(' No data yet. Start using the proxy to collect statistics.');
151
+ console.log('');
2968
152
  }
2969
- console.log("");
2970
- const haikuRuns = runs.filter((r) => r.model.includes("haiku"));
2971
- const haikuTokensIn = haikuRuns.reduce((sum, r) => sum + (r.total_in || 0), 0);
2972
- const haikuTokensOut = haikuRuns.reduce((sum, r) => sum + (r.total_out || 0), 0);
2973
- const opusCost = totalTokensIn * 15 / 1e6 + totalTokensOut * 75 / 1e6;
2974
- const haikuCost = haikuTokensIn * 0.25 / 1e6 + haikuTokensOut * 1.25 / 1e6;
2975
- const nonHaikuCost = (totalTokensIn - haikuTokensIn) * 3 / 1e6 + (totalTokensOut - haikuTokensOut) * 15 / 1e6;
2976
- const actualCost = haikuCost + nonHaikuCost;
2977
- const savings = opusCost - actualCost;
2978
- if (savings > 0) {
2979
- console.log(" Estimated Savings:");
2980
- console.log(` If all Opus: $${opusCost.toFixed(2)}`);
2981
- console.log(` With routing: $${actualCost.toFixed(2)}`);
2982
- console.log(` Saved: $${savings.toFixed(2)} (${(savings / opusCost * 100).toFixed(0)}%)`);
2983
- console.log("");
153
+ }
154
+ function handleConfigCommand(args) {
155
+ const subcommand = args[0];
156
+ if (subcommand === 'set-key' && args[1]) {
157
+ (0, config_js_1.setApiKey)(args[1]);
158
+ console.log('✅ API key saved');
159
+ console.log(' Pro features will be enabled on next proxy start.');
160
+ return;
2984
161
  }
2985
- db.close();
2986
- } catch (err) {
2987
- console.error("Error reading stats:", err);
2988
- console.log("");
2989
- console.log(" No data found. The proxy stores data at:");
2990
- console.log(` ${dbPath}`);
2991
- console.log("");
2992
- }
162
+ const config = (0, config_js_1.loadConfig)();
163
+ console.log('');
164
+ console.log('⚙️ Configuration');
165
+ console.log('═════════════════');
166
+ console.log('');
167
+ console.log(` Config file: ${(0, config_js_1.getConfigPath)()}`);
168
+ console.log(` Device ID: ${config.device_id}`);
169
+ console.log(` Telemetry: ${config.telemetry_enabled ? '✅ Enabled' : '❌ Disabled'}`);
170
+ console.log(` API Key: ${config.api_key ? '••••' + config.api_key.slice(-4) : 'Not set'}`);
171
+ console.log(` Created: ${config.created_at}`);
172
+ console.log('');
173
+ console.log(' To set API key: relayplane-proxy config set-key <your-key>');
174
+ console.log('');
2993
175
  }
2994
176
  async function main() {
2995
- const args = process.argv.slice(2);
2996
- if (args.includes("-h") || args.includes("--help")) {
2997
- printHelp();
2998
- process.exit(0);
2999
- }
3000
- if (args[0] === "stats") {
3001
- let days = 7;
3002
- const daysIdx = args.indexOf("--days");
3003
- if (daysIdx !== -1 && args[daysIdx + 1]) {
3004
- days = parseInt(args[daysIdx + 1], 10) || 7;
177
+ const args = process.argv.slice(2);
178
+ // Check for help
179
+ if (args.includes('-h') || args.includes('--help')) {
180
+ printHelp();
181
+ process.exit(0);
182
+ }
183
+ // Check for version
184
+ if (args.includes('--version')) {
185
+ printVersion();
186
+ process.exit(0);
187
+ }
188
+ // Handle commands
189
+ const command = args[0];
190
+ if (command === 'telemetry') {
191
+ handleTelemetryCommand(args.slice(1));
192
+ process.exit(0);
193
+ }
194
+ if (command === 'stats') {
195
+ handleStatsCommand();
196
+ process.exit(0);
197
+ }
198
+ if (command === 'config') {
199
+ handleConfigCommand(args.slice(1));
200
+ process.exit(0);
201
+ }
202
+ // Parse server options
203
+ let port = 3001;
204
+ let host = '127.0.0.1';
205
+ let verbose = false;
206
+ let audit = false;
207
+ let offline = false;
208
+ for (let i = 0; i < args.length; i++) {
209
+ const arg = args[i];
210
+ if (arg === '--port' && args[i + 1]) {
211
+ port = parseInt(args[i + 1], 10);
212
+ if (isNaN(port) || port < 1 || port > 65535) {
213
+ console.error('Error: Invalid port number');
214
+ process.exit(1);
215
+ }
216
+ i++;
217
+ }
218
+ else if (arg === '--host' && args[i + 1]) {
219
+ host = args[i + 1];
220
+ i++;
221
+ }
222
+ else if (arg === '-v' || arg === '--verbose') {
223
+ verbose = true;
224
+ }
225
+ else if (arg === '--audit') {
226
+ audit = true;
227
+ }
228
+ else if (arg === '--offline') {
229
+ offline = true;
230
+ }
3005
231
  }
3006
- showStats(days);
3007
- process.exit(0);
3008
- }
3009
- let port = 3001;
3010
- let host = "127.0.0.1";
3011
- let verbose = false;
3012
- for (let i = 0; i < args.length; i++) {
3013
- const arg = args[i];
3014
- if (arg === "--port" && args[i + 1]) {
3015
- port = parseInt(args[i + 1], 10);
3016
- if (isNaN(port) || port < 1 || port > 65535) {
3017
- console.error("Error: Invalid port number");
232
+ // Set modes
233
+ (0, telemetry_js_1.setAuditMode)(audit);
234
+ (0, telemetry_js_1.setOfflineMode)(offline);
235
+ // First run disclosure
236
+ if ((0, config_js_1.isFirstRun)()) {
237
+ (0, telemetry_js_1.printTelemetryDisclosure)();
238
+ (0, config_js_1.markFirstRunComplete)();
239
+ // Wait for user to read (brief pause)
240
+ await new Promise(resolve => setTimeout(resolve, 1000));
241
+ }
242
+ // Check for at least one API key
243
+ const hasAnthropicKey = !!process.env['ANTHROPIC_API_KEY'];
244
+ const hasOpenAIKey = !!process.env['OPENAI_API_KEY'];
245
+ const hasGeminiKey = !!process.env['GEMINI_API_KEY'];
246
+ const hasXAIKey = !!process.env['XAI_API_KEY'];
247
+ const hasMoonshotKey = !!process.env['MOONSHOT_API_KEY'];
248
+ if (!hasAnthropicKey && !hasOpenAIKey && !hasGeminiKey && !hasXAIKey && !hasMoonshotKey) {
249
+ console.error('Error: No API keys found. Set at least one of:');
250
+ console.error(' ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, XAI_API_KEY, MOONSHOT_API_KEY');
251
+ process.exit(1);
252
+ }
253
+ // Print startup info
254
+ console.log('');
255
+ console.log(' ╭─────────────────────────────────────────╮');
256
+ console.log(` │ RelayPlane Proxy v${VERSION} │`);
257
+ console.log(' │ Intelligent AI Model Routing │');
258
+ console.log(' ╰─────────────────────────────────────────╯');
259
+ console.log('');
260
+ // Show modes
261
+ const telemetryEnabled = (0, config_js_1.isTelemetryEnabled)();
262
+ console.log(' Mode:');
263
+ if (offline) {
264
+ console.log(' 🔒 Offline (no telemetry transmission)');
265
+ }
266
+ else if (audit) {
267
+ console.log(' 🔍 Audit (showing telemetry payloads)');
268
+ }
269
+ else if (telemetryEnabled) {
270
+ console.log(' 📊 Telemetry enabled (--audit to inspect, telemetry off to disable)');
271
+ }
272
+ else {
273
+ console.log(' 📴 Telemetry disabled');
274
+ }
275
+ console.log('');
276
+ console.log(' Providers:');
277
+ if (hasAnthropicKey)
278
+ console.log(' ✓ Anthropic');
279
+ if (hasOpenAIKey)
280
+ console.log(' ✓ OpenAI');
281
+ if (hasGeminiKey)
282
+ console.log(' ✓ Google Gemini');
283
+ if (hasXAIKey)
284
+ console.log(' ✓ xAI (Grok)');
285
+ if (hasMoonshotKey)
286
+ console.log(' ✓ Moonshot');
287
+ console.log('');
288
+ try {
289
+ await (0, openclaw_1.startProxy)({ port, host, verbose });
290
+ console.log('');
291
+ console.log(' To use, set these environment variables:');
292
+ console.log(` export ANTHROPIC_BASE_URL=http://${host}:${port}`);
293
+ console.log(` export OPENAI_BASE_URL=http://${host}:${port}`);
294
+ console.log('');
295
+ console.log(' Then run your agent (OpenClaw, Cursor, Aider, etc.)');
296
+ console.log('');
297
+ }
298
+ catch (err) {
299
+ console.error('Failed to start proxy:', err);
3018
300
  process.exit(1);
3019
- }
3020
- i++;
3021
- } else if (arg === "--host" && args[i + 1]) {
3022
- host = args[i + 1];
3023
- i++;
3024
- } else if (arg === "-v" || arg === "--verbose") {
3025
- verbose = true;
3026
301
  }
3027
- }
3028
- const hasAnthropicKey = !!process.env["ANTHROPIC_API_KEY"];
3029
- const hasOpenAIKey = !!process.env["OPENAI_API_KEY"];
3030
- const hasGeminiKey = !!process.env["GEMINI_API_KEY"];
3031
- const hasXAIKey = !!process.env["XAI_API_KEY"];
3032
- const hasMoonshotKey = !!process.env["MOONSHOT_API_KEY"];
3033
- if (!hasAnthropicKey && !hasOpenAIKey && !hasGeminiKey && !hasXAIKey && !hasMoonshotKey) {
3034
- console.error("Error: No API keys found. Set at least one of:");
3035
- console.error(" ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, XAI_API_KEY, MOONSHOT_API_KEY");
3036
- process.exit(1);
3037
- }
3038
- console.log("");
3039
- console.log(" \u256D\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u256E");
3040
- console.log(" \u2502 RelayPlane Proxy v0.1.0 \u2502");
3041
- console.log(" \u2502 Intelligent AI Model Routing \u2502");
3042
- console.log(" \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u256F");
3043
- console.log("");
3044
- console.log(" Providers:");
3045
- if (hasAnthropicKey) console.log(" \u2713 Anthropic");
3046
- if (hasOpenAIKey) console.log(" \u2713 OpenAI");
3047
- if (hasGeminiKey) console.log(" \u2713 Google Gemini");
3048
- if (hasXAIKey) console.log(" \u2713 xAI (Grok)");
3049
- if (hasMoonshotKey) console.log(" \u2713 Moonshot");
3050
- console.log("");
3051
- try {
3052
- await startProxy({ port, host, verbose });
3053
- console.log("");
3054
- console.log(" To use, set these environment variables:");
3055
- console.log(` export ANTHROPIC_BASE_URL=http://${host}:${port}`);
3056
- console.log(` export OPENAI_BASE_URL=http://${host}:${port}`);
3057
- console.log("");
3058
- console.log(" Then run your agent (OpenClaw, Cursor, Aider, etc.)");
3059
- console.log("");
3060
- } catch (err) {
3061
- console.error("Failed to start proxy:", err);
3062
- process.exit(1);
3063
- }
3064
302
  }
3065
303
  main();
3066
304
  //# sourceMappingURL=cli.js.map