@relayplane/proxy 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3007 +1,74 @@
1
1
  "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __export = (target, all) => {
9
- for (var name in all)
10
- __defProp(target, name, { get: all[name], enumerable: true });
11
- };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/index.ts
31
- var index_exports = {};
32
- __export(index_exports, {
33
- DEFAULT_CONFIG: () => DEFAULT_CONFIG,
34
- DEFAULT_ENDPOINTS: () => DEFAULT_ENDPOINTS,
35
- MODEL_MAPPING: () => MODEL_MAPPING,
36
- MODEL_PRICING: () => MODEL_PRICING,
37
- OutcomeRecorder: () => OutcomeRecorder,
38
- PatternDetector: () => PatternDetector,
39
- ProviderSchema: () => ProviderSchema,
40
- Providers: () => Providers,
41
- RelayPlane: () => RelayPlane,
42
- RoutingEngine: () => RoutingEngine,
43
- Store: () => Store,
44
- TaskTypeSchema: () => TaskTypeSchema,
45
- TaskTypes: () => TaskTypes,
46
- calculateCost: () => calculateCost,
47
- calculateSavings: () => calculateSavings,
48
- getConfigPath: () => getConfigPath,
49
- getInferenceConfidence: () => getInferenceConfidence,
50
- getModelPricing: () => getModelPricing,
51
- getStrategy: () => getStrategy,
52
- inferTaskType: () => inferTaskType,
53
- loadConfig: () => loadConfig,
54
- startProxy: () => startProxy,
55
- watchConfig: () => watchConfig
56
- });
57
- module.exports = __toCommonJS(index_exports);
58
-
59
- // src/proxy.ts
60
- var http = __toESM(require("http"));
61
- var url = __toESM(require("url"));
62
-
63
- // src/storage/store.ts
64
- var import_better_sqlite3 = __toESM(require("better-sqlite3"));
65
-
66
- // node_modules/nanoid/index.js
67
- var import_crypto = __toESM(require("crypto"), 1);
68
-
69
- // node_modules/nanoid/url-alphabet/index.js
70
- var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
71
-
72
- // node_modules/nanoid/index.js
73
- var POOL_SIZE_MULTIPLIER = 128;
74
- var pool;
75
- var poolOffset;
76
- var fillPool = (bytes) => {
77
- if (!pool || pool.length < bytes) {
78
- pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
79
- import_crypto.default.randomFillSync(pool);
80
- poolOffset = 0;
81
- } else if (poolOffset + bytes > pool.length) {
82
- import_crypto.default.randomFillSync(pool);
83
- poolOffset = 0;
84
- }
85
- poolOffset += bytes;
86
- };
87
- var nanoid = (size = 21) => {
88
- fillPool(size |= 0);
89
- let id = "";
90
- for (let i = poolOffset - size; i < poolOffset; i++) {
91
- id += urlAlphabet[pool[i] & 63];
92
- }
93
- return id;
94
- };
95
-
96
- // src/storage/store.ts
97
- var fs = __toESM(require("fs"));
98
- var path = __toESM(require("path"));
99
- var os = __toESM(require("os"));
100
-
101
- // src/storage/schema.ts
102
- var SCHEMA_SQL = `
103
- -- Runs table: stores all LLM invocations
104
- CREATE TABLE IF NOT EXISTS runs (
105
- id TEXT PRIMARY KEY,
106
- prompt TEXT NOT NULL,
107
- system_prompt TEXT,
108
- task_type TEXT NOT NULL,
109
- model TEXT NOT NULL,
110
- success INTEGER NOT NULL,
111
- output TEXT,
112
- error TEXT,
113
- duration_ms INTEGER NOT NULL,
114
- tokens_in INTEGER,
115
- tokens_out INTEGER,
116
- cost_usd REAL,
117
- metadata TEXT,
118
- created_at TEXT NOT NULL DEFAULT (datetime('now'))
119
- );
120
-
121
- -- Index for task type queries
122
- CREATE INDEX IF NOT EXISTS idx_runs_task_type ON runs(task_type);
123
-
124
- -- Index for model queries
125
- CREATE INDEX IF NOT EXISTS idx_runs_model ON runs(model);
126
-
127
- -- Index for time-based queries
128
- CREATE INDEX IF NOT EXISTS idx_runs_created_at ON runs(created_at);
129
-
130
- -- Outcomes table: stores user feedback on runs
131
- CREATE TABLE IF NOT EXISTS outcomes (
132
- id TEXT PRIMARY KEY,
133
- run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
134
- success INTEGER NOT NULL,
135
- quality TEXT,
136
- latency_satisfactory INTEGER,
137
- cost_satisfactory INTEGER,
138
- feedback TEXT,
139
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
140
- UNIQUE(run_id)
141
- );
142
-
143
- -- Index for run lookups
144
- CREATE INDEX IF NOT EXISTS idx_outcomes_run_id ON outcomes(run_id);
145
-
146
- -- Routing rules table: stores routing preferences
147
- CREATE TABLE IF NOT EXISTS routing_rules (
148
- id TEXT PRIMARY KEY,
149
- task_type TEXT NOT NULL UNIQUE,
150
- preferred_model TEXT NOT NULL,
151
- source TEXT NOT NULL DEFAULT 'default',
152
- confidence REAL,
153
- sample_count INTEGER,
154
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
155
- updated_at TEXT NOT NULL DEFAULT (datetime('now'))
156
- );
157
-
158
- -- Index for task type lookups
159
- CREATE INDEX IF NOT EXISTS idx_routing_rules_task_type ON routing_rules(task_type);
160
-
161
- -- Suggestions table: stores routing improvement suggestions
162
- CREATE TABLE IF NOT EXISTS suggestions (
163
- id TEXT PRIMARY KEY,
164
- task_type TEXT NOT NULL,
165
- current_model TEXT NOT NULL,
166
- suggested_model TEXT NOT NULL,
167
- reason TEXT NOT NULL,
168
- confidence REAL NOT NULL,
169
- expected_improvement TEXT NOT NULL,
170
- sample_count INTEGER NOT NULL,
171
- accepted INTEGER,
172
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
173
- accepted_at TEXT
174
- );
175
-
176
- -- Index for task type lookups
177
- CREATE INDEX IF NOT EXISTS idx_suggestions_task_type ON suggestions(task_type);
178
-
179
- -- Index for pending suggestions
180
- CREATE INDEX IF NOT EXISTS idx_suggestions_accepted ON suggestions(accepted);
181
-
182
- -- Schema version table for migrations
183
- CREATE TABLE IF NOT EXISTS schema_version (
184
- version INTEGER PRIMARY KEY,
185
- applied_at TEXT NOT NULL DEFAULT (datetime('now'))
186
- );
187
-
188
- -- Insert initial schema version
189
- INSERT OR IGNORE INTO schema_version (version) VALUES (1);
190
- `;
191
- var DEFAULT_ROUTING_RULES = [
192
- // Complex tasks → Sonnet (need reasoning & quality)
193
- { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
194
- { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
195
- { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
196
- { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
197
- // Simple tasks → Haiku (cost efficient)
198
- { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
199
- { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
200
- { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
201
- { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
202
- { taskType: "general", preferredModel: "anthropic:claude-3-5-haiku-latest" }
203
- ];
204
- function generateSeedSQL() {
205
- const values = DEFAULT_ROUTING_RULES.map((rule, index) => {
206
- const id = `default-${rule.taskType}`;
207
- return `('${id}', '${rule.taskType}', '${rule.preferredModel}', 'default', NULL, NULL, datetime('now'), datetime('now'))`;
208
- }).join(",\n ");
209
- return `
210
- INSERT OR IGNORE INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
211
- VALUES
212
- ${values};
213
- `;
214
- }
215
-
216
- // src/storage/store.ts
217
- function getDefaultDbPath() {
218
- return path.join(os.homedir(), ".relayplane", "data.db");
219
- }
220
- var Store = class {
221
- db;
222
- dbPath;
223
- /**
224
- * Creates a new Store instance.
225
- *
226
- * @param dbPath - Path to the SQLite database file. Defaults to ~/.relayplane/data.db
227
- */
228
- constructor(dbPath) {
229
- this.dbPath = dbPath ?? getDefaultDbPath();
230
- const dir = path.dirname(this.dbPath);
231
- if (!fs.existsSync(dir)) {
232
- fs.mkdirSync(dir, { recursive: true });
233
- }
234
- this.db = new import_better_sqlite3.default(this.dbPath);
235
- this.db.pragma("journal_mode = WAL");
236
- this.db.pragma("foreign_keys = ON");
237
- this.initializeSchema();
238
- }
239
- /**
240
- * Initializes the database schema.
241
- */
242
- initializeSchema() {
243
- this.db.exec(SCHEMA_SQL);
244
- this.db.exec(generateSeedSQL());
245
- }
246
- /**
247
- * Closes the database connection.
248
- */
249
- close() {
250
- this.db.close();
251
- }
252
- /**
253
- * Gets the database path.
254
- */
255
- getDbPath() {
256
- return this.dbPath;
257
- }
258
- // ============================================================================
259
- // Runs
260
- // ============================================================================
261
- /**
262
- * Records a new run.
263
- */
264
- recordRun(run) {
265
- const id = nanoid();
266
- const stmt = this.db.prepare(`
267
- INSERT INTO runs (id, prompt, system_prompt, task_type, model, success, output, error, duration_ms, tokens_in, tokens_out, cost_usd, metadata, created_at)
268
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
269
- `);
270
- stmt.run(
271
- id,
272
- run.prompt,
273
- run.systemPrompt,
274
- run.taskType,
275
- run.model,
276
- run.success ? 1 : 0,
277
- run.output,
278
- run.error,
279
- run.durationMs,
280
- run.tokensIn,
281
- run.tokensOut,
282
- run.costUsd,
283
- run.metadata
284
- );
285
- return id;
286
- }
287
- /**
288
- * Gets a run by ID.
289
- */
290
- getRun(id) {
291
- const stmt = this.db.prepare(`
292
- SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
293
- FROM runs
294
- WHERE id = ?
295
- `);
296
- const row = stmt.get(id);
297
- if (!row) return null;
298
- return {
299
- ...row,
300
- success: Boolean(row.success)
301
- };
302
- }
303
- /**
304
- * Gets runs with optional filters.
305
- */
306
- getRuns(options) {
307
- const conditions = [];
308
- const params = [];
309
- if (options?.taskType) {
310
- conditions.push("task_type = ?");
311
- params.push(options.taskType);
312
- }
313
- if (options?.model) {
314
- conditions.push("model = ?");
315
- params.push(options.model);
316
- }
317
- if (options?.from) {
318
- conditions.push("created_at >= ?");
319
- params.push(options.from);
320
- }
321
- if (options?.to) {
322
- conditions.push("created_at <= ?");
323
- params.push(options.to);
324
- }
325
- const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
326
- const limit = options?.limit ?? 100;
327
- const offset = options?.offset ?? 0;
328
- const stmt = this.db.prepare(`
329
- SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
330
- FROM runs
331
- ${whereClause}
332
- ORDER BY created_at DESC
333
- LIMIT ? OFFSET ?
334
- `);
335
- params.push(limit, offset);
336
- const rows = stmt.all(...params);
337
- return rows.map((row) => ({
338
- ...row,
339
- success: Boolean(row.success)
340
- }));
341
- }
342
- /**
343
- * Counts runs with optional filters.
344
- */
345
- countRuns(options) {
346
- const conditions = [];
347
- const params = [];
348
- if (options?.taskType) {
349
- conditions.push("task_type = ?");
350
- params.push(options.taskType);
351
- }
352
- if (options?.model) {
353
- conditions.push("model = ?");
354
- params.push(options.model);
355
- }
356
- if (options?.from) {
357
- conditions.push("created_at >= ?");
358
- params.push(options.from);
359
- }
360
- if (options?.to) {
361
- conditions.push("created_at <= ?");
362
- params.push(options.to);
363
- }
364
- const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
365
- const stmt = this.db.prepare(`
366
- SELECT COUNT(*) as count
367
- FROM runs
368
- ${whereClause}
369
- `);
370
- const row = stmt.get(...params);
371
- return row.count;
372
- }
373
- // ============================================================================
374
- // Outcomes
375
- // ============================================================================
376
- /**
377
- * Records an outcome for a run.
378
- */
379
- recordOutcome(outcome) {
380
- const id = nanoid();
381
- const stmt = this.db.prepare(`
382
- INSERT INTO outcomes (id, run_id, success, quality, latency_satisfactory, cost_satisfactory, feedback, created_at)
383
- VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
384
- ON CONFLICT(run_id) DO UPDATE SET
385
- success = excluded.success,
386
- quality = excluded.quality,
387
- latency_satisfactory = excluded.latency_satisfactory,
388
- cost_satisfactory = excluded.cost_satisfactory,
389
- feedback = excluded.feedback,
390
- created_at = datetime('now')
391
- `);
392
- stmt.run(
393
- id,
394
- outcome.runId,
395
- outcome.success ? 1 : 0,
396
- outcome.quality,
397
- outcome.latencySatisfactory != null ? outcome.latencySatisfactory ? 1 : 0 : null,
398
- outcome.costSatisfactory != null ? outcome.costSatisfactory ? 1 : 0 : null,
399
- outcome.feedback
400
- );
401
- return id;
402
- }
403
- /**
404
- * Gets an outcome for a run.
405
- */
406
- getOutcome(runId) {
407
- const stmt = this.db.prepare(`
408
- SELECT id, run_id as runId, success, quality, latency_satisfactory as latencySatisfactory, cost_satisfactory as costSatisfactory, feedback, created_at as createdAt
409
- FROM outcomes
410
- WHERE run_id = ?
411
- `);
412
- const row = stmt.get(runId);
413
- if (!row) return null;
414
- return {
415
- ...row,
416
- success: Boolean(row.success),
417
- latencySatisfactory: row.latencySatisfactory != null ? Boolean(row.latencySatisfactory) : null,
418
- costSatisfactory: row.costSatisfactory != null ? Boolean(row.costSatisfactory) : null
419
- };
420
- }
421
- /**
422
- * Gets outcomes with optional filters.
423
- */
424
- getOutcomes(options) {
425
- const conditions = [];
426
- const params = [];
427
- if (options?.taskType) {
428
- conditions.push("r.task_type = ?");
429
- params.push(options.taskType);
430
- }
431
- if (options?.model) {
432
- conditions.push("r.model = ?");
433
- params.push(options.model);
434
- }
435
- if (options?.from) {
436
- conditions.push("o.created_at >= ?");
437
- params.push(options.from);
438
- }
439
- if (options?.to) {
440
- conditions.push("o.created_at <= ?");
441
- params.push(options.to);
442
- }
443
- const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
444
- const limit = options?.limit ?? 100;
445
- const stmt = this.db.prepare(`
446
- SELECT o.id, o.run_id as runId, o.success, o.quality, o.latency_satisfactory as latencySatisfactory, o.cost_satisfactory as costSatisfactory, o.feedback, o.created_at as createdAt, r.task_type as taskType, r.model
447
- FROM outcomes o
448
- JOIN runs r ON o.run_id = r.id
449
- ${whereClause}
450
- ORDER BY o.created_at DESC
451
- LIMIT ?
452
- `);
453
- params.push(limit);
454
- const rows = stmt.all(...params);
455
- return rows.map((row) => ({
456
- ...row,
457
- success: Boolean(row.success),
458
- latencySatisfactory: row.latencySatisfactory != null ? Boolean(row.latencySatisfactory) : null,
459
- costSatisfactory: row.costSatisfactory != null ? Boolean(row.costSatisfactory) : null
460
- }));
461
- }
462
- // ============================================================================
463
- // Routing Rules
464
- // ============================================================================
465
- /**
466
- * Gets a routing rule for a task type.
467
- */
468
- getRule(taskType) {
469
- const stmt = this.db.prepare(`
470
- SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
471
- FROM routing_rules
472
- WHERE task_type = ?
473
- `);
474
- return stmt.get(taskType) ?? null;
475
- }
476
- /**
477
- * Sets a routing rule for a task type.
478
- */
479
- setRule(taskType, preferredModel, source, confidence, sampleCount) {
480
- const existingRule = this.getRule(taskType);
481
- const id = existingRule?.id ?? nanoid();
482
- const stmt = this.db.prepare(`
483
- INSERT INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
484
- VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
485
- ON CONFLICT(task_type) DO UPDATE SET
486
- preferred_model = excluded.preferred_model,
487
- source = excluded.source,
488
- confidence = excluded.confidence,
489
- sample_count = excluded.sample_count,
490
- updated_at = datetime('now')
491
- `);
492
- stmt.run(id, taskType, preferredModel, source, confidence ?? null, sampleCount ?? null);
493
- return id;
494
- }
495
- /**
496
- * Lists all routing rules.
497
- */
498
- listRules() {
499
- const stmt = this.db.prepare(`
500
- SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
501
- FROM routing_rules
502
- ORDER BY task_type
503
- `);
504
- return stmt.all();
505
- }
506
- /**
507
- * Deletes a routing rule and resets to default.
508
- */
509
- deleteRule(taskType) {
510
- const defaultRule = DEFAULT_ROUTING_RULES.find((r) => r.taskType === taskType);
511
- if (!defaultRule) return false;
512
- this.setRule(taskType, defaultRule.preferredModel, "default");
513
- return true;
514
- }
515
- // ============================================================================
516
- // Suggestions
517
- // ============================================================================
518
- /**
519
- * Records a suggestion.
520
- */
521
- recordSuggestion(suggestion) {
522
- const id = nanoid();
523
- const stmt = this.db.prepare(`
524
- INSERT INTO suggestions (id, task_type, current_model, suggested_model, reason, confidence, expected_improvement, sample_count, accepted, created_at)
525
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
526
- `);
527
- stmt.run(
528
- id,
529
- suggestion.taskType,
530
- suggestion.currentModel,
531
- suggestion.suggestedModel,
532
- suggestion.reason,
533
- suggestion.confidence,
534
- suggestion.expectedImprovement,
535
- suggestion.sampleCount,
536
- suggestion.accepted ?? null
537
- );
538
- return id;
539
- }
540
- /**
541
- * Gets a suggestion by ID.
542
- */
543
- getSuggestion(id) {
544
- const stmt = this.db.prepare(`
545
- SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
546
- FROM suggestions
547
- WHERE id = ?
548
- `);
549
- const row = stmt.get(id);
550
- if (!row) return null;
551
- return {
552
- ...row,
553
- accepted: row.accepted != null ? Boolean(row.accepted) : null
554
- };
555
- }
556
- /**
557
- * Gets pending (unaccepted) suggestions.
558
- */
559
- getPendingSuggestions() {
560
- const stmt = this.db.prepare(`
561
- SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
562
- FROM suggestions
563
- WHERE accepted IS NULL
564
- ORDER BY confidence DESC
565
- `);
566
- const rows = stmt.all();
567
- return rows.map((row) => ({
568
- ...row,
569
- accepted: row.accepted != null ? Boolean(row.accepted) : null
570
- }));
571
- }
572
- /**
573
- * Accepts a suggestion.
574
- */
575
- acceptSuggestion(id) {
576
- const suggestion = this.getSuggestion(id);
577
- if (!suggestion) return false;
578
- const updateStmt = this.db.prepare(`
579
- UPDATE suggestions
580
- SET accepted = 1, accepted_at = datetime('now')
581
- WHERE id = ?
582
- `);
583
- updateStmt.run(id);
584
- this.setRule(
585
- suggestion.taskType,
586
- suggestion.suggestedModel,
587
- "learned",
588
- suggestion.confidence,
589
- suggestion.sampleCount
590
- );
591
- return true;
592
- }
593
- /**
594
- * Rejects a suggestion.
595
- */
596
- rejectSuggestion(id) {
597
- const stmt = this.db.prepare(`
598
- UPDATE suggestions
599
- SET accepted = 0, accepted_at = datetime('now')
600
- WHERE id = ?
601
- `);
602
- const result = stmt.run(id);
603
- return result.changes > 0;
604
- }
605
- // ============================================================================
606
- // Statistics
607
- // ============================================================================
608
- /**
609
- * Gets aggregated statistics.
610
- */
611
- getStats(options) {
612
- const conditions = [];
613
- const params = [];
614
- if (options?.from) {
615
- conditions.push("created_at >= ?");
616
- params.push(options.from);
617
- }
618
- if (options?.to) {
619
- conditions.push("created_at <= ?");
620
- params.push(options.to);
621
- }
622
- const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
623
- const overallStmt = this.db.prepare(`
624
- SELECT
625
- COUNT(*) as totalRuns,
626
- SUM(success) as successfulRuns,
627
- AVG(duration_ms) as avgDurationMs
628
- FROM runs
629
- ${whereClause}
630
- `);
631
- const overall = overallStmt.get(...params);
632
- const byTaskTypeStmt = this.db.prepare(`
633
- SELECT
634
- task_type as taskType,
635
- COUNT(*) as runs,
636
- AVG(success) as successRate,
637
- AVG(duration_ms) as avgDurationMs
638
- FROM runs
639
- ${whereClause}
640
- GROUP BY task_type
641
- `);
642
- const byTaskTypeRows = byTaskTypeStmt.all(...params);
643
- const byTaskType = {};
644
- for (const row of byTaskTypeRows) {
645
- byTaskType[row.taskType] = {
646
- runs: row.runs,
647
- successRate: row.successRate,
648
- avgDurationMs: row.avgDurationMs
649
- };
650
- }
651
- const byModelStmt = this.db.prepare(`
652
- SELECT
653
- model,
654
- COUNT(*) as runs,
655
- AVG(success) as successRate,
656
- AVG(duration_ms) as avgDurationMs
657
- FROM runs
658
- ${whereClause}
659
- GROUP BY model
660
- `);
661
- const byModelRows = byModelStmt.all(...params);
662
- const byModel = {};
663
- for (const row of byModelRows) {
664
- byModel[row.model] = {
665
- runs: row.runs,
666
- successRate: row.successRate,
667
- avgDurationMs: row.avgDurationMs
668
- };
669
- }
670
- return {
671
- totalRuns: overall.totalRuns,
672
- successfulRuns: overall.successfulRuns ?? 0,
673
- avgDurationMs: overall.avgDurationMs ?? 0,
674
- byTaskType,
675
- byModel
676
- };
677
- }
678
- /**
679
- * Gets statistics for learning (outcomes joined with runs).
680
- */
681
- getLearningStats(taskType) {
682
- const stmt = this.db.prepare(`
683
- SELECT
684
- r.model,
685
- COUNT(*) as runs,
686
- AVG(r.success) as successRate,
687
- AVG(r.duration_ms) as avgDurationMs,
688
- AVG(CASE WHEN o.success IS NOT NULL THEN o.success ELSE r.success END) as outcomeSuccessRate
689
- FROM runs r
690
- LEFT JOIN outcomes o ON r.id = o.run_id
691
- WHERE r.task_type = ?
692
- GROUP BY r.model
693
- HAVING runs >= 5
694
- `);
695
- return stmt.all(taskType);
696
- }
697
- };
698
-
699
- // src/routing/inference.ts
700
- var TASK_PATTERNS = {
701
- code_generation: [
702
- { pattern: /\b(write|create|generate|implement|build|code|develop|make)\b.{0,50}\b(function|class|code|script|program|method|module|api|endpoint|component)\b/i, weight: 10 },
703
- { pattern: /\b(write|create|generate)\b.{0,30}\b(python|javascript|typescript|java|go|rust|c\+\+|ruby|php|swift)\b/i, weight: 10 },
704
- { pattern: /\bcreate a.{0,30}(that|which|to)\b/i, weight: 5 },
705
- { pattern: /\bimplement\b.{0,50}\b(algorithm|logic|feature)\b/i, weight: 8 },
706
- { pattern: /\bcode\s+for\b/i, weight: 7 },
707
- { pattern: /\bwrite me\b.{0,30}\b(code|script|function)\b/i, weight: 9 },
708
- { pattern: /```[\w]*\n/i, weight: 3 }
709
- // Code blocks suggest code context
710
- ],
711
- code_review: [
712
- { pattern: /\b(review|analyze|check|audit|inspect|evaluate|assess|critique)\b.{0,30}\b(code|function|class|script|implementation|pull request|pr|diff)\b/i, weight: 10 },
713
- { pattern: /\b(what'?s? wrong|find\s+(bugs?|issues?|problems?|errors?))\b.{0,30}\b(code|function|this)\b/i, weight: 9 },
714
- { pattern: /\b(improve|optimize|refactor)\b.{0,30}\b(code|function|this)\b/i, weight: 7 },
715
- { pattern: /\blook\s+(at|over)\s+(this|my)\s+code\b/i, weight: 8 },
716
- { pattern: /\bcode\s+review\b/i, weight: 10 },
717
- { pattern: /\bcan you (check|review)\b/i, weight: 5 }
718
- ],
719
- summarization: [
720
- { pattern: /\b(summarize|summarise|summary|tldr|tl;dr|recap|condense|brief|overview)\b/i, weight: 10 },
721
- { pattern: /\b(give|provide|write)\s+(me\s+)?(a\s+)?(brief|short|quick|concise)\s+(summary|overview)\b/i, weight: 9 },
722
- { pattern: /\bshorten\s+(this|the)\b/i, weight: 6 },
723
- { pattern: /\bin\s+(brief|short|a nutshell)\b/i, weight: 7 },
724
- { pattern: /\bkey\s+(points?|takeaways?)\b/i, weight: 8 },
725
- { pattern: /\bmain\s+(ideas?|points?)\b/i, weight: 7 }
726
- ],
727
- analysis: [
728
- { pattern: /\b(analyze|analyse|analysis|examine|investigate|assess|evaluate|study)\b/i, weight: 8 },
729
- { pattern: /\b(compare|contrast|differentiate|distinguish)\b.{0,30}\b(between|and)\b/i, weight: 9 },
730
- { pattern: /\b(pros?\s+and\s+cons?|advantages?\s+and\s+disadvantages?|strengths?\s+and\s+weaknesses?)\b/i, weight: 9 },
731
- { pattern: /\b(what\s+are|explain)\s+(the\s+)?(implications?|consequences?|effects?|impacts?)\b/i, weight: 8 },
732
- { pattern: /\bbreak\s*down\b/i, weight: 6 },
733
- { pattern: /\bdeep\s*dive\b/i, weight: 7 },
734
- { pattern: /\bcritical(ly)?\s+(analysis|evaluation|assessment)\b/i, weight: 9 }
735
- ],
736
- creative_writing: [
737
- { pattern: /\b(write|create|compose|craft|author)\b.{0,30}\b(story|poem|essay|article|blog|post|narrative|fiction|novel|song|lyrics)\b/i, weight: 10 },
738
- { pattern: /\b(creative|imaginative|fictional)\s+(writing|story|piece)\b/i, weight: 10 },
739
- { pattern: /\bonce upon a time\b/i, weight: 8 },
740
- { pattern: /\b(write|tell)\s+(me\s+)?(a\s+)?(short\s+)?story\b/i, weight: 9 },
741
- { pattern: /\b(brainstorm|ideate)\b.{0,30}\b(ideas?|concepts?|themes?)\b/i, weight: 7 },
742
- { pattern: /\bwrite\s+(in|like)\s+(the\s+)?style\s+of\b/i, weight: 8 },
743
- { pattern: /\b(catchy|creative|engaging)\s+(title|headline|tagline|slogan)\b/i, weight: 7 }
744
- ],
745
- data_extraction: [
746
- { pattern: /\b(extract|parse|pull|get|retrieve|find|identify)\b.{0,30}\b(data|information|details?|values?|fields?|entities?|names?|numbers?|dates?|emails?|phones?|addresses?)\b/i, weight: 10 },
747
- { pattern: /\b(convert|transform)\b.{0,30}\b(to|into)\s+(json|csv|xml|yaml|table|structured)\b/i, weight: 9 },
748
- { pattern: /\bstructured\s+(data|output|format)\b/i, weight: 8 },
749
- { pattern: /\bnamed\s+entity\s+(recognition|extraction)\b/i, weight: 10 },
750
- { pattern: /\b(scrape|crawl)\b/i, weight: 6 },
751
- { pattern: /\bjson\s+(output|format|schema)\b/i, weight: 7 }
752
- ],
753
- translation: [
754
- { pattern: /\b(translate|translation|translator)\b/i, weight: 10 },
755
- { pattern: /\b(convert|change)\b.{0,20}\b(to|into)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 9 },
756
- { pattern: /\b(in|to)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 6 },
757
- { pattern: /\bfrom\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\s+to\b/i, weight: 10 },
758
- { pattern: /\blocalize|localization\b/i, weight: 7 }
759
- ],
760
- question_answering: [
761
- { pattern: /^(what|who|where|when|why|how|which|is|are|does|do|can|could|would|should|will|did)\s/i, weight: 7 },
762
- { pattern: /\?$/i, weight: 5 },
763
- { pattern: /\b(explain|describe|define|what\s+is|what\s+are|tell\s+me\s+about)\b/i, weight: 8 },
764
- { pattern: /\b(answer|respond|reply)\b.{0,20}\b(question|query)\b/i, weight: 9 },
765
- { pattern: /\bfaq\b/i, weight: 8 },
766
- { pattern: /\bi\s+(want|need)\s+to\s+know\b/i, weight: 6 },
767
- { pattern: /\bcan\s+you\s+(tell|explain|help)\b/i, weight: 5 }
768
- ],
769
- general: [
770
- // Catch-all patterns with low weights
771
- { pattern: /./i, weight: 1 }
772
- ]
773
- };
774
- function inferTaskType(prompt) {
775
- const normalizedPrompt = prompt.trim().toLowerCase();
776
- const scores = {
777
- code_generation: 0,
778
- code_review: 0,
779
- summarization: 0,
780
- analysis: 0,
781
- creative_writing: 0,
782
- data_extraction: 0,
783
- translation: 0,
784
- question_answering: 0,
785
- general: 0
786
- };
787
- for (const [taskType, patterns] of Object.entries(TASK_PATTERNS)) {
788
- for (const { pattern, weight } of patterns) {
789
- if (pattern.test(prompt)) {
790
- scores[taskType] += weight;
791
- }
792
- }
793
- }
794
- let maxScore = 0;
795
- let inferredType = "general";
796
- for (const [taskType, score] of Object.entries(scores)) {
797
- if (score > maxScore) {
798
- maxScore = score;
799
- inferredType = taskType;
800
- }
801
- }
802
- if (maxScore <= 1) {
803
- return "general";
804
- }
805
- return inferredType;
806
- }
807
- function getInferenceConfidence(prompt, taskType) {
808
- const patterns = TASK_PATTERNS[taskType];
809
- if (!patterns) return 0;
810
- let totalWeight = 0;
811
- let maxPossibleWeight = 0;
812
- for (const { pattern, weight } of patterns) {
813
- maxPossibleWeight += weight;
814
- if (pattern.test(prompt)) {
815
- totalWeight += weight;
816
- }
817
- }
818
- if (maxPossibleWeight === 0) return 0;
819
- return Math.min(totalWeight / maxPossibleWeight, 0.95);
820
- }
821
-
822
- // src/routing/engine.ts
823
- var RoutingEngine = class {
824
- store;
825
- /**
826
- * Creates a new RoutingEngine.
827
- *
828
- * @param store - The storage instance to use
829
- */
830
- constructor(store) {
831
- this.store = store;
832
- }
833
- /**
834
- * Infers the task type from a prompt.
835
- *
836
- * @param prompt - The prompt to analyze
837
- * @returns The inferred task type
838
- */
839
- inferTaskType(prompt) {
840
- return inferTaskType(prompt);
841
- }
842
- /**
843
- * Gets the inference confidence for a task type.
844
- *
845
- * @param prompt - The prompt to analyze
846
- * @param taskType - The task type to check
847
- * @returns Confidence score (0-1)
848
- */
849
- getInferenceConfidence(prompt, taskType) {
850
- return getInferenceConfidence(prompt, taskType);
851
- }
852
- /**
853
- * Gets the routing rule for a task type.
854
- *
855
- * @param taskType - The task type to get the rule for
856
- * @returns The routing rule, or null if not found
857
- */
858
- get(taskType) {
859
- const record = this.store.getRule(taskType);
860
- if (!record) return null;
861
- return {
862
- id: record.id,
863
- taskType: record.taskType,
864
- preferredModel: record.preferredModel,
865
- source: record.source,
866
- confidence: record.confidence ?? void 0,
867
- sampleCount: record.sampleCount ?? void 0,
868
- createdAt: record.createdAt,
869
- updatedAt: record.updatedAt
870
- };
871
- }
872
- /**
873
- * Sets a routing rule for a task type.
874
- *
875
- * @param taskType - The task type to set the rule for
876
- * @param preferredModel - The preferred model (format: "provider:model")
877
- * @param source - How the rule was created
878
- * @param options - Optional confidence and sample count
879
- * @returns The rule ID
880
- */
881
- set(taskType, preferredModel, source = "user", options) {
882
- return this.store.setRule(
883
- taskType,
884
- preferredModel,
885
- source,
886
- options?.confidence,
887
- options?.sampleCount
888
- );
889
- }
890
- /**
891
- * Lists all routing rules.
892
- *
893
- * @returns Array of all routing rules
894
- */
895
- list() {
896
- const records = this.store.listRules();
897
- return records.map((record) => ({
898
- id: record.id,
899
- taskType: record.taskType,
900
- preferredModel: record.preferredModel,
901
- source: record.source,
902
- confidence: record.confidence ?? void 0,
903
- sampleCount: record.sampleCount ?? void 0,
904
- createdAt: record.createdAt,
905
- updatedAt: record.updatedAt
906
- }));
907
- }
908
- /**
909
- * Deletes a routing rule and resets to default.
910
- *
911
- * @param taskType - The task type to reset
912
- * @returns True if the rule was reset
913
- */
914
- delete(taskType) {
915
- return this.store.deleteRule(taskType);
916
- }
917
- /**
918
- * Gets the preferred model for a task type.
919
- *
920
- * @param taskType - The task type
921
- * @returns The preferred model string, or a default
922
- */
923
- getPreferredModel(taskType) {
924
- const rule = this.get(taskType);
925
- return rule?.preferredModel ?? "local:llama3.2";
926
- }
927
- /**
928
- * Parses a model string into provider and model name.
929
- *
930
- * @param modelString - The model string (format: "provider:model")
931
- * @returns Object with provider and model
932
- */
933
- parseModel(modelString) {
934
- const parts = modelString.split(":");
935
- if (parts.length < 2) {
936
- return { provider: "local", model: modelString };
937
- }
938
- return { provider: parts[0], model: parts.slice(1).join(":") };
939
- }
940
- /**
941
- * Resolves the model to use for a prompt.
942
- *
943
- * @param prompt - The prompt to analyze
944
- * @param overrideTaskType - Optional task type override
945
- * @param overrideModel - Optional model override
946
- * @returns Object with resolved taskType, model, provider, and confidence
947
- */
948
- resolve(prompt, overrideTaskType, overrideModel) {
949
- const taskType = overrideTaskType ?? this.inferTaskType(prompt);
950
- const confidence = this.getInferenceConfidence(prompt, taskType);
951
- let model;
952
- if (overrideModel) {
953
- model = overrideModel;
954
- } else {
955
- model = this.getPreferredModel(taskType);
956
- }
957
- const { provider, model: modelName } = this.parseModel(model);
958
- return {
959
- taskType,
960
- model,
961
- provider,
962
- modelName,
963
- confidence
964
- };
965
- }
966
- };
967
-
968
- // src/learning/outcomes.ts
969
- var OutcomeRecorder = class {
970
- store;
971
- /**
972
- * Creates a new OutcomeRecorder.
973
- *
974
- * @param store - The storage instance to use
975
- */
976
- constructor(store) {
977
- this.store = store;
978
- }
979
- /**
980
- * Records an outcome for a run.
981
- *
982
- * @param input - The outcome input
983
- * @returns The recorded outcome
984
- * @throws If the run ID is not found
985
- */
986
- record(input) {
987
- const run = this.store.getRun(input.runId);
988
- if (!run) {
989
- throw new Error(`Run not found: ${input.runId}`);
990
- }
991
- const id = this.store.recordOutcome({
992
- runId: input.runId,
993
- success: input.success,
994
- quality: input.quality ?? null,
995
- latencySatisfactory: input.latencySatisfactory ?? null,
996
- costSatisfactory: input.costSatisfactory ?? null,
997
- feedback: input.feedback ?? null
998
- });
999
- const outcome = this.store.getOutcome(input.runId);
1000
- if (!outcome) {
1001
- throw new Error("Failed to record outcome");
1002
- }
1003
- return {
1004
- id: outcome.id,
1005
- runId: outcome.runId,
1006
- success: outcome.success,
1007
- quality: outcome.quality ?? void 0,
1008
- latencySatisfactory: outcome.latencySatisfactory ?? void 0,
1009
- costSatisfactory: outcome.costSatisfactory ?? void 0,
1010
- feedback: outcome.feedback ?? void 0,
1011
- recordedAt: outcome.createdAt
1012
- };
1013
- }
1014
- /**
1015
- * Gets an outcome for a run.
1016
- *
1017
- * @param runId - The run ID
1018
- * @returns The outcome, or null if not found
1019
- */
1020
- get(runId) {
1021
- const outcome = this.store.getOutcome(runId);
1022
- if (!outcome) return null;
1023
- return {
1024
- id: outcome.id,
1025
- runId: outcome.runId,
1026
- success: outcome.success,
1027
- quality: outcome.quality ?? void 0,
1028
- latencySatisfactory: outcome.latencySatisfactory ?? void 0,
1029
- costSatisfactory: outcome.costSatisfactory ?? void 0,
1030
- feedback: outcome.feedback ?? void 0,
1031
- recordedAt: outcome.createdAt
1032
- };
1033
- }
1034
- /**
1035
- * Gets outcome statistics for a task type.
1036
- *
1037
- * @param taskType - The task type to get stats for
1038
- * @returns Outcome statistics
1039
- */
1040
- getTaskStats(taskType) {
1041
- const outcomes = this.store.getOutcomes({ taskType, limit: 1e3 });
1042
- if (outcomes.length === 0) {
1043
- return {
1044
- totalOutcomes: 0,
1045
- successRate: 0,
1046
- qualityDistribution: {},
1047
- latencySatisfactionRate: 0,
1048
- costSatisfactionRate: 0
1049
- };
1050
- }
1051
- let successCount = 0;
1052
- let latencySatisfiedCount = 0;
1053
- let latencyRatedCount = 0;
1054
- let costSatisfiedCount = 0;
1055
- let costRatedCount = 0;
1056
- const qualityDistribution = {};
1057
- for (const outcome of outcomes) {
1058
- if (outcome.success) successCount++;
1059
- if (outcome.quality) {
1060
- qualityDistribution[outcome.quality] = (qualityDistribution[outcome.quality] ?? 0) + 1;
1061
- }
1062
- if (outcome.latencySatisfactory != null) {
1063
- latencyRatedCount++;
1064
- if (outcome.latencySatisfactory) latencySatisfiedCount++;
1065
- }
1066
- if (outcome.costSatisfactory != null) {
1067
- costRatedCount++;
1068
- if (outcome.costSatisfactory) costSatisfiedCount++;
1069
- }
1070
- }
1071
- return {
1072
- totalOutcomes: outcomes.length,
1073
- successRate: successCount / outcomes.length,
1074
- qualityDistribution,
1075
- latencySatisfactionRate: latencyRatedCount > 0 ? latencySatisfiedCount / latencyRatedCount : 0,
1076
- costSatisfactionRate: costRatedCount > 0 ? costSatisfiedCount / costRatedCount : 0
1077
- };
1078
- }
1079
- };
1080
-
1081
- // src/learning/savings.ts
1082
- var MODEL_PRICING = {
1083
- // Anthropic models
1084
- "claude-3-5-haiku-latest": { input: 0.25, output: 1.25 },
1085
- "claude-3-5-haiku-20241022": { input: 0.25, output: 1.25 },
1086
- "claude-3-5-sonnet-latest": { input: 3, output: 15 },
1087
- "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
1088
- "claude-sonnet-4-20250514": { input: 3, output: 15 },
1089
- "claude-3-opus-latest": { input: 15, output: 75 },
1090
- "claude-3-opus-20240229": { input: 15, output: 75 },
1091
- "claude-opus-4-5-20250514": { input: 15, output: 75 },
1092
- // OpenAI models
1093
- "gpt-4o": { input: 2.5, output: 10 },
1094
- "gpt-4o-mini": { input: 0.15, output: 0.6 },
1095
- "gpt-4.1": { input: 2, output: 8 },
1096
- "gpt-4-turbo": { input: 10, output: 30 },
1097
- // Google models
1098
- "gemini-1.5-flash": { input: 0.075, output: 0.3 },
1099
- "gemini-1.5-pro": { input: 1.25, output: 5 },
1100
- "gemini-2.0-flash": { input: 0.1, output: 0.4 },
1101
- // xAI models
1102
- "grok-2": { input: 2, output: 10 },
1103
- "grok-2-latest": { input: 2, output: 10 },
1104
- // Moonshot models
1105
- "moonshot-v1-8k": { input: 0.1, output: 0.1 },
1106
- "moonshot-v1-32k": { input: 0.2, output: 0.2 }
1107
- };
1108
- var BASELINE_MODEL = "claude-3-opus-latest";
1109
- function calculateCost(model, tokensIn, tokensOut) {
1110
- const modelName = model.includes(":") ? model.split(":")[1] : model;
1111
- const pricing = MODEL_PRICING[modelName] ?? MODEL_PRICING[model] ?? { input: 1, output: 3 };
1112
- const inputCost = tokensIn / 1e6 * pricing.input;
1113
- const outputCost = tokensOut / 1e6 * pricing.output;
1114
- return inputCost + outputCost;
1115
- }
1116
- function getModelPricing(model) {
1117
- const modelName = model.includes(":") ? model.split(":")[1] : model;
1118
- return MODEL_PRICING[modelName] ?? MODEL_PRICING[model] ?? null;
1119
- }
1120
- function calculateSavings(store, days = 30) {
1121
- const to = /* @__PURE__ */ new Date();
1122
- const from = /* @__PURE__ */ new Date();
1123
- from.setDate(from.getDate() - days);
1124
- const fromStr = from.toISOString();
1125
- const toStr = to.toISOString();
1126
- const runs = store.getRuns({
1127
- from: fromStr,
1128
- to: toStr,
1129
- limit: 1e5
1130
- // Get all runs
1131
- });
1132
- const byModel = {};
1133
- const byTaskType = {};
1134
- let totalTokensIn = 0;
1135
- let totalTokensOut = 0;
1136
- let actualCost = 0;
1137
- let baselineCost = 0;
1138
- const baselinePricing = MODEL_PRICING[BASELINE_MODEL] ?? { input: 15, output: 75 };
1139
- for (const run of runs) {
1140
- const tokensIn = run.tokensIn ?? 0;
1141
- const tokensOut = run.tokensOut ?? 0;
1142
- const modelName = run.model.includes(":") ? run.model.split(":")[1] ?? run.model : run.model;
1143
- const runCost = calculateCost(run.model, tokensIn, tokensOut);
1144
- actualCost += runCost;
1145
- const baselineRunCost = tokensIn / 1e6 * (baselinePricing?.input ?? 15) + tokensOut / 1e6 * (baselinePricing?.output ?? 75);
1146
- baselineCost += baselineRunCost;
1147
- totalTokensIn += tokensIn;
1148
- totalTokensOut += tokensOut;
1149
- if (!byModel[modelName]) {
1150
- byModel[modelName] = {
1151
- runs: 0,
1152
- tokensIn: 0,
1153
- tokensOut: 0,
1154
- cost: 0,
1155
- successRate: 0,
1156
- avgLatencyMs: 0
1157
- };
1158
- }
1159
- const modelStats = byModel[modelName];
1160
- modelStats.runs++;
1161
- modelStats.tokensIn += tokensIn;
1162
- modelStats.tokensOut += tokensOut;
1163
- modelStats.cost += runCost;
1164
- modelStats.avgLatencyMs += run.durationMs;
1165
- if (run.success) {
1166
- modelStats.successRate++;
1167
- }
1168
- if (!byTaskType[run.taskType]) {
1169
- byTaskType[run.taskType] = { runs: 0, cost: 0, totalCost: 0 };
1170
- }
1171
- const taskStats = byTaskType[run.taskType];
1172
- taskStats.runs++;
1173
- taskStats.totalCost += runCost;
1174
- }
1175
- for (const model of Object.keys(byModel)) {
1176
- const stats = byModel[model];
1177
- stats.successRate = stats.runs > 0 ? stats.successRate / stats.runs : 0;
1178
- stats.avgLatencyMs = stats.runs > 0 ? stats.avgLatencyMs / stats.runs : 0;
1179
- }
1180
- const byTaskTypeFinal = {};
1181
- for (const [taskType, stats] of Object.entries(byTaskType)) {
1182
- byTaskTypeFinal[taskType] = {
1183
- runs: stats.runs,
1184
- cost: stats.totalCost,
1185
- avgCostPerRun: stats.runs > 0 ? stats.totalCost / stats.runs : 0
1186
- };
1187
- }
1188
- const savings = baselineCost - actualCost;
1189
- const savingsPercent = baselineCost > 0 ? savings / baselineCost * 100 : 0;
1190
- return {
1191
- periodDays: days,
1192
- period: {
1193
- from: fromStr,
1194
- to: toStr
1195
- },
1196
- totalRuns: runs.length,
1197
- totalTokensIn,
1198
- totalTokensOut,
1199
- actualCost,
1200
- baselineCost,
1201
- savings: Math.max(0, savings),
1202
- // Don't report negative savings
1203
- savingsPercent: Math.max(0, savingsPercent),
1204
- byModel,
1205
- byTaskType: byTaskTypeFinal
1206
- };
1207
- }
1208
-
1209
- // src/learning/patterns.ts
1210
- var MIN_RUNS_FOR_SUGGESTION = 10;
1211
- var MIN_CONFIDENCE_THRESHOLD = 0.6;
1212
- var MIN_IMPROVEMENT_THRESHOLD = 0.1;
1213
- var MIN_COST_IMPROVEMENT_THRESHOLD = 0.2;
1214
- var PatternDetector = class {
1215
- store;
1216
- /**
1217
- * Creates a new PatternDetector.
1218
- *
1219
- * @param store - The storage instance to use
1220
- */
1221
- constructor(store) {
1222
- this.store = store;
1223
- }
1224
- /**
1225
- * Analyzes a task type and generates suggestions if appropriate.
1226
- *
1227
- * @param taskType - The task type to analyze
1228
- * @returns Array of suggestions
1229
- */
1230
- analyzeTaskType(taskType) {
1231
- const stats = this.store.getLearningStats(taskType);
1232
- if (stats.length < 2) {
1233
- return [];
1234
- }
1235
- const currentRule = this.store.getRule(taskType);
1236
- if (!currentRule) return [];
1237
- const currentModel = currentRule.preferredModel;
1238
- const currentStats = stats.find((s) => s.model === currentModel);
1239
- if (!currentStats) return [];
1240
- const currentModelName = currentModel.includes(":") ? currentModel.split(":")[1] : currentModel;
1241
- const currentPricing = MODEL_PRICING[currentModelName];
1242
- const suggestions = [];
1243
- for (const modelStats of stats) {
1244
- if (modelStats.model === currentModel) continue;
1245
- if (modelStats.runs < MIN_RUNS_FOR_SUGGESTION) continue;
1246
- const successImprovement = modelStats.outcomeSuccessRate - currentStats.outcomeSuccessRate;
1247
- const latencyImprovement = (currentStats.avgDurationMs - modelStats.avgDurationMs) / currentStats.avgDurationMs;
1248
- const suggestedModelName = modelStats.model.includes(":") ? modelStats.model.split(":")[1] : modelStats.model;
1249
- const suggestedPricing = MODEL_PRICING[suggestedModelName];
1250
- let costImprovement = 0;
1251
- if (currentPricing && suggestedPricing) {
1252
- const currentAvgCost = (currentPricing.input + currentPricing.output) / 2;
1253
- const suggestedAvgCost = (suggestedPricing.input + suggestedPricing.output) / 2;
1254
- costImprovement = (currentAvgCost - suggestedAvgCost) / currentAvgCost;
1255
- }
1256
- const isSignificantlyBetter = successImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= 0 && latencyImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= -0.05 && costImprovement > MIN_COST_IMPROVEMENT_THRESHOLD;
1257
- if (!isSignificantlyBetter) continue;
1258
- const sampleConfidence = Math.min(modelStats.runs / 50, 1);
1259
- const improvementConfidence = Math.min(
1260
- Math.abs(successImprovement) / 0.3 + Math.abs(latencyImprovement) / 0.5 + Math.abs(costImprovement) / 0.5,
1261
- 1
1262
- );
1263
- const confidence = (sampleConfidence + improvementConfidence) / 2;
1264
- if (confidence < MIN_CONFIDENCE_THRESHOLD) continue;
1265
- const reasons = [];
1266
- if (successImprovement > 0) {
1267
- reasons.push(`${(successImprovement * 100).toFixed(0)}% higher success rate`);
1268
- }
1269
- if (latencyImprovement > 0) {
1270
- reasons.push(`${(latencyImprovement * 100).toFixed(0)}% faster`);
1271
- }
1272
- if (costImprovement > 0) {
1273
- reasons.push(`${(costImprovement * 100).toFixed(0)}% cheaper`);
1274
- }
1275
- const suggestion = {
1276
- id: nanoid(),
1277
- taskType,
1278
- currentModel,
1279
- suggestedModel: modelStats.model,
1280
- reason: reasons.join(", "),
1281
- confidence,
1282
- expectedImprovement: {
1283
- successRate: successImprovement > 0 ? successImprovement : void 0,
1284
- latency: latencyImprovement > 0 ? latencyImprovement : void 0,
1285
- cost: costImprovement > 0 ? costImprovement : void 0
1286
- },
1287
- sampleCount: modelStats.runs,
1288
- createdAt: (/* @__PURE__ */ new Date()).toISOString()
1289
- };
1290
- suggestions.push(suggestion);
1291
- }
1292
- suggestions.sort((a, b) => b.confidence - a.confidence);
1293
- return suggestions;
1294
- }
1295
- /**
1296
- * Analyzes all task types and generates suggestions.
1297
- *
1298
- * @returns Array of all suggestions across task types
1299
- */
1300
- analyzeAll() {
1301
- const taskTypes = [
1302
- "code_generation",
1303
- "code_review",
1304
- "summarization",
1305
- "analysis",
1306
- "creative_writing",
1307
- "data_extraction",
1308
- "translation",
1309
- "question_answering",
1310
- "general"
1311
- ];
1312
- const allSuggestions = [];
1313
- for (const taskType of taskTypes) {
1314
- const suggestions = this.analyzeTaskType(taskType);
1315
- allSuggestions.push(...suggestions);
1316
- }
1317
- allSuggestions.sort((a, b) => b.confidence - a.confidence);
1318
- return allSuggestions;
1319
- }
1320
- /**
1321
- * Stores suggestions in the database.
1322
- *
1323
- * @param suggestions - The suggestions to store
1324
- * @returns Array of suggestion IDs
1325
- */
1326
- storeSuggestions(suggestions) {
1327
- const ids = [];
1328
- for (const suggestion of suggestions) {
1329
- const id = this.store.recordSuggestion({
1330
- taskType: suggestion.taskType,
1331
- currentModel: suggestion.currentModel,
1332
- suggestedModel: suggestion.suggestedModel,
1333
- reason: suggestion.reason,
1334
- confidence: suggestion.confidence,
1335
- expectedImprovement: JSON.stringify(suggestion.expectedImprovement),
1336
- sampleCount: suggestion.sampleCount,
1337
- accepted: null
1338
- });
1339
- ids.push(id);
1340
- }
1341
- return ids;
1342
- }
1343
- /**
1344
- * Generates and stores new suggestions, returning only new ones.
1345
- *
1346
- * @returns Array of new suggestions
1347
- */
1348
- generateSuggestions() {
1349
- const suggestions = this.analyzeAll();
1350
- const pending = this.store.getPendingSuggestions();
1351
- const existingKeys = new Set(
1352
- pending.map((s) => `${s.taskType}:${s.suggestedModel}`)
1353
- );
1354
- const newSuggestions = suggestions.filter(
1355
- (s) => !existingKeys.has(`${s.taskType}:${s.suggestedModel}`)
1356
- );
1357
- this.storeSuggestions(newSuggestions);
1358
- return newSuggestions;
1359
- }
1360
- };
1361
-
1362
- // src/relay.ts
1363
- var RelayPlane = class {
1364
- store;
1365
- _routing;
1366
- outcomeRecorder;
1367
- patternDetector;
1368
- config;
1369
- adapterRegistry = null;
1370
- /**
1371
- * Creates a new RelayPlane instance.
1372
- *
1373
- * @param config - Configuration options
1374
- */
1375
- constructor(config = {}) {
1376
- this.config = {
1377
- dbPath: config.dbPath ?? getDefaultDbPath(),
1378
- defaultProvider: config.defaultProvider ?? "local",
1379
- defaultModel: config.defaultModel ?? "llama3.2",
1380
- providers: config.providers ?? {}
1381
- };
1382
- this.store = new Store(this.config.dbPath);
1383
- this._routing = new RoutingEngine(this.store);
1384
- this.outcomeRecorder = new OutcomeRecorder(this.store);
1385
- this.patternDetector = new PatternDetector(this.store);
1386
- }
1387
- /**
1388
- * Gets the routing engine for direct access.
1389
- */
1390
- get routing() {
1391
- return this._routing;
1392
- }
1393
- /**
1394
- * Runs a prompt through the appropriate model.
1395
- *
1396
- * @param input - The run input
1397
- * @returns The run result
1398
- */
1399
- async run(input) {
1400
- const startTime = Date.now();
1401
- const resolved = this._routing.resolve(input.prompt, input.taskType, input.model);
1402
- const adapter = await this.getAdapter(resolved.provider);
1403
- if (!adapter) {
1404
- const runId2 = this.store.recordRun({
1405
- prompt: input.prompt,
1406
- systemPrompt: input.systemPrompt ?? null,
1407
- taskType: resolved.taskType,
1408
- model: resolved.model,
1409
- success: false,
1410
- output: null,
1411
- error: `No adapter configured for provider: ${resolved.provider}`,
1412
- durationMs: Date.now() - startTime,
1413
- tokensIn: null,
1414
- tokensOut: null,
1415
- costUsd: null,
1416
- metadata: input.metadata ? JSON.stringify(input.metadata) : null
1417
- });
1418
- return {
1419
- runId: runId2,
1420
- success: false,
1421
- error: `No adapter configured for provider: ${resolved.provider}`,
1422
- taskType: resolved.taskType,
1423
- model: resolved.model,
1424
- durationMs: Date.now() - startTime,
1425
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
1426
- };
1427
- }
1428
- const providerConfig = this.config.providers?.[resolved.provider];
1429
- const apiKey = providerConfig?.apiKey ?? this.getEnvApiKey(resolved.provider);
1430
- const fullInput = input.systemPrompt ? `${input.systemPrompt}
1431
-
1432
- ${input.prompt}` : input.prompt;
1433
- const result = await adapter.execute({
1434
- model: resolved.modelName,
1435
- input: fullInput,
1436
- apiKey: apiKey ?? "",
1437
- baseUrl: providerConfig?.baseUrl
1438
- });
1439
- const durationMs = Date.now() - startTime;
1440
- const tokensIn = result.tokensIn ?? 0;
1441
- const tokensOut = result.tokensOut ?? 0;
1442
- const costUsd = calculateCost(resolved.model, tokensIn, tokensOut);
1443
- const runId = this.store.recordRun({
1444
- prompt: input.prompt,
1445
- systemPrompt: input.systemPrompt ?? null,
1446
- taskType: resolved.taskType,
1447
- model: resolved.model,
1448
- success: result.success,
1449
- output: result.output ?? null,
1450
- error: result.error?.message ?? null,
1451
- durationMs,
1452
- tokensIn: result.tokensIn ?? null,
1453
- tokensOut: result.tokensOut ?? null,
1454
- costUsd: costUsd > 0 ? costUsd : null,
1455
- metadata: input.metadata ? JSON.stringify(input.metadata) : null
1456
- });
1457
- return {
1458
- runId,
1459
- success: result.success,
1460
- output: result.output,
1461
- error: result.error?.message,
1462
- taskType: resolved.taskType,
1463
- model: resolved.model,
1464
- durationMs,
1465
- tokensIn: result.tokensIn,
1466
- tokensOut: result.tokensOut,
1467
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
1468
- };
1469
- }
1470
- /**
1471
- * Gets an adapter for a provider.
1472
- * Note: In the standalone proxy package, adapters are not used.
1473
- * The proxy handles provider calls directly via HTTP.
1474
- */
1475
- async getAdapter(_provider) {
1476
- return null;
1477
- }
1478
- /**
1479
- * Gets an API key from environment variables.
1480
- */
1481
- getEnvApiKey(provider) {
1482
- const envVars = {
1483
- openai: "OPENAI_API_KEY",
1484
- anthropic: "ANTHROPIC_API_KEY",
1485
- google: "GOOGLE_API_KEY",
1486
- xai: "XAI_API_KEY",
1487
- moonshot: "MOONSHOT_API_KEY",
1488
- local: ""
1489
- };
1490
- const envVar = envVars[provider];
1491
- return envVar ? process.env[envVar] : void 0;
1492
- }
1493
- /**
1494
- * Records an outcome for a run.
1495
- *
1496
- * @param runId - The run ID
1497
- * @param outcome - The outcome details
1498
- * @returns The recorded outcome
1499
- */
1500
- recordOutcome(runId, outcome) {
1501
- return this.outcomeRecorder.record({
1502
- runId,
1503
- ...outcome
1504
- });
1505
- }
1506
- /**
1507
- * Gets an outcome for a run.
1508
- *
1509
- * @param runId - The run ID
1510
- * @returns The outcome, or null if not found
1511
- */
1512
- getOutcome(runId) {
1513
- return this.outcomeRecorder.get(runId);
1514
- }
1515
- /**
1516
- * Gets statistics for runs.
1517
- *
1518
- * @param options - Optional filters
1519
- * @returns Statistics object
1520
- */
1521
- stats(options) {
1522
- const raw = this.store.getStats(options);
1523
- const byTaskType = {};
1524
- const taskTypes = [
1525
- "code_generation",
1526
- "code_review",
1527
- "summarization",
1528
- "analysis",
1529
- "creative_writing",
1530
- "data_extraction",
1531
- "translation",
1532
- "question_answering",
1533
- "general"
1534
- ];
1535
- for (const taskType of taskTypes) {
1536
- const taskStats = raw.byTaskType[taskType];
1537
- byTaskType[taskType] = {
1538
- taskType,
1539
- totalRuns: taskStats?.runs ?? 0,
1540
- successfulRuns: Math.round((taskStats?.runs ?? 0) * (taskStats?.successRate ?? 0)),
1541
- successRate: taskStats?.successRate ?? 0,
1542
- avgDurationMs: taskStats?.avgDurationMs ?? 0,
1543
- byModel: {}
1544
- };
1545
- }
1546
- for (const [model, modelStats] of Object.entries(raw.byModel)) {
1547
- for (const taskType of taskTypes) {
1548
- if (!byTaskType[taskType].byModel[model]) {
1549
- byTaskType[taskType].byModel[model] = {
1550
- runs: 0,
1551
- successRate: 0,
1552
- avgDurationMs: 0
1553
- };
1554
- }
1555
- }
1556
- }
1557
- return {
1558
- totalRuns: raw.totalRuns,
1559
- overallSuccessRate: raw.totalRuns > 0 ? raw.successfulRuns / raw.totalRuns : 0,
1560
- byTaskType,
1561
- period: {
1562
- from: options?.from ?? "",
1563
- to: options?.to ?? (/* @__PURE__ */ new Date()).toISOString()
1564
- }
1565
- };
1566
- }
1567
- /**
1568
- * Gets a savings report.
1569
- *
1570
- * @param days - Number of days to include (default: 30)
1571
- * @returns Savings report
1572
- */
1573
- savingsReport(days = 30) {
1574
- return calculateSavings(this.store, days);
1575
- }
1576
- /**
1577
- * Gets routing improvement suggestions.
1578
- *
1579
- * @returns Array of suggestions
1580
- */
1581
- getSuggestions() {
1582
- const pending = this.store.getPendingSuggestions();
1583
- return pending.map((record) => ({
1584
- id: record.id,
1585
- taskType: record.taskType,
1586
- currentModel: record.currentModel,
1587
- suggestedModel: record.suggestedModel,
1588
- reason: record.reason,
1589
- confidence: record.confidence,
1590
- expectedImprovement: JSON.parse(record.expectedImprovement),
1591
- sampleCount: record.sampleCount,
1592
- createdAt: record.createdAt,
1593
- accepted: record.accepted ?? void 0,
1594
- acceptedAt: record.acceptedAt ?? void 0
1595
- }));
1596
- }
1597
- /**
1598
- * Generates new suggestions based on current data.
1599
- *
1600
- * @returns Array of newly generated suggestions
1601
- */
1602
- generateSuggestions() {
1603
- return this.patternDetector.generateSuggestions();
1604
- }
1605
- /**
1606
- * Accepts a suggestion and updates routing.
1607
- *
1608
- * @param suggestionId - The suggestion ID to accept
1609
- * @returns True if successful
1610
- */
1611
- acceptSuggestion(suggestionId) {
1612
- return this.store.acceptSuggestion(suggestionId);
1613
- }
1614
- /**
1615
- * Rejects a suggestion.
1616
- *
1617
- * @param suggestionId - The suggestion ID to reject
1618
- * @returns True if successful
1619
- */
1620
- rejectSuggestion(suggestionId) {
1621
- return this.store.rejectSuggestion(suggestionId);
1622
- }
1623
- /**
1624
- * Closes the RelayPlane instance and releases resources.
1625
- */
1626
- close() {
1627
- this.store.close();
1628
- }
1629
- };
1630
-
1631
- // src/config.ts
1632
- var fs2 = __toESM(require("fs"));
1633
- var path2 = __toESM(require("path"));
1634
- var os2 = __toESM(require("os"));
1635
- var import_zod = require("zod");
1636
- var StrategySchema = import_zod.z.object({
1637
- model: import_zod.z.string(),
1638
- minConfidence: import_zod.z.number().min(0).max(1).optional(),
1639
- fallback: import_zod.z.string().optional()
1640
- });
1641
- var AuthSchema = import_zod.z.object({
1642
- anthropicApiKey: import_zod.z.string().optional(),
1643
- anthropicMaxToken: import_zod.z.string().optional(),
1644
- useMaxForModels: import_zod.z.array(import_zod.z.string()).optional()
1645
- // Default: ['opus']
1646
- }).optional();
1647
- var ConfigSchema = import_zod.z.object({
1648
- strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
1649
- defaults: import_zod.z.object({
1650
- qualityModel: import_zod.z.string().optional(),
1651
- costModel: import_zod.z.string().optional()
1652
- }).optional(),
1653
- auth: AuthSchema
1654
- });
1655
- var DEFAULT_CONFIG = {
1656
- strategies: {
1657
- code_review: { model: "anthropic:claude-sonnet-4-20250514" },
1658
- code_generation: { model: "anthropic:claude-3-5-haiku-latest" },
1659
- analysis: { model: "anthropic:claude-sonnet-4-20250514" },
1660
- summarization: { model: "anthropic:claude-3-5-haiku-latest" },
1661
- creative_writing: { model: "anthropic:claude-sonnet-4-20250514" },
1662
- data_extraction: { model: "anthropic:claude-3-5-haiku-latest" },
1663
- translation: { model: "anthropic:claude-3-5-haiku-latest" },
1664
- question_answering: { model: "anthropic:claude-3-5-haiku-latest" },
1665
- general: { model: "anthropic:claude-3-5-haiku-latest" }
1666
- },
1667
- defaults: {
1668
- qualityModel: "claude-sonnet-4-20250514",
1669
- costModel: "claude-3-5-haiku-latest"
1670
- }
1671
- };
1672
- function getConfigPath() {
1673
- return path2.join(os2.homedir(), ".relayplane", "config.json");
1674
- }
1675
- function writeDefaultConfig() {
1676
- const configPath = getConfigPath();
1677
- const dir = path2.dirname(configPath);
1678
- if (!fs2.existsSync(dir)) {
1679
- fs2.mkdirSync(dir, { recursive: true });
1680
- }
1681
- if (!fs2.existsSync(configPath)) {
1682
- fs2.writeFileSync(
1683
- configPath,
1684
- JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
1685
- "utf-8"
1686
- );
1687
- console.log(`[relayplane] Created default config at ${configPath}`);
1688
- }
1689
- }
1690
- function loadConfig() {
1691
- const configPath = getConfigPath();
1692
- writeDefaultConfig();
1693
- try {
1694
- const raw = fs2.readFileSync(configPath, "utf-8");
1695
- const parsed = JSON.parse(raw);
1696
- const validated = ConfigSchema.parse(parsed);
1697
- return validated;
1698
- } catch (err) {
1699
- if (err instanceof import_zod.z.ZodError) {
1700
- console.error(`[relayplane] Invalid config: ${err.message}`);
1701
- } else if (err instanceof SyntaxError) {
1702
- console.error(`[relayplane] Config JSON parse error: ${err.message}`);
1703
- } else {
1704
- console.error(`[relayplane] Failed to load config: ${err}`);
1705
- }
1706
- console.log("[relayplane] Using default config");
1707
- return DEFAULT_CONFIG;
1708
- }
1709
- }
1710
- function getStrategy(config, taskType) {
1711
- return config.strategies?.[taskType] ?? null;
1712
- }
1713
- function getAnthropicAuth(config, model) {
1714
- const auth = config.auth;
1715
- const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
1716
- const shouldUseMax = useMaxForModels.some((m) => model.toLowerCase().includes(m.toLowerCase()));
1717
- if (shouldUseMax && auth?.anthropicMaxToken) {
1718
- return { type: "max", value: auth.anthropicMaxToken };
1719
- }
1720
- const apiKey = auth?.anthropicApiKey ?? process.env["ANTHROPIC_API_KEY"];
1721
- if (apiKey) {
1722
- return { type: "apiKey", value: apiKey };
1723
- }
1724
- return null;
1725
- }
1726
- function watchConfig(onChange) {
1727
- const configPath = getConfigPath();
1728
- const dir = path2.dirname(configPath);
1729
- if (!fs2.existsSync(dir)) {
1730
- fs2.mkdirSync(dir, { recursive: true });
1731
- }
1732
- let debounceTimer = null;
1733
- fs2.watch(dir, (eventType, filename) => {
1734
- if (filename === "config.json") {
1735
- if (debounceTimer) clearTimeout(debounceTimer);
1736
- debounceTimer = setTimeout(() => {
1737
- console.log("[relayplane] Config file changed, reloading...");
1738
- const newConfig = loadConfig();
1739
- onChange(newConfig);
1740
- }, 100);
1741
- }
1742
- });
1743
- }
1744
-
1745
// src/proxy.ts
// --- Module-level proxy state and routing tables ---

// Proxy version string, reported by the /health endpoint.
var VERSION = "0.1.9";
// Recently completed runs, served by GET /runs.
var recentRuns = [];
// Retention cap for recentRuns (also the maximum /runs ?limit value).
var MAX_RECENT_RUNS = 100;
// Per-model request counters, used by /stats to build the model distribution.
var modelCounts = {};
// Set to Date.now() when the HTTP server starts listening; 0 until then.
var serverStartTime = 0;
// Active configuration; reassigned by the watchConfig hot-reload callback.
var currentConfig = loadConfig();
// Known upstream providers: base URL plus the env var holding each API key.
var DEFAULT_ENDPOINTS = {
  anthropic: {
    baseUrl: "https://api.anthropic.com/v1",
    apiKeyEnv: "ANTHROPIC_API_KEY"
  },
  openai: {
    baseUrl: "https://api.openai.com/v1",
    apiKeyEnv: "OPENAI_API_KEY"
  },
  google: {
    baseUrl: "https://generativelanguage.googleapis.com/v1beta",
    apiKeyEnv: "GEMINI_API_KEY"
  },
  xai: {
    baseUrl: "https://api.x.ai/v1",
    apiKeyEnv: "XAI_API_KEY"
  },
  moonshot: {
    baseUrl: "https://api.moonshot.cn/v1",
    apiKeyEnv: "MOONSHOT_API_KEY"
  }
};
// Aliases mapping short/friendly model names to a concrete provider + API model ID.
var MODEL_MAPPING = {
  // Anthropic models (using correct API model IDs)
  "claude-opus-4-5": { provider: "anthropic", model: "claude-opus-4-5-20250514" },
  "claude-sonnet-4": { provider: "anthropic", model: "claude-sonnet-4-20250514" },
  "claude-3-5-sonnet": { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
  "claude-3-5-haiku": { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
  haiku: { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
  sonnet: { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
  opus: { provider: "anthropic", model: "claude-3-opus-20240229" },
  // OpenAI models
  "gpt-4o": { provider: "openai", model: "gpt-4o" },
  "gpt-4o-mini": { provider: "openai", model: "gpt-4o-mini" },
  "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
};
// Fallback task-type -> model routing, used when neither the config file nor a
// learned routing rule selects a model.
var DEFAULT_ROUTING = {
  // Complex tasks → Sonnet (need reasoning & quality)
  code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
  analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
  creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
  // Medium tasks → Sonnet (benefit from better model)
  code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
  // Simple tasks → Haiku (cost efficient)
  summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
  data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
  translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
  question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
  general: { provider: "anthropic", model: "claude-3-5-haiku-latest" }
};
/**
 * Flatten an OpenAI-style message list into a single plain-text prompt.
 * String content is taken as-is; array content keeps only `text` parts
 * (joined with spaces, non-text parts contribute ""); anything else maps
 * to "". Messages are joined with newlines.
 */
function extractPromptText(messages) {
  const pieces = [];
  for (const msg of messages) {
    const { content } = msg;
    if (typeof content === "string") {
      pieces.push(content);
    } else if (Array.isArray(content)) {
      const textParts = content.map((entry) => {
        const part = entry;
        return part.type === "text" ? part.text ?? "" : "";
      });
      pieces.push(textParts.join(" "));
    } else {
      pieces.push("");
    }
  }
  return pieces.join("\n");
}
/**
 * POST a converted request to the Anthropic Messages API.
 * Shared implementation for the streaming and non-streaming forwarders
 * (the originals were byte-for-byte duplicates apart from the `stream` flag).
 *
 * Auth: MAX tokens go in `Authorization: Bearer …`, API keys in `x-api-key`.
 * Optional `betaHeaders` string is passed through as `anthropic-beta`.
 * Returns the raw fetch Response.
 */
async function postAnthropicMessages(request, targetModel, auth, betaHeaders, stream) {
  const anthropicBody = buildAnthropicBody(request, targetModel, stream);
  const headers = {
    "Content-Type": "application/json",
    "anthropic-version": "2023-06-01"
  };
  if (auth.type === "max") {
    headers["Authorization"] = `Bearer ${auth.value}`;
  } else {
    headers["x-api-key"] = auth.value;
  }
  if (betaHeaders) {
    headers["anthropic-beta"] = betaHeaders;
  }
  return fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers,
    body: JSON.stringify(anthropicBody)
  });
}
/** Non-streaming forward to Anthropic; returns the raw fetch Response. */
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
  return postAnthropicMessages(request, targetModel, auth, betaHeaders, false);
}
/** Streaming (SSE) forward to Anthropic; returns the raw fetch Response. */
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
  return postAnthropicMessages(request, targetModel, auth, betaHeaders, true);
}
/**
 * Convert OpenAI-style chat messages into Anthropic message format.
 * - system messages are dropped (Anthropic takes system as a top-level field);
 * - tool results become user messages with a tool_result block;
 * - assistant messages carrying tool_calls become assistant messages with
 *   optional text plus tool_use blocks (arguments JSON-parsed, "{}" default);
 * - everything else passes through with the role normalized to user/assistant.
 */
function convertMessagesToAnthropic(messages) {
  const converted = [];
  for (const raw of messages) {
    const m = raw;
    switch (m.role) {
      case "system":
        break;
      case "tool":
        converted.push({
          role: "user",
          content: [
            {
              type: "tool_result",
              tool_use_id: m.tool_call_id,
              content: typeof m.content === "string" ? m.content : JSON.stringify(m.content)
            }
          ]
        });
        break;
      case "assistant":
        if (m.tool_calls && m.tool_calls.length > 0) {
          const blocks = [];
          if (m.content && typeof m.content === "string") {
            blocks.push({ type: "text", text: m.content });
          }
          for (const call of m.tool_calls) {
            blocks.push({
              type: "tool_use",
              id: call.id,
              name: call.function.name,
              input: JSON.parse(call.function.arguments || "{}")
            });
          }
          converted.push({ role: "assistant", content: blocks });
        } else {
          converted.push({ role: "assistant", content: m.content });
        }
        break;
      default:
        converted.push({ role: "user", content: m.content });
    }
  }
  return converted;
}
/**
 * Build an Anthropic Messages API request body from an OpenAI-style request.
 * Lifts the first system message into the top-level `system` field, converts
 * messages/tools/tool_choice via the dedicated converters, and defaults
 * max_tokens to 4096 (Anthropic requires it).
 */
function buildAnthropicBody(request, targetModel, stream) {
  const systemMessage = request.messages.find((m) => m.role === "system");
  const body = {
    model: targetModel,
    messages: convertMessagesToAnthropic(request.messages),
    max_tokens: request.max_tokens ?? 4096,
    stream
  };
  if (systemMessage) {
    body["system"] = systemMessage.content;
  }
  if (request.temperature !== void 0) {
    body["temperature"] = request.temperature;
  }
  if (request.tools && Array.isArray(request.tools)) {
    body["tools"] = convertToolsToAnthropic(request.tools);
  }
  if (request.tool_choice) {
    body["tool_choice"] = convertToolChoiceToAnthropic(request.tool_choice);
  }
  return body;
}
/**
 * Convert OpenAI function-tool definitions to Anthropic tool format
 * (name/description/input_schema). Entries that are not OpenAI function
 * tools pass through unchanged; a missing parameter schema falls back to
 * an empty object schema.
 */
function convertToolsToAnthropic(tools) {
  return tools.map((rawTool) => {
    const t = rawTool;
    const isOpenAIFunction = t.type === "function" && t.function;
    if (!isOpenAIFunction) {
      return rawTool;
    }
    return {
      name: t.function.name,
      description: t.function.description,
      input_schema: t.function.parameters || { type: "object", properties: {} }
    };
  });
}
/**
 * Map an OpenAI tool_choice value to Anthropic's tool_choice format:
 * "auto"/"none" map directly, "required" becomes {type:"any"}, and a
 * specific function selection becomes {type:"tool", name}. Anything
 * unrecognized is passed through untouched.
 */
function convertToolChoiceToAnthropic(toolChoice) {
  switch (toolChoice) {
    case "auto":
      return { type: "auto" };
    case "none":
      return { type: "none" };
    case "required":
      return { type: "any" };
  }
  const tc = toolChoice;
  if (tc.type === "function" && tc.function?.name) {
    return { type: "tool", name: tc.function.name };
  }
  return toolChoice;
}
/**
 * POST a chat completion request to OpenAI with the target model substituted.
 * Shared implementation for the streaming and non-streaming forwarders (the
 * originals duplicated everything except the `stream` flag).
 * Returns the raw fetch Response.
 */
async function postOpenAIChat(request, targetModel, apiKey, stream) {
  const openaiBody = { ...request, model: targetModel, stream };
  return fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(openaiBody)
  });
}
/** Non-streaming forward to OpenAI. */
async function forwardToOpenAI(request, targetModel, apiKey) {
  return postOpenAIChat(request, targetModel, apiKey, false);
}
/** Streaming (SSE) forward to OpenAI. */
async function forwardToOpenAIStream(request, targetModel, apiKey) {
  return postOpenAIChat(request, targetModel, apiKey, true);
}
/**
 * POST a chat completion request to xAI (OpenAI-compatible API) with the
 * target model substituted. Shared implementation for both forwarders —
 * the originals differed only in the `stream` flag.
 * Returns the raw fetch Response.
 */
async function postXAIChat(request, targetModel, apiKey, stream) {
  const xaiBody = { ...request, model: targetModel, stream };
  return fetch("https://api.x.ai/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(xaiBody)
  });
}
/** Non-streaming forward to xAI. */
async function forwardToXAI(request, targetModel, apiKey) {
  return postXAIChat(request, targetModel, apiKey, false);
}
/** Streaming (SSE) forward to xAI. */
async function forwardToXAIStream(request, targetModel, apiKey) {
  return postXAIChat(request, targetModel, apiKey, true);
}
/**
 * POST a chat completion request to Moonshot (OpenAI-compatible API) with
 * the target model substituted. Shared implementation for both forwarders —
 * the originals differed only in the `stream` flag.
 * Returns the raw fetch Response.
 */
async function postMoonshotChat(request, targetModel, apiKey, stream) {
  const moonshotBody = { ...request, model: targetModel, stream };
  return fetch("https://api.moonshot.cn/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(moonshotBody)
  });
}
/** Non-streaming forward to Moonshot. */
async function forwardToMoonshot(request, targetModel, apiKey) {
  return postMoonshotChat(request, targetModel, apiKey, false);
}
/** Streaming (SSE) forward to Moonshot. */
async function forwardToMoonshotStream(request, targetModel, apiKey) {
  return postMoonshotChat(request, targetModel, apiKey, true);
}
/**
 * Convert OpenAI-style chat messages into Gemini `contents` format.
 * System messages are skipped (handled separately via systemInstruction);
 * assistant becomes role "model", everything else "user". Array content is
 * mapped part-by-part: text parts pass through, base64 data-URL images
 * become inline_data blocks, other image URLs degrade to a "[Image: url]"
 * text placeholder, and unknown parts become empty text.
 */
function convertMessagesToGemini(messages) {
  const contents = [];
  for (const msg of messages) {
    if (msg.role === "system") continue;
    const role = msg.role === "assistant" ? "model" : "user";
    if (typeof msg.content === "string") {
      contents.push({ role, parts: [{ text: msg.content }] });
      continue;
    }
    if (!Array.isArray(msg.content)) continue;
    const parts = msg.content.map((rawPart) => {
      const p = rawPart;
      if (p.type === "text") {
        return { text: p.text };
      }
      if (p.type === "image_url" && p.image_url?.url) {
        const imageUrl = p.image_url.url;
        if (imageUrl.startsWith("data:")) {
          const dataMatch = imageUrl.match(/^data:([^;]+);base64,(.+)$/);
          if (dataMatch) {
            return {
              inline_data: {
                mime_type: dataMatch[1],
                data: dataMatch[2]
              }
            };
          }
        }
        return { text: `[Image: ${imageUrl}]` };
      }
      return { text: "" };
    });
    contents.push({ role, parts });
  }
  return contents;
}
/**
 * Build the Gemini generateContent request body shared by the streaming and
 * non-streaming forwarders (the originals duplicated this construction):
 * converts messages via convertMessagesToGemini, applies max_tokens
 * (default 4096) and temperature, and lifts a plain-string system message
 * into systemInstruction. Non-string system content is dropped, matching
 * the original behavior.
 */
function buildGeminiRequestBody(request) {
  const systemMessage = request.messages.find((m) => m.role === "system");
  const geminiBody = {
    contents: convertMessagesToGemini(request.messages),
    generationConfig: {
      maxOutputTokens: request.max_tokens ?? 4096
    }
  };
  if (request.temperature !== void 0) {
    geminiBody.generationConfig.temperature = request.temperature;
  }
  if (systemMessage && typeof systemMessage.content === "string") {
    geminiBody.systemInstruction = {
      parts: [{ text: systemMessage.content }]
    };
  }
  return geminiBody;
}
/**
 * Forward to Gemini's non-streaming generateContent endpoint.
 * Returns the raw fetch Response (converted to OpenAI format elsewhere).
 */
async function forwardToGemini(request, targetModel, apiKey) {
  return fetch(
    `https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify(buildGeminiRequestBody(request))
    }
  );
}
/** Streaming variant: SSE endpoint (`alt=sse`); request body is identical. */
async function forwardToGeminiStream(request, targetModel, apiKey) {
  return fetch(
    `https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify(buildGeminiRequestBody(request))
    }
  );
}
/**
 * Convert a non-streaming Gemini generateContent response to OpenAI
 * chat.completion shape. Only the first candidate is used; its text parts
 * are concatenated. Finish reasons map MAX_TOKENS→length, SAFETY→
 * content_filter, anything else→stop. Token counts default to 0 when
 * usageMetadata is absent.
 */
function convertGeminiResponse(geminiData, model) {
  const candidate = geminiData.candidates?.[0];
  const parts = candidate?.content?.parts ?? [];
  const text = parts.map((p) => p.text ?? "").join("");
  const finishMap = { MAX_TOKENS: "length", SAFETY: "content_filter" };
  const finishReason = finishMap[candidate?.finishReason] ?? "stop";
  const promptTokens = geminiData.usageMetadata?.promptTokenCount ?? 0;
  const completionTokens = geminiData.usageMetadata?.candidatesTokenCount ?? 0;
  return {
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [
      {
        index: 0,
        message: {
          role: "assistant",
          content: text
        },
        finish_reason: finishReason
      }
    ],
    usage: {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens
    }
  };
}
/**
 * Convert one parsed Gemini SSE event into an OpenAI chat.completion.chunk
 * SSE line ("data: {...}\n\n"). The first event carries the assistant role
 * in its delta (even with empty text); later events only carry non-empty
 * text. finishReason maps MAX_TOKENS→length, SAFETY→content_filter,
 * any other value→stop; absent means finish_reason stays null.
 */
function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
  const candidate = eventData.candidates?.[0];
  const text = (candidate?.content?.parts ?? []).map((p) => p.text ?? "").join("");
  const choice = {
    index: 0,
    delta: {},
    finish_reason: null
  };
  if (isFirst) {
    choice.delta = { role: "assistant", content: text };
  } else if (text) {
    choice.delta = { content: text };
  }
  const rawFinish = candidate?.finishReason;
  if (rawFinish) {
    const finishMap = { MAX_TOKENS: "length", SAFETY: "content_filter" };
    choice.finish_reason = finishMap[rawFinish] ?? "stop";
  }
  const chunk = {
    id: messageId,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [choice]
  };
  return `data: ${JSON.stringify(chunk)}\n\n`;
}
/**
 * Async generator: read a Gemini SSE response and re-emit each event as an
 * OpenAI-format chunk via convertGeminiStreamEvent. Lines are buffered
 * across network reads so only complete lines are parsed; unparseable
 * payloads are skipped. A final "data: [DONE]\n\n" is always emitted when
 * the upstream stream ends, and the reader lock is released on exit.
 * Throws if the response has no body.
 */
async function* convertGeminiStream(response, model) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  const messageId = `chatcmpl-${Date.now()}`;
  let pending = "";
  let firstEvent = true;
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      pending = lines.pop() || "";
      for (const line of lines) {
        if (!line.startsWith("data: ")) continue;
        const payload = line.slice(6);
        if (payload.trim() === "[DONE]") {
          yield "data: [DONE]\n\n";
          continue;
        }
        let parsed;
        try {
          parsed = JSON.parse(payload);
        } catch {
          continue;
        }
        const converted = convertGeminiStreamEvent(parsed, messageId, model, firstEvent);
        if (converted) {
          yield converted;
          firstEvent = false;
        }
      }
    }
    yield "data: [DONE]\n\n";
  } finally {
    reader.releaseLock();
  }
}
/**
 * Convert a non-streaming Anthropic Messages response to OpenAI
 * chat.completion shape. Text blocks are concatenated (empty text yields a
 * null content, per the OpenAI convention for tool-only replies); tool_use
 * blocks become tool_calls with JSON-stringified arguments. stop_reason
 * maps tool_use→tool_calls, end_turn→stop, other truthy values pass
 * through, falsy defaults to stop.
 */
function convertAnthropicResponse(anthropicData) {
  const blocks = anthropicData.content ?? [];
  let text = "";
  const toolCalls = [];
  for (const block of blocks) {
    if (block.type === "text") {
      text += block.text ?? "";
    } else if (block.type === "tool_use") {
      toolCalls.push({
        id: block.id || `call_${Date.now()}`,
        type: "function",
        function: {
          name: block.name,
          arguments: typeof block.input === "string" ? block.input : JSON.stringify(block.input ?? {})
        }
      });
    }
  }
  const message = {
    role: "assistant",
    content: text || null
  };
  if (toolCalls.length > 0) {
    message["tool_calls"] = toolCalls;
  }
  let finishReason;
  switch (anthropicData.stop_reason) {
    case "tool_use":
      finishReason = "tool_calls";
      break;
    case "end_turn":
      finishReason = "stop";
      break;
    default:
      finishReason = anthropicData.stop_reason || "stop";
  }
  const inputTokens = anthropicData.usage?.input_tokens ?? 0;
  const outputTokens = anthropicData.usage?.output_tokens ?? 0;
  return {
    id: anthropicData.id || `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model: anthropicData.model,
    choices: [
      {
        index: 0,
        message,
        finish_reason: finishReason
      }
    ],
    usage: {
      prompt_tokens: inputTokens,
      completion_tokens: outputTokens,
      total_tokens: inputTokens + outputTokens
    }
  };
}
/**
 * Translate one Anthropic SSE event into an OpenAI chat.completion.chunk SSE
 * line ("data: {...}\n\n"), or null for events that produce no chunk.
 *
 * toolState accumulates streamed tool-call arguments across events:
 * `tools` maps content-block index → {id, name, arguments}, and
 * `currentToolIndex` remembers the last tool block for events that omit
 * an index. message_stop yields the terminal "data: [DONE]\n\n".
 */
function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
  const choice = { index: 0, delta: {}, finish_reason: null };
  const chunk = {
    id: messageId,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1e3),
    model,
    choices: [choice]
  };
  const emit = () => `data: ${JSON.stringify(chunk)}\n\n`;

  if (eventType === "message_start") {
    // Adopt the upstream message id when present; open the assistant turn.
    const msg = eventData["message"];
    chunk.id = msg?.["id"] || messageId;
    choice.delta = { role: "assistant", content: "" };
    return emit();
  }

  if (eventType === "content_block_start") {
    const block = eventData["content_block"];
    const idx = eventData["index"];
    // Only tool_use block starts produce a chunk; text blocks emit nothing
    // here (their content arrives via content_block_delta).
    if (block?.["type"] !== "tool_use") return null;
    const toolId = block["id"];
    const toolName = block["name"];
    toolState.tools.set(idx ?? toolState.currentToolIndex, {
      id: toolId,
      name: toolName,
      arguments: ""
    });
    toolState.currentToolIndex = idx ?? toolState.currentToolIndex;
    choice.delta = {
      tool_calls: [{
        index: idx ?? 0,
        id: toolId,
        type: "function",
        function: { name: toolName, arguments: "" }
      }]
    };
    return emit();
  }

  if (eventType === "content_block_delta") {
    const delta = eventData["delta"];
    const idx = eventData["index"];
    if (delta?.["type"] === "text_delta") {
      choice.delta = { content: delta["text"] };
      return emit();
    }
    if (delta?.["type"] === "input_json_delta") {
      const partialJson = delta["partial_json"] || "";
      // Accumulate argument JSON on the tracked tool (if any) and also
      // forward the fragment downstream.
      const tracked = toolState.tools.get(idx ?? toolState.currentToolIndex);
      if (tracked) {
        tracked.arguments += partialJson;
      }
      choice.delta = {
        tool_calls: [{
          index: idx ?? 0,
          function: { arguments: partialJson }
        }]
      };
      return emit();
    }
    return null;
  }

  if (eventType === "message_delta") {
    // Map Anthropic stop reasons onto OpenAI finish_reason values.
    const stopReason = eventData["delta"]?.["stop_reason"];
    if (stopReason === "tool_use") {
      choice.finish_reason = "tool_calls";
    } else if (stopReason === "end_turn") {
      choice.finish_reason = "stop";
    } else {
      choice.finish_reason = stopReason || "stop";
    }
    return emit();
  }

  if (eventType === "message_stop") {
    return "data: [DONE]\n\n";
  }

  // Unknown event types (e.g. ping) produce no output.
  return null;
}
/**
 * Async generator: read an Anthropic SSE response and re-emit each event as
 * an OpenAI-format chunk via convertAnthropicStreamEvent. toolState carries
 * streamed tool-call argument accumulation across events.
 *
 * FIX: `eventType`/`eventData` were previously declared inside the read
 * loop, so an SSE event whose `event:` line and `data:`/blank terminator
 * lines arrived in different network chunks had its state reset and was
 * silently dropped. The parser state now survives across reads (the line
 * buffer already did).
 *
 * Throws if the response has no body; always releases the reader lock.
 */
async function* convertAnthropicStream(response, model) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  let buffer = "";
  const messageId = `chatcmpl-${Date.now()}`;
  const toolState = {
    currentToolIndex: 0,
    tools: new Map()
  };
  // SSE parser state — must persist across reader.read() iterations.
  let eventType = "";
  let eventData = "";
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // Keep the trailing partial line for the next read.
      buffer = lines.pop() || "";
      for (const line of lines) {
        if (line.startsWith("event: ")) {
          eventType = line.slice(7).trim();
        } else if (line.startsWith("data: ")) {
          eventData = line.slice(6);
        } else if (line === "" && eventType && eventData) {
          // Blank line terminates one SSE event; convert and emit it.
          try {
            const parsed = JSON.parse(eventData);
            const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
            if (converted) {
              yield converted;
            }
          } catch {
            // Malformed JSON payloads are skipped, matching prior behavior.
          }
          eventType = "";
          eventData = "";
        }
      }
    }
  } finally {
    reader.releaseLock();
  }
}
/**
 * Async generator: pass an OpenAI-compatible SSE response body through
 * unchanged, yielding decoded text chunks as they arrive. Throws when the
 * response has no body; always releases the reader lock on exit.
 */
async function* pipeOpenAIStream(response) {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }
  const decoder = new TextDecoder();
  try {
    let result = await reader.read();
    while (!result.done) {
      yield decoder.decode(result.value, { stream: true });
      result = await reader.read();
    }
  } finally {
    reader.releaseLock();
  }
}
/**
 * Parse a "provider:model" routing string into { provider, model }, or null
 * when malformed or the provider is unknown.
 *
 * FIX: the original used `split(":")` and kept only the second segment, so a
 * model id containing a colon (e.g. "local:org:model-v2") was silently
 * truncated to "org". We now split at the FIRST colon only, keeping the rest
 * of the string as the model. Behavior for well-formed two-part inputs is
 * unchanged, and empty provider/model still return null.
 */
function parsePreferredModel(preferredModel) {
  const sep = preferredModel.indexOf(":");
  // No colon, empty provider, or empty model → invalid.
  if (sep <= 0 || sep === preferredModel.length - 1) return null;
  const provider = preferredModel.slice(0, sep);
  const model = preferredModel.slice(sep + 1);
  const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
  if (!validProviders.includes(provider)) return null;
  return { provider, model };
}
/**
 * Resolve an explicitly requested model name to { provider, model }.
 * Resolution order: exact MODEL_MAPPING alias → well-known name prefix →
 * "provider/model" form with a recognized provider → null (unknown model).
 */
function resolveExplicitModel(modelName) {
  const mapped = MODEL_MAPPING[modelName];
  if (mapped) {
    return mapped;
  }
  // Prefix heuristics for each provider's model families.
  const prefixTable = [
    ["anthropic", ["claude-"]],
    ["openai", ["gpt-", "o1-", "o3-", "chatgpt-", "text-", "dall-e", "whisper", "tts-"]],
    ["google", ["gemini-", "palm-"]],
    ["xai", ["grok-"]],
    ["moonshot", ["moonshot-"]]
  ];
  for (const [provider, prefixes] of prefixTable) {
    if (prefixes.some((prefix) => modelName.startsWith(prefix))) {
      return { provider, model: modelName };
    }
  }
  if (modelName.includes("/")) {
    const [provider, model] = modelName.split("/");
    const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
    if (provider && model && validProviders.includes(provider)) {
      return { provider, model };
    }
  }
  return null;
}
/**
 * Start the RelayPlane routing proxy HTTP server.
 *
 * @param {object} [config] - { port?, host?, verbose?, dbPath? }.
 *   Defaults: port 3001, host 127.0.0.1, verbose false.
 * @returns {Promise<http.Server>} resolves with the listening server;
 *   rejects if the server emits an "error" before/while listening.
 *
 * Endpoints: GET /health, /stats, /runs, /models (any path containing
 * "/models"); POST any path containing "/chat/completions". All other
 * requests get 404. CORS is wide open (Access-Control-Allow-Origin: *),
 * with OPTIONS preflight answered 204.
 *
 * Side effects: constructs a RelayPlane instance, starts a config-file
 * watcher that reassigns the module-level `currentConfig`, and sets
 * `serverStartTime` once listening.
 */
async function startProxy(config = {}) {
  const port = config.port ?? 3001;
  const host = config.host ?? "127.0.0.1";
  const verbose = config.verbose ?? false;
  const relay = new RelayPlane({ dbPath: config.dbPath });
  // Verbose-only logger used throughout the request handler.
  const log = (msg) => {
    if (verbose) console.log(`[relayplane] ${msg}`);
  };
  const server = http.createServer(async (req, res) => {
    // Permissive CORS headers on every response, plus preflight handling.
    res.setHeader("Access-Control-Allow-Origin", "*");
    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
    res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
    if (req.method === "OPTIONS") {
      res.writeHead(204);
      res.end();
      return;
    }
    const parsedUrl = url.parse(req.url || "", true);
    const pathname = parsedUrl.pathname || "";
    // --- GET /health: uptime, provider key presence, total run count ---
    if (req.method === "GET" && pathname === "/health") {
      const uptimeMs = Date.now() - serverStartTime;
      const uptimeSecs = Math.floor(uptimeMs / 1e3);
      const hours = Math.floor(uptimeSecs / 3600);
      const mins = Math.floor(uptimeSecs % 3600 / 60);
      const secs = uptimeSecs % 60;
      // Report which providers have an API key set in the environment.
      const providers = {};
      for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
        providers[name] = !!process.env[config2.apiKeyEnv];
      }
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({
        status: "ok",
        version: VERSION,
        uptime: `${hours}h ${mins}m ${secs}s`,
        uptimeMs,
        providers,
        totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
      }));
      return;
    }
    // --- GET /stats: savings report and per-model request distribution ---
    if (req.method === "GET" && pathname === "/stats") {
      const stats = relay.stats();
      const savings = relay.savingsReport(30);
      const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
      const modelDistribution = {};
      for (const [model, count] of Object.entries(modelCounts)) {
        modelDistribution[model] = {
          count,
          percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
        };
      }
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({
        totalRuns,
        savings: {
          estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
          actualCostUsd: savings.actualCost.toFixed(4),
          baselineCostUsd: savings.baselineCost.toFixed(4),
          savedUsd: savings.savings.toFixed(4)
        },
        modelDistribution,
        byTaskType: stats.byTaskType,
        period: stats.period
      }));
      return;
    }
    // --- GET /runs?limit=N: recent run records (limit capped, default 20) ---
    if (req.method === "GET" && pathname === "/runs") {
      const limitParam = parsedUrl.query["limit"];
      const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
      const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({
        runs: recentRuns.slice(0, limit),
        total: recentRuns.length
      }));
      return;
    }
    // --- GET …/models: advertise the three virtual RelayPlane models ---
    if (req.method === "GET" && pathname.includes("/models")) {
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(
        JSON.stringify({
          object: "list",
          data: [
            { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
            { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
            { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
          ]
        })
      );
      return;
    }
    // Everything else must be POST …/chat/completions.
    if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
      res.writeHead(404, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ error: "Not found" }));
      return;
    }
    // Buffer and parse the JSON request body.
    let body = "";
    for await (const chunk of req) {
      body += chunk;
    }
    let request;
    try {
      request = JSON.parse(body);
    } catch {
      res.writeHead(400, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ error: "Invalid JSON" }));
      return;
    }
    const isStreaming = request.stream === true;
    const requestedModel = request.model;
    // Routing mode: "auto" (default for relayplane:*), "cost", "quality",
    // or "passthrough" for an explicitly named provider model.
    let routingMode = "auto";
    let targetModel = "";
    let targetProvider = "anthropic";
    if (requestedModel.startsWith("relayplane:")) {
      if (requestedModel.includes(":cost")) {
        routingMode = "cost";
      } else if (requestedModel.includes(":quality")) {
        routingMode = "quality";
      }
    } else {
      routingMode = "passthrough";
      const resolved = resolveExplicitModel(requestedModel);
      if (resolved) {
        targetProvider = resolved.provider;
        targetModel = resolved.model;
        log(`Pass-through mode: ${requestedModel} \u2192 ${targetProvider}/${targetModel}`);
      } else {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: `Unknown model: ${requestedModel}` }));
        return;
      }
    }
    log(`Received request for model: ${requestedModel} (mode: ${routingMode}, stream: ${isStreaming})`);
    // Classify the prompt to drive task-type routing.
    const promptText = extractPromptText(request.messages);
    const taskType = inferTaskType(promptText);
    const confidence = getInferenceConfidence(promptText, taskType);
    log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
    if (routingMode !== "passthrough") {
      // Routing precedence: config strategy → learned rule → DEFAULT_ROUTING;
      // then cost/quality mode overrides force an Anthropic model.
      const configStrategy = getStrategy(currentConfig, taskType);
      if (configStrategy) {
        const parsed = parsePreferredModel(configStrategy.model);
        if (parsed) {
          targetProvider = parsed.provider;
          targetModel = parsed.model;
          log(`Using config strategy: ${configStrategy.model}`);
        }
      }
      if (!configStrategy) {
        const rule = relay.routing.get(taskType);
        if (rule && rule.preferredModel) {
          const parsed = parsePreferredModel(rule.preferredModel);
          if (parsed) {
            targetProvider = parsed.provider;
            targetModel = parsed.model;
            log(`Using learned rule: ${rule.preferredModel}`);
          } else {
            const defaultRoute = DEFAULT_ROUTING[taskType];
            targetProvider = defaultRoute.provider;
            targetModel = defaultRoute.model;
          }
        } else {
          const defaultRoute = DEFAULT_ROUTING[taskType];
          targetProvider = defaultRoute.provider;
          targetModel = defaultRoute.model;
        }
      }
      if (routingMode === "cost") {
        const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
        targetModel = costModel;
        targetProvider = "anthropic";
        log(`Cost mode: using ${costModel}`);
      } else if (routingMode === "quality") {
        const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
        targetModel = qualityModel;
        targetProvider = "anthropic";
        log(`Quality mode: using ${qualityModel}`);
      }
    }
    log(`Routing to: ${targetProvider}/${targetModel}`);
    // Resolve credentials: Anthropic has its own MAX-token/API-key logic;
    // other providers read the key from their DEFAULT_ENDPOINTS env var.
    let apiKey;
    let anthropicAuth = null;
    if (targetProvider === "anthropic") {
      anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
      if (!anthropicAuth) {
        res.writeHead(500, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
        return;
      }
      log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
    } else {
      const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
      apiKey = process.env[apiKeyEnv];
      if (!apiKey) {
        res.writeHead(500, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
        return;
      }
    }
    const startTime = Date.now();
    // Pass through any client-supplied anthropic-beta header.
    const betaHeaders = req.headers["anthropic-beta"];
    if (isStreaming) {
      await handleStreamingRequest(
        res,
        request,
        targetProvider,
        targetModel,
        apiKey,
        anthropicAuth,
        relay,
        promptText,
        taskType,
        confidence,
        routingMode,
        startTime,
        log,
        betaHeaders
      );
    } else {
      await handleNonStreamingRequest(
        res,
        request,
        targetProvider,
        targetModel,
        apiKey,
        anthropicAuth,
        relay,
        promptText,
        taskType,
        confidence,
        routingMode,
        startTime,
        log,
        betaHeaders
      );
    }
  });
  // Hot-reload: swap the module-level config whenever the file changes.
  watchConfig((newConfig) => {
    currentConfig = newConfig;
    console.log("[relayplane] Config reloaded");
  });
  return new Promise((resolve, reject) => {
    server.on("error", reject);
    server.listen(port, host, () => {
      serverStartTime = Date.now();
      console.log(`RelayPlane proxy listening on http://${host}:${port}`);
      console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
      console.log(` Endpoint: POST /v1/chat/completions`);
      console.log(` Stats: GET /stats, /runs, /health`);
      console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
      console.log(` Streaming: \u2705 Enabled`);
      resolve(server);
    });
  });
}
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2727
- let providerResponse;
2728
- try {
2729
- switch (targetProvider) {
2730
- case "anthropic":
2731
- if (!anthropicAuth) throw new Error("No Anthropic auth");
2732
- providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2733
- break;
2734
- case "google":
2735
- providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
2736
- break;
2737
- case "xai":
2738
- providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
2739
- break;
2740
- case "moonshot":
2741
- providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
2742
- break;
2743
- default:
2744
- providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
2745
- }
2746
- if (!providerResponse.ok) {
2747
- const errorData = await providerResponse.json();
2748
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2749
- res.end(JSON.stringify(errorData));
2750
- return;
2751
- }
2752
- } catch (err) {
2753
- const errorMsg = err instanceof Error ? err.message : String(err);
2754
- res.writeHead(500, { "Content-Type": "application/json" });
2755
- res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2756
- return;
2757
- }
2758
- res.writeHead(200, {
2759
- "Content-Type": "text/event-stream",
2760
- "Cache-Control": "no-cache",
2761
- "Connection": "keep-alive"
2762
- });
2763
- try {
2764
- switch (targetProvider) {
2765
- case "anthropic":
2766
- for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
2767
- res.write(chunk);
2768
- }
2769
- break;
2770
- case "google":
2771
- for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
2772
- res.write(chunk);
2773
- }
2774
- break;
2775
- default:
2776
- for await (const chunk of pipeOpenAIStream(providerResponse)) {
2777
- res.write(chunk);
2778
- }
2779
- }
2780
- } catch (err) {
2781
- log(`Streaming error: ${err}`);
2782
- }
2783
- const durationMs = Date.now() - startTime;
2784
- const modelKey = `${targetProvider}/${targetModel}`;
2785
- modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2786
- relay.run({
2787
- prompt: promptText.slice(0, 500),
2788
- taskType,
2789
- model: `${targetProvider}:${targetModel}`
2790
- }).then((runResult) => {
2791
- recentRuns.unshift({
2792
- runId: runResult.runId,
2793
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2794
- model: modelKey,
2795
- taskType,
2796
- confidence,
2797
- mode: routingMode,
2798
- durationMs,
2799
- promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2800
- });
2801
- if (recentRuns.length > MAX_RECENT_RUNS) {
2802
- recentRuns.pop();
2803
- }
2804
- log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2805
- }).catch((err) => {
2806
- log(`Failed to record run: ${err}`);
2807
- });
2808
- res.end();
2809
- }
2810
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2811
- let providerResponse;
2812
- let responseData;
2813
- try {
2814
- switch (targetProvider) {
2815
- case "anthropic": {
2816
- if (!anthropicAuth) throw new Error("No Anthropic auth");
2817
- providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2818
- const rawData = await providerResponse.json();
2819
- if (!providerResponse.ok) {
2820
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2821
- res.end(JSON.stringify(rawData));
2822
- return;
2823
- }
2824
- responseData = convertAnthropicResponse(rawData);
2825
- break;
2826
- }
2827
- case "google": {
2828
- providerResponse = await forwardToGemini(request, targetModel, apiKey);
2829
- const rawData = await providerResponse.json();
2830
- if (!providerResponse.ok) {
2831
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2832
- res.end(JSON.stringify(rawData));
2833
- return;
2834
- }
2835
- responseData = convertGeminiResponse(rawData, targetModel);
2836
- break;
2837
- }
2838
- case "xai": {
2839
- providerResponse = await forwardToXAI(request, targetModel, apiKey);
2840
- responseData = await providerResponse.json();
2841
- if (!providerResponse.ok) {
2842
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2843
- res.end(JSON.stringify(responseData));
2844
- return;
2845
- }
2846
- break;
2847
- }
2848
- case "moonshot": {
2849
- providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
2850
- responseData = await providerResponse.json();
2851
- if (!providerResponse.ok) {
2852
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2853
- res.end(JSON.stringify(responseData));
2854
- return;
2855
- }
2856
- break;
2857
- }
2858
- default: {
2859
- providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
2860
- responseData = await providerResponse.json();
2861
- if (!providerResponse.ok) {
2862
- res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
2863
- res.end(JSON.stringify(responseData));
2864
- return;
2865
- }
2866
- }
2867
- }
2868
- } catch (err) {
2869
- const errorMsg = err instanceof Error ? err.message : String(err);
2870
- res.writeHead(500, { "Content-Type": "application/json" });
2871
- res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2872
- return;
2873
- }
2874
- const durationMs = Date.now() - startTime;
2875
- const modelKey = `${targetProvider}/${targetModel}`;
2876
- modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2877
- try {
2878
- const runResult = await relay.run({
2879
- prompt: promptText.slice(0, 500),
2880
- taskType,
2881
- model: `${targetProvider}:${targetModel}`
2882
- });
2883
- recentRuns.unshift({
2884
- runId: runResult.runId,
2885
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2886
- model: modelKey,
2887
- taskType,
2888
- confidence,
2889
- mode: routingMode,
2890
- durationMs,
2891
- promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2892
- });
2893
- if (recentRuns.length > MAX_RECENT_RUNS) {
2894
- recentRuns.pop();
2895
- }
2896
- responseData["_relayplane"] = {
2897
- runId: runResult.runId,
2898
- routedTo: modelKey,
2899
- taskType,
2900
- confidence,
2901
- durationMs,
2902
- mode: routingMode
2903
- };
2904
- log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
2905
- } catch (err) {
2906
- log(`Failed to record run: ${err}`);
2907
- }
2908
- res.writeHead(200, { "Content-Type": "application/json" });
2909
- res.end(JSON.stringify(responseData));
2910
- }
2911
-
2912
- // src/types.ts
2913
- var import_zod2 = require("zod");
2914
- var TaskTypes = [
2915
- "code_generation",
2916
- "code_review",
2917
- "summarization",
2918
- "analysis",
2919
- "creative_writing",
2920
- "data_extraction",
2921
- "translation",
2922
- "question_answering",
2923
- "general"
2924
- ];
2925
- var TaskTypeSchema = import_zod2.z.enum(TaskTypes);
2926
- var Providers = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
2927
- var ProviderSchema = import_zod2.z.enum(Providers);
2928
- var RelayPlaneConfigSchema = import_zod2.z.object({
2929
- dbPath: import_zod2.z.string().optional(),
2930
- providers: import_zod2.z.record(ProviderSchema, import_zod2.z.object({
2931
- apiKey: import_zod2.z.string().optional(),
2932
- baseUrl: import_zod2.z.string().optional()
2933
- })).optional(),
2934
- defaultProvider: ProviderSchema.optional(),
2935
- defaultModel: import_zod2.z.string().optional()
2936
- });
2937
- var RunInputSchema = import_zod2.z.object({
2938
- prompt: import_zod2.z.string().min(1),
2939
- systemPrompt: import_zod2.z.string().optional(),
2940
- taskType: TaskTypeSchema.optional(),
2941
- model: import_zod2.z.string().optional(),
2942
- metadata: import_zod2.z.record(import_zod2.z.unknown()).optional()
2943
- });
2944
- var RuleSources = ["default", "user", "learned"];
2945
- var RoutingRuleSchema = import_zod2.z.object({
2946
- id: import_zod2.z.string(),
2947
- taskType: TaskTypeSchema,
2948
- preferredModel: import_zod2.z.string(),
2949
- source: import_zod2.z.enum(RuleSources),
2950
- confidence: import_zod2.z.number().min(0).max(1).optional(),
2951
- sampleCount: import_zod2.z.number().int().positive().optional(),
2952
- createdAt: import_zod2.z.string(),
2953
- updatedAt: import_zod2.z.string()
2954
- });
2955
- var OutcomeQualities = ["excellent", "good", "acceptable", "poor", "failed"];
2956
- var OutcomeInputSchema = import_zod2.z.object({
2957
- runId: import_zod2.z.string().min(1),
2958
- success: import_zod2.z.boolean(),
2959
- quality: import_zod2.z.enum(OutcomeQualities).optional(),
2960
- latencySatisfactory: import_zod2.z.boolean().optional(),
2961
- costSatisfactory: import_zod2.z.boolean().optional(),
2962
- feedback: import_zod2.z.string().optional()
2963
- });
2964
- var SuggestionSchema = import_zod2.z.object({
2965
- id: import_zod2.z.string(),
2966
- taskType: TaskTypeSchema,
2967
- currentModel: import_zod2.z.string(),
2968
- suggestedModel: import_zod2.z.string(),
2969
- reason: import_zod2.z.string(),
2970
- confidence: import_zod2.z.number().min(0).max(1),
2971
- expectedImprovement: import_zod2.z.object({
2972
- successRate: import_zod2.z.number().optional(),
2973
- latency: import_zod2.z.number().optional(),
2974
- cost: import_zod2.z.number().optional()
2975
- }),
2976
- sampleCount: import_zod2.z.number().int().positive(),
2977
- createdAt: import_zod2.z.string(),
2978
- accepted: import_zod2.z.boolean().optional(),
2979
- acceptedAt: import_zod2.z.string().optional()
2980
- });
2981
- // Annotate the CommonJS export names for ESM import in node:
2982
- 0 && (module.exports = {
2983
- DEFAULT_CONFIG,
2984
- DEFAULT_ENDPOINTS,
2985
- MODEL_MAPPING,
2986
- MODEL_PRICING,
2987
- OutcomeRecorder,
2988
- PatternDetector,
2989
- ProviderSchema,
2990
- Providers,
2991
- RelayPlane,
2992
- RoutingEngine,
2993
- Store,
2994
- TaskTypeSchema,
2995
- TaskTypes,
2996
- calculateCost,
2997
- calculateSavings,
2998
- getConfigPath,
2999
- getInferenceConfidence,
3000
- getModelPricing,
3001
- getStrategy,
3002
- inferTaskType,
3003
- loadConfig,
3004
- startProxy,
3005
- watchConfig
3006
- });
2
+ /**
3
+ * @relayplane/proxy
4
+ *
5
+ * RelayPlane Agent Ops Proxy Server
6
+ *
7
+ * Intelligent AI model routing with integrated observability via
8
+ * the Learning Ledger and auth enforcement via Auth Gate.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import { createProxyServer } from '@relayplane/proxy';
13
+ *
14
+ * const server = createProxyServer({
15
+ * port: 3001,
16
+ * providers: {
17
+ * anthropic: { apiKey: process.env.ANTHROPIC_API_KEY! },
18
+ * openai: { apiKey: process.env.OPENAI_API_KEY! },
19
+ * },
20
+ * });
21
+ *
22
+ * await server.start();
23
+ * ```
24
+ *
25
+ * @packageDocumentation
26
+ */
27
+ Object.defineProperty(exports, "__esModule", { value: true });
28
+ exports.printTelemetryDisclosure = exports.getTelemetryPath = exports.clearTelemetry = exports.getTelemetryStats = exports.getLocalTelemetry = exports.clearAuditBuffer = exports.getAuditBuffer = exports.isOfflineMode = exports.setOfflineMode = exports.isAuditMode = exports.setAuditMode = exports.estimateCost = exports.inferTaskType = exports.recordTelemetry = exports.getConfigPath = exports.getConfigDir = exports.getApiKey = exports.setApiKey = exports.getDeviceId = exports.disableTelemetry = exports.enableTelemetry = exports.isTelemetryEnabled = exports.markFirstRunComplete = exports.isFirstRun = exports.updateConfig = exports.saveConfig = exports.loadConfig = exports.startProxy = exports.startKeepAlive = exports.aggregateStreamingResponse = exports.streamProviderResponse = exports.createSSEWriter = exports.SSEWriter = exports.createProxyServer = exports.ProxyServer = void 0;
29
+ // New Agent Ops proxy server (Phase 1)
30
+ var server_js_1 = require("./server.js");
31
+ Object.defineProperty(exports, "ProxyServer", { enumerable: true, get: function () { return server_js_1.ProxyServer; } });
32
+ Object.defineProperty(exports, "createProxyServer", { enumerable: true, get: function () { return server_js_1.createProxyServer; } });
33
+ // Streaming support (Phase 8)
34
+ var streaming_js_1 = require("./streaming.js");
35
+ Object.defineProperty(exports, "SSEWriter", { enumerable: true, get: function () { return streaming_js_1.SSEWriter; } });
36
+ Object.defineProperty(exports, "createSSEWriter", { enumerable: true, get: function () { return streaming_js_1.createSSEWriter; } });
37
+ Object.defineProperty(exports, "streamProviderResponse", { enumerable: true, get: function () { return streaming_js_1.streamProviderResponse; } });
38
+ Object.defineProperty(exports, "aggregateStreamingResponse", { enumerable: true, get: function () { return streaming_js_1.aggregateStreamingResponse; } });
39
+ Object.defineProperty(exports, "startKeepAlive", { enumerable: true, get: function () { return streaming_js_1.startKeepAlive; } });
40
+ // Legacy proxy functionality from openclaw
41
+ var openclaw_1 = require("@relayplane/openclaw");
42
+ Object.defineProperty(exports, "startProxy", { enumerable: true, get: function () { return openclaw_1.startProxy; } });
43
+ // Configuration
44
+ var config_js_1 = require("./config.js");
45
+ Object.defineProperty(exports, "loadConfig", { enumerable: true, get: function () { return config_js_1.loadConfig; } });
46
+ Object.defineProperty(exports, "saveConfig", { enumerable: true, get: function () { return config_js_1.saveConfig; } });
47
+ Object.defineProperty(exports, "updateConfig", { enumerable: true, get: function () { return config_js_1.updateConfig; } });
48
+ Object.defineProperty(exports, "isFirstRun", { enumerable: true, get: function () { return config_js_1.isFirstRun; } });
49
+ Object.defineProperty(exports, "markFirstRunComplete", { enumerable: true, get: function () { return config_js_1.markFirstRunComplete; } });
50
+ Object.defineProperty(exports, "isTelemetryEnabled", { enumerable: true, get: function () { return config_js_1.isTelemetryEnabled; } });
51
+ Object.defineProperty(exports, "enableTelemetry", { enumerable: true, get: function () { return config_js_1.enableTelemetry; } });
52
+ Object.defineProperty(exports, "disableTelemetry", { enumerable: true, get: function () { return config_js_1.disableTelemetry; } });
53
+ Object.defineProperty(exports, "getDeviceId", { enumerable: true, get: function () { return config_js_1.getDeviceId; } });
54
+ Object.defineProperty(exports, "setApiKey", { enumerable: true, get: function () { return config_js_1.setApiKey; } });
55
+ Object.defineProperty(exports, "getApiKey", { enumerable: true, get: function () { return config_js_1.getApiKey; } });
56
+ Object.defineProperty(exports, "getConfigDir", { enumerable: true, get: function () { return config_js_1.getConfigDir; } });
57
+ Object.defineProperty(exports, "getConfigPath", { enumerable: true, get: function () { return config_js_1.getConfigPath; } });
58
+ // Telemetry
59
+ var telemetry_js_1 = require("./telemetry.js");
60
+ Object.defineProperty(exports, "recordTelemetry", { enumerable: true, get: function () { return telemetry_js_1.recordTelemetry; } });
61
+ Object.defineProperty(exports, "inferTaskType", { enumerable: true, get: function () { return telemetry_js_1.inferTaskType; } });
62
+ Object.defineProperty(exports, "estimateCost", { enumerable: true, get: function () { return telemetry_js_1.estimateCost; } });
63
+ Object.defineProperty(exports, "setAuditMode", { enumerable: true, get: function () { return telemetry_js_1.setAuditMode; } });
64
+ Object.defineProperty(exports, "isAuditMode", { enumerable: true, get: function () { return telemetry_js_1.isAuditMode; } });
65
+ Object.defineProperty(exports, "setOfflineMode", { enumerable: true, get: function () { return telemetry_js_1.setOfflineMode; } });
66
+ Object.defineProperty(exports, "isOfflineMode", { enumerable: true, get: function () { return telemetry_js_1.isOfflineMode; } });
67
+ Object.defineProperty(exports, "getAuditBuffer", { enumerable: true, get: function () { return telemetry_js_1.getAuditBuffer; } });
68
+ Object.defineProperty(exports, "clearAuditBuffer", { enumerable: true, get: function () { return telemetry_js_1.clearAuditBuffer; } });
69
+ Object.defineProperty(exports, "getLocalTelemetry", { enumerable: true, get: function () { return telemetry_js_1.getLocalTelemetry; } });
70
+ Object.defineProperty(exports, "getTelemetryStats", { enumerable: true, get: function () { return telemetry_js_1.getTelemetryStats; } });
71
+ Object.defineProperty(exports, "clearTelemetry", { enumerable: true, get: function () { return telemetry_js_1.clearTelemetry; } });
72
+ Object.defineProperty(exports, "getTelemetryPath", { enumerable: true, get: function () { return telemetry_js_1.getTelemetryPath; } });
73
+ Object.defineProperty(exports, "printTelemetryDisclosure", { enumerable: true, get: function () { return telemetry_js_1.printTelemetryDisclosure; } });
3007
74
  //# sourceMappingURL=index.js.map