@relayplane/proxy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +156 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +2716 -0
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +2693 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/index.d.mts +1023 -0
- package/dist/index.d.ts +1023 -0
- package/dist/index.js +2739 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +2685 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +76 -0
package/dist/cli.mjs
ADDED
|
@@ -0,0 +1,2693 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/proxy.ts
|
|
4
|
+
import * as http from "http";
|
|
5
|
+
|
|
6
|
+
// src/storage/store.ts
|
|
7
|
+
import Database from "better-sqlite3";
|
|
8
|
+
|
|
9
|
+
// node_modules/nanoid/index.js
|
|
10
|
+
import crypto from "crypto";
|
|
11
|
+
|
|
12
|
+
// node_modules/nanoid/url-alphabet/index.js
|
|
13
|
+
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";

// node_modules/nanoid/index.js
// Vendored nanoid: URL-safe random ID generation. Random bytes are drawn
// from a shared pre-filled pool so crypto.randomFillSync is amortized
// over many ID generations instead of being called once per ID.
var POOL_SIZE_MULTIPLIER = 128;
var pool;
var poolOffset;
var fillPool = (bytes) => {
  if (!pool || pool.length < bytes) {
    // First use (or a request larger than the pool): allocate a pool big
    // enough for POOL_SIZE_MULTIPLIER IDs of this size and fill it.
    pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
    crypto.randomFillSync(pool);
    poolOffset = 0;
  } else if (poolOffset + bytes > pool.length) {
    // Pool exhausted: refill in place and start consuming from the top.
    crypto.randomFillSync(pool);
    poolOffset = 0;
  }
  // Reserve `bytes` bytes; the caller reads the slice just below poolOffset.
  poolOffset += bytes;
};
var nanoid = (size = 21) => {
  // `size |= 0` truncates to an integer before reserving pool bytes.
  fillPool(size |= 0);
  let id = "";
  // Consume the bytes reserved above. Masking with 63 maps each byte
  // uniformly onto the 64-character alphabet.
  for (let i = 0; i < size; i++) {
    id += urlAlphabet[pool[poolOffset - size + i] & 63];
  }
  return id;
};
|
|
38
|
+
|
|
39
|
+
// src/storage/store.ts
|
|
40
|
+
import * as fs from "fs";
|
|
41
|
+
import * as path from "path";
|
|
42
|
+
import * as os from "os";
|
|
43
|
+
|
|
44
|
+
// src/storage/schema.ts
|
|
45
|
+
var SCHEMA_SQL = `
|
|
46
|
+
-- Runs table: stores all LLM invocations
|
|
47
|
+
CREATE TABLE IF NOT EXISTS runs (
|
|
48
|
+
id TEXT PRIMARY KEY,
|
|
49
|
+
prompt TEXT NOT NULL,
|
|
50
|
+
system_prompt TEXT,
|
|
51
|
+
task_type TEXT NOT NULL,
|
|
52
|
+
model TEXT NOT NULL,
|
|
53
|
+
success INTEGER NOT NULL,
|
|
54
|
+
output TEXT,
|
|
55
|
+
error TEXT,
|
|
56
|
+
duration_ms INTEGER NOT NULL,
|
|
57
|
+
tokens_in INTEGER,
|
|
58
|
+
tokens_out INTEGER,
|
|
59
|
+
cost_usd REAL,
|
|
60
|
+
metadata TEXT,
|
|
61
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
-- Index for task type queries
|
|
65
|
+
CREATE INDEX IF NOT EXISTS idx_runs_task_type ON runs(task_type);
|
|
66
|
+
|
|
67
|
+
-- Index for model queries
|
|
68
|
+
CREATE INDEX IF NOT EXISTS idx_runs_model ON runs(model);
|
|
69
|
+
|
|
70
|
+
-- Index for time-based queries
|
|
71
|
+
CREATE INDEX IF NOT EXISTS idx_runs_created_at ON runs(created_at);
|
|
72
|
+
|
|
73
|
+
-- Outcomes table: stores user feedback on runs
|
|
74
|
+
CREATE TABLE IF NOT EXISTS outcomes (
|
|
75
|
+
id TEXT PRIMARY KEY,
|
|
76
|
+
run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
|
|
77
|
+
success INTEGER NOT NULL,
|
|
78
|
+
quality TEXT,
|
|
79
|
+
latency_satisfactory INTEGER,
|
|
80
|
+
cost_satisfactory INTEGER,
|
|
81
|
+
feedback TEXT,
|
|
82
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
83
|
+
UNIQUE(run_id)
|
|
84
|
+
);
|
|
85
|
+
|
|
86
|
+
-- Index for run lookups
|
|
87
|
+
CREATE INDEX IF NOT EXISTS idx_outcomes_run_id ON outcomes(run_id);
|
|
88
|
+
|
|
89
|
+
-- Routing rules table: stores routing preferences
|
|
90
|
+
CREATE TABLE IF NOT EXISTS routing_rules (
|
|
91
|
+
id TEXT PRIMARY KEY,
|
|
92
|
+
task_type TEXT NOT NULL UNIQUE,
|
|
93
|
+
preferred_model TEXT NOT NULL,
|
|
94
|
+
source TEXT NOT NULL DEFAULT 'default',
|
|
95
|
+
confidence REAL,
|
|
96
|
+
sample_count INTEGER,
|
|
97
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
98
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
99
|
+
);
|
|
100
|
+
|
|
101
|
+
-- Index for task type lookups
|
|
102
|
+
CREATE INDEX IF NOT EXISTS idx_routing_rules_task_type ON routing_rules(task_type);
|
|
103
|
+
|
|
104
|
+
-- Suggestions table: stores routing improvement suggestions
|
|
105
|
+
CREATE TABLE IF NOT EXISTS suggestions (
|
|
106
|
+
id TEXT PRIMARY KEY,
|
|
107
|
+
task_type TEXT NOT NULL,
|
|
108
|
+
current_model TEXT NOT NULL,
|
|
109
|
+
suggested_model TEXT NOT NULL,
|
|
110
|
+
reason TEXT NOT NULL,
|
|
111
|
+
confidence REAL NOT NULL,
|
|
112
|
+
expected_improvement TEXT NOT NULL,
|
|
113
|
+
sample_count INTEGER NOT NULL,
|
|
114
|
+
accepted INTEGER,
|
|
115
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
116
|
+
accepted_at TEXT
|
|
117
|
+
);
|
|
118
|
+
|
|
119
|
+
-- Index for task type lookups
|
|
120
|
+
CREATE INDEX IF NOT EXISTS idx_suggestions_task_type ON suggestions(task_type);
|
|
121
|
+
|
|
122
|
+
-- Index for pending suggestions
|
|
123
|
+
CREATE INDEX IF NOT EXISTS idx_suggestions_accepted ON suggestions(accepted);
|
|
124
|
+
|
|
125
|
+
-- Schema version table for migrations
|
|
126
|
+
CREATE TABLE IF NOT EXISTS schema_version (
|
|
127
|
+
version INTEGER PRIMARY KEY,
|
|
128
|
+
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
129
|
+
);
|
|
130
|
+
|
|
131
|
+
-- Insert initial schema version
|
|
132
|
+
INSERT OR IGNORE INTO schema_version (version) VALUES (1);
|
|
133
|
+
`;
|
|
134
|
+
// Baseline routing rules seeded into the database on first run. Every task
// type starts on the same inexpensive model; learned rules override these
// later via setRule(..., "learned", ...).
var DEFAULT_ROUTING_RULES = [
  { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
  { taskType: "general", preferredModel: "anthropic:claude-3-5-haiku-latest" }
];
/**
 * Builds the idempotent seed statement that inserts DEFAULT_ROUTING_RULES
 * into routing_rules. INSERT OR IGNORE means existing rows (including
 * user-customized rules) are never overwritten.
 *
 * The values are interpolated directly into the SQL string; this is safe
 * only because DEFAULT_ROUTING_RULES is a hard-coded constant containing
 * no quote characters — do not reuse this pattern for external input.
 *
 * @returns {string} A single multi-row INSERT OR IGNORE statement.
 */
function generateSeedSQL() {
  const values = DEFAULT_ROUTING_RULES.map((rule) => {
    // Deterministic ids ("default-<taskType>") keep reseeding idempotent.
    const id = `default-${rule.taskType}`;
    return `('${id}', '${rule.taskType}', '${rule.preferredModel}', 'default', NULL, NULL, datetime('now'), datetime('now'))`;
  }).join(",\n    ");
  return `
INSERT OR IGNORE INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
VALUES
    ${values};
`;
}
|
|
156
|
+
|
|
157
|
+
// src/storage/store.ts
|
|
158
|
+
/**
 * Resolves the default on-disk location of the local database:
 * ~/.relayplane/data.db under the current user's home directory.
 *
 * @returns {string} Absolute path to the default SQLite file.
 */
function getDefaultDbPath() {
  const home = os.homedir();
  return path.join(home, ".relayplane", "data.db");
}
|
|
161
|
+
/**
 * Local persistence layer backed by better-sqlite3 (synchronous API).
 * Owns a single SQLite connection and exposes CRUD over four tables:
 * runs, outcomes, routing_rules, and suggestions. Booleans are stored
 * as INTEGER 0/1 and converted back on read; row ids come from nanoid().
 */
var Store = class {
  // better-sqlite3 database handle, opened in the constructor.
  db;
  // Resolved path of the SQLite file backing this store.
  dbPath;
  /**
   * Creates a new Store instance.
   *
   * Ensures the parent directory exists, opens (or creates) the database,
   * enables WAL journaling and foreign-key enforcement, then applies the
   * idempotent schema and default routing-rule seeds.
   *
   * @param dbPath - Path to the SQLite database file. Defaults to ~/.relayplane/data.db
   */
  constructor(dbPath) {
    this.dbPath = dbPath ?? getDefaultDbPath();
    const dir = path.dirname(this.dbPath);
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
    this.db = new Database(this.dbPath);
    // WAL lets readers proceed while a write is in progress.
    this.db.pragma("journal_mode = WAL");
    // SQLite ships with FK enforcement off; needed for ON DELETE CASCADE.
    this.db.pragma("foreign_keys = ON");
    this.initializeSchema();
  }
  /**
   * Initializes the database schema. Both scripts are idempotent, so this
   * is safe to run on every startup.
   */
  initializeSchema() {
    this.db.exec(SCHEMA_SQL);
    this.db.exec(generateSeedSQL());
  }
  /**
   * Closes the database connection. The Store must not be used afterwards.
   */
  close() {
    this.db.close();
  }
  /**
   * Gets the database path.
   */
  getDbPath() {
    return this.dbPath;
  }
  // ============================================================================
  // Runs
  // ============================================================================
  /**
   * Records a new run.
   *
   * @param run - Run fields (prompt, taskType, model, success, durationMs,
   *              plus optional output/error/token counts/cost/metadata).
   * @returns The generated run id.
   */
  recordRun(run) {
    const id = nanoid();
    const stmt = this.db.prepare(`
      INSERT INTO runs (id, prompt, system_prompt, task_type, model, success, output, error, duration_ms, tokens_in, tokens_out, cost_usd, metadata, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
    `);
    stmt.run(
      id,
      run.prompt,
      run.systemPrompt,
      run.taskType,
      run.model,
      run.success ? 1 : 0,
      run.output,
      run.error,
      run.durationMs,
      run.tokensIn,
      run.tokensOut,
      run.costUsd,
      run.metadata
    );
    return id;
  }
  /**
   * Gets a run by ID.
   *
   * @returns The run with camelCase keys, or null when not found.
   */
  getRun(id) {
    const stmt = this.db.prepare(`
      SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
      FROM runs
      WHERE id = ?
    `);
    const row = stmt.get(id);
    if (!row) return null;
    return {
      ...row,
      // SQLite stores booleans as 0/1; convert back.
      success: Boolean(row.success)
    };
  }
  /**
   * Gets runs with optional filters.
   *
   * @param options - Optional taskType/model/from/to filters plus
   *                  limit (default 100) and offset (default 0).
   * @returns Runs ordered newest-first.
   */
  getRuns(options) {
    const conditions = [];
    const params = [];
    if (options?.taskType) {
      conditions.push("task_type = ?");
      params.push(options.taskType);
    }
    if (options?.model) {
      conditions.push("model = ?");
      params.push(options.model);
    }
    if (options?.from) {
      conditions.push("created_at >= ?");
      params.push(options.from);
    }
    if (options?.to) {
      conditions.push("created_at <= ?");
      params.push(options.to);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    const limit = options?.limit ?? 100;
    const offset = options?.offset ?? 0;
    const stmt = this.db.prepare(`
      SELECT id, prompt, system_prompt as systemPrompt, task_type as taskType, model, success, output, error, duration_ms as durationMs, tokens_in as tokensIn, tokens_out as tokensOut, cost_usd as costUsd, metadata, created_at as createdAt
      FROM runs
      ${whereClause}
      ORDER BY created_at DESC
      LIMIT ? OFFSET ?
    `);
    // limit/offset bind to the trailing placeholders, after the filters.
    params.push(limit, offset);
    const rows = stmt.all(...params);
    return rows.map((row) => ({
      ...row,
      success: Boolean(row.success)
    }));
  }
  /**
   * Counts runs with optional filters (same filters as getRuns, no paging).
   */
  countRuns(options) {
    const conditions = [];
    const params = [];
    if (options?.taskType) {
      conditions.push("task_type = ?");
      params.push(options.taskType);
    }
    if (options?.model) {
      conditions.push("model = ?");
      params.push(options.model);
    }
    if (options?.from) {
      conditions.push("created_at >= ?");
      params.push(options.from);
    }
    if (options?.to) {
      conditions.push("created_at <= ?");
      params.push(options.to);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    const stmt = this.db.prepare(`
      SELECT COUNT(*) as count
      FROM runs
      ${whereClause}
    `);
    const row = stmt.get(...params);
    return row.count;
  }
  // ============================================================================
  // Outcomes
  // ============================================================================
  /**
   * Records an outcome for a run. At most one outcome exists per run
   * (UNIQUE(run_id)); recording again upserts over the previous outcome.
   *
   * @returns The generated outcome id. NOTE: on upsert the row keeps its
   *          original id; the returned id is the freshly generated one.
   */
  recordOutcome(outcome) {
    const id = nanoid();
    const stmt = this.db.prepare(`
      INSERT INTO outcomes (id, run_id, success, quality, latency_satisfactory, cost_satisfactory, feedback, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
      ON CONFLICT(run_id) DO UPDATE SET
        success = excluded.success,
        quality = excluded.quality,
        latency_satisfactory = excluded.latency_satisfactory,
        cost_satisfactory = excluded.cost_satisfactory,
        feedback = excluded.feedback,
        created_at = datetime('now')
    `);
    stmt.run(
      id,
      outcome.runId,
      outcome.success ? 1 : 0,
      // Tri-state flags: null = not reported, otherwise stored as 0/1.
      outcome.latencySatisfactory != null ? outcome.latencySatisfactory ? 1 : 0 : null,
      outcome.costSatisfactory != null ? outcome.costSatisfactory ? 1 : 0 : null,
      outcome.feedback
    );
    return id;
  }
  /**
   * Gets an outcome for a run.
   *
   * @returns The outcome with camelCase keys, or null when not found.
   */
  getOutcome(runId) {
    const stmt = this.db.prepare(`
      SELECT id, run_id as runId, success, quality, latency_satisfactory as latencySatisfactory, cost_satisfactory as costSatisfactory, feedback, created_at as createdAt
      FROM outcomes
      WHERE run_id = ?
    `);
    const row = stmt.get(runId);
    if (!row) return null;
    return {
      ...row,
      success: Boolean(row.success),
      // Preserve the null "not reported" state when converting 0/1 back.
      latencySatisfactory: row.latencySatisfactory != null ? Boolean(row.latencySatisfactory) : null,
      costSatisfactory: row.costSatisfactory != null ? Boolean(row.costSatisfactory) : null
    };
  }
  /**
   * Gets outcomes with optional filters. Joins runs so outcomes can be
   * filtered by the run's taskType/model; each row includes those columns.
   *
   * @param options - Optional taskType/model/from/to filters and limit
   *                  (default 100). Unlike getRuns, no offset support.
   */
  getOutcomes(options) {
    const conditions = [];
    const params = [];
    if (options?.taskType) {
      conditions.push("r.task_type = ?");
      params.push(options.taskType);
    }
    if (options?.model) {
      conditions.push("r.model = ?");
      params.push(options.model);
    }
    if (options?.from) {
      conditions.push("o.created_at >= ?");
      params.push(options.from);
    }
    if (options?.to) {
      conditions.push("o.created_at <= ?");
      params.push(options.to);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    const limit = options?.limit ?? 100;
    const stmt = this.db.prepare(`
      SELECT o.id, o.run_id as runId, o.success, o.quality, o.latency_satisfactory as latencySatisfactory, o.cost_satisfactory as costSatisfactory, o.feedback, o.created_at as createdAt, r.task_type as taskType, r.model
      FROM outcomes o
      JOIN runs r ON o.run_id = r.id
      ${whereClause}
      ORDER BY o.created_at DESC
      LIMIT ?
    `);
    params.push(limit);
    const rows = stmt.all(...params);
    return rows.map((row) => ({
      ...row,
      success: Boolean(row.success),
      latencySatisfactory: row.latencySatisfactory != null ? Boolean(row.latencySatisfactory) : null,
      costSatisfactory: row.costSatisfactory != null ? Boolean(row.costSatisfactory) : null
    }));
  }
  // ============================================================================
  // Routing Rules
  // ============================================================================
  /**
   * Gets a routing rule for a task type, or null when none exists.
   */
  getRule(taskType) {
    const stmt = this.db.prepare(`
      SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
      FROM routing_rules
      WHERE task_type = ?
    `);
    return stmt.get(taskType) ?? null;
  }
  /**
   * Sets a routing rule for a task type (upsert keyed on task_type).
   *
   * @param source - Rule provenance, e.g. 'default' or 'learned'.
   * @returns The rule id (the existing row's id when one already exists,
   *          so the returned id always matches the stored row).
   */
  setRule(taskType, preferredModel, source, confidence, sampleCount) {
    // Reuse the existing row's id so the returned id stays accurate:
    // ON CONFLICT UPDATE does not replace the id column.
    const existingRule = this.getRule(taskType);
    const id = existingRule?.id ?? nanoid();
    const stmt = this.db.prepare(`
      INSERT INTO routing_rules (id, task_type, preferred_model, source, confidence, sample_count, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
      ON CONFLICT(task_type) DO UPDATE SET
        preferred_model = excluded.preferred_model,
        source = excluded.source,
        confidence = excluded.confidence,
        sample_count = excluded.sample_count,
        updated_at = datetime('now')
    `);
    stmt.run(id, taskType, preferredModel, source, confidence ?? null, sampleCount ?? null);
    return id;
  }
  /**
   * Lists all routing rules, ordered by task type.
   */
  listRules() {
    const stmt = this.db.prepare(`
      SELECT id, task_type as taskType, preferred_model as preferredModel, source, confidence, sample_count as sampleCount, created_at as createdAt, updated_at as updatedAt
      FROM routing_rules
      ORDER BY task_type
    `);
    return stmt.all();
  }
  /**
   * Deletes a routing rule and resets to default. The row is not removed;
   * it is overwritten with the built-in default for that task type.
   *
   * @returns false when the task type has no built-in default.
   */
  deleteRule(taskType) {
    const defaultRule = DEFAULT_ROUTING_RULES.find((r) => r.taskType === taskType);
    if (!defaultRule) return false;
    this.setRule(taskType, defaultRule.preferredModel, "default");
    return true;
  }
  // ============================================================================
  // Suggestions
  // ============================================================================
  /**
   * Records a suggestion (a proposed routing change awaiting user review).
   *
   * @returns The generated suggestion id.
   */
  recordSuggestion(suggestion) {
    const id = nanoid();
    const stmt = this.db.prepare(`
      INSERT INTO suggestions (id, task_type, current_model, suggested_model, reason, confidence, expected_improvement, sample_count, accepted, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
    `);
    stmt.run(
      id,
      suggestion.taskType,
      suggestion.currentModel,
      suggestion.suggestedModel,
      suggestion.reason,
      suggestion.confidence,
      suggestion.expectedImprovement,
      suggestion.sampleCount,
      suggestion.accepted ?? null
    );
    return id;
  }
  /**
   * Gets a suggestion by ID, or null when not found.
   */
  getSuggestion(id) {
    const stmt = this.db.prepare(`
      SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
      FROM suggestions
      WHERE id = ?
    `);
    const row = stmt.get(id);
    if (!row) return null;
    return {
      ...row,
      // accepted is tri-state: null = pending, true/false = decided.
      accepted: row.accepted != null ? Boolean(row.accepted) : null
    };
  }
  /**
   * Gets pending (unaccepted) suggestions, highest confidence first.
   */
  getPendingSuggestions() {
    const stmt = this.db.prepare(`
      SELECT id, task_type as taskType, current_model as currentModel, suggested_model as suggestedModel, reason, confidence, expected_improvement as expectedImprovement, sample_count as sampleCount, accepted, created_at as createdAt, accepted_at as acceptedAt
      FROM suggestions
      WHERE accepted IS NULL
      ORDER BY confidence DESC
    `);
    const rows = stmt.all();
    return rows.map((row) => ({
      ...row,
      accepted: row.accepted != null ? Boolean(row.accepted) : null
    }));
  }
  /**
   * Accepts a suggestion: marks it accepted and installs the suggested
   * model as the 'learned' routing rule for that task type.
   *
   * @returns false when the suggestion id does not exist.
   */
  acceptSuggestion(id) {
    const suggestion = this.getSuggestion(id);
    if (!suggestion) return false;
    const updateStmt = this.db.prepare(`
      UPDATE suggestions
      SET accepted = 1, accepted_at = datetime('now')
      WHERE id = ?
    `);
    updateStmt.run(id);
    this.setRule(
      suggestion.taskType,
      suggestion.suggestedModel,
      "learned",
      suggestion.confidence,
      suggestion.sampleCount
    );
    return true;
  }
  /**
   * Rejects a suggestion (accepted = 0). Routing rules are unchanged.
   *
   * @returns false when the suggestion id does not exist.
   */
  rejectSuggestion(id) {
    const stmt = this.db.prepare(`
      UPDATE suggestions
      SET accepted = 0, accepted_at = datetime('now')
      WHERE id = ?
    `);
    const result = stmt.run(id);
    return result.changes > 0;
  }
  // ============================================================================
  // Statistics
  // ============================================================================
  /**
   * Gets aggregated statistics over runs: overall totals plus per-task-type
   * and per-model breakdowns, optionally restricted to a created_at window.
   */
  getStats(options) {
    const conditions = [];
    const params = [];
    if (options?.from) {
      conditions.push("created_at >= ?");
      params.push(options.from);
    }
    if (options?.to) {
      conditions.push("created_at <= ?");
      params.push(options.to);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
    const overallStmt = this.db.prepare(`
      SELECT
        COUNT(*) as totalRuns,
        SUM(success) as successfulRuns,
        AVG(duration_ms) as avgDurationMs
      FROM runs
      ${whereClause}
    `);
    const overall = overallStmt.get(...params);
    const byTaskTypeStmt = this.db.prepare(`
      SELECT
        task_type as taskType,
        COUNT(*) as runs,
        AVG(success) as successRate,
        AVG(duration_ms) as avgDurationMs
      FROM runs
      ${whereClause}
      GROUP BY task_type
    `);
    const byTaskTypeRows = byTaskTypeStmt.all(...params);
    const byTaskType = {};
    for (const row of byTaskTypeRows) {
      byTaskType[row.taskType] = {
        runs: row.runs,
        // AVG over 0/1 success values yields the success rate directly.
        successRate: row.successRate,
        avgDurationMs: row.avgDurationMs
      };
    }
    const byModelStmt = this.db.prepare(`
      SELECT
        model,
        COUNT(*) as runs,
        AVG(success) as successRate,
        AVG(duration_ms) as avgDurationMs
      FROM runs
      ${whereClause}
      GROUP BY model
    `);
    const byModelRows = byModelStmt.all(...params);
    const byModel = {};
    for (const row of byModelRows) {
      byModel[row.model] = {
        runs: row.runs,
        successRate: row.successRate,
        avgDurationMs: row.avgDurationMs
      };
    }
    return {
      totalRuns: overall.totalRuns,
      // SUM/AVG return NULL on an empty table; normalize to 0.
      successfulRuns: overall.successfulRuns ?? 0,
      avgDurationMs: overall.avgDurationMs ?? 0,
      byTaskType,
      byModel
    };
  }
  /**
   * Gets statistics for learning (outcomes joined with runs) for one task
   * type, grouped by model. An explicit user outcome overrides the run's
   * own success flag; models with fewer than 5 runs are excluded.
   */
  getLearningStats(taskType) {
    const stmt = this.db.prepare(`
      SELECT
        r.model,
        COUNT(*) as runs,
        AVG(r.success) as successRate,
        AVG(r.duration_ms) as avgDurationMs,
        AVG(CASE WHEN o.success IS NOT NULL THEN o.success ELSE r.success END) as outcomeSuccessRate
      FROM runs r
      LEFT JOIN outcomes o ON r.id = o.run_id
      WHERE r.task_type = ?
      GROUP BY r.model
      HAVING runs >= 5
    `);
    return stmt.all(taskType);
  }
};
|
|
639
|
+
|
|
640
|
+
// src/routing/inference.ts
|
|
641
|
+
var TASK_PATTERNS = {
|
|
642
|
+
code_generation: [
|
|
643
|
+
{ pattern: /\b(write|create|generate|implement|build|code|develop|make)\b.{0,50}\b(function|class|code|script|program|method|module|api|endpoint|component)\b/i, weight: 10 },
|
|
644
|
+
{ pattern: /\b(write|create|generate)\b.{0,30}\b(python|javascript|typescript|java|go|rust|c\+\+|ruby|php|swift)\b/i, weight: 10 },
|
|
645
|
+
{ pattern: /\bcreate a.{0,30}(that|which|to)\b/i, weight: 5 },
|
|
646
|
+
{ pattern: /\bimplement\b.{0,50}\b(algorithm|logic|feature)\b/i, weight: 8 },
|
|
647
|
+
{ pattern: /\bcode\s+for\b/i, weight: 7 },
|
|
648
|
+
{ pattern: /\bwrite me\b.{0,30}\b(code|script|function)\b/i, weight: 9 },
|
|
649
|
+
{ pattern: /```[\w]*\n/i, weight: 3 }
|
|
650
|
+
// Code blocks suggest code context
|
|
651
|
+
],
|
|
652
|
+
code_review: [
|
|
653
|
+
{ pattern: /\b(review|analyze|check|audit|inspect|evaluate|assess|critique)\b.{0,30}\b(code|function|class|script|implementation|pull request|pr|diff)\b/i, weight: 10 },
|
|
654
|
+
{ pattern: /\b(what'?s? wrong|find\s+(bugs?|issues?|problems?|errors?))\b.{0,30}\b(code|function|this)\b/i, weight: 9 },
|
|
655
|
+
{ pattern: /\b(improve|optimize|refactor)\b.{0,30}\b(code|function|this)\b/i, weight: 7 },
|
|
656
|
+
{ pattern: /\blook\s+(at|over)\s+(this|my)\s+code\b/i, weight: 8 },
|
|
657
|
+
{ pattern: /\bcode\s+review\b/i, weight: 10 },
|
|
658
|
+
{ pattern: /\bcan you (check|review)\b/i, weight: 5 }
|
|
659
|
+
],
|
|
660
|
+
summarization: [
|
|
661
|
+
{ pattern: /\b(summarize|summarise|summary|tldr|tl;dr|recap|condense|brief|overview)\b/i, weight: 10 },
|
|
662
|
+
{ pattern: /\b(give|provide|write)\s+(me\s+)?(a\s+)?(brief|short|quick|concise)\s+(summary|overview)\b/i, weight: 9 },
|
|
663
|
+
{ pattern: /\bshorten\s+(this|the)\b/i, weight: 6 },
|
|
664
|
+
{ pattern: /\bin\s+(brief|short|a nutshell)\b/i, weight: 7 },
|
|
665
|
+
{ pattern: /\bkey\s+(points?|takeaways?)\b/i, weight: 8 },
|
|
666
|
+
{ pattern: /\bmain\s+(ideas?|points?)\b/i, weight: 7 }
|
|
667
|
+
],
|
|
668
|
+
analysis: [
|
|
669
|
+
{ pattern: /\b(analyze|analyse|analysis|examine|investigate|assess|evaluate|study)\b/i, weight: 8 },
|
|
670
|
+
{ pattern: /\b(compare|contrast|differentiate|distinguish)\b.{0,30}\b(between|and)\b/i, weight: 9 },
|
|
671
|
+
{ pattern: /\b(pros?\s+and\s+cons?|advantages?\s+and\s+disadvantages?|strengths?\s+and\s+weaknesses?)\b/i, weight: 9 },
|
|
672
|
+
{ pattern: /\b(what\s+are|explain)\s+(the\s+)?(implications?|consequences?|effects?|impacts?)\b/i, weight: 8 },
|
|
673
|
+
{ pattern: /\bbreak\s*down\b/i, weight: 6 },
|
|
674
|
+
{ pattern: /\bdeep\s*dive\b/i, weight: 7 },
|
|
675
|
+
{ pattern: /\bcritical(ly)?\s+(analysis|evaluation|assessment)\b/i, weight: 9 }
|
|
676
|
+
],
|
|
677
|
+
creative_writing: [
|
|
678
|
+
{ pattern: /\b(write|create|compose|craft|author)\b.{0,30}\b(story|poem|essay|article|blog|post|narrative|fiction|novel|song|lyrics)\b/i, weight: 10 },
|
|
679
|
+
{ pattern: /\b(creative|imaginative|fictional)\s+(writing|story|piece)\b/i, weight: 10 },
|
|
680
|
+
{ pattern: /\bonce upon a time\b/i, weight: 8 },
|
|
681
|
+
{ pattern: /\b(write|tell)\s+(me\s+)?(a\s+)?(short\s+)?story\b/i, weight: 9 },
|
|
682
|
+
{ pattern: /\b(brainstorm|ideate)\b.{0,30}\b(ideas?|concepts?|themes?)\b/i, weight: 7 },
|
|
683
|
+
{ pattern: /\bwrite\s+(in|like)\s+(the\s+)?style\s+of\b/i, weight: 8 },
|
|
684
|
+
{ pattern: /\b(catchy|creative|engaging)\s+(title|headline|tagline|slogan)\b/i, weight: 7 }
|
|
685
|
+
],
|
|
686
|
+
data_extraction: [
|
|
687
|
+
{ pattern: /\b(extract|parse|pull|get|retrieve|find|identify)\b.{0,30}\b(data|information|details?|values?|fields?|entities?|names?|numbers?|dates?|emails?|phones?|addresses?)\b/i, weight: 10 },
|
|
688
|
+
{ pattern: /\b(convert|transform)\b.{0,30}\b(to|into)\s+(json|csv|xml|yaml|table|structured)\b/i, weight: 9 },
|
|
689
|
+
{ pattern: /\bstructured\s+(data|output|format)\b/i, weight: 8 },
|
|
690
|
+
{ pattern: /\bnamed\s+entity\s+(recognition|extraction)\b/i, weight: 10 },
|
|
691
|
+
{ pattern: /\b(scrape|crawl)\b/i, weight: 6 },
|
|
692
|
+
{ pattern: /\bjson\s+(output|format|schema)\b/i, weight: 7 }
|
|
693
|
+
],
|
|
694
|
+
translation: [
|
|
695
|
+
{ pattern: /\b(translate|translation|translator)\b/i, weight: 10 },
|
|
696
|
+
{ pattern: /\b(convert|change)\b.{0,20}\b(to|into)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 9 },
|
|
697
|
+
{ pattern: /\b(in|to)\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\b/i, weight: 6 },
|
|
698
|
+
{ pattern: /\bfrom\s+(english|spanish|french|german|chinese|japanese|korean|portuguese|italian|russian|arabic|hindi|dutch)\s+to\b/i, weight: 10 },
|
|
699
|
+
{ pattern: /\blocalize|localization\b/i, weight: 7 }
|
|
700
|
+
],
|
|
701
|
+
question_answering: [
|
|
702
|
+
{ pattern: /^(what|who|where|when|why|how|which|is|are|does|do|can|could|would|should|will|did)\s/i, weight: 7 },
|
|
703
|
+
{ pattern: /\?$/i, weight: 5 },
|
|
704
|
+
{ pattern: /\b(explain|describe|define|what\s+is|what\s+are|tell\s+me\s+about)\b/i, weight: 8 },
|
|
705
|
+
{ pattern: /\b(answer|respond|reply)\b.{0,20}\b(question|query)\b/i, weight: 9 },
|
|
706
|
+
{ pattern: /\bfaq\b/i, weight: 8 },
|
|
707
|
+
{ pattern: /\bi\s+(want|need)\s+to\s+know\b/i, weight: 6 },
|
|
708
|
+
{ pattern: /\bcan\s+you\s+(tell|explain|help)\b/i, weight: 5 }
|
|
709
|
+
],
|
|
710
|
+
general: [
|
|
711
|
+
// Catch-all patterns with low weights
|
|
712
|
+
{ pattern: /./i, weight: 1 }
|
|
713
|
+
]
|
|
714
|
+
};
|
|
715
|
+
/**
 * Infers the most likely task type for a prompt by scoring it against the
 * weighted regex patterns in TASK_PATTERNS.
 *
 * Every matching pattern adds its weight to that task type's score; the
 * highest-scoring type wins. The catch-all "general" pattern always matches
 * and contributes 1, so a max score of 1 means no real signal was found.
 *
 * @param {string} prompt - The prompt text to classify.
 * @returns {string} The inferred task type, or "general" when no specific
 *   pattern matched.
 */
function inferTaskType(prompt) {
  // Note: patterns are case-insensitive (/i), so no normalization of the
  // prompt is needed before testing. (A previous revision lower-cased the
  // prompt into a local that was never used.)
  const scores = {
    code_generation: 0,
    code_review: 0,
    summarization: 0,
    analysis: 0,
    creative_writing: 0,
    data_extraction: 0,
    translation: 0,
    question_answering: 0,
    general: 0
  };
  for (const [taskType, patterns] of Object.entries(TASK_PATTERNS)) {
    for (const { pattern, weight } of patterns) {
      if (pattern.test(prompt)) {
        scores[taskType] += weight;
      }
    }
  }
  let maxScore = 0;
  let inferredType = "general";
  for (const [taskType, score] of Object.entries(scores)) {
    if (score > maxScore) {
      maxScore = score;
      inferredType = taskType;
    }
  }
  // A score of 1 is just the always-matching "general" pattern; treat it as
  // "no signal" and fall back to general.
  if (maxScore <= 1) {
    return "general";
  }
  return inferredType;
}
|
|
748
|
+
/**
 * Computes how confidently a prompt matches a given task type.
 *
 * Confidence is the fraction of the task type's total pattern weight that
 * actually matched the prompt, capped at 0.95 so inference is never treated
 * as certain.
 *
 * @param {string} prompt - The prompt to analyze.
 * @param {string} taskType - The task type whose patterns to test.
 * @returns {number} Confidence in [0, 0.95]; 0 for unknown task types.
 */
function getInferenceConfidence(prompt, taskType) {
  const patterns = TASK_PATTERNS[taskType];
  if (!patterns) return 0;
  const maxPossibleWeight = patterns.reduce((sum, entry) => sum + entry.weight, 0);
  if (maxPossibleWeight === 0) return 0;
  const matchedWeight = patterns.reduce(
    (sum, { pattern, weight }) => (pattern.test(prompt) ? sum + weight : sum),
    0
  );
  return Math.min(matchedWeight / maxPossibleWeight, 0.95);
}
|
|
762
|
+
|
|
763
|
+
// src/routing/engine.ts
|
|
764
|
+
var RoutingEngine = class {
  store;
  /**
   * Creates a new RoutingEngine.
   *
   * @param store - The storage instance to use
   */
  constructor(store) {
    this.store = store;
  }
  /**
   * Maps a raw storage record to the public routing-rule shape, normalizing
   * nullable columns (confidence, sampleCount) to undefined.
   * Shared by get() and list() so the two views cannot drift apart.
   */
  #toRuleView(record) {
    return {
      id: record.id,
      taskType: record.taskType,
      preferredModel: record.preferredModel,
      source: record.source,
      confidence: record.confidence ?? void 0,
      sampleCount: record.sampleCount ?? void 0,
      createdAt: record.createdAt,
      updatedAt: record.updatedAt
    };
  }
  /**
   * Infers the task type from a prompt.
   *
   * @param prompt - The prompt to analyze
   * @returns The inferred task type
   */
  inferTaskType(prompt) {
    return inferTaskType(prompt);
  }
  /**
   * Gets the inference confidence for a task type.
   *
   * @param prompt - The prompt to analyze
   * @param taskType - The task type to check
   * @returns Confidence score (0-1)
   */
  getInferenceConfidence(prompt, taskType) {
    return getInferenceConfidence(prompt, taskType);
  }
  /**
   * Gets the routing rule for a task type.
   *
   * @param taskType - The task type to get the rule for
   * @returns The routing rule, or null if not found
   */
  get(taskType) {
    const record = this.store.getRule(taskType);
    if (!record) return null;
    return this.#toRuleView(record);
  }
  /**
   * Sets a routing rule for a task type.
   *
   * @param taskType - The task type to set the rule for
   * @param preferredModel - The preferred model (format: "provider:model")
   * @param source - How the rule was created
   * @param options - Optional confidence and sample count
   * @returns The rule ID
   */
  set(taskType, preferredModel, source = "user", options) {
    return this.store.setRule(
      taskType,
      preferredModel,
      source,
      options?.confidence,
      options?.sampleCount
    );
  }
  /**
   * Lists all routing rules.
   *
   * @returns Array of all routing rules
   */
  list() {
    return this.store.listRules().map((record) => this.#toRuleView(record));
  }
  /**
   * Deletes a routing rule and resets to default.
   *
   * @param taskType - The task type to reset
   * @returns True if the rule was reset
   */
  delete(taskType) {
    return this.store.deleteRule(taskType);
  }
  /**
   * Gets the preferred model for a task type.
   *
   * @param taskType - The task type
   * @returns The preferred model string, or a default
   */
  getPreferredModel(taskType) {
    const rule = this.get(taskType);
    return rule?.preferredModel ?? "local:llama3.2";
  }
  /**
   * Parses a model string into provider and model name.
   *
   * @param modelString - The model string (format: "provider:model")
   * @returns Object with provider and model
   */
  parseModel(modelString) {
    const parts = modelString.split(":");
    if (parts.length < 2) {
      // No provider prefix: treat the whole string as a local model name.
      return { provider: "local", model: modelString };
    }
    // Re-join the remainder so model names containing ":" survive intact.
    return { provider: parts[0], model: parts.slice(1).join(":") };
  }
  /**
   * Resolves the model to use for a prompt.
   *
   * @param prompt - The prompt to analyze
   * @param overrideTaskType - Optional task type override
   * @param overrideModel - Optional model override
   * @returns Object with resolved taskType, model, provider, and confidence
   */
  resolve(prompt, overrideTaskType, overrideModel) {
    const taskType = overrideTaskType ?? this.inferTaskType(prompt);
    const confidence = this.getInferenceConfidence(prompt, taskType);
    const model = overrideModel ?? this.getPreferredModel(taskType);
    const { provider, model: modelName } = this.parseModel(model);
    return {
      taskType,
      model,
      provider,
      modelName,
      confidence
    };
  }
};
|
|
908
|
+
|
|
909
|
+
// src/learning/outcomes.ts
|
|
910
|
+
var OutcomeRecorder = class {
  store;
  /**
   * Creates a new OutcomeRecorder.
   *
   * @param store - The storage instance to use
   */
  constructor(store) {
    this.store = store;
  }
  /**
   * Maps a raw stored outcome row to the public outcome shape, normalizing
   * nullable columns to undefined. Shared by record() and get().
   */
  #toView(outcome) {
    return {
      id: outcome.id,
      runId: outcome.runId,
      success: outcome.success,
      quality: outcome.quality ?? void 0,
      latencySatisfactory: outcome.latencySatisfactory ?? void 0,
      costSatisfactory: outcome.costSatisfactory ?? void 0,
      feedback: outcome.feedback ?? void 0,
      recordedAt: outcome.createdAt
    };
  }
  /**
   * Records an outcome for a run.
   *
   * @param input - The outcome input
   * @returns The recorded outcome
   * @throws If the run ID is not found
   */
  record(input) {
    const run = this.store.getRun(input.runId);
    if (!run) {
      throw new Error(`Run not found: ${input.runId}`);
    }
    // Optional fields are persisted as explicit nulls.
    this.store.recordOutcome({
      runId: input.runId,
      success: input.success,
      quality: input.quality ?? null,
      latencySatisfactory: input.latencySatisfactory ?? null,
      costSatisfactory: input.costSatisfactory ?? null,
      feedback: input.feedback ?? null
    });
    // Read back the stored row so the returned view reflects what was saved.
    const outcome = this.store.getOutcome(input.runId);
    if (!outcome) {
      throw new Error("Failed to record outcome");
    }
    return this.#toView(outcome);
  }
  /**
   * Gets an outcome for a run.
   *
   * @param runId - The run ID
   * @returns The outcome, or null if not found
   */
  get(runId) {
    const outcome = this.store.getOutcome(runId);
    if (!outcome) return null;
    return this.#toView(outcome);
  }
  /**
   * Gets outcome statistics for a task type.
   *
   * @param taskType - The task type to get stats for
   * @returns Outcome statistics
   */
  getTaskStats(taskType) {
    const outcomes = this.store.getOutcomes({ taskType, limit: 1e3 });
    if (outcomes.length === 0) {
      return {
        totalOutcomes: 0,
        successRate: 0,
        qualityDistribution: {},
        latencySatisfactionRate: 0,
        costSatisfactionRate: 0
      };
    }
    let successCount = 0;
    let latencySatisfiedCount = 0;
    let latencyRatedCount = 0;
    let costSatisfiedCount = 0;
    let costRatedCount = 0;
    const qualityDistribution = {};
    for (const outcome of outcomes) {
      if (outcome.success) successCount++;
      if (outcome.quality) {
        qualityDistribution[outcome.quality] = (qualityDistribution[outcome.quality] ?? 0) + 1;
      }
      // Latency/cost satisfaction are tri-state (true/false/null); only rated
      // outcomes (non-null) count toward the denominators.
      if (outcome.latencySatisfactory != null) {
        latencyRatedCount++;
        if (outcome.latencySatisfactory) latencySatisfiedCount++;
      }
      if (outcome.costSatisfactory != null) {
        costRatedCount++;
        if (outcome.costSatisfactory) costSatisfiedCount++;
      }
    }
    return {
      totalOutcomes: outcomes.length,
      successRate: successCount / outcomes.length,
      qualityDistribution,
      latencySatisfactionRate: latencyRatedCount > 0 ? latencySatisfiedCount / latencyRatedCount : 0,
      costSatisfactionRate: costRatedCount > 0 ? costSatisfiedCount / costRatedCount : 0
    };
  }
};
|
|
1021
|
+
|
|
1022
|
+
// src/learning/savings.ts
|
|
1023
|
+
// Pricing in USD per 1M tokens, keyed by bare model name (no provider prefix).
var MODEL_PRICING = {
  // Anthropic models
  "claude-3-5-haiku-latest": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 0.25, output: 1.25 },
  "claude-3-5-sonnet-latest": { input: 3, output: 15 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-latest": { input: 15, output: 75 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI models
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4.1": { input: 2, output: 8 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Google models
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
  // xAI models
  "grok-2": { input: 2, output: 10 },
  "grok-2-latest": { input: 2, output: 10 },
  // Moonshot models
  "moonshot-v1-8k": { input: 0.1, output: 0.1 },
  "moonshot-v1-32k": { input: 0.2, output: 0.2 }
};
// Reference model for savings comparisons (the most expensive in the table).
var BASELINE_MODEL = "claude-3-opus-latest";
/**
 * Estimates the USD cost of a run.
 *
 * @param {string} model - Model id, optionally prefixed as "provider:model".
 * @param {number} tokensIn - Input token count.
 * @param {number} tokensOut - Output token count.
 * @returns {number} Estimated cost in USD. Unknown models fall back to a
 *   generic $1/$3 per 1M tokens estimate.
 */
function calculateCost(model, tokensIn, tokensOut) {
  // Strip the provider prefix but keep any colons inside the model name,
  // consistent with RoutingEngine.parseModel ("provider:model[:variant]").
  const modelName = model.includes(":") ? model.split(":").slice(1).join(":") : model;
  const pricing = MODEL_PRICING[modelName] ?? MODEL_PRICING[model] ?? { input: 1, output: 3 };
  const inputCost = tokensIn / 1e6 * pricing.input;
  const outputCost = tokensOut / 1e6 * pricing.output;
  return inputCost + outputCost;
}
|
|
1057
|
+
/**
 * Computes a cost/savings report over the trailing `days` window.
 *
 * Compares the recorded runs' actual cost against what the same token
 * volumes would have cost on BASELINE_MODEL, with per-model and
 * per-task-type breakdowns.
 *
 * @param {object} store - Storage instance providing getRuns().
 * @param {number} [days=30] - Size of the reporting window in days.
 * @returns {object} Savings report (totals, savings, byModel, byTaskType).
 */
function calculateSavings(store, days = 30) {
  const to = /* @__PURE__ */ new Date();
  const from = /* @__PURE__ */ new Date();
  from.setDate(from.getDate() - days);
  const fromStr = from.toISOString();
  const toStr = to.toISOString();
  const runs = store.getRuns({
    from: fromStr,
    to: toStr,
    limit: 1e5
    // Effectively "all runs" in the window
  });
  const byModel = {};
  const byTaskType = {};
  let totalTokensIn = 0;
  let totalTokensOut = 0;
  let actualCost = 0;
  let baselineCost = 0;
  // The ?? fallback guarantees baselinePricing is non-null below.
  const baselinePricing = MODEL_PRICING[BASELINE_MODEL] ?? { input: 15, output: 75 };
  for (const run of runs) {
    const tokensIn = run.tokensIn ?? 0;
    const tokensOut = run.tokensOut ?? 0;
    // Strip the provider prefix for per-model grouping, keeping any colons
    // inside the model name (consistent with calculateCost/parseModel).
    const modelName = run.model.includes(":") ? run.model.split(":").slice(1).join(":") : run.model;
    const runCost = calculateCost(run.model, tokensIn, tokensOut);
    actualCost += runCost;
    // What this run would have cost on the baseline model.
    const baselineRunCost = tokensIn / 1e6 * baselinePricing.input + tokensOut / 1e6 * baselinePricing.output;
    baselineCost += baselineRunCost;
    totalTokensIn += tokensIn;
    totalTokensOut += tokensOut;
    if (!byModel[modelName]) {
      byModel[modelName] = {
        runs: 0,
        tokensIn: 0,
        tokensOut: 0,
        cost: 0,
        // successRate and avgLatencyMs accumulate raw counts/sums inside the
        // loop and are normalized into rate/average after it.
        successRate: 0,
        avgLatencyMs: 0
      };
    }
    const modelStats = byModel[modelName];
    modelStats.runs++;
    modelStats.tokensIn += tokensIn;
    modelStats.tokensOut += tokensOut;
    modelStats.cost += runCost;
    modelStats.avgLatencyMs += run.durationMs;
    if (run.success) {
      modelStats.successRate++;
    }
    if (!byTaskType[run.taskType]) {
      byTaskType[run.taskType] = { runs: 0, totalCost: 0 };
    }
    const taskStats = byTaskType[run.taskType];
    taskStats.runs++;
    taskStats.totalCost += runCost;
  }
  // Normalize accumulated counts/sums into rates and averages.
  for (const model of Object.keys(byModel)) {
    const stats = byModel[model];
    stats.successRate = stats.runs > 0 ? stats.successRate / stats.runs : 0;
    stats.avgLatencyMs = stats.runs > 0 ? stats.avgLatencyMs / stats.runs : 0;
  }
  const byTaskTypeFinal = {};
  for (const [taskType, stats] of Object.entries(byTaskType)) {
    byTaskTypeFinal[taskType] = {
      runs: stats.runs,
      cost: stats.totalCost,
      avgCostPerRun: stats.runs > 0 ? stats.totalCost / stats.runs : 0
    };
  }
  const savings = baselineCost - actualCost;
  const savingsPercent = baselineCost > 0 ? savings / baselineCost * 100 : 0;
  return {
    periodDays: days,
    period: {
      from: fromStr,
      to: toStr
    },
    totalRuns: runs.length,
    totalTokensIn,
    totalTokensOut,
    actualCost,
    baselineCost,
    savings: Math.max(0, savings),
    // Don't report negative savings
    savingsPercent: Math.max(0, savingsPercent),
    byModel,
    byTaskType: byTaskTypeFinal
  };
}
|
|
1145
|
+
|
|
1146
|
+
// src/learning/patterns.ts
|
|
1147
|
+
// Minimum number of recorded runs an alternative model needs before
// PatternDetector will consider suggesting it.
var MIN_RUNS_FOR_SUGGESTION = 10;
// Minimum blended confidence (sample size + improvement magnitude) a
// suggestion needs to be surfaced at all.
var MIN_CONFIDENCE_THRESHOLD = 0.6;
// Minimum relative improvement in success rate or latency for a model to
// count as "significantly better".
var MIN_IMPROVEMENT_THRESHOLD = 0.1;
// Minimum relative cost reduction to justify a suggestion on cost alone
// (tolerating a small success-rate regression; see analyzeTaskType).
var MIN_COST_IMPROVEMENT_THRESHOLD = 0.2;
|
|
1151
|
+
var PatternDetector = class {
  store;
  /**
   * Creates a new PatternDetector.
   *
   * @param store - The storage instance to use
   */
  constructor(store) {
    this.store = store;
  }
  /**
   * Analyzes a task type and generates suggestions if appropriate.
   *
   * Compares every alternative model's learning stats against the currently
   * routed model; a suggestion is produced only when the alternative is
   * significantly better on success rate, latency, or cost, and the blended
   * confidence clears MIN_CONFIDENCE_THRESHOLD.
   *
   * @param taskType - The task type to analyze
   * @returns Array of suggestions
   */
  analyzeTaskType(taskType) {
    const stats = this.store.getLearningStats(taskType);
    // Need at least two models' worth of data to compare anything.
    if (stats.length < 2) {
      return [];
    }
    const currentRule = this.store.getRule(taskType);
    if (!currentRule) return [];
    const currentModel = currentRule.preferredModel;
    const currentStats = stats.find((s) => s.model === currentModel);
    // Without stats for the current model there is no baseline to beat.
    if (!currentStats) return [];
    const currentModelName = currentModel.includes(":") ? currentModel.split(":")[1] : currentModel;
    const currentPricing = MODEL_PRICING[currentModelName];
    const suggestions = [];
    for (const modelStats of stats) {
      if (modelStats.model === currentModel) continue;
      // Skip models without enough samples to trust.
      if (modelStats.runs < MIN_RUNS_FOR_SUGGESTION) continue;
      const successImprovement = modelStats.outcomeSuccessRate - currentStats.outcomeSuccessRate;
      // NOTE(review): divides by currentStats.avgDurationMs — yields
      // Infinity/NaN if it is 0; confirm upstream guarantees positive durations.
      const latencyImprovement = (currentStats.avgDurationMs - modelStats.avgDurationMs) / currentStats.avgDurationMs;
      const suggestedModelName = modelStats.model.includes(":") ? modelStats.model.split(":")[1] : modelStats.model;
      const suggestedPricing = MODEL_PRICING[suggestedModelName];
      let costImprovement = 0;
      // Cost comparison only when both models have known pricing; uses the
      // mean of input/output per-1M rates as a rough per-token proxy.
      if (currentPricing && suggestedPricing) {
        const currentAvgCost = (currentPricing.input + currentPricing.output) / 2;
        const suggestedAvgCost = (suggestedPricing.input + suggestedPricing.output) / 2;
        costImprovement = (currentAvgCost - suggestedAvgCost) / currentAvgCost;
      }
      // Better if: clearly higher success rate; OR no worse success and
      // meaningfully faster; OR at most a tiny success regression (5pt) but
      // meaningfully cheaper.
      const isSignificantlyBetter = successImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= 0 && latencyImprovement > MIN_IMPROVEMENT_THRESHOLD || successImprovement >= -0.05 && costImprovement > MIN_COST_IMPROVEMENT_THRESHOLD;
      if (!isSignificantlyBetter) continue;
      // Confidence blends sample size (saturates at 50 runs) with the
      // magnitude of the observed improvements (capped at 1).
      const sampleConfidence = Math.min(modelStats.runs / 50, 1);
      const improvementConfidence = Math.min(
        Math.abs(successImprovement) / 0.3 + Math.abs(latencyImprovement) / 0.5 + Math.abs(costImprovement) / 0.5,
        1
      );
      const confidence = (sampleConfidence + improvementConfidence) / 2;
      if (confidence < MIN_CONFIDENCE_THRESHOLD) continue;
      // Human-readable justification; only positive deltas are mentioned.
      const reasons = [];
      if (successImprovement > 0) {
        reasons.push(`${(successImprovement * 100).toFixed(0)}% higher success rate`);
      }
      if (latencyImprovement > 0) {
        reasons.push(`${(latencyImprovement * 100).toFixed(0)}% faster`);
      }
      if (costImprovement > 0) {
        reasons.push(`${(costImprovement * 100).toFixed(0)}% cheaper`);
      }
      const suggestion = {
        id: nanoid(),
        taskType,
        currentModel,
        suggestedModel: modelStats.model,
        reason: reasons.join(", "),
        confidence,
        expectedImprovement: {
          successRate: successImprovement > 0 ? successImprovement : void 0,
          latency: latencyImprovement > 0 ? latencyImprovement : void 0,
          cost: costImprovement > 0 ? costImprovement : void 0
        },
        sampleCount: modelStats.runs,
        createdAt: (/* @__PURE__ */ new Date()).toISOString()
      };
      suggestions.push(suggestion);
    }
    // Highest-confidence suggestions first.
    suggestions.sort((a, b) => b.confidence - a.confidence);
    return suggestions;
  }
  /**
   * Analyzes all task types and generates suggestions.
   *
   * @returns Array of all suggestions across task types
   */
  analyzeAll() {
    const taskTypes = [
      "code_generation",
      "code_review",
      "summarization",
      "analysis",
      "creative_writing",
      "data_extraction",
      "translation",
      "question_answering",
      "general"
    ];
    const allSuggestions = [];
    for (const taskType of taskTypes) {
      const suggestions = this.analyzeTaskType(taskType);
      allSuggestions.push(...suggestions);
    }
    // Re-sort globally so cross-task ordering is by confidence too.
    allSuggestions.sort((a, b) => b.confidence - a.confidence);
    return allSuggestions;
  }
  /**
   * Stores suggestions in the database.
   *
   * @param suggestions - The suggestions to store
   * @returns Array of suggestion IDs
   */
  storeSuggestions(suggestions) {
    const ids = [];
    for (const suggestion of suggestions) {
      const id = this.store.recordSuggestion({
        taskType: suggestion.taskType,
        currentModel: suggestion.currentModel,
        suggestedModel: suggestion.suggestedModel,
        reason: suggestion.reason,
        confidence: suggestion.confidence,
        // expectedImprovement is persisted as a JSON string column.
        expectedImprovement: JSON.stringify(suggestion.expectedImprovement),
        sampleCount: suggestion.sampleCount,
        // null = not yet reviewed (accepted/rejected later).
        accepted: null
      });
      ids.push(id);
    }
    return ids;
  }
  /**
   * Generates and stores new suggestions, returning only new ones.
   *
   * Deduplicates against pending suggestions by (taskType, suggestedModel)
   * so repeated analysis does not pile up duplicates.
   *
   * @returns Array of new suggestions
   */
  generateSuggestions() {
    const suggestions = this.analyzeAll();
    const pending = this.store.getPendingSuggestions();
    const existingKeys = new Set(
      pending.map((s) => `${s.taskType}:${s.suggestedModel}`)
    );
    const newSuggestions = suggestions.filter(
      (s) => !existingKeys.has(`${s.taskType}:${s.suggestedModel}`)
    );
    this.storeSuggestions(newSuggestions);
    return newSuggestions;
  }
};
|
|
1298
|
+
|
|
1299
|
+
// src/relay.ts
|
|
1300
|
+
var RelayPlane = class {
  store;
  _routing;
  outcomeRecorder;
  patternDetector;
  config;
  // Unused in this standalone build; see getAdapter().
  adapterRegistry = null;
  /**
   * Creates a new RelayPlane instance.
   *
   * @param config - Configuration options
   */
  constructor(config = {}) {
    this.config = {
      dbPath: config.dbPath ?? getDefaultDbPath(),
      defaultProvider: config.defaultProvider ?? "local",
      defaultModel: config.defaultModel ?? "llama3.2",
      providers: config.providers ?? {}
    };
    this.store = new Store(this.config.dbPath);
    this._routing = new RoutingEngine(this.store);
    this.outcomeRecorder = new OutcomeRecorder(this.store);
    this.patternDetector = new PatternDetector(this.store);
  }
  /**
   * Gets the routing engine for direct access.
   */
  get routing() {
    return this._routing;
  }
  /**
   * Runs a prompt through the appropriate model.
   *
   * Note: in this standalone proxy package getAdapter() always returns null,
   * so this method always takes the "no adapter" path: it records a failed
   * run and returns an error result. The full provider path below is kept
   * for builds that supply an adapter registry.
   *
   * @param input - The run input
   * @returns The run result
   */
  async run(input) {
    const startTime = Date.now();
    const resolved = this._routing.resolve(input.prompt, input.taskType, input.model);
    const adapter = await this.getAdapter(resolved.provider);
    if (!adapter) {
      // Record the failed attempt so stats/savings still see it.
      const runId2 = this.store.recordRun({
        prompt: input.prompt,
        systemPrompt: input.systemPrompt ?? null,
        taskType: resolved.taskType,
        model: resolved.model,
        success: false,
        output: null,
        error: `No adapter configured for provider: ${resolved.provider}`,
        durationMs: Date.now() - startTime,
        tokensIn: null,
        tokensOut: null,
        costUsd: null,
        metadata: input.metadata ? JSON.stringify(input.metadata) : null
      });
      return {
        runId: runId2,
        success: false,
        error: `No adapter configured for provider: ${resolved.provider}`,
        taskType: resolved.taskType,
        model: resolved.model,
        durationMs: Date.now() - startTime,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      };
    }
    // Explicit provider config wins over environment variables.
    const providerConfig = this.config.providers?.[resolved.provider];
    const apiKey = providerConfig?.apiKey ?? this.getEnvApiKey(resolved.provider);
    // System prompt is prepended to the user prompt, separated by a blank line.
    const fullInput = input.systemPrompt ? `${input.systemPrompt}

${input.prompt}` : input.prompt;
    const result = await adapter.execute({
      model: resolved.modelName,
      input: fullInput,
      apiKey: apiKey ?? "",
      baseUrl: providerConfig?.baseUrl
    });
    const durationMs = Date.now() - startTime;
    const tokensIn = result.tokensIn ?? 0;
    const tokensOut = result.tokensOut ?? 0;
    const costUsd = calculateCost(resolved.model, tokensIn, tokensOut);
    const runId = this.store.recordRun({
      prompt: input.prompt,
      systemPrompt: input.systemPrompt ?? null,
      taskType: resolved.taskType,
      model: resolved.model,
      success: result.success,
      output: result.output ?? null,
      error: result.error?.message ?? null,
      durationMs,
      tokensIn: result.tokensIn ?? null,
      tokensOut: result.tokensOut ?? null,
      // Zero cost is stored as null rather than 0.
      costUsd: costUsd > 0 ? costUsd : null,
      metadata: input.metadata ? JSON.stringify(input.metadata) : null
    });
    return {
      runId,
      success: result.success,
      output: result.output,
      error: result.error?.message,
      taskType: resolved.taskType,
      model: resolved.model,
      durationMs,
      tokensIn: result.tokensIn,
      tokensOut: result.tokensOut,
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    };
  }
  /**
   * Gets an adapter for a provider.
   * Note: In the standalone proxy package, adapters are not used.
   * The proxy handles provider calls directly via HTTP.
   */
  async getAdapter(_provider) {
    return null;
  }
  /**
   * Gets an API key from environment variables.
   *
   * Returns undefined for unknown providers and for "local" (which maps to
   * an empty env-var name and therefore needs no key).
   */
  getEnvApiKey(provider) {
    const envVars = {
      openai: "OPENAI_API_KEY",
      anthropic: "ANTHROPIC_API_KEY",
      google: "GOOGLE_API_KEY",
      xai: "XAI_API_KEY",
      moonshot: "MOONSHOT_API_KEY",
      local: ""
    };
    const envVar = envVars[provider];
    return envVar ? process.env[envVar] : void 0;
  }
  /**
   * Records an outcome for a run.
   *
   * @param runId - The run ID
   * @param outcome - The outcome details
   * @returns The recorded outcome
   */
  recordOutcome(runId, outcome) {
    return this.outcomeRecorder.record({
      runId,
      ...outcome
    });
  }
  /**
   * Gets an outcome for a run.
   *
   * @param runId - The run ID
   * @returns The outcome, or null if not found
   */
  getOutcome(runId) {
    return this.outcomeRecorder.get(runId);
  }
  /**
   * Gets statistics for runs.
   *
   * @param options - Optional filters
   * @returns Statistics object
   */
  stats(options) {
    const raw = this.store.getStats(options);
    const byTaskType = {};
    const taskTypes = [
      "code_generation",
      "code_review",
      "summarization",
      "analysis",
      "creative_writing",
      "data_extraction",
      "translation",
      "question_answering",
      "general"
    ];
    for (const taskType of taskTypes) {
      const taskStats = raw.byTaskType[taskType];
      byTaskType[taskType] = {
        taskType,
        totalRuns: taskStats?.runs ?? 0,
        // Reconstructed from rate * count since raw stats only carry the rate.
        successfulRuns: Math.round((taskStats?.runs ?? 0) * (taskStats?.successRate ?? 0)),
        successRate: taskStats?.successRate ?? 0,
        avgDurationMs: taskStats?.avgDurationMs ?? 0,
        byModel: {}
      };
    }
    // Per-task-per-model breakdown is not available from raw stats; every
    // model entry is filled with zero placeholders only.
    for (const [model, modelStats] of Object.entries(raw.byModel)) {
      for (const taskType of taskTypes) {
        if (!byTaskType[taskType].byModel[model]) {
          byTaskType[taskType].byModel[model] = {
            runs: 0,
            successRate: 0,
            avgDurationMs: 0
          };
        }
      }
    }
    return {
      totalRuns: raw.totalRuns,
      overallSuccessRate: raw.totalRuns > 0 ? raw.successfulRuns / raw.totalRuns : 0,
      byTaskType,
      period: {
        from: options?.from ?? "",
        to: options?.to ?? (/* @__PURE__ */ new Date()).toISOString()
      }
    };
  }
  /**
   * Gets a savings report.
   *
   * @param days - Number of days to include (default: 30)
   * @returns Savings report
   */
  savingsReport(days = 30) {
    return calculateSavings(this.store, days);
  }
  /**
   * Gets routing improvement suggestions.
   *
   * @returns Array of suggestions
   */
  getSuggestions() {
    const pending = this.store.getPendingSuggestions();
    return pending.map((record) => ({
      id: record.id,
      taskType: record.taskType,
      currentModel: record.currentModel,
      suggestedModel: record.suggestedModel,
      reason: record.reason,
      confidence: record.confidence,
      // Stored as a JSON string column; decode back into an object.
      expectedImprovement: JSON.parse(record.expectedImprovement),
      sampleCount: record.sampleCount,
      createdAt: record.createdAt,
      accepted: record.accepted ?? void 0,
      acceptedAt: record.acceptedAt ?? void 0
    }));
  }
  /**
   * Generates new suggestions based on current data.
   *
   * @returns Array of newly generated suggestions
   */
  generateSuggestions() {
    return this.patternDetector.generateSuggestions();
  }
  /**
   * Accepts a suggestion and updates routing.
   *
   * @param suggestionId - The suggestion ID to accept
   * @returns True if successful
   */
  acceptSuggestion(suggestionId) {
    return this.store.acceptSuggestion(suggestionId);
  }
  /**
   * Rejects a suggestion.
   *
   * @param suggestionId - The suggestion ID to reject
   * @returns True if successful
   */
  rejectSuggestion(suggestionId) {
    return this.store.rejectSuggestion(suggestionId);
  }
  /**
   * Closes the RelayPlane instance and releases resources.
   */
  close() {
    this.store.close();
  }
};
|
|
1567
|
+
|
|
1568
|
+
// src/proxy.ts
|
|
1569
|
+
var DEFAULT_ENDPOINTS = {
|
|
1570
|
+
anthropic: {
|
|
1571
|
+
baseUrl: "https://api.anthropic.com/v1",
|
|
1572
|
+
apiKeyEnv: "ANTHROPIC_API_KEY"
|
|
1573
|
+
},
|
|
1574
|
+
openai: {
|
|
1575
|
+
baseUrl: "https://api.openai.com/v1",
|
|
1576
|
+
apiKeyEnv: "OPENAI_API_KEY"
|
|
1577
|
+
},
|
|
1578
|
+
google: {
|
|
1579
|
+
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
|
1580
|
+
apiKeyEnv: "GEMINI_API_KEY"
|
|
1581
|
+
},
|
|
1582
|
+
xai: {
|
|
1583
|
+
baseUrl: "https://api.x.ai/v1",
|
|
1584
|
+
apiKeyEnv: "XAI_API_KEY"
|
|
1585
|
+
},
|
|
1586
|
+
moonshot: {
|
|
1587
|
+
baseUrl: "https://api.moonshot.cn/v1",
|
|
1588
|
+
apiKeyEnv: "MOONSHOT_API_KEY"
|
|
1589
|
+
}
|
|
1590
|
+
};
|
|
1591
|
+
var MODEL_MAPPING = {
|
|
1592
|
+
// Anthropic models (using correct API model IDs)
|
|
1593
|
+
"claude-opus-4-5": { provider: "anthropic", model: "claude-opus-4-5-20250514" },
|
|
1594
|
+
"claude-sonnet-4": { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1595
|
+
"claude-3-5-sonnet": { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
|
|
1596
|
+
"claude-3-5-haiku": { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
|
|
1597
|
+
haiku: { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
|
|
1598
|
+
sonnet: { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
|
|
1599
|
+
opus: { provider: "anthropic", model: "claude-3-opus-20240229" },
|
|
1600
|
+
// OpenAI models
|
|
1601
|
+
"gpt-4o": { provider: "openai", model: "gpt-4o" },
|
|
1602
|
+
"gpt-4o-mini": { provider: "openai", model: "gpt-4o-mini" },
|
|
1603
|
+
"gpt-4.1": { provider: "openai", model: "gpt-4.1" }
|
|
1604
|
+
};
|
|
1605
|
+
var DEFAULT_ROUTING = {
|
|
1606
|
+
code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1607
|
+
code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1608
|
+
summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1609
|
+
analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1610
|
+
creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1611
|
+
data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1612
|
+
translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1613
|
+
question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1614
|
+
general: { provider: "anthropic", model: "claude-3-5-haiku-latest" }
|
|
1615
|
+
};
|
|
1616
|
+
function extractPromptText(messages) {
|
|
1617
|
+
return messages.map((msg) => {
|
|
1618
|
+
if (typeof msg.content === "string") return msg.content;
|
|
1619
|
+
if (Array.isArray(msg.content)) {
|
|
1620
|
+
return msg.content.map((c) => {
|
|
1621
|
+
const part = c;
|
|
1622
|
+
return part.type === "text" ? part.text ?? "" : "";
|
|
1623
|
+
}).join(" ");
|
|
1624
|
+
}
|
|
1625
|
+
return "";
|
|
1626
|
+
}).join("\n");
|
|
1627
|
+
}
|
|
1628
|
+
async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
1629
|
+
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
1630
|
+
const headers = {
|
|
1631
|
+
"Content-Type": "application/json",
|
|
1632
|
+
"x-api-key": apiKey,
|
|
1633
|
+
"anthropic-version": "2023-06-01"
|
|
1634
|
+
};
|
|
1635
|
+
if (betaHeaders) {
|
|
1636
|
+
headers["anthropic-beta"] = betaHeaders;
|
|
1637
|
+
}
|
|
1638
|
+
const response = await fetch("https://api.anthropic.com/v1/messages", {
|
|
1639
|
+
method: "POST",
|
|
1640
|
+
headers,
|
|
1641
|
+
body: JSON.stringify(anthropicBody)
|
|
1642
|
+
});
|
|
1643
|
+
return response;
|
|
1644
|
+
}
|
|
1645
|
+
async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
|
|
1646
|
+
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
1647
|
+
const headers = {
|
|
1648
|
+
"Content-Type": "application/json",
|
|
1649
|
+
"x-api-key": apiKey,
|
|
1650
|
+
"anthropic-version": "2023-06-01"
|
|
1651
|
+
};
|
|
1652
|
+
if (betaHeaders) {
|
|
1653
|
+
headers["anthropic-beta"] = betaHeaders;
|
|
1654
|
+
}
|
|
1655
|
+
const response = await fetch("https://api.anthropic.com/v1/messages", {
|
|
1656
|
+
method: "POST",
|
|
1657
|
+
headers,
|
|
1658
|
+
body: JSON.stringify(anthropicBody)
|
|
1659
|
+
});
|
|
1660
|
+
return response;
|
|
1661
|
+
}
|
|
1662
|
+
function convertMessagesToAnthropic(messages) {
|
|
1663
|
+
const result = [];
|
|
1664
|
+
for (const msg of messages) {
|
|
1665
|
+
const m = msg;
|
|
1666
|
+
if (m.role === "system") continue;
|
|
1667
|
+
if (m.role === "tool") {
|
|
1668
|
+
result.push({
|
|
1669
|
+
role: "user",
|
|
1670
|
+
content: [
|
|
1671
|
+
{
|
|
1672
|
+
type: "tool_result",
|
|
1673
|
+
tool_use_id: m.tool_call_id,
|
|
1674
|
+
content: typeof m.content === "string" ? m.content : JSON.stringify(m.content)
|
|
1675
|
+
}
|
|
1676
|
+
]
|
|
1677
|
+
});
|
|
1678
|
+
continue;
|
|
1679
|
+
}
|
|
1680
|
+
if (m.role === "assistant" && m.tool_calls && m.tool_calls.length > 0) {
|
|
1681
|
+
const content = [];
|
|
1682
|
+
if (m.content && typeof m.content === "string") {
|
|
1683
|
+
content.push({ type: "text", text: m.content });
|
|
1684
|
+
}
|
|
1685
|
+
for (const tc of m.tool_calls) {
|
|
1686
|
+
content.push({
|
|
1687
|
+
type: "tool_use",
|
|
1688
|
+
id: tc.id,
|
|
1689
|
+
name: tc.function.name,
|
|
1690
|
+
input: JSON.parse(tc.function.arguments || "{}")
|
|
1691
|
+
});
|
|
1692
|
+
}
|
|
1693
|
+
result.push({ role: "assistant", content });
|
|
1694
|
+
continue;
|
|
1695
|
+
}
|
|
1696
|
+
result.push({
|
|
1697
|
+
role: m.role === "assistant" ? "assistant" : "user",
|
|
1698
|
+
content: m.content
|
|
1699
|
+
});
|
|
1700
|
+
}
|
|
1701
|
+
return result;
|
|
1702
|
+
}
|
|
1703
|
+
function buildAnthropicBody(request, targetModel, stream) {
|
|
1704
|
+
const anthropicMessages = convertMessagesToAnthropic(request.messages);
|
|
1705
|
+
const systemMessage = request.messages.find((m) => m.role === "system");
|
|
1706
|
+
const anthropicBody = {
|
|
1707
|
+
model: targetModel,
|
|
1708
|
+
messages: anthropicMessages,
|
|
1709
|
+
max_tokens: request.max_tokens ?? 4096,
|
|
1710
|
+
stream
|
|
1711
|
+
};
|
|
1712
|
+
if (systemMessage) {
|
|
1713
|
+
anthropicBody["system"] = systemMessage.content;
|
|
1714
|
+
}
|
|
1715
|
+
if (request.temperature !== void 0) {
|
|
1716
|
+
anthropicBody["temperature"] = request.temperature;
|
|
1717
|
+
}
|
|
1718
|
+
if (request.tools && Array.isArray(request.tools)) {
|
|
1719
|
+
anthropicBody["tools"] = convertToolsToAnthropic(request.tools);
|
|
1720
|
+
}
|
|
1721
|
+
if (request.tool_choice) {
|
|
1722
|
+
anthropicBody["tool_choice"] = convertToolChoiceToAnthropic(request.tool_choice);
|
|
1723
|
+
}
|
|
1724
|
+
return anthropicBody;
|
|
1725
|
+
}
|
|
1726
|
+
function convertToolsToAnthropic(tools) {
|
|
1727
|
+
return tools.map((tool) => {
|
|
1728
|
+
const t = tool;
|
|
1729
|
+
if (t.type === "function" && t.function) {
|
|
1730
|
+
return {
|
|
1731
|
+
name: t.function.name,
|
|
1732
|
+
description: t.function.description,
|
|
1733
|
+
input_schema: t.function.parameters || { type: "object", properties: {} }
|
|
1734
|
+
};
|
|
1735
|
+
}
|
|
1736
|
+
return tool;
|
|
1737
|
+
});
|
|
1738
|
+
}
|
|
1739
|
+
function convertToolChoiceToAnthropic(toolChoice) {
|
|
1740
|
+
if (toolChoice === "auto") return { type: "auto" };
|
|
1741
|
+
if (toolChoice === "none") return { type: "none" };
|
|
1742
|
+
if (toolChoice === "required") return { type: "any" };
|
|
1743
|
+
const tc = toolChoice;
|
|
1744
|
+
if (tc.type === "function" && tc.function?.name) {
|
|
1745
|
+
return { type: "tool", name: tc.function.name };
|
|
1746
|
+
}
|
|
1747
|
+
return toolChoice;
|
|
1748
|
+
}
|
|
1749
|
+
async function forwardToOpenAI(request, targetModel, apiKey) {
|
|
1750
|
+
const openaiBody = {
|
|
1751
|
+
...request,
|
|
1752
|
+
model: targetModel,
|
|
1753
|
+
stream: false
|
|
1754
|
+
};
|
|
1755
|
+
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
1756
|
+
method: "POST",
|
|
1757
|
+
headers: {
|
|
1758
|
+
"Content-Type": "application/json",
|
|
1759
|
+
Authorization: `Bearer ${apiKey}`
|
|
1760
|
+
},
|
|
1761
|
+
body: JSON.stringify(openaiBody)
|
|
1762
|
+
});
|
|
1763
|
+
return response;
|
|
1764
|
+
}
|
|
1765
|
+
async function forwardToOpenAIStream(request, targetModel, apiKey) {
|
|
1766
|
+
const openaiBody = {
|
|
1767
|
+
...request,
|
|
1768
|
+
model: targetModel,
|
|
1769
|
+
stream: true
|
|
1770
|
+
};
|
|
1771
|
+
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
1772
|
+
method: "POST",
|
|
1773
|
+
headers: {
|
|
1774
|
+
"Content-Type": "application/json",
|
|
1775
|
+
Authorization: `Bearer ${apiKey}`
|
|
1776
|
+
},
|
|
1777
|
+
body: JSON.stringify(openaiBody)
|
|
1778
|
+
});
|
|
1779
|
+
return response;
|
|
1780
|
+
}
|
|
1781
|
+
async function forwardToXAI(request, targetModel, apiKey) {
|
|
1782
|
+
const xaiBody = {
|
|
1783
|
+
...request,
|
|
1784
|
+
model: targetModel,
|
|
1785
|
+
stream: false
|
|
1786
|
+
};
|
|
1787
|
+
const response = await fetch("https://api.x.ai/v1/chat/completions", {
|
|
1788
|
+
method: "POST",
|
|
1789
|
+
headers: {
|
|
1790
|
+
"Content-Type": "application/json",
|
|
1791
|
+
Authorization: `Bearer ${apiKey}`
|
|
1792
|
+
},
|
|
1793
|
+
body: JSON.stringify(xaiBody)
|
|
1794
|
+
});
|
|
1795
|
+
return response;
|
|
1796
|
+
}
|
|
1797
|
+
async function forwardToXAIStream(request, targetModel, apiKey) {
|
|
1798
|
+
const xaiBody = {
|
|
1799
|
+
...request,
|
|
1800
|
+
model: targetModel,
|
|
1801
|
+
stream: true
|
|
1802
|
+
};
|
|
1803
|
+
const response = await fetch("https://api.x.ai/v1/chat/completions", {
|
|
1804
|
+
method: "POST",
|
|
1805
|
+
headers: {
|
|
1806
|
+
"Content-Type": "application/json",
|
|
1807
|
+
Authorization: `Bearer ${apiKey}`
|
|
1808
|
+
},
|
|
1809
|
+
body: JSON.stringify(xaiBody)
|
|
1810
|
+
});
|
|
1811
|
+
return response;
|
|
1812
|
+
}
|
|
1813
|
+
async function forwardToMoonshot(request, targetModel, apiKey) {
|
|
1814
|
+
const moonshotBody = {
|
|
1815
|
+
...request,
|
|
1816
|
+
model: targetModel,
|
|
1817
|
+
stream: false
|
|
1818
|
+
};
|
|
1819
|
+
const response = await fetch("https://api.moonshot.cn/v1/chat/completions", {
|
|
1820
|
+
method: "POST",
|
|
1821
|
+
headers: {
|
|
1822
|
+
"Content-Type": "application/json",
|
|
1823
|
+
Authorization: `Bearer ${apiKey}`
|
|
1824
|
+
},
|
|
1825
|
+
body: JSON.stringify(moonshotBody)
|
|
1826
|
+
});
|
|
1827
|
+
return response;
|
|
1828
|
+
}
|
|
1829
|
+
async function forwardToMoonshotStream(request, targetModel, apiKey) {
|
|
1830
|
+
const moonshotBody = {
|
|
1831
|
+
...request,
|
|
1832
|
+
model: targetModel,
|
|
1833
|
+
stream: true
|
|
1834
|
+
};
|
|
1835
|
+
const response = await fetch("https://api.moonshot.cn/v1/chat/completions", {
|
|
1836
|
+
method: "POST",
|
|
1837
|
+
headers: {
|
|
1838
|
+
"Content-Type": "application/json",
|
|
1839
|
+
Authorization: `Bearer ${apiKey}`
|
|
1840
|
+
},
|
|
1841
|
+
body: JSON.stringify(moonshotBody)
|
|
1842
|
+
});
|
|
1843
|
+
return response;
|
|
1844
|
+
}
|
|
1845
|
+
function convertMessagesToGemini(messages) {
|
|
1846
|
+
const geminiContents = [];
|
|
1847
|
+
for (const msg of messages) {
|
|
1848
|
+
if (msg.role === "system") continue;
|
|
1849
|
+
const role = msg.role === "assistant" ? "model" : "user";
|
|
1850
|
+
if (typeof msg.content === "string") {
|
|
1851
|
+
geminiContents.push({
|
|
1852
|
+
role,
|
|
1853
|
+
parts: [{ text: msg.content }]
|
|
1854
|
+
});
|
|
1855
|
+
} else if (Array.isArray(msg.content)) {
|
|
1856
|
+
const parts = msg.content.map((part) => {
|
|
1857
|
+
const p = part;
|
|
1858
|
+
if (p.type === "text") {
|
|
1859
|
+
return { text: p.text };
|
|
1860
|
+
}
|
|
1861
|
+
if (p.type === "image_url" && p.image_url?.url) {
|
|
1862
|
+
const url = p.image_url.url;
|
|
1863
|
+
if (url.startsWith("data:")) {
|
|
1864
|
+
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
|
1865
|
+
if (match) {
|
|
1866
|
+
return {
|
|
1867
|
+
inline_data: {
|
|
1868
|
+
mime_type: match[1],
|
|
1869
|
+
data: match[2]
|
|
1870
|
+
}
|
|
1871
|
+
};
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
return { text: `[Image: ${url}]` };
|
|
1875
|
+
}
|
|
1876
|
+
return { text: "" };
|
|
1877
|
+
});
|
|
1878
|
+
geminiContents.push({ role, parts });
|
|
1879
|
+
}
|
|
1880
|
+
}
|
|
1881
|
+
return geminiContents;
|
|
1882
|
+
}
|
|
1883
|
+
async function forwardToGemini(request, targetModel, apiKey) {
|
|
1884
|
+
const systemMessage = request.messages.find((m) => m.role === "system");
|
|
1885
|
+
const geminiContents = convertMessagesToGemini(request.messages);
|
|
1886
|
+
const geminiBody = {
|
|
1887
|
+
contents: geminiContents,
|
|
1888
|
+
generationConfig: {
|
|
1889
|
+
maxOutputTokens: request.max_tokens ?? 4096
|
|
1890
|
+
}
|
|
1891
|
+
};
|
|
1892
|
+
if (request.temperature !== void 0) {
|
|
1893
|
+
geminiBody["generationConfig"]["temperature"] = request.temperature;
|
|
1894
|
+
}
|
|
1895
|
+
if (systemMessage && typeof systemMessage.content === "string") {
|
|
1896
|
+
geminiBody["systemInstruction"] = {
|
|
1897
|
+
parts: [{ text: systemMessage.content }]
|
|
1898
|
+
};
|
|
1899
|
+
}
|
|
1900
|
+
const response = await fetch(
|
|
1901
|
+
`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`,
|
|
1902
|
+
{
|
|
1903
|
+
method: "POST",
|
|
1904
|
+
headers: {
|
|
1905
|
+
"Content-Type": "application/json"
|
|
1906
|
+
},
|
|
1907
|
+
body: JSON.stringify(geminiBody)
|
|
1908
|
+
}
|
|
1909
|
+
);
|
|
1910
|
+
return response;
|
|
1911
|
+
}
|
|
1912
|
+
async function forwardToGeminiStream(request, targetModel, apiKey) {
|
|
1913
|
+
const systemMessage = request.messages.find((m) => m.role === "system");
|
|
1914
|
+
const geminiContents = convertMessagesToGemini(request.messages);
|
|
1915
|
+
const geminiBody = {
|
|
1916
|
+
contents: geminiContents,
|
|
1917
|
+
generationConfig: {
|
|
1918
|
+
maxOutputTokens: request.max_tokens ?? 4096
|
|
1919
|
+
}
|
|
1920
|
+
};
|
|
1921
|
+
if (request.temperature !== void 0) {
|
|
1922
|
+
geminiBody["generationConfig"]["temperature"] = request.temperature;
|
|
1923
|
+
}
|
|
1924
|
+
if (systemMessage && typeof systemMessage.content === "string") {
|
|
1925
|
+
geminiBody["systemInstruction"] = {
|
|
1926
|
+
parts: [{ text: systemMessage.content }]
|
|
1927
|
+
};
|
|
1928
|
+
}
|
|
1929
|
+
const response = await fetch(
|
|
1930
|
+
`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`,
|
|
1931
|
+
{
|
|
1932
|
+
method: "POST",
|
|
1933
|
+
headers: {
|
|
1934
|
+
"Content-Type": "application/json"
|
|
1935
|
+
},
|
|
1936
|
+
body: JSON.stringify(geminiBody)
|
|
1937
|
+
}
|
|
1938
|
+
);
|
|
1939
|
+
return response;
|
|
1940
|
+
}
|
|
1941
|
+
function convertGeminiResponse(geminiData, model) {
|
|
1942
|
+
const candidate = geminiData.candidates?.[0];
|
|
1943
|
+
const text = candidate?.content?.parts?.map((p) => p.text ?? "").join("") ?? "";
|
|
1944
|
+
let finishReason = "stop";
|
|
1945
|
+
if (candidate?.finishReason === "MAX_TOKENS") {
|
|
1946
|
+
finishReason = "length";
|
|
1947
|
+
} else if (candidate?.finishReason === "SAFETY") {
|
|
1948
|
+
finishReason = "content_filter";
|
|
1949
|
+
}
|
|
1950
|
+
return {
|
|
1951
|
+
id: `chatcmpl-${Date.now()}`,
|
|
1952
|
+
object: "chat.completion",
|
|
1953
|
+
created: Math.floor(Date.now() / 1e3),
|
|
1954
|
+
model,
|
|
1955
|
+
choices: [
|
|
1956
|
+
{
|
|
1957
|
+
index: 0,
|
|
1958
|
+
message: {
|
|
1959
|
+
role: "assistant",
|
|
1960
|
+
content: text
|
|
1961
|
+
},
|
|
1962
|
+
finish_reason: finishReason
|
|
1963
|
+
}
|
|
1964
|
+
],
|
|
1965
|
+
usage: {
|
|
1966
|
+
prompt_tokens: geminiData.usageMetadata?.promptTokenCount ?? 0,
|
|
1967
|
+
completion_tokens: geminiData.usageMetadata?.candidatesTokenCount ?? 0,
|
|
1968
|
+
total_tokens: (geminiData.usageMetadata?.promptTokenCount ?? 0) + (geminiData.usageMetadata?.candidatesTokenCount ?? 0)
|
|
1969
|
+
}
|
|
1970
|
+
};
|
|
1971
|
+
}
|
|
1972
|
+
function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
|
|
1973
|
+
const candidate = eventData.candidates?.[0];
|
|
1974
|
+
const text = candidate?.content?.parts?.map((p) => p.text ?? "").join("") ?? "";
|
|
1975
|
+
const choice = {
|
|
1976
|
+
index: 0,
|
|
1977
|
+
delta: {},
|
|
1978
|
+
finish_reason: null
|
|
1979
|
+
};
|
|
1980
|
+
if (isFirst) {
|
|
1981
|
+
choice["delta"] = { role: "assistant", content: text };
|
|
1982
|
+
} else if (text) {
|
|
1983
|
+
choice["delta"] = { content: text };
|
|
1984
|
+
}
|
|
1985
|
+
if (candidate?.finishReason) {
|
|
1986
|
+
let finishReason = "stop";
|
|
1987
|
+
if (candidate.finishReason === "MAX_TOKENS") {
|
|
1988
|
+
finishReason = "length";
|
|
1989
|
+
} else if (candidate.finishReason === "SAFETY") {
|
|
1990
|
+
finishReason = "content_filter";
|
|
1991
|
+
}
|
|
1992
|
+
choice["finish_reason"] = finishReason;
|
|
1993
|
+
}
|
|
1994
|
+
const chunk = {
|
|
1995
|
+
id: messageId,
|
|
1996
|
+
object: "chat.completion.chunk",
|
|
1997
|
+
created: Math.floor(Date.now() / 1e3),
|
|
1998
|
+
model,
|
|
1999
|
+
choices: [choice]
|
|
2000
|
+
};
|
|
2001
|
+
return `data: ${JSON.stringify(chunk)}
|
|
2002
|
+
|
|
2003
|
+
`;
|
|
2004
|
+
}
|
|
2005
|
+
async function* convertGeminiStream(response, model) {
|
|
2006
|
+
const reader = response.body?.getReader();
|
|
2007
|
+
if (!reader) {
|
|
2008
|
+
throw new Error("No response body");
|
|
2009
|
+
}
|
|
2010
|
+
const decoder = new TextDecoder();
|
|
2011
|
+
let buffer = "";
|
|
2012
|
+
const messageId = `chatcmpl-${Date.now()}`;
|
|
2013
|
+
let isFirst = true;
|
|
2014
|
+
try {
|
|
2015
|
+
while (true) {
|
|
2016
|
+
const { done, value } = await reader.read();
|
|
2017
|
+
if (done) break;
|
|
2018
|
+
buffer += decoder.decode(value, { stream: true });
|
|
2019
|
+
const lines = buffer.split("\n");
|
|
2020
|
+
buffer = lines.pop() || "";
|
|
2021
|
+
for (const line of lines) {
|
|
2022
|
+
if (line.startsWith("data: ")) {
|
|
2023
|
+
const jsonStr = line.slice(6);
|
|
2024
|
+
if (jsonStr.trim() === "[DONE]") {
|
|
2025
|
+
yield "data: [DONE]\n\n";
|
|
2026
|
+
continue;
|
|
2027
|
+
}
|
|
2028
|
+
try {
|
|
2029
|
+
const parsed = JSON.parse(jsonStr);
|
|
2030
|
+
const converted = convertGeminiStreamEvent(parsed, messageId, model, isFirst);
|
|
2031
|
+
if (converted) {
|
|
2032
|
+
yield converted;
|
|
2033
|
+
isFirst = false;
|
|
2034
|
+
}
|
|
2035
|
+
} catch {
|
|
2036
|
+
}
|
|
2037
|
+
}
|
|
2038
|
+
}
|
|
2039
|
+
}
|
|
2040
|
+
yield "data: [DONE]\n\n";
|
|
2041
|
+
} finally {
|
|
2042
|
+
reader.releaseLock();
|
|
2043
|
+
}
|
|
2044
|
+
}
|
|
2045
|
+
function convertAnthropicResponse(anthropicData) {
|
|
2046
|
+
const textBlocks = anthropicData.content?.filter((c) => c.type === "text") ?? [];
|
|
2047
|
+
const toolBlocks = anthropicData.content?.filter((c) => c.type === "tool_use") ?? [];
|
|
2048
|
+
const textContent = textBlocks.map((c) => c.text ?? "").join("");
|
|
2049
|
+
const message = {
|
|
2050
|
+
role: "assistant",
|
|
2051
|
+
content: textContent || null
|
|
2052
|
+
};
|
|
2053
|
+
if (toolBlocks.length > 0) {
|
|
2054
|
+
message["tool_calls"] = toolBlocks.map((block) => ({
|
|
2055
|
+
id: block.id || `call_${Date.now()}`,
|
|
2056
|
+
type: "function",
|
|
2057
|
+
function: {
|
|
2058
|
+
name: block.name,
|
|
2059
|
+
arguments: typeof block.input === "string" ? block.input : JSON.stringify(block.input ?? {})
|
|
2060
|
+
}
|
|
2061
|
+
}));
|
|
2062
|
+
}
|
|
2063
|
+
let finishReason = "stop";
|
|
2064
|
+
if (anthropicData.stop_reason === "tool_use") {
|
|
2065
|
+
finishReason = "tool_calls";
|
|
2066
|
+
} else if (anthropicData.stop_reason === "end_turn") {
|
|
2067
|
+
finishReason = "stop";
|
|
2068
|
+
} else if (anthropicData.stop_reason) {
|
|
2069
|
+
finishReason = anthropicData.stop_reason;
|
|
2070
|
+
}
|
|
2071
|
+
return {
|
|
2072
|
+
id: anthropicData.id || `chatcmpl-${Date.now()}`,
|
|
2073
|
+
object: "chat.completion",
|
|
2074
|
+
created: Math.floor(Date.now() / 1e3),
|
|
2075
|
+
model: anthropicData.model,
|
|
2076
|
+
choices: [
|
|
2077
|
+
{
|
|
2078
|
+
index: 0,
|
|
2079
|
+
message,
|
|
2080
|
+
finish_reason: finishReason
|
|
2081
|
+
}
|
|
2082
|
+
],
|
|
2083
|
+
usage: {
|
|
2084
|
+
prompt_tokens: anthropicData.usage?.input_tokens ?? 0,
|
|
2085
|
+
completion_tokens: anthropicData.usage?.output_tokens ?? 0,
|
|
2086
|
+
total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0)
|
|
2087
|
+
}
|
|
2088
|
+
};
|
|
2089
|
+
}
|
|
2090
|
+
function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
|
|
2091
|
+
const choice = { index: 0, delta: {}, finish_reason: null };
|
|
2092
|
+
const baseChunk = {
|
|
2093
|
+
id: messageId,
|
|
2094
|
+
object: "chat.completion.chunk",
|
|
2095
|
+
created: Math.floor(Date.now() / 1e3),
|
|
2096
|
+
model,
|
|
2097
|
+
choices: [choice]
|
|
2098
|
+
};
|
|
2099
|
+
switch (eventType) {
|
|
2100
|
+
case "message_start": {
|
|
2101
|
+
const msg = eventData["message"];
|
|
2102
|
+
baseChunk.id = msg?.["id"] || messageId;
|
|
2103
|
+
choice.delta = { role: "assistant", content: "" };
|
|
2104
|
+
return `data: ${JSON.stringify(baseChunk)}
|
|
2105
|
+
|
|
2106
|
+
`;
|
|
2107
|
+
}
|
|
2108
|
+
case "content_block_start": {
|
|
2109
|
+
const contentBlock = eventData["content_block"];
|
|
2110
|
+
const blockIndex = eventData["index"];
|
|
2111
|
+
if (contentBlock?.["type"] === "tool_use") {
|
|
2112
|
+
const toolId = contentBlock["id"];
|
|
2113
|
+
const toolName = contentBlock["name"];
|
|
2114
|
+
toolState.tools.set(blockIndex ?? toolState.currentToolIndex, {
|
|
2115
|
+
id: toolId,
|
|
2116
|
+
name: toolName,
|
|
2117
|
+
arguments: ""
|
|
2118
|
+
});
|
|
2119
|
+
toolState.currentToolIndex = blockIndex ?? toolState.currentToolIndex;
|
|
2120
|
+
choice.delta = {
|
|
2121
|
+
tool_calls: [{
|
|
2122
|
+
index: blockIndex ?? 0,
|
|
2123
|
+
id: toolId,
|
|
2124
|
+
type: "function",
|
|
2125
|
+
function: { name: toolName, arguments: "" }
|
|
2126
|
+
}]
|
|
2127
|
+
};
|
|
2128
|
+
return `data: ${JSON.stringify(baseChunk)}
|
|
2129
|
+
|
|
2130
|
+
`;
|
|
2131
|
+
}
|
|
2132
|
+
return null;
|
|
2133
|
+
}
|
|
2134
|
+
case "content_block_delta": {
|
|
2135
|
+
const delta = eventData["delta"];
|
|
2136
|
+
const blockIndex = eventData["index"];
|
|
2137
|
+
if (delta?.["type"] === "text_delta") {
|
|
2138
|
+
choice.delta = { content: delta["text"] };
|
|
2139
|
+
return `data: ${JSON.stringify(baseChunk)}
|
|
2140
|
+
|
|
2141
|
+
`;
|
|
2142
|
+
}
|
|
2143
|
+
if (delta?.["type"] === "input_json_delta") {
|
|
2144
|
+
const partialJson = delta["partial_json"] || "";
|
|
2145
|
+
const tool = toolState.tools.get(blockIndex ?? toolState.currentToolIndex);
|
|
2146
|
+
if (tool) {
|
|
2147
|
+
tool.arguments += partialJson;
|
|
2148
|
+
}
|
|
2149
|
+
choice.delta = {
|
|
2150
|
+
tool_calls: [{
|
|
2151
|
+
index: blockIndex ?? 0,
|
|
2152
|
+
function: { arguments: partialJson }
|
|
2153
|
+
}]
|
|
2154
|
+
};
|
|
2155
|
+
return `data: ${JSON.stringify(baseChunk)}
|
|
2156
|
+
|
|
2157
|
+
`;
|
|
2158
|
+
}
|
|
2159
|
+
return null;
|
|
2160
|
+
}
|
|
2161
|
+
case "message_delta": {
|
|
2162
|
+
const delta = eventData["delta"];
|
|
2163
|
+
const stopReason = delta?.["stop_reason"];
|
|
2164
|
+
if (stopReason === "tool_use") {
|
|
2165
|
+
choice.finish_reason = "tool_calls";
|
|
2166
|
+
} else if (stopReason === "end_turn") {
|
|
2167
|
+
choice.finish_reason = "stop";
|
|
2168
|
+
} else {
|
|
2169
|
+
choice.finish_reason = stopReason || "stop";
|
|
2170
|
+
}
|
|
2171
|
+
choice.delta = {};
|
|
2172
|
+
return `data: ${JSON.stringify(baseChunk)}
|
|
2173
|
+
|
|
2174
|
+
`;
|
|
2175
|
+
}
|
|
2176
|
+
case "message_stop": {
|
|
2177
|
+
return "data: [DONE]\n\n";
|
|
2178
|
+
}
|
|
2179
|
+
default:
|
|
2180
|
+
return null;
|
|
2181
|
+
}
|
|
2182
|
+
}
|
|
2183
|
+
async function* convertAnthropicStream(response, model) {
|
|
2184
|
+
const reader = response.body?.getReader();
|
|
2185
|
+
if (!reader) {
|
|
2186
|
+
throw new Error("No response body");
|
|
2187
|
+
}
|
|
2188
|
+
const decoder = new TextDecoder();
|
|
2189
|
+
let buffer = "";
|
|
2190
|
+
let messageId = `chatcmpl-${Date.now()}`;
|
|
2191
|
+
const toolState = {
|
|
2192
|
+
currentToolIndex: 0,
|
|
2193
|
+
tools: /* @__PURE__ */ new Map()
|
|
2194
|
+
};
|
|
2195
|
+
try {
|
|
2196
|
+
while (true) {
|
|
2197
|
+
const { done, value } = await reader.read();
|
|
2198
|
+
if (done) break;
|
|
2199
|
+
buffer += decoder.decode(value, { stream: true });
|
|
2200
|
+
const lines = buffer.split("\n");
|
|
2201
|
+
buffer = lines.pop() || "";
|
|
2202
|
+
let eventType = "";
|
|
2203
|
+
let eventData = "";
|
|
2204
|
+
for (const line of lines) {
|
|
2205
|
+
if (line.startsWith("event: ")) {
|
|
2206
|
+
eventType = line.slice(7).trim();
|
|
2207
|
+
} else if (line.startsWith("data: ")) {
|
|
2208
|
+
eventData = line.slice(6);
|
|
2209
|
+
} else if (line === "" && eventType && eventData) {
|
|
2210
|
+
try {
|
|
2211
|
+
const parsed = JSON.parse(eventData);
|
|
2212
|
+
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2213
|
+
if (converted) {
|
|
2214
|
+
yield converted;
|
|
2215
|
+
}
|
|
2216
|
+
} catch {
|
|
2217
|
+
}
|
|
2218
|
+
eventType = "";
|
|
2219
|
+
eventData = "";
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2222
|
+
}
|
|
2223
|
+
} finally {
|
|
2224
|
+
reader.releaseLock();
|
|
2225
|
+
}
|
|
2226
|
+
}
|
|
2227
|
+
async function* pipeOpenAIStream(response) {
|
|
2228
|
+
const reader = response.body?.getReader();
|
|
2229
|
+
if (!reader) {
|
|
2230
|
+
throw new Error("No response body");
|
|
2231
|
+
}
|
|
2232
|
+
const decoder = new TextDecoder();
|
|
2233
|
+
try {
|
|
2234
|
+
while (true) {
|
|
2235
|
+
const { done, value } = await reader.read();
|
|
2236
|
+
if (done) break;
|
|
2237
|
+
yield decoder.decode(value, { stream: true });
|
|
2238
|
+
}
|
|
2239
|
+
} finally {
|
|
2240
|
+
reader.releaseLock();
|
|
2241
|
+
}
|
|
2242
|
+
}
|
|
2243
|
+
function parsePreferredModel(preferredModel) {
|
|
2244
|
+
const [provider, model] = preferredModel.split(":");
|
|
2245
|
+
if (!provider || !model) return null;
|
|
2246
|
+
const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
|
|
2247
|
+
if (!validProviders.includes(provider)) return null;
|
|
2248
|
+
return { provider, model };
|
|
2249
|
+
}
|
|
2250
|
+
function resolveExplicitModel(modelName) {
|
|
2251
|
+
if (MODEL_MAPPING[modelName]) {
|
|
2252
|
+
return MODEL_MAPPING[modelName];
|
|
2253
|
+
}
|
|
2254
|
+
if (modelName.startsWith("claude-")) {
|
|
2255
|
+
return { provider: "anthropic", model: modelName };
|
|
2256
|
+
}
|
|
2257
|
+
if (modelName.startsWith("gpt-") || modelName.startsWith("o1-") || modelName.startsWith("o3-") || modelName.startsWith("chatgpt-") || modelName.startsWith("text-") || modelName.startsWith("dall-e") || modelName.startsWith("whisper") || modelName.startsWith("tts-")) {
|
|
2258
|
+
return { provider: "openai", model: modelName };
|
|
2259
|
+
}
|
|
2260
|
+
if (modelName.startsWith("gemini-") || modelName.startsWith("palm-")) {
|
|
2261
|
+
return { provider: "google", model: modelName };
|
|
2262
|
+
}
|
|
2263
|
+
if (modelName.startsWith("grok-")) {
|
|
2264
|
+
return { provider: "xai", model: modelName };
|
|
2265
|
+
}
|
|
2266
|
+
if (modelName.startsWith("moonshot-")) {
|
|
2267
|
+
return { provider: "moonshot", model: modelName };
|
|
2268
|
+
}
|
|
2269
|
+
if (modelName.includes("/")) {
|
|
2270
|
+
const [provider, model] = modelName.split("/");
|
|
2271
|
+
const validProviders = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
|
|
2272
|
+
if (provider && model && validProviders.includes(provider)) {
|
|
2273
|
+
return { provider, model };
|
|
2274
|
+
}
|
|
2275
|
+
}
|
|
2276
|
+
return null;
|
|
2277
|
+
}
|
|
2278
|
+
/**
 * Start the RelayPlane routing proxy.
 *
 * Exposes an OpenAI-compatible `POST /v1/chat/completions` endpoint plus a
 * minimal `GET /models` listing, and routes each request to a concrete
 * provider/model based on the requested model name ("relayplane:auto|cost|quality"
 * or an explicit provider model for pass-through) and learned routing rules.
 *
 * @param {{port?: number, host?: string, verbose?: boolean, dbPath?: string}} [config]
 * @returns {Promise<import('http').Server>} resolves once the server is listening
 */
async function startProxy(config = {}) {
  const port = config.port ?? 3001;
  const host = config.host ?? "127.0.0.1";
  const verbose = config.verbose ?? false;
  const relay = new RelayPlane({ dbPath: config.dbPath });
  const log = (msg) => {
    if (verbose) console.log(`[relayplane] ${msg}`);
  };
  const server = http.createServer(async (req, res) => {
    // FIX: the whole handler is guarded so an unexpected throw becomes a 500
    // instead of an unhandled rejection that leaves the client hanging.
    try {
      // Permissive CORS so local browser-based tools can reach the proxy.
      res.setHeader("Access-Control-Allow-Origin", "*");
      res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
      res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
      if (req.method === "OPTIONS") {
        res.writeHead(204);
        res.end();
        return;
      }
      if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
        if (req.method === "GET" && req.url?.includes("/models")) {
          res.writeHead(200, { "Content-Type": "application/json" });
          res.end(
            JSON.stringify({
              object: "list",
              data: [
                { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
                { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
                { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
              ]
            })
          );
          return;
        }
        res.writeHead(404, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Not found" }));
        return;
      }
      let body = "";
      for await (const chunk of req) {
        body += chunk;
      }
      let request;
      try {
        request = JSON.parse(body);
      } catch {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Invalid JSON" }));
        return;
      }
      const isStreaming = request.stream === true;
      const requestedModel = request.model;
      // FIX: a missing/non-string `model` used to throw on `.startsWith()`
      // below; reject it up front with a 400 like the other input errors.
      if (typeof requestedModel !== "string" || requestedModel.length === 0) {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Missing required field: model" }));
        return;
      }
      // FIX: `messages` is handed to extractPromptText(); validate it too.
      if (!Array.isArray(request.messages)) {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Missing required field: messages" }));
        return;
      }
      let routingMode = "auto";
      let targetModel = "";
      let targetProvider = "anthropic";
      if (requestedModel.startsWith("relayplane:")) {
        if (requestedModel.includes(":cost")) {
          routingMode = "cost";
        } else if (requestedModel.includes(":quality")) {
          routingMode = "quality";
        }
      } else {
        // Explicit model name: forward as-is to its owning provider.
        routingMode = "passthrough";
        const resolved = resolveExplicitModel(requestedModel);
        if (resolved) {
          targetProvider = resolved.provider;
          targetModel = resolved.model;
          log(`Pass-through mode: ${requestedModel} \u2192 ${targetProvider}/${targetModel}`);
        } else {
          res.writeHead(400, { "Content-Type": "application/json" });
          res.end(JSON.stringify({ error: `Unknown model: ${requestedModel}` }));
          return;
        }
      }
      log(`Received request for model: ${requestedModel} (mode: ${routingMode}, stream: ${isStreaming})`);
      const promptText = extractPromptText(request.messages);
      const taskType = inferTaskType(promptText);
      const confidence = getInferenceConfidence(promptText, taskType);
      log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
      if (routingMode !== "passthrough") {
        // Prefer a learned routing rule; fall back to the static default table.
        const rule = relay.routing.get(taskType);
        if (rule && rule.preferredModel) {
          const parsed = parsePreferredModel(rule.preferredModel);
          if (parsed) {
            targetProvider = parsed.provider;
            targetModel = parsed.model;
            log(`Using learned rule: ${rule.preferredModel}`);
          } else {
            const defaultRoute = DEFAULT_ROUTING[taskType];
            targetProvider = defaultRoute.provider;
            targetModel = defaultRoute.model;
          }
        } else {
          const defaultRoute = DEFAULT_ROUTING[taskType];
          targetProvider = defaultRoute.provider;
          targetModel = defaultRoute.model;
        }
        if (routingMode === "cost") {
          // Cost mode downgrades only "simple" task types to a cheap model.
          const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
          if (simpleTasks.includes(taskType)) {
            targetModel = "claude-3-5-haiku-latest";
            targetProvider = "anthropic";
          }
        } else if (routingMode === "quality") {
          const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-3-5-sonnet-latest";
          targetModel = qualityModel;
          targetProvider = "anthropic";
        }
      }
      log(`Routing to: ${targetProvider}/${targetModel}`);
      const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
      const apiKey = process.env[apiKeyEnv];
      if (!apiKey) {
        res.writeHead(500, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
        return;
      }
      const startTime = Date.now();
      // NOTE(review): Node may deliver repeated headers as an array; downstream
      // forwarders are assumed to tolerate that — TODO confirm.
      const betaHeaders = req.headers["anthropic-beta"];
      if (isStreaming) {
        await handleStreamingRequest(
          res,
          request,
          targetProvider,
          targetModel,
          apiKey,
          relay,
          promptText,
          taskType,
          confidence,
          routingMode,
          startTime,
          log,
          betaHeaders
        );
      } else {
        await handleNonStreamingRequest(
          res,
          request,
          targetProvider,
          targetModel,
          apiKey,
          relay,
          promptText,
          taskType,
          confidence,
          routingMode,
          startTime,
          log,
          betaHeaders
        );
      }
    } catch (err) {
      log(`Unhandled error: ${err}`);
      if (!res.headersSent) {
        res.writeHead(500, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Internal proxy error" }));
      } else {
        // Headers already flushed (e.g. mid-stream): just terminate cleanly.
        res.end();
      }
    }
  });
  return new Promise((resolve, reject) => {
    server.on("error", reject);
    server.listen(port, host, () => {
      console.log(`RelayPlane proxy listening on http://${host}:${port}`);
      console.log(`  Models: relayplane:auto, relayplane:cost, relayplane:quality`);
      console.log(`  Endpoint: POST /v1/chat/completions`);
      console.log(`  Streaming: \u2705 Enabled`);
      resolve(server);
    });
  });
}
|
|
2440
|
+
/**
 * Forward a streaming chat-completions request to the chosen provider and
 * pipe the provider's stream back to the client as OpenAI-style SSE.
 *
 * Error handling is split in two phases on purpose:
 *  - before headers are flushed, provider failures become JSON error bodies
 *    with the provider's status (or 500 on transport errors);
 *  - after the 200/SSE headers are written, errors can only be logged —
 *    the status line is already on the wire.
 *
 * NOTE(review): `confidence` and `routingMode` are not read here; they appear
 * to be kept for signature parity with handleNonStreamingRequest — confirm.
 */
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
  let providerResponse;
  try {
    // Phase 1: open the upstream stream. Nothing has been written to the
    // client yet, so failures can still produce a proper error response.
    switch (targetProvider) {
      case "anthropic":
        providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
        break;
      case "google":
        providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
        break;
      case "xai":
        providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
        break;
      case "moonshot":
        providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
        break;
      default:
        // Unknown providers fall back to the OpenAI-compatible forwarder.
        providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
    }
    if (!providerResponse.ok) {
      // Relay the provider's error payload and status code verbatim.
      const errorData = await providerResponse.json();
      res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
      res.end(JSON.stringify(errorData));
      return;
    }
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    res.writeHead(500, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
    return;
  }
  // Phase 2: commit to SSE. From here on the 200 status is irrevocable.
  res.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive"
  });
  try {
    // Anthropic and Gemini streams need conversion to OpenAI chunk format;
    // OpenAI-compatible providers are piped through unchanged.
    switch (targetProvider) {
      case "anthropic":
        for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
          res.write(chunk);
        }
        break;
      case "google":
        for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
          res.write(chunk);
        }
        break;
      default:
        for await (const chunk of pipeOpenAIStream(providerResponse)) {
          res.write(chunk);
        }
    }
  } catch (err) {
    // Headers are already sent; all we can do is log and end the stream.
    log(`Streaming error: ${err}`);
  }
  const durationMs = Date.now() - startTime;
  // Best-effort telemetry: deliberately NOT awaited so recording latency (or
  // failure) never delays closing the client's stream.
  relay.run({
    prompt: promptText.slice(0, 500),
    taskType,
    model: `${targetProvider}:${targetModel}`
  }).then((runResult) => {
    log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
  }).catch((err) => {
    log(`Failed to record run: ${err}`);
  });
  res.end();
}
|
|
2508
|
+
/**
 * Forward a non-streaming chat-completions request to the chosen provider,
 * convert the payload to OpenAI format where needed, attach RelayPlane
 * routing metadata, and reply to the client.
 *
 * FIX: the five provider branches previously duplicated the
 * parse / ok-check / error-forward sequence with inconsistent variable use
 * (`rawData` in some branches, `responseData` in others). The flow is now:
 * dispatch → parse once → shared error path → per-provider conversion.
 * Behavior is unchanged: provider errors are relayed verbatim with the
 * provider's status, transport errors become a 500, and telemetry failures
 * never fail the client response.
 */
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
  let responseData;
  try {
    // Dispatch to the provider-specific forwarder.
    let providerResponse;
    switch (targetProvider) {
      case "anthropic":
        providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
        break;
      case "google":
        providerResponse = await forwardToGemini(request, targetModel, apiKey);
        break;
      case "xai":
        providerResponse = await forwardToXAI(request, targetModel, apiKey);
        break;
      case "moonshot":
        providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
        break;
      default:
        providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
    }
    const rawData = await providerResponse.json();
    if (!providerResponse.ok) {
      // Relay the provider's error payload and status code verbatim.
      res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
      res.end(JSON.stringify(rawData));
      return;
    }
    // Only Anthropic and Gemini payloads need conversion to the OpenAI
    // chat-completions shape; the others are already OpenAI-compatible.
    switch (targetProvider) {
      case "anthropic":
        responseData = convertAnthropicResponse(rawData);
        break;
      case "google":
        responseData = convertGeminiResponse(rawData, targetModel);
        break;
      default:
        responseData = rawData;
    }
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    res.writeHead(500, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
    return;
  }
  const durationMs = Date.now() - startTime;
  try {
    // Record the run and decorate the response with routing metadata.
    const runResult = await relay.run({
      prompt: promptText.slice(0, 500),
      taskType,
      model: `${targetProvider}:${targetModel}`
    });
    responseData["_relayplane"] = {
      runId: runResult.runId,
      routedTo: `${targetProvider}/${targetModel}`,
      taskType,
      confidence,
      durationMs,
      mode: routingMode
    };
    log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
  } catch (err) {
    // Telemetry is best-effort: the client still gets the provider response.
    log(`Failed to record run: ${err}`);
  }
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify(responseData));
}
|
|
2593
|
+
|
|
2594
|
+
// src/cli.ts
|
|
2595
|
+
/**
 * Print CLI usage, options, recognized environment variables, and examples
 * to stdout. Invoked for `-h` / `--help` before any other argument parsing.
 */
function printHelp() {
  console.log(`
RelayPlane Proxy - Intelligent AI Model Routing

Usage:
  npx @relayplane/proxy [options]
  relayplane-proxy [options]

Options:
  --port <number>    Port to listen on (default: 3001)
  --host <string>    Host to bind to (default: 127.0.0.1)
  -v, --verbose      Enable verbose logging
  -h, --help         Show this help message

Environment Variables:
  ANTHROPIC_API_KEY   Anthropic API key
  OPENAI_API_KEY      OpenAI API key
  GEMINI_API_KEY      Google Gemini API key (optional)
  XAI_API_KEY         xAI/Grok API key (optional)
  MOONSHOT_API_KEY    Moonshot API key (optional)

Example:
  # Start proxy on default port
  npx @relayplane/proxy

  # Start on custom port with verbose logging
  npx @relayplane/proxy --port 8080 -v

  # Then point your SDKs to the proxy
  export ANTHROPIC_BASE_URL=http://localhost:3001
  export OPENAI_BASE_URL=http://localhost:3001

Learn more: https://relayplane.com/integrations/openclaw
`);
}
|
|
2630
|
+
/**
 * CLI entry point: parse command-line flags, verify that at least one
 * provider API key is configured, print the startup banner, and launch the
 * proxy server. Exits the process on invalid arguments or missing keys.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.includes("-h") || args.includes("--help")) {
    printHelp();
    process.exit(0);
  }
  // Defaults mirror startProxy()'s own fallbacks.
  let port = 3001;
  let host = "127.0.0.1";
  let verbose = false;
  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case "--port":
        if (args[i + 1]) {
          port = parseInt(args[i + 1], 10);
          if (isNaN(port) || port < 1 || port > 65535) {
            console.error("Error: Invalid port number");
            process.exit(1);
          }
          i++;
        }
        break;
      case "--host":
        if (args[i + 1]) {
          host = args[i + 1];
          i++;
        }
        break;
      case "-v":
      case "--verbose":
        verbose = true;
        break;
    }
  }
  // Detect which provider keys are present; at least one is required.
  const keys = {
    anthropic: !!process.env["ANTHROPIC_API_KEY"],
    openai: !!process.env["OPENAI_API_KEY"],
    gemini: !!process.env["GEMINI_API_KEY"],
    xai: !!process.env["XAI_API_KEY"],
    moonshot: !!process.env["MOONSHOT_API_KEY"]
  };
  if (!Object.values(keys).some(Boolean)) {
    console.error("Error: No API keys found. Set at least one of:");
    console.error("  ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, XAI_API_KEY, MOONSHOT_API_KEY");
    process.exit(1);
  }
  // Startup banner (box width kept consistent via repeat()).
  console.log("");
  console.log("  \u256D" + "\u2500".repeat(41) + "\u256E");
  console.log("  \u2502   RelayPlane Proxy v0.1.0               \u2502");
  console.log("  \u2502   Intelligent AI Model Routing          \u2502");
  console.log("  \u2570" + "\u2500".repeat(41) + "\u256F");
  console.log("");
  console.log("  Providers:");
  if (keys.anthropic) console.log("    \u2713 Anthropic");
  if (keys.openai) console.log("    \u2713 OpenAI");
  if (keys.gemini) console.log("    \u2713 Google Gemini");
  if (keys.xai) console.log("    \u2713 xAI (Grok)");
  if (keys.moonshot) console.log("    \u2713 Moonshot");
  console.log("");
  try {
    await startProxy({ port, host, verbose });
    console.log("");
    console.log("  To use, set these environment variables:");
    console.log(`    export ANTHROPIC_BASE_URL=http://${host}:${port}`);
    console.log(`    export OPENAI_BASE_URL=http://${host}:${port}`);
    console.log("");
    console.log("  Then run your agent (OpenClaw, Cursor, Aider, etc.)");
    console.log("");
  } catch (err) {
    console.error("Failed to start proxy:", err);
    process.exit(1);
  }
}
|
|
2692
|
+
// FIX: don't leave the top-level promise floating — an unexpected rejection
// inside main() would otherwise surface as an unhandled rejection instead of
// a clean, explicit error exit.
main().catch((err) => {
  console.error("Fatal error:", err);
  process.exit(1);
});
|
|
2693
|
+
//# sourceMappingURL=cli.mjs.map
|