@relayplane/proxy 0.1.5 → 0.1.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/dist/cli.js +131 -28
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +131 -28
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +131 -28
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +131 -28
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// src/proxy.ts
|
|
2
2
|
import * as http from "http";
|
|
3
|
+
import * as url from "url";
|
|
3
4
|
|
|
4
5
|
// src/storage/store.ts
|
|
5
6
|
import Database from "better-sqlite3";
|
|
@@ -130,11 +131,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
|
|
|
130
131
|
INSERT OR IGNORE INTO schema_version (version) VALUES (1);
|
|
131
132
|
`;
|
|
132
133
|
var DEFAULT_ROUTING_RULES = [
|
|
133
|
-
|
|
134
|
-
{ taskType: "
|
|
134
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
135
|
+
{ taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
136
|
+
{ taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
137
|
+
{ taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
138
|
+
{ taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
139
|
+
// Simple tasks → Haiku (cost efficient)
|
|
135
140
|
{ taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
136
|
-
{ taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
137
|
-
{ taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
138
141
|
{ taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
139
142
|
{ taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
140
143
|
{ taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
@@ -1568,6 +1571,11 @@ ${input.prompt}` : input.prompt;
|
|
|
1568
1571
|
};
|
|
1569
1572
|
|
|
1570
1573
|
// src/proxy.ts
|
|
1574
|
+
var VERSION = "0.1.7";
|
|
1575
|
+
var recentRuns = [];
|
|
1576
|
+
var MAX_RECENT_RUNS = 100;
|
|
1577
|
+
var modelCounts = {};
|
|
1578
|
+
var serverStartTime = 0;
|
|
1571
1579
|
var DEFAULT_ENDPOINTS = {
|
|
1572
1580
|
anthropic: {
|
|
1573
1581
|
baseUrl: "https://api.anthropic.com/v1",
|
|
@@ -1605,11 +1613,14 @@ var MODEL_MAPPING = {
|
|
|
1605
1613
|
"gpt-4.1": { provider: "openai", model: "gpt-4.1" }
|
|
1606
1614
|
};
|
|
1607
1615
|
var DEFAULT_ROUTING = {
|
|
1608
|
-
|
|
1609
|
-
code_review: { provider: "anthropic", model: "claude-
|
|
1616
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
1617
|
+
code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1618
|
+
analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1619
|
+
creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1620
|
+
// Medium tasks → Sonnet (benefit from better model)
|
|
1621
|
+
code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1622
|
+
// Simple tasks → Haiku (cost efficient)
|
|
1610
1623
|
summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1611
|
-
analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1612
|
-
creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1613
1624
|
data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1614
1625
|
translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1615
1626
|
question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
@@ -1861,9 +1872,9 @@ function convertMessagesToGemini(messages) {
|
|
|
1861
1872
|
return { text: p.text };
|
|
1862
1873
|
}
|
|
1863
1874
|
if (p.type === "image_url" && p.image_url?.url) {
|
|
1864
|
-
const url = p.image_url.url;
|
|
1865
|
-
if (url.startsWith("data:")) {
|
|
1866
|
-
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
|
1875
|
+
const url2 = p.image_url.url;
|
|
1876
|
+
if (url2.startsWith("data:")) {
|
|
1877
|
+
const match = url2.match(/^data:([^;]+);base64,(.+)$/);
|
|
1867
1878
|
if (match) {
|
|
1868
1879
|
return {
|
|
1869
1880
|
inline_data: {
|
|
@@ -1873,7 +1884,7 @@ function convertMessagesToGemini(messages) {
|
|
|
1873
1884
|
};
|
|
1874
1885
|
}
|
|
1875
1886
|
}
|
|
1876
|
-
return { text: `[Image: ${url}]` };
|
|
1887
|
+
return { text: `[Image: ${url2}]` };
|
|
1877
1888
|
}
|
|
1878
1889
|
return { text: "" };
|
|
1879
1890
|
});
|
|
@@ -2287,28 +2298,88 @@ async function startProxy(config = {}) {
|
|
|
2287
2298
|
};
|
|
2288
2299
|
const server = http.createServer(async (req, res) => {
|
|
2289
2300
|
res.setHeader("Access-Control-Allow-Origin", "*");
|
|
2290
|
-
res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
|
|
2301
|
+
res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
|
|
2291
2302
|
res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
|
|
2292
2303
|
if (req.method === "OPTIONS") {
|
|
2293
2304
|
res.writeHead(204);
|
|
2294
2305
|
res.end();
|
|
2295
2306
|
return;
|
|
2296
2307
|
}
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
})
|
|
2309
|
-
);
|
|
2310
|
-
return;
|
|
2308
|
+
const parsedUrl = url.parse(req.url || "", true);
|
|
2309
|
+
const pathname = parsedUrl.pathname || "";
|
|
2310
|
+
if (req.method === "GET" && pathname === "/health") {
|
|
2311
|
+
const uptimeMs = Date.now() - serverStartTime;
|
|
2312
|
+
const uptimeSecs = Math.floor(uptimeMs / 1e3);
|
|
2313
|
+
const hours = Math.floor(uptimeSecs / 3600);
|
|
2314
|
+
const mins = Math.floor(uptimeSecs % 3600 / 60);
|
|
2315
|
+
const secs = uptimeSecs % 60;
|
|
2316
|
+
const providers = {};
|
|
2317
|
+
for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
|
|
2318
|
+
providers[name] = !!process.env[config2.apiKeyEnv];
|
|
2311
2319
|
}
|
|
2320
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2321
|
+
res.end(JSON.stringify({
|
|
2322
|
+
status: "ok",
|
|
2323
|
+
version: VERSION,
|
|
2324
|
+
uptime: `${hours}h ${mins}m ${secs}s`,
|
|
2325
|
+
uptimeMs,
|
|
2326
|
+
providers,
|
|
2327
|
+
totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
|
|
2328
|
+
}));
|
|
2329
|
+
return;
|
|
2330
|
+
}
|
|
2331
|
+
if (req.method === "GET" && pathname === "/stats") {
|
|
2332
|
+
const stats = relay.stats();
|
|
2333
|
+
const savings = relay.savingsReport(30);
|
|
2334
|
+
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2335
|
+
const modelDistribution = {};
|
|
2336
|
+
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2337
|
+
modelDistribution[model] = {
|
|
2338
|
+
count,
|
|
2339
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2340
|
+
};
|
|
2341
|
+
}
|
|
2342
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2343
|
+
res.end(JSON.stringify({
|
|
2344
|
+
totalRuns,
|
|
2345
|
+
savings: {
|
|
2346
|
+
estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
|
|
2347
|
+
actualCostUsd: savings.actualCost.toFixed(4),
|
|
2348
|
+
baselineCostUsd: savings.baselineCost.toFixed(4),
|
|
2349
|
+
savedUsd: savings.savings.toFixed(4)
|
|
2350
|
+
},
|
|
2351
|
+
modelDistribution,
|
|
2352
|
+
byTaskType: stats.byTaskType,
|
|
2353
|
+
period: stats.period
|
|
2354
|
+
}));
|
|
2355
|
+
return;
|
|
2356
|
+
}
|
|
2357
|
+
if (req.method === "GET" && pathname === "/runs") {
|
|
2358
|
+
const limitParam = parsedUrl.query["limit"];
|
|
2359
|
+
const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
|
|
2360
|
+
const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
|
|
2361
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2362
|
+
res.end(JSON.stringify({
|
|
2363
|
+
runs: recentRuns.slice(0, limit),
|
|
2364
|
+
total: recentRuns.length
|
|
2365
|
+
}));
|
|
2366
|
+
return;
|
|
2367
|
+
}
|
|
2368
|
+
if (req.method === "GET" && pathname.includes("/models")) {
|
|
2369
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2370
|
+
res.end(
|
|
2371
|
+
JSON.stringify({
|
|
2372
|
+
object: "list",
|
|
2373
|
+
data: [
|
|
2374
|
+
{ id: "relayplane:auto", object: "model", owned_by: "relayplane" },
|
|
2375
|
+
{ id: "relayplane:cost", object: "model", owned_by: "relayplane" },
|
|
2376
|
+
{ id: "relayplane:quality", object: "model", owned_by: "relayplane" }
|
|
2377
|
+
]
|
|
2378
|
+
})
|
|
2379
|
+
);
|
|
2380
|
+
return;
|
|
2381
|
+
}
|
|
2382
|
+
if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
|
|
2312
2383
|
res.writeHead(404, { "Content-Type": "application/json" });
|
|
2313
2384
|
res.end(JSON.stringify({ error: "Not found" }));
|
|
2314
2385
|
return;
|
|
@@ -2431,9 +2502,11 @@ async function startProxy(config = {}) {
|
|
|
2431
2502
|
return new Promise((resolve, reject) => {
|
|
2432
2503
|
server.on("error", reject);
|
|
2433
2504
|
server.listen(port, host, () => {
|
|
2505
|
+
serverStartTime = Date.now();
|
|
2434
2506
|
console.log(`RelayPlane proxy listening on http://${host}:${port}`);
|
|
2435
2507
|
console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
|
|
2436
2508
|
console.log(` Endpoint: POST /v1/chat/completions`);
|
|
2509
|
+
console.log(` Stats: GET /stats, /runs, /health`);
|
|
2437
2510
|
console.log(` Streaming: \u2705 Enabled`);
|
|
2438
2511
|
resolve(server);
|
|
2439
2512
|
});
|
|
@@ -2496,11 +2569,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2496
2569
|
log(`Streaming error: ${err}`);
|
|
2497
2570
|
}
|
|
2498
2571
|
const durationMs = Date.now() - startTime;
|
|
2572
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2573
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2499
2574
|
relay.run({
|
|
2500
2575
|
prompt: promptText.slice(0, 500),
|
|
2501
2576
|
taskType,
|
|
2502
2577
|
model: `${targetProvider}:${targetModel}`
|
|
2503
2578
|
}).then((runResult) => {
|
|
2579
|
+
recentRuns.unshift({
|
|
2580
|
+
runId: runResult.runId,
|
|
2581
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2582
|
+
model: modelKey,
|
|
2583
|
+
taskType,
|
|
2584
|
+
confidence,
|
|
2585
|
+
mode: routingMode,
|
|
2586
|
+
durationMs,
|
|
2587
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2588
|
+
});
|
|
2589
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2590
|
+
recentRuns.pop();
|
|
2591
|
+
}
|
|
2504
2592
|
log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
|
|
2505
2593
|
}).catch((err) => {
|
|
2506
2594
|
log(`Failed to record run: ${err}`);
|
|
@@ -2571,15 +2659,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2571
2659
|
return;
|
|
2572
2660
|
}
|
|
2573
2661
|
const durationMs = Date.now() - startTime;
|
|
2662
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2663
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2574
2664
|
try {
|
|
2575
2665
|
const runResult = await relay.run({
|
|
2576
2666
|
prompt: promptText.slice(0, 500),
|
|
2577
2667
|
taskType,
|
|
2578
2668
|
model: `${targetProvider}:${targetModel}`
|
|
2579
2669
|
});
|
|
2670
|
+
recentRuns.unshift({
|
|
2671
|
+
runId: runResult.runId,
|
|
2672
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2673
|
+
model: modelKey,
|
|
2674
|
+
taskType,
|
|
2675
|
+
confidence,
|
|
2676
|
+
mode: routingMode,
|
|
2677
|
+
durationMs,
|
|
2678
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2679
|
+
});
|
|
2680
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2681
|
+
recentRuns.pop();
|
|
2682
|
+
}
|
|
2580
2683
|
responseData["_relayplane"] = {
|
|
2581
2684
|
runId: runResult.runId,
|
|
2582
|
-
routedTo:
|
|
2685
|
+
routedTo: modelKey,
|
|
2583
2686
|
taskType,
|
|
2584
2687
|
confidence,
|
|
2585
2688
|
durationMs,
|