@relayplane/proxy 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/dist/cli.js +131 -28
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +131 -28
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +131 -28
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +131 -28
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
// src/proxy.ts
|
|
4
4
|
import * as http from "http";
|
|
5
|
+
import * as url from "url";
|
|
5
6
|
|
|
6
7
|
// src/storage/store.ts
|
|
7
8
|
import Database from "better-sqlite3";
|
|
@@ -132,11 +133,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
|
|
|
132
133
|
INSERT OR IGNORE INTO schema_version (version) VALUES (1);
|
|
133
134
|
`;
|
|
134
135
|
var DEFAULT_ROUTING_RULES = [
|
|
135
|
-
|
|
136
|
-
{ taskType: "
|
|
136
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
137
|
+
{ taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
138
|
+
{ taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
139
|
+
{ taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
140
|
+
{ taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
141
|
+
// Simple tasks → Haiku (cost efficient)
|
|
137
142
|
{ taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
138
|
-
{ taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
139
|
-
{ taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
140
143
|
{ taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
141
144
|
{ taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
142
145
|
{ taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
@@ -1566,6 +1569,11 @@ ${input.prompt}` : input.prompt;
|
|
|
1566
1569
|
};
|
|
1567
1570
|
|
|
1568
1571
|
// src/proxy.ts
|
|
1572
|
+
var VERSION = "0.1.7";
|
|
1573
|
+
var recentRuns = [];
|
|
1574
|
+
var MAX_RECENT_RUNS = 100;
|
|
1575
|
+
var modelCounts = {};
|
|
1576
|
+
var serverStartTime = 0;
|
|
1569
1577
|
var DEFAULT_ENDPOINTS = {
|
|
1570
1578
|
anthropic: {
|
|
1571
1579
|
baseUrl: "https://api.anthropic.com/v1",
|
|
@@ -1603,11 +1611,14 @@ var MODEL_MAPPING = {
|
|
|
1603
1611
|
"gpt-4.1": { provider: "openai", model: "gpt-4.1" }
|
|
1604
1612
|
};
|
|
1605
1613
|
var DEFAULT_ROUTING = {
|
|
1606
|
-
|
|
1607
|
-
code_review: { provider: "anthropic", model: "claude-
|
|
1614
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
1615
|
+
code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1616
|
+
analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1617
|
+
creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1618
|
+
// Medium tasks → Sonnet (benefit from better model)
|
|
1619
|
+
code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1620
|
+
// Simple tasks → Haiku (cost efficient)
|
|
1608
1621
|
summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1609
|
-
analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1610
|
-
creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1611
1622
|
data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1612
1623
|
translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1613
1624
|
question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
@@ -1859,9 +1870,9 @@ function convertMessagesToGemini(messages) {
|
|
|
1859
1870
|
return { text: p.text };
|
|
1860
1871
|
}
|
|
1861
1872
|
if (p.type === "image_url" && p.image_url?.url) {
|
|
1862
|
-
const
|
|
1863
|
-
if (
|
|
1864
|
-
const match =
|
|
1873
|
+
const url2 = p.image_url.url;
|
|
1874
|
+
if (url2.startsWith("data:")) {
|
|
1875
|
+
const match = url2.match(/^data:([^;]+);base64,(.+)$/);
|
|
1865
1876
|
if (match) {
|
|
1866
1877
|
return {
|
|
1867
1878
|
inline_data: {
|
|
@@ -1871,7 +1882,7 @@ function convertMessagesToGemini(messages) {
|
|
|
1871
1882
|
};
|
|
1872
1883
|
}
|
|
1873
1884
|
}
|
|
1874
|
-
return { text: `[Image: ${
|
|
1885
|
+
return { text: `[Image: ${url2}]` };
|
|
1875
1886
|
}
|
|
1876
1887
|
return { text: "" };
|
|
1877
1888
|
});
|
|
@@ -2285,28 +2296,88 @@ async function startProxy(config = {}) {
|
|
|
2285
2296
|
};
|
|
2286
2297
|
const server = http.createServer(async (req, res) => {
|
|
2287
2298
|
res.setHeader("Access-Control-Allow-Origin", "*");
|
|
2288
|
-
res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
|
|
2299
|
+
res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
|
|
2289
2300
|
res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
|
|
2290
2301
|
if (req.method === "OPTIONS") {
|
|
2291
2302
|
res.writeHead(204);
|
|
2292
2303
|
res.end();
|
|
2293
2304
|
return;
|
|
2294
2305
|
}
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
})
|
|
2307
|
-
);
|
|
2308
|
-
return;
|
|
2306
|
+
const parsedUrl = url.parse(req.url || "", true);
|
|
2307
|
+
const pathname = parsedUrl.pathname || "";
|
|
2308
|
+
if (req.method === "GET" && pathname === "/health") {
|
|
2309
|
+
const uptimeMs = Date.now() - serverStartTime;
|
|
2310
|
+
const uptimeSecs = Math.floor(uptimeMs / 1e3);
|
|
2311
|
+
const hours = Math.floor(uptimeSecs / 3600);
|
|
2312
|
+
const mins = Math.floor(uptimeSecs % 3600 / 60);
|
|
2313
|
+
const secs = uptimeSecs % 60;
|
|
2314
|
+
const providers = {};
|
|
2315
|
+
for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
|
|
2316
|
+
providers[name] = !!process.env[config2.apiKeyEnv];
|
|
2309
2317
|
}
|
|
2318
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2319
|
+
res.end(JSON.stringify({
|
|
2320
|
+
status: "ok",
|
|
2321
|
+
version: VERSION,
|
|
2322
|
+
uptime: `${hours}h ${mins}m ${secs}s`,
|
|
2323
|
+
uptimeMs,
|
|
2324
|
+
providers,
|
|
2325
|
+
totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
|
|
2326
|
+
}));
|
|
2327
|
+
return;
|
|
2328
|
+
}
|
|
2329
|
+
if (req.method === "GET" && pathname === "/stats") {
|
|
2330
|
+
const stats = relay.stats();
|
|
2331
|
+
const savings = relay.savingsReport(30);
|
|
2332
|
+
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2333
|
+
const modelDistribution = {};
|
|
2334
|
+
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2335
|
+
modelDistribution[model] = {
|
|
2336
|
+
count,
|
|
2337
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2338
|
+
};
|
|
2339
|
+
}
|
|
2340
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2341
|
+
res.end(JSON.stringify({
|
|
2342
|
+
totalRuns,
|
|
2343
|
+
savings: {
|
|
2344
|
+
estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
|
|
2345
|
+
actualCostUsd: savings.actualCost.toFixed(4),
|
|
2346
|
+
baselineCostUsd: savings.baselineCost.toFixed(4),
|
|
2347
|
+
savedUsd: savings.savings.toFixed(4)
|
|
2348
|
+
},
|
|
2349
|
+
modelDistribution,
|
|
2350
|
+
byTaskType: stats.byTaskType,
|
|
2351
|
+
period: stats.period
|
|
2352
|
+
}));
|
|
2353
|
+
return;
|
|
2354
|
+
}
|
|
2355
|
+
if (req.method === "GET" && pathname === "/runs") {
|
|
2356
|
+
const limitParam = parsedUrl.query["limit"];
|
|
2357
|
+
const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
|
|
2358
|
+
const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
|
|
2359
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2360
|
+
res.end(JSON.stringify({
|
|
2361
|
+
runs: recentRuns.slice(0, limit),
|
|
2362
|
+
total: recentRuns.length
|
|
2363
|
+
}));
|
|
2364
|
+
return;
|
|
2365
|
+
}
|
|
2366
|
+
if (req.method === "GET" && pathname.includes("/models")) {
|
|
2367
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2368
|
+
res.end(
|
|
2369
|
+
JSON.stringify({
|
|
2370
|
+
object: "list",
|
|
2371
|
+
data: [
|
|
2372
|
+
{ id: "relayplane:auto", object: "model", owned_by: "relayplane" },
|
|
2373
|
+
{ id: "relayplane:cost", object: "model", owned_by: "relayplane" },
|
|
2374
|
+
{ id: "relayplane:quality", object: "model", owned_by: "relayplane" }
|
|
2375
|
+
]
|
|
2376
|
+
})
|
|
2377
|
+
);
|
|
2378
|
+
return;
|
|
2379
|
+
}
|
|
2380
|
+
if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
|
|
2310
2381
|
res.writeHead(404, { "Content-Type": "application/json" });
|
|
2311
2382
|
res.end(JSON.stringify({ error: "Not found" }));
|
|
2312
2383
|
return;
|
|
@@ -2429,9 +2500,11 @@ async function startProxy(config = {}) {
|
|
|
2429
2500
|
return new Promise((resolve, reject) => {
|
|
2430
2501
|
server.on("error", reject);
|
|
2431
2502
|
server.listen(port, host, () => {
|
|
2503
|
+
serverStartTime = Date.now();
|
|
2432
2504
|
console.log(`RelayPlane proxy listening on http://${host}:${port}`);
|
|
2433
2505
|
console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
|
|
2434
2506
|
console.log(` Endpoint: POST /v1/chat/completions`);
|
|
2507
|
+
console.log(` Stats: GET /stats, /runs, /health`);
|
|
2435
2508
|
console.log(` Streaming: \u2705 Enabled`);
|
|
2436
2509
|
resolve(server);
|
|
2437
2510
|
});
|
|
@@ -2494,11 +2567,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2494
2567
|
log(`Streaming error: ${err}`);
|
|
2495
2568
|
}
|
|
2496
2569
|
const durationMs = Date.now() - startTime;
|
|
2570
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2571
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2497
2572
|
relay.run({
|
|
2498
2573
|
prompt: promptText.slice(0, 500),
|
|
2499
2574
|
taskType,
|
|
2500
2575
|
model: `${targetProvider}:${targetModel}`
|
|
2501
2576
|
}).then((runResult) => {
|
|
2577
|
+
recentRuns.unshift({
|
|
2578
|
+
runId: runResult.runId,
|
|
2579
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2580
|
+
model: modelKey,
|
|
2581
|
+
taskType,
|
|
2582
|
+
confidence,
|
|
2583
|
+
mode: routingMode,
|
|
2584
|
+
durationMs,
|
|
2585
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2586
|
+
});
|
|
2587
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2588
|
+
recentRuns.pop();
|
|
2589
|
+
}
|
|
2502
2590
|
log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
|
|
2503
2591
|
}).catch((err) => {
|
|
2504
2592
|
log(`Failed to record run: ${err}`);
|
|
@@ -2569,15 +2657,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2569
2657
|
return;
|
|
2570
2658
|
}
|
|
2571
2659
|
const durationMs = Date.now() - startTime;
|
|
2660
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2661
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2572
2662
|
try {
|
|
2573
2663
|
const runResult = await relay.run({
|
|
2574
2664
|
prompt: promptText.slice(0, 500),
|
|
2575
2665
|
taskType,
|
|
2576
2666
|
model: `${targetProvider}:${targetModel}`
|
|
2577
2667
|
});
|
|
2668
|
+
recentRuns.unshift({
|
|
2669
|
+
runId: runResult.runId,
|
|
2670
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2671
|
+
model: modelKey,
|
|
2672
|
+
taskType,
|
|
2673
|
+
confidence,
|
|
2674
|
+
mode: routingMode,
|
|
2675
|
+
durationMs,
|
|
2676
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2677
|
+
});
|
|
2678
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2679
|
+
recentRuns.pop();
|
|
2680
|
+
}
|
|
2578
2681
|
responseData["_relayplane"] = {
|
|
2579
2682
|
runId: runResult.runId,
|
|
2580
|
-
routedTo:
|
|
2683
|
+
routedTo: modelKey,
|
|
2581
2684
|
taskType,
|
|
2582
2685
|
confidence,
|
|
2583
2686
|
durationMs,
|