@relayplane/proxy 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/dist/cli.js +131 -28
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +131 -28
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +131 -28
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +131 -28
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -53,6 +53,7 @@ module.exports = __toCommonJS(index_exports);
|
|
|
53
53
|
|
|
54
54
|
// src/proxy.ts
|
|
55
55
|
var http = __toESM(require("http"));
|
|
56
|
+
var url = __toESM(require("url"));
|
|
56
57
|
|
|
57
58
|
// src/storage/store.ts
|
|
58
59
|
var import_better_sqlite3 = __toESM(require("better-sqlite3"));
|
|
@@ -183,11 +184,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
|
|
|
183
184
|
INSERT OR IGNORE INTO schema_version (version) VALUES (1);
|
|
184
185
|
`;
|
|
185
186
|
var DEFAULT_ROUTING_RULES = [
|
|
186
|
-
|
|
187
|
-
{ taskType: "
|
|
187
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
188
|
+
{ taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
189
|
+
{ taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
190
|
+
{ taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
191
|
+
{ taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
|
|
192
|
+
// Simple tasks → Haiku (cost efficient)
|
|
188
193
|
{ taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
189
|
-
{ taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
190
|
-
{ taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
191
194
|
{ taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
192
195
|
{ taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
193
196
|
{ taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
|
|
@@ -1621,6 +1624,11 @@ ${input.prompt}` : input.prompt;
|
|
|
1621
1624
|
};
|
|
1622
1625
|
|
|
1623
1626
|
// src/proxy.ts
|
|
1627
|
+
var VERSION = "0.1.7";
|
|
1628
|
+
var recentRuns = [];
|
|
1629
|
+
var MAX_RECENT_RUNS = 100;
|
|
1630
|
+
var modelCounts = {};
|
|
1631
|
+
var serverStartTime = 0;
|
|
1624
1632
|
var DEFAULT_ENDPOINTS = {
|
|
1625
1633
|
anthropic: {
|
|
1626
1634
|
baseUrl: "https://api.anthropic.com/v1",
|
|
@@ -1658,11 +1666,14 @@ var MODEL_MAPPING = {
|
|
|
1658
1666
|
"gpt-4.1": { provider: "openai", model: "gpt-4.1" }
|
|
1659
1667
|
};
|
|
1660
1668
|
var DEFAULT_ROUTING = {
|
|
1661
|
-
|
|
1662
|
-
code_review: { provider: "anthropic", model: "claude-
|
|
1669
|
+
// Complex tasks → Sonnet (need reasoning & quality)
|
|
1670
|
+
code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1671
|
+
analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1672
|
+
creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1673
|
+
// Medium tasks → Sonnet (benefit from better model)
|
|
1674
|
+
code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
|
|
1675
|
+
// Simple tasks → Haiku (cost efficient)
|
|
1663
1676
|
summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1664
|
-
analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1665
|
-
creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1666
1677
|
data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1667
1678
|
translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
1668
1679
|
question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
|
|
@@ -1914,9 +1925,9 @@ function convertMessagesToGemini(messages) {
|
|
|
1914
1925
|
return { text: p.text };
|
|
1915
1926
|
}
|
|
1916
1927
|
if (p.type === "image_url" && p.image_url?.url) {
|
|
1917
|
-
const url = p.image_url.url;
|
|
1918
|
-
if (url.startsWith("data:")) {
|
|
1919
|
-
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
|
1928
|
+
const url2 = p.image_url.url;
|
|
1929
|
+
if (url2.startsWith("data:")) {
|
|
1930
|
+
const match = url2.match(/^data:([^;]+);base64,(.+)$/);
|
|
1920
1931
|
if (match) {
|
|
1921
1932
|
return {
|
|
1922
1933
|
inline_data: {
|
|
@@ -1926,7 +1937,7 @@ function convertMessagesToGemini(messages) {
|
|
|
1926
1937
|
};
|
|
1927
1938
|
}
|
|
1928
1939
|
}
|
|
1929
|
-
return { text: `[Image: ${url}]` };
|
|
1940
|
+
return { text: `[Image: ${url2}]` };
|
|
1930
1941
|
}
|
|
1931
1942
|
return { text: "" };
|
|
1932
1943
|
});
|
|
@@ -2340,28 +2351,88 @@ async function startProxy(config = {}) {
|
|
|
2340
2351
|
};
|
|
2341
2352
|
const server = http.createServer(async (req, res) => {
|
|
2342
2353
|
res.setHeader("Access-Control-Allow-Origin", "*");
|
|
2343
|
-
res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
|
|
2354
|
+
res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
|
|
2344
2355
|
res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
|
|
2345
2356
|
if (req.method === "OPTIONS") {
|
|
2346
2357
|
res.writeHead(204);
|
|
2347
2358
|
res.end();
|
|
2348
2359
|
return;
|
|
2349
2360
|
}
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
})
|
|
2362
|
-
);
|
|
2363
|
-
return;
|
|
2361
|
+
const parsedUrl = url.parse(req.url || "", true);
|
|
2362
|
+
const pathname = parsedUrl.pathname || "";
|
|
2363
|
+
if (req.method === "GET" && pathname === "/health") {
|
|
2364
|
+
const uptimeMs = Date.now() - serverStartTime;
|
|
2365
|
+
const uptimeSecs = Math.floor(uptimeMs / 1e3);
|
|
2366
|
+
const hours = Math.floor(uptimeSecs / 3600);
|
|
2367
|
+
const mins = Math.floor(uptimeSecs % 3600 / 60);
|
|
2368
|
+
const secs = uptimeSecs % 60;
|
|
2369
|
+
const providers = {};
|
|
2370
|
+
for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
|
|
2371
|
+
providers[name] = !!process.env[config2.apiKeyEnv];
|
|
2364
2372
|
}
|
|
2373
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2374
|
+
res.end(JSON.stringify({
|
|
2375
|
+
status: "ok",
|
|
2376
|
+
version: VERSION,
|
|
2377
|
+
uptime: `${hours}h ${mins}m ${secs}s`,
|
|
2378
|
+
uptimeMs,
|
|
2379
|
+
providers,
|
|
2380
|
+
totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
|
|
2381
|
+
}));
|
|
2382
|
+
return;
|
|
2383
|
+
}
|
|
2384
|
+
if (req.method === "GET" && pathname === "/stats") {
|
|
2385
|
+
const stats = relay.stats();
|
|
2386
|
+
const savings = relay.savingsReport(30);
|
|
2387
|
+
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2388
|
+
const modelDistribution = {};
|
|
2389
|
+
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2390
|
+
modelDistribution[model] = {
|
|
2391
|
+
count,
|
|
2392
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2393
|
+
};
|
|
2394
|
+
}
|
|
2395
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2396
|
+
res.end(JSON.stringify({
|
|
2397
|
+
totalRuns,
|
|
2398
|
+
savings: {
|
|
2399
|
+
estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
|
|
2400
|
+
actualCostUsd: savings.actualCost.toFixed(4),
|
|
2401
|
+
baselineCostUsd: savings.baselineCost.toFixed(4),
|
|
2402
|
+
savedUsd: savings.savings.toFixed(4)
|
|
2403
|
+
},
|
|
2404
|
+
modelDistribution,
|
|
2405
|
+
byTaskType: stats.byTaskType,
|
|
2406
|
+
period: stats.period
|
|
2407
|
+
}));
|
|
2408
|
+
return;
|
|
2409
|
+
}
|
|
2410
|
+
if (req.method === "GET" && pathname === "/runs") {
|
|
2411
|
+
const limitParam = parsedUrl.query["limit"];
|
|
2412
|
+
const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
|
|
2413
|
+
const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
|
|
2414
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2415
|
+
res.end(JSON.stringify({
|
|
2416
|
+
runs: recentRuns.slice(0, limit),
|
|
2417
|
+
total: recentRuns.length
|
|
2418
|
+
}));
|
|
2419
|
+
return;
|
|
2420
|
+
}
|
|
2421
|
+
if (req.method === "GET" && pathname.includes("/models")) {
|
|
2422
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2423
|
+
res.end(
|
|
2424
|
+
JSON.stringify({
|
|
2425
|
+
object: "list",
|
|
2426
|
+
data: [
|
|
2427
|
+
{ id: "relayplane:auto", object: "model", owned_by: "relayplane" },
|
|
2428
|
+
{ id: "relayplane:cost", object: "model", owned_by: "relayplane" },
|
|
2429
|
+
{ id: "relayplane:quality", object: "model", owned_by: "relayplane" }
|
|
2430
|
+
]
|
|
2431
|
+
})
|
|
2432
|
+
);
|
|
2433
|
+
return;
|
|
2434
|
+
}
|
|
2435
|
+
if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
|
|
2365
2436
|
res.writeHead(404, { "Content-Type": "application/json" });
|
|
2366
2437
|
res.end(JSON.stringify({ error: "Not found" }));
|
|
2367
2438
|
return;
|
|
@@ -2484,9 +2555,11 @@ async function startProxy(config = {}) {
|
|
|
2484
2555
|
return new Promise((resolve, reject) => {
|
|
2485
2556
|
server.on("error", reject);
|
|
2486
2557
|
server.listen(port, host, () => {
|
|
2558
|
+
serverStartTime = Date.now();
|
|
2487
2559
|
console.log(`RelayPlane proxy listening on http://${host}:${port}`);
|
|
2488
2560
|
console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
|
|
2489
2561
|
console.log(` Endpoint: POST /v1/chat/completions`);
|
|
2562
|
+
console.log(` Stats: GET /stats, /runs, /health`);
|
|
2490
2563
|
console.log(` Streaming: \u2705 Enabled`);
|
|
2491
2564
|
resolve(server);
|
|
2492
2565
|
});
|
|
@@ -2549,11 +2622,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2549
2622
|
log(`Streaming error: ${err}`);
|
|
2550
2623
|
}
|
|
2551
2624
|
const durationMs = Date.now() - startTime;
|
|
2625
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2626
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2552
2627
|
relay.run({
|
|
2553
2628
|
prompt: promptText.slice(0, 500),
|
|
2554
2629
|
taskType,
|
|
2555
2630
|
model: `${targetProvider}:${targetModel}`
|
|
2556
2631
|
}).then((runResult) => {
|
|
2632
|
+
recentRuns.unshift({
|
|
2633
|
+
runId: runResult.runId,
|
|
2634
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2635
|
+
model: modelKey,
|
|
2636
|
+
taskType,
|
|
2637
|
+
confidence,
|
|
2638
|
+
mode: routingMode,
|
|
2639
|
+
durationMs,
|
|
2640
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2641
|
+
});
|
|
2642
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2643
|
+
recentRuns.pop();
|
|
2644
|
+
}
|
|
2557
2645
|
log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
|
|
2558
2646
|
}).catch((err) => {
|
|
2559
2647
|
log(`Failed to record run: ${err}`);
|
|
@@ -2624,15 +2712,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2624
2712
|
return;
|
|
2625
2713
|
}
|
|
2626
2714
|
const durationMs = Date.now() - startTime;
|
|
2715
|
+
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2716
|
+
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2627
2717
|
try {
|
|
2628
2718
|
const runResult = await relay.run({
|
|
2629
2719
|
prompt: promptText.slice(0, 500),
|
|
2630
2720
|
taskType,
|
|
2631
2721
|
model: `${targetProvider}:${targetModel}`
|
|
2632
2722
|
});
|
|
2723
|
+
recentRuns.unshift({
|
|
2724
|
+
runId: runResult.runId,
|
|
2725
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2726
|
+
model: modelKey,
|
|
2727
|
+
taskType,
|
|
2728
|
+
confidence,
|
|
2729
|
+
mode: routingMode,
|
|
2730
|
+
durationMs,
|
|
2731
|
+
promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
|
|
2732
|
+
});
|
|
2733
|
+
if (recentRuns.length > MAX_RECENT_RUNS) {
|
|
2734
|
+
recentRuns.pop();
|
|
2735
|
+
}
|
|
2633
2736
|
responseData["_relayplane"] = {
|
|
2634
2737
|
runId: runResult.runId,
|
|
2635
|
-
routedTo:
|
|
2738
|
+
routedTo: modelKey,
|
|
2636
2739
|
taskType,
|
|
2637
2740
|
confidence,
|
|
2638
2741
|
durationMs,
|