@relayplane/proxy 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,5 +1,6 @@
1
1
  // src/proxy.ts
2
2
  import * as http from "http";
3
+ import * as url from "url";
3
4
 
4
5
  // src/storage/store.ts
5
6
  import Database from "better-sqlite3";
@@ -130,11 +131,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
130
131
  INSERT OR IGNORE INTO schema_version (version) VALUES (1);
131
132
  `;
132
133
  var DEFAULT_ROUTING_RULES = [
133
- { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
134
- { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
134
+ // Complex tasks → Sonnet (need reasoning & quality)
135
+ { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
136
+ { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
137
+ { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
138
+ { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
139
+ // Simple tasks → Haiku (cost efficient)
135
140
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
136
- { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
137
- { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
138
141
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
139
142
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
140
143
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
@@ -1568,6 +1571,11 @@ ${input.prompt}` : input.prompt;
1568
1571
  };
1569
1572
 
1570
1573
  // src/proxy.ts
1574
+ var VERSION = "0.1.7";
1575
+ var recentRuns = [];
1576
+ var MAX_RECENT_RUNS = 100;
1577
+ var modelCounts = {};
1578
+ var serverStartTime = 0;
1571
1579
  var DEFAULT_ENDPOINTS = {
1572
1580
  anthropic: {
1573
1581
  baseUrl: "https://api.anthropic.com/v1",
@@ -1605,11 +1613,14 @@ var MODEL_MAPPING = {
1605
1613
  "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
1606
1614
  };
1607
1615
  var DEFAULT_ROUTING = {
1608
- code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1609
- code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1616
+ // Complex tasks → Sonnet (need reasoning & quality)
1617
+ code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1618
+ analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1619
+ creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1620
+ // Medium tasks → Sonnet (benefit from better model)
1621
+ code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1622
+ // Simple tasks → Haiku (cost efficient)
1610
1623
  summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1611
- analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1612
- creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1613
1624
  data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1614
1625
  translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1615
1626
  question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
@@ -1861,9 +1872,9 @@ function convertMessagesToGemini(messages) {
1861
1872
  return { text: p.text };
1862
1873
  }
1863
1874
  if (p.type === "image_url" && p.image_url?.url) {
1864
- const url = p.image_url.url;
1865
- if (url.startsWith("data:")) {
1866
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1875
+ const url2 = p.image_url.url;
1876
+ if (url2.startsWith("data:")) {
1877
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1867
1878
  if (match) {
1868
1879
  return {
1869
1880
  inline_data: {
@@ -1873,7 +1884,7 @@ function convertMessagesToGemini(messages) {
1873
1884
  };
1874
1885
  }
1875
1886
  }
1876
- return { text: `[Image: ${url}]` };
1887
+ return { text: `[Image: ${url2}]` };
1877
1888
  }
1878
1889
  return { text: "" };
1879
1890
  });
@@ -2287,28 +2298,88 @@ async function startProxy(config = {}) {
2287
2298
  };
2288
2299
  const server = http.createServer(async (req, res) => {
2289
2300
  res.setHeader("Access-Control-Allow-Origin", "*");
2290
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2301
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2291
2302
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2292
2303
  if (req.method === "OPTIONS") {
2293
2304
  res.writeHead(204);
2294
2305
  res.end();
2295
2306
  return;
2296
2307
  }
2297
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2298
- if (req.method === "GET" && req.url?.includes("/models")) {
2299
- res.writeHead(200, { "Content-Type": "application/json" });
2300
- res.end(
2301
- JSON.stringify({
2302
- object: "list",
2303
- data: [
2304
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2305
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2306
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2307
- ]
2308
- })
2309
- );
2310
- return;
2308
+ const parsedUrl = url.parse(req.url || "", true);
2309
+ const pathname = parsedUrl.pathname || "";
2310
+ if (req.method === "GET" && pathname === "/health") {
2311
+ const uptimeMs = Date.now() - serverStartTime;
2312
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2313
+ const hours = Math.floor(uptimeSecs / 3600);
2314
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2315
+ const secs = uptimeSecs % 60;
2316
+ const providers = {};
2317
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2318
+ providers[name] = !!process.env[config2.apiKeyEnv];
2311
2319
  }
2320
+ res.writeHead(200, { "Content-Type": "application/json" });
2321
+ res.end(JSON.stringify({
2322
+ status: "ok",
2323
+ version: VERSION,
2324
+ uptime: `${hours}h ${mins}m ${secs}s`,
2325
+ uptimeMs,
2326
+ providers,
2327
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2328
+ }));
2329
+ return;
2330
+ }
2331
+ if (req.method === "GET" && pathname === "/stats") {
2332
+ const stats = relay.stats();
2333
+ const savings = relay.savingsReport(30);
2334
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2335
+ const modelDistribution = {};
2336
+ for (const [model, count] of Object.entries(modelCounts)) {
2337
+ modelDistribution[model] = {
2338
+ count,
2339
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2340
+ };
2341
+ }
2342
+ res.writeHead(200, { "Content-Type": "application/json" });
2343
+ res.end(JSON.stringify({
2344
+ totalRuns,
2345
+ savings: {
2346
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2347
+ actualCostUsd: savings.actualCost.toFixed(4),
2348
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2349
+ savedUsd: savings.savings.toFixed(4)
2350
+ },
2351
+ modelDistribution,
2352
+ byTaskType: stats.byTaskType,
2353
+ period: stats.period
2354
+ }));
2355
+ return;
2356
+ }
2357
+ if (req.method === "GET" && pathname === "/runs") {
2358
+ const limitParam = parsedUrl.query["limit"];
2359
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2360
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2361
+ res.writeHead(200, { "Content-Type": "application/json" });
2362
+ res.end(JSON.stringify({
2363
+ runs: recentRuns.slice(0, limit),
2364
+ total: recentRuns.length
2365
+ }));
2366
+ return;
2367
+ }
2368
+ if (req.method === "GET" && pathname.includes("/models")) {
2369
+ res.writeHead(200, { "Content-Type": "application/json" });
2370
+ res.end(
2371
+ JSON.stringify({
2372
+ object: "list",
2373
+ data: [
2374
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2375
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2376
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2377
+ ]
2378
+ })
2379
+ );
2380
+ return;
2381
+ }
2382
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2312
2383
  res.writeHead(404, { "Content-Type": "application/json" });
2313
2384
  res.end(JSON.stringify({ error: "Not found" }));
2314
2385
  return;
@@ -2431,9 +2502,11 @@ async function startProxy(config = {}) {
2431
2502
  return new Promise((resolve, reject) => {
2432
2503
  server.on("error", reject);
2433
2504
  server.listen(port, host, () => {
2505
+ serverStartTime = Date.now();
2434
2506
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2435
2507
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2436
2508
  console.log(` Endpoint: POST /v1/chat/completions`);
2509
+ console.log(` Stats: GET /stats, /runs, /health`);
2437
2510
  console.log(` Streaming: \u2705 Enabled`);
2438
2511
  resolve(server);
2439
2512
  });
@@ -2496,11 +2569,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2496
2569
  log(`Streaming error: ${err}`);
2497
2570
  }
2498
2571
  const durationMs = Date.now() - startTime;
2572
+ const modelKey = `${targetProvider}/${targetModel}`;
2573
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2499
2574
  relay.run({
2500
2575
  prompt: promptText.slice(0, 500),
2501
2576
  taskType,
2502
2577
  model: `${targetProvider}:${targetModel}`
2503
2578
  }).then((runResult) => {
2579
+ recentRuns.unshift({
2580
+ runId: runResult.runId,
2581
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2582
+ model: modelKey,
2583
+ taskType,
2584
+ confidence,
2585
+ mode: routingMode,
2586
+ durationMs,
2587
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2588
+ });
2589
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2590
+ recentRuns.pop();
2591
+ }
2504
2592
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2505
2593
  }).catch((err) => {
2506
2594
  log(`Failed to record run: ${err}`);
@@ -2571,15 +2659,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2571
2659
  return;
2572
2660
  }
2573
2661
  const durationMs = Date.now() - startTime;
2662
+ const modelKey = `${targetProvider}/${targetModel}`;
2663
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2574
2664
  try {
2575
2665
  const runResult = await relay.run({
2576
2666
  prompt: promptText.slice(0, 500),
2577
2667
  taskType,
2578
2668
  model: `${targetProvider}:${targetModel}`
2579
2669
  });
2670
+ recentRuns.unshift({
2671
+ runId: runResult.runId,
2672
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2673
+ model: modelKey,
2674
+ taskType,
2675
+ confidence,
2676
+ mode: routingMode,
2677
+ durationMs,
2678
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2679
+ });
2680
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2681
+ recentRuns.pop();
2682
+ }
2580
2683
  responseData["_relayplane"] = {
2581
2684
  runId: runResult.runId,
2582
- routedTo: `${targetProvider}/${targetModel}`,
2685
+ routedTo: modelKey,
2583
2686
  taskType,
2584
2687
  confidence,
2585
2688
  durationMs,