@relayplane/proxy 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  // src/proxy.ts
4
4
  import * as http from "http";
5
+ import * as url from "url";
5
6
 
6
7
  // src/storage/store.ts
7
8
  import Database from "better-sqlite3";
@@ -132,11 +133,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
132
133
  INSERT OR IGNORE INTO schema_version (version) VALUES (1);
133
134
  `;
134
135
  var DEFAULT_ROUTING_RULES = [
135
- { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
136
- { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
136
+ // Complex tasks → Sonnet (need reasoning & quality)
137
+ { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
138
+ { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
139
+ { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
140
+ { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
141
+ // Simple tasks → Haiku (cost efficient)
137
142
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
138
- { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
139
- { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
140
143
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
141
144
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
142
145
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
@@ -1566,6 +1569,11 @@ ${input.prompt}` : input.prompt;
1566
1569
  };
1567
1570
 
1568
1571
  // src/proxy.ts
1572
+ var VERSION = "0.1.7";
1573
+ var recentRuns = [];
1574
+ var MAX_RECENT_RUNS = 100;
1575
+ var modelCounts = {};
1576
+ var serverStartTime = 0;
1569
1577
  var DEFAULT_ENDPOINTS = {
1570
1578
  anthropic: {
1571
1579
  baseUrl: "https://api.anthropic.com/v1",
@@ -1603,11 +1611,14 @@ var MODEL_MAPPING = {
1603
1611
  "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
1604
1612
  };
1605
1613
  var DEFAULT_ROUTING = {
1606
- code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1607
- code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1614
+ // Complex tasks → Sonnet (need reasoning & quality)
1615
+ code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1616
+ analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1617
+ creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1618
+ // Medium tasks → Sonnet (benefit from better model)
1619
+ code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1620
+ // Simple tasks → Haiku (cost efficient)
1608
1621
  summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1609
- analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1610
- creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1611
1622
  data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1612
1623
  translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1613
1624
  question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
@@ -1859,9 +1870,9 @@ function convertMessagesToGemini(messages) {
1859
1870
  return { text: p.text };
1860
1871
  }
1861
1872
  if (p.type === "image_url" && p.image_url?.url) {
1862
- const url = p.image_url.url;
1863
- if (url.startsWith("data:")) {
1864
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1873
+ const url2 = p.image_url.url;
1874
+ if (url2.startsWith("data:")) {
1875
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1865
1876
  if (match) {
1866
1877
  return {
1867
1878
  inline_data: {
@@ -1871,7 +1882,7 @@ function convertMessagesToGemini(messages) {
1871
1882
  };
1872
1883
  }
1873
1884
  }
1874
- return { text: `[Image: ${url}]` };
1885
+ return { text: `[Image: ${url2}]` };
1875
1886
  }
1876
1887
  return { text: "" };
1877
1888
  });
@@ -2285,28 +2296,88 @@ async function startProxy(config = {}) {
2285
2296
  };
2286
2297
  const server = http.createServer(async (req, res) => {
2287
2298
  res.setHeader("Access-Control-Allow-Origin", "*");
2288
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2299
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2289
2300
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2290
2301
  if (req.method === "OPTIONS") {
2291
2302
  res.writeHead(204);
2292
2303
  res.end();
2293
2304
  return;
2294
2305
  }
2295
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2296
- if (req.method === "GET" && req.url?.includes("/models")) {
2297
- res.writeHead(200, { "Content-Type": "application/json" });
2298
- res.end(
2299
- JSON.stringify({
2300
- object: "list",
2301
- data: [
2302
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2303
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2304
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2305
- ]
2306
- })
2307
- );
2308
- return;
2306
+ const parsedUrl = url.parse(req.url || "", true);
2307
+ const pathname = parsedUrl.pathname || "";
2308
+ if (req.method === "GET" && pathname === "/health") {
2309
+ const uptimeMs = Date.now() - serverStartTime;
2310
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2311
+ const hours = Math.floor(uptimeSecs / 3600);
2312
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2313
+ const secs = uptimeSecs % 60;
2314
+ const providers = {};
2315
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2316
+ providers[name] = !!process.env[config2.apiKeyEnv];
2309
2317
  }
2318
+ res.writeHead(200, { "Content-Type": "application/json" });
2319
+ res.end(JSON.stringify({
2320
+ status: "ok",
2321
+ version: VERSION,
2322
+ uptime: `${hours}h ${mins}m ${secs}s`,
2323
+ uptimeMs,
2324
+ providers,
2325
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2326
+ }));
2327
+ return;
2328
+ }
2329
+ if (req.method === "GET" && pathname === "/stats") {
2330
+ const stats = relay.stats();
2331
+ const savings = relay.savingsReport(30);
2332
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2333
+ const modelDistribution = {};
2334
+ for (const [model, count] of Object.entries(modelCounts)) {
2335
+ modelDistribution[model] = {
2336
+ count,
2337
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2338
+ };
2339
+ }
2340
+ res.writeHead(200, { "Content-Type": "application/json" });
2341
+ res.end(JSON.stringify({
2342
+ totalRuns,
2343
+ savings: {
2344
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2345
+ actualCostUsd: savings.actualCost.toFixed(4),
2346
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2347
+ savedUsd: savings.savings.toFixed(4)
2348
+ },
2349
+ modelDistribution,
2350
+ byTaskType: stats.byTaskType,
2351
+ period: stats.period
2352
+ }));
2353
+ return;
2354
+ }
2355
+ if (req.method === "GET" && pathname === "/runs") {
2356
+ const limitParam = parsedUrl.query["limit"];
2357
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2358
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2359
+ res.writeHead(200, { "Content-Type": "application/json" });
2360
+ res.end(JSON.stringify({
2361
+ runs: recentRuns.slice(0, limit),
2362
+ total: recentRuns.length
2363
+ }));
2364
+ return;
2365
+ }
2366
+ if (req.method === "GET" && pathname.includes("/models")) {
2367
+ res.writeHead(200, { "Content-Type": "application/json" });
2368
+ res.end(
2369
+ JSON.stringify({
2370
+ object: "list",
2371
+ data: [
2372
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2373
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2374
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2375
+ ]
2376
+ })
2377
+ );
2378
+ return;
2379
+ }
2380
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2310
2381
  res.writeHead(404, { "Content-Type": "application/json" });
2311
2382
  res.end(JSON.stringify({ error: "Not found" }));
2312
2383
  return;
@@ -2429,9 +2500,11 @@ async function startProxy(config = {}) {
2429
2500
  return new Promise((resolve, reject) => {
2430
2501
  server.on("error", reject);
2431
2502
  server.listen(port, host, () => {
2503
+ serverStartTime = Date.now();
2432
2504
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2433
2505
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2434
2506
  console.log(` Endpoint: POST /v1/chat/completions`);
2507
+ console.log(` Stats: GET /stats, /runs, /health`);
2435
2508
  console.log(` Streaming: \u2705 Enabled`);
2436
2509
  resolve(server);
2437
2510
  });
@@ -2494,11 +2567,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2494
2567
  log(`Streaming error: ${err}`);
2495
2568
  }
2496
2569
  const durationMs = Date.now() - startTime;
2570
+ const modelKey = `${targetProvider}/${targetModel}`;
2571
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2497
2572
  relay.run({
2498
2573
  prompt: promptText.slice(0, 500),
2499
2574
  taskType,
2500
2575
  model: `${targetProvider}:${targetModel}`
2501
2576
  }).then((runResult) => {
2577
+ recentRuns.unshift({
2578
+ runId: runResult.runId,
2579
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2580
+ model: modelKey,
2581
+ taskType,
2582
+ confidence,
2583
+ mode: routingMode,
2584
+ durationMs,
2585
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2586
+ });
2587
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2588
+ recentRuns.pop();
2589
+ }
2502
2590
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2503
2591
  }).catch((err) => {
2504
2592
  log(`Failed to record run: ${err}`);
@@ -2569,15 +2657,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2569
2657
  return;
2570
2658
  }
2571
2659
  const durationMs = Date.now() - startTime;
2660
+ const modelKey = `${targetProvider}/${targetModel}`;
2661
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2572
2662
  try {
2573
2663
  const runResult = await relay.run({
2574
2664
  prompt: promptText.slice(0, 500),
2575
2665
  taskType,
2576
2666
  model: `${targetProvider}:${targetModel}`
2577
2667
  });
2668
+ recentRuns.unshift({
2669
+ runId: runResult.runId,
2670
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2671
+ model: modelKey,
2672
+ taskType,
2673
+ confidence,
2674
+ mode: routingMode,
2675
+ durationMs,
2676
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2677
+ });
2678
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2679
+ recentRuns.pop();
2680
+ }
2578
2681
  responseData["_relayplane"] = {
2579
2682
  runId: runResult.runId,
2580
- routedTo: `${targetProvider}/${targetModel}`,
2683
+ routedTo: modelKey,
2581
2684
  taskType,
2582
2685
  confidence,
2583
2686
  durationMs,