@relayplane/proxy 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -53,6 +53,7 @@ module.exports = __toCommonJS(index_exports);
53
53
 
54
54
  // src/proxy.ts
55
55
  var http = __toESM(require("http"));
56
+ var url = __toESM(require("url"));
56
57
 
57
58
  // src/storage/store.ts
58
59
  var import_better_sqlite3 = __toESM(require("better-sqlite3"));
@@ -183,11 +184,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
183
184
  INSERT OR IGNORE INTO schema_version (version) VALUES (1);
184
185
  `;
185
186
  var DEFAULT_ROUTING_RULES = [
186
- { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
187
- { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
187
+ // Complex tasks → Sonnet (need reasoning & quality)
188
+ { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
189
+ { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
190
+ { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
191
+ { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
192
+ // Simple tasks → Haiku (cost efficient)
188
193
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
189
- { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
190
- { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
191
194
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
192
195
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
193
196
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
@@ -1621,6 +1624,11 @@ ${input.prompt}` : input.prompt;
1621
1624
  };
1622
1625
 
1623
1626
  // src/proxy.ts
1627
+ var VERSION = "0.1.7";
1628
+ var recentRuns = [];
1629
+ var MAX_RECENT_RUNS = 100;
1630
+ var modelCounts = {};
1631
+ var serverStartTime = 0;
1624
1632
  var DEFAULT_ENDPOINTS = {
1625
1633
  anthropic: {
1626
1634
  baseUrl: "https://api.anthropic.com/v1",
@@ -1658,11 +1666,14 @@ var MODEL_MAPPING = {
1658
1666
  "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
1659
1667
  };
1660
1668
  var DEFAULT_ROUTING = {
1661
- code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1662
- code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1669
+ // Complex tasks → Sonnet (need reasoning & quality)
1670
+ code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1671
+ analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1672
+ creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1673
+ // Medium tasks → Sonnet (benefit from better model)
1674
+ code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1675
+ // Simple tasks → Haiku (cost efficient)
1663
1676
  summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1664
- analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1665
- creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1666
1677
  data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1667
1678
  translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1668
1679
  question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
@@ -1914,9 +1925,9 @@ function convertMessagesToGemini(messages) {
1914
1925
  return { text: p.text };
1915
1926
  }
1916
1927
  if (p.type === "image_url" && p.image_url?.url) {
1917
- const url = p.image_url.url;
1918
- if (url.startsWith("data:")) {
1919
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1928
+ const url2 = p.image_url.url;
1929
+ if (url2.startsWith("data:")) {
1930
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1920
1931
  if (match) {
1921
1932
  return {
1922
1933
  inline_data: {
@@ -1926,7 +1937,7 @@ function convertMessagesToGemini(messages) {
1926
1937
  };
1927
1938
  }
1928
1939
  }
1929
- return { text: `[Image: ${url}]` };
1940
+ return { text: `[Image: ${url2}]` };
1930
1941
  }
1931
1942
  return { text: "" };
1932
1943
  });
@@ -2340,28 +2351,88 @@ async function startProxy(config = {}) {
2340
2351
  };
2341
2352
  const server = http.createServer(async (req, res) => {
2342
2353
  res.setHeader("Access-Control-Allow-Origin", "*");
2343
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2354
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2344
2355
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2345
2356
  if (req.method === "OPTIONS") {
2346
2357
  res.writeHead(204);
2347
2358
  res.end();
2348
2359
  return;
2349
2360
  }
2350
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2351
- if (req.method === "GET" && req.url?.includes("/models")) {
2352
- res.writeHead(200, { "Content-Type": "application/json" });
2353
- res.end(
2354
- JSON.stringify({
2355
- object: "list",
2356
- data: [
2357
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2358
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2359
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2360
- ]
2361
- })
2362
- );
2363
- return;
2361
+ const parsedUrl = url.parse(req.url || "", true);
2362
+ const pathname = parsedUrl.pathname || "";
2363
+ if (req.method === "GET" && pathname === "/health") {
2364
+ const uptimeMs = Date.now() - serverStartTime;
2365
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2366
+ const hours = Math.floor(uptimeSecs / 3600);
2367
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2368
+ const secs = uptimeSecs % 60;
2369
+ const providers = {};
2370
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2371
+ providers[name] = !!process.env[config2.apiKeyEnv];
2364
2372
  }
2373
+ res.writeHead(200, { "Content-Type": "application/json" });
2374
+ res.end(JSON.stringify({
2375
+ status: "ok",
2376
+ version: VERSION,
2377
+ uptime: `${hours}h ${mins}m ${secs}s`,
2378
+ uptimeMs,
2379
+ providers,
2380
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2381
+ }));
2382
+ return;
2383
+ }
2384
+ if (req.method === "GET" && pathname === "/stats") {
2385
+ const stats = relay.stats();
2386
+ const savings = relay.savingsReport(30);
2387
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2388
+ const modelDistribution = {};
2389
+ for (const [model, count] of Object.entries(modelCounts)) {
2390
+ modelDistribution[model] = {
2391
+ count,
2392
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2393
+ };
2394
+ }
2395
+ res.writeHead(200, { "Content-Type": "application/json" });
2396
+ res.end(JSON.stringify({
2397
+ totalRuns,
2398
+ savings: {
2399
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2400
+ actualCostUsd: savings.actualCost.toFixed(4),
2401
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2402
+ savedUsd: savings.savings.toFixed(4)
2403
+ },
2404
+ modelDistribution,
2405
+ byTaskType: stats.byTaskType,
2406
+ period: stats.period
2407
+ }));
2408
+ return;
2409
+ }
2410
+ if (req.method === "GET" && pathname === "/runs") {
2411
+ const limitParam = parsedUrl.query["limit"];
2412
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2413
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2414
+ res.writeHead(200, { "Content-Type": "application/json" });
2415
+ res.end(JSON.stringify({
2416
+ runs: recentRuns.slice(0, limit),
2417
+ total: recentRuns.length
2418
+ }));
2419
+ return;
2420
+ }
2421
+ if (req.method === "GET" && pathname.includes("/models")) {
2422
+ res.writeHead(200, { "Content-Type": "application/json" });
2423
+ res.end(
2424
+ JSON.stringify({
2425
+ object: "list",
2426
+ data: [
2427
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2428
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2429
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2430
+ ]
2431
+ })
2432
+ );
2433
+ return;
2434
+ }
2435
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2365
2436
  res.writeHead(404, { "Content-Type": "application/json" });
2366
2437
  res.end(JSON.stringify({ error: "Not found" }));
2367
2438
  return;
@@ -2484,9 +2555,11 @@ async function startProxy(config = {}) {
2484
2555
  return new Promise((resolve, reject) => {
2485
2556
  server.on("error", reject);
2486
2557
  server.listen(port, host, () => {
2558
+ serverStartTime = Date.now();
2487
2559
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2488
2560
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2489
2561
  console.log(` Endpoint: POST /v1/chat/completions`);
2562
+ console.log(` Stats: GET /stats, /runs, /health`);
2490
2563
  console.log(` Streaming: \u2705 Enabled`);
2491
2564
  resolve(server);
2492
2565
  });
@@ -2549,11 +2622,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2549
2622
  log(`Streaming error: ${err}`);
2550
2623
  }
2551
2624
  const durationMs = Date.now() - startTime;
2625
+ const modelKey = `${targetProvider}/${targetModel}`;
2626
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2552
2627
  relay.run({
2553
2628
  prompt: promptText.slice(0, 500),
2554
2629
  taskType,
2555
2630
  model: `${targetProvider}:${targetModel}`
2556
2631
  }).then((runResult) => {
2632
+ recentRuns.unshift({
2633
+ runId: runResult.runId,
2634
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2635
+ model: modelKey,
2636
+ taskType,
2637
+ confidence,
2638
+ mode: routingMode,
2639
+ durationMs,
2640
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2641
+ });
2642
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2643
+ recentRuns.pop();
2644
+ }
2557
2645
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2558
2646
  }).catch((err) => {
2559
2647
  log(`Failed to record run: ${err}`);
@@ -2624,15 +2712,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2624
2712
  return;
2625
2713
  }
2626
2714
  const durationMs = Date.now() - startTime;
2715
+ const modelKey = `${targetProvider}/${targetModel}`;
2716
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2627
2717
  try {
2628
2718
  const runResult = await relay.run({
2629
2719
  prompt: promptText.slice(0, 500),
2630
2720
  taskType,
2631
2721
  model: `${targetProvider}:${targetModel}`
2632
2722
  });
2723
+ recentRuns.unshift({
2724
+ runId: runResult.runId,
2725
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2726
+ model: modelKey,
2727
+ taskType,
2728
+ confidence,
2729
+ mode: routingMode,
2730
+ durationMs,
2731
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2732
+ });
2733
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2734
+ recentRuns.pop();
2735
+ }
2633
2736
  responseData["_relayplane"] = {
2634
2737
  runId: runResult.runId,
2635
- routedTo: `${targetProvider}/${targetModel}`,
2738
+ routedTo: modelKey,
2636
2739
  taskType,
2637
2740
  confidence,
2638
2741
  durationMs,