@relayplane/proxy 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -94,11 +94,14 @@ RelayPlane doesn't just route — it **learns from every request**:
94
94
  - **Local Intelligence** — All learning happens in your local SQLite DB
95
95
 
96
96
  ```bash
97
- # View your routing stats
97
+ # View your routing stats (last 7 days)
98
98
  npx @relayplane/proxy stats
99
99
 
100
- # Query the raw data
101
- sqlite3 ~/.relayplane/data.db "SELECT model, outcome, COUNT(*) FROM runs GROUP BY model, outcome"
100
+ # View last 30 days
101
+ npx @relayplane/proxy stats --days 30
102
+
103
+ # Query the raw data directly
104
+ sqlite3 ~/.relayplane/data.db "SELECT model, task_type, COUNT(*) FROM runs GROUP BY model, task_type"
102
105
  ```
103
106
 
104
107
  Unlike static routing rules, RelayPlane adapts to **your** usage patterns.
package/dist/cli.js CHANGED
@@ -25,6 +25,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
25
25
 
26
26
  // src/proxy.ts
27
27
  var http = __toESM(require("http"));
28
+ var url = __toESM(require("url"));
28
29
 
29
30
  // src/storage/store.ts
30
31
  var import_better_sqlite3 = __toESM(require("better-sqlite3"));
@@ -155,11 +156,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
155
156
  INSERT OR IGNORE INTO schema_version (version) VALUES (1);
156
157
  `;
157
158
  var DEFAULT_ROUTING_RULES = [
158
- { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
159
- { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
159
+ // Complex tasks → Sonnet (need reasoning & quality)
160
+ { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
161
+ { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
162
+ { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
163
+ { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
164
+ // Simple tasks → Haiku (cost efficient)
160
165
  { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
161
- { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
162
- { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
163
166
  { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
164
167
  { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
165
168
  { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
@@ -1589,6 +1592,11 @@ ${input.prompt}` : input.prompt;
1589
1592
  };
1590
1593
 
1591
1594
  // src/proxy.ts
1595
+ var VERSION = "0.1.7";
1596
+ var recentRuns = [];
1597
+ var MAX_RECENT_RUNS = 100;
1598
+ var modelCounts = {};
1599
+ var serverStartTime = 0;
1592
1600
  var DEFAULT_ENDPOINTS = {
1593
1601
  anthropic: {
1594
1602
  baseUrl: "https://api.anthropic.com/v1",
@@ -1626,11 +1634,14 @@ var MODEL_MAPPING = {
1626
1634
  "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
1627
1635
  };
1628
1636
  var DEFAULT_ROUTING = {
1629
- code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1630
- code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1637
+ // Complex tasks → Sonnet (need reasoning & quality)
1638
+ code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1639
+ analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1640
+ creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1641
+ // Medium tasks → Sonnet (benefit from better model)
1642
+ code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
1643
+ // Simple tasks → Haiku (cost efficient)
1631
1644
  summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1632
- analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1633
- creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1634
1645
  data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1635
1646
  translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
1636
1647
  question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
@@ -1882,9 +1893,9 @@ function convertMessagesToGemini(messages) {
1882
1893
  return { text: p.text };
1883
1894
  }
1884
1895
  if (p.type === "image_url" && p.image_url?.url) {
1885
- const url = p.image_url.url;
1886
- if (url.startsWith("data:")) {
1887
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1896
+ const url2 = p.image_url.url;
1897
+ if (url2.startsWith("data:")) {
1898
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1888
1899
  if (match) {
1889
1900
  return {
1890
1901
  inline_data: {
@@ -1894,7 +1905,7 @@ function convertMessagesToGemini(messages) {
1894
1905
  };
1895
1906
  }
1896
1907
  }
1897
- return { text: `[Image: ${url}]` };
1908
+ return { text: `[Image: ${url2}]` };
1898
1909
  }
1899
1910
  return { text: "" };
1900
1911
  });
@@ -2308,28 +2319,88 @@ async function startProxy(config = {}) {
2308
2319
  };
2309
2320
  const server = http.createServer(async (req, res) => {
2310
2321
  res.setHeader("Access-Control-Allow-Origin", "*");
2311
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2322
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2312
2323
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2313
2324
  if (req.method === "OPTIONS") {
2314
2325
  res.writeHead(204);
2315
2326
  res.end();
2316
2327
  return;
2317
2328
  }
2318
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2319
- if (req.method === "GET" && req.url?.includes("/models")) {
2320
- res.writeHead(200, { "Content-Type": "application/json" });
2321
- res.end(
2322
- JSON.stringify({
2323
- object: "list",
2324
- data: [
2325
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2326
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2327
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2328
- ]
2329
- })
2330
- );
2331
- return;
2329
+ const parsedUrl = url.parse(req.url || "", true);
2330
+ const pathname = parsedUrl.pathname || "";
2331
+ if (req.method === "GET" && pathname === "/health") {
2332
+ const uptimeMs = Date.now() - serverStartTime;
2333
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2334
+ const hours = Math.floor(uptimeSecs / 3600);
2335
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2336
+ const secs = uptimeSecs % 60;
2337
+ const providers = {};
2338
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2339
+ providers[name] = !!process.env[config2.apiKeyEnv];
2332
2340
  }
2341
+ res.writeHead(200, { "Content-Type": "application/json" });
2342
+ res.end(JSON.stringify({
2343
+ status: "ok",
2344
+ version: VERSION,
2345
+ uptime: `${hours}h ${mins}m ${secs}s`,
2346
+ uptimeMs,
2347
+ providers,
2348
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2349
+ }));
2350
+ return;
2351
+ }
2352
+ if (req.method === "GET" && pathname === "/stats") {
2353
+ const stats = relay.stats();
2354
+ const savings = relay.savingsReport(30);
2355
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2356
+ const modelDistribution = {};
2357
+ for (const [model, count] of Object.entries(modelCounts)) {
2358
+ modelDistribution[model] = {
2359
+ count,
2360
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2361
+ };
2362
+ }
2363
+ res.writeHead(200, { "Content-Type": "application/json" });
2364
+ res.end(JSON.stringify({
2365
+ totalRuns,
2366
+ savings: {
2367
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2368
+ actualCostUsd: savings.actualCost.toFixed(4),
2369
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2370
+ savedUsd: savings.savings.toFixed(4)
2371
+ },
2372
+ modelDistribution,
2373
+ byTaskType: stats.byTaskType,
2374
+ period: stats.period
2375
+ }));
2376
+ return;
2377
+ }
2378
+ if (req.method === "GET" && pathname === "/runs") {
2379
+ const limitParam = parsedUrl.query["limit"];
2380
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2381
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2382
+ res.writeHead(200, { "Content-Type": "application/json" });
2383
+ res.end(JSON.stringify({
2384
+ runs: recentRuns.slice(0, limit),
2385
+ total: recentRuns.length
2386
+ }));
2387
+ return;
2388
+ }
2389
+ if (req.method === "GET" && pathname.includes("/models")) {
2390
+ res.writeHead(200, { "Content-Type": "application/json" });
2391
+ res.end(
2392
+ JSON.stringify({
2393
+ object: "list",
2394
+ data: [
2395
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2396
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2397
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2398
+ ]
2399
+ })
2400
+ );
2401
+ return;
2402
+ }
2403
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2333
2404
  res.writeHead(404, { "Content-Type": "application/json" });
2334
2405
  res.end(JSON.stringify({ error: "Not found" }));
2335
2406
  return;
@@ -2452,9 +2523,11 @@ async function startProxy(config = {}) {
2452
2523
  return new Promise((resolve, reject) => {
2453
2524
  server.on("error", reject);
2454
2525
  server.listen(port, host, () => {
2526
+ serverStartTime = Date.now();
2455
2527
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2456
2528
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2457
2529
  console.log(` Endpoint: POST /v1/chat/completions`);
2530
+ console.log(` Stats: GET /stats, /runs, /health`);
2458
2531
  console.log(` Streaming: \u2705 Enabled`);
2459
2532
  resolve(server);
2460
2533
  });
@@ -2517,11 +2590,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2517
2590
  log(`Streaming error: ${err}`);
2518
2591
  }
2519
2592
  const durationMs = Date.now() - startTime;
2593
+ const modelKey = `${targetProvider}/${targetModel}`;
2594
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2520
2595
  relay.run({
2521
2596
  prompt: promptText.slice(0, 500),
2522
2597
  taskType,
2523
2598
  model: `${targetProvider}:${targetModel}`
2524
2599
  }).then((runResult) => {
2600
+ recentRuns.unshift({
2601
+ runId: runResult.runId,
2602
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2603
+ model: modelKey,
2604
+ taskType,
2605
+ confidence,
2606
+ mode: routingMode,
2607
+ durationMs,
2608
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2609
+ });
2610
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2611
+ recentRuns.pop();
2612
+ }
2525
2613
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2526
2614
  }).catch((err) => {
2527
2615
  log(`Failed to record run: ${err}`);
@@ -2592,15 +2680,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2592
2680
  return;
2593
2681
  }
2594
2682
  const durationMs = Date.now() - startTime;
2683
+ const modelKey = `${targetProvider}/${targetModel}`;
2684
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2595
2685
  try {
2596
2686
  const runResult = await relay.run({
2597
2687
  prompt: promptText.slice(0, 500),
2598
2688
  taskType,
2599
2689
  model: `${targetProvider}:${targetModel}`
2600
2690
  });
2691
+ recentRuns.unshift({
2692
+ runId: runResult.runId,
2693
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2694
+ model: modelKey,
2695
+ taskType,
2696
+ confidence,
2697
+ mode: routingMode,
2698
+ durationMs,
2699
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2700
+ });
2701
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2702
+ recentRuns.pop();
2703
+ }
2601
2704
  responseData["_relayplane"] = {
2602
2705
  runId: runResult.runId,
2603
- routedTo: `${targetProvider}/${targetModel}`,
2706
+ routedTo: modelKey,
2604
2707
  taskType,
2605
2708
  confidence,
2606
2709
  durationMs,