npm - @relayplane/proxy - Versions diffs - 0.1.5 → 0.1.7 - Mend

@relayplane/proxy 0.1.5 → 0.1.7

This diff shows the changes between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/cli.mjs CHANGED Viewed

@@ -2,6 +2,7 @@
 // src/proxy.ts
 import * as http from "http";
+import * as url from "url";
 // src/storage/store.ts
 import Database from "better-sqlite3";
@@ -132,11 +133,13 @@ CREATE TABLE IF NOT EXISTS schema_version (
 INSERT OR IGNORE INTO schema_version (version) VALUES (1);
 `;
 var DEFAULT_ROUTING_RULES = [
-  { taskType: "code_generation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
-  { taskType: "code_review", preferredModel: "anthropic:claude-3-5-haiku-latest" },
+  // Complex tasks → Sonnet (need reasoning & quality)
+  { taskType: "code_generation", preferredModel: "anthropic:claude-sonnet-4-20250514" },
+  { taskType: "code_review", preferredModel: "anthropic:claude-sonnet-4-20250514" },
+  { taskType: "analysis", preferredModel: "anthropic:claude-sonnet-4-20250514" },
+  { taskType: "creative_writing", preferredModel: "anthropic:claude-sonnet-4-20250514" },
+  // Simple tasks → Haiku (cost efficient)
   { taskType: "summarization", preferredModel: "anthropic:claude-3-5-haiku-latest" },
-  { taskType: "analysis", preferredModel: "anthropic:claude-3-5-haiku-latest" },
-  { taskType: "creative_writing", preferredModel: "anthropic:claude-3-5-haiku-latest" },
   { taskType: "data_extraction", preferredModel: "anthropic:claude-3-5-haiku-latest" },
   { taskType: "translation", preferredModel: "anthropic:claude-3-5-haiku-latest" },
   { taskType: "question_answering", preferredModel: "anthropic:claude-3-5-haiku-latest" },
@@ -1566,6 +1569,11 @@ ${input.prompt}` : input.prompt;
 };
 // src/proxy.ts
+var VERSION = "0.1.7";
+var recentRuns = [];
+var MAX_RECENT_RUNS = 100;
+var modelCounts = {};
+var serverStartTime = 0;
 var DEFAULT_ENDPOINTS = {
   anthropic: {
     baseUrl: "https://api.anthropic.com/v1",
@@ -1603,11 +1611,14 @@ var MODEL_MAPPING = {
   "gpt-4.1": { provider: "openai", model: "gpt-4.1" }
 };
 var DEFAULT_ROUTING = {
-  code_generation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
-  code_review: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
+  // Complex tasks → Sonnet (need reasoning & quality)
+  code_review: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
+  analysis: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
+  creative_writing: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
+  // Medium tasks → Sonnet (benefit from better model)
+  code_generation: { provider: "anthropic", model: "claude-sonnet-4-20250514" },
+  // Simple tasks → Haiku (cost efficient)
   summarization: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
-  analysis: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
-  creative_writing: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
   data_extraction: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
   translation: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
   question_answering: { provider: "anthropic", model: "claude-3-5-haiku-latest" },
@@ -1859,9 +1870,9 @@ function convertMessagesToGemini(messages) {
           return { text: p.text };
         }
         if (p.type === "image_url" && p.image_url?.url) {
-          const url = p.image_url.url;
-          if (url.startsWith("data:")) {
-            const match = url.match(/^data:([^;]+);base64,(.+)$/);
+          const url2 = p.image_url.url;
+          if (url2.startsWith("data:")) {
+            const match = url2.match(/^data:([^;]+);base64,(.+)$/);
             if (match) {
               return {
                 inline_data: {
@@ -1871,7 +1882,7 @@ function convertMessagesToGemini(messages) {
               };
             }
           }
-          return { text: `[Image: ${url}]` };
+          return { text: `[Image: ${url2}]` };
         }
         return { text: "" };
       });
@@ -2285,28 +2296,88 @@ async function startProxy(config = {}) {
   };
   const server = http.createServer(async (req, res) => {
     res.setHeader("Access-Control-Allow-Origin", "*");
-    res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
+    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
     res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
     if (req.method === "OPTIONS") {
       res.writeHead(204);
       res.end();
       return;
     }
-    if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
-      if (req.method === "GET" && req.url?.includes("/models")) {
-        res.writeHead(200, { "Content-Type": "application/json" });
-        res.end(
-          JSON.stringify({
-            object: "list",
-            data: [
-              { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
-              { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
-              { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
-            ]
-          })
-        );
-        return;
+    const parsedUrl = url.parse(req.url || "", true);
+    const pathname = parsedUrl.pathname || "";
+    if (req.method === "GET" && pathname === "/health") {
+      const uptimeMs = Date.now() - serverStartTime;
+      const uptimeSecs = Math.floor(uptimeMs / 1e3);
+      const hours = Math.floor(uptimeSecs / 3600);
+      const mins = Math.floor(uptimeSecs % 3600 / 60);
+      const secs = uptimeSecs % 60;
+      const providers = {};
+      for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
+        providers[name] = !!process.env[config2.apiKeyEnv];
       }
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        status: "ok",
+        version: VERSION,
+        uptime: `${hours}h ${mins}m ${secs}s`,
+        uptimeMs,
+        providers,
+        totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname === "/stats") {
+      const stats = relay.stats();
+      const savings = relay.savingsReport(30);
+      const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
+      const modelDistribution = {};
+      for (const [model, count] of Object.entries(modelCounts)) {
+        modelDistribution[model] = {
+          count,
+          percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
+        };
+      }
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        totalRuns,
+        savings: {
+          estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
+          actualCostUsd: savings.actualCost.toFixed(4),
+          baselineCostUsd: savings.baselineCost.toFixed(4),
+          savedUsd: savings.savings.toFixed(4)
+        },
+        modelDistribution,
+        byTaskType: stats.byTaskType,
+        period: stats.period
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname === "/runs") {
+      const limitParam = parsedUrl.query["limit"];
+      const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
+      const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        runs: recentRuns.slice(0, limit),
+        total: recentRuns.length
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname.includes("/models")) {
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(
+        JSON.stringify({
+          object: "list",
+          data: [
+            { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
+            { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
+            { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
+          ]
+        })
+      );
+      return;
+    }
+    if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
       res.writeHead(404, { "Content-Type": "application/json" });
       res.end(JSON.stringify({ error: "Not found" }));
       return;
@@ -2429,9 +2500,11 @@ async function startProxy(config = {}) {
   return new Promise((resolve, reject) => {
     server.on("error", reject);
     server.listen(port, host, () => {
+      serverStartTime = Date.now();
       console.log(`RelayPlane proxy listening on http://${host}:${port}`);
       console.log(`  Models: relayplane:auto, relayplane:cost, relayplane:quality`);
       console.log(`  Endpoint: POST /v1/chat/completions`);
+      console.log(`  Stats: GET /stats, /runs, /health`);
       console.log(`  Streaming: \u2705 Enabled`);
       resolve(server);
     });
@@ -2494,11 +2567,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
     log(`Streaming error: ${err}`);
   }
   const durationMs = Date.now() - startTime;
+  const modelKey = `${targetProvider}/${targetModel}`;
+  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
   relay.run({
     prompt: promptText.slice(0, 500),
     taskType,
     model: `${targetProvider}:${targetModel}`
   }).then((runResult) => {
+    recentRuns.unshift({
+      runId: runResult.runId,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+      model: modelKey,
+      taskType,
+      confidence,
+      mode: routingMode,
+      durationMs,
+      promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
+    });
+    if (recentRuns.length > MAX_RECENT_RUNS) {
+      recentRuns.pop();
+    }
     log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
   }).catch((err) => {
     log(`Failed to record run: ${err}`);
@@ -2569,15 +2657,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
     return;
   }
   const durationMs = Date.now() - startTime;
+  const modelKey = `${targetProvider}/${targetModel}`;
+  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
   try {
     const runResult = await relay.run({
       prompt: promptText.slice(0, 500),
       taskType,
       model: `${targetProvider}:${targetModel}`
     });
+    recentRuns.unshift({
+      runId: runResult.runId,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+      model: modelKey,
+      taskType,
+      confidence,
+      mode: routingMode,
+      durationMs,
+      promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
+    });
+    if (recentRuns.length > MAX_RECENT_RUNS) {
+      recentRuns.pop();
+    }
     responseData["_relayplane"] = {
       runId: runResult.runId,
-      routedTo: `${targetProvider}/${targetModel}`,
+      routedTo: modelKey,
       taskType,
       confidence,
       durationMs,