npm - @relayplane/proxy - Versions diffs - 0.1.6 → 0.1.8 - Mend

@relayplane/proxy 0.1.6 → 0.1.8

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (12)

package/dist/index.mjs CHANGED Viewed

@@ -1,5 +1,6 @@
 // src/proxy.ts
 import * as http from "http";
+import * as url from "url";
 // src/storage/store.ts
 import Database from "better-sqlite3";
@@ -1569,7 +1570,107 @@ ${input.prompt}` : input.prompt;
   }
 };
+// src/config.ts
+import * as fs2 from "fs";
+import * as path2 from "path";
+import * as os2 from "os";
+import { z } from "zod";
+var StrategySchema = z.object({
+  model: z.string(),
+  minConfidence: z.number().min(0).max(1).optional(),
+  fallback: z.string().optional()
+});
+var ConfigSchema = z.object({
+  strategies: z.record(z.string(), StrategySchema).optional(),
+  defaults: z.object({
+    qualityModel: z.string().optional(),
+    costModel: z.string().optional()
+  }).optional()
+});
+var DEFAULT_CONFIG = {
+  strategies: {
+    code_review: { model: "anthropic:claude-sonnet-4-20250514" },
+    code_generation: { model: "anthropic:claude-3-5-haiku-latest" },
+    analysis: { model: "anthropic:claude-sonnet-4-20250514" },
+    summarization: { model: "anthropic:claude-3-5-haiku-latest" },
+    creative_writing: { model: "anthropic:claude-sonnet-4-20250514" },
+    data_extraction: { model: "anthropic:claude-3-5-haiku-latest" },
+    translation: { model: "anthropic:claude-3-5-haiku-latest" },
+    question_answering: { model: "anthropic:claude-3-5-haiku-latest" },
+    general: { model: "anthropic:claude-3-5-haiku-latest" }
+  },
+  defaults: {
+    qualityModel: "claude-sonnet-4-20250514",
+    costModel: "claude-3-5-haiku-latest"
+  }
+};
+function getConfigPath() {
+  return path2.join(os2.homedir(), ".relayplane", "config.json");
+}
+function writeDefaultConfig() {
+  const configPath = getConfigPath();
+  const dir = path2.dirname(configPath);
+  if (!fs2.existsSync(dir)) {
+    fs2.mkdirSync(dir, { recursive: true });
+  }
+  if (!fs2.existsSync(configPath)) {
+    fs2.writeFileSync(
+      configPath,
+      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
+      "utf-8"
+    );
+    console.log(`[relayplane] Created default config at ${configPath}`);
+  }
+}
+function loadConfig() {
+  const configPath = getConfigPath();
+  writeDefaultConfig();
+  try {
+    const raw = fs2.readFileSync(configPath, "utf-8");
+    const parsed = JSON.parse(raw);
+    const validated = ConfigSchema.parse(parsed);
+    return validated;
+  } catch (err) {
+    if (err instanceof z.ZodError) {
+      console.error(`[relayplane] Invalid config: ${err.message}`);
+    } else if (err instanceof SyntaxError) {
+      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
+    } else {
+      console.error(`[relayplane] Failed to load config: ${err}`);
+    }
+    console.log("[relayplane] Using default config");
+    return DEFAULT_CONFIG;
+  }
+}
+function getStrategy(config, taskType) {
+  return config.strategies?.[taskType] ?? null;
+}
+function watchConfig(onChange) {
+  const configPath = getConfigPath();
+  const dir = path2.dirname(configPath);
+  if (!fs2.existsSync(dir)) {
+    fs2.mkdirSync(dir, { recursive: true });
+  }
+  let debounceTimer = null;
+  fs2.watch(dir, (eventType, filename) => {
+    if (filename === "config.json") {
+      if (debounceTimer) clearTimeout(debounceTimer);
+      debounceTimer = setTimeout(() => {
+        console.log("[relayplane] Config file changed, reloading...");
+        const newConfig = loadConfig();
+        onChange(newConfig);
+      }, 100);
+    }
+  });
+}
 // src/proxy.ts
+var VERSION = "0.1.8";
+var recentRuns = [];
+var MAX_RECENT_RUNS = 100;
+var modelCounts = {};
+var serverStartTime = 0;
+var currentConfig = loadConfig();
 var DEFAULT_ENDPOINTS = {
   anthropic: {
     baseUrl: "https://api.anthropic.com/v1",
@@ -1866,9 +1967,9 @@ function convertMessagesToGemini(messages) {
           return { text: p.text };
         }
         if (p.type === "image_url" && p.image_url?.url) {
-          const url = p.image_url.url;
-          if (url.startsWith("data:")) {
-            const match = url.match(/^data:([^;]+);base64,(.+)$/);
+          const url2 = p.image_url.url;
+          if (url2.startsWith("data:")) {
+            const match = url2.match(/^data:([^;]+);base64,(.+)$/);
             if (match) {
               return {
                 inline_data: {
@@ -1878,7 +1979,7 @@ function convertMessagesToGemini(messages) {
               };
             }
           }
-          return { text: `[Image: ${url}]` };
+          return { text: `[Image: ${url2}]` };
         }
         return { text: "" };
       });
@@ -2292,28 +2393,88 @@ async function startProxy(config = {}) {
   };
   const server = http.createServer(async (req, res) => {
     res.setHeader("Access-Control-Allow-Origin", "*");
-    res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
+    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
     res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
     if (req.method === "OPTIONS") {
       res.writeHead(204);
       res.end();
       return;
     }
-    if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
-      if (req.method === "GET" && req.url?.includes("/models")) {
-        res.writeHead(200, { "Content-Type": "application/json" });
-        res.end(
-          JSON.stringify({
-            object: "list",
-            data: [
-              { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
-              { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
-              { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
-            ]
-          })
-        );
-        return;
+    const parsedUrl = url.parse(req.url || "", true);
+    const pathname = parsedUrl.pathname || "";
+    if (req.method === "GET" && pathname === "/health") {
+      const uptimeMs = Date.now() - serverStartTime;
+      const uptimeSecs = Math.floor(uptimeMs / 1e3);
+      const hours = Math.floor(uptimeSecs / 3600);
+      const mins = Math.floor(uptimeSecs % 3600 / 60);
+      const secs = uptimeSecs % 60;
+      const providers = {};
+      for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
+        providers[name] = !!process.env[config2.apiKeyEnv];
+      }
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        status: "ok",
+        version: VERSION,
+        uptime: `${hours}h ${mins}m ${secs}s`,
+        uptimeMs,
+        providers,
+        totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname === "/stats") {
+      const stats = relay.stats();
+      const savings = relay.savingsReport(30);
+      const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
+      const modelDistribution = {};
+      for (const [model, count] of Object.entries(modelCounts)) {
+        modelDistribution[model] = {
+          count,
+          percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
+        };
       }
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        totalRuns,
+        savings: {
+          estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
+          actualCostUsd: savings.actualCost.toFixed(4),
+          baselineCostUsd: savings.baselineCost.toFixed(4),
+          savedUsd: savings.savings.toFixed(4)
+        },
+        modelDistribution,
+        byTaskType: stats.byTaskType,
+        period: stats.period
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname === "/runs") {
+      const limitParam = parsedUrl.query["limit"];
+      const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
+      const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({
+        runs: recentRuns.slice(0, limit),
+        total: recentRuns.length
+      }));
+      return;
+    }
+    if (req.method === "GET" && pathname.includes("/models")) {
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(
+        JSON.stringify({
+          object: "list",
+          data: [
+            { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
+            { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
+            { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
+          ]
+        })
+      );
+      return;
+    }
+    if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
       res.writeHead(404, { "Content-Type": "application/json" });
       res.end(JSON.stringify({ error: "Not found" }));
       return;
@@ -2360,33 +2521,44 @@ async function startProxy(config = {}) {
     const confidence = getInferenceConfidence(promptText, taskType);
     log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
     if (routingMode !== "passthrough") {
-      const rule = relay.routing.get(taskType);
-      if (rule && rule.preferredModel) {
-        const parsed = parsePreferredModel(rule.preferredModel);
+      const configStrategy = getStrategy(currentConfig, taskType);
+      if (configStrategy) {
+        const parsed = parsePreferredModel(configStrategy.model);
         if (parsed) {
           targetProvider = parsed.provider;
           targetModel = parsed.model;
-          log(`Using learned rule: ${rule.preferredModel}`);
+          log(`Using config strategy: ${configStrategy.model}`);
+        }
+      }
+      if (!configStrategy) {
+        const rule = relay.routing.get(taskType);
+        if (rule && rule.preferredModel) {
+          const parsed = parsePreferredModel(rule.preferredModel);
+          if (parsed) {
+            targetProvider = parsed.provider;
+            targetModel = parsed.model;
+            log(`Using learned rule: ${rule.preferredModel}`);
+          } else {
+            const defaultRoute = DEFAULT_ROUTING[taskType];
+            targetProvider = defaultRoute.provider;
+            targetModel = defaultRoute.model;
+          }
         } else {
           const defaultRoute = DEFAULT_ROUTING[taskType];
           targetProvider = defaultRoute.provider;
           targetModel = defaultRoute.model;
         }
-      } else {
-        const defaultRoute = DEFAULT_ROUTING[taskType];
-        targetProvider = defaultRoute.provider;
-        targetModel = defaultRoute.model;
       }
       if (routingMode === "cost") {
-        const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
-        if (simpleTasks.includes(taskType)) {
-          targetModel = "claude-3-5-haiku-latest";
-          targetProvider = "anthropic";
-        }
+        const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
+        targetModel = costModel;
+        targetProvider = "anthropic";
+        log(`Cost mode: using ${costModel}`);
       } else if (routingMode === "quality") {
-        const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
+        const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
         targetModel = qualityModel;
         targetProvider = "anthropic";
+        log(`Quality mode: using ${qualityModel}`);
       }
     }
     log(`Routing to: ${targetProvider}/${targetModel}`);
@@ -2433,12 +2605,19 @@ async function startProxy(config = {}) {
       );
     }
   });
+  watchConfig((newConfig) => {
+    currentConfig = newConfig;
+    console.log("[relayplane] Config reloaded");
+  });
   return new Promise((resolve, reject) => {
     server.on("error", reject);
     server.listen(port, host, () => {
+      serverStartTime = Date.now();
       console.log(`RelayPlane proxy listening on http://${host}:${port}`);
       console.log(`  Models: relayplane:auto, relayplane:cost, relayplane:quality`);
       console.log(`  Endpoint: POST /v1/chat/completions`);
+      console.log(`  Stats: GET /stats, /runs, /health`);
+      console.log(`  Config: ~/.relayplane/config.json (hot-reload enabled)`);
       console.log(`  Streaming: \u2705 Enabled`);
       resolve(server);
     });
@@ -2501,11 +2680,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
     log(`Streaming error: ${err}`);
   }
   const durationMs = Date.now() - startTime;
+  const modelKey = `${targetProvider}/${targetModel}`;
+  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
   relay.run({
     prompt: promptText.slice(0, 500),
     taskType,
     model: `${targetProvider}:${targetModel}`
   }).then((runResult) => {
+    recentRuns.unshift({
+      runId: runResult.runId,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+      model: modelKey,
+      taskType,
+      confidence,
+      mode: routingMode,
+      durationMs,
+      promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
+    });
+    if (recentRuns.length > MAX_RECENT_RUNS) {
+      recentRuns.pop();
+    }
     log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
   }).catch((err) => {
     log(`Failed to record run: ${err}`);
@@ -2576,15 +2770,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
     return;
   }
   const durationMs = Date.now() - startTime;
+  const modelKey = `${targetProvider}/${targetModel}`;
+  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
   try {
     const runResult = await relay.run({
       prompt: promptText.slice(0, 500),
       taskType,
       model: `${targetProvider}:${targetModel}`
     });
+    recentRuns.unshift({
+      runId: runResult.runId,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+      model: modelKey,
+      taskType,
+      confidence,
+      mode: routingMode,
+      durationMs,
+      promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
+    });
+    if (recentRuns.length > MAX_RECENT_RUNS) {
+      recentRuns.pop();
+    }
     responseData["_relayplane"] = {
       runId: runResult.runId,
-      routedTo: `${targetProvider}/${targetModel}`,
+      routedTo: modelKey,
       taskType,
       confidence,
       durationMs,
@@ -2599,7 +2808,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
 }
 // src/types.ts
-import { z } from "zod";
+import { z as z2 } from "zod";
 var TaskTypes = [
   "code_generation",
   "code_review",
@@ -2611,63 +2820,64 @@ var TaskTypes = [
   "question_answering",
   "general"
 ];
-var TaskTypeSchema = z.enum(TaskTypes);
+var TaskTypeSchema = z2.enum(TaskTypes);
 var Providers = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
-var ProviderSchema = z.enum(Providers);
-var RelayPlaneConfigSchema = z.object({
-  dbPath: z.string().optional(),
-  providers: z.record(ProviderSchema, z.object({
-    apiKey: z.string().optional(),
-    baseUrl: z.string().optional()
+var ProviderSchema = z2.enum(Providers);
+var RelayPlaneConfigSchema = z2.object({
+  dbPath: z2.string().optional(),
+  providers: z2.record(ProviderSchema, z2.object({
+    apiKey: z2.string().optional(),
+    baseUrl: z2.string().optional()
   })).optional(),
   defaultProvider: ProviderSchema.optional(),
-  defaultModel: z.string().optional()
+  defaultModel: z2.string().optional()
 });
-var RunInputSchema = z.object({
-  prompt: z.string().min(1),
-  systemPrompt: z.string().optional(),
+var RunInputSchema = z2.object({
+  prompt: z2.string().min(1),
+  systemPrompt: z2.string().optional(),
   taskType: TaskTypeSchema.optional(),
-  model: z.string().optional(),
-  metadata: z.record(z.unknown()).optional()
+  model: z2.string().optional(),
+  metadata: z2.record(z2.unknown()).optional()
 });
 var RuleSources = ["default", "user", "learned"];
-var RoutingRuleSchema = z.object({
-  id: z.string(),
+var RoutingRuleSchema = z2.object({
+  id: z2.string(),
   taskType: TaskTypeSchema,
-  preferredModel: z.string(),
-  source: z.enum(RuleSources),
-  confidence: z.number().min(0).max(1).optional(),
-  sampleCount: z.number().int().positive().optional(),
-  createdAt: z.string(),
-  updatedAt: z.string()
+  preferredModel: z2.string(),
+  source: z2.enum(RuleSources),
+  confidence: z2.number().min(0).max(1).optional(),
+  sampleCount: z2.number().int().positive().optional(),
+  createdAt: z2.string(),
+  updatedAt: z2.string()
 });
 var OutcomeQualities = ["excellent", "good", "acceptable", "poor", "failed"];
-var OutcomeInputSchema = z.object({
-  runId: z.string().min(1),
-  success: z.boolean(),
-  quality: z.enum(OutcomeQualities).optional(),
-  latencySatisfactory: z.boolean().optional(),
-  costSatisfactory: z.boolean().optional(),
-  feedback: z.string().optional()
+var OutcomeInputSchema = z2.object({
+  runId: z2.string().min(1),
+  success: z2.boolean(),
+  quality: z2.enum(OutcomeQualities).optional(),
+  latencySatisfactory: z2.boolean().optional(),
+  costSatisfactory: z2.boolean().optional(),
+  feedback: z2.string().optional()
 });
-var SuggestionSchema = z.object({
-  id: z.string(),
+var SuggestionSchema = z2.object({
+  id: z2.string(),
   taskType: TaskTypeSchema,
-  currentModel: z.string(),
-  suggestedModel: z.string(),
-  reason: z.string(),
-  confidence: z.number().min(0).max(1),
-  expectedImprovement: z.object({
-    successRate: z.number().optional(),
-    latency: z.number().optional(),
-    cost: z.number().optional()
+  currentModel: z2.string(),
+  suggestedModel: z2.string(),
+  reason: z2.string(),
+  confidence: z2.number().min(0).max(1),
+  expectedImprovement: z2.object({
+    successRate: z2.number().optional(),
+    latency: z2.number().optional(),
+    cost: z2.number().optional()
   }),
-  sampleCount: z.number().int().positive(),
-  createdAt: z.string(),
-  accepted: z.boolean().optional(),
-  acceptedAt: z.string().optional()
+  sampleCount: z2.number().int().positive(),
+  createdAt: z2.string(),
+  accepted: z2.boolean().optional(),
+  acceptedAt: z2.string().optional()
 });
 export {
+  DEFAULT_CONFIG,
   DEFAULT_ENDPOINTS,
   MODEL_MAPPING,
   MODEL_PRICING,
@@ -2682,9 +2892,13 @@ export {
   TaskTypes,
   calculateCost,
   calculateSavings,
+  getConfigPath,
   getInferenceConfidence,
   getModelPricing,
+  getStrategy,
   inferTaskType,
-  startProxy
+  loadConfig,
+  startProxy,
+  watchConfig
 };
 //# sourceMappingURL=index.mjs.map