@relayplane/proxy 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -176,13 +176,85 @@ Options:
176
176
  -h, --help Show help
177
177
  ```
178
178
 
179
+ ## REST API
180
+
181
+ The proxy exposes endpoints for stats and monitoring:
182
+
183
+ ### `GET /health`
184
+
185
+ Server health and version info.
186
+
187
+ ```bash
188
+ curl http://localhost:3001/health
189
+ ```
190
+
191
+ ```json
192
+ {
193
+ "status": "ok",
194
+ "version": "0.1.7",
195
+ "uptime": "2h 15m 30s",
196
+ "providers": { "anthropic": true, "openai": true, "google": false },
197
+ "totalRuns": 142
198
+ }
199
+ ```
200
+
201
+ ### `GET /stats`
202
+
203
+ Aggregated statistics and cost savings.
204
+
205
+ ```bash
206
+ curl http://localhost:3001/stats
207
+ ```
208
+
209
+ ```json
210
+ {
211
+ "totalRuns": 142,
212
+ "savings": {
213
+ "estimatedSavingsPercent": "73.2%",
214
+ "actualCostUsd": "0.0234",
215
+ "baselineCostUsd": "0.0873",
216
+ "savedUsd": "0.0639"
217
+ },
218
+ "modelDistribution": {
219
+ "anthropic/claude-3-5-haiku-latest": { "count": 98, "percentage": "69.0%" },
220
+ "anthropic/claude-sonnet-4-20250514": { "count": 44, "percentage": "31.0%" }
221
+ }
222
+ }
223
+ ```
224
+
225
+ ### `GET /runs`
226
+
227
+ Recent routing decisions.
228
+
229
+ ```bash
230
+ curl "http://localhost:3001/runs?limit=10"
231
+ ```
232
+
233
+ ```json
234
+ {
235
+ "runs": [
236
+ {
237
+ "runId": "abc123",
238
+ "timestamp": "2026-02-03T13:26:03Z",
239
+ "model": "anthropic/claude-3-5-haiku-latest",
240
+ "taskType": "code_generation",
241
+ "confidence": 0.92,
242
+ "mode": "auto",
243
+ "durationMs": 1203,
244
+ "promptPreview": "Write a function that..."
245
+ }
246
+ ],
247
+ "total": 142
248
+ }
249
+ ```
250
+
179
251
  ## Data Storage
180
252
 
181
253
  All data stored locally at `~/.relayplane/data.db` (SQLite).
182
254
 
183
255
  ```bash
184
256
  # View recent runs
185
- sqlite3 ~/.relayplane/data.db "SELECT * FROM runs ORDER BY timestamp DESC LIMIT 10"
257
+ sqlite3 ~/.relayplane/data.db "SELECT * FROM runs ORDER BY created_at DESC LIMIT 10"
186
258
 
187
259
  # Check routing rules
188
260
  sqlite3 ~/.relayplane/data.db "SELECT * FROM routing_rules"
package/dist/cli.js CHANGED
@@ -25,6 +25,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
25
25
 
26
26
  // src/proxy.ts
27
27
  var http = __toESM(require("http"));
28
+ var url = __toESM(require("url"));
28
29
 
29
30
  // src/storage/store.ts
30
31
  var import_better_sqlite3 = __toESM(require("better-sqlite3"));
@@ -1590,7 +1591,107 @@ ${input.prompt}` : input.prompt;
1590
1591
  }
1591
1592
  };
1592
1593
 
1594
+ // src/config.ts
1595
+ var fs2 = __toESM(require("fs"));
1596
+ var path2 = __toESM(require("path"));
1597
+ var os2 = __toESM(require("os"));
1598
+ var import_zod = require("zod");
1599
+ var StrategySchema = import_zod.z.object({
1600
+ model: import_zod.z.string(),
1601
+ minConfidence: import_zod.z.number().min(0).max(1).optional(),
1602
+ fallback: import_zod.z.string().optional()
1603
+ });
1604
+ var ConfigSchema = import_zod.z.object({
1605
+ strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
1606
+ defaults: import_zod.z.object({
1607
+ qualityModel: import_zod.z.string().optional(),
1608
+ costModel: import_zod.z.string().optional()
1609
+ }).optional()
1610
+ });
1611
+ var DEFAULT_CONFIG = {
1612
+ strategies: {
1613
+ code_review: { model: "anthropic:claude-sonnet-4-20250514" },
1614
+ code_generation: { model: "anthropic:claude-3-5-haiku-latest" },
1615
+ analysis: { model: "anthropic:claude-sonnet-4-20250514" },
1616
+ summarization: { model: "anthropic:claude-3-5-haiku-latest" },
1617
+ creative_writing: { model: "anthropic:claude-sonnet-4-20250514" },
1618
+ data_extraction: { model: "anthropic:claude-3-5-haiku-latest" },
1619
+ translation: { model: "anthropic:claude-3-5-haiku-latest" },
1620
+ question_answering: { model: "anthropic:claude-3-5-haiku-latest" },
1621
+ general: { model: "anthropic:claude-3-5-haiku-latest" }
1622
+ },
1623
+ defaults: {
1624
+ qualityModel: "claude-sonnet-4-20250514",
1625
+ costModel: "claude-3-5-haiku-latest"
1626
+ }
1627
+ };
1628
+ function getConfigPath() {
1629
+ return path2.join(os2.homedir(), ".relayplane", "config.json");
1630
+ }
1631
+ function writeDefaultConfig() {
1632
+ const configPath = getConfigPath();
1633
+ const dir = path2.dirname(configPath);
1634
+ if (!fs2.existsSync(dir)) {
1635
+ fs2.mkdirSync(dir, { recursive: true });
1636
+ }
1637
+ if (!fs2.existsSync(configPath)) {
1638
+ fs2.writeFileSync(
1639
+ configPath,
1640
+ JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
1641
+ "utf-8"
1642
+ );
1643
+ console.log(`[relayplane] Created default config at ${configPath}`);
1644
+ }
1645
+ }
1646
+ function loadConfig() {
1647
+ const configPath = getConfigPath();
1648
+ writeDefaultConfig();
1649
+ try {
1650
+ const raw = fs2.readFileSync(configPath, "utf-8");
1651
+ const parsed = JSON.parse(raw);
1652
+ const validated = ConfigSchema.parse(parsed);
1653
+ return validated;
1654
+ } catch (err) {
1655
+ if (err instanceof import_zod.z.ZodError) {
1656
+ console.error(`[relayplane] Invalid config: ${err.message}`);
1657
+ } else if (err instanceof SyntaxError) {
1658
+ console.error(`[relayplane] Config JSON parse error: ${err.message}`);
1659
+ } else {
1660
+ console.error(`[relayplane] Failed to load config: ${err}`);
1661
+ }
1662
+ console.log("[relayplane] Using default config");
1663
+ return DEFAULT_CONFIG;
1664
+ }
1665
+ }
1666
+ function getStrategy(config, taskType) {
1667
+ return config.strategies?.[taskType] ?? null;
1668
+ }
1669
+ function watchConfig(onChange) {
1670
+ const configPath = getConfigPath();
1671
+ const dir = path2.dirname(configPath);
1672
+ if (!fs2.existsSync(dir)) {
1673
+ fs2.mkdirSync(dir, { recursive: true });
1674
+ }
1675
+ let debounceTimer = null;
1676
+ fs2.watch(dir, (eventType, filename) => {
1677
+ if (filename === "config.json") {
1678
+ if (debounceTimer) clearTimeout(debounceTimer);
1679
+ debounceTimer = setTimeout(() => {
1680
+ console.log("[relayplane] Config file changed, reloading...");
1681
+ const newConfig = loadConfig();
1682
+ onChange(newConfig);
1683
+ }, 100);
1684
+ }
1685
+ });
1686
+ }
1687
+
1593
1688
  // src/proxy.ts
1689
+ var VERSION = "0.1.8";
1690
+ var recentRuns = [];
1691
+ var MAX_RECENT_RUNS = 100;
1692
+ var modelCounts = {};
1693
+ var serverStartTime = 0;
1694
+ var currentConfig = loadConfig();
1594
1695
  var DEFAULT_ENDPOINTS = {
1595
1696
  anthropic: {
1596
1697
  baseUrl: "https://api.anthropic.com/v1",
@@ -1887,9 +1988,9 @@ function convertMessagesToGemini(messages) {
1887
1988
  return { text: p.text };
1888
1989
  }
1889
1990
  if (p.type === "image_url" && p.image_url?.url) {
1890
- const url = p.image_url.url;
1891
- if (url.startsWith("data:")) {
1892
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1991
+ const url2 = p.image_url.url;
1992
+ if (url2.startsWith("data:")) {
1993
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1893
1994
  if (match) {
1894
1995
  return {
1895
1996
  inline_data: {
@@ -1899,7 +2000,7 @@ function convertMessagesToGemini(messages) {
1899
2000
  };
1900
2001
  }
1901
2002
  }
1902
- return { text: `[Image: ${url}]` };
2003
+ return { text: `[Image: ${url2}]` };
1903
2004
  }
1904
2005
  return { text: "" };
1905
2006
  });
@@ -2313,28 +2414,88 @@ async function startProxy(config = {}) {
2313
2414
  };
2314
2415
  const server = http.createServer(async (req, res) => {
2315
2416
  res.setHeader("Access-Control-Allow-Origin", "*");
2316
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2417
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2317
2418
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2318
2419
  if (req.method === "OPTIONS") {
2319
2420
  res.writeHead(204);
2320
2421
  res.end();
2321
2422
  return;
2322
2423
  }
2323
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2324
- if (req.method === "GET" && req.url?.includes("/models")) {
2325
- res.writeHead(200, { "Content-Type": "application/json" });
2326
- res.end(
2327
- JSON.stringify({
2328
- object: "list",
2329
- data: [
2330
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2331
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2332
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2333
- ]
2334
- })
2335
- );
2336
- return;
2424
+ const parsedUrl = url.parse(req.url || "", true);
2425
+ const pathname = parsedUrl.pathname || "";
2426
+ if (req.method === "GET" && pathname === "/health") {
2427
+ const uptimeMs = Date.now() - serverStartTime;
2428
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2429
+ const hours = Math.floor(uptimeSecs / 3600);
2430
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2431
+ const secs = uptimeSecs % 60;
2432
+ const providers = {};
2433
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2434
+ providers[name] = !!process.env[config2.apiKeyEnv];
2337
2435
  }
2436
+ res.writeHead(200, { "Content-Type": "application/json" });
2437
+ res.end(JSON.stringify({
2438
+ status: "ok",
2439
+ version: VERSION,
2440
+ uptime: `${hours}h ${mins}m ${secs}s`,
2441
+ uptimeMs,
2442
+ providers,
2443
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2444
+ }));
2445
+ return;
2446
+ }
2447
+ if (req.method === "GET" && pathname === "/stats") {
2448
+ const stats = relay.stats();
2449
+ const savings = relay.savingsReport(30);
2450
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2451
+ const modelDistribution = {};
2452
+ for (const [model, count] of Object.entries(modelCounts)) {
2453
+ modelDistribution[model] = {
2454
+ count,
2455
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2456
+ };
2457
+ }
2458
+ res.writeHead(200, { "Content-Type": "application/json" });
2459
+ res.end(JSON.stringify({
2460
+ totalRuns,
2461
+ savings: {
2462
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2463
+ actualCostUsd: savings.actualCost.toFixed(4),
2464
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2465
+ savedUsd: savings.savings.toFixed(4)
2466
+ },
2467
+ modelDistribution,
2468
+ byTaskType: stats.byTaskType,
2469
+ period: stats.period
2470
+ }));
2471
+ return;
2472
+ }
2473
+ if (req.method === "GET" && pathname === "/runs") {
2474
+ const limitParam = parsedUrl.query["limit"];
2475
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2476
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2477
+ res.writeHead(200, { "Content-Type": "application/json" });
2478
+ res.end(JSON.stringify({
2479
+ runs: recentRuns.slice(0, limit),
2480
+ total: recentRuns.length
2481
+ }));
2482
+ return;
2483
+ }
2484
+ if (req.method === "GET" && pathname.includes("/models")) {
2485
+ res.writeHead(200, { "Content-Type": "application/json" });
2486
+ res.end(
2487
+ JSON.stringify({
2488
+ object: "list",
2489
+ data: [
2490
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2491
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2492
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2493
+ ]
2494
+ })
2495
+ );
2496
+ return;
2497
+ }
2498
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2338
2499
  res.writeHead(404, { "Content-Type": "application/json" });
2339
2500
  res.end(JSON.stringify({ error: "Not found" }));
2340
2501
  return;
@@ -2381,33 +2542,44 @@ async function startProxy(config = {}) {
2381
2542
  const confidence = getInferenceConfidence(promptText, taskType);
2382
2543
  log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2383
2544
  if (routingMode !== "passthrough") {
2384
- const rule = relay.routing.get(taskType);
2385
- if (rule && rule.preferredModel) {
2386
- const parsed = parsePreferredModel(rule.preferredModel);
2545
+ const configStrategy = getStrategy(currentConfig, taskType);
2546
+ if (configStrategy) {
2547
+ const parsed = parsePreferredModel(configStrategy.model);
2387
2548
  if (parsed) {
2388
2549
  targetProvider = parsed.provider;
2389
2550
  targetModel = parsed.model;
2390
- log(`Using learned rule: ${rule.preferredModel}`);
2551
+ log(`Using config strategy: ${configStrategy.model}`);
2552
+ }
2553
+ }
2554
+ if (!configStrategy) {
2555
+ const rule = relay.routing.get(taskType);
2556
+ if (rule && rule.preferredModel) {
2557
+ const parsed = parsePreferredModel(rule.preferredModel);
2558
+ if (parsed) {
2559
+ targetProvider = parsed.provider;
2560
+ targetModel = parsed.model;
2561
+ log(`Using learned rule: ${rule.preferredModel}`);
2562
+ } else {
2563
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2564
+ targetProvider = defaultRoute.provider;
2565
+ targetModel = defaultRoute.model;
2566
+ }
2391
2567
  } else {
2392
2568
  const defaultRoute = DEFAULT_ROUTING[taskType];
2393
2569
  targetProvider = defaultRoute.provider;
2394
2570
  targetModel = defaultRoute.model;
2395
2571
  }
2396
- } else {
2397
- const defaultRoute = DEFAULT_ROUTING[taskType];
2398
- targetProvider = defaultRoute.provider;
2399
- targetModel = defaultRoute.model;
2400
2572
  }
2401
2573
  if (routingMode === "cost") {
2402
- const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
2403
- if (simpleTasks.includes(taskType)) {
2404
- targetModel = "claude-3-5-haiku-latest";
2405
- targetProvider = "anthropic";
2406
- }
2574
+ const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
2575
+ targetModel = costModel;
2576
+ targetProvider = "anthropic";
2577
+ log(`Cost mode: using ${costModel}`);
2407
2578
  } else if (routingMode === "quality") {
2408
- const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2579
+ const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2409
2580
  targetModel = qualityModel;
2410
2581
  targetProvider = "anthropic";
2582
+ log(`Quality mode: using ${qualityModel}`);
2411
2583
  }
2412
2584
  }
2413
2585
  log(`Routing to: ${targetProvider}/${targetModel}`);
@@ -2454,12 +2626,19 @@ async function startProxy(config = {}) {
2454
2626
  );
2455
2627
  }
2456
2628
  });
2629
+ watchConfig((newConfig) => {
2630
+ currentConfig = newConfig;
2631
+ console.log("[relayplane] Config reloaded");
2632
+ });
2457
2633
  return new Promise((resolve, reject) => {
2458
2634
  server.on("error", reject);
2459
2635
  server.listen(port, host, () => {
2636
+ serverStartTime = Date.now();
2460
2637
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2461
2638
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2462
2639
  console.log(` Endpoint: POST /v1/chat/completions`);
2640
+ console.log(` Stats: GET /stats, /runs, /health`);
2641
+ console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
2463
2642
  console.log(` Streaming: \u2705 Enabled`);
2464
2643
  resolve(server);
2465
2644
  });
@@ -2522,11 +2701,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2522
2701
  log(`Streaming error: ${err}`);
2523
2702
  }
2524
2703
  const durationMs = Date.now() - startTime;
2704
+ const modelKey = `${targetProvider}/${targetModel}`;
2705
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2525
2706
  relay.run({
2526
2707
  prompt: promptText.slice(0, 500),
2527
2708
  taskType,
2528
2709
  model: `${targetProvider}:${targetModel}`
2529
2710
  }).then((runResult) => {
2711
+ recentRuns.unshift({
2712
+ runId: runResult.runId,
2713
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2714
+ model: modelKey,
2715
+ taskType,
2716
+ confidence,
2717
+ mode: routingMode,
2718
+ durationMs,
2719
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2720
+ });
2721
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2722
+ recentRuns.pop();
2723
+ }
2530
2724
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2531
2725
  }).catch((err) => {
2532
2726
  log(`Failed to record run: ${err}`);
@@ -2597,15 +2791,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2597
2791
  return;
2598
2792
  }
2599
2793
  const durationMs = Date.now() - startTime;
2794
+ const modelKey = `${targetProvider}/${targetModel}`;
2795
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2600
2796
  try {
2601
2797
  const runResult = await relay.run({
2602
2798
  prompt: promptText.slice(0, 500),
2603
2799
  taskType,
2604
2800
  model: `${targetProvider}:${targetModel}`
2605
2801
  });
2802
+ recentRuns.unshift({
2803
+ runId: runResult.runId,
2804
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2805
+ model: modelKey,
2806
+ taskType,
2807
+ confidence,
2808
+ mode: routingMode,
2809
+ durationMs,
2810
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2811
+ });
2812
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2813
+ recentRuns.pop();
2814
+ }
2606
2815
  responseData["_relayplane"] = {
2607
2816
  runId: runResult.runId,
2608
- routedTo: `${targetProvider}/${targetModel}`,
2817
+ routedTo: modelKey,
2609
2818
  taskType,
2610
2819
  confidence,
2611
2820
  durationMs,