@relayplane/proxy 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  // src/proxy.ts
4
4
  import * as http from "http";
5
+ import * as url from "url";
5
6
 
6
7
  // src/storage/store.ts
7
8
  import Database from "better-sqlite3";
@@ -1567,7 +1568,107 @@ ${input.prompt}` : input.prompt;
1567
1568
  }
1568
1569
  };
1569
1570
 
1571
+ // src/config.ts
1572
+ import * as fs2 from "fs";
1573
+ import * as path2 from "path";
1574
+ import * as os2 from "os";
1575
+ import { z } from "zod";
1576
/**
 * Shape of a single routing strategy entry in config.json.
 * `model` is required; `minConfidence` (a number within [0, 1]) and
 * `fallback` may be omitted.
 */
var StrategySchema = z.object({
  model: z.string(),
  minConfidence: z.optional(z.number().min(0).max(1)),
  fallback: z.optional(z.string())
});

/**
 * Shape of the whole config file. Both top-level sections are optional:
 * `strategies` maps string keys to strategy entries, and `defaults` names
 * the models used for the quality and cost settings.
 */
var ConfigSchema = z.object({
  strategies: z.optional(z.record(z.string(), StrategySchema)),
  defaults: z.optional(
    z.object({
      qualityModel: z.optional(z.string()),
      costModel: z.optional(z.string())
    })
  )
});
1588
/**
 * Built-in configuration: written to disk when no config file exists and
 * returned by loadConfig() when the file on disk cannot be used.
 *
 * The strategies section is assembled from a task -> model table; every task
 * gets its own `{ model }` object and the table order is the key order that
 * ends up in the serialized file.
 */
var DEFAULT_CONFIG = {
  strategies: Object.fromEntries(
    [
      ["code_review", "anthropic:claude-sonnet-4-20250514"],
      ["code_generation", "anthropic:claude-3-5-haiku-latest"],
      ["analysis", "anthropic:claude-sonnet-4-20250514"],
      ["summarization", "anthropic:claude-3-5-haiku-latest"],
      ["creative_writing", "anthropic:claude-sonnet-4-20250514"],
      ["data_extraction", "anthropic:claude-3-5-haiku-latest"],
      ["translation", "anthropic:claude-3-5-haiku-latest"],
      ["question_answering", "anthropic:claude-3-5-haiku-latest"],
      ["general", "anthropic:claude-3-5-haiku-latest"]
    ].map(([taskType, model]) => [taskType, { model }])
  ),
  defaults: {
    qualityModel: "claude-sonnet-4-20250514",
    costModel: "claude-3-5-haiku-latest"
  }
};
1605
/**
 * Resolve the location of the per-user RelayPlane config file.
 *
 * @returns {string} `<home>/.relayplane/config.json`, where `<home>` is the
 *   value reported by `os.homedir()`.
 */
function getConfigPath() {
  const relayplaneDir = path2.join(os2.homedir(), ".relayplane");
  return path2.join(relayplaneDir, "config.json");
}
1608
/**
 * Create the config directory and, if no config file exists yet, seed it
 * with DEFAULT_CONFIG (pretty-printed JSON followed by a newline).
 *
 * An existing config file is never overwritten. Any filesystem error other
 * than "file already exists" propagates to the caller.
 *
 * @returns {void}
 */
function writeDefaultConfig() {
  const configPath = getConfigPath();
  // Recursive mkdir is a no-op when the directory is already there, so no
  // separate existence check is needed.
  fs2.mkdirSync(path2.dirname(configPath), { recursive: true });
  try {
    // "wx" creates the file exclusively: the existence check and the write
    // are one atomic step, so a config created by another process between
    // a check and a write can never be clobbered.
    fs2.writeFileSync(
      configPath,
      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
      { encoding: "utf-8", flag: "wx" }
    );
  } catch (err) {
    if (err && err.code === "EEXIST") {
      // A config file is already present; leave it untouched.
      return;
    }
    throw err;
  }
  console.log(`[relayplane] Created default config at ${configPath}`);
}
1623
/**
 * Load and validate the user config, seeding a default file first when none
 * exists.
 *
 * This function does not throw: any failure (config directory or file cannot
 * be created, file unreadable, malformed JSON, schema violation) is logged
 * and DEFAULT_CONFIG is returned instead. That matters because it runs at
 * module load time and from the config watcher's timer, where an escaping
 * exception would take the whole process down.
 *
 * @returns {object} The validated config from disk, or DEFAULT_CONFIG on any
 *   failure.
 */
function loadConfig() {
  try {
    const configPath = getConfigPath();
    // Seeding is best-effort: kept inside the try so that an unwritable home
    // directory degrades to the built-in defaults rather than crashing.
    writeDefaultConfig();
    const raw = fs2.readFileSync(configPath, "utf-8");
    return ConfigSchema.parse(JSON.parse(raw));
  } catch (err) {
    if (err instanceof z.ZodError) {
      console.error(`[relayplane] Invalid config: ${err.message}`);
    } else if (err instanceof SyntaxError) {
      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
    } else {
      console.error(`[relayplane] Failed to load config: ${err}`);
    }
    console.log("[relayplane] Using default config");
    return DEFAULT_CONFIG;
  }
}
1643
/**
 * Look up the routing strategy configured for a task type.
 *
 * Only the config's own entries are consulted, so task types that happen to
 * share a name with an inherited Object member ("constructor", "toString",
 * "__proto__", ...) are reported as not configured instead of returning a
 * built-in function or prototype object.
 *
 * @param {object | null | undefined} config - Loaded config; may lack a
 *   `strategies` section.
 * @param {string} taskType - Task type key to look up.
 * @returns {object | null} The strategy entry, or null when the task type
 *   has no entry.
 */
function getStrategy(config, taskType) {
  const strategies = config?.strategies;
  if (strategies == null) {
    return null;
  }
  if (!Object.prototype.hasOwnProperty.call(strategies, taskType)) {
    return null;
  }
  return strategies[taskType] ?? null;
}
1646
/**
 * Watch the config directory and hot-reload config.json when it changes.
 *
 * The directory (not the file) is watched and events are filtered by file
 * name. Bursts of events are debounced: the reload runs 100 ms after the
 * last event for config.json.
 *
 * Failures after setup are logged rather than thrown: a watcher 'error'
 * event or an exception raised while reloading or inside `onChange` must not
 * terminate the process. Errors raised while creating the directory or
 * starting the watcher still propagate to the caller.
 *
 * @param {(config: object) => void} onChange - Called with the freshly
 *   loaded config after each debounced change.
 * @returns {import("fs").FSWatcher} The underlying watcher; call `.close()`
 *   on it to stop watching.
 */
function watchConfig(onChange) {
  const dir = path2.dirname(getConfigPath());
  // Recursive mkdir is a no-op when the directory already exists.
  fs2.mkdirSync(dir, { recursive: true });

  let debounceTimer = null;

  const reload = () => {
    debounceTimer = null;
    console.log("[relayplane] Config file changed, reloading...");
    try {
      onChange(loadConfig());
    } catch (err) {
      // Runs inside a timer callback: anything thrown here would otherwise
      // surface as an uncaught exception.
      console.error(`[relayplane] Config reload failed: ${err}`);
    }
  };

  const watcher = fs2.watch(dir, (eventType, filename) => {
    if (filename !== "config.json") {
      return;
    }
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(reload, 100);
  });

  // Without a listener, an 'error' event from the watcher is thrown as an
  // uncaught exception.
  watcher.on("error", (err) => {
    console.error(`[relayplane] Config watcher error: ${err}`);
  });

  // Do not fire a reload that was scheduled before the watcher was closed.
  watcher.on("close", () => {
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = null;
  });

  return watcher;
}
1664
+
1570
1665
  // src/proxy.ts
1666
+ var VERSION = "0.1.8";
1667
+ var recentRuns = [];
1668
+ var MAX_RECENT_RUNS = 100;
1669
+ var modelCounts = {};
1670
+ var serverStartTime = 0;
1671
+ var currentConfig = loadConfig();
1571
1672
  var DEFAULT_ENDPOINTS = {
1572
1673
  anthropic: {
1573
1674
  baseUrl: "https://api.anthropic.com/v1",
@@ -1864,9 +1965,9 @@ function convertMessagesToGemini(messages) {
1864
1965
  return { text: p.text };
1865
1966
  }
1866
1967
  if (p.type === "image_url" && p.image_url?.url) {
1867
- const url = p.image_url.url;
1868
- if (url.startsWith("data:")) {
1869
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1968
+ const url2 = p.image_url.url;
1969
+ if (url2.startsWith("data:")) {
1970
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1870
1971
  if (match) {
1871
1972
  return {
1872
1973
  inline_data: {
@@ -1876,7 +1977,7 @@ function convertMessagesToGemini(messages) {
1876
1977
  };
1877
1978
  }
1878
1979
  }
1879
- return { text: `[Image: ${url}]` };
1980
+ return { text: `[Image: ${url2}]` };
1880
1981
  }
1881
1982
  return { text: "" };
1882
1983
  });
@@ -2290,28 +2391,88 @@ async function startProxy(config = {}) {
2290
2391
  };
2291
2392
  const server = http.createServer(async (req, res) => {
2292
2393
  res.setHeader("Access-Control-Allow-Origin", "*");
2293
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2394
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2294
2395
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2295
2396
  if (req.method === "OPTIONS") {
2296
2397
  res.writeHead(204);
2297
2398
  res.end();
2298
2399
  return;
2299
2400
  }
2300
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2301
- if (req.method === "GET" && req.url?.includes("/models")) {
2302
- res.writeHead(200, { "Content-Type": "application/json" });
2303
- res.end(
2304
- JSON.stringify({
2305
- object: "list",
2306
- data: [
2307
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2308
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2309
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2310
- ]
2311
- })
2312
- );
2313
- return;
2401
+ const parsedUrl = url.parse(req.url || "", true);
2402
+ const pathname = parsedUrl.pathname || "";
2403
+ if (req.method === "GET" && pathname === "/health") {
2404
+ const uptimeMs = Date.now() - serverStartTime;
2405
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2406
+ const hours = Math.floor(uptimeSecs / 3600);
2407
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2408
+ const secs = uptimeSecs % 60;
2409
+ const providers = {};
2410
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2411
+ providers[name] = !!process.env[config2.apiKeyEnv];
2314
2412
  }
2413
+ res.writeHead(200, { "Content-Type": "application/json" });
2414
+ res.end(JSON.stringify({
2415
+ status: "ok",
2416
+ version: VERSION,
2417
+ uptime: `${hours}h ${mins}m ${secs}s`,
2418
+ uptimeMs,
2419
+ providers,
2420
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2421
+ }));
2422
+ return;
2423
+ }
2424
+ if (req.method === "GET" && pathname === "/stats") {
2425
+ const stats = relay.stats();
2426
+ const savings = relay.savingsReport(30);
2427
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2428
+ const modelDistribution = {};
2429
+ for (const [model, count] of Object.entries(modelCounts)) {
2430
+ modelDistribution[model] = {
2431
+ count,
2432
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2433
+ };
2434
+ }
2435
+ res.writeHead(200, { "Content-Type": "application/json" });
2436
+ res.end(JSON.stringify({
2437
+ totalRuns,
2438
+ savings: {
2439
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2440
+ actualCostUsd: savings.actualCost.toFixed(4),
2441
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2442
+ savedUsd: savings.savings.toFixed(4)
2443
+ },
2444
+ modelDistribution,
2445
+ byTaskType: stats.byTaskType,
2446
+ period: stats.period
2447
+ }));
2448
+ return;
2449
+ }
2450
+ if (req.method === "GET" && pathname === "/runs") {
2451
+ const limitParam = parsedUrl.query["limit"];
2452
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2453
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2454
+ res.writeHead(200, { "Content-Type": "application/json" });
2455
+ res.end(JSON.stringify({
2456
+ runs: recentRuns.slice(0, limit),
2457
+ total: recentRuns.length
2458
+ }));
2459
+ return;
2460
+ }
2461
+ if (req.method === "GET" && pathname.includes("/models")) {
2462
+ res.writeHead(200, { "Content-Type": "application/json" });
2463
+ res.end(
2464
+ JSON.stringify({
2465
+ object: "list",
2466
+ data: [
2467
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2468
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2469
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2470
+ ]
2471
+ })
2472
+ );
2473
+ return;
2474
+ }
2475
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2315
2476
  res.writeHead(404, { "Content-Type": "application/json" });
2316
2477
  res.end(JSON.stringify({ error: "Not found" }));
2317
2478
  return;
@@ -2358,33 +2519,44 @@ async function startProxy(config = {}) {
2358
2519
  const confidence = getInferenceConfidence(promptText, taskType);
2359
2520
  log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2360
2521
  if (routingMode !== "passthrough") {
2361
- const rule = relay.routing.get(taskType);
2362
- if (rule && rule.preferredModel) {
2363
- const parsed = parsePreferredModel(rule.preferredModel);
2522
+ const configStrategy = getStrategy(currentConfig, taskType);
2523
+ if (configStrategy) {
2524
+ const parsed = parsePreferredModel(configStrategy.model);
2364
2525
  if (parsed) {
2365
2526
  targetProvider = parsed.provider;
2366
2527
  targetModel = parsed.model;
2367
- log(`Using learned rule: ${rule.preferredModel}`);
2528
+ log(`Using config strategy: ${configStrategy.model}`);
2529
+ }
2530
+ }
2531
+ if (!configStrategy) {
2532
+ const rule = relay.routing.get(taskType);
2533
+ if (rule && rule.preferredModel) {
2534
+ const parsed = parsePreferredModel(rule.preferredModel);
2535
+ if (parsed) {
2536
+ targetProvider = parsed.provider;
2537
+ targetModel = parsed.model;
2538
+ log(`Using learned rule: ${rule.preferredModel}`);
2539
+ } else {
2540
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2541
+ targetProvider = defaultRoute.provider;
2542
+ targetModel = defaultRoute.model;
2543
+ }
2368
2544
  } else {
2369
2545
  const defaultRoute = DEFAULT_ROUTING[taskType];
2370
2546
  targetProvider = defaultRoute.provider;
2371
2547
  targetModel = defaultRoute.model;
2372
2548
  }
2373
- } else {
2374
- const defaultRoute = DEFAULT_ROUTING[taskType];
2375
- targetProvider = defaultRoute.provider;
2376
- targetModel = defaultRoute.model;
2377
2549
  }
2378
2550
  if (routingMode === "cost") {
2379
- const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
2380
- if (simpleTasks.includes(taskType)) {
2381
- targetModel = "claude-3-5-haiku-latest";
2382
- targetProvider = "anthropic";
2383
- }
2551
+ const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
2552
+ targetModel = costModel;
2553
+ targetProvider = "anthropic";
2554
+ log(`Cost mode: using ${costModel}`);
2384
2555
  } else if (routingMode === "quality") {
2385
- const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2556
+ const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2386
2557
  targetModel = qualityModel;
2387
2558
  targetProvider = "anthropic";
2559
+ log(`Quality mode: using ${qualityModel}`);
2388
2560
  }
2389
2561
  }
2390
2562
  log(`Routing to: ${targetProvider}/${targetModel}`);
@@ -2431,12 +2603,19 @@ async function startProxy(config = {}) {
2431
2603
  );
2432
2604
  }
2433
2605
  });
2606
+ watchConfig((newConfig) => {
2607
+ currentConfig = newConfig;
2608
+ console.log("[relayplane] Config reloaded");
2609
+ });
2434
2610
  return new Promise((resolve, reject) => {
2435
2611
  server.on("error", reject);
2436
2612
  server.listen(port, host, () => {
2613
+ serverStartTime = Date.now();
2437
2614
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2438
2615
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2439
2616
  console.log(` Endpoint: POST /v1/chat/completions`);
2617
+ console.log(` Stats: GET /stats, /runs, /health`);
2618
+ console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
2440
2619
  console.log(` Streaming: \u2705 Enabled`);
2441
2620
  resolve(server);
2442
2621
  });
@@ -2499,11 +2678,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2499
2678
  log(`Streaming error: ${err}`);
2500
2679
  }
2501
2680
  const durationMs = Date.now() - startTime;
2681
+ const modelKey = `${targetProvider}/${targetModel}`;
2682
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2502
2683
  relay.run({
2503
2684
  prompt: promptText.slice(0, 500),
2504
2685
  taskType,
2505
2686
  model: `${targetProvider}:${targetModel}`
2506
2687
  }).then((runResult) => {
2688
+ recentRuns.unshift({
2689
+ runId: runResult.runId,
2690
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2691
+ model: modelKey,
2692
+ taskType,
2693
+ confidence,
2694
+ mode: routingMode,
2695
+ durationMs,
2696
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2697
+ });
2698
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2699
+ recentRuns.pop();
2700
+ }
2507
2701
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2508
2702
  }).catch((err) => {
2509
2703
  log(`Failed to record run: ${err}`);
@@ -2574,15 +2768,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2574
2768
  return;
2575
2769
  }
2576
2770
  const durationMs = Date.now() - startTime;
2771
+ const modelKey = `${targetProvider}/${targetModel}`;
2772
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2577
2773
  try {
2578
2774
  const runResult = await relay.run({
2579
2775
  prompt: promptText.slice(0, 500),
2580
2776
  taskType,
2581
2777
  model: `${targetProvider}:${targetModel}`
2582
2778
  });
2779
+ recentRuns.unshift({
2780
+ runId: runResult.runId,
2781
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2782
+ model: modelKey,
2783
+ taskType,
2784
+ confidence,
2785
+ mode: routingMode,
2786
+ durationMs,
2787
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2788
+ });
2789
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2790
+ recentRuns.pop();
2791
+ }
2583
2792
  responseData["_relayplane"] = {
2584
2793
  runId: runResult.runId,
2585
- routedTo: `${targetProvider}/${targetModel}`,
2794
+ routedTo: modelKey,
2586
2795
  taskType,
2587
2796
  confidence,
2588
2797
  durationMs,