@relayplane/proxy 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,5 +1,6 @@
1
1
  // src/proxy.ts
2
2
  import * as http from "http";
3
+ import * as url from "url";
3
4
 
4
5
  // src/storage/store.ts
5
6
  import Database from "better-sqlite3";
@@ -1569,7 +1570,107 @@ ${input.prompt}` : input.prompt;
1569
1570
  }
1570
1571
  };
1571
1572
 
1573
+ // src/config.ts
1574
+ import * as fs2 from "fs";
1575
+ import * as path2 from "path";
1576
+ import * as os2 from "os";
1577
+ import { z } from "zod";
1578
/**
 * Zod schema for a single routing strategy entry in the user config.
 *
 * - `model`: required string.
 * - `minConfidence`: optional number constrained to the range [0, 1].
 * - `fallback`: optional string.
 *
 * NOTE(review): in the visible proxy code only `model` is read from a
 * strategy; how `minConfidence` and `fallback` are consumed is not shown here.
 */
var StrategySchema = (() => {
  const confidenceBound = z.number().min(0).max(1);
  return z.object({
    model: z.string(),
    minConfidence: confidenceBound.optional(),
    fallback: z.string().optional()
  });
})();
1583
/**
 * Zod schema for the whole config file.
 *
 * Both top-level sections are optional:
 * - `strategies`: a record mapping string keys to `StrategySchema` entries.
 * - `defaults`: an object with optional `qualityModel` and `costModel` strings.
 */
var ConfigSchema = (() => {
  const optionalModelName = z.string().optional();
  const strategiesSection = z.record(z.string(), StrategySchema);
  const defaultsSection = z.object({
    qualityModel: optionalModelName,
    costModel: optionalModelName
  });
  return z.object({
    strategies: strategiesSection.optional(),
    defaults: defaultsSection.optional()
  });
})();
1590
/**
 * Built-in configuration: written to disk when no config file exists and
 * returned by `loadConfig` when the file cannot be read or validated.
 *
 * Strategy entries carry a provider-prefixed model id ("anthropic:<model>"),
 * while `defaults` holds bare model names. Key order is kept stable because
 * this object is serialized verbatim into the default config file.
 */
var DEFAULT_CONFIG = (() => {
  const qualityModel = "claude-sonnet-4-20250514";
  const costModel = "claude-3-5-haiku-latest";
  const viaAnthropic = (modelName) => ({ model: `anthropic:${modelName}` });
  return {
    strategies: {
      code_review: viaAnthropic(qualityModel),
      code_generation: viaAnthropic(costModel),
      analysis: viaAnthropic(qualityModel),
      summarization: viaAnthropic(costModel),
      creative_writing: viaAnthropic(qualityModel),
      data_extraction: viaAnthropic(costModel),
      translation: viaAnthropic(costModel),
      question_answering: viaAnthropic(costModel),
      general: viaAnthropic(costModel)
    },
    defaults: {
      qualityModel,
      costModel
    }
  };
})();
1607
/**
 * Resolve the location of the RelayPlane config file.
 *
 * @returns {string} Path to `config.json` inside the `.relayplane` directory
 *   under the current user's home directory.
 */
function getConfigPath() {
  const relayplaneDir = path2.join(os2.homedir(), ".relayplane");
  return path2.join(relayplaneDir, "config.json");
}
1610
/**
 * Create the config directory and, if no config file exists yet, write
 * `DEFAULT_CONFIG` to it as pretty-printed JSON followed by a newline.
 *
 * An existing config file is never modified.
 *
 * @returns {void}
 * @throws {Error} Any filesystem error other than "file already exists"
 *   (e.g. the directory cannot be created or is not writable).
 */
function writeDefaultConfig() {
  const configPath = getConfigPath();
  // `recursive: true` is a no-op when the directory already exists, so no
  // separate existence check is needed.
  fs2.mkdirSync(path2.dirname(configPath), { recursive: true });
  try {
    // "wx" creates the file exclusively: the existence check and the write
    // are a single atomic step, so a config created by the user or another
    // process in the meantime is never overwritten.
    fs2.writeFileSync(
      configPath,
      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
      { encoding: "utf-8", flag: "wx" }
    );
  } catch (err) {
    if (err && err.code === "EEXIST") {
      // A config file is already present; leave it untouched.
      return;
    }
    throw err;
  }
  console.log(`[relayplane] Created default config at ${configPath}`);
}
1625
/**
 * Load and validate the user config, creating a default file first when none
 * exists.
 *
 * Never throws: any failure (default file cannot be created, file cannot be
 * read, invalid JSON, schema validation error) is logged and the built-in
 * `DEFAULT_CONFIG` is returned instead. This matters because the function is
 * called at module load time and from the config watcher's timer callback,
 * where an exception would take the process down.
 *
 * @returns {object} The validated config parsed from disk, or
 *   `DEFAULT_CONFIG` on any error.
 */
function loadConfig() {
  try {
    const configPath = getConfigPath();
    // Kept inside the try so that an unwritable home directory degrades to
    // the in-memory defaults instead of raising.
    writeDefaultConfig();
    const raw = fs2.readFileSync(configPath, "utf-8");
    const parsed = JSON.parse(raw);
    return ConfigSchema.parse(parsed);
  } catch (err) {
    if (err instanceof z.ZodError) {
      console.error(`[relayplane] Invalid config: ${err.message}`);
    } else if (err instanceof SyntaxError) {
      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
    } else {
      console.error(`[relayplane] Failed to load config: ${err}`);
    }
    console.log("[relayplane] Using default config");
    return DEFAULT_CONFIG;
  }
}
1645
/**
 * Look up the routing strategy configured for a task type.
 *
 * Only the config's own `strategies` entries are considered, so a task type
 * that happens to match an inherited object member (e.g. "toString" or
 * "constructor") is reported as not configured rather than returning a
 * built-in function.
 *
 * @param {object} config - Loaded config; may lack a `strategies` section.
 * @param {string} taskType - Task type key to look up.
 * @returns {object|null} The strategy entry, or `null` when none is configured.
 */
function getStrategy(config, taskType) {
  const strategies = config?.strategies;
  if (strategies == null) {
    return null;
  }
  if (!Object.prototype.hasOwnProperty.call(strategies, taskType)) {
    return null;
  }
  return strategies[taskType] ?? null;
}
1648
/**
 * Watch the config directory and invoke `onChange` with a freshly loaded
 * config whenever the config file changes.
 *
 * Change events are debounced by 100 ms so that a burst of filesystem events
 * for a single save results in one reload.
 *
 * @param {(config: object) => void} onChange - Called with the reloaded config.
 * @returns {import("fs").FSWatcher} The underlying watcher; call `.close()`
 *   on it to stop watching (a pending debounced reload is cancelled).
 * @throws {Error} If the directory cannot be created or the watcher cannot
 *   be set up.
 */
function watchConfig(onChange) {
  const configPath = getConfigPath();
  const dir = path2.dirname(configPath);
  const configFile = path2.basename(configPath);
  // `recursive: true` makes this a no-op when the directory already exists.
  fs2.mkdirSync(dir, { recursive: true });
  let debounceTimer = null;
  const reload = () => {
    debounceTimer = null;
    console.log("[relayplane] Config file changed, reloading...");
    try {
      onChange(loadConfig());
    } catch (err) {
      // This runs from a timer callback: an uncaught exception here would
      // terminate the process, so report it and keep the previous config.
      console.error(`[relayplane] Failed to reload config: ${err}`);
    }
  };
  const watcher = fs2.watch(dir, (_eventType, filename) => {
    // Node does not guarantee that `filename` is provided on every platform;
    // when it is missing, assume the config may have changed.
    if (filename != null && filename !== configFile) {
      return;
    }
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(reload, 100);
  });
  // Without a listener, an 'error' event emitted by the watcher would be
  // thrown as an uncaught exception.
  watcher.on("error", (err) => {
    console.error(`[relayplane] Config watcher error: ${err}`);
  });
  watcher.on("close", () => {
    if (debounceTimer) {
      clearTimeout(debounceTimer);
      debounceTimer = null;
    }
  });
  return watcher;
}
1666
+
1572
1667
  // src/proxy.ts
1668
// Version string reported by the GET /health endpoint.
var VERSION = "0.1.8";

// Maximum number of entries kept in `recentRuns`; also caps the GET /runs limit.
var MAX_RECENT_RUNS = 100;

// In-memory history of recorded runs, newest first (entries are unshifted and
// the oldest is popped once the cap is exceeded).
var recentRuns = [];

// Request counters keyed by "<provider>/<model>".
var modelCounts = {};

// Epoch milliseconds at which the server started listening; 0 until then.
var serverStartTime = 0;

// Active routing config; replaced by the config watcher callback on reload.
var currentConfig = loadConfig();
1573
1674
  var DEFAULT_ENDPOINTS = {
1574
1675
  anthropic: {
1575
1676
  baseUrl: "https://api.anthropic.com/v1",
@@ -1866,9 +1967,9 @@ function convertMessagesToGemini(messages) {
1866
1967
  return { text: p.text };
1867
1968
  }
1868
1969
  if (p.type === "image_url" && p.image_url?.url) {
1869
- const url = p.image_url.url;
1870
- if (url.startsWith("data:")) {
1871
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
1970
+ const url2 = p.image_url.url;
1971
+ if (url2.startsWith("data:")) {
1972
+ const match = url2.match(/^data:([^;]+);base64,(.+)$/);
1872
1973
  if (match) {
1873
1974
  return {
1874
1975
  inline_data: {
@@ -1878,7 +1979,7 @@ function convertMessagesToGemini(messages) {
1878
1979
  };
1879
1980
  }
1880
1981
  }
1881
- return { text: `[Image: ${url}]` };
1982
+ return { text: `[Image: ${url2}]` };
1882
1983
  }
1883
1984
  return { text: "" };
1884
1985
  });
@@ -2292,28 +2393,88 @@ async function startProxy(config = {}) {
2292
2393
  };
2293
2394
  const server = http.createServer(async (req, res) => {
2294
2395
  res.setHeader("Access-Control-Allow-Origin", "*");
2295
- res.setHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
2396
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
2296
2397
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
2297
2398
  if (req.method === "OPTIONS") {
2298
2399
  res.writeHead(204);
2299
2400
  res.end();
2300
2401
  return;
2301
2402
  }
2302
- if (req.method !== "POST" || !req.url?.includes("/chat/completions")) {
2303
- if (req.method === "GET" && req.url?.includes("/models")) {
2304
- res.writeHead(200, { "Content-Type": "application/json" });
2305
- res.end(
2306
- JSON.stringify({
2307
- object: "list",
2308
- data: [
2309
- { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2310
- { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2311
- { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2312
- ]
2313
- })
2314
- );
2315
- return;
2403
+ const parsedUrl = url.parse(req.url || "", true);
2404
+ const pathname = parsedUrl.pathname || "";
2405
+ if (req.method === "GET" && pathname === "/health") {
2406
+ const uptimeMs = Date.now() - serverStartTime;
2407
+ const uptimeSecs = Math.floor(uptimeMs / 1e3);
2408
+ const hours = Math.floor(uptimeSecs / 3600);
2409
+ const mins = Math.floor(uptimeSecs % 3600 / 60);
2410
+ const secs = uptimeSecs % 60;
2411
+ const providers = {};
2412
+ for (const [name, config2] of Object.entries(DEFAULT_ENDPOINTS)) {
2413
+ providers[name] = !!process.env[config2.apiKeyEnv];
2414
+ }
2415
+ res.writeHead(200, { "Content-Type": "application/json" });
2416
+ res.end(JSON.stringify({
2417
+ status: "ok",
2418
+ version: VERSION,
2419
+ uptime: `${hours}h ${mins}m ${secs}s`,
2420
+ uptimeMs,
2421
+ providers,
2422
+ totalRuns: recentRuns.length > 0 ? Object.values(modelCounts).reduce((a, b) => a + b, 0) : 0
2423
+ }));
2424
+ return;
2425
+ }
2426
+ if (req.method === "GET" && pathname === "/stats") {
2427
+ const stats = relay.stats();
2428
+ const savings = relay.savingsReport(30);
2429
+ const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2430
+ const modelDistribution = {};
2431
+ for (const [model, count] of Object.entries(modelCounts)) {
2432
+ modelDistribution[model] = {
2433
+ count,
2434
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2435
+ };
2316
2436
  }
2437
+ res.writeHead(200, { "Content-Type": "application/json" });
2438
+ res.end(JSON.stringify({
2439
+ totalRuns,
2440
+ savings: {
2441
+ estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2442
+ actualCostUsd: savings.actualCost.toFixed(4),
2443
+ baselineCostUsd: savings.baselineCost.toFixed(4),
2444
+ savedUsd: savings.savings.toFixed(4)
2445
+ },
2446
+ modelDistribution,
2447
+ byTaskType: stats.byTaskType,
2448
+ period: stats.period
2449
+ }));
2450
+ return;
2451
+ }
2452
+ if (req.method === "GET" && pathname === "/runs") {
2453
+ const limitParam = parsedUrl.query["limit"];
2454
+ const parsedLimit = limitParam ? parseInt(String(limitParam), 10) : 20;
2455
+ const limit = Math.min(Number.isNaN(parsedLimit) ? 20 : parsedLimit, MAX_RECENT_RUNS);
2456
+ res.writeHead(200, { "Content-Type": "application/json" });
2457
+ res.end(JSON.stringify({
2458
+ runs: recentRuns.slice(0, limit),
2459
+ total: recentRuns.length
2460
+ }));
2461
+ return;
2462
+ }
2463
+ if (req.method === "GET" && pathname.includes("/models")) {
2464
+ res.writeHead(200, { "Content-Type": "application/json" });
2465
+ res.end(
2466
+ JSON.stringify({
2467
+ object: "list",
2468
+ data: [
2469
+ { id: "relayplane:auto", object: "model", owned_by: "relayplane" },
2470
+ { id: "relayplane:cost", object: "model", owned_by: "relayplane" },
2471
+ { id: "relayplane:quality", object: "model", owned_by: "relayplane" }
2472
+ ]
2473
+ })
2474
+ );
2475
+ return;
2476
+ }
2477
+ if (req.method !== "POST" || !pathname.includes("/chat/completions")) {
2317
2478
  res.writeHead(404, { "Content-Type": "application/json" });
2318
2479
  res.end(JSON.stringify({ error: "Not found" }));
2319
2480
  return;
@@ -2360,33 +2521,44 @@ async function startProxy(config = {}) {
2360
2521
  const confidence = getInferenceConfidence(promptText, taskType);
2361
2522
  log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2362
2523
  if (routingMode !== "passthrough") {
2363
- const rule = relay.routing.get(taskType);
2364
- if (rule && rule.preferredModel) {
2365
- const parsed = parsePreferredModel(rule.preferredModel);
2524
+ const configStrategy = getStrategy(currentConfig, taskType);
2525
+ if (configStrategy) {
2526
+ const parsed = parsePreferredModel(configStrategy.model);
2366
2527
  if (parsed) {
2367
2528
  targetProvider = parsed.provider;
2368
2529
  targetModel = parsed.model;
2369
- log(`Using learned rule: ${rule.preferredModel}`);
2530
+ log(`Using config strategy: ${configStrategy.model}`);
2531
+ }
2532
+ }
2533
+ if (!configStrategy) {
2534
+ const rule = relay.routing.get(taskType);
2535
+ if (rule && rule.preferredModel) {
2536
+ const parsed = parsePreferredModel(rule.preferredModel);
2537
+ if (parsed) {
2538
+ targetProvider = parsed.provider;
2539
+ targetModel = parsed.model;
2540
+ log(`Using learned rule: ${rule.preferredModel}`);
2541
+ } else {
2542
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2543
+ targetProvider = defaultRoute.provider;
2544
+ targetModel = defaultRoute.model;
2545
+ }
2370
2546
  } else {
2371
2547
  const defaultRoute = DEFAULT_ROUTING[taskType];
2372
2548
  targetProvider = defaultRoute.provider;
2373
2549
  targetModel = defaultRoute.model;
2374
2550
  }
2375
- } else {
2376
- const defaultRoute = DEFAULT_ROUTING[taskType];
2377
- targetProvider = defaultRoute.provider;
2378
- targetModel = defaultRoute.model;
2379
2551
  }
2380
2552
  if (routingMode === "cost") {
2381
- const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
2382
- if (simpleTasks.includes(taskType)) {
2383
- targetModel = "claude-3-5-haiku-latest";
2384
- targetProvider = "anthropic";
2385
- }
2553
+ const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
2554
+ targetModel = costModel;
2555
+ targetProvider = "anthropic";
2556
+ log(`Cost mode: using ${costModel}`);
2386
2557
  } else if (routingMode === "quality") {
2387
- const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2558
+ const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2388
2559
  targetModel = qualityModel;
2389
2560
  targetProvider = "anthropic";
2561
+ log(`Quality mode: using ${qualityModel}`);
2390
2562
  }
2391
2563
  }
2392
2564
  log(`Routing to: ${targetProvider}/${targetModel}`);
@@ -2433,12 +2605,19 @@ async function startProxy(config = {}) {
2433
2605
  );
2434
2606
  }
2435
2607
  });
2608
+ watchConfig((newConfig) => {
2609
+ currentConfig = newConfig;
2610
+ console.log("[relayplane] Config reloaded");
2611
+ });
2436
2612
  return new Promise((resolve, reject) => {
2437
2613
  server.on("error", reject);
2438
2614
  server.listen(port, host, () => {
2615
+ serverStartTime = Date.now();
2439
2616
  console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2440
2617
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2441
2618
  console.log(` Endpoint: POST /v1/chat/completions`);
2619
+ console.log(` Stats: GET /stats, /runs, /health`);
2620
+ console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
2442
2621
  console.log(` Streaming: \u2705 Enabled`);
2443
2622
  resolve(server);
2444
2623
  });
@@ -2501,11 +2680,26 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2501
2680
  log(`Streaming error: ${err}`);
2502
2681
  }
2503
2682
  const durationMs = Date.now() - startTime;
2683
+ const modelKey = `${targetProvider}/${targetModel}`;
2684
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2504
2685
  relay.run({
2505
2686
  prompt: promptText.slice(0, 500),
2506
2687
  taskType,
2507
2688
  model: `${targetProvider}:${targetModel}`
2508
2689
  }).then((runResult) => {
2690
+ recentRuns.unshift({
2691
+ runId: runResult.runId,
2692
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2693
+ model: modelKey,
2694
+ taskType,
2695
+ confidence,
2696
+ mode: routingMode,
2697
+ durationMs,
2698
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2699
+ });
2700
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2701
+ recentRuns.pop();
2702
+ }
2509
2703
  log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2510
2704
  }).catch((err) => {
2511
2705
  log(`Failed to record run: ${err}`);
@@ -2576,15 +2770,30 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2576
2770
  return;
2577
2771
  }
2578
2772
  const durationMs = Date.now() - startTime;
2773
+ const modelKey = `${targetProvider}/${targetModel}`;
2774
+ modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2579
2775
  try {
2580
2776
  const runResult = await relay.run({
2581
2777
  prompt: promptText.slice(0, 500),
2582
2778
  taskType,
2583
2779
  model: `${targetProvider}:${targetModel}`
2584
2780
  });
2781
+ recentRuns.unshift({
2782
+ runId: runResult.runId,
2783
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2784
+ model: modelKey,
2785
+ taskType,
2786
+ confidence,
2787
+ mode: routingMode,
2788
+ durationMs,
2789
+ promptPreview: promptText.slice(0, 100) + (promptText.length > 100 ? "..." : "")
2790
+ });
2791
+ if (recentRuns.length > MAX_RECENT_RUNS) {
2792
+ recentRuns.pop();
2793
+ }
2585
2794
  responseData["_relayplane"] = {
2586
2795
  runId: runResult.runId,
2587
- routedTo: `${targetProvider}/${targetModel}`,
2796
+ routedTo: modelKey,
2588
2797
  taskType,
2589
2798
  confidence,
2590
2799
  durationMs,
@@ -2599,7 +2808,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2599
2808
  }
2600
2809
 
2601
2810
  // src/types.ts
2602
- import { z } from "zod";
2811
+ import { z as z2 } from "zod";
2603
2812
  var TaskTypes = [
2604
2813
  "code_generation",
2605
2814
  "code_review",
@@ -2611,63 +2820,64 @@ var TaskTypes = [
2611
2820
  "question_answering",
2612
2821
  "general"
2613
2822
  ];
2614
- var TaskTypeSchema = z.enum(TaskTypes);
2823
+ var TaskTypeSchema = z2.enum(TaskTypes);
2615
2824
  var Providers = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
2616
- var ProviderSchema = z.enum(Providers);
2617
- var RelayPlaneConfigSchema = z.object({
2618
- dbPath: z.string().optional(),
2619
- providers: z.record(ProviderSchema, z.object({
2620
- apiKey: z.string().optional(),
2621
- baseUrl: z.string().optional()
2825
+ var ProviderSchema = z2.enum(Providers);
2826
+ var RelayPlaneConfigSchema = z2.object({
2827
+ dbPath: z2.string().optional(),
2828
+ providers: z2.record(ProviderSchema, z2.object({
2829
+ apiKey: z2.string().optional(),
2830
+ baseUrl: z2.string().optional()
2622
2831
  })).optional(),
2623
2832
  defaultProvider: ProviderSchema.optional(),
2624
- defaultModel: z.string().optional()
2833
+ defaultModel: z2.string().optional()
2625
2834
  });
2626
- var RunInputSchema = z.object({
2627
- prompt: z.string().min(1),
2628
- systemPrompt: z.string().optional(),
2835
+ var RunInputSchema = z2.object({
2836
+ prompt: z2.string().min(1),
2837
+ systemPrompt: z2.string().optional(),
2629
2838
  taskType: TaskTypeSchema.optional(),
2630
- model: z.string().optional(),
2631
- metadata: z.record(z.unknown()).optional()
2839
+ model: z2.string().optional(),
2840
+ metadata: z2.record(z2.unknown()).optional()
2632
2841
  });
2633
2842
  var RuleSources = ["default", "user", "learned"];
2634
- var RoutingRuleSchema = z.object({
2635
- id: z.string(),
2843
+ var RoutingRuleSchema = z2.object({
2844
+ id: z2.string(),
2636
2845
  taskType: TaskTypeSchema,
2637
- preferredModel: z.string(),
2638
- source: z.enum(RuleSources),
2639
- confidence: z.number().min(0).max(1).optional(),
2640
- sampleCount: z.number().int().positive().optional(),
2641
- createdAt: z.string(),
2642
- updatedAt: z.string()
2846
+ preferredModel: z2.string(),
2847
+ source: z2.enum(RuleSources),
2848
+ confidence: z2.number().min(0).max(1).optional(),
2849
+ sampleCount: z2.number().int().positive().optional(),
2850
+ createdAt: z2.string(),
2851
+ updatedAt: z2.string()
2643
2852
  });
2644
2853
  var OutcomeQualities = ["excellent", "good", "acceptable", "poor", "failed"];
2645
- var OutcomeInputSchema = z.object({
2646
- runId: z.string().min(1),
2647
- success: z.boolean(),
2648
- quality: z.enum(OutcomeQualities).optional(),
2649
- latencySatisfactory: z.boolean().optional(),
2650
- costSatisfactory: z.boolean().optional(),
2651
- feedback: z.string().optional()
2854
+ var OutcomeInputSchema = z2.object({
2855
+ runId: z2.string().min(1),
2856
+ success: z2.boolean(),
2857
+ quality: z2.enum(OutcomeQualities).optional(),
2858
+ latencySatisfactory: z2.boolean().optional(),
2859
+ costSatisfactory: z2.boolean().optional(),
2860
+ feedback: z2.string().optional()
2652
2861
  });
2653
- var SuggestionSchema = z.object({
2654
- id: z.string(),
2862
+ var SuggestionSchema = z2.object({
2863
+ id: z2.string(),
2655
2864
  taskType: TaskTypeSchema,
2656
- currentModel: z.string(),
2657
- suggestedModel: z.string(),
2658
- reason: z.string(),
2659
- confidence: z.number().min(0).max(1),
2660
- expectedImprovement: z.object({
2661
- successRate: z.number().optional(),
2662
- latency: z.number().optional(),
2663
- cost: z.number().optional()
2865
+ currentModel: z2.string(),
2866
+ suggestedModel: z2.string(),
2867
+ reason: z2.string(),
2868
+ confidence: z2.number().min(0).max(1),
2869
+ expectedImprovement: z2.object({
2870
+ successRate: z2.number().optional(),
2871
+ latency: z2.number().optional(),
2872
+ cost: z2.number().optional()
2664
2873
  }),
2665
- sampleCount: z.number().int().positive(),
2666
- createdAt: z.string(),
2667
- accepted: z.boolean().optional(),
2668
- acceptedAt: z.string().optional()
2874
+ sampleCount: z2.number().int().positive(),
2875
+ createdAt: z2.string(),
2876
+ accepted: z2.boolean().optional(),
2877
+ acceptedAt: z2.string().optional()
2669
2878
  });
2670
2879
  export {
2880
+ DEFAULT_CONFIG,
2671
2881
  DEFAULT_ENDPOINTS,
2672
2882
  MODEL_MAPPING,
2673
2883
  MODEL_PRICING,
@@ -2682,9 +2892,13 @@ export {
2682
2892
  TaskTypes,
2683
2893
  calculateCost,
2684
2894
  calculateSavings,
2895
+ getConfigPath,
2685
2896
  getInferenceConfidence,
2686
2897
  getModelPricing,
2898
+ getStrategy,
2687
2899
  inferTaskType,
2688
- startProxy
2900
+ loadConfig,
2901
+ startProxy,
2902
+ watchConfig
2689
2903
  };
2690
2904
  //# sourceMappingURL=index.mjs.map