@relayplane/proxy 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1570,12 +1570,127 @@ ${input.prompt}` : input.prompt;
1570
1570
  }
1571
1571
  };
1572
1572
 
1573
+ // src/config.ts
1574
+ import * as fs2 from "fs";
1575
+ import * as path2 from "path";
1576
+ import * as os2 from "os";
1577
+ import { z } from "zod";
/**
 * Shape of one routing strategy entry in the user config file.
 * Entries are stored under `strategies`, keyed by task type.
 */
var StrategySchema = z.object({
  // Model identifier, e.g. "anthropic:claude-sonnet-4-20250514".
  model: z.string(),

  // Optional number constrained to the inclusive range [0, 1].
  minConfidence: z.number().min(0).max(1).optional(),

  // Optional string; presumably an alternate model identifier — confirm against consumers.
  fallback: z.string().optional()
});
/**
 * Optional `auth` section of the user config file.
 * The whole section may be omitted, and so may every field inside it.
 */
var AuthSchema = z
  .object({
    // Anthropic API key.
    anthropicApiKey: z.string().optional(),

    // Anthropic MAX token.
    anthropicMaxToken: z.string().optional(),

    // Model-name substrings that should authenticate with the MAX token.
    // Default: ['opus']
    useMaxForModels: z.array(z.string()).optional()
  })
  .optional();
/**
 * Top-level schema for the user config file. Every section is optional.
 */
var ConfigSchema = z.object({
  // Per-task-type routing strategies, keyed by task type name.
  strategies: z.record(z.string(), StrategySchema).optional(),

  // Model names used by the "quality" and "cost" routing modes.
  defaults: z
    .object({
      qualityModel: z.string().optional(),
      costModel: z.string().optional()
    })
    .optional(),

  // Anthropic credentials; AuthSchema is itself optional.
  auth: AuthSchema
});
/**
 * Built-in configuration. It is written to disk when no config file exists
 * and returned when the config file cannot be loaded.
 *
 * Key order is significant: the object is serialised with JSON.stringify
 * when the default config file is created.
 */
var DEFAULT_CONFIG = (() => {
  const sonnet = "claude-sonnet-4-20250514";
  const haiku = "claude-3-5-haiku-latest";

  // Strategy entries carry a provider-prefixed model identifier.
  const viaAnthropic = (modelName) => ({ model: `anthropic:${modelName}` });

  return {
    strategies: {
      code_review: viaAnthropic(sonnet),
      code_generation: viaAnthropic(haiku),
      analysis: viaAnthropic(sonnet),
      summarization: viaAnthropic(haiku),
      creative_writing: viaAnthropic(sonnet),
      data_extraction: viaAnthropic(haiku),
      translation: viaAnthropic(haiku),
      question_answering: viaAnthropic(haiku),
      general: viaAnthropic(haiku)
    },
    // The defaults section uses bare model names (no provider prefix).
    defaults: {
      qualityModel: sonnet,
      costModel: haiku
    }
  };
})();
/**
 * Resolve the location of the user config file.
 *
 * @returns {string} `<home directory>/.relayplane/config.json`
 */
function getConfigPath() {
  const segments = [os2.homedir(), ".relayplane", "config.json"];
  return path2.join(...segments);
}
/**
 * Create the config file with DEFAULT_CONFIG if it does not exist yet.
 *
 * The file is created with the exclusive flag ("wx"), so an existing config
 * is never overwritten, even if it appears between a check and the write
 * (for example a second process starting, or an editor saving the file).
 *
 * @throws {Error} Any filesystem error other than "file already exists".
 */
function writeDefaultConfig() {
  const configPath = getConfigPath();

  // `recursive: true` makes this a no-op when the directory already exists.
  fs2.mkdirSync(path2.dirname(configPath), { recursive: true });

  try {
    fs2.writeFileSync(
      configPath,
      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
      { encoding: "utf-8", flag: "wx" }
    );
  } catch (err) {
    // EEXIST means a config is already present, which is the normal case.
    if (err && err.code === "EEXIST") {
      return;
    }
    throw err;
  }

  console.log(`[relayplane] Created default config at ${configPath}`);
}
/**
 * Load and validate the user config file, creating it with defaults first
 * if it is missing.
 *
 * This function never throws. Any failure falls back to DEFAULT_CONFIG:
 * being unable to create or read the file, invalid JSON, or a schema
 * violation. The bootstrap step runs inside the `try` because this function
 * is also called while the module is being loaded, so a read-only home
 * directory must not abort the import.
 *
 * @returns {object} The validated config, or DEFAULT_CONFIG on any failure.
 */
function loadConfig() {
  try {
    writeDefaultConfig();
    const raw = fs2.readFileSync(getConfigPath(), "utf-8");
    return ConfigSchema.parse(JSON.parse(raw));
  } catch (err) {
    if (err instanceof z.ZodError) {
      console.error(`[relayplane] Invalid config: ${err.message}`);
    } else if (err instanceof SyntaxError) {
      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
    } else {
      console.error(`[relayplane] Failed to load config: ${err}`);
    }
    console.log("[relayplane] Using default config");
    return DEFAULT_CONFIG;
  }
}
/**
 * Look up the configured routing strategy for a task type.
 *
 * @param {object} config - Loaded config; `strategies` may be absent.
 * @param {string} taskType - Key to look up under `config.strategies`.
 * @returns {object|null} The strategy entry, or null when none is configured.
 */
function getStrategy(config, taskType) {
  const table = config.strategies;
  if (table === null || table === undefined) {
    return null;
  }
  const entry = table[taskType];
  if (entry === null || entry === undefined) {
    return null;
  }
  return entry;
}
/**
 * Decide which Anthropic credential to use for a given model.
 *
 * Resolution order:
 *   1. If the model name contains (case-insensitively) any entry of
 *      `auth.useMaxForModels` (default `["opus"]`) and a MAX token is
 *      configured, use the MAX token.
 *   2. Otherwise use `auth.anthropicApiKey`, falling back to the
 *      ANTHROPIC_API_KEY environment variable.
 *
 * An empty-string credential is treated as "not set", so a blank
 * `anthropicApiKey` placeholder in the config file does not mask the
 * environment variable.
 *
 * @param {object} config - Loaded config; `auth` may be absent.
 * @param {string} model - Target model name.
 * @returns {{type: "max"|"apiKey", value: string}|null} The credential to
 *   use, or null when nothing usable is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const maxModelPatterns = auth?.useMaxForModels ?? ["opus"];
  const shouldUseMax = maxModelPatterns.some(
    (pattern) => model.toLowerCase().includes(pattern.toLowerCase())
  );
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }

  // `||` rather than `??`: an empty string must fall through to the env var.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
/**
 * Watch the config directory and reload the config when config.json changes.
 *
 * Change events are debounced by 100 ms, so a burst of filesystem events
 * produces a single reload. The watcher is returned so callers can stop
 * watching with `.close()`; closing also cancels any pending reload.
 *
 * @param {(config: object) => void} onChange - Called with the freshly
 *   loaded config after each debounced change.
 * @returns {import("fs").FSWatcher} The underlying directory watcher.
 */
function watchConfig(onChange) {
  const configPath = getConfigPath();
  const dir = path2.dirname(configPath);
  if (!fs2.existsSync(dir)) {
    fs2.mkdirSync(dir, { recursive: true });
  }

  let debounceTimer = null;

  const watcher = fs2.watch(dir, (eventType, filename) => {
    // `filename` is not guaranteed on every platform; when it is missing we
    // cannot rule out a config change, so reload rather than miss it.
    if (filename != null && filename !== "config.json") {
      return;
    }
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      console.log("[relayplane] Config file changed, reloading...");
      onChange(loadConfig());
    }, 100);
  });

  // Without a listener, an 'error' event would surface as an uncaught
  // exception and take the whole process down.
  watcher.on("error", (err) => {
    console.error(`[relayplane] Config watcher error: ${err}`);
  });

  // Do not reload after the caller has stopped watching.
  watcher.on("close", () => {
    if (debounceTimer) {
      clearTimeout(debounceTimer);
      debounceTimer = null;
    }
  });

  return watcher;
}
1686
+
1573
1687
  // src/proxy.ts
1574
- var VERSION = "0.1.7";
1688
+ var VERSION = "0.1.9";
1575
1689
  var recentRuns = [];
1576
1690
  var MAX_RECENT_RUNS = 100;
1577
1691
  var modelCounts = {};
1578
1692
  var serverStartTime = 0;
1693
+ var currentConfig = loadConfig();
1579
1694
  var DEFAULT_ENDPOINTS = {
1580
1695
  anthropic: {
1581
1696
  baseUrl: "https://api.anthropic.com/v1",
@@ -1638,13 +1753,17 @@ function extractPromptText(messages) {
1638
1753
  return "";
1639
1754
  }).join("\n");
1640
1755
  }
1641
- async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1756
+ async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
1642
1757
  const anthropicBody = buildAnthropicBody(request, targetModel, false);
1643
1758
  const headers = {
1644
1759
  "Content-Type": "application/json",
1645
- "x-api-key": apiKey,
1646
1760
  "anthropic-version": "2023-06-01"
1647
1761
  };
1762
+ if (auth.type === "max") {
1763
+ headers["Authorization"] = `Bearer ${auth.value}`;
1764
+ } else {
1765
+ headers["x-api-key"] = auth.value;
1766
+ }
1648
1767
  if (betaHeaders) {
1649
1768
  headers["anthropic-beta"] = betaHeaders;
1650
1769
  }
@@ -1655,13 +1774,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1655
1774
  });
1656
1775
  return response;
1657
1776
  }
1658
- async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
1777
+ async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
1659
1778
  const anthropicBody = buildAnthropicBody(request, targetModel, true);
1660
1779
  const headers = {
1661
1780
  "Content-Type": "application/json",
1662
- "x-api-key": apiKey,
1663
1781
  "anthropic-version": "2023-06-01"
1664
1782
  };
1783
+ if (auth.type === "max") {
1784
+ headers["Authorization"] = `Bearer ${auth.value}`;
1785
+ } else {
1786
+ headers["x-api-key"] = auth.value;
1787
+ }
1665
1788
  if (betaHeaders) {
1666
1789
  headers["anthropic-beta"] = betaHeaders;
1667
1790
  }
@@ -2426,42 +2549,65 @@ async function startProxy(config = {}) {
2426
2549
  const confidence = getInferenceConfidence(promptText, taskType);
2427
2550
  log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2428
2551
  if (routingMode !== "passthrough") {
2429
- const rule = relay.routing.get(taskType);
2430
- if (rule && rule.preferredModel) {
2431
- const parsed = parsePreferredModel(rule.preferredModel);
2552
+ const configStrategy = getStrategy(currentConfig, taskType);
2553
+ if (configStrategy) {
2554
+ const parsed = parsePreferredModel(configStrategy.model);
2432
2555
  if (parsed) {
2433
2556
  targetProvider = parsed.provider;
2434
2557
  targetModel = parsed.model;
2435
- log(`Using learned rule: ${rule.preferredModel}`);
2558
+ log(`Using config strategy: ${configStrategy.model}`);
2559
+ }
2560
+ }
2561
+ if (!configStrategy) {
2562
+ const rule = relay.routing.get(taskType);
2563
+ if (rule && rule.preferredModel) {
2564
+ const parsed = parsePreferredModel(rule.preferredModel);
2565
+ if (parsed) {
2566
+ targetProvider = parsed.provider;
2567
+ targetModel = parsed.model;
2568
+ log(`Using learned rule: ${rule.preferredModel}`);
2569
+ } else {
2570
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2571
+ targetProvider = defaultRoute.provider;
2572
+ targetModel = defaultRoute.model;
2573
+ }
2436
2574
  } else {
2437
2575
  const defaultRoute = DEFAULT_ROUTING[taskType];
2438
2576
  targetProvider = defaultRoute.provider;
2439
2577
  targetModel = defaultRoute.model;
2440
2578
  }
2441
- } else {
2442
- const defaultRoute = DEFAULT_ROUTING[taskType];
2443
- targetProvider = defaultRoute.provider;
2444
- targetModel = defaultRoute.model;
2445
2579
  }
2446
2580
  if (routingMode === "cost") {
2447
- const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
2448
- if (simpleTasks.includes(taskType)) {
2449
- targetModel = "claude-3-5-haiku-latest";
2450
- targetProvider = "anthropic";
2451
- }
2581
+ const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
2582
+ targetModel = costModel;
2583
+ targetProvider = "anthropic";
2584
+ log(`Cost mode: using ${costModel}`);
2452
2585
  } else if (routingMode === "quality") {
2453
- const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2586
+ const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2454
2587
  targetModel = qualityModel;
2455
2588
  targetProvider = "anthropic";
2589
+ log(`Quality mode: using ${qualityModel}`);
2456
2590
  }
2457
2591
  }
2458
2592
  log(`Routing to: ${targetProvider}/${targetModel}`);
2459
- const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2460
- const apiKey = process.env[apiKeyEnv];
2461
- if (!apiKey) {
2462
- res.writeHead(500, { "Content-Type": "application/json" });
2463
- res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2464
- return;
2593
+ let apiKey;
2594
+ let anthropicAuth = null;
2595
+ if (targetProvider === "anthropic") {
2596
+ anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
2597
+ if (!anthropicAuth) {
2598
+ res.writeHead(500, { "Content-Type": "application/json" });
2599
+ res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
2600
+ return;
2601
+ }
2602
+ log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
2603
+ } else {
2604
+ const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2605
+ apiKey = process.env[apiKeyEnv];
2606
+ if (!apiKey) {
2607
+ res.writeHead(500, { "Content-Type": "application/json" });
2608
+ res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2609
+ return;
2610
+ }
2465
2611
  }
2466
2612
  const startTime = Date.now();
2467
2613
  const betaHeaders = req.headers["anthropic-beta"];
@@ -2472,6 +2618,7 @@ async function startProxy(config = {}) {
2472
2618
  targetProvider,
2473
2619
  targetModel,
2474
2620
  apiKey,
2621
+ anthropicAuth,
2475
2622
  relay,
2476
2623
  promptText,
2477
2624
  taskType,
@@ -2488,6 +2635,7 @@ async function startProxy(config = {}) {
2488
2635
  targetProvider,
2489
2636
  targetModel,
2490
2637
  apiKey,
2638
+ anthropicAuth,
2491
2639
  relay,
2492
2640
  promptText,
2493
2641
  taskType,
@@ -2499,6 +2647,10 @@ async function startProxy(config = {}) {
2499
2647
  );
2500
2648
  }
2501
2649
  });
2650
+ watchConfig((newConfig) => {
2651
+ currentConfig = newConfig;
2652
+ console.log("[relayplane] Config reloaded");
2653
+ });
2502
2654
  return new Promise((resolve, reject) => {
2503
2655
  server.on("error", reject);
2504
2656
  server.listen(port, host, () => {
@@ -2507,17 +2659,19 @@ async function startProxy(config = {}) {
2507
2659
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2508
2660
  console.log(` Endpoint: POST /v1/chat/completions`);
2509
2661
  console.log(` Stats: GET /stats, /runs, /health`);
2662
+ console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
2510
2663
  console.log(` Streaming: \u2705 Enabled`);
2511
2664
  resolve(server);
2512
2665
  });
2513
2666
  });
2514
2667
  }
2515
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2668
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2516
2669
  let providerResponse;
2517
2670
  try {
2518
2671
  switch (targetProvider) {
2519
2672
  case "anthropic":
2520
- providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
2673
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2674
+ providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2521
2675
  break;
2522
2676
  case "google":
2523
2677
  providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
@@ -2595,13 +2749,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2595
2749
  });
2596
2750
  res.end();
2597
2751
  }
2598
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2752
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2599
2753
  let providerResponse;
2600
2754
  let responseData;
2601
2755
  try {
2602
2756
  switch (targetProvider) {
2603
2757
  case "anthropic": {
2604
- providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
2758
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2759
+ providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2605
2760
  const rawData = await providerResponse.json();
2606
2761
  if (!providerResponse.ok) {
2607
2762
  res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
@@ -2697,7 +2852,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2697
2852
  }
2698
2853
 
2699
2854
  // src/types.ts
2700
- import { z } from "zod";
2855
+ import { z as z2 } from "zod";
2701
2856
  var TaskTypes = [
2702
2857
  "code_generation",
2703
2858
  "code_review",
@@ -2709,63 +2864,64 @@ var TaskTypes = [
2709
2864
  "question_answering",
2710
2865
  "general"
2711
2866
  ];
2712
- var TaskTypeSchema = z.enum(TaskTypes);
2867
+ var TaskTypeSchema = z2.enum(TaskTypes);
2713
2868
  var Providers = ["openai", "anthropic", "google", "xai", "moonshot", "local"];
2714
- var ProviderSchema = z.enum(Providers);
2715
- var RelayPlaneConfigSchema = z.object({
2716
- dbPath: z.string().optional(),
2717
- providers: z.record(ProviderSchema, z.object({
2718
- apiKey: z.string().optional(),
2719
- baseUrl: z.string().optional()
2869
+ var ProviderSchema = z2.enum(Providers);
2870
+ var RelayPlaneConfigSchema = z2.object({
2871
+ dbPath: z2.string().optional(),
2872
+ providers: z2.record(ProviderSchema, z2.object({
2873
+ apiKey: z2.string().optional(),
2874
+ baseUrl: z2.string().optional()
2720
2875
  })).optional(),
2721
2876
  defaultProvider: ProviderSchema.optional(),
2722
- defaultModel: z.string().optional()
2877
+ defaultModel: z2.string().optional()
2723
2878
  });
2724
- var RunInputSchema = z.object({
2725
- prompt: z.string().min(1),
2726
- systemPrompt: z.string().optional(),
2879
+ var RunInputSchema = z2.object({
2880
+ prompt: z2.string().min(1),
2881
+ systemPrompt: z2.string().optional(),
2727
2882
  taskType: TaskTypeSchema.optional(),
2728
- model: z.string().optional(),
2729
- metadata: z.record(z.unknown()).optional()
2883
+ model: z2.string().optional(),
2884
+ metadata: z2.record(z2.unknown()).optional()
2730
2885
  });
2731
2886
  var RuleSources = ["default", "user", "learned"];
2732
- var RoutingRuleSchema = z.object({
2733
- id: z.string(),
2887
+ var RoutingRuleSchema = z2.object({
2888
+ id: z2.string(),
2734
2889
  taskType: TaskTypeSchema,
2735
- preferredModel: z.string(),
2736
- source: z.enum(RuleSources),
2737
- confidence: z.number().min(0).max(1).optional(),
2738
- sampleCount: z.number().int().positive().optional(),
2739
- createdAt: z.string(),
2740
- updatedAt: z.string()
2890
+ preferredModel: z2.string(),
2891
+ source: z2.enum(RuleSources),
2892
+ confidence: z2.number().min(0).max(1).optional(),
2893
+ sampleCount: z2.number().int().positive().optional(),
2894
+ createdAt: z2.string(),
2895
+ updatedAt: z2.string()
2741
2896
  });
2742
2897
  var OutcomeQualities = ["excellent", "good", "acceptable", "poor", "failed"];
2743
- var OutcomeInputSchema = z.object({
2744
- runId: z.string().min(1),
2745
- success: z.boolean(),
2746
- quality: z.enum(OutcomeQualities).optional(),
2747
- latencySatisfactory: z.boolean().optional(),
2748
- costSatisfactory: z.boolean().optional(),
2749
- feedback: z.string().optional()
2898
+ var OutcomeInputSchema = z2.object({
2899
+ runId: z2.string().min(1),
2900
+ success: z2.boolean(),
2901
+ quality: z2.enum(OutcomeQualities).optional(),
2902
+ latencySatisfactory: z2.boolean().optional(),
2903
+ costSatisfactory: z2.boolean().optional(),
2904
+ feedback: z2.string().optional()
2750
2905
  });
2751
- var SuggestionSchema = z.object({
2752
- id: z.string(),
2906
+ var SuggestionSchema = z2.object({
2907
+ id: z2.string(),
2753
2908
  taskType: TaskTypeSchema,
2754
- currentModel: z.string(),
2755
- suggestedModel: z.string(),
2756
- reason: z.string(),
2757
- confidence: z.number().min(0).max(1),
2758
- expectedImprovement: z.object({
2759
- successRate: z.number().optional(),
2760
- latency: z.number().optional(),
2761
- cost: z.number().optional()
2909
+ currentModel: z2.string(),
2910
+ suggestedModel: z2.string(),
2911
+ reason: z2.string(),
2912
+ confidence: z2.number().min(0).max(1),
2913
+ expectedImprovement: z2.object({
2914
+ successRate: z2.number().optional(),
2915
+ latency: z2.number().optional(),
2916
+ cost: z2.number().optional()
2762
2917
  }),
2763
- sampleCount: z.number().int().positive(),
2764
- createdAt: z.string(),
2765
- accepted: z.boolean().optional(),
2766
- acceptedAt: z.string().optional()
2918
+ sampleCount: z2.number().int().positive(),
2919
+ createdAt: z2.string(),
2920
+ accepted: z2.boolean().optional(),
2921
+ acceptedAt: z2.string().optional()
2767
2922
  });
2768
2923
  export {
2924
+ DEFAULT_CONFIG,
2769
2925
  DEFAULT_ENDPOINTS,
2770
2926
  MODEL_MAPPING,
2771
2927
  MODEL_PRICING,
@@ -2780,9 +2936,13 @@ export {
2780
2936
  TaskTypes,
2781
2937
  calculateCost,
2782
2938
  calculateSavings,
2939
+ getConfigPath,
2783
2940
  getInferenceConfidence,
2784
2941
  getModelPricing,
2942
+ getStrategy,
2785
2943
  inferTaskType,
2786
- startProxy
2944
+ loadConfig,
2945
+ startProxy,
2946
+ watchConfig
2787
2947
  };
2788
2948
  //# sourceMappingURL=index.mjs.map