@relayplane/proxy 0.1.8 → 0.1.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1071,6 +1071,19 @@ declare const ConfigSchema: z.ZodObject<{
1071
1071
  qualityModel?: string | undefined;
1072
1072
  costModel?: string | undefined;
1073
1073
  }>>;
1074
+ auth: z.ZodOptional<z.ZodObject<{
1075
+ anthropicApiKey: z.ZodOptional<z.ZodString>;
1076
+ anthropicMaxToken: z.ZodOptional<z.ZodString>;
1077
+ useMaxForModels: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1078
+ }, "strip", z.ZodTypeAny, {
1079
+ anthropicApiKey?: string | undefined;
1080
+ anthropicMaxToken?: string | undefined;
1081
+ useMaxForModels?: string[] | undefined;
1082
+ }, {
1083
+ anthropicApiKey?: string | undefined;
1084
+ anthropicMaxToken?: string | undefined;
1085
+ useMaxForModels?: string[] | undefined;
1086
+ }>>;
1074
1087
  }, "strip", z.ZodTypeAny, {
1075
1088
  strategies?: Record<string, {
1076
1089
  model: string;
@@ -1081,6 +1094,11 @@ declare const ConfigSchema: z.ZodObject<{
1081
1094
  qualityModel?: string | undefined;
1082
1095
  costModel?: string | undefined;
1083
1096
  } | undefined;
1097
+ auth?: {
1098
+ anthropicApiKey?: string | undefined;
1099
+ anthropicMaxToken?: string | undefined;
1100
+ useMaxForModels?: string[] | undefined;
1101
+ } | undefined;
1084
1102
  }, {
1085
1103
  strategies?: Record<string, {
1086
1104
  model: string;
@@ -1091,6 +1109,11 @@ declare const ConfigSchema: z.ZodObject<{
1091
1109
  qualityModel?: string | undefined;
1092
1110
  costModel?: string | undefined;
1093
1111
  } | undefined;
1112
+ auth?: {
1113
+ anthropicApiKey?: string | undefined;
1114
+ anthropicMaxToken?: string | undefined;
1115
+ useMaxForModels?: string[] | undefined;
1116
+ } | undefined;
1094
1117
  }>;
1095
1118
  type StrategyConfig = z.infer<typeof StrategySchema>;
1096
1119
  type Config = z.infer<typeof ConfigSchema>;
package/dist/index.d.ts CHANGED
@@ -1071,6 +1071,19 @@ declare const ConfigSchema: z.ZodObject<{
1071
1071
  qualityModel?: string | undefined;
1072
1072
  costModel?: string | undefined;
1073
1073
  }>>;
1074
+ auth: z.ZodOptional<z.ZodObject<{
1075
+ anthropicApiKey: z.ZodOptional<z.ZodString>;
1076
+ anthropicMaxToken: z.ZodOptional<z.ZodString>;
1077
+ useMaxForModels: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1078
+ }, "strip", z.ZodTypeAny, {
1079
+ anthropicApiKey?: string | undefined;
1080
+ anthropicMaxToken?: string | undefined;
1081
+ useMaxForModels?: string[] | undefined;
1082
+ }, {
1083
+ anthropicApiKey?: string | undefined;
1084
+ anthropicMaxToken?: string | undefined;
1085
+ useMaxForModels?: string[] | undefined;
1086
+ }>>;
1074
1087
  }, "strip", z.ZodTypeAny, {
1075
1088
  strategies?: Record<string, {
1076
1089
  model: string;
@@ -1081,6 +1094,11 @@ declare const ConfigSchema: z.ZodObject<{
1081
1094
  qualityModel?: string | undefined;
1082
1095
  costModel?: string | undefined;
1083
1096
  } | undefined;
1097
+ auth?: {
1098
+ anthropicApiKey?: string | undefined;
1099
+ anthropicMaxToken?: string | undefined;
1100
+ useMaxForModels?: string[] | undefined;
1101
+ } | undefined;
1084
1102
  }, {
1085
1103
  strategies?: Record<string, {
1086
1104
  model: string;
@@ -1091,6 +1109,11 @@ declare const ConfigSchema: z.ZodObject<{
1091
1109
  qualityModel?: string | undefined;
1092
1110
  costModel?: string | undefined;
1093
1111
  } | undefined;
1112
+ auth?: {
1113
+ anthropicApiKey?: string | undefined;
1114
+ anthropicMaxToken?: string | undefined;
1115
+ useMaxForModels?: string[] | undefined;
1116
+ } | undefined;
1094
1117
  }>;
1095
1118
  type StrategyConfig = z.infer<typeof StrategySchema>;
1096
1119
  type Config = z.infer<typeof ConfigSchema>;
package/dist/index.js CHANGED
@@ -1638,12 +1638,19 @@ var StrategySchema = import_zod.z.object({
1638
1638
  minConfidence: import_zod.z.number().min(0).max(1).optional(),
1639
1639
  fallback: import_zod.z.string().optional()
1640
1640
  });
1641
+ var AuthSchema = import_zod.z.object({
1642
+ anthropicApiKey: import_zod.z.string().optional(),
1643
+ anthropicMaxToken: import_zod.z.string().optional(),
1644
+ useMaxForModels: import_zod.z.array(import_zod.z.string()).optional()
1645
+ // Default: ['opus']
1646
+ }).optional();
1641
1647
  var ConfigSchema = import_zod.z.object({
1642
1648
  strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
1643
1649
  defaults: import_zod.z.object({
1644
1650
  qualityModel: import_zod.z.string().optional(),
1645
1651
  costModel: import_zod.z.string().optional()
1646
- }).optional()
1652
+ }).optional(),
1653
+ auth: AuthSchema
1647
1654
  });
1648
1655
  var DEFAULT_CONFIG = {
1649
1656
  strategies: {
@@ -1703,6 +1710,19 @@ function loadConfig() {
1703
1710
  function getStrategy(config, taskType) {
1704
1711
  return config.strategies?.[taskType] ?? null;
1705
1712
  }
1713
+ function getAnthropicAuth(config, model) {
1714
+ const auth = config.auth;
1715
+ const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
1716
+ const shouldUseMax = useMaxForModels.some((m) => model.toLowerCase().includes(m.toLowerCase()));
1717
+ if (shouldUseMax && auth?.anthropicMaxToken) {
1718
+ return { type: "max", value: auth.anthropicMaxToken };
1719
+ }
1720
+ const apiKey = auth?.anthropicApiKey ?? process.env["ANTHROPIC_API_KEY"];
1721
+ if (apiKey) {
1722
+ return { type: "apiKey", value: apiKey };
1723
+ }
1724
+ return null;
1725
+ }
1706
1726
  function watchConfig(onChange) {
1707
1727
  const configPath = getConfigPath();
1708
1728
  const dir = path2.dirname(configPath);
@@ -1723,10 +1743,67 @@ function watchConfig(onChange) {
1723
1743
  }
1724
1744
 
1725
1745
  // src/proxy.ts
1726
- var VERSION = "0.1.8";
1746
+ var VERSION = "0.1.9";
1727
1747
  var recentRuns = [];
1728
1748
  var MAX_RECENT_RUNS = 100;
1729
1749
  var modelCounts = {};
1750
+ var tokenStats = {};
1751
+ var MODEL_PRICING2 = {
1752
+ // Anthropic
1753
+ "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
1754
+ "claude-3-5-haiku-20241022": { input: 1, output: 5 },
1755
+ "claude-3-5-haiku-latest": { input: 1, output: 5 },
1756
+ "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
1757
+ "claude-sonnet-4-20250514": { input: 3, output: 15 },
1758
+ "claude-3-opus-20240229": { input: 15, output: 75 },
1759
+ "claude-opus-4-5-20250514": { input: 15, output: 75 },
1760
+ // OpenAI
1761
+ "gpt-4o": { input: 2.5, output: 10 },
1762
+ "gpt-4o-mini": { input: 0.15, output: 0.6 },
1763
+ "gpt-4-turbo": { input: 10, output: 30 },
1764
+ // Defaults for unknown models
1765
+ "default-cheap": { input: 1, output: 5 },
1766
+ "default-expensive": { input: 15, output: 75 }
1767
+ };
1768
+ function trackTokens(model, inputTokens, outputTokens) {
1769
+ if (!tokenStats[model]) {
1770
+ tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
1771
+ }
1772
+ tokenStats[model].inputTokens += inputTokens;
1773
+ tokenStats[model].outputTokens += outputTokens;
1774
+ tokenStats[model].requests += 1;
1775
+ }
1776
+ function calculateCosts() {
1777
+ let totalInputTokens = 0;
1778
+ let totalOutputTokens = 0;
1779
+ let actualCostUsd = 0;
1780
+ const byModel = {};
1781
+ for (const [model, stats] of Object.entries(tokenStats)) {
1782
+ totalInputTokens += stats.inputTokens;
1783
+ totalOutputTokens += stats.outputTokens;
1784
+ const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
1785
+ const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
1786
+ actualCostUsd += cost;
1787
+ byModel[model] = {
1788
+ inputTokens: stats.inputTokens,
1789
+ outputTokens: stats.outputTokens,
1790
+ costUsd: parseFloat(cost.toFixed(4))
1791
+ };
1792
+ }
1793
+ const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
1794
+ const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
1795
+ const savingsUsd = opusCostUsd - actualCostUsd;
1796
+ const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
1797
+ return {
1798
+ totalInputTokens,
1799
+ totalOutputTokens,
1800
+ actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
1801
+ opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
1802
+ savingsUsd: parseFloat(savingsUsd.toFixed(4)),
1803
+ savingsPercent,
1804
+ byModel
1805
+ };
1806
+ }
1730
1807
  var serverStartTime = 0;
1731
1808
  var currentConfig = loadConfig();
1732
1809
  var DEFAULT_ENDPOINTS = {
@@ -1791,13 +1868,17 @@ function extractPromptText(messages) {
1791
1868
  return "";
1792
1869
  }).join("\n");
1793
1870
  }
1794
- async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1871
+ async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
1795
1872
  const anthropicBody = buildAnthropicBody(request, targetModel, false);
1796
1873
  const headers = {
1797
1874
  "Content-Type": "application/json",
1798
- "x-api-key": apiKey,
1799
1875
  "anthropic-version": "2023-06-01"
1800
1876
  };
1877
+ if (auth.type === "max") {
1878
+ headers["Authorization"] = `Bearer ${auth.value}`;
1879
+ } else {
1880
+ headers["x-api-key"] = auth.value;
1881
+ }
1801
1882
  if (betaHeaders) {
1802
1883
  headers["anthropic-beta"] = betaHeaders;
1803
1884
  }
@@ -1808,13 +1889,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1808
1889
  });
1809
1890
  return response;
1810
1891
  }
1811
- async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
1892
+ async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
1812
1893
  const anthropicBody = buildAnthropicBody(request, targetModel, true);
1813
1894
  const headers = {
1814
1895
  "Content-Type": "application/json",
1815
- "x-api-key": apiKey,
1816
1896
  "anthropic-version": "2023-06-01"
1817
1897
  };
1898
+ if (auth.type === "max") {
1899
+ headers["Authorization"] = `Bearer ${auth.value}`;
1900
+ } else {
1901
+ headers["x-api-key"] = auth.value;
1902
+ }
1818
1903
  if (betaHeaders) {
1819
1904
  headers["anthropic-beta"] = betaHeaders;
1820
1905
  }
@@ -2346,6 +2431,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
2346
2431
  return null;
2347
2432
  }
2348
2433
  }
2434
+ var lastStreamingUsage = null;
2349
2435
  async function* convertAnthropicStream(response, model) {
2350
2436
  const reader = response.body?.getReader();
2351
2437
  if (!reader) {
@@ -2358,6 +2444,8 @@ async function* convertAnthropicStream(response, model) {
2358
2444
  currentToolIndex: 0,
2359
2445
  tools: /* @__PURE__ */ new Map()
2360
2446
  };
2447
+ let streamInputTokens = 0;
2448
+ let streamOutputTokens = 0;
2361
2449
  try {
2362
2450
  while (true) {
2363
2451
  const { done, value } = await reader.read();
@@ -2375,6 +2463,17 @@ async function* convertAnthropicStream(response, model) {
2375
2463
  } else if (line === "" && eventType && eventData) {
2376
2464
  try {
2377
2465
  const parsed = JSON.parse(eventData);
2466
+ if (eventType === "message_start") {
2467
+ const msg = parsed["message"];
2468
+ if (msg?.usage?.input_tokens) {
2469
+ streamInputTokens = msg.usage.input_tokens;
2470
+ }
2471
+ } else if (eventType === "message_delta") {
2472
+ const usage = parsed["usage"];
2473
+ if (usage?.output_tokens) {
2474
+ streamOutputTokens = usage.output_tokens;
2475
+ }
2476
+ }
2378
2477
  const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
2379
2478
  if (converted) {
2380
2479
  yield converted;
@@ -2386,6 +2485,7 @@ async function* convertAnthropicStream(response, model) {
2386
2485
  }
2387
2486
  }
2388
2487
  }
2488
+ lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
2389
2489
  } finally {
2390
2490
  reader.releaseLock();
2391
2491
  }
@@ -2483,23 +2583,32 @@ async function startProxy(config = {}) {
2483
2583
  }
2484
2584
  if (req.method === "GET" && pathname === "/stats") {
2485
2585
  const stats = relay.stats();
2486
- const savings = relay.savingsReport(30);
2586
+ const costs = calculateCosts();
2487
2587
  const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2488
2588
  const modelDistribution = {};
2489
2589
  for (const [model, count] of Object.entries(modelCounts)) {
2590
+ const modelName = model.split("/")[1] || model;
2591
+ const tokenData = costs.byModel[modelName];
2490
2592
  modelDistribution[model] = {
2491
2593
  count,
2492
- percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2594
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
2595
+ tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
2596
+ costUsd: tokenData?.costUsd
2493
2597
  };
2494
2598
  }
2495
2599
  res.writeHead(200, { "Content-Type": "application/json" });
2496
2600
  res.end(JSON.stringify({
2497
2601
  totalRuns,
2498
- savings: {
2499
- estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2500
- actualCostUsd: savings.actualCost.toFixed(4),
2501
- baselineCostUsd: savings.baselineCost.toFixed(4),
2502
- savedUsd: savings.savings.toFixed(4)
2602
+ tokens: {
2603
+ input: costs.totalInputTokens,
2604
+ output: costs.totalOutputTokens,
2605
+ total: costs.totalInputTokens + costs.totalOutputTokens
2606
+ },
2607
+ costs: {
2608
+ actualUsd: costs.actualCostUsd,
2609
+ opusBaselineUsd: costs.opusCostUsd,
2610
+ savingsUsd: costs.savingsUsd,
2611
+ savingsPercent: costs.savingsPercent
2503
2612
  },
2504
2613
  modelDistribution,
2505
2614
  byTaskType: stats.byTaskType,
@@ -2620,12 +2729,24 @@ async function startProxy(config = {}) {
2620
2729
  }
2621
2730
  }
2622
2731
  log(`Routing to: ${targetProvider}/${targetModel}`);
2623
- const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2624
- const apiKey = process.env[apiKeyEnv];
2625
- if (!apiKey) {
2626
- res.writeHead(500, { "Content-Type": "application/json" });
2627
- res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2628
- return;
2732
+ let apiKey;
2733
+ let anthropicAuth = null;
2734
+ if (targetProvider === "anthropic") {
2735
+ anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
2736
+ if (!anthropicAuth) {
2737
+ res.writeHead(500, { "Content-Type": "application/json" });
2738
+ res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
2739
+ return;
2740
+ }
2741
+ log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
2742
+ } else {
2743
+ const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2744
+ apiKey = process.env[apiKeyEnv];
2745
+ if (!apiKey) {
2746
+ res.writeHead(500, { "Content-Type": "application/json" });
2747
+ res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2748
+ return;
2749
+ }
2629
2750
  }
2630
2751
  const startTime = Date.now();
2631
2752
  const betaHeaders = req.headers["anthropic-beta"];
@@ -2636,6 +2757,7 @@ async function startProxy(config = {}) {
2636
2757
  targetProvider,
2637
2758
  targetModel,
2638
2759
  apiKey,
2760
+ anthropicAuth,
2639
2761
  relay,
2640
2762
  promptText,
2641
2763
  taskType,
@@ -2652,6 +2774,7 @@ async function startProxy(config = {}) {
2652
2774
  targetProvider,
2653
2775
  targetModel,
2654
2776
  apiKey,
2777
+ anthropicAuth,
2655
2778
  relay,
2656
2779
  promptText,
2657
2780
  taskType,
@@ -2681,12 +2804,13 @@ async function startProxy(config = {}) {
2681
2804
  });
2682
2805
  });
2683
2806
  }
2684
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2807
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2685
2808
  let providerResponse;
2686
2809
  try {
2687
2810
  switch (targetProvider) {
2688
2811
  case "anthropic":
2689
- providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
2812
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2813
+ providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2690
2814
  break;
2691
2815
  case "google":
2692
2816
  providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
@@ -2740,6 +2864,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2740
2864
  const durationMs = Date.now() - startTime;
2741
2865
  const modelKey = `${targetProvider}/${targetModel}`;
2742
2866
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2867
+ if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
2868
+ trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
2869
+ log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
2870
+ lastStreamingUsage = null;
2871
+ }
2743
2872
  relay.run({
2744
2873
  prompt: promptText.slice(0, 500),
2745
2874
  taskType,
@@ -2764,13 +2893,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2764
2893
  });
2765
2894
  res.end();
2766
2895
  }
2767
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2896
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2768
2897
  let providerResponse;
2769
2898
  let responseData;
2770
2899
  try {
2771
2900
  switch (targetProvider) {
2772
2901
  case "anthropic": {
2773
- providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
2902
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2903
+ providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2774
2904
  const rawData = await providerResponse.json();
2775
2905
  if (!providerResponse.ok) {
2776
2906
  res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
@@ -2830,6 +2960,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2830
2960
  const durationMs = Date.now() - startTime;
2831
2961
  const modelKey = `${targetProvider}/${targetModel}`;
2832
2962
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2963
+ const usage = responseData["usage"];
2964
+ if (usage?.prompt_tokens || usage?.completion_tokens) {
2965
+ trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
2966
+ log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
2967
+ }
2833
2968
  try {
2834
2969
  const runResult = await relay.run({
2835
2970
  prompt: promptText.slice(0, 500),