@relayplane/proxy 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1578,12 +1578,19 @@ var StrategySchema = z.object({
1578
1578
  minConfidence: z.number().min(0).max(1).optional(),
1579
1579
  fallback: z.string().optional()
1580
1580
  });
1581
// Anthropic auth configuration: an API key and/or a Claude Max OAuth token.
// The Max token is preferred for models whose name matches any entry in
// `useMaxForModels` (substring match — see getAnthropicAuth).
var AuthSchema = z.object({
  anthropicApiKey: z.string().optional(),
  anthropicMaxToken: z.string().optional(),
  useMaxForModels: z.array(z.string()).optional()
  // Default: ['opus']
}).optional();
1581
1587
// Top-level proxy config: per-task routing strategies, default model
// choices, and optional Anthropic auth settings (see AuthSchema).
var ConfigSchema = z.object({
  strategies: z.record(z.string(), StrategySchema).optional(),
  defaults: z.object({
    qualityModel: z.string().optional(),
    costModel: z.string().optional()
  }).optional(),
  auth: AuthSchema
});
1588
1595
  var DEFAULT_CONFIG = {
1589
1596
  strategies: {
@@ -1643,6 +1650,19 @@ function loadConfig() {
1643
1650
// Look up the routing strategy configured for a task type.
// Returns null when no strategies table exists or the task type is unset.
function getStrategy(config, taskType) {
  const table = config.strategies;
  if (table == null) {
    return null;
  }
  return table[taskType] ?? null;
}
1653
// Pick the Anthropic credential for a given target model.
// Preference order:
//   1. Claude Max token, when the model name contains any `useMaxForModels`
//      entry (case-insensitive substring; defaults to ["opus"]).
//   2. Configured API key, falling back to the ANTHROPIC_API_KEY env var.
// Returns { type: "max" | "apiKey", value } or null when nothing is set.
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const maxModels = auth?.useMaxForModels ?? ["opus"];
  const lowerModel = model.toLowerCase();
  const wantsMax = maxModels.some((candidate) => lowerModel.includes(candidate.toLowerCase()));
  if (wantsMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  const key = auth?.anthropicApiKey ?? process.env["ANTHROPIC_API_KEY"];
  return key ? { type: "apiKey", value: key } : null;
}
1646
1666
  function watchConfig(onChange) {
1647
1667
  const configPath = getConfigPath();
1648
1668
  const dir = path2.dirname(configPath);
@@ -1663,10 +1683,67 @@ function watchConfig(onChange) {
1663
1683
  }
1664
1684
 
1665
1685
// src/proxy.ts
// NOTE(review): package diff header says 0.1.8 -> 0.1.10, but the constant
// here is "0.1.9" — confirm the dist bundle was rebuilt for the release.
var VERSION = "0.1.9";
// Ring buffer of the most recent routed requests (capped below).
var recentRuns = [];
var MAX_RECENT_RUNS = 100;
// Request counts keyed by "provider/model".
var modelCounts = {};
1690
// Cumulative token usage keyed by model id (populated by trackTokens).
var tokenStats = {};
// USD per 1M tokens, { input, output }, keyed by provider model id.
// Unknown models fall back to "default-cheap" in calculateCosts.
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  // Official Opus 4 id; without this entry, Opus 4 usage fell through to
  // the "default-cheap" fallback and was drastically under-costed.
  "claude-opus-4-20250514": { input: 15, output: 75 },
  // NOTE(review): suspect id — mixes the "4-5" name with Opus 4's date
  // stamp. Kept because calculateCosts uses this exact key as its Opus
  // baseline; do not rename without updating that lookup.
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 }
};
1708
// Accumulate token usage for a model into the module-level tokenStats map,
// creating the per-model record on first sight.
function trackTokens(model, inputTokens, outputTokens) {
  const record = (tokenStats[model] ??= { inputTokens: 0, outputTokens: 0, requests: 0 });
  record.inputTokens += inputTokens;
  record.outputTokens += outputTokens;
  record.requests += 1;
}
1716
// Summarize accumulated token usage into dollar costs.
// Actual spend is priced per model (unknown ids use "default-cheap");
// the savings baseline prices ALL tokens at Opus rates.
// Returns totals, actual vs. Opus-baseline USD (4dp), a savings percent
// string ("NN.N%"), and a per-model breakdown.
function calculateCosts() {
  const byModel = {};
  let inputSum = 0;
  let outputSum = 0;
  let spentUsd = 0;
  for (const model of Object.keys(tokenStats)) {
    const usage = tokenStats[model];
    inputSum += usage.inputTokens;
    outputSum += usage.outputTokens;
    const rate = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
    const modelCost = usage.inputTokens / 1e6 * rate.input + usage.outputTokens / 1e6 * rate.output;
    spentUsd += modelCost;
    byModel[model] = {
      inputTokens: usage.inputTokens,
      outputTokens: usage.outputTokens,
      costUsd: parseFloat(modelCost.toFixed(4))
    };
  }
  const opusRate = MODEL_PRICING2["claude-opus-4-5-20250514"];
  const baselineUsd = inputSum / 1e6 * opusRate.input + outputSum / 1e6 * opusRate.output;
  const savedUsd = baselineUsd - spentUsd;
  const savedPct = baselineUsd > 0 ? (savedUsd / baselineUsd * 100).toFixed(1) + "%" : "0%";
  return {
    totalInputTokens: inputSum,
    totalOutputTokens: outputSum,
    actualCostUsd: parseFloat(spentUsd.toFixed(4)),
    opusCostUsd: parseFloat(baselineUsd.toFixed(4)),
    savingsUsd: parseFloat(savedUsd.toFixed(4)),
    savingsPercent: savedPct,
    byModel
  };
}
1670
1747
  var serverStartTime = 0;
1671
1748
  var currentConfig = loadConfig();
1672
1749
  var DEFAULT_ENDPOINTS = {
@@ -1731,13 +1808,17 @@ function extractPromptText(messages) {
1731
1808
  return "";
1732
1809
  }).join("\n");
1733
1810
  }
1734
- async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1811
+ async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
1735
1812
  const anthropicBody = buildAnthropicBody(request, targetModel, false);
1736
1813
  const headers = {
1737
1814
  "Content-Type": "application/json",
1738
- "x-api-key": apiKey,
1739
1815
  "anthropic-version": "2023-06-01"
1740
1816
  };
1817
+ if (auth.type === "max") {
1818
+ headers["Authorization"] = `Bearer ${auth.value}`;
1819
+ } else {
1820
+ headers["x-api-key"] = auth.value;
1821
+ }
1741
1822
  if (betaHeaders) {
1742
1823
  headers["anthropic-beta"] = betaHeaders;
1743
1824
  }
@@ -1748,13 +1829,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1748
1829
  });
1749
1830
  return response;
1750
1831
  }
1751
- async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
1832
+ async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
1752
1833
  const anthropicBody = buildAnthropicBody(request, targetModel, true);
1753
1834
  const headers = {
1754
1835
  "Content-Type": "application/json",
1755
- "x-api-key": apiKey,
1756
1836
  "anthropic-version": "2023-06-01"
1757
1837
  };
1838
+ if (auth.type === "max") {
1839
+ headers["Authorization"] = `Bearer ${auth.value}`;
1840
+ } else {
1841
+ headers["x-api-key"] = auth.value;
1842
+ }
1758
1843
  if (betaHeaders) {
1759
1844
  headers["anthropic-beta"] = betaHeaders;
1760
1845
  }
@@ -2286,6 +2371,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
2286
2371
  return null;
2287
2372
  }
2288
2373
  }
2374
+ var lastStreamingUsage = null;
2289
2375
  async function* convertAnthropicStream(response, model) {
2290
2376
  const reader = response.body?.getReader();
2291
2377
  if (!reader) {
@@ -2298,6 +2384,8 @@ async function* convertAnthropicStream(response, model) {
2298
2384
  currentToolIndex: 0,
2299
2385
  tools: /* @__PURE__ */ new Map()
2300
2386
  };
2387
+ let streamInputTokens = 0;
2388
+ let streamOutputTokens = 0;
2301
2389
  try {
2302
2390
  while (true) {
2303
2391
  const { done, value } = await reader.read();
@@ -2315,6 +2403,17 @@ async function* convertAnthropicStream(response, model) {
2315
2403
  } else if (line === "" && eventType && eventData) {
2316
2404
  try {
2317
2405
  const parsed = JSON.parse(eventData);
2406
+ if (eventType === "message_start") {
2407
+ const msg = parsed["message"];
2408
+ if (msg?.usage?.input_tokens) {
2409
+ streamInputTokens = msg.usage.input_tokens;
2410
+ }
2411
+ } else if (eventType === "message_delta") {
2412
+ const usage = parsed["usage"];
2413
+ if (usage?.output_tokens) {
2414
+ streamOutputTokens = usage.output_tokens;
2415
+ }
2416
+ }
2318
2417
  const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
2319
2418
  if (converted) {
2320
2419
  yield converted;
@@ -2326,6 +2425,7 @@ async function* convertAnthropicStream(response, model) {
2326
2425
  }
2327
2426
  }
2328
2427
  }
2428
+ lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
2329
2429
  } finally {
2330
2430
  reader.releaseLock();
2331
2431
  }
@@ -2423,23 +2523,32 @@ async function startProxy(config = {}) {
2423
2523
  }
2424
2524
  if (req.method === "GET" && pathname === "/stats") {
2425
2525
  const stats = relay.stats();
2426
- const savings = relay.savingsReport(30);
2526
+ const costs = calculateCosts();
2427
2527
  const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2428
2528
  const modelDistribution = {};
2429
2529
  for (const [model, count] of Object.entries(modelCounts)) {
2530
+ const modelName = model.split("/")[1] || model;
2531
+ const tokenData = costs.byModel[modelName];
2430
2532
  modelDistribution[model] = {
2431
2533
  count,
2432
- percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2534
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
2535
+ tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
2536
+ costUsd: tokenData?.costUsd
2433
2537
  };
2434
2538
  }
2435
2539
  res.writeHead(200, { "Content-Type": "application/json" });
2436
2540
  res.end(JSON.stringify({
2437
2541
  totalRuns,
2438
- savings: {
2439
- estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2440
- actualCostUsd: savings.actualCost.toFixed(4),
2441
- baselineCostUsd: savings.baselineCost.toFixed(4),
2442
- savedUsd: savings.savings.toFixed(4)
2542
+ tokens: {
2543
+ input: costs.totalInputTokens,
2544
+ output: costs.totalOutputTokens,
2545
+ total: costs.totalInputTokens + costs.totalOutputTokens
2546
+ },
2547
+ costs: {
2548
+ actualUsd: costs.actualCostUsd,
2549
+ opusBaselineUsd: costs.opusCostUsd,
2550
+ savingsUsd: costs.savingsUsd,
2551
+ savingsPercent: costs.savingsPercent
2443
2552
  },
2444
2553
  modelDistribution,
2445
2554
  byTaskType: stats.byTaskType,
@@ -2560,12 +2669,24 @@ async function startProxy(config = {}) {
2560
2669
  }
2561
2670
  }
2562
2671
  log(`Routing to: ${targetProvider}/${targetModel}`);
2563
- const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2564
- const apiKey = process.env[apiKeyEnv];
2565
- if (!apiKey) {
2566
- res.writeHead(500, { "Content-Type": "application/json" });
2567
- res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2568
- return;
2672
+ let apiKey;
2673
+ let anthropicAuth = null;
2674
+ if (targetProvider === "anthropic") {
2675
+ anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
2676
+ if (!anthropicAuth) {
2677
+ res.writeHead(500, { "Content-Type": "application/json" });
2678
+ res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
2679
+ return;
2680
+ }
2681
+ log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
2682
+ } else {
2683
+ const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2684
+ apiKey = process.env[apiKeyEnv];
2685
+ if (!apiKey) {
2686
+ res.writeHead(500, { "Content-Type": "application/json" });
2687
+ res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2688
+ return;
2689
+ }
2569
2690
  }
2570
2691
  const startTime = Date.now();
2571
2692
  const betaHeaders = req.headers["anthropic-beta"];
@@ -2576,6 +2697,7 @@ async function startProxy(config = {}) {
2576
2697
  targetProvider,
2577
2698
  targetModel,
2578
2699
  apiKey,
2700
+ anthropicAuth,
2579
2701
  relay,
2580
2702
  promptText,
2581
2703
  taskType,
@@ -2592,6 +2714,7 @@ async function startProxy(config = {}) {
2592
2714
  targetProvider,
2593
2715
  targetModel,
2594
2716
  apiKey,
2717
+ anthropicAuth,
2595
2718
  relay,
2596
2719
  promptText,
2597
2720
  taskType,
@@ -2621,12 +2744,13 @@ async function startProxy(config = {}) {
2621
2744
  });
2622
2745
  });
2623
2746
  }
2624
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2747
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2625
2748
  let providerResponse;
2626
2749
  try {
2627
2750
  switch (targetProvider) {
2628
2751
  case "anthropic":
2629
- providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
2752
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2753
+ providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2630
2754
  break;
2631
2755
  case "google":
2632
2756
  providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
@@ -2680,6 +2804,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2680
2804
  const durationMs = Date.now() - startTime;
2681
2805
  const modelKey = `${targetProvider}/${targetModel}`;
2682
2806
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2807
+ if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
2808
+ trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
2809
+ log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
2810
+ lastStreamingUsage = null;
2811
+ }
2683
2812
  relay.run({
2684
2813
  prompt: promptText.slice(0, 500),
2685
2814
  taskType,
@@ -2704,13 +2833,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2704
2833
  });
2705
2834
  res.end();
2706
2835
  }
2707
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2836
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2708
2837
  let providerResponse;
2709
2838
  let responseData;
2710
2839
  try {
2711
2840
  switch (targetProvider) {
2712
2841
  case "anthropic": {
2713
- providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
2842
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2843
+ providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2714
2844
  const rawData = await providerResponse.json();
2715
2845
  if (!providerResponse.ok) {
2716
2846
  res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
@@ -2770,6 +2900,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2770
2900
  const durationMs = Date.now() - startTime;
2771
2901
  const modelKey = `${targetProvider}/${targetModel}`;
2772
2902
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2903
+ const usage = responseData["usage"];
2904
+ if (usage?.prompt_tokens || usage?.completion_tokens) {
2905
+ trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
2906
+ log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
2907
+ }
2773
2908
  try {
2774
2909
  const runResult = await relay.run({
2775
2910
  prompt: promptText.slice(0, 500),