@relayplane/proxy 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1580,12 +1580,19 @@ var StrategySchema = z.object({
1580
1580
  minConfidence: z.number().min(0).max(1).optional(),
1581
1581
  fallback: z.string().optional()
1582
1582
  });
1583
// Schema for the optional `auth` section of the config file. Every field is
// optional, and the section itself may be left out entirely.
var AuthSchema = z
  .object({
    // Anthropic API key; forwarded upstream in the `x-api-key` header.
    anthropicApiKey: z.string().optional(),
    // Alternative Anthropic token; forwarded upstream as `Authorization: Bearer <token>`.
    anthropicMaxToken: z.string().optional(),
    // Model-name substrings (matched case-insensitively) for which the MAX token
    // is preferred. getAnthropicAuth falls back to ["opus"] when this is omitted.
    useMaxForModels: z.array(z.string()).optional(),
  })
  .optional();
1583
1589
  var ConfigSchema = z.object({
1584
1590
  strategies: z.record(z.string(), StrategySchema).optional(),
1585
1591
  defaults: z.object({
1586
1592
  qualityModel: z.string().optional(),
1587
1593
  costModel: z.string().optional()
1588
- }).optional()
1594
+ }).optional(),
1595
+ auth: AuthSchema
1589
1596
  });
1590
1597
  var DEFAULT_CONFIG = {
1591
1598
  strategies: {
@@ -1645,6 +1652,19 @@ function loadConfig() {
1645
1652
  function getStrategy(config, taskType) {
1646
1653
  return config.strategies?.[taskType] ?? null;
1647
1654
  }
1655
/**
 * Choose the credential to use for an Anthropic request to `model`.
 *
 * Resolution order:
 *   1. `config.auth.anthropicMaxToken`, when `model` contains (case-insensitively)
 *      one of the substrings in `config.auth.useMaxForModels` (default: ["opus"]).
 *   2. `config.auth.anthropicApiKey`, when it is a non-empty string.
 *   3. The ANTHROPIC_API_KEY environment variable.
 *
 * @param {object} config - Loaded proxy config; `config.auth` may be absent.
 * @param {string} model - Target model name.
 * @returns {{type: "max"|"apiKey", value: string}|null} The credential to send,
 *   or null when no usable credential is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const maxModelPatterns = auth?.useMaxForModels ?? ["opus"];
  // Coerce so a missing or non-string model name cannot throw here; it simply
  // matches no pattern.
  const modelName = String(model ?? "").toLowerCase();
  const shouldUseMax = maxModelPatterns.some((pattern) => modelName.includes(pattern.toLowerCase()));
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  // `||` rather than `??`: an empty-string key in the config is not a usable
  // credential and must not stop the environment variable from being consulted.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
1648
1668
  function watchConfig(onChange) {
1649
1669
  const configPath = getConfigPath();
1650
1670
  const dir = path2.dirname(configPath);
@@ -1665,10 +1685,67 @@ function watchConfig(onChange) {
1665
1685
  }
1666
1686
 
1667
1687
  // src/proxy.ts
1668
- var VERSION = "0.1.8";
1688
+ var VERSION = "0.1.9";
1669
1689
  var recentRuns = [];
1670
1690
  var MAX_RECENT_RUNS = 100;
1671
1691
  var modelCounts = {};
1692
// In-memory running token totals, keyed by model name:
//   { [model]: { inputTokens, outputTokens, requests } }
var tokenStats = {};
// Price table in USD per one million tokens (calculateCosts divides token
// counts by 1e6 before multiplying by these rates).
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 }
};
/**
 * Add one request's token usage to the running totals for `model`.
 *
 * @param {string} model - Model name used as the key in `tokenStats`.
 * @param {number} inputTokens - Prompt tokens for this request.
 * @param {number} outputTokens - Completion tokens for this request.
 */
function trackTokens(model, inputTokens, outputTokens) {
  // Own-property check: a model name that collides with an Object.prototype
  // member (e.g. "constructor") must still get its own counter object.
  if (!Object.prototype.hasOwnProperty.call(tokenStats, model)) {
    tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
  }
  const entry = tokenStats[model];
  // Treat missing / non-finite counts as 0 so one bad usage payload cannot
  // turn the running totals into NaN.
  entry.inputTokens += Number.isFinite(inputTokens) ? inputTokens : 0;
  entry.outputTokens += Number.isFinite(outputTokens) ? outputTokens : 0;
  entry.requests += 1;
}
/**
 * Summarize tracked token usage and compare its cost with an all-Opus baseline.
 *
 * @returns {{
 *   totalInputTokens: number,
 *   totalOutputTokens: number,
 *   actualCostUsd: number,
 *   opusCostUsd: number,
 *   savingsUsd: number,
 *   savingsPercent: string,
 *   byModel: Object<string, {inputTokens: number, outputTokens: number, costUsd: number}>
 * }} Totals, USD amounts rounded to 4 decimals, the savings as a percentage
 *   string with one decimal ("0%" when the baseline is zero), and a per-model
 *   breakdown.
 */
function calculateCosts() {
  // Exact table entry when there is one; otherwise opus-family names use the
  // expensive default (so they are not under-priced against the Opus baseline)
  // and everything else uses the cheap default.
  const priceFor = (model) => {
    if (Object.prototype.hasOwnProperty.call(MODEL_PRICING2, model)) {
      return MODEL_PRICING2[model];
    }
    const fallbackKey = model.toLowerCase().includes("opus") ? "default-expensive" : "default-cheap";
    return MODEL_PRICING2[fallbackKey];
  };
  const costOf = (pricing, inputTokens, outputTokens) =>
    inputTokens / 1e6 * pricing.input + outputTokens / 1e6 * pricing.output;
  const roundUsd = (amount) => parseFloat(amount.toFixed(4));

  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let actualCostUsd = 0;
  const byModel = {};
  for (const [model, stats] of Object.entries(tokenStats)) {
    totalInputTokens += stats.inputTokens;
    totalOutputTokens += stats.outputTokens;
    const cost = costOf(priceFor(model), stats.inputTokens, stats.outputTokens);
    actualCostUsd += cost;
    byModel[model] = {
      inputTokens: stats.inputTokens,
      outputTokens: stats.outputTokens,
      costUsd: roundUsd(cost)
    };
  }
  // Baseline: what the same token volume would have cost at Opus rates.
  const opusCostUsd = costOf(MODEL_PRICING2["claude-opus-4-5-20250514"], totalInputTokens, totalOutputTokens);
  const savingsUsd = opusCostUsd - actualCostUsd;
  const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
  return {
    totalInputTokens,
    totalOutputTokens,
    actualCostUsd: roundUsd(actualCostUsd),
    opusCostUsd: roundUsd(opusCostUsd),
    savingsUsd: roundUsd(savingsUsd),
    savingsPercent,
    byModel
  };
}
1672
1749
  var serverStartTime = 0;
1673
1750
  var currentConfig = loadConfig();
1674
1751
  var DEFAULT_ENDPOINTS = {
@@ -1733,13 +1810,17 @@ function extractPromptText(messages) {
1733
1810
  return "";
1734
1811
  }).join("\n");
1735
1812
  }
1736
- async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1813
+ async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
1737
1814
  const anthropicBody = buildAnthropicBody(request, targetModel, false);
1738
1815
  const headers = {
1739
1816
  "Content-Type": "application/json",
1740
- "x-api-key": apiKey,
1741
1817
  "anthropic-version": "2023-06-01"
1742
1818
  };
1819
+ if (auth.type === "max") {
1820
+ headers["Authorization"] = `Bearer ${auth.value}`;
1821
+ } else {
1822
+ headers["x-api-key"] = auth.value;
1823
+ }
1743
1824
  if (betaHeaders) {
1744
1825
  headers["anthropic-beta"] = betaHeaders;
1745
1826
  }
@@ -1750,13 +1831,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1750
1831
  });
1751
1832
  return response;
1752
1833
  }
1753
- async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
1834
+ async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
1754
1835
  const anthropicBody = buildAnthropicBody(request, targetModel, true);
1755
1836
  const headers = {
1756
1837
  "Content-Type": "application/json",
1757
- "x-api-key": apiKey,
1758
1838
  "anthropic-version": "2023-06-01"
1759
1839
  };
1840
+ if (auth.type === "max") {
1841
+ headers["Authorization"] = `Bearer ${auth.value}`;
1842
+ } else {
1843
+ headers["x-api-key"] = auth.value;
1844
+ }
1760
1845
  if (betaHeaders) {
1761
1846
  headers["anthropic-beta"] = betaHeaders;
1762
1847
  }
@@ -2288,6 +2373,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
2288
2373
  return null;
2289
2374
  }
2290
2375
  }
2376
+ var lastStreamingUsage = null;
2291
2377
  async function* convertAnthropicStream(response, model) {
2292
2378
  const reader = response.body?.getReader();
2293
2379
  if (!reader) {
@@ -2300,6 +2386,8 @@ async function* convertAnthropicStream(response, model) {
2300
2386
  currentToolIndex: 0,
2301
2387
  tools: /* @__PURE__ */ new Map()
2302
2388
  };
2389
+ let streamInputTokens = 0;
2390
+ let streamOutputTokens = 0;
2303
2391
  try {
2304
2392
  while (true) {
2305
2393
  const { done, value } = await reader.read();
@@ -2317,6 +2405,17 @@ async function* convertAnthropicStream(response, model) {
2317
2405
  } else if (line === "" && eventType && eventData) {
2318
2406
  try {
2319
2407
  const parsed = JSON.parse(eventData);
2408
+ if (eventType === "message_start") {
2409
+ const msg = parsed["message"];
2410
+ if (msg?.usage?.input_tokens) {
2411
+ streamInputTokens = msg.usage.input_tokens;
2412
+ }
2413
+ } else if (eventType === "message_delta") {
2414
+ const usage = parsed["usage"];
2415
+ if (usage?.output_tokens) {
2416
+ streamOutputTokens = usage.output_tokens;
2417
+ }
2418
+ }
2320
2419
  const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
2321
2420
  if (converted) {
2322
2421
  yield converted;
@@ -2328,6 +2427,7 @@ async function* convertAnthropicStream(response, model) {
2328
2427
  }
2329
2428
  }
2330
2429
  }
2430
+ lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
2331
2431
  } finally {
2332
2432
  reader.releaseLock();
2333
2433
  }
@@ -2425,23 +2525,32 @@ async function startProxy(config = {}) {
2425
2525
  }
2426
2526
  if (req.method === "GET" && pathname === "/stats") {
2427
2527
  const stats = relay.stats();
2428
- const savings = relay.savingsReport(30);
2528
+ const costs = calculateCosts();
2429
2529
  const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
2430
2530
  const modelDistribution = {};
2431
2531
  for (const [model, count] of Object.entries(modelCounts)) {
2532
+ const modelName = model.split("/")[1] || model;
2533
+ const tokenData = costs.byModel[modelName];
2432
2534
  modelDistribution[model] = {
2433
2535
  count,
2434
- percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
2536
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
2537
+ tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
2538
+ costUsd: tokenData?.costUsd
2435
2539
  };
2436
2540
  }
2437
2541
  res.writeHead(200, { "Content-Type": "application/json" });
2438
2542
  res.end(JSON.stringify({
2439
2543
  totalRuns,
2440
- savings: {
2441
- estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
2442
- actualCostUsd: savings.actualCost.toFixed(4),
2443
- baselineCostUsd: savings.baselineCost.toFixed(4),
2444
- savedUsd: savings.savings.toFixed(4)
2544
+ tokens: {
2545
+ input: costs.totalInputTokens,
2546
+ output: costs.totalOutputTokens,
2547
+ total: costs.totalInputTokens + costs.totalOutputTokens
2548
+ },
2549
+ costs: {
2550
+ actualUsd: costs.actualCostUsd,
2551
+ opusBaselineUsd: costs.opusCostUsd,
2552
+ savingsUsd: costs.savingsUsd,
2553
+ savingsPercent: costs.savingsPercent
2445
2554
  },
2446
2555
  modelDistribution,
2447
2556
  byTaskType: stats.byTaskType,
@@ -2562,12 +2671,24 @@ async function startProxy(config = {}) {
2562
2671
  }
2563
2672
  }
2564
2673
  log(`Routing to: ${targetProvider}/${targetModel}`);
2565
- const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2566
- const apiKey = process.env[apiKeyEnv];
2567
- if (!apiKey) {
2568
- res.writeHead(500, { "Content-Type": "application/json" });
2569
- res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2570
- return;
2674
+ let apiKey;
2675
+ let anthropicAuth = null;
2676
+ if (targetProvider === "anthropic") {
2677
+ anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
2678
+ if (!anthropicAuth) {
2679
+ res.writeHead(500, { "Content-Type": "application/json" });
2680
+ res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
2681
+ return;
2682
+ }
2683
+ log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
2684
+ } else {
2685
+ const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2686
+ apiKey = process.env[apiKeyEnv];
2687
+ if (!apiKey) {
2688
+ res.writeHead(500, { "Content-Type": "application/json" });
2689
+ res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2690
+ return;
2691
+ }
2571
2692
  }
2572
2693
  const startTime = Date.now();
2573
2694
  const betaHeaders = req.headers["anthropic-beta"];
@@ -2578,6 +2699,7 @@ async function startProxy(config = {}) {
2578
2699
  targetProvider,
2579
2700
  targetModel,
2580
2701
  apiKey,
2702
+ anthropicAuth,
2581
2703
  relay,
2582
2704
  promptText,
2583
2705
  taskType,
@@ -2594,6 +2716,7 @@ async function startProxy(config = {}) {
2594
2716
  targetProvider,
2595
2717
  targetModel,
2596
2718
  apiKey,
2719
+ anthropicAuth,
2597
2720
  relay,
2598
2721
  promptText,
2599
2722
  taskType,
@@ -2623,12 +2746,13 @@ async function startProxy(config = {}) {
2623
2746
  });
2624
2747
  });
2625
2748
  }
2626
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2749
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2627
2750
  let providerResponse;
2628
2751
  try {
2629
2752
  switch (targetProvider) {
2630
2753
  case "anthropic":
2631
- providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
2754
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2755
+ providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2632
2756
  break;
2633
2757
  case "google":
2634
2758
  providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
@@ -2682,6 +2806,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2682
2806
  const durationMs = Date.now() - startTime;
2683
2807
  const modelKey = `${targetProvider}/${targetModel}`;
2684
2808
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2809
+ if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
2810
+ trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
2811
+ log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
2812
+ lastStreamingUsage = null;
2813
+ }
2685
2814
  relay.run({
2686
2815
  prompt: promptText.slice(0, 500),
2687
2816
  taskType,
@@ -2706,13 +2835,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2706
2835
  });
2707
2836
  res.end();
2708
2837
  }
2709
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2838
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2710
2839
  let providerResponse;
2711
2840
  let responseData;
2712
2841
  try {
2713
2842
  switch (targetProvider) {
2714
2843
  case "anthropic": {
2715
- providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
2844
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2845
+ providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2716
2846
  const rawData = await providerResponse.json();
2717
2847
  if (!providerResponse.ok) {
2718
2848
  res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
@@ -2772,6 +2902,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2772
2902
  const durationMs = Date.now() - startTime;
2773
2903
  const modelKey = `${targetProvider}/${targetModel}`;
2774
2904
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
2905
+ const usage = responseData["usage"];
2906
+ if (usage?.prompt_tokens || usage?.completion_tokens) {
2907
+ trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
2908
+ log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
2909
+ }
2775
2910
  try {
2776
2911
  const runResult = await relay.run({
2777
2912
  prompt: promptText.slice(0, 500),