@relayplane/proxy 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1747,6 +1747,63 @@ var VERSION = "0.1.9";
  var recentRuns = [];
  var MAX_RECENT_RUNS = 100;
  var modelCounts = {};
+ var tokenStats = {};
+ var MODEL_PRICING2 = {
+   // Anthropic
+   "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
+   "claude-3-5-haiku-20241022": { input: 1, output: 5 },
+   "claude-3-5-haiku-latest": { input: 1, output: 5 },
+   "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
+   "claude-sonnet-4-20250514": { input: 3, output: 15 },
+   "claude-3-opus-20240229": { input: 15, output: 75 },
+   "claude-opus-4-5-20250514": { input: 15, output: 75 },
+   // OpenAI
+   "gpt-4o": { input: 2.5, output: 10 },
+   "gpt-4o-mini": { input: 0.15, output: 0.6 },
+   "gpt-4-turbo": { input: 10, output: 30 },
+   // Defaults for unknown models
+   "default-cheap": { input: 1, output: 5 },
+   "default-expensive": { input: 15, output: 75 }
+ };
+ function trackTokens(model, inputTokens, outputTokens) {
+   if (!tokenStats[model]) {
+     tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
+   }
+   tokenStats[model].inputTokens += inputTokens;
+   tokenStats[model].outputTokens += outputTokens;
+   tokenStats[model].requests += 1;
+ }
+ function calculateCosts() {
+   let totalInputTokens = 0;
+   let totalOutputTokens = 0;
+   let actualCostUsd = 0;
+   const byModel = {};
+   for (const [model, stats] of Object.entries(tokenStats)) {
+     totalInputTokens += stats.inputTokens;
+     totalOutputTokens += stats.outputTokens;
+     const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
+     const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
+     actualCostUsd += cost;
+     byModel[model] = {
+       inputTokens: stats.inputTokens,
+       outputTokens: stats.outputTokens,
+       costUsd: parseFloat(cost.toFixed(4))
+     };
+   }
+   const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
+   const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
+   const savingsUsd = opusCostUsd - actualCostUsd;
+   const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
+   return {
+     totalInputTokens,
+     totalOutputTokens,
+     actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
+     opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
+     savingsUsd: parseFloat(savingsUsd.toFixed(4)),
+     savingsPercent,
+     byModel
+   };
+ }
  var serverStartTime = 0;
  var currentConfig = loadConfig();
  var DEFAULT_ENDPOINTS = {
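
The pricing entries are USD per million tokens, which is why calculateCosts scales raw counts by 1e6. A quick sanity check of the arithmetic with illustrative numbers (not values from the package):

// Hypothetical traffic: 200k input / 50k output tokens, all routed to claude-3-5-sonnet-20241022.
const sonnet = { input: 3, output: 15 };  // USD per 1M tokens, per MODEL_PRICING2
const opus = { input: 15, output: 75 };   // the opus baseline used for savings
const actualUsd = 200000 / 1e6 * sonnet.input + 50000 / 1e6 * sonnet.output; // 0.60 + 0.75 = 1.35
const opusUsd = 200000 / 1e6 * opus.input + 50000 / 1e6 * opus.output;       // 3.00 + 3.75 = 6.75
const savingsPercent = ((opusUsd - actualUsd) / opusUsd * 100).toFixed(1) + "%"; // "80.0%"

Unrecognized model IDs fall back to default-cheap pricing, so their reported costs are rough floors rather than exact figures.
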
@@ -2374,6 +2431,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
      return null;
    }
  }
+ var lastStreamingUsage = null;
  async function* convertAnthropicStream(response, model) {
    const reader = response.body?.getReader();
    if (!reader) {
@@ -2386,6 +2444,8 @@ async function* convertAnthropicStream(response, model) {
      currentToolIndex: 0,
      tools: /* @__PURE__ */ new Map()
    };
+   let streamInputTokens = 0;
+   let streamOutputTokens = 0;
    try {
      while (true) {
        const { done, value } = await reader.read();
@@ -2403,6 +2463,17 @@ async function* convertAnthropicStream(response, model) {
      } else if (line === "" && eventType && eventData) {
        try {
          const parsed = JSON.parse(eventData);
+         if (eventType === "message_start") {
+           const msg = parsed["message"];
+           if (msg?.usage?.input_tokens) {
+             streamInputTokens = msg.usage.input_tokens;
+           }
+         } else if (eventType === "message_delta") {
+           const usage = parsed["usage"];
+           if (usage?.output_tokens) {
+             streamOutputTokens = usage.output_tokens;
+           }
+         }
          const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
          if (converted) {
            yield converted;
@@ -2414,6 +2485,7 @@ async function* convertAnthropicStream(response, model) {
          }
        }
      }
+     lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
    } finally {
      reader.releaseLock();
    }
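
The streaming counters are filled from Anthropic's SSE envelope: message_start carries the request's input_tokens, and message_delta events carry a running output_tokens total, so the last delta wins. The event shapes being read look roughly like this (per the Anthropic Messages streaming format; values illustrative):

event: message_start
data: {"type":"message_start","message":{"usage":{"input_tokens":412,"output_tokens":1}}}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":187}}
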
@@ -2511,23 +2583,32 @@ async function startProxy(config = {}) {
    }
    if (req.method === "GET" && pathname === "/stats") {
      const stats = relay.stats();
-     const savings = relay.savingsReport(30);
+     const costs = calculateCosts();
      const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
      const modelDistribution = {};
      for (const [model, count] of Object.entries(modelCounts)) {
+       const modelName = model.split("/")[1] || model;
+       const tokenData = costs.byModel[modelName];
        modelDistribution[model] = {
          count,
-         percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
+         percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
+         tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
+         costUsd: tokenData?.costUsd
        };
      }
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({
        totalRuns,
-       savings: {
-         estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
-         actualCostUsd: savings.actualCost.toFixed(4),
-         baselineCostUsd: savings.baselineCost.toFixed(4),
-         savedUsd: savings.savings.toFixed(4)
+       tokens: {
+         input: costs.totalInputTokens,
+         output: costs.totalOutputTokens,
+         total: costs.totalInputTokens + costs.totalOutputTokens
+       },
+       costs: {
+         actualUsd: costs.actualCostUsd,
+         opusBaselineUsd: costs.opusCostUsd,
+         savingsUsd: costs.savingsUsd,
+         savingsPercent: costs.savingsPercent
        },
        modelDistribution,
        byTaskType: stats.byTaskType,
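
The net effect on GET /stats: the estimated relay.savingsReport(30) block is replaced by measured token totals and costs. A plausible response under the new code (values illustrative; byTaskType and the remaining relay.stats() fields omitted):

{
  "totalRuns": 42,
  "tokens": { "input": 250000, "output": 60000, "total": 310000 },
  "costs": { "actualUsd": 1.65, "opusBaselineUsd": 8.25, "savingsUsd": 6.6, "savingsPercent": "80.0%" },
  "modelDistribution": {
    "anthropic/claude-3-5-sonnet-20241022": {
      "count": 42,
      "percentage": "100.0%",
      "tokens": { "input": 250000, "output": 60000 },
      "costUsd": 1.65
    }
  }
}

Note that modelCounts keys are provider/model pairs while tokenStats keys are bare model IDs, hence the model.split("/")[1] lookup.
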
@@ -2783,6 +2864,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
    const durationMs = Date.now() - startTime;
    const modelKey = `${targetProvider}/${targetModel}`;
    modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+   if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
+     trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
+     log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
+     lastStreamingUsage = null;
+   }
    relay.run({
      prompt: promptText.slice(0, 500),
      taskType,
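
One thing to watch: lastStreamingUsage is module-level state written by convertAnthropicStream and consumed here after the stream drains, so two concurrent streams could read each other's counts. A per-stream alternative (hypothetical sketch, not what the package does) would return the usage from the generator instead of sharing a variable:

// Hypothetical refactor: the generator returns its usage to the caller.
async function* convertStream(reader) {
  let inputTokens = 0, outputTokens = 0;
  // ... parse SSE lines, yield converted events, update the counters from usage events ...
  return { inputTokens, outputTokens }; // surfaces as { done: true, value } to the consumer
}
// Consumer:
// let step; const it = convertStream(reader);
// while (!(step = await it.next()).done) send(step.value);
// const usage = step.value; // per-stream, no shared state
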
@@ -2874,6 +2960,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetModel,
    const durationMs = Date.now() - startTime;
    const modelKey = `${targetProvider}/${targetModel}`;
    modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+   const usage = responseData["usage"];
+   if (usage?.prompt_tokens || usage?.completion_tokens) {
+     trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
+     log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
+   }
    try {
      const runResult = await relay.run({
        prompt: promptText.slice(0, 500),
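
The non-streaming path reads OpenAI-style usage fields (prompt_tokens / completion_tokens), which suggests responses are already normalized to the chat-completions shape by this point; Anthropic's native names are input_tokens / output_tokens. The usage block this code expects looks like (values illustrative):

"usage": { "prompt_tokens": 412, "completion_tokens": 187, "total_tokens": 599 }

Responses where both counts are zero, or where usage is absent, are simply not tracked.
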