@relayplane/proxy 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -71
- package/dist/cli.js +98 -7
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +98 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +98 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +98 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -1687,6 +1687,63 @@ var VERSION = "0.1.9";
|
|
|
1687
1687
|
var recentRuns = [];
|
|
1688
1688
|
var MAX_RECENT_RUNS = 100;
|
|
1689
1689
|
var modelCounts = {};
|
|
1690
|
+
// Running token totals for this process, keyed by model name. Each value has
// the shape { inputTokens, outputTokens, requests }; entries are created and
// incremented by trackTokens() and read by calculateCosts().
var tokenStats = {};
// Per-model price table consumed by calculateCosts(). That function divides
// token counts by 1e6 before multiplying by these numbers and reports the
// result as USD, so each rate is a price in USD per one million tokens.
// NOTE(review): the rates and model identifiers are hard-coded snapshots —
// confirm them against the providers' current price lists before relying on
// the reported costs.
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  // calculateCosts() looks this entry up by name as the "all-Opus" baseline.
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  // calculateCosts() falls back to "default-cheap" when a model has no entry.
  "default-cheap": { input: 1, output: 5 },
  // NOTE(review): "default-expensive" is not referenced in the code visible
  // here — confirm whether anything else reads it.
  "default-expensive": { input: 15, output: 75 }
};
|
|
1708
|
+
/**
 * Record the token usage of one completed request against a model.
 *
 * Adds the given counts to the running totals held in the module-level
 * `tokenStats` map and bumps that model's request counter by one.
 *
 * @param {string} model - Model name used as the key in `tokenStats`.
 * @param {number} inputTokens - Prompt/input tokens consumed by the request.
 * @param {number} outputTokens - Completion/output tokens produced.
 * @returns {void}
 */
function trackTokens(model, inputTokens, outputTokens) {
  // Anything that is not a positive finite number counts as 0, so a missing
  // or malformed usage field cannot turn the running totals into NaN (or
  // into a concatenated string).
  const toCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
  };

  // Own-property check: a plain truthiness test would treat inherited members
  // such as "constructor" or "toString" as existing records and then mutate
  // the built-in function instead of creating a stats entry.
  if (!Object.prototype.hasOwnProperty.call(tokenStats, model)) {
    // defineProperty (not assignment) so that even a key like "__proto__"
    // becomes an ordinary own, enumerable entry rather than replacing the
    // map's prototype.
    Object.defineProperty(tokenStats, model, {
      value: { inputTokens: 0, outputTokens: 0, requests: 0 },
      writable: true,
      enumerable: true,
      configurable: true
    });
  }

  const entry = tokenStats[model];
  entry.inputTokens += toCount(inputTokens);
  entry.outputTokens += toCount(outputTokens);
  entry.requests += 1;
}
|
|
1716
|
+
/**
 * Summarise recorded token usage and price it.
 *
 * Walks every entry of the module-level `tokenStats` map, prices it with the
 * matching `MODEL_PRICING2` entry (falling back to "default-cheap" when the
 * model has no entry of its own), and compares the total with what the same
 * token volume would have cost at the "claude-opus-4-5-20250514" rate.
 *
 * @returns {{
 *   totalInputTokens: number,
 *   totalOutputTokens: number,
 *   actualCostUsd: number,
 *   opusCostUsd: number,
 *   savingsUsd: number,
 *   savingsPercent: string,
 *   byModel: Object<string, {inputTokens: number, outputTokens: number, costUsd: number}>
 * }} Token totals, USD amounts rounded to 4 decimals, the saving as a
 *   percentage string with one decimal (or "0%" when the baseline is 0), and
 *   a per-model breakdown.
 */
function calculateCosts() {
  const hasOwn = Object.prototype.hasOwnProperty;
  // Rates are per one million tokens, hence the division by 1e6.
  const priceOf = (inputTokens, outputTokens, pricing) =>
    inputTokens / 1e6 * pricing.input + outputTokens / 1e6 * pricing.output;
  const roundUsd = (usd) => Number(usd.toFixed(4));

  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let actualCostUsd = 0;
  const perModel = [];

  for (const [model, stats] of Object.entries(tokenStats)) {
    totalInputTokens += stats.inputTokens;
    totalOutputTokens += stats.outputTokens;

    // Only honour pricing entries the table actually owns. A bare
    // `MODEL_PRICING2[model]` would resolve names like "constructor" to an
    // inherited function, yielding a NaN cost that poisons every total.
    const pricing =
      (hasOwn.call(MODEL_PRICING2, model) && MODEL_PRICING2[model]) ||
      MODEL_PRICING2["default-cheap"];

    const cost = priceOf(stats.inputTokens, stats.outputTokens, pricing);
    actualCostUsd += cost;
    perModel.push([
      model,
      {
        inputTokens: stats.inputTokens,
        outputTokens: stats.outputTokens,
        costUsd: roundUsd(cost)
      }
    ]);
  }

  // Object.fromEntries defines each key as an own property, so a model named
  // "__proto__" is reported like any other instead of replacing the prototype.
  const byModel = Object.fromEntries(perModel);

  // Baseline: the whole token volume priced at the Opus rate.
  const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
  const opusCostUsd = priceOf(totalInputTokens, totalOutputTokens, opusPricing);
  const savingsUsd = opusCostUsd - actualCostUsd;
  const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";

  return {
    totalInputTokens,
    totalOutputTokens,
    actualCostUsd: roundUsd(actualCostUsd),
    opusCostUsd: roundUsd(opusCostUsd),
    savingsUsd: roundUsd(savingsUsd),
    savingsPercent,
    byModel
  };
}
|
|
1690
1747
|
var serverStartTime = 0;
|
|
1691
1748
|
var currentConfig = loadConfig();
|
|
1692
1749
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -2314,6 +2371,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2314
2371
|
return null;
|
|
2315
2372
|
}
|
|
2316
2373
|
}
|
|
2374
|
+
var lastStreamingUsage = null;
|
|
2317
2375
|
async function* convertAnthropicStream(response, model) {
|
|
2318
2376
|
const reader = response.body?.getReader();
|
|
2319
2377
|
if (!reader) {
|
|
@@ -2326,6 +2384,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2326
2384
|
currentToolIndex: 0,
|
|
2327
2385
|
tools: /* @__PURE__ */ new Map()
|
|
2328
2386
|
};
|
|
2387
|
+
let streamInputTokens = 0;
|
|
2388
|
+
let streamOutputTokens = 0;
|
|
2329
2389
|
try {
|
|
2330
2390
|
while (true) {
|
|
2331
2391
|
const { done, value } = await reader.read();
|
|
@@ -2343,6 +2403,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2343
2403
|
} else if (line === "" && eventType && eventData) {
|
|
2344
2404
|
try {
|
|
2345
2405
|
const parsed = JSON.parse(eventData);
|
|
2406
|
+
if (eventType === "message_start") {
|
|
2407
|
+
const msg = parsed["message"];
|
|
2408
|
+
if (msg?.usage?.input_tokens) {
|
|
2409
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2410
|
+
}
|
|
2411
|
+
} else if (eventType === "message_delta") {
|
|
2412
|
+
const usage = parsed["usage"];
|
|
2413
|
+
if (usage?.output_tokens) {
|
|
2414
|
+
streamOutputTokens = usage.output_tokens;
|
|
2415
|
+
}
|
|
2416
|
+
}
|
|
2346
2417
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2347
2418
|
if (converted) {
|
|
2348
2419
|
yield converted;
|
|
@@ -2354,6 +2425,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2354
2425
|
}
|
|
2355
2426
|
}
|
|
2356
2427
|
}
|
|
2428
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2357
2429
|
} finally {
|
|
2358
2430
|
reader.releaseLock();
|
|
2359
2431
|
}
|
|
@@ -2451,23 +2523,32 @@ async function startProxy(config = {}) {
|
|
|
2451
2523
|
}
|
|
2452
2524
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2453
2525
|
const stats = relay.stats();
|
|
2454
|
-
const
|
|
2526
|
+
const costs = calculateCosts();
|
|
2455
2527
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2456
2528
|
const modelDistribution = {};
|
|
2457
2529
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2530
|
+
const modelName = model.split("/")[1] || model;
|
|
2531
|
+
const tokenData = costs.byModel[modelName];
|
|
2458
2532
|
modelDistribution[model] = {
|
|
2459
2533
|
count,
|
|
2460
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2534
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2535
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2536
|
+
costUsd: tokenData?.costUsd
|
|
2461
2537
|
};
|
|
2462
2538
|
}
|
|
2463
2539
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2464
2540
|
res.end(JSON.stringify({
|
|
2465
2541
|
totalRuns,
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2542
|
+
tokens: {
|
|
2543
|
+
input: costs.totalInputTokens,
|
|
2544
|
+
output: costs.totalOutputTokens,
|
|
2545
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2546
|
+
},
|
|
2547
|
+
costs: {
|
|
2548
|
+
actualUsd: costs.actualCostUsd,
|
|
2549
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2550
|
+
savingsUsd: costs.savingsUsd,
|
|
2551
|
+
savingsPercent: costs.savingsPercent
|
|
2471
2552
|
},
|
|
2472
2553
|
modelDistribution,
|
|
2473
2554
|
byTaskType: stats.byTaskType,
|
|
@@ -2723,6 +2804,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2723
2804
|
const durationMs = Date.now() - startTime;
|
|
2724
2805
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2725
2806
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2807
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2808
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2809
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2810
|
+
lastStreamingUsage = null;
|
|
2811
|
+
}
|
|
2726
2812
|
relay.run({
|
|
2727
2813
|
prompt: promptText.slice(0, 500),
|
|
2728
2814
|
taskType,
|
|
@@ -2814,6 +2900,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2814
2900
|
const durationMs = Date.now() - startTime;
|
|
2815
2901
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2816
2902
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2903
|
+
const usage = responseData["usage"];
|
|
2904
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2905
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2906
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2907
|
+
}
|
|
2817
2908
|
try {
|
|
2818
2909
|
const runResult = await relay.run({
|
|
2819
2910
|
prompt: promptText.slice(0, 500),
|