@relayplane/proxy 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -43
- package/dist/cli.js +158 -23
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +158 -23
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +23 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +158 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +158 -23
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -1578,12 +1578,19 @@ var StrategySchema = z.object({
|
|
|
1578
1578
|
minConfidence: z.number().min(0).max(1).optional(),
|
|
1579
1579
|
fallback: z.string().optional()
|
|
1580
1580
|
});
|
|
1581
|
+
// Optional `auth` section of the config: Anthropic credentials plus the list
// of model-name fragments that should be sent with the MAX token.
var AuthSchema = z
  .object({
    anthropicApiKey: z.string().optional(),
    anthropicMaxToken: z.string().optional(),
    // Default: ['opus'] (applied by getAnthropicAuth when this key is absent).
    useMaxForModels: z.array(z.string()).optional(),
  })
  .optional();
|
|
1581
1587
|
// Top-level shape of the proxy config; every section may be omitted.
var ConfigSchema = z.object({
  // Routing strategies; getStrategy looks them up by task type.
  strategies: z.record(z.string(), StrategySchema).optional(),
  defaults: z
    .object({
      qualityModel: z.string().optional(),
      costModel: z.string().optional(),
    })
    .optional(),
  // AuthSchema is itself declared with `.optional()`.
  auth: AuthSchema,
});
|
|
1588
1595
|
var DEFAULT_CONFIG = {
|
|
1589
1596
|
strategies: {
|
|
@@ -1643,6 +1650,19 @@ function loadConfig() {
|
|
|
1643
1650
|
/**
 * Looks up the routing strategy configured for a task type.
 *
 * @param {object} config - Loaded config; `config.strategies` may be absent.
 * @param {string} taskType - Key into `config.strategies`.
 * @returns {object|null} The strategy entry, or null when none is configured.
 */
function getStrategy(config, taskType) {
  const strategies = config.strategies;
  // Only own keys count: a plain bracket lookup would also resolve inherited
  // members such as "constructor" or "toString" and hand back a function.
  if (strategies == null || !Object.prototype.hasOwnProperty.call(strategies, taskType)) {
    return null;
  }
  return strategies[taskType] ?? null;
}
|
|
1653
|
+
/**
 * Chooses the Anthropic credential to use for a given target model.
 *
 * The MAX token is used when the model name contains (case-insensitively) one
 * of the `auth.useMaxForModels` fragments (default `["opus"]`) and a MAX token
 * is configured. Otherwise the API key from the config is used, falling back
 * to the `ANTHROPIC_API_KEY` environment variable.
 *
 * @param {object} config - Loaded config; `config.auth` may be absent.
 * @param {string} model - Target model name.
 * @returns {{type: "max"|"apiKey", value: string}|null} Credential descriptor,
 *   or null when no usable credential is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
  // A missing or non-string model simply matches no fragment instead of throwing.
  const modelName = typeof model === "string" ? model.toLowerCase() : "";
  const shouldUseMax = useMaxForModels.some((m) => modelName.includes(m.toLowerCase()));
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  // `||` rather than `??`: an empty-string key in the config is not a usable
  // credential and must not hide the environment variable.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
|
|
1646
1666
|
function watchConfig(onChange) {
|
|
1647
1667
|
const configPath = getConfigPath();
|
|
1648
1668
|
const dir = path2.dirname(configPath);
|
|
@@ -1663,10 +1683,67 @@ function watchConfig(onChange) {
|
|
|
1663
1683
|
}
|
|
1664
1684
|
|
|
1665
1685
|
// src/proxy.ts
|
|
1666
|
-
var VERSION = "0.1.
|
|
1686
|
+
var VERSION = "0.1.9";
|
|
1667
1687
|
var recentRuns = [];
|
|
1668
1688
|
var MAX_RECENT_RUNS = 100;
|
|
1669
1689
|
var modelCounts = {};
|
|
1690
|
+
var tokenStats = {};
|
|
1691
|
+
// Price table used by calculateCosts: USD per 1,000,000 tokens, split into
// input and output rates, keyed by model id.
var MODEL_PRICING2 = {
  // --- Anthropic ---
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },

  // --- OpenAI ---
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },

  // --- Fallback tiers for models missing from the table ---
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 },
};
|
|
1708
|
+
/**
 * Adds one request's token usage to the running per-model totals in `tokenStats`.
 *
 * @param {string} model - Model name used as the `tokenStats` key.
 * @param {number} inputTokens - Prompt tokens for this request.
 * @param {number} outputTokens - Completion tokens for this request.
 */
function trackTokens(model, inputTokens, outputTokens) {
  // A single undefined/NaN/negative count would otherwise turn the running
  // totals into NaN for the lifetime of the process; count it as zero instead.
  const toCount = (value) => (Number.isFinite(value) && value > 0 ? value : 0);
  // Own-property check plus defineProperty: a model named "constructor" or
  // "__proto__" must get its own record rather than resolving to (and
  // mutating) something inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(tokenStats, model)) {
    Object.defineProperty(tokenStats, model, {
      value: { inputTokens: 0, outputTokens: 0, requests: 0 },
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  const entry = tokenStats[model];
  entry.inputTokens += toCount(inputTokens);
  entry.outputTokens += toCount(outputTokens);
  entry.requests += 1;
}
|
|
1716
|
+
/**
 * Summarises the token usage accumulated in `tokenStats`.
 *
 * Each model is priced from `MODEL_PRICING2` (USD per 1e6 tokens); models
 * missing from the table are priced at the "default-cheap" rate. The same
 * token totals are also priced at the "claude-opus-4-5-20250514" rate to
 * produce a baseline, and the difference is reported as savings.
 *
 * @returns {{
 *   totalInputTokens: number,
 *   totalOutputTokens: number,
 *   actualCostUsd: number,
 *   opusCostUsd: number,
 *   savingsUsd: number,
 *   savingsPercent: string,
 *   byModel: Object<string, {inputTokens: number, outputTokens: number, costUsd: number}>
 * }} Totals and per-model breakdown; USD figures are rounded to 4 decimals.
 */
function calculateCosts() {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const costOf = (inputTokens, outputTokens, pricing) =>
    inputTokens / 1e6 * pricing.input + outputTokens / 1e6 * pricing.output;
  const round4 = (usd) => parseFloat(usd.toFixed(4));

  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let actualCostUsd = 0;
  const perModel = [];
  for (const [model, stats] of Object.entries(tokenStats)) {
    totalInputTokens += stats.inputTokens;
    totalOutputTokens += stats.outputTokens;
    // Own-property lookup only: a model named e.g. "constructor" would
    // otherwise resolve to an inherited member with no input/output rates and
    // turn every total into NaN.
    const pricing = (hasOwn(MODEL_PRICING2, model) ? MODEL_PRICING2[model] : void 0) || MODEL_PRICING2["default-cheap"];
    const cost = costOf(stats.inputTokens, stats.outputTokens, pricing);
    actualCostUsd += cost;
    perModel.push([model, {
      inputTokens: stats.inputTokens,
      outputTokens: stats.outputTokens,
      costUsd: round4(cost)
    }]);
  }
  // Object.fromEntries defines own data properties, so even a "__proto__"
  // model name ends up as a regular key of the breakdown.
  const byModel = Object.fromEntries(perModel);

  const opusCostUsd = costOf(totalInputTokens, totalOutputTokens, MODEL_PRICING2["claude-opus-4-5-20250514"]);
  const savingsUsd = opusCostUsd - actualCostUsd;
  const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
  return {
    totalInputTokens,
    totalOutputTokens,
    actualCostUsd: round4(actualCostUsd),
    opusCostUsd: round4(opusCostUsd),
    savingsUsd: round4(savingsUsd),
    savingsPercent,
    byModel
  };
}
|
|
1670
1747
|
var serverStartTime = 0;
|
|
1671
1748
|
var currentConfig = loadConfig();
|
|
1672
1749
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -1731,13 +1808,17 @@ function extractPromptText(messages) {
|
|
|
1731
1808
|
return "";
|
|
1732
1809
|
}).join("\n");
|
|
1733
1810
|
}
|
|
1734
|
-
async function forwardToAnthropic(request, targetModel,
|
|
1811
|
+
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
|
|
1735
1812
|
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
1736
1813
|
const headers = {
|
|
1737
1814
|
"Content-Type": "application/json",
|
|
1738
|
-
"x-api-key": apiKey,
|
|
1739
1815
|
"anthropic-version": "2023-06-01"
|
|
1740
1816
|
};
|
|
1817
|
+
if (auth.type === "max") {
|
|
1818
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1819
|
+
} else {
|
|
1820
|
+
headers["x-api-key"] = auth.value;
|
|
1821
|
+
}
|
|
1741
1822
|
if (betaHeaders) {
|
|
1742
1823
|
headers["anthropic-beta"] = betaHeaders;
|
|
1743
1824
|
}
|
|
@@ -1748,13 +1829,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
|
1748
1829
|
});
|
|
1749
1830
|
return response;
|
|
1750
1831
|
}
|
|
1751
|
-
async function forwardToAnthropicStream(request, targetModel,
|
|
1832
|
+
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
|
|
1752
1833
|
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
1753
1834
|
const headers = {
|
|
1754
1835
|
"Content-Type": "application/json",
|
|
1755
|
-
"x-api-key": apiKey,
|
|
1756
1836
|
"anthropic-version": "2023-06-01"
|
|
1757
1837
|
};
|
|
1838
|
+
if (auth.type === "max") {
|
|
1839
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1840
|
+
} else {
|
|
1841
|
+
headers["x-api-key"] = auth.value;
|
|
1842
|
+
}
|
|
1758
1843
|
if (betaHeaders) {
|
|
1759
1844
|
headers["anthropic-beta"] = betaHeaders;
|
|
1760
1845
|
}
|
|
@@ -2286,6 +2371,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2286
2371
|
return null;
|
|
2287
2372
|
}
|
|
2288
2373
|
}
|
|
2374
|
+
var lastStreamingUsage = null;
|
|
2289
2375
|
async function* convertAnthropicStream(response, model) {
|
|
2290
2376
|
const reader = response.body?.getReader();
|
|
2291
2377
|
if (!reader) {
|
|
@@ -2298,6 +2384,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2298
2384
|
currentToolIndex: 0,
|
|
2299
2385
|
tools: /* @__PURE__ */ new Map()
|
|
2300
2386
|
};
|
|
2387
|
+
let streamInputTokens = 0;
|
|
2388
|
+
let streamOutputTokens = 0;
|
|
2301
2389
|
try {
|
|
2302
2390
|
while (true) {
|
|
2303
2391
|
const { done, value } = await reader.read();
|
|
@@ -2315,6 +2403,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2315
2403
|
} else if (line === "" && eventType && eventData) {
|
|
2316
2404
|
try {
|
|
2317
2405
|
const parsed = JSON.parse(eventData);
|
|
2406
|
+
if (eventType === "message_start") {
|
|
2407
|
+
const msg = parsed["message"];
|
|
2408
|
+
if (msg?.usage?.input_tokens) {
|
|
2409
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2410
|
+
}
|
|
2411
|
+
} else if (eventType === "message_delta") {
|
|
2412
|
+
const usage = parsed["usage"];
|
|
2413
|
+
if (usage?.output_tokens) {
|
|
2414
|
+
streamOutputTokens = usage.output_tokens;
|
|
2415
|
+
}
|
|
2416
|
+
}
|
|
2318
2417
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2319
2418
|
if (converted) {
|
|
2320
2419
|
yield converted;
|
|
@@ -2326,6 +2425,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2326
2425
|
}
|
|
2327
2426
|
}
|
|
2328
2427
|
}
|
|
2428
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2329
2429
|
} finally {
|
|
2330
2430
|
reader.releaseLock();
|
|
2331
2431
|
}
|
|
@@ -2423,23 +2523,32 @@ async function startProxy(config = {}) {
|
|
|
2423
2523
|
}
|
|
2424
2524
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2425
2525
|
const stats = relay.stats();
|
|
2426
|
-
const
|
|
2526
|
+
const costs = calculateCosts();
|
|
2427
2527
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2428
2528
|
const modelDistribution = {};
|
|
2429
2529
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2530
|
+
const modelName = model.split("/")[1] || model;
|
|
2531
|
+
const tokenData = costs.byModel[modelName];
|
|
2430
2532
|
modelDistribution[model] = {
|
|
2431
2533
|
count,
|
|
2432
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2534
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2535
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2536
|
+
costUsd: tokenData?.costUsd
|
|
2433
2537
|
};
|
|
2434
2538
|
}
|
|
2435
2539
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2436
2540
|
res.end(JSON.stringify({
|
|
2437
2541
|
totalRuns,
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2542
|
+
tokens: {
|
|
2543
|
+
input: costs.totalInputTokens,
|
|
2544
|
+
output: costs.totalOutputTokens,
|
|
2545
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2546
|
+
},
|
|
2547
|
+
costs: {
|
|
2548
|
+
actualUsd: costs.actualCostUsd,
|
|
2549
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2550
|
+
savingsUsd: costs.savingsUsd,
|
|
2551
|
+
savingsPercent: costs.savingsPercent
|
|
2443
2552
|
},
|
|
2444
2553
|
modelDistribution,
|
|
2445
2554
|
byTaskType: stats.byTaskType,
|
|
@@ -2560,12 +2669,24 @@ async function startProxy(config = {}) {
|
|
|
2560
2669
|
}
|
|
2561
2670
|
}
|
|
2562
2671
|
log(`Routing to: ${targetProvider}/${targetModel}`);
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
if (
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2672
|
+
let apiKey;
|
|
2673
|
+
let anthropicAuth = null;
|
|
2674
|
+
if (targetProvider === "anthropic") {
|
|
2675
|
+
anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
|
|
2676
|
+
if (!anthropicAuth) {
|
|
2677
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2678
|
+
res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
|
|
2679
|
+
return;
|
|
2680
|
+
}
|
|
2681
|
+
log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
|
|
2682
|
+
} else {
|
|
2683
|
+
const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
|
|
2684
|
+
apiKey = process.env[apiKeyEnv];
|
|
2685
|
+
if (!apiKey) {
|
|
2686
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2687
|
+
res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
|
|
2688
|
+
return;
|
|
2689
|
+
}
|
|
2569
2690
|
}
|
|
2570
2691
|
const startTime = Date.now();
|
|
2571
2692
|
const betaHeaders = req.headers["anthropic-beta"];
|
|
@@ -2576,6 +2697,7 @@ async function startProxy(config = {}) {
|
|
|
2576
2697
|
targetProvider,
|
|
2577
2698
|
targetModel,
|
|
2578
2699
|
apiKey,
|
|
2700
|
+
anthropicAuth,
|
|
2579
2701
|
relay,
|
|
2580
2702
|
promptText,
|
|
2581
2703
|
taskType,
|
|
@@ -2592,6 +2714,7 @@ async function startProxy(config = {}) {
|
|
|
2592
2714
|
targetProvider,
|
|
2593
2715
|
targetModel,
|
|
2594
2716
|
apiKey,
|
|
2717
|
+
anthropicAuth,
|
|
2595
2718
|
relay,
|
|
2596
2719
|
promptText,
|
|
2597
2720
|
taskType,
|
|
@@ -2621,12 +2744,13 @@ async function startProxy(config = {}) {
|
|
|
2621
2744
|
});
|
|
2622
2745
|
});
|
|
2623
2746
|
}
|
|
2624
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2747
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2625
2748
|
let providerResponse;
|
|
2626
2749
|
try {
|
|
2627
2750
|
switch (targetProvider) {
|
|
2628
2751
|
case "anthropic":
|
|
2629
|
-
|
|
2752
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2753
|
+
providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
|
|
2630
2754
|
break;
|
|
2631
2755
|
case "google":
|
|
2632
2756
|
providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
|
|
@@ -2680,6 +2804,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2680
2804
|
const durationMs = Date.now() - startTime;
|
|
2681
2805
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2682
2806
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2807
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2808
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2809
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2810
|
+
lastStreamingUsage = null;
|
|
2811
|
+
}
|
|
2683
2812
|
relay.run({
|
|
2684
2813
|
prompt: promptText.slice(0, 500),
|
|
2685
2814
|
taskType,
|
|
@@ -2704,13 +2833,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2704
2833
|
});
|
|
2705
2834
|
res.end();
|
|
2706
2835
|
}
|
|
2707
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2836
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2708
2837
|
let providerResponse;
|
|
2709
2838
|
let responseData;
|
|
2710
2839
|
try {
|
|
2711
2840
|
switch (targetProvider) {
|
|
2712
2841
|
case "anthropic": {
|
|
2713
|
-
|
|
2842
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2843
|
+
providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
|
|
2714
2844
|
const rawData = await providerResponse.json();
|
|
2715
2845
|
if (!providerResponse.ok) {
|
|
2716
2846
|
res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
|
|
@@ -2770,6 +2900,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2770
2900
|
const durationMs = Date.now() - startTime;
|
|
2771
2901
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2772
2902
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2903
|
+
const usage = responseData["usage"];
|
|
2904
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2905
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2906
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2907
|
+
}
|
|
2773
2908
|
try {
|
|
2774
2909
|
const runResult = await relay.run({
|
|
2775
2910
|
prompt: promptText.slice(0, 500),
|