@relayplane/proxy 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -43
- package/dist/cli.js +158 -23
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +158 -23
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +23 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +158 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +158 -23
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1580,12 +1580,19 @@ var StrategySchema = z.object({
|
|
|
1580
1580
|
minConfidence: z.number().min(0).max(1).optional(),
|
|
1581
1581
|
fallback: z.string().optional()
|
|
1582
1582
|
});
|
|
1583
|
+
/**
 * Optional `auth` section of the proxy configuration.
 *
 * All fields are optional strings / string lists; the whole section may be
 * omitted from the config file.
 */
var AuthSchema = z
  .object({
    // Anthropic API key taken from config.
    anthropicApiKey: z.string().optional(),
    // Token used for "max" style authentication.
    anthropicMaxToken: z.string().optional(),
    // Model-name fragments that should use the max token. Default: ['opus']
    useMaxForModels: z.array(z.string()).optional()
  })
  .optional();
|
|
1583
1589
|
var ConfigSchema = z.object({
|
|
1584
1590
|
strategies: z.record(z.string(), StrategySchema).optional(),
|
|
1585
1591
|
defaults: z.object({
|
|
1586
1592
|
qualityModel: z.string().optional(),
|
|
1587
1593
|
costModel: z.string().optional()
|
|
1588
|
-
}).optional()
|
|
1594
|
+
}).optional(),
|
|
1595
|
+
auth: AuthSchema
|
|
1589
1596
|
});
|
|
1590
1597
|
var DEFAULT_CONFIG = {
|
|
1591
1598
|
strategies: {
|
|
@@ -1645,6 +1652,19 @@ function loadConfig() {
|
|
|
1645
1652
|
/**
 * Looks up the routing strategy configured for a task type.
 *
 * Only strategies defined directly on `config.strategies` are returned, so a
 * task type that happens to match an inherited object member (for example
 * "toString" or "constructor") yields null instead of a built-in function.
 *
 * @param {{ strategies?: Record<string, object> }} config - Loaded configuration.
 * @param {string} taskType - Task type used as the key into `strategies`.
 * @returns {object|null} The configured strategy, or null when none exists.
 */
function getStrategy(config, taskType) {
  const strategies = config.strategies;
  if (strategies == null || !Object.prototype.hasOwnProperty.call(strategies, taskType)) {
    return null;
  }
  return strategies[taskType] ?? null;
}
|
|
1655
|
+
/**
 * Chooses the credential to use for an Anthropic request.
 *
 * Resolution order:
 *   1. If the model name contains (case-insensitively) one of the fragments in
 *      `auth.useMaxForModels` (default: ["opus"]) and `auth.anthropicMaxToken`
 *      is set, the max token is used.
 *   2. Otherwise `auth.anthropicApiKey`, falling back to the
 *      ANTHROPIC_API_KEY environment variable.
 *   3. Otherwise null.
 *
 * @param {{ auth?: { anthropicApiKey?: string, anthropicMaxToken?: string, useMaxForModels?: string[] } }} config
 *   Loaded configuration.
 * @param {string} model - Target model name.
 * @returns {{ type: "max" | "apiKey", value: string } | null}
 *   The credential to send, or null when nothing is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config?.auth;
  const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
  // A missing/non-string model simply matches no fragment instead of throwing.
  const modelName = typeof model === "string" ? model.toLowerCase() : "";
  // Blank fragments are skipped: "".includes-style matching would otherwise
  // select the max token for every model.
  const shouldUseMax = useMaxForModels.some(
    (fragment) => typeof fragment === "string" && fragment !== "" && modelName.includes(fragment.toLowerCase())
  );
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  // `||` (not `??`) so an empty-string key in the config still falls back to
  // the environment variable rather than producing "no auth configured".
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
|
|
1648
1668
|
function watchConfig(onChange) {
|
|
1649
1669
|
const configPath = getConfigPath();
|
|
1650
1670
|
const dir = path2.dirname(configPath);
|
|
@@ -1665,10 +1685,67 @@ function watchConfig(onChange) {
|
|
|
1665
1685
|
}
|
|
1666
1686
|
|
|
1667
1687
|
// src/proxy.ts
|
|
1668
|
-
var VERSION = "0.1.
|
|
1688
|
+
var VERSION = "0.1.9";
|
|
1669
1689
|
var recentRuns = [];
|
|
1670
1690
|
var MAX_RECENT_RUNS = 100;
|
|
1671
1691
|
var modelCounts = {};
|
|
1692
|
+
// Per-model running token totals: model name -> { inputTokens, outputTokens, requests }.
// Created without a prototype so that model names such as "constructor" or
// "__proto__" are ordinary keys and never resolve to inherited members.
var tokenStats = Object.create(null);

// Prices in USD per one million tokens (see the `/ 1e6` scaling in calculateCosts).
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 }
};

/**
 * Adds one request's token usage to the running totals for a model.
 *
 * Counts that are not finite, non-negative numbers (undefined, NaN, strings,
 * negatives) are recorded as 0 so a single malformed usage payload cannot turn
 * every later total into NaN. The request itself is still counted.
 *
 * @param {string} model - Model name used as the stats key.
 * @param {number} inputTokens - Prompt/input tokens for the request.
 * @param {number} outputTokens - Completion/output tokens for the request.
 * @returns {void}
 */
function trackTokens(model, inputTokens, outputTokens) {
  const toCount = (value) => (Number.isFinite(value) && value > 0 ? value : 0);
  if (!tokenStats[model]) {
    tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
  }
  const entry = tokenStats[model];
  entry.inputTokens += toCount(inputTokens);
  entry.outputTokens += toCount(outputTokens);
  entry.requests += 1;
}

/**
 * Summarises tracked token usage and its cost.
 *
 * Each model is priced from MODEL_PRICING2; models without an entry use the
 * "default-cheap" rate. The same token totals are also priced at the
 * "claude-opus-4-5-20250514" rate to produce a baseline, and the difference
 * is reported as savings.
 *
 * @returns {{
 *   totalInputTokens: number,
 *   totalOutputTokens: number,
 *   actualCostUsd: number,
 *   opusCostUsd: number,
 *   savingsUsd: number,
 *   savingsPercent: string,
 *   byModel: Record<string, { inputTokens: number, outputTokens: number, costUsd: number }>
 * }} Totals, USD amounts rounded to 4 decimals, and a per-model breakdown.
 */
function calculateCosts() {
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let actualCostUsd = 0;
  // Prototype-less so lookups by arbitrary model name only see real entries.
  const byModel = Object.create(null);
  for (const [model, stats] of Object.entries(tokenStats)) {
    totalInputTokens += stats.inputTokens;
    totalOutputTokens += stats.outputTokens;
    // Own-property check: a model called e.g. "constructor" must fall back to
    // the default rate rather than picking up an inherited member.
    const pricing = Object.prototype.hasOwnProperty.call(MODEL_PRICING2, model)
      ? MODEL_PRICING2[model]
      : MODEL_PRICING2["default-cheap"];
    const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
    actualCostUsd += cost;
    byModel[model] = {
      inputTokens: stats.inputTokens,
      outputTokens: stats.outputTokens,
      costUsd: parseFloat(cost.toFixed(4))
    };
  }
  const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
  const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
  const savingsUsd = opusCostUsd - actualCostUsd;
  const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
  return {
    totalInputTokens,
    totalOutputTokens,
    actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
    opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
    savingsUsd: parseFloat(savingsUsd.toFixed(4)),
    savingsPercent,
    byModel
  };
}
|
|
1672
1749
|
var serverStartTime = 0;
|
|
1673
1750
|
var currentConfig = loadConfig();
|
|
1674
1751
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -1733,13 +1810,17 @@ function extractPromptText(messages) {
|
|
|
1733
1810
|
return "";
|
|
1734
1811
|
}).join("\n");
|
|
1735
1812
|
}
|
|
1736
|
-
async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
1813
|
+
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
|
|
1737
1814
|
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
1738
1815
|
const headers = {
|
|
1739
1816
|
"Content-Type": "application/json",
|
|
1740
|
-
"x-api-key": apiKey,
|
|
1741
1817
|
"anthropic-version": "2023-06-01"
|
|
1742
1818
|
};
|
|
1819
|
+
if (auth.type === "max") {
|
|
1820
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1821
|
+
} else {
|
|
1822
|
+
headers["x-api-key"] = auth.value;
|
|
1823
|
+
}
|
|
1743
1824
|
if (betaHeaders) {
|
|
1744
1825
|
headers["anthropic-beta"] = betaHeaders;
|
|
1745
1826
|
}
|
|
@@ -1750,13 +1831,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
|
1750
1831
|
});
|
|
1751
1832
|
return response;
|
|
1752
1833
|
}
|
|
1753
|
-
async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
|
|
1834
|
+
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
|
|
1754
1835
|
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
1755
1836
|
const headers = {
|
|
1756
1837
|
"Content-Type": "application/json",
|
|
1757
|
-
"x-api-key": apiKey,
|
|
1758
1838
|
"anthropic-version": "2023-06-01"
|
|
1759
1839
|
};
|
|
1840
|
+
if (auth.type === "max") {
|
|
1841
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1842
|
+
} else {
|
|
1843
|
+
headers["x-api-key"] = auth.value;
|
|
1844
|
+
}
|
|
1760
1845
|
if (betaHeaders) {
|
|
1761
1846
|
headers["anthropic-beta"] = betaHeaders;
|
|
1762
1847
|
}
|
|
@@ -2288,6 +2373,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2288
2373
|
return null;
|
|
2289
2374
|
}
|
|
2290
2375
|
}
|
|
2376
|
+
var lastStreamingUsage = null;
|
|
2291
2377
|
async function* convertAnthropicStream(response, model) {
|
|
2292
2378
|
const reader = response.body?.getReader();
|
|
2293
2379
|
if (!reader) {
|
|
@@ -2300,6 +2386,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2300
2386
|
currentToolIndex: 0,
|
|
2301
2387
|
tools: /* @__PURE__ */ new Map()
|
|
2302
2388
|
};
|
|
2389
|
+
let streamInputTokens = 0;
|
|
2390
|
+
let streamOutputTokens = 0;
|
|
2303
2391
|
try {
|
|
2304
2392
|
while (true) {
|
|
2305
2393
|
const { done, value } = await reader.read();
|
|
@@ -2317,6 +2405,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2317
2405
|
} else if (line === "" && eventType && eventData) {
|
|
2318
2406
|
try {
|
|
2319
2407
|
const parsed = JSON.parse(eventData);
|
|
2408
|
+
if (eventType === "message_start") {
|
|
2409
|
+
const msg = parsed["message"];
|
|
2410
|
+
if (msg?.usage?.input_tokens) {
|
|
2411
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2412
|
+
}
|
|
2413
|
+
} else if (eventType === "message_delta") {
|
|
2414
|
+
const usage = parsed["usage"];
|
|
2415
|
+
if (usage?.output_tokens) {
|
|
2416
|
+
streamOutputTokens = usage.output_tokens;
|
|
2417
|
+
}
|
|
2418
|
+
}
|
|
2320
2419
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2321
2420
|
if (converted) {
|
|
2322
2421
|
yield converted;
|
|
@@ -2328,6 +2427,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2328
2427
|
}
|
|
2329
2428
|
}
|
|
2330
2429
|
}
|
|
2430
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2331
2431
|
} finally {
|
|
2332
2432
|
reader.releaseLock();
|
|
2333
2433
|
}
|
|
@@ -2425,23 +2525,32 @@ async function startProxy(config = {}) {
|
|
|
2425
2525
|
}
|
|
2426
2526
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2427
2527
|
const stats = relay.stats();
|
|
2428
|
-
const
|
|
2528
|
+
const costs = calculateCosts();
|
|
2429
2529
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2430
2530
|
const modelDistribution = {};
|
|
2431
2531
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2532
|
+
const modelName = model.split("/")[1] || model;
|
|
2533
|
+
const tokenData = costs.byModel[modelName];
|
|
2432
2534
|
modelDistribution[model] = {
|
|
2433
2535
|
count,
|
|
2434
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2536
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2537
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2538
|
+
costUsd: tokenData?.costUsd
|
|
2435
2539
|
};
|
|
2436
2540
|
}
|
|
2437
2541
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2438
2542
|
res.end(JSON.stringify({
|
|
2439
2543
|
totalRuns,
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2544
|
+
tokens: {
|
|
2545
|
+
input: costs.totalInputTokens,
|
|
2546
|
+
output: costs.totalOutputTokens,
|
|
2547
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2548
|
+
},
|
|
2549
|
+
costs: {
|
|
2550
|
+
actualUsd: costs.actualCostUsd,
|
|
2551
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2552
|
+
savingsUsd: costs.savingsUsd,
|
|
2553
|
+
savingsPercent: costs.savingsPercent
|
|
2445
2554
|
},
|
|
2446
2555
|
modelDistribution,
|
|
2447
2556
|
byTaskType: stats.byTaskType,
|
|
@@ -2562,12 +2671,24 @@ async function startProxy(config = {}) {
|
|
|
2562
2671
|
}
|
|
2563
2672
|
}
|
|
2564
2673
|
log(`Routing to: ${targetProvider}/${targetModel}`);
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
if (
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2674
|
+
let apiKey;
|
|
2675
|
+
let anthropicAuth = null;
|
|
2676
|
+
if (targetProvider === "anthropic") {
|
|
2677
|
+
anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
|
|
2678
|
+
if (!anthropicAuth) {
|
|
2679
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2680
|
+
res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
|
|
2681
|
+
return;
|
|
2682
|
+
}
|
|
2683
|
+
log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
|
|
2684
|
+
} else {
|
|
2685
|
+
const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
|
|
2686
|
+
apiKey = process.env[apiKeyEnv];
|
|
2687
|
+
if (!apiKey) {
|
|
2688
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2689
|
+
res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
|
|
2690
|
+
return;
|
|
2691
|
+
}
|
|
2571
2692
|
}
|
|
2572
2693
|
const startTime = Date.now();
|
|
2573
2694
|
const betaHeaders = req.headers["anthropic-beta"];
|
|
@@ -2578,6 +2699,7 @@ async function startProxy(config = {}) {
|
|
|
2578
2699
|
targetProvider,
|
|
2579
2700
|
targetModel,
|
|
2580
2701
|
apiKey,
|
|
2702
|
+
anthropicAuth,
|
|
2581
2703
|
relay,
|
|
2582
2704
|
promptText,
|
|
2583
2705
|
taskType,
|
|
@@ -2594,6 +2716,7 @@ async function startProxy(config = {}) {
|
|
|
2594
2716
|
targetProvider,
|
|
2595
2717
|
targetModel,
|
|
2596
2718
|
apiKey,
|
|
2719
|
+
anthropicAuth,
|
|
2597
2720
|
relay,
|
|
2598
2721
|
promptText,
|
|
2599
2722
|
taskType,
|
|
@@ -2623,12 +2746,13 @@ async function startProxy(config = {}) {
|
|
|
2623
2746
|
});
|
|
2624
2747
|
});
|
|
2625
2748
|
}
|
|
2626
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2749
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2627
2750
|
let providerResponse;
|
|
2628
2751
|
try {
|
|
2629
2752
|
switch (targetProvider) {
|
|
2630
2753
|
case "anthropic":
|
|
2631
|
-
|
|
2754
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2755
|
+
providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
|
|
2632
2756
|
break;
|
|
2633
2757
|
case "google":
|
|
2634
2758
|
providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
|
|
@@ -2682,6 +2806,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2682
2806
|
const durationMs = Date.now() - startTime;
|
|
2683
2807
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2684
2808
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2809
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2810
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2811
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2812
|
+
lastStreamingUsage = null;
|
|
2813
|
+
}
|
|
2685
2814
|
relay.run({
|
|
2686
2815
|
prompt: promptText.slice(0, 500),
|
|
2687
2816
|
taskType,
|
|
@@ -2706,13 +2835,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2706
2835
|
});
|
|
2707
2836
|
res.end();
|
|
2708
2837
|
}
|
|
2709
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2838
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2710
2839
|
let providerResponse;
|
|
2711
2840
|
let responseData;
|
|
2712
2841
|
try {
|
|
2713
2842
|
switch (targetProvider) {
|
|
2714
2843
|
case "anthropic": {
|
|
2715
|
-
|
|
2844
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2845
|
+
providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
|
|
2716
2846
|
const rawData = await providerResponse.json();
|
|
2717
2847
|
if (!providerResponse.ok) {
|
|
2718
2848
|
res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
|
|
@@ -2772,6 +2902,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2772
2902
|
const durationMs = Date.now() - startTime;
|
|
2773
2903
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2774
2904
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2905
|
+
const usage = responseData["usage"];
|
|
2906
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2907
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2908
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2909
|
+
}
|
|
2775
2910
|
try {
|
|
2776
2911
|
const runResult = await relay.run({
|
|
2777
2912
|
prompt: promptText.slice(0, 500),
|