@relayplane/proxy 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -71
- package/dist/cli.js +98 -7
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +98 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +98 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +98 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,6 +4,19 @@
|
|
|
4
4
|
|
|
5
5
|
Intelligent AI model routing that cuts costs by 50-80% while maintaining quality.
|
|
6
6
|
|
|
7
|
+
> **Note:** Designed for standard API key users (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). MAX subscription OAuth is not currently supported — MAX users should continue using their provider directly.
|
|
8
|
+
|
|
9
|
+
> ⚠️ **Cost Monitoring Required**
|
|
10
|
+
>
|
|
11
|
+
> RelayPlane routes requests to LLM providers using your API keys. **This incurs real costs.**
|
|
12
|
+
>
|
|
13
|
+
> - Set up billing alerts with your providers (Anthropic, OpenAI, etc.)
|
|
14
|
+
> - Monitor usage through your provider's dashboard
|
|
15
|
+
> - Use `/relayplane stats` or `curl localhost:3001/control/stats` to track usage
|
|
16
|
+
> - Start with test requests to understand routing behavior
|
|
17
|
+
>
|
|
18
|
+
> RelayPlane provides cost *optimization*, not cost *elimination*. You are responsible for monitoring your actual spending.
|
|
19
|
+
|
|
7
20
|
[CI status](https://github.com/RelayPlane/proxy/actions/workflows/ci.yml)
|
|
8
21
|
[npm package](https://www.npmjs.com/package/@relayplane/proxy)
|
|
9
22
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -39,6 +52,23 @@ npx @relayplane/proxy stats --days 30
|
|
|
39
52
|
npx @relayplane/proxy --help
|
|
40
53
|
```
|
|
41
54
|
|
|
55
|
+
## OpenClaw Slash Commands
|
|
56
|
+
|
|
57
|
+
If you're using OpenClaw, these chat commands are available:
|
|
58
|
+
|
|
59
|
+
| Command | Description |
|
|
60
|
+
|---------|-------------|
|
|
61
|
+
| `/relayplane stats` | Show usage statistics and cost savings |
|
|
62
|
+
| `/relayplane status` | Show proxy health and configuration |
|
|
63
|
+
| `/relayplane switch <mode>` | Change routing mode (auto\|cost\|fast\|quality) |
|
|
64
|
+
| `/relayplane models` | List available routing models |
|
|
65
|
+
|
|
66
|
+
Example:
|
|
67
|
+
```
|
|
68
|
+
/relayplane stats
|
|
69
|
+
/relayplane switch cost
|
|
70
|
+
```
|
|
71
|
+
|
|
42
72
|
## Quick Start
|
|
43
73
|
|
|
44
74
|
### 1. Set your API keys
|
|
@@ -110,11 +140,11 @@ Unlike static routing rules, RelayPlane adapts to **your** usage patterns.
|
|
|
110
140
|
|
|
111
141
|
| Provider | Models | Streaming | Tools |
|
|
112
142
|
|----------|--------|-----------|-------|
|
|
113
|
-
| **Anthropic** | Claude
|
|
114
|
-
| **OpenAI** | GPT-
|
|
115
|
-
| **Google** | Gemini 2.0 Flash,
|
|
116
|
-
| **xAI** | Grok
|
|
117
|
-
| **Moonshot** | v1
|
|
143
|
+
| **Anthropic** | Claude 3.5 Haiku, Sonnet 4, Opus 4.5 | ✓ | ✓ |
|
|
144
|
+
| **OpenAI** | GPT-4o, GPT-4o-mini, GPT-4.1, o1, o3 | ✓ | ✓ |
|
|
145
|
+
| **Google** | Gemini 2.0 Flash, Gemini Pro | ✓ | ✓ |
|
|
146
|
+
| **xAI** | Grok (grok-*) | ✓ | ✓ |
|
|
147
|
+
| **Moonshot** | Moonshot v1 (8k, 32k, 128k) | ✓ | ✓ |
|
|
118
148
|
|
|
119
149
|
## Routing Modes
|
|
120
150
|
|
|
@@ -178,74 +208,71 @@ Options:
|
|
|
178
208
|
|
|
179
209
|
## REST API
|
|
180
210
|
|
|
181
|
-
The proxy exposes endpoints for stats and monitoring:
|
|
211
|
+
The proxy exposes control endpoints for stats and monitoring:
|
|
182
212
|
|
|
183
|
-
### `GET /
|
|
213
|
+
### `GET /control/status`
|
|
184
214
|
|
|
185
|
-
|
|
215
|
+
Proxy status and current configuration.
|
|
186
216
|
|
|
187
217
|
```bash
|
|
188
|
-
curl http://localhost:3001/
|
|
218
|
+
curl http://localhost:3001/control/status
|
|
189
219
|
```
|
|
190
220
|
|
|
191
221
|
```json
|
|
192
222
|
{
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
"providers": { "anthropic": true, "openai": true, "google": false },
|
|
197
|
-
"totalRuns": 142
|
|
223
|
+
"enabled": true,
|
|
224
|
+
"mode": "cascade",
|
|
225
|
+
"modelOverrides": {}
|
|
198
226
|
}
|
|
199
227
|
```
|
|
200
228
|
|
|
201
|
-
### `GET /stats`
|
|
229
|
+
### `GET /control/stats`
|
|
202
230
|
|
|
203
|
-
Aggregated statistics and
|
|
231
|
+
Aggregated statistics and routing counts.
|
|
204
232
|
|
|
205
233
|
```bash
|
|
206
|
-
curl http://localhost:3001/stats
|
|
234
|
+
curl http://localhost:3001/control/stats
|
|
207
235
|
```
|
|
208
236
|
|
|
209
237
|
```json
|
|
210
238
|
{
|
|
211
|
-
"
|
|
212
|
-
"
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
239
|
+
"uptimeMs": 3600000,
|
|
240
|
+
"uptimeFormatted": "60m 0s",
|
|
241
|
+
"totalRequests": 142,
|
|
242
|
+
"successfulRequests": 138,
|
|
243
|
+
"failedRequests": 4,
|
|
244
|
+
"successRate": "97.2%",
|
|
245
|
+
"avgLatencyMs": 1203,
|
|
246
|
+
"escalations": 12,
|
|
247
|
+
"routingCounts": {
|
|
248
|
+
"auto": 100,
|
|
249
|
+
"cost": 30,
|
|
250
|
+
"passthrough": 12
|
|
217
251
|
},
|
|
218
|
-
"
|
|
219
|
-
"anthropic/claude-3-5-haiku-latest":
|
|
220
|
-
"anthropic/claude-sonnet-4-20250514":
|
|
252
|
+
"modelCounts": {
|
|
253
|
+
"anthropic/claude-3-5-haiku-latest": 98,
|
|
254
|
+
"anthropic/claude-sonnet-4-20250514": 44
|
|
221
255
|
}
|
|
222
256
|
}
|
|
223
257
|
```
|
|
224
258
|
|
|
225
|
-
### `
|
|
259
|
+
### `POST /control/enable` / `POST /control/disable`
|
|
226
260
|
|
|
227
|
-
|
|
261
|
+
Enable or disable routing (passthrough mode when disabled).
|
|
228
262
|
|
|
229
263
|
```bash
|
|
230
|
-
curl
|
|
264
|
+
curl -X POST http://localhost:3001/control/enable
|
|
265
|
+
curl -X POST http://localhost:3001/control/disable
|
|
231
266
|
```
|
|
232
267
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
"confidence": 0.92,
|
|
242
|
-
"mode": "auto",
|
|
243
|
-
"durationMs": 1203,
|
|
244
|
-
"promptPreview": "Write a function that..."
|
|
245
|
-
}
|
|
246
|
-
],
|
|
247
|
-
"total": 142
|
|
248
|
-
}
|
|
268
|
+
### `POST /control/config`
|
|
269
|
+
|
|
270
|
+
Update configuration (hot-reload, merges with existing).
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
curl -X POST http://localhost:3001/control/config \
|
|
274
|
+
-H "Content-Type: application/json" \
|
|
275
|
+
-d '{"routing": {"mode": "cascade"}}'
|
|
249
276
|
```
|
|
250
277
|
|
|
251
278
|
## Configuration
|
|
@@ -254,46 +281,71 @@ RelayPlane creates a config file on first run at `~/.relayplane/config.json`:
|
|
|
254
281
|
|
|
255
282
|
```json
|
|
256
283
|
{
|
|
257
|
-
"
|
|
258
|
-
|
|
259
|
-
"
|
|
260
|
-
"
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
284
|
+
"enabled": true,
|
|
285
|
+
"routing": {
|
|
286
|
+
"mode": "cascade",
|
|
287
|
+
"cascade": {
|
|
288
|
+
"enabled": true,
|
|
289
|
+
"models": [
|
|
290
|
+
"claude-3-haiku-20240307",
|
|
291
|
+
"claude-3-5-sonnet-20241022",
|
|
292
|
+
"claude-3-opus-20240229"
|
|
293
|
+
],
|
|
294
|
+
"escalateOn": "uncertainty",
|
|
295
|
+
"maxEscalations": 1
|
|
296
|
+
},
|
|
297
|
+
"complexity": {
|
|
298
|
+
"enabled": true,
|
|
299
|
+
"simple": "claude-3-haiku-20240307",
|
|
300
|
+
"moderate": "claude-3-5-sonnet-20241022",
|
|
301
|
+
"complex": "claude-3-opus-20240229"
|
|
302
|
+
}
|
|
267
303
|
},
|
|
268
|
-
"
|
|
269
|
-
"
|
|
270
|
-
|
|
271
|
-
|
|
304
|
+
"reliability": {
|
|
305
|
+
"cooldowns": {
|
|
306
|
+
"enabled": true,
|
|
307
|
+
"allowedFails": 3,
|
|
308
|
+
"windowSeconds": 60,
|
|
309
|
+
"cooldownSeconds": 120
|
|
310
|
+
}
|
|
311
|
+
},
|
|
312
|
+
"modelOverrides": {}
|
|
272
313
|
}
|
|
273
314
|
```
|
|
274
315
|
|
|
275
316
|
**Edit and save — changes apply instantly** (hot-reload, no restart needed).
|
|
276
317
|
|
|
277
|
-
###
|
|
318
|
+
### Configuration Options
|
|
278
319
|
|
|
279
320
|
| Field | Description |
|
|
280
321
|
|-------|-------------|
|
|
281
|
-
| `
|
|
282
|
-
| `
|
|
283
|
-
| `
|
|
322
|
+
| `enabled` | Enable/disable routing (false = passthrough mode) |
|
|
323
|
+
| `routing.mode` | `"cascade"` or `"standard"` |
|
|
324
|
+
| `routing.cascade.models` | Ordered list of models to try (cheapest first) |
|
|
325
|
+
| `routing.cascade.escalateOn` | When to escalate: `"uncertainty"`, `"refusal"`, or `"error"` |
|
|
326
|
+
| `routing.complexity.simple/moderate/complex` | Models for each complexity level |
|
|
327
|
+
| `reliability.cooldowns` | Auto-disable failing providers temporarily |
|
|
328
|
+
| `modelOverrides` | Map input model names to different targets |
|
|
284
329
|
|
|
285
330
|
### Examples
|
|
286
331
|
|
|
287
|
-
|
|
332
|
+
Use GPT-4o for complex tasks:
|
|
288
333
|
```json
|
|
289
|
-
|
|
334
|
+
{
|
|
335
|
+
"routing": {
|
|
336
|
+
"complexity": {
|
|
337
|
+
"complex": "gpt-4o"
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
}
|
|
290
341
|
```
|
|
291
342
|
|
|
292
|
-
|
|
343
|
+
Override a specific model:
|
|
293
344
|
```json
|
|
294
|
-
|
|
295
|
-
"
|
|
296
|
-
|
|
345
|
+
{
|
|
346
|
+
"modelOverrides": {
|
|
347
|
+
"claude-3-opus": "claude-3-5-sonnet-20241022"
|
|
348
|
+
}
|
|
297
349
|
}
|
|
298
350
|
```
|
|
299
351
|
|
|
@@ -311,9 +363,9 @@ sqlite3 ~/.relayplane/data.db "SELECT * FROM routing_rules"
|
|
|
311
363
|
|
|
312
364
|
## Links
|
|
313
365
|
|
|
314
|
-
- [
|
|
366
|
+
- [RelayPlane Proxy](https://relayplane.com/integrations/openclaw)
|
|
315
367
|
- [GitHub](https://github.com/RelayPlane/proxy)
|
|
316
|
-
- [RelayPlane
|
|
368
|
+
- [RelayPlane](https://relayplane.com/)
|
|
317
369
|
|
|
318
370
|
## License
|
|
319
371
|
|
package/dist/cli.js
CHANGED
|
@@ -1710,6 +1710,63 @@ var VERSION = "0.1.9";
|
|
|
1710
1710
|
var recentRuns = [];
|
|
1711
1711
|
var MAX_RECENT_RUNS = 100;
|
|
1712
1712
|
var modelCounts = {};
|
|
1713
|
+
// Running token totals keyed by model name. Entries are created and incremented by
// trackTokens() and read back by calculateCosts().
var tokenStats = {};
// Price table keyed by model name. calculateCosts() divides token counts by 1e6 before
// multiplying by these figures and reports the result in *Usd fields, so the values are
// USD per one million tokens, split into input (prompt) and output (completion) rates.
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  // calculateCosts() uses this entry as the "everything on Opus" baseline for savings.
  // NOTE(review): confirm this id and price against the provider's published model list.
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  // "default-cheap" is the fallback calculateCosts() applies to any model missing from this table.
  "default-cheap": { input: 1, output: 5 },
  // NOTE(review): "default-expensive" is not referenced by the code visible here — confirm it is used.
  "default-expensive": { input: 15, output: 75 }
};
|
|
1731
|
+
/**
 * Add one request's token usage to the running per-model totals in `tokenStats`.
 *
 * The entry is looked up and created as an *own* property of `tokenStats`. A model
 * name that collides with a member inherited from Object.prototype ("constructor",
 * "toString", "__proto__", ...) therefore gets its own counters instead of being
 * mistaken for an existing entry and having the counts written onto the inherited value.
 *
 * @param {string} model - Model name used as the key in `tokenStats`.
 * @param {number} inputTokens - Input (prompt) tokens consumed by this request.
 * @param {number} outputTokens - Output (completion) tokens produced by this request.
 * @returns {void}
 */
function trackTokens(model, inputTokens, outputTokens) {
  if (!Object.prototype.hasOwnProperty.call(tokenStats, model)) {
    // defineProperty instead of plain assignment: `obj["__proto__"] = x` would replace
    // the prototype rather than create an entry.
    Object.defineProperty(tokenStats, model, {
      value: { inputTokens: 0, outputTokens: 0, requests: 0 },
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  const entry = tokenStats[model];
  // A missing or non-numeric count is counted as 0 so one bad value cannot turn the
  // accumulated totals into NaN for the rest of the process lifetime.
  entry.inputTokens += Number.isFinite(inputTokens) ? inputTokens : 0;
  entry.outputTokens += Number.isFinite(outputTokens) ? outputTokens : 0;
  entry.requests += 1;
}
|
|
1739
|
+
/**
 * Summarise the token usage recorded in `tokenStats` and price it with `MODEL_PRICING2`.
 *
 * Each model's cost is (tokens / 1e6) * rate for input and output separately. A model
 * with no own entry in the price table is priced at the "default-cheap" rate. The
 * baseline ("opus") cost prices the combined token totals at the
 * "claude-opus-4-5-20250514" rate, and savings are that baseline minus the actual cost.
 *
 * @returns {{
 *   totalInputTokens: number,
 *   totalOutputTokens: number,
 *   actualCostUsd: number,
 *   opusCostUsd: number,
 *   savingsUsd: number,
 *   savingsPercent: string,
 *   byModel: Object<string, {inputTokens: number, outputTokens: number, costUsd: number}>
 * }} Totals rounded to 4 decimals; `savingsPercent` is a string such as "86.1%"
 *   ("0%" when the baseline cost is zero).
 */
function calculateCosts() {
  const hasOwn = Object.prototype.hasOwnProperty;
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let actualCostUsd = 0;
  const perModelEntries = [];
  for (const [model, stats] of Object.entries(tokenStats)) {
    totalInputTokens += stats.inputTokens;
    totalOutputTokens += stats.outputTokens;
    // Own-property lookup only: a model called "constructor" or "toString" must not
    // resolve to an inherited function, which would make every total NaN.
    const listed = hasOwn.call(MODEL_PRICING2, model) ? MODEL_PRICING2[model] : void 0;
    const pricing = listed || MODEL_PRICING2["default-cheap"];
    const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
    actualCostUsd += cost;
    perModelEntries.push([model, {
      inputTokens: stats.inputTokens,
      outputTokens: stats.outputTokens,
      costUsd: parseFloat(cost.toFixed(4))
    }]);
  }
  // fromEntries defines own data properties, so even a "__proto__" key is kept as data.
  const byModel = Object.fromEntries(perModelEntries);
  const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
  const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
  const savingsUsd = opusCostUsd - actualCostUsd;
  const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
  return {
    totalInputTokens,
    totalOutputTokens,
    actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
    opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
    savingsUsd: parseFloat(savingsUsd.toFixed(4)),
    savingsPercent,
    byModel
  };
}
|
|
1713
1770
|
var serverStartTime = 0;
|
|
1714
1771
|
var currentConfig = loadConfig();
|
|
1715
1772
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -2337,6 +2394,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2337
2394
|
return null;
|
|
2338
2395
|
}
|
|
2339
2396
|
}
|
|
2397
|
+
var lastStreamingUsage = null;
|
|
2340
2398
|
async function* convertAnthropicStream(response, model) {
|
|
2341
2399
|
const reader = response.body?.getReader();
|
|
2342
2400
|
if (!reader) {
|
|
@@ -2349,6 +2407,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2349
2407
|
currentToolIndex: 0,
|
|
2350
2408
|
tools: /* @__PURE__ */ new Map()
|
|
2351
2409
|
};
|
|
2410
|
+
let streamInputTokens = 0;
|
|
2411
|
+
let streamOutputTokens = 0;
|
|
2352
2412
|
try {
|
|
2353
2413
|
while (true) {
|
|
2354
2414
|
const { done, value } = await reader.read();
|
|
@@ -2366,6 +2426,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2366
2426
|
} else if (line === "" && eventType && eventData) {
|
|
2367
2427
|
try {
|
|
2368
2428
|
const parsed = JSON.parse(eventData);
|
|
2429
|
+
if (eventType === "message_start") {
|
|
2430
|
+
const msg = parsed["message"];
|
|
2431
|
+
if (msg?.usage?.input_tokens) {
|
|
2432
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2433
|
+
}
|
|
2434
|
+
} else if (eventType === "message_delta") {
|
|
2435
|
+
const usage = parsed["usage"];
|
|
2436
|
+
if (usage?.output_tokens) {
|
|
2437
|
+
streamOutputTokens = usage.output_tokens;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2369
2440
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2370
2441
|
if (converted) {
|
|
2371
2442
|
yield converted;
|
|
@@ -2377,6 +2448,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2377
2448
|
}
|
|
2378
2449
|
}
|
|
2379
2450
|
}
|
|
2451
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2380
2452
|
} finally {
|
|
2381
2453
|
reader.releaseLock();
|
|
2382
2454
|
}
|
|
@@ -2474,23 +2546,32 @@ async function startProxy(config = {}) {
|
|
|
2474
2546
|
}
|
|
2475
2547
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2476
2548
|
const stats = relay.stats();
|
|
2477
|
-
const
|
|
2549
|
+
const costs = calculateCosts();
|
|
2478
2550
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2479
2551
|
const modelDistribution = {};
|
|
2480
2552
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2553
|
+
const modelName = model.split("/")[1] || model;
|
|
2554
|
+
const tokenData = costs.byModel[modelName];
|
|
2481
2555
|
modelDistribution[model] = {
|
|
2482
2556
|
count,
|
|
2483
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2557
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2558
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2559
|
+
costUsd: tokenData?.costUsd
|
|
2484
2560
|
};
|
|
2485
2561
|
}
|
|
2486
2562
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2487
2563
|
res.end(JSON.stringify({
|
|
2488
2564
|
totalRuns,
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2565
|
+
tokens: {
|
|
2566
|
+
input: costs.totalInputTokens,
|
|
2567
|
+
output: costs.totalOutputTokens,
|
|
2568
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2569
|
+
},
|
|
2570
|
+
costs: {
|
|
2571
|
+
actualUsd: costs.actualCostUsd,
|
|
2572
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2573
|
+
savingsUsd: costs.savingsUsd,
|
|
2574
|
+
savingsPercent: costs.savingsPercent
|
|
2494
2575
|
},
|
|
2495
2576
|
modelDistribution,
|
|
2496
2577
|
byTaskType: stats.byTaskType,
|
|
@@ -2746,6 +2827,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2746
2827
|
const durationMs = Date.now() - startTime;
|
|
2747
2828
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2748
2829
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2830
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2831
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2832
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2833
|
+
lastStreamingUsage = null;
|
|
2834
|
+
}
|
|
2749
2835
|
relay.run({
|
|
2750
2836
|
prompt: promptText.slice(0, 500),
|
|
2751
2837
|
taskType,
|
|
@@ -2837,6 +2923,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2837
2923
|
const durationMs = Date.now() - startTime;
|
|
2838
2924
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2839
2925
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2926
|
+
const usage = responseData["usage"];
|
|
2927
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2928
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2929
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2930
|
+
}
|
|
2840
2931
|
try {
|
|
2841
2932
|
const runResult = await relay.run({
|
|
2842
2933
|
prompt: promptText.slice(0, 500),
|