agent-duelist 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2162 @@
1
+ // src/scorers/latency.ts
2
// Latency (in milliseconds) at or below which a run earns the full score of 1.
var MIN_MS = 500;
// Latency (in milliseconds) at or above which a run earns a score of 0.
var MAX_MS = 1e4;
/**
 * Scores a run by how quickly it completed.
 *
 * The measured latency is clamped into [MIN_MS, MAX_MS] and mapped linearly
 * onto a 1..0 scale (faster is better), then rounded to two decimal places.
 *
 * @param {{ result: { latencyMs: number } }} ctx Scoring context; only `result.latencyMs` is read.
 * @returns {{ name: string, value: number, details: { ms: number } }}
 *   The score named "latency", with the raw (unclamped) latency echoed in `details.ms`.
 */
var latencyScorer = ({ result }) => {
  const rawMs = result.latencyMs;
  const span = MAX_MS - MIN_MS;
  // Clamp from below first, then from above; equivalent because MIN_MS <= MAX_MS.
  const boundedMs = Math.min(MAX_MS, Math.max(MIN_MS, rawMs));
  const slowness = (boundedMs - MIN_MS) / span;
  const score = 1 - slowness;
  return {
    name: "latency",
    value: Math.round(score * 100) / 100,
    details: { ms: rawMs }
  };
};
13
+
14
+ // src/pricing/catalog.json
15
+ var catalog_default = {
16
+ _meta: {
17
+ source: "OpenRouter API \u2014 all providers (https://openrouter.ai/api/v1/models)",
18
+ updatedAt: "2026-02-28",
19
+ unit: "USD per token"
20
+ },
21
+ models: {
22
+ "ai21/jamba-large-1.7": {
23
+ inputPerToken: 2e-6,
24
+ outputPerToken: 8e-6
25
+ },
26
+ "aion-labs/aion-1.0": {
27
+ inputPerToken: 4e-6,
28
+ outputPerToken: 8e-6
29
+ },
30
+ "aion-labs/aion-1.0-mini": {
31
+ inputPerToken: 7e-7,
32
+ outputPerToken: 14e-7
33
+ },
34
+ "aion-labs/aion-2.0": {
35
+ inputPerToken: 8e-7,
36
+ outputPerToken: 16e-7
37
+ },
38
+ "aion-labs/aion-rp-llama-3.1-8b": {
39
+ inputPerToken: 8e-7,
40
+ outputPerToken: 16e-7
41
+ },
42
+ "alfredpros/codellama-7b-instruct-solidity": {
43
+ inputPerToken: 8e-7,
44
+ outputPerToken: 12e-7
45
+ },
46
+ "alibaba/tongyi-deepresearch-30b-a3b": {
47
+ inputPerToken: 9e-8,
48
+ outputPerToken: 45e-8
49
+ },
50
+ "allenai/molmo-2-8b": {
51
+ inputPerToken: 2e-7,
52
+ outputPerToken: 2e-7
53
+ },
54
+ "allenai/olmo-2-0325-32b-instruct": {
55
+ inputPerToken: 5e-8,
56
+ outputPerToken: 2e-7
57
+ },
58
+ "allenai/olmo-3-32b-think": {
59
+ inputPerToken: 15e-8,
60
+ outputPerToken: 5e-7
61
+ },
62
+ "allenai/olmo-3-7b-instruct": {
63
+ inputPerToken: 1e-7,
64
+ outputPerToken: 2e-7
65
+ },
66
+ "allenai/olmo-3-7b-think": {
67
+ inputPerToken: 12e-8,
68
+ outputPerToken: 2e-7
69
+ },
70
+ "allenai/olmo-3.1-32b-instruct": {
71
+ inputPerToken: 2e-7,
72
+ outputPerToken: 6e-7
73
+ },
74
+ "alpindale/goliath-120b": {
75
+ inputPerToken: 375e-8,
76
+ outputPerToken: 75e-7
77
+ },
78
+ "amazon/nova-2-lite-v1": {
79
+ inputPerToken: 3e-7,
80
+ outputPerToken: 25e-7
81
+ },
82
+ "amazon/nova-lite-v1": {
83
+ inputPerToken: 6e-8,
84
+ outputPerToken: 24e-8
85
+ },
86
+ "amazon/nova-micro-v1": {
87
+ inputPerToken: 35e-9,
88
+ outputPerToken: 14e-8
89
+ },
90
+ "amazon/nova-premier-v1": {
91
+ inputPerToken: 25e-7,
92
+ outputPerToken: 125e-7
93
+ },
94
+ "amazon/nova-pro-v1": {
95
+ inputPerToken: 8e-7,
96
+ outputPerToken: 32e-7
97
+ },
98
+ "anthracite-org/magnum-v4-72b": {
99
+ inputPerToken: 3e-6,
100
+ outputPerToken: 5e-6
101
+ },
102
+ "anthropic/claude-3-haiku": {
103
+ inputPerToken: 25e-8,
104
+ outputPerToken: 125e-8
105
+ },
106
+ "anthropic/claude-3.5-haiku": {
107
+ inputPerToken: 8e-7,
108
+ outputPerToken: 4e-6
109
+ },
110
+ "anthropic/claude-3.5-sonnet": {
111
+ inputPerToken: 6e-6,
112
+ outputPerToken: 3e-5
113
+ },
114
+ "anthropic/claude-3.7-sonnet": {
115
+ inputPerToken: 3e-6,
116
+ outputPerToken: 15e-6
117
+ },
118
+ "anthropic/claude-3.7-sonnet:thinking": {
119
+ inputPerToken: 3e-6,
120
+ outputPerToken: 15e-6
121
+ },
122
+ "anthropic/claude-haiku-4.5": {
123
+ inputPerToken: 1e-6,
124
+ outputPerToken: 5e-6
125
+ },
126
+ "anthropic/claude-opus-4": {
127
+ inputPerToken: 15e-6,
128
+ outputPerToken: 75e-6
129
+ },
130
+ "anthropic/claude-opus-4.1": {
131
+ inputPerToken: 15e-6,
132
+ outputPerToken: 75e-6
133
+ },
134
+ "anthropic/claude-opus-4.5": {
135
+ inputPerToken: 5e-6,
136
+ outputPerToken: 25e-6
137
+ },
138
+ "anthropic/claude-opus-4.6": {
139
+ inputPerToken: 5e-6,
140
+ outputPerToken: 25e-6
141
+ },
142
+ "anthropic/claude-sonnet-4": {
143
+ inputPerToken: 3e-6,
144
+ outputPerToken: 15e-6
145
+ },
146
+ "anthropic/claude-sonnet-4.5": {
147
+ inputPerToken: 3e-6,
148
+ outputPerToken: 15e-6
149
+ },
150
+ "anthropic/claude-sonnet-4.6": {
151
+ inputPerToken: 3e-6,
152
+ outputPerToken: 15e-6
153
+ },
154
+ "arcee-ai/coder-large": {
155
+ inputPerToken: 5e-7,
156
+ outputPerToken: 8e-7
157
+ },
158
+ "arcee-ai/maestro-reasoning": {
159
+ inputPerToken: 9e-7,
160
+ outputPerToken: 33e-7
161
+ },
162
+ "arcee-ai/spotlight": {
163
+ inputPerToken: 18e-8,
164
+ outputPerToken: 18e-8
165
+ },
166
+ "arcee-ai/trinity-mini": {
167
+ inputPerToken: 45e-9,
168
+ outputPerToken: 15e-8
169
+ },
170
+ "arcee-ai/virtuoso-large": {
171
+ inputPerToken: 75e-8,
172
+ outputPerToken: 12e-7
173
+ },
174
+ "baidu/ernie-4.5-21b-a3b": {
175
+ inputPerToken: 7e-8,
176
+ outputPerToken: 28e-8
177
+ },
178
+ "baidu/ernie-4.5-21b-a3b-thinking": {
179
+ inputPerToken: 7e-8,
180
+ outputPerToken: 28e-8
181
+ },
182
+ "baidu/ernie-4.5-300b-a47b": {
183
+ inputPerToken: 28e-8,
184
+ outputPerToken: 11e-7
185
+ },
186
+ "baidu/ernie-4.5-vl-28b-a3b": {
187
+ inputPerToken: 14e-8,
188
+ outputPerToken: 56e-8
189
+ },
190
+ "baidu/ernie-4.5-vl-424b-a47b": {
191
+ inputPerToken: 42e-8,
192
+ outputPerToken: 125e-8
193
+ },
194
+ "bytedance/seed-1.6": {
195
+ inputPerToken: 25e-8,
196
+ outputPerToken: 2e-6
197
+ },
198
+ "bytedance/seed-1.6-flash": {
199
+ inputPerToken: 75e-9,
200
+ outputPerToken: 3e-7
201
+ },
202
+ "bytedance/seed-2.0-mini": {
203
+ inputPerToken: 1e-7,
204
+ outputPerToken: 4e-7
205
+ },
206
+ "bytedance/ui-tars-1.5-7b": {
207
+ inputPerToken: 1e-7,
208
+ outputPerToken: 2e-7
209
+ },
210
+ "cohere/command-a": {
211
+ inputPerToken: 25e-7,
212
+ outputPerToken: 1e-5
213
+ },
214
+ "cohere/command-r-08-2024": {
215
+ inputPerToken: 15e-8,
216
+ outputPerToken: 6e-7
217
+ },
218
+ "cohere/command-r-plus-08-2024": {
219
+ inputPerToken: 25e-7,
220
+ outputPerToken: 1e-5
221
+ },
222
+ "cohere/command-r7b-12-2024": {
223
+ inputPerToken: 375e-10,
224
+ outputPerToken: 15e-8
225
+ },
226
+ "deepcogito/cogito-v2.1-671b": {
227
+ inputPerToken: 125e-8,
228
+ outputPerToken: 125e-8
229
+ },
230
+ "deepseek/deepseek-chat": {
231
+ inputPerToken: 32e-8,
232
+ outputPerToken: 89e-8
233
+ },
234
+ "deepseek/deepseek-chat-v3-0324": {
235
+ inputPerToken: 2e-7,
236
+ outputPerToken: 77e-8
237
+ },
238
+ "deepseek/deepseek-chat-v3.1": {
239
+ inputPerToken: 15e-8,
240
+ outputPerToken: 75e-8
241
+ },
242
+ "deepseek/deepseek-r1": {
243
+ inputPerToken: 7e-7,
244
+ outputPerToken: 25e-7
245
+ },
246
+ "deepseek/deepseek-r1-0528": {
247
+ inputPerToken: 45e-8,
248
+ outputPerToken: 215e-8
249
+ },
250
+ "deepseek/deepseek-r1-distill-llama-70b": {
251
+ inputPerToken: 7e-7,
252
+ outputPerToken: 8e-7
253
+ },
254
+ "deepseek/deepseek-r1-distill-qwen-32b": {
255
+ inputPerToken: 29e-8,
256
+ outputPerToken: 29e-8
257
+ },
258
+ "deepseek/deepseek-v3": {
259
+ inputPerToken: 3e-7,
260
+ outputPerToken: 88e-8
261
+ },
262
+ "deepseek/deepseek-v3.1-terminus": {
263
+ inputPerToken: 21e-8,
264
+ outputPerToken: 79e-8
265
+ },
266
+ "deepseek/deepseek-v3.1-terminus:exacto": {
267
+ inputPerToken: 21e-8,
268
+ outputPerToken: 79e-8
269
+ },
270
+ "deepseek/deepseek-v3.2": {
271
+ inputPerToken: 25e-8,
272
+ outputPerToken: 4e-7
273
+ },
274
+ "deepseek/deepseek-v3.2-exp": {
275
+ inputPerToken: 27e-8,
276
+ outputPerToken: 41e-8
277
+ },
278
+ "deepseek/deepseek-v3.2-speciale": {
279
+ inputPerToken: 4e-7,
280
+ outputPerToken: 12e-7
281
+ },
282
+ "eleutherai/llemma_7b": {
283
+ inputPerToken: 8e-7,
284
+ outputPerToken: 12e-7
285
+ },
286
+ "essentialai/rnj-1-instruct": {
287
+ inputPerToken: 15e-8,
288
+ outputPerToken: 15e-8
289
+ },
290
+ "google/gemini-2.0-flash": {
291
+ inputPerToken: 1e-7,
292
+ outputPerToken: 4e-7
293
+ },
294
+ "google/gemini-2.0-flash-001": {
295
+ inputPerToken: 1e-7,
296
+ outputPerToken: 4e-7
297
+ },
298
+ "google/gemini-2.0-flash-lite-001": {
299
+ inputPerToken: 75e-9,
300
+ outputPerToken: 3e-7
301
+ },
302
+ "google/gemini-2.5-flash": {
303
+ inputPerToken: 3e-7,
304
+ outputPerToken: 25e-7
305
+ },
306
+ "google/gemini-2.5-flash-image": {
307
+ inputPerToken: 3e-7,
308
+ outputPerToken: 25e-7
309
+ },
310
+ "google/gemini-2.5-flash-lite": {
311
+ inputPerToken: 1e-7,
312
+ outputPerToken: 4e-7
313
+ },
314
+ "google/gemini-2.5-flash-lite-preview-09-2025": {
315
+ inputPerToken: 1e-7,
316
+ outputPerToken: 4e-7
317
+ },
318
+ "google/gemini-2.5-pro": {
319
+ inputPerToken: 125e-8,
320
+ outputPerToken: 1e-5
321
+ },
322
+ "google/gemini-2.5-pro-preview": {
323
+ inputPerToken: 125e-8,
324
+ outputPerToken: 1e-5
325
+ },
326
+ "google/gemini-2.5-pro-preview-05-06": {
327
+ inputPerToken: 125e-8,
328
+ outputPerToken: 1e-5
329
+ },
330
+ "google/gemini-3-flash-preview": {
331
+ inputPerToken: 5e-7,
332
+ outputPerToken: 3e-6
333
+ },
334
+ "google/gemini-3-pro-image-preview": {
335
+ inputPerToken: 2e-6,
336
+ outputPerToken: 12e-6
337
+ },
338
+ "google/gemini-3-pro-preview": {
339
+ inputPerToken: 2e-6,
340
+ outputPerToken: 12e-6
341
+ },
342
+ "google/gemini-3.1-flash-image-preview": {
343
+ inputPerToken: 25e-8,
344
+ outputPerToken: 15e-7
345
+ },
346
+ "google/gemini-3.1-pro-preview": {
347
+ inputPerToken: 2e-6,
348
+ outputPerToken: 12e-6
349
+ },
350
+ "google/gemini-3.1-pro-preview-customtools": {
351
+ inputPerToken: 2e-6,
352
+ outputPerToken: 12e-6
353
+ },
354
+ "google/gemma-2-27b-it": {
355
+ inputPerToken: 65e-8,
356
+ outputPerToken: 65e-8
357
+ },
358
+ "google/gemma-2-9b-it": {
359
+ inputPerToken: 3e-8,
360
+ outputPerToken: 9e-8
361
+ },
362
+ "google/gemma-3-12b-it": {
363
+ inputPerToken: 4e-8,
364
+ outputPerToken: 13e-8
365
+ },
366
+ "google/gemma-3-27b-it": {
367
+ inputPerToken: 4e-8,
368
+ outputPerToken: 15e-8
369
+ },
370
+ "google/gemma-3-4b-it": {
371
+ inputPerToken: 4e-8,
372
+ outputPerToken: 8e-8
373
+ },
374
+ "google/gemma-3n-e4b-it": {
375
+ inputPerToken: 2e-8,
376
+ outputPerToken: 4e-8
377
+ },
378
+ "gryphe/mythomax-l2-13b": {
379
+ inputPerToken: 6e-8,
380
+ outputPerToken: 6e-8
381
+ },
382
+ "ibm-granite/granite-4.0-h-micro": {
383
+ inputPerToken: 17e-9,
384
+ outputPerToken: 11e-8
385
+ },
386
+ "inception/mercury": {
387
+ inputPerToken: 25e-8,
388
+ outputPerToken: 1e-6
389
+ },
390
+ "inception/mercury-coder": {
391
+ inputPerToken: 25e-8,
392
+ outputPerToken: 1e-6
393
+ },
394
+ "inflection/inflection-3-pi": {
395
+ inputPerToken: 25e-7,
396
+ outputPerToken: 1e-5
397
+ },
398
+ "inflection/inflection-3-productivity": {
399
+ inputPerToken: 25e-7,
400
+ outputPerToken: 1e-5
401
+ },
402
+ "kwaipilot/kat-coder-pro": {
403
+ inputPerToken: 207e-9,
404
+ outputPerToken: 828e-9
405
+ },
406
+ "liquid/lfm-2-24b-a2b": {
407
+ inputPerToken: 3e-8,
408
+ outputPerToken: 12e-8
409
+ },
410
+ "liquid/lfm-2.2-6b": {
411
+ inputPerToken: 1e-8,
412
+ outputPerToken: 2e-8
413
+ },
414
+ "liquid/lfm2-8b-a1b": {
415
+ inputPerToken: 1e-8,
416
+ outputPerToken: 2e-8
417
+ },
418
+ "mancer/weaver": {
419
+ inputPerToken: 75e-8,
420
+ outputPerToken: 1e-6
421
+ },
422
+ "meituan/longcat-flash-chat": {
423
+ inputPerToken: 2e-7,
424
+ outputPerToken: 8e-7
425
+ },
426
+ "meta/llama-3-70b-instruct": {
427
+ inputPerToken: 51e-8,
428
+ outputPerToken: 74e-8
429
+ },
430
+ "meta/llama-3-8b-instruct": {
431
+ inputPerToken: 3e-8,
432
+ outputPerToken: 4e-8
433
+ },
434
+ "meta/llama-3.1-405b": {
435
+ inputPerToken: 4e-6,
436
+ outputPerToken: 4e-6
437
+ },
438
+ "meta/llama-3.1-405b-instruct": {
439
+ inputPerToken: 4e-6,
440
+ outputPerToken: 4e-6
441
+ },
442
+ "meta/llama-3.1-70b-instruct": {
443
+ inputPerToken: 4e-7,
444
+ outputPerToken: 4e-7
445
+ },
446
+ "meta/llama-3.1-8b-instruct": {
447
+ inputPerToken: 2e-8,
448
+ outputPerToken: 5e-8
449
+ },
450
+ "meta/llama-3.2-11b-vision-instruct": {
451
+ inputPerToken: 49e-9,
452
+ outputPerToken: 49e-9
453
+ },
454
+ "meta/llama-3.2-1b-instruct": {
455
+ inputPerToken: 27e-9,
456
+ outputPerToken: 2e-7
457
+ },
458
+ "meta/llama-3.2-3b-instruct": {
459
+ inputPerToken: 2e-8,
460
+ outputPerToken: 2e-8
461
+ },
462
+ "meta/llama-3.3-70b": {
463
+ inputPerToken: 12e-8,
464
+ outputPerToken: 3e-7
465
+ },
466
+ "meta/llama-3.3-70b-instruct": {
467
+ inputPerToken: 1e-7,
468
+ outputPerToken: 32e-8
469
+ },
470
+ "meta/llama-4-maverick": {
471
+ inputPerToken: 15e-8,
472
+ outputPerToken: 6e-7
473
+ },
474
+ "meta/llama-4-scout": {
475
+ inputPerToken: 8e-8,
476
+ outputPerToken: 3e-7
477
+ },
478
+ "meta/llama-guard-2-8b": {
479
+ inputPerToken: 2e-7,
480
+ outputPerToken: 2e-7
481
+ },
482
+ "meta/llama-guard-3-8b": {
483
+ inputPerToken: 2e-8,
484
+ outputPerToken: 6e-8
485
+ },
486
+ "meta/llama-guard-4-12b": {
487
+ inputPerToken: 18e-8,
488
+ outputPerToken: 18e-8
489
+ },
490
+ "microsoft/phi-4": {
491
+ inputPerToken: 6e-8,
492
+ outputPerToken: 14e-8
493
+ },
494
+ "microsoft/wizardlm-2-8x22b": {
495
+ inputPerToken: 62e-8,
496
+ outputPerToken: 62e-8
497
+ },
498
+ "minimax/minimax-01": {
499
+ inputPerToken: 2e-7,
500
+ outputPerToken: 11e-7
501
+ },
502
+ "minimax/minimax-m1": {
503
+ inputPerToken: 4e-7,
504
+ outputPerToken: 22e-7
505
+ },
506
+ "minimax/minimax-m2": {
507
+ inputPerToken: 255e-9,
508
+ outputPerToken: 1e-6
509
+ },
510
+ "minimax/minimax-m2-her": {
511
+ inputPerToken: 3e-7,
512
+ outputPerToken: 12e-7
513
+ },
514
+ "minimax/minimax-m2.1": {
515
+ inputPerToken: 27e-8,
516
+ outputPerToken: 95e-8
517
+ },
518
+ "minimax/minimax-m2.5": {
519
+ inputPerToken: 295e-9,
520
+ outputPerToken: 12e-7
521
+ },
522
+ "mistral/codestral-2508": {
523
+ inputPerToken: 3e-7,
524
+ outputPerToken: 9e-7
525
+ },
526
+ "mistral/devstral-2512": {
527
+ inputPerToken: 4e-7,
528
+ outputPerToken: 2e-6
529
+ },
530
+ "mistral/devstral-medium": {
531
+ inputPerToken: 4e-7,
532
+ outputPerToken: 2e-6
533
+ },
534
+ "mistral/devstral-small": {
535
+ inputPerToken: 1e-7,
536
+ outputPerToken: 3e-7
537
+ },
538
+ "mistral/ministral-14b-2512": {
539
+ inputPerToken: 2e-7,
540
+ outputPerToken: 2e-7
541
+ },
542
+ "mistral/ministral-3b-2512": {
543
+ inputPerToken: 1e-7,
544
+ outputPerToken: 1e-7
545
+ },
546
+ "mistral/ministral-8b-2512": {
547
+ inputPerToken: 15e-8,
548
+ outputPerToken: 15e-8
549
+ },
550
+ "mistral/mistral-7b-instruct": {
551
+ inputPerToken: 2e-7,
552
+ outputPerToken: 2e-7
553
+ },
554
+ "mistral/mistral-7b-instruct-v0.1": {
555
+ inputPerToken: 11e-8,
556
+ outputPerToken: 19e-8
557
+ },
558
+ "mistral/mistral-7b-instruct-v0.3": {
559
+ inputPerToken: 2e-7,
560
+ outputPerToken: 2e-7
561
+ },
562
+ "mistral/mistral-large": {
563
+ inputPerToken: 2e-6,
564
+ outputPerToken: 6e-6
565
+ },
566
+ "mistral/mistral-large-2407": {
567
+ inputPerToken: 2e-6,
568
+ outputPerToken: 6e-6
569
+ },
570
+ "mistral/mistral-large-2411": {
571
+ inputPerToken: 2e-6,
572
+ outputPerToken: 6e-6
573
+ },
574
+ "mistral/mistral-large-2512": {
575
+ inputPerToken: 5e-7,
576
+ outputPerToken: 15e-7
577
+ },
578
+ "mistral/mistral-medium-3": {
579
+ inputPerToken: 4e-7,
580
+ outputPerToken: 2e-6
581
+ },
582
+ "mistral/mistral-medium-3.1": {
583
+ inputPerToken: 4e-7,
584
+ outputPerToken: 2e-6
585
+ },
586
+ "mistral/mistral-nemo": {
587
+ inputPerToken: 2e-8,
588
+ outputPerToken: 4e-8
589
+ },
590
+ "mistral/mistral-saba": {
591
+ inputPerToken: 2e-7,
592
+ outputPerToken: 6e-7
593
+ },
594
+ "mistral/mistral-small": {
595
+ inputPerToken: 1e-7,
596
+ outputPerToken: 3e-7
597
+ },
598
+ "mistral/mistral-small-24b-instruct-2501": {
599
+ inputPerToken: 5e-8,
600
+ outputPerToken: 8e-8
601
+ },
602
+ "mistral/mistral-small-3.1-24b-instruct": {
603
+ inputPerToken: 35e-8,
604
+ outputPerToken: 56e-8
605
+ },
606
+ "mistral/mistral-small-3.2-24b-instruct": {
607
+ inputPerToken: 6e-8,
608
+ outputPerToken: 18e-8
609
+ },
610
+ "mistral/mistral-small-creative": {
611
+ inputPerToken: 1e-7,
612
+ outputPerToken: 3e-7
613
+ },
614
+ "mistral/mixtral-8x22b-instruct": {
615
+ inputPerToken: 2e-6,
616
+ outputPerToken: 6e-6
617
+ },
618
+ "mistral/mixtral-8x7b-instruct": {
619
+ inputPerToken: 54e-8,
620
+ outputPerToken: 54e-8
621
+ },
622
+ "mistral/pixtral-large-2411": {
623
+ inputPerToken: 2e-6,
624
+ outputPerToken: 6e-6
625
+ },
626
+ "mistral/voxtral-small-24b-2507": {
627
+ inputPerToken: 1e-7,
628
+ outputPerToken: 3e-7
629
+ },
630
+ "moonshotai/kimi-k2": {
631
+ inputPerToken: 55e-8,
632
+ outputPerToken: 22e-7
633
+ },
634
+ "moonshotai/kimi-k2-0905": {
635
+ inputPerToken: 4e-7,
636
+ outputPerToken: 2e-6
637
+ },
638
+ "moonshotai/kimi-k2-0905:exacto": {
639
+ inputPerToken: 6e-7,
640
+ outputPerToken: 25e-7
641
+ },
642
+ "moonshotai/kimi-k2-thinking": {
643
+ inputPerToken: 47e-8,
644
+ outputPerToken: 2e-6
645
+ },
646
+ "moonshotai/kimi-k2.5": {
647
+ inputPerToken: 45e-8,
648
+ outputPerToken: 22e-7
649
+ },
650
+ "morph/morph-v3-fast": {
651
+ inputPerToken: 8e-7,
652
+ outputPerToken: 12e-7
653
+ },
654
+ "morph/morph-v3-large": {
655
+ inputPerToken: 9e-7,
656
+ outputPerToken: 19e-7
657
+ },
658
+ "neversleep/llama-3.1-lumimaid-8b": {
659
+ inputPerToken: 9e-8,
660
+ outputPerToken: 6e-7
661
+ },
662
+ "neversleep/noromaid-20b": {
663
+ inputPerToken: 1e-6,
664
+ outputPerToken: 175e-8
665
+ },
666
+ "nex-agi/deepseek-v3.1-nex-n1": {
667
+ inputPerToken: 27e-8,
668
+ outputPerToken: 1e-6
669
+ },
670
+ "nousresearch/hermes-2-pro-llama-3-8b": {
671
+ inputPerToken: 14e-8,
672
+ outputPerToken: 14e-8
673
+ },
674
+ "nousresearch/hermes-3-llama-3.1-405b": {
675
+ inputPerToken: 1e-6,
676
+ outputPerToken: 1e-6
677
+ },
678
+ "nousresearch/hermes-3-llama-3.1-70b": {
679
+ inputPerToken: 3e-7,
680
+ outputPerToken: 3e-7
681
+ },
682
+ "nousresearch/hermes-4-405b": {
683
+ inputPerToken: 1e-6,
684
+ outputPerToken: 3e-6
685
+ },
686
+ "nousresearch/hermes-4-70b": {
687
+ inputPerToken: 13e-8,
688
+ outputPerToken: 4e-7
689
+ },
690
+ "nvidia/llama-3.1-nemotron-70b-instruct": {
691
+ inputPerToken: 12e-7,
692
+ outputPerToken: 12e-7
693
+ },
694
+ "nvidia/llama-3.3-nemotron-super-49b-v1.5": {
695
+ inputPerToken: 1e-7,
696
+ outputPerToken: 4e-7
697
+ },
698
+ "nvidia/nemotron-3-nano-30b-a3b": {
699
+ inputPerToken: 5e-8,
700
+ outputPerToken: 2e-7
701
+ },
702
+ "nvidia/nemotron-nano-12b-v2-vl": {
703
+ inputPerToken: 2e-7,
704
+ outputPerToken: 6e-7
705
+ },
706
+ "nvidia/nemotron-nano-9b-v2": {
707
+ inputPerToken: 4e-8,
708
+ outputPerToken: 16e-8
709
+ },
710
+ "openai/gpt-3.5-turbo": {
711
+ inputPerToken: 5e-7,
712
+ outputPerToken: 15e-7
713
+ },
714
+ "openai/gpt-3.5-turbo-0613": {
715
+ inputPerToken: 1e-6,
716
+ outputPerToken: 2e-6
717
+ },
718
+ "openai/gpt-3.5-turbo-16k": {
719
+ inputPerToken: 3e-6,
720
+ outputPerToken: 4e-6
721
+ },
722
+ "openai/gpt-3.5-turbo-instruct": {
723
+ inputPerToken: 15e-7,
724
+ outputPerToken: 2e-6
725
+ },
726
+ "openai/gpt-4": {
727
+ inputPerToken: 3e-5,
728
+ outputPerToken: 6e-5
729
+ },
730
+ "openai/gpt-4-0314": {
731
+ inputPerToken: 3e-5,
732
+ outputPerToken: 6e-5
733
+ },
734
+ "openai/gpt-4-1106-preview": {
735
+ inputPerToken: 1e-5,
736
+ outputPerToken: 3e-5
737
+ },
738
+ "openai/gpt-4-turbo": {
739
+ inputPerToken: 1e-5,
740
+ outputPerToken: 3e-5
741
+ },
742
+ "openai/gpt-4-turbo-preview": {
743
+ inputPerToken: 1e-5,
744
+ outputPerToken: 3e-5
745
+ },
746
+ "openai/gpt-4.1": {
747
+ inputPerToken: 2e-6,
748
+ outputPerToken: 8e-6
749
+ },
750
+ "openai/gpt-4.1-mini": {
751
+ inputPerToken: 4e-7,
752
+ outputPerToken: 16e-7
753
+ },
754
+ "openai/gpt-4.1-nano": {
755
+ inputPerToken: 1e-7,
756
+ outputPerToken: 4e-7
757
+ },
758
+ "openai/gpt-4o": {
759
+ inputPerToken: 25e-7,
760
+ outputPerToken: 1e-5
761
+ },
762
+ "openai/gpt-4o-2024-05-13": {
763
+ inputPerToken: 5e-6,
764
+ outputPerToken: 15e-6
765
+ },
766
+ "openai/gpt-4o-2024-08-06": {
767
+ inputPerToken: 25e-7,
768
+ outputPerToken: 1e-5
769
+ },
770
+ "openai/gpt-4o-2024-11-20": {
771
+ inputPerToken: 25e-7,
772
+ outputPerToken: 1e-5
773
+ },
774
+ "openai/gpt-4o-audio-preview": {
775
+ inputPerToken: 25e-7,
776
+ outputPerToken: 1e-5
777
+ },
778
+ "openai/gpt-4o-mini": {
779
+ inputPerToken: 15e-8,
780
+ outputPerToken: 6e-7
781
+ },
782
+ "openai/gpt-4o-mini-2024-07-18": {
783
+ inputPerToken: 15e-8,
784
+ outputPerToken: 6e-7
785
+ },
786
+ "openai/gpt-4o-mini-search-preview": {
787
+ inputPerToken: 15e-8,
788
+ outputPerToken: 6e-7
789
+ },
790
+ "openai/gpt-4o-search-preview": {
791
+ inputPerToken: 25e-7,
792
+ outputPerToken: 1e-5
793
+ },
794
+ "openai/gpt-4o:extended": {
795
+ inputPerToken: 6e-6,
796
+ outputPerToken: 18e-6
797
+ },
798
+ "openai/gpt-5": {
799
+ inputPerToken: 125e-8,
800
+ outputPerToken: 1e-5
801
+ },
802
+ "openai/gpt-5-chat": {
803
+ inputPerToken: 125e-8,
804
+ outputPerToken: 1e-5
805
+ },
806
+ "openai/gpt-5-codex": {
807
+ inputPerToken: 125e-8,
808
+ outputPerToken: 1e-5
809
+ },
810
+ "openai/gpt-5-image": {
811
+ inputPerToken: 1e-5,
812
+ outputPerToken: 1e-5
813
+ },
814
+ "openai/gpt-5-image-mini": {
815
+ inputPerToken: 25e-7,
816
+ outputPerToken: 2e-6
817
+ },
818
+ "openai/gpt-5-mini": {
819
+ inputPerToken: 25e-8,
820
+ outputPerToken: 2e-6
821
+ },
822
+ "openai/gpt-5-nano": {
823
+ inputPerToken: 5e-8,
824
+ outputPerToken: 4e-7
825
+ },
826
+ "openai/gpt-5-pro": {
827
+ inputPerToken: 15e-6,
828
+ outputPerToken: 12e-5
829
+ },
830
+ "openai/gpt-5.1": {
831
+ inputPerToken: 125e-8,
832
+ outputPerToken: 1e-5
833
+ },
834
+ "openai/gpt-5.1-chat": {
835
+ inputPerToken: 125e-8,
836
+ outputPerToken: 1e-5
837
+ },
838
+ "openai/gpt-5.1-codex": {
839
+ inputPerToken: 125e-8,
840
+ outputPerToken: 1e-5
841
+ },
842
+ "openai/gpt-5.1-codex-max": {
843
+ inputPerToken: 125e-8,
844
+ outputPerToken: 1e-5
845
+ },
846
+ "openai/gpt-5.1-codex-mini": {
847
+ inputPerToken: 25e-8,
848
+ outputPerToken: 2e-6
849
+ },
850
+ "openai/gpt-5.2": {
851
+ inputPerToken: 175e-8,
852
+ outputPerToken: 14e-6
853
+ },
854
+ "openai/gpt-5.2-chat": {
855
+ inputPerToken: 175e-8,
856
+ outputPerToken: 14e-6
857
+ },
858
+ "openai/gpt-5.2-codex": {
859
+ inputPerToken: 175e-8,
860
+ outputPerToken: 14e-6
861
+ },
862
+ "openai/gpt-5.2-pro": {
863
+ inputPerToken: 21e-6,
864
+ outputPerToken: 168e-6
865
+ },
866
+ "openai/gpt-5.3-codex": {
867
+ inputPerToken: 175e-8,
868
+ outputPerToken: 14e-6
869
+ },
870
+ "openai/gpt-audio": {
871
+ inputPerToken: 25e-7,
872
+ outputPerToken: 1e-5
873
+ },
874
+ "openai/gpt-audio-mini": {
875
+ inputPerToken: 6e-7,
876
+ outputPerToken: 24e-7
877
+ },
878
+ "openai/gpt-oss-120b": {
879
+ inputPerToken: 39e-9,
880
+ outputPerToken: 19e-8
881
+ },
882
+ "openai/gpt-oss-120b:exacto": {
883
+ inputPerToken: 39e-9,
884
+ outputPerToken: 19e-8
885
+ },
886
+ "openai/gpt-oss-20b": {
887
+ inputPerToken: 3e-8,
888
+ outputPerToken: 14e-8
889
+ },
890
+ "openai/gpt-oss-safeguard-20b": {
891
+ inputPerToken: 75e-9,
892
+ outputPerToken: 3e-7
893
+ },
894
+ "openai/o1": {
895
+ inputPerToken: 15e-6,
896
+ outputPerToken: 6e-5
897
+ },
898
+ "openai/o1-pro": {
899
+ inputPerToken: 15e-5,
900
+ outputPerToken: 6e-4
901
+ },
902
+ "openai/o3": {
903
+ inputPerToken: 2e-6,
904
+ outputPerToken: 8e-6
905
+ },
906
+ "openai/o3-deep-research": {
907
+ inputPerToken: 1e-5,
908
+ outputPerToken: 4e-5
909
+ },
910
+ "openai/o3-mini": {
911
+ inputPerToken: 11e-7,
912
+ outputPerToken: 44e-7
913
+ },
914
+ "openai/o3-mini-high": {
915
+ inputPerToken: 11e-7,
916
+ outputPerToken: 44e-7
917
+ },
918
+ "openai/o3-pro": {
919
+ inputPerToken: 2e-5,
920
+ outputPerToken: 8e-5
921
+ },
922
+ "openai/o4-mini": {
923
+ inputPerToken: 11e-7,
924
+ outputPerToken: 44e-7
925
+ },
926
+ "openai/o4-mini-deep-research": {
927
+ inputPerToken: 2e-6,
928
+ outputPerToken: 8e-6
929
+ },
930
+ "openai/o4-mini-high": {
931
+ inputPerToken: 11e-7,
932
+ outputPerToken: 44e-7
933
+ },
934
+ "opengvlab/internvl3-78b": {
935
+ inputPerToken: 15e-8,
936
+ outputPerToken: 6e-7
937
+ },
938
+ "perplexity/sonar": {
939
+ inputPerToken: 1e-6,
940
+ outputPerToken: 1e-6
941
+ },
942
+ "perplexity/sonar-deep-research": {
943
+ inputPerToken: 2e-6,
944
+ outputPerToken: 8e-6
945
+ },
946
+ "perplexity/sonar-pro": {
947
+ inputPerToken: 3e-6,
948
+ outputPerToken: 15e-6
949
+ },
950
+ "perplexity/sonar-pro-search": {
951
+ inputPerToken: 3e-6,
952
+ outputPerToken: 15e-6
953
+ },
954
+ "perplexity/sonar-reasoning-pro": {
955
+ inputPerToken: 2e-6,
956
+ outputPerToken: 8e-6
957
+ },
958
+ "prime-intellect/intellect-3": {
959
+ inputPerToken: 2e-7,
960
+ outputPerToken: 11e-7
961
+ },
962
+ "qwen/qwen-2.5-72b-instruct": {
963
+ inputPerToken: 12e-8,
964
+ outputPerToken: 39e-8
965
+ },
966
+ "qwen/qwen-2.5-7b-instruct": {
967
+ inputPerToken: 4e-8,
968
+ outputPerToken: 1e-7
969
+ },
970
+ "qwen/qwen-2.5-coder-32b-instruct": {
971
+ inputPerToken: 20000000000000002e-23,
972
+ outputPerToken: 20000000000000002e-23
973
+ },
974
+ "qwen/qwen-2.5-vl-7b-instruct": {
975
+ inputPerToken: 20000000000000002e-23,
976
+ outputPerToken: 20000000000000002e-23
977
+ },
978
+ "qwen/qwen-max": {
979
+ inputPerToken: 16e-7,
980
+ outputPerToken: 64e-7
981
+ },
982
+ "qwen/qwen-plus": {
983
+ inputPerToken: 4e-7,
984
+ outputPerToken: 12e-7
985
+ },
986
+ "qwen/qwen-plus-2025-07-28": {
987
+ inputPerToken: 4e-7,
988
+ outputPerToken: 12e-7
989
+ },
990
+ "qwen/qwen-plus-2025-07-28:thinking": {
991
+ inputPerToken: 4e-7,
992
+ outputPerToken: 12e-7
993
+ },
994
+ "qwen/qwen-turbo": {
995
+ inputPerToken: 5e-8,
996
+ outputPerToken: 2e-7
997
+ },
998
+ "qwen/qwen-vl-max": {
999
+ inputPerToken: 8e-7,
1000
+ outputPerToken: 32e-7
1001
+ },
1002
+ "qwen/qwen-vl-plus": {
1003
+ inputPerToken: 21e-8,
1004
+ outputPerToken: 63e-8
1005
+ },
1006
+ "qwen/qwen2.5-coder-7b-instruct": {
1007
+ inputPerToken: 3e-8,
1008
+ outputPerToken: 9e-8
1009
+ },
1010
+ "qwen/qwen2.5-vl-32b-instruct": {
1011
+ inputPerToken: 2e-7,
1012
+ outputPerToken: 6e-7
1013
+ },
1014
+ "qwen/qwen2.5-vl-72b-instruct": {
1015
+ inputPerToken: 8e-7,
1016
+ outputPerToken: 8e-7
1017
+ },
1018
+ "qwen/qwen3-14b": {
1019
+ inputPerToken: 6e-8,
1020
+ outputPerToken: 24e-8
1021
+ },
1022
+ "qwen/qwen3-235b-a22b": {
1023
+ inputPerToken: 455e-9,
1024
+ outputPerToken: 182e-8
1025
+ },
1026
+ "qwen/qwen3-235b-a22b-2507": {
1027
+ inputPerToken: 71e-9,
1028
+ outputPerToken: 1e-7
1029
+ },
1030
+ "qwen/qwen3-30b-a3b": {
1031
+ inputPerToken: 8e-8,
1032
+ outputPerToken: 28e-8
1033
+ },
1034
+ "qwen/qwen3-30b-a3b-instruct-2507": {
1035
+ inputPerToken: 9e-8,
1036
+ outputPerToken: 3e-7
1037
+ },
1038
+ "qwen/qwen3-30b-a3b-thinking-2507": {
1039
+ inputPerToken: 51e-9,
1040
+ outputPerToken: 34e-8
1041
+ },
1042
+ "qwen/qwen3-32b": {
1043
+ inputPerToken: 8e-8,
1044
+ outputPerToken: 24e-8
1045
+ },
1046
+ "qwen/qwen3-8b": {
1047
+ inputPerToken: 5e-8,
1048
+ outputPerToken: 4e-7
1049
+ },
1050
+ "qwen/qwen3-coder": {
1051
+ inputPerToken: 22e-8,
1052
+ outputPerToken: 1e-6
1053
+ },
1054
+ "qwen/qwen3-coder-30b-a3b-instruct": {
1055
+ inputPerToken: 7e-8,
1056
+ outputPerToken: 27e-8
1057
+ },
1058
+ "qwen/qwen3-coder-flash": {
1059
+ inputPerToken: 3e-7,
1060
+ outputPerToken: 15e-7
1061
+ },
1062
+ "qwen/qwen3-coder-next": {
1063
+ inputPerToken: 12e-8,
1064
+ outputPerToken: 75e-8
1065
+ },
1066
+ "qwen/qwen3-coder-plus": {
1067
+ inputPerToken: 1e-6,
1068
+ outputPerToken: 5e-6
1069
+ },
1070
+ "qwen/qwen3-coder:exacto": {
1071
+ inputPerToken: 22e-8,
1072
+ outputPerToken: 18e-7
1073
+ },
1074
+ "qwen/qwen3-max": {
1075
+ inputPerToken: 12e-7,
1076
+ outputPerToken: 6e-6
1077
+ },
1078
+ "qwen/qwen3-max-thinking": {
1079
+ inputPerToken: 12e-7,
1080
+ outputPerToken: 6e-6
1081
+ },
1082
+ "qwen/qwen3-next-80b-a3b-instruct": {
1083
+ inputPerToken: 9e-8,
1084
+ outputPerToken: 11e-7
1085
+ },
1086
+ "qwen/qwen3-next-80b-a3b-thinking": {
1087
+ inputPerToken: 15e-8,
1088
+ outputPerToken: 12e-7
1089
+ },
1090
+ "qwen/qwen3-vl-235b-a22b-instruct": {
1091
+ inputPerToken: 2e-7,
1092
+ outputPerToken: 88e-8
1093
+ },
1094
+ "qwen/qwen3-vl-30b-a3b-instruct": {
1095
+ inputPerToken: 13e-8,
1096
+ outputPerToken: 52e-8
1097
+ },
1098
+ "qwen/qwen3-vl-32b-instruct": {
1099
+ inputPerToken: 104e-9,
1100
+ outputPerToken: 416e-9
1101
+ },
1102
+ "qwen/qwen3-vl-8b-instruct": {
1103
+ inputPerToken: 8e-8,
1104
+ outputPerToken: 5e-7
1105
+ },
1106
+ "qwen/qwen3-vl-8b-thinking": {
1107
+ inputPerToken: 117e-9,
1108
+ outputPerToken: 1365e-9
1109
+ },
1110
+ "qwen/qwen3.5-122b-a10b": {
1111
+ inputPerToken: 4e-7,
1112
+ outputPerToken: 32e-7
1113
+ },
1114
+ "qwen/qwen3.5-27b": {
1115
+ inputPerToken: 3e-7,
1116
+ outputPerToken: 24e-7
1117
+ },
1118
+ "qwen/qwen3.5-35b-a3b": {
1119
+ inputPerToken: 25e-8,
1120
+ outputPerToken: 2e-6
1121
+ },
1122
+ "qwen/qwen3.5-397b-a17b": {
1123
+ inputPerToken: 55e-8,
1124
+ outputPerToken: 35e-7
1125
+ },
1126
+ "qwen/qwen3.5-flash-02-23": {
1127
+ inputPerToken: 1e-7,
1128
+ outputPerToken: 4e-7
1129
+ },
1130
+ "qwen/qwen3.5-plus-02-15": {
1131
+ inputPerToken: 4e-7,
1132
+ outputPerToken: 24e-7
1133
+ },
1134
+ "qwen/qwq-32b": {
1135
+ inputPerToken: 15e-8,
1136
+ outputPerToken: 4e-7
1137
+ },
1138
+ "raifle/sorcererlm-8x22b": {
1139
+ inputPerToken: 45e-7,
1140
+ outputPerToken: 45e-7
1141
+ },
1142
+ "relace/relace-apply-3": {
1143
+ inputPerToken: 85e-8,
1144
+ outputPerToken: 125e-8
1145
+ },
1146
+ "relace/relace-search": {
1147
+ inputPerToken: 1e-6,
1148
+ outputPerToken: 3e-6
1149
+ },
1150
+ "sao10k/l3-euryale-70b": {
1151
+ inputPerToken: 148e-8,
1152
+ outputPerToken: 148e-8
1153
+ },
1154
+ "sao10k/l3-lunaris-8b": {
1155
+ inputPerToken: 4e-8,
1156
+ outputPerToken: 5e-8
1157
+ },
1158
+ "sao10k/l3.1-70b-hanami-x1": {
1159
+ inputPerToken: 3e-6,
1160
+ outputPerToken: 3e-6
1161
+ },
1162
+ "sao10k/l3.1-euryale-70b": {
1163
+ inputPerToken: 65e-8,
1164
+ outputPerToken: 75e-8
1165
+ },
1166
+ "sao10k/l3.3-euryale-70b": {
1167
+ inputPerToken: 65e-8,
1168
+ outputPerToken: 75e-8
1169
+ },
1170
+ "stepfun/step-3.5-flash": {
1171
+ inputPerToken: 1e-7,
1172
+ outputPerToken: 3e-7
1173
+ },
1174
+ "switchpoint/router": {
1175
+ inputPerToken: 85e-8,
1176
+ outputPerToken: 34e-7
1177
+ },
1178
+ "tencent/hunyuan-a13b-instruct": {
1179
+ inputPerToken: 14e-8,
1180
+ outputPerToken: 57e-8
1181
+ },
1182
+ "thedrummer/cydonia-24b-v4.1": {
1183
+ inputPerToken: 3e-7,
1184
+ outputPerToken: 5e-7
1185
+ },
1186
+ "thedrummer/rocinante-12b": {
1187
+ inputPerToken: 17e-8,
1188
+ outputPerToken: 43e-8
1189
+ },
1190
+ "thedrummer/skyfall-36b-v2": {
1191
+ inputPerToken: 55e-8,
1192
+ outputPerToken: 8e-7
1193
+ },
1194
+ "thedrummer/unslopnemo-12b": {
1195
+ inputPerToken: 4e-7,
1196
+ outputPerToken: 4e-7
1197
+ },
1198
+ "tngtech/deepseek-r1t2-chimera": {
1199
+ inputPerToken: 25e-8,
1200
+ outputPerToken: 85e-8
1201
+ },
1202
+ "undi95/remm-slerp-l2-13b": {
1203
+ inputPerToken: 45e-8,
1204
+ outputPerToken: 65e-8
1205
+ },
1206
+ "writer/palmyra-x5": {
1207
+ inputPerToken: 6e-7,
1208
+ outputPerToken: 6e-6
1209
+ },
1210
+ "xai/grok-3": {
1211
+ inputPerToken: 3e-6,
1212
+ outputPerToken: 15e-6
1213
+ },
1214
+ "xai/grok-3-beta": {
1215
+ inputPerToken: 3e-6,
1216
+ outputPerToken: 15e-6
1217
+ },
1218
+ "xai/grok-3-mini": {
1219
+ inputPerToken: 3e-7,
1220
+ outputPerToken: 5e-7
1221
+ },
1222
+ "xai/grok-3-mini-beta": {
1223
+ inputPerToken: 3e-7,
1224
+ outputPerToken: 5e-7
1225
+ },
1226
+ "xai/grok-4": {
1227
+ inputPerToken: 3e-6,
1228
+ outputPerToken: 15e-6
1229
+ },
1230
+ "xai/grok-4-fast": {
1231
+ inputPerToken: 2e-7,
1232
+ outputPerToken: 5e-7
1233
+ },
1234
+ "xai/grok-4.1-fast": {
1235
+ inputPerToken: 2e-7,
1236
+ outputPerToken: 5e-7
1237
+ },
1238
+ "xai/grok-code-fast-1": {
1239
+ inputPerToken: 2e-7,
1240
+ outputPerToken: 15e-7
1241
+ },
1242
+ "xiaomi/mimo-v2-flash": {
1243
+ inputPerToken: 9e-8,
1244
+ outputPerToken: 29e-8
1245
+ },
1246
+ "z-ai/glm-4-32b": {
1247
+ inputPerToken: 1e-7,
1248
+ outputPerToken: 1e-7
1249
+ },
1250
+ "z-ai/glm-4.5": {
1251
+ inputPerToken: 55e-8,
1252
+ outputPerToken: 2e-6
1253
+ },
1254
+ "z-ai/glm-4.5-air": {
1255
+ inputPerToken: 13e-8,
1256
+ outputPerToken: 85e-8
1257
+ },
1258
+ "z-ai/glm-4.5v": {
1259
+ inputPerToken: 6e-7,
1260
+ outputPerToken: 18e-7
1261
+ },
1262
+ "z-ai/glm-4.6": {
1263
+ inputPerToken: 35e-8,
1264
+ outputPerToken: 171e-8
1265
+ },
1266
+ "z-ai/glm-4.6:exacto": {
1267
+ inputPerToken: 44e-8,
1268
+ outputPerToken: 176e-8
1269
+ },
1270
+ "z-ai/glm-4.6v": {
1271
+ inputPerToken: 3e-7,
1272
+ outputPerToken: 9e-7
1273
+ },
1274
+ "z-ai/glm-4.7": {
1275
+ inputPerToken: 3e-7,
1276
+ outputPerToken: 14e-7
1277
+ },
1278
+ "z-ai/glm-4.7-flash": {
1279
+ inputPerToken: 6e-8,
1280
+ outputPerToken: 4e-7
1281
+ },
1282
+ "z-ai/glm-5": {
1283
+ inputPerToken: 95e-8,
1284
+ outputPerToken: 255e-8
1285
+ }
1286
+ }
1287
+ };
1288
+
1289
+ // src/pricing/lookup.ts
1290
// Pricing table keyed by "<provider>/<model>" ids, taken from the bundled catalog.
var models = catalog_default.models;
// Secondary index: bare model name (everything after the first "/") -> the
// first catalog key that carries that name. Later duplicates are ignored.
var modelNameIndex = /* @__PURE__ */ new Map();
Object.keys(models).forEach((catalogKey) => {
  const slashAt = catalogKey.indexOf("/");
  const bareName = slashAt === -1 ? "" : catalogKey.slice(slashAt + 1);
  if (!bareName || modelNameIndex.has(bareName)) return;
  modelNameIndex.set(bareName, catalogKey);
});
1298
/**
 * Find the pricing entry for a provider id of the form "<provider>/<model>".
 *
 * Resolution order:
 *   1. exact match on the full id;
 *   2. the same model name under the "openai/" prefix;
 *   3. the first catalog entry (any provider) with the same bare model name.
 *
 * @param providerId Provider id, e.g. "azure/gpt-4o".
 * @returns The pricing entry, or undefined when nothing matches or the id has
 *          no model part after the first "/".
 */
function lookupPricing(providerId) {
  const exact = models[providerId];
  if (exact) return exact;

  const slashAt = providerId.indexOf("/");
  const model = slashAt === -1 ? "" : providerId.slice(slashAt + 1);
  if (!model) return void 0;

  const openaiEntry = models[`openai/${model}`];
  if (openaiEntry) return openaiEntry;

  const crossKey = modelNameIndex.get(model);
  return crossKey ? models[crossKey] : void 0;
}
1308
/**
 * Add or overwrite the pricing entry stored under `providerId` in the
 * in-memory pricing table.
 *
 * @param providerId Key to register, e.g. "local/my-model".
 * @param pricing    Entry with `inputPerToken` / `outputPerToken` rates.
 */
function registerPricing(providerId, pricing) {
  Object.assign(models, { [providerId]: pricing });
}
1311
/**
 * Estimate the cost of one call from per-token rates and token counts.
 *
 * @param pricing          Object with `inputPerToken` and `outputPerToken`.
 * @param promptTokens     Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @returns Input cost plus output cost, in the unit of the rates.
 */
function estimateCost(pricing, promptTokens, completionTokens) {
  const inputCost = pricing.inputPerToken * promptTokens;
  const outputCost = pricing.outputPerToken * completionTokens;
  return inputCost + outputCost;
}
1314
+
1315
+ // src/scorers/cost.ts
1316
/**
 * Scorer that reports the estimated cost of a run.
 *
 * Missing token counts are treated as 0. When no pricing entry can be found
 * for `providerId`, the score value is -1 and `estimatedUsd` is null;
 * otherwise the value is the estimated cost itself.
 */
var costScorer = ({ result }, providerId) => {
  const usage = result.usage;
  const promptTokens = usage?.promptTokens ?? 0;
  const completionTokens = usage?.completionTokens ?? 0;
  const tokenCounts = {
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens
  };

  const pricing = lookupPricing(providerId);
  if (pricing) {
    const usd = estimateCost(pricing, promptTokens, completionTokens);
    return {
      name: "cost",
      value: usd,
      details: { estimatedUsd: usd, ...tokenCounts }
    };
  }

  return {
    name: "cost",
    value: -1,
    details: {
      estimatedUsd: null,
      ...tokenCounts,
      note: "No pricing data available for this model"
    }
  };
};
1346
+
1347
+ // src/scorers/correctness.ts
1348
/**
 * Scorer that compares the run output with the task's expected value.
 *
 * Returns 0.5 when the task defines no expected value, otherwise 1 for a
 * match (per deepEqual) and 0 for a mismatch.
 */
var correctnessScorer = ({ task, result }) => {
  const { expected } = task;
  if (expected === void 0) {
    return { name: "correctness", value: 0.5, details: { reason: "no expected value" } };
  }
  const actual = result.output;
  const value = deepEqual(expected, actual) ? 1 : 0;
  return { name: "correctness", value, details: { expected, actual } };
};
1359
/**
 * Structural equality used by the correctness scorer.
 *
 * - Strings are compared after trimming and lower-casing.
 * - Arrays are compared element by element; an array never equals a
 *   non-array object, even if the object has the same index keys.
 * - Plain objects are compared by their own enumerable keys only, so keys
 *   inherited from the prototype do not count as present.
 * - Everything else falls back to strict equality.
 *
 * @param a Expected value.
 * @param b Actual value.
 * @returns true when the two values are considered equal.
 */
function deepEqual(a, b) {
  if (a === b) return true;
  if (typeof a === "string" && typeof b === "string") {
    return a.trim().toLowerCase() === b.trim().toLowerCase();
  }
  if (typeof a !== typeof b) return false;
  // Identity was ruled out above, so a null on either side is a mismatch.
  if (a === null || b === null) return false;

  const aIsArray = Array.isArray(a);
  // typeof reports "object" for both arrays and plain objects; keep them apart
  // so that [1] and { 0: 1 } are not treated as equal.
  if (aIsArray !== Array.isArray(b)) return false;
  if (aIsArray) {
    if (a.length !== b.length) return false;
    return a.every((val, i) => deepEqual(val, b[i]));
  }

  if (typeof a === "object") {
    const keysA = Object.keys(a);
    const keysB = Object.keys(b);
    if (keysA.length !== keysB.length) return false;
    return keysA.every(
      (key) => Object.prototype.hasOwnProperty.call(b, key) && deepEqual(a[key], b[key])
    );
  }

  // Remaining primitives (numbers, booleans, ...) already failed the identity check.
  return false;
}
1380
+
1381
+ // src/scorers/schema-correctness.ts
1382
+ var schemaCorrectnessScorer = ({ task, result }) => {
1383
+ if (!task.schema) {
1384
+ return { name: "schema-correctness", value: -1, details: { reason: "no schema defined" } };
1385
+ }
1386
+ let data = result.output;
1387
+ if (typeof data === "string") {
1388
+ try {
1389
+ data = JSON.parse(data);
1390
+ } catch {
1391
+ return {
1392
+ name: "schema-correctness",
1393
+ value: 0,
1394
+ details: { reason: "output is not valid JSON" }
1395
+ };
1396
+ }
1397
+ }
1398
+ const parsed = task.schema.safeParse(data);
1399
+ return {
1400
+ name: "schema-correctness",
1401
+ value: parsed.success ? 1 : 0,
1402
+ details: parsed.success ? { valid: true } : { valid: false, errors: parsed.error.issues.map((i) => i.message) }
1403
+ };
1404
+ };
1405
+
1406
+ // src/scorers/fuzzy-similarity.ts
1407
/**
 * Scorer that measures token overlap between expected and actual output.
 *
 * Both values are stringified and tokenized, then compared with Jaccard
 * similarity; the score is rounded to two decimals. Returns -1 when the task
 * has no expected value.
 */
var fuzzySimilarityScorer = ({ task, result }) => {
  const name = "fuzzy-similarity";
  if (task.expected === void 0) {
    return { name, value: -1, details: { reason: "no expected value" } };
  }

  const expectedSet = tokenize(stringify(task.expected));
  const actualSet = tokenize(stringify(result.output));
  const similarity = jaccardSimilarity(expectedSet, actualSet);

  return {
    name,
    value: Math.round(similarity * 100) / 100,
    details: {
      method: "jaccard",
      expectedTokens: expectedSet.size,
      actualTokens: actualSet.size
    }
  };
};
1420
/**
 * Convert a value to lower-cased text for fuzzy comparison.
 *
 * Strings are lower-cased as-is; everything else goes through JSON.stringify.
 * JSON.stringify returns undefined (not a string) for undefined, functions
 * and symbols; those are mapped to the empty string instead of throwing.
 *
 * @param value Any value.
 * @returns Lower-cased textual form of the value.
 */
function stringify(value) {
  if (typeof value === "string") return value.toLowerCase();
  const json = JSON.stringify(value);
  return json === void 0 ? "" : json.toLowerCase();
}
1424
/**
 * Split text into a set of unique word tokens.
 *
 * A token is a run of letters, combining marks, digits or underscores in any
 * script. For plain ASCII input this is the same as `\w+`; unlike `\w+` it
 * also yields tokens for accented and non-Latin text, which would otherwise
 * tokenize to an empty set.
 *
 * @param text Input text.
 * @returns Set of distinct tokens (empty when the text has none).
 */
function tokenize(text) {
  return new Set(text.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []);
}
1427
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param a First token set.
 * @param b Second token set.
 * @returns A value in [0, 1]; two empty sets count as identical (1).
 */
function jaccardSimilarity(a, b) {
  if (a.size === 0 && b.size === 0) return 1;
  const shared = [...a].filter((token) => b.has(token)).length;
  const unionSize = a.size + b.size - shared;
  return unionSize === 0 ? 1 : shared / unionSize;
}
1436
+
1437
+ // src/scorers/llm-judge.ts
1438
+ import OpenAI, { AzureOpenAI } from "openai";
1439
+ var JUDGE_PROMPT = `You are a strict scoring judge. Evaluate the actual output against the expected output on three criteria. Score each from 0.0 to 1.0 using the full range (not just 0, 0.5, 1).
1440
+
1441
+ Criteria:
1442
+ 1. Accuracy \u2014 are the facts, entities, and claims correct? Penalize hallucinations or wrong details.
1443
+ 2. Completeness \u2014 does it capture all key information from the expected output? Penalize missing points.
1444
+ 3. Conciseness \u2014 is it free of unnecessary filler, repetition, or tangential content? Penalize verbosity.
1445
+
1446
+ Respond with ONLY this exact format \u2014 three lines, no other text:
1447
+ accuracy: <number>
1448
+ completeness: <number>
1449
+ conciseness: <number>
1450
+
1451
+ Task: {task}
1452
+ Expected: {expected}
1453
+ Actual: {actual}`;
1454
/**
 * Pick the API client and model name used by the LLM judge.
 *
 * The model comes from `configModel`, then DUELIST_JUDGE_MODEL, then
 * "gpt-4o-mini". Client selection, in order:
 *   1. model name starts with "gemini" and GOOGLE_API_KEY is set: OpenAI
 *      client pointed at Google's OpenAI-compatible endpoint;
 *   2. no OPENAI_API_KEY but AZURE_OPENAI_API_KEY is set: AzureOpenAI client
 *      using the model name as deployment;
 *   3. OPENAI_API_KEY is set: plain OpenAI client.
 *
 * @param configModel Optional judge model name from the arena config.
 * @returns `{ client, model }`, or undefined when no usable API key is found.
 */
function resolveJudgeClient(configModel) {
  const env = process.env;
  const model = configModel ?? env.DUELIST_JUDGE_MODEL ?? "gpt-4o-mini";

  if (model.startsWith("gemini") && env.GOOGLE_API_KEY) {
    const client = new OpenAI({
      apiKey: env.GOOGLE_API_KEY,
      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/"
    });
    return { client, model };
  }

  const openaiKey = env.OPENAI_API_KEY;
  if (!openaiKey && env.AZURE_OPENAI_API_KEY) {
    const client = new AzureOpenAI({
      apiKey: env.AZURE_OPENAI_API_KEY,
      endpoint: env.AZURE_OPENAI_ENDPOINT,
      apiVersion: env.AZURE_OPENAI_API_VERSION ?? "2024-12-01-preview",
      deployment: model
    });
    return { client, model };
  }

  return openaiKey ? { client: new OpenAI({ apiKey: openaiKey }), model } : void 0;
}
1480
/**
 * Build the "llm-judge-correctness" scorer.
 *
 * The returned async scorer asks a judge model to rate the actual output
 * against the expected output on accuracy, completeness and conciseness
 * (each 0..1) and reports their mean, rounded to two decimals.
 *
 * The score value is -1 (not scored) when the task has no expected value, no
 * judge client can be resolved, the judge reply cannot be parsed into three
 * in-range numbers, or the API call fails.
 *
 * The judge client is resolved on first use and then reused, including a
 * failed resolution (cached as null).
 *
 * @param judgeModel Optional judge model name passed to resolveJudgeClient.
 * @returns Async scorer function taking `{ task, result }`.
 */
function createLlmJudgeScorer(judgeModel) {
  const name = "llm-judge-correctness";
  let cached = void 0;
  return async ({ task, result }) => {
    if (task.expected === void 0) {
      return { name, value: -1, details: { reason: "no expected value" } };
    }
    if (cached === void 0) {
      cached = resolveJudgeClient(judgeModel) ?? null;
    }
    if (!cached) {
      return {
        name,
        value: -1,
        details: { reason: "no API key available for judge model" }
      };
    }
    const { client, model } = cached;

    // Fill all placeholders in a single pass with a replacer function:
    //  - a function's return value is inserted literally, so "$&", "$'" or
    //    "$$" inside the prompt/outputs are not treated as replacement patterns;
    //  - text inserted for one placeholder is never rescanned, so a prompt
    //    that happens to contain "{expected}" cannot hijack a later slot.
    const fields = {
      task: task.prompt,
      expected: JSON.stringify(task.expected),
      actual: JSON.stringify(result.output)
    };
    const prompt = JUDGE_PROMPT.replace(
      /\{(task|expected|actual)\}/g,
      (_placeholder, field) => String(fields[field])
    );

    try {
      const response = await client.chat.completions.create({
        model,
        messages: [{ role: "user", content: prompt }],
        temperature: 0,
        max_tokens: 2048
      });
      const content = response.choices[0]?.message?.content?.trim() ?? "";

      // Expect one "<criterion>: <number>" pair per line.
      const parsed = {};
      for (const line of content.split("\n")) {
        const match = line.match(/^(accuracy|completeness|conciseness)\s*:\s*([\d.]+)/i);
        if (match) parsed[match[1].toLowerCase()] = parseFloat(match[2]);
      }
      const accuracy = parsed.accuracy;
      const completeness = parsed.completeness;
      const conciseness = parsed.conciseness;

      if (accuracy == null || completeness == null || conciseness == null || [accuracy, completeness, conciseness].some((s) => isNaN(s) || s < 0 || s > 1)) {
        return {
          name,
          value: -1,
          details: { reason: `judge returned unparseable scores: "${content}"`, model }
        };
      }

      const composite = Math.round((accuracy + completeness + conciseness) / 3 * 100) / 100;
      return {
        name,
        value: composite,
        details: { model, accuracy, completeness, conciseness }
      };
    } catch (err) {
      return {
        name,
        value: -1,
        details: { reason: `judge call failed: ${err instanceof Error ? err.message : String(err)}` }
      };
    }
  };
}
1536
+
1537
+ // src/scorers/tool-usage.ts
1538
/**
 * Scorer that checks whether the run called the task's first configured tool.
 *
 * Only `task.tools[0]` is considered. Returns -1 when the task has no tool
 * (or the first tool has no name), 1 when any recorded tool call has that
 * name, and 0 otherwise.
 */
var toolUsageScorer = ({ task, result }) => {
  const name = "tool-usage";
  const firstTool = task.tools?.[0];
  const expectedToolName = firstTool?.name;
  if (!expectedToolName) {
    return { name, value: -1, details: { reason: "no tools configured on task" } };
  }

  const calls = result.toolCalls;
  const usedTool = calls != null ? calls.some((c) => c.name === expectedToolName) : false;

  return {
    name,
    value: usedTool ? 1 : 0,
    details: { expectedToolName, usedTool, toolCalls: calls ?? [] }
  };
};
1550
+
1551
+ // src/scorers/index.ts
1552
+ var staticScorers = {
1553
+ latency: latencyScorer,
1554
+ cost: costScorer,
1555
+ correctness: correctnessScorer,
1556
+ "schema-correctness": schemaCorrectnessScorer,
1557
+ "fuzzy-similarity": fuzzySimilarityScorer,
1558
+ "tool-usage": toolUsageScorer
1559
+ };
1560
/**
 * Turn a list of scorer names into scorer functions.
 *
 * "llm-judge-correctness" gets a fresh judge scorer bound to `judgeModel`;
 * every other name must be an own key of the static scorer table. Inherited
 * keys such as "constructor" or "toString" are rejected rather than being
 * resolved to Object.prototype members.
 *
 * @param names      Scorer names in the order they should run.
 * @param judgeModel Optional model name for the LLM judge.
 * @returns Scorer functions, in the same order as `names`.
 * @throws Error when a name is not a known scorer.
 */
function resolveScorers(names, judgeModel) {
  return names.map((name) => {
    if (name === "llm-judge-correctness") {
      return createLlmJudgeScorer(judgeModel);
    }
    const isKnown = Object.prototype.hasOwnProperty.call(staticScorers, name);
    const scorer = isKnown ? staticScorers[name] : void 0;
    if (!scorer) {
      throw new Error(`Unknown scorer: "${name}"`);
    }
    return scorer;
  });
}
1572
+
1573
+ // src/runner.ts
1574
+ async function runBenchmarks(options) {
1575
+ const { providers, tasks, scorers, runs, onResult } = options;
1576
+ const results = [];
1577
+ for (const task of tasks) {
1578
+ for (const provider of providers) {
1579
+ for (let run = 1; run <= runs; run++) {
1580
+ let result;
1581
+ try {
1582
+ const taskResult = await provider.run({
1583
+ prompt: task.prompt,
1584
+ schema: task.schema,
1585
+ tools: task.tools
1586
+ });
1587
+ const scores = await Promise.all(
1588
+ scorers.map((scorer) => scorer({ task, result: taskResult }, provider.id))
1589
+ );
1590
+ result = {
1591
+ providerId: provider.id,
1592
+ taskName: task.name,
1593
+ run,
1594
+ scores,
1595
+ raw: {
1596
+ output: taskResult.output,
1597
+ latencyMs: taskResult.latencyMs,
1598
+ usage: taskResult.usage,
1599
+ toolCalls: taskResult.toolCalls
1600
+ }
1601
+ };
1602
+ } catch (err) {
1603
+ const message = err instanceof Error ? err.message : String(err);
1604
+ result = {
1605
+ providerId: provider.id,
1606
+ taskName: task.name,
1607
+ run,
1608
+ scores: [],
1609
+ error: message,
1610
+ raw: { output: "", latencyMs: 0 }
1611
+ };
1612
+ }
1613
+ results.push(result);
1614
+ onResult?.(result);
1615
+ }
1616
+ }
1617
+ }
1618
+ return results;
1619
+ }
1620
+
1621
+ // src/reporter/console.ts
1622
+ var reset = "\x1B[0m";
1623
+ var boldCode = "\x1B[1m";
1624
+ var dimCode = "\x1B[2m";
1625
+ var green = "\x1B[32m";
1626
+ var red = "\x1B[31m";
1627
+ var yellow = "\x1B[33m";
1628
+ var cyan = "\x1B[36m";
1629
/** Wrap `s` in the ANSI bold code followed by a reset. */
function bold(s) {
  return boldCode.concat(s, reset);
}
1632
/** Wrap `s` in the ANSI dim code followed by a reset. */
function dim(s) {
  return dimCode.concat(s, reset);
}
1635
/**
 * Render a 0..1 score as a colored percentage string.
 *
 * Green from 0.8 up, yellow from 0.5 up, red below that.
 *
 * @param value Score to render.
 * @returns e.g. "80%" wrapped in ANSI color codes.
 */
function colorScore(value) {
  const label = `${Math.round(value * 100)}%`;
  let color = red;
  if (value >= 0.8) {
    color = green;
  } else if (value >= 0.5) {
    color = yellow;
  }
  return `${color}${label}${reset}`;
}
1642
/**
 * Table columns contributed by one scorer.
 *
 * "cost" contributes two columns (cost and token count); every other scorer
 * contributes one. Unknown scorer names get a generic 10-wide column labelled
 * with the scorer name.
 */
function reporterColumnsFor(scorerName) {
  const single = (label, width) => [{ label, width, align: "right" }];
  switch (scorerName) {
    case "latency":
      return single("Latency", 10);
    case "cost":
      return [
        { label: "Cost", width: 12, align: "right" },
        { label: "Tokens", width: 9, align: "right" }
      ];
    case "correctness":
      return single("Match", 8);
    case "schema-correctness":
      return single("Schema", 8);
    case "fuzzy-similarity":
      return single("Fuzzy", 8);
    case "llm-judge-correctness":
      return single("Judge", 8);
    case "tool-usage":
      return single("Tool", 8);
    default:
      return single(scorerName, 10);
  }
}

/**
 * Print benchmark results to the console.
 *
 * Output, in order: a title, one table per task (one row per provider with
 * values averaged over successful runs), a summary, a de-duplicated error
 * list, and a pricing note when the cost scorer was used.
 *
 * Every row — header, successful and all-failed — is padded from the same
 * column specs so the cells line up.
 *
 * @param results Benchmark results as produced by the runner.
 */
function consoleReporter(results) {
  if (results.length === 0) {
    console.log("\nNo results to display.\n");
    return;
  }

  const tasks = [...new Set(results.map((r) => r.taskName))];
  const providers = [...new Set(results.map((r) => r.providerId))];
  const scorerNames = [...new Set(results.flatMap((r) => r.scores.map((s) => s.name)))];
  const hasCost = scorerNames.includes("cost");
  const hasErrors = results.some((r) => r.error);
  const runsPerCell = Math.max(...results.map((r) => r.run));
  const runLabel = runsPerCell > 1 ? ` (${runsPerCell} runs each)` : "";

  // The layout depends only on the scorer set, so it is computed once and
  // shared by all task tables. Each cell is padded to column width + 2.
  const providerCol = { label: "Provider", width: 22, align: "left" };
  const layout = scorerNames.map((name) => ({ name, columns: reporterColumnsFor(name) }));
  const cols = [providerCol, ...layout.flatMap((entry) => entry.columns)];
  if (hasErrors) cols.push({ label: "Status", width: 8, align: "left" });
  const totalWidth = cols.reduce((sum, c) => sum + c.width + 2, 0);
  const headerLine = cols.map((c) => pad(c.label, c.width + 2, c.align)).join("");

  console.log("");
  console.log(` ${bold(`\u2B21 Agent Duelist Results${runLabel}`)}`);
  console.log(` ${dim("\u2500".repeat(70))}`);
  console.log("");

  for (const task of tasks) {
    console.log(` ${bold(`Task: ${task}`)}`);
    console.log(` ${dim(headerLine)}`);
    console.log(` ${dim("\u2500".repeat(totalWidth))}`);

    for (const provider of providers) {
      const taskResults = results.filter(
        (r) => r.taskName === task && r.providerId === provider
      );
      const failedRuns = taskResults.filter((r) => r.error);
      const successRuns = taskResults.filter((r) => !r.error);
      const cells = [pad(provider, providerCol.width + 2, "left")];

      // Every run failed: fill each scorer column with a dash of that
      // column's own width.
      if (successRuns.length === 0 && failedRuns.length > 0) {
        for (const { columns } of layout) {
          for (const col of columns) {
            cells.push(pad("\u2014", col.width + 2, "right"));
          }
        }
        if (hasErrors) cells.push(` ${red}FAIL${reset}`);
        console.log(` ${cells.join("")}`);
        continue;
      }

      const avgScores = averageScores(successRuns);
      const avgDetails = averageDetails(successRuns);
      const latencyMs = average(successRuns.map((r) => r.raw.latencyMs));

      for (const { name, columns } of layout) {
        const width = columns[0].width + 2;
        if (name === "latency") {
          // Show raw milliseconds rather than the normalized latency score.
          const text = latencyMs !== void 0 ? `${Math.round(latencyMs)}ms` : "\u2014";
          cells.push(pad(text, width, "right"));
        } else if (name === "cost") {
          const tokens = avgDetails.totalTokens !== void 0 ? `${avgDetails.totalTokens}` : "\u2014";
          cells.push(pad(formatCost(avgDetails.costUsd), width, "right"));
          cells.push(pad(tokens, columns[1].width + 2, "right"));
        } else {
          const val = avgScores[name];
          if (val === void 0) {
            cells.push(pad("\u2014", width, "right"));
          } else {
            // ANSI codes take up string length but no screen width; widen the
            // pad target by their length so the visible text stays aligned.
            const colored = colorScore(val);
            cells.push(pad(colored, width + colorLen(colored), "right"));
          }
        }
      }

      if (hasErrors) {
        const failCount = failedRuns.length;
        cells.push(failCount > 0 ? ` ${yellow}${failCount} err${reset}` : ` ${green}OK${reset}`);
      }
      console.log(` ${cells.join("")}`);
    }
    console.log("");
  }

  printSummary(results, providers);

  // Group identical (provider, message) errors, keeping first-seen order.
  const errorGroups = /* @__PURE__ */ new Map();
  for (const r of results) {
    if (!r.error) continue;
    const key = `${r.providerId}::${r.error}`;
    const group = errorGroups.get(key);
    if (group) {
      group.count++;
    } else {
      errorGroups.set(key, { providerId: r.providerId, error: r.error, count: 1 });
    }
  }
  if (errorGroups.size > 0) {
    console.log(` ${bold("Errors")}`);
    console.log(` ${dim("\u2500".repeat(70))}`);
    for (const { providerId, error, count } of errorGroups.values()) {
      const suffix = count > 1 ? ` (\xD7${count})` : "";
      console.log(` ${red}\u2717${reset} ${providerId}: ${error}${suffix}`);
      const hint = apiKeyHint(providerId, error);
      if (hint) console.log(` ${dim(hint)}`);
    }
    console.log("");
  }

  if (hasCost) {
    console.log(dim(` Costs estimated from OpenRouter pricing catalog. Run npx tsx scripts/update-pricing.ts to refresh.`));
    console.log("");
  }
}
1742
/**
 * Print the "Summary" section: best provider by correctness, latency and
 * cost, computed over successful runs only.
 *
 * Correctness uses the LLM-judge score when at least one run has a
 * non-negative judge score, otherwise the exact-match score. With a single
 * provider the lines read "Avg ..." instead of naming a winner. Prints
 * nothing when there are no successful runs.
 *
 * @param results   All benchmark results.
 * @param providers Provider ids in display order.
 */
function printSummary(results, providers) {
  const successResults = results.filter((r) => !r.error);
  if (successResults.length === 0) return;

  console.log(` ${dim("\u2500".repeat(70))}`);
  console.log(` ${bold("Summary")}`);
  console.log("");

  const single = providers.length === 1;
  const runsOf = (id) => successResults.filter((r) => r.providerId === id);
  const headline = (soloText, winnerText, id) =>
    single ? soloText : `${winnerText}: ${bold(id)} ${dim(providerLabel(id))}`;
  const bullet = (label, stat) =>
    console.log(` ${cyan}\u25C6${reset} ${label} (avg ${stat})`);

  // Correctness
  const judgeUsable = successResults.some(
    (r) => r.scores.some((s) => s.name === "llm-judge-correctness" && s.value >= 0)
  );
  const correctnessKey = judgeUsable ? "llm-judge-correctness" : "correctness";
  const byCorrectness = rankProviders(successResults, providers, correctnessKey);
  if (byCorrectness) {
    bullet(
      headline("Avg correctness", "Most correct", byCorrectness.id),
      colorScore(byCorrectness.avg)
    );
  }

  // Latency: providers without successful runs sort last via Infinity.
  const latencyRanking = providers.map((id) => {
    const avg = average(runsOf(id).map((r) => r.raw.latencyMs));
    return { id, avg: avg ?? Infinity };
  });
  latencyRanking.sort((a, b) => a.avg - b.avg);
  const byLatency = latencyRanking[0];
  if (byLatency && byLatency.avg !== Infinity) {
    bullet(
      headline("Avg latency", "Fastest", byLatency.id),
      `${Math.round(byLatency.avg)}ms`
    );
  }

  // Cost: only runs with a non-negative cost score count.
  const costRanking = [];
  for (const id of providers) {
    const costs = [];
    for (const r of runsOf(id)) {
      const costScore = r.scores.find((s) => s.name === "cost");
      if (costScore && costScore.value >= 0) costs.push(costScore.value);
    }
    if (costs.length > 0) {
      costRanking.push({ id, avg: costs.reduce((a, b) => a + b, 0) / costs.length });
    }
  }
  costRanking.sort((a, b) => a.avg - b.avg);
  const byCost = costRanking[0];
  if (byCost !== void 0) {
    bullet(headline("Avg cost", "Cheapest", byCost.id), formatCost(byCost.avg));
  }

  console.log("");
}
1779
/**
 * Find the provider with the highest average for one scorer.
 *
 * Negative score values are ignored. Providers with no usable score are left
 * out; on a tie the provider listed first wins.
 *
 * @param results    Results to consider.
 * @param providers  Provider ids, in priority order for ties.
 * @param scorerName Name of the score to average.
 * @returns `{ id, avg }` of the best provider, or undefined if none has a
 *          usable score.
 */
function rankProviders(results, providers, scorerName) {
  let best = void 0;
  for (const id of providers) {
    const values = [];
    for (const r of results) {
      if (r.providerId !== id) continue;
      for (const s of r.scores) {
        if (s.name === scorerName && s.value >= 0) values.push(s.value);
      }
    }
    if (values.length === 0) continue;
    const avg = values.reduce((a, b) => a + b, 0) / values.length;
    // Strict ">" keeps the earlier provider on equal averages.
    if (best === void 0 || avg > best.avg) best = { id, avg };
  }
  return best;
}
1788
/**
 * Average each score across results, by score name.
 *
 * Negative values are skipped, so a name whose scores are all negative does
 * not appear in the output.
 *
 * @param results Results whose `scores` arrays are averaged.
 * @returns Object mapping score name to its mean value.
 */
function averageScores(results) {
  const totals = /* @__PURE__ */ new Map();
  for (const { scores } of results) {
    for (const { name, value } of scores) {
      if (value < 0) continue;
      const entry = totals.get(name) ?? { sum: 0, count: 0 };
      entry.sum += value;
      entry.count += 1;
      totals.set(name, entry);
    }
  }
  const avgs = {};
  totals.forEach(({ sum, count }, name) => {
    avgs[name] = sum / count;
  });
  return avgs;
}
1804
/**
 * Average the cost scorer's detail fields across results.
 *
 * Reads `estimatedUsd` and `totalTokens` from each result's "cost" score
 * details; null or missing values are skipped.
 *
 * @param results Results to aggregate.
 * @returns `{ costUsd, totalTokens }`; each is undefined when no result
 *          supplied that field. `totalTokens` is rounded to an integer.
 */
function averageDetails(results) {
  const usdValues = [];
  const tokenValues = [];
  for (const { scores } of results) {
    const details = scores.find((s) => s.name === "cost")?.details;
    if (details?.estimatedUsd != null) usdValues.push(details.estimatedUsd);
    if (details?.totalTokens != null) tokenValues.push(details.totalTokens);
  }
  const mean = (values) => values.reduce((a, b) => a + b, 0) / values.length;
  return {
    costUsd: usdValues.length > 0 ? mean(usdValues) : void 0,
    totalTokens: tokenValues.length > 0 ? Math.round(mean(tokenValues)) : void 0
  };
}
1826
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param nums Numbers to average.
 * @returns The mean, or undefined for an empty list.
 */
function average(nums) {
  const count = nums.length;
  if (count === 0) return void 0;
  let total = 0;
  nums.forEach((n) => {
    total += n;
  });
  return total / count;
}
1830
/**
 * Format a USD amount for the results table.
 *
 * - undefined -> em dash
 * - 0         -> "$0.00"
 * - >= 0.01   -> two decimals, prefixed with "~$"
 * - smaller   -> at least four decimals (more for very small amounts), with
 *                trailing zeros removed
 *
 * @param usd Amount in USD, or undefined when unknown.
 * @returns Display string.
 */
function formatCost(usd) {
  if (usd === void 0) return "\u2014";
  if (usd === 0) return "$0.00";
  if (usd >= 0.01) return "~$" + usd.toFixed(2);

  // Keep enough decimals to show two significant digits, with a floor of four.
  const magnitude = Math.floor(Math.log10(usd));
  const digits = Math.max(4, 1 - magnitude);
  const trimmed = usd.toFixed(digits).replace(/0+$/, "");
  return "~$" + trimmed;
}
1837
/**
 * Pad `str` with spaces to `width` characters.
 *
 * @param str   Text to pad.
 * @param width Target length.
 * @param align "right" pads on the left; anything else pads on the right.
 */
function pad(str, width, align) {
  return align === "right" ? str.padStart(width) : str.padEnd(width);
}
1841
/**
 * Count the characters in `str` that belong to ANSI color escape sequences,
 * i.e. the difference between the string's length and its visible length.
 *
 * @param str Possibly colorized text.
 * @returns Number of characters occupied by escape sequences.
 */
function colorLen(str) {
  const escapes = str.match(/\x1b\[[0-9;]*m/g) ?? [];
  return escapes.reduce((total, seq) => total + seq.length, 0);
}
1845
/**
 * Suggest how to fix an authentication error.
 *
 * The error text is matched case-insensitively against a few auth-related
 * markers. If none match, no hint is given. Otherwise the hint depends on
 * the provider prefix (the part of the id before the first "/").
 *
 * @param providerId Provider id, e.g. "openai/gpt-4o".
 * @param error      Error message text.
 * @returns A hint string, or undefined when the error does not look
 *          auth-related.
 */
function apiKeyHint(providerId, error) {
  const lower = error.toLowerCase();
  const authMarkers = ["api key", "401", "unauthorized", "authentication", "incorrect api key", "apikey"];
  if (!authMarkers.some((marker) => lower.includes(marker))) return void 0;

  const [prefix] = providerId.split("/");
  if (prefix === "openai") return "Set: export OPENAI_API_KEY=sk-...";
  if (prefix === "azure") return "Set: export AZURE_OPENAI_API_KEY=... and AZURE_OPENAI_ENDPOINT=...";
  if (prefix === "anthropic") return "Set: export ANTHROPIC_API_KEY=sk-ant-...";
  if (prefix === "google") return "Set: export GOOGLE_API_KEY=...";
  return `Check the API key for ${providerId}`;
}
1863
/**
 * Human-readable vendor label for a provider id, in parentheses.
 *
 * The label is chosen by the id's prefix (text before the first "/").
 * Unknown prefixes are echoed back as-is.
 *
 * @param providerId Provider id, e.g. "azure/gpt-4o".
 * @returns e.g. "(OpenAI via Azure)" or "(my-prefix)".
 */
function providerLabel(providerId) {
  const [prefix] = providerId.split("/");
  const vendorNames = /* @__PURE__ */ new Map([
    ["azure", "OpenAI via Azure"],
    ["openai", "OpenAI"],
    ["anthropic", "Anthropic"],
    ["google", "Google"],
    ["mistral", "Mistral"],
    ["meta", "Meta"],
    ["deepseek", "DeepSeek"],
    ["cohere", "Cohere"],
    ["qwen", "Qwen"],
    ["xai", "xAI"],
    ["minimax", "MiniMax"],
    ["moonshot", "Moonshot / Kimi"],
    ["perplexity", "Perplexity"],
    ["amazon", "Amazon"],
    ["nvidia", "NVIDIA"],
    ["microsoft", "Microsoft"],
    ["ai21", "AI21 Labs"],
    ["bytedance", "ByteDance"],
    ["together", "Together AI"],
    ["fireworks", "Fireworks AI"],
    ["groq", "Groq"],
    ["cerebras", "Cerebras"]
  ]);
  return `(${vendorNames.get(prefix) ?? prefix})`;
}
1914
+
1915
+ // src/reporter/json.ts
1916
/**
 * Serialize benchmark results as pretty-printed JSON (2-space indent).
 *
 * The document contains the current time as an ISO timestamp, a summary
 * block, and the results themselves.
 *
 * @param results Benchmark results.
 * @returns JSON text.
 */
function jsonReporter(results) {
  const report = {
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    summary: buildSummary(results),
    results
  };
  return JSON.stringify(report, null, 2);
}
1927
/**
 * Summarize a result list: total count plus the distinct task names and
 * provider ids, each in first-seen order.
 *
 * @param results Benchmark results.
 * @returns `{ totalBenchmarks, tasks, providers, providerIds, taskNames }`.
 */
function buildSummary(results) {
  const taskNames = /* @__PURE__ */ new Set();
  const providerIds = /* @__PURE__ */ new Set();
  for (const r of results) {
    taskNames.add(r.taskName);
    providerIds.add(r.providerId);
  }
  return {
    totalBenchmarks: results.length,
    tasks: taskNames.size,
    providers: providerIds.size,
    providerIds: [...providerIds],
    taskNames: [...taskNames]
  };
}
1938
+
1939
+ // src/arena.ts
1940
/**
 * Create a benchmark arena from a config.
 *
 * Scorers default to latency, cost and correctness; `runs` defaults to 1.
 * Scorer names are resolved immediately, so an unknown scorer name fails
 * here rather than at run time.
 *
 * @param config `{ providers, tasks, scorers?, judgeModel?, runs? }`.
 * @returns `{ config, run }`, where `run(options?)` executes the benchmarks
 *          and resolves to the results; `options.onResult` is forwarded.
 * @throws Error when `providers` or `tasks` is empty.
 */
function defineArena(config) {
  const requireNonEmpty = (list, message) => {
    if (list.length === 0) throw new Error(message);
  };
  requireNonEmpty(config.providers, "At least one provider is required");
  requireNonEmpty(config.tasks, "At least one task is required");

  const scorerFns = resolveScorers(
    config.scorers ?? ["latency", "cost", "correctness"],
    config.judgeModel
  );
  const runs = config.runs ?? 1;

  const run = async (options) =>
    runBenchmarks({
      providers: config.providers,
      tasks: config.tasks,
      scorers: scorerFns,
      runs,
      onResult: options?.onResult
    });

  return { config, run };
}
1963
+
1964
+ // src/providers/openai.ts
1965
+ import OpenAI2, { AzureOpenAI as AzureOpenAI2 } from "openai";
1966
+ import { zodToJsonSchema } from "zod-to-json-schema";
1967
/**
 * Create a provider backed by the OpenAI API.
 *
 * @param model   Model name; the provider id becomes "openai/<model>".
 * @param options Optional `{ apiKey, baseURL }`. The key falls back to the
 *                OPENAI_API_KEY environment variable.
 * @returns Provider object built by makeProvider.
 */
function openai(model, options) {
  const apiKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
  const baseURL = options?.baseURL;
  const client = new OpenAI2({ apiKey, baseURL });
  return makeProvider(`openai/${model}`, "OpenAI", model, client, model);
}
1974
/**
 * Create a provider for any OpenAI-compatible endpoint.
 *
 * The API key is taken from `options.apiKey`, else from the environment
 * variable named by `options.apiKeyEnv`, else the placeholder "no-key".
 * When `options.free` is set, zero-cost pricing is registered under
 * `options.id`.
 *
 * @param options `{ id, name, model, baseURL, apiKey?, apiKeyEnv?, free?, stripThinking? }`.
 * @returns Provider object built by makeProvider.
 */
function openaiCompatible(options) {
  const envKey = options.apiKeyEnv ? process.env[options.apiKeyEnv] : void 0;
  const apiKey = options.apiKey ?? envKey ?? "no-key";
  const client = new OpenAI2({ apiKey, baseURL: options.baseURL });

  if (options.free) {
    const zeroCost = { inputPerToken: 0, outputPerToken: 0 };
    registerPricing(options.id, zeroCost);
  }

  const { id, name, model, stripThinking } = options;
  return makeProvider(id, name, model, client, model, stripThinking);
}
1985
/**
 * Create a provider backed by Azure OpenAI.
 *
 * Connection settings come from `options` first, then from the
 * AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_API_VERSION
 * environment variables; the API version defaults to "2024-12-01-preview".
 * Requests are sent to `options.deployment`, or to `model` when no
 * deployment is given.
 *
 * @param model   Model name; the provider id becomes "azure/<model>".
 * @param options Optional `{ apiKey, endpoint, apiVersion, deployment }`.
 * @returns Provider object built by makeProvider.
 */
function azureOpenai(model, options) {
  const env = process.env;
  const deployment = options?.deployment ?? model;
  const settings = {
    apiKey: options?.apiKey ?? env.AZURE_OPENAI_API_KEY,
    endpoint: options?.endpoint ?? env.AZURE_OPENAI_ENDPOINT,
    apiVersion: options?.apiVersion ?? env.AZURE_OPENAI_API_VERSION ?? "2024-12-01-preview",
    deployment
  };
  const client = new AzureOpenAI2(settings);
  return makeProvider(`azure/${model}`, "Azure OpenAI", model, client, deployment);
}
1995
+ function makeProvider(id, name, model, client, requestModel, stripThinking) {
1996
+ return {
1997
+ id,
1998
+ name,
1999
+ model,
2000
+ async run(input) {
2001
+ const start = Date.now();
2002
+ const params = {
2003
+ model: requestModel,
2004
+ messages: [{ role: "user", content: input.prompt }]
2005
+ };
2006
+ if (input.schema) {
2007
+ params.response_format = { type: "json_object" };
2008
+ params.messages = [
2009
+ { role: "system", content: "Respond with valid JSON matching the requested schema." },
2010
+ ...params.messages
2011
+ ];
2012
+ }
2013
+ if (input.tools?.length) {
2014
+ params.tools = input.tools.map(toolDefToOpenAI);
2015
+ params.tool_choice = "auto";
2016
+ }
2017
+ const response = await client.chat.completions.create(params);
2018
+ let totalPromptTokens = response.usage?.prompt_tokens ?? 0;
2019
+ let totalCompletionTokens = response.usage?.completion_tokens ?? 0;
2020
+ const choice = response.choices[0];
2021
+ const toolCallsRaw = choice?.message?.tool_calls;
2022
+ const collectedToolCalls = [];
2023
+ let finalResponse = response;
2024
+ if (toolCallsRaw?.length && input.tools?.length) {
2025
+ const toolMessages = [
2026
+ ...params.messages,
2027
+ choice.message
2028
+ ];
2029
+ for (const tc of toolCallsRaw) {
2030
+ const toolDef = input.tools.find((t) => t.name === tc.function.name);
2031
+ let args;
2032
+ try {
2033
+ args = JSON.parse(tc.function.arguments);
2034
+ } catch {
2035
+ args = tc.function.arguments;
2036
+ }
2037
+ let result;
2038
+ if (toolDef?.handler) {
2039
+ result = await toolDef.handler(args);
2040
+ }
2041
+ collectedToolCalls.push({ name: tc.function.name, arguments: args, result });
2042
+ toolMessages.push({
2043
+ role: "tool",
2044
+ tool_call_id: tc.id,
2045
+ content: JSON.stringify(result ?? {})
2046
+ });
2047
+ }
2048
+ const followUp = await client.chat.completions.create({
2049
+ model: requestModel,
2050
+ messages: toolMessages
2051
+ });
2052
+ totalPromptTokens += followUp.usage?.prompt_tokens ?? 0;
2053
+ totalCompletionTokens += followUp.usage?.completion_tokens ?? 0;
2054
+ finalResponse = followUp;
2055
+ }
2056
+ const latencyMs = Date.now() - start;
2057
+ const finalChoice = finalResponse.choices[0];
2058
+ let rawContent = finalChoice?.message?.content ?? "";
2059
+ if (stripThinking) {
2060
+ rawContent = rawContent.replace(/<think>[\s\S]*?<\/think>\s*/, "");
2061
+ }
2062
+ let output = rawContent;
2063
+ if (input.schema) {
2064
+ try {
2065
+ output = JSON.parse(rawContent);
2066
+ } catch {
2067
+ }
2068
+ }
2069
+ return {
2070
+ output,
2071
+ usage: {
2072
+ promptTokens: totalPromptTokens || void 0,
2073
+ completionTokens: totalCompletionTokens || void 0
2074
+ },
2075
+ latencyMs,
2076
+ raw: finalResponse,
2077
+ toolCalls: collectedToolCalls.length > 0 ? collectedToolCalls : void 0
2078
+ };
2079
+ }
2080
+ };
2081
+ }
2082
/**
 * Convert a task tool definition into the OpenAI "function" tool format.
 *
 * The tool's `parameters` schema is converted to JSON Schema using the
 * "openAi" target of zod-to-json-schema.
 *
 * @param tool `{ name, description, parameters }`.
 * @returns `{ type: "function", function: { name, description, parameters } }`.
 */
function toolDefToOpenAI(tool) {
  const { name, description } = tool;
  const parameters = zodToJsonSchema(tool.parameters, { target: "openAi" });
  return {
    type: "function",
    function: { name, description, parameters }
  };
}
2092
+
2093
+ // src/providers/anthropic.ts
2094
+ import Anthropic from "@anthropic-ai/sdk";
2095
/**
 * Create a provider backed by the Anthropic Messages API.
 *
 * `run(input)` sends the prompt as a single user message. With
 * `input.schema` a system instruction asks for JSON and the first text block
 * of the reply is JSON-parsed (the raw text is kept if parsing fails).
 * `input.tools` is not forwarded by this provider.
 *
 * @param model   Model name; the provider id becomes "anthropic/<model>".
 * @param options Optional `{ apiKey, maxTokens }`. The key falls back to
 *                ANTHROPIC_API_KEY; `maxTokens` defaults to 1024.
 * @returns `{ id, name, model, run }`.
 */
function anthropic(model, options) {
  const apiKey = options?.apiKey ?? process.env.ANTHROPIC_API_KEY;
  const client = new Anthropic({ apiKey });
  const maxTokens = options?.maxTokens ?? 1024;

  const run = async (input) => {
    const start = Date.now();
    const wantsJson = Boolean(input.schema);

    const response = await client.messages.create({
      model,
      max_tokens: maxTokens,
      system: wantsJson ? "Respond with valid JSON matching the requested schema." : void 0,
      messages: [{ role: "user", content: input.prompt }]
    });
    const latencyMs = Date.now() - start;

    // Use the first text block of the reply; other block types are ignored.
    const textBlock = response.content.find((b) => b.type === "text");
    const rawContent = textBlock ? textBlock.text : "";

    let output = rawContent;
    if (wantsJson) {
      try {
        output = JSON.parse(rawContent);
      } catch {
        // Not valid JSON: keep the raw text as the output.
      }
    }

    const { input_tokens: promptTokens, output_tokens: completionTokens } = response.usage;
    return {
      output,
      usage: { promptTokens, completionTokens },
      latencyMs,
      raw: response
    };
  };

  return { id: `anthropic/${model}`, name: "Anthropic", model, run };
}
2135
+
2136
+ // src/providers/gemini.ts
2137
+ import OpenAI3 from "openai";
2138
/**
 * Create a provider for Google Gemini via its OpenAI-compatible endpoint.
 *
 * @param model   Model name; the provider id becomes "google/<model>".
 * @param options Optional `{ apiKey }`. The key falls back to GOOGLE_API_KEY.
 * @returns Provider object built by makeProvider.
 * @throws Error when no API key is available.
 */
function gemini(model, options) {
  const apiKey = options?.apiKey ?? process.env.GOOGLE_API_KEY;
  if (apiKey) {
    const client = new OpenAI3({
      apiKey,
      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/"
    });
    return makeProvider(`google/${model}`, "Google AI", model, client, model);
  }
  throw new Error(
    `Missing API key for google/${model}. Set GOOGLE_API_KEY or pass apiKey option.`
  );
}
2151
+ export {
2152
+ anthropic,
2153
+ azureOpenai,
2154
+ consoleReporter,
2155
+ defineArena,
2156
+ gemini,
2157
+ jsonReporter,
2158
+ openai,
2159
+ openaiCompatible,
2160
+ registerPricing
2161
+ };
2162
+ //# sourceMappingURL=index.js.map