agent-duelist 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -498,7 +498,7 @@ function Pe(e5, t3, s5 = Q.DEFAULT) {
498
498
  return p5(), n3.getToken() !== 2 ? k3(7, [2], []) : b3(), true;
499
499
  }
500
500
  i2(M2, "parseObject");
501
- function z2() {
501
+ function z3() {
502
502
  D3(), b3();
503
503
  let w4 = true, j2 = false;
504
504
  for (; n3.getToken() !== 4 && n3.getToken() !== 17; ) {
@@ -509,11 +509,11 @@ function Pe(e5, t3, s5 = Q.DEFAULT) {
509
509
  }
510
510
  return L3(), w4 || o7.pop(), n3.getToken() !== 4 ? k3(8, [4], []) : b3(), true;
511
511
  }
512
- i2(z2, "parseArray");
512
+ i2(z3, "parseArray");
513
513
  function U2() {
514
514
  switch (n3.getToken()) {
515
515
  case 3:
516
- return z2();
516
+ return z3();
517
517
  case 1:
518
518
  return M2();
519
519
  case 10:
@@ -746,7 +746,7 @@ var init_dist2 = __esm({
746
746
  }, "interpolateConfigDir");
747
747
  Me = ["outDir", "declarationDir", "outFile", "rootDir", "baseUrl", "tsBuildInfoFile"];
748
748
  ze = i2((e5) => {
749
- var t3, s5, n3, o7, l3, u5, a7, r3, g2, v4, d5, _4, p5, D3, L3, T3, F4, x, c3, y3, A3, b3, k3, R5, W, V2, M2, z2, U2, w4, j2, S2, $2;
749
+ var t3, s5, n3, o7, l3, u5, a7, r3, g2, v4, d5, _4, p5, D3, L3, T3, F4, x, c3, y3, A3, b3, k3, R5, W, V2, M2, z3, U2, w4, j2, S2, $2;
750
750
  if (e5.strict) {
751
751
  const f6 = ["noImplicitAny", "noImplicitThis", "strictNullChecks", "strictFunctionTypes", "strictBindCallApply", "strictPropertyInitialization", "strictBuiltinIteratorReturn", "alwaysStrict", "useUnknownInCatchVariables"];
752
752
  for (const B2 of f6) e5[B2] === void 0 && (e5[B2] = true);
@@ -767,7 +767,7 @@ var init_dist2 = __esm({
767
767
  let f6 = e5.moduleResolution.toLowerCase();
768
768
  f6 === "node" && (f6 = "node10"), e5.moduleResolution = f6, (f6 === "node16" || f6 === "nodenext" || f6 === "bundler") && ((R5 = e5.resolvePackageJsonExports) != null || (e5.resolvePackageJsonExports = true), (W = e5.resolvePackageJsonImports) != null || (e5.resolvePackageJsonImports = true)), f6 === "bundler" && ((V2 = e5.allowSyntheticDefaultImports) != null || (e5.allowSyntheticDefaultImports = true), (M2 = e5.resolveJsonModule) != null || (e5.resolveJsonModule = true));
769
769
  }
770
- e5.jsx && (e5.jsx = e5.jsx.toLowerCase()), e5.moduleDetection && (e5.moduleDetection = e5.moduleDetection.toLowerCase()), e5.importsNotUsedAsValues && (e5.importsNotUsedAsValues = e5.importsNotUsedAsValues.toLowerCase()), e5.newLine && (e5.newLine = e5.newLine.toLowerCase()), e5.esModuleInterop && ((z2 = e5.allowSyntheticDefaultImports) != null || (e5.allowSyntheticDefaultImports = true)), e5.verbatimModuleSyntax && ((U2 = e5.isolatedModules) != null || (e5.isolatedModules = true), (w4 = e5.preserveConstEnums) != null || (e5.preserveConstEnums = true)), e5.isolatedModules && ((j2 = e5.preserveConstEnums) != null || (e5.preserveConstEnums = true)), e5.rewriteRelativeImportExtensions && ((S2 = e5.allowImportingTsExtensions) != null || (e5.allowImportingTsExtensions = true)), e5.lib && (e5.lib = e5.lib.map((f6) => f6.toLowerCase())), e5.checkJs && (($2 = e5.allowJs) != null || (e5.allowJs = true));
770
+ e5.jsx && (e5.jsx = e5.jsx.toLowerCase()), e5.moduleDetection && (e5.moduleDetection = e5.moduleDetection.toLowerCase()), e5.importsNotUsedAsValues && (e5.importsNotUsedAsValues = e5.importsNotUsedAsValues.toLowerCase()), e5.newLine && (e5.newLine = e5.newLine.toLowerCase()), e5.esModuleInterop && ((z3 = e5.allowSyntheticDefaultImports) != null || (e5.allowSyntheticDefaultImports = true)), e5.verbatimModuleSyntax && ((U2 = e5.isolatedModules) != null || (e5.isolatedModules = true), (w4 = e5.preserveConstEnums) != null || (e5.preserveConstEnums = true)), e5.isolatedModules && ((j2 = e5.preserveConstEnums) != null || (e5.preserveConstEnums = true)), e5.rewriteRelativeImportExtensions && ((S2 = e5.allowImportingTsExtensions) != null || (e5.allowImportingTsExtensions = true)), e5.lib && (e5.lib = e5.lib.map((f6) => f6.toLowerCase())), e5.checkJs && (($2 = e5.allowJs) != null || (e5.allowJs = true));
771
771
  }, "normalizeCompilerOptions");
772
772
  pe = i2((e5, t3 = /* @__PURE__ */ new Map()) => {
773
773
  const s5 = m3.resolve(e5), n3 = ve(s5, t3), o7 = m3.dirname(s5), { compilerOptions: l3 } = n3;
@@ -3345,14 +3345,14 @@ function fn(s5, e5 = "@") {
3345
3345
  case 32:
3346
3346
  break;
3347
3347
  case 101: {
3348
- if (!(d5[400] | 0) && z2(h4) | 0 && !(A3(t3 + 4 | 0, 16, 10) | 0) && ($2(), (b3[804] | 0) == 0)) {
3348
+ if (!(d5[400] | 0) && z3(h4) | 0 && !(A3(t3 + 4 | 0, 16, 10) | 0) && ($2(), (b3[804] | 0) == 0)) {
3349
3349
  l3 = 9;
3350
3350
  break e;
3351
3351
  } else l3 = 17;
3352
3352
  break;
3353
3353
  }
3354
3354
  case 105: {
3355
- z2(h4) | 0 && !(A3(t3 + 4 | 0, 26, 10) | 0) && W(), l3 = 17;
3355
+ z3(h4) | 0 && !(A3(t3 + 4 | 0, 26, 10) | 0) && W(), l3 = 17;
3356
3356
  break;
3357
3357
  }
3358
3358
  case 59: {
@@ -3400,15 +3400,15 @@ function fn(s5, e5 = "@") {
3400
3400
  case 32:
3401
3401
  break;
3402
3402
  case 101: {
3403
- !(d5[400] | 0) && z2(a7) | 0 && !(A3(t3 + 4 | 0, 16, 10) | 0) && $2(), l3 = 91;
3403
+ !(d5[400] | 0) && z3(a7) | 0 && !(A3(t3 + 4 | 0, 16, 10) | 0) && $2(), l3 = 91;
3404
3404
  break;
3405
3405
  }
3406
3406
  case 105: {
3407
- z2(a7) | 0 && !(A3(t3 + 4 | 0, 26, 10) | 0) && W(), l3 = 91;
3407
+ z3(a7) | 0 && !(A3(t3 + 4 | 0, 26, 10) | 0) && W(), l3 = 91;
3408
3408
  break;
3409
3409
  }
3410
3410
  case 99: {
3411
- z2(a7) | 0 && !(A3(t3 + 4 | 0, 36, 8) | 0) && P3(d5[t3 + 12 >> 1] | 0) | 0 && (b3[806] = 1), l3 = 91;
3411
+ z3(a7) | 0 && !(A3(t3 + 4 | 0, 36, 8) | 0) && P3(d5[t3 + 12 >> 1] | 0) | 0 && (b3[806] = 1), l3 = 91;
3412
3412
  break;
3413
3413
  }
3414
3414
  case 40: {
@@ -4437,10 +4437,10 @@ function fn(s5, e5 = "@") {
4437
4437
  return t3 = t3 | 0, (d5[t3 >> 1] | 0) == 46 && (d5[t3 + -2 >> 1] | 0) == 46 ? t3 = (d5[t3 + -4 >> 1] | 0) == 46 : t3 = 0, t3 | 0;
4438
4438
  }
4439
4439
  f2(de3, "G");
4440
- function z2(t3) {
4440
+ function z3(t3) {
4441
4441
  return t3 = t3 | 0, (r3[3] | 0) == (t3 | 0) ? t3 = 1 : t3 = Oe2(t3 + -2 | 0) | 0, t3 | 0;
4442
4442
  }
4443
- f2(z2, "H");
4443
+ f2(z3, "H");
4444
4444
  function vt() {
4445
4445
  var t3 = 0;
4446
4446
  return t3 = r3[(r3[62] | 0) + 12 >> 2] | 0, t3 ? t3 = t3 - (r3[3] | 0) >> 1 : t3 = -1, t3 | 0;
@@ -5821,7 +5821,7 @@ import p4 from "path";
5821
5821
  import { fileURLToPath as O4 } from "url";
5822
5822
  import se3, { writeSync as te2 } from "fs";
5823
5823
  import { inspect as oe3 } from "util";
5824
- var K3, o4, R4, D2, me3, N2, j, pe2, y2, C3, de2, E4, ge2, Q4, M, _3, S, A2, T2, Pe3, I4, F3, v3, J3, P2, je3, be2, xe3, k2, $, ye3, Ee, B, G3, _e3, Se3, b2, X3, w2, ve3, z, we2, Me3, Te3, Fe3, H2, $e3;
5824
+ var K3, o4, R4, D2, me3, N2, j, pe2, y2, C3, de2, E4, ge2, Q4, M, _3, S, A2, T2, Pe3, I4, F3, v3, J3, P2, je3, be2, xe3, k2, $, ye3, Ee, B, G3, _e3, Se3, b2, X3, w2, ve3, z2, we2, Me3, Te3, Fe3, H2, $e3;
5825
5825
  var init_register_CFH5oNdT = __esm({
5826
5826
  "node_modules/tsx/dist/register-CFH5oNdT.mjs"() {
5827
5827
  "use strict";
@@ -5995,11 +5995,11 @@ var init_register_CFH5oNdT = __esm({
5995
5995
  throw t3;
5996
5996
  }
5997
5997
  }, "createTsExtensionResolver");
5998
- z = "at cjsPreparseModuleExports (node:internal";
5998
+ z2 = "at cjsPreparseModuleExports (node:internal";
5999
5999
  we2 = o4((s5) => {
6000
6000
  const e5 = s5.stack.split(`
6001
6001
  `).slice(1);
6002
- return e5[1].includes(z) || e5[2].includes(z);
6002
+ return e5[1].includes(z2) || e5[2].includes(z2);
6003
6003
  }, "isFromCjsLexer");
6004
6004
  Me3 = o4((s5, e5) => {
6005
6005
  const a7 = s5.split("?"), n3 = new URLSearchParams(a7[1]);
@@ -6197,6 +6197,1707 @@ import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, mkdirSy
6197
6197
  import { resolve, join, dirname as dirname2 } from "path";
6198
6198
  import { pathToFileURL, fileURLToPath } from "url";
6199
6199
 
6200
+ // src/scorers/latency.ts
6201
+ var MIN_MS = 500;
6202
+ var MAX_MS = 1e4;
6203
+ var latencyScorer = ({ result }) => {
6204
+ const clamped = Math.max(MIN_MS, Math.min(MAX_MS, result.latencyMs));
6205
+ const value = 1 - (clamped - MIN_MS) / (MAX_MS - MIN_MS);
6206
+ return {
6207
+ name: "latency",
6208
+ value: Math.round(value * 100) / 100,
6209
+ details: { ms: result.latencyMs }
6210
+ };
6211
+ };
6212
+
6213
+ // src/pricing/catalog.json
6214
+ var catalog_default = {
6215
+ _meta: {
6216
+ source: "OpenRouter API \u2014 all providers (https://openrouter.ai/api/v1/models)",
6217
+ updatedAt: "2026-02-28",
6218
+ unit: "USD per token"
6219
+ },
6220
+ models: {
6221
+ "ai21/jamba-large-1.7": {
6222
+ inputPerToken: 2e-6,
6223
+ outputPerToken: 8e-6
6224
+ },
6225
+ "aion-labs/aion-1.0": {
6226
+ inputPerToken: 4e-6,
6227
+ outputPerToken: 8e-6
6228
+ },
6229
+ "aion-labs/aion-1.0-mini": {
6230
+ inputPerToken: 7e-7,
6231
+ outputPerToken: 14e-7
6232
+ },
6233
+ "aion-labs/aion-2.0": {
6234
+ inputPerToken: 8e-7,
6235
+ outputPerToken: 16e-7
6236
+ },
6237
+ "aion-labs/aion-rp-llama-3.1-8b": {
6238
+ inputPerToken: 8e-7,
6239
+ outputPerToken: 16e-7
6240
+ },
6241
+ "alfredpros/codellama-7b-instruct-solidity": {
6242
+ inputPerToken: 8e-7,
6243
+ outputPerToken: 12e-7
6244
+ },
6245
+ "alibaba/tongyi-deepresearch-30b-a3b": {
6246
+ inputPerToken: 9e-8,
6247
+ outputPerToken: 45e-8
6248
+ },
6249
+ "allenai/molmo-2-8b": {
6250
+ inputPerToken: 2e-7,
6251
+ outputPerToken: 2e-7
6252
+ },
6253
+ "allenai/olmo-2-0325-32b-instruct": {
6254
+ inputPerToken: 5e-8,
6255
+ outputPerToken: 2e-7
6256
+ },
6257
+ "allenai/olmo-3-32b-think": {
6258
+ inputPerToken: 15e-8,
6259
+ outputPerToken: 5e-7
6260
+ },
6261
+ "allenai/olmo-3-7b-instruct": {
6262
+ inputPerToken: 1e-7,
6263
+ outputPerToken: 2e-7
6264
+ },
6265
+ "allenai/olmo-3-7b-think": {
6266
+ inputPerToken: 12e-8,
6267
+ outputPerToken: 2e-7
6268
+ },
6269
+ "allenai/olmo-3.1-32b-instruct": {
6270
+ inputPerToken: 2e-7,
6271
+ outputPerToken: 6e-7
6272
+ },
6273
+ "alpindale/goliath-120b": {
6274
+ inputPerToken: 375e-8,
6275
+ outputPerToken: 75e-7
6276
+ },
6277
+ "amazon/nova-2-lite-v1": {
6278
+ inputPerToken: 3e-7,
6279
+ outputPerToken: 25e-7
6280
+ },
6281
+ "amazon/nova-lite-v1": {
6282
+ inputPerToken: 6e-8,
6283
+ outputPerToken: 24e-8
6284
+ },
6285
+ "amazon/nova-micro-v1": {
6286
+ inputPerToken: 35e-9,
6287
+ outputPerToken: 14e-8
6288
+ },
6289
+ "amazon/nova-premier-v1": {
6290
+ inputPerToken: 25e-7,
6291
+ outputPerToken: 125e-7
6292
+ },
6293
+ "amazon/nova-pro-v1": {
6294
+ inputPerToken: 8e-7,
6295
+ outputPerToken: 32e-7
6296
+ },
6297
+ "anthracite-org/magnum-v4-72b": {
6298
+ inputPerToken: 3e-6,
6299
+ outputPerToken: 5e-6
6300
+ },
6301
+ "anthropic/claude-3-haiku": {
6302
+ inputPerToken: 25e-8,
6303
+ outputPerToken: 125e-8
6304
+ },
6305
+ "anthropic/claude-3.5-haiku": {
6306
+ inputPerToken: 8e-7,
6307
+ outputPerToken: 4e-6
6308
+ },
6309
+ "anthropic/claude-3.5-sonnet": {
6310
+ inputPerToken: 6e-6,
6311
+ outputPerToken: 3e-5
6312
+ },
6313
+ "anthropic/claude-3.7-sonnet": {
6314
+ inputPerToken: 3e-6,
6315
+ outputPerToken: 15e-6
6316
+ },
6317
+ "anthropic/claude-3.7-sonnet:thinking": {
6318
+ inputPerToken: 3e-6,
6319
+ outputPerToken: 15e-6
6320
+ },
6321
+ "anthropic/claude-haiku-4.5": {
6322
+ inputPerToken: 1e-6,
6323
+ outputPerToken: 5e-6
6324
+ },
6325
+ "anthropic/claude-opus-4": {
6326
+ inputPerToken: 15e-6,
6327
+ outputPerToken: 75e-6
6328
+ },
6329
+ "anthropic/claude-opus-4.1": {
6330
+ inputPerToken: 15e-6,
6331
+ outputPerToken: 75e-6
6332
+ },
6333
+ "anthropic/claude-opus-4.5": {
6334
+ inputPerToken: 5e-6,
6335
+ outputPerToken: 25e-6
6336
+ },
6337
+ "anthropic/claude-opus-4.6": {
6338
+ inputPerToken: 5e-6,
6339
+ outputPerToken: 25e-6
6340
+ },
6341
+ "anthropic/claude-sonnet-4": {
6342
+ inputPerToken: 3e-6,
6343
+ outputPerToken: 15e-6
6344
+ },
6345
+ "anthropic/claude-sonnet-4.5": {
6346
+ inputPerToken: 3e-6,
6347
+ outputPerToken: 15e-6
6348
+ },
6349
+ "anthropic/claude-sonnet-4.6": {
6350
+ inputPerToken: 3e-6,
6351
+ outputPerToken: 15e-6
6352
+ },
6353
+ "arcee-ai/coder-large": {
6354
+ inputPerToken: 5e-7,
6355
+ outputPerToken: 8e-7
6356
+ },
6357
+ "arcee-ai/maestro-reasoning": {
6358
+ inputPerToken: 9e-7,
6359
+ outputPerToken: 33e-7
6360
+ },
6361
+ "arcee-ai/spotlight": {
6362
+ inputPerToken: 18e-8,
6363
+ outputPerToken: 18e-8
6364
+ },
6365
+ "arcee-ai/trinity-mini": {
6366
+ inputPerToken: 45e-9,
6367
+ outputPerToken: 15e-8
6368
+ },
6369
+ "arcee-ai/virtuoso-large": {
6370
+ inputPerToken: 75e-8,
6371
+ outputPerToken: 12e-7
6372
+ },
6373
+ "baidu/ernie-4.5-21b-a3b": {
6374
+ inputPerToken: 7e-8,
6375
+ outputPerToken: 28e-8
6376
+ },
6377
+ "baidu/ernie-4.5-21b-a3b-thinking": {
6378
+ inputPerToken: 7e-8,
6379
+ outputPerToken: 28e-8
6380
+ },
6381
+ "baidu/ernie-4.5-300b-a47b": {
6382
+ inputPerToken: 28e-8,
6383
+ outputPerToken: 11e-7
6384
+ },
6385
+ "baidu/ernie-4.5-vl-28b-a3b": {
6386
+ inputPerToken: 14e-8,
6387
+ outputPerToken: 56e-8
6388
+ },
6389
+ "baidu/ernie-4.5-vl-424b-a47b": {
6390
+ inputPerToken: 42e-8,
6391
+ outputPerToken: 125e-8
6392
+ },
6393
+ "bytedance/seed-1.6": {
6394
+ inputPerToken: 25e-8,
6395
+ outputPerToken: 2e-6
6396
+ },
6397
+ "bytedance/seed-1.6-flash": {
6398
+ inputPerToken: 75e-9,
6399
+ outputPerToken: 3e-7
6400
+ },
6401
+ "bytedance/seed-2.0-mini": {
6402
+ inputPerToken: 1e-7,
6403
+ outputPerToken: 4e-7
6404
+ },
6405
+ "bytedance/ui-tars-1.5-7b": {
6406
+ inputPerToken: 1e-7,
6407
+ outputPerToken: 2e-7
6408
+ },
6409
+ "cohere/command-a": {
6410
+ inputPerToken: 25e-7,
6411
+ outputPerToken: 1e-5
6412
+ },
6413
+ "cohere/command-r-08-2024": {
6414
+ inputPerToken: 15e-8,
6415
+ outputPerToken: 6e-7
6416
+ },
6417
+ "cohere/command-r-plus-08-2024": {
6418
+ inputPerToken: 25e-7,
6419
+ outputPerToken: 1e-5
6420
+ },
6421
+ "cohere/command-r7b-12-2024": {
6422
+ inputPerToken: 375e-10,
6423
+ outputPerToken: 15e-8
6424
+ },
6425
+ "deepcogito/cogito-v2.1-671b": {
6426
+ inputPerToken: 125e-8,
6427
+ outputPerToken: 125e-8
6428
+ },
6429
+ "deepseek/deepseek-chat": {
6430
+ inputPerToken: 32e-8,
6431
+ outputPerToken: 89e-8
6432
+ },
6433
+ "deepseek/deepseek-chat-v3-0324": {
6434
+ inputPerToken: 2e-7,
6435
+ outputPerToken: 77e-8
6436
+ },
6437
+ "deepseek/deepseek-chat-v3.1": {
6438
+ inputPerToken: 15e-8,
6439
+ outputPerToken: 75e-8
6440
+ },
6441
+ "deepseek/deepseek-r1": {
6442
+ inputPerToken: 7e-7,
6443
+ outputPerToken: 25e-7
6444
+ },
6445
+ "deepseek/deepseek-r1-0528": {
6446
+ inputPerToken: 45e-8,
6447
+ outputPerToken: 215e-8
6448
+ },
6449
+ "deepseek/deepseek-r1-distill-llama-70b": {
6450
+ inputPerToken: 7e-7,
6451
+ outputPerToken: 8e-7
6452
+ },
6453
+ "deepseek/deepseek-r1-distill-qwen-32b": {
6454
+ inputPerToken: 29e-8,
6455
+ outputPerToken: 29e-8
6456
+ },
6457
+ "deepseek/deepseek-v3": {
6458
+ inputPerToken: 3e-7,
6459
+ outputPerToken: 88e-8
6460
+ },
6461
+ "deepseek/deepseek-v3.1-terminus": {
6462
+ inputPerToken: 21e-8,
6463
+ outputPerToken: 79e-8
6464
+ },
6465
+ "deepseek/deepseek-v3.1-terminus:exacto": {
6466
+ inputPerToken: 21e-8,
6467
+ outputPerToken: 79e-8
6468
+ },
6469
+ "deepseek/deepseek-v3.2": {
6470
+ inputPerToken: 25e-8,
6471
+ outputPerToken: 4e-7
6472
+ },
6473
+ "deepseek/deepseek-v3.2-exp": {
6474
+ inputPerToken: 27e-8,
6475
+ outputPerToken: 41e-8
6476
+ },
6477
+ "deepseek/deepseek-v3.2-speciale": {
6478
+ inputPerToken: 4e-7,
6479
+ outputPerToken: 12e-7
6480
+ },
6481
+ "eleutherai/llemma_7b": {
6482
+ inputPerToken: 8e-7,
6483
+ outputPerToken: 12e-7
6484
+ },
6485
+ "essentialai/rnj-1-instruct": {
6486
+ inputPerToken: 15e-8,
6487
+ outputPerToken: 15e-8
6488
+ },
6489
+ "google/gemini-2.0-flash": {
6490
+ inputPerToken: 1e-7,
6491
+ outputPerToken: 4e-7
6492
+ },
6493
+ "google/gemini-2.0-flash-001": {
6494
+ inputPerToken: 1e-7,
6495
+ outputPerToken: 4e-7
6496
+ },
6497
+ "google/gemini-2.0-flash-lite-001": {
6498
+ inputPerToken: 75e-9,
6499
+ outputPerToken: 3e-7
6500
+ },
6501
+ "google/gemini-2.5-flash": {
6502
+ inputPerToken: 3e-7,
6503
+ outputPerToken: 25e-7
6504
+ },
6505
+ "google/gemini-2.5-flash-image": {
6506
+ inputPerToken: 3e-7,
6507
+ outputPerToken: 25e-7
6508
+ },
6509
+ "google/gemini-2.5-flash-lite": {
6510
+ inputPerToken: 1e-7,
6511
+ outputPerToken: 4e-7
6512
+ },
6513
+ "google/gemini-2.5-flash-lite-preview-09-2025": {
6514
+ inputPerToken: 1e-7,
6515
+ outputPerToken: 4e-7
6516
+ },
6517
+ "google/gemini-2.5-pro": {
6518
+ inputPerToken: 125e-8,
6519
+ outputPerToken: 1e-5
6520
+ },
6521
+ "google/gemini-2.5-pro-preview": {
6522
+ inputPerToken: 125e-8,
6523
+ outputPerToken: 1e-5
6524
+ },
6525
+ "google/gemini-2.5-pro-preview-05-06": {
6526
+ inputPerToken: 125e-8,
6527
+ outputPerToken: 1e-5
6528
+ },
6529
+ "google/gemini-3-flash-preview": {
6530
+ inputPerToken: 5e-7,
6531
+ outputPerToken: 3e-6
6532
+ },
6533
+ "google/gemini-3-pro-image-preview": {
6534
+ inputPerToken: 2e-6,
6535
+ outputPerToken: 12e-6
6536
+ },
6537
+ "google/gemini-3-pro-preview": {
6538
+ inputPerToken: 2e-6,
6539
+ outputPerToken: 12e-6
6540
+ },
6541
+ "google/gemini-3.1-flash-image-preview": {
6542
+ inputPerToken: 25e-8,
6543
+ outputPerToken: 15e-7
6544
+ },
6545
+ "google/gemini-3.1-pro-preview": {
6546
+ inputPerToken: 2e-6,
6547
+ outputPerToken: 12e-6
6548
+ },
6549
+ "google/gemini-3.1-pro-preview-customtools": {
6550
+ inputPerToken: 2e-6,
6551
+ outputPerToken: 12e-6
6552
+ },
6553
+ "google/gemma-2-27b-it": {
6554
+ inputPerToken: 65e-8,
6555
+ outputPerToken: 65e-8
6556
+ },
6557
+ "google/gemma-2-9b-it": {
6558
+ inputPerToken: 3e-8,
6559
+ outputPerToken: 9e-8
6560
+ },
6561
+ "google/gemma-3-12b-it": {
6562
+ inputPerToken: 4e-8,
6563
+ outputPerToken: 13e-8
6564
+ },
6565
+ "google/gemma-3-27b-it": {
6566
+ inputPerToken: 4e-8,
6567
+ outputPerToken: 15e-8
6568
+ },
6569
+ "google/gemma-3-4b-it": {
6570
+ inputPerToken: 4e-8,
6571
+ outputPerToken: 8e-8
6572
+ },
6573
+ "google/gemma-3n-e4b-it": {
6574
+ inputPerToken: 2e-8,
6575
+ outputPerToken: 4e-8
6576
+ },
6577
+ "gryphe/mythomax-l2-13b": {
6578
+ inputPerToken: 6e-8,
6579
+ outputPerToken: 6e-8
6580
+ },
6581
+ "ibm-granite/granite-4.0-h-micro": {
6582
+ inputPerToken: 17e-9,
6583
+ outputPerToken: 11e-8
6584
+ },
6585
+ "inception/mercury": {
6586
+ inputPerToken: 25e-8,
6587
+ outputPerToken: 1e-6
6588
+ },
6589
+ "inception/mercury-coder": {
6590
+ inputPerToken: 25e-8,
6591
+ outputPerToken: 1e-6
6592
+ },
6593
+ "inflection/inflection-3-pi": {
6594
+ inputPerToken: 25e-7,
6595
+ outputPerToken: 1e-5
6596
+ },
6597
+ "inflection/inflection-3-productivity": {
6598
+ inputPerToken: 25e-7,
6599
+ outputPerToken: 1e-5
6600
+ },
6601
+ "kwaipilot/kat-coder-pro": {
6602
+ inputPerToken: 207e-9,
6603
+ outputPerToken: 828e-9
6604
+ },
6605
+ "liquid/lfm-2-24b-a2b": {
6606
+ inputPerToken: 3e-8,
6607
+ outputPerToken: 12e-8
6608
+ },
6609
+ "liquid/lfm-2.2-6b": {
6610
+ inputPerToken: 1e-8,
6611
+ outputPerToken: 2e-8
6612
+ },
6613
+ "liquid/lfm2-8b-a1b": {
6614
+ inputPerToken: 1e-8,
6615
+ outputPerToken: 2e-8
6616
+ },
6617
+ "mancer/weaver": {
6618
+ inputPerToken: 75e-8,
6619
+ outputPerToken: 1e-6
6620
+ },
6621
+ "meituan/longcat-flash-chat": {
6622
+ inputPerToken: 2e-7,
6623
+ outputPerToken: 8e-7
6624
+ },
6625
+ "meta/llama-3-70b-instruct": {
6626
+ inputPerToken: 51e-8,
6627
+ outputPerToken: 74e-8
6628
+ },
6629
+ "meta/llama-3-8b-instruct": {
6630
+ inputPerToken: 3e-8,
6631
+ outputPerToken: 4e-8
6632
+ },
6633
+ "meta/llama-3.1-405b": {
6634
+ inputPerToken: 4e-6,
6635
+ outputPerToken: 4e-6
6636
+ },
6637
+ "meta/llama-3.1-405b-instruct": {
6638
+ inputPerToken: 4e-6,
6639
+ outputPerToken: 4e-6
6640
+ },
6641
+ "meta/llama-3.1-70b-instruct": {
6642
+ inputPerToken: 4e-7,
6643
+ outputPerToken: 4e-7
6644
+ },
6645
+ "meta/llama-3.1-8b-instruct": {
6646
+ inputPerToken: 2e-8,
6647
+ outputPerToken: 5e-8
6648
+ },
6649
+ "meta/llama-3.2-11b-vision-instruct": {
6650
+ inputPerToken: 49e-9,
6651
+ outputPerToken: 49e-9
6652
+ },
6653
+ "meta/llama-3.2-1b-instruct": {
6654
+ inputPerToken: 27e-9,
6655
+ outputPerToken: 2e-7
6656
+ },
6657
+ "meta/llama-3.2-3b-instruct": {
6658
+ inputPerToken: 2e-8,
6659
+ outputPerToken: 2e-8
6660
+ },
6661
+ "meta/llama-3.3-70b": {
6662
+ inputPerToken: 12e-8,
6663
+ outputPerToken: 3e-7
6664
+ },
6665
+ "meta/llama-3.3-70b-instruct": {
6666
+ inputPerToken: 1e-7,
6667
+ outputPerToken: 32e-8
6668
+ },
6669
+ "meta/llama-4-maverick": {
6670
+ inputPerToken: 15e-8,
6671
+ outputPerToken: 6e-7
6672
+ },
6673
+ "meta/llama-4-scout": {
6674
+ inputPerToken: 8e-8,
6675
+ outputPerToken: 3e-7
6676
+ },
6677
+ "meta/llama-guard-2-8b": {
6678
+ inputPerToken: 2e-7,
6679
+ outputPerToken: 2e-7
6680
+ },
6681
+ "meta/llama-guard-3-8b": {
6682
+ inputPerToken: 2e-8,
6683
+ outputPerToken: 6e-8
6684
+ },
6685
+ "meta/llama-guard-4-12b": {
6686
+ inputPerToken: 18e-8,
6687
+ outputPerToken: 18e-8
6688
+ },
6689
+ "microsoft/phi-4": {
6690
+ inputPerToken: 6e-8,
6691
+ outputPerToken: 14e-8
6692
+ },
6693
+ "microsoft/wizardlm-2-8x22b": {
6694
+ inputPerToken: 62e-8,
6695
+ outputPerToken: 62e-8
6696
+ },
6697
+ "minimax/minimax-01": {
6698
+ inputPerToken: 2e-7,
6699
+ outputPerToken: 11e-7
6700
+ },
6701
+ "minimax/minimax-m1": {
6702
+ inputPerToken: 4e-7,
6703
+ outputPerToken: 22e-7
6704
+ },
6705
+ "minimax/minimax-m2": {
6706
+ inputPerToken: 255e-9,
6707
+ outputPerToken: 1e-6
6708
+ },
6709
+ "minimax/minimax-m2-her": {
6710
+ inputPerToken: 3e-7,
6711
+ outputPerToken: 12e-7
6712
+ },
6713
+ "minimax/minimax-m2.1": {
6714
+ inputPerToken: 27e-8,
6715
+ outputPerToken: 95e-8
6716
+ },
6717
+ "minimax/minimax-m2.5": {
6718
+ inputPerToken: 295e-9,
6719
+ outputPerToken: 12e-7
6720
+ },
6721
+ "mistral/codestral-2508": {
6722
+ inputPerToken: 3e-7,
6723
+ outputPerToken: 9e-7
6724
+ },
6725
+ "mistral/devstral-2512": {
6726
+ inputPerToken: 4e-7,
6727
+ outputPerToken: 2e-6
6728
+ },
6729
+ "mistral/devstral-medium": {
6730
+ inputPerToken: 4e-7,
6731
+ outputPerToken: 2e-6
6732
+ },
6733
+ "mistral/devstral-small": {
6734
+ inputPerToken: 1e-7,
6735
+ outputPerToken: 3e-7
6736
+ },
6737
+ "mistral/ministral-14b-2512": {
6738
+ inputPerToken: 2e-7,
6739
+ outputPerToken: 2e-7
6740
+ },
6741
+ "mistral/ministral-3b-2512": {
6742
+ inputPerToken: 1e-7,
6743
+ outputPerToken: 1e-7
6744
+ },
6745
+ "mistral/ministral-8b-2512": {
6746
+ inputPerToken: 15e-8,
6747
+ outputPerToken: 15e-8
6748
+ },
6749
+ "mistral/mistral-7b-instruct": {
6750
+ inputPerToken: 2e-7,
6751
+ outputPerToken: 2e-7
6752
+ },
6753
+ "mistral/mistral-7b-instruct-v0.1": {
6754
+ inputPerToken: 11e-8,
6755
+ outputPerToken: 19e-8
6756
+ },
6757
+ "mistral/mistral-7b-instruct-v0.3": {
6758
+ inputPerToken: 2e-7,
6759
+ outputPerToken: 2e-7
6760
+ },
6761
+ "mistral/mistral-large": {
6762
+ inputPerToken: 2e-6,
6763
+ outputPerToken: 6e-6
6764
+ },
6765
+ "mistral/mistral-large-2407": {
6766
+ inputPerToken: 2e-6,
6767
+ outputPerToken: 6e-6
6768
+ },
6769
+ "mistral/mistral-large-2411": {
6770
+ inputPerToken: 2e-6,
6771
+ outputPerToken: 6e-6
6772
+ },
6773
+ "mistral/mistral-large-2512": {
6774
+ inputPerToken: 5e-7,
6775
+ outputPerToken: 15e-7
6776
+ },
6777
+ "mistral/mistral-medium-3": {
6778
+ inputPerToken: 4e-7,
6779
+ outputPerToken: 2e-6
6780
+ },
6781
+ "mistral/mistral-medium-3.1": {
6782
+ inputPerToken: 4e-7,
6783
+ outputPerToken: 2e-6
6784
+ },
6785
+ "mistral/mistral-nemo": {
6786
+ inputPerToken: 2e-8,
6787
+ outputPerToken: 4e-8
6788
+ },
6789
+ "mistral/mistral-saba": {
6790
+ inputPerToken: 2e-7,
6791
+ outputPerToken: 6e-7
6792
+ },
6793
+ "mistral/mistral-small": {
6794
+ inputPerToken: 1e-7,
6795
+ outputPerToken: 3e-7
6796
+ },
6797
+ "mistral/mistral-small-24b-instruct-2501": {
6798
+ inputPerToken: 5e-8,
6799
+ outputPerToken: 8e-8
6800
+ },
6801
+ "mistral/mistral-small-3.1-24b-instruct": {
6802
+ inputPerToken: 35e-8,
6803
+ outputPerToken: 56e-8
6804
+ },
6805
+ "mistral/mistral-small-3.2-24b-instruct": {
6806
+ inputPerToken: 6e-8,
6807
+ outputPerToken: 18e-8
6808
+ },
6809
+ "mistral/mistral-small-creative": {
6810
+ inputPerToken: 1e-7,
6811
+ outputPerToken: 3e-7
6812
+ },
6813
+ "mistral/mixtral-8x22b-instruct": {
6814
+ inputPerToken: 2e-6,
6815
+ outputPerToken: 6e-6
6816
+ },
6817
+ "mistral/mixtral-8x7b-instruct": {
6818
+ inputPerToken: 54e-8,
6819
+ outputPerToken: 54e-8
6820
+ },
6821
+ "mistral/pixtral-large-2411": {
6822
+ inputPerToken: 2e-6,
6823
+ outputPerToken: 6e-6
6824
+ },
6825
+ "mistral/voxtral-small-24b-2507": {
6826
+ inputPerToken: 1e-7,
6827
+ outputPerToken: 3e-7
6828
+ },
6829
+ "moonshotai/kimi-k2": {
6830
+ inputPerToken: 55e-8,
6831
+ outputPerToken: 22e-7
6832
+ },
6833
+ "moonshotai/kimi-k2-0905": {
6834
+ inputPerToken: 4e-7,
6835
+ outputPerToken: 2e-6
6836
+ },
6837
+ "moonshotai/kimi-k2-0905:exacto": {
6838
+ inputPerToken: 6e-7,
6839
+ outputPerToken: 25e-7
6840
+ },
6841
+ "moonshotai/kimi-k2-thinking": {
6842
+ inputPerToken: 47e-8,
6843
+ outputPerToken: 2e-6
6844
+ },
6845
+ "moonshotai/kimi-k2.5": {
6846
+ inputPerToken: 45e-8,
6847
+ outputPerToken: 22e-7
6848
+ },
6849
+ "morph/morph-v3-fast": {
6850
+ inputPerToken: 8e-7,
6851
+ outputPerToken: 12e-7
6852
+ },
6853
+ "morph/morph-v3-large": {
6854
+ inputPerToken: 9e-7,
6855
+ outputPerToken: 19e-7
6856
+ },
6857
+ "neversleep/llama-3.1-lumimaid-8b": {
6858
+ inputPerToken: 9e-8,
6859
+ outputPerToken: 6e-7
6860
+ },
6861
+ "neversleep/noromaid-20b": {
6862
+ inputPerToken: 1e-6,
6863
+ outputPerToken: 175e-8
6864
+ },
6865
+ "nex-agi/deepseek-v3.1-nex-n1": {
6866
+ inputPerToken: 27e-8,
6867
+ outputPerToken: 1e-6
6868
+ },
6869
+ "nousresearch/hermes-2-pro-llama-3-8b": {
6870
+ inputPerToken: 14e-8,
6871
+ outputPerToken: 14e-8
6872
+ },
6873
+ "nousresearch/hermes-3-llama-3.1-405b": {
6874
+ inputPerToken: 1e-6,
6875
+ outputPerToken: 1e-6
6876
+ },
6877
+ "nousresearch/hermes-3-llama-3.1-70b": {
6878
+ inputPerToken: 3e-7,
6879
+ outputPerToken: 3e-7
6880
+ },
6881
+ "nousresearch/hermes-4-405b": {
6882
+ inputPerToken: 1e-6,
6883
+ outputPerToken: 3e-6
6884
+ },
6885
+ "nousresearch/hermes-4-70b": {
6886
+ inputPerToken: 13e-8,
6887
+ outputPerToken: 4e-7
6888
+ },
6889
+ "nvidia/llama-3.1-nemotron-70b-instruct": {
6890
+ inputPerToken: 12e-7,
6891
+ outputPerToken: 12e-7
6892
+ },
6893
+ "nvidia/llama-3.3-nemotron-super-49b-v1.5": {
6894
+ inputPerToken: 1e-7,
6895
+ outputPerToken: 4e-7
6896
+ },
6897
+ "nvidia/nemotron-3-nano-30b-a3b": {
6898
+ inputPerToken: 5e-8,
6899
+ outputPerToken: 2e-7
6900
+ },
6901
+ "nvidia/nemotron-nano-12b-v2-vl": {
6902
+ inputPerToken: 2e-7,
6903
+ outputPerToken: 6e-7
6904
+ },
6905
+ "nvidia/nemotron-nano-9b-v2": {
6906
+ inputPerToken: 4e-8,
6907
+ outputPerToken: 16e-8
6908
+ },
6909
+ "openai/gpt-3.5-turbo": {
6910
+ inputPerToken: 5e-7,
6911
+ outputPerToken: 15e-7
6912
+ },
6913
+ "openai/gpt-3.5-turbo-0613": {
6914
+ inputPerToken: 1e-6,
6915
+ outputPerToken: 2e-6
6916
+ },
6917
+ "openai/gpt-3.5-turbo-16k": {
6918
+ inputPerToken: 3e-6,
6919
+ outputPerToken: 4e-6
6920
+ },
6921
+ "openai/gpt-3.5-turbo-instruct": {
6922
+ inputPerToken: 15e-7,
6923
+ outputPerToken: 2e-6
6924
+ },
6925
+ "openai/gpt-4": {
6926
+ inputPerToken: 3e-5,
6927
+ outputPerToken: 6e-5
6928
+ },
6929
+ "openai/gpt-4-0314": {
6930
+ inputPerToken: 3e-5,
6931
+ outputPerToken: 6e-5
6932
+ },
6933
+ "openai/gpt-4-1106-preview": {
6934
+ inputPerToken: 1e-5,
6935
+ outputPerToken: 3e-5
6936
+ },
6937
+ "openai/gpt-4-turbo": {
6938
+ inputPerToken: 1e-5,
6939
+ outputPerToken: 3e-5
6940
+ },
6941
+ "openai/gpt-4-turbo-preview": {
6942
+ inputPerToken: 1e-5,
6943
+ outputPerToken: 3e-5
6944
+ },
6945
+ "openai/gpt-4.1": {
6946
+ inputPerToken: 2e-6,
6947
+ outputPerToken: 8e-6
6948
+ },
6949
+ "openai/gpt-4.1-mini": {
6950
+ inputPerToken: 4e-7,
6951
+ outputPerToken: 16e-7
6952
+ },
6953
+ "openai/gpt-4.1-nano": {
6954
+ inputPerToken: 1e-7,
6955
+ outputPerToken: 4e-7
6956
+ },
6957
+ "openai/gpt-4o": {
6958
+ inputPerToken: 25e-7,
6959
+ outputPerToken: 1e-5
6960
+ },
6961
+ "openai/gpt-4o-2024-05-13": {
6962
+ inputPerToken: 5e-6,
6963
+ outputPerToken: 15e-6
6964
+ },
6965
+ "openai/gpt-4o-2024-08-06": {
6966
+ inputPerToken: 25e-7,
6967
+ outputPerToken: 1e-5
6968
+ },
6969
+ "openai/gpt-4o-2024-11-20": {
6970
+ inputPerToken: 25e-7,
6971
+ outputPerToken: 1e-5
6972
+ },
6973
+ "openai/gpt-4o-audio-preview": {
6974
+ inputPerToken: 25e-7,
6975
+ outputPerToken: 1e-5
6976
+ },
6977
+ "openai/gpt-4o-mini": {
6978
+ inputPerToken: 15e-8,
6979
+ outputPerToken: 6e-7
6980
+ },
6981
+ "openai/gpt-4o-mini-2024-07-18": {
6982
+ inputPerToken: 15e-8,
6983
+ outputPerToken: 6e-7
6984
+ },
6985
+ "openai/gpt-4o-mini-search-preview": {
6986
+ inputPerToken: 15e-8,
6987
+ outputPerToken: 6e-7
6988
+ },
6989
+ "openai/gpt-4o-search-preview": {
6990
+ inputPerToken: 25e-7,
6991
+ outputPerToken: 1e-5
6992
+ },
6993
+ "openai/gpt-4o:extended": {
6994
+ inputPerToken: 6e-6,
6995
+ outputPerToken: 18e-6
6996
+ },
6997
+ "openai/gpt-5": {
6998
+ inputPerToken: 125e-8,
6999
+ outputPerToken: 1e-5
7000
+ },
7001
+ "openai/gpt-5-chat": {
7002
+ inputPerToken: 125e-8,
7003
+ outputPerToken: 1e-5
7004
+ },
7005
+ "openai/gpt-5-codex": {
7006
+ inputPerToken: 125e-8,
7007
+ outputPerToken: 1e-5
7008
+ },
7009
+ "openai/gpt-5-image": {
7010
+ inputPerToken: 1e-5,
7011
+ outputPerToken: 1e-5
7012
+ },
7013
+ "openai/gpt-5-image-mini": {
7014
+ inputPerToken: 25e-7,
7015
+ outputPerToken: 2e-6
7016
+ },
7017
+ "openai/gpt-5-mini": {
7018
+ inputPerToken: 25e-8,
7019
+ outputPerToken: 2e-6
7020
+ },
7021
+ "openai/gpt-5-nano": {
7022
+ inputPerToken: 5e-8,
7023
+ outputPerToken: 4e-7
7024
+ },
7025
+ "openai/gpt-5-pro": {
7026
+ inputPerToken: 15e-6,
7027
+ outputPerToken: 12e-5
7028
+ },
7029
+ "openai/gpt-5.1": {
7030
+ inputPerToken: 125e-8,
7031
+ outputPerToken: 1e-5
7032
+ },
7033
+ "openai/gpt-5.1-chat": {
7034
+ inputPerToken: 125e-8,
7035
+ outputPerToken: 1e-5
7036
+ },
7037
+ "openai/gpt-5.1-codex": {
7038
+ inputPerToken: 125e-8,
7039
+ outputPerToken: 1e-5
7040
+ },
7041
+ "openai/gpt-5.1-codex-max": {
7042
+ inputPerToken: 125e-8,
7043
+ outputPerToken: 1e-5
7044
+ },
7045
+ "openai/gpt-5.1-codex-mini": {
7046
+ inputPerToken: 25e-8,
7047
+ outputPerToken: 2e-6
7048
+ },
7049
+ "openai/gpt-5.2": {
7050
+ inputPerToken: 175e-8,
7051
+ outputPerToken: 14e-6
7052
+ },
7053
+ "openai/gpt-5.2-chat": {
7054
+ inputPerToken: 175e-8,
7055
+ outputPerToken: 14e-6
7056
+ },
7057
+ "openai/gpt-5.2-codex": {
7058
+ inputPerToken: 175e-8,
7059
+ outputPerToken: 14e-6
7060
+ },
7061
+ "openai/gpt-5.2-pro": {
7062
+ inputPerToken: 21e-6,
7063
+ outputPerToken: 168e-6
7064
+ },
7065
+ "openai/gpt-5.3-codex": {
7066
+ inputPerToken: 175e-8,
7067
+ outputPerToken: 14e-6
7068
+ },
7069
+ "openai/gpt-audio": {
7070
+ inputPerToken: 25e-7,
7071
+ outputPerToken: 1e-5
7072
+ },
7073
+ "openai/gpt-audio-mini": {
7074
+ inputPerToken: 6e-7,
7075
+ outputPerToken: 24e-7
7076
+ },
7077
+ "openai/gpt-oss-120b": {
7078
+ inputPerToken: 39e-9,
7079
+ outputPerToken: 19e-8
7080
+ },
7081
+ "openai/gpt-oss-120b:exacto": {
7082
+ inputPerToken: 39e-9,
7083
+ outputPerToken: 19e-8
7084
+ },
7085
+ "openai/gpt-oss-20b": {
7086
+ inputPerToken: 3e-8,
7087
+ outputPerToken: 14e-8
7088
+ },
7089
+ "openai/gpt-oss-safeguard-20b": {
7090
+ inputPerToken: 75e-9,
7091
+ outputPerToken: 3e-7
7092
+ },
7093
+ "openai/o1": {
7094
+ inputPerToken: 15e-6,
7095
+ outputPerToken: 6e-5
7096
+ },
7097
+ "openai/o1-pro": {
7098
+ inputPerToken: 15e-5,
7099
+ outputPerToken: 6e-4
7100
+ },
7101
+ "openai/o3": {
7102
+ inputPerToken: 2e-6,
7103
+ outputPerToken: 8e-6
7104
+ },
7105
+ "openai/o3-deep-research": {
7106
+ inputPerToken: 1e-5,
7107
+ outputPerToken: 4e-5
7108
+ },
7109
+ "openai/o3-mini": {
7110
+ inputPerToken: 11e-7,
7111
+ outputPerToken: 44e-7
7112
+ },
7113
+ "openai/o3-mini-high": {
7114
+ inputPerToken: 11e-7,
7115
+ outputPerToken: 44e-7
7116
+ },
7117
+ "openai/o3-pro": {
7118
+ inputPerToken: 2e-5,
7119
+ outputPerToken: 8e-5
7120
+ },
7121
+ "openai/o4-mini": {
7122
+ inputPerToken: 11e-7,
7123
+ outputPerToken: 44e-7
7124
+ },
7125
+ "openai/o4-mini-deep-research": {
7126
+ inputPerToken: 2e-6,
7127
+ outputPerToken: 8e-6
7128
+ },
7129
+ "openai/o4-mini-high": {
7130
+ inputPerToken: 11e-7,
7131
+ outputPerToken: 44e-7
7132
+ },
7133
+ "opengvlab/internvl3-78b": {
7134
+ inputPerToken: 15e-8,
7135
+ outputPerToken: 6e-7
7136
+ },
7137
+ "perplexity/sonar": {
7138
+ inputPerToken: 1e-6,
7139
+ outputPerToken: 1e-6
7140
+ },
7141
+ "perplexity/sonar-deep-research": {
7142
+ inputPerToken: 2e-6,
7143
+ outputPerToken: 8e-6
7144
+ },
7145
+ "perplexity/sonar-pro": {
7146
+ inputPerToken: 3e-6,
7147
+ outputPerToken: 15e-6
7148
+ },
7149
+ "perplexity/sonar-pro-search": {
7150
+ inputPerToken: 3e-6,
7151
+ outputPerToken: 15e-6
7152
+ },
7153
+ "perplexity/sonar-reasoning-pro": {
7154
+ inputPerToken: 2e-6,
7155
+ outputPerToken: 8e-6
7156
+ },
7157
+ "prime-intellect/intellect-3": {
7158
+ inputPerToken: 2e-7,
7159
+ outputPerToken: 11e-7
7160
+ },
7161
+ "qwen/qwen-2.5-72b-instruct": {
7162
+ inputPerToken: 12e-8,
7163
+ outputPerToken: 39e-8
7164
+ },
7165
+ "qwen/qwen-2.5-7b-instruct": {
7166
+ inputPerToken: 4e-8,
7167
+ outputPerToken: 1e-7
7168
+ },
7169
+ "qwen/qwen-2.5-coder-32b-instruct": {
7170
+ inputPerToken: 20000000000000002e-23,
7171
+ outputPerToken: 20000000000000002e-23
7172
+ },
7173
+ "qwen/qwen-2.5-vl-7b-instruct": {
7174
+ inputPerToken: 20000000000000002e-23,
7175
+ outputPerToken: 20000000000000002e-23
7176
+ },
7177
+ "qwen/qwen-max": {
7178
+ inputPerToken: 16e-7,
7179
+ outputPerToken: 64e-7
7180
+ },
7181
+ "qwen/qwen-plus": {
7182
+ inputPerToken: 4e-7,
7183
+ outputPerToken: 12e-7
7184
+ },
7185
+ "qwen/qwen-plus-2025-07-28": {
7186
+ inputPerToken: 4e-7,
7187
+ outputPerToken: 12e-7
7188
+ },
7189
+ "qwen/qwen-plus-2025-07-28:thinking": {
7190
+ inputPerToken: 4e-7,
7191
+ outputPerToken: 12e-7
7192
+ },
7193
+ "qwen/qwen-turbo": {
7194
+ inputPerToken: 5e-8,
7195
+ outputPerToken: 2e-7
7196
+ },
7197
+ "qwen/qwen-vl-max": {
7198
+ inputPerToken: 8e-7,
7199
+ outputPerToken: 32e-7
7200
+ },
7201
+ "qwen/qwen-vl-plus": {
7202
+ inputPerToken: 21e-8,
7203
+ outputPerToken: 63e-8
7204
+ },
7205
+ "qwen/qwen2.5-coder-7b-instruct": {
7206
+ inputPerToken: 3e-8,
7207
+ outputPerToken: 9e-8
7208
+ },
7209
+ "qwen/qwen2.5-vl-32b-instruct": {
7210
+ inputPerToken: 2e-7,
7211
+ outputPerToken: 6e-7
7212
+ },
7213
+ "qwen/qwen2.5-vl-72b-instruct": {
7214
+ inputPerToken: 8e-7,
7215
+ outputPerToken: 8e-7
7216
+ },
7217
+ "qwen/qwen3-14b": {
7218
+ inputPerToken: 6e-8,
7219
+ outputPerToken: 24e-8
7220
+ },
7221
+ "qwen/qwen3-235b-a22b": {
7222
+ inputPerToken: 455e-9,
7223
+ outputPerToken: 182e-8
7224
+ },
7225
+ "qwen/qwen3-235b-a22b-2507": {
7226
+ inputPerToken: 71e-9,
7227
+ outputPerToken: 1e-7
7228
+ },
7229
+ "qwen/qwen3-30b-a3b": {
7230
+ inputPerToken: 8e-8,
7231
+ outputPerToken: 28e-8
7232
+ },
7233
+ "qwen/qwen3-30b-a3b-instruct-2507": {
7234
+ inputPerToken: 9e-8,
7235
+ outputPerToken: 3e-7
7236
+ },
7237
+ "qwen/qwen3-30b-a3b-thinking-2507": {
7238
+ inputPerToken: 51e-9,
7239
+ outputPerToken: 34e-8
7240
+ },
7241
+ "qwen/qwen3-32b": {
7242
+ inputPerToken: 8e-8,
7243
+ outputPerToken: 24e-8
7244
+ },
7245
+ "qwen/qwen3-8b": {
7246
+ inputPerToken: 5e-8,
7247
+ outputPerToken: 4e-7
7248
+ },
7249
+ "qwen/qwen3-coder": {
7250
+ inputPerToken: 22e-8,
7251
+ outputPerToken: 1e-6
7252
+ },
7253
+ "qwen/qwen3-coder-30b-a3b-instruct": {
7254
+ inputPerToken: 7e-8,
7255
+ outputPerToken: 27e-8
7256
+ },
7257
+ "qwen/qwen3-coder-flash": {
7258
+ inputPerToken: 3e-7,
7259
+ outputPerToken: 15e-7
7260
+ },
7261
+ "qwen/qwen3-coder-next": {
7262
+ inputPerToken: 12e-8,
7263
+ outputPerToken: 75e-8
7264
+ },
7265
+ "qwen/qwen3-coder-plus": {
7266
+ inputPerToken: 1e-6,
7267
+ outputPerToken: 5e-6
7268
+ },
7269
+ "qwen/qwen3-coder:exacto": {
7270
+ inputPerToken: 22e-8,
7271
+ outputPerToken: 18e-7
7272
+ },
7273
+ "qwen/qwen3-max": {
7274
+ inputPerToken: 12e-7,
7275
+ outputPerToken: 6e-6
7276
+ },
7277
+ "qwen/qwen3-max-thinking": {
7278
+ inputPerToken: 12e-7,
7279
+ outputPerToken: 6e-6
7280
+ },
7281
+ "qwen/qwen3-next-80b-a3b-instruct": {
7282
+ inputPerToken: 9e-8,
7283
+ outputPerToken: 11e-7
7284
+ },
7285
+ "qwen/qwen3-next-80b-a3b-thinking": {
7286
+ inputPerToken: 15e-8,
7287
+ outputPerToken: 12e-7
7288
+ },
7289
+ "qwen/qwen3-vl-235b-a22b-instruct": {
7290
+ inputPerToken: 2e-7,
7291
+ outputPerToken: 88e-8
7292
+ },
7293
+ "qwen/qwen3-vl-30b-a3b-instruct": {
7294
+ inputPerToken: 13e-8,
7295
+ outputPerToken: 52e-8
7296
+ },
7297
+ "qwen/qwen3-vl-32b-instruct": {
7298
+ inputPerToken: 104e-9,
7299
+ outputPerToken: 416e-9
7300
+ },
7301
+ "qwen/qwen3-vl-8b-instruct": {
7302
+ inputPerToken: 8e-8,
7303
+ outputPerToken: 5e-7
7304
+ },
7305
+ "qwen/qwen3-vl-8b-thinking": {
7306
+ inputPerToken: 117e-9,
7307
+ outputPerToken: 1365e-9
7308
+ },
7309
+ "qwen/qwen3.5-122b-a10b": {
7310
+ inputPerToken: 4e-7,
7311
+ outputPerToken: 32e-7
7312
+ },
7313
+ "qwen/qwen3.5-27b": {
7314
+ inputPerToken: 3e-7,
7315
+ outputPerToken: 24e-7
7316
+ },
7317
+ "qwen/qwen3.5-35b-a3b": {
7318
+ inputPerToken: 25e-8,
7319
+ outputPerToken: 2e-6
7320
+ },
7321
+ "qwen/qwen3.5-397b-a17b": {
7322
+ inputPerToken: 55e-8,
7323
+ outputPerToken: 35e-7
7324
+ },
7325
+ "qwen/qwen3.5-flash-02-23": {
7326
+ inputPerToken: 1e-7,
7327
+ outputPerToken: 4e-7
7328
+ },
7329
+ "qwen/qwen3.5-plus-02-15": {
7330
+ inputPerToken: 4e-7,
7331
+ outputPerToken: 24e-7
7332
+ },
7333
+ "qwen/qwq-32b": {
7334
+ inputPerToken: 15e-8,
7335
+ outputPerToken: 4e-7
7336
+ },
7337
+ "raifle/sorcererlm-8x22b": {
7338
+ inputPerToken: 45e-7,
7339
+ outputPerToken: 45e-7
7340
+ },
7341
+ "relace/relace-apply-3": {
7342
+ inputPerToken: 85e-8,
7343
+ outputPerToken: 125e-8
7344
+ },
7345
+ "relace/relace-search": {
7346
+ inputPerToken: 1e-6,
7347
+ outputPerToken: 3e-6
7348
+ },
7349
+ "sao10k/l3-euryale-70b": {
7350
+ inputPerToken: 148e-8,
7351
+ outputPerToken: 148e-8
7352
+ },
7353
+ "sao10k/l3-lunaris-8b": {
7354
+ inputPerToken: 4e-8,
7355
+ outputPerToken: 5e-8
7356
+ },
7357
+ "sao10k/l3.1-70b-hanami-x1": {
7358
+ inputPerToken: 3e-6,
7359
+ outputPerToken: 3e-6
7360
+ },
7361
+ "sao10k/l3.1-euryale-70b": {
7362
+ inputPerToken: 65e-8,
7363
+ outputPerToken: 75e-8
7364
+ },
7365
+ "sao10k/l3.3-euryale-70b": {
7366
+ inputPerToken: 65e-8,
7367
+ outputPerToken: 75e-8
7368
+ },
7369
+ "stepfun/step-3.5-flash": {
7370
+ inputPerToken: 1e-7,
7371
+ outputPerToken: 3e-7
7372
+ },
7373
+ "switchpoint/router": {
7374
+ inputPerToken: 85e-8,
7375
+ outputPerToken: 34e-7
7376
+ },
7377
+ "tencent/hunyuan-a13b-instruct": {
7378
+ inputPerToken: 14e-8,
7379
+ outputPerToken: 57e-8
7380
+ },
7381
+ "thedrummer/cydonia-24b-v4.1": {
7382
+ inputPerToken: 3e-7,
7383
+ outputPerToken: 5e-7
7384
+ },
7385
+ "thedrummer/rocinante-12b": {
7386
+ inputPerToken: 17e-8,
7387
+ outputPerToken: 43e-8
7388
+ },
7389
+ "thedrummer/skyfall-36b-v2": {
7390
+ inputPerToken: 55e-8,
7391
+ outputPerToken: 8e-7
7392
+ },
7393
+ "thedrummer/unslopnemo-12b": {
7394
+ inputPerToken: 4e-7,
7395
+ outputPerToken: 4e-7
7396
+ },
7397
+ "tngtech/deepseek-r1t2-chimera": {
7398
+ inputPerToken: 25e-8,
7399
+ outputPerToken: 85e-8
7400
+ },
7401
+ "undi95/remm-slerp-l2-13b": {
7402
+ inputPerToken: 45e-8,
7403
+ outputPerToken: 65e-8
7404
+ },
7405
+ "writer/palmyra-x5": {
7406
+ inputPerToken: 6e-7,
7407
+ outputPerToken: 6e-6
7408
+ },
7409
+ "xai/grok-3": {
7410
+ inputPerToken: 3e-6,
7411
+ outputPerToken: 15e-6
7412
+ },
7413
+ "xai/grok-3-beta": {
7414
+ inputPerToken: 3e-6,
7415
+ outputPerToken: 15e-6
7416
+ },
7417
+ "xai/grok-3-mini": {
7418
+ inputPerToken: 3e-7,
7419
+ outputPerToken: 5e-7
7420
+ },
7421
+ "xai/grok-3-mini-beta": {
7422
+ inputPerToken: 3e-7,
7423
+ outputPerToken: 5e-7
7424
+ },
7425
+ "xai/grok-4": {
7426
+ inputPerToken: 3e-6,
7427
+ outputPerToken: 15e-6
7428
+ },
7429
+ "xai/grok-4-fast": {
7430
+ inputPerToken: 2e-7,
7431
+ outputPerToken: 5e-7
7432
+ },
7433
+ "xai/grok-4.1-fast": {
7434
+ inputPerToken: 2e-7,
7435
+ outputPerToken: 5e-7
7436
+ },
7437
+ "xai/grok-code-fast-1": {
7438
+ inputPerToken: 2e-7,
7439
+ outputPerToken: 15e-7
7440
+ },
7441
+ "xiaomi/mimo-v2-flash": {
7442
+ inputPerToken: 9e-8,
7443
+ outputPerToken: 29e-8
7444
+ },
7445
+ "z-ai/glm-4-32b": {
7446
+ inputPerToken: 1e-7,
7447
+ outputPerToken: 1e-7
7448
+ },
7449
+ "z-ai/glm-4.5": {
7450
+ inputPerToken: 55e-8,
7451
+ outputPerToken: 2e-6
7452
+ },
7453
+ "z-ai/glm-4.5-air": {
7454
+ inputPerToken: 13e-8,
7455
+ outputPerToken: 85e-8
7456
+ },
7457
+ "z-ai/glm-4.5v": {
7458
+ inputPerToken: 6e-7,
7459
+ outputPerToken: 18e-7
7460
+ },
7461
+ "z-ai/glm-4.6": {
7462
+ inputPerToken: 35e-8,
7463
+ outputPerToken: 171e-8
7464
+ },
7465
+ "z-ai/glm-4.6:exacto": {
7466
+ inputPerToken: 44e-8,
7467
+ outputPerToken: 176e-8
7468
+ },
7469
+ "z-ai/glm-4.6v": {
7470
+ inputPerToken: 3e-7,
7471
+ outputPerToken: 9e-7
7472
+ },
7473
+ "z-ai/glm-4.7": {
7474
+ inputPerToken: 3e-7,
7475
+ outputPerToken: 14e-7
7476
+ },
7477
+ "z-ai/glm-4.7-flash": {
7478
+ inputPerToken: 6e-8,
7479
+ outputPerToken: 4e-7
7480
+ },
7481
+ "z-ai/glm-5": {
7482
+ inputPerToken: 95e-8,
7483
+ outputPerToken: 255e-8
7484
+ }
7485
+ }
7486
+ };
7487
+
7488
+ // src/pricing/lookup.ts
7489
// Pricing entries keyed by catalog id ("<provider>/<model>").
var models = catalog_default.models;
// Reverse index: bare model name (everything after the first "/") -> the
// first catalog key that carries that name. Keys without a "/" are skipped.
var modelNameIndex = new Map();
Object.keys(models).forEach((catalogKey) => {
  const slashAt = catalogKey.indexOf("/");
  const bareName = slashAt === -1 ? "" : catalogKey.slice(slashAt + 1);
  if (bareName === "" || modelNameIndex.has(bareName)) return;
  modelNameIndex.set(bareName, catalogKey);
});
7497
/**
 * Resolve the pricing entry for a provider id.
 *
 * Resolution order:
 *   1. the id as an exact catalog key;
 *   2. the model part (text after the first "/") under the "openai/" prefix;
 *   3. any catalog entry indexed under the same bare model name.
 *
 * @param {string} providerId - Id of the form "<provider>/<model>".
 * @returns {object | undefined} The catalog entry, or undefined when the id is
 *   not a string, has no model part, or nothing matches.
 */
function lookupPricing(providerId) {
  if (typeof providerId !== "string") return void 0;
  // Own-property lookup only: a bare `models[key]` also resolves inherited
  // members such as "toString" or "constructor" and would hand back a function.
  const entryFor = (key) => Object.prototype.hasOwnProperty.call(models, key) ? models[key] : void 0;
  const exact = entryFor(providerId);
  if (exact) return exact;
  const slashAt = providerId.indexOf("/");
  const model = slashAt === -1 ? "" : providerId.slice(slashAt + 1);
  if (!model) return void 0;
  const viaOpenai = entryFor(`openai/${model}`);
  if (viaOpenai) return viaOpenai;
  const crossKey = modelNameIndex.get(model);
  return crossKey ? entryFor(crossKey) : void 0;
}
7507
/**
 * Cost of one call: prompt tokens at the input rate plus completion tokens at
 * the output rate.
 *
 * @param {{inputPerToken: number, outputPerToken: number}} pricing
 * @param {number} promptTokens
 * @param {number} completionTokens
 * @returns {number}
 */
function estimateCost(pricing, promptTokens, completionTokens) {
  const { inputPerToken, outputPerToken } = pricing;
  const inputCost = inputPerToken * promptTokens;
  const outputCost = outputPerToken * completionTokens;
  return inputCost + outputCost;
}
7510
+
7511
+ // src/scorers/cost.ts
7512
/**
 * Scorer "cost": estimated spend for one result, priced through the catalog.
 * Missing token counts are treated as 0. When no pricing entry exists for the
 * provider id the value is -1 and `details.estimatedUsd` is null.
 */
var costScorer = ({ result }, providerId) => {
  const usage = result.usage;
  const promptTokens = usage?.promptTokens ?? 0;
  const completionTokens = usage?.completionTokens ?? 0;
  const tokenCounts = {
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens
  };
  const pricing = lookupPricing(providerId);
  if (pricing) {
    const usd = estimateCost(pricing, promptTokens, completionTokens);
    return {
      name: "cost",
      value: usd,
      details: { estimatedUsd: usd, ...tokenCounts }
    };
  }
  return {
    name: "cost",
    value: -1,
    details: {
      estimatedUsd: null,
      ...tokenCounts,
      note: "No pricing data available for this model"
    }
  };
};
7542
+
7543
+ // src/scorers/correctness.ts
7544
/**
 * Scorer "correctness": 1 when the (normalized) output matches `task.expected`,
 * 0 when it does not, and 0.5 when the task declares no expected value.
 * `details.actual` reports the raw output, not the normalized one.
 */
var correctnessScorer = ({ task, result }) => {
  const { expected } = task;
  if (expected === void 0) {
    return { name: "correctness", value: 0.5, details: { reason: "no expected value" } };
  }
  const candidate = normalizeOutput(expected, result.output);
  const value = deepEqual(expected, candidate) ? 1 : 0;
  return {
    name: "correctness",
    value,
    details: { expected, actual: result.output }
  };
};
7556
/**
 * Unwrap an array that came back wrapped in an object.
 *
 * When an array is expected but `actual` is a non-array object holding exactly
 * one array-valued property, that array is returned. In every other case
 * `actual` is returned unchanged.
 */
function normalizeOutput(expected, actual) {
  const actualIsNonArrayObject = typeof actual === "object" && actual !== null && !Array.isArray(actual);
  if (!Array.isArray(expected) || !actualIsNonArrayObject) {
    return actual;
  }
  const arrayValues = Object.values(actual).filter((candidate) => Array.isArray(candidate));
  return arrayValues.length === 1 ? arrayValues[0] : actual;
}
7566
/**
 * Lenient structural comparison used by the correctness scorer.
 *
 * - Strings match after trimming and lower-casing.
 * - Arrays must have the same length and match element by element.
 * - Objects match when every key of `expected` exists on `actual` with a
 *   matching value; extra keys on `actual` are ignored.
 * - An array never matches a non-array object (and vice versa).
 *
 * @param {unknown} expected
 * @param {unknown} actual
 * @returns {boolean}
 */
function deepEqual(expected, actual) {
  if (expected === actual) return true;
  if (typeof expected === "string" && typeof actual === "string") {
    return expected.trim().toLowerCase() === actual.trim().toLowerCase();
  }
  if (typeof expected !== typeof actual) return false;
  // Identical values (including null === null) were accepted above.
  if (expected === null || actual === null) return false;
  const expectedIsArray = Array.isArray(expected);
  // Without this guard an array and a plain object fall into the key-subset
  // branch below, so e.g. an expected `[]` would match any object.
  if (expectedIsArray !== Array.isArray(actual)) return false;
  if (expectedIsArray) {
    if (expected.length !== actual.length) return false;
    return expected.every((val, i7) => deepEqual(val, actual[i7]));
  }
  if (typeof expected === "object") {
    return Object.keys(expected).every((key) => key in actual && deepEqual(expected[key], actual[key]));
  }
  return false;
}
7585
+
7586
+ // src/scorers/schema-correctness.ts
7587
/**
 * Scorer "schema-correctness": validates the output with `task.schema.safeParse`.
 *
 * - -1 when the task has no schema.
 * - String outputs are JSON-parsed first; unparseable text scores 0.
 * - If validation fails on a non-array object that holds exactly one
 *   array-valued property, that array is validated as a second chance.
 * - 1 on success, otherwise 0 with the issue messages in `details.errors`.
 */
var schemaCorrectnessScorer = ({ task, result }) => {
  const scorerName = "schema-correctness";
  const { schema } = task;
  if (!schema) {
    return { name: scorerName, value: -1, details: { reason: "no schema defined" } };
  }
  let data = result.output;
  if (typeof data === "string") {
    try {
      data = JSON.parse(data);
    } catch {
      return { name: scorerName, value: 0, details: { reason: "output is not valid JSON" } };
    }
  }
  let parsed = schema.safeParse(data);
  const isNonArrayObject = typeof data === "object" && data !== null && !Array.isArray(data);
  if (!parsed.success && isNonArrayObject) {
    const arrayValues = Object.values(data).filter((candidate) => Array.isArray(candidate));
    if (arrayValues.length === 1) {
      const unwrapped = schema.safeParse(arrayValues[0]);
      if (unwrapped.success) parsed = unwrapped;
    }
  }
  if (parsed.success) {
    return { name: scorerName, value: 1, details: { valid: true } };
  }
  return {
    name: scorerName,
    value: 0,
    details: { valid: false, errors: parsed.error.issues.map((issue) => issue.message) }
  };
};
7617
+
7618
+ // src/scorers/fuzzy-similarity.ts
7619
/**
 * Scorer "fuzzy-similarity": Jaccard similarity between the word tokens of the
 * expected value and of the output, rounded to two decimals. Returns -1 when
 * the task declares no expected value.
 */
var fuzzySimilarityScorer = ({ task, result }) => {
  if (task.expected === void 0) {
    return { name: "fuzzy-similarity", value: -1, details: { reason: "no expected value" } };
  }
  const [expectedTokens, actualTokens] = [task.expected, result.output].map(
    (value) => tokenize(stringify(value))
  );
  const rounded = Math.round(jaccardSimilarity(expectedTokens, actualTokens) * 100) / 100;
  return {
    name: "fuzzy-similarity",
    value: rounded,
    details: {
      method: "jaccard",
      expectedTokens: expectedTokens.size,
      actualTokens: actualTokens.size
    }
  };
};
7634
/**
 * Lower-cased text form of a value for token comparison.
 *
 * Strings are used as-is; everything else goes through JSON.stringify.
 * JSON.stringify returns undefined (not a string) for undefined, functions and
 * symbols, so those map to "" instead of throwing on `.toLowerCase()`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stringify(value) {
  if (typeof value === "string") return value.toLowerCase();
  const json = JSON.stringify(value);
  return typeof json === "string" ? json.toLowerCase() : "";
}
7638
/** Split text into the set of its distinct `\w+` runs (empty set when none). */
function tokenize(text) {
  const words = text.match(/\w+/g);
  if (words == null) {
    return new Set();
  }
  return new Set(words);
}
7641
/**
 * Jaccard index |A ∩ B| / |A ∪ B| of two sets. Two empty sets count as
 * identical (1).
 */
function jaccardSimilarity(a7, b3) {
  if (a7.size === 0 && b3.size === 0) return 1;
  const shared = [...a7].filter((token) => b3.has(token)).length;
  const unionSize = a7.size + b3.size - shared;
  if (unionSize === 0) return 1;
  return shared / unionSize;
}
7650
+
7651
+ // src/scorers/llm-judge.ts
7652
+ import OpenAI2, { AzureOpenAI as AzureOpenAI2 } from "openai";
7653
+
7654
+ // src/providers/openai.ts
7655
+ import OpenAI, { AzureOpenAI } from "openai";
7656
+ import { zodToJsonSchema as zodToJsonSchema2 } from "zod-to-json-schema";
7657
+
7658
+ // src/providers/shared.ts
7659
+ import { zodToJsonSchema } from "zod-to-json-schema";
7660
+
7661
+ // src/providers/openai.ts
7662
// Default request timeout in milliseconds (60 seconds).
var REQUEST_TIMEOUT_MS = 60 * 1e3;
7663
+
7664
+ // src/scorers/llm-judge.ts
7665
// Prompt template for the LLM judge. The {task}, {expected} and {actual}
// placeholders are filled in per call; the reply format requested here is the
// one the response parser expects (one "<criterion>: <number>" per line).
var JUDGE_PROMPT = [
  "You are a strict scoring judge. Evaluate the actual output against the expected output on three criteria. Score each from 0.0 to 1.0 using the full range (not just 0, 0.5, 1).",
  "",
  "Criteria:",
  "1. Accuracy \u2014 are the facts, entities, and claims correct? Penalize hallucinations or wrong details.",
  "2. Completeness \u2014 does it capture all key information from the expected output? Penalize missing points.",
  "3. Conciseness \u2014 is it free of unnecessary filler, repetition, or tangential content? Penalize verbosity.",
  "",
  "Respond with ONLY this exact format \u2014 three lines, no other text:",
  "accuracy: <number>",
  "completeness: <number>",
  "conciseness: <number>",
  "",
  "Task: {task}",
  "Expected: {expected}",
  "Actual: {actual}"
].join("\n");
7680
/**
 * Pick the API client used for judge calls.
 *
 * Model: `configModel`, else $DUELIST_JUDGE_MODEL, else "gpt-5-mini".
 * Client selection, in order:
 *   1. model name starts with "gemini" and GOOGLE_API_KEY is set -> OpenAI
 *      client pointed at Google's OpenAI-compatible endpoint;
 *   2. no OPENAI_API_KEY but AZURE_OPENAI_API_KEY is set -> Azure client with
 *      the model name as deployment;
 *   3. OPENAI_API_KEY is set -> plain OpenAI client.
 *
 * @returns {{client: object, model: string} | undefined} undefined when no
 *   usable API key is configured.
 */
function resolveJudgeClient(configModel, timeoutMs = REQUEST_TIMEOUT_MS) {
  const env = process.env;
  const model = configModel ?? env.DUELIST_JUDGE_MODEL ?? "gpt-5-mini";
  const wantsGemini = model.startsWith("gemini");
  if (wantsGemini && env.GOOGLE_API_KEY) {
    const geminiClient = new OpenAI2({
      apiKey: env.GOOGLE_API_KEY,
      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
      timeout: timeoutMs
    });
    return { client: geminiClient, model };
  }
  const openaiKey = env.OPENAI_API_KEY;
  if (!openaiKey && env.AZURE_OPENAI_API_KEY) {
    const azureClient = new AzureOpenAI2({
      apiKey: env.AZURE_OPENAI_API_KEY,
      endpoint: env.AZURE_OPENAI_ENDPOINT,
      apiVersion: env.AZURE_OPENAI_API_VERSION ?? "2024-12-01-preview",
      deployment: model,
      timeout: timeoutMs
    });
    return { client: azureClient, model };
  }
  if (!openaiKey) return void 0;
  return { client: new OpenAI2({ apiKey: openaiKey, timeout: timeoutMs }), model };
}
7708
/**
 * Heuristic: does this error message say the `temperature` parameter was
 * rejected? True when the lower-cased message mentions "temperature" together
 * with one of the rejection phrases below.
 */
function isTemperatureError(err) {
  const rawMessage = err instanceof Error ? err.message : String(err);
  const text = rawMessage.toLowerCase();
  if (!text.includes("temperature")) return false;
  const rejectionPhrases = ["not supported", "is not allowed", "unsupported", "invalid"];
  return rejectionPhrases.some((phrase) => text.includes(phrase));
}
7713
/**
 * Build the "llm-judge-correctness" scorer.
 *
 * The judge client is resolved lazily on first use and cached for the lifetime
 * of the returned scorer. If a call is rejected because the model does not
 * accept the `temperature` parameter, the call is retried once without it and
 * later calls skip the parameter.
 *
 * The scorer resolves to value -1 (never rejects) when there is no expected
 * value, no API key, or the judge call fails.
 *
 * @param {string} [judgeModel] - Judge model name, forwarded to resolveJudgeClient.
 * @param {number} [timeoutMs] - Request timeout, forwarded to resolveJudgeClient.
 * @returns {(input: {task: object, result: object}) => Promise<object>}
 */
function createLlmJudgeScorer(judgeModel, timeoutMs = REQUEST_TIMEOUT_MS) {
  const scorerName = "llm-judge-correctness";
  let cached = void 0;
  let useTemperature = true;
  const skipped = (reason) => ({ name: scorerName, value: -1, details: { reason } });
  const failed = (err) => skipped(`judge call failed: ${err instanceof Error ? err.message : String(err)}`);
  const buildPrompt = (task, result) => {
    const fields = {
      task: task.prompt,
      expected: JSON.stringify(task.expected),
      actual: JSON.stringify(result.output)
    };
    // Single pass with a replacer function: a string replacement would expand
    // "$&" / "$$" sequences found in the inserted text, and chained replaces
    // could substitute a placeholder that appears inside the task prompt.
    return JUDGE_PROMPT.replace(/\{(task|expected|actual)\}/g, (_match, field) => String(fields[field]));
  };
  return async ({ task, result }) => {
    if (task.expected === void 0) return skipped("no expected value");
    if (cached === void 0) {
      // null marks "resolved, but nothing available" so we do not retry every call.
      cached = resolveJudgeClient(judgeModel, timeoutMs) ?? null;
    }
    if (!cached) return skipped("no API key available for judge model");
    const { client, model } = cached;
    const messages = [{ role: "user", content: buildPrompt(task, result) }];
    // Remember what THIS call sent. The shared flag may be flipped by a
    // concurrent call while we are awaiting; deciding on the shared flag would
    // then skip the retry and report a spurious failure.
    const sentTemperature = useTemperature;
    try {
      const response = await callJudge(client, model, messages, sentTemperature);
      return parseJudgeResponse(response, model);
    } catch (err) {
      if (!sentTemperature || !isTemperatureError(err)) return failed(err);
      useTemperature = false;
    }
    try {
      const response = await callJudge(client, model, messages, false);
      return parseJudgeResponse(response, model);
    } catch (retryErr) {
      return failed(retryErr);
    }
  };
}
7758
/**
 * Send one chat-completion request to the judge model (2048 completion-token
 * cap). `temperature: 0` is included only when `withTemperature` is truthy.
 */
async function callJudge(client, model, messages, withTemperature) {
  const request = { model, messages, max_completion_tokens: 2048 };
  if (withTemperature) {
    request.temperature = 0;
  }
  return client.chat.completions.create(request);
}
7766
/**
 * Turn the judge's reply into a score.
 *
 * Reads one "<criterion>: <number>" line each for accuracy, completeness and
 * conciseness. Leading whitespace, list bullets ("-", "*") and markdown bold
 * markers around the criterion name are tolerated, since models do not always
 * follow the bare three-line format. The composite is the mean of the three,
 * rounded to two decimals.
 *
 * @param {object} response - Chat-completion response; reads `choices[0].message.content`.
 * @param {string} model - Judge model name, echoed in `details`.
 * @returns {{name: string, value: number, details: object}} value is -1 when a
 *   score is missing, not a number, or outside [0, 1].
 */
function parseJudgeResponse(response, model) {
  const content = response.choices[0]?.message?.content?.trim() ?? "";
  const scoreLine = /^[\s\-*]*(accuracy|completeness|conciseness)[\s*]*:[\s*]*([\d.]+)/i;
  const parsed = {};
  for (const line of content.split("\n")) {
    const match = line.match(scoreLine);
    if (match) parsed[match[1].toLowerCase()] = Number.parseFloat(match[2]);
  }
  const { accuracy, completeness, conciseness } = parsed;
  const allValid = [accuracy, completeness, conciseness].every(
    (score) => typeof score === "number" && !Number.isNaN(score) && score >= 0 && score <= 1
  );
  if (!allValid) {
    return {
      name: "llm-judge-correctness",
      value: -1,
      details: { reason: `judge returned unparseable scores: "${content}"`, model }
    };
  }
  const composite = Math.round((accuracy + completeness + conciseness) / 3 * 100) / 100;
  return {
    name: "llm-judge-correctness",
    value: composite,
    details: { model, accuracy, completeness, conciseness }
  };
}
7790
+
7791
+ // src/scorers/tool-usage.ts
7792
/**
 * Scorer "tool-usage": 1 when any recorded tool call is named after the FIRST
 * tool configured on the task, 0 otherwise, and -1 when the task has no
 * (named) first tool.
 */
var toolUsageScorer = ({ task, result }) => {
  const expectedToolName = task.tools?.[0]?.name;
  if (!expectedToolName) {
    return { name: "tool-usage", value: -1, details: { reason: "no tools configured on task" } };
  }
  const recordedCalls = result.toolCalls;
  let usedTool = false;
  if (recordedCalls != null) {
    usedTool = recordedCalls.some((call) => call.name === expectedToolName);
  }
  return {
    name: "tool-usage",
    value: usedTool ? 1 : 0,
    details: { expectedToolName, usedTool, toolCalls: recordedCalls ?? [] }
  };
};
7804
+
7805
+ // src/scorers/index.ts
7806
// Scorers that need no per-arena configuration, keyed by their public name.
var staticScorers = {
  latency: latencyScorer,
  cost: costScorer,
  correctness: correctnessScorer,
  "schema-correctness": schemaCorrectnessScorer,
  "fuzzy-similarity": fuzzySimilarityScorer,
  "tool-usage": toolUsageScorer
};
/**
 * Map scorer names to scorer functions.
 *
 * "llm-judge-correctness" gets a fresh judge scorer per occurrence, built from
 * `judgeModel` and `timeoutMs`; every other name must be a key of `staticScorers`.
 *
 * @param {string[]} names
 * @param {string} [judgeModel]
 * @param {number} [timeoutMs]
 * @returns {Function[]} Scorers in the same order as `names`.
 * @throws {Error} `Unknown scorer: "<name>"` for any unrecognised name.
 */
function resolveScorers(names, judgeModel, timeoutMs) {
  return names.map((name) => {
    if (name === "llm-judge-correctness") {
      return createLlmJudgeScorer(judgeModel, timeoutMs);
    }
    // Own-property check: a bare `staticScorers[name]` would accept inherited
    // members such as "toString" or "constructor" as if they were scorers.
    const isRegistered = Object.prototype.hasOwnProperty.call(staticScorers, name);
    const scorer = isRegistered ? staticScorers[name] : void 0;
    if (!scorer) {
      throw new Error(`Unknown scorer: "${name}"`);
    }
    return scorer;
  });
}
7826
+
7827
+ // src/runner.ts
7828
// Timeout applied to a provider run when the caller supplies none (60 seconds).
var DEFAULT_TIMEOUT_MS = 6e4;
/**
 * Run an abortable operation with a deadline.
 *
 * `run` receives an AbortSignal. If it has not settled after `ms`
 * milliseconds, the signal is aborted and the returned promise rejects with
 * "Request timed out after <ms>ms". Otherwise the returned promise settles
 * exactly like `run`'s result.
 *
 * The timer is cleared on every exit path, including when `run` throws
 * synchronously, so no stray timer fires (and aborts the signal) afterwards.
 *
 * @param {(signal: AbortSignal) => Promise<*>} run
 * @param {number} ms
 * @returns {Promise<*>}
 */
function withTimeout(run, ms) {
  const controller = new AbortController();
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      controller.abort();
      reject(new Error(`Request timed out after ${ms}ms`));
    }, ms);
  });
  let work;
  try {
    work = Promise.resolve(run(controller.signal));
  } catch (err) {
    // Surface a synchronous throw as a rejection, like any other failure.
    work = Promise.reject(err);
  }
  return Promise.race([work, deadline]).finally(() => clearTimeout(timer));
}
7848
/**
 * Run every task `runs` times against every provider and score the results.
 *
 * Tasks and runs execute sequentially; within one run all providers execute
 * concurrently. A failure while running or scoring a provider becomes a result
 * with `error` set and empty scores rather than rejecting the whole benchmark.
 * `onResult`, when given, is called once per provider result.
 *
 * @param {object} options - { providers, tasks, scorers, runs, onResult?, timeout? }
 * @returns {Promise<object[]>} Results ordered by task, then run, then provider.
 */
async function runBenchmarks(options) {
  const { providers, tasks, scorers, runs, onResult } = options;
  const timeout = options.timeout ?? DEFAULT_TIMEOUT_MS;

  // Execute and score a single provider for one (task, run) pair.
  const benchmarkProvider = async (provider, task, run) => {
    let result;
    try {
      const invoke = (signal) => provider.run({
        prompt: task.prompt,
        schema: task.schema,
        tools: task.tools,
        signal,
        timeout
      });
      const taskResult = await withTimeout(invoke, timeout);
      const pendingScores = scorers.map((scorer) => scorer({ task, result: taskResult }, provider.id));
      const scores = await Promise.all(pendingScores);
      const { output, latencyMs, usage, toolCalls } = taskResult;
      result = {
        providerId: provider.id,
        taskName: task.name,
        run,
        scores,
        raw: { output, latencyMs, usage, toolCalls }
      };
    } catch (err) {
      result = {
        providerId: provider.id,
        taskName: task.name,
        run,
        scores: [],
        error: err instanceof Error ? err.message : String(err),
        raw: { output: "", latencyMs: 0 }
      };
    }
    onResult?.(result);
    return result;
  };

  const results = [];
  for (const task of tasks) {
    let run = 1;
    while (run <= runs) {
      const currentRun = run;
      const runResults = await Promise.all(
        providers.map((provider) => benchmarkProvider(provider, task, currentRun))
      );
      results.push(...runResults);
      run += 1;
    }
  }
  return results;
}
7900
+
6200
7901
  // src/utils/format.ts
6201
7902
  var MAX_FRACTION_DIGITS = 100;
6202
7903
  function formatCost(usd) {
@@ -6368,37 +8069,76 @@ function computeColumnStats(providerData, scorerNames) {
6368
8069
  }
6369
8070
  return stats;
6370
8071
  }
8072
// Scorer columns that measure answer quality; any other column counts as an
// efficiency metric.
var QUALITY_SCORERS = new Set([
  "correctness",
  "schema-correctness",
  "fuzzy-similarity",
  "llm-judge-correctness",
  "tool-usage"
]);
/**
 * A provider passes the gate when it scored above 0 in at least one quality
 * column. When the stats contain no quality column at all, every provider
 * passes.
 *
 * @param {string} providerId
 * @param {Map<string, {values: Map<string, number | undefined>}>} columnStats
 * @returns {boolean}
 */
function passesQualityGate(providerId, columnStats) {
  let sawQualityColumn = false;
  for (const [column, stats] of columnStats) {
    if (!QUALITY_SCORERS.has(column)) continue;
    sawQualityColumn = true;
    const val = stats?.values.get(providerId);
    if (val !== void 0 && val > 0) return true;
  }
  return !sawQualityColumn;
}
6371
8087
  function computeMedals(columnStats, providerIds) {
6372
8088
  const medals = /* @__PURE__ */ new Map();
6373
8089
  if (providerIds.length < 2) {
6374
8090
  for (const id of providerIds) medals.set(id, "none");
6375
8091
  return medals;
6376
8092
  }
6377
- const wins = /* @__PURE__ */ new Map();
6378
- for (const id of providerIds) wins.set(id, 0);
6379
- for (const [, colStats] of columnStats) {
8093
+ const eligible = new Set(providerIds.filter((id) => passesQualityGate(id, columnStats)));
8094
+ const qualityWins = /* @__PURE__ */ new Map();
8095
+ const efficiencyWins = /* @__PURE__ */ new Map();
8096
+ for (const id of providerIds) {
8097
+ qualityWins.set(id, 0);
8098
+ efficiencyWins.set(id, 0);
8099
+ }
8100
+ for (const [colName, colStats] of columnStats) {
6380
8101
  if (colStats.best === void 0) continue;
6381
8102
  const bestProviders = [...colStats.values.entries()].filter(([, v4]) => v4 !== void 0 && v4 === colStats.best);
6382
8103
  if (bestProviders.length === 1) {
6383
- wins.set(bestProviders[0][0], (wins.get(bestProviders[0][0]) ?? 0) + 1);
8104
+ const winnerId = bestProviders[0][0];
8105
+ if (QUALITY_SCORERS.has(colName)) {
8106
+ qualityWins.set(winnerId, (qualityWins.get(winnerId) ?? 0) + 1);
8107
+ } else {
8108
+ efficiencyWins.set(winnerId, (efficiencyWins.get(winnerId) ?? 0) + 1);
8109
+ }
6384
8110
  }
6385
8111
  }
6386
- const totalWins = [...wins.values()].reduce((a7, b3) => a7 + b3, 0);
8112
+ const totalWins = [...qualityWins.values()].reduce((a7, b3) => a7 + b3, 0) + [...efficiencyWins.values()].reduce((a7, b3) => a7 + b3, 0);
6387
8113
  if (totalWins === 0) {
6388
8114
  for (const id of providerIds) medals.set(id, "none");
6389
8115
  return medals;
6390
8116
  }
6391
- const sorted = [...wins.entries()].sort(
6392
- (a7, b3) => b3[1] - a7[1] || a7[0].localeCompare(b3[0])
6393
- );
8117
+ const eligibleSorted = providerIds.filter((id) => eligible.has(id)).sort((a7, b3) => {
8118
+ const qDiff = (qualityWins.get(b3) ?? 0) - (qualityWins.get(a7) ?? 0);
8119
+ if (qDiff !== 0) return qDiff;
8120
+ const eDiff = (efficiencyWins.get(b3) ?? 0) - (efficiencyWins.get(a7) ?? 0);
8121
+ if (eDiff !== 0) return eDiff;
8122
+ return a7.localeCompare(b3);
8123
+ });
6394
8124
  const medalList = ["gold", "silver", "bronze"];
6395
8125
  let rank = 0;
6396
- for (let i7 = 0; i7 < sorted.length; i7++) {
6397
- if (i7 > 0 && sorted[i7][1] < sorted[i7 - 1][1]) {
6398
- rank = i7;
8126
+ for (let i7 = 0; i7 < eligibleSorted.length; i7++) {
8127
+ if (i7 > 0) {
8128
+ const prevQ = qualityWins.get(eligibleSorted[i7 - 1]) ?? 0;
8129
+ const currQ = qualityWins.get(eligibleSorted[i7]) ?? 0;
8130
+ if (currQ < prevQ) {
8131
+ rank = i7;
8132
+ } else if (currQ === prevQ) {
8133
+ const prevE = efficiencyWins.get(eligibleSorted[i7 - 1]) ?? 0;
8134
+ const currE = efficiencyWins.get(eligibleSorted[i7]) ?? 0;
8135
+ if (currE < prevE) rank = i7;
8136
+ }
6399
8137
  }
6400
- const hasWins = sorted[i7][1] > 0;
6401
- medals.set(sorted[i7][0], hasWins && rank < medalList.length ? medalList[rank] : "none");
8138
+ medals.set(eligibleSorted[i7], rank < medalList.length ? medalList[rank] : "none");
8139
+ }
8140
+ for (const id of providerIds) {
8141
+ if (!eligible.has(id)) medals.set(id, "none");
6402
8142
  }
6403
8143
  return medals;
6404
8144
  }
@@ -6795,24 +8535,10 @@ function printSummary(results, providers, byProvider) {
6795
8535
  console.log(` ${medal} Cheapest: ${bold(byCost.id)} ${dim(providerLabel(byCost.id))} ${brightGreen}${boldCode}${costStr}${reset}`);
6796
8536
  }
6797
8537
  }
6798
- if (!single) {
6799
- const wins = /* @__PURE__ */ new Map();
6800
- for (const id of providers) wins.set(id, 0);
6801
- if (byCorrectness) wins.set(byCorrectness.id, (wins.get(byCorrectness.id) ?? 0) + 1);
6802
- if (byLatency && byLatency.avg !== Infinity) wins.set(byLatency.id, (wins.get(byLatency.id) ?? 0) + 1);
6803
- if (byCost?.avg !== void 0) wins.set(byCost.id, (wins.get(byCost.id) ?? 0) + 1);
6804
- const maxWins = Math.max(...wins.values());
6805
- if (maxWins > 0) {
6806
- const topProviders = [...wins.entries()].filter(([, w4]) => w4 === maxWins);
6807
- console.log("");
6808
- if (topProviders.length === 1) {
6809
- const [winnerId, winCount] = topProviders[0];
6810
- console.log(` \u{1F3C6} Overall: ${brightGreen}${boldCode}${winnerId}${reset} ${dim(providerLabel(winnerId))} ${dim(`(${winCount}/3 categories)`)}`);
6811
- } else {
6812
- const names = topProviders.map(([id]) => bold(id)).join(dim(", "));
6813
- console.log(` \u{1F3C6} Overall: ${names} ${dim(`(tied at ${maxWins}/3)`)}`);
6814
- }
6815
- }
8538
+ if (!single && byCorrectness && byCorrectness.avg > 0) {
8539
+ console.log("");
8540
+ const pct = `${Math.round(byCorrectness.avg * 100)}%`;
8541
+ console.log(` \u{1F3C6} Overall: ${brightGreen}${boldCode}${byCorrectness.id}${reset} ${dim(providerLabel(byCorrectness.id))} ${dim(`(${pct} avg correctness)`)}`);
6816
8542
  }
6817
8543
  console.log("");
6818
8544
  }
@@ -6841,6 +8567,32 @@ function buildSummary(results) {
6841
8567
  };
6842
8568
  }
6843
8569
 
8570
+ // src/arena.ts
8571
/**
 * Create an arena from a benchmark configuration.
 *
 * Providers are validated and scorers resolved immediately; tasks are only
 * validated when `run()` is called. Defaults: scorers
 * ["latency", "cost", "correctness"], runs 1.
 *
 * @param {object} config - { providers, tasks, scorers?, runs?, judgeModel?, timeout? }
 * @returns {{config: object, run: (options?: {onResult?: Function}) => Promise<object[]>}}
 * @throws {Error} When `config.providers` is empty.
 */
function defineArena(config) {
  if (config.providers.length === 0) {
    throw new Error("At least one provider is required");
  }
  const scorerFns = resolveScorers(
    config.scorers ?? ["latency", "cost", "correctness"],
    config.judgeModel,
    config.timeout
  );
  const runs = config.runs ?? 1;
  // Rejects (rather than throws synchronously) when there are no tasks.
  const run = async (options) => {
    if (config.tasks.length === 0) {
      throw new Error("At least one task is required");
    }
    const { providers, tasks, timeout } = config;
    return runBenchmarks({
      providers,
      tasks,
      scorers: scorerFns,
      runs,
      timeout,
      onResult: options?.onResult
    });
  };
  return { config, run };
}
8595
+
6844
8596
  // src/reporter/markdown.ts
6845
8597
  var COMMENT_MARKER = "<!-- duelist-ci-report -->";
6846
8598
  function markdownReporter(report, _current) {
@@ -6968,17 +8720,8 @@ function htmlReporter(results) {
6968
8720
  return { id, avg };
6969
8721
  }).filter((p5) => p5.avg !== void 0).sort((a7, b3) => a7.avg - b3.avg)[0];
6970
8722
  let overallWinner;
6971
- if (multi) {
6972
- const wins = /* @__PURE__ */ new Map();
6973
- for (const id of providers) wins.set(id, 0);
6974
- if (byCorrectness) wins.set(byCorrectness.id, (wins.get(byCorrectness.id) ?? 0) + 1);
6975
- if (byLatency && byLatency.avg !== Infinity) wins.set(byLatency.id, (wins.get(byLatency.id) ?? 0) + 1);
6976
- if (byCost?.avg !== void 0) wins.set(byCost.id, (wins.get(byCost.id) ?? 0) + 1);
6977
- const maxWins = Math.max(...wins.values());
6978
- if (maxWins > 0) {
6979
- const tops = [...wins.entries()].filter(([, w4]) => w4 === maxWins);
6980
- if (tops.length === 1) overallWinner = tops[0][0];
6981
- }
8723
+ if (multi && byCorrectness && byCorrectness.avg > 0) {
8724
+ overallWinner = byCorrectness.id;
6982
8725
  }
6983
8726
  const errorResults = results.filter((r3) => r3.error);
6984
8727
  const deduped = dedupeErrors(errorResults);
@@ -7499,7 +9242,7 @@ function renderErrors(errors) {
7499
9242
  </div>`;
7500
9243
  }).join("\n");
7501
9244
  return `<section class="errors-section">
7502
- <h2 class="errors-title" onclick="this.nextElementSibling.style.display=this.nextElementSibling.style.display==='none'?'block':'block'">Errors</h2>
9245
+ <h2 class="errors-title" onclick="this.nextElementSibling.style.display=this.nextElementSibling.style.display==='none'?'block':'none'">Errors</h2>
7503
9246
  <div class="errors-list">
7504
9247
  ${items}
7505
9248
  </div>
@@ -7843,6 +9586,168 @@ async function upsertPrComment(ctx, body, marker) {
7843
9586
  }
7844
9587
  }
7845
9588
 
9589
+ // src/packs/structured-output.ts
9590
+ import { z } from "zod";
9591
+ var structuredOutputPack = {
9592
+ name: "structured-output",
9593
+ label: "Structured Output",
9594
+ description: "Zod schema stress test \u2014 flat objects, nesting, arrays, enums, empty arrays, and adversarial input",
9595
+ tasks: [
9596
+ {
9597
+ name: "so:flat-entity",
9598
+ prompt: "Extract the person's details from this text: 'Maria Garcia, age 34, works as a software architect in Barcelona, Spain. Her employee ID is EMP-2847.' Return as JSON.",
9599
+ expected: {
9600
+ name: "Maria Garcia",
9601
+ age: 34,
9602
+ role: "software architect",
9603
+ city: "Barcelona",
9604
+ country: "Spain",
9605
+ employeeId: "EMP-2847"
9606
+ },
9607
+ schema: z.object({
9608
+ name: z.string(),
9609
+ age: z.number(),
9610
+ role: z.string(),
9611
+ city: z.string(),
9612
+ country: z.string(),
9613
+ employeeId: z.string()
9614
+ })
9615
+ },
9616
+ {
9617
+ name: "so:nested-object",
9618
+ prompt: "Parse this shipping label into structured JSON: 'Ship to: Acme Corp, Attn: John Lee, 4th Floor, 742 Evergreen Terrace, Springfield, IL 62704, USA. Order #ORD-9912, 3 items, 2.4kg, express shipping.' Use shippingMethod values: standard, express, or overnight. Return as JSON.",
9619
+ expected: {
9620
+ recipient: { company: "Acme Corp", contact: "John Lee", floor: "4th Floor" },
9621
+ address: { street: "742 Evergreen Terrace", city: "Springfield", state: "IL", zip: "62704", country: "USA" },
9622
+ order: { id: "ORD-9912", itemCount: 3, weightKg: 2.4, shippingMethod: "express" }
9623
+ },
9624
+ schema: z.object({
9625
+ recipient: z.object({ company: z.string(), contact: z.string(), floor: z.string() }),
9626
+ address: z.object({
9627
+ street: z.string(),
9628
+ city: z.string(),
9629
+ state: z.string(),
9630
+ zip: z.string(),
9631
+ country: z.string()
9632
+ }),
9633
+ order: z.object({
9634
+ id: z.string(),
9635
+ itemCount: z.number(),
9636
+ weightKg: z.number(),
9637
+ shippingMethod: z.enum(["standard", "express", "overnight"])
9638
+ })
9639
+ })
9640
+ },
9641
+ {
9642
+ name: "so:array-of-objects",
9643
+ prompt: "Extract all mentioned products with their prices and categories from this text: 'Our summer sale includes the UltraWidget Pro ($49.99, Electronics), ComfortMax Chair ($199.00, Furniture), and AquaPure Filter ($24.50, Home & Kitchen). The SmartLamp Mini is also available at $34.99 in the Electronics category.' Return as a JSON array.",
9644
+ expected: [
9645
+ { name: "UltraWidget Pro", price: 49.99, category: "Electronics" },
9646
+ { name: "ComfortMax Chair", price: 199, category: "Furniture" },
9647
+ { name: "AquaPure Filter", price: 24.5, category: "Home & Kitchen" },
9648
+ { name: "SmartLamp Mini", price: 34.99, category: "Electronics" }
9649
+ ],
9650
+ schema: z.array(z.object({ name: z.string(), price: z.number(), category: z.string() }))
9651
+ },
9652
+ {
9653
+ name: "so:empty-arrays",
9654
+ prompt: "Extract all error codes and their severity levels from this log message: 'System health check completed at 14:32 UTC. All services operational. No warnings or errors detected. Uptime: 99.97%.' Classify status as one of: healthy, degraded, or down. Return as JSON.",
9655
+ expected: { errors: [], warnings: [], status: "healthy", uptimePercent: 99.97 },
9656
+ schema: z.object({
9657
+ errors: z.array(z.object({ code: z.string(), severity: z.string() })),
9658
+ warnings: z.array(z.string()),
9659
+ status: z.enum(["healthy", "degraded", "down"]),
9660
+ uptimePercent: z.number()
9661
+ })
9662
+ },
9663
+ {
9664
+ name: "so:enum-classification",
9665
+ prompt: "Classify each of these support tickets by priority (low/medium/high/critical) and category (billing/technical/account/general). Use just the letter (A, B, C, D) as the id.\nTicket A: 'My account was charged twice for the same subscription.'\nTicket B: 'The API returns 500 errors intermittently.'\nTicket C: 'How do I update my display name?'\nTicket D: 'Production database is completely unresponsive, all services down.'\nReturn as a JSON array.",
9666
+ expected: [
9667
+ { id: "A", priority: "high", category: "billing" },
9668
+ { id: "B", priority: "high", category: "technical" },
9669
+ { id: "C", priority: "low", category: "account" },
9670
+ { id: "D", priority: "critical", category: "technical" }
9671
+ ],
9672
+ schema: z.array(
9673
+ z.object({
9674
+ id: z.string(),
9675
+ priority: z.enum(["low", "medium", "high", "critical"]),
9676
+ category: z.enum(["billing", "technical", "account", "general"])
9677
+ })
9678
+ )
9679
+ },
9680
+ {
9681
+ name: "so:adversarial-input",
9682
+ prompt: `Extract the actual product review data from this messy input. Ignore any JSON-like noise in the text.
9683
+
9684
+ User said: 'I bought the {product: "fake"} headphones for $59.99 and they're great! Rating: 5/5. The "noise-cancelling" feature works well even in {"noisy": true} environments. Would recommend to friend=true. Purchased on 01/15/2026.'
9685
+ Return as JSON. Use ISO 8601 date format (YYYY-MM-DD).`,
9686
+ expected: {
9687
+ product: "headphones",
9688
+ price: 59.99,
9689
+ rating: 5,
9690
+ maxRating: 5,
9691
+ features: ["noise-cancelling"],
9692
+ recommended: true,
9693
+ purchaseDate: "2026-01-15"
9694
+ },
9695
+ schema: z.object({
9696
+ product: z.string(),
9697
+ price: z.number(),
9698
+ rating: z.number(),
9699
+ maxRating: z.number(),
9700
+ features: z.array(z.string()),
9701
+ recommended: z.boolean(),
9702
+ purchaseDate: z.string()
9703
+ })
9704
+ }
9705
+ ],
9706
+ scorers: ["correctness", "schema-correctness", "latency", "cost"]
9707
+ };
9708
+
9709
+ // src/packs/index.ts
9710
+ var registry = /* @__PURE__ */ new Map();
9711
+ function register(pack) {
9712
+ registry.set(pack.name, pack);
9713
+ }
9714
+ register(structuredOutputPack);
9715
+ function loadPack(name) {
9716
+ const pack = registry.get(name);
9717
+ if (!pack) {
9718
+ const available = [...registry.keys()].join(", ");
9719
+ throw new Error(`Unknown pack "${name}". Available packs: ${available}`);
9720
+ }
9721
+ return pack;
9722
+ }
9723
+ function listPacks() {
9724
+ return [...registry.values()].map((p5) => ({
9725
+ name: p5.name,
9726
+ label: p5.label,
9727
+ description: p5.description,
9728
+ taskCount: p5.tasks.length
9729
+ }));
9730
+ }
9731
+
9732
+ // src/packs/loader.ts
9733
+ function buildPackConfig(config) {
9734
+ const packs = config.packs.map((name) => loadPack(name));
9735
+ const tasks = packs.flatMap((p5) => p5.tasks);
9736
+ const scorerSet = /* @__PURE__ */ new Set();
9737
+ for (const pack of packs) {
9738
+ for (const scorer of pack.scorers) {
9739
+ scorerSet.add(scorer);
9740
+ }
9741
+ }
9742
+ return {
9743
+ providers: config.providers,
9744
+ tasks,
9745
+ scorers: [...scorerSet],
9746
+ runs: config.runs ?? 1,
9747
+ timeout: config.timeout
9748
+ };
9749
+ }
9750
+
7846
9751
  // src/cli.ts
7847
9752
  var __dirname2 = dirname2(fileURLToPath(import.meta.url));
7848
9753
  var program = new Command();
@@ -7867,12 +9772,16 @@ program.command("init").description("Scaffold an arena.config.ts in the current
7867
9772
  console.log(" 1. export OPENAI_API_KEY=sk-...");
7868
9773
  console.log(" 2. npx duelist run");
7869
9774
  });
7870
- program.command("run").description("Run benchmarks defined in your arena config").option("-c, --config <path>", "Path to config file", "arena.config.ts").option("--reporter <type>", "Output format: console, json, or html", "console").option("--output <path>", "Output file path (used with html reporter)", "duelist-report.html").option("-q, --quiet", "Suppress per-result progress (show only final report)").action(async (opts) => {
9775
+ program.command("run").description("Run benchmarks defined in your arena config").option("-c, --config <path>", "Path to config file", "arena.config.ts").option("--pack <names>", 'Run built-in task pack(s), comma-separated. Use "list" to see available packs.').option("--reporter <type>", "Output format: console, json, or html", "console").option("--output <path>", "Output file path (used with html reporter)", "duelist-report.html").option("-q, --quiet", "Suppress per-result progress (show only final report)").action(async (opts) => {
9776
+ if (opts.pack === "list") {
9777
+ printPackList();
9778
+ return;
9779
+ }
7871
9780
  if (!["console", "json", "html"].includes(opts.reporter)) {
7872
9781
  console.error(`Unknown reporter "${opts.reporter}". Use "console", "json", or "html".`);
7873
9782
  process.exit(1);
7874
9783
  }
7875
- const typedArena = await loadArenaConfig(opts.config);
9784
+ const typedArena = opts.pack ? await loadArenaWithPacks(opts.pack, opts.config) : await loadArenaConfig(opts.config);
7876
9785
  try {
7877
9786
  const showProgress = opts.reporter !== "json" && !opts.quiet;
7878
9787
  const onResult = showProgress ? logResult : void 0;
@@ -7907,7 +9816,11 @@ function collectThreshold(value, previous) {
7907
9816
  previous.set(scorer, Number(delta));
7908
9817
  return previous;
7909
9818
  }
7910
- program.command("ci").description("Run benchmarks, compare against baseline, and enforce quality gates").option("-c, --config <path>", "Path to config file", "arena.config.ts").option("--baseline <path>", "Baseline JSON file", ".duelist/baseline.json").option("--budget <dollars>", "Max total cost in USD", parseFloat).option("--threshold <scorer=delta>", "Regression threshold (repeatable)", collectThreshold, /* @__PURE__ */ new Map()).option("--update-baseline", "Save results as new baseline after passing").option("--comment", "Post results as GitHub PR comment").option("-q, --quiet", "Suppress per-result progress").action(async (opts) => {
9819
+ program.command("ci").description("Run benchmarks, compare against baseline, and enforce quality gates").option("-c, --config <path>", "Path to config file", "arena.config.ts").option("--pack <names>", 'Run built-in task pack(s), comma-separated. Use "list" to see available packs.').option("--baseline <path>", "Baseline JSON file", ".duelist/baseline.json").option("--budget <dollars>", "Max total cost in USD", parseFloat).option("--threshold <scorer=delta>", "Regression threshold (repeatable)", collectThreshold, /* @__PURE__ */ new Map()).option("--update-baseline", "Save results as new baseline after passing").option("--comment", "Post results as GitHub PR comment").option("-q, --quiet", "Suppress per-result progress").action(async (opts) => {
9820
+ if (opts.pack === "list") {
9821
+ printPackList();
9822
+ return;
9823
+ }
7911
9824
  const ciOpts = {
7912
9825
  configPath: opts.config,
7913
9826
  baselinePath: resolve(opts.baseline),
@@ -7917,7 +9830,7 @@ program.command("ci").description("Run benchmarks, compare against baseline, and
7917
9830
  comment: opts.comment ?? false,
7918
9831
  quiet: opts.quiet ?? false
7919
9832
  };
7920
- const typedArena = await loadArenaConfig(ciOpts.configPath);
9833
+ const typedArena = opts.pack ? await loadArenaWithPacks(opts.pack, ciOpts.configPath) : await loadArenaConfig(ciOpts.configPath);
7921
9834
  console.log("Running benchmarks...");
7922
9835
  const onResult = ciOpts.quiet ? void 0 : logResult;
7923
9836
  let results;
@@ -7974,6 +9887,35 @@ program.command("ci").description("Run benchmarks, compare against baseline, and
7974
9887
  process.exit(report.failed ? 1 : 0);
7975
9888
  });
7976
9889
  program.parse();
9890
+ function printPackList() {
9891
+ const packs = listPacks();
9892
+ if (packs.length === 0) {
9893
+ console.log("No packs available.");
9894
+ return;
9895
+ }
9896
+ console.log("Available task packs:\n");
9897
+ for (const p5 of packs) {
9898
+ console.log(` ${p5.name.padEnd(24)} ${p5.description} (${p5.taskCount} tasks)`);
9899
+ }
9900
+ console.log("\nUsage: npx duelist run --pack <name>");
9901
+ }
9902
+ async function loadArenaWithPacks(packNames, configOpt) {
9903
+ const configPath = resolve(configOpt);
9904
+ if (!existsSync(configPath)) {
9905
+ console.error("No arena.config.ts found. Create one with `npx duelist init` to configure");
9906
+ console.error("your providers, then re-run with --pack.");
9907
+ process.exit(1);
9908
+ }
9909
+ const userArena = await loadArenaConfig(configOpt);
9910
+ const packs = packNames.split(",").map((s5) => s5.trim());
9911
+ const packConfig = buildPackConfig({
9912
+ packs,
9913
+ providers: userArena.config.providers,
9914
+ runs: userArena.config.runs,
9915
+ timeout: userArena.config.timeout
9916
+ });
9917
+ return defineArena(packConfig);
9918
+ }
7977
9919
  async function loadArenaConfig(configOpt) {
7978
9920
  const configPath = resolve(configOpt);
7979
9921
  if (!existsSync(configPath)) {