@artemiskit/core 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10896,6 +10896,55 @@ var require_public_api = __commonJS((exports) => {
10896
10896
  exports.stringify = stringify;
10897
10897
  });
10898
10898
 
10899
+ // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
10900
+ var require_dist = __commonJS((exports) => {
10901
+ var composer = require_composer();
10902
+ var Document = require_Document();
10903
+ var Schema = require_Schema();
10904
+ var errors2 = require_errors();
10905
+ var Alias = require_Alias();
10906
+ var identity = require_identity();
10907
+ var Pair = require_Pair();
10908
+ var Scalar = require_Scalar();
10909
+ var YAMLMap = require_YAMLMap();
10910
+ var YAMLSeq = require_YAMLSeq();
10911
+ var cst = require_cst();
10912
+ var lexer = require_lexer();
10913
+ var lineCounter = require_line_counter();
10914
+ var parser = require_parser();
10915
+ var publicApi = require_public_api();
10916
+ var visit = require_visit();
10917
+ exports.Composer = composer.Composer;
10918
+ exports.Document = Document.Document;
10919
+ exports.Schema = Schema.Schema;
10920
+ exports.YAMLError = errors2.YAMLError;
10921
+ exports.YAMLParseError = errors2.YAMLParseError;
10922
+ exports.YAMLWarning = errors2.YAMLWarning;
10923
+ exports.Alias = Alias.Alias;
10924
+ exports.isAlias = identity.isAlias;
10925
+ exports.isCollection = identity.isCollection;
10926
+ exports.isDocument = identity.isDocument;
10927
+ exports.isMap = identity.isMap;
10928
+ exports.isNode = identity.isNode;
10929
+ exports.isPair = identity.isPair;
10930
+ exports.isScalar = identity.isScalar;
10931
+ exports.isSeq = identity.isSeq;
10932
+ exports.Pair = Pair.Pair;
10933
+ exports.Scalar = Scalar.Scalar;
10934
+ exports.YAMLMap = YAMLMap.YAMLMap;
10935
+ exports.YAMLSeq = YAMLSeq.YAMLSeq;
10936
+ exports.CST = cst;
10937
+ exports.Lexer = lexer.Lexer;
10938
+ exports.LineCounter = lineCounter.LineCounter;
10939
+ exports.Parser = parser.Parser;
10940
+ exports.parse = publicApi.parse;
10941
+ exports.parseAllDocuments = publicApi.parseAllDocuments;
10942
+ exports.parseDocument = publicApi.parseDocument;
10943
+ exports.stringify = publicApi.stringify;
10944
+ exports.visit = visit.visit;
10945
+ exports.visitAsync = visit.visitAsync;
10946
+ });
10947
+
10899
10948
  // src/evaluators/combined.ts
10900
10949
  async function getEvaluatorForType(type) {
10901
10950
  const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
@@ -13616,55 +13665,8 @@ var ScenarioSchema = exports_external.object({
13616
13665
  }).optional()
13617
13666
  });
13618
13667
  // src/scenario/parser.ts
13668
+ var import_yaml = __toESM(require_dist(), 1);
13619
13669
  import { readFile } from "fs/promises";
13620
-
13621
- // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
13622
- var composer = require_composer();
13623
- var Document = require_Document();
13624
- var Schema = require_Schema();
13625
- var errors2 = require_errors();
13626
- var Alias = require_Alias();
13627
- var identity = require_identity();
13628
- var Pair = require_Pair();
13629
- var Scalar = require_Scalar();
13630
- var YAMLMap = require_YAMLMap();
13631
- var YAMLSeq = require_YAMLSeq();
13632
- var cst = require_cst();
13633
- var lexer = require_lexer();
13634
- var lineCounter = require_line_counter();
13635
- var parser = require_parser();
13636
- var publicApi = require_public_api();
13637
- var visit = require_visit();
13638
- var $Composer = composer.Composer;
13639
- var $Document = Document.Document;
13640
- var $Schema = Schema.Schema;
13641
- var $YAMLError = errors2.YAMLError;
13642
- var $YAMLParseError = errors2.YAMLParseError;
13643
- var $YAMLWarning = errors2.YAMLWarning;
13644
- var $Alias = Alias.Alias;
13645
- var $isAlias = identity.isAlias;
13646
- var $isCollection = identity.isCollection;
13647
- var $isDocument = identity.isDocument;
13648
- var $isMap = identity.isMap;
13649
- var $isNode = identity.isNode;
13650
- var $isPair = identity.isPair;
13651
- var $isScalar = identity.isScalar;
13652
- var $isSeq = identity.isSeq;
13653
- var $Pair = Pair.Pair;
13654
- var $Scalar = Scalar.Scalar;
13655
- var $YAMLMap = YAMLMap.YAMLMap;
13656
- var $YAMLSeq = YAMLSeq.YAMLSeq;
13657
- var $Lexer = lexer.Lexer;
13658
- var $LineCounter = lineCounter.LineCounter;
13659
- var $Parser = parser.Parser;
13660
- var $parse = publicApi.parse;
13661
- var $parseAllDocuments = publicApi.parseAllDocuments;
13662
- var $parseDocument = publicApi.parseDocument;
13663
- var $stringify = publicApi.stringify;
13664
- var $visit = visit.visit;
13665
- var $visitAsync = visit.visitAsync;
13666
-
13667
- // src/scenario/parser.ts
13668
13670
  function expandEnvVars(obj) {
13669
13671
  if (typeof obj === "string") {
13670
13672
  return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
@@ -13699,7 +13701,7 @@ async function parseScenarioFile(filePath) {
13699
13701
  }
13700
13702
  function parseScenarioString(content, source) {
13701
13703
  try {
13702
- const raw = $parse(content);
13704
+ const raw = import_yaml.parse(content);
13703
13705
  const expanded = expandEnvVars(raw);
13704
13706
  const result = ScenarioSchema.safeParse(expanded);
13705
13707
  if (!result.success) {
@@ -14238,54 +14240,367 @@ function nanoid(size = 21) {
14238
14240
  return id;
14239
14241
  }
14240
14242
 
14241
- // src/provenance/environment.ts
14242
- function getEnvironmentInfo() {
14243
- return {
14244
- node_version: process.version,
14245
- platform: process.platform,
14246
- arch: process.arch
14247
- };
14248
- }
14249
-
14250
- // src/provenance/git.ts
14251
- import { execSync } from "child_process";
14252
- function getGitInfo() {
14253
- try {
14254
- const commit = execGit("rev-parse HEAD");
14255
- const branch = execGit("rev-parse --abbrev-ref HEAD");
14256
- const dirty = execGit("status --porcelain").length > 0;
14257
- const remote = execGit("remote get-url origin", true);
14258
- return {
14259
- commit,
14260
- branch,
14261
- dirty,
14262
- remote: remote || undefined
14263
- };
14264
- } catch {
14265
- return {
14266
- commit: "unknown",
14267
- branch: "unknown",
14268
- dirty: false
14269
- };
14243
+ // src/cost/pricing.ts
14244
+ var MODEL_PRICING = {
14245
+ "gpt-5": {
14246
+ promptPer1K: 0.00125,
14247
+ completionPer1K: 0.01,
14248
+ lastUpdated: "2026-01",
14249
+ notes: "400K context window"
14250
+ },
14251
+ "gpt-5.1": {
14252
+ promptPer1K: 0.00125,
14253
+ completionPer1K: 0.01,
14254
+ lastUpdated: "2026-01"
14255
+ },
14256
+ "gpt-5.2": {
14257
+ promptPer1K: 0.00175,
14258
+ completionPer1K: 0.014,
14259
+ lastUpdated: "2026-01"
14260
+ },
14261
+ "gpt-5-mini": {
14262
+ promptPer1K: 0.00025,
14263
+ completionPer1K: 0.002,
14264
+ lastUpdated: "2026-01"
14265
+ },
14266
+ "gpt-5-nano": {
14267
+ promptPer1K: 0.00005,
14268
+ completionPer1K: 0.0004,
14269
+ lastUpdated: "2026-01"
14270
+ },
14271
+ "gpt-4.1": {
14272
+ promptPer1K: 0.002,
14273
+ completionPer1K: 0.008,
14274
+ lastUpdated: "2026-01",
14275
+ notes: "1M context window"
14276
+ },
14277
+ "gpt-4.1-mini": {
14278
+ promptPer1K: 0.0004,
14279
+ completionPer1K: 0.0016,
14280
+ lastUpdated: "2026-01"
14281
+ },
14282
+ "gpt-4.1-nano": {
14283
+ promptPer1K: 0.0001,
14284
+ completionPer1K: 0.0004,
14285
+ lastUpdated: "2026-01"
14286
+ },
14287
+ "gpt-4o": {
14288
+ promptPer1K: 0.0025,
14289
+ completionPer1K: 0.01,
14290
+ lastUpdated: "2026-01",
14291
+ notes: "128K context window"
14292
+ },
14293
+ "gpt-4o-mini": {
14294
+ promptPer1K: 0.00015,
14295
+ completionPer1K: 0.0006,
14296
+ lastUpdated: "2026-01",
14297
+ notes: "128K context window"
14298
+ },
14299
+ o1: {
14300
+ promptPer1K: 0.015,
14301
+ completionPer1K: 0.06,
14302
+ lastUpdated: "2026-01",
14303
+ notes: "Reasoning model - internal thinking tokens billed as output"
14304
+ },
14305
+ o3: {
14306
+ promptPer1K: 0.002,
14307
+ completionPer1K: 0.008,
14308
+ lastUpdated: "2026-01"
14309
+ },
14310
+ "o3-mini": {
14311
+ promptPer1K: 0.0011,
14312
+ completionPer1K: 0.0044,
14313
+ lastUpdated: "2026-01"
14314
+ },
14315
+ "o4-mini": {
14316
+ promptPer1K: 0.0011,
14317
+ completionPer1K: 0.0044,
14318
+ lastUpdated: "2026-01"
14319
+ },
14320
+ "gpt-4-turbo": {
14321
+ promptPer1K: 0.01,
14322
+ completionPer1K: 0.03,
14323
+ lastUpdated: "2026-01"
14324
+ },
14325
+ "gpt-4": {
14326
+ promptPer1K: 0.03,
14327
+ completionPer1K: 0.06,
14328
+ lastUpdated: "2026-01"
14329
+ },
14330
+ "gpt-3.5-turbo": {
14331
+ promptPer1K: 0.0005,
14332
+ completionPer1K: 0.0015,
14333
+ lastUpdated: "2026-01"
14334
+ },
14335
+ "claude-opus-4.5": {
14336
+ promptPer1K: 0.005,
14337
+ completionPer1K: 0.025,
14338
+ lastUpdated: "2026-01",
14339
+ notes: "Most capable Claude model"
14340
+ },
14341
+ "claude-sonnet-4.5": {
14342
+ promptPer1K: 0.003,
14343
+ completionPer1K: 0.015,
14344
+ lastUpdated: "2026-01",
14345
+ notes: "Balanced performance and cost"
14346
+ },
14347
+ "claude-haiku-4.5": {
14348
+ promptPer1K: 0.001,
14349
+ completionPer1K: 0.005,
14350
+ lastUpdated: "2026-01",
14351
+ notes: "Fastest Claude model"
14352
+ },
14353
+ "claude-opus-4": {
14354
+ promptPer1K: 0.015,
14355
+ completionPer1K: 0.075,
14356
+ lastUpdated: "2026-01"
14357
+ },
14358
+ "claude-opus-4.1": {
14359
+ promptPer1K: 0.015,
14360
+ completionPer1K: 0.075,
14361
+ lastUpdated: "2026-01"
14362
+ },
14363
+ "claude-sonnet-4": {
14364
+ promptPer1K: 0.003,
14365
+ completionPer1K: 0.015,
14366
+ lastUpdated: "2026-01"
14367
+ },
14368
+ "claude-sonnet-3.7": {
14369
+ promptPer1K: 0.003,
14370
+ completionPer1K: 0.015,
14371
+ lastUpdated: "2026-01"
14372
+ },
14373
+ "claude-3-7-sonnet": {
14374
+ promptPer1K: 0.003,
14375
+ completionPer1K: 0.015,
14376
+ lastUpdated: "2026-01"
14377
+ },
14378
+ "claude-3-5-sonnet-20241022": {
14379
+ promptPer1K: 0.003,
14380
+ completionPer1K: 0.015,
14381
+ lastUpdated: "2026-01"
14382
+ },
14383
+ "claude-3-5-haiku-20241022": {
14384
+ promptPer1K: 0.0008,
14385
+ completionPer1K: 0.004,
14386
+ lastUpdated: "2026-01"
14387
+ },
14388
+ "claude-haiku-3.5": {
14389
+ promptPer1K: 0.0008,
14390
+ completionPer1K: 0.004,
14391
+ lastUpdated: "2026-01"
14392
+ },
14393
+ "claude-3-opus": {
14394
+ promptPer1K: 0.015,
14395
+ completionPer1K: 0.075,
14396
+ lastUpdated: "2026-01"
14397
+ },
14398
+ "claude-3-sonnet": {
14399
+ promptPer1K: 0.003,
14400
+ completionPer1K: 0.015,
14401
+ lastUpdated: "2026-01"
14402
+ },
14403
+ "claude-3-haiku": {
14404
+ promptPer1K: 0.00025,
14405
+ completionPer1K: 0.00125,
14406
+ lastUpdated: "2026-01"
14407
+ },
14408
+ "claude-3.5-sonnet": {
14409
+ promptPer1K: 0.003,
14410
+ completionPer1K: 0.015,
14411
+ lastUpdated: "2026-01"
14412
+ },
14413
+ "claude-3.5-haiku": {
14414
+ promptPer1K: 0.0008,
14415
+ completionPer1K: 0.004,
14416
+ lastUpdated: "2026-01"
14270
14417
  }
14271
- }
14272
- function execGit(command, allowFailure = false) {
14273
- try {
14274
- return execSync(`git ${command}`, {
14275
- encoding: "utf-8",
14276
- stdio: ["pipe", "pipe", "pipe"]
14277
- }).trim();
14278
- } catch {
14279
- if (allowFailure) {
14280
- return "";
14418
+ };
14419
+ var DEFAULT_PRICING = {
14420
+ promptPer1K: 0.003,
14421
+ completionPer1K: 0.015,
14422
+ lastUpdated: "2026-01",
14423
+ notes: "Default pricing - verify with provider"
14424
+ };
14425
+ function getModelPricing(model) {
14426
+ if (MODEL_PRICING[model]) {
14427
+ return MODEL_PRICING[model];
14428
+ }
14429
+ const lowerModel = model.toLowerCase();
14430
+ for (const [key, pricing] of Object.entries(MODEL_PRICING)) {
14431
+ if (key.toLowerCase() === lowerModel) {
14432
+ return pricing;
14281
14433
  }
14282
- throw new Error(`Git command failed: ${command}`);
14283
14434
  }
14284
- }
14285
-
14286
- // src/artifacts/manifest.ts
14287
- function createRunManifest(options) {
14288
- const {
14435
+ if (lowerModel.includes("gpt-5.2")) {
14436
+ return MODEL_PRICING["gpt-5.2"];
14437
+ }
14438
+ if (lowerModel.includes("gpt-5.1")) {
14439
+ return MODEL_PRICING["gpt-5.1"];
14440
+ }
14441
+ if (lowerModel.includes("gpt-5-mini")) {
14442
+ return MODEL_PRICING["gpt-5-mini"];
14443
+ }
14444
+ if (lowerModel.includes("gpt-5-nano")) {
14445
+ return MODEL_PRICING["gpt-5-nano"];
14446
+ }
14447
+ if (lowerModel.includes("gpt-5")) {
14448
+ return MODEL_PRICING["gpt-5"];
14449
+ }
14450
+ if (lowerModel.includes("gpt-4.1-mini")) {
14451
+ return MODEL_PRICING["gpt-4.1-mini"];
14452
+ }
14453
+ if (lowerModel.includes("gpt-4.1-nano")) {
14454
+ return MODEL_PRICING["gpt-4.1-nano"];
14455
+ }
14456
+ if (lowerModel.includes("gpt-4.1")) {
14457
+ return MODEL_PRICING["gpt-4.1"];
14458
+ }
14459
+ if (lowerModel.includes("gpt-4o-mini")) {
14460
+ return MODEL_PRICING["gpt-4o-mini"];
14461
+ }
14462
+ if (lowerModel.includes("gpt-4o")) {
14463
+ return MODEL_PRICING["gpt-4o"];
14464
+ }
14465
+ if (lowerModel.includes("o4-mini")) {
14466
+ return MODEL_PRICING["o4-mini"];
14467
+ }
14468
+ if (lowerModel.includes("o3-mini")) {
14469
+ return MODEL_PRICING["o3-mini"];
14470
+ }
14471
+ if (lowerModel.includes("o3")) {
14472
+ return MODEL_PRICING.o3;
14473
+ }
14474
+ if (lowerModel.includes("o1")) {
14475
+ return MODEL_PRICING.o1;
14476
+ }
14477
+ if (lowerModel.includes("gpt-4-turbo")) {
14478
+ return MODEL_PRICING["gpt-4-turbo"];
14479
+ }
14480
+ if (lowerModel.includes("gpt-4")) {
14481
+ return MODEL_PRICING["gpt-4"];
14482
+ }
14483
+ if (lowerModel.includes("gpt-3.5")) {
14484
+ return MODEL_PRICING["gpt-3.5-turbo"];
14485
+ }
14486
+ if (lowerModel.includes("opus-4.5") || lowerModel.includes("opus-4-5")) {
14487
+ return MODEL_PRICING["claude-opus-4.5"];
14488
+ }
14489
+ if (lowerModel.includes("sonnet-4.5") || lowerModel.includes("sonnet-4-5")) {
14490
+ return MODEL_PRICING["claude-sonnet-4.5"];
14491
+ }
14492
+ if (lowerModel.includes("haiku-4.5") || lowerModel.includes("haiku-4-5")) {
14493
+ return MODEL_PRICING["claude-haiku-4.5"];
14494
+ }
14495
+ if (lowerModel.includes("opus-4.1") || lowerModel.includes("opus-4-1")) {
14496
+ return MODEL_PRICING["claude-opus-4.1"];
14497
+ }
14498
+ if (lowerModel.includes("opus-4")) {
14499
+ return MODEL_PRICING["claude-opus-4"];
14500
+ }
14501
+ if (lowerModel.includes("sonnet-4")) {
14502
+ return MODEL_PRICING["claude-sonnet-4"];
14503
+ }
14504
+ if (lowerModel.includes("sonnet-3.7") || lowerModel.includes("sonnet-3-7")) {
14505
+ return MODEL_PRICING["claude-sonnet-3.7"];
14506
+ }
14507
+ if (lowerModel.includes("claude-3-5-sonnet") || lowerModel.includes("claude-3.5-sonnet")) {
14508
+ return MODEL_PRICING["claude-3.5-sonnet"];
14509
+ }
14510
+ if (lowerModel.includes("claude-3-5-haiku") || lowerModel.includes("claude-3.5-haiku")) {
14511
+ return MODEL_PRICING["claude-3.5-haiku"];
14512
+ }
14513
+ if (lowerModel.includes("claude-3-opus")) {
14514
+ return MODEL_PRICING["claude-3-opus"];
14515
+ }
14516
+ if (lowerModel.includes("claude-3-sonnet")) {
14517
+ return MODEL_PRICING["claude-3-sonnet"];
14518
+ }
14519
+ if (lowerModel.includes("claude-3-haiku")) {
14520
+ return MODEL_PRICING["claude-3-haiku"];
14521
+ }
14522
+ if (lowerModel.includes("claude")) {
14523
+ return MODEL_PRICING["claude-sonnet-4.5"];
14524
+ }
14525
+ return DEFAULT_PRICING;
14526
+ }
14527
+ function estimateCost(promptTokens, completionTokens, model) {
14528
+ const pricing = getModelPricing(model);
14529
+ const promptCostUsd = promptTokens / 1000 * pricing.promptPer1K;
14530
+ const completionCostUsd = completionTokens / 1000 * pricing.completionPer1K;
14531
+ const totalUsd = promptCostUsd + completionCostUsd;
14532
+ return {
14533
+ totalUsd,
14534
+ promptCostUsd,
14535
+ completionCostUsd,
14536
+ model,
14537
+ pricing
14538
+ };
14539
+ }
14540
+ function formatCost(costUsd) {
14541
+ if (costUsd < 0.01) {
14542
+ return `$${(costUsd * 100).toFixed(4)} cents`;
14543
+ }
14544
+ if (costUsd < 1) {
14545
+ return `$${costUsd.toFixed(4)}`;
14546
+ }
14547
+ return `$${costUsd.toFixed(2)}`;
14548
+ }
14549
+ function listKnownModels() {
14550
+ return Object.entries(MODEL_PRICING).map(([model, pricing]) => ({
14551
+ model,
14552
+ pricing
14553
+ }));
14554
+ }
14555
+
14556
+ // src/provenance/environment.ts
14557
+ function getEnvironmentInfo() {
14558
+ return {
14559
+ node_version: process.version,
14560
+ platform: process.platform,
14561
+ arch: process.arch
14562
+ };
14563
+ }
14564
+
14565
+ // src/provenance/git.ts
14566
+ import { execSync } from "child_process";
14567
+ function getGitInfo() {
14568
+ try {
14569
+ const commit = execGit("rev-parse HEAD");
14570
+ const branch = execGit("rev-parse --abbrev-ref HEAD");
14571
+ const dirty = execGit("status --porcelain").length > 0;
14572
+ const remote = execGit("remote get-url origin", true);
14573
+ return {
14574
+ commit,
14575
+ branch,
14576
+ dirty,
14577
+ remote: remote || undefined
14578
+ };
14579
+ } catch {
14580
+ return {
14581
+ commit: "unknown",
14582
+ branch: "unknown",
14583
+ dirty: false
14584
+ };
14585
+ }
14586
+ }
14587
+ function execGit(command, allowFailure = false) {
14588
+ try {
14589
+ return execSync(`git ${command}`, {
14590
+ encoding: "utf-8",
14591
+ stdio: ["pipe", "pipe", "pipe"]
14592
+ }).trim();
14593
+ } catch {
14594
+ if (allowFailure) {
14595
+ return "";
14596
+ }
14597
+ throw new Error(`Git command failed: ${command}`);
14598
+ }
14599
+ }
14600
+
14601
+ // src/artifacts/manifest.ts
14602
+ function createRunManifest(options) {
14603
+ const {
14289
14604
  project,
14290
14605
  config,
14291
14606
  resolvedConfig,
@@ -14296,7 +14611,8 @@ function createRunManifest(options) {
14296
14611
  runReason,
14297
14612
  redaction
14298
14613
  } = options;
14299
- const metrics = calculateMetrics(cases);
14614
+ const modelForCost = resolvedConfig?.model || config.model;
14615
+ const metrics = calculateMetrics(cases, modelForCost);
14300
14616
  const git = getGitInfo();
14301
14617
  const environment = getEnvironmentInfo();
14302
14618
  return {
@@ -14320,7 +14636,7 @@ function createRunManifest(options) {
14320
14636
  redaction
14321
14637
  };
14322
14638
  }
14323
- function calculateMetrics(cases) {
14639
+ function calculateMetrics(cases, model) {
14324
14640
  const passedCases = cases.filter((c) => c.ok);
14325
14641
  const latencies = cases.map((c) => c.latencyMs).sort((a, b) => a - b);
14326
14642
  const medianLatency = latencies.length > 0 ? latencies[Math.floor(latencies.length / 2)] : 0;
@@ -14328,6 +14644,21 @@ function calculateMetrics(cases) {
14328
14644
  const p95Latency = latencies.length > 0 ? latencies[p95Index] : 0;
14329
14645
  const totalPromptTokens = cases.reduce((sum, c) => sum + c.tokens.prompt, 0);
14330
14646
  const totalCompletionTokens = cases.reduce((sum, c) => sum + c.tokens.completion, 0);
14647
+ let cost;
14648
+ if (model && (totalPromptTokens > 0 || totalCompletionTokens > 0)) {
14649
+ const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
14650
+ const pricing = getModelPricing(model);
14651
+ cost = {
14652
+ total_usd: costEstimate.totalUsd,
14653
+ prompt_cost_usd: costEstimate.promptCostUsd,
14654
+ completion_cost_usd: costEstimate.completionCostUsd,
14655
+ model: costEstimate.model,
14656
+ pricing: {
14657
+ prompt_per_1k: pricing.promptPer1K,
14658
+ completion_per_1k: pricing.completionPer1K
14659
+ }
14660
+ };
14661
+ }
14331
14662
  return {
14332
14663
  success_rate: cases.length > 0 ? passedCases.length / cases.length : 0,
14333
14664
  total_cases: cases.length,
@@ -14337,7 +14668,8 @@ function calculateMetrics(cases) {
14337
14668
  p95_latency_ms: p95Latency,
14338
14669
  total_tokens: totalPromptTokens + totalCompletionTokens,
14339
14670
  total_prompt_tokens: totalPromptTokens,
14340
- total_completion_tokens: totalCompletionTokens
14671
+ total_completion_tokens: totalCompletionTokens,
14672
+ cost
14341
14673
  };
14342
14674
  }
14343
14675
  function detectCIEnvironment() {
@@ -14508,6 +14840,16 @@ function getSuccessRate(manifest) {
14508
14840
  }
14509
14841
  return manifest.metrics.success_rate;
14510
14842
  }
14843
+ function getEstimatedCost(manifest) {
14844
+ const type = getManifestType(manifest);
14845
+ if (type === "stress") {
14846
+ return manifest.metrics.cost?.estimated_total_usd;
14847
+ }
14848
+ if (type === "run") {
14849
+ return manifest.metrics.cost?.total_usd;
14850
+ }
14851
+ return;
14852
+ }
14511
14853
  function getScenario(manifest) {
14512
14854
  return manifest.config.scenario;
14513
14855
  }
@@ -14577,13 +14919,17 @@ class LocalStorageAdapter {
14577
14919
  if (options?.scenario && getScenario(manifest) !== options.scenario) {
14578
14920
  continue;
14579
14921
  }
14580
- results.push({
14922
+ const item = {
14581
14923
  runId: manifest.run_id,
14582
14924
  scenario: getScenario(manifest),
14583
14925
  successRate: getSuccessRate(manifest),
14584
14926
  createdAt: manifest.start_time,
14585
14927
  type: manifestType
14586
- });
14928
+ };
14929
+ if (options?.includeCost) {
14930
+ item.estimatedCostUsd = getEstimatedCost(manifest);
14931
+ }
14932
+ results.push(item);
14587
14933
  } catch {}
14588
14934
  }
14589
14935
  }
@@ -16585,7 +16931,7 @@ class RealtimeChannel {
16585
16931
  }).map((bind) => {
16586
16932
  if (typeof handledPayload === "object" && "ids" in handledPayload) {
16587
16933
  const postgresChanges = handledPayload.data;
16588
- const { schema: schema2, table, commit_timestamp, type: type2, errors: errors3 } = postgresChanges;
16934
+ const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
16589
16935
  const enrichedPayload = {
16590
16936
  schema: schema2,
16591
16937
  table,
@@ -16593,7 +16939,7 @@ class RealtimeChannel {
16593
16939
  eventType: type2,
16594
16940
  new: {},
16595
16941
  old: {},
16596
- errors: errors3
16942
+ errors: errors2
16597
16943
  };
16598
16944
  handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
16599
16945
  }
@@ -22164,7 +22510,7 @@ class GoTrueClient {
22164
22510
  }
22165
22511
  });
22166
22512
  }
22167
- async unlinkIdentity(identity2) {
22513
+ async unlinkIdentity(identity) {
22168
22514
  try {
22169
22515
  return await this._useSession(async (result) => {
22170
22516
  var _a, _b;
@@ -22172,7 +22518,7 @@ class GoTrueClient {
22172
22518
  if (error) {
22173
22519
  throw error;
22174
22520
  }
22175
- return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity2.identity_id}`, {
22521
+ return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
22176
22522
  headers: this.headers,
22177
22523
  jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
22178
22524
  });
@@ -22346,20 +22692,20 @@ class GoTrueClient {
22346
22692
  if (this.broadcastChannel && broadcast) {
22347
22693
  this.broadcastChannel.postMessage({ event, session });
22348
22694
  }
22349
- const errors3 = [];
22695
+ const errors2 = [];
22350
22696
  const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
22351
22697
  try {
22352
22698
  await x.callback(event, session);
22353
22699
  } catch (e) {
22354
- errors3.push(e);
22700
+ errors2.push(e);
22355
22701
  }
22356
22702
  });
22357
22703
  await Promise.all(promises);
22358
- if (errors3.length > 0) {
22359
- for (let i = 0;i < errors3.length; i += 1) {
22360
- console.error(errors3[i]);
22704
+ if (errors2.length > 0) {
22705
+ for (let i = 0;i < errors2.length; i += 1) {
22706
+ console.error(errors2[i]);
22361
22707
  }
22362
- throw errors3[0];
22708
+ throw errors2[0];
22363
22709
  }
22364
22710
  } finally {
22365
22711
  this._debug(debugName, "end");
@@ -24407,317 +24753,250 @@ class Logger {
24407
24753
  }
24408
24754
  }
24409
24755
  var logger = new Logger("artemis");
24410
- // src/cost/pricing.ts
24411
- var MODEL_PRICING = {
24412
- "gpt-5": {
24413
- promptPer1K: 0.00125,
24414
- completionPer1K: 0.01,
24415
- lastUpdated: "2026-01",
24416
- notes: "400K context window"
24417
- },
24418
- "gpt-5.1": {
24419
- promptPer1K: 0.00125,
24420
- completionPer1K: 0.01,
24421
- lastUpdated: "2026-01"
24422
- },
24423
- "gpt-5.2": {
24424
- promptPer1K: 0.00175,
24425
- completionPer1K: 0.014,
24426
- lastUpdated: "2026-01"
24427
- },
24428
- "gpt-5-mini": {
24429
- promptPer1K: 0.00025,
24430
- completionPer1K: 0.002,
24431
- lastUpdated: "2026-01"
24432
- },
24433
- "gpt-5-nano": {
24434
- promptPer1K: 0.00005,
24435
- completionPer1K: 0.0004,
24436
- lastUpdated: "2026-01"
24437
- },
24438
- "gpt-4.1": {
24439
- promptPer1K: 0.002,
24440
- completionPer1K: 0.008,
24441
- lastUpdated: "2026-01",
24442
- notes: "1M context window"
24443
- },
24444
- "gpt-4.1-mini": {
24445
- promptPer1K: 0.0004,
24446
- completionPer1K: 0.0016,
24447
- lastUpdated: "2026-01"
24448
- },
24449
- "gpt-4.1-nano": {
24450
- promptPer1K: 0.0001,
24451
- completionPer1K: 0.0004,
24452
- lastUpdated: "2026-01"
24453
- },
24454
- "gpt-4o": {
24455
- promptPer1K: 0.0025,
24456
- completionPer1K: 0.01,
24457
- lastUpdated: "2026-01",
24458
- notes: "128K context window"
24459
- },
24460
- "gpt-4o-mini": {
24461
- promptPer1K: 0.00015,
24462
- completionPer1K: 0.0006,
24463
- lastUpdated: "2026-01",
24464
- notes: "128K context window"
24465
- },
24466
- o1: {
24467
- promptPer1K: 0.015,
24468
- completionPer1K: 0.06,
24469
- lastUpdated: "2026-01",
24470
- notes: "Reasoning model - internal thinking tokens billed as output"
24471
- },
24472
- o3: {
24473
- promptPer1K: 0.002,
24474
- completionPer1K: 0.008,
24475
- lastUpdated: "2026-01"
24476
- },
24477
- "o3-mini": {
24478
- promptPer1K: 0.0011,
24479
- completionPer1K: 0.0044,
24480
- lastUpdated: "2026-01"
24481
- },
24482
- "o4-mini": {
24483
- promptPer1K: 0.0011,
24484
- completionPer1K: 0.0044,
24485
- lastUpdated: "2026-01"
24486
- },
24487
- "gpt-4-turbo": {
24488
- promptPer1K: 0.01,
24489
- completionPer1K: 0.03,
24490
- lastUpdated: "2026-01"
24491
- },
24492
- "gpt-4": {
24493
- promptPer1K: 0.03,
24494
- completionPer1K: 0.06,
24495
- lastUpdated: "2026-01"
24496
- },
24497
- "gpt-3.5-turbo": {
24498
- promptPer1K: 0.0005,
24499
- completionPer1K: 0.0015,
24500
- lastUpdated: "2026-01"
24501
- },
24502
- "claude-opus-4.5": {
24503
- promptPer1K: 0.005,
24504
- completionPer1K: 0.025,
24505
- lastUpdated: "2026-01",
24506
- notes: "Most capable Claude model"
24507
- },
24508
- "claude-sonnet-4.5": {
24509
- promptPer1K: 0.003,
24510
- completionPer1K: 0.015,
24511
- lastUpdated: "2026-01",
24512
- notes: "Balanced performance and cost"
24513
- },
24514
- "claude-haiku-4.5": {
24515
- promptPer1K: 0.001,
24516
- completionPer1K: 0.005,
24517
- lastUpdated: "2026-01",
24518
- notes: "Fastest Claude model"
24519
- },
24520
- "claude-opus-4": {
24521
- promptPer1K: 0.015,
24522
- completionPer1K: 0.075,
24523
- lastUpdated: "2026-01"
24524
- },
24525
- "claude-opus-4.1": {
24526
- promptPer1K: 0.015,
24527
- completionPer1K: 0.075,
24528
- lastUpdated: "2026-01"
24529
- },
24530
- "claude-sonnet-4": {
24531
- promptPer1K: 0.003,
24532
- completionPer1K: 0.015,
24533
- lastUpdated: "2026-01"
24534
- },
24535
- "claude-sonnet-3.7": {
24536
- promptPer1K: 0.003,
24537
- completionPer1K: 0.015,
24538
- lastUpdated: "2026-01"
24539
- },
24540
- "claude-3-7-sonnet": {
24541
- promptPer1K: 0.003,
24542
- completionPer1K: 0.015,
24543
- lastUpdated: "2026-01"
24544
- },
24545
- "claude-3-5-sonnet-20241022": {
24546
- promptPer1K: 0.003,
24547
- completionPer1K: 0.015,
24548
- lastUpdated: "2026-01"
24549
- },
24550
- "claude-3-5-haiku-20241022": {
24551
- promptPer1K: 0.0008,
24552
- completionPer1K: 0.004,
24553
- lastUpdated: "2026-01"
24554
- },
24555
- "claude-haiku-3.5": {
24556
- promptPer1K: 0.0008,
24557
- completionPer1K: 0.004,
24558
- lastUpdated: "2026-01"
24559
- },
24560
- "claude-3-opus": {
24561
- promptPer1K: 0.015,
24562
- completionPer1K: 0.075,
24563
- lastUpdated: "2026-01"
24564
- },
24565
- "claude-3-sonnet": {
24566
- promptPer1K: 0.003,
24567
- completionPer1K: 0.015,
24568
- lastUpdated: "2026-01"
24569
- },
24570
- "claude-3-haiku": {
24571
- promptPer1K: 0.00025,
24572
- completionPer1K: 0.00125,
24573
- lastUpdated: "2026-01"
24574
- },
24575
- "claude-3.5-sonnet": {
24576
- promptPer1K: 0.003,
24577
- completionPer1K: 0.015,
24578
- lastUpdated: "2026-01"
24579
- },
24580
- "claude-3.5-haiku": {
24581
- promptPer1K: 0.0008,
24582
- completionPer1K: 0.004,
24583
- lastUpdated: "2026-01"
24756
+ // src/validator/validator.ts
24757
+ var import_yaml2 = __toESM(require_dist(), 1);
24758
+ import { readFileSync } from "fs";
24759
+ class ScenarioValidator {
24760
+ _options;
24761
+ constructor(options = {}) {
24762
+ this._options = options;
24584
24763
  }
24585
- };
24586
- var DEFAULT_PRICING = {
24587
- promptPer1K: 0.003,
24588
- completionPer1K: 0.015,
24589
- lastUpdated: "2026-01",
24590
- notes: "Default pricing - verify with provider"
24591
- };
24592
- function getModelPricing(model) {
24593
- if (MODEL_PRICING[model]) {
24594
- return MODEL_PRICING[model];
24764
+ get options() {
24765
+ return this._options;
24595
24766
  }
24596
- const lowerModel = model.toLowerCase();
24597
- for (const [key, pricing] of Object.entries(MODEL_PRICING)) {
24598
- if (key.toLowerCase() === lowerModel) {
24599
- return pricing;
24767
+ validate(filePath) {
24768
+ const errors4 = [];
24769
+ const warnings = [];
24770
+ let content;
24771
+ try {
24772
+ content = readFileSync(filePath, "utf-8");
24773
+ } catch (err) {
24774
+ const error = err;
24775
+ errors4.push({
24776
+ line: 1,
24777
+ message: `Failed to read file: ${error.message}`,
24778
+ rule: "file-read",
24779
+ severity: "error"
24780
+ });
24781
+ return { file: filePath, valid: false, errors: errors4, warnings };
24600
24782
  }
24783
+ let parsed;
24784
+ try {
24785
+ parsed = import_yaml2.default.parse(content, {
24786
+ prettyErrors: true,
24787
+ strict: true
24788
+ });
24789
+ } catch (err) {
24790
+ if (err instanceof import_yaml2.default.YAMLError) {
24791
+ const linePos = err.linePos?.[0];
24792
+ errors4.push({
24793
+ line: linePos?.line || 1,
24794
+ column: linePos?.col,
24795
+ message: `Invalid YAML syntax: ${err.message}`,
24796
+ rule: "yaml-syntax",
24797
+ severity: "error"
24798
+ });
24799
+ } else {
24800
+ errors4.push({
24801
+ line: 1,
24802
+ message: `YAML parse error: ${err.message}`,
24803
+ rule: "yaml-syntax",
24804
+ severity: "error"
24805
+ });
24806
+ }
24807
+ return { file: filePath, valid: false, errors: errors4, warnings };
24808
+ }
24809
+ if (parsed === null || typeof parsed !== "object") {
24810
+ errors4.push({
24811
+ line: 1,
24812
+ message: "Scenario must be a YAML object",
24813
+ rule: "schema-type",
24814
+ severity: "error"
24815
+ });
24816
+ return { file: filePath, valid: false, errors: errors4, warnings };
24817
+ }
24818
+ const schemaResult = ScenarioSchema.safeParse(parsed);
24819
+ if (!schemaResult.success) {
24820
+ const zodErrors = this.formatZodErrors(schemaResult.error, content);
24821
+ errors4.push(...zodErrors);
24822
+ }
24823
+ if (schemaResult.success) {
24824
+ const semanticErrors = this.validateSemantics(schemaResult.data, content);
24825
+ errors4.push(...semanticErrors);
24826
+ }
24827
+ const detectedWarnings = this.detectWarnings(parsed, content);
24828
+ warnings.push(...detectedWarnings);
24829
+ return {
24830
+ file: filePath,
24831
+ valid: errors4.length === 0,
24832
+ errors: errors4,
24833
+ warnings
24834
+ };
24601
24835
  }
24602
- if (lowerModel.includes("gpt-5.2")) {
24603
- return MODEL_PRICING["gpt-5.2"];
24604
- }
24605
- if (lowerModel.includes("gpt-5.1")) {
24606
- return MODEL_PRICING["gpt-5.1"];
24607
- }
24608
- if (lowerModel.includes("gpt-5-mini")) {
24609
- return MODEL_PRICING["gpt-5-mini"];
24610
- }
24611
- if (lowerModel.includes("gpt-5-nano")) {
24612
- return MODEL_PRICING["gpt-5-nano"];
24613
- }
24614
- if (lowerModel.includes("gpt-5")) {
24615
- return MODEL_PRICING["gpt-5"];
24616
- }
24617
- if (lowerModel.includes("gpt-4.1-mini")) {
24618
- return MODEL_PRICING["gpt-4.1-mini"];
24619
- }
24620
- if (lowerModel.includes("gpt-4.1-nano")) {
24621
- return MODEL_PRICING["gpt-4.1-nano"];
24622
- }
24623
- if (lowerModel.includes("gpt-4.1")) {
24624
- return MODEL_PRICING["gpt-4.1"];
24625
- }
24626
- if (lowerModel.includes("gpt-4o-mini")) {
24627
- return MODEL_PRICING["gpt-4o-mini"];
24628
- }
24629
- if (lowerModel.includes("gpt-4o")) {
24630
- return MODEL_PRICING["gpt-4o"];
24631
- }
24632
- if (lowerModel.includes("o4-mini")) {
24633
- return MODEL_PRICING["o4-mini"];
24634
- }
24635
- if (lowerModel.includes("o3-mini")) {
24636
- return MODEL_PRICING["o3-mini"];
24637
- }
24638
- if (lowerModel.includes("o3")) {
24639
- return MODEL_PRICING.o3;
24640
- }
24641
- if (lowerModel.includes("o1")) {
24642
- return MODEL_PRICING.o1;
24643
- }
24644
- if (lowerModel.includes("gpt-4-turbo")) {
24645
- return MODEL_PRICING["gpt-4-turbo"];
24646
- }
24647
- if (lowerModel.includes("gpt-4")) {
24648
- return MODEL_PRICING["gpt-4"];
24649
- }
24650
- if (lowerModel.includes("gpt-3.5")) {
24651
- return MODEL_PRICING["gpt-3.5-turbo"];
24652
- }
24653
- if (lowerModel.includes("opus-4.5") || lowerModel.includes("opus-4-5")) {
24654
- return MODEL_PRICING["claude-opus-4.5"];
24655
- }
24656
- if (lowerModel.includes("sonnet-4.5") || lowerModel.includes("sonnet-4-5")) {
24657
- return MODEL_PRICING["claude-sonnet-4.5"];
24658
- }
24659
- if (lowerModel.includes("haiku-4.5") || lowerModel.includes("haiku-4-5")) {
24660
- return MODEL_PRICING["claude-haiku-4.5"];
24661
- }
24662
- if (lowerModel.includes("opus-4.1") || lowerModel.includes("opus-4-1")) {
24663
- return MODEL_PRICING["claude-opus-4.1"];
24664
- }
24665
- if (lowerModel.includes("opus-4")) {
24666
- return MODEL_PRICING["claude-opus-4"];
24667
- }
24668
- if (lowerModel.includes("sonnet-4")) {
24669
- return MODEL_PRICING["claude-sonnet-4"];
24670
- }
24671
- if (lowerModel.includes("sonnet-3.7") || lowerModel.includes("sonnet-3-7")) {
24672
- return MODEL_PRICING["claude-sonnet-3.7"];
24673
- }
24674
- if (lowerModel.includes("claude-3-5-sonnet") || lowerModel.includes("claude-3.5-sonnet")) {
24675
- return MODEL_PRICING["claude-3.5-sonnet"];
24836
+ formatZodErrors(error, content) {
24837
+ const issues = [];
24838
+ const lines = content.split(`
24839
+ `);
24840
+ for (const issue of error.issues) {
24841
+ const path = issue.path.join(".");
24842
+ const line = this.findLineForPath(lines, issue.path);
24843
+ let message;
24844
+ switch (issue.code) {
24845
+ case "invalid_type":
24846
+ message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
24847
+ break;
24848
+ case "invalid_enum_value":
24849
+ message = `'${path}' must be one of: ${issue.options.join(", ")}`;
24850
+ break;
24851
+ case "too_small":
24852
+ if (issue.type === "array") {
24853
+ message = `'${path}' must have at least ${issue.minimum} item(s)`;
24854
+ } else {
24855
+ message = `'${path}' is too small`;
24856
+ }
24857
+ break;
24858
+ case "unrecognized_keys":
24859
+ message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
24860
+ break;
24861
+ default:
24862
+ message = issue.message;
24863
+ }
24864
+ issues.push({
24865
+ line,
24866
+ message,
24867
+ rule: `schema-${issue.code}`,
24868
+ severity: "error"
24869
+ });
24870
+ }
24871
+ return issues;
24676
24872
  }
24677
- if (lowerModel.includes("claude-3-5-haiku") || lowerModel.includes("claude-3.5-haiku")) {
24678
- return MODEL_PRICING["claude-3.5-haiku"];
24873
+ findLineForPath(lines, path) {
24874
+ if (path.length === 0)
24875
+ return 1;
24876
+ const searchKey = String(path[path.length - 1]);
24877
+ for (let i2 = 0;i2 < lines.length; i2++) {
24878
+ const line = lines[i2];
24879
+ if (line.includes(`${searchKey}:`) || line.includes(`- ${searchKey}:`)) {
24880
+ return i2 + 1;
24881
+ }
24882
+ if (typeof path[path.length - 1] === "number" && path.includes("cases")) {
24883
+ if (line.trim().startsWith("- id:")) {
24884
+ return i2 + 1;
24885
+ }
24886
+ }
24887
+ }
24888
+ return 1;
24679
24889
  }
24680
- if (lowerModel.includes("claude-3-opus")) {
24681
- return MODEL_PRICING["claude-3-opus"];
24890
+ validateSemantics(scenario, content) {
24891
+ const errors4 = [];
24892
+ const lines = content.split(`
24893
+ `);
24894
+ const caseIds = new Set;
24895
+ for (const testCase of scenario.cases) {
24896
+ if (caseIds.has(testCase.id)) {
24897
+ const line = this.findLineForCaseId(lines, testCase.id);
24898
+ errors4.push({
24899
+ line,
24900
+ message: `Duplicate case ID: '${testCase.id}'`,
24901
+ rule: "duplicate-case-id",
24902
+ severity: "error"
24903
+ });
24904
+ }
24905
+ caseIds.add(testCase.id);
24906
+ }
24907
+ const globalVars = scenario.variables || {};
24908
+ for (const testCase of scenario.cases) {
24909
+ const caseVars = testCase.variables || {};
24910
+ const allVars = { ...globalVars, ...caseVars };
24911
+ const prompt2 = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
24912
+ const refs = this.extractVariableRefs(prompt2);
24913
+ for (const ref of refs) {
24914
+ if (!(ref in allVars)) {
24915
+ const line = this.findLineForCaseId(lines, testCase.id);
24916
+ errors4.push({
24917
+ line,
24918
+ message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
24919
+ rule: "undefined-variable",
24920
+ severity: "error",
24921
+ suggestion: `Define '${ref}' in scenario.variables or case.variables`
24922
+ });
24923
+ }
24924
+ }
24925
+ }
24926
+ return errors4;
24682
24927
  }
24683
- if (lowerModel.includes("claude-3-sonnet")) {
24684
- return MODEL_PRICING["claude-3-sonnet"];
24928
+ findLineForCaseId(lines, caseId) {
24929
+ for (let i2 = 0;i2 < lines.length; i2++) {
24930
+ if (lines[i2].includes(`id: ${caseId}`) || lines[i2].includes(`id: "${caseId}"`) || lines[i2].includes(`id: '${caseId}'`)) {
24931
+ return i2 + 1;
24932
+ }
24933
+ }
24934
+ return 1;
24685
24935
  }
24686
- if (lowerModel.includes("claude-3-haiku")) {
24687
- return MODEL_PRICING["claude-3-haiku"];
24936
+ extractVariableRefs(text) {
24937
+ const regex2 = /\{\{(\w+)\}\}/g;
24938
+ const refs = [];
24939
+ const matches = text.matchAll(regex2);
24940
+ for (const match of matches) {
24941
+ refs.push(match[1]);
24942
+ }
24943
+ return refs;
24688
24944
  }
24689
- if (lowerModel.includes("claude")) {
24690
- return MODEL_PRICING["claude-sonnet-4.5"];
24945
+ detectWarnings(parsed, content) {
24946
+ const warnings = [];
24947
+ const lines = content.split(`
24948
+ `);
24949
+ if (parsed && typeof parsed === "object") {
24950
+ const obj = parsed;
24951
+ if (this.hasDeepKey(obj, "criteria")) {
24952
+ const line = this.findLineForKey(lines, "criteria");
24953
+ warnings.push({
24954
+ line,
24955
+ message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
24956
+ rule: "deprecated-field",
24957
+ severity: "warning",
24958
+ suggestion: "Replace 'criteria' with 'rubric'"
24959
+ });
24960
+ }
24961
+ const cases = obj.cases;
24962
+ if (Array.isArray(cases) && cases.length > 20) {
24963
+ warnings.push({
24964
+ line: 1,
24965
+ message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
24966
+ rule: "performance-hint",
24967
+ severity: "warning"
24968
+ });
24969
+ }
24970
+ if (!obj.description) {
24971
+ warnings.push({
24972
+ line: 1,
24973
+ message: "Scenario is missing 'description' field. Adding a description improves documentation.",
24974
+ rule: "missing-description",
24975
+ severity: "warning"
24976
+ });
24977
+ }
24978
+ }
24979
+ return warnings;
24691
24980
  }
24692
- return DEFAULT_PRICING;
24693
- }
24694
- function estimateCost(promptTokens, completionTokens, model) {
24695
- const pricing = getModelPricing(model);
24696
- const promptCostUsd = promptTokens / 1000 * pricing.promptPer1K;
24697
- const completionCostUsd = completionTokens / 1000 * pricing.completionPer1K;
24698
- const totalUsd = promptCostUsd + completionCostUsd;
24699
- return {
24700
- totalUsd,
24701
- promptCostUsd,
24702
- completionCostUsd,
24703
- model,
24704
- pricing
24705
- };
24706
- }
24707
- function formatCost(costUsd) {
24708
- if (costUsd < 0.01) {
24709
- return `$${(costUsd * 100).toFixed(4)} cents`;
24981
+ hasDeepKey(obj, key) {
24982
+ if (obj === null || typeof obj !== "object")
24983
+ return false;
24984
+ if (key in obj)
24985
+ return true;
24986
+ for (const value of Object.values(obj)) {
24987
+ if (this.hasDeepKey(value, key))
24988
+ return true;
24989
+ }
24990
+ return false;
24710
24991
  }
24711
- if (costUsd < 1) {
24712
- return `$${costUsd.toFixed(4)}`;
24992
+ findLineForKey(lines, key) {
24993
+ for (let i2 = 0;i2 < lines.length; i2++) {
24994
+ if (lines[i2].includes(`${key}:`)) {
24995
+ return i2 + 1;
24996
+ }
24997
+ }
24998
+ return 1;
24713
24999
  }
24714
- return `$${costUsd.toFixed(2)}`;
24715
- }
24716
- function listKnownModels() {
24717
- return Object.entries(MODEL_PRICING).map(([model, pricing]) => ({
24718
- model,
24719
- pricing
24720
- }));
24721
25000
  }
24722
25001
  export {
24723
25002
  wrapError,
@@ -24766,6 +25045,7 @@ export {
24766
25045
  TestCaseSchema,
24767
25046
  SupabaseStorageAdapter,
24768
25047
  SimilarityEvaluator,
25048
+ ScenarioValidator,
24769
25049
  ScenarioSchema,
24770
25050
  SUPPORTED_EXPRESSIONS,
24771
25051
  RegexEvaluator,