@artemiskit/core 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/dist/artifacts/manifest.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +20 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +688 -408
- package/dist/storage/local.d.ts.map +1 -1
- package/dist/storage/types.d.ts +4 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/validator/index.d.ts +6 -0
- package/dist/validator/index.d.ts.map +1 -0
- package/dist/validator/types.d.ts +58 -0
- package/dist/validator/types.d.ts.map +1 -0
- package/dist/validator/validator.d.ts +55 -0
- package/dist/validator/validator.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/artifacts/manifest.ts +24 -2
- package/src/artifacts/types.ts +21 -0
- package/src/evaluators/similarity.test.ts +4 -3
- package/src/index.ts +3 -0
- package/src/storage/local.ts +24 -2
- package/src/storage/types.ts +4 -0
- package/src/validator/index.ts +6 -0
- package/src/validator/types.ts +62 -0
- package/src/validator/validator.ts +345 -0
package/dist/index.js
CHANGED
|
@@ -10896,6 +10896,55 @@ var require_public_api = __commonJS((exports) => {
|
|
|
10896
10896
|
exports.stringify = stringify;
|
|
10897
10897
|
});
|
|
10898
10898
|
|
|
10899
|
+
// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
|
|
10900
|
+
var require_dist = __commonJS((exports) => {
|
|
10901
|
+
var composer = require_composer();
|
|
10902
|
+
var Document = require_Document();
|
|
10903
|
+
var Schema = require_Schema();
|
|
10904
|
+
var errors2 = require_errors();
|
|
10905
|
+
var Alias = require_Alias();
|
|
10906
|
+
var identity = require_identity();
|
|
10907
|
+
var Pair = require_Pair();
|
|
10908
|
+
var Scalar = require_Scalar();
|
|
10909
|
+
var YAMLMap = require_YAMLMap();
|
|
10910
|
+
var YAMLSeq = require_YAMLSeq();
|
|
10911
|
+
var cst = require_cst();
|
|
10912
|
+
var lexer = require_lexer();
|
|
10913
|
+
var lineCounter = require_line_counter();
|
|
10914
|
+
var parser = require_parser();
|
|
10915
|
+
var publicApi = require_public_api();
|
|
10916
|
+
var visit = require_visit();
|
|
10917
|
+
exports.Composer = composer.Composer;
|
|
10918
|
+
exports.Document = Document.Document;
|
|
10919
|
+
exports.Schema = Schema.Schema;
|
|
10920
|
+
exports.YAMLError = errors2.YAMLError;
|
|
10921
|
+
exports.YAMLParseError = errors2.YAMLParseError;
|
|
10922
|
+
exports.YAMLWarning = errors2.YAMLWarning;
|
|
10923
|
+
exports.Alias = Alias.Alias;
|
|
10924
|
+
exports.isAlias = identity.isAlias;
|
|
10925
|
+
exports.isCollection = identity.isCollection;
|
|
10926
|
+
exports.isDocument = identity.isDocument;
|
|
10927
|
+
exports.isMap = identity.isMap;
|
|
10928
|
+
exports.isNode = identity.isNode;
|
|
10929
|
+
exports.isPair = identity.isPair;
|
|
10930
|
+
exports.isScalar = identity.isScalar;
|
|
10931
|
+
exports.isSeq = identity.isSeq;
|
|
10932
|
+
exports.Pair = Pair.Pair;
|
|
10933
|
+
exports.Scalar = Scalar.Scalar;
|
|
10934
|
+
exports.YAMLMap = YAMLMap.YAMLMap;
|
|
10935
|
+
exports.YAMLSeq = YAMLSeq.YAMLSeq;
|
|
10936
|
+
exports.CST = cst;
|
|
10937
|
+
exports.Lexer = lexer.Lexer;
|
|
10938
|
+
exports.LineCounter = lineCounter.LineCounter;
|
|
10939
|
+
exports.Parser = parser.Parser;
|
|
10940
|
+
exports.parse = publicApi.parse;
|
|
10941
|
+
exports.parseAllDocuments = publicApi.parseAllDocuments;
|
|
10942
|
+
exports.parseDocument = publicApi.parseDocument;
|
|
10943
|
+
exports.stringify = publicApi.stringify;
|
|
10944
|
+
exports.visit = visit.visit;
|
|
10945
|
+
exports.visitAsync = visit.visitAsync;
|
|
10946
|
+
});
|
|
10947
|
+
|
|
10899
10948
|
// src/evaluators/combined.ts
|
|
10900
10949
|
async function getEvaluatorForType(type) {
|
|
10901
10950
|
const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
|
|
@@ -13616,55 +13665,8 @@ var ScenarioSchema = exports_external.object({
|
|
|
13616
13665
|
}).optional()
|
|
13617
13666
|
});
|
|
13618
13667
|
// src/scenario/parser.ts
|
|
13668
|
+
var import_yaml = __toESM(require_dist(), 1);
|
|
13619
13669
|
import { readFile } from "fs/promises";
|
|
13620
|
-
|
|
13621
|
-
// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
|
|
13622
|
-
var composer = require_composer();
|
|
13623
|
-
var Document = require_Document();
|
|
13624
|
-
var Schema = require_Schema();
|
|
13625
|
-
var errors2 = require_errors();
|
|
13626
|
-
var Alias = require_Alias();
|
|
13627
|
-
var identity = require_identity();
|
|
13628
|
-
var Pair = require_Pair();
|
|
13629
|
-
var Scalar = require_Scalar();
|
|
13630
|
-
var YAMLMap = require_YAMLMap();
|
|
13631
|
-
var YAMLSeq = require_YAMLSeq();
|
|
13632
|
-
var cst = require_cst();
|
|
13633
|
-
var lexer = require_lexer();
|
|
13634
|
-
var lineCounter = require_line_counter();
|
|
13635
|
-
var parser = require_parser();
|
|
13636
|
-
var publicApi = require_public_api();
|
|
13637
|
-
var visit = require_visit();
|
|
13638
|
-
var $Composer = composer.Composer;
|
|
13639
|
-
var $Document = Document.Document;
|
|
13640
|
-
var $Schema = Schema.Schema;
|
|
13641
|
-
var $YAMLError = errors2.YAMLError;
|
|
13642
|
-
var $YAMLParseError = errors2.YAMLParseError;
|
|
13643
|
-
var $YAMLWarning = errors2.YAMLWarning;
|
|
13644
|
-
var $Alias = Alias.Alias;
|
|
13645
|
-
var $isAlias = identity.isAlias;
|
|
13646
|
-
var $isCollection = identity.isCollection;
|
|
13647
|
-
var $isDocument = identity.isDocument;
|
|
13648
|
-
var $isMap = identity.isMap;
|
|
13649
|
-
var $isNode = identity.isNode;
|
|
13650
|
-
var $isPair = identity.isPair;
|
|
13651
|
-
var $isScalar = identity.isScalar;
|
|
13652
|
-
var $isSeq = identity.isSeq;
|
|
13653
|
-
var $Pair = Pair.Pair;
|
|
13654
|
-
var $Scalar = Scalar.Scalar;
|
|
13655
|
-
var $YAMLMap = YAMLMap.YAMLMap;
|
|
13656
|
-
var $YAMLSeq = YAMLSeq.YAMLSeq;
|
|
13657
|
-
var $Lexer = lexer.Lexer;
|
|
13658
|
-
var $LineCounter = lineCounter.LineCounter;
|
|
13659
|
-
var $Parser = parser.Parser;
|
|
13660
|
-
var $parse = publicApi.parse;
|
|
13661
|
-
var $parseAllDocuments = publicApi.parseAllDocuments;
|
|
13662
|
-
var $parseDocument = publicApi.parseDocument;
|
|
13663
|
-
var $stringify = publicApi.stringify;
|
|
13664
|
-
var $visit = visit.visit;
|
|
13665
|
-
var $visitAsync = visit.visitAsync;
|
|
13666
|
-
|
|
13667
|
-
// src/scenario/parser.ts
|
|
13668
13670
|
function expandEnvVars(obj) {
|
|
13669
13671
|
if (typeof obj === "string") {
|
|
13670
13672
|
return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
|
|
@@ -13699,7 +13701,7 @@ async function parseScenarioFile(filePath) {
|
|
|
13699
13701
|
}
|
|
13700
13702
|
function parseScenarioString(content, source) {
|
|
13701
13703
|
try {
|
|
13702
|
-
const raw =
|
|
13704
|
+
const raw = import_yaml.parse(content);
|
|
13703
13705
|
const expanded = expandEnvVars(raw);
|
|
13704
13706
|
const result = ScenarioSchema.safeParse(expanded);
|
|
13705
13707
|
if (!result.success) {
|
|
@@ -14238,54 +14240,367 @@ function nanoid(size = 21) {
|
|
|
14238
14240
|
return id;
|
|
14239
14241
|
}
|
|
14240
14242
|
|
|
14241
|
-
// src/
|
|
14242
|
-
|
|
14243
|
-
|
|
14244
|
-
|
|
14245
|
-
|
|
14246
|
-
|
|
14247
|
-
|
|
14248
|
-
}
|
|
14249
|
-
|
|
14250
|
-
|
|
14251
|
-
|
|
14252
|
-
|
|
14253
|
-
|
|
14254
|
-
|
|
14255
|
-
|
|
14256
|
-
|
|
14257
|
-
|
|
14258
|
-
|
|
14259
|
-
|
|
14260
|
-
|
|
14261
|
-
|
|
14262
|
-
|
|
14263
|
-
|
|
14264
|
-
|
|
14265
|
-
|
|
14266
|
-
|
|
14267
|
-
|
|
14268
|
-
|
|
14269
|
-
|
|
14243
|
+
// src/cost/pricing.ts
|
|
14244
|
+
var MODEL_PRICING = {
|
|
14245
|
+
"gpt-5": {
|
|
14246
|
+
promptPer1K: 0.00125,
|
|
14247
|
+
completionPer1K: 0.01,
|
|
14248
|
+
lastUpdated: "2026-01",
|
|
14249
|
+
notes: "400K context window"
|
|
14250
|
+
},
|
|
14251
|
+
"gpt-5.1": {
|
|
14252
|
+
promptPer1K: 0.00125,
|
|
14253
|
+
completionPer1K: 0.01,
|
|
14254
|
+
lastUpdated: "2026-01"
|
|
14255
|
+
},
|
|
14256
|
+
"gpt-5.2": {
|
|
14257
|
+
promptPer1K: 0.00175,
|
|
14258
|
+
completionPer1K: 0.014,
|
|
14259
|
+
lastUpdated: "2026-01"
|
|
14260
|
+
},
|
|
14261
|
+
"gpt-5-mini": {
|
|
14262
|
+
promptPer1K: 0.00025,
|
|
14263
|
+
completionPer1K: 0.002,
|
|
14264
|
+
lastUpdated: "2026-01"
|
|
14265
|
+
},
|
|
14266
|
+
"gpt-5-nano": {
|
|
14267
|
+
promptPer1K: 0.00005,
|
|
14268
|
+
completionPer1K: 0.0004,
|
|
14269
|
+
lastUpdated: "2026-01"
|
|
14270
|
+
},
|
|
14271
|
+
"gpt-4.1": {
|
|
14272
|
+
promptPer1K: 0.002,
|
|
14273
|
+
completionPer1K: 0.008,
|
|
14274
|
+
lastUpdated: "2026-01",
|
|
14275
|
+
notes: "1M context window"
|
|
14276
|
+
},
|
|
14277
|
+
"gpt-4.1-mini": {
|
|
14278
|
+
promptPer1K: 0.0004,
|
|
14279
|
+
completionPer1K: 0.0016,
|
|
14280
|
+
lastUpdated: "2026-01"
|
|
14281
|
+
},
|
|
14282
|
+
"gpt-4.1-nano": {
|
|
14283
|
+
promptPer1K: 0.0001,
|
|
14284
|
+
completionPer1K: 0.0004,
|
|
14285
|
+
lastUpdated: "2026-01"
|
|
14286
|
+
},
|
|
14287
|
+
"gpt-4o": {
|
|
14288
|
+
promptPer1K: 0.0025,
|
|
14289
|
+
completionPer1K: 0.01,
|
|
14290
|
+
lastUpdated: "2026-01",
|
|
14291
|
+
notes: "128K context window"
|
|
14292
|
+
},
|
|
14293
|
+
"gpt-4o-mini": {
|
|
14294
|
+
promptPer1K: 0.00015,
|
|
14295
|
+
completionPer1K: 0.0006,
|
|
14296
|
+
lastUpdated: "2026-01",
|
|
14297
|
+
notes: "128K context window"
|
|
14298
|
+
},
|
|
14299
|
+
o1: {
|
|
14300
|
+
promptPer1K: 0.015,
|
|
14301
|
+
completionPer1K: 0.06,
|
|
14302
|
+
lastUpdated: "2026-01",
|
|
14303
|
+
notes: "Reasoning model - internal thinking tokens billed as output"
|
|
14304
|
+
},
|
|
14305
|
+
o3: {
|
|
14306
|
+
promptPer1K: 0.002,
|
|
14307
|
+
completionPer1K: 0.008,
|
|
14308
|
+
lastUpdated: "2026-01"
|
|
14309
|
+
},
|
|
14310
|
+
"o3-mini": {
|
|
14311
|
+
promptPer1K: 0.0011,
|
|
14312
|
+
completionPer1K: 0.0044,
|
|
14313
|
+
lastUpdated: "2026-01"
|
|
14314
|
+
},
|
|
14315
|
+
"o4-mini": {
|
|
14316
|
+
promptPer1K: 0.0011,
|
|
14317
|
+
completionPer1K: 0.0044,
|
|
14318
|
+
lastUpdated: "2026-01"
|
|
14319
|
+
},
|
|
14320
|
+
"gpt-4-turbo": {
|
|
14321
|
+
promptPer1K: 0.01,
|
|
14322
|
+
completionPer1K: 0.03,
|
|
14323
|
+
lastUpdated: "2026-01"
|
|
14324
|
+
},
|
|
14325
|
+
"gpt-4": {
|
|
14326
|
+
promptPer1K: 0.03,
|
|
14327
|
+
completionPer1K: 0.06,
|
|
14328
|
+
lastUpdated: "2026-01"
|
|
14329
|
+
},
|
|
14330
|
+
"gpt-3.5-turbo": {
|
|
14331
|
+
promptPer1K: 0.0005,
|
|
14332
|
+
completionPer1K: 0.0015,
|
|
14333
|
+
lastUpdated: "2026-01"
|
|
14334
|
+
},
|
|
14335
|
+
"claude-opus-4.5": {
|
|
14336
|
+
promptPer1K: 0.005,
|
|
14337
|
+
completionPer1K: 0.025,
|
|
14338
|
+
lastUpdated: "2026-01",
|
|
14339
|
+
notes: "Most capable Claude model"
|
|
14340
|
+
},
|
|
14341
|
+
"claude-sonnet-4.5": {
|
|
14342
|
+
promptPer1K: 0.003,
|
|
14343
|
+
completionPer1K: 0.015,
|
|
14344
|
+
lastUpdated: "2026-01",
|
|
14345
|
+
notes: "Balanced performance and cost"
|
|
14346
|
+
},
|
|
14347
|
+
"claude-haiku-4.5": {
|
|
14348
|
+
promptPer1K: 0.001,
|
|
14349
|
+
completionPer1K: 0.005,
|
|
14350
|
+
lastUpdated: "2026-01",
|
|
14351
|
+
notes: "Fastest Claude model"
|
|
14352
|
+
},
|
|
14353
|
+
"claude-opus-4": {
|
|
14354
|
+
promptPer1K: 0.015,
|
|
14355
|
+
completionPer1K: 0.075,
|
|
14356
|
+
lastUpdated: "2026-01"
|
|
14357
|
+
},
|
|
14358
|
+
"claude-opus-4.1": {
|
|
14359
|
+
promptPer1K: 0.015,
|
|
14360
|
+
completionPer1K: 0.075,
|
|
14361
|
+
lastUpdated: "2026-01"
|
|
14362
|
+
},
|
|
14363
|
+
"claude-sonnet-4": {
|
|
14364
|
+
promptPer1K: 0.003,
|
|
14365
|
+
completionPer1K: 0.015,
|
|
14366
|
+
lastUpdated: "2026-01"
|
|
14367
|
+
},
|
|
14368
|
+
"claude-sonnet-3.7": {
|
|
14369
|
+
promptPer1K: 0.003,
|
|
14370
|
+
completionPer1K: 0.015,
|
|
14371
|
+
lastUpdated: "2026-01"
|
|
14372
|
+
},
|
|
14373
|
+
"claude-3-7-sonnet": {
|
|
14374
|
+
promptPer1K: 0.003,
|
|
14375
|
+
completionPer1K: 0.015,
|
|
14376
|
+
lastUpdated: "2026-01"
|
|
14377
|
+
},
|
|
14378
|
+
"claude-3-5-sonnet-20241022": {
|
|
14379
|
+
promptPer1K: 0.003,
|
|
14380
|
+
completionPer1K: 0.015,
|
|
14381
|
+
lastUpdated: "2026-01"
|
|
14382
|
+
},
|
|
14383
|
+
"claude-3-5-haiku-20241022": {
|
|
14384
|
+
promptPer1K: 0.0008,
|
|
14385
|
+
completionPer1K: 0.004,
|
|
14386
|
+
lastUpdated: "2026-01"
|
|
14387
|
+
},
|
|
14388
|
+
"claude-haiku-3.5": {
|
|
14389
|
+
promptPer1K: 0.0008,
|
|
14390
|
+
completionPer1K: 0.004,
|
|
14391
|
+
lastUpdated: "2026-01"
|
|
14392
|
+
},
|
|
14393
|
+
"claude-3-opus": {
|
|
14394
|
+
promptPer1K: 0.015,
|
|
14395
|
+
completionPer1K: 0.075,
|
|
14396
|
+
lastUpdated: "2026-01"
|
|
14397
|
+
},
|
|
14398
|
+
"claude-3-sonnet": {
|
|
14399
|
+
promptPer1K: 0.003,
|
|
14400
|
+
completionPer1K: 0.015,
|
|
14401
|
+
lastUpdated: "2026-01"
|
|
14402
|
+
},
|
|
14403
|
+
"claude-3-haiku": {
|
|
14404
|
+
promptPer1K: 0.00025,
|
|
14405
|
+
completionPer1K: 0.00125,
|
|
14406
|
+
lastUpdated: "2026-01"
|
|
14407
|
+
},
|
|
14408
|
+
"claude-3.5-sonnet": {
|
|
14409
|
+
promptPer1K: 0.003,
|
|
14410
|
+
completionPer1K: 0.015,
|
|
14411
|
+
lastUpdated: "2026-01"
|
|
14412
|
+
},
|
|
14413
|
+
"claude-3.5-haiku": {
|
|
14414
|
+
promptPer1K: 0.0008,
|
|
14415
|
+
completionPer1K: 0.004,
|
|
14416
|
+
lastUpdated: "2026-01"
|
|
14270
14417
|
}
|
|
14271
|
-
}
|
|
14272
|
-
|
|
14273
|
-
|
|
14274
|
-
|
|
14275
|
-
|
|
14276
|
-
|
|
14277
|
-
|
|
14278
|
-
|
|
14279
|
-
|
|
14280
|
-
|
|
14418
|
+
};
|
|
14419
|
+
var DEFAULT_PRICING = {
|
|
14420
|
+
promptPer1K: 0.003,
|
|
14421
|
+
completionPer1K: 0.015,
|
|
14422
|
+
lastUpdated: "2026-01",
|
|
14423
|
+
notes: "Default pricing - verify with provider"
|
|
14424
|
+
};
|
|
14425
|
+
function getModelPricing(model) {
|
|
14426
|
+
if (MODEL_PRICING[model]) {
|
|
14427
|
+
return MODEL_PRICING[model];
|
|
14428
|
+
}
|
|
14429
|
+
const lowerModel = model.toLowerCase();
|
|
14430
|
+
for (const [key, pricing] of Object.entries(MODEL_PRICING)) {
|
|
14431
|
+
if (key.toLowerCase() === lowerModel) {
|
|
14432
|
+
return pricing;
|
|
14281
14433
|
}
|
|
14282
|
-
throw new Error(`Git command failed: ${command}`);
|
|
14283
14434
|
}
|
|
14284
|
-
|
|
14285
|
-
|
|
14286
|
-
|
|
14287
|
-
|
|
14288
|
-
|
|
14435
|
+
if (lowerModel.includes("gpt-5.2")) {
|
|
14436
|
+
return MODEL_PRICING["gpt-5.2"];
|
|
14437
|
+
}
|
|
14438
|
+
if (lowerModel.includes("gpt-5.1")) {
|
|
14439
|
+
return MODEL_PRICING["gpt-5.1"];
|
|
14440
|
+
}
|
|
14441
|
+
if (lowerModel.includes("gpt-5-mini")) {
|
|
14442
|
+
return MODEL_PRICING["gpt-5-mini"];
|
|
14443
|
+
}
|
|
14444
|
+
if (lowerModel.includes("gpt-5-nano")) {
|
|
14445
|
+
return MODEL_PRICING["gpt-5-nano"];
|
|
14446
|
+
}
|
|
14447
|
+
if (lowerModel.includes("gpt-5")) {
|
|
14448
|
+
return MODEL_PRICING["gpt-5"];
|
|
14449
|
+
}
|
|
14450
|
+
if (lowerModel.includes("gpt-4.1-mini")) {
|
|
14451
|
+
return MODEL_PRICING["gpt-4.1-mini"];
|
|
14452
|
+
}
|
|
14453
|
+
if (lowerModel.includes("gpt-4.1-nano")) {
|
|
14454
|
+
return MODEL_PRICING["gpt-4.1-nano"];
|
|
14455
|
+
}
|
|
14456
|
+
if (lowerModel.includes("gpt-4.1")) {
|
|
14457
|
+
return MODEL_PRICING["gpt-4.1"];
|
|
14458
|
+
}
|
|
14459
|
+
if (lowerModel.includes("gpt-4o-mini")) {
|
|
14460
|
+
return MODEL_PRICING["gpt-4o-mini"];
|
|
14461
|
+
}
|
|
14462
|
+
if (lowerModel.includes("gpt-4o")) {
|
|
14463
|
+
return MODEL_PRICING["gpt-4o"];
|
|
14464
|
+
}
|
|
14465
|
+
if (lowerModel.includes("o4-mini")) {
|
|
14466
|
+
return MODEL_PRICING["o4-mini"];
|
|
14467
|
+
}
|
|
14468
|
+
if (lowerModel.includes("o3-mini")) {
|
|
14469
|
+
return MODEL_PRICING["o3-mini"];
|
|
14470
|
+
}
|
|
14471
|
+
if (lowerModel.includes("o3")) {
|
|
14472
|
+
return MODEL_PRICING.o3;
|
|
14473
|
+
}
|
|
14474
|
+
if (lowerModel.includes("o1")) {
|
|
14475
|
+
return MODEL_PRICING.o1;
|
|
14476
|
+
}
|
|
14477
|
+
if (lowerModel.includes("gpt-4-turbo")) {
|
|
14478
|
+
return MODEL_PRICING["gpt-4-turbo"];
|
|
14479
|
+
}
|
|
14480
|
+
if (lowerModel.includes("gpt-4")) {
|
|
14481
|
+
return MODEL_PRICING["gpt-4"];
|
|
14482
|
+
}
|
|
14483
|
+
if (lowerModel.includes("gpt-3.5")) {
|
|
14484
|
+
return MODEL_PRICING["gpt-3.5-turbo"];
|
|
14485
|
+
}
|
|
14486
|
+
if (lowerModel.includes("opus-4.5") || lowerModel.includes("opus-4-5")) {
|
|
14487
|
+
return MODEL_PRICING["claude-opus-4.5"];
|
|
14488
|
+
}
|
|
14489
|
+
if (lowerModel.includes("sonnet-4.5") || lowerModel.includes("sonnet-4-5")) {
|
|
14490
|
+
return MODEL_PRICING["claude-sonnet-4.5"];
|
|
14491
|
+
}
|
|
14492
|
+
if (lowerModel.includes("haiku-4.5") || lowerModel.includes("haiku-4-5")) {
|
|
14493
|
+
return MODEL_PRICING["claude-haiku-4.5"];
|
|
14494
|
+
}
|
|
14495
|
+
if (lowerModel.includes("opus-4.1") || lowerModel.includes("opus-4-1")) {
|
|
14496
|
+
return MODEL_PRICING["claude-opus-4.1"];
|
|
14497
|
+
}
|
|
14498
|
+
if (lowerModel.includes("opus-4")) {
|
|
14499
|
+
return MODEL_PRICING["claude-opus-4"];
|
|
14500
|
+
}
|
|
14501
|
+
if (lowerModel.includes("sonnet-4")) {
|
|
14502
|
+
return MODEL_PRICING["claude-sonnet-4"];
|
|
14503
|
+
}
|
|
14504
|
+
if (lowerModel.includes("sonnet-3.7") || lowerModel.includes("sonnet-3-7")) {
|
|
14505
|
+
return MODEL_PRICING["claude-sonnet-3.7"];
|
|
14506
|
+
}
|
|
14507
|
+
if (lowerModel.includes("claude-3-5-sonnet") || lowerModel.includes("claude-3.5-sonnet")) {
|
|
14508
|
+
return MODEL_PRICING["claude-3.5-sonnet"];
|
|
14509
|
+
}
|
|
14510
|
+
if (lowerModel.includes("claude-3-5-haiku") || lowerModel.includes("claude-3.5-haiku")) {
|
|
14511
|
+
return MODEL_PRICING["claude-3.5-haiku"];
|
|
14512
|
+
}
|
|
14513
|
+
if (lowerModel.includes("claude-3-opus")) {
|
|
14514
|
+
return MODEL_PRICING["claude-3-opus"];
|
|
14515
|
+
}
|
|
14516
|
+
if (lowerModel.includes("claude-3-sonnet")) {
|
|
14517
|
+
return MODEL_PRICING["claude-3-sonnet"];
|
|
14518
|
+
}
|
|
14519
|
+
if (lowerModel.includes("claude-3-haiku")) {
|
|
14520
|
+
return MODEL_PRICING["claude-3-haiku"];
|
|
14521
|
+
}
|
|
14522
|
+
if (lowerModel.includes("claude")) {
|
|
14523
|
+
return MODEL_PRICING["claude-sonnet-4.5"];
|
|
14524
|
+
}
|
|
14525
|
+
return DEFAULT_PRICING;
|
|
14526
|
+
}
|
|
14527
|
+
function estimateCost(promptTokens, completionTokens, model) {
|
|
14528
|
+
const pricing = getModelPricing(model);
|
|
14529
|
+
const promptCostUsd = promptTokens / 1000 * pricing.promptPer1K;
|
|
14530
|
+
const completionCostUsd = completionTokens / 1000 * pricing.completionPer1K;
|
|
14531
|
+
const totalUsd = promptCostUsd + completionCostUsd;
|
|
14532
|
+
return {
|
|
14533
|
+
totalUsd,
|
|
14534
|
+
promptCostUsd,
|
|
14535
|
+
completionCostUsd,
|
|
14536
|
+
model,
|
|
14537
|
+
pricing
|
|
14538
|
+
};
|
|
14539
|
+
}
|
|
14540
|
+
function formatCost(costUsd) {
|
|
14541
|
+
if (costUsd < 0.01) {
|
|
14542
|
+
return `$${(costUsd * 100).toFixed(4)} cents`;
|
|
14543
|
+
}
|
|
14544
|
+
if (costUsd < 1) {
|
|
14545
|
+
return `$${costUsd.toFixed(4)}`;
|
|
14546
|
+
}
|
|
14547
|
+
return `$${costUsd.toFixed(2)}`;
|
|
14548
|
+
}
|
|
14549
|
+
function listKnownModels() {
|
|
14550
|
+
return Object.entries(MODEL_PRICING).map(([model, pricing]) => ({
|
|
14551
|
+
model,
|
|
14552
|
+
pricing
|
|
14553
|
+
}));
|
|
14554
|
+
}
|
|
14555
|
+
|
|
14556
|
+
// src/provenance/environment.ts
|
|
14557
|
+
function getEnvironmentInfo() {
|
|
14558
|
+
return {
|
|
14559
|
+
node_version: process.version,
|
|
14560
|
+
platform: process.platform,
|
|
14561
|
+
arch: process.arch
|
|
14562
|
+
};
|
|
14563
|
+
}
|
|
14564
|
+
|
|
14565
|
+
// src/provenance/git.ts
|
|
14566
|
+
import { execSync } from "child_process";
|
|
14567
|
+
function getGitInfo() {
|
|
14568
|
+
try {
|
|
14569
|
+
const commit = execGit("rev-parse HEAD");
|
|
14570
|
+
const branch = execGit("rev-parse --abbrev-ref HEAD");
|
|
14571
|
+
const dirty = execGit("status --porcelain").length > 0;
|
|
14572
|
+
const remote = execGit("remote get-url origin", true);
|
|
14573
|
+
return {
|
|
14574
|
+
commit,
|
|
14575
|
+
branch,
|
|
14576
|
+
dirty,
|
|
14577
|
+
remote: remote || undefined
|
|
14578
|
+
};
|
|
14579
|
+
} catch {
|
|
14580
|
+
return {
|
|
14581
|
+
commit: "unknown",
|
|
14582
|
+
branch: "unknown",
|
|
14583
|
+
dirty: false
|
|
14584
|
+
};
|
|
14585
|
+
}
|
|
14586
|
+
}
|
|
14587
|
+
function execGit(command, allowFailure = false) {
|
|
14588
|
+
try {
|
|
14589
|
+
return execSync(`git ${command}`, {
|
|
14590
|
+
encoding: "utf-8",
|
|
14591
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
14592
|
+
}).trim();
|
|
14593
|
+
} catch {
|
|
14594
|
+
if (allowFailure) {
|
|
14595
|
+
return "";
|
|
14596
|
+
}
|
|
14597
|
+
throw new Error(`Git command failed: ${command}`);
|
|
14598
|
+
}
|
|
14599
|
+
}
|
|
14600
|
+
|
|
14601
|
+
// src/artifacts/manifest.ts
|
|
14602
|
+
function createRunManifest(options) {
|
|
14603
|
+
const {
|
|
14289
14604
|
project,
|
|
14290
14605
|
config,
|
|
14291
14606
|
resolvedConfig,
|
|
@@ -14296,7 +14611,8 @@ function createRunManifest(options) {
|
|
|
14296
14611
|
runReason,
|
|
14297
14612
|
redaction
|
|
14298
14613
|
} = options;
|
|
14299
|
-
const
|
|
14614
|
+
const modelForCost = resolvedConfig?.model || config.model;
|
|
14615
|
+
const metrics = calculateMetrics(cases, modelForCost);
|
|
14300
14616
|
const git = getGitInfo();
|
|
14301
14617
|
const environment = getEnvironmentInfo();
|
|
14302
14618
|
return {
|
|
@@ -14320,7 +14636,7 @@ function createRunManifest(options) {
|
|
|
14320
14636
|
redaction
|
|
14321
14637
|
};
|
|
14322
14638
|
}
|
|
14323
|
-
function calculateMetrics(cases) {
|
|
14639
|
+
function calculateMetrics(cases, model) {
|
|
14324
14640
|
const passedCases = cases.filter((c) => c.ok);
|
|
14325
14641
|
const latencies = cases.map((c) => c.latencyMs).sort((a, b) => a - b);
|
|
14326
14642
|
const medianLatency = latencies.length > 0 ? latencies[Math.floor(latencies.length / 2)] : 0;
|
|
@@ -14328,6 +14644,21 @@ function calculateMetrics(cases) {
|
|
|
14328
14644
|
const p95Latency = latencies.length > 0 ? latencies[p95Index] : 0;
|
|
14329
14645
|
const totalPromptTokens = cases.reduce((sum, c) => sum + c.tokens.prompt, 0);
|
|
14330
14646
|
const totalCompletionTokens = cases.reduce((sum, c) => sum + c.tokens.completion, 0);
|
|
14647
|
+
let cost;
|
|
14648
|
+
if (model && (totalPromptTokens > 0 || totalCompletionTokens > 0)) {
|
|
14649
|
+
const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
|
|
14650
|
+
const pricing = getModelPricing(model);
|
|
14651
|
+
cost = {
|
|
14652
|
+
total_usd: costEstimate.totalUsd,
|
|
14653
|
+
prompt_cost_usd: costEstimate.promptCostUsd,
|
|
14654
|
+
completion_cost_usd: costEstimate.completionCostUsd,
|
|
14655
|
+
model: costEstimate.model,
|
|
14656
|
+
pricing: {
|
|
14657
|
+
prompt_per_1k: pricing.promptPer1K,
|
|
14658
|
+
completion_per_1k: pricing.completionPer1K
|
|
14659
|
+
}
|
|
14660
|
+
};
|
|
14661
|
+
}
|
|
14331
14662
|
return {
|
|
14332
14663
|
success_rate: cases.length > 0 ? passedCases.length / cases.length : 0,
|
|
14333
14664
|
total_cases: cases.length,
|
|
@@ -14337,7 +14668,8 @@ function calculateMetrics(cases) {
|
|
|
14337
14668
|
p95_latency_ms: p95Latency,
|
|
14338
14669
|
total_tokens: totalPromptTokens + totalCompletionTokens,
|
|
14339
14670
|
total_prompt_tokens: totalPromptTokens,
|
|
14340
|
-
total_completion_tokens: totalCompletionTokens
|
|
14671
|
+
total_completion_tokens: totalCompletionTokens,
|
|
14672
|
+
cost
|
|
14341
14673
|
};
|
|
14342
14674
|
}
|
|
14343
14675
|
function detectCIEnvironment() {
|
|
@@ -14508,6 +14840,16 @@ function getSuccessRate(manifest) {
|
|
|
14508
14840
|
}
|
|
14509
14841
|
return manifest.metrics.success_rate;
|
|
14510
14842
|
}
|
|
14843
|
+
function getEstimatedCost(manifest) {
|
|
14844
|
+
const type = getManifestType(manifest);
|
|
14845
|
+
if (type === "stress") {
|
|
14846
|
+
return manifest.metrics.cost?.estimated_total_usd;
|
|
14847
|
+
}
|
|
14848
|
+
if (type === "run") {
|
|
14849
|
+
return manifest.metrics.cost?.total_usd;
|
|
14850
|
+
}
|
|
14851
|
+
return;
|
|
14852
|
+
}
|
|
14511
14853
|
function getScenario(manifest) {
|
|
14512
14854
|
return manifest.config.scenario;
|
|
14513
14855
|
}
|
|
@@ -14577,13 +14919,17 @@ class LocalStorageAdapter {
|
|
|
14577
14919
|
if (options?.scenario && getScenario(manifest) !== options.scenario) {
|
|
14578
14920
|
continue;
|
|
14579
14921
|
}
|
|
14580
|
-
|
|
14922
|
+
const item = {
|
|
14581
14923
|
runId: manifest.run_id,
|
|
14582
14924
|
scenario: getScenario(manifest),
|
|
14583
14925
|
successRate: getSuccessRate(manifest),
|
|
14584
14926
|
createdAt: manifest.start_time,
|
|
14585
14927
|
type: manifestType
|
|
14586
|
-
}
|
|
14928
|
+
};
|
|
14929
|
+
if (options?.includeCost) {
|
|
14930
|
+
item.estimatedCostUsd = getEstimatedCost(manifest);
|
|
14931
|
+
}
|
|
14932
|
+
results.push(item);
|
|
14587
14933
|
} catch {}
|
|
14588
14934
|
}
|
|
14589
14935
|
}
|
|
@@ -16585,7 +16931,7 @@ class RealtimeChannel {
|
|
|
16585
16931
|
}).map((bind) => {
|
|
16586
16932
|
if (typeof handledPayload === "object" && "ids" in handledPayload) {
|
|
16587
16933
|
const postgresChanges = handledPayload.data;
|
|
16588
|
-
const { schema: schema2, table, commit_timestamp, type: type2, errors:
|
|
16934
|
+
const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
|
|
16589
16935
|
const enrichedPayload = {
|
|
16590
16936
|
schema: schema2,
|
|
16591
16937
|
table,
|
|
@@ -16593,7 +16939,7 @@ class RealtimeChannel {
|
|
|
16593
16939
|
eventType: type2,
|
|
16594
16940
|
new: {},
|
|
16595
16941
|
old: {},
|
|
16596
|
-
errors:
|
|
16942
|
+
errors: errors2
|
|
16597
16943
|
};
|
|
16598
16944
|
handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
|
|
16599
16945
|
}
|
|
@@ -22164,7 +22510,7 @@ class GoTrueClient {
|
|
|
22164
22510
|
}
|
|
22165
22511
|
});
|
|
22166
22512
|
}
|
|
22167
|
-
async unlinkIdentity(
|
|
22513
|
+
async unlinkIdentity(identity) {
|
|
22168
22514
|
try {
|
|
22169
22515
|
return await this._useSession(async (result) => {
|
|
22170
22516
|
var _a, _b;
|
|
@@ -22172,7 +22518,7 @@ class GoTrueClient {
|
|
|
22172
22518
|
if (error) {
|
|
22173
22519
|
throw error;
|
|
22174
22520
|
}
|
|
22175
|
-
return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${
|
|
22521
|
+
return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
|
|
22176
22522
|
headers: this.headers,
|
|
22177
22523
|
jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
|
|
22178
22524
|
});
|
|
@@ -22346,20 +22692,20 @@ class GoTrueClient {
|
|
|
22346
22692
|
if (this.broadcastChannel && broadcast) {
|
|
22347
22693
|
this.broadcastChannel.postMessage({ event, session });
|
|
22348
22694
|
}
|
|
22349
|
-
const
|
|
22695
|
+
const errors2 = [];
|
|
22350
22696
|
const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
|
|
22351
22697
|
try {
|
|
22352
22698
|
await x.callback(event, session);
|
|
22353
22699
|
} catch (e) {
|
|
22354
|
-
|
|
22700
|
+
errors2.push(e);
|
|
22355
22701
|
}
|
|
22356
22702
|
});
|
|
22357
22703
|
await Promise.all(promises);
|
|
22358
|
-
if (
|
|
22359
|
-
for (let i = 0;i <
|
|
22360
|
-
console.error(
|
|
22704
|
+
if (errors2.length > 0) {
|
|
22705
|
+
for (let i = 0;i < errors2.length; i += 1) {
|
|
22706
|
+
console.error(errors2[i]);
|
|
22361
22707
|
}
|
|
22362
|
-
throw
|
|
22708
|
+
throw errors2[0];
|
|
22363
22709
|
}
|
|
22364
22710
|
} finally {
|
|
22365
22711
|
this._debug(debugName, "end");
|
|
@@ -24407,317 +24753,250 @@ class Logger {
|
|
|
24407
24753
|
}
|
|
24408
24754
|
}
|
|
24409
24755
|
var logger = new Logger("artemis");
|
|
24410
|
-
// src/
|
|
24411
|
-
var
|
|
24412
|
-
|
|
24413
|
-
|
|
24414
|
-
|
|
24415
|
-
|
|
24416
|
-
|
|
24417
|
-
},
|
|
24418
|
-
"gpt-5.1": {
|
|
24419
|
-
promptPer1K: 0.00125,
|
|
24420
|
-
completionPer1K: 0.01,
|
|
24421
|
-
lastUpdated: "2026-01"
|
|
24422
|
-
},
|
|
24423
|
-
"gpt-5.2": {
|
|
24424
|
-
promptPer1K: 0.00175,
|
|
24425
|
-
completionPer1K: 0.014,
|
|
24426
|
-
lastUpdated: "2026-01"
|
|
24427
|
-
},
|
|
24428
|
-
"gpt-5-mini": {
|
|
24429
|
-
promptPer1K: 0.00025,
|
|
24430
|
-
completionPer1K: 0.002,
|
|
24431
|
-
lastUpdated: "2026-01"
|
|
24432
|
-
},
|
|
24433
|
-
"gpt-5-nano": {
|
|
24434
|
-
promptPer1K: 0.00005,
|
|
24435
|
-
completionPer1K: 0.0004,
|
|
24436
|
-
lastUpdated: "2026-01"
|
|
24437
|
-
},
|
|
24438
|
-
"gpt-4.1": {
|
|
24439
|
-
promptPer1K: 0.002,
|
|
24440
|
-
completionPer1K: 0.008,
|
|
24441
|
-
lastUpdated: "2026-01",
|
|
24442
|
-
notes: "1M context window"
|
|
24443
|
-
},
|
|
24444
|
-
"gpt-4.1-mini": {
|
|
24445
|
-
promptPer1K: 0.0004,
|
|
24446
|
-
completionPer1K: 0.0016,
|
|
24447
|
-
lastUpdated: "2026-01"
|
|
24448
|
-
},
|
|
24449
|
-
"gpt-4.1-nano": {
|
|
24450
|
-
promptPer1K: 0.0001,
|
|
24451
|
-
completionPer1K: 0.0004,
|
|
24452
|
-
lastUpdated: "2026-01"
|
|
24453
|
-
},
|
|
24454
|
-
"gpt-4o": {
|
|
24455
|
-
promptPer1K: 0.0025,
|
|
24456
|
-
completionPer1K: 0.01,
|
|
24457
|
-
lastUpdated: "2026-01",
|
|
24458
|
-
notes: "128K context window"
|
|
24459
|
-
},
|
|
24460
|
-
"gpt-4o-mini": {
|
|
24461
|
-
promptPer1K: 0.00015,
|
|
24462
|
-
completionPer1K: 0.0006,
|
|
24463
|
-
lastUpdated: "2026-01",
|
|
24464
|
-
notes: "128K context window"
|
|
24465
|
-
},
|
|
24466
|
-
o1: {
|
|
24467
|
-
promptPer1K: 0.015,
|
|
24468
|
-
completionPer1K: 0.06,
|
|
24469
|
-
lastUpdated: "2026-01",
|
|
24470
|
-
notes: "Reasoning model - internal thinking tokens billed as output"
|
|
24471
|
-
},
|
|
24472
|
-
o3: {
|
|
24473
|
-
promptPer1K: 0.002,
|
|
24474
|
-
completionPer1K: 0.008,
|
|
24475
|
-
lastUpdated: "2026-01"
|
|
24476
|
-
},
|
|
24477
|
-
"o3-mini": {
|
|
24478
|
-
promptPer1K: 0.0011,
|
|
24479
|
-
completionPer1K: 0.0044,
|
|
24480
|
-
lastUpdated: "2026-01"
|
|
24481
|
-
},
|
|
24482
|
-
"o4-mini": {
|
|
24483
|
-
promptPer1K: 0.0011,
|
|
24484
|
-
completionPer1K: 0.0044,
|
|
24485
|
-
lastUpdated: "2026-01"
|
|
24486
|
-
},
|
|
24487
|
-
"gpt-4-turbo": {
|
|
24488
|
-
promptPer1K: 0.01,
|
|
24489
|
-
completionPer1K: 0.03,
|
|
24490
|
-
lastUpdated: "2026-01"
|
|
24491
|
-
},
|
|
24492
|
-
"gpt-4": {
|
|
24493
|
-
promptPer1K: 0.03,
|
|
24494
|
-
completionPer1K: 0.06,
|
|
24495
|
-
lastUpdated: "2026-01"
|
|
24496
|
-
},
|
|
24497
|
-
"gpt-3.5-turbo": {
|
|
24498
|
-
promptPer1K: 0.0005,
|
|
24499
|
-
completionPer1K: 0.0015,
|
|
24500
|
-
lastUpdated: "2026-01"
|
|
24501
|
-
},
|
|
24502
|
-
"claude-opus-4.5": {
|
|
24503
|
-
promptPer1K: 0.005,
|
|
24504
|
-
completionPer1K: 0.025,
|
|
24505
|
-
lastUpdated: "2026-01",
|
|
24506
|
-
notes: "Most capable Claude model"
|
|
24507
|
-
},
|
|
24508
|
-
"claude-sonnet-4.5": {
|
|
24509
|
-
promptPer1K: 0.003,
|
|
24510
|
-
completionPer1K: 0.015,
|
|
24511
|
-
lastUpdated: "2026-01",
|
|
24512
|
-
notes: "Balanced performance and cost"
|
|
24513
|
-
},
|
|
24514
|
-
"claude-haiku-4.5": {
|
|
24515
|
-
promptPer1K: 0.001,
|
|
24516
|
-
completionPer1K: 0.005,
|
|
24517
|
-
lastUpdated: "2026-01",
|
|
24518
|
-
notes: "Fastest Claude model"
|
|
24519
|
-
},
|
|
24520
|
-
"claude-opus-4": {
|
|
24521
|
-
promptPer1K: 0.015,
|
|
24522
|
-
completionPer1K: 0.075,
|
|
24523
|
-
lastUpdated: "2026-01"
|
|
24524
|
-
},
|
|
24525
|
-
"claude-opus-4.1": {
|
|
24526
|
-
promptPer1K: 0.015,
|
|
24527
|
-
completionPer1K: 0.075,
|
|
24528
|
-
lastUpdated: "2026-01"
|
|
24529
|
-
},
|
|
24530
|
-
"claude-sonnet-4": {
|
|
24531
|
-
promptPer1K: 0.003,
|
|
24532
|
-
completionPer1K: 0.015,
|
|
24533
|
-
lastUpdated: "2026-01"
|
|
24534
|
-
},
|
|
24535
|
-
"claude-sonnet-3.7": {
|
|
24536
|
-
promptPer1K: 0.003,
|
|
24537
|
-
completionPer1K: 0.015,
|
|
24538
|
-
lastUpdated: "2026-01"
|
|
24539
|
-
},
|
|
24540
|
-
"claude-3-7-sonnet": {
|
|
24541
|
-
promptPer1K: 0.003,
|
|
24542
|
-
completionPer1K: 0.015,
|
|
24543
|
-
lastUpdated: "2026-01"
|
|
24544
|
-
},
|
|
24545
|
-
"claude-3-5-sonnet-20241022": {
|
|
24546
|
-
promptPer1K: 0.003,
|
|
24547
|
-
completionPer1K: 0.015,
|
|
24548
|
-
lastUpdated: "2026-01"
|
|
24549
|
-
},
|
|
24550
|
-
"claude-3-5-haiku-20241022": {
|
|
24551
|
-
promptPer1K: 0.0008,
|
|
24552
|
-
completionPer1K: 0.004,
|
|
24553
|
-
lastUpdated: "2026-01"
|
|
24554
|
-
},
|
|
24555
|
-
"claude-haiku-3.5": {
|
|
24556
|
-
promptPer1K: 0.0008,
|
|
24557
|
-
completionPer1K: 0.004,
|
|
24558
|
-
lastUpdated: "2026-01"
|
|
24559
|
-
},
|
|
24560
|
-
"claude-3-opus": {
|
|
24561
|
-
promptPer1K: 0.015,
|
|
24562
|
-
completionPer1K: 0.075,
|
|
24563
|
-
lastUpdated: "2026-01"
|
|
24564
|
-
},
|
|
24565
|
-
"claude-3-sonnet": {
|
|
24566
|
-
promptPer1K: 0.003,
|
|
24567
|
-
completionPer1K: 0.015,
|
|
24568
|
-
lastUpdated: "2026-01"
|
|
24569
|
-
},
|
|
24570
|
-
"claude-3-haiku": {
|
|
24571
|
-
promptPer1K: 0.00025,
|
|
24572
|
-
completionPer1K: 0.00125,
|
|
24573
|
-
lastUpdated: "2026-01"
|
|
24574
|
-
},
|
|
24575
|
-
"claude-3.5-sonnet": {
|
|
24576
|
-
promptPer1K: 0.003,
|
|
24577
|
-
completionPer1K: 0.015,
|
|
24578
|
-
lastUpdated: "2026-01"
|
|
24579
|
-
},
|
|
24580
|
-
"claude-3.5-haiku": {
|
|
24581
|
-
promptPer1K: 0.0008,
|
|
24582
|
-
completionPer1K: 0.004,
|
|
24583
|
-
lastUpdated: "2026-01"
|
|
24756
|
+
// src/validator/validator.ts
|
|
24757
|
+
var import_yaml2 = __toESM(require_dist(), 1);
|
|
24758
|
+
import { readFileSync } from "fs";
|
|
24759
|
+
/**
 * Validates a scenario YAML file in four passes: file read, YAML syntax,
 * schema validation (via `ScenarioSchema`, defined elsewhere in this bundle)
 * and semantic checks (duplicate case IDs, undefined `{{variable}}`
 * references). Non-fatal findings are reported separately as warnings.
 *
 * Line numbers attached to issues are best-effort: they are recovered by
 * scanning the raw text, not from a YAML source map.
 */
class ScenarioValidator {
  _options;

  /**
   * @param {object} [options] - Validator options; stored as given and
   *   exposed through the `options` getter.
   */
  constructor(options = {}) {
    this._options = options;
  }

  /** @returns {object} The options object passed to the constructor. */
  get options() {
    return this._options;
  }

  /**
   * Validate one scenario file.
   *
   * @param {string} filePath - Path of the YAML file to validate.
   * @returns {{file: string, valid: boolean, errors: object[], warnings: object[]}}
   *   `valid` is true only when no errors were collected. Read, syntax and
   *   top-level type failures short-circuit with a single error.
   */
  validate(filePath) {
    const problems = [];
    const warnings = [];
    const failed = () => ({ file: filePath, valid: false, errors: problems, warnings });

    let content;
    try {
      content = readFileSync(filePath, "utf-8");
    } catch (err) {
      problems.push({
        line: 1,
        message: `Failed to read file: ${this._describeError(err)}`,
        rule: "file-read",
        severity: "error"
      });
      return failed();
    }

    let parsed;
    try {
      parsed = import_yaml2.default.parse(content, {
        prettyErrors: true,
        strict: true
      });
    } catch (err) {
      if (err instanceof import_yaml2.default.YAMLError) {
        const linePos = err.linePos?.[0];
        problems.push({
          line: linePos?.line || 1,
          column: linePos?.col,
          message: `Invalid YAML syntax: ${err.message}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      } else {
        problems.push({
          line: 1,
          message: `YAML parse error: ${this._describeError(err)}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      }
      return failed();
    }

    if (parsed === null || typeof parsed !== "object") {
      problems.push({
        line: 1,
        message: "Scenario must be a YAML object",
        rule: "schema-type",
        severity: "error"
      });
      return failed();
    }

    // Semantic checks need the schema-normalised data, so they only run
    // when the schema pass succeeded.
    const schemaResult = ScenarioSchema.safeParse(parsed);
    if (schemaResult.success) {
      problems.push(...this.validateSemantics(schemaResult.data, content));
    } else {
      problems.push(...this.formatZodErrors(schemaResult.error, content));
    }

    warnings.push(...this.detectWarnings(parsed, content));
    return {
      file: filePath,
      valid: problems.length === 0,
      errors: problems,
      warnings
    };
  }

  /**
   * Convert schema issues into validator issues with readable messages.
   *
   * @param {{issues: object[]}} error - Object exposing an `issues` array
   *   whose entries carry `code`, `path` and `message` (plus code-specific
   *   fields such as `expected`, `received`, `options`, `minimum`, `keys`).
   * @param {string} content - Raw file text, used to locate line numbers.
   * @returns {object[]} One `{line, message, rule, severity}` per issue.
   */
  formatZodErrors(error, content) {
    const lines = content.split(/\r?\n/);
    return error.issues.map((issue) => {
      const path = issue.path.join(".");
      let locatePath = issue.path;
      let message;
      switch (issue.code) {
        case "invalid_type":
          message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
          break;
        case "invalid_enum_value":
          message = `'${path}' must be one of: ${issue.options.join(", ")}`;
          break;
        case "too_small":
          message = issue.type === "array"
            ? `'${path}' must have at least ${issue.minimum} item(s)`
            : `'${path}' is too small`;
          break;
        case "unrecognized_keys":
          message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
          // `issue.path` is the parent object; point at the offending key.
          if (issue.keys.length > 0) {
            locatePath = [...issue.path, issue.keys[0]];
          }
          break;
        default:
          message = issue.message;
      }
      return {
        line: this.findLineForPath(lines, locatePath),
        message,
        rule: `schema-${issue.code}`,
        severity: "error"
      };
    });
  }

  /**
   * Best-effort lookup of the 1-based line for a schema path.
   *
   * When the path goes through `cases[n]`, the search is confined to the
   * n-th case (cases are recognised by lines starting with `- id:`), and
   * the case's first line is returned if the key is absent from it.
   *
   * @param {string[]} lines - File content split into lines.
   * @param {(string|number)[]} path - Path segments of the issue.
   * @returns {number} 1-based line number; 1 when nothing better is found.
   */
  findLineForPath(lines, path) {
    if (path.length === 0) {
      return 1;
    }

    let from = 0;
    let to = lines.length;
    let scoped = false;
    const casesPos = path.indexOf("cases");
    const caseIndex = casesPos === -1 ? undefined : path[casesPos + 1];
    if (typeof caseIndex === "number") {
      const starts = this._caseStartLines(lines);
      if (caseIndex >= 0 && caseIndex < starts.length) {
        from = starts[caseIndex];
        to = starts[caseIndex + 1] ?? lines.length;
        scoped = true;
      }
    }

    // Array indexes have no text to search for; use the innermost named key.
    let keyPos = path.length - 1;
    while (keyPos >= 0 && typeof path[keyPos] !== "string") {
      keyPos--;
    }

    // Inside a case, only keys that come after `cases[n]` can live there.
    const keySearchable = keyPos >= 0 && (!scoped || keyPos > casesPos + 1);
    if (keySearchable) {
      const found = this._findKeyLine(lines, path[keyPos], from, to);
      if (found !== -1) {
        return found + 1;
      }
    }
    return from + 1;
  }

  /**
   * Run checks that the schema cannot express: duplicate case IDs and
   * prompt references to variables that are defined nowhere.
   *
   * @param {{cases: object[], variables?: object}} scenario - Scenario data
   *   as returned by the schema parse.
   * @param {string} content - Raw file text, used to locate line numbers.
   * @returns {object[]} Error issues (duplicates first, then variables).
   */
  validateSemantics(scenario, content) {
    const problems = [];
    const lines = content.split(/\r?\n/);

    const idCounts = new Map();
    for (const testCase of scenario.cases) {
      const count = (idCounts.get(testCase.id) ?? 0) + 1;
      idCounts.set(testCase.id, count);
      if (count > 1) {
        problems.push({
          // Point at the repeated occurrence, not the first definition.
          line: this.findLineForCaseId(lines, testCase.id, count),
          message: `Duplicate case ID: '${testCase.id}'`,
          rule: "duplicate-case-id",
          severity: "error"
        });
      }
    }

    const globalVars = scenario.variables || {};
    for (const testCase of scenario.cases) {
      const scope = { ...globalVars, ...(testCase.variables || {}) };
      const promptText = typeof testCase.prompt === "string"
        ? testCase.prompt
        : JSON.stringify(testCase.prompt) ?? "";
      // A Set reports each missing variable once per case.
      for (const ref of new Set(this.extractVariableRefs(promptText))) {
        // Own-property check: `in` would accept inherited names such as
        // `toString` or `constructor`.
        if (Object.prototype.hasOwnProperty.call(scope, ref)) {
          continue;
        }
        problems.push({
          line: this.findLineForCaseId(lines, testCase.id),
          message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
          rule: "undefined-variable",
          severity: "error",
          suggestion: `Define '${ref}' in scenario.variables or case.variables`
        });
      }
    }
    return problems;
  }

  /**
   * Find the 1-based line declaring `id: <caseId>` (bare or quoted).
   *
   * @param {string[]} lines - File content split into lines.
   * @param {string} caseId - Case identifier to look for.
   * @param {number} [occurrence=1] - Which match to return (1 = first). If
   *   fewer matches exist, the last match found is returned.
   * @returns {number} 1-based line number, or 1 when there is no match.
   */
  findLineForCaseId(lines, caseId, occurrence = 1) {
    const id = this._escapeRegExp(String(caseId));
    // Whole-value match so that `foo` does not hit `id: foobar`, and
    // whole-key match so that `case_id: foo` is ignored.
    const pattern = new RegExp(
      `(?:^|[^\\w-])id\\s*:\\s*(?:"${id}"|'${id}'|${id})(?=\\s*(?:$|[,}])|\\s+#)`
    );
    let seen = 0;
    let lastMatch = 0;
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (line.trimStart().startsWith("#") || !pattern.test(line)) {
        continue;
      }
      seen += 1;
      lastMatch = i + 1;
      if (seen >= occurrence) {
        return lastMatch;
      }
    }
    return lastMatch || 1;
  }

  /**
   * Collect the names used in `{{name}}` placeholders.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]} Names in order of appearance, duplicates included.
   */
  extractVariableRefs(text) {
    return Array.from(text.matchAll(/\{\{(\w+)\}\}/g), (match) => match[1]);
  }

  /**
   * Detect non-fatal findings: use of the deprecated `criteria` key, more
   * than 20 cases, and a missing `description`.
   *
   * @param {unknown} parsed - Parsed YAML document.
   * @param {string} content - Raw file text, used to locate line numbers.
   * @returns {object[]} Warning issues.
   */
  detectWarnings(parsed, content) {
    const warnings = [];
    if (!parsed || typeof parsed !== "object") {
      return warnings;
    }
    const lines = content.split(/\r?\n/);

    if (this.hasDeepKey(parsed, "criteria")) {
      warnings.push({
        line: this.findLineForKey(lines, "criteria"),
        message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
        rule: "deprecated-field",
        severity: "warning",
        suggestion: "Replace 'criteria' with 'rubric'"
      });
    }

    const cases = parsed.cases;
    if (Array.isArray(cases) && cases.length > 20) {
      warnings.push({
        line: 1,
        message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
        rule: "performance-hint",
        severity: "warning"
      });
    }

    if (!parsed.description) {
      warnings.push({
        line: 1,
        message: "Scenario is missing 'description' field. Adding a description improves documentation.",
        rule: "missing-description",
        severity: "warning"
      });
    }
    return warnings;
  }

  /**
   * Report whether `key` is an own property of `obj` or of any object or
   * array nested inside it.
   *
   * @param {unknown} obj - Value to inspect; non-objects yield false.
   * @param {string} key - Property name to look for.
   * @param {WeakSet<object>} [seen] - Objects already visited; guards
   *   against endless recursion on cyclic graphs.
   * @returns {boolean}
   */
  hasDeepKey(obj, key, seen = new WeakSet()) {
    if (obj === null || typeof obj !== "object") {
      return false;
    }
    if (seen.has(obj)) {
      return false;
    }
    seen.add(obj);
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      return true;
    }
    return Object.values(obj).some((value) => this.hasDeepKey(value, key, seen));
  }

  /**
   * Find the first line that declares `key` as a mapping key.
   *
   * @param {string[]} lines - File content split into lines.
   * @param {string} key - Key name to look for.
   * @returns {number} 1-based line number, or 1 when there is no match.
   */
  findLineForKey(lines, key) {
    const index = this._findKeyLine(lines, key, 0, lines.length);
    return index === -1 ? 1 : index + 1;
  }

  /**
   * Zero-based index of the first non-comment line in `[from, to)` that
   * declares `key` (optionally quoted) as a whole key, or -1.
   */
  _findKeyLine(lines, key, from = 0, to = lines.length) {
    const name = this._escapeRegExp(String(key));
    // The key must not be the tail of a longer name, and its colon must be
    // followed by whitespace or the end of the line.
    const pattern = new RegExp(`(?:^|[^\\w-])["']?${name}["']?\\s*:(?:\\s|$)`);
    for (let i = from; i < to; i++) {
      const line = lines[i];
      if (!line.trimStart().startsWith("#") && pattern.test(line)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Zero-based indexes of the lines that open each case. Heuristic: lines
   * after the `cases:` key that start with `- id:` at the indentation of the
   * first such line, so nested `- id:` lists are not counted.
   */
  _caseStartLines(lines) {
    const casesLine = this._findKeyLine(lines, "cases", 0, lines.length);
    const starts = [];
    let indent = -1;
    for (let i = casesLine + 1; i < lines.length; i++) {
      const line = lines[i];
      const body = line.trimStart();
      if (!body.startsWith("- id:")) {
        continue;
      }
      const lineIndent = line.length - body.length;
      if (indent === -1) {
        indent = lineIndent;
      }
      if (lineIndent === indent) {
        starts.push(i);
      }
    }
    return starts;
  }

  /** Escape regular-expression metacharacters in `text`. */
  _escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  /** Message of an Error, or the string form of any other thrown value. */
  _describeError(err) {
    return err instanceof Error ? err.message : String(err);
  }
}
|
|
24722
25001
|
export {
|
|
24723
25002
|
wrapError,
|
|
@@ -24766,6 +25045,7 @@ export {
|
|
|
24766
25045
|
TestCaseSchema,
|
|
24767
25046
|
SupabaseStorageAdapter,
|
|
24768
25047
|
SimilarityEvaluator,
|
|
25048
|
+
ScenarioValidator,
|
|
24769
25049
|
ScenarioSchema,
|
|
24770
25050
|
SUPPORTED_EXPRESSIONS,
|
|
24771
25051
|
RegexEvaluator,
|