@artemiskit/core 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +164 -0
- package/adapters/openai/dist/index.js +5626 -0
- package/dist/adapters/registry.d.ts.map +1 -1
- package/dist/adapters/types.d.ts +32 -2
- package/dist/adapters/types.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +12 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +762 -63
- package/dist/scenario/schema.d.ts +116 -84
- package/dist/scenario/schema.d.ts.map +1 -1
- package/dist/storage/supabase.d.ts +25 -4
- package/dist/storage/supabase.d.ts.map +1 -1
- package/dist/storage/types.d.ts +162 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/validator/index.d.ts +6 -0
- package/dist/validator/index.d.ts.map +1 -0
- package/dist/validator/types.d.ts +58 -0
- package/dist/validator/types.d.ts.map +1 -0
- package/dist/validator/validator.d.ts +55 -0
- package/dist/validator/validator.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/adapters/registry.ts +38 -0
- package/src/adapters/types.ts +38 -0
- package/src/artifacts/types.ts +16 -0
- package/src/index.ts +3 -0
- package/src/scenario/schema.ts +10 -0
- package/src/storage/supabase.test.ts +988 -0
- package/src/storage/supabase.ts +599 -5
- package/src/storage/types.ts +196 -0
- package/src/validator/index.ts +6 -0
- package/src/validator/types.ts +62 -0
- package/src/validator/validator.ts +345 -0
package/dist/index.js
CHANGED
|
@@ -4,25 +4,43 @@ var __getProtoOf = Object.getPrototypeOf;
|
|
|
4
4
|
var __defProp = Object.defineProperty;
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
function __accessProp(key) {
|
|
8
|
+
return this[key];
|
|
9
|
+
}
|
|
10
|
+
var __toESMCache_node;
|
|
11
|
+
var __toESMCache_esm;
|
|
7
12
|
var __toESM = (mod, isNodeMode, target) => {
|
|
13
|
+
var canCache = mod != null && typeof mod === "object";
|
|
14
|
+
if (canCache) {
|
|
15
|
+
var cache = isNodeMode ? __toESMCache_node ??= new WeakMap : __toESMCache_esm ??= new WeakMap;
|
|
16
|
+
var cached = cache.get(mod);
|
|
17
|
+
if (cached)
|
|
18
|
+
return cached;
|
|
19
|
+
}
|
|
8
20
|
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
9
21
|
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
10
22
|
for (let key of __getOwnPropNames(mod))
|
|
11
23
|
if (!__hasOwnProp.call(to, key))
|
|
12
24
|
__defProp(to, key, {
|
|
13
|
-
get: (
|
|
25
|
+
get: __accessProp.bind(mod, key),
|
|
14
26
|
enumerable: true
|
|
15
27
|
});
|
|
28
|
+
if (canCache)
|
|
29
|
+
cache.set(mod, to);
|
|
16
30
|
return to;
|
|
17
31
|
};
|
|
18
32
|
var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
|
|
33
|
+
var __returnValue = (v) => v;
|
|
34
|
+
function __exportSetter(name, newValue) {
|
|
35
|
+
this[name] = __returnValue.bind(null, newValue);
|
|
36
|
+
}
|
|
19
37
|
var __export = (target, all) => {
|
|
20
38
|
for (var name in all)
|
|
21
39
|
__defProp(target, name, {
|
|
22
40
|
get: all[name],
|
|
23
41
|
enumerable: true,
|
|
24
42
|
configurable: true,
|
|
25
|
-
set: (
|
|
43
|
+
set: __exportSetter.bind(all, name)
|
|
26
44
|
});
|
|
27
45
|
};
|
|
28
46
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
@@ -10896,6 +10914,55 @@ var require_public_api = __commonJS((exports) => {
|
|
|
10896
10914
|
exports.stringify = stringify;
|
|
10897
10915
|
});
|
|
10898
10916
|
|
|
10917
|
+
// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
|
|
10918
|
+
var require_dist = __commonJS((exports) => {
|
|
10919
|
+
var composer = require_composer();
|
|
10920
|
+
var Document = require_Document();
|
|
10921
|
+
var Schema = require_Schema();
|
|
10922
|
+
var errors2 = require_errors();
|
|
10923
|
+
var Alias = require_Alias();
|
|
10924
|
+
var identity = require_identity();
|
|
10925
|
+
var Pair = require_Pair();
|
|
10926
|
+
var Scalar = require_Scalar();
|
|
10927
|
+
var YAMLMap = require_YAMLMap();
|
|
10928
|
+
var YAMLSeq = require_YAMLSeq();
|
|
10929
|
+
var cst = require_cst();
|
|
10930
|
+
var lexer = require_lexer();
|
|
10931
|
+
var lineCounter = require_line_counter();
|
|
10932
|
+
var parser = require_parser();
|
|
10933
|
+
var publicApi = require_public_api();
|
|
10934
|
+
var visit = require_visit();
|
|
10935
|
+
exports.Composer = composer.Composer;
|
|
10936
|
+
exports.Document = Document.Document;
|
|
10937
|
+
exports.Schema = Schema.Schema;
|
|
10938
|
+
exports.YAMLError = errors2.YAMLError;
|
|
10939
|
+
exports.YAMLParseError = errors2.YAMLParseError;
|
|
10940
|
+
exports.YAMLWarning = errors2.YAMLWarning;
|
|
10941
|
+
exports.Alias = Alias.Alias;
|
|
10942
|
+
exports.isAlias = identity.isAlias;
|
|
10943
|
+
exports.isCollection = identity.isCollection;
|
|
10944
|
+
exports.isDocument = identity.isDocument;
|
|
10945
|
+
exports.isMap = identity.isMap;
|
|
10946
|
+
exports.isNode = identity.isNode;
|
|
10947
|
+
exports.isPair = identity.isPair;
|
|
10948
|
+
exports.isScalar = identity.isScalar;
|
|
10949
|
+
exports.isSeq = identity.isSeq;
|
|
10950
|
+
exports.Pair = Pair.Pair;
|
|
10951
|
+
exports.Scalar = Scalar.Scalar;
|
|
10952
|
+
exports.YAMLMap = YAMLMap.YAMLMap;
|
|
10953
|
+
exports.YAMLSeq = YAMLSeq.YAMLSeq;
|
|
10954
|
+
exports.CST = cst;
|
|
10955
|
+
exports.Lexer = lexer.Lexer;
|
|
10956
|
+
exports.LineCounter = lineCounter.LineCounter;
|
|
10957
|
+
exports.Parser = parser.Parser;
|
|
10958
|
+
exports.parse = publicApi.parse;
|
|
10959
|
+
exports.parseAllDocuments = publicApi.parseAllDocuments;
|
|
10960
|
+
exports.parseDocument = publicApi.parseDocument;
|
|
10961
|
+
exports.stringify = publicApi.stringify;
|
|
10962
|
+
exports.visit = visit.visit;
|
|
10963
|
+
exports.visitAsync = visit.visitAsync;
|
|
10964
|
+
});
|
|
10965
|
+
|
|
10899
10966
|
// src/evaluators/combined.ts
|
|
10900
10967
|
async function getEvaluatorForType(type) {
|
|
10901
10968
|
const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
|
|
@@ -13401,6 +13468,22 @@ async function registerBuiltInAdapters() {
|
|
|
13401
13468
|
const mod = await tryImport("@artemiskit/adapter-anthropic");
|
|
13402
13469
|
return new mod.AnthropicAdapter(config);
|
|
13403
13470
|
});
|
|
13471
|
+
adapterRegistry.register("langchain", async (config) => {
|
|
13472
|
+
const mod = await tryImport("@artemiskit/adapter-langchain");
|
|
13473
|
+
const runnable = config.metadata?.runnable;
|
|
13474
|
+
if (!runnable) {
|
|
13475
|
+
throw new Error("LangChain adapter requires a runnable instance. Pass it via config.metadata.runnable or use createLangChainAdapter() directly.");
|
|
13476
|
+
}
|
|
13477
|
+
return new mod.LangChainAdapter(config, runnable);
|
|
13478
|
+
});
|
|
13479
|
+
adapterRegistry.register("deepagents", async (config) => {
|
|
13480
|
+
const mod = await tryImport("@artemiskit/adapter-deepagents");
|
|
13481
|
+
const system = config.metadata?.system;
|
|
13482
|
+
if (!system) {
|
|
13483
|
+
throw new Error("DeepAgents adapter requires a system instance. Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.");
|
|
13484
|
+
}
|
|
13485
|
+
return new mod.DeepAgentsAdapter(config, system);
|
|
13486
|
+
});
|
|
13404
13487
|
adapterRegistry.markUnavailable("google", "Google adapter coming in v0.3.0");
|
|
13405
13488
|
adapterRegistry.markUnavailable("mistral", "Mistral adapter coming in v0.3.0");
|
|
13406
13489
|
adapterRegistry.markUnavailable("ollama", "Ollama adapter coming in v0.3.0");
|
|
@@ -13494,6 +13577,8 @@ var ProviderSchema = exports_external.enum([
|
|
|
13494
13577
|
"cohere",
|
|
13495
13578
|
"huggingface",
|
|
13496
13579
|
"ollama",
|
|
13580
|
+
"langchain",
|
|
13581
|
+
"deepagents",
|
|
13497
13582
|
"custom"
|
|
13498
13583
|
]);
|
|
13499
13584
|
var ProviderConfigSchema = exports_external.object({
|
|
@@ -13508,7 +13593,11 @@ var ProviderConfigSchema = exports_external.object({
|
|
|
13508
13593
|
apiVersion: exports_external.string().optional(),
|
|
13509
13594
|
embeddingDeploymentName: exports_external.string().optional(),
|
|
13510
13595
|
modelFamily: exports_external.string().optional(),
|
|
13511
|
-
underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional()
|
|
13596
|
+
underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional(),
|
|
13597
|
+
name: exports_external.string().optional(),
|
|
13598
|
+
runnableType: exports_external.enum(["chain", "agent", "llm", "runnable"]).optional(),
|
|
13599
|
+
captureTraces: exports_external.boolean().optional(),
|
|
13600
|
+
captureMessages: exports_external.boolean().optional()
|
|
13512
13601
|
}).optional();
|
|
13513
13602
|
var BaseExpectedSchema = exports_external.discriminatedUnion("type", [
|
|
13514
13603
|
exports_external.object({
|
|
@@ -13616,55 +13705,8 @@ var ScenarioSchema = exports_external.object({
|
|
|
13616
13705
|
}).optional()
|
|
13617
13706
|
});
|
|
13618
13707
|
// src/scenario/parser.ts
|
|
13708
|
+
var import_yaml = __toESM(require_dist(), 1);
|
|
13619
13709
|
import { readFile } from "fs/promises";
|
|
13620
|
-
|
|
13621
|
-
// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
|
|
13622
|
-
var composer = require_composer();
|
|
13623
|
-
var Document = require_Document();
|
|
13624
|
-
var Schema = require_Schema();
|
|
13625
|
-
var errors2 = require_errors();
|
|
13626
|
-
var Alias = require_Alias();
|
|
13627
|
-
var identity = require_identity();
|
|
13628
|
-
var Pair = require_Pair();
|
|
13629
|
-
var Scalar = require_Scalar();
|
|
13630
|
-
var YAMLMap = require_YAMLMap();
|
|
13631
|
-
var YAMLSeq = require_YAMLSeq();
|
|
13632
|
-
var cst = require_cst();
|
|
13633
|
-
var lexer = require_lexer();
|
|
13634
|
-
var lineCounter = require_line_counter();
|
|
13635
|
-
var parser = require_parser();
|
|
13636
|
-
var publicApi = require_public_api();
|
|
13637
|
-
var visit = require_visit();
|
|
13638
|
-
var $Composer = composer.Composer;
|
|
13639
|
-
var $Document = Document.Document;
|
|
13640
|
-
var $Schema = Schema.Schema;
|
|
13641
|
-
var $YAMLError = errors2.YAMLError;
|
|
13642
|
-
var $YAMLParseError = errors2.YAMLParseError;
|
|
13643
|
-
var $YAMLWarning = errors2.YAMLWarning;
|
|
13644
|
-
var $Alias = Alias.Alias;
|
|
13645
|
-
var $isAlias = identity.isAlias;
|
|
13646
|
-
var $isCollection = identity.isCollection;
|
|
13647
|
-
var $isDocument = identity.isDocument;
|
|
13648
|
-
var $isMap = identity.isMap;
|
|
13649
|
-
var $isNode = identity.isNode;
|
|
13650
|
-
var $isPair = identity.isPair;
|
|
13651
|
-
var $isScalar = identity.isScalar;
|
|
13652
|
-
var $isSeq = identity.isSeq;
|
|
13653
|
-
var $Pair = Pair.Pair;
|
|
13654
|
-
var $Scalar = Scalar.Scalar;
|
|
13655
|
-
var $YAMLMap = YAMLMap.YAMLMap;
|
|
13656
|
-
var $YAMLSeq = YAMLSeq.YAMLSeq;
|
|
13657
|
-
var $Lexer = lexer.Lexer;
|
|
13658
|
-
var $LineCounter = lineCounter.LineCounter;
|
|
13659
|
-
var $Parser = parser.Parser;
|
|
13660
|
-
var $parse = publicApi.parse;
|
|
13661
|
-
var $parseAllDocuments = publicApi.parseAllDocuments;
|
|
13662
|
-
var $parseDocument = publicApi.parseDocument;
|
|
13663
|
-
var $stringify = publicApi.stringify;
|
|
13664
|
-
var $visit = visit.visit;
|
|
13665
|
-
var $visitAsync = visit.visitAsync;
|
|
13666
|
-
|
|
13667
|
-
// src/scenario/parser.ts
|
|
13668
13710
|
function expandEnvVars(obj) {
|
|
13669
13711
|
if (typeof obj === "string") {
|
|
13670
13712
|
return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
|
|
@@ -13699,7 +13741,7 @@ async function parseScenarioFile(filePath) {
|
|
|
13699
13741
|
}
|
|
13700
13742
|
function parseScenarioString(content, source) {
|
|
13701
13743
|
try {
|
|
13702
|
-
const raw =
|
|
13744
|
+
const raw = import_yaml.parse(content);
|
|
13703
13745
|
const expanded = expandEnvVars(raw);
|
|
13704
13746
|
const result = ScenarioSchema.safeParse(expanded);
|
|
13705
13747
|
if (!result.success) {
|
|
@@ -16929,7 +16971,7 @@ class RealtimeChannel {
|
|
|
16929
16971
|
}).map((bind) => {
|
|
16930
16972
|
if (typeof handledPayload === "object" && "ids" in handledPayload) {
|
|
16931
16973
|
const postgresChanges = handledPayload.data;
|
|
16932
|
-
const { schema: schema2, table, commit_timestamp, type: type2, errors:
|
|
16974
|
+
const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
|
|
16933
16975
|
const enrichedPayload = {
|
|
16934
16976
|
schema: schema2,
|
|
16935
16977
|
table,
|
|
@@ -16937,7 +16979,7 @@ class RealtimeChannel {
|
|
|
16937
16979
|
eventType: type2,
|
|
16938
16980
|
new: {},
|
|
16939
16981
|
old: {},
|
|
16940
|
-
errors:
|
|
16982
|
+
errors: errors2
|
|
16941
16983
|
};
|
|
16942
16984
|
handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
|
|
16943
16985
|
}
|
|
@@ -22508,7 +22550,7 @@ class GoTrueClient {
|
|
|
22508
22550
|
}
|
|
22509
22551
|
});
|
|
22510
22552
|
}
|
|
22511
|
-
async unlinkIdentity(
|
|
22553
|
+
async unlinkIdentity(identity) {
|
|
22512
22554
|
try {
|
|
22513
22555
|
return await this._useSession(async (result) => {
|
|
22514
22556
|
var _a, _b;
|
|
@@ -22516,7 +22558,7 @@ class GoTrueClient {
|
|
|
22516
22558
|
if (error) {
|
|
22517
22559
|
throw error;
|
|
22518
22560
|
}
|
|
22519
|
-
return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${
|
|
22561
|
+
return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
|
|
22520
22562
|
headers: this.headers,
|
|
22521
22563
|
jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
|
|
22522
22564
|
});
|
|
@@ -22690,20 +22732,20 @@ class GoTrueClient {
|
|
|
22690
22732
|
if (this.broadcastChannel && broadcast) {
|
|
22691
22733
|
this.broadcastChannel.postMessage({ event, session });
|
|
22692
22734
|
}
|
|
22693
|
-
const
|
|
22735
|
+
const errors2 = [];
|
|
22694
22736
|
const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
|
|
22695
22737
|
try {
|
|
22696
22738
|
await x.callback(event, session);
|
|
22697
22739
|
} catch (e) {
|
|
22698
|
-
|
|
22740
|
+
errors2.push(e);
|
|
22699
22741
|
}
|
|
22700
22742
|
});
|
|
22701
22743
|
await Promise.all(promises);
|
|
22702
|
-
if (
|
|
22703
|
-
for (let i = 0;i <
|
|
22704
|
-
console.error(
|
|
22744
|
+
if (errors2.length > 0) {
|
|
22745
|
+
for (let i = 0;i < errors2.length; i += 1) {
|
|
22746
|
+
console.error(errors2[i]);
|
|
22705
22747
|
}
|
|
22706
|
-
throw
|
|
22748
|
+
throw errors2[0];
|
|
22707
22749
|
}
|
|
22708
22750
|
} finally {
|
|
22709
22751
|
this._debug(debugName, "end");
|
|
@@ -23578,12 +23620,33 @@ if (shouldShowDeprecationWarning())
|
|
|
23578
23620
|
console.warn("\u26A0\uFE0F Node.js 18 and below are deprecated and will no longer be supported in future versions of @supabase/supabase-js. Please upgrade to Node.js 20 or later. For more information, visit: https://github.com/orgs/supabase/discussions/37217");
|
|
23579
23621
|
|
|
23580
23622
|
// src/storage/supabase.ts
|
|
23623
|
+
/**
 * Flattens one case entry of a run manifest into the camelCase record shape
 * that is handed to `saveCaseResults`.
 *
 * Status precedence: a truthy `error` yields "error"; otherwise `ok` selects
 * between "passed" and "failed".
 *
 * @param runId - Identifier of the run the case belongs to.
 * @param caseResult - Case entry (id, name, ok, score, matcherType, reason,
 *   response, latencyMs, tokens, error, tags).
 * @returns Record with runId, caseId, caseName, status, score, matcherType,
 *   reason, response, latencyMs, prompt/completion/total token counts, error
 *   and tags. Token counts are `undefined` when the case has no `tokens`.
 */
function mapCaseToRecord(runId, caseResult) {
  // `tokens` may be missing (presumably for cases that errored before any
  // usage was reported — confirm against the runner). Read it defensively so
  // a single such case cannot make the whole save throw a TypeError.
  const usage = caseResult.tokens ?? {};

  let status = "failed";
  if (caseResult.error) {
    status = "error";
  } else if (caseResult.ok) {
    status = "passed";
  }

  return {
    runId,
    caseId: caseResult.id,
    caseName: caseResult.name,
    status,
    score: caseResult.score,
    matcherType: caseResult.matcherType,
    reason: caseResult.reason,
    response: caseResult.response,
    latencyMs: caseResult.latencyMs,
    promptTokens: usage.prompt,
    completionTokens: usage.completion,
    totalTokens: usage.total,
    error: caseResult.error,
    tags: caseResult.tags
  };
}
|
|
23641
|
+
|
|
23581
23642
|
class SupabaseStorageAdapter {
|
|
23582
23643
|
client;
|
|
23583
23644
|
bucket;
|
|
23584
|
-
|
|
23645
|
+
project;
|
|
23646
|
+
constructor(config, project) {
|
|
23585
23647
|
this.client = createClient(config.url, config.anonKey);
|
|
23586
23648
|
this.bucket = config.bucket || "artemis-runs";
|
|
23649
|
+
this.project = project || "default";
|
|
23587
23650
|
}
|
|
23588
23651
|
async save(manifest) {
|
|
23589
23652
|
const filePath = `${manifest.project}/${manifest.run_id}.json`;
|
|
@@ -23619,6 +23682,10 @@ class SupabaseStorageAdapter {
|
|
|
23619
23682
|
if (dbError) {
|
|
23620
23683
|
throw new Error(`Failed to save run metadata: ${dbError.message}`);
|
|
23621
23684
|
}
|
|
23685
|
+
if (manifest.cases && manifest.cases.length > 0) {
|
|
23686
|
+
const caseRecords = manifest.cases.map((c) => mapCaseToRecord(manifest.run_id, c));
|
|
23687
|
+
await this.saveCaseResults(caseRecords);
|
|
23688
|
+
}
|
|
23622
23689
|
return filePath;
|
|
23623
23690
|
}
|
|
23624
23691
|
async load(runId) {
|
|
@@ -23677,6 +23744,392 @@ class SupabaseStorageAdapter {
|
|
|
23677
23744
|
}
|
|
23678
23745
|
};
|
|
23679
23746
|
}
|
|
23747
|
+
async setBaseline(scenario, runId, tag) {
|
|
23748
|
+
const { data: run, error: runError } = await this.client.from("runs").select("*").eq("run_id", runId).single();
|
|
23749
|
+
if (runError || !run) {
|
|
23750
|
+
throw new Error(`Run not found: ${runId}`);
|
|
23751
|
+
}
|
|
23752
|
+
const baselineData = {
|
|
23753
|
+
project: run.project,
|
|
23754
|
+
scenario,
|
|
23755
|
+
run_id: runId,
|
|
23756
|
+
success_rate: run.success_rate,
|
|
23757
|
+
median_latency_ms: run.median_latency_ms,
|
|
23758
|
+
total_tokens: run.total_tokens,
|
|
23759
|
+
passed_cases: run.passed_cases,
|
|
23760
|
+
failed_cases: run.failed_cases,
|
|
23761
|
+
total_cases: run.total_cases,
|
|
23762
|
+
tag,
|
|
23763
|
+
created_by: run.run_by
|
|
23764
|
+
};
|
|
23765
|
+
const { error } = await this.client.from("baselines").upsert(baselineData, {
|
|
23766
|
+
onConflict: "project,scenario"
|
|
23767
|
+
});
|
|
23768
|
+
if (error) {
|
|
23769
|
+
throw new Error(`Failed to set baseline: ${error.message}`);
|
|
23770
|
+
}
|
|
23771
|
+
return {
|
|
23772
|
+
scenario,
|
|
23773
|
+
runId,
|
|
23774
|
+
createdAt: new Date().toISOString(),
|
|
23775
|
+
metrics: {
|
|
23776
|
+
successRate: run.success_rate,
|
|
23777
|
+
medianLatencyMs: run.median_latency_ms,
|
|
23778
|
+
totalTokens: run.total_tokens,
|
|
23779
|
+
passedCases: run.passed_cases,
|
|
23780
|
+
failedCases: run.failed_cases,
|
|
23781
|
+
totalCases: run.total_cases
|
|
23782
|
+
},
|
|
23783
|
+
tag
|
|
23784
|
+
};
|
|
23785
|
+
}
|
|
23786
|
+
async getBaseline(scenario) {
|
|
23787
|
+
const { data, error } = await this.client.from("baselines").select("*").eq("project", this.project).eq("scenario", scenario).single();
|
|
23788
|
+
if (error || !data) {
|
|
23789
|
+
return null;
|
|
23790
|
+
}
|
|
23791
|
+
return {
|
|
23792
|
+
scenario: data.scenario,
|
|
23793
|
+
runId: data.run_id,
|
|
23794
|
+
createdAt: data.created_at,
|
|
23795
|
+
metrics: {
|
|
23796
|
+
successRate: data.success_rate,
|
|
23797
|
+
medianLatencyMs: data.median_latency_ms,
|
|
23798
|
+
totalTokens: data.total_tokens,
|
|
23799
|
+
passedCases: data.passed_cases,
|
|
23800
|
+
failedCases: data.failed_cases,
|
|
23801
|
+
totalCases: data.total_cases
|
|
23802
|
+
},
|
|
23803
|
+
tag: data.tag
|
|
23804
|
+
};
|
|
23805
|
+
}
|
|
23806
|
+
async getBaselineByRunId(runId) {
|
|
23807
|
+
const { data, error } = await this.client.from("baselines").select("*").eq("run_id", runId).single();
|
|
23808
|
+
if (error || !data) {
|
|
23809
|
+
return null;
|
|
23810
|
+
}
|
|
23811
|
+
return {
|
|
23812
|
+
scenario: data.scenario,
|
|
23813
|
+
runId: data.run_id,
|
|
23814
|
+
createdAt: data.created_at,
|
|
23815
|
+
metrics: {
|
|
23816
|
+
successRate: data.success_rate,
|
|
23817
|
+
medianLatencyMs: data.median_latency_ms,
|
|
23818
|
+
totalTokens: data.total_tokens,
|
|
23819
|
+
passedCases: data.passed_cases,
|
|
23820
|
+
failedCases: data.failed_cases,
|
|
23821
|
+
totalCases: data.total_cases
|
|
23822
|
+
},
|
|
23823
|
+
tag: data.tag
|
|
23824
|
+
};
|
|
23825
|
+
}
|
|
23826
|
+
async listBaselines() {
|
|
23827
|
+
const { data, error } = await this.client.from("baselines").select("*").eq("project", this.project).order("created_at", { ascending: false });
|
|
23828
|
+
if (error) {
|
|
23829
|
+
throw new Error(`Failed to list baselines: ${error.message}`);
|
|
23830
|
+
}
|
|
23831
|
+
return (data || []).map((b) => ({
|
|
23832
|
+
scenario: b.scenario,
|
|
23833
|
+
runId: b.run_id,
|
|
23834
|
+
createdAt: b.created_at,
|
|
23835
|
+
metrics: {
|
|
23836
|
+
successRate: b.success_rate,
|
|
23837
|
+
medianLatencyMs: b.median_latency_ms,
|
|
23838
|
+
totalTokens: b.total_tokens,
|
|
23839
|
+
passedCases: b.passed_cases,
|
|
23840
|
+
failedCases: b.failed_cases,
|
|
23841
|
+
totalCases: b.total_cases
|
|
23842
|
+
},
|
|
23843
|
+
tag: b.tag
|
|
23844
|
+
}));
|
|
23845
|
+
}
|
|
23846
|
+
async removeBaseline(scenario) {
|
|
23847
|
+
const { error, count } = await this.client.from("baselines").delete().eq("project", this.project).eq("scenario", scenario);
|
|
23848
|
+
if (error) {
|
|
23849
|
+
throw new Error(`Failed to remove baseline: ${error.message}`);
|
|
23850
|
+
}
|
|
23851
|
+
return (count ?? 0) > 0;
|
|
23852
|
+
}
|
|
23853
|
+
async removeBaselineByRunId(runId) {
|
|
23854
|
+
const { error, count } = await this.client.from("baselines").delete().eq("run_id", runId);
|
|
23855
|
+
if (error) {
|
|
23856
|
+
throw new Error(`Failed to remove baseline: ${error.message}`);
|
|
23857
|
+
}
|
|
23858
|
+
return (count ?? 0) > 0;
|
|
23859
|
+
}
|
|
23860
|
+
async compareToBaseline(runId, regressionThreshold = 0.05) {
|
|
23861
|
+
const { data: run, error: runError } = await this.client.from("runs").select("scenario").eq("run_id", runId).single();
|
|
23862
|
+
if (runError || !run) {
|
|
23863
|
+
return null;
|
|
23864
|
+
}
|
|
23865
|
+
const baseline = await this.getBaseline(run.scenario);
|
|
23866
|
+
if (!baseline) {
|
|
23867
|
+
return null;
|
|
23868
|
+
}
|
|
23869
|
+
const comparison = await this.compare(baseline.runId, runId);
|
|
23870
|
+
const hasRegression = comparison.delta.successRate < -regressionThreshold;
|
|
23871
|
+
return {
|
|
23872
|
+
baseline,
|
|
23873
|
+
comparison,
|
|
23874
|
+
hasRegression,
|
|
23875
|
+
regressionThreshold
|
|
23876
|
+
};
|
|
23877
|
+
}
|
|
23878
|
+
async saveCaseResult(result) {
|
|
23879
|
+
const dbRecord = {
|
|
23880
|
+
run_id: result.runId,
|
|
23881
|
+
case_id: result.caseId,
|
|
23882
|
+
case_name: result.caseName,
|
|
23883
|
+
status: result.status,
|
|
23884
|
+
score: result.score,
|
|
23885
|
+
matcher_type: result.matcherType,
|
|
23886
|
+
reason: result.reason,
|
|
23887
|
+
response: result.response,
|
|
23888
|
+
latency_ms: result.latencyMs,
|
|
23889
|
+
prompt_tokens: result.promptTokens,
|
|
23890
|
+
completion_tokens: result.completionTokens,
|
|
23891
|
+
total_tokens: result.totalTokens,
|
|
23892
|
+
error: result.error,
|
|
23893
|
+
tags: result.tags || []
|
|
23894
|
+
};
|
|
23895
|
+
const { data, error } = await this.client.from("case_results").upsert(dbRecord, { onConflict: "run_id,case_id" }).select("id").single();
|
|
23896
|
+
if (error) {
|
|
23897
|
+
throw new Error(`Failed to save case result: ${error.message}`);
|
|
23898
|
+
}
|
|
23899
|
+
return data?.id || result.caseId;
|
|
23900
|
+
}
|
|
23901
|
+
async saveCaseResults(results) {
|
|
23902
|
+
if (results.length === 0) {
|
|
23903
|
+
return [];
|
|
23904
|
+
}
|
|
23905
|
+
const dbRecords = results.map((r) => ({
|
|
23906
|
+
run_id: r.runId,
|
|
23907
|
+
case_id: r.caseId,
|
|
23908
|
+
case_name: r.caseName,
|
|
23909
|
+
status: r.status,
|
|
23910
|
+
score: r.score,
|
|
23911
|
+
matcher_type: r.matcherType,
|
|
23912
|
+
reason: r.reason,
|
|
23913
|
+
response: r.response,
|
|
23914
|
+
latency_ms: r.latencyMs,
|
|
23915
|
+
prompt_tokens: r.promptTokens,
|
|
23916
|
+
completion_tokens: r.completionTokens,
|
|
23917
|
+
total_tokens: r.totalTokens,
|
|
23918
|
+
error: r.error,
|
|
23919
|
+
tags: r.tags || []
|
|
23920
|
+
}));
|
|
23921
|
+
const { data, error } = await this.client.from("case_results").upsert(dbRecords, { onConflict: "run_id,case_id" }).select("id");
|
|
23922
|
+
if (error) {
|
|
23923
|
+
throw new Error(`Failed to save case results: ${error.message}`);
|
|
23924
|
+
}
|
|
23925
|
+
return (data || []).map((d) => d.id);
|
|
23926
|
+
}
|
|
23927
|
+
async getCaseResults(runId) {
|
|
23928
|
+
const { data, error } = await this.client.from("case_results").select("*").eq("run_id", runId).order("created_at", { ascending: true });
|
|
23929
|
+
if (error) {
|
|
23930
|
+
throw new Error(`Failed to get case results: ${error.message}`);
|
|
23931
|
+
}
|
|
23932
|
+
return (data || []).map((r) => ({
|
|
23933
|
+
id: r.id,
|
|
23934
|
+
runId: r.run_id,
|
|
23935
|
+
caseId: r.case_id,
|
|
23936
|
+
caseName: r.case_name,
|
|
23937
|
+
status: r.status,
|
|
23938
|
+
score: r.score,
|
|
23939
|
+
matcherType: r.matcher_type,
|
|
23940
|
+
reason: r.reason,
|
|
23941
|
+
response: r.response,
|
|
23942
|
+
latencyMs: r.latency_ms,
|
|
23943
|
+
promptTokens: r.prompt_tokens,
|
|
23944
|
+
completionTokens: r.completion_tokens,
|
|
23945
|
+
totalTokens: r.total_tokens,
|
|
23946
|
+
error: r.error,
|
|
23947
|
+
tags: r.tags,
|
|
23948
|
+
createdAt: r.created_at
|
|
23949
|
+
}));
|
|
23950
|
+
}
|
|
23951
|
+
async queryCaseResults(options) {
|
|
23952
|
+
let query = this.client.from("case_results").select("*").order("created_at", { ascending: false });
|
|
23953
|
+
if (options.runId) {
|
|
23954
|
+
query = query.eq("run_id", options.runId);
|
|
23955
|
+
}
|
|
23956
|
+
if (options.caseId) {
|
|
23957
|
+
query = query.eq("case_id", options.caseId);
|
|
23958
|
+
}
|
|
23959
|
+
if (options.status) {
|
|
23960
|
+
query = query.eq("status", options.status);
|
|
23961
|
+
}
|
|
23962
|
+
if (options.tags && options.tags.length > 0) {
|
|
23963
|
+
query = query.overlaps("tags", options.tags);
|
|
23964
|
+
}
|
|
23965
|
+
if (options.offset && options.limit) {
|
|
23966
|
+
query = query.range(options.offset, options.offset + options.limit - 1);
|
|
23967
|
+
} else if (options.limit) {
|
|
23968
|
+
query = query.limit(options.limit);
|
|
23969
|
+
}
|
|
23970
|
+
const { data, error } = await query;
|
|
23971
|
+
if (error) {
|
|
23972
|
+
throw new Error(`Failed to query case results: ${error.message}`);
|
|
23973
|
+
}
|
|
23974
|
+
return (data || []).map((r) => ({
|
|
23975
|
+
id: r.id,
|
|
23976
|
+
runId: r.run_id,
|
|
23977
|
+
caseId: r.case_id,
|
|
23978
|
+
caseName: r.case_name,
|
|
23979
|
+
status: r.status,
|
|
23980
|
+
score: r.score,
|
|
23981
|
+
matcherType: r.matcher_type,
|
|
23982
|
+
reason: r.reason,
|
|
23983
|
+
response: r.response,
|
|
23984
|
+
latencyMs: r.latency_ms,
|
|
23985
|
+
promptTokens: r.prompt_tokens,
|
|
23986
|
+
completionTokens: r.completion_tokens,
|
|
23987
|
+
totalTokens: r.total_tokens,
|
|
23988
|
+
error: r.error,
|
|
23989
|
+
tags: r.tags,
|
|
23990
|
+
createdAt: r.created_at
|
|
23991
|
+
}));
|
|
23992
|
+
}
|
|
23993
|
+
async saveMetricsSnapshot(snapshot) {
|
|
23994
|
+
const dbRecord = {
|
|
23995
|
+
date: snapshot.date,
|
|
23996
|
+
project: snapshot.project,
|
|
23997
|
+
scenario: snapshot.scenario || null,
|
|
23998
|
+
total_runs: snapshot.totalRuns,
|
|
23999
|
+
total_cases: snapshot.totalCases,
|
|
24000
|
+
passed_cases: snapshot.passedCases,
|
|
24001
|
+
failed_cases: snapshot.failedCases,
|
|
24002
|
+
avg_success_rate: snapshot.avgSuccessRate,
|
|
24003
|
+
avg_latency_ms: snapshot.avgLatencyMs,
|
|
24004
|
+
avg_tokens_per_run: snapshot.avgTokensPerRun,
|
|
24005
|
+
min_success_rate: snapshot.minSuccessRate,
|
|
24006
|
+
max_success_rate: snapshot.maxSuccessRate,
|
|
24007
|
+
min_latency_ms: snapshot.minLatencyMs,
|
|
24008
|
+
max_latency_ms: snapshot.maxLatencyMs,
|
|
24009
|
+
total_tokens: snapshot.totalTokens
|
|
24010
|
+
};
|
|
24011
|
+
const { data, error } = await this.client.from("metrics_history").upsert(dbRecord, { onConflict: "date,project,scenario" }).select("id").single();
|
|
24012
|
+
if (error) {
|
|
24013
|
+
throw new Error(`Failed to save metrics snapshot: ${error.message}`);
|
|
24014
|
+
}
|
|
24015
|
+
return data?.id || `${snapshot.date}-${snapshot.project}`;
|
|
24016
|
+
}
|
|
24017
|
+
async getMetricsTrend(options) {
|
|
24018
|
+
let query = this.client.from("metrics_history").select("date, avg_success_rate, avg_latency_ms, total_runs, total_tokens").eq("project", options.project).order("date", { ascending: true });
|
|
24019
|
+
if (options.scenario) {
|
|
24020
|
+
query = query.eq("scenario", options.scenario);
|
|
24021
|
+
} else {
|
|
24022
|
+
query = query.is("scenario", null);
|
|
24023
|
+
}
|
|
24024
|
+
if (options.startDate) {
|
|
24025
|
+
query = query.gte("date", options.startDate);
|
|
24026
|
+
}
|
|
24027
|
+
if (options.endDate) {
|
|
24028
|
+
query = query.lte("date", options.endDate);
|
|
24029
|
+
}
|
|
24030
|
+
if (options.limit) {
|
|
24031
|
+
query = query.limit(options.limit);
|
|
24032
|
+
}
|
|
24033
|
+
const { data, error } = await query;
|
|
24034
|
+
if (error) {
|
|
24035
|
+
throw new Error(`Failed to get metrics trend: ${error.message}`);
|
|
24036
|
+
}
|
|
24037
|
+
return (data || []).map((m) => ({
|
|
24038
|
+
date: m.date,
|
|
24039
|
+
successRate: m.avg_success_rate,
|
|
24040
|
+
latencyMs: m.avg_latency_ms,
|
|
24041
|
+
totalRuns: m.total_runs,
|
|
24042
|
+
totalTokens: m.total_tokens
|
|
24043
|
+
}));
|
|
24044
|
+
}
|
|
24045
|
+
async getMetricsSnapshot(date, project, scenario) {
|
|
24046
|
+
let query = this.client.from("metrics_history").select("*").eq("date", date).eq("project", project);
|
|
24047
|
+
if (scenario) {
|
|
24048
|
+
query = query.eq("scenario", scenario);
|
|
24049
|
+
} else {
|
|
24050
|
+
query = query.is("scenario", null);
|
|
24051
|
+
}
|
|
24052
|
+
const { data, error } = await query.single();
|
|
24053
|
+
if (error || !data) {
|
|
24054
|
+
return null;
|
|
24055
|
+
}
|
|
24056
|
+
return {
|
|
24057
|
+
id: data.id,
|
|
24058
|
+
date: data.date,
|
|
24059
|
+
project: data.project,
|
|
24060
|
+
scenario: data.scenario,
|
|
24061
|
+
totalRuns: data.total_runs,
|
|
24062
|
+
totalCases: data.total_cases,
|
|
24063
|
+
passedCases: data.passed_cases,
|
|
24064
|
+
failedCases: data.failed_cases,
|
|
24065
|
+
avgSuccessRate: data.avg_success_rate,
|
|
24066
|
+
avgLatencyMs: data.avg_latency_ms,
|
|
24067
|
+
avgTokensPerRun: data.avg_tokens_per_run,
|
|
24068
|
+
minSuccessRate: data.min_success_rate,
|
|
24069
|
+
maxSuccessRate: data.max_success_rate,
|
|
24070
|
+
minLatencyMs: data.min_latency_ms,
|
|
24071
|
+
maxLatencyMs: data.max_latency_ms,
|
|
24072
|
+
totalTokens: data.total_tokens,
|
|
24073
|
+
createdAt: data.created_at,
|
|
24074
|
+
updatedAt: data.updated_at
|
|
24075
|
+
};
|
|
24076
|
+
}
|
|
24077
|
+
async aggregateDailyMetrics(date, project, scenario) {
|
|
24078
|
+
const startOfDay = `${date}T00:00:00.000Z`;
|
|
24079
|
+
const endOfDay = `${date}T23:59:59.999Z`;
|
|
24080
|
+
let query = this.client.from("runs").select("*").eq("project", project).gte("started_at", startOfDay).lte("started_at", endOfDay);
|
|
24081
|
+
if (scenario) {
|
|
24082
|
+
query = query.eq("scenario", scenario);
|
|
24083
|
+
}
|
|
24084
|
+
const { data: runs, error } = await query;
|
|
24085
|
+
if (error) {
|
|
24086
|
+
throw new Error(`Failed to aggregate metrics: ${error.message}`);
|
|
24087
|
+
}
|
|
24088
|
+
const runList = runs || [];
|
|
24089
|
+
if (runList.length === 0) {
|
|
24090
|
+
const emptySnapshot = {
|
|
24091
|
+
date,
|
|
24092
|
+
project,
|
|
24093
|
+
scenario,
|
|
24094
|
+
totalRuns: 0,
|
|
24095
|
+
totalCases: 0,
|
|
24096
|
+
passedCases: 0,
|
|
24097
|
+
failedCases: 0,
|
|
24098
|
+
avgSuccessRate: 0,
|
|
24099
|
+
avgLatencyMs: 0,
|
|
24100
|
+
avgTokensPerRun: 0,
|
|
24101
|
+
totalTokens: 0
|
|
24102
|
+
};
|
|
24103
|
+
await this.saveMetricsSnapshot(emptySnapshot);
|
|
24104
|
+
return emptySnapshot;
|
|
24105
|
+
}
|
|
24106
|
+
const totalRuns = runList.length;
|
|
24107
|
+
const totalCases = runList.reduce((sum, r) => sum + r.total_cases, 0);
|
|
24108
|
+
const passedCases = runList.reduce((sum, r) => sum + r.passed_cases, 0);
|
|
24109
|
+
const failedCases = runList.reduce((sum, r) => sum + r.failed_cases, 0);
|
|
24110
|
+
const totalTokens = runList.reduce((sum, r) => sum + r.total_tokens, 0);
|
|
24111
|
+
const successRates = runList.map((r) => r.success_rate);
|
|
24112
|
+
const latencies = runList.map((r) => r.median_latency_ms);
|
|
24113
|
+
const snapshot = {
|
|
24114
|
+
date,
|
|
24115
|
+
project,
|
|
24116
|
+
scenario,
|
|
24117
|
+
totalRuns,
|
|
24118
|
+
totalCases,
|
|
24119
|
+
passedCases,
|
|
24120
|
+
failedCases,
|
|
24121
|
+
avgSuccessRate: successRates.reduce((a, b) => a + b, 0) / totalRuns,
|
|
24122
|
+
avgLatencyMs: latencies.reduce((a, b) => a + b, 0) / totalRuns,
|
|
24123
|
+
avgTokensPerRun: totalTokens / totalRuns,
|
|
24124
|
+
minSuccessRate: Math.min(...successRates),
|
|
24125
|
+
maxSuccessRate: Math.max(...successRates),
|
|
24126
|
+
minLatencyMs: Math.min(...latencies),
|
|
24127
|
+
maxLatencyMs: Math.max(...latencies),
|
|
24128
|
+
totalTokens
|
|
24129
|
+
};
|
|
24130
|
+
await this.saveMetricsSnapshot(snapshot);
|
|
24131
|
+
return snapshot;
|
|
24132
|
+
}
|
|
23680
24133
|
}
|
|
23681
24134
|
|
|
23682
24135
|
// src/storage/factory.ts
|
|
@@ -24751,6 +25204,251 @@ class Logger {
|
|
|
24751
25204
|
}
|
|
24752
25205
|
}
|
|
24753
25206
|
var logger = new Logger("artemis");
|
|
25207
|
+
// src/validator/validator.ts
|
|
25208
|
+
var import_yaml2 = __toESM(require_dist(), 1);
|
|
25209
|
+
import { readFileSync } from "fs";
|
|
25210
|
+
/**
 * Validates a scenario YAML file in four stages: file read, YAML syntax,
 * schema (via `ScenarioSchema.safeParse`), and semantic checks (duplicate
 * case IDs, undefined `{{variable}}` references). It also collects
 * non-fatal warnings.
 *
 * Line numbers attached to issues are best-effort: they come from a textual
 * search of the raw file content, not from the YAML parser's source map.
 */
class ScenarioValidator {
  _options;

  /**
   * @param {object} [options] - Stored as-is and exposed through `options`;
   *   not otherwise read by this class.
   */
  constructor(options = {}) {
    this._options = options;
  }

  /** @returns {object} The options passed to the constructor. */
  get options() {
    return this._options;
  }

  /**
   * Validate one scenario file.
   *
   * @param {string} filePath - Path of the YAML file to read.
   * @returns {{file: string, valid: boolean, errors: object[], warnings: object[]}}
   *   `valid` is true only when no errors were collected. Read, syntax, and
   *   non-object failures return immediately with a single error.
   */
  validate(filePath) {
    const errors4 = [];
    const warnings = [];
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const failed = () => ({ file: filePath, valid: false, errors: errors4, warnings });

    let content;
    try {
      content = readFileSync(filePath, "utf-8");
    } catch (err) {
      errors4.push({
        line: 1,
        message: `Failed to read file: ${describe(err)}`,
        rule: "file-read",
        severity: "error"
      });
      return failed();
    }

    let parsed;
    try {
      parsed = import_yaml2.default.parse(content, {
        prettyErrors: true,
        strict: true
      });
    } catch (err) {
      if (err instanceof import_yaml2.default.YAMLError) {
        const linePos = err.linePos?.[0];
        errors4.push({
          line: linePos?.line || 1,
          column: linePos?.col,
          message: `Invalid YAML syntax: ${err.message}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      } else {
        errors4.push({
          line: 1,
          message: `YAML parse error: ${describe(err)}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      }
      return failed();
    }

    if (parsed === null || typeof parsed !== "object") {
      errors4.push({
        line: 1,
        message: "Scenario must be a YAML object",
        rule: "schema-type",
        severity: "error"
      });
      return failed();
    }

    // Semantic checks need the typed, schema-validated data, so they only
    // run when the schema stage passes.
    const schemaResult = ScenarioSchema.safeParse(parsed);
    if (schemaResult.success) {
      errors4.push(...this.validateSemantics(schemaResult.data, content));
    } else {
      errors4.push(...this.formatZodErrors(schemaResult.error, content));
    }

    warnings.push(...this.detectWarnings(parsed, content));
    return {
      file: filePath,
      valid: errors4.length === 0,
      errors: errors4,
      warnings
    };
  }

  /**
   * Convert schema issues into validator issues with friendlier messages.
   *
   * @param {{issues: object[]}} error - Object exposing an `issues` array;
   *   each issue provides `code`, `path`, `message` and code-specific fields.
   * @param {string} content - Raw file text, used to estimate line numbers.
   * @returns {object[]} One error entry per issue.
   */
  formatZodErrors(error, content) {
    const lines = content.split("\n");
    return error.issues.map((issue) => {
      const path = issue.path.join(".");
      let message;
      switch (issue.code) {
        case "invalid_type":
          message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
          break;
        case "invalid_enum_value":
          message = `'${path}' must be one of: ${issue.options.join(", ")}`;
          break;
        case "too_small":
          message = issue.type === "array"
            ? `'${path}' must have at least ${issue.minimum} item(s)`
            : `'${path}' is too small`;
          break;
        case "unrecognized_keys":
          message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
          break;
        default:
          message = issue.message;
      }
      return {
        line: this.findLineForPath(lines, issue.path),
        message,
        rule: `schema-${issue.code}`,
        severity: "error"
      };
    });
  }

  /**
   * Estimate the 1-based line a schema path refers to.
   *
   * When the path goes through `cases[N]`, the search is limited to the
   * N-th case block (blocks are assumed to start on a line beginning with
   * "- id:"), so an issue in a later case is not reported on the first
   * case. Numeric segments are never searched as text; the nearest string
   * key in the path is used instead.
   *
   * @param {string[]} lines - File content split into lines.
   * @param {(string|number)[]} path - Schema issue path.
   * @returns {number} Line of the key; the case's first line when the key is
   *   absent from a located case; otherwise 1.
   */
  findLineForPath(lines, path) {
    if (path.length === 0) {
      return 1;
    }

    let start = 0;
    let end = lines.length;
    let scoped = false;
    const casesAt = path.indexOf("cases");
    const caseIndex = casesAt === -1 ? undefined : path[casesAt + 1];
    if (typeof caseIndex === "number") {
      const caseStarts = [];
      lines.forEach((text, index) => {
        if (text.trim().startsWith("- id:")) {
          caseStarts.push(index);
        }
      });
      if (caseIndex >= 0 && caseIndex < caseStarts.length) {
        start = caseStarts[caseIndex];
        end = caseIndex + 1 < caseStarts.length ? caseStarts[caseIndex + 1] : lines.length;
        scoped = true;
        if (casesAt + 1 === path.length - 1) {
          // The path addresses the case itself, not a field inside it.
          return start + 1;
        }
      }
    }

    const keySegment = [...path].reverse().find((segment) => typeof segment === "string");
    if (keySegment !== undefined) {
      const needle = `${keySegment}:`;
      for (let i2 = start; i2 < end; i2++) {
        if (lines[i2].includes(needle)) {
          return i2 + 1;
        }
      }
    }
    // A missing key inside a located case is best reported on that case.
    return scoped ? start + 1 : 1;
  }

  /**
   * Checks that need the whole scenario: duplicate case IDs and prompt
   * variables that are defined neither globally nor on the case.
   *
   * @param {{cases: object[], variables?: object}} scenario - Schema-validated data.
   * @param {string} content - Raw file text, used to estimate line numbers.
   * @returns {object[]} Error entries (possibly empty).
   */
  validateSemantics(scenario, content) {
    const errors4 = [];
    const lines = content.split("\n");

    const timesSeen = new Map();
    for (const testCase of scenario.cases) {
      const occurrence = (timesSeen.get(testCase.id) ?? 0) + 1;
      timesSeen.set(testCase.id, occurrence);
      if (occurrence > 1) {
        errors4.push({
          // Point at the duplicate itself, not at the first declaration.
          line: this.findLineForCaseId(lines, testCase.id, occurrence),
          message: `Duplicate case ID: '${testCase.id}'`,
          rule: "duplicate-case-id",
          severity: "error"
        });
      }
    }

    const globalVars = scenario.variables || {};
    for (const testCase of scenario.cases) {
      const allVars = { ...globalVars, ...(testCase.variables || {}) };
      const prompt2 = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
      for (const ref of this.extractVariableRefs(prompt2)) {
        // Own-property check: `in` would accept inherited names such as
        // "toString" or "constructor" as if they were defined variables.
        if (Object.prototype.hasOwnProperty.call(allVars, ref)) {
          continue;
        }
        errors4.push({
          line: this.findLineForCaseId(lines, testCase.id),
          message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
          rule: "undefined-variable",
          severity: "error",
          suggestion: `Define '${ref}' in scenario.variables or case.variables`
        });
      }
    }
    return errors4;
  }

  /**
   * Find the line declaring a case ID (`id: x`, `id: "x"` or `id: 'x'`).
   * The ID must be followed by whitespace, `,`, `}`, `]` or end of line, so
   * `a` does not match a line declaring `ab`.
   *
   * @param {string[]} lines - File content split into lines.
   * @param {string} caseId - Case identifier to look for.
   * @param {number} [occurrence=1] - Which declaration to return (1-based).
   * @returns {number} 1-based line of the requested declaration, the first
   *   declaration when there are fewer than `occurrence`, or 1 when none.
   */
  findLineForCaseId(lines, caseId, occurrence = 1) {
    const escapedId = String(caseId).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const declaration = new RegExp(`id: (?:${escapedId}|"${escapedId}"|'${escapedId}')(?=$|[\\s,}\\]])`);
    let firstMatch;
    let seen = 0;
    for (let i2 = 0; i2 < lines.length; i2++) {
      if (!declaration.test(lines[i2])) {
        continue;
      }
      seen += 1;
      firstMatch ??= i2 + 1;
      if (seen === occurrence) {
        return i2 + 1;
      }
    }
    return firstMatch ?? 1;
  }

  /**
   * @param {string} text - Text to scan.
   * @returns {string[]} Names inside every `{{name}}` placeholder, in order
   *   of appearance, duplicates included.
   */
  extractVariableRefs(text) {
    return Array.from(text.matchAll(/\{\{(\w+)\}\}/g), (match) => match[1]);
  }

  /**
   * Collect non-fatal findings: deprecated `criteria` key anywhere in the
   * document, more than 20 cases, and a missing/empty `description`.
   *
   * @param {unknown} parsed - Parsed YAML document.
   * @param {string} content - Raw file text, used to estimate line numbers.
   * @returns {object[]} Warning entries (possibly empty).
   */
  detectWarnings(parsed, content) {
    const warnings = [];
    if (!parsed || typeof parsed !== "object") {
      return warnings;
    }
    const lines = content.split("\n");
    const obj = parsed;

    if (this.hasDeepKey(obj, "criteria")) {
      warnings.push({
        line: this.findLineForKey(lines, "criteria"),
        message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
        rule: "deprecated-field",
        severity: "warning",
        suggestion: "Replace 'criteria' with 'rubric'"
      });
    }

    const cases = obj.cases;
    if (Array.isArray(cases) && cases.length > 20) {
      warnings.push({
        line: 1,
        message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
        rule: "performance-hint",
        severity: "warning"
      });
    }

    if (!obj.description) {
      warnings.push({
        line: 1,
        message: "Scenario is missing 'description' field. Adding a description improves documentation.",
        rule: "missing-description",
        severity: "warning"
      });
    }
    return warnings;
  }

  /**
   * @param {unknown} obj - Value to inspect; non-objects yield false.
   * @param {string} key - Key name to look for.
   * @returns {boolean} True when `key` exists on `obj` or on any nested
   *   object/array value.
   */
  hasDeepKey(obj, key) {
    if (obj === null || typeof obj !== "object") {
      return false;
    }
    if (key in obj) {
      return true;
    }
    return Object.values(obj).some((value) => this.hasDeepKey(value, key));
  }

  /**
   * @param {string[]} lines - File content split into lines.
   * @param {string} key - Key name to look for.
   * @returns {number} 1-based index of the first line containing `key:`,
   *   or 1 when no line does.
   */
  findLineForKey(lines, key) {
    const index = lines.findIndex((text) => text.includes(`${key}:`));
    return index === -1 ? 1 : index + 1;
  }
}
|
|
24754
25452
|
export {
|
|
24755
25453
|
wrapError,
|
|
24756
25454
|
validateScenario,
|
|
@@ -24798,6 +25496,7 @@ export {
|
|
|
24798
25496
|
TestCaseSchema,
|
|
24799
25497
|
SupabaseStorageAdapter,
|
|
24800
25498
|
SimilarityEvaluator,
|
|
25499
|
+
ScenarioValidator,
|
|
24801
25500
|
ScenarioSchema,
|
|
24802
25501
|
SUPPORTED_EXPRESSIONS,
|
|
24803
25502
|
RegexEvaluator,
|