@artemiskit/core 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,25 +4,43 @@ var __getProtoOf = Object.getPrototypeOf;
4
4
  var __defProp = Object.defineProperty;
5
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
6
  var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ function __accessProp(key) {
8
+ return this[key];
9
+ }
10
+ var __toESMCache_node;
11
+ var __toESMCache_esm;
7
12
  var __toESM = (mod, isNodeMode, target) => {
13
+ var canCache = mod != null && typeof mod === "object";
14
+ if (canCache) {
15
+ var cache = isNodeMode ? __toESMCache_node ??= new WeakMap : __toESMCache_esm ??= new WeakMap;
16
+ var cached = cache.get(mod);
17
+ if (cached)
18
+ return cached;
19
+ }
8
20
  target = mod != null ? __create(__getProtoOf(mod)) : {};
9
21
  const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
10
22
  for (let key of __getOwnPropNames(mod))
11
23
  if (!__hasOwnProp.call(to, key))
12
24
  __defProp(to, key, {
13
- get: () => mod[key],
25
+ get: __accessProp.bind(mod, key),
14
26
  enumerable: true
15
27
  });
28
+ if (canCache)
29
+ cache.set(mod, to);
16
30
  return to;
17
31
  };
18
32
  var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
33
+ var __returnValue = (v) => v;
34
+ function __exportSetter(name, newValue) {
35
+ this[name] = __returnValue.bind(null, newValue);
36
+ }
19
37
  var __export = (target, all) => {
20
38
  for (var name in all)
21
39
  __defProp(target, name, {
22
40
  get: all[name],
23
41
  enumerable: true,
24
42
  configurable: true,
25
- set: (newValue) => all[name] = () => newValue
43
+ set: __exportSetter.bind(all, name)
26
44
  });
27
45
  };
28
46
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
@@ -10896,6 +10914,55 @@ var require_public_api = __commonJS((exports) => {
10896
10914
  exports.stringify = stringify;
10897
10915
  });
10898
10916
 
10917
+ // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
10918
+ var require_dist = __commonJS((exports) => {
10919
+ var composer = require_composer();
10920
+ var Document = require_Document();
10921
+ var Schema = require_Schema();
10922
+ var errors2 = require_errors();
10923
+ var Alias = require_Alias();
10924
+ var identity = require_identity();
10925
+ var Pair = require_Pair();
10926
+ var Scalar = require_Scalar();
10927
+ var YAMLMap = require_YAMLMap();
10928
+ var YAMLSeq = require_YAMLSeq();
10929
+ var cst = require_cst();
10930
+ var lexer = require_lexer();
10931
+ var lineCounter = require_line_counter();
10932
+ var parser = require_parser();
10933
+ var publicApi = require_public_api();
10934
+ var visit = require_visit();
10935
+ exports.Composer = composer.Composer;
10936
+ exports.Document = Document.Document;
10937
+ exports.Schema = Schema.Schema;
10938
+ exports.YAMLError = errors2.YAMLError;
10939
+ exports.YAMLParseError = errors2.YAMLParseError;
10940
+ exports.YAMLWarning = errors2.YAMLWarning;
10941
+ exports.Alias = Alias.Alias;
10942
+ exports.isAlias = identity.isAlias;
10943
+ exports.isCollection = identity.isCollection;
10944
+ exports.isDocument = identity.isDocument;
10945
+ exports.isMap = identity.isMap;
10946
+ exports.isNode = identity.isNode;
10947
+ exports.isPair = identity.isPair;
10948
+ exports.isScalar = identity.isScalar;
10949
+ exports.isSeq = identity.isSeq;
10950
+ exports.Pair = Pair.Pair;
10951
+ exports.Scalar = Scalar.Scalar;
10952
+ exports.YAMLMap = YAMLMap.YAMLMap;
10953
+ exports.YAMLSeq = YAMLSeq.YAMLSeq;
10954
+ exports.CST = cst;
10955
+ exports.Lexer = lexer.Lexer;
10956
+ exports.LineCounter = lineCounter.LineCounter;
10957
+ exports.Parser = parser.Parser;
10958
+ exports.parse = publicApi.parse;
10959
+ exports.parseAllDocuments = publicApi.parseAllDocuments;
10960
+ exports.parseDocument = publicApi.parseDocument;
10961
+ exports.stringify = publicApi.stringify;
10962
+ exports.visit = visit.visit;
10963
+ exports.visitAsync = visit.visitAsync;
10964
+ });
10965
+
10899
10966
  // src/evaluators/combined.ts
10900
10967
  async function getEvaluatorForType(type) {
10901
10968
  const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
@@ -13401,6 +13468,22 @@ async function registerBuiltInAdapters() {
13401
13468
  const mod = await tryImport("@artemiskit/adapter-anthropic");
13402
13469
  return new mod.AnthropicAdapter(config);
13403
13470
  });
13471
+ adapterRegistry.register("langchain", async (config) => {
13472
+ const mod = await tryImport("@artemiskit/adapter-langchain");
13473
+ const runnable = config.metadata?.runnable;
13474
+ if (!runnable) {
13475
+ throw new Error("LangChain adapter requires a runnable instance. Pass it via config.metadata.runnable or use createLangChainAdapter() directly.");
13476
+ }
13477
+ return new mod.LangChainAdapter(config, runnable);
13478
+ });
13479
+ adapterRegistry.register("deepagents", async (config) => {
13480
+ const mod = await tryImport("@artemiskit/adapter-deepagents");
13481
+ const system = config.metadata?.system;
13482
+ if (!system) {
13483
+ throw new Error("DeepAgents adapter requires a system instance. Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.");
13484
+ }
13485
+ return new mod.DeepAgentsAdapter(config, system);
13486
+ });
13404
13487
  adapterRegistry.markUnavailable("google", "Google adapter coming in v0.3.0");
13405
13488
  adapterRegistry.markUnavailable("mistral", "Mistral adapter coming in v0.3.0");
13406
13489
  adapterRegistry.markUnavailable("ollama", "Ollama adapter coming in v0.3.0");
@@ -13494,6 +13577,8 @@ var ProviderSchema = exports_external.enum([
13494
13577
  "cohere",
13495
13578
  "huggingface",
13496
13579
  "ollama",
13580
+ "langchain",
13581
+ "deepagents",
13497
13582
  "custom"
13498
13583
  ]);
13499
13584
  var ProviderConfigSchema = exports_external.object({
@@ -13508,7 +13593,11 @@ var ProviderConfigSchema = exports_external.object({
13508
13593
  apiVersion: exports_external.string().optional(),
13509
13594
  embeddingDeploymentName: exports_external.string().optional(),
13510
13595
  modelFamily: exports_external.string().optional(),
13511
- underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional()
13596
+ underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional(),
13597
+ name: exports_external.string().optional(),
13598
+ runnableType: exports_external.enum(["chain", "agent", "llm", "runnable"]).optional(),
13599
+ captureTraces: exports_external.boolean().optional(),
13600
+ captureMessages: exports_external.boolean().optional()
13512
13601
  }).optional();
13513
13602
  var BaseExpectedSchema = exports_external.discriminatedUnion("type", [
13514
13603
  exports_external.object({
@@ -13616,55 +13705,8 @@ var ScenarioSchema = exports_external.object({
13616
13705
  }).optional()
13617
13706
  });
13618
13707
  // src/scenario/parser.ts
13708
+ var import_yaml = __toESM(require_dist(), 1);
13619
13709
  import { readFile } from "fs/promises";
13620
-
13621
- // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
13622
- var composer = require_composer();
13623
- var Document = require_Document();
13624
- var Schema = require_Schema();
13625
- var errors2 = require_errors();
13626
- var Alias = require_Alias();
13627
- var identity = require_identity();
13628
- var Pair = require_Pair();
13629
- var Scalar = require_Scalar();
13630
- var YAMLMap = require_YAMLMap();
13631
- var YAMLSeq = require_YAMLSeq();
13632
- var cst = require_cst();
13633
- var lexer = require_lexer();
13634
- var lineCounter = require_line_counter();
13635
- var parser = require_parser();
13636
- var publicApi = require_public_api();
13637
- var visit = require_visit();
13638
- var $Composer = composer.Composer;
13639
- var $Document = Document.Document;
13640
- var $Schema = Schema.Schema;
13641
- var $YAMLError = errors2.YAMLError;
13642
- var $YAMLParseError = errors2.YAMLParseError;
13643
- var $YAMLWarning = errors2.YAMLWarning;
13644
- var $Alias = Alias.Alias;
13645
- var $isAlias = identity.isAlias;
13646
- var $isCollection = identity.isCollection;
13647
- var $isDocument = identity.isDocument;
13648
- var $isMap = identity.isMap;
13649
- var $isNode = identity.isNode;
13650
- var $isPair = identity.isPair;
13651
- var $isScalar = identity.isScalar;
13652
- var $isSeq = identity.isSeq;
13653
- var $Pair = Pair.Pair;
13654
- var $Scalar = Scalar.Scalar;
13655
- var $YAMLMap = YAMLMap.YAMLMap;
13656
- var $YAMLSeq = YAMLSeq.YAMLSeq;
13657
- var $Lexer = lexer.Lexer;
13658
- var $LineCounter = lineCounter.LineCounter;
13659
- var $Parser = parser.Parser;
13660
- var $parse = publicApi.parse;
13661
- var $parseAllDocuments = publicApi.parseAllDocuments;
13662
- var $parseDocument = publicApi.parseDocument;
13663
- var $stringify = publicApi.stringify;
13664
- var $visit = visit.visit;
13665
- var $visitAsync = visit.visitAsync;
13666
-
13667
- // src/scenario/parser.ts
13668
13710
  function expandEnvVars(obj) {
13669
13711
  if (typeof obj === "string") {
13670
13712
  return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
@@ -13699,7 +13741,7 @@ async function parseScenarioFile(filePath) {
13699
13741
  }
13700
13742
  function parseScenarioString(content, source) {
13701
13743
  try {
13702
- const raw = $parse(content);
13744
+ const raw = import_yaml.parse(content);
13703
13745
  const expanded = expandEnvVars(raw);
13704
13746
  const result = ScenarioSchema.safeParse(expanded);
13705
13747
  if (!result.success) {
@@ -16929,7 +16971,7 @@ class RealtimeChannel {
16929
16971
  }).map((bind) => {
16930
16972
  if (typeof handledPayload === "object" && "ids" in handledPayload) {
16931
16973
  const postgresChanges = handledPayload.data;
16932
- const { schema: schema2, table, commit_timestamp, type: type2, errors: errors3 } = postgresChanges;
16974
+ const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
16933
16975
  const enrichedPayload = {
16934
16976
  schema: schema2,
16935
16977
  table,
@@ -16937,7 +16979,7 @@ class RealtimeChannel {
16937
16979
  eventType: type2,
16938
16980
  new: {},
16939
16981
  old: {},
16940
- errors: errors3
16982
+ errors: errors2
16941
16983
  };
16942
16984
  handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
16943
16985
  }
@@ -22508,7 +22550,7 @@ class GoTrueClient {
22508
22550
  }
22509
22551
  });
22510
22552
  }
22511
- async unlinkIdentity(identity2) {
22553
+ async unlinkIdentity(identity) {
22512
22554
  try {
22513
22555
  return await this._useSession(async (result) => {
22514
22556
  var _a, _b;
@@ -22516,7 +22558,7 @@ class GoTrueClient {
22516
22558
  if (error) {
22517
22559
  throw error;
22518
22560
  }
22519
- return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity2.identity_id}`, {
22561
+ return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
22520
22562
  headers: this.headers,
22521
22563
  jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
22522
22564
  });
@@ -22690,20 +22732,20 @@ class GoTrueClient {
22690
22732
  if (this.broadcastChannel && broadcast) {
22691
22733
  this.broadcastChannel.postMessage({ event, session });
22692
22734
  }
22693
- const errors3 = [];
22735
+ const errors2 = [];
22694
22736
  const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
22695
22737
  try {
22696
22738
  await x.callback(event, session);
22697
22739
  } catch (e) {
22698
- errors3.push(e);
22740
+ errors2.push(e);
22699
22741
  }
22700
22742
  });
22701
22743
  await Promise.all(promises);
22702
- if (errors3.length > 0) {
22703
- for (let i = 0;i < errors3.length; i += 1) {
22704
- console.error(errors3[i]);
22744
+ if (errors2.length > 0) {
22745
+ for (let i = 0;i < errors2.length; i += 1) {
22746
+ console.error(errors2[i]);
22705
22747
  }
22706
- throw errors3[0];
22748
+ throw errors2[0];
22707
22749
  }
22708
22750
  } finally {
22709
22751
  this._debug(debugName, "end");
@@ -23578,12 +23620,33 @@ if (shouldShowDeprecationWarning())
23578
23620
  console.warn("\u26A0\uFE0F Node.js 18 and below are deprecated and will no longer be supported in future versions of @supabase/supabase-js. Please upgrade to Node.js 20 or later. For more information, visit: https://github.com/orgs/supabase/discussions/37217");
23579
23621
 
23580
23622
  // src/storage/supabase.ts
23623
/**
 * Flattens one case entry of a run manifest into the camelCase record shape
 * that `saveCaseResults` accepts.
 *
 * @param {string} runId - Identifier of the run the case belongs to.
 * @param {object} caseResult - Case entry taken from `manifest.cases`.
 * @returns {object} Record carrying the run/case identifiers, derived status,
 *   score, matcher details, response, latency, token counts, error and tags.
 */
function mapCaseToRecord(runId, caseResult) {
  // Token usage may be absent (presumably when a case errors before the
  // provider answers - confirm against the runner). Fall back to an empty
  // object so persisting case rows does not throw after the run itself has
  // already been saved.
  const tokens = caseResult.tokens ?? {};

  // An error always wins over the pass/fail flag.
  let status = "failed";
  if (caseResult.error) {
    status = "error";
  } else if (caseResult.ok) {
    status = "passed";
  }

  return {
    runId,
    caseId: caseResult.id,
    caseName: caseResult.name,
    status,
    score: caseResult.score,
    matcherType: caseResult.matcherType,
    reason: caseResult.reason,
    response: caseResult.response,
    latencyMs: caseResult.latencyMs,
    promptTokens: tokens.prompt,
    completionTokens: tokens.completion,
    totalTokens: tokens.total,
    error: caseResult.error,
    tags: caseResult.tags
  };
}
23641
+
23581
23642
  class SupabaseStorageAdapter {
23582
23643
  client;
23583
23644
  bucket;
23584
- constructor(config) {
23645
+ project;
23646
+ constructor(config, project) {
23585
23647
  this.client = createClient(config.url, config.anonKey);
23586
23648
  this.bucket = config.bucket || "artemis-runs";
23649
+ this.project = project || "default";
23587
23650
  }
23588
23651
  async save(manifest) {
23589
23652
  const filePath = `${manifest.project}/${manifest.run_id}.json`;
@@ -23619,6 +23682,10 @@ class SupabaseStorageAdapter {
23619
23682
  if (dbError) {
23620
23683
  throw new Error(`Failed to save run metadata: ${dbError.message}`);
23621
23684
  }
23685
+ if (manifest.cases && manifest.cases.length > 0) {
23686
+ const caseRecords = manifest.cases.map((c) => mapCaseToRecord(manifest.run_id, c));
23687
+ await this.saveCaseResults(caseRecords);
23688
+ }
23622
23689
  return filePath;
23623
23690
  }
23624
23691
  async load(runId) {
@@ -23677,6 +23744,392 @@ class SupabaseStorageAdapter {
23677
23744
  }
23678
23745
  };
23679
23746
  }
23747
+ async setBaseline(scenario, runId, tag) {
23748
+ const { data: run, error: runError } = await this.client.from("runs").select("*").eq("run_id", runId).single();
23749
+ if (runError || !run) {
23750
+ throw new Error(`Run not found: ${runId}`);
23751
+ }
23752
+ const baselineData = {
23753
+ project: run.project,
23754
+ scenario,
23755
+ run_id: runId,
23756
+ success_rate: run.success_rate,
23757
+ median_latency_ms: run.median_latency_ms,
23758
+ total_tokens: run.total_tokens,
23759
+ passed_cases: run.passed_cases,
23760
+ failed_cases: run.failed_cases,
23761
+ total_cases: run.total_cases,
23762
+ tag,
23763
+ created_by: run.run_by
23764
+ };
23765
+ const { error } = await this.client.from("baselines").upsert(baselineData, {
23766
+ onConflict: "project,scenario"
23767
+ });
23768
+ if (error) {
23769
+ throw new Error(`Failed to set baseline: ${error.message}`);
23770
+ }
23771
+ return {
23772
+ scenario,
23773
+ runId,
23774
+ createdAt: new Date().toISOString(),
23775
+ metrics: {
23776
+ successRate: run.success_rate,
23777
+ medianLatencyMs: run.median_latency_ms,
23778
+ totalTokens: run.total_tokens,
23779
+ passedCases: run.passed_cases,
23780
+ failedCases: run.failed_cases,
23781
+ totalCases: run.total_cases
23782
+ },
23783
+ tag
23784
+ };
23785
+ }
23786
+ async getBaseline(scenario) {
23787
+ const { data, error } = await this.client.from("baselines").select("*").eq("project", this.project).eq("scenario", scenario).single();
23788
+ if (error || !data) {
23789
+ return null;
23790
+ }
23791
+ return {
23792
+ scenario: data.scenario,
23793
+ runId: data.run_id,
23794
+ createdAt: data.created_at,
23795
+ metrics: {
23796
+ successRate: data.success_rate,
23797
+ medianLatencyMs: data.median_latency_ms,
23798
+ totalTokens: data.total_tokens,
23799
+ passedCases: data.passed_cases,
23800
+ failedCases: data.failed_cases,
23801
+ totalCases: data.total_cases
23802
+ },
23803
+ tag: data.tag
23804
+ };
23805
+ }
23806
+ async getBaselineByRunId(runId) {
23807
+ const { data, error } = await this.client.from("baselines").select("*").eq("run_id", runId).single();
23808
+ if (error || !data) {
23809
+ return null;
23810
+ }
23811
+ return {
23812
+ scenario: data.scenario,
23813
+ runId: data.run_id,
23814
+ createdAt: data.created_at,
23815
+ metrics: {
23816
+ successRate: data.success_rate,
23817
+ medianLatencyMs: data.median_latency_ms,
23818
+ totalTokens: data.total_tokens,
23819
+ passedCases: data.passed_cases,
23820
+ failedCases: data.failed_cases,
23821
+ totalCases: data.total_cases
23822
+ },
23823
+ tag: data.tag
23824
+ };
23825
+ }
23826
+ async listBaselines() {
23827
+ const { data, error } = await this.client.from("baselines").select("*").eq("project", this.project).order("created_at", { ascending: false });
23828
+ if (error) {
23829
+ throw new Error(`Failed to list baselines: ${error.message}`);
23830
+ }
23831
+ return (data || []).map((b) => ({
23832
+ scenario: b.scenario,
23833
+ runId: b.run_id,
23834
+ createdAt: b.created_at,
23835
+ metrics: {
23836
+ successRate: b.success_rate,
23837
+ medianLatencyMs: b.median_latency_ms,
23838
+ totalTokens: b.total_tokens,
23839
+ passedCases: b.passed_cases,
23840
+ failedCases: b.failed_cases,
23841
+ totalCases: b.total_cases
23842
+ },
23843
+ tag: b.tag
23844
+ }));
23845
+ }
23846
+ async removeBaseline(scenario) {
23847
+ const { error, count } = await this.client.from("baselines").delete().eq("project", this.project).eq("scenario", scenario);
23848
+ if (error) {
23849
+ throw new Error(`Failed to remove baseline: ${error.message}`);
23850
+ }
23851
+ return (count ?? 0) > 0;
23852
+ }
23853
+ async removeBaselineByRunId(runId) {
23854
+ const { error, count } = await this.client.from("baselines").delete().eq("run_id", runId);
23855
+ if (error) {
23856
+ throw new Error(`Failed to remove baseline: ${error.message}`);
23857
+ }
23858
+ return (count ?? 0) > 0;
23859
+ }
23860
+ async compareToBaseline(runId, regressionThreshold = 0.05) {
23861
+ const { data: run, error: runError } = await this.client.from("runs").select("scenario").eq("run_id", runId).single();
23862
+ if (runError || !run) {
23863
+ return null;
23864
+ }
23865
+ const baseline = await this.getBaseline(run.scenario);
23866
+ if (!baseline) {
23867
+ return null;
23868
+ }
23869
+ const comparison = await this.compare(baseline.runId, runId);
23870
+ const hasRegression = comparison.delta.successRate < -regressionThreshold;
23871
+ return {
23872
+ baseline,
23873
+ comparison,
23874
+ hasRegression,
23875
+ regressionThreshold
23876
+ };
23877
+ }
23878
+ async saveCaseResult(result) {
23879
+ const dbRecord = {
23880
+ run_id: result.runId,
23881
+ case_id: result.caseId,
23882
+ case_name: result.caseName,
23883
+ status: result.status,
23884
+ score: result.score,
23885
+ matcher_type: result.matcherType,
23886
+ reason: result.reason,
23887
+ response: result.response,
23888
+ latency_ms: result.latencyMs,
23889
+ prompt_tokens: result.promptTokens,
23890
+ completion_tokens: result.completionTokens,
23891
+ total_tokens: result.totalTokens,
23892
+ error: result.error,
23893
+ tags: result.tags || []
23894
+ };
23895
+ const { data, error } = await this.client.from("case_results").upsert(dbRecord, { onConflict: "run_id,case_id" }).select("id").single();
23896
+ if (error) {
23897
+ throw new Error(`Failed to save case result: ${error.message}`);
23898
+ }
23899
+ return data?.id || result.caseId;
23900
+ }
23901
+ async saveCaseResults(results) {
23902
+ if (results.length === 0) {
23903
+ return [];
23904
+ }
23905
+ const dbRecords = results.map((r) => ({
23906
+ run_id: r.runId,
23907
+ case_id: r.caseId,
23908
+ case_name: r.caseName,
23909
+ status: r.status,
23910
+ score: r.score,
23911
+ matcher_type: r.matcherType,
23912
+ reason: r.reason,
23913
+ response: r.response,
23914
+ latency_ms: r.latencyMs,
23915
+ prompt_tokens: r.promptTokens,
23916
+ completion_tokens: r.completionTokens,
23917
+ total_tokens: r.totalTokens,
23918
+ error: r.error,
23919
+ tags: r.tags || []
23920
+ }));
23921
+ const { data, error } = await this.client.from("case_results").upsert(dbRecords, { onConflict: "run_id,case_id" }).select("id");
23922
+ if (error) {
23923
+ throw new Error(`Failed to save case results: ${error.message}`);
23924
+ }
23925
+ return (data || []).map((d) => d.id);
23926
+ }
23927
+ async getCaseResults(runId) {
23928
+ const { data, error } = await this.client.from("case_results").select("*").eq("run_id", runId).order("created_at", { ascending: true });
23929
+ if (error) {
23930
+ throw new Error(`Failed to get case results: ${error.message}`);
23931
+ }
23932
+ return (data || []).map((r) => ({
23933
+ id: r.id,
23934
+ runId: r.run_id,
23935
+ caseId: r.case_id,
23936
+ caseName: r.case_name,
23937
+ status: r.status,
23938
+ score: r.score,
23939
+ matcherType: r.matcher_type,
23940
+ reason: r.reason,
23941
+ response: r.response,
23942
+ latencyMs: r.latency_ms,
23943
+ promptTokens: r.prompt_tokens,
23944
+ completionTokens: r.completion_tokens,
23945
+ totalTokens: r.total_tokens,
23946
+ error: r.error,
23947
+ tags: r.tags,
23948
+ createdAt: r.created_at
23949
+ }));
23950
+ }
23951
+ async queryCaseResults(options) {
23952
+ let query = this.client.from("case_results").select("*").order("created_at", { ascending: false });
23953
+ if (options.runId) {
23954
+ query = query.eq("run_id", options.runId);
23955
+ }
23956
+ if (options.caseId) {
23957
+ query = query.eq("case_id", options.caseId);
23958
+ }
23959
+ if (options.status) {
23960
+ query = query.eq("status", options.status);
23961
+ }
23962
+ if (options.tags && options.tags.length > 0) {
23963
+ query = query.overlaps("tags", options.tags);
23964
+ }
23965
+ if (options.offset && options.limit) {
23966
+ query = query.range(options.offset, options.offset + options.limit - 1);
23967
+ } else if (options.limit) {
23968
+ query = query.limit(options.limit);
23969
+ }
23970
+ const { data, error } = await query;
23971
+ if (error) {
23972
+ throw new Error(`Failed to query case results: ${error.message}`);
23973
+ }
23974
+ return (data || []).map((r) => ({
23975
+ id: r.id,
23976
+ runId: r.run_id,
23977
+ caseId: r.case_id,
23978
+ caseName: r.case_name,
23979
+ status: r.status,
23980
+ score: r.score,
23981
+ matcherType: r.matcher_type,
23982
+ reason: r.reason,
23983
+ response: r.response,
23984
+ latencyMs: r.latency_ms,
23985
+ promptTokens: r.prompt_tokens,
23986
+ completionTokens: r.completion_tokens,
23987
+ totalTokens: r.total_tokens,
23988
+ error: r.error,
23989
+ tags: r.tags,
23990
+ createdAt: r.created_at
23991
+ }));
23992
+ }
23993
+ async saveMetricsSnapshot(snapshot) {
23994
+ const dbRecord = {
23995
+ date: snapshot.date,
23996
+ project: snapshot.project,
23997
+ scenario: snapshot.scenario || null,
23998
+ total_runs: snapshot.totalRuns,
23999
+ total_cases: snapshot.totalCases,
24000
+ passed_cases: snapshot.passedCases,
24001
+ failed_cases: snapshot.failedCases,
24002
+ avg_success_rate: snapshot.avgSuccessRate,
24003
+ avg_latency_ms: snapshot.avgLatencyMs,
24004
+ avg_tokens_per_run: snapshot.avgTokensPerRun,
24005
+ min_success_rate: snapshot.minSuccessRate,
24006
+ max_success_rate: snapshot.maxSuccessRate,
24007
+ min_latency_ms: snapshot.minLatencyMs,
24008
+ max_latency_ms: snapshot.maxLatencyMs,
24009
+ total_tokens: snapshot.totalTokens
24010
+ };
24011
+ const { data, error } = await this.client.from("metrics_history").upsert(dbRecord, { onConflict: "date,project,scenario" }).select("id").single();
24012
+ if (error) {
24013
+ throw new Error(`Failed to save metrics snapshot: ${error.message}`);
24014
+ }
24015
+ return data?.id || `${snapshot.date}-${snapshot.project}`;
24016
+ }
24017
+ async getMetricsTrend(options) {
24018
+ let query = this.client.from("metrics_history").select("date, avg_success_rate, avg_latency_ms, total_runs, total_tokens").eq("project", options.project).order("date", { ascending: true });
24019
+ if (options.scenario) {
24020
+ query = query.eq("scenario", options.scenario);
24021
+ } else {
24022
+ query = query.is("scenario", null);
24023
+ }
24024
+ if (options.startDate) {
24025
+ query = query.gte("date", options.startDate);
24026
+ }
24027
+ if (options.endDate) {
24028
+ query = query.lte("date", options.endDate);
24029
+ }
24030
+ if (options.limit) {
24031
+ query = query.limit(options.limit);
24032
+ }
24033
+ const { data, error } = await query;
24034
+ if (error) {
24035
+ throw new Error(`Failed to get metrics trend: ${error.message}`);
24036
+ }
24037
+ return (data || []).map((m) => ({
24038
+ date: m.date,
24039
+ successRate: m.avg_success_rate,
24040
+ latencyMs: m.avg_latency_ms,
24041
+ totalRuns: m.total_runs,
24042
+ totalTokens: m.total_tokens
24043
+ }));
24044
+ }
24045
+ async getMetricsSnapshot(date, project, scenario) {
24046
+ let query = this.client.from("metrics_history").select("*").eq("date", date).eq("project", project);
24047
+ if (scenario) {
24048
+ query = query.eq("scenario", scenario);
24049
+ } else {
24050
+ query = query.is("scenario", null);
24051
+ }
24052
+ const { data, error } = await query.single();
24053
+ if (error || !data) {
24054
+ return null;
24055
+ }
24056
+ return {
24057
+ id: data.id,
24058
+ date: data.date,
24059
+ project: data.project,
24060
+ scenario: data.scenario,
24061
+ totalRuns: data.total_runs,
24062
+ totalCases: data.total_cases,
24063
+ passedCases: data.passed_cases,
24064
+ failedCases: data.failed_cases,
24065
+ avgSuccessRate: data.avg_success_rate,
24066
+ avgLatencyMs: data.avg_latency_ms,
24067
+ avgTokensPerRun: data.avg_tokens_per_run,
24068
+ minSuccessRate: data.min_success_rate,
24069
+ maxSuccessRate: data.max_success_rate,
24070
+ minLatencyMs: data.min_latency_ms,
24071
+ maxLatencyMs: data.max_latency_ms,
24072
+ totalTokens: data.total_tokens,
24073
+ createdAt: data.created_at,
24074
+ updatedAt: data.updated_at
24075
+ };
24076
+ }
24077
+ async aggregateDailyMetrics(date, project, scenario) {
24078
+ const startOfDay = `${date}T00:00:00.000Z`;
24079
+ const endOfDay = `${date}T23:59:59.999Z`;
24080
+ let query = this.client.from("runs").select("*").eq("project", project).gte("started_at", startOfDay).lte("started_at", endOfDay);
24081
+ if (scenario) {
24082
+ query = query.eq("scenario", scenario);
24083
+ }
24084
+ const { data: runs, error } = await query;
24085
+ if (error) {
24086
+ throw new Error(`Failed to aggregate metrics: ${error.message}`);
24087
+ }
24088
+ const runList = runs || [];
24089
+ if (runList.length === 0) {
24090
+ const emptySnapshot = {
24091
+ date,
24092
+ project,
24093
+ scenario,
24094
+ totalRuns: 0,
24095
+ totalCases: 0,
24096
+ passedCases: 0,
24097
+ failedCases: 0,
24098
+ avgSuccessRate: 0,
24099
+ avgLatencyMs: 0,
24100
+ avgTokensPerRun: 0,
24101
+ totalTokens: 0
24102
+ };
24103
+ await this.saveMetricsSnapshot(emptySnapshot);
24104
+ return emptySnapshot;
24105
+ }
24106
+ const totalRuns = runList.length;
24107
+ const totalCases = runList.reduce((sum, r) => sum + r.total_cases, 0);
24108
+ const passedCases = runList.reduce((sum, r) => sum + r.passed_cases, 0);
24109
+ const failedCases = runList.reduce((sum, r) => sum + r.failed_cases, 0);
24110
+ const totalTokens = runList.reduce((sum, r) => sum + r.total_tokens, 0);
24111
+ const successRates = runList.map((r) => r.success_rate);
24112
+ const latencies = runList.map((r) => r.median_latency_ms);
24113
+ const snapshot = {
24114
+ date,
24115
+ project,
24116
+ scenario,
24117
+ totalRuns,
24118
+ totalCases,
24119
+ passedCases,
24120
+ failedCases,
24121
+ avgSuccessRate: successRates.reduce((a, b) => a + b, 0) / totalRuns,
24122
+ avgLatencyMs: latencies.reduce((a, b) => a + b, 0) / totalRuns,
24123
+ avgTokensPerRun: totalTokens / totalRuns,
24124
+ minSuccessRate: Math.min(...successRates),
24125
+ maxSuccessRate: Math.max(...successRates),
24126
+ minLatencyMs: Math.min(...latencies),
24127
+ maxLatencyMs: Math.max(...latencies),
24128
+ totalTokens
24129
+ };
24130
+ await this.saveMetricsSnapshot(snapshot);
24131
+ return snapshot;
24132
+ }
23680
24133
  }
23681
24134
 
23682
24135
  // src/storage/factory.ts
@@ -24751,6 +25204,251 @@ class Logger {
24751
25204
  }
24752
25205
  }
24753
25206
  var logger = new Logger("artemis");
25207
+ // src/validator/validator.ts
25208
+ var import_yaml2 = __toESM(require_dist(), 1);
25209
+ import { readFileSync } from "fs";
25210
/**
 * Validates a scenario YAML file and reports line-annotated diagnostics.
 *
 * `validate` reads the file, parses it as YAML, checks it against
 * `ScenarioSchema`, runs semantic checks (duplicate case IDs, undefined
 * `{{variable}}` references) when the schema passes, and finally collects
 * non-fatal warnings.
 *
 * Line numbers are found by scanning the raw text, not by using a YAML source
 * map, so they are best-effort: every lookup falls back to line 1 when nothing
 * matches.
 */
class ScenarioValidator {
  _options;

  /**
   * @param {object} [options] Stored as-is and exposed through `options`.
   */
  constructor(options = {}) {
    this._options = options;
  }

  /** The options object passed to the constructor. */
  get options() {
    return this._options;
  }

  /**
   * Validate a single scenario file.
   *
   * Read, YAML-syntax and non-object failures return immediately because
   * nothing further can be checked without a parsed object.
   *
   * @param {string} filePath Path of the YAML file to validate.
   * @returns {{file: string, valid: boolean, errors: object[], warnings: object[]}}
   *   `valid` is true only when no errors were collected; warnings never
   *   affect it.
   */
  validate(filePath) {
    const errors4 = [];
    const warnings = [];
    const failed = () => ({ file: filePath, valid: false, errors: errors4, warnings });

    let content;
    try {
      content = readFileSync(filePath, "utf-8");
    } catch (err) {
      errors4.push({
        line: 1,
        message: `Failed to read file: ${err.message}`,
        rule: "file-read",
        severity: "error"
      });
      return failed();
    }

    let parsed;
    try {
      parsed = import_yaml2.default.parse(content, {
        prettyErrors: true,
        strict: true
      });
    } catch (err) {
      if (err instanceof import_yaml2.default.YAMLError) {
        // YAMLError carries a start position when prettyErrors is enabled.
        const linePos = err.linePos?.[0];
        errors4.push({
          line: linePos?.line || 1,
          column: linePos?.col,
          message: `Invalid YAML syntax: ${err.message}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      } else {
        errors4.push({
          line: 1,
          message: `YAML parse error: ${err.message}`,
          rule: "yaml-syntax",
          severity: "error"
        });
      }
      return failed();
    }

    if (parsed === null || typeof parsed !== "object") {
      errors4.push({
        line: 1,
        message: "Scenario must be a YAML object",
        rule: "schema-type",
        severity: "error"
      });
      return failed();
    }

    // Semantic checks rely on the schema-validated shape, so they only run
    // when the schema check succeeded.
    const schemaResult = ScenarioSchema.safeParse(parsed);
    if (schemaResult.success) {
      errors4.push(...this.validateSemantics(schemaResult.data, content));
    } else {
      errors4.push(...this.formatZodErrors(schemaResult.error, content));
    }

    warnings.push(...this.detectWarnings(parsed, content));
    return {
      file: filePath,
      valid: errors4.length === 0,
      errors: errors4,
      warnings
    };
  }

  /**
   * Convert schema issues into line-annotated error records.
   *
   * @param {{issues: object[]}} error Schema error exposing an `issues` array
   *   whose entries have `code`, `path` and `message`.
   * @param {string} content Raw file text, used to locate line numbers.
   * @returns {object[]} One `{line, message, rule, severity}` record per issue.
   */
  formatZodErrors(error, content) {
    const lines = content.split("\n");
    return error.issues.map((issue) => {
      const fieldPath = issue.path.join(".");
      let message;
      switch (issue.code) {
        case "invalid_type":
          message = `'${fieldPath}' expected ${issue.expected}, received ${issue.received}`;
          break;
        case "invalid_enum_value":
          message = `'${fieldPath}' must be one of: ${issue.options.join(", ")}`;
          break;
        case "too_small":
          message = issue.type === "array"
            ? `'${fieldPath}' must have at least ${issue.minimum} item(s)`
            : `'${fieldPath}' is too small`;
          break;
        case "unrecognized_keys":
          message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
          break;
        default:
          message = issue.message;
      }
      return {
        line: this.findLineForPath(lines, issue.path),
        message,
        rule: `schema-${issue.code}`,
        severity: "error"
      };
    });
  }

  /**
   * Best-effort lookup of the 1-based line a schema path refers to.
   *
   * When the path goes through `cases[n]`, the search is limited to the text
   * of the n-th case (found by counting `- id:` lines after the `cases:` key),
   * so an issue in the third case is not attributed to the first case that
   * happens to contain the same key. Array indexes never appear in YAML text,
   * so the nearest named segment of the path is what gets searched for.
   *
   * @param {string[]} lines File content split into lines.
   * @param {(string|number)[]} path Issue path, e.g. `["cases", 1, "prompt"]`.
   * @returns {number} 1-based line number, or 1 when nothing matches.
   */
  findLineForPath(lines, path) {
    if (path.length === 0) {
      return 1;
    }

    let windowStart = 0;
    let windowEnd = lines.length;
    let caseResolved = false;
    const casesAt = path.indexOf("cases");
    const caseIndex = casesAt === -1 ? undefined : path[casesAt + 1];
    if (typeof caseIndex === "number") {
      // Only count list items after the `cases:` key so that other lists using
      // `- id:` earlier in the file do not shift the numbering.
      const casesKeyLine = lines.findIndex((text) => text.trim().startsWith("cases:"));
      const caseStarts = [];
      for (let i = casesKeyLine + 1; i < lines.length; i++) {
        if (lines[i].trim().startsWith("- id:")) {
          caseStarts.push(i);
        }
      }
      if (caseIndex >= 0 && caseIndex < caseStarts.length) {
        caseResolved = true;
        windowStart = caseStarts[caseIndex];
        windowEnd = caseStarts[caseIndex + 1] ?? lines.length;
      }
    }

    // Walk back to the closest named segment of the path.
    let keyAt = path.length - 1;
    while (keyAt >= 0 && typeof path[keyAt] !== "string") {
      keyAt -= 1;
    }
    if (caseResolved && keyAt <= casesAt) {
      // The path addresses the case itself (e.g. ["cases", 2]).
      return windowStart + 1;
    }
    if (keyAt < 0) {
      return 1;
    }

    const needle = `${path[keyAt]}:`;
    for (let i = windowStart; i < windowEnd; i++) {
      if (lines[i].includes(needle)) {
        return i + 1;
      }
    }
    // A key missing from the case (e.g. a required field) is reported on the
    // case's first line instead of the top of the file.
    return caseResolved ? windowStart + 1 : 1;
  }

  /**
   * Run checks that need a schema-valid scenario: duplicate case IDs and
   * `{{variable}}` references with no definition.
   *
   * Duplicate-ID errors are listed first, followed by undefined-variable
   * errors, each group in case order.
   *
   * @param {{cases: object[], variables?: object}} scenario Schema-validated scenario.
   * @param {string} content Raw file text, used to locate line numbers.
   * @returns {object[]} Error records.
   */
  validateSemantics(scenario, content) {
    const lines = content.split("\n");
    const duplicateErrors = [];
    const variableErrors = [];
    const seenCounts = new Map();
    const globalVars = scenario.variables || {};

    for (const testCase of scenario.cases) {
      // How many earlier cases used this ID; it also selects which matching
      // `id:` line in the file belongs to this case.
      const occurrence = seenCounts.get(testCase.id) ?? 0;
      seenCounts.set(testCase.id, occurrence + 1);

      if (occurrence > 0) {
        duplicateErrors.push({
          line: this.findLineForCaseId(lines, testCase.id, occurrence),
          message: `Duplicate case ID: '${testCase.id}'`,
          rule: "duplicate-case-id",
          severity: "error"
        });
      }

      const allVars = { ...globalVars, ...(testCase.variables || {}) };
      const promptText = typeof testCase.prompt === "string"
        ? testCase.prompt
        : JSON.stringify(testCase.prompt);
      for (const ref of this.extractVariableRefs(promptText)) {
        // Own-property check: `in` would accept inherited names such as
        // `constructor` or `toString` as if they were defined variables.
        if (Object.prototype.hasOwnProperty.call(allVars, ref)) {
          continue;
        }
        variableErrors.push({
          line: this.findLineForCaseId(lines, testCase.id, occurrence),
          message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
          rule: "undefined-variable",
          severity: "error",
          suggestion: `Define '${ref}' in scenario.variables or case.variables`
        });
      }
    }

    return [...duplicateErrors, ...variableErrors];
  }

  /**
   * Find the 1-based line that declares a case ID.
   *
   * Lines of the form `id: value` or `- id: value` (optionally quoted or
   * followed by a comment) are compared exactly, so `case-1` is never confused
   * with `case-10`. If no such line exists, the lookup falls back to a
   * substring match, which still covers flow-style mappings such as
   * `{ id: a, prompt: b }`.
   *
   * @param {string[]} lines File content split into lines.
   * @param {string} caseId ID to look for.
   * @param {number} [occurrence=0] Zero-based index of the wanted match; used
   *   to point at the second (third, ...) declaration of a duplicated ID. When
   *   fewer matches exist, the last one is returned.
   * @returns {number} 1-based line number, or 1 when nothing matches.
   */
  findLineForCaseId(lines, caseId, occurrence = 0) {
    const wanted = String(caseId);
    const readIdValue = (text) => {
      const entry = /^\s*(?:-\s+)?id:\s*(.*?)\s*$/.exec(text);
      if (!entry) {
        return undefined;
      }
      const quoted = /^(["'])(.*?)\1(?:\s+#.*)?$/.exec(entry[1]);
      return quoted ? quoted[2] : entry[1].replace(/\s+#.*$/, "");
    };

    const exact = [];
    const loose = [];
    lines.forEach((text, index) => {
      if (readIdValue(text) === wanted) {
        exact.push(index + 1);
      }
      if (text.includes(`id: ${wanted}`) || text.includes(`id: "${wanted}"`) || text.includes(`id: '${wanted}'`)) {
        loose.push(index + 1);
      }
    });

    const matches = exact.length > 0 ? exact : loose;
    if (matches.length === 0) {
      return 1;
    }
    return matches[Math.min(Math.max(occurrence, 0), matches.length - 1)];
  }

  /**
   * Collect the names referenced as `{{name}}` in a text, in order of
   * appearance and including repeats. Only `\w+` names with no surrounding
   * whitespace are recognised.
   *
   * @param {string} text Text to scan.
   * @returns {string[]} Referenced variable names.
   */
  extractVariableRefs(text) {
    return Array.from(text.matchAll(/\{\{(\w+)\}\}/g), (match) => match[1]);
  }

  /**
   * Produce non-fatal warnings: use of the deprecated `criteria` key, a large
   * number of cases, and a missing `description`.
   *
   * @param {unknown} parsed Parsed YAML value.
   * @param {string} content Raw file text, used to locate line numbers.
   * @returns {object[]} Warning records; empty when `parsed` is not an object.
   */
  detectWarnings(parsed, content) {
    const warnings = [];
    if (!parsed || typeof parsed !== "object") {
      return warnings;
    }
    const lines = content.split("\n");

    if (this.hasDeepKey(parsed, "criteria")) {
      warnings.push({
        line: this.findLineForKey(lines, "criteria"),
        message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
        rule: "deprecated-field",
        severity: "warning",
        suggestion: "Replace 'criteria' with 'rubric'"
      });
    }

    const cases = parsed.cases;
    if (Array.isArray(cases) && cases.length > 20) {
      warnings.push({
        line: 1,
        message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
        rule: "performance-hint",
        severity: "warning"
      });
    }

    if (!parsed.description) {
      warnings.push({
        line: 1,
        message: "Scenario is missing 'description' field. Adding a description improves documentation.",
        rule: "missing-description",
        severity: "warning"
      });
    }
    return warnings;
  }

  /**
   * Report whether `key` is an own property of `obj` or of any value nested
   * inside it (objects and arrays are both traversed).
   *
   * @param {unknown} obj Value to inspect.
   * @param {string} key Property name to look for.
   * @returns {boolean}
   */
  hasDeepKey(obj, key) {
    if (obj === null || typeof obj !== "object") {
      return false;
    }
    // Own properties only, so inherited members such as `toString` never match.
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      return true;
    }
    return Object.values(obj).some((value) => this.hasDeepKey(value, key));
  }

  /**
   * Find the first line containing `key:`.
   *
   * @param {string[]} lines File content split into lines.
   * @param {string} key Key name to search for.
   * @returns {number} 1-based line number, or 1 when nothing matches.
   */
  findLineForKey(lines, key) {
    const index = lines.findIndex((text) => text.includes(`${key}:`));
    return index === -1 ? 1 : index + 1;
  }
}
24754
25452
  export {
24755
25453
  wrapError,
24756
25454
  validateScenario,
@@ -24798,6 +25496,7 @@ export {
24798
25496
  TestCaseSchema,
24799
25497
  SupabaseStorageAdapter,
24800
25498
  SimilarityEvaluator,
25499
+ ScenarioValidator,
24801
25500
  ScenarioSchema,
24802
25501
  SUPPORTED_EXPRESSIONS,
24803
25502
  RegexEvaluator,