npm - @artemiskit/core - Versions diffs - 0.2.3 → 0.3.0 - Mend

@artemiskit/core 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/CHANGELOG.md +164 -0
package/adapters/openai/dist/index.js +5626 -0
package/dist/adapters/registry.d.ts.map +1 -1
package/dist/adapters/types.d.ts +32 -2
package/dist/adapters/types.d.ts.map +1 -1
package/dist/artifacts/types.d.ts +12 -0
package/dist/artifacts/types.d.ts.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +762 -63
package/dist/scenario/schema.d.ts +116 -84
package/dist/scenario/schema.d.ts.map +1 -1
package/dist/storage/supabase.d.ts +25 -4
package/dist/storage/supabase.d.ts.map +1 -1
package/dist/storage/types.d.ts +162 -0
package/dist/storage/types.d.ts.map +1 -1
package/dist/validator/index.d.ts +6 -0
package/dist/validator/index.d.ts.map +1 -0
package/dist/validator/types.d.ts +58 -0
package/dist/validator/types.d.ts.map +1 -0
package/dist/validator/validator.d.ts +55 -0
package/dist/validator/validator.d.ts.map +1 -0
package/package.json +1 -1
package/src/adapters/registry.ts +38 -0
package/src/adapters/types.ts +38 -0
package/src/artifacts/types.ts +16 -0
package/src/index.ts +3 -0
package/src/scenario/schema.ts +10 -0
package/src/storage/supabase.test.ts +988 -0
package/src/storage/supabase.ts +599 -5
package/src/storage/types.ts +196 -0
package/src/validator/index.ts +6 -0
package/src/validator/types.ts +62 -0
package/src/validator/validator.ts +345 -0

package/dist/index.js CHANGED Viewed

@@ -4,25 +4,43 @@ var __getProtoOf = Object.getPrototypeOf;
 var __defProp = Object.defineProperty;
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+// Shared getter body for re-exported module properties: returns `this[key]`.
+// __toESM binds `this` to the source module and `key` to the property name,
+// so a single function replaces one closure per property.
+function __accessProp(key) {
+  return this[key];
+}
+// One cache per interop mode, created lazily on first use; keys are the
+// wrapped module objects.
+var __toESMCache_node;
+var __toESMCache_esm;
+// Wraps a CommonJS exports value in an ESM-style namespace object.
+//   mod        - the CommonJS exports value
+//   isNodeMode - when truthy, `default` is always defined as `mod` itself
+//   target     - reassigned before it is read, so any passed value is ignored
+// Returns the namespace object; for object-typed `mod` the same wrapper is
+// returned on repeated calls with the same mode.
 var __toESM = (mod, isNodeMode, target) => {
+  // Only non-null objects are cached (function or primitive exports are not).
+  var canCache = mod != null && typeof mod === "object";
+  if (canCache) {
+    var cache = isNodeMode ? __toESMCache_node ??= new WeakMap : __toESMCache_esm ??= new WeakMap;
+    var cached = cache.get(mod);
+    if (cached)
+      return cached;
+  }
   target = mod != null ? __create(__getProtoOf(mod)) : {};
   const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
   for (let key of __getOwnPropNames(mod))
     if (!__hasOwnProp.call(to, key))
       __defProp(to, key, {
-        get: () => mod[key],
+        get: __accessProp.bind(mod, key),
         enumerable: true
       });
+  if (canCache)
+    cache.set(mod, to);
   return to;
 };
 var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
+// Identity function; bound with a fixed argument it becomes a getter that
+// always returns that value.
+var __returnValue = (v) => v;
+// Setter used by __export: `this` is the getter table and `name` the export
+// name. Assigning to the export replaces its getter with one returning the
+// new value.
+function __exportSetter(name, newValue) {
+  this[name] = __returnValue.bind(null, newValue);
+}
+// Defines every entry of `all` (a map of export name -> getter function) on
+// `target` as an enumerable, configurable accessor property. The setter
+// rewrites the entry in `all`, not the property descriptor on `target`.
 var __export = (target, all) => {
   for (var name in all)
     __defProp(target, name, {
       get: all[name],
       enumerable: true,
       configurable: true,
-      set: (newValue) => all[name] = () => newValue
+      set: __exportSetter.bind(all, name)
     });
 };
 var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
@@ -10896,6 +10914,55 @@ var require_public_api = __commonJS((exports) => {
   exports.stringify = stringify;
 });
+// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
+// Entry point of the vendored yaml package, wrapped with __commonJS so the
+// body runs on the first require_dist() call and the resulting exports object
+// is reused afterwards. It only re-exports names from the package's internal
+// modules; `CST` is the whole cst module rather than a single member.
+var require_dist = __commonJS((exports) => {
+  var composer = require_composer();
+  var Document = require_Document();
+  var Schema = require_Schema();
+  var errors2 = require_errors();
+  var Alias = require_Alias();
+  var identity = require_identity();
+  var Pair = require_Pair();
+  var Scalar = require_Scalar();
+  var YAMLMap = require_YAMLMap();
+  var YAMLSeq = require_YAMLSeq();
+  var cst = require_cst();
+  var lexer = require_lexer();
+  var lineCounter = require_line_counter();
+  var parser = require_parser();
+  var publicApi = require_public_api();
+  var visit = require_visit();
+  exports.Composer = composer.Composer;
+  exports.Document = Document.Document;
+  exports.Schema = Schema.Schema;
+  exports.YAMLError = errors2.YAMLError;
+  exports.YAMLParseError = errors2.YAMLParseError;
+  exports.YAMLWarning = errors2.YAMLWarning;
+  exports.Alias = Alias.Alias;
+  exports.isAlias = identity.isAlias;
+  exports.isCollection = identity.isCollection;
+  exports.isDocument = identity.isDocument;
+  exports.isMap = identity.isMap;
+  exports.isNode = identity.isNode;
+  exports.isPair = identity.isPair;
+  exports.isScalar = identity.isScalar;
+  exports.isSeq = identity.isSeq;
+  exports.Pair = Pair.Pair;
+  exports.Scalar = Scalar.Scalar;
+  exports.YAMLMap = YAMLMap.YAMLMap;
+  exports.YAMLSeq = YAMLSeq.YAMLSeq;
+  exports.CST = cst;
+  exports.Lexer = lexer.Lexer;
+  exports.LineCounter = lineCounter.LineCounter;
+  exports.Parser = parser.Parser;
+  exports.parse = publicApi.parse;
+  exports.parseAllDocuments = publicApi.parseAllDocuments;
+  exports.parseDocument = publicApi.parseDocument;
+  exports.stringify = publicApi.stringify;
+  exports.visit = visit.visit;
+  exports.visitAsync = visit.visitAsync;
+});
 // src/evaluators/combined.ts
 async function getEvaluatorForType(type) {
   const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
@@ -13401,6 +13468,22 @@ async function registerBuiltInAdapters() {
     const mod = await tryImport("@artemiskit/adapter-anthropic");
     return new mod.AnthropicAdapter(config);
   });
+  adapterRegistry.register("langchain", async (config) => {
+    const mod = await tryImport("@artemiskit/adapter-langchain");
+    const runnable = config.metadata?.runnable;
+    if (!runnable) {
+      throw new Error("LangChain adapter requires a runnable instance. Pass it via config.metadata.runnable or use createLangChainAdapter() directly.");
+    }
+    return new mod.LangChainAdapter(config, runnable);
+  });
+  adapterRegistry.register("deepagents", async (config) => {
+    const mod = await tryImport("@artemiskit/adapter-deepagents");
+    const system = config.metadata?.system;
+    if (!system) {
+      throw new Error("DeepAgents adapter requires a system instance. Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.");
+    }
+    return new mod.DeepAgentsAdapter(config, system);
+  });
   adapterRegistry.markUnavailable("google", "Google adapter coming in v0.3.0");
   adapterRegistry.markUnavailable("mistral", "Mistral adapter coming in v0.3.0");
   adapterRegistry.markUnavailable("ollama", "Ollama adapter coming in v0.3.0");
@@ -13494,6 +13577,8 @@ var ProviderSchema = exports_external.enum([
   "cohere",
   "huggingface",
   "ollama",
+  "langchain",
+  "deepagents",
   "custom"
 ]);
 var ProviderConfigSchema = exports_external.object({
@@ -13508,7 +13593,11 @@ var ProviderConfigSchema = exports_external.object({
   apiVersion: exports_external.string().optional(),
   embeddingDeploymentName: exports_external.string().optional(),
   modelFamily: exports_external.string().optional(),
-  underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional()
+  underlyingProvider: exports_external.enum(["openai", "azure", "anthropic", "google", "mistral"]).optional(),
+  name: exports_external.string().optional(),
+  runnableType: exports_external.enum(["chain", "agent", "llm", "runnable"]).optional(),
+  captureTraces: exports_external.boolean().optional(),
+  captureMessages: exports_external.boolean().optional()
 }).optional();
 var BaseExpectedSchema = exports_external.discriminatedUnion("type", [
   exports_external.object({
@@ -13616,55 +13705,8 @@ var ScenarioSchema = exports_external.object({
   }).optional()
 });
 // src/scenario/parser.ts
+var import_yaml = __toESM(require_dist(), 1);
 import { readFile } from "fs/promises";
-// ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
-var composer = require_composer();
-var Document = require_Document();
-var Schema = require_Schema();
-var errors2 = require_errors();
-var Alias = require_Alias();
-var identity = require_identity();
-var Pair = require_Pair();
-var Scalar = require_Scalar();
-var YAMLMap = require_YAMLMap();
-var YAMLSeq = require_YAMLSeq();
-var cst = require_cst();
-var lexer = require_lexer();
-var lineCounter = require_line_counter();
-var parser = require_parser();
-var publicApi = require_public_api();
-var visit = require_visit();
-var $Composer = composer.Composer;
-var $Document = Document.Document;
-var $Schema = Schema.Schema;
-var $YAMLError = errors2.YAMLError;
-var $YAMLParseError = errors2.YAMLParseError;
-var $YAMLWarning = errors2.YAMLWarning;
-var $Alias = Alias.Alias;
-var $isAlias = identity.isAlias;
-var $isCollection = identity.isCollection;
-var $isDocument = identity.isDocument;
-var $isMap = identity.isMap;
-var $isNode = identity.isNode;
-var $isPair = identity.isPair;
-var $isScalar = identity.isScalar;
-var $isSeq = identity.isSeq;
-var $Pair = Pair.Pair;
-var $Scalar = Scalar.Scalar;
-var $YAMLMap = YAMLMap.YAMLMap;
-var $YAMLSeq = YAMLSeq.YAMLSeq;
-var $Lexer = lexer.Lexer;
-var $LineCounter = lineCounter.LineCounter;
-var $Parser = parser.Parser;
-var $parse = publicApi.parse;
-var $parseAllDocuments = publicApi.parseAllDocuments;
-var $parseDocument = publicApi.parseDocument;
-var $stringify = publicApi.stringify;
-var $visit = visit.visit;
-var $visitAsync = visit.visitAsync;
-// src/scenario/parser.ts
 function expandEnvVars(obj) {
   if (typeof obj === "string") {
     return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
@@ -13699,7 +13741,7 @@ async function parseScenarioFile(filePath) {
 }
 function parseScenarioString(content, source) {
   try {
-    const raw = $parse(content);
+    const raw = import_yaml.parse(content);
     const expanded = expandEnvVars(raw);
     const result = ScenarioSchema.safeParse(expanded);
     if (!result.success) {
@@ -16929,7 +16971,7 @@ class RealtimeChannel {
       }).map((bind) => {
         if (typeof handledPayload === "object" && "ids" in handledPayload) {
           const postgresChanges = handledPayload.data;
-          const { schema: schema2, table, commit_timestamp, type: type2, errors: errors3 } = postgresChanges;
+          const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
           const enrichedPayload = {
             schema: schema2,
             table,
@@ -16937,7 +16979,7 @@ class RealtimeChannel {
             eventType: type2,
             new: {},
             old: {},
-            errors: errors3
+            errors: errors2
           };
           handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
         }
@@ -22508,7 +22550,7 @@ class GoTrueClient {
       }
     });
   }
-  async unlinkIdentity(identity2) {
+  async unlinkIdentity(identity) {
     try {
       return await this._useSession(async (result) => {
         var _a, _b;
@@ -22516,7 +22558,7 @@ class GoTrueClient {
         if (error) {
           throw error;
         }
-        return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity2.identity_id}`, {
+        return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
           headers: this.headers,
           jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
         });
@@ -22690,20 +22732,20 @@ class GoTrueClient {
       if (this.broadcastChannel && broadcast) {
         this.broadcastChannel.postMessage({ event, session });
       }
-      const errors3 = [];
+      const errors2 = [];
       const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
         try {
           await x.callback(event, session);
         } catch (e) {
-          errors3.push(e);
+          errors2.push(e);
         }
       });
       await Promise.all(promises);
-      if (errors3.length > 0) {
-        for (let i = 0;i < errors3.length; i += 1) {
-          console.error(errors3[i]);
+      if (errors2.length > 0) {
+        for (let i = 0;i < errors2.length; i += 1) {
+          console.error(errors2[i]);
         }
-        throw errors3[0];
+        throw errors2[0];
       }
     } finally {
       this._debug(debugName, "end");
@@ -23578,12 +23620,33 @@ if (shouldShowDeprecationWarning())
   console.warn("\u26A0\uFE0F  Node.js 18 and below are deprecated and will no longer be supported in future versions of @supabase/supabase-js. Please upgrade to Node.js 20 or later. For more information, visit: https://github.com/orgs/supabase/discussions/37217");
 // src/storage/supabase.ts
+// Converts one case entry of a run manifest into the camelCase record shape
+// that saveCaseResult()/saveCaseResults() accept.
+//   runId      - identifier of the run that owns the case
+//   caseResult - manifest case entry; reads id, name, ok, error, score,
+//                matcherType, reason, response, latencyMs, tokens and tags
+// Returns a plain record. `status` is "error" when caseResult.error is truthy,
+// otherwise "passed" or "failed" depending on caseResult.ok.
+function mapCaseToRecord(runId, caseResult) {
+  // `tokens` may be absent on a case entry; read it defensively so a single
+  // incomplete case cannot abort saving the whole run with a TypeError.
+  const tokens = caseResult.tokens ?? {};
+  let status = "failed";
+  if (caseResult.error) {
+    status = "error";
+  } else if (caseResult.ok) {
+    status = "passed";
+  }
+  return {
+    runId,
+    caseId: caseResult.id,
+    caseName: caseResult.name,
+    status,
+    score: caseResult.score,
+    matcherType: caseResult.matcherType,
+    reason: caseResult.reason,
+    response: caseResult.response,
+    latencyMs: caseResult.latencyMs,
+    promptTokens: tokens.prompt,
+    completionTokens: tokens.completion,
+    totalTokens: tokens.total,
+    error: caseResult.error,
+    tags: caseResult.tags
+  };
+}
 class SupabaseStorageAdapter {
   client;
   bucket;
-  constructor(config) {
+  project;
+  // config  - reads `url` and `anonKey` for the Supabase client and an optional
+  //           `bucket` (falls back to "artemis-runs")
+  // project - project name used to scope the baseline queries of this adapter;
+  //           falls back to "default" when omitted or empty
+  constructor(config, project) {
     this.client = createClient(config.url, config.anonKey);
     this.bucket = config.bucket || "artemis-runs";
+    this.project = project || "default";
   }
   async save(manifest) {
     const filePath = `${manifest.project}/${manifest.run_id}.json`;
@@ -23619,6 +23682,10 @@ class SupabaseStorageAdapter {
     if (dbError) {
       throw new Error(`Failed to save run metadata: ${dbError.message}`);
     }
+    if (manifest.cases && manifest.cases.length > 0) {
+      const caseRecords = manifest.cases.map((c) => mapCaseToRecord(manifest.run_id, c));
+      await this.saveCaseResults(caseRecords);
+    }
     return filePath;
   }
   async load(runId) {
@@ -23677,6 +23744,392 @@ class SupabaseStorageAdapter {
       }
     };
   }
+  // Marks run `runId` as the baseline for `scenario` by copying the run's
+  // summary metrics into the "baselines" table (upsert keyed on
+  // project + scenario).
+  //   scenario - scenario name stored on the baseline row
+  //   runId    - run_id of an existing row in "runs"
+  //   tag      - optional label stored with the baseline
+  // Returns the baseline in camelCase form.
+  // Throws Error when the run cannot be loaded or the upsert fails.
+  async setBaseline(scenario, runId, tag) {
+    const { data: run, error: runError } = await this.client.from("runs").select("*").eq("run_id", runId).single();
+    if (runError || !run) {
+      throw new Error(`Run not found: ${runId}`);
+    }
+    // NOTE(review): the row is keyed by the run's own project, whereas
+    // getBaseline()/listBaselines() filter by this.project - confirm the two
+    // always agree.
+    const baselineData = {
+      project: run.project,
+      scenario,
+      run_id: runId,
+      success_rate: run.success_rate,
+      median_latency_ms: run.median_latency_ms,
+      total_tokens: run.total_tokens,
+      passed_cases: run.passed_cases,
+      failed_cases: run.failed_cases,
+      total_cases: run.total_cases,
+      tag,
+      created_by: run.run_by
+    };
+    const { error } = await this.client.from("baselines").upsert(baselineData, {
+      onConflict: "project,scenario"
+    });
+    if (error) {
+      throw new Error(`Failed to set baseline: ${error.message}`);
+    }
+    // createdAt is taken from the local clock, not read back from the table.
+    return {
+      scenario,
+      runId,
+      createdAt: new Date().toISOString(),
+      metrics: {
+        successRate: run.success_rate,
+        medianLatencyMs: run.median_latency_ms,
+        totalTokens: run.total_tokens,
+        passedCases: run.passed_cases,
+        failedCases: run.failed_cases,
+        totalCases: run.total_cases
+      },
+      tag
+    };
+  }
+  // Looks up the baseline stored for `scenario` within this adapter's project.
+  // Resolves to the baseline in camelCase form, or null when the query reports
+  // an error or yields no row.
+  async getBaseline(scenario) {
+    const lookup = this.client.from("baselines").select("*").eq("project", this.project).eq("scenario", scenario);
+    const { data: row, error: lookupError } = await lookup.single();
+    if (lookupError || !row) {
+      return null;
+    }
+    const metrics = {
+      successRate: row.success_rate,
+      medianLatencyMs: row.median_latency_ms,
+      totalTokens: row.total_tokens,
+      passedCases: row.passed_cases,
+      failedCases: row.failed_cases,
+      totalCases: row.total_cases
+    };
+    return {
+      scenario: row.scenario,
+      runId: row.run_id,
+      createdAt: row.created_at,
+      metrics,
+      tag: row.tag
+    };
+  }
+  // Looks up the baseline whose run_id equals `runId` (not scoped by project).
+  // Resolves to the baseline in camelCase form, or null when the query reports
+  // an error or yields no row.
+  async getBaselineByRunId(runId) {
+    const lookup = this.client.from("baselines").select("*").eq("run_id", runId);
+    const { data: row, error: lookupError } = await lookup.single();
+    if (lookupError || !row) {
+      return null;
+    }
+    const metrics = {
+      successRate: row.success_rate,
+      medianLatencyMs: row.median_latency_ms,
+      totalTokens: row.total_tokens,
+      passedCases: row.passed_cases,
+      failedCases: row.failed_cases,
+      totalCases: row.total_cases
+    };
+    return {
+      scenario: row.scenario,
+      runId: row.run_id,
+      createdAt: row.created_at,
+      metrics,
+      tag: row.tag
+    };
+  }
+  // Lists every baseline of this adapter's project, newest created_at first.
+  // Resolves to an array of baselines in camelCase form (empty when the query
+  // returns no data). Throws Error when the query fails.
+  async listBaselines() {
+    const listing = await this.client.from("baselines").select("*").eq("project", this.project).order("created_at", { ascending: false });
+    if (listing.error) {
+      throw new Error(`Failed to list baselines: ${listing.error.message}`);
+    }
+    const rows = listing.data || [];
+    const baselines = [];
+    for (const row of rows) {
+      baselines.push({
+        scenario: row.scenario,
+        runId: row.run_id,
+        createdAt: row.created_at,
+        metrics: {
+          successRate: row.success_rate,
+          medianLatencyMs: row.median_latency_ms,
+          totalTokens: row.total_tokens,
+          passedCases: row.passed_cases,
+          failedCases: row.failed_cases,
+          totalCases: row.total_cases
+        },
+        tag: row.tag
+      });
+    }
+    return baselines;
+  }
+  // Deletes the baseline stored for `scenario` in this adapter's project.
+  // Resolves to true when at least one row was deleted, false otherwise.
+  // Throws Error when the delete request fails.
+  async removeBaseline(scenario) {
+    // supabase-js only fills in `count` when a count algorithm is requested;
+    // without it `count` is null and this method reported false even after a
+    // successful delete.
+    const { error, count } = await this.client.from("baselines").delete({ count: "exact" }).eq("project", this.project).eq("scenario", scenario);
+    if (error) {
+      throw new Error(`Failed to remove baseline: ${error.message}`);
+    }
+    return (count ?? 0) > 0;
+  }
+  // Deletes the baseline whose run_id equals `runId` (not scoped by project).
+  // Resolves to true when at least one row was deleted, false otherwise.
+  // Throws Error when the delete request fails.
+  async removeBaselineByRunId(runId) {
+    // supabase-js only fills in `count` when a count algorithm is requested;
+    // without it `count` is null and this method reported false even after a
+    // successful delete.
+    const { error, count } = await this.client.from("baselines").delete({ count: "exact" }).eq("run_id", runId);
+    if (error) {
+      throw new Error(`Failed to remove baseline: ${error.message}`);
+    }
+    return (count ?? 0) > 0;
+  }
+  // Compares run `runId` against the baseline of its scenario.
+  //   runId               - run_id of the run to check
+  //   regressionThreshold - allowed drop in success rate (default 0.05)
+  // Resolves to { baseline, comparison, hasRegression, regressionThreshold },
+  // or null when the run cannot be loaded or its scenario has no baseline.
+  async compareToBaseline(runId, regressionThreshold = 0.05) {
+    const { data: run, error: runError } = await this.client.from("runs").select("scenario").eq("run_id", runId).single();
+    if (runError || !run) {
+      return null;
+    }
+    // The baseline is looked up in this.project (see getBaseline), not in the
+    // project recorded on the run row.
+    const baseline = await this.getBaseline(run.scenario);
+    if (!baseline) {
+      return null;
+    }
+    // compare() is defined outside this view; it is called with the baseline
+    // run first and the current run second.
+    const comparison = await this.compare(baseline.runId, runId);
+    // Only the success-rate delta is checked, and only a drop strictly larger
+    // than the threshold counts as a regression.
+    const hasRegression = comparison.delta.successRate < -regressionThreshold;
+    return {
+      baseline,
+      comparison,
+      hasRegression,
+      regressionThreshold
+    };
+  }
+  // Upserts a single case result into "case_results" (conflict key
+  // run_id + case_id), translating camelCase fields to snake_case columns.
+  // Resolves to the id returned for the row, or to result.caseId when no id
+  // comes back. Throws Error when the upsert fails.
+  async saveCaseResult(result) {
+    const dbRecord = {
+      run_id: result.runId,
+      case_id: result.caseId,
+      case_name: result.caseName,
+      status: result.status,
+      score: result.score,
+      matcher_type: result.matcherType,
+      reason: result.reason,
+      response: result.response,
+      latency_ms: result.latencyMs,
+      prompt_tokens: result.promptTokens,
+      completion_tokens: result.completionTokens,
+      total_tokens: result.totalTokens,
+      error: result.error,
+      tags: result.tags || []
+    };
+    const { data, error } = await this.client.from("case_results").upsert(dbRecord, { onConflict: "run_id,case_id" }).select("id").single();
+    if (error) {
+      throw new Error(`Failed to save case result: ${error.message}`);
+    }
+    return data?.id || result.caseId;
+  }
+  // Upserts a batch of case results into "case_results" in one request
+  // (conflict key run_id + case_id).
+  // Resolves to the ids returned by the upsert; an empty input resolves to []
+  // without contacting the database. Throws Error when the upsert fails.
+  async saveCaseResults(results) {
+    if (results.length === 0) {
+      return [];
+    }
+    const dbRecords = results.map((r) => ({
+      run_id: r.runId,
+      case_id: r.caseId,
+      case_name: r.caseName,
+      status: r.status,
+      score: r.score,
+      matcher_type: r.matcherType,
+      reason: r.reason,
+      response: r.response,
+      latency_ms: r.latencyMs,
+      prompt_tokens: r.promptTokens,
+      completion_tokens: r.completionTokens,
+      total_tokens: r.totalTokens,
+      error: r.error,
+      tags: r.tags || []
+    }));
+    const { data, error } = await this.client.from("case_results").upsert(dbRecords, { onConflict: "run_id,case_id" }).select("id");
+    if (error) {
+      throw new Error(`Failed to save case results: ${error.message}`);
+    }
+    return (data || []).map((d) => d.id);
+  }
+  // Loads all case results of run `runId`, oldest created_at first.
+  // Resolves to an array of records in camelCase form (empty when the query
+  // returns no data). Throws Error when the query fails.
+  async getCaseResults(runId) {
+    const { data, error } = await this.client.from("case_results").select("*").eq("run_id", runId).order("created_at", { ascending: true });
+    if (error) {
+      throw new Error(`Failed to get case results: ${error.message}`);
+    }
+    return (data || []).map((r) => ({
+      id: r.id,
+      runId: r.run_id,
+      caseId: r.case_id,
+      caseName: r.case_name,
+      status: r.status,
+      score: r.score,
+      matcherType: r.matcher_type,
+      reason: r.reason,
+      response: r.response,
+      latencyMs: r.latency_ms,
+      promptTokens: r.prompt_tokens,
+      completionTokens: r.completion_tokens,
+      totalTokens: r.total_tokens,
+      error: r.error,
+      tags: r.tags,
+      createdAt: r.created_at
+    }));
+  }
+  // Queries "case_results" with optional filters, newest created_at first.
+  //   options.runId / caseId / status - equality filters, applied when truthy
+  //   options.tags   - applied via overlaps() when non-empty (presumably
+  //                    matches rows sharing at least one tag - confirm)
+  //   options.limit  - maximum number of rows
+  //   options.offset - rows to skip; only honoured together with a limit
+  // Resolves to an array of records in camelCase form.
+  // Throws Error when the query fails.
+  async queryCaseResults(options) {
+    let query = this.client.from("case_results").select("*").order("created_at", { ascending: false });
+    if (options.runId) {
+      query = query.eq("run_id", options.runId);
+    }
+    if (options.caseId) {
+      query = query.eq("case_id", options.caseId);
+    }
+    if (options.status) {
+      query = query.eq("status", options.status);
+    }
+    if (options.tags && options.tags.length > 0) {
+      query = query.overlaps("tags", options.tags);
+    }
+    // An offset of 0 falls through to the plain limit branch, which selects
+    // the same rows. An offset given without a limit is silently ignored.
+    if (options.offset && options.limit) {
+      query = query.range(options.offset, options.offset + options.limit - 1);
+    } else if (options.limit) {
+      query = query.limit(options.limit);
+    }
+    const { data, error } = await query;
+    if (error) {
+      throw new Error(`Failed to query case results: ${error.message}`);
+    }
+    return (data || []).map((r) => ({
+      id: r.id,
+      runId: r.run_id,
+      caseId: r.case_id,
+      caseName: r.case_name,
+      status: r.status,
+      score: r.score,
+      matcherType: r.matcher_type,
+      reason: r.reason,
+      response: r.response,
+      latencyMs: r.latency_ms,
+      promptTokens: r.prompt_tokens,
+      completionTokens: r.completion_tokens,
+      totalTokens: r.total_tokens,
+      error: r.error,
+      tags: r.tags,
+      createdAt: r.created_at
+    }));
+  }
+  // Upserts one metrics snapshot into "metrics_history" (conflict key
+  // date + project + scenario). A missing or empty scenario is stored as null.
+  // Resolves to the id returned for the row, or to "<date>-<project>" when no
+  // id comes back. Throws Error when the upsert fails.
+  // NOTE(review): whether the conflict key matches rows whose scenario is null
+  // depends on the table's unique constraint, which is not visible here.
+  async saveMetricsSnapshot(snapshot) {
+    const dbRecord = {
+      date: snapshot.date,
+      project: snapshot.project,
+      scenario: snapshot.scenario || null,
+      total_runs: snapshot.totalRuns,
+      total_cases: snapshot.totalCases,
+      passed_cases: snapshot.passedCases,
+      failed_cases: snapshot.failedCases,
+      avg_success_rate: snapshot.avgSuccessRate,
+      avg_latency_ms: snapshot.avgLatencyMs,
+      avg_tokens_per_run: snapshot.avgTokensPerRun,
+      min_success_rate: snapshot.minSuccessRate,
+      max_success_rate: snapshot.maxSuccessRate,
+      min_latency_ms: snapshot.minLatencyMs,
+      max_latency_ms: snapshot.maxLatencyMs,
+      total_tokens: snapshot.totalTokens
+    };
+    const { data, error } = await this.client.from("metrics_history").upsert(dbRecord, { onConflict: "date,project,scenario" }).select("id").single();
+    if (error) {
+      throw new Error(`Failed to save metrics snapshot: ${error.message}`);
+    }
+    return data?.id || `${snapshot.date}-${snapshot.project}`;
+  }
+  // Reads the metrics trend of a project from "metrics_history", ordered by
+  // ascending date.
+  //   options.project   - required project filter
+  //   options.scenario  - when omitted, only rows with a null scenario match
+  //   options.startDate / endDate - inclusive date bounds, applied when truthy
+  //   options.limit     - maximum number of rows; combined with the ascending
+  //                       order this keeps the earliest matching dates
+  // Resolves to [{ date, successRate, latencyMs, totalRuns, totalTokens }].
+  // Throws Error when the query fails.
+  async getMetricsTrend(options) {
+    let query = this.client.from("metrics_history").select("date, avg_success_rate, avg_latency_ms, total_runs, total_tokens").eq("project", options.project).order("date", { ascending: true });
+    if (options.scenario) {
+      query = query.eq("scenario", options.scenario);
+    } else {
+      query = query.is("scenario", null);
+    }
+    if (options.startDate) {
+      query = query.gte("date", options.startDate);
+    }
+    if (options.endDate) {
+      query = query.lte("date", options.endDate);
+    }
+    if (options.limit) {
+      query = query.limit(options.limit);
+    }
+    const { data, error } = await query;
+    if (error) {
+      throw new Error(`Failed to get metrics trend: ${error.message}`);
+    }
+    return (data || []).map((m) => ({
+      date: m.date,
+      successRate: m.avg_success_rate,
+      latencyMs: m.avg_latency_ms,
+      totalRuns: m.total_runs,
+      totalTokens: m.total_tokens
+    }));
+  }
+  // Loads the snapshot stored for `date` and `project`; when `scenario` is
+  // omitted only a row with a null scenario matches.
+  // Resolves to the snapshot in camelCase form, or null when the query reports
+  // an error or yields no row (single() is used, so the query errors unless
+  // exactly one row matches).
+  async getMetricsSnapshot(date, project, scenario) {
+    let query = this.client.from("metrics_history").select("*").eq("date", date).eq("project", project);
+    if (scenario) {
+      query = query.eq("scenario", scenario);
+    } else {
+      query = query.is("scenario", null);
+    }
+    const { data, error } = await query.single();
+    if (error || !data) {
+      return null;
+    }
+    return {
+      id: data.id,
+      date: data.date,
+      project: data.project,
+      scenario: data.scenario,
+      totalRuns: data.total_runs,
+      totalCases: data.total_cases,
+      passedCases: data.passed_cases,
+      failedCases: data.failed_cases,
+      avgSuccessRate: data.avg_success_rate,
+      avgLatencyMs: data.avg_latency_ms,
+      avgTokensPerRun: data.avg_tokens_per_run,
+      minSuccessRate: data.min_success_rate,
+      maxSuccessRate: data.max_success_rate,
+      minLatencyMs: data.min_latency_ms,
+      maxLatencyMs: data.max_latency_ms,
+      totalTokens: data.total_tokens,
+      createdAt: data.created_at,
+      updatedAt: data.updated_at
+    };
+  }
+  // Aggregates the runs of one day into a metrics snapshot, stores it through
+  // saveMetricsSnapshot() and returns it.
+  //   date     - day to aggregate; interpolated into ISO timestamps, so a
+  //              "YYYY-MM-DD" string is presumably expected - confirm
+  //   project  - project filter
+  //   scenario - optional scenario filter; when omitted, runs of every
+  //              scenario in the project are aggregated together
+  // Resolves to the locally built snapshot (not the stored row).
+  // Throws Error when loading the runs or saving the snapshot fails.
+  async aggregateDailyMetrics(date, project, scenario) {
+    // The day window is expressed in UTC and matched against started_at.
+    const startOfDay = `${date}T00:00:00.000Z`;
+    const endOfDay = `${date}T23:59:59.999Z`;
+    let query = this.client.from("runs").select("*").eq("project", project).gte("started_at", startOfDay).lte("started_at", endOfDay);
+    if (scenario) {
+      query = query.eq("scenario", scenario);
+    }
+    const { data: runs, error } = await query;
+    if (error) {
+      throw new Error(`Failed to aggregate metrics: ${error.message}`);
+    }
+    const runList = runs || [];
+    // A day without runs is still recorded, with zeroed totals and no min/max
+    // fields.
+    if (runList.length === 0) {
+      const emptySnapshot = {
+        date,
+        project,
+        scenario,
+        totalRuns: 0,
+        totalCases: 0,
+        passedCases: 0,
+        failedCases: 0,
+        avgSuccessRate: 0,
+        avgLatencyMs: 0,
+        avgTokensPerRun: 0,
+        totalTokens: 0
+      };
+      await this.saveMetricsSnapshot(emptySnapshot);
+      return emptySnapshot;
+    }
+    const totalRuns = runList.length;
+    const totalCases = runList.reduce((sum, r) => sum + r.total_cases, 0);
+    const passedCases = runList.reduce((sum, r) => sum + r.passed_cases, 0);
+    const failedCases = runList.reduce((sum, r) => sum + r.failed_cases, 0);
+    const totalTokens = runList.reduce((sum, r) => sum + r.total_tokens, 0);
+    const successRates = runList.map((r) => r.success_rate);
+    // avgLatencyMs below is the mean of the per-run median latencies.
+    const latencies = runList.map((r) => r.median_latency_ms);
+    const snapshot = {
+      date,
+      project,
+      scenario,
+      totalRuns,
+      totalCases,
+      passedCases,
+      failedCases,
+      avgSuccessRate: successRates.reduce((a, b) => a + b, 0) / totalRuns,
+      avgLatencyMs: latencies.reduce((a, b) => a + b, 0) / totalRuns,
+      avgTokensPerRun: totalTokens / totalRuns,
+      minSuccessRate: Math.min(...successRates),
+      maxSuccessRate: Math.max(...successRates),
+      minLatencyMs: Math.min(...latencies),
+      maxLatencyMs: Math.max(...latencies),
+      totalTokens
+    };
+    await this.saveMetricsSnapshot(snapshot);
+    return snapshot;
+  }
 }
 // src/storage/factory.ts
@@ -24751,6 +25204,251 @@ class Logger {
   }
 }
 var logger = new Logger("artemis");
+// src/validator/validator.ts
+var import_yaml2 = __toESM(require_dist(), 1);
+import { readFileSync } from "fs";
+// Validates scenario YAML files in stages: file read, YAML syntax,
+// ScenarioSchema conformance, then semantic checks (duplicate case IDs and
+// undefined {{variable}} references). Non-fatal findings are reported
+// separately as warnings.
+class ScenarioValidator {
+  _options;
+  // options - stored as-is and exposed through the `options` getter; no method
+  //           of this class reads individual option fields.
+  constructor(options = {}) {
+    this._options = options;
+  }
+  get options() {
+    return this._options;
+  }
+  // Validates the scenario file at `filePath`.
+  // Returns { file, valid, errors, warnings }; `valid` is true only when
+  // `errors` is empty. Read and parse failures are reported as entries in
+  // `errors` instead of being thrown; the first failing stage ends validation.
+  validate(filePath) {
+    const errors = [];
+    const warnings = [];
+    let content;
+    try {
+      content = readFileSync(filePath, "utf-8");
+    } catch (err) {
+      errors.push({
+        line: 1,
+        message: `Failed to read file: ${this.describeError(err)}`,
+        rule: "file-read",
+        severity: "error"
+      });
+      return { file: filePath, valid: false, errors, warnings };
+    }
+    let parsed;
+    try {
+      parsed = import_yaml2.default.parse(content, {
+        prettyErrors: true,
+        strict: true
+      });
+    } catch (err) {
+      if (err instanceof import_yaml2.default.YAMLError) {
+        const linePos = err.linePos?.[0];
+        errors.push({
+          line: linePos?.line || 1,
+          column: linePos?.col,
+          message: `Invalid YAML syntax: ${err.message}`,
+          rule: "yaml-syntax",
+          severity: "error"
+        });
+      } else {
+        errors.push({
+          line: 1,
+          message: `YAML parse error: ${this.describeError(err)}`,
+          rule: "yaml-syntax",
+          severity: "error"
+        });
+      }
+      return { file: filePath, valid: false, errors, warnings };
+    }
+    if (parsed === null || typeof parsed !== "object") {
+      errors.push({
+        line: 1,
+        message: "Scenario must be a YAML object",
+        rule: "schema-type",
+        severity: "error"
+      });
+      return { file: filePath, valid: false, errors, warnings };
+    }
+    const schemaResult = ScenarioSchema.safeParse(parsed);
+    if (schemaResult.success) {
+      // Semantic checks need the typed scenario, so they only run once the
+      // schema accepts the document.
+      errors.push(...this.validateSemantics(schemaResult.data, content));
+    } else {
+      errors.push(...this.formatZodErrors(schemaResult.error, content));
+    }
+    warnings.push(...this.detectWarnings(parsed, content));
+    return {
+      file: filePath,
+      valid: errors.length === 0,
+      errors,
+      warnings
+    };
+  }
+  // Renders a caught value as text; values that are not Error instances have
+  // no `message`, so they are stringified instead of printing "undefined".
+  describeError(err) {
+    return err instanceof Error ? err.message : String(err);
+  }
+  // Converts the issues of a failed schema parse into validator errors with a
+  // best-effort source line and a readable message per issue code.
+  formatZodErrors(error, content) {
+    const issues = [];
+    const lines = content.split("\n");
+    for (const issue of error.issues) {
+      const path = issue.path.join(".");
+      const line = this.findLineForPath(lines, issue.path);
+      let message;
+      switch (issue.code) {
+        case "invalid_type":
+          message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
+          break;
+        case "invalid_enum_value":
+          message = `'${path}' must be one of: ${issue.options.join(", ")}`;
+          break;
+        case "too_small":
+          if (issue.type === "array") {
+            message = `'${path}' must have at least ${issue.minimum} item(s)`;
+          } else {
+            message = `'${path}' is too small`;
+          }
+          break;
+        case "unrecognized_keys":
+          message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
+          break;
+        default:
+          message = issue.message;
+      }
+      issues.push({
+        line,
+        message,
+        rule: `schema-${issue.code}`,
+        severity: "error"
+      });
+    }
+    return issues;
+  }
+  // Best-effort mapping from a schema issue path to a 1-based source line.
+  //   lines - file content split into lines
+  //   path  - issue path, e.g. ["cases", 2, "prompt"]
+  // For paths under `cases[n]` the search is limited to the n-th case so the
+  // reported line belongs to the offending case rather than to the first case
+  // that happens to contain the same key. Returns 1 when nothing matches.
+  // Assumes each case item starts with a "- id:" line.
+  findLineForPath(lines, path) {
+    if (path.length === 0)
+      return 1;
+    let from = 0;
+    let to = lines.length;
+    let fallback = 1;
+    const casesAt = path.indexOf("cases");
+    const caseIndex = casesAt === -1 ? undefined : path[casesAt + 1];
+    if (typeof caseIndex === "number") {
+      const caseStarts = [];
+      lines.forEach((line, index) => {
+        if (line.trim().startsWith("- id:"))
+          caseStarts.push(index);
+      });
+      if (caseIndex < caseStarts.length) {
+        from = caseStarts[caseIndex];
+        to = caseIndex + 1 < caseStarts.length ? caseStarts[caseIndex + 1] : lines.length;
+        fallback = from + 1;
+        // The path addresses the case item itself.
+        if (casesAt + 1 === path.length - 1)
+          return fallback;
+      }
+    }
+    // Array indices never appear as text in the file; search for the closest
+    // named key in the path instead.
+    let searchKey;
+    for (let p = path.length - 1; p >= 0; p--) {
+      if (typeof path[p] === "string") {
+        searchKey = path[p];
+        break;
+      }
+    }
+    if (searchKey === undefined)
+      return fallback;
+    for (let i2 = from; i2 < to; i2++) {
+      if (lines[i2].includes(`${searchKey}:`)) {
+        return i2 + 1;
+      }
+    }
+    return fallback;
+  }
+  // Checks that go beyond the schema: duplicate case IDs and {{variable}}
+  // references in prompts that are defined neither on the scenario nor on the
+  // case. Returns the list of errors (duplicates first, then variables).
+  validateSemantics(scenario, content) {
+    const errors = [];
+    const lines = content.split("\n");
+    const seenCount = new Map();
+    for (const testCase of scenario.cases) {
+      const occurrence = (seenCount.get(testCase.id) ?? 0) + 1;
+      seenCount.set(testCase.id, occurrence);
+      if (occurrence > 1) {
+        errors.push({
+          // Point at the repeated definition, not at the first one.
+          line: this.findLineForCaseId(lines, testCase.id, occurrence),
+          message: `Duplicate case ID: '${testCase.id}'`,
+          rule: "duplicate-case-id",
+          severity: "error"
+        });
+      }
+    }
+    const globalVars = scenario.variables || {};
+    const visitCount = new Map();
+    for (const testCase of scenario.cases) {
+      const occurrence = (visitCount.get(testCase.id) ?? 0) + 1;
+      visitCount.set(testCase.id, occurrence);
+      const caseVars = testCase.variables || {};
+      const allVars = { ...globalVars, ...caseVars };
+      const prompt2 = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
+      for (const ref of this.extractVariableRefs(prompt2)) {
+        // Own-property check: `in` would also accept inherited names such as
+        // "constructor" or "toString" as defined variables.
+        if (!Object.prototype.hasOwnProperty.call(allVars, ref)) {
+          errors.push({
+            line: this.findLineForCaseId(lines, testCase.id, occurrence),
+            message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
+            rule: "undefined-variable",
+            severity: "error",
+            suggestion: `Define '${ref}' in scenario.variables or case.variables`
+          });
+        }
+      }
+    }
+    return errors;
+  }
+  // Returns the 1-based line of the `occurrence`-th line that declares
+  // `id: <caseId>` (plain, double-quoted or single-quoted).
+  //   occurrence - which match to report, counted from 1 (default: the first)
+  // Falls back to the last match when there are fewer matches than requested,
+  // and to 1 when there is none. A plain ID must be followed by end of line,
+  // whitespace, "," or "}" so that "a" does not match "id: a-2".
+  findLineForCaseId(lines, caseId, occurrence = 1) {
+    const plain = `id: ${caseId}`;
+    const declaresId = (line) => {
+      if (line.includes(`id: "${caseId}"`) || line.includes(`id: '${caseId}'`))
+        return true;
+      let at = line.indexOf(plain);
+      while (at !== -1) {
+        const next = line[at + plain.length];
+        if (next === undefined || /[\s,}]/.test(next))
+          return true;
+        at = line.indexOf(plain, at + 1);
+      }
+      return false;
+    };
+    let matches = 0;
+    let lastMatch = 1;
+    for (let i2 = 0; i2 < lines.length; i2++) {
+      if (!declaresId(lines[i2]))
+        continue;
+      matches += 1;
+      lastMatch = i2 + 1;
+      if (matches === occurrence)
+        return lastMatch;
+    }
+    return lastMatch;
+  }
+  // Returns the names of all {{name}} placeholders in `text`, in order of
+  // appearance and including repeats. Names consist of word characters only.
+  extractVariableRefs(text) {
+    return Array.from(text.matchAll(/\{\{(\w+)\}\}/g), (match) => match[1]);
+  }
+  // Collects non-fatal findings on the raw parsed document: use of the
+  // deprecated `criteria` key anywhere in the tree, more than 20 cases, and a
+  // missing `description`.
+  detectWarnings(parsed, content) {
+    const warnings = [];
+    const lines = content.split("\n");
+    if (parsed && typeof parsed === "object") {
+      const obj = parsed;
+      if (this.hasDeepKey(obj, "criteria")) {
+        const line = this.findLineForKey(lines, "criteria");
+        warnings.push({
+          line,
+          message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
+          rule: "deprecated-field",
+          severity: "warning",
+          suggestion: "Replace 'criteria' with 'rubric'"
+        });
+      }
+      const cases = obj.cases;
+      if (Array.isArray(cases) && cases.length > 20) {
+        warnings.push({
+          line: 1,
+          message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
+          rule: "performance-hint",
+          severity: "warning"
+        });
+      }
+      if (!obj.description) {
+        warnings.push({
+          line: 1,
+          message: "Scenario is missing 'description' field. Adding a description improves documentation.",
+          rule: "missing-description",
+          severity: "warning"
+        });
+      }
+    }
+    return warnings;
+  }
+  // True when `key` exists on `obj` or on any object/array nested inside it.
+  hasDeepKey(obj, key) {
+    if (obj === null || typeof obj !== "object")
+      return false;
+    if (key in obj)
+      return true;
+    for (const value of Object.values(obj)) {
+      if (this.hasDeepKey(value, key))
+        return true;
+    }
+    return false;
+  }
+  // Returns the 1-based line of the first line containing "<key>:", or 1 when
+  // no line matches.
+  findLineForKey(lines, key) {
+    for (let i2 = 0; i2 < lines.length; i2++) {
+      if (lines[i2].includes(`${key}:`)) {
+        return i2 + 1;
+      }
+    }
+    return 1;
+  }
+}
 export {
   wrapError,
   validateScenario,
@@ -24798,6 +25496,7 @@ export {
   TestCaseSchema,
   SupabaseStorageAdapter,
   SimilarityEvaluator,
+  ScenarioValidator,
   ScenarioSchema,
   SUPPORTED_EXPRESSIONS,
   RegexEvaluator,