@artemiskit/core 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,42 @@
1
1
  # @artemiskit/core
2
2
 
3
+ ## 0.2.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 16604a6: ## New Features
8
+
9
+ ### Validate Command
10
+
11
+ New `artemiskit validate` command for validating scenario files without running them:
12
+
13
+ - **YAML syntax validation** - Catches formatting errors
14
+ - **Schema validation** - Validates against ArtemisKit schema using Zod
15
+ - **Semantic validation** - Detects duplicate case IDs, undefined variables
16
+ - **Warnings** - Identifies deprecated fields, missing descriptions, performance hints
17
+
18
+ Options:
19
+
20
+ - `--json` - Output results as JSON
21
+ - `--strict` - Treat warnings as errors
22
+ - `--quiet` - Only show errors
23
+ - `--export junit` - Export to JUnit XML for CI integration
24
+
25
+ ### JUnit XML Export
26
+
27
+ Added JUnit XML export support for CI/CD integration with Jenkins, GitHub Actions, GitLab CI, and other systems:
28
+
29
+ - `akit run scenarios/ --export junit` - Export run results
30
+ - `akit redteam scenarios/chatbot.yaml --export junit` - Export security test results
31
+ - `akit validate scenarios/ --export junit` - Export validation results
32
+
33
+ JUnit reports include:
34
+
35
+ - Test suite metadata (run ID, provider, model, success rate)
36
+ - Individual test cases with pass/fail status
37
+ - Failure details with matcher type and expected values
38
+ - Timing information for each test
39
+
3
40
  ## 0.2.3
4
41
 
5
42
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -12,4 +12,5 @@ export * from './provenance';
12
12
  export * from './utils';
13
13
  export * from './redaction';
14
14
  export * from './cost';
15
+ export * from './validator';
15
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,YAAY,CAAC;AAG3B,cAAc,YAAY,CAAC;AAG3B,cAAc,cAAc,CAAC;AAG7B,cAAc,UAAU,CAAC;AAGzB,cAAc,WAAW,CAAC;AAG1B,cAAc,aAAa,CAAC;AAG5B,cAAc,cAAc,CAAC;AAG7B,cAAc,SAAS,CAAC;AAGxB,cAAc,aAAa,CAAC;AAG5B,cAAc,QAAQ,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,YAAY,CAAC;AAG3B,cAAc,YAAY,CAAC;AAG3B,cAAc,cAAc,CAAC;AAG7B,cAAc,UAAU,CAAC;AAGzB,cAAc,WAAW,CAAC;AAG1B,cAAc,aAAa,CAAC;AAG5B,cAAc,cAAc,CAAC;AAG7B,cAAc,SAAS,CAAC;AAGxB,cAAc,aAAa,CAAC;AAG5B,cAAc,QAAQ,CAAC;AAGvB,cAAc,aAAa,CAAC"}
package/dist/index.js CHANGED
@@ -10896,6 +10896,55 @@ var require_public_api = __commonJS((exports) => {
10896
10896
  exports.stringify = stringify;
10897
10897
  });
10898
10898
 
10899
+ // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
10900
+ var require_dist = __commonJS((exports) => {
10901
+ var composer = require_composer();
10902
+ var Document = require_Document();
10903
+ var Schema = require_Schema();
10904
+ var errors2 = require_errors();
10905
+ var Alias = require_Alias();
10906
+ var identity = require_identity();
10907
+ var Pair = require_Pair();
10908
+ var Scalar = require_Scalar();
10909
+ var YAMLMap = require_YAMLMap();
10910
+ var YAMLSeq = require_YAMLSeq();
10911
+ var cst = require_cst();
10912
+ var lexer = require_lexer();
10913
+ var lineCounter = require_line_counter();
10914
+ var parser = require_parser();
10915
+ var publicApi = require_public_api();
10916
+ var visit = require_visit();
10917
+ exports.Composer = composer.Composer;
10918
+ exports.Document = Document.Document;
10919
+ exports.Schema = Schema.Schema;
10920
+ exports.YAMLError = errors2.YAMLError;
10921
+ exports.YAMLParseError = errors2.YAMLParseError;
10922
+ exports.YAMLWarning = errors2.YAMLWarning;
10923
+ exports.Alias = Alias.Alias;
10924
+ exports.isAlias = identity.isAlias;
10925
+ exports.isCollection = identity.isCollection;
10926
+ exports.isDocument = identity.isDocument;
10927
+ exports.isMap = identity.isMap;
10928
+ exports.isNode = identity.isNode;
10929
+ exports.isPair = identity.isPair;
10930
+ exports.isScalar = identity.isScalar;
10931
+ exports.isSeq = identity.isSeq;
10932
+ exports.Pair = Pair.Pair;
10933
+ exports.Scalar = Scalar.Scalar;
10934
+ exports.YAMLMap = YAMLMap.YAMLMap;
10935
+ exports.YAMLSeq = YAMLSeq.YAMLSeq;
10936
+ exports.CST = cst;
10937
+ exports.Lexer = lexer.Lexer;
10938
+ exports.LineCounter = lineCounter.LineCounter;
10939
+ exports.Parser = parser.Parser;
10940
+ exports.parse = publicApi.parse;
10941
+ exports.parseAllDocuments = publicApi.parseAllDocuments;
10942
+ exports.parseDocument = publicApi.parseDocument;
10943
+ exports.stringify = publicApi.stringify;
10944
+ exports.visit = visit.visit;
10945
+ exports.visitAsync = visit.visitAsync;
10946
+ });
10947
+
10899
10948
  // src/evaluators/combined.ts
10900
10949
  async function getEvaluatorForType(type) {
10901
10950
  const { getEvaluator } = await Promise.resolve().then(() => (init_evaluators(), exports_evaluators));
@@ -13616,55 +13665,8 @@ var ScenarioSchema = exports_external.object({
13616
13665
  }).optional()
13617
13666
  });
13618
13667
  // src/scenario/parser.ts
13668
+ var import_yaml = __toESM(require_dist(), 1);
13619
13669
  import { readFile } from "fs/promises";
13620
-
13621
- // ../../node_modules/.bun/yaml@2.8.2/node_modules/yaml/dist/index.js
13622
- var composer = require_composer();
13623
- var Document = require_Document();
13624
- var Schema = require_Schema();
13625
- var errors2 = require_errors();
13626
- var Alias = require_Alias();
13627
- var identity = require_identity();
13628
- var Pair = require_Pair();
13629
- var Scalar = require_Scalar();
13630
- var YAMLMap = require_YAMLMap();
13631
- var YAMLSeq = require_YAMLSeq();
13632
- var cst = require_cst();
13633
- var lexer = require_lexer();
13634
- var lineCounter = require_line_counter();
13635
- var parser = require_parser();
13636
- var publicApi = require_public_api();
13637
- var visit = require_visit();
13638
- var $Composer = composer.Composer;
13639
- var $Document = Document.Document;
13640
- var $Schema = Schema.Schema;
13641
- var $YAMLError = errors2.YAMLError;
13642
- var $YAMLParseError = errors2.YAMLParseError;
13643
- var $YAMLWarning = errors2.YAMLWarning;
13644
- var $Alias = Alias.Alias;
13645
- var $isAlias = identity.isAlias;
13646
- var $isCollection = identity.isCollection;
13647
- var $isDocument = identity.isDocument;
13648
- var $isMap = identity.isMap;
13649
- var $isNode = identity.isNode;
13650
- var $isPair = identity.isPair;
13651
- var $isScalar = identity.isScalar;
13652
- var $isSeq = identity.isSeq;
13653
- var $Pair = Pair.Pair;
13654
- var $Scalar = Scalar.Scalar;
13655
- var $YAMLMap = YAMLMap.YAMLMap;
13656
- var $YAMLSeq = YAMLSeq.YAMLSeq;
13657
- var $Lexer = lexer.Lexer;
13658
- var $LineCounter = lineCounter.LineCounter;
13659
- var $Parser = parser.Parser;
13660
- var $parse = publicApi.parse;
13661
- var $parseAllDocuments = publicApi.parseAllDocuments;
13662
- var $parseDocument = publicApi.parseDocument;
13663
- var $stringify = publicApi.stringify;
13664
- var $visit = visit.visit;
13665
- var $visitAsync = visit.visitAsync;
13666
-
13667
- // src/scenario/parser.ts
13668
13670
  function expandEnvVars(obj) {
13669
13671
  if (typeof obj === "string") {
13670
13672
  return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
@@ -13699,7 +13701,7 @@ async function parseScenarioFile(filePath) {
13699
13701
  }
13700
13702
  function parseScenarioString(content, source) {
13701
13703
  try {
13702
- const raw = $parse(content);
13704
+ const raw = import_yaml.parse(content);
13703
13705
  const expanded = expandEnvVars(raw);
13704
13706
  const result = ScenarioSchema.safeParse(expanded);
13705
13707
  if (!result.success) {
@@ -16929,7 +16931,7 @@ class RealtimeChannel {
16929
16931
  }).map((bind) => {
16930
16932
  if (typeof handledPayload === "object" && "ids" in handledPayload) {
16931
16933
  const postgresChanges = handledPayload.data;
16932
- const { schema: schema2, table, commit_timestamp, type: type2, errors: errors3 } = postgresChanges;
16934
+ const { schema: schema2, table, commit_timestamp, type: type2, errors: errors2 } = postgresChanges;
16933
16935
  const enrichedPayload = {
16934
16936
  schema: schema2,
16935
16937
  table,
@@ -16937,7 +16939,7 @@ class RealtimeChannel {
16937
16939
  eventType: type2,
16938
16940
  new: {},
16939
16941
  old: {},
16940
- errors: errors3
16942
+ errors: errors2
16941
16943
  };
16942
16944
  handledPayload = Object.assign(Object.assign({}, enrichedPayload), this._getPayloadRecords(postgresChanges));
16943
16945
  }
@@ -22508,7 +22510,7 @@ class GoTrueClient {
22508
22510
  }
22509
22511
  });
22510
22512
  }
22511
- async unlinkIdentity(identity2) {
22513
+ async unlinkIdentity(identity) {
22512
22514
  try {
22513
22515
  return await this._useSession(async (result) => {
22514
22516
  var _a, _b;
@@ -22516,7 +22518,7 @@ class GoTrueClient {
22516
22518
  if (error) {
22517
22519
  throw error;
22518
22520
  }
22519
- return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity2.identity_id}`, {
22521
+ return await _request(this.fetch, "DELETE", `${this.url}/user/identities/${identity.identity_id}`, {
22520
22522
  headers: this.headers,
22521
22523
  jwt: (_b = (_a = data.session) === null || _a === undefined ? undefined : _a.access_token) !== null && _b !== undefined ? _b : undefined
22522
22524
  });
@@ -22690,20 +22692,20 @@ class GoTrueClient {
22690
22692
  if (this.broadcastChannel && broadcast) {
22691
22693
  this.broadcastChannel.postMessage({ event, session });
22692
22694
  }
22693
- const errors3 = [];
22695
+ const errors2 = [];
22694
22696
  const promises = Array.from(this.stateChangeEmitters.values()).map(async (x) => {
22695
22697
  try {
22696
22698
  await x.callback(event, session);
22697
22699
  } catch (e) {
22698
- errors3.push(e);
22700
+ errors2.push(e);
22699
22701
  }
22700
22702
  });
22701
22703
  await Promise.all(promises);
22702
- if (errors3.length > 0) {
22703
- for (let i = 0;i < errors3.length; i += 1) {
22704
- console.error(errors3[i]);
22704
+ if (errors2.length > 0) {
22705
+ for (let i = 0;i < errors2.length; i += 1) {
22706
+ console.error(errors2[i]);
22705
22707
  }
22706
- throw errors3[0];
22708
+ throw errors2[0];
22707
22709
  }
22708
22710
  } finally {
22709
22711
  this._debug(debugName, "end");
@@ -24751,6 +24753,251 @@ class Logger {
24751
24753
  }
24752
24754
  }
24753
24755
  var logger = new Logger("artemis");
24756
+ // src/validator/validator.ts
24757
+ var import_yaml2 = __toESM(require_dist(), 1);
24758
+ import { readFileSync } from "fs";
24759
+ class ScenarioValidator {
24760
+ _options;
24761
+ constructor(options = {}) {
24762
+ this._options = options;
24763
+ }
24764
+ get options() {
24765
+ return this._options;
24766
+ }
24767
+ validate(filePath) {
24768
+ const errors4 = [];
24769
+ const warnings = [];
24770
+ let content;
24771
+ try {
24772
+ content = readFileSync(filePath, "utf-8");
24773
+ } catch (err) {
24774
+ const error = err;
24775
+ errors4.push({
24776
+ line: 1,
24777
+ message: `Failed to read file: ${error.message}`,
24778
+ rule: "file-read",
24779
+ severity: "error"
24780
+ });
24781
+ return { file: filePath, valid: false, errors: errors4, warnings };
24782
+ }
24783
+ let parsed;
24784
+ try {
24785
+ parsed = import_yaml2.default.parse(content, {
24786
+ prettyErrors: true,
24787
+ strict: true
24788
+ });
24789
+ } catch (err) {
24790
+ if (err instanceof import_yaml2.default.YAMLError) {
24791
+ const linePos = err.linePos?.[0];
24792
+ errors4.push({
24793
+ line: linePos?.line || 1,
24794
+ column: linePos?.col,
24795
+ message: `Invalid YAML syntax: ${err.message}`,
24796
+ rule: "yaml-syntax",
24797
+ severity: "error"
24798
+ });
24799
+ } else {
24800
+ errors4.push({
24801
+ line: 1,
24802
+ message: `YAML parse error: ${err.message}`,
24803
+ rule: "yaml-syntax",
24804
+ severity: "error"
24805
+ });
24806
+ }
24807
+ return { file: filePath, valid: false, errors: errors4, warnings };
24808
+ }
24809
+ if (parsed === null || typeof parsed !== "object") {
24810
+ errors4.push({
24811
+ line: 1,
24812
+ message: "Scenario must be a YAML object",
24813
+ rule: "schema-type",
24814
+ severity: "error"
24815
+ });
24816
+ return { file: filePath, valid: false, errors: errors4, warnings };
24817
+ }
24818
+ const schemaResult = ScenarioSchema.safeParse(parsed);
24819
+ if (!schemaResult.success) {
24820
+ const zodErrors = this.formatZodErrors(schemaResult.error, content);
24821
+ errors4.push(...zodErrors);
24822
+ }
24823
+ if (schemaResult.success) {
24824
+ const semanticErrors = this.validateSemantics(schemaResult.data, content);
24825
+ errors4.push(...semanticErrors);
24826
+ }
24827
+ const detectedWarnings = this.detectWarnings(parsed, content);
24828
+ warnings.push(...detectedWarnings);
24829
+ return {
24830
+ file: filePath,
24831
+ valid: errors4.length === 0,
24832
+ errors: errors4,
24833
+ warnings
24834
+ };
24835
+ }
24836
+ formatZodErrors(error, content) {
24837
+ const issues = [];
24838
+ const lines = content.split(`
24839
+ `);
24840
+ for (const issue of error.issues) {
24841
+ const path = issue.path.join(".");
24842
+ const line = this.findLineForPath(lines, issue.path);
24843
+ let message;
24844
+ switch (issue.code) {
24845
+ case "invalid_type":
24846
+ message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
24847
+ break;
24848
+ case "invalid_enum_value":
24849
+ message = `'${path}' must be one of: ${issue.options.join(", ")}`;
24850
+ break;
24851
+ case "too_small":
24852
+ if (issue.type === "array") {
24853
+ message = `'${path}' must have at least ${issue.minimum} item(s)`;
24854
+ } else {
24855
+ message = `'${path}' is too small`;
24856
+ }
24857
+ break;
24858
+ case "unrecognized_keys":
24859
+ message = `Unrecognized field(s): ${issue.keys.join(", ")}`;
24860
+ break;
24861
+ default:
24862
+ message = issue.message;
24863
+ }
24864
+ issues.push({
24865
+ line,
24866
+ message,
24867
+ rule: `schema-${issue.code}`,
24868
+ severity: "error"
24869
+ });
24870
+ }
24871
+ return issues;
24872
+ }
24873
+ findLineForPath(lines, path) {
24874
+ if (path.length === 0)
24875
+ return 1;
24876
+ const searchKey = String(path[path.length - 1]);
24877
+ for (let i2 = 0;i2 < lines.length; i2++) {
24878
+ const line = lines[i2];
24879
+ if (line.includes(`${searchKey}:`) || line.includes(`- ${searchKey}:`)) {
24880
+ return i2 + 1;
24881
+ }
24882
+ if (typeof path[path.length - 1] === "number" && path.includes("cases")) {
24883
+ if (line.trim().startsWith("- id:")) {
24884
+ return i2 + 1;
24885
+ }
24886
+ }
24887
+ }
24888
+ return 1;
24889
+ }
24890
+ validateSemantics(scenario, content) {
24891
+ const errors4 = [];
24892
+ const lines = content.split(`
24893
+ `);
24894
+ const caseIds = new Set;
24895
+ for (const testCase of scenario.cases) {
24896
+ if (caseIds.has(testCase.id)) {
24897
+ const line = this.findLineForCaseId(lines, testCase.id);
24898
+ errors4.push({
24899
+ line,
24900
+ message: `Duplicate case ID: '${testCase.id}'`,
24901
+ rule: "duplicate-case-id",
24902
+ severity: "error"
24903
+ });
24904
+ }
24905
+ caseIds.add(testCase.id);
24906
+ }
24907
+ const globalVars = scenario.variables || {};
24908
+ for (const testCase of scenario.cases) {
24909
+ const caseVars = testCase.variables || {};
24910
+ const allVars = { ...globalVars, ...caseVars };
24911
+ const prompt2 = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
24912
+ const refs = this.extractVariableRefs(prompt2);
24913
+ for (const ref of refs) {
24914
+ if (!(ref in allVars)) {
24915
+ const line = this.findLineForCaseId(lines, testCase.id);
24916
+ errors4.push({
24917
+ line,
24918
+ message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
24919
+ rule: "undefined-variable",
24920
+ severity: "error",
24921
+ suggestion: `Define '${ref}' in scenario.variables or case.variables`
24922
+ });
24923
+ }
24924
+ }
24925
+ }
24926
+ return errors4;
24927
+ }
24928
+ findLineForCaseId(lines, caseId) {
24929
+ for (let i2 = 0;i2 < lines.length; i2++) {
24930
+ if (lines[i2].includes(`id: ${caseId}`) || lines[i2].includes(`id: "${caseId}"`) || lines[i2].includes(`id: '${caseId}'`)) {
24931
+ return i2 + 1;
24932
+ }
24933
+ }
24934
+ return 1;
24935
+ }
24936
+ extractVariableRefs(text) {
24937
+ const regex2 = /\{\{(\w+)\}\}/g;
24938
+ const refs = [];
24939
+ const matches = text.matchAll(regex2);
24940
+ for (const match of matches) {
24941
+ refs.push(match[1]);
24942
+ }
24943
+ return refs;
24944
+ }
24945
+ detectWarnings(parsed, content) {
24946
+ const warnings = [];
24947
+ const lines = content.split(`
24948
+ `);
24949
+ if (parsed && typeof parsed === "object") {
24950
+ const obj = parsed;
24951
+ if (this.hasDeepKey(obj, "criteria")) {
24952
+ const line = this.findLineForKey(lines, "criteria");
24953
+ warnings.push({
24954
+ line,
24955
+ message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
24956
+ rule: "deprecated-field",
24957
+ severity: "warning",
24958
+ suggestion: "Replace 'criteria' with 'rubric'"
24959
+ });
24960
+ }
24961
+ const cases = obj.cases;
24962
+ if (Array.isArray(cases) && cases.length > 20) {
24963
+ warnings.push({
24964
+ line: 1,
24965
+ message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
24966
+ rule: "performance-hint",
24967
+ severity: "warning"
24968
+ });
24969
+ }
24970
+ if (!obj.description) {
24971
+ warnings.push({
24972
+ line: 1,
24973
+ message: "Scenario is missing 'description' field. Adding a description improves documentation.",
24974
+ rule: "missing-description",
24975
+ severity: "warning"
24976
+ });
24977
+ }
24978
+ }
24979
+ return warnings;
24980
+ }
24981
+ hasDeepKey(obj, key) {
24982
+ if (obj === null || typeof obj !== "object")
24983
+ return false;
24984
+ if (key in obj)
24985
+ return true;
24986
+ for (const value of Object.values(obj)) {
24987
+ if (this.hasDeepKey(value, key))
24988
+ return true;
24989
+ }
24990
+ return false;
24991
+ }
24992
+ findLineForKey(lines, key) {
24993
+ for (let i2 = 0;i2 < lines.length; i2++) {
24994
+ if (lines[i2].includes(`${key}:`)) {
24995
+ return i2 + 1;
24996
+ }
24997
+ }
24998
+ return 1;
24999
+ }
25000
+ }
24754
25001
  export {
24755
25002
  wrapError,
24756
25003
  validateScenario,
@@ -24798,6 +25045,7 @@ export {
24798
25045
  TestCaseSchema,
24799
25046
  SupabaseStorageAdapter,
24800
25047
  SimilarityEvaluator,
25048
+ ScenarioValidator,
24801
25049
  ScenarioSchema,
24802
25050
  SUPPORTED_EXPRESSIONS,
24803
25051
  RegexEvaluator,
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Validator module exports
3
+ */
4
+ export * from './types';
5
+ export { ScenarioValidator } from './validator';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/validator/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Validator types
3
+ */
4
+ /**
5
+ * Validation error severity
6
+ */
7
+ export type ValidationSeverity = 'error' | 'warning';
8
+ /**
9
+ * Validation error/warning
10
+ */
11
+ export interface ValidationIssue {
12
+ /** Line number in the file (1-indexed) */
13
+ line: number;
14
+ /** Column number (optional) */
15
+ column?: number;
16
+ /** Error/warning message */
17
+ message: string;
18
+ /** Rule that triggered this issue */
19
+ rule: string;
20
+ /** Severity level */
21
+ severity: ValidationSeverity;
22
+ /** Suggested fix (optional) */
23
+ suggestion?: string;
24
+ }
25
+ /**
26
+ * Result for a single file validation
27
+ */
28
+ export interface ValidationResult {
29
+ /** File path that was validated */
30
+ file: string;
31
+ /** Whether the file is valid (no errors) */
32
+ valid: boolean;
33
+ /** List of errors found */
34
+ errors: ValidationIssue[];
35
+ /** List of warnings found */
36
+ warnings: ValidationIssue[];
37
+ }
38
+ /**
39
+ * Summary of validation across multiple files
40
+ */
41
+ export interface ValidationSummary {
42
+ /** Total files validated */
43
+ total: number;
44
+ /** Files that passed validation */
45
+ passed: number;
46
+ /** Files that failed validation */
47
+ failed: number;
48
+ /** Files with warnings only */
49
+ withWarnings: number;
50
+ }
51
+ /**
52
+ * Options for the validator
53
+ */
54
+ export interface ValidatorOptions {
55
+ /** Treat warnings as errors */
56
+ strict?: boolean;
57
+ }
58
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/validator/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,OAAO,GAAG,SAAS,CAAC;AAErD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,0CAA0C;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,4CAA4C;IAC5C,KAAK,EAAE,OAAO,CAAC;IACf,2BAA2B;IAC3B,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,6BAA6B;IAC7B,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,+BAA+B;IAC/B,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB"}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Scenario Validator
3
+ *
4
+ * Validates scenario files for:
5
+ * 1. YAML syntax errors
6
+ * 2. Schema violations (required fields, types)
7
+ * 3. Semantic errors (duplicate IDs, undefined variables)
8
+ * 4. Warnings (deprecated patterns)
9
+ */
10
+ import type { ValidationResult, ValidatorOptions } from './types';
11
+ /**
12
+ * Scenario validator class
13
+ */
14
+ export declare class ScenarioValidator {
15
+ private _options;
16
+ constructor(options?: ValidatorOptions);
17
+ get options(): ValidatorOptions;
18
+ /**
19
+ * Validate a scenario file
20
+ */
21
+ validate(filePath: string): ValidationResult;
22
+ /**
23
+ * Format Zod errors into ValidationIssues
24
+ */
25
+ private formatZodErrors;
26
+ /**
27
+ * Find approximate line number for a YAML path
28
+ */
29
+ private findLineForPath;
30
+ /**
31
+ * Validate semantic rules
32
+ */
33
+ private validateSemantics;
34
+ /**
35
+ * Find line number for a case ID
36
+ */
37
+ private findLineForCaseId;
38
+ /**
39
+ * Extract variable references from a string ({{varName}} format)
40
+ */
41
+ private extractVariableRefs;
42
+ /**
43
+ * Detect warnings (non-blocking issues)
44
+ */
45
+ private detectWarnings;
46
+ /**
47
+ * Check if object has a key at any depth
48
+ */
49
+ private hasDeepKey;
50
+ /**
51
+ * Find line number for a key
52
+ */
53
+ private findLineForKey;
54
+ }
55
+ //# sourceMappingURL=validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,OAAO,KAAK,EAAmB,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAEnF;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,OAAO,GAAE,gBAAqB;IAI1C,IAAI,OAAO,IAAI,gBAAgB,CAE9B;IAED;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB;IAmF5C;;OAEG;IACH,OAAO,CAAC,eAAe;IAyCvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAuBvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAazB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAU3B;;OAEG;IACH,OAAO,CAAC,cAAc;IA6CtB;;OAEG;IACH,OAAO,CAAC,UAAU;IAYlB;;OAEG;IACH,OAAO,CAAC,cAAc;CAQvB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/core",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "Core runner, evaluators, and storage for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
package/src/index.ts CHANGED
@@ -32,3 +32,6 @@ export * from './redaction';
32
32
 
33
33
  // Cost estimation
34
34
  export * from './cost';
35
+
36
+ // Validator
37
+ export * from './validator';
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Validator module exports
3
+ */
4
+
5
+ export * from './types';
6
+ export { ScenarioValidator } from './validator';
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Validator types
3
+ */
4
+
5
+ /**
6
+ * Validation error severity
7
+ */
8
+ export type ValidationSeverity = 'error' | 'warning';
9
+
10
+ /**
11
+ * Validation error/warning
12
+ */
13
+ export interface ValidationIssue {
14
+ /** Line number in the file (1-indexed) */
15
+ line: number;
16
+ /** Column number (optional) */
17
+ column?: number;
18
+ /** Error/warning message */
19
+ message: string;
20
+ /** Rule that triggered this issue */
21
+ rule: string;
22
+ /** Severity level */
23
+ severity: ValidationSeverity;
24
+ /** Suggested fix (optional) */
25
+ suggestion?: string;
26
+ }
27
+
28
+ /**
29
+ * Result for a single file validation
30
+ */
31
+ export interface ValidationResult {
32
+ /** File path that was validated */
33
+ file: string;
34
+ /** Whether the file is valid (no errors) */
35
+ valid: boolean;
36
+ /** List of errors found */
37
+ errors: ValidationIssue[];
38
+ /** List of warnings found */
39
+ warnings: ValidationIssue[];
40
+ }
41
+
42
+ /**
43
+ * Summary of validation across multiple files
44
+ */
45
+ export interface ValidationSummary {
46
+ /** Total files validated */
47
+ total: number;
48
+ /** Files that passed validation */
49
+ passed: number;
50
+ /** Files that failed validation */
51
+ failed: number;
52
+ /** Files with warnings only */
53
+ withWarnings: number;
54
+ }
55
+
56
+ /**
57
+ * Options for the validator
58
+ */
59
+ export interface ValidatorOptions {
60
+ /** Treat warnings as errors */
61
+ strict?: boolean;
62
+ }
@@ -0,0 +1,345 @@
1
+ /**
2
+ * Scenario Validator
3
+ *
4
+ * Validates scenario files for:
5
+ * 1. YAML syntax errors
6
+ * 2. Schema violations (required fields, types)
7
+ * 3. Semantic errors (duplicate IDs, undefined variables)
8
+ * 4. Warnings (deprecated patterns)
9
+ */
10
+
11
+ import { readFileSync } from 'node:fs';
12
+ import yaml from 'yaml';
13
+ import type { ZodError } from 'zod';
14
+ import { ScenarioSchema } from '../scenario/schema';
15
+ import type { ValidationIssue, ValidationResult, ValidatorOptions } from './types';
16
+
17
/**
 * Scenario validator class.
 *
 * Runs four levels of checks on a scenario file:
 *  1. YAML syntax
 *  2. Schema validation (Zod `ScenarioSchema`)
 *  3. Semantic rules (duplicate case IDs, undefined `{{variable}}` references)
 *  4. Non-blocking warnings (deprecated fields, hints)
 *
 * Line numbers attached to issues are located with indentation-based text
 * heuristics over the raw file, so they are best-effort: when a location
 * cannot be resolved the nearest resolved ancestor (or line 1) is reported.
 *
 * The options passed to the constructor are stored and exposed via `options`;
 * `validate()` itself does not consult them (in particular `strict` is not
 * applied here).
 */
export class ScenarioValidator {
  /** Matches a block-sequence item line ("- ..."), but not a "---" document marker. */
  private static readonly SEQUENCE_ITEM = /^\s*-(?:\s|$)/;

  /** Matches blank lines and comment-only lines. */
  private static readonly BLANK_OR_COMMENT = /^\s*(?:#.*)?$/;

  /** Matches `{{name}}` placeholders; the capture group is the variable name. */
  private static readonly VARIABLE_REF = /\{\{(\w+)\}\}/g;

  private _options: ValidatorOptions;

  constructor(options: ValidatorOptions = {}) {
    this._options = options;
  }

  /** Options this validator was constructed with. */
  get options(): ValidatorOptions {
    return this._options;
  }

  /**
   * Validate a scenario file.
   *
   * Never throws for unreadable or malformed files: those conditions are
   * reported as errors in the returned result.
   *
   * @param filePath - Path of the scenario file to read and validate.
   * @returns The collected errors and warnings; `valid` is true exactly when
   *   no errors were found.
   */
  validate(filePath: string): ValidationResult {
    const errors: ValidationIssue[] = [];
    const warnings: ValidationIssue[] = [];

    // Read file content
    let content: string;
    try {
      content = readFileSync(filePath, 'utf-8');
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      errors.push({
        line: 1,
        message: `Failed to read file: ${reason}`,
        rule: 'file-read',
        severity: 'error',
      });
      return { file: filePath, valid: false, errors, warnings };
    }

    // Level 1: YAML syntax validation
    let parsed: unknown;
    try {
      parsed = yaml.parse(content, {
        prettyErrors: true,
        strict: true,
      });
    } catch (err) {
      if (err instanceof yaml.YAMLError) {
        const linePos = err.linePos?.[0];
        errors.push({
          line: linePos?.line ?? 1,
          column: linePos?.col,
          message: `Invalid YAML syntax: ${err.message}`,
          rule: 'yaml-syntax',
          severity: 'error',
        });
      } else {
        const reason = err instanceof Error ? err.message : String(err);
        errors.push({
          line: 1,
          message: `YAML parse error: ${reason}`,
          rule: 'yaml-syntax',
          severity: 'error',
        });
      }
      return { file: filePath, valid: false, errors, warnings };
    }

    // A scenario must be a mapping. Arrays are `typeof 'object'` too, so they
    // are rejected explicitly here instead of surfacing as a root-level schema
    // error with an empty path.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      errors.push({
        line: 1,
        message: 'Scenario must be a YAML object',
        rule: 'schema-type',
        severity: 'error',
      });
      return { file: filePath, valid: false, errors, warnings };
    }

    // Level 2: Schema validation using Zod.
    // Level 3: Semantic validation, which needs the typed data and therefore
    // only runs when the schema passed.
    const schemaResult = ScenarioSchema.safeParse(parsed);
    if (schemaResult.success) {
      errors.push(...this.validateSemantics(schemaResult.data, content));
    } else {
      errors.push(...this.formatZodErrors(schemaResult.error, content));
    }

    // Level 4: Warnings detection (runs on the raw parsed value, so it also
    // reports when the schema failed)
    warnings.push(...this.detectWarnings(parsed, content));

    return {
      file: filePath,
      valid: errors.length === 0,
      errors,
      warnings,
    };
  }

  /**
   * Format Zod errors into ValidationIssues.
   *
   * @param error - The Zod error returned by `safeParse`.
   * @param content - Raw file text, used to locate line numbers.
   */
  private formatZodErrors(error: ZodError, content: string): ValidationIssue[] {
    const issues: ValidationIssue[] = [];
    const lines = this.splitLines(content);

    for (const issue of error.issues) {
      const path = issue.path.join('.');
      const line = this.findLineForPath(lines, issue.path);

      let message: string;
      switch (issue.code) {
        case 'invalid_type':
          message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
          break;
        case 'invalid_enum_value':
          message = `'${path}' must be one of: ${(issue as { options: string[] }).options.join(', ')}`;
          break;
        case 'too_small':
          if ((issue as { type: string }).type === 'array') {
            message = `'${path}' must have at least ${(issue as { minimum: number }).minimum} item(s)`;
          } else {
            message = `'${path}' is too small`;
          }
          break;
        case 'unrecognized_keys':
          message = `Unrecognized field(s): ${(issue as { keys: string[] }).keys.join(', ')}`;
          break;
        default:
          message = issue.message;
      }

      issues.push({
        line,
        message,
        rule: `schema-${issue.code}`,
        severity: 'error',
      });
    }

    return issues;
  }

  /**
   * Find the approximate (1-indexed) line number for a YAML path.
   *
   * The path is walked segment by segment through the block-style layout of
   * the file. If a segment cannot be located (flow style, missing key, ...)
   * the line of the deepest segment that was located is returned, or 1 when
   * nothing could be located.
   */
  private findLineForPath(lines: string[], path: (string | number)[]): number {
    const { index } = this.resolvePath(lines, path);
    return index === -1 ? 1 : index + 1;
  }

  /**
   * Walk `path` through the text and report the 0-based line index of the
   * deepest segment found (`-1` if none) together with how many leading
   * segments were resolved.
   */
  private resolvePath(
    lines: string[],
    path: (string | number)[]
  ): { index: number; depth: number } {
    let from = 0;
    let to = lines.length;
    let index = -1;
    let depth = 0;
    let previousWasItem = false;

    for (const segment of path) {
      const isItem = typeof segment === 'number';
      // Directly nested sequences ("- - x") are not handled by the heuristic.
      if (isItem && previousWasItem) break;

      const span = isItem
        ? this.locateSequenceItem(lines, from, to, segment)
        : this.locateKey(lines, from, to, String(segment));
      if (span === undefined) break;

      index = span.start;
      // The first key of a sequence item shares the item's line ("- id: x"),
      // whereas the children of a mapping key start on the following line.
      from = isItem ? span.start : span.start + 1;
      to = span.end;
      depth += 1;
      previousWasItem = isItem;
    }

    return { index, depth };
  }

  /**
   * Locate mapping key `key` within lines `[from, to)`.
   *
   * Among all candidate lines the one with the smallest key column wins, so a
   * direct child is preferred over an identically named nested key. The
   * returned span ends at the first line that dedents out of the key's value.
   */
  private locateKey(
    lines: string[],
    from: number,
    to: number,
    key: string
  ): { start: number; end: number } | undefined {
    const pattern = this.keyPattern(key);
    let start = -1;
    let column = Number.POSITIVE_INFINITY;

    for (let i = from; i < to; i++) {
      const match = pattern.exec(lines[i] ?? '');
      if (match === null) continue;
      const candidateColumn = (match[1] ?? '').length;
      if (candidateColumn < column) {
        start = i;
        column = candidateColumn;
      }
    }
    if (start === -1) return undefined;

    let end = to;
    for (let i = start + 1; i < to; i++) {
      const line = lines[i] ?? '';
      if (ScenarioValidator.BLANK_OR_COMMENT.test(line)) continue;
      const indent = this.indentOf(line);
      // A sequence may sit at the same indent as its parent key, so only a
      // non-item line at the key's column ends the value.
      const sameLevelSibling = indent === column && !ScenarioValidator.SEQUENCE_ITEM.test(line);
      if (indent < column || sameLevelSibling) {
        end = i;
        break;
      }
    }

    return { start, end };
  }

  /**
   * Locate the `itemIndex`-th (0-based) item of the block sequence that starts
   * at the first content line in `[from, to)`. Returns `undefined` when that
   * line is not a sequence item or the sequence has too few items.
   */
  private locateSequenceItem(
    lines: string[],
    from: number,
    to: number,
    itemIndex: number
  ): { start: number; end: number } | undefined {
    let itemIndent = -1;
    let seen = -1;
    let start = -1;

    for (let i = from; i < to; i++) {
      const line = lines[i] ?? '';
      if (ScenarioValidator.BLANK_OR_COMMENT.test(line)) continue;

      const indent = this.indentOf(line);
      const isItem = ScenarioValidator.SEQUENCE_ITEM.test(line);

      if (itemIndent === -1) {
        if (!isItem) return undefined;
        itemIndent = indent;
      }

      // Dedent, or a plain key at the item column, terminates the sequence.
      if (indent < itemIndent || (indent === itemIndent && !isItem)) {
        return start === -1 ? undefined : { start, end: i };
      }

      if (indent === itemIndent) {
        // The next sibling item closes the span of the item we were after.
        if (start !== -1) return { start, end: i };
        seen += 1;
        if (seen === itemIndex) start = i;
      }
    }

    return start === -1 ? undefined : { start, end: to };
  }

  /**
   * Validate semantic rules: duplicate case IDs and references to variables
   * that are defined neither on the scenario nor on the case.
   *
   * @param scenario - Schema-validated scenario data.
   * @param content - Raw file text, used to locate line numbers.
   */
  private validateSemantics(
    scenario: {
      cases: Array<{ id: string; prompt: string | unknown; variables?: Record<string, unknown> }>;
      variables?: Record<string, unknown>;
    },
    content: string
  ): ValidationIssue[] {
    const errors: ValidationIssue[] = [];
    const lines = this.splitLines(content);

    // Remember how many earlier cases share each ID, so a duplicate can be
    // reported at its own line rather than at the first occurrence.
    const idCounts = new Map<string, number>();
    const indexedCases = scenario.cases.map((testCase, index) => {
      const occurrence = idCounts.get(testCase.id) ?? 0;
      idCounts.set(testCase.id, occurrence + 1);
      return { testCase, index, occurrence };
    });

    // Check for duplicate case IDs
    for (const { testCase, index, occurrence } of indexedCases) {
      if (occurrence === 0) continue;
      errors.push({
        line: this.findLineForCase(lines, index, testCase.id, occurrence),
        message: `Duplicate case ID: '${testCase.id}'`,
        rule: 'duplicate-case-id',
        severity: 'error',
      });
    }

    // Check variable references
    const globalVars = scenario.variables ?? {};
    for (const { testCase, index, occurrence } of indexedCases) {
      const definedVars = { ...globalVars, ...(testCase.variables ?? {}) };

      // JSON.stringify yields undefined for values it cannot serialize.
      const prompt =
        typeof testCase.prompt === 'string'
          ? testCase.prompt
          : (JSON.stringify(testCase.prompt) ?? '');

      for (const ref of this.extractVariableRefs(prompt)) {
        // Own-property check: `in` would accept inherited names such as
        // `toString` or `constructor` as if they were defined variables.
        if (Object.prototype.hasOwnProperty.call(definedVars, ref)) continue;
        errors.push({
          line: this.findLineForCase(lines, index, testCase.id, occurrence),
          message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
          rule: 'undefined-variable',
          severity: 'error',
          suggestion: `Define '${ref}' in scenario.variables or case.variables`,
        });
      }
    }

    return errors;
  }

  /**
   * Find the line of case number `index`: its `id` key when resolvable, the
   * start of the case item otherwise, and as a last resort a textual search
   * for the `occurrence`-th `id: <caseId>` entry.
   */
  private findLineForCase(
    lines: string[],
    index: number,
    caseId: string,
    occurrence: number
  ): number {
    const resolved = this.resolvePath(lines, ['cases', index, 'id']);
    // depth >= 2 means at least the case item itself was located.
    if (resolved.depth >= 2) return resolved.index + 1;
    return this.findLineForCaseId(lines, caseId, occurrence);
  }

  /**
   * Find the (1-indexed) line of an `id: <caseId>` entry.
   *
   * The ID is matched as a whole value (plain, single- or double-quoted) in
   * block or flow style, so `a` does not match `id: ab` or `case_id: a`.
   *
   * @param occurrence - 0-based occurrence to return; when fewer matches
   *   exist the last match is used. Returns 1 when nothing matches.
   */
  private findLineForCaseId(lines: string[], caseId: string, occurrence = 0): number {
    const id = this.escapeRegExp(caseId);
    const pattern = new RegExp(
      `(?:^|[\\s{,])id\\s*:\\s*(?:"${id}"|'${id}'|${id})(?=\\s*$|\\s*[,}]|\\s+#)`
    );

    let matched = 0;
    let last = -1;
    for (let i = 0; i < lines.length; i++) {
      if (!pattern.test(lines[i] ?? '')) continue;
      if (matched === occurrence) return i + 1;
      matched += 1;
      last = i;
    }
    return last === -1 ? 1 : last + 1;
  }

  /**
   * Extract the distinct variable names referenced in a string using the
   * `{{varName}}` format, in order of first appearance.
   */
  private extractVariableRefs(text: string): string[] {
    const names = new Set<string>();
    for (const match of text.matchAll(ScenarioValidator.VARIABLE_REF)) {
      const name = match[1];
      if (name !== undefined) names.add(name);
    }
    return [...names];
  }

  /**
   * Detect warnings (non-blocking issues).
   *
   * @param parsed - The raw value produced by the YAML parser.
   * @param content - Raw file text, used to locate line numbers.
   */
  private detectWarnings(parsed: unknown, content: string): ValidationIssue[] {
    const warnings: ValidationIssue[] = [];
    if (parsed === null || typeof parsed !== 'object') return warnings;

    const obj = parsed as Record<string, unknown>;

    // Check for deprecated 'criteria' field (should be 'rubric' for llm_grader)
    if (this.hasDeepKey(obj, 'criteria')) {
      warnings.push({
        line: this.findLineForKey(this.splitLines(content), 'criteria'),
        message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
        rule: 'deprecated-field',
        severity: 'warning',
        suggestion: "Replace 'criteria' with 'rubric'",
      });
    }

    // Check for very large number of cases without parallel recommendation
    const cases = obj.cases;
    if (Array.isArray(cases) && cases.length > 20) {
      warnings.push({
        line: 1,
        message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
        rule: 'performance-hint',
        severity: 'warning',
      });
    }

    // Check for missing description
    if (!obj.description) {
      warnings.push({
        line: 1,
        message:
          "Scenario is missing 'description' field. Adding a description improves documentation.",
        rule: 'missing-description',
        severity: 'warning',
      });
    }

    return warnings;
  }

  /**
   * Check if a value has `key` as an own property at any depth (objects and
   * arrays are traversed). Already-visited objects are skipped so shared or
   * cyclic references cannot cause unbounded recursion.
   */
  private hasDeepKey(
    obj: unknown,
    key: string,
    visited: WeakSet<object> = new WeakSet<object>()
  ): boolean {
    if (obj === null || typeof obj !== 'object') return false;
    if (visited.has(obj)) return false;
    visited.add(obj);

    if (Object.prototype.hasOwnProperty.call(obj, key)) return true;

    return Object.values(obj).some((value) => this.hasDeepKey(value, key, visited));
  }

  /**
   * Find the (1-indexed) line of the first occurrence of mapping key `key`.
   *
   * Block-style keys at the start of a line are preferred; if none exists a
   * looser search also accepts flow-style occurrences. Returns 1 when the key
   * is not found.
   */
  private findLineForKey(lines: string[], key: string): number {
    const escaped = this.escapeRegExp(key);
    const patterns = [
      this.keyPattern(key),
      new RegExp(`(?:^|[\\s{,\\[])(?:"${escaped}"|'${escaped}'|${escaped})\\s*:(?:\\s|$)`),
    ];

    for (const pattern of patterns) {
      const index = lines.findIndex((line) => pattern.test(line));
      if (index !== -1) return index + 1;
    }
    return 1;
  }

  /**
   * Build a pattern matching `key` as a block-style mapping key at the start
   * of a line. Capture group 1 is the leading whitespace plus any "- "
   * sequence markers, i.e. its length is the column where the key starts.
   */
  private keyPattern(key: string): RegExp {
    const escaped = this.escapeRegExp(key);
    return new RegExp(`^(\\s*(?:-\\s+)*)(?:"${escaped}"|'${escaped}'|${escaped})\\s*:(?:\\s|$)`);
  }

  /** Escape characters that have a special meaning in a regular expression. */
  private escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Number of leading whitespace characters of a line. */
  private indentOf(line: string): number {
    return line.length - line.trimStart().length;
  }

  /** Split file content into lines, accepting both LF and CRLF endings. */
  private splitLines(content: string): string[] {
    return content.split(/\r?\n/);
  }
}