@artemiskit/core 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +74 -0
  2. package/dist/adapters/types.d.ts +3 -1
  3. package/dist/adapters/types.d.ts.map +1 -1
  4. package/dist/artifacts/types.d.ts +39 -0
  5. package/dist/artifacts/types.d.ts.map +1 -1
  6. package/dist/cost/index.d.ts +5 -0
  7. package/dist/cost/index.d.ts.map +1 -0
  8. package/dist/cost/pricing.d.ts +66 -0
  9. package/dist/cost/pricing.d.ts.map +1 -0
  10. package/dist/evaluators/combined.d.ts +10 -0
  11. package/dist/evaluators/combined.d.ts.map +1 -0
  12. package/dist/evaluators/index.d.ts +4 -0
  13. package/dist/evaluators/index.d.ts.map +1 -1
  14. package/dist/evaluators/inline.d.ts +22 -0
  15. package/dist/evaluators/inline.d.ts.map +1 -0
  16. package/dist/evaluators/not-contains.d.ts +10 -0
  17. package/dist/evaluators/not-contains.d.ts.map +1 -0
  18. package/dist/evaluators/similarity.d.ts +16 -0
  19. package/dist/evaluators/similarity.d.ts.map +1 -0
  20. package/dist/events/emitter.d.ts +111 -0
  21. package/dist/events/emitter.d.ts.map +1 -0
  22. package/dist/events/index.d.ts +6 -0
  23. package/dist/events/index.d.ts.map +1 -0
  24. package/dist/events/types.d.ts +177 -0
  25. package/dist/events/types.d.ts.map +1 -0
  26. package/dist/index.d.ts +1 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +13056 -12093
  29. package/dist/scenario/discovery.d.ts +72 -0
  30. package/dist/scenario/discovery.d.ts.map +1 -0
  31. package/dist/scenario/index.d.ts +1 -0
  32. package/dist/scenario/index.d.ts.map +1 -1
  33. package/dist/scenario/schema.d.ts +1245 -9
  34. package/dist/scenario/schema.d.ts.map +1 -1
  35. package/package.json +1 -1
  36. package/src/adapters/types.ts +3 -1
  37. package/src/artifacts/types.ts +39 -0
  38. package/src/cost/index.ts +14 -0
  39. package/src/cost/pricing.ts +273 -0
  40. package/src/evaluators/combined.test.ts +172 -0
  41. package/src/evaluators/combined.ts +95 -0
  42. package/src/evaluators/index.ts +12 -0
  43. package/src/evaluators/inline.test.ts +409 -0
  44. package/src/evaluators/inline.ts +393 -0
  45. package/src/evaluators/not-contains.test.ts +105 -0
  46. package/src/evaluators/not-contains.ts +45 -0
  47. package/src/evaluators/similarity.test.ts +333 -0
  48. package/src/evaluators/similarity.ts +258 -0
  49. package/src/index.ts +3 -0
  50. package/src/scenario/discovery.test.ts +153 -0
  51. package/src/scenario/discovery.ts +277 -0
  52. package/src/scenario/index.ts +1 -0
  53. package/src/scenario/schema.ts +43 -2
@@ -0,0 +1,393 @@
1
+ /**
2
+ * Inline custom matcher evaluator
3
+ * Allows users to define simple matching expressions directly in YAML
4
+ *
5
+ * Supports a safe subset of JavaScript-like expressions:
6
+ * - String methods: response.includes, response.startsWith, response.endsWith, response.toLowerCase().includes, response.trim() == expected
7
+ * - Comparisons: ==, !=, >, <, >=, <=
8
+ * - Logical operators: &&, ||, !
9
+ * - Regex matching: response.match(/pattern/)
10
+ * - JSON fields: the response is parsed with JSON.parse up front and exposed as `json` (e.g. json.field == "value", json != null)
11
+ */
12
+
13
+ import type { Expected } from '../scenario/schema';
14
+ import type { Evaluator, EvaluatorContext, EvaluatorResult } from './types';
15
+
16
/**
 * Split a string on a delimiter, but only where the delimiter occurs outside
 * of single- or double-quoted sections.
 *
 * A quote character toggles the quote state unless it is escaped, i.e.
 * preceded by an odd number of consecutive backslashes. An even number of
 * backslashes means the backslashes escape each other and the quote is real
 * (for example the closing quote in `"a\\"`).
 *
 * @param str - The text to split.
 * @param delimiter - The separator to split on. An empty delimiter performs
 *   no split and yields `[str]`.
 * @returns The pieces in order, with quotes and escapes kept verbatim and no
 *   trimming applied. Always contains at least one element.
 */
function splitOutsideQuotes(str: string, delimiter: string): string[] {
  // An empty delimiter would match at every index without advancing the
  // cursor, so the loop below would never terminate.
  if (delimiter.length === 0) {
    return [str];
  }

  const parts: string[] = [];
  let current = '';
  let inSingleQuote = false;
  let inDoubleQuote = false;
  let i = 0;

  while (i < str.length) {
    const char = str[i];

    // Delimiter found outside of any quoted section: close the current part.
    if (!inSingleQuote && !inDoubleQuote && str.startsWith(delimiter, i)) {
      parts.push(current);
      current = '';
      i += delimiter.length;
      continue;
    }

    if (char === "'" || char === '"') {
      // Count the run of backslashes directly before the quote; only an odd
      // count actually escapes it.
      let backslashes = 0;
      for (let j = i - 1; j >= 0 && str[j] === '\\'; j--) {
        backslashes++;
      }
      const escaped = backslashes % 2 === 1;

      if (!escaped) {
        if (char === "'" && !inDoubleQuote) {
          inSingleQuote = !inSingleQuote;
        } else if (char === '"' && !inSingleQuote) {
          inDoubleQuote = !inDoubleQuote;
        }
      }
    }

    current += char;
    i++;
  }

  parts.push(current);
  return parts;
}
51
+
52
/**
 * Variables an inline expression may refer to. The evaluator never executes
 * user code; it only reads these pre-computed values.
 */
interface ExpressionContext {
  /** Raw response text under evaluation. */
  response: string;
  /** Optional comparison value used by the `... == expected` patterns. */
  expected: string | undefined;
  /** Number compared by the `length <op> N` pattern. */
  length: number;
  /** Tokens compared by the `words.length <op> N` pattern. */
  words: string[];
  /** Lines compared by the `lines.length <op> N` pattern. */
  lines: string[];
  /**
   * Parsed JSON value of the response, or `null` when no JSON is available.
   * Typed as plain `unknown`: `unknown | null` collapses to `unknown`, so the
   * extra constituent added nothing.
   */
  json: unknown;
}
63
+
64
/**
 * Compare two numbers using one of the operators accepted by the expression
 * grammar (`>`, `<`, `>=`, `<=`, `==`, `!=`). Any other operator yields `false`.
 */
function compareNumbers(left: number, op: string, right: number): boolean {
  switch (op) {
    case '>':
      return left > right;
    case '<':
      return left < right;
    case '>=':
      return left >= right;
    case '<=':
      return left <= right;
    case '==':
      return left === right;
    case '!=':
      return left !== right;
    default:
      return false;
  }
}

/** Wrap a boolean outcome in the `{ result, score }` shape (1 on pass, 0 on fail). */
function toOutcome(result: boolean): { result: boolean; score: number } {
  return { result, score: result ? 1 : 0 };
}

/**
 * Evaluate one expression that has no top-level `&&` / `||`.
 *
 * Patterns are tried in a fixed order and the first match wins.
 *
 * @throws Error when no supported pattern matches, or when a regex literal in
 *   the expression is not a valid regular expression.
 */
function evaluateAtom(
  expr: string,
  context: ExpressionContext
): { result: boolean; score: number } {
  const { response, expected, length, words, lines, json } = context;

  // Pattern: response.includes("text")
  const includesMatch = expr.match(/^response\.includes\s*\(\s*["'](.+?)["']\s*\)$/);
  if (includesMatch) {
    return toOutcome(response.includes(includesMatch[1]));
  }

  // Pattern: !response.includes("text")
  const notIncludesMatch = expr.match(/^!\s*response\.includes\s*\(\s*["'](.+?)["']\s*\)$/);
  if (notIncludesMatch) {
    return toOutcome(!response.includes(notIncludesMatch[1]));
  }

  // Pattern: response.startsWith("text")
  const startsWithMatch = expr.match(/^response\.startsWith\s*\(\s*["'](.+?)["']\s*\)$/);
  if (startsWithMatch) {
    return toOutcome(response.startsWith(startsWithMatch[1]));
  }

  // Pattern: response.endsWith("text")
  const endsWithMatch = expr.match(/^response\.endsWith\s*\(\s*["'](.+?)["']\s*\)$/);
  if (endsWithMatch) {
    return toOutcome(response.endsWith(endsWithMatch[1]));
  }

  // Pattern: response.toLowerCase().includes("text")
  // The needle is lower-cased too, so the check is case-insensitive.
  const lowerIncludesMatch = expr.match(
    /^response\.toLowerCase\s*\(\s*\)\.includes\s*\(\s*["'](.+?)["']\s*\)$/
  );
  if (lowerIncludesMatch) {
    return toOutcome(response.toLowerCase().includes(lowerIncludesMatch[1].toLowerCase()));
  }

  // Pattern: response.match(/regex/flags)
  const regexMatch = expr.match(/^response\.match\s*\(\s*\/(.+?)\/([gimsuy]*)\s*\)$/);
  if (regexMatch) {
    return toOutcome(new RegExp(regexMatch[1], regexMatch[2]).test(response));
  }

  // Pattern: !response.match(/regex/flags)
  const notRegexMatch = expr.match(/^!\s*response\.match\s*\(\s*\/(.+?)\/([gimsuy]*)\s*\)$/);
  if (notRegexMatch) {
    return toOutcome(!new RegExp(notRegexMatch[1], notRegexMatch[2]).test(response));
  }

  // Patterns: length <op> N, words.length <op> N, lines.length <op> N
  const sizeMatch = expr.match(
    /^(length|words\.length|lines\.length)\s*(>=|<=|>|<|==|!=)\s*(\d+)$/
  );
  if (sizeMatch) {
    const [, subject, op, digits] = sizeMatch;
    const actual =
      subject === 'length' ? length : subject === 'words.length' ? words.length : lines.length;
    return toOutcome(compareNumbers(actual, op, Number.parseInt(digits, 10)));
  }

  // Pattern: json.field <op> "value" | number | true | false | null
  const jsonFieldMatch = expr.match(
    /^json\.(\w+(?:\.\w+)*)\s*(>=|<=|>|<|==|!=)\s*(?:["'](.+?)["']|(\d+(?:\.\d+)?)|(true|false|null))$/
  );
  // When json is null this pattern is skipped and the expression falls
  // through to the later checks.
  if (jsonFieldMatch && json !== null) {
    const [, path, op, quoted, numeric, keyword] = jsonFieldMatch;

    // Decide the literal's type from the alternative that matched, so a
    // quoted "true" / "false" / "null" stays a string.
    let compareValue: unknown;
    if (quoted !== undefined) {
      compareValue = quoted;
    } else if (numeric !== undefined) {
      compareValue = Number.parseFloat(numeric);
    } else if (keyword === 'true') {
      compareValue = true;
    } else if (keyword === 'false') {
      compareValue = false;
    } else {
      compareValue = null;
    }

    // Walk the dotted path; any missing segment makes the field undefined.
    let fieldValue: unknown = json;
    for (const key of path.split('.')) {
      if (fieldValue && typeof fieldValue === 'object' && key in fieldValue) {
        fieldValue = (fieldValue as Record<string, unknown>)[key];
      } else {
        fieldValue = undefined;
        break;
      }
    }

    if (op === '==') {
      return toOutcome(fieldValue === compareValue);
    }
    if (op === '!=') {
      return toOutcome(fieldValue !== compareValue);
    }
    // Ordering operators only apply when the field itself is a number. The
    // right-hand literal is passed through unchanged, so a non-numeric
    // literal is coerced by the relational operator.
    return toOutcome(
      typeof fieldValue === 'number' && compareNumbers(fieldValue, op, compareValue as number)
    );
  }

  // Pattern: json != null (check if valid JSON)
  if (expr === 'json != null') {
    return toOutcome(json !== null);
  }

  // Pattern: response == expected
  if (expr === 'response == expected' && expected !== undefined) {
    return toOutcome(response === expected);
  }

  // Pattern: response.trim() == expected
  if (expr === 'response.trim() == expected' && expected !== undefined) {
    return toOutcome(response.trim() === expected);
  }

  // Unknown expression pattern
  throw new Error(`Unsupported expression pattern: ${expr}`);
}

/**
 * Evaluate a safe expression against the given context.
 *
 * Compound expressions are split on `||` first and `&&` second, so `&&` binds
 * tighter than `||`, matching JavaScript precedence: `a || b && c` is
 * `a || (b && c)`. All operands are evaluated (no short-circuiting).
 *
 * - `||`: passes when any operand passes; score is the highest operand score.
 * - `&&`: passes when every operand passes; score is the mean operand score.
 *
 * NOTE: splitting only respects quoted strings, so a regex literal containing
 * `&&` or `||` is split as well.
 *
 * @param expression - The expression text; surrounding whitespace is ignored.
 * @param context - Values the expression may reference.
 * @returns Pass/fail plus a score between 0 and 1.
 * @throws Error prefixed with "Expression evaluation failed: " when an operand
 *   is unsupported or invalid. The prefix is applied once, at the failing
 *   operand, rather than at every nesting level.
 */
function evaluateExpression(
  expression: string,
  context: ExpressionContext
): { result: boolean; score: number } {
  // Normalize the expression
  const expr = expression.trim();

  // Lowest precedence first: top-level `||`.
  const orParts = splitOutsideQuotes(expr, '||');
  if (orParts.length > 1) {
    const results = orParts.map((part) => evaluateExpression(part, context));
    return {
      result: results.some((r) => r.result),
      score: Math.max(...results.map((r) => r.score)),
    };
  }

  // Then top-level `&&`.
  const andParts = splitOutsideQuotes(expr, '&&');
  if (andParts.length > 1) {
    const results = andParts.map((part) => evaluateExpression(part, context));
    return {
      result: results.every((r) => r.result),
      score: results.reduce((sum, r) => sum + r.score, 0) / results.length,
    };
  }

  try {
    return evaluateAtom(expr, context);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`Expression evaluation failed: ${message}`);
  }
}
310
+
311
/**
 * Evaluator for `inline` expectations: checks a response against a small
 * expression supplied in the scenario.
 */
export class InlineEvaluator implements Evaluator {
  readonly type = 'inline';

  /**
   * Evaluate `response` against the inline expression in `expected`.
   *
   * Expression errors do not propagate: they are reported as a failed result
   * with score 0 and the error message in `reason` and `details.error`.
   *
   * @throws Error when `expected.type` is not `'inline'`.
   */
  async evaluate(
    response: string,
    expected: Expected,
    _context?: EvaluatorContext
  ): Promise<EvaluatorResult> {
    if (expected.type !== 'inline') {
      throw new Error('Invalid expected type for InlineEvaluator');
    }

    const { expression, value: expectedValue } = expected;

    // Best-effort JSON parse; a non-JSON response simply leaves this as null.
    let parsedJson: unknown = null;
    try {
      parsedJson = JSON.parse(response);
    } catch {
      // Not valid JSON, that's fine
    }

    // Whitespace-separated tokens and non-blank lines, both without empties.
    const nonEmptyWords = response.split(/\s+/).filter((token) => token.length > 0);
    const nonBlankLines = response.split('\n').filter((line) => line.trim().length > 0);

    const context: ExpressionContext = {
      response,
      expected: expectedValue,
      length: response.length,
      words: nonEmptyWords,
      lines: nonBlankLines,
      json: parsedJson,
    };

    let outcome: { result: boolean; score: number };
    try {
      outcome = evaluateExpression(expression, context);
    } catch (error) {
      const message = (error as Error).message;
      return {
        passed: false,
        score: 0,
        reason: `Inline matcher error: ${message}`,
        details: {
          expression,
          error: message,
        },
      };
    }

    const verdict = outcome.result
      ? `Expression passed: ${expression}`
      : `Expression failed: ${expression}`;

    return {
      passed: outcome.result,
      score: outcome.score,
      reason: verdict,
      details: {
        expression,
        expectedValue,
        responseLength: response.length,
        wordCount: nonEmptyWords.length,
        lineCount: nonBlankLines.length,
        isValidJson: parsedJson !== null,
      },
    };
  }
}
372
+
373
/**
 * List of supported expression patterns for documentation.
 *
 * Entries are human-readable templates (`N` stands for a non-negative integer,
 * `"text"` for a quoted string), not expressions to evaluate verbatim.
 */
export const SUPPORTED_EXPRESSIONS = [
  'response.includes("text")',
  '!response.includes("text")',
  'response.startsWith("text")',
  'response.endsWith("text")',
  'response.toLowerCase().includes("text")',
  'response.match(/regex/)',
  '!response.match(/regex/)',
  'length > N / length < N / length >= N / length <= N / length == N',
  'words.length > N',
  'lines.length > N',
  'json.field == "value"',
  'json.field > N',
  'json != null',
  'response == expected',
  // Implemented by the evaluator but previously missing from this list.
  'response.trim() == expected',
  'expression1 && expression2',
  'expression1 || expression2',
];
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Tests for NotContainsEvaluator
3
+ */
4
+
5
+ import { describe, expect, test } from 'bun:test';
6
+ import { NotContainsEvaluator } from './not-contains';
7
+
8
describe('NotContainsEvaluator', () => {
  const evaluator = new NotContainsEvaluator();

  // Forbidden values shared by most cases below.
  const primaryColors = ['red', 'blue', 'yellow'];

  // Run the evaluator with a not_contains expectation built from the arguments.
  const check = (response: string, values: string[], mode: 'all' | 'any') =>
    evaluator.evaluate(response, { type: 'not_contains', values, mode });

  test('passes when no forbidden values are present (mode: all)', async () => {
    const { passed, score } = await check('The colors are green and purple.', primaryColors, 'all');
    expect(passed).toBe(true);
    expect(score).toBe(1);
  });

  test('fails when any forbidden value is present (mode: all)', async () => {
    const { passed, score } = await check('The colors are red and green.', primaryColors, 'all');
    expect(passed).toBe(false);
    // Two of the three values are absent.
    expect(score).toBeCloseTo(0.67, 1);
  });

  test('fails when all forbidden values are present (mode: all)', async () => {
    const { passed, score } = await check(
      'The colors are red, blue, and yellow.',
      primaryColors,
      'all'
    );
    expect(passed).toBe(false);
    expect(score).toBe(0);
  });

  test('passes when at least one forbidden value is absent (mode: any)', async () => {
    const { passed, score } = await check('The colors are red and blue.', primaryColors, 'any');
    expect(passed).toBe(true);
    // Only one of the three values is absent.
    expect(score).toBeCloseTo(0.33, 1);
  });

  test('fails when all forbidden values are present (mode: any)', async () => {
    const { passed, score } = await check(
      'I have red, blue, and yellow paint.',
      primaryColors,
      'any'
    );
    expect(passed).toBe(false);
    expect(score).toBe(0);
  });

  test('is case insensitive', async () => {
    const { passed } = await check('GREEN PURPLE ORANGE', primaryColors, 'all');
    expect(passed).toBe(true);
  });

  test('detects values case insensitively', async () => {
    const { passed } = await check('I have RED paint', ['red'], 'all');
    expect(passed).toBe(false);
  });

  test('handles empty values array', async () => {
    const { passed, score } = await check('Any response text here', [], 'all');
    expect(passed).toBe(true);
    expect(score).toBe(1);
  });

  test('provides detailed results in details field', async () => {
    const { details } = await check('The answer is red and green.', ['red', 'blue'], 'all');
    expect(details).toEqual({
      mode: 'all',
      results: [
        { value: 'red', found: true },
        { value: 'blue', found: false },
      ],
      notFoundCount: 1,
      totalCount: 2,
    });
  });
});
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Not Contains evaluator - checks if response does NOT contain specific values
3
+ */
4
+
5
+ import type { Expected } from '../scenario/schema';
6
+ import type { Evaluator, EvaluatorResult } from './types';
7
+
8
/**
 * Evaluator for `not_contains` expectations: checks that forbidden values do
 * not appear in the response. Matching is a case-insensitive substring test.
 */
export class NotContainsEvaluator implements Evaluator {
  readonly type = 'not_contains';

  /**
   * Check `response` for the forbidden `expected.values`.
   *
   * - mode `'all'`: passes only when every value is absent.
   * - any other mode: passes when at least one value is absent.
   * - An empty `values` list passes in every mode, because nothing is
   *   forbidden. This keeps `passed` consistent with the score of 1.
   *
   * The score is the fraction of values that are absent (1 for an empty list).
   *
   * @throws Error when `expected.type` is not `'not_contains'`.
   */
  async evaluate(response: string, expected: Expected): Promise<EvaluatorResult> {
    if (expected.type !== 'not_contains') {
      throw new Error('Invalid expected type for NotContainsEvaluator');
    }

    const { values, mode } = expected;
    const totalCount = values.length;

    const normalizedResponse = response.toLowerCase();
    const results = values.map((value) => ({
      value,
      found: normalizedResponse.includes(value.toLowerCase()),
    }));

    const notFoundCount = results.filter((r) => !r.found).length;

    // With no values the 'any' rule (notFoundCount > 0) could never pass even
    // though the score is 1, so handle the empty list before applying a mode.
    const passed =
      totalCount === 0 || (mode === 'all' ? notFoundCount === totalCount : notFoundCount > 0);

    const score = totalCount > 0 ? notFoundCount / totalCount : 1;

    return {
      passed,
      score,
      reason: passed
        ? `Correctly absent: ${notFoundCount}/${totalCount} values (mode: ${mode})`
        : `Found forbidden values (mode: ${mode})`,
      details: {
        mode,
        results,
        notFoundCount,
        totalCount,
      },
    };
  }
}