@artemiskit/core 0.1.6 → 0.2.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +116 -0
  2. package/dist/adapters/types.d.ts +8 -1
  3. package/dist/adapters/types.d.ts.map +1 -1
  4. package/dist/artifacts/types.d.ts +39 -0
  5. package/dist/artifacts/types.d.ts.map +1 -1
  6. package/dist/cost/index.d.ts +5 -0
  7. package/dist/cost/index.d.ts.map +1 -0
  8. package/dist/cost/pricing.d.ts +67 -0
  9. package/dist/cost/pricing.d.ts.map +1 -0
  10. package/dist/evaluators/combined.d.ts +10 -0
  11. package/dist/evaluators/combined.d.ts.map +1 -0
  12. package/dist/evaluators/index.d.ts +4 -0
  13. package/dist/evaluators/index.d.ts.map +1 -1
  14. package/dist/evaluators/inline.d.ts +22 -0
  15. package/dist/evaluators/inline.d.ts.map +1 -0
  16. package/dist/evaluators/llm-grader.d.ts.map +1 -1
  17. package/dist/evaluators/not-contains.d.ts +10 -0
  18. package/dist/evaluators/not-contains.d.ts.map +1 -0
  19. package/dist/evaluators/similarity.d.ts +16 -0
  20. package/dist/evaluators/similarity.d.ts.map +1 -0
  21. package/dist/index.d.ts +1 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +13212 -12018
  24. package/dist/scenario/discovery.d.ts +72 -0
  25. package/dist/scenario/discovery.d.ts.map +1 -0
  26. package/dist/scenario/index.d.ts +1 -0
  27. package/dist/scenario/index.d.ts.map +1 -1
  28. package/dist/scenario/schema.d.ts +1253 -9
  29. package/dist/scenario/schema.d.ts.map +1 -1
  30. package/dist/storage/local.d.ts +44 -2
  31. package/dist/storage/local.d.ts.map +1 -1
  32. package/dist/storage/types.d.ts +62 -0
  33. package/dist/storage/types.d.ts.map +1 -1
  34. package/package.json +1 -1
  35. package/src/adapters/types.ts +8 -1
  36. package/src/artifacts/types.ts +39 -0
  37. package/src/cost/index.ts +14 -0
  38. package/src/cost/pricing.ts +450 -0
  39. package/src/evaluators/combined.test.ts +172 -0
  40. package/src/evaluators/combined.ts +95 -0
  41. package/src/evaluators/index.ts +12 -0
  42. package/src/evaluators/inline.test.ts +409 -0
  43. package/src/evaluators/inline.ts +393 -0
  44. package/src/evaluators/llm-grader.ts +45 -13
  45. package/src/evaluators/not-contains.test.ts +105 -0
  46. package/src/evaluators/not-contains.ts +45 -0
  47. package/src/evaluators/similarity.test.ts +333 -0
  48. package/src/evaluators/similarity.ts +258 -0
  49. package/src/index.ts +3 -0
  50. package/src/scenario/discovery.test.ts +153 -0
  51. package/src/scenario/discovery.ts +277 -0
  52. package/src/scenario/index.ts +1 -0
  53. package/src/scenario/schema.ts +47 -2
  54. package/src/storage/local.test.ts +243 -0
  55. package/src/storage/local.ts +162 -2
  56. package/src/storage/types.ts +73 -0
package/src/evaluators/index.ts
@@ -2,12 +2,16 @@
2
2
  * Evaluators module - exports all evaluator types and utilities
3
3
  */
4
4
 
5
+ import { CombinedEvaluator } from './combined';
5
6
  import { ContainsEvaluator } from './contains';
6
7
  import { ExactEvaluator } from './exact';
7
8
  import { FuzzyEvaluator } from './fuzzy';
9
+ import { InlineEvaluator } from './inline';
8
10
  import { JsonSchemaEvaluator } from './json-schema';
9
11
  import { LLMGraderEvaluator } from './llm-grader';
12
+ import { NotContainsEvaluator } from './not-contains';
10
13
  import { RegexEvaluator } from './regex';
14
+ import { SimilarityEvaluator } from './similarity';
11
15
  import type { Evaluator } from './types';
12
16
 
13
17
  const evaluators = new Map<string, Evaluator>();
@@ -15,8 +19,12 @@ evaluators.set('exact', new ExactEvaluator());
15
19
  evaluators.set('regex', new RegexEvaluator());
16
20
  evaluators.set('fuzzy', new FuzzyEvaluator());
17
21
  evaluators.set('contains', new ContainsEvaluator());
22
+ evaluators.set('not_contains', new NotContainsEvaluator());
23
+ evaluators.set('combined', new CombinedEvaluator());
18
24
  evaluators.set('json_schema', new JsonSchemaEvaluator());
19
25
  evaluators.set('llm_grader', new LLMGraderEvaluator());
26
+ evaluators.set('similarity', new SimilarityEvaluator());
27
+ evaluators.set('inline', new InlineEvaluator());
20
28
 
21
29
  /**
22
30
  * Get an evaluator by type
@@ -49,5 +57,9 @@ export { ExactEvaluator } from './exact';
49
57
  export { RegexEvaluator } from './regex';
50
58
  export { FuzzyEvaluator } from './fuzzy';
51
59
  export { ContainsEvaluator } from './contains';
60
+ export { NotContainsEvaluator } from './not-contains';
61
+ export { CombinedEvaluator } from './combined';
52
62
  export { JsonSchemaEvaluator } from './json-schema';
53
63
  export { LLMGraderEvaluator } from './llm-grader';
64
+ export { SimilarityEvaluator } from './similarity';
65
+ export { InlineEvaluator, SUPPORTED_EXPRESSIONS } from './inline';
package/src/evaluators/inline.test.ts
@@ -0,0 +1,409 @@
1
+ /**
2
+ * Tests for inline custom matcher evaluator
3
+ */
4
+
5
+ import { describe, expect, it } from 'vitest';
6
+ import type { Expected } from '../scenario/schema';
7
+ import { InlineEvaluator, SUPPORTED_EXPRESSIONS } from './inline';
8
+
9
+ describe('InlineEvaluator', () => {
10
+ const evaluator = new InlineEvaluator();
11
+
12
+ describe('type', () => {
13
+ it('should have correct type', () => {
14
+ expect(evaluator.type).toBe('inline');
15
+ });
16
+ });
17
+
18
+ describe('string methods', () => {
19
+ it('should evaluate response.includes() - pass', async () => {
20
+ const expected: Expected = {
21
+ type: 'inline',
22
+ expression: 'response.includes("hello")',
23
+ };
24
+ const result = await evaluator.evaluate('hello world', expected);
25
+ expect(result.passed).toBe(true);
26
+ expect(result.score).toBe(1);
27
+ });
28
+
29
+ it('should evaluate response.includes() - fail', async () => {
30
+ const expected: Expected = {
31
+ type: 'inline',
32
+ expression: 'response.includes("goodbye")',
33
+ };
34
+ const result = await evaluator.evaluate('hello world', expected);
35
+ expect(result.passed).toBe(false);
36
+ expect(result.score).toBe(0);
37
+ });
38
+
39
+ it('should evaluate !response.includes() - pass', async () => {
40
+ const expected: Expected = {
41
+ type: 'inline',
42
+ expression: '!response.includes("goodbye")',
43
+ };
44
+ const result = await evaluator.evaluate('hello world', expected);
45
+ expect(result.passed).toBe(true);
46
+ expect(result.score).toBe(1);
47
+ });
48
+
49
+ it('should evaluate !response.includes() - fail', async () => {
50
+ const expected: Expected = {
51
+ type: 'inline',
52
+ expression: '!response.includes("hello")',
53
+ };
54
+ const result = await evaluator.evaluate('hello world', expected);
55
+ expect(result.passed).toBe(false);
56
+ expect(result.score).toBe(0);
57
+ });
58
+
59
+ it('should evaluate response.startsWith()', async () => {
60
+ const expected: Expected = {
61
+ type: 'inline',
62
+ expression: 'response.startsWith("hello")',
63
+ };
64
+ const result = await evaluator.evaluate('hello world', expected);
65
+ expect(result.passed).toBe(true);
66
+ });
67
+
68
+ it('should evaluate response.endsWith()', async () => {
69
+ const expected: Expected = {
70
+ type: 'inline',
71
+ expression: 'response.endsWith("world")',
72
+ };
73
+ const result = await evaluator.evaluate('hello world', expected);
74
+ expect(result.passed).toBe(true);
75
+ });
76
+
77
+ it('should evaluate response.toLowerCase().includes()', async () => {
78
+ const expected: Expected = {
79
+ type: 'inline',
80
+ expression: 'response.toLowerCase().includes("hello")',
81
+ };
82
+ const result = await evaluator.evaluate('HELLO WORLD', expected);
83
+ expect(result.passed).toBe(true);
84
+ });
85
+ });
86
+
87
+ describe('regex matching', () => {
88
+ it('should evaluate response.match() - pass', async () => {
89
+ const expected: Expected = {
90
+ type: 'inline',
91
+ expression: 'response.match(/\\d{3}-\\d{4}/)',
92
+ };
93
+ const result = await evaluator.evaluate('Call me at 555-1234', expected);
94
+ expect(result.passed).toBe(true);
95
+ });
96
+
97
+ it('should evaluate response.match() - fail', async () => {
98
+ const expected: Expected = {
99
+ type: 'inline',
100
+ expression: 'response.match(/\\d{3}-\\d{4}/)',
101
+ };
102
+ const result = await evaluator.evaluate('No phone number here', expected);
103
+ expect(result.passed).toBe(false);
104
+ });
105
+
106
+ it('should evaluate response.match() with flags', async () => {
107
+ const expected: Expected = {
108
+ type: 'inline',
109
+ expression: 'response.match(/hello/i)',
110
+ };
111
+ const result = await evaluator.evaluate('HELLO world', expected);
112
+ expect(result.passed).toBe(true);
113
+ });
114
+
115
+ it('should evaluate !response.match()', async () => {
116
+ const expected: Expected = {
117
+ type: 'inline',
118
+ expression: '!response.match(/error/i)',
119
+ };
120
+ const result = await evaluator.evaluate('Success!', expected);
121
+ expect(result.passed).toBe(true);
122
+ });
123
+ });
124
+
125
+ describe('length comparisons', () => {
126
+ it('should evaluate length > N', async () => {
127
+ const expected: Expected = {
128
+ type: 'inline',
129
+ expression: 'length > 5',
130
+ };
131
+ const result = await evaluator.evaluate('hello world', expected);
132
+ expect(result.passed).toBe(true);
133
+ });
134
+
135
+ it('should evaluate length < N', async () => {
136
+ const expected: Expected = {
137
+ type: 'inline',
138
+ expression: 'length < 100',
139
+ };
140
+ const result = await evaluator.evaluate('short', expected);
141
+ expect(result.passed).toBe(true);
142
+ });
143
+
144
+ it('should evaluate length >= N', async () => {
145
+ const expected: Expected = {
146
+ type: 'inline',
147
+ expression: 'length >= 11',
148
+ };
149
+ const result = await evaluator.evaluate('hello world', expected);
150
+ expect(result.passed).toBe(true);
151
+ });
152
+
153
+ it('should evaluate length <= N', async () => {
154
+ const expected: Expected = {
155
+ type: 'inline',
156
+ expression: 'length <= 11',
157
+ };
158
+ const result = await evaluator.evaluate('hello world', expected);
159
+ expect(result.passed).toBe(true);
160
+ });
161
+
162
+ it('should evaluate length == N', async () => {
163
+ const expected: Expected = {
164
+ type: 'inline',
165
+ expression: 'length == 11',
166
+ };
167
+ const result = await evaluator.evaluate('hello world', expected);
168
+ expect(result.passed).toBe(true);
169
+ });
170
+
171
+ it('should evaluate length != N', async () => {
172
+ const expected: Expected = {
173
+ type: 'inline',
174
+ expression: 'length != 5',
175
+ };
176
+ const result = await evaluator.evaluate('hello world', expected);
177
+ expect(result.passed).toBe(true);
178
+ });
179
+ });
180
+
181
+ describe('words and lines', () => {
182
+ it('should evaluate words.length > N', async () => {
183
+ const expected: Expected = {
184
+ type: 'inline',
185
+ expression: 'words.length > 2',
186
+ };
187
+ const result = await evaluator.evaluate('one two three four', expected);
188
+ expect(result.passed).toBe(true);
189
+ });
190
+
191
+ it('should evaluate words.length == N', async () => {
192
+ const expected: Expected = {
193
+ type: 'inline',
194
+ expression: 'words.length == 4',
195
+ };
196
+ const result = await evaluator.evaluate('one two three four', expected);
197
+ expect(result.passed).toBe(true);
198
+ });
199
+
200
+ it('should evaluate lines.length > N', async () => {
201
+ const expected: Expected = {
202
+ type: 'inline',
203
+ expression: 'lines.length > 1',
204
+ };
205
+ const result = await evaluator.evaluate('line one\nline two\nline three', expected);
206
+ expect(result.passed).toBe(true);
207
+ });
208
+
209
+ it('should evaluate lines.length == N', async () => {
210
+ const expected: Expected = {
211
+ type: 'inline',
212
+ expression: 'lines.length == 3',
213
+ };
214
+ const result = await evaluator.evaluate('line one\nline two\nline three', expected);
215
+ expect(result.passed).toBe(true);
216
+ });
217
+ });
218
+
219
+ describe('JSON field access', () => {
220
+ it('should evaluate json.field == "value"', async () => {
221
+ const expected: Expected = {
222
+ type: 'inline',
223
+ expression: 'json.status == "success"',
224
+ };
225
+ const result = await evaluator.evaluate('{"status": "success"}', expected);
226
+ expect(result.passed).toBe(true);
227
+ });
228
+
229
+ it('should evaluate json.field != "value"', async () => {
230
+ const expected: Expected = {
231
+ type: 'inline',
232
+ expression: 'json.status != "error"',
233
+ };
234
+ const result = await evaluator.evaluate('{"status": "success"}', expected);
235
+ expect(result.passed).toBe(true);
236
+ });
237
+
238
+ it('should evaluate json.field > N', async () => {
239
+ const expected: Expected = {
240
+ type: 'inline',
241
+ expression: 'json.count > 5',
242
+ };
243
+ const result = await evaluator.evaluate('{"count": 10}', expected);
244
+ expect(result.passed).toBe(true);
245
+ });
246
+
247
+ it('should evaluate nested json.field.subfield', async () => {
248
+ const expected: Expected = {
249
+ type: 'inline',
250
+ expression: 'json.data.value == "test"',
251
+ };
252
+ const result = await evaluator.evaluate('{"data": {"value": "test"}}', expected);
253
+ expect(result.passed).toBe(true);
254
+ });
255
+
256
+ it('should evaluate json != null for valid JSON', async () => {
257
+ const expected: Expected = {
258
+ type: 'inline',
259
+ expression: 'json != null',
260
+ };
261
+ const result = await evaluator.evaluate('{"valid": true}', expected);
262
+ expect(result.passed).toBe(true);
263
+ });
264
+
265
+ it('should evaluate json != null for invalid JSON', async () => {
266
+ const expected: Expected = {
267
+ type: 'inline',
268
+ expression: 'json != null',
269
+ };
270
+ const result = await evaluator.evaluate('not valid json', expected);
271
+ expect(result.passed).toBe(false);
272
+ });
273
+
274
+ it('should handle boolean values in JSON', async () => {
275
+ const expected: Expected = {
276
+ type: 'inline',
277
+ expression: 'json.active == true',
278
+ };
279
+ const result = await evaluator.evaluate('{"active": true}', expected);
280
+ expect(result.passed).toBe(true);
281
+ });
282
+ });
283
+
284
+ describe('expected value comparison', () => {
285
+ it('should evaluate response == expected', async () => {
286
+ const expected: Expected = {
287
+ type: 'inline',
288
+ expression: 'response == expected',
289
+ value: 'hello world',
290
+ };
291
+ const result = await evaluator.evaluate('hello world', expected);
292
+ expect(result.passed).toBe(true);
293
+ });
294
+
295
+ it('should evaluate response.trim() == expected', async () => {
296
+ const expected: Expected = {
297
+ type: 'inline',
298
+ expression: 'response.trim() == expected',
299
+ value: 'hello world',
300
+ };
301
+ const result = await evaluator.evaluate(' hello world ', expected);
302
+ expect(result.passed).toBe(true);
303
+ });
304
+ });
305
+
306
+ describe('combined expressions', () => {
307
+ it('should evaluate && expressions - all pass', async () => {
308
+ const expected: Expected = {
309
+ type: 'inline',
310
+ expression: 'response.includes("hello") && response.includes("world")',
311
+ };
312
+ const result = await evaluator.evaluate('hello world', expected);
313
+ expect(result.passed).toBe(true);
314
+ expect(result.score).toBe(1);
315
+ });
316
+
317
+ it('should evaluate && expressions - one fails', async () => {
318
+ const expected: Expected = {
319
+ type: 'inline',
320
+ expression: 'response.includes("hello") && response.includes("goodbye")',
321
+ };
322
+ const result = await evaluator.evaluate('hello world', expected);
323
+ expect(result.passed).toBe(false);
324
+ expect(result.score).toBe(0.5);
325
+ });
326
+
327
+ it('should evaluate || expressions - one passes', async () => {
328
+ const expected: Expected = {
329
+ type: 'inline',
330
+ expression: 'response.includes("hello") || response.includes("goodbye")',
331
+ };
332
+ const result = await evaluator.evaluate('hello world', expected);
333
+ expect(result.passed).toBe(true);
334
+ expect(result.score).toBe(1);
335
+ });
336
+
337
+ it('should evaluate || expressions - none pass', async () => {
338
+ const expected: Expected = {
339
+ type: 'inline',
340
+ expression: 'response.includes("foo") || response.includes("bar")',
341
+ };
342
+ const result = await evaluator.evaluate('hello world', expected);
343
+ expect(result.passed).toBe(false);
344
+ expect(result.score).toBe(0);
345
+ });
346
+
347
+ it('should handle complex combined expressions', async () => {
348
+ const expected: Expected = {
349
+ type: 'inline',
350
+ expression: 'length > 5 && words.length >= 2',
351
+ };
352
+ const result = await evaluator.evaluate('hello world', expected);
353
+ expect(result.passed).toBe(true);
354
+ });
355
+ });
356
+
357
+ describe('error handling', () => {
358
+ it('should handle unsupported expression patterns', async () => {
359
+ const expected: Expected = {
360
+ type: 'inline',
361
+ expression: 'someUnsupportedFunction()',
362
+ };
363
+ const result = await evaluator.evaluate('hello world', expected);
364
+ expect(result.passed).toBe(false);
365
+ expect(result.reason).toContain('Unsupported expression pattern');
366
+ });
367
+
368
+ it('should throw for wrong expected type', async () => {
369
+ const expected = {
370
+ type: 'exact',
371
+ value: 'hello',
372
+ } as Expected;
373
+ await expect(evaluator.evaluate('hello', expected)).rejects.toThrow(
374
+ 'Invalid expected type for InlineEvaluator'
375
+ );
376
+ });
377
+ });
378
+
379
+ describe('result details', () => {
380
+ it('should include details in result', async () => {
381
+ const expected: Expected = {
382
+ type: 'inline',
383
+ expression: 'response.includes("hello")',
384
+ value: 'test value',
385
+ };
386
+ const result = await evaluator.evaluate('hello world', expected);
387
+ expect(result.details).toBeDefined();
388
+ expect(result.details?.expression).toBe('response.includes("hello")');
389
+ expect(result.details?.expectedValue).toBe('test value');
390
+ expect(result.details?.responseLength).toBe(11);
391
+ expect(result.details?.wordCount).toBe(2);
392
+ expect(result.details?.lineCount).toBe(1);
393
+ expect(result.details?.isValidJson).toBe(false);
394
+ });
395
+ });
396
+
397
+ describe('SUPPORTED_EXPRESSIONS', () => {
398
+ it('should export list of supported expressions', () => {
399
+ expect(SUPPORTED_EXPRESSIONS).toBeDefined();
400
+ expect(Array.isArray(SUPPORTED_EXPRESSIONS)).toBe(true);
401
+ expect(SUPPORTED_EXPRESSIONS.length).toBeGreaterThan(0);
402
+ expect(SUPPORTED_EXPRESSIONS).toContain('response.includes("text")');
403
+ expect(SUPPORTED_EXPRESSIONS).toContain('response.match(/regex/)');
404
+ expect(SUPPORTED_EXPRESSIONS).toContain(
405
+ 'length > N / length < N / length >= N / length <= N / length == N'
406
+ );
407
+ });
408
+ });
409
+ });