langium-ai-tools 4.2.1 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -7
- package/dist/analyzer/document-analyzer.d.ts +23 -8
- package/dist/analyzer/document-analyzer.d.ts.map +1 -1
- package/dist/analyzer/document-analyzer.js +36 -27
- package/dist/analyzer/document-analyzer.js.map +1 -1
- package/dist/evals/index.d.ts +118 -0
- package/dist/evals/index.d.ts.map +1 -0
- package/dist/evals/index.js +163 -0
- package/dist/evals/index.js.map +1 -0
- package/dist/evals/runner.d.ts +21 -0
- package/dist/evals/runner.d.ts.map +1 -0
- package/dist/evals/runner.js +159 -0
- package/dist/evals/runner.js.map +1 -0
- package/dist/evaluator/chart.d.ts +2 -7
- package/dist/evaluator/chart.d.ts.map +1 -1
- package/dist/evaluator/chart.js +17 -19
- package/dist/evaluator/chart.js.map +1 -1
- package/dist/evaluator/document-evaluator.d.ts +12 -9
- package/dist/evaluator/document-evaluator.d.ts.map +1 -1
- package/dist/evaluator/document-evaluator.js +11 -8
- package/dist/evaluator/document-evaluator.js.map +1 -1
- package/dist/evaluator/eval-case.d.ts +6 -2
- package/dist/evaluator/eval-case.d.ts.map +1 -1
- package/dist/evaluator/eval-case.js +76 -30
- package/dist/evaluator/eval-case.js.map +1 -1
- package/dist/evaluator/eval-matrix.d.ts +3 -3
- package/dist/evaluator/eval-matrix.d.ts.map +1 -1
- package/dist/evaluator/eval-matrix.js +18 -19
- package/dist/evaluator/eval-matrix.js.map +1 -1
- package/dist/evaluator/evaluator.d.ts +14 -6
- package/dist/evaluator/evaluator.d.ts.map +1 -1
- package/dist/evaluator/evaluator.js +16 -21
- package/dist/evaluator/evaluator.js.map +1 -1
- package/dist/evaluator/langium-evaluator.d.ts +6 -6
- package/dist/evaluator/langium-evaluator.d.ts.map +1 -1
- package/dist/evaluator/langium-evaluator.js +10 -8
- package/dist/evaluator/langium-evaluator.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/splitter/program-map.d.ts +3 -3
- package/dist/splitter/program-map.d.ts.map +1 -1
- package/dist/splitter/program-map.js +2 -3
- package/dist/splitter/program-map.js.map +1 -1
- package/dist/splitter/splitter.d.ts +5 -5
- package/dist/splitter/splitter.d.ts.map +1 -1
- package/dist/splitter/splitter.js +7 -8
- package/dist/splitter/splitter.js.map +1 -1
- package/dist/types.d.ts +45 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +7 -0
- package/dist/types.js.map +1 -0
- package/package.json +57 -61
package/README.md
CHANGED
|
@@ -22,17 +22,13 @@ LLMs (and transformers in general), are evolving quite rapidly. With this approa
|
|
|
22
22
|
|
|
23
23
|
## Installation
|
|
24
24
|
|
|
25
|
-
Langium AI
|
|
25
|
+
Langium AI Tools treats Langium as a **peer dependency**, supporting Langium 4.x and up. Your project provides its own Langium version, and `langium-ai-tools` works alongside it.
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
|
-
|
|
29
|
-
npm i --save langium-ai-tools@^4.1.0
|
|
30
|
-
|
|
31
|
-
# or 3.5.X
|
|
32
|
-
npm i --save langium-ai-tools@^3.5.0
|
|
28
|
+
npm i --save langium-ai-tools
|
|
33
29
|
```
|
|
34
30
|
|
|
35
|
-
|
|
31
|
+
Make sure your project already has a compatible version of `langium` installed (4.x or later).
|
|
36
32
|
|
|
37
33
|
## Usage
|
|
38
34
|
|
|
@@ -151,6 +147,59 @@ You can also define custom evaluators that are more tuned to the needs of your D
|
|
|
151
147
|
|
|
152
148
|
In general we stick to focusing on what Langium can do to help with evaluation, but leave the opportunity open for you to extend, supplement, or modify evaluation logic as you see fit.
|
|
153
149
|
|
|
150
|
+
### Testing API
|
|
151
|
+
|
|
152
|
+
Langium AI Tools provides a vitest-style testing API for writing programmatic evaluation test suites. This allows you to define test cases in TypeScript with familiar features like:
|
|
153
|
+
|
|
154
|
+
- **Test suites** with `describe()`, `describe.skip()`, and `describe.only()`
|
|
155
|
+
- **Lifecycle hooks**: `beforeAll()`, `afterAll()`, `beforeEach()`, `afterEach()`
|
|
156
|
+
- **Parametrized tests** with `evaluation.each()` for testing multiple data sets
|
|
157
|
+
- **Test filtering** with `.skip()` and `.only()` modifiers
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
import { describe, evaluation, beforeAll, afterAll, beforeEach } from 'langium-ai-tools/testing';
|
|
161
|
+
|
|
162
|
+
describe('DSL Generation Tests', () => {
|
|
163
|
+
let model;
|
|
164
|
+
|
|
165
|
+
beforeAll(async () => {
|
|
166
|
+
// runs once before all tests
|
|
167
|
+
model = await setupModel();
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
beforeEach(() => {
|
|
171
|
+
// runs before each test
|
|
172
|
+
clearCache();
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
evaluation('generates valid syntax', async (ctx) => {
|
|
176
|
+
const result = await generateCode(model, ctx.systemPrompt);
|
|
177
|
+
return {
|
|
178
|
+
passed: validateSyntax(result),
|
|
179
|
+
error: !validateSyntax(result) ? 'Invalid syntax' : undefined
|
|
180
|
+
};
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
// parametrized tests
|
|
184
|
+
evaluation.each([
|
|
185
|
+
{ input: 'person Alice', expected: 'Alice' },
|
|
186
|
+
{ input: 'person Bob', expected: 'Bob' }
|
|
187
|
+
])('extracts name $expected', (data) => async (ctx) => {
|
|
188
|
+
const parsed = parse(data.input);
|
|
189
|
+
return {
|
|
190
|
+
passed: parsed.name === data.expected
|
|
191
|
+
};
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
afterAll(() => {
|
|
195
|
+
// cleanup after all tests
|
|
196
|
+
cleanupModel();
|
|
197
|
+
});
|
|
198
|
+
});
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**For detailed documentation on the Testing API**, see the [Testing API Reference](../cli/docs/testing-api.md).
|
|
202
|
+
|
|
154
203
|
### Evaluation Matrix
|
|
155
204
|
|
|
156
205
|
The Evaluation Matrix provides a framework for testing multiple model configurations against a set of test cases using Langium AI evaluators. This is particularly helpful when comparing across models, prompt strategies, RAG setups, or other variations in your AI stack.
|
|
@@ -443,6 +492,73 @@ console.table(averaged.map(r => ({
|
|
|
443
492
|
|
|
444
493
|
For more complete examples, see the [example-dsl-evaluator](../examples/example-dsl-evaluator) project.
|
|
445
494
|
|
|
495
|
+
### Analysis
|
|
496
|
+
|
|
497
|
+
The `LangiumDocumentAnalyzer` extends the `LangiumEvaluator` with syntax usage analysis. It helps you collect statistics about which Langium grammar rules are used in documents, and it computes diversity metrics. Generally, this is useful for determining the coverage of a set of DSL programs for evaluation or training with regard to what your Langium grammar can express. It won't indicate the exact way rules are expressed, but it will give you quantitative values you can use to determine the breadth & quality of your dataset.
|
|
498
|
+
|
|
499
|
+
The following is a quick example of how you can leverage the analyzer:
|
|
500
|
+
|
|
501
|
+
```ts
|
|
502
|
+
import { LangiumDocumentAnalyzer, AnalysisMode } from 'langium-ai-tools/analyzer';
|
|
503
|
+
import { createMyDSLServices } from './my-dsl';
|
|
504
|
+
import { EmptyFileSystem } from 'langium';
|
|
505
|
+
|
|
506
|
+
const services = createMyDSLServices(EmptyFileSystem).MyDSL;
|
|
507
|
+
|
|
508
|
+
// create an analyzer
|
|
509
|
+
const analyzer = new LangiumDocumentAnalyzer(services, {
|
|
510
|
+
analysisMode: AnalysisMode.ALL,
|
|
511
|
+
// rules to exclude from analysis (WS is always excluded)
|
|
512
|
+
excludeRules: ['DeprecatedRule'],
|
|
513
|
+
// include rules from imported grammars (defaults to true)
|
|
514
|
+
includeImportedRules: true,
|
|
515
|
+
// include hidden tokens, like comments (defaults to true)
|
|
516
|
+
includeHiddenRules: true,
|
|
517
|
+
// compute diversity metrics (defaults to true)
|
|
518
|
+
computeDiversity: true
|
|
519
|
+
});
|
|
520
|
+
|
|
521
|
+
// evaluate per usual
|
|
522
|
+
const result = analyzer.evaluate(someGeneratedDSLCode);
|
|
523
|
+
|
|
524
|
+
// extract syntax statistics from the result
|
|
525
|
+
const stats = analyzer.extractStatisticsFromResult(result);
|
|
526
|
+
if (stats) {
|
|
527
|
+
// { RuleName: count, ... }
|
|
528
|
+
console.log('Rule usage:', stats.ruleUsage);
|
|
529
|
+
// percentage of grammar rules used
|
|
530
|
+
console.log('Coverage:', stats.coverage);
|
|
531
|
+
// { entropy, giniCoefficient, simpsonIndex }
|
|
532
|
+
console.log('Diversity:', stats.diversity);
|
|
533
|
+
}
|
|
534
|
+
```
|
|
535
|
+
|
|
536
|
+
The analyzer computes the following metrics when parsing is successful:
|
|
537
|
+
|
|
538
|
+
- **Rule usage**: A map of grammar rule names to the number of times each rule was matched in the document.
|
|
539
|
+
- **Coverage**: The percentage of available grammar rules that are present 1 or more times.
|
|
540
|
+
- **Diversity metrics**:
|
|
541
|
+
- **Shannon entropy** — higher values indicate more diverse rule usage patterns
|
|
542
|
+
- **Gini coefficient** — 0 suggests perfectly equal usage, 1 means maximally unequal
|
|
543
|
+
- **Simpson's diversity index** — higher values suggest more diversity
|
|
544
|
+
|
|
545
|
+
The `LangiumDocumentAnalyzer` can be used as a drop-in replacement anywhere a regular `LangiumEvaluator` is expected, such as in an `EvalMatrix`:
|
|
546
|
+
|
|
547
|
+
```ts
|
|
548
|
+
import { EvalMatrix } from 'langium-ai-tools/evaluator';
|
|
549
|
+
import { LangiumDocumentAnalyzer } from 'langium-ai-tools/analyzer';
|
|
550
|
+
|
|
551
|
+
const matrix = new EvalMatrix({
|
|
552
|
+
config: { name: 'Analysis Run', history_folder: '.eval-history', num_runs: 1 },
|
|
553
|
+
runners: [myRunner],
|
|
554
|
+
evaluators: [{
|
|
555
|
+
name: 'Langium Analyzer',
|
|
556
|
+
eval: new LangiumDocumentAnalyzer(services)
|
|
557
|
+
}],
|
|
558
|
+
cases: testCases
|
|
559
|
+
});
|
|
560
|
+
```
|
|
561
|
+
|
|
446
562
|
## Contributing
|
|
447
563
|
|
|
448
564
|
If you want to help, feel free to open an issue or a PR. As a general note, we're open to accepting changes that focus on improving how we can support AI application development for Langium DSLs. But we don't want to provide explicit bindings to actual services/providers at this time, such as LlamaIndex, Ollama, LangChain, or others. Similarly, this package doesn't provide direct bindings for AI providers such as OpenAI and Anthropic here. Instead, these changes will go into a separate package under Langium AI that is intended for this purpose.
|
|
@@ -5,22 +5,37 @@
|
|
|
5
5
|
*
|
|
6
6
|
* @author Dennis Hübner
|
|
7
7
|
******************************************************************************/
|
|
8
|
-
import { type Grammar, GrammarAST, type LangiumDocument } from
|
|
9
|
-
import {
|
|
10
|
-
import { type EvaluationContext } from
|
|
11
|
-
import { type EvaluatorResult } from
|
|
12
|
-
import { LangiumEvaluator, type LangiumEvaluatorResultData } from
|
|
13
|
-
import { EvaluatorResultMsg, SyntaxStatistic } from
|
|
8
|
+
import { type Grammar, GrammarAST, type LangiumDocument } from 'langium';
|
|
9
|
+
import type { LangiumServicesLike } from '../types.js';
|
|
10
|
+
import { type EvaluationContext } from '../evaluator/document-evaluator.js';
|
|
11
|
+
import { type EvaluatorResult } from '../evaluator/evaluator.js';
|
|
12
|
+
import { LangiumEvaluator, type LangiumEvaluatorResultData } from '../evaluator/langium-evaluator.js';
|
|
13
|
+
import { EvaluatorResultMsg, SyntaxStatistic } from '../gen/interface.js';
|
|
14
|
+
/**
|
|
15
|
+
* Options for resolving grammar imports when collecting rules.
|
|
16
|
+
* If provided, the analyzer can follow grammar imports to include
|
|
17
|
+
* rules from transitively imported grammars.
|
|
18
|
+
*/
|
|
19
|
+
export interface GrammarImportResolver {
|
|
20
|
+
/**
|
|
21
|
+
* Resolve all transitively imported grammars and return their grammar objects.
|
|
22
|
+
* This mirrors the behavior of langium's `resolveTransitiveImports`, which should be passed here
|
|
23
|
+
*/
|
|
24
|
+
resolveImports(grammar: Grammar): Grammar[];
|
|
25
|
+
}
|
|
14
26
|
/**
|
|
15
27
|
* Extends LangiumEvaluator and adds analysis capabilities.
|
|
16
28
|
*/
|
|
17
|
-
export declare class LangiumDocumentAnalyzer<T extends
|
|
29
|
+
export declare class LangiumDocumentAnalyzer<T extends LangiumServicesLike> extends LangiumEvaluator<T> {
|
|
18
30
|
static readonly METADATA_KEY = "syntax_statistics";
|
|
19
31
|
private readonly analysisOptions;
|
|
32
|
+
private readonly importResolver?;
|
|
20
33
|
/**
|
|
21
34
|
* Creates an instance of LangiumDocumentAnalyzer.
|
|
22
35
|
* @param services Langium services
|
|
23
36
|
* @param analysisOptions Analysis options
|
|
37
|
+
* @param importResolver Optional resolver for grammar imports. If not provided,
|
|
38
|
+
* imported grammar rules will not be included even if `includeImportedRules` is true.
|
|
24
39
|
* @example
|
|
25
40
|
* ```typescript
|
|
26
41
|
* const analyzer = new LangiumDocumentAnalyzer(services, {
|
|
@@ -30,7 +45,7 @@ export declare class LangiumDocumentAnalyzer<T extends LangiumServices> extends
|
|
|
30
45
|
* });
|
|
31
46
|
* ```
|
|
32
47
|
*/
|
|
33
|
-
constructor(services: T, analysisOptions?: Partial<AnalysisOptions
|
|
48
|
+
constructor(services: T, analysisOptions?: Partial<AnalysisOptions>, importResolver?: GrammarImportResolver);
|
|
34
49
|
/**
|
|
35
50
|
* Evaluates a Langium document.
|
|
36
51
|
* Here we return protocol compatible object EvaluatorResultMsg.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document-analyzer.d.ts","sourceRoot":"","sources":["../../src/analyzer/document-analyzer.ts"],"names":[],"mappings":"AAAA;;;;;;gFAMgF;AAEhF,OAAO,EAAY,KAAK,OAAO,EAAE,UAAU,EAAE,KAAK,eAAe,EAAiB,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"document-analyzer.d.ts","sourceRoot":"","sources":["../../src/analyzer/document-analyzer.ts"],"names":[],"mappings":"AAAA;;;;;;gFAMgF;AAEhF,OAAO,EAAY,KAAK,OAAO,EAAE,UAAU,EAAE,KAAK,eAAe,EAAiB,MAAM,SAAS,CAAC;AAClG,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACvD,OAAO,EAAE,KAAK,iBAAiB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,KAAK,eAAe,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,KAAK,0BAA0B,EAAE,MAAM,mCAAmC,CAAC;AACtG,OAAO,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAE1E;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IAClC;;;OAGG;IACH,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,EAAE,CAAC;CAC/C;AAED;;GAEG;AACH,qBAAa,uBAAuB,CAAC,CAAC,SAAS,mBAAmB,CAAE,SAAQ,gBAAgB,CAAC,CAAC,CAAC;IAC3F,gBAAuB,YAAY,uBAAuB;IAE1D,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAwB;IAExD;;;;;;;;;;;;;;OAcG;gBACS,QAAQ,EAAE,CAAC,EAAE,eAAe,GAAE,OAAO,CAAC,eAAe,CAAM,EAAE,cAAc,CAAC,EAAE,qBAAqB;IAM/G;;;;;;;OAOG;IACH,gBAAgB,CACZ,GAAG,EAAE,eAAe,EACpB,GAAG,EAAE,iBAAiB,GACvB,eAAe,CAAC,0BAA0B,CAAC,GAAG,kBAAkB;IAgCnE,4BAA4B,CAAC,GAAG,EAAE,eAAe,EAAE,OAAO,EAAE,OAAO,GAAG,eAAe;IAqDrF;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM;IAK1D;;;OAGG;IACH,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM;IAgBzD;;;OAGG;IACH,sBAAsB,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM;IAmBjE;;;OAGG;IACH,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM;IAe9D;;;;OAIG;IACH,2BAA2B,CAAC,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,GAAG,SAAS,GAAG,eAAe,GAAG,SAAS;IAgBtG,SAAS,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,GAAG,UAAU,CAAC,YAAY,EAAE;IAiBtE,SAAS,CAAC,0BAA0B,IAAI,eAAe;CAW1D;AAED;;GAEG;AACH,oBAAY,YAAY;IACpB,GAAG,QAAQ;IACX,YAAY,iBAAiB;CAChC;AAED,UAAU,eAAe;IACrB,YAAY,EAAE,YAAY,CAAC;IAC3B;;;OAGG;IACH,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB;;OAEG;IACH,oBAAoB,EAAE,OAAO,CAAC;IAC9B;;;OAGG;IACH,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;OAEG;IACH,gBAAgB,EAAE,OAAO,CAAC;CAC7B"}
|
|
@@ -5,13 +5,11 @@
|
|
|
5
5
|
*
|
|
6
6
|
* @author Dennis Hübner
|
|
7
7
|
******************************************************************************/
|
|
8
|
-
import { CstUtils, GrammarAST, isLeafCstNode } from
|
|
9
|
-
import {
|
|
10
|
-
import {} from
|
|
11
|
-
import {} from
|
|
12
|
-
import {} from
|
|
13
|
-
import { LangiumEvaluator } from "../evaluator/langium-evaluator.js";
|
|
14
|
-
import { EvaluatorResultMsg, SyntaxStatistic } from "../gen/interface.js";
|
|
8
|
+
import { CstUtils, GrammarAST, isLeafCstNode } from 'langium';
|
|
9
|
+
import {} from '../evaluator/document-evaluator.js';
|
|
10
|
+
import {} from '../evaluator/evaluator.js';
|
|
11
|
+
import { LangiumEvaluator } from '../evaluator/langium-evaluator.js';
|
|
12
|
+
import { EvaluatorResultMsg, SyntaxStatistic } from '../gen/interface.js';
|
|
15
13
|
/**
|
|
16
14
|
* Extends LangiumEvaluator and adds analysis capabilities.
|
|
17
15
|
*/
|
|
@@ -20,6 +18,8 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
20
18
|
* Creates an instance of LangiumDocumentAnalyzer.
|
|
21
19
|
* @param services Langium services
|
|
22
20
|
* @param analysisOptions Analysis options
|
|
21
|
+
* @param importResolver Optional resolver for grammar imports. If not provided,
|
|
22
|
+
* imported grammar rules will not be included even if `includeImportedRules` is true.
|
|
23
23
|
* @example
|
|
24
24
|
* ```typescript
|
|
25
25
|
* const analyzer = new LangiumDocumentAnalyzer(services, {
|
|
@@ -29,9 +29,10 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
29
29
|
* });
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
|
-
constructor(services, analysisOptions = {}) {
|
|
32
|
+
constructor(services, analysisOptions = {}, importResolver) {
|
|
33
33
|
super(services);
|
|
34
34
|
this.analysisOptions = { ...DEFAULT_OPTIONS, ...analysisOptions };
|
|
35
|
+
this.importResolver = importResolver;
|
|
35
36
|
}
|
|
36
37
|
/**
|
|
37
38
|
* Evaluates a Langium document.
|
|
@@ -43,29 +44,31 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
43
44
|
*/
|
|
44
45
|
evaluateDocument(doc, ctx) {
|
|
45
46
|
const validationResult = super.evaluateDocument(doc, ctx);
|
|
46
|
-
if (this.analysisOptions.analysisMode !== AnalysisMode.NO_STATISTIC &&
|
|
47
|
-
|
|
47
|
+
if (this.analysisOptions.analysisMode !== AnalysisMode.NO_STATISTIC &&
|
|
48
|
+
validationResult.data &&
|
|
49
|
+
validationResult.data.failures === 0) {
|
|
50
|
+
// add syntax usage statistics only if build was successful
|
|
48
51
|
const statistics = this.collectSyntaxUsageStatistics(doc, this.services.Grammar);
|
|
49
52
|
validationResult.metadata[LangiumDocumentAnalyzer.METADATA_KEY] = {
|
|
50
53
|
value: {
|
|
51
54
|
oneofKind: 'syntaxStatisticValue',
|
|
52
|
-
syntaxStatisticValue: statistics
|
|
53
|
-
}
|
|
55
|
+
syntaxStatisticValue: statistics,
|
|
56
|
+
},
|
|
54
57
|
};
|
|
55
58
|
}
|
|
56
|
-
// make sure we fulfill the EvaluatorResultMsg interface
|
|
59
|
+
// make sure we fulfill the EvaluatorResultMsg interface
|
|
57
60
|
return {
|
|
58
61
|
...validationResult,
|
|
59
62
|
data: {
|
|
60
63
|
...validationResult.data,
|
|
61
|
-
diagnostics: validationResult.data.diagnostics.map(diagnostic => {
|
|
64
|
+
diagnostics: validationResult.data.diagnostics.map((diagnostic) => {
|
|
62
65
|
const code = typeof diagnostic.code === 'number' ? String(diagnostic.code) : diagnostic.code;
|
|
63
66
|
return {
|
|
64
67
|
...diagnostic,
|
|
65
|
-
code
|
|
68
|
+
code,
|
|
66
69
|
};
|
|
67
|
-
})
|
|
68
|
-
}
|
|
70
|
+
}),
|
|
71
|
+
},
|
|
69
72
|
};
|
|
70
73
|
}
|
|
71
74
|
collectSyntaxUsageStatistics(doc, grammar) {
|
|
@@ -78,11 +81,11 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
78
81
|
const isRuleExcluded = (ruleName) => ruleName === 'WS' || excludedRules.has(ruleName);
|
|
79
82
|
const allRules = includeImportedRules ? this.collectAllRules(grammar) : grammar.rules;
|
|
80
83
|
const ruleUsage = {};
|
|
81
|
-
//
|
|
84
|
+
// initialize rule usage map, excluding rules specified in excludeRules. Also skip entry rule.
|
|
82
85
|
for (const rule of allRules) {
|
|
83
86
|
if (!isRuleExcluded(rule.name)) {
|
|
84
|
-
if ((GrammarAST.isParserRule(rule) && rule.entry)
|
|
85
|
-
|
|
87
|
+
if ((GrammarAST.isParserRule(rule) && rule.entry) ||
|
|
88
|
+
(GrammarAST.isTerminalRule(rule) && rule.hidden && !includeHiddenRules)) {
|
|
86
89
|
continue;
|
|
87
90
|
}
|
|
88
91
|
ruleUsage[rule.name] = 0;
|
|
@@ -96,7 +99,7 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
96
99
|
}
|
|
97
100
|
};
|
|
98
101
|
if (grammarSource && GrammarAST.isRuleCall(grammarSource)) {
|
|
99
|
-
//
|
|
102
|
+
// for now handle only RuleCalls
|
|
100
103
|
addIfNotExcluded(grammarSource.rule.ref?.name ?? 'unknown');
|
|
101
104
|
}
|
|
102
105
|
else if (includeHiddenRules && cstNode.hidden && isLeafCstNode(cstNode)) {
|
|
@@ -108,7 +111,7 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
108
111
|
diversity = {
|
|
109
112
|
entropy: this.computeEntropy(ruleUsage),
|
|
110
113
|
giniCoefficient: this.computeGiniCoefficient(ruleUsage),
|
|
111
|
-
simpsonIndex: this.computeSimpsonIndex(ruleUsage)
|
|
114
|
+
simpsonIndex: this.computeSimpsonIndex(ruleUsage),
|
|
112
115
|
};
|
|
113
116
|
}
|
|
114
117
|
const coverage = this.computeCoverage(ruleUsage);
|
|
@@ -118,7 +121,7 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
118
121
|
* Computes coverage as percentage of used rules over all available rules
|
|
119
122
|
*/
|
|
120
123
|
computeCoverage(ruleUsage) {
|
|
121
|
-
const usedRules = Object.values(ruleUsage).filter(count => count > 0).length;
|
|
124
|
+
const usedRules = Object.values(ruleUsage).filter((count) => count > 0).length;
|
|
122
125
|
return usedRules > 0 ? (usedRules / Object.keys(ruleUsage).length) * 100 : 0;
|
|
123
126
|
}
|
|
124
127
|
/**
|
|
@@ -192,8 +195,14 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
192
195
|
return undefined;
|
|
193
196
|
}
|
|
194
197
|
collectAllRules(grammar) {
|
|
198
|
+
if (!this.importResolver) {
|
|
199
|
+
return grammar.rules;
|
|
200
|
+
}
|
|
195
201
|
try {
|
|
196
|
-
return grammar.rules.concat(
|
|
202
|
+
return grammar.rules.concat(this.importResolver
|
|
203
|
+
.resolveImports(grammar)
|
|
204
|
+
.map((g) => g.rules)
|
|
205
|
+
.flat());
|
|
197
206
|
}
|
|
198
207
|
catch (e) {
|
|
199
208
|
console.error('Error resolving imports: ', e);
|
|
@@ -207,8 +216,8 @@ export class LangiumDocumentAnalyzer extends LangiumEvaluator {
|
|
|
207
216
|
diversity: {
|
|
208
217
|
entropy: 0,
|
|
209
218
|
giniCoefficient: 0,
|
|
210
|
-
simpsonIndex: 0
|
|
211
|
-
}
|
|
219
|
+
simpsonIndex: 0,
|
|
220
|
+
},
|
|
212
221
|
};
|
|
213
222
|
}
|
|
214
223
|
}
|
|
@@ -226,6 +235,6 @@ const DEFAULT_OPTIONS = {
|
|
|
226
235
|
excludeRules: [],
|
|
227
236
|
includeImportedRules: true,
|
|
228
237
|
includeHiddenRules: true,
|
|
229
|
-
computeDiversity: true
|
|
238
|
+
computeDiversity: true,
|
|
230
239
|
};
|
|
231
240
|
//# sourceMappingURL=document-analyzer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document-analyzer.js","sourceRoot":"","sources":["../../src/analyzer/document-analyzer.ts"],"names":[],"mappings":"AAAA;;;;;;gFAMgF;AAEhF,OAAO,EAAE,QAAQ,EAAgB,UAAU,EAAwB,aAAa,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"document-analyzer.js","sourceRoot":"","sources":["../../src/analyzer/document-analyzer.ts"],"names":[],"mappings":"AAAA;;;;;;gFAMgF;AAEhF,OAAO,EAAE,QAAQ,EAAgB,UAAU,EAAwB,aAAa,EAAE,MAAM,SAAS,CAAC;AAElG,OAAO,EAA0B,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAwB,MAAM,2BAA2B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAmC,MAAM,mCAAmC,CAAC;AACtG,OAAO,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAe1E;;GAEG;AACH,MAAM,OAAO,uBAAuD,SAAQ,gBAAmB;IAM3F;;;;;;;;;;;;;;OAcG;IACH,YAAY,QAAW,EAAE,kBAA4C,EAAE,EAAE,cAAsC;QAC3G,KAAK,CAAC,QAAQ,CAAC,CAAC;QAChB,IAAI,CAAC,eAAe,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,eAAe,EAAE,CAAC;QAClE,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACzC,CAAC;IAED;;;;;;;OAOG;IACH,gBAAgB,CACZ,GAAoB,EACpB,GAAsB;QAEtB,MAAM,gBAAgB,GAAG,KAAK,CAAC,gBAAgB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAC1D,IACI,IAAI,CAAC,eAAe,CAAC,YAAY,KAAK,YAAY,CAAC,YAAY;YAC/D,gBAAgB,CAAC,IAAI;YACrB,gBAAgB,CAAC,IAAI,CAAC,QAAQ,KAAK,CAAC,EACtC,CAAC;YACC,2DAA2D;YAC3D,MAAM,UAAU,GAAG,IAAI,CAAC,4BAA4B,CAAC,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACjF,gBAAgB,CAAC,QAAQ,CAAC,uBAAuB,CAAC,YAAY,CAAC,GAAG;gBAC9D,KAAK,EAAE;oBACH,SAAS,EAAE,sBAAsB;oBACjC,oBAAoB,EAAE,UAAU;iBACnC;aACJ,CAAC;QACN,CAAC;QACD,wDAAwD;QACxD,OAAO;YACH,GAAG,gBAAgB;YACnB,IAAI,EAAE;gBACF,GAAG,gBAAgB,CAAC,IAAI;gBACxB,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,EAAE;oBAC9D,MAAM,IAAI,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC;oBAC7F,OAAO;wBACH,GAAG,UAAU;wBACb,IAAI;qBACP,CAAC;gBACN,CAAC,CAAC;aACL;SACgE,CAAC;IAC1E,CAAC;IAED,4BAA4B,CAAC,GAAoB,EAAE,OAAgB;QAC/D,MAAM,WAAW,GAAG,GAAG,CAAC,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC;QACnD,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,OAAO,IAAI,CAAC,0BAA0B,EAAE,CAAC;QAC7C,CAAC;QACD,MAAM,EAAE,oBAAoB,EAAE,YAAY,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC;QAC1G,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;QAC5C,MAAM,cAAc,GAAG,CAAC,QAAgB,EAAE,EAAE,CAAC,QAAQ,KAAK,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE9F,MAAM,QAAQ,GAAG,oBAAoB,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe
,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;QACtF,MAAM,SAAS,GAA2B,EAAE,CAAC;QAC7C,8FAA8F;QAC9F,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC1B,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7B,IACI,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;oBAC7C,CAAC,UAAU,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,kBAAkB,CAAC,EACzE,CAAC;oBACC,SAAS;gBACb,CAAC;gBACD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7B,CAAC;QACL,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,QAAQ,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,CAAC;YACpD,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;YAE5C,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;gBAC1C,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC5B,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC,CAAC;YAEF,IAAI,aAAa,IAAI,UAAU,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACxD,gCAAgC;gBAChC,gBAAgB,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,SAAS,CAAC,CAAC;YAChE,CAAC;iBAAM,IAAI,kBAAkB,IAAI,OAAO,CAAC,MAAM,IAAI,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC;gBACxE,gBAAgB,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAC7C,CAAC;QACL,CAAC;QAED,IAAI,SAAS,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;QACpE,IAAI,gBAAgB,EAAE,CAAC;YACnB,SAAS,GAAG;gBACR,OAAO,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC;gBACvC,eAAe,EAAE,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;gBACvD,YAAY,EAAE,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC;aACpD,CAAC;QACN,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,SAAiC;QAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QAC/E,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACjF,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,SAAiC;QAC5C,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC;QACnF,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;YA
CnB,OAAO,CAAC,CAAC;QACb,CAAC;QAED,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACZ,MAAM,WAAW,GAAG,KAAK,GAAG,UAAU,CAAC;gBACvC,OAAO,IAAI,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACpD,CAAC;QACL,CAAC;QACD,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;;OAGG;IACH,sBAAsB,CAAC,SAAiC;QACpD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9D,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACV,OAAO,CAAC,CAAC;QACb,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;QACtD,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;YACZ,OAAO,CAAC,CAAC;QACb,CAAC;QAED,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACzB,SAAS,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QACD,OAAO,SAAS,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;IACjC,CAAC;IAED;;;OAGG;IACH,mBAAmB,CAAC,SAAiC;QACjD,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC;QACnF,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;YACnB,OAAO,CAAC,CAAC;QACb,CAAC;QAED,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;YAC3C,MAAM,WAAW,GAAG,KAAK,GAAG,UAAU,CAAC;YACvC,GAAG,IAAI,WAAW,GAAG,WAAW,CAAC;QACrC,CAAC;QAED,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,kCAAkC;IACtD,CAAC;IAED;;;;OAIG;IACH,2BAA2B,CAAC,MAA4C;QACpE,MAAM,QAAQ,GAAG,MAAM,EAAE,QAAQ,CAAC;QAClC,IAAI,QAAQ,IAAI,QAAQ,CAAC,uBAAuB,CAAC,YAAY,CAAC,EAAE,CAAC;YAC7D,MAAM,KAAK,GACP,QAAQ,CAAC,uBAAuB,CAAC,YAAY,CAGhD,CAAC,KAAK,CAAC;YACR,IAAI,KAAK,CAAC,SAAS,KAAK,sBAAsB,EAAE,CAAC;gBAC7C,OAAO,KAAK,CAAC,oBAAoB,CAAC;YACtC,CAAC;YACD,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,OAAO,SAAS,CAAC;IACrB,CAAC;IAES,eAAe,CAAC,OAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACvB,OAAO,OAAO,CAAC,KAAK,CAAC;QACzB,CAAC;QACD,IAAI,
CAAC;YACD,OAAO,OAAO,CAAC,KAAK,CAAC,MAAM,CACvB,IAAI,CAAC,cAAc;iBACd,cAAc,CAAC,OAAO,CAAC;iBACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;iBACnB,IAAI,EAAE,CACd,CAAC;QACN,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,OAAO,CAAC,KAAK,CAAC,2BAA2B,EAAE,CAAC,CAAC,CAAC;YAC9C,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAES,0BAA0B;QAChC,OAAO;YACH,SAAS,EAAE,EAAE;YACb,QAAQ,EAAE,CAAC;YACX,SAAS,EAAE;gBACP,OAAO,EAAE,CAAC;gBACV,eAAe,EAAE,CAAC;gBAClB,YAAY,EAAE,CAAC;aAClB;SACJ,CAAC;IACN,CAAC;;AAhPsB,oCAAY,GAAG,mBAAmB,CAAC;AAmP9D;;GAEG;AACH,MAAM,CAAN,IAAY,YAGX;AAHD,WAAY,YAAY;IACpB,2BAAW,CAAA;IACX,6CAA6B,CAAA;AACjC,CAAC,EAHW,YAAY,KAAZ,YAAY,QAGvB;AAwBD,MAAM,eAAe,GAAoB;IACrC,YAAY,EAAE,YAAY,CAAC,GAAG;IAC9B,YAAY,EAAE,EAAE;IAChB,oBAAoB,EAAE,IAAI;IAC1B,kBAAkB,EAAE,IAAI;IACxB,gBAAgB,EAAE,IAAI;CACzB,CAAC"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Testing API for langium-ai evaluations
|
|
3
|
+
*
|
|
4
|
+
* Provides a vitest-style API for defining evaluation test cases.
|
|
5
|
+
*/
|
|
6
|
+
import type { EvaluatorResult } from '../evaluator/evaluator.js';
|
|
7
|
+
/**
|
|
8
|
+
* Standard data that can be expected from a case
|
|
9
|
+
*/
|
|
10
|
+
export type EvaluationData = {
|
|
11
|
+
/**
|
|
12
|
+
* Normalized score between 0 and 1 (0 = complete failure, 1 = full pass)
|
|
13
|
+
*/
|
|
14
|
+
score: number;
|
|
15
|
+
/**
|
|
16
|
+
* Whether this case was skipped (via .skip() or .only() filtering)
|
|
17
|
+
*/
|
|
18
|
+
skipped?: boolean;
|
|
19
|
+
/**
|
|
20
|
+
* When score is 0, expect an error here
|
|
21
|
+
*/
|
|
22
|
+
error?: Error | string;
|
|
23
|
+
};
|
|
24
|
+
/**
|
|
25
|
+
* Evaluation case result from running the lai cli
|
|
26
|
+
*
|
|
27
|
+
* A modified version of the baseline evaluator result, to accommodate additional
|
|
28
|
+
* requisite properties (certain metadata for cases + suites & a 'pass' result)
|
|
29
|
+
*/
|
|
30
|
+
export type EvaluationCaseResult = EvaluatorResult<EvaluationData> & {
|
|
31
|
+
metadata: {
|
|
32
|
+
/**
|
|
33
|
+
* Containing evaluation file for the associated case
|
|
34
|
+
*/
|
|
35
|
+
evalFile: string;
|
|
36
|
+
/**
|
|
37
|
+
* Name of the suite that we ran under
|
|
38
|
+
*/
|
|
39
|
+
suiteName: string;
|
|
40
|
+
/**
|
|
41
|
+
* Evaluation case name
|
|
42
|
+
*/
|
|
43
|
+
caseName: string;
|
|
44
|
+
/**
|
|
45
|
+
* Duration of evaluation case from start to finish
|
|
46
|
+
*/
|
|
47
|
+
duration: number;
|
|
48
|
+
};
|
|
49
|
+
};
|
|
50
|
+
export interface EvalContext {
|
|
51
|
+
systemPrompt: string;
|
|
52
|
+
project: {
|
|
53
|
+
name: string;
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Evaluator function definition: takes some context & produces evaluation data
|
|
58
|
+
*/
|
|
59
|
+
export type EvaluatorFunction = (ctx: EvalContext) => Promise<EvaluationData>;
|
|
60
|
+
interface EvalDefinition {
|
|
61
|
+
name: string;
|
|
62
|
+
fn: EvaluatorFunction;
|
|
63
|
+
skip?: boolean;
|
|
64
|
+
only?: boolean;
|
|
65
|
+
}
|
|
66
|
+
interface EvalSuite {
|
|
67
|
+
name: string;
|
|
68
|
+
evaluations: EvalDefinition[];
|
|
69
|
+
beforeAllHook?: () => void | Promise<void>;
|
|
70
|
+
afterAllHook?: () => void | Promise<void>;
|
|
71
|
+
beforeEachHook?: () => void | Promise<void>;
|
|
72
|
+
afterEachHook?: () => void | Promise<void>;
|
|
73
|
+
skip?: boolean;
|
|
74
|
+
only?: boolean;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Define a test suite
|
|
78
|
+
*/
|
|
79
|
+
export declare function describe(name: string, fn: () => void): void;
|
|
80
|
+
export declare namespace describe {
|
|
81
|
+
var skip: (name: string, fn: () => void) => void;
|
|
82
|
+
var only: (name: string, fn: () => void) => void;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Define an evaluation case
|
|
86
|
+
*/
|
|
87
|
+
export declare function evaluation(name: string, fn: EvaluatorFunction): void;
|
|
88
|
+
export declare namespace evaluation {
|
|
89
|
+
var skip: (name: string, fn: EvaluatorFunction) => void;
|
|
90
|
+
var only: (name: string, fn: EvaluatorFunction) => void;
|
|
91
|
+
var each: <T>(cases: T[]) => (name: string, fn: (data: T) => EvaluatorFunction) => void;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Define a hook that runs before each evaluation in the suite
|
|
95
|
+
*/
|
|
96
|
+
export declare function beforeEach(fn: () => void | Promise<void>): void;
|
|
97
|
+
/**
|
|
98
|
+
* Define a hook that runs after each evaluation in the suite
|
|
99
|
+
*/
|
|
100
|
+
export declare function afterEach(fn: () => void | Promise<void>): void;
|
|
101
|
+
/**
|
|
102
|
+
* Define a hook that runs once before all evaluations in the suite
|
|
103
|
+
*/
|
|
104
|
+
export declare function beforeAll(fn: () => void | Promise<void>): void;
|
|
105
|
+
/**
|
|
106
|
+
* Define a hook that runs once after all evaluations in the suite
|
|
107
|
+
*/
|
|
108
|
+
export declare function afterAll(fn: () => void | Promise<void>): void;
|
|
109
|
+
/**
|
|
110
|
+
* Export collected suites for runner
|
|
111
|
+
*/
|
|
112
|
+
export declare function getCollectedSuites(): EvalSuite[];
|
|
113
|
+
/**
|
|
114
|
+
* Clear all collected suites
|
|
115
|
+
*/
|
|
116
|
+
export declare function clearSuites(): void;
|
|
117
|
+
export { runEvalFile, type EvalProgressCallback } from './runner.js';
|
|
118
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAEjE;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG;IACzB;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;OAEG;IACH,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;CAC1B,CAAC;AAEF;;;;;GAKG;AACH,MAAM,MAAM,oBAAoB,GAAG,eAAe,CAAC,cAAc,CAAC,GAAG;IACjE,QAAQ,EAAE;QACN;;WAEG;QACH,QAAQ,EAAE,MAAM,CAAC;QAEjB;;WAEG;QACH,SAAS,EAAE,MAAM,CAAC;QAElB;;WAEG;QACH,QAAQ,EAAE,MAAM,CAAC;QAEjB;;WAEG;QACH,QAAQ,EAAE,MAAM,CAAC;KACpB,CAAC;CACL,CAAC;AAEF,MAAM,WAAW,WAAW;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,GAAG,EAAE,WAAW,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;AAG9E,UAAU,cAAc;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,iBAAiB,CAAC;IACtB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,UAAU,SAAS;IACf,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,cAAc,EAAE,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3C,YAAY,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1C,cAAc,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,aAAa,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB;AAKD;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI,CAM3D;yBANe,QAAQ;qBAWQ,MAAM,MAAM,MAAM,IAAI,KAAG,IAAI;qBAW7B,MAAM,MAAM,MAAM,IAAI,KAAG,IAAI;;AAQ7D;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,iBAAiB,GAAG,IAAI,CAKpE;yBALe,UAAU;qBAUQ,MAAM,MAAM,iBAAiB,KAAG,IAAI;qBAUpC,MAAM,MAAM,iBAAiB,KAAG,IAAI;eAkB1C,CAAC,SAAS,CAAC,EAAE,KAAG,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,iBAAiB,KAAK,IAAI;;AAwCtG;;GAEG;AACH,wBAAgB,UAAU,CAAC,EAAE,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAK/D;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAK9D;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,IAAI,G
AAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAK9D;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,EAAE,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAK7D;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,SAAS,EAAE,CAIhD;AAED;;GAEG;AACH,wBAAgB,WAAW,IAAI,IAAI,CAGlC;AAGD,OAAO,EAAE,WAAW,EAAE,KAAK,oBAAoB,EAAE,MAAM,aAAa,CAAC"}
|