promptfoo 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -73
- package/dist/assertions.d.ts +4 -10
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +126 -20
- package/dist/assertions.js.map +1 -1
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +2 -2
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +72 -41
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -21
- package/dist/index.js.map +1 -1
- package/dist/main.js +89 -81
- package/dist/main.js.map +1 -1
- package/dist/onboarding.d.ts +4 -0
- package/dist/onboarding.d.ts.map +1 -0
- package/dist/onboarding.js +63 -0
- package/dist/onboarding.js.map +1 -0
- package/dist/providers.d.ts +1 -0
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +11 -1
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +41 -9
- package/dist/types.d.ts.map +1 -1
- package/dist/util.d.ts +8 -4
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +91 -3
- package/dist/util.js.map +1 -1
- package/dist/web/server.d.ts.map +1 -1
- package/dist/web/server.js +0 -11
- package/dist/web/server.js.map +1 -1
- package/package.json +2 -1
- package/src/assertions.ts +141 -28
- package/src/cache.ts +0 -1
- package/src/evaluator.ts +88 -44
- package/src/index.ts +14 -26
- package/src/main.ts +118 -100
- package/src/onboarding.ts +61 -0
- package/src/providers.ts +9 -0
- package/src/types.ts +90 -12
- package/src/util.ts +107 -5
- package/src/web/server.ts +0 -18
package/README.md
CHANGED
|
@@ -32,21 +32,21 @@ It works on the command line too:
|
|
|
32
32
|
|
|
33
33
|
Start by establishing a handful of test cases - core use cases and failure cases that you want to ensure your prompt can handle.
|
|
34
34
|
|
|
35
|
-
As you explore modifications to the prompt, use `promptfoo eval` to rate all outputs.
|
|
35
|
+
As you explore modifications to the prompt, use `promptfoo eval` to rate all outputs. This ensures the prompt is actually improving overall.
|
|
36
36
|
|
|
37
37
|
As you collect more examples and establish a user feedback loop, continue to build the pool of test cases.
|
|
38
38
|
|
|
39
39
|
<img width="772" alt="LLM ops" src="https://github.com/typpo/promptfoo/assets/310310/cf0461a7-2832-4362-9fbb-4ebd911d06ff">
|
|
40
40
|
|
|
41
|
-
## Usage
|
|
41
|
+
## Usage
|
|
42
42
|
|
|
43
|
-
To get started, run
|
|
43
|
+
To get started, run this command:
|
|
44
44
|
|
|
45
45
|
```
|
|
46
46
|
npx promptfoo init
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
-
This will create some
|
|
49
|
+
This will create some placeholders in your current directory: `prompts.txt` and `promptfooconfig.yaml`.
|
|
50
50
|
|
|
51
51
|
After editing the prompts and variables to your liking, run the eval command to kick off an evaluation:
|
|
52
52
|
|
|
@@ -54,20 +54,75 @@ After editing the prompts and variables to your liking, run the eval command to
|
|
|
54
54
|
npx promptfoo eval
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
57
|
+
### Configuration
|
|
58
|
+
|
|
59
|
+
The YAML configuration format runs each prompt through a series of example inputs (aka "test case") and checks if they meet requirements (aka "assert").
|
|
60
|
+
|
|
61
|
+
See the [Configuration docs](https://www.promptfoo.dev/docs/configuration/guide) for a detailed guide.
|
|
62
|
+
|
|
63
|
+
```yaml
|
|
64
|
+
prompts: [prompts.txt]
|
|
65
|
+
providers: [openai:gpt-3.5-turbo]
|
|
66
|
+
tests:
|
|
67
|
+
- description: First test case - automatic review
|
|
68
|
+
vars:
|
|
69
|
+
var1: first variable's value
|
|
70
|
+
var2: another value
|
|
71
|
+
var3: some other value
|
|
72
|
+
assert:
|
|
73
|
+
- type: equality
|
|
74
|
+
value: expected LLM output goes here
|
|
75
|
+
- type: function
|
|
76
|
+
value: output.includes('some text')
|
|
77
|
+
|
|
78
|
+
- description: Second test case - manual review
|
|
79
|
+
# Test cases don't need assertions if you prefer to review the output yourself
|
|
80
|
+
vars:
|
|
81
|
+
var1: new value
|
|
82
|
+
var2: another value
|
|
83
|
+
var3: third value
|
|
84
|
+
|
|
85
|
+
- description: Third test case - other types of automatic review
|
|
86
|
+
vars:
|
|
87
|
+
var1: yet another value
|
|
88
|
+
var2: and another
|
|
89
|
+
var3: dear llm, please output your response in json format
|
|
90
|
+
assert:
|
|
91
|
+
- type: contains-json
|
|
92
|
+
- type: similarity
|
|
93
|
+
value: ensures that output is semantically similar to this text
|
|
94
|
+
- type: llm-rubric
|
|
95
|
+
value: ensure that output contains a reference to X
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Tests on spreadsheet
|
|
99
|
+
|
|
100
|
+
Some people prefer to configure their LLM tests in a CSV. In that case, the config is pretty simple:
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
prompts: [prompts.txt]
|
|
104
|
+
providers: [openai:gpt-3.5-turbo]
|
|
105
|
+
tests: tests.csv
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
See [example CSV](https://github.com/typpo/promptfoo/blob/main/examples/simple-test/tests.csv).
|
|
109
|
+
|
|
110
|
+
### Command-line
|
|
111
|
+
|
|
112
|
+
If you're looking to customize your usage, you have a wide set of parameters at your disposal.
|
|
113
|
+
|
|
114
|
+
| Option | Description |
|
|
115
|
+
| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
116
|
+
| `-p, --prompts <paths...>` | Paths to [prompt files](https://promptfoo.dev/docs/configuration/parameters#prompt-files), directory, or glob |
|
|
117
|
+
| `-r, --providers <name or path...>` | One of: openai:chat, openai:completion, openai:model-name, localai:chat:model-name, localai:completion:model-name. See [API providers](https://promptfoo.dev/docs/configuration/providers) |
|
|
118
|
+
| `-o, --output <path>` | Path to [output file](https://promptfoo.dev/docs/configuration/parameters#output-file) (csv, json, yaml, html) |
|
|
119
|
+
| `--tests <path>` | Path to [external test file](https://promptfoo.dev/docs/configuration/expected-outputs#load-an-external-tests-file) |
|
|
120
|
+
| `-c, --config <path>` | Path to [configuration file](https://promptfoo.dev/docs/configuration/guide). `promptfooconfig.js/json/yaml` is automatically loaded if present |
|
|
121
|
+
| `-j, --max-concurrency <number>` | Maximum number of concurrent API calls |
|
|
122
|
+
| `--table-cell-max-length <number>` | Truncate console table cells to this length |
|
|
123
|
+
| `--prompt-prefix <path>` | This prefix is prepended to every prompt |
|
|
124
|
+
| `--prompt-suffix <path>` | This suffix is appended to every prompt |
|
|
125
|
+
| `--grader` | [Provider](https://promptfoo.dev/docs/configuration/providers) that will conduct the evaluation, if you are [using LLM to grade your output](https://promptfoo.dev/docs/configuration/expected-outputs#llm-evaluation) |
|
|
71
126
|
|
|
72
127
|
After running an eval, you may optionally use the `view` command to open the web viewer:
|
|
73
128
|
|
|
@@ -79,10 +134,10 @@ npx promptfoo view
|
|
|
79
134
|
|
|
80
135
|
#### Prompt quality
|
|
81
136
|
|
|
82
|
-
In this example, we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
|
|
137
|
+
In [this example](https://github.com/typpo/promptfoo/tree/main/examples/assistant-cli), we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
|
|
83
138
|
|
|
84
139
|
```bash
|
|
85
|
-
npx promptfoo eval -p prompts.txt -
|
|
140
|
+
npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
|
|
86
141
|
```
|
|
87
142
|
|
|
88
143
|
<!--
|
|
@@ -93,15 +148,13 @@ npx promptfoo eval -p prompts.txt -v vars.csv -r openai:gpt-3.5-turbo
|
|
|
93
148
|
|
|
94
149
|
This command will evaluate the prompts in `prompts.txt`, substituting the variable values from `vars.csv`, and output results in your terminal.
|
|
95
150
|
|
|
96
|
-
Have a look at the setup and full output [here](https://github.com/typpo/promptfoo/tree/main/examples/assistant-cli).
|
|
97
|
-
|
|
98
151
|
You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/typpo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
|
|
99
152
|
|
|
100
153
|

|
|
101
154
|
|
|
102
155
|
#### Model quality
|
|
103
156
|
|
|
104
|
-
In
|
|
157
|
+
In the [next example](https://github.com/typpo/promptfoo/tree/main/examples/gpt-3.5-vs-4), we evaluate the difference between GPT-3.5 and GPT-4 outputs for a given prompt:
|
|
105
158
|
|
|
106
159
|
```bash
|
|
107
160
|
npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo openai:gpt-4 -o output.html
|
|
@@ -111,19 +164,46 @@ Produces this HTML table:
|
|
|
111
164
|
|
|
112
165
|

|
|
113
166
|
|
|
114
|
-
Full setup and output [here](https://github.com/typpo/promptfoo/tree/main/examples/gpt-3.5-vs-4).
|
|
115
|
-
|
|
116
167
|
## Usage (node package)
|
|
117
168
|
|
|
118
169
|
You can also use `promptfoo` as a library in your project by importing the `evaluate` function. The function takes the following parameters:
|
|
119
170
|
|
|
120
|
-
- `
|
|
121
|
-
- `options`: the prompts and variables you want to test:
|
|
171
|
+
- `testSuite`: the JavaScript equivalent of `promptfooconfig.yaml`
|
|
122
172
|
|
|
123
173
|
```typescript
|
|
124
|
-
{
|
|
125
|
-
|
|
174
|
+
interface TestSuiteConfig {
|
|
175
|
+
providers: string[]; // Valid provider name (e.g. openai:gpt-3.5-turbo)
|
|
176
|
+
prompts: string[]; // List of prompts
|
|
177
|
+
tests: string | TestCase[]; // Path to a CSV file, or list of test cases
|
|
178
|
+
|
|
179
|
+
defaultTest?: Omit<TestCase, 'description'>; // Optional: add default vars and assertions on test case
|
|
180
|
+
outputPath?: string; // Optional: write results to file
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
interface TestCase {
|
|
184
|
+
description?: string;
|
|
126
185
|
vars?: Record<string, string>;
|
|
186
|
+
assert?: Assertion[];
|
|
187
|
+
|
|
188
|
+
prompt?: PromptConfig;
|
|
189
|
+
grading?: GradingConfig;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
interface Assertion {
|
|
193
|
+
type: 'equality' | 'is-json' | 'contains-json' | 'function' | 'similarity' | 'llm-rubric';
|
|
194
|
+
value?: string;
|
|
195
|
+
threshold?: number; // For similarity assertions
|
|
196
|
+
provider?: ApiProvider; // For assertions that require an LLM provider
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
- `options`: misc options related to how the tests are run
|
|
201
|
+
|
|
202
|
+
```typescript
|
|
203
|
+
interface EvaluateOptions {
|
|
204
|
+
maxConcurrency?: number;
|
|
205
|
+
showProgressBar?: boolean;
|
|
206
|
+
generateSuggestions?: boolean;
|
|
127
207
|
}
|
|
128
208
|
```
|
|
129
209
|
|
|
@@ -134,61 +214,31 @@ You can also use `promptfoo` as a library in your project by importing the `eval
|
|
|
134
214
|
```js
|
|
135
215
|
import promptfoo from 'promptfoo';
|
|
136
216
|
|
|
137
|
-
const
|
|
217
|
+
const results = await promptfoo.evaluate({
|
|
138
218
|
prompts: ['Rephrase this in French: {{body}}', 'Rephrase this like a pirate: {{body}}'],
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
(async () => {
|
|
143
|
-
const summary = await promptfoo.evaluate('openai:gpt-3.5-turbo', options);
|
|
144
|
-
console.log(summary);
|
|
145
|
-
})();
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
This code imports the `promptfoo` library, defines the evaluation options, and then calls the `evaluate` function with these options. The results are logged to the console:
|
|
149
|
-
|
|
150
|
-
```js
|
|
151
|
-
{
|
|
152
|
-
"results": [
|
|
219
|
+
providers: ['openai:gpt-3.5-turbo'],
|
|
220
|
+
tests: [
|
|
153
221
|
{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
"display": "Rephrase this in French: {{body}}"
|
|
222
|
+
vars: {
|
|
223
|
+
body: 'Hello world',
|
|
157
224
|
},
|
|
158
|
-
|
|
159
|
-
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
vars: {
|
|
228
|
+
body: "I'm hungry",
|
|
160
229
|
},
|
|
161
|
-
"response": {
|
|
162
|
-
"output": "Bonjour le monde",
|
|
163
|
-
"tokenUsage": {
|
|
164
|
-
"total": 19,
|
|
165
|
-
"prompt": 16,
|
|
166
|
-
"completion": 3
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
230
|
},
|
|
170
|
-
// ...
|
|
171
231
|
],
|
|
172
|
-
|
|
173
|
-
"successes": 4,
|
|
174
|
-
"failures": 0,
|
|
175
|
-
"tokenUsage": {
|
|
176
|
-
"total": 120,
|
|
177
|
-
"prompt": 72,
|
|
178
|
-
"completion": 48
|
|
179
|
-
}
|
|
180
|
-
},
|
|
181
|
-
"table": [
|
|
182
|
-
// ...
|
|
183
|
-
]
|
|
184
|
-
}
|
|
232
|
+
});
|
|
185
233
|
```
|
|
186
234
|
|
|
187
|
-
|
|
235
|
+
This code imports the `promptfoo` library, defines the evaluation options, and then calls the `evaluate` function with these options.
|
|
236
|
+
|
|
237
|
+
See the full example [here](https://github.com/typpo/promptfoo/tree/main/examples/simple-import), which includes an example results object.
|
|
188
238
|
|
|
189
239
|
## Configuration
|
|
190
240
|
|
|
191
|
-
- **[
|
|
241
|
+
- **[Main guide](https://promptfoo.dev/docs/configuration/guide)**: Learn about how to configure your YAML file, set up prompt files, etc.
|
|
192
242
|
- **[Configuring test cases](https://promptfoo.dev/docs/configuration/expected-outputs)**: Learn more about how to configure expected outputs and test assertions.
|
|
193
243
|
|
|
194
244
|
## Installation
|
package/dist/assertions.d.ts
CHANGED
|
@@ -1,15 +1,9 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
reason: string;
|
|
5
|
-
tokensUsed: TokenUsage;
|
|
6
|
-
}
|
|
7
|
-
export declare function matchesExpectedValue(expected: string, output: string, options: EvaluateOptions): Promise<{
|
|
8
|
-
pass: boolean;
|
|
9
|
-
reason?: string;
|
|
10
|
-
}>;
|
|
1
|
+
import type { Assertion, GradingConfig, TestCase, GradingResult } from './types.js';
|
|
2
|
+
export declare function runAssertions(test: TestCase, output: string): Promise<GradingResult>;
|
|
3
|
+
export declare function runAssertion(assertion: Assertion, test: TestCase, output: string): Promise<GradingResult>;
|
|
11
4
|
export declare function matchesSimilarity(expected: string, output: string, threshold: number): Promise<GradingResult>;
|
|
12
5
|
export declare function matchesLlmRubric(expected: string, output: string, options?: GradingConfig): Promise<GradingResult>;
|
|
6
|
+
export declare function assertionFromString(expected: string): Assertion;
|
|
13
7
|
declare const _default: {
|
|
14
8
|
matchesSimilarity: typeof matchesSimilarity;
|
|
15
9
|
matchesLlmRubric: typeof matchesLlmRubric;
|
package/dist/assertions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAMpF,wBAAsB,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,CAyB1F;AAED,wBAAsB,YAAY,CAChC,SAAS,EAAE,SAAS,EACpB,IAAI,EAAE,QAAQ,EACd,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,aAAa,CAAC,CA2DxB;AAoBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,aAAa,CAAC,CA0CxB;AAED,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,aAAa,CAAC,CAgDxB;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAmC/D;;;;;AAED,wBAGE"}
|
package/dist/assertions.js
CHANGED
|
@@ -3,7 +3,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.matchesLlmRubric = exports.matchesSimilarity = exports.
|
|
6
|
+
exports.assertionFromString = exports.matchesLlmRubric = exports.matchesSimilarity = exports.runAssertion = exports.runAssertions = void 0;
|
|
7
|
+
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
7
8
|
const nunjucks_1 = __importDefault(require("nunjucks"));
|
|
8
9
|
const openai_js_1 = require("./providers/openai.js");
|
|
9
10
|
const util_js_1 = require("./util.js");
|
|
@@ -11,32 +12,100 @@ const providers_js_1 = require("./providers.js");
|
|
|
11
12
|
const prompts_js_1 = require("./prompts.js");
|
|
12
13
|
const SIMILAR_REGEX = /similar(?::|\((\d+(\.\d+)?)\):)/;
|
|
13
14
|
const DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD = 0.8;
|
|
14
|
-
async function
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
async function runAssertions(test, output) {
|
|
16
|
+
const tokensUsed = {
|
|
17
|
+
total: 0,
|
|
18
|
+
prompt: 0,
|
|
19
|
+
completion: 0,
|
|
20
|
+
};
|
|
21
|
+
if (!test.assert) {
|
|
22
|
+
return { pass: true, reason: 'No assertions', tokensUsed };
|
|
20
23
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
for (const assertion of test.assert) {
|
|
25
|
+
const result = await runAssertion(assertion, test, output);
|
|
26
|
+
if (!result.pass) {
|
|
27
|
+
return result;
|
|
28
|
+
}
|
|
29
|
+
if (result.tokensUsed) {
|
|
30
|
+
tokensUsed.total += result.tokensUsed.total;
|
|
31
|
+
tokensUsed.prompt += result.tokensUsed.prompt;
|
|
32
|
+
tokensUsed.completion += result.tokensUsed.completion;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
return { pass: true, reason: 'All assertions passed', tokensUsed };
|
|
36
|
+
}
|
|
37
|
+
exports.runAssertions = runAssertions;
|
|
38
|
+
async function runAssertion(assertion, test, output) {
|
|
39
|
+
let pass = false;
|
|
40
|
+
if (assertion.type === 'equals') {
|
|
41
|
+
pass = assertion.value === output;
|
|
42
|
+
return {
|
|
43
|
+
pass,
|
|
44
|
+
reason: pass ? 'Assertion passed' : `Expected output "${assertion.value}"`,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
if (assertion.type === 'is-json') {
|
|
48
|
+
try {
|
|
49
|
+
JSON.parse(output);
|
|
50
|
+
return { pass: true, reason: 'Assertion passed' };
|
|
51
|
+
}
|
|
52
|
+
catch (err) {
|
|
53
|
+
return {
|
|
54
|
+
pass: false,
|
|
55
|
+
reason: `Expected output to be valid JSON, but it isn't.\nError: ${err}`,
|
|
56
|
+
};
|
|
57
|
+
}
|
|
27
58
|
}
|
|
28
|
-
|
|
29
|
-
|
|
59
|
+
if (assertion.type === 'contains-json') {
|
|
60
|
+
const pass = containsJSON(output);
|
|
61
|
+
return {
|
|
62
|
+
pass,
|
|
63
|
+
reason: pass ? 'Assertion passed' : 'Expected output to contain valid JSON',
|
|
64
|
+
};
|
|
30
65
|
}
|
|
31
|
-
|
|
32
|
-
|
|
66
|
+
if (assertion.type === 'javascript') {
|
|
67
|
+
try {
|
|
68
|
+
const customFunction = new Function('output', `return ${assertion.value}`);
|
|
69
|
+
pass = customFunction(output);
|
|
70
|
+
}
|
|
71
|
+
catch (err) {
|
|
72
|
+
return {
|
|
73
|
+
pass: false,
|
|
74
|
+
reason: `Custom function threw error: ${err.message}`,
|
|
75
|
+
};
|
|
76
|
+
}
|
|
33
77
|
return {
|
|
34
78
|
pass,
|
|
35
|
-
reason: pass ?
|
|
79
|
+
reason: pass ? 'Assertion passed' : `Custom function returned false`,
|
|
36
80
|
};
|
|
37
81
|
}
|
|
82
|
+
if (assertion.type === 'similar') {
|
|
83
|
+
(0, tiny_invariant_1.default)(assertion.value, 'Similarity assertion must have a string value');
|
|
84
|
+
(0, tiny_invariant_1.default)(assertion.threshold, 'Similarity assertion must have a threshold');
|
|
85
|
+
return matchesSimilarity(assertion.value, output, assertion.threshold);
|
|
86
|
+
}
|
|
87
|
+
if (assertion.type === 'llm-rubric') {
|
|
88
|
+
(0, tiny_invariant_1.default)(assertion.value, 'Similarity assertion must have a string value');
|
|
89
|
+
return matchesLlmRubric(assertion.value, output, test.options);
|
|
90
|
+
}
|
|
91
|
+
throw new Error('Unknown assertion type: ' + assertion.type);
|
|
92
|
+
}
|
|
93
|
+
exports.runAssertion = runAssertion;
|
|
94
|
+
function containsJSON(str) {
|
|
95
|
+
// Regular expression to check for JSON-like pattern
|
|
96
|
+
const jsonPattern = /({[\s\S]*}|\[[\s\S]*])/;
|
|
97
|
+
const match = str.match(jsonPattern);
|
|
98
|
+
if (!match) {
|
|
99
|
+
return false;
|
|
100
|
+
}
|
|
101
|
+
try {
|
|
102
|
+
JSON.parse(match[0]);
|
|
103
|
+
return true;
|
|
104
|
+
}
|
|
105
|
+
catch (error) {
|
|
106
|
+
return false;
|
|
107
|
+
}
|
|
38
108
|
}
|
|
39
|
-
exports.matchesExpectedValue = matchesExpectedValue;
|
|
40
109
|
async function matchesSimilarity(expected, output, threshold) {
|
|
41
110
|
const expectedEmbedding = await openai_js_1.DefaultEmbeddingProvider.callEmbeddingApi(expected);
|
|
42
111
|
const outputEmbedding = await openai_js_1.DefaultEmbeddingProvider.callEmbeddingApi(output);
|
|
@@ -79,7 +148,7 @@ async function matchesLlmRubric(expected, output, options) {
|
|
|
79
148
|
if (!options) {
|
|
80
149
|
throw new Error('Cannot grade output without grading config. Specify --grader option or grading config.');
|
|
81
150
|
}
|
|
82
|
-
const prompt = nunjucks_1.default.renderString(options.
|
|
151
|
+
const prompt = nunjucks_1.default.renderString(options.rubricPrompt || prompts_js_1.DEFAULT_GRADING_PROMPT, {
|
|
83
152
|
content: output,
|
|
84
153
|
rubric: expected,
|
|
85
154
|
});
|
|
@@ -121,6 +190,43 @@ async function matchesLlmRubric(expected, output, options) {
|
|
|
121
190
|
}
|
|
122
191
|
}
|
|
123
192
|
exports.matchesLlmRubric = matchesLlmRubric;
|
|
193
|
+
function assertionFromString(expected) {
|
|
194
|
+
const match = expected.match(SIMILAR_REGEX);
|
|
195
|
+
if (match) {
|
|
196
|
+
const threshold = parseFloat(match[1]) || DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD;
|
|
197
|
+
const rest = expected.replace(SIMILAR_REGEX, '').trim();
|
|
198
|
+
return {
|
|
199
|
+
type: 'similar',
|
|
200
|
+
value: rest,
|
|
201
|
+
threshold,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
if (expected.startsWith('fn:') || expected.startsWith('eval:')) {
|
|
205
|
+
// TODO(1.0): delete eval: legacy option
|
|
206
|
+
const sliceLength = expected.startsWith('fn:') ? 'fn:'.length : 'eval:'.length;
|
|
207
|
+
const functionBody = expected.slice(sliceLength);
|
|
208
|
+
return {
|
|
209
|
+
type: 'javascript',
|
|
210
|
+
value: functionBody,
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
if (expected.startsWith('grade:')) {
|
|
214
|
+
return {
|
|
215
|
+
type: 'llm-rubric',
|
|
216
|
+
value: expected.slice(6),
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
if (expected === 'is-json' || expected === 'contains-json') {
|
|
220
|
+
return {
|
|
221
|
+
type: expected,
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
return {
|
|
225
|
+
type: 'equals',
|
|
226
|
+
value: expected,
|
|
227
|
+
};
|
|
228
|
+
}
|
|
229
|
+
exports.assertionFromString = assertionFromString;
|
|
124
230
|
exports.default = {
|
|
125
231
|
matchesSimilarity,
|
|
126
232
|
matchesLlmRubric,
|
package/dist/assertions.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":";;;;;;AAAA,wDAAgC;AAEhC,qDAAyF;AACzF,uCAA6C;AAC7C,iDAAiD;AACjD,6CAAsD;
|
|
1
|
+
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":";;;;;;AAAA,oEAAuC;AACvC,wDAAgC;AAEhC,qDAAyF;AACzF,uCAA6C;AAC7C,iDAAiD;AACjD,6CAAsD;AAItD,MAAM,aAAa,GAAG,iCAAiC,CAAC;AAExD,MAAM,qCAAqC,GAAG,GAAG,CAAC;AAE3C,KAAK,UAAU,aAAa,CAAC,IAAc,EAAE,MAAc;IAChE,MAAM,UAAU,GAAG;QACjB,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,UAAU,EAAE,CAAC;KACd,CAAC;IAEF,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;QAChB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;KAC5D;IAED,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE;QACnC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC3D,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;YAChB,OAAO,MAAM,CAAC;SACf;QAED,IAAI,MAAM,CAAC,UAAU,EAAE;YACrB,UAAU,CAAC,KAAK,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;YAC5C,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;YAC9C,UAAU,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC;SACvD;KACF;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,uBAAuB,EAAE,UAAU,EAAE,CAAC;AACrE,CAAC;AAzBD,sCAyBC;AAEM,KAAK,UAAU,YAAY,CAChC,SAAoB,EACpB,IAAc,EACd,MAAc;IAEd,IAAI,IAAI,GAAY,KAAK,CAAC;IAE1B,IAAI,SAAS,CAAC,IAAI,KAAK,QAAQ,EAAE;QAC/B,IAAI,GAAG,SAAS,CAAC,KAAK,KAAK,MAAM,CAAC;QAClC,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,oBAAoB,SAAS,CAAC,KAAK,GAAG;SAC3E,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS,EAAE;QAChC,IAAI;YACF,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACnB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC;SACnD;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,2DAA2D,GAAG,EAAE;aACzE,CAAC;SACH;KACF;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,eAAe,EAAE;QACtC,MAAM,IAAI,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QAClC,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,uCAAuC;SAC5E,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE;QACnC,IAAI;YACF,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC,QAAQ,EAAE,UAAU,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC;YAC3E,IAAI,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;SAC/B;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,gCAAiC,GAAa,CAAC,OAAO,EAA
E;aACjE,CAAC;SACH;QACD,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,gCAAgC;SACrE,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS,EAAE;QAChC,IAAA,wBAAS,EAAC,SAAS,CAAC,KAAK,EAAE,+CAA+C,CAAC,CAAC;QAC5E,IAAA,wBAAS,EAAC,SAAS,CAAC,SAAS,EAAE,4CAA4C,CAAC,CAAC;QAC7E,OAAO,iBAAiB,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,SAAS,CAAC,CAAC;KACxE;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE;QACnC,IAAA,wBAAS,EAAC,SAAS,CAAC,KAAK,EAAE,+CAA+C,CAAC,CAAC;QAC5E,OAAO,gBAAgB,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;KAChE;IAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AAC/D,CAAC;AA/DD,oCA+DC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,oDAAoD;IACpD,MAAM,WAAW,GAAG,wBAAwB,CAAC;IAE7C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAErC,IAAI,CAAC,KAAK,EAAE;QACV,OAAO,KAAK,CAAC;KACd;IAED,IAAI;QACF,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,IAAI,CAAC;KACb;IAAC,OAAO,KAAK,EAAE;QACd,OAAO,KAAK,CAAC;KACd;AACH,CAAC;AAEM,KAAK,UAAU,iBAAiB,CACrC,QAAgB,EAChB,MAAc,EACd,SAAiB;IAEjB,MAAM,iBAAiB,GAAG,MAAM,oCAAwB,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACpF,MAAM,eAAe,GAAG,MAAM,oCAAwB,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAEhF,MAAM,UAAU,GAAG;QACjB,KAAK,EAAE,CAAC,iBAAiB,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC,CAAC;QAC5F,MAAM,EAAE,CAAC,iBAAiB,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC;QAC/F,UAAU,EACR,CAAC,iBAAiB,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC;YAC/C,CAAC,eAAe,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC;KAChD,CAAC;IAEF,IAAI,iBAAiB,CAAC,KAAK,IAAI,eAAe,CAAC,KAAK,EAAE;QACpD,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EACJ,iBAAiB,CAAC,KAAK,IAAI,eAAe,CAAC,KAAK,IAAI,mCAAmC;YACzF,UAAU;SACX,CAAC;KACH;IAED,IAAI,CAAC,iBAAiB,CAAC,SAAS,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE;QAC9D,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,qBAAqB;YAC7B,UAAU;SACX,CAAC;KACH;IAED,MAAM,UAAU,GAAG,IAAA,0BAAgB,EAAC,iBAAiB,CAAC,SAAS,EAAE,eAAe,CAAC,SAAS,CAAC,CAAC;IAC5F,IAAI,UAAU,GAAG,SAAS,EAAE;QAC1B,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,cAAc,UAAU,2B
AA2B,SAAS,EAAE;YACtE,UAAU;SACX,CAAC;KACH;IACD,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,cAAc,UAAU,8BAA8B,SAAS,EAAE;QACzE,UAAU;KACX,CAAC;AACJ,CAAC;AA9CD,8CA8CC;AAEM,KAAK,UAAU,gBAAgB,CACpC,QAAgB,EAChB,MAAc,EACd,OAAuB;IAEvB,IAAI,CAAC,OAAO,EAAE;QACZ,MAAM,IAAI,KAAK,CACb,wFAAwF,CACzF,CAAC;KACH;IAED,MAAM,MAAM,GAAG,kBAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,YAAY,IAAI,mCAAsB,EAAE;QACnF,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,QAAQ;KACjB,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,kCAAsB,CAAC;IAC1D,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE;QAChC,QAAQ,GAAG,MAAM,IAAA,8BAAe,EAAC,QAAQ,CAAC,CAAC;KAC5C;IACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5C,IAAI,IAAI,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;QAC9B,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,IAAI,CAAC,KAAK,IAAI,WAAW;YACjC,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;gBAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;gBACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;aAC7C;SACF,CAAC;KACH;IAED,IAAI;QACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAkB,CAAC;QACxD,MAAM,CAAC,UAAU,GAAG;YAClB,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;YAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;YACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;SAC7C,CAAC;QACF,OAAO,MAAM,CAAC;KACf;IAAC,OAAO,GAAG,EAAE;QACZ,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,6BAA6B,IAAI,CAAC,MAAM,EAAE;YAClD,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;gBAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;gBACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;aAC7C;SACF,CAAC;KACH;AACH,CAAC;AApDD,4CAoDC;AAED,SAAgB,mBAAmB,CAAC,QAAgB;IAClD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC5C,IAAI,KAAK,EAAE;QACT,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,qCAAqC,CAAC;QAChF,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACxD,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,IAAI;YACX,SAAS;SACV,CAAC;KACH;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE;QAC9D,wCAAwC;QACxC,MAAM,WAAW,GAAG,QAAQ,CAAC,U
AAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;QAC/E,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACjD,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,YAAY;SACpB,CAAC;KACH;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE;QACjC,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;SACzB,CAAC;KACH;IACD,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,eAAe,EAAE;QAC1D,OAAO;YACL,IAAI,EAAE,QAAQ;SACf,CAAC;KACH;IACD,OAAO;QACL,IAAI,EAAE,QAAQ;QACd,KAAK,EAAE,QAAQ;KAChB,CAAC;AACJ,CAAC;AAnCD,kDAmCC;AAED,kBAAe;IACb,iBAAiB;IACjB,gBAAgB;CACjB,CAAC"}
|
package/dist/cache.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AA4B3D,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,WAAW,EAChB,OAAO,yBAAkB,EACzB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC;IAAE,IAAI,EAAE,GAAG,CAAC;IAAC,MAAM,EAAE,OAAO,CAAA;CAAE,CAAC,CAuCzC;AAED,wBAAgB,WAAW,SAE1B;AAED,wBAAgB,YAAY,SAG3B"}
|
package/dist/cache.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,0DAA6B;AAE7B,kEAAyC;AACzC,kFAA4C;
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,0DAA6B;AAE7B,kEAAyC;AACzC,kFAA4C;AAE5C,4DAAiC;AACjC,uCAAqE;AAKrE,IAAI,aAAgC,CAAC;AAErC,IAAI,OAAO,GACT,OAAO,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,WAAW;IACxD,CAAC,CAAC,IAAI;IACN,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;AAEnD,MAAM,SAAS,GACb,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAE5F,SAAS,QAAQ;IACf,IAAI,CAAC,aAAa,EAAE;QAClB,aAAa,GAAG,uBAAY,CAAC,OAAO,CAAC;YACnC,KAAK,EAAE,SAAS,KAAK,MAAM,CAAC,CAAC,CAAC,+BAAO,CAAC,CAAC,CAAC,QAAQ;YAChD,OAAO,EAAE;gBACP,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,KAAM;gBACzD,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,mBAAI,CAAC,IAAI,CAAC,IAAA,gCAAsB,GAAE,EAAE,OAAO,CAAC;gBACtF,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;gBACzD,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,GAAG,EAAE,iBAAiB;gBACvE,+CAA+C;aAChD;SACF,CAAC,CAAC;KACJ;IACD,OAAO,aAAa,CAAC;AACvB,CAAC;AAEM,KAAK,UAAU,kBAAkB,CACtC,GAAgB,EAChB,UAAuB,EAAE,EACzB,OAAe;IAEf,IAAI,CAAC,OAAO,EAAE;QACZ,MAAM,IAAI,GAAG,MAAM,IAAA,0BAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC3D,OAAO;YACL,MAAM,EAAE,KAAK;YACb,IAAI,EAAE,MAAM,IAAI,CAAC,IAAI,EAAE;SACxB,CAAC;KACH;IAED,MAAM,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;IAE/B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;IACxC,OAAO,IAAI,CAAC,OAAO,CAAC;IACpB,MAAM,QAAQ,GAAG,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;IAExD,iCAAiC;IACjC,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAEjD,IAAI,cAAc,EAAE;QAClB,mBAAM,CAAC,KAAK,CAAC,iCAAiC,GAAG,KAAK,cAAc,EAAE,CAAC,CAAC;QACxE,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,cAAwB,CAAC;SAC3C,CAAC;KACH;IAED,kDAAkD;IAClD,MAAM,QAAQ,GAAG,MAAM,IAAA,0BAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,IAAI;QACF,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,mBAAM,CAAC,KAAK,CAAC,WAAW,GAAG,uBAAuB,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,
CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;QAChD,OAAO;YACL,MAAM,EAAE,KAAK;YACb,IAAI;SACL,CAAC;KACH;IAAC,OAAO,GAAG,EAAE;QACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,KAAK,GAAG,EAAE,CAAC,CAAC;KAC/D;AACH,CAAC;AA3CD,gDA2CC;AAED,SAAgB,WAAW;IACzB,OAAO,GAAG,IAAI,CAAC;AACjB,CAAC;AAFD,kCAEC;AAED,SAAgB,YAAY;IAC1B,mBAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,GAAG,KAAK,CAAC;AAClB,CAAC;AAHD,oCAGC"}
|
package/dist/evaluator.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { EvaluateOptions, EvaluateSummary } from './types.js';
|
|
2
|
-
export declare function evaluate(options: EvaluateOptions): Promise<EvaluateSummary>;
|
|
1
|
+
import type { EvaluateOptions, EvaluateSummary, TestSuite } from './types.js';
|
|
2
|
+
export declare function evaluate(testSuite: TestSuite, options: EvaluateOptions): Promise<EvaluateSummary>;
|
|
3
3
|
//# sourceMappingURL=evaluator.d.ts.map
|
package/dist/evaluator.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAEV,eAAe,EAGf,eAAe,
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAEV,eAAe,EAGf,eAAe,EAEf,SAAS,EAGV,MAAM,YAAY,CAAC;AA6TpB,wBAAgB,QAAQ,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,eAAe,4BAGtE"}
|