promptfoo 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -40
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +186 -44
- package/dist/assertions.js.map +1 -1
- package/dist/cache.js +9 -9
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +1 -1
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +30 -23
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +10 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -14
- package/dist/index.js.map +1 -1
- package/dist/main.js +49 -44
- package/dist/main.js.map +1 -1
- package/dist/providers/localai.js +11 -11
- package/dist/providers/localai.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +30 -21
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers.d.ts +3 -3
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +15 -15
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +7 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/util.d.ts +4 -4
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +49 -18
- package/dist/util.js.map +1 -1
- package/dist/web/client/assets/index-15dfcd18.js +172 -0
- package/dist/web/client/assets/index-87905193.css +1 -0
- package/dist/web/client/index.html +2 -2
- package/dist/web/server.js +9 -9
- package/dist/web/server.js.map +1 -1
- package/package.json +3 -1
- package/src/assertions.ts +249 -38
- package/src/cache.ts +2 -2
- package/src/evaluator.ts +25 -18
- package/src/index.ts +13 -8
- package/src/main.ts +28 -15
- package/src/providers/localai.ts +3 -3
- package/src/providers/openai.ts +16 -8
- package/src/providers.ts +3 -3
- package/src/types.ts +24 -3
- package/src/util.ts +48 -17
- package/src/web/client/package-lock.json +5729 -0
- package/src/web/client/src/ResultsTable.css +35 -4
- package/src/web/client/src/ResultsTable.tsx +150 -70
- package/src/web/client/src/ResultsView.tsx +83 -18
- package/src/web/client/src/index.css +6 -0
- package/src/web/client/src/types.ts +2 -0
- package/src/web/server.ts +3 -3
- package/dist/web/client/assets/index-207192fc.css +0 -1
- package/dist/web/client/assets/index-8751749f.js +0 -172
package/src/main.ts
CHANGED
|
@@ -6,9 +6,9 @@ import Table from 'cli-table3';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
import { Command } from 'commander';
|
|
8
8
|
|
|
9
|
-
import logger, { setLogLevel } from './logger.js';
|
|
10
|
-
import { loadApiProvider, loadApiProviders } from './providers.js';
|
|
11
|
-
import { evaluate } from './evaluator.js';
|
|
9
|
+
import logger, { setLogLevel } from './logger';
|
|
10
|
+
import { loadApiProvider, loadApiProviders } from './providers';
|
|
11
|
+
import { evaluate } from './evaluator';
|
|
12
12
|
import {
|
|
13
13
|
maybeReadConfig,
|
|
14
14
|
readConfig,
|
|
@@ -16,10 +16,10 @@ import {
|
|
|
16
16
|
readTests,
|
|
17
17
|
writeLatestResults,
|
|
18
18
|
writeOutput,
|
|
19
|
-
} from './util.js';
|
|
20
|
-
import { getDirectory } from './esm.js';
|
|
21
|
-
import { init } from './web/server.js';
|
|
22
|
-
import { disableCache } from './cache.js';
|
|
19
|
+
} from './util';
|
|
20
|
+
import { getDirectory } from './esm';
|
|
21
|
+
import { init } from './web/server';
|
|
22
|
+
import { disableCache } from './cache';
|
|
23
23
|
|
|
24
24
|
import type {
|
|
25
25
|
CommandLineOptions,
|
|
@@ -27,8 +27,8 @@ import type {
|
|
|
27
27
|
TestCase,
|
|
28
28
|
TestSuite,
|
|
29
29
|
UnifiedConfig,
|
|
30
|
-
} from './types.js';
|
|
31
|
-
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding.js';
|
|
30
|
+
} from './types';
|
|
31
|
+
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
32
32
|
|
|
33
33
|
function createDummyFiles(directory: string | null) {
|
|
34
34
|
if (directory) {
|
|
@@ -68,7 +68,7 @@ async function main() {
|
|
|
68
68
|
];
|
|
69
69
|
let config: Partial<UnifiedConfig> = {};
|
|
70
70
|
for (const path of potentialPaths) {
|
|
71
|
-
const maybeConfig = maybeReadConfig(path);
|
|
71
|
+
const maybeConfig = await maybeReadConfig(path);
|
|
72
72
|
if (maybeConfig) {
|
|
73
73
|
config = maybeConfig;
|
|
74
74
|
break;
|
|
@@ -154,8 +154,16 @@ async function main() {
|
|
|
154
154
|
'This suffix is append to every prompt',
|
|
155
155
|
config.defaultTest?.options?.suffix,
|
|
156
156
|
)
|
|
157
|
-
.option(
|
|
158
|
-
|
|
157
|
+
.option(
|
|
158
|
+
'--no-write',
|
|
159
|
+
'Do not write results to promptfoo directory',
|
|
160
|
+
config?.commandLineOptions?.write,
|
|
161
|
+
)
|
|
162
|
+
.option(
|
|
163
|
+
'--no-cache',
|
|
164
|
+
'Do not read or write results to disk cache',
|
|
165
|
+
config?.commandLineOptions?.cache,
|
|
166
|
+
)
|
|
159
167
|
.option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
|
|
160
168
|
.option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
|
|
161
169
|
.option('--view [port]', 'View in browser ui')
|
|
@@ -172,12 +180,13 @@ async function main() {
|
|
|
172
180
|
const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
|
|
173
181
|
const configPath = cmdObj.config;
|
|
174
182
|
if (configPath) {
|
|
175
|
-
config = readConfig(configPath);
|
|
183
|
+
config = await readConfig(configPath);
|
|
176
184
|
} else {
|
|
177
185
|
config = {
|
|
178
186
|
prompts: cmdObj.prompts || config.prompts,
|
|
179
187
|
providers: cmdObj.providers || config.providers,
|
|
180
188
|
tests: cmdObj.tests || cmdObj.vars || config.tests,
|
|
189
|
+
defaultTest: config.defaultTest,
|
|
181
190
|
};
|
|
182
191
|
}
|
|
183
192
|
|
|
@@ -255,8 +264,9 @@ async function main() {
|
|
|
255
264
|
},
|
|
256
265
|
});
|
|
257
266
|
// Skip first row (header) and add the rest. Color PASS/FAIL
|
|
258
|
-
for (const row of summary.table.body) {
|
|
267
|
+
for (const row of summary.table.body.slice(0, 25)) {
|
|
259
268
|
table.push([
|
|
269
|
+
...row.vars,
|
|
260
270
|
...row.outputs.map((col) => {
|
|
261
271
|
const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
|
|
262
272
|
if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
|
|
@@ -274,11 +284,14 @@ async function main() {
|
|
|
274
284
|
}
|
|
275
285
|
return col;
|
|
276
286
|
}),
|
|
277
|
-
...row.vars,
|
|
278
287
|
]);
|
|
279
288
|
}
|
|
280
289
|
|
|
281
290
|
logger.info('\n' + table.toString());
|
|
291
|
+
if (summary.table.body.length > 25) {
|
|
292
|
+
const rowsLeft = summary.table.body.length - 25;
|
|
293
|
+
logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
|
|
294
|
+
}
|
|
282
295
|
}
|
|
283
296
|
if (cmdObj.view || !cmdObj.write) {
|
|
284
297
|
logger.info('Evaluation complete');
|
package/src/providers/localai.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import logger from '../logger.js';
|
|
2
|
-
import { fetchJsonWithCache } from '../cache.js';
|
|
3
|
-
import { REQUEST_TIMEOUT_MS } from './shared.js';
|
|
1
|
+
import logger from '../logger';
|
|
2
|
+
import { fetchJsonWithCache } from '../cache';
|
|
3
|
+
import { REQUEST_TIMEOUT_MS } from './shared';
|
|
4
4
|
|
|
5
5
|
import type { ApiProvider, ProviderResponse } from '../types.js';
|
|
6
6
|
|
package/src/providers/openai.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import logger from '../logger.js';
|
|
2
|
-
import { fetchJsonWithCache } from '../cache.js';
|
|
3
|
-
import { REQUEST_TIMEOUT_MS } from './shared.js';
|
|
1
|
+
import logger from '../logger';
|
|
2
|
+
import { fetchJsonWithCache } from '../cache';
|
|
3
|
+
import { REQUEST_TIMEOUT_MS } from './shared';
|
|
4
4
|
|
|
5
5
|
import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
|
|
6
6
|
|
|
@@ -126,12 +126,20 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
|
126
126
|
);
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
let stop: string;
|
|
130
|
+
try {
|
|
131
|
+
stop = process.env.OPENAI_STOP
|
|
132
|
+
? JSON.parse(process.env.OPENAI_STOP)
|
|
133
|
+
: ['<|im_end|>', '<|endoftext|>'];
|
|
134
|
+
} catch (err) {
|
|
135
|
+
throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
|
|
136
|
+
}
|
|
129
137
|
const body = {
|
|
130
138
|
model: this.modelName,
|
|
131
139
|
prompt,
|
|
132
|
-
max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
|
|
133
|
-
temperature: options?.temperature ?? (process.env.
|
|
134
|
-
stop
|
|
140
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
141
|
+
temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
142
|
+
stop,
|
|
135
143
|
};
|
|
136
144
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
137
145
|
let data,
|
|
@@ -210,8 +218,8 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
210
218
|
const body = {
|
|
211
219
|
model: this.modelName,
|
|
212
220
|
messages: messages,
|
|
213
|
-
max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
|
|
214
|
-
temperature: options?.temperature ?? (process.env.
|
|
221
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
222
|
+
temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
215
223
|
};
|
|
216
224
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
217
225
|
|
package/src/providers.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
|
|
3
|
-
import { ApiProvider } from './types.js';
|
|
3
|
+
import { ApiProvider } from './types';
|
|
4
4
|
|
|
5
|
-
import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai.js';
|
|
6
|
-
import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai.js';
|
|
5
|
+
import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
|
|
6
|
+
import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
|
|
7
7
|
|
|
8
8
|
export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
|
|
9
9
|
if (typeof providerPaths === 'string') {
|
package/src/types.ts
CHANGED
|
@@ -113,13 +113,34 @@ export interface GradingResult {
|
|
|
113
113
|
tokensUsed?: TokenUsage;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
type BaseAssertionTypes =
|
|
117
|
+
| 'equals'
|
|
118
|
+
| 'contains'
|
|
119
|
+
| 'icontains'
|
|
120
|
+
| 'contains-all'
|
|
121
|
+
| 'contains-any'
|
|
122
|
+
| 'regex'
|
|
123
|
+
| 'is-json'
|
|
124
|
+
| 'contains-json'
|
|
125
|
+
| 'javascript'
|
|
126
|
+
| 'similar'
|
|
127
|
+
| 'llm-rubric'
|
|
128
|
+
| 'webhook'
|
|
129
|
+
| 'rouge-n'
|
|
130
|
+
| 'rouge-s'
|
|
131
|
+
| 'rouge-l';
|
|
132
|
+
|
|
133
|
+
type NotPrefixed<T extends string> = `not-${T}`;
|
|
134
|
+
|
|
135
|
+
export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
|
|
136
|
+
|
|
116
137
|
// TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
|
|
117
138
|
export interface Assertion {
|
|
118
139
|
// Type of assertion
|
|
119
|
-
type:
|
|
140
|
+
type: AssertionType;
|
|
120
141
|
|
|
121
142
|
// The expected value, if applicable
|
|
122
|
-
value?: string;
|
|
143
|
+
value?: string | string[];
|
|
123
144
|
|
|
124
145
|
// The threshold value, only applicable for similarity (cosine distance)
|
|
125
146
|
threshold?: number;
|
|
@@ -157,7 +178,7 @@ export interface TestSuite {
|
|
|
157
178
|
providers: ApiProvider[];
|
|
158
179
|
|
|
159
180
|
// One or more prompt strings
|
|
160
|
-
prompts:
|
|
181
|
+
prompts: Prompt[];
|
|
161
182
|
|
|
162
183
|
// Test cases
|
|
163
184
|
tests?: TestCase[];
|
package/src/util.ts
CHANGED
|
@@ -2,6 +2,7 @@ import * as fs from 'fs';
|
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import * as os from 'node:os';
|
|
4
4
|
|
|
5
|
+
import $RefParser from '@apidevtools/json-schema-ref-parser';
|
|
5
6
|
import fetch from 'node-fetch';
|
|
6
7
|
import yaml from 'js-yaml';
|
|
7
8
|
import nunjucks from 'nunjucks';
|
|
@@ -10,13 +11,13 @@ import { parse as parsePath } from 'path';
|
|
|
10
11
|
import { parse as parseCsv } from 'csv-parse/sync';
|
|
11
12
|
import { stringify } from 'csv-stringify/sync';
|
|
12
13
|
|
|
13
|
-
import logger from './logger.js';
|
|
14
|
-
import { getDirectory } from './esm.js';
|
|
14
|
+
import logger from './logger';
|
|
15
|
+
import { getDirectory } from './esm';
|
|
15
16
|
|
|
16
17
|
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
17
18
|
|
|
18
|
-
import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase } from './types.js';
|
|
19
|
-
import { assertionFromString } from './assertions.js';
|
|
19
|
+
import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
|
|
20
|
+
import { assertionFromString } from './assertions';
|
|
20
21
|
|
|
21
22
|
const PROMPT_DELIMITER = '---';
|
|
22
23
|
|
|
@@ -28,14 +29,14 @@ function parseJson(json: string): any | undefined {
|
|
|
28
29
|
}
|
|
29
30
|
}
|
|
30
31
|
|
|
31
|
-
export function maybeReadConfig(configPath: string): UnifiedConfig | undefined {
|
|
32
|
+
export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
|
|
32
33
|
if (!fs.existsSync(configPath)) {
|
|
33
34
|
return undefined;
|
|
34
35
|
}
|
|
35
36
|
return readConfig(configPath);
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
export function readConfig(configPath: string): UnifiedConfig {
|
|
39
|
+
export async function readConfig(configPath: string): Promise<UnifiedConfig> {
|
|
39
40
|
const ext = path.parse(configPath).ext;
|
|
40
41
|
switch (ext) {
|
|
41
42
|
case '.json':
|
|
@@ -44,17 +45,38 @@ export function readConfig(configPath: string): UnifiedConfig {
|
|
|
44
45
|
case '.js':
|
|
45
46
|
return require(configPath) as UnifiedConfig;
|
|
46
47
|
case '.yaml':
|
|
47
|
-
|
|
48
|
+
case '.yml':
|
|
49
|
+
let ret = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
|
|
50
|
+
ret = (await $RefParser.dereference(ret)) as UnifiedConfig;
|
|
51
|
+
return ret;
|
|
48
52
|
default:
|
|
49
53
|
throw new Error(`Unsupported configuration file format: ${ext}`);
|
|
50
54
|
}
|
|
51
55
|
}
|
|
52
56
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
enum PromptInputType {
|
|
58
|
+
STRING = 1,
|
|
59
|
+
ARRAY = 2,
|
|
60
|
+
NAMED = 3,
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
export function readPrompts(
|
|
64
|
+
promptPathOrGlobs: string | string[] | Record<string, string>,
|
|
65
|
+
): Prompt[] {
|
|
66
|
+
let promptPaths: string[] = [];
|
|
67
|
+
let promptContents: Prompt[] = [];
|
|
68
|
+
|
|
69
|
+
let inputType: PromptInputType | undefined;
|
|
70
|
+
if (typeof promptPathOrGlobs === 'string') {
|
|
71
|
+
promptPaths = [promptPathOrGlobs];
|
|
72
|
+
inputType = PromptInputType.STRING;
|
|
73
|
+
} else if (Array.isArray(promptPathOrGlobs)) {
|
|
74
|
+
promptPaths = promptPathOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
|
|
75
|
+
inputType = PromptInputType.ARRAY;
|
|
76
|
+
} else if (typeof promptPathOrGlobs === 'object') {
|
|
77
|
+
promptPaths = Object.keys(promptPathOrGlobs);
|
|
78
|
+
inputType = PromptInputType.NAMED;
|
|
79
|
+
}
|
|
58
80
|
|
|
59
81
|
for (const promptPath of promptPaths) {
|
|
60
82
|
const stat = fs.statSync(promptPath);
|
|
@@ -63,18 +85,27 @@ export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
|
|
|
63
85
|
const fileContents = filesInDirectory.map((fileName) =>
|
|
64
86
|
fs.readFileSync(path.join(promptPath, fileName), 'utf-8'),
|
|
65
87
|
);
|
|
66
|
-
promptContents.push(...fileContents);
|
|
88
|
+
promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
|
|
67
89
|
} else {
|
|
68
90
|
const fileContent = fs.readFileSync(promptPath, 'utf-8');
|
|
69
|
-
|
|
91
|
+
let display;
|
|
92
|
+
if (inputType === PromptInputType.NAMED) {
|
|
93
|
+
display = (promptPathOrGlobs as Record<string, string>)[promptPath];
|
|
94
|
+
} else {
|
|
95
|
+
display = fileContent.length > 200 ? promptPath : fileContent;
|
|
96
|
+
}
|
|
97
|
+
promptContents.push({ raw: fileContent, display });
|
|
70
98
|
}
|
|
71
99
|
}
|
|
72
100
|
|
|
73
|
-
if (promptContents.length === 1) {
|
|
74
|
-
|
|
101
|
+
if (promptContents.length === 1 && inputType !== PromptInputType.NAMED) {
|
|
102
|
+
const content = promptContents[0].raw;
|
|
103
|
+
promptContents = content
|
|
104
|
+
.split(PROMPT_DELIMITER)
|
|
105
|
+
.map((p) => ({ raw: p.trim(), display: p.trim() }));
|
|
75
106
|
}
|
|
76
107
|
if (promptContents.length === 0) {
|
|
77
|
-
throw new Error(`There are no prompts in ${
|
|
108
|
+
throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
|
|
78
109
|
}
|
|
79
110
|
return promptContents;
|
|
80
111
|
}
|