promptfoo 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -35
- package/dist/package.json +87 -0
- package/dist/src/__mocks__/esm.d.ts.map +1 -0
- package/dist/src/__mocks__/esm.js.map +1 -0
- package/dist/{assertions.d.ts → src/assertions.d.ts} +1 -1
- package/dist/src/assertions.d.ts.map +1 -0
- package/dist/src/assertions.js +374 -0
- package/dist/src/assertions.js.map +1 -0
- package/dist/src/cache.d.ts.map +1 -0
- package/dist/src/cache.js.map +1 -0
- package/dist/src/esm.d.ts.map +1 -0
- package/dist/src/esm.js.map +1 -0
- package/dist/src/evaluator.d.ts.map +1 -0
- package/dist/{evaluator.js → src/evaluator.js} +3 -1
- package/dist/src/evaluator.js.map +1 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/{index.js → src/index.js} +10 -7
- package/dist/src/index.js.map +1 -0
- package/dist/src/logger.d.ts.map +1 -0
- package/dist/src/logger.js.map +1 -0
- package/dist/src/main.d.ts.map +1 -0
- package/dist/{main.js → src/main.js} +35 -13
- package/dist/src/main.js.map +1 -0
- package/dist/src/onboarding.d.ts.map +1 -0
- package/dist/src/onboarding.js.map +1 -0
- package/dist/src/prompts.d.ts.map +1 -0
- package/dist/src/prompts.js.map +1 -0
- package/dist/src/providers/localai.d.ts.map +1 -0
- package/dist/src/providers/localai.js.map +1 -0
- package/dist/src/providers/openai.d.ts.map +1 -0
- package/dist/src/providers/openai.js.map +1 -0
- package/dist/src/providers/shared.d.ts.map +1 -0
- package/dist/src/providers/shared.js.map +1 -0
- package/dist/src/providers.d.ts.map +1 -0
- package/dist/src/providers.js.map +1 -0
- package/dist/src/suggestions.d.ts.map +1 -0
- package/dist/src/suggestions.js.map +1 -0
- package/dist/src/telemetry.d.ts +10 -0
- package/dist/src/telemetry.d.ts.map +1 -0
- package/dist/src/telemetry.js +48 -0
- package/dist/src/telemetry.js.map +1 -0
- package/dist/{types.d.ts → src/types.d.ts} +6 -2
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/updates.d.ts +3 -0
- package/dist/src/updates.d.ts.map +1 -0
- package/dist/src/updates.js +36 -0
- package/dist/src/updates.js.map +1 -0
- package/dist/{util.d.ts → src/util.d.ts} +3 -3
- package/dist/src/util.d.ts.map +1 -0
- package/dist/{util.js → src/util.js} +12 -5
- package/dist/src/util.js.map +1 -0
- package/dist/src/web/client/assets/index-87905193.css +1 -0
- package/dist/src/web/client/assets/index-eb6d3769.js +199 -0
- package/dist/src/web/client/assets/js-yaml-8bbf9398.js +32 -0
- package/dist/{web → src/web}/client/index.html +2 -2
- package/dist/src/web/server.d.ts.map +1 -0
- package/dist/{web → src/web}/server.js +3 -4
- package/dist/src/web/server.js.map +1 -0
- package/package.json +13 -9
- package/src/assertions.ts +247 -41
- package/src/evaluator.ts +5 -2
- package/src/index.ts +7 -4
- package/src/main.ts +50 -13
- package/src/telemetry.ts +57 -0
- package/src/types.ts +23 -2
- package/src/updates.ts +37 -0
- package/src/util.ts +28 -6
- package/src/web/client/package-lock.json +3 -6
- package/src/web/client/package.json +1 -0
- package/src/web/client/src/App.tsx +32 -12
- package/src/web/client/src/ConfigModal.tsx +81 -0
- package/src/web/client/src/ResultsTable.css +18 -6
- package/src/web/client/src/ResultsTable.tsx +101 -35
- package/src/web/client/src/ResultsView.tsx +148 -12
- package/src/web/client/src/ShareModal.tsx +70 -0
- package/src/web/client/src/index.css +6 -0
- package/src/web/client/src/store.ts +6 -1
- package/src/web/client/src/types.ts +4 -0
- package/src/web/server.ts +3 -7
- package/dist/__mocks__/esm.d.ts.map +0 -1
- package/dist/__mocks__/esm.js.map +0 -1
- package/dist/assertions.d.ts.map +0 -1
- package/dist/assertions.js +0 -233
- package/dist/assertions.js.map +0 -1
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/esm.d.ts.map +0 -1
- package/dist/esm.js.map +0 -1
- package/dist/evaluator.d.ts.map +0 -1
- package/dist/evaluator.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/logger.d.ts.map +0 -1
- package/dist/logger.js.map +0 -1
- package/dist/main.d.ts.map +0 -1
- package/dist/main.js.map +0 -1
- package/dist/onboarding.d.ts.map +0 -1
- package/dist/onboarding.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/providers/localai.d.ts.map +0 -1
- package/dist/providers/localai.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/providers/shared.d.ts.map +0 -1
- package/dist/providers/shared.js.map +0 -1
- package/dist/providers.d.ts.map +0 -1
- package/dist/providers.js.map +0 -1
- package/dist/suggestions.d.ts.map +0 -1
- package/dist/suggestions.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/util.d.ts.map +0 -1
- package/dist/util.js.map +0 -1
- package/dist/web/client/assets/index-9a9ba400.css +0 -1
- package/dist/web/client/assets/index-b72d3ca9.js +0 -172
- package/dist/web/server.d.ts.map +0 -1
- package/dist/web/server.js.map +0 -1
- /package/dist/{__mocks__ → src/__mocks__}/esm.d.ts +0 -0
- /package/dist/{__mocks__ → src/__mocks__}/esm.js +0 -0
- /package/dist/{cache.d.ts → src/cache.d.ts} +0 -0
- /package/dist/{cache.js → src/cache.js} +0 -0
- /package/dist/{esm.d.ts → src/esm.d.ts} +0 -0
- /package/dist/{esm.js → src/esm.js} +0 -0
- /package/dist/{evaluator.d.ts → src/evaluator.d.ts} +0 -0
- /package/dist/{index.d.ts → src/index.d.ts} +0 -0
- /package/dist/{logger.d.ts → src/logger.d.ts} +0 -0
- /package/dist/{logger.js → src/logger.js} +0 -0
- /package/dist/{main.d.ts → src/main.d.ts} +0 -0
- /package/dist/{onboarding.d.ts → src/onboarding.d.ts} +0 -0
- /package/dist/{onboarding.js → src/onboarding.js} +0 -0
- /package/dist/{prompts.d.ts → src/prompts.d.ts} +0 -0
- /package/dist/{prompts.js → src/prompts.js} +0 -0
- /package/dist/{providers → src/providers}/localai.d.ts +0 -0
- /package/dist/{providers → src/providers}/localai.js +0 -0
- /package/dist/{providers → src/providers}/openai.d.ts +0 -0
- /package/dist/{providers → src/providers}/openai.js +0 -0
- /package/dist/{providers → src/providers}/shared.d.ts +0 -0
- /package/dist/{providers → src/providers}/shared.js +0 -0
- /package/dist/{providers.d.ts → src/providers.d.ts} +0 -0
- /package/dist/{providers.js → src/providers.js} +0 -0
- /package/dist/{suggestions.d.ts → src/suggestions.d.ts} +0 -0
- /package/dist/{suggestions.js → src/suggestions.js} +0 -0
- /package/dist/{types.js → src/types.js} +0 -0
- /package/dist/{web → src/web}/client/favicon.ico +0 -0
- /package/dist/{web → src/web}/client/logo.svg +0 -0
- /package/dist/{web → src/web}/server.d.ts +0 -0
package/src/index.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import { evaluate as doEvaluate } from './evaluator';
|
|
2
|
-
import { loadApiProviders } from './providers';
|
|
3
1
|
import assertions from './assertions';
|
|
4
2
|
import providers from './providers';
|
|
3
|
+
import telemetry from './telemetry';
|
|
4
|
+
import { evaluate as doEvaluate } from './evaluator';
|
|
5
|
+
import { loadApiProviders } from './providers';
|
|
6
|
+
import { readTests } from './util';
|
|
5
7
|
|
|
6
8
|
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
|
|
7
|
-
import { readTests } from './util';
|
|
8
9
|
|
|
9
10
|
export * from './types';
|
|
10
11
|
|
|
@@ -24,7 +25,9 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
|
|
|
24
25
|
display: promptContent,
|
|
25
26
|
})),
|
|
26
27
|
};
|
|
27
|
-
|
|
28
|
+
const ret = await doEvaluate(constructedTestSuite, options);
|
|
29
|
+
await telemetry.send();
|
|
30
|
+
return ret;
|
|
28
31
|
}
|
|
29
32
|
|
|
30
33
|
module.exports = {
|
package/src/main.ts
CHANGED
|
@@ -6,6 +6,7 @@ import Table from 'cli-table3';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
import { Command } from 'commander';
|
|
8
8
|
|
|
9
|
+
import telemetry from './telemetry';
|
|
9
10
|
import logger, { setLogLevel } from './logger';
|
|
10
11
|
import { loadApiProvider, loadApiProviders } from './providers';
|
|
11
12
|
import { evaluate } from './evaluator';
|
|
@@ -17,9 +18,11 @@ import {
|
|
|
17
18
|
writeLatestResults,
|
|
18
19
|
writeOutput,
|
|
19
20
|
} from './util';
|
|
21
|
+
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
22
|
+
import { disableCache } from './cache';
|
|
20
23
|
import { getDirectory } from './esm';
|
|
21
24
|
import { init } from './web/server';
|
|
22
|
-
import {
|
|
25
|
+
import { checkForUpdates } from './updates';
|
|
23
26
|
|
|
24
27
|
import type {
|
|
25
28
|
CommandLineOptions,
|
|
@@ -28,7 +31,6 @@ import type {
|
|
|
28
31
|
TestSuite,
|
|
29
32
|
UnifiedConfig,
|
|
30
33
|
} from './types';
|
|
31
|
-
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
32
34
|
|
|
33
35
|
function createDummyFiles(directory: string | null) {
|
|
34
36
|
if (directory) {
|
|
@@ -60,6 +62,8 @@ function createDummyFiles(directory: string | null) {
|
|
|
60
62
|
}
|
|
61
63
|
|
|
62
64
|
async function main() {
|
|
65
|
+
await checkForUpdates();
|
|
66
|
+
|
|
63
67
|
const pwd = process.cwd();
|
|
64
68
|
const potentialPaths = [
|
|
65
69
|
pathJoin(pwd, 'promptfooconfig.js'),
|
|
@@ -68,7 +72,7 @@ async function main() {
|
|
|
68
72
|
];
|
|
69
73
|
let config: Partial<UnifiedConfig> = {};
|
|
70
74
|
for (const path of potentialPaths) {
|
|
71
|
-
const maybeConfig = maybeReadConfig(path);
|
|
75
|
+
const maybeConfig = await maybeReadConfig(path);
|
|
72
76
|
if (maybeConfig) {
|
|
73
77
|
config = maybeConfig;
|
|
74
78
|
break;
|
|
@@ -95,15 +99,23 @@ async function main() {
|
|
|
95
99
|
program
|
|
96
100
|
.command('init [directory]')
|
|
97
101
|
.description('Initialize project with dummy files')
|
|
98
|
-
.action((directory: string | null) => {
|
|
102
|
+
.action(async (directory: string | null) => {
|
|
99
103
|
createDummyFiles(directory);
|
|
104
|
+
telemetry.record('command_used', {
|
|
105
|
+
name: 'init',
|
|
106
|
+
});
|
|
107
|
+
await telemetry.send();
|
|
100
108
|
});
|
|
101
109
|
|
|
102
110
|
program
|
|
103
111
|
.command('view')
|
|
104
112
|
.description('Start browser ui')
|
|
105
113
|
.option('-p, --port <number>', 'Port number', '15500')
|
|
106
|
-
.action((cmdObj: { port: number } & Command) => {
|
|
114
|
+
.action(async (cmdObj: { port: number } & Command) => {
|
|
115
|
+
telemetry.record('command_used', {
|
|
116
|
+
name: 'view',
|
|
117
|
+
});
|
|
118
|
+
await telemetry.send();
|
|
107
119
|
init(cmdObj.port);
|
|
108
120
|
});
|
|
109
121
|
|
|
@@ -154,8 +166,16 @@ async function main() {
|
|
|
154
166
|
'This suffix is append to every prompt',
|
|
155
167
|
config.defaultTest?.options?.suffix,
|
|
156
168
|
)
|
|
157
|
-
.option(
|
|
158
|
-
|
|
169
|
+
.option(
|
|
170
|
+
'--no-write',
|
|
171
|
+
'Do not write results to promptfoo directory',
|
|
172
|
+
config?.commandLineOptions?.write,
|
|
173
|
+
)
|
|
174
|
+
.option(
|
|
175
|
+
'--no-cache',
|
|
176
|
+
'Do not read or write results to disk cache',
|
|
177
|
+
config?.commandLineOptions?.cache,
|
|
178
|
+
)
|
|
159
179
|
.option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
|
|
160
180
|
.option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
|
|
161
181
|
.option('--view [port]', 'View in browser ui')
|
|
@@ -172,7 +192,7 @@ async function main() {
|
|
|
172
192
|
const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
|
|
173
193
|
const configPath = cmdObj.config;
|
|
174
194
|
if (configPath) {
|
|
175
|
-
config = readConfig(configPath);
|
|
195
|
+
config = await readConfig(configPath);
|
|
176
196
|
} else {
|
|
177
197
|
config = {
|
|
178
198
|
prompts: cmdObj.prompts || config.prompts,
|
|
@@ -256,8 +276,9 @@ async function main() {
|
|
|
256
276
|
},
|
|
257
277
|
});
|
|
258
278
|
// Skip first row (header) and add the rest. Color PASS/FAIL
|
|
259
|
-
for (const row of summary.table.body) {
|
|
279
|
+
for (const row of summary.table.body.slice(0, 25)) {
|
|
260
280
|
table.push([
|
|
281
|
+
...row.vars,
|
|
261
282
|
...row.outputs.map((col) => {
|
|
262
283
|
const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
|
|
263
284
|
if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
|
|
@@ -275,18 +296,29 @@ async function main() {
|
|
|
275
296
|
}
|
|
276
297
|
return col;
|
|
277
298
|
}),
|
|
278
|
-
...row.vars,
|
|
279
299
|
]);
|
|
280
300
|
}
|
|
281
301
|
|
|
282
302
|
logger.info('\n' + table.toString());
|
|
303
|
+
if (summary.table.body.length > 25) {
|
|
304
|
+
const rowsLeft = summary.table.body.length - 25;
|
|
305
|
+
logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
|
|
306
|
+
}
|
|
283
307
|
}
|
|
308
|
+
|
|
309
|
+
const border = '='.repeat(process.stdout.columns - 10);
|
|
310
|
+
logger.info(border);
|
|
284
311
|
if (cmdObj.view || !cmdObj.write) {
|
|
285
|
-
logger.info('Evaluation complete
|
|
312
|
+
logger.info(`${chalk.green('✔')} Evaluation complete`);
|
|
286
313
|
} else {
|
|
287
|
-
writeLatestResults(summary);
|
|
288
|
-
logger.info(
|
|
314
|
+
writeLatestResults(summary, config);
|
|
315
|
+
logger.info(
|
|
316
|
+
`${chalk.green('✔')} Evaluation complete. To use web viewer, run ${chalk.green(
|
|
317
|
+
'promptfoo view',
|
|
318
|
+
)}`,
|
|
319
|
+
);
|
|
289
320
|
}
|
|
321
|
+
logger.info(border);
|
|
290
322
|
logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
|
|
291
323
|
logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
|
|
292
324
|
logger.info(
|
|
@@ -294,6 +326,11 @@ async function main() {
|
|
|
294
326
|
);
|
|
295
327
|
logger.info('Done.');
|
|
296
328
|
|
|
329
|
+
telemetry.record('command_used', {
|
|
330
|
+
name: 'eval',
|
|
331
|
+
});
|
|
332
|
+
await telemetry.send();
|
|
333
|
+
|
|
297
334
|
if (cmdObj.view) {
|
|
298
335
|
init(parseInt(cmdObj.view, 10) || 15500);
|
|
299
336
|
}
|
package/src/telemetry.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import packageJson from '../package.json';
|
|
2
|
+
import { fetchWithTimeout } from './util';
|
|
3
|
+
|
|
4
|
+
type TelemetryEvent = {
|
|
5
|
+
event: string;
|
|
6
|
+
packageVersion: string;
|
|
7
|
+
properties: Record<string, string | number>;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
|
|
11
|
+
|
|
12
|
+
const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
|
|
13
|
+
|
|
14
|
+
const TELEMETRY_TIMEOUT_MS = 1000;
|
|
15
|
+
|
|
16
|
+
export class Telemetry {
|
|
17
|
+
private events: TelemetryEvent[] = [];
|
|
18
|
+
|
|
19
|
+
get disabled() {
|
|
20
|
+
return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
|
|
24
|
+
if (!this.disabled) {
|
|
25
|
+
this.events.push({
|
|
26
|
+
event: eventName,
|
|
27
|
+
packageVersion: packageJson.version,
|
|
28
|
+
properties,
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
async send(): Promise<void> {
|
|
34
|
+
if (!this.disabled && this.events.length > 0) {
|
|
35
|
+
try {
|
|
36
|
+
const response = await fetchWithTimeout(
|
|
37
|
+
TELEMETRY_ENDPOINT,
|
|
38
|
+
{
|
|
39
|
+
method: 'POST',
|
|
40
|
+
headers: {
|
|
41
|
+
'Content-Type': 'application/json',
|
|
42
|
+
},
|
|
43
|
+
body: JSON.stringify(this.events),
|
|
44
|
+
},
|
|
45
|
+
TELEMETRY_TIMEOUT_MS,
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
if (response.ok) {
|
|
49
|
+
this.events = [];
|
|
50
|
+
}
|
|
51
|
+
} catch (err) {}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const telemetry = new Telemetry();
|
|
57
|
+
export default telemetry;
|
package/src/types.ts
CHANGED
|
@@ -113,13 +113,34 @@ export interface GradingResult {
|
|
|
113
113
|
tokensUsed?: TokenUsage;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
type BaseAssertionTypes =
|
|
117
|
+
| 'equals'
|
|
118
|
+
| 'contains'
|
|
119
|
+
| 'icontains'
|
|
120
|
+
| 'contains-all'
|
|
121
|
+
| 'contains-any'
|
|
122
|
+
| 'regex'
|
|
123
|
+
| 'is-json'
|
|
124
|
+
| 'contains-json'
|
|
125
|
+
| 'javascript'
|
|
126
|
+
| 'similar'
|
|
127
|
+
| 'llm-rubric'
|
|
128
|
+
| 'webhook'
|
|
129
|
+
| 'rouge-n'
|
|
130
|
+
| 'rouge-s'
|
|
131
|
+
| 'rouge-l';
|
|
132
|
+
|
|
133
|
+
type NotPrefixed<T extends string> = `not-${T}`;
|
|
134
|
+
|
|
135
|
+
export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
|
|
136
|
+
|
|
116
137
|
// TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
|
|
117
138
|
export interface Assertion {
|
|
118
139
|
// Type of assertion
|
|
119
|
-
type:
|
|
140
|
+
type: AssertionType;
|
|
120
141
|
|
|
121
142
|
// The expected value, if applicable
|
|
122
|
-
value?: string;
|
|
143
|
+
value?: string | string[];
|
|
123
144
|
|
|
124
145
|
// The threshold value, only applicable for similarity (cosine distance)
|
|
125
146
|
threshold?: number;
|
package/src/updates.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import semverGt from 'semver/functions/gt';
|
|
3
|
+
|
|
4
|
+
import logger from './logger';
|
|
5
|
+
import { fetchWithTimeout } from './util';
|
|
6
|
+
import packageJson from '../package.json';
|
|
7
|
+
|
|
8
|
+
const VERSION = packageJson.version;
|
|
9
|
+
|
|
10
|
+
export async function getLatestVersion(packageName: string) {
|
|
11
|
+
const response = await fetchWithTimeout(`https://registry.npmjs.org/${packageName}`, {}, 1000);
|
|
12
|
+
if (!response.ok) {
|
|
13
|
+
throw new Error(`Failed to fetch package information for ${packageName}`);
|
|
14
|
+
}
|
|
15
|
+
const data = await response.json();
|
|
16
|
+
return data['dist-tags'].latest;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export async function checkForUpdates(): Promise<boolean> {
|
|
20
|
+
const latestVersion = await getLatestVersion('promptfoo');
|
|
21
|
+
if (semverGt(latestVersion, VERSION)) {
|
|
22
|
+
const border = '='.repeat(process.stdout.columns - 10);
|
|
23
|
+
logger.info(
|
|
24
|
+
`\n${border}
|
|
25
|
+
${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
|
|
26
|
+
VERSION,
|
|
27
|
+
)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
28
|
+
|
|
29
|
+
Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
|
|
30
|
+
'npm install -g promptfoo@latest',
|
|
31
|
+
)} to update.
|
|
32
|
+
${border}\n`,
|
|
33
|
+
);
|
|
34
|
+
return true;
|
|
35
|
+
}
|
|
36
|
+
return false;
|
|
37
|
+
}
|
package/src/util.ts
CHANGED
|
@@ -2,6 +2,7 @@ import * as fs from 'fs';
|
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import * as os from 'node:os';
|
|
4
4
|
|
|
5
|
+
import $RefParser from '@apidevtools/json-schema-ref-parser';
|
|
5
6
|
import fetch from 'node-fetch';
|
|
6
7
|
import yaml from 'js-yaml';
|
|
7
8
|
import nunjucks from 'nunjucks';
|
|
@@ -15,7 +16,15 @@ import { getDirectory } from './esm';
|
|
|
15
16
|
|
|
16
17
|
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
17
18
|
|
|
18
|
-
import type {
|
|
19
|
+
import type {
|
|
20
|
+
Assertion,
|
|
21
|
+
CsvRow,
|
|
22
|
+
EvaluateSummary,
|
|
23
|
+
UnifiedConfig,
|
|
24
|
+
TestCase,
|
|
25
|
+
Prompt,
|
|
26
|
+
TestSuite,
|
|
27
|
+
} from './types';
|
|
19
28
|
import { assertionFromString } from './assertions';
|
|
20
29
|
|
|
21
30
|
const PROMPT_DELIMITER = '---';
|
|
@@ -28,14 +37,14 @@ function parseJson(json: string): any | undefined {
|
|
|
28
37
|
}
|
|
29
38
|
}
|
|
30
39
|
|
|
31
|
-
export function maybeReadConfig(configPath: string): UnifiedConfig | undefined {
|
|
40
|
+
export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
|
|
32
41
|
if (!fs.existsSync(configPath)) {
|
|
33
42
|
return undefined;
|
|
34
43
|
}
|
|
35
44
|
return readConfig(configPath);
|
|
36
45
|
}
|
|
37
46
|
|
|
38
|
-
export function readConfig(configPath: string): UnifiedConfig {
|
|
47
|
+
export async function readConfig(configPath: string): Promise<UnifiedConfig> {
|
|
39
48
|
const ext = path.parse(configPath).ext;
|
|
40
49
|
switch (ext) {
|
|
41
50
|
case '.json':
|
|
@@ -45,7 +54,9 @@ export function readConfig(configPath: string): UnifiedConfig {
|
|
|
45
54
|
return require(configPath) as UnifiedConfig;
|
|
46
55
|
case '.yaml':
|
|
47
56
|
case '.yml':
|
|
48
|
-
|
|
57
|
+
let ret = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
|
|
58
|
+
ret = (await $RefParser.dereference(ret)) as UnifiedConfig;
|
|
59
|
+
return ret;
|
|
49
60
|
default:
|
|
50
61
|
throw new Error(`Unsupported configuration file format: ${ext}`);
|
|
51
62
|
}
|
|
@@ -235,11 +246,22 @@ export function getLatestResultsPath(): string {
|
|
|
235
246
|
return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
|
|
236
247
|
}
|
|
237
248
|
|
|
238
|
-
export function writeLatestResults(results: EvaluateSummary) {
|
|
249
|
+
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
|
|
239
250
|
const latestResultsPath = getLatestResultsPath();
|
|
240
251
|
try {
|
|
241
252
|
fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
|
|
242
|
-
fs.writeFileSync(
|
|
253
|
+
fs.writeFileSync(
|
|
254
|
+
latestResultsPath,
|
|
255
|
+
JSON.stringify(
|
|
256
|
+
{
|
|
257
|
+
version: 1,
|
|
258
|
+
config,
|
|
259
|
+
results,
|
|
260
|
+
},
|
|
261
|
+
null,
|
|
262
|
+
2,
|
|
263
|
+
),
|
|
264
|
+
);
|
|
243
265
|
} catch (err) {
|
|
244
266
|
logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
|
|
245
267
|
}
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
"@mui/icons-material": "^5.11.16",
|
|
14
14
|
"@mui/material": "^5.13.0",
|
|
15
15
|
"@tanstack/react-table": "^8.9.1",
|
|
16
|
+
"js-yaml": "^4.1.0",
|
|
16
17
|
"react": "^18.2.0",
|
|
17
18
|
"react-dnd": "^16.0.1",
|
|
18
19
|
"react-dnd-html5-backend": "^16.0.1",
|
|
@@ -1652,8 +1653,7 @@
|
|
|
1652
1653
|
"node_modules/argparse": {
|
|
1653
1654
|
"version": "2.0.1",
|
|
1654
1655
|
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
|
1655
|
-
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
|
1656
|
-
"dev": true
|
|
1656
|
+
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
|
1657
1657
|
},
|
|
1658
1658
|
"node_modules/array-union": {
|
|
1659
1659
|
"version": "2.1.0",
|
|
@@ -2512,7 +2512,6 @@
|
|
|
2512
2512
|
"version": "4.1.0",
|
|
2513
2513
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
|
2514
2514
|
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
|
2515
|
-
"dev": true,
|
|
2516
2515
|
"dependencies": {
|
|
2517
2516
|
"argparse": "^2.0.1"
|
|
2518
2517
|
},
|
|
@@ -4436,8 +4435,7 @@
|
|
|
4436
4435
|
"argparse": {
|
|
4437
4436
|
"version": "2.0.1",
|
|
4438
4437
|
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
|
4439
|
-
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
|
4440
|
-
"dev": true
|
|
4438
|
+
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
|
4441
4439
|
},
|
|
4442
4440
|
"array-union": {
|
|
4443
4441
|
"version": "2.1.0",
|
|
@@ -5102,7 +5100,6 @@
|
|
|
5102
5100
|
"version": "4.1.0",
|
|
5103
5101
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
|
5104
5102
|
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
|
5105
|
-
"dev": true,
|
|
5106
5103
|
"requires": {
|
|
5107
5104
|
"argparse": "^2.0.1"
|
|
5108
5105
|
}
|
|
@@ -11,8 +11,9 @@ import { useStore } from './store.js';
|
|
|
11
11
|
import './App.css';
|
|
12
12
|
|
|
13
13
|
function App() {
|
|
14
|
-
const { table, setTable } = useStore();
|
|
14
|
+
const { table, setTable, setConfig } = useStore();
|
|
15
15
|
const [loaded, setLoaded] = React.useState<boolean>(false);
|
|
16
|
+
const loadedFromApi = React.useRef(false);
|
|
16
17
|
|
|
17
18
|
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
|
|
18
19
|
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
|
|
@@ -37,24 +38,43 @@ function App() {
|
|
|
37
38
|
};
|
|
38
39
|
|
|
39
40
|
React.useEffect(() => {
|
|
40
|
-
|
|
41
|
+
const fetchEvalData = async (id: string) => {
|
|
42
|
+
if (loadedFromApi.current) {
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
loadedFromApi.current = true;
|
|
46
|
+
const response = await fetch(`https://api.promptfoo.dev/eval/${id}`);
|
|
47
|
+
const body = await response.json();
|
|
48
|
+
setTable(body.data.results.table);
|
|
49
|
+
setConfig(body.data.config);
|
|
50
|
+
setLoaded(true);
|
|
51
|
+
};
|
|
52
|
+
|
|
41
53
|
const socket = SocketIOClient(`http://localhost:15500`);
|
|
42
54
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
55
|
+
const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
|
|
56
|
+
if (pathMatch) {
|
|
57
|
+
const id = pathMatch[1];
|
|
58
|
+
fetchEvalData(id);
|
|
59
|
+
} else {
|
|
60
|
+
socket.on('init', (data) => {
|
|
61
|
+
console.log('Initialized socket connection', data);
|
|
62
|
+
setLoaded(true);
|
|
63
|
+
setTable(data.results.table);
|
|
64
|
+
setConfig(data.config);
|
|
65
|
+
});
|
|
48
66
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
67
|
+
socket.on('update', (data) => {
|
|
68
|
+
console.log('Received data update', data);
|
|
69
|
+
setTable(data.results.table);
|
|
70
|
+
setConfig(data.config);
|
|
71
|
+
});
|
|
72
|
+
}
|
|
53
73
|
|
|
54
74
|
return () => {
|
|
55
75
|
socket.disconnect();
|
|
56
76
|
};
|
|
57
|
-
}, [
|
|
77
|
+
}, [setTable, setConfig]);
|
|
58
78
|
|
|
59
79
|
return (
|
|
60
80
|
<ThemeProvider theme={theme}>
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import Dialog from '@mui/material/Dialog';
|
|
3
|
+
import DialogTitle from '@mui/material/DialogTitle';
|
|
4
|
+
import DialogContent from '@mui/material/DialogContent';
|
|
5
|
+
import DialogActions from '@mui/material/DialogActions';
|
|
6
|
+
import Button from '@mui/material/Button';
|
|
7
|
+
import Typography from '@mui/material/Typography';
|
|
8
|
+
import { useStore } from './store';
|
|
9
|
+
import { IconButton, Box } from '@mui/material';
|
|
10
|
+
import { FileCopy, Check } from '@mui/icons-material';
|
|
11
|
+
|
|
12
|
+
interface ConfigModalProps {
|
|
13
|
+
open: boolean;
|
|
14
|
+
onClose: () => void;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export default function ConfigModal({ open, onClose }: ConfigModalProps) {
|
|
18
|
+
const { config } = useStore();
|
|
19
|
+
const textareaRef = React.useRef<HTMLTextAreaElement>(null);
|
|
20
|
+
const [copied, setCopied] = React.useState(false);
|
|
21
|
+
const [yamlConfig, setYamlConfig] = React.useState('');
|
|
22
|
+
|
|
23
|
+
React.useEffect(() => {
|
|
24
|
+
if (open) {
|
|
25
|
+
(async () => {
|
|
26
|
+
const { default: yaml } = await import('js-yaml');
|
|
27
|
+
setYamlConfig(yaml.dump(config));
|
|
28
|
+
})();
|
|
29
|
+
}
|
|
30
|
+
}, [open, config]);
|
|
31
|
+
|
|
32
|
+
const handleCopyClick = () => {
|
|
33
|
+
if (textareaRef.current) {
|
|
34
|
+
textareaRef.current.select();
|
|
35
|
+
document.execCommand('copy');
|
|
36
|
+
setCopied(true);
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
const handleClose = () => {
|
|
41
|
+
setCopied(false);
|
|
42
|
+
onClose();
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
return (
|
|
46
|
+
<Dialog
|
|
47
|
+
open={open}
|
|
48
|
+
onClose={handleClose}
|
|
49
|
+
aria-labelledby="config-dialog-title"
|
|
50
|
+
maxWidth="md"
|
|
51
|
+
fullWidth
|
|
52
|
+
>
|
|
53
|
+
<DialogTitle id="config-dialog-title">
|
|
54
|
+
<Box display="flex" justifyContent="space-between" alignItems="center">
|
|
55
|
+
<Typography variant="h6">Config</Typography>
|
|
56
|
+
<IconButton onClick={handleCopyClick}>{copied ? <Check /> : <FileCopy />}</IconButton>
|
|
57
|
+
</Box>
|
|
58
|
+
</DialogTitle>
|
|
59
|
+
<DialogContent>
|
|
60
|
+
<Typography variant="body1" component="div">
|
|
61
|
+
<textarea
|
|
62
|
+
ref={textareaRef}
|
|
63
|
+
readOnly
|
|
64
|
+
value={yamlConfig}
|
|
65
|
+
style={{
|
|
66
|
+
width: '100%',
|
|
67
|
+
minHeight: '400px',
|
|
68
|
+
fontFamily: 'monospace',
|
|
69
|
+
border: '1px solid #ccc',
|
|
70
|
+
}}
|
|
71
|
+
/>
|
|
72
|
+
</Typography>
|
|
73
|
+
</DialogContent>
|
|
74
|
+
<DialogActions>
|
|
75
|
+
<Button onClick={handleClose} color="primary">
|
|
76
|
+
Close
|
|
77
|
+
</Button>
|
|
78
|
+
</DialogActions>
|
|
79
|
+
</Dialog>
|
|
80
|
+
);
|
|
81
|
+
}
|
|
@@ -40,7 +40,6 @@ td,
|
|
|
40
40
|
.td {
|
|
41
41
|
position: relative;
|
|
42
42
|
box-shadow: inset 0 0 0 1px var(--border-color);
|
|
43
|
-
word-break: break-all;
|
|
44
43
|
vertical-align: top;
|
|
45
44
|
|
|
46
45
|
padding: 1.5rem;
|
|
@@ -50,11 +49,11 @@ th.variable,
|
|
|
50
49
|
.th.variable,
|
|
51
50
|
td.variable,
|
|
52
51
|
.td.variable {
|
|
53
|
-
background-color:
|
|
52
|
+
background-color: var(--variable-background-color);
|
|
54
53
|
}
|
|
55
54
|
|
|
56
55
|
tr.header {
|
|
57
|
-
background-color:
|
|
56
|
+
background-color: var(--header-background-color);
|
|
58
57
|
}
|
|
59
58
|
|
|
60
59
|
th,
|
|
@@ -62,7 +61,7 @@ th,
|
|
|
62
61
|
padding: 1rem;
|
|
63
62
|
position: relative;
|
|
64
63
|
text-align: center;
|
|
65
|
-
|
|
64
|
+
vertical-align: bottom;
|
|
66
65
|
}
|
|
67
66
|
|
|
68
67
|
tr .cell {
|
|
@@ -72,7 +71,7 @@ tr .cell-rating {
|
|
|
72
71
|
visibility: hidden;
|
|
73
72
|
position: absolute;
|
|
74
73
|
bottom: 1.25rem;
|
|
75
|
-
right:
|
|
74
|
+
right: 0;
|
|
76
75
|
line-height: 0;
|
|
77
76
|
font-size: 1.75rem;
|
|
78
77
|
}
|
|
@@ -83,7 +82,10 @@ tr:hover .cell-rating {
|
|
|
83
82
|
|
|
84
83
|
tr .cell-rating .rating {
|
|
85
84
|
cursor: pointer;
|
|
86
|
-
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
tr .cell-rating .rating:first-child {
|
|
88
|
+
margin-right: 0.5rem;
|
|
87
89
|
}
|
|
88
90
|
|
|
89
91
|
th .smalltext {
|
|
@@ -97,6 +99,16 @@ th:hover .smalltext {
|
|
|
97
99
|
visibility: visible;
|
|
98
100
|
}
|
|
99
101
|
|
|
102
|
+
th .summary {
|
|
103
|
+
font-weight: normal;
|
|
104
|
+
font-size: 0.8rem;
|
|
105
|
+
padding: 0.25rem;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
th .summary.highlight {
|
|
109
|
+
background-color: var(--success-background-color);
|
|
110
|
+
}
|
|
111
|
+
|
|
100
112
|
td,
|
|
101
113
|
.td {
|
|
102
114
|
}
|