promptfoo 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +87 -0
- package/dist/src/__mocks__/esm.d.ts.map +1 -0
- package/dist/src/__mocks__/esm.js.map +1 -0
- package/dist/src/assertions.d.ts.map +1 -0
- package/dist/{assertions.js → src/assertions.js} +19 -20
- package/dist/src/assertions.js.map +1 -0
- package/dist/src/cache.d.ts.map +1 -0
- package/dist/{cache.js → src/cache.js} +1 -1
- package/dist/src/cache.js.map +1 -0
- package/dist/src/esm.d.ts.map +1 -0
- package/dist/src/esm.js.map +1 -0
- package/dist/src/evaluator.d.ts.map +1 -0
- package/dist/{evaluator.js → src/evaluator.js} +28 -25
- package/dist/src/evaluator.js.map +1 -0
- package/dist/{index.d.ts → src/index.d.ts} +4 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/{index.js → src/index.js} +15 -11
- package/dist/src/index.js.map +1 -0
- package/dist/src/logger.d.ts +12 -0
- package/dist/src/logger.d.ts.map +1 -0
- package/dist/{logger.js → src/logger.js} +13 -5
- package/dist/src/logger.js.map +1 -0
- package/dist/src/main.d.ts.map +1 -0
- package/dist/{main.js → src/main.js} +30 -47
- package/dist/src/main.js.map +1 -0
- package/dist/{onboarding.d.ts → src/onboarding.d.ts} +1 -1
- package/dist/src/onboarding.d.ts.map +1 -0
- package/dist/{onboarding.js → src/onboarding.js} +6 -0
- package/dist/src/onboarding.js.map +1 -0
- package/dist/src/prompts.d.ts.map +1 -0
- package/dist/src/prompts.js.map +1 -0
- package/dist/src/providers/localai.d.ts.map +1 -0
- package/dist/src/providers/localai.js.map +1 -0
- package/dist/{providers → src/providers}/openai.d.ts +11 -3
- package/dist/src/providers/openai.d.ts.map +1 -0
- package/dist/{providers → src/providers}/openai.js +17 -5
- package/dist/src/providers/openai.js.map +1 -0
- package/dist/src/providers/shared.d.ts.map +1 -0
- package/dist/src/providers/shared.js.map +1 -0
- package/dist/{providers.d.ts → src/providers.d.ts} +3 -3
- package/dist/src/providers.d.ts.map +1 -0
- package/dist/{providers.js → src/providers.js} +16 -7
- package/dist/src/providers.js.map +1 -0
- package/dist/src/suggestions.d.ts.map +1 -0
- package/dist/src/suggestions.js.map +1 -0
- package/dist/src/table.d.ts +4 -0
- package/dist/src/table.d.ts.map +1 -0
- package/dist/src/table.js +48 -0
- package/dist/src/table.js.map +1 -0
- package/dist/src/telemetry.d.ts +10 -0
- package/dist/src/telemetry.d.ts.map +1 -0
- package/dist/src/telemetry.js +48 -0
- package/dist/src/telemetry.js.map +1 -0
- package/dist/{types.d.ts → src/types.d.ts} +7 -1
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/updates.d.ts +3 -0
- package/dist/src/updates.d.ts.map +1 -0
- package/dist/src/updates.js +36 -0
- package/dist/src/updates.js.map +1 -0
- package/dist/{util.d.ts → src/util.d.ts} +1 -1
- package/dist/src/util.d.ts.map +1 -0
- package/dist/{util.js → src/util.js} +15 -2
- package/dist/src/util.js.map +1 -0
- package/dist/src/web/client/assets/index-0f6d6b29.js +199 -0
- package/dist/src/web/client/assets/js-yaml-8bbf9398.js +32 -0
- package/dist/{web → src/web}/client/index.html +1 -1
- package/dist/src/web/server.d.ts.map +1 -0
- package/dist/{web → src/web}/server.js +3 -4
- package/dist/src/web/server.js.map +1 -0
- package/package.json +11 -9
- package/src/assertions.ts +18 -23
- package/src/cache.ts +1 -1
- package/src/evaluator.ts +38 -34
- package/src/index.ts +10 -10
- package/src/logger.ts +13 -5
- package/src/main.ts +38 -48
- package/src/onboarding.ts +6 -0
- package/src/providers/openai.ts +32 -6
- package/src/providers.ts +32 -9
- package/src/table.ts +41 -0
- package/src/telemetry.ts +57 -0
- package/src/types.ts +10 -1
- package/src/updates.ts +37 -0
- package/src/util.ts +34 -4
- package/src/web/client/package.json +1 -0
- package/src/web/client/src/App.tsx +36 -12
- package/src/web/client/src/ConfigModal.tsx +81 -0
- package/src/web/client/src/ResultsView.tsx +76 -3
- package/src/web/client/src/ShareModal.tsx +70 -0
- package/src/web/client/src/store.ts +6 -1
- package/src/web/client/src/types.ts +2 -0
- package/src/web/server.ts +3 -7
- package/dist/__mocks__/esm.d.ts.map +0 -1
- package/dist/__mocks__/esm.js.map +0 -1
- package/dist/assertions.d.ts.map +0 -1
- package/dist/assertions.js.map +0 -1
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/esm.d.ts.map +0 -1
- package/dist/esm.js.map +0 -1
- package/dist/evaluator.d.ts.map +0 -1
- package/dist/evaluator.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/logger.d.ts +0 -11
- package/dist/logger.d.ts.map +0 -1
- package/dist/logger.js.map +0 -1
- package/dist/main.d.ts.map +0 -1
- package/dist/main.js.map +0 -1
- package/dist/onboarding.d.ts.map +0 -1
- package/dist/onboarding.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/providers/localai.d.ts.map +0 -1
- package/dist/providers/localai.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/providers/shared.d.ts.map +0 -1
- package/dist/providers/shared.js.map +0 -1
- package/dist/providers.d.ts.map +0 -1
- package/dist/providers.js.map +0 -1
- package/dist/suggestions.d.ts.map +0 -1
- package/dist/suggestions.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/util.d.ts.map +0 -1
- package/dist/util.js.map +0 -1
- package/dist/web/client/assets/index-15dfcd18.js +0 -172
- package/dist/web/server.d.ts.map +0 -1
- package/dist/web/server.js.map +0 -1
- package/src/web/client/package-lock.json +0 -5729
- /package/dist/{__mocks__ → src/__mocks__}/esm.d.ts +0 -0
- /package/dist/{__mocks__ → src/__mocks__}/esm.js +0 -0
- /package/dist/{assertions.d.ts → src/assertions.d.ts} +0 -0
- /package/dist/{cache.d.ts → src/cache.d.ts} +0 -0
- /package/dist/{esm.d.ts → src/esm.d.ts} +0 -0
- /package/dist/{esm.js → src/esm.js} +0 -0
- /package/dist/{evaluator.d.ts → src/evaluator.d.ts} +0 -0
- /package/dist/{main.d.ts → src/main.d.ts} +0 -0
- /package/dist/{prompts.d.ts → src/prompts.d.ts} +0 -0
- /package/dist/{prompts.js → src/prompts.js} +0 -0
- /package/dist/{providers → src/providers}/localai.d.ts +0 -0
- /package/dist/{providers → src/providers}/localai.js +0 -0
- /package/dist/{providers → src/providers}/shared.d.ts +0 -0
- /package/dist/{providers → src/providers}/shared.js +0 -0
- /package/dist/{suggestions.d.ts → src/suggestions.d.ts} +0 -0
- /package/dist/{suggestions.js → src/suggestions.js} +0 -0
- /package/dist/{tableOutput.html → src/tableOutput.html} +0 -0
- /package/dist/{types.js → src/types.js} +0 -0
- /package/dist/{web → src/web}/client/assets/index-87905193.css +0 -0
- /package/dist/{web → src/web}/client/favicon.ico +0 -0
- /package/dist/{web → src/web}/client/logo.svg +0 -0
- /package/dist/{web → src/web}/server.d.ts +0 -0
package/src/main.ts
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
3
3
|
import { join as pathJoin } from 'path';
|
|
4
4
|
|
|
5
|
-
import Table from 'cli-table3';
|
|
6
5
|
import chalk from 'chalk';
|
|
7
6
|
import { Command } from 'commander';
|
|
8
7
|
|
|
9
|
-
import
|
|
8
|
+
import telemetry from './telemetry';
|
|
9
|
+
import logger, { getLogLevel, setLogLevel } from './logger';
|
|
10
10
|
import { loadApiProvider, loadApiProviders } from './providers';
|
|
11
11
|
import { evaluate } from './evaluator';
|
|
12
12
|
import {
|
|
@@ -17,9 +17,11 @@ import {
|
|
|
17
17
|
writeLatestResults,
|
|
18
18
|
writeOutput,
|
|
19
19
|
} from './util';
|
|
20
|
+
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
21
|
+
import { disableCache } from './cache';
|
|
20
22
|
import { getDirectory } from './esm';
|
|
21
23
|
import { init } from './web/server';
|
|
22
|
-
import {
|
|
24
|
+
import { checkForUpdates } from './updates';
|
|
23
25
|
|
|
24
26
|
import type {
|
|
25
27
|
CommandLineOptions,
|
|
@@ -28,7 +30,7 @@ import type {
|
|
|
28
30
|
TestSuite,
|
|
29
31
|
UnifiedConfig,
|
|
30
32
|
} from './types';
|
|
31
|
-
import {
|
|
33
|
+
import { generateTable } from './table';
|
|
32
34
|
|
|
33
35
|
function createDummyFiles(directory: string | null) {
|
|
34
36
|
if (directory) {
|
|
@@ -60,6 +62,8 @@ function createDummyFiles(directory: string | null) {
|
|
|
60
62
|
}
|
|
61
63
|
|
|
62
64
|
async function main() {
|
|
65
|
+
await checkForUpdates();
|
|
66
|
+
|
|
63
67
|
const pwd = process.cwd();
|
|
64
68
|
const potentialPaths = [
|
|
65
69
|
pathJoin(pwd, 'promptfooconfig.js'),
|
|
@@ -95,15 +99,23 @@ async function main() {
|
|
|
95
99
|
program
|
|
96
100
|
.command('init [directory]')
|
|
97
101
|
.description('Initialize project with dummy files')
|
|
98
|
-
.action((directory: string | null) => {
|
|
102
|
+
.action(async (directory: string | null) => {
|
|
99
103
|
createDummyFiles(directory);
|
|
104
|
+
telemetry.record('command_used', {
|
|
105
|
+
name: 'init',
|
|
106
|
+
});
|
|
107
|
+
await telemetry.send();
|
|
100
108
|
});
|
|
101
109
|
|
|
102
110
|
program
|
|
103
111
|
.command('view')
|
|
104
112
|
.description('Start browser ui')
|
|
105
113
|
.option('-p, --port <number>', 'Port number', '15500')
|
|
106
|
-
.action((cmdObj: { port: number } & Command) => {
|
|
114
|
+
.action(async (cmdObj: { port: number } & Command) => {
|
|
115
|
+
telemetry.record('command_used', {
|
|
116
|
+
name: 'view',
|
|
117
|
+
});
|
|
118
|
+
await telemetry.send();
|
|
107
119
|
init(cmdObj.port);
|
|
108
120
|
});
|
|
109
121
|
|
|
@@ -111,10 +123,9 @@ async function main() {
|
|
|
111
123
|
.command('eval')
|
|
112
124
|
.description('Evaluate prompts')
|
|
113
125
|
.requiredOption('-p, --prompts <paths...>', 'Paths to prompt files (.txt)', config.prompts)
|
|
114
|
-
.
|
|
126
|
+
.option(
|
|
115
127
|
'-r, --providers <name or path...>',
|
|
116
128
|
'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
|
|
117
|
-
config?.providers,
|
|
118
129
|
)
|
|
119
130
|
.option(
|
|
120
131
|
'-c, --config <path>',
|
|
@@ -231,7 +242,7 @@ async function main() {
|
|
|
231
242
|
};
|
|
232
243
|
|
|
233
244
|
const options: EvaluateOptions = {
|
|
234
|
-
showProgressBar:
|
|
245
|
+
showProgressBar: getLogLevel() !== 'debug',
|
|
235
246
|
maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
|
|
236
247
|
...evaluateOptions,
|
|
237
248
|
};
|
|
@@ -249,43 +260,9 @@ async function main() {
|
|
|
249
260
|
if (cmdObj.output) {
|
|
250
261
|
logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
|
|
251
262
|
writeOutput(cmdObj.output, summary);
|
|
252
|
-
} else {
|
|
263
|
+
} else if (getLogLevel() !== 'debug') {
|
|
253
264
|
// Output table by default
|
|
254
|
-
const
|
|
255
|
-
const head = summary.table.head;
|
|
256
|
-
const headLength = head.prompts.length + head.vars.length;
|
|
257
|
-
const table = new Table({
|
|
258
|
-
head: [...head.prompts, ...head.vars],
|
|
259
|
-
colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
|
|
260
|
-
wordWrap: true,
|
|
261
|
-
wrapOnWordBoundary: false,
|
|
262
|
-
style: {
|
|
263
|
-
head: ['blue', 'bold'],
|
|
264
|
-
},
|
|
265
|
-
});
|
|
266
|
-
// Skip first row (header) and add the rest. Color PASS/FAIL
|
|
267
|
-
for (const row of summary.table.body.slice(0, 25)) {
|
|
268
|
-
table.push([
|
|
269
|
-
...row.vars,
|
|
270
|
-
...row.outputs.map((col) => {
|
|
271
|
-
const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
|
|
272
|
-
if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
|
|
273
|
-
col = col.slice(0, tableCellMaxLength) + '...';
|
|
274
|
-
}
|
|
275
|
-
if (col.startsWith('[PASS]')) {
|
|
276
|
-
// color '[PASS]' green
|
|
277
|
-
return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
|
|
278
|
-
} else if (col.startsWith('[FAIL]')) {
|
|
279
|
-
// color everything red up until '---'
|
|
280
|
-
return col
|
|
281
|
-
.split('---')
|
|
282
|
-
.map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
|
|
283
|
-
.join('---');
|
|
284
|
-
}
|
|
285
|
-
return col;
|
|
286
|
-
}),
|
|
287
|
-
]);
|
|
288
|
-
}
|
|
265
|
+
const table = generateTable(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));
|
|
289
266
|
|
|
290
267
|
logger.info('\n' + table.toString());
|
|
291
268
|
if (summary.table.body.length > 25) {
|
|
@@ -293,12 +270,20 @@ async function main() {
|
|
|
293
270
|
logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
|
|
294
271
|
}
|
|
295
272
|
}
|
|
273
|
+
|
|
274
|
+
const border = '='.repeat(process.stdout.columns - 10);
|
|
275
|
+
logger.info(border);
|
|
296
276
|
if (cmdObj.view || !cmdObj.write) {
|
|
297
|
-
logger.info('Evaluation complete
|
|
277
|
+
logger.info(`${chalk.green('✔')} Evaluation complete`);
|
|
298
278
|
} else {
|
|
299
|
-
writeLatestResults(summary);
|
|
300
|
-
logger.info(
|
|
279
|
+
writeLatestResults(summary, config);
|
|
280
|
+
logger.info(
|
|
281
|
+
`${chalk.green('✔')} Evaluation complete. To use web viewer, run ${chalk.green(
|
|
282
|
+
'promptfoo view',
|
|
283
|
+
)}`,
|
|
284
|
+
);
|
|
301
285
|
}
|
|
286
|
+
logger.info(border);
|
|
302
287
|
logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
|
|
303
288
|
logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
|
|
304
289
|
logger.info(
|
|
@@ -306,6 +291,11 @@ async function main() {
|
|
|
306
291
|
);
|
|
307
292
|
logger.info('Done.');
|
|
308
293
|
|
|
294
|
+
telemetry.record('command_used', {
|
|
295
|
+
name: 'eval',
|
|
296
|
+
});
|
|
297
|
+
await telemetry.send();
|
|
298
|
+
|
|
309
299
|
if (cmdObj.view) {
|
|
310
300
|
init(parseInt(cmdObj.view, 10) || 15500);
|
|
311
301
|
}
|
package/src/onboarding.ts
CHANGED
|
@@ -9,6 +9,12 @@ These prompts are nunjucks templates, so you can use logic like this:
|
|
|
9
9
|
{{ var1 }}
|
|
10
10
|
{% endif %}
|
|
11
11
|
---
|
|
12
|
+
[
|
|
13
|
+
{"role": "system", "content": "Use JSON too for more complex payloads"},
|
|
14
|
+
{"role": "user", "content": "Such as multi-shot prompts"},
|
|
15
|
+
{"role": "user", "content": "Variable substitution still works: {{ var3 }}"}
|
|
16
|
+
]
|
|
17
|
+
---
|
|
12
18
|
If you prefer, you can break prompts into multiple files (make sure to edit promptfooconfig.yaml accordingly)
|
|
13
19
|
`;
|
|
14
20
|
|
package/src/providers/openai.ts
CHANGED
|
@@ -7,7 +7,13 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
|
|
|
7
7
|
const DEFAULT_OPENAI_HOST = 'api.openai.com';
|
|
8
8
|
|
|
9
9
|
interface OpenAiCompletionOptions {
|
|
10
|
-
temperature
|
|
10
|
+
temperature?: number;
|
|
11
|
+
functions?: {
|
|
12
|
+
name: string;
|
|
13
|
+
description?: string;
|
|
14
|
+
parameters: any;
|
|
15
|
+
}[];
|
|
16
|
+
function_call?: 'none' | 'auto';
|
|
11
17
|
}
|
|
12
18
|
|
|
13
19
|
class OpenAiGenericProvider implements ApiProvider {
|
|
@@ -112,11 +118,14 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
|
112
118
|
'text-ada-001',
|
|
113
119
|
];
|
|
114
120
|
|
|
115
|
-
|
|
121
|
+
options: OpenAiCompletionOptions;
|
|
122
|
+
|
|
123
|
+
constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
|
|
116
124
|
if (!OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelName)) {
|
|
117
125
|
logger.warn(`Using unknown OpenAI completion model: ${modelName}`);
|
|
118
126
|
}
|
|
119
127
|
super(modelName, apiKey);
|
|
128
|
+
this.options = context || {};
|
|
120
129
|
}
|
|
121
130
|
|
|
122
131
|
async callApi(prompt: string, options?: OpenAiCompletionOptions): Promise<ProviderResponse> {
|
|
@@ -138,7 +147,10 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
|
138
147
|
model: this.modelName,
|
|
139
148
|
prompt,
|
|
140
149
|
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
141
|
-
temperature:
|
|
150
|
+
temperature:
|
|
151
|
+
options?.temperature ??
|
|
152
|
+
this.options.temperature ??
|
|
153
|
+
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
142
154
|
stop,
|
|
143
155
|
};
|
|
144
156
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
@@ -186,17 +198,22 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
186
198
|
static OPENAI_CHAT_MODELS = [
|
|
187
199
|
'gpt-4',
|
|
188
200
|
'gpt-4-0314',
|
|
201
|
+
'gpt-4-0613',
|
|
189
202
|
'gpt-4-32k',
|
|
190
203
|
'gpt-4-32k-0314',
|
|
191
204
|
'gpt-3.5-turbo',
|
|
192
205
|
'gpt-3.5-turbo-0301',
|
|
206
|
+
'gpt-3.5-turbo-0613',
|
|
193
207
|
];
|
|
194
208
|
|
|
195
|
-
|
|
209
|
+
options: OpenAiCompletionOptions;
|
|
210
|
+
|
|
211
|
+
constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
|
|
196
212
|
if (!OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelName)) {
|
|
197
213
|
logger.warn(`Using unknown OpenAI chat model: ${modelName}`);
|
|
198
214
|
}
|
|
199
215
|
super(modelName, apiKey);
|
|
216
|
+
this.options = context || {};
|
|
200
217
|
}
|
|
201
218
|
|
|
202
219
|
// TODO(ian): support passing in `messages` directly
|
|
@@ -215,11 +232,17 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
215
232
|
} catch (err) {
|
|
216
233
|
messages = [{ role: 'user', content: prompt }];
|
|
217
234
|
}
|
|
235
|
+
|
|
218
236
|
const body = {
|
|
219
237
|
model: this.modelName,
|
|
220
238
|
messages: messages,
|
|
221
239
|
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
222
|
-
temperature:
|
|
240
|
+
temperature:
|
|
241
|
+
options?.temperature ??
|
|
242
|
+
this.options.temperature ??
|
|
243
|
+
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
244
|
+
functions: options?.functions || this.options.functions || undefined,
|
|
245
|
+
function_call: options?.function_call || this.options.function_call || undefined,
|
|
223
246
|
};
|
|
224
247
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
225
248
|
|
|
@@ -246,8 +269,11 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
246
269
|
|
|
247
270
|
logger.debug(`\tOpenAI API response: ${JSON.stringify(data)}`);
|
|
248
271
|
try {
|
|
272
|
+
const message = data.choices[0].message;
|
|
273
|
+
const output =
|
|
274
|
+
message.content === null ? JSON.stringify(message.function_call) : message.content;
|
|
249
275
|
return {
|
|
250
|
-
output
|
|
276
|
+
output,
|
|
251
277
|
tokenUsage: cached
|
|
252
278
|
? { cached: data.usage.total_tokens }
|
|
253
279
|
: {
|
package/src/providers.ts
CHANGED
|
@@ -1,20 +1,35 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
|
|
3
|
-
import { ApiProvider } from './types';
|
|
3
|
+
import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './types';
|
|
4
4
|
|
|
5
5
|
import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
|
|
6
6
|
import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
|
|
7
7
|
|
|
8
|
-
export async function loadApiProviders(
|
|
8
|
+
/**
 * Resolves a provider specification into instantiated API providers.
 *
 * Accepts a single provider id string, an array of id strings, or an array of
 * single-key objects of the form `{ [providerId]: { config } }`. Array entries
 * of both shapes may be mixed; the result preserves the input order.
 *
 * @param providerPaths - Provider id, list of ids, or list of keyed provider configs.
 * @returns One loaded provider per entry.
 * @throws Error if `providerPaths` is neither a string nor an array, or if an
 *   object entry has no provider id key.
 */
export async function loadApiProviders(
  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
): Promise<ApiProvider[]> {
  if (typeof providerPaths === 'string') {
    return [await loadApiProvider(providerPaths)];
  }

  if (Array.isArray(providerPaths)) {
    return Promise.all(
      providerPaths.map((provider) => {
        if (typeof provider === 'string') {
          return loadApiProvider(provider);
        }

        // Object form: the first (only) key is the provider id, its value the options.
        const id = provider == null ? undefined : Object.keys(provider)[0];
        if (id === undefined) {
          // Without this guard an empty entry would reach loadApiProvider as
          // `undefined` and fail later with an unrelated, hard-to-read error.
          throw new Error(
            'Invalid provider entry: expected a provider id string or an object keyed by provider id',
          );
        }
        return loadApiProvider(id, { ...provider[id], id });
      }),
    );
  }

  throw new Error('Invalid providers list');
}
|
|
16
28
|
|
|
17
|
-
export async function loadApiProvider(
|
|
29
|
+
export async function loadApiProvider(
|
|
30
|
+
providerPath: string,
|
|
31
|
+
context: ProviderConfig | undefined = undefined,
|
|
32
|
+
): Promise<ApiProvider> {
|
|
18
33
|
if (providerPath?.startsWith('openai:')) {
|
|
19
34
|
// Load OpenAI module
|
|
20
35
|
const options = providerPath.split(':');
|
|
@@ -22,13 +37,21 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
|
|
|
22
37
|
const modelName = options[2];
|
|
23
38
|
|
|
24
39
|
if (modelType === 'chat') {
|
|
25
|
-
return new OpenAiChatCompletionProvider(
|
|
40
|
+
return new OpenAiChatCompletionProvider(
|
|
41
|
+
modelName || 'gpt-3.5-turbo',
|
|
42
|
+
undefined,
|
|
43
|
+
context?.config,
|
|
44
|
+
);
|
|
26
45
|
} else if (modelType === 'completion') {
|
|
27
|
-
return new OpenAiCompletionProvider(
|
|
46
|
+
return new OpenAiCompletionProvider(
|
|
47
|
+
modelName || 'text-davinci-003',
|
|
48
|
+
undefined,
|
|
49
|
+
context?.config,
|
|
50
|
+
);
|
|
28
51
|
} else if (OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelType)) {
|
|
29
|
-
return new OpenAiChatCompletionProvider(modelType);
|
|
52
|
+
return new OpenAiChatCompletionProvider(modelType, undefined, context?.config);
|
|
30
53
|
} else if (OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelType)) {
|
|
31
|
-
return new OpenAiCompletionProvider(modelType);
|
|
54
|
+
return new OpenAiCompletionProvider(modelType, undefined, context?.config);
|
|
32
55
|
} else {
|
|
33
56
|
throw new Error(
|
|
34
57
|
`Unknown OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`,
|
|
@@ -52,7 +75,7 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
|
|
|
52
75
|
|
|
53
76
|
// Load custom module
|
|
54
77
|
const CustomApiProvider = (await import(path.join(process.cwd(), providerPath))).default;
|
|
55
|
-
return new CustomApiProvider();
|
|
78
|
+
return new CustomApiProvider(context);
|
|
56
79
|
}
|
|
57
80
|
|
|
58
81
|
export default {
|
package/src/table.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import Table from 'cli-table3';
|
|
2
|
+
import chalk from 'chalk';
|
|
3
|
+
import type { EvaluateSummary } from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a `cli-table3` table from an evaluation summary for terminal display.
 *
 * Each row lists the test variables followed by the outputs. Outputs that begin
 * with `[PASS]` get a green tag; outputs that begin with `[FAIL]` are coloured
 * red up to the first `---` separator.
 *
 * @param summary - Evaluation summary whose `table.head` / `table.body` are rendered.
 * @param tableCellMaxLength - Maximum characters kept per output cell before it is
 *   cut and suffixed with `...`. `NaN` or a negative value falls back to 250.
 * @param maxRows - Maximum number of body rows to include.
 * @returns The populated table; call `toString()` on it to render.
 */
export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250, maxRows = 25) {
  // Parameter defaults only apply to `undefined`. A caller that forwards the
  // result of `parseInt` on a missing flag passes NaN, which would make every
  // length comparison false and silently disable truncation.
  const cellLimit =
    Number.isNaN(tableCellMaxLength) || tableCellMaxLength < 0 ? 250 : tableCellMaxLength;

  // Leave a small margin; fall back to a fixed width when stdout is not a TTY.
  const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
  const head = summary.table.head;
  const headLength = head.prompts.length + head.vars.length;
  const table = new Table({
    // Must mirror the row layout below (vars first, then one output per prompt),
    // otherwise the headings label the wrong columns.
    head: [...head.vars, ...head.prompts],
    colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
    wordWrap: true,
    wrapOnWordBoundary: false,
    style: {
      head: ['blue', 'bold'],
    },
  });

  // Add up to `maxRows` body rows, colouring PASS/FAIL markers.
  for (const row of summary.table.body.slice(0, maxRows)) {
    table.push([
      ...row.vars,
      ...row.outputs.map((col) => {
        if (col.length > cellLimit) {
          col = col.slice(0, cellLimit) + '...';
        }
        if (col.startsWith('[PASS]')) {
          // color '[PASS]' green
          return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
        } else if (col.startsWith('[FAIL]')) {
          // color everything red up until '---'
          return col
            .split('---')
            .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
            .join('---');
        }
        return col;
      }),
    ]);
  }
  return table;
}
|
package/src/telemetry.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import packageJson from '../package.json';
|
|
2
|
+
import { fetchWithTimeout } from './util';
|
|
3
|
+
|
|
4
|
+
type TelemetryEvent = {
|
|
5
|
+
event: string;
|
|
6
|
+
packageVersion: string;
|
|
7
|
+
properties: Record<string, string | number>;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
|
|
11
|
+
|
|
12
|
+
const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
|
|
13
|
+
|
|
14
|
+
const TELEMETRY_TIMEOUT_MS = 1000;
|
|
15
|
+
|
|
16
|
+
/**
 * In-memory buffer of usage events that are posted to the telemetry endpoint
 * in a single batch.
 *
 * Collection and delivery are both skipped while the environment variable
 * `PROMPTFOO_DISABLE_TELEMETRY` is set to `'1'`.
 */
export class Telemetry {
  /** Events recorded since the last successful `send()`. */
  private events: TelemetryEvent[] = [];

  /** True when the user has opted out via `PROMPTFOO_DISABLE_TELEMETRY=1`. */
  get disabled() {
    return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
  }

  /**
   * Queues an event for the next `send()`. No-op when telemetry is disabled.
   *
   * @param eventName - Kind of event being recorded.
   * @param properties - Flat key/value payload attached to the event.
   */
  record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
    if (!this.disabled) {
      this.events.push({
        event: eventName,
        packageVersion: packageJson.version,
        properties,
      });
    }
  }

  /**
   * Posts all queued events as one JSON array.
   *
   * Delivery is best-effort: it never throws, and on a failed or non-OK
   * response the events stay queued for a later attempt.
   */
  async send(): Promise<void> {
    if (this.disabled || this.events.length === 0) {
      return;
    }

    // Detach the batch before awaiting so that events recorded while the
    // request is in flight are neither sent twice nor wiped on success.
    const batch = this.events;
    this.events = [];

    let delivered = false;
    try {
      const response = await fetchWithTimeout(
        TELEMETRY_ENDPOINT,
        {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(batch),
        },
        TELEMETRY_TIMEOUT_MS,
      );
      delivered = response.ok;
    } catch (err) {
      // Deliberately ignored: telemetry must never break or slow down the CLI.
    }

    if (!delivered) {
      // Put the unsent batch back in front of anything recorded meanwhile.
      this.events = [...batch, ...this.events];
    }
  }
}
|
|
55
|
+
|
|
56
|
+
const telemetry = new Telemetry();
|
|
57
|
+
export default telemetry;
|
package/src/types.ts
CHANGED
|
@@ -23,6 +23,11 @@ export interface CommandLineOptions {
|
|
|
23
23
|
promptSuffix?: string;
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
+
export interface ProviderConfig {
|
|
27
|
+
id: ProviderId;
|
|
28
|
+
config?: any;
|
|
29
|
+
}
|
|
30
|
+
|
|
26
31
|
export interface ApiProvider {
|
|
27
32
|
id: () => string;
|
|
28
33
|
callApi: (prompt: string) => Promise<ProviderResponse>;
|
|
@@ -187,13 +192,17 @@ export interface TestSuite {
|
|
|
187
192
|
defaultTest?: Partial<TestCase>;
|
|
188
193
|
}
|
|
189
194
|
|
|
195
|
+
export type ProviderId = string;
|
|
196
|
+
|
|
197
|
+
export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
|
|
198
|
+
|
|
190
199
|
// TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
|
|
191
200
|
export interface TestSuiteConfig {
|
|
192
201
|
// Optional description of what your LLM is trying to do
|
|
193
202
|
description?: string;
|
|
194
203
|
|
|
195
204
|
// One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
|
|
196
|
-
providers:
|
|
205
|
+
providers: ProviderId | ProviderId[] | RawProviderConfig[];
|
|
197
206
|
|
|
198
207
|
// One or more prompt files to load
|
|
199
208
|
prompts: string | string[];
|
package/src/updates.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import semverGt from 'semver/functions/gt';
|
|
3
|
+
|
|
4
|
+
import logger from './logger';
|
|
5
|
+
import { fetchWithTimeout } from './util';
|
|
6
|
+
import packageJson from '../package.json';
|
|
7
|
+
|
|
8
|
+
const VERSION = packageJson.version;
|
|
9
|
+
|
|
10
|
+
/**
 * Looks up the version currently tagged `latest` for a package on the public
 * npm registry.
 *
 * @param packageName - Name of the package as published on npm.
 * @returns The `latest` entry of the package's `dist-tags`.
 * @throws Error when the registry responds with a non-OK status. Failures
 *   raised by `fetchWithTimeout` itself (the request is given a limit of 1000)
 *   propagate unchanged.
 */
export async function getLatestVersion(packageName: string) {
  const registryUrl = `https://registry.npmjs.org/${packageName}`;
  const response = await fetchWithTimeout(registryUrl, {}, 1000);

  if (response.ok) {
    const metadata = await response.json();
    return metadata['dist-tags'].latest;
  }

  throw new Error(`Failed to fetch package information for ${packageName}`);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Checks the npm registry for a newer promptfoo release and, if one exists,
 * logs an upgrade notice.
 *
 * The check is advisory only: if the registry cannot be reached, times out, or
 * returns something that cannot be compared, the failure is logged at debug
 * level and the function resolves to `false` instead of throwing, so the CLI
 * keeps working offline.
 *
 * @returns `true` when a newer version was found and the notice was printed,
 *   otherwise `false`.
 */
export async function checkForUpdates(): Promise<boolean> {
  let latestVersion: string;
  try {
    latestVersion = await getLatestVersion('promptfoo');
    if (!semverGt(latestVersion, VERSION)) {
      return false;
    }
  } catch (err) {
    logger.debug(`Skipping update check: ${err}`);
    return false;
  }

  // `columns` is undefined when stdout is not a TTY and may be smaller than the
  // margin; `String.prototype.repeat` throws a RangeError on negative counts.
  const borderWidth = Math.max(0, process.stdout.columns ? process.stdout.columns - 10 : 120);
  const border = '='.repeat(borderWidth);
  logger.info(
    `\n${border}
${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
      VERSION,
    )} is lower than the latest available version ${chalk.green(latestVersion)}.

Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
      'npm install -g promptfoo@latest',
    )} to update.
${border}\n`,
  );
  return true;
}
|
package/src/util.ts
CHANGED
|
@@ -16,7 +16,15 @@ import { getDirectory } from './esm';
|
|
|
16
16
|
|
|
17
17
|
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
18
18
|
|
|
19
|
-
import type {
|
|
19
|
+
import type {
|
|
20
|
+
Assertion,
|
|
21
|
+
CsvRow,
|
|
22
|
+
EvaluateSummary,
|
|
23
|
+
UnifiedConfig,
|
|
24
|
+
TestCase,
|
|
25
|
+
Prompt,
|
|
26
|
+
TestSuite,
|
|
27
|
+
} from './types';
|
|
20
28
|
import { assertionFromString } from './assertions';
|
|
21
29
|
|
|
22
30
|
const PROMPT_DELIMITER = '---';
|
|
@@ -88,11 +96,22 @@ export function readPrompts(
|
|
|
88
96
|
promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
|
|
89
97
|
} else {
|
|
90
98
|
const fileContent = fs.readFileSync(promptPath, 'utf-8');
|
|
91
|
-
|
|
99
|
+
|
|
100
|
+
let display: string | undefined;
|
|
92
101
|
if (inputType === PromptInputType.NAMED) {
|
|
93
102
|
display = (promptPathOrGlobs as Record<string, string>)[promptPath];
|
|
94
103
|
} else {
|
|
95
104
|
display = fileContent.length > 200 ? promptPath : fileContent;
|
|
105
|
+
|
|
106
|
+
const ext = path.parse(promptPath).ext;
|
|
107
|
+
if (ext === '.jsonl') {
|
|
108
|
+
// Special case for JSONL file
|
|
109
|
+
const jsonLines = fileContent.split(/\r?\n/).filter((line) => line.length > 0);
|
|
110
|
+
for (const json of jsonLines) {
|
|
111
|
+
promptContents.push({ raw: json, display: json });
|
|
112
|
+
}
|
|
113
|
+
continue;
|
|
114
|
+
}
|
|
96
115
|
}
|
|
97
116
|
promptContents.push({ raw: fileContent, display });
|
|
98
117
|
}
|
|
@@ -238,11 +257,22 @@ export function getLatestResultsPath(): string {
|
|
|
238
257
|
return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
|
|
239
258
|
}
|
|
240
259
|
|
|
241
|
-
export function writeLatestResults(results: EvaluateSummary) {
|
|
260
|
+
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
|
|
242
261
|
const latestResultsPath = getLatestResultsPath();
|
|
243
262
|
try {
|
|
244
263
|
fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
|
|
245
|
-
fs.writeFileSync(
|
|
264
|
+
fs.writeFileSync(
|
|
265
|
+
latestResultsPath,
|
|
266
|
+
JSON.stringify(
|
|
267
|
+
{
|
|
268
|
+
version: 1,
|
|
269
|
+
config,
|
|
270
|
+
results,
|
|
271
|
+
},
|
|
272
|
+
null,
|
|
273
|
+
2,
|
|
274
|
+
),
|
|
275
|
+
);
|
|
246
276
|
} catch (err) {
|
|
247
277
|
logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
|
|
248
278
|
}
|
|
@@ -11,8 +11,9 @@ import { useStore } from './store.js';
|
|
|
11
11
|
import './App.css';
|
|
12
12
|
|
|
13
13
|
function App() {
|
|
14
|
-
const { table, setTable } = useStore();
|
|
14
|
+
const { table, setTable, setConfig } = useStore();
|
|
15
15
|
const [loaded, setLoaded] = React.useState<boolean>(false);
|
|
16
|
+
const loadedFromApi = React.useRef(false);
|
|
16
17
|
|
|
17
18
|
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
|
|
18
19
|
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
|
|
@@ -37,24 +38,47 @@ function App() {
|
|
|
37
38
|
};
|
|
38
39
|
|
|
39
40
|
React.useEffect(() => {
|
|
40
|
-
|
|
41
|
+
const fetchEvalData = async (id: string) => {
|
|
42
|
+
if (loadedFromApi.current) {
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
loadedFromApi.current = true;
|
|
46
|
+
const response = await fetch(`https://api.promptfoo.dev/eval/${id}`);
|
|
47
|
+
const body = await response.json();
|
|
48
|
+
setTable(
|
|
49
|
+
body.data.results?.table ||
|
|
50
|
+
// Backwards compatibility with <= 0.12.0
|
|
51
|
+
body.data.table,
|
|
52
|
+
);
|
|
53
|
+
setConfig(body.data.config);
|
|
54
|
+
setLoaded(true);
|
|
55
|
+
};
|
|
56
|
+
|
|
41
57
|
const socket = SocketIOClient(`http://localhost:15500`);
|
|
42
58
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
59
|
+
const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
|
|
60
|
+
if (pathMatch) {
|
|
61
|
+
const id = pathMatch[1];
|
|
62
|
+
fetchEvalData(id);
|
|
63
|
+
} else {
|
|
64
|
+
socket.on('init', (data) => {
|
|
65
|
+
console.log('Initialized socket connection', data);
|
|
66
|
+
setLoaded(true);
|
|
67
|
+
setTable(data.results.table);
|
|
68
|
+
setConfig(data.config);
|
|
69
|
+
});
|
|
48
70
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
71
|
+
socket.on('update', (data) => {
|
|
72
|
+
console.log('Received data update', data);
|
|
73
|
+
setTable(data.results.table);
|
|
74
|
+
setConfig(data.config);
|
|
75
|
+
});
|
|
76
|
+
}
|
|
53
77
|
|
|
54
78
|
return () => {
|
|
55
79
|
socket.disconnect();
|
|
56
80
|
};
|
|
57
|
-
}, [
|
|
81
|
+
}, [setTable, setConfig]);
|
|
58
82
|
|
|
59
83
|
return (
|
|
60
84
|
<ThemeProvider theme={theme}>
|