promptfoo 0.17.6 → 0.17.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +2 -2
- package/dist/src/assertions.js +2 -2
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +37 -6
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +4 -0
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +4 -0
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +15 -0
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +5 -0
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +21 -2
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +8 -1
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +9 -6
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/shared.d.ts.map +1 -1
- package/dist/src/providers/shared.js.map +1 -1
- package/dist/src/providers.js +1 -1
- package/dist/src/providers.js.map +1 -1
- package/dist/src/types.d.ts +6 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +8 -1
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +81 -26
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-13198388.js → index-0c6f887d.js} +25 -25
- package/dist/src/web/client/index.html +1 -1
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +26 -3
- package/dist/src/web/server.js.map +1 -1
- package/package.json +2 -2
- package/src/assertions.ts +2 -2
- package/src/evaluator.ts +42 -6
- package/src/main.ts +6 -0
- package/src/providers/azureopenai.ts +24 -0
- package/src/providers/openai.ts +33 -3
- package/src/providers/replicate.ts +20 -7
- package/src/providers/shared.ts +3 -1
- package/src/providers.ts +1 -1
- package/src/types.ts +10 -1
- package/src/util.ts +95 -27
- package/src/web/client/src/App.tsx +24 -1
- package/src/web/client/src/ResultsView.tsx +42 -3
- package/src/web/server.ts +33 -10
- package/src/web/client/package-lock.json +0 -5726
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>promptfoo web viewer</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
|
|
9
9
|
<link rel="stylesheet" href="/assets/index-f9b230d1.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QAiFhC"}
|
package/dist/src/web/server.js
CHANGED
|
@@ -37,11 +37,34 @@ function init(port = 15500) {
|
|
|
37
37
|
// Send the initial table data when a client connects
|
|
38
38
|
socket.emit('init', readLatestJson());
|
|
39
39
|
// Watch for changes to latest.json and emit the update event
|
|
40
|
-
|
|
41
|
-
if (
|
|
40
|
+
const watcher = (0, debounce_1.default)((curr, prev) => {
|
|
41
|
+
if (curr.mtime !== prev.mtime) {
|
|
42
42
|
socket.emit('update', readLatestJson());
|
|
43
43
|
}
|
|
44
|
-
}, 250)
|
|
44
|
+
}, 250);
|
|
45
|
+
fs_1.default.watchFile(latestJsonPath, watcher);
|
|
46
|
+
// Stop watching the file when the socket connection is closed
|
|
47
|
+
socket.on('disconnect', () => {
|
|
48
|
+
fs_1.default.unwatchFile(latestJsonPath, watcher);
|
|
49
|
+
});
|
|
50
|
+
});
|
|
51
|
+
app.get('/results', (req, res) => {
|
|
52
|
+
const previousResults = (0, util_1.listPreviousResults)();
|
|
53
|
+
res.json({ data: previousResults });
|
|
54
|
+
});
|
|
55
|
+
app.get('/results/:filename', (req, res) => {
|
|
56
|
+
const filename = req.params.filename;
|
|
57
|
+
const safeFilename = node_path_1.default.basename(filename);
|
|
58
|
+
if (safeFilename !== filename || !(0, util_1.listPreviousResults)().includes(safeFilename)) {
|
|
59
|
+
res.status(400).send('Invalid filename');
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
const result = (0, util_1.readResult)(safeFilename);
|
|
63
|
+
if (!result) {
|
|
64
|
+
res.status(404).send('Result not found');
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
res.json({ data: result });
|
|
45
68
|
});
|
|
46
69
|
httpServer.listen(port, () => {
|
|
47
70
|
const url = `http://localhost:${port}`;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,
|
|
1
|
+
{"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAA+B;AAC/B,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAAgF;AAEhF,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,MAAM,OAAO,GAAG,IAAA,kBAAQ,EAAC,CAAC,IAAW,EAAE,IAAW,EAAE,EAAE;YACpD,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,EAAE;gBAC7B,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACR,YAAE,CAAC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAEtC,8DAA8D;QAC9D,MAAM,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE;YAC3B,YAAE,CAAC,WAAW,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC/B,MAAM,eAAe,GAAG,IAAA,0BAAmB,GAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QACzC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QACrC,MAAM,YAAY,GAAG,mBAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,YAAY,KAAK,QAAQ,IAAI,CAAC,IAAA,0BAAmB,GAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE;YAC9E,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,MAAM,MAAM,GAAG,IAAA,iBAAU,EAAC,YAAY,CAAC,CAAC;QACxC,IAAI,CAAC,MAAM,EAAE;YACX,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAjFD,oBAiFC"}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "promptfoo",
|
|
3
3
|
"description": "LLM eval & testing toolkit",
|
|
4
4
|
"author": "Ian Webster",
|
|
5
|
-
"version": "0.17.
|
|
5
|
+
"version": "0.17.8",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "commonjs",
|
|
8
8
|
"main": "dist/src/index.js",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"src"
|
|
20
20
|
],
|
|
21
21
|
"engines": {
|
|
22
|
-
"node": ">=
|
|
22
|
+
"node": ">=16"
|
|
23
23
|
},
|
|
24
24
|
"bin": {
|
|
25
25
|
"promptfoo": "dist/src/main.js"
|
package/src/assertions.ts
CHANGED
|
@@ -432,8 +432,8 @@ export async function matchesLlmRubric(
|
|
|
432
432
|
}
|
|
433
433
|
|
|
434
434
|
const prompt = nunjucks.renderString(options.rubricPrompt || DEFAULT_GRADING_PROMPT, {
|
|
435
|
-
output,
|
|
436
|
-
rubric: expected,
|
|
435
|
+
output: output.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
|
|
436
|
+
rubric: expected.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
|
|
437
437
|
});
|
|
438
438
|
|
|
439
439
|
let provider = options.provider || DefaultGradingProvider;
|
package/src/evaluator.ts
CHANGED
|
@@ -3,6 +3,7 @@ import readline from 'readline';
|
|
|
3
3
|
import async from 'async';
|
|
4
4
|
import chalk from 'chalk';
|
|
5
5
|
import nunjucks from 'nunjucks';
|
|
6
|
+
import invariant from 'tiny-invariant';
|
|
6
7
|
|
|
7
8
|
import logger from './logger';
|
|
8
9
|
import telemetry from './telemetry';
|
|
@@ -121,7 +122,23 @@ class Evaluator {
|
|
|
121
122
|
if (response.error) {
|
|
122
123
|
ret.error = response.error;
|
|
123
124
|
} else if (response.output) {
|
|
124
|
-
|
|
125
|
+
// Create a copy of response so we can potentially mutate it.
|
|
126
|
+
let processedResponse = { ...response };
|
|
127
|
+
if (test.options?.postprocess) {
|
|
128
|
+
const { postprocess } = test.options;
|
|
129
|
+
const postprocessFn = new Function(
|
|
130
|
+
'output',
|
|
131
|
+
'context',
|
|
132
|
+
postprocess.includes('\n') ? postprocess : `return ${postprocess}`,
|
|
133
|
+
);
|
|
134
|
+
processedResponse.output = postprocessFn(processedResponse.output);
|
|
135
|
+
if (processedResponse.output == null) {
|
|
136
|
+
throw new Error('Postprocess function did not return a value');
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
invariant(processedResponse.output != null, 'Response output should not be null');
|
|
141
|
+
const checkResult = await runAssertions(test, processedResponse.output);
|
|
125
142
|
if (!checkResult.pass) {
|
|
126
143
|
ret.error = checkResult.reason;
|
|
127
144
|
}
|
|
@@ -132,6 +149,7 @@ class Evaluator {
|
|
|
132
149
|
this.stats.tokenUsage.prompt += checkResult.tokensUsed.prompt;
|
|
133
150
|
this.stats.tokenUsage.completion += checkResult.tokensUsed.completion;
|
|
134
151
|
}
|
|
152
|
+
ret.response = processedResponse;
|
|
135
153
|
} else {
|
|
136
154
|
ret.success = false;
|
|
137
155
|
ret.score = 0;
|
|
@@ -213,6 +231,13 @@ class Evaluator {
|
|
|
213
231
|
// Split prompts by provider
|
|
214
232
|
for (const prompt of testSuite.prompts) {
|
|
215
233
|
for (const provider of testSuite.providers) {
|
|
234
|
+
// Check if providerPromptMap exists and if it contains the current prompt's display
|
|
235
|
+
if (testSuite.providerPromptMap) {
|
|
236
|
+
const allowedPrompts = testSuite.providerPromptMap[provider.id()];
|
|
237
|
+
if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
|
|
238
|
+
continue;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
216
241
|
const updatedDisplay =
|
|
217
242
|
testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
|
|
218
243
|
prompts.push({
|
|
@@ -225,11 +250,13 @@ class Evaluator {
|
|
|
225
250
|
// Aggregate all vars across test cases
|
|
226
251
|
|
|
227
252
|
const tests = (
|
|
228
|
-
testSuite.tests
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
253
|
+
testSuite.tests && testSuite.tests.length > 0
|
|
254
|
+
? testSuite.tests
|
|
255
|
+
: [
|
|
256
|
+
{
|
|
257
|
+
// Dummy test for cases when we're only comparing raw prompts.
|
|
258
|
+
},
|
|
259
|
+
]
|
|
233
260
|
).map((test) => {
|
|
234
261
|
const finalTestCase: TestCase = Object.assign({}, testSuite.defaultTest);
|
|
235
262
|
return Object.assign(finalTestCase, test);
|
|
@@ -263,6 +290,8 @@ class Evaluator {
|
|
|
263
290
|
testCase.options?.prefix || testSuite.defaultTest?.options?.prefix || '';
|
|
264
291
|
const appendToPrompt =
|
|
265
292
|
testCase.options?.suffix || testSuite.defaultTest?.options?.suffix || '';
|
|
293
|
+
testCase.options.postprocess =
|
|
294
|
+
testCase.options.postprocess || testSuite.defaultTest?.options?.postprocess;
|
|
266
295
|
|
|
267
296
|
// Finalize test case eval
|
|
268
297
|
const varCombinations = generateVarCombinations(testCase.vars || {});
|
|
@@ -274,6 +303,13 @@ class Evaluator {
|
|
|
274
303
|
let colIndex = 0;
|
|
275
304
|
for (const prompt of testSuite.prompts) {
|
|
276
305
|
for (const provider of testSuite.providers) {
|
|
306
|
+
if (testSuite.providerPromptMap) {
|
|
307
|
+
const allowedPrompts = testSuite.providerPromptMap[provider.id()];
|
|
308
|
+
if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
|
|
309
|
+
// This prompt should not be used with this provider.
|
|
310
|
+
continue;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
277
313
|
runEvalOptions.push({
|
|
278
314
|
provider,
|
|
279
315
|
prompt: {
|
package/src/main.ts
CHANGED
|
@@ -11,10 +11,12 @@ import logger, { getLogLevel, setLogLevel } from './logger';
|
|
|
11
11
|
import { loadApiProvider, loadApiProviders } from './providers';
|
|
12
12
|
import { evaluate } from './evaluator';
|
|
13
13
|
import {
|
|
14
|
+
cleanupOldResults,
|
|
14
15
|
maybeReadConfig,
|
|
15
16
|
readConfig,
|
|
16
17
|
readLatestResults,
|
|
17
18
|
readPrompts,
|
|
19
|
+
readProviderPromptMap,
|
|
18
20
|
readTests,
|
|
19
21
|
writeLatestResults,
|
|
20
22
|
writeOutput,
|
|
@@ -180,6 +182,7 @@ async function main() {
|
|
|
180
182
|
.action(async () => {
|
|
181
183
|
telemetry.maybeShowNotice();
|
|
182
184
|
await clearCache();
|
|
185
|
+
cleanupOldResults(0);
|
|
183
186
|
telemetry.record('command_used', {
|
|
184
187
|
name: 'cache_clear',
|
|
185
188
|
});
|
|
@@ -307,6 +310,7 @@ async function main() {
|
|
|
307
310
|
config.tests,
|
|
308
311
|
cmdObj.tests ? undefined : basePath,
|
|
309
312
|
);
|
|
313
|
+
const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);
|
|
310
314
|
|
|
311
315
|
if (parsedPrompts.length === 0) {
|
|
312
316
|
logger.error(chalk.red('No prompts found'));
|
|
@@ -319,6 +323,7 @@ async function main() {
|
|
|
319
323
|
suffix: cmdObj.promptSuffix,
|
|
320
324
|
provider: cmdObj.grader,
|
|
321
325
|
// rubricPrompt:
|
|
326
|
+
// postprocess
|
|
322
327
|
},
|
|
323
328
|
...config.defaultTest,
|
|
324
329
|
};
|
|
@@ -327,6 +332,7 @@ async function main() {
|
|
|
327
332
|
description: config.description,
|
|
328
333
|
prompts: parsedPrompts,
|
|
329
334
|
providers: parsedProviders,
|
|
335
|
+
providerPromptMap: parsedProviderPromptMap,
|
|
330
336
|
tests: parsedTests,
|
|
331
337
|
defaultTest,
|
|
332
338
|
};
|
|
@@ -6,6 +6,10 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
|
|
|
6
6
|
|
|
7
7
|
interface AzureOpenAiCompletionOptions {
|
|
8
8
|
temperature?: number;
|
|
9
|
+
top_p?: number;
|
|
10
|
+
frequency_penalty?: number;
|
|
11
|
+
presence_penalty?: number;
|
|
12
|
+
best_of?: number;
|
|
9
13
|
functions?: {
|
|
10
14
|
name: string;
|
|
11
15
|
description?: string;
|
|
@@ -144,6 +148,17 @@ export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
|
|
|
144
148
|
options?.temperature ??
|
|
145
149
|
this.options.temperature ??
|
|
146
150
|
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
151
|
+
top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
|
|
152
|
+
presence_penalty:
|
|
153
|
+
options?.presence_penalty ??
|
|
154
|
+
this.options.presence_penalty ??
|
|
155
|
+
parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
|
|
156
|
+
frequency_penalty:
|
|
157
|
+
options?.frequency_penalty ??
|
|
158
|
+
this.options.frequency_penalty ??
|
|
159
|
+
parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
|
|
160
|
+
best_of:
|
|
161
|
+
options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
|
|
147
162
|
stop,
|
|
148
163
|
};
|
|
149
164
|
logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
|
|
@@ -214,6 +229,15 @@ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvide
|
|
|
214
229
|
options?.temperature ??
|
|
215
230
|
this.options.temperature ??
|
|
216
231
|
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
232
|
+
top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
|
|
233
|
+
presence_penalty:
|
|
234
|
+
options?.presence_penalty ??
|
|
235
|
+
this.options.presence_penalty ??
|
|
236
|
+
parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
|
|
237
|
+
frequency_penalty:
|
|
238
|
+
options?.frequency_penalty ??
|
|
239
|
+
this.options.frequency_penalty ??
|
|
240
|
+
parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
|
|
217
241
|
functions: options?.functions || this.options.functions || undefined,
|
|
218
242
|
function_call: options?.function_call || this.options.function_call || undefined,
|
|
219
243
|
};
|
package/src/providers/openai.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
import logger from '../logger';
|
|
3
2
|
import { fetchJsonWithCache } from '../cache';
|
|
4
3
|
import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
|
|
@@ -9,6 +8,11 @@ const DEFAULT_OPENAI_HOST = 'api.openai.com';
|
|
|
9
8
|
|
|
10
9
|
interface OpenAiCompletionOptions {
|
|
11
10
|
temperature?: number;
|
|
11
|
+
max_tokens?: number;
|
|
12
|
+
top_p?: number;
|
|
13
|
+
frequency_penalty?: number;
|
|
14
|
+
presence_penalty?: number;
|
|
15
|
+
best_of?: number;
|
|
12
16
|
functions?: {
|
|
13
17
|
name: string;
|
|
14
18
|
description?: string;
|
|
@@ -147,11 +151,25 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
|
147
151
|
const body = {
|
|
148
152
|
model: this.modelName,
|
|
149
153
|
prompt,
|
|
150
|
-
max_tokens:
|
|
154
|
+
max_tokens:
|
|
155
|
+
options?.max_tokens ??
|
|
156
|
+
this.options.max_tokens ??
|
|
157
|
+
parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
151
158
|
temperature:
|
|
152
159
|
options?.temperature ??
|
|
153
160
|
this.options.temperature ??
|
|
154
161
|
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
162
|
+
top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
|
|
163
|
+
presence_penalty:
|
|
164
|
+
options?.presence_penalty ??
|
|
165
|
+
this.options.presence_penalty ??
|
|
166
|
+
parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
|
|
167
|
+
frequency_penalty:
|
|
168
|
+
options?.frequency_penalty ??
|
|
169
|
+
this.options.frequency_penalty ??
|
|
170
|
+
parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
|
|
171
|
+
best_of:
|
|
172
|
+
options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
|
|
155
173
|
stop,
|
|
156
174
|
};
|
|
157
175
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
@@ -230,11 +248,23 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
230
248
|
const body = {
|
|
231
249
|
model: this.modelName,
|
|
232
250
|
messages: messages,
|
|
233
|
-
max_tokens:
|
|
251
|
+
max_tokens:
|
|
252
|
+
options?.max_tokens ??
|
|
253
|
+
this.options.max_tokens ??
|
|
254
|
+
parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
234
255
|
temperature:
|
|
235
256
|
options?.temperature ??
|
|
236
257
|
this.options.temperature ??
|
|
237
258
|
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
259
|
+
top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
|
|
260
|
+
presence_penalty:
|
|
261
|
+
options?.presence_penalty ??
|
|
262
|
+
this.options.presence_penalty ??
|
|
263
|
+
parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
|
|
264
|
+
frequency_penalty:
|
|
265
|
+
options?.frequency_penalty ??
|
|
266
|
+
this.options.frequency_penalty ??
|
|
267
|
+
parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
|
|
238
268
|
functions: options?.functions || this.options.functions || undefined,
|
|
239
269
|
function_call: options?.function_call || this.options.function_call || undefined,
|
|
240
270
|
};
|
|
@@ -6,14 +6,22 @@ import { getCache, isCacheEnabled } from '../cache';
|
|
|
6
6
|
|
|
7
7
|
import type { ApiProvider, ProviderResponse } from '../types.js';
|
|
8
8
|
|
|
9
|
+
interface ReplicateCompletionOptions {
|
|
10
|
+
temperature?: number;
|
|
11
|
+
max_length?: number;
|
|
12
|
+
repetition_penalty?: number;
|
|
13
|
+
}
|
|
14
|
+
|
|
9
15
|
export class ReplicateProvider implements ApiProvider {
|
|
10
16
|
modelName: string;
|
|
11
17
|
apiKey?: string;
|
|
12
18
|
replicate: any;
|
|
19
|
+
options: ReplicateCompletionOptions;
|
|
13
20
|
|
|
14
|
-
constructor(modelName: string, apiKey?: string) {
|
|
21
|
+
constructor(modelName: string, apiKey?: string, options?: ReplicateCompletionOptions) {
|
|
15
22
|
this.modelName = modelName;
|
|
16
23
|
this.apiKey = apiKey || process.env.REPLICATE_API_TOKEN || process.env.REPLICATE_API_KEY;
|
|
24
|
+
this.options = options || {};
|
|
17
25
|
}
|
|
18
26
|
|
|
19
27
|
id(): string {
|
|
@@ -24,7 +32,7 @@ export class ReplicateProvider implements ApiProvider {
|
|
|
24
32
|
return `[Replicate Provider ${this.modelName}]`;
|
|
25
33
|
}
|
|
26
34
|
|
|
27
|
-
|
|
35
|
+
async callApi(prompt: string): Promise<ProviderResponse> {
|
|
28
36
|
if (!this.apiKey) {
|
|
29
37
|
throw new Error(
|
|
30
38
|
'Replicate API key is not set. Set REPLICATE_API_TOKEN environment variable or pass it as an argument to the constructor.',
|
|
@@ -54,14 +62,19 @@ export class ReplicateProvider implements ApiProvider {
|
|
|
54
62
|
logger.debug(`Calling Replicate: ${prompt}`);
|
|
55
63
|
let response;
|
|
56
64
|
try {
|
|
57
|
-
|
|
65
|
+
const data = {
|
|
58
66
|
input: {
|
|
59
67
|
prompt,
|
|
60
|
-
max_length:
|
|
61
|
-
|
|
62
|
-
|
|
68
|
+
max_length:
|
|
69
|
+
this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
|
|
70
|
+
temperature:
|
|
71
|
+
this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
|
|
72
|
+
repetition_penalty:
|
|
73
|
+
this.options.repetition_penalty ||
|
|
74
|
+
parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
|
|
63
75
|
},
|
|
64
|
-
}
|
|
76
|
+
};
|
|
77
|
+
response = await replicate.run(this.modelName as any, data);
|
|
65
78
|
} catch (err) {
|
|
66
79
|
return {
|
|
67
80
|
error: `API call error: ${String(err)}`,
|
package/src/providers/shared.ts
CHANGED
|
@@ -4,7 +4,9 @@ export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
|
|
|
4
4
|
? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
|
|
5
5
|
: 300_000;
|
|
6
6
|
|
|
7
|
-
export function parseChatPrompt(
|
|
7
|
+
export function parseChatPrompt(
|
|
8
|
+
prompt: string,
|
|
9
|
+
): { role: string; content: string; name?: string }[] {
|
|
8
10
|
const trimmedPrompt = prompt.trim();
|
|
9
11
|
if (trimmedPrompt.startsWith('- role:')) {
|
|
10
12
|
try {
|
package/src/providers.ts
CHANGED
|
@@ -112,7 +112,7 @@ export async function loadApiProvider(
|
|
|
112
112
|
const options = providerPath.split(':');
|
|
113
113
|
const modelName = options.slice(1).join(':');
|
|
114
114
|
|
|
115
|
-
return new ReplicateProvider(modelName, undefined);
|
|
115
|
+
return new ReplicateProvider(modelName, undefined, context?.config);
|
|
116
116
|
}
|
|
117
117
|
|
|
118
118
|
if (providerPath?.startsWith('localai:')) {
|
package/src/types.ts
CHANGED
|
@@ -30,6 +30,7 @@ export interface CommandLineOptions {
|
|
|
30
30
|
export interface ProviderConfig {
|
|
31
31
|
id: ProviderId;
|
|
32
32
|
config?: any;
|
|
33
|
+
prompts?: string[]; // List of prompt display strings
|
|
33
34
|
}
|
|
34
35
|
|
|
35
36
|
export interface ApiProvider {
|
|
@@ -72,6 +73,10 @@ export interface PromptConfig {
|
|
|
72
73
|
suffix?: string;
|
|
73
74
|
}
|
|
74
75
|
|
|
76
|
+
export interface OutputConfig {
|
|
77
|
+
postprocess?: string;
|
|
78
|
+
}
|
|
79
|
+
|
|
75
80
|
export interface EvaluateOptions {
|
|
76
81
|
maxConcurrency?: number;
|
|
77
82
|
showProgressBar?: boolean;
|
|
@@ -184,7 +189,7 @@ export interface TestCase {
|
|
|
184
189
|
assert?: Assertion[];
|
|
185
190
|
|
|
186
191
|
// Additional configuration settings for the prompt
|
|
187
|
-
options?: PromptConfig & GradingConfig;
|
|
192
|
+
options?: PromptConfig & OutputConfig & GradingConfig;
|
|
188
193
|
}
|
|
189
194
|
|
|
190
195
|
// Same as a TestCase, except the `vars` object has been flattened into its final form.
|
|
@@ -203,6 +208,10 @@ export interface TestSuite {
|
|
|
203
208
|
// One or more prompt strings
|
|
204
209
|
prompts: Prompt[];
|
|
205
210
|
|
|
211
|
+
// Optional mapping of provider to prompt display strings. If not provided,
|
|
212
|
+
// all prompts are used for all providers.
|
|
213
|
+
providerPromptMap?: Record<string, string[]>;
|
|
214
|
+
|
|
206
215
|
// Test cases
|
|
207
216
|
tests?: TestCase[];
|
|
208
217
|
|
package/src/util.ts
CHANGED
|
@@ -25,8 +25,36 @@ import type {
|
|
|
25
25
|
UnifiedConfig,
|
|
26
26
|
TestCase,
|
|
27
27
|
Prompt,
|
|
28
|
+
RawProviderConfig,
|
|
29
|
+
TestSuite,
|
|
28
30
|
} from './types';
|
|
29
31
|
|
|
32
|
+
export function readProviderPromptMap(
|
|
33
|
+
config: Partial<UnifiedConfig>,
|
|
34
|
+
parsedPrompts: Prompt[],
|
|
35
|
+
): TestSuite['providerPromptMap'] {
|
|
36
|
+
const ret: Record<string, string[]> = {};
|
|
37
|
+
|
|
38
|
+
if (!config.providers) {
|
|
39
|
+
return ret;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const allPrompts = [];
|
|
43
|
+
for (const prompt of parsedPrompts) {
|
|
44
|
+
allPrompts.push(prompt.display);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
for (const provider of config.providers) {
|
|
48
|
+
if (typeof provider === 'object') {
|
|
49
|
+
const rawProvider = provider as RawProviderConfig;
|
|
50
|
+
const id = Object.keys(rawProvider)[0];
|
|
51
|
+
ret[id] = rawProvider[id].prompts || allPrompts;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return ret;
|
|
56
|
+
}
|
|
57
|
+
|
|
30
58
|
const PROMPT_DELIMITER = '---';
|
|
31
59
|
|
|
32
60
|
function parseJson(json: string): any | undefined {
|
|
@@ -288,28 +316,31 @@ export function writeOutput(
|
|
|
288
316
|
}
|
|
289
317
|
}
|
|
290
318
|
|
|
291
|
-
export
|
|
319
|
+
export function fetchWithTimeout(
|
|
292
320
|
url: RequestInfo,
|
|
293
321
|
options: RequestInit = {},
|
|
294
322
|
timeout: number,
|
|
295
323
|
): Promise<Response> {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
324
|
+
return new Promise((resolve, reject) => {
|
|
325
|
+
const controller = new AbortController();
|
|
326
|
+
const { signal } = controller;
|
|
327
|
+
options.signal = signal;
|
|
328
|
+
|
|
329
|
+
const timeoutId = setTimeout(() => {
|
|
330
|
+
controller.abort();
|
|
331
|
+
reject(new Error(`Request timed out after ${timeout} ms`));
|
|
332
|
+
}, timeout);
|
|
333
|
+
|
|
334
|
+
fetch(url, options)
|
|
335
|
+
.then((response) => {
|
|
336
|
+
clearTimeout(timeoutId);
|
|
337
|
+
resolve(response);
|
|
338
|
+
})
|
|
339
|
+
.catch((error) => {
|
|
340
|
+
clearTimeout(timeoutId);
|
|
341
|
+
reject(error);
|
|
342
|
+
});
|
|
343
|
+
});
|
|
313
344
|
}
|
|
314
345
|
|
|
315
346
|
export async function fetchWithRetries(
|
|
@@ -331,6 +362,8 @@ export async function fetchWithRetries(
|
|
|
331
362
|
throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
|
|
332
363
|
}
|
|
333
364
|
|
|
365
|
+
const RESULT_HISTORY_LENGTH = 50;
|
|
366
|
+
|
|
334
367
|
export function getConfigDirectoryPath(): string {
|
|
335
368
|
return path.join(os.homedir(), '.promptfoo');
|
|
336
369
|
}
|
|
@@ -340,11 +373,14 @@ export function getLatestResultsPath(): string {
|
|
|
340
373
|
}
|
|
341
374
|
|
|
342
375
|
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
|
|
376
|
+
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
377
|
+
const timestamp = new Date().toISOString();
|
|
378
|
+
const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
|
|
343
379
|
const latestResultsPath = getLatestResultsPath();
|
|
344
380
|
try {
|
|
345
|
-
fs.mkdirSync(
|
|
381
|
+
fs.mkdirSync(resultsDirectory, { recursive: true });
|
|
346
382
|
fs.writeFileSync(
|
|
347
|
-
|
|
383
|
+
newResultsPath,
|
|
348
384
|
JSON.stringify(
|
|
349
385
|
{
|
|
350
386
|
version: 1,
|
|
@@ -355,8 +391,45 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
|
|
|
355
391
|
2,
|
|
356
392
|
),
|
|
357
393
|
);
|
|
394
|
+
if (fs.existsSync(latestResultsPath)) {
|
|
395
|
+
fs.unlinkSync(latestResultsPath);
|
|
396
|
+
}
|
|
397
|
+
fs.symlinkSync(newResultsPath, latestResultsPath);
|
|
398
|
+
cleanupOldResults();
|
|
358
399
|
} catch (err) {
|
|
359
|
-
logger.error(`Failed to write latest results to ${
|
|
400
|
+
logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
export function listPreviousResults(): string[] {
|
|
405
|
+
const directory = path.join(getConfigDirectoryPath(), 'output');
|
|
406
|
+
const files = fs.readdirSync(directory);
|
|
407
|
+
const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
|
|
408
|
+
const sortedFiles = resultsFiles.sort((a, b) => {
|
|
409
|
+
const statA = fs.statSync(path.join(directory, a));
|
|
410
|
+
const statB = fs.statSync(path.join(directory, b));
|
|
411
|
+
return statB.birthtime.getTime() - statA.birthtime.getTime(); // sort in descending order
|
|
412
|
+
});
|
|
413
|
+
return sortedFiles;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
export function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
|
|
417
|
+
const sortedFiles = listPreviousResults();
|
|
418
|
+
for (let i = 0; i < sortedFiles.length - remaining; i++) {
|
|
419
|
+
fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFiles[i]));
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
export function readResult(
|
|
424
|
+
name: string,
|
|
425
|
+
): { results: EvaluateSummary; config: Partial<UnifiedConfig> } | undefined {
|
|
426
|
+
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
427
|
+
const resultsPath = path.join(resultsDirectory, name);
|
|
428
|
+
try {
|
|
429
|
+
const results = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
|
|
430
|
+
return results;
|
|
431
|
+
} catch (err) {
|
|
432
|
+
logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
|
|
360
433
|
}
|
|
361
434
|
}
|
|
362
435
|
|
|
@@ -364,12 +437,7 @@ export function readLatestResults():
|
|
|
364
437
|
| { results: EvaluateSummary; config: Partial<UnifiedConfig> }
|
|
365
438
|
| undefined {
|
|
366
439
|
const latestResultsPath = getLatestResultsPath();
|
|
367
|
-
|
|
368
|
-
const latestResults = JSON.parse(fs.readFileSync(latestResultsPath, 'utf-8'));
|
|
369
|
-
return latestResults;
|
|
370
|
-
} catch (err) {
|
|
371
|
-
logger.error(`Failed to read latest results from ${latestResultsPath}:\n${err}`);
|
|
372
|
-
}
|
|
440
|
+
return readResult(latestResultsPath);
|
|
373
441
|
}
|
|
374
442
|
|
|
375
443
|
export function cosineSimilarity(vecA: number[], vecB: number[]) {
|