promptfoo 0.17.4 → 0.17.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/package.json +3 -2
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +14 -2
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cache.d.ts +3 -0
- package/dist/src/cache.d.ts.map +1 -1
- package/dist/src/cache.js +6 -1
- package/dist/src/cache.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +21 -17
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/main.js +66 -36
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +34 -0
- package/dist/src/providers/azureopenai.d.ts.map +1 -0
- package/dist/src/providers/azureopenai.js +222 -0
- package/dist/src/providers/azureopenai.js.map +1 -0
- package/dist/src/providers/localai.d.ts.map +1 -1
- package/dist/src/providers/localai.js +2 -1
- package/dist/src/providers/localai.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +1 -13
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +11 -0
- package/dist/src/providers/replicate.d.ts.map +1 -0
- package/dist/src/providers/replicate.js +78 -0
- package/dist/src/providers/replicate.js.map +1 -0
- package/dist/src/providers/shared.d.ts +5 -0
- package/dist/src/providers/shared.d.ts.map +1 -1
- package/dist/src/providers/shared.js +33 -1
- package/dist/src/providers/shared.js.map +1 -1
- package/dist/src/providers.d.ts +2 -0
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +24 -0
- package/dist/src/providers.js.map +1 -1
- package/dist/src/types.d.ts +4 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/updates.d.ts.map +1 -1
- package/dist/src/updates.js +3 -0
- package/dist/src/updates.js.map +1 -1
- package/dist/src/web/client/assets/{index-58a0e3e3.js → index-13198388.js} +23 -23
- package/dist/src/web/client/assets/index-f9b230d1.css +1 -0
- package/dist/src/web/client/index.html +2 -2
- package/package.json +3 -2
- package/src/assertions.ts +18 -2
- package/src/cache.ts +5 -1
- package/src/evaluator.ts +23 -17
- package/src/main.ts +87 -38
- package/src/providers/anthropic.ts +1 -1
- package/src/providers/azureopenai.ts +264 -0
- package/src/providers/localai.ts +3 -2
- package/src/providers/openai.ts +3 -18
- package/src/providers/replicate.ts +86 -0
- package/src/providers/shared.ts +29 -0
- package/src/providers.ts +27 -0
- package/src/types.ts +6 -0
- package/src/updates.ts +4 -0
- package/src/web/client/src/App.tsx +6 -0
- package/src/web/client/src/EvalOutputPromptDialog.tsx +6 -2
- package/src/web/client/src/ResultsTable.tsx +5 -0
- package/src/web/client/src/ResultsView.tsx +14 -11
- package/src/web/client/src/index.css +1 -12
- package/src/web/client/src/types.ts +1 -1
- package/dist/src/web/client/assets/index-b82d0138.css +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
:root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th 
.smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>promptfoo web viewer</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-58a0e3e3.js"></script>
|
|
9
|
-
<link rel="stylesheet" href="/assets/index-b82d0138.css">
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-13198388.js"></script>
|
|
9
|
+
<link rel="stylesheet" href="/assets/index-f9b230d1.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
12
12
|
<div id="root"></div>
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "promptfoo",
|
|
3
3
|
"description": "LLM eval & testing toolkit",
|
|
4
4
|
"author": "Ian Webster",
|
|
5
|
-
"version": "0.17.4",
|
|
5
|
+
"version": "0.17.6",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "commonjs",
|
|
8
8
|
"main": "dist/src/index.js",
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"promptfoo": "dist/src/main.js"
|
|
26
26
|
},
|
|
27
27
|
"scripts": {
|
|
28
|
-
"local": "ts-node --esm src/main.ts",
|
|
28
|
+
"local": "ts-node --esm --files src/main.ts",
|
|
29
29
|
"install:client": "cd src/web/client && npm install",
|
|
30
30
|
"build:clean": "rm -rf dist",
|
|
31
31
|
"build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
|
|
@@ -79,6 +79,7 @@
|
|
|
79
79
|
"node-fetch": "^2.6.7",
|
|
80
80
|
"nunjucks": "^3.2.4",
|
|
81
81
|
"opener": "^1.5.2",
|
|
82
|
+
"replicate": "^0.12.3",
|
|
82
83
|
"rouge": "^1.0.3",
|
|
83
84
|
"semver": "^7.5.3",
|
|
84
85
|
"socket.io": "^4.6.1",
|
package/src/assertions.ts
CHANGED
|
@@ -207,6 +207,22 @@ export async function runAssertion(
|
|
|
207
207
|
};
|
|
208
208
|
}
|
|
209
209
|
|
|
210
|
+
if (baseType === 'starts-with') {
|
|
211
|
+
invariant(assertion.value, '"starts-with" assertion type must have a string value');
|
|
212
|
+
invariant(
|
|
213
|
+
typeof assertion.value === 'string',
|
|
214
|
+
'"starts-with" assertion type must have a string value',
|
|
215
|
+
);
|
|
216
|
+
pass = output.startsWith(String(assertion.value)) !== inverse;
|
|
217
|
+
return {
|
|
218
|
+
pass,
|
|
219
|
+
score: pass ? 1 : 0,
|
|
220
|
+
reason: pass
|
|
221
|
+
? 'Assertion passed'
|
|
222
|
+
: `Expected output to ${inverse ? 'not ' : ''}start with "${assertion.value}"`,
|
|
223
|
+
};
|
|
224
|
+
}
|
|
225
|
+
|
|
210
226
|
if (baseType === 'contains-json') {
|
|
211
227
|
pass = containsJSON(output) !== inverse;
|
|
212
228
|
return {
|
|
@@ -480,7 +496,7 @@ export function assertionFromString(expected: string): Assertion {
|
|
|
480
496
|
|
|
481
497
|
// New options
|
|
482
498
|
const assertionRegex =
|
|
483
|
-
/^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar)(?::|\((\d+(\.\d+)?)\):)?(.*)$/;
|
|
499
|
+
/^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar|starts-with)(?::|\((\d+(\.\d+)?)\):)?(.*)$/;
|
|
484
500
|
const regexMatch = expected.match(assertionRegex);
|
|
485
501
|
|
|
486
502
|
if (regexMatch) {
|
|
@@ -497,7 +513,7 @@ export function assertionFromString(expected: string): Assertion {
|
|
|
497
513
|
return {
|
|
498
514
|
type: fullType as AssertionType,
|
|
499
515
|
};
|
|
500
|
-
} else if (type === 'rouge-n' || type === 'similar') {
|
|
516
|
+
} else if (type === 'rouge-n' || type === 'similar' || type === 'starts-with') {
|
|
501
517
|
return {
|
|
502
518
|
type: fullType as AssertionType,
|
|
503
519
|
value,
|
package/src/cache.ts
CHANGED
|
@@ -20,7 +20,7 @@ let enabled =
|
|
|
20
20
|
const cacheType =
|
|
21
21
|
process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
|
|
22
22
|
|
|
23
|
-
function getCache() {
|
|
23
|
+
export function getCache() {
|
|
24
24
|
if (!cacheInstance) {
|
|
25
25
|
const cachePath =
|
|
26
26
|
process.env.PROMPTFOO_CACHE_PATH || path.join(getConfigDirectoryPath(), 'cache');
|
|
@@ -102,3 +102,7 @@ export async function clearCache() {
|
|
|
102
102
|
logger.info('Clearing cache...');
|
|
103
103
|
return getCache().reset();
|
|
104
104
|
}
|
|
105
|
+
|
|
106
|
+
export function isCacheEnabled() {
|
|
107
|
+
return enabled;
|
|
108
|
+
}
|
package/src/evaluator.ts
CHANGED
|
@@ -33,6 +33,7 @@ interface RunEvalOptions {
|
|
|
33
33
|
|
|
34
34
|
rowIndex: number;
|
|
35
35
|
colIndex: number;
|
|
36
|
+
repeatIndex: number;
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
const DEFAULT_MAX_CONCURRENCY = 4;
|
|
@@ -266,25 +267,30 @@ class Evaluator {
|
|
|
266
267
|
// Finalize test case eval
|
|
267
268
|
const varCombinations = generateVarCombinations(testCase.vars || {});
|
|
268
269
|
totalVarCombinations += varCombinations.length;
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
270
|
+
|
|
271
|
+
const numRepeat = this.options.repeat || 1;
|
|
272
|
+
for (let repeatIndex = 0; repeatIndex < numRepeat; repeatIndex++) {
|
|
273
|
+
for (const vars of varCombinations) {
|
|
274
|
+
let colIndex = 0;
|
|
275
|
+
for (const prompt of testSuite.prompts) {
|
|
276
|
+
for (const provider of testSuite.providers) {
|
|
277
|
+
runEvalOptions.push({
|
|
278
|
+
provider,
|
|
279
|
+
prompt: {
|
|
280
|
+
...prompt,
|
|
281
|
+
raw: prependToPrompt + prompt.raw + appendToPrompt,
|
|
282
|
+
},
|
|
283
|
+
test: { ...testCase, vars },
|
|
284
|
+
includeProviderId: testSuite.providers.length > 1,
|
|
285
|
+
rowIndex,
|
|
286
|
+
colIndex,
|
|
287
|
+
repeatIndex,
|
|
288
|
+
});
|
|
289
|
+
colIndex++;
|
|
290
|
+
}
|
|
285
291
|
}
|
|
292
|
+
rowIndex++;
|
|
286
293
|
}
|
|
287
|
-
rowIndex++;
|
|
288
294
|
}
|
|
289
295
|
}
|
|
290
296
|
|
package/src/main.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
3
3
|
import { join as pathJoin, dirname } from 'path';
|
|
4
|
+
import readline from 'readline';
|
|
4
5
|
|
|
5
6
|
import chalk from 'chalk';
|
|
6
7
|
import { Command } from 'commander';
|
|
@@ -56,7 +57,11 @@ function createDummyFiles(directory: string | null) {
|
|
|
56
57
|
writeFileSync(pathJoin(process.cwd(), directory, 'README.md'), DEFAULT_README);
|
|
57
58
|
|
|
58
59
|
if (directory === '.') {
|
|
59
|
-
logger.info(
|
|
60
|
+
logger.info(
|
|
61
|
+
chalk.green.bold(
|
|
62
|
+
'Wrote prompts.txt and promptfooconfig.yaml. Open README.md to get started!',
|
|
63
|
+
),
|
|
64
|
+
);
|
|
60
65
|
} else {
|
|
61
66
|
logger.info(chalk.green.bold(`Wrote prompts.txt and promptfooconfig.yaml to ./${directory}`));
|
|
62
67
|
logger.info(chalk.green(`\`cd ${directory}\` and open README.md to get started!`));
|
|
@@ -72,20 +77,20 @@ async function main() {
|
|
|
72
77
|
pathJoin(pwd, 'promptfooconfig.json'),
|
|
73
78
|
pathJoin(pwd, 'promptfooconfig.yaml'),
|
|
74
79
|
];
|
|
75
|
-
let
|
|
80
|
+
let defaultConfig: Partial<UnifiedConfig> = {};
|
|
76
81
|
for (const path of potentialPaths) {
|
|
77
82
|
const maybeConfig = await maybeReadConfig(path);
|
|
78
83
|
if (maybeConfig) {
|
|
79
|
-
|
|
84
|
+
defaultConfig = maybeConfig;
|
|
80
85
|
break;
|
|
81
86
|
}
|
|
82
87
|
}
|
|
83
88
|
|
|
84
89
|
let evaluateOptions: EvaluateOptions = {};
|
|
85
|
-
if (
|
|
86
|
-
evaluateOptions.generateSuggestions =
|
|
87
|
-
evaluateOptions.maxConcurrency =
|
|
88
|
-
evaluateOptions.showProgressBar =
|
|
90
|
+
if (defaultConfig.evaluateOptions) {
|
|
91
|
+
evaluateOptions.generateSuggestions = defaultConfig.evaluateOptions.generateSuggestions;
|
|
92
|
+
evaluateOptions.maxConcurrency = defaultConfig.evaluateOptions.maxConcurrency;
|
|
93
|
+
evaluateOptions.showProgressBar = defaultConfig.evaluateOptions.showProgressBar;
|
|
89
94
|
}
|
|
90
95
|
|
|
91
96
|
const program = new Command();
|
|
@@ -125,21 +130,46 @@ async function main() {
|
|
|
125
130
|
|
|
126
131
|
program
|
|
127
132
|
.command('share')
|
|
128
|
-
.description('
|
|
129
|
-
.
|
|
133
|
+
.description('Create a shareable URL of your most recent eval')
|
|
134
|
+
.option('-y, --yes', 'Skip confirmation')
|
|
135
|
+
.action(async (cmdObj: { yes: boolean } & Command) => {
|
|
130
136
|
telemetry.maybeShowNotice();
|
|
131
137
|
telemetry.record('command_used', {
|
|
132
138
|
name: 'share',
|
|
133
139
|
});
|
|
134
140
|
await telemetry.send();
|
|
135
141
|
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
142
|
+
const createPublicUrl = async () => {
|
|
143
|
+
const latestResults = readLatestResults();
|
|
144
|
+
if (!latestResults) {
|
|
145
|
+
logger.error('Could not load results. Do you need to run `promptfoo eval` first?');
|
|
146
|
+
process.exit(1);
|
|
147
|
+
}
|
|
148
|
+
const url = await createShareableUrl(latestResults.results, latestResults.config);
|
|
149
|
+
logger.info(`View results: ${chalk.greenBright.bold(url)}`);
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
if (cmdObj.yes || process.env.PROMPTFOO_DISABLE_SHARE_WARNING) {
|
|
153
|
+
createPublicUrl();
|
|
154
|
+
} else {
|
|
155
|
+
const reader = readline.createInterface({
|
|
156
|
+
input: process.stdin,
|
|
157
|
+
output: process.stdout,
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
reader.question(
|
|
161
|
+
'Are you sure you want to create a shareable URL of your most recent eval? Anyone you give this URL to will be able to view the results [Y/n] ',
|
|
162
|
+
async function (answer: string) {
|
|
163
|
+
if (answer.toLowerCase() !== 'yes' && answer.toLowerCase() !== 'y' && answer !== '') {
|
|
164
|
+
reader.close();
|
|
165
|
+
return;
|
|
166
|
+
}
|
|
167
|
+
reader.close();
|
|
168
|
+
|
|
169
|
+
createPublicUrl();
|
|
170
|
+
},
|
|
171
|
+
);
|
|
140
172
|
}
|
|
141
|
-
const url = await createShareableUrl(latestResults.results, latestResults.config);
|
|
142
|
-
logger.info(`View results: ${chalk.greenBright.bold(url)}`);
|
|
143
173
|
});
|
|
144
174
|
|
|
145
175
|
program
|
|
@@ -159,28 +189,39 @@ async function main() {
|
|
|
159
189
|
program
|
|
160
190
|
.command('eval')
|
|
161
191
|
.description('Evaluate prompts')
|
|
162
|
-
.option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)'
|
|
192
|
+
.option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
|
|
163
193
|
.option(
|
|
164
194
|
'-r, --providers <name or path...>',
|
|
165
195
|
'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
|
|
166
196
|
)
|
|
167
197
|
.option(
|
|
168
198
|
'-c, --config <path>',
|
|
169
|
-
'Path to configuration file. Automatically loads
|
|
199
|
+
'Path to configuration file. Automatically loads promptfoodefaultConfig.js/json/yaml',
|
|
170
200
|
)
|
|
171
201
|
.option(
|
|
172
202
|
// TODO(ian): Remove `vars` for v1
|
|
173
203
|
'-v, --vars, -t, --tests <path>',
|
|
174
204
|
'Path to CSV with test cases',
|
|
175
|
-
|
|
205
|
+
defaultConfig?.commandLineOptions?.vars,
|
|
206
|
+
)
|
|
207
|
+
.option('-t, --tests <path>', 'Path to CSV with test cases')
|
|
208
|
+
.option(
|
|
209
|
+
'-o, --output <path>',
|
|
210
|
+
'Path to output file (csv, json, yaml, html)',
|
|
211
|
+
defaultConfig.outputPath,
|
|
176
212
|
)
|
|
177
|
-
.option('-t, --tests <path>', 'Path to CSV with test cases', config?.commandLineOptions?.tests)
|
|
178
|
-
.option('-o, --output <path>', 'Path to output file (csv, json, yaml, html)', config.outputPath)
|
|
179
213
|
.option(
|
|
180
214
|
'-j, --max-concurrency <number>',
|
|
181
215
|
'Maximum number of concurrent API calls',
|
|
182
|
-
|
|
183
|
-
? String(
|
|
216
|
+
defaultConfig.evaluateOptions?.maxConcurrency
|
|
217
|
+
? String(defaultConfig.evaluateOptions.maxConcurrency)
|
|
218
|
+
: undefined,
|
|
219
|
+
)
|
|
220
|
+
.option(
|
|
221
|
+
'--repeat <number>',
|
|
222
|
+
'Number of times to run each test',
|
|
223
|
+
defaultConfig.evaluateOptions?.repeat
|
|
224
|
+
? String(defaultConfig.evaluateOptions.repeat)
|
|
184
225
|
: undefined,
|
|
185
226
|
)
|
|
186
227
|
.option(
|
|
@@ -195,28 +236,28 @@ async function main() {
|
|
|
195
236
|
.option(
|
|
196
237
|
'--prompt-prefix <path>',
|
|
197
238
|
'This prefix is prepended to every prompt',
|
|
198
|
-
|
|
239
|
+
defaultConfig.defaultTest?.options?.prefix,
|
|
199
240
|
)
|
|
200
241
|
.option(
|
|
201
242
|
'--prompt-suffix <path>',
|
|
202
243
|
'This suffix is append to every prompt',
|
|
203
|
-
|
|
244
|
+
defaultConfig.defaultTest?.options?.suffix,
|
|
204
245
|
)
|
|
205
246
|
.option(
|
|
206
247
|
'--no-write',
|
|
207
248
|
'Do not write results to promptfoo directory',
|
|
208
|
-
|
|
249
|
+
defaultConfig?.commandLineOptions?.write,
|
|
209
250
|
)
|
|
210
251
|
.option(
|
|
211
252
|
'--no-cache',
|
|
212
253
|
'Do not read or write results to disk cache',
|
|
213
|
-
|
|
254
|
+
defaultConfig?.commandLineOptions?.cache,
|
|
214
255
|
)
|
|
215
256
|
.option('--no-progress-bar', 'Do not show progress bar')
|
|
216
|
-
.option('--no-table', 'Do not output table in CLI',
|
|
217
|
-
.option('--share', 'Create a shareable URL',
|
|
218
|
-
.option('--grader', 'Model that will grade outputs',
|
|
219
|
-
.option('--verbose', 'Show debug logs',
|
|
257
|
+
.option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
|
|
258
|
+
.option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
|
|
259
|
+
.option('--grader', 'Model that will grade outputs', defaultConfig?.commandLineOptions?.grader)
|
|
260
|
+
.option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
|
|
220
261
|
.option('--view [port]', 'View in browser ui')
|
|
221
262
|
.action(async (cmdObj: CommandLineOptions & Command) => {
|
|
222
263
|
// Misc settings
|
|
@@ -228,16 +269,20 @@ async function main() {
|
|
|
228
269
|
}
|
|
229
270
|
|
|
230
271
|
// Config parsing
|
|
231
|
-
|
|
272
|
+
let fileConfig: Partial<UnifiedConfig> = {};
|
|
232
273
|
const configPath = cmdObj.config;
|
|
233
274
|
if (configPath) {
|
|
234
|
-
|
|
275
|
+
fileConfig = await readConfig(configPath);
|
|
235
276
|
}
|
|
236
|
-
config = {
|
|
237
|
-
prompts: cmdObj.prompts ||
|
|
238
|
-
providers: cmdObj.providers ||
|
|
239
|
-
tests: cmdObj.tests || cmdObj.vars ||
|
|
240
|
-
|
|
277
|
+
const config: Partial<UnifiedConfig> = {
|
|
278
|
+
prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
|
|
279
|
+
providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
|
|
280
|
+
tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
|
|
281
|
+
sharing:
|
|
282
|
+
process.env.PROMPTFOO_DISABLE_SHARING === '1'
|
|
283
|
+
? false
|
|
284
|
+
: fileConfig.sharing ?? defaultConfig.sharing ?? true,
|
|
285
|
+
defaultTest: fileConfig.defaultTest,
|
|
241
286
|
};
|
|
242
287
|
|
|
243
288
|
// Validation
|
|
@@ -286,12 +331,15 @@ async function main() {
|
|
|
286
331
|
defaultTest,
|
|
287
332
|
};
|
|
288
333
|
|
|
334
|
+
const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
|
|
335
|
+
const iterations = parseInt(cmdObj.repeat || '', 10);
|
|
289
336
|
const options: EvaluateOptions = {
|
|
290
337
|
showProgressBar:
|
|
291
338
|
typeof cmdObj.progressBar === 'undefined'
|
|
292
339
|
? getLogLevel() !== 'debug'
|
|
293
340
|
: cmdObj.progressBar,
|
|
294
341
|
maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
|
|
342
|
+
repeat: !isNaN(iterations) && iterations > 0 ? iterations : 1,
|
|
295
343
|
...evaluateOptions,
|
|
296
344
|
};
|
|
297
345
|
|
|
@@ -305,7 +353,8 @@ async function main() {
|
|
|
305
353
|
|
|
306
354
|
const summary = await evaluate(testSuite, options);
|
|
307
355
|
|
|
308
|
-
const shareableUrl =
|
|
356
|
+
const shareableUrl =
|
|
357
|
+
cmdObj.share && config.sharing ? await createShareableUrl(summary, config) : null;
|
|
309
358
|
|
|
310
359
|
if (cmdObj.output) {
|
|
311
360
|
logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
|
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -25,7 +25,7 @@ export class AnthropicCompletionProvider implements ApiProvider {
|
|
|
25
25
|
constructor(modelName: string, apiKey?: string, context?: AnthropicCompletionOptions) {
|
|
26
26
|
this.modelName = modelName;
|
|
27
27
|
this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
|
|
28
|
-
this.anthropic = new Anthropic({apiKey: this.apiKey});
|
|
28
|
+
this.anthropic = new Anthropic({ apiKey: this.apiKey });
|
|
29
29
|
this.options = context || {};
|
|
30
30
|
}
|
|
31
31
|
|
|
package/src/providers/azureopenai.ts
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import logger from '../logger';
|
|
2
|
+
import { fetchJsonWithCache } from '../cache';
|
|
3
|
+
import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
|
|
4
|
+
|
|
5
|
+
import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
|
|
6
|
+
|
|
7
|
+
interface AzureOpenAiCompletionOptions {
|
|
8
|
+
temperature?: number;
|
|
9
|
+
functions?: {
|
|
10
|
+
name: string;
|
|
11
|
+
description?: string;
|
|
12
|
+
parameters: any;
|
|
13
|
+
}[];
|
|
14
|
+
function_call?: 'none' | 'auto';
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
class AzureOpenAiGenericProvider implements ApiProvider {
|
|
18
|
+
deploymentName: string;
|
|
19
|
+
apiKey?: string;
|
|
20
|
+
apiHost?: string;
|
|
21
|
+
|
|
22
|
+
constructor(deploymentName: string, apiKey?: string) {
|
|
23
|
+
this.deploymentName = deploymentName;
|
|
24
|
+
|
|
25
|
+
this.apiKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
|
|
26
|
+
|
|
27
|
+
this.apiHost = process.env.AZURE_OPENAI_API_HOST;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
id(): string {
|
|
31
|
+
return `azureopenai:${this.deploymentName}`;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
toString(): string {
|
|
35
|
+
return `[Azure OpenAI Provider ${this.deploymentName}]`;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// @ts-ignore: Prompt is not used in this implementation
|
|
39
|
+
async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
|
|
40
|
+
throw new Error('Not implemented');
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export class AzureOpenAiEmbeddingProvider extends AzureOpenAiGenericProvider {
|
|
45
|
+
async callEmbeddingApi(text: string): Promise<ProviderEmbeddingResponse> {
|
|
46
|
+
if (!this.apiKey) {
|
|
47
|
+
throw new Error('Azure OpenAI API key must be set for similarity comparison');
|
|
48
|
+
}
|
|
49
|
+
if (!this.apiHost) {
|
|
50
|
+
throw new Error('Azure OpenAI API host must be set');
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const body = {
|
|
54
|
+
input: text,
|
|
55
|
+
model: this.deploymentName,
|
|
56
|
+
};
|
|
57
|
+
let data,
|
|
58
|
+
cached = false;
|
|
59
|
+
try {
|
|
60
|
+
({ data, cached } = (await fetchJsonWithCache(
|
|
61
|
+
`https://${this.apiHost}/openai/deployments/${this.deploymentName}/embeddings?api-version=2023-07-01-preview`,
|
|
62
|
+
{
|
|
63
|
+
method: 'POST',
|
|
64
|
+
headers: {
|
|
65
|
+
'Content-Type': 'application/json',
|
|
66
|
+
'api-key': this.apiKey,
|
|
67
|
+
},
|
|
68
|
+
body: JSON.stringify(body),
|
|
69
|
+
},
|
|
70
|
+
REQUEST_TIMEOUT_MS,
|
|
71
|
+
)) as unknown as any);
|
|
72
|
+
} catch (err) {
|
|
73
|
+
return {
|
|
74
|
+
error: `API call error: ${String(err)}`,
|
|
75
|
+
tokenUsage: {
|
|
76
|
+
total: 0,
|
|
77
|
+
prompt: 0,
|
|
78
|
+
completion: 0,
|
|
79
|
+
},
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
logger.debug(`\tAzure OpenAI API response (embeddings): ${JSON.stringify(data)}`);
|
|
83
|
+
|
|
84
|
+
try {
|
|
85
|
+
const embedding = data?.data?.[0]?.embedding;
|
|
86
|
+
if (!embedding) {
|
|
87
|
+
throw new Error('No embedding returned');
|
|
88
|
+
}
|
|
89
|
+
const ret = {
|
|
90
|
+
embedding,
|
|
91
|
+
tokenUsage: cached
|
|
92
|
+
? { cached: data.usage.total_tokens }
|
|
93
|
+
: {
|
|
94
|
+
total: data.usage.total_tokens,
|
|
95
|
+
prompt: data.usage.prompt_tokens,
|
|
96
|
+
completion: data.usage.completion_tokens,
|
|
97
|
+
},
|
|
98
|
+
};
|
|
99
|
+
return ret;
|
|
100
|
+
} catch (err) {
|
|
101
|
+
return {
|
|
102
|
+
error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
|
|
103
|
+
tokenUsage: {
|
|
104
|
+
total: data?.usage?.total_tokens,
|
|
105
|
+
prompt: data?.usage?.prompt_tokens,
|
|
106
|
+
completion: data?.usage?.completion_tokens,
|
|
107
|
+
},
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
|
|
114
|
+
options: AzureOpenAiCompletionOptions;
|
|
115
|
+
|
|
116
|
+
constructor(deploymentName: string, apiKey?: string, context?: AzureOpenAiCompletionOptions) {
|
|
117
|
+
super(deploymentName, apiKey);
|
|
118
|
+
this.options = context || {};
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
|
|
122
|
+
if (!this.apiKey) {
|
|
123
|
+
throw new Error(
|
|
124
|
+
'Azure OpenAI API key is not set. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument to the constructor.',
|
|
125
|
+
);
|
|
126
|
+
}
|
|
127
|
+
if (!this.apiHost) {
|
|
128
|
+
throw new Error('Azure OpenAI API host must be set');
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
let stop: string;
|
|
132
|
+
try {
|
|
133
|
+
stop = process.env.OPENAI_STOP
|
|
134
|
+
? JSON.parse(process.env.OPENAI_STOP)
|
|
135
|
+
: ['<|im_end|>', '<|endoftext|>'];
|
|
136
|
+
} catch (err) {
|
|
137
|
+
throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
|
|
138
|
+
}
|
|
139
|
+
const body = {
|
|
140
|
+
model: this.deploymentName,
|
|
141
|
+
prompt,
|
|
142
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
143
|
+
temperature:
|
|
144
|
+
options?.temperature ??
|
|
145
|
+
this.options.temperature ??
|
|
146
|
+
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
147
|
+
stop,
|
|
148
|
+
};
|
|
149
|
+
logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
|
|
150
|
+
let data,
|
|
151
|
+
cached = false;
|
|
152
|
+
try {
|
|
153
|
+
({ data, cached } = (await fetchJsonWithCache(
|
|
154
|
+
`https://${this.apiHost}/openai/deployments/${this.deploymentName}/completions?api-version=2023-07-01-preview`,
|
|
155
|
+
{
|
|
156
|
+
method: 'POST',
|
|
157
|
+
headers: {
|
|
158
|
+
'Content-Type': 'application/json',
|
|
159
|
+
'api-key': this.apiKey,
|
|
160
|
+
},
|
|
161
|
+
body: JSON.stringify(body),
|
|
162
|
+
},
|
|
163
|
+
REQUEST_TIMEOUT_MS,
|
|
164
|
+
)) as unknown as any);
|
|
165
|
+
} catch (err) {
|
|
166
|
+
return {
|
|
167
|
+
error: `API call error: ${String(err)}`,
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
logger.debug(`\tAzure OpenAI API response: ${JSON.stringify(data)}`);
|
|
171
|
+
try {
|
|
172
|
+
return {
|
|
173
|
+
output: data.choices[0].text,
|
|
174
|
+
tokenUsage: cached
|
|
175
|
+
? { cached: data.usage.total_tokens }
|
|
176
|
+
: {
|
|
177
|
+
total: data.usage.total_tokens,
|
|
178
|
+
prompt: data.usage.prompt_tokens,
|
|
179
|
+
completion: data.usage.completion_tokens,
|
|
180
|
+
},
|
|
181
|
+
};
|
|
182
|
+
} catch (err) {
|
|
183
|
+
return {
|
|
184
|
+
error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvider {
|
|
191
|
+
options: AzureOpenAiCompletionOptions;
|
|
192
|
+
|
|
193
|
+
constructor(deploymentName: string, apiKey?: string, context?: AzureOpenAiCompletionOptions) {
|
|
194
|
+
super(deploymentName, apiKey);
|
|
195
|
+
this.options = context || {};
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
|
|
199
|
+
if (!this.apiKey) {
|
|
200
|
+
throw new Error(
|
|
201
|
+
'Azure OpenAI API key is not set. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument to the constructor.',
|
|
202
|
+
);
|
|
203
|
+
}
|
|
204
|
+
if (!this.apiHost) {
|
|
205
|
+
throw new Error('Azure OpenAI API host must be set');
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
const messages = parseChatPrompt(prompt);
|
|
209
|
+
const body = {
|
|
210
|
+
model: this.deploymentName,
|
|
211
|
+
messages: messages,
|
|
212
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
213
|
+
temperature:
|
|
214
|
+
options?.temperature ??
|
|
215
|
+
this.options.temperature ??
|
|
216
|
+
parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
217
|
+
functions: options?.functions || this.options.functions || undefined,
|
|
218
|
+
function_call: options?.function_call || this.options.function_call || undefined,
|
|
219
|
+
};
|
|
220
|
+
logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
|
|
221
|
+
|
|
222
|
+
let data,
|
|
223
|
+
cached = false;
|
|
224
|
+
try {
|
|
225
|
+
({ data, cached } = (await fetchJsonWithCache(
|
|
226
|
+
`https://${this.apiHost}/openai/deployments/${this.deploymentName}/chat/completions?api-version=2023-07-01-preview`,
|
|
227
|
+
{
|
|
228
|
+
method: 'POST',
|
|
229
|
+
headers: {
|
|
230
|
+
'Content-Type': 'application/json',
|
|
231
|
+
'api-key': this.apiKey,
|
|
232
|
+
},
|
|
233
|
+
body: JSON.stringify(body),
|
|
234
|
+
},
|
|
235
|
+
REQUEST_TIMEOUT_MS,
|
|
236
|
+
)) as unknown as any);
|
|
237
|
+
} catch (err) {
|
|
238
|
+
return {
|
|
239
|
+
error: `API call error: ${String(err)}`,
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
logger.debug(`\tAzure OpenAI API response: ${JSON.stringify(data)}`);
|
|
244
|
+
try {
|
|
245
|
+
const message = data.choices[0].message;
|
|
246
|
+
const output =
|
|
247
|
+
message.content === null ? JSON.stringify(message.function_call) : message.content;
|
|
248
|
+
return {
|
|
249
|
+
output,
|
|
250
|
+
tokenUsage: cached
|
|
251
|
+
? { cached: data.usage.total_tokens }
|
|
252
|
+
: {
|
|
253
|
+
total: data.usage.total_tokens,
|
|
254
|
+
prompt: data.usage.prompt_tokens,
|
|
255
|
+
completion: data.usage.completion_tokens,
|
|
256
|
+
},
|
|
257
|
+
};
|
|
258
|
+
} catch (err) {
|
|
259
|
+
return {
|
|
260
|
+
error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
|
|
261
|
+
};
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|