promptfoo 0.17.4 → 0.17.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +1 -0
  2. package/dist/package.json +3 -2
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +14 -2
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/cache.d.ts +3 -0
  7. package/dist/src/cache.d.ts.map +1 -1
  8. package/dist/src/cache.js +6 -1
  9. package/dist/src/cache.js.map +1 -1
  10. package/dist/src/evaluator.d.ts.map +1 -1
  11. package/dist/src/evaluator.js +21 -17
  12. package/dist/src/evaluator.js.map +1 -1
  13. package/dist/src/index.d.ts +1 -0
  14. package/dist/src/index.d.ts.map +1 -1
  15. package/dist/src/main.js +66 -36
  16. package/dist/src/main.js.map +1 -1
  17. package/dist/src/providers/anthropic.js.map +1 -1
  18. package/dist/src/providers/azureopenai.d.ts +34 -0
  19. package/dist/src/providers/azureopenai.d.ts.map +1 -0
  20. package/dist/src/providers/azureopenai.js +222 -0
  21. package/dist/src/providers/azureopenai.js.map +1 -0
  22. package/dist/src/providers/localai.d.ts.map +1 -1
  23. package/dist/src/providers/localai.js +2 -1
  24. package/dist/src/providers/localai.js.map +1 -1
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +1 -13
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +11 -0
  29. package/dist/src/providers/replicate.d.ts.map +1 -0
  30. package/dist/src/providers/replicate.js +78 -0
  31. package/dist/src/providers/replicate.js.map +1 -0
  32. package/dist/src/providers/shared.d.ts +5 -0
  33. package/dist/src/providers/shared.d.ts.map +1 -1
  34. package/dist/src/providers/shared.js +33 -1
  35. package/dist/src/providers/shared.js.map +1 -1
  36. package/dist/src/providers.d.ts +2 -0
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +24 -0
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/types.d.ts +4 -1
  41. package/dist/src/types.d.ts.map +1 -1
  42. package/dist/src/updates.d.ts.map +1 -1
  43. package/dist/src/updates.js +3 -0
  44. package/dist/src/updates.js.map +1 -1
  45. package/dist/src/web/client/assets/{index-58a0e3e3.js → index-13198388.js} +23 -23
  46. package/dist/src/web/client/assets/index-f9b230d1.css +1 -0
  47. package/dist/src/web/client/index.html +2 -2
  48. package/package.json +3 -2
  49. package/src/assertions.ts +18 -2
  50. package/src/cache.ts +5 -1
  51. package/src/evaluator.ts +23 -17
  52. package/src/main.ts +87 -38
  53. package/src/providers/anthropic.ts +1 -1
  54. package/src/providers/azureopenai.ts +264 -0
  55. package/src/providers/localai.ts +3 -2
  56. package/src/providers/openai.ts +3 -18
  57. package/src/providers/replicate.ts +86 -0
  58. package/src/providers/shared.ts +29 -0
  59. package/src/providers.ts +27 -0
  60. package/src/types.ts +6 -0
  61. package/src/updates.ts +4 -0
  62. package/src/web/client/src/App.tsx +6 -0
  63. package/src/web/client/src/EvalOutputPromptDialog.tsx +6 -2
  64. package/src/web/client/src/ResultsTable.tsx +5 -0
  65. package/src/web/client/src/ResultsView.tsx +14 -11
  66. package/src/web/client/src/index.css +1 -12
  67. package/src/web/client/src/types.ts +1 -1
  68. package/dist/src/web/client/assets/index-b82d0138.css +0 -1
@@ -0,0 +1 @@
1
+ :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th 
.smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
package/dist/src/web/client/index.html CHANGED
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-58a0e3e3.js"></script>
9
- <link rel="stylesheet" href="/assets/index-b82d0138.css">
8
+ <script type="module" crossorigin src="/assets/index-13198388.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-f9b230d1.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.4",
5
+ "version": "0.17.6",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
@@ -25,7 +25,7 @@
25
25
  "promptfoo": "dist/src/main.js"
26
26
  },
27
27
  "scripts": {
28
- "local": "ts-node --esm src/main.ts",
28
+ "local": "ts-node --esm --files src/main.ts",
29
29
  "install:client": "cd src/web/client && npm install",
30
30
  "build:clean": "rm -rf dist",
31
31
  "build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
@@ -79,6 +79,7 @@
79
79
  "node-fetch": "^2.6.7",
80
80
  "nunjucks": "^3.2.4",
81
81
  "opener": "^1.5.2",
82
+ "replicate": "^0.12.3",
82
83
  "rouge": "^1.0.3",
83
84
  "semver": "^7.5.3",
84
85
  "socket.io": "^4.6.1",
package/src/assertions.ts CHANGED
@@ -207,6 +207,22 @@ export async function runAssertion(
207
207
  };
208
208
  }
209
209
 
210
+ if (baseType === 'starts-with') {
211
+ invariant(assertion.value, '"starts-with" assertion type must have a string value');
212
+ invariant(
213
+ typeof assertion.value === 'string',
214
+ '"starts-with" assertion type must have a string value',
215
+ );
216
+ pass = output.startsWith(String(assertion.value)) !== inverse;
217
+ return {
218
+ pass,
219
+ score: pass ? 1 : 0,
220
+ reason: pass
221
+ ? 'Assertion passed'
222
+ : `Expected output to ${inverse ? 'not ' : ''}start with "${assertion.value}"`,
223
+ };
224
+ }
225
+
210
226
  if (baseType === 'contains-json') {
211
227
  pass = containsJSON(output) !== inverse;
212
228
  return {
@@ -480,7 +496,7 @@ export function assertionFromString(expected: string): Assertion {
480
496
 
481
497
  // New options
482
498
  const assertionRegex =
483
- /^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar)(?::|\((\d+(\.\d+)?)\):)?(.*)$/;
499
+ /^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar|starts-with)(?::|\((\d+(\.\d+)?)\):)?(.*)$/;
484
500
  const regexMatch = expected.match(assertionRegex);
485
501
 
486
502
  if (regexMatch) {
@@ -497,7 +513,7 @@ export function assertionFromString(expected: string): Assertion {
497
513
  return {
498
514
  type: fullType as AssertionType,
499
515
  };
500
- } else if (type === 'rouge-n' || type === 'similar') {
516
+ } else if (type === 'rouge-n' || type === 'similar' || type === 'starts-with') {
501
517
  return {
502
518
  type: fullType as AssertionType,
503
519
  value,
package/src/cache.ts CHANGED
@@ -20,7 +20,7 @@ let enabled =
20
20
  const cacheType =
21
21
  process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
22
22
 
23
- function getCache() {
23
+ export function getCache() {
24
24
  if (!cacheInstance) {
25
25
  const cachePath =
26
26
  process.env.PROMPTFOO_CACHE_PATH || path.join(getConfigDirectoryPath(), 'cache');
@@ -102,3 +102,7 @@ export async function clearCache() {
102
102
  logger.info('Clearing cache...');
103
103
  return getCache().reset();
104
104
  }
105
+
106
+ export function isCacheEnabled() {
107
+ return enabled;
108
+ }
package/src/evaluator.ts CHANGED
@@ -33,6 +33,7 @@ interface RunEvalOptions {
33
33
 
34
34
  rowIndex: number;
35
35
  colIndex: number;
36
+ repeatIndex: number;
36
37
  }
37
38
 
38
39
  const DEFAULT_MAX_CONCURRENCY = 4;
@@ -266,25 +267,30 @@ class Evaluator {
266
267
  // Finalize test case eval
267
268
  const varCombinations = generateVarCombinations(testCase.vars || {});
268
269
  totalVarCombinations += varCombinations.length;
269
- for (const vars of varCombinations) {
270
- let colIndex = 0;
271
- for (const prompt of testSuite.prompts) {
272
- for (const provider of testSuite.providers) {
273
- runEvalOptions.push({
274
- provider,
275
- prompt: {
276
- ...prompt,
277
- raw: prependToPrompt + prompt.raw + appendToPrompt,
278
- },
279
- test: { ...testCase, vars },
280
- includeProviderId: testSuite.providers.length > 1,
281
- rowIndex,
282
- colIndex,
283
- });
284
- colIndex++;
270
+
271
+ const numRepeat = this.options.repeat || 1;
272
+ for (let repeatIndex = 0; repeatIndex < numRepeat; repeatIndex++) {
273
+ for (const vars of varCombinations) {
274
+ let colIndex = 0;
275
+ for (const prompt of testSuite.prompts) {
276
+ for (const provider of testSuite.providers) {
277
+ runEvalOptions.push({
278
+ provider,
279
+ prompt: {
280
+ ...prompt,
281
+ raw: prependToPrompt + prompt.raw + appendToPrompt,
282
+ },
283
+ test: { ...testCase, vars },
284
+ includeProviderId: testSuite.providers.length > 1,
285
+ rowIndex,
286
+ colIndex,
287
+ repeatIndex,
288
+ });
289
+ colIndex++;
290
+ }
285
291
  }
292
+ rowIndex++;
286
293
  }
287
- rowIndex++;
288
294
  }
289
295
  }
290
296
 
package/src/main.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
3
3
  import { join as pathJoin, dirname } from 'path';
4
+ import readline from 'readline';
4
5
 
5
6
  import chalk from 'chalk';
6
7
  import { Command } from 'commander';
@@ -56,7 +57,11 @@ function createDummyFiles(directory: string | null) {
56
57
  writeFileSync(pathJoin(process.cwd(), directory, 'README.md'), DEFAULT_README);
57
58
 
58
59
  if (directory === '.') {
59
- logger.info(chalk.green.bold('Wrote prompts.txt and promptfooconfig.yaml. Open README.md to get started!'));
60
+ logger.info(
61
+ chalk.green.bold(
62
+ 'Wrote prompts.txt and promptfooconfig.yaml. Open README.md to get started!',
63
+ ),
64
+ );
60
65
  } else {
61
66
  logger.info(chalk.green.bold(`Wrote prompts.txt and promptfooconfig.yaml to ./${directory}`));
62
67
  logger.info(chalk.green(`\`cd ${directory}\` and open README.md to get started!`));
@@ -72,20 +77,20 @@ async function main() {
72
77
  pathJoin(pwd, 'promptfooconfig.json'),
73
78
  pathJoin(pwd, 'promptfooconfig.yaml'),
74
79
  ];
75
- let config: Partial<UnifiedConfig> = {};
80
+ let defaultConfig: Partial<UnifiedConfig> = {};
76
81
  for (const path of potentialPaths) {
77
82
  const maybeConfig = await maybeReadConfig(path);
78
83
  if (maybeConfig) {
79
- config = maybeConfig;
84
+ defaultConfig = maybeConfig;
80
85
  break;
81
86
  }
82
87
  }
83
88
 
84
89
  let evaluateOptions: EvaluateOptions = {};
85
- if (config.evaluateOptions) {
86
- evaluateOptions.generateSuggestions = config.evaluateOptions.generateSuggestions;
87
- evaluateOptions.maxConcurrency = config.evaluateOptions.maxConcurrency;
88
- evaluateOptions.showProgressBar = config.evaluateOptions.showProgressBar;
90
+ if (defaultConfig.evaluateOptions) {
91
+ evaluateOptions.generateSuggestions = defaultConfig.evaluateOptions.generateSuggestions;
92
+ evaluateOptions.maxConcurrency = defaultConfig.evaluateOptions.maxConcurrency;
93
+ evaluateOptions.showProgressBar = defaultConfig.evaluateOptions.showProgressBar;
89
94
  }
90
95
 
91
96
  const program = new Command();
@@ -125,21 +130,46 @@ async function main() {
125
130
 
126
131
  program
127
132
  .command('share')
128
- .description('Share your most recent result')
129
- .action(async (cmdObj: { port: number } & Command) => {
133
+ .description('Create a shareable URL of your most recent eval')
134
+ .option('-y, --yes', 'Skip confirmation')
135
+ .action(async (cmdObj: { yes: boolean } & Command) => {
130
136
  telemetry.maybeShowNotice();
131
137
  telemetry.record('command_used', {
132
138
  name: 'share',
133
139
  });
134
140
  await telemetry.send();
135
141
 
136
- const latestResults = readLatestResults();
137
- if (!latestResults) {
138
- logger.error('Could not load results. Do you need to run `promptfoo eval` first?');
139
- process.exit(1);
142
+ const createPublicUrl = async () => {
143
+ const latestResults = readLatestResults();
144
+ if (!latestResults) {
145
+ logger.error('Could not load results. Do you need to run `promptfoo eval` first?');
146
+ process.exit(1);
147
+ }
148
+ const url = await createShareableUrl(latestResults.results, latestResults.config);
149
+ logger.info(`View results: ${chalk.greenBright.bold(url)}`);
150
+ };
151
+
152
+ if (cmdObj.yes || process.env.PROMPTFOO_DISABLE_SHARE_WARNING) {
153
+ createPublicUrl();
154
+ } else {
155
+ const reader = readline.createInterface({
156
+ input: process.stdin,
157
+ output: process.stdout,
158
+ });
159
+
160
+ reader.question(
161
+ 'Are you sure you want to create a shareable URL of your most recent eval? Anyone you give this URL to will be able to view the results [Y/n] ',
162
+ async function (answer: string) {
163
+ if (answer.toLowerCase() !== 'yes' && answer.toLowerCase() !== 'y' && answer !== '') {
164
+ reader.close();
165
+ return;
166
+ }
167
+ reader.close();
168
+
169
+ createPublicUrl();
170
+ },
171
+ );
140
172
  }
141
- const url = await createShareableUrl(latestResults.results, latestResults.config);
142
- logger.info(`View results: ${chalk.greenBright.bold(url)}`);
143
173
  });
144
174
 
145
175
  program
@@ -159,28 +189,39 @@ async function main() {
159
189
  program
160
190
  .command('eval')
161
191
  .description('Evaluate prompts')
162
- .option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)', config.prompts)
192
+ .option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
163
193
  .option(
164
194
  '-r, --providers <name or path...>',
165
195
  'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
166
196
  )
167
197
  .option(
168
198
  '-c, --config <path>',
169
- 'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml',
199
+ 'Path to configuration file. Automatically loads promptfoodefaultConfig.js/json/yaml',
170
200
  )
171
201
  .option(
172
202
  // TODO(ian): Remove `vars` for v1
173
203
  '-v, --vars, -t, --tests <path>',
174
204
  'Path to CSV with test cases',
175
- config?.commandLineOptions?.vars,
205
+ defaultConfig?.commandLineOptions?.vars,
206
+ )
207
+ .option('-t, --tests <path>', 'Path to CSV with test cases')
208
+ .option(
209
+ '-o, --output <path>',
210
+ 'Path to output file (csv, json, yaml, html)',
211
+ defaultConfig.outputPath,
176
212
  )
177
- .option('-t, --tests <path>', 'Path to CSV with test cases', config?.commandLineOptions?.tests)
178
- .option('-o, --output <path>', 'Path to output file (csv, json, yaml, html)', config.outputPath)
179
213
  .option(
180
214
  '-j, --max-concurrency <number>',
181
215
  'Maximum number of concurrent API calls',
182
- config.evaluateOptions?.maxConcurrency
183
- ? String(config.evaluateOptions.maxConcurrency)
216
+ defaultConfig.evaluateOptions?.maxConcurrency
217
+ ? String(defaultConfig.evaluateOptions.maxConcurrency)
218
+ : undefined,
219
+ )
220
+ .option(
221
+ '--repeat <number>',
222
+ 'Number of times to run each test',
223
+ defaultConfig.evaluateOptions?.repeat
224
+ ? String(defaultConfig.evaluateOptions.repeat)
184
225
  : undefined,
185
226
  )
186
227
  .option(
@@ -195,28 +236,28 @@ async function main() {
195
236
  .option(
196
237
  '--prompt-prefix <path>',
197
238
  'This prefix is prepended to every prompt',
198
- config.defaultTest?.options?.prefix,
239
+ defaultConfig.defaultTest?.options?.prefix,
199
240
  )
200
241
  .option(
201
242
  '--prompt-suffix <path>',
202
243
  'This suffix is append to every prompt',
203
- config.defaultTest?.options?.suffix,
244
+ defaultConfig.defaultTest?.options?.suffix,
204
245
  )
205
246
  .option(
206
247
  '--no-write',
207
248
  'Do not write results to promptfoo directory',
208
- config?.commandLineOptions?.write,
249
+ defaultConfig?.commandLineOptions?.write,
209
250
  )
210
251
  .option(
211
252
  '--no-cache',
212
253
  'Do not read or write results to disk cache',
213
- config?.commandLineOptions?.cache,
254
+ defaultConfig?.commandLineOptions?.cache,
214
255
  )
215
256
  .option('--no-progress-bar', 'Do not show progress bar')
216
- .option('--no-table', 'Do not output table in CLI', config?.commandLineOptions?.table)
217
- .option('--share', 'Create a shareable URL', config?.commandLineOptions?.share)
218
- .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
219
- .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
257
+ .option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
258
+ .option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
259
+ .option('--grader', 'Model that will grade outputs', defaultConfig?.commandLineOptions?.grader)
260
+ .option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
220
261
  .option('--view [port]', 'View in browser ui')
221
262
  .action(async (cmdObj: CommandLineOptions & Command) => {
222
263
  // Misc settings
@@ -228,16 +269,20 @@ async function main() {
228
269
  }
229
270
 
230
271
  // Config parsing
231
- const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
272
+ let fileConfig: Partial<UnifiedConfig> = {};
232
273
  const configPath = cmdObj.config;
233
274
  if (configPath) {
234
- config = await readConfig(configPath);
275
+ fileConfig = await readConfig(configPath);
235
276
  }
236
- config = {
237
- prompts: cmdObj.prompts || config.prompts,
238
- providers: cmdObj.providers || config.providers,
239
- tests: cmdObj.tests || cmdObj.vars || config.tests,
240
- defaultTest: config.defaultTest,
277
+ const config: Partial<UnifiedConfig> = {
278
+ prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
279
+ providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
280
+ tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
281
+ sharing:
282
+ process.env.PROMPTFOO_DISABLE_SHARING === '1'
283
+ ? false
284
+ : fileConfig.sharing ?? defaultConfig.sharing ?? true,
285
+ defaultTest: fileConfig.defaultTest,
241
286
  };
242
287
 
243
288
  // Validation
@@ -286,12 +331,15 @@ async function main() {
286
331
  defaultTest,
287
332
  };
288
333
 
334
+ const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
335
+ const iterations = parseInt(cmdObj.repeat || '', 10);
289
336
  const options: EvaluateOptions = {
290
337
  showProgressBar:
291
338
  typeof cmdObj.progressBar === 'undefined'
292
339
  ? getLogLevel() !== 'debug'
293
340
  : cmdObj.progressBar,
294
341
  maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
342
+ repeat: !isNaN(iterations) && iterations > 0 ? iterations : 1,
295
343
  ...evaluateOptions,
296
344
  };
297
345
 
@@ -305,7 +353,8 @@ async function main() {
305
353
 
306
354
  const summary = await evaluate(testSuite, options);
307
355
 
308
- const shareableUrl = cmdObj.share ? await createShareableUrl(summary, config) : null;
356
+ const shareableUrl =
357
+ cmdObj.share && config.sharing ? await createShareableUrl(summary, config) : null;
309
358
 
310
359
  if (cmdObj.output) {
311
360
  logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
package/src/providers/anthropic.ts CHANGED
@@ -25,7 +25,7 @@ export class AnthropicCompletionProvider implements ApiProvider {
25
25
  constructor(modelName: string, apiKey?: string, context?: AnthropicCompletionOptions) {
26
26
  this.modelName = modelName;
27
27
  this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
28
- this.anthropic = new Anthropic({apiKey: this.apiKey});
28
+ this.anthropic = new Anthropic({ apiKey: this.apiKey });
29
29
  this.options = context || {};
30
30
  }
31
31
 
@@ -0,0 +1,264 @@
1
+ import logger from '../logger';
2
+ import { fetchJsonWithCache } from '../cache';
3
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
4
+
5
+ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
6
+
7
+ interface AzureOpenAiCompletionOptions {
8
+ temperature?: number;
9
+ functions?: {
10
+ name: string;
11
+ description?: string;
12
+ parameters: any;
13
+ }[];
14
+ function_call?: 'none' | 'auto';
15
+ }
16
+
17
+ class AzureOpenAiGenericProvider implements ApiProvider {
18
+ deploymentName: string;
19
+ apiKey?: string;
20
+ apiHost?: string;
21
+
22
+ constructor(deploymentName: string, apiKey?: string) {
23
+ this.deploymentName = deploymentName;
24
+
25
+ this.apiKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
26
+
27
+ this.apiHost = process.env.AZURE_OPENAI_API_HOST;
28
+ }
29
+
30
+ id(): string {
31
+ return `azureopenai:${this.deploymentName}`;
32
+ }
33
+
34
+ toString(): string {
35
+ return `[Azure OpenAI Provider ${this.deploymentName}]`;
36
+ }
37
+
38
+ // @ts-ignore: Prompt is not used in this implementation
39
+ async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
40
+ throw new Error('Not implemented');
41
+ }
42
+ }
43
+
44
+ export class AzureOpenAiEmbeddingProvider extends AzureOpenAiGenericProvider {
45
+ async callEmbeddingApi(text: string): Promise<ProviderEmbeddingResponse> {
46
+ if (!this.apiKey) {
47
+ throw new Error('Azure OpenAI API key must be set for similarity comparison');
48
+ }
49
+ if (!this.apiHost) {
50
+ throw new Error('Azure OpenAI API host must be set');
51
+ }
52
+
53
+ const body = {
54
+ input: text,
55
+ model: this.deploymentName,
56
+ };
57
+ let data,
58
+ cached = false;
59
+ try {
60
+ ({ data, cached } = (await fetchJsonWithCache(
61
+ `https://${this.apiHost}/openai/deployments/${this.deploymentName}/embeddings?api-version=2023-07-01-preview`,
62
+ {
63
+ method: 'POST',
64
+ headers: {
65
+ 'Content-Type': 'application/json',
66
+ 'api-key': this.apiKey,
67
+ },
68
+ body: JSON.stringify(body),
69
+ },
70
+ REQUEST_TIMEOUT_MS,
71
+ )) as unknown as any);
72
+ } catch (err) {
73
+ return {
74
+ error: `API call error: ${String(err)}`,
75
+ tokenUsage: {
76
+ total: 0,
77
+ prompt: 0,
78
+ completion: 0,
79
+ },
80
+ };
81
+ }
82
+ logger.debug(`\tAzure OpenAI API response (embeddings): ${JSON.stringify(data)}`);
83
+
84
+ try {
85
+ const embedding = data?.data?.[0]?.embedding;
86
+ if (!embedding) {
87
+ throw new Error('No embedding returned');
88
+ }
89
+ const ret = {
90
+ embedding,
91
+ tokenUsage: cached
92
+ ? { cached: data.usage.total_tokens }
93
+ : {
94
+ total: data.usage.total_tokens,
95
+ prompt: data.usage.prompt_tokens,
96
+ completion: data.usage.completion_tokens,
97
+ },
98
+ };
99
+ return ret;
100
+ } catch (err) {
101
+ return {
102
+ error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
103
+ tokenUsage: {
104
+ total: data?.usage?.total_tokens,
105
+ prompt: data?.usage?.prompt_tokens,
106
+ completion: data?.usage?.completion_tokens,
107
+ },
108
+ };
109
+ }
110
+ }
111
+ }
112
+
113
+ export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
114
+ options: AzureOpenAiCompletionOptions;
115
+
116
+ constructor(deploymentName: string, apiKey?: string, context?: AzureOpenAiCompletionOptions) {
117
+ super(deploymentName, apiKey);
118
+ this.options = context || {};
119
+ }
120
+
121
+ async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
122
+ if (!this.apiKey) {
123
+ throw new Error(
124
+ 'Azure OpenAI API key is not set. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument to the constructor.',
125
+ );
126
+ }
127
+ if (!this.apiHost) {
128
+ throw new Error('Azure OpenAI API host must be set');
129
+ }
130
+
131
+ let stop: string;
132
+ try {
133
+ stop = process.env.OPENAI_STOP
134
+ ? JSON.parse(process.env.OPENAI_STOP)
135
+ : ['<|im_end|>', '<|endoftext|>'];
136
+ } catch (err) {
137
+ throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
138
+ }
139
+ const body = {
140
+ model: this.deploymentName,
141
+ prompt,
142
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
143
+ temperature:
144
+ options?.temperature ??
145
+ this.options.temperature ??
146
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
147
+ stop,
148
+ };
149
+ logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
150
+ let data,
151
+ cached = false;
152
+ try {
153
+ ({ data, cached } = (await fetchJsonWithCache(
154
+ `https://${this.apiHost}/openai/deployments/${this.deploymentName}/completions?api-version=2023-07-01-preview`,
155
+ {
156
+ method: 'POST',
157
+ headers: {
158
+ 'Content-Type': 'application/json',
159
+ 'api-key': this.apiKey,
160
+ },
161
+ body: JSON.stringify(body),
162
+ },
163
+ REQUEST_TIMEOUT_MS,
164
+ )) as unknown as any);
165
+ } catch (err) {
166
+ return {
167
+ error: `API call error: ${String(err)}`,
168
+ };
169
+ }
170
+ logger.debug(`\tAzure OpenAI API response: ${JSON.stringify(data)}`);
171
+ try {
172
+ return {
173
+ output: data.choices[0].text,
174
+ tokenUsage: cached
175
+ ? { cached: data.usage.total_tokens }
176
+ : {
177
+ total: data.usage.total_tokens,
178
+ prompt: data.usage.prompt_tokens,
179
+ completion: data.usage.completion_tokens,
180
+ },
181
+ };
182
+ } catch (err) {
183
+ return {
184
+ error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
185
+ };
186
+ }
187
+ }
188
+ }
189
+
190
+ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvider {
191
+ options: AzureOpenAiCompletionOptions;
192
+
193
+ constructor(deploymentName: string, apiKey?: string, context?: AzureOpenAiCompletionOptions) {
194
+ super(deploymentName, apiKey);
195
+ this.options = context || {};
196
+ }
197
+
198
+ async callApi(prompt: string, options?: AzureOpenAiCompletionOptions): Promise<ProviderResponse> {
199
+ if (!this.apiKey) {
200
+ throw new Error(
201
+ 'Azure OpenAI API key is not set. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument to the constructor.',
202
+ );
203
+ }
204
+ if (!this.apiHost) {
205
+ throw new Error('Azure OpenAI API host must be set');
206
+ }
207
+
208
+ const messages = parseChatPrompt(prompt);
209
+ const body = {
210
+ model: this.deploymentName,
211
+ messages: messages,
212
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
213
+ temperature:
214
+ options?.temperature ??
215
+ this.options.temperature ??
216
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
217
+ functions: options?.functions || this.options.functions || undefined,
218
+ function_call: options?.function_call || this.options.function_call || undefined,
219
+ };
220
+ logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
221
+
222
+ let data,
223
+ cached = false;
224
+ try {
225
+ ({ data, cached } = (await fetchJsonWithCache(
226
+ `https://${this.apiHost}/openai/deployments/${this.deploymentName}/chat/completions?api-version=2023-07-01-preview`,
227
+ {
228
+ method: 'POST',
229
+ headers: {
230
+ 'Content-Type': 'application/json',
231
+ 'api-key': this.apiKey,
232
+ },
233
+ body: JSON.stringify(body),
234
+ },
235
+ REQUEST_TIMEOUT_MS,
236
+ )) as unknown as any);
237
+ } catch (err) {
238
+ return {
239
+ error: `API call error: ${String(err)}`,
240
+ };
241
+ }
242
+
243
+ logger.debug(`\tAzure OpenAI API response: ${JSON.stringify(data)}`);
244
+ try {
245
+ const message = data.choices[0].message;
246
+ const output =
247
+ message.content === null ? JSON.stringify(message.function_call) : message.content;
248
+ return {
249
+ output,
250
+ tokenUsage: cached
251
+ ? { cached: data.usage.total_tokens }
252
+ : {
253
+ total: data.usage.total_tokens,
254
+ prompt: data.usage.prompt_tokens,
255
+ completion: data.usage.completion_tokens,
256
+ },
257
+ };
258
+ } catch (err) {
259
+ return {
260
+ error: `API response error: ${String(err)}: ${JSON.stringify(data)}`,
261
+ };
262
+ }
263
+ }
264
+ }