promptfoo 0.17.5 → 0.17.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/package.json +3 -2
  2. package/dist/src/assertions.js +2 -2
  3. package/dist/src/assertions.js.map +1 -1
  4. package/dist/src/cache.d.ts +3 -0
  5. package/dist/src/cache.d.ts.map +1 -1
  6. package/dist/src/cache.js +6 -1
  7. package/dist/src/cache.js.map +1 -1
  8. package/dist/src/evaluator.d.ts.map +1 -1
  9. package/dist/src/evaluator.js +35 -17
  10. package/dist/src/evaluator.js.map +1 -1
  11. package/dist/src/index.d.ts +1 -0
  12. package/dist/src/index.d.ts.map +1 -1
  13. package/dist/src/main.js +11 -5
  14. package/dist/src/main.js.map +1 -1
  15. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  16. package/dist/src/providers/azureopenai.js +1 -13
  17. package/dist/src/providers/azureopenai.js.map +1 -1
  18. package/dist/src/providers/localai.d.ts.map +1 -1
  19. package/dist/src/providers/localai.js +2 -1
  20. package/dist/src/providers/localai.js.map +1 -1
  21. package/dist/src/providers/openai.d.ts +1 -0
  22. package/dist/src/providers/openai.d.ts.map +1 -1
  23. package/dist/src/providers/openai.js +3 -29
  24. package/dist/src/providers/openai.js.map +1 -1
  25. package/dist/src/providers/replicate.d.ts +18 -0
  26. package/dist/src/providers/replicate.d.ts.map +1 -0
  27. package/dist/src/providers/replicate.js +80 -0
  28. package/dist/src/providers/replicate.js.map +1 -0
  29. package/dist/src/providers/shared.d.ts +5 -0
  30. package/dist/src/providers/shared.d.ts.map +1 -1
  31. package/dist/src/providers/shared.js +33 -1
  32. package/dist/src/providers/shared.js.map +1 -1
  33. package/dist/src/providers.d.ts +2 -0
  34. package/dist/src/providers.d.ts.map +1 -1
  35. package/dist/src/providers.js +8 -0
  36. package/dist/src/providers.js.map +1 -1
  37. package/dist/src/types.d.ts +4 -0
  38. package/dist/src/types.d.ts.map +1 -1
  39. package/dist/src/util.d.ts +2 -1
  40. package/dist/src/util.d.ts.map +1 -1
  41. package/dist/src/util.js +20 -1
  42. package/dist/src/util.js.map +1 -1
  43. package/dist/src/web/client/assets/{index-c2756e5d.js → index-13198388.js} +23 -23
  44. package/dist/src/web/client/assets/index-f9b230d1.css +1 -0
  45. package/dist/src/web/client/index.html +2 -2
  46. package/package.json +3 -2
  47. package/src/assertions.ts +2 -2
  48. package/src/cache.ts +5 -1
  49. package/src/evaluator.ts +37 -17
  50. package/src/main.ts +16 -5
  51. package/src/providers/azureopenai.ts +2 -18
  52. package/src/providers/localai.ts +3 -2
  53. package/src/providers/openai.ts +5 -35
  54. package/src/providers/replicate.ts +95 -0
  55. package/src/providers/shared.ts +29 -0
  56. package/src/providers.ts +8 -0
  57. package/src/types.ts +7 -0
  58. package/src/util.ts +25 -0
  59. package/src/web/client/src/App.tsx +6 -0
  60. package/src/web/client/src/EvalOutputPromptDialog.tsx +6 -2
  61. package/src/web/client/src/ResultsTable.tsx +5 -0
  62. package/src/web/client/src/ResultsView.tsx +2 -1
  63. package/src/web/client/src/index.css +1 -12
  64. package/src/web/client/src/types.ts +1 -1
  65. package/dist/src/web/client/assets/index-b82d0138.css +0 -1
package/dist/src/web/client/assets/index-f9b230d1.css ADDED
@@ -0,0 +1 @@
1
+ :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th 
.smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
package/dist/src/web/client/index.html CHANGED
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-c2756e5d.js"></script>
9
- <link rel="stylesheet" href="/assets/index-b82d0138.css">
8
+ <script type="module" crossorigin src="/assets/index-13198388.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-f9b230d1.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.5",
5
+ "version": "0.17.7",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
@@ -19,7 +19,7 @@
19
19
  "src"
20
20
  ],
21
21
  "engines": {
22
- "node": ">=12"
22
+ "node": ">=16"
23
23
  },
24
24
  "bin": {
25
25
  "promptfoo": "dist/src/main.js"
@@ -79,6 +79,7 @@
79
79
  "node-fetch": "^2.6.7",
80
80
  "nunjucks": "^3.2.4",
81
81
  "opener": "^1.5.2",
82
+ "replicate": "^0.12.3",
82
83
  "rouge": "^1.0.3",
83
84
  "semver": "^7.5.3",
84
85
  "socket.io": "^4.6.1",
package/src/assertions.ts CHANGED
@@ -432,8 +432,8 @@ export async function matchesLlmRubric(
432
432
  }
433
433
 
434
434
  const prompt = nunjucks.renderString(options.rubricPrompt || DEFAULT_GRADING_PROMPT, {
435
- output,
436
- rubric: expected,
435
+ output: output.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
436
+ rubric: expected.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
437
437
  });
438
438
 
439
439
  let provider = options.provider || DefaultGradingProvider;
package/src/cache.ts CHANGED
@@ -20,7 +20,7 @@ let enabled =
20
20
  const cacheType =
21
21
  process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
22
22
 
23
- function getCache() {
23
+ export function getCache() {
24
24
  if (!cacheInstance) {
25
25
  const cachePath =
26
26
  process.env.PROMPTFOO_CACHE_PATH || path.join(getConfigDirectoryPath(), 'cache');
@@ -102,3 +102,7 @@ export async function clearCache() {
102
102
  logger.info('Clearing cache...');
103
103
  return getCache().reset();
104
104
  }
105
+
106
+ export function isCacheEnabled() {
107
+ return enabled;
108
+ }
package/src/evaluator.ts CHANGED
@@ -33,6 +33,7 @@ interface RunEvalOptions {
33
33
 
34
34
  rowIndex: number;
35
35
  colIndex: number;
36
+ repeatIndex: number;
36
37
  }
37
38
 
38
39
  const DEFAULT_MAX_CONCURRENCY = 4;
@@ -212,6 +213,13 @@ class Evaluator {
212
213
  // Split prompts by provider
213
214
  for (const prompt of testSuite.prompts) {
214
215
  for (const provider of testSuite.providers) {
216
+ // Check if providerPromptMap exists and if it contains the current prompt's display
217
+ if (testSuite.providerPromptMap) {
218
+ const allowedPrompts = testSuite.providerPromptMap[provider.id()];
219
+ if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
220
+ continue;
221
+ }
222
+ }
215
223
  const updatedDisplay =
216
224
  testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
217
225
  prompts.push({
@@ -266,25 +274,37 @@ class Evaluator {
266
274
  // Finalize test case eval
267
275
  const varCombinations = generateVarCombinations(testCase.vars || {});
268
276
  totalVarCombinations += varCombinations.length;
269
- for (const vars of varCombinations) {
270
- let colIndex = 0;
271
- for (const prompt of testSuite.prompts) {
272
- for (const provider of testSuite.providers) {
273
- runEvalOptions.push({
274
- provider,
275
- prompt: {
276
- ...prompt,
277
- raw: prependToPrompt + prompt.raw + appendToPrompt,
278
- },
279
- test: { ...testCase, vars },
280
- includeProviderId: testSuite.providers.length > 1,
281
- rowIndex,
282
- colIndex,
283
- });
284
- colIndex++;
277
+
278
+ const numRepeat = this.options.repeat || 1;
279
+ for (let repeatIndex = 0; repeatIndex < numRepeat; repeatIndex++) {
280
+ for (const vars of varCombinations) {
281
+ let colIndex = 0;
282
+ for (const prompt of testSuite.prompts) {
283
+ for (const provider of testSuite.providers) {
284
+ if (testSuite.providerPromptMap) {
285
+ const allowedPrompts = testSuite.providerPromptMap[provider.id()];
286
+ if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
287
+ // This prompt should not be used with this provider.
288
+ continue;
289
+ }
290
+ }
291
+ runEvalOptions.push({
292
+ provider,
293
+ prompt: {
294
+ ...prompt,
295
+ raw: prependToPrompt + prompt.raw + appendToPrompt,
296
+ },
297
+ test: { ...testCase, vars },
298
+ includeProviderId: testSuite.providers.length > 1,
299
+ rowIndex,
300
+ colIndex,
301
+ repeatIndex,
302
+ });
303
+ colIndex++;
304
+ }
285
305
  }
306
+ rowIndex++;
286
307
  }
287
- rowIndex++;
288
308
  }
289
309
  }
290
310
 
package/src/main.ts CHANGED
@@ -15,6 +15,7 @@ import {
15
15
  readConfig,
16
16
  readLatestResults,
17
17
  readPrompts,
18
+ readProviderPromptMap,
18
19
  readTests,
19
20
  writeLatestResults,
20
21
  writeOutput,
@@ -130,7 +131,7 @@ async function main() {
130
131
 
131
132
  program
132
133
  .command('share')
133
- .description('Share your most recent result')
134
+ .description('Create a shareable URL of your most recent eval')
134
135
  .option('-y, --yes', 'Skip confirmation')
135
136
  .action(async (cmdObj: { yes: boolean } & Command) => {
136
137
  telemetry.maybeShowNotice();
@@ -158,10 +159,9 @@ async function main() {
158
159
  });
159
160
 
160
161
  reader.question(
161
- 'Are you sure you want to create a public URL? [y/N] ',
162
+ 'Are you sure you want to create a shareable URL of your most recent eval? Anyone you give this URL to will be able to view the results [Y/n] ',
162
163
  async function (answer: string) {
163
- if (answer.toLowerCase() !== 'yes' && answer.toLowerCase() !== 'y') {
164
- logger.info('Did not create a public URL.');
164
+ if (answer.toLowerCase() !== 'yes' && answer.toLowerCase() !== 'y' && answer !== '') {
165
165
  reader.close();
166
166
  return;
167
167
  }
@@ -218,6 +218,13 @@ async function main() {
218
218
  ? String(defaultConfig.evaluateOptions.maxConcurrency)
219
219
  : undefined,
220
220
  )
221
+ .option(
222
+ '--repeat <number>',
223
+ 'Number of times to run each test',
224
+ defaultConfig.evaluateOptions?.repeat
225
+ ? String(defaultConfig.evaluateOptions.repeat)
226
+ : undefined,
227
+ )
221
228
  .option(
222
229
  '--table-cell-max-length <number>',
223
230
  'Truncate console table cells to this length',
@@ -263,7 +270,6 @@ async function main() {
263
270
  }
264
271
 
265
272
  // Config parsing
266
- const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
267
273
  let fileConfig: Partial<UnifiedConfig> = {};
268
274
  const configPath = cmdObj.config;
269
275
  if (configPath) {
@@ -302,6 +308,7 @@ async function main() {
302
308
  config.tests,
303
309
  cmdObj.tests ? undefined : basePath,
304
310
  );
311
+ const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);
305
312
 
306
313
  if (parsedPrompts.length === 0) {
307
314
  logger.error(chalk.red('No prompts found'));
@@ -322,16 +329,20 @@ async function main() {
322
329
  description: config.description,
323
330
  prompts: parsedPrompts,
324
331
  providers: parsedProviders,
332
+ providerPromptMap: parsedProviderPromptMap,
325
333
  tests: parsedTests,
326
334
  defaultTest,
327
335
  };
328
336
 
337
+ const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
338
+ const iterations = parseInt(cmdObj.repeat || '', 10);
329
339
  const options: EvaluateOptions = {
330
340
  showProgressBar:
331
341
  typeof cmdObj.progressBar === 'undefined'
332
342
  ? getLogLevel() !== 'debug'
333
343
  : cmdObj.progressBar,
334
344
  maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
345
+ repeat: !isNaN(iterations) && iterations > 0 ? iterations : 1,
335
346
  ...evaluateOptions,
336
347
  };
337
348
 
package/src/providers/azureopenai.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import logger from '../logger';
2
2
  import { fetchJsonWithCache } from '../cache';
3
- import { REQUEST_TIMEOUT_MS } from './shared';
3
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
6
6
 
@@ -205,23 +205,7 @@ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvide
205
205
  throw new Error('Azure OpenAI API host must be set');
206
206
  }
207
207
 
208
- let messages: { role: string; content: string; name?: string }[];
209
- try {
210
- messages = JSON.parse(prompt) as { role: string; content: string }[];
211
- } catch (err) {
212
- const trimmedPrompt = prompt.trim();
213
- if (
214
- process.env.PROMPTFOO_REQUIRE_JSON_PROMPTS ||
215
- trimmedPrompt.startsWith('{') ||
216
- trimmedPrompt.startsWith('[')
217
- ) {
218
- throw new Error(
219
- `Azure OpenAI Chat Completion prompt is not a valid JSON string: ${err}\n\n${prompt}`,
220
- );
221
- }
222
- messages = [{ role: 'user', content: prompt }];
223
- }
224
-
208
+ const messages = parseChatPrompt(prompt);
225
209
  const body = {
226
210
  model: this.deploymentName,
227
211
  messages: messages,
package/src/providers/localai.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import logger from '../logger';
2
2
  import { fetchJsonWithCache } from '../cache';
3
- import { REQUEST_TIMEOUT_MS } from './shared';
3
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderResponse } from '../types.js';
6
6
 
@@ -29,9 +29,10 @@ class LocalAiGenericProvider implements ApiProvider {
29
29
 
30
30
  export class LocalAiChatProvider extends LocalAiGenericProvider {
31
31
  async callApi(prompt: string): Promise<ProviderResponse> {
32
+ const messages = parseChatPrompt(prompt);
32
33
  const body = {
33
34
  model: this.modelName,
34
- prompt,
35
+ messages: messages,
35
36
  temperature: process.env.LOCALAI_TEMPERATURE || 0.7,
36
37
  };
37
38
  logger.debug(`Calling LocalAI API: ${JSON.stringify(body)}`);
package/src/providers/openai.ts CHANGED
@@ -1,8 +1,7 @@
1
- import yaml from 'js-yaml';
2
1
 
3
2
  import logger from '../logger';
4
3
  import { fetchJsonWithCache } from '../cache';
5
- import { REQUEST_TIMEOUT_MS } from './shared';
4
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
6
5
 
7
6
  import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
8
7
 
@@ -10,6 +9,7 @@ const DEFAULT_OPENAI_HOST = 'api.openai.com';
10
9
 
11
10
  interface OpenAiCompletionOptions {
12
11
  temperature?: number;
12
+ max_tokens?: number;
13
13
  functions?: {
14
14
  name: string;
15
15
  description?: string;
@@ -148,7 +148,7 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
148
148
  const body = {
149
149
  model: this.modelName,
150
150
  prompt,
151
- max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
151
+ max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
152
152
  temperature:
153
153
  options?.temperature ??
154
154
  this.options.temperature ??
@@ -227,41 +227,11 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
227
227
  );
228
228
  }
229
229
 
230
- let messages: { role: string; content: string; name?: string }[];
231
- const trimmedPrompt = prompt.trim();
232
- if (trimmedPrompt.startsWith('- role:')) {
233
- try {
234
- // Try YAML
235
- messages = yaml.load(prompt) as { role: string; content: string }[];
236
- } catch (err) {
237
- throw new Error(
238
- `OpenAI Chat Completion prompt is not a valid YAML string: ${err}\n\n${prompt}`,
239
- );
240
- }
241
- } else {
242
- try {
243
- // Try JSON
244
- messages = JSON.parse(prompt) as { role: string; content: string }[];
245
- } catch (err) {
246
- if (
247
- process.env.PROMPTFOO_REQUIRE_JSON_PROMPTS ||
248
- trimmedPrompt.startsWith('{') ||
249
- trimmedPrompt.startsWith('[')
250
- ) {
251
- throw new Error(
252
- `OpenAI Chat Completion prompt is not a valid JSON string: ${err}\n\n${prompt}`,
253
- );
254
- }
255
-
256
- // Fall back to wrapping the prompt in a user message
257
- messages = [{ role: 'user', content: prompt }];
258
- }
259
- }
260
-
230
+ const messages = parseChatPrompt(prompt);
261
231
  const body = {
262
232
  model: this.modelName,
263
233
  messages: messages,
264
- max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
234
+ max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
265
235
  temperature:
266
236
  options?.temperature ??
267
237
  this.options.temperature ??
package/src/providers/replicate.ts ADDED
@@ -0,0 +1,95 @@
1
+ import Replicate from 'replicate';
2
+
3
+ import fetch from 'node-fetch';
4
+ import logger from '../logger';
5
+ import { getCache, isCacheEnabled } from '../cache';
6
+
7
+ import type { ApiProvider, ProviderResponse } from '../types.js';
8
+
9
+ interface ReplicateCompletionOptions {
10
+ temperature?: number;
11
+ max_length?: number;
12
+ repetition_penalty?: number;
13
+ }
14
+
15
+ export class ReplicateProvider implements ApiProvider {
16
+ modelName: string;
17
+ apiKey?: string;
18
+ replicate: any;
19
+ options: ReplicateCompletionOptions;
20
+
21
+ constructor(modelName: string, apiKey?: string, options?: ReplicateCompletionOptions) {
22
+ this.modelName = modelName;
23
+ this.apiKey = apiKey || process.env.REPLICATE_API_TOKEN || process.env.REPLICATE_API_KEY;
24
+ this.options = options || {};
25
+ }
26
+
27
+ id(): string {
28
+ return `replicate:${this.modelName}`;
29
+ }
30
+
31
+ toString(): string {
32
+ return `[Replicate Provider ${this.modelName}]`;
33
+ }
34
+
35
+ async callApi(prompt: string): Promise<ProviderResponse> {
36
+ if (!this.apiKey) {
37
+ throw new Error(
38
+ 'Replicate API key is not set. Set REPLICATE_API_TOKEN environment variable or pass it as an argument to the constructor.',
39
+ );
40
+ }
41
+
42
+ let cache;
43
+ let cacheKey;
44
+ if (isCacheEnabled()) {
45
+ cache = await getCache();
46
+ cacheKey = `replicate:${this.modelName}:${prompt}`;
47
+
48
+ // Try to get the cached response
49
+ const cachedResponse = await cache.get(cacheKey);
50
+
51
+ if (cachedResponse) {
52
+ logger.debug(`Returning cached response for ${prompt}: ${cachedResponse}`);
53
+ return JSON.parse(cachedResponse as string);
54
+ }
55
+ }
56
+
57
+ const replicate = new Replicate({
58
+ auth: this.apiKey,
59
+ fetch,
60
+ });
61
+
62
+ logger.debug(`Calling Replicate: ${prompt}`);
63
+ let response;
64
+ try {
65
+ const data = {
66
+ input: {
67
+ prompt,
68
+ max_length: this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
69
+ temperature: this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
70
+ repetition_penalty: this.options.repetition_penalty || parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
71
+ },
72
+ };
73
+ response = await replicate.run(this.modelName as any, data);
74
+ } catch (err) {
75
+ return {
76
+ error: `API call error: ${String(err)}`,
77
+ };
78
+ }
79
+ logger.debug(`\tReplicate API response: ${JSON.stringify(response)}`);
80
+ try {
81
+ const result = {
82
+ output: (response as string[]).join(''),
83
+ tokenUsage: {}, // TODO: add token usage once Replicate API supports it
84
+ };
85
+ if (cache && cacheKey) {
86
+ await cache.set(cacheKey, JSON.stringify(result));
87
+ }
88
+ return result;
89
+ } catch (err) {
90
+ return {
91
+ error: `API response error: ${String(err)}: ${JSON.stringify(response)}`,
92
+ };
93
+ }
94
+ }
95
+ }
package/src/providers/shared.ts CHANGED
@@ -1,3 +1,32 @@
1
+ import yaml from 'js-yaml';
2
+
1
3
  export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
2
4
  ? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
3
5
  : 300_000;
6
+
7
+ export function parseChatPrompt(prompt: string): { role: string; content: string; name?: string }[] {
8
+ const trimmedPrompt = prompt.trim();
9
+ if (trimmedPrompt.startsWith('- role:')) {
10
+ try {
11
+ // Try YAML
12
+ return yaml.load(prompt) as { role: string; content: string }[];
13
+ } catch (err) {
14
+ throw new Error(`Chat Completion prompt is not a valid YAML string: ${err}\n\n${prompt}`);
15
+ }
16
+ } else {
17
+ try {
18
+ // Try JSON
19
+ return JSON.parse(prompt) as { role: string; content: string }[];
20
+ } catch (err) {
21
+ if (
22
+ process.env.PROMPTFOO_REQUIRE_JSON_PROMPTS ||
23
+ trimmedPrompt.startsWith('{') ||
24
+ trimmedPrompt.startsWith('[')
25
+ ) {
26
+ throw new Error(`Chat Completion prompt is not a valid JSON string: ${err}\n\n${prompt}`);
27
+ }
28
+ // Fall back to wrapping the prompt in a user message
29
+ return [{ role: 'user', content: prompt }];
30
+ }
31
+ }
32
+ }
package/src/providers.ts CHANGED
@@ -4,6 +4,7 @@ import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './ty
4
4
 
5
5
  import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
6
  import { AnthropicCompletionProvider } from './providers/anthropic';
7
+ import { ReplicateProvider } from './providers/replicate';
7
8
  import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
8
9
  import { ScriptCompletionProvider } from './providers/scriptCompletion';
9
10
  import {
@@ -106,6 +107,12 @@ export async function loadApiProvider(
106
107
  `Unknown Anthropic model type: ${modelType}. Use one of the following providers: anthropic:completion:<model name>`,
107
108
  );
108
109
  }
110
+ } else if (providerPath?.startsWith('replicate:')) {
111
+ // Load Replicate module
112
+ const options = providerPath.split(':');
113
+ const modelName = options.slice(1).join(':');
114
+
115
+ return new ReplicateProvider(modelName, undefined, context?.config);
109
116
  }
110
117
 
111
118
  if (providerPath?.startsWith('localai:')) {
@@ -131,6 +138,7 @@ export default {
131
138
  OpenAiCompletionProvider,
132
139
  OpenAiChatCompletionProvider,
133
140
  AnthropicCompletionProvider,
141
+ ReplicateProvider,
134
142
  LocalAiCompletionProvider,
135
143
  LocalAiChatProvider,
136
144
  loadApiProvider,
package/src/types.ts CHANGED
@@ -6,6 +6,7 @@ export interface CommandLineOptions {
6
6
 
7
7
  // Shared with EvaluateOptions
8
8
  maxConcurrency: string;
9
+ repeat: string;
9
10
 
10
11
  // Command line only
11
12
  vars?: string;
@@ -29,6 +30,7 @@ export interface CommandLineOptions {
29
30
  export interface ProviderConfig {
30
31
  id: ProviderId;
31
32
  config?: any;
33
+ prompts?: string[]; // List of prompt display strings
32
34
  }
33
35
 
34
36
  export interface ApiProvider {
@@ -75,6 +77,7 @@ export interface EvaluateOptions {
75
77
  maxConcurrency?: number;
76
78
  showProgressBar?: boolean;
77
79
  generateSuggestions?: boolean;
80
+ repeat?: number;
78
81
  }
79
82
 
80
83
  export interface Prompt {
@@ -201,6 +204,10 @@ export interface TestSuite {
201
204
  // One or more prompt strings
202
205
  prompts: Prompt[];
203
206
 
207
+ // Optional mapping of provider to prompt display strings. If not provided,
208
+ // all prompts are used for all providers.
209
+ providerPromptMap?: Record<string, string[]>;
210
+
204
211
  // Test cases
205
212
  tests?: TestCase[];
206
213
 
package/src/util.ts CHANGED
@@ -25,8 +25,33 @@ import type {
25
25
  UnifiedConfig,
26
26
  TestCase,
27
27
  Prompt,
28
+ RawProviderConfig,
29
+ TestSuite,
28
30
  } from './types';
29
31
 
32
+ export function readProviderPromptMap(config: Partial<UnifiedConfig>, parsedPrompts: Prompt[]): TestSuite["providerPromptMap"] {
33
+ const ret: Record<string, string[]> = {};
34
+
35
+ if (!config.providers) {
36
+ return ret;
37
+ }
38
+
39
+ const allPrompts = [];
40
+ for (const prompt of parsedPrompts) {
41
+ allPrompts.push(prompt.display);
42
+ }
43
+
44
+ for (const provider of config.providers) {
45
+ if (typeof provider === 'object') {
46
+ const rawProvider = provider as RawProviderConfig;
47
+ const id = Object.keys(rawProvider)[0];
48
+ ret[id] = rawProvider[id].prompts || allPrompts;
49
+ }
50
+ }
51
+
52
+ return ret;
53
+ }
54
+
30
55
  const PROMPT_DELIMITER = '---';
31
56
 
32
57
  function parseJson(json: string): any | undefined {
package/src/web/client/src/App.tsx CHANGED
@@ -37,6 +37,12 @@ function App() {
37
37
  }
38
38
  };
39
39
 
40
+ React.useEffect(() => {
41
+ if (prefersDarkMode) {
42
+ document.documentElement.setAttribute('data-theme', 'dark');
43
+ }
44
+ }, [prefersDarkMode]);
45
+
40
46
  React.useEffect(() => {
41
47
  const fetchEvalData = async (id: string) => {
42
48
  if (loadedFromApi.current) {
package/src/web/client/src/EvalOutputPromptDialog.tsx CHANGED
@@ -37,7 +37,7 @@ export default function EvalOutputPromptDialog({
37
37
  <Dialog open={open} onClose={onClose} fullWidth maxWidth="lg">
38
38
  <DialogTitle>Prompt</DialogTitle>
39
39
  <DialogContent>
40
- <TextareaAutosize readOnly value={prompt} style={{ width: '100%' }} />
40
+ <TextareaAutosize readOnly value={prompt} style={{ width: '100%', padding: '0.75rem' }} />
41
41
  <IconButton
42
42
  onClick={() => copyToClipboard(prompt)}
43
43
  style={{ position: 'absolute', right: '10px', top: '10px' }}
@@ -49,7 +49,11 @@ export default function EvalOutputPromptDialog({
49
49
  <>
50
50
  <DialogTitle>Output</DialogTitle>
51
51
  <DialogContent>
52
- <TextareaAutosize readOnly value={output} style={{ width: '100%' }} />
52
+ <TextareaAutosize
53
+ readOnly
54
+ value={output}
55
+ style={{ width: '100%', padding: '0.75rem' }}
56
+ />
53
57
  </DialogContent>
54
58
  </>
55
59
  )}
package/src/web/client/src/ResultsTable.tsx CHANGED
@@ -334,6 +334,11 @@ export default function ResultsTable({
334
334
  return failureFilter[columnId] && isFail;
335
335
  });
336
336
  });
337
+ } else if (filterMode === 'different') {
338
+ return body.filter((row) => {
339
+ // TODO(ian): This works for strings, but not objects.
340
+ return !row.outputs.every((output) => output.text === row.outputs[0].text);
341
+ });
337
342
  }
338
343
  return body;
339
344
  }, [body, failureFilter, filterMode]);
package/src/web/client/src/ResultsView.tsx CHANGED
@@ -181,7 +181,8 @@ export default function ResultsView() {
181
181
  label="Filter"
182
182
  >
183
183
  <MenuItem value="all">Show all results</MenuItem>
184
- <MenuItem value="failures">Show only failures</MenuItem>
184
+ <MenuItem value="failures">Show failures only</MenuItem>
185
+ <MenuItem value="different">Show different only</MenuItem>
185
186
  </Select>
186
187
  </FormControl>
187
188
  </Box>
package/src/web/client/src/index.css CHANGED
@@ -21,19 +21,8 @@
21
21
  }
22
22
 
23
23
  /* Dark mode colors */
24
- @media (prefers-color-scheme: dark) {
25
- :root {
26
- --background-color: #1a1a1a;
27
- --text-color: #f0f0f0;
28
- --border-color: #444444;
29
- --table-border-color: #444444;
30
- --pass-color: #4caf50;
31
- --fail-color: #f44336;
32
- --smalltext-color: #888888;
33
- }
34
- }
35
-
36
24
  [data-theme='dark'] {
25
+ /* Keep synced with prefers-color-scheme above */
37
26
  --background-color: #1a1a1a;
38
27
  --text-color: #f0f0f0;
39
28
  --border-color: #444444;