promptfoo 0.17.4 → 0.17.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +1 -0
  2. package/dist/package.json +3 -2
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +14 -2
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/cache.d.ts +3 -0
  7. package/dist/src/cache.d.ts.map +1 -1
  8. package/dist/src/cache.js +6 -1
  9. package/dist/src/cache.js.map +1 -1
  10. package/dist/src/evaluator.d.ts.map +1 -1
  11. package/dist/src/evaluator.js +21 -17
  12. package/dist/src/evaluator.js.map +1 -1
  13. package/dist/src/index.d.ts +1 -0
  14. package/dist/src/index.d.ts.map +1 -1
  15. package/dist/src/main.js +66 -36
  16. package/dist/src/main.js.map +1 -1
  17. package/dist/src/providers/anthropic.js.map +1 -1
  18. package/dist/src/providers/azureopenai.d.ts +34 -0
  19. package/dist/src/providers/azureopenai.d.ts.map +1 -0
  20. package/dist/src/providers/azureopenai.js +222 -0
  21. package/dist/src/providers/azureopenai.js.map +1 -0
  22. package/dist/src/providers/localai.d.ts.map +1 -1
  23. package/dist/src/providers/localai.js +2 -1
  24. package/dist/src/providers/localai.js.map +1 -1
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +1 -13
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +11 -0
  29. package/dist/src/providers/replicate.d.ts.map +1 -0
  30. package/dist/src/providers/replicate.js +78 -0
  31. package/dist/src/providers/replicate.js.map +1 -0
  32. package/dist/src/providers/shared.d.ts +5 -0
  33. package/dist/src/providers/shared.d.ts.map +1 -1
  34. package/dist/src/providers/shared.js +33 -1
  35. package/dist/src/providers/shared.js.map +1 -1
  36. package/dist/src/providers.d.ts +2 -0
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +24 -0
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/types.d.ts +4 -1
  41. package/dist/src/types.d.ts.map +1 -1
  42. package/dist/src/updates.d.ts.map +1 -1
  43. package/dist/src/updates.js +3 -0
  44. package/dist/src/updates.js.map +1 -1
  45. package/dist/src/web/client/assets/{index-58a0e3e3.js → index-13198388.js} +23 -23
  46. package/dist/src/web/client/assets/index-f9b230d1.css +1 -0
  47. package/dist/src/web/client/index.html +2 -2
  48. package/package.json +3 -2
  49. package/src/assertions.ts +18 -2
  50. package/src/cache.ts +5 -1
  51. package/src/evaluator.ts +23 -17
  52. package/src/main.ts +87 -38
  53. package/src/providers/anthropic.ts +1 -1
  54. package/src/providers/azureopenai.ts +264 -0
  55. package/src/providers/localai.ts +3 -2
  56. package/src/providers/openai.ts +3 -18
  57. package/src/providers/replicate.ts +86 -0
  58. package/src/providers/shared.ts +29 -0
  59. package/src/providers.ts +27 -0
  60. package/src/types.ts +6 -0
  61. package/src/updates.ts +4 -0
  62. package/src/web/client/src/App.tsx +6 -0
  63. package/src/web/client/src/EvalOutputPromptDialog.tsx +6 -2
  64. package/src/web/client/src/ResultsTable.tsx +5 -0
  65. package/src/web/client/src/ResultsView.tsx +14 -11
  66. package/src/web/client/src/index.css +1 -12
  67. package/src/web/client/src/types.ts +1 -1
  68. package/dist/src/web/client/assets/index-b82d0138.css +0 -1
@@ -1,6 +1,6 @@
1
1
  import logger from '../logger';
2
2
  import { fetchJsonWithCache } from '../cache';
3
- import { REQUEST_TIMEOUT_MS } from './shared';
3
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderResponse } from '../types.js';
6
6
 
@@ -29,9 +29,10 @@ class LocalAiGenericProvider implements ApiProvider {
29
29
 
30
30
  export class LocalAiChatProvider extends LocalAiGenericProvider {
31
31
  async callApi(prompt: string): Promise<ProviderResponse> {
32
+ const messages = parseChatPrompt(prompt);
32
33
  const body = {
33
34
  model: this.modelName,
34
- prompt,
35
+ messages: messages,
35
36
  temperature: process.env.LOCALAI_TEMPERATURE || 0.7,
36
37
  };
37
38
  logger.debug(`Calling LocalAI API: ${JSON.stringify(body)}`);
@@ -1,6 +1,7 @@
1
+
1
2
  import logger from '../logger';
2
3
  import { fetchJsonWithCache } from '../cache';
3
- import { REQUEST_TIMEOUT_MS } from './shared';
4
+ import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
4
5
 
5
6
  import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
6
7
 
@@ -225,23 +226,7 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
225
226
  );
226
227
  }
227
228
 
228
- let messages: { role: string; content: string; name?: string }[];
229
- try {
230
- messages = JSON.parse(prompt) as { role: string; content: string }[];
231
- } catch (err) {
232
- const trimmedPrompt = prompt.trim();
233
- if (
234
- process.env.PROMPTFOO_REQUIRE_JSON_PROMPTS ||
235
- trimmedPrompt.startsWith('{') ||
236
- trimmedPrompt.startsWith('[')
237
- ) {
238
- throw new Error(
239
- `OpenAI Chat Completion prompt is not a valid JSON string: ${err}\n\n${prompt}`,
240
- );
241
- }
242
- messages = [{ role: 'user', content: prompt }];
243
- }
244
-
229
+ const messages = parseChatPrompt(prompt);
245
230
  const body = {
246
231
  model: this.modelName,
247
232
  messages: messages,
@@ -0,0 +1,86 @@
1
+ import Replicate from 'replicate';
2
+
3
+ import fetch from 'node-fetch';
4
+ import logger from '../logger';
5
+ import { getCache, isCacheEnabled } from '../cache';
6
+
7
+ import type { ApiProvider, ProviderResponse } from '../types.js';
8
+
9
+ export class ReplicateProvider implements ApiProvider {
10
+ modelName: string;
11
+ apiKey?: string;
12
+ replicate: any;
13
+
14
+ constructor(modelName: string, apiKey?: string) {
15
+ this.modelName = modelName;
16
+ this.apiKey = apiKey || process.env.REPLICATE_API_TOKEN || process.env.REPLICATE_API_KEY;
17
+ }
18
+
19
+ id(): string {
20
+ return `replicate:${this.modelName}`;
21
+ }
22
+
23
+ toString(): string {
24
+ return `[Replicate Provider ${this.modelName}]`;
25
+ }
26
+
27
+ async callApi(prompt: string): Promise<ProviderResponse> {
28
+ if (!this.apiKey) {
29
+ throw new Error(
30
+ 'Replicate API key is not set. Set REPLICATE_API_TOKEN environment variable or pass it as an argument to the constructor.',
31
+ );
32
+ }
33
+
34
+ let cache;
35
+ let cacheKey;
36
+ if (isCacheEnabled()) {
37
+ cache = await getCache();
38
+ cacheKey = `replicate:${this.modelName}:${prompt}`;
39
+
40
+ // Try to get the cached response
41
+ const cachedResponse = await cache.get(cacheKey);
42
+
43
+ if (cachedResponse) {
44
+ logger.debug(`Returning cached response for ${prompt}: ${cachedResponse}`);
45
+ return JSON.parse(cachedResponse as string);
46
+ }
47
+ }
48
+
49
+ const replicate = new Replicate({
50
+ auth: this.apiKey,
51
+ fetch,
52
+ });
53
+
54
+ logger.debug(`Calling Replicate: ${prompt}`);
55
+ let response;
56
+ try {
57
+ response = await replicate.run(this.modelName as any, {
58
+ input: {
59
+ prompt,
60
+ max_length: process.env.REPLICATE_MAX_LENGTH || 2046,
61
+ temperature: process.env.REPLICATE_TEMPERATURE || 0.5,
62
+ repetition_penalty: process.env.REPLICATE_REPETITION_PENALTY || 1.0,
63
+ },
64
+ });
65
+ } catch (err) {
66
+ return {
67
+ error: `API call error: ${String(err)}`,
68
+ };
69
+ }
70
+ logger.debug(`\tReplicate API response: ${JSON.stringify(response)}`);
71
+ try {
72
+ const result = {
73
+ output: (response as string[]).join(''),
74
+ tokenUsage: {}, // TODO: add token usage once Replicate API supports it
75
+ };
76
+ if (cache && cacheKey) {
77
+ await cache.set(cacheKey, JSON.stringify(result));
78
+ }
79
+ return result;
80
+ } catch (err) {
81
+ return {
82
+ error: `API response error: ${String(err)}: ${JSON.stringify(response)}`,
83
+ };
84
+ }
85
+ }
86
+ }
@@ -1,3 +1,32 @@
1
+ import yaml from 'js-yaml';
2
+
1
3
  export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
2
4
  ? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
3
5
  : 300_000;
6
+
7
+ export function parseChatPrompt(prompt: string): { role: string; content: string; name?: string }[] {
8
+ const trimmedPrompt = prompt.trim();
9
+ if (trimmedPrompt.startsWith('- role:')) {
10
+ try {
11
+ // Try YAML
12
+ return yaml.load(prompt) as { role: string; content: string }[];
13
+ } catch (err) {
14
+ throw new Error(`Chat Completion prompt is not a valid YAML string: ${err}\n\n${prompt}`);
15
+ }
16
+ } else {
17
+ try {
18
+ // Try JSON
19
+ return JSON.parse(prompt) as { role: string; content: string }[];
20
+ } catch (err) {
21
+ if (
22
+ process.env.PROMPTFOO_REQUIRE_JSON_PROMPTS ||
23
+ trimmedPrompt.startsWith('{') ||
24
+ trimmedPrompt.startsWith('[')
25
+ ) {
26
+ throw new Error(`Chat Completion prompt is not a valid JSON string: ${err}\n\n${prompt}`);
27
+ }
28
+ // Fall back to wrapping the prompt in a user message
29
+ return [{ role: 'user', content: prompt }];
30
+ }
31
+ }
32
+ }
package/src/providers.ts CHANGED
@@ -4,8 +4,13 @@ import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './ty
4
4
 
5
5
  import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
6
  import { AnthropicCompletionProvider } from './providers/anthropic';
7
+ import { ReplicateProvider } from './providers/replicate';
7
8
  import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
8
9
  import { ScriptCompletionProvider } from './providers/scriptCompletion';
10
+ import {
11
+ AzureOpenAiChatCompletionProvider,
12
+ AzureOpenAiCompletionProvider,
13
+ } from './providers/azureopenai';
9
14
 
10
15
  export async function loadApiProviders(
11
16
  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
@@ -68,6 +73,21 @@ export async function loadApiProvider(
68
73
  `Unknown OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`,
69
74
  );
70
75
  }
76
+ } else if (providerPath?.startsWith('azureopenai:')) {
77
+ // Load Azure OpenAI module
78
+ const options = providerPath.split(':');
79
+ const modelType = options[1];
80
+ const deploymentName = options[2];
81
+
82
+ if (modelType === 'chat') {
83
+ return new AzureOpenAiChatCompletionProvider(deploymentName, undefined, context?.config);
84
+ } else if (modelType === 'completion') {
85
+ return new AzureOpenAiCompletionProvider(deploymentName, undefined, context?.config);
86
+ } else {
87
+ throw new Error(
88
+ `Unknown Azure OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`,
89
+ );
90
+ }
71
91
  } else if (providerPath?.startsWith('anthropic:')) {
72
92
  // Load Anthropic module
73
93
  const options = providerPath.split(':');
@@ -87,6 +107,12 @@ export async function loadApiProvider(
87
107
  `Unknown Anthropic model type: ${modelType}. Use one of the following providers: anthropic:completion:<model name>`,
88
108
  );
89
109
  }
110
+ } else if (providerPath?.startsWith('replicate:')) {
111
+ // Load Replicate module
112
+ const options = providerPath.split(':');
113
+ const modelName = options.slice(1).join(':');
114
+
115
+ return new ReplicateProvider(modelName, undefined);
90
116
  }
91
117
 
92
118
  if (providerPath?.startsWith('localai:')) {
@@ -112,6 +138,7 @@ export default {
112
138
  OpenAiCompletionProvider,
113
139
  OpenAiChatCompletionProvider,
114
140
  AnthropicCompletionProvider,
141
+ ReplicateProvider,
115
142
  LocalAiCompletionProvider,
116
143
  LocalAiChatProvider,
117
144
  loadApiProvider,
package/src/types.ts CHANGED
@@ -6,6 +6,7 @@ export interface CommandLineOptions {
6
6
 
7
7
  // Shared with EvaluateOptions
8
8
  maxConcurrency: string;
9
+ repeat: string;
9
10
 
10
11
  // Command line only
11
12
  vars?: string;
@@ -75,6 +76,7 @@ export interface EvaluateOptions {
75
76
  maxConcurrency?: number;
76
77
  showProgressBar?: boolean;
77
78
  generateSuggestions?: boolean;
79
+ repeat?: number;
78
80
  }
79
81
 
80
82
  export interface Prompt {
@@ -136,6 +138,7 @@ type BaseAssertionTypes =
136
138
  | 'icontains'
137
139
  | 'contains-all'
138
140
  | 'contains-any'
141
+ | 'starts-with'
139
142
  | 'regex'
140
143
  | 'is-json'
141
144
  | 'contains-json'
@@ -230,6 +233,9 @@ export interface TestSuiteConfig {
230
233
 
231
234
  // Path to write output. Writes to console/web viewer if not set.
232
235
  outputPath?: string;
236
+
237
+ // Determines whether or not sharing is enabled.
238
+ sharing?: boolean;
233
239
  }
234
240
 
235
241
  export type UnifiedConfig = TestSuiteConfig & {
package/src/updates.ts CHANGED
@@ -17,6 +17,10 @@ export async function getLatestVersion(packageName: string) {
17
17
  }
18
18
 
19
19
  export async function checkForUpdates(): Promise<boolean> {
20
+ if (process.env.PROMPTFOO_DISABLE_UPDATE) {
21
+ return false;
22
+ }
23
+
20
24
  let latestVersion: string;
21
25
  try {
22
26
  latestVersion = await getLatestVersion('promptfoo');
@@ -37,6 +37,12 @@ function App() {
37
37
  }
38
38
  };
39
39
 
40
+ React.useEffect(() => {
41
+ if (prefersDarkMode) {
42
+ document.documentElement.setAttribute('data-theme', 'dark');
43
+ }
44
+ }, [prefersDarkMode]);
45
+
40
46
  React.useEffect(() => {
41
47
  const fetchEvalData = async (id: string) => {
42
48
  if (loadedFromApi.current) {
@@ -37,7 +37,7 @@ export default function EvalOutputPromptDialog({
37
37
  <Dialog open={open} onClose={onClose} fullWidth maxWidth="lg">
38
38
  <DialogTitle>Prompt</DialogTitle>
39
39
  <DialogContent>
40
- <TextareaAutosize readOnly value={prompt} style={{ width: '100%' }} />
40
+ <TextareaAutosize readOnly value={prompt} style={{ width: '100%', padding: '0.75rem' }} />
41
41
  <IconButton
42
42
  onClick={() => copyToClipboard(prompt)}
43
43
  style={{ position: 'absolute', right: '10px', top: '10px' }}
@@ -49,7 +49,11 @@ export default function EvalOutputPromptDialog({
49
49
  <>
50
50
  <DialogTitle>Output</DialogTitle>
51
51
  <DialogContent>
52
- <TextareaAutosize readOnly value={output} style={{ width: '100%' }} />
52
+ <TextareaAutosize
53
+ readOnly
54
+ value={output}
55
+ style={{ width: '100%', padding: '0.75rem' }}
56
+ />
53
57
  </DialogContent>
54
58
  </>
55
59
  )}
@@ -334,6 +334,11 @@ export default function ResultsTable({
334
334
  return failureFilter[columnId] && isFail;
335
335
  });
336
336
  });
337
+ } else if (filterMode === 'different') {
338
+ return body.filter((row) => {
339
+ // TODO(ian): This works for strings, but not objects.
340
+ return !row.outputs.every((output) => output.text === row.outputs[0].text);
341
+ });
337
342
  }
338
343
  return body;
339
344
  }, [body, failureFilter, filterMode]);
@@ -181,7 +181,8 @@ export default function ResultsView() {
181
181
  label="Filter"
182
182
  >
183
183
  <MenuItem value="all">Show all results</MenuItem>
184
- <MenuItem value="failures">Show only failures</MenuItem>
184
+ <MenuItem value="failures">Show failures only</MenuItem>
185
+ <MenuItem value="different">Show different only</MenuItem>
185
186
  </Select>
186
187
  </FormControl>
187
188
  </Box>
@@ -218,16 +219,18 @@ export default function ResultsView() {
218
219
  </Button>
219
220
  </Tooltip>
220
221
  )}
221
- <Tooltip title="Generate a unique URL that others can access">
222
- <Button
223
- color="primary"
224
- onClick={handleShareButtonClick}
225
- disabled={shareLoading}
226
- startIcon={shareLoading ? <CircularProgress size={16} /> : <ShareIcon />}
227
- >
228
- Share
229
- </Button>
230
- </Tooltip>
222
+ {config?.sharing && (
223
+ <Tooltip title="Generate a unique URL that others can access">
224
+ <Button
225
+ color="primary"
226
+ onClick={handleShareButtonClick}
227
+ disabled={shareLoading}
228
+ startIcon={shareLoading ? <CircularProgress size={16} /> : <ShareIcon />}
229
+ >
230
+ Share
231
+ </Button>
232
+ </Tooltip>
233
+ )}
231
234
  </ResponsiveStack>
232
235
  </Box>
233
236
  </ResponsiveStack>
@@ -21,19 +21,8 @@
21
21
  }
22
22
 
23
23
  /* Dark mode colors */
24
- @media (prefers-color-scheme: dark) {
25
- :root {
26
- --background-color: #1a1a1a;
27
- --text-color: #f0f0f0;
28
- --border-color: #444444;
29
- --table-border-color: #444444;
30
- --pass-color: #4caf50;
31
- --fail-color: #f44336;
32
- --smalltext-color: #888888;
33
- }
34
- }
35
-
36
24
  [data-theme='dark'] {
25
+ /* Keep synced with prefers-color-scheme above */
37
26
  --background-color: #1a1a1a;
38
27
  --text-color: #f0f0f0;
39
28
  --border-color: #444444;
@@ -25,6 +25,6 @@ export type EvalTable = {
25
25
  body: EvalRow[];
26
26
  };
27
27
 
28
- export type FilterMode = 'all' | 'failures';
28
+ export type FilterMode = 'all' | 'failures' | 'different';
29
29
 
30
30
  export type { UnifiedConfig } from '../../../types';
@@ -1 +0,0 @@
1
- :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}@media (prefers-color-scheme: dark){:root{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}