promptfoo 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +10 -7
  2. package/dist/package.json +2 -2
  3. package/dist/src/assertions.js +7 -7
  4. package/dist/src/assertions.js.map +1 -1
  5. package/dist/src/cache.d.ts +1 -0
  6. package/dist/src/cache.d.ts.map +1 -1
  7. package/dist/src/cache.js +8 -3
  8. package/dist/src/cache.js.map +1 -1
  9. package/dist/src/evaluator.d.ts.map +1 -1
  10. package/dist/src/evaluator.js +20 -5
  11. package/dist/src/evaluator.js.map +1 -1
  12. package/dist/src/main.js +12 -0
  13. package/dist/src/main.js.map +1 -1
  14. package/dist/src/prompts.js +2 -2
  15. package/dist/src/prompts.js.map +1 -1
  16. package/dist/src/providers/openai.d.ts.map +1 -1
  17. package/dist/src/providers/openai.js +9 -4
  18. package/dist/src/providers/openai.js.map +1 -1
  19. package/dist/src/providers/scriptCompletion.d.ts +9 -0
  20. package/dist/src/providers/scriptCompletion.d.ts.map +1 -0
  21. package/dist/src/providers/scriptCompletion.js +27 -0
  22. package/dist/src/providers/scriptCompletion.js.map +1 -0
  23. package/dist/src/providers.d.ts.map +1 -1
  24. package/dist/src/providers.js +7 -1
  25. package/dist/src/providers.js.map +1 -1
  26. package/dist/src/table.js +1 -1
  27. package/dist/src/table.js.map +1 -1
  28. package/dist/src/types.d.ts +5 -4
  29. package/dist/src/types.d.ts.map +1 -1
  30. package/dist/src/util.d.ts +1 -0
  31. package/dist/src/util.d.ts.map +1 -1
  32. package/dist/src/util.js +33 -23
  33. package/dist/src/util.js.map +1 -1
  34. package/dist/src/web/client/assets/{index-c3faa651.css → index-b82d0138.css} +1 -1
  35. package/dist/src/web/client/assets/{index-9d27a707.js → index-f22a629c.js} +26 -26
  36. package/dist/src/web/client/index.html +2 -2
  37. package/package.json +2 -2
  38. package/src/assertions.ts +10 -10
  39. package/src/cache.ts +8 -3
  40. package/src/evaluator.ts +29 -12
  41. package/src/main.ts +14 -1
  42. package/src/prompts.ts +2 -2
  43. package/src/providers/openai.ts +15 -6
  44. package/src/providers/scriptCompletion.ts +23 -0
  45. package/src/providers.ts +6 -1
  46. package/src/table.ts +1 -1
  47. package/src/types.ts +5 -4
  48. package/src/util.ts +35 -20
  49. package/src/web/client/package-lock.json +5726 -0
  50. package/src/web/client/src/EvalOutputPromptDialog.tsx +61 -0
  51. package/src/web/client/src/ResultsTable.css +10 -7
  52. package/src/web/client/src/ResultsTable.tsx +87 -37
  53. package/src/web/client/src/types.ts +8 -2
@@ -0,0 +1,61 @@
1
+ import { useState, useEffect } from 'react';
2
+ import Button from '@mui/material/Button';
3
+ import Dialog from '@mui/material/Dialog';
4
+ import DialogActions from '@mui/material/DialogActions';
5
+ import DialogContent from '@mui/material/DialogContent';
6
+ import DialogTitle from '@mui/material/DialogTitle';
7
+ import TextareaAutosize from '@mui/base/TextareaAutosize';
8
+ import IconButton from '@mui/material/IconButton';
9
+ import ContentCopyIcon from '@mui/icons-material/ContentCopy';
10
+ import CheckIcon from '@mui/icons-material/Check';
11
+
12
/** Props accepted by EvalOutputPromptDialog. */
interface EvalOutputPromptDialogProps {
  /** Whether the dialog is shown; forwarded to the MUI Dialog `open` prop. */
  open: boolean;
  /** Called to dismiss the dialog; wired to both the Dialog's `onClose` and the Close button. */
  onClose: () => void;
  /** Prompt text shown in the first read-only text area and copied by the copy button. */
  prompt: string;
  /** Optional output text; the "Output" section is rendered only when this is truthy. */
  output?: string;
}

/**
 * Modal dialog that displays a prompt (and, when provided, its output) in
 * read-only text areas, with a button that copies the prompt to the clipboard.
 *
 * The copy button switches to a check icon after a successful copy; the
 * indicator is reset whenever the `prompt` prop changes.
 */
export default function EvalOutputPromptDialog({
  open,
  onClose,
  prompt,
  output,
}: EvalOutputPromptDialogProps) {
  const [copied, setCopied] = useState(false);

  // A new prompt means the clipboard no longer matches what is displayed.
  useEffect(() => {
    setCopied(false);
  }, [prompt]);

  /**
   * Writes `text` to the system clipboard and flips the "copied" indicator.
   * The write can reject (permission denied, document not focused) and
   * `navigator.clipboard` can be undefined outside a secure context, so any
   * failure is caught here instead of surfacing as an unhandled rejection
   * from the click handler.
   */
  const copyToClipboard = async (text: string) => {
    try {
      await navigator.clipboard.writeText(text);
      setCopied(true);
    } catch (err: unknown) {
      console.error('Failed to copy prompt to clipboard', err);
      setCopied(false);
    }
  };

  return (
    <Dialog open={open} onClose={onClose} fullWidth maxWidth="lg">
      <DialogTitle>Prompt</DialogTitle>
      <DialogContent>
        <TextareaAutosize readOnly value={prompt} style={{ width: '100%' }} />
        <IconButton
          aria-label="Copy prompt"
          onClick={() => copyToClipboard(prompt)}
          style={{ position: 'absolute', right: '10px', top: '10px' }}
        >
          {copied ? <CheckIcon /> : <ContentCopyIcon />}
        </IconButton>
      </DialogContent>
      {output && (
        <>
          <DialogTitle>Output</DialogTitle>
          <DialogContent>
            <TextareaAutosize readOnly value={output} style={{ width: '100%' }} />
          </DialogContent>
        </>
      )}
      <DialogActions>
        <Button onClick={onClose}>Close</Button>
      </DialogActions>
    </Dialog>
  );
}
@@ -64,10 +64,17 @@ th,
64
64
  vertical-align: bottom;
65
65
  }
66
66
 
67
+ th .action {
68
+ cursor: pointer;
69
+ margin-left: 0.5rem;
70
+ }
71
+
67
72
  tr .cell {
68
73
  }
69
74
 
70
- tr .cell-rating {
75
+ tr .cell-actions {
76
+ display: flex;
77
+ gap: 0.5rem;
71
78
  visibility: hidden;
72
79
  position: absolute;
73
80
  bottom: 1.25rem;
@@ -76,18 +83,14 @@ tr .cell-rating {
76
83
  font-size: 1.75rem;
77
84
  }
78
85
 
79
- tr:hover .cell-rating {
86
+ tr:hover .cell-actions {
80
87
  visibility: visible;
81
88
  }
82
89
 
83
- tr .cell-rating .rating {
90
+ tr .cell-actions .action {
84
91
  cursor: pointer;
85
92
  }
86
93
 
87
- tr .cell-rating .rating:first-child {
88
- margin-right: 0.5rem;
89
- }
90
-
91
94
  th .smalltext {
92
95
  visibility: hidden;
93
96
  font-weight: normal;
@@ -16,6 +16,8 @@ import { useStore } from './store.js';
16
16
 
17
17
  import type { CellContext, VisibilityState } from '@tanstack/table-core';
18
18
 
19
+ import EvalOutputPromptDialog from './EvalOutputPromptDialog';
20
+
19
21
  import type { EvalRow, EvalRowOutput, FilterMode } from './types.js';
20
22
 
21
23
  import './ResultsTable.css';
@@ -54,7 +56,7 @@ interface TruncatedTextProps {
54
56
 
55
57
  function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
56
58
  const [isTruncated, setIsTruncated] = React.useState<boolean>(true);
57
- const text = String(rawText);
59
+ const text = typeof rawText === 'string' ? rawText : JSON.stringify(rawText);
58
60
 
59
61
  const toggleTruncate = () => {
60
62
  setIsTruncated(!isTruncated);
@@ -62,28 +64,24 @@ function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
62
64
 
63
65
  const renderTruncatedText = () => {
64
66
  if (text.length <= maxLength) {
65
- return text;
67
+ return <span>{text}</span>;
66
68
  }
67
69
  if (isTruncated) {
68
70
  return (
69
- <>
70
- <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
71
- {text.substring(0, maxLength)} ...
72
- </span>
73
- </>
71
+ <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
72
+ {text.substring(0, maxLength)} ...
73
+ </span>
74
74
  );
75
75
  } else {
76
76
  return (
77
- <>
78
- <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
79
- {text}
80
- </span>
81
- </>
77
+ <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
78
+ {text}
79
+ </span>
82
80
  );
83
81
  }
84
82
  };
85
83
 
86
- return <div>{renderTruncatedText()}</div>;
84
+ return renderTruncatedText();
87
85
  }
88
86
 
89
87
  interface PromptOutputProps {
@@ -94,14 +92,21 @@ interface PromptOutputProps {
94
92
  onRating: (rowIndex: number, promptIndex: number, isPass: boolean) => void;
95
93
  }
96
94
 
97
- function PromptOutput({
95
+ function EvalOutputCell({
98
96
  output,
99
97
  maxTextLength,
100
98
  rowIndex,
101
99
  promptIndex,
102
100
  onRating,
103
101
  }: PromptOutputProps) {
104
- let text = String(output.text);
102
+ const [openPrompt, setOpen] = React.useState(false);
103
+ const handlePromptOpen = () => {
104
+ setOpen(true);
105
+ };
106
+ const handlePromptClose = () => {
107
+ setOpen(false);
108
+ };
109
+ let text = typeof output.text === 'string' ? output.text : JSON.stringify(output.text);
105
110
  let chunks: string[] = [];
106
111
  if (!output.pass && text.includes('---')) {
107
112
  // TODO(ian): Plumb through failure message instead of parsing it out.
@@ -113,6 +118,7 @@ function PromptOutput({
113
118
  onRating(rowIndex, promptIndex, isPass);
114
119
  };
115
120
 
121
+ // TODO(ian): output.prompt check for backwards compatibility, remove after 0.17.0
116
122
  return (
117
123
  <>
118
124
  <div className="cell">
@@ -128,11 +134,24 @@ function PromptOutput({
128
134
  )}{' '}
129
135
  <TruncatedText text={text} maxLength={maxTextLength} />
130
136
  </div>
131
- <div className="cell-rating">
132
- <span className="rating" onClick={() => handleClick(true)}>
137
+ <div className="cell-actions">
138
+ {output.prompt && (
139
+ <>
140
+ <span className="action" onClick={handlePromptOpen}>
141
+ 🔎
142
+ </span>
143
+ <EvalOutputPromptDialog
144
+ open={openPrompt}
145
+ onClose={handlePromptClose}
146
+ prompt={output.prompt}
147
+ output={text}
148
+ />
149
+ </>
150
+ )}
151
+ <span className="action" onClick={() => handleClick(true)}>
133
152
  👍
134
153
  </span>
135
- <span className="rating" onClick={() => handleClick(false)}>
154
+ <span className="action" onClick={() => handleClick(false)}>
136
155
  👎
137
156
  </span>
138
157
  </div>
@@ -140,11 +159,35 @@ function PromptOutput({
140
159
  );
141
160
  }
142
161
 
143
- function TableHeader({ text, maxLength, smallText }: TruncatedTextProps & { smallText: string }) {
162
+ function TableHeader({
163
+ text,
164
+ maxLength,
165
+ smallText,
166
+ expandedText,
167
+ }: TruncatedTextProps & { smallText: string; expandedText?: string }) {
168
+ const [openPrompt, setOpen] = React.useState(false);
169
+ const handlePromptOpen = () => {
170
+ setOpen(true);
171
+ };
172
+ const handlePromptClose = () => {
173
+ setOpen(false);
174
+ };
144
175
  return (
145
176
  <div>
146
177
  <TruncatedText text={text} maxLength={maxLength} />
147
- <span className="smalltext">{smallText}</span>
178
+ {expandedText && (
179
+ <>
180
+ <span className="action" onClick={handlePromptOpen}>
181
+ 🔎
182
+ </span>
183
+ <EvalOutputPromptDialog
184
+ open={openPrompt}
185
+ onClose={handlePromptClose}
186
+ prompt={expandedText}
187
+ />
188
+ </>
189
+ )}
190
+ <div className="smalltext">{smallText}</div>
148
191
  </div>
149
192
  );
150
193
  }
@@ -199,21 +242,26 @@ export default function ResultsTable({
199
242
  id: 'vars',
200
243
  header: () => <span>Variables</span>,
201
244
  columns: head.vars.map((varName, idx) =>
202
- columnHelper.accessor((row: EvalRow) => row.vars[idx], {
203
- id: `Variable ${idx + 1}`,
204
- header: () => (
205
- <TableHeader
206
- smallText={`Variable ${idx + 1}`}
207
- text={varName}
208
- maxLength={maxTextLength}
209
- />
210
- ),
211
- cell: (info: CellContext<EvalRow, string>) => (
212
- <TruncatedText text={info.getValue()} maxLength={maxTextLength} />
213
- ),
214
- // Minimize the size of Variable columns.
215
- size: 50,
216
- }),
245
+ columnHelper.accessor(
246
+ (row: EvalRow) => {
247
+ return row.vars[idx];
248
+ },
249
+ {
250
+ id: `Variable ${idx + 1}`,
251
+ header: () => (
252
+ <TableHeader
253
+ smallText={`Variable ${idx + 1}`}
254
+ text={varName}
255
+ maxLength={maxTextLength}
256
+ />
257
+ ),
258
+ cell: (info: CellContext<EvalRow, string>) => (
259
+ <TruncatedText text={info.getValue()} maxLength={maxTextLength} />
260
+ ),
261
+ // Minimize the size of Variable columns.
262
+ size: 50,
263
+ },
264
+ ),
217
265
  ),
218
266
  }),
219
267
  columnHelper.group({
@@ -228,11 +276,13 @@ export default function ResultsTable({
228
276
  numGood[idx] === highestPassingCount && highestPassingCount !== 0;
229
277
  const columnId = `Prompt ${idx + 1}`;
230
278
  const isChecked = failureFilter[columnId] || false;
279
+ // TODO(ian): prompt string support for backwards compatibility, remove after 0.17.0
231
280
  return (
232
281
  <>
233
282
  <TableHeader
234
283
  smallText={`Prompt ${idx + 1}`}
235
- text={prompt}
284
+ text={typeof prompt === 'string' ? prompt : prompt.display}
285
+ expandedText={typeof prompt === 'string' ? undefined : prompt.raw}
236
286
  maxLength={maxTextLength}
237
287
  />
238
288
  {filterMode === 'failures' && (
@@ -258,7 +308,7 @@ export default function ResultsTable({
258
308
  );
259
309
  },
260
310
  cell: (info: CellContext<EvalRow, string>) => (
261
- <PromptOutput
311
+ <EvalOutputCell
262
312
  output={info.getValue() as unknown as EvalRowOutput}
263
313
  maxTextLength={maxTextLength}
264
314
  rowIndex={info.row.index}
@@ -1,12 +1,18 @@
1
+ type Prompt = {
2
+ display: string;
3
+ raw: string;
4
+ };
5
+
1
6
  export type EvalHead = {
2
- prompts: string[];
7
+ prompts: Prompt[];
3
8
  vars: string[];
4
9
  };
5
10
 
6
11
  export type EvalRowOutput = {
7
12
  pass: boolean;
8
13
  score: number;
9
- text: string;
14
+ text: string | object;
15
+ prompt: string;
10
16
  };
11
17
 
12
18
  export type EvalRow = {