promptfoo 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -7
- package/dist/package.json +2 -2
- package/dist/src/assertions.js +7 -7
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cache.d.ts +1 -0
- package/dist/src/cache.d.ts.map +1 -1
- package/dist/src/cache.js +8 -3
- package/dist/src/cache.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +20 -5
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +12 -0
- package/dist/src/main.js.map +1 -1
- package/dist/src/prompts.js +2 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +9 -4
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/scriptCompletion.d.ts +9 -0
- package/dist/src/providers/scriptCompletion.d.ts.map +1 -0
- package/dist/src/providers/scriptCompletion.js +27 -0
- package/dist/src/providers/scriptCompletion.js.map +1 -0
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +7 -1
- package/dist/src/providers.js.map +1 -1
- package/dist/src/table.js +1 -1
- package/dist/src/table.js.map +1 -1
- package/dist/src/types.d.ts +5 -4
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +1 -0
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +33 -23
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-c3faa651.css → index-b82d0138.css} +1 -1
- package/dist/src/web/client/assets/{index-9d27a707.js → index-f22a629c.js} +26 -26
- package/dist/src/web/client/index.html +2 -2
- package/package.json +2 -2
- package/src/assertions.ts +10 -10
- package/src/cache.ts +8 -3
- package/src/evaluator.ts +29 -12
- package/src/main.ts +14 -1
- package/src/prompts.ts +2 -2
- package/src/providers/openai.ts +15 -6
- package/src/providers/scriptCompletion.ts +23 -0
- package/src/providers.ts +6 -1
- package/src/table.ts +1 -1
- package/src/types.ts +5 -4
- package/src/util.ts +35 -20
- package/src/web/client/package-lock.json +5726 -0
- package/src/web/client/src/EvalOutputPromptDialog.tsx +61 -0
- package/src/web/client/src/ResultsTable.css +10 -7
- package/src/web/client/src/ResultsTable.tsx +87 -37
- package/src/web/client/src/types.ts +8 -2
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { useState, useEffect } from 'react';
|
|
2
|
+
import Button from '@mui/material/Button';
|
|
3
|
+
import Dialog from '@mui/material/Dialog';
|
|
4
|
+
import DialogActions from '@mui/material/DialogActions';
|
|
5
|
+
import DialogContent from '@mui/material/DialogContent';
|
|
6
|
+
import DialogTitle from '@mui/material/DialogTitle';
|
|
7
|
+
import TextareaAutosize from '@mui/base/TextareaAutosize';
|
|
8
|
+
import IconButton from '@mui/material/IconButton';
|
|
9
|
+
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
|
10
|
+
import CheckIcon from '@mui/icons-material/Check';
|
|
11
|
+
|
|
12
|
+
interface EvalOutputPromptDialogProps {
|
|
13
|
+
open: boolean;
|
|
14
|
+
onClose: () => void;
|
|
15
|
+
prompt: string;
|
|
16
|
+
output?: string;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export default function EvalOutputPromptDialog({
|
|
20
|
+
open,
|
|
21
|
+
onClose,
|
|
22
|
+
prompt,
|
|
23
|
+
output,
|
|
24
|
+
}: EvalOutputPromptDialogProps) {
|
|
25
|
+
const [copied, setCopied] = useState(false);
|
|
26
|
+
|
|
27
|
+
useEffect(() => {
|
|
28
|
+
setCopied(false);
|
|
29
|
+
}, [prompt]);
|
|
30
|
+
|
|
31
|
+
const copyToClipboard = async (text: string) => {
|
|
32
|
+
await navigator.clipboard.writeText(text);
|
|
33
|
+
setCopied(true);
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
return (
|
|
37
|
+
<Dialog open={open} onClose={onClose} fullWidth maxWidth="lg">
|
|
38
|
+
<DialogTitle>Prompt</DialogTitle>
|
|
39
|
+
<DialogContent>
|
|
40
|
+
<TextareaAutosize readOnly value={prompt} style={{ width: '100%' }} />
|
|
41
|
+
<IconButton
|
|
42
|
+
onClick={() => copyToClipboard(prompt)}
|
|
43
|
+
style={{ position: 'absolute', right: '10px', top: '10px' }}
|
|
44
|
+
>
|
|
45
|
+
{copied ? <CheckIcon /> : <ContentCopyIcon />}
|
|
46
|
+
</IconButton>
|
|
47
|
+
</DialogContent>
|
|
48
|
+
{output && (
|
|
49
|
+
<>
|
|
50
|
+
<DialogTitle>Output</DialogTitle>
|
|
51
|
+
<DialogContent>
|
|
52
|
+
<TextareaAutosize readOnly value={output} style={{ width: '100%' }} />
|
|
53
|
+
</DialogContent>
|
|
54
|
+
</>
|
|
55
|
+
)}
|
|
56
|
+
<DialogActions>
|
|
57
|
+
<Button onClick={onClose}>Close</Button>
|
|
58
|
+
</DialogActions>
|
|
59
|
+
</Dialog>
|
|
60
|
+
);
|
|
61
|
+
}
|
|
@@ -64,10 +64,17 @@ th,
|
|
|
64
64
|
vertical-align: bottom;
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
+
th .action {
|
|
68
|
+
cursor: pointer;
|
|
69
|
+
margin-left: 0.5rem;
|
|
70
|
+
}
|
|
71
|
+
|
|
67
72
|
tr .cell {
|
|
68
73
|
}
|
|
69
74
|
|
|
70
|
-
tr .cell-rating {
|
|
75
|
+
tr .cell-actions {
|
|
76
|
+
display: flex;
|
|
77
|
+
gap: 0.5rem;
|
|
71
78
|
visibility: hidden;
|
|
72
79
|
position: absolute;
|
|
73
80
|
bottom: 1.25rem;
|
|
@@ -76,18 +83,14 @@ tr .cell-rating {
|
|
|
76
83
|
font-size: 1.75rem;
|
|
77
84
|
}
|
|
78
85
|
|
|
79
|
-
tr:hover .cell-rating {
|
|
86
|
+
tr:hover .cell-actions {
|
|
80
87
|
visibility: visible;
|
|
81
88
|
}
|
|
82
89
|
|
|
83
|
-
tr .cell-rating .rating {
|
|
90
|
+
tr .cell-actions .action {
|
|
84
91
|
cursor: pointer;
|
|
85
92
|
}
|
|
86
93
|
|
|
87
|
-
tr .cell-rating .rating:first-child {
|
|
88
|
-
margin-right: 0.5rem;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
94
|
th .smalltext {
|
|
92
95
|
visibility: hidden;
|
|
93
96
|
font-weight: normal;
|
|
@@ -16,6 +16,8 @@ import { useStore } from './store.js';
|
|
|
16
16
|
|
|
17
17
|
import type { CellContext, VisibilityState } from '@tanstack/table-core';
|
|
18
18
|
|
|
19
|
+
import EvalOutputPromptDialog from './EvalOutputPromptDialog';
|
|
20
|
+
|
|
19
21
|
import type { EvalRow, EvalRowOutput, FilterMode } from './types.js';
|
|
20
22
|
|
|
21
23
|
import './ResultsTable.css';
|
|
@@ -54,7 +56,7 @@ interface TruncatedTextProps {
|
|
|
54
56
|
|
|
55
57
|
function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
|
|
56
58
|
const [isTruncated, setIsTruncated] = React.useState<boolean>(true);
|
|
57
|
-
const text =
|
|
59
|
+
const text = typeof rawText === 'string' ? rawText : JSON.stringify(rawText);
|
|
58
60
|
|
|
59
61
|
const toggleTruncate = () => {
|
|
60
62
|
setIsTruncated(!isTruncated);
|
|
@@ -62,28 +64,24 @@ function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
|
|
|
62
64
|
|
|
63
65
|
const renderTruncatedText = () => {
|
|
64
66
|
if (text.length <= maxLength) {
|
|
65
|
-
return text
|
|
67
|
+
return <span>text</span>;
|
|
66
68
|
}
|
|
67
69
|
if (isTruncated) {
|
|
68
70
|
return (
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
</span>
|
|
73
|
-
</>
|
|
71
|
+
<span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
|
|
72
|
+
{text.substring(0, maxLength)} ...
|
|
73
|
+
</span>
|
|
74
74
|
);
|
|
75
75
|
} else {
|
|
76
76
|
return (
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
</span>
|
|
81
|
-
</>
|
|
77
|
+
<span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
|
|
78
|
+
{text}
|
|
79
|
+
</span>
|
|
82
80
|
);
|
|
83
81
|
}
|
|
84
82
|
};
|
|
85
83
|
|
|
86
|
-
return
|
|
84
|
+
return renderTruncatedText();
|
|
87
85
|
}
|
|
88
86
|
|
|
89
87
|
interface PromptOutputProps {
|
|
@@ -94,14 +92,21 @@ interface PromptOutputProps {
|
|
|
94
92
|
onRating: (rowIndex: number, promptIndex: number, isPass: boolean) => void;
|
|
95
93
|
}
|
|
96
94
|
|
|
97
|
-
function PromptOutput({
|
|
95
|
+
function EvalOutputCell({
|
|
98
96
|
output,
|
|
99
97
|
maxTextLength,
|
|
100
98
|
rowIndex,
|
|
101
99
|
promptIndex,
|
|
102
100
|
onRating,
|
|
103
101
|
}: PromptOutputProps) {
|
|
104
|
-
|
|
102
|
+
const [openPrompt, setOpen] = React.useState(false);
|
|
103
|
+
const handlePromptOpen = () => {
|
|
104
|
+
setOpen(true);
|
|
105
|
+
};
|
|
106
|
+
const handlePromptClose = () => {
|
|
107
|
+
setOpen(false);
|
|
108
|
+
};
|
|
109
|
+
let text = typeof output.text === 'string' ? output.text : JSON.stringify(output.text);
|
|
105
110
|
let chunks: string[] = [];
|
|
106
111
|
if (!output.pass && text.includes('---')) {
|
|
107
112
|
// TODO(ian): Plumb through failure message instead of parsing it out.
|
|
@@ -113,6 +118,7 @@ function PromptOutput({
|
|
|
113
118
|
onRating(rowIndex, promptIndex, isPass);
|
|
114
119
|
};
|
|
115
120
|
|
|
121
|
+
// TODO(ian): output.prompt check for backwards compatibility, remove after 0.17.0
|
|
116
122
|
return (
|
|
117
123
|
<>
|
|
118
124
|
<div className="cell">
|
|
@@ -128,11 +134,24 @@ function PromptOutput({
|
|
|
128
134
|
)}{' '}
|
|
129
135
|
<TruncatedText text={text} maxLength={maxTextLength} />
|
|
130
136
|
</div>
|
|
131
|
-
<div className="cell-rating">
|
|
132
|
-
|
|
137
|
+
<div className="cell-actions">
|
|
138
|
+
{output.prompt && (
|
|
139
|
+
<>
|
|
140
|
+
<span className="action" onClick={handlePromptOpen}>
|
|
141
|
+
🔎
|
|
142
|
+
</span>
|
|
143
|
+
<EvalOutputPromptDialog
|
|
144
|
+
open={openPrompt}
|
|
145
|
+
onClose={handlePromptClose}
|
|
146
|
+
prompt={output.prompt}
|
|
147
|
+
output={text}
|
|
148
|
+
/>
|
|
149
|
+
</>
|
|
150
|
+
)}
|
|
151
|
+
<span className="action" onClick={() => handleClick(true)}>
|
|
133
152
|
👍
|
|
134
153
|
</span>
|
|
135
|
-
<span className="
|
|
154
|
+
<span className="action" onClick={() => handleClick(false)}>
|
|
136
155
|
👎
|
|
137
156
|
</span>
|
|
138
157
|
</div>
|
|
@@ -140,11 +159,35 @@ function PromptOutput({
|
|
|
140
159
|
);
|
|
141
160
|
}
|
|
142
161
|
|
|
143
|
-
function TableHeader({
|
|
162
|
+
function TableHeader({
|
|
163
|
+
text,
|
|
164
|
+
maxLength,
|
|
165
|
+
smallText,
|
|
166
|
+
expandedText,
|
|
167
|
+
}: TruncatedTextProps & { smallText: string; expandedText?: string }) {
|
|
168
|
+
const [openPrompt, setOpen] = React.useState(false);
|
|
169
|
+
const handlePromptOpen = () => {
|
|
170
|
+
setOpen(true);
|
|
171
|
+
};
|
|
172
|
+
const handlePromptClose = () => {
|
|
173
|
+
setOpen(false);
|
|
174
|
+
};
|
|
144
175
|
return (
|
|
145
176
|
<div>
|
|
146
177
|
<TruncatedText text={text} maxLength={maxLength} />
|
|
147
|
-
|
|
178
|
+
{expandedText && (
|
|
179
|
+
<>
|
|
180
|
+
<span className="action" onClick={handlePromptOpen}>
|
|
181
|
+
🔎
|
|
182
|
+
</span>
|
|
183
|
+
<EvalOutputPromptDialog
|
|
184
|
+
open={openPrompt}
|
|
185
|
+
onClose={handlePromptClose}
|
|
186
|
+
prompt={expandedText}
|
|
187
|
+
/>
|
|
188
|
+
</>
|
|
189
|
+
)}
|
|
190
|
+
<div className="smalltext">{smallText}</div>
|
|
148
191
|
</div>
|
|
149
192
|
);
|
|
150
193
|
}
|
|
@@ -199,21 +242,26 @@ export default function ResultsTable({
|
|
|
199
242
|
id: 'vars',
|
|
200
243
|
header: () => <span>Variables</span>,
|
|
201
244
|
columns: head.vars.map((varName, idx) =>
|
|
202
|
-
columnHelper.accessor(
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
245
|
+
columnHelper.accessor(
|
|
246
|
+
(row: EvalRow) => {
|
|
247
|
+
return row.vars[idx];
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
id: `Variable ${idx + 1}`,
|
|
251
|
+
header: () => (
|
|
252
|
+
<TableHeader
|
|
253
|
+
smallText={`Variable ${idx + 1}`}
|
|
254
|
+
text={varName}
|
|
255
|
+
maxLength={maxTextLength}
|
|
256
|
+
/>
|
|
257
|
+
),
|
|
258
|
+
cell: (info: CellContext<EvalRow, string>) => (
|
|
259
|
+
<TruncatedText text={info.getValue()} maxLength={maxTextLength} />
|
|
260
|
+
),
|
|
261
|
+
// Minimize the size of Variable columns.
|
|
262
|
+
size: 50,
|
|
263
|
+
},
|
|
264
|
+
),
|
|
217
265
|
),
|
|
218
266
|
}),
|
|
219
267
|
columnHelper.group({
|
|
@@ -228,11 +276,13 @@ export default function ResultsTable({
|
|
|
228
276
|
numGood[idx] === highestPassingCount && highestPassingCount !== 0;
|
|
229
277
|
const columnId = `Prompt ${idx + 1}`;
|
|
230
278
|
const isChecked = failureFilter[columnId] || false;
|
|
279
|
+
// TODO(ian): prompt string support for backwards compatibility, remove after 0.17.0
|
|
231
280
|
return (
|
|
232
281
|
<>
|
|
233
282
|
<TableHeader
|
|
234
283
|
smallText={`Prompt ${idx + 1}`}
|
|
235
|
-
text={prompt}
|
|
284
|
+
text={typeof prompt === 'string' ? prompt : prompt.display}
|
|
285
|
+
expandedText={typeof prompt === 'string' ? undefined : prompt.raw}
|
|
236
286
|
maxLength={maxTextLength}
|
|
237
287
|
/>
|
|
238
288
|
{filterMode === 'failures' && (
|
|
@@ -258,7 +308,7 @@ export default function ResultsTable({
|
|
|
258
308
|
);
|
|
259
309
|
},
|
|
260
310
|
cell: (info: CellContext<EvalRow, string>) => (
|
|
261
|
-
<PromptOutput
|
|
311
|
+
<EvalOutputCell
|
|
262
312
|
output={info.getValue() as unknown as EvalRowOutput}
|
|
263
313
|
maxTextLength={maxTextLength}
|
|
264
314
|
rowIndex={info.row.index}
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
+
type Prompt = {
|
|
2
|
+
display: string;
|
|
3
|
+
raw: string;
|
|
4
|
+
};
|
|
5
|
+
|
|
1
6
|
export type EvalHead = {
|
|
2
|
-
prompts: string[];
|
|
7
|
+
prompts: Prompt[];
|
|
3
8
|
vars: string[];
|
|
4
9
|
};
|
|
5
10
|
|
|
6
11
|
export type EvalRowOutput = {
|
|
7
12
|
pass: boolean;
|
|
8
13
|
score: number;
|
|
9
|
-
text: string;
|
|
14
|
+
text: string | object;
|
|
15
|
+
prompt: string;
|
|
10
16
|
};
|
|
11
17
|
|
|
12
18
|
export type EvalRow = {
|