promptfoo 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -40
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +186 -44
- package/dist/assertions.js.map +1 -1
- package/dist/cache.js +9 -9
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +1 -1
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +30 -23
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +10 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -14
- package/dist/index.js.map +1 -1
- package/dist/main.js +49 -44
- package/dist/main.js.map +1 -1
- package/dist/providers/localai.js +11 -11
- package/dist/providers/localai.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +30 -21
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers.d.ts +3 -3
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +15 -15
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +7 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/util.d.ts +4 -4
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +49 -18
- package/dist/util.js.map +1 -1
- package/dist/web/client/assets/index-15dfcd18.js +172 -0
- package/dist/web/client/assets/index-87905193.css +1 -0
- package/dist/web/client/index.html +2 -2
- package/dist/web/server.js +9 -9
- package/dist/web/server.js.map +1 -1
- package/package.json +3 -1
- package/src/assertions.ts +249 -38
- package/src/cache.ts +2 -2
- package/src/evaluator.ts +25 -18
- package/src/index.ts +13 -8
- package/src/main.ts +28 -15
- package/src/providers/localai.ts +3 -3
- package/src/providers/openai.ts +16 -8
- package/src/providers.ts +3 -3
- package/src/types.ts +24 -3
- package/src/util.ts +48 -17
- package/src/web/client/package-lock.json +5729 -0
- package/src/web/client/src/ResultsTable.css +35 -4
- package/src/web/client/src/ResultsTable.tsx +150 -70
- package/src/web/client/src/ResultsView.tsx +83 -18
- package/src/web/client/src/index.css +6 -0
- package/src/web/client/src/types.ts +2 -0
- package/src/web/server.ts +3 -3
- package/dist/web/client/assets/index-207192fc.css +0 -1
- package/dist/web/client/assets/index-8751749f.js +0 -172
|
@@ -40,18 +40,28 @@ td,
|
|
|
40
40
|
.td {
|
|
41
41
|
position: relative;
|
|
42
42
|
box-shadow: inset 0 0 0 1px var(--border-color);
|
|
43
|
-
word-break: break-all;
|
|
44
43
|
vertical-align: top;
|
|
45
44
|
|
|
46
45
|
padding: 1.5rem;
|
|
47
46
|
}
|
|
48
47
|
|
|
48
|
+
th.variable,
|
|
49
|
+
.th.variable,
|
|
50
|
+
td.variable,
|
|
51
|
+
.td.variable {
|
|
52
|
+
background-color: var(--variable-background-color);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
tr.header {
|
|
56
|
+
background-color: var(--header-background-color);
|
|
57
|
+
}
|
|
58
|
+
|
|
49
59
|
th,
|
|
50
60
|
.th {
|
|
51
61
|
padding: 1rem;
|
|
52
62
|
position: relative;
|
|
53
63
|
text-align: center;
|
|
54
|
-
|
|
64
|
+
vertical-align: bottom;
|
|
55
65
|
}
|
|
56
66
|
|
|
57
67
|
tr .cell {
|
|
@@ -61,7 +71,7 @@ tr .cell-rating {
|
|
|
61
71
|
visibility: hidden;
|
|
62
72
|
position: absolute;
|
|
63
73
|
bottom: 1.25rem;
|
|
64
|
-
right: -1rem;
|
|
74
|
+
right: 0;
|
|
65
75
|
line-height: 0;
|
|
66
76
|
font-size: 1.75rem;
|
|
67
77
|
}
|
|
@@ -72,7 +82,10 @@ tr:hover .cell-rating {
|
|
|
72
82
|
|
|
73
83
|
tr .cell-rating .rating {
|
|
74
84
|
cursor: pointer;
|
|
75
|
-
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
tr .cell-rating .rating:first-child {
|
|
88
|
+
margin-right: 0.5rem;
|
|
76
89
|
}
|
|
77
90
|
|
|
78
91
|
th .smalltext {
|
|
@@ -86,6 +99,16 @@ th:hover .smalltext {
|
|
|
86
99
|
visibility: visible;
|
|
87
100
|
}
|
|
88
101
|
|
|
102
|
+
th .summary {
|
|
103
|
+
font-weight: normal;
|
|
104
|
+
font-size: 0.8rem;
|
|
105
|
+
padding: 0.25rem;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
th .summary.highlight {
|
|
109
|
+
background-color: var(--success-background-color);
|
|
110
|
+
}
|
|
111
|
+
|
|
89
112
|
td,
|
|
90
113
|
.td {
|
|
91
114
|
}
|
|
@@ -103,6 +126,14 @@ td .fail {
|
|
|
103
126
|
color: var(--fail-color);
|
|
104
127
|
}
|
|
105
128
|
|
|
129
|
+
.first-prompt-col {
|
|
130
|
+
border-left: 2px solid #888;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
.first-prompt-row {
|
|
134
|
+
border-top: 2px solid #888;
|
|
135
|
+
}
|
|
136
|
+
|
|
106
137
|
.resizer {
|
|
107
138
|
position: absolute;
|
|
108
139
|
right: 0;
|
|
@@ -9,12 +9,14 @@ import {
|
|
|
9
9
|
getCoreRowModel,
|
|
10
10
|
useReactTable,
|
|
11
11
|
} from '@tanstack/react-table';
|
|
12
|
+
import Checkbox from '@mui/material/Checkbox';
|
|
13
|
+
import FormControlLabel from '@mui/material/FormControlLabel';
|
|
12
14
|
|
|
13
15
|
import { useStore } from './store.js';
|
|
14
16
|
|
|
15
17
|
import type { CellContext, VisibilityState } from '@tanstack/table-core';
|
|
16
18
|
|
|
17
|
-
import type { EvalRow } from './types.js';
|
|
19
|
+
import type { EvalRow, FilterMode } from './types.js';
|
|
18
20
|
|
|
19
21
|
import './ResultsTable.css';
|
|
20
22
|
|
|
@@ -114,12 +116,23 @@ function TableHeader({ text, maxLength, smallText }: TruncatedTextProps & { smal
|
|
|
114
116
|
);
|
|
115
117
|
}
|
|
116
118
|
|
|
117
|
-
interface
|
|
119
|
+
interface ResultsTableProps {
|
|
118
120
|
maxTextLength: number;
|
|
119
121
|
columnVisibility: VisibilityState;
|
|
122
|
+
wordBreak: 'break-word' | 'break-all';
|
|
123
|
+
filterMode: FilterMode;
|
|
124
|
+
failureFilter: { [key: string]: boolean };
|
|
125
|
+
onFailureFilterToggle: (columnId: string, checked: boolean) => void;
|
|
120
126
|
}
|
|
121
127
|
|
|
122
|
-
export default function ResultsTable({
|
|
128
|
+
export default function ResultsTable({
|
|
129
|
+
maxTextLength,
|
|
130
|
+
columnVisibility,
|
|
131
|
+
wordBreak,
|
|
132
|
+
filterMode,
|
|
133
|
+
failureFilter,
|
|
134
|
+
onFailureFilterToggle,
|
|
135
|
+
}: ResultsTableProps) {
|
|
123
136
|
const { table, setTable } = useStore();
|
|
124
137
|
invariant(table, 'Table should be defined');
|
|
125
138
|
const { head, body } = table;
|
|
@@ -146,36 +159,12 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
146
159
|
});
|
|
147
160
|
};
|
|
148
161
|
|
|
162
|
+
const highestPassingIndex = numGood.reduce((maxIndex, currentPassCount, currentIndex, array) => {
|
|
163
|
+
return currentPassCount > array[maxIndex] ? currentIndex : maxIndex;
|
|
164
|
+
}, 0);
|
|
165
|
+
const highestPassingCount = numGood[highestPassingIndex];
|
|
149
166
|
const columnHelper = createColumnHelper<EvalRow>();
|
|
150
167
|
const columns = [
|
|
151
|
-
columnHelper.group({
|
|
152
|
-
id: 'prompts',
|
|
153
|
-
header: () => <span>Prompts</span>,
|
|
154
|
-
columns: head.prompts.map((prompt, idx) =>
|
|
155
|
-
columnHelper.accessor((row: EvalRow) => row.outputs[idx], {
|
|
156
|
-
id: `Prompt ${idx + 1}`,
|
|
157
|
-
header: () => (
|
|
158
|
-
<>
|
|
159
|
-
<TableHeader
|
|
160
|
-
smallText={`Prompt ${idx + 1}`}
|
|
161
|
-
text={prompt}
|
|
162
|
-
maxLength={maxTextLength}
|
|
163
|
-
/>
|
|
164
|
-
{numGood[idx]} / {body.length} 👍
|
|
165
|
-
</>
|
|
166
|
-
),
|
|
167
|
-
cell: (info: CellContext<EvalRow, string>) => (
|
|
168
|
-
<PromptOutput
|
|
169
|
-
text={info.getValue()}
|
|
170
|
-
maxTextLength={maxTextLength}
|
|
171
|
-
rowIndex={info.row.index}
|
|
172
|
-
promptIndex={idx}
|
|
173
|
-
onRating={handleRating}
|
|
174
|
-
/>
|
|
175
|
-
),
|
|
176
|
-
}),
|
|
177
|
-
),
|
|
178
|
-
}),
|
|
179
168
|
columnHelper.group({
|
|
180
169
|
id: 'vars',
|
|
181
170
|
header: () => <span>Variables</span>,
|
|
@@ -192,13 +181,84 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
192
181
|
cell: (info: CellContext<EvalRow, string>) => (
|
|
193
182
|
<TruncatedText text={info.getValue()} maxLength={maxTextLength} />
|
|
194
183
|
),
|
|
184
|
+
// Minimize the size of Variable columns.
|
|
185
|
+
size: 50,
|
|
186
|
+
}),
|
|
187
|
+
),
|
|
188
|
+
}),
|
|
189
|
+
columnHelper.group({
|
|
190
|
+
id: 'prompts',
|
|
191
|
+
header: () => <span>Outputs</span>,
|
|
192
|
+
columns: head.prompts.map((prompt, idx) =>
|
|
193
|
+
columnHelper.accessor((row: EvalRow) => row.outputs[idx], {
|
|
194
|
+
id: `Prompt ${idx + 1}`,
|
|
195
|
+
header: () => {
|
|
196
|
+
const pct = ((numGood[idx] / body.length) * 100.0).toFixed(2);
|
|
197
|
+
const isHighestPassing =
|
|
198
|
+
numGood[idx] === highestPassingCount && highestPassingCount !== 0;
|
|
199
|
+
const columnId = `Prompt ${idx + 1}`;
|
|
200
|
+
const isChecked = failureFilter[columnId] || false;
|
|
201
|
+
return (
|
|
202
|
+
<>
|
|
203
|
+
<TableHeader
|
|
204
|
+
smallText={`Prompt ${idx + 1}`}
|
|
205
|
+
text={prompt}
|
|
206
|
+
maxLength={maxTextLength}
|
|
207
|
+
/>
|
|
208
|
+
{filterMode === 'failures' && (
|
|
209
|
+
<FormControlLabel
|
|
210
|
+
sx={{
|
|
211
|
+
'& .MuiFormControlLabel-label': {
|
|
212
|
+
fontSize: '0.75rem',
|
|
213
|
+
},
|
|
214
|
+
}}
|
|
215
|
+
control={
|
|
216
|
+
<Checkbox
|
|
217
|
+
checked={isChecked}
|
|
218
|
+
onChange={(event) => onFailureFilterToggle(columnId, event.target.checked)}
|
|
219
|
+
/>
|
|
220
|
+
}
|
|
221
|
+
label="Show failures"
|
|
222
|
+
/>
|
|
223
|
+
)}
|
|
224
|
+
<div className={`summary ${isHighestPassing ? 'highlight' : ''}`}>
|
|
225
|
+
Passing: <strong>{pct}%</strong> ({numGood[idx]} / {body.length})
|
|
226
|
+
</div>
|
|
227
|
+
</>
|
|
228
|
+
);
|
|
229
|
+
},
|
|
230
|
+
cell: (info: CellContext<EvalRow, string>) => (
|
|
231
|
+
<PromptOutput
|
|
232
|
+
text={info.getValue()}
|
|
233
|
+
maxTextLength={maxTextLength}
|
|
234
|
+
rowIndex={info.row.index}
|
|
235
|
+
promptIndex={idx}
|
|
236
|
+
onRating={handleRating}
|
|
237
|
+
/>
|
|
238
|
+
),
|
|
195
239
|
}),
|
|
196
240
|
),
|
|
197
241
|
}),
|
|
198
242
|
];
|
|
199
243
|
|
|
244
|
+
const filteredBody = React.useMemo(() => {
|
|
245
|
+
if (filterMode === 'failures') {
|
|
246
|
+
if (Object.values(failureFilter).every((v) => !v)) {
|
|
247
|
+
return body;
|
|
248
|
+
}
|
|
249
|
+
return body.filter((row) => {
|
|
250
|
+
return row.outputs.some((output, idx) => {
|
|
251
|
+
const columnId = `Prompt ${idx + 1}`;
|
|
252
|
+
const isFail = output.startsWith('[FAIL] ');
|
|
253
|
+
return failureFilter[columnId] && isFail;
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
return body;
|
|
258
|
+
}, [body, failureFilter, filterMode]);
|
|
259
|
+
|
|
200
260
|
const reactTable = useReactTable({
|
|
201
|
-
data:
|
|
261
|
+
data: filteredBody,
|
|
202
262
|
columns,
|
|
203
263
|
columnResizeMode: 'onChange',
|
|
204
264
|
getCoreRowModel: getCoreRowModel(),
|
|
@@ -209,52 +269,72 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
209
269
|
});
|
|
210
270
|
|
|
211
271
|
return (
|
|
212
|
-
<table
|
|
272
|
+
<table
|
|
273
|
+
style={{
|
|
274
|
+
wordBreak,
|
|
275
|
+
}}
|
|
276
|
+
>
|
|
213
277
|
<thead>
|
|
214
278
|
{reactTable.getHeaderGroups().map((headerGroup) => (
|
|
215
|
-
<tr key={headerGroup.id}>
|
|
216
|
-
{headerGroup.headers.map((header) =>
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
key: header.id,
|
|
220
|
-
colSpan: header.colSpan,
|
|
221
|
-
style: {
|
|
222
|
-
width: header.getSize(),
|
|
223
|
-
},
|
|
224
|
-
}}
|
|
225
|
-
>
|
|
226
|
-
{header.isPlaceholder
|
|
227
|
-
? null
|
|
228
|
-
: flexRender(header.column.columnDef.header, header.getContext())}
|
|
229
|
-
<div
|
|
279
|
+
<tr key={headerGroup.id} className="header">
|
|
280
|
+
{headerGroup.headers.map((header) => {
|
|
281
|
+
return (
|
|
282
|
+
<th
|
|
230
283
|
{...{
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
284
|
+
key: header.id,
|
|
285
|
+
colSpan: header.colSpan,
|
|
286
|
+
style: {
|
|
287
|
+
width: header.getSize(),
|
|
288
|
+
},
|
|
234
289
|
}}
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
290
|
+
>
|
|
291
|
+
{header.isPlaceholder
|
|
292
|
+
? null
|
|
293
|
+
: flexRender(header.column.columnDef.header, header.getContext())}
|
|
294
|
+
<div
|
|
295
|
+
{...{
|
|
296
|
+
onMouseDown: header.getResizeHandler(),
|
|
297
|
+
onTouchStart: header.getResizeHandler(),
|
|
298
|
+
className: `resizer ${header.column.getIsResizing() ? 'isResizing' : ''}`,
|
|
299
|
+
}}
|
|
300
|
+
/>
|
|
301
|
+
</th>
|
|
302
|
+
);
|
|
303
|
+
})}
|
|
238
304
|
</tr>
|
|
239
305
|
))}
|
|
240
306
|
</thead>
|
|
241
307
|
<tbody>
|
|
242
|
-
{reactTable.getRowModel().rows.map((row) =>
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
308
|
+
{reactTable.getRowModel().rows.map((row, rowIndex) => {
|
|
309
|
+
let colBorderDrawn = false;
|
|
310
|
+
return (
|
|
311
|
+
<tr key={row.id}>
|
|
312
|
+
{row.getVisibleCells().map((cell) => {
|
|
313
|
+
const isVariableCol = cell.column.id.startsWith('Variable');
|
|
314
|
+
const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
|
|
315
|
+
if (shouldDrawColBorder) {
|
|
316
|
+
colBorderDrawn = true;
|
|
317
|
+
}
|
|
318
|
+
const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
|
|
319
|
+
return (
|
|
320
|
+
<td
|
|
321
|
+
{...{
|
|
322
|
+
key: cell.id,
|
|
323
|
+
style: {
|
|
324
|
+
width: cell.column.getSize(),
|
|
325
|
+
},
|
|
326
|
+
className: `${isVariableCol ? 'variable' : ''} ${
|
|
327
|
+
shouldDrawRowBorder ? 'first-prompt-row' : ''
|
|
328
|
+
} ${shouldDrawColBorder ? 'first-prompt-col' : ''}`,
|
|
329
|
+
}}
|
|
330
|
+
>
|
|
331
|
+
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
|
332
|
+
</td>
|
|
333
|
+
);
|
|
334
|
+
})}
|
|
335
|
+
</tr>
|
|
336
|
+
);
|
|
337
|
+
})}
|
|
258
338
|
</tbody>
|
|
259
339
|
</table>
|
|
260
340
|
);
|
|
@@ -2,22 +2,32 @@ import * as React from 'react';
|
|
|
2
2
|
|
|
3
3
|
import invariant from 'tiny-invariant';
|
|
4
4
|
import Box from '@mui/material/Box';
|
|
5
|
-
import
|
|
6
|
-
import Stack from '@mui/material/Stack';
|
|
7
|
-
import Slider from '@mui/material/Slider';
|
|
8
|
-
import Typography from '@mui/material/Typography';
|
|
9
|
-
import OutlinedInput from '@mui/material/OutlinedInput';
|
|
10
|
-
import InputLabel from '@mui/material/InputLabel';
|
|
11
|
-
import MenuItem from '@mui/material/MenuItem';
|
|
5
|
+
import Checkbox from '@mui/material/Checkbox';
|
|
12
6
|
import FormControl from '@mui/material/FormControl';
|
|
7
|
+
import FormControlLabel from '@mui/material/FormControlLabel';
|
|
8
|
+
import InputLabel from '@mui/material/InputLabel';
|
|
13
9
|
import ListItemText from '@mui/material/ListItemText';
|
|
10
|
+
import MenuItem from '@mui/material/MenuItem';
|
|
11
|
+
import OutlinedInput from '@mui/material/OutlinedInput';
|
|
12
|
+
import Paper from '@mui/material/Box';
|
|
14
13
|
import Select, { SelectChangeEvent } from '@mui/material/Select';
|
|
15
|
-
import
|
|
14
|
+
import Slider from '@mui/material/Slider';
|
|
15
|
+
import Stack from '@mui/material/Stack';
|
|
16
|
+
import Tooltip from '@mui/material/Tooltip';
|
|
17
|
+
import Typography from '@mui/material/Typography';
|
|
18
|
+
import { styled } from '@mui/system';
|
|
16
19
|
|
|
17
20
|
import ResultsTable from './ResultsTable.js';
|
|
18
21
|
import { useStore } from './store.js';
|
|
19
22
|
|
|
20
23
|
import type { VisibilityState } from '@tanstack/table-core';
|
|
24
|
+
import { FilterMode } from './types.js';
|
|
25
|
+
|
|
26
|
+
const ResponsiveStack = styled(Stack)(({ theme }) => ({
|
|
27
|
+
[theme.breakpoints.down('sm')]: {
|
|
28
|
+
flexDirection: 'column',
|
|
29
|
+
},
|
|
30
|
+
}));
|
|
21
31
|
|
|
22
32
|
export default function ResultsView() {
|
|
23
33
|
const { table } = useStore();
|
|
@@ -25,6 +35,29 @@ export default function ResultsView() {
|
|
|
25
35
|
const [columnVisibility, setColumnVisibility] = React.useState<VisibilityState>({});
|
|
26
36
|
const [selectedColumns, setSelectedColumns] = React.useState<string[]>([]);
|
|
27
37
|
|
|
38
|
+
const [failureFilter, setFailureFilter] = React.useState<{ [key: string]: boolean }>({});
|
|
39
|
+
const handleFailureFilterToggle = (columnId: string, checked: boolean) => {
|
|
40
|
+
setFailureFilter((prevFailureFilter) => ({ ...prevFailureFilter, [columnId]: checked }));
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
const [filterMode, setFilterMode] = React.useState<FilterMode>('all');
|
|
44
|
+
const handleFilterModeChange = (event: SelectChangeEvent<unknown>) => {
|
|
45
|
+
const mode = event.target.value as FilterMode;
|
|
46
|
+
setFilterMode(mode);
|
|
47
|
+
|
|
48
|
+
const newFailureFilter: { [key: string]: boolean } = {};
|
|
49
|
+
head.prompts.forEach((_, idx) => {
|
|
50
|
+
const columnId = `Prompt ${idx + 1}`;
|
|
51
|
+
newFailureFilter[columnId] = mode === 'failures';
|
|
52
|
+
});
|
|
53
|
+
setFailureFilter(newFailureFilter);
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
const [wordBreak, setWordBreak] = React.useState<'break-word' | 'break-all'>('break-all');
|
|
57
|
+
const handleWordBreakChange = (event: React.ChangeEvent<HTMLInputElement>) => {
|
|
58
|
+
setWordBreak(event.target.checked ? 'break-all' : 'break-word');
|
|
59
|
+
};
|
|
60
|
+
|
|
28
61
|
invariant(table, 'Table data must be loaded before rendering ResultsView');
|
|
29
62
|
const { head } = table;
|
|
30
63
|
|
|
@@ -35,8 +68,8 @@ export default function ResultsView() {
|
|
|
35
68
|
setSelectedColumns(typeof value === 'string' ? value.split(',') : value);
|
|
36
69
|
|
|
37
70
|
const allColumns = [
|
|
38
|
-
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
39
71
|
...head.vars.map((_, idx) => `Variable ${idx + 1}`),
|
|
72
|
+
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
40
73
|
];
|
|
41
74
|
const newColumnVisibility: VisibilityState = {};
|
|
42
75
|
allColumns.forEach((col) => {
|
|
@@ -48,30 +81,30 @@ export default function ResultsView() {
|
|
|
48
81
|
};
|
|
49
82
|
|
|
50
83
|
const columnData = [
|
|
51
|
-
...head.prompts.map((_, idx) => ({
|
|
52
|
-
value: `Prompt ${idx + 1}`,
|
|
53
|
-
label: `Prompt ${idx + 1}`,
|
|
54
|
-
group: 'Prompts',
|
|
55
|
-
})),
|
|
56
84
|
...head.vars.map((_, idx) => ({
|
|
57
85
|
value: `Variable ${idx + 1}`,
|
|
58
86
|
label: `Variable ${idx + 1}`,
|
|
59
87
|
group: 'Variables',
|
|
60
88
|
})),
|
|
89
|
+
...head.prompts.map((_, idx) => ({
|
|
90
|
+
value: `Prompt ${idx + 1}`,
|
|
91
|
+
label: `Prompt ${idx + 1}`,
|
|
92
|
+
group: 'Prompts',
|
|
93
|
+
})),
|
|
61
94
|
];
|
|
62
95
|
|
|
63
96
|
// Set all columns as selected by default
|
|
64
97
|
React.useEffect(() => {
|
|
65
98
|
setSelectedColumns([
|
|
66
|
-
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
67
99
|
...head.vars.map((_, idx) => `Variable ${idx + 1}`),
|
|
100
|
+
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
68
101
|
]);
|
|
69
102
|
}, [head]);
|
|
70
103
|
|
|
71
104
|
return (
|
|
72
105
|
<div>
|
|
73
106
|
<Paper py="md">
|
|
74
|
-
<
|
|
107
|
+
<ResponsiveStack direction="row" spacing={8} alignItems="center">
|
|
75
108
|
<Box>
|
|
76
109
|
<FormControl sx={{ m: 1, minWidth: 300 }} size="small">
|
|
77
110
|
<InputLabel id="visible-columns-label">Visible columns</InputLabel>
|
|
@@ -93,6 +126,21 @@ export default function ResultsView() {
|
|
|
93
126
|
</Select>
|
|
94
127
|
</FormControl>
|
|
95
128
|
</Box>
|
|
129
|
+
<Box>
|
|
130
|
+
<FormControl sx={{ minWidth: 180 }} size="small">
|
|
131
|
+
<InputLabel id="failure-filter-mode-label">Filter</InputLabel>
|
|
132
|
+
<Select
|
|
133
|
+
labelId="filter-mode-label"
|
|
134
|
+
id="filter-mode"
|
|
135
|
+
value={filterMode}
|
|
136
|
+
onChange={handleFilterModeChange}
|
|
137
|
+
label="Filter"
|
|
138
|
+
>
|
|
139
|
+
<MenuItem value="all">Show all results</MenuItem>
|
|
140
|
+
<MenuItem value="failures">Show only failures</MenuItem>
|
|
141
|
+
</Select>
|
|
142
|
+
</FormControl>
|
|
143
|
+
</Box>
|
|
96
144
|
<Box>
|
|
97
145
|
<Typography mt={2}>Max text length: {maxTextLength}</Typography>
|
|
98
146
|
<Slider
|
|
@@ -102,9 +150,26 @@ export default function ResultsView() {
|
|
|
102
150
|
onChange={(_, val: number | number[]) => setMaxTextLength(val as number)}
|
|
103
151
|
/>
|
|
104
152
|
</Box>
|
|
105
|
-
|
|
153
|
+
<Box>
|
|
154
|
+
<Tooltip title="Forcing line breaks makes it easier to adjust column widths to your liking">
|
|
155
|
+
<FormControlLabel
|
|
156
|
+
control={
|
|
157
|
+
<Checkbox checked={wordBreak === 'break-all'} onChange={handleWordBreakChange} />
|
|
158
|
+
}
|
|
159
|
+
label="Force line breaks"
|
|
160
|
+
/>
|
|
161
|
+
</Tooltip>
|
|
162
|
+
</Box>
|
|
163
|
+
</ResponsiveStack>
|
|
106
164
|
</Paper>
|
|
107
|
-
<ResultsTable
|
|
165
|
+
<ResultsTable
|
|
166
|
+
maxTextLength={maxTextLength}
|
|
167
|
+
columnVisibility={columnVisibility}
|
|
168
|
+
wordBreak={wordBreak}
|
|
169
|
+
filterMode={filterMode}
|
|
170
|
+
failureFilter={failureFilter}
|
|
171
|
+
onFailureFilterToggle={handleFailureFilterToggle}
|
|
172
|
+
/>
|
|
108
173
|
</div>
|
|
109
174
|
);
|
|
110
175
|
}
|
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
--pass-color: green;
|
|
16
16
|
--fail-color: #ad0000;
|
|
17
17
|
--smalltext-color: gray;
|
|
18
|
+
--success-background-color: #d1ffd7;
|
|
19
|
+
--variable-background-color: #f7f7f7;
|
|
20
|
+
--header-background-color: #fffdf7;
|
|
18
21
|
}
|
|
19
22
|
|
|
20
23
|
/* Dark mode colors */
|
|
@@ -38,6 +41,9 @@
|
|
|
38
41
|
--pass-color: #4caf50;
|
|
39
42
|
--fail-color: #f44336;
|
|
40
43
|
--smalltext-color: #888888;
|
|
44
|
+
--success-background-color: #216d2b;
|
|
45
|
+
--variable-background-color: #333;
|
|
46
|
+
--header-background-color: #333;
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
html {
|
package/src/web/server.ts
CHANGED
|
@@ -10,9 +10,9 @@ import opener from 'opener';
|
|
|
10
10
|
import { Server as SocketIOServer } from 'socket.io';
|
|
11
11
|
|
|
12
12
|
import promptfoo from '../index.js';
|
|
13
|
-
import logger from '../logger
|
|
14
|
-
import { getDirectory } from '../esm
|
|
15
|
-
import { getLatestResultsPath } from '../util
|
|
13
|
+
import logger from '../logger';
|
|
14
|
+
import { getDirectory } from '../esm';
|
|
15
|
+
import { getLatestResultsPath } from '../util';
|
|
16
16
|
|
|
17
17
|
import type { Request, Response } from 'express';
|
|
18
18
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
:root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray}@media (prefers-color-scheme: dark){:root{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);word-break:break-all;vertical-align:top;padding:1.5rem}th,.th{padding:1rem;position:relative;text-align:center;font-weight:semi-bold}tr .cell-rating{visibility:hidden;position:absolute;bottom:1.25rem;right:-1rem;line-height:0;font-size:1.75rem}tr:hover .cell-rating{visibility:visible}tr .cell-rating .rating{cursor:pointer;margin-right:1rem}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}td .status{margin-bottom:.5rem;font-weight:700}td .pass{color:var(--pass-color)}td 
.fail{color:var(--fail-color)}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
|