promptfoo 0.19.3 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +2 -1
  2. package/dist/package.json +2 -1
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +26 -4
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/types.d.ts +1 -1
  7. package/dist/src/types.d.ts.map +1 -1
  8. package/dist/src/web/nextui/404/index.html +1 -1
  9. package/dist/src/web/nextui/404.html +1 -1
  10. package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +1 -0
  11. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +1 -0
  12. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +1 -0
  13. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +1 -0
  14. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +25 -0
  15. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +15 -0
  16. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +37 -0
  17. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +125 -0
  18. package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +1 -0
  19. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +1 -0
  20. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +1 -0
  21. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +1 -0
  22. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +1 -0
  23. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +1 -0
  24. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +1 -0
  25. package/dist/src/web/nextui/_next/static/chunks/ca377847-cb6ae6a6a073aebb.js +6 -0
  26. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-d8847af536b5787b.js → fd9d1056-ac777be631f5a9e9.js} +1 -1
  27. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +25 -0
  28. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +1 -0
  29. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +1 -0
  30. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +1 -0
  31. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +1 -0
  32. package/dist/src/web/nextui/_next/static/chunks/{webpack-a886dd767c2e76b7.js → webpack-6e474e42be502dd7.js} +1 -1
  33. package/dist/src/web/nextui/eval/index.html +1 -1
  34. package/dist/src/web/nextui/eval/index.txt +5 -5
  35. package/dist/src/web/nextui/index.html +1 -1
  36. package/dist/src/web/nextui/index.txt +4 -4
  37. package/dist/src/web/nextui/setup/index.html +1 -1
  38. package/dist/src/web/nextui/setup/index.txt +6 -6
  39. package/package.json +2 -1
  40. package/src/assertions.ts +33 -4
  41. package/src/types.ts +2 -1
  42. package/src/web/nextui/package-lock.json +29 -0
  43. package/src/web/nextui/package.json +2 -0
  44. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +355 -0
  45. package/src/web/nextui/src/app/eval/ResultsView.tsx +2 -0
  46. package/dist/src/web/nextui/_next/static/chunks/121-54cee610700b4756.js +0 -27
  47. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +0 -1
  48. package/dist/src/web/nextui/_next/static/chunks/373-6a411db0b05027d3.js +0 -1
  49. package/dist/src/web/nextui/_next/static/chunks/583-507e6d8883bb85ff.js +0 -1
  50. package/dist/src/web/nextui/_next/static/chunks/596-9c29c47b8dee7a50.js +0 -25
  51. package/dist/src/web/nextui/_next/static/chunks/658-f8f9d18540505edc.js +0 -15
  52. package/dist/src/web/nextui/_next/static/chunks/858-7255df6dbc44dff9.js +0 -125
  53. package/dist/src/web/nextui/_next/static/chunks/97-64e11ce2b0607459.js +0 -1
  54. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +0 -1
  55. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-655bc42ac68b25cc.js +0 -1
  56. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d5e8697859d6294e.js +0 -1
  57. package/dist/src/web/nextui/_next/static/chunks/app/layout-4c714b1a5a3a768d.js +0 -1
  58. package/dist/src/web/nextui/_next/static/chunks/app/page-4fe8a6342d24ca23.js +0 -1
  59. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-cd35686fe6c12be8.js +0 -1
  60. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +0 -25
  61. package/dist/src/web/nextui/_next/static/chunks/main-0670de04b1c026b4.js +0 -1
  62. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +0 -1
  63. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +0 -1
  64. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +0 -1
  65. package/dist/src/web/nextui/_next/static/eCTjsASjQCuaN3ajMqfGS/_buildManifest.js +0 -1
  66. /package/dist/src/web/nextui/_next/static/{eCTjsASjQCuaN3ajMqfGS → US6gOx8LHTX_Hzm9aYNrC}/_ssgManifest.js +0 -0
@@ -0,0 +1,355 @@
1
import React, { useRef, useEffect, useState } from 'react';
import {
  Chart,
  BarController,
  LineController,
  ScatterController,
  CategoryScale,
  LinearScale,
  BarElement,
  LineElement,
  PointElement,
  Tooltip,
  Colors,
  Legend,
  Title,
} from 'chart.js';
import Select from '@mui/material/Select';
import MenuItem from '@mui/material/MenuItem';
import FormControl from '@mui/material/FormControl';
import Dialog from '@mui/material/Dialog';
import DialogTitle from '@mui/material/DialogTitle';
import DialogContent from '@mui/material/DialogContent';
import { useTheme } from '@mui/material/styles';
import Paper from '@mui/material/Paper';
import IconButton from '@mui/material/IconButton';
import CloseIcon from '@mui/icons-material/Close';
import { ErrorBoundary } from 'react-error-boundary';

import { useStore } from './store';

import type { VisibilityState } from '@tanstack/table-core';
import type { EvalTable } from './types';
31
+
32
/**
 * Props accepted by the exported `ResultsCharts` component.
 */
interface ResultsChartsProps {
  /**
   * Column visibility state of the results table.
   * Accepted for the caller's convenience; `ResultsCharts` does not read it.
   */
  columnVisibility: VisibilityState;
}

/**
 * Props shared by the individual chart components in this file
 * (`PassRateChart`, `HistogramChart`, `ScatterChart`).
 */
interface ChartProps {
  /** Evaluation results the chart is drawn from. */
  table: EvalTable;
}
39
+
40
/**
 * Bar colours assigned to prompts by index. Charts pick
 * `COLOR_PALETTE[promptIdx % COLOR_PALETTE.length]`, so the palette repeats
 * once there are more prompts than entries.
 */
const COLOR_PALETTE = [
  '#fd7f6f', '#7eb0d5', '#b2e061',
  '#bd7ebe', '#ffb55a', '#ffee65',
  '#beb9db', '#fdcce5', '#8bd3c7',
];
51
+
52
// Register every Chart.js building block used by the charts in this file.
// Chart.js ignores configuration for components that were never registered,
// so `Title` and `Legend` must be listed here for the `plugins.title` and
// `plugins.legend` options set below to have any effect.
Chart.register(
  // Chart types
  BarController,
  LineController,
  ScatterController,
  // Axes
  CategoryScale,
  LinearScale,
  // Drawn elements
  BarElement,
  LineElement,
  PointElement,
  // Plugins
  Tooltip,
  Colors,
  Legend,
  Title,
);
64
+
65
/**
 * Grouped bar chart of the score distribution, with one dataset per prompt.
 *
 * Scores from every output in `table.body` determine the overall range, which
 * is split into ten equal steps starting at `floor(min score)`. Eleven bins
 * are drawn: the eleventh starts at the top edge so that a score equal to it
 * is still counted.
 *
 * Nothing is drawn when the table contains no finite scores.
 *
 * @param table - Evaluation results; `table.head.prompts` provides the series
 *   and `table.body[].outputs[].score` the values.
 */
function HistogramChart({ table }: ChartProps) {
  const histogramCanvasRef = useRef<HTMLCanvasElement>(null);
  const histogramChartInstance = useRef<Chart | null>(null);

  useEffect(() => {
    const canvas = histogramCanvasRef.current;
    if (!canvas) {
      return undefined;
    }

    // Drop any chart left over from a previous run before drawing a new one.
    if (histogramChartInstance.current) {
      histogramChartInstance.current.destroy();
      histogramChartInstance.current = null;
    }

    const allScores = table.body.flatMap((row) =>
      row.outputs.map((output) => output.score).filter((score) => Number.isFinite(score)),
    );
    if (allScores.length === 0) {
      // Math.min/Math.max of nothing would yield +/-Infinity and NaN bin edges.
      return undefined;
    }

    // reduce() instead of Math.max(...allScores): no argument-count limit.
    const minScore = allScores.reduce((lo, score) => Math.min(lo, score), Infinity);
    const maxScore = allScores.reduce((hi, score) => Math.max(hi, score), -Infinity);

    const lowest = Math.floor(minScore);
    // When every score is the same whole number the range is 0; fall back to 1
    // so the bins keep a non-zero width and the scores are actually counted.
    const range = Math.ceil(maxScore) - lowest || 1;
    const binSize = range / 10;
    const edgeAt = (step: number) => parseFloat((lowest + step * binSize).toFixed(2));

    // Lower edge of each bin, and the matching (exclusive) upper edge. Both
    // come from the same rounding so neighbouring bins share an exact boundary
    // and a score can never land in two bins or fall between them.
    const bins = Array.from({ length: 11 }, (_, i) => edgeAt(i));
    const upperBounds = bins.map((_, i) => edgeAt(i + 1));

    const datasets = table.head.prompts.map((_prompt, promptIdx) => {
      const promptScores = table.body.flatMap((row) => {
        const output = row.outputs[promptIdx];
        // Skip rows that have no output for this prompt instead of throwing.
        return output && Number.isFinite(output.score) ? [output.score] : [];
      });
      const counts = bins.map(
        (lower, binIdx) =>
          promptScores.filter((score) => score >= lower && score < upperBounds[binIdx]).length,
      );
      return {
        label: `Prompt ${promptIdx + 1}`,
        data: counts,
        backgroundColor: COLOR_PALETTE[promptIdx % COLOR_PALETTE.length],
      };
    });

    const chart = new Chart(canvas, {
      type: 'bar',
      data: {
        labels: bins,
        datasets,
      },
      options: {
        animation: false,
        plugins: {
          title: {
            display: true,
            text: 'Score Distribution',
          },
          legend: {
            display: false,
          },
          tooltip: {
            callbacks: {
              title: function (context) {
                const datasetIndex = context[0].datasetIndex;
                return `Prompt ${datasetIndex + 1}`;
              },
              label: function (context) {
                const labelIndex = context.dataIndex;
                const lowerBound = bins[labelIndex];
                const upperBound = upperBounds[labelIndex];
                return `${lowerBound} <= score < ${upperBound}`;
              },
            },
          },
        },
      },
    });
    histogramChartInstance.current = chart;

    // Release the chart when the table changes or the component unmounts.
    return () => {
      chart.destroy();
      if (histogramChartInstance.current === chart) {
        histogramChartInstance.current = null;
      }
    };
  }, [table]);

  return <canvas ref={histogramCanvasRef} style={{ maxHeight: '300px' }}></canvas>;
}
137
+
138
/**
 * Bar chart with one bar per prompt showing the percentage of that prompt's
 * outputs whose `pass` flag is truthy.
 *
 * A prompt with no outputs is reported as 0% rather than NaN.
 *
 * @param table - Evaluation results; `table.head.prompts` provides the series
 *   and `table.body[].outputs[].pass` the values.
 */
function PassRateChart({ table }: ChartProps) {
  const passRateCanvasRef = useRef<HTMLCanvasElement>(null);
  const passRateChartInstance = useRef<Chart | null>(null);

  useEffect(() => {
    const canvas = passRateCanvasRef.current;
    if (!canvas) {
      return undefined;
    }

    // Drop any chart left over from a previous run before drawing a new one.
    if (passRateChartInstance.current) {
      passRateChartInstance.current.destroy();
      passRateChartInstance.current = null;
    }

    const datasets = table.head.prompts.map((_prompt, promptIdx) => {
      const outputs = table.body.flatMap((row) => {
        const output = row.outputs[promptIdx];
        // Skip rows that have no output for this prompt instead of throwing.
        return output ? [output] : [];
      });
      const passCount = outputs.filter((output) => output.pass).length;
      // Guard the division: 0 / 0 would put NaN into the dataset.
      const passRate = outputs.length > 0 ? (passCount / outputs.length) * 100 : 0;
      return {
        label: `Prompt ${promptIdx + 1}`,
        data: [passRate],
        backgroundColor: COLOR_PALETTE[promptIdx % COLOR_PALETTE.length],
      };
    });

    const chart = new Chart(canvas, {
      type: 'bar',
      data: {
        labels: ['Pass Rate (%)'],
        datasets,
      },
      options: {
        animation: false,
        plugins: {
          title: {
            display: true,
            text: 'Pass rate',
          },
          legend: {
            display: true,
          },
        },
      },
    });
    passRateChartInstance.current = chart;

    // Release the chart when the table changes or the component unmounts.
    return () => {
      chart.destroy();
      if (passRateChartInstance.current === chart) {
        passRateChartInstance.current = null;
      }
    };
  }, [table]);

  return <canvas ref={passRateCanvasRef} style={{ maxHeight: '300px' }}></canvas>;
}
185
+
186
/**
 * Shortens tooltip text to at most `maxLength` characters, appending '...'
 * when something was cut off.
 */
function truncateOutputText(text: string, maxLength = 30): string {
  return text.length > maxLength ? text.substring(0, maxLength) + '...' : text;
}

/**
 * Scatter plot comparing the scores of two prompts row by row.
 *
 * Each row of `table.body` becomes one point: x is the score of the prompt
 * selected for the x axis, y the score of the prompt selected for the y axis.
 * Points are green when y > x, red when y < x and gray when equal. A dashed
 * diagonal from (min, min) to (max, max) over all scores marks equal scores.
 *
 * Clicking the chart opens a dialog to pick which prompts are compared.
 * The component renders nothing when the table has fewer than two prompts,
 * since there is nothing to compare.
 *
 * @param table - Evaluation results; `table.head.prompts` provides the
 *   selectable prompts and `table.body[].outputs[]` the scores and texts.
 */
function ScatterChart({ table }: ChartProps) {
  const scatterCanvasRef = useRef<HTMLCanvasElement>(null);
  const scatterChartInstance = useRef<Chart | null>(null);
  const [xAxisPrompt, setXAxisPrompt] = useState(0);
  const [yAxisPrompt, setYAxisPrompt] = useState(1);
  const [open, setOpen] = useState(false);

  // Clamp the selection to the prompts that exist: the default y index is 1,
  // and a newly loaded table may have fewer prompts than the previous one.
  const promptCount = table.head.prompts.length;
  const lastPromptIdx = Math.max(promptCount - 1, 0);
  const xIdx = Math.min(xAxisPrompt, lastPromptIdx);
  const yIdx = Math.min(yAxisPrompt, lastPromptIdx);

  useEffect(() => {
    const canvas = scatterCanvasRef.current;
    if (!canvas) {
      return undefined;
    }

    // Drop any chart left over from a previous run before drawing a new one.
    if (scatterChartInstance.current) {
      scatterChartInstance.current.destroy();
      scatterChartInstance.current = null;
    }

    // Only rows that have an output for both selected prompts can be plotted.
    // Tooltips index into this same array, so points and rows stay aligned.
    const plottedRows = table.body.filter((row) => row.outputs[xIdx] && row.outputs[yIdx]);

    const allScores = table.body.flatMap((row) =>
      row.outputs.map((output) => output.score).filter((score) => Number.isFinite(score)),
    );
    if (plottedRows.length === 0 || allScores.length === 0) {
      // Nothing to draw; also avoids +/-Infinity from min/max of nothing.
      return undefined;
    }
    const minScore = allScores.reduce((lo, score) => Math.min(lo, score), Infinity);
    const maxScore = allScores.reduce((hi, score) => Math.max(hi, score), -Infinity);

    const points = plottedRows.map((row) => ({
      x: row.outputs[xIdx].score,
      y: row.outputs[yIdx].score,
    }));
    // Colours are kept apart from the points so both datasets share the plain
    // { x, y } point shape.
    const pointColors = points.map((point) => {
      if (point.y > point.x) {
        return 'green';
      }
      if (point.y < point.x) {
        return 'red';
      }
      return 'gray';
    });

    const chart = new Chart(canvas, {
      type: 'scatter',
      data: {
        datasets: [
          {
            data: points,
            backgroundColor: pointColors,
          },
          {
            // Dashed "equal score" diagonal.
            type: 'line',
            data: [
              { x: minScore, y: minScore },
              { x: maxScore, y: maxScore },
            ],
            borderColor: 'gray',
            borderWidth: 1,
            borderDash: [5, 5],
            pointRadius: 0,
          },
        ],
      },
      options: {
        animation: false,
        plugins: {
          legend: {
            display: false,
          },
          tooltip: {
            callbacks: {
              label: function (tooltipItem) {
                // Only the scatter dataset (index 0) maps onto table rows.
                if (tooltipItem.datasetIndex !== 0) {
                  return '';
                }
                const row = plottedRows[tooltipItem.dataIndex];
                if (!row) {
                  return '';
                }
                // An array renders as separate tooltip lines; a '\n' inside a
                // single string does not.
                return [
                  `Output ${xIdx + 1}: ${truncateOutputText(row.outputs[xIdx].text)}`,
                  `Output ${yIdx + 1}: ${truncateOutputText(row.outputs[yIdx].text)}`,
                ];
              },
            },
          },
        },
        scales: {
          x: {
            title: {
              display: true,
              text: `Prompt ${xIdx + 1} Score`,
            },
          },
          y: {
            title: {
              display: true,
              text: `Prompt ${yIdx + 1} Score`,
            },
          },
        },
      },
    });
    scatterChartInstance.current = chart;

    // Release the chart when inputs change or the component unmounts.
    return () => {
      chart.destroy();
      if (scatterChartInstance.current === chart) {
        scatterChartInstance.current = null;
      }
    };
  }, [table, xIdx, yIdx]);

  // Checked after all hooks so the hook order stays stable between renders.
  if (promptCount < 2) {
    return null;
  }

  return (
    <>
      <Dialog open={open} onClose={() => setOpen(false)}>
        <DialogTitle>Compare prompt outputs</DialogTitle>
        <DialogContent>
          <FormControl sx={{ m: 1, minWidth: 120 }}>
            <Select value={xIdx} onChange={(e) => setXAxisPrompt(Number(e.target.value))}>
              {table.head.prompts.map((prompt, idx) => (
                <MenuItem key={idx} value={idx}>
                  Prompt {idx + 1}
                </MenuItem>
              ))}
            </Select>
          </FormControl>
          <FormControl sx={{ m: 1, minWidth: 120 }}>
            <Select value={yIdx} onChange={(e) => setYAxisPrompt(Number(e.target.value))}>
              {table.head.prompts.map((prompt, idx) => (
                <MenuItem key={idx} value={idx}>
                  Prompt {idx + 1}
                </MenuItem>
              ))}
            </Select>
          </FormControl>
        </DialogContent>
      </Dialog>
      <canvas
        ref={scatterCanvasRef}
        style={{ maxHeight: '300px', cursor: 'pointer' }}
        onClick={() => setOpen(true)}
      ></canvas>
    </>
  );
}
321
+
322
/**
 * Panel that lays out the pass-rate, score-distribution and scatter charts
 * side by side for the table currently held in the store.
 *
 * Renders nothing when the store has no table or after the user dismisses
 * the panel with the close button. Any error thrown while rendering the
 * charts is caught by the error boundary, which renders nothing in its place.
 *
 * @param columnVisibility - Accepted from the caller but not read here.
 */
export default function ResultsCharts({ columnVisibility }: ResultsChartsProps) {
  const theme = useTheme();
  // Set during render (not in an effect) so the default text colour is in
  // place before the child charts create their Chart instances.
  const isDarkMode = theme.palette.mode === 'dark';
  Chart.defaults.color = isDarkMode ? '#aaa' : '#666';
  const [showCharts, setShowCharts] = useState(true);

  const { table } = useStore();
  if (!table) {
    return null;
  }
  if (!showCharts) {
    return null;
  }

  const panelStyle: React.CSSProperties = { position: 'relative', padding: theme.spacing(3) };
  const closeButtonStyle: React.CSSProperties = { position: 'absolute', right: 0, top: 0 };
  const rowStyle: React.CSSProperties = {
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%',
  };
  const cellStyle: React.CSSProperties = { width: '33%' };

  const hideCharts = () => setShowCharts(false);

  return (
    <ErrorBoundary fallback={null}>
      <Paper style={panelStyle}>
        <IconButton style={closeButtonStyle} onClick={hideCharts}>
          <CloseIcon />
        </IconButton>
        <div style={rowStyle}>
          <div style={cellStyle}>
            <PassRateChart table={table} />
          </div>
          <div style={cellStyle}>
            <HistogramChart table={table} />
          </div>
          <div style={cellStyle}>
            <ScatterChart table={table} />
          </div>
        </div>
      </Paper>
    </ErrorBoundary>
  );
}
@@ -21,6 +21,7 @@ import ShareIcon from '@mui/icons-material/Share';
21
21
  import VisibilityIcon from '@mui/icons-material/Visibility';
22
22
  import { styled } from '@mui/system';
23
23
 
24
+ import ResultsCharts from './ResultsCharts';
24
25
  import ResultsTable from './ResultsTable';
25
26
  import ConfigModal from './ConfigModal';
26
27
  import ShareModal from './ShareModal';
@@ -280,6 +281,7 @@ export default function ResultsView({ recentFiles, onRecentFileSelected }: Resul
280
281
  </Box>
281
282
  </ResponsiveStack>
282
283
  </Paper>
284
+ <ResultsCharts columnVisibility={columnVisibility} />
283
285
  <ResultsTable
284
286
  maxTextLength={maxTextLength}
285
287
  columnVisibility={columnVisibility}