promptfoo 0.8.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/assertions.d.ts +3 -3
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +11 -12
- package/dist/assertions.js.map +1 -1
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +9 -9
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +1 -1
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +60 -34
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +10 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -14
- package/dist/index.js.map +1 -1
- package/dist/main.js +41 -40
- package/dist/main.js.map +1 -1
- package/dist/providers/localai.js +11 -11
- package/dist/providers/localai.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +30 -21
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers.d.ts +3 -3
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +15 -15
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +5 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/util.d.ts +2 -2
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +43 -15
- package/dist/util.js.map +1 -1
- package/dist/web/client/assets/index-9a9ba400.css +1 -0
- package/dist/web/client/assets/{index-8751749f.js → index-b72d3ca9.js} +12 -12
- package/dist/web/client/index.html +2 -2
- package/dist/web/server.js +9 -9
- package/dist/web/server.js.map +1 -1
- package/package.json +3 -1
- package/src/assertions.ts +8 -9
- package/src/cache.ts +5 -4
- package/src/evaluator.ts +66 -33
- package/src/index.ts +13 -8
- package/src/main.ts +13 -18
- package/src/providers/localai.ts +3 -3
- package/src/providers/openai.ts +16 -8
- package/src/providers.ts +3 -3
- package/src/types.ts +7 -2
- package/src/util.ts +42 -20
- package/src/web/client/package-lock.json +5729 -0
- package/src/web/client/src/ResultsTable.css +19 -0
- package/src/web/client/src/ResultsTable.tsx +51 -37
- package/src/web/client/src/ResultsView.tsx +7 -7
- package/src/web/server.ts +3 -3
- package/dist/web/client/assets/index-207192fc.css +0 -1
|
@@ -46,6 +46,17 @@ td,
|
|
|
46
46
|
padding: 1.5rem;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
th.variable,
|
|
50
|
+
.th.variable,
|
|
51
|
+
td.variable,
|
|
52
|
+
.td.variable {
|
|
53
|
+
background-color: #f8fbff;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
tr.header {
|
|
57
|
+
background-color: #fffdf7;
|
|
58
|
+
}
|
|
59
|
+
|
|
49
60
|
th,
|
|
50
61
|
.th {
|
|
51
62
|
padding: 1rem;
|
|
@@ -103,6 +114,14 @@ td .fail {
|
|
|
103
114
|
color: var(--fail-color);
|
|
104
115
|
}
|
|
105
116
|
|
|
117
|
+
.first-prompt-col {
|
|
118
|
+
border-left: 2px solid #888;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
.first-prompt-row {
|
|
122
|
+
border-top: 2px solid #888;
|
|
123
|
+
}
|
|
124
|
+
|
|
106
125
|
.resizer {
|
|
107
126
|
position: absolute;
|
|
108
127
|
right: 0;
|
|
@@ -148,9 +148,28 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
148
148
|
|
|
149
149
|
const columnHelper = createColumnHelper<EvalRow>();
|
|
150
150
|
const columns = [
|
|
151
|
+
columnHelper.group({
|
|
152
|
+
id: 'vars',
|
|
153
|
+
header: () => <span>Variables</span>,
|
|
154
|
+
columns: head.vars.map((varName, idx) =>
|
|
155
|
+
columnHelper.accessor((row: EvalRow) => row.vars[idx], {
|
|
156
|
+
id: `Variable ${idx + 1}`,
|
|
157
|
+
header: () => (
|
|
158
|
+
<TableHeader
|
|
159
|
+
smallText={`Variable ${idx + 1}`}
|
|
160
|
+
text={varName}
|
|
161
|
+
maxLength={maxTextLength}
|
|
162
|
+
/>
|
|
163
|
+
),
|
|
164
|
+
cell: (info: CellContext<EvalRow, string>) => (
|
|
165
|
+
<TruncatedText text={info.getValue()} maxLength={maxTextLength} />
|
|
166
|
+
),
|
|
167
|
+
}),
|
|
168
|
+
),
|
|
169
|
+
}),
|
|
151
170
|
columnHelper.group({
|
|
152
171
|
id: 'prompts',
|
|
153
|
-
header: () => <span>
|
|
172
|
+
header: () => <span>Outputs</span>,
|
|
154
173
|
columns: head.prompts.map((prompt, idx) =>
|
|
155
174
|
columnHelper.accessor((row: EvalRow) => row.outputs[idx], {
|
|
156
175
|
id: `Prompt ${idx + 1}`,
|
|
@@ -176,25 +195,6 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
176
195
|
}),
|
|
177
196
|
),
|
|
178
197
|
}),
|
|
179
|
-
columnHelper.group({
|
|
180
|
-
id: 'vars',
|
|
181
|
-
header: () => <span>Variables</span>,
|
|
182
|
-
columns: head.vars.map((varName, idx) =>
|
|
183
|
-
columnHelper.accessor((row: EvalRow) => row.vars[idx], {
|
|
184
|
-
id: `Variable ${idx + 1}`,
|
|
185
|
-
header: () => (
|
|
186
|
-
<TableHeader
|
|
187
|
-
smallText={`Variable ${idx + 1}`}
|
|
188
|
-
text={varName}
|
|
189
|
-
maxLength={maxTextLength}
|
|
190
|
-
/>
|
|
191
|
-
),
|
|
192
|
-
cell: (info: CellContext<EvalRow, string>) => (
|
|
193
|
-
<TruncatedText text={info.getValue()} maxLength={maxTextLength} />
|
|
194
|
-
),
|
|
195
|
-
}),
|
|
196
|
-
),
|
|
197
|
-
}),
|
|
198
198
|
];
|
|
199
199
|
|
|
200
200
|
const reactTable = useReactTable({
|
|
@@ -212,7 +212,7 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
212
212
|
<table>
|
|
213
213
|
<thead>
|
|
214
214
|
{reactTable.getHeaderGroups().map((headerGroup) => (
|
|
215
|
-
<tr key={headerGroup.id}>
|
|
215
|
+
<tr key={headerGroup.id} className="header">
|
|
216
216
|
{headerGroup.headers.map((header) => (
|
|
217
217
|
<th
|
|
218
218
|
{...{
|
|
@@ -239,22 +239,36 @@ export default function ResultsTable({ maxTextLength, columnVisibility }: Result
|
|
|
239
239
|
))}
|
|
240
240
|
</thead>
|
|
241
241
|
<tbody>
|
|
242
|
-
{reactTable.getRowModel().rows.map((row) =>
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
242
|
+
{reactTable.getRowModel().rows.map((row, rowIndex) => {
|
|
243
|
+
let colBorderDrawn = false;
|
|
244
|
+
return (
|
|
245
|
+
<tr key={row.id}>
|
|
246
|
+
{row.getVisibleCells().map((cell) => {
|
|
247
|
+
const isVariableCol = cell.column.id.startsWith('Variable');
|
|
248
|
+
const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
|
|
249
|
+
if (shouldDrawColBorder) {
|
|
250
|
+
colBorderDrawn = true;
|
|
251
|
+
}
|
|
252
|
+
const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
|
|
253
|
+
return (
|
|
254
|
+
<td
|
|
255
|
+
{...{
|
|
256
|
+
key: cell.id,
|
|
257
|
+
style: {
|
|
258
|
+
width: cell.column.getSize(),
|
|
259
|
+
},
|
|
260
|
+
className: `${isVariableCol ? 'variable' : ''} ${
|
|
261
|
+
shouldDrawRowBorder ? 'first-prompt-row' : ''
|
|
262
|
+
} ${shouldDrawColBorder ? 'first-prompt-col' : ''}`,
|
|
263
|
+
}}
|
|
264
|
+
>
|
|
265
|
+
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
|
266
|
+
</td>
|
|
267
|
+
);
|
|
268
|
+
})}
|
|
269
|
+
</tr>
|
|
270
|
+
);
|
|
271
|
+
})}
|
|
258
272
|
</tbody>
|
|
259
273
|
</table>
|
|
260
274
|
);
|
|
@@ -35,8 +35,8 @@ export default function ResultsView() {
|
|
|
35
35
|
setSelectedColumns(typeof value === 'string' ? value.split(',') : value);
|
|
36
36
|
|
|
37
37
|
const allColumns = [
|
|
38
|
-
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
39
38
|
...head.vars.map((_, idx) => `Variable ${idx + 1}`),
|
|
39
|
+
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
40
40
|
];
|
|
41
41
|
const newColumnVisibility: VisibilityState = {};
|
|
42
42
|
allColumns.forEach((col) => {
|
|
@@ -48,23 +48,23 @@ export default function ResultsView() {
|
|
|
48
48
|
};
|
|
49
49
|
|
|
50
50
|
const columnData = [
|
|
51
|
-
...head.prompts.map((_, idx) => ({
|
|
52
|
-
value: `Prompt ${idx + 1}`,
|
|
53
|
-
label: `Prompt ${idx + 1}`,
|
|
54
|
-
group: 'Prompts',
|
|
55
|
-
})),
|
|
56
51
|
...head.vars.map((_, idx) => ({
|
|
57
52
|
value: `Variable ${idx + 1}`,
|
|
58
53
|
label: `Variable ${idx + 1}`,
|
|
59
54
|
group: 'Variables',
|
|
60
55
|
})),
|
|
56
|
+
...head.prompts.map((_, idx) => ({
|
|
57
|
+
value: `Prompt ${idx + 1}`,
|
|
58
|
+
label: `Prompt ${idx + 1}`,
|
|
59
|
+
group: 'Prompts',
|
|
60
|
+
})),
|
|
61
61
|
];
|
|
62
62
|
|
|
63
63
|
// Set all columns as selected by default
|
|
64
64
|
React.useEffect(() => {
|
|
65
65
|
setSelectedColumns([
|
|
66
|
-
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
67
66
|
...head.vars.map((_, idx) => `Variable ${idx + 1}`),
|
|
67
|
+
...head.prompts.map((_, idx) => `Prompt ${idx + 1}`),
|
|
68
68
|
]);
|
|
69
69
|
}, [head]);
|
|
70
70
|
|
package/src/web/server.ts
CHANGED
|
@@ -10,9 +10,9 @@ import opener from 'opener';
|
|
|
10
10
|
import { Server as SocketIOServer } from 'socket.io';
|
|
11
11
|
|
|
12
12
|
import promptfoo from '../index.js';
|
|
13
|
-
import logger from '../logger
|
|
14
|
-
import { getDirectory } from '../esm
|
|
15
|
-
import { getLatestResultsPath } from '../util
|
|
13
|
+
import logger from '../logger';
|
|
14
|
+
import { getDirectory } from '../esm';
|
|
15
|
+
import { getLatestResultsPath } from '../util';
|
|
16
16
|
|
|
17
17
|
import type { Request, Response } from 'express';
|
|
18
18
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
:root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray}@media (prefers-color-scheme: dark){:root{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);word-break:break-all;vertical-align:top;padding:1.5rem}th,.th{padding:1rem;position:relative;text-align:center;font-weight:semi-bold}tr .cell-rating{visibility:hidden;position:absolute;bottom:1.25rem;right:-1rem;line-height:0;font-size:1.75rem}tr:hover .cell-rating{visibility:visible}tr .cell-rating .rating{cursor:pointer;margin-right:1rem}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}td .status{margin-bottom:.5rem;font-weight:700}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
|