promptfoo 0.20.1 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/dist/package.json +4 -4
  2. package/dist/src/assertions.d.ts.map +1 -1
  3. package/dist/src/assertions.js +5 -0
  4. package/dist/src/assertions.js.map +1 -1
  5. package/dist/src/evaluator.js +1 -1
  6. package/dist/src/evaluator.js.map +1 -1
  7. package/dist/src/index.d.ts +1 -5
  8. package/dist/src/index.d.ts.map +1 -1
  9. package/dist/src/index.js +1 -1
  10. package/dist/src/index.js.map +1 -1
  11. package/dist/src/matchers.d.ts +3 -2
  12. package/dist/src/matchers.d.ts.map +1 -1
  13. package/dist/src/matchers.js +37 -9
  14. package/dist/src/matchers.js.map +1 -1
  15. package/dist/src/providers/anthropic.d.ts +5 -3
  16. package/dist/src/providers/anthropic.d.ts.map +1 -1
  17. package/dist/src/providers/anthropic.js +8 -10
  18. package/dist/src/providers/anthropic.js.map +1 -1
  19. package/dist/src/providers/azureopenai.d.ts +9 -8
  20. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  21. package/dist/src/providers/azureopenai.js +33 -36
  22. package/dist/src/providers/azureopenai.js.map +1 -1
  23. package/dist/src/providers/openai.d.ts +12 -12
  24. package/dist/src/providers/openai.d.ts.map +1 -1
  25. package/dist/src/providers/openai.js +54 -65
  26. package/dist/src/providers/openai.js.map +1 -1
  27. package/dist/src/providers/replicate.d.ts +4 -2
  28. package/dist/src/providers/replicate.d.ts.map +1 -1
  29. package/dist/src/providers/replicate.js +10 -8
  30. package/dist/src/providers/replicate.js.map +1 -1
  31. package/dist/src/providers/webhook.d.ts +9 -0
  32. package/dist/src/providers/webhook.d.ts.map +1 -0
  33. package/dist/src/providers/webhook.js +54 -0
  34. package/dist/src/providers/webhook.js.map +1 -0
  35. package/dist/src/providers.d.ts +1 -1
  36. package/dist/src/providers.d.ts.map +1 -1
  37. package/dist/src/providers.js +36 -28
  38. package/dist/src/providers.js.map +1 -1
  39. package/dist/src/suggestions.d.ts.map +1 -1
  40. package/dist/src/suggestions.js +1 -3
  41. package/dist/src/suggestions.js.map +1 -1
  42. package/dist/src/types.d.ts +7 -1
  43. package/dist/src/types.d.ts.map +1 -1
  44. package/dist/src/util.js +1 -1
  45. package/dist/src/util.js.map +1 -1
  46. package/dist/src/web/nextui/404/index.html +1 -1
  47. package/dist/src/web/nextui/404.html +1 -1
  48. package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
  49. package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
  50. package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
  51. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  52. package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
  53. package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
  54. package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
  55. package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
  56. package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
  57. package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
  58. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
  61. package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
  62. package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
  63. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
  64. package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
  65. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
  66. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  67. package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
  68. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  69. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
  72. package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
  73. package/dist/src/web/nextui/eval/index.html +1 -1
  74. package/dist/src/web/nextui/eval/index.txt +6 -6
  75. package/dist/src/web/nextui/index.html +1 -1
  76. package/dist/src/web/nextui/index.txt +5 -5
  77. package/dist/src/web/nextui/setup/index.html +27 -1
  78. package/dist/src/web/nextui/setup/index.txt +9 -9
  79. package/dist/src/web/server.d.ts.map +1 -1
  80. package/dist/src/web/server.js +9 -5
  81. package/dist/src/web/server.js.map +1 -1
  82. package/package.json +4 -4
  83. package/dist/src/web/nextui/_next/static/BEyLhF7rmCpG4uhyEM3Pd/_buildManifest.js +0 -1
  84. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
  85. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
  86. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
  87. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
  88. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
  89. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
  90. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
  91. package/dist/src/web/nextui/_next/static/chunks/891-86c74b761e072027.js +0 -1
  92. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
  93. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
  94. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
  98. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
  99. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
  100. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
  101. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
  102. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
  103. package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
  104. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
  105. package/dist/src/web/nextui/api +0 -1
  106. package/src/__mocks__/esm.ts +0 -3
  107. package/src/assertions.ts +0 -580
  108. package/src/cache.ts +0 -109
  109. package/src/esm.ts +0 -13
  110. package/src/evaluator.ts +0 -500
  111. package/src/index.ts +0 -52
  112. package/src/logger.ts +0 -46
  113. package/src/main.ts +0 -442
  114. package/src/matchers.ts +0 -120
  115. package/src/onboarding.ts +0 -69
  116. package/src/prompts.ts +0 -39
  117. package/src/providers/anthropic.ts +0 -88
  118. package/src/providers/azureopenai.ts +0 -299
  119. package/src/providers/llama.ts +0 -95
  120. package/src/providers/localai.ts +0 -111
  121. package/src/providers/ollama.ts +0 -89
  122. package/src/providers/openai.ts +0 -337
  123. package/src/providers/replicate.ts +0 -99
  124. package/src/providers/scriptCompletion.ts +0 -35
  125. package/src/providers/shared.ts +0 -34
  126. package/src/providers.ts +0 -192
  127. package/src/share.ts +0 -27
  128. package/src/suggestions.ts +0 -63
  129. package/src/table.ts +0 -43
  130. package/src/tableOutput.html +0 -52
  131. package/src/telemetry.ts +0 -70
  132. package/src/types.ts +0 -299
  133. package/src/updates.ts +0 -46
  134. package/src/util.ts +0 -543
  135. package/src/web/nextui/.eslintrc.json +0 -3
  136. package/src/web/nextui/next.config.js +0 -14
  137. package/src/web/nextui/package-lock.json +0 -4644
  138. package/src/web/nextui/package.json +0 -47
  139. package/src/web/nextui/public/favicon.ico +0 -0
  140. package/src/web/nextui/public/logo.svg +0 -30
  141. package/src/web/nextui/src/app/Home.css +0 -3
  142. package/src/web/nextui/src/app/api/route.ts +0 -6
  143. package/src/web/nextui/src/app/components/DarkMode.css +0 -22
  144. package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
  145. package/src/web/nextui/src/app/components/Logo.css +0 -32
  146. package/src/web/nextui/src/app/components/Logo.tsx +0 -11
  147. package/src/web/nextui/src/app/components/PageShell.css +0 -33
  148. package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
  149. package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
  150. package/src/web/nextui/src/app/eval/Eval.css +0 -13
  151. package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
  152. package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
  153. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -364
  154. package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
  155. package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
  156. package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
  157. package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
  158. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
  159. package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
  160. package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
  161. package/src/web/nextui/src/app/eval/index.css +0 -0
  162. package/src/web/nextui/src/app/eval/page.tsx +0 -8
  163. package/src/web/nextui/src/app/eval/store.ts +0 -18
  164. package/src/web/nextui/src/app/eval/types.ts +0 -20
  165. package/src/web/nextui/src/app/globals.css +0 -58
  166. package/src/web/nextui/src/app/layout.tsx +0 -25
  167. package/src/web/nextui/src/app/page.tsx +0 -7
  168. package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
  169. package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
  170. package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
  171. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
  172. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
  173. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
  174. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
  175. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
  176. package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
  177. package/src/web/nextui/src/app/setup/page.css +0 -3
  178. package/src/web/nextui/src/app/setup/page.tsx +0 -160
  179. package/src/web/nextui/src/util/api.ts +0 -1
  180. package/src/web/nextui/src/util/store.ts +0 -53
  181. package/src/web/nextui/tsconfig.json +0 -28
  182. package/src/web/server.ts +0 -151
  183. /package/dist/src/web/nextui/_next/static/{BEyLhF7rmCpG4uhyEM3Pd → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
@@ -1,503 +0,0 @@
1
- import * as React from 'react';
2
- import { diffSentences, diffJson, diffWords } from 'diff';
3
-
4
- import './index.css';
5
-
6
- import invariant from 'tiny-invariant';
7
- import {
8
- createColumnHelper,
9
- flexRender,
10
- getCoreRowModel,
11
- useReactTable,
12
- } from '@tanstack/react-table';
13
- import Checkbox from '@mui/material/Checkbox';
14
- import FormControlLabel from '@mui/material/FormControlLabel';
15
-
16
- import { useStore } from './store';
17
-
18
- import type { CellContext, VisibilityState } from '@tanstack/table-core';
19
-
20
- import EvalOutputPromptDialog from './EvalOutputPromptDialog';
21
-
22
- import type { EvalRow, EvaluateTableOutput, FilterMode, GradingResult } from './types';
23
-
24
- import './ResultsTable.css';
25
-
/**
 * Normalizes a row output into object form.
 *
 * Object outputs are returned untouched. String outputs are converted to a
 * `{ text, pass, score }` object: a leading `[PASS]` or `[FAIL]` marker is
 * removed from the text, `pass` is true only for the `[PASS]` marker, and
 * `score` is 1 for a pass and 0 otherwise.
 */
function formatRowOutput(output: EvaluateTableOutput | string) {
  if (typeof output !== 'string') {
    return output;
  }

  // Backwards compatibility for 0.15.0 breaking change. Remove eventually.
  const passMarker = '[PASS]';
  const failMarker = '[FAIL]';
  const pass = output.startsWith(passMarker);

  // Work out which marker (if any) prefixes the string so it can be sliced off.
  let marker = '';
  if (pass) {
    marker = passMarker;
  } else if (output.startsWith(failMarker)) {
    marker = failMarker;
  }

  return {
    text: output.slice(marker.length),
    pass,
    score: pass ? 1 : 0,
  };
}
44
-
/**
 * Formats a score for display next to the PASS/FAIL label.
 *
 * Returns an empty string for scores of exactly 0 or 1; any other score is
 * rendered with two decimals inside parentheses, e.g. `(0.75)`.
 */
function scoreToString(score: number) {
  // Don't show boolean scores.
  const isBooleanScore = score === 0 || score === 1;
  return isBooleanScore ? '' : `(${score.toFixed(2)})`;
}
52
-
/** Props accepted by `TruncatedText` (and extended by `TableHeader`). */
interface TruncatedTextProps {
  /** Value to display; non-string values are JSON-stringified before rendering. */
  text: string | number;
  /** Number of characters shown before the text is collapsed behind " ...". */
  maxLength: number;
}
57
-
/**
 * Converts text into HTML that is safe to assign through `dangerouslySetInnerHTML`.
 *
 * All markup-significant characters are escaped first, so arbitrary text (for
 * example model output or test variables) cannot inject elements or event
 * handlers. Only `<ins>` / `<del>` tags, which `EvalOutputCell` emits to
 * highlight diffs, are re-enabled afterwards. Newlines become `<br>`.
 */
function toSafeHtml(raw: string): string {
  const escaped = raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  return escaped.replace(/&lt;(\/?(?:ins|del))&gt;/g, '<$1>').replace(/\n/g, '<br>');
}

/**
 * Renders text that collapses to `maxLength` characters and toggles between
 * the collapsed and full form when clicked.
 *
 * Text no longer than `maxLength` is rendered as-is and is not clickable.
 *
 * @param text Value to display; non-strings are JSON-stringified.
 * @param maxLength Maximum number of source characters shown while collapsed.
 */
function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
  const [isTruncated, setIsTruncated] = React.useState<boolean>(true);
  // JSON.stringify returns undefined for undefined input; fall back to an empty
  // string so a missing value renders blank instead of throwing.
  const text = typeof rawText === 'string' ? rawText : (JSON.stringify(rawText) ?? '');

  const toggleTruncate = () => {
    setIsTruncated((previous) => !previous);
  };

  // The length check and the cut are applied to the source text, not to the
  // generated HTML, so an entity or a <br> is never split in half.
  if (text.length <= maxLength) {
    return <span dangerouslySetInnerHTML={{ __html: toSafeHtml(text) }} />;
  }

  if (isTruncated) {
    // Drop a diff tag that the cut left dangling (e.g. "<in") so it is not
    // shown as literal text; an unclosed <ins>/<del> is closed by the browser.
    const preview = text.substring(0, maxLength).replace(/<\/?[a-z]{0,3}$/i, '');
    return (
      <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
        <span dangerouslySetInnerHTML={{ __html: toSafeHtml(preview) }} /> ...
      </span>
    );
  }

  return (
    <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
      <span dangerouslySetInnerHTML={{ __html: toSafeHtml(text) }} />
    </span>
  );
}
88
-
/** Props shared by components that render a single evaluated output cell. */
interface PromptOutputProps {
  /** The evaluated output to display. */
  output: EvaluateTableOutput;
  /** Character limit forwarded to `TruncatedText` for the output text. */
  maxTextLength: number;
  /** Row position reported back through `onRating`. */
  rowIndex: number;
  /** Prompt (column) position reported back through `onRating`. */
  promptIndex: number;
  /** Called when the user clicks thumbs-up (`isPass = true`) or thumbs-down (`isPass = false`). */
  onRating: (rowIndex: number, promptIndex: number, isPass: boolean) => void;
}
96
-
/**
 * Extracts the display text of an output and, for failed outputs, the failure
 * reason that precedes the first `---` separator.
 *
 * Passing outputs are never split, so a passing answer that happens to contain
 * `---` is left intact.
 */
function splitFailureReason(output: EvaluateTableOutput): { reason?: string; text: string } {
  const text = typeof output.text === 'string' ? output.text : JSON.stringify(output.text);
  if (output.pass || !text.includes('---')) {
    return { text };
  }
  // TODO(ian): Plumb through failure message instead of parsing it out.
  const [reason, ...rest] = text.split('---');
  return { reason, text: rest.join('---') };
}

/**
 * Renders one output cell: PASS/FAIL status, the (optionally diffed) output
 * text, token/latency details, and the prompt-inspect and rating actions.
 *
 * When `filterMode` is `'different'`, the text is replaced by a diff against
 * `firstOutput`, with additions wrapped in `<ins>` and removals in `<del>`.
 * That string is handed to `TruncatedText` and to the prompt dialog.
 *
 * @param firstOutput Output of the first prompt in the same row, used as the diff baseline.
 * @param filterMode Active filter mode; only `'different'` changes rendering here.
 */
function EvalOutputCell({
  output,
  maxTextLength,
  rowIndex,
  promptIndex,
  onRating,
  firstOutput,
  filterMode,
}: PromptOutputProps & { firstOutput: EvaluateTableOutput; filterMode: FilterMode }) {
  const [openPrompt, setOpen] = React.useState(false);
  const handlePromptOpen = () => {
    setOpen(true);
  };
  const handlePromptClose = () => {
    setOpen(false);
  };

  const { reason: failureReason, text: ownText } = splitFailureReason(output);
  let text = ownText;

  if (filterMode === 'different' && firstOutput) {
    // Use the same prefix-stripping rule for the baseline as for this cell;
    // otherwise a passing baseline containing '---' would be truncated and
    // would show differences that do not exist.
    const { text: firstOutputText } = splitFailureReason(firstOutput);

    let diffResult;
    try {
      // Try parsing the texts as JSON
      JSON.parse(firstOutputText);
      JSON.parse(text);
      // If no errors are thrown, the texts are valid JSON
      diffResult = diffJson(firstOutputText, text);
    } catch {
      // If an error is thrown, the texts are not valid JSON
      if (firstOutputText.includes('. ') && text.includes('. ')) {
        // If the texts contain a period, they are considered as prose
        diffResult = diffSentences(firstOutputText, text);
      } else {
        // If the texts do not contain a period, use diffWords
        diffResult = diffWords(firstOutputText, text);
      }
    }
    text = diffResult
      .map((part: { added?: boolean; removed?: boolean; value: string }) =>
        part.added
          ? `<ins>${part.value}</ins>`
          : part.removed
          ? `<del>${part.value}</del>`
          : part.value,
      )
      .join('');
  }

  const handleClick = (isPass: boolean) => {
    onRating(rowIndex, promptIndex, isPass);
  };

  // TODO(ian): output.prompt check for backwards compatibility, remove after 0.17.0
  return (
    <>
      <div className="cell">
        {output.pass && (
          <div className="status pass">
            PASS <span className="score">{scoreToString(output.score)}</span>
          </div>
        )}
        {!output.pass && (
          <div className="status fail">
            [FAIL<span className="score">{scoreToString(output.score)}</span>] {failureReason}
          </div>
        )}{' '}
        <TruncatedText text={text} maxLength={maxTextLength} />
      </div>
      <div className="cell-detail">
        {output.tokenUsage?.cached ? (
          <span>{output.tokenUsage.cached} tokens (cached)</span>
        ) : (
          <>
            {/* Ternary instead of `&&` so a total of 0 does not render a stray "0". */}
            {output.tokenUsage?.total ? <span>{output.tokenUsage.total} tokens</span> : null} |{' '}
            <span>{output.latencyMs} ms</span>
          </>
        )}
      </div>
      <div className="cell-actions">
        {output.prompt && (
          <>
            <span className="action" onClick={handlePromptOpen}>
              🔎
            </span>
            <EvalOutputPromptDialog
              open={openPrompt}
              onClose={handlePromptClose}
              prompt={output.prompt}
              gradingResults={output.gradingResult?.componentResults}
              output={text}
            />
          </>
        )}
        <span className="action" onClick={() => handleClick(true)}>
          👍
        </span>
        <span className="action" onClick={() => handleClick(false)}>
          👎
        </span>
      </div>
    </>
  );
}
212
-
/**
 * Column header content: the (truncatable) header text, an optional magnifier
 * that opens a dialog showing `expandedText`, and a small caption underneath.
 *
 * @param text Header text, rendered through `TruncatedText`.
 * @param maxLength Truncation limit for `text`.
 * @param smallText Caption rendered below the header text.
 * @param expandedText When truthy, enables the magnifier and is shown as the dialog's prompt.
 */
function TableHeader({
  text,
  maxLength,
  smallText,
  expandedText,
}: TruncatedTextProps & { smallText: string; expandedText?: string }) {
  const [isDialogOpen, setDialogOpen] = React.useState(false);

  // Magnifier + dialog pair; only present when there is expanded text to show.
  const expandedPrompt = expandedText && (
    <>
      <span className="action" onClick={() => setDialogOpen(true)}>
        🔎
      </span>
      <EvalOutputPromptDialog
        open={isDialogOpen}
        onClose={() => setDialogOpen(false)}
        prompt={expandedText}
      />
    </>
  );

  return (
    <div>
      <TruncatedText text={text} maxLength={maxLength} />
      {expandedPrompt}
      <div className="smalltext">{smallText}</div>
    </div>
  );
}
245
-
/** Props for the `ResultsTable` component. */
interface ResultsTableProps {
  /** Truncation limit applied to header, variable and output text. */
  maxTextLength: number;
  /** Column visibility state handed to the react-table instance. */
  columnVisibility: VisibilityState;
  /** CSS `word-break` value applied to the `<table>` element. */
  wordBreak: 'break-word' | 'break-all';
  /** Row filter; `'failures'` and `'different'` narrow the rows, other values show all. */
  filterMode: FilterMode;
  /** Per-column "show failures" flags, keyed by column id (`Prompt N`). */
  failureFilter: { [key: string]: boolean };
  /** Called when a column's "Show failures" checkbox changes. */
  onFailureFilterToggle: (columnId: string, checked: boolean) => void;
}
254
-
/**
 * Renders the evaluation results as a resizable table: one column per test
 * variable followed by one column per prompt, with per-prompt pass statistics
 * in the headers and rating controls in every output cell.
 *
 * The table data comes from the store (`useStore`); the component throws via
 * `invariant` if no table is loaded. Rows are narrowed according to
 * `filterMode`:
 * - `'failures'`: rows with a failing output in any column whose
 *   `failureFilter` flag is set (all rows when no flag is set);
 * - `'different'`: rows whose outputs do not all share the same `text`.
 *
 * Rating a cell writes a new table to the store with that output's `pass` and
 * `score` replaced.
 */
export default function ResultsTable({
  maxTextLength,
  columnVisibility,
  wordBreak,
  filterMode,
  failureFilter,
  onFailureFilterToggle,
}: ResultsTableProps) {
  const { table, setTable } = useStore();
  invariant(table, 'Table should be defined');
  const { head, body } = table;

  // Per-prompt tallies over ALL rows (not just the filtered ones), shown in the headers.
  const numGoodTests = head.prompts.map((_, idx) =>
    body.reduce((acc, row) => {
      return acc + (row.outputs[idx].pass ? 1 : 0);
    }, 0),
  );

  const numAsserts = head.prompts.map((_, idx) =>
    body.reduce((acc, row) => {
      return acc + (row.outputs[idx].gradingResult?.componentResults?.length || 0);
    }, 0),
  );

  const numGoodAsserts = head.prompts.map((_, idx) =>
    body.reduce((acc, row) => {
      const componentResults = row.outputs[idx].gradingResult?.componentResults;
      return (
        acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
      );
    }, 0),
  );

  // Declared before the handlers and columns that close over it.
  const filteredBody = React.useMemo(() => {
    if (filterMode === 'failures') {
      if (Object.values(failureFilter).every((v) => !v)) {
        return body;
      }
      return body.filter((row) => {
        return row.outputs.some((output, idx) => {
          const columnId = `Prompt ${idx + 1}`;
          const isFail = !output.pass;
          return failureFilter[columnId] && isFail;
        });
      });
    } else if (filterMode === 'different') {
      return body.filter((row) => {
        // TODO(ian): This works for strings, but not objects.
        return !row.outputs.every((output) => output.text === row.outputs[0].text);
      });
    }
    return body;
  }, [body, failureFilter, filterMode]);

  const handleRating = (rowIndex: number, promptIndex: number, isPass: boolean) => {
    // react-table is fed `filteredBody`, so `rowIndex` is relative to the
    // filtered rows. Map it back to the row's position in `body`; otherwise an
    // active filter would cause a different row to be rated.
    const ratedRow = filteredBody[rowIndex];
    if (!ratedRow) {
      return;
    }
    const bodyIndex = body.indexOf(ratedRow);
    if (bodyIndex === -1) {
      return;
    }

    // Build new output/row objects instead of mutating the ones held in the store.
    const updatedOutputs = [...ratedRow.outputs];
    updatedOutputs[promptIndex] = {
      ...updatedOutputs[promptIndex],
      pass: isPass,
      score: isPass ? 1 : 0,
    };
    const updatedData = [...body];
    updatedData[bodyIndex] = { ...ratedRow, outputs: updatedOutputs };
    setTable({
      head,
      body: updatedData,
    });
  };

  const highestPassingIndex = numGoodTests.reduce(
    (maxIndex, currentPassCount, currentIndex, array) => {
      return currentPassCount > array[maxIndex] ? currentIndex : maxIndex;
    },
    0,
  );
  const highestPassingCount = numGoodTests[highestPassingIndex];
  const columnHelper = createColumnHelper<EvalRow>();
  const columns = [
    columnHelper.group({
      id: 'vars',
      header: () => <span>Variables</span>,
      columns: head.vars.map((varName, idx) =>
        columnHelper.accessor(
          (row: EvalRow) => {
            return row.vars[idx];
          },
          {
            id: `Variable ${idx + 1}`,
            header: () => (
              <TableHeader
                smallText={`Variable ${idx + 1}`}
                text={varName}
                maxLength={maxTextLength}
              />
            ),
            cell: (info: CellContext<EvalRow, string>) => (
              <TruncatedText text={info.getValue()} maxLength={maxTextLength} />
            ),
            // Minimize the size of Variable columns.
            size: 50,
          },
        ),
      ),
    }),
    columnHelper.group({
      id: 'prompts',
      header: () => <span>Outputs</span>,
      columns: head.prompts.map((prompt, idx) =>
        columnHelper.accessor((row: EvalRow) => formatRowOutput(row.outputs[idx]), {
          id: `Prompt ${idx + 1}`,
          header: () => {
            // Guard the division so an empty table shows 0.00% rather than NaN%.
            const pct =
              body.length > 0 ? ((numGoodTests[idx] / body.length) * 100.0).toFixed(2) : '0.00';
            const isHighestPassing =
              numGoodTests[idx] === highestPassingCount && highestPassingCount !== 0;
            const columnId = `Prompt ${idx + 1}`;
            const isChecked = failureFilter[columnId] || false;
            // TODO(ian): prompt string support for backwards compatibility, remove after 0.17.0
            return (
              <>
                <TableHeader
                  smallText={`Prompt ${idx + 1}`}
                  text={typeof prompt === 'string' ? prompt : prompt.display}
                  expandedText={typeof prompt === 'string' ? undefined : prompt.raw}
                  maxLength={maxTextLength}
                />
                {filterMode === 'failures' && (
                  <FormControlLabel
                    sx={{
                      '& .MuiFormControlLabel-label': {
                        fontSize: '0.75rem',
                      },
                    }}
                    control={
                      <Checkbox
                        checked={isChecked}
                        onChange={(event) => onFailureFilterToggle(columnId, event.target.checked)}
                      />
                    }
                    label="Show failures"
                  />
                )}
                <div className={`summary ${isHighestPassing ? 'highlight' : ''}`}>
                  Passing: <strong>{pct}%</strong> ({numGoodTests[idx]}/{body.length} cases
                  {numAsserts[idx] ? (
                    <span>
                      , {numGoodAsserts[idx]}/{numAsserts[idx]} asserts
                    </span>
                  ) : null}
                  )
                </div>
              </>
            );
          },
          cell: (info: CellContext<EvalRow, EvaluateTableOutput>) => (
            <EvalOutputCell
              output={info.getValue() as unknown as EvaluateTableOutput}
              maxTextLength={maxTextLength}
              rowIndex={info.row.index}
              promptIndex={idx}
              onRating={handleRating}
              firstOutput={filteredBody[info.row.index].outputs[0]}
              filterMode={filterMode}
            />
          ),
        }),
      ),
    }),
  ];

  const reactTable = useReactTable({
    data: filteredBody,
    columns,
    columnResizeMode: 'onChange',
    getCoreRowModel: getCoreRowModel(),

    state: {
      columnVisibility,
    },
  });

  return (
    <table
      style={{
        wordBreak,
      }}
    >
      <thead>
        {reactTable.getHeaderGroups().map((headerGroup: any) => (
          <tr key={headerGroup.id} className="header">
            {headerGroup.headers.map((header: any) => {
              return (
                <th
                  key={header.id}
                  {...{
                    colSpan: header.colSpan,
                    style: {
                      width: header.getSize(),
                    },
                  }}
                >
                  {header.isPlaceholder
                    ? null
                    : flexRender(header.column.columnDef.header, header.getContext())}
                  <div
                    {...{
                      onMouseDown: header.getResizeHandler(),
                      onTouchStart: header.getResizeHandler(),
                      className: `resizer ${header.column.getIsResizing() ? 'isResizing' : ''}`,
                    }}
                  />
                </th>
              );
            })}
          </tr>
        ))}
      </thead>
      <tbody>
        {reactTable.getRowModel().rows.map((row: any, rowIndex: any) => {
          // Only the first non-variable cell in each row gets the column border class.
          let colBorderDrawn = false;
          return (
            <tr key={row.id}>
              {row.getVisibleCells().map((cell: any) => {
                const isVariableCol = cell.column.id.startsWith('Variable');
                const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
                if (shouldDrawColBorder) {
                  colBorderDrawn = true;
                }
                const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
                return (
                  <td
                    key={cell.id}
                    {...{
                      style: {
                        width: cell.column.getSize(),
                      },
                      className: `${isVariableCol ? 'variable' : ''} ${
                        shouldDrawRowBorder ? 'first-prompt-row' : ''
                      } ${shouldDrawColBorder ? 'first-prompt-col' : ''}`,
                    }}
                  >
                    {flexRender(cell.column.columnDef.cell, cell.getContext())}
                  </td>
                );
              })}
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}