promptfoo 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/README.md +1 -1
  2. package/dist/package.json +4 -4
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +5 -0
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/evaluator.js +1 -1
  7. package/dist/src/evaluator.js.map +1 -1
  8. package/dist/src/index.d.ts +1 -5
  9. package/dist/src/index.d.ts.map +1 -1
  10. package/dist/src/index.js +1 -1
  11. package/dist/src/index.js.map +1 -1
  12. package/dist/src/matchers.d.ts +3 -2
  13. package/dist/src/matchers.d.ts.map +1 -1
  14. package/dist/src/matchers.js +37 -9
  15. package/dist/src/matchers.js.map +1 -1
  16. package/dist/src/providers/anthropic.d.ts +5 -3
  17. package/dist/src/providers/anthropic.d.ts.map +1 -1
  18. package/dist/src/providers/anthropic.js +8 -10
  19. package/dist/src/providers/anthropic.js.map +1 -1
  20. package/dist/src/providers/azureopenai.d.ts +9 -8
  21. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  22. package/dist/src/providers/azureopenai.js +33 -36
  23. package/dist/src/providers/azureopenai.js.map +1 -1
  24. package/dist/src/providers/openai.d.ts +12 -12
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +54 -65
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +4 -2
  29. package/dist/src/providers/replicate.d.ts.map +1 -1
  30. package/dist/src/providers/replicate.js +10 -8
  31. package/dist/src/providers/replicate.js.map +1 -1
  32. package/dist/src/providers/webhook.d.ts +9 -0
  33. package/dist/src/providers/webhook.d.ts.map +1 -0
  34. package/dist/src/providers/webhook.js +54 -0
  35. package/dist/src/providers/webhook.js.map +1 -0
  36. package/dist/src/providers.d.ts +1 -1
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +36 -28
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/suggestions.d.ts.map +1 -1
  41. package/dist/src/suggestions.js +1 -3
  42. package/dist/src/suggestions.js.map +1 -1
  43. package/dist/src/types.d.ts +7 -1
  44. package/dist/src/types.d.ts.map +1 -1
  45. package/dist/src/util.js +1 -1
  46. package/dist/src/util.js.map +1 -1
  47. package/dist/src/web/nextui/404/index.html +1 -1
  48. package/dist/src/web/nextui/404.html +1 -1
  49. package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
  50. package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
  51. package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
  52. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  53. package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
  54. package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
  55. package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
  56. package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
  57. package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
  58. package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
  61. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
  62. package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
  63. package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
  64. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
  65. package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
  66. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
  67. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  68. package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
  69. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
  73. package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
  74. package/dist/src/web/nextui/eval/index.html +1 -1
  75. package/dist/src/web/nextui/eval/index.txt +6 -6
  76. package/dist/src/web/nextui/index.html +1 -1
  77. package/dist/src/web/nextui/index.txt +5 -5
  78. package/dist/src/web/nextui/setup/index.html +27 -1
  79. package/dist/src/web/nextui/setup/index.txt +9 -9
  80. package/dist/src/web/server.d.ts.map +1 -1
  81. package/dist/src/web/server.js +9 -5
  82. package/dist/src/web/server.js.map +1 -1
  83. package/package.json +4 -4
  84. package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
  85. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
  86. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
  87. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
  88. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
  89. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
  90. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
  91. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
  92. package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
  93. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
  94. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
  98. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
  99. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
  100. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
  101. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
  102. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
  103. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
  104. package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
  105. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
  106. package/dist/src/web/nextui/api +0 -1
  107. package/src/__mocks__/esm.ts +0 -3
  108. package/src/assertions.ts +0 -580
  109. package/src/cache.ts +0 -109
  110. package/src/esm.ts +0 -13
  111. package/src/evaluator.ts +0 -500
  112. package/src/index.ts +0 -52
  113. package/src/logger.ts +0 -46
  114. package/src/main.ts +0 -442
  115. package/src/matchers.ts +0 -120
  116. package/src/onboarding.ts +0 -69
  117. package/src/prompts.ts +0 -39
  118. package/src/providers/anthropic.ts +0 -88
  119. package/src/providers/azureopenai.ts +0 -299
  120. package/src/providers/llama.ts +0 -95
  121. package/src/providers/localai.ts +0 -111
  122. package/src/providers/ollama.ts +0 -89
  123. package/src/providers/openai.ts +0 -337
  124. package/src/providers/replicate.ts +0 -99
  125. package/src/providers/scriptCompletion.ts +0 -35
  126. package/src/providers/shared.ts +0 -34
  127. package/src/providers.ts +0 -192
  128. package/src/share.ts +0 -27
  129. package/src/suggestions.ts +0 -63
  130. package/src/table.ts +0 -43
  131. package/src/tableOutput.html +0 -52
  132. package/src/telemetry.ts +0 -70
  133. package/src/types.ts +0 -299
  134. package/src/updates.ts +0 -46
  135. package/src/util.ts +0 -543
  136. package/src/web/nextui/.eslintrc.json +0 -3
  137. package/src/web/nextui/next.config.js +0 -14
  138. package/src/web/nextui/package-lock.json +0 -4644
  139. package/src/web/nextui/package.json +0 -47
  140. package/src/web/nextui/public/favicon.ico +0 -0
  141. package/src/web/nextui/public/logo.svg +0 -30
  142. package/src/web/nextui/src/app/Home.css +0 -3
  143. package/src/web/nextui/src/app/api/route.ts +0 -6
  144. package/src/web/nextui/src/app/components/DarkMode.css +0 -22
  145. package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
  146. package/src/web/nextui/src/app/components/Logo.css +0 -32
  147. package/src/web/nextui/src/app/components/Logo.tsx +0 -11
  148. package/src/web/nextui/src/app/components/PageShell.css +0 -33
  149. package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
  150. package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
  151. package/src/web/nextui/src/app/eval/Eval.css +0 -13
  152. package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
  153. package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
  154. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
  155. package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
  156. package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
  157. package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
  158. package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
  159. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
  160. package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
  161. package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
  162. package/src/web/nextui/src/app/eval/index.css +0 -0
  163. package/src/web/nextui/src/app/eval/page.tsx +0 -8
  164. package/src/web/nextui/src/app/eval/store.ts +0 -18
  165. package/src/web/nextui/src/app/eval/types.ts +0 -20
  166. package/src/web/nextui/src/app/globals.css +0 -58
  167. package/src/web/nextui/src/app/layout.tsx +0 -25
  168. package/src/web/nextui/src/app/page.tsx +0 -7
  169. package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
  170. package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
  171. package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
  172. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
  173. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
  174. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
  175. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
  176. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
  177. package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
  178. package/src/web/nextui/src/app/setup/page.css +0 -3
  179. package/src/web/nextui/src/app/setup/page.tsx +0 -160
  180. package/src/web/nextui/src/util/api.ts +0 -1
  181. package/src/web/nextui/src/util/store.ts +0 -53
  182. package/src/web/nextui/tsconfig.json +0 -28
  183. package/src/web/server.ts +0 -151
  184. /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
@@ -1,503 +0,0 @@
1
- import * as React from 'react';
2
- import { diffSentences, diffJson, diffWords } from 'diff';
3
-
4
- import './index.css';
5
-
6
- import invariant from 'tiny-invariant';
7
- import {
8
- createColumnHelper,
9
- flexRender,
10
- getCoreRowModel,
11
- useReactTable,
12
- } from '@tanstack/react-table';
13
- import Checkbox from '@mui/material/Checkbox';
14
- import FormControlLabel from '@mui/material/FormControlLabel';
15
-
16
- import { useStore } from './store';
17
-
18
- import type { CellContext, VisibilityState } from '@tanstack/table-core';
19
-
20
- import EvalOutputPromptDialog from './EvalOutputPromptDialog';
21
-
22
- import type { EvalRow, EvaluateTableOutput, FilterMode, GradingResult } from './types';
23
-
24
- import './ResultsTable.css';
25
-
/**
 * Normalizes a table cell's output to object form.
 *
 * Object outputs are returned unchanged. String outputs are the legacy
 * format: an optional `[PASS]` or `[FAIL]` marker followed by the text.
 *
 * @param output - The cell output, either already structured or a legacy string.
 * @returns The output object itself, or `{ text, pass, score }` derived from
 *   the legacy string. `pass` is true only for a `[PASS]` marker, and `score`
 *   is 1 when passing, 0 otherwise.
 */
function formatRowOutput(output: EvaluateTableOutput | string) {
  if (typeof output !== 'string') {
    return output;
  }

  // Backwards compatibility for 0.15.0 breaking change. Remove eventually.
  const passMarker = '[PASS]';
  const failMarker = '[FAIL]';
  const pass = output.startsWith(passMarker);
  // An unmarked string keeps its full text and counts as not passing.
  const marker = pass ? passMarker : output.startsWith(failMarker) ? failMarker : '';

  return {
    text: output.slice(marker.length),
    pass,
    score: pass ? 1 : 0,
  };
}
44
-
/**
 * Formats a score for display next to the PASS/FAIL label.
 *
 * @param score - The numeric score of an output.
 * @returns An empty string for the boolean-like scores 0 and 1 and for a
 *   missing or non-finite score; otherwise the score with two decimals in
 *   parentheses, e.g. `(0.50)`.
 */
function scoreToString(score: number) {
  // A missing score would throw on toFixed and NaN would render as "(NaN)";
  // neither carries information worth showing.
  if (!Number.isFinite(score)) {
    return '';
  }
  if (score === 0 || score === 1) {
    // Don't show boolean scores.
    return '';
  }
  return `(${score.toFixed(2)})`;
}
52
-
/** Props shared by components that render possibly-long text in a table cell. */
interface TruncatedTextProps {
  /** Length above which the text is shown truncated until it is clicked. */
  maxLength: number;
  /** Content to display; non-string values are rendered via `JSON.stringify`. */
  text: string | number;
}
57
-
/** Characters that must be escaped before text is injected as HTML. */
const CELL_HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escapes a string for use with `dangerouslySetInnerHTML`, keeping only the
 * formatting tags this file generates itself: `<br>` for newlines and
 * `<ins>` / `<del>` for diff highlighting. Any other markup in the text is
 * displayed literally instead of being interpreted by the browser.
 */
function sanitizeCellHtml(html: string): string {
  const escaped = html.replace(/[&<>"']/g, (ch) => CELL_HTML_ESCAPES[ch] ?? ch);
  return escaped.replace(/&lt;(\/?(?:ins|del)|br\s*\/?)&gt;/g, '<$1>');
}

/**
 * Renders text that collapses to its first `maxLength` characters and
 * toggles between the truncated and the full form when clicked.
 *
 * Newlines are shown as line breaks. The text is injected as HTML, so it is
 * sanitized first; only `<br>`, `<ins>` and `<del>` survive as markup.
 *
 * @param text - Value to show; non-strings are serialized with `JSON.stringify`.
 * @param maxLength - Texts no longer than this are always shown in full and
 *   are not clickable.
 */
function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
  const [isTruncated, setIsTruncated] = React.useState<boolean>(true);

  // JSON.stringify yields undefined for an undefined input; fall back to ''.
  const serialized = typeof rawText === 'string' ? rawText : JSON.stringify(rawText) ?? '';
  const text = serialized.replace(/\n/g, '<br>');

  const toggleTruncate = () => {
    setIsTruncated((previous) => !previous);
  };

  if (text.length <= maxLength) {
    return <span dangerouslySetInnerHTML={{ __html: sanitizeCellHtml(text) }} />;
  }

  if (isTruncated) {
    // Drop a tag that the cut left unterminated so it is not shown as literal text.
    const preview = text.substring(0, maxLength).replace(/<\/?[a-zA-Z][^<>]*$/, '');
    return (
      <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
        <span dangerouslySetInnerHTML={{ __html: sanitizeCellHtml(preview) }} /> ...
      </span>
    );
  }

  return (
    <span style={{ cursor: 'pointer' }} onClick={toggleTruncate}>
      <span dangerouslySetInnerHTML={{ __html: sanitizeCellHtml(text) }} />
    </span>
  );
}
88
-
/** Props describing one output cell and how to report a manual rating for it. */
interface PromptOutputProps {
  /** The evaluated output rendered in the cell. */
  output: EvaluateTableOutput;
  /** Maximum number of characters shown before the cell text is truncated. */
  maxTextLength: number;
  /** Row position handed back to `onRating`. */
  rowIndex: number;
  /** Prompt (column) position handed back to `onRating`. */
  promptIndex: number;
  /**
   * Called when the thumbs-up (`isPass` true) or thumbs-down (`isPass` false)
   * action is clicked.
   */
  onRating(rowIndex: number, promptIndex: number, isPass: boolean): void;
}
96
-
/**
 * Diffs two output texts: as JSON when both parse as JSON, by sentence when
 * both contain ". ", and by word otherwise.
 */
function diffOutputTexts(baseline: string, candidate: string) {
  try {
    // Parsing is only a validity probe; the diff itself runs on the raw strings.
    JSON.parse(baseline);
    JSON.parse(candidate);
    return diffJson(baseline, candidate);
  } catch {
    // At least one text is not valid JSON.
    if (baseline.includes('. ') && candidate.includes('. ')) {
      // Both texts contain a sentence break, so treat them as prose.
      return diffSentences(baseline, candidate);
    }
    return diffWords(baseline, candidate);
  }
}

/** Renders a diff as HTML, wrapping additions in `<ins>` and removals in `<del>`. */
function diffToHtml(baseline: string, candidate: string): string {
  return diffOutputTexts(baseline, candidate)
    .map((part: { added?: boolean; removed?: boolean; value: string }) => {
      if (part.added) {
        return `<ins>${part.value}</ins>`;
      }
      if (part.removed) {
        return `<del>${part.value}</del>`;
      }
      return part.value;
    })
    .join('');
}

/**
 * Renders one output cell: PASS/FAIL status with score, the output text,
 * token and latency details, and the prompt-inspection and rating actions.
 *
 * For a failing output whose text contains `---`, the part before the first
 * `---` is shown as the failure reason and the remainder as the output text.
 * When `filterMode` is `'different'`, the text is displayed as a diff against
 * `firstOutput`.
 *
 * @param output - The output to render.
 * @param maxTextLength - Truncation length forwarded to the text renderer.
 * @param rowIndex - Row position passed back through `onRating`.
 * @param promptIndex - Prompt position passed back through `onRating`.
 * @param onRating - Called with the positions and `true`/`false` for 👍/👎.
 * @param firstOutput - Output used as the diff baseline in `'different'` mode.
 * @param filterMode - Active filter mode of the table.
 */
function EvalOutputCell({
  output,
  maxTextLength,
  rowIndex,
  promptIndex,
  onRating,
  firstOutput,
  filterMode,
}: PromptOutputProps & { firstOutput: EvaluateTableOutput; filterMode: FilterMode }) {
  const [openPrompt, setOpen] = React.useState(false);
  const handlePromptOpen = () => {
    setOpen(true);
  };
  const handlePromptClose = () => {
    setOpen(false);
  };

  // JSON.stringify yields undefined for an undefined input; fall back to ''.
  let text = typeof output.text === 'string' ? output.text : JSON.stringify(output.text) ?? '';
  let chunks: string[] = [];
  if (!output.pass && text.includes('---')) {
    // TODO(ian): Plumb through failure message instead of parsing it out.
    chunks = text.split('---');
    text = chunks.slice(1).join('---');
  }

  if (filterMode === 'different' && firstOutput) {
    let firstOutputText =
      typeof firstOutput.text === 'string'
        ? firstOutput.text
        : JSON.stringify(firstOutput.text) ?? '';

    // Strip the failure reason only from a failing baseline, mirroring how
    // `text` is treated above; a passing output may contain `---` as content.
    if (!firstOutput.pass && firstOutputText.includes('---')) {
      firstOutputText = firstOutputText.split('---').slice(1).join('---');
    }

    text = diffToHtml(firstOutputText, text);
  }

  const handleClick = (isPass: boolean) => {
    onRating(rowIndex, promptIndex, isPass);
  };

  // TODO(ian): output.prompt check for backwards compatibility, remove after 0.17.0
  return (
    <>
      <div className="cell">
        {output.pass && (
          <div className="status pass">
            PASS <span className="score">{scoreToString(output.score)}</span>
          </div>
        )}
        {!output.pass && (
          <div className="status fail">
            [FAIL<span className="score">{scoreToString(output.score)}</span>] {chunks[0]}
          </div>
        )}{' '}
        <TruncatedText text={text} maxLength={maxTextLength} />
      </div>
      <div className="cell-detail">
        {output.tokenUsage?.cached ? (
          <span>{output.tokenUsage.cached} tokens (cached)</span>
        ) : (
          <>
            {/* A ternary avoids rendering a literal "0" or a dangling "|" when no total exists. */}
            {output.tokenUsage?.total ? (
              <>
                <span>{output.tokenUsage.total} tokens</span> |{' '}
              </>
            ) : null}
            <span>{output.latencyMs} ms</span>
          </>
        )}
      </div>
      <div className="cell-actions">
        {output.prompt && (
          <>
            <span className="action" onClick={handlePromptOpen}>
              🔎
            </span>
            {/* NOTE(review): in 'different' mode `text` carries <ins>/<del> markup; confirm the dialog expects that. */}
            <EvalOutputPromptDialog
              open={openPrompt}
              onClose={handlePromptClose}
              prompt={output.prompt}
              gradingResults={output.gradingResult?.componentResults}
              output={text}
            />
          </>
        )}
        <span className="action" onClick={() => handleClick(true)}>
          👍
        </span>
        <span className="action" onClick={() => handleClick(false)}>
          👎
        </span>
      </div>
    </>
  );
}
212
-
/**
 * Column header: the (possibly truncated) header text, an optional 🔎 action
 * that opens a dialog showing `expandedText`, and a small caption underneath.
 *
 * @param text - Header text to display.
 * @param maxLength - Truncation length for the header text.
 * @param smallText - Caption rendered below the header text.
 * @param expandedText - When set, enables the 🔎 action and is shown in the dialog.
 */
function TableHeader({
  text,
  maxLength,
  smallText,
  expandedText,
}: TruncatedTextProps & { smallText: string; expandedText?: string }) {
  const [dialogOpen, setDialogOpen] = React.useState(false);
  const showDialog = () => setDialogOpen(true);
  const hideDialog = () => setDialogOpen(false);

  // The magnifier and its dialog only exist when there is expanded text to show.
  const expandedView = expandedText && (
    <>
      <span className="action" onClick={showDialog}>
        🔎
      </span>
      <EvalOutputPromptDialog open={dialogOpen} onClose={hideDialog} prompt={expandedText} />
    </>
  );

  return (
    <div>
      <TruncatedText text={text} maxLength={maxLength} />
      {expandedView}
      <div className="smalltext">{smallText}</div>
    </div>
  );
}
245
-
/** Props accepted by the results table. */
interface ResultsTableProps {
  /** Maximum number of characters shown in a cell or header before truncation. */
  maxTextLength: number;
  /** Column visibility state handed to the table instance. */
  columnVisibility: VisibilityState;
  /** CSS `word-break` value applied to the table element. */
  wordBreak: 'break-word' | 'break-all';
  /** Active row filter; the table reacts to `'failures'` and `'different'`. */
  filterMode: FilterMode;
  /** Per-column "show failures" flags, keyed by column id such as `Prompt 1`. */
  failureFilter: Record<string, boolean>;
  /** Called with the column id and new checkbox state when a "Show failures" box changes. */
  onFailureFilterToggle(columnId: string, checked: boolean): void;
}
254
-
/**
 * Renders the evaluation results as a resizable table: one column per
 * variable, one column per prompt, and one row per test case.
 *
 * Prompt headers show the pass rate (and assertion counts when available)
 * and highlight the prompt(s) with the most passing cases. Rows can be
 * filtered to failures in selected columns (`filterMode === 'failures'`) or
 * to rows whose outputs differ (`filterMode === 'different'`). Manual 👍/👎
 * ratings are written back to the store via `setTable`.
 *
 * @param maxTextLength - Truncation length for cell and header text.
 * @param columnVisibility - Column visibility state for the table.
 * @param wordBreak - CSS `word-break` applied to the table.
 * @param filterMode - Active row filter.
 * @param failureFilter - Per-column failure filter flags keyed by column id.
 * @param onFailureFilterToggle - Called when a "Show failures" checkbox changes.
 */
export default function ResultsTable({
  maxTextLength,
  columnVisibility,
  wordBreak,
  filterMode,
  failureFilter,
  onFailureFilterToggle,
}: ResultsTableProps) {
  const { table, setTable } = useStore();
  invariant(table, 'Table should be defined');
  const { head, body } = table;

  // Per-prompt totals used by the header summaries.
  const numGoodTests = head.prompts.map((_, idx) =>
    body.reduce((acc, row) => acc + (row.outputs[idx].pass ? 1 : 0), 0),
  );

  const numAsserts = head.prompts.map((_, idx) =>
    body.reduce(
      (acc, row) => acc + (row.outputs[idx].gradingResult?.componentResults?.length || 0),
      0,
    ),
  );

  const numGoodAsserts = head.prompts.map((_, idx) =>
    body.reduce((acc, row) => {
      const componentResults = row.outputs[idx].gradingResult?.componentResults;
      return (
        acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
      );
    }, 0),
  );

  // Position of each row object within the unfiltered body. Rendered rows come
  // from the filtered list, whose indices do not line up with `body`.
  const bodyIndexByRow = React.useMemo(
    () => new Map<EvalRow, number>(body.map((row, index) => [row, index] as const)),
    [body],
  );

  /**
   * Records a manual rating. `rowIndex` is an index into the unfiltered body.
   * The rated row and output are copied rather than mutated so the objects
   * currently held by the store are left untouched.
   */
  const handleRating = (rowIndex: number, promptIndex: number, isPass: boolean) => {
    const updatedData = body.map((row, index) => {
      if (index !== rowIndex) {
        return row;
      }
      return {
        ...row,
        outputs: row.outputs.map((output, outputIndex) =>
          outputIndex === promptIndex
            ? { ...output, pass: isPass, score: isPass ? 1 : 0 }
            : output,
        ),
      };
    });
    setTable({
      head,
      body: updatedData,
    });
  };

  const filteredBody = React.useMemo(() => {
    if (filterMode === 'failures') {
      // With no column selected, the failure filter shows everything.
      if (Object.values(failureFilter).every((v) => !v)) {
        return body;
      }
      return body.filter((row) =>
        row.outputs.some((output, idx) => {
          const columnId = `Prompt ${idx + 1}`;
          const isFail = !output.pass;
          return failureFilter[columnId] && isFail;
        }),
      );
    } else if (filterMode === 'different') {
      return body.filter((row) => {
        // TODO(ian): This works for strings, but not objects.
        return !row.outputs.every((output) => output.text === row.outputs[0].text);
      });
    }
    return body;
  }, [body, failureFilter, filterMode]);

  const highestPassingIndex = numGoodTests.reduce(
    (maxIndex, currentPassCount, currentIndex, array) =>
      currentPassCount > array[maxIndex] ? currentIndex : maxIndex,
    0,
  );
  const highestPassingCount = numGoodTests[highestPassingIndex];

  const columnHelper = createColumnHelper<EvalRow>();
  const columns = [
    columnHelper.group({
      id: 'vars',
      header: () => <span>Variables</span>,
      columns: head.vars.map((varName, idx) =>
        columnHelper.accessor(
          (row: EvalRow) => {
            return row.vars[idx];
          },
          {
            id: `Variable ${idx + 1}`,
            header: () => (
              <TableHeader
                smallText={`Variable ${idx + 1}`}
                text={varName}
                maxLength={maxTextLength}
              />
            ),
            cell: (info: CellContext<EvalRow, string>) => (
              <TruncatedText text={info.getValue()} maxLength={maxTextLength} />
            ),
            // Minimize the size of Variable columns.
            size: 50,
          },
        ),
      ),
    }),
    columnHelper.group({
      id: 'prompts',
      header: () => <span>Outputs</span>,
      columns: head.prompts.map((prompt, idx) =>
        columnHelper.accessor((row: EvalRow) => formatRowOutput(row.outputs[idx]), {
          id: `Prompt ${idx + 1}`,
          header: () => {
            // Avoid "NaN%" when there are no test cases.
            const pct =
              body.length > 0 ? ((numGoodTests[idx] / body.length) * 100.0).toFixed(2) : '0.00';
            const isHighestPassing =
              numGoodTests[idx] === highestPassingCount && highestPassingCount !== 0;
            const columnId = `Prompt ${idx + 1}`;
            const isChecked = failureFilter[columnId] || false;
            // TODO(ian): prompt string support for backwards compatibility, remove after 0.17.0
            return (
              <>
                <TableHeader
                  smallText={`Prompt ${idx + 1}`}
                  text={typeof prompt === 'string' ? prompt : prompt.display}
                  expandedText={typeof prompt === 'string' ? undefined : prompt.raw}
                  maxLength={maxTextLength}
                />
                {filterMode === 'failures' && (
                  <FormControlLabel
                    sx={{
                      '& .MuiFormControlLabel-label': {
                        fontSize: '0.75rem',
                      },
                    }}
                    control={
                      <Checkbox
                        checked={isChecked}
                        onChange={(event) => onFailureFilterToggle(columnId, event.target.checked)}
                      />
                    }
                    label="Show failures"
                  />
                )}
                <div className={`summary ${isHighestPassing ? 'highlight' : ''}`}>
                  Passing: <strong>{pct}%</strong> ({numGoodTests[idx]}/{body.length} cases
                  {numAsserts[idx] ? (
                    <span>
                      , {numGoodAsserts[idx]}/{numAsserts[idx]} asserts
                    </span>
                  ) : null}
                  )
                </div>
              </>
            );
          },
          cell: (info: CellContext<EvalRow, EvaluateTableOutput>) => (
            <EvalOutputCell
              output={info.getValue()}
              maxTextLength={maxTextLength}
              // Ratings must target the row's position in the unfiltered body.
              rowIndex={bodyIndexByRow.get(info.row.original) ?? info.row.index}
              promptIndex={idx}
              onRating={handleRating}
              firstOutput={info.row.original.outputs[0]}
              filterMode={filterMode}
            />
          ),
        }),
      ),
    }),
  ];

  const reactTable = useReactTable({
    data: filteredBody,
    columns,
    columnResizeMode: 'onChange',
    getCoreRowModel: getCoreRowModel(),

    state: {
      columnVisibility,
    },
  });

  return (
    <table
      style={{
        wordBreak,
      }}
    >
      <thead>
        {reactTable.getHeaderGroups().map((headerGroup) => (
          <tr key={headerGroup.id} className="header">
            {headerGroup.headers.map((header) => (
              <th key={header.id} colSpan={header.colSpan} style={{ width: header.getSize() }}>
                {header.isPlaceholder
                  ? null
                  : flexRender(header.column.columnDef.header, header.getContext())}
                <div
                  onMouseDown={header.getResizeHandler()}
                  onTouchStart={header.getResizeHandler()}
                  className={`resizer ${header.column.getIsResizing() ? 'isResizing' : ''}`}
                />
              </th>
            ))}
          </tr>
        ))}
      </thead>
      <tbody>
        {reactTable.getRowModel().rows.map((row, rowIndex) => {
          // Only the first non-variable cell of each row gets the column border class.
          let colBorderDrawn = false;
          return (
            <tr key={row.id}>
              {row.getVisibleCells().map((cell) => {
                const isVariableCol = cell.column.id.startsWith('Variable');
                const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
                if (shouldDrawColBorder) {
                  colBorderDrawn = true;
                }
                const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
                return (
                  <td
                    key={cell.id}
                    style={{ width: cell.column.getSize() }}
                    className={`${isVariableCol ? 'variable' : ''} ${
                      shouldDrawRowBorder ? 'first-prompt-row' : ''
                    } ${shouldDrawColBorder ? 'first-prompt-col' : ''}`}
                  >
                    {flexRender(cell.column.columnDef.cell, cell.getContext())}
                  </td>
                );
              })}
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}