promptfoo 0.18.4 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/dist/package.json +10 -5
  2. package/dist/src/evaluator.d.ts.map +1 -1
  3. package/dist/src/evaluator.js +17 -9
  4. package/dist/src/evaluator.js.map +1 -1
  5. package/dist/src/index.d.ts +1 -0
  6. package/dist/src/index.d.ts.map +1 -1
  7. package/dist/src/index.js +3 -0
  8. package/dist/src/index.js.map +1 -1
  9. package/dist/src/main.js +2 -2
  10. package/dist/src/main.js.map +1 -1
  11. package/dist/src/providers.d.ts +1 -1
  12. package/dist/src/providers.d.ts.map +1 -1
  13. package/dist/src/providers.js +5 -0
  14. package/dist/src/providers.js.map +1 -1
  15. package/dist/src/share.d.ts.map +1 -1
  16. package/dist/src/share.js +8 -7
  17. package/dist/src/share.js.map +1 -1
  18. package/dist/src/types.d.ts +9 -1
  19. package/dist/src/types.d.ts.map +1 -1
  20. package/dist/src/web/nextui/404/index.html +1 -0
  21. package/dist/src/web/nextui/404.html +1 -0
  22. package/dist/src/web/nextui/_next/static/P9zzdx-rDJKPcGFq_qOXC/_buildManifest.js +1 -0
  23. package/dist/src/web/nextui/_next/static/P9zzdx-rDJKPcGFq_qOXC/_ssgManifest.js +1 -0
  24. package/dist/src/web/nextui/_next/static/chunks/121-54cee610700b4756.js +27 -0
  25. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  26. package/dist/src/web/nextui/_next/static/chunks/373-6a411db0b05027d3.js +1 -0
  27. package/dist/src/web/nextui/_next/static/chunks/583-507e6d8883bb85ff.js +1 -0
  28. package/dist/src/web/nextui/_next/static/chunks/596-9c29c47b8dee7a50.js +25 -0
  29. package/dist/src/web/nextui/_next/static/chunks/658-f8f9d18540505edc.js +15 -0
  30. package/dist/src/web/nextui/_next/static/chunks/858-7255df6dbc44dff9.js +125 -0
  31. package/dist/src/web/nextui/_next/static/chunks/97-64e11ce2b0607459.js +1 -0
  32. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  33. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-655bc42ac68b25cc.js +1 -0
  34. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d5e8697859d6294e.js +1 -0
  35. package/dist/src/web/nextui/_next/static/chunks/app/layout-4c714b1a5a3a768d.js +1 -0
  36. package/dist/src/web/nextui/_next/static/chunks/app/page-4fe8a6342d24ca23.js +1 -0
  37. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-cd35686fe6c12be8.js +1 -0
  38. package/dist/src/web/nextui/_next/static/chunks/fd9d1056-d8847af536b5787b.js +9 -0
  39. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  40. package/dist/src/web/nextui/_next/static/chunks/main-0670de04b1c026b4.js +1 -0
  41. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  42. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  43. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  44. package/dist/src/web/nextui/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
  45. package/dist/src/web/nextui/_next/static/chunks/webpack-a886dd767c2e76b7.js +1 -0
  46. package/dist/src/web/nextui/_next/static/css/48d388184a2f4ce3.css +1 -0
  47. package/dist/src/web/nextui/_next/static/css/7265c36d84346934.css +1 -0
  48. package/dist/src/web/nextui/_next/static/css/8119d8bd13a8adab.css +1 -0
  49. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +1 -0
  50. package/dist/src/web/nextui/_next/static/css/e388dd377baf25ec.css +1 -0
  51. package/dist/src/web/nextui/_next/static/css/fc460b8a7cadb952.css +1 -0
  52. package/dist/src/web/nextui/_next/static/media/0e4fe491bf84089c-s.p.woff2 +0 -0
  53. package/dist/src/web/nextui/_next/static/media/1c57ca6f5208a29b-s.woff2 +0 -0
  54. package/dist/src/web/nextui/_next/static/media/3dbd163d3bb09d47-s.woff2 +0 -0
  55. package/dist/src/web/nextui/_next/static/media/42d52f46a26971a3-s.woff2 +0 -0
  56. package/dist/src/web/nextui/_next/static/media/5647e4c23315a2d2-s.woff2 +0 -0
  57. package/dist/src/web/nextui/_next/static/media/627622453ef56b0d-s.p.woff2 +0 -0
  58. package/dist/src/web/nextui/_next/static/media/7be645d133f3ee22-s.woff2 +0 -0
  59. package/dist/src/web/nextui/_next/static/media/7c53f7419436e04b-s.woff2 +0 -0
  60. package/dist/src/web/nextui/_next/static/media/8fb72f69fba4e3d2-s.woff2 +0 -0
  61. package/dist/src/web/nextui/_next/static/media/912a9cfe43c928d9-s.woff2 +0 -0
  62. package/dist/src/web/nextui/_next/static/media/934c4b7cb736f2a3-s.p.woff2 +0 -0
  63. package/dist/src/web/nextui/_next/static/media/a5b77b63ef20339c-s.woff2 +0 -0
  64. package/dist/src/web/nextui/_next/static/media/a6d330d7873e7320-s.woff2 +0 -0
  65. package/dist/src/web/nextui/_next/static/media/baf12dd90520ae41-s.woff2 +0 -0
  66. package/dist/src/web/nextui/_next/static/media/bbdb6f0234009aba-s.woff2 +0 -0
  67. package/dist/src/web/nextui/_next/static/media/cff529cd86cc0276-s.woff2 +0 -0
  68. package/dist/src/web/nextui/_next/static/media/d117eea74e01de14-s.woff2 +0 -0
  69. package/dist/src/web/nextui/_next/static/media/dfa8b99978df7bbc-s.woff2 +0 -0
  70. package/dist/src/web/nextui/_next/static/media/e25729ca87cc7df9-s.woff2 +0 -0
  71. package/dist/src/web/nextui/_next/static/media/eb52b768f62eeeb4-s.woff2 +0 -0
  72. package/dist/src/web/nextui/_next/static/media/f06116e890b3dadb-s.woff2 +0 -0
  73. package/dist/src/web/nextui/api +1 -0
  74. package/dist/src/web/nextui/eval/index.html +1 -0
  75. package/dist/src/web/nextui/eval/index.txt +13 -0
  76. package/dist/src/web/nextui/index.html +1 -0
  77. package/dist/src/web/nextui/index.txt +13 -0
  78. package/dist/src/web/nextui/setup/index.html +1 -0
  79. package/dist/src/web/nextui/setup/index.txt +14 -0
  80. package/dist/src/web/server.d.ts +1 -1
  81. package/dist/src/web/server.d.ts.map +1 -1
  82. package/dist/src/web/server.js +47 -4
  83. package/dist/src/web/server.js.map +1 -1
  84. package/package.json +10 -5
  85. package/src/evaluator.ts +17 -9
  86. package/src/index.ts +7 -1
  87. package/src/main.ts +3 -3
  88. package/src/providers.ts +11 -2
  89. package/src/share.ts +10 -8
  90. package/src/types.ts +10 -1
  91. package/src/web/nextui/.eslintrc.json +3 -0
  92. package/src/web/nextui/next.config.js +14 -0
  93. package/src/web/nextui/package-lock.json +4615 -0
  94. package/src/web/nextui/package.json +45 -0
  95. package/src/web/nextui/src/app/Home.css +3 -0
  96. package/src/web/nextui/src/app/api/route.ts +6 -0
  97. package/src/web/{client/src/NavBar.css → nextui/src/app/components/DarkMode.css} +1 -0
  98. package/src/web/{client/src/NavBar.tsx → nextui/src/app/components/DarkMode.tsx} +4 -9
  99. package/src/web/nextui/src/app/components/Logo.css +32 -0
  100. package/src/web/nextui/src/app/components/PageShell.css +33 -0
  101. package/src/web/nextui/src/app/components/PageShell.tsx +87 -0
  102. package/src/web/{client/src → nextui/src/app/eval}/ConfigModal.tsx +8 -5
  103. package/src/web/nextui/src/app/eval/Eval.css +13 -0
  104. package/src/web/nextui/src/app/eval/Eval.tsx +79 -0
  105. package/src/web/{client/src → nextui/src/app/eval}/EvalOutputPromptDialog.tsx +2 -2
  106. package/src/web/{client/src → nextui/src/app/eval}/ResultsTable.css +10 -12
  107. package/src/web/{client/src → nextui/src/app/eval}/ResultsTable.tsx +57 -14
  108. package/src/web/{client/src → nextui/src/app/eval}/ResultsView.tsx +4 -4
  109. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +5 -0
  110. package/src/web/nextui/src/app/eval/[id]/page.css +9 -0
  111. package/src/web/nextui/src/app/eval/[id]/page.tsx +20 -0
  112. package/src/web/nextui/src/app/eval/index.css +0 -0
  113. package/src/web/nextui/src/app/eval/page.tsx +8 -0
  114. package/src/web/{client/src → nextui/src/app/eval}/store.ts +2 -2
  115. package/src/web/nextui/src/app/eval/types.ts +20 -0
  116. package/src/web/{client/src/index.css → nextui/src/app/globals.css} +21 -3
  117. package/src/web/nextui/src/app/layout.tsx +25 -0
  118. package/src/web/nextui/src/app/page.tsx +7 -0
  119. package/src/web/nextui/src/app/setup/AssertsForm.tsx +118 -0
  120. package/src/web/nextui/src/app/setup/PromptDialog.tsx +77 -0
  121. package/src/web/nextui/src/app/setup/PromptsSection.tsx +190 -0
  122. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +99 -0
  123. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +149 -0
  124. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +88 -0
  125. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +108 -0
  126. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +154 -0
  127. package/src/web/nextui/src/app/setup/VarsForm.tsx +57 -0
  128. package/src/web/nextui/src/app/setup/page.css +3 -0
  129. package/src/web/nextui/src/app/setup/page.tsx +160 -0
  130. package/src/web/nextui/src/util/api.ts +1 -0
  131. package/src/web/nextui/src/util/store.ts +53 -0
  132. package/src/web/nextui/tsconfig.json +28 -0
  133. package/src/web/server.ts +56 -2
  134. package/dist/src/web/client/assets/index-6d2a3573.js +0 -200
  135. package/dist/src/web/client/assets/index-d2b6a160.css +0 -1
  136. package/dist/src/web/client/assets/js-yaml-8bbf9398.js +0 -32
  137. package/dist/src/web/client/index.html +0 -15
  138. package/src/web/client/.eslintrc.cjs +0 -14
  139. package/src/web/client/index.html +0 -13
  140. package/src/web/client/package-lock.json +0 -5726
  141. package/src/web/client/package.json +0 -39
  142. package/src/web/client/src/App.css +0 -4
  143. package/src/web/client/src/App.tsx +0 -120
  144. package/src/web/client/src/Logo.css +0 -18
  145. package/src/web/client/src/main.tsx +0 -10
  146. package/src/web/client/src/types.ts +0 -36
  147. package/src/web/client/src/vite-env.d.ts +0 -1
  148. package/src/web/client/tsconfig.json +0 -24
  149. package/src/web/client/tsconfig.node.json +0 -10
  150. package/src/web/client/vite.config.ts +0 -7
  151. /package/dist/src/web/{client → nextui}/favicon.ico +0 -0
  152. /package/dist/src/web/{client → nextui}/logo.svg +0 -0
  153. /package/src/web/{client → nextui}/public/favicon.ico +0 -0
  154. /package/src/web/{client → nextui}/public/logo.svg +0 -0
  155. /package/src/web/{client/src → nextui/src/app/components}/Logo.tsx +0 -0
  156. /package/src/web/{client/src → nextui/src/app/eval}/ShareModal.tsx +0 -0
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "nextui",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "next lint"
10
+ },
11
+ "dependencies": {
12
+ "@emotion/react": "^11.11.1",
13
+ "@emotion/styled": "^11.11.0",
14
+ "@mui/icons-material": "^5.14.3",
15
+ "@mui/material": "^5.14.4",
16
+ "@tanstack/react-table": "^8.9.3",
17
+ "@types/diff": "^5.0.3",
18
+ "@types/node": "20.4.10",
19
+ "@types/react": "18.2.20",
20
+ "@types/react-dom": "18.2.7",
21
+ "@types/react-syntax-highlighter": "^15.5.7",
22
+ "@types/uuid": "^9.0.2",
23
+ "debounce": "^1.2.1",
24
+ "diff": "^5.1.0",
25
+ "eslint": "8.47.0",
26
+ "eslint-config-next": "13.4.13",
27
+ "js-yaml": "^4.1.0",
28
+ "next": "13.4.13",
29
+ "opener": "^1.5.2",
30
+ "react": "18.2.0",
31
+ "react-dnd": "^16.0.1",
32
+ "react-dnd-html5-backend": "^16.0.1",
33
+ "react-dom": "18.2.0",
34
+ "react-syntax-highlighter": "^15.5.0",
35
+ "socket.io": "^4.7.2",
36
+ "socket.io-client": "^4.7.2",
37
+ "tiny-invariant": "^1.3.1",
38
+ "typescript": "5.1.6",
39
+ "uuid": "^9.0.0",
40
+ "zustand": "^4.4.1"
41
+ },
42
+ "devDependencies": {
43
+ "@types/js-yaml": "^4.0.5"
44
+ }
45
+ }
@@ -0,0 +1,3 @@
1
+ .container {
2
+ margin: 2rem;
3
+ }
@@ -0,0 +1,6 @@
1
+ import { NextResponse } from 'next/server';
2
+
3
+ export async function GET() {
4
+ const message = Math.random() < 0.5 ? 'Hello World!' : 'Hello mom'; // coin-flip greeting
5
+ return NextResponse.json({ data: { message } });
6
+ }
@@ -7,6 +7,7 @@ nav {
7
7
  }
8
8
 
9
9
  .dark-mode-toggle {
10
+ display: flex;
10
11
  background-color: transparent;
11
12
  border: none;
12
13
  color: var(--text-color);
@@ -1,9 +1,7 @@
1
- import Logo from './Logo';
2
-
3
1
  import DarkModeIcon from '@mui/icons-material/DarkMode';
4
2
  import LightModeIcon from '@mui/icons-material/LightMode';
5
3
 
6
- import './NavBar.css';
4
+ import './DarkMode.css';
7
5
 
8
6
  interface NavbarProps {
9
7
  darkMode: boolean;
@@ -12,11 +10,8 @@ interface NavbarProps {
12
10
 
13
11
  export default function NavBar({ darkMode, onToggleDarkMode }: NavbarProps) {
14
12
  return (
15
- <nav>
16
- <Logo />
17
- <div className="dark-mode-toggle" onClick={onToggleDarkMode}>
18
- {darkMode ? <DarkModeIcon /> : <LightModeIcon />}
19
- </div>
20
- </nav>
13
+ <div className="dark-mode-toggle" onClick={onToggleDarkMode}>
14
+ {darkMode ? <DarkModeIcon /> : <LightModeIcon />}
15
+ </div>
21
16
  );
22
17
  }
@@ -0,0 +1,32 @@
1
+ .logo {
2
+ display: flex;
3
+ font-family: mono;
4
+ font-size: 0.8rem;
5
+ align-items: center;
6
+ gap: 8px;
7
+ background-color: #f0f0f0;
8
+ padding: 0 2rem 0 0;
9
+ border-radius: 0.5rem;
10
+ }
11
+
12
+ [data-theme='dark'] .logo {
13
+ background-color: #333;
14
+ border-color: #444;
15
+ }
16
+
17
+ .logo img {
18
+ width: 25px;
19
+ margin-top: 2px;
20
+ }
21
+
22
+ [data-theme='dark'] .logo img {
23
+ filter: invert(1);
24
+ }
25
+
26
+ .logo span {
27
+ color: var(--text-color);
28
+ }
29
+
30
+ [data-theme='dark'] .logo span {
31
+ color: #f0f0f0;
32
+ }
@@ -0,0 +1,33 @@
1
+ .nav {
2
+ padding: 0.25rem 0 0.25rem 1rem;
3
+ background-color: #eee;
4
+ margin-bottom: 1rem;
5
+ }
6
+
7
+ [data-theme='dark'] .nav {
8
+ background-color: #333;
9
+ }
10
+
11
+ .nav a {
12
+ padding: 0 0.5rem;
13
+ text-decoration: none;
14
+ color: #000;
15
+ align-self: center;
16
+ }
17
+
18
+ [data-theme='dark'] .nav a {
19
+ color: #f0f0f0;
20
+ }
21
+
22
+ .nav div:last-child {
23
+ margin-left: auto;
24
+ margin-right: 0.5rem;
25
+ }
26
+
27
+ .nav a:hover {
28
+ text-decoration: underline;
29
+ }
30
+
31
+ [data-theme='dark'] .nav a:hover {
32
+ color: #ddd;
33
+ }
@@ -0,0 +1,87 @@
1
+ 'use client';
2
+
3
+ import React from 'react';
4
+ import Link from 'next/link';
5
+ import useMediaQuery from '@mui/material/useMediaQuery';
6
+ import { Stack } from '@mui/material';
7
+ import { ThemeProvider, createTheme } from '@mui/material/styles';
8
+
9
+ import Logo from './Logo';
10
+ import DarkMode from './DarkMode';
11
+
12
+ import './PageShell.css';
13
+
14
+ export { PageShell };
15
+
16
+ function PageShell({ children }: { children: React.ReactNode }) {
17
+ // Wraps page content in the MUI theme and navigation bar, and owns the light/dark mode state.
18
+ const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
19
+ const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
20
+ const theme = React.useMemo(
21
+ () =>
22
+ createTheme({
23
+ typography: {
24
+ fontFamily: 'inherit',
25
+ },
26
+ palette: {
27
+ mode: darkMode ? 'dark' : 'light',
28
+ },
29
+ }),
30
+ [darkMode],
31
+ );
32
+
33
+ const toggleDarkMode = () => setDarkMode((current) => !current);
34
+
35
+ // useMediaQuery can report `false` on the first render and the real value afterwards, so
36
+ // the state (seeded only once by useState) is re-synced whenever the preference changes.
37
+ React.useEffect(() => {
38
+ setDarkMode(prefersDarkMode);
39
+ }, [prefersDarkMode]);
40
+
41
+ // Mirror the active mode onto <html data-theme> so the [data-theme='dark'] CSS rules apply.
42
+ React.useEffect(() => {
43
+ const root = document.documentElement;
44
+ if (darkMode) root.setAttribute('data-theme', 'dark');
45
+ else root.removeAttribute('data-theme');
46
+ }, [darkMode]);
47
+
48
+ return (
49
+ <React.StrictMode>
50
+ <ThemeProvider theme={theme}>
51
+ <Layout>
52
+ <Navigation darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
53
+ <div>{children}</div>
54
+ </Layout>
55
+ </ThemeProvider>
56
+ </React.StrictMode>
57
+ );
58
+ }
59
+
60
+ function Layout(props: { children: React.ReactNode }) {
61
+ return <div>{props.children}</div>; // plain wrapper element around whatever it is given
62
+ }
63
+
64
+ /**
65
+ * Top navigation bar: logo, optional page links, and the dark-mode toggle.
66
+ *
67
+ * The "New Eval" / "View Evals" links are left out when the
68
+ * NEXT_PUBLIC_NO_BROWSING environment variable is set.
69
+ */
70
+ function Navigation({
71
+ darkMode,
72
+ onToggleDarkMode,
73
+ }: {
74
+ darkMode: boolean;
75
+ onToggleDarkMode: () => void;
76
+ }) {
77
+ // Any truthy value of the variable hides the page links.
78
+ const showLinks = !process.env.NEXT_PUBLIC_NO_BROWSING;
79
+ return (
80
+ <Stack direction="row" spacing={2} className="nav">
81
+ <Logo />
82
+ {showLinks && <Link href="/setup">New Eval</Link>}
83
+ {showLinks && <Link href="/eval">View Evals</Link>}
84
+ <DarkMode darkMode={darkMode} onToggleDarkMode={onToggleDarkMode} />
85
+ </Stack>
86
+ );
87
+ }
@@ -1,13 +1,16 @@
1
1
  import React from 'react';
2
+ import Box from '@mui/material/Box';
3
+ import Button from '@mui/material/Button';
4
+ import Check from '@mui/icons-material/Check';
2
5
  import Dialog from '@mui/material/Dialog';
3
- import DialogTitle from '@mui/material/DialogTitle';
4
- import DialogContent from '@mui/material/DialogContent';
5
6
  import DialogActions from '@mui/material/DialogActions';
6
- import Button from '@mui/material/Button';
7
+ import DialogContent from '@mui/material/DialogContent';
8
+ import DialogTitle from '@mui/material/DialogTitle';
9
+ import FileCopy from '@mui/icons-material/FileCopy';
10
+ import IconButton from '@mui/material/IconButton';
7
11
  import Typography from '@mui/material/Typography';
12
+
8
13
  import { useStore } from './store';
9
- import { IconButton, Box } from '@mui/material';
10
- import { FileCopy, Check } from '@mui/icons-material';
11
14
 
12
15
  interface ConfigModalProps {
13
16
  open: boolean;
@@ -0,0 +1,13 @@
1
+ body {
2
+ background-color: var(--background-color);
3
+ color: var(--text-color);
4
+ }
5
+
6
+ .loading {
7
+ display: flex;
8
+ flex-direction: column;
9
+ gap: 1.5rem;
10
+ justify-content: center;
11
+ align-items: center;
12
+ height: 9rem;
13
+ }
@@ -0,0 +1,79 @@
1
+ 'use client';
2
+
3
+ import * as React from 'react';
4
+ import CircularProgress from '@mui/material/CircularProgress';
5
+ import { io as SocketIOClient } from 'socket.io-client';
6
+
7
+ import ResultsView from './ResultsView';
8
+ import { API_BASE_URL } from '@/util/api';
9
+ import { useStore } from './store';
10
+
11
+ import type { EvalTable, SharedResults } from './types';
12
+
13
+ import './Eval.css';
14
+
15
+ interface EvalOptions {
16
+ preloadedData?: SharedResults;
17
+ }
18
+
19
+ /** Shows eval results: from `preloadedData` when provided, otherwise from the live socket feed. */
20
+ export default function Eval({ preloadedData }: EvalOptions) {
21
+ const { table, setTable, setConfig } = useStore();
22
+ const [loaded, setLoaded] = React.useState<boolean>(false);
23
+ const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
24
+ // Refreshes the recent-results list; does nothing unless the page URL contains "localhost".
25
+ const fetchRecentFiles = async () => {
26
+ if (!window.location.href.includes('localhost')) {
27
+ return;
28
+ }
29
+ const resp = await fetch(`${API_BASE_URL}/results`);
30
+ const body = await resp.json();
31
+ setRecentFiles(body.data);
32
+ };
33
+
34
+ // Loads the chosen results file from the API into the store.
35
+ const handleRecentFileSelection = async (file: string) => {
36
+ const resp = await fetch(`${API_BASE_URL}/results/${file}`);
37
+ const body = await resp.json();
38
+ setTable(body.data.results.table);
39
+ setConfig(body.data.config);
40
+ };
41
+
42
+ React.useEffect(() => {
43
+ if (preloadedData) {
44
+ // Preloaded evals are static, so no socket connection is opened for them.
45
+ setTable(preloadedData.data.results?.table as EvalTable);
46
+ setConfig(preloadedData.data.config);
47
+ setLoaded(true);
48
+ return undefined;
49
+ }
50
+ const socket = SocketIOClient(API_BASE_URL);
51
+ socket.on('init', (data) => {
52
+ console.log('Initialized socket connection', data);
53
+ setLoaded(true);
54
+ setTable(data.results.table);
55
+ setConfig(data.config);
56
+ fetchRecentFiles();
57
+ });
58
+ socket.on('update', (data) => {
59
+ console.log('Received data update', data);
60
+ setTable(data.results.table);
61
+ setConfig(data.config);
62
+ fetchRecentFiles();
63
+ });
64
+ return () => {
65
+ socket.disconnect();
66
+ };
67
+ }, [setTable, setConfig, preloadedData]);
68
+
69
+ return loaded && table ? (
70
+ <ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
71
+ ) : (
72
+ <div className="loading">
73
+ <div>
74
+ <CircularProgress size={22} />
75
+ </div>
76
+ <div>Loading eval data</div>
77
+ </div>
78
+ );
79
+ }
@@ -5,7 +5,7 @@ import Dialog from '@mui/material/Dialog';
5
5
  import DialogActions from '@mui/material/DialogActions';
6
6
  import DialogContent from '@mui/material/DialogContent';
7
7
  import DialogTitle from '@mui/material/DialogTitle';
8
- import TextareaAutosize from '@mui/base/TextareaAutosize';
8
+ import TextareaAutosize from '@mui/material/TextareaAutosize';
9
9
  import IconButton from '@mui/material/IconButton';
10
10
  import ContentCopyIcon from '@mui/icons-material/ContentCopy';
11
11
  import CheckIcon from '@mui/icons-material/Check';
@@ -17,7 +17,7 @@ import TableHead from '@mui/material/TableHead';
17
17
  import TableRow from '@mui/material/TableRow';
18
18
  import Typography from '@mui/material/Typography';
19
19
 
20
- import type { GradingResult } from '../../../types';
20
+ import type { GradingResult } from './types';
21
21
 
22
22
  interface EvalOutputPromptDialogProps {
23
23
  open: boolean;
@@ -1,15 +1,3 @@
1
- * {
2
- box-sizing: border-box;
3
- }
4
-
5
- html {
6
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
7
- 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
8
- font-size: 16px;
9
- background-color: var(--background-color);
10
- color: var(--text-color);
11
- }
12
-
13
1
  table,
14
2
  .divTable {
15
3
  border: 1px solid var(--table-border-color);
@@ -20,6 +8,16 @@ table,
20
8
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
21
9
  }
22
10
 
11
+ ins {
12
+ background-color: var(--insert-highlight-color);
13
+ text-decoration: none;
14
+ }
15
+
16
+ del {
17
+ background-color: var(--delete-highlight-color);
18
+ text-decoration: line-through; /* `strikethrough` is not a valid text-decoration value */
19
+ }
20
+
23
21
  .tr {
24
22
  display: flex;
25
23
  }
@@ -1,4 +1,5 @@
1
1
  import * as React from 'react';
2
+ import { diffSentences, diffJson, diffWords } from 'diff';
2
3
 
3
4
  import './index.css';
4
5
 
@@ -12,17 +13,17 @@ import {
12
13
  import Checkbox from '@mui/material/Checkbox';
13
14
  import FormControlLabel from '@mui/material/FormControlLabel';
14
15
 
15
- import { useStore } from './store.js';
16
+ import { useStore } from './store';
16
17
 
17
18
  import type { CellContext, VisibilityState } from '@tanstack/table-core';
18
19
 
19
20
  import EvalOutputPromptDialog from './EvalOutputPromptDialog';
20
21
 
21
- import type { EvalRow, EvalRowOutput, FilterMode } from './types.js';
22
+ import type { EvalRow, EvaluateTableOutput, FilterMode, GradingResult } from './types';
22
23
 
23
24
  import './ResultsTable.css';
24
25
 
25
- function formatRowOutput(output: EvalRowOutput | string) {
26
+ function formatRowOutput(output: EvaluateTableOutput | string) {
26
27
  if (typeof output === 'string') {
27
28
  // Backwards compatibility for 0.15.0 breaking change. Remove eventually.
28
29
  const pass = output.startsWith('[PASS]');
@@ -86,7 +87,7 @@ function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
86
87
  }
87
88
 
88
89
  interface PromptOutputProps {
89
- output: EvalRowOutput;
90
+ output: EvaluateTableOutput;
90
91
  maxTextLength: number;
91
92
  rowIndex: number;
92
93
  promptIndex: number;
@@ -99,7 +100,9 @@ function EvalOutputCell({
99
100
  rowIndex,
100
101
  promptIndex,
101
102
  onRating,
102
- }: PromptOutputProps) {
103
+ firstOutput,
104
+ filterMode,
105
+ }: PromptOutputProps & { firstOutput: EvaluateTableOutput; filterMode: FilterMode }) {
103
106
  const [openPrompt, setOpen] = React.useState(false);
104
107
  const handlePromptOpen = () => {
105
108
  setOpen(true);
@@ -115,6 +118,42 @@ function EvalOutputCell({
115
118
  text = chunks.slice(1).join('---');
116
119
  }
117
120
 
121
+ if (filterMode === 'different' && firstOutput) {
122
+ let firstOutputText =
123
+ typeof firstOutput.text === 'string' ? firstOutput.text : JSON.stringify(firstOutput.text);
124
+
125
+ if (firstOutputText.includes('---')) {
126
+ firstOutputText = firstOutputText.split('---').slice(1).join('---');
127
+ }
128
+
129
+ let diffResult;
130
+ try {
131
+ // Try parsing the texts as JSON
132
+ JSON.parse(firstOutputText);
133
+ JSON.parse(text);
134
+ // If no errors are thrown, the texts are valid JSON
135
+ diffResult = diffJson(firstOutputText, text);
136
+ } catch (error) {
137
+ // If an error is thrown, the texts are not valid JSON
138
+ if (firstOutputText.includes('. ') && text.includes('. ')) {
139
+ // If the texts contain a period, they are considered as prose
140
+ diffResult = diffSentences(firstOutputText, text);
141
+ } else {
142
+ // If the texts do not contain a period, use diffWords
143
+ diffResult = diffWords(firstOutputText, text);
144
+ }
145
+ }
146
+ text = diffResult
147
+ .map((part: { added?: boolean; removed?: boolean; value: string }) =>
148
+ part.added
149
+ ? `<ins>${part.value}</ins>`
150
+ : part.removed
151
+ ? `<del>${part.value}</del>`
152
+ : part.value,
153
+ )
154
+ .join('');
155
+ }
156
+
118
157
  const handleClick = (isPass: boolean) => {
119
158
  onRating(rowIndex, promptIndex, isPass);
120
159
  };
@@ -239,7 +278,9 @@ export default function ResultsTable({
239
278
  const numGoodAsserts = head.prompts.map((_, idx) =>
240
279
  body.reduce((acc, row) => {
241
280
  const componentResults = row.outputs[idx].gradingResult?.componentResults;
242
- return acc + (componentResults ? componentResults.filter((r) => r.pass).length : 0);
281
+ return (
282
+ acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
283
+ );
243
284
  }, 0),
244
285
  );
245
286
 
@@ -341,13 +382,15 @@ export default function ResultsTable({
341
382
  </>
342
383
  );
343
384
  },
344
- cell: (info: CellContext<EvalRow, EvalRowOutput>) => (
385
+ cell: (info: CellContext<EvalRow, EvaluateTableOutput>) => (
345
386
  <EvalOutputCell
346
- output={info.getValue() as unknown as EvalRowOutput}
387
+ output={info.getValue() as unknown as EvaluateTableOutput}
347
388
  maxTextLength={maxTextLength}
348
389
  rowIndex={info.row.index}
349
390
  promptIndex={idx}
350
391
  onRating={handleRating}
392
+ firstOutput={filteredBody[info.row.index].outputs[0]}
393
+ filterMode={filterMode}
351
394
  />
352
395
  ),
353
396
  }),
@@ -394,13 +437,13 @@ export default function ResultsTable({
394
437
  }}
395
438
  >
396
439
  <thead>
397
- {reactTable.getHeaderGroups().map((headerGroup) => (
440
+ {reactTable.getHeaderGroups().map((headerGroup: any) => (
398
441
  <tr key={headerGroup.id} className="header">
399
- {headerGroup.headers.map((header) => {
442
+ {headerGroup.headers.map((header: any) => {
400
443
  return (
401
444
  <th
445
+ key={header.id}
402
446
  {...{
403
- key: header.id,
404
447
  colSpan: header.colSpan,
405
448
  style: {
406
449
  width: header.getSize(),
@@ -424,11 +467,11 @@ export default function ResultsTable({
424
467
  ))}
425
468
  </thead>
426
469
  <tbody>
427
- {reactTable.getRowModel().rows.map((row, rowIndex) => {
470
+ {reactTable.getRowModel().rows.map((row: any, rowIndex: any) => {
428
471
  let colBorderDrawn = false;
429
472
  return (
430
473
  <tr key={row.id}>
431
- {row.getVisibleCells().map((cell) => {
474
+ {row.getVisibleCells().map((cell: any) => {
432
475
  const isVariableCol = cell.column.id.startsWith('Variable');
433
476
  const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
434
477
  if (shouldDrawColBorder) {
@@ -437,8 +480,8 @@ export default function ResultsTable({
437
480
  const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
438
481
  return (
439
482
  <td
483
+ key={cell.id}
440
484
  {...{
441
- key: cell.id,
442
485
  style: {
443
486
  width: cell.column.getSize(),
444
487
  },
@@ -21,13 +21,13 @@ import ShareIcon from '@mui/icons-material/Share';
21
21
  import VisibilityIcon from '@mui/icons-material/Visibility';
22
22
  import { styled } from '@mui/system';
23
23
 
24
- import ResultsTable from './ResultsTable.js';
24
+ import ResultsTable from './ResultsTable';
25
25
  import ConfigModal from './ConfigModal';
26
26
  import ShareModal from './ShareModal';
27
- import { useStore } from './store.js';
27
+ import { useStore } from './store';
28
28
 
29
29
  import type { VisibilityState } from '@tanstack/table-core';
30
- import type { FilterMode } from './types.js';
30
+ import type { FilterMode } from './types';
31
31
 
32
32
  const ResponsiveStack = styled(Stack)(({ theme }) => ({
33
33
  maxWidth: '100%',
@@ -171,7 +171,7 @@ export default function ResultsView({ recentFiles, onRecentFileSelected }: Resul
171
171
  }, [head]);
172
172
 
173
173
  return (
174
- <div>
174
+ <div style={{ marginLeft: '1rem', marginRight: '1rem' }}>
175
175
  <Paper py="md">
176
176
  <ResponsiveStack direction="row" spacing={4} alignItems="center">
177
177
  <Box>
@@ -0,0 +1,5 @@
1
+ import './page.css';
2
+
3
+ export default function NotFound() {
4
+ return <div className="error">Eval not found</div>; // static message; class name is "error"
5
+ }
@@ -0,0 +1,9 @@
1
+ .error {
2
+ display: flex;
3
+ flex-direction: column;
4
+ gap: 1.5rem;
5
+ justify-content: center;
6
+ align-items: center;
7
+ height: 9rem;
8
+ font-size: 1.5rem;
9
+ }
@@ -0,0 +1,20 @@
1
+ import React from 'react';
2
+ import { notFound } from 'next/navigation';
3
+
4
+ import Eval from '../Eval';
5
+
6
+ import './page.css';
7
+
8
+ export async function generateStaticParams() {
9
+ return []; // always an empty list: no eval ids are enumerated here
10
+ }
11
+
12
+ export default async function Page({ params }: { params: { id: string } }) {
13
+ // The eval with the requested id is fetched from the hosted API.
14
+ const evalUrl = `https://api.promptfoo.dev/eval/${params.id}`;
15
+ const response = await fetch(evalUrl);
16
+ // Any non-OK response is routed to the not-found handling.
17
+ if (!response.ok) notFound();
18
+ const sharedResults = await response.json();
19
+ return <Eval preloadedData={sharedResults} />;
20
+ }
File without changes
@@ -0,0 +1,8 @@
1
+ import React from 'react';
2
+
3
+ import Eval from './Eval';
4
+
5
+ export default function Page() {
6
+ // No preloadedData is passed, so Eval shows the local eval delivered over its websocket
7
+ return <Eval />;
8
+ }
@@ -1,6 +1,6 @@
1
- import create from 'zustand';
1
+ import { create } from 'zustand';
2
2
 
3
- import type { EvalTable, UnifiedConfig } from './types.js';
3
+ import type { EvalTable, UnifiedConfig } from './types';
4
4
 
5
5
  interface TableState {
6
6
  table: EvalTable | null;
@@ -0,0 +1,20 @@
1
+ import type { Prompt, EvaluateTableOutput } from '../../../../../types';
2
+
3
+ export type EvalHead = {
4
+ prompts: Prompt[];
5
+ vars: string[];
6
+ };
7
+
8
+ export type EvalRow = {
9
+ outputs: EvaluateTableOutput[]; // NOTE(review): presumably the model outputs for this row — confirm
10
+ vars: string[]; // NOTE(review): presumably this row's variable values — confirm
11
+ };
12
+
13
+ export type EvalTable = {
14
+ head: EvalHead;
15
+ body: EvalRow[];
16
+ };
17
+
18
+ export type FilterMode = 'all' | 'failures' | 'different';
19
+
20
+ export * from '../../../../../types';