promptfoo 0.18.4 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +10 -5
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +17 -9
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/main.js +2 -2
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers.d.ts +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +5 -0
- package/dist/src/providers.js.map +1 -1
- package/dist/src/share.d.ts.map +1 -1
- package/dist/src/share.js +8 -7
- package/dist/src/share.js.map +1 -1
- package/dist/src/types.d.ts +9 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -0
- package/dist/src/web/nextui/404.html +1 -0
- package/dist/src/web/nextui/_next/static/P9zzdx-rDJKPcGFq_qOXC/_buildManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/P9zzdx-rDJKPcGFq_qOXC/_ssgManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/121-54cee610700b4756.js +27 -0
- package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/373-6a411db0b05027d3.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/583-507e6d8883bb85ff.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/596-9c29c47b8dee7a50.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/658-f8f9d18540505edc.js +15 -0
- package/dist/src/web/nextui/_next/static/chunks/858-7255df6dbc44dff9.js +125 -0
- package/dist/src/web/nextui/_next/static/chunks/97-64e11ce2b0607459.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-655bc42ac68b25cc.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d5e8697859d6294e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-4c714b1a5a3a768d.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/page-4fe8a6342d24ca23.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-cd35686fe6c12be8.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/fd9d1056-d8847af536b5787b.js +9 -0
- package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/main-0670de04b1c026b4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/webpack-a886dd767c2e76b7.js +1 -0
- package/dist/src/web/nextui/_next/static/css/48d388184a2f4ce3.css +1 -0
- package/dist/src/web/nextui/_next/static/css/7265c36d84346934.css +1 -0
- package/dist/src/web/nextui/_next/static/css/8119d8bd13a8adab.css +1 -0
- package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +1 -0
- package/dist/src/web/nextui/_next/static/css/e388dd377baf25ec.css +1 -0
- package/dist/src/web/nextui/_next/static/css/fc460b8a7cadb952.css +1 -0
- package/dist/src/web/nextui/_next/static/media/0e4fe491bf84089c-s.p.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/1c57ca6f5208a29b-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/3dbd163d3bb09d47-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/42d52f46a26971a3-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/5647e4c23315a2d2-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/627622453ef56b0d-s.p.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/7be645d133f3ee22-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/7c53f7419436e04b-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/8fb72f69fba4e3d2-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/912a9cfe43c928d9-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/934c4b7cb736f2a3-s.p.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/a5b77b63ef20339c-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/a6d330d7873e7320-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/baf12dd90520ae41-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/bbdb6f0234009aba-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/cff529cd86cc0276-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/d117eea74e01de14-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/dfa8b99978df7bbc-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/e25729ca87cc7df9-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/eb52b768f62eeeb4-s.woff2 +0 -0
- package/dist/src/web/nextui/_next/static/media/f06116e890b3dadb-s.woff2 +0 -0
- package/dist/src/web/nextui/api +1 -0
- package/dist/src/web/nextui/eval/index.html +1 -0
- package/dist/src/web/nextui/eval/index.txt +13 -0
- package/dist/src/web/nextui/index.html +1 -0
- package/dist/src/web/nextui/index.txt +13 -0
- package/dist/src/web/nextui/setup/index.html +1 -0
- package/dist/src/web/nextui/setup/index.txt +14 -0
- package/dist/src/web/server.d.ts +1 -1
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +47 -4
- package/dist/src/web/server.js.map +1 -1
- package/package.json +10 -5
- package/src/evaluator.ts +17 -9
- package/src/index.ts +7 -1
- package/src/main.ts +3 -3
- package/src/providers.ts +11 -2
- package/src/share.ts +10 -8
- package/src/types.ts +10 -1
- package/src/web/nextui/.eslintrc.json +3 -0
- package/src/web/nextui/next.config.js +14 -0
- package/src/web/nextui/package-lock.json +4615 -0
- package/src/web/nextui/package.json +45 -0
- package/src/web/nextui/src/app/Home.css +3 -0
- package/src/web/nextui/src/app/api/route.ts +6 -0
- package/src/web/{client/src/NavBar.css → nextui/src/app/components/DarkMode.css} +1 -0
- package/src/web/{client/src/NavBar.tsx → nextui/src/app/components/DarkMode.tsx} +4 -9
- package/src/web/nextui/src/app/components/Logo.css +32 -0
- package/src/web/nextui/src/app/components/PageShell.css +33 -0
- package/src/web/nextui/src/app/components/PageShell.tsx +87 -0
- package/src/web/{client/src → nextui/src/app/eval}/ConfigModal.tsx +8 -5
- package/src/web/nextui/src/app/eval/Eval.css +13 -0
- package/src/web/nextui/src/app/eval/Eval.tsx +79 -0
- package/src/web/{client/src → nextui/src/app/eval}/EvalOutputPromptDialog.tsx +2 -2
- package/src/web/{client/src → nextui/src/app/eval}/ResultsTable.css +10 -12
- package/src/web/{client/src → nextui/src/app/eval}/ResultsTable.tsx +57 -14
- package/src/web/{client/src → nextui/src/app/eval}/ResultsView.tsx +4 -4
- package/src/web/nextui/src/app/eval/[id]/not-found.tsx +5 -0
- package/src/web/nextui/src/app/eval/[id]/page.css +9 -0
- package/src/web/nextui/src/app/eval/[id]/page.tsx +20 -0
- package/src/web/nextui/src/app/eval/index.css +0 -0
- package/src/web/nextui/src/app/eval/page.tsx +8 -0
- package/src/web/{client/src → nextui/src/app/eval}/store.ts +2 -2
- package/src/web/nextui/src/app/eval/types.ts +20 -0
- package/src/web/{client/src/index.css → nextui/src/app/globals.css} +21 -3
- package/src/web/nextui/src/app/layout.tsx +25 -0
- package/src/web/nextui/src/app/page.tsx +7 -0
- package/src/web/nextui/src/app/setup/AssertsForm.tsx +118 -0
- package/src/web/nextui/src/app/setup/PromptDialog.tsx +77 -0
- package/src/web/nextui/src/app/setup/PromptsSection.tsx +190 -0
- package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +99 -0
- package/src/web/nextui/src/app/setup/ProviderSelector.tsx +149 -0
- package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +88 -0
- package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +108 -0
- package/src/web/nextui/src/app/setup/TestCasesSection.tsx +154 -0
- package/src/web/nextui/src/app/setup/VarsForm.tsx +57 -0
- package/src/web/nextui/src/app/setup/page.css +3 -0
- package/src/web/nextui/src/app/setup/page.tsx +160 -0
- package/src/web/nextui/src/util/api.ts +1 -0
- package/src/web/nextui/src/util/store.ts +53 -0
- package/src/web/nextui/tsconfig.json +28 -0
- package/src/web/server.ts +56 -2
- package/dist/src/web/client/assets/index-6d2a3573.js +0 -200
- package/dist/src/web/client/assets/index-d2b6a160.css +0 -1
- package/dist/src/web/client/assets/js-yaml-8bbf9398.js +0 -32
- package/dist/src/web/client/index.html +0 -15
- package/src/web/client/.eslintrc.cjs +0 -14
- package/src/web/client/index.html +0 -13
- package/src/web/client/package-lock.json +0 -5726
- package/src/web/client/package.json +0 -39
- package/src/web/client/src/App.css +0 -4
- package/src/web/client/src/App.tsx +0 -120
- package/src/web/client/src/Logo.css +0 -18
- package/src/web/client/src/main.tsx +0 -10
- package/src/web/client/src/types.ts +0 -36
- package/src/web/client/src/vite-env.d.ts +0 -1
- package/src/web/client/tsconfig.json +0 -24
- package/src/web/client/tsconfig.node.json +0 -10
- package/src/web/client/vite.config.ts +0 -7
- /package/dist/src/web/{client → nextui}/favicon.ico +0 -0
- /package/dist/src/web/{client → nextui}/logo.svg +0 -0
- /package/src/web/{client → nextui}/public/favicon.ico +0 -0
- /package/src/web/{client → nextui}/public/logo.svg +0 -0
- /package/src/web/{client/src → nextui/src/app/components}/Logo.tsx +0 -0
- /package/src/web/{client/src → nextui/src/app/eval}/ShareModal.tsx +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "nextui",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"private": true,
|
|
5
|
+
"scripts": {
|
|
6
|
+
"dev": "next dev",
|
|
7
|
+
"build": "next build",
|
|
8
|
+
"start": "next start",
|
|
9
|
+
"lint": "next lint"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@emotion/react": "^11.11.1",
|
|
13
|
+
"@emotion/styled": "^11.11.0",
|
|
14
|
+
"@mui/icons-material": "^5.14.3",
|
|
15
|
+
"@mui/material": "^5.14.4",
|
|
16
|
+
"@tanstack/react-table": "^8.9.3",
|
|
17
|
+
"@types/diff": "^5.0.3",
|
|
18
|
+
"@types/node": "20.4.10",
|
|
19
|
+
"@types/react": "18.2.20",
|
|
20
|
+
"@types/react-dom": "18.2.7",
|
|
21
|
+
"@types/react-syntax-highlighter": "^15.5.7",
|
|
22
|
+
"@types/uuid": "^9.0.2",
|
|
23
|
+
"debounce": "^1.2.1",
|
|
24
|
+
"diff": "^5.1.0",
|
|
25
|
+
"eslint": "8.47.0",
|
|
26
|
+
"eslint-config-next": "13.4.13",
|
|
27
|
+
"js-yaml": "^4.1.0",
|
|
28
|
+
"next": "13.4.13",
|
|
29
|
+
"opener": "^1.5.2",
|
|
30
|
+
"react": "18.2.0",
|
|
31
|
+
"react-dnd": "^16.0.1",
|
|
32
|
+
"react-dnd-html5-backend": "^16.0.1",
|
|
33
|
+
"react-dom": "18.2.0",
|
|
34
|
+
"react-syntax-highlighter": "^15.5.0",
|
|
35
|
+
"socket.io": "^4.7.2",
|
|
36
|
+
"socket.io-client": "^4.7.2",
|
|
37
|
+
"tiny-invariant": "^1.3.1",
|
|
38
|
+
"typescript": "5.1.6",
|
|
39
|
+
"uuid": "^9.0.0",
|
|
40
|
+
"zustand": "^4.4.1"
|
|
41
|
+
},
|
|
42
|
+
"devDependencies": {
|
|
43
|
+
"@types/js-yaml": "^4.0.5"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import Logo from './Logo';
|
|
2
|
-
|
|
3
1
|
import DarkModeIcon from '@mui/icons-material/DarkMode';
|
|
4
2
|
import LightModeIcon from '@mui/icons-material/LightMode';
|
|
5
3
|
|
|
6
|
-
import './NavBar.css';
|
|
4
|
+
import './DarkMode.css';
|
|
7
5
|
|
|
8
6
|
interface NavbarProps {
|
|
9
7
|
darkMode: boolean;
|
|
@@ -12,11 +10,8 @@ interface NavbarProps {
|
|
|
12
10
|
|
|
13
11
|
export default function NavBar({ darkMode, onToggleDarkMode }: NavbarProps) {
|
|
14
12
|
return (
|
|
15
|
-
<
|
|
16
|
-
<
|
|
17
|
-
|
|
18
|
-
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
|
|
19
|
-
</div>
|
|
20
|
-
</nav>
|
|
13
|
+
<div className="dark-mode-toggle" onClick={onToggleDarkMode}>
|
|
14
|
+
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
|
|
15
|
+
</div>
|
|
21
16
|
);
|
|
22
17
|
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
.logo {
|
|
2
|
+
display: flex;
|
|
3
|
+
font-family: mono;
|
|
4
|
+
font-size: 0.8rem;
|
|
5
|
+
align-items: center;
|
|
6
|
+
gap: 8px;
|
|
7
|
+
background-color: #f0f0f0;
|
|
8
|
+
padding: 0 2rem 0 0;
|
|
9
|
+
border-radius: 0.5rem;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
[data-theme='dark'] .logo {
|
|
13
|
+
background-color: #333;
|
|
14
|
+
border-color: #444;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
.logo img {
|
|
18
|
+
width: 25px;
|
|
19
|
+
margin-top: 2px;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
[data-theme='dark'] .logo img {
|
|
23
|
+
filter: invert(1);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
.logo span {
|
|
27
|
+
color: var(--text-color);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
[data-theme='dark'] .logo span {
|
|
31
|
+
color: #f0f0f0;
|
|
32
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
.nav {
|
|
2
|
+
padding: 0.25rem 0 0.25rem 1rem;
|
|
3
|
+
background-color: #eee;
|
|
4
|
+
margin-bottom: 1rem;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
[data-theme='dark'] .nav {
|
|
8
|
+
background-color: #333;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
.nav a {
|
|
12
|
+
padding: 0 0.5rem;
|
|
13
|
+
text-decoration: none;
|
|
14
|
+
color: #000;
|
|
15
|
+
align-self: center;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
[data-theme='dark'] .nav a {
|
|
19
|
+
color: #f0f0f0;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
.nav div:last-child {
|
|
23
|
+
margin-left: auto;
|
|
24
|
+
margin-right: 0.5rem;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
.nav a:hover {
|
|
28
|
+
text-decoration: underline;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
[data-theme='dark'] .nav a:hover {
|
|
32
|
+
color: #ddd;
|
|
33
|
+
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
'use client';
|
|
2
|
+
|
|
3
|
+
import React from 'react';
|
|
4
|
+
import Link from 'next/link';
|
|
5
|
+
import useMediaQuery from '@mui/material/useMediaQuery';
|
|
6
|
+
import { Stack } from '@mui/material';
|
|
7
|
+
import { ThemeProvider, createTheme } from '@mui/material/styles';
|
|
8
|
+
|
|
9
|
+
import Logo from './Logo';
|
|
10
|
+
import DarkMode from './DarkMode';
|
|
11
|
+
|
|
12
|
+
import './PageShell.css';
|
|
13
|
+
|
|
14
|
+
export { PageShell };
|
|
15
|
+
|
|
16
|
+
function PageShell({ children }: { children: React.ReactNode }) {
|
|
17
|
+
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
|
|
18
|
+
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
|
|
19
|
+
|
|
20
|
+
const theme = React.useMemo(
|
|
21
|
+
() =>
|
|
22
|
+
createTheme({
|
|
23
|
+
typography: {
|
|
24
|
+
fontFamily: 'inherit',
|
|
25
|
+
},
|
|
26
|
+
palette: {
|
|
27
|
+
mode: darkMode ? 'dark' : 'light',
|
|
28
|
+
},
|
|
29
|
+
}),
|
|
30
|
+
[darkMode],
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
const toggleDarkMode = () => {
|
|
34
|
+
setDarkMode(!darkMode);
|
|
35
|
+
if (!darkMode) {
|
|
36
|
+
document.documentElement.setAttribute('data-theme', 'dark');
|
|
37
|
+
} else {
|
|
38
|
+
document.documentElement.removeAttribute('data-theme');
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
React.useEffect(() => {
|
|
43
|
+
if (prefersDarkMode) {
|
|
44
|
+
document.documentElement.setAttribute('data-theme', 'dark');
|
|
45
|
+
}
|
|
46
|
+
}, [prefersDarkMode]);
|
|
47
|
+
|
|
48
|
+
return (
|
|
49
|
+
<React.StrictMode>
|
|
50
|
+
<ThemeProvider theme={theme}>
|
|
51
|
+
<Layout>
|
|
52
|
+
<Navigation darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
|
|
53
|
+
<div>{children}</div>
|
|
54
|
+
</Layout>
|
|
55
|
+
</ThemeProvider>
|
|
56
|
+
</React.StrictMode>
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function Layout({ children }: { children: React.ReactNode }) {
|
|
61
|
+
return <div>{children}</div>;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
function Navigation({
|
|
65
|
+
darkMode,
|
|
66
|
+
onToggleDarkMode,
|
|
67
|
+
}: {
|
|
68
|
+
darkMode: boolean;
|
|
69
|
+
onToggleDarkMode: () => void;
|
|
70
|
+
}) {
|
|
71
|
+
if (process.env.NEXT_PUBLIC_NO_BROWSING) {
|
|
72
|
+
return (
|
|
73
|
+
<Stack direction="row" spacing={2} className="nav">
|
|
74
|
+
<Logo />
|
|
75
|
+
<DarkMode darkMode={darkMode} onToggleDarkMode={onToggleDarkMode} />
|
|
76
|
+
</Stack>
|
|
77
|
+
);
|
|
78
|
+
}
|
|
79
|
+
return (
|
|
80
|
+
<Stack direction="row" spacing={2} className="nav">
|
|
81
|
+
<Logo />
|
|
82
|
+
<Link href="/setup">New Eval</Link>
|
|
83
|
+
<Link href="/eval">View Evals</Link>
|
|
84
|
+
<DarkMode darkMode={darkMode} onToggleDarkMode={onToggleDarkMode} />
|
|
85
|
+
</Stack>
|
|
86
|
+
);
|
|
87
|
+
}
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import React from 'react';
|
|
2
|
+
import Box from '@mui/material/Box';
|
|
3
|
+
import Button from '@mui/material/Button';
|
|
4
|
+
import Check from '@mui/icons-material/Check';
|
|
2
5
|
import Dialog from '@mui/material/Dialog';
|
|
3
|
-
import DialogTitle from '@mui/material/DialogTitle';
|
|
4
|
-
import DialogContent from '@mui/material/DialogContent';
|
|
5
6
|
import DialogActions from '@mui/material/DialogActions';
|
|
6
|
-
import Button from '@mui/material/Button';
|
|
7
|
+
import DialogContent from '@mui/material/DialogContent';
|
|
8
|
+
import DialogTitle from '@mui/material/DialogTitle';
|
|
9
|
+
import FileCopy from '@mui/icons-material/FileCopy';
|
|
10
|
+
import IconButton from '@mui/material/IconButton';
|
|
7
11
|
import Typography from '@mui/material/Typography';
|
|
12
|
+
|
|
8
13
|
import { useStore } from './store';
|
|
9
|
-
import { IconButton, Box } from '@mui/material';
|
|
10
|
-
import { FileCopy, Check } from '@mui/icons-material';
|
|
11
14
|
|
|
12
15
|
interface ConfigModalProps {
|
|
13
16
|
open: boolean;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
'use client';
|
|
2
|
+
|
|
3
|
+
import * as React from 'react';
|
|
4
|
+
import CircularProgress from '@mui/material/CircularProgress';
|
|
5
|
+
import { io as SocketIOClient } from 'socket.io-client';
|
|
6
|
+
|
|
7
|
+
import ResultsView from './ResultsView';
|
|
8
|
+
import { API_BASE_URL } from '@/util/api';
|
|
9
|
+
import { useStore } from './store';
|
|
10
|
+
|
|
11
|
+
import type { EvalTable, SharedResults } from './types';
|
|
12
|
+
|
|
13
|
+
import './Eval.css';
|
|
14
|
+
|
|
15
|
+
interface EvalOptions {
|
|
16
|
+
preloadedData?: SharedResults;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export default function Eval({ preloadedData }: EvalOptions) {
|
|
20
|
+
const { table, setTable, setConfig } = useStore();
|
|
21
|
+
const [loaded, setLoaded] = React.useState<boolean>(false);
|
|
22
|
+
const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
|
|
23
|
+
|
|
24
|
+
const fetchRecentFiles = async () => {
|
|
25
|
+
if (!window.location.href.includes('localhost')) {
|
|
26
|
+
return;
|
|
27
|
+
}
|
|
28
|
+
const resp = await fetch(`${API_BASE_URL}/results`);
|
|
29
|
+
const body = await resp.json();
|
|
30
|
+
setRecentFiles(body.data);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
const handleRecentFileSelection = async (file: string) => {
|
|
34
|
+
const resp = await fetch(`${API_BASE_URL}/results/${file}`);
|
|
35
|
+
const body = await resp.json();
|
|
36
|
+
setTable(body.data.results.table);
|
|
37
|
+
setConfig(body.data.config);
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
React.useEffect(() => {
|
|
41
|
+
const socket = SocketIOClient(API_BASE_URL);
|
|
42
|
+
|
|
43
|
+
if (preloadedData) {
|
|
44
|
+
setTable(preloadedData.data.results?.table as EvalTable);
|
|
45
|
+
setConfig(preloadedData.data.config);
|
|
46
|
+
setLoaded(true);
|
|
47
|
+
} else {
|
|
48
|
+
socket.on('init', (data) => {
|
|
49
|
+
console.log('Initialized socket connection', data);
|
|
50
|
+
setLoaded(true);
|
|
51
|
+
setTable(data.results.table);
|
|
52
|
+
setConfig(data.config);
|
|
53
|
+
fetchRecentFiles();
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
socket.on('update', (data) => {
|
|
57
|
+
console.log('Received data update', data);
|
|
58
|
+
setTable(data.results.table);
|
|
59
|
+
setConfig(data.config);
|
|
60
|
+
fetchRecentFiles();
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return () => {
|
|
65
|
+
socket.disconnect();
|
|
66
|
+
};
|
|
67
|
+
}, [setTable, setConfig, preloadedData]);
|
|
68
|
+
|
|
69
|
+
return loaded && table ? (
|
|
70
|
+
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
|
|
71
|
+
) : (
|
|
72
|
+
<div className="loading">
|
|
73
|
+
<div>
|
|
74
|
+
<CircularProgress size={22} />
|
|
75
|
+
</div>
|
|
76
|
+
<div>Loading eval data</div>
|
|
77
|
+
</div>
|
|
78
|
+
);
|
|
79
|
+
}
|
|
@@ -5,7 +5,7 @@ import Dialog from '@mui/material/Dialog';
|
|
|
5
5
|
import DialogActions from '@mui/material/DialogActions';
|
|
6
6
|
import DialogContent from '@mui/material/DialogContent';
|
|
7
7
|
import DialogTitle from '@mui/material/DialogTitle';
|
|
8
|
-
import TextareaAutosize from '@mui/
|
|
8
|
+
import TextareaAutosize from '@mui/material/TextareaAutosize';
|
|
9
9
|
import IconButton from '@mui/material/IconButton';
|
|
10
10
|
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
|
11
11
|
import CheckIcon from '@mui/icons-material/Check';
|
|
@@ -17,7 +17,7 @@ import TableHead from '@mui/material/TableHead';
|
|
|
17
17
|
import TableRow from '@mui/material/TableRow';
|
|
18
18
|
import Typography from '@mui/material/Typography';
|
|
19
19
|
|
|
20
|
-
import type { GradingResult } from '
|
|
20
|
+
import type { GradingResult } from './types';
|
|
21
21
|
|
|
22
22
|
interface EvalOutputPromptDialogProps {
|
|
23
23
|
open: boolean;
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
* {
|
|
2
|
-
box-sizing: border-box;
|
|
3
|
-
}
|
|
4
|
-
|
|
5
|
-
html {
|
|
6
|
-
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
|
7
|
-
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
|
8
|
-
font-size: 16px;
|
|
9
|
-
background-color: var(--background-color);
|
|
10
|
-
color: var(--text-color);
|
|
11
|
-
}
|
|
12
|
-
|
|
13
1
|
table,
|
|
14
2
|
.divTable {
|
|
15
3
|
border: 1px solid var(--table-border-color);
|
|
@@ -20,6 +8,16 @@ table,
|
|
|
20
8
|
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
|
21
9
|
}
|
|
22
10
|
|
|
11
|
+
ins {
|
|
12
|
+
background-color: var(--insert-highlight-color);
|
|
13
|
+
text-decoration: none;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
del {
|
|
17
|
+
background-color: var(--delete-highlight-color);
|
|
18
|
+
text-decoration: strikethrough;
|
|
19
|
+
}
|
|
20
|
+
|
|
23
21
|
.tr {
|
|
24
22
|
display: flex;
|
|
25
23
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as React from 'react';
|
|
2
|
+
import { diffSentences, diffJson, diffWords } from 'diff';
|
|
2
3
|
|
|
3
4
|
import './index.css';
|
|
4
5
|
|
|
@@ -12,17 +13,17 @@ import {
|
|
|
12
13
|
import Checkbox from '@mui/material/Checkbox';
|
|
13
14
|
import FormControlLabel from '@mui/material/FormControlLabel';
|
|
14
15
|
|
|
15
|
-
import { useStore } from './store
|
|
16
|
+
import { useStore } from './store';
|
|
16
17
|
|
|
17
18
|
import type { CellContext, VisibilityState } from '@tanstack/table-core';
|
|
18
19
|
|
|
19
20
|
import EvalOutputPromptDialog from './EvalOutputPromptDialog';
|
|
20
21
|
|
|
21
|
-
import type { EvalRow,
|
|
22
|
+
import type { EvalRow, EvaluateTableOutput, FilterMode, GradingResult } from './types';
|
|
22
23
|
|
|
23
24
|
import './ResultsTable.css';
|
|
24
25
|
|
|
25
|
-
function formatRowOutput(output:
|
|
26
|
+
function formatRowOutput(output: EvaluateTableOutput | string) {
|
|
26
27
|
if (typeof output === 'string') {
|
|
27
28
|
// Backwards compatibility for 0.15.0 breaking change. Remove eventually.
|
|
28
29
|
const pass = output.startsWith('[PASS]');
|
|
@@ -86,7 +87,7 @@ function TruncatedText({ text: rawText, maxLength }: TruncatedTextProps) {
|
|
|
86
87
|
}
|
|
87
88
|
|
|
88
89
|
interface PromptOutputProps {
|
|
89
|
-
output:
|
|
90
|
+
output: EvaluateTableOutput;
|
|
90
91
|
maxTextLength: number;
|
|
91
92
|
rowIndex: number;
|
|
92
93
|
promptIndex: number;
|
|
@@ -99,7 +100,9 @@ function EvalOutputCell({
|
|
|
99
100
|
rowIndex,
|
|
100
101
|
promptIndex,
|
|
101
102
|
onRating,
|
|
102
|
-
|
|
103
|
+
firstOutput,
|
|
104
|
+
filterMode,
|
|
105
|
+
}: PromptOutputProps & { firstOutput: EvaluateTableOutput; filterMode: FilterMode }) {
|
|
103
106
|
const [openPrompt, setOpen] = React.useState(false);
|
|
104
107
|
const handlePromptOpen = () => {
|
|
105
108
|
setOpen(true);
|
|
@@ -115,6 +118,42 @@ function EvalOutputCell({
|
|
|
115
118
|
text = chunks.slice(1).join('---');
|
|
116
119
|
}
|
|
117
120
|
|
|
121
|
+
if (filterMode === 'different' && firstOutput) {
|
|
122
|
+
let firstOutputText =
|
|
123
|
+
typeof firstOutput.text === 'string' ? firstOutput.text : JSON.stringify(firstOutput.text);
|
|
124
|
+
|
|
125
|
+
if (firstOutputText.includes('---')) {
|
|
126
|
+
firstOutputText = firstOutputText.split('---').slice(1).join('---');
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
let diffResult;
|
|
130
|
+
try {
|
|
131
|
+
// Try parsing the texts as JSON
|
|
132
|
+
JSON.parse(firstOutputText);
|
|
133
|
+
JSON.parse(text);
|
|
134
|
+
// If no errors are thrown, the texts are valid JSON
|
|
135
|
+
diffResult = diffJson(firstOutputText, text);
|
|
136
|
+
} catch (error) {
|
|
137
|
+
// If an error is thrown, the texts are not valid JSON
|
|
138
|
+
if (firstOutputText.includes('. ') && text.includes('. ')) {
|
|
139
|
+
// If the texts contain a period, they are considered as prose
|
|
140
|
+
diffResult = diffSentences(firstOutputText, text);
|
|
141
|
+
} else {
|
|
142
|
+
// If the texts do not contain a period, use diffWords
|
|
143
|
+
diffResult = diffWords(firstOutputText, text);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
text = diffResult
|
|
147
|
+
.map((part: { added?: boolean; removed?: boolean; value: string }) =>
|
|
148
|
+
part.added
|
|
149
|
+
? `<ins>${part.value}</ins>`
|
|
150
|
+
: part.removed
|
|
151
|
+
? `<del>${part.value}</del>`
|
|
152
|
+
: part.value,
|
|
153
|
+
)
|
|
154
|
+
.join('');
|
|
155
|
+
}
|
|
156
|
+
|
|
118
157
|
const handleClick = (isPass: boolean) => {
|
|
119
158
|
onRating(rowIndex, promptIndex, isPass);
|
|
120
159
|
};
|
|
@@ -239,7 +278,9 @@ export default function ResultsTable({
|
|
|
239
278
|
const numGoodAsserts = head.prompts.map((_, idx) =>
|
|
240
279
|
body.reduce((acc, row) => {
|
|
241
280
|
const componentResults = row.outputs[idx].gradingResult?.componentResults;
|
|
242
|
-
return
|
|
281
|
+
return (
|
|
282
|
+
acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
|
|
283
|
+
);
|
|
243
284
|
}, 0),
|
|
244
285
|
);
|
|
245
286
|
|
|
@@ -341,13 +382,15 @@ export default function ResultsTable({
|
|
|
341
382
|
</>
|
|
342
383
|
);
|
|
343
384
|
},
|
|
344
|
-
cell: (info: CellContext<EvalRow,
|
|
385
|
+
cell: (info: CellContext<EvalRow, EvaluateTableOutput>) => (
|
|
345
386
|
<EvalOutputCell
|
|
346
|
-
output={info.getValue() as unknown as
|
|
387
|
+
output={info.getValue() as unknown as EvaluateTableOutput}
|
|
347
388
|
maxTextLength={maxTextLength}
|
|
348
389
|
rowIndex={info.row.index}
|
|
349
390
|
promptIndex={idx}
|
|
350
391
|
onRating={handleRating}
|
|
392
|
+
firstOutput={filteredBody[info.row.index].outputs[0]}
|
|
393
|
+
filterMode={filterMode}
|
|
351
394
|
/>
|
|
352
395
|
),
|
|
353
396
|
}),
|
|
@@ -394,13 +437,13 @@ export default function ResultsTable({
|
|
|
394
437
|
}}
|
|
395
438
|
>
|
|
396
439
|
<thead>
|
|
397
|
-
{reactTable.getHeaderGroups().map((headerGroup) => (
|
|
440
|
+
{reactTable.getHeaderGroups().map((headerGroup: any) => (
|
|
398
441
|
<tr key={headerGroup.id} className="header">
|
|
399
|
-
{headerGroup.headers.map((header) => {
|
|
442
|
+
{headerGroup.headers.map((header: any) => {
|
|
400
443
|
return (
|
|
401
444
|
<th
|
|
445
|
+
key={header.id}
|
|
402
446
|
{...{
|
|
403
|
-
key: header.id,
|
|
404
447
|
colSpan: header.colSpan,
|
|
405
448
|
style: {
|
|
406
449
|
width: header.getSize(),
|
|
@@ -424,11 +467,11 @@ export default function ResultsTable({
|
|
|
424
467
|
))}
|
|
425
468
|
</thead>
|
|
426
469
|
<tbody>
|
|
427
|
-
{reactTable.getRowModel().rows.map((row, rowIndex) => {
|
|
470
|
+
{reactTable.getRowModel().rows.map((row: any, rowIndex: any) => {
|
|
428
471
|
let colBorderDrawn = false;
|
|
429
472
|
return (
|
|
430
473
|
<tr key={row.id}>
|
|
431
|
-
{row.getVisibleCells().map((cell) => {
|
|
474
|
+
{row.getVisibleCells().map((cell: any) => {
|
|
432
475
|
const isVariableCol = cell.column.id.startsWith('Variable');
|
|
433
476
|
const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
|
|
434
477
|
if (shouldDrawColBorder) {
|
|
@@ -437,8 +480,8 @@ export default function ResultsTable({
|
|
|
437
480
|
const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
|
|
438
481
|
return (
|
|
439
482
|
<td
|
|
483
|
+
key={cell.id}
|
|
440
484
|
{...{
|
|
441
|
-
key: cell.id,
|
|
442
485
|
style: {
|
|
443
486
|
width: cell.column.getSize(),
|
|
444
487
|
},
|
|
@@ -21,13 +21,13 @@ import ShareIcon from '@mui/icons-material/Share';
|
|
|
21
21
|
import VisibilityIcon from '@mui/icons-material/Visibility';
|
|
22
22
|
import { styled } from '@mui/system';
|
|
23
23
|
|
|
24
|
-
import ResultsTable from './ResultsTable
|
|
24
|
+
import ResultsTable from './ResultsTable';
|
|
25
25
|
import ConfigModal from './ConfigModal';
|
|
26
26
|
import ShareModal from './ShareModal';
|
|
27
|
-
import { useStore } from './store
|
|
27
|
+
import { useStore } from './store';
|
|
28
28
|
|
|
29
29
|
import type { VisibilityState } from '@tanstack/table-core';
|
|
30
|
-
import type { FilterMode } from './types
|
|
30
|
+
import type { FilterMode } from './types';
|
|
31
31
|
|
|
32
32
|
const ResponsiveStack = styled(Stack)(({ theme }) => ({
|
|
33
33
|
maxWidth: '100%',
|
|
@@ -171,7 +171,7 @@ export default function ResultsView({ recentFiles, onRecentFileSelected }: Resul
|
|
|
171
171
|
}, [head]);
|
|
172
172
|
|
|
173
173
|
return (
|
|
174
|
-
<div>
|
|
174
|
+
<div style={{ marginLeft: '1rem', marginRight: '1rem' }}>
|
|
175
175
|
<Paper py="md">
|
|
176
176
|
<ResponsiveStack direction="row" spacing={4} alignItems="center">
|
|
177
177
|
<Box>
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { notFound } from 'next/navigation';
|
|
3
|
+
|
|
4
|
+
import Eval from '../Eval';
|
|
5
|
+
|
|
6
|
+
import './page.css';
|
|
7
|
+
|
|
8
|
+
export async function generateStaticParams() {
|
|
9
|
+
return [];
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export default async function Page({ params }: { params: { id: string } }) {
|
|
13
|
+
const response = await fetch(`https://api.promptfoo.dev/eval/${params.id}`);
|
|
14
|
+
if (!response.ok) {
|
|
15
|
+
notFound();
|
|
16
|
+
}
|
|
17
|
+
const data = await response.json();
|
|
18
|
+
|
|
19
|
+
return <Eval preloadedData={data} />;
|
|
20
|
+
}
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { Prompt, EvaluateTableOutput } from '../../../../../types';
|
|
2
|
+
|
|
3
|
+
export type EvalHead = {
|
|
4
|
+
prompts: Prompt[];
|
|
5
|
+
vars: string[];
|
|
6
|
+
};
|
|
7
|
+
|
|
8
|
+
export type EvalRow = {
|
|
9
|
+
outputs: EvaluateTableOutput[];
|
|
10
|
+
vars: string[]; // model outputs
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
export type EvalTable = {
|
|
14
|
+
head: EvalHead;
|
|
15
|
+
body: EvalRow[];
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
export type FilterMode = 'all' | 'failures' | 'different';
|
|
19
|
+
|
|
20
|
+
export * from '../../../../../types';
|