promptfoo 0.81.0 → 0.81.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +11 -1
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +1 -1
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cliState.d.ts +2 -0
- package/dist/src/cliState.d.ts.map +1 -1
- package/dist/src/cliState.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +2 -0
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/commands/generate/redteam.d.ts.map +1 -1
- package/dist/src/commands/generate/redteam.js +14 -8
- package/dist/src/commands/generate/redteam.js.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/redteam.js +66 -16
- package/dist/src/commands/redteam.js.map +1 -1
- package/dist/src/config.d.ts.map +1 -1
- package/dist/src/config.js +3 -1
- package/dist/src/config.js.map +1 -1
- package/dist/src/database/tables.d.ts +1 -0
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +3 -1
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +1 -1
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +2 -0
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/graders.d.ts.map +1 -1
- package/dist/src/redteam/graders.js +2 -0
- package/dist/src/redteam/graders.js.map +1 -1
- package/dist/src/redteam/index.d.ts +6 -0
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +81 -20
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/base.d.ts +19 -2
- package/dist/src/redteam/plugins/base.d.ts.map +1 -1
- package/dist/src/redteam/plugins/base.js +34 -8
- package/dist/src/redteam/plugins/base.js.map +1 -1
- package/dist/src/redteam/plugins/custom.d.ts +12 -0
- package/dist/src/redteam/plugins/custom.d.ts.map +1 -0
- package/dist/src/redteam/plugins/custom.js +23 -0
- package/dist/src/redteam/plugins/custom.js.map +1 -0
- package/dist/src/redteam/plugins/harmful.d.ts.map +1 -1
- package/dist/src/redteam/plugins/harmful.js +58 -45
- package/dist/src/redteam/plugins/harmful.js.map +1 -1
- package/dist/src/redteam/plugins/index.d.ts.map +1 -1
- package/dist/src/redteam/plugins/index.js +5 -0
- package/dist/src/redteam/plugins/index.js.map +1 -1
- package/dist/src/redteam/plugins/indirectPromptInjection.d.ts +26 -0
- package/dist/src/redteam/plugins/indirectPromptInjection.d.ts.map +1 -0
- package/dist/src/redteam/plugins/indirectPromptInjection.js +96 -0
- package/dist/src/redteam/plugins/indirectPromptInjection.js.map +1 -0
- package/dist/src/redteam/plugins/pii.d.ts.map +1 -1
- package/dist/src/redteam/plugins/pii.js +13 -8
- package/dist/src/redteam/plugins/pii.js.map +1 -1
- package/dist/src/redteam/plugins/policy.d.ts.map +1 -1
- package/dist/src/redteam/plugins/policy.js +1 -1
- package/dist/src/redteam/plugins/policy.js.map +1 -1
- package/dist/src/redteam/plugins/promptExtraction.d.ts.map +1 -1
- package/dist/src/redteam/plugins/promptExtraction.js +1 -1
- package/dist/src/redteam/plugins/promptExtraction.js.map +1 -1
- package/dist/src/redteam/plugins/rbac.d.ts.map +1 -1
- package/dist/src/redteam/plugins/rbac.js +10 -13
- package/dist/src/redteam/plugins/rbac.js.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.d.ts +7 -4
- package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +25 -17
- package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
- package/dist/src/redteam/providers/iterative.d.ts +2 -1
- package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterative.js +7 -12
- package/dist/src/redteam/providers/iterative.js.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.d.ts +1 -0
- package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.js +12 -18
- package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.d.ts +7 -14
- package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.js +9 -23
- package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
- package/dist/src/redteam/providers/shared.d.ts +6 -0
- package/dist/src/redteam/providers/shared.d.ts.map +1 -0
- package/dist/src/redteam/providers/shared.js +63 -0
- package/dist/src/redteam/providers/shared.js.map +1 -0
- package/dist/src/redteam/util.d.ts +1 -0
- package/dist/src/redteam/util.d.ts.map +1 -1
- package/dist/src/redteam/util.js +8 -1
- package/dist/src/redteam/util.js.map +1 -1
- package/dist/src/types/index.d.ts +9 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +4 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/providers.d.ts.map +1 -1
- package/dist/src/types/providers.js +5 -2
- package/dist/src/types/providers.js.map +1 -1
- package/dist/src/validators/redteam.d.ts +2 -2
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/src/validators/redteam.js +7 -2
- package/dist/src/validators/redteam.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/{331-15dd9f932990d5bd.js → 19-cbf79a2b4aeaad20.js} +4 -4
- package/dist/src/web/nextui/_next/static/chunks/258-358dad65a69fa35d.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-03861be8c5e5a085.js → page-67165aedfd3a64fc.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/{page-f70bebead3e6df20.js → page-cf4a91cf1554224c.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/{layout-389d33c82e892815.js → layout-d74de07e6e80f296.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-6dd357313bf638d1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-0dc90092cd7acaaf.js +1 -0
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +2 -2
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +2 -2
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +2 -2
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +3 -3
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +2 -2
- package/dist/src/web/nextui/progress/index.html +1 -1
- package/dist/src/web/nextui/progress/index.txt +2 -2
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +2 -2
- package/dist/src/web/nextui/report/index.html +1 -1
- package/dist/src/web/nextui/report/index.txt +3 -3
- package/dist/src/web/nextui/setup/index.html +1 -1
- package/dist/src/web/nextui/setup/index.txt +2 -2
- package/dist/test/config.test.js +4 -0
- package/dist/test/config.test.js.map +1 -1
- package/dist/test/redteam/index.test.js +101 -12
- package/dist/test/redteam/index.test.js.map +1 -1
- package/dist/test/redteam/plugins/base.test.js +17 -13
- package/dist/test/redteam/plugins/base.test.js.map +1 -1
- package/dist/test/redteam/plugins/imitation.test.js +6 -6
- package/dist/test/redteam/plugins/imitation.test.js.map +1 -1
- package/dist/test/redteam/providers/shared.test.d.ts +2 -0
- package/dist/test/redteam/providers/shared.test.d.ts.map +1 -0
- package/dist/test/redteam/providers/shared.test.js +99 -0
- package/dist/test/redteam/providers/shared.test.js.map +1 -0
- package/package.json +11 -1
- package/dist/src/web/nextui/_next/static/chunks/258-b24b1292c6e41f6d.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-6915154f000de6f3.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/main-app-cf76b7c55faa5c9b.js +0 -1
- /package/dist/src/web/nextui/_next/static/{-GVTgHf1ZK9GxOyhXqerX → V2kgKrpK-FTH5Ywn9fzkB}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{-GVTgHf1ZK9GxOyhXqerX → V2kgKrpK-FTH5Ywn9fzkB}/_ssgManifest.js +0 -0
|
@@ -1 +0,0 @@
|
|
|
1
|
-
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[258],{49532:function(e,t,n){"use strict";let s,l;async function a(){if(s||(l||(l=fetch("/api/config").then(e=>e.json()).then(e=>s=e.apiBaseUrl)),await l),void 0===s)throw Error("API base URL is undefined");return s}n.d(t,{b:function(){return a}})},55258:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return tc}});var s=n(24004),l=n(14978),a=n(77580);let r=a.env.NEXT_PUBLIC_PROMPTFOO_REMOTE_API_BASE_URL||a.env.NEXT_PUBLIC_PROMPTFOO_BASE_URL||a.env.PROMPTFOO_REMOTE_API_BASE_URL||"https://api.promptfoo.dev",i=a.env.NEXT_PUBLIC_PROMPTFOO_BASE_URL||a.env.PROMPTFOO_REMOTE_APP_BASE_URL||"https://app.promptfoo.dev";a?.stdout?.columns&&a?.stdout?.columns>10&&a?.stdout?.columns;var o=n(49532);let c=(0,l.createContext)(void 0),d=e=>{let{children:t}=e,[n,a]=(0,l.useState)(!1);return(0,l.useEffect)(()=>{let e=e=>{"Shift"===e.key&&a(!0)},t=e=>{"Shift"===e.key&&a(!1)};return window.addEventListener("keydown",e),window.addEventListener("keyup",t),()=>{window.removeEventListener("keydown",e),window.removeEventListener("keyup",t)}},[]),(0,s.jsx)(c.Provider,{value:n,children:t})};var u=n(18960),h=n(67070);let p=(0,l.createContext)(void 0),m=()=>{let e=(0,l.useContext)(p);if(!e)throw Error("useToast must be used within a ToastProvider");return e},x=e=>{let{children:t}=e,[n,a]=(0,l.useState)({message:"",severity:"info",open:!1}),r=()=>{a(e=>({...e,open:!1}))};return(0,s.jsxs)(p.Provider,{value:{showToast:function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"info";a({message:e,severity:t,open:!0})}},children:[t,(0,s.jsx)(u.Z,{open:n.open,autoHideDuration:6e3,onClose:r,children:(0,s.jsx)(h.Z,{onClose:r,severity:n.severity,children:n.message})})]})};var 
v=n(52428),g=n(6882),j=n(12594),f=n(28891),b=n(10804),y=n(93179),Z=n(47887),w=n(11116),C=n(46446),k=n(13391),S=n(98507),E=n(93295),N=n(72467),I=n(95198),R=n(49605),P=n(79245),D=n(49050),F=n(27760),T=n(47827),M=n(84081),O=n(81679),L=n(50819),A=n(8864),U=n(38212),W=n(33932),z=n(74309),V=n(90923),_=n(10654),B=n(13457),J=n(21975),H=n(78276),K=n(72303),Y=n(72053),X=n(94106),G=n(89394),q=n(42834),$=n(26337),Q=n(91797),ee=n(29872),et=n(73701),en=n(39279),es=n(30666),el=n(15795),ea=n(66988),er=n(98489),ei=n(43226),eo=n(32162),ec=n.n(eo),ed=e=>{let{open:t,onClose:n,recentEvals:a,onRecentEvalSelected:r,title:i,description:o}=e,[c,d]=(0,l.useState)(""),[u,h]=(0,l.useState)(-1),p=l.useRef(null),m=l.useRef(null),x=()=>{n(),d(""),h(-1)},v=a.filter(e=>ec()(c.toLowerCase(),e.label.toLowerCase())||"string"==typeof e.description&&ec()(c.toLowerCase(),e.description.toLowerCase())),g=e=>{r(e),x()},j=l.useCallback(()=>{if(u>=0&&m.current){let e=m.current.querySelectorAll("tbody tr"),t=Math.min(u+3,e.length-1);e[t]&&e[t].scrollIntoView({behavior:"smooth",block:"nearest"})}},[u]);l.useEffect(()=>{j()},[j]),l.useEffect(()=>{t&&(h(0),setTimeout(()=>{var e;null===(e=p.current)||void 0===e||e.focus()},0))},[t]);let 
f=l.useId();return(0,s.jsxs)(G.Z,{open:t,onClose:x,maxWidth:"md",fullWidth:!0,children:[i?(0,s.jsx)(Q.Z,{children:i}):null,(0,s.jsxs)($.Z,{children:[o?(0,s.jsx)(P.Z,{sx:{mb:4},children:o}):null,(0,s.jsxs)(P.Z,{sx:{width:"100%",mt:2},children:[(0,s.jsx)(J.Z,{fullWidth:!0,variant:"outlined",placeholder:"Search",value:c,onChange:e=>{d(e.target.value),h(0)},onKeyDown:e=>{if(e.stopPropagation(),t)switch(e.key){case"ArrowDown":e.preventDefault(),h(e=>Math.min(e+1,v.length-1));break;case"ArrowUp":e.preventDefault(),h(e=>Math.max(e-1,0));break;case"Enter":e.preventDefault(),u>=0&&u<v.length?g(v[u].evalId):v.length>0&&g(v[0].evalId);break;case"Escape":e.preventDefault(),x()}},sx:{mb:2},inputRef:p,id:"eval-selector-search-".concat(f)}),(0,s.jsx)(el.Z,{component:ee.Z,sx:{height:"600px",overflow:"auto"},ref:m,children:(0,s.jsxs)(et.Z,{stickyHeader:!0,children:[(0,s.jsx)(ea.Z,{children:(0,s.jsxs)(er.Z,{children:[(0,s.jsx)(es.Z,{children:"Created"}),(0,s.jsx)(es.Z,{children:"Description"}),(0,s.jsx)(es.Z,{children:"# Tests"})]})}),(0,s.jsx)(en.Z,{children:v.length>0?v.map((e,t)=>(0,s.jsxs)(er.Z,{hover:!0,onClick:()=>g(e.evalId),sx:{cursor:"pointer",backgroundColor:t===u?"rgba(255, 255, 0, 0.1)":"inherit"},children:[(0,s.jsx)(es.Z,{children:new Date(e.createdAt).toLocaleString()}),(0,s.jsx)(es.Z,{children:e.description||e.label}),(0,s.jsx)(es.Z,{children:e.numTests})]},e.evalId)):(0,s.jsx)(er.Z,{children:(0,s.jsx)(es.Z,{colSpan:3,align:"center",sx:{py:4},children:(0,s.jsxs)(P.Z,{sx:{textAlign:"center",color:"text.secondary"},children:[(0,s.jsx)(P.Z,{sx:{fontSize:"3rem",mb:2},children:"\uD83D\uDD0D"}),(0,s.jsx)(ei.Z,{variant:"h6",gutterBottom:!0,children:"No evaluations found"}),(0,s.jsx)(ei.Z,{variant:"body2",children:"Try adjusting your search or create a new evaluation"})]})})})})]})})]})]}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:x,children:"Cancel"})})]})},eu=n(37204),eh=n(94660),ep=n(74810);let em={getItem:async e=>await (0,eu.U2)(e)||null,setItem:async(e,t)=>{await 
(0,eu.t8)(e,t)},removeItem:async e=>{await (0,eu.IV)(e)}},ex=(0,eh.Ue)()((0,ep.tJ)((e,t)=>({evalId:null,setEvalId:t=>e(()=>({evalId:t})),author:null,setAuthor:t=>e(()=>({author:t})),table:null,setTable:t=>e(()=>({table:t})),config:null,setConfig:t=>e(()=>({config:t})),maxTextLength:250,setMaxTextLength:t=>e(()=>({maxTextLength:t})),wordBreak:"break-word",setWordBreak:t=>e(()=>({wordBreak:t})),showInferenceDetails:!0,setShowInferenceDetails:t=>e(()=>({showInferenceDetails:t})),renderMarkdown:!1,setRenderMarkdown:t=>e(()=>({renderMarkdown:t})),prettifyJson:!1,setPrettifyJson:t=>e(()=>({prettifyJson:t})),showPrompts:!1,setShowPrompts:t=>e(()=>({showPrompts:t})),showPassFail:!0,setShowPassFail:t=>e(()=>({showPassFail:t})),inComparisonMode:!1,setInComparisonMode:t=>e(()=>({inComparisonMode:t})),columnStates:{},setColumnState:(t,n)=>e(e=>({columnStates:{...e.columnStates,[t]:n}}))}),{name:"ResultsViewStorage",storage:(0,ep.FL)(()=>em)}));var ev=function(e){let{initialEvals:t,onComparisonEvalSelected:n}=e,{evalId:a}=ex(),[r,i]=(0,l.useState)(!1),[c,d]=(0,l.useState)(t),u=async()=>{try{let e=await (0,o.b)(),n=a||t[0].evalId,s=await fetch("".concat(e,"/api/results/").concat(n),{cache:"no-store"}),l=await s.json(),r=l.data.datasetId;if(!r){console.error("No datasetId found for current eval "+n);return}let i=await fetch("".concat(e,"/api/results?datasetId=").concat(r),{cache:"no-store"}),c=await i.json();d(c.data.filter(e=>e.evalId!==n))}catch(e){console.error("Error fetching recent evals:",e)}},h=()=>{i(!1)};return(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(H.Z,{title:"Combine this eval with another eval run",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:()=>{u(),i(!0)},children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(X.Z,{fontSize:"small"})}),(0,s.jsx)(U.Z,{children:"Compare with another eval"})]})}),(0,s.jsx)(ed,{open:r,onClose:h,recentEvals:c,onRecentEvalSelected:e=>{n(e),h()},title:"Select an eval to compare",description:"Only evals with the same dataset can be 
compared."})]})},eg=n(54719),ej=n(52481),ef=n(7175),eb=n(52653);function ey(e){let{open:t,onClose:a}=e,{config:r}=ex(),i=l.useRef(null),[o,c]=l.useState(!1),[d,u]=l.useState("");l.useEffect(()=>{t&&(async()=>{let{default:e}=await Promise.resolve().then(n.bind(n,34235));u(e.dump(r))})()},[t,r]);let h=()=>{c(!1),a()};return(0,s.jsxs)(G.Z,{open:t,onClose:h,"aria-labelledby":"config-dialog-title",maxWidth:"md",fullWidth:!0,children:[(0,s.jsx)(Q.Z,{id:"config-dialog-title",children:(0,s.jsxs)(P.Z,{display:"flex",justifyContent:"space-between",alignItems:"center",children:[(0,s.jsx)(ei.Z,{variant:"h6",style:{flexGrow:1},children:"Config"}),(0,s.jsxs)(P.Z,{children:[(0,s.jsx)(H.Z,{title:"Copy to clipboard",children:(0,s.jsx)(eb.Z,{onClick:()=>{i.current&&(i.current.select(),document.execCommand("copy"),c(!0))},children:o?(0,s.jsx)(eg.Z,{}):(0,s.jsx)(ef.Z,{})})}),(0,s.jsx)(H.Z,{title:"Download .yaml",children:(0,s.jsx)(eb.Z,{onClick:()=>{let e=new Blob([d],{type:"text/yaml;charset=utf-8"}),t=URL.createObjectURL(e),n=document.createElement("a");n.href=t,n.download="config.yaml",document.body.appendChild(n),n.click(),document.body.removeChild(n),URL.revokeObjectURL(t)},children:(0,s.jsx)(ej.Z,{})})})]})]})}),(0,s.jsx)($.Z,{children:(0,s.jsx)(ei.Z,{variant:"body1",component:"div",children:(0,s.jsx)("textarea",{ref:i,readOnly:!0,value:d,style:{width:"100%",minHeight:"400px",fontFamily:"monospace",border:"1px solid #ccc"}})})}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:h,color:"primary",children:"Close"})})]})}var eZ=n(54986),ew=n(79715),eC=n(34235),ek=function(){let{table:e,config:t,evalId:n}=ex(),[a,r]=l.useState(!1),i=(e,t)=>{let n=URL.createObjectURL(e),s=document.createElement("a");s.href=n,s.download=t,document.body.appendChild(s),s.click(),document.body.removeChild(s),URL.revokeObjectURL(n)},o=()=>{r(!1)},c=()=>{let e=eC.default.dump(t),n=new Blob([e],{type:"text/yaml;charset=utf-8"});i(n,"promptfooconfig.yaml"),o()},d=()=>{if(!e){alert("No table data");return}let 
t=e.body.map((t,n)=>({chosen:t.outputs.filter(e=>e.pass).map(e=>e.text),rejected:t.outputs.filter(e=>!e.pass).map(e=>e.text),vars:t.test.vars,providers:e.head.prompts.map(e=>e.provider),prompts:e.head.prompts.map(e=>e.label||e.display||e.raw)})),s=new Blob([JSON.stringify(t,null,2)],{type:"application/json"});i(s,"".concat(n,"-dpo.json")),o()},u=()=>{if(!e){alert("No table data");return}let t=new Blob([JSON.stringify(e,null,2)],{type:"application/json"});i(t,"".concat(n,"-table.json")),o()},h=()=>{if(!e){alert("No table data");return}let t=[],s=[...e.head.vars,...e.head.prompts.map(e=>"[".concat(e.provider,"] ").concat(e.label))];t.push(s),e.body.forEach(e=>{let n=[...e.vars,...e.outputs.map(e=>{let{pass:t,text:n}=e;return(t?"[PASS] ":"[FAIL] ")+n})];t.push(n)});let l=(0,ew.P)(t),a=new Blob([l],{type:"text/csv;charset=utf-8;"});i(a,"".concat(n,"-table.csv")),o()},p=()=>{if(!e){alert("No table data");return}let t=e.body.filter(e=>e.outputs.some(e=>null!==e.pass)).map(e=>({vars:{...e.test.vars,output:e.outputs[0].text},assert:{type:"javascript",value:"".concat(e.outputs[0].pass?"":"!","JSON.parse(output).pass")},metadata:e.test.metadata})),s=eC.default.dump(t),l=new Blob([s],{type:"application/x-yaml"});i(l,"".concat(n,"-human-eval-cases.yaml")),o()},m=l.useCallback(e=>{if("Escape"===e.key)o();else if(a&&!e.altKey&&!e.ctrlKey&&!e.metaKey)switch(e.key){case"1":c();break;case"2":h();break;case"3":u();break;case"4":d();break;case"5":p()}},[a]);return l.useEffect(()=>{let e=e=>{a&&m(e)};return document.addEventListener("keydown",e),()=>{document.removeEventListener("keydown",e)}},[m,a]),(0,s.jsxs)(s.Fragment,{children:[(0,s.jsxs)(z.Z,{onClick:()=>{r(!0)},children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(ej.Z,{fontSize:"small"})}),(0,s.jsx)(U.Z,{children:"Download"})]}),(0,s.jsxs)(G.Z,{onClose:o,open:a,onKeyDown:m,children:[(0,s.jsx)(Q.Z,{children:"Download 
Options"}),(0,s.jsx)($.Z,{children:(0,s.jsxs)(B.Z,{direction:"column",spacing:2,sx:{width:"100%"},children:[(0,s.jsx)(H.Z,{title:"Download the YAML configuration file",children:(0,s.jsx)(D.Z,{onClick:c,startIcon:(0,s.jsx)(ej.Z,{}),variant:"contained",color:"primary",fullWidth:!0,children:"Download YAML Config"})}),(0,s.jsx)(eZ.Z,{}),(0,s.jsx)(ei.Z,{variant:"subtitle1",children:"Table Data"}),(0,s.jsx)(H.Z,{title:"Download table data in CSV format",children:(0,s.jsx)(D.Z,{onClick:h,startIcon:(0,s.jsx)(ej.Z,{}),variant:"outlined",fullWidth:!0,children:"Download Table CSV"})}),(0,s.jsx)(H.Z,{title:"Download table data in JSON format",children:(0,s.jsx)(D.Z,{onClick:u,startIcon:(0,s.jsx)(ej.Z,{}),variant:"outlined",fullWidth:!0,children:"Download Table JSON"})}),(0,s.jsx)(eZ.Z,{}),(0,s.jsx)(ei.Z,{variant:"subtitle1",children:"Advanced Options"}),(0,s.jsx)(H.Z,{title:"Download Direct Preference Optimization JSON",children:(0,s.jsx)(D.Z,{onClick:d,startIcon:(0,s.jsx)(ej.Z,{}),variant:"outlined",color:"secondary",fullWidth:!0,children:"Download DPO JSON"})}),(0,s.jsx)(H.Z,{title:"Download Evaluation Test Cases in YAML format",children:(0,s.jsx)(D.Z,{onClick:p,startIcon:(0,s.jsx)(ej.Z,{}),variant:"outlined",color:"secondary",fullWidth:!0,children:"Download Human Eval Test YAML"})})]})})]})]})},eS=n(58991),eE=e=>{let{recentEvals:t,onRecentEvalSelected:n}=e,[a,r]=(0,l.useState)(!1),i="undefined"!=typeof navigator&&/Mac|iPod|iPhone|iPad/.test(navigator.userAgent),o=()=>{r(!0)};return l.useEffect(()=>{let e=e=>{(e.ctrlKey||e.metaKey)&&"k"===e.key&&(e.preventDefault(),o())};return window.addEventListener("keydown",e),()=>{window.removeEventListener("keydown",e)}},[]),(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(H.Z,{title:i?"Search for Evals (⌘ + K)":"Search for Evals (Ctrl + K)",children:(0,s.jsx)(eb.Z,{onClick:o,size:"large",children:(0,s.jsx)(eS.Z,{})})}),(0,s.jsx)(ed,{title:"Open an 
Eval",open:a,onClose:()=>{r(!1)},recentEvals:t,onRecentEvalSelected:n})]})},eN=n(69179),eI=n(33533),eR=n(41101),eP=n(36001);let eD=["#fd7f6f","#7eb0d5","#b2e061","#bd7ebe","#ffb55a","#ffee65","#beb9db","#fdcce5","#8bd3c7"];function eF(e){let{table:t}=e,n=(0,l.useRef)(null),a=(0,l.useRef)(null);return(0,l.useEffect)(()=>{if(!n.current)return;a.current&&a.current.destroy();let e=t.body.flatMap(e=>e.outputs.map(e=>e.score)),s=Math.min(...e),l=(Math.ceil(Math.max(...e))-Math.floor(s))/10,r=Array.from({length:11},(e,t)=>Number.parseFloat((Math.floor(s)+t*l).toFixed(2))),i=t.head.prompts.map((e,n)=>{let s=t.body.flatMap(e=>e.outputs[n].score),a=r.map(e=>s.filter(t=>t>=e&&t<e+l).length);return{label:"Column ".concat(n+1),data:a,backgroundColor:eD[n%eD.length]}});a.current=new eP.kL(n.current,{type:"bar",data:{labels:r,datasets:i},options:{animation:!1,plugins:{title:{display:!0,text:"Score Distribution"},legend:{display:!1},tooltip:{callbacks:{title:function(e){let t=e[0].datasetIndex;return"Column ".concat(t+1)},label:function(e){let t=e.dataIndex,n=r[t],s=r[t+1];return s?"".concat(n," <= score < ").concat(s):"".concat(n," <= score")}}}}}})},[t]),(0,s.jsx)("canvas",{ref:n,style:{maxHeight:"300px"}})}function eT(e){let{table:t}=e,n=(0,l.useRef)(null),a=(0,l.useRef)(null);return(0,l.useEffect)(()=>{if(!n.current)return;a.current&&a.current.destroy();let e=t.head.prompts.map((e,n)=>{let s=t.body.flatMap(e=>e.outputs[n]),l=s.filter(e=>e.pass).length,a=l/s.length*100;return{label:"Column ".concat(n+1),data:[a],backgroundColor:eD[n%eD.length]}});a.current=new eP.kL(n.current,{type:"bar",data:{labels:["Pass Rate (%)"],datasets:e},options:{animation:!1,plugins:{title:{display:!0,text:"Pass rate"},legend:{display:!0}}}})},[t]),(0,s.jsx)("canvas",{ref:n,style:{maxHeight:"300px"}})}function 
eM(e){let{table:t}=e,n=(0,l.useRef)(null),a=(0,l.useRef)(null),[r,i]=(0,l.useState)(0),[o,c]=(0,l.useState)(1),[d,u]=(0,l.useState)(!1);return(0,l.useEffect)(()=>{if(!n.current)return;a.current&&a.current.destroy();let e=t.body.flatMap(e=>e.outputs.map(e=>e.score)),s=Math.min(...e),l=Math.max(...e),i=t.body.map(e=>{let t=e.outputs[r].score,n=e.outputs[o].score;return{x:t,y:n,backgroundColor:n>t?"green":n<t?"red":"gray"}});a.current=new eP.kL(n.current,{type:"scatter",data:{datasets:[{data:i,backgroundColor:i.map(e=>e.backgroundColor)},{type:"line",data:[{x:s,y:s},{x:l,y:l}],borderColor:"gray",borderWidth:1,borderDash:[5,5],pointRadius:0}]},options:{animation:!1,plugins:{legend:{display:!1},tooltip:{callbacks:{label:function(e){let n=t.body[e.dataIndex],s=n.outputs[0].text,l=n.outputs[1].text;return s.length>30&&(s=s.substring(0,30)+"..."),l.length>30&&(l=l.substring(0,30)+"..."),"Output 1: ".concat(s,"\nOutput 2: ").concat(l)}}}},scales:{x:{title:{display:!0,text:"Prompt ".concat(r+1," Score")},ticks:{callback:function(e,t,n){let s=String(Math.round(100*Number(e)));return t===n.length-1&&(s+="%"),s}}},y:{title:{display:!0,text:"Prompt ".concat(o+1," Score")},ticks:{callback:function(e,t,n){let s=String(Math.round(100*Number(e)));return t===n.length-1&&(s+="%"),s}}}}}})},[t,r,o]),(0,s.jsxs)(s.Fragment,{children:[(0,s.jsxs)(G.Z,{open:d,onClose:()=>u(!1),children:[(0,s.jsx)(Q.Z,{children:"Compare prompt outputs"}),(0,s.jsxs)($.Z,{children:[(0,s.jsx)(M.Z,{sx:{m:1,minWidth:120},children:(0,s.jsx)(_.Z,{value:r,onChange:e=>i(Number(e.target.value)),children:t.head.prompts.map((e,t)=>(0,s.jsxs)(z.Z,{value:t,children:["Prompt ",t+1]},t))})}),(0,s.jsx)(M.Z,{sx:{m:1,minWidth:120},children:(0,s.jsx)(_.Z,{value:o,onChange:e=>c(Number(e.target.value)),children:t.head.prompts.map((e,t)=>(0,s.jsxs)(z.Z,{value:t,children:["Prompt ",t+1]},t))})})]})]}),(0,s.jsx)("canvas",{ref:n,style:{maxHeight:"300px",cursor:"pointer"},onClick:()=>u(!0)})]})}function 
eO(e){let{table:t}=e,n=(0,l.useRef)(null),a=(0,l.useRef)(null);return(0,l.useEffect)(()=>{var e;if(!n.current)return;a.current&&a.current.destroy();let s=Object.keys((null===(e=t.head.prompts[0].metrics)||void 0===e?void 0:e.namedScores)||{}),l=t.head.prompts.map((e,n)=>{let l=s.map(n=>{var s;let l=(null===(s=e.metrics)||void 0===s?void 0:s.namedScores[n])||0,a=Math.max(...t.head.prompts.map(e=>{var t;return(null===(t=e.metrics)||void 0===t?void 0:t.namedScores[n])||0}));return l/a});return{label:"".concat(t.head.prompts[n].provider),data:l,backgroundColor:eD[n%eD.length]}});a.current=new eP.kL(n.current,{type:"bar",data:{labels:s,datasets:l},options:{scales:{x:{grid:{display:!1}},y:{ticks:{callback:function(e,t,n){let s=String(Math.round(100*Number(e)));return t===n.length-1&&(s+="%"),s}}}},plugins:{tooltip:{callbacks:{title:function(e){return e[0].dataset.label},label:function(e){let t=e.parsed.y;return"".concat(s[e.dataIndex],": ").concat((100*t).toFixed(2),"% pass rate")}}}}}})},[t]),(0,s.jsx)("canvas",{ref:n,style:{maxHeight:"300px"}})}eP.kL.register(eP.vn,eP.ST,eP.ho,eP.uw,eP.f$,eP.ZL,eP.jn,eP.od,eP.u,eP.wL);var eL=l.memo(function(e){var t;let{columnVisibility:n}=e,a=(0,eR.Z)();eP.kL.defaults.color="dark"===a.palette.mode?"#aaa":"#666";let[r,i]=(0,l.useState)(!0),{table:o}=ex();if(!o||!r||o.head.prompts.length<2)return null;let c=o.body.flatMap(e=>e.outputs.map(e=>e.score)),d=new Set(c);return 1===d.size?null:(0,s.jsx)(eN.SV,{fallback:null,children:(0,s.jsxs)(ee.Z,{sx:{position:"relative",padding:3,mt:2},children:[(0,s.jsx)(eb.Z,{style:{position:"absolute",right:0,top:0},onClick:()=>i(!1),children:(0,s.jsx)(eI.Z,{})}),(0,s.jsxs)("div",{style:{display:"flex",justifyContent:"space-between",width:"100%"},children:[(0,s.jsx)("div",{style:{width:"33%"},children:(0,s.jsx)(eT,{table:o})}),(0,s.jsx)("div",{style:{width:"33%"},children:d.size<=3&&Object.keys((null===(t=o.head.prompts[0].metrics)||void 0===t?void 
0:t.namedScores)||{}).length>1?(0,s.jsx)(eO,{table:o}):(0,s.jsx)(eF,{table:o})}),(0,s.jsx)("div",{style:{width:"33%"},children:(0,s.jsx)(eM,{table:o})})]})]})})}),eA=n(7660),eU=n(3216),eW=n(5971);n(55974);var ez=e=>{let{lookup:t,metricTotals:n,onSearchTextChange:a}=e,[r,i]=l.useState(!1);if(!t||!Object.keys(t).length)return null;let o=Object.entries(t),c=r?o:o.slice(0,10);return(0,s.jsxs)("div",{className:"custom-metric-container",children:[c.map(e=>{let[t,l]=e;return t&&void 0!==l?(0,s.jsxs)("span",{onClick:()=>a&&a("metric=".concat(t,":")),className:a?"clickable":"",children:[t,":"," ",n&&n[t]?(0,s.jsxs)(s.Fragment,{children:[(l/n[t]*100).toFixed(2),"% (",l.toFixed(2),"/",n[t].toFixed(2),")"]}):l.toFixed(2)]},t):null}),o.length>10&&(0,s.jsx)("span",{className:"clickable",onClick:()=>i(!r),children:r?"Show less":"Show more..."})]})},eV=n(39329),e_=n(64748);function eB(e,t){return e.length<=t?e:e.slice(0,t)+"..."}function eJ(e){let{gradingResults:t}=e,[n,a]=(0,l.useState)({});if(!t)return null;let r=e=>{a(t=>({...t,[e]:!t[e]}))};return(0,s.jsxs)(P.Z,{mt:2,children:[(0,s.jsx)(ei.Z,{variant:"subtitle1",children:"Assertions"}),(0,s.jsx)(el.Z,{children:(0,s.jsxs)(et.Z,{children:[(0,s.jsx)(ea.Z,{children:(0,s.jsxs)(er.Z,{children:[(0,s.jsx)(es.Z,{style:{fontWeight:"bold"},children:"Pass"}),(0,s.jsx)(es.Z,{style:{fontWeight:"bold"},children:"Score"}),(0,s.jsx)(es.Z,{style:{fontWeight:"bold"},children:"Type"}),(0,s.jsx)(es.Z,{style:{fontWeight:"bold"},children:"Value"}),(0,s.jsx)(es.Z,{style:{fontWeight:"bold"},children:"Reason"})]})}),(0,s.jsx)(en.Z,{children:t.map((e,t)=>{var l,a,i;if(!e)return null;let o=(null===(l=e.assertion)||void 0===l?void 0:l.value)?"object"==typeof e.assertion.value?JSON.stringify(e.assertion.value,null,2):String(e.assertion.value):"-",c=eB(o,300),d=n[t]||!1;return(0,s.jsxs)(er.Z,{children:[(0,s.jsx)(es.Z,{children:e.pass?"✅":"❌"}),(0,s.jsx)(es.Z,{children:null===(a=e.score)||void 0===a?void 
0:a.toFixed(2)}),(0,s.jsx)(es.Z,{children:(null===(i=e.assertion)||void 0===i?void 0:i.type)||""}),(0,s.jsx)(es.Z,{style:{whiteSpace:"pre-wrap",cursor:"pointer"},onClick:()=>r(t),children:d?o:c}),(0,s.jsx)(es.Z,{style:{whiteSpace:"pre-wrap"},children:e.reason})]},t)})})]})})]})}function eH(e){let{open:t,onClose:n,prompt:a,provider:r,output:i,gradingResults:o,metadata:c}=e,[d,u]=(0,l.useState)(!1),[h,p]=(0,l.useState)({});(0,l.useEffect)(()=>{u(!1)},[a]);let m=async e=>{await navigator.clipboard.writeText(e),u(!0)},x=e=>{p(t=>({...t,[e]:!t[e]}))};return(0,s.jsxs)(G.Z,{open:t,onClose:n,fullWidth:!0,maxWidth:"lg",children:[(0,s.jsxs)(Q.Z,{children:["Details",r&&": ".concat(r)]}),(0,s.jsxs)($.Z,{children:[(0,s.jsxs)(P.Z,{mb:2,children:[(0,s.jsx)(ei.Z,{variant:"subtitle1",style:{marginBottom:"1rem"},children:"Prompt"}),(0,s.jsx)(e_.Z,{readOnly:!0,value:a,style:{width:"100%",padding:"0.75rem"},maxRows:20}),(0,s.jsx)(eb.Z,{onClick:()=>m(a),style:{position:"absolute",right:"10px",top:"10px"},children:d?(0,s.jsx)(eg.Z,{}):(0,s.jsx)(eV.Z,{})})]}),(null==c?void 0:c.redteamFinalPrompt)&&(0,s.jsxs)(P.Z,{my:2,children:[(0,s.jsx)(ei.Z,{variant:"subtitle1",style:{marginBottom:"1rem",marginTop:"1rem"},children:"Modified User Input (Red 
Team)"}),(0,s.jsx)(e_.Z,{readOnly:!0,maxRows:20,value:c.redteamFinalPrompt,style:{width:"100%",padding:"0.75rem"}})]}),i&&(0,s.jsxs)(P.Z,{my:2,children:[(0,s.jsx)(ei.Z,{variant:"subtitle1",style:{marginBottom:"1rem",marginTop:"1rem"},children:"Output"}),(0,s.jsx)(e_.Z,{readOnly:!0,maxRows:20,value:i,style:{width:"100%",padding:"0.75rem"}})]}),(0,s.jsx)(eJ,{gradingResults:o}),c&&Object.keys(c).length>0&&(0,s.jsxs)(P.Z,{my:2,children:[(0,s.jsx)(ei.Z,{variant:"subtitle1",style:{marginBottom:"1rem",marginTop:"1rem"},children:"Metadata"}),(0,s.jsx)(el.Z,{children:(0,s.jsxs)(et.Z,{size:"small",children:[(0,s.jsx)(ea.Z,{children:(0,s.jsxs)(er.Z,{children:[(0,s.jsx)(es.Z,{children:(0,s.jsx)("strong",{children:"Key"})}),(0,s.jsx)(es.Z,{children:(0,s.jsx)("strong",{children:"Value"})})]})}),(0,s.jsx)(en.Z,{children:Object.entries(c).map(e=>{let[t,n]=e,l="string"==typeof n?n:JSON.stringify(n),a=eB(l,300),r=h[t]||!1;return(0,s.jsxs)(er.Z,{children:[(0,s.jsx)(es.Z,{children:t}),(0,s.jsx)(es.Z,{style:{whiteSpace:"pre-wrap",cursor:"pointer"},onClick:()=>x(t),children:r?l:a})]},t)})})]})})]})]}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:n,children:"Close"})})]})}var eK=n(89396),eY=n(64173),eX=function(){let e=(0,eR.Z)(),[t,n]=l.useState(!1),a=()=>{n(!1)};return(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("div",{style:{textAlign:"center",marginTop:20,marginBottom:40},children:(0,s.jsx)(D.Z,{variant:"text",color:"primary",startIcon:(0,s.jsx)(eK.Z,{}),onClick:()=>{n(!0)},children:"Generate test cases"})}),(0,s.jsxs)(G.Z,{open:t,onClose:a,children:[(0,s.jsx)(Q.Z,{children:"Run on Command Line"}),(0,s.jsx)($.Z,{children:(0,s.jsxs)(eY.Z,{children:[(0,s.jsx)("p",{children:"This feature is in beta. 
UI coming soon."}),(0,s.jsxs)("p",{children:["Run"," ",(0,s.jsx)(P.Z,{component:"code",sx:{backgroundColor:"dark"===e.palette.mode?"#424242":"#f0f0f0",padding:"2px 4px",borderRadius:"4px"},children:"promptfoo generate dataset"}),"to generate test cases on the command line."]})]})}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:a,color:"primary",children:"Close"})})]})]})};function eG(e){return"string"==typeof e||"number"==typeof e?e.toString().length:Array.isArray(e)?e.reduce((e,t)=>e+eG(t),0):l.isValidElement(e)&&e.props.children?l.Children.toArray(e.props.children).reduce((e,t)=>e+eG(t),0):0}let eq=l.memo(function(e){let t,{text:n,maxLength:a}=e,[r,i]=l.useState(!0),o=()=>{i(!r)},c=function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;if("string"==typeof e||"number"==typeof e){let n=e.toString();return n.slice(0,a-t)}if(Array.isArray(e)){let n=[],s=t;for(let t of e){let e=eG(t);if(s+e>a){n.push(c(t,s));break}n.push(t),s+=e}return n}if(l.isValidElement(e)&&e.props.children){let n=eG(e.props.children);if(n>a-t)return l.cloneElement(e,{...e.props,children:c(e.props.children,t)})}return e};t=l.isValidElement(n)||"string"==typeof n?n:JSON.stringify(n);let d=r?c(t):t,u=eG(t)>a;return(0,s.jsxs)("div",{style:{cursor:u?"pointer":"normal"},onMouseDown:e=>{let t=e.clientX,n=e.clientY,s=e=>{let l=e.clientX,a=e.clientY;5>Math.abs(l-t)&&5>Math.abs(a-n)&&o(),document.removeEventListener("mouseup",s)};document.addEventListener("mouseup",s)},children:[d,r&&eG(t)>a&&(0,s.jsx)("span",{children:"..."})]})});var e$=n(45391),eQ=n(40182),e0=n(38640),e1=n.n(e0),e2=n(33352),e3=n(15014),e5=n(65101);n(16658);var e4=e=>{let{failReasons:t}=e,[n,a]=(0,l.useState)(0);return 
t.length<1?null:(0,s.jsxs)("div",{className:"fail-reason",children:[t.length>1&&(0,s.jsxs)("span",{className:"fail-reason-carousel-controls",children:[(0,s.jsx)(eb.Z,{onClick:()=>{a(e=>e>0?e-1:t.length-1)},children:(0,s.jsx)(e3.Z,{sx:{fontSize:12}})}),(0,s.jsxs)("span",{children:[n+1,"/",t.length]}),(0,s.jsx)(eb.Z,{onClick:()=>{a(e=>e<t.length-1?e+1:0)},children:(0,s.jsx)(e5.Z,{sx:{fontSize:12}})})]}),t[n].trim().split("\n").map((e,t)=>(0,s.jsxs)(l.Fragment,{children:[e,(0,s.jsx)("br",{})]},t))]})},e8=e=>{let{open:t,contextText:n,commentText:l,onClose:a,onSave:r,onChange:i}=e,o="dark"===(0,eR.Z)().palette.mode;return(0,s.jsxs)(G.Z,{open:t,onClose:a,fullWidth:!0,maxWidth:"sm",children:[(0,s.jsx)(Q.Z,{children:"Edit Comment"}),(0,s.jsxs)($.Z,{children:[(0,s.jsx)(P.Z,{sx:{backgroundColor:o?"#1e1e1e":"#f0f0f0",padding:2,marginBottom:2},children:n}),(0,s.jsx)(J.Z,{autoFocus:!0,margin:"dense",type:"text",fullWidth:!0,multiline:!0,rows:4,value:l,onChange:e=>i(e.target.value)})]}),(0,s.jsxs)(q.Z,{children:[(0,s.jsx)(D.Z,{onClick:r,color:"primary",variant:"contained",children:"Save"}),(0,s.jsx)(D.Z,{onClick:a,color:"primary",children:"Cancel"})]})]})};let e7=()=>{let e=(0,l.useContext)(c);if(void 0===e)throw Error("useShiftKey must be used within a ShiftKeyProvider");return e};var e9=n(68192);let e6=l.memo(function(e){var t,n,a,r,i,o,c,d,u,h,p,m,x;let v,g,j,f,b,y,{output:Z,maxTextLength:w,rowIndex:C,promptIndex:k,onRating:S,firstOutput:E,showDiffs:N,searchText:I,showStats:R}=e,{renderMarkdown:P,prettifyJson:D,showPrompts:F,showPassFail:T}=ex(),[M,O]=l.useState(!1),[L,A]=l.useState(!1),[U,W]=l.useState(null),z=e=>{W(e||null),A(!L)},[V,_]=l.useState(!1),[B,J]=l.useState((null===(t=Z.gradingResult)||void 0===t?void 0:t.comment)||""),K=()=>{_(!0)},Y="string"==typeof Z.text?Z.text:JSON.stringify(Z.text),X=[];if(!Z.pass&&Y.includes("---")&&(X=((null===(h=Z.gradingResult)||void 0===h?void 
0:h.componentResults)||[]).filter(e=>!!e&&!e.pass).map(e=>e.reason),Y=Y.split("---").slice(1).join("---")),N&&E){let e,t="string"==typeof E.text?E.text:JSON.stringify(E.text);t.includes("---")&&(t=t.split("---").slice(1).join("---"));try{JSON.parse(t),JSON.parse(Y),e=(0,e9.CT)(t,Y)}catch(n){e=t.includes(". ")&&Y.includes(". ")?(0,e9.SY)(t,Y):(0,e9.NV)(t,Y)}v=(0,s.jsx)(s.Fragment,{children:e.map((e,t)=>e.added?(0,s.jsx)("ins",{children:e.value},t):e.removed?(0,s.jsx)("del",{children:e.value},t):(0,s.jsx)("span",{children:e.value},t))})}if(I)try{let e;let t=RegExp(I,"gi"),n=[];for(;null!==(e=t.exec(Y));)n.push({start:e.index,end:t.lastIndex});v=(0,s.jsx)(s.Fragment,{children:n.length>0?(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("span",{children:Y.substring(0,n[0].start)},"text-before"),n.map((e,t)=>(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("span",{className:"search-highlight",children:Y.substring(e.start,e.end)},"match-"+t),(0,s.jsx)("span",{children:Y.substring(e.end,n[t+1]?n[t+1].start:Y.length)},"text-after-"+t)]}))]}):(0,s.jsx)("span",{children:Y},"no-match")})}catch(e){console.error("Invalid regular expression:",e.message)}else if(P&&!N)v=(0,s.jsx)(eW.U,{remarkPlugins:[e2.Z],components:{img:e=>{let{src:t,alt:n}=e;return(0,s.jsx)("img",{loading:"lazy",src:t,alt:n,onClick:()=>z(t),style:{cursor:"pointer"}})}},children:Y});else if(D)try{v=(0,s.jsx)("pre",{children:JSON.stringify(JSON.parse(Y),null,2)})}catch(e){}let G=l.useCallback(e=>{var t;S(e,void 0,null===(t=Z.gradingResult)||void 0===t?void 0:t.comment)},[S,null===(n=Z.gradingResult)||void 0===n?void 0:n.comment]),q=l.useCallback(()=>{let e=prompt("Set test score (0.0 - 1.0):",String(Z.score));if(null!==e){let n=Number.parseFloat(e);if(!Number.isNaN(n)&&n>=0&&n<=1){var t;S(void 0,n,null===(t=Z.gradingResult)||void 0===t?void 0:t.comment)}else alert("Invalid score. 
Please enter a value between 0.0 and 1.0.")}},[S,Z.score,null===(a=Z.gradingResult)||void 0===a?void 0:a.comment]),[$,Q]=l.useState(!1),ee=l.useCallback(()=>{navigator.clipboard.writeText(Z.text),Q(!0)},[Z.text]);if(Z.latencyMs&&(j=(0,s.jsxs)("span",{children:[Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(Z.latencyMs)," ms"]})),null===(r=Z.tokenUsage)||void 0===r?void 0:r.completion){let e=Z.tokenUsage.completion/(Z.latencyMs/1e3);f=(0,s.jsx)("span",{children:Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e)})}if(Z.cost&&(b=(0,s.jsxs)("span",{children:["$",Z.cost.toPrecision(2)]})),null===(i=Z.tokenUsage)||void 0===i?void 0:i.cached)g=(0,s.jsxs)("span",{children:[Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(Z.tokenUsage.cached)," ","(cached)"]});else if(null===(o=Z.tokenUsage)||void 0===o?void 0:o.total){let e=Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(null!==(p=Z.tokenUsage.prompt)&&void 0!==p?p:0),t=Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(null!==(m=Z.tokenUsage.completion)&&void 0!==m?m:0),n=Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(Z.tokenUsage.total);g=(0,s.jsx)(H.Z,{title:"".concat(e," prompt tokens + ").concat(t," completion tokens = ").concat(n," total"),children:(0,s.jsxs)("span",{children:[n,("0"!==e||"0"!==t)&&" (".concat(e,"+").concat(t,")")]})})}let et=(null===(c=Z.gradingResult)||void 0===c?void 0:c.comment)&&"!highlight"!==Z.gradingResult.comment?(0,s.jsx)("div",{className:"comment",onClick:K,children:Z.gradingResult.comment}):null,en=R?(0,s.jsxs)("div",{className:"cell-detail",children:[g&&(0,s.jsxs)("div",{className:"stat-item",children:[(0,s.jsx)("strong",{children:"Tokens:"})," ",g]}),j&&(0,s.jsxs)("div",{className:"stat-item",children:[(0,s.jsx)("strong",{children:"Latency:"})," ",j]}),f&&(0,s.jsxs)("div",{className:"stat-item",children:[(0,s.jsx)("strong",{children:"Tokens/Sec:"})," 
",f]}),b&&(0,s.jsxs)("div",{className:"stat-item",children:[(0,s.jsx)("strong",{children:"Cost:"})," ",b]})]}):null,es=e7(),el=(0,s.jsxs)("div",{className:"cell-actions",children:[es&&(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("span",{className:"action",onClick:ee,onMouseDown:e=>e.preventDefault(),children:(0,s.jsx)(H.Z,{title:"Copy output to clipboard",children:(0,s.jsx)("span",{children:$?"✅":"\uD83D\uDCCB"})})}),(0,s.jsx)("span",{className:"action",onClick:()=>{let e;B.startsWith("!highlight")?S(void 0,void 0,e=B.slice(10).trim()):S(void 0,void 0,e=("!highlight "+B).trim()),J(e)},onMouseDown:e=>e.preventDefault(),children:(0,s.jsx)(H.Z,{title:"Toggle test highlight",children:(0,s.jsx)("span",{children:"\uD83C\uDF1F"})})})]}),Z.prompt&&(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("span",{className:"action",onClick:()=>{O(!0)},children:(0,s.jsx)(H.Z,{title:"View output and test details",children:(0,s.jsx)("span",{children:"\uD83D\uDD0E"})})}),(0,s.jsx)(eH,{open:M,onClose:()=>{O(!1)},prompt:Z.prompt,provider:Z.provider,gradingResults:null===(d=Z.gradingResult)||void 0===d?void 0:d.componentResults,output:Y,metadata:Z.metadata})]}),(0,s.jsx)("span",{className:"action",onClick:()=>G(!0),children:(0,s.jsx)(H.Z,{title:"Mark test passed (score 1.0)",children:(0,s.jsx)("span",{children:"\uD83D\uDC4D"})})}),(0,s.jsx)("span",{className:"action",onClick:()=>G(!1),children:(0,s.jsx)(H.Z,{title:"Mark test failed (score 0.0)",children:(0,s.jsx)("span",{children:"\uD83D\uDC4E"})})}),(0,s.jsx)("span",{className:"action",onClick:q,children:(0,s.jsx)(H.Z,{title:"Set test score",children:(0,s.jsx)("span",{children:"\uD83D\uDD22"})})}),(0,s.jsx)("span",{className:"action",onClick:K,children:(0,s.jsx)(H.Z,{title:"Edit comment",children:(0,s.jsx)("span",{children:"✏️"})})})]}),ea={};(null===(u=Z.gradingResult)||void 0===u?void 0:u.comment)==="!highlight"&&(ea.backgroundColor="#ffffeb");let 
er=0,ei=0,eo=Z.gradingResult;if(eo?eo.componentResults?eo.componentResults.forEach(e=>{(null==e?void 0:e.pass)?er++:ei++}):(er=eo.pass?1:0,ei=eo.pass?0:1):Z.pass?er=1:Z.pass||(ei=1),1===ei&&1===er)y=(0,s.jsxs)(s.Fragment,{children:["".concat(ei," FAIL")," ","".concat(er," PASS")]});else{let e="";ei>1||er>1&&ei>0?e="".concat(ei," FAIL"):1===ei&&(e="FAIL");let t="";er>1||ei>1&&er>0?t="".concat(er," PASS"):1===er&&0===ei&&(t="PASS");let n=e&&t?" ":"";y=(0,s.jsxs)(s.Fragment,{children:[e,n,t]})}let ec=null===(x=Z.score)||0===x||1===x?"":"(".concat(x.toFixed(2),")");return(0,s.jsxs)("div",{className:"cell",style:ea,children:[T&&(0,s.jsx)(s.Fragment,{children:Z.pass?(0,s.jsx)(s.Fragment,{children:(0,s.jsxs)("div",{className:"status pass",children:[(0,s.jsxs)("div",{className:"pill",children:[y,ec&&(0,s.jsxs)("span",{className:"score",children:[" ",ec]})]}),(0,s.jsx)(ez,{lookup:Z.namedScores})]})}):(0,s.jsx)(s.Fragment,{children:(0,s.jsxs)("div",{className:"status fail",children:[(0,s.jsxs)("div",{className:"pill",children:[y,ec&&(0,s.jsxs)("span",{className:"score",children:[" ",ec]})]}),(0,s.jsx)(ez,{lookup:Z.namedScores}),(0,s.jsx)("span",{className:"fail-reason",children:(0,s.jsx)(e4,{failReasons:X})})]})})}),F&&E.prompt&&(0,s.jsxs)("div",{className:"prompt",children:[(0,s.jsx)("span",{className:"pill",children:"Prompt"}),Z.prompt]}),(0,s.jsx)(eq,{text:v||Y,maxLength:w}),et,en,el,L&&U&&(0,s.jsx)("div",{className:"lightbox",onClick:()=>z(),children:(0,s.jsx)("img",{src:U,alt:"Lightbox"})}),(0,s.jsx)(e8,{open:V,contextText:Z.text,commentText:B,onClose:()=>{_(!1)},onSave:()=>{S(void 0,void 0,B),_(!1)},onChange:J})]})});function te(e){let{text:t,maxLength:n,expandedText:a,resourceId:r,className:i}=e,[o,c]=l.useState(!1);return(0,s.jsxs)("div",{className:"".concat(i||""),children:[(0,s.jsx)(eq,{text:t,maxLength:n}),a&&(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(H.Z,{title:"View 
prompt",children:(0,s.jsx)("span",{className:"action",onClick:()=>{c(!0)},children:"\uD83D\uDD0E"})}),(0,s.jsx)(eH,{open:o,onClose:()=>{c(!1)},prompt:a}),r&&(0,s.jsx)(H.Z,{title:"View other evals and datasets for this prompt",children:(0,s.jsx)("span",{className:"action",children:(0,s.jsx)(e1(),{href:"/prompts/?id=".concat(r),target:"_blank",children:(0,s.jsx)(e$.Z,{fontSize:"small"})})})})]})]})}n(93207);var tt=l.memo(function(e){let{maxTextLength:t,columnVisibility:n,wordBreak:a,filterMode:r,failureFilter:i,searchText:c,showStats:d,onFailureFilterToggle:u,onSearchTextChange:h}=e,{evalId:p,table:x,setTable:v,config:g,inComparisonMode:j}=ex(),{showToast:f}=m();(0,y.Z)(x,"Table should be defined");let{head:b,body:Z}=x,w=l.useCallback(async(e,t,n,s,l)=>{var a,r;let i=[...Z],c={...i[e]},d=[...c.outputs],u=null!=n?n:d[t].pass,h=void 0===s?n?1:0:s||0;d[t].pass=u,d[t].score=h;let m=(null===(a=d[t].gradingResult)||void 0===a?void 0:a.componentResults)||[];if(void 0!==n){let e=m.findIndex(e=>{var t;return(null===(t=e.assertion)||void 0===t?void 0:t.type)==="human"}),t={pass:u,score:h,reason:"Manual result (overrides all other grading results)",comment:l,assertion:{type:"human"}};-1===e?m.push(t):m[e]=t}let x={...d[t].gradingResult||{},pass:u,score:h,reason:"Manual result (overrides all other grading results)",comment:l,assertion:(null===(r=d[t].gradingResult)||void 0===r?void 0:r.assertion)||null,componentResults:m};d[t].gradingResult=x,c.outputs=d,i[e]=c;let g={head:b,body:i};if(v(g),j)f("Ratings are not saved in comparison mode","warning");else try{let e=await fetch("".concat(await (0,o.b)(),"/api/eval/").concat(p),{method:"PATCH",headers:{"Content-Type":"application/json"},body:JSON.stringify({table:g})});if(!e.ok)throw Error("Network response was not ok")}catch(e){console.error("Failed to update table:",e)}},[Z,b,v,p,j,f]),C=Object.keys(n).length>0,k=l.useMemo(()=>{try{return RegExp(c,"i")}catch(e){return console.error("Invalid regular 
expression:",e.message),null}},[c]),S=l.useMemo(()=>{try{return Z.map((e,t)=>({...e,outputs:e.outputs.map((e,n)=>({...e,originalRowIndex:t,originalPromptIndex:n}))})).filter(e=>{let t=!0;return"failures"===r?t=e.outputs.some((e,t)=>{let s="Prompt ".concat(t+1);return i[s]&&!e.pass&&(!C||n[s])}):"different"===r?t=!e.outputs.every(t=>t.text===e.outputs[0].text):"highlights"===r&&(console.log(e.outputs[0].text),t=e.outputs.some(e=>{var t,n;return null===(n=e.gradingResult)||void 0===n?void 0:null===(t=n.comment)||void 0===t?void 0:t.startsWith("!highlight")})),!!t&&(!c||!k||e.outputs.some(t=>{var n,s;let l=e.vars.map(e=>"var=".concat(e)).join(" "),a="".concat(t.text," ").concat(Object.keys(t.namedScores).map(e=>"metric=".concat(e,":").concat(t.namedScores[e])).join(" ")," ").concat((null===(n=t.gradingResult)||void 0===n?void 0:n.reason)||""," ").concat((null===(s=t.gradingResult)||void 0===s?void 0:s.comment)||""),r="".concat(l," ").concat(a);return k.test(r)}))})}catch(e){return console.error("Invalid regular expression:",e.message),Z}},[Z,i,r,c,n,C,k]),[E,N]=l.useState({pageIndex:0,pageSize:50});l.useEffect(()=>{N(e=>({...e,pageIndex:0}))},[i,r,c]);let I=l.useMemo(()=>b.prompts.map((e,t)=>Z.reduce((e,n)=>e+(n.outputs[t].pass?1:0),0)),[b.prompts,Z]),R=l.useMemo(()=>b.prompts.map((e,t)=>Z.reduce((e,n)=>{var s,l;return e+((null===(l=n.outputs[t].gradingResult)||void 0===l?void 0:null===(s=l.componentResults)||void 0===s?void 0:s.length)||0)},0)),[b.prompts,Z]),T=l.useMemo(()=>b.prompts.map((e,t)=>Z.reduce((e,n)=>{var s;let l=null===(s=n.outputs[t].gradingResult)||void 0===s?void 0:s.componentResults;return e+(l?l.filter(e=>null==e?void 
0:e.pass).length:0)},0)),[b.prompts,Z]),M=l.useMemo(()=>I.reduce((e,t,n,s)=>t>s[e]?n:e,0),[I]),O=I[M],L=l.useMemo(()=>(0,eA.Cl)(),[]),{renderMarkdown:A}=ex(),U=l.useMemo(()=>b.vars.length>0?[L.group({id:"vars",header:()=>(0,s.jsx)("span",{className:"font-bold",children:"Variables"}),columns:b.vars.map((e,n)=>L.accessor(e=>e.vars[n],{id:"Variable ".concat(n+1),header:()=>(0,s.jsx)(te,{text:e,maxLength:t,className:"font-bold"}),cell:e=>{let n=e.getValue();return(0,s.jsx)("div",{className:"cell",children:A?(0,s.jsx)(eW.U,{remarkPlugins:[e2.Z],children:n}):(0,s.jsx)(eq,{text:n,maxLength:t})})},size:50}))})]:[],[L,b.vars,t,A]),W=l.useCallback((e,t)=>S[e].outputs[t],[S]),V=l.useCallback(e=>S[e].outputs[0],[S]),B=l.useMemo(()=>{let e={};return null==x||x.body.forEach(t=>{var n;null===(n=t.test.assert)||void 0===n||n.forEach(t=>{t.metric&&(e[t.metric]=(e[t.metric]||0)+1),"assert"in t&&Array.isArray(t.assert)&&t.assert.forEach(t=>{"metric"in t&&t.metric&&(e[t.metric]=(e[t.metric]||0)+1)})})}),e},[x]),K=l.useMemo(()=>[L.group({id:"prompts",header:()=>(0,s.jsx)("span",{className:"font-bold",children:"Outputs"}),columns:b.prompts.map((e,n)=>L.accessor(e=>(function(e){if("string"==typeof e){let t=e.startsWith("[PASS]"),n=e;return e.startsWith("[PASS]")?n=n.slice(6):e.startsWith("[FAIL]")&&(n=n.slice(6)),{text:n,pass:t,score:t?1:0}}return e})(e.outputs[n]),{id:"Prompt ".concat(n+1),header:()=>{var l,a,o,c,p,m,x,v;let j=I[n]&&Z.length?(I[n]/Z.length*100).toFixed(2):"0.00",f=I[n]===O&&0!==O,b="Prompt ".concat(n+1),y=i[b]||!1,w=d?(0,s.jsxs)("div",{className:"prompt-detail",children:[R[n]?(0,s.jsxs)("div",{children:[(0,s.jsx)("strong",{children:"Asserts:"})," ",T[n],"/",R[n]," passed"]}):null,(null===(l=e.metrics)||void 0===l?void 0:l.totalLatencyMs)?(0,s.jsxs)("div",{children:[(0,s.jsx)("strong",{children:"Avg Latency:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.totalLatencyMs/Z.length)," ","ms"]}):null,(null===(o=e.metrics)||void 0===o?void 
0:null===(a=o.tokenUsage)||void 0===a?void 0:a.total)?(0,s.jsxs)("div",{children:[(0,s.jsx)("strong",{children:"Avg Tokens:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.tokenUsage.total/Z.length)]}):null,(null===(c=e.metrics)||void 0===c?void 0:c.totalLatencyMs)&&(null===(m=e.metrics)||void 0===m?void 0:null===(p=m.tokenUsage)||void 0===p?void 0:p.completion)?(0,s.jsxs)("div",{children:[(0,s.jsx)("strong",{children:"Tokens/Sec:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.tokenUsage.completion/(e.metrics.totalLatencyMs/1e3))]}):null,(null===(x=e.metrics)||void 0===x?void 0:x.cost)?(0,s.jsxs)("div",{children:[(0,s.jsx)("strong",{children:"Cost:"})," $",e.metrics.cost.toPrecision(2)]}):null]}):null,C=Array.isArray(null==g?void 0:g.providers)?g.providers[n]:void 0,k=e.provider?e.provider.split(":"):[],S=(0,s.jsx)(H.Z,{title:C?(0,s.jsx)("pre",{children:eC.default.dump(C)}):"",children:k.length>1?(0,s.jsxs)(s.Fragment,{children:[k[0],":",(0,s.jsx)("strong",{children:k.slice(1).join(":")})]}):(0,s.jsx)("strong",{children:e.provider})});return(0,s.jsxs)("div",{className:"output-header",children:[(0,s.jsxs)("div",{className:"pills",children:[e.provider?(0,s.jsx)("div",{className:"provider",children:S}):null,(0,s.jsx)("div",{className:"summary",children:(0,s.jsxs)("div",{className:"highlight ".concat(f?"success":""),children:[(0,s.jsxs)("strong",{children:[j,"% passing"]})," (",I[n],"/",Z.length," cases)"]})}),(null===(v=e.metrics)||void 0===v?void 0:v.namedScores)&&Object.keys(e.metrics.namedScores).length>0?(0,s.jsx)(ez,{lookup:e.metrics.namedScores,metricTotals:B,onSearchTextChange:h}):null]}),(0,s.jsx)(te,{className:"prompt-container",text:e.label||e.display||e.raw,expandedText:e.raw,maxLength:t,resourceId:e.id}),w,"failures"===r&&(0,s.jsx)(eQ.Z,{sx:{"& .MuiFormControlLabel-label":{fontSize:"0.75rem"}},control:(0,s.jsx)(F.Z,{checked:y,onChange:e=>u(b,e.target.checked)}),label:"Show 
failures"})]})},cell:e=>{var l,a;let i=W(e.row.index,n);return(0,s.jsx)(e6,{output:i,maxTextLength:t,rowIndex:e.row.index,promptIndex:n,onRating:w.bind(null,null!==(l=i.originalRowIndex)&&void 0!==l?l:e.row.index,null!==(a=i.originalPromptIndex)&&void 0!==a?a:n),firstOutput:V(e.row.index),showDiffs:"different"===r,searchText:c,showStats:d})}}))})],[Z.length,null==g?void 0:g.providers,L,i,r,V,W,w,b.prompts,O,t,B,R,T,I,u,h,c,d]),Y=l.useMemo(()=>{let e=Z.some(e=>e.description);return e?{accessorFn:e=>e.description||"",id:"description",header:()=>(0,s.jsx)("span",{className:"font-bold",children:"Description"}),cell:e=>(0,s.jsx)("div",{className:"cell",children:(0,s.jsx)(eq,{text:String(e.getValue()),maxLength:t})}),size:50}:null},[Z,t]),X=l.useMemo(()=>{let e=[];return Y&&e.push(Y),e.push(...U,...K),e},[Y,U,K]),G=(0,eU.b7)({data:S,columns:X,columnResizeMode:"onChange",getCoreRowModel:(0,eA.sC)(),getPaginationRowModel:(0,eA.G_)(),state:{columnVisibility:n,pagination:E}});return(0,s.jsxs)("div",{children:[(0,s.jsxs)("table",{className:"results-table firefox-fix ".concat(t<=25?"compact":""),style:{wordBreak:a},children:[(0,s.jsx)("thead",{children:G.getHeaderGroups().map(e=>(0,s.jsx)("tr",{className:"header",children:e.headers.map(e=>(0,s.jsxs)("th",{colSpan:e.colSpan,style:{width:e.getSize()},children:[e.isPlaceholder?null:(0,eU.ie)(e.column.columnDef.header,e.getContext()),(0,s.jsx)("div",{onMouseDown:e.getResizeHandler(),onTouchStart:e.getResizeHandler(),className:"resizer ".concat(e.column.getIsResizing()?"isResizing":"")})]},e.id))},e.id))}),(0,s.jsx)("tbody",{children:G.getRowModel().rows.map((e,t)=>{let n=!1;return(0,s.jsx)("tr",{children:e.getVisibleCells().map(e=>{let l=e.column.id.startsWith("Variable")||"description"===e.column.id,a=!l&&!n;return a&&(n=!0),(0,s.jsx)("td",{style:{width:e.column.getSize()},className:"".concat(l?"variable":""," ").concat(0!==t||l?"":"first-prompt-row"," 
").concat(a?"first-prompt-col":""),children:(0,eU.ie)(e.column.columnDef.cell,e.getContext())},e.id)})},e.id)})})]}),G.getPageCount()>1&&(0,s.jsxs)(P.Z,{className:"pagination",mx:1,sx:{display:"flex",alignItems:"center",gap:2},children:[(0,s.jsx)(D.Z,{onClick:()=>{N(e=>({...e,pageIndex:Math.max(e.pageIndex-1,0)})),window.scrollTo(0,0)},disabled:0===G.getState().pagination.pageIndex,variant:"contained",children:"Previous"}),(0,s.jsxs)(ei.Z,{component:"span",sx:{display:"flex",alignItems:"center",gap:1},children:["Page",(0,s.jsx)(J.Z,{size:"small",type:"number",value:G.getState().pagination.pageIndex+1,onChange:e=>{let t=e.target.value?Number(e.target.value)-1:0;N(e=>({...e,pageIndex:Math.min(Math.max(t,0),G.getPageCount()-1)}))},InputProps:{style:{width:"60px",textAlign:"center"}},variant:"outlined"}),(0,s.jsxs)("span",{children:["of ",G.getPageCount()]})]}),(0,s.jsx)(D.Z,{onClick:()=>{N(e=>({...e,pageIndex:Math.min(e.pageIndex+1,G.getPageCount()-1)})),window.scrollTo(0,0)},disabled:G.getState().pagination.pageIndex+1>=G.getPageCount(),variant:"contained",children:"Next"}),(0,s.jsxs)(ei.Z,{component:"span",sx:{display:"flex",alignItems:"center",gap:1},children:[(0,s.jsxs)(_.Z,{value:E.pageSize,onChange:e=>{N({pageIndex:0,pageSize:Number(e.target.value)}),window.scrollTo(0,0)},displayEmpty:!0,inputProps:{"aria-label":"Results per page"},size:"small",sx:{m:1,minWidth:80},children:[(0,s.jsx)(z.Z,{value:10,children:"10"}),(0,s.jsx)(z.Z,{value:50,children:"50"}),(0,s.jsx)(z.Z,{value:100,children:"100"}),(0,s.jsx)(z.Z,{value:500,children:"500"}),(0,s.jsx)(z.Z,{value:1e3,children:"1000"})]}),(0,s.jsx)("span",{children:"results per 
page"})]})]}),(0,s.jsx)(eX,{})]})}),tn=n(4451),ts=e=>{let{open:t,onClose:n}=e,{maxTextLength:l,setMaxTextLength:a,wordBreak:r,setWordBreak:i,showInferenceDetails:o,setShowInferenceDetails:c,renderMarkdown:d,setRenderMarkdown:u,prettifyJson:h,setPrettifyJson:p,showPrompts:m,setShowPrompts:x,showPassFail:v,setShowPassFail:g}=ex();return(0,s.jsxs)(G.Z,{open:t,onClose:n,fullWidth:!0,maxWidth:"sm",children:[(0,s.jsx)(Q.Z,{children:"Table View Settings"}),(0,s.jsxs)($.Z,{children:[(0,s.jsx)(P.Z,{children:(0,s.jsx)(H.Z,{title:"Forcing line breaks makes it easier to adjust column widths to your liking",placement:"right",children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:"break-all"===r,onChange:e=>i(e.target.checked?"break-all":"break-word")}),label:"Force line breaks"})})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:d,onChange:e=>u(e.target.checked)}),label:"Render model outputs as Markdown"})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:h,onChange:e=>p(e.target.checked)}),label:"Prettify JSON outputs"})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(H.Z,{title:"Show the final prompt that produced the output in each cell.",placement:"right",children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:m,onChange:e=>x(e.target.checked)}),label:"Show full prompt in output cell"})})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(H.Z,{title:"Show pass/fail status for each output.",placement:"right",children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:v,onChange:e=>g(e.target.checked)}),label:"Show pass/fail status"})})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(H.Z,{title:"Show detailed inference statistics such as latency, tokens used, cost, etc.",placement:"right",children:(0,s.jsx)(eQ.Z,{control:(0,s.jsx)(F.Z,{checked:o,onChange:e=>c(e.target.checked)}),label:"Show inference details"})})}),(0,s.jsxs)(P.Z,{maxWidth:"sm",children:[(0,s.jsxs)(ei.Z,{mt:2,children:["Max text length: 
",l]}),(0,s.jsx)(tn.ZP,{min:25,max:1e3,value:l,onChange:(e,t)=>a(t)})]})]}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:n,children:"Close"})})]})},tl=e=>{let{open:t,onClose:n,shareUrl:a}=e,r=(0,l.useRef)(null),[i,o]=(0,l.useState)(!1),c=()=>{n(),o(!1)};return(0,s.jsxs)(G.Z,{open:t,onClose:c,PaperProps:{style:{minWidth:"min(660px, 100%)"}},children:[(0,s.jsx)(Q.Z,{children:"Your eval is ready to share"}),(0,s.jsxs)($.Z,{children:[(0,s.jsx)(J.Z,{inputRef:r,value:a,fullWidth:!0,InputProps:{readOnly:!0,endAdornment:(0,s.jsx)(eb.Z,{onClick:()=>{r.current&&(r.current.select(),document.execCommand("copy"),o(!0))},children:i?(0,s.jsx)(eg.Z,{}):(0,s.jsx)(ef.Z,{})})}}),(0,s.jsx)(eY.Z,{sx:{fontSize:"0.75rem"},children:"Shared URLs are deleted after 2 weeks."})]}),(0,s.jsx)(q.Z,{children:(0,s.jsx)(D.Z,{onClick:c,color:"primary",children:"Close"})})]})};n(58022);let ta=(0,K.Z)(B.Z)(e=>{let{theme:t}=e;return{maxWidth:"100%",flexWrap:"wrap",[t.breakpoints.down("sm")]:{flexDirection:"column"}}});function tr(e){var t;let{recentEvals:n,onRecentEvalSelected:a,defaultEvalId:c}=e,d=(0,f.useRouter)(),h=(0,f.useSearchParams)(),{author:p,table:m,setTable:x,config:j,setConfig:b,maxTextLength:B,wordBreak:K,showInferenceDetails:X,evalId:G,setInComparisonMode:q,columnStates:$,setColumnState:Q}=ex(),{setStateFromConfig:ee}=(0,Z.o)(),[et,en]=l.useState((null==h?void 0:h.get("search"))||""),[es]=(0,Y.Nr)(et,1e3),el=e=>{en(e)},[ea,er]=l.useState({}),ei=l.useCallback((e,t)=>{er(n=>({...n,[e]:t}))},[er]);(0,y.Z)(m,"Table data must be loaded before rendering ResultsView");let{head:eo}=m,[ec,eu]=l.useState("all"),[eh,ep]=l.useState(!1),[em,eg]=l.useState(""),[ej,ef]=l.useState(!1),[eb,eZ]=l.useState(null),ew=G||c||"default",eC=async()=>{ef(!0);let e="";try{if(v.T8){let t=await fetch("".concat(r,"/api/eval"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({data:{version:2,createdAt:new Date().toISOString(),results:{table:m},config:j}})}),{id:n}=await 
t.json();e="".concat(i,"/eval/").concat(n)}else e="".concat(window.location.host,"/eval/?evalId=").concat(G);eg(e),ep(!0)}catch(e){alert("Sorry, something went wrong.")}finally{ef(!1)}},eS=async e=>{eZ(null);try{var t;let n=await fetch("".concat(await (0,o.b)(),"/api/results/").concat(e),{cache:"no-store"}),s=await n.json(),l=s.data.results.table,a={head:{prompts:[...m.head.prompts.map(e=>({...e,label:"[".concat(G||c||"Eval A","] ").concat(e.label||"")})),...l.head.prompts.map(t=>({...t,label:"[".concat(e,"] ").concat(t.label||"")}))],vars:m.head.vars},body:m.body.map((e,t)=>{var n;return{...e,outputs:[...e.outputs,...(null===(n=l.body[t])||void 0===n?void 0:n.outputs)||[]]}})};x(a),b({...j,...s.data.config,description:'Combined: "'.concat((null==j?void 0:j.description)||"Eval A",'" and "').concat((null===(t=s.data.config)||void 0===t?void 0:t.description)||"Eval B",'"')}),q(!0)}catch(e){console.error("Error fetching comparison eval:",e),alert("Failed to load comparison eval. Please try again.")}},eN=l.useMemo(()=>m.body.some(e=>e.description),[m.body]),eI=eo.prompts.map((e,t)=>{let n=e.label||e.display||e.raw;return{value:"Prompt ".concat(t+1),label:"Prompt ".concat(t+1,": ").concat(n&&n.length>100?n.slice(0,100)+"...":n||""),group:"Prompts"}}),eR=l.useMemo(()=>[...eN?[{value:"description",label:"Description"}]:[],...eo.vars.map((e,t)=>({value:"Variable ".concat(t+1),label:"Var ".concat(t+1,": ").concat(eo.vars[t].length>100?eo.vars[t].slice(0,97)+"...":eo.vars[t]),group:"Variables"})),...eI],[eo.vars,eI,eN]),[eP,eD]=l.useState(!1),[eF,eT]=l.useState(!1),eM=l.useMemo(()=>[...eN?["description"]:[],...eo.vars.map((e,t)=>"Variable ".concat(t+1)),...eo.prompts.map((e,t)=>"Prompt ".concat(t+1))],[eN,eo.vars,eo.prompts]),eO=$[ew]||{selectedColumns:[],columnVisibility:{}},eA=l.useCallback(e=>{let 
t={};eM.forEach(n=>{t[n]=e.includes(n)}),Q(ew,{selectedColumns:e,columnVisibility:t})},[eM,Q,ew]);l.useEffect(()=>{0!==eO.selectedColumns.length&&eO.selectedColumns.every(e=>eM.includes(e))||eA(eM)},[eM,eO.selectedColumns,eA]);let eU=l.useCallback(e=>{let t=Array.isArray(e.target.value)?e.target.value:e.target.value.split(",");eA(t)},[eA]),eW=async()=>{(0,y.Z)(j,"Config must be loaded before clicking its description");let e=window.prompt("Enter new description:",j.description);if(null!==e&&e!==j.description){let t={...j,description:e};try{let e=await fetch("".concat(await (0,o.b)(),"/api/eval/").concat(G),{method:"PATCH",headers:{"Content-Type":"application/json"},body:JSON.stringify({config:t})});if(!e.ok)throw Error("Network response was not ok");b(t)}catch(e){console.error("Failed to update table:",e)}}},ez=async()=>{if(window.confirm("Are you sure you want to delete this evaluation?"))try{let e=await fetch("".concat(await (0,o.b)(),"/api/eval/").concat(G),{method:"DELETE"});if(!e.ok)throw Error("Network response was not ok");d.push("/")}catch(e){console.error("Failed to delete evaluation:",e),alert("Failed to delete evaluation")}},[eV,e_]=l.useState(!1),[eB,eJ]=l.useState(!1),eH=async()=>{G&&(await navigator.clipboard.writeText(G),e_(!0),setTimeout(()=>{e_(!1)},1e3))};return(0,s.jsxs)("div",{style:{marginLeft:"1rem",marginRight:"1rem"},children:[(0,s.jsxs)(ta,{direction:"row",mb:3,spacing:1,alignItems:"center",className:"eval-header",children:[(0,s.jsxs)(P.Z,{sx:{display:"flex",alignItems:"center",width:"100%",maxWidth:250},children:[(0,s.jsx)(J.Z,{variant:"outlined",size:"small",fullWidth:!0,value:(null==j?void 0:j.description)||G||"",InputProps:{readOnly:!0,startAdornment:(0,s.jsx)(O.Z,{position:"start",children:(0,s.jsx)(I.Z,{})}),endAdornment:(0,s.jsx)(O.Z,{position:"end",children:(0,s.jsx)(w.Z,{})})},onClick:()=>eJ(!0),placeholder:"Search or select an 
eval...",sx:{cursor:"pointer"}}),(0,s.jsx)(ed,{open:eB,onClose:()=>eJ(!1),recentEvals:n,onRecentEvalSelected:a,title:"Select an Eval"})]}),(null==j?void 0:j.description)&&G&&(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(H.Z,{title:"Click to copy",children:(0,s.jsx)(T.Z,{size:"small",label:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("strong",{children:"ID:"})," ",G]}),sx:{opacity:.7,cursor:"pointer"},onClick:eH})}),(0,s.jsx)(u.Z,{open:eV,autoHideDuration:1e3,onClose:()=>e_(!1),message:"Eval id copied to clipboard"})]}),(0,s.jsx)(H.Z,{title:p?"":"Set eval author with `promptfoo config set email`",children:(0,s.jsx)(T.Z,{size:"small",label:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("strong",{children:"Author:"})," ",p||"Unknown"]}),sx:{opacity:.7}})})]}),(0,s.jsxs)(ta,{direction:"row",spacing:1,alignItems:"center",children:[(0,s.jsx)(P.Z,{children:(0,s.jsxs)(M.Z,{sx:{minWidth:200,maxWidth:350},size:"small",children:[(0,s.jsx)(L.Z,{id:"visible-columns-label",children:"Columns"}),(0,s.jsx)(_.Z,{labelId:"visible-columns-label",id:"visible-columns",multiple:!0,value:eO.selectedColumns,onChange:eU,input:(0,s.jsx)(V.Z,{label:"Visible columns"}),renderValue:e=>e.join(", "),children:eR.map(e=>(0,s.jsxs)(z.Z,{dense:!0,value:e.value,children:[(0,s.jsx)(F.Z,{checked:eO.selectedColumns.includes(e.value)}),(0,s.jsx)(U.Z,{primary:e.label})]},e.value))})]})}),(0,s.jsx)(P.Z,{children:(0,s.jsxs)(M.Z,{sx:{minWidth:180},size:"small",children:[(0,s.jsx)(L.Z,{id:"failure-filter-mode-label",children:"Display"}),(0,s.jsxs)(_.Z,{labelId:"filter-mode-label",id:"filter-mode",value:ec,onChange:e=>{let t=e.target.value;eu(t);let n={};eo.prompts.forEach((e,s)=>{n["Prompt ".concat(s+1)]="failures"===t}),er(n)},label:"Filter",children:[(0,s.jsx)(z.Z,{value:"all",children:"Show all results"}),(0,s.jsx)(z.Z,{value:"failures",children:"Show failures only"}),(0,s.jsx)(z.Z,{value:"different",children:"Show different only"}),(0,s.jsx)(z.Z,{value:"highlights",children:"Show highlights 
only"})]})]})}),(0,s.jsx)(P.Z,{children:(0,s.jsx)(J.Z,{sx:{minWidth:180},size:"small",label:"Search",placeholder:"Text or regex",value:et,onChange:e=>el(e.target.value)})}),(0,s.jsx)(P.Z,{flexGrow:1}),(0,s.jsx)(P.Z,{display:"flex",justifyContent:"flex-end",children:(0,s.jsxs)(ta,{direction:"row",spacing:2,children:[(0,s.jsx)(D.Z,{color:"primary",onClick:e=>{eZ(e.currentTarget)},startIcon:(0,s.jsx)(w.Z,{}),children:"Eval actions"}),j&&(0,s.jsxs)(W.Z,{id:"eval-actions-menu",anchorEl:eb,keepMounted:!0,open:!!eb,onClose:()=>{eZ(null)},children:[(0,s.jsx)(H.Z,{title:"Edit the name of this eval",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:eW,children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(k.Z,{fontSize:"small"})}),"Edit name"]})}),(0,s.jsx)(H.Z,{title:"Edit this eval in the web UI",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:()=>{ee(j),d.push("/setup/")},children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(S.Z,{fontSize:"small"})}),"Edit and re-run"]})}),(0,s.jsx)(ev,{initialEvals:n,onComparisonEvalSelected:eS}),(0,s.jsx)(H.Z,{title:"View the configuration that defines this eval",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:()=>eD(!0),children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(R.Z,{fontSize:"small"})}),"View YAML"]})}),(0,s.jsx)(ek,{}),(null==j?void 0:j.sharing)&&(0,s.jsx)(H.Z,{title:"Generate a unique URL that others can access",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:eC,disabled:ej,children:[(0,s.jsx)(A.Z,{children:ej?(0,s.jsx)(g.Z,{size:16}):(0,s.jsx)(N.Z,{fontSize:"small"})}),"Share"]})}),(0,s.jsx)(H.Z,{title:"Delete this eval",placement:"left",children:(0,s.jsxs)(z.Z,{onClick:ez,children:[(0,s.jsx)(A.Z,{children:(0,s.jsx)(C.Z,{fontSize:"small"})}),"Delete"]})})]}),(0,s.jsx)(H.Z,{title:"Edit table view settings",placement:"bottom",children:(0,s.jsx)(D.Z,{color:"primary",onClick:()=>eT(!0),startIcon:(0,s.jsx)(E.Z,{}),children:"Table Settings"})}),((null==j?void 0:j.redteam)||(null==j?void 0:null===(t=j.metadata)||void 0===t?void 
0:t.redteam))&&(0,s.jsx)(H.Z,{title:"View vulnerability scan report",placement:"bottom",children:(0,s.jsx)(D.Z,{color:"primary",variant:"contained",startIcon:(0,s.jsx)(R.Z,{}),onClick:()=>d.push("/report/?evalId=".concat(G||c)),children:"Vulnerability Report"})})]})})]}),(0,s.jsx)(eL,{columnVisibility:eO.columnVisibility}),(0,s.jsx)(tt,{maxTextLength:B,columnVisibility:eO.columnVisibility,wordBreak:K,showStats:X,filterMode:ec,failureFilter:ea,searchText:es,onFailureFilterToggle:ei,onSearchTextChange:el}),(0,s.jsx)(ey,{open:eP,onClose:()=>eD(!1)}),(0,s.jsx)(tl,{open:eh,onClose:()=>ep(!1),shareUrl:em}),(0,s.jsx)(ts,{open:eF,onClose:()=>eT(!1)}),(0,s.jsx)(eE,{recentEvals:n,onRecentEvalSelected:a})]})}async function ti(){let e=(0,j.createClientComponentClient)(),{data:{user:t}}=await e.auth.getUser();(0,y.Z)(t,"User not logged in");let{data:n}=await e.from("EvaluationResult").select("id, createdAt").eq("user_id",t.id).order("createdAt",{ascending:!1}).limit(100);return n||[]}async function to(e){let t=(0,j.createClientComponentClient)(),{data:n}=await t.from("EvaluationResult").select("*").eq("id",e).single();return n}function tc(e){var t;let{fetchId:n,preloadedData:a,recentEvals:i,defaultEvalId:c}=e,u=(0,f.useRouter)(),{table:h,setTable:p,config:m,setConfig:j,evalId:Z,setEvalId:w,setAuthor:C,setInComparisonMode:k}=ex(),[S,E]=l.useState(!1),[N,I]=l.useState(!1),[R,P]=l.useState(i||[]),D=async()=>{let e=await fetch("".concat(await (0,o.b)(),"/api/results"),{cache:"no-store"}),t=await e.json();return P(t.data),t.data},F=l.useCallback(async e=>{let t=await fetch("".concat(await (0,o.b)(),"/api/results/").concat(e),{cache:"no-store"}),n=await t.json();p(n.data.results.table),j(n.data.config),C(n.data.author),w(e)},[p,j,w,C]),T=async e=>{v.Ox?(E(!1),u.push("/eval/remote:".concat(encodeURIComponent(e)))):u.push("/eval/?evalId=".concat(encodeURIComponent(e)))},[M,O]=l.useState(c||(null===(t=R[0])||void 0===t?void 
0:t.evalId)),L=(0,f.useSearchParams)(),A=L?L.get("evalId"):null;return(l.useEffect(()=>{if(A){console.log("Eval init: Fetching eval by id",A);let e=async()=>{await F(A),E(!0),O(A),D()};e()}else if(a){var e;console.log("Eval init: Using preloaded data"),p(null===(e=a.data.results)||void 0===e?void 0:e.table),j(a.data.config),C(a.data.author||null),E(!0)}else if(n){console.log("Eval init: Fetching eval",n);let e=async()=>{var e;let t=v.T8?r:"",s="".concat(t,"/api/eval/").concat(n);console.log("Fetching eval from remote server",s);let l=await fetch(s);if(!l.ok){I(!0);return}let a=await l.json();p(null===(e=a.data.results)||void 0===e?void 0:e.table),j(a.data.config),C(a.data.author||null),E(!0)};e()}else if(v.T8)console.log("Eval init: Using local server websocket"),(0,o.b)().then(e=>{let t=(0,b.io)(e);return t.on("init",e=>{console.log("Initialized socket connection",e),E(!0),p(null==e?void 0:e.results.table),j(null==e?void 0:e.config),C((null==e?void 0:e.author)||null),D().then(e=>{var t,n,s;O(null===(t=e[0])||void 0===t?void 0:t.evalId),console.log("setting default eval id",null===(n=e[0])||void 0===n?void 0:n.evalId),w(null===(s=e[0])||void 0===s?void 0:s.evalId)})}),t.on("update",e=>{console.log("Received data update",e),p(e.results.table),j(e.config),C(e.author||null),D().then(e=>{var t;let n=null===(t=e[0])||void 0===t?void 0:t.evalId;n&&(O(n),w(n))})}),()=>{t.disconnect()}});else if(v.Ox)console.log("Eval init: Using Supabase"),ti().then(e=>{P(e.map(e=>({evalId:e.id,datasetId:null,label:e.createdAt,createdAt:new Date(e.createdAt).getTime(),description:"None",numTests:-1}))),e.length>0&&to(e[0].id).then(t=>{(0,y.Z)(t,"Eval not found");let n=t.results,s=t.config;O(e[0].id),p(n.table),j(s),C(null),E(!0)})});else{console.log("Eval init: Fetching eval via recent");let e=async()=>{let e=await D();if(!(e.length>0))return(0,s.jsx)("div",{className:"notice",children:"No evals yet. 
Share some evals to this server and they will appear here."});{let t=await (0,o.b)(),n=e[0].evalId,s=await fetch("".concat(t,"/api/results/").concat(n)),l=await s.json();p(l.data.results.table),j(l.data.config),C(l.data.author||null),E(!0),O(n),w(n)}};e()}k(!1)},[n,p,j,C,w,F,a,O,A,k]),l.useEffect(()=>{document.title="".concat((null==m?void 0:m.description)||Z||"Eval"," | promptfoo")},[m,Z]),N)?(0,s.jsx)("div",{className:"notice",children:"404 Eval not found"}):S&&h?(0,s.jsx)(x,{children:(0,s.jsx)(d,{children:(0,s.jsx)(tr,{defaultEvalId:M,recentEvals:R,onRecentEvalSelected:T})})}):(0,s.jsxs)("div",{className:"notice",children:[(0,s.jsx)("div",{children:(0,s.jsx)(g.Z,{size:22})}),(0,s.jsx)("div",{children:"Waiting for eval data"})]})}n(94455)},52428:function(e,t,n){"use strict";n.d(t,{Ox:function(){return a},T8:function(){return l},eA:function(){return r}});var s=n(77580);let l=!s.env.NEXT_PUBLIC_PROMPTFOO_BUILD_STANDALONE_SERVER,a=!!s.env.NEXT_PUBLIC_PROMPTFOO_USE_SUPABASE,r=""},47887:function(e,t,n){"use strict";n.d(t,{o:function(){return a}});var s=n(94660),l=n(74810);let a=(0,s.Ue)()((0,l.tJ)((e,t)=>({env:{},testCases:[],description:"",providers:[],prompts:[],defaultTest:{},evaluateOptions:{},scenarios:[],setEnv:t=>e({env:t}),setTestCases:t=>e({testCases:t}),setDescription:t=>e({description:t}),setProviders:t=>e({providers:t}),setPrompts:t=>e({prompts:t}),setDefaultTest:t=>e({defaultTest:t}),setEvaluateOptions:t=>e({evaluateOptions:t}),setScenarios:t=>e({scenarios:t}),setStateFromConfig:t=>{let n={};t.description&&(n.description=t.description||""),t.tests&&(n.testCases=t.tests),t.providers&&(n.providers=t.providers),t.prompts&&("string"==typeof t.prompts?n.prompts=[t.prompts]:Array.isArray(t.prompts)?n.prompts=t.prompts.filter(e=>"string"==typeof e&&!e.endsWith(".txt")&&!e.endsWith(".json")&&!e.endsWith(".yaml")):console.warn("Invalid prompts 
config",t.prompts)),t.defaultTest&&(n.defaultTest=t.defaultTest),t.evaluateOptions&&(n.evaluateOptions=t.evaluateOptions),t.scenarios&&(n.scenarios=t.scenarios),e(n)},getTestSuite:()=>{let{description:e,testCases:n,providers:s,prompts:l,env:a,scenarios:r}=t();return{env:a,description:e,providers:s,prompts:l,tests:n,scenarios:r}}}),{name:"promptfoo",skipHydration:!0}))},55974:function(){},94455:function(){},16658:function(){},93207:function(){},58022:function(){}}]);
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
// Webpack chunk 420 (minified build output). Pushes three real modules and a set of empty
// stub modules onto the shared `self.webpackChunk_N_E` queue, then asks the runtime to start
// module 58004 once the chunks listed in the trailing callback are available.
//
// Changes relative to the wrapped text this replaces:
//  * Line breaks now fall only between declarations. The previous hard wraps landed inside
//    string literals and directly after `return`/`let`/`var`/`new`/`void`, which is not valid
//    JavaScript (and `return` followed by a newline returns undefined).
//  * The category description map `j` is keyed by "Security Risk" so it matches the category
//    map `y`; it used to be keyed "Technical Risk", leaving that card's subtitle undefined.
//  * The "Could not get failures for plugin" warning now prints the key that failed to
//    resolve instead of the (always undefined) lookup result.
//
// NOTE(review): `w` has no entry for "prompt-extraction" although `y` lists it, so the drawer
// `X` logs an error and renders nothing for that category — confirm whether that is intended.
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[420],{
// 58004 — entry shim: asynchronously requires module 24406.
58004:function(e,t,i){Promise.resolve().then(i.bind(i,24406))},
// 49532 — exports `b`, an async getter for the API base URL. The first call fetches
// "/api/config" and stores `apiBaseUrl`; the pending promise is shared by concurrent callers.
// Throws Error("API base URL is undefined") when the config response carries no value.
49532:function(e,t,i){"use strict";let a,r;async function s(){if(a||(r||(r=fetch("/api/config").then(e=>e.json()).then(e=>a=e.apiBaseUrl)),await r),void 0===a)throw Error("API base URL is undefined");return a}i.d(t,{b:function(){return s}})},
// 24406 — the report page. Default export is the component `ev` defined at the bottom.
24406:function(e,t,i){"use strict";i.r(t),i.d(t,{default:function(){return ev}});var a,r,s=i(24004),n=i(14978),l=i(49532),o=i(79245),c=i(15133),d=i(47827),u=i(88938),m=i(35266),h=i(80356),p=i(38212),g=i(49658),f=i(13457),v=i(43226),x=i(88469);
// y: risk category name -> plugin ids shown under that category.
let y={"Security Risk":["bola","bfla","debug-access","hijacking","pii","prompt-extraction","rbac","shell-injection","sql-injection","ssrf"],"Legal Risk":["contracts","harmful:child-exploitation","harmful:copyright-violations","harmful:cybercrime","harmful:hate","harmful:illegal-activities","harmful:illegal-drugs","harmful:intellectual-property","harmful:privacy","harmful:self-harm","harmful:sex-crime","harmful:sexual-content","harmful:specialized-advice","harmful:violent-crime"],"Brand Risk":["policy","competitors","excessive-agency","hallucination","harmful:graphic-content","harmful:harassment-bullying","harmful:indiscriminate-weapons","harmful:insults","harmful:misinformation-disinformation","harmful:non-violent-crime","harmful:profanity","harmful:radicalization","harmful:unsafe-practices","imitation","overreliance","politics"]},
// j: risk category name -> subtitle text; keys must match the keys of `y` (looked up as j[n] in `et`).
j={"Brand Risk":"Risks that can affect the brand reputation and trustworthiness.","Legal Risk":"Risks that can lead to legal consequences or violations.","Security Risk":"Risks involving malicious activities targeting the system or users."};
// r: severity enum object (Critical / High / Medium / Low), populated in place.
(a=r||(r={})).Critical="Critical",a.High="High",a.Medium="Medium",a.Low="Low";
// b: plugin id -> severity.
let b={"debug-access":r.High,"excessive-agency":r.Medium,"harmful:child-exploitation":r.Critical,"harmful:copyright-violations":r.Low,"harmful:cybercrime":r.Low,"harmful:graphic-content":r.Medium,"harmful:harassment-bullying":r.Low,"harmful:hate":r.Critical,"harmful:illegal-activities":r.Medium,"harmful:illegal-drugs":r.Medium,"harmful:indiscriminate-weapons":r.Medium,"harmful:insults":r.Low,"harmful:intellectual-property":r.Medium,"harmful:misinformation-disinformation":r.Medium,"harmful:non-violent-crime":r.Medium,"harmful:privacy":r.High,"harmful:profanity":r.Low,"harmful:radicalization":r.Low,"harmful:self-harm":r.Critical,"harmful:sex-crime":r.High,"harmful:sexual-content":r.Medium,"harmful:specialized-advice":r.Medium,"harmful:unsafe-practices":r.Low,"harmful:violent-crime":r.High,"prompt-injection":r.Medium,"shell-injection":r.High,"sql-injection":r.High,competitors:r.Low,contracts:r.Medium,hallucination:r.Medium,hijacking:r.High,imitation:r.Low,jailbreak:r.Medium,overreliance:r.Low,pii:r.High,politics:r.Low,rbac:r.High,policy:r.High,bola:r.High,bfla:r.High,ssrf:r.High},
// Z: plugin id -> risk category name (inverse of `y`).
Z=Object.entries(y).reduce((e,t)=>{let[i,a]=t;return a.forEach(t=>{e[t]=i}),e},{});Object.keys(Z);
// w: plugin id -> key used in eval results (vars.harmCategory / assertion metric prefix).
let w={bola:"BOLAEnforcement",bfla:"BFLAEnforcement",ssrf:"SSRFEnforcement","debug-access":"DebugAccess","excessive-agency":"ExcessiveAgency","harmful:child-exploitation":"Child Exploitation","harmful:cybercrime":"Cybercrime & Unauthorized Intrusion - Hacking and Malware","harmful:graphic-content":"Graphic & age-restricted content","harmful:harassment-bullying":"Harassment & Bullying","harmful:hate":"Hate","harmful:illegal-activities":"Illegal Activities - Fraud & scams","harmful:illegal-drugs":"Illegal Drugs","harmful:indiscriminate-weapons":"Indiscriminate Weapons","harmful:insults":"Insults and personal attacks","harmful:intellectual-property":"Intellectual Property violation","harmful:misinformation-disinformation":"Misinformation & Disinformation - Harmful lies and propaganda","harmful:non-violent-crime":"Non-Violent Crimes","harmful:privacy":"Privacy violations","harmful:profanity":"Requests containing profanity","harmful:radicalization":"Radicalization","harmful:self-harm":"Self-Harm","harmful:sex-crime":"Sex Crimes","harmful:sexual-content":"Sexual Content","harmful:specialized-advice":"Specialized Advice - Financial","harmful:unsafe-practices":"Promotion of unsafe practices","harmful:violent-crime":"Violent Crimes","harmful:chemical-biological-weapons":"Chemical & Biological Weapons","harmful:copyright-violations":"Copyright Violations - Copyrighted text","prompt-injection":"Harmful/Injection","shell-injection":"ShellInjection","sql-injection":"SqlInjection",competitors:"CompetitorEndorsement",contracts:"ContractualCommitment",hallucination:"Hallucination",hijacking:"Hijacking",imitation:"Imitation",jailbreak:"Harmful/Iterative",overreliance:"Overreliance",pii:"PIILeak",politics:"PoliticalStatement",rbac:"RbacEnforcement",policy:"PolicyViolation"},
// k: inverse of `w` (result key -> plugin id).
k=Object.entries(w).reduce((e,t)=>{let[i,a]=t;return e[a]=i,e},{}),
// C: plugin/strategy id -> short display name (preferred over `w` where present).
C={bola:"Unauthorized Data Access",bfla:"Privilege Escalation",ssrf:"Malicious Resource Fetching","excessive-agency":"Excessive Agency","prompt-injection":"Prompt Injection",competitors:"Competitor Endorsements",contracts:"Unsupervised Contracts",jailbreak:"Single-shot optimization","jailbreak:tree":"Tree-based optimization",pii:"PII Leaks",politics:"Political Opinions","shell-injection":"Shell Injection","sql-injection":"SQL Injection",rbac:"RBAC Enforcement","debug-access":"Debug Access","harmful:specialized-advice":"Specialized Advice","harmful:illegal-activities":"Illegal Activities","harmful:cybercrime":"Cybercrime","harmful:copyright-violations":"Copyright Violations","harmful:misinformation-disinformation":"Misinformation & disinformation",policy:"Custom Policy",basic:"Basic",crescendo:"Multi-turn"},
// R: plugin/strategy id -> one-line description (tooltips and table "Description" column).
R={default:"Includes common plugins",basic:"Raw attacks without any special attack strategies",multilingual:"Translates the input into low-resource languages",bola:"Broken Object Level Authorization (BOLA) tests",bfla:"Broken Function Level Authorization (BFLA) tests",ssrf:"Server-Side Request Forgery (SSRF) tests","debug-access":"Attempts to access or use debugging commands","excessive-agency":"Model taking excessive initiative or misunderstanding its capabilities","harmful:chemical-biological-weapons":"Content related to chemical or biological weapons","harmful:child-exploitation":"Content exploiting or harming children","harmful:copyright-violations":"Content violating copyright laws","harmful:cybercrime":"Content related to cybercriminal activities","harmful:graphic-content":"Displaying graphic or violent content","harmful:harassment-bullying":"Content that harasses or bullies individuals","harmful:hate":"Content that promotes hate or discrimination","harmful:illegal-activities":"Content promoting illegal activities","harmful:illegal-drugs":"Content related to illegal drug use or trade","harmful:indiscriminate-weapons":"Content related to weapons without context","harmful:insults":"Content that insults or demeans individuals","harmful:intellectual-property":"Content violating intellectual property rights","harmful:misinformation-disinformation":"Spreading false or misleading information","harmful:non-violent-crime":"Content related to non-violent criminal activities","harmful:privacy":"Content violating privacy rights","harmful:profanity":"Content containing profane or inappropriate language","harmful:radicalization":"Content that promotes radical or extremist views","harmful:self-harm":"Content that encourages self-harm or suicide","harmful:sex-crime":"Content related to sexual crimes","harmful:sexual-content":"Explicit or inappropriate sexual content","harmful:specialized-advice":"Providing advice in specialized fields without expertise","harmful:unsafe-practices":"Content promoting unsafe or harmful practices","harmful:violent-crime":"Content related to violent criminal activities","jailbreak:tree":"Tree-based jailbreak search","pii:api-db":"PII exposed through API or database","pii:direct":"Direct exposure of PII","pii:session":"PII exposed in session data","pii:social":"PII exposed through social engineering","prompt-injection":"Malicious inputs designed to manipulate the model's behavior","shell-injection":"Attempts to execute shell commands through the model","sql-injection":"Attempts to perform SQL injection attacks to manipulate database queries",base64:"Attempts to obfuscate malicious content using Base64 encoding",competitors:"Competitor mentions and endorsements",contracts:"Enters business or legal commitments without supervision",hallucination:"Model generating false or misleading information",harmful:"All harmful categories",hijacking:"Unauthorized or off-topic resource use",imitation:"Imitates people, brands, or organizations",jailbreak:"Attempts to bypass security measures through iterative prompt refinement",leetspeak:"Attempts to obfuscate malicious content using leetspeak",overreliance:"Model susceptible to relying on an incorrect user assumption or input",pii:"All PII categories",policy:"Violates a custom configured policy",politics:"Makes political statements",rbac:"Tests whether the model properly implements Role-Based Access Control (RBAC)",rot13:"Attempts to obfuscate malicious content using ROT13 encoding",crescendo:"Conversational attack strategy","prompt-extraction":"Attempts to get the model to reveal its system prompt"};i(4309);
// P: row of four severity cards; each shows how many keys of `categoryStats` map to that severity in `b`.
var P=e=>{let{categoryStats:t}=e,i=[r.Critical,r.High,r.Medium,r.Low],a=i.reduce((e,i)=>(e[i]=Object.keys(t).reduce((e,t)=>b[t]===i?e+1:e,0),e),{});return(0,s.jsx)(f.Z,{spacing:2,direction:{xs:"column",sm:"row"},children:i.map(e=>(0,s.jsx)(o.Z,{flex:1,children:(0,s.jsx)(c.Z,{className:"severity-card card-".concat(e.toLowerCase()),children:(0,s.jsxs)(x.Z,{onClick:()=>window.location.hash="#table",children:[(0,s.jsx)(v.Z,{variant:"h6",gutterBottom:!0,children:e}),(0,s.jsx)(v.Z,{variant:"h4",color:"text.primary",children:a[e]}),(0,s.jsx)(v.Z,{variant:"body2",color:"text.secondary",children:"issues"})]})})},e))})},A=i(52481),N=i(52653),I=i(78276),S=i(35193),F=i.n(S),L=i(21629),
// M: "Download report as PDF" button. Renders document.documentElement through F() and saves it
// via L.ZP as "report_<slugified description>.pdf", or "report.pdf" when no description is given.
// (F / L.ZP are presumably html2canvas / jsPDF — TODO confirm against the module map.)
M=e=>{let{evalDescription:t}=e,i=async()=>{let e=document.documentElement,i=await F()(e,{height:Math.max(e.scrollHeight,e.offsetHeight),windowHeight:document.documentElement.scrollHeight}),a=i.toDataURL("image/png"),r=new L.ZP("p","pt",[i.width,i.height]);r.addImage(a,"PNG",0,0,i.width,i.height);let s=t?"report_".concat(t.toLowerCase().replace(/[^a-z0-9]+/g,"-").replace(/(^-|-$)/g,""),".pdf"):"report.pdf";r.save(s)};return(0,s.jsx)(I.Z,{title:"Download report as PDF",placement:"top",children:(0,s.jsx)(N.Z,{onClick:i,sx:{mt:"4px"},"aria-label":"download report",children:(0,s.jsx)(A.Z,{})})})},H=i(93295),z=i(49050),B=i(89394),E=i(42834),D=i(26337),O=i(91797),T=i(4451),W=i(37204),U=i(94660),V=i(74810);
// q: async storage adapter over W.U2 / W.t8 / W.IV.  _: persisted settings store ("ReportViewStorage")
// holding showPercentagesOnRiskCards (default false) and pluginPassRateThreshold (default 1).
let q={getItem:async e=>await (0,W.U2)(e)||null,setItem:async(e,t)=>{await (0,W.t8)(e,t)},removeItem:async e=>{await (0,W.IV)(e)}},_=(0,U.Ue)()((0,V.tJ)(e=>({showPercentagesOnRiskCards:!1,setShowPercentagesOnRiskCards:t=>e(()=>({showPercentagesOnRiskCards:t})),pluginPassRateThreshold:1,setPluginPassRateThreshold:t=>e(()=>({pluginPassRateThreshold:t}))}),{name:"ReportViewStorage",storage:(0,V.FL)(()=>q)}));
// J: settings icon button plus dialog that edits the two values in the `_` store.
var J=()=>{let{showPercentagesOnRiskCards:e,setShowPercentagesOnRiskCards:t,pluginPassRateThreshold:i,setPluginPassRateThreshold:a}=_(),[r,l]=n.useState(!1),o=()=>{l(!1)};return(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(I.Z,{title:"Report Settings",placement:"top",children:(0,s.jsx)(N.Z,{onClick:()=>{l(!0)},"aria-label":"settings",children:(0,s.jsx)(H.Z,{})})}),(0,s.jsxs)(B.Z,{open:r,onClose:o,fullWidth:!0,maxWidth:"sm",children:[(0,s.jsx)(O.Z,{children:"Report Settings"}),(0,s.jsxs)(D.Z,{children:[(0,s.jsx)(v.Z,{component:"div",sx:{padding:"16px 0"},children:(0,s.jsxs)("label",{style:{display:"flex",alignItems:"center",cursor:"pointer"},children:[(0,s.jsx)("input",{type:"checkbox",checked:e,onChange:e=>t(e.target.checked),style:{marginRight:"10px"}}),"Show percentages on risk cards"]})}),(0,s.jsxs)(v.Z,{component:"div",sx:{padding:"16px 0"},children:[(0,s.jsxs)("label",{children:["Plugin Pass Rate Threshold: ",(100*i).toFixed(0),"%"]}),(0,s.jsx)(v.Z,{variant:"body2",color:"textSecondary",sx:{mt:1},children:"Sets the threshold for considering a plugin as passed on the risk cards."}),(0,s.jsx)(T.ZP,{value:i,onChange:(e,t)=>a(t),"aria-labelledby":"plugin-pass-rate-threshold-slider",step:.05,marks:!0,min:0,max:1,valueLabelDisplay:"auto",valueLabelFormat:e=>"".concat((100*e).toFixed(0),"%")})]})]}),(0,s.jsx)(E.Z,{children:(0,s.jsx)(z.Z,{onClick:o,variant:"contained",color:"primary",children:"Close"})})]})]})},G=i(70740),K=i(14565),Q=i(28874),Y=i(48670),$=i(64111);i(11674);
// X: right-hand drawer for one plugin category. Returns null (after console.error) when the
// category has no entry in `w`; otherwise shows passed/total/pass-rate, a "View All Logs" link
// and up to five failed tests (last chat message content as prompt, function calls as output).
var X=e=>{let{open:t,onClose:i,category:a,failures:r,evalId:n,numPassed:l,numFailed:c}=e,d=w[a];if(!d)return console.error("[RiskCategoryDrawer] Could not load category",a),null;let u=C[a]||d,p=l+c,g=p>0?Math.round(l/p*100):0;return 0===p?(0,s.jsx)($.ZP,{anchor:"right",open:t,onClose:i,children:(0,s.jsxs)(o.Z,{sx:{width:500,p:2},className:"risk-category-drawer",children:[(0,s.jsx)(v.Z,{variant:"h6",gutterBottom:!0,children:u}),(0,s.jsx)(v.Z,{variant:"body1",sx:{mt:2,textAlign:"center"},children:"No tests have been run for this category."})]})}):(0,s.jsx)($.ZP,{anchor:"right",open:t,onClose:i,children:(0,s.jsxs)(o.Z,{sx:{width:500,p:2},className:"risk-category-drawer",children:[(0,s.jsx)(v.Z,{variant:"h6",gutterBottom:!0,children:u}),(0,s.jsxs)(o.Z,{sx:{display:"flex",justifyContent:"space-between",alignItems:"center",mb:2},children:[(0,s.jsxs)(o.Z,{sx:{textAlign:"center",flex:1},children:[(0,s.jsx)(v.Z,{variant:"h4",color:"primary",children:l.toString()}),(0,s.jsx)(v.Z,{variant:"body2",children:"Passed"})]}),(0,s.jsxs)(o.Z,{sx:{textAlign:"center",flex:1},children:[(0,s.jsx)(v.Z,{variant:"h4",children:p.toString()}),(0,s.jsx)(v.Z,{variant:"body2",children:"Total"})]}),(0,s.jsxs)(o.Z,{sx:{textAlign:"center",flex:1},children:[(0,s.jsx)(v.Z,{variant:"h4",color:g>=70?"success.main":"error.main",children:"".concat(g,"%")}),(0,s.jsx)(v.Z,{variant:"body2",children:"Pass Rate"})]})]}),(0,s.jsx)(z.Z,{variant:"contained",color:"inherit",fullWidth:!0,onClick:e=>{let t="/eval/?evalId=".concat(n,"&search=").concat(encodeURIComponent("(var=".concat(d,"|metric=").concat(d,")")));e.ctrlKey||e.metaKey?window.open(t,"_blank"):window.location.href=t},children:"View All Logs"}),r.length>0?(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(v.Z,{variant:"h6",className:"failed-tests-header",children:"Failed Tests"}),(0,s.jsx)(m.Z,{children:r.slice(0,5).map((e,t)=>(0,s.jsx)(h.ZP,{className:"failure-item",children:(0,s.jsxs)(o.Z,{children:[(0,s.jsx)(v.Z,{variant:"subtitle1",className:"prompt",children:function(e){try{let t=JSON.parse(e);if(Array.isArray(t)){let e=t[t.length-1];if(e.content)return e.content||"-"}}catch(e){}return e}(e.prompt)}),(0,s.jsx)(v.Z,{variant:"body2",className:"output",children:function(e){if("string"==typeof e)return e;if(Array.isArray(e)){let t=e.filter(e=>"function"===e.type);if(t.length>0)return t.map(e=>{var t,i;return"".concat(null===(t=e.function)||void 0===t?void 0:t.name,": (").concat(null===(i=e.function)||void 0===i?void 0:i.arguments,")")}).join("\n")}return JSON.stringify(e)}(e.output)})]})},t))})]}):(0,s.jsx)(o.Z,{sx:{mt:2,textAlign:"center"},children:(0,s.jsx)(v.Z,{variant:"body1",children:"All tests passed successfully"})})]})})};i(46553);
// ee: one risk card (gauge + per-plugin list). Renders nothing when no listed plugin has results.
// Per plugin it shows either the pass percentage or a pass/fail icon compared against the
// pluginPassRateThreshold from the `_` store; clicking a row opens the drawer `X`.
var ee=e=>{var t,i;let{title:a,subtitle:r,progressValue:l,numTestsPassed:d,numTestsFailed:u,testTypes:g,evalId:f,failuresByPlugin:y}=e,{showPercentagesOnRiskCards:j,pluginPassRateThreshold:b}=_(),[Z,k]=n.useState(!1),[P,A]=n.useState(""),N=g.filter(e=>e.numPassed+e.numFailed>0);return 0===N.length?null:(0,s.jsx)(c.Z,{children:(0,s.jsxs)(x.Z,{className:"risk-card-container",children:[(0,s.jsxs)(Q.ZP,{container:!0,spacing:3,children:[(0,s.jsxs)(Q.ZP,{item:!0,xs:12,md:6,style:{display:"flex",flexDirection:"column",alignItems:"center",textAlign:"center"},children:[(0,s.jsx)(v.Z,{variant:"h5",className:"risk-card-title",children:a}),(0,s.jsx)(v.Z,{variant:"subtitle1",color:"textSecondary",mb:2,children:r}),(0,s.jsx)(o.Z,{sx:{position:"relative",display:"inline-flex",alignItems:"center",justifyContent:"center",width:100,height:100},children:(0,s.jsx)(Y.a,{value:l,max:100,thickness:10,arc:{startAngle:-90,endAngle:90,color:"primary.main"},text:Number.isNaN(l)?"-":"".concat(Math.round(l),"%"),sx:{width:"100%",height:"100%"}})}),(0,s.jsxs)(v.Z,{variant:"h6",className:"risk-card-issues",children:[u," failed probes"]}),(0,s.jsxs)(v.Z,{variant:"subtitle1",color:"textSecondary",className:"risk-card-tests-passed",children:[d,"/",d+u," passed"]})]}),(0,s.jsx)(Q.ZP,{item:!0,xs:6,md:4,children:(0,s.jsx)(m.Z,{dense:!0,children:N.map((e,t)=>{let i=e.numPassed/(e.numPassed+e.numFailed);return(0,s.jsx)(I.Z,{title:R[e.name],placement:"left",arrow:!0,children:(0,s.jsxs)(h.ZP,{className:"risk-card-list-item",onClick:()=>{A(e.name),k(!0)},style:{cursor:"pointer"},children:[(0,s.jsx)(p.Z,{primary:C[e.name]||w[e.name],primaryTypographyProps:{variant:"body2"}}),j?(0,s.jsx)(v.Z,{variant:"body2",className:"risk-card-percentage ".concat(i>=.8?"risk-card-percentage-high":i>=.5?"risk-card-percentage-medium":"risk-card-percentage-low"),children:"".concat(Math.round(100*i),"%")}):i>=b?(0,s.jsx)(K.Z,{className:"risk-card-icon-passed"}):(0,s.jsx)(G.Z,{className:"risk-card-icon-failed"})]})},t)})})})]}),P&&(0,s.jsx)(X,{open:Z,onClose:()=>k(!1),category:P,failures:y[P]||[],evalId:f,numPassed:(null===(t=g.find(e=>e.name===P))||void 0===t?void 0:t.numPassed)||0,numFailed:(null===(i=g.find(e=>e.name===P))||void 0===i?void 0:i.numFailed)||0})]})})};i(32991);
// et: one `ee` card per category in `y`, with pass/total summed over the category's plugins.
// progressValue is NaN when a category has no results; `ee` renders "-" for that case.
var et=e=>{let{categoryStats:t,evalId:i,failuresByPlugin:a}=e,r=Object.keys(y).map(e=>({name:e,passed:y[e].every(e=>{var i,a;return(null===(i=t[e])||void 0===i?void 0:i.pass)===(null===(a=t[e])||void 0===a?void 0:a.total)})}));return(0,s.jsx)(f.Z,{spacing:4,children:r.map((e,r)=>{let n=e.name,l=y[n],o=l.reduce((e,i)=>{var a;return e+((null===(a=t[i])||void 0===a?void 0:a.pass)||0)},0),c=l.reduce((e,i)=>{var a;return e+((null===(a=t[i])||void 0===a?void 0:a.total)||0)},0);return(0,s.jsx)(ee,{title:e.name,subtitle:j[n],progressValue:o/c*100,numTestsPassed:o,numTestsFailed:c-o,testTypes:l.map(e=>{var i,a,r,s,n;return{name:e,categoryPassed:(null===(i=t[e])||void 0===i?void 0:i.pass)===(null===(a=t[e])||void 0===a?void 0:a.total),numPassed:(null===(r=t[e])||void 0===r?void 0:r.pass)||0,numFailed:((null===(s=t[e])||void 0===s?void 0:s.total)||0)-((null===(n=t[e])||void 0===n?void 0:n.pass)||0)}}),evalId:i,failuresByPlugin:a},r)})})},ei=i(54755),ea=i(12120),er=i(35843);i(44097);
// es: styled progress bar (8px tall, rounded, red bar colour chosen by theme palette mode).
let es=(0,er.ZP)(ei.Z)(e=>{let{theme:t}=e;return{height:8,borderRadius:8,["&.".concat(ea.Z.colorPrimary)]:{backgroundColor:"light"===t.palette.mode?"#e0e0e0":"#424242"},["& .".concat(ea.Z.bar)]:{borderRadius:8,backgroundColor:"light"===t.palette.mode?"#ff1744":"#ff8a80"}}});
// en: "Attack success rates" card; strategies sorted by failure ratio ((total-pass)/total), highest first.
var en=e=>{let{strategyStats:t}=e,i=Object.entries(t).sort((e,t)=>(t[1].total-t[1].pass)/t[1].total-(e[1].total-e[1].pass)/e[1].total);return(0,s.jsx)(c.Z,{className:"strategy-stats-card",children:(0,s.jsxs)(x.Z,{className:"strategy-stats-content",children:[(0,s.jsx)(v.Z,{variant:"h5",mb:2,children:"Attack success rates"}),(0,s.jsx)(o.Z,{className:"strategy-grid",children:i.map(e=>{let[t,{pass:i,total:a}]=e,r=(a-i)/a*100;return(0,s.jsxs)(o.Z,{className:"strategy-item",children:[(0,s.jsx)(v.Z,{variant:"body1",className:"strategy-name",children:C[t]||t}),(0,s.jsx)(v.Z,{variant:"body2",color:"text.secondary",className:"strategy-description",children:R[t]||""}),(0,s.jsxs)(o.Z,{display:"flex",alignItems:"center",className:"progress-container",children:[(0,s.jsx)(o.Z,{width:"100%",mr:1,children:(0,s.jsx)(es,{variant:"determinate",value:r})}),(0,s.jsx)(o.Z,{minWidth:45,className:"fail-rate",children:(0,s.jsxs)(v.Z,{variant:"body2",color:"text.secondary",children:[r.toFixed(1),"%"]})})]}),(0,s.jsxs)(v.Z,{variant:"caption",color:"text.secondary",className:"attack-stats",children:[a-i," / ",a," attacks succeeded"]})]},t)})})]})})},el=i(73701),eo=i(39279),ec=i(30666),ed=i(15795),eu=i(66988),em=i(15646),eh=i(98489),ep=i(95781);i(93091);
// eg: builds one table row per plugin listed in `y` (pass rates as "NN.N%" strings or "N/A"),
// sorted by ascending pass rate with "N/A" rows last.
let eg=e=>{let t=[];for(let i of Object.values(y))for(let a of i)t.push({pluginName:a,type:w[a]||a,description:R[a]||"",passRate:e[a]?(e[a].pass/e[a].total*100).toFixed(1)+"%":"N/A",passRateWithFilter:e[a]?(e[a].passWithFilter/e[a].total*100).toFixed(1)+"%":"N/A",severity:b[a]||"Unknown"});return t.sort((e,t)=>"N/A"===e.passRate?1:"N/A"===t.passRate?-1:Number.parseFloat(e.passRate)-Number.parseFloat(t.passRate))};
// ef: "Vulnerabilities and Mitigations" table over the rows from `eg` that have results.
// Sortable by pass rate or severity; the default order is severity descending, then pass rate
// ascending. Pagination controls appear only when there are more rows than the page size.
var ef=e=>{let{evalId:t,categoryStats:i}=e,a=eg(i).filter(e=>"N/A"!==e.passRate),[r,l]=n.useState(0),[c,d]=n.useState(10),[u,m]=n.useState("asc"),[h,p]=n.useState("default"),g=e=>{let t=h===e&&"asc"===u;m(t?"desc":"asc"),p(e)};return(0,s.jsxs)(o.Z,{children:[(0,s.jsx)(v.Z,{variant:"h5",gutterBottom:!0,id:"table",children:"Vulnerabilities and Mitigations"}),(0,s.jsxs)(ed.Z,{children:[(0,s.jsxs)(el.Z,{children:[(0,s.jsx)(eu.Z,{children:(0,s.jsxs)(eh.Z,{children:[(0,s.jsx)(ec.Z,{children:"Type"}),(0,s.jsx)(ec.Z,{children:"Description"}),(0,s.jsx)(ec.Z,{children:(0,s.jsx)(ep.Z,{active:"passRate"===h,direction:"passRate"===h?u:"asc",onClick:()=>g("passRate"),children:"Pass rate"})}),(0,s.jsx)(ec.Z,{children:(0,s.jsx)(ep.Z,{active:"severity"===h,direction:"severity"===h?u:"asc",onClick:()=>g("severity"),children:"Severity"})}),(0,s.jsx)(ec.Z,{style:{minWidth:"275px"},children:"Actions"})]})}),(0,s.jsx)(eo.Z,{children:a.sort((e,t)=>{if("passRate"===h)return"N/A"===e.passRate?1:"N/A"===t.passRate?-1:"asc"===u?Number.parseFloat(e.passRate)-Number.parseFloat(t.passRate):Number.parseFloat(t.passRate)-Number.parseFloat(e.passRate);if("severity"===h){if("N/A"===e.passRate)return 1;if("N/A"===t.passRate)return -1;let i={Critical:4,High:3,Medium:2,Low:1};return"asc"===u?i[e.severity]-i[t.severity]:i[t.severity]-i[e.severity]}{let i={Critical:4,High:3,Medium:2,Low:1};return e.severity===t.severity?Number.parseFloat(e.passRate)-Number.parseFloat(t.passRate):i[t.severity]-i[e.severity]}}).slice(r*c,r*c+c).map((e,t)=>{let i="";if("N/A"!==e.passRate){let t=Number.parseFloat(e.passRate);i=t>=75?"pass-high":t>=50?"pass-medium":"pass-low"}return(0,s.jsxs)(eh.Z,{children:[(0,s.jsx)(ec.Z,{children:(0,s.jsx)("span",{style:{fontWeight:500},children:C[e.pluginName]||e.type})}),(0,s.jsx)(ec.Z,{children:e.description}),(0,s.jsxs)(ec.Z,{className:i,children:[(0,s.jsx)("strong",{children:e.passRate}),e.passRateWithFilter===e.passRate?null:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("br",{}),"(",e.passRateWithFilter," with mitigation)"]})]}),(0,s.jsx)(ec.Z,{className:"vuln-".concat(e.severity.toLowerCase()),children:e.severity}),(0,s.jsxs)(ec.Z,{style:{minWidth:270},children:[(0,s.jsx)(z.Z,{variant:"contained",size:"small",onClick:()=>{let t=new URLSearchParams(window.location.search),i=t.get("evalId");window.location.href="/eval/?evalId=".concat(i,"&search=").concat(encodeURIComponent("(var=".concat(e.type,"|metric=").concat(e.type,")")))},children:"View logs"}),(0,s.jsx)(I.Z,{title:"Temporarily disabled while in beta, click to contact us to enable",children:(0,s.jsx)(z.Z,{variant:"contained",size:"small",color:"inherit",style:{marginLeft:8},onClick:()=>{window.location.href="mailto:inquiries@promptfoo.dev?subject=Promptfoo%20automatic%20vulnerability%20mitigation&body=Hello%20Promptfoo%20Team,%0D%0A%0D%0AI%20am%20interested%20in%20learning%20more%20about%20the%20automatic%20vulnerability%20mitigation%20beta.%20Please%20provide%20me%20with%20more%20details.%0D%0A%0D%0A"},children:"Apply mitigation"})})]})]},t)})})]}),a.length>c&&(0,s.jsx)(em.Z,{rowsPerPageOptions:[10,25,50],component:"div",count:a.length,rowsPerPage:c,page:r,onPageChange:(e,t)=>{l(t)},onRowsPerPageChange:e=>{d(Number.parseInt(e.target.value,10)),l(0)}})]})]})};i(67667);
// ev (default export): the report page. Reads `evalId` from the query string, fetches
// `<api base>/api/results/<evalId>`, and derives:
//   b — failures grouped by plugin id (for the drawers),
//   R — per-plugin { pass, total, passWithFilter } (a result passes when an llm-rubric /
//       promptfoo:redteam* or human component passed; passWithFilter additionally counts
//       results where no moderation component passed),
//   A — per-strategy { pass, total } keyed by the part of the metric after "/" ("basic" if none).
// Shows "Loading..." until both the id and the data are available.
var ev=()=>{let[e,t]=n.useState(null),[i,a]=n.useState(null),[r,x]=n.useState(0),[y,j]=n.useState(!1),b=n.useMemo(()=>{let e={};return null==i||i.results.results.forEach(t=>{var i,a,r;let s=k[t.vars.harmCategory]||k[Object.keys(t.namedScores)[0]];if(!s){console.warn("Could not get failures for plugin ".concat(t.vars.harmCategory||Object.keys(t.namedScores)[0]));return}!s||(null===(i=t.gradingResult)||void 0===i?void 0:i.pass)||(e[s]||(e[s]=[]),e[s].push({prompt:(null===(a=t.vars.query)||void 0===a?void 0:a.toString())||t.prompt.raw,output:null===(r=t.response)||void 0===r?void 0:r.output}))}),e},[i]);if(n.useEffect(()=>{let e=async e=>{let t=await fetch("".concat(await (0,l.b)(),"/api/results/").concat(e),{cache:"no-store"}),i=await t.json();a(i.data)},i=new URLSearchParams(window.location.search);if(!i)return;let r=i.get("evalId");r&&(t(r),e(r))},[]),n.useEffect(()=>{document.title="Report: ".concat((null==i?void 0:i.config.description)||e||"Red Team"," | promptfoo")},[i,e]),!i||!e)return(0,s.jsx)(o.Z,{sx:{width:"100%",textAlign:"center"},children:"Loading..."});let Z=i.results.table.head.prompts,w=Z[r],C=i.results.table.body,R=i.results.results.reduce((e,t)=>{var i,a,r,s,n,l,o,c;let d=t.vars.harmCategory,u=(null===(a=t.gradingResult)||void 0===a?void 0:null===(i=a.componentResults)||void 0===i?void 0:i.map(e=>{var t;return null===(t=e.assertion)||void 0===t?void 0:t.metric}))||[],m=[d,...u].filter(e=>e);for(let i of m){if("string"!=typeof i)continue;let a=k[i.split("/")[0]];if(!a){console.log("Unknown harm category:",i);break}let d=null===(s=t.gradingResult)||void 0===s?void 0:null===(r=s.componentResults)||void 0===r?void 0:r.some(e=>{var t;let i=(null===(t=e.assertion)||void 0===t?void 0:t.type)==="moderation",a=e.pass;return i&&a}),u=null===(l=t.gradingResult)||void 0===l?void 0:null===(n=l.componentResults)||void 0===n?void 0:n.some(e=>{var t,i;let a=(null===(t=e.assertion)||void 0===t?void 0:t.type)==="llm-rubric"||(null===(i=e.assertion)||void 0===i?void 0:i.type.startsWith("promptfoo:redteam")),r=e.pass;return a&&r}),m=null===(c=t.gradingResult)||void 0===c?void 0:null===(o=c.componentResults)||void 0===o?void 0:o.some(e=>{var t;let i=(null===(t=e.assertion)||void 0===t?void 0:t.type)==="human",a=e.pass;return i&&a});e[a]=e[a]||{pass:0,total:0,passWithFilter:0},e[a].total++,u||m?(e[a].pass++,e[a].passWithFilter++):!d&&e[a].passWithFilter++}return e},{}),A=i.results.results.reduce((e,t)=>{var i,a,r,s;let n=(null===(a=t.gradingResult)||void 0===a?void 0:null===(i=a.componentResults)||void 0===i?void 0:i.map(e=>{var t;return null===(t=e.assertion)||void 0===t?void 0:t.metric}))||[];for(let i of n){if("string"!=typeof i)continue;let a=function(e){let t=e.split("/")[1];if(t){if("Iterative"===t)return"jailbreak";if("IterativeTree"===t)return"jailbreak:tree";if("Crescendo"===t)return"crescendo";if("Injection"===t)return"prompt-injection"}return null}(i);a||(a="basic"),e[a]||(e[a]={pass:0,total:0}),e[a].total++;let n=null===(s=t.gradingResult)||void 0===s?void 0:null===(r=s.componentResults)||void 0===r?void 0:r.some(e=>{var t;return(null===(t=e.assertion)||void 0===t?void 0:t.metric)===i&&e.pass});n&&e[a].pass++}return e},{}),N=()=>{Z.length>1&&j(!0)},I=e=>{x(e),j(!1)};return(0,s.jsxs)(u.Z,{children:[(0,s.jsxs)(f.Z,{spacing:4,pb:8,pt:2,children:[(0,s.jsxs)(c.Z,{className:"report-header",sx:{position:"relative"},children:[(0,s.jsxs)(o.Z,{sx:{position:"absolute",top:8,right:8,display:"flex"},children:[(0,s.jsx)(M,{evalDescription:i.config.description||e}),(0,s.jsx)(J,{})]}),(0,s.jsxs)(v.Z,{variant:"h4",children:[(0,s.jsx)("strong",{children:"LLM Risk Assessment"}),i.config.description&&": ".concat(i.config.description)]}),(0,s.jsx)(v.Z,{variant:"subtitle1",mb:2,children:new Date(i.createdAt).toLocaleDateString("en-US",{year:"numeric",month:"long",day:"numeric"})}),(0,s.jsxs)(o.Z,{className:"report-details",children:[(0,s.jsx)(d.Z,{size:"small",label:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("strong",{children:"Model:"})," ",w.provider]}),onClick:N,style:{cursor:Z.length>1?"pointer":"default"}}),(0,s.jsx)(d.Z,{size:"small",label:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("strong",{children:"Dataset:"})," ",C.length," probes"]})}),(0,s.jsx)(d.Z,{size:"small",label:(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)("strong",{children:"Prompt:"}),' "',w.raw.length>40?"".concat(w.raw.substring(0,40),"..."):w.raw,'"']}),onClick:N,style:{cursor:Z.length>1?"pointer":"default"}})]})]}),(0,s.jsx)(P,{categoryStats:R}),(0,s.jsx)(en,{strategyStats:A}),(0,s.jsx)(et,{categoryStats:R,evalId:e,failuresByPlugin:b}),(0,s.jsx)(ef,{evalId:e,categoryStats:R})]}),(0,s.jsx)(g.Z,{open:y,onClose:()=>j(!1),"aria-labelledby":"prompt-modal-title",sx:{"& .MuiModal-root":{display:"flex",alignItems:"center",justifyContent:"center"},"& .MuiBox-root":{width:"80%",maxWidth:800,maxHeight:"90vh",overflowY:"auto"}},children:(0,s.jsxs)(o.Z,{sx:{position:"absolute",top:"50%",left:"50%",transform:"translate(-50%, -50%)",width:400,bgcolor:"background.paper",boxShadow:24,p:4},children:[(0,s.jsx)(v.Z,{id:"prompt-modal-title",variant:"h6",component:"h2",gutterBottom:!0,children:"View results for..."}),(0,s.jsx)(m.Z,{children:Z.map((e,t)=>(0,s.jsx)(h.ZP,{button:!0,onClick:()=>I(t),selected:t===r,children:(0,s.jsx)(p.Z,{primary:"".concat(e.provider),secondary:e.raw.length>100?"".concat(e.raw.substring(0,100),"..."):e.raw})},t))})]})})]})}
},
// Empty stub modules (required above only for their side effects; bodies are empty in this chunk).
4309:function(){},67667:function(){},46553:function(){},32991:function(){},11674:function(){},44097:function(){},93091:function(){}},
// Runtime callback: once the listed chunks are loaded, start the entry module 58004.
function(e){e.O(0,[949,799,148,193,391,654,65,276,874,264,459,178,470,730,744],function(){return e(e.s=58004)}),_N_E=e.O()}]);
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
// Webpack chunk 744 (build output): registers module 3680 on the shared
// `self.webpackChunk_N_E` queue and supplies the runtime start-up callback.
(self.webpackChunk_N_E = self.webpackChunk_N_E || []).push([
  [744],
  {
    // Module 3680: schedules five requires on the microtask queue, in this order.
    // Each one calls `require.t(moduleId, 23)` (presumably webpack's namespace-object
    // helper with mode flags 23 — TODO confirm against the runtime chunk).
    3680: function (module, exports, require) {
      for (const moduleId of [6020, 83507, 27883, 27382, 46785]) {
        Promise.resolve().then(require.t.bind(require, moduleId, 23));
      }
    },
  },
  function (runtime) {
    // Record the module id on `runtime.s`, then execute that module.
    const start = function (moduleId) {
      return runtime((runtime.s = moduleId));
    };
    // When chunks 470 and 730 are available, run module 11409 and then 3680;
    // the callback yields the result of the second call.
    runtime.O(0, [470, 730], function () {
      start(11409);
      return start(3680);
    });
    // Publish the result of the runtime's `O()` call on the global `_N_E`.
    _N_E = runtime.O();
  },
]);
|
|
File without changes
|
|
File without changes
|