promptfoo 0.17.8 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/package.json +1 -1
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +97 -42
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +43 -7
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/main.js +9 -0
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers.d.ts +2 -2
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +15 -1
- package/dist/src/providers.js.map +1 -1
- package/dist/src/table.js +2 -2
- package/dist/src/table.js.map +1 -1
- package/dist/src/types.d.ts +15 -4
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +3 -2
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +70 -18
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-0c6f887d.js → index-8388d689.js} +1 -1
- package/dist/src/web/client/assets/{index-f9b230d1.css → index-d2b6a160.css} +1 -1
- package/dist/src/web/client/index.html +2 -2
- package/package.json +1 -1
- package/src/assertions.ts +102 -49
- package/src/evaluator.ts +42 -4
- package/src/index.ts +6 -1
- package/src/main.ts +14 -0
- package/src/providers.ts +22 -4
- package/src/table.ts +2 -2
- package/src/types.ts +29 -3
- package/src/util.ts +82 -17
- package/src/web/client/package-lock.json +5726 -0
- package/src/web/client/src/ResultsTable.css +11 -1
- package/src/web/client/src/ResultsTable.tsx +10 -0
- package/src/web/client/src/ResultsView.tsx +7 -1
- package/src/web/client/src/types.ts +4 -0
|
@@ -196,4 +196,4 @@ Error generating stack: `+i.message+`
|
|
|
196
196
|
*
|
|
197
197
|
* This source code is licensed under the MIT license found in the
|
|
198
198
|
* LICENSE file in the root directory of this source tree.
|
|
199
|
-
*/var Wu=S,qO=KO;function YO(e,t){return e===t&&(e!==0||1/e===1/t)||e!==e&&t!==t}var XO=typeof Object.is=="function"?Object.is:YO,QO=qO.useSyncExternalStore,ZO=Wu.useRef,JO=Wu.useEffect,eI=Wu.useMemo,tI=Wu.useDebugValue;Y1.useSyncExternalStoreWithSelector=function(e,t,n,r,o){var i=ZO(null);if(i.current===null){var s={hasValue:!1,value:null};i.current=s}else s=i.current;i=eI(function(){function a(m){if(!u){if(u=!0,c=m,m=r(m),o!==void 0&&s.hasValue){var y=s.value;if(o(y,m))return d=y}return d=m}if(y=d,XO(c,m))return y;var v=r(m);return o!==void 0&&o(y,v)?y:(c=m,d=v)}var u=!1,c,d,f=n===void 0?null:n;return[function(){return a(t())},f===null?void 0:function(){return a(f())}]},[t,n,r,o]);var l=QO(e,i[0],i[1]);return JO(function(){s.hasValue=!0,s.value=l},[l]),tI(l),l};q1.exports=Y1;var nI=q1.exports;const rI=af(nI),{useSyncExternalStoreWithSelector:oI}=rI;function iI(e,t=e.getState,n){const r=oI(e.subscribe,e.getState,e.getServerState||e.getState,t,n);return S.useDebugValue(r),r}const Fg=e=>{({BASE_URL:"/",MODE:"production",DEV:!1,PROD:!0,SSR:!1}&&"production")!=="production"&&typeof e!="function"&&console.warn("[DEPRECATED] Passing a vanilla store will be unsupported in a future version. Instead use `import { useStore } from 'zustand'`.");const t=typeof e=="function"?NO(e):e,n=(r,o)=>iI(t,r,o);return Object.assign(n,t),n},sI=e=>e?Fg(e):Fg;var lI=e=>(({BASE_URL:"/",MODE:"production",DEV:!1,PROD:!0,SSR:!1}&&"production")!=="production"&&console.warn("[DEPRECATED] Default export is deprecated. 
Instead use `import { create } from 'zustand'`."),sI(e));const Uu=lI(e=>({table:null,setTable:t=>e(()=>({table:t})),config:null,setConfig:t=>e(()=>({config:t}))}));function aI(e){return pe("MuiDialog",e)}const uI=fe("MuiDialog",["root","scrollPaper","scrollBody","container","paper","paperScrollPaper","paperScrollBody","paperWidthFalse","paperWidthXs","paperWidthSm","paperWidthMd","paperWidthLg","paperWidthXl","paperFullWidth","paperFullScreen"]),Vc=uI,cI=S.createContext({}),Z1=cI,dI=["aria-describedby","aria-labelledby","BackdropComponent","BackdropProps","children","className","disableEscapeKeyDown","fullScreen","fullWidth","maxWidth","onBackdropClick","onClose","open","PaperComponent","PaperProps","scroll","TransitionComponent","transitionDuration","TransitionProps"],fI=H($1,{name:"MuiDialog",slot:"Backdrop",overrides:(e,t)=>t.backdrop})({zIndex:-1}),pI=e=>{const{classes:t,scroll:n,maxWidth:r,fullWidth:o,fullScreen:i}=e,s={root:["root"],container:["container",`scroll${Y(n)}`],paper:["paper",`paperScroll${Y(n)}`,`paperWidth${Y(String(r))}`,o&&"paperFullWidth",i&&"paperFullScreen"]};return me(s,aI,t)},hI=H(E1,{name:"MuiDialog",slot:"Root",overridesResolver:(e,t)=>t.root})({"@media print":{position:"absolute !important"}}),mI=H("div",{name:"MuiDialog",slot:"Container",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.container,t[`scroll${Y(n.scroll)}`]]}})(({ownerState:e})=>b({height:"100%","@media 
print":{height:"auto"},outline:0},e.scroll==="paper"&&{display:"flex",justifyContent:"center",alignItems:"center"},e.scroll==="body"&&{overflowY:"auto",overflowX:"hidden",textAlign:"center","&:after":{content:'""',display:"inline-block",verticalAlign:"middle",height:"100%",width:"0"}})),gI=H(Hu,{name:"MuiDialog",slot:"Paper",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.paper,t[`scrollPaper${Y(n.scroll)}`],t[`paperWidth${Y(String(n.maxWidth))}`],n.fullWidth&&t.paperFullWidth,n.fullScreen&&t.paperFullScreen]}})(({theme:e,ownerState:t})=>b({margin:32,position:"relative",overflowY:"auto","@media print":{overflowY:"visible",boxShadow:"none"}},t.scroll==="paper"&&{display:"flex",flexDirection:"column",maxHeight:"calc(100% - 64px)"},t.scroll==="body"&&{display:"inline-block",verticalAlign:"middle",textAlign:"left"},!t.maxWidth&&{maxWidth:"calc(100% - 64px)"},t.maxWidth==="xs"&&{maxWidth:e.breakpoints.unit==="px"?Math.max(e.breakpoints.values.xs,444):`${e.breakpoints.values.xs}${e.breakpoints.unit}`,[`&.${Vc.paperScrollBody}`]:{[e.breakpoints.down(Math.max(e.breakpoints.values.xs,444)+32*2)]:{maxWidth:"calc(100% - 64px)"}}},t.maxWidth&&t.maxWidth!=="xs"&&{maxWidth:`${e.breakpoints.values[t.maxWidth]}${e.breakpoints.unit}`,[`&.${Vc.paperScrollBody}`]:{[e.breakpoints.down(e.breakpoints.values[t.maxWidth]+32*2)]:{maxWidth:"calc(100% - 64px)"}}},t.fullWidth&&{width:"calc(100% - 64px)"},t.fullScreen&&{margin:0,width:"100%",maxWidth:"100%",height:"100%",maxHeight:"none",borderRadius:0,[`&.${Vc.paperScrollBody}`]:{margin:0,maxWidth:"100%"}})),vI=S.forwardRef(function(t,n){const 
r=xe({props:t,name:"MuiDialog"}),o=ai(),i={enter:o.transitions.duration.enteringScreen,exit:o.transitions.duration.leavingScreen},{"aria-describedby":s,"aria-labelledby":l,BackdropComponent:a,BackdropProps:u,children:c,className:d,disableEscapeKeyDown:f=!1,fullScreen:m=!1,fullWidth:y=!1,maxWidth:v="sm",onBackdropClick:k,onClose:g,open:p,PaperComponent:h=Hu,PaperProps:x={},scroll:C="paper",TransitionComponent:R=R1,transitionDuration:$=i,TransitionProps:E}=r,M=X(r,dI),P=b({},r,{disableEscapeKeyDown:f,fullScreen:m,fullWidth:y,maxWidth:v,scroll:C}),I=pI(P),j=S.useRef(),D=N=>{j.current=N.target===N.currentTarget},A=N=>{j.current&&(j.current=null,k&&k(N),g&&g(N,"backdropClick"))},_=qa(l),O=S.useMemo(()=>({titleId:_}),[_]);return w.jsx(hI,b({className:J(I.root,d),closeAfterTransition:!0,components:{Backdrop:fI},componentsProps:{backdrop:b({transitionDuration:$,as:a},u)},disableEscapeKeyDown:f,onClose:g,open:p,ref:n,onClick:A,ownerState:P},M,{children:w.jsx(R,b({appear:!0,in:p,timeout:$,role:"presentation"},E,{children:w.jsx(mI,{className:J(I.container),onMouseDown:D,ownerState:P,children:w.jsx(gI,b({as:h,elevation:24,role:"dialog","aria-describedby":s,"aria-labelledby":_},x,{className:J(I.paper,x.className),ownerState:P,children:w.jsx(Z1.Provider,{value:O,children:c})}))})}))}))}),Gp=vI;function yI(e){return pe("MuiDialogActions",e)}fe("MuiDialogActions",["root","spacing"]);const xI=["className","disableSpacing"],SI=e=>{const{classes:t,disableSpacing:n}=e;return me({root:["root",!n&&"spacing"]},yI,t)},bI=H("div",{name:"MuiDialogActions",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,!n.disableSpacing&&t.spacing]}})(({ownerState:e})=>b({display:"flex",alignItems:"center",padding:8,justifyContent:"flex-end",flex:"0 0 auto"},!e.disableSpacing&&{"& > :not(:first-of-type)":{marginLeft:8}})),wI=S.forwardRef(function(t,n){const 
r=xe({props:t,name:"MuiDialogActions"}),{className:o,disableSpacing:i=!1}=r,s=X(r,xI),l=b({},r,{disableSpacing:i}),a=SI(l);return w.jsx(bI,b({className:J(a.root,o),ownerState:l,ref:n},s))}),Kp=wI;function CI(e){return pe("MuiDialogContent",e)}fe("MuiDialogContent",["root","dividers"]);function kI(e){return pe("MuiDialogTitle",e)}const RI=fe("MuiDialogTitle",["root"]),$I=RI,EI=["className","dividers"],PI=e=>{const{classes:t,dividers:n}=e;return me({root:["root",n&&"dividers"]},CI,t)},_I=H("div",{name:"MuiDialogContent",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.dividers&&t.dividers]}})(({theme:e,ownerState:t})=>b({flex:"1 1 auto",WebkitOverflowScrolling:"touch",overflowY:"auto",padding:"20px 24px"},t.dividers?{padding:"16px 24px",borderTop:`1px solid ${(e.vars||e).palette.divider}`,borderBottom:`1px solid ${(e.vars||e).palette.divider}`}:{[`.${$I.root} + &`]:{paddingTop:0}})),TI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogContent"}),{className:o,dividers:i=!1}=r,s=X(r,EI),l=b({},r,{dividers:i}),a=PI(l);return w.jsx(_I,b({className:J(a.root,o),ownerState:l,ref:n},s))}),_a=TI,MI=["className","id"],OI=e=>{const{classes:t}=e;return me({root:["root"]},kI,t)},II=H(yn,{name:"MuiDialogTitle",slot:"Root",overridesResolver:(e,t)=>t.root})({padding:"16px 24px",flex:"0 0 auto"}),AI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogTitle"}),{className:o,id:i}=r,s=X(r,MI),l=r,a=OI(l),{titleId:u=i}=S.useContext(Z1);return w.jsx(II,b({component:"h2",className:J(a.root,o),ownerState:l,ref:n,variant:"h6",id:i??u},s))}),Ta=AI;function FI(e){return pe("MuiIconButton",e)}const 
LI=fe("MuiIconButton",["root","disabled","colorInherit","colorPrimary","colorSecondary","colorError","colorInfo","colorSuccess","colorWarning","edgeStart","edgeEnd","sizeSmall","sizeMedium","sizeLarge"]),NI=LI,zI=["edge","children","className","color","disabled","disableFocusRipple","size"],DI=e=>{const{classes:t,disabled:n,color:r,edge:o,size:i}=e,s={root:["root",n&&"disabled",r!=="default"&&`color${Y(r)}`,o&&`edge${Y(o)}`,`size${Y(i)}`]};return me(s,FI,t)},jI=H(Eu,{name:"MuiIconButton",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.color!=="default"&&t[`color${Y(n.color)}`],n.edge&&t[`edge${Y(n.edge)}`],t[`size${Y(n.size)}`]]}})(({theme:e,ownerState:t})=>b({textAlign:"center",flex:"0 0 auto",fontSize:e.typography.pxToRem(24),padding:8,borderRadius:"50%",overflow:"visible",color:(e.vars||e).palette.action.active,transition:e.transitions.create("background-color",{duration:e.transitions.duration.shortest})},!t.disableRipple&&{"&:hover":{backgroundColor:e.vars?`rgba(${e.vars.palette.action.activeChannel} / ${e.vars.palette.action.hoverOpacity})`:ft(e.palette.action.active,e.palette.action.hoverOpacity),"@media (hover: none)":{backgroundColor:"transparent"}}},t.edge==="start"&&{marginLeft:t.size==="small"?-3:-12},t.edge==="end"&&{marginRight:t.size==="small"?-3:-12}),({theme:e,ownerState:t})=>{var n;const r=(n=(e.vars||e).palette)==null?void 0:n[t.color];return b({},t.color==="inherit"&&{color:"inherit"},t.color!=="inherit"&&t.color!=="default"&&b({color:r==null?void 0:r.main},!t.disableRipple&&{"&:hover":b({},r&&{backgroundColor:e.vars?`rgba(${r.mainChannel} / ${e.vars.palette.action.hoverOpacity})`:ft(r.main,e.palette.action.hoverOpacity)},{"@media (hover: 
none)":{backgroundColor:"transparent"}})}),t.size==="small"&&{padding:5,fontSize:e.typography.pxToRem(18)},t.size==="large"&&{padding:12,fontSize:e.typography.pxToRem(28)},{[`&.${NI.disabled}`]:{backgroundColor:"transparent",color:(e.vars||e).palette.action.disabled}})}),BI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiIconButton"}),{edge:o=!1,children:i,className:s,color:l="default",disabled:a=!1,disableFocusRipple:u=!1,size:c="medium"}=r,d=X(r,zI),f=b({},r,{edge:o,color:l,disabled:a,disableFocusRipple:u,size:c}),m=DI(f);return w.jsx(jI,b({className:J(m.root,s),centerRipple:!0,focusRipple:!u,disabled:a,ref:n,ownerState:f},d,{children:i}))}),qp=BI;var Yp={},VI=oo;Object.defineProperty(Yp,"__esModule",{value:!0});var J1=Yp.default=void 0,HI=VI(io()),WI=w,UI=(0,HI.default)((0,WI.jsx)("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z"}),"ContentCopy");J1=Yp.default=UI;var Xp={},GI=oo;Object.defineProperty(Xp,"__esModule",{value:!0});var Qp=Xp.default=void 0,KI=GI(io()),qI=w,YI=(0,KI.default)((0,qI.jsx)("path",{d:"M9 16.17 4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"}),"Check");Qp=Xp.default=YI;function ex({open:e,onClose:t,prompt:n,output:r}){const[o,i]=S.useState(!1);S.useEffect(()=>{i(!1)},[n]);const s=async l=>{await navigator.clipboard.writeText(l),i(!0)};return w.jsxs(Gp,{open:e,onClose:t,fullWidth:!0,maxWidth:"lg",children:[w.jsx(Ta,{children:"Prompt"}),w.jsxs(_a,{children:[w.jsx(Xd,{readOnly:!0,value:n,style:{width:"100%",padding:"0.75rem"}}),w.jsx(qp,{onClick:()=>s(n),style:{position:"absolute",right:"10px",top:"10px"},children:o?w.jsx(Qp,{}):w.jsx(J1,{})})]}),r&&w.jsxs(w.Fragment,{children:[w.jsx(Ta,{children:"Output"}),w.jsx(_a,{children:w.jsx(Xd,{readOnly:!0,value:r,style:{width:"100%",padding:"0.75rem"}})})]}),w.jsx(Kp,{children:w.jsx(_s,{onClick:t,children:"Close"})})]})}function XI(e){if(typeof e=="string"){const t=e.startsWith("[PASS]");let 
n=e;return(e.startsWith("[PASS]")||e.startsWith("[FAIL]"))&&(n=n.slice(6)),{text:n,pass:t,score:t?1:0}}return e}function Lg(e){return e===0||e===1?"":`(${e.toFixed(2)})`}function Zp({text:e,maxLength:t}){const[n,r]=S.useState(!0);let o=typeof e=="string"?e:JSON.stringify(e);o=o.replace(/\n/g,"<br>");const i=()=>{r(!n)};return(()=>o.length<=t?w.jsx("span",{dangerouslySetInnerHTML:{__html:o}}):n?w.jsxs("span",{style:{cursor:"pointer"},onClick:i,children:[w.jsx("span",{dangerouslySetInnerHTML:{__html:o.substring(0,t)}})," ..."]}):w.jsx("span",{style:{cursor:"pointer"},onClick:i,children:w.jsx("span",{dangerouslySetInnerHTML:{__html:o}})}))()}function QI({output:e,maxTextLength:t,rowIndex:n,promptIndex:r,onRating:o}){const[i,s]=S.useState(!1),l=()=>{s(!0)},a=()=>{s(!1)};let u=typeof e.text=="string"?e.text:JSON.stringify(e.text),c=[];!e.pass&&u.includes("---")&&(c=u.split("---"),u=c.slice(1).join("---"));const d=f=>{o(n,r,f)};return w.jsxs(w.Fragment,{children:[w.jsxs("div",{className:"cell",children:[e.pass&&w.jsxs("div",{className:"status pass",children:["PASS ",w.jsx("span",{className:"score",children:Lg(e.score)})]}),!e.pass&&w.jsxs("div",{className:"status fail",children:["[FAIL",w.jsx("span",{className:"score",children:Lg(e.score)}),"] ",c[0]]})," ",w.jsx(Zp,{text:u,maxLength:t})]}),w.jsxs("div",{className:"cell-actions",children:[e.prompt&&w.jsxs(w.Fragment,{children:[w.jsx("span",{className:"action",onClick:l,children:"🔎"}),w.jsx(ex,{open:i,onClose:a,prompt:e.prompt,output:u})]}),w.jsx("span",{className:"action",onClick:()=>d(!0),children:"👍"}),w.jsx("span",{className:"action",onClick:()=>d(!1),children:"👎"})]})]})}function Ng({text:e,maxLength:t,smallText:n,expandedText:r}){const[o,i]=S.useState(!1),s=()=>{i(!0)},l=()=>{i(!1)};return 
w.jsxs("div",{children:[w.jsx(Zp,{text:e,maxLength:t}),r&&w.jsxs(w.Fragment,{children:[w.jsx("span",{className:"action",onClick:s,children:"🔎"}),w.jsx(ex,{open:o,onClose:l,prompt:r})]}),w.jsx("div",{className:"smalltext",children:n})]})}function ZI({maxTextLength:e,columnVisibility:t,wordBreak:n,filterMode:r,failureFilter:o,onFailureFilterToggle:i}){const{table:s,setTable:l}=Uu();e1(s,"Table should be defined");const{head:a,body:u}=s,c=a.prompts.map((p,h)=>u.reduce((x,C)=>x+(C.outputs[h].pass?1:0),0)),d=(p,h,x)=>{const C=[...u],R={...C[p]},$=[...R.outputs];$[h].pass=x,$[h].score=x?1:0,R.outputs=$,C[p]=R,l({head:a,body:C})},f=c.reduce((p,h,x,C)=>h>C[p]?x:p,0),m=c[f],y=MO(),v=[y.group({id:"vars",header:()=>w.jsx("span",{children:"Variables"}),columns:a.vars.map((p,h)=>y.accessor(x=>x.vars[h],{id:`Variable ${h+1}`,header:()=>w.jsx(Ng,{smallText:`Variable ${h+1}`,text:p,maxLength:e}),cell:x=>w.jsx(Zp,{text:x.getValue(),maxLength:e}),size:50}))}),y.group({id:"prompts",header:()=>w.jsx("span",{children:"Outputs"}),columns:a.prompts.map((p,h)=>y.accessor(x=>XI(x.outputs[h]),{id:`Prompt ${h+1}`,header:()=>{const x=(c[h]/u.length*100).toFixed(2),C=c[h]===m&&m!==0,R=`Prompt ${h+1}`,$=o[R]||!1;return w.jsxs(w.Fragment,{children:[w.jsx(Ng,{smallText:`Prompt ${h+1}`,text:typeof p=="string"?p:p.display,expandedText:typeof p=="string"?void 0:p.raw,maxLength:e}),r==="failures"&&w.jsx(x1,{sx:{"& .MuiFormControlLabel-label":{fontSize:"0.75rem"}},control:w.jsx(ef,{checked:$,onChange:E=>i(R,E.target.checked)}),label:"Show failures"}),w.jsxs("div",{className:`summary ${C?"highlight":""}`,children:["Passing: ",w.jsxs("strong",{children:[x,"%"]})," (",c[h]," / ",u.length,")"]})]})},cell:x=>w.jsx(QI,{output:x.getValue(),maxTextLength:e,rowIndex:x.row.index,promptIndex:h,onRating:d})}))})],k=S.useMemo(()=>r==="failures"?Object.values(o).every(p=>!p)?u:u.filter(p=>p.outputs.some((h,x)=>{const C=`Prompt ${x+1}`,R=!h.pass;return 
o[C]&&R})):r==="different"?u.filter(p=>!p.outputs.every(h=>h.text===p.outputs[0].text)):u,[u,o,r]),g=LO({data:k,columns:v,columnResizeMode:"onChange",getCoreRowModel:OO(),state:{columnVisibility:t}});return w.jsxs("table",{style:{wordBreak:n},children:[w.jsx("thead",{children:g.getHeaderGroups().map(p=>w.jsx("tr",{className:"header",children:p.headers.map(h=>w.jsxs("th",{key:h.id,colSpan:h.colSpan,style:{width:h.getSize()},children:[h.isPlaceholder?null:Ig(h.column.columnDef.header,h.getContext()),w.jsx("div",{onMouseDown:h.getResizeHandler(),onTouchStart:h.getResizeHandler(),className:`resizer ${h.column.getIsResizing()?"isResizing":""}`})]}))},p.id))}),w.jsx("tbody",{children:g.getRowModel().rows.map((p,h)=>{let x=!1;return w.jsx("tr",{children:p.getVisibleCells().map(C=>{const R=C.column.id.startsWith("Variable"),$=!R&&!x;$&&(x=!0);const E=h===0&&!R;return w.jsx("td",{key:C.id,style:{width:C.column.getSize()},className:`${R?"variable":""} ${E?"first-prompt-row":""} ${$?"first-prompt-col":""}`,children:Ig(C.column.columnDef.cell,C.getContext())})})},p.id)})})]})}const JI="modulepreload",eA=function(e){return"/"+e},zg={},tA=function(t,n,r){if(!n||n.length===0)return t();const o=document.getElementsByTagName("link");return Promise.all(n.map(i=>{if(i=eA(i),i in zg)return;zg[i]=!0;const s=i.endsWith(".css"),l=s?'[rel="stylesheet"]':"";if(!!r)for(let c=o.length-1;c>=0;c--){const d=o[c];if(d.href===i&&(!s||d.rel==="stylesheet"))return}else if(document.querySelector(`link[href="${i}"]${l}`))return;const u=document.createElement("link");if(u.rel=s?"stylesheet":JI,s||(u.as="script",u.crossOrigin=""),u.href=i,document.head.appendChild(u),s)return new Promise((c,d)=>{u.addEventListener("load",c),u.addEventListener("error",()=>d(new Error(`Unable to preload CSS for ${i}`)))})})).then(()=>t())};function nA(e){return pe("MuiDialogContentText",e)}fe("MuiDialogContentText",["root"]);const rA=["children","className"],oA=e=>{const{classes:t}=e,r=me({root:["root"]},nA,t);return 
b({},t,r)},iA=H(yn,{shouldForwardProp:e=>an(e)||e==="classes",name:"MuiDialogContentText",slot:"Root",overridesResolver:(e,t)=>t.root})({}),sA=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogContentText"}),{className:o}=r,i=X(r,rA),s=oA(i);return w.jsx(iA,b({component:"p",variant:"body1",color:"text.secondary",ref:n,ownerState:i,className:J(s.root,o)},r,{classes:s}))}),lA=sA;function aA(e){return pe("MuiFormHelperText",e)}const uA=fe("MuiFormHelperText",["root","error","disabled","sizeSmall","sizeMedium","contained","focused","filled","required"]),Dg=uA;var jg;const cA=["children","className","component","disabled","error","filled","focused","margin","required","variant"],dA=e=>{const{classes:t,contained:n,size:r,disabled:o,error:i,filled:s,focused:l,required:a}=e,u={root:["root",o&&"disabled",i&&"error",r&&`size${Y(r)}`,n&&"contained",l&&"focused",s&&"filled",a&&"required"]};return me(u,aA,t)},fA=H("p",{name:"MuiFormHelperText",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.size&&t[`size${Y(n.size)}`],n.contained&&t.contained,n.filled&&t.filled]}})(({theme:e,ownerState:t})=>b({color:(e.vars||e).palette.text.secondary},e.typography.caption,{textAlign:"left",marginTop:3,marginRight:0,marginBottom:0,marginLeft:0,[`&.${Dg.disabled}`]:{color:(e.vars||e).palette.text.disabled},[`&.${Dg.error}`]:{color:(e.vars||e).palette.error.main}},t.size==="small"&&{marginTop:4},t.contained&&{marginLeft:14,marginRight:14})),pA=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiFormHelperText"}),{children:o,className:i,component:s="p"}=r,l=X(r,cA),a=_r(),u=ro({props:r,muiFormControl:a,states:["variant","size","disabled","error","filled","focused","required"]}),c=b({},r,{component:s,contained:u.variant==="filled"||u.variant==="outlined",variant:u.variant,size:u.size,disabled:u.disabled,error:u.error,filled:u.filled,focused:u.focused,required:u.required}),d=dA(c);return 
w.jsx(fA,b({as:s,ownerState:c,className:J(d.root,i),ref:n},l,{children:o===" "?jg||(jg=w.jsx("span",{className:"notranslate",children:""})):o}))}),hA=pA;function mA(e){return pe("MuiTextField",e)}fe("MuiTextField",["root"]);const gA=["autoComplete","autoFocus","children","className","color","defaultValue","disabled","error","FormHelperTextProps","fullWidth","helperText","id","InputLabelProps","inputProps","InputProps","inputRef","label","maxRows","minRows","multiline","name","onBlur","onChange","onClick","onFocus","placeholder","required","rows","select","SelectProps","type","value","variant"],vA={standard:M1,filled:I1,outlined:Lp},yA=e=>{const{classes:t}=e;return me({root:["root"]},mA,t)},xA=H(Wl,{name:"MuiTextField",slot:"Root",overridesResolver:(e,t)=>t.root})({}),SA=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiTextField"}),{autoComplete:o,autoFocus:i=!1,children:s,className:l,color:a="primary",defaultValue:u,disabled:c=!1,error:d=!1,FormHelperTextProps:f,fullWidth:m=!1,helperText:y,id:v,InputLabelProps:k,inputProps:g,InputProps:p,inputRef:h,label:x,maxRows:C,minRows:R,multiline:$=!1,name:E,onBlur:M,onChange:P,onClick:I,onFocus:j,placeholder:D,required:A=!1,rows:_,select:O=!1,SelectProps:N,type:T,value:L,variant:F="outlined"}=r,q=X(r,gA),G=b({},r,{autoFocus:i,color:a,disabled:c,error:d,fullWidth:m,multiline:$,required:A,select:O,variant:F}),oe=yA(G),te={};F==="outlined"&&(k&&typeof k.shrink<"u"&&(te.notched=k.shrink),te.label=x),O&&((!N||!N.native)&&(te.id=void 0),te["aria-describedby"]=void 0);const Q=qa(v),se=y&&Q?`${Q}-helper-text`:void 0,Ce=x&&Q?`${Q}-label`:void 0,Se=vA[F],ae=w.jsx(Se,b({"aria-describedby":se,autoComplete:o,autoFocus:i,defaultValue:u,fullWidth:m,multiline:$,name:E,rows:_,maxRows:C,minRows:R,type:T,value:L,id:Q,inputRef:h,onBlur:M,onChange:P,onFocus:j,onClick:I,placeholder:D,inputProps:g},te,p));return 
w.jsxs(xA,b({className:J(oe.root,l),disabled:c,error:d,fullWidth:m,ref:n,required:A,color:a,variant:F,ownerState:G},q,{children:[x!=null&&x!==""&&w.jsx(Ul,b({htmlFor:Q,id:Ce},k,{children:x})),O?w.jsx(Gl,b({"aria-describedby":se,id:Q,labelId:Ce,value:L,input:ae},N,{children:s})):ae,y&&w.jsx(hA,b({id:se},f,{children:y}))]}))}),bA=SA,wA=no(w.jsx("path",{d:"M9 16.17 4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"}),"Check"),CA=no(w.jsx("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm-1 4 6 6v10c0 1.1-.9 2-2 2H7.99C6.89 23 6 22.1 6 21l.01-14c0-1.1.89-2 1.99-2h7zm-1 7h5.5L14 6.5V12z"}),"FileCopy");function kA({open:e,onClose:t}){const{config:n}=Uu(),r=wt.useRef(null),[o,i]=wt.useState(!1),[s,l]=wt.useState("");wt.useEffect(()=>{e&&(async()=>{const{default:c}=await tA(()=>import("./js-yaml-8bbf9398.js"),[]);l(c.dump(n))})()},[e,n]);const a=()=>{r.current&&(r.current.select(),document.execCommand("copy"),i(!0))},u=()=>{i(!1),t()};return w.jsxs(Gp,{open:e,onClose:u,"aria-labelledby":"config-dialog-title",maxWidth:"md",fullWidth:!0,children:[w.jsx(Ta,{id:"config-dialog-title",children:w.jsxs(En,{display:"flex",justifyContent:"space-between",alignItems:"center",children:[w.jsx(yn,{variant:"h6",children:"Config"}),w.jsx(qp,{onClick:a,children:o?w.jsx(wA,{}):w.jsx(CA,{})})]})}),w.jsx(_a,{children:w.jsx(yn,{variant:"body1",component:"div",children:w.jsx("textarea",{ref:r,readOnly:!0,value:s,style:{width:"100%",minHeight:"400px",fontFamily:"monospace",border:"1px solid #ccc"}})})}),w.jsx(Kp,{children:w.jsx(_s,{onClick:u,color:"primary",children:"Close"})})]})}var Jp={},RA=oo;Object.defineProperty(Jp,"__esModule",{value:!0});var tx=Jp.default=void 0,$A=RA(io()),EA=w,PA=(0,$A.default)((0,EA.jsx)("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm-1 4 6 6v10c0 1.1-.9 2-2 2H7.99C6.89 23 6 22.1 6 21l.01-14c0-1.1.89-2 1.99-2h7zm-1 7h5.5L14 6.5V12z"}),"FileCopy");tx=Jp.default=PA;const _A=({open:e,onClose:t,shareUrl:n})=>{const 
r=S.useRef(null),[o,i]=S.useState(!1),s=()=>{r.current&&(r.current.select(),document.execCommand("copy"),i(!0))},l=()=>{t(),i(!1)};return w.jsxs(Gp,{open:e,onClose:l,PaperProps:{style:{minWidth:"min(660px, 100%)"}},children:[w.jsx(Ta,{children:"Your eval is ready to share"}),w.jsxs(_a,{children:[w.jsx(bA,{inputRef:r,value:n,fullWidth:!0,InputProps:{readOnly:!0,endAdornment:w.jsx(qp,{onClick:s,children:o?w.jsx(Qp,{}):w.jsx(tx,{})})}}),w.jsx(lA,{sx:{fontSize:"0.75rem"},children:"Shared URLs are deleted after 1 week."})]}),w.jsx(Kp,{children:w.jsx(_s,{onClick:l,color:"primary",children:"Close"})})]})},Bg=Ty(EM)(({theme:e})=>({maxWidth:"100%",flexWrap:"wrap",[e.breakpoints.down("sm")]:{flexDirection:"column"}}));function TA(e){const t=e.slice(5,e.length-5);return new Date(t).toLocaleDateString("en-US",{year:"numeric",month:"long",day:"numeric",hour:"2-digit",minute:"2-digit",second:"2-digit",timeZoneName:"short"})}function MA({recentFiles:e,onRecentFileSelected:t}){const{table:n,config:r}=Uu(),[o,i]=S.useState(250),[s,l]=S.useState({}),[a,u]=S.useState([]),[c,d]=S.useState({}),f=(_,O)=>{d(N=>({...N,[_]:O}))},[m,y]=S.useState("all"),v=_=>{const O=_.target.value;y(O);const N={};j.prompts.forEach((T,L)=>{const F=`Prompt ${L+1}`;N[F]=O==="failures"}),d(N)},[k,g]=S.useState("break-all"),p=_=>{g(_.target.checked?"break-all":"break-word")},[h,x]=S.useState(!1),[C,R]=S.useState(""),[$,E]=S.useState(!1),M=async()=>{E(!0);try{const _=await fetch("https://api.promptfoo.dev/eval",{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({data:{version:1,results:{table:n},config:r}})}),{id:O}=await _.json(),N=`https://app.promptfoo.dev/eval/${O}`;R(N),x(!0)}catch{alert("Sorry, something went wrong.")}finally{E(!1)}},[P,I]=S.useState(!1);e1(n,"Table data must be loaded before rendering ResultsView");const{head:j}=n,D=_=>{const{target:{value:O}}=_;u(typeof O=="string"?O.split(","):O);const N=[...j.vars.map((L,F)=>`Variable ${F+1}`),...j.prompts.map((L,F)=>`Prompt 
${F+1}`)],T={};N.forEach(L=>{T[L]=(typeof O=="string"?O.split(","):O).includes(L)}),l(T)},A=[...j.vars.map((_,O)=>({value:`Variable ${O+1}`,label:`Variable ${O+1}`,group:"Variables"})),...j.prompts.map((_,O)=>({value:`Prompt ${O+1}`,label:`Prompt ${O+1}`,group:"Prompts"}))];return S.useEffect(()=>{u([...j.vars.map((_,O)=>`Variable ${O+1}`),...j.prompts.map((_,O)=>`Prompt ${O+1}`)])},[j]),w.jsxs("div",{children:[w.jsx(En,{py:"md",children:w.jsxs(Bg,{direction:"row",spacing:4,alignItems:"center",children:[w.jsx(En,{children:e&&e.length>0&&w.jsxs(Wl,{sx:{m:1,minWidth:200},size:"small",children:[w.jsx(Ul,{children:"View run"}),w.jsx(Gl,{className:"recent-files",label:"Previous runs",defaultValue:e[0],onChange:_=>t(_.target.value),children:e.map(_=>w.jsx(Mi,{value:_,children:TA(_)},_))},e.join(","))]})}),w.jsx(En,{children:w.jsxs(Wl,{sx:{m:1,minWidth:200},size:"small",children:[w.jsx(Ul,{id:"visible-columns-label",children:"Show columns"}),w.jsx(Gl,{labelId:"visible-columns-label",id:"visible-columns",multiple:!0,value:a,onChange:D,input:w.jsx(Lp,{label:"Visible columns"}),renderValue:_=>_.join(", "),children:A.map(_=>w.jsxs(Mi,{dense:!0,value:_.value,children:[w.jsx(ef,{checked:a.indexOf(_.value)>-1}),w.jsx(r_,{primary:_.label})]},_.value))})]})}),w.jsx(En,{children:w.jsxs(Wl,{sx:{minWidth:180},size:"small",children:[w.jsx(Ul,{id:"failure-filter-mode-label",children:"Filter"}),w.jsxs(Gl,{labelId:"filter-mode-label",id:"filter-mode",value:m,onChange:v,label:"Filter",children:[w.jsx(Mi,{value:"all",children:"Show all results"}),w.jsx(Mi,{value:"failures",children:"Show failures only"}),w.jsx(Mi,{value:"different",children:"Show different only"})]})]})}),w.jsxs(En,{children:[w.jsxs(yn,{mt:2,children:["Max text length: ",o]}),w.jsx(RM,{min:25,max:1e3,value:o,onChange:(_,O)=>i(O)})]}),w.jsx(En,{children:w.jsx(Ic,{title:"Forcing line breaks makes it easier to adjust column widths to your 
liking",children:w.jsx(x1,{control:w.jsx(ef,{checked:k==="break-all",onChange:p}),label:"Force line breaks"})})}),w.jsx(En,{flexGrow:1}),w.jsx(En,{display:"flex",justifyContent:"flex-end",children:w.jsxs(Bg,{direction:"row",spacing:2,children:[r&&w.jsx(Ic,{title:"View config",children:w.jsx(_s,{color:"primary",onClick:()=>I(!0),startIcon:w.jsx(z1,{}),children:"Config"})}),(r==null?void 0:r.sharing)&&w.jsx(Ic,{title:"Generate a unique URL that others can access",children:w.jsx(_s,{color:"primary",onClick:M,disabled:$,startIcon:$?w.jsx(g2,{size:16}):w.jsx(N1,{}),children:"Share"})})]})})]})}),w.jsx(ZI,{maxTextLength:o,columnVisibility:s,wordBreak:k,filterMode:m,failureFilter:c,onFailureFilterToggle:f}),w.jsx(kA,{open:P,onClose:()=>I(!1)}),w.jsx(_A,{open:h,onClose:()=>x(!1),shareUrl:C})]})}function OA(){return w.jsxs(En,{className:"logo",children:[w.jsx("img",{src:"/logo.svg",alt:"Promptfoo logo"})," ",w.jsx("span",{children:"promptfoo"})]})}var eh={},IA=oo;Object.defineProperty(eh,"__esModule",{value:!0});var nx=eh.default=void 0,AA=IA(io()),FA=w,LA=(0,AA.default)((0,FA.jsx)("path",{d:"M12 3c-4.97 0-9 4.03-9 9s4.03 9 9 9 9-4.03 9-9c0-.46-.04-.92-.1-1.36-.98 1.37-2.58 2.26-4.4 2.26-2.98 0-5.4-2.42-5.4-5.4 0-1.81.89-3.42 2.26-4.4-.44-.06-.9-.1-1.36-.1z"}),"DarkMode");nx=eh.default=LA;var th={},NA=oo;Object.defineProperty(th,"__esModule",{value:!0});var rx=th.default=void 0,zA=NA(io()),DA=w,jA=(0,zA.default)((0,DA.jsx)("path",{d:"M12 7c-2.76 0-5 2.24-5 5s2.24 5 5 5 5-2.24 5-5-2.24-5-5-5zM2 13h2c.55 0 1-.45 1-1s-.45-1-1-1H2c-.55 0-1 .45-1 1s.45 1 1 1zm18 0h2c.55 0 1-.45 1-1s-.45-1-1-1h-2c-.55 0-1 .45-1 1s.45 1 1 1zM11 2v2c0 .55.45 1 1 1s1-.45 1-1V2c0-.55-.45-1-1-1s-1 .45-1 1zm0 18v2c0 .55.45 1 1 1s1-.45 1-1v-2c0-.55-.45-1-1-1s-1 .45-1 1zM5.99 4.58c-.39-.39-1.03-.39-1.41 0-.39.39-.39 1.03 0 1.41l1.06 1.06c.39.39 1.03.39 1.41 0s.39-1.03 0-1.41L5.99 4.58zm12.37 12.37c-.39-.39-1.03-.39-1.41 0-.39.39-.39 1.03 0 1.41l1.06 1.06c.39.39 1.03.39 1.41 0 .39-.39.39-1.03 
0-1.41l-1.06-1.06zm1.06-10.96c.39-.39.39-1.03 0-1.41-.39-.39-1.03-.39-1.41 0l-1.06 1.06c-.39.39-.39 1.03 0 1.41s1.03.39 1.41 0l1.06-1.06zM7.05 18.36c.39-.39.39-1.03 0-1.41-.39-.39-1.03-.39-1.41 0l-1.06 1.06c-.39.39-.39 1.03 0 1.41s1.03.39 1.41 0l1.06-1.06z"}),"LightMode");rx=th.default=jA;function BA({darkMode:e,onToggleDarkMode:t}){return w.jsxs("nav",{children:[w.jsx(OA,{}),w.jsx("div",{className:"dark-mode-toggle",onClick:t,children:e?w.jsx(nx,{}):w.jsx(rx,{})})]})}function VA(){const{table:e,setTable:t,setConfig:n}=Uu(),[r,o]=S.useState(!1),i=S.useRef(!1),[s,l]=S.useState([]),a=Rk("(prefers-color-scheme: dark)"),[u,c]=S.useState(a),d=S.useMemo(()=>bp({palette:{mode:u?"dark":"light"}}),[u]),f=()=>{c(!u),u?document.documentElement.removeAttribute("data-theme"):document.documentElement.setAttribute("data-theme","dark")};S.useEffect(()=>{a&&document.documentElement.setAttribute("data-theme","dark")},[a]);const m=async()=>{if(!window.location.href.includes("localhost"))return;const k=await(await fetch("http://localhost:15500/results")).json();l(k.data)},y=async v=>{const g=await(await fetch(`http://localhost:15500/results/${v}`)).json();t(g.data.results.table),n(g.data.config)};return S.useEffect(()=>{const v=async p=>{var C;if(i.current)return;i.current=!0;const x=await(await fetch(`https://api.promptfoo.dev/eval/${p}`)).json();t(((C=x.data.results)==null?void 0:C.table)||x.data.table),n(x.data.config),o(!0)},k=Vl("http://localhost:15500"),g=window.location.pathname.match(/\/eval\/([\w:-]+)/);if(g){const p=g[1];v(p)}else k.on("init",p=>{console.log("Initialized socket connection",p),o(!0),t(p.results.table),n(p.config),m()}),k.on("update",p=>{console.log("Received data 
update",p),t(p.results.table),n(p.config),m()});return()=>{k.disconnect()}},[t,n]),w.jsxs(uR,{theme:d,children:[w.jsx(BA,{darkMode:u,onToggleDarkMode:f}),r&&e?w.jsx(MA,{recentFiles:s,onRecentFileSelected:y}):w.jsx("div",{children:"Loading..."})]})}Hc.createRoot(document.getElementById("root")).render(w.jsx(wt.StrictMode,{children:w.jsx(VA,{})}));
|
|
199
|
+
*/var Wu=S,qO=KO;function YO(e,t){return e===t&&(e!==0||1/e===1/t)||e!==e&&t!==t}var XO=typeof Object.is=="function"?Object.is:YO,QO=qO.useSyncExternalStore,ZO=Wu.useRef,JO=Wu.useEffect,eI=Wu.useMemo,tI=Wu.useDebugValue;Y1.useSyncExternalStoreWithSelector=function(e,t,n,r,o){var i=ZO(null);if(i.current===null){var s={hasValue:!1,value:null};i.current=s}else s=i.current;i=eI(function(){function a(m){if(!u){if(u=!0,c=m,m=r(m),o!==void 0&&s.hasValue){var y=s.value;if(o(y,m))return d=y}return d=m}if(y=d,XO(c,m))return y;var v=r(m);return o!==void 0&&o(y,v)?y:(c=m,d=v)}var u=!1,c,d,f=n===void 0?null:n;return[function(){return a(t())},f===null?void 0:function(){return a(f())}]},[t,n,r,o]);var l=QO(e,i[0],i[1]);return JO(function(){s.hasValue=!0,s.value=l},[l]),tI(l),l};q1.exports=Y1;var nI=q1.exports;const rI=af(nI),{useSyncExternalStoreWithSelector:oI}=rI;function iI(e,t=e.getState,n){const r=oI(e.subscribe,e.getState,e.getServerState||e.getState,t,n);return S.useDebugValue(r),r}const Fg=e=>{({BASE_URL:"/",MODE:"production",DEV:!1,PROD:!0,SSR:!1}&&"production")!=="production"&&typeof e!="function"&&console.warn("[DEPRECATED] Passing a vanilla store will be unsupported in a future version. Instead use `import { useStore } from 'zustand'`.");const t=typeof e=="function"?NO(e):e,n=(r,o)=>iI(t,r,o);return Object.assign(n,t),n},sI=e=>e?Fg(e):Fg;var lI=e=>(({BASE_URL:"/",MODE:"production",DEV:!1,PROD:!0,SSR:!1}&&"production")!=="production"&&console.warn("[DEPRECATED] Default export is deprecated. 
Instead use `import { create } from 'zustand'`."),sI(e));const Uu=lI(e=>({table:null,setTable:t=>e(()=>({table:t})),config:null,setConfig:t=>e(()=>({config:t}))}));function aI(e){return pe("MuiDialog",e)}const uI=fe("MuiDialog",["root","scrollPaper","scrollBody","container","paper","paperScrollPaper","paperScrollBody","paperWidthFalse","paperWidthXs","paperWidthSm","paperWidthMd","paperWidthLg","paperWidthXl","paperFullWidth","paperFullScreen"]),Vc=uI,cI=S.createContext({}),Z1=cI,dI=["aria-describedby","aria-labelledby","BackdropComponent","BackdropProps","children","className","disableEscapeKeyDown","fullScreen","fullWidth","maxWidth","onBackdropClick","onClose","open","PaperComponent","PaperProps","scroll","TransitionComponent","transitionDuration","TransitionProps"],fI=H($1,{name:"MuiDialog",slot:"Backdrop",overrides:(e,t)=>t.backdrop})({zIndex:-1}),pI=e=>{const{classes:t,scroll:n,maxWidth:r,fullWidth:o,fullScreen:i}=e,s={root:["root"],container:["container",`scroll${Y(n)}`],paper:["paper",`paperScroll${Y(n)}`,`paperWidth${Y(String(r))}`,o&&"paperFullWidth",i&&"paperFullScreen"]};return me(s,aI,t)},hI=H(E1,{name:"MuiDialog",slot:"Root",overridesResolver:(e,t)=>t.root})({"@media print":{position:"absolute !important"}}),mI=H("div",{name:"MuiDialog",slot:"Container",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.container,t[`scroll${Y(n.scroll)}`]]}})(({ownerState:e})=>b({height:"100%","@media 
print":{height:"auto"},outline:0},e.scroll==="paper"&&{display:"flex",justifyContent:"center",alignItems:"center"},e.scroll==="body"&&{overflowY:"auto",overflowX:"hidden",textAlign:"center","&:after":{content:'""',display:"inline-block",verticalAlign:"middle",height:"100%",width:"0"}})),gI=H(Hu,{name:"MuiDialog",slot:"Paper",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.paper,t[`scrollPaper${Y(n.scroll)}`],t[`paperWidth${Y(String(n.maxWidth))}`],n.fullWidth&&t.paperFullWidth,n.fullScreen&&t.paperFullScreen]}})(({theme:e,ownerState:t})=>b({margin:32,position:"relative",overflowY:"auto","@media print":{overflowY:"visible",boxShadow:"none"}},t.scroll==="paper"&&{display:"flex",flexDirection:"column",maxHeight:"calc(100% - 64px)"},t.scroll==="body"&&{display:"inline-block",verticalAlign:"middle",textAlign:"left"},!t.maxWidth&&{maxWidth:"calc(100% - 64px)"},t.maxWidth==="xs"&&{maxWidth:e.breakpoints.unit==="px"?Math.max(e.breakpoints.values.xs,444):`${e.breakpoints.values.xs}${e.breakpoints.unit}`,[`&.${Vc.paperScrollBody}`]:{[e.breakpoints.down(Math.max(e.breakpoints.values.xs,444)+32*2)]:{maxWidth:"calc(100% - 64px)"}}},t.maxWidth&&t.maxWidth!=="xs"&&{maxWidth:`${e.breakpoints.values[t.maxWidth]}${e.breakpoints.unit}`,[`&.${Vc.paperScrollBody}`]:{[e.breakpoints.down(e.breakpoints.values[t.maxWidth]+32*2)]:{maxWidth:"calc(100% - 64px)"}}},t.fullWidth&&{width:"calc(100% - 64px)"},t.fullScreen&&{margin:0,width:"100%",maxWidth:"100%",height:"100%",maxHeight:"none",borderRadius:0,[`&.${Vc.paperScrollBody}`]:{margin:0,maxWidth:"100%"}})),vI=S.forwardRef(function(t,n){const 
r=xe({props:t,name:"MuiDialog"}),o=ai(),i={enter:o.transitions.duration.enteringScreen,exit:o.transitions.duration.leavingScreen},{"aria-describedby":s,"aria-labelledby":l,BackdropComponent:a,BackdropProps:u,children:c,className:d,disableEscapeKeyDown:f=!1,fullScreen:m=!1,fullWidth:y=!1,maxWidth:v="sm",onBackdropClick:k,onClose:g,open:p,PaperComponent:h=Hu,PaperProps:x={},scroll:C="paper",TransitionComponent:R=R1,transitionDuration:$=i,TransitionProps:E}=r,M=X(r,dI),P=b({},r,{disableEscapeKeyDown:f,fullScreen:m,fullWidth:y,maxWidth:v,scroll:C}),I=pI(P),j=S.useRef(),D=N=>{j.current=N.target===N.currentTarget},A=N=>{j.current&&(j.current=null,k&&k(N),g&&g(N,"backdropClick"))},_=qa(l),O=S.useMemo(()=>({titleId:_}),[_]);return w.jsx(hI,b({className:J(I.root,d),closeAfterTransition:!0,components:{Backdrop:fI},componentsProps:{backdrop:b({transitionDuration:$,as:a},u)},disableEscapeKeyDown:f,onClose:g,open:p,ref:n,onClick:A,ownerState:P},M,{children:w.jsx(R,b({appear:!0,in:p,timeout:$,role:"presentation"},E,{children:w.jsx(mI,{className:J(I.container),onMouseDown:D,ownerState:P,children:w.jsx(gI,b({as:h,elevation:24,role:"dialog","aria-describedby":s,"aria-labelledby":_},x,{className:J(I.paper,x.className),ownerState:P,children:w.jsx(Z1.Provider,{value:O,children:c})}))})}))}))}),Gp=vI;function yI(e){return pe("MuiDialogActions",e)}fe("MuiDialogActions",["root","spacing"]);const xI=["className","disableSpacing"],SI=e=>{const{classes:t,disableSpacing:n}=e;return me({root:["root",!n&&"spacing"]},yI,t)},bI=H("div",{name:"MuiDialogActions",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,!n.disableSpacing&&t.spacing]}})(({ownerState:e})=>b({display:"flex",alignItems:"center",padding:8,justifyContent:"flex-end",flex:"0 0 auto"},!e.disableSpacing&&{"& > :not(:first-of-type)":{marginLeft:8}})),wI=S.forwardRef(function(t,n){const 
r=xe({props:t,name:"MuiDialogActions"}),{className:o,disableSpacing:i=!1}=r,s=X(r,xI),l=b({},r,{disableSpacing:i}),a=SI(l);return w.jsx(bI,b({className:J(a.root,o),ownerState:l,ref:n},s))}),Kp=wI;function CI(e){return pe("MuiDialogContent",e)}fe("MuiDialogContent",["root","dividers"]);function kI(e){return pe("MuiDialogTitle",e)}const RI=fe("MuiDialogTitle",["root"]),$I=RI,EI=["className","dividers"],PI=e=>{const{classes:t,dividers:n}=e;return me({root:["root",n&&"dividers"]},CI,t)},_I=H("div",{name:"MuiDialogContent",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.dividers&&t.dividers]}})(({theme:e,ownerState:t})=>b({flex:"1 1 auto",WebkitOverflowScrolling:"touch",overflowY:"auto",padding:"20px 24px"},t.dividers?{padding:"16px 24px",borderTop:`1px solid ${(e.vars||e).palette.divider}`,borderBottom:`1px solid ${(e.vars||e).palette.divider}`}:{[`.${$I.root} + &`]:{paddingTop:0}})),TI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogContent"}),{className:o,dividers:i=!1}=r,s=X(r,EI),l=b({},r,{dividers:i}),a=PI(l);return w.jsx(_I,b({className:J(a.root,o),ownerState:l,ref:n},s))}),_a=TI,MI=["className","id"],OI=e=>{const{classes:t}=e;return me({root:["root"]},kI,t)},II=H(yn,{name:"MuiDialogTitle",slot:"Root",overridesResolver:(e,t)=>t.root})({padding:"16px 24px",flex:"0 0 auto"}),AI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogTitle"}),{className:o,id:i}=r,s=X(r,MI),l=r,a=OI(l),{titleId:u=i}=S.useContext(Z1);return w.jsx(II,b({component:"h2",className:J(a.root,o),ownerState:l,ref:n,variant:"h6",id:i??u},s))}),Ta=AI;function FI(e){return pe("MuiIconButton",e)}const 
LI=fe("MuiIconButton",["root","disabled","colorInherit","colorPrimary","colorSecondary","colorError","colorInfo","colorSuccess","colorWarning","edgeStart","edgeEnd","sizeSmall","sizeMedium","sizeLarge"]),NI=LI,zI=["edge","children","className","color","disabled","disableFocusRipple","size"],DI=e=>{const{classes:t,disabled:n,color:r,edge:o,size:i}=e,s={root:["root",n&&"disabled",r!=="default"&&`color${Y(r)}`,o&&`edge${Y(o)}`,`size${Y(i)}`]};return me(s,FI,t)},jI=H(Eu,{name:"MuiIconButton",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.color!=="default"&&t[`color${Y(n.color)}`],n.edge&&t[`edge${Y(n.edge)}`],t[`size${Y(n.size)}`]]}})(({theme:e,ownerState:t})=>b({textAlign:"center",flex:"0 0 auto",fontSize:e.typography.pxToRem(24),padding:8,borderRadius:"50%",overflow:"visible",color:(e.vars||e).palette.action.active,transition:e.transitions.create("background-color",{duration:e.transitions.duration.shortest})},!t.disableRipple&&{"&:hover":{backgroundColor:e.vars?`rgba(${e.vars.palette.action.activeChannel} / ${e.vars.palette.action.hoverOpacity})`:ft(e.palette.action.active,e.palette.action.hoverOpacity),"@media (hover: none)":{backgroundColor:"transparent"}}},t.edge==="start"&&{marginLeft:t.size==="small"?-3:-12},t.edge==="end"&&{marginRight:t.size==="small"?-3:-12}),({theme:e,ownerState:t})=>{var n;const r=(n=(e.vars||e).palette)==null?void 0:n[t.color];return b({},t.color==="inherit"&&{color:"inherit"},t.color!=="inherit"&&t.color!=="default"&&b({color:r==null?void 0:r.main},!t.disableRipple&&{"&:hover":b({},r&&{backgroundColor:e.vars?`rgba(${r.mainChannel} / ${e.vars.palette.action.hoverOpacity})`:ft(r.main,e.palette.action.hoverOpacity)},{"@media (hover: 
none)":{backgroundColor:"transparent"}})}),t.size==="small"&&{padding:5,fontSize:e.typography.pxToRem(18)},t.size==="large"&&{padding:12,fontSize:e.typography.pxToRem(28)},{[`&.${NI.disabled}`]:{backgroundColor:"transparent",color:(e.vars||e).palette.action.disabled}})}),BI=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiIconButton"}),{edge:o=!1,children:i,className:s,color:l="default",disabled:a=!1,disableFocusRipple:u=!1,size:c="medium"}=r,d=X(r,zI),f=b({},r,{edge:o,color:l,disabled:a,disableFocusRipple:u,size:c}),m=DI(f);return w.jsx(jI,b({className:J(m.root,s),centerRipple:!0,focusRipple:!u,disabled:a,ref:n,ownerState:f},d,{children:i}))}),qp=BI;var Yp={},VI=oo;Object.defineProperty(Yp,"__esModule",{value:!0});var J1=Yp.default=void 0,HI=VI(io()),WI=w,UI=(0,HI.default)((0,WI.jsx)("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z"}),"ContentCopy");J1=Yp.default=UI;var Xp={},GI=oo;Object.defineProperty(Xp,"__esModule",{value:!0});var Qp=Xp.default=void 0,KI=GI(io()),qI=w,YI=(0,KI.default)((0,qI.jsx)("path",{d:"M9 16.17 4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"}),"Check");Qp=Xp.default=YI;function ex({open:e,onClose:t,prompt:n,output:r}){const[o,i]=S.useState(!1);S.useEffect(()=>{i(!1)},[n]);const s=async l=>{await navigator.clipboard.writeText(l),i(!0)};return w.jsxs(Gp,{open:e,onClose:t,fullWidth:!0,maxWidth:"lg",children:[w.jsx(Ta,{children:"Prompt"}),w.jsxs(_a,{children:[w.jsx(Xd,{readOnly:!0,value:n,style:{width:"100%",padding:"0.75rem"}}),w.jsx(qp,{onClick:()=>s(n),style:{position:"absolute",right:"10px",top:"10px"},children:o?w.jsx(Qp,{}):w.jsx(J1,{})})]}),r&&w.jsxs(w.Fragment,{children:[w.jsx(Ta,{children:"Output"}),w.jsx(_a,{children:w.jsx(Xd,{readOnly:!0,value:r,style:{width:"100%",padding:"0.75rem"}})})]}),w.jsx(Kp,{children:w.jsx(_s,{onClick:t,children:"Close"})})]})}function XI(e){if(typeof e=="string"){const t=e.startsWith("[PASS]");let 
n=e;return(e.startsWith("[PASS]")||e.startsWith("[FAIL]"))&&(n=n.slice(6)),{text:n,pass:t,score:t?1:0}}return e}function Lg(e){return e===0||e===1?"":`(${e.toFixed(2)})`}function Zp({text:e,maxLength:t}){const[n,r]=S.useState(!0);let o=typeof e=="string"?e:JSON.stringify(e);o=o.replace(/\n/g,"<br>");const i=()=>{r(!n)};return(()=>o.length<=t?w.jsx("span",{dangerouslySetInnerHTML:{__html:o}}):n?w.jsxs("span",{style:{cursor:"pointer"},onClick:i,children:[w.jsx("span",{dangerouslySetInnerHTML:{__html:o.substring(0,t)}})," ..."]}):w.jsx("span",{style:{cursor:"pointer"},onClick:i,children:w.jsx("span",{dangerouslySetInnerHTML:{__html:o}})}))()}function QI({output:e,maxTextLength:t,rowIndex:n,promptIndex:r,onRating:o}){var f,m;const[i,s]=S.useState(!1),l=()=>{s(!0)},a=()=>{s(!1)};let u=typeof e.text=="string"?e.text:JSON.stringify(e.text),c=[];!e.pass&&u.includes("---")&&(c=u.split("---"),u=c.slice(1).join("---"));const d=y=>{o(n,r,y)};return w.jsxs(w.Fragment,{children:[w.jsxs("div",{className:"cell",children:[e.pass&&w.jsxs("div",{className:"status pass",children:["PASS ",w.jsx("span",{className:"score",children:Lg(e.score)})]}),!e.pass&&w.jsxs("div",{className:"status fail",children:["[FAIL",w.jsx("span",{className:"score",children:Lg(e.score)}),"] ",c[0]]})," ",w.jsx(Zp,{text:u,maxLength:t})]}),w.jsx("div",{className:"cell-detail",children:(f=e.tokenUsage)!=null&&f.cached?w.jsxs("span",{children:[e.tokenUsage.cached," tokens (cached)"]}):w.jsxs(w.Fragment,{children:[((m=e.tokenUsage)==null?void 0:m.total)&&w.jsxs("span",{children:[e.tokenUsage.total," tokens"]})," |"," ",w.jsxs("span",{children:[e.latencyMs," ms"]})]})}),w.jsxs("div",{className:"cell-actions",children:[e.prompt&&w.jsxs(w.Fragment,{children:[w.jsx("span",{className:"action",onClick:l,children:"🔎"}),w.jsx(ex,{open:i,onClose:a,prompt:e.prompt,output:u})]}),w.jsx("span",{className:"action",onClick:()=>d(!0),children:"👍"}),w.jsx("span",{className:"action",onClick:()=>d(!1),children:"👎"})]})]})}function 
Ng({text:e,maxLength:t,smallText:n,expandedText:r}){const[o,i]=S.useState(!1),s=()=>{i(!0)},l=()=>{i(!1)};return w.jsxs("div",{children:[w.jsx(Zp,{text:e,maxLength:t}),r&&w.jsxs(w.Fragment,{children:[w.jsx("span",{className:"action",onClick:s,children:"🔎"}),w.jsx(ex,{open:o,onClose:l,prompt:r})]}),w.jsx("div",{className:"smalltext",children:n})]})}function ZI({maxTextLength:e,columnVisibility:t,wordBreak:n,filterMode:r,failureFilter:o,onFailureFilterToggle:i}){const{table:s,setTable:l}=Uu();e1(s,"Table should be defined");const{head:a,body:u}=s,c=a.prompts.map((p,h)=>u.reduce((x,C)=>x+(C.outputs[h].pass?1:0),0)),d=(p,h,x)=>{const C=[...u],R={...C[p]},$=[...R.outputs];$[h].pass=x,$[h].score=x?1:0,R.outputs=$,C[p]=R,l({head:a,body:C})},f=c.reduce((p,h,x,C)=>h>C[p]?x:p,0),m=c[f],y=MO(),v=[y.group({id:"vars",header:()=>w.jsx("span",{children:"Variables"}),columns:a.vars.map((p,h)=>y.accessor(x=>x.vars[h],{id:`Variable ${h+1}`,header:()=>w.jsx(Ng,{smallText:`Variable ${h+1}`,text:p,maxLength:e}),cell:x=>w.jsx(Zp,{text:x.getValue(),maxLength:e}),size:50}))}),y.group({id:"prompts",header:()=>w.jsx("span",{children:"Outputs"}),columns:a.prompts.map((p,h)=>y.accessor(x=>XI(x.outputs[h]),{id:`Prompt ${h+1}`,header:()=>{const x=(c[h]/u.length*100).toFixed(2),C=c[h]===m&&m!==0,R=`Prompt ${h+1}`,$=o[R]||!1;return w.jsxs(w.Fragment,{children:[w.jsx(Ng,{smallText:`Prompt ${h+1}`,text:typeof p=="string"?p:p.display,expandedText:typeof p=="string"?void 0:p.raw,maxLength:e}),r==="failures"&&w.jsx(x1,{sx:{"& .MuiFormControlLabel-label":{fontSize:"0.75rem"}},control:w.jsx(ef,{checked:$,onChange:E=>i(R,E.target.checked)}),label:"Show failures"}),w.jsxs("div",{className:`summary ${C?"highlight":""}`,children:["Passing: ",w.jsxs("strong",{children:[x,"%"]})," (",c[h]," / 
",u.length,")"]})]})},cell:x=>w.jsx(QI,{output:x.getValue(),maxTextLength:e,rowIndex:x.row.index,promptIndex:h,onRating:d})}))})],k=S.useMemo(()=>r==="failures"?Object.values(o).every(p=>!p)?u:u.filter(p=>p.outputs.some((h,x)=>{const C=`Prompt ${x+1}`,R=!h.pass;return o[C]&&R})):r==="different"?u.filter(p=>!p.outputs.every(h=>h.text===p.outputs[0].text)):u,[u,o,r]),g=LO({data:k,columns:v,columnResizeMode:"onChange",getCoreRowModel:OO(),state:{columnVisibility:t}});return w.jsxs("table",{style:{wordBreak:n},children:[w.jsx("thead",{children:g.getHeaderGroups().map(p=>w.jsx("tr",{className:"header",children:p.headers.map(h=>w.jsxs("th",{key:h.id,colSpan:h.colSpan,style:{width:h.getSize()},children:[h.isPlaceholder?null:Ig(h.column.columnDef.header,h.getContext()),w.jsx("div",{onMouseDown:h.getResizeHandler(),onTouchStart:h.getResizeHandler(),className:`resizer ${h.column.getIsResizing()?"isResizing":""}`})]}))},p.id))}),w.jsx("tbody",{children:g.getRowModel().rows.map((p,h)=>{let x=!1;return w.jsx("tr",{children:p.getVisibleCells().map(C=>{const R=C.column.id.startsWith("Variable"),$=!R&&!x;$&&(x=!0);const E=h===0&&!R;return w.jsx("td",{key:C.id,style:{width:C.column.getSize()},className:`${R?"variable":""} ${E?"first-prompt-row":""} ${$?"first-prompt-col":""}`,children:Ig(C.column.columnDef.cell,C.getContext())})})},p.id)})})]})}const JI="modulepreload",eA=function(e){return"/"+e},zg={},tA=function(t,n,r){if(!n||n.length===0)return t();const o=document.getElementsByTagName("link");return Promise.all(n.map(i=>{if(i=eA(i),i in zg)return;zg[i]=!0;const s=i.endsWith(".css"),l=s?'[rel="stylesheet"]':"";if(!!r)for(let c=o.length-1;c>=0;c--){const d=o[c];if(d.href===i&&(!s||d.rel==="stylesheet"))return}else if(document.querySelector(`link[href="${i}"]${l}`))return;const u=document.createElement("link");if(u.rel=s?"stylesheet":JI,s||(u.as="script",u.crossOrigin=""),u.href=i,document.head.appendChild(u),s)return new 
Promise((c,d)=>{u.addEventListener("load",c),u.addEventListener("error",()=>d(new Error(`Unable to preload CSS for ${i}`)))})})).then(()=>t())};function nA(e){return pe("MuiDialogContentText",e)}fe("MuiDialogContentText",["root"]);const rA=["children","className"],oA=e=>{const{classes:t}=e,r=me({root:["root"]},nA,t);return b({},t,r)},iA=H(yn,{shouldForwardProp:e=>an(e)||e==="classes",name:"MuiDialogContentText",slot:"Root",overridesResolver:(e,t)=>t.root})({}),sA=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiDialogContentText"}),{className:o}=r,i=X(r,rA),s=oA(i);return w.jsx(iA,b({component:"p",variant:"body1",color:"text.secondary",ref:n,ownerState:i,className:J(s.root,o)},r,{classes:s}))}),lA=sA;function aA(e){return pe("MuiFormHelperText",e)}const uA=fe("MuiFormHelperText",["root","error","disabled","sizeSmall","sizeMedium","contained","focused","filled","required"]),Dg=uA;var jg;const cA=["children","className","component","disabled","error","filled","focused","margin","required","variant"],dA=e=>{const{classes:t,contained:n,size:r,disabled:o,error:i,filled:s,focused:l,required:a}=e,u={root:["root",o&&"disabled",i&&"error",r&&`size${Y(r)}`,n&&"contained",l&&"focused",s&&"filled",a&&"required"]};return me(u,aA,t)},fA=H("p",{name:"MuiFormHelperText",slot:"Root",overridesResolver:(e,t)=>{const{ownerState:n}=e;return[t.root,n.size&&t[`size${Y(n.size)}`],n.contained&&t.contained,n.filled&&t.filled]}})(({theme:e,ownerState:t})=>b({color:(e.vars||e).palette.text.secondary},e.typography.caption,{textAlign:"left",marginTop:3,marginRight:0,marginBottom:0,marginLeft:0,[`&.${Dg.disabled}`]:{color:(e.vars||e).palette.text.disabled},[`&.${Dg.error}`]:{color:(e.vars||e).palette.error.main}},t.size==="small"&&{marginTop:4},t.contained&&{marginLeft:14,marginRight:14})),pA=S.forwardRef(function(t,n){const 
r=xe({props:t,name:"MuiFormHelperText"}),{children:o,className:i,component:s="p"}=r,l=X(r,cA),a=_r(),u=ro({props:r,muiFormControl:a,states:["variant","size","disabled","error","filled","focused","required"]}),c=b({},r,{component:s,contained:u.variant==="filled"||u.variant==="outlined",variant:u.variant,size:u.size,disabled:u.disabled,error:u.error,filled:u.filled,focused:u.focused,required:u.required}),d=dA(c);return w.jsx(fA,b({as:s,ownerState:c,className:J(d.root,i),ref:n},l,{children:o===" "?jg||(jg=w.jsx("span",{className:"notranslate",children:""})):o}))}),hA=pA;function mA(e){return pe("MuiTextField",e)}fe("MuiTextField",["root"]);const gA=["autoComplete","autoFocus","children","className","color","defaultValue","disabled","error","FormHelperTextProps","fullWidth","helperText","id","InputLabelProps","inputProps","InputProps","inputRef","label","maxRows","minRows","multiline","name","onBlur","onChange","onClick","onFocus","placeholder","required","rows","select","SelectProps","type","value","variant"],vA={standard:M1,filled:I1,outlined:Lp},yA=e=>{const{classes:t}=e;return me({root:["root"]},mA,t)},xA=H(Wl,{name:"MuiTextField",slot:"Root",overridesResolver:(e,t)=>t.root})({}),SA=S.forwardRef(function(t,n){const r=xe({props:t,name:"MuiTextField"}),{autoComplete:o,autoFocus:i=!1,children:s,className:l,color:a="primary",defaultValue:u,disabled:c=!1,error:d=!1,FormHelperTextProps:f,fullWidth:m=!1,helperText:y,id:v,InputLabelProps:k,inputProps:g,InputProps:p,inputRef:h,label:x,maxRows:C,minRows:R,multiline:$=!1,name:E,onBlur:M,onChange:P,onClick:I,onFocus:j,placeholder:D,required:A=!1,rows:_,select:O=!1,SelectProps:N,type:T,value:L,variant:F="outlined"}=r,q=X(r,gA),G=b({},r,{autoFocus:i,color:a,disabled:c,error:d,fullWidth:m,multiline:$,required:A,select:O,variant:F}),oe=yA(G),te={};F==="outlined"&&(k&&typeof k.shrink<"u"&&(te.notched=k.shrink),te.label=x),O&&((!N||!N.native)&&(te.id=void 0),te["aria-describedby"]=void 0);const 
Q=qa(v),se=y&&Q?`${Q}-helper-text`:void 0,Ce=x&&Q?`${Q}-label`:void 0,Se=vA[F],ae=w.jsx(Se,b({"aria-describedby":se,autoComplete:o,autoFocus:i,defaultValue:u,fullWidth:m,multiline:$,name:E,rows:_,maxRows:C,minRows:R,type:T,value:L,id:Q,inputRef:h,onBlur:M,onChange:P,onFocus:j,onClick:I,placeholder:D,inputProps:g},te,p));return w.jsxs(xA,b({className:J(oe.root,l),disabled:c,error:d,fullWidth:m,ref:n,required:A,color:a,variant:F,ownerState:G},q,{children:[x!=null&&x!==""&&w.jsx(Ul,b({htmlFor:Q,id:Ce},k,{children:x})),O?w.jsx(Gl,b({"aria-describedby":se,id:Q,labelId:Ce,value:L,input:ae},N,{children:s})):ae,y&&w.jsx(hA,b({id:se},f,{children:y}))]}))}),bA=SA,wA=no(w.jsx("path",{d:"M9 16.17 4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"}),"Check"),CA=no(w.jsx("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm-1 4 6 6v10c0 1.1-.9 2-2 2H7.99C6.89 23 6 22.1 6 21l.01-14c0-1.1.89-2 1.99-2h7zm-1 7h5.5L14 6.5V12z"}),"FileCopy");function kA({open:e,onClose:t}){const{config:n}=Uu(),r=wt.useRef(null),[o,i]=wt.useState(!1),[s,l]=wt.useState("");wt.useEffect(()=>{e&&(async()=>{const{default:c}=await tA(()=>import("./js-yaml-8bbf9398.js"),[]);l(c.dump(n))})()},[e,n]);const a=()=>{r.current&&(r.current.select(),document.execCommand("copy"),i(!0))},u=()=>{i(!1),t()};return w.jsxs(Gp,{open:e,onClose:u,"aria-labelledby":"config-dialog-title",maxWidth:"md",fullWidth:!0,children:[w.jsx(Ta,{id:"config-dialog-title",children:w.jsxs(En,{display:"flex",justifyContent:"space-between",alignItems:"center",children:[w.jsx(yn,{variant:"h6",children:"Config"}),w.jsx(qp,{onClick:a,children:o?w.jsx(wA,{}):w.jsx(CA,{})})]})}),w.jsx(_a,{children:w.jsx(yn,{variant:"body1",component:"div",children:w.jsx("textarea",{ref:r,readOnly:!0,value:s,style:{width:"100%",minHeight:"400px",fontFamily:"monospace",border:"1px solid #ccc"}})})}),w.jsx(Kp,{children:w.jsx(_s,{onClick:u,color:"primary",children:"Close"})})]})}var Jp={},RA=oo;Object.defineProperty(Jp,"__esModule",{value:!0});var tx=Jp.default=void 
0,$A=RA(io()),EA=w,PA=(0,$A.default)((0,EA.jsx)("path",{d:"M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm-1 4 6 6v10c0 1.1-.9 2-2 2H7.99C6.89 23 6 22.1 6 21l.01-14c0-1.1.89-2 1.99-2h7zm-1 7h5.5L14 6.5V12z"}),"FileCopy");tx=Jp.default=PA;const _A=({open:e,onClose:t,shareUrl:n})=>{const r=S.useRef(null),[o,i]=S.useState(!1),s=()=>{r.current&&(r.current.select(),document.execCommand("copy"),i(!0))},l=()=>{t(),i(!1)};return w.jsxs(Gp,{open:e,onClose:l,PaperProps:{style:{minWidth:"min(660px, 100%)"}},children:[w.jsx(Ta,{children:"Your eval is ready to share"}),w.jsxs(_a,{children:[w.jsx(bA,{inputRef:r,value:n,fullWidth:!0,InputProps:{readOnly:!0,endAdornment:w.jsx(qp,{onClick:s,children:o?w.jsx(Qp,{}):w.jsx(tx,{})})}}),w.jsx(lA,{sx:{fontSize:"0.75rem"},children:"Shared URLs are deleted after 1 week."})]}),w.jsx(Kp,{children:w.jsx(_s,{onClick:l,color:"primary",children:"Close"})})]})},Bg=Ty(EM)(({theme:e})=>({maxWidth:"100%",flexWrap:"wrap",[e.breakpoints.down("sm")]:{flexDirection:"column"}}));function TA(e){const n=e.slice(5,e.length-5).split("T"),r=n[1].replace(/-/g,":"),o=`${n[0]}T${r}`;return new Date(o).toLocaleDateString("en-US",{year:"numeric",month:"long",day:"numeric",hour:"2-digit",minute:"2-digit",second:"2-digit",timeZoneName:"short"})}function MA({recentFiles:e,onRecentFileSelected:t}){const{table:n,config:r}=Uu(),[o,i]=S.useState(250),[s,l]=S.useState({}),[a,u]=S.useState([]),[c,d]=S.useState({}),f=(_,O)=>{d(N=>({...N,[_]:O}))},[m,y]=S.useState("all"),v=_=>{const O=_.target.value;y(O);const N={};j.prompts.forEach((T,L)=>{const F=`Prompt ${L+1}`;N[F]=O==="failures"}),d(N)},[k,g]=S.useState("break-all"),p=_=>{g(_.target.checked?"break-all":"break-word")},[h,x]=S.useState(!1),[C,R]=S.useState(""),[$,E]=S.useState(!1),M=async()=>{E(!0);try{const _=await fetch("https://api.promptfoo.dev/eval",{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({data:{version:1,results:{table:n},config:r}})}),{id:O}=await 
_.json(),N=`https://app.promptfoo.dev/eval/${O}`;R(N),x(!0)}catch{alert("Sorry, something went wrong.")}finally{E(!1)}},[P,I]=S.useState(!1);e1(n,"Table data must be loaded before rendering ResultsView");const{head:j}=n,D=_=>{const{target:{value:O}}=_;u(typeof O=="string"?O.split(","):O);const N=[...j.vars.map((L,F)=>`Variable ${F+1}`),...j.prompts.map((L,F)=>`Prompt ${F+1}`)],T={};N.forEach(L=>{T[L]=(typeof O=="string"?O.split(","):O).includes(L)}),l(T)},A=[...j.vars.map((_,O)=>({value:`Variable ${O+1}`,label:`Variable ${O+1}`,group:"Variables"})),...j.prompts.map((_,O)=>({value:`Prompt ${O+1}`,label:`Prompt ${O+1}`,group:"Prompts"}))];return S.useEffect(()=>{u([...j.vars.map((_,O)=>`Variable ${O+1}`),...j.prompts.map((_,O)=>`Prompt ${O+1}`)])},[j]),w.jsxs("div",{children:[w.jsx(En,{py:"md",children:w.jsxs(Bg,{direction:"row",spacing:4,alignItems:"center",children:[w.jsx(En,{children:e&&e.length>0&&w.jsxs(Wl,{sx:{m:1,minWidth:200},size:"small",children:[w.jsx(Ul,{children:"View run"}),w.jsx(Gl,{className:"recent-files",label:"Previous runs",defaultValue:e[0],onChange:_=>t(_.target.value),children:e.map(_=>w.jsx(Mi,{value:_,children:TA(_)},_))},e.join(","))]})}),w.jsx(En,{children:w.jsxs(Wl,{sx:{m:1,minWidth:200},size:"small",children:[w.jsx(Ul,{id:"visible-columns-label",children:"Show columns"}),w.jsx(Gl,{labelId:"visible-columns-label",id:"visible-columns",multiple:!0,value:a,onChange:D,input:w.jsx(Lp,{label:"Visible columns"}),renderValue:_=>_.join(", "),children:A.map(_=>w.jsxs(Mi,{dense:!0,value:_.value,children:[w.jsx(ef,{checked:a.indexOf(_.value)>-1}),w.jsx(r_,{primary:_.label})]},_.value))})]})}),w.jsx(En,{children:w.jsxs(Wl,{sx:{minWidth:180},size:"small",children:[w.jsx(Ul,{id:"failure-filter-mode-label",children:"Filter"}),w.jsxs(Gl,{labelId:"filter-mode-label",id:"filter-mode",value:m,onChange:v,label:"Filter",children:[w.jsx(Mi,{value:"all",children:"Show all results"}),w.jsx(Mi,{value:"failures",children:"Show failures 
only"}),w.jsx(Mi,{value:"different",children:"Show different only"})]})]})}),w.jsxs(En,{children:[w.jsxs(yn,{mt:2,children:["Max text length: ",o]}),w.jsx(RM,{min:25,max:1e3,value:o,onChange:(_,O)=>i(O)})]}),w.jsx(En,{children:w.jsx(Ic,{title:"Forcing line breaks makes it easier to adjust column widths to your liking",children:w.jsx(x1,{control:w.jsx(ef,{checked:k==="break-all",onChange:p}),label:"Force line breaks"})})}),w.jsx(En,{flexGrow:1}),w.jsx(En,{display:"flex",justifyContent:"flex-end",children:w.jsxs(Bg,{direction:"row",spacing:2,children:[r&&w.jsx(Ic,{title:"View config",children:w.jsx(_s,{color:"primary",onClick:()=>I(!0),startIcon:w.jsx(z1,{}),children:"Config"})}),(r==null?void 0:r.sharing)&&w.jsx(Ic,{title:"Generate a unique URL that others can access",children:w.jsx(_s,{color:"primary",onClick:M,disabled:$,startIcon:$?w.jsx(g2,{size:16}):w.jsx(N1,{}),children:"Share"})})]})})]})}),w.jsx(ZI,{maxTextLength:o,columnVisibility:s,wordBreak:k,filterMode:m,failureFilter:c,onFailureFilterToggle:f}),w.jsx(kA,{open:P,onClose:()=>I(!1)}),w.jsx(_A,{open:h,onClose:()=>x(!1),shareUrl:C})]})}function OA(){return w.jsxs(En,{className:"logo",children:[w.jsx("img",{src:"/logo.svg",alt:"Promptfoo logo"})," ",w.jsx("span",{children:"promptfoo"})]})}var eh={},IA=oo;Object.defineProperty(eh,"__esModule",{value:!0});var nx=eh.default=void 0,AA=IA(io()),FA=w,LA=(0,AA.default)((0,FA.jsx)("path",{d:"M12 3c-4.97 0-9 4.03-9 9s4.03 9 9 9 9-4.03 9-9c0-.46-.04-.92-.1-1.36-.98 1.37-2.58 2.26-4.4 2.26-2.98 0-5.4-2.42-5.4-5.4 0-1.81.89-3.42 2.26-4.4-.44-.06-.9-.1-1.36-.1z"}),"DarkMode");nx=eh.default=LA;var th={},NA=oo;Object.defineProperty(th,"__esModule",{value:!0});var rx=th.default=void 0,zA=NA(io()),DA=w,jA=(0,zA.default)((0,DA.jsx)("path",{d:"M12 7c-2.76 0-5 2.24-5 5s2.24 5 5 5 5-2.24 5-5-2.24-5-5-5zM2 13h2c.55 0 1-.45 1-1s-.45-1-1-1H2c-.55 0-1 .45-1 1s.45 1 1 1zm18 0h2c.55 0 1-.45 1-1s-.45-1-1-1h-2c-.55 0-1 .45-1 1s.45 1 1 1zM11 2v2c0 .55.45 1 1 1s1-.45 
1-1V2c0-.55-.45-1-1-1s-1 .45-1 1zm0 18v2c0 .55.45 1 1 1s1-.45 1-1v-2c0-.55-.45-1-1-1s-1 .45-1 1zM5.99 4.58c-.39-.39-1.03-.39-1.41 0-.39.39-.39 1.03 0 1.41l1.06 1.06c.39.39 1.03.39 1.41 0s.39-1.03 0-1.41L5.99 4.58zm12.37 12.37c-.39-.39-1.03-.39-1.41 0-.39.39-.39 1.03 0 1.41l1.06 1.06c.39.39 1.03.39 1.41 0 .39-.39.39-1.03 0-1.41l-1.06-1.06zm1.06-10.96c.39-.39.39-1.03 0-1.41-.39-.39-1.03-.39-1.41 0l-1.06 1.06c-.39.39-.39 1.03 0 1.41s1.03.39 1.41 0l1.06-1.06zM7.05 18.36c.39-.39.39-1.03 0-1.41-.39-.39-1.03-.39-1.41 0l-1.06 1.06c-.39.39-.39 1.03 0 1.41s1.03.39 1.41 0l1.06-1.06z"}),"LightMode");rx=th.default=jA;function BA({darkMode:e,onToggleDarkMode:t}){return w.jsxs("nav",{children:[w.jsx(OA,{}),w.jsx("div",{className:"dark-mode-toggle",onClick:t,children:e?w.jsx(nx,{}):w.jsx(rx,{})})]})}function VA(){const{table:e,setTable:t,setConfig:n}=Uu(),[r,o]=S.useState(!1),i=S.useRef(!1),[s,l]=S.useState([]),a=Rk("(prefers-color-scheme: dark)"),[u,c]=S.useState(a),d=S.useMemo(()=>bp({palette:{mode:u?"dark":"light"}}),[u]),f=()=>{c(!u),u?document.documentElement.removeAttribute("data-theme"):document.documentElement.setAttribute("data-theme","dark")};S.useEffect(()=>{a&&document.documentElement.setAttribute("data-theme","dark")},[a]);const m=async()=>{if(!window.location.href.includes("localhost"))return;const k=await(await fetch("http://localhost:15500/results")).json();l(k.data)},y=async v=>{const g=await(await fetch(`http://localhost:15500/results/${v}`)).json();t(g.data.results.table),n(g.data.config)};return S.useEffect(()=>{const v=async p=>{var C;if(i.current)return;i.current=!0;const x=await(await fetch(`https://api.promptfoo.dev/eval/${p}`)).json();t(((C=x.data.results)==null?void 0:C.table)||x.data.table),n(x.data.config),o(!0)},k=Vl("http://localhost:15500"),g=window.location.pathname.match(/\/eval\/([\w:-]+)/);if(g){const p=g[1];v(p)}else k.on("init",p=>{console.log("Initialized socket 
connection",p),o(!0),t(p.results.table),n(p.config),m()}),k.on("update",p=>{console.log("Received data update",p),t(p.results.table),n(p.config),m()});return()=>{k.disconnect()}},[t,n]),w.jsxs(uR,{theme:d,children:[w.jsx(BA,{darkMode:u,onToggleDarkMode:f}),r&&e?w.jsx(MA,{recentFiles:s,onRecentFileSelected:y}):w.jsx("div",{children:"Loading..."})]})}Hc.createRoot(document.getElementById("root")).render(w.jsx(wt.StrictMode,{children:w.jsx(VA,{})}));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
:root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th 
.smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
|
|
1
|
+
:root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr .cell-detail{visibility:hidden;position:absolute;bottom:.25rem;margin-top:1rem;font-size:.75rem;color:#888}tr:hover 
.cell-actions,tr:hover .cell-detail{visibility:visible}tr .cell-actions .action{cursor:pointer}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>promptfoo web viewer</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
|
|
9
|
-
<link rel="stylesheet" href="/assets/index-f9b230d1.css">
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-8388d689.js"></script>
|
|
9
|
+
<link rel="stylesheet" href="/assets/index-d2b6a160.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
12
12
|
<div id="root"></div>
|
package/package.json
CHANGED
package/src/assertions.ts
CHANGED
|
@@ -99,12 +99,21 @@ export async function runAssertion(
|
|
|
99
99
|
type: baseType,
|
|
100
100
|
});
|
|
101
101
|
|
|
102
|
+
//render assertion values
|
|
103
|
+
let renderedValue = assertion.value;
|
|
104
|
+
// renderString for assertion values
|
|
105
|
+
if (renderedValue && typeof renderedValue === 'string') {
|
|
106
|
+
renderedValue = nunjucks.renderString(renderedValue, test.vars || {});
|
|
107
|
+
} else if (renderedValue && Array.isArray(renderedValue)) {
|
|
108
|
+
renderedValue = renderedValue.map((v) => nunjucks.renderString(v, test.vars || {}));
|
|
109
|
+
}
|
|
110
|
+
|
|
102
111
|
if (baseType === 'equals') {
|
|
103
|
-
pass = assertion.value === output;
|
|
112
|
+
pass = renderedValue === output;
|
|
104
113
|
return {
|
|
105
114
|
pass,
|
|
106
115
|
score: pass ? 1 : 0,
|
|
107
|
-
reason: pass ? 'Assertion passed' : `Expected output "${assertion.value}"`,
|
|
116
|
+
reason: pass ? 'Assertion passed' : `Expected output "${renderedValue}"`,
|
|
108
117
|
};
|
|
109
118
|
}
|
|
110
119
|
|
|
@@ -123,103 +132,99 @@ export async function runAssertion(
|
|
|
123
132
|
}
|
|
124
133
|
|
|
125
134
|
if (baseType === 'contains') {
|
|
126
|
-
invariant(
|
|
135
|
+
invariant(renderedValue, '"contains" assertion type must have a string or number value');
|
|
127
136
|
invariant(
|
|
128
|
-
typeof
|
|
137
|
+
typeof renderedValue === 'string' || typeof renderedValue === 'number',
|
|
129
138
|
'"contains" assertion type must have a string or number value',
|
|
130
139
|
);
|
|
131
|
-
pass = output.includes(String(
|
|
140
|
+
pass = output.includes(String(renderedValue)) !== inverse;
|
|
132
141
|
return {
|
|
133
142
|
pass,
|
|
134
143
|
score: pass ? 1 : 0,
|
|
135
144
|
reason: pass
|
|
136
145
|
? 'Assertion passed'
|
|
137
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain "${
|
|
146
|
+
: `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
|
|
138
147
|
};
|
|
139
148
|
}
|
|
140
149
|
|
|
141
150
|
if (baseType === 'contains-any') {
|
|
142
|
-
invariant(
|
|
151
|
+
invariant(renderedValue, '"contains-any" assertion type must have a value');
|
|
143
152
|
invariant(
|
|
144
|
-
Array.isArray(
|
|
153
|
+
Array.isArray(renderedValue),
|
|
145
154
|
'"contains-any" assertion type must have an array value',
|
|
146
155
|
);
|
|
147
|
-
pass =
|
|
156
|
+
pass = renderedValue.some((value) => output.includes(value)) !== inverse;
|
|
148
157
|
return {
|
|
149
158
|
pass,
|
|
150
159
|
score: pass ? 1 : 0,
|
|
151
160
|
reason: pass
|
|
152
161
|
? 'Assertion passed'
|
|
153
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain one of "${
|
|
154
|
-
', ',
|
|
155
|
-
)}"`,
|
|
162
|
+
: `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
|
|
156
163
|
};
|
|
157
164
|
}
|
|
158
165
|
|
|
159
166
|
if (baseType === 'contains-all') {
|
|
160
|
-
invariant(
|
|
167
|
+
invariant(renderedValue, '"contains-all" assertion type must have a value');
|
|
161
168
|
invariant(
|
|
162
|
-
Array.isArray(
|
|
169
|
+
Array.isArray(renderedValue),
|
|
163
170
|
'"contains-all" assertion type must have an array value',
|
|
164
171
|
);
|
|
165
|
-
pass =
|
|
172
|
+
pass = renderedValue.every((value) => output.includes(value)) !== inverse;
|
|
166
173
|
return {
|
|
167
174
|
pass,
|
|
168
175
|
score: pass ? 1 : 0,
|
|
169
176
|
reason: pass
|
|
170
177
|
? 'Assertion passed'
|
|
171
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain all of "${
|
|
172
|
-
', ',
|
|
173
|
-
)}"`,
|
|
178
|
+
: `Expected output to ${inverse ? 'not ' : ''}contain all of "${renderedValue.join(', ')}"`,
|
|
174
179
|
};
|
|
175
180
|
}
|
|
176
181
|
|
|
177
182
|
if (baseType === 'regex') {
|
|
178
|
-
invariant(
|
|
183
|
+
invariant(renderedValue, '"regex" assertion type must have a string value');
|
|
179
184
|
invariant(
|
|
180
|
-
typeof
|
|
185
|
+
typeof renderedValue === 'string',
|
|
181
186
|
'"contains" assertion type must have a string value',
|
|
182
187
|
);
|
|
183
|
-
const regex = new RegExp(
|
|
188
|
+
const regex = new RegExp(renderedValue);
|
|
184
189
|
pass = regex.test(output) !== inverse;
|
|
185
190
|
return {
|
|
186
191
|
pass,
|
|
187
192
|
score: pass ? 1 : 0,
|
|
188
193
|
reason: pass
|
|
189
194
|
? 'Assertion passed'
|
|
190
|
-
: `Expected output to ${inverse ? 'not ' : ''}match regex "${
|
|
195
|
+
: `Expected output to ${inverse ? 'not ' : ''}match regex "${renderedValue}"`,
|
|
191
196
|
};
|
|
192
197
|
}
|
|
193
198
|
|
|
194
199
|
if (baseType === 'icontains') {
|
|
195
|
-
invariant(
|
|
200
|
+
invariant(renderedValue, '"icontains" assertion type must have a string or number value');
|
|
196
201
|
invariant(
|
|
197
|
-
typeof
|
|
202
|
+
typeof renderedValue === 'string' || typeof renderedValue === 'number',
|
|
198
203
|
'"icontains" assertion type must have a string or number value',
|
|
199
204
|
);
|
|
200
|
-
pass = output.toLowerCase().includes(String(
|
|
205
|
+
pass = output.toLowerCase().includes(String(renderedValue).toLowerCase()) !== inverse;
|
|
201
206
|
return {
|
|
202
207
|
pass,
|
|
203
208
|
score: pass ? 1 : 0,
|
|
204
209
|
reason: pass
|
|
205
210
|
? 'Assertion passed'
|
|
206
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain "${
|
|
211
|
+
: `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
|
|
207
212
|
};
|
|
208
213
|
}
|
|
209
214
|
|
|
210
215
|
if (baseType === 'starts-with') {
|
|
211
|
-
invariant(
|
|
216
|
+
invariant(renderedValue, '"starts-with" assertion type must have a string value');
|
|
212
217
|
invariant(
|
|
213
|
-
typeof
|
|
218
|
+
typeof renderedValue === 'string',
|
|
214
219
|
'"starts-with" assertion type must have a string value',
|
|
215
220
|
);
|
|
216
|
-
pass = output.startsWith(String(
|
|
221
|
+
pass = output.startsWith(String(renderedValue)) !== inverse;
|
|
217
222
|
return {
|
|
218
223
|
pass,
|
|
219
224
|
score: pass ? 1 : 0,
|
|
220
225
|
reason: pass
|
|
221
226
|
? 'Assertion passed'
|
|
222
|
-
: `Expected output to ${inverse ? 'not ' : ''}start with "${
|
|
227
|
+
: `Expected output to ${inverse ? 'not ' : ''}start with "${renderedValue}"`,
|
|
223
228
|
};
|
|
224
229
|
}
|
|
225
230
|
|
|
@@ -234,12 +239,16 @@ export async function runAssertion(
|
|
|
234
239
|
};
|
|
235
240
|
}
|
|
236
241
|
|
|
242
|
+
const context = {
|
|
243
|
+
vars: test.vars || {},
|
|
244
|
+
};
|
|
245
|
+
|
|
237
246
|
if (baseType === 'javascript') {
|
|
238
247
|
try {
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
};
|
|
248
|
+
if (typeof assertion.value === 'function') {
|
|
249
|
+
return assertion.value(output, test, assertion);
|
|
250
|
+
}
|
|
251
|
+
const customFunction = new Function('output', 'context', `return ${renderedValue}`);
|
|
243
252
|
const result = customFunction(output, context) as any;
|
|
244
253
|
if (typeof result === 'boolean') {
|
|
245
254
|
pass = result !== inverse;
|
|
@@ -255,7 +264,7 @@ export async function runAssertion(
|
|
|
255
264
|
pass: false,
|
|
256
265
|
score: 0,
|
|
257
266
|
reason: `Custom function threw error: ${(err as Error).message}
|
|
258
|
-
${assertion.value}`,
|
|
267
|
+
${renderedValue}`,
|
|
259
268
|
};
|
|
260
269
|
}
|
|
261
270
|
return {
|
|
@@ -264,41 +273,82 @@ ${assertion.value}`,
|
|
|
264
273
|
reason: pass
|
|
265
274
|
? 'Assertion passed'
|
|
266
275
|
: `Custom function returned ${inverse ? 'true' : 'false'}
|
|
276
|
+
${renderedValue}`,
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
if (baseType === 'python') {
|
|
281
|
+
try {
|
|
282
|
+
const { execSync } = require('child_process');
|
|
283
|
+
const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
|
|
284
|
+
const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
|
|
285
|
+
const result = execSync(
|
|
286
|
+
`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
|
|
287
|
+
)
|
|
288
|
+
.toString()
|
|
289
|
+
.trim();
|
|
290
|
+
if (result === 'true') {
|
|
291
|
+
pass = true;
|
|
292
|
+
score = 1.0;
|
|
293
|
+
} else if (result === 'false') {
|
|
294
|
+
pass = false;
|
|
295
|
+
score = 0.0;
|
|
296
|
+
} else if (result.startsWith('{')) {
|
|
297
|
+
return JSON.parse(result);
|
|
298
|
+
} else {
|
|
299
|
+
pass = true;
|
|
300
|
+
score = parseFloat(result);
|
|
301
|
+
if (isNaN(score)) {
|
|
302
|
+
throw new Error(
|
|
303
|
+
'Python code must return a boolean, number, or {pass, score, reason} object',
|
|
304
|
+
);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
} catch (err) {
|
|
308
|
+
return {
|
|
309
|
+
pass: false,
|
|
310
|
+
score: 0,
|
|
311
|
+
reason: `Python code execution failed: ${(err as Error).message}`,
|
|
312
|
+
};
|
|
313
|
+
}
|
|
314
|
+
return {
|
|
315
|
+
pass,
|
|
316
|
+
score,
|
|
317
|
+
reason: pass
|
|
318
|
+
? 'Assertion passed'
|
|
319
|
+
: `Python code returned ${pass ? 'true' : 'false'}
|
|
267
320
|
${assertion.value}`,
|
|
268
321
|
};
|
|
269
322
|
}
|
|
270
323
|
|
|
271
324
|
if (baseType === 'similar') {
|
|
272
|
-
invariant(
|
|
325
|
+
invariant(renderedValue, 'Similarity assertion must have a string value');
|
|
273
326
|
invariant(
|
|
274
|
-
typeof
|
|
327
|
+
typeof renderedValue === 'string',
|
|
275
328
|
'"contains" assertion type must have a string value',
|
|
276
329
|
);
|
|
277
|
-
return matchesSimilarity(
|
|
330
|
+
return matchesSimilarity(renderedValue, output, assertion.threshold || 0.75, inverse);
|
|
278
331
|
}
|
|
279
332
|
|
|
280
333
|
if (baseType === 'llm-rubric') {
|
|
281
|
-
invariant(
|
|
334
|
+
invariant(renderedValue, 'Similarity assertion must have a string value');
|
|
282
335
|
invariant(
|
|
283
|
-
typeof
|
|
336
|
+
typeof renderedValue === 'string',
|
|
284
337
|
'"contains" assertion type must have a string value',
|
|
285
338
|
);
|
|
286
|
-
return matchesLlmRubric(
|
|
339
|
+
return matchesLlmRubric(renderedValue, output, test.options);
|
|
287
340
|
}
|
|
288
341
|
|
|
289
342
|
if (baseType === 'webhook') {
|
|
290
|
-
invariant(
|
|
291
|
-
invariant(
|
|
292
|
-
typeof assertion.value === 'string',
|
|
293
|
-
'"webhook" assertion type must have a URL value',
|
|
294
|
-
);
|
|
343
|
+
invariant(renderedValue, '"webhook" assertion type must have a URL value');
|
|
344
|
+
invariant(typeof renderedValue === 'string', '"webhook" assertion type must have a URL value');
|
|
295
345
|
|
|
296
346
|
try {
|
|
297
347
|
const context = {
|
|
298
348
|
vars: test.vars || {},
|
|
299
349
|
};
|
|
300
350
|
const response = await fetchWithRetries(
|
|
301
|
-
|
|
351
|
+
renderedValue,
|
|
302
352
|
{
|
|
303
353
|
method: 'POST',
|
|
304
354
|
headers: {
|
|
@@ -339,8 +389,11 @@ ${assertion.value}`,
|
|
|
339
389
|
}
|
|
340
390
|
|
|
341
391
|
if (baseType === 'rouge-n') {
|
|
342
|
-
invariant(
|
|
343
|
-
|
|
392
|
+
invariant(
|
|
393
|
+
typeof renderedValue === 'string' || Array.isArray(renderedValue),
|
|
394
|
+
'"rouge" assertion type must be a value (string or string array)',
|
|
395
|
+
);
|
|
396
|
+
return handleRougeScore(baseType, assertion, renderedValue, output, inverse);
|
|
344
397
|
}
|
|
345
398
|
|
|
346
399
|
throw new Error('Unknown assertion type: ' + assertion.type);
|
package/src/evaluator.ts
CHANGED
|
@@ -111,13 +111,19 @@ class Evaluator {
|
|
|
111
111
|
vars,
|
|
112
112
|
};
|
|
113
113
|
|
|
114
|
+
let latencyMs = 0;
|
|
114
115
|
try {
|
|
116
|
+
const startTime = Date.now();
|
|
115
117
|
const response = await provider.callApi(renderedPrompt);
|
|
118
|
+
const endTime = Date.now();
|
|
119
|
+
latencyMs = endTime - startTime;
|
|
120
|
+
|
|
116
121
|
const ret: EvaluateResult = {
|
|
117
122
|
...setup,
|
|
118
123
|
response,
|
|
119
124
|
success: false,
|
|
120
125
|
score: 0,
|
|
126
|
+
latencyMs,
|
|
121
127
|
};
|
|
122
128
|
if (response.error) {
|
|
123
129
|
ret.error = response.error;
|
|
@@ -177,6 +183,7 @@ class Evaluator {
|
|
|
177
183
|
error: String(err) + '\n\n' + (err as Error).stack,
|
|
178
184
|
success: false,
|
|
179
185
|
score: 0,
|
|
186
|
+
latencyMs,
|
|
180
187
|
};
|
|
181
188
|
}
|
|
182
189
|
}
|
|
@@ -248,10 +255,11 @@ class Evaluator {
|
|
|
248
255
|
}
|
|
249
256
|
|
|
250
257
|
// Aggregate all vars across test cases
|
|
251
|
-
|
|
252
|
-
const tests = (
|
|
258
|
+
let tests = (
|
|
253
259
|
testSuite.tests && testSuite.tests.length > 0
|
|
254
260
|
? testSuite.tests
|
|
261
|
+
: testSuite.scenarios
|
|
262
|
+
? []
|
|
255
263
|
: [
|
|
256
264
|
{
|
|
257
265
|
// Dummy test for cases when we're only comparing raw prompts.
|
|
@@ -262,6 +270,35 @@ class Evaluator {
|
|
|
262
270
|
return Object.assign(finalTestCase, test);
|
|
263
271
|
});
|
|
264
272
|
|
|
273
|
+
// Build scenarios and add to tests
|
|
274
|
+
if (testSuite.scenarios && testSuite.scenarios.length > 0) {
|
|
275
|
+
for (const scenario of testSuite.scenarios) {
|
|
276
|
+
for (const data of scenario.config) {
|
|
277
|
+
// Merge defaultTest with scenario config
|
|
278
|
+
const scenarioTests = (
|
|
279
|
+
scenario.tests || [
|
|
280
|
+
{
|
|
281
|
+
// Dummy test for cases when we're only comparing raw prompts.
|
|
282
|
+
},
|
|
283
|
+
]
|
|
284
|
+
).map((test) => {
|
|
285
|
+
return {
|
|
286
|
+
...testSuite.defaultTest,
|
|
287
|
+
...data,
|
|
288
|
+
...test,
|
|
289
|
+
vars: {
|
|
290
|
+
...testSuite.defaultTest?.vars,
|
|
291
|
+
...data.vars,
|
|
292
|
+
...test.vars,
|
|
293
|
+
},
|
|
294
|
+
};
|
|
295
|
+
});
|
|
296
|
+
// Add scenario tests to tests
|
|
297
|
+
tests = tests.concat(scenarioTests);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
265
302
|
const varNames: Set<string> = new Set();
|
|
266
303
|
const varsWithSpecialColsRemoved: Record<string, string | string[] | object>[] = [];
|
|
267
304
|
for (const testCase of tests) {
|
|
@@ -345,8 +382,7 @@ class Evaluator {
|
|
|
345
382
|
// Set up progress bar...
|
|
346
383
|
let progressbar: SingleBar | undefined;
|
|
347
384
|
if (options.showProgressBar) {
|
|
348
|
-
const totalNumRuns =
|
|
349
|
-
testSuite.prompts.length * testSuite.providers.length * (totalVarCombinations || 1);
|
|
385
|
+
const totalNumRuns = runEvalOptions.length;
|
|
350
386
|
const cliProgress = await import('cli-progress');
|
|
351
387
|
progressbar = new cliProgress.SingleBar(
|
|
352
388
|
{
|
|
@@ -426,6 +462,8 @@ class Evaluator {
|
|
|
426
462
|
score: row.score,
|
|
427
463
|
text: resultText,
|
|
428
464
|
prompt: row.prompt.raw,
|
|
465
|
+
latencyMs: row.latencyMs,
|
|
466
|
+
tokenUsage: row.response?.tokenUsage,
|
|
429
467
|
};
|
|
430
468
|
},
|
|
431
469
|
);
|
package/src/index.ts
CHANGED
|
@@ -3,7 +3,7 @@ import providers from './providers';
|
|
|
3
3
|
import telemetry from './telemetry';
|
|
4
4
|
import { evaluate as doEvaluate } from './evaluator';
|
|
5
5
|
import { loadApiProviders } from './providers';
|
|
6
|
-
import { readTests } from './util';
|
|
6
|
+
import { readTests, writeOutput } from './util';
|
|
7
7
|
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
|
|
8
8
|
|
|
9
9
|
export * from './types';
|
|
@@ -28,6 +28,11 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
|
|
|
28
28
|
};
|
|
29
29
|
telemetry.maybeShowNotice();
|
|
30
30
|
const ret = await doEvaluate(constructedTestSuite, options);
|
|
31
|
+
|
|
32
|
+
if (testSuite.outputPath) {
|
|
33
|
+
writeOutput(testSuite.outputPath, ret, testSuite, null);
|
|
34
|
+
}
|
|
35
|
+
|
|
31
36
|
await telemetry.send();
|
|
32
37
|
return ret;
|
|
33
38
|
}
|