promptfoo 0.58.1 → 0.59.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +15 -4
  2. package/dist/package.json +1 -1
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +3 -2
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/evaluator.d.ts.map +1 -1
  7. package/dist/src/evaluator.js +14 -21
  8. package/dist/src/evaluator.js.map +1 -1
  9. package/dist/src/integrations/langfuse.d.ts +1 -1
  10. package/dist/src/integrations/langfuse.d.ts.map +1 -1
  11. package/dist/src/integrations/langfuse.js +12 -6
  12. package/dist/src/integrations/langfuse.js.map +1 -1
  13. package/dist/src/main.js +2 -1
  14. package/dist/src/main.js.map +1 -1
  15. package/dist/src/migrate.d.ts.map +1 -1
  16. package/dist/src/migrate.js +3 -1
  17. package/dist/src/migrate.js.map +1 -1
  18. package/dist/src/prompts.d.ts.map +1 -1
  19. package/dist/src/prompts.js +2 -1
  20. package/dist/src/prompts.js.map +1 -1
  21. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  22. package/dist/src/providers/azureopenai.js +5 -4
  23. package/dist/src/providers/azureopenai.js.map +1 -1
  24. package/dist/src/providers/bedrock.d.ts +11 -5
  25. package/dist/src/providers/bedrock.d.ts.map +1 -1
  26. package/dist/src/providers/bedrock.js +51 -5
  27. package/dist/src/providers/bedrock.js.map +1 -1
  28. package/dist/src/providers/openai.d.ts +1 -1
  29. package/dist/src/providers/openai.d.ts.map +1 -1
  30. package/dist/src/providers/openai.js +18 -8
  31. package/dist/src/providers/openai.js.map +1 -1
  32. package/dist/src/providers/openaiUtil.d.ts +1 -1
  33. package/dist/src/providers/openaiUtil.d.ts.map +1 -1
  34. package/dist/src/providers/openaiUtil.js +4 -2
  35. package/dist/src/providers/openaiUtil.js.map +1 -1
  36. package/dist/src/providers/vertex.d.ts.map +1 -1
  37. package/dist/src/providers/vertex.js +16 -8
  38. package/dist/src/providers/vertex.js.map +1 -1
  39. package/dist/src/providers.d.ts.map +1 -1
  40. package/dist/src/providers.js +6 -1
  41. package/dist/src/providers.js.map +1 -1
  42. package/dist/src/types.d.ts +1 -0
  43. package/dist/src/types.d.ts.map +1 -1
  44. package/dist/src/types.js.map +1 -1
  45. package/dist/src/util.d.ts +4 -3
  46. package/dist/src/util.d.ts.map +1 -1
  47. package/dist/src/util.js +42 -11
  48. package/dist/src/util.js.map +1 -1
  49. package/dist/src/web/nextui/404/index.html +1 -1
  50. package/dist/src/web/nextui/404.html +1 -1
  51. package/dist/src/web/nextui/_next/static/chunks/712-b075de116030d628.js +1 -0
  52. package/dist/src/web/nextui/auth/login/index.html +1 -1
  53. package/dist/src/web/nextui/auth/login/index.txt +1 -1
  54. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  55. package/dist/src/web/nextui/auth/signup/index.txt +1 -1
  56. package/dist/src/web/nextui/datasets/index.html +1 -1
  57. package/dist/src/web/nextui/datasets/index.txt +1 -1
  58. package/dist/src/web/nextui/eval/index.html +1 -1
  59. package/dist/src/web/nextui/eval/index.txt +2 -2
  60. package/dist/src/web/nextui/index.html +1 -1
  61. package/dist/src/web/nextui/index.txt +1 -1
  62. package/dist/src/web/nextui/progress/index.html +1 -1
  63. package/dist/src/web/nextui/progress/index.txt +1 -1
  64. package/dist/src/web/nextui/prompts/index.html +1 -1
  65. package/dist/src/web/nextui/prompts/index.txt +1 -1
  66. package/dist/src/web/nextui/setup/index.html +1 -1
  67. package/dist/src/web/nextui/setup/index.txt +1 -1
  68. package/dist/src/web/server.d.ts +1 -1
  69. package/dist/src/web/server.d.ts.map +1 -1
  70. package/dist/src/web/server.js +4 -4
  71. package/dist/src/web/server.js.map +1 -1
  72. package/package.json +1 -1
  73. package/dist/src/web/nextui/_next/static/chunks/712-6aacc65ec1cd9990.js +0 -1
  74. /package/dist/src/web/nextui/_next/static/{VpTktI7OwfKjDVxC-cNBf → M6DuBbMeEniSe5YhgYql4}/_buildManifest.js +0 -0
  75. /package/dist/src/web/nextui/_next/static/{VpTktI7OwfKjDVxC-cNBf → M6DuBbMeEniSe5YhgYql4}/_ssgManifest.js +0 -0
@@ -1 +0,0 @@
1
- (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[712],{2810:function(e,t,s){"use strict";let n,l;async function a(){if(n||(l||(l=fetch("/api/config").then(e=>e.json()).then(e=>n=e.apiBaseUrl)),await l),void 0===n)throw Error("API base URL is undefined");return n}s.d(t,{b:function(){return a}})},4712:function(e,t,s){"use strict";s.r(t),s.d(t,{default:function(){return eH}});var n=s(7437),l=s(2265),a=s(2130),i=s(6882),r=s(3388),o=s(4033),c=s(3082),d=s(1116),u=s(8440),p=s(6507),h=s(5551),m=s(7760),x=s(6446),j=s(3391),g=s(4081),f=s(3226),v=s(819),b=s(8212),y=s(8864),Z=s(3375),C=s(9114),w=s(923),k=s(654),S=s(3295),N=s(2467),R=s(3457),I=s(1975),P=s(8276),E=s(9605),O=s(9190),T=s(2053),F=s(8339),M=s(2481),L=s(9394),D=s(6337),U=s(4660),A=s(4810);let W=(0,U.Ue)()((0,A.tJ)((e,t)=>({evalId:null,setEvalId:t=>e(()=>({evalId:t})),table:null,setTable:t=>e(()=>({table:t})),config:null,setConfig:t=>e(()=>({config:t})),maxTextLength:250,setMaxTextLength:t=>e(()=>({maxTextLength:t})),wordBreak:"break-word",setWordBreak:t=>e(()=>({wordBreak:t})),showInferenceDetails:!0,setShowInferenceDetails:t=>e(()=>({showInferenceDetails:t})),renderMarkdown:!1,setRenderMarkdown:t=>e(()=>({renderMarkdown:t})),prettifyJson:!1,setPrettifyJson:t=>e(()=>({prettifyJson:t})),showPrompts:!1,setShowPrompts:t=>e(()=>({showPrompts:t}))}),{name:"ResultsViewStorage",storage:(0,A.FL)(()=>localStorage)}));var _=function(){let{table:e,config:t,evalId:s}=W(),[a,i]=l.useState(!1),r=(e,t)=>{let s=URL.createObjectURL(e),n=document.createElement("a");n.href=s,n.download=t,document.body.appendChild(n),n.click(),document.body.removeChild(n),URL.revokeObjectURL(s)},o=()=>{i(!1)};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(C.Z,{onClick:()=>{i(!0)},children:[(0,n.jsx)(y.Z,{children:(0,n.jsx)(M.Z,{fontSize:"small"})}),(0,n.jsx)(b.Z,{children:"Download"})]}),(0,n.jsx)(L.Z,{onClose:o,open:a,children:(0,n.jsx)(D.Z,{children:(0,n.jsxs)(R.Z,{direction:"column",spacing:2,sx:{width:"100%"},children:[(0,n.jsx)(h.Z,{onClick:()=>{let e=F.default.dump(t),s=new Blob([e],{type:"text/yaml;charset=utf-8"});r(s,"promptfooconfig.yaml"),o()},startIcon:(0,n.jsx)(M.Z,{}),fullWidth:!0,sx:{justifyContent:"flex-start"},children:"Download YAML Config"}),(0,n.jsx)(h.Z,{onClick:()=>{if(!e){alert("No table data");return}let t=new Blob([JSON.stringify(e,null,2)],{type:"application/json"});r(t,"".concat(s,"-table.json")),o()},startIcon:(0,n.jsx)(M.Z,{}),fullWidth:!0,sx:{justifyContent:"flex-start"},children:"Download Table JSON"}),(0,n.jsx)(h.Z,{onClick:()=>{if(!e){alert("No table data");return}let t=e.body.map((t,s)=>({chosen:t.outputs.filter(e=>e.pass).map(e=>e.text),rejected:t.outputs.filter(e=>!e.pass).map(e=>e.text),vars:t.test.vars,providers:e.head.prompts.map(e=>e.provider),prompts:e.head.prompts.map(e=>e.display)})),n=new Blob([JSON.stringify(t,null,2)],{type:"application/json"});r(n,"".concat(s,"-dpo.json")),o()},startIcon:(0,n.jsx)(M.Z,{}),fullWidth:!0,sx:{justifyContent:"flex-start"},children:"Download DPO JSON"})]})})})]})},z=s(6704),V=s(1797),B=s(1101),J=s(9872),H=s(2653),G=s(3533),X=s(4740);let Y=["#fd7f6f","#7eb0d5","#b2e061","#bd7ebe","#ffb55a","#ffee65","#beb9db","#fdcce5","#8bd3c7"];function q(e){let{table:t}=e,s=(0,l.useRef)(null),a=(0,l.useRef)(null);return(0,l.useEffect)(()=>{if(!s.current)return;a.current&&a.current.destroy();let e=t.body.flatMap(e=>e.outputs.map(e=>e.score)),n=Math.min(...e),l=(Math.ceil(Math.max(...e))-Math.floor(n))/10,i=Array.from({length:11},(e,t)=>parseFloat((Math.floor(n)+t*l).toFixed(2))),r=t.head.prompts.map((e,s)=>{let n=t.body.flatMap(e=>e.outputs[s].score),a=i.map(e=>n.filter(t=>t>=e&&t<e+l).length);return{label:"Column ".concat(s+1),data:a,backgroundColor:Y[s%Y.length]}});a.current=new z.kL(s.current,{type:"bar",data:{labels:i,datasets:r},options:{animation:!1,plugins:{title:{display:!0,text:"Score Distribution"},legend:{display:!1},tooltip:{callbacks:{title:function(e){let t=e[0].datasetIndex;return"Column ".concat(t+1)},label:function(e){let t=e.dataIndex,s=i[t],n=i[t+1];return n?"".concat(s," <= score < ").concat(n):"".concat(s," <= score")}}}}}})},[t]),(0,n.jsx)("canvas",{ref:s,style:{maxHeight:"300px"}})}function $(e){let{table:t}=e,s=(0,l.useRef)(null),a=(0,l.useRef)(null);return(0,l.useEffect)(()=>{if(!s.current)return;a.current&&a.current.destroy();let e=t.head.prompts.map((e,s)=>{let n=t.body.flatMap(e=>e.outputs[s]),l=n.filter(e=>e.pass).length,a=l/n.length*100;return{label:"Column ".concat(s+1),data:[a],backgroundColor:Y[s%Y.length]}});a.current=new z.kL(s.current,{type:"bar",data:{labels:["Pass Rate (%)"],datasets:e},options:{animation:!1,plugins:{title:{display:!0,text:"Pass rate"},legend:{display:!0}}}})},[t]),(0,n.jsx)("canvas",{ref:s,style:{maxHeight:"300px"}})}function K(e){let{table:t}=e,s=(0,l.useRef)(null),a=(0,l.useRef)(null),[i,r]=(0,l.useState)(0),[o,c]=(0,l.useState)(1),[d,u]=(0,l.useState)(!1);return(0,l.useEffect)(()=>{if(!s.current)return;a.current&&a.current.destroy();let e=t.body.flatMap(e=>e.outputs.map(e=>e.score)),n=Math.min(...e),l=Math.max(...e),r=t.body.map(e=>{let t=e.outputs[i].score,s=e.outputs[o].score;return{x:t,y:s,backgroundColor:s>t?"green":s<t?"red":"gray"}});a.current=new z.kL(s.current,{type:"scatter",data:{datasets:[{data:r,backgroundColor:r.map(e=>e.backgroundColor)},{type:"line",data:[{x:n,y:n},{x:l,y:l}],borderColor:"gray",borderWidth:1,borderDash:[5,5],pointRadius:0}]},options:{animation:!1,plugins:{legend:{display:!1},tooltip:{callbacks:{label:function(e){let s=t.body[e.dataIndex],n=s.outputs[0].text,l=s.outputs[1].text;return n.length>30&&(n=n.substring(0,30)+"..."),l.length>30&&(l=l.substring(0,30)+"..."),"Output 1: ".concat(n,"\nOutput 2: ").concat(l)}}}},scales:{x:{title:{display:!0,text:"Prompt ".concat(i+1," Score")}},y:{title:{display:!0,text:"Prompt ".concat(o+1," Score")}}}}})},[t,i,o]),(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(L.Z,{open:d,onClose:()=>u(!1),children:[(0,n.jsx)(V.Z,{children:"Compare prompt outputs"}),(0,n.jsxs)(D.Z,{children:[(0,n.jsx)(g.Z,{sx:{m:1,minWidth:120},children:(0,n.jsx)(k.Z,{value:i,onChange:e=>r(Number(e.target.value)),children:t.head.prompts.map((e,t)=>(0,n.jsxs)(C.Z,{value:t,children:["Prompt ",t+1]},t))})}),(0,n.jsx)(g.Z,{sx:{m:1,minWidth:120},children:(0,n.jsx)(k.Z,{value:o,onChange:e=>c(Number(e.target.value)),children:t.head.prompts.map((e,t)=>(0,n.jsxs)(C.Z,{value:t,children:["Prompt ",t+1]},t))})})]})]}),(0,n.jsx)("canvas",{ref:s,style:{maxHeight:"300px",cursor:"pointer"},onClick:()=>u(!0)})]})}z.kL.register(z.vn,z.ST,z.ho,z.uw,z.f$,z.ZL,z.jn,z.od,z.u,z.wL);var Q=l.memo(function(e){let{columnVisibility:t}=e,s=(0,B.Z)();z.kL.defaults.color="dark"===s.palette.mode?"#aaa":"#666";let[a,i]=(0,l.useState)(!0),{table:r}=W();if(!r||!a||r.head.prompts.length<2)return null;let o=r.body.flatMap(e=>e.outputs.map(e=>e.score));return 1===new Set(o).size?null:(0,n.jsx)(X.SV,{fallback:null,children:(0,n.jsxs)(J.Z,{style:{position:"relative",padding:s.spacing(3)},children:[(0,n.jsx)(H.Z,{style:{position:"absolute",right:0,top:0},onClick:()=>i(!1),children:(0,n.jsx)(G.Z,{})}),(0,n.jsxs)("div",{style:{display:"flex",justifyContent:"space-between",width:"100%"},children:[(0,n.jsx)("div",{style:{width:"33%"},children:(0,n.jsx)($,{table:r})}),(0,n.jsx)("div",{style:{width:"33%"},children:(0,n.jsx)(q,{table:r})}),(0,n.jsx)("div",{style:{width:"33%"},children:(0,n.jsx)(K,{table:r})})]})]})})}),ee=s(9116);s(5777);var et=s(3425),es=s(7660),en=s(3216),el=s(182),ea=s(5391),ei=s(1396),er=s.n(ei),eo=s(2834),ec=e=>{let{open:t,contextText:s,commentText:l,onClose:a,onSave:i,onChange:r}=e;return(0,n.jsxs)(L.Z,{open:t,onClose:a,fullWidth:!0,maxWidth:"sm",children:[(0,n.jsx)(V.Z,{children:"Edit Comment"}),(0,n.jsxs)(D.Z,{children:[(0,n.jsx)(p.Z,{sx:{backgroundColor:"#f0f0f0",padding:2,marginBottom:2},children:s}),(0,n.jsx)(I.Z,{autoFocus:!0,margin:"dense",type:"text",fullWidth:!0,multiline:!0,rows:4,value:l,onChange:e=>r(e.target.value)})]}),(0,n.jsxs)(eo.Z,{children:[(0,n.jsx)(h.Z,{onClick:i,color:"primary",variant:"contained",children:"Save"}),(0,n.jsx)(h.Z,{onClick:a,color:"primary",children:"Cancel"})]})]})};s(8715);var ed=e=>{let{lookup:t}=e;return t&&Object.keys(t).length?(0,n.jsx)("div",{className:"custom-metric-container",children:Object.entries(t).map(e=>{let[t,s]=e;return t&&void 0!==s?(0,n.jsxs)("span",{children:[t,": ",s.toFixed(2)]},t):null})}):null},eu=s(5446),ep=s(9329),eh=s(4719),em=s(3701),ex=s(9279),ej=s(666),eg=s(5795),ef=s(6988),ev=s(4147);function eb(e){let{gradingResults:t}=e;return t?(0,n.jsxs)(p.Z,{mt:2,children:[(0,n.jsx)(f.Z,{variant:"subtitle1",children:"Assertions"}),(0,n.jsx)(eg.Z,{children:(0,n.jsxs)(em.Z,{children:[(0,n.jsx)(ef.Z,{children:(0,n.jsxs)(ev.Z,{children:[(0,n.jsx)(ej.Z,{style:{fontWeight:"bold"},children:"Pass"}),(0,n.jsx)(ej.Z,{style:{fontWeight:"bold"},children:"Score"}),(0,n.jsx)(ej.Z,{style:{fontWeight:"bold"},children:"Type"}),(0,n.jsx)(ej.Z,{style:{fontWeight:"bold"},children:"Value"}),(0,n.jsx)(ej.Z,{style:{fontWeight:"bold"},children:"Reason"})]})}),(0,n.jsx)(ex.Z,{children:t.map((e,t)=>{var s,l;return(0,n.jsxs)(ev.Z,{children:[(0,n.jsx)(ej.Z,{children:e.pass?"✅":"❌"}),(0,n.jsx)(ej.Z,{children:e.score.toFixed(2)}),(0,n.jsx)(ej.Z,{children:(null===(s=e.assertion)||void 0===s?void 0:s.type)||""}),(0,n.jsx)(ej.Z,{style:{whiteSpace:"pre-wrap"},children:(null===(l=e.assertion)||void 0===l?void 0:l.value)?String(e.assertion.value):"-"}),(0,n.jsx)(ej.Z,{style:{whiteSpace:"pre-wrap"},children:e.reason})]},t)})})]})})]}):null}function ey(e){let{open:t,onClose:s,prompt:a,provider:i,output:r,gradingResults:o}=e,[c,d]=(0,l.useState)(!1);(0,l.useEffect)(()=>{d(!1)},[a]);let u=async e=>{await navigator.clipboard.writeText(e),d(!0)};return(0,n.jsxs)(L.Z,{open:t,onClose:s,fullWidth:!0,maxWidth:"lg",children:[(0,n.jsxs)(V.Z,{children:["Details",i&&": ".concat(i)]}),(0,n.jsxs)(D.Z,{children:[(0,n.jsxs)(p.Z,{mb:2,children:[(0,n.jsx)(f.Z,{variant:"subtitle1",style:{marginBottom:"1rem"},children:"Prompt"}),(0,n.jsx)(eu.u,{readOnly:!0,value:a,style:{width:"100%",padding:"0.75rem"},maxRows:20}),(0,n.jsx)(H.Z,{onClick:()=>u(a),style:{position:"absolute",right:"10px",top:"10px"},children:c?(0,n.jsx)(eh.Z,{}):(0,n.jsx)(ep.Z,{})})]}),r&&(0,n.jsxs)(p.Z,{my:2,children:[(0,n.jsx)(f.Z,{variant:"subtitle1",style:{marginBottom:"1rem",marginTop:"1rem"},children:"Output"}),(0,n.jsx)(eu.u,{readOnly:!0,maxRows:20,value:r,style:{width:"100%",padding:"0.75rem"}})]}),(0,n.jsx)(eb,{gradingResults:o})]}),(0,n.jsx)(eo.Z,{children:(0,n.jsx)(h.Z,{onClick:s,children:"Close"})})]})}var eZ=s(9396),eC=s(4173),ew=function(){let e=(0,B.Z)(),[t,s]=l.useState(!1),a=()=>{s(!1)};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)("div",{style:{textAlign:"center",marginTop:20,marginBottom:40},children:(0,n.jsx)(h.Z,{variant:"text",color:"primary",startIcon:(0,n.jsx)(eZ.Z,{}),onClick:()=>{s(!0)},children:"Generate test cases"})}),(0,n.jsxs)(L.Z,{open:t,onClose:a,children:[(0,n.jsx)(V.Z,{children:"Run on Command Line"}),(0,n.jsx)(D.Z,{children:(0,n.jsxs)(eC.Z,{children:[(0,n.jsx)("p",{children:"This feature is in beta. UI coming soon."}),(0,n.jsxs)("p",{children:["Run"," ",(0,n.jsx)(p.Z,{component:"code",sx:{backgroundColor:"dark"===e.palette.mode?"#424242":"#f0f0f0",padding:"2px 4px",borderRadius:"4px"},children:"promptfoo generate dataset"}),"to generate test cases on the command line."]})]})}),(0,n.jsx)(eo.Z,{children:(0,n.jsx)(h.Z,{onClick:a,color:"primary",children:"Close"})})]})]})},ek=s(2810);function eS(e){return null===e||0===e||1===e?"":"(".concat(e.toFixed(2),")")}function eN(e){return"string"==typeof e||"number"==typeof e?e.toString().length:Array.isArray(e)?e.reduce((e,t)=>e+eN(t),0):l.isValidElement(e)&&e.props.children?l.Children.toArray(e.props.children).reduce((e,t)=>e+eN(t),0):0}s(2604);let eR=l.memo(function(e){let t,{text:s,maxLength:a}=e,[i,r]=l.useState(!0),o=function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;if("string"==typeof e||"number"==typeof e){let s=e.toString();return s.slice(0,a-t)}if(Array.isArray(e)){let s=[],n=t;for(let t of e){let e=eN(t);if(n+e>a){s.push(o(t,n));break}s.push(t),n+=e}return s}if(l.isValidElement(e)&&e.props.children){let s=eN(e.props.children);if(s>a-t)return l.cloneElement(e,{...e.props,children:o(e.props.children,t)})}return e};t=l.isValidElement(s)||"string"==typeof s?s:JSON.stringify(s);let c=i?o(t):t,d=eN(t)>a;return(0,n.jsxs)("div",{style:{cursor:d?"pointer":"normal"},onClick:()=>{r(!i)},children:[c,i&&eN(t)>a&&(0,n.jsx)("span",{children:"..."})]})}),eI=l.memo(function(e){var t,s,a,i,r,o,c,d,u;let p,h,m,x,j,{output:g,maxTextLength:f,rowIndex:v,promptIndex:b,onRating:y,firstOutput:Z,showDiffs:C,searchText:w,showStats:k}=e,{renderMarkdown:S,prettifyJson:N,showPrompts:R}=W(),[I,E]=l.useState(!1),[O,T]=l.useState(!1),F=()=>T(!O),[M,L]=l.useState(!1),[D,U]=l.useState((null===(t=g.gradingResult)||void 0===t?void 0:t.comment)||""),A=()=>{L(!0)},_="string"==typeof g.text?g.text:JSON.stringify(g.text),z=[];if(_.startsWith("[IMAGE]")){let e=_.slice(7).trim();p=(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)("img",{loading:"lazy",src:e,alt:g.prompt,onClick:F}),O&&(0,n.jsx)("div",{className:"lightbox",onClick:F,children:(0,n.jsx)("img",{src:e,alt:g.prompt})})]})}else!g.pass&&_.includes("---")?_=(z=_.split("---")).slice(1).join("---"):z=[_];if(C&&Z){let e,t="string"==typeof Z.text?Z.text:JSON.stringify(Z.text);t.includes("---")&&(t=t.split("---").slice(1).join("---"));try{JSON.parse(t),JSON.parse(_),e=(0,ee.CT)(t,_)}catch(s){e=t.includes(". ")&&_.includes(". ")?(0,ee.SY)(t,_):(0,ee.NV)(t,_)}p=(0,n.jsx)(n.Fragment,{children:e.map((e,t)=>e.added?(0,n.jsx)("ins",{children:e.value},t):e.removed?(0,n.jsx)("del",{children:e.value},t):(0,n.jsx)("span",{children:e.value},t))})}if(w)try{let e;let t=RegExp(w,"gi"),s=[];for(;null!==(e=t.exec(_));)s.push({start:e.index,end:t.lastIndex});p=(0,n.jsx)(n.Fragment,{children:s.length>0?(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)("span",{children:_.substring(0,s[0].start)},"text-before"),s.map((e,t)=>(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)("span",{className:"search-highlight",children:_.substring(e.start,e.end)},"match-"+t),(0,n.jsx)("span",{children:_.substring(e.end,s[t+1]?s[t+1].start:_.length)},"text-after-"+t)]}))]}):(0,n.jsx)("span",{children:_},"no-match")})}catch(e){console.error("Invalid regular expression:",e.message)}else if(S)p=(0,n.jsx)(et.U,{children:_});else if(N)try{p=(0,n.jsx)("pre",{children:JSON.stringify(JSON.parse(_),null,2)})}catch(e){}let V=l.useCallback(e=>{var t;y(e,void 0,null===(t=g.gradingResult)||void 0===t?void 0:t.comment)},[y,null===(s=g.gradingResult)||void 0===s?void 0:s.comment]),B=l.useCallback(()=>{let e=prompt("Set test score (0.0 - 1.0):",String(g.score));if(null!==e){let s=parseFloat(e);if(!isNaN(s)&&s>=0&&s<=1){var t;y(void 0,s,null===(t=g.gradingResult)||void 0===t?void 0:t.comment)}else alert("Invalid score. Please enter a value between 0.0 and 1.0.")}},[y,g.score,null===(a=g.gradingResult)||void 0===a?void 0:a.comment]);if(null===(i=g.tokenUsage)||void 0===i?void 0:i.completion){m=(0,n.jsxs)("span",{children:[Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(g.latencyMs)," ms"]});let e=g.tokenUsage.completion/(g.latencyMs/1e3);x=(0,n.jsx)("span",{children:Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e)})}g.cost&&(j=(0,n.jsxs)("span",{children:["$",g.cost.toPrecision(2)]})),(null===(r=g.tokenUsage)||void 0===r?void 0:r.cached)?h=(0,n.jsxs)("span",{children:[Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(g.tokenUsage.cached)," ","(cached)"]}):(null===(o=g.tokenUsage)||void 0===o?void 0:o.total)&&(h=(0,n.jsx)("span",{children:Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(g.tokenUsage.total)}));let J=(null===(c=g.gradingResult)||void 0===c?void 0:c.comment)?(0,n.jsx)("div",{className:"comment",onClick:A,children:g.gradingResult.comment}):null,H=k?(0,n.jsxs)("div",{className:"cell-detail",children:[h&&(0,n.jsxs)("div",{className:"stat-item",children:[(0,n.jsx)("strong",{children:"Tokens:"})," ",h]}),m&&(0,n.jsxs)("div",{className:"stat-item",children:[(0,n.jsx)("strong",{children:"Latency:"})," ",m]}),x&&(0,n.jsxs)("div",{className:"stat-item",children:[(0,n.jsx)("strong",{children:"Tokens/Sec:"})," ",x]}),j&&(0,n.jsxs)("div",{className:"stat-item",children:[(0,n.jsx)("strong",{children:"Cost:"})," ",j]})]}):null,G=(0,n.jsxs)("div",{className:"cell-actions",children:[g.prompt&&(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)("span",{className:"action",onClick:()=>{E(!0)},children:(0,n.jsx)(P.Z,{title:"View ouput and test details",children:(0,n.jsx)("span",{children:"\uD83D\uDD0E"})})}),(0,n.jsx)(ey,{open:I,onClose:()=>{E(!1)},prompt:g.prompt,provider:g.provider,gradingResults:null===(d=g.gradingResult)||void 0===d?void 0:d.componentResults,output:_})]}),(0,n.jsx)("span",{className:"action",onClick:()=>V(!0),children:(0,n.jsx)(P.Z,{title:"Mark test passed (score 1.0)",children:(0,n.jsx)("span",{children:"\uD83D\uDC4D"})})}),(0,n.jsx)("span",{className:"action",onClick:()=>V(!1),children:(0,n.jsx)(P.Z,{title:"Mark test failed (score 0.0)",children:(0,n.jsx)("span",{children:"\uD83D\uDC4E"})})}),(0,n.jsx)("span",{className:"action",onClick:B,children:(0,n.jsx)(P.Z,{title:"Set test score",children:(0,n.jsx)("span",{children:"\uD83D\uDD22"})})}),(0,n.jsx)("span",{className:"action",onClick:A,children:(0,n.jsx)(P.Z,{title:"Edit comment",children:(0,n.jsx)("span",{children:"✏️"})})})]});return(0,n.jsxs)("div",{className:"cell",children:[g.pass?(0,n.jsx)(n.Fragment,{children:(0,n.jsxs)("div",{className:"status pass",children:[(0,n.jsxs)("div",{className:"pill",children:["PASS ",(0,n.jsx)("span",{className:"score",children:eS(g.score)})]}),(0,n.jsx)(ed,{lookup:g.namedScores})]})}):(0,n.jsx)(n.Fragment,{children:(0,n.jsxs)("div",{className:"status fail",children:[(0,n.jsxs)("div",{className:"pill",children:["FAIL",g.score>0?" ":"",(0,n.jsx)("span",{className:"score",children:eS(g.score)})]}),(0,n.jsx)(ed,{lookup:g.namedScores}),(0,n.jsx)("span",{className:"fail-reason",children:null===(u=z[0])||void 0===u?void 0:u.trim().split("\n").map((e,t)=>(0,n.jsxs)(l.Fragment,{children:[e,(0,n.jsx)("br",{})]},t))})]})}),R&&Z.prompt&&(0,n.jsxs)("div",{className:"prompt",children:[(0,n.jsx)("span",{className:"pill",children:"Prompt"}),g.prompt]}),(0,n.jsx)(eR,{text:p||_,maxLength:f}),J,H,G,(0,n.jsx)(ec,{open:M,contextText:g.text,commentText:D,onClose:()=>{L(!1)},onSave:()=>{y(void 0,void 0,D),L(!1)},onChange:U})]})});function eP(e){let{text:t,maxLength:s,expandedText:a,resourceId:i,className:r}=e,[o,c]=l.useState(!1);return(0,n.jsxs)("div",{className:"".concat(r||""),children:[(0,n.jsx)(eR,{text:t,maxLength:s}),a&&(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(P.Z,{title:"View prompt",children:(0,n.jsx)("span",{className:"action",onClick:()=>{c(!0)},children:"\uD83D\uDD0E"})}),(0,n.jsx)(ey,{open:o,onClose:()=>{c(!1)},prompt:a}),i&&(0,n.jsx)(P.Z,{title:"View other evals and datasets for this prompt",children:(0,n.jsx)("span",{className:"action",children:(0,n.jsx)(er(),{href:"/prompts/?id=".concat(i),target:"_blank",children:(0,n.jsx)(ea.Z,{fontSize:"small"})})})})]})]})}var eE=l.memo(function(e){let{maxTextLength:t,columnVisibility:s,wordBreak:i,filterMode:r,failureFilter:o,searchText:c,showStats:d,onFailureFilterToggle:u}=e,{evalId:x,table:j,setTable:g}=W();(0,a.Z)(j,"Table should be defined");let{head:v,body:b}=j,y=l.useCallback(async(e,t,s,n,l)=>{var a;let i=[...b],r={...i[e]},o=[...r.outputs],c=null!=s?s:o[t].pass,d=void 0===n?s?1:0:n||0;o[t].pass=c,o[t].score=d;let u={...o[t].gradingResult||{},pass:c,score:d,reason:"Manual result (overrides all other grading results)",comment:l,assertion:(null===(a=o[t].gradingResult)||void 0===a?void 0:a.assertion)||null};o[t].gradingResult=u,r.outputs=o,i[e]=r;let p={head:v,body:i};g(p);try{let e=await fetch("".concat(await (0,ek.b)(),"/api/eval/").concat(x),{method:"PATCH",headers:{"Content-Type":"application/json"},body:JSON.stringify({table:p})});if(!e.ok)throw Error("Network response was not ok")}catch(e){console.error("Failed to update table:",e)}},[b,v,g,x]),Z=Object.keys(s).length>0,w=l.useMemo(()=>{try{let e=RegExp(c,"i");return b.map((e,t)=>({...e,outputs:e.outputs.map((e,s)=>({...e,originalRowIndex:t,originalPromptIndex:s}))})).filter(t=>{let n="failures"===r?t.outputs.some((e,t)=>{let n="Prompt ".concat(t+1);return o[n]&&!e.pass&&(!Z||s[n])}):"different"!==r||!t.outputs.every(e=>e.text===t.outputs[0].text),l=!c||t.outputs.some(t=>{var s,n;let l="".concat(t.text," ").concat(Object.keys(t.namedScores)," ").concat((null===(s=t.gradingResult)||void 0===s?void 0:s.reason)||""," ").concat((null===(n=t.gradingResult)||void 0===n?void 0:n.comment)||"");return e.test(l)});return n&&l})}catch(e){return console.error("Invalid regular expression:",e.message),b}},[b,o,r,c,s,Z]),S=l.useMemo(()=>v.prompts.map((e,t)=>b.reduce((e,s)=>e+(s.outputs[t].pass?1:0),0)),[v.prompts,b]),N=l.useMemo(()=>v.prompts.map((e,t)=>b.reduce((e,s)=>{var n,l;return e+((null===(l=s.outputs[t].gradingResult)||void 0===l?void 0:null===(n=l.componentResults)||void 0===n?void 0:n.length)||0)},0)),[v.prompts,b]),R=l.useMemo(()=>v.prompts.map((e,t)=>b.reduce((e,s)=>{var n;let l=null===(n=s.outputs[t].gradingResult)||void 0===n?void 0:n.componentResults;return e+(l?l.filter(e=>e.pass).length:0)},0)),[v.prompts,b]),P=l.useMemo(()=>S.reduce((e,t,s,n)=>t>n[e]?s:e,0),[S]),E=S[P],O=l.useMemo(()=>(0,es.Cl)(),[]),T=l.useMemo(()=>v.vars.length>0?[O.group({id:"vars",header:()=>(0,n.jsx)("span",{className:"font-bold",children:"Variables"}),columns:v.vars.map((e,s)=>O.accessor(e=>e.vars[s],{id:"Variable ".concat(s+1),header:()=>(0,n.jsx)(eP,{text:e,maxLength:t,className:"font-bold"}),cell:e=>(0,n.jsx)(eR,{text:e.getValue(),maxLength:t}),size:50}))})]:[],[O,v.vars,t]),F=l.useCallback((e,t)=>w[e].outputs[t],[w]),M=l.useCallback(e=>w[e].outputs[0],[w]),L=l.useMemo(()=>[O.group({id:"prompts",header:()=>(0,n.jsx)("span",{className:"font-bold",children:"Outputs"}),columns:v.prompts.map((e,s)=>O.accessor(e=>(function(e){if("string"==typeof e){let t=e.startsWith("[PASS]"),s=e;return e.startsWith("[PASS]")?s=s.slice(6):e.startsWith("[FAIL]")&&(s=s.slice(6)),{text:s,pass:t,score:t?1:0}}return e})(e.outputs[s]),{id:"Prompt ".concat(s+1),header:()=>{var l,a,i,c,p,h,x,j;let g=S[s]&&b.length?(S[s]/b.length*100).toFixed(2):"0.00",f=S[s]===E&&0!==E,v="Prompt ".concat(s+1),y=o[v]||!1,Z=d?(0,n.jsxs)("div",{className:"prompt-detail",children:[N[s]?(0,n.jsxs)("div",{children:[(0,n.jsx)("strong",{children:"Asserts:"})," ",R[s],"/",N[s]," passed"]}):null,(null===(l=e.metrics)||void 0===l?void 0:l.totalLatencyMs)?(0,n.jsxs)("div",{children:[(0,n.jsx)("strong",{children:"Avg Latency:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.totalLatencyMs/b.length)," ","ms"]}):null,(null===(i=e.metrics)||void 0===i?void 0:null===(a=i.tokenUsage)||void 0===a?void 0:a.total)?(0,n.jsxs)("div",{children:[(0,n.jsx)("strong",{children:"Avg Tokens:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.tokenUsage.total/b.length)]}):null,(null===(c=e.metrics)||void 0===c?void 0:c.totalLatencyMs)&&(null===(h=e.metrics)||void 0===h?void 0:null===(p=h.tokenUsage)||void 0===p?void 0:p.completion)?(0,n.jsxs)("div",{children:[(0,n.jsx)("strong",{children:"Tokens/Sec:"})," ",Intl.NumberFormat(void 0,{maximumFractionDigits:0}).format(e.metrics.tokenUsage.completion/(e.metrics.totalLatencyMs/1e3))]}):null,(null===(x=e.metrics)||void 0===x?void 0:x.cost)?(0,n.jsxs)("div",{children:[(0,n.jsx)("strong",{children:"Cost:"})," $",e.metrics.cost.toPrecision(2)]}):null]}):null,C=e.provider?e.provider.split(":"):[],w=C.length>1?(0,n.jsxs)(n.Fragment,{children:[C[0],":",(0,n.jsx)("strong",{children:C.slice(1).join(":")})]}):(0,n.jsx)("strong",{children:e.provider});return(0,n.jsxs)("div",{className:"output-header",children:[(0,n.jsxs)("div",{className:"pills",children:[e.provider?(0,n.jsx)("div",{className:"provider",children:w}):null,(0,n.jsx)("div",{className:"summary",children:(0,n.jsxs)("div",{className:"highlight ".concat(f?"success":""),children:[(0,n.jsxs)("strong",{children:[g,"% passing"]})," (",S[s],"/",b.length," cases)"]})}),(null===(j=e.metrics)||void 0===j?void 0:j.namedScores)&&Object.keys(e.metrics.namedScores).length>0?(0,n.jsx)(ed,{lookup:e.metrics.namedScores}):null]}),(0,n.jsx)(eP,{className:"prompt-container",text:e.display,expandedText:e.raw,maxLength:t,resourceId:e.id}),Z,"failures"===r&&(0,n.jsx)(el.Z,{sx:{"& .MuiFormControlLabel-label":{fontSize:"0.75rem"}},control:(0,n.jsx)(m.Z,{checked:y,onChange:e=>u(v,e.target.checked)}),label:"Show failures"})]})},cell:e=>{var l,a;let i=F(e.row.index,s);return(0,n.jsx)(eI,{output:i,maxTextLength:t,rowIndex:e.row.index,promptIndex:s,onRating:y.bind(null,null!==(l=i.originalRowIndex)&&void 0!==l?l:e.row.index,null!==(a=i.originalPromptIndex)&&void 0!==a?a:s),firstOutput:M(e.row.index),showDiffs:"different"===r,searchText:c,showStats:d})}}))})],[O,v.prompts,S,b.length,E,o,d,N,R,t,u,r,c,F,M,y]),D=l.useMemo(()=>{let e=b.some(e=>e.description);return e?{accessorFn:e=>e.description||"",id:"description",header:()=>(0,n.jsx)("span",{className:"font-bold",children:"Description"}),cell:e=>(0,n.jsx)(eR,{text:String(e.getValue()),maxLength:t}),size:50}:null},[b,t]),U=l.useMemo(()=>{let e=[];return D&&e.push(D),e.push(...T,...L),e},[D,T,L]),[A,_]=l.useState({pageIndex:0,pageSize:50}),z=(0,en.b7)({data:w,columns:U,columnResizeMode:"onChange",getCoreRowModel:(0,es.sC)(),getPaginationRowModel:(0,es.G_)(),state:{columnVisibility:s,pagination:A}});return(0,n.jsxs)("div",{children:[(0,n.jsxs)("table",{className:"results-table firefox-fix ".concat(t<=25?"compact":""),style:{wordBreak:i},children:[(0,n.jsx)("thead",{children:z.getHeaderGroups().map(e=>(0,n.jsx)("tr",{className:"header",children:e.headers.map(e=>(0,n.jsxs)("th",{colSpan:e.colSpan,style:{width:e.getSize()},children:[e.isPlaceholder?null:(0,en.ie)(e.column.columnDef.header,e.getContext()),(0,n.jsx)("div",{onMouseDown:e.getResizeHandler(),onTouchStart:e.getResizeHandler(),className:"resizer ".concat(e.column.getIsResizing()?"isResizing":"")})]},e.id))},e.id))}),(0,n.jsx)("tbody",{children:z.getRowModel().rows.map((e,t)=>{let s=!1;return(0,n.jsx)("tr",{children:e.getVisibleCells().map(e=>{let l=e.column.id.startsWith("Variable")||"description"===e.column.id,a=!l&&!s;return a&&(s=!0),(0,n.jsx)("td",{style:{width:e.column.getSize()},className:"".concat(l?"variable":""," ").concat(0!==t||l?"":"first-prompt-row"," ").concat(a?"first-prompt-col":""),children:(0,en.ie)(e.column.columnDef.cell,e.getContext())},e.id)})},e.id)})})]}),z.getPageCount()>1&&(0,n.jsxs)(p.Z,{className:"pagination",sx:{display:"flex",alignItems:"center",gap:2},children:[(0,n.jsx)(h.Z,{onClick:()=>{_(e=>({...e,pageIndex:Math.max(e.pageIndex-1,0)})),window.scrollTo(0,0)},disabled:0===z.getState().pagination.pageIndex,variant:"contained",children:"Previous"}),(0,n.jsxs)(f.Z,{component:"span",sx:{display:"flex",alignItems:"center",gap:1},children:["Page",(0,n.jsx)(I.Z,{size:"small",type:"number",value:z.getState().pagination.pageIndex+1,onChange:e=>{let t=e.target.value?Number(e.target.value)-1:0;_(e=>({...e,pageIndex:Math.min(Math.max(t,0),z.getPageCount()-1)}))},InputProps:{style:{width:"60px",textAlign:"center"}},variant:"outlined"}),(0,n.jsxs)("span",{children:["of ",z.getPageCount()]})]}),(0,n.jsx)(h.Z,{onClick:()=>{_(e=>({...e,pageIndex:Math.min(e.pageIndex+1,z.getPageCount()-1)})),window.scrollTo(0,0)},disabled:z.getState().pagination.pageIndex+1>=z.getPageCount(),variant:"contained",children:"Next"}),(0,n.jsxs)(f.Z,{component:"span",sx:{display:"flex",alignItems:"center",gap:1},children:[(0,n.jsxs)(k.Z,{value:A.pageSize,onChange:e=>{_({pageIndex:0,pageSize:Number(e.target.value)}),window.scrollTo(0,0)},displayEmpty:!0,inputProps:{"aria-label":"Results per page"},size:"small",sx:{m:1,minWidth:80},children:[(0,n.jsx)(C.Z,{value:10,children:"10"}),(0,n.jsx)(C.Z,{value:50,children:"50"}),(0,n.jsx)(C.Z,{value:100,children:"100"}),(0,n.jsx)(C.Z,{value:500,children:"500"}),(0,n.jsx)(C.Z,{value:1e3,children:"1000"})]}),(0,n.jsx)("span",{children:"results per page"})]})]}),(0,n.jsx)(ew,{})]})}),eO=s(7175);function eT(e){let{open:t,onClose:a}=e,{config:i}=W(),r=l.useRef(null),[o,c]=l.useState(!1),[d,u]=l.useState("");l.useEffect(()=>{t&&(async()=>{let{default:e}=await Promise.resolve().then(s.bind(s,8339));u(e.dump(i))})()},[t,i]);let m=()=>{c(!1),a()};return(0,n.jsxs)(L.Z,{open:t,onClose:m,"aria-labelledby":"config-dialog-title",maxWidth:"md",fullWidth:!0,children:[(0,n.jsx)(V.Z,{id:"config-dialog-title",children:(0,n.jsxs)(p.Z,{display:"flex",justifyContent:"space-between",alignItems:"center",children:[(0,n.jsx)(f.Z,{variant:"h6",style:{flexGrow:1},children:"Config"}),(0,n.jsxs)(p.Z,{children:[(0,n.jsx)(P.Z,{title:"Copy to clipboard",children:(0,n.jsx)(H.Z,{onClick:()=>{r.current&&(r.current.select(),document.execCommand("copy"),c(!0))},children:o?(0,n.jsx)(eh.Z,{}):(0,n.jsx)(eO.Z,{})})}),(0,n.jsx)(P.Z,{title:"Download .yaml",children:(0,n.jsx)(H.Z,{onClick:()=>{let e=new Blob([d],{type:"text/yaml;charset=utf-8"}),t=URL.createObjectURL(e),s=document.createElement("a");s.href=t,s.download="config.yaml",document.body.appendChild(s),s.click(),document.body.removeChild(s),URL.revokeObjectURL(t)},children:(0,n.jsx)(M.Z,{})})})]})]})}),(0,n.jsx)(D.Z,{children:(0,n.jsx)(f.Z,{variant:"body1",component:"div",children:(0,n.jsx)("textarea",{ref:r,readOnly:!0,value:d,style:{width:"100%",minHeight:"400px",fontFamily:"monospace",border:"1px solid #ccc"}})})}),(0,n.jsx)(eo.Z,{children:(0,n.jsx)(h.Z,{onClick:m,color:"primary",children:"Close"})})]})}var eF=e=>{let{open:t,onClose:s,shareUrl:a}=e,i=(0,l.useRef)(null),[r,o]=(0,l.useState)(!1),c=()=>{s(),o(!1)};return(0,n.jsxs)(L.Z,{open:t,onClose:c,PaperProps:{style:{minWidth:"min(660px, 100%)"}},children:[(0,n.jsx)(V.Z,{children:"Your eval is ready to share"}),(0,n.jsxs)(D.Z,{children:[(0,n.jsx)(I.Z,{inputRef:i,value:a,fullWidth:!0,InputProps:{readOnly:!0,endAdornment:(0,n.jsx)(H.Z,{onClick:()=>{i.current&&(i.current.select(),document.execCommand("copy"),o(!0))},children:r?(0,n.jsx)(eh.Z,{}):(0,n.jsx)(eO.Z,{})})}}),(0,n.jsx)(eC.Z,{sx:{fontSize:"0.75rem"},children:"Shared URLs are deleted after 2 weeks."})]}),(0,n.jsx)(eo.Z,{children:(0,n.jsx)(h.Z,{onClick:c,color:"primary",children:"Close"})})]})},eM=s(33),eL=e=>{let{open:t,onClose:s}=e,{maxTextLength:l,setMaxTextLength:a,wordBreak:i,setWordBreak:r,showInferenceDetails:o,setShowInferenceDetails:c,renderMarkdown:d,setRenderMarkdown:u,prettifyJson:x,setPrettifyJson:j,showPrompts:g,setShowPrompts:v}=W();return(0,n.jsxs)(L.Z,{open:t,onClose:s,fullWidth:!0,maxWidth:"sm",children:[(0,n.jsx)(V.Z,{children:"Table View Settings"}),(0,n.jsxs)(D.Z,{children:[(0,n.jsx)(p.Z,{children:(0,n.jsx)(P.Z,{title:"Forcing line breaks makes it easier to adjust column widths to your liking",children:(0,n.jsx)(el.Z,{control:(0,n.jsx)(m.Z,{checked:"break-all"===i,onChange:e=>r(e.target.checked?"break-all":"break-word")}),label:"Force line breaks"})})}),(0,n.jsx)(p.Z,{children:(0,n.jsx)(el.Z,{control:(0,n.jsx)(m.Z,{checked:d,onChange:e=>u(e.target.checked)}),label:"Render model outputs as Markdown"})}),(0,n.jsx)(p.Z,{children:(0,n.jsx)(el.Z,{control:(0,n.jsx)(m.Z,{checked:x,onChange:e=>j(e.target.checked)}),label:"Prettify JSON outputs"})}),(0,n.jsx)(p.Z,{children:(0,n.jsx)(P.Z,{title:"Show the final prompt that produced the output in each cell.",children:(0,n.jsx)(el.Z,{control:(0,n.jsx)(m.Z,{checked:g,onChange:e=>v(e.target.checked)}),label:"Show full prompt in output cell"})})}),(0,n.jsx)(p.Z,{children:(0,n.jsx)(P.Z,{title:"Show detailed inference statistics such as latency, tokens used, cost, etc.",children:(0,n.jsx)(el.Z,{control:(0,n.jsx)(m.Z,{checked:o,onChange:e=>c(e.target.checked)}),label:"Show inference details"})})}),(0,n.jsxs)(p.Z,{maxWidth:"sm",children:[(0,n.jsxs)(f.Z,{mt:2,children:["Max text length: ",l]}),(0,n.jsx)(eM.ZP,{min:25,max:1e3,value:l,onChange:(e,t)=>a(t)})]})]}),(0,n.jsx)(eo.Z,{children:(0,n.jsx)(h.Z,{onClick:s,children:"Close"})})]})},eD=s(1938),eU=s(7580);let eA=eU.env.NEXT_PUBLIC_PROMPTFOO_REMOTE_API_BASE_URL||eU.env.NEXT_PUBLIC_PROMPTFOO_BASE_URL||eU.env.PROMPTFOO_REMOTE_API_BASE_URL||"https://api.promptfoo.dev",eW=eU.env.NEXT_PUBLIC_PROMPTFOO_BASE_URL||eU.env.PROMPTFOO_REMOTE_APP_BASE_URL||"https://app.promptfoo.dev";s(6911);let e_=(0,O.Z)(R.Z)(e=>{let{theme:t}=e;return{maxWidth:"100%",flexWrap:"wrap",[t.breakpoints.down("sm")]:{flexDirection:"column"}}});function ez(e){let{recentEvals:t,onRecentEvalSelected:s,defaultEvalId:r}=e,c=(0,o.useRouter)(),{table:R,config:O,setConfig:F,maxTextLength:M,wordBreak:L,showInferenceDetails:D,evalId:U}=W(),{setStateFromConfig:A}=(0,eD.o)(),[z,V]=l.useState({}),[B,J]=l.useState([]),[H,G]=l.useState(""),[X]=(0,T.Nr)(H,1e3),[Y,q]=l.useState({}),[$,K]=l.useState("all"),[ee,et]=l.useState(!1),[es,en]=l.useState(""),[el,ea]=l.useState(!1),ei=async()=>{ea(!0);try{let e=await fetch("".concat(eA,"/api/eval"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({data:{version:2,createdAt:new Date().toISOString(),results:{table:R},config:O}})}),{id:t}=await e.json(),s="".concat(eW,"/eval/").concat(t);en(s),et(!0)}catch(e){alert("Sorry, something went wrong.")}finally{ea(!1)}},[er,eo]=l.useState(!1),[ec,ed]=l.useState(!1);(0,a.Z)(R,"Table data must be loaded before rendering ResultsView");let{head:eu}=R,ep=async()=>{(0,a.Z)(O,"Config must be loaded before clicking its description");let e=window.prompt("Enter new description:",O.description);if(null!==e&&e!==O.description){let t={...O,description:e};try{let e=await fetch("".concat(await (0,ek.b)(),"/api/eval/").concat(U),{method:"PATCH",headers:{"Content-Type":"application/json"},body:JSON.stringify({config:t})});if(!e.ok)throw Error("Network response was not ok");F(t)}catch(e){console.error("Failed to update table:",e)}}},eh=async()=>{if(window.confirm("Are you sure you want to delete this evaluation?"))try{let e=await fetch("".concat(await (0,ek.b)(),"/api/eval/").concat(U),{method:"DELETE"});if(!e.ok)throw Error("Network response was not ok");c.push("/")}catch(e){console.error("Failed to delete evaluation:",e),alert("Failed to delete evaluation")}},em=[...eu.vars.map((e,t)=>({value:"Variable ".concat(t+1),label:"Var ".concat(t+1,": ").concat(eu.vars[t].length>100?eu.vars[t].slice(0,97)+"...":eu.vars[t]),group:"Variables"})),...eu.prompts.map((e,t)=>({value:"Prompt ".concat(t+1),label:"Prompt ".concat(t+1,": ").concat(eu.prompts[t].display.length>100?eu.prompts[t].display.slice(0,97)+"...":eu.prompts[t].display),group:"Prompts"}))];l.useEffect(()=>{J(em.map(e=>e.value))},[eu]);let[ex,ej]=l.useState(null);return(0,n.jsxs)("div",{style:{marginLeft:"1rem",marginRight:"1rem"},children:[(0,n.jsx)(p.Z,{mb:2,sx:{display:"flex",alignItems:"center"},children:(0,n.jsxs)(f.Z,{variant:"h5",sx:{flexGrow:1},children:[(0,n.jsx)("span",{className:"description",onClick:ep,children:(null==O?void 0:O.description)||U})," ",(null==O?void 0:O.description)&&(0,n.jsx)("span",{className:"description-filepath",children:U})]})}),(0,n.jsx)(p.Z,{py:"md",children:(0,n.jsxs)(e_,{direction:"row",spacing:4,alignItems:"center",children:[(0,n.jsx)(p.Z,{children:t&&t.length>0&&(0,n.jsx)(g.Z,{sx:{m:1,minWidth:200},size:"small",children:(0,n.jsx)(u.Z,{size:"small",options:t,renderOption:(e,t)=>(0,l.createElement)("li",{...e,key:t.id},t.label),style:{width:350},renderInput:e=>(0,n.jsx)(I.Z,{...e,label:"Eval run",variant:"outlined"}),defaultValue:t.find(e=>e.id===r)||t[0],onChange:(e,t)=>{t&&t.id&&s(t.id)},disableClearable:!0})})}),(0,n.jsx)(p.Z,{children:(0,n.jsxs)(g.Z,{sx:{m:1,minWidth:200,maxWidth:350},size:"small",children:[(0,n.jsx)(v.Z,{id:"visible-columns-label",children:"Columns"}),(0,n.jsx)(k.Z,{labelId:"visible-columns-label",id:"visible-columns",multiple:!0,value:B,onChange:e=>{let{target:{value:t}}=e;J("string"==typeof t?t.split(","):t);let s=[...eu.vars.map((e,t)=>"Variable ".concat(t+1)),...eu.prompts.map((e,t)=>"Prompt ".concat(t+1))],n={};s.forEach(e=>{n[e]=("string"==typeof t?t.split(","):t).includes(e)}),V(n)},input:(0,n.jsx)(w.Z,{label:"Visible columns"}),renderValue:e=>e.join(", "),children:em.map(e=>(0,n.jsxs)(C.Z,{dense:!0,value:e.value,children:[(0,n.jsx)(m.Z,{checked:B.indexOf(e.value)>-1}),(0,n.jsx)(b.Z,{primary:e.label})]},e.value))})]})}),(0,n.jsx)(p.Z,{children:(0,n.jsxs)(g.Z,{sx:{minWidth:180},size:"small",children:[(0,n.jsx)(v.Z,{id:"failure-filter-mode-label",children:"Display"}),(0,n.jsxs)(k.Z,{labelId:"filter-mode-label",id:"filter-mode",value:$,onChange:e=>{let t=e.target.value;K(t);let s={};eu.prompts.forEach((e,n)=>{s["Prompt ".concat(n+1)]="failures"===t}),q(s)},label:"Filter",children:[(0,n.jsx)(C.Z,{value:"all",children:"Show all results"}),(0,n.jsx)(C.Z,{value:"failures",children:"Show failures only"}),(0,n.jsx)(C.Z,{value:"different",children:"Show different only"})]})]})}),(0,n.jsx)(p.Z,{children:(0,n.jsx)(I.Z,{sx:{minWidth:180},size:"small",label:"Search",placeholder:"Text or regex",value:H,onChange:e=>{G(e.target.value)}})}),(0,n.jsx)(p.Z,{flexGrow:1}),(0,n.jsx)(p.Z,{display:"flex",justifyContent:"flex-end",children:(0,n.jsxs)(e_,{direction:"row",spacing:2,children:[(0,n.jsx)(h.Z,{color:"primary",onClick:e=>{ej(e.currentTarget)},startIcon:(0,n.jsx)(d.Z,{}),children:"Eval actions"}),O&&(0,n.jsxs)(Z.Z,{id:"eval-actions-menu",anchorEl:ex,keepMounted:!0,open:!!ex,onClose:()=>{ej(null)},children:[(0,n.jsx)(P.Z,{title:"View the configuration that defines this eval",placement:"left",children:(0,n.jsxs)(C.Z,{onClick:()=>eo(!0),children:[(0,n.jsx)(y.Z,{children:(0,n.jsx)(E.Z,{fontSize:"small"})}),"View YAML"]})}),(0,n.jsx)(P.Z,{title:"Edit this eval in the web UI",placement:"left",children:(0,n.jsxs)(C.Z,{onClick:()=>{A(O),c.push("/setup/")},children:[(0,n.jsx)(y.Z,{children:(0,n.jsx)(j.Z,{fontSize:"small"})}),"Edit Eval"]})}),(0,n.jsx)(_,{}),(null==O?void 0:O.sharing)&&(0,n.jsx)(P.Z,{title:"Generate a unique URL that others can access",placement:"left",children:(0,n.jsxs)(C.Z,{onClick:ei,disabled:el,children:[(0,n.jsx)(y.Z,{children:el?(0,n.jsx)(i.Z,{size:16}):(0,n.jsx)(N.Z,{fontSize:"small"})}),"Share"]})}),(0,n.jsx)(P.Z,{title:"Delete this eval",placement:"left",children:(0,n.jsxs)(C.Z,{onClick:eh,children:[(0,n.jsx)(y.Z,{children:(0,n.jsx)(x.Z,{fontSize:"small"})}),"Delete"]})})]}),(0,n.jsx)(P.Z,{title:"Edit table view settings",placement:"bottom",children:(0,n.jsx)(h.Z,{color:"primary",onClick:()=>ed(!0),startIcon:(0,n.jsx)(S.Z,{}),children:"Table Settings"})})]})})]})}),(0,n.jsx)(Q,{columnVisibility:z}),(0,n.jsx)(eE,{maxTextLength:M,columnVisibility:z,wordBreak:L,showStats:D,filterMode:$,failureFilter:Y,searchText:X,onFailureFilterToggle:(e,t)=>{q(s=>({...s,[e]:t}))}}),(0,n.jsx)(eT,{open:er,onClose:()=>eo(!1)}),(0,n.jsx)(eF,{open:ee,onClose:()=>et(!1),shareUrl:es}),(0,n.jsx)(eL,{open:ec,onClose:()=>ed(!1)})]})}var eV=s(279);async function eB(){let e=(0,c.createClientComponentClient)(),{data:{user:t}}=await e.auth.getUser();(0,a.Z)(t,"User not logged in");let{data:s,error:n}=await e.from("EvaluationResult").select("id, createdAt").eq("user_id",t.id).order("createdAt",{ascending:!1}).limit(100);return s||[]}async function eJ(e){let t=(0,c.createClientComponentClient)(),{data:s,error:n}=await t.from("EvaluationResult").select("*").eq("id",e).single();return s}function eH(e){var t;let{fetchId:s,preloadedData:c,recentEvals:d,defaultEvalId:u}=e,p=(0,o.useRouter)(),{table:h,setTable:m,setConfig:x,setEvalId:j}=W(),[g,f]=l.useState(!1),[v,b]=l.useState(!1),[y,Z]=l.useState(d||[]),C=async()=>{let e=await fetch("".concat(await (0,ek.b)(),"/api/results"),{cache:"no-store"}),t=await e.json();return Z(t.data),t.data},w=l.useCallback(async e=>{let t=await fetch("".concat(await (0,ek.b)(),"/api/results/").concat(e),{cache:"no-store"}),s=await t.json();m(s.data.results.table),x(s.data.config),j(e)},[m,x,j]),k=async e=>{eV.Ox?(f(!1),p.push("/eval/remote:".concat(encodeURIComponent(e)))):p.push("/eval/?evalId=".concat(encodeURIComponent(e)))},[S,N]=l.useState(u||(null===(t=y[0])||void 0===t?void 0:t.id)),R=(0,o.useSearchParams)(),I=R?R.get("evalId"):null;return(l.useEffect(()=>{if(I){let e=async()=>{await w(I),f(!0),N(I),C()};e()}else if(c){var e;m(null===(e=c.data.results)||void 0===e?void 0:e.table),x(c.data.config),f(!0)}else if(s){let e=async()=>{var e;let t="".concat(eA,"/api/eval/").concat(s);console.log("Fetching eval from remote server",t);let n=await fetch(t);if(!n.ok){b(!0);return}let l=await n.json();m(null===(e=l.data.results)||void 0===e?void 0:e.table),x(l.data.config),f(!0)};e()}else if(eV.T8)(0,ek.b)().then(e=>{let t=(0,r.io)(e);return t.on("init",e=>{console.log("Initialized socket connection",e),f(!0),m(null==e?void 0:e.results.table),x(null==e?void 0:e.config),C().then(e=>{var t,s;N(null===(t=e[0])||void 0===t?void 0:t.id),j(null===(s=e[0])||void 0===s?void 0:s.id)})}),t.on("update",e=>{console.log("Received data update",e),m(e.results.table),x(e.config),C().then(e=>{var t;let s=null===(t=e[0])||void 0===t?void 0:t.id;s&&(N(s),j(s))})}),()=>{t.disconnect()}});else if(eV.Ox)eB().then(e=>{Z(e.map(e=>({id:e.id,label:e.createdAt}))),e.length>0&&eJ(e[0].id).then(t=>{(0,a.Z)(t,"Eval not found");let s=t.results,n=t.config;N(e[0].id),m(s.table),x(n),f(!0)})});else{let e=async()=>{let e=await C();if(!(e.length>0))return(0,n.jsx)("div",{className:"notice",children:"No evals yet. Share some evals to this server and they will appear here."});{let t=await (0,ek.b)(),s=e[0].id,n=await fetch("".concat(t,"/api/results/").concat(s)),l=await n.json();m(l.data.results.table),x(l.data.config),f(!0),N(s),j(s)}};e()}},[s,m,x,j,w,c,N,I]),v)?(0,n.jsx)("div",{className:"notice",children:"404 Eval not found"}):g&&h?(0,n.jsx)(ez,{defaultEvalId:S,recentEvals:y,onRecentEvalSelected:k}):(0,n.jsxs)("div",{className:"notice",children:[(0,n.jsx)("div",{children:(0,n.jsx)(i.Z,{size:22})}),(0,n.jsx)("div",{children:"Loading eval data"})]})}s(5038)},279:function(e,t,s){"use strict";s.d(t,{Ox:function(){return a},T8:function(){return l},eA:function(){return i}});var n=s(7580);let l=!n.env.NEXT_PUBLIC_PROMPTFOO_BUILD_STANDALONE_SERVER,a=!!n.env.NEXT_PUBLIC_PROMPTFOO_USE_SUPABASE,i=""},1938:function(e,t,s){"use strict";s.d(t,{o:function(){return a}});var n=s(4660),l=s(4810);let a=(0,n.Ue)()((0,l.tJ)((e,t)=>({env:{},testCases:[],description:"",providers:[],prompts:[],defaultTest:{},evaluateOptions:{},setEnv:t=>e({env:t}),setTestCases:t=>e({testCases:t}),setDescription:t=>e({description:t}),setProviders:t=>e({providers:t}),setPrompts:t=>e({prompts:t}),setDefaultTest:t=>e({defaultTest:t}),setEvaluateOptions:t=>e({evaluateOptions:t}),setStateFromConfig:t=>{let s={};t.description&&(s.description=t.description||""),t.tests&&(s.testCases=t.tests),t.providers&&(s.providers=t.providers),t.prompts&&("string"==typeof t.prompts?s.prompts=[t.prompts]:Array.isArray(t.prompts)?s.prompts=t.prompts.filter(e=>!e.endsWith(".txt")&&!e.endsWith(".json")&&!e.endsWith(".yaml")):console.warn("Invalid prompts config",t.prompts)),e(s)},getTestSuite:()=>{let{description:e,testCases:s,providers:n,prompts:l,env:a}=t();return{env:a,description:e,providers:n,prompts:l,tests:s}}}),{name:"promptfoo",skipHydration:!0}))},8715:function(){},5038:function(){},2604:function(){},6911:function(){},5777:function(){}}]);