opik 1.11.12 → 1.11.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,14 +19,14 @@ ${n}`),n}isChatPrompt(o){try{let e=JSON.parse(o);return Array.isArray(e)&&e.leng
19
19
  `),(c[d].length>0||d<c.length-1)&&(l+=qS()+c[d]);return s.call(o==="stdout"?process.stdout:process.stderr,l,i,a)}return s.call(o==="stdout"?process.stdout:process.stderr,t,i,a)}}function mx(){var s;mb||(lx=typeof((s=process.stdout)==null?void 0:s.isTTY)=="boolean"&&process.stdout.isTTY,mb=process.stdout.write.bind(process.stdout),sx=process.stderr.write.bind(process.stderr),process.stdout.write=cx(mb,"stdout"),process.stderr.write=cx(sx,"stderr"));}function dx(s){var e;let o=(e=ec.get(s))!=null?e:[];return ec.delete(s),o}var eA=500,rA=3e5,ux=1e4,rc=class{constructor(o,e,t){this.shutdownRequested=false;this.heartbeatTimer=null;this.pollTimer=null;this.cancelledJobs=new Map;this.activeTasks=new Set;this.pollFailures=0;var i,a;this.api=o,this.runnerId=e,this.heartbeatIntervalMs=(i=t==null?void 0:t.heartbeatIntervalMs)!=null?i:5e3,this.backoffCapMs=(a=t==null?void 0:t.backoffCapMs)!=null?a:3e4;}start(){this.startHeartbeat(),this.startPolling();}shutdown(){this.shutdownRequested=true,this.heartbeatTimer&&(clearInterval(this.heartbeatTimer),this.heartbeatTimer=null),this.pollTimer&&(clearTimeout(this.pollTimer),this.pollTimer=null);}startHeartbeat(){let o=setInterval(()=>{this.heartbeatTick().catch(e=>{v.debug("Heartbeat tick error",{error:e});});},this.heartbeatIntervalMs);o.unref(),this.heartbeatTimer=o;}async heartbeatTick(){var o;if(!this.shutdownRequested)try{let t=(o=(await this.api.runners.heartbeat(this.runnerId)).cancelledJobIds)!=null?o:[],i=Date.now();for(let a of t)this.cancelledJobs.set(a,i);this.pruneCancelledJobs(i);}catch(e){if(e instanceof Ar){v.info("Runner deregistered (410), shutting down"),this.shutdown();return}v.debug("Heartbeat error",{error:e});}}startPolling(){this.pollTick(1e3);}pollTick(o){if(this.shutdownRequested)return;(async()=>{if(this.shutdownRequested)return;let t=o;try{let i=await this.api.runners.nextJob(this.runnerId);if(this.pollFailures=0,t=1e3,i===null){this.scheduleNextPoll(eA,t);return}this.spawnJob(i);}catch(i){if(this.pollFailures++,this.pollFailures===1){let n=i instanceof x?i.statusCode:void 0;v.warn("Unable to reach Opik server"+(n?` (API ${n})`:"")+". Retrying...",{error:i});}else v.debug("Poll error",{error:i});let a=this.jitteredBackoff(t);t=Math.min(t*2,this.backoffCapMs),this.scheduleNextPoll(a,t);return}this.scheduleNextPoll(0,t);})().catch(t=>{this.pollFailures++,this.pollFailures===1?v.warn("Unable to reach Opik server. Retrying...",{error:t}):v.debug("Poll tick error",{error:t}),this.scheduleNextPoll(this.jitteredBackoff(o),Math.min(o*2,this.backoffCapMs));});}scheduleNextPoll(o,e){if(this.shutdownRequested)return;let t=setTimeout(()=>this.pollTick(e),o);t.unref(),this.pollTimer=t;}spawnJob(o){let e=this.executeJob(o).finally(()=>{this.activeTasks.delete(e);});this.activeTasks.add(e);}async executeJob(o){var n,p;let e=(n=o.id)!=null?n:"",t=(p=o.agentName)!=null?p:"";if(this.cancelledJobs.has(e)){v.debug(`Skipping cancelled job ${e}`),this.cancelledJobs.delete(e);return}if(!Ft().get(t)){v.error(`Unknown agent '${t}' for job ${e}`),await this.reportJobResult(e,{status:"failed",error:`Unknown agent: ${t}`,traceId:o.traceId});return}let a=oe();await this.reportJobResult(e,{status:"running",traceId:a});try{let c=await this.invokeAgent(o,e,a);await Op().catch(l=>{v.debug("Flush error after job execution",{error:l});}),await this.sendJobLogs(e),await this.reportJobResult(e,{status:"completed",result:this.normalizeResult(c),traceId:a});}catch(c){await Op().catch(()=>{}),await this.sendJobLogs(e);let l=o.timeout,d=c instanceof Qa?`Job timed out after ${l}s`:c instanceof Error?`${c.name}: ${c.message}`:String(c);c instanceof Qa?v.warn(`Job ${e} timed out after ${l}s`):v.error(`Job ${e} failed: ${d}`),await this.reportJobResult(e,{status:"failed",error:d,traceId:a});}}async invokeAgent(o,e,t){var u,f;let i=(u=o.agentName)!=null?u:"",a=(f=o.inputs)!=null?f:{},n=o.maskId,p=o.blueprintName,c=Ft().get(i),l=c.params.map(h=>tA(a[h.name],h.type)),d=()=>qw({traceId:t,jobId:e},()=>n||p?Zp({blueprintName:p,maskId:n},()=>c.func(...l)):c.func(...l)),m=Promise.resolve(d()),g=o.timeout;return g&&g>0?Promise.race([m,new Promise((h,R)=>{setTimeout(()=>R(new Qa("Job timed out")),g*1e3).unref();})]):m}normalizeResult(o){return o!=null&&typeof o!="string"&&typeof o!="number"&&typeof o!="boolean"&&!Array.isArray(o)&&typeof o!="object"&&(o=String(o)),typeof o=="object"&&o!==null&&!Array.isArray(o)?o:{result:o}}async reportJobResult(o,e){try{await this.api.runners.reportJobResult(o,e);}catch(t){v.warn(`Failed to report result for job ${o}`,{error:t});}}async sendJobLogs(o){let e=dx(o);if(e.length!==0)try{await this.api.runners.appendJobLogs(o,{body:e});}catch{v.debug(`Failed to send logs for job ${o}`);}}pruneCancelledJobs(o){let e=o-rA;for(let[t,i]of this.cancelledJobs)i<=e&&this.cancelledJobs.delete(t);if(this.cancelledJobs.size>ux){let t=[...this.cancelledJobs.entries()].sort((a,n)=>a[1]-n[1]),i=t.length-ux;for(let a=0;a<i;a++)this.cancelledJobs.delete(t[a][0]);}}jitteredBackoff(o){return Math.min(o,this.backoffCapMs)*(.5+Math.random()*.5)}};function tA(s,o){if(s==null)return s;switch(o){case "boolean":return typeof s=="boolean"?s:Nt(String(s),"boolean");case "float":case "integer":{if(typeof s=="number")return s;let e=Nt(String(s),"float");if(typeof e=="number"&&Number.isNaN(e))throw new TypeError(`Cannot cast "${s}" to number`);return o==="integer"?Math.trunc(e):e}default:return typeof s=="string"?s:Array.isArray(s)||typeof s=="object"&&s!==null?JSON.stringify(s):String(s)}}var Qa=class extends Error{constructor(o){super(o),this.name="TimeoutError";}};var gx=false,fx=false;function ub(){process.env.OPIK_RUNNER_MODE==="true"&&(gx||(gx=true,oA().catch(s=>{v.error("Runner activation failed",{error:s});})));}async function oA(){var l,d;let s=(l=process.env.OPIK_RUNNER_ID)!=null?l:"",o=(d=process.env.OPIK_PROJECT_NAME)!=null?d:"";if(!s){v.error("OPIK_RUNNER_ID not set, cannot activate runner");return}iA(s,o),mx();let e=new cr,t=e.api;function i(m){return {description:m.docstring,language:"typescript",params:m.params.map(g=>({name:g.name,type:g.type})),timeout:0}}function a(m){let g=Ft(),u={};for(let[f,h]of g)u[f]=i(h);t.runners.registerAgents(s,{body:u}).catch(f=>{v.debug("Failed to sync agents after new registration",{error:f});});}await new Promise(m=>setImmediate(m));let n=Ft();if(n.size>0){let m={};for(let[g,u]of n)m[g]=i(u);try{await t.runners.registerAgents(s,{body:m});}catch{v.debug("Failed to register agents on startup");}}ox(a),v.info("Runner activated");let p=new rc(t,s);p.start();let c=()=>{fx=true,v.info("Received shutdown signal, stopping runner..."),p.shutdown(),e.flush().catch(()=>{}).finally(()=>process.exit(0));};process.once("SIGTERM",c),process.once("SIGINT",c),process.on("exit",()=>{fx||console.error(`
20
20
  Warning: The process exited without blocking. The runner needs the process to stay alive to process jobs.
21
21
  Use a server framework like express or fastify to keep the process running.
22
- `);});}function iA(s,o){let e=[" \u2800\u20DD","opik ",`runner: ${s}`];o&&e.push(` project: ${o}`),console.log(e.join("")),console.log();}var nA="track.decorator",Or=new AsyncLocalStorage,fb=()=>{let{span:s,trace:o}=Or.getStore()||{};if(!(!s||!o))return {span:s,trace:o}};function sA(s){return !!s&&(typeof s=="object"||typeof s=="function")&&typeof s.then=="function"}function pA({name:s,parentSpan:o,projectName:e,trace:t,type:i="llm"}){v.debug("Creating new span:",{name:s,parentSpan:o==null?void 0:o.data.id,projectName:e,type:i});let a=t;if(!a){let p=Ow();a=qp().trace({name:s,projectName:e,...p?{id:p}:{}});}let n=a.span({name:s,parentSpanId:o==null?void 0:o.data.id,projectName:e,type:i});return v.debug("Span created with ID:",n.data.id),{span:n,trace:a}}function cA({args:s,span:o,trace:e}){if(v.debug("Starting span execution:",{spanId:o.data.id,traceId:e==null?void 0:e.data.id}),s.length===0)return;let t={arguments:s};v.debug("Recording span input"),o.update({input:t}),e&&(v.debug("Recording trace input"),e.update({input:t}));}function hx({result:s,span:o,trace:e,enrichSpan:t}){v.debug("Recording successful execution:",{spanId:o.data.id,traceId:e==null?void 0:e.data.id});let i=typeof s=="object"?s:{result:s},a=new Date,n={endTime:a,output:i};if(t){let p=t(s);Object.assign(n,p);}o.update(n),e&&e.update({endTime:a,output:i});}function yx({span:s,error:o,trace:e}){var t,i;v.error("Recording execution error:",{spanId:s.data.id,traceId:e==null?void 0:e.data.id,error:o instanceof Error?{name:o.name,message:o.message,stack:o.stack}:o}),o instanceof Error&&s.update({errorInfo:{message:o.message,exceptionType:o.name,traceback:(t=o.stack)!=null?t:""}}),s.end(),e&&(e.update({errorInfo:{message:o.message,exceptionType:o.name,traceback:(i=o.stack)!=null?i:""}}),e.end());}function tc({name:s,projectName:o,type:e,enrichSpan:t}={},i){return function(...n){let p=Or.getStore(),{span:c,trace:l}=pA({name:s!=null?s:i.name||nA,parentSpan:p==null?void 0:p.span,projectName:o,trace:p==null?void 0:p.trace,type:e}),d=!p,m=this;return Or.run({span:c,trace:l},()=>{let g=d?l:void 0;try{cA({args:n,span:c,trace:g});let u=i.apply(m,n);return sA(u)?u.then(f=>(hx({span:c,result:f,trace:g,enrichSpan:t}),f),f=>{throw yx({span:c,error:f,trace:g}),f}):(hx({span:c,result:u,trace:g,enrichSpan:t}),u)}catch(u){throw yx({span:c,error:u,trace:g}),u}})}}function xr(s,o){if(typeof s=="function")return tc({},s);let e=s;if(o){let t=tc(e,o);return e.entrypoint&&lA(o,t,e),t}return function(...t){if(t.length===2&&typeof t[1]=="object"&&t[1]!==null&&"kind"in t[1]){let[n,p]=t;if(p.kind!=="method")throw new Error("track decorator is only applicable to methods");return tc(e,n)}let[,,i]=t;if(!i||typeof i.value!="function")throw new Error("track decorator can only be applied to methods");let a=i.value;return i.value=tc(e,a),i}}function lA(s,o,e){var l;let t=e.name||s.name;if(!t)throw new Error("entrypoint functions must have a name. Provide one via track({ name: '...' }) or use a named function.");let i=e.projectName||qp().config.projectName,a=(l=e.params)!=null?l:ix(s),n=new Set(["string","number","float","integer","boolean"]),p=a.filter(d=>!n.has(d.type));if(p.length>0){let d=p.map(m=>`${m.name} (${m.type})`);v.warn(`Could not resolve type for parameter(s) [${d.join(", ")}] in "${t}". These parameters will default to 'string' and cannot be modified via the UI. Consider using a supported type (string, number, boolean) or choosing a different entrypoint.`);}let c=a.map(d=>({...d,type:d.type==="number"?"float":d.type}));tx({func:o,name:t,project:i,params:c,docstring:""}),ub();}var gb=null;function qp(){return gb===null&&(gb=new cr),gb}function mA(s,o){if(s!=null)return o==="prompt"||o==="prompt_commit"?ib(s,o):s}var dA=new Set(["blueprintId","blueprintVersion","isFallback"]);function hb(s){let{values:o,fieldNames:e,blueprintId:t,blueprintVersion:i,isFallback:a,maskId:n}=s,p={...o};return Object.defineProperties(p,{blueprintId:{value:t,enumerable:false,writable:false},blueprintVersion:{value:i,enumerable:false,writable:false},isFallback:{value:a,enumerable:false,writable:false}}),new Proxy(p,{get(l,d){return typeof d!="string"||dA.has(d)||e.has(d)&&uA({blueprintId:t,blueprintVersion:i,maskId:n,fieldNames:e,values:o}),Reflect.get(l,d)}})}function uA(s){let o=fb();if(!o)return;let{blueprintId:e,blueprintVersion:t,maskId:i,fieldNames:a,values:n}=s,p={};for(let d of a){let m=n[d];if(m===void 0)continue;let g=Yp(m);p[d]={value:mA(m,g),type:g};}let c={_blueprint_id:e,blueprint_version:t,values:p};i!==void 0&&(c._mask_id=i);let l={agent_configuration:c};o.span.update({metadata:l}),o.trace.update({metadata:l});}var gA=300,fA=1e3,bx=100;function Rx(){let s=process.env.OPIK_CONFIG_TTL_SECONDS;if(s!==void 0){let o=parseInt(s,10);if(!isNaN(o))return o}return gA}var yb=class{constructor(o){this._blueprint=null;this._lastFetchMs=null;this._refreshCallback=null;this._ttlMs=o*1e3;}setRefreshCallback(o){this._refreshCallback===null&&(this._refreshCallback=o);}update(o){this._blueprint=o,this._lastFetchMs=Date.now();}getBlueprint(){return this._blueprint}isStale(){return this._lastFetchMs===null?true:Date.now()-this._lastFetchMs>=this._ttlMs}async tryBackgroundRefresh(){if(this._refreshCallback!==null)try{let o=await this._refreshCallback();o!==null&&this.update(o);}catch(o){v.debug("Background blueprint cache refresh failed",o);}}},bb=class{constructor(){this._entries=new Map;this._intervalHandle=null;this._refreshRunning=false;}getOrCreate(o,e,t,i=null){let a=`${o}::${e!=null?e:""}::${t!=null?t:""}::${i!=null?i:""}`,n=this._entries.get(a);return n||(n=new yb(Rx()),this._entries.set(a,n)),n}ensureRefreshTimerStarted(){if(this._intervalHandle!==null)return;let o=Math.max(Rx()*1e3,fA);this._intervalHandle=setInterval(()=>{this._refreshAllStale();},o),this._intervalHandle.unref();}async _refreshAllStale(){if(!this._refreshRunning){this._refreshRunning=true;try{let o=[...this._entries.values()].filter(e=>e.isStale());for(let e=0;e<o.length;e+=bx)await Promise.all(o.slice(e,e+bx).map(t=>t.tryBackgroundRefresh()));}finally{this._refreshRunning=false;}}}clear(){this._intervalHandle!==null&&(clearInterval(this._intervalHandle),this._intervalHandle=null),this._refreshRunning=false,this._entries.clear();}},Rb=new bb;function wx(s,o,e,t=null){return Rb.getOrCreate(s,o,e,t)}function wb(s,o,e,t,i,a=null){let n=Rb.getOrCreate(s,o,e,a);t!==null&&n.update(t),i!==null&&e===null&&(n.setRefreshCallback(i),Rb.ensureRefreshTimerStarted());}var pb=[],xx=false;var cr=class{constructor(o){this.displayTraceLog=(o,e)=>{if(e===this.lastProjectNameLogged||!this.config.apiUrl)return;let t=Tw(o,this.config.apiUrl);v.info(`Started logging traces to the "${e}" project at ${nn(t)}`),this.lastProjectNameLogged=e;};this.trace=o=>{v.debug("Creating new trace with data:",o);let e=this.resolveProjectName(o.projectName),t=new Tp({id:oe(),startTime:new Date,source:"sdk",...o,projectName:e},this);return this.traceBatchQueue.create(t.data),v.debug("Trace added to the queue with ID:",t.data.id),this.displayTraceLog(t.data.id,e),t};this.getDataset=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting dataset with name "${o}"`);try{await this.datasetBatchQueue.flush();let i=await this.api.datasets.getDatasetByIdentifier({datasetName:o,projectName:t});return new Ke({...i,projectName:t},this)}catch(i){throw i instanceof x&&i.statusCode===404?new $r(o):i}};this.createDataset=async(o,e,t)=>{let i=this.resolveProjectName(t);v.debug(`Creating dataset with name "${o}"`);let a=new Ke({name:o,description:e,projectName:i},this);try{return this.datasetBatchQueue.create({name:a.name,description:a.description,id:a.id,projectName:i}),v.debug("Dataset added to the queue with name:",a.name),a}catch(n){throw v.error(`Failed to create dataset "${o}"`,{error:n}),new Error(`Error creating dataset "${o}": ${n}`)}};this.getOrCreateDataset=async(o,e,t)=>{v.debug(`Attempting to retrieve or create dataset with name: "${o}"`);try{return await this.getDataset(o,t)}catch(i){if(i instanceof $r)return v.info(`Dataset "${o}" not found. Proceeding to create a new one.`),this.createDataset(o,e,t);throw v.error(`Error retrieving dataset "${o}":`,i),i}};this.getDatasets=async(o=100,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting all datasets (limit: ${o})`);try{await this.datasetBatchQueue.flush();let i;try{i=await this.getProjectIdByName(t);}catch{}let a=await this.api.datasets.findDatasets({size:o,...i&&{projectId:i}}),n=[];for(let p of a.content||[])n.push(new Ke({...p,projectName:t},this));return v.info(`Retrieved ${n.length} datasets`),n}catch(i){throw v.error("Failed to retrieve datasets",{error:i}),new Error("Failed to retrieve datasets")}};this.deleteDataset=async(o,e)=>{v.debug(`Deleting dataset with name "${o}"`);try{let t=await this.getDataset(o,e);if(!t.id)throw new Error(`Cannot delete dataset "${o}": ID not available`);this.datasetBatchQueue.delete(t.id);}catch(t){throw v.error(`Failed to delete dataset "${o}"`,{error:t}),new Error(`Failed to delete dataset "${o}": ${t}`)}};this.createTestSuite=async o=>{v.debug(`Creating test suite with name "${o.name}"`);let{TestSuite:e}=await import('./suite-VJR7SY57.js');return e.create(this,o)};this.getTestSuite=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting test suite with name "${o}"`);let{TestSuite:i}=await import('./suite-VJR7SY57.js');return i.get(this,o,t)};this.getOrCreateTestSuite=async o=>{v.debug(`Attempting to retrieve or create test suite with name: "${o.name}"`);let{TestSuite:e}=await import('./suite-VJR7SY57.js');return e.getOrCreate(this,o)};this.deleteTestSuite=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Deleting test suite with name "${o}"`);let{TestSuite:i}=await import('./suite-VJR7SY57.js');await i.delete(this,o,t);};this.getTestSuites=async(o=1e3,e)=>{var i;let t=this.resolveProjectName(e);v.debug(`Getting all test suites (limit: ${o})`);try{await this.datasetBatchQueue.flush();let a=await this.resolveProjectId(t),{TestSuite:n}=await import('./suite-VJR7SY57.js'),p=[],c=1,l=100;for(;p.length<o;){let m=(i=(await this.api.datasets.findDatasets({page:c,size:l,...a&&{projectId:a}})).content)!=null?i:[];if(m.length===0)break;for(let g of m){if(p.length>=o)break;g.type===Xc.EvaluationSuite&&p.push(new n(new Ke({...g,projectName:t},this),this));}c++;}return v.info(`Retrieved ${p.length} test suites`),p}catch(a){throw v.error("Failed to retrieve test suites",{error:a}),new Error("Failed to retrieve test suites")}};this.createTracesAnnotationQueue=async o=>this.createAnnotationQueueInternal(o,Rr);this.createThreadsAnnotationQueue=async o=>this.createAnnotationQueueInternal(o,wr);this.getTracesAnnotationQueue=async o=>this.fetchAnnotationQueueById(o,"trace",Rr);this.getThreadsAnnotationQueue=async o=>this.fetchAnnotationQueueById(o,"thread",wr);this.getTracesAnnotationQueues=async o=>(await this.getAnnotationQueuesByScope("trace",o)).map(t=>new Rr(t,this));this.getThreadsAnnotationQueues=async o=>(await this.getAnnotationQueuesByScope("thread",o)).map(t=>new wr(t,this));this.deleteTracesAnnotationQueue=async o=>this.deleteAnnotationQueueById(o,"traces");this.deleteThreadsAnnotationQueue=async o=>this.deleteAnnotationQueueById(o,"threads");this.createExperiment=async({datasetName:o,name:e,experimentConfig:t,prompts:i,type:a=qc.Regular,optimizationId:n,datasetVersionId:p,evaluationMethod:c,tags:l,projectName:d})=>{if(v.debug(`Creating experiment for dataset "${o}"`),!o)throw new Error("Dataset name is required to create an experiment");let[m,g]=Cw(t,i),u=this.resolveProjectName(d),f=oe(),h=new Qr({id:f,name:e,datasetName:o,prompts:i,tags:l,projectName:u},this);try{return await this.api.experiments.createExperiment({id:f,datasetName:o,name:e,metadata:m,promptVersions:g,type:a,optimizationId:n,datasetVersionId:p,tags:l,evaluationMethod:c,projectName:u}),v.debug("Experiment created with id:",f),h}catch(R){throw v.error(`Failed to create experiment for dataset "${o}"`,{error:R}),new Error(`Error creating experiment: ${R}`)}};this.updateExperiment=async(o,e)=>{if(!o)throw new Error("id is required to update an experiment");let{name:t,experimentConfig:i}=e;if(!t&&!i)throw new Error("At least one of 'name' or 'experimentConfig' must be provided to update an experiment");v.debug(`Updating experiment with ID "${o}"`);let a={};t!==void 0&&(a.name=t),i!==void 0&&(a.metadata=i);try{await this.api.experiments.updateExperiment(o,{body:a});}catch(n){throw v.error(`Failed to update experiment with ID "${o}"`,{error:n}),n}};this.getExperimentById=async o=>{var e,t;v.debug(`Getting experiment with ID "${o}"`);try{let i=await this.api.experiments.getExperimentById(o);return new Qr({id:i.id,name:i.name,datasetName:(e=i.datasetName)!=null?e:void 0,projectName:(t=i.projectName)!=null?t:void 0},this)}catch(i){throw i instanceof x&&i.statusCode===404?new Ha(`No experiment found with ID '${o}'`):(v.error(`Failed to get experiment with ID "${o}"`,{error:i}),i)}};this.getExperimentsByName=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting experiments with name "${o}"`);try{let i=await this.api.experiments.streamExperiments({name:o,projectName:t});return (await Ge(i,H.ExperimentPublic)).map(n=>{var p,c;return new Qr({id:n.id,name:n.name,datasetName:(p=n.datasetName)!=null?p:void 0,projectName:(c=n.projectName)!=null?c:void 0},this)})}catch(i){throw v.error(`Failed to get experiments with name "${o}"`,{error:i}),i}};this.getExperiment=async(o,e)=>{v.debug(`Getting experiment with name "${o}"`);let t=await this.getExperimentsByName(o,e);if(t.length===0)throw new Ha(o);return t[0]};this.getDatasetExperiments=async(o,e=100,t)=>{var p,c;v.debug(`Getting experiments for dataset "${o}"`);let i=await this.getDataset(o,t),a=Math.min(100,e),n=[];try{let l=1;for(;n.length<e;){let d=await this.api.experiments.findExperiments({page:l,size:a,datasetId:i.id}),m=(p=d==null?void 0:d.content)!=null?p:[];if(m.length===0)break;let g=e-n.length,u=Math.min(m.length,g);for(let f=0;f<u;f++){let h=m[f];n.push(new Qr({id:h.id,name:h.name,datasetName:(c=h.datasetName)!=null?c:void 0},this));}if(u<m.length)break;l+=1;}return n}catch(l){throw v.error(`Failed to get experiments for dataset "${o}"`,{error:l}),l}};this.deleteExperiment=async o=>{v.debug(`Deleting experiment with ID "${o}"`);try{await this.api.experiments.deleteExperimentsById({ids:[o]});}catch(e){throw v.error(`Failed to delete experiment with ID "${o}"`,{error:e}),e}};this.createPromptInternal=async(o,e,t,i,a,n,p,c,l)=>{var d;v.debug(`Creating ${c}`,{name:o});try{let m=await Mw(this.api.prompts,o,this.api.requestOptions);a(m);let g=(d=i.type)!=null?d:we.MUSTACHE,u=Bw({prompt:e,metadata:i.metadata},m,g),f;if(u?(v.debug(`Creating new ${c} version`,{name:o}),f=await this.api.prompts.createPromptVersion({name:o,version:{template:e,metadata:i.metadata,type:g},templateStructure:t,projectName:l},this.api.requestOptions)):(v.debug(`Returning existing ${c} version`,{name:o}),f=m),!f.promptId)throw new Error("Invalid API response: missing promptId");let h=await this.api.prompts.getPromptById(f.promptId,this.api.requestOptions),R=n(h,f);return v.debug(`${c} created`,{name:o}),i.description||i.tags?await R.updateProperties({description:i.description,tags:i.tags}):R}catch(m){if(m instanceof x||m instanceof mr)return v.warn(`Failed to sync ${c} '${o}' with the backend. The prompt will work locally but is not persisted on the server. You can retry by calling .syncWithBackend().`,{error:m}),p();throw v.error(`Failed to create ${c}`,{name:o,error:m}),m}};this.createPrompt=async o=>{let e=this.resolveProjectName(o.projectName);return this.createPromptInternal(o.name,o.prompt,he.Text,o,()=>{},(t,i)=>Me.fromApiResponse(t,i,this,e),()=>{var t;return new Me({name:o.name,prompt:o.prompt,metadata:o.metadata,type:(t=o.type)!=null?t:we.MUSTACHE,description:o.description,tags:o.tags,synced:false,projectName:e},this)},"prompt",e)};this.createChatPrompt=async o=>{let e=this.resolveProjectName(o.projectName),t=JSON.stringify(o.messages);return this.createPromptInternal(o.name,t,he.Chat,o,i=>{if(i&&i.templateStructure&&i.templateStructure!==he.Chat)throw new Yr(o.name,i.templateStructure,he.Chat)},(i,a)=>Be.fromApiResponse(i,a,this,e),()=>{var i;return new Be({name:o.name,messages:structuredClone(o.messages),metadata:o.metadata,type:(i=o.type)!=null?i:we.MUSTACHE,description:o.description,tags:o.tags,synced:false,projectName:e},this)},"chat prompt",e)};this.getPrompt=async o=>{var e;v.debug("Getting prompt",o);try{let t=this.resolveProjectName(o.projectName),i={...o,projectName:t},a;try{a=await this.getProjectIdByName(t);}catch{}let p=(e=(await this.api.prompts.getPrompts({filters:JSON.stringify([{field:"name",operator:"=",value:o.name}]),size:1,...a&&{projectId:a}},this.api.requestOptions)).content)==null?void 0:e[0];if(!p)return v.debug("Prompt not found",{name:o.name}),null;let c=await this.api.prompts.retrievePromptVersion(i,this.api.requestOptions),l=c.templateStructure;if(l&&l!==he.Text)throw new Yr(o.name,l,he.Text);return Me.fromApiResponse(p,c,this,t)}catch(t){if(t instanceof x&&t.statusCode===404)return null;throw v.error("Failed to get prompt",{name:o.name,error:t}),t}};this.getChatPrompt=async o=>{var e;v.debug("Getting chat prompt",o);try{let t=this.resolveProjectName(o.projectName),i={...o,projectName:t},a;try{a=await this.getProjectIdByName(t);}catch{}let p=(e=(await this.api.prompts.getPrompts({filters:JSON.stringify([{field:"name",operator:"=",value:o.name}]),size:1,...a&&{projectId:a}},this.api.requestOptions)).content)==null?void 0:e[0];if(!p)return v.debug("Chat prompt not found",{name:o.name}),null;let c=await this.api.prompts.retrievePromptVersion(i,this.api.requestOptions),l=c.templateStructure;if(!l||l!==he.Chat)throw new Yr(o.name,l!=null?l:"undefined",he.Chat);return Be.fromApiResponse(p,c,this,t)}catch(t){if(t instanceof x&&t.statusCode===404)return null;throw v.error("Failed to get chat prompt",{name:o.name,error:t}),t}};this.searchPrompts=async o=>{var e;v.debug("Searching prompts",{filterString:o});try{let t;if(o){let c=Qe.forPrompts(o).getFilterExpressions();t=c?JSON.stringify(c):void 0;}let a=(e=(await this.api.prompts.getPrompts({filters:t,size:1e3},this.api.requestOptions)).content)!=null?e:[];return (await Promise.all(a.map(async p=>{if(!p.name)return null;try{let c=await this.api.prompts.retrievePromptVersion({name:p.name},this.api.requestOptions),l=c.templateStructure,d=this.resolveProjectName();return !l||l===he.Text?Me.fromApiResponse(p,c,this,d):l===he.Chat?Be.fromApiResponse(p,c,this,d):null}catch(c){return v.debug("Failed to get version for prompt",{name:p.name,error:c}),null}}))).filter(p=>p!==null)}catch(t){throw v.error("Failed to search prompts",{error:t}),t}};this.deletePrompts=async o=>{v.debug("Deleting prompts in batch",{count:o.length});try{await this.api.prompts.deletePromptsBatch({ids:o},this.api.requestOptions),v.info("Successfully deleted prompts",{count:o.length});}catch(e){throw v.error("Failed to delete prompts",{count:o.length,error:e}),e}};this.searchTraces=async o=>{let{exclude:e,...t}=o!=null?o:{};return this.executeSearch("traces",t,$w,(i,a,n,p,c)=>Jw(i,a,n,p,c,e))};this.searchThreads=async o=>this.executeSearch("threads",o!=null?o:{},Qw,Kw);this.searchSpans=async o=>{let{exclude:e,...t}=o!=null?o:{};return this.executeSearch("spans",t,Xw,(i,a,n,p,c)=>Yw(i,a,n,p,c,e))};this.flush=async o=>{var t;let e=(t=o==null?void 0:o.silent)!=null?t:false;v.debug("Starting flush operation");try{await this.traceBatchQueue.flush(),await this.spanBatchQueue.flush(),await this.traceFeedbackScoresBatchQueue.flush(),await this.spanFeedbackScoresBatchQueue.flush(),await this.datasetBatchQueue.flush(),e||v.info("Successfully flushed all data to Opik");}catch(i){v.error("Error during flush operation:",{error:i instanceof Error?i.message:i});}};this.createConfig=async(o,e)=>{var c,l;let t=(c=e==null?void 0:e.projectName)!=null?c:this.config.projectName;this._validatePromptProjects(o,t);let i=new Zr(t,this),a=ab(o),n=await i.getBlueprint(),p;if(n)p=await i.updateBlueprint({values:a,description:e==null?void 0:e.description});else try{p=await i.createBlueprint({values:a,description:e==null?void 0:e.description});}catch(d){if(d instanceof x&&d.statusCode===409)p=await i.updateBlueprint({values:a,description:e==null?void 0:e.description});else throw d}return (l=p.name)!=null?l:p.id};this.setConfigEnv=async o=>{var n;let e=(n=o.projectName)!=null?n:this.config.projectName,i=await new Zr(e,this).getBlueprint({name:o.version});if(!i)throw new sr(`No config version "${o.version}" found in project "${e}".`);let a=await this.api.projects.retrieveProject({name:e});if(!(a!=null&&a.id))throw new Error(`Project "${e}" not found`);await this.api.agentConfigs.createOrUpdateEnvs({projectId:a.id,envs:[{envName:o.env,blueprintId:i.id}]});};this.updatePromptVersionTags=async(o,e)=>{var t;v.debug("Updating prompt version tags",{count:o.length,options:e});try{await this.api.prompts.updatePromptVersions({ids:o,update:{tags:(t=e==null?void 0:e.tags)!=null?t:void 0},mergeTags:e==null?void 0:e.mergeTags},this.api.requestOptions),v.debug("Successfully updated prompt version tags",{count:o.length});}catch(i){throw v.error("Failed to update prompt version tags",{count:o.length,error:i}),i}};v.debug("Initializing OpikClient with config:",o),this.config=Gb(o);let e={apiKey:this.config.apiKey,environment:this.config.apiUrl,workspaceName:this.config.workspaceName};o!=null&&o.headers&&(v.debug("Initializing OpikClient with additional headers:",o==null?void 0:o.headers),e.requestOptions={headers:o==null?void 0:o.headers}),this.api=new Dp(e);let t=this.config.holdUntilFlush?1440*60*1e3:this.config.batchDelayMs;this.spanBatchQueue=new Ep(this.api,t),this.traceBatchQueue=new Cp(this.api,t),this.spanFeedbackScoresBatchQueue=new jp(this.api,t),this.traceFeedbackScoresBatchQueue=new Ip(this.api,t),this.datasetBatchQueue=new Up(this.api,t),pb.push(this);}resolveProjectName(o){return o!==void 0?o:(!xx&&this.config.projectName===Xt.projectName&&(xx=true,v.warn(`No project name configured. Traces are being logged to "Default Project".
22
+ `);});}function iA(s,o){let e=[" \u2800\u20DD","opik ",`runner: ${s}`];o&&e.push(` project: ${o}`),console.log(e.join("")),console.log();}var nA="track.decorator",Or=new AsyncLocalStorage,fb=()=>{let{span:s,trace:o}=Or.getStore()||{};if(!(!s||!o))return {span:s,trace:o}};function sA(s){return !!s&&(typeof s=="object"||typeof s=="function")&&typeof s.then=="function"}function pA({name:s,parentSpan:o,projectName:e,trace:t,type:i="llm"}){v.debug("Creating new span:",{name:s,parentSpan:o==null?void 0:o.data.id,projectName:e,type:i});let a=t;if(!a){let p=Ow();a=qp().trace({name:s,projectName:e,...p?{id:p}:{}});}let n=a.span({name:s,parentSpanId:o==null?void 0:o.data.id,projectName:e,type:i});return v.debug("Span created with ID:",n.data.id),{span:n,trace:a}}function cA({args:s,span:o,trace:e}){if(v.debug("Starting span execution:",{spanId:o.data.id,traceId:e==null?void 0:e.data.id}),s.length===0)return;let t={arguments:s};v.debug("Recording span input"),o.update({input:t}),e&&(v.debug("Recording trace input"),e.update({input:t}));}function hx({result:s,span:o,trace:e,enrichSpan:t}){v.debug("Recording successful execution:",{spanId:o.data.id,traceId:e==null?void 0:e.data.id});let i=typeof s=="object"?s:{result:s},a=new Date,n={endTime:a,output:i};if(t){let p=t(s);Object.assign(n,p);}o.update(n),e&&e.update({endTime:a,output:i});}function yx({span:s,error:o,trace:e}){var t,i;v.error("Recording execution error:",{spanId:s.data.id,traceId:e==null?void 0:e.data.id,error:o instanceof Error?{name:o.name,message:o.message,stack:o.stack}:o}),o instanceof Error&&s.update({errorInfo:{message:o.message,exceptionType:o.name,traceback:(t=o.stack)!=null?t:""}}),s.end(),e&&(e.update({errorInfo:{message:o.message,exceptionType:o.name,traceback:(i=o.stack)!=null?i:""}}),e.end());}function tc({name:s,projectName:o,type:e,enrichSpan:t}={},i){return function(...n){let p=Or.getStore(),{span:c,trace:l}=pA({name:s!=null?s:i.name||nA,parentSpan:p==null?void 0:p.span,projectName:o,trace:p==null?void 0:p.trace,type:e}),d=!p,m=this;return Or.run({span:c,trace:l},()=>{let g=d?l:void 0;try{cA({args:n,span:c,trace:g});let u=i.apply(m,n);return sA(u)?u.then(f=>(hx({span:c,result:f,trace:g,enrichSpan:t}),f),f=>{throw yx({span:c,error:f,trace:g}),f}):(hx({span:c,result:u,trace:g,enrichSpan:t}),u)}catch(u){throw yx({span:c,error:u,trace:g}),u}})}}function xr(s,o){if(typeof s=="function")return tc({},s);let e=s;if(o){let t=tc(e,o);return e.entrypoint&&lA(o,t,e),t}return function(...t){if(t.length===2&&typeof t[1]=="object"&&t[1]!==null&&"kind"in t[1]){let[n,p]=t;if(p.kind!=="method")throw new Error("track decorator is only applicable to methods");return tc(e,n)}let[,,i]=t;if(!i||typeof i.value!="function")throw new Error("track decorator can only be applied to methods");let a=i.value;return i.value=tc(e,a),i}}function lA(s,o,e){var l;let t=e.name||s.name;if(!t)throw new Error("entrypoint functions must have a name. Provide one via track({ name: '...' }) or use a named function.");let i=e.projectName||qp().config.projectName,a=(l=e.params)!=null?l:ix(s),n=new Set(["string","number","float","integer","boolean"]),p=a.filter(d=>!n.has(d.type));if(p.length>0){let d=p.map(m=>`${m.name} (${m.type})`);v.warn(`Could not resolve type for parameter(s) [${d.join(", ")}] in "${t}". These parameters will default to 'string' and cannot be modified via the UI. Consider using a supported type (string, number, boolean) or choosing a different entrypoint.`);}let c=a.map(d=>({...d,type:d.type==="number"?"float":d.type}));tx({func:o,name:t,project:i,params:c,docstring:""}),ub();}var gb=null;function qp(){return gb===null&&(gb=new cr),gb}function mA(s,o){if(s!=null)return o==="prompt"||o==="prompt_commit"?ib(s,o):s}var dA=new Set(["blueprintId","blueprintVersion","isFallback"]);function hb(s){let{values:o,fieldNames:e,blueprintId:t,blueprintVersion:i,isFallback:a,maskId:n}=s,p={...o};return Object.defineProperties(p,{blueprintId:{value:t,enumerable:false,writable:false},blueprintVersion:{value:i,enumerable:false,writable:false},isFallback:{value:a,enumerable:false,writable:false}}),new Proxy(p,{get(l,d){return typeof d!="string"||dA.has(d)||e.has(d)&&uA({blueprintId:t,blueprintVersion:i,maskId:n,fieldNames:e,values:o}),Reflect.get(l,d)}})}function uA(s){let o=fb();if(!o)return;let{blueprintId:e,blueprintVersion:t,maskId:i,fieldNames:a,values:n}=s,p={};for(let d of a){let m=n[d];if(m===void 0)continue;let g=Yp(m);p[d]={value:mA(m,g),type:g};}let c={_blueprint_id:e,blueprint_version:t,values:p};i!==void 0&&(c._mask_id=i);let l={agent_configuration:c};o.span.update({metadata:l}),o.trace.update({metadata:l});}var gA=300,fA=1e3,bx=100;function Rx(){let s=process.env.OPIK_CONFIG_TTL_SECONDS;if(s!==void 0){let o=parseInt(s,10);if(!isNaN(o))return o}return gA}var yb=class{constructor(o){this._blueprint=null;this._lastFetchMs=null;this._refreshCallback=null;this._ttlMs=o*1e3;}setRefreshCallback(o){this._refreshCallback===null&&(this._refreshCallback=o);}update(o){this._blueprint=o,this._lastFetchMs=Date.now();}getBlueprint(){return this._blueprint}isStale(){return this._lastFetchMs===null?true:Date.now()-this._lastFetchMs>=this._ttlMs}async tryBackgroundRefresh(){if(this._refreshCallback!==null)try{let o=await this._refreshCallback();o!==null&&this.update(o);}catch(o){v.debug("Background blueprint cache refresh failed",o);}}},bb=class{constructor(){this._entries=new Map;this._intervalHandle=null;this._refreshRunning=false;}getOrCreate(o,e,t,i=null){let a=`${o}::${e!=null?e:""}::${t!=null?t:""}::${i!=null?i:""}`,n=this._entries.get(a);return n||(n=new yb(Rx()),this._entries.set(a,n)),n}ensureRefreshTimerStarted(){if(this._intervalHandle!==null)return;let o=Math.max(Rx()*1e3,fA);this._intervalHandle=setInterval(()=>{this._refreshAllStale();},o),this._intervalHandle.unref();}async _refreshAllStale(){if(!this._refreshRunning){this._refreshRunning=true;try{let o=[...this._entries.values()].filter(e=>e.isStale());for(let e=0;e<o.length;e+=bx)await Promise.all(o.slice(e,e+bx).map(t=>t.tryBackgroundRefresh()));}finally{this._refreshRunning=false;}}}clear(){this._intervalHandle!==null&&(clearInterval(this._intervalHandle),this._intervalHandle=null),this._refreshRunning=false,this._entries.clear();}},Rb=new bb;function wx(s,o,e,t=null){return Rb.getOrCreate(s,o,e,t)}function wb(s,o,e,t,i,a=null){let n=Rb.getOrCreate(s,o,e,a);t!==null&&n.update(t),i!==null&&e===null&&(n.setRefreshCallback(i),Rb.ensureRefreshTimerStarted());}var pb=[],xx=false;var cr=class{constructor(o){this.displayTraceLog=(o,e)=>{if(e===this.lastProjectNameLogged||!this.config.apiUrl)return;let t=Tw(o,this.config.apiUrl);v.info(`Started logging traces to the "${e}" project at ${nn(t)}`),this.lastProjectNameLogged=e;};this.trace=o=>{v.debug("Creating new trace with data:",o);let e=this.resolveProjectName(o.projectName),t=new Tp({id:oe(),startTime:new Date,source:"sdk",...o,projectName:e},this);return this.traceBatchQueue.create(t.data),v.debug("Trace added to the queue with ID:",t.data.id),this.displayTraceLog(t.data.id,e),t};this.getDataset=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting dataset with name "${o}"`);try{await this.datasetBatchQueue.flush();let i=await this.api.datasets.getDatasetByIdentifier({datasetName:o,projectName:t});return new Ke({...i,projectName:t},this)}catch(i){throw i instanceof x&&i.statusCode===404?new $r(o):i}};this.createDataset=async(o,e,t)=>{let i=this.resolveProjectName(t);v.debug(`Creating dataset with name "${o}"`);let a=new Ke({name:o,description:e,projectName:i},this);try{return this.datasetBatchQueue.create({name:a.name,description:a.description,id:a.id,projectName:i}),v.debug("Dataset added to the queue with name:",a.name),a}catch(n){throw v.error(`Failed to create dataset "${o}"`,{error:n}),new Error(`Error creating dataset "${o}": ${n}`)}};this.getOrCreateDataset=async(o,e,t)=>{v.debug(`Attempting to retrieve or create dataset with name: "${o}"`);try{return await this.getDataset(o,t)}catch(i){if(i instanceof $r)return v.info(`Dataset "${o}" not found. Proceeding to create a new one.`),this.createDataset(o,e,t);throw v.error(`Error retrieving dataset "${o}":`,i),i}};this.getDatasets=async(o=100,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting all datasets (limit: ${o})`);try{await this.datasetBatchQueue.flush();let i;try{i=await this.getProjectIdByName(t);}catch{}let a=await this.api.datasets.findDatasets({size:o,...i&&{projectId:i}}),n=[];for(let p of a.content||[])n.push(new Ke({...p,projectName:t},this));return v.info(`Retrieved ${n.length} datasets`),n}catch(i){throw v.error("Failed to retrieve datasets",{error:i}),new Error("Failed to retrieve datasets")}};this.deleteDataset=async(o,e)=>{v.debug(`Deleting dataset with name "${o}"`);try{let t=await this.getDataset(o,e);if(!t.id)throw new Error(`Cannot delete dataset "${o}": ID not available`);this.datasetBatchQueue.delete(t.id);}catch(t){throw v.error(`Failed to delete dataset "${o}"`,{error:t}),new Error(`Failed to delete dataset "${o}": ${t}`)}};this.createTestSuite=async o=>{v.debug(`Creating test suite with name "${o.name}"`);let{TestSuite:e}=await import('./suite-6XFUV2Y7.js');return e.create(this,o)};this.getTestSuite=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting test suite with name "${o}"`);let{TestSuite:i}=await import('./suite-6XFUV2Y7.js');return i.get(this,o,t)};this.getOrCreateTestSuite=async o=>{v.debug(`Attempting to retrieve or create test suite with name: "${o.name}"`);let{TestSuite:e}=await import('./suite-6XFUV2Y7.js');return e.getOrCreate(this,o)};this.deleteTestSuite=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Deleting test suite with name "${o}"`);let{TestSuite:i}=await import('./suite-6XFUV2Y7.js');await i.delete(this,o,t);};this.getTestSuites=async(o=1e3,e)=>{var i;let t=this.resolveProjectName(e);v.debug(`Getting all test suites (limit: ${o})`);try{await this.datasetBatchQueue.flush();let a=await this.resolveProjectId(t),{TestSuite:n}=await import('./suite-6XFUV2Y7.js'),p=[],c=1,l=100;for(;p.length<o;){let m=(i=(await this.api.datasets.findDatasets({page:c,size:l,...a&&{projectId:a}})).content)!=null?i:[];if(m.length===0)break;for(let g of m){if(p.length>=o)break;g.type===Xc.EvaluationSuite&&p.push(new n(new Ke({...g,projectName:t},this),this));}c++;}return v.info(`Retrieved ${p.length} test suites`),p}catch(a){throw v.error("Failed to retrieve test suites",{error:a}),new Error("Failed to retrieve test suites")}};this.createTracesAnnotationQueue=async o=>this.createAnnotationQueueInternal(o,Rr);this.createThreadsAnnotationQueue=async o=>this.createAnnotationQueueInternal(o,wr);this.getTracesAnnotationQueue=async o=>this.fetchAnnotationQueueById(o,"trace",Rr);this.getThreadsAnnotationQueue=async o=>this.fetchAnnotationQueueById(o,"thread",wr);this.getTracesAnnotationQueues=async o=>(await this.getAnnotationQueuesByScope("trace",o)).map(t=>new Rr(t,this));this.getThreadsAnnotationQueues=async o=>(await this.getAnnotationQueuesByScope("thread",o)).map(t=>new wr(t,this));this.deleteTracesAnnotationQueue=async o=>this.deleteAnnotationQueueById(o,"traces");this.deleteThreadsAnnotationQueue=async o=>this.deleteAnnotationQueueById(o,"threads");this.createExperiment=async({datasetName:o,name:e,experimentConfig:t,prompts:i,type:a=qc.Regular,optimizationId:n,datasetVersionId:p,evaluationMethod:c,tags:l,projectName:d})=>{if(v.debug(`Creating experiment for dataset "${o}"`),!o)throw new Error("Dataset name is required to create an experiment");let[m,g]=Cw(t,i),u=this.resolveProjectName(d),f=oe(),h=new Qr({id:f,name:e,datasetName:o,prompts:i,tags:l,projectName:u},this);try{return await this.api.experiments.createExperiment({id:f,datasetName:o,name:e,metadata:m,promptVersions:g,type:a,optimizationId:n,datasetVersionId:p,tags:l,evaluationMethod:c,projectName:u}),v.debug("Experiment created with id:",f),h}catch(R){throw v.error(`Failed to create experiment for dataset "${o}"`,{error:R}),new Error(`Error creating experiment: ${R}`)}};this.updateExperiment=async(o,e)=>{if(!o)throw new Error("id is required to update an experiment");let{name:t,experimentConfig:i}=e;if(!t&&!i)throw new Error("At least one of 'name' or 'experimentConfig' must be provided to update an experiment");v.debug(`Updating experiment with ID "${o}"`);let a={};t!==void 0&&(a.name=t),i!==void 0&&(a.metadata=i);try{await this.api.experiments.updateExperiment(o,{body:a});}catch(n){throw v.error(`Failed to update experiment with ID "${o}"`,{error:n}),n}};this.getExperimentById=async o=>{var e,t;v.debug(`Getting experiment with ID "${o}"`);try{let i=await this.api.experiments.getExperimentById(o);return new Qr({id:i.id,name:i.name,datasetName:(e=i.datasetName)!=null?e:void 0,projectName:(t=i.projectName)!=null?t:void 0},this)}catch(i){throw i instanceof x&&i.statusCode===404?new Ha(`No experiment found with ID '${o}'`):(v.error(`Failed to get experiment with ID "${o}"`,{error:i}),i)}};this.getExperimentsByName=async(o,e)=>{let t=this.resolveProjectName(e);v.debug(`Getting experiments with name "${o}"`);try{let i=await this.api.experiments.streamExperiments({name:o,projectName:t});return (await Ge(i,H.ExperimentPublic)).map(n=>{var p,c;return new Qr({id:n.id,name:n.name,datasetName:(p=n.datasetName)!=null?p:void 0,projectName:(c=n.projectName)!=null?c:void 0},this)})}catch(i){throw v.error(`Failed to get experiments with name "${o}"`,{error:i}),i}};this.getExperiment=async(o,e)=>{v.debug(`Getting experiment with name "${o}"`);let t=await this.getExperimentsByName(o,e);if(t.length===0)throw new Ha(o);return t[0]};this.getDatasetExperiments=async(o,e=100,t)=>{var p,c;v.debug(`Getting experiments for dataset "${o}"`);let i=await this.getDataset(o,t),a=Math.min(100,e),n=[];try{let l=1;for(;n.length<e;){let d=await this.api.experiments.findExperiments({page:l,size:a,datasetId:i.id}),m=(p=d==null?void 0:d.content)!=null?p:[];if(m.length===0)break;let g=e-n.length,u=Math.min(m.length,g);for(let f=0;f<u;f++){let h=m[f];n.push(new Qr({id:h.id,name:h.name,datasetName:(c=h.datasetName)!=null?c:void 0},this));}if(u<m.length)break;l+=1;}return n}catch(l){throw v.error(`Failed to get experiments for dataset "${o}"`,{error:l}),l}};this.deleteExperiment=async o=>{v.debug(`Deleting experiment with ID "${o}"`);try{await this.api.experiments.deleteExperimentsById({ids:[o]});}catch(e){throw v.error(`Failed to delete experiment with ID "${o}"`,{error:e}),e}};this.createPromptInternal=async(o,e,t,i,a,n,p,c,l)=>{var d;v.debug(`Creating ${c}`,{name:o});try{let m=await Mw(this.api.prompts,o,this.api.requestOptions);a(m);let g=(d=i.type)!=null?d:we.MUSTACHE,u=Bw({prompt:e,metadata:i.metadata},m,g),f;if(u?(v.debug(`Creating new ${c} version`,{name:o}),f=await this.api.prompts.createPromptVersion({name:o,version:{template:e,metadata:i.metadata,type:g},templateStructure:t,projectName:l},this.api.requestOptions)):(v.debug(`Returning existing ${c} version`,{name:o}),f=m),!f.promptId)throw new Error("Invalid API response: missing promptId");let h=await this.api.prompts.getPromptById(f.promptId,this.api.requestOptions),R=n(h,f);return v.debug(`${c} created`,{name:o}),i.description||i.tags?await R.updateProperties({description:i.description,tags:i.tags}):R}catch(m){if(m instanceof x||m instanceof mr)return v.warn(`Failed to sync ${c} '${o}' with the backend. The prompt will work locally but is not persisted on the server. You can retry by calling .syncWithBackend().`,{error:m}),p();throw v.error(`Failed to create ${c}`,{name:o,error:m}),m}};this.createPrompt=async o=>{let e=this.resolveProjectName(o.projectName);return this.createPromptInternal(o.name,o.prompt,he.Text,o,()=>{},(t,i)=>Me.fromApiResponse(t,i,this,e),()=>{var t;return new Me({name:o.name,prompt:o.prompt,metadata:o.metadata,type:(t=o.type)!=null?t:we.MUSTACHE,description:o.description,tags:o.tags,synced:false,projectName:e},this)},"prompt",e)};this.createChatPrompt=async o=>{let e=this.resolveProjectName(o.projectName),t=JSON.stringify(o.messages);return this.createPromptInternal(o.name,t,he.Chat,o,i=>{if(i&&i.templateStructure&&i.templateStructure!==he.Chat)throw new Yr(o.name,i.templateStructure,he.Chat)},(i,a)=>Be.fromApiResponse(i,a,this,e),()=>{var i;return new Be({name:o.name,messages:structuredClone(o.messages),metadata:o.metadata,type:(i=o.type)!=null?i:we.MUSTACHE,description:o.description,tags:o.tags,synced:false,projectName:e},this)},"chat prompt",e)};this.getPrompt=async o=>{var e;v.debug("Getting prompt",o);try{let t=this.resolveProjectName(o.projectName),i={...o,projectName:t},a;try{a=await this.getProjectIdByName(t);}catch{}let p=(e=(await this.api.prompts.getPrompts({filters:JSON.stringify([{field:"name",operator:"=",value:o.name}]),size:1,...a&&{projectId:a}},this.api.requestOptions)).content)==null?void 0:e[0];if(!p)return v.debug("Prompt not found",{name:o.name}),null;let c=await this.api.prompts.retrievePromptVersion(i,this.api.requestOptions),l=c.templateStructure;if(l&&l!==he.Text)throw new Yr(o.name,l,he.Text);return Me.fromApiResponse(p,c,this,t)}catch(t){if(t instanceof x&&t.statusCode===404)return null;throw v.error("Failed to get prompt",{name:o.name,error:t}),t}};this.getChatPrompt=async o=>{var e;v.debug("Getting chat prompt",o);try{let t=this.resolveProjectName(o.projectName),i={...o,projectName:t},a;try{a=await this.getProjectIdByName(t);}catch{}let p=(e=(await this.api.prompts.getPrompts({filters:JSON.stringify([{field:"name",operator:"=",value:o.name}]),size:1,...a&&{projectId:a}},this.api.requestOptions)).content)==null?void 0:e[0];if(!p)return v.debug("Chat prompt not found",{name:o.name}),null;let c=await this.api.prompts.retrievePromptVersion(i,this.api.requestOptions),l=c.templateStructure;if(!l||l!==he.Chat)throw new Yr(o.name,l!=null?l:"undefined",he.Chat);return Be.fromApiResponse(p,c,this,t)}catch(t){if(t instanceof x&&t.statusCode===404)return null;throw v.error("Failed to get chat prompt",{name:o.name,error:t}),t}};this.searchPrompts=async o=>{var e;v.debug("Searching prompts",{filterString:o});try{let t;if(o){let c=Qe.forPrompts(o).getFilterExpressions();t=c?JSON.stringify(c):void 0;}let a=(e=(await this.api.prompts.getPrompts({filters:t,size:1e3},this.api.requestOptions)).content)!=null?e:[];return (await Promise.all(a.map(async p=>{if(!p.name)return null;try{let c=await this.api.prompts.retrievePromptVersion({name:p.name},this.api.requestOptions),l=c.templateStructure,d=this.resolveProjectName();return !l||l===he.Text?Me.fromApiResponse(p,c,this,d):l===he.Chat?Be.fromApiResponse(p,c,this,d):null}catch(c){return v.debug("Failed to get version for prompt",{name:p.name,error:c}),null}}))).filter(p=>p!==null)}catch(t){throw v.error("Failed to search prompts",{error:t}),t}};this.deletePrompts=async o=>{v.debug("Deleting prompts in batch",{count:o.length});try{await this.api.prompts.deletePromptsBatch({ids:o},this.api.requestOptions),v.info("Successfully deleted prompts",{count:o.length});}catch(e){throw v.error("Failed to delete prompts",{count:o.length,error:e}),e}};this.searchTraces=async o=>{let{exclude:e,...t}=o!=null?o:{};return this.executeSearch("traces",t,$w,(i,a,n,p,c)=>Jw(i,a,n,p,c,e))};this.searchThreads=async o=>this.executeSearch("threads",o!=null?o:{},Qw,Kw);this.searchSpans=async o=>{let{exclude:e,...t}=o!=null?o:{};return this.executeSearch("spans",t,Xw,(i,a,n,p,c)=>Yw(i,a,n,p,c,e))};this.flush=async o=>{var t;let e=(t=o==null?void 0:o.silent)!=null?t:false;v.debug("Starting flush operation");try{await this.traceBatchQueue.flush(),await this.spanBatchQueue.flush(),await this.traceFeedbackScoresBatchQueue.flush(),await this.spanFeedbackScoresBatchQueue.flush(),await this.datasetBatchQueue.flush(),e||v.info("Successfully flushed all data to Opik");}catch(i){v.error("Error during flush operation:",{error:i instanceof Error?i.message:i});}};this.createConfig=async(o,e)=>{var c,l;let t=(c=e==null?void 0:e.projectName)!=null?c:this.config.projectName;this._validatePromptProjects(o,t);let i=new Zr(t,this),a=ab(o),n=await i.getBlueprint(),p;if(n)p=await i.updateBlueprint({values:a,description:e==null?void 0:e.description});else try{p=await i.createBlueprint({values:a,description:e==null?void 0:e.description});}catch(d){if(d instanceof x&&d.statusCode===409)p=await i.updateBlueprint({values:a,description:e==null?void 0:e.description});else throw d}return (l=p.name)!=null?l:p.id};this.setConfigEnv=async o=>{var n;let e=(n=o.projectName)!=null?n:this.config.projectName,i=await new Zr(e,this).getBlueprint({name:o.version});if(!i)throw new sr(`No config version "${o.version}" found in project "${e}".`);let a=await this.api.projects.retrieveProject({name:e});if(!(a!=null&&a.id))throw new Error(`Project "${e}" not found`);await this.api.agentConfigs.createOrUpdateEnvs({projectId:a.id,envs:[{envName:o.env,blueprintId:i.id}]});};this.updatePromptVersionTags=async(o,e)=>{var t;v.debug("Updating prompt version tags",{count:o.length,options:e});try{await this.api.prompts.updatePromptVersions({ids:o,update:{tags:(t=e==null?void 0:e.tags)!=null?t:void 0},mergeTags:e==null?void 0:e.mergeTags},this.api.requestOptions),v.debug("Successfully updated prompt version tags",{count:o.length});}catch(i){throw v.error("Failed to update prompt version tags",{count:o.length,error:i}),i}};v.debug("Initializing OpikClient with config:",o),this.config=Gb(o);let e={apiKey:this.config.apiKey,environment:this.config.apiUrl,workspaceName:this.config.workspaceName};o!=null&&o.headers&&(v.debug("Initializing OpikClient with additional headers:",o==null?void 0:o.headers),e.requestOptions={headers:o==null?void 0:o.headers}),this.api=new Dp(e);let t=this.config.holdUntilFlush?1440*60*1e3:this.config.batchDelayMs;this.spanBatchQueue=new Ep(this.api,t),this.traceBatchQueue=new Cp(this.api,t),this.spanFeedbackScoresBatchQueue=new jp(this.api,t),this.traceFeedbackScoresBatchQueue=new Ip(this.api,t),this.datasetBatchQueue=new Up(this.api,t),pb.push(this);}resolveProjectName(o){return o!==void 0?o:(!xx&&this.config.projectName===Xt.projectName&&(xx=true,v.warn(`No project name configured. Traces are being logged to "Default Project".
23
23
  Set OPIK_PROJECT_NAME environment variable or pass projectName to the Opik client
24
24
  to log to a specific project.
25
25
  See https://www.comet.com/docs/opik/tracing/sdk_configuration`)),this.config.projectName)}async getProjectIdByName(o){let e=await this.api.projects.retrieveProject({name:o});if(!(e!=null&&e.id))throw new Error(`Project "${o}" not found`);return e.id}async resolveProjectId(o){if(o!==void 0)return this.getProjectIdByName(o)}async createAnnotationQueueInternal(o,e){let{name:t,projectName:i,description:a,instructions:n,commentsEnabled:p,feedbackDefinitionNames:c}=o,l=e.SCOPE;v.debug(`Creating ${l} annotation queue "${t}"`);let d=i!=null?i:this.config.projectName;try{let m=await this.getProjectIdByName(d),g=oe();return await this.api.annotationQueues.createAnnotationQueue({id:g,projectId:m,name:t,scope:l,description:a,instructions:n,commentsEnabled:p,feedbackDefinitionNames:c}),v.debug(`Created ${l} annotation queue "${t}" with ID "${g}"`),new e({id:g,name:t,projectId:m,scope:l,description:a,instructions:n,commentsEnabled:p,feedbackDefinitionNames:c},this)}catch(m){throw v.error(`Failed to create ${l} annotation queue "${t}"`,{error:m}),m}}async fetchAnnotationQueueById(o,e,t){v.debug(`Getting ${e} annotation queue with ID "${o}"`);try{let i=await this.api.annotationQueues.getAnnotationQueueById(o);if(i.scope!==e)throw new Error(`Annotation queue "${o}" is not a ${e} queue (scope: ${i.scope})`);return new t(i,this)}catch(i){if(i instanceof x){if(i.statusCode===404)throw new Bp(o);v.error(`Failed to get ${e} annotation queue with ID "${o}"`,{error:i});}throw i}}async getAnnotationQueuesByScope(o,e){let{projectName:t,maxResults:i=1e3}=e!=null?e:{};v.debug(`Getting ${o} annotation queues (project: ${t!=null?t:"all"}, limit: ${i})`);try{let a;if(t){let c=await this.getProjectIdByName(t);a=JSON.stringify([{field:"project_id",operator:"=",value:c},{field:"scope",operator:"=",value:o}]);}else a=JSON.stringify([{field:"scope",operator:"=",value:o}]);let p=(await this.api.annotationQueues.findAnnotationQueues({size:i,filters:a})).content||[];return v.info(`Retrieved ${p.length} ${o} annotation queues`),p}catch(a){throw v.error(`Failed to retrieve ${o} annotation queues`,{error:a}),a}}async deleteAnnotationQueueById(o,e){v.debug(`Deleting ${e} annotation queue with ID "${o}"`);try{await this.api.annotationQueues.deleteAnnotationQueueBatch({ids:[o]}),v.debug(`Successfully deleted ${e} annotation queue with ID "${o}"`);}catch(t){throw v.error(`Failed to delete ${e} annotation queue with ID "${o}"`,{error:t}),t}}async executeSearch(o,e,t,i){let{projectName:a,filterString:n,maxResults:p=1e3,truncate:c=true,waitForAtLeast:l,waitForTimeout:d=60}=e;v.debug(`Searching ${o}`,{projectName:a,filterString:n,maxResults:p,truncate:c,waitForAtLeast:l,waitForTimeout:d});let m=t(n),g=a!=null?a:this.config.projectName,u=()=>i(this.api,g,m,p,c);if(l===void 0)return await u();let f=await Gw(u,l,d*1e3,5e3);if(f.length<l)throw new Fp(`Timeout after ${d} seconds: expected ${l} ${o}, but only ${f.length} were found.`);return f}logFeedbackScores(o,e){var t;for(let i of o)e.create({...i,projectName:(t=i.projectName)!=null?t:this.config.projectName,source:el.Sdk});}logTracesFeedbackScores(o){this.logFeedbackScores(o,this.traceFeedbackScoresBatchQueue);}logSpansFeedbackScores(o){this.logFeedbackScores(o,this.spanFeedbackScoresBatchQueue);}getOrCreateConfig(o){return this._getOrCreateConfigImpl(o)}_validatePromptProjects(o,e){for(let[t,i]of Object.entries(o))if(i instanceof $e&&i.projectName!==void 0&&i.projectName!==e)throw new Et(`Field "${t}": prompt project "${i.projectName}" does not match config project "${e}". All prompts referenced in a config must belong to the same project as the config.`)}_makeFallbackConfig(o,e){return hb({values:o,fieldNames:new Set(Object.keys(o)),blueprintId:void 0,blueprintVersion:void 0,isFallback:true,maskId:e})}async _fetchBlueprintFromBackend(o,e){let{blueprintName:t,isLatest:i,hasNamedVersion:a,namedVersion:n,effectiveEnv:p,maskId:c,projectName:l,effectiveVersion:d,fallback:m}=e,g=wx(l,p,c!=null?c:null,d);if(!g.isStale())return g.getBlueprint();let u=null;try{t?u=await o.getBlueprint({name:t,maskId:c}):i?u=await o.getBlueprint({maskId:c}):a?u=await o.getBlueprint({name:n,maskId:c}):u=await o.getBlueprint({env:p,maskId:c});}catch(h){if(m!==void 0)return v.debug("Failed to fetch config from backend, using fallback",{error:h}),this._makeFallbackConfig(m,c);throw h}let f=c===void 0&&!a?i?()=>o.getBlueprint({maskId:void 0}):()=>o.getBlueprint({env:p,maskId:void 0}):null;return wb(l,p,c!=null?c:null,u,f,d),u}async _resolveNullBlueprint(o,e){let{projectName:t,effectiveEnv:i,effectiveVersion:a,maskId:n,hasNamedVersion:p,hasExplicitEnv:c,isExplicitBlueprintFromContext:l,isLatest:d,fallback:m}=e;if(p||c||l)throw new sr(`No config found for project "${t}" with the specified selector`);if(!d){let u=null;try{u=await o.getBlueprint({maskId:void 0});}catch(f){if(m!==void 0)return v.debug("Failed to probe project-wide config, using fallback",{error:f}),this._makeFallbackConfig(m,n);throw f}if(u!==null)throw new sr(`No config tagged with env="prod" in project "${t}", but other configs exist. Use setConfigEnv() to tag a version, or pass an explicit env/version.`)}if(m===void 0)throw new sr(`No config found in project "${t}". Pass a fallback to auto-create one.`);this._validatePromptProjects(m,t);let g;try{g=await o.createBlueprint({values:ab(m)});}catch(u){if(u instanceof x&&u.statusCode===409){let f=await o.getBlueprint({maskId:void 0});if(!f)throw new sr(`Failed to create or fetch config in project "${t}".`);g=f;}else throw u}return wb(t,i,n!=null?n:null,g,null,a),g}_buildConfigFromBlueprint(o,e,t){var n;let i=Object.fromEntries(o.keys().map(p=>[p,o.getRawEntry(p)]));if(e!==void 0){let p=Object.keys(e).filter(c=>i[c]===void 0);if(p.length>0){let c=(n=o.name)!=null?n:o.id;throw new Et(`Config version "${c}" is missing expected field(s): ${p.join(", ")}. The retrieved version does not contain all fields declared in the fallback.`)}}let a=Zw(i,o.values,e!==void 0?Object.keys(e):void 0);return hb({values:a,fieldNames:new Set(Object.keys(e!=null?e:a)),blueprintId:o.id,blueprintVersion:o.name,isFallback:false,maskId:t})}async _getOrCreateConfigImpl(o){var f,h,R,_;if(!Or.getStore())throw new Error("getOrCreateConfig() must be called inside a track() function");if((o==null?void 0:o.version)!==void 0&&(o==null?void 0:o.env)!==void 0)throw new Error("Only one of 'version' or 'env' may be specified in getOrCreateConfig().");let e=o==null?void 0:o.fallback,t=(f=o==null?void 0:o.projectName)!=null?f:this.config.projectName,i=(h=sb())!=null?h:void 0,a=(R=ax())!=null?R:void 0,n=a!==void 0,p=new Zr(t,this),c=(o==null?void 0:o.version)==="latest",l=(o==null?void 0:o.version)!==void 0&&!c,d=o!=null&&o.version?null:(_=o==null?void 0:o.env)!=null?_:"prod",m=a!=null?a:l?o.version:null,g=await this._fetchBlueprintFromBackend(p,{blueprintName:a,isLatest:c,hasNamedVersion:l,namedVersion:o==null?void 0:o.version,effectiveEnv:d,maskId:i,projectName:t,effectiveVersion:m,fallback:e});if(g!==null&&!(g instanceof Ye))return g;let u=g;if(!u){let E=await this._resolveNullBlueprint(p,{projectName:t,effectiveEnv:d,effectiveVersion:m,maskId:i,hasNamedVersion:l,hasExplicitEnv:(o==null?void 0:o.env)!==void 0,isExplicitBlueprintFromContext:n,isLatest:c,fallback:e});if(!(E instanceof Ye))return E;u=E;}return this._buildConfigFromBlueprint(u,e,i)}};function _x(s,o){if(!o||typeof o!="object")throw new Error("Arguments must be an object");let e=s.validationSchema,i=e.extend(Object.fromEntries(Object.entries(e.shape).map(([a,n])=>[a,n.refine(p=>p!==void 0,{message:`${a} cannot be undefined`,path:[a]})]))).safeParse(o);if(!i.success){let a=i.error.issues.map(p=>p.path[0]).filter(Boolean),n=[...new Set(a)];throw new Error(hA(s,o,n))}}function hA(s,o,e){let t=Object.keys(o),i=e.filter(a=>!(a in o));return `Metric '${s.name}' is skipped, missing required arguments: ${i.join(", ")}. Available arguments: ${t.join(", ")}.`}var ic=class{static calculateAverageScores(o){if(!o||o.length===0)return new Map;let e=new Map;for(let i of o)if(!(!i||!i.scoreResults||i.scoreResults.length===0))for(let a of i.scoreResults){if(!a||a.scoringFailed||typeof a.value!="number")continue;let n=e.get(a.name)||{sum:0,count:0};n.sum+=a.value,n.count+=1,e.set(a.name,n);}let t=new Map;return e.forEach((i,a)=>{t.set(a,i.count>0?i.sum/i.count:0);}),t}static formatScore(o){return o.toFixed(4)}static formatTime(o){let e=Math.floor(o/3600),t=Math.floor(o%3600/60),i=Math.floor(o%60);return `${e.toString().padStart(2,"0")}:${t.toString().padStart(2,"0")}:${i.toString().padStart(2,"0")}`}static async generateResultTable(o,e,t,i,a){if(o.length===0){v.info(`
26
26
  No test results available to display.`);return}let n=[...t.keys()].sort(),p=this.formatTime(i),c=[];if(a&&c.push(oc.bold.cyan(nn(a,"View results in Opik dashboard")),""),c.push(oc.bold(`Total time: ${p}`),oc.bold(`Number of samples: ${o.length}`)),n.length>0){c.push("");for(let g of n){let u=this.formatScore(t.get(g)||0);c.push(oc.green(`${g}: ${u} (avg)`));}}let l=c.join(`
27
27
  `),d=await e.ensureNameLoaded(),m=yA(l,{title:`${d} (${o.length} samples)`,titleAlignment:"left",padding:1,margin:0,borderColor:"cyan",borderStyle:"round"});v.info(`
28
28
  `+m+`
29
- `);}static async processResults(o,e,t=0,i=[]){let a=this.calculateAverageScores(o),n;try{n=await e.getUrl();}catch{v.debug("Could not resolve experiment URL, skipping dashboard link");}await this.generateResultTable(o,e,a,t,n);let p=await e.ensureNameLoaded();return {experimentId:e.id,experimentName:p,testResults:o,errors:i}}};function kx(s,o,e){if(!s||typeof o!="string"||o.trim()==="")return e;let t=o.replace(/\[(\w+)\]/g,".$1").replace(/^\./,"").split("."),i=s;for(let a of t)if(typeof i=="object"&&i!==null&&a in i)i=i[a];else return e;return i===void 0?e:i}var zx,xb;zx=[xr({name:"metrics_calculation",type:Xe.General})];var _r=class{constructor(o,e,t){Bb(xb,5,this);this.client=void 0;this.dataset=void 0;this.task=void 0;this.scoringMetrics=void 0;this.projectName=void 0;this.nbSamples=void 0;this.scoringKeyMapping=void 0;this.experiment=void 0;this.suiteMode=void 0;this.executionPolicy=void 0;this.prefetchedItems=void 0;this.itemMetricsMap=void 0;this.itemPolicyMap=void 0;var i;this.client=e,this.dataset=o.dataset,this.experiment=t,this.task=o.task,this.scoringMetrics=o.scoringMetrics||[],this.projectName=o.projectName,this.nbSamples=o.nbSamples,this.scoringKeyMapping=o.scoringKeyMapping,this.suiteMode=(i=o.suiteMode)!=null?i:false,this.executionPolicy=o.executionPolicy,this.prefetchedItems=o.prefetchedItems,this.itemMetricsMap=o.itemMetricsMap,this.itemPolicyMap=o.itemPolicyMap;}async execute(){let o=await this.getDatasetItems(),e=this.calculateTotalRuns(o),t=this.createProgressTracker(o.length,e),i=performance.now();try{let a=[],n=[],p=[],c=0;for(let d=0;d<o.length;d++){let m=o[d],g=this.getRunsPerItem(m),u=this.getItemMetrics(m);for(let f=0;f<g;f++){try{let h=await this.executeItemRun(m,u,f,p);a.push(h);}catch(h){let R=h instanceof Error?h.message:String(h);n.push({datasetItemId:m.id,runIndex:f,message:R,...h instanceof Error&&{error:h}}),t.recordFailure();}c++;}t.update(c,d);}this.experiment.insert(p),await this.client.flush();let l=(performance.now()-i)/1e3;return t.complete(l),t.reportErrors(n),ic.processResults(a,this.experiment,l,n)}finally{t.restoreLogLevel();}}async getDatasetItems(){var o;return (o=this.prefetchedItems)!=null?o:await this.dataset.getItems(this.nbSamples)}calculateTotalRuns(o){var t,i;let e=(i=(t=this.executionPolicy)==null?void 0:t.runsPerItem)!=null?i:1;return this.itemPolicyMap?o.reduce((a,n)=>{var c;let p=this.itemPolicyMap.get(n.id);return a+((c=p==null?void 0:p.runsPerItem)!=null?c:e)},0):o.length*e}getRunsPerItem(o){var e,t,i,a,n;return (n=(a=(t=(e=this.itemPolicyMap)==null?void 0:e.get(o.id))==null?void 0:t.runsPerItem)!=null?a:(i=this.executionPolicy)==null?void 0:i.runsPerItem)!=null?n:1}getItemMetrics(o){var e;return (e=this.itemMetricsMap)==null?void 0:e.get(o.id)}createProgressTracker(o,e){let t=v.settings.minLevel;v.settings.minLevel=6;let i=0,a=this.suiteMode?`Evaluating dataset (0/${e} runs across ${o} items)`:`Evaluating dataset (0/${o} items)`,n=bA({text:a}).start(),p=()=>i>0?`, ${i} failed`:"";return {update:(c,l)=>{n.text=this.suiteMode?`Evaluating dataset (${c}/${e} runs across ${o} items, ${Math.round(c/e*100)}%${p()})`:`Evaluating dataset (${l+1}/${o} items, ${Math.round((l+1)/o*100)}%${p()})`;},complete:c=>{let l=this.suiteMode?`Evaluation complete: ${e} runs across ${o} items processed in ${c.toFixed(2)}s`:`Evaluation complete: ${o} items processed in ${c.toFixed(2)}s`;i>0?n.warn(`${l} (${i} failed)`):n.succeed(l);},recordFailure:()=>{i++;},reportErrors:c=>{for(let l of c)v.error(`Dataset item ${l.datasetItemId} (run ${l.runIndex}): ${l.message}`);},restoreLogLevel:()=>{v.settings.minLevel=t;}}}async executeItemRun(o,e,t,i){var n,p;let a=this.client.trace({projectName:this.projectName,name:"evaluation_task",createdBy:"evaluation",source:"experiment",input:o});Or.enterWith({trace:a});try{let c=await this.executeTask(o,e,a);if(this.suiteMode){c.trialId=t;let l=(n=this.itemPolicyMap)==null?void 0:n.get(o.id);l&&(c.resolvedExecutionPolicy=l);}return a.update({output:c.testCase.taskOutput,endTime:new Date}),c}catch(c){throw c instanceof Error&&a.update({errorInfo:{message:c.message,exceptionType:c.name,traceback:(p=c.stack)!=null?p:""},endTime:new Date}),c}finally{i.push(new Jp({datasetItemId:o.id,traceId:a.data.id,projectName:a.data.projectName}));}}async executeTask(o,e,t){let i={},a=[];v.debug(`Starting evaluation task on dataset item ${o.id}`),i=await xr({name:"llm_task",type:Xe.General},this.task)(o),v.debug(`Finished evaluation task on dataset item ${o.id}`);let n=this.prepareScoringInputs(o,i),p={traceId:t.data.id,datasetItemId:o.id,scoringInputs:n,taskOutput:i},c=e!=null?e:this.scoringMetrics;return c.length>0?this.calculateScores(p,c,t):{testCase:p,scoreResults:a}}async calculateScores(o,e,t){let i=[],{scoringInputs:a}=o,n=e!=null?e:this.scoringMetrics;for(let p of n){v.debug(`Calculating score for metric ${p.name}`);try{_x(p,a);let c=await p.score(a),l=Array.isArray(c)?c:[c];i.push(...l);}catch(c){let l=c instanceof Error?c.message:String(c);v.error(`Metric ${p.name} failed: ${l}`);}v.debug(`Finished calculating score for metric ${p.name}`);}return i.forEach(p=>t.score({name:p.name,value:p.value,reason:p.reason,categoryName:p.categoryName})),{testCase:o,scoreResults:i}}prepareScoringInputs(o,e){let t={...o,...e};if(!this.scoringKeyMapping)return t;let i={...t};for(let[a,n]of Object.entries(this.scoringKeyMapping)){let p=kx(t,n);p!==void 0&&(i[a]=p);}return i}};xb=Fb(),Wb(xb,1,"calculateScores",zx,_r),Ac(xb,_r);async function Px(s){var i;if(!s.dataset)throw new Error("Dataset is required for evaluation");if(!s.task)throw new Error("Task function is required for evaluation");let o=(i=s.client)!=null?i:Mt.getInstance(),e=await s.dataset.getVersionInfo(),t=await o.createExperiment({name:s.experimentName,datasetName:s.dataset.name,experimentConfig:s.experimentConfig,prompts:s.prompts,datasetVersionId:e==null?void 0:e.id,tags:s.tags,projectName:s.projectName});try{let a=new _r(s,o,t);return v.info("Starting evaluation"),a.execute()}catch(a){throw v.error(`Error during evaluation: ${a}`),a}}var qr=class{constructor(o){this.modelName=o;}};var Ya=class s extends Error{constructor(o){super(o),this.name="ModelError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}},Bt=class s extends Ya{constructor(e,t){super(e);this.cause=t;this.name="ModelGenerationError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}},kr=class s extends Ya{constructor(o){super(o),this.name="ModelConfigurationError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}};function _A(s,o){return s.startsWith("gpt-")||s.startsWith("o1")||s.startsWith("o3")||s.startsWith("chatgpt-")}function kA(s,o){return s.startsWith("claude-")}function zA(s,o){return s.startsWith("gemini-")||s.startsWith("gemma-")}function _b(s,o,e){if(!o)throw new kr(`API key for ${s} is not configured. Please provide it via the 'apiKey' option or set the ${e} environment variable.`)}function kb(s,o){if(_A(s)){let e=(o==null?void 0:o.apiKey)||process.env.OPENAI_API_KEY;_b("OpenAI",e,"OPENAI_API_KEY");let t=(o==null?void 0:o.organization)||process.env.OPENAI_ORG_ID;return createOpenAI({...o,apiKey:e,...t&&{organization:t}})(s)}if(kA(s)){let e=(o==null?void 0:o.apiKey)||process.env.ANTHROPIC_API_KEY;return _b("Anthropic",e,"ANTHROPIC_API_KEY"),createAnthropic({...o,apiKey:e})(s)}if(zA(s)){let e=(o==null?void 0:o.apiKey)||process.env.GOOGLE_API_KEY;return _b("Google Gemini",e,"GOOGLE_API_KEY"),createGoogleGenerativeAI({...o,apiKey:e})(s)}throw new kr(`Unable to detect provider for model ID: ${s}. Supported providers are OpenAI (gpt-*, o1*, o3*, chatgpt-*), Anthropic (claude-*), and Google Gemini (gemini-*, gemma-*).`)}var PA={inputTokens:"prompt_tokens",outputTokens:"completion_tokens",totalTokens:"total_tokens"};function SA(s){if(!s||typeof s!="object")return false;let o=s,e="usage"in o,t="response"in o,i="text"in o,a="object"in o;return e||t||i||a}function Sx(s,o){if(!SA(s))return {model:o};let e={model:AA(s,o)},t=TA(s.usage);t&&(e.usage=t);let i=vA(s);i&&(e.provider=i);let a=jA(s);a&&(e.metadata=a);let n=CA(s);return n&&(e.output=n),e}function AA(s,o){var e,t;return (t=(e=s.response)==null?void 0:e.modelId)!=null?t:o}function TA(s){var i,a;if(!s)return;let o={};for(let[n,p]of Object.entries(PA)){let c=s[n];typeof c=="number"&&(o[p]=c);}let e=(i=s.inputTokenDetails)==null?void 0:i.cacheReadTokens;typeof e=="number"&&(o.cached_input_tokens=e);let t=(a=s.outputTokenDetails)==null?void 0:a.reasoningTokens;return typeof t=="number"&&(o.reasoning_tokens=t),Object.keys(o).length>0?o:void 0}function vA(s){if(!s.providerMetadata)return;let o=Object.keys(s.providerMetadata);return o.length>0?o[0]:void 0}function EA(s){if(!s)return;let o={};return s.id&&(o.id=s.id),s.timestamp&&(o.timestamp=s.timestamp.toISOString()),Object.keys(o).length>0?o:void 0}function jA(s){let o={};s.usage&&(o.usage=s.usage),s.warnings&&s.warnings.length>0&&(o.warnings=s.warnings);let e=EA(s.response);return e&&(o.response=e),s.providerMetadata&&(o.providerMetadata=s.providerMetadata),s.finishReason&&(o.finishReason=s.finishReason),Object.keys(o).length>0?o:void 0}function CA(s){let o={};return "text"in s&&s.text&&(o.text=s.text),"object"in s&&s.object!==void 0&&(o.object=s.object),"toolCalls"in s&&Array.isArray(s.toolCalls)&&(o.toolCalls=s.toolCalls),"toolResults"in s&&Array.isArray(s.toolResults)&&(o.toolResults=s.toolResults),"sources"in s&&Array.isArray(s.sources)&&(o.sources=s.sources),Object.keys(o).length>0?o:void 0}var zr=class extends qr{constructor(o,e={trackGenerations:true}){let t=typeof o=="string"?o:o.modelId;super(t);let{trackGenerations:i,...a}=e;try{typeof o!="string"?(this.model=o,v.debug("Initialized VercelAIChatModel with custom LanguageModel")):(this.model=kb(o,a),v.debug(`Initialized VercelAIChatModel with model ID: ${o}`)),i?this._generateText=xr({name:"model.generateText",type:Xe.Llm,enrichSpan:n=>Sx(n,this.modelName)},generateText):this._generateText=generateText;}catch(n){throw new kr(`Failed to initialize model ${t}: ${n instanceof Error?n.message:String(n)}`)}}async generateString(o,e,t){try{let i;if(e){v.debug(`Generating structured output with model ${this.modelName}, input length: ${o.length}`);let a=await this._generateText({model:this.model,prompt:o,output:Output.object({schema:e}),...t});v.debug(`Generated structured output with model ${this.modelName}`),i=JSON.stringify(a.output);}else {v.debug(`Generating text with model ${this.modelName}, input length: ${o.length}`);let a=await this._generateText({model:this.model,prompt:o,...t});v.debug(`Generated text with model ${this.modelName}, output length: ${a.text.length}`),i=a.text;}return i}catch(i){let a=e?`Failed to generate structured output with model ${this.modelName}`:`Failed to generate text with model ${this.modelName}`;throw v.error(a,{error:i}),new Bt(a,i instanceof Error?i:new Error(String(i)))}}async generateProviderResponse(o,e){try{v.debug(`Generating provider response with model ${this.modelName}, messages count: ${o.length}`);let t=await this._generateText({model:this.model,messages:o,...e});return v.debug(`Generated provider response with model ${this.modelName}`),t}catch(t){let i=`Failed to generate provider response with model ${this.modelName}`;throw v.error(i,{error:t}),new Bt(i,t instanceof Error?t:new Error(String(t)))}}};var Tx="gpt-5-nano";function zb(s,o){return new zr(s,o)}function vx(s,o){return new zr(s,o)}function DA(s){return s instanceof qr}function UA(s){return typeof s=="object"&&s!==null&&"modelId"in s&&typeof s.modelId=="string"}function NA(s){return typeof s=="string"&&s.length>0}function FA(s){let o=typeof s,e=o==="object"?JSON.stringify(s):String(s);return new Error(`Invalid model type. Expected one of:
29
+ `);}static async processResults(o,e,t=0,i=[]){let a=this.calculateAverageScores(o),n;try{n=await e.getUrl();}catch{v.debug("Could not resolve experiment URL, skipping dashboard link");}await this.generateResultTable(o,e,a,t,n);let p=await e.ensureNameLoaded();return {experimentId:e.id,experimentName:p,testResults:o,errors:i}}};function kx(s,o,e){if(!s||typeof o!="string"||o.trim()==="")return e;let t=o.replace(/\[(\w+)\]/g,".$1").replace(/^\./,"").split("."),i=s;for(let a of t)if(typeof i=="object"&&i!==null&&a in i)i=i[a];else return e;return i===void 0?e:i}var zx,xb;zx=[xr({name:"metrics_calculation",type:Xe.General})];var _r=class{constructor(o,e,t){Bb(xb,5,this);this.client=void 0;this.dataset=void 0;this.task=void 0;this.scoringMetrics=void 0;this.projectName=void 0;this.nbSamples=void 0;this.scoringKeyMapping=void 0;this.experiment=void 0;this.suiteMode=void 0;this.executionPolicy=void 0;this.prefetchedItems=void 0;this.itemMetricsMap=void 0;this.itemPolicyMap=void 0;var i;this.client=e,this.dataset=o.dataset,this.experiment=t,this.task=o.task,this.scoringMetrics=o.scoringMetrics||[],this.projectName=o.projectName,this.nbSamples=o.nbSamples,this.scoringKeyMapping=o.scoringKeyMapping,this.suiteMode=(i=o.suiteMode)!=null?i:false,this.executionPolicy=o.executionPolicy,this.prefetchedItems=o.prefetchedItems,this.itemMetricsMap=o.itemMetricsMap,this.itemPolicyMap=o.itemPolicyMap;}async execute(){let o=await this.getDatasetItems(),e=this.calculateTotalRuns(o),t=this.createProgressTracker(o.length,e),i=performance.now();try{let a=[],n=[],p=[],c=0;for(let d=0;d<o.length;d++){let m=o[d],g=this.getRunsPerItem(m),u=this.getItemMetrics(m);for(let f=0;f<g;f++){try{let h=await this.executeItemRun(m,u,f,p);a.push(h);}catch(h){let R=h instanceof Error?h.message:String(h);n.push({datasetItemId:m.id,runIndex:f,message:R,...h instanceof Error&&{error:h}}),t.recordFailure();}c++;}t.update(c,d);}this.experiment.insert(p),await this.client.flush();let l=(performance.now()-i)/1e3;return t.complete(l),t.reportErrors(n),ic.processResults(a,this.experiment,l,n)}finally{t.restoreLogLevel();}}async getDatasetItems(){var o;return (o=this.prefetchedItems)!=null?o:await this.dataset.getItems(this.nbSamples)}calculateTotalRuns(o){var t,i;let e=(i=(t=this.executionPolicy)==null?void 0:t.runsPerItem)!=null?i:1;return this.itemPolicyMap?o.reduce((a,n)=>{var c;let p=this.itemPolicyMap.get(n.id);return a+((c=p==null?void 0:p.runsPerItem)!=null?c:e)},0):o.length*e}getRunsPerItem(o){var e,t,i,a,n;return (n=(a=(t=(e=this.itemPolicyMap)==null?void 0:e.get(o.id))==null?void 0:t.runsPerItem)!=null?a:(i=this.executionPolicy)==null?void 0:i.runsPerItem)!=null?n:1}getItemMetrics(o){var e;return (e=this.itemMetricsMap)==null?void 0:e.get(o.id)}createProgressTracker(o,e){let t=v.settings.minLevel;v.settings.minLevel=6;let i=0,a=this.suiteMode?`Evaluating dataset (0/${e} runs across ${o} items)`:`Evaluating dataset (0/${o} items)`,n=bA({text:a}).start(),p=()=>i>0?`, ${i} failed`:"";return {update:(c,l)=>{n.text=this.suiteMode?`Evaluating dataset (${c}/${e} runs across ${o} items, ${Math.round(c/e*100)}%${p()})`:`Evaluating dataset (${l+1}/${o} items, ${Math.round((l+1)/o*100)}%${p()})`;},complete:c=>{let l=this.suiteMode?`Evaluation complete: ${e} runs across ${o} items processed in ${c.toFixed(2)}s`:`Evaluation complete: ${o} items processed in ${c.toFixed(2)}s`;i>0?n.warn(`${l} (${i} failed)`):n.succeed(l);},recordFailure:()=>{i++;},reportErrors:c=>{for(let l of c)v.error(`Dataset item ${l.datasetItemId} (run ${l.runIndex}): ${l.message}`);},restoreLogLevel:()=>{v.settings.minLevel=t;}}}async executeItemRun(o,e,t,i){var n,p;let a=this.client.trace({projectName:this.projectName,name:"evaluation_task",createdBy:"evaluation",source:"experiment",input:o});Or.enterWith({trace:a});try{let c=await this.executeTask(o,e,a);if(this.suiteMode){c.trialId=t;let l=(n=this.itemPolicyMap)==null?void 0:n.get(o.id);l&&(c.resolvedExecutionPolicy=l);}return a.update({output:c.testCase.taskOutput,endTime:new Date}),c}catch(c){throw c instanceof Error&&a.update({errorInfo:{message:c.message,exceptionType:c.name,traceback:(p=c.stack)!=null?p:""},endTime:new Date}),c}finally{i.push(new Jp({datasetItemId:o.id,traceId:a.data.id,projectName:a.data.projectName}));}}async executeTask(o,e,t){let i={},a=[];v.debug(`Starting evaluation task on dataset item ${o.id}`),i=await xr({name:"llm_task",type:Xe.General},this.task)(o),v.debug(`Finished evaluation task on dataset item ${o.id}`);let n=this.prepareScoringInputs(o,i),p={traceId:t.data.id,datasetItemId:o.id,scoringInputs:n,taskOutput:i},c=e!=null?e:this.scoringMetrics;return c.length>0?this.calculateScores(p,c,t):{testCase:p,scoreResults:a}}async calculateScores(o,e,t){let i=[],{scoringInputs:a}=o,n=e!=null?e:this.scoringMetrics;for(let p of n){v.debug(`Calculating score for metric ${p.name}`);try{_x(p,a);let c=await p.score(a),l=Array.isArray(c)?c:[c];i.push(...l);}catch(c){let l=c instanceof Error?c.message:String(c);v.error(`Metric ${p.name} failed: ${l}`);}v.debug(`Finished calculating score for metric ${p.name}`);}return i.forEach(p=>t.score({name:p.name,value:p.value,reason:p.reason,categoryName:p.categoryName})),{testCase:o,scoreResults:i}}prepareScoringInputs(o,e){let t={...o,...e};if(!this.scoringKeyMapping)return t;let i={...t};for(let[a,n]of Object.entries(this.scoringKeyMapping)){let p=kx(t,n);p!==void 0&&(i[a]=p);}return i}};xb=Fb(),Wb(xb,1,"calculateScores",zx,_r),Ac(xb,_r);async function Px(s){var a;if(!s.dataset)throw new Error("Dataset is required for evaluation");if(!s.task)throw new Error("Task function is required for evaluation");let o=(a=s.client)!=null?a:Mt.getInstance(),e=s.experimentConfig;if(s.blueprintId){let n={_blueprint_id:s.blueprintId};try{let p=await o.api.agentConfigs.getBlueprintById(s.blueprintId);p.name&&(n.blueprint_version=p.name);}catch(p){v.debug(`Failed to fetch blueprint ${s.blueprintId}: ${p}`);}e={...e,agent_configuration:n};}let t=await s.dataset.getVersionInfo(),i=await o.createExperiment({name:s.experimentName,datasetName:s.dataset.name,experimentConfig:e,prompts:s.prompts,datasetVersionId:t==null?void 0:t.id,tags:s.tags,projectName:s.projectName});try{let n=new _r(s,o,i);return v.info("Starting evaluation"),n.execute()}catch(n){throw v.error(`Error during evaluation: ${n}`),n}}var qr=class{constructor(o){this.modelName=o;}};var Ya=class s extends Error{constructor(o){super(o),this.name="ModelError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}},Bt=class s extends Ya{constructor(e,t){super(e);this.cause=t;this.name="ModelGenerationError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}},kr=class s extends Ya{constructor(o){super(o),this.name="ModelConfigurationError",Error.captureStackTrace&&Error.captureStackTrace(this,s);}};function _A(s,o){return s.startsWith("gpt-")||s.startsWith("o1")||s.startsWith("o3")||s.startsWith("chatgpt-")}function kA(s,o){return s.startsWith("claude-")}function zA(s,o){return s.startsWith("gemini-")||s.startsWith("gemma-")}function _b(s,o,e){if(!o)throw new kr(`API key for ${s} is not configured. Please provide it via the 'apiKey' option or set the ${e} environment variable.`)}function kb(s,o){if(_A(s)){let e=(o==null?void 0:o.apiKey)||process.env.OPENAI_API_KEY;_b("OpenAI",e,"OPENAI_API_KEY");let t=(o==null?void 0:o.organization)||process.env.OPENAI_ORG_ID;return createOpenAI({...o,apiKey:e,...t&&{organization:t}})(s)}if(kA(s)){let e=(o==null?void 0:o.apiKey)||process.env.ANTHROPIC_API_KEY;return _b("Anthropic",e,"ANTHROPIC_API_KEY"),createAnthropic({...o,apiKey:e})(s)}if(zA(s)){let e=(o==null?void 0:o.apiKey)||process.env.GOOGLE_API_KEY;return _b("Google Gemini",e,"GOOGLE_API_KEY"),createGoogleGenerativeAI({...o,apiKey:e})(s)}throw new kr(`Unable to detect provider for model ID: ${s}. Supported providers are OpenAI (gpt-*, o1*, o3*, chatgpt-*), Anthropic (claude-*), and Google Gemini (gemini-*, gemma-*).`)}var PA={inputTokens:"prompt_tokens",outputTokens:"completion_tokens",totalTokens:"total_tokens"};function SA(s){if(!s||typeof s!="object")return false;let o=s,e="usage"in o,t="response"in o,i="text"in o,a="object"in o;return e||t||i||a}function Sx(s,o){if(!SA(s))return {model:o};let e={model:AA(s,o)},t=TA(s.usage);t&&(e.usage=t);let i=vA(s);i&&(e.provider=i);let a=jA(s);a&&(e.metadata=a);let n=CA(s);return n&&(e.output=n),e}function AA(s,o){var e,t;return (t=(e=s.response)==null?void 0:e.modelId)!=null?t:o}function TA(s){var i,a;if(!s)return;let o={};for(let[n,p]of Object.entries(PA)){let c=s[n];typeof c=="number"&&(o[p]=c);}let e=(i=s.inputTokenDetails)==null?void 0:i.cacheReadTokens;typeof e=="number"&&(o.cached_input_tokens=e);let t=(a=s.outputTokenDetails)==null?void 0:a.reasoningTokens;return typeof t=="number"&&(o.reasoning_tokens=t),Object.keys(o).length>0?o:void 0}function vA(s){if(!s.providerMetadata)return;let o=Object.keys(s.providerMetadata);return o.length>0?o[0]:void 0}function EA(s){if(!s)return;let o={};return s.id&&(o.id=s.id),s.timestamp&&(o.timestamp=s.timestamp.toISOString()),Object.keys(o).length>0?o:void 0}function jA(s){let o={};s.usage&&(o.usage=s.usage),s.warnings&&s.warnings.length>0&&(o.warnings=s.warnings);let e=EA(s.response);return e&&(o.response=e),s.providerMetadata&&(o.providerMetadata=s.providerMetadata),s.finishReason&&(o.finishReason=s.finishReason),Object.keys(o).length>0?o:void 0}function CA(s){let o={};return "text"in s&&s.text&&(o.text=s.text),"object"in s&&s.object!==void 0&&(o.object=s.object),"toolCalls"in s&&Array.isArray(s.toolCalls)&&(o.toolCalls=s.toolCalls),"toolResults"in s&&Array.isArray(s.toolResults)&&(o.toolResults=s.toolResults),"sources"in s&&Array.isArray(s.sources)&&(o.sources=s.sources),Object.keys(o).length>0?o:void 0}var zr=class extends qr{constructor(o,e={trackGenerations:true}){let t=typeof o=="string"?o:o.modelId;super(t);let{trackGenerations:i,...a}=e;try{typeof o!="string"?(this.model=o,v.debug("Initialized VercelAIChatModel with custom LanguageModel")):(this.model=kb(o,a),v.debug(`Initialized VercelAIChatModel with model ID: ${o}`)),i?this._generateText=xr({name:"model.generateText",type:Xe.Llm,enrichSpan:n=>Sx(n,this.modelName)},generateText):this._generateText=generateText;}catch(n){throw new kr(`Failed to initialize model ${t}: ${n instanceof Error?n.message:String(n)}`)}}async generateString(o,e,t){try{let i;if(e){v.debug(`Generating structured output with model ${this.modelName}, input length: ${o.length}`);let a=await this._generateText({model:this.model,prompt:o,output:Output.object({schema:e}),...t});v.debug(`Generated structured output with model ${this.modelName}`),i=JSON.stringify(a.output);}else {v.debug(`Generating text with model ${this.modelName}, input length: ${o.length}`);let a=await this._generateText({model:this.model,prompt:o,...t});v.debug(`Generated text with model ${this.modelName}, output length: ${a.text.length}`),i=a.text;}return i}catch(i){let a=e?`Failed to generate structured output with model ${this.modelName}`:`Failed to generate text with model ${this.modelName}`;throw v.error(a,{error:i}),new Bt(a,i instanceof Error?i:new Error(String(i)))}}async generateProviderResponse(o,e){try{v.debug(`Generating provider response with model ${this.modelName}, messages count: ${o.length}`);let t=await this._generateText({model:this.model,messages:o,...e});return v.debug(`Generated provider response with model ${this.modelName}`),t}catch(t){let i=`Failed to generate provider response with model ${this.modelName}`;throw v.error(i,{error:t}),new Bt(i,t instanceof Error?t:new Error(String(t)))}}};var Tx="gpt-5-nano";function zb(s,o){return new zr(s,o)}function vx(s,o){return new zr(s,o)}function DA(s){return s instanceof qr}function UA(s){return typeof s=="object"&&s!==null&&"modelId"in s&&typeof s.modelId=="string"}function NA(s){return typeof s=="string"&&s.length>0}function FA(s){let o=typeof s,e=o==="object"?JSON.stringify(s):String(s);return new Error(`Invalid model type. Expected one of:
30
30
  - string (model ID like 'gpt-5-nano', 'claude-3-5-sonnet-latest')
31
31
  - LanguageModel instance from Vercel AI SDK
32
32
  - OpikBaseModel instance
package/dist/index.cjs CHANGED
@@ -54,7 +54,7 @@ Each assertion below is an EVALUATION CRITERION to check against the agent's out
54
54
  ---BEGIN ASSERTIONS---
55
55
  {assertions}
56
56
  ---END ASSERTIONS---`;});var E6;exports.ResponseSchema=void 0;var Hj=n(()=>{E6=zod.z.object({score:zod.z.boolean(),reason:zod.z.string(),confidence:zod.z.number().min(0).max(1)}),exports.ResponseSchema=class{constructor(i){this.fieldMapping=new Map(i.map((t,o)=>[`assertion_${o+1}`,t]));let e={};for(let[t,o]of this.fieldMapping)e[t]=E6.describe(o);this.schema=zod.z.object(e);}get responseSchema(){return this.schema}formatAssertions(){return [...this.fieldMapping.entries()].map(([i,e])=>`- \`${i}\`: ${e}`).join(`
57
- `)}parse(i){let e=[];for(let[t,o]of this.fieldMapping){let a=i[t];if(a==null){e.push({name:o,value:0,reason:`Assertion field missing from LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}if(typeof a!="object"||Array.isArray(a)){e.push({name:o,value:0,reason:`Assertion field malformed in LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}let s=E6.safeParse(a);if(!s.success){e.push({name:o,value:0,reason:`Assertion field malformed in LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}e.push({name:o,value:s.data.score?1:0,reason:s.data.reason,categoryName:"suite_assertion"});}return e}};});function ks(p){return typeof p=="object"&&p!==null?p:{}}var z8;exports.LLMJudge=void 0;var As=n(()=>{Bj();Nm();Jj();Hj();Z();z8="low";exports.LLMJudge=class p extends exports.BaseSuiteEvaluator{constructor(i){var e,t,o,a,s;if(super((e=i.name)!=null?e:"llm_judge",(t=i.track)!=null?t:true),i.assertions.length===0)throw new Error("LLMJudge requires at least one assertion");for(let c of i.assertions)if(typeof c!="string"||c.trim()==="")throw new Error(`LLMJudge assertions must be non-empty strings. Received: ${JSON.stringify(c)}`);this.assertions=i.assertions,this.modelName=(o=i.model)!=null?o:"gpt-5-nano",this.seed=i.seed,this.temperature=i.temperature,this.reasoningEffort=(a=i.reasoningEffort)!=null?a:z8,this.projectName=i.projectName,this.model=Dt(this.modelName,{trackGenerations:(s=i.track)!=null?s:true}),this.responseSchema=new exports.ResponseSchema(this.assertions);}toConfig(){let i=exports.USER_PROMPT_TEMPLATE.replace("{assertions}",this.responseSchema.formatAssertions());return {version:"1.0.0",name:this.name,model:{name:this.modelName,...this.temperature!==void 0&&{temperature:this.temperature},...this.seed!==void 0&&{seed:this.seed},customParameters:{reasoning_effort:this.reasoningEffort}},messages:[{role:"SYSTEM",content:exports.SYSTEM_PROMPT},{role:"USER",content:i}],variables:{input:"input",output:"output"},schema:this.assertions.map(e=>({name:e,type:"BOOLEAN",description:e}))}}hasSameSettings(i){return this.modelName===i.modelName&&this.temperature===i.temperature&&this.seed===i.seed&&this.reasoningEffort===i.reasoningEffort&&this.trackMetric===i.trackMetric}static merged(i){if(i.length<=1)return;let e=i[0];if(!i.every(a=>e.hasSameSettings(a)))return;let t=new Set,o=[];for(let a of i)for(let s of a.assertions)t.has(s)||(t.add(s),o.push(s));return new p({assertions:o,name:e.name,model:e.modelName,seed:e.seed,temperature:e.temperature,reasoningEffort:e.reasoningEffort,track:e.trackMetric})}static fromConfig(i,e){var l,m,u;let t=(l=i.schema)!=null?l:[],o=ks(i.model),a=t.map(d=>{var f;return (f=d.description)!=null?f:d.name}),s=ks(o.customParameters),c=typeof s.reasoning_effort=="string"?s.reasoning_effort:void 0;return new p({assertions:a,name:typeof i.name=="string"?i.name:"llm_judge",model:(m=e==null?void 0:e.model)!=null?m:typeof o.name=="string"?o.name:"gpt-5-nano",temperature:typeof o.temperature=="number"?o.temperature:void 0,seed:typeof o.seed=="number"?o.seed:void 0,reasoningEffort:c,track:(u=e==null?void 0:e.track)!=null?u:true})}async score(i){var s,c;let e=ks(i),t=typeof e.input=="string"?e.input:JSON.stringify((s=e.input)!=null?s:""),o=typeof e.output=="string"?e.output:JSON.stringify((c=e.output)!=null?c:""),a=exports.USER_PROMPT_TEMPLATE.replace("{input}",t).replace("{output}",o).replace("{assertions}",this.responseSchema.formatAssertions());try{let l=await this.model.generateProviderResponse([{role:"system",content:exports.SYSTEM_PROMPT},{role:"user",content:a}],{...this.temperature!==void 0&&{temperature:this.temperature},...this.seed!==void 0&&{seed:this.seed},reasoning_effort:this.reasoningEffort,output:ai$1.Output.object({schema:this.responseSchema.responseSchema})}),m=ks(ks(l).output);return this.responseSchema.parse(m)}catch(l){return exports.logger.debug(`LLMJudge scoring failed: ${l instanceof Error?l.message:String(l)}`),this.assertions.map(m=>({name:m,value:0,reason:`LLM scoring failed: ${l instanceof Error?l.message:String(l)}`,scoringFailed:true,categoryName:"suite_assertion"}))}}};});function Ut(p){return p.map(i=>({name:i.name,type:wu.LlmJudge,config:i.toConfig()}))}function Zr(p,i){let e=[];for(let t of p)t.type===xu.LlmJudge?e.push(exports.LLMJudge.fromConfig(t.config,i?{model:i}:void 0)):exports.logger.warn(`Unsupported evaluator type: ${t.type}. Skipping.`);return e}function Er(p){var i,e;return {runsPerItem:(i=p==null?void 0:p.runsPerItem)!=null?i:exports.DEFAULT_EXECUTION_POLICY.runsPerItem,passThreshold:(e=p==null?void 0:p.passThreshold)!=null?e:exports.DEFAULT_EXECUTION_POLICY.passThreshold}}function C6(p,i){if(p.length!==i.length)return false;let e=a=>a.map(s=>JSON.stringify(s.toConfig())).sort(),t=e(p),o=e(i);return t.every((a,s)=>a===o[s])}function I6(p,i){return p.runsPerItem===i.runsPerItem&&p.passThreshold===i.passThreshold}function Bi(p,i){var e,t;return p?{runsPerItem:(e=p.runsPerItem)!=null?e:i.runsPerItem,passThreshold:(t=p.passThreshold)!=null?t:i.passThreshold}:i}function D6(p){if(typeof p!="object"||p===null)throw new TypeError(`The task function must return an object with 'input' and 'output' keys, but it returned ${typeof p}. Example: return { input: data, output: response }`);let i=p,e=[];if("input"in i||e.push("input"),"output"in i||e.push("output"),e.length>0)throw new Error(`The task function must return an object with 'input' and 'output' keys, but the returned object is missing: ${e.join(", ")}. Got keys: ${Object.keys(i).join(", ")}. Example: return { input: data, output: response }`);return i}var Ps=n(()=>{As();_u();Ru();jm();Z();});async function Mm(p){var u;if(!p.dataset)throw new Error("Dataset is required for test suite");if(!p.task)throw new Error("Task function is required for test suite");let i=(u=p.client)!=null?u:Ni.getInstance(),e=await p.dataset.getVersionInfo(),t=e!=null&&e.evaluators?Zr(e.evaluators,p.evaluatorModel):[],o=Er(e==null?void 0:e.executionPolicy),a=await p.dataset.getRawItems(),s=new Map,c=new Map;for(let d of a){let f=d.evaluators?Zr(d.evaluators,p.evaluatorModel):[],g=[...t,...f],h=g.filter(C=>C instanceof exports.LLMJudge),y=exports.LLMJudge.merged(h),w=y?[y,...g.filter(C=>!(C instanceof exports.LLMJudge))]:g;s.set(d.id,w);let A=Bi(d.executionPolicy,o);c.set(d.id,A);}let l=a.map(d=>d.getContent(true)),m=await i.createExperiment({name:p.experimentName,datasetName:p.dataset.name,experimentConfig:p.experimentConfig,prompts:p.prompts,datasetVersionId:e==null?void 0:e.id,evaluationMethod:Ns.EvaluationSuite,tags:p.tags,projectName:p.projectName});try{let d={suiteMode:!0,dataset:p.dataset,task:p.task,scoringMetrics:t,projectName:p.projectName,executionPolicy:o,prefetchedItems:l,itemMetricsMap:s,itemPolicyMap:c};return await new $r(d,i,m).execute()}catch(d){throw exports.logger.error(`Error during test suite: ${d}`),d}}var Gj=n(()=>{Uj();Fj();As();Z();Ps();Cd();});async function U6(p){let{testSuite:i,task:e,model:t,experimentTags:o,...a}=p,s=async l=>{let m=await e(l);return D6(m)},c=await Mm({dataset:i.dataset,task:s,client:i.client,evaluatorModel:t,tags:o,...a});return Ij(c)}var N6=n(()=>{Gj();Dj();Ps();});async function F6(p){var o;if(!p.dataset)throw new Error("Dataset is required for evaluation");if(!p.task)throw new Error("Task function is required for evaluation");let i=(o=p.client)!=null?o:Ni.getInstance(),e=await p.dataset.getVersionInfo(),t=await i.createExperiment({name:p.experimentName,datasetName:p.dataset.name,experimentConfig:p.experimentConfig,prompts:p.prompts,datasetVersionId:e==null?void 0:e.id,tags:p.tags,projectName:p.projectName});try{let a=new $r(p,i,t);return exports.logger.info("Starting evaluation"),a.execute()}catch(a){throw exports.logger.error(`Error during evaluation: ${a}`),a}}var Oj=n(()=>{Z();Fj();Uj();});var Kj=n(()=>{Im();Um();Nm();Dm();Lj();});function B6(p){return p.map(i=>{let e=Array.isArray(i.content)?S8(i.content):i.content;return `${i.role}: ${e}`}).join(`
57
+ `)}parse(i){let e=[];for(let[t,o]of this.fieldMapping){let a=i[t];if(a==null){e.push({name:o,value:0,reason:`Assertion field missing from LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}if(typeof a!="object"||Array.isArray(a)){e.push({name:o,value:0,reason:`Assertion field malformed in LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}let s=E6.safeParse(a);if(!s.success){e.push({name:o,value:0,reason:`Assertion field malformed in LLM response: "${t}"`,scoringFailed:true,categoryName:"suite_assertion"});continue}e.push({name:o,value:s.data.score?1:0,reason:s.data.reason,categoryName:"suite_assertion"});}return e}};});function ks(p){return typeof p=="object"&&p!==null?p:{}}var z8;exports.LLMJudge=void 0;var As=n(()=>{Bj();Nm();Jj();Hj();Z();z8="low";exports.LLMJudge=class p extends exports.BaseSuiteEvaluator{constructor(i){var e,t,o,a,s;if(super((e=i.name)!=null?e:"llm_judge",(t=i.track)!=null?t:true),i.assertions.length===0)throw new Error("LLMJudge requires at least one assertion");for(let c of i.assertions)if(typeof c!="string"||c.trim()==="")throw new Error(`LLMJudge assertions must be non-empty strings. Received: ${JSON.stringify(c)}`);this.assertions=i.assertions,this.modelName=(o=i.model)!=null?o:"gpt-5-nano",this.seed=i.seed,this.temperature=i.temperature,this.reasoningEffort=(a=i.reasoningEffort)!=null?a:z8,this.projectName=i.projectName,this.model=Dt(this.modelName,{trackGenerations:(s=i.track)!=null?s:true}),this.responseSchema=new exports.ResponseSchema(this.assertions);}toConfig(){let i=exports.USER_PROMPT_TEMPLATE.replace("{assertions}",this.responseSchema.formatAssertions());return {version:"1.0.0",name:this.name,model:{name:this.modelName,...this.temperature!==void 0&&{temperature:this.temperature},...this.seed!==void 0&&{seed:this.seed},customParameters:{reasoning_effort:this.reasoningEffort}},messages:[{role:"SYSTEM",content:exports.SYSTEM_PROMPT},{role:"USER",content:i}],variables:{input:"input",output:"output"},schema:this.assertions.map(e=>({name:e,type:"BOOLEAN",description:e}))}}hasSameSettings(i){return this.modelName===i.modelName&&this.temperature===i.temperature&&this.seed===i.seed&&this.reasoningEffort===i.reasoningEffort&&this.trackMetric===i.trackMetric}static merged(i){if(i.length<=1)return;let e=i[0];if(!i.every(a=>e.hasSameSettings(a)))return;let t=new Set,o=[];for(let a of i)for(let s of a.assertions)t.has(s)||(t.add(s),o.push(s));return new p({assertions:o,name:e.name,model:e.modelName,seed:e.seed,temperature:e.temperature,reasoningEffort:e.reasoningEffort,track:e.trackMetric})}static fromConfig(i,e){var l,m,u;let t=(l=i.schema)!=null?l:[],o=ks(i.model),a=t.map(d=>{var f;return (f=d.description)!=null?f:d.name}),s=ks(o.customParameters),c=typeof s.reasoning_effort=="string"?s.reasoning_effort:void 0;return new p({assertions:a,name:typeof i.name=="string"?i.name:"llm_judge",model:(m=e==null?void 0:e.model)!=null?m:typeof o.name=="string"?o.name:"gpt-5-nano",temperature:typeof o.temperature=="number"?o.temperature:void 0,seed:typeof o.seed=="number"?o.seed:void 0,reasoningEffort:c,track:(u=e==null?void 0:e.track)!=null?u:true})}async score(i){var s,c;let e=ks(i),t=typeof e.input=="string"?e.input:JSON.stringify((s=e.input)!=null?s:""),o=typeof e.output=="string"?e.output:JSON.stringify((c=e.output)!=null?c:""),a=exports.USER_PROMPT_TEMPLATE.replace("{input}",t).replace("{output}",o).replace("{assertions}",this.responseSchema.formatAssertions());try{let l=await this.model.generateProviderResponse([{role:"system",content:exports.SYSTEM_PROMPT},{role:"user",content:a}],{...this.temperature!==void 0&&{temperature:this.temperature},...this.seed!==void 0&&{seed:this.seed},reasoning_effort:this.reasoningEffort,output:ai$1.Output.object({schema:this.responseSchema.responseSchema})}),m=ks(ks(l).output);return this.responseSchema.parse(m)}catch(l){return exports.logger.debug(`LLMJudge scoring failed: ${l instanceof Error?l.message:String(l)}`),this.assertions.map(m=>({name:m,value:0,reason:`LLM scoring failed: ${l instanceof Error?l.message:String(l)}`,scoringFailed:true,categoryName:"suite_assertion"}))}}};});function Ut(p){return p.map(i=>({name:i.name,type:wu.LlmJudge,config:i.toConfig()}))}function Zr(p,i){let e=[];for(let t of p)t.type===xu.LlmJudge?e.push(exports.LLMJudge.fromConfig(t.config,i?{model:i}:void 0)):exports.logger.warn(`Unsupported evaluator type: ${t.type}. Skipping.`);return e}function Er(p){var i,e;return {runsPerItem:(i=p==null?void 0:p.runsPerItem)!=null?i:exports.DEFAULT_EXECUTION_POLICY.runsPerItem,passThreshold:(e=p==null?void 0:p.passThreshold)!=null?e:exports.DEFAULT_EXECUTION_POLICY.passThreshold}}function C6(p,i){if(p.length!==i.length)return false;let e=a=>a.map(s=>JSON.stringify(s.toConfig())).sort(),t=e(p),o=e(i);return t.every((a,s)=>a===o[s])}function I6(p,i){return p.runsPerItem===i.runsPerItem&&p.passThreshold===i.passThreshold}function Bi(p,i){var e,t;return p?{runsPerItem:(e=p.runsPerItem)!=null?e:i.runsPerItem,passThreshold:(t=p.passThreshold)!=null?t:i.passThreshold}:i}function D6(p){if(typeof p!="object"||p===null)throw new TypeError(`The task function must return an object with 'input' and 'output' keys, but it returned ${typeof p}. Example: return { input: data, output: response }`);let i=p,e=[];if("input"in i||e.push("input"),"output"in i||e.push("output"),e.length>0)throw new Error(`The task function must return an object with 'input' and 'output' keys, but the returned object is missing: ${e.join(", ")}. Got keys: ${Object.keys(i).join(", ")}. Example: return { input: data, output: response }`);return i}var Ps=n(()=>{As();_u();Ru();jm();Z();});async function Mm(p){var u;if(!p.dataset)throw new Error("Dataset is required for test suite");if(!p.task)throw new Error("Task function is required for test suite");let i=(u=p.client)!=null?u:Ni.getInstance(),e=await p.dataset.getVersionInfo(),t=e!=null&&e.evaluators?Zr(e.evaluators,p.evaluatorModel):[],o=Er(e==null?void 0:e.executionPolicy),a=await p.dataset.getRawItems(),s=new Map,c=new Map;for(let d of a){let f=d.evaluators?Zr(d.evaluators,p.evaluatorModel):[],g=[...t,...f],h=g.filter(C=>C instanceof exports.LLMJudge),y=exports.LLMJudge.merged(h),w=y?[y,...g.filter(C=>!(C instanceof exports.LLMJudge))]:g;s.set(d.id,w);let A=Bi(d.executionPolicy,o);c.set(d.id,A);}let l=a.map(d=>d.getContent(true)),m=await i.createExperiment({name:p.experimentName,datasetName:p.dataset.name,experimentConfig:p.experimentConfig,prompts:p.prompts,datasetVersionId:e==null?void 0:e.id,evaluationMethod:Ns.EvaluationSuite,tags:p.tags,projectName:p.projectName});try{let d={suiteMode:!0,dataset:p.dataset,task:p.task,scoringMetrics:t,projectName:p.projectName,executionPolicy:o,prefetchedItems:l,itemMetricsMap:s,itemPolicyMap:c};return await new $r(d,i,m).execute()}catch(d){throw exports.logger.error(`Error during test suite: ${d}`),d}}var Gj=n(()=>{Uj();Fj();As();Z();Ps();Cd();});async function U6(p){let{testSuite:i,task:e,model:t,experimentTags:o,...a}=p,s=async l=>{let m=await e(l);return D6(m)},c=await Mm({dataset:i.dataset,task:s,client:i.client,evaluatorModel:t,tags:o,...a});return Ij(c)}var N6=n(()=>{Gj();Dj();Ps();});async function F6(p){var a;if(!p.dataset)throw new Error("Dataset is required for evaluation");if(!p.task)throw new Error("Task function is required for evaluation");let i=(a=p.client)!=null?a:Ni.getInstance(),e=p.experimentConfig;if(p.blueprintId){let s={_blueprint_id:p.blueprintId};try{let c=await i.api.agentConfigs.getBlueprintById(p.blueprintId);c.name&&(s.blueprint_version=c.name);}catch(c){exports.logger.debug(`Failed to fetch blueprint ${p.blueprintId}: ${c}`);}e={...e,agent_configuration:s};}let t=await p.dataset.getVersionInfo(),o=await i.createExperiment({name:p.experimentName,datasetName:p.dataset.name,experimentConfig:e,prompts:p.prompts,datasetVersionId:t==null?void 0:t.id,tags:p.tags,projectName:p.projectName});try{let s=new $r(p,i,o);return exports.logger.info("Starting evaluation"),s.execute()}catch(s){throw exports.logger.error(`Error during evaluation: ${s}`),s}}var Oj=n(()=>{Z();Fj();Uj();});var Kj=n(()=>{Im();Um();Nm();Dm();Lj();});function B6(p){return p.map(i=>{let e=Array.isArray(i.content)?S8(i.content):i.content;return `${i.role}: ${e}`}).join(`
58
58
  `)}function S8(p){return p.map(i=>i.type==="text"?i.text:i.type==="file"?`[file: ${i.filename||"unknown"}]`:i.type==="tool-call"?`[tool-call: ${i.toolName}]`:i.type==="tool-result"?`[tool-result: ${i.toolName}]`:i.type==="reasoning"?`[reasoning: ${i.text}]`:i.type==="tool-approval-request"?"[tool-approval-request]":i.type==="tool-approval-response"?"[tool-approval-response]":"").filter(i=>i.length>0).join(`
59
59
  `)}function T8(p,i,e){return typeof p=="string"?je(p,i,e):Array.isArray(p)?p.map(o=>o.type==="text"&&typeof o.text=="string"?{...o,text:je(o.text,i,e)}:o.type==="image"&&typeof o.image=="string"?{...o,image:je(o.image,i,e)}:o.type==="file"&&typeof o.data=="string"?{...o,data:je(o.data,i,e)}:o.type==="reasoning"&&typeof o.text=="string"?{...o,text:je(o.text,i,e)}:o):p}function M6(p,i,e){let t=T8(p.content,i,e);return {...p,content:t}}var W6=n(()=>{ps();});async function kar(p){var o;if(!p.dataset)throw new Error("Dataset is required for prompt evaluation");if(!p.messages||p.messages.length===0)throw new Error("Messages array is required and cannot be empty");if(p.experimentConfig!==void 0&&(typeof p.experimentConfig!="object"||p.experimentConfig===null||Array.isArray(p.experimentConfig)))throw new Error("experimentConfig must be a plain object, not an array or primitive value");let i=Dt(p.model),e={...p.experimentConfig,prompt_template:p.messages,model:i.modelName,...p.temperature!==void 0&&{temperature:p.temperature},...p.seed!==void 0&&{seed:p.seed}},t=v8(i,p.messages,(o=p.templateType)!=null?o:exports.PromptType.MUSTACHE,{temperature:p.temperature,seed:p.seed});return F6({dataset:p.dataset,task:t,scoringMetrics:p.scoringMetrics,experimentName:p.experimentName,projectName:p.projectName,experimentConfig:e,prompts:p.prompts,client:p.client,nbSamples:p.nbSamples,scoringKeyMapping:p.scoringKeyMapping})}function v8(p,i,e,t){return async o=>{let a=i.map(m=>M6(m,o,e)),s=await p.generateProviderResponse(a,t),c=j8(s);return {input:B6(a),output:c}}}function j8(p){return p&&typeof p=="object"?"text"in p&&typeof p.text=="string"?p.text:"content"in p&&typeof p.content=="string"?p.content:JSON.stringify(p):String(p)}var L6=n(()=>{Kj();Oj();Tt();W6();});var E8;exports.ExactMatch=void 0;var J6=n(()=>{Qr();E8=zod.z.object({output:zod.z.unknown(),expected:zod.z.unknown()}),exports.ExactMatch=class extends exports.BaseMetric{constructor(e="exact_match",t=true){super(e,t);this.validationSchema=E8;}async score(e){let{output:t,expected:o}=e,a=t===o?1:0;return {name:this.name,value:a,reason:`Exact match: ${a===1?"Match":"No match"}`}}};});var C8;exports.Contains=void 0;var G6=n(()=>{Qr();C8=zod.z.object({output:zod.z.string(),substring:zod.z.string()}),exports.Contains=class extends exports.BaseMetric{constructor(e="contains",t=true,o=false){super(e,t);this.validationSchema=C8;this.caseSensitive=o;}async score(e){let{output:t,substring:o}=e,a=this.caseSensitive?t:t.toLowerCase(),s=this.caseSensitive?o:o.toLowerCase();return a.includes(s)?{name:this.name,value:1,reason:`Contains: "${o}" found in output.`}:{name:this.name,value:0,reason:`Contains: "${o}" not found in output.`}}};});var I8;exports.RegexMatch=void 0;var K6=n(()=>{Qr();I8=zod.z.object({output:zod.z.string(),pattern:zod.z.string(),flags:zod.z.string().optional()}),exports.RegexMatch=class extends exports.BaseMetric{constructor(e="regex_match",t=true){super(e,t);this.validationSchema=I8;}async score(e){let{output:t,pattern:o,flags:a}=e,s;typeof o=="string"&&a?s=new RegExp(o,a):s=new RegExp(o);let c=s.test(t);return {name:this.name,value:c?1:0,reason:c?`Regex: Output matches the regex pattern: ${o}`:`Regex: Output does not match the regex pattern: ${o}`}}};});var D8;exports.IsJson=void 0;var Y6=n(()=>{Qr();D8=zod.z.object({output:zod.z.unknown()}),exports.IsJson=class extends exports.BaseMetric{constructor(e="is_json_metric",t=true){super(e,t);this.validationSchema=D8;}async score(e){let{output:t}=e;try{return JSON.parse(t),{name:this.name,value:1,reason:"IsJson: Output is valid JSON."}}catch{return {name:this.name,value:0,reason:"IsJson: Output is not valid JSON"}}}};});var X6=n(()=>{J6();G6();K6();Y6();});exports.BaseLLMJudgeMetric=void 0;var Nt=n(()=>{Qr();Nm();exports.BaseLLMJudgeMetric=class extends exports.BaseMetric{constructor(i,e){var o;let t=(o=e==null?void 0:e.trackMetric)!=null?o:true;super(i,t),this.temperature=e==null?void 0:e.temperature,this.seed=e==null?void 0:e.seed,this.maxTokens=e==null?void 0:e.maxTokens,this.modelSettings=e==null?void 0:e.modelSettings,this.model=this.initModel(e==null?void 0:e.model,{trackGenerations:t});}initModel(i,e){return Dt(i,e)}buildModelOptions(){return {...this.modelSettings,...this.temperature!==void 0&&{temperature:this.temperature},...this.seed!==void 0&&{seed:this.seed},...this.maxTokens!==void 0&&{maxTokens:this.maxTokens}}}};});function Z6(p,i=[]){let e="";return i.length>0&&(e=`
60
60
 
package/dist/index.d.cts CHANGED
@@ -14087,6 +14087,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
14087
14087
  scoringKeyMapping?: ScoringKeyMappingType;
14088
14088
  /** Optional list of tags to associate with the experiment */
14089
14089
  tags?: string[];
14090
+ /** Optional agent configuration blueprint ID to link with the experiment */
14091
+ blueprintId?: string;
14090
14092
  }
14091
14093
  declare function evaluate<T = Record<string, unknown>>(options: EvaluateOptions<T>): Promise<EvaluationResult>;
14092
14094
 
package/dist/index.d.ts CHANGED
@@ -14087,6 +14087,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
14087
14087
  scoringKeyMapping?: ScoringKeyMappingType;
14088
14088
  /** Optional list of tags to associate with the experiment */
14089
14089
  tags?: string[];
14090
+ /** Optional agent configuration blueprint ID to link with the experiment */
14091
+ blueprintId?: string;
14090
14092
  }
14091
14093
  declare function evaluate<T = Record<string, unknown>>(options: EvaluateOptions<T>): Promise<EvaluationResult>;
14092
14094
 
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- export{na as AgentTaskCompletionJudge,ma as AgentToolCorrectnessJudge,aa as AnswerRelevance,Y as BaseLLMJudgeMetric,x as BaseMetric,y as BaseSuiteEvaluator,m as ChatPrompt,pa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,V as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ha as DemographicBiasJudge,fa as DialogueHelpfulnessJudge,U as ExactMatch,ba as GEval,ca as GEvalPreset,ja as GenderBiasJudge,$ as Hallucination,X as IsJson,L as LLMJudge,C as ModelConfigurationError,A as ModelError,B as ModelGenerationError,Z as Moderation,ua as Opik,z as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ia as PoliticalBiasJudge,l as Prompt,k as PromptType,oa as PromptUncertaintyJudge,ga as QARelevanceJudge,W as RegexMatch,la as RegionalBiasJudge,ka as ReligiousBiasJudge,K as ResponseSchema,I as SYSTEM_PROMPT,ea as SummarizationCoherenceJudge,da as SummarizationConsistencyJudge,ta as TestSuite,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,J as USER_PROMPT_TEMPLATE,_ as Usefulness,E as VercelAIChatModel,s as activateRunner,q as agentConfigContext,w as buildSuiteResult,F as createModel,G as createModelFromInstance,N as deserializeEvaluators,D as detectProvider,c as disableLogger,S as evaluate,T as evaluatePrompt,Q as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,qa as resolveEvaluators,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,H as resolveModel,R as runTests,M as serializeEvaluators,b as setLoggerLevel,u as track,ra as validateEvaluators,sa as validateExecutionPolicy,va as z}from'./chunk-5DQLREVI.js';
1
+ export{na as AgentTaskCompletionJudge,ma as AgentToolCorrectnessJudge,aa as AnswerRelevance,Y as BaseLLMJudgeMetric,x as BaseMetric,y as BaseSuiteEvaluator,m as ChatPrompt,pa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,V as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ha as DemographicBiasJudge,fa as DialogueHelpfulnessJudge,U as ExactMatch,ba as GEval,ca as GEvalPreset,ja as GenderBiasJudge,$ as Hallucination,X as IsJson,L as LLMJudge,C as ModelConfigurationError,A as ModelError,B as ModelGenerationError,Z as Moderation,ua as Opik,z as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ia as PoliticalBiasJudge,l as Prompt,k as PromptType,oa as PromptUncertaintyJudge,ga as QARelevanceJudge,W as RegexMatch,la as RegionalBiasJudge,ka as ReligiousBiasJudge,K as ResponseSchema,I as SYSTEM_PROMPT,ea as SummarizationCoherenceJudge,da as SummarizationConsistencyJudge,ta as TestSuite,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,J as USER_PROMPT_TEMPLATE,_ as Usefulness,E as VercelAIChatModel,s as activateRunner,q as agentConfigContext,w as buildSuiteResult,F as createModel,G as createModelFromInstance,N as deserializeEvaluators,D as detectProvider,c as disableLogger,S as evaluate,T as evaluatePrompt,Q as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,qa as resolveEvaluators,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,H as resolveModel,R as runTests,M as serializeEvaluators,b as setLoggerLevel,u as track,ra as validateEvaluators,sa as validateExecutionPolicy,va as z}from'./chunk-MHMIE52N.js';
@@ -1 +1 @@
1
- export{v as DEFAULT_EXECUTION_POLICY,ta as TestSuite,w as buildSuiteResult,N as deserializeEvaluators,Q as evaluateTestSuite,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,R as runTests,M as serializeEvaluators}from'./chunk-5DQLREVI.js';
1
+ export{v as DEFAULT_EXECUTION_POLICY,ta as TestSuite,w as buildSuiteResult,N as deserializeEvaluators,Q as evaluateTestSuite,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,R as runTests,M as serializeEvaluators}from'./chunk-MHMIE52N.js';
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "opik",
3
3
  "description": "Opik TypeScript and JavaScript SDK",
4
- "version": "1.11.12",
4
+ "version": "1.11.13",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/comet-ml/opik.git",