eve 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dist/docs/public/README.md +16 -11
- package/dist/docs/public/agent-config.md +9 -9
- package/dist/docs/public/channels/custom.mdx +4 -4
- package/dist/docs/public/channels/discord.mdx +1 -1
- package/dist/docs/public/channels/eve.mdx +9 -9
- package/dist/docs/public/channels/github.mdx +1 -1
- package/dist/docs/public/channels/overview.mdx +21 -15
- package/dist/docs/public/channels/slack.mdx +2 -2
- package/dist/docs/public/channels/teams.mdx +1 -1
- package/dist/docs/public/channels/telegram.mdx +1 -1
- package/dist/docs/public/channels/twilio.mdx +1 -1
- package/dist/docs/public/{advanced → concepts}/context-control.md +3 -3
- package/dist/docs/public/{advanced → concepts}/default-harness.md +3 -3
- package/dist/docs/public/{advanced → concepts}/execution-model-and-durability.md +3 -1
- package/dist/docs/public/concepts/meta.json +10 -0
- package/dist/docs/public/{advanced → concepts}/security-model.md +2 -2
- package/dist/docs/public/{advanced → concepts}/sessions-runs-and-streaming.md +7 -7
- package/dist/docs/public/connections.mdx +3 -3
- package/dist/docs/public/evals/cases.mdx +2 -3
- package/dist/docs/public/evals/judge.mdx +1 -1
- package/dist/docs/public/evals/overview.mdx +4 -4
- package/dist/docs/public/evals/running.mdx +5 -10
- package/dist/docs/public/evals/targets.mdx +4 -23
- package/dist/docs/public/getting-started.mdx +28 -35
- package/dist/docs/public/{advanced → guides}/auth-and-route-protection.md +2 -2
- package/dist/docs/public/{client → guides/client}/continuations.mdx +2 -2
- package/dist/docs/public/{client → guides/client}/messages.mdx +1 -1
- package/dist/docs/public/{client → guides/client}/meta.json +1 -1
- package/dist/docs/public/{client → guides/client}/output-schema.mdx +2 -2
- package/dist/docs/public/{client → guides/client}/overview.mdx +5 -5
- package/dist/docs/public/{client → guides/client}/streaming.mdx +1 -1
- package/dist/docs/public/{advanced → guides}/deployment.md +9 -1
- package/dist/docs/public/{advanced → guides}/dynamic-capabilities.md +1 -1
- package/dist/docs/public/{advanced → guides}/dynamic-workflows.md +1 -1
- package/dist/docs/public/{frontend → guides/frontend}/nextjs.mdx +3 -3
- package/dist/docs/public/{frontend → guides/frontend}/nuxt.mdx +3 -3
- package/dist/docs/public/{frontend → guides/frontend}/overview.mdx +6 -6
- package/dist/docs/public/{frontend → guides/frontend}/sveltekit.mdx +3 -3
- package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-svelte.mdx +2 -2
- package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-vue.mdx +2 -2
- package/dist/docs/public/{advanced → guides}/hooks.md +2 -2
- package/dist/docs/public/{advanced → guides}/instrumentation.md +2 -0
- package/dist/docs/public/{advanced → guides}/meta.json +8 -11
- package/dist/docs/public/{advanced → guides}/session-context.md +2 -2
- package/dist/docs/public/{advanced → guides}/state.md +1 -1
- package/dist/docs/public/instructions.mdx +2 -2
- package/dist/docs/public/introduction.md +5 -2
- package/dist/docs/public/meta.json +3 -3
- package/dist/docs/public/reference/cli.md +3 -3
- package/dist/docs/public/reference/meta.json +1 -1
- package/dist/docs/public/reference/project-layout.md +5 -1
- package/dist/docs/public/reference/typescript-api.md +23 -23
- package/dist/docs/public/sandbox.mdx +1 -1
- package/dist/docs/public/schedules.mdx +2 -2
- package/dist/docs/public/skills.mdx +3 -3
- package/dist/docs/public/subagents.mdx +3 -3
- package/dist/docs/public/tools.mdx +4 -4
- package/dist/docs/public/tutorial/connect-a-warehouse.mdx +2 -2
- package/dist/docs/public/tutorial/first-agent.mdx +1 -1
- package/dist/docs/public/tutorial/guard-the-spend.mdx +1 -1
- package/dist/docs/public/tutorial/how-it-runs.mdx +2 -2
- package/dist/docs/public/tutorial/meta.json +1 -1
- package/dist/docs/public/tutorial/query-sample-data.mdx +1 -1
- package/dist/docs/public/tutorial/remember-definitions.mdx +3 -3
- package/dist/docs/public/tutorial/run-analysis.mdx +1 -1
- package/dist/docs/public/tutorial/ship-it.mdx +4 -4
- package/dist/docs/public/tutorial/team-playbooks.mdx +3 -3
- package/dist/src/cli/dev/tui/prompt-command-handler.js +1 -1
- package/dist/src/cli/dev/tui/runner.d.ts +2 -2
- package/dist/src/cli/dev/tui/runner.js +1 -1
- package/dist/src/cli/dev/tui/tui.js +1 -1
- package/dist/src/cli/run.d.ts +0 -2
- package/dist/src/cli/run.js +1 -1
- package/dist/src/client/types.d.ts +0 -1
- package/dist/src/compiled/.vendor-stamp.json +2 -2
- package/dist/src/compiled/@workflow/core/capabilities.d.ts +19 -1
- package/dist/src/compiled/@workflow/core/class-serialization.d.ts +32 -0
- package/dist/src/compiled/@workflow/core/create-hook.d.ts +37 -0
- package/dist/src/compiled/@workflow/core/global.d.ts +11 -1
- package/dist/src/compiled/@workflow/core/index.js +2 -2
- package/dist/src/compiled/@workflow/core/runtime/start.d.ts +6 -0
- package/dist/src/compiled/@workflow/core/runtime/suspension-handler.d.ts +15 -2
- package/dist/src/compiled/@workflow/core/runtime/wait-continuation.d.ts +84 -0
- package/dist/src/compiled/@workflow/core/runtime.js +27 -27
- package/dist/src/compiled/@workflow/core/serialization/types.d.ts +21 -0
- package/dist/src/compiled/@workflow/core/serialization.d.ts +72 -6
- package/dist/src/compiled/@workflow/core/symbols.d.ts +2 -0
- package/dist/src/compiled/@workflow/core/version.d.ts +1 -1
- package/dist/src/compiled/@workflow/core/workflow/attribute-dispatcher.d.ts +6 -0
- package/dist/src/compiled/@workflow/core/workflow/set-attributes.d.ts +3 -4
- package/dist/src/compiled/@workflow/core/workflow.js +1 -1
- package/dist/src/compiled/@workflow/world/events.d.ts +48 -0
- package/dist/src/compiled/@workflow/world/index.d.ts +1 -1
- package/dist/src/compiled/@workflow/world/queue.d.ts +3 -0
- package/dist/src/compiled/@workflow/world/runs.d.ts +2 -0
- package/dist/src/compiled/@workflow/world/spec-version.d.ts +2 -1
- package/dist/src/compiled/_chunks/workflow/attribute-changes-DGVGRGfw.js +59 -0
- package/dist/src/compiled/_chunks/workflow/resume-hook-DMSadN9o.js +1 -0
- package/dist/src/compiled/_chunks/workflow/run-BRdn7zy_.js +1 -0
- package/dist/src/compiled/_chunks/workflow/sleep-CpXfoXLF.js +1 -0
- package/dist/src/evals/cli/eval.d.ts +0 -2
- package/dist/src/evals/cli/eval.js +1 -1
- package/dist/src/evals/define-eval.d.ts +1 -1
- package/dist/src/evals/define-eval.js +1 -1
- package/dist/src/evals/index.d.ts +1 -2
- package/dist/src/evals/index.js +1 -1
- package/dist/src/evals/runner/artifacts.js +1 -1
- package/dist/src/evals/runner/execute-eval.d.ts +2 -3
- package/dist/src/evals/runner/execute-eval.js +1 -1
- package/dist/src/evals/runner/execute-task.d.ts +1 -2
- package/dist/src/evals/runner/execute-task.js +1 -1
- package/dist/src/evals/runner/reporters/console.js +1 -1
- package/dist/src/evals/runner/reporters/junit.js +3 -4
- package/dist/src/evals/runner/run-evals.d.ts +0 -1
- package/dist/src/evals/runner/run-evals.js +1 -1
- package/dist/src/evals/target.d.ts +1 -6
- package/dist/src/evals/target.js +1 -1
- package/dist/src/evals/types.d.ts +2 -18
- package/dist/src/execution/node-step.js +1 -1
- package/dist/src/execution/sandbox/bindings/vercel.d.ts +2 -6
- package/dist/src/execution/sandbox/bindings/vercel.js +1 -1
- package/dist/src/internal/application/package.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.d.ts +0 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.js +1 -1
- package/dist/src/runtime/agent/mock-model-adapter.d.ts +3 -6
- package/dist/src/runtime/agent/mock-model-adapter.js +2 -2
- package/dist/src/runtime/agent/resolve-model.d.ts +2 -2
- package/dist/src/runtime/agent/resolve-model.js +1 -1
- package/dist/src/runtime/sandbox/keys.js +1 -1
- package/dist/src/setup/primitives/pm/pnpm.js +1 -1
- package/dist/src/setup/scaffold/create/add-to-project.js +1 -1
- package/dist/src/setup/scaffold/create/project.js +2 -2
- package/dist/src/setup/scaffold/update/channels.js +1 -1
- package/package.json +6 -6
- package/dist/docs/public/reference/faqs.md +0 -48
- package/dist/src/compiled/_chunks/workflow/resume-hook-CEAS3opc.js +0 -12
- package/dist/src/compiled/_chunks/workflow/sleep-Cup6vPoA.js +0 -1
- package/dist/src/compiled/_chunks/workflow/symbols-BUTtwS7j.js +0 -48
- package/dist/src/evals/requirements.d.ts +0 -3
- package/dist/src/evals/requirements.js +0 -1
- /package/dist/docs/public/{advanced → guides}/dev-tui.md +0 -0
- /package/dist/docs/public/{frontend → guides/frontend}/meta.json +0 -0
- /package/dist/docs/public/{advanced → guides}/remote-agents.md +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/*
 * Hook-resumption helpers (minified bundle chunk; local identifiers are mangled).
 * The namespace object `C` maps the readable names onto the mangled functions:
 *   getHookByToken -> T, resumeHook -> E, resumeWebhook -> D.
 *
 * w(token)
 *   Awaits y() (NOTE(review): presumably the active world/backend object - confirm),
 *   loads the hook with hooks.getByToken(token) and its run with runs.get(hook.runId).
 *   When getEncryptionKeyForRun exists and yields a value, that value is passed
 *   through b() to obtain the key. If hook.metadata is defined it is replaced by the
 *   result of o(metadata, runId, key). Returns { hook, run, encryptionKey }.
 *
 * T(token)
 *   Returns only the `hook` from w(token).
 *
 * E(tokenOrHook, payload, encryptionKey?)
 *   Runs inside v(() => s(`hook.resume`, callback)); the callback argument `r` is
 *   only touched through optional calls (setAttributes / addLink).
 *   - A string first argument is resolved through w(); otherwise it is used as an
 *     already-loaded hook and its run is fetched. An explicit third argument wins
 *     over the key derived from the run.
 *   - The key is discarded when the capability object returned by
 *     h(run.executionContext.workflowCoreVersion) does not list a.ENCRYPTED in
 *     supportedFormats.
 *   - The payload is converted by x(); operations it collects are awaited in the
 *     background through _(Promise.all(ops), cb), where cb logs a warning when it
 *     receives a defined value.
 *   - Creates a `hook_received` event (specVersion 4, correlationId = hookId; the
 *     token is added to eventData only when g(hook.specVersion) is falsy), then
 *     enqueues the run on f(run.workflowName) and returns the hook.
 *   - On failure it records attributes on `r` and rethrows the same error.
 *
 * D(token, request)
 *   - Throws `new n(token)` when hook.isWebhook === false.
 *   - metadata.respondWith === `manual`: creates a TransformStream, stores its
 *     writable side on the request under key `d`, and after resuming reads a single
 *     chunk from the readable side, using it as the response when truthy.
 *   - metadata.respondWith is a Response: that Response is returned.
 *   - Any other respondWith value: throws `r` with slug
 *     WEBHOOK_INVALID_RESPOND_WITH_VALUE.
 *   - No respondWith in metadata: an empty 202 Response is used.
 *   - Throws `r` (slug WEBHOOK_RESPONSE_NOT_SENT) when no response was produced.
 */
import{n as e}from"./chunk-DSjMdhoD.js";import{n as t,o as n,w as r}from"./dist-6a3viBXZ.js";import{$ as i,Bt as a,E as o,G as s,H as c,Mt as l,Q as u,Yt as d,a as f,et as p,kt as m,m as h,mn as g,n as _,r as v,rn as y,sn as b,y as x,z as S}from"./attribute-changes-DGVGRGfw.js";var C=e({getHookByToken:()=>T,resumeHook:()=>E,resumeWebhook:()=>D});async function w(e){let t=await y(),n=await t.hooks.getByToken(e),r=await t.runs.get(n.runId),i=await t.getEncryptionKeyForRun?.(r),a=i?await b(i):void 0;return n.metadata!==void 0&&(n.metadata=await o(n.metadata,n.runId,a)),{hook:n,run:r,encryptionKey:a}}async function T(e){let{hook:t}=await w(e);return t}async function E(e,t,n){return await v(()=>s(`hook.resume`,async r=>{let o=await y();try{let s,u,d;if(typeof e==`string`){let t=await w(e);s=t.hook,u=t.run,d=n??t.encryptionKey}else if(s=e,u=await o.runs.get(s.runId),n)d=n;else{let e=await o.getEncryptionKeyForRun?.(u);d=e?await b(e):void 0}r?.setAttributes({...p(s.token),...i(s.hookId),...l(s.runId)});let v=u.executionContext?.workflowCoreVersion,y=h(typeof v==`string`?v:void 0);y.supportedFormats.has(a.ENCRYPTED)||(d=void 0);let C=[],T=g(s.specVersion),E=await x(t,s.runId,d,C,globalThis,T,y.framedByteStreams);_(Promise.all(C),e=>{e!==void 0&&S.warn(`Background flush of hook payload ops failed`,{workflowRunId:s.runId,hookId:s.hookId,error:e instanceof Error?e.message:String(e)})}),await o.events.create(s.runId,{eventType:`hook_received`,specVersion:4,correlationId:s.hookId,eventData:{...T?{}:{token:s.token},payload:E}},{v1Compat:T}),r?.setAttributes({...m(u.workflowName)});let D=u.executionContext?.traceCarrier;if(D){let e=await c(D);e&&r?.addLink?.({context:e})}return await o.queue(f(u.workflowName),{runId:s.runId,traceCarrier:u.executionContext?.traceCarrier??void 0},{deploymentId:u.deploymentId,specVersion:u.specVersion??1}),s}catch(t){throw r?.setAttributes({...p(typeof e==`string`?e:e.token),...u(!1)}),t}}))}async function D(e,i){let{hook:a,encryptionKey:o}=await 
w(e);if(a.isWebhook===!1)throw new n(e);let s,c;if(a.metadata&&typeof a.metadata==`object`&&`respondWith`in a.metadata)if(a.metadata.respondWith===`manual`){let{readable:e,writable:t}=new TransformStream;c=e,i[d]=t}else if(a.metadata.respondWith instanceof Response)s=a.metadata.respondWith;else throw new r(`Invalid \`respondWith\` value: ${a.metadata.respondWith}`,{slug:t.WEBHOOK_INVALID_RESPOND_WITH_VALUE});else s=new Response(null,{status:202});if(await E(a,i,o),c){let e=c.getReader(),t=await e.read();t.value&&(s=t.value),e.cancel()}if(!s)throw new r(`Workflow run did not send a response`,{slug:t.WEBHOOK_RESPONSE_NOT_SENT});return s}export{C as i,E as n,D as r,T as t};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/*
 * Run lifecycle helpers (minified bundle chunk; local identifiers are mangled).
 * Export map (from the trailing export list):
 *   W -> a, U -> c, G -> i, L -> l, q -> n, B -> o, V -> r, H -> s, K -> t.
 *
 * I
 *   Id generator obtained from x(); used to build run ids of the form `wrun_${I()}`.
 *
 * L(workflow, argsOrOptions, options)            ("use step")
 *   Starts a run.
 *   - Throws `o` (slug `start-invalid-workflow-function`) when workflow?.workflowId
 *     is missing.
 *   - An array second argument is the argument list; a non-array object second
 *     argument is used as the options instead of the third argument.
 *   - world = options.world ?? await A();
 *     deploymentId = options.deploymentId ?? await world.getDeploymentId().
 *     `latest` is resolved through world.resolveLatestDeploymentId(), and `o` is
 *     thrown when the world does not implement it.
 *   - Framed-byte-stream support is assumed for the current deployment. For another
 *     deployment it is probed with j(world, `workflow`, { deploymentId, timeout: 2e3 })
 *     when world.streams?.get is a function (probe failures are swallowed), and is
 *     false otherwise.
 *   - specVersion = options.specVersion ?? world.specVersion ?? 2.
 *   - Initial attributes require specVersion >= 4 and string values; otherwise `o`
 *     is thrown.
 *   - The `run_created` event and the queue message are issued together through
 *     Promise.allSettled. A queue rejection is always rethrown. An event rejection
 *     is ignored when a.is(err), downgraded to a warning (resilientStart = true)
 *     when R(err), and rethrown otherwise.
 *   - A fulfilled event response must contain `run`; unless T(specVersion) is
 *     truthy, its runId must equal the generated id.
 *   - Returns new K(runId, { resilientStart }).
 *
 * R(err)
 *   True when i.is(err), or when n.is(err) and err.status >= 500.
 *
 * z(value)
 *   Returns arrays unchanged and wraps any other value in a one-element array.
 *
 * B(world, runId, overrides = {})
 *   Loads the run with resolveData `all`, converts its input with S(), and starts a
 *   new run of the same workflowName through L(). specVersion and deploymentId come
 *   from `overrides`, falling back to the source run (specVersion default 1).
 *   Returns the new runId. Failures are wrapped as
 *   `Failed to recreate run from <runId>: ...` with `cause`.
 *
 * V(world, runId)
 *   Creates a `run_cancelled` event using the run's specVersion (default 1).
 *   Failures are wrapped with `cause`.
 *
 * H(world, runId)
 *   Re-enqueues the run on y(run.workflowName). Failures are wrapped with `cause`.
 *
 * U(world, runId, options)
 *   Pages through all events (limit 1000 per page) and collects `wait_created`
 *   events that have no `wait_completed` event with the same correlationId,
 *   optionally restricted to options.correlationIds. It creates a `wait_completed`
 *   event for each. A creation error for which a.is(err) holds is counted as
 *   success. The run is enqueued when at least one wait was completed. Throws an
 *   AggregateError when any creation failed; otherwise returns { stoppedCount }.
 *
 * W(world, runId, streamName, options) / G(world, runId)
 *   Thin wrappers around world.streams.get / world.streams.list that wrap failures
 *   with `cause`.
 *
 * class K (self-reference `n` inside the class body)
 *   Handle for one run.
 *   - static [D] / static [P]: convert an instance to and from
 *     { runId, resilientStart }.
 *   - #t lazily stores the promise from A(); #i lazily stores the encryption-key
 *     promise.
 *   - wakeUp(options) delegates to U().
 *   - cancel() creates a `run_cancelled` event with a fixed specVersion of 4.
 *     NOTE(review): V() above uses the run's own specVersion plus v1Compat - confirm
 *     that the difference is intended.
 *   - exists resolves to false when runs.get rejects with an error matching e.is().
 *   - status / workflowName / createdAt / startedAt / completedAt each read the
 *     field from a fresh runs.get().
 *   - getReadable({ ops, global, startIndex, namespace }) builds a ReadableStream
 *     named m(runId, namespace) and attaches getTailIndex(), which reads
 *     streams.getInfo(...).tailIndex.
 *   - #o() polls runs.get(). `completed` returns the converted output, `cancelled`
 *     throws `r`, `failed` throws `s` carrying the converted error (or a placeholder
 *     Error if conversion fails). Any other status waits 1000 ms and retries.
 *     Errors matching e.is() are retried after 1000 / 3000 / 6000 ms, but only when
 *     resilientStart is set (at most 3 retries).
 *   NOTE(review): as rendered here, the line break between the last two lines falls
 *   inside the template literal `Failed to hydrate workflow run error`, so the
 *   message contains a newline at that point. This is presumably an artifact of
 *   line wrapping - confirm against the published file.
 *
 * q(runId)
 *   Returns new K(runId).
 */
import{C as e,S as t,T as n,b as r,m as i,r as a,w as o,x as s}from"./dist-6a3viBXZ.js";import{A as c,At as l,C as u,Ct as d,G as f,J as p,L as m,Mt as h,Nt as g,T as _,W as v,a as y,b,fn as x,k as S,kt as C,m as w,mn as T,n as E,nn as D,p as O,r as k,rn as A,s as j,sn as M,t as N,tn as P,z as F}from"./attribute-changes-DGVGRGfw.js";const I=x();async function L(e,t,n){"use step";return await k(()=>{let r=e?.workflowId;if(!r)throw new o(`'start' received an invalid workflow function. Ensure the Workflow SDK is configured correctly and the function includes a 'use workflow' directive.`,{slug:`start-invalid-workflow-function`});return f(`workflow.start ${r}`,async e=>{e?.setAttributes({...C(r),...l(`start`)});let i=[],s=n??{};Array.isArray(t)?i=t:typeof t==`object`&&(s=t),e?.setAttributes({...d(i.length)});let c=s?.world??await A(),u=await c.getDeploymentId(),f=s.deploymentId??u;if(f===`latest`){if(!c.resolveLatestDeploymentId)throw new o(`deploymentId 'latest' requires a World that implements resolveLatestDeploymentId()`);f=await c.resolveLatestDeploymentId()}let m;m=f===u?!0:typeof c.streams?.get==`function`?w((await j(c,`workflow`,{deploymentId:f,timeout:2e3}).catch(()=>void 0))?.workflowCoreVersion).framedByteStreams:!1;let _=[],x=`wrun_${I()}`,S=await v(),D=s.specVersion??c.specVersion??2,k=T(D),P;if(s.attributes&&Object.keys(s.attributes).length>0){if(D<4)throw new o(`Initial workflow attributes require a World that supports spec version 4 or later.`);for(let[e,t]of Object.entries(s.attributes))if(typeof t!=`string`)throw new o(`Initial workflow attribute ${JSON.stringify(e)} must be a string value.`);let e=N(s.attributes);P=Object.fromEntries(e.map(({key:e,value:t})=>[e,t]))}let L=await c.getEncryptionKeyForRun?.(x,{...s,deploymentId:f}),z=L?await M(L):void 0,B=await b(i,x,z,_,globalThis,k,m),V={traceCarrier:S,workflowCoreVersion:O,features:{encryption:!!z}},[H,U]=await 
Promise.allSettled([c.events.create(x,{eventType:`run_created`,specVersion:D,eventData:{deploymentId:f,workflowName:r,input:B,executionContext:V,...P?{attributes:P}:{}}},{v1Compat:k}),c.queue(y(r),{runId:x,traceCarrier:S,...D>=3?{runInput:{input:B,deploymentId:f,workflowName:r,specVersion:D,executionContext:V,...P?{attributes:P}:{}}}:{}},{deploymentId:f,specVersion:D})]);if(U.status===`rejected`)throw U.reason;let W=!1;if(H.status===`rejected`){let e=H.reason;if(!a.is(e))if(R(e))W=!0,F.warn(`Run creation event failed, but the run was accepted via the queue. The run_created event will be re-tried async by the runtime.`,{workflowRunId:x,error:e.message});else throw e}else{let e=H.value;if(!e.run)throw new o(`Missing 'run' in server response for 'run_created' event`);if(!k&&e.run.runId!==x)throw new o(`Server returned different runId than requested: expected ${x}, got ${e.run.runId}`)}return E(Promise.all(_),e=>{F.warn(`Background flush of workflow argument streams failed`,{workflowRunId:x,error:e instanceof Error?e.message:String(e)})}),e?.setAttributes({...h(x),...p(f),...H.status===`fulfilled`&&H.value.run?g(H.value.run.status):{}}),new K(x,{resilientStart:W})})})}function R(e){return!!(i.is(e)||n.is(e)&&e.status&&e.status>=500)}const z=e=>Array.isArray(e)?e:[e];async function B(e,t,n={}){try{let r=await e.runs.get(t,{resolveData:`all`}),i=await e.getEncryptionKeyForRun?.(r),a=i?await M(i):void 0,o=z(await S(r.input,t,a,globalThis)),s=n.specVersion??r.specVersion??1,c=n.deploymentId??r.deploymentId;return(await L({workflowId:r.workflowName},o,{deploymentId:c,world:e,specVersion:s})).runId}catch(e){throw Error(`Failed to recreate run from ${t}: ${e instanceof Error?e.message:String(e)}`,{cause:e})}}async function V(e,t){try{let n=(await e.runs.get(t,{resolveData:`none`})).specVersion??1,r=T(n),i={eventType:`run_cancelled`,specVersion:n};await e.events.create(t,i,{v1Compat:r})}catch(e){throw Error(`Failed to cancel run ${t}: ${e instanceof 
Error?e.message:String(e)}`,{cause:e})}}async function H(e,t){try{let n=await e.runs.get(t,{resolveData:`none`});await e.queue(y(n.workflowName),{runId:t},{deploymentId:n.deploymentId,specVersion:n.specVersion??1})}catch(e){throw Error(`Failed to re-enqueue run ${t}: ${e instanceof Error?e.message:String(e)}`,{cause:e})}}async function U(e,t,n){try{let r=await e.runs.get(t,{resolveData:`none`}),i=T(r.specVersion),o=[],s=null;do{let n=await e.events.list({runId:t,pagination:{limit:1e3,...s?{cursor:s}:{}},resolveData:`none`});o.push(...n.data),s=n.hasMore?n.cursor:null}while(s);let c=o.filter(e=>e.eventType===`wait_created`),l=new Set(o.filter(e=>e.eventType===`wait_completed`).map(e=>e.correlationId)),u=c.filter(e=>!l.has(e.correlationId));if(n?.correlationIds&&n.correlationIds.length>0){let e=new Set(n.correlationIds);u=u.filter(t=>t.correlationId&&e.has(t.correlationId))}let d=[],f=0;for(let n of u){if(!n.correlationId)continue;let o=i?{eventType:`wait_completed`,correlationId:n.correlationId}:{eventType:`wait_completed`,correlationId:n.correlationId,specVersion:r.specVersion,eventData:{resumeAt:n.eventData.resumeAt}};try{await e.events.create(t,o,{v1Compat:i}),f++}catch(e){a.is(e)?f++:d.push(e instanceof Error?e:Error(String(e)))}}if(f>0&&await e.queue(y(r.workflowName),{runId:t},{deploymentId:r.deploymentId,specVersion:r.specVersion??1}),d.length>0)throw AggregateError(d,`Failed to complete ${d.length}/${u.length} pending wait(s) for run ${t}`);return{stoppedCount:f}}catch(e){throw e instanceof AggregateError?e:Error(`Failed to wake up run ${t}: ${e instanceof Error?e.message:String(e)}`,{cause:e})}}async function W(e,t,n,r){try{return await e.streams.get(t,n,r?.startIndex)}catch(e){throw Error(`Failed to read stream ${n}: ${e instanceof Error?e.message:String(e)}`,{cause:e})}}async function G(e,t){try{return await e.streams.list(t)}catch(e){throw Error(`Failed to list streams for run ${t}: ${e instanceof Error?e.message:String(e)}`,{cause:e})}}var K=class 
n{static[D](e){return{runId:e.runId,resilientStart:e.#r}}static[P](e){return new n(e.runId,{resilientStart:e.resilientStart})}runId;#e;get#t(){return this.#e||=A(),this.#e}#n=null;#r=!1;constructor(e,t){this.runId=e,this.#r=t?.resilientStart??!1}#i(){return this.#n||=(async()=>{let e=await this.#t,t=await e.runs.get(this.runId),n=await e.getEncryptionKeyForRun?.(t);return n?await M(n):void 0})(),this.#n}#a(){return()=>this.#i()}async wakeUp(e){"use step";return U(await this.#t,this.runId,e)}async cancel(){"use step";await(await this.#t).events.create(this.runId,{eventType:`run_cancelled`,specVersion:4})}get exists(){"use step";return this.#t.then(t=>t.runs.get(this.runId,{resolveData:`none`}).then(()=>!0).catch(t=>{if(e.is(t))return!1;throw t}))}get status(){"use step";return this.#t.then(e=>e.runs.get(this.runId).then(e=>e.status))}get returnValue(){"use step";return this.#o()}get workflowName(){"use step";return this.#t.then(e=>e.runs.get(this.runId).then(e=>e.workflowName))}get createdAt(){"use step";return this.#t.then(e=>e.runs.get(this.runId).then(e=>e.createdAt))}get startedAt(){"use step";return this.#t.then(e=>e.runs.get(this.runId).then(e=>e.startedAt))}get completedAt(){"use step";return this.#t.then(e=>e.runs.get(this.runId).then(e=>e.completedAt))}get readable(){return this.getReadable()}getReadable(e={}){"use step";let{ops:t=[],global:n=globalThis,startIndex:r,namespace:i}=e,a=m(this.runId,i),o=this.#a(),s=u(n,t,this.runId,o).ReadableStream({name:a,startIndex:r}),c=this.#t,l=this.runId;return Object.assign(s,{getTailIndex:async()=>(await(await c).streams.getInfo(l,a)).tailIndex})}async#o(){let n=await this.#t,i=0,a=this.#r?3:0,o=[1e3,3e3,6e3];for(;;)try{let e=await n.runs.get(this.runId);if(e.status===`completed`){let t=await this.#i();return await c(e.output,this.runId,t)}if(e.status===`cancelled`)throw new r(this.runId);if(e.status===`failed`){let t=await this.#i(),n;try{n=await _(e.error,this.runId,t)}catch{n=Error(`Failed to hydrate workflow run 
error`)}throw new s(this.runId,n,{errorCode:e.errorCode})}throw new t(this.runId,e.status)}catch(n){if(t.is(n)){await new Promise(e=>setTimeout(e,1e3));continue}if(e.is(n)&&i<a){let e=o[i];i++,await new Promise(t=>setTimeout(t,e));continue}throw n}}};function q(e){return new K(e)}export{W as a,U as c,G as i,L as l,q as n,B as o,V as r,H as s,K as t};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import { $t as e, Yn as t } from "./attribute-changes-DGVGRGfw.js";

/**
 * `sleep()` entry point of this chunk (exported as `t`).
 *
 * Looks up the implementation stored on `globalThis` under the imported key
 * `e` and forwards the argument to it. When nothing is registered there, the
 * imported helper `t` is called first with the API name, its documentation
 * URL, and this function.
 * NOTE(review): `t` presumably throws a "not available in this context"
 * error; if it ever returns, the call below fails on the missing
 * implementation - confirm against attribute-changes-DGVGRGfw.js.
 *
 * @param {*} r - value forwarded unchanged to the registered implementation.
 * @returns {Promise<*>} whatever the registered implementation returns.
 */
async function n(r) {
  const implementation = globalThis[e];
  if (!implementation) {
    t(`sleep()`, `https://workflow-sdk.dev/docs/api-reference/workflow/sleep`, n);
  }
  return implementation(r);
}

export { n as t };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{basename,join}from"node:path";import{readFile}from"node:fs/promises";import{resolveApplicationRoot}from"#internal/application/paths.js";import{loadDevelopmentEnvironmentFiles}from"#cli/dev/environment.js";import{startDevelopmentServer}from"#internal/nitro/host.js";import{
|
|
1
|
+
import{basename,join}from"node:path";import{readFile}from"node:fs/promises";import{resolveApplicationRoot}from"#internal/application/paths.js";import{loadDevelopmentEnvironmentFiles}from"#cli/dev/environment.js";import{startDevelopmentServer}from"#internal/nitro/host.js";import{createEvalClient}from"#evals/cli/eval-client.js";import{discoverAndImportEvals,discoverEvalConfig}from"#evals/runner/discover.js";import{runEvals}from"#evals/runner/run-evals.js";import{ConsoleReporter}from"#evals/runner/reporters/console.js";import{JUnit}from"#evals/runner/reporters/junit.js";import{resolveEvalTargetHandle}from"#evals/target.js";async function runEvalCommand(e,t,n){let s=resolveApplicationRoot();loadDevelopmentEnvironmentFiles(s);let c=e.length>0?e:void 0,l=await discoverAndImportEvals(s,c);if(l.length===0){c?n.error(`No evals found matching: ${c.join(`, `)}`):n.error(`No evals found. Create files under evals/ with the *.eval.ts extension.`),process.exitCode=2;return}let u=filterEvalsByTag(l,t.tag??[]);if(u.length===0){n.error(`No evals matched the provided tags (${(t.tag??[]).join(`, `)}).`),process.exitCode=2;return}let d,f;try{d=parsePositiveInteger(t.maxConcurrency,`--max-concurrency`),f=parseNonNegativeInteger(t.timeout,`--timeout`)}catch(e){n.error(e instanceof Error?e.message:String(e)),process.exitCode=2;return}if(t.list===!0){printEvalList(u,t.json===!0,n);return}let p;try{p=await discoverEvalConfig(s)}catch(e){n.error(e instanceof Error?e.message:String(e)),process.exitCode=2;return}let m,h;try{t.url?h=await resolveEvalTargetHandle({client:createEvalClient({kind:`remote`,url:t.url}),expectedAgentName:await readExpectedAgentName(s),kind:`remote`,url:t.url}):(m=await startDevelopmentServer(s,{host:`127.0.0.1`,port:0}),h=await resolveEvalTargetHandle({client:createEvalClient({kind:`local`,url:m.url}),expectedAgentName:await readExpectedAgentName(s),kind:`local`,url:m.url}));let e=createEvalClient(h),r=t.json===!0?[]:[new ConsoleReporter];t.junit!==void 
0&&r.push(JUnit({filePath:t.junit}));let i=await runEvals({evaluations:u,config:p,target:h,client:e,appRoot:s,reporters:r,includeEvalReporters:t.skipReport!==!0,maxConcurrency:d,timeoutMs:f,onEvalLog:t.verbose===!0?(e,t)=>n.log(`[${e}] ${t}`):void 0});t.json&&n.log(JSON.stringify(i,null,2));let a=i.failed>0,o=t.strict===!0&&i.scored>0;(a||o)&&(process.exitCode=1)}finally{m&&await m.close()}let g=typeof process.exitCode==`number`?process.exitCode:0;process.exit(g)}function parsePositiveInteger(e,t){if(e===void 0)return;let n=Number(e);if(!Number.isInteger(n)||n<1)throw Error(`${t} must be a positive integer; got "${e}".`);return n}function parseNonNegativeInteger(e,t){if(e===void 0)return;let n=Number(e);if(!Number.isInteger(n)||n<0)throw Error(`${t} must be a non-negative integer; got "${e}".`);return n}function filterEvalsByTag(e,t){return t.length===0?[...e]:e.filter(e=>e.tags?.some(e=>t.includes(e))??!1)}function printEvalList(e,t,n){if(t){let t=e.map(e=>({id:e.id,description:e.description,tags:e.tags}));n.log(JSON.stringify(t,null,2));return}for(let t of e){let e=t.description===void 0?``:` — ${t.description}`,r=t.tags!==void 0&&t.tags.length>0?` [${t.tags.join(`, `)}]`:``;n.log(`${t.id}${r}${e}`)}}async function readExpectedAgentName(r){try{let i=JSON.parse(await readFile(join(r,`package.json`),`utf8`));return typeof i.name==`string`&&i.name.length>0?i.name:basename(r)}catch{return basename(r)}}export{runEvalCommand};
|
|
@@ -15,6 +15,6 @@ import type { EveEvalDefinition, EveEvalInput } from "#evals/types.js";
|
|
|
15
15
|
*
|
|
16
16
|
* Throws on invalid input: a missing `test` function, a removed legacy key
|
|
17
17
|
* (`input`/`run`/`checks`/`scores`/`expected`/`thresholds`/`parseOutput`/
|
|
18
|
-
* `model`), or a negative or non-finite `timeoutMs`.
|
|
18
|
+
* `model`/`requires`), or a negative or non-finite `timeoutMs`.
|
|
19
19
|
*/
|
|
20
20
|
export declare function defineEval(input: EveEvalInput): EveEvalDefinition;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
function defineEval(e){return validateEvalInput(e),{...e,_tag:`EveEval`}}function validateEvalInput(e){if(`id`in e)throw Error("Eval must not specify `id`. Eval identity is derived from the file path under evals/.");if(`name`in e)throw Error("Eval must not specify `name`. Eval identity is derived from the file path under evals/.");if(rejectLegacyKey(e,`input`,"Send the prompt inside `test`: `async test(t) { await t.send(...) }`."),rejectLegacyKey(e,`run`,"Rename `run` to `test`; it receives the same context `t`."),rejectLegacyKey(e,`checks`,"Assert inline inside `test` (e.g. `t.completed()`, `t.calledTool(...)`)."),rejectLegacyKey(e,`scores`,"Use soft assertions inside `test`: `t.check(...).atLeast(n)` or `t.judge.autoevals.*`."),rejectLegacyKey(e,`expected`,"Pass the reference value to the assertion (e.g. `t.check(t.reply, includes(value))`)."),rejectLegacyKey(e,`thresholds`,"Put the threshold on the assertion: `.atLeast(n)`."),rejectLegacyKey(e,`parseOutput`,"Read the value you want inside `test` and assert on it directly."),rejectLegacyKey(e,`model`,"Rename `model` to `judge: { model }`."),rejectLegacyKey(e,`modelOptions`,"Move it under `judge: { model, modelOptions }`."),rejectLegacyKey(e,`cases`,"Each eval file is one case; default-export an array of `defineEval(...)` for datasets."),typeof e.test!=`function`)throw Error("Eval requires a `test(t)` function.");if(
|
|
1
|
+
/**
 * Validates an eval definition and brands it with `_tag: "EveEval"`.
 *
 * @param {object} e - the eval input; must provide a `test(t)` function.
 * @returns {object} a shallow copy of the input plus the `_tag` marker.
 * @throws {Error} when the input names itself (`id` / `name`), uses a removed
 *   legacy key, lacks `test`, or has a negative / non-finite `timeoutMs`.
 */
function defineEval(e) {
  validateEvalInput(e);
  return { ...e, _tag: `EveEval` };
}

/**
 * Runs every structural check on an eval input, in a fixed order: identity
 * keys first, then removed legacy keys, then `test`, then `timeoutMs`. The
 * first violation found is the one reported.
 */
function validateEvalInput(e) {
  if (`id` in e) {
    throw Error("Eval must not specify `id`. Eval identity is derived from the file path under evals/.");
  }
  if (`name` in e) {
    throw Error("Eval must not specify `name`. Eval identity is derived from the file path under evals/.");
  }

  // Removed keys paired with the migration hint appended to the error message.
  // Order matters: it decides which key is reported when several are present.
  const removedKeys = [
    [`input`, "Send the prompt inside `test`: `async test(t) { await t.send(...) }`."],
    [`run`, "Rename `run` to `test`; it receives the same context `t`."],
    [`checks`, "Assert inline inside `test` (e.g. `t.completed()`, `t.calledTool(...)`)."],
    [`scores`, "Use soft assertions inside `test`: `t.check(...).atLeast(n)` or `t.judge.autoevals.*`."],
    [`expected`, "Pass the reference value to the assertion (e.g. `t.check(t.reply, includes(value))`)."],
    [`thresholds`, "Put the threshold on the assertion: `.atLeast(n)`."],
    [`parseOutput`, "Read the value you want inside `test` and assert on it directly."],
    [`model`, "Rename `model` to `judge: { model }`."],
    [`modelOptions`, "Move it under `judge: { model, modelOptions }`."],
    [`cases`, "Each eval file is one case; default-export an array of `defineEval(...)` for datasets."],
    [`requires`, `Point real-model evals at credentialed targets directly; dev-only routes are enforced from the live target.`],
  ];
  for (const [key, hint] of removedKeys) {
    rejectLegacyKey(e, key, hint);
  }

  if (typeof e.test !== `function`) {
    throw Error("Eval requires a `test(t)` function.");
  }

  if (e.timeoutMs === undefined) {
    return;
  }
  if (e.timeoutMs < 0 || !Number.isFinite(e.timeoutMs)) {
    throw Error("Eval `timeoutMs` must be a non-negative finite number.");
  }
}

/**
 * Throws a "no longer supported" error when `key` is present on `input`
 * (own or inherited, since the check uses `in`).
 */
function rejectLegacyKey(input, key, hint) {
  if (!(key in input)) {
    return;
  }
  throw Error(`Eval \`${key}\` is no longer supported. ${hint}`);
}

export { defineEval };
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
export { defineEval } from "#evals/define-eval.js";
|
|
2
2
|
export { defineEvalConfig } from "#evals/define-eval-config.js";
|
|
3
3
|
export { EveEvalTurnFailedError } from "#evals/session.js";
|
|
4
|
-
export { EveEvalRequirementError } from "#evals/target.js";
|
|
5
4
|
export type { RuntimeIdentity } from "#protocol/message.js";
|
|
6
5
|
export type { InputRequest } from "#runtime/input/types.js";
|
|
7
6
|
export type { EveEvalValueMatcher, EveEvalToolCallMatchOptions, EveEvalSubagentCallMatchOptions, } from "#evals/match.js";
|
|
8
|
-
export type { Assertion, AssertionHandle, AssertionResult, AssertionSeverity, AutoevalsJudges, EveEvalContext, EveEvalDerivedFacts, EveEvalJudgeConfig, EveEvalRunSummary, EveEvalSession, EveEvalSessionResult,
|
|
7
|
+
export type { Assertion, AssertionHandle, AssertionResult, AssertionSeverity, AutoevalsJudges, EveEvalContext, EveEvalDerivedFacts, EveEvalJudgeConfig, EveEvalRunSummary, EveEvalSession, EveEvalSessionResult, EveEvalScheduleDispatchResult, EveEvalSubagentCall, EveEval, EveEvalConfig, EveEvalConfigInput, EveEvalDefinition, EveEvalInput, EveEvalResult, EveEvalTarget, EveEvalTargetCapabilities, EveEvalTargetHandle, EveEvalTaskResult, EveEvalToolCall, EveEvalTurn, EveEvalVerdict, JudgeContext, JudgeOpts, } from "#evals/types.js";
|
package/dist/src/evals/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{EveEvalTurnFailedError}from"#evals/session.js";import{defineEval}from"#evals/define-eval.js";import{defineEvalConfig}from"#evals/define-eval-config.js";export{EveEvalTurnFailedError,defineEval,defineEvalConfig};
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import{dirname,join}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function resolveArtifactDirectory(e){return join(e,`.eve`,`evals`,new Date().toISOString().replace(/[:.]/g,`-`).slice(0,19))}async function writeArtifacts(t,n){let r=join(t,`evals`);await mkdir(r,{recursive:!0}),await writeFile(join(t,`summary.json`),JSON.stringify(buildSummaryArtifact(n),null,2));let i=n.results.map(e=>JSON.stringify(buildResultLine(e))).join(`
|
|
2
2
|
`);await writeFile(join(t,`results.jsonl`),`${i}\n`),await Promise.all(n.results.map(async t=>{let n=join(r,`${sanitizeArtifactPath(t.id)}.json`);await mkdir(dirname(n),{recursive:!0}),await writeFile(n,JSON.stringify(buildEvalArtifact(t),null,2));let i=t.result.events.map(e=>JSON.stringify(e)).join(`
|
|
3
|
-
`);await writeFile(join(r,`${sanitizeArtifactPath(t.id)}.events.ndjson`),`${i}\n`)}))}function buildSummaryArtifact(e){return{target:e.target,startedAt:e.startedAt,completedAt:e.completedAt,passed:e.passed,failed:e.failed,scored:e.scored,
|
|
3
|
+
`);await writeFile(join(r,`${sanitizeArtifactPath(t.id)}.events.ndjson`),`${i}\n`)}))}function buildSummaryArtifact(e){return{target:e.target,startedAt:e.startedAt,completedAt:e.completedAt,passed:e.passed,failed:e.failed,scored:e.scored,errored:e.errored,totalEvals:e.results.length,evals:e.results.map(e=>({id:e.id,verdict:e.verdict,status:e.result.status,assertions:e.assertions.map(e=>({name:e.name,score:e.score,severity:e.severity,passed:e.passed})),error:e.error}))}}function buildResultLine(e){return{id:e.id,verdict:e.verdict,status:e.result.status,output:e.result.output,assertions:e.assertions,error:e.error}}function buildEvalArtifact(e){return{id:e.id,result:{output:e.result.output,finalMessage:e.result.finalMessage,sessionId:e.result.sessionId,status:e.result.status,logs:e.result.logs,derived:e.result.derived,sessions:e.result.sessions},verdict:e.verdict,assertions:e.assertions,error:e.error}}function sanitizeArtifactPath(e){return e.split(`/`).map(e=>e.replace(/[^a-zA-Z0-9_-]/g,`_`)).join(`/`)}export{resolveArtifactDirectory,writeArtifacts};
|
|
@@ -5,7 +5,6 @@ import type { EveEval, EveEvalResult, EveEvalTargetHandle } from "#evals/types.j
|
|
|
5
5
|
*/
|
|
6
6
|
export interface ExecuteEvalOptions {
|
|
7
7
|
readonly evaluation: EveEval;
|
|
8
|
-
readonly failOnSkip?: boolean;
|
|
9
8
|
/** Receives `t.log` lines as the eval runs (used by `--verbose`). */
|
|
10
9
|
readonly onLog?: (message: string) => void;
|
|
11
10
|
readonly target: EveEvalTargetHandle;
|
|
@@ -19,7 +18,7 @@ export interface ExecuteEvalOptions {
|
|
|
19
18
|
readonly client: Client;
|
|
20
19
|
}
|
|
21
20
|
/**
|
|
22
|
-
* Executes one eval end to end:
|
|
23
|
-
*
|
|
21
|
+
* Executes one eval end to end: runs `test(t)`, collects its assertions, and
|
|
22
|
+
* computes the verdict.
|
|
24
23
|
*/
|
|
25
24
|
export declare function executeEval(options: ExecuteEvalOptions): Promise<EveEvalResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toErrorMessage}from"#shared/errors.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{executeTask}from"#evals/runner/execute-task.js";import{
|
|
1
|
+
import { toErrorMessage } from "#shared/errors.js";
import { createEmptyDerivedFacts } from "#evals/runner/derive-run-facts.js";
import { executeTask } from "#evals/runner/execute-task.js";
import { computeEvalVerdict } from "#evals/runner/verdict.js";

/**
 * Runs a single eval from start to finish and packages the outcome.
 *
 * The task itself is delegated to `executeTask`. If that call rejects, the
 * rejection is turned into an error message and an empty `failed` result is
 * substituted, so a verdict can still be computed from `{ error, assertions }`.
 *
 * @param {object} options - Carries `evaluation`, `target`, `client`, and the
 *   optional `onLog` and `timeoutMs`.
 * @returns {Promise<object>} `{ id, result, assertions, verdict, error, startedAt, completedAt }`
 *   where both timestamps are ISO strings.
 */
async function executeEval(options) {
  const { evaluation, target, client } = options;
  const startedAt = new Date().toISOString();

  let result;
  let assertions = [];
  let error;

  try {
    const task = await executeTask({
      client,
      evaluation,
      onLog: options.onLog,
      target,
      // An explicit run-level timeout wins over the eval's own setting.
      timeoutMs: options.timeoutMs ?? evaluation.timeoutMs,
    });
    result = task.result;
    assertions = task.assertions;
    error = task.error;
  } catch (cause) {
    error = toErrorMessage(cause);
    result = {
      output: null,
      finalMessage: null,
      status: `failed`,
      events: [],
      derived: createEmptyDerivedFacts(),
    };
  }

  const verdict = computeEvalVerdict({ error, assertions });
  const completedAt = new Date().toISOString();

  return {
    id: evaluation.id,
    result,
    assertions,
    verdict,
    error,
    startedAt,
    completedAt,
  };
}

export { executeEval };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Client } from "#client/client.js";
|
|
2
|
-
import type { AssertionResult, EveEval,
|
|
2
|
+
import type { AssertionResult, EveEval, EveEvalTargetHandle, EveEvalTaskResult } from "#evals/types.js";
|
|
3
3
|
/**
|
|
4
4
|
* Options for executing one eval's task.
|
|
5
5
|
*/
|
|
@@ -8,7 +8,6 @@ interface ExecuteTaskOptions {
|
|
|
8
8
|
readonly evaluation: EveEval;
|
|
9
9
|
/** Receives each `t.log` line as it is written (used by `--verbose`). */
|
|
10
10
|
readonly onLog?: (message: string) => void;
|
|
11
|
-
readonly requirements?: readonly EveEvalRequirement[];
|
|
12
11
|
readonly target: EveEvalTargetHandle;
|
|
13
12
|
readonly timeoutMs?: number;
|
|
14
13
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toErrorMessage}from"#shared/errors.js";import{scopeEvalTargetHandle}from"#evals/target.js";import{EvalSessionManager}from"#evals/session.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{createEvalContext}from"#evals/context.js";async function executeTask(r){let{client:a,evaluation:o,target:s,timeoutMs:c}=r,l=c===void 0?neverAbortSignal():AbortSignal.timeout(c),u=new EvalSessionManager({client:a,signal:l}),d=scopeEvalTargetHandle(s,{
|
|
1
|
+
import { toErrorMessage } from "#shared/errors.js";
import { scopeEvalTargetHandle } from "#evals/target.js";
import { EvalSessionManager } from "#evals/session.js";
import { createEmptyDerivedFacts } from "#evals/runner/derive-run-facts.js";
import { createEvalContext } from "#evals/context.js";

/**
 * Runs one eval's `test(context)` callback and gathers what it produced.
 *
 * A thrown test is not re-thrown: its message is returned as `error` next to
 * the result built from whatever sessions were captured up to that point.
 *
 * @param {object} options - `client`, `evaluation`, `target`, and the optional
 *   `timeoutMs` and `onLog`.
 * @returns {Promise<{result: object, assertions: unknown, error: (string|undefined)}>}
 */
async function executeTask(options) {
  const { client, evaluation, target, timeoutMs } = options;

  // Without a timeout the eval still gets a signal, just one that never fires.
  const signal = timeoutMs === undefined ? neverAbortSignal() : AbortSignal.timeout(timeoutMs);
  const manager = new EvalSessionManager({ client, signal });
  const scopedTarget = scopeEvalTargetHandle(target, { sessions: manager });
  const logs = [];

  const { context, collector } = createEvalContext({
    manager,
    target: scopedTarget,
    signal,
    judge: evaluation.judge,
    log: (message) => {
      logs.push(message);
      options.onLog?.(message);
    },
  });

  let error;
  try {
    await evaluation.test(context);
  } catch (cause) {
    error = toErrorMessage(cause);
  }

  const result = buildTaskResult({
    logs,
    sessions: manager.snapshots(),
    turn: manager.lastTurnSession()?.lastTurn,
  });
  const assertions = await collector.finalize(result);

  return { result, assertions, error };
}

/**
 * Assembles the task result from the collected logs, the session snapshots,
 * and the last turn (which may be undefined).
 */
function buildTaskResult(input) {
  const { sessions, turn, logs } = input;
  const events = sessions.flatMap((session) => session.events);
  const message = turn?.message ?? null;

  return {
    output: message,
    finalMessage: message,
    sessionId: selectPrimarySessionId(sessions),
    status: turn?.status ?? `completed`,
    events,
    logs,
    derived: combineDerivedFacts(sessions),
    sessions,
    runtimeIdentity: extractRuntimeIdentity(events),
  };
}

/**
 * Merges the per-session derived facts into one record: lists are
 * concatenated, counters are summed, and the first defined failure code wins.
 */
function combineDerivedFacts(sessions) {
  if (sessions.length === 0) {
    return createEmptyDerivedFacts();
  }

  const toolCalls = sessions.flatMap((session) => session.derived.toolCalls);
  const subagentCalls = sessions.flatMap((session) => session.derived.subagentCalls);
  const inputRequests = sessions.flatMap((session) => session.derived.inputRequests);
  const firstFailing = sessions.find((session) => session.derived.failureCode !== undefined);

  return {
    toolCalls,
    toolCallCount: toolCalls.length,
    subagentCalls,
    subagentCallCount: subagentCalls.length,
    inputRequests,
    parked: sessions.some((session) => session.derived.parked),
    messageCount: sum(sessions, (session) => session.derived.messageCount),
    reasoningBlockCount: sum(sessions, (session) => session.derived.reasoningBlockCount),
    failureCode: firstFailing?.derived.failureCode,
  };
}

/** Picks the session flagged `primary`, falling back to the first session's id. */
function selectPrimarySessionId(sessions) {
  const primary = sessions.find((session) => session.primary);
  return primary?.sessionId ?? sessions[0]?.sessionId;
}

/** Returns the `runtime` payload of the first `session.started` event that carries one. */
function extractRuntimeIdentity(events) {
  const started = events.find(
    (event) => event.type === `session.started` && event.data.runtime !== undefined,
  );
  return started?.data.runtime;
}

/** Adds up `selector(item)` over `items`, starting from 0. */
function sum(items, selector) {
  let total = 0;
  for (const item of items) {
    total = total + selector(item);
  }
  return total;
}

/** Produces a signal from a controller that nothing ever aborts. */
function neverAbortSignal() {
  const controller = new AbortController();
  return controller.signal;
}

export { executeTask };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import picocolors from"#compiled/picocolors/index.js";var ConsoleReporter=class{#e;#t;constructor(t){this.#e=t?.log??console.log,this.#t=picocolors.createColors(t?.color??!!process.stdout.isTTY)}onRunStart(e,t){this.#e(``),this.#e(`${this.#t.bold(this.#t.cyan(`EVALS`))} ${this.#t.bold(String(e.length))}`),this.#e(`${this.#t.dim(`target`)} ${t.kind===`local`?this.#t.green(t.url):this.#t.blue(t.url)}`),this.#e(``)}onEvalComplete(e){let{assertions:t,verdict:n,error:r}=e,i=t.filter(e=>e.severity===`gate`),a=t.filter(e=>e.severity===`soft`),o=this.#n(n),s=i.length>0?this.#r(i.filter(e=>e.passed).length,i.length):``,c=a.map(e=>this.#i(e.name,e.score)).join(` `),l=[o,this.#t.dim(e.id),s,c].filter(Boolean).join(` `);this.#e(l)
|
|
1
|
+
import picocolors from "#compiled/picocolors/index.js";

/**
 * Reporter that prints eval progress and a run summary as coloured text.
 *
 * Output goes through `options.log` (default `console.log`). Colour is enabled
 * by `options.color`, defaulting to whether stdout is a TTY.
 */
var ConsoleReporter = class {
  #log;
  #colors;

  /**
   * @param {{log?: (line: string) => void, color?: boolean}} [options]
   */
  constructor(options) {
    this.#log = options?.log ?? console.log;
    this.#colors = picocolors.createColors(options?.color ?? !!process.stdout.isTTY);
  }

  /**
   * Prints the run header: the number of evals and the target URL.
   *
   * @param {readonly object[]} evaluations - Evals about to run.
   * @param {{kind: string, url: string}} target - Target being evaluated.
   */
  onRunStart(evaluations, target) {
    const c = this.#colors;
    this.#log(``);
    this.#log(`${c.bold(c.cyan(`EVALS`))} ${c.bold(String(evaluations.length))}`);
    this.#log(`${c.dim(`target`)} ${target.kind === `local` ? c.green(target.url) : c.blue(target.url)}`);
    this.#log(``);
  }

  /**
   * Prints one line for a finished eval, followed by one line per failed
   * assertion and the execution error, if any.
   *
   * @param {object} result - Eval result with `id`, `assertions`, `verdict`, `error`.
   */
  onEvalComplete(result) {
    const c = this.#colors;
    const { assertions, verdict, error } = result;
    const gates = assertions.filter((assertion) => assertion.severity === `gate`);
    const softs = assertions.filter((assertion) => assertion.severity === `soft`);

    const icon = this.#verdictIcon(verdict);
    const gateBadge =
      gates.length > 0
        ? this.#gateBadge(gates.filter((gate) => gate.passed).length, gates.length)
        : ``;
    const softBadges = softs.map((soft) => this.#scoreBadge(soft.name, soft.score)).join(` `);

    // Empty segments (no gates, no soft scores) are dropped from the line.
    this.#log([icon, c.dim(result.id), gateBadge, softBadges].filter(Boolean).join(` `));

    for (const assertion of assertions) {
      if (assertion.passed) continue;
      const detail = assertion.message === undefined ? `` : `: ${assertion.message}`;
      this.#log(` ${c.red(`✗ ${assertion.name}${detail}`)}`);
    }

    if (error) {
      this.#log(` ${c.red(error)}`);
    }
  }

  /**
   * Prints the run totals, gate totals, per-name soft score averages and the
   * wall-clock duration.
   *
   * @param {object} summary - Run summary with `passed`, `failed`, `scored`,
   *   `results`, `startedAt`, `completedAt`.
   */
  onRunComplete(summary) {
    const c = this.#colors;
    this.#log(``);

    const { passed, failed, scored, results } = summary;
    const total = results.length;
    const parts = [];
    if (passed > 0) parts.push(c.green(`${passed} passed`));
    if (failed > 0) parts.push(c.red(`${failed} failed`));
    if (scored > 0) parts.push(c.yellow(`${scored} scored`));
    if (parts.length === 0) parts.push(c.dim(`0 evals`));
    this.#log(`${c.bold(`Results:`)} ${parts.join(`, `)} ${c.dim(`(${total} total)`)}`);

    const gates = this.#countGates(results);
    if (gates.total > 0) {
      const passedText = c.green(`${gates.passed} passed`);
      const failedText = gates.failed > 0 ? `, ${c.red(`${gates.failed} failed`)}` : ``;
      this.#log(`${c.bold(`Gates:`)} ${passedText}${failedText}`);
    }

    const averages = this.#averageSoftScores(results);
    if (averages.length > 0) {
      this.#log(``);
      for (const { name, avg, count } of averages) {
        const badge = this.#scoreBadge(name, avg);
        this.#log(` ${badge} ${c.dim(`(${count} evals)`)}`);
      }
    }

    const durationMs = computeDurationMs(summary.startedAt, summary.completedAt);
    this.#log(``);
    this.#log(c.dim(`Completed in ${formatDuration(durationMs)}`));
    this.#log(``);
  }

  /** Maps a verdict to its coloured glyph; unknown verdicts yield `undefined`. */
  #verdictIcon(verdict) {
    switch (verdict) {
      case `passed`:
        return this.#colors.green(`✓`);
      case `failed`:
        return this.#colors.red(`✗`);
      case `scored`:
        return this.#colors.yellow(`○`);
    }
  }

  /** Renders `gates passed/total`, green when all passed and red otherwise. */
  #gateBadge(passed, total) {
    const label = `gates ${passed}/${total}`;
    return passed === total ? this.#colors.green(label) : this.#colors.red(label);
  }

  /** Renders `name: NN%`, green at exactly 1, red at exactly 0, yellow otherwise. */
  #scoreBadge(name, score) {
    const label = `${name}: ${Math.round(score * 100)}%`;
    // The expression must start on the `return` line: a line break right
    // after `return` would make the method return undefined.
    if (score === 1) return this.#colors.green(label);
    if (score === 0) return this.#colors.red(label);
    return this.#colors.yellow(label);
  }

  /** Counts passed and failed gate assertions across all results. */
  #countGates(results) {
    let passed = 0;
    let failed = 0;
    for (const result of results) {
      for (const gate of gatesOf(result)) {
        if (gate.passed) passed += 1;
        else failed += 1;
      }
    }
    return { passed, failed, total: passed + failed };
  }

  /** Averages soft assertion scores by assertion name, in first-seen order. */
  #averageSoftScores(results) {
    const totals = new Map();
    for (const result of results) {
      for (const assertion of result.assertions) {
        if (assertion.severity !== `soft`) continue;
        const entry = totals.get(assertion.name);
        if (entry) {
          entry.sum += assertion.score;
          entry.count += 1;
        } else {
          totals.set(assertion.name, { sum: assertion.score, count: 1 });
        }
      }
    }
    return [...totals.entries()].map(([name, { sum, count }]) => ({
      name,
      avg: sum / count,
      count,
    }));
  }
};

/** Returns the gate-severity assertions of one eval result. */
function gatesOf(result) {
  return result.assertions.filter((assertion) => assertion.severity === `gate`);
}

/** Milliseconds between two date strings (`completedAt - startedAt`). */
function computeDurationMs(startedAt, completedAt) {
  return new Date(completedAt).getTime() - new Date(startedAt).getTime();
}

/**
 * Formats a millisecond duration as `Nms`, `N.Ns` or `Nm Ns`.
 *
 * Rounding happens before the value is split into units, so the seconds part
 * never shows `60` (previously 119999 ms rendered as `1m 60s` and 59999 ms as
 * `60.0s`).
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(ms) {
  if (ms < 1e3) return `${ms}ms`;

  if (ms < 6e4) {
    const seconds = (ms / 1e3).toFixed(1);
    // Values just under a minute round up to "60.0"; let those fall through
    // to the minute format instead.
    if (seconds !== `60.0`) return `${seconds}s`;
  }

  const totalSeconds = Math.round(ms / 1e3);
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}

export { ConsoleReporter };
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import{dirname}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function JUnit(e){return new JUnitReporter(e)}var JUnitReporter=class{#e;constructor(e){this.#e=e}onRunStart(){}onEvalComplete(){}async onRunComplete(r){let i=renderJUnit(r,{suiteName:this.#e.suiteName});await mkdir(dirname(this.#e.filePath),{recursive:!0}),await writeFile(this.#e.filePath,i)}};function renderJUnit(e,t){let n=e.failed+e.scored,r=e.results.map(renderTestCase);return[`<?xml version="1.0" encoding="UTF-8"?>`,`<testsuite name="${escapeXml(t.suiteName??`eve evals`)}" tests="${e.results.length}" failures="${n}" skipped="
|
|
2
|
-
`)}function renderTestCase(e){let t=`classname="eve.eval" name="${escapeXml(e.id)}" time="${formatSeconds(durationSeconds(e))}"`;if(e.verdict===`passed`)return` <testcase ${t}/>`;
|
|
3
|
-
`);let
|
|
4
|
-
`)}function buildFailureDetail(e){return{verdict:e.verdict,error:e.error,assertions:e.assertions,logs:e.result.logs}}function failureMessage(e){if(e.error!==void 0)return e.error;let t=e.assertions.find(e=>!e.passed);return t===void 0?e.verdict===`scored`?`score below threshold`:e.skipReason??e.verdict:t.message===void 0?t.name:`${t.name}: ${t.message}`}function durationSeconds(e){let t=new Date(e.completedAt).getTime()-new Date(e.startedAt).getTime();return Math.max(0,t/1e3)}function formatSeconds(e){return e.toFixed(3)}function escapeXml(e){return e.replace(/&/g,`&`).replace(/"/g,`"`).replace(/'/g,`'`).replace(/</g,`<`).replace(/>/g,`>`)}export{JUnit};
|
|
1
|
+
import { dirname } from "node:path";
import { mkdir, writeFile } from "node:fs/promises";

/**
 * Creates a reporter that writes the run summary as a JUnit XML file.
 *
 * @param {{filePath: string, suiteName?: string}} options - Output path and
 *   optional suite name.
 * @returns {object} Reporter with `onRunStart`, `onEvalComplete`, `onRunComplete`.
 */
function JUnit(options) {
  return new JUnitReporter(options);
}

/** Reporter that only acts once the run is complete. */
var JUnitReporter = class {
  #options;

  constructor(options) {
    this.#options = options;
  }

  onRunStart() {}

  onEvalComplete() {}

  /**
   * Renders the summary and writes it to `filePath`, creating parent
   * directories as needed.
   */
  async onRunComplete(summary) {
    const xml = renderJUnit(summary, { suiteName: this.#options.suiteName });
    await mkdir(dirname(this.#options.filePath), { recursive: true });
    await writeFile(this.#options.filePath, xml);
  }
};

/**
 * Renders a run summary as one `<testsuite>` document. Both `failed` and
 * `scored` evals are counted as failures.
 *
 * @param {object} summary - Run summary (`results`, `failed`, `scored`,
 *   `startedAt`, `completedAt`).
 * @param {{suiteName?: string}} options - Suite name, default `eve evals`.
 * @returns {string} XML text ending with a newline.
 */
function renderJUnit(summary, options) {
  const failures = summary.failed + summary.scored;
  const testCases = summary.results.map((result) => renderTestCase(result));
  return [
    `<?xml version="1.0" encoding="UTF-8"?>`,
    `<testsuite name="${escapeXml(options.suiteName ?? `eve evals`)}" tests="${summary.results.length}" failures="${failures}" skipped="0" time="${formatSeconds(durationSeconds(summary))}">`,
    ...testCases,
    `</testsuite>`,
    ``,
  ].join(`\n`);
}

/**
 * Renders one eval as a `<testcase>`; anything other than a `passed` verdict
 * gets a nested `<failure>` whose body is the JSON failure detail.
 */
function renderTestCase(result) {
  const attributes = `classname="eve.eval" name="${escapeXml(result.id)}" time="${formatSeconds(durationSeconds(result))}"`;
  if (result.verdict === `passed`) {
    return ` <testcase ${attributes}/>`;
  }
  const message = failureMessage(result);
  const detail = JSON.stringify(buildFailureDetail(result), null, 2);
  return [
    ` <testcase ${attributes}>`,
    ` <failure message="${escapeXml(message)}">${escapeXml(detail)}</failure>`,
    ` </testcase>`,
  ].join(`\n`);
}

/** Selects the fields serialised into the `<failure>` body. */
function buildFailureDetail(result) {
  return {
    verdict: result.verdict,
    error: result.error,
    assertions: result.assertions,
    logs: result.result.logs,
  };
}

/**
 * Chooses the one-line failure message: the execution error if present, else
 * the first failed assertion, else a verdict-based fallback.
 */
function failureMessage(result) {
  if (result.error !== undefined) return result.error;

  const firstFailed = result.assertions.find((assertion) => !assertion.passed);
  if (firstFailed === undefined) {
    return result.verdict === `scored` ? `score below threshold` : result.verdict;
  }
  return firstFailed.message === undefined
    ? firstFailed.name
    : `${firstFailed.name}: ${firstFailed.message}`;
}

/** Elapsed seconds between `startedAt` and `completedAt`, never negative. */
function durationSeconds(timed) {
  const elapsedMs = new Date(timed.completedAt).getTime() - new Date(timed.startedAt).getTime();
  return Math.max(0, elapsedMs / 1e3);
}

/** Formats seconds with millisecond precision. */
function formatSeconds(seconds) {
  return seconds.toFixed(3);
}

/**
 * Escapes the five characters XML reserves so that arbitrary text is safe in
 * both attribute values and element content.
 *
 * `&` is replaced first; otherwise the ampersands introduced by the other
 * replacements would be escaped a second time.
 *
 * @param {string} value - Raw text (coerced with `String`).
 * @returns {string} Text with `& " ' < >` replaced by entity references.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, `&amp;`)
    .replace(/"/g, `&quot;`)
    .replace(/'/g, `&apos;`)
    .replace(/</g, `&lt;`)
    .replace(/>/g, `&gt;`);
}

export { JUnit };
|
|
@@ -15,7 +15,6 @@ export interface RunEvalsOptions {
|
|
|
15
15
|
readonly reporters: readonly EvalReporter[];
|
|
16
16
|
/** When false, eval-defined and config `reporters` are ignored (CLI `--skip-report`). */
|
|
17
17
|
readonly includeEvalReporters?: boolean;
|
|
18
|
-
readonly failOnSkip?: boolean;
|
|
19
18
|
/**
|
|
20
19
|
* Maximum number of evals executing at once. Must be a positive integer.
|
|
21
20
|
* Overrides the config `maxConcurrency`; defaults to 8 when neither is set.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{resolveArtifactDirectory,writeArtifacts}from"#evals/runner/artifacts.js";import{executeEval}from"#evals/runner/execute-eval.js";async function runEvals(r){let{config:i,target:a,client:o,appRoot:s}=r,c=r.maxConcurrency??i.maxConcurrency??8;if(!Number.isInteger(c)||c<1)throw Error(`Eval maxConcurrency must be a positive integer; got ${String(r.maxConcurrency??i.maxConcurrency)}.`);let l=r.evaluations.map(e=>applyConfigDefaults(e,i)),u=new Date().toISOString(),d=buildReporterBindings({...r,evaluations:l});for(let e of d)await e.reporter.onRunStart(l.filter(t=>e.evalIds.has(t.id)),a);let f=[],p=[...l],m=new Set,h=Promise.resolve();for(;p.length>0||m.size>0;){for(;p.length>0&&m.size<c;){let e=p.shift();if(e===void 0)break;let t=(async()=>{let t=await executeEval({client:o,evaluation:e,
|
|
1
|
+
import { resolveArtifactDirectory, writeArtifacts } from "#evals/runner/artifacts.js";
import { executeEval } from "#evals/runner/execute-eval.js";

/**
 * Executes a batch of evals with bounded concurrency, notifies reporters,
 * writes artifacts, and returns the run summary.
 *
 * Results are reported to `onEvalComplete` in completion order, one at a time,
 * and the summary lists them in the order the evals were supplied.
 *
 * @param {object} options - `config`, `target`, `client`, `appRoot`,
 *   `evaluations`, `reporters`, plus the optional `maxConcurrency`,
 *   `timeoutMs`, `onEvalLog` and `includeEvalReporters`.
 * @returns {Promise<object>} The run summary.
 * @throws {Error} When the resolved `maxConcurrency` is not a positive integer.
 */
async function runEvals(options) {
  const { config, target, client, appRoot } = options;

  const maxConcurrency = options.maxConcurrency ?? config.maxConcurrency ?? 8;
  if (!Number.isInteger(maxConcurrency) || maxConcurrency < 1) {
    throw Error(
      `Eval maxConcurrency must be a positive integer; got ${String(options.maxConcurrency ?? config.maxConcurrency)}.`,
    );
  }

  const evaluations = options.evaluations.map((evaluation) =>
    applyConfigDefaults(evaluation, config),
  );
  const startedAt = new Date().toISOString();
  const bindings = buildReporterBindings({ ...options, evaluations });

  for (const binding of bindings) {
    const visible = evaluations.filter((evaluation) => binding.evalIds.has(evaluation.id));
    await binding.reporter.onRunStart(visible, target);
  }

  const results = [];
  const queue = [...evaluations];
  const inFlight = new Set();
  // Reporter notifications are chained so they never overlap each other.
  let reporting = Promise.resolve();

  while (queue.length > 0 || inFlight.size > 0) {
    // Top up the pool until it is full or the queue is drained.
    while (queue.length > 0 && inFlight.size < maxConcurrency) {
      const evaluation = queue.shift();
      if (evaluation === undefined) break;

      const run = (async () => {
        const result = await executeEval({
          client,
          evaluation,
          onLog:
            options.onEvalLog === undefined
              ? undefined
              : (message) => options.onEvalLog?.(evaluation.id, message),
          target,
          timeoutMs: options.timeoutMs,
        });
        results.push(result);
        reporting = reporting.then(async () => {
          for (const binding of bindings) {
            if (binding.evalIds.has(result.id)) {
              await binding.reporter.onEvalComplete(result);
            }
          }
        });
      })().finally(() => {
        inFlight.delete(run);
      });

      inFlight.add(run);
    }

    if (inFlight.size > 0) {
      await Promise.race(inFlight);
    }
  }

  await reporting;

  // Restore declaration order; results were pushed in completion order.
  const positionById = new Map(evaluations.map((evaluation, index) => [evaluation.id, index]));
  results.sort(
    (left, right) => (positionById.get(left.id) ?? 0) - (positionById.get(right.id) ?? 0),
  );

  const summary = buildSummary(target, results, startedAt);
  await writeArtifacts(resolveArtifactDirectory(appRoot), summary);

  for (const binding of bindings) {
    await binding.reporter.onRunComplete(scopeSummary(summary, binding.evalIds));
  }

  return summary;
}

/**
 * Pairs each reporter with the set of eval ids it should hear about.
 *
 * Run-level reporters (and config reporters, unless eval reporters are
 * disabled) see every eval. A reporter declared only on individual evals sees
 * just those evals.
 */
function buildReporterBindings(options) {
  const allIds = new Set(options.evaluations.map((evaluation) => evaluation.id));
  const globalReporters = new Set(options.reporters);
  const evalReportersEnabled = options.includeEvalReporters !== false;

  if (evalReportersEnabled) {
    for (const reporter of options.config.reporters ?? []) {
      globalReporters.add(reporter);
    }
  }

  const bindings = [...globalReporters].map((reporter) => ({ reporter, evalIds: allIds }));
  if (!evalReportersEnabled) {
    return bindings;
  }

  const scoped = new Map();
  for (const evaluation of options.evaluations) {
    for (const reporter of evaluation.reporters ?? []) {
      if (globalReporters.has(reporter)) continue;
      const ids = scoped.get(reporter) ?? new Set();
      ids.add(evaluation.id);
      scoped.set(reporter, ids);
    }
  }

  for (const [reporter, evalIds] of scoped) {
    bindings.push({ reporter, evalIds });
  }
  return bindings;
}

/** Fills in the config-level judge when the eval does not declare its own. */
function applyConfigDefaults(evaluation, config) {
  if (evaluation.judge !== undefined || config.judge === undefined) {
    return evaluation;
  }
  return { ...evaluation, judge: config.judge };
}

/** Builds the run summary, stamping `completedAt` with the current time. */
function buildSummary(target, results, startedAt) {
  return {
    target,
    results,
    startedAt,
    completedAt: new Date().toISOString(),
    passed: countVerdicts(results, `passed`),
    failed: countVerdicts(results, `failed`),
    scored: countVerdicts(results, `scored`),
    errored: results.filter((result) => result.error !== undefined).length,
  };
}

/**
 * Narrows a summary to the given eval ids and recomputes the counters. The
 * original summary is returned untouched when nothing would be filtered out.
 */
function scopeSummary(summary, evalIds) {
  const coversEverything = summary.results.every((result) => evalIds.has(result.id));
  if (coversEverything) {
    return summary;
  }

  const results = summary.results.filter((result) => evalIds.has(result.id));
  return {
    ...summary,
    results,
    passed: countVerdicts(results, `passed`),
    failed: countVerdicts(results, `failed`),
    scored: countVerdicts(results, `scored`),
    errored: results.filter((result) => result.error !== undefined).length,
  };
}

/** Number of results whose verdict equals `verdict`. */
function countVerdicts(results, verdict) {
  return results.filter((result) => result.verdict === verdict).length;
}

export { runEvals };
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
import { Client } from "#client/client.js";
|
|
2
2
|
import { EvalSessionManager } from "#evals/session.js";
|
|
3
|
-
import type {
|
|
4
|
-
export declare class EveEvalRequirementError extends Error {
|
|
5
|
-
readonly requirement: EveEvalRequirement;
|
|
6
|
-
constructor(requirement: EveEvalRequirement, message: string);
|
|
7
|
-
}
|
|
3
|
+
import type { EveEvalTargetCapabilities, EveEvalTargetHandle } from "#evals/types.js";
|
|
8
4
|
export declare function resolveEvalTargetHandle(input: {
|
|
9
5
|
readonly client: Client;
|
|
10
6
|
readonly expectedAgentName?: string;
|
|
@@ -18,6 +14,5 @@ export declare function createEvalTargetHandle(input: {
|
|
|
18
14
|
readonly url: string;
|
|
19
15
|
}): EveEvalTargetHandle;
|
|
20
16
|
export declare function scopeEvalTargetHandle(target: EveEvalTargetHandle, input: {
|
|
21
|
-
readonly requirements: readonly EveEvalRequirement[];
|
|
22
17
|
readonly sessions?: EvalSessionManager;
|
|
23
18
|
}): EveEvalTargetHandle;
|
package/dist/src/evals/target.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createEveDevDispatchSchedulePath}from"#protocol/routes.js";import{toErrorMessage}from"#shared/errors.js";import"#client/client.js";import{EvalSessionManager}from"#evals/session.js";import{setTimeout}from"node:timers/promises";
|
|
1
|
+
import{createEveDevDispatchSchedulePath}from"#protocol/routes.js";import{toErrorMessage}from"#shared/errors.js";import"#client/client.js";import{EvalSessionManager}from"#evals/session.js";import{setTimeout}from"node:timers/promises";async function resolveEvalTargetHandle(e){await waitForTargetHealth(e.client,e.url);let t=await e.client.info();if(assertAgentInfoShape(t,e.url),e.expectedAgentName!==void 0&&t.agent.name!==e.expectedAgentName)throw Error(`Expected eval target ${JSON.stringify(e.expectedAgentName)} at ${e.url}, but ${JSON.stringify(t.agent.name)} is responding there.`);return createEvalTargetHandle({capabilities:capabilitiesFromInfo(t),client:e.client,kind:e.kind,url:e.url})}function createEvalTargetHandle(e){return createHandle({capabilities:e.capabilities,client:e.client,kind:e.kind,sessions:void 0,url:e.url})}function scopeEvalTargetHandle(e,t){return createHandle({capabilities:e.capabilities,client:void 0,delegate:e,kind:e.kind,sessions:t.sessions,url:e.url})}function createHandle(t){let r=t.delegate,i=t.client,fetchTarget=async(e,t)=>{if(r!==void 0)return await r.fetch(e,t);if(i===void 0)throw Error(`Eval target cannot fetch without a client.`);return await i.fetch(e,t)};return{capabilities:t.capabilities,kind:t.kind,url:t.url,async attachSession(e,a){if(t.sessions!==void 0)return await t.sessions.attachSession(e,a);if(r!==void 0)return await r.attachSession(e,a);if(i===void 0)throw Error(`Eval target cannot attach sessions without a client.`);return await new EvalSessionManager({client:i}).attachSession(e,a)},async dispatchSchedule(n){if(!t.capabilities.devRoutes)throw Error(`target.dispatchSchedule() requires a target with dev routes enabled.`);let r=await fetchTarget(createEveDevDispatchSchedulePath(n),{method:`POST`});if(!r.ok){let e=await readResponseBodySafely(r);throw Error(`Schedule dispatch failed: ${r.status} ${r.statusText}`+(e.length>0?`, ${e}`:``))}return parseScheduleDispatchResult(await r.json())},async fetch(e,t){return await 
fetchTarget(e,t)}}}function capabilitiesFromInfo(e){return{devRoutes:e.capabilities?.devRoutes??e.mode===`development`}}async function waitForTargetHealth(e,n){let i=Date.now()+6e4,a;for(;Date.now()<i;)try{await e.health();return}catch(e){a=toErrorMessage(e),await setTimeout(250)}throw Error(`Timed out waiting for eval target health at ${n}.`+(a===void 0?``:` Last error: ${a}`))}function assertAgentInfoShape(e,t){if(e.kind!==`eve-agent-info`||e.version!==1)throw Error(`Eval target ${t} returned an unrecognized /eve/v1/info payload.`)}function parseScheduleDispatchResult(e){if(typeof e!=`object`||!e||!(`scheduleId`in e)||typeof e.scheduleId!=`string`||!(`sessionIds`in e)||!Array.isArray(e.sessionIds)||e.sessionIds.some(e=>typeof e!=`string`))throw Error(`Schedule dispatch returned an unexpected response shape: ${JSON.stringify(e)}`);return{scheduleId:e.scheduleId,sessionIds:[...e.sessionIds]}}async function readResponseBodySafely(e){try{return(await e.text()).trim()}catch{return``}}export{createEvalTargetHandle,resolveEvalTargetHandle,scopeEvalTargetHandle};
|
|
@@ -7,11 +7,6 @@ import type { JsonObject } from "#shared/json.js";
|
|
|
7
7
|
import type { AgentModelOptionsDefinition } from "#shared/agent-definition.js";
|
|
8
8
|
import type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
9
9
|
import type { EveEvalSubagentCallMatchOptions, EveEvalToolCallMatchOptions } from "#evals/match.js";
|
|
10
|
-
/**
|
|
11
|
-
* Assumptions an eval needs the runner to verify against the live target
|
|
12
|
-
* or eval process environment before executing it.
|
|
13
|
-
*/
|
|
14
|
-
export type EveEvalRequirement = "mockModels" | "devRoutes" | `env:${string}`;
|
|
15
10
|
/**
|
|
16
11
|
* One tool call extracted from the captured stream, pairing the
|
|
17
12
|
* `actions.requested` request with its matching `action.result`.
|
|
@@ -291,7 +286,6 @@ export interface EveEvalTarget {
|
|
|
291
286
|
}
|
|
292
287
|
export interface EveEvalTargetCapabilities {
|
|
293
288
|
readonly devRoutes: boolean;
|
|
294
|
-
readonly mockModels: boolean;
|
|
295
289
|
}
|
|
296
290
|
export interface EveEvalScheduleDispatchResult {
|
|
297
291
|
readonly scheduleId: string;
|
|
@@ -301,7 +295,7 @@ export interface EveEvalScheduleDispatchResult {
|
|
|
301
295
|
* Live target handle exposed to eval runs.
|
|
302
296
|
*/
|
|
303
297
|
export interface EveEvalTargetHandle extends EveEvalTarget {
|
|
304
|
-
/** Dispatch a dev-only authored schedule. Requires
|
|
298
|
+
/** Dispatch a dev-only authored schedule. Requires a target with dev routes enabled. */
|
|
305
299
|
dispatchSchedule(scheduleId: string): Promise<EveEvalScheduleDispatchResult>;
|
|
306
300
|
/** Authenticated fetch against the target base URL. */
|
|
307
301
|
fetch(path: string, init?: RequestInit): Promise<Response>;
|
|
@@ -318,11 +312,6 @@ export interface EveEvalTargetHandle extends EveEvalTarget {
|
|
|
318
312
|
*/
|
|
319
313
|
interface EveEvalBase {
|
|
320
314
|
readonly description?: string;
|
|
321
|
-
/**
|
|
322
|
-
* Target/process assumptions verified before execution. The eval is
|
|
323
|
-
* skipped when any requirement is unmet.
|
|
324
|
-
*/
|
|
325
|
-
readonly requires?: readonly EveEvalRequirement[];
|
|
326
315
|
/**
|
|
327
316
|
* Judge model for this eval's `t.judge.*` assertions. Optional: when
|
|
328
317
|
* omitted, judge assertions fall back to the `judge` declared in
|
|
@@ -379,9 +368,8 @@ export type EveEval = EveEvalDefinition & {
|
|
|
379
368
|
* - `"passed"` — no execution error, every gate held, every soft threshold met
|
|
380
369
|
* - `"failed"` — a gate assertion failed or execution errored (timeout, transport, thrown task)
|
|
381
370
|
* - `"scored"` — every gate held but a soft assertion fell below its threshold
|
|
382
|
-
* - `"skipped"` — the eval was not executed (unmet `requires` entries)
|
|
383
371
|
*/
|
|
384
|
-
export type EveEvalVerdict = "passed" | "failed" | "scored"
|
|
372
|
+
export type EveEvalVerdict = "passed" | "failed" | "scored";
|
|
385
373
|
/**
|
|
386
374
|
* Result of executing and asserting one eval.
|
|
387
375
|
*
|
|
@@ -396,8 +384,6 @@ export interface EveEvalResult {
|
|
|
396
384
|
/** Per-eval verdict; see {@link EveEvalVerdict}. */
|
|
397
385
|
readonly verdict: EveEvalVerdict;
|
|
398
386
|
readonly error?: string;
|
|
399
|
-
/** Why the eval was skipped, when `verdict` is `"skipped"`. */
|
|
400
|
-
readonly skipReason?: string;
|
|
401
387
|
readonly startedAt: string;
|
|
402
388
|
readonly completedAt: string;
|
|
403
389
|
}
|
|
@@ -415,8 +401,6 @@ export interface EveEvalRunSummary {
|
|
|
415
401
|
readonly failed: number;
|
|
416
402
|
/** Evals with verdict `"scored"` (below-threshold soft assertions only). */
|
|
417
403
|
readonly scored: number;
|
|
418
|
-
/** Evals with verdict `"skipped"`. */
|
|
419
|
-
readonly skipped: number;
|
|
420
404
|
/** The execution-error subset of `failed` (timeouts, connection failures, exceptions). */
|
|
421
405
|
readonly errored: number;
|
|
422
406
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createLogger}from"#internal/logging.js";import{resolveInstalledPackageInfo}from"#internal/application/package.js";import{jsonSchema}from"ai";import{
|
|
1
|
+
import { createLogger } from "#internal/logging.js";
import { resolveInstalledPackageInfo } from "#internal/application/package.js";
import { jsonSchema } from "ai";
import { createToolLoopHarness } from "#harness/tool-loop.js";
import { resolveCodeModeEnabled } from "#shared/code-mode.js";
import { resolveRuntimeModelReference } from "#runtime/agent/resolve-model.js";
import { findRegisteredRuntimeTool } from "#runtime/tools/registry.js";
import { SUBAGENT_TOOL_INPUT_SCHEMA } from "#runtime/subagents/registry.js";
import { preserveFrameworkStateOnCompaction } from "#execution/compaction.js";
import { buildUnauthorizedToolContext, createAuthorizedToolExecute } from "#execution/tool-auth.js";

const log = createLogger(`execution.node-step`);

/**
 * Builds the tool-loop harness for one execution node.
 *
 * @param {object} input - `node`, `capabilities`, `compiledArtifactsSource`,
 *   `handleEvent` and `mode`.
 * @returns {unknown} Whatever `createToolLoopHarness` returns.
 */
function createExecutionNodeStep(input) {
  const resolveModel = createRuntimeModelResolver(input.compiledArtifactsSource);
  const tools = createNodeHarnessTools({ node: input.node });

  return createToolLoopHarness({
    capabilities: input.capabilities,
    codeMode: resolveCodeModeEnabled(input.node.agent.config?.experimental?.codeMode),
    workflow: input.node.agent.workflowEnabled === true,
    handleEvent: input.handleEvent,
    mode: input.mode,
    onCompaction: preserveFrameworkStateOnCompaction,
    resolveModel,
    runtimeIdentity: buildRuntimeIdentity(input.node),
    tools,
  });
}

/**
 * Describes the running agent: ids, package version and model id. A `build`
 * section is added when any of the Vercel deployment environment variables
 * is set to a non-empty value.
 */
function buildRuntimeIdentity(node) {
  const packageInfo = resolveInstalledPackageInfo();
  const identity = {
    agentId: node.turnAgent.id,
    agentName: node.agent.config?.name,
    eveVersion: packageInfo.version,
    modelId: node.turnAgent.model.id,
  };

  const gitSha = process.env.VERCEL_GIT_COMMIT_SHA?.trim();
  const gitBranch = process.env.VERCEL_GIT_COMMIT_REF?.trim();
  const deployedAt = process.env.VERCEL_DEPLOYMENT_CREATED_AT?.trim();

  if (!(gitSha || gitBranch || deployedAt)) {
    return identity;
  }

  // Empty strings are normalised to undefined so they drop out of the payload.
  return {
    ...identity,
    build: {
      deployedAt: deployedAt || undefined,
      gitBranch: gitBranch || undefined,
      gitSha: gitSha || undefined,
    },
  };
}

/** Returns a resolver bound to the given compiled-artifacts source. */
function createRuntimeModelResolver(compiledArtifactsSource) {
  return (reference) => resolveRuntimeModelReference(reference, { compiledArtifactsSource });
}

/**
 * Builds the name → tool-definition map for a node's turn agent. Tools that
 * cannot be resolved are left out, and a generic `agent` subagent tool is
 * added when the node does not declare one.
 *
 * @param {{node: object}} input
 * @returns {Map<string, object>}
 */
function createNodeHarnessTools(input) {
  const tools = new Map();

  for (const tool of input.node.turnAgent.tools) {
    const definition = resolveHarnessToolDefinition({ node: input.node, tool });
    if (definition !== null) {
      tools.set(tool.name, definition);
    }
  }

  if (!tools.has(`agent`)) {
    tools.set(`agent`, {
      description: `Launch a new agent to handle a complex, multi-step 
subtask.`,
      inputSchema: jsonSchema(SUBAGENT_TOOL_INPUT_SCHEMA),
      name: `agent`,
      runtimeAction: {
        kind: `subagent-call`,
        nodeId: input.node.nodeId,
        subagentName: `agent`,
      },
    });
  }

  return tools;
}

/**
 * Converts one declared tool into a harness tool definition.
 *
 * Subagent and remote tools become runtime actions. Any other tool is looked
 * up in the node's registry, and `null` is returned (with a warning) when it
 * is not registered.
 */
function resolveHarnessToolDefinition(input) {
  const { tool, node } = input;

  if (tool.kind === `subagent`) {
    return {
      description: tool.description ?? ``,
      inputSchema: jsonSchema(tool.inputSchema ?? {}),
      name: tool.name,
      outputSchema: tool.outputSchema === undefined ? undefined : jsonSchema(tool.outputSchema),
      runtimeAction: {
        kind: `subagent-call`,
        nodeId: tool.nodeId,
        subagentName: tool.name,
      },
    };
  }

  if (tool.kind === `remote`) {
    return {
      description: tool.description ?? ``,
      inputSchema: jsonSchema(tool.inputSchema ?? {}),
      name: tool.name,
      outputSchema: tool.outputSchema === undefined ? undefined : jsonSchema(tool.outputSchema),
      runtimeAction: {
        kind: `remote-agent-call`,
        nodeId: tool.nodeId,
        remoteAgentName: tool.name,
        subagentName: tool.name,
      },
    };
  }

  const registered = findRegisteredRuntimeTool(node.toolRegistry, tool.name);
  if (registered === null) {
    log.warn(`declared tool is not registered — omitting from toolset`, {
      toolName: tool.name,
      nodeId: node.nodeId,
    });
    return null;
  }

  const definition = registered.definition;
  const isFrameworkTool = definition.sourceId.startsWith(`eve:`);
  const rawExecute = definition.execute;

  return {
    approvalKey: definition.approvalKey,
    description: definition.description,
    execute: resolveAuthoredExecute({
      auth: definition.auth,
      isFrameworkTool,
      rawExecute,
      scope: definition.name,
    }),
    inputSchema: definition.inputStandardSchema ?? jsonSchema(definition.inputSchema ?? {}),
    name: definition.name,
    needsApproval: definition.needsApproval,
    outputSchema: definition.outputStandardSchema ?? maybeJsonSchema(definition.outputSchema),
    toModelOutput: definition.toModelOutput,
  };
}

/**
 * Picks the execute function exposed to the harness.
 *
 * - no `rawExecute`: `undefined`
 * - framework tool: `rawExecute` itself
 * - authored tool without `auth`: a wrapper that passes the unauthorized tool context
 * - authored tool with `auth`: the result of `createAuthorizedToolExecute`
 */
function resolveAuthoredExecute(input) {
  const { auth, isFrameworkTool, rawExecute, scope } = input;

  if (rawExecute === undefined) {
    return;
  }
  if (isFrameworkTool) {
    return rawExecute;
  }
  if (auth === undefined) {
    return (toolInput) => rawExecute(toolInput, buildUnauthorizedToolContext(scope));
  }
  return createAuthorizedToolExecute({ auth, execute: rawExecute, scope });
}

/** Wraps a schema with `jsonSchema`, passing `undefined` through unchanged. */
function maybeJsonSchema(schema) {
  if (schema === undefined) {
    return undefined;
  }
  return jsonSchema(schema);
}

export { createExecutionNodeStep, createNodeHarnessTools };
|
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
import type * as VercelSandboxSdk from "#compiled/@vercel/sandbox/index.js";
|
|
2
|
-
import type {
|
|
2
|
+
import type { SandboxCreateOptions } from "#compiled/@vercel/sandbox/index.js";
|
|
3
3
|
import type { SandboxBackend } from "#public/definitions/sandbox-backend.js";
|
|
4
4
|
import type { VercelSandboxBootstrapUseOptions, VercelSandboxSessionUseOptions } from "#public/sandbox/vercel-sandbox.js";
|
|
5
5
|
type VercelSandboxModule = typeof VercelSandboxSdk;
|
|
6
|
-
/**
|
|
7
|
-
* User-controllable subset of `Sandbox.create` parameters.
|
|
8
|
-
*/
|
|
9
|
-
export type VercelSandboxCreateOptions = Omit<NonNullable<Parameters<typeof SdkSandbox.create>[0]>, "name" | "onResume" | "persistent" | "signal">;
|
|
10
6
|
/**
|
|
11
7
|
* Construction input for {@link createVercelSandboxBackend}. Internal —
|
|
12
8
|
* the public surface is the `vercelBackend()` factory under
|
|
13
9
|
* `eve/sandbox`.
|
|
14
10
|
*/
|
|
15
11
|
export interface CreateVercelSandboxBackendInput {
|
|
16
|
-
readonly createOptions?:
|
|
12
|
+
readonly createOptions?: SandboxCreateOptions;
|
|
17
13
|
readonly loadSandboxModule?: () => Promise<VercelSandboxModule>;
|
|
18
14
|
}
|
|
19
15
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{SandboxTemplateNotProvisionedError}from"#public/definitions/sandbox-backend.js";import{WORKSPACE_ROOT}from"#runtime/workspace/types.js";import{buildSandboxSession}from"#execution/sandbox/session.js";import{streamToBuffer}from"#execution/sandbox/stream-utils.js";function createVercelSandboxBackend(e={}){let t=e.loadSandboxModule??(async()=>await import(`#compiled/@vercel/sandbox/index.js`)),n={timeout:DEFAULT_SANDBOX_TIMEOUT_MS,...e.createOptions},r=new Map;return{name:`vercel`,async create(e){let i=resolveVercelSandboxTags(n.tags,e.tags),a=e.templateKey===null?null:await readTemplateForCreate({loadSandboxModule:t,prewarmedTemplates:r,templateKey:e.templateKey}),o;try{o=await ensureSession({createOptions:n,existingMetadata:e.existingMetadata,sandboxModule:await t(),sessionKey:e.sessionKey,snapshotId:a?.snapshotId,tags:i})}catch(t){throw Error(`Failed to create sandbox session "${e.sessionKey}": ${errorMessage(t)}`,{cause:t})}return a===null&&o.created&&await ensureSandboxWorkingDirectory(o.sandbox,n),createHandle(o.sandbox,e.sessionKey)},async prewarm(e){let i;try{i=await ensureTemplate({bootstrap:e.bootstrap,createOptions:n,loadSandboxModule:t,seedFiles:e.seedFiles,templateKey:e.templateKey})}catch(t){throw Error(`Failed to prewarm Vercel sandbox template "${e.templateKey}": ${errorMessage(t)}. 
Run \`vercel login\` and \`vercel link\` so the SDK can authenticate, or set VERCEL_TOKEN.`,{cause:t})}return r.set(e.templateKey,i.template),{reused:i.reused}}}}async function readTemplate(t){let n=t.prewarmedTemplates.get(t.templateKey);if(n!==void 0)return n;let r=await getNamedSandbox(await t.loadSandboxModule(),t.templateKey);if(r===null||typeof r.currentSnapshotId!=`string`)throw new SandboxTemplateNotProvisionedError({backendName:`vercel`,templateKey:t.templateKey});return{sandboxName:r.name,snapshotId:r.currentSnapshotId,templateKey:t.templateKey}}async function readTemplateForCreate(t){try{return await readTemplate(t)}catch(n){throw SandboxTemplateNotProvisionedError.is(n)?n:Error(`Failed to read sandbox template "${t.templateKey}": ${errorMessage(n)}`,{cause:n})}}async function ensureTemplate(e){let t=await e.loadSandboxModule(),r=await getNamedSandbox(t,e.templateKey),i=resolveVercelSandboxTags(e.createOptions.tags,e.tags);
|
|
1
|
+
import { SandboxTemplateNotProvisionedError } from "#public/definitions/sandbox-backend.js";
import { WORKSPACE_ROOT } from "#runtime/workspace/types.js";
import { buildSandboxSession } from "#execution/sandbox/session.js";
import { streamToBuffer } from "#execution/sandbox/stream-utils.js";

/** Default `timeout` create option in milliseconds (1800 s); caller-supplied `createOptions` override it. */
const DEFAULT_SANDBOX_TIMEOUT_MS = 1800 * 1e3;

/** Largest number of merged tags accepted by `resolveVercelSandboxTags` before it throws. */
const VERCEL_SANDBOX_TAG_LIMIT = 5;

/** User and group that receive ownership of the workspace root during bootstrap. */
const SANDBOX_USER = `vercel-sandbox`;

/**
 * Builds the `vercel` sandbox backend.
 *
 * @param {object} [options]
 * @param {Function} [options.loadSandboxModule] Async loader for the sandbox SDK module;
 *   defaults to a dynamic import of the compiled `@vercel/sandbox` bundle.
 * @param {object} [options.createOptions] Options merged over `{ timeout: DEFAULT_SANDBOX_TIMEOUT_MS }`
 *   and forwarded to `Sandbox.create`.
 * @returns {{name: string, create: Function, prewarm: Function}} Backend object.
 */
function createVercelSandboxBackend(options = {}) {
  const loadSandboxModule =
    options.loadSandboxModule ?? (async () => await import(`#compiled/@vercel/sandbox/index.js`));
  const createOptions = { timeout: DEFAULT_SANDBOX_TIMEOUT_MS, ...options.createOptions };
  // Templates produced by prewarm() in this process, keyed by template key.
  const prewarmedTemplates = new Map();

  return {
    name: `vercel`,

    /**
     * Finds or creates the sandbox for `request.sessionKey` and wraps it in a handle.
     * A `templateKey` of `null` means "no template"; otherwise the template must already
     * be provisioned (see `readTemplate`).
     */
    async create(request) {
      const tags = resolveVercelSandboxTags(createOptions.tags, request.tags);
      const template =
        request.templateKey === null
          ? null
          : await readTemplateForCreate({
              loadSandboxModule,
              prewarmedTemplates,
              templateKey: request.templateKey,
            });

      let session;
      try {
        session = await ensureSession({
          createOptions,
          existingMetadata: request.existingMetadata,
          sandboxModule: await loadSandboxModule(),
          sessionKey: request.sessionKey,
          snapshotId: template?.snapshotId,
          tags,
        });
      } catch (error) {
        throw new Error(
          `Failed to create sandbox session "${request.sessionKey}": ${errorMessage(error)}`,
          { cause: error },
        );
      }

      // Only a freshly created, template-less sandbox is bootstrapped here.
      if (template === null && session.created) {
        await ensureSandboxWorkingDirectory(session.sandbox, createOptions);
      }
      return createHandle(session.sandbox, request.sessionKey);
    },

    /**
     * Ensures a snapshot-backed template exists for `request.templateKey` and remembers it
     * so later `create` calls in this process can skip the lookup.
     *
     * @returns {Promise<{reused: boolean}>}
     */
    async prewarm(request) {
      let outcome;
      try {
        outcome = await ensureTemplate({
          bootstrap: request.bootstrap,
          createOptions,
          loadSandboxModule,
          seedFiles: request.seedFiles,
          templateKey: request.templateKey,
        });
      } catch (error) {
        // The message text, including its line break, is reproduced exactly.
        throw new Error(
          `Failed to prewarm Vercel sandbox template "${request.templateKey}": ${errorMessage(error)}. 
Run \`vercel login\` and \`vercel link\` so the SDK can authenticate, or set VERCEL_TOKEN.`,
          { cause: error },
        );
      }
      prewarmedTemplates.set(request.templateKey, outcome.template);
      return { reused: outcome.reused };
    },
  };
}

/**
 * Returns the template descriptor for `input.templateKey`: the prewarmed entry when there
 * is one, otherwise the named sandbox's current snapshot.
 *
 * @throws {SandboxTemplateNotProvisionedError} When the named sandbox is missing or has
 *   no string `currentSnapshotId`.
 */
async function readTemplate(input) {
  const prewarmed = input.prewarmedTemplates.get(input.templateKey);
  if (prewarmed !== undefined) {
    return prewarmed;
  }

  const sandbox = await getNamedSandbox(await input.loadSandboxModule(), input.templateKey);
  if (sandbox === null || typeof sandbox.currentSnapshotId !== `string`) {
    throw new SandboxTemplateNotProvisionedError({
      backendName: `vercel`,
      templateKey: input.templateKey,
    });
  }
  return {
    sandboxName: sandbox.name,
    snapshotId: sandbox.currentSnapshotId,
    templateKey: input.templateKey,
  };
}

/**
 * `readTemplate` with error wrapping: "not provisioned" errors pass through untouched,
 * anything else is wrapped with the template key and the original error as `cause`.
 */
async function readTemplateForCreate(input) {
  try {
    return await readTemplate(input);
  } catch (error) {
    if (SandboxTemplateNotProvisionedError.is(error)) {
      throw error;
    }
    throw new Error(
      `Failed to read sandbox template "${input.templateKey}": ${errorMessage(error)}`,
      { cause: error },
    );
  }
}

/**
 * Finds or creates the template sandbox, then either reuses its current snapshot or
 * bootstraps, seeds and snapshots it.
 *
 * @returns {Promise<{reused: boolean, template: {sandboxName: string, snapshotId: string, templateKey: string}}>}
 */
async function ensureTemplate(input) {
  const sandboxModule = await input.loadSandboxModule();
  let sandbox = await getNamedSandbox(sandboxModule, input.templateKey);
  const tags = resolveVercelSandboxTags(input.createOptions.tags, input.tags);

  if (sandbox === null) {
    sandbox = await sandboxModule.Sandbox.create({
      ...input.createOptions,
      name: input.templateKey,
      persistent: false,
      tags,
    });
  } else {
    await ensureVercelSandboxTags(sandbox, tags);
  }

  // A snapshot equal to the one named in createOptions.source is not treated as a
  // finished template; only a different, non-empty snapshot id is reused.
  const authorSnapshotId = extractAuthorSnapshotId(input.createOptions);
  if (
    typeof sandbox.currentSnapshotId === `string` &&
    sandbox.currentSnapshotId.length > 0 &&
    sandbox.currentSnapshotId !== authorSnapshotId
  ) {
    return {
      reused: true,
      template: {
        sandboxName: sandbox.name,
        snapshotId: sandbox.currentSnapshotId,
        templateKey: input.templateKey,
      },
    };
  }

  await ensureSandboxWorkingDirectory(sandbox, input.createOptions);
  const session = buildSandboxSession(
    createVercelInternalSandboxSession(sandbox, input.templateKey),
    createVercelNetworkPolicySetter(sandbox),
  );

  if (input.bootstrap !== undefined) {
    await input.bootstrap({
      use: async (update) => {
        if (update !== undefined) {
          await sandbox.update(update);
        }
        return session;
      },
    });
  }

  // Seed files are written one at a time, in the order given.
  for (const file of input.seedFiles) {
    if (typeof file.content === `string`) {
      await session.writeTextFile({ content: file.content, path: file.path });
    } else {
      await session.writeBinaryFile({ content: file.content, path: file.path });
    }
  }

  const snapshot = await sandbox.snapshot();
  return {
    reused: false,
    template: {
      sandboxName: sandbox.name,
      snapshotId: snapshot.snapshotId,
      templateKey: input.templateKey,
    },
  };
}

/**
 * Looks up the session sandbox by name (the name stored in `existingMetadata`, else the
 * session key) and creates it when it does not exist.
 *
 * @returns {Promise<{created: boolean, sandbox: object}>}
 */
async function ensureSession(input) {
  const sandboxName = getVercelSandboxName(input.existingMetadata) ?? input.sessionKey;
  const existing = await getNamedSandbox(input.sandboxModule, sandboxName);
  if (existing !== null) {
    await ensureVercelSandboxTags(existing, input.tags);
    return { created: false, sandbox: existing };
  }

  const params = createSessionCreateParams(input, sandboxName);
  if (input.tags !== undefined) {
    params.tags = input.tags;
  }
  return { created: true, sandbox: await input.sandboxModule.Sandbox.create(params) };
}

/**
 * Builds the `Sandbox.create` parameters for a persistent session sandbox. When a snapshot
 * id is given, `runtime` and `source` from the create options are dropped and the snapshot
 * becomes the source.
 */
function createSessionCreateParams(input, sandboxName) {
  if (input.snapshotId === undefined) {
    return { ...input.createOptions, name: sandboxName, persistent: true };
  }
  const { runtime: _runtime, source: _source, ...rest } = input.createOptions;
  return {
    ...rest,
    name: sandboxName,
    persistent: true,
    source: { snapshotId: input.snapshotId, type: `snapshot` },
  };
}

/**
 * Wraps a sandbox in the handle shape returned by `create`: a session, a `useSessionFn`
 * that optionally applies an update before handing out a new session object, state
 * capture, and a no-op `dispose`.
 */
function createHandle(sandbox, sessionKey) {
  const openSession = () =>
    buildSandboxSession(
      createVercelInternalSandboxSession(sandbox, sessionKey),
      createVercelNetworkPolicySetter(sandbox),
    );

  return {
    session: openSession(),
    useSessionFn: async (update) => {
      if (update !== undefined) {
        await sandbox.update(update);
      }
      return openSession();
    },
    async captureState() {
      return { backendName: `vercel`, metadata: { sandboxName: sandbox.name }, sessionKey };
    },
    async dispose() {},
  };
}

/** Returns an async setter that applies a network policy through `sandbox.update`. */
function createVercelNetworkPolicySetter(sandbox) {
  return async (networkPolicy) => {
    await sandbox.update({ networkPolicy });
  };
}

/**
 * Adapts a sandbox object to the low-level session interface passed to
 * `buildSandboxSession` (spawn / readFile / writeFile / removePath).
 */
function createVercelInternalSandboxSession(sandbox, id) {
  return {
    id,
    resolvePath: resolveVercelSandboxPath,

    // Commands run detached through `bash -lc`, defaulting to the workspace root.
    async spawn(request) {
      const command = await sandbox.runCommand({
        args: [`-lc`, request.command],
        cmd: `bash`,
        cwd: request.workingDirectory ?? WORKSPACE_ROOT,
        detached: true,
        env: request.env,
        signal: request.abortSignal,
      });
      return adaptVercelCommandToSandboxProcess(command);
    },

    // A nullish result from the SDK is normalised to null.
    async readFile(request) {
      return (await sandbox.readFile({ path: request.path })) ?? null;
    },

    async writeFile(request) {
      const content = await streamToBuffer(request.content);
      await sandbox.writeFiles([{ content, path: request.path }]);
    },

    async removePath(request) {
      await sandbox.fs.rm(request.path, {
        force: request.force,
        recursive: request.recursive,
        signal: request.abortSignal,
      });
    },
  };
}

/**
 * Turns a running SDK command into a process object with `stdout`/`stderr` byte streams,
 * `wait()` and `kill()`.
 *
 * Log entries from `command.logs()` are UTF-8 encoded and routed to the matching stream.
 * `wait()` resolves only after the log iteration has ended, and rethrows a log-iteration
 * error if there was one.
 */
function adaptVercelCommandToSandboxProcess(command) {
  const encoder = new TextEncoder();
  let stdoutController;
  let stderrController;
  let pumpError;

  // Resolved once log iteration has ended, so wait() can await it instead of polling
  // a flag on a zero-delay timer.
  let markLogsDrained;
  const logsDrained = new Promise((resolve) => {
    markLogsDrained = resolve;
  });

  const stdout = new ReadableStream({
    start(controller) {
      stdoutController = controller;
    },
  });
  const stderr = new ReadableStream({
    start(controller) {
      stderrController = controller;
    },
  });

  // Background pump; deliberately not awaited here.
  (async () => {
    try {
      for await (const entry of command.logs()) {
        const chunk = encoder.encode(entry.data);
        if (entry.stream === `stdout`) {
          stdoutController?.enqueue(chunk);
        } else {
          stderrController?.enqueue(chunk);
        }
      }
    } catch (error) {
      pumpError = error;
      stdoutController?.error(error);
      stderrController?.error(error);
    } finally {
      // Signal completion before closing, mirroring the point at which the previous
      // implementation flipped its "done" flag.
      markLogsDrained();
      if (pumpError === undefined) {
        stdoutController?.close();
        stderrController?.close();
      }
    }
  })();

  return {
    stdout,
    stderr,
    async wait() {
      const result = await command.wait();
      await logsDrained;
      if (pumpError !== undefined) {
        throw pumpError;
      }
      return { exitCode: result.exitCode };
    },
    async kill() {
      await command.kill();
    },
  };
}

/** Absolute paths are returned unchanged; anything else is placed under the workspace root. */
function resolveVercelSandboxPath(path) {
  return path.startsWith(`/`) ? path : `${WORKSPACE_ROOT}/${path}`;
}

/**
 * Creates the workspace root and hands it to the sandbox user, then installs ripgrep when
 * it is missing. The ripgrep step is skipped when the network policy is `deny-all`.
 */
async function ensureSandboxWorkingDirectory(sandbox, createOptions) {
  await runSandboxBootstrapStep(sandbox, {
    failureMessage: `Failed to initialize Vercel sandbox workspace.`,
    script: `mkdir -p ${WORKSPACE_ROOT} && chown ${SANDBOX_USER}:${SANDBOX_USER} ${WORKSPACE_ROOT}`,
  });

  if (createOptions.networkPolicy !== `deny-all`) {
    await runSandboxBootstrapStep(sandbox, {
      failureMessage: `Failed to install ripgrep in Vercel sandbox.`,
      script: `command -v rg >/dev/null 2>&1 || { dnf install -y spal-release && dnf install -y ripgrep; }`,
    });
  }
}

/**
 * Runs one bootstrap script with sudo through `bash -lc`.
 *
 * @throws {Error} With `failureMessage` followed by the command's stderr when the exit
 *   code is not 0.
 */
async function runSandboxBootstrapStep(sandbox, step) {
  const result = await sandbox.runCommand({ args: [`-lc`, step.script], cmd: `bash`, sudo: true });
  if (result.exitCode !== 0) {
    const stderrText = await result.stderr();
    throw new Error(`${step.failureMessage} ${stderrText}`.trim());
  }
}

/**
 * Fetches a sandbox by name.
 *
 * @returns {Promise<object|null>} The sandbox, or `null` when the lookup fails with a 404.
 * @throws {Error} Any other lookup failure, wrapped with the sandbox name.
 */
async function getNamedSandbox(sandboxModule, name) {
  try {
    return await sandboxModule.Sandbox.get({ name });
  } catch (error) {
    if (isSandboxMissingError(error)) {
      return null;
    }
    throw new Error(`Failed to look up Vercel sandbox "${name}": ${errorMessage(error)}`, {
      cause: error,
    });
  }
}

/** True for an Error whose own `response.status`, or else its cause's, is 404. */
function isSandboxMissingError(error) {
  if (!(error instanceof Error)) {
    return false;
  }
  const status = error.response?.status ?? error.cause?.response?.status;
  return status === 404;
}

/** Returns the snapshot id named by `createOptions.source`, or `undefined` when there is none. */
function extractAuthorSnapshotId(createOptions) {
  const source = createOptions.source;
  if (source?.type === `snapshot` && typeof source.snapshotId === `string`) {
    return source.snapshotId;
  }
  return undefined;
}

/** Reads a string `sandboxName` from captured metadata, if present. */
function getVercelSandboxName(metadata) {
  const name = metadata?.sandboxName;
  return typeof name === `string` ? name : undefined;
}

/**
 * Merges backend-level tags with per-request tags (request tags win on key clashes).
 *
 * @returns {object|undefined} The merged tags, or `undefined` when there are none.
 * @throws {Error} When the merged set exceeds `VERCEL_SANDBOX_TAG_LIMIT`.
 */
function resolveVercelSandboxTags(baseTags, requestTags) {
  const merged = {};
  for (const source of [baseTags, requestTags]) {
    if (source === undefined) {
      continue;
    }
    for (const [key, value] of Object.entries(source)) {
      merged[key] = value;
    }
  }

  const count = Object.keys(merged).length;
  if (count === 0) {
    return undefined;
  }
  if (count > VERCEL_SANDBOX_TAG_LIMIT) {
    // The message text, including its line break, is reproduced exactly.
    throw new Error(`Vercel Sandbox supports at most ${VERCEL_SANDBOX_TAG_LIMIT} tags. 
Eve reserves "agent", "channel", and "sessionId"; remove or consolidate custom tags passed to vercelBackend().`);
  }
  return merged;
}

/** Updates the sandbox's tags only when tags were requested and differ from the current ones. */
async function ensureVercelSandboxTags(sandbox, tags) {
  if (tags === undefined || areVercelSandboxTagsEqual(sandbox.tags, tags)) {
    return;
  }
  await sandbox.update({ tags });
}

/** Shallow equality of two tag maps; a nullish `current` counts as empty. */
function areVercelSandboxTagsEqual(current, wanted) {
  const currentTags = current ?? {};
  const wantedEntries = Object.entries(wanted);
  if (Object.entries(currentTags).length !== wantedEntries.length) {
    return false;
  }
  return wantedEntries.every(([key, value]) => currentTags[key] === value);
}

/** Message of an Error, or the string form of any other thrown value. */
function errorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}

export { createVercelSandboxBackend };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createRequire}from"node:module";import{basename,dirname,join}from"node:path";import{existsSync,readFileSync,realpathSync}from"node:fs";import{EVE_PACKAGE_NAME}from"#internal/package-name.js";import{fileURLToPath}from"node:url";let cachedPackageInfo;const WORKFLOW_MODULE_ALIASES={"workflow/api":`src/compiled/@workflow/core/runtime.js`,"workflow/errors":`src/compiled/@workflow/errors/index.js`,"workflow/internal/private":`src/compiled/@workflow/core/private.js`,"workflow/runtime":`src/compiled/@workflow/core/runtime.js`};function resolveFallbackPackageVersion(){return`0.
|
|
1
|
+
import { createRequire } from "node:module";
import { basename, dirname, join } from "node:path";
import { existsSync, readFileSync, realpathSync } from "node:fs";
import { EVE_PACKAGE_NAME } from "#internal/package-name.js";
import { fileURLToPath } from "node:url";

/** Memoised result of `resolveInstalledPackageInfo`. */
let cachedPackageInfo;

/** Version baked into this bundle, used when no package.json can be read. */
const BUNDLED_FALLBACK_PACKAGE_VERSION = `0.7.3`;

/** Maps `workflow/*` specifiers to compiled files, relative to the package. */
const WORKFLOW_MODULE_ALIASES = {
  "workflow/api": `src/compiled/@workflow/core/runtime.js`,
  "workflow/errors": `src/compiled/@workflow/errors/index.js`,
  "workflow/internal/private": `src/compiled/@workflow/core/private.js`,
  "workflow/runtime": `src/compiled/@workflow/core/runtime.js`,
};

/**
 * Returns the bundled version, or `0.0.0` when the constant starts with `__`
 * (presumably an unreplaced build placeholder — confirm against the build script).
 */
function resolveFallbackPackageVersion() {
  if (BUNDLED_FALLBACK_PACKAGE_VERSION.startsWith(`__`)) {
    return `0.0.0`;
  }
  return BUNDLED_FALLBACK_PACKAGE_VERSION;
}

const FALLBACK_PACKAGE_INFO = {
  name: EVE_PACKAGE_NAME,
  version: resolveFallbackPackageVersion(),
};

/** Path of this module: `__filename` when it exists, otherwise derived from a stack trace. */
function resolveCurrentModulePath() {
  return typeof __filename === `string` ? __filename : resolveCurrentModulePathFromStack();
}

/**
 * Reads this module's file name from the top structured stack frame.
 *
 * `Error.prepareStackTrace` is swapped temporarily so `stack` yields call-site objects,
 * and is always restored afterwards.
 *
 * @throws {Error} When the top frame has no usable file name.
 */
function resolveCurrentModulePathFromStack() {
  const previousPrepare = Error.prepareStackTrace;
  try {
    Error.prepareStackTrace = (_error, callSites) => callSites;
    const fileName = Error().stack?.[0]?.getFileName();
    if (typeof fileName !== `string` || fileName.length === 0) {
      throw new Error(`Failed to resolve the current module path from the stack trace.`);
    }
    return fileName.startsWith(`file:`) ? fileURLToPath(fileName) : fileName;
  } finally {
    Error.prepareStackTrace = previousPrepare;
  }
}

const require = createRequire(resolveCurrentModulePath());

/** True for a directory named `dist` whose parent directory contains a package.json. */
function isBuildOutputPackageRoot(directory) {
  return basename(directory) === `dist` && existsSync(join(dirname(directory), `package.json`));
}

/**
 * Walks up from this module's real directory to the nearest build-output root.
 *
 * @returns {string|null} The `dist` directory, or `null` when the filesystem root is
 *   reached without finding one.
 */
function resolvePackageBuildRoot() {
  let current = dirname(realpathSync(resolveCurrentModulePath()));
  while (!isBuildOutputPackageRoot(current)) {
    const parent = dirname(current);
    if (parent === current) {
      return null;
    }
    current = parent;
  }
  return current;
}

/**
 * Walks up from `start` to the nearest directory that has a package.json and is not a
 * build-output root.
 *
 * @throws {Error} When the filesystem root is reached first.
 */
function findNearestPackageRoot(start) {
  let current = start;
  for (;;) {
    if (existsSync(join(current, `package.json`)) && !isBuildOutputPackageRoot(current)) {
      return current;
    }
    const parent = dirname(current);
    if (parent === current) {
      throw new Error(`Failed to resolve package root from "${start}".`);
    }
    current = parent;
  }
}

/** Package root containing this module. Throws when none is found. */
function resolvePackageRoot() {
  return findNearestPackageRoot(dirname(realpathSync(resolveCurrentModulePath())));
}

/**
 * Non-throwing variant of `resolvePackageRoot`.
 *
 * @returns {string|undefined} The package root, or `undefined` when resolution fails.
 */
function tryResolvePackageRoot() {
  try {
    // The expression must stay on the `return` line: a line break directly after
    // `return` ends the statement and the function would yield undefined.
    return resolvePackageRoot();
  } catch {
    return undefined;
  }
}

/** Rewrites a `.ts`/`.tsx`/`.mts`/`.cts`-style extension to `.js`. */
function rewriteSourceFilePathForBuild(sourcePath) {
  return sourcePath.replace(/\.[cm]?tsx?$/, `.js`);
}

/**
 * Absolute path of a package source file: inside the build root (with the extension
 * rewritten to `.js`) when one exists, otherwise inside the package root as given.
 */
function resolvePackageSourceFilePath(sourcePath) {
  const buildRoot = resolvePackageBuildRoot();
  if (buildRoot === null) {
    return join(resolvePackageRoot(), sourcePath);
  }
  return join(buildRoot, rewriteSourceFilePathForBuild(sourcePath));
}

/** Absolute path of a package directory, preferring the build root over the package root. */
function resolvePackageSourceDirectoryPath(directoryPath) {
  const buildRoot = resolvePackageBuildRoot();
  return join(buildRoot === null ? resolvePackageRoot() : buildRoot, directoryPath);
}

/** Resolves a dependency specifier relative to this module. */
function resolvePackageDependencyPath(specifier) {
  return require.resolve(specifier);
}

/**
 * Absolute path of a compiled file. Without a build root the file is looked up under
 * `.generated/compiled` in the package root, with the `src/compiled/` prefix removed.
 */
function resolvePackageCompiledFilePath(compiledPath) {
  const buildRoot = resolvePackageBuildRoot();
  if (buildRoot === null) {
    return join(
      resolvePackageRoot(),
      `.generated`,
      `compiled`,
      compiledPath.replace(/^src\/compiled\//, ``),
    );
  }
  return join(buildRoot, compiledPath);
}

/**
 * Reduces parsed package.json content to `{ name, version }`.
 *
 * @returns {{name: string, version: string}|undefined} `undefined` when the value is not
 *   an object or either field is not a string.
 */
function normalizeInstalledPackageInfo(manifest) {
  if (typeof manifest !== `object` || manifest === null) {
    return undefined;
  }
  if (typeof manifest.name !== `string` || typeof manifest.version !== `string`) {
    return undefined;
  }
  return { name: manifest.name, version: manifest.version };
}

/**
 * Reads a package.json and returns its info when the package name matches.
 *
 * Read and parse failures yield `undefined` rather than throwing, so callers can move on
 * to the next lookup strategy.
 *
 * @returns {{name: string, version: string}|undefined}
 */
function tryReadInstalledPackageInfo(packageJsonPath, expectedName) {
  let info;
  try {
    info = normalizeInstalledPackageInfo(JSON.parse(readFileSync(packageJsonPath, `utf8`)));
  } catch {
    return undefined;
  }
  return info?.name === expectedName ? info : undefined;
}

/**
 * Name and version of the installed package, memoised after the first call.
 *
 * Lookup order: package.json in the resolved package root, then
 * `<package name>/package.json` through `require.resolve`, then the bundled fallback.
 */
function resolveInstalledPackageInfo() {
  if (cachedPackageInfo) {
    return cachedPackageInfo;
  }

  const packageRoot = tryResolvePackageRoot();
  const fromRoot =
    packageRoot === undefined
      ? undefined
      : tryReadInstalledPackageInfo(join(packageRoot, `package.json`), EVE_PACKAGE_NAME);
  if (fromRoot) {
    cachedPackageInfo = fromRoot;
    return cachedPackageInfo;
  }

  try {
    const fromResolve = tryReadInstalledPackageInfo(
      require.resolve(`${EVE_PACKAGE_NAME}/package.json`),
      EVE_PACKAGE_NAME,
    );
    if (fromResolve) {
      cachedPackageInfo = fromResolve;
      return cachedPackageInfo;
    }
  } catch {
    // The package is not resolvable from here; use the bundled fallback below.
  }

  cachedPackageInfo = { ...FALLBACK_PACKAGE_INFO };
  return cachedPackageInfo;
}

/**
 * Resolves a `workflow` specifier to a file path: the two internal entry points map to
 * package source files, aliased specifiers map to compiled files, and anything else goes
 * through `require.resolve`.
 */
function resolveWorkflowModulePath(specifier) {
  if (specifier === `workflow`) {
    return resolvePackageSourceFilePath(`src/internal/workflow/index.ts`);
  }
  if (specifier === `workflow/internal/builtins`) {
    return resolvePackageSourceFilePath(`src/internal/workflow/builtins.ts`);
  }
  const compiledPath = WORKFLOW_MODULE_ALIASES[specifier];
  return compiledPath === undefined
    ? require.resolve(specifier)
    : resolvePackageCompiledFilePath(compiledPath);
}

export {
  resolveInstalledPackageInfo,
  resolvePackageDependencyPath,
  resolvePackageRoot,
  resolvePackageSourceDirectoryPath,
  resolvePackageSourceFilePath,
  resolveWorkflowModulePath,
};
|
package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import { CODE_MODE_TOOL_NAME, WORKFLOW_TOOL_NAME } from "#shared/code-mode.js";
import { LOAD_SKILL_TOOL_NAME } from "#runtime/skills/fragment-context.js";
import { getAllFrameworkChannelNames, getFrameworkChannelDefinitions } from "#runtime/framework-channels/index.js";
import { getAllFrameworkToolNames, getFrameworkToolDefinitions } from "#runtime/framework-tools/index.js";
import { createConnectionSearchResolver } from "#runtime/framework-tools/connection-search-dynamic.js";
import {
  renderChannel,
  renderDynamicResolver,
  renderSchedule,
  renderSubagent,
  renderTool,
  toSource,
} from "#internal/nitro/routes/agent-info/build-agent-info-response.js";

/**
 * Builds the `eve-agent-info` response from manifest data alone.
 *
 * Flags that cannot be read from the manifest entries used here are emitted as fixed
 * values (`hasAuth: false`, `hasExecute: true`, `hasApproval: false`, empty hook
 * `eventNames`, and so on).
 *
 * @param {object} i Input carrying `manifest` and `schedules`.
 * @param {object} a Options; `a.mode` is echoed and drives `capabilities.devRoutes`.
 * @returns {object} The agent-info payload (`kind: "eve-agent-info"`, `version: 1`).
 */
function buildAgentInfoResponseFromManifest(i, a) {
  const manifest = i.manifest;

  // Channel entries are either real channels or "disabled" markers for framework channels.
  const authoredChannelEntries = manifest.channels.filter((entry) => entry.kind === `channel`);
  const disabledChannelNames = manifest.channels
    .filter((entry) => entry.kind === `disabled`)
    .map((entry) => entry.name);

  const authoredToolNames = new Set(manifest.tools.map((tool) => tool.name));
  const disabledToolNames = new Set(manifest.disabledFrameworkTools);
  const allFrameworkToolNames = getAllFrameworkToolNames();
  const allFrameworkChannelNames = getAllFrameworkChannelNames();
  const frameworkTools = getFrameworkToolDefinitions({
    hasConnections: manifest.connections.length > 0,
  });
  const frameworkChannels = getFrameworkChannelDefinitions();

  // Framework tools that are neither replaced nor disabled by the author.
  const activeFrameworkTools = frameworkTools.filter(
    (tool) => !authoredToolNames.has(tool.name) && !disabledToolNames.has(tool.name),
  );
  const authoredTools = manifest.tools.map((tool) => ({
    ...toSource(tool),
    description: tool.description,
    hasAuth: false,
    hasCompactionHook: false,
    hasExecute: true,
    hasModelOutputProjection: false,
    hasOutputSchema: tool.outputSchema !== undefined && tool.outputSchema !== null,
    inputSchema: tool.inputSchema,
    name: tool.name,
    origin: `authored`,
    outputSchema: tool.outputSchema ?? null,
    replacesFrameworkTool: allFrameworkToolNames.has(tool.name),
    requiresApproval: false,
  }));

  const authoredChannelNames = new Set(authoredChannelEntries.map((entry) => entry.name));
  const disabledChannelNameSet = new Set(disabledChannelNames);
  const activeFrameworkChannels = frameworkChannels.filter(
    (channel) =>
      !authoredChannelNames.has(channel.name) && !disabledChannelNameSet.has(channel.name),
  );
  const authoredChannels = authoredChannelEntries.map((entry) => ({
    ...toSource(entry),
    adapterKind: entry.adapterKind,
    method: entry.method,
    name: entry.name,
    origin: `authored`,
    urlPath: entry.urlPath,
  }));

  // "disabled" takes precedence over "replaced".
  const statusOf = (isDisabled, isReplaced) => {
    if (isDisabled) {
      return `disabled`;
    }
    return isReplaced ? `replaced` : `active`;
  };

  const describeFrameworkChannel = (channel) => {
    const replaced = authoredChannelNames.has(channel.name);
    const disabled = disabledChannelNameSet.has(channel.name);
    return {
      ...renderChannel(channel, { origin: `framework` }),
      disabledByAuthor: disabled,
      replacedByAuthoredChannel: replaced,
      status: statusOf(disabled, replaced),
    };
  };

  const describeFrameworkTool = (tool) => {
    const replaced = authoredToolNames.has(tool.name);
    const disabled = disabledToolNames.has(tool.name);
    return {
      ...renderTool(tool, { origin: `framework`, replacesFrameworkTool: false }),
      disabledByAuthor: disabled,
      replacedByAuthoredTool: replaced,
      status: statusOf(disabled, replaced),
    };
  };

  const { config } = manifest;

  return {
    agent: {
      agentRoot: manifest.agentRoot,
      appRoot: manifest.appRoot,
      configSource: config.source ? toSource(config.source) : undefined,
      description: config.description,
      model: {
        contextWindowTokens: config.model.contextWindowTokens,
        id: config.model.id,
        providerOptions: config.model.providerOptions,
        source: config.model.source ? toSource(config.model.source) : undefined,
      },
      name: config.name,
      outputSchema: config.outputSchema,
    },
    capabilities: { devRoutes: a.mode === `development` },
    channels: {
      authored: authoredChannels,
      available: [
        ...activeFrameworkChannels.map((channel) => renderChannel(channel, { origin: `framework` })),
        ...authoredChannels,
      ],
      disabledFramework: disabledChannelNames,
      framework: frameworkChannels
        .filter((channel) => allFrameworkChannelNames.has(channel.name))
        .map(describeFrameworkChannel),
    },
    connections: manifest.connections.map((connection) => ({
      ...toSource(connection),
      connectionName: connection.connectionName,
      description: connection.description,
      hasApproval: false,
      hasAuthorization: connection.vercelConnect !== undefined,
      hasHeaders: false,
      protocol: connection.protocol,
      url: connection.url,
    })),
    diagnostics: {
      discoveryErrors: manifest.diagnosticsSummary.errors,
      discoveryWarnings: manifest.diagnosticsSummary.warnings,
    },
    hooks: manifest.hooks.map((hook) => ({
      ...toSource(hook),
      eventNames: [],
      slug: hook.slug,
    })),
    instructions: {
      dynamic: manifest.dynamicInstructions.map((resolver) =>
        renderDynamicResolver(resolver, { origin: `authored` }),
      ),
      static:
        manifest.instructions === undefined
          ? null
          : {
              ...toSource(manifest.instructions),
              markdown: manifest.instructions.markdown,
              name: manifest.instructions.name,
            },
    },
    kind: `eve-agent-info`,
    mode: a.mode,
    sandbox:
      manifest.sandbox === null
        ? null
        : {
            ...toSource(manifest.sandbox),
            description: manifest.sandbox.description,
            hasBootstrap: false,
            hasOnSession: false,
            revalidationKey: manifest.sandbox.revalidationKey,
            sourceHash: manifest.sandbox.sourceHash,
          },
    schedules: i.schedules.map(renderSchedule),
    skills: {
      static: manifest.skills.map((skill) => ({
        ...toSource(skill),
        description: skill.description,
        license: skill.license,
        markdown: skill.markdown,
        metadata: skill.metadata,
        name: skill.name,
      })),
      dynamic: manifest.dynamicSkills.map((resolver) =>
        renderDynamicResolver(resolver, { origin: `authored` }),
      ),
    },
    subagents: {
      local: manifest.subagents.map(renderSubagent),
      total: manifest.subagents.length,
    },
    tools: {
      available: [
        ...activeFrameworkTools.map((tool) =>
          renderTool(tool, { origin: `framework`, replacesFrameworkTool: false }),
        ),
        ...authoredTools,
      ],
      authored: authoredTools,
      disabledFramework: [...manifest.disabledFrameworkTools],
      dynamic: [
        // The connection-search resolver is listed only when connections exist.
        ...(manifest.connections.length > 0
          ? [renderDynamicResolver(createConnectionSearchResolver(), { origin: `framework` })]
          : []),
        ...manifest.dynamicTools.map((resolver) =>
          renderDynamicResolver(resolver, { origin: `authored` }),
        ),
      ],
      framework: frameworkTools.map(describeFrameworkTool),
      reserved: [CODE_MODE_TOOL_NAME, WORKFLOW_TOOL_NAME, LOAD_SKILL_TOOL_NAME],
    },
    version: 1,
    workflow: { enabled: manifest.workflowEnabled, toolName: WORKFLOW_TOOL_NAME },
    workspace: {
      resourceRoot: manifest.workspaceResourceRoot,
      rootEntries: [...manifest.workspaceResourceRoot.rootEntries],
    },
  };
}

export { buildAgentInfoResponseFromManifest };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import { CODE_MODE_TOOL_NAME, WORKFLOW_TOOL_NAME } from "#shared/code-mode.js";
import { LOAD_SKILL_TOOL_NAME } from "#runtime/skills/fragment-context.js";
import { getAllFrameworkChannelNames, getFrameworkChannelDefinitions } from "#runtime/framework-channels/index.js";
import { getAllFrameworkToolNames, getFrameworkToolDefinitions } from "#runtime/framework-tools/index.js";
import { createConnectionSearchResolver } from "#runtime/framework-tools/connection-search-dynamic.js";

/**
 * Builds the `eve-agent-info` response from a loaded agent.
 *
 * @param {object} e Input carrying `agent`, `schedules` and `manifest`.
 * @param {object} n Options; `n.mode` is echoed and drives `capabilities.devRoutes`.
 * @returns {object} The agent-info payload (`kind: "eve-agent-info"`, `version: 1`).
 */
function buildAgentInfoResponse(e, n) {
  const agent = e.agent;
  const tools = buildToolInfo(agent);
  const { config, metadata } = agent;

  return {
    agent: {
      agentRoot: metadata.agentRoot,
      appRoot: metadata.appRoot,
      configSource: config.source ? toSource(config.source) : undefined,
      description: config.description,
      model: {
        contextWindowTokens: config.model.contextWindowTokens,
        id: config.model.id,
        providerOptions: config.model.providerOptions,
        source: config.model.source ? toSource(config.model.source) : undefined,
      },
      name: config.name,
      outputSchema: config.outputSchema,
    },
    capabilities: { devRoutes: n.mode === `development` },
    channels: buildChannelInfo(agent),
    connections: agent.connections.map((connection) => ({
      ...toSource(connection),
      connectionName: connection.connectionName,
      description: connection.description,
      hasApproval: connection.approval !== undefined,
      hasAuthorization: connection.authorization !== undefined,
      hasHeaders: connection.headers !== undefined,
      protocol: connection.protocol,
      toolFilter: connection.tools,
      url: connection.url,
    })),
    diagnostics: {
      discoveryErrors: metadata.diagnosticsSummary.errors,
      discoveryWarnings: metadata.diagnosticsSummary.warnings,
    },
    hooks: agent.hooks.map((hook) => ({
      ...toSource(hook),
      eventNames: Object.keys(hook.events).sort(),
      slug: hook.slug,
    })),
    instructions: {
      dynamic: agent.dynamicInstructionsResolvers.map((resolver) =>
        renderDynamicResolver(resolver, { origin: `authored` }),
      ),
      static: agent.instructions
        ? {
            ...toSource(agent.instructions),
            markdown: agent.instructions.markdown,
            name: agent.instructions.name,
          }
        : null,
    },
    kind: `eve-agent-info`,
    mode: n.mode,
    sandbox: renderSandbox(agent.sandbox),
    schedules: e.schedules.map(renderSchedule),
    skills: {
      static: agent.skills.map(renderSkill),
      dynamic: agent.dynamicSkillResolvers.map((resolver) =>
        renderDynamicResolver(resolver, { origin: `authored` }),
      ),
    },
    subagents: {
      local: e.manifest.subagents.map(renderSubagent),
      total: e.manifest.subagents.length,
    },
    tools,
    version: 1,
    workflow: { enabled: agent.workflowEnabled, toolName: WORKFLOW_TOOL_NAME },
    workspace: {
      resourceRoot: agent.workspaceResourceRoot,
      rootEntries: [...agent.workspaceSpec.rootEntries],
    },
  };
}

/**
 * Describes the agent's channels: authored ones, framework ones with their status, and
 * the combined "available" list (active framework channels followed by authored ones).
 */
function buildChannelInfo(e) {
  const authoredNames = new Set(e.channels.map((channel) => channel.name));
  const disabledNames = new Set(e.disabledFrameworkChannels);
  const knownFrameworkNames = getAllFrameworkChannelNames();
  const definitions = getFrameworkChannelDefinitions();

  const active = definitions.filter(
    (definition) => !authoredNames.has(definition.name) && !disabledNames.has(definition.name),
  );
  const authored = e.channels.map((channel) => renderChannel(channel, { origin: `authored` }));
  const framework = definitions.map((definition) => {
    const replaced = authoredNames.has(definition.name);
    const disabled = disabledNames.has(definition.name);
    // "disabled" takes precedence over "replaced".
    let status = `active`;
    if (disabled) {
      status = `disabled`;
    } else if (replaced) {
      status = `replaced`;
    }
    return {
      ...renderChannel(definition, { origin: `framework` }),
      disabledByAuthor: disabled,
      replacedByAuthoredChannel: replaced,
      status,
    };
  });

  return {
    authored,
    available: [
      ...active.map((definition) => renderChannel(definition, { origin: `framework` })),
      ...authored,
    ],
    disabledFramework: [...e.disabledFrameworkChannels],
    framework: framework.filter((entry) => knownFrameworkNames.has(entry.name)),
  };
}

/**
 * Describes the agent's tools: authored, framework (with status), dynamic resolvers and
 * the reserved tool names.
 */
function buildToolInfo(r) {
  const authoredNames = new Set(r.tools.map((tool) => tool.name));
  const disabledNames = new Set(r.disabledFrameworkTools);
  const knownFrameworkNames = getAllFrameworkToolNames();
  const hasConnections = r.connections.length > 0;
  const definitions = getFrameworkToolDefinitions({ hasConnections });
  // The connection-search resolver is included only when connections exist.
  const frameworkResolvers = hasConnections ? [createConnectionSearchResolver()] : [];

  const active = definitions.filter(
    (definition) => !authoredNames.has(definition.name) && !disabledNames.has(definition.name),
  );
  const authored = r.tools.map((tool) =>
    renderTool(tool, {
      origin: `authored`,
      replacesFrameworkTool: knownFrameworkNames.has(tool.name),
    }),
  );
  const framework = definitions.map((definition) => {
    const replaced = authoredNames.has(definition.name);
    const disabled = disabledNames.has(definition.name);
    // "disabled" takes precedence over "replaced".
    let status = `active`;
    if (disabled) {
      status = `disabled`;
    } else if (replaced) {
      status = `replaced`;
    }
    return {
      ...renderTool(definition, { origin: `framework`, replacesFrameworkTool: false }),
      disabledByAuthor: disabled,
      replacedByAuthoredTool: replaced,
      status,
    };
  });

  return {
    available: [
      ...active.map((definition) =>
        renderTool(definition, { origin: `framework`, replacesFrameworkTool: false }),
      ),
      ...authored,
    ],
    authored,
    disabledFramework: [...r.disabledFrameworkTools],
    dynamic: [
      ...frameworkResolvers.map((resolver) => renderDynamicResolver(resolver, { origin: `framework` })),
      ...r.dynamicToolResolvers.map((resolver) =>
        renderDynamicResolver(resolver, { origin: `authored` }),
      ),
    ],
    framework,
    reserved: [CODE_MODE_TOOL_NAME, WORKFLOW_TOOL_NAME, LOAD_SKILL_TOOL_NAME],
  };
}

/** Serialisable summary of a channel; `t.origin` is copied through. */
function renderChannel(e, t) {
  const { method, name, urlPath } = e;
  return {
    ...toSource(e),
    adapterKind: e.adapter?.kind,
    method,
    name,
    origin: t.origin,
    urlPath,
  };
}

/** Serialisable summary of a tool; capability flags record whether each member is defined. */
function renderTool(e, t) {
  const { outputSchema } = e;
  return {
    ...toSource(e),
    description: e.description,
    hasAuth: e.auth !== undefined,
    hasExecute: e.execute !== undefined,
    hasModelOutputProjection: e.toModelOutput !== undefined,
    hasOutputSchema: outputSchema !== undefined && outputSchema !== null,
    inputSchema: e.inputSchema,
    name: e.name,
    origin: t.origin,
    outputSchema,
    replacesFrameworkTool: t.replacesFrameworkTool,
    requiresApproval: e.needsApproval !== undefined,
  };
}

/** Serialisable summary of a static skill. */
function renderSkill(e) {
  const { description, license, markdown, metadata, name } = e;
  return { ...toSource(e), description, license, markdown, metadata, name };
}

/** Serialisable summary of a schedule. */
function renderSchedule(e) {
  const { cron, hasRun, markdown, name } = e;
  return { ...toSource(e), cron, hasRun, markdown, name };
}

/** Serialisable summary of the sandbox definition, or `null` when there is none. */
function renderSandbox(e) {
  if (e === null) {
    return null;
  }
  return {
    ...toSource(e),
    backendKind: resolveBackendKind(e.backend),
    description: e.description,
    hasBootstrap: e.bootstrap !== undefined,
    hasOnSession: e.onSession !== undefined,
    revalidationKey: e.revalidationKey,
    sourceHash: e.sourceHash,
  };
}

/** Serialisable summary of a subagent, including counts of what its agent declares. */
function renderSubagent(e) {
  const nested = e.agent;
  return {
    ...toSource(e),
    description: e.description,
    entryPath: e.entryPath,
    name: e.name,
    nodeId: e.nodeId,
    rootPath: e.rootPath,
    summary: {
      channels: nested.channels.length,
      connections: nested.connections.length,
      hooks: nested.hooks.length,
      instructions: nested.instructions !== undefined,
      schedules: nested.schedules.length,
      skills: nested.skills.length,
      tools: nested.tools.length,
    },
  };
}

/** Serialisable summary of a dynamic resolver; `eventNames` is copied into a new array. */
function renderDynamicResolver(e, t) {
  return {
    ...toSource(e),
    eventNames: [...e.eventNames],
    origin: t.origin,
    slug: e.slug,
  };
}

/** Picks the four source-location fields shared by every rendered entry. */
function toSource(e) {
  const { exportName, logicalPath, sourceId, sourceKind } = e;
  return { exportName, logicalPath, sourceId, sourceKind };
}

/** Returns `e.kind` when `e` is a non-null object with a string `kind`; otherwise `undefined`. */
function resolveBackendKind(e) {
  if (typeof e !== `object` || !e) {
    return undefined;
  }
  const kind = e.kind;
  return typeof kind === `string` ? kind : undefined;
}

export {
  buildAgentInfoResponse,
  renderChannel,
  renderDynamicResolver,
  renderSchedule,
  renderSubagent,
  renderTool,
  toSource,
};
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import type { LanguageModel } from "ai";
|
|
2
2
|
import { type RuntimeModelReference } from "#runtime/agent/bootstrap.js";
|
|
3
|
-
/**
|
|
4
|
-
* Environment variable that forces authored runtime models onto the dedicated
|
|
5
|
-
* deterministic mock adapter.
|
|
6
|
-
*/
|
|
7
|
-
export declare const EVE_MOCK_AUTHORED_MODELS_ENV = "EVE_MOCK_AUTHORED_MODELS";
|
|
8
3
|
/**
|
|
9
4
|
* Returns true when authored runtime models should resolve through the
|
|
10
|
-
* dedicated deterministic mock adapter.
|
|
5
|
+
* dedicated deterministic mock adapter. The adapter is internal to the test
|
|
6
|
+
* tiers: it activates only under `NODE_ENV=test`, keeping the unit,
|
|
7
|
+
* integration, and scenario suites deterministic and credential-free.
|
|
11
8
|
*/
|
|
12
9
|
export declare function shouldMockAuthoredRuntimeModels(): boolean;
|
|
13
10
|
/**
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import{z}from"#compiled/zod/index.js";import{CODE_MODE_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{FINAL_OUTPUT_TOOL_NAME}from"#runtime/framework-tools/final-output.js";import{MockLanguageModelV3}from"ai/test";import{BOOTSTRAP_RUNTIME_MODEL_ID,BOOTSTRAP_RUNTIME_SYSTEM_PROMPT}from"#runtime/agent/bootstrap.js";import{createBootstrapGenerateResult,createBootstrapStreamResult,estimateTokenCount,getLastUserPromptText,getPromptContentText,getPromptText}from"#runtime/agent/bootstrap-model-utils.js";import{createMockAuthoredToolInput,formatToolOutput,resolveMockFixtureToken,resolveWeatherCity}from"#runtime/agent/mock-model-fixtures.js";import{findRelevantSkill,getActivatedSkillIds,getAvailableSkills}from"#runtime/agent/mock-model-skill-selection.js";import{createJsonSchemaSample}from"#runtime/agent/mock-structured-output.js";const authoredRuntimeModelMocks=new Map,bootstrapWeatherPayloadSchema=z.object({city:z.string(),condition:z.string(),summary:z.string(),temperatureF:z.number().finite()}).strict()
|
|
1
|
+
import{z}from"#compiled/zod/index.js";import{CODE_MODE_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{FINAL_OUTPUT_TOOL_NAME}from"#runtime/framework-tools/final-output.js";import{MockLanguageModelV3}from"ai/test";import{BOOTSTRAP_RUNTIME_MODEL_ID,BOOTSTRAP_RUNTIME_SYSTEM_PROMPT}from"#runtime/agent/bootstrap.js";import{createBootstrapGenerateResult,createBootstrapStreamResult,estimateTokenCount,getLastUserPromptText,getPromptContentText,getPromptText}from"#runtime/agent/bootstrap-model-utils.js";import{createMockAuthoredToolInput,formatToolOutput,resolveMockFixtureToken,resolveWeatherCity}from"#runtime/agent/mock-model-fixtures.js";import{findRelevantSkill,getActivatedSkillIds,getAvailableSkills}from"#runtime/agent/mock-model-skill-selection.js";import{createJsonSchemaSample}from"#runtime/agent/mock-structured-output.js";
/*
 * authoredRuntimeModelMocks: cache of mock model instances keyed by model id
 * (read and written only by createMockAuthoredRuntimeModel).
 * bootstrapWeatherPayloadSchema: strict schema used by isWeatherPayload to
 * recognise a weather tool output (city, condition, summary as strings and a
 * finite numeric temperatureF; extra keys are rejected by `.strict()`).
 */
const authoredRuntimeModelMocks=new Map,bootstrapWeatherPayloadSchema=z.object({city:z.string(),condition:z.string(),summary:z.string(),temperatureF:z.number().finite()}).strict();
/**
 * Reports whether authored runtime models should be replaced by the mock.
 *
 * @returns `true` only when `process.env.NODE_ENV` is exactly `test`.
 */
function shouldMockAuthoredRuntimeModels(){return process.env.NODE_ENV===`test`}
/**
 * Returns the mock language model for a model reference, creating it on first
 * use and reusing the cached instance afterwards.
 *
 * @param e Model reference; only `e.id` is read. It is used as the cache key
 *   and as the mock's `modelId`.
 * @returns A `MockLanguageModelV3` whose `doGenerate` returns
 *   `createMockModelResult(options, e.id)` and whose `doStream` wraps the same
 *   result with `createBootstrapStreamResult`.
 */
function createMockAuthoredRuntimeModel(e){let t=authoredRuntimeModelMocks.get(e.id);if(t!==void 0)return t;let n=new MockLanguageModelV3({modelId:e.id,provider:`eve-runtime-mock`,doGenerate:async t=>createMockModelResult(t,e.id),doStream:async t=>createBootstrapStreamResult(createMockModelResult(t,e.id))});return authoredRuntimeModelMocks.set(e.id,n),n}
/**
 * Chooses the mock response for one model call.
 *
 * Order of attempts:
 *  1. If the prompt ends with an authored tool result, try a follow-up tool
 *     call derived from that result.
 *  2. Otherwise try a skill-load call, then an authored tool call.
 *  3. If none of those produced a result, try a final-output tool call.
 *  4. Otherwise reply with text: a formatted tool-result reply when a tool
 *     result was found in step 1, else the plain assistant message.
 *
 * @param e Call options; `e.prompt` is read here, `e.tools` by the helpers.
 * @param t Model id echoed into the generated result.
 *
 * NOTE(review): in this listing a line break separates the final `return`
 * from its operand. If the shipped file contains that break, automatic
 * semicolon insertion makes the text fallback return `undefined` - confirm
 * the two stay on one line in the real artifact.
 */
function createMockModelResult(e,t){let n=getLastAuthoredToolResult(e.prompt);if(n!==null){let r=createFollowUpToolCallResult({modelId:t,options:e,result:n});if(r!==null)return r}else{let n=createSkillLoadResult(e.prompt,t)??createAuthoredToolCallResult(e,t);if(n!==null)return n}let r=createFinalOutputResult(e,t);if(r!==null)return r;let i=n===null?createAssistantMessage(e.prompt):formatToolResultReply(n,e.prompt);return 
createBootstrapGenerateResult({inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:t,outputTokens:estimateTokenCount(i),text:i})}
/**
 * Produces a call to the final-output tool when that tool is offered.
 *
 * @param e Call options (tools and prompt are read).
 * @param t Model id.
 * @returns `null` when no available tool is named `FINAL_OUTPUT_TOOL_NAME`;
 *   otherwise a tool-call result whose input is a sample generated from the
 *   tool's input schema by `createJsonSchemaSample`.
 */
function createFinalOutputResult(e,t){let n=getAvailableTools(e).find(e=>e.name===FINAL_OUTPUT_TOOL_NAME);if(n===void 0)return null;let i=createJsonSchemaSample(n.inputSchema);return createToolCallGenerateResult({input:i,inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:t,outputTokens:estimateTokenCount(JSON.stringify(i)),toolCallId:createToolCallId(FINAL_OUTPUT_TOOL_NAME),toolName:FINAL_OUTPUT_TOOL_NAME})}
/**
 * Resolves the mock for a model reference, or declines.
 *
 * @param e Model reference; only `e.id` is read.
 * @returns `null` when mocking is off or the id equals
 *   `BOOTSTRAP_RUNTIME_MODEL_ID`; otherwise the cached or newly created mock.
 */
function resolveMockAuthoredRuntimeModel(e){return!shouldMockAuthoredRuntimeModels()||e.id===BOOTSTRAP_RUNTIME_MODEL_ID?null:createMockAuthoredRuntimeModel(e)}
/**
 * Produces a load-skill tool call when a skill matches the last user text.
 *
 * @param e Prompt (message list).
 * @param t Model id.
 * @returns `null` when there is no last user text, when any skill is already
 *   activated, or when `findRelevantSkill` finds no match; otherwise a
 *   tool-call result with input `{skill: <name>}` and the fixed call id
 *   `call_load_skill`.
 */
function createSkillLoadResult(e,t){let r=getLastUserPromptText(e);if(r===null||getActivatedSkillIds(e).length>0)return null;let i=findRelevantSkill(getAvailableSkills(e),r);return i===null?null:createToolCallGenerateResult({input:{skill:i.name},inputTokens:estimateTokenCount(getPromptText(e)),modelId:t,outputTokens:estimateTokenCount(i.name),toolCallId:`call_load_skill`,toolName:LOAD_SKILL_TOOL_NAME})}
/**
 * Produces a call to the tool that best matches the last user text.
 *
 * For the code-mode tool the input is a generated script (`{js: ...}`) that
 * awaits the matching host tool with a `city` argument; the host tool is
 * looked up in the code-mode tool's description, and `null` is returned when
 * none matches. For every other tool the input comes from
 * `createMockAuthoredToolInput`.
 *
 * @param e Call options (tools and prompt are read).
 * @param n Model id.
 * @returns A tool-call result, or `null` when there is no user text or no
 *   relevant tool.
 */
function createAuthoredToolCallResult(e,n){let r=getLastUserPromptText(e.prompt);if(r===null)return null;let i=findRelevantTool(getAvailableTools(e),r);if(i===null)return null;let a=resolveWeatherCity(r),o=createMockAuthoredToolInput(i,r,a);if(i.name===CODE_MODE_TOOL_NAME){let t=findRelevantCodeModeHostTool(i.description,r);if(t===null)return null;let o=`return await tools${formatCodeModeToolAccess(t)}({ city: ${JSON.stringify(a)} });`;return createToolCallGenerateResult({input:{js:o},inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:n,outputTokens:estimateTokenCount(o),toolCallId:createToolCallId(i.name),toolName:i.name})}return createToolCallGenerateResult({input:o,inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:n,outputTokens:estimateTokenCount(Object.values(o).join(` 
`)),toolCallId:createToolCallId(i.name),toolName:i.name})}
/**
 * Produces the next tool call after a tool result, when the user text names a
 * further tool and the previous output carries a `stepKey`.
 *
 * @param e Object with `modelId`, `options` (call options) and `result`
 *   (the value returned by `getLastAuthoredToolResult`).
 * @returns `null` when no later tool is named in the user text or when the
 *   previous output yields no follow-up input; otherwise a tool-call result.
 */
function createFollowUpToolCallResult(e){let t=findNextExplicitToolAfterResult({previousToolName:e.result.toolName,prompt:e.options.prompt,tools:getAvailableTools(e.options)});if(t===null)return null;let n=createFollowUpToolInput(e.result.output);return n===null?null:createToolCallGenerateResult({input:n,inputTokens:estimateTokenCount(getPromptText(e.options.prompt)),modelId:e.modelId,outputTokens:estimateTokenCount(Object.values(n).join(` `)),toolCallId:createToolCallId(t.name),toolName:t.name})}
/**
 * Builds the plain-text reply used when no tool call applies.
 *
 * A fixture token from `resolveMockFixtureToken` wins outright. Otherwise the
 * reply is `Bootstrap reply`, optionally followed by a bracketed section
 * holding the system-prompt labels and/or `probe=<value>`, then `: ` and the
 * last user text (or `Hello from Eve` when there is none).
 *
 * @param e Prompt (message list).
 */
function createAssistantMessage(e){let t=getLastUserPromptText(e)??`Hello from Eve`,n=getSystemPromptLabels(e),r=resolveSystemProbe(e),i=resolveMockFixtureToken(e);return i===null?n.length>0?r===null?`Bootstrap reply [${n.join(`, `)}]: ${t}`:`Bootstrap reply [${n.join(`, `)}; probe=${r}]: ${t}`:r===null?`Bootstrap reply: ${t}`:`Bootstrap reply [probe=${r}]: ${t}`:i}
/**
 * Formats the text reply that follows a tool result.
 *
 * Error results are checked first, then outputs matching the weather payload
 * schema, and everything else falls through to a generic message that quotes
 * the last user text (or `Hello from Eve`).
 *
 * @param e Tool result as returned by `getLastAuthoredToolResult`.
 * @param t Prompt (message list).
 */
function formatToolResultReply(e,t){if(e.isError)return`Local weather tool failed: ${formatToolOutput(e.output)}`;if(isWeatherPayload(e.output))return`Used local weather tool for ${e.output.city}: ${e.output.condition}, ${e.output.temperatureF}F. 
${e.output.summary}`;let n=getLastUserPromptText(t)??`Hello from Eve`;return`Used ${e.toolName} for "${n}": ${formatToolOutput(e.output)}`}
/**
 * Builds a generate result containing exactly one tool call.
 *
 * The tool input is serialised with `JSON.stringify`. The response id and
 * timestamp are fixed constants, so the result depends only on `e`.
 *
 * @param e Object with `input`, `inputTokens`, `modelId`, `outputTokens`,
 *   `toolCallId` and `toolName`.
 */
function createToolCallGenerateResult(e){return{content:[{input:JSON.stringify(e.input),toolCallId:e.toolCallId,toolName:e.toolName,type:`tool-call`}],finishReason:{raw:void 0,unified:`tool-calls`},response:{id:`bootstrap-response`,modelId:e.modelId,timestamp:new Date(`2026-03-16T00:00:00.000Z`)},usage:{inputTokens:{cacheRead:0,cacheWrite:0,noCache:e.inputTokens,total:e.inputTokens},outputTokens:{reasoning:0,text:e.outputTokens,total:e.outputTokens}},warnings:[]}}
/**
 * Lists the function tools offered in the call options.
 *
 * @param e Call options; a missing `tools` is treated as an empty list.
 * @returns One `{description, inputSchema, name, outputSchema}` entry per
 *   tool whose `type` is `function`; a schema is `undefined` when the tool
 *   object has no such property. Tools of other types are dropped.
 */
function getAvailableTools(e){return(e.tools??[]).flatMap(e=>e.type===`function`?[{description:e.description,inputSchema:`inputSchema`in e?e.inputSchema:void 0,name:e.name,outputSchema:`outputSchema`in e?e.outputSchema:void 0}]:[])}
/**
 * Finds the most recent tool result that follows the last user message.
 *
 * Messages are scanned newest first and the scan stops with `null` at the
 * first user message. Within `tool` and `assistant` messages, content parts
 * are scanned newest first for a `tool-result` part whose tool is not the
 * load-skill tool.
 *
 * @param e Prompt (message list); scanned via reversed copies, so the input
 *   arrays are not reordered.
 * @returns `null`, or `{isError, output, toolCallId, toolName}`. `isError` is
 *   true for output types `error-json`, `error-text` and `execution-denied`.
 *   `output` is `{reason, type}` for a denied execution and the output's
 *   `value` otherwise.
 */
function getLastAuthoredToolResult(e){for(let t of[...e].reverse()){if(t.role===`user`)return null;if(!(t.role!==`tool`&&t.role!==`assistant`)){for(let e of[...t.content].reverse())if(!(typeof e==`string`||e.type!==`tool-result`)&&e.toolName!==LOAD_SKILL_TOOL_NAME)return{isError:e.output.type===`error-json`||e.output.type===`error-text`||e.output.type===`execution-denied`,output:e.output.type===`execution-denied`?{reason:e.output.reason??null,type:e.output.type}:e.output.value,toolCallId:e.toolCallId,toolName:e.toolName}}}return null}
/**
 * Finds the tool the user text names next after the previously called tool.
 *
 * Both the user text and the tool names are compared in normalised form. The
 * previous tool's name must occur in the text; candidates are the other tools
 * whose name occurs at a later position, and the earliest such occurrence
 * wins.
 *
 * @param e Object with `previousToolName`, `prompt` and `tools`.
 * @returns The matching tool entry, or `null`.
 */
function findNextExplicitToolAfterResult(e){let t=getLastUserPromptText(e.prompt);if(t===null)return null;let n=normalizeText(t),r=n.indexOf(normalizeText(e.previousToolName));return r<0?null:e.tools.filter(t=>t.name!==e.previousToolName).flatMap(e=>{let t=n.indexOf(normalizeText(e.name),r+1);return t<0?[]:[{index:t,tool:e}]}).sort((e,t)=>e.index-t.index)[0]?.tool??null}
/**
 * Derives the input for a follow-up tool call from a previous tool output.
 *
 * @param e Previous tool output of unknown shape.
 * @returns `{stepKey}` when `e` is a plain record with a string `stepKey`;
 *   `null` otherwise.
 */
function createFollowUpToolInput(e){return isRecord(e)&&typeof e.stepKey==`string`?{stepKey:e.stepKey}:null}
/**
 * Collects labels from the system messages of a prompt; returns an empty
 * array when the prompt has no system message. (The body of this function
 * continues past this line in the listing.)
 *
 * @param e Prompt (message list).
 */
function getSystemPromptLabels(e){let t=e.filter(e=>e.role===`system`);if(t.length===0)return[];let n=t.flatMap(e=>{let 
t=getPromptContentText(e.content);if(t.startsWith(`Available skills
|
|
2
2
|
`))return[];let n=t.split(`
|
|
3
3
|
`).map(e=>e.trim()).filter(e=>e.length>0),r=[];for(let e of n){if(e===BOOTSTRAP_RUNTIME_SYSTEM_PROMPT||e===`Available skills`)continue;let t=/^System \((.+)\)$/.exec(e);if(t?.[1]){r.push(t[1]);continue}let n=/^Skill \((.+)\)$/.exec(e);n?.[1]&&r.push(n[1])}if(r.length>0)return r;let i=n.find(e=>e!==BOOTSTRAP_RUNTIME_SYSTEM_PROMPT&&e!==`Available skills`);return i===void 0?[]:[i]});return[...new Set(n)]}
/**
 * Picks the tool that best matches a piece of user text.
 *
 * First choice is a tool whose normalised name occurs in the normalised text;
 * the tool named `agent` and the load-skill tool are never chosen this way.
 * If nothing matches by name and the text contains one of the weather
 * keywords (forecast, temperature, weather, wind, rain, snow), the first tool
 * whose name or description contains one of those keywords is used instead.
 *
 * @param e Tool entries (`name`, optional `description`).
 * @param t User text.
 * @returns The chosen tool entry, or `null`.
 */
function findRelevantTool(e,t){let r=normalizeText(t),i=e.find(e=>e.name!==`agent`&&e.name!==LOAD_SKILL_TOOL_NAME&&r.includes(normalizeText(e.name)));return i===void 0?/\b(forecast|temperature|weather|wind|rain|snow)\b/u.test(r)?e.find(e=>/\b(forecast|temperature|weather|wind|rain|snow)\b/u.test(normalizeText(`${e.name} ${e.description??``}`)))??null:null:i}
/**
 * Picks the host tool, listed in a code-mode tool description, that best
 * matches the user text.
 *
 * @param e Description text of the code-mode tool, or `undefined`.
 * @param t User text.
 * @returns The host tool's name, or `null` when the description is missing or
 *   no listed tool matches.
 */
function findRelevantCodeModeHostTool(e,t){return e===void 0?null:findRelevantTool(parseCodeModeHostTools(e),t)?.name??null}
/**
 * Extracts `{description, name}` entries from a code-mode tool description by
 * scanning it piece by piece. (The loop body continues past this line in the
 * listing.)
 *
 * @param e Description text.
 */
function parseCodeModeHostTools(e){let t=[],n;for(let r of e.split(`
|
|
4
4
|
`)){let e=/^\s*\/\*\*\s*(.*?)\s*\*\/\s*$/u.exec(r);if(e?.[1]!==void 0){n=e[1];continue}let i=/^\s*(?:([$A-Z_a-z][$\w]*)|(["'])(.*?)\2)\s*:\s*\(input:/u.exec(r),a=i?.[1]??i?.[3];a!==void 0&&(t.push({description:n,name:a}),n=void 0)}return t}
/**
 * Formats the property access used to reach a host tool on the `tools`
 * object in generated code-mode scripts.
 *
 * @param e Tool name.
 * @returns Dot access (`.name`) when the name is made only of letters,
 *   digits, `$` and `_` and does not start with a digit; otherwise bracket
 *   access with the name JSON-quoted.
 */
function formatCodeModeToolAccess(e){return/^[$A-Z_a-z][$\w]*$/u.test(e)?`.${e}`:`[${JSON.stringify(e)}]`}
/**
 * Normalises text for loose matching: lower-cases it, replaces every run of
 * characters outside `a-z0-9` with a single space, and trims the ends.
 *
 * @param e Input string.
 */
function normalizeText(e){return e.toLowerCase().replace(/[^a-z0-9]+/gu,` `).trim()}
/**
 * Derives a tool-call id from a tool name.
 *
 * The name is lower-cased, runs of characters outside `a-z0-9` become `_`,
 * and leading/trailing underscores are removed. When nothing is left the
 * slug falls back to `tool`. The result is prefixed with `call_`.
 *
 * @param e Tool name.
 */
function createToolCallId(e){return`call_${e.toLowerCase().replace(/[^a-z0-9]+/gu,`_`).replace(/^_+|_+$/gu,``)||`tool`}`}
/**
 * Looks for an `hmr-probe:` marker in the joined text of the system messages.
 * (The body of this function continues past this line in the listing.)
 *
 * @param e Prompt (message list).
 */
function resolveSystemProbe(e){let t=e.filter(e=>e.role===`system`).map(e=>getPromptContentText(e.content)).join(`
|
|
5
|
-
`);return/hmr-probe:\s*([^\n]+)/iu.exec(t)?.[1]?.trim()||null}function isWeatherPayload(e){return bootstrapWeatherPayloadSchema.safeParse(e).success}function isRecord(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}export{
|
|
5
|
+
`);return/hmr-probe:\s*([^\n]+)/iu.exec(t)?.[1]?.trim()||null}
/**
 * Reports whether a tool output matches the weather payload schema.
 *
 * @param e Value of unknown shape.
 * @returns The `success` flag of `bootstrapWeatherPayloadSchema.safeParse`,
 *   so invalid input yields `false` rather than throwing.
 */
function isWeatherPayload(e){return bootstrapWeatherPayloadSchema.safeParse(e).success}
/**
 * Reports whether a value is a non-null object that is not an array.
 *
 * @param e Value of unknown shape.
 */
function isRecord(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}
/* Public surface of this module: the mock factory, its resolver and the gate. */
export{createMockAuthoredRuntimeModel,resolveMockAuthoredRuntimeModel,shouldMockAuthoredRuntimeModels};
|