eve 0.6.0-beta.9 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (650) hide show
  1. package/CHANGELOG.md +281 -0
  2. package/README.md +9 -6
  3. package/dist/docs/public/README.md +17 -12
  4. package/dist/docs/public/agent-config.md +10 -10
  5. package/dist/docs/public/channels/custom.mdx +4 -4
  6. package/dist/docs/public/channels/discord.mdx +1 -1
  7. package/dist/docs/public/channels/eve.mdx +10 -10
  8. package/dist/docs/public/channels/github.mdx +1 -1
  9. package/dist/docs/public/channels/overview.mdx +21 -15
  10. package/dist/docs/public/channels/slack.mdx +16 -4
  11. package/dist/docs/public/channels/teams.mdx +1 -1
  12. package/dist/docs/public/channels/telegram.mdx +1 -1
  13. package/dist/docs/public/channels/twilio.mdx +1 -1
  14. package/dist/docs/public/{advanced → concepts}/context-control.md +3 -3
  15. package/dist/docs/public/{advanced → concepts}/default-harness.md +5 -5
  16. package/dist/docs/public/{advanced → concepts}/execution-model-and-durability.md +3 -1
  17. package/dist/docs/public/concepts/meta.json +10 -0
  18. package/dist/docs/public/{advanced → concepts}/security-model.md +3 -3
  19. package/dist/docs/public/{advanced → concepts}/sessions-runs-and-streaming.md +7 -7
  20. package/dist/docs/public/connections.mdx +6 -4
  21. package/dist/docs/public/evals/assertions.mdx +108 -0
  22. package/dist/docs/public/evals/cases.mdx +143 -0
  23. package/dist/docs/public/evals/judge.mdx +94 -0
  24. package/dist/docs/public/evals/meta.json +4 -0
  25. package/dist/docs/public/evals/overview.mdx +118 -0
  26. package/dist/docs/public/evals/reporters.mdx +62 -0
  27. package/dist/docs/public/evals/running.mdx +63 -0
  28. package/dist/docs/public/evals/targets.mdx +54 -0
  29. package/dist/docs/public/getting-started.mdx +38 -33
  30. package/dist/docs/public/{advanced → guides}/auth-and-route-protection.md +5 -3
  31. package/dist/docs/public/{client → guides/client}/continuations.mdx +2 -2
  32. package/dist/docs/public/{client → guides/client}/messages.mdx +1 -1
  33. package/dist/docs/public/{client → guides/client}/meta.json +1 -1
  34. package/dist/docs/public/{client → guides/client}/output-schema.mdx +2 -2
  35. package/dist/docs/public/{client → guides/client}/overview.mdx +5 -5
  36. package/dist/docs/public/{client → guides/client}/streaming.mdx +1 -1
  37. package/dist/docs/public/{advanced → guides}/deployment.md +9 -1
  38. package/dist/docs/public/guides/dev-tui.md +50 -0
  39. package/dist/docs/public/{advanced → guides}/dynamic-capabilities.md +1 -1
  40. package/dist/docs/public/{advanced → guides}/dynamic-workflows.md +1 -1
  41. package/dist/docs/public/{frontend → guides/frontend}/nextjs.mdx +16 -7
  42. package/dist/docs/public/{frontend → guides/frontend}/nuxt.mdx +7 -7
  43. package/dist/docs/public/{frontend → guides/frontend}/overview.mdx +6 -6
  44. package/dist/docs/public/{frontend → guides/frontend}/sveltekit.mdx +5 -5
  45. package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-svelte.mdx +2 -2
  46. package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-vue.mdx +2 -2
  47. package/dist/docs/public/{advanced → guides}/hooks.md +2 -2
  48. package/dist/docs/public/{advanced → guides}/instrumentation.md +3 -1
  49. package/dist/docs/public/{advanced → guides}/meta.json +8 -12
  50. package/dist/docs/public/{advanced → guides}/session-context.md +3 -3
  51. package/dist/docs/public/{advanced → guides}/state.md +1 -1
  52. package/dist/docs/public/instructions.mdx +2 -2
  53. package/dist/docs/public/introduction.md +5 -2
  54. package/dist/docs/public/meta.json +4 -3
  55. package/dist/docs/public/reference/cli.md +35 -19
  56. package/dist/docs/public/reference/meta.json +1 -1
  57. package/dist/docs/public/reference/project-layout.md +5 -1
  58. package/dist/docs/public/reference/typescript-api.md +27 -23
  59. package/dist/docs/public/sandbox.mdx +1 -1
  60. package/dist/docs/public/schedules.mdx +2 -2
  61. package/dist/docs/public/skills.mdx +3 -3
  62. package/dist/docs/public/subagents.mdx +3 -3
  63. package/dist/docs/public/tools.mdx +4 -8
  64. package/dist/docs/public/tutorial/connect-a-warehouse.mdx +3 -3
  65. package/dist/docs/public/tutorial/first-agent.mdx +6 -3
  66. package/dist/docs/public/tutorial/guard-the-spend.mdx +1 -1
  67. package/dist/docs/public/tutorial/how-it-runs.mdx +2 -2
  68. package/dist/docs/public/tutorial/meta.json +1 -1
  69. package/dist/docs/public/tutorial/query-sample-data.mdx +1 -1
  70. package/dist/docs/public/tutorial/remember-definitions.mdx +3 -3
  71. package/dist/docs/public/tutorial/run-analysis.mdx +1 -1
  72. package/dist/docs/public/tutorial/ship-it.mdx +4 -4
  73. package/dist/docs/public/tutorial/team-playbooks.mdx +3 -3
  74. package/dist/src/chunks/{use-eve-agent-DCZbkLG7.js → use-eve-agent-DErQj5hs.js} +125 -37
  75. package/dist/src/chunks/{use-eve-agent-DoheC4_o.js → use-eve-agent-DoR8C4i6.js} +125 -37
  76. package/dist/src/cli/banner.d.ts +7 -0
  77. package/dist/src/cli/banner.js +1 -0
  78. package/dist/src/cli/commands/channel-add-conflicts.d.ts +1 -1
  79. package/dist/src/cli/commands/channels.d.ts +9 -6
  80. package/dist/src/cli/commands/channels.js +1 -1
  81. package/dist/src/cli/commands/deploy.d.ts +21 -0
  82. package/dist/src/cli/commands/deploy.js +1 -0
  83. package/dist/src/cli/commands/init-git.d.ts +15 -0
  84. package/dist/src/cli/commands/init-git.js +1 -0
  85. package/dist/src/cli/commands/init.d.ts +29 -0
  86. package/dist/src/cli/commands/init.js +1 -0
  87. package/dist/src/cli/commands/link.d.ts +21 -0
  88. package/dist/src/cli/commands/link.js +1 -0
  89. package/dist/src/cli/commands/preconditions.d.ts +7 -0
  90. package/dist/src/cli/commands/preconditions.js +1 -0
  91. package/dist/src/cli/commands/register-project-commands.d.ts +12 -0
  92. package/dist/src/cli/commands/register-project-commands.js +1 -0
  93. package/dist/src/cli/dev/tui/agent-header.d.ts +15 -9
  94. package/dist/src/cli/dev/tui/agent-header.js +1 -1
  95. package/dist/src/cli/dev/tui/blocks.d.ts +1 -1
  96. package/dist/src/cli/dev/tui/blocks.js +3 -2
  97. package/dist/src/cli/dev/tui/command-typeahead.d.ts +47 -0
  98. package/dist/src/cli/dev/tui/command-typeahead.js +1 -0
  99. package/dist/src/cli/dev/tui/dev-rebuild-status.d.ts +21 -0
  100. package/dist/src/cli/dev/tui/dev-rebuild-status.js +1 -0
  101. package/dist/src/cli/dev/tui/errors.d.ts +18 -0
  102. package/dist/src/cli/dev/tui/errors.js +1 -1
  103. package/dist/src/cli/dev/tui/prompt-command-handler.d.ts +14 -0
  104. package/dist/src/cli/dev/tui/prompt-command-handler.js +1 -0
  105. package/dist/src/cli/dev/tui/prompt-commands.d.ts +54 -0
  106. package/dist/src/cli/dev/tui/prompt-commands.js +2 -0
  107. package/dist/src/cli/dev/tui/runner.d.ts +64 -7
  108. package/dist/src/cli/dev/tui/runner.js +1 -1
  109. package/dist/src/cli/dev/tui/setup-commands.d.ts +48 -0
  110. package/dist/src/cli/dev/tui/setup-commands.js +2 -0
  111. package/dist/src/cli/dev/tui/setup-flow.d.ts +35 -0
  112. package/dist/src/cli/dev/tui/setup-issues.d.ts +40 -0
  113. package/dist/src/cli/dev/tui/setup-issues.js +1 -0
  114. package/dist/src/cli/dev/tui/setup-panel.d.ts +103 -0
  115. package/dist/src/cli/dev/tui/setup-panel.js +1 -0
  116. package/dist/src/cli/dev/tui/status-line.d.ts +25 -0
  117. package/dist/src/cli/dev/tui/status-line.js +1 -0
  118. package/dist/src/cli/dev/tui/stream-format.d.ts +16 -1
  119. package/dist/src/cli/dev/tui/stream-format.js +1 -1
  120. package/dist/src/cli/dev/tui/terminal-renderer.d.ts +32 -3
  121. package/dist/src/cli/dev/tui/terminal-renderer.js +5 -2
  122. package/dist/src/cli/dev/tui/test/index.d.ts +3 -1
  123. package/dist/src/cli/dev/tui/test/index.js +1 -1
  124. package/dist/src/cli/dev/tui/test/mock-terminal.d.ts +1 -0
  125. package/dist/src/cli/dev/tui/test/mock-terminal.js +1 -1
  126. package/dist/src/cli/dev/tui/theme.d.ts +10 -0
  127. package/dist/src/cli/dev/tui/theme.js +1 -1
  128. package/dist/src/cli/dev/tui/tui-prompter.d.ts +20 -0
  129. package/dist/src/cli/dev/tui/tui-prompter.js +1 -0
  130. package/dist/src/cli/dev/tui/tui.d.ts +6 -8
  131. package/dist/src/cli/dev/tui/tui.js +1 -1
  132. package/dist/src/cli/dev/tui/types.d.ts +4 -3
  133. package/dist/src/cli/dev/tui/vercel-status.d.ts +47 -0
  134. package/dist/src/cli/dev/tui/vercel-status.js +1 -0
  135. package/dist/src/cli/run.d.ts +9 -18
  136. package/dist/src/cli/run.js +2 -2
  137. package/dist/src/client/client.d.ts +8 -0
  138. package/dist/src/client/client.js +1 -1
  139. package/dist/src/client/file-parts.d.ts +18 -0
  140. package/dist/src/client/file-parts.js +1 -0
  141. package/dist/src/client/index.d.ts +3 -2
  142. package/dist/src/client/index.js +1 -1
  143. package/dist/src/client/message-response.js +1 -1
  144. package/dist/src/client/open-stream.d.ts +6 -0
  145. package/dist/src/client/open-stream.js +1 -1
  146. package/dist/src/client/session-utils.d.ts +5 -0
  147. package/dist/src/client/session-utils.js +1 -1
  148. package/dist/src/client/session.js +1 -1
  149. package/dist/src/client/types.d.ts +9 -2
  150. package/dist/src/compiled/.vendor-stamp.json +8 -8
  151. package/dist/src/compiled/@ai-sdk/anthropic/index.d.ts +56 -31
  152. package/dist/src/compiled/@ai-sdk/anthropic/index.js +2 -2
  153. package/dist/src/compiled/@ai-sdk/google/index.js +1 -1
  154. package/dist/src/compiled/@ai-sdk/mcp/index.js +1 -1
  155. package/dist/src/compiled/@ai-sdk/openai/index.d.ts +16 -9
  156. package/dist/src/compiled/@ai-sdk/openai/index.js +2 -2
  157. package/dist/src/compiled/@ai-sdk/otel/index.js +2 -2
  158. package/dist/src/compiled/@vercel/sandbox/index.js +1 -1
  159. package/dist/src/compiled/@workflow/core/capabilities.d.ts +19 -1
  160. package/dist/src/compiled/@workflow/core/class-serialization.d.ts +32 -0
  161. package/dist/src/compiled/@workflow/core/create-hook.d.ts +37 -0
  162. package/dist/src/compiled/@workflow/core/global.d.ts +11 -1
  163. package/dist/src/compiled/@workflow/core/index.js +2 -2
  164. package/dist/src/compiled/@workflow/core/runtime/helpers.d.ts +4 -2
  165. package/dist/src/compiled/@workflow/core/runtime/start.d.ts +6 -0
  166. package/dist/src/compiled/@workflow/core/runtime/suspension-handler.d.ts +15 -2
  167. package/dist/src/compiled/@workflow/core/runtime/wait-continuation.d.ts +84 -0
  168. package/dist/src/compiled/@workflow/core/runtime/wait-until.d.ts +18 -0
  169. package/dist/src/compiled/@workflow/core/runtime.d.ts +3 -1
  170. package/dist/src/compiled/@workflow/core/runtime.js +28 -28
  171. package/dist/src/compiled/@workflow/core/serialization/types.d.ts +21 -0
  172. package/dist/src/compiled/@workflow/core/serialization.d.ts +113 -6
  173. package/dist/src/compiled/@workflow/core/symbols.d.ts +2 -0
  174. package/dist/src/compiled/@workflow/core/util.d.ts +0 -5
  175. package/dist/src/compiled/@workflow/core/version.d.ts +1 -1
  176. package/dist/src/compiled/@workflow/core/workflow/attribute-dispatcher.d.ts +6 -0
  177. package/dist/src/compiled/@workflow/core/workflow/set-attributes.d.ts +3 -4
  178. package/dist/src/compiled/@workflow/core/workflow.js +1 -1
  179. package/dist/src/compiled/@workflow/world/events.d.ts +48 -0
  180. package/dist/src/compiled/@workflow/world/index.d.ts +3 -3
  181. package/dist/src/compiled/@workflow/world/queue.d.ts +31 -2
  182. package/dist/src/compiled/@workflow/world/runs.d.ts +2 -0
  183. package/dist/src/compiled/@workflow/world/spec-version.d.ts +2 -1
  184. package/dist/src/compiled/_chunks/workflow/attribute-changes-DGVGRGfw.js +59 -0
  185. package/dist/src/compiled/_chunks/workflow/{dist-gEXVSMPU.js → dist-CkMRLaRV.js} +1 -1
  186. package/dist/src/compiled/_chunks/workflow/functions-DuPjIvMH.js +1 -0
  187. package/dist/src/compiled/_chunks/workflow/resume-hook-DMSadN9o.js +1 -0
  188. package/dist/src/compiled/_chunks/workflow/run-BRdn7zy_.js +1 -0
  189. package/dist/src/compiled/_chunks/workflow/sleep-CpXfoXLF.js +1 -0
  190. package/dist/src/compiled/just-bash/index.d.ts +4 -4
  191. package/dist/src/compiler/artifacts.js +1 -1
  192. package/dist/src/compiler/manifest.d.ts +8 -8
  193. package/dist/src/compiler/normalize-agent-config.js +1 -1
  194. package/dist/src/compiler/normalize-channel.d.ts +2 -1
  195. package/dist/src/compiler/normalize-channel.js +1 -1
  196. package/dist/src/compiler/normalize-connection.d.ts +2 -1
  197. package/dist/src/compiler/normalize-connection.js +1 -1
  198. package/dist/src/compiler/normalize-helpers.d.ts +5 -0
  199. package/dist/src/compiler/normalize-helpers.js +1 -1
  200. package/dist/src/compiler/normalize-instructions.d.ts +3 -2
  201. package/dist/src/compiler/normalize-instructions.js +1 -1
  202. package/dist/src/compiler/normalize-manifest.js +2 -2
  203. package/dist/src/compiler/normalize-sandbox.d.ts +2 -1
  204. package/dist/src/compiler/normalize-sandbox.js +1 -1
  205. package/dist/src/compiler/normalize-schedule.d.ts +2 -1
  206. package/dist/src/compiler/normalize-schedule.js +1 -1
  207. package/dist/src/compiler/normalize-skill.d.ts +2 -1
  208. package/dist/src/compiler/normalize-skill.js +1 -1
  209. package/dist/src/compiler/normalize-subagent.d.ts +4 -1
  210. package/dist/src/compiler/normalize-subagent.js +1 -1
  211. package/dist/src/compiler/normalize-tool.d.ts +2 -1
  212. package/dist/src/compiler/normalize-tool.js +1 -1
  213. package/dist/src/compiler/workspace-resources.js +1 -1
  214. package/dist/src/context/node.d.ts +1 -1
  215. package/dist/src/evals/assertions/collector.d.ts +43 -0
  216. package/dist/src/evals/assertions/collector.js +1 -0
  217. package/dist/src/evals/assertions/run.d.ts +72 -0
  218. package/dist/src/evals/assertions/run.js +2 -0
  219. package/dist/src/evals/autoevals-client.js +2 -0
  220. package/dist/src/evals/cli/eval-client.d.ts +22 -0
  221. package/dist/src/evals/cli/eval-client.js +1 -0
  222. package/dist/src/evals/cli/eval.d.ts +8 -5
  223. package/dist/src/evals/cli/eval.js +1 -1
  224. package/dist/src/evals/context.d.ts +19 -0
  225. package/dist/src/evals/context.js +1 -0
  226. package/dist/src/evals/define-eval-config.d.ts +16 -0
  227. package/dist/src/evals/define-eval-config.js +1 -0
  228. package/dist/src/evals/define-eval.d.ts +20 -0
  229. package/dist/src/evals/define-eval.js +1 -0
  230. package/dist/src/evals/expect/index.d.ts +25 -0
  231. package/dist/src/evals/expect/index.js +1 -0
  232. package/dist/src/evals/index.d.ts +6 -2
  233. package/dist/src/evals/index.js +1 -1
  234. package/dist/src/evals/judge.d.ts +20 -0
  235. package/dist/src/evals/judge.js +1 -0
  236. package/dist/src/evals/{checks/match.d.ts → match.d.ts} +17 -18
  237. package/dist/src/evals/match.js +1 -0
  238. package/dist/src/evals/reporters/index.d.ts +1 -0
  239. package/dist/src/evals/reporters/index.js +1 -1
  240. package/dist/src/evals/requirements.d.ts +3 -0
  241. package/dist/src/evals/requirements.js +1 -0
  242. package/dist/src/evals/runner/artifacts.d.ts +7 -6
  243. package/dist/src/evals/runner/artifacts.js +3 -3
  244. package/dist/src/evals/runner/discover.d.ts +31 -10
  245. package/dist/src/evals/runner/discover.js +1 -1
  246. package/dist/src/evals/runner/execute-eval.d.ts +25 -0
  247. package/dist/src/evals/runner/execute-eval.js +1 -0
  248. package/dist/src/evals/runner/execute-task.d.ts +31 -0
  249. package/dist/src/evals/runner/execute-task.js +1 -0
  250. package/dist/src/evals/runner/reporters/braintrust.d.ts +7 -5
  251. package/dist/src/evals/runner/reporters/braintrust.js +2 -2
  252. package/dist/src/evals/runner/reporters/console.d.ts +4 -4
  253. package/dist/src/evals/runner/reporters/console.js +1 -1
  254. package/dist/src/evals/runner/reporters/junit.d.ts +10 -0
  255. package/dist/src/evals/runner/reporters/junit.js +4 -0
  256. package/dist/src/evals/runner/reporters/types.d.ts +14 -8
  257. package/dist/src/evals/runner/run-evals.d.ts +38 -0
  258. package/dist/src/evals/runner/run-evals.js +1 -0
  259. package/dist/src/evals/runner/verdict.d.ts +10 -15
  260. package/dist/src/evals/runner/verdict.js +1 -1
  261. package/dist/src/evals/session.d.ts +52 -0
  262. package/dist/src/evals/session.js +1 -0
  263. package/dist/src/evals/target.d.ts +23 -0
  264. package/dist/src/evals/target.js +1 -0
  265. package/dist/src/evals/types.d.ts +294 -219
  266. package/dist/src/execution/compaction.d.ts +14 -0
  267. package/dist/src/execution/compaction.js +1 -0
  268. package/dist/src/execution/delegated-parent-notification.js +1 -1
  269. package/dist/src/execution/dispatch-runtime-actions-step.js +1 -1
  270. package/dist/src/execution/node-step.js +1 -1
  271. package/dist/src/execution/sandbox/bash-tool.d.ts +6 -6
  272. package/dist/src/execution/sandbox/bash-tool.js +1 -1
  273. package/dist/src/execution/sandbox/bindings/local.js +1 -1
  274. package/dist/src/execution/sandbox/bindings/vercel.d.ts +2 -6
  275. package/dist/src/execution/sandbox/bindings/vercel.js +1 -1
  276. package/dist/src/execution/sandbox/glob-tool.js +3 -3
  277. package/dist/src/execution/sandbox/grep-tool.js +3 -3
  278. package/dist/src/execution/sandbox/read-file-tool.js +1 -1
  279. package/dist/src/execution/subagent-adapter.js +1 -1
  280. package/dist/src/execution/tool-auth.js +1 -1
  281. package/dist/src/execution/turn-workflow.js +1 -1
  282. package/dist/src/execution/workflow-runtime.d.ts +2 -2
  283. package/dist/src/execution/workflow-runtime.js +1 -1
  284. package/dist/src/execution/workflow-steps.js +1 -1
  285. package/dist/src/harness/action-result-helpers.js +1 -1
  286. package/dist/src/harness/authorization.d.ts +26 -0
  287. package/dist/src/harness/authorization.js +1 -1
  288. package/dist/src/harness/code-mode-lifecycle.js +1 -1
  289. package/dist/src/harness/emission.d.ts +12 -5
  290. package/dist/src/harness/emission.js +1 -1
  291. package/dist/src/harness/model-call-error.d.ts +35 -6
  292. package/dist/src/harness/model-call-error.js +1 -1
  293. package/dist/src/harness/step-hooks.d.ts +10 -4
  294. package/dist/src/harness/step-hooks.js +1 -1
  295. package/dist/src/harness/tool-loop.js +1 -1
  296. package/dist/src/harness/tools.d.ts +4 -6
  297. package/dist/src/harness/tools.js +1 -1
  298. package/dist/src/harness/turn-tag-state.d.ts +4 -0
  299. package/dist/src/harness/turn-tag-state.js +1 -1
  300. package/dist/src/harness/types.d.ts +4 -15
  301. package/dist/src/internal/application/cache-metadata.js +1 -1
  302. package/dist/src/internal/application/compiled-artifacts.js +1 -1
  303. package/dist/src/internal/application/package.js +1 -1
  304. package/dist/src/internal/application/paths.js +1 -1
  305. package/dist/src/internal/authored-definition/schema-backed.js +1 -1
  306. package/dist/src/internal/authored-module-loader.d.ts +4 -1
  307. package/dist/src/internal/authored-module-loader.js +2 -2
  308. package/dist/src/internal/authored-module-map-loader.js +1 -1
  309. package/dist/src/internal/nitro/dev-runtime-artifacts.js +1 -1
  310. package/dist/src/internal/nitro/host/build-application.js +1 -1
  311. package/dist/src/internal/nitro/host/build-vercel-agent-summary.js +1 -1
  312. package/dist/src/internal/nitro/host/configure-nitro-routes.js +3 -3
  313. package/dist/src/internal/nitro/host/create-application-nitro.js +1 -1
  314. package/dist/src/internal/nitro/host/dev-authored-source-watcher.js +1 -1
  315. package/dist/src/internal/nitro/host/dev-watcher-log.d.ts +37 -0
  316. package/dist/src/internal/nitro/host/dev-watcher-log.js +1 -0
  317. package/dist/src/internal/nitro/host/ports.d.ts +8 -0
  318. package/dist/src/internal/nitro/host/ports.js +1 -0
  319. package/dist/src/internal/nitro/host/prepare-application-host.js +1 -1
  320. package/dist/src/internal/nitro/host/server-external-packages.d.ts +1 -1
  321. package/dist/src/internal/nitro/host/server-external-packages.js +1 -1
  322. package/dist/src/internal/nitro/host/start-development-server.js +1 -1
  323. package/dist/src/internal/nitro/host/start-production-server.js +1 -1
  324. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.d.ts +5 -0
  325. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js +1 -0
  326. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.d.ts +31 -2
  327. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.js +1 -1
  328. package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.d.ts +13 -0
  329. package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.js +1 -1
  330. package/dist/src/internal/nitro/routes/info.d.ts +2 -2
  331. package/dist/src/internal/nitro/routes/info.js +1 -1
  332. package/dist/src/internal/workflow/queue-namespace.d.ts +5 -0
  333. package/dist/src/internal/workflow/queue-namespace.js +1 -0
  334. package/dist/src/internal/workflow-bundle/builder-support.js +2 -2
  335. package/dist/src/internal/workflow-bundle/builder.js +3 -5
  336. package/dist/src/internal/workflow-bundle/vercel-workflow-output.js +1 -1
  337. package/dist/src/internal/workflow-bundle/workflow-builders.d.ts +1 -1
  338. package/dist/src/internal/workflow-bundle/workflow-builders.js +1 -1
  339. package/dist/src/node_modules/.pnpm/@clack_core@1.3.1/node_modules/@clack/core/dist/index.js +4 -4
  340. package/dist/src/protocol/message.d.ts +15 -0
  341. package/dist/src/protocol/message.js +2 -2
  342. package/dist/src/public/channels/slack/api.d.ts +8 -0
  343. package/dist/src/public/channels/slack/api.js +1 -1
  344. package/dist/src/public/channels/slack/connections.d.ts +26 -18
  345. package/dist/src/public/channels/slack/connections.js +1 -1
  346. package/dist/src/public/channels/slack/defaults.d.ts +5 -2
  347. package/dist/src/public/channels/slack/defaults.js +1 -1
  348. package/dist/src/public/channels/slack/index.d.ts +1 -1
  349. package/dist/src/public/channels/slack/slackChannel.d.ts +65 -5
  350. package/dist/src/public/channels/slack/slackChannel.js +1 -1
  351. package/dist/src/public/channels/teams/defaults.js +1 -1
  352. package/dist/src/public/connections/errors.d.ts +8 -0
  353. package/dist/src/public/definitions/tool.d.ts +0 -33
  354. package/dist/src/public/next/index.d.ts +7 -1
  355. package/dist/src/public/next/index.js +1 -1
  356. package/dist/src/public/next/server.d.ts +1 -0
  357. package/dist/src/public/next/server.js +1 -1
  358. package/dist/src/public/nuxt/dev-server.js +1 -1
  359. package/dist/src/public/sveltekit/dev-server.js +1 -1
  360. package/dist/src/public/sveltekit/index.d.ts +1 -1
  361. package/dist/src/public/tools/defaults.d.ts +2 -4
  362. package/dist/src/public/tools/defaults.js +1 -1
  363. package/dist/src/public/tools/define-bash-tool.d.ts +3 -3
  364. package/dist/src/public/tools/define-bash-tool.js +1 -1
  365. package/dist/src/public/tools/define-read-file-tool.d.ts +0 -6
  366. package/dist/src/public/tools/define-read-file-tool.js +1 -1
  367. package/dist/src/public/tools/index.d.ts +2 -2
  368. package/dist/src/public/tools/index.js +1 -1
  369. package/dist/src/public/tools/internal.js +1 -1
  370. package/dist/src/runtime/actions/types.d.ts +11 -11
  371. package/dist/src/runtime/agent/mock-model-adapter.js +1 -1
  372. package/dist/src/runtime/agent/mock-model-fixtures.js +3 -2
  373. package/dist/src/runtime/agent/mock-model-skill-selection.js +3 -4
  374. package/dist/src/runtime/connections/callback-route.js +1 -1
  375. package/dist/src/runtime/connections/mcp-client.js +1 -1
  376. package/dist/src/runtime/connections/scoped-authorization.d.ts +21 -5
  377. package/dist/src/runtime/connections/scoped-authorization.js +1 -1
  378. package/dist/src/runtime/connections/types.d.ts +33 -0
  379. package/dist/src/runtime/connections/validate-authorization.js +1 -1
  380. package/dist/src/runtime/framework-tools/bash.d.ts +3 -3
  381. package/dist/src/runtime/framework-tools/bash.js +1 -1
  382. package/dist/src/runtime/framework-tools/connection-search-dynamic.d.ts +1 -1
  383. package/dist/src/runtime/framework-tools/connection-search-dynamic.js +1 -1
  384. package/dist/src/runtime/framework-tools/file-state.d.ts +3 -3
  385. package/dist/src/runtime/framework-tools/index.js +1 -1
  386. package/dist/src/runtime/framework-tools/read-file.js +2 -2
  387. package/dist/src/runtime/framework-tools/todo.d.ts +7 -0
  388. package/dist/src/runtime/framework-tools/todo.js +2 -2
  389. package/dist/src/runtime/governance/auth/http-basic.js +1 -1
  390. package/dist/src/runtime/input/types.d.ts +1 -1
  391. package/dist/src/runtime/resolve-tool.d.ts +2 -2
  392. package/dist/src/runtime/resolve-tool.js +1 -1
  393. package/dist/src/runtime/sandbox/keys.js +1 -1
  394. package/dist/src/runtime/session-callback-route.js +1 -1
  395. package/dist/src/runtime/types.d.ts +1 -7
  396. package/dist/src/services/dev-client/client-options.d.ts +8 -0
  397. package/dist/src/services/dev-client/client-options.js +1 -0
  398. package/dist/src/services/dev-client/runtime-artifacts.d.ts +13 -0
  399. package/dist/src/services/dev-client/runtime-artifacts.js +1 -0
  400. package/dist/src/services/dev-client.d.ts +13 -46
  401. package/dist/src/services/dev-client.js +1 -1
  402. package/dist/src/setup/ask.d.ts +205 -0
  403. package/dist/src/setup/ask.js +1 -0
  404. package/dist/src/setup/boxes/add-channels.d.ts +100 -16
  405. package/dist/src/setup/boxes/add-channels.js +2 -1
  406. package/dist/src/setup/boxes/add-connections.d.ts +13 -23
  407. package/dist/src/setup/boxes/add-connections.js +1 -1
  408. package/dist/src/setup/boxes/apply-ai-gateway-credential.d.ts +2 -2
  409. package/dist/src/setup/boxes/apply-ai-gateway-credential.js +1 -1
  410. package/dist/src/setup/boxes/deploy-project.d.ts +46 -14
  411. package/dist/src/setup/boxes/deploy-project.js +1 -1
  412. package/dist/src/setup/boxes/detect-ai-gateway.d.ts +10 -3
  413. package/dist/src/setup/boxes/detect-ai-gateway.js +1 -1
  414. package/dist/src/setup/boxes/link-project.d.ts +3 -3
  415. package/dist/src/setup/boxes/link-project.js +1 -1
  416. package/dist/src/setup/boxes/one-shot-next-steps.d.ts +18 -0
  417. package/dist/src/setup/boxes/one-shot-next-steps.js +2 -0
  418. package/dist/src/setup/boxes/preflight.d.ts +14 -6
  419. package/dist/src/setup/boxes/preflight.js +1 -1
  420. package/dist/src/setup/boxes/resolve-provisioning.d.ts +36 -8
  421. package/dist/src/setup/boxes/resolve-provisioning.js +1 -1
  422. package/dist/src/setup/boxes/resolve-target.d.ts +25 -8
  423. package/dist/src/setup/boxes/resolve-target.js +1 -1
  424. package/dist/src/setup/boxes/scaffold.d.ts +12 -6
  425. package/dist/src/setup/boxes/scaffold.js +1 -1
  426. package/dist/src/setup/boxes/select-channels.d.ts +38 -9
  427. package/dist/src/setup/boxes/select-channels.js +1 -1
  428. package/dist/src/setup/boxes/select-chat.d.ts +15 -11
  429. package/dist/src/setup/boxes/select-chat.js +1 -1
  430. package/dist/src/setup/boxes/select-connections.d.ts +30 -0
  431. package/dist/src/setup/boxes/select-connections.js +1 -0
  432. package/dist/src/setup/boxes/select-model.d.ts +18 -14
  433. package/dist/src/setup/boxes/select-model.js +1 -1
  434. package/dist/src/setup/boxes/select-setup-mode.d.ts +32 -0
  435. package/dist/src/setup/boxes/select-setup-mode.js +1 -0
  436. package/dist/src/setup/channel-add-conflicts.d.ts +28 -0
  437. package/dist/src/setup/channel-add-conflicts.js +1 -0
  438. package/dist/src/setup/cli/channel-setup-prompter.d.ts +23 -0
  439. package/dist/src/setup/cli/channel-setup-prompter.js +1 -0
  440. package/dist/src/setup/cli/connection-add-prompter.d.ts +8 -0
  441. package/dist/src/setup/cli/connection-add-prompter.js +1 -0
  442. package/dist/src/setup/{scaffold/cli → cli}/index.d.ts +4 -3
  443. package/dist/src/setup/cli/index.js +1 -0
  444. package/dist/src/setup/{scaffold/cli → cli}/prompt-ui.d.ts +39 -15
  445. package/dist/src/setup/cli/prompt-ui.js +5 -0
  446. package/dist/src/setup/{scaffold/cli → cli}/rail-log.d.ts +2 -0
  447. package/dist/src/setup/{scaffold/cli → cli}/rail-log.js +2 -2
  448. package/dist/src/setup/{scaffold/cli → cli}/select-component.d.ts +18 -3
  449. package/dist/src/setup/cli/select-component.js +1 -0
  450. package/dist/src/setup/cli/select-option-codec.d.ts +12 -0
  451. package/dist/src/setup/cli/select-option-codec.js +1 -0
  452. package/dist/src/setup/{scaffold/cli → cli}/select-state.d.ts +13 -1
  453. package/dist/src/setup/cli/select-state.js +1 -0
  454. package/dist/src/setup/cli/whimsy.d.ts +16 -0
  455. package/dist/src/setup/cli/whimsy.js +1 -0
  456. package/dist/src/setup/{scaffold/steps/setup-connection.d.ts → connection-connector.d.ts} +3 -2
  457. package/dist/src/setup/connection-connector.js +1 -0
  458. package/dist/src/setup/flows/channels.d.ts +43 -0
  459. package/dist/src/setup/flows/channels.js +1 -0
  460. package/dist/src/setup/flows/deploy.d.ts +40 -0
  461. package/dist/src/setup/flows/deploy.js +1 -0
  462. package/dist/src/setup/flows/in-project.d.ts +16 -0
  463. package/dist/src/setup/flows/in-project.js +1 -0
  464. package/dist/src/setup/flows/link.d.ts +43 -0
  465. package/dist/src/setup/flows/link.js +1 -0
  466. package/dist/src/setup/flows/model.d.ts +112 -0
  467. package/dist/src/setup/flows/model.js +1 -0
  468. package/dist/src/setup/flows/vercel.d.ts +31 -0
  469. package/dist/src/setup/flows/vercel.js +2 -0
  470. package/dist/src/setup/gateway-models.js +1 -1
  471. package/dist/src/setup/headless.d.ts +1 -1
  472. package/dist/src/setup/index.d.ts +10 -4
  473. package/dist/src/setup/index.js +1 -1
  474. package/dist/src/setup/onboarding.d.ts +7 -4
  475. package/dist/src/setup/onboarding.js +1 -1
  476. package/dist/src/setup/package-manager.d.ts +27 -0
  477. package/dist/src/setup/package-manager.js +1 -0
  478. package/dist/src/setup/primitives/index.d.ts +3 -0
  479. package/dist/src/setup/primitives/index.js +1 -0
  480. package/dist/src/setup/primitives/pm/bun.d.ts +10 -0
  481. package/dist/src/setup/primitives/pm/bun.js +1 -0
  482. package/dist/src/setup/primitives/pm/index.d.ts +11 -0
  483. package/dist/src/setup/primitives/pm/index.js +1 -0
  484. package/dist/src/setup/primitives/pm/npm.d.ts +10 -0
  485. package/dist/src/setup/primitives/pm/npm.js +1 -0
  486. package/dist/src/setup/primitives/pm/pnpm.d.ts +27 -0
  487. package/dist/src/setup/primitives/pm/pnpm.js +8 -0
  488. package/dist/src/setup/primitives/pm/run.d.ts +23 -0
  489. package/dist/src/setup/primitives/pm/run.js +1 -0
  490. package/dist/src/setup/primitives/pm/shared.d.ts +8 -0
  491. package/dist/src/setup/primitives/pm/shared.js +1 -0
  492. package/dist/src/setup/primitives/pm/types.d.ts +37 -0
  493. package/dist/src/setup/primitives/pm/types.js +1 -0
  494. package/dist/src/setup/primitives/pm/yarn.d.ts +10 -0
  495. package/dist/src/setup/primitives/pm/yarn.js +1 -0
  496. package/dist/src/setup/primitives/run-pnpm.d.ts +1 -0
  497. package/dist/src/setup/primitives/run-pnpm.js +1 -0
  498. package/dist/src/setup/{scaffold/primitives → primitives}/run-vercel.d.ts +7 -0
  499. package/dist/src/setup/primitives/run-vercel.js +1 -0
  500. package/dist/src/setup/project-name.d.ts +4 -0
  501. package/dist/src/setup/project-name.js +1 -0
  502. package/dist/src/setup/project-resolution.d.ts +54 -0
  503. package/dist/src/setup/project-resolution.js +1 -0
  504. package/dist/src/setup/prompter.d.ts +52 -4
  505. package/dist/src/setup/prompter.js +1 -1
  506. package/dist/src/setup/quit-guard.d.ts +1 -1
  507. package/dist/src/setup/run-vercel-link.d.ts +1 -1
  508. package/dist/src/setup/run-vercel-link.js +1 -1
  509. package/dist/src/setup/runner.d.ts +5 -4
  510. package/dist/src/setup/runner.js +1 -1
  511. package/dist/src/setup/scaffold/channels-catalog.d.ts +3 -3
  512. package/dist/src/setup/scaffold/channels-catalog.js +1 -1
  513. package/dist/src/setup/scaffold/create/add-to-project.d.ts +26 -0
  514. package/dist/src/setup/scaffold/create/add-to-project.js +1 -0
  515. package/dist/src/setup/scaffold/create/project.d.ts +54 -0
  516. package/dist/src/setup/scaffold/create/project.js +80 -0
  517. package/dist/src/setup/scaffold/index.d.ts +4 -4
  518. package/dist/src/setup/scaffold/index.js +1 -1
  519. package/dist/src/setup/scaffold/{channels.d.ts → update/channels.d.ts} +11 -0
  520. package/dist/src/setup/scaffold/update/channels.js +7 -0
  521. package/dist/src/setup/scaffold/{connections.d.ts → update/connections.d.ts} +1 -1
  522. package/dist/src/setup/scaffold/update/connections.js +21 -0
  523. package/dist/src/setup/scaffold/version-tokens.d.ts +11 -0
  524. package/dist/src/setup/scaffold/version-tokens.js +1 -0
  525. package/dist/src/setup/{scaffold/steps/setup-slackbot.d.ts → slackbot.d.ts} +24 -20
  526. package/dist/src/setup/slackbot.js +1 -0
  527. package/dist/src/setup/state.d.ts +62 -15
  528. package/dist/src/setup/state.js +1 -1
  529. package/dist/src/setup/step.d.ts +9 -18
  530. package/dist/src/setup/vercel-project.d.ts +15 -8
  531. package/dist/src/setup/vercel-project.js +1 -1
  532. package/dist/src/shared/agent-definition.d.ts +5 -3
  533. package/dist/src/shared/default-agent-model.d.ts +5 -0
  534. package/dist/src/shared/default-agent-model.js +1 -0
  535. package/dist/src/source-change/apply-model-name.d.ts +25 -0
  536. package/dist/src/source-change/apply-model-name.js +2 -0
  537. package/dist/src/source-change/static-source-change.d.ts +36 -0
  538. package/dist/src/source-change/static-source-change.js +1 -0
  539. package/dist/src/svelte/index.js +1 -1
  540. package/dist/src/svelte/use-eve-agent.js +1 -1
  541. package/dist/src/vue/index.js +1 -1
  542. package/dist/src/vue/use-eve-agent.js +1 -1
  543. package/package.json +22 -42
  544. package/dist/docs/evals-v2-plan.md +0 -939
  545. package/dist/docs/public/advanced/dev-tui.md +0 -52
  546. package/dist/docs/public/advanced/evals.md +0 -158
  547. package/dist/docs/public/reference/faqs.md +0 -48
  548. package/dist/src/cli/commands/setup.d.ts +0 -55
  549. package/dist/src/cli/commands/setup.js +0 -1
  550. package/dist/src/cli/dev/repl/input-requests.d.ts +0 -38
  551. package/dist/src/cli/dev/repl/input-requests.js +0 -1
  552. package/dist/src/cli/dev/repl/input.d.ts +0 -19
  553. package/dist/src/cli/dev/repl/input.js +0 -1
  554. package/dist/src/cli/dev/repl/repl.d.ts +0 -62
  555. package/dist/src/cli/dev/repl/repl.js +0 -2
  556. package/dist/src/cli/dev/repl/terminal.d.ts +0 -21
  557. package/dist/src/cli/dev/repl/terminal.js +0 -5
  558. package/dist/src/compiled/_chunks/workflow/resume-hook-0Zk0zSvq.js +0 -12
  559. package/dist/src/compiled/_chunks/workflow/sleep-DXZr2BgM.js +0 -1
  560. package/dist/src/compiled/_chunks/workflow/symbols-BWCAoPHE.js +0 -48
  561. package/dist/src/evals/checks/checks.d.ts +0 -66
  562. package/dist/src/evals/checks/checks.js +0 -2
  563. package/dist/src/evals/checks/index.d.ts +0 -21
  564. package/dist/src/evals/checks/index.js +0 -1
  565. package/dist/src/evals/checks/match.js +0 -1
  566. package/dist/src/evals/define-eval-suite.d.ts +0 -18
  567. package/dist/src/evals/define-eval-suite.js +0 -1
  568. package/dist/src/evals/runner/execute-case.d.ts +0 -23
  569. package/dist/src/evals/runner/execute-case.js +0 -1
  570. package/dist/src/evals/runner/execute-suite.d.ts +0 -24
  571. package/dist/src/evals/runner/execute-suite.js +0 -1
  572. package/dist/src/evals/scorers/autoevals-client.js +0 -2
  573. package/dist/src/evals/scorers/autoevals.d.ts +0 -58
  574. package/dist/src/evals/scorers/autoevals.js +0 -1
  575. package/dist/src/evals/scorers/json.d.ts +0 -10
  576. package/dist/src/evals/scorers/json.js +0 -1
  577. package/dist/src/evals/scorers/model-marker.d.ts +0 -12
  578. package/dist/src/evals/scorers/model-marker.js +0 -1
  579. package/dist/src/evals/scorers/run.d.ts +0 -24
  580. package/dist/src/evals/scorers/run.js +0 -1
  581. package/dist/src/evals/scorers/sql.d.ts +0 -9
  582. package/dist/src/evals/scorers/sql.js +0 -1
  583. package/dist/src/evals/scorers/text.d.ts +0 -18
  584. package/dist/src/evals/scorers/text.js +0 -1
  585. package/dist/src/evals/scores/index.d.ts +0 -72
  586. package/dist/src/evals/scores/index.js +0 -1
  587. package/dist/src/execution/tool-compaction.d.ts +0 -9
  588. package/dist/src/execution/tool-compaction.js +0 -1
  589. package/dist/src/services/dev-client/stream.d.ts +0 -5
  590. package/dist/src/services/dev-client/stream.js +0 -1
  591. package/dist/src/services/dev-client/url.d.ts +0 -11
  592. package/dist/src/services/dev-client/url.js +0 -1
  593. package/dist/src/setup/channel-setup-prompter.d.ts +0 -8
  594. package/dist/src/setup/channel-setup-prompter.js +0 -1
  595. package/dist/src/setup/scaffold/channels.js +0 -7
  596. package/dist/src/setup/scaffold/cli/channel-add-prompter.d.ts +0 -12
  597. package/dist/src/setup/scaffold/cli/channel-add-prompter.js +0 -1
  598. package/dist/src/setup/scaffold/cli/channel-setup-prompter.d.ts +0 -56
  599. package/dist/src/setup/scaffold/cli/connection-add-prompter.d.ts +0 -44
  600. package/dist/src/setup/scaffold/cli/connection-add-prompter.js +0 -1
  601. package/dist/src/setup/scaffold/cli/index.js +0 -1
  602. package/dist/src/setup/scaffold/cli/prompt-ui.js +0 -5
  603. package/dist/src/setup/scaffold/cli/select-component.js +0 -1
  604. package/dist/src/setup/scaffold/cli/select-state.js +0 -1
  605. package/dist/src/setup/scaffold/connections.js +0 -21
  606. package/dist/src/setup/scaffold/pnpm-workspace.d.ts +0 -3
  607. package/dist/src/setup/scaffold/pnpm-workspace.js +0 -11
  608. package/dist/src/setup/scaffold/primitives/detect-deployment.d.ts +0 -13
  609. package/dist/src/setup/scaffold/primitives/detect-deployment.js +0 -1
  610. package/dist/src/setup/scaffold/primitives/index.d.ts +0 -3
  611. package/dist/src/setup/scaffold/primitives/index.js +0 -1
  612. package/dist/src/setup/scaffold/primitives/pnpm-invocation.d.ts +0 -12
  613. package/dist/src/setup/scaffold/primitives/pnpm-invocation.js +0 -1
  614. package/dist/src/setup/scaffold/primitives/run-pnpm.d.ts +0 -17
  615. package/dist/src/setup/scaffold/primitives/run-pnpm.js +0 -1
  616. package/dist/src/setup/scaffold/primitives/run-vercel.js +0 -1
  617. package/dist/src/setup/scaffold/project.d.ts +0 -21
  618. package/dist/src/setup/scaffold/project.js +0 -80
  619. package/dist/src/setup/scaffold/steps/deploy-to-vercel.d.ts +0 -17
  620. package/dist/src/setup/scaffold/steps/deploy-to-vercel.js +0 -1
  621. package/dist/src/setup/scaffold/steps/index.d.ts +0 -4
  622. package/dist/src/setup/scaffold/steps/index.js +0 -1
  623. package/dist/src/setup/scaffold/steps/project-resolution.d.ts +0 -19
  624. package/dist/src/setup/scaffold/steps/project-resolution.js +0 -1
  625. package/dist/src/setup/scaffold/steps/run-add-connection.d.ts +0 -40
  626. package/dist/src/setup/scaffold/steps/run-add-connection.js +0 -1
  627. package/dist/src/setup/scaffold/steps/run-add-to-agent.d.ts +0 -81
  628. package/dist/src/setup/scaffold/steps/run-add-to-agent.js +0 -2
  629. package/dist/src/setup/scaffold/steps/setup-connection.js +0 -1
  630. package/dist/src/setup/scaffold/steps/setup-slackbot.js +0 -1
  631. /package/dist/docs/public/{frontend → guides/frontend}/meta.json +0 -0
  632. /package/dist/docs/public/{advanced → guides}/remote-agents.md +0 -0
  633. /package/dist/src/{setup/scaffold/cli/channel-setup-prompter.js → cli/dev/tui/setup-flow.js} +0 -0
  634. /package/dist/src/evals/{scorers/autoevals-client.d.ts → autoevals-client.d.ts} +0 -0
  635. /package/dist/src/setup/{scaffold/cli → cli}/command-output.d.ts +0 -0
  636. /package/dist/src/setup/{scaffold/cli → cli}/command-output.js +0 -0
  637. /package/dist/src/setup/{scaffold/human-action.d.ts → human-action.d.ts} +0 -0
  638. /package/dist/src/setup/{scaffold/human-action.js → human-action.js} +0 -0
  639. /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.d.ts +0 -0
  640. /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.js +0 -0
  641. /package/dist/src/setup/scaffold/{web-template.d.ts → create/web-template.d.ts} +0 -0
  642. /package/dist/src/setup/scaffold/{web-template.js → create/web-template.js} +0 -0
  643. /package/dist/src/setup/scaffold/{module-files.d.ts → update/module-files.d.ts} +0 -0
  644. /package/dist/src/setup/scaffold/{module-files.js → update/module-files.js} +0 -0
  645. /package/dist/src/setup/scaffold/{package-json.d.ts → update/package-json.d.ts} +0 -0
  646. /package/dist/src/setup/scaffold/{package-json.js → update/package-json.js} +0 -0
  647. /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.d.ts +0 -0
  648. /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.js +0 -0
  649. /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.d.ts +0 -0
  650. /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.js +0 -0
@@ -1,5 +1,6 @@
1
1
  import type { ModuleSourceRef } from "#shared/source-ref.js";
2
2
  import type { CompiledToolDefinition, CompiledDynamicToolDefinition } from "#compiler/manifest.js";
3
+ import { type ModuleBackedDefinitionLoadOptions } from "#compiler/normalize-helpers.js";
3
4
  /**
4
5
  * Compiled tool entry produced from one authored `tools/*.ts` file.
5
6
  *
@@ -31,4 +32,4 @@ export type CompiledToolEntry = {
31
32
  * directories into a slug-safe single segment. Authored `name` fields
32
33
  * are rejected by the normalizer.
33
34
  */
34
- export declare function compileToolEntry(agentRoot: string, source: ModuleSourceRef): Promise<CompiledToolEntry>;
35
+ export declare function compileToolEntry(agentRoot: string, source: ModuleSourceRef, options?: ModuleBackedDefinitionLoadOptions): Promise<CompiledToolEntry>;
@@ -1 +1 @@
1
- import{stripLogicalPathExtension}from"#discover/filesystem.js";import{loadModuleBackedDefinition}from"#compiler/normalize-helpers.js";import{normalizeToolDefinition}from"#internal/authored-definition/schema-backed.js";async function compileToolEntry(e,t){let n=normalizeToolDefinition(await loadModuleBackedDefinition({agentRoot:e,kind:`tool`,source:t}),`Expected the tool export "${t.exportName??`default`}" from "${t.logicalPath}" to match the public Eve shape.`),r=stripLogicalPathExtension(t.logicalPath).replace(/^tools\//,``).replaceAll(`/`,`-`);return n.kind===`disabled`?{kind:`disabled`,name:r}:n.kind===`enable-workflow`?{kind:`enable-workflow`}:n.kind===`dynamic-tool`?{kind:`dynamic-tool`,definition:{eventNames:[...n.eventNames],exportName:t.exportName,logicalPath:t.logicalPath,slug:r,sourceId:t.sourceId,sourceKind:`module`}}:{kind:`tool`,definition:{description:n.definition.description,exportName:t.exportName,inputSchema:n.definition.inputSchema??null,logicalPath:t.logicalPath,name:r,outputSchema:n.definition.outputSchema,sourceId:t.sourceId,sourceKind:`module`}}}export{compileToolEntry};
1
+ import{stripLogicalPathExtension}from"#discover/filesystem.js";import{loadModuleBackedDefinition}from"#compiler/normalize-helpers.js";import{normalizeToolDefinition}from"#internal/authored-definition/schema-backed.js";async function compileToolEntry(e,t,n={}){let r=normalizeToolDefinition(await loadModuleBackedDefinition({agentRoot:e,externalDependencies:n.externalDependencies,kind:`tool`,source:t}),`Expected the tool export "${t.exportName??`default`}" from "${t.logicalPath}" to match the public Eve shape.`),i=stripLogicalPathExtension(t.logicalPath).replace(/^tools\//,``).replaceAll(`/`,`-`);return r.kind===`disabled`?{kind:`disabled`,name:i}:r.kind===`enable-workflow`?{kind:`enable-workflow`}:r.kind===`dynamic-tool`?{kind:`dynamic-tool`,definition:{eventNames:[...r.eventNames],exportName:t.exportName,logicalPath:t.logicalPath,slug:i,sourceId:t.sourceId,sourceKind:`module`}}:{kind:`tool`,definition:{description:r.definition.description,exportName:t.exportName,inputSchema:r.definition.inputSchema??null,logicalPath:t.logicalPath,name:i,outputSchema:r.definition.outputSchema,sourceId:t.sourceId,sourceKind:`module`}}}export{compileToolEntry};
@@ -1 +1 @@
1
- import{join,posix}from"node:path";import{createHash}from"node:crypto";import{cp,mkdir,readFile,readdir,rm}from"node:fs/promises";import{normalizeLogicalPath}from"#discover/filesystem.js";import{ROOT_COMPILED_AGENT_NODE_ID,deriveResourceRootEntries}from"#compiler/manifest.js";import{normalizeSkillPackage,writeSkillPackageDirectory}from"#shared/skill-package.js";const RESOURCES_DIRECTORY=`workspace-resources`;async function materializeWorkspaceResources(t){let n=join(t.compileDirectoryPath,RESOURCES_DIRECTORY);await rm(n,{force:!0,recursive:!0});let r=await materializeNode({nodeId:ROOT_COMPILED_AGENT_NODE_ID,resourcesRoot:n,manifest:t.manifest}),i=await Promise.all(t.manifest.subagents.map(async e=>({...e,agent:await materializeNode({nodeId:e.nodeId,resourcesRoot:n,manifest:e.agent})})));return{...r,kind:t.manifest.kind,subagentEdges:t.manifest.subagentEdges,subagents:i,version:t.manifest.version}}function createResourceRoot(t,n,r){return{contentHash:r,logicalPath:normalizeLogicalPath(join(RESOURCES_DIRECTORY,n)),rootEntries:deriveResourceRootEntries({sandboxWorkspaces:t.sandboxWorkspaces,skills:t.skills})}}async function materializeNode(t){for(let e of t.manifest.sandboxWorkspaces)if(e.rootEntries.some(e=>e===`skills/`||e===`skills`))throw Error(`Sandbox workspace "${e.logicalPath}" cannot define "skills" because Eve manages that workspace entry.`);let n=join(t.resourcesRoot,t.nodeId);await mkdir(n,{recursive:!0});for(let e of t.manifest.sandboxWorkspaces)await cp(e.sourcePath,n,{recursive:!0});for(let e of t.manifest.skills)await materializeSkill({nodeRoot:n,skill:e});let a=await heveWorkspaceResourceRoot(n);return{...t.manifest,skills:t.manifest.skills.map(stripSkillPackageFiles),workspaceResourceRoot:createResourceRoot(t.manifest,t.nodeId,a)}}async function materializeSkill(t){let n=join(t.nodeRoot,`skills`,t.skill.name);if(t.skill.sourceKind===`skill-package`){await cp(t.skill.rootPath,n,{recursive:!0});return}await 
writeSkillPackageDirectory({rootPath:t.nodeRoot,skill:normalizeSkillPackage(t.skill)})}function stripSkillPackageFiles(e){let{files:t,...n}=e;return n}async function heveWorkspaceResourceRoot(e){let t=await listWorkspaceResourceFiles({logicalDirectoryPath:`.`,sourceDirectoryPath:e});t.sort((e,t)=>e.logicalPath.localeCompare(t.logicalPath));let r=createHash(`sha256`);r.update(`eve-workspace-resource-root-v1\0`);for(let e of t){let t=await readFile(e.sourcePath);r.update(e.logicalPath),r.update(`\0`),r.update(String(t.byteLength)),r.update(`\0`),r.update(t),r.update(`\0`)}return r.digest(`hex`)}async function listWorkspaceResourceFiles(n){let r=[],i=await readdir(n.sourceDirectoryPath,{withFileTypes:!0});for(let a of i){if(!a.isDirectory()&&!a.isFile())continue;let i=join(n.sourceDirectoryPath,a.name),o=posix.join(n.logicalDirectoryPath,a.name);if(a.isDirectory()){r.push(...await listWorkspaceResourceFiles({logicalDirectoryPath:o,sourceDirectoryPath:i}));continue}r.push({logicalPath:o,sourcePath:i})}return r}export{materializeWorkspaceResources};
1
+ import{join,posix}from"node:path";import{cp,mkdir,readFile,readdir,rm}from"node:fs/promises";import{createHash}from"node:crypto";import{normalizeLogicalPath}from"#discover/filesystem.js";import{ROOT_COMPILED_AGENT_NODE_ID,deriveResourceRootEntries}from"#compiler/manifest.js";import{normalizeSkillPackage,writeSkillPackageDirectory}from"#shared/skill-package.js";const RESOURCES_DIRECTORY=`workspace-resources`;async function materializeWorkspaceResources(t){let n=join(t.compileDirectoryPath,RESOURCES_DIRECTORY);await rm(n,{force:!0,recursive:!0});let r=await materializeNode({nodeId:ROOT_COMPILED_AGENT_NODE_ID,resourcesRoot:n,manifest:t.manifest}),i=await Promise.all(t.manifest.subagents.map(async e=>({...e,agent:await materializeNode({nodeId:e.nodeId,resourcesRoot:n,manifest:e.agent})})));return{...r,kind:t.manifest.kind,subagentEdges:t.manifest.subagentEdges,subagents:i,version:t.manifest.version}}function createResourceRoot(t,n,r){return{contentHash:r,logicalPath:normalizeLogicalPath(join(RESOURCES_DIRECTORY,n)),rootEntries:deriveResourceRootEntries({sandboxWorkspaces:t.sandboxWorkspaces,skills:t.skills})}}async function materializeNode(t){for(let e of t.manifest.sandboxWorkspaces)if(e.rootEntries.some(e=>e===`skills/`||e===`skills`))throw Error(`Sandbox workspace "${e.logicalPath}" cannot define "skills" because Eve manages that workspace entry.`);let i=join(t.resourcesRoot,t.nodeId);await mkdir(i,{recursive:!0});for(let e of t.manifest.sandboxWorkspaces)await cp(e.sourcePath,i,{recursive:!0});for(let e of t.manifest.skills)await materializeSkill({nodeRoot:i,skill:e});let a=await hashWorkspaceResourceRoot(i);return{...t.manifest,skills:t.manifest.skills.map(stripSkillPackageFiles),workspaceResourceRoot:createResourceRoot(t.manifest,t.nodeId,a)}}async function materializeSkill(t){let r=join(t.nodeRoot,`skills`,t.skill.name);if(t.skill.sourceKind===`skill-package`){await cp(t.skill.rootPath,r,{recursive:!0});return}await 
writeSkillPackageDirectory({rootPath:t.nodeRoot,skill:normalizeSkillPackage(t.skill)})}function stripSkillPackageFiles(e){let{files:t,...n}=e;return n}async function hashWorkspaceResourceRoot(e){let t=await listWorkspaceResourceFiles({logicalDirectoryPath:`.`,sourceDirectoryPath:e});t.sort((e,t)=>e.logicalPath.localeCompare(t.logicalPath));let n=createHash(`sha256`);n.update(`eve-workspace-resource-root-v1\0`);for(let e of t){let t=await readFile(e.sourcePath);n.update(e.logicalPath),n.update(`\0`),n.update(String(t.byteLength)),n.update(`\0`),n.update(t),n.update(`\0`)}return n.digest(`hex`)}async function listWorkspaceResourceFiles(n){let r=[],i=await readdir(n.sourceDirectoryPath,{withFileTypes:!0});for(let a of i){if(!a.isDirectory()&&!a.isFile())continue;let i=join(n.sourceDirectoryPath,a.name),o=posix.join(n.logicalDirectoryPath,a.name);if(a.isDirectory()){r.push(...await listWorkspaceResourceFiles({logicalDirectoryPath:o,sourceDirectoryPath:i}));continue}r.push({logicalPath:o,sourcePath:i})}return r}export{materializeWorkspaceResources};
@@ -4,4 +4,4 @@ import type { ContextReader } from "#context/provider.js";
4
4
  * context. The bundle is already resolved to the correct node (root or
5
5
  * subagent) at run start.
6
6
  */
7
- export declare function getActiveRuntimeNode(ctx: ContextReader): import("../runtime/graph.js").ResolvedRuntimeAgentNode;
7
+ export declare function getActiveRuntimeNode(ctx: ContextReader): import("../runtime/graph.ts").ResolvedRuntimeAgentNode;
@@ -0,0 +1,43 @@
1
+ import type { AssertionHandle, AssertionResult, AssertionSeverity, EveEvalTaskResult } from "#evals/types.js";
2
+ /**
3
+ * Outcome of evaluating one assertion: a 0–1 score (boolean assertions use
4
+ * exactly 0 or 1) with optional human-readable detail and metadata.
5
+ */
6
+ export interface AssertionOutcome {
7
+ readonly score: number;
8
+ readonly message?: string;
9
+ readonly metadata?: Readonly<Record<string, unknown>>;
10
+ }
11
+ /**
12
+ * A run-level assertion (e.g. `t.completed()`), evaluated lazily against the
13
+ * final task result after `test(t)` returns. The evaluation is deferred so
14
+ * the assertion always sees the complete run regardless of call order.
15
+ */
16
+ export interface RunAssertion {
17
+ readonly name: string;
18
+ evaluate(result: EveEvalTaskResult): AssertionOutcome | Promise<AssertionOutcome>;
19
+ }
20
+ /**
21
+ * Collects the assertions recorded by an eval's `test(t)`. Run-level
22
+ * assertions register a deferred spec; value/judge assertions evaluate their
23
+ * captured value immediately (the value is ephemeral) and register the pending
24
+ * promise. {@link finalize} resolves everything against the final result and
25
+ * produces the ordered {@link AssertionResult} list the verdict reads.
26
+ */
27
+ export declare class AssertionCollector {
28
+ #private;
29
+ /** Register a run-level assertion evaluated against the final result. */
30
+ recordRun(spec: RunAssertion, severity?: AssertionSeverity): AssertionHandle;
31
+ /** Register a value/judge assertion, evaluating the captured value now. */
32
+ recordValue(input: {
33
+ readonly name: string;
34
+ readonly severity: AssertionSeverity;
35
+ readonly threshold?: number;
36
+ readonly score: () => Promise<AssertionOutcome>;
37
+ }): AssertionHandle;
38
+ /**
39
+ * Awaits every pending value/judge assertion, evaluates the deferred
40
+ * run-level assertions against `result`, and returns the recorded results.
41
+ */
42
+ finalize(result: EveEvalTaskResult): Promise<readonly AssertionResult[]>;
43
+ }
@@ -0,0 +1 @@
1
+ import{toErrorMessage}from"#shared/errors.js";var AssertionCollector=class{#e=[];#t=[];recordRun(e,t=`gate`){let n={name:e.name,severity:t,threshold:void 0,kind:`deferred`,spec:e,score:0,failed:!1};return this.#e.push(n),makeHandle(n,Promise.resolve())}recordValue(t){let n={name:t.name,severity:t.severity,threshold:t.threshold,kind:`resolved`,score:0,failed:!1};this.#e.push(n);let r=t.score().then(e=>{n.score=e.score,n.message=e.message,n.metadata=e.metadata}).catch(t=>{n.score=0,n.severity=`gate`,n.threshold=void 0,n.message=toErrorMessage(t),n.failed=!0});return this.#t.push(r),makeHandle(n,r)}async finalize(e){await Promise.all(this.#t);let t=[];for(let n of this.#e){if(n.kind===`deferred`&&n.spec!==void 0){let t=await n.spec.evaluate(e);n.score=t.score,n.message=t.message,n.metadata=t.metadata}t.push({name:n.name,score:n.score,severity:n.severity,threshold:n.threshold,passed:computePassed(n.severity,n.threshold,n.score,n.failed),message:n.message,metadata:n.metadata})}return t}};function computePassed(e,t,n,r){if(r)return!1;let i=t??(e===`gate`?1:void 0);return i===void 0||n>=i}function makeHandle(e,t){let n={gate(t){return e.severity=`gate`,e.threshold=t,n},soft(t){return e.severity=`soft`,e.threshold=t,n},atLeast(t){return e.severity=`soft`,e.threshold=t,n},then(e,n){return t.then(e,n)}};return n}export{AssertionCollector};
@@ -0,0 +1,72 @@
1
+ import type { StandardSchemaV1 } from "#compiled/@standard-schema/spec/index.js";
2
+ import type { HandleMessageStreamEvent } from "#protocol/message.js";
3
+ import { type EveEvalSubagentCallMatchOptions, type EveEvalToolCallMatchOptions } from "#evals/match.js";
4
+ import type { RunAssertion } from "#evals/assertions/collector.js";
5
+ /**
6
+ * Asserts the run ran to completion: it did not fail and did not park on an
7
+ * unanswered HITL input request.
8
+ */
9
+ export declare function completed(): RunAssertion;
10
+ /**
11
+ * Asserts the run ended parked on HITL input. For approval gates and
12
+ * ask-question flows.
13
+ */
14
+ export declare function waiting(): RunAssertion;
15
+ /**
16
+ * Asserts the run did not fail: terminal status is not `"failed"` and no
17
+ * `turn.failed` / `step.failed` events were emitted. Parked runs pass; use
18
+ * {@link completed} to also reject parking.
19
+ */
20
+ export declare function didNotFail(): RunAssertion;
21
+ /**
22
+ * Asserts the joined assistant message text contains `token` (substring for
23
+ * strings, `test` for RegExps).
24
+ */
25
+ export declare function messageIncludes(token: string | RegExp): RunAssertion;
26
+ /**
27
+ * Asserts a tool call with `name` happened. Options constrain the call
28
+ * further: `input` partial-deep-matches, `output` matches the result,
29
+ * `isError` constrains error state, and `times` requires an exact count.
30
+ */
31
+ export declare function calledTool(name: string, options?: EveEvalToolCallMatchOptions): RunAssertion;
32
+ /**
33
+ * Asserts no tool call with `name` happened.
34
+ */
35
+ export declare function notCalledTool(name: string): RunAssertion;
36
+ /**
37
+ * Asserts the named tools were called in the given order (subsequence match:
38
+ * other calls may interleave).
39
+ */
40
+ export declare function toolOrder(names: readonly string[]): RunAssertion;
41
+ /**
42
+ * Asserts the run made no tool calls at all.
43
+ */
44
+ export declare function usedNoTools(): RunAssertion;
45
+ /**
46
+ * Asserts the run made at most `max` tool calls.
47
+ */
48
+ export declare function maxToolCalls(max: number): RunAssertion;
49
+ /**
50
+ * Asserts no action result (tool, subagent, or skill) reported a failure.
51
+ */
52
+ export declare function noFailedActions(): RunAssertion;
53
+ /**
54
+ * Asserts a subagent delegation to `name` occurred. `remoteUrl` matches the
55
+ * `subagent.called` remote metadata, `output` matches the `subagent.completed`
56
+ * output.
57
+ */
58
+ export declare function calledSubagent(name: string, options?: EveEvalSubagentCallMatchOptions): RunAssertion;
59
+ /**
60
+ * Escape hatch: asserts an arbitrary predicate over the full typed event
61
+ * stream. `label` names the assertion in reports.
62
+ */
63
+ export declare function event(predicate: (events: readonly HandleMessageStreamEvent[]) => boolean, label: string): RunAssertion;
64
+ /**
65
+ * Asserts `result.output` (the final assistant message) deep-equals `value`.
66
+ */
67
+ export declare function outputEquals(value: unknown): RunAssertion;
68
+ /**
69
+ * Asserts `result.output` validates against a Standard Schema (e.g. a Zod
70
+ * schema).
71
+ */
72
+ export declare function outputMatches(schema: StandardSchemaV1): RunAssertion;
@@ -0,0 +1,2 @@
1
+ import{deepEquals,subagentCallMatches,testRegExp,toolCallMatches}from"#evals/match.js";const PASS={score:1},fail=(e,t)=>({score:0,message:e,metadata:t});function completed(){return{name:`completed`,evaluate(e){return e.status===`failed`?fail(failureDetail(`run failed`,e.derived.failureCode)):e.derived.parked?fail(`run parked on ${e.derived.inputRequests.length} unanswered input request(s)`):PASS}}}function waiting(){return{name:`waiting`,evaluate(e){return e.derived.parked?PASS:fail(`expected the run to park on HITL input; it ended "${e.status}" with no pending requests`)}}}function didNotFail(){return{name:`didNotFail`,evaluate(e){if(e.status===`failed`)return fail(failureDetail(`run failed`,e.derived.failureCode));let t=e.events.find(e=>e.type===`turn.failed`||e.type===`step.failed`);return t===void 0?PASS:fail(`${t.type} (${t.data.code}): ${t.data.message}`)}}}function messageIncludes(e){return{name:`messageIncludes(${String(e)})`,evaluate(t){let r=joinCompletedMessages(t.events);return(typeof e==`string`?r.includes(e):testRegExp(e,r))?PASS:fail(`assistant messages did not include ${String(e)}; got: ${truncate(r)}`)}}}function calledTool(e,t={}){return{name:`calledTool(${e})`,evaluate(n){let i=n.derived.toolCalls.filter(t=>t.name===e),a=i.filter(e=>toolCallMatches(e,t));if(t.times===void 0?a.length>0:a.length===t.times)return{score:1,metadata:{matchingCalls:a.length}};let o=i.length>0?`observed ${e} calls: ${i.map(e=>truncate(JSON.stringify(e.input))).join(`, `)}`:`observed tools: [${n.derived.toolCalls.map(e=>e.name).join(`, `)}]`;return fail(`${t.times===void 0?`expected a matching call to "${e}"`:`expected exactly ${t.times} matching call(s), found ${a.length}`}; ${o}`)}}}function notCalledTool(e){return{name:`notCalledTool(${e})`,evaluate(t){let n=t.derived.toolCalls.filter(t=>t.name===e).length;return n===0?PASS:fail(`"${e}" was called ${n} time(s)`)}}}function toolOrder(e){return{name:`toolOrder(${e.join(` → `)})`,evaluate(t){let 
n=t.derived.toolCalls.map(e=>e.name),r=0;for(let t of n)if(t===e[r]&&(r+=1),r===e.length)break;return r===e.length?PASS:fail(`missing "${e[r]}" after [${e.slice(0,r).join(`, `)}]; observed order: [${n.join(`, `)}]`)}}}function usedNoTools(){return{name:`usedNoTools`,evaluate(e){let t=e.derived.toolCallCount;return t===0?PASS:fail(`expected no tool calls, got ${t}`,{toolCallCount:t})}}}function maxToolCalls(e){return{name:`maxToolCalls(${e})`,evaluate(t){let n=t.derived.toolCallCount;return n<=e?PASS:fail(`expected at most ${e} tool calls, got ${n}`,{maxAllowed:e,toolCallCount:n})}}}function noFailedActions(){return{name:`noFailedActions`,evaluate(e){let t=e.events.filter(e=>e.type===`action.result`&&(e.data.status===`failed`||e.data.result.isError===!0));if(t.length===0)return PASS;let n=t.map(e=>e.data.result.kind===`tool-result`?e.data.result.toolName:e.data.result.kind);return fail(`${t.length} failed action(s): ${n.join(`, `)}`)}}}function calledSubagent(e,n={}){return{name:`calledSubagent(${e})`,evaluate(r){let a=r.derived.subagentCalls.filter(t=>t.name===e);return a.filter(e=>subagentCallMatches(e,n)).length>0?PASS:a.length===0?fail(`subagent "${e}" was never called; observed: [${r.derived.subagentCalls.map(e=>e.name).join(`, `)}]`,{observedSubagentCalls:r.derived.subagentCalls}):fail(`subagent "${e}" was called but no call matched the constraints`,{observedSubagentCalls:a})}}}function event(e,t){return{name:`event(${t})`,evaluate(n){return e(n.events)?PASS:fail(`event predicate "${t}" did not hold`)}}}function outputEquals(t){return{name:`outputEquals`,evaluate(n){return deepEquals(n.output,t)?PASS:fail(`output ${truncate(JSON.stringify(n.output))} does not equal expected ${truncate(JSON.stringify(t))}`)}}}function outputMatches(e){return{name:`outputMatches`,async evaluate(t){let n=await e[`~standard`].validate(t.output);return!(`issues`in n)||n.issues===void 0?PASS:fail(`output failed schema validation: ${n.issues.map(e=>e.message).join(`; `)}`)}}}function 
joinCompletedMessages(e){let t=[];for(let n of e)n.type===`message.completed`&&n.data.message!==null&&t.push(n.data.message);return t.join(`
2
+ `)}function failureDetail(e,t){return t===void 0?e:`${e} (code: ${t})`}function truncate(e,t=200){return e===void 0?`undefined`:e.length<=t?e:`${e.slice(0,t)}…`}export{calledSubagent,calledTool,completed,didNotFail,event,maxToolCalls,messageIncludes,noFailedActions,notCalledTool,outputEquals,outputMatches,toolOrder,usedNoTools,waiting};
@@ -0,0 +1,2 @@
1
+ import"../node_modules/.pnpm/autoevals@0.0.132_ws@8.21.0/node_modules/autoevals/jsdist/index.js";import{generateText,jsonSchema}from"ai";function createAutoevalsClient(e){return{chat:{completions:{create:t=>createChatCompletion(t,e)}}}}async function createChatCompletion(t,n){let r=convertTools(t.tools),i=await generateText({model:n.languageModel,messages:convertMessages(t.messages??[]),tools:Object.keys(r).length>0?r:void 0,toolChoice:convertToolChoice(t.tool_choice),providerOptions:n.providerOptions}),a=i.toolCalls.map(e=>({id:e.toolCallId,type:`function`,function:{name:e.toolName,arguments:JSON.stringify(e.input??{})}}));return{choices:[{index:0,finish_reason:a.length>0?`tool_calls`:`stop`,message:{role:`assistant`,content:i.text||null,tool_calls:a.length>0?a:void 0}}]}}function convertMessages(e){return e.map(e=>{let t=contentToText(e.content);switch(e.role){case`assistant`:return{role:`assistant`,content:t};case`developer`:case`system`:return{role:`system`,content:t};default:return{role:`user`,content:t}}})}function contentToText(e){return e==null?``:typeof e==`string`?e:e.map(e=>e.text??``).filter(Boolean).join(`
2
+ `)}function convertTools(e){let n={};for(let r of e??[])r.type!==`function`||r.function?.name===void 0||(n[r.function.name]={description:r.function.description,inputSchema:jsonSchema(r.function.parameters??{})});return n}function convertToolChoice(e){if(e!==void 0)return typeof e==`string`?e:{type:`tool`,toolName:e.function.name}}export{createAutoevalsClient};
@@ -0,0 +1,22 @@
1
+ import { Client } from "#client/client.js";
2
+ import type { ClientOptions } from "#client/types.js";
3
+ import type { EveEvalTargetHandle } from "#evals/types.js";
4
+ /**
5
+ * Resolves the {@link ClientOptions} for an eval target.
6
+ *
7
+ * Local targets need no auth. Remote targets connect with the same options
8
+ * as every other development client (`resolveDevelopmentClientOptions`):
9
+ * per-request headers carrying the Vercel OIDC trusted-IDP token (which
10
+ * bypasses Deployment Protection without a per-project secret) plus
11
+ * `x-vercel-protection-bypass` when `VERCEL_AUTOMATION_BYPASS_SECRET` is
12
+ * set, and a bearer resolved from the same OIDC cascade.
13
+ *
14
+ * `EVE_EVAL_AUTH_TOKEN` overrides the bearer with a static token for
15
+ * targets whose auth is not OIDC-based.
16
+ */
17
+ export declare function resolveEvalClientOptions(target: Pick<EveEvalTargetHandle, "kind" | "url">): ClientOptions;
18
+ /**
19
+ * Creates the Eve {@link Client} for an eval target from
20
+ * {@link resolveEvalClientOptions}.
21
+ */
22
+ export declare function createEvalClient(target: Pick<EveEvalTargetHandle, "kind" | "url">): Client;
@@ -0,0 +1 @@
1
import { resolveDevelopmentClientOptions } from "#services/dev-client/client-options.js";
import { Client } from "#client/client.js";

/**
 * Resolves the client options for an eval target.
 *
 * Local targets only need the host. Any other target starts from
 * `resolveDevelopmentClientOptions(url)`; when `EVE_EVAL_AUTH_TOKEN` is set to
 * a non-blank value, its trimmed content replaces `auth` with a static bearer.
 *
 * @param target - object with `kind` and `url`.
 * @returns the options object handed to the `Client` constructor.
 */
function resolveEvalClientOptions(target) {
  if (target.kind === "local") {
    return { host: target.url };
  }

  const developmentOptions = resolveDevelopmentClientOptions(target.url);
  const staticToken = process.env.EVE_EVAL_AUTH_TOKEN?.trim();
  if (!staticToken) {
    return developmentOptions;
  }
  return { ...developmentOptions, auth: { bearer: staticToken } };
}

/**
 * Creates a `Client` configured by {@link resolveEvalClientOptions}.
 */
function createEvalClient(target) {
  const options = resolveEvalClientOptions(target);
  return new Client(options);
}

export { createEvalClient, resolveEvalClientOptions };
@@ -3,11 +3,14 @@ interface EvalCliOptions {
3
3
  timeout?: string;
4
4
  maxConcurrency?: string;
5
5
  json?: boolean;
6
+ junit?: string;
6
7
  skipReport?: boolean;
7
8
  strict?: boolean;
8
9
  list?: boolean;
10
+ mockModels?: boolean;
11
+ noSkips?: boolean;
9
12
  tag?: string[];
10
- case?: string[];
13
+ verbose?: boolean;
11
14
  }
12
15
  type EvalCliLogger = {
13
16
  log(message: string): void;
@@ -16,9 +19,9 @@ type EvalCliLogger = {
16
19
  /**
17
20
  * Runs the `eve eval` command with already-parsed Commander options.
18
21
  *
19
- * Exit codes: `0` when every executed case passed its checks (and score
20
- * thresholds under `--strict`), `1` when any case failed, `2` for runner or
21
- * configuration errors (no suites discovered, no cases matching filters).
22
+ * Exit codes: `0` when every executed eval passed its gate assertions (and
23
+ * soft thresholds under `--strict`), `1` when any eval failed, `2` for runner
24
+ * or configuration errors (no evals discovered, no evals matching filters).
22
25
  */
23
- export declare function runEvalCommand(suiteIds: readonly string[], options: EvalCliOptions, logger: EvalCliLogger): Promise<void>;
26
+ export declare function runEvalCommand(evalIds: readonly string[], options: EvalCliOptions, logger: EvalCliLogger): Promise<void>;
24
27
  export {};
@@ -1 +1 @@
1
- import{toErrorMessage}from"#shared/errors.js";import{VERCEL_PROTECTION_BYPASS_HEADER}from"#services/dev-client/request-headers.js";import{resolveApplicationRoot}from"#internal/application/paths.js";import{Client}from"#client/client.js";import{getVercelOidcToken}from"#compiled/@vercel/oidc/index.js";import{loadDevelopmentEnvironmentFiles}from"#cli/dev/environment.js";import{startDevelopmentServer}from"#internal/nitro/host.js";import{discoverAndImportSuites}from"#evals/runner/discover.js";import{executeSuite}from"#evals/runner/execute-suite.js";import{ConsoleReporter}from"#evals/runner/reporters/console.js";async function runEvalCommand(e,t,r){let i=resolveApplicationRoot();loadDevelopmentEnvironmentFiles(i);let l=e.length>0?e:void 0,u=await discoverAndImportSuites(i,l);if(u.length===0){l?r.error(`No suites found matching: ${l.join(`, `)}`):r.error(`No eval suites found. Create suite files under evals/ with the *.eval.ts extension.`),process.exitCode=2;return}let d=await loadAndFilterSuites(u,t,r);if(d===void 0){process.exitCode=2;return}if(t.list===!0){printSuiteList(d,t.json===!0,r);return}let f,p;t.url?p={kind:`remote`,url:t.url}:(f=await startDevelopmentServer(i,{host:`127.0.0.1`,port:0}),p={kind:`local`,url:f.url});let m=createEvalClient(p);try{let e=[];for(let n of d){let r=applyCliOverrides(n,t),a=buildReporters(r,{json:t.json===!0,skipReport:t.skipReport===!0}),o=await executeSuite({suite:r,target:p,reporters:a,appRoot:i,client:m});e.push(o)}t.json&&r.log(JSON.stringify(e,null,2));let n=e.some(e=>e.failed>0),a=t.strict===!0&&e.some(e=>e.scored>0);(n||a)&&(process.exitCode=1)}finally{f&&await f.close()}process.exit(process.exitCode??0)}async function loadAndFilterSuites(t,n,r){let i=n.tag??[],a=n.case??[],o=i.length>0||a.length>0,s=[];for(let n of t){let t;try{t=await n.load()}catch(t){r.error(`Failed to load cases for suite "${n.id}": ${toErrorMessage(t)}`);return}let 
c=t.filter(e=>!(a.length>0&&!a.includes(e.id)||i.length>0&&!carriesTag(n,e,i)));o&&c.length===0||s.push({suite:n,cases:c})}if(s.length===0){r.error(`No cases matched the provided filters (${describeFilters(i,a)}).`);return}return s}function carriesTag(e,t,n){let r=e.tags?.some(e=>n.includes(e))??!1,i=t.tags?.some(e=>n.includes(e))??!1;return r||i}function describeFilters(e,t){let n=[];return e.length>0&&n.push(`tags: ${e.join(`, `)}`),t.length>0&&n.push(`cases: ${t.join(`, `)}`),n.join(`; `)}function printSuiteList(e,t,n){if(t){let t=e.map(({suite:e,cases:t})=>({id:e.id,description:e.description,tags:e.tags,cases:t.map(e=>({id:e.id,tags:e.tags}))}));n.log(JSON.stringify(t,null,2));return}for(let{suite:t,cases:r}of e){let e=t.description===void 0?``:` — ${t.description}`;n.log(`${t.id}${e} (${r.length} case${r.length===1?``:`s`})`);for(let e of r){let t=e.tags!==void 0&&e.tags.length>0?` [${e.tags.join(`, `)}]`:``;n.log(` ${e.id}${t}`)}}}function createEvalClient(e){if(e.kind===`local`)return new Client({host:e.url});let n={},i=process.env.VERCEL_AUTOMATION_BYPASS_SECRET?.trim();return i&&(n[VERCEL_PROTECTION_BYPASS_HEADER]=i),new Client({auth:resolveRemoteBearerAuth(),headers:Object.keys(n).length>0?n:void 0,host:e.url})}function resolveRemoteBearerAuth(){let e=process.env.EVE_EVAL_AUTH_TOKEN?.trim();return e?{bearer:e}:{bearer:resolveOidcBearerToken}}async function resolveOidcBearerToken(){try{let e=(await getVercelOidcToken()).trim();if(e.length>0)return e}catch{}return process.env.VERCEL_OIDC_TOKEN?.trim()??``}function applyCliOverrides(e,t){let n=t.maxConcurrency?Number.parseInt(t.maxConcurrency,10):void 0,r=t.timeout?Number.parseInt(t.timeout,10):void 0,i={...e.suite,load:()=>Promise.resolve([...e.cases])};return n!==void 0&&(i.maxConcurrency=n),r!==void 0&&(i.timeoutMs=r),i}function buildReporters(e,t){let n=t.json?[]:[new ConsoleReporter];return!t.skipReport&&e.reporters&&n.push(...e.reporters),n}export{runEvalCommand};
1
import { basename, join } from "node:path";
import { readFile } from "node:fs/promises";
import { resolveApplicationRoot } from "#internal/application/paths.js";
import { loadDevelopmentEnvironmentFiles } from "#cli/dev/environment.js";
import { startDevelopmentServer } from "#internal/nitro/host.js";
import { EVE_MOCK_AUTHORED_MODELS_ENV } from "#runtime/agent/resolve-model.js";
import { createEvalClient } from "#evals/cli/eval-client.js";
import { discoverAndImportEvals, discoverEvalConfig } from "#evals/runner/discover.js";
import { runEvals } from "#evals/runner/run-evals.js";
import { ConsoleReporter } from "#evals/runner/reporters/console.js";
import { JUnit } from "#evals/runner/reporters/junit.js";
import { resolveEvalTargetHandle } from "#evals/target.js";

/**
 * Runs the `eve eval` command with already-parsed CLI options.
 *
 * Configuration problems (conflicting flags, nothing discovered, nothing
 * matching the tags, invalid numeric flags, a config that fails to load) are
 * reported through `logger.error`, set `process.exitCode = 2`, and return
 * without calling `process.exit`. `--list` prints and returns the same way.
 * After an actual run the process is exited explicitly: with code `1` when
 * any eval failed (or, under `--strict`, when `scored > 0`), otherwise with
 * the current numeric `process.exitCode` or `0`.
 *
 * @param evalIds - eval ids to run; an empty list means "all discovered".
 * @param options - parsed CLI flags.
 * @param logger - sink with `log` and `error` methods.
 */
async function runEvalCommand(evalIds, options, logger) {
  const appRoot = resolveApplicationRoot();

  if (options.url && options.mockModels === true) {
    logger.error(
      `--mock-models cannot be used with --url. The target's mock-model state is discovered from /eve/v1/info.`,
    );
    process.exitCode = 2;
    return;
  }

  // The mock-models env flag is set only for the duration of this run and is
  // restored in the outer `finally`.
  const restoreMockModelsEnv =
    options.mockModels === true ? setEnvForRun(EVE_MOCK_AUTHORED_MODELS_ENV, `1`) : undefined;
  let exitCode;

  try {
    loadDevelopmentEnvironmentFiles(appRoot);

    const requestedIds = evalIds.length > 0 ? evalIds : undefined;
    const discovered = await discoverAndImportEvals(appRoot, requestedIds);
    if (discovered.length === 0) {
      if (requestedIds) {
        logger.error(`No evals found matching: ${requestedIds.join(`, `)}`);
      } else {
        logger.error(`No evals found. Create files under evals/ with the *.eval.ts extension.`);
      }
      process.exitCode = 2;
      return;
    }

    const evaluations = filterEvalsByTag(discovered, options.tag ?? []);
    if (evaluations.length === 0) {
      logger.error(`No evals matched the provided tags (${(options.tag ?? []).join(`, `)}).`);
      process.exitCode = 2;
      return;
    }

    let maxConcurrency;
    let timeoutMs;
    try {
      maxConcurrency = parsePositiveInteger(options.maxConcurrency, `--max-concurrency`);
      timeoutMs = parseNonNegativeInteger(options.timeout, `--timeout`);
    } catch (error) {
      logger.error(error instanceof Error ? error.message : String(error));
      process.exitCode = 2;
      return;
    }

    if (options.list === true) {
      printEvalList(evaluations, options.json === true, logger);
      return;
    }

    let config;
    try {
      config = await discoverEvalConfig(appRoot);
    } catch (error) {
      logger.error(error instanceof Error ? error.message : String(error));
      process.exitCode = 2;
      return;
    }

    let server;
    let target;
    try {
      if (options.url) {
        target = await resolveEvalTargetHandle({
          client: createEvalClient({ kind: `remote`, url: options.url }),
          expectedAgentName: await readExpectedAgentName(appRoot),
          kind: `remote`,
          url: options.url,
        });
      } else {
        // No --url: start a development server on an ephemeral local port.
        server = await startDevelopmentServer(appRoot, { host: `127.0.0.1`, port: 0 });
        target = await resolveEvalTargetHandle({
          client: createEvalClient({ kind: `local`, url: server.url }),
          expectedAgentName: await readExpectedAgentName(appRoot),
          kind: `local`,
          url: server.url,
        });
      }

      const client = createEvalClient(target);
      // With --json the console reporter is omitted and the summary is logged
      // as JSON after the run instead.
      const reporters = options.json === true ? [] : [new ConsoleReporter()];
      if (options.junit !== undefined) {
        reporters.push(JUnit({ filePath: options.junit }));
      }

      const summary = await runEvals({
        evaluations,
        config,
        target,
        client,
        appRoot,
        reporters,
        includeEvalReporters: options.skipReport !== true,
        failOnSkip: options.noSkips === true,
        maxConcurrency,
        timeoutMs,
        onEvalLog:
          options.verbose === true
            ? (evalId, message) => logger.log(`[${evalId}] ${message}`)
            : undefined,
      });

      if (options.json) {
        logger.log(JSON.stringify(summary, null, 2));
      }

      const anyFailed = summary.failed > 0;
      const strictFailure = options.strict === true && summary.scored > 0;
      if (anyFailed || strictFailure) {
        process.exitCode = 1;
      }
    } finally {
      if (server) {
        await server.close();
      }
    }

    exitCode = typeof process.exitCode === `number` ? process.exitCode : 0;
  } finally {
    restoreMockModelsEnv?.();
  }

  // Only a completed run reaches here with a code; early returns leave
  // `exitCode` undefined and let the process end on its own.
  if (exitCode !== undefined) {
    process.exit(exitCode);
  }
}

/**
 * Converts a raw flag value to a number, treating empty or whitespace-only
 * input as invalid. Bare `Number("")` yields `0`, which would let a blank
 * `--timeout` pass validation silently, so blank input maps to `NaN` here.
 */
function toFlagNumber(raw) {
  return String(raw).trim() === `` ? Number.NaN : Number(raw);
}

/**
 * Parses an optional flag value that must be an integer >= 1.
 *
 * @returns `undefined` when the flag is absent, otherwise the parsed integer.
 * @throws Error naming the flag when the value is blank, not an integer, or < 1.
 */
function parsePositiveInteger(raw, flag) {
  if (raw === undefined) {
    return undefined;
  }
  const parsed = toFlagNumber(raw);
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`${flag} must be a positive integer; got "${raw}".`);
  }
  return parsed;
}

/**
 * Parses an optional flag value that must be an integer >= 0.
 *
 * @returns `undefined` when the flag is absent, otherwise the parsed integer.
 * @throws Error naming the flag when the value is blank, not an integer, or < 0.
 */
function parseNonNegativeInteger(raw, flag) {
  if (raw === undefined) {
    return undefined;
  }
  const parsed = toFlagNumber(raw);
  if (!Number.isInteger(parsed) || parsed < 0) {
    throw new Error(`${flag} must be a non-negative integer; got "${raw}".`);
  }
  return parsed;
}

/**
 * Keeps evals carrying at least one of `tags`. An empty tag list selects
 * everything (returned as a copy); evals without `tags` never match a
 * non-empty list.
 */
function filterEvalsByTag(evaluations, tags) {
  if (tags.length === 0) {
    return [...evaluations];
  }
  return evaluations.filter((evaluation) => evaluation.tags?.some((tag) => tags.includes(tag)) ?? false);
}

/**
 * Prints the selected evals: as a JSON array of `{ id, description, tags }`
 * when `asJson` is set, otherwise one line per eval formatted as
 * `id [tag, …] — description`.
 */
function printEvalList(evaluations, asJson, logger) {
  if (asJson) {
    const listing = evaluations.map((evaluation) => ({
      id: evaluation.id,
      description: evaluation.description,
      tags: evaluation.tags,
    }));
    logger.log(JSON.stringify(listing, null, 2));
    return;
  }
  for (const evaluation of evaluations) {
    const description = evaluation.description === undefined ? `` : ` — ${evaluation.description}`;
    const tags =
      evaluation.tags !== undefined && evaluation.tags.length > 0
        ? ` [${evaluation.tags.join(`, `)}]`
        : ``;
    logger.log(`${evaluation.id}${tags}${description}`);
  }
}

/**
 * Sets `process.env[name]` and returns a function that restores the previous
 * state (deleting the variable when it was previously unset).
 */
function setEnvForRun(name, value) {
  const previous = process.env[name];
  process.env[name] = value;
  return () => {
    if (previous === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = previous;
    }
  };
}

/**
 * Reads the expected agent name from `<appRoot>/package.json`, falling back
 * to the directory's basename when the file cannot be read or parsed or its
 * `name` is not a non-empty string.
 */
async function readExpectedAgentName(appRoot) {
  try {
    const manifest = JSON.parse(await readFile(join(appRoot, `package.json`), `utf8`));
    return typeof manifest.name === `string` && manifest.name.length > 0
      ? manifest.name
      : basename(appRoot);
  } catch {
    // Deliberate best effort: any read/parse failure falls back to the folder name.
    return basename(appRoot);
  }
}

export { runEvalCommand };
@@ -0,0 +1,19 @@
1
+ import { EvalSessionManager } from "#evals/session.js";
2
+ import { AssertionCollector } from "#evals/assertions/collector.js";
3
+ import type { EveEvalContext, EveEvalJudgeConfig, EveEvalTargetHandle } from "#evals/types.js";
4
+ /**
5
+ * Builds the `EveEvalContext` (`t`) for one eval run, wiring the session
6
+ * manager (driving), the assertion collector (recording), and the judge
7
+ * namespace. Returns the collector so the runner can {@link
8
+ * AssertionCollector.finalize} it against the completed task result.
9
+ */
10
+ export declare function createEvalContext(deps: {
11
+ readonly manager: EvalSessionManager;
12
+ readonly target: EveEvalTargetHandle;
13
+ readonly signal: AbortSignal;
14
+ readonly judge: EveEvalJudgeConfig | undefined;
15
+ readonly log: (message: string) => void;
16
+ }): {
17
+ readonly context: EveEvalContext;
18
+ readonly collector: AssertionCollector;
19
+ };
@@ -0,0 +1 @@
1
import "#evals/session.js";
import { AssertionCollector } from "#evals/assertions/collector.js";
import * as RunAssertions from "#evals/assertions/run.js";
import { buildJudgeContext } from "#evals/judge.js";

/**
 * Builds the eval context object (`t`) for one eval run together with the
 * assertion collector that records everything asserted through it.
 *
 * Driving members (`send`, `respond`, `events`, …) delegate to
 * `deps.manager.primary`, looked up on every access. Run assertions are
 * created by `RunAssertions.*` and recorded with `collector.recordRun`. The
 * most recent prompt passed to `send`/`sendFile` is remembered and exposed to
 * the judge namespace as its input.
 *
 * @param deps - `manager`, `target`, `signal`, `judge`, and `log`.
 * @returns `{ context, collector }`.
 */
function createEvalContext(deps) {
  const collector = new AssertionCollector();
  let lastPrompt = ``;

  const primary = () => deps.manager.primary;
  const replyMessage = () => deps.manager.lastTurnSession()?.lastTurn?.message ?? null;
  const record = (assertion) => collector.recordRun(assertion);

  const judge = buildJudgeContext({
    collector,
    getReply: replyMessage,
    getInput: () => lastPrompt,
    judge: deps.judge,
  });

  const context = {
    get events() {
      return primary().events;
    },
    get pendingInputRequests() {
      return primary().pendingInputRequests;
    },
    get state() {
      return primary().state;
    },
    get sessionId() {
      return primary().sessionId;
    },
    expectInputRequests: (expected) => primary().expectInputRequests(expected),
    respond: (...args) => primary().respond(...args),
    respondAll: (response) => primary().respondAll(response),
    send: (input) => {
      lastPrompt = promptText(input);
      return primary().send(input);
    },
    sendFile: (first, second, third) => {
      // The first argument is remembered as the prompt for judge assertions.
      lastPrompt = first;
      return primary().sendFile(first, second, third);
    },
    signal: deps.signal,
    target: deps.target,
    get reply() {
      return replyMessage();
    },
    log: deps.log,
    newSession: () => deps.manager.newSession(),
    completed: () => record(RunAssertions.completed()),
    didNotFail: () => record(RunAssertions.didNotFail()),
    waiting: () => record(RunAssertions.waiting()),
    messageIncludes: (text) => record(RunAssertions.messageIncludes(text)),
    calledTool: (name, options) => record(RunAssertions.calledTool(name, options)),
    notCalledTool: (name) => record(RunAssertions.notCalledTool(name)),
    toolOrder: (names) => record(RunAssertions.toolOrder(names)),
    usedNoTools: () => record(RunAssertions.usedNoTools()),
    maxToolCalls: (limit) => record(RunAssertions.maxToolCalls(limit)),
    calledSubagent: (name, options) => record(RunAssertions.calledSubagent(name, options)),
    noFailedActions: () => record(RunAssertions.noFailedActions()),
    event: (type, options) => record(RunAssertions.event(type, options)),
    outputEquals: (expected) => record(RunAssertions.outputEquals(expected)),
    outputMatches: (schema) => record(RunAssertions.outputMatches(schema)),
    check: (value, assertion) => recordCheck(collector, value, assertion),
    judge,
  };

  return { context, collector };
}

/**
 * Records a value assertion: copies the assertion's `name`, `severity`, and
 * `threshold`, and wraps its scorer so the result is reported as `{ score }`.
 */
function recordCheck(collector, value, assertion) {
  return collector.recordValue({
    name: assertion.name,
    severity: assertion.severity,
    threshold: assertion.threshold,
    score: async () => {
      const score = await assertion.score(value);
      return { score };
    },
  });
}

/**
 * Extracts the prompt text from a `send` input: the input itself when it is a
 * string, otherwise its string `message` property, otherwise an empty string.
 */
function promptText(input) {
  if (typeof input === `string`) {
    return input;
  }
  const message = input.message;
  return typeof message === `string` ? message : ``;
}

export { createEvalContext };
@@ -0,0 +1,16 @@
1
+ import type { EveEvalConfig, EveEvalConfigInput } from "#evals/types.js";
2
+ /**
3
+ * Defines the run-wide configuration shared by every eval, authored as the
4
+ * default export of `evals.config.ts` at the root of the `evals/` directory.
5
+ *
6
+ * Exactly one `evals.config.ts` is required. It supplies the optional default
7
+ * `judge` model for `t.judge.*` assertions (so individual evals need not
8
+ * repeat it), optional run-level `reporters`, a default `maxConcurrency`, and a
9
+ * default `timeoutMs`. CLI flags (`--max-concurrency`, `--timeout`) and
10
+ * per-eval values take precedence over the config defaults.
11
+ *
12
+ * Throws on invalid input: a `judge` without a `model`, a non-positive or
13
+ * non-integer `maxConcurrency`, a negative or non-finite `timeoutMs`, or a
14
+ * non-array `reporters`.
15
+ */
16
+ export declare function defineEvalConfig(input: EveEvalConfigInput): EveEvalConfig;
@@ -0,0 +1 @@
1
/**
 * Validates the run-wide eval configuration and returns a copy tagged with
 * `_tag: "EveEvalConfig"`.
 *
 * @param input - the authored configuration object.
 * @returns a shallow copy of `input` plus the `_tag` marker.
 * @throws Error when `judge` lacks a `model`, `maxConcurrency` is not a
 *   positive integer, `timeoutMs` is negative or non-finite, or `reporters`
 *   is not an array.
 */
function defineEvalConfig(input) {
  validateEvalConfigInput(input);
  return { ...input, _tag: "EveEvalConfig" };
}

/**
 * Checks each optional config field in turn and throws on the first invalid
 * one. Fields left `undefined` are not validated.
 */
function validateEvalConfigInput(input) {
  const { judge, maxConcurrency, timeoutMs, reporters } = input;

  if (judge !== undefined) {
    const missingModel = judge.model === undefined || judge.model === null;
    if (missingModel) {
      throw new Error(
        "Eval config `judge` requires a `model`. It is the default judge model for `t.judge.*` assertions across every eval.",
      );
    }
  }

  if (maxConcurrency !== undefined) {
    if (!Number.isInteger(maxConcurrency) || maxConcurrency < 1) {
      throw new Error("Eval config `maxConcurrency` must be a positive integer.");
    }
  }

  if (timeoutMs !== undefined) {
    if (timeoutMs < 0 || !Number.isFinite(timeoutMs)) {
      throw new Error("Eval config `timeoutMs` must be a non-negative finite number.");
    }
  }

  if (reporters !== undefined && !Array.isArray(reporters)) {
    throw new Error("Eval config `reporters` must be an array of reporters.");
  }
}

export { defineEvalConfig };
@@ -0,0 +1,20 @@
1
+ import type { EveEvalDefinition, EveEvalInput } from "#evals/types.js";
2
+ /**
3
+ * Defines one Eve eval. Each eval file is exactly one case: an imperative
4
+ * `test(t)` function that drives the agent (`t.send`, `t.respond`, …) and
5
+ * asserts on what it produced (`t.completed()`, `t.check(...)`,
6
+ * `t.judge.autoevals.*`). Organize related evals with directory nesting under
7
+ * `evals/`, or default-export an array of evals to fan one file out over a
8
+ * dataset.
9
+ *
10
+ * A `judge` is optional: `t.judge.*` assertions fall back to the `judge`
11
+ * declared in `evals.config.ts` unless this eval overrides it. The judge model
12
+ * is used solely for scoring, never for the agent under test. Eval identity is
13
+ * derived from the `evals/<path>.eval.ts` file path by the discovery layer, so
14
+ * authoring `id` or `name` throws.
15
+ *
16
+ * Throws on invalid input: a missing `test` function, a removed legacy key
17
+ * (`input`/`run`/`checks`/`scores`/`expected`/`thresholds`/`parseOutput`/
18
+ * `model`/`modelOptions`/`cases`), an invalid `requires` entry, or a negative or non-finite `timeoutMs`.
19
+ */
20
+ export declare function defineEval(input: EveEvalInput): EveEvalDefinition;
@@ -0,0 +1 @@
1
/**
 * Removed legacy keys, in the order they are checked, each paired with the
 * migration hint appended to the error message.
 */
const LEGACY_EVAL_KEYS = [
  [`input`, "Send the prompt inside `test`: `async test(t) { await t.send(...) }`."],
  [`run`, "Rename `run` to `test`; it receives the same context `t`."],
  [`checks`, "Assert inline inside `test` (e.g. `t.completed()`, `t.calledTool(...)`)."],
  [`scores`, "Use soft assertions inside `test`: `t.check(...).atLeast(n)` or `t.judge.autoevals.*`."],
  [`expected`, "Pass the reference value to the assertion (e.g. `t.check(t.reply, includes(value))`)."],
  [`thresholds`, "Put the threshold on the assertion: `.atLeast(n)`."],
  [`parseOutput`, "Read the value you want inside `test` and assert on it directly."],
  [`model`, "Rename `model` to `judge: { model }`."],
  [`modelOptions`, "Move it under `judge: { model, modelOptions }`."],
  [`cases`, "Each eval file is one case; default-export an array of `defineEval(...)` for datasets."],
];

/**
 * Validates one eval definition and returns a copy tagged with
 * `_tag: "EveEval"`.
 *
 * @param input - the authored eval object.
 * @returns a shallow copy of `input` plus the `_tag` marker.
 * @throws Error when `id` or `name` is present, a removed legacy key is
 *   present, `test` is not a function, `requires` is not an array of supported
 *   requirement strings, or `timeoutMs` is negative or non-finite.
 */
function defineEval(input) {
  validateEvalInput(input);
  return { ...input, _tag: `EveEval` };
}

/**
 * Runs every validation in a fixed order and throws on the first failure:
 * identity keys, legacy keys, `test`, `requires`, then `timeoutMs`.
 */
function validateEvalInput(input) {
  if (`id` in input) {
    throw new Error("Eval must not specify `id`. Eval identity is derived from the file path under evals/.");
  }
  if (`name` in input) {
    throw new Error("Eval must not specify `name`. Eval identity is derived from the file path under evals/.");
  }
  for (const [key, hint] of LEGACY_EVAL_KEYS) {
    rejectLegacyKey(input, key, hint);
  }
  if (typeof input.test !== `function`) {
    throw new Error("Eval requires a `test(t)` function.");
  }
  validateRequirements(input.requires, "Eval `requires`");
  if (input.timeoutMs !== undefined && (input.timeoutMs < 0 || !Number.isFinite(input.timeoutMs))) {
    throw new Error("Eval `timeoutMs` must be a non-negative finite number.");
  }
}

/** Throws a migration error when the removed `key` is present on `input`. */
function rejectLegacyKey(input, key, hint) {
  if (key in input) {
    throw new Error(`Eval \`${key}\` is no longer supported. ${hint}`);
  }
}

/**
 * Validates a `requires` list. `undefined` is accepted; otherwise it must be
 * an array whose entries are `"mockModels"`, `"devRoutes"`, or `"env:<NAME>"`
 * with a non-empty name.
 *
 * Non-string entries are reported with the same "unsupported requirement"
 * error as unknown strings. Previously they reached `startsWith` and raised
 * an unrelated TypeError.
 */
function validateRequirements(requirements, label) {
  if (requirements === undefined) {
    return;
  }
  if (!Array.isArray(requirements)) {
    throw new Error(`${label} must be an array of requirement strings.`);
  }
  for (const requirement of requirements) {
    if (!isSupportedRequirement(requirement)) {
      throw new Error(
        `${label} contains unsupported requirement "${String(requirement)}". Expected "mockModels", "devRoutes", or "env:<NAME>".`,
      );
    }
  }
}

/** Returns true for the requirement strings the runner understands. */
function isSupportedRequirement(requirement) {
  if (typeof requirement !== `string`) {
    return false;
  }
  if (requirement === `mockModels` || requirement === `devRoutes`) {
    return true;
  }
  // "env:" alone (length 4) names no variable and is rejected.
  return requirement.startsWith(`env:`) && requirement.length > 4;
}

export { defineEval };
@@ -0,0 +1,25 @@
1
+ import type { StandardSchemaV1 } from "#compiled/@standard-schema/spec/index.js";
2
+ import type { Assertion } from "#evals/types.js";
3
+ export type { Assertion, AssertionHandle, AssertionSeverity } from "#evals/types.js";
4
+ /**
5
+ * Passes when the value (coerced to a string) contains `substring`. A hard
6
+ * gate by default. Apply with `t.check(value, includes("..."))`.
7
+ */
8
+ export declare function includes(substring: string): Assertion;
9
+ /**
10
+ * Passes when the value deep-equals `expected` (exact structural equality).
11
+ * A hard gate by default.
12
+ */
13
+ export declare function equals(expected: unknown): Assertion;
14
+ /**
15
+ * Passes when the value validates against a Standard Schema (e.g. a Zod
16
+ * schema). A hard gate by default.
17
+ */
18
+ export declare function matches(schema: StandardSchemaV1): Assertion;
19
+ /**
20
+ * Scores normalized character-level Levenshtein similarity between the value
21
+ * and `expected` (1 = identical, 0 = entirely different). Soft by default —
22
+ * tracked unless you set a bar with `.atLeast(...)`. Use it for fuzzy
23
+ * comparison when exact match is too strict but a judge model is overkill.
24
+ */
25
+ export declare function similarity(expected: string): Assertion;
@@ -0,0 +1 @@
1
import { Levenshtein } from "../../node_modules/.pnpm/autoevals@0.0.132_ws@8.21.0/node_modules/autoevals/jsdist/index.js";
import { deepEquals } from "#evals/match.js";

/**
 * Wraps an assertion spec (`name`, `severity`, `threshold`, `score`) with the
 * chainable modifiers. Each modifier returns a new assertion built from the
 * same spec with `severity` and `threshold` replaced; the receiver is not
 * mutated.
 */
function makeAssertion(spec) {
  const derive = (severity, threshold) => makeAssertion({ ...spec, severity, threshold });
  return {
    name: spec.name,
    severity: spec.severity,
    threshold: spec.threshold,
    score: spec.score,
    gate(threshold) {
      return derive(`gate`, threshold);
    },
    soft(threshold) {
      return derive(`soft`, threshold);
    },
    atLeast(threshold) {
      return derive(`soft`, threshold);
    },
  };
}

/**
 * Gate assertion scoring 1 when the value, coerced to a string (nullish
 * becomes the empty string), contains `substring`, else 0.
 */
function includes(substring) {
  return makeAssertion({
    name: `includes(${substring})`,
    severity: `gate`,
    score: (value) => (String(value ?? ``).includes(substring) ? 1 : 0),
  });
}

/** Gate assertion scoring 1 when `deepEquals(value, expected)` holds, else 0. */
function equals(expected) {
  return makeAssertion({
    name: `equals`,
    severity: `gate`,
    score: (value) => (deepEquals(value, expected) ? 1 : 0),
  });
}

/**
 * Gate assertion scoring 1 when the schema's `~standard.validate` result has
 * no `issues` (key absent or `undefined`), else 0.
 */
function matches(schema) {
  return makeAssertion({
    name: `matches`,
    severity: `gate`,
    score: async (value) => {
      const result = await schema[`~standard`].validate(value);
      const valid = !(`issues` in result) || result.issues === undefined;
      return valid ? 1 : 0;
    },
  });
}

/**
 * Soft assertion scored by the autoevals `Levenshtein` scorer between the
 * stringified value and `expected`; a missing score counts as 0.
 */
function similarity(expected) {
  return makeAssertion({
    name: `similarity`,
    severity: `soft`,
    score: async (value) => {
      const result = await Levenshtein({ output: String(value ?? ``), expected });
      return result.score ?? 0;
    },
  });
}

export { equals, includes, matches, similarity };
@@ -1,4 +1,8 @@
1
- export { defineEvalSuite } from "#evals/define-eval-suite.js";
1
+ export { defineEval } from "#evals/define-eval.js";
2
+ export { defineEvalConfig } from "#evals/define-eval-config.js";
3
+ export { EveEvalTurnFailedError } from "#evals/session.js";
4
+ export { EveEvalRequirementError } from "#evals/target.js";
2
5
  export type { RuntimeIdentity } from "#protocol/message.js";
3
6
  export type { InputRequest } from "#runtime/input/types.js";
4
- export type { EveEvalCase, EveEvalCaseResult, EveEvalCaseVerdict, EveEvalCheck, EveEvalCheckArgs, EveEvalCheckResult, EveEvalDerivedFacts, EveEvalScorer, EveEvalScorerArgs, EveEvalScorerResult, EveEvalSubagentCall, EveEvalSuite, EveEvalSuiteDefinition, EveEvalSuiteInput, EveEvalSuiteResult, EveEvalTarget, EveEvalTask, EveEvalTaskResult, EveEvalToolCall, } from "#evals/types.js";
7
+ export type { EveEvalValueMatcher, EveEvalToolCallMatchOptions, EveEvalSubagentCallMatchOptions, } from "#evals/match.js";
8
+ export type { Assertion, AssertionHandle, AssertionResult, AssertionSeverity, AutoevalsJudges, EveEvalContext, EveEvalDerivedFacts, EveEvalJudgeConfig, EveEvalRunSummary, EveEvalSession, EveEvalSessionResult, EveEvalRequirement, EveEvalScheduleDispatchResult, EveEvalSubagentCall, EveEval, EveEvalConfig, EveEvalConfigInput, EveEvalDefinition, EveEvalInput, EveEvalResult, EveEvalTarget, EveEvalTargetCapabilities, EveEvalTargetHandle, EveEvalTaskResult, EveEvalToolCall, EveEvalTurn, EveEvalVerdict, JudgeContext, JudgeOpts, } from "#evals/types.js";
@@ -1 +1 @@
1
- import{defineEvalSuite}from"#evals/define-eval-suite.js";export{defineEvalSuite};
1
+ import{EveEvalRequirementError}from"#evals/target.js";import{EveEvalTurnFailedError}from"#evals/session.js";import{defineEval}from"#evals/define-eval.js";import{defineEvalConfig}from"#evals/define-eval-config.js";export{EveEvalRequirementError,EveEvalTurnFailedError,defineEval,defineEvalConfig};
@@ -0,0 +1,20 @@
1
+ import type { AssertionCollector } from "#evals/assertions/collector.js";
2
+ import type { EveEvalJudgeConfig, JudgeContext } from "#evals/types.js";
3
+ /**
4
+ * Dependencies the judge namespace closes over: where to record assertions,
5
+ * how to resolve the default graded value (`t.reply`) and the original prompt
6
+ * (the autoevals `input`), and the eval/config judge model.
7
+ */
8
+ export interface JudgeDeps {
9
+ readonly collector: AssertionCollector;
10
+ readonly getReply: () => unknown;
11
+ readonly getInput: () => string;
12
+ readonly judge: EveEvalJudgeConfig | undefined;
13
+ }
14
+ /**
15
+ * Builds the `t.judge` namespace bound to the resolved judge model. Each
16
+ * grader records a soft assertion (override with `.atLeast`/`.gate`) that
17
+ * fires the model call immediately; the collector awaits it before the
18
+ * verdict.
19
+ */
20
+ export declare function buildJudgeContext(deps: JudgeDeps): JudgeContext;
@@ -0,0 +1 @@
1
import { ClosedQA, Factuality, Sql, Summary } from "../node_modules/.pnpm/autoevals@0.0.132_ws@8.21.0/node_modules/autoevals/jsdist/index.js";
import { formatLanguageModelGatewayId } from "#internal/runtime-model.js";
import { createAutoevalsClient } from "#evals/autoevals-client.js";

/**
 * Builds the `judge` namespace: `autoevals.factuality`, `summarizes`,
 * `closedQA`, and `sql`. Each grader records a soft value assertion on
 * `deps.collector`.
 *
 * The graded output defaults to `deps.getReply()` (override per call with
 * `opts.on`), the input is `deps.getInput()`, and the model and model options
 * come from the per-call options first, then `deps.judge`. These are captured
 * when the grader is called; the missing-model error is raised when the
 * recorded `score` function runs.
 */
function buildJudgeContext(deps) {
  function grade(name, opts, runScorer) {
    const graded = opts?.on ?? deps.getReply();
    const output = String(graded ?? ``);
    const input = deps.getInput();
    const model = opts?.model ?? deps.judge?.model;
    const modelOptions = opts?.modelOptions ?? deps.judge?.modelOptions;

    const score = async () => {
      if (model === undefined) {
        throw new Error(
          `${name} needs a judge model. Set \`judge\` on the eval or in evals.config.ts, or pass { model } to the call.`,
        );
      }
      const result = await runScorer({ input, output, model, modelOptions });
      return {
        score: result.score ?? 0,
        metadata: { judge: formatLanguageModelGatewayId(model) },
      };
    };

    return deps.collector.recordValue({ name, severity: `soft`, score });
  }

  const autoevals = {
    factuality(expected, opts) {
      return grade(`judge.autoevals.factuality`, opts, ({ input, output, model, modelOptions }) =>
        Factuality({ input, output, expected, ...client(model, modelOptions) }),
      );
    },
    summarizes(expected, opts) {
      return grade(`judge.autoevals.summarizes`, opts, ({ input, output, model, modelOptions }) =>
        Summary({ input, output, expected, ...client(model, modelOptions) }),
      );
    },
    closedQA(criteria, opts) {
      return grade(`judge.autoevals.closedQA`, opts, ({ input, output, model, modelOptions }) =>
        ClosedQA({ input, output, criteria, ...client(model, modelOptions) }),
      );
    },
    sql(expected, opts) {
      return grade(`judge.autoevals.sql`, opts, ({ input, output, model, modelOptions }) =>
        Sql({ input, output, expected, ...client(model, modelOptions) }),
      );
    },
  };

  return { autoevals };
}

/**
 * Produces the `model` id and `client` fields spread into each autoevals
 * scorer call, binding the scorer to the given language model.
 */
function client(languageModel, modelOptions) {
  return {
    model: formatLanguageModelGatewayId(languageModel),
    client: createAutoevalsClient({
      languageModel,
      providerOptions: modelOptions?.providerOptions,
    }),
  };
}

export { buildJudgeContext };
@@ -1,7 +1,7 @@
1
- import type { EveEvalCheckArgs, EveEvalSubagentCall, EveEvalToolCall } from "#evals/types.js";
1
+ import type { EveEvalSubagentCall, EveEvalToolCall } from "#evals/types.js";
2
2
  /**
3
- * One matcher accepted by the built-in checks' options (`Checks.toolCalled`,
4
- * `Checks.subagentCalled`, `Run.usedTool`):
3
+ * One matcher accepted by the assertion options (`t.calledTool`,
4
+ * `t.calledSubagent`):
5
5
  *
6
6
  * - a **literal** is compared structurally; objects partial-deep-match (every
7
7
  * key in the matcher must match the observed value, recursively, and nested
@@ -9,17 +9,16 @@ import type { EveEvalCheckArgs, EveEvalSubagentCall, EveEvalToolCall } from "#ev
9
9
  * compare with `Object.is`
10
10
  * - a **RegExp** tests string values directly and the JSON serialization of
11
11
  * anything else
12
- * - a **function** receives the observed value (plus the check args when
13
- * invoked from a check) and either returns a boolean verdict, or returns a
14
- * resolved expected value that is then compared like a literal — e.g.
15
- * `() => process.env.EVE_WEATHER_AGENT_HOST` compares against a
16
- * runner-assigned value. To assert a literal boolean field, use the literal
17
- * directly; boolean returns are always treated as verdicts.
12
+ * - a **function** receives the observed value and either returns a boolean
13
+ * verdict, or returns a resolved expected value that is then compared like a
14
+ * literal — e.g. `(o) => o === process.env.EVE_WEATHER_AGENT_HOST`. To assert
15
+ * a literal boolean field, use the literal directly; boolean returns are
16
+ * always treated as verdicts.
18
17
  */
19
- export type EveEvalValueMatcher<T = unknown> = T | RegExp | ((value: T, args?: EveEvalCheckArgs) => unknown);
18
+ export type EveEvalValueMatcher<T = unknown> = T | RegExp | ((value: T) => unknown);
20
19
  /**
21
- * Constraints applied to tool calls by `Checks.toolCalled` and
22
- * `Run.usedTool`. All provided constraints must hold for a call to match.
20
+ * Constraints applied to tool calls by `t.calledTool`. All provided
21
+ * constraints must hold for a call to match.
23
22
  */
24
23
  export interface EveEvalToolCallMatchOptions {
25
24
  /** Partial-deep matcher over the call input (see {@link EveEvalValueMatcher}). */
@@ -32,7 +31,7 @@ export interface EveEvalToolCallMatchOptions {
32
31
  readonly times?: number;
33
32
  }
34
33
  /**
35
- * Constraints applied to subagent calls by `Checks.subagentCalled`.
34
+ * Constraints applied to subagent calls by `t.calledSubagent`.
36
35
  */
37
36
  export interface EveEvalSubagentCallMatchOptions {
38
37
  /** Matcher over the `subagent.called` remote URL. */
@@ -44,25 +43,25 @@ export interface EveEvalSubagentCallMatchOptions {
44
43
  * Returns true when the observed value satisfies a matcher (literal, RegExp,
45
44
  * or function — see {@link EveEvalValueMatcher}).
46
45
  */
47
- export declare function matchesValue(matcher: unknown, value: unknown, args?: EveEvalCheckArgs): boolean;
46
+ export declare function matchesValue(matcher: unknown, value: unknown): boolean;
48
47
  /**
49
48
  * Returns true when one derived tool call satisfies the `input`/`output`/
50
49
  * `isError` constraints (the `times` count is the caller's concern).
51
50
  */
52
- export declare function toolCallMatches(call: EveEvalToolCall, options: EveEvalToolCallMatchOptions, args?: EveEvalCheckArgs): boolean;
51
+ export declare function toolCallMatches(call: EveEvalToolCall, options: EveEvalToolCallMatchOptions): boolean;
53
52
  /**
54
53
  * Returns true when one derived subagent call satisfies the `remoteUrl`/
55
54
  * `output` constraints.
56
55
  */
57
- export declare function subagentCallMatches(call: EveEvalSubagentCall, options: EveEvalSubagentCallMatchOptions, args?: EveEvalCheckArgs): boolean;
56
+ export declare function subagentCallMatches(call: EveEvalSubagentCall, options: EveEvalSubagentCallMatchOptions): boolean;
58
57
  /**
59
- * Strict structural equality used by `Checks.outputEquals`: unlike matcher
58
+ * Strict structural equality used by `t.outputEquals`: unlike matcher
60
59
  * comparison, objects must carry exactly the same keys on both sides.
61
60
  */
62
61
  export declare function deepEquals(a: unknown, b: unknown): boolean;
63
62
  /**
64
63
  * Tests a RegExp without carrying `lastIndex` state between calls. Matcher
65
- * patterns are reused across tool calls and across every case in a suite, so
64
+ * patterns are reused across tool calls and across every case in an eval, so
66
65
  * a `g`/`y`-flagged pattern would otherwise return order-dependent results.
67
66
  */
68
67
  export declare function testRegExp(pattern: RegExp, text: string): boolean;
@@ -0,0 +1 @@
1
/**
 * Returns true when `value` satisfies `matcher`.
 *
 * - RegExp: tested against string values directly, or against the JSON
 *   serialization of anything else (`undefined` never matches).
 * - function: called with `value`; a boolean result is the verdict, a function
 *   result is compared by identity, and any other result is matched against
 *   `value` like a literal.
 * - array: same length, element-wise match.
 * - object: partial match — every key in the matcher must match the
 *   corresponding key of `value`; extra keys on `value` are ignored.
 * - anything else: `Object.is`.
 */
function matchesValue(matcher, value) {
  if (matcher instanceof RegExp) {
    return testRegExpAgainst(matcher, value);
  }
  if (typeof matcher === `function`) {
    const resolved = matcher(value);
    if (typeof resolved === `boolean`) {
      return resolved;
    }
    // A returned function is not invoked again (avoids unbounded recursion).
    if (typeof resolved === `function`) {
      return Object.is(resolved, value);
    }
    return matchesValue(resolved, value);
  }
  if (Array.isArray(matcher)) {
    if (!Array.isArray(value) || value.length !== matcher.length) {
      return false;
    }
    return matcher.every((item, index) => matchesValue(item, value[index]));
  }
  if (isPlainObject(matcher)) {
    if (!isPlainObject(value)) {
      return false;
    }
    return Object.entries(matcher).every(([key, expected]) => matchesValue(expected, value[key]));
  }
  return Object.is(matcher, value);
}

/**
 * Returns true when a tool call satisfies every provided constraint among
 * `input`, `output` (matched with {@link matchesValue}) and `isError`
 * (strict equality). Constraints left `undefined` are ignored.
 */
function toolCallMatches(call, options) {
  if (options.input !== undefined && !matchesValue(options.input, call.input)) {
    return false;
  }
  if (options.output !== undefined && !matchesValue(options.output, call.output)) {
    return false;
  }
  if (options.isError !== undefined && call.isError !== options.isError) {
    return false;
  }
  return true;
}

/**
 * Returns true when a subagent call satisfies every provided constraint among
 * `remoteUrl` and `output`. Constraints left `undefined` are ignored.
 */
function subagentCallMatches(call, options) {
  if (options.remoteUrl !== undefined && !matchesValue(options.remoteUrl, call.remoteUrl)) {
    return false;
  }
  if (options.output !== undefined && !matchesValue(options.output, call.output)) {
    return false;
  }
  return true;
}

/**
 * Strict structural equality: arrays must have equal length and equal
 * elements, and objects must carry exactly the same own enumerable keys with
 * equal values.
 *
 * Two fixes over the previous key-count comparison:
 * - key names are now checked, so `{ a: undefined }` no longer equals
 *   `{ b: undefined }`;
 * - `Date`, `RegExp`, `Map`, and `Set` are compared by content (timestamp,
 *   pattern text, insertion-ordered entries) instead of being treated as
 *   key-less objects that always compare equal.
 */
function deepEquals(a, b) {
  if (Object.is(a, b)) {
    return true;
  }
  if (Array.isArray(a) || Array.isArray(b)) {
    if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) {
      return false;
    }
    return a.every((item, index) => deepEquals(item, b[index]));
  }
  if (a instanceof Date || b instanceof Date) {
    // Object.is treats two invalid dates (NaN timestamps) as equal.
    return a instanceof Date && b instanceof Date && Object.is(a.getTime(), b.getTime());
  }
  if (a instanceof RegExp || b instanceof RegExp) {
    return a instanceof RegExp && b instanceof RegExp && String(a) === String(b);
  }
  if (a instanceof Map || b instanceof Map) {
    return a instanceof Map && b instanceof Map && deepEquals([...a], [...b]);
  }
  if (a instanceof Set || b instanceof Set) {
    return a instanceof Set && b instanceof Set && deepEquals([...a], [...b]);
  }
  if (isPlainObject(a) && isPlainObject(b)) {
    const keysA = Object.keys(a);
    const keysB = Object.keys(b);
    if (keysA.length !== keysB.length) {
      return false;
    }
    return keysA.every(
      (key) => Object.prototype.hasOwnProperty.call(b, key) && deepEquals(a[key], b[key]),
    );
  }
  return false;
}

/**
 * Applies a RegExp matcher to an arbitrary value: strings are tested as-is,
 * `undefined` never matches, and everything else is tested against its JSON
 * serialization (values that do not serialize never match).
 */
function testRegExpAgainst(pattern, value) {
  if (typeof value === `string`) {
    return testRegExp(pattern, value);
  }
  if (value === undefined) {
    return false;
  }
  const serialized = JSON.stringify(value);
  return serialized !== undefined && testRegExp(pattern, serialized);
}

/**
 * Tests a RegExp without carrying `lastIndex` between calls, so reused
 * `g`/`y`-flagged patterns give order-independent results.
 */
function testRegExp(pattern, text) {
  if (pattern.global || pattern.sticky) {
    pattern.lastIndex = 0;
  }
  return pattern.test(text);
}

/** True for any non-null, non-array object. */
function isPlainObject(value) {
  return typeof value === `object` && value !== null && !Array.isArray(value);
}

export { deepEquals, matchesValue, subagentCallMatches, testRegExp, toolCallMatches };
@@ -1,2 +1,3 @@
1
1
  export { Braintrust, type BraintrustReporterConfig } from "#evals/runner/reporters/braintrust.js";
2
+ export { JUnit, type JUnitReporterConfig } from "#evals/runner/reporters/junit.js";
2
3
  export type { EvalReporter } from "#evals/runner/reporters/types.js";
@@ -1 +1 @@
1
- import{Braintrust}from"#evals/runner/reporters/braintrust.js";export{Braintrust};
1
+ import{JUnit}from"#evals/runner/reporters/junit.js";import{Braintrust}from"#evals/runner/reporters/braintrust.js";export{Braintrust,JUnit};
@@ -0,0 +1,3 @@
1
+ import type { EveEvalRequirement, EveEvalTarget } from "#evals/types.js";
2
/**
 * Returns the entries of `requirements` that are not satisfied, with
 * duplicates removed. `"mockModels"` and `"devRoutes"` are checked against
 * `target.capabilities`; any other requirement is checked against `env`.
 * `env` defaults to `process.env`.
 */
+ export declare function findUnmetRequirements(requirements: readonly EveEvalRequirement[], target: EveEvalTarget, env?: NodeJS.ProcessEnv): readonly EveEvalRequirement[];
3
/**
 * Formats unmet requirements as a single message of the form
 * `Unmet eval requirement(s): "a", "b"`. Returns an empty string when
 * `requirements` is empty.
 */
+ export declare function formatUnmetRequirements(requirements: readonly EveEvalRequirement[]): string;
@@ -0,0 +1 @@
1
/**
 * Collects the requirements that the target/environment cannot satisfy.
 * Duplicate requirements are collapsed and first-seen order is kept.
 *
 * @param {readonly string[]} requirements - Requirements declared by an eval.
 * @param {{capabilities: {mockModels: unknown, devRoutes: unknown}}} target - Eval target.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to consult.
 * @returns {string[]} The unmet requirements.
 */
function findUnmetRequirements(requirements, target, env = process.env) {
  const unique = [...new Set(requirements)];
  return unique.filter((requirement) => !requirementMet(requirement, target, env));
}

/**
 * Renders unmet requirements as one human-readable line, or an empty string
 * when there are none.
 *
 * @param {readonly string[]} requirements - Unmet requirements.
 * @returns {string} The formatted message.
 */
function formatUnmetRequirements(requirements) {
  if (requirements.length === 0) {
    return "";
  }
  const plural = requirements.length === 1 ? "" : "s";
  const listed = requirements.map((requirement) => formatRequirement(requirement)).join(", ");
  return `Unmet eval requirement${plural}: ${listed}`;
}

/**
 * Decides whether a single requirement is satisfied.
 *
 * Capability requirements return the capability value as-is; callers only
 * use its truthiness.
 */
function requirementMet(requirement, target, env) {
  switch (requirement) {
    case "mockModels":
      return target.capabilities.mockModels;
    case "devRoutes":
      return target.capabilities.devRoutes;
    default: {
      // The first four characters are dropped to get the variable name —
      // presumably an `env:` prefix; confirm against EveEvalRequirement.
      const variableName = requirement.slice(4);
      if (variableName.length === 0) {
        return false;
      }
      return env[variableName] !== undefined;
    }
  }
}

/** Quotes one requirement for display, adding a hint for `mockModels`. */
function formatRequirement(requirement) {
  if (requirement === "mockModels") {
    return '"mockModels" (local evals can enable this with --mock-models)';
  }
  if (requirement === "devRoutes") {
    return '"devRoutes"';
  }
  return `"${requirement}"`;
}

export { findUnmetRequirements, formatUnmetRequirements };
@@ -1,11 +1,12 @@
1
- import type { EveEvalSuiteResult } from "#evals/types.js";
1
+ import type { EveEvalRunSummary } from "#evals/types.js";
2
2
  /**
3
- * Resolves the artifact output directory for one suite run.
3
+ * Resolves the artifact output directory for one `eve eval` run.
4
4
  *
5
- * Layout: `.eve/evals/<timestamp>-<suite-id>/`
5
+ * Layout: `.eve/evals/<timestamp>/`
6
6
  */
7
- export declare function resolveArtifactDirectory(appRoot: string, suiteId: string): string;
7
+ export declare function resolveArtifactDirectory(appRoot: string): string;
8
8
  /**
9
- * Writes all artifacts for a completed suite run.
9
+ * Writes all artifacts for a completed eval run: a run summary, a JSONL
10
+ * results index, and per-eval detail/event files under `evals/`.
10
11
  */
11
- export declare function writeArtifacts(artifactDir: string, suiteResult: EveEvalSuiteResult): Promise<void>;
12
+ export declare function writeArtifacts(artifactDir: string, summary: EveEvalRunSummary): Promise<void>;
@@ -1,3 +1,3 @@
1
- import{join}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function resolveArtifactDirectory(t,n){return join(t,`.eve`,`evals`,`${new Date().toISOString().replace(/[:.]/g,`-`).slice(0,19)}-${n}`)}async function writeArtifacts(r,i){let a=join(r,`cases`);await mkdir(a,{recursive:!0}),await writeFile(join(r,`summary.json`),JSON.stringify(buildSummaryArtifact(i),null,2));let o=i.cases.map(e=>JSON.stringify(buildCaseResultLine(e))).join(`
2
- `);await writeFile(join(r,`results.jsonl`),`${o}\n`),await Promise.all(i.cases.map(async t=>{let r=sanitizeFileName(t.case.id);await writeFile(join(a,`${r}.json`),JSON.stringify(buildCaseArtifact(t),null,2));let i=t.result.events.map(e=>JSON.stringify(e)).join(`
3
- `);await writeFile(join(a,`${r}.events.ndjson`),`${i}\n`)}))}function buildSummaryArtifact(e){return{suite:e.suite,target:e.target,startedAt:e.startedAt,completedAt:e.completedAt,passed:e.passed,failed:e.failed,scored:e.scored,skipped:e.skipped,errored:e.errored,totalCases:e.cases.length,cases:e.cases.map(e=>({id:e.case.id,verdict:e.verdict,status:e.result.status,checks:e.checks.map(e=>({name:e.name,passed:e.passed})),scores:e.scores.map(e=>({name:e.name,score:e.score})),error:e.error,skipReason:e.skipReason}))}}function buildCaseResultLine(e){return{caseId:e.case.id,verdict:e.verdict,status:e.result.status,output:e.result.output,checks:e.checks,scores:e.scores.map(e=>({name:e.name,score:e.score})),error:e.error,skipReason:e.skipReason}}function buildCaseArtifact(e){return{case:{id:e.case.id,input:e.case.input,expected:e.case.expected,tags:e.case.tags,metadata:e.case.metadata},result:{output:e.result.output,finalMessage:e.result.finalMessage,sessionId:e.result.sessionId,status:e.result.status,derived:e.result.derived},verdict:e.verdict,checks:e.checks,scores:e.scores,error:e.error,skipReason:e.skipReason}}function sanitizeFileName(e){return e.replace(/[^a-zA-Z0-9_-]/g,`_`)}export{resolveArtifactDirectory,writeArtifacts};
1
import { dirname, join } from "node:path";
import { mkdir, writeFile } from "node:fs/promises";

/**
 * Resolves the artifact output directory for one eval run:
 * `<appRoot>/.eve/evals/<timestamp>`, where the timestamp is the current ISO
 * time with `:` and `.` replaced by `-`, truncated to second resolution.
 *
 * @param {string} appRoot - Application root directory.
 * @returns {string} The artifact directory path (not created here).
 */
function resolveArtifactDirectory(appRoot) {
  const isoNow = new Date().toISOString();
  const timestamp = isoNow.replace(/[:.]/g, "-").slice(0, 19);
  return join(appRoot, ".eve", "evals", timestamp);
}

/**
 * Writes all artifacts for a completed eval run: `summary.json`,
 * `results.jsonl`, and one `<id>.json` plus `<id>.events.ndjson` per result
 * under `evals/`. Slashes in a result id become nested directories.
 *
 * @param {string} artifactDir - Directory to write into.
 * @param {object} summary - Run summary; its `results` array drives the output.
 * @returns {Promise<void>} Resolves once every file has been written.
 */
async function writeArtifacts(artifactDir, summary) {
  const evalsDir = join(artifactDir, "evals");
  await mkdir(evalsDir, { recursive: true });

  const summaryJson = JSON.stringify(buildSummaryArtifact(summary), null, 2);
  await writeFile(join(artifactDir, "summary.json"), summaryJson);

  const resultLines = [];
  for (const evalResult of summary.results) {
    resultLines.push(JSON.stringify(buildResultLine(evalResult)));
  }
  await writeFile(join(artifactDir, "results.jsonl"), `${resultLines.join("\n")}\n`);

  // Per-eval files are independent of each other, so they are written concurrently.
  const writeEvalFiles = async (evalResult) => {
    const relativePath = sanitizeArtifactPath(evalResult.id);
    const detailPath = join(evalsDir, `${relativePath}.json`);
    // Ids containing "/" map to nested directories that may not exist yet.
    await mkdir(dirname(detailPath), { recursive: true });
    await writeFile(detailPath, JSON.stringify(buildEvalArtifact(evalResult), null, 2));

    const eventLines = evalResult.result.events.map((event) => JSON.stringify(event));
    await writeFile(join(evalsDir, `${relativePath}.events.ndjson`), `${eventLines.join("\n")}\n`);
  };
  await Promise.all(summary.results.map((evalResult) => writeEvalFiles(evalResult)));
}

/** Builds the `summary.json` payload: run totals plus a compact entry per eval. */
function buildSummaryArtifact(summary) {
  const { target, startedAt, completedAt, passed, failed, scored, skipped, errored } = summary;
  const toCompactAssertion = ({ name, score, severity, passed: assertionPassed }) => ({
    name,
    score,
    severity,
    passed: assertionPassed,
  });
  const toCompactEval = (evalResult) => ({
    id: evalResult.id,
    verdict: evalResult.verdict,
    status: evalResult.result.status,
    assertions: evalResult.assertions.map((assertion) => toCompactAssertion(assertion)),
    error: evalResult.error,
    skipReason: evalResult.skipReason,
  });
  return {
    target,
    startedAt,
    completedAt,
    passed,
    failed,
    scored,
    skipped,
    errored,
    totalEvals: summary.results.length,
    evals: summary.results.map((evalResult) => toCompactEval(evalResult)),
  };
}

/** Builds one `results.jsonl` record for an eval result. */
function buildResultLine(evalResult) {
  const { id, verdict, result, assertions, error, skipReason } = evalResult;
  return {
    id,
    verdict,
    status: result.status,
    output: result.output,
    assertions,
    error,
    skipReason,
  };
}

/** Builds the per-eval detail payload; events are written to a separate file. */
function buildEvalArtifact(evalResult) {
  const { id, result, verdict, assertions, error, skipReason } = evalResult;
  return {
    id,
    result: {
      output: result.output,
      finalMessage: result.finalMessage,
      sessionId: result.sessionId,
      status: result.status,
      logs: result.logs,
      derived: result.derived,
      sessions: result.sessions,
    },
    verdict,
    assertions,
    error,
    skipReason,
  };
}

/**
 * Makes an eval id safe to use as a relative path: `/` separators are kept,
 * and within each segment every character outside `[a-zA-Z0-9_-]` becomes `_`.
 */
function sanitizeArtifactPath(id) {
  const safeSegments = [];
  for (const segment of id.split("/")) {
    safeSegments.push(segment.replace(/[^a-zA-Z0-9_-]/g, "_"));
  }
  return safeSegments.join("/");
}

export { resolveArtifactDirectory, writeArtifacts };