eve 0.6.0-beta.9 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (650)
  1. package/CHANGELOG.md +281 -0
  2. package/README.md +9 -6
  3. package/dist/docs/public/README.md +17 -12
  4. package/dist/docs/public/agent-config.md +10 -10
  5. package/dist/docs/public/channels/custom.mdx +4 -4
  6. package/dist/docs/public/channels/discord.mdx +1 -1
  7. package/dist/docs/public/channels/eve.mdx +10 -10
  8. package/dist/docs/public/channels/github.mdx +1 -1
  9. package/dist/docs/public/channels/overview.mdx +21 -15
  10. package/dist/docs/public/channels/slack.mdx +16 -4
  11. package/dist/docs/public/channels/teams.mdx +1 -1
  12. package/dist/docs/public/channels/telegram.mdx +1 -1
  13. package/dist/docs/public/channels/twilio.mdx +1 -1
  14. package/dist/docs/public/{advanced → concepts}/context-control.md +3 -3
  15. package/dist/docs/public/{advanced → concepts}/default-harness.md +5 -5
  16. package/dist/docs/public/{advanced → concepts}/execution-model-and-durability.md +3 -1
  17. package/dist/docs/public/concepts/meta.json +10 -0
  18. package/dist/docs/public/{advanced → concepts}/security-model.md +3 -3
  19. package/dist/docs/public/{advanced → concepts}/sessions-runs-and-streaming.md +7 -7
  20. package/dist/docs/public/connections.mdx +6 -4
  21. package/dist/docs/public/evals/assertions.mdx +108 -0
  22. package/dist/docs/public/evals/cases.mdx +143 -0
  23. package/dist/docs/public/evals/judge.mdx +94 -0
  24. package/dist/docs/public/evals/meta.json +4 -0
  25. package/dist/docs/public/evals/overview.mdx +118 -0
  26. package/dist/docs/public/evals/reporters.mdx +62 -0
  27. package/dist/docs/public/evals/running.mdx +63 -0
  28. package/dist/docs/public/evals/targets.mdx +54 -0
  29. package/dist/docs/public/getting-started.mdx +38 -33
  30. package/dist/docs/public/{advanced → guides}/auth-and-route-protection.md +5 -3
  31. package/dist/docs/public/{client → guides/client}/continuations.mdx +2 -2
  32. package/dist/docs/public/{client → guides/client}/messages.mdx +1 -1
  33. package/dist/docs/public/{client → guides/client}/meta.json +1 -1
  34. package/dist/docs/public/{client → guides/client}/output-schema.mdx +2 -2
  35. package/dist/docs/public/{client → guides/client}/overview.mdx +5 -5
  36. package/dist/docs/public/{client → guides/client}/streaming.mdx +1 -1
  37. package/dist/docs/public/{advanced → guides}/deployment.md +9 -1
  38. package/dist/docs/public/guides/dev-tui.md +50 -0
  39. package/dist/docs/public/{advanced → guides}/dynamic-capabilities.md +1 -1
  40. package/dist/docs/public/{advanced → guides}/dynamic-workflows.md +1 -1
  41. package/dist/docs/public/{frontend → guides/frontend}/nextjs.mdx +16 -7
  42. package/dist/docs/public/{frontend → guides/frontend}/nuxt.mdx +7 -7
  43. package/dist/docs/public/{frontend → guides/frontend}/overview.mdx +6 -6
  44. package/dist/docs/public/{frontend → guides/frontend}/sveltekit.mdx +5 -5
  45. package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-svelte.mdx +2 -2
  46. package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-vue.mdx +2 -2
  47. package/dist/docs/public/{advanced → guides}/hooks.md +2 -2
  48. package/dist/docs/public/{advanced → guides}/instrumentation.md +3 -1
  49. package/dist/docs/public/{advanced → guides}/meta.json +8 -12
  50. package/dist/docs/public/{advanced → guides}/session-context.md +3 -3
  51. package/dist/docs/public/{advanced → guides}/state.md +1 -1
  52. package/dist/docs/public/instructions.mdx +2 -2
  53. package/dist/docs/public/introduction.md +5 -2
  54. package/dist/docs/public/meta.json +4 -3
  55. package/dist/docs/public/reference/cli.md +35 -19
  56. package/dist/docs/public/reference/meta.json +1 -1
  57. package/dist/docs/public/reference/project-layout.md +5 -1
  58. package/dist/docs/public/reference/typescript-api.md +27 -23
  59. package/dist/docs/public/sandbox.mdx +1 -1
  60. package/dist/docs/public/schedules.mdx +2 -2
  61. package/dist/docs/public/skills.mdx +3 -3
  62. package/dist/docs/public/subagents.mdx +3 -3
  63. package/dist/docs/public/tools.mdx +4 -8
  64. package/dist/docs/public/tutorial/connect-a-warehouse.mdx +3 -3
  65. package/dist/docs/public/tutorial/first-agent.mdx +6 -3
  66. package/dist/docs/public/tutorial/guard-the-spend.mdx +1 -1
  67. package/dist/docs/public/tutorial/how-it-runs.mdx +2 -2
  68. package/dist/docs/public/tutorial/meta.json +1 -1
  69. package/dist/docs/public/tutorial/query-sample-data.mdx +1 -1
  70. package/dist/docs/public/tutorial/remember-definitions.mdx +3 -3
  71. package/dist/docs/public/tutorial/run-analysis.mdx +1 -1
  72. package/dist/docs/public/tutorial/ship-it.mdx +4 -4
  73. package/dist/docs/public/tutorial/team-playbooks.mdx +3 -3
  74. package/dist/src/chunks/{use-eve-agent-DCZbkLG7.js → use-eve-agent-DErQj5hs.js} +125 -37
  75. package/dist/src/chunks/{use-eve-agent-DoheC4_o.js → use-eve-agent-DoR8C4i6.js} +125 -37
  76. package/dist/src/cli/banner.d.ts +7 -0
  77. package/dist/src/cli/banner.js +1 -0
  78. package/dist/src/cli/commands/channel-add-conflicts.d.ts +1 -1
  79. package/dist/src/cli/commands/channels.d.ts +9 -6
  80. package/dist/src/cli/commands/channels.js +1 -1
  81. package/dist/src/cli/commands/deploy.d.ts +21 -0
  82. package/dist/src/cli/commands/deploy.js +1 -0
  83. package/dist/src/cli/commands/init-git.d.ts +15 -0
  84. package/dist/src/cli/commands/init-git.js +1 -0
  85. package/dist/src/cli/commands/init.d.ts +29 -0
  86. package/dist/src/cli/commands/init.js +1 -0
  87. package/dist/src/cli/commands/link.d.ts +21 -0
  88. package/dist/src/cli/commands/link.js +1 -0
  89. package/dist/src/cli/commands/preconditions.d.ts +7 -0
  90. package/dist/src/cli/commands/preconditions.js +1 -0
  91. package/dist/src/cli/commands/register-project-commands.d.ts +12 -0
  92. package/dist/src/cli/commands/register-project-commands.js +1 -0
  93. package/dist/src/cli/dev/tui/agent-header.d.ts +15 -9
  94. package/dist/src/cli/dev/tui/agent-header.js +1 -1
  95. package/dist/src/cli/dev/tui/blocks.d.ts +1 -1
  96. package/dist/src/cli/dev/tui/blocks.js +3 -2
  97. package/dist/src/cli/dev/tui/command-typeahead.d.ts +47 -0
  98. package/dist/src/cli/dev/tui/command-typeahead.js +1 -0
  99. package/dist/src/cli/dev/tui/dev-rebuild-status.d.ts +21 -0
  100. package/dist/src/cli/dev/tui/dev-rebuild-status.js +1 -0
  101. package/dist/src/cli/dev/tui/errors.d.ts +18 -0
  102. package/dist/src/cli/dev/tui/errors.js +1 -1
  103. package/dist/src/cli/dev/tui/prompt-command-handler.d.ts +14 -0
  104. package/dist/src/cli/dev/tui/prompt-command-handler.js +1 -0
  105. package/dist/src/cli/dev/tui/prompt-commands.d.ts +54 -0
  106. package/dist/src/cli/dev/tui/prompt-commands.js +2 -0
  107. package/dist/src/cli/dev/tui/runner.d.ts +64 -7
  108. package/dist/src/cli/dev/tui/runner.js +1 -1
  109. package/dist/src/cli/dev/tui/setup-commands.d.ts +48 -0
  110. package/dist/src/cli/dev/tui/setup-commands.js +2 -0
  111. package/dist/src/cli/dev/tui/setup-flow.d.ts +35 -0
  112. package/dist/src/cli/dev/tui/setup-issues.d.ts +40 -0
  113. package/dist/src/cli/dev/tui/setup-issues.js +1 -0
  114. package/dist/src/cli/dev/tui/setup-panel.d.ts +103 -0
  115. package/dist/src/cli/dev/tui/setup-panel.js +1 -0
  116. package/dist/src/cli/dev/tui/status-line.d.ts +25 -0
  117. package/dist/src/cli/dev/tui/status-line.js +1 -0
  118. package/dist/src/cli/dev/tui/stream-format.d.ts +16 -1
  119. package/dist/src/cli/dev/tui/stream-format.js +1 -1
  120. package/dist/src/cli/dev/tui/terminal-renderer.d.ts +32 -3
  121. package/dist/src/cli/dev/tui/terminal-renderer.js +5 -2
  122. package/dist/src/cli/dev/tui/test/index.d.ts +3 -1
  123. package/dist/src/cli/dev/tui/test/index.js +1 -1
  124. package/dist/src/cli/dev/tui/test/mock-terminal.d.ts +1 -0
  125. package/dist/src/cli/dev/tui/test/mock-terminal.js +1 -1
  126. package/dist/src/cli/dev/tui/theme.d.ts +10 -0
  127. package/dist/src/cli/dev/tui/theme.js +1 -1
  128. package/dist/src/cli/dev/tui/tui-prompter.d.ts +20 -0
  129. package/dist/src/cli/dev/tui/tui-prompter.js +1 -0
  130. package/dist/src/cli/dev/tui/tui.d.ts +6 -8
  131. package/dist/src/cli/dev/tui/tui.js +1 -1
  132. package/dist/src/cli/dev/tui/types.d.ts +4 -3
  133. package/dist/src/cli/dev/tui/vercel-status.d.ts +47 -0
  134. package/dist/src/cli/dev/tui/vercel-status.js +1 -0
  135. package/dist/src/cli/run.d.ts +9 -18
  136. package/dist/src/cli/run.js +2 -2
  137. package/dist/src/client/client.d.ts +8 -0
  138. package/dist/src/client/client.js +1 -1
  139. package/dist/src/client/file-parts.d.ts +18 -0
  140. package/dist/src/client/file-parts.js +1 -0
  141. package/dist/src/client/index.d.ts +3 -2
  142. package/dist/src/client/index.js +1 -1
  143. package/dist/src/client/message-response.js +1 -1
  144. package/dist/src/client/open-stream.d.ts +6 -0
  145. package/dist/src/client/open-stream.js +1 -1
  146. package/dist/src/client/session-utils.d.ts +5 -0
  147. package/dist/src/client/session-utils.js +1 -1
  148. package/dist/src/client/session.js +1 -1
  149. package/dist/src/client/types.d.ts +9 -2
  150. package/dist/src/compiled/.vendor-stamp.json +8 -8
  151. package/dist/src/compiled/@ai-sdk/anthropic/index.d.ts +56 -31
  152. package/dist/src/compiled/@ai-sdk/anthropic/index.js +2 -2
  153. package/dist/src/compiled/@ai-sdk/google/index.js +1 -1
  154. package/dist/src/compiled/@ai-sdk/mcp/index.js +1 -1
  155. package/dist/src/compiled/@ai-sdk/openai/index.d.ts +16 -9
  156. package/dist/src/compiled/@ai-sdk/openai/index.js +2 -2
  157. package/dist/src/compiled/@ai-sdk/otel/index.js +2 -2
  158. package/dist/src/compiled/@vercel/sandbox/index.js +1 -1
  159. package/dist/src/compiled/@workflow/core/capabilities.d.ts +19 -1
  160. package/dist/src/compiled/@workflow/core/class-serialization.d.ts +32 -0
  161. package/dist/src/compiled/@workflow/core/create-hook.d.ts +37 -0
  162. package/dist/src/compiled/@workflow/core/global.d.ts +11 -1
  163. package/dist/src/compiled/@workflow/core/index.js +2 -2
  164. package/dist/src/compiled/@workflow/core/runtime/helpers.d.ts +4 -2
  165. package/dist/src/compiled/@workflow/core/runtime/start.d.ts +6 -0
  166. package/dist/src/compiled/@workflow/core/runtime/suspension-handler.d.ts +15 -2
  167. package/dist/src/compiled/@workflow/core/runtime/wait-continuation.d.ts +84 -0
  168. package/dist/src/compiled/@workflow/core/runtime/wait-until.d.ts +18 -0
  169. package/dist/src/compiled/@workflow/core/runtime.d.ts +3 -1
  170. package/dist/src/compiled/@workflow/core/runtime.js +28 -28
  171. package/dist/src/compiled/@workflow/core/serialization/types.d.ts +21 -0
  172. package/dist/src/compiled/@workflow/core/serialization.d.ts +113 -6
  173. package/dist/src/compiled/@workflow/core/symbols.d.ts +2 -0
  174. package/dist/src/compiled/@workflow/core/util.d.ts +0 -5
  175. package/dist/src/compiled/@workflow/core/version.d.ts +1 -1
  176. package/dist/src/compiled/@workflow/core/workflow/attribute-dispatcher.d.ts +6 -0
  177. package/dist/src/compiled/@workflow/core/workflow/set-attributes.d.ts +3 -4
  178. package/dist/src/compiled/@workflow/core/workflow.js +1 -1
  179. package/dist/src/compiled/@workflow/world/events.d.ts +48 -0
  180. package/dist/src/compiled/@workflow/world/index.d.ts +3 -3
  181. package/dist/src/compiled/@workflow/world/queue.d.ts +31 -2
  182. package/dist/src/compiled/@workflow/world/runs.d.ts +2 -0
  183. package/dist/src/compiled/@workflow/world/spec-version.d.ts +2 -1
  184. package/dist/src/compiled/_chunks/workflow/attribute-changes-DGVGRGfw.js +59 -0
  185. package/dist/src/compiled/_chunks/workflow/{dist-gEXVSMPU.js → dist-CkMRLaRV.js} +1 -1
  186. package/dist/src/compiled/_chunks/workflow/functions-DuPjIvMH.js +1 -0
  187. package/dist/src/compiled/_chunks/workflow/resume-hook-DMSadN9o.js +1 -0
  188. package/dist/src/compiled/_chunks/workflow/run-BRdn7zy_.js +1 -0
  189. package/dist/src/compiled/_chunks/workflow/sleep-CpXfoXLF.js +1 -0
  190. package/dist/src/compiled/just-bash/index.d.ts +4 -4
  191. package/dist/src/compiler/artifacts.js +1 -1
  192. package/dist/src/compiler/manifest.d.ts +8 -8
  193. package/dist/src/compiler/normalize-agent-config.js +1 -1
  194. package/dist/src/compiler/normalize-channel.d.ts +2 -1
  195. package/dist/src/compiler/normalize-channel.js +1 -1
  196. package/dist/src/compiler/normalize-connection.d.ts +2 -1
  197. package/dist/src/compiler/normalize-connection.js +1 -1
  198. package/dist/src/compiler/normalize-helpers.d.ts +5 -0
  199. package/dist/src/compiler/normalize-helpers.js +1 -1
  200. package/dist/src/compiler/normalize-instructions.d.ts +3 -2
  201. package/dist/src/compiler/normalize-instructions.js +1 -1
  202. package/dist/src/compiler/normalize-manifest.js +2 -2
  203. package/dist/src/compiler/normalize-sandbox.d.ts +2 -1
  204. package/dist/src/compiler/normalize-sandbox.js +1 -1
  205. package/dist/src/compiler/normalize-schedule.d.ts +2 -1
  206. package/dist/src/compiler/normalize-schedule.js +1 -1
  207. package/dist/src/compiler/normalize-skill.d.ts +2 -1
  208. package/dist/src/compiler/normalize-skill.js +1 -1
  209. package/dist/src/compiler/normalize-subagent.d.ts +4 -1
  210. package/dist/src/compiler/normalize-subagent.js +1 -1
  211. package/dist/src/compiler/normalize-tool.d.ts +2 -1
  212. package/dist/src/compiler/normalize-tool.js +1 -1
  213. package/dist/src/compiler/workspace-resources.js +1 -1
  214. package/dist/src/context/node.d.ts +1 -1
  215. package/dist/src/evals/assertions/collector.d.ts +43 -0
  216. package/dist/src/evals/assertions/collector.js +1 -0
  217. package/dist/src/evals/assertions/run.d.ts +72 -0
  218. package/dist/src/evals/assertions/run.js +2 -0
  219. package/dist/src/evals/autoevals-client.js +2 -0
  220. package/dist/src/evals/cli/eval-client.d.ts +22 -0
  221. package/dist/src/evals/cli/eval-client.js +1 -0
  222. package/dist/src/evals/cli/eval.d.ts +8 -5
  223. package/dist/src/evals/cli/eval.js +1 -1
  224. package/dist/src/evals/context.d.ts +19 -0
  225. package/dist/src/evals/context.js +1 -0
  226. package/dist/src/evals/define-eval-config.d.ts +16 -0
  227. package/dist/src/evals/define-eval-config.js +1 -0
  228. package/dist/src/evals/define-eval.d.ts +20 -0
  229. package/dist/src/evals/define-eval.js +1 -0
  230. package/dist/src/evals/expect/index.d.ts +25 -0
  231. package/dist/src/evals/expect/index.js +1 -0
  232. package/dist/src/evals/index.d.ts +6 -2
  233. package/dist/src/evals/index.js +1 -1
  234. package/dist/src/evals/judge.d.ts +20 -0
  235. package/dist/src/evals/judge.js +1 -0
  236. package/dist/src/evals/{checks/match.d.ts → match.d.ts} +17 -18
  237. package/dist/src/evals/match.js +1 -0
  238. package/dist/src/evals/reporters/index.d.ts +1 -0
  239. package/dist/src/evals/reporters/index.js +1 -1
  240. package/dist/src/evals/requirements.d.ts +3 -0
  241. package/dist/src/evals/requirements.js +1 -0
  242. package/dist/src/evals/runner/artifacts.d.ts +7 -6
  243. package/dist/src/evals/runner/artifacts.js +3 -3
  244. package/dist/src/evals/runner/discover.d.ts +31 -10
  245. package/dist/src/evals/runner/discover.js +1 -1
  246. package/dist/src/evals/runner/execute-eval.d.ts +25 -0
  247. package/dist/src/evals/runner/execute-eval.js +1 -0
  248. package/dist/src/evals/runner/execute-task.d.ts +31 -0
  249. package/dist/src/evals/runner/execute-task.js +1 -0
  250. package/dist/src/evals/runner/reporters/braintrust.d.ts +7 -5
  251. package/dist/src/evals/runner/reporters/braintrust.js +2 -2
  252. package/dist/src/evals/runner/reporters/console.d.ts +4 -4
  253. package/dist/src/evals/runner/reporters/console.js +1 -1
  254. package/dist/src/evals/runner/reporters/junit.d.ts +10 -0
  255. package/dist/src/evals/runner/reporters/junit.js +4 -0
  256. package/dist/src/evals/runner/reporters/types.d.ts +14 -8
  257. package/dist/src/evals/runner/run-evals.d.ts +38 -0
  258. package/dist/src/evals/runner/run-evals.js +1 -0
  259. package/dist/src/evals/runner/verdict.d.ts +10 -15
  260. package/dist/src/evals/runner/verdict.js +1 -1
  261. package/dist/src/evals/session.d.ts +52 -0
  262. package/dist/src/evals/session.js +1 -0
  263. package/dist/src/evals/target.d.ts +23 -0
  264. package/dist/src/evals/target.js +1 -0
  265. package/dist/src/evals/types.d.ts +294 -219
  266. package/dist/src/execution/compaction.d.ts +14 -0
  267. package/dist/src/execution/compaction.js +1 -0
  268. package/dist/src/execution/delegated-parent-notification.js +1 -1
  269. package/dist/src/execution/dispatch-runtime-actions-step.js +1 -1
  270. package/dist/src/execution/node-step.js +1 -1
  271. package/dist/src/execution/sandbox/bash-tool.d.ts +6 -6
  272. package/dist/src/execution/sandbox/bash-tool.js +1 -1
  273. package/dist/src/execution/sandbox/bindings/local.js +1 -1
  274. package/dist/src/execution/sandbox/bindings/vercel.d.ts +2 -6
  275. package/dist/src/execution/sandbox/bindings/vercel.js +1 -1
  276. package/dist/src/execution/sandbox/glob-tool.js +3 -3
  277. package/dist/src/execution/sandbox/grep-tool.js +3 -3
  278. package/dist/src/execution/sandbox/read-file-tool.js +1 -1
  279. package/dist/src/execution/subagent-adapter.js +1 -1
  280. package/dist/src/execution/tool-auth.js +1 -1
  281. package/dist/src/execution/turn-workflow.js +1 -1
  282. package/dist/src/execution/workflow-runtime.d.ts +2 -2
  283. package/dist/src/execution/workflow-runtime.js +1 -1
  284. package/dist/src/execution/workflow-steps.js +1 -1
  285. package/dist/src/harness/action-result-helpers.js +1 -1
  286. package/dist/src/harness/authorization.d.ts +26 -0
  287. package/dist/src/harness/authorization.js +1 -1
  288. package/dist/src/harness/code-mode-lifecycle.js +1 -1
  289. package/dist/src/harness/emission.d.ts +12 -5
  290. package/dist/src/harness/emission.js +1 -1
  291. package/dist/src/harness/model-call-error.d.ts +35 -6
  292. package/dist/src/harness/model-call-error.js +1 -1
  293. package/dist/src/harness/step-hooks.d.ts +10 -4
  294. package/dist/src/harness/step-hooks.js +1 -1
  295. package/dist/src/harness/tool-loop.js +1 -1
  296. package/dist/src/harness/tools.d.ts +4 -6
  297. package/dist/src/harness/tools.js +1 -1
  298. package/dist/src/harness/turn-tag-state.d.ts +4 -0
  299. package/dist/src/harness/turn-tag-state.js +1 -1
  300. package/dist/src/harness/types.d.ts +4 -15
  301. package/dist/src/internal/application/cache-metadata.js +1 -1
  302. package/dist/src/internal/application/compiled-artifacts.js +1 -1
  303. package/dist/src/internal/application/package.js +1 -1
  304. package/dist/src/internal/application/paths.js +1 -1
  305. package/dist/src/internal/authored-definition/schema-backed.js +1 -1
  306. package/dist/src/internal/authored-module-loader.d.ts +4 -1
  307. package/dist/src/internal/authored-module-loader.js +2 -2
  308. package/dist/src/internal/authored-module-map-loader.js +1 -1
  309. package/dist/src/internal/nitro/dev-runtime-artifacts.js +1 -1
  310. package/dist/src/internal/nitro/host/build-application.js +1 -1
  311. package/dist/src/internal/nitro/host/build-vercel-agent-summary.js +1 -1
  312. package/dist/src/internal/nitro/host/configure-nitro-routes.js +3 -3
  313. package/dist/src/internal/nitro/host/create-application-nitro.js +1 -1
  314. package/dist/src/internal/nitro/host/dev-authored-source-watcher.js +1 -1
  315. package/dist/src/internal/nitro/host/dev-watcher-log.d.ts +37 -0
  316. package/dist/src/internal/nitro/host/dev-watcher-log.js +1 -0
  317. package/dist/src/internal/nitro/host/ports.d.ts +8 -0
  318. package/dist/src/internal/nitro/host/ports.js +1 -0
  319. package/dist/src/internal/nitro/host/prepare-application-host.js +1 -1
  320. package/dist/src/internal/nitro/host/server-external-packages.d.ts +1 -1
  321. package/dist/src/internal/nitro/host/server-external-packages.js +1 -1
  322. package/dist/src/internal/nitro/host/start-development-server.js +1 -1
  323. package/dist/src/internal/nitro/host/start-production-server.js +1 -1
  324. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.d.ts +5 -0
  325. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js +1 -0
  326. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.d.ts +31 -2
  327. package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.js +1 -1
  328. package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.d.ts +13 -0
  329. package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.js +1 -1
  330. package/dist/src/internal/nitro/routes/info.d.ts +2 -2
  331. package/dist/src/internal/nitro/routes/info.js +1 -1
  332. package/dist/src/internal/workflow/queue-namespace.d.ts +5 -0
  333. package/dist/src/internal/workflow/queue-namespace.js +1 -0
  334. package/dist/src/internal/workflow-bundle/builder-support.js +2 -2
  335. package/dist/src/internal/workflow-bundle/builder.js +3 -5
  336. package/dist/src/internal/workflow-bundle/vercel-workflow-output.js +1 -1
  337. package/dist/src/internal/workflow-bundle/workflow-builders.d.ts +1 -1
  338. package/dist/src/internal/workflow-bundle/workflow-builders.js +1 -1
  339. package/dist/src/node_modules/.pnpm/@clack_core@1.3.1/node_modules/@clack/core/dist/index.js +4 -4
  340. package/dist/src/protocol/message.d.ts +15 -0
  341. package/dist/src/protocol/message.js +2 -2
  342. package/dist/src/public/channels/slack/api.d.ts +8 -0
  343. package/dist/src/public/channels/slack/api.js +1 -1
  344. package/dist/src/public/channels/slack/connections.d.ts +26 -18
  345. package/dist/src/public/channels/slack/connections.js +1 -1
  346. package/dist/src/public/channels/slack/defaults.d.ts +5 -2
  347. package/dist/src/public/channels/slack/defaults.js +1 -1
  348. package/dist/src/public/channels/slack/index.d.ts +1 -1
  349. package/dist/src/public/channels/slack/slackChannel.d.ts +65 -5
  350. package/dist/src/public/channels/slack/slackChannel.js +1 -1
  351. package/dist/src/public/channels/teams/defaults.js +1 -1
  352. package/dist/src/public/connections/errors.d.ts +8 -0
  353. package/dist/src/public/definitions/tool.d.ts +0 -33
  354. package/dist/src/public/next/index.d.ts +7 -1
  355. package/dist/src/public/next/index.js +1 -1
  356. package/dist/src/public/next/server.d.ts +1 -0
  357. package/dist/src/public/next/server.js +1 -1
  358. package/dist/src/public/nuxt/dev-server.js +1 -1
  359. package/dist/src/public/sveltekit/dev-server.js +1 -1
  360. package/dist/src/public/sveltekit/index.d.ts +1 -1
  361. package/dist/src/public/tools/defaults.d.ts +2 -4
  362. package/dist/src/public/tools/defaults.js +1 -1
  363. package/dist/src/public/tools/define-bash-tool.d.ts +3 -3
  364. package/dist/src/public/tools/define-bash-tool.js +1 -1
  365. package/dist/src/public/tools/define-read-file-tool.d.ts +0 -6
  366. package/dist/src/public/tools/define-read-file-tool.js +1 -1
  367. package/dist/src/public/tools/index.d.ts +2 -2
  368. package/dist/src/public/tools/index.js +1 -1
  369. package/dist/src/public/tools/internal.js +1 -1
  370. package/dist/src/runtime/actions/types.d.ts +11 -11
  371. package/dist/src/runtime/agent/mock-model-adapter.js +1 -1
  372. package/dist/src/runtime/agent/mock-model-fixtures.js +3 -2
  373. package/dist/src/runtime/agent/mock-model-skill-selection.js +3 -4
  374. package/dist/src/runtime/connections/callback-route.js +1 -1
  375. package/dist/src/runtime/connections/mcp-client.js +1 -1
  376. package/dist/src/runtime/connections/scoped-authorization.d.ts +21 -5
  377. package/dist/src/runtime/connections/scoped-authorization.js +1 -1
  378. package/dist/src/runtime/connections/types.d.ts +33 -0
  379. package/dist/src/runtime/connections/validate-authorization.js +1 -1
  380. package/dist/src/runtime/framework-tools/bash.d.ts +3 -3
  381. package/dist/src/runtime/framework-tools/bash.js +1 -1
  382. package/dist/src/runtime/framework-tools/connection-search-dynamic.d.ts +1 -1
  383. package/dist/src/runtime/framework-tools/connection-search-dynamic.js +1 -1
  384. package/dist/src/runtime/framework-tools/file-state.d.ts +3 -3
  385. package/dist/src/runtime/framework-tools/index.js +1 -1
  386. package/dist/src/runtime/framework-tools/read-file.js +2 -2
  387. package/dist/src/runtime/framework-tools/todo.d.ts +7 -0
  388. package/dist/src/runtime/framework-tools/todo.js +2 -2
  389. package/dist/src/runtime/governance/auth/http-basic.js +1 -1
  390. package/dist/src/runtime/input/types.d.ts +1 -1
  391. package/dist/src/runtime/resolve-tool.d.ts +2 -2
  392. package/dist/src/runtime/resolve-tool.js +1 -1
  393. package/dist/src/runtime/sandbox/keys.js +1 -1
  394. package/dist/src/runtime/session-callback-route.js +1 -1
  395. package/dist/src/runtime/types.d.ts +1 -7
  396. package/dist/src/services/dev-client/client-options.d.ts +8 -0
  397. package/dist/src/services/dev-client/client-options.js +1 -0
  398. package/dist/src/services/dev-client/runtime-artifacts.d.ts +13 -0
  399. package/dist/src/services/dev-client/runtime-artifacts.js +1 -0
  400. package/dist/src/services/dev-client.d.ts +13 -46
  401. package/dist/src/services/dev-client.js +1 -1
  402. package/dist/src/setup/ask.d.ts +205 -0
  403. package/dist/src/setup/ask.js +1 -0
  404. package/dist/src/setup/boxes/add-channels.d.ts +100 -16
  405. package/dist/src/setup/boxes/add-channels.js +2 -1
  406. package/dist/src/setup/boxes/add-connections.d.ts +13 -23
  407. package/dist/src/setup/boxes/add-connections.js +1 -1
  408. package/dist/src/setup/boxes/apply-ai-gateway-credential.d.ts +2 -2
  409. package/dist/src/setup/boxes/apply-ai-gateway-credential.js +1 -1
  410. package/dist/src/setup/boxes/deploy-project.d.ts +46 -14
  411. package/dist/src/setup/boxes/deploy-project.js +1 -1
  412. package/dist/src/setup/boxes/detect-ai-gateway.d.ts +10 -3
  413. package/dist/src/setup/boxes/detect-ai-gateway.js +1 -1
  414. package/dist/src/setup/boxes/link-project.d.ts +3 -3
  415. package/dist/src/setup/boxes/link-project.js +1 -1
  416. package/dist/src/setup/boxes/one-shot-next-steps.d.ts +18 -0
  417. package/dist/src/setup/boxes/one-shot-next-steps.js +2 -0
  418. package/dist/src/setup/boxes/preflight.d.ts +14 -6
  419. package/dist/src/setup/boxes/preflight.js +1 -1
  420. package/dist/src/setup/boxes/resolve-provisioning.d.ts +36 -8
  421. package/dist/src/setup/boxes/resolve-provisioning.js +1 -1
  422. package/dist/src/setup/boxes/resolve-target.d.ts +25 -8
  423. package/dist/src/setup/boxes/resolve-target.js +1 -1
  424. package/dist/src/setup/boxes/scaffold.d.ts +12 -6
  425. package/dist/src/setup/boxes/scaffold.js +1 -1
  426. package/dist/src/setup/boxes/select-channels.d.ts +38 -9
  427. package/dist/src/setup/boxes/select-channels.js +1 -1
  428. package/dist/src/setup/boxes/select-chat.d.ts +15 -11
  429. package/dist/src/setup/boxes/select-chat.js +1 -1
  430. package/dist/src/setup/boxes/select-connections.d.ts +30 -0
  431. package/dist/src/setup/boxes/select-connections.js +1 -0
  432. package/dist/src/setup/boxes/select-model.d.ts +18 -14
  433. package/dist/src/setup/boxes/select-model.js +1 -1
  434. package/dist/src/setup/boxes/select-setup-mode.d.ts +32 -0
  435. package/dist/src/setup/boxes/select-setup-mode.js +1 -0
  436. package/dist/src/setup/channel-add-conflicts.d.ts +28 -0
  437. package/dist/src/setup/channel-add-conflicts.js +1 -0
  438. package/dist/src/setup/cli/channel-setup-prompter.d.ts +23 -0
  439. package/dist/src/setup/cli/channel-setup-prompter.js +1 -0
  440. package/dist/src/setup/cli/connection-add-prompter.d.ts +8 -0
  441. package/dist/src/setup/cli/connection-add-prompter.js +1 -0
  442. package/dist/src/setup/{scaffold/cli → cli}/index.d.ts +4 -3
  443. package/dist/src/setup/cli/index.js +1 -0
  444. package/dist/src/setup/{scaffold/cli → cli}/prompt-ui.d.ts +39 -15
  445. package/dist/src/setup/cli/prompt-ui.js +5 -0
  446. package/dist/src/setup/{scaffold/cli → cli}/rail-log.d.ts +2 -0
  447. package/dist/src/setup/{scaffold/cli → cli}/rail-log.js +2 -2
  448. package/dist/src/setup/{scaffold/cli → cli}/select-component.d.ts +18 -3
  449. package/dist/src/setup/cli/select-component.js +1 -0
  450. package/dist/src/setup/cli/select-option-codec.d.ts +12 -0
  451. package/dist/src/setup/cli/select-option-codec.js +1 -0
  452. package/dist/src/setup/{scaffold/cli → cli}/select-state.d.ts +13 -1
  453. package/dist/src/setup/cli/select-state.js +1 -0
  454. package/dist/src/setup/cli/whimsy.d.ts +16 -0
  455. package/dist/src/setup/cli/whimsy.js +1 -0
  456. package/dist/src/setup/{scaffold/steps/setup-connection.d.ts → connection-connector.d.ts} +3 -2
  457. package/dist/src/setup/connection-connector.js +1 -0
  458. package/dist/src/setup/flows/channels.d.ts +43 -0
  459. package/dist/src/setup/flows/channels.js +1 -0
  460. package/dist/src/setup/flows/deploy.d.ts +40 -0
  461. package/dist/src/setup/flows/deploy.js +1 -0
  462. package/dist/src/setup/flows/in-project.d.ts +16 -0
  463. package/dist/src/setup/flows/in-project.js +1 -0
  464. package/dist/src/setup/flows/link.d.ts +43 -0
  465. package/dist/src/setup/flows/link.js +1 -0
  466. package/dist/src/setup/flows/model.d.ts +112 -0
  467. package/dist/src/setup/flows/model.js +1 -0
  468. package/dist/src/setup/flows/vercel.d.ts +31 -0
  469. package/dist/src/setup/flows/vercel.js +2 -0
  470. package/dist/src/setup/gateway-models.js +1 -1
  471. package/dist/src/setup/headless.d.ts +1 -1
  472. package/dist/src/setup/index.d.ts +10 -4
  473. package/dist/src/setup/index.js +1 -1
  474. package/dist/src/setup/onboarding.d.ts +7 -4
  475. package/dist/src/setup/onboarding.js +1 -1
  476. package/dist/src/setup/package-manager.d.ts +27 -0
  477. package/dist/src/setup/package-manager.js +1 -0
  478. package/dist/src/setup/primitives/index.d.ts +3 -0
  479. package/dist/src/setup/primitives/index.js +1 -0
  480. package/dist/src/setup/primitives/pm/bun.d.ts +10 -0
  481. package/dist/src/setup/primitives/pm/bun.js +1 -0
  482. package/dist/src/setup/primitives/pm/index.d.ts +11 -0
  483. package/dist/src/setup/primitives/pm/index.js +1 -0
  484. package/dist/src/setup/primitives/pm/npm.d.ts +10 -0
  485. package/dist/src/setup/primitives/pm/npm.js +1 -0
  486. package/dist/src/setup/primitives/pm/pnpm.d.ts +27 -0
  487. package/dist/src/setup/primitives/pm/pnpm.js +8 -0
  488. package/dist/src/setup/primitives/pm/run.d.ts +23 -0
  489. package/dist/src/setup/primitives/pm/run.js +1 -0
  490. package/dist/src/setup/primitives/pm/shared.d.ts +8 -0
  491. package/dist/src/setup/primitives/pm/shared.js +1 -0
  492. package/dist/src/setup/primitives/pm/types.d.ts +37 -0
  493. package/dist/src/setup/primitives/pm/types.js +1 -0
  494. package/dist/src/setup/primitives/pm/yarn.d.ts +10 -0
  495. package/dist/src/setup/primitives/pm/yarn.js +1 -0
  496. package/dist/src/setup/primitives/run-pnpm.d.ts +1 -0
  497. package/dist/src/setup/primitives/run-pnpm.js +1 -0
  498. package/dist/src/setup/{scaffold/primitives → primitives}/run-vercel.d.ts +7 -0
  499. package/dist/src/setup/primitives/run-vercel.js +1 -0
  500. package/dist/src/setup/project-name.d.ts +4 -0
  501. package/dist/src/setup/project-name.js +1 -0
  502. package/dist/src/setup/project-resolution.d.ts +54 -0
  503. package/dist/src/setup/project-resolution.js +1 -0
  504. package/dist/src/setup/prompter.d.ts +52 -4
  505. package/dist/src/setup/prompter.js +1 -1
  506. package/dist/src/setup/quit-guard.d.ts +1 -1
  507. package/dist/src/setup/run-vercel-link.d.ts +1 -1
  508. package/dist/src/setup/run-vercel-link.js +1 -1
  509. package/dist/src/setup/runner.d.ts +5 -4
  510. package/dist/src/setup/runner.js +1 -1
  511. package/dist/src/setup/scaffold/channels-catalog.d.ts +3 -3
  512. package/dist/src/setup/scaffold/channels-catalog.js +1 -1
  513. package/dist/src/setup/scaffold/create/add-to-project.d.ts +26 -0
  514. package/dist/src/setup/scaffold/create/add-to-project.js +1 -0
  515. package/dist/src/setup/scaffold/create/project.d.ts +54 -0
  516. package/dist/src/setup/scaffold/create/project.js +80 -0
  517. package/dist/src/setup/scaffold/index.d.ts +4 -4
  518. package/dist/src/setup/scaffold/index.js +1 -1
  519. package/dist/src/setup/scaffold/{channels.d.ts → update/channels.d.ts} +11 -0
  520. package/dist/src/setup/scaffold/update/channels.js +7 -0
  521. package/dist/src/setup/scaffold/{connections.d.ts → update/connections.d.ts} +1 -1
  522. package/dist/src/setup/scaffold/update/connections.js +21 -0
  523. package/dist/src/setup/scaffold/version-tokens.d.ts +11 -0
  524. package/dist/src/setup/scaffold/version-tokens.js +1 -0
  525. package/dist/src/setup/{scaffold/steps/setup-slackbot.d.ts → slackbot.d.ts} +24 -20
  526. package/dist/src/setup/slackbot.js +1 -0
  527. package/dist/src/setup/state.d.ts +62 -15
  528. package/dist/src/setup/state.js +1 -1
  529. package/dist/src/setup/step.d.ts +9 -18
  530. package/dist/src/setup/vercel-project.d.ts +15 -8
  531. package/dist/src/setup/vercel-project.js +1 -1
  532. package/dist/src/shared/agent-definition.d.ts +5 -3
  533. package/dist/src/shared/default-agent-model.d.ts +5 -0
  534. package/dist/src/shared/default-agent-model.js +1 -0
  535. package/dist/src/source-change/apply-model-name.d.ts +25 -0
  536. package/dist/src/source-change/apply-model-name.js +2 -0
  537. package/dist/src/source-change/static-source-change.d.ts +36 -0
  538. package/dist/src/source-change/static-source-change.js +1 -0
  539. package/dist/src/svelte/index.js +1 -1
  540. package/dist/src/svelte/use-eve-agent.js +1 -1
  541. package/dist/src/vue/index.js +1 -1
  542. package/dist/src/vue/use-eve-agent.js +1 -1
  543. package/package.json +22 -42
  544. package/dist/docs/evals-v2-plan.md +0 -939
  545. package/dist/docs/public/advanced/dev-tui.md +0 -52
  546. package/dist/docs/public/advanced/evals.md +0 -158
  547. package/dist/docs/public/reference/faqs.md +0 -48
  548. package/dist/src/cli/commands/setup.d.ts +0 -55
  549. package/dist/src/cli/commands/setup.js +0 -1
  550. package/dist/src/cli/dev/repl/input-requests.d.ts +0 -38
  551. package/dist/src/cli/dev/repl/input-requests.js +0 -1
  552. package/dist/src/cli/dev/repl/input.d.ts +0 -19
  553. package/dist/src/cli/dev/repl/input.js +0 -1
  554. package/dist/src/cli/dev/repl/repl.d.ts +0 -62
  555. package/dist/src/cli/dev/repl/repl.js +0 -2
  556. package/dist/src/cli/dev/repl/terminal.d.ts +0 -21
  557. package/dist/src/cli/dev/repl/terminal.js +0 -5
  558. package/dist/src/compiled/_chunks/workflow/resume-hook-0Zk0zSvq.js +0 -12
  559. package/dist/src/compiled/_chunks/workflow/sleep-DXZr2BgM.js +0 -1
  560. package/dist/src/compiled/_chunks/workflow/symbols-BWCAoPHE.js +0 -48
  561. package/dist/src/evals/checks/checks.d.ts +0 -66
  562. package/dist/src/evals/checks/checks.js +0 -2
  563. package/dist/src/evals/checks/index.d.ts +0 -21
  564. package/dist/src/evals/checks/index.js +0 -1
  565. package/dist/src/evals/checks/match.js +0 -1
  566. package/dist/src/evals/define-eval-suite.d.ts +0 -18
  567. package/dist/src/evals/define-eval-suite.js +0 -1
  568. package/dist/src/evals/runner/execute-case.d.ts +0 -23
  569. package/dist/src/evals/runner/execute-case.js +0 -1
  570. package/dist/src/evals/runner/execute-suite.d.ts +0 -24
  571. package/dist/src/evals/runner/execute-suite.js +0 -1
  572. package/dist/src/evals/scorers/autoevals-client.js +0 -2
  573. package/dist/src/evals/scorers/autoevals.d.ts +0 -58
  574. package/dist/src/evals/scorers/autoevals.js +0 -1
  575. package/dist/src/evals/scorers/json.d.ts +0 -10
  576. package/dist/src/evals/scorers/json.js +0 -1
  577. package/dist/src/evals/scorers/model-marker.d.ts +0 -12
  578. package/dist/src/evals/scorers/model-marker.js +0 -1
  579. package/dist/src/evals/scorers/run.d.ts +0 -24
  580. package/dist/src/evals/scorers/run.js +0 -1
  581. package/dist/src/evals/scorers/sql.d.ts +0 -9
  582. package/dist/src/evals/scorers/sql.js +0 -1
  583. package/dist/src/evals/scorers/text.d.ts +0 -18
  584. package/dist/src/evals/scorers/text.js +0 -1
  585. package/dist/src/evals/scores/index.d.ts +0 -72
  586. package/dist/src/evals/scores/index.js +0 -1
  587. package/dist/src/execution/tool-compaction.d.ts +0 -9
  588. package/dist/src/execution/tool-compaction.js +0 -1
  589. package/dist/src/services/dev-client/stream.d.ts +0 -5
  590. package/dist/src/services/dev-client/stream.js +0 -1
  591. package/dist/src/services/dev-client/url.d.ts +0 -11
  592. package/dist/src/services/dev-client/url.js +0 -1
  593. package/dist/src/setup/channel-setup-prompter.d.ts +0 -8
  594. package/dist/src/setup/channel-setup-prompter.js +0 -1
  595. package/dist/src/setup/scaffold/channels.js +0 -7
  596. package/dist/src/setup/scaffold/cli/channel-add-prompter.d.ts +0 -12
  597. package/dist/src/setup/scaffold/cli/channel-add-prompter.js +0 -1
  598. package/dist/src/setup/scaffold/cli/channel-setup-prompter.d.ts +0 -56
  599. package/dist/src/setup/scaffold/cli/connection-add-prompter.d.ts +0 -44
  600. package/dist/src/setup/scaffold/cli/connection-add-prompter.js +0 -1
  601. package/dist/src/setup/scaffold/cli/index.js +0 -1
  602. package/dist/src/setup/scaffold/cli/prompt-ui.js +0 -5
  603. package/dist/src/setup/scaffold/cli/select-component.js +0 -1
  604. package/dist/src/setup/scaffold/cli/select-state.js +0 -1
  605. package/dist/src/setup/scaffold/connections.js +0 -21
  606. package/dist/src/setup/scaffold/pnpm-workspace.d.ts +0 -3
  607. package/dist/src/setup/scaffold/pnpm-workspace.js +0 -11
  608. package/dist/src/setup/scaffold/primitives/detect-deployment.d.ts +0 -13
  609. package/dist/src/setup/scaffold/primitives/detect-deployment.js +0 -1
  610. package/dist/src/setup/scaffold/primitives/index.d.ts +0 -3
  611. package/dist/src/setup/scaffold/primitives/index.js +0 -1
  612. package/dist/src/setup/scaffold/primitives/pnpm-invocation.d.ts +0 -12
  613. package/dist/src/setup/scaffold/primitives/pnpm-invocation.js +0 -1
  614. package/dist/src/setup/scaffold/primitives/run-pnpm.d.ts +0 -17
  615. package/dist/src/setup/scaffold/primitives/run-pnpm.js +0 -1
  616. package/dist/src/setup/scaffold/primitives/run-vercel.js +0 -1
  617. package/dist/src/setup/scaffold/project.d.ts +0 -21
  618. package/dist/src/setup/scaffold/project.js +0 -80
  619. package/dist/src/setup/scaffold/steps/deploy-to-vercel.d.ts +0 -17
  620. package/dist/src/setup/scaffold/steps/deploy-to-vercel.js +0 -1
  621. package/dist/src/setup/scaffold/steps/index.d.ts +0 -4
  622. package/dist/src/setup/scaffold/steps/index.js +0 -1
  623. package/dist/src/setup/scaffold/steps/project-resolution.d.ts +0 -19
  624. package/dist/src/setup/scaffold/steps/project-resolution.js +0 -1
  625. package/dist/src/setup/scaffold/steps/run-add-connection.d.ts +0 -40
  626. package/dist/src/setup/scaffold/steps/run-add-connection.js +0 -1
  627. package/dist/src/setup/scaffold/steps/run-add-to-agent.d.ts +0 -81
  628. package/dist/src/setup/scaffold/steps/run-add-to-agent.js +0 -2
  629. package/dist/src/setup/scaffold/steps/setup-connection.js +0 -1
  630. package/dist/src/setup/scaffold/steps/setup-slackbot.js +0 -1
  631. /package/dist/docs/public/{frontend → guides/frontend}/meta.json +0 -0
  632. /package/dist/docs/public/{advanced → guides}/remote-agents.md +0 -0
  633. /package/dist/src/{setup/scaffold/cli/channel-setup-prompter.js → cli/dev/tui/setup-flow.js} +0 -0
  634. /package/dist/src/evals/{scorers/autoevals-client.d.ts → autoevals-client.d.ts} +0 -0
  635. /package/dist/src/setup/{scaffold/cli → cli}/command-output.d.ts +0 -0
  636. /package/dist/src/setup/{scaffold/cli → cli}/command-output.js +0 -0
  637. /package/dist/src/setup/{scaffold/human-action.d.ts → human-action.d.ts} +0 -0
  638. /package/dist/src/setup/{scaffold/human-action.js → human-action.js} +0 -0
  639. /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.d.ts +0 -0
  640. /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.js +0 -0
  641. /package/dist/src/setup/scaffold/{web-template.d.ts → create/web-template.d.ts} +0 -0
  642. /package/dist/src/setup/scaffold/{web-template.js → create/web-template.js} +0 -0
  643. /package/dist/src/setup/scaffold/{module-files.d.ts → update/module-files.d.ts} +0 -0
  644. /package/dist/src/setup/scaffold/{module-files.js → update/module-files.js} +0 -0
  645. /package/dist/src/setup/scaffold/{package-json.d.ts → update/package-json.d.ts} +0 -0
  646. /package/dist/src/setup/scaffold/{package-json.js → update/package-json.js} +0 -0
  647. /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.d.ts +0 -0
  648. /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.js +0 -0
  649. /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.d.ts +0 -0
  650. /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.js +0 -0
@@ -1,39 +1,17 @@
1
1
  import type { LanguageModel } from "ai";
2
+ import type { StandardSchemaV1 } from "#compiled/@standard-schema/spec/index.js";
2
3
  import type { HandleMessageStreamEvent, RuntimeIdentity } from "#protocol/message.js";
3
- import type { InputRequest } from "#runtime/input/types.js";
4
+ import type { SendTurnInput, SessionState } from "#client/types.js";
5
+ import type { InputRequest, InputResponse } from "#runtime/input/types.js";
4
6
  import type { JsonObject } from "#shared/json.js";
5
7
  import type { AgentModelOptionsDefinition } from "#shared/agent-definition.js";
6
8
  import type { EvalReporter } from "#evals/runner/reporters/types.js";
9
+ import type { EveEvalSubagentCallMatchOptions, EveEvalToolCallMatchOptions } from "#evals/match.js";
7
10
  /**
8
- * One normalized eval case. Suites produce these from raw data sources.
11
+ * Assumptions an eval needs the runner to verify against the live target
12
+ * or eval process environment before executing it.
9
13
  */
10
- export interface EveEvalCase {
11
- /** Uniquely identifies the case within its suite. */
12
- readonly id: string;
13
- /**
14
- * The case prompt, or a structured record the task derives messages from.
15
- * A string is sent verbatim and a record is `JSON.stringify`d, unless the
16
- * task's `messages`/`prompt` reads specific fields.
17
- */
18
- readonly input: string | Record<string, unknown>;
19
- /**
20
- * Reference value scorers compare against. The runner coerces this to a
21
- * string for autoevals-compatible scorers; `args.case.expected` exposes it
22
- * unmodified.
23
- */
24
- readonly expected?: unknown;
25
- /**
26
- * Hard assertions for this case, appended to the suite-level `checks`.
27
- * Any failure marks the case failed and flips the CLI exit code.
28
- */
29
- readonly checks?: readonly EveEvalCheck[];
30
- /** Additional scorers for this case, appended to the suite-level `scores`. */
31
- readonly scores?: readonly EveEvalScorer[];
32
- /** Used by `--tag` filtering, and passed through for reporting. */
33
- readonly tags?: readonly string[];
34
- /** Passed through for reporting and Braintrust span logging. */
35
- readonly metadata?: Readonly<Record<string, unknown>>;
36
- }
14
+ export type EveEvalRequirement = "mockModels" | "devRoutes" | `env:${string}`;
37
15
  /**
38
16
  * One tool call extracted from the captured stream, pairing the
39
17
  * `actions.requested` request with its matching `action.result`.
@@ -87,12 +65,23 @@ export interface EveEvalDerivedFacts {
87
65
  readonly failureCode?: string;
88
66
  }
89
67
  /**
90
- * Full result of executing one eval case against an Eve agent.
68
+ * Captured event stream and facts for one session involved in an eval.
69
+ */
70
+ export interface EveEvalSessionResult {
71
+ readonly derived: EveEvalDerivedFacts;
72
+ readonly events: readonly HandleMessageStreamEvent[];
73
+ readonly primary: boolean;
74
+ readonly sessionId?: string;
75
+ readonly state: SessionState;
76
+ }
77
+ /**
78
+ * Full result of executing one eval against an Eve agent.
91
79
  */
92
80
  export interface EveEvalTaskResult {
93
81
  /**
94
- * The scored value. Defaults to `finalMessage` unless `task.parseOutput`
95
- * overrides it. Mutable because the runner assigns it after parsing.
82
+ * The agent's last assistant message (same as {@link finalMessage}), retained
83
+ * for reporters and artifacts that log a single "output" value. Mutable
84
+ * because the runner assigns it after the run completes.
96
85
  */
97
86
  output: unknown;
98
87
  /** The agent's last assistant message, or null when none was produced. */
@@ -106,8 +95,12 @@ export interface EveEvalTaskResult {
106
95
  readonly status: "completed" | "failed" | "waiting";
107
96
  /** The captured stream events from the run. */
108
97
  readonly events: readonly HandleMessageStreamEvent[];
98
+ /** Lines written through `t.log` while the eval ran. */
99
+ readonly logs?: readonly string[];
109
100
  /** Facts extracted from the stream (tool calls, message counts, etc.). */
110
101
  readonly derived: EveEvalDerivedFacts;
102
+ /** Per-session event streams captured while executing this eval. */
103
+ readonly sessions?: readonly EveEvalSessionResult[];
111
104
  /**
112
105
  * Runtime identity metadata captured from the `session.started` stream event.
113
106
  * Present when the Eve server populates the event with its runtime metadata.
@@ -115,116 +108,175 @@ export interface EveEvalTaskResult {
115
108
  readonly runtimeIdentity?: RuntimeIdentity;
116
109
  }
117
110
  /**
118
- * Result returned by a single scorer invocation.
111
+ * How a failing assertion affects the verdict. A `"gate"` is a hard
112
+ * assertion: missing it fails the eval. A `"soft"` assertion is tracked
113
+ * data that only fails the eval under `eve eval --strict` (and only when it
114
+ * carries a threshold).
119
115
  */
120
- export interface EveEvalScorerResult {
121
- /** Scorer name. Used as the key in Braintrust score maps. */
122
- readonly name: string;
123
- /** Score between 0 and 1, or null if the scorer could not produce a score. */
124
- readonly score: number | null;
125
- /** Optional metadata for debugging or Braintrust span logging. */
126
- readonly metadata?: Readonly<Record<string, unknown>>;
127
- }
116
+ export type AssertionSeverity = "gate" | "soft";
128
117
  /**
129
- * Arguments passed to every scorer invocation.
118
+ * A value-level assertion produced by the builders in `eve/evals/expect`
119
+ * (e.g. `includes`, `equals`, `similarity`) and applied to an explicit value
120
+ * via `t.check(value, assertion)`. Boolean assertions score exactly 0 or 1.
130
121
  *
131
- * The flat `input`, `output`, and `expected` fields are coerced to `string`
132
- * for compatibility with Braintrust autoevals scorers (e.g. `Factuality`,
133
- * `Levenshtein`). For the original values, use `case.input`, `case.expected`,
134
- * and `result.output`.
122
+ * The chainable `gate`/`soft`/`atLeast` return a new assertion with the
123
+ * severity or threshold overridden, so the threshold rides on the assertion
124
+ * itself rather than a detached map.
135
125
  */
136
- export interface EveEvalScorerArgs {
137
- /** The eval case input, coerced to string. Autoevals-compatible. */
138
- readonly input: string;
139
- /** The task output, coerced to string. Autoevals-compatible. */
140
- readonly output: string;
141
- /** The expected value, coerced to string. Autoevals-compatible. */
142
- readonly expected?: string | undefined;
143
- /**
144
- * Suite-level scorer model, when the suite provides one. This does not
145
- * change the target Eve agent model. `undefined` when the suite omits
146
- * `model`; model-backed scorers throw a descriptive error in that case.
147
- */
148
- readonly model: LanguageModel | undefined;
149
- /** Suite-level provider options for model-backed scorers. */
150
- readonly modelOptions?: AgentModelOptionsDefinition;
151
- /** Full eval case. */
152
- readonly case: EveEvalCase;
153
- /** Full task result with events and derived facts. */
154
- readonly result: EveEvalTaskResult;
126
+ export interface Assertion {
127
+ readonly name: string;
128
+ readonly severity: AssertionSeverity;
129
+ /** Minimum passing score. `undefined` on a soft assertion = tracked only. */
130
+ readonly threshold?: number;
131
+ score(value: unknown): number | Promise<number>;
132
+ gate(threshold?: number): Assertion;
133
+ soft(threshold?: number): Assertion;
134
+ atLeast(threshold: number): Assertion;
155
135
  }
156
136
  /**
157
- * Receives flattened input/output/expected fields (autoevals-compatible) plus
158
- * the full Eve case and result. Return `null` to skip scoring a case (e.g. when
159
- * expected is absent).
137
+ * Handle to a recorded assertion, returned by every `t` assertion method.
138
+ * Chain `gate`/`soft`/`atLeast` to override the recorded severity or
139
+ * threshold, and `await` it to surface model-backed (judge) errors and ensure
140
+ * the assertion has resolved before the run continues.
160
141
  */
161
- export type EveEvalScorer = (args: EveEvalScorerArgs) => EveEvalScorerResult | Promise<EveEvalScorerResult | null> | null;
142
+ export interface AssertionHandle extends PromiseLike<void> {
143
+ gate(threshold?: number): this;
144
+ soft(threshold?: number): this;
145
+ atLeast(threshold: number): this;
146
+ }
162
147
  /**
163
- * Result returned by a single check invocation.
164
- *
165
- * Unlike scores, checks are hard assertions: any `passed: false` marks the
166
- * case failed and produces a non-zero `eve eval` exit code.
148
+ * The recorded outcome of one assertion, consumed by the verdict, reporters,
149
+ * and artifacts. A boolean assertion has `score` 0 or 1.
167
150
  */
168
- export interface EveEvalCheckResult {
151
+ export interface AssertionResult {
169
152
  readonly name: string;
153
+ readonly score: number;
154
+ readonly severity: AssertionSeverity;
155
+ readonly threshold?: number;
170
156
  readonly passed: boolean;
171
157
  /** Human-readable failure detail, shown in console output and artifacts. */
172
158
  readonly message?: string;
173
159
  readonly metadata?: Readonly<Record<string, unknown>>;
174
160
  }
175
161
  /**
176
- * Arguments passed to every check invocation. Checks see the same result data
177
- * as scorers but never receive a judge model — they are deterministic
178
- * assertions over the captured run.
162
+ * Driver for one session, exposed on the eval context and by `t.newSession()`.
179
163
  */
180
- export interface EveEvalCheckArgs {
181
- readonly case: EveEvalCase;
182
- readonly result: EveEvalTaskResult;
183
- /** The target under test, so checks can reference runner-assigned values like its URL. */
184
- readonly target: EveEvalTarget;
164
+ export interface EveEvalSession {
165
+ /** All events observed on this session so far. */
166
+ readonly events: readonly HandleMessageStreamEvent[];
167
+ /** Input requests left pending by the last parked turn. */
168
+ readonly pendingInputRequests: readonly InputRequest[];
169
+ /** Serializable cursor for resuming this session. */
170
+ readonly state: SessionState;
171
+ /** Eve session id after the first successful send. */
172
+ readonly sessionId: string | undefined;
173
+ /** Assert the last turn parked on HITL input and return matching requests. */
174
+ expectInputRequests(filter?: {
175
+ readonly display?: InputRequest["display"];
176
+ readonly toolName?: string;
177
+ }): readonly InputRequest[];
178
+ /** Resolve specific pending requests and run the resumed turn. */
179
+ respond(...responses: InputResponse[]): Promise<EveEvalTurn>;
180
+ /** Resolve every pending request with the same option id. */
181
+ respondAll(optionId: string): Promise<EveEvalTurn>;
182
+ /** Send one turn through this session. */
183
+ send(input: SendTurnInput): Promise<EveEvalTurn>;
184
+ /** Send one text turn with a local file attached as a data URL. */
185
+ sendFile(text: string, filePath: string, mediaType?: string): Promise<EveEvalTurn>;
185
186
  }
186
187
  /**
187
- * One hard assertion over a completed eval case. Built-ins live in
188
- * `eve/evals/checks`; custom checks are plain functions with this shape.
188
+ * One completed eval-driver turn.
189
189
  */
190
- export type EveEvalCheck = (args: EveEvalCheckArgs) => EveEvalCheckResult | Promise<EveEvalCheckResult>;
191
- export interface EveEvalTaskFields {
192
- readonly messages?: (testCase: EveEvalCase) => string[];
193
- /**
194
- * Transform the raw task result into the scored output value. Available on
195
- * every task variant. When omitted, `result.output` defaults to
196
- * `result.finalMessage`.
197
- */
198
- readonly parseOutput?: (result: EveEvalTaskResult) => unknown;
199
- readonly prompt?: (testCase: EveEvalCase) => string;
190
+ export interface EveEvalTurn {
191
+ readonly data: unknown;
192
+ readonly events: readonly HandleMessageStreamEvent[];
193
+ readonly inputRequests: readonly InputRequest[];
194
+ readonly message: string | undefined;
195
+ readonly status: "completed" | "failed" | "waiting";
196
+ readonly toolCalls: readonly EveEvalToolCall[];
197
+ expectOk(): this;
200
198
  }
201
199
  /**
202
- * Declarative task configuration for a suite. The runner owns session
203
- * lifecycle, stream capture, and derived metadata; suites only declare how to
204
- * derive messages and parse outputs.
200
+ * The judge model used by `t.judge.*` assertions, configured per-eval or as
201
+ * the run-wide default in `evals.config.ts`. Only ever used for scoring; it
202
+ * never changes the agent under test. String model ids route through the
203
+ * Vercel AI Gateway; provider model instances run directly.
205
204
  */
206
- export type EveEvalTask = (EveEvalTaskFields & {
207
- /**
208
- * Derive an ordered list of messages for a multi-turn eval.
209
- * Mutually exclusive with `prompt`.
210
- */
211
- readonly messages: (testCase: EveEvalCase) => string[];
212
- readonly prompt?: never;
213
- }) | (EveEvalTaskFields & {
214
- readonly messages?: never;
215
- /**
216
- * Derive a single prompt string from one eval case.
217
- * Mutually exclusive with `messages`.
218
- */
219
- readonly prompt: (testCase: EveEvalCase) => string;
220
- }) | {
221
- readonly messages?: never;
222
- /** See {@link EveEvalTaskFields.parseOutput}. */
223
- readonly parseOutput?: (result: EveEvalTaskResult) => unknown;
224
- readonly prompt?: never;
225
- };
205
+ export interface EveEvalJudgeConfig {
206
+ readonly model: LanguageModel;
207
+ readonly modelOptions?: AgentModelOptionsDefinition;
208
+ }
209
+ /**
210
+ * Per-call options for `t.judge.autoevals.*` assertions.
211
+ */
212
+ export interface JudgeOpts {
213
+ /** Value to grade. Defaults to the final assistant message (`t.reply`). */
214
+ readonly on?: unknown;
215
+ /** Judge model for this call only; overrides the eval/config judge model. */
216
+ readonly model?: LanguageModel;
217
+ readonly modelOptions?: AgentModelOptionsDefinition;
218
+ }
219
+ /**
220
+ * Braintrust autoevals graders, bound to the resolved judge model. The grader
221
+ * family is named so its semantics are explicit: `factuality`'s consistency
222
+ * buckets and `closedQA`'s yes/no grading are autoevals' behavior, not Eve's.
223
+ * These are Eve-owned wrappers, not the raw library.
224
+ */
225
+ export interface AutoevalsJudges {
226
+ factuality(expected: string, opts?: JudgeOpts): AssertionHandle;
227
+ summarizes(expected: string, opts?: JudgeOpts): AssertionHandle;
228
+ closedQA(criteria: string, opts?: JudgeOpts): AssertionHandle;
229
+ sql(expected: string, opts?: JudgeOpts): AssertionHandle;
230
+ }
231
+ /**
232
+ * Model-backed assertion namespaces on `t.judge`. A future non-autoevals
233
+ * engine would slot in as a sibling of `autoevals`.
234
+ */
235
+ export interface JudgeContext {
236
+ readonly autoevals: AutoevalsJudges;
237
+ }
226
238
  /**
227
- * Describes the Eve server an eval suite runs against.
239
+ * The single context passed to an eval's `test(t)` function. It drives the
240
+ * primary session (it extends {@link EveEvalSession}), carries the run-level
241
+ * and value-level assertion vocabulary, and exposes `judge` for LLM-as-judge.
242
+ *
243
+ * Run-level assertions (`completed`, `calledTool`, …) record an entry
244
+ * evaluated against the final run and never throw; `check` and `judge`
245
+ * evaluate the supplied value immediately. Use plain `throw` /
246
+ * `turn.expectOk()` for bespoke preconditions that should abort the run.
247
+ */
248
+ export interface EveEvalContext extends EveEvalSession {
249
+ /** Eval timeout signal. */
250
+ readonly signal: AbortSignal;
251
+ /** Current target under test. */
252
+ readonly target: EveEvalTargetHandle;
253
+ /** The primary session's last assistant message, or null. */
254
+ readonly reply: string | null;
255
+ /** Structured eval log hook. */
256
+ log(message: string): void;
257
+ /** Create an additional independent session against the same target. */
258
+ newSession(): EveEvalSession;
259
+ completed(): AssertionHandle;
260
+ didNotFail(): AssertionHandle;
261
+ waiting(): AssertionHandle;
262
+ messageIncludes(token: string | RegExp): AssertionHandle;
263
+ calledTool(name: string, options?: EveEvalToolCallMatchOptions): AssertionHandle;
264
+ notCalledTool(name: string): AssertionHandle;
265
+ toolOrder(names: readonly string[]): AssertionHandle;
266
+ usedNoTools(): AssertionHandle;
267
+ maxToolCalls(max: number): AssertionHandle;
268
+ calledSubagent(name: string, options?: EveEvalSubagentCallMatchOptions): AssertionHandle;
269
+ noFailedActions(): AssertionHandle;
270
+ event(predicate: (events: readonly HandleMessageStreamEvent[]) => boolean, label: string): AssertionHandle;
271
+ outputEquals(value: unknown): AssertionHandle;
272
+ outputMatches(schema: StandardSchemaV1): AssertionHandle;
273
+ /** Apply a value-level assertion (from `eve/evals/expect`) to a value. */
274
+ check(value: unknown, assertion: Assertion): AssertionHandle;
275
+ /** LLM-as-judge assertions, bound to the resolved judge model. */
276
+ readonly judge: JudgeContext;
277
+ }
278
+ /**
279
+ * Describes the Eve server an eval runs against.
228
280
  */
229
281
  export interface EveEvalTarget {
230
282
  /**
@@ -234,152 +286,175 @@ export interface EveEvalTarget {
234
286
  readonly kind: "local" | "remote";
235
287
  /** Base HTTP URL the eval client connects to and sends message requests. */
236
288
  readonly url: string;
289
+ /** Capabilities discovered from the live target's info route. */
290
+ readonly capabilities: EveEvalTargetCapabilities;
291
+ }
292
+ export interface EveEvalTargetCapabilities {
293
+ readonly devRoutes: boolean;
294
+ readonly mockModels: boolean;
295
+ }
296
+ export interface EveEvalScheduleDispatchResult {
297
+ readonly scheduleId: string;
298
+ readonly sessionIds: readonly string[];
299
+ }
300
+ /**
301
+ * Live target handle exposed to eval runs.
302
+ */
303
+ export interface EveEvalTargetHandle extends EveEvalTarget {
304
+ /** Dispatch a dev-only authored schedule. Requires declaring `"devRoutes"`. */
305
+ dispatchSchedule(scheduleId: string): Promise<EveEvalScheduleDispatchResult>;
306
+ /** Authenticated fetch against the target base URL. */
307
+ fetch(path: string, init?: RequestInit): Promise<Response>;
308
+ /** Attach to a pre-existing session and consume one turn boundary. */
309
+ attachSession(sessionId: string, opts?: {
310
+ readonly startIndex?: number;
311
+ }): Promise<EveEvalSession>;
237
312
  }
238
313
  /**
239
- * Shared fields between the user-facing input and the validated suite.
314
+ * Shared fields between the user-facing input and the validated eval.
240
315
  *
241
- * Suite identity (`id`) is derived from the `evals/<path>.eval.ts` file
316
+ * Eval identity (`id`) is derived from the `evals/<path>.eval.ts` file
242
317
  * path by the discovery layer; it is not authored on the input.
243
318
  */
244
- interface EveEvalSuiteBase {
319
+ interface EveEvalBase {
245
320
  readonly description?: string;
246
- readonly task?: EveEvalTask;
247
- /**
248
- * Hard assertions applied to every case in the suite. Case-level `checks`
249
- * append to these. Any failed check marks the case failed and produces a
250
- * non-zero `eve eval` exit code, unlike scores which stay soft data.
251
- */
252
- readonly checks?: readonly EveEvalCheck[];
253
- readonly scores: readonly EveEvalScorer[];
254
321
  /**
255
- * Model used by model-backed scorers in this suite. Required only when a
256
- * model-backed scorer (e.g. the `Autoevals` wrappers) is present without
257
- * its own per-scorer model override.
258
- *
259
- * String model IDs route through the Vercel AI Gateway; the runner uses
260
- * provider model instances directly. This model is only for scoring and
261
- * never changes the Eve agent under test.
322
+ * Target/process assumptions verified before execution. The eval is
323
+ * skipped when any requirement is unmet.
262
324
  */
263
- readonly model?: LanguageModel;
264
- /**
265
- * Provider-specific options passed to model-backed scorers.
266
- */
267
- readonly modelOptions?: AgentModelOptionsDefinition;
325
+ readonly requires?: readonly EveEvalRequirement[];
268
326
  /**
269
- * Maximum number of cases the runner executes at once.
270
- * Defaults to 8 when omitted.
327
+ * Judge model for this eval's `t.judge.*` assertions. Optional: when
328
+ * omitted, judge assertions fall back to the `judge` declared in
329
+ * `evals.config.ts`. Only used for scoring; never changes the agent
330
+ * under test.
271
331
  */
272
- readonly maxConcurrency?: number;
332
+ readonly judge?: EveEvalJudgeConfig;
273
333
  readonly timeoutMs?: number;
274
- /** Used by `--tag` filtering: a suite carrying a requested tag runs all its cases. */
334
+ /** Used by `--tag` filtering. */
275
335
  readonly tags?: readonly string[];
276
336
  readonly metadata?: Readonly<Record<string, unknown>>;
277
337
  readonly reporters?: readonly EvalReporter[];
278
- /**
279
- * Minimum score thresholds per scorer name. A case "passes" when every
280
- * scorer meets or exceeds its threshold. Scorers not listed here
281
- * default to a threshold of 1.0 (exact match).
282
- *
283
- * @example
284
- * ```ts
285
- * thresholds: {
286
- * "Factuality": 0.5,
287
- * "run.didNotFail": 1.0,
288
- * }
289
- * ```
290
- */
291
- readonly thresholds?: Readonly<Record<string, number>>;
292
338
  }
293
339
  /**
294
- * Complete top-level key set accepted by {@link defineEvalSuite}, used to reject
295
- * unknown authored keys. The stricter {@link EveEvalSuiteInput} union enforces
296
- * `load`/`cases` exclusivity.
340
+ * Complete top-level key set accepted by {@link defineEval}, used to reject
341
+ * unknown authored keys.
297
342
  */
298
- export interface EveEvalSuiteInputFields extends Omit<EveEvalSuiteBase, "task"> {
299
- readonly cases?: readonly EveEvalCase[];
300
- readonly task?: EveEvalTaskFields;
301
- load?(): Promise<EveEvalCase[]>;
343
+ export interface EveEvalInputFields extends EveEvalBase {
344
+ readonly test?: (t: EveEvalContext) => void | Promise<void>;
302
345
  }
303
346
  /**
304
- * Full suite input passed to `defineEvalSuite()`.
347
+ * Full eval input passed to `defineEval()`.
305
348
  *
306
- * Provide either `load` (async function) or `cases` (static array), not both;
307
- * `cases` causes the runner to synthesize a `load` internally. Suite identity is
308
- * derived from the file path, so authors do not specify an `id` or `name`.
349
+ * Each eval file is exactly one case: an imperative `test(t)` function that
350
+ * drives the agent and asserts on what it produced. Eval identity is derived
351
+ * from the file path, so authors do not specify an `id` or `name`.
309
352
  */
310
- export type EveEvalSuiteInput = (EveEvalSuiteBase & {
311
- readonly cases?: never;
312
- /** Load cases dynamically. Mutually exclusive with `cases`. */
313
- load(): Promise<EveEvalCase[]>;
314
- }) | (EveEvalSuiteBase & {
315
- /** Static inline cases. Mutually exclusive with `load`. */
316
- readonly cases: readonly EveEvalCase[];
317
- load?: never;
318
- });
353
+ export interface EveEvalInput extends EveEvalBase {
354
+ /** Imperative interaction-and-assertion script. */
355
+ test(t: EveEvalContext): void | Promise<void>;
356
+ }
319
357
  /**
320
- * Suite returned by `defineEvalSuite()`. Carries no `id` yet: discovery stamps
321
- * the path-derived id at import time to produce a full {@link EveEvalSuite}. The
322
- * `_tag` literal (`"EveEvalSuite"`) brands the value so discovery and the runner
323
- * can recognize a defined suite.
358
+ * Eval returned by `defineEval()`. Carries no `id` yet: discovery stamps
359
+ * the path-derived id at import time to produce a full {@link EveEval}. The
360
+ * `_tag` literal (`"EveEval"`) brands the value so discovery and the runner
361
+ * can recognize a defined eval.
324
362
  */
325
- export interface EveEvalSuiteDefinition extends EveEvalSuiteBase {
326
- readonly _tag: "EveEvalSuite";
327
- /** Always present. Synthesized from `cases` when the input uses static cases. */
328
- load(): Promise<EveEvalCase[]>;
329
- }
363
+ export type EveEvalDefinition = EveEvalInput & {
364
+ readonly _tag: "EveEval";
365
+ };
330
366
  /**
331
- * Validated suite consumed by the runner and reporters. The `id` is the
332
- * path-derived slug attached by discovery
333
- * (e.g. `evals/weather.eval.ts` → `"weather"`).
367
+ * Validated eval consumed by the runner and reporters. The `id` is the
368
+ * path-derived slug attached by discovery (e.g. `evals/weather.eval.ts` →
369
+ * `"weather"`, `evals/runtime/multi-turn.eval.ts` → `"runtime/multi-turn"`).
370
+ * Files that default-export an array of evals derive
371
+ * `<file-id>/<zero-padded index>` ids (e.g. `"weather/0000"`).
334
372
  */
335
- export interface EveEvalSuite extends EveEvalSuiteDefinition {
373
+ export type EveEval = EveEvalDefinition & {
336
374
  readonly id: string;
337
- }
375
+ };
338
376
  /**
339
- * Per-case outcome computed by the runner:
377
+ * Per-eval outcome computed by the runner:
340
378
  *
341
- * - `"passed"` — no execution error, every check passed, every score met its threshold
342
- * - `"failed"` — a check failed or execution errored (timeout, transport, thrown task)
343
- * - `"scored"` — passed checks but at least one score fell below its threshold
344
- * - `"skipped"` — the case was not executed (reserved for unmet `requires` entries)
379
+ * - `"passed"` — no execution error, every gate held, every soft threshold met
380
+ * - `"failed"` — a gate assertion failed or execution errored (timeout, transport failure, thrown exception)
381
+ * - `"scored"` — every gate held but a soft assertion fell below its threshold
382
+ * - `"skipped"` — the eval was not executed (unmet `requires` entries)
345
383
  */
346
- export type EveEvalCaseVerdict = "passed" | "failed" | "scored" | "skipped";
384
+ export type EveEvalVerdict = "passed" | "failed" | "scored" | "skipped";
347
385
  /**
348
- * Result of scoring one eval case.
386
+ * Result of executing and asserting one eval.
387
+ *
388
+ * `id` is the path-derived eval id
389
+ * (e.g. `evals/weather.eval.ts` → `"weather"`).
349
390
  */
350
- export interface EveEvalCaseResult {
351
- readonly case: EveEvalCase;
391
+ export interface EveEvalResult {
392
+ readonly id: string;
352
393
  readonly result: EveEvalTaskResult;
353
- /** Hard-assertion results (suite-level checks first, then case-level). */
354
- readonly checks: readonly EveEvalCheckResult[];
355
- readonly scores: readonly EveEvalScorerResult[];
356
- /** Per-case verdict; see {@link EveEvalCaseVerdict}. */
357
- readonly verdict: EveEvalCaseVerdict;
394
+ /** Every assertion recorded by the eval's `test(t)`, in record order. */
395
+ readonly assertions: readonly AssertionResult[];
396
+ /** Per-eval verdict; see {@link EveEvalVerdict}. */
397
+ readonly verdict: EveEvalVerdict;
358
398
  readonly error?: string;
359
- /** Why the case was skipped, when `verdict` is `"skipped"`. */
399
+ /** Why the eval was skipped, when `verdict` is `"skipped"`. */
360
400
  readonly skipReason?: string;
401
+ readonly startedAt: string;
402
+ readonly completedAt: string;
361
403
  }
362
404
  /**
363
- * Aggregated result for one suite run.
364
- *
365
- * `suite` is the path-derived suite id
366
- * (e.g. `evals/weather.eval.ts` → `"weather"`).
405
+ * Aggregated outcome of one `eve eval` run across every executed eval.
367
406
  */
368
- export interface EveEvalSuiteResult {
369
- readonly suite: string;
407
+ export interface EveEvalRunSummary {
370
408
  readonly target: EveEvalTarget;
371
- readonly cases: readonly EveEvalCaseResult[];
409
+ readonly results: readonly EveEvalResult[];
372
410
  readonly startedAt: string;
373
411
  readonly completedAt: string;
374
- /** Cases with verdict `"passed"`. */
412
+ /** Evals with verdict `"passed"`. */
375
413
  readonly passed: number;
376
- /** Cases with verdict `"failed"` (check failures and execution errors). */
414
+ /** Evals with verdict `"failed"` (gate failures and execution errors). */
377
415
  readonly failed: number;
378
- /** Cases with verdict `"scored"` (below-threshold scores only). */
416
+ /** Evals with verdict `"scored"` (below-threshold soft assertions only). */
379
417
  readonly scored: number;
380
- /** Cases with verdict `"skipped"`. */
418
+ /** Evals with verdict `"skipped"`. */
381
419
  readonly skipped: number;
382
420
  /** The execution-error subset of `failed` (timeouts, connection failures, exceptions). */
383
421
  readonly errored: number;
384
422
  }
423
+ /**
424
+ * Run-wide eval configuration authored in `evals.config.ts`.
425
+ *
426
+ * Exactly one `evals.config.ts` is required at the root of the `evals/`
427
+ * directory; it supplies the defaults every eval in the run shares.
428
+ */
429
+ export interface EveEvalConfigInput {
430
+ /**
431
+ * Default judge model for `t.judge.*` assertions across every eval.
432
+ * Optional: evals that use no judge need not set it, and individual evals
433
+ * may override it with their own `judge`. Only ever used for scoring.
434
+ */
435
+ readonly judge?: EveEvalJudgeConfig;
436
+ /**
437
+ * Reporters that observe every eval in the run (e.g. a shared
438
+ * `Braintrust()` experiment). Suppressed by `eve eval --skip-report`.
439
+ */
440
+ readonly reporters?: readonly EvalReporter[];
441
+ /**
442
+ * Default maximum number of evals executing at once. Must be a positive
443
+ * integer. `eve eval --max-concurrency` overrides it; defaults to 8 when
444
+ * neither is set.
445
+ */
446
+ readonly maxConcurrency?: number;
447
+ /**
448
+ * Default per-eval timeout in milliseconds. An eval's own `timeoutMs`
449
+ * overrides it, and `eve eval --timeout` overrides both.
450
+ */
451
+ readonly timeoutMs?: number;
452
+ }
453
+ /**
454
+ * Validated eval run configuration returned by `defineEvalConfig()`. The
455
+ * `_tag` literal brands the value so discovery can recognize it.
456
+ */
457
+ export type EveEvalConfig = EveEvalConfigInput & {
458
+ readonly _tag: "EveEvalConfig";
459
+ };
385
460
  export {};
@@ -0,0 +1,14 @@
1
+ import type { ModelMessage } from "ai";
2
+ /**
3
+ * Re-applies framework-owned state preservation after the harness compacts
4
+ * message history, returning any messages to append to the compacted history.
5
+ *
6
+ * Runs the framework's built-in preservation steps:
7
+ * - resets read-before-write tracking, so a write after compaction must first re-read
8
+ * the file whose read evidence was summarized away;
9
+ * - re-injects the todo list (when present), so the model keeps its task list.
10
+ *
11
+ * Must be called inside the harness step's `AlsContext`; both preservation steps read
12
+ * durable context state.
13
+ */
14
+ export declare function preserveFrameworkStateOnCompaction(): readonly ModelMessage[];
@@ -0,0 +1 @@
1
+ /* Minified build of preserveFrameworkStateOnCompaction: calls clearReadFileState() first, then getTodoCompactionMessage(); returns an empty array when that message is undefined, otherwise a one-element array containing it. */ import{clearReadFileState}from"#runtime/framework-tools/file-state.js";import{getTodoCompactionMessage}from"#runtime/framework-tools/todo.js";function preserveFrameworkStateOnCompaction(){clearReadFileState();let e=getTodoCompactionMessage();return e===void 0?[]:[e]}export{preserveFrameworkStateOnCompaction};