eve 0.6.0-beta.9 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +281 -0
- package/README.md +9 -6
- package/dist/docs/public/README.md +17 -12
- package/dist/docs/public/agent-config.md +10 -10
- package/dist/docs/public/channels/custom.mdx +4 -4
- package/dist/docs/public/channels/discord.mdx +1 -1
- package/dist/docs/public/channels/eve.mdx +10 -10
- package/dist/docs/public/channels/github.mdx +1 -1
- package/dist/docs/public/channels/overview.mdx +21 -15
- package/dist/docs/public/channels/slack.mdx +16 -4
- package/dist/docs/public/channels/teams.mdx +1 -1
- package/dist/docs/public/channels/telegram.mdx +1 -1
- package/dist/docs/public/channels/twilio.mdx +1 -1
- package/dist/docs/public/{advanced → concepts}/context-control.md +3 -3
- package/dist/docs/public/{advanced → concepts}/default-harness.md +5 -5
- package/dist/docs/public/{advanced → concepts}/execution-model-and-durability.md +3 -1
- package/dist/docs/public/concepts/meta.json +10 -0
- package/dist/docs/public/{advanced → concepts}/security-model.md +3 -3
- package/dist/docs/public/{advanced → concepts}/sessions-runs-and-streaming.md +7 -7
- package/dist/docs/public/connections.mdx +6 -4
- package/dist/docs/public/evals/assertions.mdx +108 -0
- package/dist/docs/public/evals/cases.mdx +143 -0
- package/dist/docs/public/evals/judge.mdx +94 -0
- package/dist/docs/public/evals/meta.json +4 -0
- package/dist/docs/public/evals/overview.mdx +118 -0
- package/dist/docs/public/evals/reporters.mdx +62 -0
- package/dist/docs/public/evals/running.mdx +63 -0
- package/dist/docs/public/evals/targets.mdx +54 -0
- package/dist/docs/public/getting-started.mdx +38 -33
- package/dist/docs/public/{advanced → guides}/auth-and-route-protection.md +5 -3
- package/dist/docs/public/{client → guides/client}/continuations.mdx +2 -2
- package/dist/docs/public/{client → guides/client}/messages.mdx +1 -1
- package/dist/docs/public/{client → guides/client}/meta.json +1 -1
- package/dist/docs/public/{client → guides/client}/output-schema.mdx +2 -2
- package/dist/docs/public/{client → guides/client}/overview.mdx +5 -5
- package/dist/docs/public/{client → guides/client}/streaming.mdx +1 -1
- package/dist/docs/public/{advanced → guides}/deployment.md +9 -1
- package/dist/docs/public/guides/dev-tui.md +50 -0
- package/dist/docs/public/{advanced → guides}/dynamic-capabilities.md +1 -1
- package/dist/docs/public/{advanced → guides}/dynamic-workflows.md +1 -1
- package/dist/docs/public/{frontend → guides/frontend}/nextjs.mdx +16 -7
- package/dist/docs/public/{frontend → guides/frontend}/nuxt.mdx +7 -7
- package/dist/docs/public/{frontend → guides/frontend}/overview.mdx +6 -6
- package/dist/docs/public/{frontend → guides/frontend}/sveltekit.mdx +5 -5
- package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-svelte.mdx +2 -2
- package/dist/docs/public/{frontend → guides/frontend}/use-eve-agent-vue.mdx +2 -2
- package/dist/docs/public/{advanced → guides}/hooks.md +2 -2
- package/dist/docs/public/{advanced → guides}/instrumentation.md +3 -1
- package/dist/docs/public/{advanced → guides}/meta.json +8 -12
- package/dist/docs/public/{advanced → guides}/session-context.md +3 -3
- package/dist/docs/public/{advanced → guides}/state.md +1 -1
- package/dist/docs/public/instructions.mdx +2 -2
- package/dist/docs/public/introduction.md +5 -2
- package/dist/docs/public/meta.json +4 -3
- package/dist/docs/public/reference/cli.md +35 -19
- package/dist/docs/public/reference/meta.json +1 -1
- package/dist/docs/public/reference/project-layout.md +5 -1
- package/dist/docs/public/reference/typescript-api.md +27 -23
- package/dist/docs/public/sandbox.mdx +1 -1
- package/dist/docs/public/schedules.mdx +2 -2
- package/dist/docs/public/skills.mdx +3 -3
- package/dist/docs/public/subagents.mdx +3 -3
- package/dist/docs/public/tools.mdx +4 -8
- package/dist/docs/public/tutorial/connect-a-warehouse.mdx +3 -3
- package/dist/docs/public/tutorial/first-agent.mdx +6 -3
- package/dist/docs/public/tutorial/guard-the-spend.mdx +1 -1
- package/dist/docs/public/tutorial/how-it-runs.mdx +2 -2
- package/dist/docs/public/tutorial/meta.json +1 -1
- package/dist/docs/public/tutorial/query-sample-data.mdx +1 -1
- package/dist/docs/public/tutorial/remember-definitions.mdx +3 -3
- package/dist/docs/public/tutorial/run-analysis.mdx +1 -1
- package/dist/docs/public/tutorial/ship-it.mdx +4 -4
- package/dist/docs/public/tutorial/team-playbooks.mdx +3 -3
- package/dist/src/chunks/{use-eve-agent-DCZbkLG7.js → use-eve-agent-DErQj5hs.js} +125 -37
- package/dist/src/chunks/{use-eve-agent-DoheC4_o.js → use-eve-agent-DoR8C4i6.js} +125 -37
- package/dist/src/cli/banner.d.ts +7 -0
- package/dist/src/cli/banner.js +1 -0
- package/dist/src/cli/commands/channel-add-conflicts.d.ts +1 -1
- package/dist/src/cli/commands/channels.d.ts +9 -6
- package/dist/src/cli/commands/channels.js +1 -1
- package/dist/src/cli/commands/deploy.d.ts +21 -0
- package/dist/src/cli/commands/deploy.js +1 -0
- package/dist/src/cli/commands/init-git.d.ts +15 -0
- package/dist/src/cli/commands/init-git.js +1 -0
- package/dist/src/cli/commands/init.d.ts +29 -0
- package/dist/src/cli/commands/init.js +1 -0
- package/dist/src/cli/commands/link.d.ts +21 -0
- package/dist/src/cli/commands/link.js +1 -0
- package/dist/src/cli/commands/preconditions.d.ts +7 -0
- package/dist/src/cli/commands/preconditions.js +1 -0
- package/dist/src/cli/commands/register-project-commands.d.ts +12 -0
- package/dist/src/cli/commands/register-project-commands.js +1 -0
- package/dist/src/cli/dev/tui/agent-header.d.ts +15 -9
- package/dist/src/cli/dev/tui/agent-header.js +1 -1
- package/dist/src/cli/dev/tui/blocks.d.ts +1 -1
- package/dist/src/cli/dev/tui/blocks.js +3 -2
- package/dist/src/cli/dev/tui/command-typeahead.d.ts +47 -0
- package/dist/src/cli/dev/tui/command-typeahead.js +1 -0
- package/dist/src/cli/dev/tui/dev-rebuild-status.d.ts +21 -0
- package/dist/src/cli/dev/tui/dev-rebuild-status.js +1 -0
- package/dist/src/cli/dev/tui/errors.d.ts +18 -0
- package/dist/src/cli/dev/tui/errors.js +1 -1
- package/dist/src/cli/dev/tui/prompt-command-handler.d.ts +14 -0
- package/dist/src/cli/dev/tui/prompt-command-handler.js +1 -0
- package/dist/src/cli/dev/tui/prompt-commands.d.ts +54 -0
- package/dist/src/cli/dev/tui/prompt-commands.js +2 -0
- package/dist/src/cli/dev/tui/runner.d.ts +64 -7
- package/dist/src/cli/dev/tui/runner.js +1 -1
- package/dist/src/cli/dev/tui/setup-commands.d.ts +48 -0
- package/dist/src/cli/dev/tui/setup-commands.js +2 -0
- package/dist/src/cli/dev/tui/setup-flow.d.ts +35 -0
- package/dist/src/cli/dev/tui/setup-issues.d.ts +40 -0
- package/dist/src/cli/dev/tui/setup-issues.js +1 -0
- package/dist/src/cli/dev/tui/setup-panel.d.ts +103 -0
- package/dist/src/cli/dev/tui/setup-panel.js +1 -0
- package/dist/src/cli/dev/tui/status-line.d.ts +25 -0
- package/dist/src/cli/dev/tui/status-line.js +1 -0
- package/dist/src/cli/dev/tui/stream-format.d.ts +16 -1
- package/dist/src/cli/dev/tui/stream-format.js +1 -1
- package/dist/src/cli/dev/tui/terminal-renderer.d.ts +32 -3
- package/dist/src/cli/dev/tui/terminal-renderer.js +5 -2
- package/dist/src/cli/dev/tui/test/index.d.ts +3 -1
- package/dist/src/cli/dev/tui/test/index.js +1 -1
- package/dist/src/cli/dev/tui/test/mock-terminal.d.ts +1 -0
- package/dist/src/cli/dev/tui/test/mock-terminal.js +1 -1
- package/dist/src/cli/dev/tui/theme.d.ts +10 -0
- package/dist/src/cli/dev/tui/theme.js +1 -1
- package/dist/src/cli/dev/tui/tui-prompter.d.ts +20 -0
- package/dist/src/cli/dev/tui/tui-prompter.js +1 -0
- package/dist/src/cli/dev/tui/tui.d.ts +6 -8
- package/dist/src/cli/dev/tui/tui.js +1 -1
- package/dist/src/cli/dev/tui/types.d.ts +4 -3
- package/dist/src/cli/dev/tui/vercel-status.d.ts +47 -0
- package/dist/src/cli/dev/tui/vercel-status.js +1 -0
- package/dist/src/cli/run.d.ts +9 -18
- package/dist/src/cli/run.js +2 -2
- package/dist/src/client/client.d.ts +8 -0
- package/dist/src/client/client.js +1 -1
- package/dist/src/client/file-parts.d.ts +18 -0
- package/dist/src/client/file-parts.js +1 -0
- package/dist/src/client/index.d.ts +3 -2
- package/dist/src/client/index.js +1 -1
- package/dist/src/client/message-response.js +1 -1
- package/dist/src/client/open-stream.d.ts +6 -0
- package/dist/src/client/open-stream.js +1 -1
- package/dist/src/client/session-utils.d.ts +5 -0
- package/dist/src/client/session-utils.js +1 -1
- package/dist/src/client/session.js +1 -1
- package/dist/src/client/types.d.ts +9 -2
- package/dist/src/compiled/.vendor-stamp.json +8 -8
- package/dist/src/compiled/@ai-sdk/anthropic/index.d.ts +56 -31
- package/dist/src/compiled/@ai-sdk/anthropic/index.js +2 -2
- package/dist/src/compiled/@ai-sdk/google/index.js +1 -1
- package/dist/src/compiled/@ai-sdk/mcp/index.js +1 -1
- package/dist/src/compiled/@ai-sdk/openai/index.d.ts +16 -9
- package/dist/src/compiled/@ai-sdk/openai/index.js +2 -2
- package/dist/src/compiled/@ai-sdk/otel/index.js +2 -2
- package/dist/src/compiled/@vercel/sandbox/index.js +1 -1
- package/dist/src/compiled/@workflow/core/capabilities.d.ts +19 -1
- package/dist/src/compiled/@workflow/core/class-serialization.d.ts +32 -0
- package/dist/src/compiled/@workflow/core/create-hook.d.ts +37 -0
- package/dist/src/compiled/@workflow/core/global.d.ts +11 -1
- package/dist/src/compiled/@workflow/core/index.js +2 -2
- package/dist/src/compiled/@workflow/core/runtime/helpers.d.ts +4 -2
- package/dist/src/compiled/@workflow/core/runtime/start.d.ts +6 -0
- package/dist/src/compiled/@workflow/core/runtime/suspension-handler.d.ts +15 -2
- package/dist/src/compiled/@workflow/core/runtime/wait-continuation.d.ts +84 -0
- package/dist/src/compiled/@workflow/core/runtime/wait-until.d.ts +18 -0
- package/dist/src/compiled/@workflow/core/runtime.d.ts +3 -1
- package/dist/src/compiled/@workflow/core/runtime.js +28 -28
- package/dist/src/compiled/@workflow/core/serialization/types.d.ts +21 -0
- package/dist/src/compiled/@workflow/core/serialization.d.ts +113 -6
- package/dist/src/compiled/@workflow/core/symbols.d.ts +2 -0
- package/dist/src/compiled/@workflow/core/util.d.ts +0 -5
- package/dist/src/compiled/@workflow/core/version.d.ts +1 -1
- package/dist/src/compiled/@workflow/core/workflow/attribute-dispatcher.d.ts +6 -0
- package/dist/src/compiled/@workflow/core/workflow/set-attributes.d.ts +3 -4
- package/dist/src/compiled/@workflow/core/workflow.js +1 -1
- package/dist/src/compiled/@workflow/world/events.d.ts +48 -0
- package/dist/src/compiled/@workflow/world/index.d.ts +3 -3
- package/dist/src/compiled/@workflow/world/queue.d.ts +31 -2
- package/dist/src/compiled/@workflow/world/runs.d.ts +2 -0
- package/dist/src/compiled/@workflow/world/spec-version.d.ts +2 -1
- package/dist/src/compiled/_chunks/workflow/attribute-changes-DGVGRGfw.js +59 -0
- package/dist/src/compiled/_chunks/workflow/{dist-gEXVSMPU.js → dist-CkMRLaRV.js} +1 -1
- package/dist/src/compiled/_chunks/workflow/functions-DuPjIvMH.js +1 -0
- package/dist/src/compiled/_chunks/workflow/resume-hook-DMSadN9o.js +1 -0
- package/dist/src/compiled/_chunks/workflow/run-BRdn7zy_.js +1 -0
- package/dist/src/compiled/_chunks/workflow/sleep-CpXfoXLF.js +1 -0
- package/dist/src/compiled/just-bash/index.d.ts +4 -4
- package/dist/src/compiler/artifacts.js +1 -1
- package/dist/src/compiler/manifest.d.ts +8 -8
- package/dist/src/compiler/normalize-agent-config.js +1 -1
- package/dist/src/compiler/normalize-channel.d.ts +2 -1
- package/dist/src/compiler/normalize-channel.js +1 -1
- package/dist/src/compiler/normalize-connection.d.ts +2 -1
- package/dist/src/compiler/normalize-connection.js +1 -1
- package/dist/src/compiler/normalize-helpers.d.ts +5 -0
- package/dist/src/compiler/normalize-helpers.js +1 -1
- package/dist/src/compiler/normalize-instructions.d.ts +3 -2
- package/dist/src/compiler/normalize-instructions.js +1 -1
- package/dist/src/compiler/normalize-manifest.js +2 -2
- package/dist/src/compiler/normalize-sandbox.d.ts +2 -1
- package/dist/src/compiler/normalize-sandbox.js +1 -1
- package/dist/src/compiler/normalize-schedule.d.ts +2 -1
- package/dist/src/compiler/normalize-schedule.js +1 -1
- package/dist/src/compiler/normalize-skill.d.ts +2 -1
- package/dist/src/compiler/normalize-skill.js +1 -1
- package/dist/src/compiler/normalize-subagent.d.ts +4 -1
- package/dist/src/compiler/normalize-subagent.js +1 -1
- package/dist/src/compiler/normalize-tool.d.ts +2 -1
- package/dist/src/compiler/normalize-tool.js +1 -1
- package/dist/src/compiler/workspace-resources.js +1 -1
- package/dist/src/context/node.d.ts +1 -1
- package/dist/src/evals/assertions/collector.d.ts +43 -0
- package/dist/src/evals/assertions/collector.js +1 -0
- package/dist/src/evals/assertions/run.d.ts +72 -0
- package/dist/src/evals/assertions/run.js +2 -0
- package/dist/src/evals/autoevals-client.js +2 -0
- package/dist/src/evals/cli/eval-client.d.ts +22 -0
- package/dist/src/evals/cli/eval-client.js +1 -0
- package/dist/src/evals/cli/eval.d.ts +8 -5
- package/dist/src/evals/cli/eval.js +1 -1
- package/dist/src/evals/context.d.ts +19 -0
- package/dist/src/evals/context.js +1 -0
- package/dist/src/evals/define-eval-config.d.ts +16 -0
- package/dist/src/evals/define-eval-config.js +1 -0
- package/dist/src/evals/define-eval.d.ts +20 -0
- package/dist/src/evals/define-eval.js +1 -0
- package/dist/src/evals/expect/index.d.ts +25 -0
- package/dist/src/evals/expect/index.js +1 -0
- package/dist/src/evals/index.d.ts +6 -2
- package/dist/src/evals/index.js +1 -1
- package/dist/src/evals/judge.d.ts +20 -0
- package/dist/src/evals/judge.js +1 -0
- package/dist/src/evals/{checks/match.d.ts → match.d.ts} +17 -18
- package/dist/src/evals/match.js +1 -0
- package/dist/src/evals/reporters/index.d.ts +1 -0
- package/dist/src/evals/reporters/index.js +1 -1
- package/dist/src/evals/requirements.d.ts +3 -0
- package/dist/src/evals/requirements.js +1 -0
- package/dist/src/evals/runner/artifacts.d.ts +7 -6
- package/dist/src/evals/runner/artifacts.js +3 -3
- package/dist/src/evals/runner/discover.d.ts +31 -10
- package/dist/src/evals/runner/discover.js +1 -1
- package/dist/src/evals/runner/execute-eval.d.ts +25 -0
- package/dist/src/evals/runner/execute-eval.js +1 -0
- package/dist/src/evals/runner/execute-task.d.ts +31 -0
- package/dist/src/evals/runner/execute-task.js +1 -0
- package/dist/src/evals/runner/reporters/braintrust.d.ts +7 -5
- package/dist/src/evals/runner/reporters/braintrust.js +2 -2
- package/dist/src/evals/runner/reporters/console.d.ts +4 -4
- package/dist/src/evals/runner/reporters/console.js +1 -1
- package/dist/src/evals/runner/reporters/junit.d.ts +10 -0
- package/dist/src/evals/runner/reporters/junit.js +4 -0
- package/dist/src/evals/runner/reporters/types.d.ts +14 -8
- package/dist/src/evals/runner/run-evals.d.ts +38 -0
- package/dist/src/evals/runner/run-evals.js +1 -0
- package/dist/src/evals/runner/verdict.d.ts +10 -15
- package/dist/src/evals/runner/verdict.js +1 -1
- package/dist/src/evals/session.d.ts +52 -0
- package/dist/src/evals/session.js +1 -0
- package/dist/src/evals/target.d.ts +23 -0
- package/dist/src/evals/target.js +1 -0
- package/dist/src/evals/types.d.ts +294 -219
- package/dist/src/execution/compaction.d.ts +14 -0
- package/dist/src/execution/compaction.js +1 -0
- package/dist/src/execution/delegated-parent-notification.js +1 -1
- package/dist/src/execution/dispatch-runtime-actions-step.js +1 -1
- package/dist/src/execution/node-step.js +1 -1
- package/dist/src/execution/sandbox/bash-tool.d.ts +6 -6
- package/dist/src/execution/sandbox/bash-tool.js +1 -1
- package/dist/src/execution/sandbox/bindings/local.js +1 -1
- package/dist/src/execution/sandbox/bindings/vercel.d.ts +2 -6
- package/dist/src/execution/sandbox/bindings/vercel.js +1 -1
- package/dist/src/execution/sandbox/glob-tool.js +3 -3
- package/dist/src/execution/sandbox/grep-tool.js +3 -3
- package/dist/src/execution/sandbox/read-file-tool.js +1 -1
- package/dist/src/execution/subagent-adapter.js +1 -1
- package/dist/src/execution/tool-auth.js +1 -1
- package/dist/src/execution/turn-workflow.js +1 -1
- package/dist/src/execution/workflow-runtime.d.ts +2 -2
- package/dist/src/execution/workflow-runtime.js +1 -1
- package/dist/src/execution/workflow-steps.js +1 -1
- package/dist/src/harness/action-result-helpers.js +1 -1
- package/dist/src/harness/authorization.d.ts +26 -0
- package/dist/src/harness/authorization.js +1 -1
- package/dist/src/harness/code-mode-lifecycle.js +1 -1
- package/dist/src/harness/emission.d.ts +12 -5
- package/dist/src/harness/emission.js +1 -1
- package/dist/src/harness/model-call-error.d.ts +35 -6
- package/dist/src/harness/model-call-error.js +1 -1
- package/dist/src/harness/step-hooks.d.ts +10 -4
- package/dist/src/harness/step-hooks.js +1 -1
- package/dist/src/harness/tool-loop.js +1 -1
- package/dist/src/harness/tools.d.ts +4 -6
- package/dist/src/harness/tools.js +1 -1
- package/dist/src/harness/turn-tag-state.d.ts +4 -0
- package/dist/src/harness/turn-tag-state.js +1 -1
- package/dist/src/harness/types.d.ts +4 -15
- package/dist/src/internal/application/cache-metadata.js +1 -1
- package/dist/src/internal/application/compiled-artifacts.js +1 -1
- package/dist/src/internal/application/package.js +1 -1
- package/dist/src/internal/application/paths.js +1 -1
- package/dist/src/internal/authored-definition/schema-backed.js +1 -1
- package/dist/src/internal/authored-module-loader.d.ts +4 -1
- package/dist/src/internal/authored-module-loader.js +2 -2
- package/dist/src/internal/authored-module-map-loader.js +1 -1
- package/dist/src/internal/nitro/dev-runtime-artifacts.js +1 -1
- package/dist/src/internal/nitro/host/build-application.js +1 -1
- package/dist/src/internal/nitro/host/build-vercel-agent-summary.js +1 -1
- package/dist/src/internal/nitro/host/configure-nitro-routes.js +3 -3
- package/dist/src/internal/nitro/host/create-application-nitro.js +1 -1
- package/dist/src/internal/nitro/host/dev-authored-source-watcher.js +1 -1
- package/dist/src/internal/nitro/host/dev-watcher-log.d.ts +37 -0
- package/dist/src/internal/nitro/host/dev-watcher-log.js +1 -0
- package/dist/src/internal/nitro/host/ports.d.ts +8 -0
- package/dist/src/internal/nitro/host/ports.js +1 -0
- package/dist/src/internal/nitro/host/prepare-application-host.js +1 -1
- package/dist/src/internal/nitro/host/server-external-packages.d.ts +1 -1
- package/dist/src/internal/nitro/host/server-external-packages.js +1 -1
- package/dist/src/internal/nitro/host/start-development-server.js +1 -1
- package/dist/src/internal/nitro/host/start-production-server.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.d.ts +5 -0
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js +1 -0
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.d.ts +31 -2
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.d.ts +13 -0
- package/dist/src/internal/nitro/routes/agent-info/load-agent-info-data.js +1 -1
- package/dist/src/internal/nitro/routes/info.d.ts +2 -2
- package/dist/src/internal/nitro/routes/info.js +1 -1
- package/dist/src/internal/workflow/queue-namespace.d.ts +5 -0
- package/dist/src/internal/workflow/queue-namespace.js +1 -0
- package/dist/src/internal/workflow-bundle/builder-support.js +2 -2
- package/dist/src/internal/workflow-bundle/builder.js +3 -5
- package/dist/src/internal/workflow-bundle/vercel-workflow-output.js +1 -1
- package/dist/src/internal/workflow-bundle/workflow-builders.d.ts +1 -1
- package/dist/src/internal/workflow-bundle/workflow-builders.js +1 -1
- package/dist/src/node_modules/.pnpm/@clack_core@1.3.1/node_modules/@clack/core/dist/index.js +4 -4
- package/dist/src/protocol/message.d.ts +15 -0
- package/dist/src/protocol/message.js +2 -2
- package/dist/src/public/channels/slack/api.d.ts +8 -0
- package/dist/src/public/channels/slack/api.js +1 -1
- package/dist/src/public/channels/slack/connections.d.ts +26 -18
- package/dist/src/public/channels/slack/connections.js +1 -1
- package/dist/src/public/channels/slack/defaults.d.ts +5 -2
- package/dist/src/public/channels/slack/defaults.js +1 -1
- package/dist/src/public/channels/slack/index.d.ts +1 -1
- package/dist/src/public/channels/slack/slackChannel.d.ts +65 -5
- package/dist/src/public/channels/slack/slackChannel.js +1 -1
- package/dist/src/public/channels/teams/defaults.js +1 -1
- package/dist/src/public/connections/errors.d.ts +8 -0
- package/dist/src/public/definitions/tool.d.ts +0 -33
- package/dist/src/public/next/index.d.ts +7 -1
- package/dist/src/public/next/index.js +1 -1
- package/dist/src/public/next/server.d.ts +1 -0
- package/dist/src/public/next/server.js +1 -1
- package/dist/src/public/nuxt/dev-server.js +1 -1
- package/dist/src/public/sveltekit/dev-server.js +1 -1
- package/dist/src/public/sveltekit/index.d.ts +1 -1
- package/dist/src/public/tools/defaults.d.ts +2 -4
- package/dist/src/public/tools/defaults.js +1 -1
- package/dist/src/public/tools/define-bash-tool.d.ts +3 -3
- package/dist/src/public/tools/define-bash-tool.js +1 -1
- package/dist/src/public/tools/define-read-file-tool.d.ts +0 -6
- package/dist/src/public/tools/define-read-file-tool.js +1 -1
- package/dist/src/public/tools/index.d.ts +2 -2
- package/dist/src/public/tools/index.js +1 -1
- package/dist/src/public/tools/internal.js +1 -1
- package/dist/src/runtime/actions/types.d.ts +11 -11
- package/dist/src/runtime/agent/mock-model-adapter.js +1 -1
- package/dist/src/runtime/agent/mock-model-fixtures.js +3 -2
- package/dist/src/runtime/agent/mock-model-skill-selection.js +3 -4
- package/dist/src/runtime/connections/callback-route.js +1 -1
- package/dist/src/runtime/connections/mcp-client.js +1 -1
- package/dist/src/runtime/connections/scoped-authorization.d.ts +21 -5
- package/dist/src/runtime/connections/scoped-authorization.js +1 -1
- package/dist/src/runtime/connections/types.d.ts +33 -0
- package/dist/src/runtime/connections/validate-authorization.js +1 -1
- package/dist/src/runtime/framework-tools/bash.d.ts +3 -3
- package/dist/src/runtime/framework-tools/bash.js +1 -1
- package/dist/src/runtime/framework-tools/connection-search-dynamic.d.ts +1 -1
- package/dist/src/runtime/framework-tools/connection-search-dynamic.js +1 -1
- package/dist/src/runtime/framework-tools/file-state.d.ts +3 -3
- package/dist/src/runtime/framework-tools/index.js +1 -1
- package/dist/src/runtime/framework-tools/read-file.js +2 -2
- package/dist/src/runtime/framework-tools/todo.d.ts +7 -0
- package/dist/src/runtime/framework-tools/todo.js +2 -2
- package/dist/src/runtime/governance/auth/http-basic.js +1 -1
- package/dist/src/runtime/input/types.d.ts +1 -1
- package/dist/src/runtime/resolve-tool.d.ts +2 -2
- package/dist/src/runtime/resolve-tool.js +1 -1
- package/dist/src/runtime/sandbox/keys.js +1 -1
- package/dist/src/runtime/session-callback-route.js +1 -1
- package/dist/src/runtime/types.d.ts +1 -7
- package/dist/src/services/dev-client/client-options.d.ts +8 -0
- package/dist/src/services/dev-client/client-options.js +1 -0
- package/dist/src/services/dev-client/runtime-artifacts.d.ts +13 -0
- package/dist/src/services/dev-client/runtime-artifacts.js +1 -0
- package/dist/src/services/dev-client.d.ts +13 -46
- package/dist/src/services/dev-client.js +1 -1
- package/dist/src/setup/ask.d.ts +205 -0
- package/dist/src/setup/ask.js +1 -0
- package/dist/src/setup/boxes/add-channels.d.ts +100 -16
- package/dist/src/setup/boxes/add-channels.js +2 -1
- package/dist/src/setup/boxes/add-connections.d.ts +13 -23
- package/dist/src/setup/boxes/add-connections.js +1 -1
- package/dist/src/setup/boxes/apply-ai-gateway-credential.d.ts +2 -2
- package/dist/src/setup/boxes/apply-ai-gateway-credential.js +1 -1
- package/dist/src/setup/boxes/deploy-project.d.ts +46 -14
- package/dist/src/setup/boxes/deploy-project.js +1 -1
- package/dist/src/setup/boxes/detect-ai-gateway.d.ts +10 -3
- package/dist/src/setup/boxes/detect-ai-gateway.js +1 -1
- package/dist/src/setup/boxes/link-project.d.ts +3 -3
- package/dist/src/setup/boxes/link-project.js +1 -1
- package/dist/src/setup/boxes/one-shot-next-steps.d.ts +18 -0
- package/dist/src/setup/boxes/one-shot-next-steps.js +2 -0
- package/dist/src/setup/boxes/preflight.d.ts +14 -6
- package/dist/src/setup/boxes/preflight.js +1 -1
- package/dist/src/setup/boxes/resolve-provisioning.d.ts +36 -8
- package/dist/src/setup/boxes/resolve-provisioning.js +1 -1
- package/dist/src/setup/boxes/resolve-target.d.ts +25 -8
- package/dist/src/setup/boxes/resolve-target.js +1 -1
- package/dist/src/setup/boxes/scaffold.d.ts +12 -6
- package/dist/src/setup/boxes/scaffold.js +1 -1
- package/dist/src/setup/boxes/select-channels.d.ts +38 -9
- package/dist/src/setup/boxes/select-channels.js +1 -1
- package/dist/src/setup/boxes/select-chat.d.ts +15 -11
- package/dist/src/setup/boxes/select-chat.js +1 -1
- package/dist/src/setup/boxes/select-connections.d.ts +30 -0
- package/dist/src/setup/boxes/select-connections.js +1 -0
- package/dist/src/setup/boxes/select-model.d.ts +18 -14
- package/dist/src/setup/boxes/select-model.js +1 -1
- package/dist/src/setup/boxes/select-setup-mode.d.ts +32 -0
- package/dist/src/setup/boxes/select-setup-mode.js +1 -0
- package/dist/src/setup/channel-add-conflicts.d.ts +28 -0
- package/dist/src/setup/channel-add-conflicts.js +1 -0
- package/dist/src/setup/cli/channel-setup-prompter.d.ts +23 -0
- package/dist/src/setup/cli/channel-setup-prompter.js +1 -0
- package/dist/src/setup/cli/connection-add-prompter.d.ts +8 -0
- package/dist/src/setup/cli/connection-add-prompter.js +1 -0
- package/dist/src/setup/{scaffold/cli → cli}/index.d.ts +4 -3
- package/dist/src/setup/cli/index.js +1 -0
- package/dist/src/setup/{scaffold/cli → cli}/prompt-ui.d.ts +39 -15
- package/dist/src/setup/cli/prompt-ui.js +5 -0
- package/dist/src/setup/{scaffold/cli → cli}/rail-log.d.ts +2 -0
- package/dist/src/setup/{scaffold/cli → cli}/rail-log.js +2 -2
- package/dist/src/setup/{scaffold/cli → cli}/select-component.d.ts +18 -3
- package/dist/src/setup/cli/select-component.js +1 -0
- package/dist/src/setup/cli/select-option-codec.d.ts +12 -0
- package/dist/src/setup/cli/select-option-codec.js +1 -0
- package/dist/src/setup/{scaffold/cli → cli}/select-state.d.ts +13 -1
- package/dist/src/setup/cli/select-state.js +1 -0
- package/dist/src/setup/cli/whimsy.d.ts +16 -0
- package/dist/src/setup/cli/whimsy.js +1 -0
- package/dist/src/setup/{scaffold/steps/setup-connection.d.ts → connection-connector.d.ts} +3 -2
- package/dist/src/setup/connection-connector.js +1 -0
- package/dist/src/setup/flows/channels.d.ts +43 -0
- package/dist/src/setup/flows/channels.js +1 -0
- package/dist/src/setup/flows/deploy.d.ts +40 -0
- package/dist/src/setup/flows/deploy.js +1 -0
- package/dist/src/setup/flows/in-project.d.ts +16 -0
- package/dist/src/setup/flows/in-project.js +1 -0
- package/dist/src/setup/flows/link.d.ts +43 -0
- package/dist/src/setup/flows/link.js +1 -0
- package/dist/src/setup/flows/model.d.ts +112 -0
- package/dist/src/setup/flows/model.js +1 -0
- package/dist/src/setup/flows/vercel.d.ts +31 -0
- package/dist/src/setup/flows/vercel.js +2 -0
- package/dist/src/setup/gateway-models.js +1 -1
- package/dist/src/setup/headless.d.ts +1 -1
- package/dist/src/setup/index.d.ts +10 -4
- package/dist/src/setup/index.js +1 -1
- package/dist/src/setup/onboarding.d.ts +7 -4
- package/dist/src/setup/onboarding.js +1 -1
- package/dist/src/setup/package-manager.d.ts +27 -0
- package/dist/src/setup/package-manager.js +1 -0
- package/dist/src/setup/primitives/index.d.ts +3 -0
- package/dist/src/setup/primitives/index.js +1 -0
- package/dist/src/setup/primitives/pm/bun.d.ts +10 -0
- package/dist/src/setup/primitives/pm/bun.js +1 -0
- package/dist/src/setup/primitives/pm/index.d.ts +11 -0
- package/dist/src/setup/primitives/pm/index.js +1 -0
- package/dist/src/setup/primitives/pm/npm.d.ts +10 -0
- package/dist/src/setup/primitives/pm/npm.js +1 -0
- package/dist/src/setup/primitives/pm/pnpm.d.ts +27 -0
- package/dist/src/setup/primitives/pm/pnpm.js +8 -0
- package/dist/src/setup/primitives/pm/run.d.ts +23 -0
- package/dist/src/setup/primitives/pm/run.js +1 -0
- package/dist/src/setup/primitives/pm/shared.d.ts +8 -0
- package/dist/src/setup/primitives/pm/shared.js +1 -0
- package/dist/src/setup/primitives/pm/types.d.ts +37 -0
- package/dist/src/setup/primitives/pm/types.js +1 -0
- package/dist/src/setup/primitives/pm/yarn.d.ts +10 -0
- package/dist/src/setup/primitives/pm/yarn.js +1 -0
- package/dist/src/setup/primitives/run-pnpm.d.ts +1 -0
- package/dist/src/setup/primitives/run-pnpm.js +1 -0
- package/dist/src/setup/{scaffold/primitives → primitives}/run-vercel.d.ts +7 -0
- package/dist/src/setup/primitives/run-vercel.js +1 -0
- package/dist/src/setup/project-name.d.ts +4 -0
- package/dist/src/setup/project-name.js +1 -0
- package/dist/src/setup/project-resolution.d.ts +54 -0
- package/dist/src/setup/project-resolution.js +1 -0
- package/dist/src/setup/prompter.d.ts +52 -4
- package/dist/src/setup/prompter.js +1 -1
- package/dist/src/setup/quit-guard.d.ts +1 -1
- package/dist/src/setup/run-vercel-link.d.ts +1 -1
- package/dist/src/setup/run-vercel-link.js +1 -1
- package/dist/src/setup/runner.d.ts +5 -4
- package/dist/src/setup/runner.js +1 -1
- package/dist/src/setup/scaffold/channels-catalog.d.ts +3 -3
- package/dist/src/setup/scaffold/channels-catalog.js +1 -1
- package/dist/src/setup/scaffold/create/add-to-project.d.ts +26 -0
- package/dist/src/setup/scaffold/create/add-to-project.js +1 -0
- package/dist/src/setup/scaffold/create/project.d.ts +54 -0
- package/dist/src/setup/scaffold/create/project.js +80 -0
- package/dist/src/setup/scaffold/index.d.ts +4 -4
- package/dist/src/setup/scaffold/index.js +1 -1
- package/dist/src/setup/scaffold/{channels.d.ts → update/channels.d.ts} +11 -0
- package/dist/src/setup/scaffold/update/channels.js +7 -0
- package/dist/src/setup/scaffold/{connections.d.ts → update/connections.d.ts} +1 -1
- package/dist/src/setup/scaffold/update/connections.js +21 -0
- package/dist/src/setup/scaffold/version-tokens.d.ts +11 -0
- package/dist/src/setup/scaffold/version-tokens.js +1 -0
- package/dist/src/setup/{scaffold/steps/setup-slackbot.d.ts → slackbot.d.ts} +24 -20
- package/dist/src/setup/slackbot.js +1 -0
- package/dist/src/setup/state.d.ts +62 -15
- package/dist/src/setup/state.js +1 -1
- package/dist/src/setup/step.d.ts +9 -18
- package/dist/src/setup/vercel-project.d.ts +15 -8
- package/dist/src/setup/vercel-project.js +1 -1
- package/dist/src/shared/agent-definition.d.ts +5 -3
- package/dist/src/shared/default-agent-model.d.ts +5 -0
- package/dist/src/shared/default-agent-model.js +1 -0
- package/dist/src/source-change/apply-model-name.d.ts +25 -0
- package/dist/src/source-change/apply-model-name.js +2 -0
- package/dist/src/source-change/static-source-change.d.ts +36 -0
- package/dist/src/source-change/static-source-change.js +1 -0
- package/dist/src/svelte/index.js +1 -1
- package/dist/src/svelte/use-eve-agent.js +1 -1
- package/dist/src/vue/index.js +1 -1
- package/dist/src/vue/use-eve-agent.js +1 -1
- package/package.json +22 -42
- package/dist/docs/evals-v2-plan.md +0 -939
- package/dist/docs/public/advanced/dev-tui.md +0 -52
- package/dist/docs/public/advanced/evals.md +0 -158
- package/dist/docs/public/reference/faqs.md +0 -48
- package/dist/src/cli/commands/setup.d.ts +0 -55
- package/dist/src/cli/commands/setup.js +0 -1
- package/dist/src/cli/dev/repl/input-requests.d.ts +0 -38
- package/dist/src/cli/dev/repl/input-requests.js +0 -1
- package/dist/src/cli/dev/repl/input.d.ts +0 -19
- package/dist/src/cli/dev/repl/input.js +0 -1
- package/dist/src/cli/dev/repl/repl.d.ts +0 -62
- package/dist/src/cli/dev/repl/repl.js +0 -2
- package/dist/src/cli/dev/repl/terminal.d.ts +0 -21
- package/dist/src/cli/dev/repl/terminal.js +0 -5
- package/dist/src/compiled/_chunks/workflow/resume-hook-0Zk0zSvq.js +0 -12
- package/dist/src/compiled/_chunks/workflow/sleep-DXZr2BgM.js +0 -1
- package/dist/src/compiled/_chunks/workflow/symbols-BWCAoPHE.js +0 -48
- package/dist/src/evals/checks/checks.d.ts +0 -66
- package/dist/src/evals/checks/checks.js +0 -2
- package/dist/src/evals/checks/index.d.ts +0 -21
- package/dist/src/evals/checks/index.js +0 -1
- package/dist/src/evals/checks/match.js +0 -1
- package/dist/src/evals/define-eval-suite.d.ts +0 -18
- package/dist/src/evals/define-eval-suite.js +0 -1
- package/dist/src/evals/runner/execute-case.d.ts +0 -23
- package/dist/src/evals/runner/execute-case.js +0 -1
- package/dist/src/evals/runner/execute-suite.d.ts +0 -24
- package/dist/src/evals/runner/execute-suite.js +0 -1
- package/dist/src/evals/scorers/autoevals-client.js +0 -2
- package/dist/src/evals/scorers/autoevals.d.ts +0 -58
- package/dist/src/evals/scorers/autoevals.js +0 -1
- package/dist/src/evals/scorers/json.d.ts +0 -10
- package/dist/src/evals/scorers/json.js +0 -1
- package/dist/src/evals/scorers/model-marker.d.ts +0 -12
- package/dist/src/evals/scorers/model-marker.js +0 -1
- package/dist/src/evals/scorers/run.d.ts +0 -24
- package/dist/src/evals/scorers/run.js +0 -1
- package/dist/src/evals/scorers/sql.d.ts +0 -9
- package/dist/src/evals/scorers/sql.js +0 -1
- package/dist/src/evals/scorers/text.d.ts +0 -18
- package/dist/src/evals/scorers/text.js +0 -1
- package/dist/src/evals/scores/index.d.ts +0 -72
- package/dist/src/evals/scores/index.js +0 -1
- package/dist/src/execution/tool-compaction.d.ts +0 -9
- package/dist/src/execution/tool-compaction.js +0 -1
- package/dist/src/services/dev-client/stream.d.ts +0 -5
- package/dist/src/services/dev-client/stream.js +0 -1
- package/dist/src/services/dev-client/url.d.ts +0 -11
- package/dist/src/services/dev-client/url.js +0 -1
- package/dist/src/setup/channel-setup-prompter.d.ts +0 -8
- package/dist/src/setup/channel-setup-prompter.js +0 -1
- package/dist/src/setup/scaffold/channels.js +0 -7
- package/dist/src/setup/scaffold/cli/channel-add-prompter.d.ts +0 -12
- package/dist/src/setup/scaffold/cli/channel-add-prompter.js +0 -1
- package/dist/src/setup/scaffold/cli/channel-setup-prompter.d.ts +0 -56
- package/dist/src/setup/scaffold/cli/connection-add-prompter.d.ts +0 -44
- package/dist/src/setup/scaffold/cli/connection-add-prompter.js +0 -1
- package/dist/src/setup/scaffold/cli/index.js +0 -1
- package/dist/src/setup/scaffold/cli/prompt-ui.js +0 -5
- package/dist/src/setup/scaffold/cli/select-component.js +0 -1
- package/dist/src/setup/scaffold/cli/select-state.js +0 -1
- package/dist/src/setup/scaffold/connections.js +0 -21
- package/dist/src/setup/scaffold/pnpm-workspace.d.ts +0 -3
- package/dist/src/setup/scaffold/pnpm-workspace.js +0 -11
- package/dist/src/setup/scaffold/primitives/detect-deployment.d.ts +0 -13
- package/dist/src/setup/scaffold/primitives/detect-deployment.js +0 -1
- package/dist/src/setup/scaffold/primitives/index.d.ts +0 -3
- package/dist/src/setup/scaffold/primitives/index.js +0 -1
- package/dist/src/setup/scaffold/primitives/pnpm-invocation.d.ts +0 -12
- package/dist/src/setup/scaffold/primitives/pnpm-invocation.js +0 -1
- package/dist/src/setup/scaffold/primitives/run-pnpm.d.ts +0 -17
- package/dist/src/setup/scaffold/primitives/run-pnpm.js +0 -1
- package/dist/src/setup/scaffold/primitives/run-vercel.js +0 -1
- package/dist/src/setup/scaffold/project.d.ts +0 -21
- package/dist/src/setup/scaffold/project.js +0 -80
- package/dist/src/setup/scaffold/steps/deploy-to-vercel.d.ts +0 -17
- package/dist/src/setup/scaffold/steps/deploy-to-vercel.js +0 -1
- package/dist/src/setup/scaffold/steps/index.d.ts +0 -4
- package/dist/src/setup/scaffold/steps/index.js +0 -1
- package/dist/src/setup/scaffold/steps/project-resolution.d.ts +0 -19
- package/dist/src/setup/scaffold/steps/project-resolution.js +0 -1
- package/dist/src/setup/scaffold/steps/run-add-connection.d.ts +0 -40
- package/dist/src/setup/scaffold/steps/run-add-connection.js +0 -1
- package/dist/src/setup/scaffold/steps/run-add-to-agent.d.ts +0 -81
- package/dist/src/setup/scaffold/steps/run-add-to-agent.js +0 -2
- package/dist/src/setup/scaffold/steps/setup-connection.js +0 -1
- package/dist/src/setup/scaffold/steps/setup-slackbot.js +0 -1
- /package/dist/docs/public/{frontend → guides/frontend}/meta.json +0 -0
- /package/dist/docs/public/{advanced → guides}/remote-agents.md +0 -0
- /package/dist/src/{setup/scaffold/cli/channel-setup-prompter.js → cli/dev/tui/setup-flow.js} +0 -0
- /package/dist/src/evals/{scorers/autoevals-client.d.ts → autoevals-client.d.ts} +0 -0
- /package/dist/src/setup/{scaffold/cli → cli}/command-output.d.ts +0 -0
- /package/dist/src/setup/{scaffold/cli → cli}/command-output.js +0 -0
- /package/dist/src/setup/{scaffold/human-action.d.ts → human-action.d.ts} +0 -0
- /package/dist/src/setup/{scaffold/human-action.js → human-action.js} +0 -0
- /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.d.ts +0 -0
- /package/dist/src/setup/{scaffold/primitives → primitives}/process-output.js +0 -0
- /package/dist/src/setup/scaffold/{web-template.d.ts → create/web-template.d.ts} +0 -0
- /package/dist/src/setup/scaffold/{web-template.js → create/web-template.js} +0 -0
- /package/dist/src/setup/scaffold/{module-files.d.ts → update/module-files.d.ts} +0 -0
- /package/dist/src/setup/scaffold/{module-files.js → update/module-files.js} +0 -0
- /package/dist/src/setup/scaffold/{package-json.d.ts → update/package-json.d.ts} +0 -0
- /package/dist/src/setup/scaffold/{package-json.js → update/package-json.js} +0 -0
- /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.d.ts +0 -0
- /package/dist/src/setup/scaffold/{primitives → update}/update-connection-connector.js +0 -0
- /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.d.ts +0 -0
- /package/dist/src/setup/scaffold/{primitives → update}/update-slack-channel.js +0 -0
|
@@ -1,39 +1,17 @@
|
|
|
1
1
|
import type { LanguageModel } from "ai";
|
|
2
|
+
import type { StandardSchemaV1 } from "#compiled/@standard-schema/spec/index.js";
|
|
2
3
|
import type { HandleMessageStreamEvent, RuntimeIdentity } from "#protocol/message.js";
|
|
3
|
-
import type {
|
|
4
|
+
import type { SendTurnInput, SessionState } from "#client/types.js";
|
|
5
|
+
import type { InputRequest, InputResponse } from "#runtime/input/types.js";
|
|
4
6
|
import type { JsonObject } from "#shared/json.js";
|
|
5
7
|
import type { AgentModelOptionsDefinition } from "#shared/agent-definition.js";
|
|
6
8
|
import type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
9
|
+
import type { EveEvalSubagentCallMatchOptions, EveEvalToolCallMatchOptions } from "#evals/match.js";
|
|
7
10
|
/**
|
|
8
|
-
*
|
|
11
|
+
* Assumptions an eval needs the runner to verify against the live target
|
|
12
|
+
* or eval process environment before executing it.
|
|
9
13
|
*/
|
|
10
|
-
export
|
|
11
|
-
/** Uniquely identifies the case within its suite. */
|
|
12
|
-
readonly id: string;
|
|
13
|
-
/**
|
|
14
|
-
* The case prompt, or a structured record the task derives messages from.
|
|
15
|
-
* A string is sent verbatim and a record is `JSON.stringify`d, unless the
|
|
16
|
-
* task's `messages`/`prompt` reads specific fields.
|
|
17
|
-
*/
|
|
18
|
-
readonly input: string | Record<string, unknown>;
|
|
19
|
-
/**
|
|
20
|
-
* Reference value scorers compare against. The runner coerces this to a
|
|
21
|
-
* string for autoevals-compatible scorers; `args.case.expected` exposes it
|
|
22
|
-
* unmodified.
|
|
23
|
-
*/
|
|
24
|
-
readonly expected?: unknown;
|
|
25
|
-
/**
|
|
26
|
-
* Hard assertions for this case, appended to the suite-level `checks`.
|
|
27
|
-
* Any failure marks the case failed and flips the CLI exit code.
|
|
28
|
-
*/
|
|
29
|
-
readonly checks?: readonly EveEvalCheck[];
|
|
30
|
-
/** Additional scorers for this case, appended to the suite-level `scores`. */
|
|
31
|
-
readonly scores?: readonly EveEvalScorer[];
|
|
32
|
-
/** Used by `--tag` filtering, and passed through for reporting. */
|
|
33
|
-
readonly tags?: readonly string[];
|
|
34
|
-
/** Passed through for reporting and Braintrust span logging. */
|
|
35
|
-
readonly metadata?: Readonly<Record<string, unknown>>;
|
|
36
|
-
}
|
|
14
|
+
export type EveEvalRequirement = "mockModels" | "devRoutes" | `env:${string}`;
|
|
37
15
|
/**
|
|
38
16
|
* One tool call extracted from the captured stream, pairing the
|
|
39
17
|
* `actions.requested` request with its matching `action.result`.
|
|
@@ -87,12 +65,23 @@ export interface EveEvalDerivedFacts {
|
|
|
87
65
|
readonly failureCode?: string;
|
|
88
66
|
}
|
|
89
67
|
/**
|
|
90
|
-
*
|
|
68
|
+
* Captured event stream and facts for one session involved in an eval.
|
|
69
|
+
*/
|
|
70
|
+
export interface EveEvalSessionResult {
|
|
71
|
+
readonly derived: EveEvalDerivedFacts;
|
|
72
|
+
readonly events: readonly HandleMessageStreamEvent[];
|
|
73
|
+
readonly primary: boolean;
|
|
74
|
+
readonly sessionId?: string;
|
|
75
|
+
readonly state: SessionState;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Full result of executing one eval against an Eve agent.
|
|
91
79
|
*/
|
|
92
80
|
export interface EveEvalTaskResult {
|
|
93
81
|
/**
|
|
94
|
-
* The
|
|
95
|
-
*
|
|
82
|
+
* The agent's last assistant message (same as {@link finalMessage}), retained
|
|
83
|
+
* for reporters and artifacts that log a single "output" value. Mutable
|
|
84
|
+
* because the runner assigns it after the run completes.
|
|
96
85
|
*/
|
|
97
86
|
output: unknown;
|
|
98
87
|
/** The agent's last assistant message, or null when none was produced. */
|
|
@@ -106,8 +95,12 @@ export interface EveEvalTaskResult {
|
|
|
106
95
|
readonly status: "completed" | "failed" | "waiting";
|
|
107
96
|
/** The captured stream events from the run. */
|
|
108
97
|
readonly events: readonly HandleMessageStreamEvent[];
|
|
98
|
+
/** Lines written through `t.log` while the eval ran. */
|
|
99
|
+
readonly logs?: readonly string[];
|
|
109
100
|
/** Facts extracted from the stream (tool calls, message counts, etc.). */
|
|
110
101
|
readonly derived: EveEvalDerivedFacts;
|
|
102
|
+
/** Per-session event streams captured while executing this eval. */
|
|
103
|
+
readonly sessions?: readonly EveEvalSessionResult[];
|
|
111
104
|
/**
|
|
112
105
|
* Runtime identity metadata captured from the `session.started` stream event.
|
|
113
106
|
* Present when the Eve server populates the event with its runtime metadata.
|
|
@@ -115,116 +108,175 @@ export interface EveEvalTaskResult {
|
|
|
115
108
|
readonly runtimeIdentity?: RuntimeIdentity;
|
|
116
109
|
}
|
|
117
110
|
/**
|
|
118
|
-
*
|
|
111
|
+
* How a failing assertion affects the verdict. A `"gate"` is a hard
|
|
112
|
+
* assertion: missing it fails the eval. A `"soft"` assertion is tracked
|
|
113
|
+
* data that only fails the eval under `eve eval --strict` (and only when it
|
|
114
|
+
* carries a threshold).
|
|
119
115
|
*/
|
|
120
|
-
export
|
|
121
|
-
/** Scorer name. Used as the key in Braintrust score maps. */
|
|
122
|
-
readonly name: string;
|
|
123
|
-
/** Score between 0 and 1, or null if the scorer could not produce a score. */
|
|
124
|
-
readonly score: number | null;
|
|
125
|
-
/** Optional metadata for debugging or Braintrust span logging. */
|
|
126
|
-
readonly metadata?: Readonly<Record<string, unknown>>;
|
|
127
|
-
}
|
|
116
|
+
export type AssertionSeverity = "gate" | "soft";
|
|
128
117
|
/**
|
|
129
|
-
*
|
|
118
|
+
* A value-level assertion produced by the builders in `eve/evals/expect`
|
|
119
|
+
* (e.g. `includes`, `equals`, `similarity`) and applied to an explicit value
|
|
120
|
+
* via `t.check(value, assertion)`. Boolean assertions score exactly 0 or 1.
|
|
130
121
|
*
|
|
131
|
-
* The
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
* and `result.output`.
|
|
122
|
+
* The chainable `gate`/`soft`/`atLeast` return a new assertion with the
|
|
123
|
+
* severity or threshold overridden, so the threshold rides on the assertion
|
|
124
|
+
* itself rather than a detached map.
|
|
135
125
|
*/
|
|
136
|
-
export interface
|
|
137
|
-
|
|
138
|
-
readonly
|
|
139
|
-
/**
|
|
140
|
-
readonly
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
* change the target Eve agent model. `undefined` when the suite omits
|
|
146
|
-
* `model`; model-backed scorers throw a descriptive error in that case.
|
|
147
|
-
*/
|
|
148
|
-
readonly model: LanguageModel | undefined;
|
|
149
|
-
/** Suite-level provider options for model-backed scorers. */
|
|
150
|
-
readonly modelOptions?: AgentModelOptionsDefinition;
|
|
151
|
-
/** Full eval case. */
|
|
152
|
-
readonly case: EveEvalCase;
|
|
153
|
-
/** Full task result with events and derived facts. */
|
|
154
|
-
readonly result: EveEvalTaskResult;
|
|
126
|
+
export interface Assertion {
|
|
127
|
+
readonly name: string;
|
|
128
|
+
readonly severity: AssertionSeverity;
|
|
129
|
+
/** Minimum passing score. `undefined` on a soft assertion = tracked only. */
|
|
130
|
+
readonly threshold?: number;
|
|
131
|
+
score(value: unknown): number | Promise<number>;
|
|
132
|
+
gate(threshold?: number): Assertion;
|
|
133
|
+
soft(threshold?: number): Assertion;
|
|
134
|
+
atLeast(threshold: number): Assertion;
|
|
155
135
|
}
|
|
156
136
|
/**
|
|
157
|
-
*
|
|
158
|
-
*
|
|
159
|
-
*
|
|
137
|
+
* Handle to a recorded assertion, returned by every `t` assertion method.
|
|
138
|
+
* Chain `gate`/`soft`/`atLeast` to override the recorded severity or
|
|
139
|
+
* threshold, and `await` it to surface model-backed (judge) errors and ensure
|
|
140
|
+
* the assertion has resolved before the run continues.
|
|
160
141
|
*/
|
|
161
|
-
export
|
|
142
|
+
export interface AssertionHandle extends PromiseLike<void> {
|
|
143
|
+
gate(threshold?: number): this;
|
|
144
|
+
soft(threshold?: number): this;
|
|
145
|
+
atLeast(threshold: number): this;
|
|
146
|
+
}
|
|
162
147
|
/**
|
|
163
|
-
*
|
|
164
|
-
*
|
|
165
|
-
* Unlike scores, checks are hard assertions: any `passed: false` marks the
|
|
166
|
-
* case failed and produces a non-zero `eve eval` exit code.
|
|
148
|
+
* The recorded outcome of one assertion, consumed by the verdict, reporters,
|
|
149
|
+
* and artifacts. A boolean assertion has `score` 0 or 1.
|
|
167
150
|
*/
|
|
168
|
-
export interface
|
|
151
|
+
export interface AssertionResult {
|
|
169
152
|
readonly name: string;
|
|
153
|
+
readonly score: number;
|
|
154
|
+
readonly severity: AssertionSeverity;
|
|
155
|
+
readonly threshold?: number;
|
|
170
156
|
readonly passed: boolean;
|
|
171
157
|
/** Human-readable failure detail, shown in console output and artifacts. */
|
|
172
158
|
readonly message?: string;
|
|
173
159
|
readonly metadata?: Readonly<Record<string, unknown>>;
|
|
174
160
|
}
|
|
175
161
|
/**
|
|
176
|
-
*
|
|
177
|
-
* as scorers but never receive a judge model — they are deterministic
|
|
178
|
-
* assertions over the captured run.
|
|
162
|
+
* Driver for one session, exposed on the eval context and by `t.newSession()`.
|
|
179
163
|
*/
|
|
180
|
-
export interface
|
|
181
|
-
|
|
182
|
-
readonly
|
|
183
|
-
/**
|
|
184
|
-
readonly
|
|
164
|
+
export interface EveEvalSession {
|
|
165
|
+
/** All events observed on this session so far. */
|
|
166
|
+
readonly events: readonly HandleMessageStreamEvent[];
|
|
167
|
+
/** Input requests left pending by the last parked turn. */
|
|
168
|
+
readonly pendingInputRequests: readonly InputRequest[];
|
|
169
|
+
/** Serializable cursor for resuming this session. */
|
|
170
|
+
readonly state: SessionState;
|
|
171
|
+
/** Eve session id after the first successful send. */
|
|
172
|
+
readonly sessionId: string | undefined;
|
|
173
|
+
/** Assert the last turn parked on HITL input and return matching requests. */
|
|
174
|
+
expectInputRequests(filter?: {
|
|
175
|
+
readonly display?: InputRequest["display"];
|
|
176
|
+
readonly toolName?: string;
|
|
177
|
+
}): readonly InputRequest[];
|
|
178
|
+
/** Resolve specific pending requests and run the resumed turn. */
|
|
179
|
+
respond(...responses: InputResponse[]): Promise<EveEvalTurn>;
|
|
180
|
+
/** Resolve every pending request with the same option id. */
|
|
181
|
+
respondAll(optionId: string): Promise<EveEvalTurn>;
|
|
182
|
+
/** Send one turn through this session. */
|
|
183
|
+
send(input: SendTurnInput): Promise<EveEvalTurn>;
|
|
184
|
+
/** Send one text turn with a local file attached as a data URL. */
|
|
185
|
+
sendFile(text: string, filePath: string, mediaType?: string): Promise<EveEvalTurn>;
|
|
185
186
|
}
|
|
186
187
|
/**
|
|
187
|
-
* One
|
|
188
|
-
* `eve/evals/checks`; custom checks are plain functions with this shape.
|
|
188
|
+
* One completed eval-driver turn.
|
|
189
189
|
*/
|
|
190
|
-
export
|
|
191
|
-
|
|
192
|
-
readonly
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
readonly parseOutput?: (result: EveEvalTaskResult) => unknown;
|
|
199
|
-
readonly prompt?: (testCase: EveEvalCase) => string;
|
|
190
|
+
export interface EveEvalTurn {
|
|
191
|
+
readonly data: unknown;
|
|
192
|
+
readonly events: readonly HandleMessageStreamEvent[];
|
|
193
|
+
readonly inputRequests: readonly InputRequest[];
|
|
194
|
+
readonly message: string | undefined;
|
|
195
|
+
readonly status: "completed" | "failed" | "waiting";
|
|
196
|
+
readonly toolCalls: readonly EveEvalToolCall[];
|
|
197
|
+
expectOk(): this;
|
|
200
198
|
}
|
|
201
199
|
/**
|
|
202
|
-
*
|
|
203
|
-
*
|
|
204
|
-
*
|
|
200
|
+
* The judge model used by `t.judge.*` assertions, configured per-eval or as
|
|
201
|
+
* the run-wide default in `evals.config.ts`. Only ever used for scoring; it
|
|
202
|
+
* never changes the agent under test. String model ids route through the
|
|
203
|
+
* Vercel AI Gateway; provider model instances run directly.
|
|
205
204
|
*/
|
|
206
|
-
export
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
205
|
+
export interface EveEvalJudgeConfig {
|
|
206
|
+
readonly model: LanguageModel;
|
|
207
|
+
readonly modelOptions?: AgentModelOptionsDefinition;
|
|
208
|
+
}
|
|
209
|
+
/**
|
|
210
|
+
* Per-call options for `t.judge.autoevals.*` assertions.
|
|
211
|
+
*/
|
|
212
|
+
export interface JudgeOpts {
|
|
213
|
+
/** Value to grade. Defaults to the final assistant message (`t.reply`). */
|
|
214
|
+
readonly on?: unknown;
|
|
215
|
+
/** Judge model for this call only; overrides the eval/config judge model. */
|
|
216
|
+
readonly model?: LanguageModel;
|
|
217
|
+
readonly modelOptions?: AgentModelOptionsDefinition;
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Braintrust autoevals graders, bound to the resolved judge model. The grader
|
|
221
|
+
* family is named so its semantics are explicit: `factuality`'s consistency
|
|
222
|
+
* buckets and `closedQA`'s yes/no grading are autoevals' behavior, not Eve's.
|
|
223
|
+
* These are Eve-owned wrappers, not the raw library.
|
|
224
|
+
*/
|
|
225
|
+
export interface AutoevalsJudges {
|
|
226
|
+
factuality(expected: string, opts?: JudgeOpts): AssertionHandle;
|
|
227
|
+
summarizes(expected: string, opts?: JudgeOpts): AssertionHandle;
|
|
228
|
+
closedQA(criteria: string, opts?: JudgeOpts): AssertionHandle;
|
|
229
|
+
sql(expected: string, opts?: JudgeOpts): AssertionHandle;
|
|
230
|
+
}
|
|
231
|
+
/**
|
|
232
|
+
* Model-backed assertion namespaces on `t.judge`. A future non-autoevals
|
|
233
|
+
* engine would slot in as a sibling of `autoevals`.
|
|
234
|
+
*/
|
|
235
|
+
export interface JudgeContext {
|
|
236
|
+
readonly autoevals: AutoevalsJudges;
|
|
237
|
+
}
|
|
226
238
|
/**
|
|
227
|
-
*
|
|
239
|
+
* The single context passed to an eval's `test(t)` function. It drives the
|
|
240
|
+
* primary session (it extends {@link EveEvalSession}), carries the run-level
|
|
241
|
+
* and value-level assertion vocabulary, and exposes `judge` for LLM-as-judge.
|
|
242
|
+
*
|
|
243
|
+
* Run-level assertions (`completed`, `calledTool`, …) record an entry
|
|
244
|
+
* evaluated against the final run and never throw; `check` and `judge`
|
|
245
|
+
* evaluate the supplied value immediately. Use plain `throw` /
|
|
246
|
+
* `turn.expectOk()` for bespoke preconditions that should abort the run.
|
|
247
|
+
*/
|
|
248
|
+
export interface EveEvalContext extends EveEvalSession {
|
|
249
|
+
/** Eval timeout signal. */
|
|
250
|
+
readonly signal: AbortSignal;
|
|
251
|
+
/** Current target under test. */
|
|
252
|
+
readonly target: EveEvalTargetHandle;
|
|
253
|
+
/** The primary session's last assistant message, or null. */
|
|
254
|
+
readonly reply: string | null;
|
|
255
|
+
/** Structured eval log hook. */
|
|
256
|
+
log(message: string): void;
|
|
257
|
+
/** Create an additional independent session against the same target. */
|
|
258
|
+
newSession(): EveEvalSession;
|
|
259
|
+
completed(): AssertionHandle;
|
|
260
|
+
didNotFail(): AssertionHandle;
|
|
261
|
+
waiting(): AssertionHandle;
|
|
262
|
+
messageIncludes(token: string | RegExp): AssertionHandle;
|
|
263
|
+
calledTool(name: string, options?: EveEvalToolCallMatchOptions): AssertionHandle;
|
|
264
|
+
notCalledTool(name: string): AssertionHandle;
|
|
265
|
+
toolOrder(names: readonly string[]): AssertionHandle;
|
|
266
|
+
usedNoTools(): AssertionHandle;
|
|
267
|
+
maxToolCalls(max: number): AssertionHandle;
|
|
268
|
+
calledSubagent(name: string, options?: EveEvalSubagentCallMatchOptions): AssertionHandle;
|
|
269
|
+
noFailedActions(): AssertionHandle;
|
|
270
|
+
event(predicate: (events: readonly HandleMessageStreamEvent[]) => boolean, label: string): AssertionHandle;
|
|
271
|
+
outputEquals(value: unknown): AssertionHandle;
|
|
272
|
+
outputMatches(schema: StandardSchemaV1): AssertionHandle;
|
|
273
|
+
/** Apply a value-level assertion (from `eve/evals/expect`) to a value. */
|
|
274
|
+
check(value: unknown, assertion: Assertion): AssertionHandle;
|
|
275
|
+
/** LLM-as-judge assertions, bound to the resolved judge model. */
|
|
276
|
+
readonly judge: JudgeContext;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Describes the Eve server an eval runs against.
|
|
228
280
|
*/
|
|
229
281
|
export interface EveEvalTarget {
|
|
230
282
|
/**
|
|
@@ -234,152 +286,175 @@ export interface EveEvalTarget {
|
|
|
234
286
|
readonly kind: "local" | "remote";
|
|
235
287
|
/** Base HTTP URL the eval client connects to and sends message requests. */
|
|
236
288
|
readonly url: string;
|
|
289
|
+
/** Capabilities discovered from the live target's info route. */
|
|
290
|
+
readonly capabilities: EveEvalTargetCapabilities;
|
|
291
|
+
}
|
|
292
|
+
export interface EveEvalTargetCapabilities {
|
|
293
|
+
readonly devRoutes: boolean;
|
|
294
|
+
readonly mockModels: boolean;
|
|
295
|
+
}
|
|
296
|
+
export interface EveEvalScheduleDispatchResult {
|
|
297
|
+
readonly scheduleId: string;
|
|
298
|
+
readonly sessionIds: readonly string[];
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Live target handle exposed to eval runs.
|
|
302
|
+
*/
|
|
303
|
+
export interface EveEvalTargetHandle extends EveEvalTarget {
|
|
304
|
+
/** Dispatch a dev-only authored schedule. Requires declaring `"devRoutes"`. */
|
|
305
|
+
dispatchSchedule(scheduleId: string): Promise<EveEvalScheduleDispatchResult>;
|
|
306
|
+
/** Authenticated fetch against the target base URL. */
|
|
307
|
+
fetch(path: string, init?: RequestInit): Promise<Response>;
|
|
308
|
+
/** Attach to a pre-existing session and consume one turn boundary. */
|
|
309
|
+
attachSession(sessionId: string, opts?: {
|
|
310
|
+
readonly startIndex?: number;
|
|
311
|
+
}): Promise<EveEvalSession>;
|
|
237
312
|
}
|
|
238
313
|
/**
|
|
239
|
-
* Shared fields between the user-facing input and the validated
|
|
314
|
+
* Shared fields between the user-facing input and the validated eval.
|
|
240
315
|
*
|
|
241
|
-
*
|
|
316
|
+
* Eval identity (`id`) is derived from the `evals/<path>.eval.ts` file
|
|
242
317
|
* path by the discovery layer; it is not authored on the input.
|
|
243
318
|
*/
|
|
244
|
-
interface
|
|
319
|
+
interface EveEvalBase {
|
|
245
320
|
readonly description?: string;
|
|
246
|
-
readonly task?: EveEvalTask;
|
|
247
|
-
/**
|
|
248
|
-
* Hard assertions applied to every case in the suite. Case-level `checks`
|
|
249
|
-
* append to these. Any failed check marks the case failed and produces a
|
|
250
|
-
* non-zero `eve eval` exit code, unlike scores which stay soft data.
|
|
251
|
-
*/
|
|
252
|
-
readonly checks?: readonly EveEvalCheck[];
|
|
253
|
-
readonly scores: readonly EveEvalScorer[];
|
|
254
321
|
/**
|
|
255
|
-
*
|
|
256
|
-
*
|
|
257
|
-
* its own per-scorer model override.
|
|
258
|
-
*
|
|
259
|
-
* String model IDs route through the Vercel AI Gateway; the runner uses
|
|
260
|
-
* provider model instances directly. This model is only for scoring and
|
|
261
|
-
* never changes the Eve agent under test.
|
|
322
|
+
* Target/process assumptions verified before execution. The eval is
|
|
323
|
+
* skipped when any requirement is unmet.
|
|
262
324
|
*/
|
|
263
|
-
readonly
|
|
264
|
-
/**
|
|
265
|
-
* Provider-specific options passed to model-backed scorers.
|
|
266
|
-
*/
|
|
267
|
-
readonly modelOptions?: AgentModelOptionsDefinition;
|
|
325
|
+
readonly requires?: readonly EveEvalRequirement[];
|
|
268
326
|
/**
|
|
269
|
-
*
|
|
270
|
-
*
|
|
327
|
+
* Judge model for this eval's `t.judge.*` assertions. Optional: when
|
|
328
|
+
* omitted, judge assertions fall back to the `judge` declared in
|
|
329
|
+
* `evals.config.ts`. Only used for scoring; never changes the agent
|
|
330
|
+
* under test.
|
|
271
331
|
*/
|
|
272
|
-
readonly
|
|
332
|
+
readonly judge?: EveEvalJudgeConfig;
|
|
273
333
|
readonly timeoutMs?: number;
|
|
274
|
-
/** Used by `--tag` filtering
|
|
334
|
+
/** Used by `--tag` filtering. */
|
|
275
335
|
readonly tags?: readonly string[];
|
|
276
336
|
readonly metadata?: Readonly<Record<string, unknown>>;
|
|
277
337
|
readonly reporters?: readonly EvalReporter[];
|
|
278
|
-
/**
|
|
279
|
-
* Minimum score thresholds per scorer name. A case "passes" when every
|
|
280
|
-
* scorer meets or exceeds its threshold. Scorers not listed here
|
|
281
|
-
* default to a threshold of 1.0 (exact match).
|
|
282
|
-
*
|
|
283
|
-
* @example
|
|
284
|
-
* ```ts
|
|
285
|
-
* thresholds: {
|
|
286
|
-
* "Factuality": 0.5,
|
|
287
|
-
* "run.didNotFail": 1.0,
|
|
288
|
-
* }
|
|
289
|
-
* ```
|
|
290
|
-
*/
|
|
291
|
-
readonly thresholds?: Readonly<Record<string, number>>;
|
|
292
338
|
}
|
|
293
339
|
/**
|
|
294
|
-
* Complete top-level key set accepted by {@link
|
|
295
|
-
* unknown authored keys.
|
|
296
|
-
* `load`/`cases` exclusivity.
|
|
340
|
+
* Complete top-level key set accepted by {@link defineEval}, used to reject
|
|
341
|
+
* unknown authored keys.
|
|
297
342
|
*/
|
|
298
|
-
export interface
|
|
299
|
-
readonly
|
|
300
|
-
readonly task?: EveEvalTaskFields;
|
|
301
|
-
load?(): Promise<EveEvalCase[]>;
|
|
343
|
+
export interface EveEvalInputFields extends EveEvalBase {
|
|
344
|
+
readonly test?: (t: EveEvalContext) => void | Promise<void>;
|
|
302
345
|
}
|
|
303
346
|
/**
|
|
304
|
-
* Full
|
|
347
|
+
* Full eval input passed to `defineEval()`.
|
|
305
348
|
*
|
|
306
|
-
*
|
|
307
|
-
*
|
|
308
|
-
*
|
|
349
|
+
* Each eval file is exactly one case: an imperative `test(t)` function that
|
|
350
|
+
* drives the agent and asserts on what it produced. Eval identity is derived
|
|
351
|
+
* from the file path, so authors do not specify an `id` or `name`.
|
|
309
352
|
*/
|
|
310
|
-
export
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
}) | (EveEvalSuiteBase & {
|
|
315
|
-
/** Static inline cases. Mutually exclusive with `load`. */
|
|
316
|
-
readonly cases: readonly EveEvalCase[];
|
|
317
|
-
load?: never;
|
|
318
|
-
});
|
|
353
|
+
export interface EveEvalInput extends EveEvalBase {
|
|
354
|
+
/** Imperative interaction-and-assertion script. */
|
|
355
|
+
test(t: EveEvalContext): void | Promise<void>;
|
|
356
|
+
}
|
|
319
357
|
/**
|
|
320
|
-
*
|
|
321
|
-
* the path-derived id at import time to produce a full {@link
|
|
322
|
-
* `_tag` literal (`"
|
|
323
|
-
* can recognize a defined
|
|
358
|
+
* Eval returned by `defineEval()`. Carries no `id` yet: discovery stamps
|
|
359
|
+
* the path-derived id at import time to produce a full {@link EveEval}. The
|
|
360
|
+
* `_tag` literal (`"EveEval"`) brands the value so discovery and the runner
|
|
361
|
+
* can recognize a defined eval.
|
|
324
362
|
*/
|
|
325
|
-
export
|
|
326
|
-
readonly _tag: "
|
|
327
|
-
|
|
328
|
-
load(): Promise<EveEvalCase[]>;
|
|
329
|
-
}
|
|
363
|
+
export type EveEvalDefinition = EveEvalInput & {
|
|
364
|
+
readonly _tag: "EveEval";
|
|
365
|
+
};
|
|
330
366
|
/**
|
|
331
|
-
* Validated
|
|
332
|
-
* path-derived slug attached by discovery
|
|
333
|
-
*
|
|
367
|
+
* Validated eval consumed by the runner and reporters. The `id` is the
|
|
368
|
+
* path-derived slug attached by discovery (e.g. `evals/weather.eval.ts` →
|
|
369
|
+
* `"weather"`, `evals/runtime/multi-turn.eval.ts` → `"runtime/multi-turn"`).
|
|
370
|
+
* Files that default-export an array of evals derive
|
|
371
|
+
* `<file-id>/<zero-padded index>` ids (e.g. `"weather/0000"`).
|
|
334
372
|
*/
|
|
335
|
-
export
|
|
373
|
+
export type EveEval = EveEvalDefinition & {
|
|
336
374
|
readonly id: string;
|
|
337
|
-
}
|
|
375
|
+
};
|
|
338
376
|
/**
|
|
339
|
-
* Per-
|
|
377
|
+
* Per-eval outcome computed by the runner:
|
|
340
378
|
*
|
|
341
|
-
* - `"passed"` — no execution error, every
|
|
342
|
-
* - `"failed"` — a
|
|
343
|
-
* - `"scored"` —
|
|
344
|
-
* - `"skipped"` — the
|
|
379
|
+
* - `"passed"` — no execution error, every gate held, every soft threshold met
|
|
380
|
+
* - `"failed"` — a gate assertion failed or execution errored (timeout, transport, thrown task)
|
|
381
|
+
* - `"scored"` — every gate held but a soft assertion fell below its threshold
|
|
382
|
+
* - `"skipped"` — the eval was not executed (unmet `requires` entries)
|
|
345
383
|
*/
|
|
346
|
-
export type
|
|
384
|
+
export type EveEvalVerdict = "passed" | "failed" | "scored" | "skipped";
|
|
347
385
|
/**
|
|
348
|
-
* Result of
|
|
386
|
+
* Result of executing and asserting one eval.
|
|
387
|
+
*
|
|
388
|
+
* `id` is the path-derived eval id
|
|
389
|
+
* (e.g. `evals/weather.eval.ts` → `"weather"`).
|
|
349
390
|
*/
|
|
350
|
-
export interface
|
|
351
|
-
readonly
|
|
391
|
+
export interface EveEvalResult {
|
|
392
|
+
readonly id: string;
|
|
352
393
|
readonly result: EveEvalTaskResult;
|
|
353
|
-
/**
|
|
354
|
-
readonly
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
readonly verdict: EveEvalCaseVerdict;
|
|
394
|
+
/** Every assertion recorded by the eval's `test(t)`, in record order. */
|
|
395
|
+
readonly assertions: readonly AssertionResult[];
|
|
396
|
+
/** Per-eval verdict; see {@link EveEvalVerdict}. */
|
|
397
|
+
readonly verdict: EveEvalVerdict;
|
|
358
398
|
readonly error?: string;
|
|
359
|
-
/** Why the
|
|
399
|
+
/** Why the eval was skipped, when `verdict` is `"skipped"`. */
|
|
360
400
|
readonly skipReason?: string;
|
|
401
|
+
readonly startedAt: string;
|
|
402
|
+
readonly completedAt: string;
|
|
361
403
|
}
|
|
362
404
|
/**
|
|
363
|
-
* Aggregated
|
|
364
|
-
*
|
|
365
|
-
* `suite` is the path-derived suite id
|
|
366
|
-
* (e.g. `evals/weather.eval.ts` → `"weather"`).
|
|
405
|
+
* Aggregated outcome of one `eve eval` run across every executed eval.
|
|
367
406
|
*/
|
|
368
|
-
export interface
|
|
369
|
-
readonly suite: string;
|
|
407
|
+
export interface EveEvalRunSummary {
|
|
370
408
|
readonly target: EveEvalTarget;
|
|
371
|
-
readonly
|
|
409
|
+
readonly results: readonly EveEvalResult[];
|
|
372
410
|
readonly startedAt: string;
|
|
373
411
|
readonly completedAt: string;
|
|
374
|
-
/**
|
|
412
|
+
/** Evals with verdict `"passed"`. */
|
|
375
413
|
readonly passed: number;
|
|
376
|
-
/**
|
|
414
|
+
/** Evals with verdict `"failed"` (gate failures and execution errors). */
|
|
377
415
|
readonly failed: number;
|
|
378
|
-
/**
|
|
416
|
+
/** Evals with verdict `"scored"` (below-threshold soft assertions only). */
|
|
379
417
|
readonly scored: number;
|
|
380
|
-
/**
|
|
418
|
+
/** Evals with verdict `"skipped"`. */
|
|
381
419
|
readonly skipped: number;
|
|
382
420
|
/** The execution-error subset of `failed` (timeouts, connection failures, exceptions). */
|
|
383
421
|
readonly errored: number;
|
|
384
422
|
}
|
|
423
|
+
/**
|
|
424
|
+
* Run-wide eval configuration authored in `evals.config.ts`.
|
|
425
|
+
*
|
|
426
|
+
* Exactly one `evals.config.ts` is required at the root of the `evals/`
|
|
427
|
+
* directory; it supplies the defaults every eval in the run shares.
|
|
428
|
+
*/
|
|
429
|
+
export interface EveEvalConfigInput {
|
|
430
|
+
/**
|
|
431
|
+
* Default judge model for `t.judge.*` assertions across every eval.
|
|
432
|
+
* Optional: evals that use no judge need not set it, and individual evals
|
|
433
|
+
* may override it with their own `judge`. Only ever used for scoring.
|
|
434
|
+
*/
|
|
435
|
+
readonly judge?: EveEvalJudgeConfig;
|
|
436
|
+
/**
|
|
437
|
+
* Reporters that observe every eval in the run (e.g. a shared
|
|
438
|
+
* `Braintrust()` experiment). Suppressed by `eve eval --skip-report`.
|
|
439
|
+
*/
|
|
440
|
+
readonly reporters?: readonly EvalReporter[];
|
|
441
|
+
/**
|
|
442
|
+
* Default maximum number of evals executing at once. Must be a positive
|
|
443
|
+
* integer. `eve eval --max-concurrency` overrides it; defaults to 8 when
|
|
444
|
+
* neither is set.
|
|
445
|
+
*/
|
|
446
|
+
readonly maxConcurrency?: number;
|
|
447
|
+
/**
|
|
448
|
+
* Default per-eval timeout in milliseconds. An eval's own `timeoutMs`
|
|
449
|
+
* overrides it, and `eve eval --timeout` overrides both.
|
|
450
|
+
*/
|
|
451
|
+
readonly timeoutMs?: number;
|
|
452
|
+
}
|
|
453
|
+
/**
|
|
454
|
+
* Validated eval run configuration returned by `defineEvalConfig()`. The
|
|
455
|
+
* `_tag` literal brands the value so discovery can recognize it.
|
|
456
|
+
*/
|
|
457
|
+
export type EveEvalConfig = EveEvalConfigInput & {
|
|
458
|
+
readonly _tag: "EveEvalConfig";
|
|
459
|
+
};
|
|
385
460
|
export {};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ModelMessage } from "ai";
|
|
2
|
+
/**
|
|
3
|
+
* Re-applies framework-owned state preservation after the harness compacts
|
|
4
|
+
* message history, returning any messages to append to the compacted history.
|
|
5
|
+
*
|
|
6
|
+
* Runs the framework's built-in preservation steps:
|
|
7
|
+
* - resets read-before-write tracking, so a write after compaction re-reads
|
|
8
|
+
* the file whose read evidence was summarized away;
|
|
9
|
+
* - re-injects the todo list (when present), so the model keeps its task list.
|
|
10
|
+
*
|
|
11
|
+
* Must be called inside the harness step's `AlsContext`; both steps read
|
|
12
|
+
* durable context state.
|
|
13
|
+
*/
|
|
14
|
+
export declare function preserveFrameworkStateOnCompaction(): readonly ModelMessage[];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import { clearReadFileState } from "#runtime/framework-tools/file-state.js";
import { getTodoCompactionMessage } from "#runtime/framework-tools/todo.js";

/**
 * Re-applies framework-owned state preservation after message history has
 * been compacted.
 *
 * Steps, in order:
 *  1. Clears the read-file tracking state via `clearReadFileState()`.
 *  2. Asks `getTodoCompactionMessage()` for a message to carry forward.
 *
 * @returns {readonly object[]} An empty array when no todo message is
 *   available (`undefined`), otherwise a single-element array holding it.
 *   Callers are expected to append these to the compacted history.
 */
function preserveFrameworkStateOnCompaction() {
  // Reset first: this side effect happens regardless of whether a todo
  // message exists.
  clearReadFileState();

  const todoMessage = getTodoCompactionMessage();
  if (todoMessage === undefined) {
    return [];
  }
  return [todoMessage];
}

export { preserveFrameworkStateOnCompaction };
|