eve 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/docs/public/evals/cases.mdx +1 -2
- package/dist/docs/public/evals/judge.mdx +1 -1
- package/dist/docs/public/evals/overview.mdx +2 -2
- package/dist/docs/public/evals/running.mdx +4 -9
- package/dist/docs/public/evals/targets.mdx +4 -23
- package/dist/src/cli/run.d.ts +0 -2
- package/dist/src/cli/run.js +1 -1
- package/dist/src/client/types.d.ts +0 -1
- package/dist/src/evals/cli/eval.d.ts +0 -2
- package/dist/src/evals/cli/eval.js +1 -1
- package/dist/src/evals/define-eval.d.ts +1 -1
- package/dist/src/evals/define-eval.js +1 -1
- package/dist/src/evals/index.d.ts +1 -2
- package/dist/src/evals/index.js +1 -1
- package/dist/src/evals/runner/artifacts.js +1 -1
- package/dist/src/evals/runner/execute-eval.d.ts +2 -3
- package/dist/src/evals/runner/execute-eval.js +1 -1
- package/dist/src/evals/runner/execute-task.d.ts +1 -2
- package/dist/src/evals/runner/execute-task.js +1 -1
- package/dist/src/evals/runner/reporters/console.js +1 -1
- package/dist/src/evals/runner/reporters/junit.js +3 -4
- package/dist/src/evals/runner/run-evals.d.ts +0 -1
- package/dist/src/evals/runner/run-evals.js +1 -1
- package/dist/src/evals/target.d.ts +1 -6
- package/dist/src/evals/target.js +1 -1
- package/dist/src/evals/types.d.ts +2 -18
- package/dist/src/execution/node-step.js +1 -1
- package/dist/src/internal/application/package.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js +1 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.d.ts +0 -1
- package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response.js +1 -1
- package/dist/src/runtime/agent/mock-model-adapter.d.ts +3 -6
- package/dist/src/runtime/agent/mock-model-adapter.js +2 -2
- package/dist/src/runtime/agent/resolve-model.d.ts +2 -2
- package/dist/src/runtime/agent/resolve-model.js +1 -1
- package/dist/src/setup/scaffold/create/add-to-project.js +1 -1
- package/dist/src/setup/scaffold/create/project.js +2 -2
- package/dist/src/setup/scaffold/update/channels.js +1 -1
- package/package.json +1 -1
- package/dist/src/evals/requirements.d.ts +0 -3
- package/dist/src/evals/requirements.js +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# eve
|
|
2
2
|
|
|
3
|
+
## 0.7.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 20aa34f: Remove the eval mock-model surface and the `requires` field. `eve eval` no
|
|
8
|
+
longer accepts `--mock-models` or `--no-skips`, `defineEval` no longer accepts
|
|
9
|
+
`requires`, and the `skipped` verdict, `EveEvalRequirement`, and
|
|
10
|
+
`EveEvalRequirementError` exports are gone. Evals now always run against a live
|
|
11
|
+
model target (local dev server or `--url`); `dispatchSchedule()` enforces dev
|
|
12
|
+
routes directly from the target capability. The deterministic mock-model
|
|
13
|
+
adapter remains internal to the test tiers (active only under `NODE_ENV=test`).
|
|
14
|
+
|
|
3
15
|
## 0.7.2
|
|
4
16
|
|
|
5
17
|
### Patch Changes
|
|
@@ -76,7 +76,6 @@ import { defineEval } from "eve/evals";
|
|
|
76
76
|
import { includes } from "eve/evals/expect";
|
|
77
77
|
|
|
78
78
|
export default defineEval({
|
|
79
|
-
requires: ["mockModels"],
|
|
80
79
|
async test(t) {
|
|
81
80
|
await t.send("My favorite word is marigold.");
|
|
82
81
|
const firstSessionId = t.sessionId;
|
|
@@ -108,7 +107,7 @@ Each `send` (and `respond`/`respondAll`) resolves to a turn whose `expectOk()` t
|
|
|
108
107
|
|
|
109
108
|
Events from every session are captured in the result and artifacts. `t.log(message)` records debug lines into the eval artifact; `--verbose` also streams them to stdout as evals run. `t.signal` is an `AbortSignal` that fires on timeout.
|
|
110
109
|
|
|
111
|
-
For driving sessions created outside the eval — by a channel webhook or a schedule — see [Targets
|
|
110
|
+
For driving sessions created outside the eval — by a channel webhook or a schedule — see [Targets](./targets).
|
|
112
111
|
|
|
113
112
|
## Datasets: exporting an array
|
|
114
113
|
|
|
@@ -91,4 +91,4 @@ A **string model id** (e.g. `"anthropic/claude-opus-4.8"`) routes through the Ve
|
|
|
91
91
|
|
|
92
92
|
- [Assertions](./assertions): deterministic run-level and value assertions
|
|
93
93
|
- [Reporters](./reporters): ship judged scores to Braintrust experiments
|
|
94
|
-
- [Targets
|
|
94
|
+
- [Targets](./targets): local vs remote targets for judge-backed evals
|
|
@@ -40,7 +40,7 @@ export default defineEval({
|
|
|
40
40
|
});
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
`test` is the only required field. The rest are optional: `description`, `
|
|
43
|
+
`test` is the only required field. The rest are optional: `description`, `judge`, `tags`, `metadata`, `timeoutMs`, `reporters`. The init template adds `evals/**/*.ts` to `tsconfig.json`, so your eval code type-checks alongside the app.
|
|
44
44
|
|
|
45
45
|
## `evals.config.ts`
|
|
46
46
|
|
|
@@ -108,7 +108,7 @@ The rest of this section covers each piece:
|
|
|
108
108
|
- [Cases](./cases): single-turn evals, scripted multi-turn evals, and dataset fan-out
|
|
109
109
|
- [Assertions](./assertions): run-level methods and `t.check` value assertions, with matchers and severity
|
|
110
110
|
- [Judge](./judge): LLM-as-judge grading and the judge model
|
|
111
|
-
- [Targets
|
|
111
|
+
- [Targets](./targets): local vs remote targets for the same eval files
|
|
112
112
|
- [Reporters](./reporters): Braintrust experiments and JUnit XML
|
|
113
113
|
- [Running evals](./running): the `eve eval` CLI, exit codes, and artifacts
|
|
114
114
|
|
|
@@ -9,10 +9,8 @@ description: "The eve eval CLI: flags, filters, exit codes, artifacts, and how t
|
|
|
9
9
|
eve eval # run all discovered evals locally
|
|
10
10
|
eve eval weather smoke # run selected evals (an id, or a directory prefix)
|
|
11
11
|
eve eval --url https://<app> # target a remote app instead of a local host
|
|
12
|
-
eve eval --mock-models # local dev target uses deterministic mock models
|
|
13
12
|
eve eval --tag fast # only evals carrying a tag
|
|
14
13
|
eve eval --strict # soft below-threshold assertions also fail the exit code
|
|
15
|
-
eve eval --no-skips # unmet requirements fail instead of skipping
|
|
16
14
|
eve eval --timeout 60000 # per-eval timeout in milliseconds
|
|
17
15
|
eve eval --max-concurrency 4 # cap concurrent eval executions (default 8)
|
|
18
16
|
eve eval --junit .eve/junit.xml # write JUnit XML
|
|
@@ -32,25 +30,22 @@ Positional ids match exactly or by directory prefix: `eve eval weather` runs `ev
|
|
|
32
30
|
| `1` | Any eval failed — a failed gate, an execution error, or a strict threshold miss |
|
|
33
31
|
| `2` | Configuration error |
|
|
34
32
|
|
|
35
|
-
Unmet [requirements](./targets) skip visibly without affecting the exit code unless you pass `--no-skips`.
|
|
36
|
-
|
|
37
33
|
## Artifacts
|
|
38
34
|
|
|
39
35
|
Each run drops artifacts under `.eve/evals/<timestamp>/`: a run `summary.json`, a `results.jsonl` index, and per-eval assertion results, verdicts, captured event streams, and `t.log` lines under `evals/`. The console output stays tight on purpose; when an eval fails, the artifact has the full story.
|
|
40
36
|
|
|
41
37
|
## CI
|
|
42
38
|
|
|
43
|
-
A solid CI invocation is strict
|
|
39
|
+
A solid CI invocation is strict and machine-reportable:
|
|
44
40
|
|
|
45
41
|
```bash
|
|
46
|
-
eve eval --strict --
|
|
42
|
+
eve eval --strict --junit .eve/junit.xml
|
|
47
43
|
```
|
|
48
44
|
|
|
49
45
|
- `--strict` turns soft threshold misses into failures, so score regressions block the merge.
|
|
50
|
-
- `--mock-models` keeps the default leg deterministic and credential-free. Put real-model evals in their own files gated on `requires: ["env:..."]`, and add `--no-skips` on legs that must prove those ran.
|
|
51
46
|
- `--junit` gives the CI provider per-eval annotations; upload the `.eve/evals/` directory as a failure artifact for the full event streams.
|
|
52
47
|
|
|
53
|
-
Against a deployed app,
|
|
48
|
+
Evals run against a live model, so the CI environment must provide the model-provider credentials. Against a deployed app, add `--url`:
|
|
54
49
|
|
|
55
50
|
```bash
|
|
56
51
|
eve eval --strict --url "$DEPLOY_URL" --junit .eve/junit.xml
|
|
@@ -58,6 +53,6 @@ eve eval --strict --url "$DEPLOY_URL" --junit .eve/junit.xml
|
|
|
58
53
|
|
|
59
54
|
## What to read next
|
|
60
55
|
|
|
61
|
-
- [Targets
|
|
56
|
+
- [Targets](./targets): what `--url` interacts with
|
|
62
57
|
- [Reporters](./reporters): Braintrust and JUnit output
|
|
63
58
|
- [CLI reference](../reference/cli): the rest of the `eve` CLI
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Targets
|
|
3
|
-
description: "Point evals at a local dev server or a deployment
|
|
2
|
+
title: "Targets"
|
|
3
|
+
description: "Point evals at a local dev server or a deployment with the same eval files."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
An eval target is always an HTTP URL. `eve eval` starts a local dev server, while `eve eval --url <url>` runs against an existing server or deployment — the same eval files work for both, which is what makes evals usable as end-to-end tests in CI.
|
|
@@ -13,7 +13,6 @@ The runner polls `/eve/v1/health`, verifies `/eve/v1/info`, and exposes the live
|
|
|
13
13
|
import { defineEval } from "eve/evals";
|
|
14
14
|
|
|
15
15
|
export default defineEval({
|
|
16
|
-
requires: ["mockModels", "devRoutes"],
|
|
17
16
|
async test(t) {
|
|
18
17
|
const { sessionIds } = await t.target.dispatchSchedule("heartbeat");
|
|
19
18
|
await t.target.attachSession(sessionIds[0]!);
|
|
@@ -24,31 +23,13 @@ export default defineEval({
|
|
|
24
23
|
```
|
|
25
24
|
|
|
26
25
|
- `t.target.fetch(path, init)` performs an authenticated fetch against the target — useful for channel and webhook ingress.
|
|
27
|
-
- `t.target.dispatchSchedule(id)` triggers a [schedule](../schedules) through the dev-only schedule route and returns the session ids it created. It
|
|
26
|
+
- `t.target.dispatchSchedule(id)` triggers a [schedule](../schedules) through the dev-only schedule route and returns the session ids it created. It works only against a target with dev routes enabled (the local `eve eval` dev server, or a deployment running in development mode), and throws otherwise.
|
|
28
27
|
- `t.target.attachSession(sessionId, { startIndex? })` consumes one turn from a session created outside the eval — by a channel or a schedule — so its events feed the run-level assertions.
|
|
29
28
|
|
|
30
29
|
Sessions attached this way are full `EveEvalSession`s: you can keep driving them with `send` and read their event streams. The run-level assertions on `t` (`t.completed()`, `t.calledTool(...)`) read the whole run, attached sessions included.
|
|
31
30
|
|
|
32
|
-
## Requirements
|
|
33
|
-
|
|
34
|
-
Use `requires` to declare assumptions the runner verifies before executing an eval:
|
|
35
|
-
|
|
36
|
-
| Requirement | Means |
|
|
37
|
-
| -------------- | --------------------------------------------------------------------- |
|
|
38
|
-
| `"mockModels"` | `/eve/v1/info` reports the deterministic mock model adapter is active |
|
|
39
|
-
| `"devRoutes"` | `/eve/v1/info` reports dev-only routes are mounted |
|
|
40
|
-
| `"env:NAME"` | The eval process has environment variable `NAME` set |
|
|
41
|
-
|
|
42
|
-
Unmet requirements produce a visible `skipped` verdict and do not affect the exit code. Pass `--no-skips` when a CI leg must prove full coverage.
|
|
43
|
-
|
|
44
|
-
## Mock models
|
|
45
|
-
|
|
46
|
-
Deterministic evals — the kind you want in CI — should not depend on a live model. `eve eval --mock-models` starts the local dev server with deterministic authored models, and `requires: ["mockModels"]` makes the dependency explicit so the eval skips instead of flaking anywhere else.
|
|
47
|
-
|
|
48
|
-
`--mock-models` is invalid with `--url` because remote target capabilities are discovered, not set by the runner. For evals that genuinely need a real model — judging nuanced behavior, exercising provider-side tools — gate them on credentials instead (`requires: ["env:AI_GATEWAY_API_KEY"]`) and keep them in their own eval files so a tag filter can select or exclude them.
|
|
49
|
-
|
|
50
31
|
## What to read next
|
|
51
32
|
|
|
52
|
-
- [Running evals](./running): `--url
|
|
33
|
+
- [Running evals](./running): `--url` and the rest of the CLI in practice
|
|
53
34
|
- [Schedules](../schedules): the surface `dispatchSchedule` drives
|
|
54
35
|
- [Channels](../channels/overview): ingress you can exercise with `target.fetch`
|
package/dist/src/cli/run.d.ts
CHANGED
package/dist/src/cli/run.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import{resolveInstalledPackageInfo}from"#internal/application/package.js";import{basename}from"node:path";import{createCliTheme,renderCliTaggedLine}from"#cli/ui/output.js";import{eveCliBanner}from"#cli/banner.js";import{Command,CommanderError,InvalidArgumentError}from"#compiled/commander/index.js";import{resolveApplicationRoot}from"#internal/application/paths.js";import{registerProjectCommands}from"#cli/commands/register-project-commands.js";import{parseDevelopmentServerUrl}from"#cli/dev/url.js";async function loadBuildHost(){return(await import(`#internal/nitro/host.js`)).buildApplication}async function loadPrintApplicationInfo(){return(await import(`#cli/commands/info.js`)).printApplicationInfo}async function loadRunDevelopmentTui(){return(await import(`#cli/dev/tui/tui.js`)).runDevelopmentTui}async function loadRunEvalCommand(){return(await import(`#evals/cli/eval.js`)).runEvalCommand}async function loadStartHost(){return(await import(`#internal/nitro/host.js`)).startDevelopmentServer}async function loadStartProductionHost(){return(await import(`#internal/nitro/host.js`)).startProductionServer}function shouldPrintCliBootBanner(e){return e.name()===`info`||e.name()===`dev`||e.name()===`init`}async function waitForShutdownSignal(e){await new Promise((t,n)=>{let r=!1,cleanup=()=>{process.off(`SIGINT`,handleSignal),process.off(`SIGTERM`,handleSignal)},handleSignal=()=>{r||(r=!0,cleanup(),e.close().then(t,n))};process.once(`SIGINT`,handleSignal),process.once(`SIGTERM`,handleSignal)})}async function waitForProductionServer(e){await Promise.race([e.wait(),waitForShutdownSignal({close:()=>e.close()})])}function parsePortOption(e){if(!/^-?\d+$/.test(e))throw new InvalidArgumentError(`Expected a numeric port, received "${e}".`);let t=Number(e);if(t<0||t>65535)throw new InvalidArgumentError(`Expected a port between 0 and 65535, received "${e}".`);return t}const DISPLAY_MODES=new Set([`full`,`collapsed`,`auto-collapsed`,`hidden`]),STATS_MODES=new Set([`tokens`,`tokensPerSecond`]),LOG_MODES=new Set([`all`,`stderr`,`none`]);function parseDisplayMode(e){if(!DISPLAY_MODES.has(e))throw new InvalidArgumentError(`Expected one of ${[...DISPLAY_MODES].join(`, `)}, received "${e}".`);return e}function parseStatsMode(e){if(!STATS_MODES.has(e))throw new InvalidArgumentError(`Expected one of ${[...STATS_MODES].join(`, `)}, received "${e}".`);return e}function parseLogsMode(e){if(!LOG_MODES.has(e))throw new InvalidArgumentError(`Expected one of ${[...LOG_MODES].join(`, `)}, received "${e}".`);return e}function parseContextSizeOption(e){let t=Number(e);if(!Number.isFinite(t)||t<=0)throw new InvalidArgumentError(`Expected a positive number, received "${e}".`);return t}function resolveDevUiMode(e){return e.options.ui===!1||!e.interactive?`headless`:`tui`}function resolveTuiTitle(e){if(e.name!==void 0&&e.name.length>0)return e.name;if(e.remoteServerUrl!==void 0)try{return new URL(e.remoteServerUrl).host}catch{return}let n=humanizeProjectName(basename(e.appRoot));return n.length>0?n:void 0}function humanizeProjectName(e){return e.replace(/[-_.]+/gu,` `).trim().split(/\s+/u).filter(e=>e.length>0).map(e=>e[0].toUpperCase()+e.slice(1)).join(` `)}function resolveTuiDisplayOptions(e){let t={reasoning:e.reasoning??`full`,tools:e.tools??`auto-collapsed`};return e.subagents!==void 0&&(t.subagents=e.subagents),e.connectionAuth!==void 0&&(t.connectionAuth=e.connectionAuth),e.assistantResponseStats!==void 0&&(t.assistantResponseStats=e.assistantResponseStats),e.contextSize!==void 0&&(t.contextSize=e.contextSize),e.logs!==void 0&&(t.logs=e.logs),t}function hasInteractiveTerminal(){return!!(process.stdin.isTTY&&process.stdout.isTTY)}function rewriteDevelopmentUrlShorthand(e){let t=e[1];return e[0]!==`dev`||e.length!==2||t===void 0||t.startsWith(`-`)?[...e]:[`dev`,`--url`,t]}function resolveRemoteDevelopmentServerUrl(e){if(e.url){if(e.host!==void 0)throw new InvalidArgumentError(`The --host option cannot be used with --url.`);if(e.port!==void 0)throw new InvalidArgumentError(`The --port option cannot be used with --url.`);if(e.ui===!1)throw new InvalidArgumentError(`The --no-ui option cannot be used with --url.`);return e.url}}function createCliProgram(t,o){let s=resolveApplicationRoot(),l=resolveInstalledPackageInfo().version,u=new Command,d=createCliTheme();u.name(`eve`).description(`Build and run an Eve application.`).version(l).showHelpAfterError().exitOverride().hook(`preAction`,(e,n)=>{shouldPrintCliBootBanner(n)&&t.log(eveCliBanner())}).configureOutput({writeErr:e=>{t.error(e.trimEnd())},writeOut:e=>{t.log(e.trimEnd())}});let f=u.command(`channels`).description(`Manage user-authored channels in the current project.`);return f.command(`add [kind]`).description(`Add channels interactively, or scaffold a channel kind (slack | web).`).option(`-f, --force`,`Overwrite existing channel files`).option(`-y, --yes`,`Assume yes for confirmations; requires an explicit channel kind`).action(async(e,n)=>{let{runChannelsAddCommand:r}=await import(`#cli/commands/channels.js`);await r(t,s,{kind:e,options:n})}),f.command(`list`).description(`List user-authored channels in the current project.`).option(`--json`,`Output as JSON`).action(async e=>{let{runChannelsListCommand:n}=await import(`#cli/commands/channels.js`);await n(t,s,e)}),u.command(`init <target>`).description(`Create a new Eve agent, or add one to an existing project directory.`).option(`--channel-web-nextjs`,`Add the Web Chat application (Next.js)`).action(async(e,n)=>{let{runInitCommand:r}=await import(`#cli/commands/init.js`);await r(t,s,e,n)}),registerProjectCommands({program:u,logger:t,appRoot:s}),u.command(`build`).description(`Build the current Eve application.`).action(async()=>{let{loadDevelopmentEnvironmentFiles:e}=await import(`#cli/dev/environment.js`);e(s);let n=await(o.buildHost??await loadBuildHost())(s);t.log(renderCliTaggedLine(d,{message:`built output at ${n}`,tag:`build`,tone:`success`}))}),u.command(`start`).description(`Start a built Eve application.`).option(`--host <host>`,`Host interface to bind`).option(`--port <port>`,`Port to listen on (defaults to $PORT, then 3000)`,parsePortOption).action(async e=>{let{loadDevelopmentEnvironmentFiles:n}=await import(`#cli/dev/environment.js`);n(s);let i=await(o.startProductionHost??await loadStartProductionHost())(s,{host:e.host,port:e.port});t.log(renderCliTaggedLine(d,{message:`server listening at ${i.url}`,tag:`start`,tone:`success`})),await waitForProductionServer(i)}),u.command(`dev`).description(`Start the Eve development server or connect to an existing URL.`).option(`--host <host>`,`Host interface to bind`).option(`--port <port>`,`Port to listen on (defaults to $PORT, then 3000)`,parsePortOption).option(`-u, --url <url>`,`Connect to an existing server URL`,parseDevelopmentServerUrl).option(`--no-ui`,`Start the server without an interactive UI`).option(`--name <name>`,`Title shown in the terminal UI (defaults to the app folder name)`).option(`--tools <mode>`,`How tool calls render: full | collapsed | auto-collapsed | hidden`,parseDisplayMode).option(`--reasoning <mode>`,`How reasoning renders: full | collapsed | auto-collapsed | hidden`,parseDisplayMode).option(`--subagents <mode>`,`How subagent sections render: full | collapsed | auto-collapsed | hidden`,parseDisplayMode).option(`--connection-auth <mode>`,`How connection authorization renders: full | collapsed | auto-collapsed | hidden`,parseDisplayMode).option(`--assistant-response-stats <mode>`,`Assistant header statistic: tokens | tokensPerSecond`,parseStatsMode).option(`--context-size <tokens>`,`Model context window size, shown as a usage percentage`,parseContextSizeOption).option(`--logs <mode>`,`Which server/agent logs to show: all | stderr | none`,parseLogsMode).addHelpText(`after`,`
|
|
2
2
|
You can also pass a bare URL as the only argument, for example: eve dev https://example.com
|
|
3
|
-
`).action(async e=>{let n=resolveRemoteDevelopmentServerUrl(e),{loadDevelopmentEnvironmentFiles:i}=await import(`#cli/dev/environment.js`);i(s);let runInteractiveUi=async r=>{t.log(``);let i=o.runDevelopmentTui??await loadRunDevelopmentTui(),a=resolveTuiDisplayOptions(e),c=resolveTuiTitle({name:e.name,remoteServerUrl:n,appRoot:s});c!==void 0&&(a.name=c),await i(n===void 0?{serverUrl:r,appRoot:s,...a}:{serverUrl:r,...a})};if(n){if(t.log(renderCliTaggedLine(d,{message:`connecting to ${n}`,tag:`dev`,tone:`info`})),resolveDevUiMode({options:e,interactive:hasInteractiveTerminal()})===`headless`){t.log(renderCliTaggedLine(d,{message:`Interactive UI disabled because the current terminal is not a TTY.`,tag:`dev`,tone:`warning`}));return}await runInteractiveUi(n);return}let a=await(o.startHost??await loadStartHost())(s,{host:e.host,port:e.port}),c=!1,closeServer=async()=>{c||(c=!0,await a.close())};try{let n=hasInteractiveTerminal(),i=resolveDevUiMode({options:e,interactive:n});if(i!==`tui`&&t.log(renderCliTaggedLine(d,{message:`server listening at ${a.url}`,tag:`dev`,tone:`success`})),i===`headless`)return e.ui!==!1&&!n&&t.log(renderCliTaggedLine(d,{message:`Interactive UI disabled because the current terminal is not a TTY.`,tag:`dev`,tone:`warning`})),await waitForShutdownSignal({close:closeServer});await runInteractiveUi(a.url)}finally{await closeServer()}}),u.command(`info`).description(`Print resolved application information.`).option(`--json`,`Output as JSON`).action(async e=>{await(o.printApplicationInfo??await loadPrintApplicationInfo())(t,s,e)}),u.command(`eval`).description(`Run evals against an Eve agent.`).argument(`[evalIds...]`,`Eval ids (or directory prefixes) to run (all discovered evals when omitted)`).option(`--url <url>`,`Remote agent URL (skip local host startup)`).option(`--
|
|
3
|
+
`).action(async e=>{let n=resolveRemoteDevelopmentServerUrl(e),{loadDevelopmentEnvironmentFiles:i}=await import(`#cli/dev/environment.js`);i(s);let runInteractiveUi=async r=>{t.log(``);let i=o.runDevelopmentTui??await loadRunDevelopmentTui(),a=resolveTuiDisplayOptions(e),c=resolveTuiTitle({name:e.name,remoteServerUrl:n,appRoot:s});c!==void 0&&(a.name=c),await i(n===void 0?{serverUrl:r,appRoot:s,...a}:{serverUrl:r,...a})};if(n){if(t.log(renderCliTaggedLine(d,{message:`connecting to ${n}`,tag:`dev`,tone:`info`})),resolveDevUiMode({options:e,interactive:hasInteractiveTerminal()})===`headless`){t.log(renderCliTaggedLine(d,{message:`Interactive UI disabled because the current terminal is not a TTY.`,tag:`dev`,tone:`warning`}));return}await runInteractiveUi(n);return}let a=await(o.startHost??await loadStartHost())(s,{host:e.host,port:e.port}),c=!1,closeServer=async()=>{c||(c=!0,await a.close())};try{let n=hasInteractiveTerminal(),i=resolveDevUiMode({options:e,interactive:n});if(i!==`tui`&&t.log(renderCliTaggedLine(d,{message:`server listening at ${a.url}`,tag:`dev`,tone:`success`})),i===`headless`)return e.ui!==!1&&!n&&t.log(renderCliTaggedLine(d,{message:`Interactive UI disabled because the current terminal is not a TTY.`,tag:`dev`,tone:`warning`})),await waitForShutdownSignal({close:closeServer});await runInteractiveUi(a.url)}finally{await closeServer()}}),u.command(`info`).description(`Print resolved application information.`).option(`--json`,`Output as JSON`).action(async e=>{await(o.printApplicationInfo??await loadPrintApplicationInfo())(t,s,e)}),u.command(`eval`).description(`Run evals against an Eve agent.`).argument(`[evalIds...]`,`Eval ids (or directory prefixes) to run (all discovered evals when omitted)`).option(`--url <url>`,`Remote agent URL (skip local host startup)`).option(`--tag <tag...>`,`Run only evals carrying a tag`).option(`--strict`,`Fail the exit code when any score falls below its threshold`).option(`--list`,`Print discovered evals without running them`).option(`--timeout <ms>`,`Per-eval timeout in milliseconds`).option(`--max-concurrency <n>`,`Max concurrent eval executions`).option(`--json`,`Output results as JSON`).option(`--junit <path>`,`Write JUnit XML results to a file`).option(`--skip-report`,`Skip eval-defined reporters (e.g. Braintrust)`).option(`--verbose`,`Stream per-eval ctx.log lines to stdout`).action(async(e,n)=>{await(o.runEvalCommand??await loadRunEvalCommand())(e,n,t)}),u}async function runCli(e=process.argv.slice(2),t=console,n={}){let r=createCliProgram(t,n),i=e.length===0?[`dev`]:rewriteDevelopmentUrlShorthand(e);try{await r.parseAsync(i,{from:`user`})}catch(e){if(e instanceof CommanderError){if(e.exitCode===0)return;throw Error(e.message)}throw e}}export{resolveDevUiMode,resolveTuiDisplayOptions,resolveTuiTitle,runCli};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{basename,join}from"node:path";import{readFile}from"node:fs/promises";import{resolveApplicationRoot}from"#internal/application/paths.js";import{loadDevelopmentEnvironmentFiles}from"#cli/dev/environment.js";import{startDevelopmentServer}from"#internal/nitro/host.js";import{
|
|
1
|
+
import{basename,join}from"node:path";import{readFile}from"node:fs/promises";import{resolveApplicationRoot}from"#internal/application/paths.js";import{loadDevelopmentEnvironmentFiles}from"#cli/dev/environment.js";import{startDevelopmentServer}from"#internal/nitro/host.js";import{createEvalClient}from"#evals/cli/eval-client.js";import{discoverAndImportEvals,discoverEvalConfig}from"#evals/runner/discover.js";import{runEvals}from"#evals/runner/run-evals.js";import{ConsoleReporter}from"#evals/runner/reporters/console.js";import{JUnit}from"#evals/runner/reporters/junit.js";import{resolveEvalTargetHandle}from"#evals/target.js";async function runEvalCommand(e,t,n){let s=resolveApplicationRoot();loadDevelopmentEnvironmentFiles(s);let c=e.length>0?e:void 0,l=await discoverAndImportEvals(s,c);if(l.length===0){c?n.error(`No evals found matching: ${c.join(`, `)}`):n.error(`No evals found. Create files under evals/ with the *.eval.ts extension.`),process.exitCode=2;return}let u=filterEvalsByTag(l,t.tag??[]);if(u.length===0){n.error(`No evals matched the provided tags (${(t.tag??[]).join(`, `)}).`),process.exitCode=2;return}let d,f;try{d=parsePositiveInteger(t.maxConcurrency,`--max-concurrency`),f=parseNonNegativeInteger(t.timeout,`--timeout`)}catch(e){n.error(e instanceof Error?e.message:String(e)),process.exitCode=2;return}if(t.list===!0){printEvalList(u,t.json===!0,n);return}let p;try{p=await discoverEvalConfig(s)}catch(e){n.error(e instanceof Error?e.message:String(e)),process.exitCode=2;return}let m,h;try{t.url?h=await resolveEvalTargetHandle({client:createEvalClient({kind:`remote`,url:t.url}),expectedAgentName:await readExpectedAgentName(s),kind:`remote`,url:t.url}):(m=await startDevelopmentServer(s,{host:`127.0.0.1`,port:0}),h=await resolveEvalTargetHandle({client:createEvalClient({kind:`local`,url:m.url}),expectedAgentName:await readExpectedAgentName(s),kind:`local`,url:m.url}));let e=createEvalClient(h),r=t.json===!0?[]:[new ConsoleReporter];t.junit!==void 0&&r.push(JUnit({filePath:t.junit}));let i=await runEvals({evaluations:u,config:p,target:h,client:e,appRoot:s,reporters:r,includeEvalReporters:t.skipReport!==!0,maxConcurrency:d,timeoutMs:f,onEvalLog:t.verbose===!0?(e,t)=>n.log(`[${e}] ${t}`):void 0});t.json&&n.log(JSON.stringify(i,null,2));let a=i.failed>0,o=t.strict===!0&&i.scored>0;(a||o)&&(process.exitCode=1)}finally{m&&await m.close()}let g=typeof process.exitCode==`number`?process.exitCode:0;process.exit(g)}function parsePositiveInteger(e,t){if(e===void 0)return;let n=Number(e);if(!Number.isInteger(n)||n<1)throw Error(`${t} must be a positive integer; got "${e}".`);return n}function parseNonNegativeInteger(e,t){if(e===void 0)return;let n=Number(e);if(!Number.isInteger(n)||n<0)throw Error(`${t} must be a non-negative integer; got "${e}".`);return n}function filterEvalsByTag(e,t){return t.length===0?[...e]:e.filter(e=>e.tags?.some(e=>t.includes(e))??!1)}function printEvalList(e,t,n){if(t){let t=e.map(e=>({id:e.id,description:e.description,tags:e.tags}));n.log(JSON.stringify(t,null,2));return}for(let t of e){let e=t.description===void 0?``:` — ${t.description}`,r=t.tags!==void 0&&t.tags.length>0?` [${t.tags.join(`, `)}]`:``;n.log(`${t.id}${r}${e}`)}}async function readExpectedAgentName(r){try{let i=JSON.parse(await readFile(join(r,`package.json`),`utf8`));return typeof i.name==`string`&&i.name.length>0?i.name:basename(r)}catch{return basename(r)}}export{runEvalCommand};
|
|
@@ -15,6 +15,6 @@ import type { EveEvalDefinition, EveEvalInput } from "#evals/types.js";
|
|
|
15
15
|
*
|
|
16
16
|
* Throws on invalid input: a missing `test` function, a removed legacy key
|
|
17
17
|
* (`input`/`run`/`checks`/`scores`/`expected`/`thresholds`/`parseOutput`/
|
|
18
|
-
* `model`), or a negative or non-finite `timeoutMs`.
|
|
18
|
+
* `model`/`requires`), or a negative or non-finite `timeoutMs`.
|
|
19
19
|
*/
|
|
20
20
|
export declare function defineEval(input: EveEvalInput): EveEvalDefinition;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
function defineEval(e){return validateEvalInput(e),{...e,_tag:`EveEval`}}function validateEvalInput(e){if(`id`in e)throw Error("Eval must not specify `id`. Eval identity is derived from the file path under evals/.");if(`name`in e)throw Error("Eval must not specify `name`. Eval identity is derived from the file path under evals/.");if(rejectLegacyKey(e,`input`,"Send the prompt inside `test`: `async test(t) { await t.send(...) }`."),rejectLegacyKey(e,`run`,"Rename `run` to `test`; it receives the same context `t`."),rejectLegacyKey(e,`checks`,"Assert inline inside `test` (e.g. `t.completed()`, `t.calledTool(...)`)."),rejectLegacyKey(e,`scores`,"Use soft assertions inside `test`: `t.check(...).atLeast(n)` or `t.judge.autoevals.*`."),rejectLegacyKey(e,`expected`,"Pass the reference value to the assertion (e.g. `t.check(t.reply, includes(value))`)."),rejectLegacyKey(e,`thresholds`,"Put the threshold on the assertion: `.atLeast(n)`."),rejectLegacyKey(e,`parseOutput`,"Read the value you want inside `test` and assert on it directly."),rejectLegacyKey(e,`model`,"Rename `model` to `judge: { model }`."),rejectLegacyKey(e,`modelOptions`,"Move it under `judge: { model, modelOptions }`."),rejectLegacyKey(e,`cases`,"Each eval file is one case; default-export an array of `defineEval(...)` for datasets."),typeof e.test!=`function`)throw Error("Eval requires a `test(t)` function.");if(
|
|
1
|
+
function defineEval(e){return validateEvalInput(e),{...e,_tag:`EveEval`}}function validateEvalInput(e){if(`id`in e)throw Error("Eval must not specify `id`. Eval identity is derived from the file path under evals/.");if(`name`in e)throw Error("Eval must not specify `name`. Eval identity is derived from the file path under evals/.");if(rejectLegacyKey(e,`input`,"Send the prompt inside `test`: `async test(t) { await t.send(...) }`."),rejectLegacyKey(e,`run`,"Rename `run` to `test`; it receives the same context `t`."),rejectLegacyKey(e,`checks`,"Assert inline inside `test` (e.g. `t.completed()`, `t.calledTool(...)`)."),rejectLegacyKey(e,`scores`,"Use soft assertions inside `test`: `t.check(...).atLeast(n)` or `t.judge.autoevals.*`."),rejectLegacyKey(e,`expected`,"Pass the reference value to the assertion (e.g. `t.check(t.reply, includes(value))`)."),rejectLegacyKey(e,`thresholds`,"Put the threshold on the assertion: `.atLeast(n)`."),rejectLegacyKey(e,`parseOutput`,"Read the value you want inside `test` and assert on it directly."),rejectLegacyKey(e,`model`,"Rename `model` to `judge: { model }`."),rejectLegacyKey(e,`modelOptions`,"Move it under `judge: { model, modelOptions }`."),rejectLegacyKey(e,`cases`,"Each eval file is one case; default-export an array of `defineEval(...)` for datasets."),rejectLegacyKey(e,`requires`,`Point real-model evals at credentialed targets directly; dev-only routes are enforced from the live target.`),typeof e.test!=`function`)throw Error("Eval requires a `test(t)` function.");if(e.timeoutMs!==void 0&&(e.timeoutMs<0||!Number.isFinite(e.timeoutMs)))throw Error("Eval `timeoutMs` must be a non-negative finite number.")}function rejectLegacyKey(e,t,n){if(t in e)throw Error(`Eval \`${t}\` is no longer supported. ${n}`)}export{defineEval};
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
export { defineEval } from "#evals/define-eval.js";
|
|
2
2
|
export { defineEvalConfig } from "#evals/define-eval-config.js";
|
|
3
3
|
export { EveEvalTurnFailedError } from "#evals/session.js";
|
|
4
|
-
export { EveEvalRequirementError } from "#evals/target.js";
|
|
5
4
|
export type { RuntimeIdentity } from "#protocol/message.js";
|
|
6
5
|
export type { InputRequest } from "#runtime/input/types.js";
|
|
7
6
|
export type { EveEvalValueMatcher, EveEvalToolCallMatchOptions, EveEvalSubagentCallMatchOptions, } from "#evals/match.js";
|
|
8
|
-
export type { Assertion, AssertionHandle, AssertionResult, AssertionSeverity, AutoevalsJudges, EveEvalContext, EveEvalDerivedFacts, EveEvalJudgeConfig, EveEvalRunSummary, EveEvalSession, EveEvalSessionResult,
|
|
7
|
+
export type { Assertion, AssertionHandle, AssertionResult, AssertionSeverity, AutoevalsJudges, EveEvalContext, EveEvalDerivedFacts, EveEvalJudgeConfig, EveEvalRunSummary, EveEvalSession, EveEvalSessionResult, EveEvalScheduleDispatchResult, EveEvalSubagentCall, EveEval, EveEvalConfig, EveEvalConfigInput, EveEvalDefinition, EveEvalInput, EveEvalResult, EveEvalTarget, EveEvalTargetCapabilities, EveEvalTargetHandle, EveEvalTaskResult, EveEvalToolCall, EveEvalTurn, EveEvalVerdict, JudgeContext, JudgeOpts, } from "#evals/types.js";
|
package/dist/src/evals/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{EveEvalTurnFailedError}from"#evals/session.js";import{defineEval}from"#evals/define-eval.js";import{defineEvalConfig}from"#evals/define-eval-config.js";export{EveEvalTurnFailedError,defineEval,defineEvalConfig};
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import{dirname,join}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function resolveArtifactDirectory(e){return join(e,`.eve`,`evals`,new Date().toISOString().replace(/[:.]/g,`-`).slice(0,19))}async function writeArtifacts(t,n){let r=join(t,`evals`);await mkdir(r,{recursive:!0}),await writeFile(join(t,`summary.json`),JSON.stringify(buildSummaryArtifact(n),null,2));let i=n.results.map(e=>JSON.stringify(buildResultLine(e))).join(`
|
|
2
2
|
`);await writeFile(join(t,`results.jsonl`),`${i}\n`),await Promise.all(n.results.map(async t=>{let n=join(r,`${sanitizeArtifactPath(t.id)}.json`);await mkdir(dirname(n),{recursive:!0}),await writeFile(n,JSON.stringify(buildEvalArtifact(t),null,2));let i=t.result.events.map(e=>JSON.stringify(e)).join(`
|
|
3
|
-
`);await writeFile(join(r,`${sanitizeArtifactPath(t.id)}.events.ndjson`),`${i}\n`)}))}function buildSummaryArtifact(e){return{target:e.target,startedAt:e.startedAt,completedAt:e.completedAt,passed:e.passed,failed:e.failed,scored:e.scored,
|
|
3
|
+
`);await writeFile(join(r,`${sanitizeArtifactPath(t.id)}.events.ndjson`),`${i}\n`)}))}function buildSummaryArtifact(e){return{target:e.target,startedAt:e.startedAt,completedAt:e.completedAt,passed:e.passed,failed:e.failed,scored:e.scored,errored:e.errored,totalEvals:e.results.length,evals:e.results.map(e=>({id:e.id,verdict:e.verdict,status:e.result.status,assertions:e.assertions.map(e=>({name:e.name,score:e.score,severity:e.severity,passed:e.passed})),error:e.error}))}}function buildResultLine(e){return{id:e.id,verdict:e.verdict,status:e.result.status,output:e.result.output,assertions:e.assertions,error:e.error}}function buildEvalArtifact(e){return{id:e.id,result:{output:e.result.output,finalMessage:e.result.finalMessage,sessionId:e.result.sessionId,status:e.result.status,logs:e.result.logs,derived:e.result.derived,sessions:e.result.sessions},verdict:e.verdict,assertions:e.assertions,error:e.error}}function sanitizeArtifactPath(e){return e.split(`/`).map(e=>e.replace(/[^a-zA-Z0-9_-]/g,`_`)).join(`/`)}export{resolveArtifactDirectory,writeArtifacts};
|
|
@@ -5,7 +5,6 @@ import type { EveEval, EveEvalResult, EveEvalTargetHandle } from "#evals/types.j
|
|
|
5
5
|
*/
|
|
6
6
|
export interface ExecuteEvalOptions {
|
|
7
7
|
readonly evaluation: EveEval;
|
|
8
|
-
readonly failOnSkip?: boolean;
|
|
9
8
|
/** Receives `t.log` lines as the eval runs (used by `--verbose`). */
|
|
10
9
|
readonly onLog?: (message: string) => void;
|
|
11
10
|
readonly target: EveEvalTargetHandle;
|
|
@@ -19,7 +18,7 @@ export interface ExecuteEvalOptions {
|
|
|
19
18
|
readonly client: Client;
|
|
20
19
|
}
|
|
21
20
|
/**
|
|
22
|
-
* Executes one eval end to end:
|
|
23
|
-
*
|
|
21
|
+
* Executes one eval end to end: runs `test(t)`, collects its assertions, and
|
|
22
|
+
* computes the verdict.
|
|
24
23
|
*/
|
|
25
24
|
export declare function executeEval(options: ExecuteEvalOptions): Promise<EveEvalResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toErrorMessage}from"#shared/errors.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{executeTask}from"#evals/runner/execute-task.js";import{
|
|
1
|
+
import{toErrorMessage}from"#shared/errors.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{executeTask}from"#evals/runner/execute-task.js";import{computeEvalVerdict}from"#evals/runner/verdict.js";async function executeEval(n){let{evaluation:r,target:i,client:a}=n,o=new Date().toISOString(),s,c=[],l;try{let e=await executeTask({client:a,evaluation:r,onLog:n.onLog,target:i,timeoutMs:n.timeoutMs??r.timeoutMs});s=e.result,c=e.assertions,l=e.error}catch(t){l=toErrorMessage(t),s={output:null,finalMessage:null,status:`failed`,events:[],derived:createEmptyDerivedFacts()}}let u=computeEvalVerdict({error:l,assertions:c});return{id:r.id,result:s,assertions:c,verdict:u,error:l,startedAt:o,completedAt:new Date().toISOString()}}export{executeEval};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Client } from "#client/client.js";
|
|
2
|
-
import type { AssertionResult, EveEval,
|
|
2
|
+
import type { AssertionResult, EveEval, EveEvalTargetHandle, EveEvalTaskResult } from "#evals/types.js";
|
|
3
3
|
/**
|
|
4
4
|
* Options for executing one eval's task.
|
|
5
5
|
*/
|
|
@@ -8,7 +8,6 @@ interface ExecuteTaskOptions {
|
|
|
8
8
|
readonly evaluation: EveEval;
|
|
9
9
|
/** Receives each `t.log` line as it is written (used by `--verbose`). */
|
|
10
10
|
readonly onLog?: (message: string) => void;
|
|
11
|
-
readonly requirements?: readonly EveEvalRequirement[];
|
|
12
11
|
readonly target: EveEvalTargetHandle;
|
|
13
12
|
readonly timeoutMs?: number;
|
|
14
13
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toErrorMessage}from"#shared/errors.js";import{scopeEvalTargetHandle}from"#evals/target.js";import{EvalSessionManager}from"#evals/session.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{createEvalContext}from"#evals/context.js";async function executeTask(r){let{client:a,evaluation:o,target:s,timeoutMs:c}=r,l=c===void 0?neverAbortSignal():AbortSignal.timeout(c),u=new EvalSessionManager({client:a,signal:l}),d=scopeEvalTargetHandle(s,{
|
|
1
|
+
import{toErrorMessage}from"#shared/errors.js";import{scopeEvalTargetHandle}from"#evals/target.js";import{EvalSessionManager}from"#evals/session.js";import{createEmptyDerivedFacts}from"#evals/runner/derive-run-facts.js";import{createEvalContext}from"#evals/context.js";async function executeTask(r){let{client:a,evaluation:o,target:s,timeoutMs:c}=r,l=c===void 0?neverAbortSignal():AbortSignal.timeout(c),u=new EvalSessionManager({client:a,signal:l}),d=scopeEvalTargetHandle(s,{sessions:u}),f=[],{context:p,collector:m}=createEvalContext({manager:u,target:d,signal:l,judge:o.judge,log:e=>{f.push(e),r.onLog?.(e)}}),h;try{await o.test(p)}catch(t){h=toErrorMessage(t)}let g=buildTaskResult({logs:f,sessions:u.snapshots(),turn:u.lastTurnSession()?.lastTurn});return{result:g,assertions:await m.finalize(g),error:h}}function buildTaskResult(e){let t=e.sessions.flatMap(e=>e.events),n=e.turn?.message??null;return{output:n,finalMessage:n,sessionId:selectPrimarySessionId(e.sessions),status:e.turn?.status??`completed`,events:t,logs:e.logs,derived:combineDerivedFacts(e.sessions),sessions:e.sessions,runtimeIdentity:extractRuntimeIdentity(t)}}function combineDerivedFacts(e){if(e.length===0)return createEmptyDerivedFacts();let t=e.flatMap(e=>e.derived.toolCalls),n=e.flatMap(e=>e.derived.subagentCalls),i=e.flatMap(e=>e.derived.inputRequests),a=e.find(e=>e.derived.failureCode!==void 0)?.derived.failureCode;return{toolCalls:t,toolCallCount:t.length,subagentCalls:n,subagentCallCount:n.length,inputRequests:i,parked:e.some(e=>e.derived.parked),messageCount:sum(e,e=>e.derived.messageCount),reasoningBlockCount:sum(e,e=>e.derived.reasoningBlockCount),failureCode:a}}function selectPrimarySessionId(e){return e.find(e=>e.primary)?.sessionId??e[0]?.sessionId}function extractRuntimeIdentity(e){for(let t of e)if(t.type===`session.started`&&t.data.runtime!==void 0)return t.data.runtime}function sum(e,t){return e.reduce((e,n)=>e+t(n),0)}function neverAbortSignal(){return new AbortController().signal}export{executeTask};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import picocolors from"#compiled/picocolors/index.js";var ConsoleReporter=class{#e;#t;constructor(t){this.#e=t?.log??console.log,this.#t=picocolors.createColors(t?.color??!!process.stdout.isTTY)}onRunStart(e,t){this.#e(``),this.#e(`${this.#t.bold(this.#t.cyan(`EVALS`))} ${this.#t.bold(String(e.length))}`),this.#e(`${this.#t.dim(`target`)} ${t.kind===`local`?this.#t.green(t.url):this.#t.blue(t.url)}`),this.#e(``)}onEvalComplete(e){let{assertions:t,verdict:n,error:r}=e,i=t.filter(e=>e.severity===`gate`),a=t.filter(e=>e.severity===`soft`),o=this.#n(n),s=i.length>0?this.#r(i.filter(e=>e.passed).length,i.length):``,c=a.map(e=>this.#i(e.name,e.score)).join(` `),l=[o,this.#t.dim(e.id),s,c].filter(Boolean).join(` `);this.#e(l)
|
|
1
|
+
import picocolors from"#compiled/picocolors/index.js";var ConsoleReporter=class{#e;#t;constructor(t){this.#e=t?.log??console.log,this.#t=picocolors.createColors(t?.color??!!process.stdout.isTTY)}onRunStart(e,t){this.#e(``),this.#e(`${this.#t.bold(this.#t.cyan(`EVALS`))} ${this.#t.bold(String(e.length))}`),this.#e(`${this.#t.dim(`target`)} ${t.kind===`local`?this.#t.green(t.url):this.#t.blue(t.url)}`),this.#e(``)}onEvalComplete(e){let{assertions:t,verdict:n,error:r}=e,i=t.filter(e=>e.severity===`gate`),a=t.filter(e=>e.severity===`soft`),o=this.#n(n),s=i.length>0?this.#r(i.filter(e=>e.passed).length,i.length):``,c=a.map(e=>this.#i(e.name,e.score)).join(` `),l=[o,this.#t.dim(e.id),s,c].filter(Boolean).join(` `);this.#e(l);for(let e of t){if(e.passed)continue;let t=e.message===void 0?``:`: ${e.message}`;this.#e(` ${this.#t.red(`✗ ${e.name}${t}`)}`)}r&&this.#e(` ${this.#t.red(r)}`)}onRunComplete(e){this.#e(``);let{passed:t,failed:n,scored:r,results:i}=e,a=i.length,o=[];t>0&&o.push(this.#t.green(`${t} passed`)),n>0&&o.push(this.#t.red(`${n} failed`)),r>0&&o.push(this.#t.yellow(`${r} scored`)),o.length===0&&o.push(this.#t.dim(`0 evals`)),this.#e(`${this.#t.bold(`Results:`)} ${o.join(`, `)} ${this.#t.dim(`(${a} total)`)}`);let s=this.#a(i);if(s.total>0){let e=this.#t.green(`${s.passed} passed`),t=s.failed>0?`, ${this.#t.red(`${s.failed} failed`)}`:``;this.#e(`${this.#t.bold(`Gates:`)} ${e}${t}`)}let c=this.#o(i);if(c.length>0){this.#e(``);for(let{name:e,avg:t,count:n}of c){let r=this.#i(e,t);this.#e(` ${r} ${this.#t.dim(`(${n} evals)`)}`)}}let l=computeDurationMs(e.startedAt,e.completedAt);this.#e(``),this.#e(this.#t.dim(`Completed in ${formatDuration(l)}`)),this.#e(``)}#n(e){switch(e){case`passed`:return this.#t.green(`✓`);case`failed`:return this.#t.red(`✗`);case`scored`:return this.#t.yellow(`○`)}}#r(e,t){let n=`gates ${e}/${t}`;return e===t?this.#t.green(n):this.#t.red(n)}#i(e,t){let n=`${e}: ${Math.round(t*100)}%`;return t===1?this.#t.green(n):t===0?this.#t.red(n):this.#t.yellow(n)}#a(e){let t=0,n=0;for(let r of e)for(let e of gatesOf(r))e.passed?t+=1:n+=1;return{passed:t,failed:n,total:t+n}}#o(e){let t=new Map;for(let n of e)for(let e of n.assertions){if(e.severity!==`soft`)continue;let n=t.get(e.name);n?(n.sum+=e.score,n.count+=1):t.set(e.name,{sum:e.score,count:1})}return[...t.entries()].map(([e,{sum:t,count:n}])=>({name:e,avg:t/n,count:n}))}};function gatesOf(e){return e.assertions.filter(e=>e.severity===`gate`)}function computeDurationMs(e,t){return new Date(t).getTime()-new Date(e).getTime()}function formatDuration(e){return e<1e3?`${e}ms`:e<6e4?`${(e/1e3).toFixed(1)}s`:`${Math.floor(e/6e4)}m ${(e%6e4/1e3).toFixed(0)}s`}export{ConsoleReporter};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import{dirname}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function JUnit(e){return new JUnitReporter(e)}var JUnitReporter=class{#e;constructor(e){this.#e=e}onRunStart(){}onEvalComplete(){}async onRunComplete(r){let i=renderJUnit(r,{suiteName:this.#e.suiteName});await mkdir(dirname(this.#e.filePath),{recursive:!0}),await writeFile(this.#e.filePath,i)}};function renderJUnit(e,t){let n=e.failed+e.scored,r=e.results.map(renderTestCase);return[`<?xml version="1.0" encoding="UTF-8"?>`,`<testsuite name="${escapeXml(t.suiteName??`eve evals`)}" tests="${e.results.length}" failures="${n}" skipped="
|
|
2
|
-
`)}function renderTestCase(e){let t=`classname="eve.eval" name="${escapeXml(e.id)}" time="${formatSeconds(durationSeconds(e))}"`;if(e.verdict===`passed`)return` <testcase ${t}/>`;
|
|
3
|
-
`);let
|
|
4
|
-
`)}function buildFailureDetail(e){return{verdict:e.verdict,error:e.error,assertions:e.assertions,logs:e.result.logs}}function failureMessage(e){if(e.error!==void 0)return e.error;let t=e.assertions.find(e=>!e.passed);return t===void 0?e.verdict===`scored`?`score below threshold`:e.skipReason??e.verdict:t.message===void 0?t.name:`${t.name}: ${t.message}`}function durationSeconds(e){let t=new Date(e.completedAt).getTime()-new Date(e.startedAt).getTime();return Math.max(0,t/1e3)}function formatSeconds(e){return e.toFixed(3)}function escapeXml(e){return e.replace(/&/g,`&`).replace(/"/g,`"`).replace(/'/g,`'`).replace(/</g,`<`).replace(/>/g,`>`)}export{JUnit};
|
|
1
|
+
import{dirname}from"node:path";import{mkdir,writeFile}from"node:fs/promises";function JUnit(e){return new JUnitReporter(e)}var JUnitReporter=class{#e;constructor(e){this.#e=e}onRunStart(){}onEvalComplete(){}async onRunComplete(r){let i=renderJUnit(r,{suiteName:this.#e.suiteName});await mkdir(dirname(this.#e.filePath),{recursive:!0}),await writeFile(this.#e.filePath,i)}};function renderJUnit(e,t){let n=e.failed+e.scored,r=e.results.map(renderTestCase);return[`<?xml version="1.0" encoding="UTF-8"?>`,`<testsuite name="${escapeXml(t.suiteName??`eve evals`)}" tests="${e.results.length}" failures="${n}" skipped="0" time="${formatSeconds(durationSeconds(e))}">`,...r,`</testsuite>`,``].join(`
|
|
2
|
+
`)}function renderTestCase(e){let t=`classname="eve.eval" name="${escapeXml(e.id)}" time="${formatSeconds(durationSeconds(e))}"`;if(e.verdict===`passed`)return` <testcase ${t}/>`;let n=failureMessage(e);return[` <testcase ${t}>`,` <failure message="${escapeXml(n)}">${escapeXml(JSON.stringify(buildFailureDetail(e),null,2))}</failure>`,` </testcase>`].join(`
|
|
3
|
+
`)}function buildFailureDetail(e){return{verdict:e.verdict,error:e.error,assertions:e.assertions,logs:e.result.logs}}function failureMessage(e){if(e.error!==void 0)return e.error;let t=e.assertions.find(e=>!e.passed);return t===void 0?e.verdict===`scored`?`score below threshold`:e.verdict:t.message===void 0?t.name:`${t.name}: ${t.message}`}function durationSeconds(e){let t=new Date(e.completedAt).getTime()-new Date(e.startedAt).getTime();return Math.max(0,t/1e3)}function formatSeconds(e){return e.toFixed(3)}function escapeXml(e){return e.replace(/&/g,`&`).replace(/"/g,`"`).replace(/'/g,`'`).replace(/</g,`<`).replace(/>/g,`>`)}export{JUnit};
|
|
@@ -15,7 +15,6 @@ export interface RunEvalsOptions {
|
|
|
15
15
|
readonly reporters: readonly EvalReporter[];
|
|
16
16
|
/** When false, eval-defined and config `reporters` are ignored (CLI `--skip-report`). */
|
|
17
17
|
readonly includeEvalReporters?: boolean;
|
|
18
|
-
readonly failOnSkip?: boolean;
|
|
19
18
|
/**
|
|
20
19
|
* Maximum number of evals executing at once. Must be a positive integer.
|
|
21
20
|
* Overrides the config `maxConcurrency`; defaults to 8 when neither is set.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{resolveArtifactDirectory,writeArtifacts}from"#evals/runner/artifacts.js";import{executeEval}from"#evals/runner/execute-eval.js";async function runEvals(r){let{config:i,target:a,client:o,appRoot:s}=r,c=r.maxConcurrency??i.maxConcurrency??8;if(!Number.isInteger(c)||c<1)throw Error(`Eval maxConcurrency must be a positive integer; got ${String(r.maxConcurrency??i.maxConcurrency)}.`);let l=r.evaluations.map(e=>applyConfigDefaults(e,i)),u=new Date().toISOString(),d=buildReporterBindings({...r,evaluations:l});for(let e of d)await e.reporter.onRunStart(l.filter(t=>e.evalIds.has(t.id)),a);let f=[],p=[...l],m=new Set,h=Promise.resolve();for(;p.length>0||m.size>0;){for(;p.length>0&&m.size<c;){let e=p.shift();if(e===void 0)break;let t=(async()=>{let t=await executeEval({client:o,evaluation:e,
|
|
1
|
+
import{resolveArtifactDirectory,writeArtifacts}from"#evals/runner/artifacts.js";import{executeEval}from"#evals/runner/execute-eval.js";async function runEvals(r){let{config:i,target:a,client:o,appRoot:s}=r,c=r.maxConcurrency??i.maxConcurrency??8;if(!Number.isInteger(c)||c<1)throw Error(`Eval maxConcurrency must be a positive integer; got ${String(r.maxConcurrency??i.maxConcurrency)}.`);let l=r.evaluations.map(e=>applyConfigDefaults(e,i)),u=new Date().toISOString(),d=buildReporterBindings({...r,evaluations:l});for(let e of d)await e.reporter.onRunStart(l.filter(t=>e.evalIds.has(t.id)),a);let f=[],p=[...l],m=new Set,h=Promise.resolve();for(;p.length>0||m.size>0;){for(;p.length>0&&m.size<c;){let e=p.shift();if(e===void 0)break;let t=(async()=>{let t=await executeEval({client:o,evaluation:e,onLog:r.onEvalLog===void 0?void 0:t=>r.onEvalLog?.(e.id,t),target:a,timeoutMs:r.timeoutMs});f.push(t),h=h.then(async()=>{for(let e of d)e.evalIds.has(t.id)&&await e.reporter.onEvalComplete(t)})})().finally(()=>{m.delete(t)});m.add(t)}m.size>0&&await Promise.race(m)}await h;let g=new Map(l.map((e,t)=>[e.id,t]));f.sort((e,t)=>(g.get(e.id)??0)-(g.get(t.id)??0));let _=buildSummary(a,f,u);await writeArtifacts(resolveArtifactDirectory(s),_);for(let e of d)await e.reporter.onRunComplete(scopeSummary(_,e.evalIds));return _}function buildReporterBindings(e){let t=new Set(e.evaluations.map(e=>e.id)),n=new Set(e.reporters);if(e.includeEvalReporters!==!1)for(let t of e.config.reporters??[])n.add(t);let r=[...n].map(e=>({reporter:e,evalIds:t}));if(e.includeEvalReporters===!1)return r;let i=new Map;for(let t of e.evaluations)for(let e of t.reporters??[]){if(n.has(e))continue;let r=i.get(e)??new Set;r.add(t.id),i.set(e,r)}for(let[e,t]of i)r.push({reporter:e,evalIds:t});return r}function applyConfigDefaults(e,t){return e.judge!==void 0||t.judge===void 0?e:{...e,judge:t.judge}}function buildSummary(e,t,n){return{target:e,results:t,startedAt:n,completedAt:new Date().toISOString(),passed:countVerdicts(t,`passed`),failed:countVerdicts(t,`failed`),scored:countVerdicts(t,`scored`),errored:t.filter(e=>e.error!==void 0).length}}function scopeSummary(e,t){if(e.results.every(e=>t.has(e.id)))return e;let n=e.results.filter(e=>t.has(e.id));return{...e,results:n,passed:countVerdicts(n,`passed`),failed:countVerdicts(n,`failed`),scored:countVerdicts(n,`scored`),errored:n.filter(e=>e.error!==void 0).length}}function countVerdicts(e,t){return e.filter(e=>e.verdict===t).length}export{runEvals};
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
import { Client } from "#client/client.js";
|
|
2
2
|
import { EvalSessionManager } from "#evals/session.js";
|
|
3
|
-
import type {
|
|
4
|
-
export declare class EveEvalRequirementError extends Error {
|
|
5
|
-
readonly requirement: EveEvalRequirement;
|
|
6
|
-
constructor(requirement: EveEvalRequirement, message: string);
|
|
7
|
-
}
|
|
3
|
+
import type { EveEvalTargetCapabilities, EveEvalTargetHandle } from "#evals/types.js";
|
|
8
4
|
export declare function resolveEvalTargetHandle(input: {
|
|
9
5
|
readonly client: Client;
|
|
10
6
|
readonly expectedAgentName?: string;
|
|
@@ -18,6 +14,5 @@ export declare function createEvalTargetHandle(input: {
|
|
|
18
14
|
readonly url: string;
|
|
19
15
|
}): EveEvalTargetHandle;
|
|
20
16
|
export declare function scopeEvalTargetHandle(target: EveEvalTargetHandle, input: {
|
|
21
|
-
readonly requirements: readonly EveEvalRequirement[];
|
|
22
17
|
readonly sessions?: EvalSessionManager;
|
|
23
18
|
}): EveEvalTargetHandle;
|
package/dist/src/evals/target.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createEveDevDispatchSchedulePath}from"#protocol/routes.js";import{toErrorMessage}from"#shared/errors.js";import"#client/client.js";import{EvalSessionManager}from"#evals/session.js";import{setTimeout}from"node:timers/promises";
|
|
1
|
+
import{createEveDevDispatchSchedulePath}from"#protocol/routes.js";import{toErrorMessage}from"#shared/errors.js";import"#client/client.js";import{EvalSessionManager}from"#evals/session.js";import{setTimeout}from"node:timers/promises";async function resolveEvalTargetHandle(e){await waitForTargetHealth(e.client,e.url);let t=await e.client.info();if(assertAgentInfoShape(t,e.url),e.expectedAgentName!==void 0&&t.agent.name!==e.expectedAgentName)throw Error(`Expected eval target ${JSON.stringify(e.expectedAgentName)} at ${e.url}, but ${JSON.stringify(t.agent.name)} is responding there.`);return createEvalTargetHandle({capabilities:capabilitiesFromInfo(t),client:e.client,kind:e.kind,url:e.url})}function createEvalTargetHandle(e){return createHandle({capabilities:e.capabilities,client:e.client,kind:e.kind,sessions:void 0,url:e.url})}function scopeEvalTargetHandle(e,t){return createHandle({capabilities:e.capabilities,client:void 0,delegate:e,kind:e.kind,sessions:t.sessions,url:e.url})}function createHandle(t){let r=t.delegate,i=t.client,fetchTarget=async(e,t)=>{if(r!==void 0)return await r.fetch(e,t);if(i===void 0)throw Error(`Eval target cannot fetch without a client.`);return await i.fetch(e,t)};return{capabilities:t.capabilities,kind:t.kind,url:t.url,async attachSession(e,a){if(t.sessions!==void 0)return await t.sessions.attachSession(e,a);if(r!==void 0)return await r.attachSession(e,a);if(i===void 0)throw Error(`Eval target cannot attach sessions without a client.`);return await new EvalSessionManager({client:i}).attachSession(e,a)},async dispatchSchedule(n){if(!t.capabilities.devRoutes)throw Error(`target.dispatchSchedule() requires a target with dev routes enabled.`);let r=await fetchTarget(createEveDevDispatchSchedulePath(n),{method:`POST`});if(!r.ok){let e=await readResponseBodySafely(r);throw Error(`Schedule dispatch failed: ${r.status} ${r.statusText}`+(e.length>0?`, ${e}`:``))}return parseScheduleDispatchResult(await r.json())},async fetch(e,t){return await fetchTarget(e,t)}}}function capabilitiesFromInfo(e){return{devRoutes:e.capabilities?.devRoutes??e.mode===`development`}}async function waitForTargetHealth(e,n){let i=Date.now()+6e4,a;for(;Date.now()<i;)try{await e.health();return}catch(e){a=toErrorMessage(e),await setTimeout(250)}throw Error(`Timed out waiting for eval target health at ${n}.`+(a===void 0?``:` Last error: ${a}`))}function assertAgentInfoShape(e,t){if(e.kind!==`eve-agent-info`||e.version!==1)throw Error(`Eval target ${t} returned an unrecognized /eve/v1/info payload.`)}function parseScheduleDispatchResult(e){if(typeof e!=`object`||!e||!(`scheduleId`in e)||typeof e.scheduleId!=`string`||!(`sessionIds`in e)||!Array.isArray(e.sessionIds)||e.sessionIds.some(e=>typeof e!=`string`))throw Error(`Schedule dispatch returned an unexpected response shape: ${JSON.stringify(e)}`);return{scheduleId:e.scheduleId,sessionIds:[...e.sessionIds]}}async function readResponseBodySafely(e){try{return(await e.text()).trim()}catch{return``}}export{createEvalTargetHandle,resolveEvalTargetHandle,scopeEvalTargetHandle};
|
|
@@ -7,11 +7,6 @@ import type { JsonObject } from "#shared/json.js";
|
|
|
7
7
|
import type { AgentModelOptionsDefinition } from "#shared/agent-definition.js";
|
|
8
8
|
import type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
9
9
|
import type { EveEvalSubagentCallMatchOptions, EveEvalToolCallMatchOptions } from "#evals/match.js";
|
|
10
|
-
/**
|
|
11
|
-
* Assumptions an eval needs the runner to verify against the live target
|
|
12
|
-
* or eval process environment before executing it.
|
|
13
|
-
*/
|
|
14
|
-
export type EveEvalRequirement = "mockModels" | "devRoutes" | `env:${string}`;
|
|
15
10
|
/**
|
|
16
11
|
* One tool call extracted from the captured stream, pairing the
|
|
17
12
|
* `actions.requested` request with its matching `action.result`.
|
|
@@ -291,7 +286,6 @@ export interface EveEvalTarget {
|
|
|
291
286
|
}
|
|
292
287
|
export interface EveEvalTargetCapabilities {
|
|
293
288
|
readonly devRoutes: boolean;
|
|
294
|
-
readonly mockModels: boolean;
|
|
295
289
|
}
|
|
296
290
|
export interface EveEvalScheduleDispatchResult {
|
|
297
291
|
readonly scheduleId: string;
|
|
@@ -301,7 +295,7 @@ export interface EveEvalScheduleDispatchResult {
|
|
|
301
295
|
* Live target handle exposed to eval runs.
|
|
302
296
|
*/
|
|
303
297
|
export interface EveEvalTargetHandle extends EveEvalTarget {
|
|
304
|
-
/** Dispatch a dev-only authored schedule. Requires
|
|
298
|
+
/** Dispatch a dev-only authored schedule. Requires a target with dev routes enabled. */
|
|
305
299
|
dispatchSchedule(scheduleId: string): Promise<EveEvalScheduleDispatchResult>;
|
|
306
300
|
/** Authenticated fetch against the target base URL. */
|
|
307
301
|
fetch(path: string, init?: RequestInit): Promise<Response>;
|
|
@@ -318,11 +312,6 @@ export interface EveEvalTargetHandle extends EveEvalTarget {
|
|
|
318
312
|
*/
|
|
319
313
|
interface EveEvalBase {
|
|
320
314
|
readonly description?: string;
|
|
321
|
-
/**
|
|
322
|
-
* Target/process assumptions verified before execution. The eval is
|
|
323
|
-
* skipped when any requirement is unmet.
|
|
324
|
-
*/
|
|
325
|
-
readonly requires?: readonly EveEvalRequirement[];
|
|
326
315
|
/**
|
|
327
316
|
* Judge model for this eval's `t.judge.*` assertions. Optional: when
|
|
328
317
|
* omitted, judge assertions fall back to the `judge` declared in
|
|
@@ -379,9 +368,8 @@ export type EveEval = EveEvalDefinition & {
|
|
|
379
368
|
* - `"passed"` — no execution error, every gate held, every soft threshold met
|
|
380
369
|
* - `"failed"` — a gate assertion failed or execution errored (timeout, transport, thrown task)
|
|
381
370
|
* - `"scored"` — every gate held but a soft assertion fell below its threshold
|
|
382
|
-
* - `"skipped"` — the eval was not executed (unmet `requires` entries)
|
|
383
371
|
*/
|
|
384
|
-
export type EveEvalVerdict = "passed" | "failed" | "scored"
|
|
372
|
+
export type EveEvalVerdict = "passed" | "failed" | "scored";
|
|
385
373
|
/**
|
|
386
374
|
* Result of executing and asserting one eval.
|
|
387
375
|
*
|
|
@@ -396,8 +384,6 @@ export interface EveEvalResult {
|
|
|
396
384
|
/** Per-eval verdict; see {@link EveEvalVerdict}. */
|
|
397
385
|
readonly verdict: EveEvalVerdict;
|
|
398
386
|
readonly error?: string;
|
|
399
|
-
/** Why the eval was skipped, when `verdict` is `"skipped"`. */
|
|
400
|
-
readonly skipReason?: string;
|
|
401
387
|
readonly startedAt: string;
|
|
402
388
|
readonly completedAt: string;
|
|
403
389
|
}
|
|
@@ -415,8 +401,6 @@ export interface EveEvalRunSummary {
|
|
|
415
401
|
readonly failed: number;
|
|
416
402
|
/** Evals with verdict `"scored"` (below-threshold soft assertions only). */
|
|
417
403
|
readonly scored: number;
|
|
418
|
-
/** Evals with verdict `"skipped"`. */
|
|
419
|
-
readonly skipped: number;
|
|
420
404
|
/** The execution-error subset of `failed` (timeouts, connection failures, exceptions). */
|
|
421
405
|
readonly errored: number;
|
|
422
406
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createLogger}from"#internal/logging.js";import{resolveInstalledPackageInfo}from"#internal/application/package.js";import{jsonSchema}from"ai";import{
|
|
1
|
+
import{createLogger}from"#internal/logging.js";import{resolveInstalledPackageInfo}from"#internal/application/package.js";import{jsonSchema}from"ai";import{createToolLoopHarness}from"#harness/tool-loop.js";import{resolveCodeModeEnabled}from"#shared/code-mode.js";import{resolveRuntimeModelReference}from"#runtime/agent/resolve-model.js";import{findRegisteredRuntimeTool}from"#runtime/tools/registry.js";import{SUBAGENT_TOOL_INPUT_SCHEMA}from"#runtime/subagents/registry.js";import{preserveFrameworkStateOnCompaction}from"#execution/compaction.js";import{buildUnauthorizedToolContext,createAuthorizedToolExecute}from"#execution/tool-auth.js";const log=createLogger(`execution.node-step`);function createExecutionNodeStep(e){let t=createRuntimeModelResolver(e.compiledArtifactsSource),n=createNodeHarnessTools({node:e.node});return createToolLoopHarness({capabilities:e.capabilities,codeMode:resolveCodeModeEnabled(e.node.agent.config?.experimental?.codeMode),workflow:e.node.agent.workflowEnabled===!0,handleEvent:e.handleEvent,mode:e.mode,onCompaction:preserveFrameworkStateOnCompaction,resolveModel:t,runtimeIdentity:buildRuntimeIdentity(e.node),tools:n})}function buildRuntimeIdentity(e){let n=resolveInstalledPackageInfo(),r={agentId:e.turnAgent.id,agentName:e.agent.config?.name,eveVersion:n.version,modelId:e.turnAgent.model.id},i=process.env.VERCEL_GIT_COMMIT_SHA?.trim(),a=process.env.VERCEL_GIT_COMMIT_REF?.trim(),o=process.env.VERCEL_DEPLOYMENT_CREATED_AT?.trim();return i||a||o?{...r,build:{deployedAt:o||void 0,gitBranch:a||void 0,gitSha:i||void 0}}:r}function createRuntimeModelResolver(e){return t=>resolveRuntimeModelReference(t,{compiledArtifactsSource:e})}function createNodeHarnessTools(e){let t=new Map;for(let n of e.node.turnAgent.tools){let r=resolveHarnessToolDefinition({node:e.node,tool:n});r!==null&&t.set(n.name,r)}return t.has(`agent`)||t.set(`agent`,{description:`Launch a new agent to handle a complex, multi-step subtask.`,inputSchema:jsonSchema(SUBAGENT_TOOL_INPUT_SCHEMA),name:`agent`,runtimeAction:{kind:`subagent-call`,nodeId:e.node.nodeId,subagentName:`agent`}}),t}function resolveHarnessToolDefinition(e){if(e.tool.kind===`subagent`)return{description:e.tool.description??``,inputSchema:jsonSchema(e.tool.inputSchema??{}),name:e.tool.name,outputSchema:e.tool.outputSchema===void 0?void 0:jsonSchema(e.tool.outputSchema),runtimeAction:{kind:`subagent-call`,nodeId:e.tool.nodeId,subagentName:e.tool.name}};if(e.tool.kind===`remote`)return{description:e.tool.description??``,inputSchema:jsonSchema(e.tool.inputSchema??{}),name:e.tool.name,outputSchema:e.tool.outputSchema===void 0?void 0:jsonSchema(e.tool.outputSchema),runtimeAction:{kind:`remote-agent-call`,nodeId:e.tool.nodeId,remoteAgentName:e.tool.name,subagentName:e.tool.name}};let t=findRegisteredRuntimeTool(e.node.toolRegistry,e.tool.name);if(t===null)return log.warn(`declared tool is not registered — omitting from toolset`,{toolName:e.tool.name,nodeId:e.node.nodeId}),null;let r=t.definition,i=r.sourceId.startsWith(`eve:`),a=r.execute;return{approvalKey:r.approvalKey,description:r.description,execute:resolveAuthoredExecute({auth:r.auth,isFrameworkTool:i,rawExecute:a,scope:r.name}),inputSchema:r.inputStandardSchema??jsonSchema(r.inputSchema??{}),name:r.name,needsApproval:r.needsApproval,outputSchema:r.outputStandardSchema??maybeJsonSchema(r.outputSchema),toModelOutput:r.toModelOutput}}function resolveAuthoredExecute(e){let{auth:t,isFrameworkTool:n,rawExecute:r,scope:i}=e;if(r===void 0)return;if(n)return r;let a=r;return t===void 0?e=>a(e,buildUnauthorizedToolContext(i)):createAuthorizedToolExecute({auth:t,execute:a,scope:i})}function maybeJsonSchema(e){return e===void 0?void 0:jsonSchema(e)}export{createExecutionNodeStep,createNodeHarnessTools};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createRequire}from"node:module";import{basename,dirname,join}from"node:path";import{existsSync,readFileSync,realpathSync}from"node:fs";import{EVE_PACKAGE_NAME}from"#internal/package-name.js";import{fileURLToPath}from"node:url";let cachedPackageInfo;const BUNDLED_FALLBACK_PACKAGE_VERSION=`0.7.
|
|
1
|
+
import{createRequire}from"node:module";import{basename,dirname,join}from"node:path";import{existsSync,readFileSync,realpathSync}from"node:fs";import{EVE_PACKAGE_NAME}from"#internal/package-name.js";import{fileURLToPath}from"node:url";let cachedPackageInfo;const BUNDLED_FALLBACK_PACKAGE_VERSION=`0.7.3`,WORKFLOW_MODULE_ALIASES={"workflow/api":`src/compiled/@workflow/core/runtime.js`,"workflow/errors":`src/compiled/@workflow/errors/index.js`,"workflow/internal/private":`src/compiled/@workflow/core/private.js`,"workflow/runtime":`src/compiled/@workflow/core/runtime.js`};function resolveFallbackPackageVersion(){return BUNDLED_FALLBACK_PACKAGE_VERSION.startsWith(`__`)?`0.0.0`:BUNDLED_FALLBACK_PACKAGE_VERSION}const FALLBACK_PACKAGE_INFO={name:EVE_PACKAGE_NAME,version:resolveFallbackPackageVersion()};function resolveCurrentModulePath(){return typeof __filename==`string`?__filename:resolveCurrentModulePathFromStack()}function resolveCurrentModulePathFromStack(){let e=Error.prepareStackTrace;try{Error.prepareStackTrace=(e,t)=>t;let e=Error().stack?.[0]?.getFileName();if(typeof e!=`string`||e.length===0)throw Error(`Failed to resolve the current module path from the stack trace.`);return e.startsWith(`file:`)?fileURLToPath(e):e}finally{Error.prepareStackTrace=e}}const require=createRequire(resolveCurrentModulePath());function isBuildOutputPackageRoot(e){return basename(e)===`dist`&&existsSync(join(dirname(e),`package.json`))}function resolvePackageBuildRoot(){let e=dirname(realpathSync(resolveCurrentModulePath()));for(;;){if(isBuildOutputPackageRoot(e))return e;let t=dirname(e);if(t===e)return null;e=t}}function findNearestPackageRoot(e){let t=e;for(;;){if(existsSync(join(t,`package.json`))&&!isBuildOutputPackageRoot(t))return t;let r=dirname(t);if(r===t)throw Error(`Failed to resolve package root from "${e}".`);t=r}}function resolvePackageRoot(){return findNearestPackageRoot(dirname(realpathSync(resolveCurrentModulePath())))}function tryResolvePackageRoot(){try{return resolvePackageRoot()}catch{return}}function rewriteSourceFilePathForBuild(e){return e.replace(/\.[cm]?tsx?$/,`.js`)}function resolvePackageSourceFilePath(e){let t=resolvePackageBuildRoot();return t===null?join(resolvePackageRoot(),e):join(t,rewriteSourceFilePathForBuild(e))}function resolvePackageSourceDirectoryPath(e){let t=resolvePackageBuildRoot();return join(t===null?resolvePackageRoot():t,e)}function resolvePackageDependencyPath(e){return require.resolve(e)}function resolvePackageCompiledFilePath(e){let t=resolvePackageBuildRoot();return t===null?join(resolvePackageRoot(),`.generated`,`compiled`,e.replace(/^src\/compiled\//,``)):join(t,e)}function normalizeInstalledPackageInfo(e){let t=e;if(!(typeof t.name!=`string`||typeof t.version!=`string`))return{name:t.name,version:t.version}}function tryReadInstalledPackageInfo(e,t){let n=normalizeInstalledPackageInfo(JSON.parse(readFileSync(e,`utf8`)));if(n?.name===t)return n}function resolveInstalledPackageInfo(){if(cachedPackageInfo)return cachedPackageInfo;let e=tryResolvePackageRoot(),t=e===void 0?void 0:tryReadInstalledPackageInfo(join(e,`package.json`),EVE_PACKAGE_NAME);if(t)return cachedPackageInfo=t,cachedPackageInfo;try{let e=tryReadInstalledPackageInfo(require.resolve(`${EVE_PACKAGE_NAME}/package.json`),EVE_PACKAGE_NAME);if(e)return cachedPackageInfo=e,cachedPackageInfo}catch{}return cachedPackageInfo={...FALLBACK_PACKAGE_INFO},cachedPackageInfo}function resolveWorkflowModulePath(e){if(e===`workflow`)return resolvePackageSourceFilePath(`src/internal/workflow/index.ts`);if(e===`workflow/internal/builtins`)return resolvePackageSourceFilePath(`src/internal/workflow/builtins.ts`);let t=WORKFLOW_MODULE_ALIASES[e];return t===void 0?require.resolve(e):resolvePackageCompiledFilePath(t)}export{resolveInstalledPackageInfo,resolvePackageDependencyPath,resolvePackageRoot,resolvePackageSourceDirectoryPath,resolvePackageSourceFilePath,resolveWorkflowModulePath};
|
package/dist/src/internal/nitro/routes/agent-info/build-agent-info-response-from-manifest.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{CODE_MODE_TOOL_NAME,WORKFLOW_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{getAllFrameworkChannelNames,getFrameworkChannelDefinitions}from"#runtime/framework-channels/index.js";import{getAllFrameworkToolNames,getFrameworkToolDefinitions}from"#runtime/framework-tools/index.js";import{createConnectionSearchResolver}from"#runtime/framework-tools/connection-search-dynamic.js";import{renderChannel,renderDynamicResolver,renderSchedule,renderSubagent,renderTool,toSource}from"#internal/nitro/routes/agent-info/build-agent-info-response.js";function buildAgentInfoResponseFromManifest(i,a){let o=i.manifest,s=o.channels.filter(e=>e.kind===`channel`),c=o.channels.filter(e=>e.kind===`disabled`).map(e=>e.name),l=new Set(o.tools.map(e=>e.name)),u=new Set(o.disabledFrameworkTools),d=getAllFrameworkToolNames(),f=getAllFrameworkChannelNames(),p=getFrameworkToolDefinitions({hasConnections:o.connections.length>0}),m=getFrameworkChannelDefinitions(),h=p.filter(e=>!l.has(e.name)&&!u.has(e.name)),g=o.tools.map(e=>({...toSource(e),description:e.description,hasAuth:!1,hasCompactionHook:!1,hasExecute:!0,hasModelOutputProjection:!1,hasOutputSchema:e.outputSchema!==void 0&&e.outputSchema!==null,inputSchema:e.inputSchema,name:e.name,origin:`authored`,outputSchema:e.outputSchema??null,replacesFrameworkTool:d.has(e.name),requiresApproval:!1})),_=new Set(s.map(e=>e.name)),v=new Set(c),y=m.filter(e=>!_.has(e.name)&&!v.has(e.name)),b=s.map(e=>({...toSource(e),adapterKind:e.adapterKind,method:e.method,name:e.name,origin:`authored`,urlPath:e.urlPath}));return{agent:{agentRoot:o.agentRoot,appRoot:o.appRoot,configSource:o.config.source?toSource(o.config.source):void 0,description:o.config.description,model:{contextWindowTokens:o.config.model.contextWindowTokens,id:o.config.model.id,providerOptions:o.config.model.providerOptions,source:o.config.model.source?toSource(o.config.model.source):void 0},name:o.config.name,outputSchema:o.config.outputSchema},capabilities:{devRoutes:a.mode===`development`},channels:{authored:b,available:[...y.map(e=>renderChannel(e,{origin:`framework`})),...b],disabledFramework:c,framework:m.filter(e=>f.has(e.name)).map(e=>{let t=_.has(e.name),n=v.has(e.name),r=n?`disabled`:t?`replaced`:`active`;return{...renderChannel(e,{origin:`framework`}),disabledByAuthor:n,replacedByAuthoredChannel:t,status:r}})},connections:o.connections.map(e=>({...toSource(e),connectionName:e.connectionName,description:e.description,hasApproval:!1,hasAuthorization:e.vercelConnect!==void 0,hasHeaders:!1,protocol:e.protocol,url:e.url})),diagnostics:{discoveryErrors:o.diagnosticsSummary.errors,discoveryWarnings:o.diagnosticsSummary.warnings},hooks:o.hooks.map(e=>({...toSource(e),eventNames:[],slug:e.slug})),instructions:{dynamic:o.dynamicInstructions.map(e=>renderDynamicResolver(e,{origin:`authored`})),static:o.instructions===void 0?null:{...toSource(o.instructions),markdown:o.instructions.markdown,name:o.instructions.name}},kind:`eve-agent-info`,mode:a.mode,sandbox:o.sandbox===null?null:{...toSource(o.sandbox),description:o.sandbox.description,hasBootstrap:!1,hasOnSession:!1,revalidationKey:o.sandbox.revalidationKey,sourceHash:o.sandbox.sourceHash},schedules:i.schedules.map(renderSchedule),skills:{static:o.skills.map(e=>({...toSource(e),description:e.description,license:e.license,markdown:e.markdown,metadata:e.metadata,name:e.name})),dynamic:o.dynamicSkills.map(e=>renderDynamicResolver(e,{origin:`authored`}))},subagents:{local:o.subagents.map(renderSubagent),total:o.subagents.length},tools:{available:[...h.map(e=>renderTool(e,{origin:`framework`,replacesFrameworkTool:!1})),...g],authored:g,disabledFramework:[...o.disabledFrameworkTools],dynamic:[...o.connections.length>0?[renderDynamicResolver(createConnectionSearchResolver(),{origin:`framework`})]:[],...o.dynamicTools.map(e=>renderDynamicResolver(e,{origin:`authored`}))],framework:p.map(e=>{let t=l.has(e.name),n=u.has(e.name),r=n?`disabled`:t?`replaced`:`active`;return{...renderTool(e,{origin:`framework`,replacesFrameworkTool:!1}),disabledByAuthor:n,replacedByAuthoredTool:t,status:r}}),reserved:[CODE_MODE_TOOL_NAME,WORKFLOW_TOOL_NAME,LOAD_SKILL_TOOL_NAME]},version:1,workflow:{enabled:o.workflowEnabled,toolName:WORKFLOW_TOOL_NAME},workspace:{resourceRoot:o.workspaceResourceRoot,rootEntries:[...o.workspaceResourceRoot.rootEntries]}}}export{buildAgentInfoResponseFromManifest};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{CODE_MODE_TOOL_NAME,WORKFLOW_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{getAllFrameworkChannelNames,getFrameworkChannelDefinitions}from"#runtime/framework-channels/index.js";import{getAllFrameworkToolNames,getFrameworkToolDefinitions}from"#runtime/framework-tools/index.js";import{createConnectionSearchResolver}from"#runtime/framework-tools/connection-search-dynamic.js";function buildAgentInfoResponse(e,n){let r=e.agent,i=buildToolInfo(r);return{agent:{agentRoot:r.metadata.agentRoot,appRoot:r.metadata.appRoot,configSource:r.config.source?toSource(r.config.source):void 0,description:r.config.description,model:{contextWindowTokens:r.config.model.contextWindowTokens,id:r.config.model.id,providerOptions:r.config.model.providerOptions,source:r.config.model.source?toSource(r.config.model.source):void 0},name:r.config.name,outputSchema:r.config.outputSchema},capabilities:{devRoutes:n.mode===`development`},channels:buildChannelInfo(r),connections:r.connections.map(e=>({...toSource(e),connectionName:e.connectionName,description:e.description,hasApproval:e.approval!==void 0,hasAuthorization:e.authorization!==void 0,hasHeaders:e.headers!==void 0,protocol:e.protocol,toolFilter:e.tools,url:e.url})),diagnostics:{discoveryErrors:r.metadata.diagnosticsSummary.errors,discoveryWarnings:r.metadata.diagnosticsSummary.warnings},hooks:r.hooks.map(e=>({...toSource(e),eventNames:Object.keys(e.events).sort(),slug:e.slug})),instructions:{dynamic:r.dynamicInstructionsResolvers.map(e=>renderDynamicResolver(e,{origin:`authored`})),static:r.instructions?{...toSource(r.instructions),markdown:r.instructions.markdown,name:r.instructions.name}:null},kind:`eve-agent-info`,mode:n.mode,sandbox:renderSandbox(r.sandbox),schedules:e.schedules.map(renderSchedule),skills:{static:r.skills.map(renderSkill),dynamic:r.dynamicSkillResolvers.map(e=>renderDynamicResolver(e,{origin:`authored`}))},subagents:{local:e.manifest.subagents.map(renderSubagent),total:e.manifest.subagents.length},tools:i,version:1,workflow:{enabled:r.workflowEnabled,toolName:WORKFLOW_TOOL_NAME},workspace:{resourceRoot:r.workspaceResourceRoot,rootEntries:[...r.workspaceSpec.rootEntries]}}}function buildChannelInfo(e){let t=new Set(e.channels.map(e=>e.name)),n=new Set(e.disabledFrameworkChannels),a=getAllFrameworkChannelNames(),o=getFrameworkChannelDefinitions(),s=o.filter(e=>!t.has(e.name)&&!n.has(e.name)),c=e.channels.map(e=>renderChannel(e,{origin:`authored`})),l=o.map(e=>{let r=t.has(e.name),i=n.has(e.name),a=i?`disabled`:r?`replaced`:`active`;return{...renderChannel(e,{origin:`framework`}),disabledByAuthor:i,replacedByAuthoredChannel:r,status:a}});return{authored:c,available:[...s.map(e=>renderChannel(e,{origin:`framework`})),...c],disabledFramework:[...e.disabledFrameworkChannels],framework:l.filter(e=>a.has(e.name))}}function buildToolInfo(r){let i=new Set(r.tools.map(e=>e.name)),c=new Set(r.disabledFrameworkTools),l=getAllFrameworkToolNames(),u=getFrameworkToolDefinitions({hasConnections:r.connections.length>0}),d=r.connections.length>0?[createConnectionSearchResolver()]:[],f=u.filter(e=>!i.has(e.name)&&!c.has(e.name)),p=r.tools.map(e=>renderTool(e,{origin:`authored`,replacesFrameworkTool:l.has(e.name)})),m=u.map(e=>{let t=i.has(e.name),n=c.has(e.name),r=n?`disabled`:t?`replaced`:`active`;return{...renderTool(e,{origin:`framework`,replacesFrameworkTool:!1}),disabledByAuthor:n,replacedByAuthoredTool:t,status:r}});return{available:[...f.map(e=>renderTool(e,{origin:`framework`,replacesFrameworkTool:!1})),...p],authored:p,disabledFramework:[...r.disabledFrameworkTools],dynamic:[...d.map(e=>renderDynamicResolver(e,{origin:`framework`})),...r.dynamicToolResolvers.map(e=>renderDynamicResolver(e,{origin:`authored`}))],framework:m,reserved:[CODE_MODE_TOOL_NAME,WORKFLOW_TOOL_NAME,LOAD_SKILL_TOOL_NAME]}}function renderChannel(e,t){return{...toSource(e),adapterKind:e.adapter?.kind,method:e.method,name:e.name,origin:t.origin,urlPath:e.urlPath}}function renderTool(e,t){return{...toSource(e),description:e.description,hasAuth:e.auth!==void 0,hasExecute:e.execute!==void 0,hasModelOutputProjection:e.toModelOutput!==void 0,hasOutputSchema:e.outputSchema!==void 0&&e.outputSchema!==null,inputSchema:e.inputSchema,name:e.name,origin:t.origin,outputSchema:e.outputSchema,replacesFrameworkTool:t.replacesFrameworkTool,requiresApproval:e.needsApproval!==void 0}}function renderSkill(e){return{...toSource(e),description:e.description,license:e.license,markdown:e.markdown,metadata:e.metadata,name:e.name}}function renderSchedule(e){return{...toSource(e),cron:e.cron,hasRun:e.hasRun,markdown:e.markdown,name:e.name}}function renderSandbox(e){return e===null?null:{...toSource(e),backendKind:resolveBackendKind(e.backend),description:e.description,hasBootstrap:e.bootstrap!==void 0,hasOnSession:e.onSession!==void 0,revalidationKey:e.revalidationKey,sourceHash:e.sourceHash}}function renderSubagent(e){return{...toSource(e),description:e.description,entryPath:e.entryPath,name:e.name,nodeId:e.nodeId,rootPath:e.rootPath,summary:{channels:e.agent.channels.length,connections:e.agent.connections.length,hooks:e.agent.hooks.length,instructions:e.agent.instructions!==void 0,schedules:e.agent.schedules.length,skills:e.agent.skills.length,tools:e.agent.tools.length}}}function renderDynamicResolver(e,t){return{...toSource(e),eventNames:[...e.eventNames],origin:t.origin,slug:e.slug}}function toSource(e){return{exportName:e.exportName,logicalPath:e.logicalPath,sourceId:e.sourceId,sourceKind:e.sourceKind}}function resolveBackendKind(e){if(typeof e!=`object`||!e)return;let t=e.kind;return typeof t==`string`?t:void 0}export{buildAgentInfoResponse,renderChannel,renderDynamicResolver,renderSchedule,renderSubagent,renderTool,toSource};
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import type { LanguageModel } from "ai";
|
|
2
2
|
import { type RuntimeModelReference } from "#runtime/agent/bootstrap.js";
|
|
3
|
-
/**
|
|
4
|
-
* Environment variable that forces authored runtime models onto the dedicated
|
|
5
|
-
* deterministic mock adapter.
|
|
6
|
-
*/
|
|
7
|
-
export declare const EVE_MOCK_AUTHORED_MODELS_ENV = "EVE_MOCK_AUTHORED_MODELS";
|
|
8
3
|
/**
|
|
9
4
|
* Returns true when authored runtime models should resolve through the
|
|
10
|
-
* dedicated deterministic mock adapter.
|
|
5
|
+
* dedicated deterministic mock adapter. The adapter is internal to the test
|
|
6
|
+
* tiers: it activates only under `NODE_ENV=test`, keeping the unit,
|
|
7
|
+
* integration, and scenario suites deterministic and credential-free.
|
|
11
8
|
*/
|
|
12
9
|
export declare function shouldMockAuthoredRuntimeModels(): boolean;
|
|
13
10
|
/**
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import{z}from"#compiled/zod/index.js";import{CODE_MODE_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{FINAL_OUTPUT_TOOL_NAME}from"#runtime/framework-tools/final-output.js";import{MockLanguageModelV3}from"ai/test";import{BOOTSTRAP_RUNTIME_MODEL_ID,BOOTSTRAP_RUNTIME_SYSTEM_PROMPT}from"#runtime/agent/bootstrap.js";import{createBootstrapGenerateResult,createBootstrapStreamResult,estimateTokenCount,getLastUserPromptText,getPromptContentText,getPromptText}from"#runtime/agent/bootstrap-model-utils.js";import{createMockAuthoredToolInput,formatToolOutput,resolveMockFixtureToken,resolveWeatherCity}from"#runtime/agent/mock-model-fixtures.js";import{findRelevantSkill,getActivatedSkillIds,getAvailableSkills}from"#runtime/agent/mock-model-skill-selection.js";import{createJsonSchemaSample}from"#runtime/agent/mock-structured-output.js";const authoredRuntimeModelMocks=new Map,bootstrapWeatherPayloadSchema=z.object({city:z.string(),condition:z.string(),summary:z.string(),temperatureF:z.number().finite()}).strict()
|
|
1
|
+
import{z}from"#compiled/zod/index.js";import{CODE_MODE_TOOL_NAME}from"#shared/code-mode.js";import{LOAD_SKILL_TOOL_NAME}from"#runtime/skills/fragment-context.js";import{FINAL_OUTPUT_TOOL_NAME}from"#runtime/framework-tools/final-output.js";import{MockLanguageModelV3}from"ai/test";import{BOOTSTRAP_RUNTIME_MODEL_ID,BOOTSTRAP_RUNTIME_SYSTEM_PROMPT}from"#runtime/agent/bootstrap.js";import{createBootstrapGenerateResult,createBootstrapStreamResult,estimateTokenCount,getLastUserPromptText,getPromptContentText,getPromptText}from"#runtime/agent/bootstrap-model-utils.js";import{createMockAuthoredToolInput,formatToolOutput,resolveMockFixtureToken,resolveWeatherCity}from"#runtime/agent/mock-model-fixtures.js";import{findRelevantSkill,getActivatedSkillIds,getAvailableSkills}from"#runtime/agent/mock-model-skill-selection.js";import{createJsonSchemaSample}from"#runtime/agent/mock-structured-output.js";const authoredRuntimeModelMocks=new Map,bootstrapWeatherPayloadSchema=z.object({city:z.string(),condition:z.string(),summary:z.string(),temperatureF:z.number().finite()}).strict();function shouldMockAuthoredRuntimeModels(){return process.env.NODE_ENV===`test`}function createMockAuthoredRuntimeModel(e){let t=authoredRuntimeModelMocks.get(e.id);if(t!==void 0)return t;let n=new MockLanguageModelV3({modelId:e.id,provider:`eve-runtime-mock`,doGenerate:async t=>createMockModelResult(t,e.id),doStream:async t=>createBootstrapStreamResult(createMockModelResult(t,e.id))});return authoredRuntimeModelMocks.set(e.id,n),n}function createMockModelResult(e,t){let n=getLastAuthoredToolResult(e.prompt);if(n!==null){let r=createFollowUpToolCallResult({modelId:t,options:e,result:n});if(r!==null)return r}else{let n=createSkillLoadResult(e.prompt,t)??createAuthoredToolCallResult(e,t);if(n!==null)return n}let r=createFinalOutputResult(e,t);if(r!==null)return r;let i=n===null?createAssistantMessage(e.prompt):formatToolResultReply(n,e.prompt);return createBootstrapGenerateResult({inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:t,outputTokens:estimateTokenCount(i),text:i})}function createFinalOutputResult(e,t){let n=getAvailableTools(e).find(e=>e.name===FINAL_OUTPUT_TOOL_NAME);if(n===void 0)return null;let i=createJsonSchemaSample(n.inputSchema);return createToolCallGenerateResult({input:i,inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:t,outputTokens:estimateTokenCount(JSON.stringify(i)),toolCallId:createToolCallId(FINAL_OUTPUT_TOOL_NAME),toolName:FINAL_OUTPUT_TOOL_NAME})}function resolveMockAuthoredRuntimeModel(e){return!shouldMockAuthoredRuntimeModels()||e.id===BOOTSTRAP_RUNTIME_MODEL_ID?null:createMockAuthoredRuntimeModel(e)}function createSkillLoadResult(e,t){let r=getLastUserPromptText(e);if(r===null||getActivatedSkillIds(e).length>0)return null;let i=findRelevantSkill(getAvailableSkills(e),r);return i===null?null:createToolCallGenerateResult({input:{skill:i.name},inputTokens:estimateTokenCount(getPromptText(e)),modelId:t,outputTokens:estimateTokenCount(i.name),toolCallId:`call_load_skill`,toolName:LOAD_SKILL_TOOL_NAME})}function createAuthoredToolCallResult(e,n){let r=getLastUserPromptText(e.prompt);if(r===null)return null;let i=findRelevantTool(getAvailableTools(e),r);if(i===null)return null;let a=resolveWeatherCity(r),o=createMockAuthoredToolInput(i,r,a);if(i.name===CODE_MODE_TOOL_NAME){let t=findRelevantCodeModeHostTool(i.description,r);if(t===null)return null;let o=`return await tools${formatCodeModeToolAccess(t)}({ city: ${JSON.stringify(a)} });`;return createToolCallGenerateResult({input:{js:o},inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:n,outputTokens:estimateTokenCount(o),toolCallId:createToolCallId(i.name),toolName:i.name})}return createToolCallGenerateResult({input:o,inputTokens:estimateTokenCount(getPromptText(e.prompt)),modelId:n,outputTokens:estimateTokenCount(Object.values(o).join(` `)),toolCallId:createToolCallId(i.name),toolName:i.name})}function createFollowUpToolCallResult(e){let t=findNextExplicitToolAfterResult({previousToolName:e.result.toolName,prompt:e.options.prompt,tools:getAvailableTools(e.options)});if(t===null)return null;let n=createFollowUpToolInput(e.result.output);return n===null?null:createToolCallGenerateResult({input:n,inputTokens:estimateTokenCount(getPromptText(e.options.prompt)),modelId:e.modelId,outputTokens:estimateTokenCount(Object.values(n).join(` `)),toolCallId:createToolCallId(t.name),toolName:t.name})}function createAssistantMessage(e){let t=getLastUserPromptText(e)??`Hello from Eve`,n=getSystemPromptLabels(e),r=resolveSystemProbe(e),i=resolveMockFixtureToken(e);return i===null?n.length>0?r===null?`Bootstrap reply [${n.join(`, `)}]: ${t}`:`Bootstrap reply [${n.join(`, `)}; probe=${r}]: ${t}`:r===null?`Bootstrap reply: ${t}`:`Bootstrap reply [probe=${r}]: ${t}`:i}function formatToolResultReply(e,t){if(e.isError)return`Local weather tool failed: ${formatToolOutput(e.output)}`;if(isWeatherPayload(e.output))return`Used local weather tool for ${e.output.city}: ${e.output.condition}, ${e.output.temperatureF}F. ${e.output.summary}`;let n=getLastUserPromptText(t)??`Hello from Eve`;return`Used ${e.toolName} for "${n}": ${formatToolOutput(e.output)}`}function createToolCallGenerateResult(e){return{content:[{input:JSON.stringify(e.input),toolCallId:e.toolCallId,toolName:e.toolName,type:`tool-call`}],finishReason:{raw:void 0,unified:`tool-calls`},response:{id:`bootstrap-response`,modelId:e.modelId,timestamp:new Date(`2026-03-16T00:00:00.000Z`)},usage:{inputTokens:{cacheRead:0,cacheWrite:0,noCache:e.inputTokens,total:e.inputTokens},outputTokens:{reasoning:0,text:e.outputTokens,total:e.outputTokens}},warnings:[]}}function getAvailableTools(e){return(e.tools??[]).flatMap(e=>e.type===`function`?[{description:e.description,inputSchema:`inputSchema`in e?e.inputSchema:void 0,name:e.name,outputSchema:`outputSchema`in e?e.outputSchema:void 0}]:[])}function getLastAuthoredToolResult(e){for(let t of[...e].reverse()){if(t.role===`user`)return null;if(!(t.role!==`tool`&&t.role!==`assistant`)){for(let e of[...t.content].reverse())if(!(typeof e==`string`||e.type!==`tool-result`)&&e.toolName!==LOAD_SKILL_TOOL_NAME)return{isError:e.output.type===`error-json`||e.output.type===`error-text`||e.output.type===`execution-denied`,output:e.output.type===`execution-denied`?{reason:e.output.reason??null,type:e.output.type}:e.output.value,toolCallId:e.toolCallId,toolName:e.toolName}}}return null}function findNextExplicitToolAfterResult(e){let t=getLastUserPromptText(e.prompt);if(t===null)return null;let n=normalizeText(t),r=n.indexOf(normalizeText(e.previousToolName));return r<0?null:e.tools.filter(t=>t.name!==e.previousToolName).flatMap(e=>{let t=n.indexOf(normalizeText(e.name),r+1);return t<0?[]:[{index:t,tool:e}]}).sort((e,t)=>e.index-t.index)[0]?.tool??null}function createFollowUpToolInput(e){return isRecord(e)&&typeof e.stepKey==`string`?{stepKey:e.stepKey}:null}function getSystemPromptLabels(e){let t=e.filter(e=>e.role===`system`);if(t.length===0)return[];let n=t.flatMap(e=>{let t=getPromptContentText(e.content);if(t.startsWith(`Available skills
|
|
2
2
|
`))return[];let n=t.split(`
|
|
3
3
|
`).map(e=>e.trim()).filter(e=>e.length>0),r=[];for(let e of n){if(e===BOOTSTRAP_RUNTIME_SYSTEM_PROMPT||e===`Available skills`)continue;let t=/^System \((.+)\)$/.exec(e);if(t?.[1]){r.push(t[1]);continue}let n=/^Skill \((.+)\)$/.exec(e);n?.[1]&&r.push(n[1])}if(r.length>0)return r;let i=n.find(e=>e!==BOOTSTRAP_RUNTIME_SYSTEM_PROMPT&&e!==`Available skills`);return i===void 0?[]:[i]});return[...new Set(n)]}function findRelevantTool(e,t){let r=normalizeText(t),i=e.find(e=>e.name!==`agent`&&e.name!==LOAD_SKILL_TOOL_NAME&&r.includes(normalizeText(e.name)));return i===void 0?/\b(forecast|temperature|weather|wind|rain|snow)\b/u.test(r)?e.find(e=>/\b(forecast|temperature|weather|wind|rain|snow)\b/u.test(normalizeText(`${e.name} ${e.description??``}`)))??null:null:i}function findRelevantCodeModeHostTool(e,t){return e===void 0?null:findRelevantTool(parseCodeModeHostTools(e),t)?.name??null}function parseCodeModeHostTools(e){let t=[],n;for(let r of e.split(`
|
|
4
4
|
`)){let e=/^\s*\/\*\*\s*(.*?)\s*\*\/\s*$/u.exec(r);if(e?.[1]!==void 0){n=e[1];continue}let i=/^\s*(?:([$A-Z_a-z][$\w]*)|(["'])(.*?)\2)\s*:\s*\(input:/u.exec(r),a=i?.[1]??i?.[3];a!==void 0&&(t.push({description:n,name:a}),n=void 0)}return t}function formatCodeModeToolAccess(e){return/^[$A-Z_a-z][$\w]*$/u.test(e)?`.${e}`:`[${JSON.stringify(e)}]`}function normalizeText(e){return e.toLowerCase().replace(/[^a-z0-9]+/gu,` `).trim()}function createToolCallId(e){return`call_${e.toLowerCase().replace(/[^a-z0-9]+/gu,`_`).replace(/^_+|_+$/gu,``)||`tool`}`}function resolveSystemProbe(e){let t=e.filter(e=>e.role===`system`).map(e=>getPromptContentText(e.content)).join(`
|
|
5
|
-
`);return/hmr-probe:\s*([^\n]+)/iu.exec(t)?.[1]?.trim()||null}function isWeatherPayload(e){return bootstrapWeatherPayloadSchema.safeParse(e).success}function isRecord(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}export{
|
|
5
|
+
`);return/hmr-probe:\s*([^\n]+)/iu.exec(t)?.[1]?.trim()||null}function isWeatherPayload(e){return bootstrapWeatherPayloadSchema.safeParse(e).success}function isRecord(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}export{createMockAuthoredRuntimeModel,resolveMockAuthoredRuntimeModel,shouldMockAuthoredRuntimeModels};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { LanguageModel } from "ai";
|
|
2
2
|
import type { RuntimeCompiledArtifactsSource } from "#runtime/compiled-artifacts-source.js";
|
|
3
3
|
import type { RuntimeModelReference } from "#runtime/agent/bootstrap.js";
|
|
4
|
-
import {
|
|
5
|
-
export {
|
|
4
|
+
import { shouldMockAuthoredRuntimeModels } from "#runtime/agent/mock-model-adapter.js";
|
|
5
|
+
export { shouldMockAuthoredRuntimeModels };
|
|
6
6
|
/**
|
|
7
7
|
* Resolves one runtime model reference into the active language model.
|
|
8
8
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{expectObjectRecord,getAuthoredModuleExport,materializeAuthoredModuleExport}from"#internal/authored-module.js";import{ROOT_COMPILED_AGENT_NODE_ID}from"#compiler/manifest.js";import{normalizeAgentDefinition}from"#internal/authored-definition/core.js";import{loadCompiledModuleMap}from"#runtime/loaders/module-map.js";import{resolveBootstrapRuntimeModel}from"#runtime/agent/bootstrap-model.js";import{
|
|
1
|
+
import{expectObjectRecord,getAuthoredModuleExport,materializeAuthoredModuleExport}from"#internal/authored-module.js";import{ROOT_COMPILED_AGENT_NODE_ID}from"#compiler/manifest.js";import{normalizeAgentDefinition}from"#internal/authored-definition/core.js";import{loadCompiledModuleMap}from"#runtime/loaders/module-map.js";import{resolveBootstrapRuntimeModel}from"#runtime/agent/bootstrap-model.js";import{resolveMockAuthoredRuntimeModel,shouldMockAuthoredRuntimeModels}from"#runtime/agent/mock-model-adapter.js";async function resolveRuntimeModelReference(e,t={}){let n=resolveBootstrapRuntimeModel(e);if(n!==null)return n;let r=resolveMockAuthoredRuntimeModel(e);return r===null?isSourceBackedRuntimeModelReference(e)?await loadSourceBackedRuntimeModelReference(e,t):e.id:r}async function loadSourceBackedRuntimeModelReference(i,a){if(a.compiledArtifactsSource===void 0)throw Error(`Expected an explicit compiled artifact source to resolve the authored runtime model "${i.id}".`);let o=(await loadCompiledModuleMap({compiledArtifactsSource:a.compiledArtifactsSource})).nodes[ROOT_COMPILED_AGENT_NODE_ID]?.modules[i.source.sourceId],s=normalizeAgentDefinition(await materializeAuthoredModuleExport(getAuthoredModuleExport(expectObjectRecord(o,`Missing compiled agent config module for runtime model "${i.id}" at "${i.source.logicalPath}".`),i.source)),`Expected the authored agent config export "${i.source.exportName??`default`}" from "${i.source.logicalPath}" to match the public Eve shape.`).model;if(s===void 0)throw Error(`Expected the authored agent config export "${i.source.exportName??`default`}" from "${i.source.logicalPath}" to provide a runtime model.`);return s}function isSourceBackedRuntimeModelReference(e){return e.source!==void 0}export{resolveRuntimeModelReference,shouldMockAuthoredRuntimeModels};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{patchPackageJson}from"../update/package-json.js";import{resolveVersionToken}from"../version-tokens.js";import{agentTemplateFiles,formatEveDependencySpecifier}from"./project.js";import{join}from"node:path";import{readFile}from"node:fs/promises";const DEPENDENCY_FIELDS=[`dependencies`,`devDependencies`,`optionalDependencies`,`peerDependencies`];function isJsonObject(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}function hasDeclaredDependency(e,t){if(!isJsonObject(e))return!1;for(let n of DEPENDENCY_FIELDS){let r=e[n];if(isJsonObject(r)&&typeof r[t]==`string`)return!0}return!1}async function addAgentToProject(i){let a=join(i.projectRoot,`package.json`);if(!await pathExists(a))throw Error(`Cannot add an Eve agent to "${i.projectRoot}" because it has no package.json. Run \`eve init <name>\` to create a new project instead.`);let o=agentTemplateFiles(i.model),s=[];for(let e of Object.keys(o))await pathExists(join(i.projectRoot,e))&&s.push(e);if(s.length===0&&await pathExists(join(i.projectRoot,`agent`))&&s.push(`agent/`),s.length>0)throw Error(`Cannot add an Eve agent to "${i.projectRoot}" because it already has: ${s.join(`, `)}. Move them aside first.`);let c=resolveVersionToken(`evePackageVersion`,i.evePackageVersion??`0.7.
|
|
1
|
+
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{patchPackageJson}from"../update/package-json.js";import{resolveVersionToken}from"../version-tokens.js";import{agentTemplateFiles,formatEveDependencySpecifier}from"./project.js";import{join}from"node:path";import{readFile}from"node:fs/promises";const DEPENDENCY_FIELDS=[`dependencies`,`devDependencies`,`optionalDependencies`,`peerDependencies`];function isJsonObject(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}function hasDeclaredDependency(e,t){if(!isJsonObject(e))return!1;for(let n of DEPENDENCY_FIELDS){let r=e[n];if(isJsonObject(r)&&typeof r[t]==`string`)return!0}return!1}async function addAgentToProject(i){let a=join(i.projectRoot,`package.json`);if(!await pathExists(a))throw Error(`Cannot add an Eve agent to "${i.projectRoot}" because it has no package.json. Run \`eve init <name>\` to create a new project instead.`);let o=agentTemplateFiles(i.model),s=[];for(let e of Object.keys(o))await pathExists(join(i.projectRoot,e))&&s.push(e);if(s.length===0&&await pathExists(join(i.projectRoot,`agent`))&&s.push(`agent/`),s.length>0)throw Error(`Cannot add an Eve agent to "${i.projectRoot}" because it already has: ${s.join(`, `)}. Move them aside first.`);let c=resolveVersionToken(`evePackageVersion`,i.evePackageVersion??`0.7.3`),l=resolveVersionToken(`aiPackageVersion`,i.aiPackageVersion??`7.0.0-canary.171`),u=resolveVersionToken(`zodPackageVersion`,i.zodPackageVersion??`4.4.3`),d=[];for(let[e,t]of Object.entries(o)){let r=join(i.projectRoot,e);await writeTextFile(r,t),d.push(r)}let f=JSON.parse(await readFile(a,`utf8`)),p={ai:l,eve:formatEveDependencySpecifier(c),zod:u},m={};for(let[e,t]of Object.entries(p))hasDeclaredDependency(f,e)||(m[e]=t);return Object.keys(m).length>0&&await patchPackageJson(a,{dependencies:m}),await getPackageManagerStrategy(i.packageManager??`pnpm`).applyProjectConfiguration(i.projectRoot),{filesWritten:d,dependenciesAdded:Object.keys(m).sort()}}export{addAgentToProject};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{resolveVersionToken}from"../version-tokens.js";import{SUPPORTED_AUTHORED_MODULE_FILE_EXTENSIONS}from"../update/module-files.js";import{WEB_APP_TEMPLATE_FILES}from"./web-template.js";import{basename,join,resolve}from"node:path";import{mkdir,readdir,stat}from"node:fs/promises";const CURRENT_DIRECTORY_PROJECT_NAME=`.`,ALLOWED_CREATE_IN_PLACE_ENTRIES=new Set([`.DS_Store`,`.git`,`.gitkeep`,`.hg`]),DEFAULT_EVE_PACKAGE_VERSION=`0.7.
|
|
1
|
+
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{resolveVersionToken}from"../version-tokens.js";import{SUPPORTED_AUTHORED_MODULE_FILE_EXTENSIONS}from"../update/module-files.js";import{WEB_APP_TEMPLATE_FILES}from"./web-template.js";import{basename,join,resolve}from"node:path";import{mkdir,readdir,stat}from"node:fs/promises";const CURRENT_DIRECTORY_PROJECT_NAME=`.`,ALLOWED_CREATE_IN_PLACE_ENTRIES=new Set([`.DS_Store`,`.git`,`.gitkeep`,`.hg`]),DEFAULT_EVE_PACKAGE_VERSION=`0.7.3`,DEFAULT_AI_PACKAGE_VERSION=`7.0.0-canary.171`,DEFAULT_ZOD_PACKAGE_VERSION=`4.4.3`;function modelProviderSlug(e){let t=(e.split(`/`)[0]??``).replaceAll(/[^A-Za-z0-9._-]/gu,``);return t.length>0?t:`anthropic`}function byokProviderEnvVar(e){let t=modelProviderSlug(e).toUpperCase().replaceAll(/[^A-Z0-9]/gu,`_`);return`${/^[0-9]/.test(t)?`_`:``}${t}_API_KEY`}function agentTemplateFiles(e){return{"agent/agent.ts":BASE_AGENT_TEMPLATE.replaceAll(`__EVE_INIT_MODEL__`,e),"agent/channels/eve.ts":WEB_APP_TEMPLATE_FILES[`agent/channels/eve.ts`],"agent/instructions.md":AGENT_INSTRUCTIONS_TEMPLATE}}function renderTemplate(e,t){return e.replaceAll(`__EVE_INIT_APP_NAME__`,t.appName).replaceAll(`__EVE_INIT_MODEL__`,t.model).replaceAll(`__EVE_INIT_BYOK_PROVIDER__`,modelProviderSlug(t.model)).replaceAll(`__EVE_INIT_BYOK_ENV_VAR__`,byokProviderEnvVar(t.model)).replaceAll(`__EVE_INIT_PACKAGE_VERSION__`,formatEveDependencySpecifier(t.evePackageVersion)).replaceAll(`__EVE_INIT_AI_SDK_VERSION__`,t.aiPackageVersion).replaceAll(`__EVE_INIT_ZOD_VERSION__`,t.zodPackageVersion).replaceAll(`__EVE_INIT_TSGO_VERSION__`,t.tsgoPackageVersion).replaceAll(`__EVE_INIT_TYPES_NODE_VERSION__`,t.typesNodePackageVersion)}function formatEveDependencySpecifier(e){return/^\d+\.\d+\.\d+(?:[-+][0-9A-Za-z-.]+)?$/.test(e)?`^${e}`:e}const BASE_AGENT_TEMPLATE=`import { defineAgent } from "eve";
|
|
2
2
|
|
|
3
3
|
export default defineAgent({
|
|
4
4
|
model: "__EVE_INIT_MODEL__",
|
|
@@ -77,4 +77,4 @@ export default defineAgent({
|
|
|
77
77
|
},
|
|
78
78
|
},
|
|
79
79
|
});
|
|
80
|
-
`:BASE_AGENT_TEMPLATE,...SHARED_TEMPLATE_FILES,"package.json":packageJsonTemplate(t),...getPackageManagerStrategy(n).scaffoldFiles}}async function assertCanCreateInPlace(e,n){if(!await pathExists(e))return;let r=(await readdir(e)).filter(e=>!ALLOWED_CREATE_IN_PLACE_ENTRIES.has(e));if(r.length>0&&!n){let e=r.slice(0,5).join(`, `),t=r.length>5?`, and ${r.length-5} more`:``;throw Error(`Cannot create project in current directory because it is not empty. Found: ${e}${t}. Use an empty directory.`)}}async function scaffoldBaseProject(e){let i=resolve(e.targetDirectory??process.cwd(),e.projectName),a=e.projectName===`.`,s=e.overwriteExisting??!1,u=e.byokProvider??!1,d=e.packageManager??`pnpm`;if(a)await assertCanCreateInPlace(i,s);else if(await pathExists(i))throw Error(`Cannot create project because "${i}" already exists.`);let f=e.typesNodePackageVersion??`25.9.1`,p={appName:basename(i),model:e.model,evePackageVersion:resolveVersionToken(`evePackageVersion`,e.evePackageVersion??`0.7.
|
|
80
|
+
`:BASE_AGENT_TEMPLATE,...SHARED_TEMPLATE_FILES,"package.json":packageJsonTemplate(t),...getPackageManagerStrategy(n).scaffoldFiles}}async function assertCanCreateInPlace(e,n){if(!await pathExists(e))return;let r=(await readdir(e)).filter(e=>!ALLOWED_CREATE_IN_PLACE_ENTRIES.has(e));if(r.length>0&&!n){let e=r.slice(0,5).join(`, `),t=r.length>5?`, and ${r.length-5} more`:``;throw Error(`Cannot create project in current directory because it is not empty. Found: ${e}${t}. Use an empty directory.`)}}async function scaffoldBaseProject(e){let i=resolve(e.targetDirectory??process.cwd(),e.projectName),a=e.projectName===`.`,s=e.overwriteExisting??!1,u=e.byokProvider??!1,d=e.packageManager??`pnpm`;if(a)await assertCanCreateInPlace(i,s);else if(await pathExists(i))throw Error(`Cannot create project because "${i}" already exists.`);let f=e.typesNodePackageVersion??`25.9.1`,p={appName:basename(i),model:e.model,evePackageVersion:resolveVersionToken(`evePackageVersion`,e.evePackageVersion??`0.7.3`),aiPackageVersion:resolveVersionToken(`aiPackageVersion`,e.aiPackageVersion??`7.0.0-canary.171`),zodPackageVersion:resolveVersionToken(`zodPackageVersion`,e.zodPackageVersion??`4.4.3`),tsgoPackageVersion:resolveVersionToken(`tsgoPackageVersion`,e.tsgoPackageVersion??`7.0.0-dev.20260523.1`),typesNodePackageVersion:u?resolveVersionToken(`typesNodePackageVersion`,f):f};await mkdir(i,{recursive:!0});for(let[r,o]of Object.entries(templateFiles(u,d))){let c=`${i}/${r}`,l=await pathExists(c);await writeTextFile(c,renderTemplate(o,p),{force:a&&s}),l&&await e.onOverwriteFile?.(c)}return i}async function isEveProject(e){for(let t of SUPPORTED_AUTHORED_MODULE_FILE_EXTENSIONS)try{return await stat(join(e,`agent`,`agent${t}`)),!0}catch{}return!1}export{CURRENT_DIRECTORY_PROJECT_NAME,DEFAULT_AI_PACKAGE_VERSION,DEFAULT_EVE_PACKAGE_VERSION,DEFAULT_ZOD_PACKAGE_VERSION,agentTemplateFiles,byokProviderEnvVar,formatEveDependencySpecifier,isEveProject,modelProviderSlug,scaffoldBaseProject};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{patchPackageJson}from"./package-json.js";import{resolveVersionToken}from"../version-tokens.js";import{getSupportedModuleBaseName,matchesSupportedModuleBaseName}from"./module-files.js";import{WEB_APP_TEMPLATE_FILES,WEB_APP_TEMPLATE_PACKAGE_JSON}from"../create/web-template.js";import"../create/project.js";import{basename,join,resolve}from"node:path";import{readFile,readdir,writeFile}from"node:fs/promises";const SLACK_CHANNEL_DEFAULT_ROUTE=`/eve/v1/slack`,DEFAULT_SLACK_CONNECTOR_SLUG=`my-agent`,PACKAGE_DEPENDENCY_FIELDS=[`dependencies`,`devDependencies`,`peerDependencies`,`optionalDependencies`],WEB_NEXT_CONFIG_PATH=`next.config.ts`,WEB_VERCEL_JSON_PATH=`vercel.json`,WEB_VERCEL_JSON_SCHEMA=`https://openapi.vercel.sh/vercel.json`,WEB_COMPETING_NEXT_CONFIG_PATHS=[`next.config.js`,`next.config.mjs`,WEB_NEXT_CONFIG_PATH,`next.config.mts`].filter(e=>e!==WEB_NEXT_CONFIG_PATH),WEB_DEFAULT_VERCEL_SERVICES={web:{entrypoint:`.`,framework:`nextjs`,routePrefix:`/`},eve:{buildCommand:`eve build`,entrypoint:`.`,framework:`eve`,routePrefix:`/_eve_internal/eve`}};function toSlackConnectorSlug(e){return e}function isJsonObject(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}async function readDependencyVersion(e,t){let n=JSON.parse(await readFile(e,`utf8`));if(!isJsonObject(n)||!isJsonObject(n.dependencies))return;let r=n.dependencies[t];return typeof r==`string`?r:void 0}function packageJsonHasDependency(e,t){for(let n of PACKAGE_DEPENDENCY_FIELDS){let r=e[n];if(isJsonObject(r)&&typeof r[t]==`string`)return!0}return!1}async function hasPackageDependency(e,n){if(!await pathExists(e))return!1;let r=JSON.parse(await readFile(e,`utf8`));return isJsonObject(r)&&packageJsonHasDependency(r,n)}async function isNextJsProject(e){return hasPackageDependency(join(e,`package.json`),`next`)}async function ensurePackageDependency(e,n,i){return!await pathExists(e)||await readDependencyVersion(e,n)===i?[]:(await patchPackageJson(e,{dependencies:{[n]:i}}),[{path:e,dependencies:[n],devDependencies:[],scripts:[]}])}function resolveWebPackageVersions(e){return{evePackageVersion:e?.evePackageVersion??`0.7.
|
|
1
|
+
import{getPackageManagerStrategy}from"../../primitives/pm/index.js";import{pathExists,writeTextFile}from"../files.js";import{patchPackageJson}from"./package-json.js";import{resolveVersionToken}from"../version-tokens.js";import{getSupportedModuleBaseName,matchesSupportedModuleBaseName}from"./module-files.js";import{WEB_APP_TEMPLATE_FILES,WEB_APP_TEMPLATE_PACKAGE_JSON}from"../create/web-template.js";import"../create/project.js";import{basename,join,resolve}from"node:path";import{readFile,readdir,writeFile}from"node:fs/promises";const SLACK_CHANNEL_DEFAULT_ROUTE=`/eve/v1/slack`,DEFAULT_SLACK_CONNECTOR_SLUG=`my-agent`,PACKAGE_DEPENDENCY_FIELDS=[`dependencies`,`devDependencies`,`peerDependencies`,`optionalDependencies`],WEB_NEXT_CONFIG_PATH=`next.config.ts`,WEB_VERCEL_JSON_PATH=`vercel.json`,WEB_VERCEL_JSON_SCHEMA=`https://openapi.vercel.sh/vercel.json`,WEB_COMPETING_NEXT_CONFIG_PATHS=[`next.config.js`,`next.config.mjs`,WEB_NEXT_CONFIG_PATH,`next.config.mts`].filter(e=>e!==WEB_NEXT_CONFIG_PATH),WEB_DEFAULT_VERCEL_SERVICES={web:{entrypoint:`.`,framework:`nextjs`,routePrefix:`/`},eve:{buildCommand:`eve build`,entrypoint:`.`,framework:`eve`,routePrefix:`/_eve_internal/eve`}};function toSlackConnectorSlug(e){return e}function isJsonObject(e){return typeof e==`object`&&!!e&&!Array.isArray(e)}async function readDependencyVersion(e,t){let n=JSON.parse(await readFile(e,`utf8`));if(!isJsonObject(n)||!isJsonObject(n.dependencies))return;let r=n.dependencies[t];return typeof r==`string`?r:void 0}function packageJsonHasDependency(e,t){for(let n of PACKAGE_DEPENDENCY_FIELDS){let r=e[n];if(isJsonObject(r)&&typeof r[t]==`string`)return!0}return!1}async function hasPackageDependency(e,n){if(!await pathExists(e))return!1;let r=JSON.parse(await readFile(e,`utf8`));return isJsonObject(r)&&packageJsonHasDependency(r,n)}async function isNextJsProject(e){return hasPackageDependency(join(e,`package.json`),`next`)}async function ensurePackageDependency(e,n,i){return!await pathExists(e)||await readDependencyVersion(e,n)===i?[]:(await patchPackageJson(e,{dependencies:{[n]:i}}),[{path:e,dependencies:[n],devDependencies:[],scripts:[]}])}function resolveWebPackageVersions(e){return{evePackageVersion:e?.evePackageVersion??`0.7.3`,aiPackageVersion:e?.aiPackageVersion??`7.0.0-canary.171`,nextPackageVersion:e?.nextPackageVersion??`16.2.6`,reactPackageVersion:e?.reactPackageVersion??`19.2.6`,reactDomPackageVersion:e?.reactDomPackageVersion??`19.2.6`,streamdownPackageVersion:e?.streamdownPackageVersion??`2.5.0`,zodPackageVersion:e?.zodPackageVersion??`4.4.3`,tsgoPackageVersion:e?.tsgoPackageVersion??`7.0.0-dev.20260523.1`,typesNodePackageVersion:e?.typesNodePackageVersion??`25.9.1`,typesReactPackageVersion:e?.typesReactPackageVersion??`19.2.15`,typesReactDomPackageVersion:e?.typesReactDomPackageVersion??`19.2.3`}}function formatEveDependencySpecifier(e){return/^\d+\.\d+\.\d+(?:[-+][0-9A-Za-z-.]+)?$/.test(e)?`^${e}`:e}async function patchWebPackageJson(e,n){if(!await pathExists(e))return[];let a={...WEB_APP_TEMPLATE_PACKAGE_JSON.dependencies,ai:resolveVersionToken(`aiPackageVersion`,n.aiPackageVersion),eve:formatEveDependencySpecifier(resolveVersionToken(`evePackageVersion`,n.evePackageVersion)),next:resolveVersionToken(`nextPackageVersion`,n.nextPackageVersion),react:resolveVersionToken(`reactPackageVersion`,n.reactPackageVersion),"react-dom":resolveVersionToken(`reactDomPackageVersion`,n.reactDomPackageVersion),streamdown:resolveVersionToken(`streamdownPackageVersion`,n.streamdownPackageVersion),zod:resolveVersionToken(`zodPackageVersion`,n.zodPackageVersion)},o={...WEB_APP_TEMPLATE_PACKAGE_JSON.devDependencies,"@types/node":resolveVersionToken(`typesNodePackageVersion`,n.typesNodePackageVersion),"@types/react":resolveVersionToken(`typesReactPackageVersion`,n.typesReactPackageVersion),"@types/react-dom":resolveVersionToken(`typesReactDomPackageVersion`,n.typesReactDomPackageVersion),"@typescript/native-preview":resolveVersionToken(`tsgoPackageVersion`,n.tsgoPackageVersion)},s=WEB_APP_TEMPLATE_PACKAGE_JSON.scripts;return await patchPackageJson(e,{dependencies:a,devDependencies:o,scripts:s}),[{path:e,dependencies:Object.keys(a),devDependencies:Object.keys(o),scripts:Object.keys(s)}]}function normalizeSlackConnectorSlug(e){return toSlackConnectorSlug((e.trim().replace(/^@/,``).split(`/`).at(-1)??``).toLowerCase().replace(/[^a-z0-9_-]+/g,`-`).replace(/^[^a-z0-9]+/,``).replace(/[^a-z0-9]+$/,``).slice(0,100).replace(/[^a-z0-9]+$/,``)||`my-agent`)}async function deriveSlackConnectorSlug(e,t){if(t!==void 0&&t.length>0&&t!==`.`)return normalizeSlackConnectorSlug(t);try{let t=await readFile(join(e,`package.json`),`utf8`),n=JSON.parse(t);if(typeof n.name==`string`&&n.name.length>0)return normalizeSlackConnectorSlug(n.name)}catch{}return normalizeSlackConnectorSlug(basename(resolve(e))||`my-agent`)}function buildSlackTemplate(e){return`import { connectSlackCredentials } from "@vercel/connect/eve";
|
|
2
2
|
import { slackChannel } from "eve/channels/slack";
|
|
3
3
|
|
|
4
4
|
export default slackChannel({
|
package/package.json
CHANGED
|
@@ -1,3 +0,0 @@
|
|
|
1
|
-
import type { EveEvalRequirement, EveEvalTarget } from "#evals/types.js";
|
|
2
|
-
export declare function findUnmetRequirements(requirements: readonly EveEvalRequirement[], target: EveEvalTarget, env?: NodeJS.ProcessEnv): readonly EveEvalRequirement[];
|
|
3
|
-
export declare function formatUnmetRequirements(requirements: readonly EveEvalRequirement[]): string;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
function findUnmetRequirements(e,t,n=process.env){let r=[];for(let i of new Set(e))requirementMet(i,t,n)||r.push(i);return r}function formatUnmetRequirements(e){return e.length===0?``:`Unmet eval requirement${e.length===1?``:`s`}: ${e.map(formatRequirement).join(`, `)}`}function requirementMet(e,t,n){if(e===`mockModels`)return t.capabilities.mockModels;if(e===`devRoutes`)return t.capabilities.devRoutes;let r=e.slice(4);return r.length>0&&n[r]!==void 0}function formatRequirement(e){return e===`mockModels`?`"mockModels" (local evals can enable this with --mock-models)`:e===`devRoutes`?`"devRoutes"`:`"${e}"`}export{findUnmetRequirements,formatUnmetRequirements};
|