experimental-ash 0.24.2 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/docs/public/typescript-api.md +0 -12
- package/dist/src/channel/adapter.d.ts +5 -18
- package/dist/src/channel/adapter.js +2 -8
- package/dist/src/channel/cross-channel-receive.d.ts +10 -1
- package/dist/src/channel/cross-channel-receive.js +21 -3
- package/dist/src/channel/routes.d.ts +2 -1
- package/dist/src/channel/routes.js +6 -3
- package/dist/src/channel/schedule.js +2 -2
- package/dist/src/channel/types.d.ts +1 -1
- package/dist/src/chunks/{client-DLHAGI2g.js → client-BShLWzR6.js} +3 -3
- package/dist/src/chunks/compile-agent-75wLLW-E.js +5 -0
- package/dist/src/chunks/dev-authored-source-watcher-DqoJsDup.js +1 -0
- package/dist/src/chunks/host-JVy7fewA.js +70 -0
- package/dist/src/chunks/paths-DNjq5JOy.js +85 -0
- package/dist/src/chunks/{token-D98SQdvs.js → token-BOkIxJeV.js} +1 -1
- package/dist/src/chunks/types-CjIyrcYo.js +1 -0
- package/dist/src/cli/commands/info.js +1 -1
- package/dist/src/cli/dev/environment.d.ts +0 -5
- package/dist/src/cli/dev/environment.js +1 -1
- package/dist/src/cli/dev/repl.d.ts +1 -1
- package/dist/src/cli/dev/repl.js +3 -3
- package/dist/src/cli/run.d.ts +0 -1
- package/dist/src/cli/run.js +2 -2
- package/dist/src/client/message-reducer.js +13 -25
- package/dist/src/client/message-response.d.ts +2 -1
- package/dist/src/client/open-stream.d.ts +3 -3
- package/dist/src/client/open-stream.js +1 -2
- package/dist/src/client/session.d.ts +2 -1
- package/dist/src/client/session.js +0 -3
- package/dist/src/client/types.d.ts +3 -2
- package/dist/src/compiler/artifacts.d.ts +7 -10
- package/dist/src/compiler/artifacts.js +3 -3
- package/dist/src/compiler/manifest.d.ts +6 -15
- package/dist/src/compiler/manifest.js +3 -3
- package/dist/src/compiler/normalize-agent-config.js +12 -10
- package/dist/src/compiler/normalize-manifest.js +3 -2
- package/dist/src/context/container.d.ts +1 -16
- package/dist/src/context/container.js +1 -24
- package/dist/src/context/hook-lifecycle.d.ts +2 -7
- package/dist/src/context/hook-lifecycle.js +0 -6
- package/dist/src/context/provider.d.ts +1 -11
- package/dist/src/context/providers/sandbox.js +4 -1
- package/dist/src/context/run-step.d.ts +2 -4
- package/dist/src/context/run-step.js +4 -17
- package/dist/src/context/seed-keys.d.ts +1 -1
- package/dist/src/discover/connections.d.ts +2 -1
- package/dist/src/discover/diagnostics.d.ts +0 -8
- package/dist/src/discover/diagnostics.js +4 -16
- package/dist/src/discover/discover-agent.d.ts +3 -13
- package/dist/src/discover/discover-agent.js +1 -11
- package/dist/src/discover/discover-subagent.d.ts +3 -2
- package/dist/src/discover/discover-subagent.js +1 -1
- package/dist/src/discover/filesystem.d.ts +0 -37
- package/dist/src/discover/filesystem.js +0 -115
- package/dist/src/discover/grammar.d.ts +10 -20
- package/dist/src/discover/grammar.js +11 -33
- package/dist/src/discover/lib.d.ts +3 -2
- package/dist/src/discover/manifest.d.ts +3 -3
- package/dist/src/discover/markdown.d.ts +2 -1
- package/dist/src/discover/sandbox.d.ts +2 -1
- package/dist/src/discover/schedules.d.ts +3 -2
- package/dist/src/discover/schedules.js +1 -1
- package/dist/src/discover/skills.d.ts +3 -2
- package/dist/src/discover/skills.js +1 -1
- package/dist/src/discover/slots.d.ts +3 -2
- package/dist/src/evals/cli/eval.d.ts +0 -6
- package/dist/src/evals/cli/eval.js +1 -1
- package/dist/src/evals/loaders/index.d.ts +2 -22
- package/dist/src/evals/loaders/index.js +1 -1
- package/dist/src/evals/reporters/index.d.ts +2 -14
- package/dist/src/evals/reporters/index.js +1 -1
- package/dist/src/evals/runner/discover.d.ts +0 -6
- package/dist/src/evals/runner/discover.js +1 -1
- package/dist/src/evals/runner/execute-case.d.ts +2 -1
- package/dist/src/evals/runner/execute-suite.d.ts +2 -1
- package/dist/src/evals/runner/reporters/braintrust.js +1 -1
- package/dist/src/evals/runner/resolve-git-metadata.d.ts +3 -3
- package/dist/src/evals/scorers/autoevals-client.d.ts +2 -2
- package/dist/src/execution/connection-auth-steps.d.ts +1 -5
- package/dist/src/execution/connection-auth-steps.js +4 -5
- package/dist/src/execution/node-step.d.ts +1 -1
- package/dist/src/execution/skills/instructions.d.ts +2 -1
- package/dist/src/execution/subagent-hitl-proxy.d.ts +1 -1
- package/dist/src/execution/subagent-tool.d.ts +0 -8
- package/dist/src/execution/subagent-tool.js +9 -16
- package/dist/src/execution/tool-compaction.js +0 -2
- package/dist/src/execution/turn-workflow.d.ts +1 -1
- package/dist/src/execution/turn-workflow.js +2 -2
- package/dist/src/execution/web-fetch/tool.js +1 -1
- package/dist/src/execution/workflow-steps.d.ts +17 -1
- package/dist/src/execution/workflow-steps.js +5 -6
- package/dist/src/harness/action-result-helpers.d.ts +0 -12
- package/dist/src/harness/action-result-helpers.js +1 -1
- package/dist/src/harness/emission.d.ts +2 -2
- package/dist/src/harness/execute-tool.d.ts +2 -1
- package/dist/src/harness/input-requests.d.ts +2 -1
- package/dist/src/harness/prompt-cache.d.ts +1 -9
- package/dist/src/harness/prompt-cache.js +0 -12
- package/dist/src/harness/runtime-actions.d.ts +2 -11
- package/dist/src/harness/runtime-actions.js +1 -1
- package/dist/src/harness/step-hooks.d.ts +3 -2
- package/dist/src/harness/step-hooks.js +3 -7
- package/dist/src/harness/tool-loop.js +0 -2
- package/dist/src/harness/types.d.ts +1 -1
- package/dist/src/internal/application/package.js +2 -2
- package/dist/src/internal/application/runtime-compiled-artifacts-source.js +0 -1
- package/dist/src/internal/attachments/errors.d.ts +8 -2
- package/dist/src/internal/attachments/url-refs.d.ts +0 -1
- package/dist/src/internal/attachments/url-refs.js +1 -1
- package/dist/src/internal/authored-definition/channel.d.ts +0 -5
- package/dist/src/internal/authored-definition/channel.js +1 -10
- package/dist/src/internal/authored-definition/sandbox.d.ts +2 -1
- package/dist/src/internal/authored-definition/schema-backed.d.ts +3 -2
- package/dist/src/internal/authored-module-loader.d.ts +0 -6
- package/dist/src/internal/authored-module-loader.js +0 -9
- package/dist/src/internal/authored-module.d.ts +0 -4
- package/dist/src/internal/authored-module.js +0 -10
- package/dist/src/internal/bundler/nitro-rolldown.d.ts +2 -10
- package/dist/src/internal/bundler/nitro-rolldown.js +1 -1
- package/dist/src/{public → internal}/helpers/markdown.d.ts +2 -23
- package/dist/src/{public → internal}/helpers/markdown.js +1 -1
- package/dist/src/internal/logging.d.ts +2 -8
- package/dist/src/internal/nitro/host/build-vercel-agent-summary.d.ts +1 -1
- package/dist/src/internal/nitro/host/build-vercel-agent-summary.js +1 -1
- package/dist/src/internal/nitro/host/create-application-nitro.js +1 -1
- package/dist/src/internal/nitro/host/dev-authored-source-watcher.js +2 -2
- package/dist/src/internal/nitro/routes/channel-dispatch.js +2 -2
- package/dist/src/internal/nitro/routes/runtime-artifacts.js +0 -1
- package/dist/src/internal/node-esm-compat-banner.d.ts +1 -1
- package/dist/src/internal/runtime-registry.d.ts +2 -1
- package/dist/src/{protocol → internal}/vercel-agent-summary.d.ts +1 -1
- package/dist/src/protocol/message.d.ts +0 -34
- package/dist/src/protocol/message.js +0 -30
- package/dist/src/public/channels/index.d.ts +1 -1
- package/dist/src/public/channels/index.js +1 -1
- package/dist/src/public/channels/slack/api.d.ts +1 -7
- package/dist/src/public/channels/slack/api.js +1 -1
- package/dist/src/public/channels/slack/connections.d.ts +2 -6
- package/dist/src/public/channels/slack/hitl.d.ts +3 -2
- package/dist/src/public/channels/slack/inbound.d.ts +0 -35
- package/dist/src/public/definitions/channel.d.ts +2 -2
- package/dist/src/public/definitions/channel.js +1 -1
- package/dist/src/public/definitions/defineChannel.d.ts +1 -1
- package/dist/src/public/definitions/defineChannel.js +1 -1
- package/dist/src/public/definitions/instructions.d.ts +0 -11
- package/dist/src/public/definitions/instructions.js +0 -5
- package/dist/src/public/definitions/tool.d.ts +3 -7
- package/dist/src/public/definitions/tool.js +1 -1
- package/dist/src/public/instructions/index.d.ts +1 -5
- package/dist/src/public/instructions/index.js +1 -3
- package/dist/src/public/tool-result-narrowing.d.ts +2 -1
- package/dist/src/react/use-ash-agent.d.ts +2 -1
- package/dist/src/react/use-ash-agent.js +1 -5
- package/dist/src/runtime/actions/types.d.ts +8 -50
- package/dist/src/runtime/actions/types.js +5 -21
- package/dist/src/runtime/agent/bootstrap-model-utils.d.ts +2 -1
- package/dist/src/runtime/agent/mock-model-adapter.js +1 -1
- package/dist/src/runtime/channels/registry.js +3 -8
- package/dist/src/runtime/compiled-artifacts-source.d.ts +4 -11
- package/dist/src/runtime/compiled-artifacts-source.js +3 -7
- package/dist/src/runtime/connections/authorization-tokens.d.ts +1 -14
- package/dist/src/runtime/connections/authorization-tokens.js +2 -28
- package/dist/src/runtime/connections/principal-context.d.ts +3 -2
- package/dist/src/runtime/connections/validate-authorization.d.ts +0 -11
- package/dist/src/runtime/connections/validate-authorization.js +0 -16
- package/dist/src/runtime/framework-channels/index.d.ts +0 -1
- package/dist/src/runtime/framework-channels/index.js +1 -1
- package/dist/src/runtime/framework-tools/index.d.ts +2 -1
- package/dist/src/runtime/framework-tools/index.js +2 -1
- package/dist/src/runtime/framework-tools/skill.d.ts +0 -13
- package/dist/src/runtime/framework-tools/skill.js +1 -1
- package/dist/src/runtime/governance/auth/token-claims.d.ts +1 -16
- package/dist/src/runtime/governance/auth/token-claims.js +3 -3
- package/dist/src/runtime/governance/auth/types.d.ts +0 -4
- package/dist/src/runtime/governance/network/ip-allow-list.d.ts +0 -17
- package/dist/src/runtime/governance/network/ip-allow-list.js +0 -39
- package/dist/src/runtime/hooks/registry.d.ts +7 -4
- package/dist/src/runtime/hooks/registry.js +4 -2
- package/dist/src/runtime/loaders/bundled-artifacts.d.ts +0 -5
- package/dist/src/runtime/loaders/bundled-artifacts.js +0 -7
- package/dist/src/runtime/loaders/compile-metadata.d.ts +3 -10
- package/dist/src/runtime/loaders/compile-metadata.js +6 -8
- package/dist/src/runtime/loaders/manifest.d.ts +2 -2
- package/dist/src/runtime/loaders/manifest.js +4 -5
- package/dist/src/runtime/loaders/module-map.d.ts +2 -2
- package/dist/src/runtime/loaders/module-map.js +3 -4
- package/dist/src/runtime/prompt/compose.d.ts +2 -1
- package/dist/src/runtime/resolve-agent-graph.d.ts +2 -15
- package/dist/src/runtime/resolve-agent-graph.js +1 -1
- package/dist/src/runtime/sandbox/keys.d.ts +2 -1
- package/dist/src/runtime/sandbox/registry.d.ts +3 -3
- package/dist/src/runtime/schedules/resolve-schedule.d.ts +3 -12
- package/dist/src/runtime/schedules/resolve-schedule.js +1 -1
- package/dist/src/runtime/sessions/auth.d.ts +2 -13
- package/dist/src/runtime/sessions/auth.js +1 -11
- package/dist/src/runtime/sessions/compiled-agent-cache.js +1 -1
- package/dist/src/runtime/sessions/runtime-session.d.ts +0 -6
- package/dist/src/runtime/sessions/runtime-session.js +0 -13
- package/dist/src/runtime/sessions/turn.d.ts +2 -2
- package/dist/src/runtime/subagents/registry.d.ts +2 -5
- package/dist/src/runtime/subagents/registry.js +0 -6
- package/dist/src/runtime/tools/registry.d.ts +2 -1
- package/dist/src/runtime/tools/registry.js +1 -4
- package/dist/src/runtime/types.d.ts +4 -3
- package/dist/src/runtime/workspace/seed-files.d.ts +2 -1
- package/dist/src/services/dev-client/request-headers.d.ts +37 -8
- package/dist/src/services/dev-client/request-headers.js +71 -46
- package/dist/src/services/dev-client/stream.d.ts +0 -19
- package/dist/src/services/dev-client/stream.js +0 -37
- package/dist/src/services/dev-client/url.d.ts +3 -7
- package/dist/src/services/dev-client/url.js +4 -10
- package/dist/src/services/dev-client.d.ts +0 -12
- package/dist/src/services/dev-client.js +6 -92
- package/dist/src/shared/agent-definition.d.ts +1 -1
- package/dist/src/shared/json.d.ts +0 -4
- package/dist/src/shared/json.js +0 -38
- package/dist/src/shared/skill-definition.d.ts +0 -2
- package/package.json +1 -6
- package/dist/src/chunks/dev-authored-source-watcher-CBID_Dwh.js +0 -1
- package/dist/src/chunks/host-zBy9FyyX.js +0 -70
- package/dist/src/chunks/package-HUaeub_D.js +0 -1
- package/dist/src/chunks/paths-CebY5GCi.js +0 -89
- package/dist/src/chunks/types-DDA2QUED.js +0 -1
- package/dist/src/compiler/resource-files.d.ts +0 -19
- package/dist/src/compiler/resource-files.js +0 -28
- package/dist/src/execution/subagent-invocation.d.ts +0 -19
- package/dist/src/execution/subagent-invocation.js +0 -17
- package/dist/src/execution/task-mode.d.ts +0 -9
- package/dist/src/execution/task-mode.js +0 -12
- package/dist/src/execution/types.d.ts +0 -20
- package/dist/src/execution/types.js +0 -1
- package/dist/src/internal/logical-paths.d.ts +0 -13
- package/dist/src/internal/logical-paths.js +0 -25
- package/dist/src/runtime/sessions/messages.d.ts +0 -140
- package/dist/src/runtime/sessions/messages.js +0 -170
- package/dist/src/runtime/standard-schema.d.ts +0 -27
- package/dist/src/runtime/standard-schema.js +0 -64
- package/dist/src/services/dev-client/live-stream.d.ts +0 -35
- package/dist/src/services/dev-client/live-stream.js +0 -157
- package/dist/src/services/dev-client/send-message.d.ts +0 -24
- package/dist/src/services/dev-client/send-message.js +0 -185
- package/dist/src/services/dev-client/session.d.ts +0 -54
- package/dist/src/services/dev-client/session.js +0 -52
- package/dist/src/services/host.d.ts +0 -14
- package/dist/src/services/host.js +0 -13
- /package/dist/src/{package-name.d.ts → internal/package-name.d.ts} +0 -0
- /package/dist/src/{package-name.js → internal/package-name.js} +0 -0
- /package/dist/src/{protocol → internal}/vercel-agent-summary.js +0 -0
- /package/dist/src/{run-mode.d.ts → shared/run-mode.d.ts} +0 -0
- /package/dist/src/{run-mode.js → shared/run-mode.js} +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { Command } from "commander";
|
|
2
1
|
interface EvalCliOptions {
|
|
3
2
|
suite?: string[];
|
|
4
3
|
url?: string;
|
|
@@ -6,17 +5,12 @@ interface EvalCliOptions {
|
|
|
6
5
|
maxConcurrency?: string;
|
|
7
6
|
json?: boolean;
|
|
8
7
|
all?: boolean;
|
|
9
|
-
listSuites?: boolean;
|
|
10
8
|
skipReport?: boolean;
|
|
11
9
|
}
|
|
12
10
|
type EvalCliLogger = {
|
|
13
11
|
log(message: string): void;
|
|
14
12
|
error(message: string): void;
|
|
15
13
|
};
|
|
16
|
-
/**
|
|
17
|
-
* Registers the `ash eval` command on the given Commander program.
|
|
18
|
-
*/
|
|
19
|
-
export declare function registerEvalCommand(program: Command, logger: EvalCliLogger): void;
|
|
20
14
|
/**
|
|
21
15
|
* Runs the `ash eval` command with already-parsed Commander options.
|
|
22
16
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{n as e}from"../../chunks/paths-
|
|
1
|
+
import{n as e}from"../../chunks/paths-DNjq5JOy.js";import{loadDevelopmentEnvironmentFiles as t}from"../../cli/dev/environment.js";import{n,s as r,t as i}from"../../chunks/client-BShLWzR6.js";import{n as a}from"../../chunks/host-JVy7fewA.js";import{discoverAndImportSuites as o}from"../runner/discover.js";import{executeSuite as s}from"../runner/execute-suite.js";import{ConsoleReporter as c}from"../runner/reporters/console.js";var l=r();async function u(n,r){let i=e();t(i);let c=n.suite,l=await o(i,c);if(l.length===0){c&&c.length>0?r.error(`No suites found matching: ${c.join(`, `)}`):r.error(`No eval suites found. Create suite files under evals/ with the *.eval.ts extension.`),process.exitCode=1;return}let u,f;n.url?f={kind:`remote`,url:n.url}:(u=await a(i,{host:`127.0.0.1`,port:0}),f={kind:`local`,url:u.url});let p=d(f);try{let e=[];for(let t of l){let r=m(t,n),a=h(r,{json:n.json===!0,skipReport:n.skipReport===!0}),o=await s({suite:r,target:f,reporters:a,appRoot:i,client:p});e.push(o)}n.json&&r.log(JSON.stringify(e,null,2)),e.some(e=>e.errored>0)&&(process.exitCode=1)}finally{u&&await u.close()}process.exit(process.exitCode??0)}function d(e){if(e.kind===`local`)return new i({host:e.url});let t={},r=process.env.VERCEL_AUTOMATION_BYPASS_SECRET?.trim();return r&&(t[n]=r),new i({auth:f(),headers:Object.keys(t).length>0?t:void 0,host:e.url})}function f(){let e=process.env.ASH_EVAL_AUTH_TOKEN?.trim();return e?{bearer:e}:{bearer:p}}async function p(){try{let e=(await(0,l.getVercelOidcToken)()).trim();if(e.length>0)return e}catch{}return process.env.VERCEL_OIDC_TOKEN?.trim()??``}function m(e,t){let n=t.maxConcurrency?Number.parseInt(t.maxConcurrency,10):void 0,r=t.timeout?Number.parseInt(t.timeout,10):void 0;if(n===void 0&&r===void 0)return e;let i={...e};return n!==void 0&&(i.maxConcurrency=n),r!==void 0&&(i.timeoutMs=r),i}function h(e,t){let n=t.json?[]:[new c];return!t.skipReport&&e.reporters&&n.push(...e.reporters),n}export{u as runEvalCommand};
|
|
@@ -1,22 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
*
|
|
4
|
-
* @example
|
|
5
|
-
* ```ts
|
|
6
|
-
* import { loadJson } from "experimental-ash/evals/loaders";
|
|
7
|
-
*
|
|
8
|
-
* const data = await loadJson("evals/data/cases.json");
|
|
9
|
-
* ```
|
|
10
|
-
*/
|
|
11
|
-
export declare function loadJson(filePath: string): Promise<unknown>;
|
|
12
|
-
/**
|
|
13
|
-
* Loads and parses a YAML file.
|
|
14
|
-
*
|
|
15
|
-
* @example
|
|
16
|
-
* ```ts
|
|
17
|
-
* import { loadYaml } from "experimental-ash/evals/loaders";
|
|
18
|
-
*
|
|
19
|
-
* const doc = await loadYaml("evals/data/cases.yaml");
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
export declare function loadYaml(filePath: string): Promise<Record<string, unknown>>;
|
|
1
|
+
export { loadJson } from "#evals/loaders/json.js";
|
|
2
|
+
export { loadYaml } from "#evals/loaders/yaml.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{loadJson as e}from"./json.js";import{loadYaml as t}from"./yaml.js";
|
|
1
|
+
import{loadJson as e}from"./json.js";import{loadYaml as t}from"./yaml.js";export{e as loadJson,t as loadYaml};
|
|
@@ -1,14 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Creates a Braintrust reporter for use in a suite's `reporters` array.
|
|
5
|
-
*
|
|
6
|
-
* @example
|
|
7
|
-
* ```ts
|
|
8
|
-
* import { Braintrust } from "experimental-ash/evals/reporters";
|
|
9
|
-
*
|
|
10
|
-
* Braintrust({ projectName: "My Project" });
|
|
11
|
-
* ```
|
|
12
|
-
*/
|
|
13
|
-
export declare function Braintrust(config?: BraintrustReporterConfig): EvalReporter;
|
|
14
|
-
export type { BraintrustReporterConfig, EvalReporter };
|
|
1
|
+
export { Braintrust, type BraintrustReporterConfig } from "#evals/runner/reporters/braintrust.js";
|
|
2
|
+
export type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{Braintrust as e}from"../runner/reporters/braintrust.js";
|
|
1
|
+
import{Braintrust as e}from"../runner/reporters/braintrust.js";export{e as Braintrust};
|
|
@@ -6,12 +6,6 @@ import type { AshEvalSuite } from "#evals/types.js";
|
|
|
6
6
|
* Returns absolute paths sorted alphabetically by relative path.
|
|
7
7
|
*/
|
|
8
8
|
export declare function discoverSuiteFiles(appRoot: string): Promise<string[]>;
|
|
9
|
-
/**
|
|
10
|
-
* Derives the canonical suite id from one absolute eval-suite file path.
|
|
11
|
-
*
|
|
12
|
-
* `<appRoot>/evals/sub/weather.eval.ts` → `"sub/weather"`.
|
|
13
|
-
*/
|
|
14
|
-
export declare function deriveSuiteId(appRoot: string, filePath: string): string;
|
|
15
9
|
/**
|
|
16
10
|
* Imports a discovered suite file and stamps the path-derived id onto
|
|
17
11
|
* the suite definition.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{t as e}from"../../chunks/authored-module-loader-DcCfCiBm.js";import{join as t,relative as n}from"node:path";import{readdir as r}from"node:fs/promises";const i=`.eval.ts`;async function a(e){let r=t(e,`evals`),i=[];try{await u(r,i)}catch(e){if(d(e))return[];throw e}return i.sort((e,t)=>n(r,e).localeCompare(n(r,t))),i}function o(e,r){let a=n(t(e,`evals`),r).split(/[\\/]/u).join(`/`);return a.endsWith(i)?a.slice(0,-8):a}async function s(t,n){let r=(await e(n)).default;if(!l(r))throw Error(`Suite file "${n}" does not export a valid AshEvalSuite as its default export. Use defineEvalSuite() to create the suite.`);return{...r,id:o(t,n)}}async function c(e,t){let n=await a(e);if(n.length===0)return[];let r=[];for(let i of n){let n=await s(e,i);(t===void 0||t.length===0||t.includes(n.id))&&r.push(n)}return r}function l(e){return typeof e==`object`&&!!e&&`_tag`in e&&e._tag===`AshEvalSuite`}async function u(e,n){let a=await r(e,{withFileTypes:!0});for(let r of a){let a=t(e,r.name);r.isDirectory()?await u(a,n):r.isFile()&&r.name.endsWith(i)&&n.push(a)}}function d(e){return typeof e==`object`&&!!e&&`code`in e&&e.code===`ENOENT`}export{
|
|
1
|
+
import{t as e}from"../../chunks/authored-module-loader-DcCfCiBm.js";import{join as t,relative as n}from"node:path";import{readdir as r}from"node:fs/promises";const i=`.eval.ts`;async function a(e){let r=t(e,`evals`),i=[];try{await u(r,i)}catch(e){if(d(e))return[];throw e}return i.sort((e,t)=>n(r,e).localeCompare(n(r,t))),i}function o(e,r){let a=n(t(e,`evals`),r).split(/[\\/]/u).join(`/`);return a.endsWith(i)?a.slice(0,-8):a}async function s(t,n){let r=(await e(n)).default;if(!l(r))throw Error(`Suite file "${n}" does not export a valid AshEvalSuite as its default export. Use defineEvalSuite() to create the suite.`);return{...r,id:o(t,n)}}async function c(e,t){let n=await a(e);if(n.length===0)return[];let r=[];for(let i of n){let n=await s(e,i);(t===void 0||t.length===0||t.includes(n.id))&&r.push(n)}return r}function l(e){return typeof e==`object`&&!!e&&`_tag`in e&&e._tag===`AshEvalSuite`}async function u(e,n){let a=await r(e,{withFileTypes:!0});for(let r of a){let a=t(e,r.name);r.isDirectory()?await u(a,n):r.isFile()&&r.name.endsWith(i)&&n.push(a)}}function d(e){return typeof e==`object`&&!!e&&`code`in e&&e.code===`ENOENT`}export{c as discoverAndImportSuites,a as discoverSuiteFiles,s as importSuiteFile};
|
|
@@ -3,7 +3,7 @@ import type { AshEvalCase, AshEvalTask, AshEvalTaskResult } from "#evals/types.j
|
|
|
3
3
|
/**
|
|
4
4
|
* Options for executing one eval case.
|
|
5
5
|
*/
|
|
6
|
-
|
|
6
|
+
interface ExecuteCaseOptions {
|
|
7
7
|
readonly client: Client;
|
|
8
8
|
readonly testCase: AshEvalCase;
|
|
9
9
|
readonly task?: AshEvalTask;
|
|
@@ -20,3 +20,4 @@ export interface ExecuteCaseOptions {
|
|
|
20
20
|
* and status come from the final turn only.
|
|
21
21
|
*/
|
|
22
22
|
export declare function executeCase(options: ExecuteCaseOptions): Promise<AshEvalTaskResult>;
|
|
23
|
+
export {};
|
|
@@ -4,7 +4,7 @@ import type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
|
4
4
|
/**
|
|
5
5
|
* Options for executing one eval suite.
|
|
6
6
|
*/
|
|
7
|
-
|
|
7
|
+
interface ExecuteSuiteOptions {
|
|
8
8
|
readonly suite: AshEvalSuite;
|
|
9
9
|
readonly target: AshEvalTarget;
|
|
10
10
|
readonly reporters: readonly EvalReporter[];
|
|
@@ -21,3 +21,4 @@ export interface ExecuteSuiteOptions {
|
|
|
21
21
|
* reports results.
|
|
22
22
|
*/
|
|
23
23
|
export declare function executeSuite(options: ExecuteSuiteOptions): Promise<AshEvalSuiteResult>;
|
|
24
|
+
export {};
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{resolveLocalGitMetadata as e}from"../resolve-git-metadata.js";function t(e={}){return new n(e)}var n=class{#e;#t;#n;#r
|
|
1
|
+
import{resolveLocalGitMetadata as e}from"../resolve-git-metadata.js";function t(e={}){return new n(e)}var n=class{#e;#t;#n;#r;constructor(e){this.#e=e}async onSuiteStart(t,n){let o=await r();this.#t=o;let s=e(process.cwd());this.#r=t.metadata;let c=i(t,n),l=a(t,n);this.#n=await o.init({project:this.#e.projectName??t.id,projectId:this.#e.projectId,experiment:this.#e.experimentName,baseExperiment:this.#e.baseExperimentName,baseExperimentId:this.#e.baseExperimentId,update:this.#e.update,tags:c,metadata:l,noExitFlush:!0,setCurrent:!1,repoInfo:s.sha?{commit:s.sha,branch:s.branch}:null})}onCaseComplete(e){if(!this.#n)return;let t={};for(let n of e.scores)n.score!==null&&(t[n.name]=n.score);let n={...this.#r,...e.case.metadata,ashSessionId:e.result.sessionId,ashStatus:e.result.status,ashToolCalls:e.result.derived.toolCalls,ashSubagentCalls:e.result.derived.subagentCalls};e.result.derived.failureCode&&(n.ashFailureCode=e.result.derived.failureCode);let r={toolCallCount:e.result.derived.toolCallCount,subagentCallCount:e.result.derived.subagentCallCount,messageCount:e.result.derived.messageCount,reasoningBlockCount:e.result.derived.reasoningBlockCount};this.#n.log({id:e.case.id,input:e.case.input,output:e.result.output,expected:e.case.expected,error:e.error??void 0,scores:t,metadata:n,metrics:r,tags:e.case.tags?[...e.case.tags]:void 0})}async onSuiteComplete(e){if(this.#n)try{this.#t&&await this.#t.flush();let e=await this.#n.summarize();e.experimentUrl&&console.log(`Braintrust experiment: ${e.experimentUrl}\n\n`)}finally{await this.#n.close(),this.#n=void 0,this.#t=void 0}}};async function r(){try{return await import(`braintrust`)}catch{throw Error([`The 'braintrust' package is required for Braintrust reporting but was not found.`,``,`Install it with:`,` npm install braintrust`].join(`
|
|
2
2
|
`))}}function i(e,t){let n=[`ash`,`suite:${e.id}`,`target:${t.kind}`];return Array.isArray(e.tags)&&n.push(...e.tags),n}function a(e,t){return{ashSuiteId:e.id,ashTargetKind:t.kind,ashTargetUrl:t.url,ashTimestamp:new Date().toISOString()}}export{t as Braintrust};
|
|
@@ -5,9 +5,9 @@ interface GitMetadata {
|
|
|
5
5
|
/**
|
|
6
6
|
* Resolves local git metadata for the eval run context.
|
|
7
7
|
*
|
|
8
|
-
* Used to populate
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* Used to populate `repoInfo` on the Braintrust experiment so the dashboard
|
|
9
|
+
* shows which sha/branch produced the run. This describes the eval code,
|
|
10
|
+
* not the remote target.
|
|
11
11
|
*
|
|
12
12
|
* Returns an empty object when git is unavailable or the directory is
|
|
13
13
|
* not a git repository.
|
|
@@ -4,9 +4,9 @@ import { Factuality } from "autoevals";
|
|
|
4
4
|
* The OpenAI-shaped client surface autoevals expects. Extracted from the
|
|
5
5
|
* library so we don't take a direct dependency on `openai` types.
|
|
6
6
|
*/
|
|
7
|
-
|
|
7
|
+
type AutoevalsClient = NonNullable<Parameters<typeof Factuality>[0]["client"]>;
|
|
8
8
|
type ProviderOptions = Parameters<typeof generateText>[0]["providerOptions"];
|
|
9
|
-
|
|
9
|
+
interface AutoevalsClientConfig {
|
|
10
10
|
readonly languageModel: LanguageModel;
|
|
11
11
|
readonly providerOptions?: ProviderOptions;
|
|
12
12
|
}
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import type { HarnessEmissionState } from "#harness/emission.js";
|
|
6
6
|
import type { HarnessSession } from "#harness/types.js";
|
|
7
|
-
import { type ConnectionAuthorizationOutcome } from "#protocol/message.js";
|
|
8
|
-
import type { ConnectionAuthorizationChallenge } from "#public/connections/errors.js";
|
|
9
7
|
import type { JsonValue } from "#public/types/json.js";
|
|
10
8
|
import type { AuthorizationCallbackRequest, ConnectionPrincipal, TokenResult } from "#runtime/connections/types.js";
|
|
11
9
|
import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
@@ -13,7 +11,7 @@ import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending
|
|
|
13
11
|
* Result of one `startAuthorization` step.
|
|
14
12
|
*
|
|
15
13
|
* On success the runtime journals
|
|
16
|
-
* `{ ok: true, principal, state,
|
|
14
|
+
* `{ ok: true, principal, state, serializedContext }`. The
|
|
17
15
|
* `principal` is the framework-resolved {@link ConnectionPrincipal}
|
|
18
16
|
* captured at `startAuthorization` time; the orchestrator carries it
|
|
19
17
|
* forward so `completeAuthorization` and the post-resume retry observe
|
|
@@ -32,7 +30,6 @@ import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending
|
|
|
32
30
|
* `outcome: "failed"` so channels clean up their UI.
|
|
33
31
|
*/
|
|
34
32
|
export type StartAuthorizationStepResult = {
|
|
35
|
-
readonly challenge: ConnectionAuthorizationChallenge;
|
|
36
33
|
readonly ok: true;
|
|
37
34
|
readonly principal: ConnectionPrincipal;
|
|
38
35
|
readonly serializedContext: Record<string, unknown>;
|
|
@@ -79,7 +76,6 @@ export type CompleteAuthorizationStepResult = {
|
|
|
79
76
|
readonly token: TokenResult;
|
|
80
77
|
} | {
|
|
81
78
|
readonly ok: false;
|
|
82
|
-
readonly outcome: Exclude<ConnectionAuthorizationOutcome, "authorized">;
|
|
83
79
|
readonly reason: string;
|
|
84
80
|
/**
|
|
85
81
|
* When `true`, downstream pending tool calls for this connection
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { buildAdapterContext } from "#channel/adapter-context.js";
|
|
6
6
|
import { callAdapterEventHandler } from "#channel/adapter.js";
|
|
7
|
-
import { contextStorage
|
|
7
|
+
import { contextStorage } from "#context/container.js";
|
|
8
8
|
import { BundleKey, ChannelKey } from "#context/keys.js";
|
|
9
9
|
import { getActiveRuntimeNode } from "#context/node.js";
|
|
10
10
|
import { deserializeContext, serializeContext } from "#context/serialize.js";
|
|
@@ -82,7 +82,6 @@ export async function startAuthorizationForConnectionStep(input) {
|
|
|
82
82
|
// the handler made (e.g. tracked Slack message ts) survive the
|
|
83
83
|
// step boundary into the matching `completeAuthorization` step.
|
|
84
84
|
return {
|
|
85
|
-
challenge,
|
|
86
85
|
ok: true,
|
|
87
86
|
principal,
|
|
88
87
|
serializedContext: serializeContext(ctx),
|
|
@@ -130,7 +129,7 @@ export async function completeAuthorizationForConnectionStep(input) {
|
|
|
130
129
|
stepIndex: input.emissionState.stepIndex,
|
|
131
130
|
turnId: input.emissionState.turnId,
|
|
132
131
|
}), input.parentWritable);
|
|
133
|
-
return { ok: false,
|
|
132
|
+
return { ok: false, reason, retryable: false };
|
|
134
133
|
}
|
|
135
134
|
try {
|
|
136
135
|
const token = await connection.authorization.completeAuthorization({
|
|
@@ -168,7 +167,7 @@ export async function completeAuthorizationForConnectionStep(input) {
|
|
|
168
167
|
stepIndex: input.emissionState.stepIndex,
|
|
169
168
|
turnId: input.emissionState.turnId,
|
|
170
169
|
}), input.parentWritable);
|
|
171
|
-
return { ok: false,
|
|
170
|
+
return { ok: false, reason, retryable };
|
|
172
171
|
}
|
|
173
172
|
}
|
|
174
173
|
/**
|
|
@@ -308,7 +307,7 @@ function findConnection(ctx, connectionName) {
|
|
|
308
307
|
}
|
|
309
308
|
async function emitAuthorizationEvent(ctx, event, parentWritable) {
|
|
310
309
|
const adapter = ctx.require(ChannelKey);
|
|
311
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
310
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
312
311
|
const finalEvent = await callAdapterEventHandler(adapter, event, adapterCtx);
|
|
313
312
|
// Pin any handler-driven channel-state mutations back onto ctx so
|
|
314
313
|
// they survive the durable step boundary when the caller
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Runtime, SessionCapabilities } from "#channel/types.js";
|
|
2
2
|
import type { HarnessEmitFn, StepFn } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
import type { RuntimeCompiledArtifactsSource } from "#runtime/compiled-artifacts-source.js";
|
|
5
5
|
import type { ResolvedRuntimeAgentNode } from "#runtime/graph.js";
|
|
6
6
|
/**
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
interface AvailableSkillDescription {
|
|
2
2
|
readonly description: string;
|
|
3
3
|
readonly name: string;
|
|
4
4
|
}
|
|
@@ -15,3 +15,4 @@ export interface AvailableSkillDescription {
|
|
|
15
15
|
* reuse the same formatter for durable history announcements.
|
|
16
16
|
*/
|
|
17
17
|
export declare function formatAvailableSkillsSection(skills: readonly AvailableSkillDescription[]): string | null;
|
|
18
|
+
export {};
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { DeliverPayload, SubagentInputRequestHookPayload } from "#channel/types.js";
|
|
2
2
|
import type { HarnessEmitFn, HarnessSession } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
import type { InputResponse } from "#runtime/input/types.js";
|
|
5
5
|
/**
|
|
6
6
|
* Runs the parent-side work when a `subagent-input-request` arrives at
|
|
@@ -8,14 +8,6 @@ interface BatchEventMetadata {
|
|
|
8
8
|
readonly sequence: number;
|
|
9
9
|
readonly turnId: string;
|
|
10
10
|
}
|
|
11
|
-
/**
|
|
12
|
-
* Returns the deterministic continuation token used for one delegated child
|
|
13
|
-
* subagent run.
|
|
14
|
-
*/
|
|
15
|
-
export declare function createSubagentContinuationToken(input: {
|
|
16
|
-
readonly callId: string;
|
|
17
|
-
readonly parentSessionId: string;
|
|
18
|
-
}): string;
|
|
19
11
|
/**
|
|
20
12
|
* Result of {@link buildSubagentRunInput}.
|
|
21
13
|
*
|
|
@@ -1,22 +1,11 @@
|
|
|
1
1
|
import { SUBAGENT_ADAPTER_KIND } from "#execution/subagent-adapter.js";
|
|
2
2
|
import { mintSubagentContinuationToken } from "#execution/session.js";
|
|
3
|
-
import { formatSubagentInvocation } from "#execution/subagent-invocation.js";
|
|
4
|
-
/**
|
|
5
|
-
* Returns the deterministic continuation token used for one delegated child
|
|
6
|
-
* subagent run.
|
|
7
|
-
*/
|
|
8
|
-
export function createSubagentContinuationToken(input) {
|
|
9
|
-
return mintSubagentContinuationToken(`${input.parentSessionId}:${input.callId}`);
|
|
10
|
-
}
|
|
11
3
|
/**
|
|
12
4
|
* Builds the {@link RunInput} for one delegated subagent child run.
|
|
13
5
|
*/
|
|
14
6
|
export function buildSubagentRunInput(input) {
|
|
15
7
|
const { action, auth, batchEvent, capabilities, initiatorAuth, session } = input;
|
|
16
|
-
const childContinuationToken =
|
|
17
|
-
callId: action.callId,
|
|
18
|
-
parentSessionId: session.sessionId,
|
|
19
|
-
});
|
|
8
|
+
const childContinuationToken = mintSubagentContinuationToken(`${session.sessionId}:${action.callId}`);
|
|
20
9
|
const runInput = {
|
|
21
10
|
adapter: {
|
|
22
11
|
kind: SUBAGENT_ADAPTER_KIND,
|
|
@@ -50,9 +39,13 @@ export function buildSubagentRunInput(input) {
|
|
|
50
39
|
*/
|
|
51
40
|
function formatSubagentCallInputMessage(action) {
|
|
52
41
|
const { message } = action.input;
|
|
53
|
-
return
|
|
54
|
-
|
|
42
|
+
return [
|
|
43
|
+
`You are the subagent "${action.subagentName}".`,
|
|
44
|
+
`Description: ${action.description}`,
|
|
45
|
+
"",
|
|
46
|
+
"The caller delegated the following task to you. Complete it and return the final result directly.",
|
|
47
|
+
"",
|
|
48
|
+
"Caller message:",
|
|
55
49
|
message,
|
|
56
|
-
|
|
57
|
-
}).message;
|
|
50
|
+
].join("\n");
|
|
58
51
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { HookPayload, SessionCapabilities } from "#channel/types.js";
|
|
2
2
|
import type { HarnessSession } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
export interface TurnResultPayload {
|
|
5
5
|
readonly action: "done" | "park";
|
|
6
6
|
readonly kind: "turn-result";
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { hasPendingInputBatch } from "#harness/input-requests.js";
|
|
2
2
|
import { hasPendingRuntimeActionBatch } from "#harness/runtime-actions.js";
|
|
3
3
|
import { awaitAuthorizationAndResolve } from "#execution/await-authorization-orchestrator.js";
|
|
4
|
-
import { createTaskModeWaitError } from "#execution/task-mode.js";
|
|
5
4
|
import { normalizeSerializableError } from "#execution/workflow-errors.js";
|
|
6
5
|
import { turnStep } from "#execution/workflow-steps.js";
|
|
6
|
+
const TASK_MODE_WAIT_ERROR_MESSAGE = "Task mode cannot wait for follow-up input (`next: null`).";
|
|
7
7
|
/**
|
|
8
8
|
* Short-lived workflow that owns one runtime turn for the durable
|
|
9
9
|
* driver.
|
|
@@ -56,7 +56,7 @@ export async function turnWorkflow(input) {
|
|
|
56
56
|
});
|
|
57
57
|
return;
|
|
58
58
|
}
|
|
59
|
-
throw
|
|
59
|
+
throw new Error(TASK_MODE_WAIT_ERROR_MESSAGE);
|
|
60
60
|
}
|
|
61
61
|
if (result.action === "await-authorization") {
|
|
62
62
|
const resolved = await awaitAuthorizationAndResolve({
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ASH_PACKAGE_NAME } from "#package-name.js";
|
|
1
|
+
import { ASH_PACKAGE_NAME } from "#internal/package-name.js";
|
|
2
2
|
import { truncateHead } from "#execution/sandbox/truncate-output.js";
|
|
3
3
|
import { convertHtmlToMarkdown, extractTextFromHtml } from "#execution/web-fetch/html.js";
|
|
4
4
|
const MAX_RESPONSE_SIZE = 5 * 1024 * 1024; // 5 MB
|
|
@@ -2,8 +2,24 @@ import type { DeliverPayload, HookPayload, SessionAuthContext, SubagentInputRequ
|
|
|
2
2
|
import { deserializeContext } from "#context/serialize.js";
|
|
3
3
|
import type { HarnessSession } from "#harness/types.js";
|
|
4
4
|
import type { RuntimeCompiledArtifactsSource } from "#runtime/compiled-artifacts-source.js";
|
|
5
|
+
import { type PendingConnectionAuthorization } from "#runtime/framework-tools/connection-search.js";
|
|
6
|
+
import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
5
7
|
import { type TurnWorkflowInput } from "#execution/turn-workflow.js";
|
|
6
|
-
|
|
8
|
+
/**
|
|
9
|
+
* Serializable projection of a step result for workflow persistence.
|
|
10
|
+
*/
|
|
11
|
+
export type DurableStepResult = {
|
|
12
|
+
readonly action: "continue" | "park" | "done";
|
|
13
|
+
readonly output?: string;
|
|
14
|
+
readonly serializedContext: Record<string, unknown>;
|
|
15
|
+
readonly session: HarnessSession;
|
|
16
|
+
} | {
|
|
17
|
+
readonly action: "await-authorization";
|
|
18
|
+
readonly pendingToolCalls: readonly PendingConnectionToolCall[];
|
|
19
|
+
readonly pendingAuths: readonly PendingConnectionAuthorization[];
|
|
20
|
+
readonly serializedContext: Record<string, unknown>;
|
|
21
|
+
readonly session: HarnessSession;
|
|
22
|
+
};
|
|
7
23
|
/**
|
|
8
24
|
* Input for one atomic harness step inside a durable `"use step"`
|
|
9
25
|
* boundary.
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { buildAdapterContext } from "#channel/adapter-context.js";
|
|
2
2
|
import { callAdapterEventHandler, defaultDeliverResult } from "#channel/adapter.js";
|
|
3
|
-
import { toContextAccessor } from "#context/container.js";
|
|
4
3
|
import { dispatchStreamEventHooks, runHookLifecycleStep } from "#context/hook-lifecycle.js";
|
|
5
4
|
import { AuthKey, BundleKey, CapabilitiesKey, ChannelKey, ContinuationTokenKey, InitiatorAuthKey, ModeKey, } from "#context/keys.js";
|
|
6
5
|
import { runStep } from "#context/run-step.js";
|
|
@@ -12,7 +11,7 @@ import { getPendingRuntimeActionBatch, recordPendingSubagentChildToken, } from "
|
|
|
12
11
|
import { createLogger, formatError } from "#internal/logging.js";
|
|
13
12
|
import { createSessionFailedEvent, createSubagentCalledEvent, encodeMessageStreamEvent, timestampHandleMessageStreamEvent, } from "#protocol/message.js";
|
|
14
13
|
import { drainPendingConnectionAuthorizations, PendingConnectionAuthorizationsKey, } from "#runtime/framework-tools/connection-search.js";
|
|
15
|
-
import { PendingConnectionToolCallsKey } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
14
|
+
import { PendingConnectionToolCallsKey, } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
16
15
|
import { getCompiledRuntimeAgentBundle } from "#runtime/sessions/compiled-agent-cache.js";
|
|
17
16
|
import { createExecutionNodeStep } from "#execution/node-step.js";
|
|
18
17
|
import { emitProxiedInputRequest, routeDeliverPayload } from "#execution/subagent-hitl-proxy.js";
|
|
@@ -36,7 +35,7 @@ export async function turnStep(input) {
|
|
|
36
35
|
}
|
|
37
36
|
// Build the adapter context for deliver and event handlers.
|
|
38
37
|
// Slack adapters override this to inject ctx.thread and ctx.slack.
|
|
39
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
38
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
40
39
|
// Run the adapter's deliver hook for each queued payload — sets
|
|
41
40
|
// custom context keys and optionally transforms the message.
|
|
42
41
|
// Coalesces the resulting StepInput values.
|
|
@@ -220,7 +219,7 @@ export async function dispatchPendingRuntimeActionsStep(input) {
|
|
|
220
219
|
const capabilities = ctx.get(CapabilitiesKey);
|
|
221
220
|
const initiatorAuth = ctx.get(InitiatorAuthKey) ?? null;
|
|
222
221
|
const writer = input.parentWritable.getWriter();
|
|
223
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
222
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
224
223
|
let nextSession = input.session;
|
|
225
224
|
try {
|
|
226
225
|
for (const action of batch.actions) {
|
|
@@ -288,7 +287,7 @@ export async function emitTerminalSessionFailureStep(input) {
|
|
|
288
287
|
const ctx = await deserializeContext(input.serializedContext);
|
|
289
288
|
const adapter = ctx.get(ChannelKey);
|
|
290
289
|
if (adapter !== undefined) {
|
|
291
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
290
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
292
291
|
await callAdapterEventHandler(adapter, event, adapterCtx);
|
|
293
292
|
}
|
|
294
293
|
}
|
|
@@ -327,7 +326,7 @@ export async function runProxyInputRequestStep(input) {
|
|
|
327
326
|
"use step";
|
|
328
327
|
const ctx = await deserializeContext(input.serializedContext);
|
|
329
328
|
const adapter = ctx.require(ChannelKey);
|
|
330
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
329
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
331
330
|
const mode = ctx.require(ModeKey);
|
|
332
331
|
const writer = input.parentWritable.getWriter();
|
|
333
332
|
let proxyResult;
|
|
@@ -1,23 +1,11 @@
|
|
|
1
1
|
import type { ModelMessage, ToolSet, TypedToolResult } from "ai";
|
|
2
2
|
import type { RuntimeToolResultActionResult } from "#runtime/actions/types.js";
|
|
3
|
-
import type { JsonValue } from "#shared/json.js";
|
|
4
3
|
type ToolResponsePart = Extract<ModelMessage, {
|
|
5
4
|
role: "tool";
|
|
6
5
|
}>["content"][number];
|
|
7
6
|
type ToolResultPart = Extract<ToolResponsePart, {
|
|
8
7
|
type: "tool-result";
|
|
9
8
|
}>;
|
|
10
|
-
/**
|
|
11
|
-
* Coerces an arbitrary value to a JSON-safe {@link JsonValue} without
|
|
12
|
-
* premature stringification.
|
|
13
|
-
*
|
|
14
|
-
* - Strings, numbers, booleans, and `null` pass through as primitives.
|
|
15
|
-
* - `Error` instances surface only their message (no stack leak).
|
|
16
|
-
* - Plain objects and arrays pass through structurally.
|
|
17
|
-
* - Non-JSON-representable values (functions, symbols, BigInts) fall
|
|
18
|
-
* back to `String(value)`.
|
|
19
|
-
*/
|
|
20
|
-
export declare function toJsonValue(value: unknown): JsonValue;
|
|
21
9
|
/**
|
|
22
10
|
* Builds a `RuntimeToolResultActionResult` from one AI SDK
|
|
23
11
|
* {@link TypedToolResult}. Used for tool results captured on the AI SDK
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - Non-JSON-representable values (functions, symbols, BigInts) fall
|
|
9
9
|
* back to `String(value)`.
|
|
10
10
|
*/
|
|
11
|
-
|
|
11
|
+
function toJsonValue(value) {
|
|
12
12
|
if (value === null ||
|
|
13
13
|
typeof value === "string" ||
|
|
14
14
|
typeof value === "number" ||
|
|
@@ -6,7 +6,7 @@ type InlineToolResultPart = Extract<ToolResponsePart, {
|
|
|
6
6
|
type: "tool-result";
|
|
7
7
|
}>;
|
|
8
8
|
import type { AssistantStepFinishReason, RuntimeIdentity } from "#protocol/message.js";
|
|
9
|
-
import type { RunMode } from "#run-mode.js";
|
|
9
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
10
10
|
import type { JsonObject } from "#shared/json.js";
|
|
11
11
|
import type { HarnessEmitFn, HarnessSession, StepInput } from "#harness/types.js";
|
|
12
12
|
/**
|
|
@@ -103,7 +103,7 @@ export declare function normalizeAssistantStepFinishReason(value: string | undef
|
|
|
103
103
|
* harness splices them into persisted history to keep the prior turn's
|
|
104
104
|
* `tool_use` block balanced with a matching `tool_result` on replay.
|
|
105
105
|
*/
|
|
106
|
-
|
|
106
|
+
interface EmittedStreamContent {
|
|
107
107
|
readonly inlineActionResultCallIds: ReadonlySet<string>;
|
|
108
108
|
readonly inlineToolResultParts: readonly InlineToolResultPart[];
|
|
109
109
|
}
|
|
@@ -6,7 +6,7 @@ import type { NeedsApprovalContext } from "#public/definitions/tool.js";
|
|
|
6
6
|
* These tools are surfaced to the model without a local `execute` function.
|
|
7
7
|
* The harness records the tool call and the runtime executes it later.
|
|
8
8
|
*/
|
|
9
|
-
|
|
9
|
+
type HarnessRuntimeActionDefinition = {
|
|
10
10
|
readonly kind: "subagent-call";
|
|
11
11
|
readonly nodeId: string;
|
|
12
12
|
readonly subagentName: string;
|
|
@@ -24,3 +24,4 @@ export interface HarnessToolDefinition {
|
|
|
24
24
|
readonly runtimeAction?: HarnessRuntimeActionDefinition;
|
|
25
25
|
readonly toModelOutput?: (output: unknown) => unknown;
|
|
26
26
|
}
|
|
27
|
+
export {};
|
|
@@ -42,7 +42,7 @@ export declare function resolvePendingInput(input: {
|
|
|
42
42
|
readonly session: HarnessSession;
|
|
43
43
|
readonly stepInput?: StepInput;
|
|
44
44
|
}): ResolvePendingInputResult;
|
|
45
|
-
|
|
45
|
+
type ResolvePendingInputResult = {
|
|
46
46
|
readonly deferredMessage?: boolean;
|
|
47
47
|
readonly outcome: "resolved" | "continue" | "unresolved";
|
|
48
48
|
readonly messages: ModelMessage[];
|
|
@@ -72,3 +72,4 @@ export declare function getApprovedTools(session: HarnessSession): ReadonlySet<s
|
|
|
72
72
|
export declare function createRuntimeToolCallActionFromToolCall(input: {
|
|
73
73
|
readonly toolCall: TypedToolCall<ToolSet>;
|
|
74
74
|
}): RuntimeToolCallActionRequest;
|
|
75
|
+
export {};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LanguageModel, ModelMessage,
|
|
1
|
+
import type { LanguageModel, ModelMessage, ToolSet } from "ai";
|
|
2
2
|
/**
|
|
3
3
|
* The caching strategy to apply for one harness step.
|
|
4
4
|
*/
|
|
@@ -71,11 +71,3 @@ export declare function applyLastToolCacheBreakpoint(tools: ToolSet, marker: Ant
|
|
|
71
71
|
* through any preceding tool results.
|
|
72
72
|
*/
|
|
73
73
|
export declare function applyConversationCacheControl(messages: readonly ModelMessage[], marker: AnthropicCacheMarker): ModelMessage[];
|
|
74
|
-
/**
|
|
75
|
-
* Builds the AI SDK `prepareStep` callback for the `anthropic-direct` path.
|
|
76
|
-
*
|
|
77
|
-
* The returned function accepts the SDK's per-step input and returns a
|
|
78
|
-
* `{ messages }` override with the last user and last assistant messages
|
|
79
|
-
* marked with the Anthropic cache control breakpoint.
|
|
80
|
-
*/
|
|
81
|
-
export declare function buildPromptCachePrepareStep(marker: AnthropicCacheMarker): PrepareStepFunction<ToolSet>;
|
|
@@ -135,15 +135,3 @@ export function applyConversationCacheControl(messages, marker) {
|
|
|
135
135
|
}
|
|
136
136
|
return out;
|
|
137
137
|
}
|
|
138
|
-
/**
|
|
139
|
-
* Builds the AI SDK `prepareStep` callback for the `anthropic-direct` path.
|
|
140
|
-
*
|
|
141
|
-
* The returned function accepts the SDK's per-step input and returns a
|
|
142
|
-
* `{ messages }` override with the last user and last assistant messages
|
|
143
|
-
* marked with the Anthropic cache control breakpoint.
|
|
144
|
-
*/
|
|
145
|
-
export function buildPromptCachePrepareStep(marker) {
|
|
146
|
-
return ({ messages }) => ({
|
|
147
|
-
messages: applyConversationCacheControl(messages, marker),
|
|
148
|
-
});
|
|
149
|
-
}
|
|
@@ -31,7 +31,7 @@ interface PendingRuntimeActionBatch {
|
|
|
31
31
|
/**
|
|
32
32
|
* Outcome of resolving a pending runtime-action batch.
|
|
33
33
|
*/
|
|
34
|
-
|
|
34
|
+
interface ResolvePendingRuntimeActionsResult {
|
|
35
35
|
readonly messages: ModelMessage[];
|
|
36
36
|
readonly outcome: "continue" | "resolved" | "unresolved";
|
|
37
37
|
readonly session: HarnessSession;
|
|
@@ -72,7 +72,7 @@ export declare function recordPendingSubagentChildToken(input: {
|
|
|
72
72
|
* accumulation loop process both kinds without coupling to the concrete
|
|
73
73
|
* `HookPayload` shape.
|
|
74
74
|
*/
|
|
75
|
-
|
|
75
|
+
type RuntimeActionAccumulatorItem<TDeliver> = {
|
|
76
76
|
readonly kind: "deliver";
|
|
77
77
|
readonly value: TDeliver;
|
|
78
78
|
} | {
|
|
@@ -88,15 +88,6 @@ export declare function accumulateRuntimeActionResults<TDeliver>(input: {
|
|
|
88
88
|
readonly getNext: () => Promise<RuntimeActionAccumulatorItem<TDeliver> | null>;
|
|
89
89
|
readonly session: HarnessSession;
|
|
90
90
|
}): Promise<RuntimeActionResult[] | null>;
|
|
91
|
-
/**
|
|
92
|
-
* Returns the stable ordered runtime-action results for the current pending
|
|
93
|
-
* batch when every action has a matching result. Unknown and duplicate results
|
|
94
|
-
* are ignored.
|
|
95
|
-
*/
|
|
96
|
-
export declare function resolveReadyRuntimeActionResults(input: {
|
|
97
|
-
readonly results: readonly RuntimeActionResult[];
|
|
98
|
-
readonly session: HarnessSession;
|
|
99
|
-
}): RuntimeActionResult[] | undefined;
|
|
100
91
|
/**
|
|
101
92
|
* Resolves one pending runtime-action batch back into model history.
|
|
102
93
|
*
|