experimental-ash 0.24.2 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/docs/public/typescript-api.md +0 -12
- package/dist/src/channel/adapter.d.ts +5 -18
- package/dist/src/channel/adapter.js +2 -8
- package/dist/src/channel/cross-channel-receive.d.ts +10 -1
- package/dist/src/channel/cross-channel-receive.js +21 -3
- package/dist/src/channel/routes.d.ts +2 -1
- package/dist/src/channel/routes.js +6 -3
- package/dist/src/channel/schedule.js +2 -2
- package/dist/src/channel/types.d.ts +1 -1
- package/dist/src/chunks/{client-DLHAGI2g.js → client-BShLWzR6.js} +3 -3
- package/dist/src/chunks/compile-agent-CyP6FrL8.js +5 -0
- package/dist/src/chunks/dev-authored-source-watcher-DIWfVUsu.js +1 -0
- package/dist/src/chunks/host-BxT35q6K.js +70 -0
- package/dist/src/chunks/paths-B2hLA0Fn.js +85 -0
- package/dist/src/chunks/{token-D98SQdvs.js → token-BOkIxJeV.js} +1 -1
- package/dist/src/chunks/types-CjIyrcYo.js +1 -0
- package/dist/src/cli/commands/info.js +1 -1
- package/dist/src/cli/dev/environment.d.ts +0 -5
- package/dist/src/cli/dev/environment.js +1 -1
- package/dist/src/cli/dev/repl.d.ts +1 -1
- package/dist/src/cli/dev/repl.js +3 -3
- package/dist/src/cli/run.d.ts +0 -1
- package/dist/src/cli/run.js +2 -2
- package/dist/src/client/index.d.ts +1 -1
- package/dist/src/client/message-reducer.js +13 -25
- package/dist/src/client/message-response.d.ts +2 -1
- package/dist/src/client/open-stream.d.ts +3 -3
- package/dist/src/client/open-stream.js +1 -2
- package/dist/src/client/session.d.ts +2 -1
- package/dist/src/client/session.js +0 -3
- package/dist/src/client/types.d.ts +3 -2
- package/dist/src/compiler/artifacts.d.ts +7 -10
- package/dist/src/compiler/artifacts.js +3 -3
- package/dist/src/compiler/manifest.d.ts +6 -15
- package/dist/src/compiler/manifest.js +3 -3
- package/dist/src/compiler/normalize-agent-config.js +12 -10
- package/dist/src/compiler/normalize-manifest.js +3 -2
- package/dist/src/context/container.d.ts +1 -16
- package/dist/src/context/container.js +1 -24
- package/dist/src/context/hook-lifecycle.d.ts +2 -7
- package/dist/src/context/hook-lifecycle.js +0 -6
- package/dist/src/context/provider.d.ts +1 -11
- package/dist/src/context/providers/sandbox.js +4 -1
- package/dist/src/context/run-step.d.ts +2 -4
- package/dist/src/context/run-step.js +4 -17
- package/dist/src/context/seed-keys.d.ts +1 -1
- package/dist/src/discover/connections.d.ts +2 -1
- package/dist/src/discover/diagnostics.d.ts +0 -8
- package/dist/src/discover/diagnostics.js +4 -16
- package/dist/src/discover/discover-agent.d.ts +3 -13
- package/dist/src/discover/discover-agent.js +1 -11
- package/dist/src/discover/discover-subagent.d.ts +3 -2
- package/dist/src/discover/discover-subagent.js +1 -1
- package/dist/src/discover/filesystem.d.ts +0 -37
- package/dist/src/discover/filesystem.js +0 -115
- package/dist/src/discover/grammar.d.ts +10 -20
- package/dist/src/discover/grammar.js +11 -33
- package/dist/src/discover/lib.d.ts +3 -2
- package/dist/src/discover/manifest.d.ts +3 -3
- package/dist/src/discover/markdown.d.ts +2 -1
- package/dist/src/discover/sandbox.d.ts +2 -1
- package/dist/src/discover/schedules.d.ts +3 -2
- package/dist/src/discover/schedules.js +1 -1
- package/dist/src/discover/skills.d.ts +3 -2
- package/dist/src/discover/skills.js +1 -1
- package/dist/src/discover/slots.d.ts +3 -2
- package/dist/src/evals/cli/eval.d.ts +0 -6
- package/dist/src/evals/cli/eval.js +1 -1
- package/dist/src/evals/loaders/index.d.ts +2 -22
- package/dist/src/evals/loaders/index.js +1 -1
- package/dist/src/evals/reporters/index.d.ts +2 -14
- package/dist/src/evals/reporters/index.js +1 -1
- package/dist/src/evals/runner/discover.d.ts +0 -6
- package/dist/src/evals/runner/discover.js +1 -1
- package/dist/src/evals/runner/execute-case.d.ts +2 -1
- package/dist/src/evals/runner/execute-suite.d.ts +2 -1
- package/dist/src/evals/runner/reporters/braintrust.js +1 -1
- package/dist/src/evals/runner/resolve-git-metadata.d.ts +3 -3
- package/dist/src/evals/scorers/autoevals-client.d.ts +2 -2
- package/dist/src/execution/await-authorization-orchestrator.d.ts +0 -2
- package/dist/src/execution/await-authorization-orchestrator.js +11 -19
- package/dist/src/execution/connection-auth-steps.d.ts +1 -5
- package/dist/src/execution/connection-auth-steps.js +5 -9
- package/dist/src/execution/node-step.d.ts +1 -1
- package/dist/src/execution/skills/instructions.d.ts +2 -1
- package/dist/src/execution/subagent-hitl-proxy.d.ts +1 -1
- package/dist/src/execution/subagent-tool.d.ts +0 -8
- package/dist/src/execution/subagent-tool.js +9 -16
- package/dist/src/execution/tool-compaction.js +0 -2
- package/dist/src/execution/turn-workflow.d.ts +1 -1
- package/dist/src/execution/turn-workflow.js +2 -3
- package/dist/src/execution/web-fetch/tool.js +1 -1
- package/dist/src/execution/workflow-steps.d.ts +15 -1
- package/dist/src/execution/workflow-steps.js +8 -18
- package/dist/src/harness/action-result-helpers.d.ts +0 -12
- package/dist/src/harness/action-result-helpers.js +1 -1
- package/dist/src/harness/emission.d.ts +2 -2
- package/dist/src/harness/execute-tool.d.ts +2 -1
- package/dist/src/harness/input-requests.d.ts +2 -1
- package/dist/src/harness/prompt-cache.d.ts +1 -9
- package/dist/src/harness/prompt-cache.js +0 -12
- package/dist/src/harness/runtime-actions.d.ts +2 -11
- package/dist/src/harness/runtime-actions.js +1 -1
- package/dist/src/harness/step-hooks.d.ts +3 -2
- package/dist/src/harness/step-hooks.js +3 -7
- package/dist/src/harness/tool-loop.js +0 -2
- package/dist/src/harness/types.d.ts +1 -1
- package/dist/src/internal/application/package.js +2 -2
- package/dist/src/internal/application/runtime-compiled-artifacts-source.js +0 -1
- package/dist/src/internal/attachments/errors.d.ts +8 -2
- package/dist/src/internal/attachments/url-refs.d.ts +0 -1
- package/dist/src/internal/attachments/url-refs.js +1 -1
- package/dist/src/internal/authored-definition/channel.d.ts +0 -5
- package/dist/src/internal/authored-definition/channel.js +1 -10
- package/dist/src/internal/authored-definition/sandbox.d.ts +2 -1
- package/dist/src/internal/authored-definition/schema-backed.d.ts +3 -2
- package/dist/src/internal/authored-module-loader.d.ts +0 -6
- package/dist/src/internal/authored-module-loader.js +0 -9
- package/dist/src/internal/authored-module.d.ts +0 -4
- package/dist/src/internal/authored-module.js +0 -10
- package/dist/src/internal/bundler/nitro-rolldown.d.ts +2 -10
- package/dist/src/internal/bundler/nitro-rolldown.js +1 -1
- package/dist/src/{public → internal}/helpers/markdown.d.ts +2 -23
- package/dist/src/{public → internal}/helpers/markdown.js +1 -1
- package/dist/src/internal/logging.d.ts +2 -8
- package/dist/src/internal/nitro/host/build-vercel-agent-summary.d.ts +1 -1
- package/dist/src/internal/nitro/host/build-vercel-agent-summary.js +1 -1
- package/dist/src/internal/nitro/host/create-application-nitro.js +1 -1
- package/dist/src/internal/nitro/host/dev-authored-source-watcher.js +2 -2
- package/dist/src/internal/nitro/routes/channel-dispatch.js +2 -2
- package/dist/src/internal/nitro/routes/runtime-artifacts.js +0 -1
- package/dist/src/internal/node-esm-compat-banner.d.ts +1 -1
- package/dist/src/internal/runtime-registry.d.ts +2 -1
- package/dist/src/{protocol → internal}/vercel-agent-summary.d.ts +1 -1
- package/dist/src/protocol/message.d.ts +0 -34
- package/dist/src/protocol/message.js +0 -30
- package/dist/src/public/channels/index.d.ts +1 -1
- package/dist/src/public/channels/index.js +1 -1
- package/dist/src/public/channels/slack/api.d.ts +1 -7
- package/dist/src/public/channels/slack/api.js +1 -1
- package/dist/src/public/channels/slack/connections.d.ts +2 -6
- package/dist/src/public/channels/slack/hitl.d.ts +3 -2
- package/dist/src/public/channels/slack/inbound.d.ts +0 -35
- package/dist/src/public/definitions/channel.d.ts +2 -2
- package/dist/src/public/definitions/channel.js +1 -1
- package/dist/src/public/definitions/defineChannel.d.ts +1 -1
- package/dist/src/public/definitions/defineChannel.js +1 -1
- package/dist/src/public/definitions/instructions.d.ts +0 -11
- package/dist/src/public/definitions/instructions.js +0 -5
- package/dist/src/public/definitions/tool.d.ts +3 -7
- package/dist/src/public/definitions/tool.js +1 -1
- package/dist/src/public/instructions/index.d.ts +1 -5
- package/dist/src/public/instructions/index.js +1 -3
- package/dist/src/public/tool-result-narrowing.d.ts +2 -1
- package/dist/src/react/use-ash-agent.d.ts +2 -1
- package/dist/src/react/use-ash-agent.js +1 -5
- package/dist/src/runtime/actions/types.d.ts +8 -50
- package/dist/src/runtime/actions/types.js +5 -21
- package/dist/src/runtime/agent/bootstrap-model-utils.d.ts +2 -1
- package/dist/src/runtime/agent/mock-model-adapter.js +1 -1
- package/dist/src/runtime/channels/registry.js +3 -8
- package/dist/src/runtime/compiled-artifacts-source.d.ts +4 -11
- package/dist/src/runtime/compiled-artifacts-source.js +3 -7
- package/dist/src/runtime/connections/authorization-tokens.d.ts +1 -14
- package/dist/src/runtime/connections/authorization-tokens.js +2 -28
- package/dist/src/runtime/connections/principal-context.d.ts +3 -2
- package/dist/src/runtime/connections/validate-authorization.d.ts +0 -11
- package/dist/src/runtime/connections/validate-authorization.js +0 -16
- package/dist/src/runtime/framework-channels/index.d.ts +0 -1
- package/dist/src/runtime/framework-channels/index.js +1 -1
- package/dist/src/runtime/framework-tools/connection-search.d.ts +0 -46
- package/dist/src/runtime/framework-tools/connection-search.js +3 -78
- package/dist/src/runtime/framework-tools/connection-tools.d.ts +3 -6
- package/dist/src/runtime/framework-tools/connection-tools.js +6 -13
- package/dist/src/runtime/framework-tools/index.d.ts +2 -1
- package/dist/src/runtime/framework-tools/index.js +2 -1
- package/dist/src/runtime/framework-tools/skill.d.ts +0 -13
- package/dist/src/runtime/framework-tools/skill.js +1 -1
- package/dist/src/runtime/governance/auth/token-claims.d.ts +1 -16
- package/dist/src/runtime/governance/auth/token-claims.js +3 -3
- package/dist/src/runtime/governance/auth/types.d.ts +0 -4
- package/dist/src/runtime/governance/network/ip-allow-list.d.ts +0 -17
- package/dist/src/runtime/governance/network/ip-allow-list.js +0 -39
- package/dist/src/runtime/hooks/registry.d.ts +7 -4
- package/dist/src/runtime/hooks/registry.js +4 -2
- package/dist/src/runtime/loaders/bundled-artifacts.d.ts +0 -5
- package/dist/src/runtime/loaders/bundled-artifacts.js +0 -7
- package/dist/src/runtime/loaders/compile-metadata.d.ts +3 -10
- package/dist/src/runtime/loaders/compile-metadata.js +6 -8
- package/dist/src/runtime/loaders/manifest.d.ts +2 -2
- package/dist/src/runtime/loaders/manifest.js +4 -5
- package/dist/src/runtime/loaders/module-map.d.ts +2 -2
- package/dist/src/runtime/loaders/module-map.js +3 -4
- package/dist/src/runtime/prompt/compose.d.ts +2 -1
- package/dist/src/runtime/resolve-agent-graph.d.ts +2 -15
- package/dist/src/runtime/resolve-agent-graph.js +1 -1
- package/dist/src/runtime/sandbox/keys.d.ts +2 -1
- package/dist/src/runtime/sandbox/registry.d.ts +3 -3
- package/dist/src/runtime/schedules/resolve-schedule.d.ts +3 -12
- package/dist/src/runtime/schedules/resolve-schedule.js +1 -1
- package/dist/src/runtime/sessions/auth.d.ts +2 -13
- package/dist/src/runtime/sessions/auth.js +1 -11
- package/dist/src/runtime/sessions/compiled-agent-cache.js +1 -1
- package/dist/src/runtime/sessions/runtime-session.d.ts +0 -6
- package/dist/src/runtime/sessions/runtime-session.js +0 -13
- package/dist/src/runtime/sessions/turn.d.ts +2 -2
- package/dist/src/runtime/subagents/registry.d.ts +2 -5
- package/dist/src/runtime/subagents/registry.js +0 -6
- package/dist/src/runtime/tools/registry.d.ts +2 -1
- package/dist/src/runtime/tools/registry.js +1 -4
- package/dist/src/runtime/types.d.ts +4 -3
- package/dist/src/runtime/workspace/seed-files.d.ts +2 -1
- package/dist/src/services/dev-client/request-headers.d.ts +37 -8
- package/dist/src/services/dev-client/request-headers.js +71 -46
- package/dist/src/services/dev-client/stream.d.ts +0 -19
- package/dist/src/services/dev-client/stream.js +0 -37
- package/dist/src/services/dev-client/url.d.ts +3 -7
- package/dist/src/services/dev-client/url.js +4 -10
- package/dist/src/services/dev-client.d.ts +0 -12
- package/dist/src/services/dev-client.js +6 -92
- package/dist/src/shared/agent-definition.d.ts +1 -1
- package/dist/src/shared/json.d.ts +0 -4
- package/dist/src/shared/json.js +0 -38
- package/dist/src/shared/skill-definition.d.ts +0 -2
- package/package.json +1 -6
- package/dist/src/chunks/dev-authored-source-watcher-CBID_Dwh.js +0 -1
- package/dist/src/chunks/host-zBy9FyyX.js +0 -70
- package/dist/src/chunks/package-HUaeub_D.js +0 -1
- package/dist/src/chunks/paths-CebY5GCi.js +0 -89
- package/dist/src/chunks/types-DDA2QUED.js +0 -1
- package/dist/src/compiler/resource-files.d.ts +0 -19
- package/dist/src/compiler/resource-files.js +0 -28
- package/dist/src/execution/subagent-invocation.d.ts +0 -19
- package/dist/src/execution/subagent-invocation.js +0 -17
- package/dist/src/execution/task-mode.d.ts +0 -9
- package/dist/src/execution/task-mode.js +0 -12
- package/dist/src/execution/types.d.ts +0 -20
- package/dist/src/execution/types.js +0 -1
- package/dist/src/internal/logical-paths.d.ts +0 -13
- package/dist/src/internal/logical-paths.js +0 -25
- package/dist/src/runtime/sessions/messages.d.ts +0 -140
- package/dist/src/runtime/sessions/messages.js +0 -170
- package/dist/src/runtime/standard-schema.d.ts +0 -27
- package/dist/src/runtime/standard-schema.js +0 -64
- package/dist/src/services/dev-client/live-stream.d.ts +0 -35
- package/dist/src/services/dev-client/live-stream.js +0 -157
- package/dist/src/services/dev-client/send-message.d.ts +0 -24
- package/dist/src/services/dev-client/send-message.js +0 -185
- package/dist/src/services/dev-client/session.d.ts +0 -54
- package/dist/src/services/dev-client/session.js +0 -52
- package/dist/src/services/host.d.ts +0 -14
- package/dist/src/services/host.js +0 -13
- /package/dist/src/{package-name.d.ts → internal/package-name.d.ts} +0 -0
- /package/dist/src/{package-name.js → internal/package-name.js} +0 -0
- /package/dist/src/{protocol → internal}/vercel-agent-summary.js +0 -0
- /package/dist/src/{run-mode.d.ts → shared/run-mode.d.ts} +0 -0
- /package/dist/src/{run-mode.js → shared/run-mode.js} +0 -0
|
@@ -2,7 +2,7 @@ import type { Dirent } from "node:fs";
|
|
|
2
2
|
/**
|
|
3
3
|
* Candidate authored sources for one flat slot such as `system` or `agent`.
|
|
4
4
|
*/
|
|
5
|
-
|
|
5
|
+
interface FlatSlotCandidates {
|
|
6
6
|
markdownFileName?: string;
|
|
7
7
|
moduleFileNames: string[];
|
|
8
8
|
}
|
|
@@ -10,7 +10,7 @@ export interface FlatSlotCandidates {
|
|
|
10
10
|
* Candidate authored sources for one named directory slot such as
|
|
11
11
|
* `tools/<name>`.
|
|
12
12
|
*/
|
|
13
|
-
|
|
13
|
+
interface NamedSlotCandidates extends FlatSlotCandidates {
|
|
14
14
|
slotName: string;
|
|
15
15
|
}
|
|
16
16
|
/**
|
|
@@ -27,3 +27,4 @@ export declare function collectNamedSlotCandidates(entries: readonly Pick<Dirent
|
|
|
27
27
|
allowMarkdown: boolean;
|
|
28
28
|
allowModules: boolean;
|
|
29
29
|
}): NamedSlotCandidates[];
|
|
30
|
+
export {};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { Command } from "commander";
|
|
2
1
|
interface EvalCliOptions {
|
|
3
2
|
suite?: string[];
|
|
4
3
|
url?: string;
|
|
@@ -6,17 +5,12 @@ interface EvalCliOptions {
|
|
|
6
5
|
maxConcurrency?: string;
|
|
7
6
|
json?: boolean;
|
|
8
7
|
all?: boolean;
|
|
9
|
-
listSuites?: boolean;
|
|
10
8
|
skipReport?: boolean;
|
|
11
9
|
}
|
|
12
10
|
type EvalCliLogger = {
|
|
13
11
|
log(message: string): void;
|
|
14
12
|
error(message: string): void;
|
|
15
13
|
};
|
|
16
|
-
/**
|
|
17
|
-
* Registers the `ash eval` command on the given Commander program.
|
|
18
|
-
*/
|
|
19
|
-
export declare function registerEvalCommand(program: Command, logger: EvalCliLogger): void;
|
|
20
14
|
/**
|
|
21
15
|
* Runs the `ash eval` command with already-parsed Commander options.
|
|
22
16
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{n as e}from"../../chunks/paths-
|
|
1
|
+
import{n as e}from"../../chunks/paths-B2hLA0Fn.js";import{loadDevelopmentEnvironmentFiles as t}from"../../cli/dev/environment.js";import{n,s as r,t as i}from"../../chunks/client-BShLWzR6.js";import{n as a}from"../../chunks/host-BxT35q6K.js";import{discoverAndImportSuites as o}from"../runner/discover.js";import{executeSuite as s}from"../runner/execute-suite.js";import{ConsoleReporter as c}from"../runner/reporters/console.js";var l=r();async function u(n,r){let i=e();t(i);let c=n.suite,l=await o(i,c);if(l.length===0){c&&c.length>0?r.error(`No suites found matching: ${c.join(`, `)}`):r.error(`No eval suites found. Create suite files under evals/ with the *.eval.ts extension.`),process.exitCode=1;return}let u,f;n.url?f={kind:`remote`,url:n.url}:(u=await a(i,{host:`127.0.0.1`,port:0}),f={kind:`local`,url:u.url});let p=d(f);try{let e=[];for(let t of l){let r=m(t,n),a=h(r,{json:n.json===!0,skipReport:n.skipReport===!0}),o=await s({suite:r,target:f,reporters:a,appRoot:i,client:p});e.push(o)}n.json&&r.log(JSON.stringify(e,null,2)),e.some(e=>e.errored>0)&&(process.exitCode=1)}finally{u&&await u.close()}process.exit(process.exitCode??0)}function d(e){if(e.kind===`local`)return new i({host:e.url});let t={},r=process.env.VERCEL_AUTOMATION_BYPASS_SECRET?.trim();return r&&(t[n]=r),new i({auth:f(),headers:Object.keys(t).length>0?t:void 0,host:e.url})}function f(){let e=process.env.ASH_EVAL_AUTH_TOKEN?.trim();return e?{bearer:e}:{bearer:p}}async function p(){try{let e=(await(0,l.getVercelOidcToken)()).trim();if(e.length>0)return e}catch{}return process.env.VERCEL_OIDC_TOKEN?.trim()??``}function m(e,t){let n=t.maxConcurrency?Number.parseInt(t.maxConcurrency,10):void 0,r=t.timeout?Number.parseInt(t.timeout,10):void 0;if(n===void 0&&r===void 0)return e;let i={...e};return n!==void 0&&(i.maxConcurrency=n),r!==void 0&&(i.timeoutMs=r),i}function h(e,t){let n=t.json?[]:[new c];return!t.skipReport&&e.reporters&&n.push(...e.reporters),n}export{u as runEvalCommand};
|
|
@@ -1,22 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
*
|
|
4
|
-
* @example
|
|
5
|
-
* ```ts
|
|
6
|
-
* import { loadJson } from "experimental-ash/evals/loaders";
|
|
7
|
-
*
|
|
8
|
-
* const data = await loadJson("evals/data/cases.json");
|
|
9
|
-
* ```
|
|
10
|
-
*/
|
|
11
|
-
export declare function loadJson(filePath: string): Promise<unknown>;
|
|
12
|
-
/**
|
|
13
|
-
* Loads and parses a YAML file.
|
|
14
|
-
*
|
|
15
|
-
* @example
|
|
16
|
-
* ```ts
|
|
17
|
-
* import { loadYaml } from "experimental-ash/evals/loaders";
|
|
18
|
-
*
|
|
19
|
-
* const doc = await loadYaml("evals/data/cases.yaml");
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
export declare function loadYaml(filePath: string): Promise<Record<string, unknown>>;
|
|
1
|
+
export { loadJson } from "#evals/loaders/json.js";
|
|
2
|
+
export { loadYaml } from "#evals/loaders/yaml.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{loadJson as e}from"./json.js";import{loadYaml as t}from"./yaml.js";
|
|
1
|
+
import{loadJson as e}from"./json.js";import{loadYaml as t}from"./yaml.js";export{e as loadJson,t as loadYaml};
|
|
@@ -1,14 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Creates a Braintrust reporter for use in a suite's `reporters` array.
|
|
5
|
-
*
|
|
6
|
-
* @example
|
|
7
|
-
* ```ts
|
|
8
|
-
* import { Braintrust } from "experimental-ash/evals/reporters";
|
|
9
|
-
*
|
|
10
|
-
* Braintrust({ projectName: "My Project" });
|
|
11
|
-
* ```
|
|
12
|
-
*/
|
|
13
|
-
export declare function Braintrust(config?: BraintrustReporterConfig): EvalReporter;
|
|
14
|
-
export type { BraintrustReporterConfig, EvalReporter };
|
|
1
|
+
export { Braintrust, type BraintrustReporterConfig } from "#evals/runner/reporters/braintrust.js";
|
|
2
|
+
export type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{Braintrust as e}from"../runner/reporters/braintrust.js";
|
|
1
|
+
import{Braintrust as e}from"../runner/reporters/braintrust.js";export{e as Braintrust};
|
|
@@ -6,12 +6,6 @@ import type { AshEvalSuite } from "#evals/types.js";
|
|
|
6
6
|
* Returns absolute paths sorted alphabetically by relative path.
|
|
7
7
|
*/
|
|
8
8
|
export declare function discoverSuiteFiles(appRoot: string): Promise<string[]>;
|
|
9
|
-
/**
|
|
10
|
-
* Derives the canonical suite id from one absolute eval-suite file path.
|
|
11
|
-
*
|
|
12
|
-
* `<appRoot>/evals/sub/weather.eval.ts` → `"sub/weather"`.
|
|
13
|
-
*/
|
|
14
|
-
export declare function deriveSuiteId(appRoot: string, filePath: string): string;
|
|
15
9
|
/**
|
|
16
10
|
* Imports a discovered suite file and stamps the path-derived id onto
|
|
17
11
|
* the suite definition.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{t as e}from"../../chunks/authored-module-loader-DcCfCiBm.js";import{join as t,relative as n}from"node:path";import{readdir as r}from"node:fs/promises";const i=`.eval.ts`;async function a(e){let r=t(e,`evals`),i=[];try{await u(r,i)}catch(e){if(d(e))return[];throw e}return i.sort((e,t)=>n(r,e).localeCompare(n(r,t))),i}function o(e,r){let a=n(t(e,`evals`),r).split(/[\\/]/u).join(`/`);return a.endsWith(i)?a.slice(0,-8):a}async function s(t,n){let r=(await e(n)).default;if(!l(r))throw Error(`Suite file "${n}" does not export a valid AshEvalSuite as its default export. Use defineEvalSuite() to create the suite.`);return{...r,id:o(t,n)}}async function c(e,t){let n=await a(e);if(n.length===0)return[];let r=[];for(let i of n){let n=await s(e,i);(t===void 0||t.length===0||t.includes(n.id))&&r.push(n)}return r}function l(e){return typeof e==`object`&&!!e&&`_tag`in e&&e._tag===`AshEvalSuite`}async function u(e,n){let a=await r(e,{withFileTypes:!0});for(let r of a){let a=t(e,r.name);r.isDirectory()?await u(a,n):r.isFile()&&r.name.endsWith(i)&&n.push(a)}}function d(e){return typeof e==`object`&&!!e&&`code`in e&&e.code===`ENOENT`}export{
|
|
1
|
+
import{t as e}from"../../chunks/authored-module-loader-DcCfCiBm.js";import{join as t,relative as n}from"node:path";import{readdir as r}from"node:fs/promises";const i=`.eval.ts`;async function a(e){let r=t(e,`evals`),i=[];try{await u(r,i)}catch(e){if(d(e))return[];throw e}return i.sort((e,t)=>n(r,e).localeCompare(n(r,t))),i}function o(e,r){let a=n(t(e,`evals`),r).split(/[\\/]/u).join(`/`);return a.endsWith(i)?a.slice(0,-8):a}async function s(t,n){let r=(await e(n)).default;if(!l(r))throw Error(`Suite file "${n}" does not export a valid AshEvalSuite as its default export. Use defineEvalSuite() to create the suite.`);return{...r,id:o(t,n)}}async function c(e,t){let n=await a(e);if(n.length===0)return[];let r=[];for(let i of n){let n=await s(e,i);(t===void 0||t.length===0||t.includes(n.id))&&r.push(n)}return r}function l(e){return typeof e==`object`&&!!e&&`_tag`in e&&e._tag===`AshEvalSuite`}async function u(e,n){let a=await r(e,{withFileTypes:!0});for(let r of a){let a=t(e,r.name);r.isDirectory()?await u(a,n):r.isFile()&&r.name.endsWith(i)&&n.push(a)}}function d(e){return typeof e==`object`&&!!e&&`code`in e&&e.code===`ENOENT`}export{c as discoverAndImportSuites,a as discoverSuiteFiles,s as importSuiteFile};
|
|
@@ -3,7 +3,7 @@ import type { AshEvalCase, AshEvalTask, AshEvalTaskResult } from "#evals/types.j
|
|
|
3
3
|
/**
|
|
4
4
|
* Options for executing one eval case.
|
|
5
5
|
*/
|
|
6
|
-
|
|
6
|
+
interface ExecuteCaseOptions {
|
|
7
7
|
readonly client: Client;
|
|
8
8
|
readonly testCase: AshEvalCase;
|
|
9
9
|
readonly task?: AshEvalTask;
|
|
@@ -20,3 +20,4 @@ export interface ExecuteCaseOptions {
|
|
|
20
20
|
* and status come from the final turn only.
|
|
21
21
|
*/
|
|
22
22
|
export declare function executeCase(options: ExecuteCaseOptions): Promise<AshEvalTaskResult>;
|
|
23
|
+
export {};
|
|
@@ -4,7 +4,7 @@ import type { EvalReporter } from "#evals/runner/reporters/types.js";
|
|
|
4
4
|
/**
|
|
5
5
|
* Options for executing one eval suite.
|
|
6
6
|
*/
|
|
7
|
-
|
|
7
|
+
interface ExecuteSuiteOptions {
|
|
8
8
|
readonly suite: AshEvalSuite;
|
|
9
9
|
readonly target: AshEvalTarget;
|
|
10
10
|
readonly reporters: readonly EvalReporter[];
|
|
@@ -21,3 +21,4 @@ export interface ExecuteSuiteOptions {
|
|
|
21
21
|
* reports results.
|
|
22
22
|
*/
|
|
23
23
|
export declare function executeSuite(options: ExecuteSuiteOptions): Promise<AshEvalSuiteResult>;
|
|
24
|
+
export {};
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{resolveLocalGitMetadata as e}from"../resolve-git-metadata.js";function t(e={}){return new n(e)}var n=class{#e;#t;#n;#r
|
|
1
|
+
import{resolveLocalGitMetadata as e}from"../resolve-git-metadata.js";function t(e={}){return new n(e)}var n=class{#e;#t;#n;#r;constructor(e){this.#e=e}async onSuiteStart(t,n){let o=await r();this.#t=o;let s=e(process.cwd());this.#r=t.metadata;let c=i(t,n),l=a(t,n);this.#n=await o.init({project:this.#e.projectName??t.id,projectId:this.#e.projectId,experiment:this.#e.experimentName,baseExperiment:this.#e.baseExperimentName,baseExperimentId:this.#e.baseExperimentId,update:this.#e.update,tags:c,metadata:l,noExitFlush:!0,setCurrent:!1,repoInfo:s.sha?{commit:s.sha,branch:s.branch}:null})}onCaseComplete(e){if(!this.#n)return;let t={};for(let n of e.scores)n.score!==null&&(t[n.name]=n.score);let n={...this.#r,...e.case.metadata,ashSessionId:e.result.sessionId,ashStatus:e.result.status,ashToolCalls:e.result.derived.toolCalls,ashSubagentCalls:e.result.derived.subagentCalls};e.result.derived.failureCode&&(n.ashFailureCode=e.result.derived.failureCode);let r={toolCallCount:e.result.derived.toolCallCount,subagentCallCount:e.result.derived.subagentCallCount,messageCount:e.result.derived.messageCount,reasoningBlockCount:e.result.derived.reasoningBlockCount};this.#n.log({id:e.case.id,input:e.case.input,output:e.result.output,expected:e.case.expected,error:e.error??void 0,scores:t,metadata:n,metrics:r,tags:e.case.tags?[...e.case.tags]:void 0})}async onSuiteComplete(e){if(this.#n)try{this.#t&&await this.#t.flush();let e=await this.#n.summarize();e.experimentUrl&&console.log(`Braintrust experiment: ${e.experimentUrl}\n\n`)}finally{await this.#n.close(),this.#n=void 0,this.#t=void 0}}};async function r(){try{return await import(`braintrust`)}catch{throw Error([`The 'braintrust' package is required for Braintrust reporting but was not found.`,``,`Install it with:`,` npm install braintrust`].join(`
|
|
2
2
|
`))}}function i(e,t){let n=[`ash`,`suite:${e.id}`,`target:${t.kind}`];return Array.isArray(e.tags)&&n.push(...e.tags),n}function a(e,t){return{ashSuiteId:e.id,ashTargetKind:t.kind,ashTargetUrl:t.url,ashTimestamp:new Date().toISOString()}}export{t as Braintrust};
|
|
@@ -5,9 +5,9 @@ interface GitMetadata {
|
|
|
5
5
|
/**
|
|
6
6
|
* Resolves local git metadata for the eval run context.
|
|
7
7
|
*
|
|
8
|
-
* Used to populate
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* Used to populate `repoInfo` on the Braintrust experiment so the dashboard
|
|
9
|
+
* shows which sha/branch produced the run. This describes the eval code,
|
|
10
|
+
* not the remote target.
|
|
11
11
|
*
|
|
12
12
|
* Returns an empty object when git is unavailable or the directory is
|
|
13
13
|
* not a git repository.
|
|
@@ -4,9 +4,9 @@ import { Factuality } from "autoevals";
|
|
|
4
4
|
* The OpenAI-shaped client surface autoevals expects. Extracted from the
|
|
5
5
|
* library so we don't take a direct dependency on `openai` types.
|
|
6
6
|
*/
|
|
7
|
-
|
|
7
|
+
type AutoevalsClient = NonNullable<Parameters<typeof Factuality>[0]["client"]>;
|
|
8
8
|
type ProviderOptions = Parameters<typeof generateText>[0]["providerOptions"];
|
|
9
|
-
|
|
9
|
+
interface AutoevalsClientConfig {
|
|
10
10
|
readonly languageModel: LanguageModel;
|
|
11
11
|
readonly providerOptions?: ProviderOptions;
|
|
12
12
|
}
|
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
* effects stay inside durable steps.
|
|
7
7
|
*/
|
|
8
8
|
import type { HarnessSession } from "#harness/types.js";
|
|
9
|
-
import type { PendingConnectionAuthorization } from "#runtime/framework-tools/connection-search.js";
|
|
10
9
|
import type { PendingConnectionToolCall } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
11
10
|
/**
|
|
12
11
|
* Return value of {@link awaitAuthorizationAndResolve}. The workflow
|
|
@@ -33,7 +32,6 @@ export interface AwaitAuthorizationResolveResult {
|
|
|
33
32
|
export declare function awaitAuthorizationAndResolve(input: {
|
|
34
33
|
readonly parentWritable: WritableStream<Uint8Array>;
|
|
35
34
|
readonly pendingToolCalls: readonly PendingConnectionToolCall[];
|
|
36
|
-
readonly pendingAuths: readonly PendingConnectionAuthorization[];
|
|
37
35
|
readonly serializedContext: Record<string, unknown>;
|
|
38
36
|
readonly session: HarnessSession;
|
|
39
37
|
}): Promise<AwaitAuthorizationResolveResult>;
|
|
@@ -23,8 +23,8 @@ import { completeAuthorizationForConnectionStep, emitConnectionAuthorizationPend
|
|
|
23
23
|
*/
|
|
24
24
|
export async function awaitAuthorizationAndResolve(input) {
|
|
25
25
|
const emissionState = getHarnessEmissionState(input.session);
|
|
26
|
-
const
|
|
27
|
-
if (
|
|
26
|
+
const connectionNames = uniqueConnectionNames(input.pendingToolCalls);
|
|
27
|
+
if (connectionNames.length === 0) {
|
|
28
28
|
return {
|
|
29
29
|
serializedContext: input.serializedContext,
|
|
30
30
|
session: input.session,
|
|
@@ -45,12 +45,12 @@ export async function awaitAuthorizationAndResolve(input) {
|
|
|
45
45
|
// callback in Ash lets the framework decide delivery policy (auth,
|
|
46
46
|
// throttling, logging) without leaking generic workflow primitives.
|
|
47
47
|
const callbackBaseUrl = trimTrailingSlash(getWorkflowMetadata().url);
|
|
48
|
-
const hooks =
|
|
48
|
+
const hooks = connectionNames.map((name) => {
|
|
49
49
|
const hook = createHook();
|
|
50
50
|
return {
|
|
51
|
-
connectionName:
|
|
51
|
+
connectionName: name,
|
|
52
52
|
hook,
|
|
53
|
-
webhookUrl: `${callbackBaseUrl}${createAshConnectionCallbackRoutePath(
|
|
53
|
+
webhookUrl: `${callbackBaseUrl}${createAshConnectionCallbackRoutePath(name, hook.token)}`,
|
|
54
54
|
};
|
|
55
55
|
});
|
|
56
56
|
// Run every `startAuthorization` inside its own durable step,
|
|
@@ -157,23 +157,15 @@ export async function awaitAuthorizationAndResolve(input) {
|
|
|
157
157
|
tokens,
|
|
158
158
|
});
|
|
159
159
|
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
* least one entry of `pendingCalls`.
|
|
163
|
-
*/
|
|
164
|
-
function filterAuthorizationsWithPendingCalls(pending, pendingCalls) {
|
|
165
|
-
const targetConnectionNames = new Set();
|
|
160
|
+
function uniqueConnectionNames(pendingCalls) {
|
|
161
|
+
const seen = new Set();
|
|
166
162
|
for (const call of pendingCalls) {
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
else {
|
|
171
|
-
for (const name of call.connectionNames) {
|
|
172
|
-
targetConnectionNames.add(name);
|
|
173
|
-
}
|
|
163
|
+
const names = call.kind === "connection-execute" ? [call.connectionName] : call.connectionNames;
|
|
164
|
+
for (const name of names) {
|
|
165
|
+
seen.add(name);
|
|
174
166
|
}
|
|
175
167
|
}
|
|
176
|
-
return
|
|
168
|
+
return [...seen];
|
|
177
169
|
}
|
|
178
170
|
/**
|
|
179
171
|
* Awaits the first payload delivered to `hook` via `resumeHook`, using
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import type { HarnessEmissionState } from "#harness/emission.js";
|
|
6
6
|
import type { HarnessSession } from "#harness/types.js";
|
|
7
|
-
import { type ConnectionAuthorizationOutcome } from "#protocol/message.js";
|
|
8
|
-
import type { ConnectionAuthorizationChallenge } from "#public/connections/errors.js";
|
|
9
7
|
import type { JsonValue } from "#public/types/json.js";
|
|
10
8
|
import type { AuthorizationCallbackRequest, ConnectionPrincipal, TokenResult } from "#runtime/connections/types.js";
|
|
11
9
|
import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
@@ -13,7 +11,7 @@ import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending
|
|
|
13
11
|
* Result of one `startAuthorization` step.
|
|
14
12
|
*
|
|
15
13
|
* On success the runtime journals
|
|
16
|
-
* `{ ok: true, principal, state,
|
|
14
|
+
* `{ ok: true, principal, state, serializedContext }`. The
|
|
17
15
|
* `principal` is the framework-resolved {@link ConnectionPrincipal}
|
|
18
16
|
* captured at `startAuthorization` time; the orchestrator carries it
|
|
19
17
|
* forward so `completeAuthorization` and the post-resume retry observe
|
|
@@ -32,7 +30,6 @@ import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending
|
|
|
32
30
|
* `outcome: "failed"` so channels clean up their UI.
|
|
33
31
|
*/
|
|
34
32
|
export type StartAuthorizationStepResult = {
|
|
35
|
-
readonly challenge: ConnectionAuthorizationChallenge;
|
|
36
33
|
readonly ok: true;
|
|
37
34
|
readonly principal: ConnectionPrincipal;
|
|
38
35
|
readonly serializedContext: Record<string, unknown>;
|
|
@@ -79,7 +76,6 @@ export type CompleteAuthorizationStepResult = {
|
|
|
79
76
|
readonly token: TokenResult;
|
|
80
77
|
} | {
|
|
81
78
|
readonly ok: false;
|
|
82
|
-
readonly outcome: Exclude<ConnectionAuthorizationOutcome, "authorized">;
|
|
83
79
|
readonly reason: string;
|
|
84
80
|
/**
|
|
85
81
|
* When `true`, downstream pending tool calls for this connection
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { buildAdapterContext } from "#channel/adapter-context.js";
|
|
6
6
|
import { callAdapterEventHandler } from "#channel/adapter.js";
|
|
7
|
-
import { contextStorage
|
|
7
|
+
import { contextStorage } from "#context/container.js";
|
|
8
8
|
import { BundleKey, ChannelKey } from "#context/keys.js";
|
|
9
9
|
import { getActiveRuntimeNode } from "#context/node.js";
|
|
10
10
|
import { deserializeContext, serializeContext } from "#context/serialize.js";
|
|
@@ -14,7 +14,7 @@ import { writeCachedToken } from "#runtime/connections/authorization-tokens.js";
|
|
|
14
14
|
import { withConnectionPrincipalOverride } from "#runtime/connections/principal-context.js";
|
|
15
15
|
import { principalKey, resolveConnectionPrincipal } from "#runtime/connections/principal.js";
|
|
16
16
|
import { ConnectionRegistryImpl } from "#runtime/connections/registry.js";
|
|
17
|
-
import { ConnectionRegistryKey, executeConnectionSearch,
|
|
17
|
+
import { ConnectionRegistryKey, executeConnectionSearch, } from "#runtime/framework-tools/connection-search.js";
|
|
18
18
|
import { isConnectionAuthorizationPlaceholder, PendingConnectionToolCallsKey, } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
19
19
|
import { withDefaultAuthorizationInstructions } from "#execution/authorization-challenge-defaults.js";
|
|
20
20
|
import { splicePendingToolResults } from "#execution/await-authorization-splice.js";
|
|
@@ -82,7 +82,6 @@ export async function startAuthorizationForConnectionStep(input) {
|
|
|
82
82
|
// the handler made (e.g. tracked Slack message ts) survive the
|
|
83
83
|
// step boundary into the matching `completeAuthorization` step.
|
|
84
84
|
return {
|
|
85
|
-
challenge,
|
|
86
85
|
ok: true,
|
|
87
86
|
principal,
|
|
88
87
|
serializedContext: serializeContext(ctx),
|
|
@@ -130,7 +129,7 @@ export async function completeAuthorizationForConnectionStep(input) {
|
|
|
130
129
|
stepIndex: input.emissionState.stepIndex,
|
|
131
130
|
turnId: input.emissionState.turnId,
|
|
132
131
|
}), input.parentWritable);
|
|
133
|
-
return { ok: false,
|
|
132
|
+
return { ok: false, reason, retryable: false };
|
|
134
133
|
}
|
|
135
134
|
try {
|
|
136
135
|
const token = await connection.authorization.completeAuthorization({
|
|
@@ -168,7 +167,7 @@ export async function completeAuthorizationForConnectionStep(input) {
|
|
|
168
167
|
stepIndex: input.emissionState.stepIndex,
|
|
169
168
|
turnId: input.emissionState.turnId,
|
|
170
169
|
}), input.parentWritable);
|
|
171
|
-
return { ok: false,
|
|
170
|
+
return { ok: false, reason, retryable };
|
|
172
171
|
}
|
|
173
172
|
}
|
|
174
173
|
/**
|
|
@@ -290,9 +289,6 @@ export async function resolvePendingToolCallsStep(input) {
|
|
|
290
289
|
return names.some((name) => !resolvedSet.has(name) && input.failedConnections[name] === undefined);
|
|
291
290
|
});
|
|
292
291
|
ctx.set(PendingConnectionToolCallsKey, remainingPending);
|
|
293
|
-
const currentPendingAuths = ctx.get(PendingConnectionAuthorizationsKey) ?? [];
|
|
294
|
-
const remainingPendingAuths = currentPendingAuths.filter((p) => !resolvedSet.has(p.connectionName) && input.failedConnections[p.connectionName] === undefined);
|
|
295
|
-
ctx.set(PendingConnectionAuthorizationsKey, remainingPendingAuths);
|
|
296
292
|
return {
|
|
297
293
|
serializedContext: serializeContext(ctx),
|
|
298
294
|
session: splicedSession,
|
|
@@ -308,7 +304,7 @@ function findConnection(ctx, connectionName) {
|
|
|
308
304
|
}
|
|
309
305
|
async function emitAuthorizationEvent(ctx, event, parentWritable) {
|
|
310
306
|
const adapter = ctx.require(ChannelKey);
|
|
311
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
307
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
312
308
|
const finalEvent = await callAdapterEventHandler(adapter, event, adapterCtx);
|
|
313
309
|
// Pin any handler-driven channel-state mutations back onto ctx so
|
|
314
310
|
// they survive the durable step boundary when the caller
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Runtime, SessionCapabilities } from "#channel/types.js";
|
|
2
2
|
import type { HarnessEmitFn, StepFn } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
import type { RuntimeCompiledArtifactsSource } from "#runtime/compiled-artifacts-source.js";
|
|
5
5
|
import type { ResolvedRuntimeAgentNode } from "#runtime/graph.js";
|
|
6
6
|
/**
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
interface AvailableSkillDescription {
|
|
2
2
|
readonly description: string;
|
|
3
3
|
readonly name: string;
|
|
4
4
|
}
|
|
@@ -15,3 +15,4 @@ export interface AvailableSkillDescription {
|
|
|
15
15
|
* reuse the same formatter for durable history announcements.
|
|
16
16
|
*/
|
|
17
17
|
export declare function formatAvailableSkillsSection(skills: readonly AvailableSkillDescription[]): string | null;
|
|
18
|
+
export {};
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { DeliverPayload, SubagentInputRequestHookPayload } from "#channel/types.js";
|
|
2
2
|
import type { HarnessEmitFn, HarnessSession } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
import type { InputResponse } from "#runtime/input/types.js";
|
|
5
5
|
/**
|
|
6
6
|
* Runs the parent-side work when a `subagent-input-request` arrives at
|
|
@@ -8,14 +8,6 @@ interface BatchEventMetadata {
|
|
|
8
8
|
readonly sequence: number;
|
|
9
9
|
readonly turnId: string;
|
|
10
10
|
}
|
|
11
|
-
/**
|
|
12
|
-
* Returns the deterministic continuation token used for one delegated child
|
|
13
|
-
* subagent run.
|
|
14
|
-
*/
|
|
15
|
-
export declare function createSubagentContinuationToken(input: {
|
|
16
|
-
readonly callId: string;
|
|
17
|
-
readonly parentSessionId: string;
|
|
18
|
-
}): string;
|
|
19
11
|
/**
|
|
20
12
|
* Result of {@link buildSubagentRunInput}.
|
|
21
13
|
*
|
|
@@ -1,22 +1,11 @@
|
|
|
1
1
|
import { SUBAGENT_ADAPTER_KIND } from "#execution/subagent-adapter.js";
|
|
2
2
|
import { mintSubagentContinuationToken } from "#execution/session.js";
|
|
3
|
-
import { formatSubagentInvocation } from "#execution/subagent-invocation.js";
|
|
4
|
-
/**
|
|
5
|
-
* Returns the deterministic continuation token used for one delegated child
|
|
6
|
-
* subagent run.
|
|
7
|
-
*/
|
|
8
|
-
export function createSubagentContinuationToken(input) {
|
|
9
|
-
return mintSubagentContinuationToken(`${input.parentSessionId}:${input.callId}`);
|
|
10
|
-
}
|
|
11
3
|
/**
|
|
12
4
|
* Builds the {@link RunInput} for one delegated subagent child run.
|
|
13
5
|
*/
|
|
14
6
|
export function buildSubagentRunInput(input) {
|
|
15
7
|
const { action, auth, batchEvent, capabilities, initiatorAuth, session } = input;
|
|
16
|
-
const childContinuationToken =
|
|
17
|
-
callId: action.callId,
|
|
18
|
-
parentSessionId: session.sessionId,
|
|
19
|
-
});
|
|
8
|
+
const childContinuationToken = mintSubagentContinuationToken(`${session.sessionId}:${action.callId}`);
|
|
20
9
|
const runInput = {
|
|
21
10
|
adapter: {
|
|
22
11
|
kind: SUBAGENT_ADAPTER_KIND,
|
|
@@ -50,9 +39,13 @@ export function buildSubagentRunInput(input) {
|
|
|
50
39
|
*/
|
|
51
40
|
function formatSubagentCallInputMessage(action) {
|
|
52
41
|
const { message } = action.input;
|
|
53
|
-
return
|
|
54
|
-
|
|
42
|
+
return [
|
|
43
|
+
`You are the subagent "${action.subagentName}".`,
|
|
44
|
+
`Description: ${action.description}`,
|
|
45
|
+
"",
|
|
46
|
+
"The caller delegated the following task to you. Complete it and return the final result directly.",
|
|
47
|
+
"",
|
|
48
|
+
"Caller message:",
|
|
55
49
|
message,
|
|
56
|
-
|
|
57
|
-
}).message;
|
|
50
|
+
].join("\n");
|
|
58
51
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { HookPayload, SessionCapabilities } from "#channel/types.js";
|
|
2
2
|
import type { HarnessSession } from "#harness/types.js";
|
|
3
|
-
import type { RunMode } from "#run-mode.js";
|
|
3
|
+
import type { RunMode } from "#shared/run-mode.js";
|
|
4
4
|
export interface TurnResultPayload {
|
|
5
5
|
readonly action: "done" | "park";
|
|
6
6
|
readonly kind: "turn-result";
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { hasPendingInputBatch } from "#harness/input-requests.js";
|
|
2
2
|
import { hasPendingRuntimeActionBatch } from "#harness/runtime-actions.js";
|
|
3
3
|
import { awaitAuthorizationAndResolve } from "#execution/await-authorization-orchestrator.js";
|
|
4
|
-
import { createTaskModeWaitError } from "#execution/task-mode.js";
|
|
5
4
|
import { normalizeSerializableError } from "#execution/workflow-errors.js";
|
|
6
5
|
import { turnStep } from "#execution/workflow-steps.js";
|
|
6
|
+
const TASK_MODE_WAIT_ERROR_MESSAGE = "Task mode cannot wait for follow-up input (`next: null`).";
|
|
7
7
|
/**
|
|
8
8
|
* Short-lived workflow that owns one runtime turn for the durable
|
|
9
9
|
* driver.
|
|
@@ -56,12 +56,11 @@ export async function turnWorkflow(input) {
|
|
|
56
56
|
});
|
|
57
57
|
return;
|
|
58
58
|
}
|
|
59
|
-
throw
|
|
59
|
+
throw new Error(TASK_MODE_WAIT_ERROR_MESSAGE);
|
|
60
60
|
}
|
|
61
61
|
if (result.action === "await-authorization") {
|
|
62
62
|
const resolved = await awaitAuthorizationAndResolve({
|
|
63
63
|
parentWritable,
|
|
64
|
-
pendingAuths: result.pendingAuths,
|
|
65
64
|
pendingToolCalls: result.pendingToolCalls,
|
|
66
65
|
serializedContext: currentSerializedContext,
|
|
67
66
|
session: currentSession,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ASH_PACKAGE_NAME } from "#package-name.js";
|
|
1
|
+
import { ASH_PACKAGE_NAME } from "#internal/package-name.js";
|
|
2
2
|
import { truncateHead } from "#execution/sandbox/truncate-output.js";
|
|
3
3
|
import { convertHtmlToMarkdown, extractTextFromHtml } from "#execution/web-fetch/html.js";
|
|
4
4
|
const MAX_RESPONSE_SIZE = 5 * 1024 * 1024; // 5 MB
|
|
@@ -2,8 +2,22 @@ import type { DeliverPayload, HookPayload, SessionAuthContext, SubagentInputRequ
|
|
|
2
2
|
import { deserializeContext } from "#context/serialize.js";
|
|
3
3
|
import type { HarnessSession } from "#harness/types.js";
|
|
4
4
|
import type { RuntimeCompiledArtifactsSource } from "#runtime/compiled-artifacts-source.js";
|
|
5
|
+
import { type PendingConnectionToolCall } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
5
6
|
import { type TurnWorkflowInput } from "#execution/turn-workflow.js";
|
|
6
|
-
|
|
7
|
+
/**
|
|
8
|
+
* Serializable projection of a step result for workflow persistence.
|
|
9
|
+
*/
|
|
10
|
+
export type DurableStepResult = {
|
|
11
|
+
readonly action: "continue" | "park" | "done";
|
|
12
|
+
readonly output?: string;
|
|
13
|
+
readonly serializedContext: Record<string, unknown>;
|
|
14
|
+
readonly session: HarnessSession;
|
|
15
|
+
} | {
|
|
16
|
+
readonly action: "await-authorization";
|
|
17
|
+
readonly pendingToolCalls: readonly PendingConnectionToolCall[];
|
|
18
|
+
readonly serializedContext: Record<string, unknown>;
|
|
19
|
+
readonly session: HarnessSession;
|
|
20
|
+
};
|
|
7
21
|
/**
|
|
8
22
|
* Input for one atomic harness step inside a durable `"use step"`
|
|
9
23
|
* boundary.
|
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
import { buildAdapterContext } from "#channel/adapter-context.js";
|
|
2
2
|
import { callAdapterEventHandler, defaultDeliverResult } from "#channel/adapter.js";
|
|
3
|
-
import { toContextAccessor } from "#context/container.js";
|
|
4
3
|
import { dispatchStreamEventHooks, runHookLifecycleStep } from "#context/hook-lifecycle.js";
|
|
5
4
|
import { AuthKey, BundleKey, CapabilitiesKey, ChannelKey, ContinuationTokenKey, InitiatorAuthKey, ModeKey, } from "#context/keys.js";
|
|
6
5
|
import { runStep } from "#context/run-step.js";
|
|
7
6
|
import { deserializeContext, serializeContext } from "#context/serialize.js";
|
|
8
|
-
import {
|
|
7
|
+
import { isHarnessBetweenTurns } from "#harness/emission.js";
|
|
9
8
|
import { coalesceTurnInputs } from "#harness/messages.js";
|
|
10
9
|
import { upsertProxyInputRequests } from "#harness/proxy-input-requests.js";
|
|
11
10
|
import { getPendingRuntimeActionBatch, recordPendingSubagentChildToken, } from "#harness/runtime-actions.js";
|
|
12
11
|
import { createLogger, formatError } from "#internal/logging.js";
|
|
13
12
|
import { createSessionFailedEvent, createSubagentCalledEvent, encodeMessageStreamEvent, timestampHandleMessageStreamEvent, } from "#protocol/message.js";
|
|
14
|
-
import {
|
|
15
|
-
import { PendingConnectionToolCallsKey } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
13
|
+
import { PendingConnectionToolCallsKey, } from "#runtime/framework-tools/pending-connection-tool-calls.js";
|
|
16
14
|
import { getCompiledRuntimeAgentBundle } from "#runtime/sessions/compiled-agent-cache.js";
|
|
17
15
|
import { createExecutionNodeStep } from "#execution/node-step.js";
|
|
18
16
|
import { emitProxiedInputRequest, routeDeliverPayload } from "#execution/subagent-hitl-proxy.js";
|
|
@@ -36,7 +34,7 @@ export async function turnStep(input) {
|
|
|
36
34
|
}
|
|
37
35
|
// Build the adapter context for deliver and event handlers.
|
|
38
36
|
// Slack adapters override this to inject ctx.thread and ctx.slack.
|
|
39
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
37
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
40
38
|
// Run the adapter's deliver hook for each queued payload — sets
|
|
41
39
|
// custom context keys and optionally transforms the message.
|
|
42
40
|
// Coalesces the resulting StepInput values.
|
|
@@ -101,13 +99,7 @@ export async function turnStep(input) {
|
|
|
101
99
|
mode,
|
|
102
100
|
node: bundle.graph.root,
|
|
103
101
|
});
|
|
104
|
-
|
|
105
|
-
await drainPendingConnectionAuthorizations({
|
|
106
|
-
ctx,
|
|
107
|
-
emit,
|
|
108
|
-
state: getHarnessEmissionState(result.session),
|
|
109
|
-
});
|
|
110
|
-
return result;
|
|
102
|
+
return step(refreshedSession, stepInput);
|
|
111
103
|
};
|
|
112
104
|
// Lifecycle hooks fire only at the start of a new turn — when the
|
|
113
105
|
// workflow has freshly delivered input and the harness is between
|
|
@@ -143,13 +135,11 @@ export async function turnStep(input) {
|
|
|
143
135
|
// serialized context; the resolve step clears them after
|
|
144
136
|
// successful retry.
|
|
145
137
|
const pendingToolCalls = ctx.get(PendingConnectionToolCallsKey) ?? [];
|
|
146
|
-
|
|
147
|
-
if (pendingToolCalls.length > 0 && pendingAuths.length > 0) {
|
|
138
|
+
if (pendingToolCalls.length > 0) {
|
|
148
139
|
writer.releaseLock();
|
|
149
140
|
return {
|
|
150
141
|
action: "await-authorization",
|
|
151
142
|
pendingToolCalls,
|
|
152
|
-
pendingAuths,
|
|
153
143
|
serializedContext: nextSerializedContext,
|
|
154
144
|
session: stepResult.session,
|
|
155
145
|
};
|
|
@@ -220,7 +210,7 @@ export async function dispatchPendingRuntimeActionsStep(input) {
|
|
|
220
210
|
const capabilities = ctx.get(CapabilitiesKey);
|
|
221
211
|
const initiatorAuth = ctx.get(InitiatorAuthKey) ?? null;
|
|
222
212
|
const writer = input.parentWritable.getWriter();
|
|
223
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
213
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
224
214
|
let nextSession = input.session;
|
|
225
215
|
try {
|
|
226
216
|
for (const action of batch.actions) {
|
|
@@ -288,7 +278,7 @@ export async function emitTerminalSessionFailureStep(input) {
|
|
|
288
278
|
const ctx = await deserializeContext(input.serializedContext);
|
|
289
279
|
const adapter = ctx.get(ChannelKey);
|
|
290
280
|
if (adapter !== undefined) {
|
|
291
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
281
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
292
282
|
await callAdapterEventHandler(adapter, event, adapterCtx);
|
|
293
283
|
}
|
|
294
284
|
}
|
|
@@ -327,7 +317,7 @@ export async function runProxyInputRequestStep(input) {
|
|
|
327
317
|
"use step";
|
|
328
318
|
const ctx = await deserializeContext(input.serializedContext);
|
|
329
319
|
const adapter = ctx.require(ChannelKey);
|
|
330
|
-
const adapterCtx = buildAdapterContext(adapter,
|
|
320
|
+
const adapterCtx = buildAdapterContext(adapter, ctx);
|
|
331
321
|
const mode = ctx.require(ModeKey);
|
|
332
322
|
const writer = input.parentWritable.getWriter();
|
|
333
323
|
let proxyResult;
|