@tangle-network/agent-runtime 0.44.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -203
- package/dist/agent.d.ts +3 -2
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +28 -2
- package/dist/analyst-loop.js +4 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-65FQLI4V.js +4089 -0
- package/dist/chunk-65FQLI4V.js.map +1 -0
- package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
- package/dist/chunk-GN75RGM6.js.map +1 -0
- package/dist/chunk-GSUO5QS6.js +146 -0
- package/dist/chunk-GSUO5QS6.js.map +1 -0
- package/dist/chunk-HNUXAZIJ.js +580 -0
- package/dist/chunk-HNUXAZIJ.js.map +1 -0
- package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
- package/dist/chunk-I42NHLKX.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
- package/dist/chunk-JNPK46YH.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
- package/dist/chunk-KADIJAD4.js.map +1 -0
- package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
- package/dist/chunk-KPN7OQ64.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/coder-DCWFQpmJ.d.ts +114 -0
- package/dist/driver-C-mtBo7h.d.ts +221 -0
- package/dist/improvement.d.ts +0 -1
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +122 -9
- package/dist/index.js +398 -10
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
- package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
- package/dist/loop-runner-bin.d.ts +8 -6
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -393
- package/dist/loops.js +96 -27
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +286 -13
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1964 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-BfoeiQRZ.d.ts +438 -0
- package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
- package/dist/workflow.d.ts +4 -3
- package/dist/workflow.js +4 -5
- package/dist/workflow.js.map +1 -1
- package/package.json +37 -28
- package/skills/agent-runtime-adoption/SKILL.md +32 -29
- package/skills/generate-eval/SKILL.md +60 -0
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-GFKVVRQ7.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-KDMRUD2P.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-S7JXV32P.js +0 -947
- package/dist/chunk-S7JXV32P.js.map +0 -1
- package/dist/chunk-SKUZZCHE.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-wUgp6UKs.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0
|
@@ -3,22 +3,22 @@ import {
|
|
|
3
3
|
} from "./chunk-FNMGYYSS.js";
|
|
4
4
|
import {
|
|
5
5
|
createDefaultCoderDelegate
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-I42NHLKX.js";
|
|
7
7
|
import {
|
|
8
8
|
runAnalystLoop
|
|
9
|
-
} from "./chunk-
|
|
9
|
+
} from "./chunk-HNUXAZIJ.js";
|
|
10
10
|
import {
|
|
11
|
-
|
|
12
|
-
} from "./chunk-VOX6Z3II.js";
|
|
13
|
-
import {
|
|
14
|
-
createDynamicDriver,
|
|
11
|
+
createDriver,
|
|
15
12
|
runLoop
|
|
16
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-65FQLI4V.js";
|
|
17
14
|
import {
|
|
18
15
|
ConfigError
|
|
19
|
-
} from "./chunk-
|
|
16
|
+
} from "./chunk-GSUO5QS6.js";
|
|
20
17
|
|
|
21
18
|
// src/loop-runner.ts
|
|
19
|
+
import {
|
|
20
|
+
selfImprove
|
|
21
|
+
} from "@tangle-network/agent-eval/contract";
|
|
22
22
|
var DELEGATED_LOOP_MODES = [
|
|
23
23
|
"code",
|
|
24
24
|
"review",
|
|
@@ -70,10 +70,11 @@ function reviewLoopRunner(options) {
|
|
|
70
70
|
}
|
|
71
71
|
function dynamicLoopRunner(o) {
|
|
72
72
|
return async (signal) => runLoop({
|
|
73
|
-
driver:
|
|
73
|
+
driver: createDriver({
|
|
74
74
|
planner: o.planner,
|
|
75
75
|
...o.maxIterations !== void 0 ? { maxIterations: o.maxIterations } : {},
|
|
76
|
-
...o.maxFanout !== void 0 ? { maxFanout: o.maxFanout } : {}
|
|
76
|
+
...o.maxFanout !== void 0 ? { maxFanout: o.maxFanout } : {},
|
|
77
|
+
...o.analyze ? { analyze: o.analyze } : {}
|
|
77
78
|
}),
|
|
78
79
|
...o.agentRun ? { agentRun: o.agentRun } : {},
|
|
79
80
|
...o.agentRuns ? { agentRuns: o.agentRuns } : {},
|
|
@@ -108,7 +109,7 @@ function researchLoopRunner(o) {
|
|
|
108
109
|
};
|
|
109
110
|
}
|
|
110
111
|
function selfImproveLoopRunner(options) {
|
|
111
|
-
return async () =>
|
|
112
|
+
return async () => selfImprove(options);
|
|
112
113
|
}
|
|
113
114
|
function auditLoopRunner(options) {
|
|
114
115
|
return async () => runAnalystLoop(options);
|
|
@@ -199,4 +200,4 @@ export {
|
|
|
199
200
|
runLoopRunnerCli,
|
|
200
201
|
parseLoopRunnerArgv
|
|
201
202
|
};
|
|
202
|
-
//# sourceMappingURL=chunk-
|
|
203
|
+
//# sourceMappingURL=chunk-GN75RGM6.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/loop-runner.ts","../src/loop-runner-bin.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runDelegatedLoop` — the configured delegated loop-runner.\n *\n * One typed entrypoint a worker agent (or a scheduled routine) calls to run a\n * disciplined loop in a chosen MODE, over agent-runtime's hardened engines:\n *\n * code → build-in-a-loop via the coder delegate (no-op + secret floor,\n * optional reviewer gate, winner-selection)\n * review → code mode with a REQUIRED reviewer (the gate is the point)\n * research → research-in-a-loop with valid-only KB growth (createKbGate)\n * audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)\n * self-improve → closed-loop text/config optimization (selfImprove, held-out gated)\n * dynamic → agent-authored topology (runLoop + createDriver)\n *\n * It is intentionally a thin façade: the value is that EVERY product reuses the\n * one hardened engine instead of forking delegation logic. The dispatcher owns\n * mode routing, timing, fail-loud on an unregistered mode, and a uniform result\n * shape; each mode's engine is a pre-configured runner in the registry (build it\n * with the factories below, or inject your own / a stub).\n */\n\nimport type { Scenario } from '@tangle-network/agent-eval/campaign'\nimport {\n type SelfImproveOptions,\n type SelfImproveResult,\n selfImprove,\n} from '@tangle-network/agent-eval/contract'\nimport { runAnalystLoop } from './analyst-loop'\nimport type { RunAnalystLoopOpts, RunAnalystLoopResult } from './analyst-loop/types'\nimport { ConfigError } from './errors'\nimport {\n type CoderReviewer,\n type CoderWinnerSelection,\n createDefaultCoderDelegate,\n type DelegateRunCtx,\n} from './mcp/delegates'\nimport { type CreateKbGateOptions, createKbGate, type FactCandidate } from './mcp/kb-gate'\nimport type { DelegateCodeArgs } from './mcp/types'\nimport type { CoderOutput } from './profiles/coder'\nimport {\n type AgentRunSpec,\n type CreateDriverOptions,\n createDriver,\n type DriverDecision,\n type LoopResult,\n type OutputAdapter,\n runLoop,\n type SandboxClient,\n type TopologyPlanner,\n type Validator,\n} from './runtime'\n\n/** @experimental Every delegated-loop mode, for validation + CLI surfaces. */\nexport const DELEGATED_LOOP_MODES = [\n 'code',\n 'review',\n 'research',\n 'audit',\n 'self-improve',\n 'dynamic',\n] as const\n\n/** @experimental */\nexport type DelegatedLoopMode = (typeof DELEGATED_LOOP_MODES)[number]\n\n/** @experimental Type guard for an untrusted mode string (CLI / config input). */\nexport function isDelegatedLoopMode(value: unknown): value is DelegatedLoopMode {\n return typeof value === 'string' && (DELEGATED_LOOP_MODES as readonly string[]).includes(value)\n}\n\n/** @experimental A pre-configured loop for one mode. Returns the mode's raw\n * output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */\nexport type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>\n\n/** @experimental Mode → configured runner. Partial: only register the modes a\n * given product/routine actually uses. */\nexport type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>\n\n/** @experimental Uniform result — never throws from a registered runner; a\n * thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */\nexport interface DelegatedLoopResult<T = unknown> {\n mode: DelegatedLoopMode\n ok: boolean\n output?: T\n error?: string\n durationMs: number\n}\n\n/** @experimental */\nexport interface RunDelegatedLoopOptions {\n signal?: AbortSignal\n /** Clock override for deterministic tests. */\n now?: () => number\n}\n\n/**\n * @experimental\n *\n * Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no\n * runner is registered for the mode — a routine pointed at an unwired mode is a\n * config bug, not a silent no-op. A runner that throws is captured as\n * `{ ok: false }` so unattended runs record the failure rather than crash.\n */\nexport async function runDelegatedLoop<T = unknown>(\n mode: DelegatedLoopMode,\n registry: DelegatedLoopRegistry,\n options: RunDelegatedLoopOptions = {},\n): Promise<DelegatedLoopResult<T>> {\n const runner = registry[mode] as DelegatedLoopRunner<T> | undefined\n if (!runner) {\n throw new ConfigError(\n `runDelegatedLoop: no runner registered for mode '${mode}' (registered: ${\n Object.keys(registry).join(', ') || 'none'\n })`,\n )\n }\n const now = options.now ?? Date.now\n const signal = options.signal ?? new AbortController().signal\n const start = now()\n try {\n const output = await runner(signal)\n return { mode, ok: true, output, durationMs: now() - start }\n } catch (err) {\n return {\n mode,\n ok: false,\n error: err instanceof Error ? err.message : String(err),\n durationMs: now() - start,\n }\n }\n}\n\n/** @experimental Options for the default `code`/`review` runner. */\nexport interface CoderLoopRunnerOptions {\n sandboxClient: SandboxClient\n /** What to build — the delegate args (goal, repoRoot, variants, config, …). */\n args: DelegateCodeArgs\n /** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */\n reviewer?: CoderReviewer\n /** Winner-selection strategy. Default `highest-score`. */\n winnerSelection?: CoderWinnerSelection\n /** Harnesses for `variants > 1` fanout. */\n fanoutHarnesses?: string[]\n}\n\n/** @experimental Build a `code`-mode runner over the hardened coder delegate. */\nexport function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput> {\n const delegate = createDefaultCoderDelegate({\n sandboxClient: options.sandboxClient,\n ...(options.reviewer ? { reviewer: options.reviewer } : {}),\n ...(options.winnerSelection ? { winnerSelection: options.winnerSelection } : {}),\n ...(options.fanoutHarnesses ? { fanoutHarnesses: options.fanoutHarnesses } : {}),\n })\n return async (signal) => {\n const ctx: DelegateRunCtx = { signal, report: () => {} }\n return delegate(options.args, ctx)\n }\n}\n\n/**\n * @experimental\n *\n * `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,\n * so the type forces a reviewer (a \"review loop\" with no reviewer is a code loop).\n */\nexport function reviewLoopRunner(\n options: CoderLoopRunnerOptions & { reviewer: CoderReviewer },\n): DelegatedLoopRunner<CoderOutput> {\n return coderLoopRunner(options)\n}\n\n/** @experimental Options for the default `dynamic` runner. */\nexport interface DynamicLoopRunnerOptions<Task, Output> {\n sandboxClient: SandboxClient\n /** The agent-authored topology planner (sync or async; an async planner is where an LLM call goes). */\n planner: TopologyPlanner<Task, Output>\n task: Task\n output: OutputAdapter<Output>\n validator?: Validator<Output>\n /** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */\n agentRun?: AgentRunSpec<Task>\n agentRuns?: AgentRunSpec<Task>[]\n maxIterations?: number\n maxFanout?: number\n /** Optional trace-analyst hook forwarded to the dynamic driver so the loop runs\n * `f(trace, findings)` — see `CreateDriverOptions.analyze`. Caller-side\n * seam to `runAnalystLoop`; keeps this runner analyst-free. */\n analyze?: CreateDriverOptions<Task, Output>['analyze']\n}\n\n/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */\nexport function dynamicLoopRunner<Task, Output>(\n o: DynamicLoopRunnerOptions<Task, Output>,\n): DelegatedLoopRunner<LoopResult<Task, Output, DriverDecision>> {\n return async (signal) =>\n runLoop<Task, Output, DriverDecision>({\n driver: createDriver<Task, Output>({\n planner: o.planner,\n ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),\n ...(o.maxFanout !== undefined ? { maxFanout: o.maxFanout } : {}),\n ...(o.analyze ? { analyze: o.analyze } : {}),\n }),\n ...(o.agentRun ? { agentRun: o.agentRun } : {}),\n ...(o.agentRuns ? { agentRuns: o.agentRuns } : {}),\n output: o.output,\n ...(o.validator ? { validator: o.validator } : {}),\n task: o.task,\n ctx: { sandboxClient: o.sandboxClient, signal },\n ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),\n })\n}\n\n/** @experimental A fact rejected at the KB gate — surfaced, never dropped. */\nexport interface VetoedFact {\n candidate: FactCandidate\n vetoedBy?: string\n reason?: string\n}\n\n/** @experimental */\nexport interface ResearchLoopResult {\n /** Facts that passed the fail-closed gate — safe to write to the KB. */\n accepted: FactCandidate[]\n /** Facts the gate vetoed in the final round — escalate, do not silently drop. */\n vetoed: VetoedFact[]\n /** Research rounds actually run. */\n rounds: number\n}\n\n/** @experimental Options for the default `research` runner. */\nexport interface ResearchLoopRunnerOptions {\n /**\n * The research engine (the consumer's web/doc searcher + extractor). Called\n * each round with the prior round's vetoes so it can re-research the gaps.\n * Returns fact candidates carrying their grounding (`verbatimPassage` +\n * `sourceText`).\n */\n research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>\n /** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */\n gate?: CreateKbGateOptions\n /** Max research rounds (correct-on-veto remediation). Default 1. */\n maxRounds?: number\n}\n\n/**\n * @experimental `research` mode — research-in-a-loop with valid-only KB growth.\n *\n * Each round: research → gate every candidate (fail-closed; passage MUST be in\n * the source) → accept the clean ones → re-research the vetoed ones next round,\n * up to `maxRounds`. Vetoed facts in the final round are RETURNED (escalate,\n * never silently dropped) so the caller audits vs retries.\n */\nexport function researchLoopRunner(\n o: ResearchLoopRunnerOptions,\n): DelegatedLoopRunner<ResearchLoopResult> {\n const gate = createKbGate(o.gate)\n const maxRounds = Math.max(1, Math.trunc(o.maxRounds ?? 1))\n return async (signal) => {\n const accepted: FactCandidate[] = []\n let vetoed: VetoedFact[] = []\n let rounds = 0\n for (let round = 0; round < maxRounds; round += 1) {\n if (signal.aborted) break\n rounds += 1\n const candidates = await o.research(round, vetoed)\n if (candidates.length === 0) break\n vetoed = []\n for (const c of candidates) {\n const v = await gate(c)\n if (v.accepted) accepted.push(c)\n else vetoed.push({ candidate: c, vetoedBy: v.vetoedBy, reason: v.reason })\n }\n if (vetoed.length === 0) break\n }\n return { accepted, vetoed, rounds }\n }\n}\n\n/** @experimental `self-improve` mode — agent-eval's one-call closed loop (held-out gated). */\nexport function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(\n options: SelfImproveOptions<TScenario, TArtifact>,\n): DelegatedLoopRunner<SelfImproveResult<TScenario, TArtifact>> {\n return async () => selfImprove<TScenario, TArtifact>(options)\n}\n\n/** @experimental `audit` mode — analyst loop over captured trace/run data. */\nexport function auditLoopRunner<TProposal = unknown, TEdit = unknown>(\n options: RunAnalystLoopOpts,\n): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>> {\n return async () => runAnalystLoop<TProposal, TEdit>(options)\n}\n","#!/usr/bin/env node\n/**\n * @experimental\n *\n * `agent-runtime-loop` — the schedulable entrypoint for the configured\n * delegated loop-runner. A cron job / routine / Makefile target invokes:\n *\n * agent-runtime-loop --mode research --config ./loops.config.js\n *\n * The config module wires the registry (with full access to env / creds —\n * which is why the deps live there, not in this generic bin). It must default-\n * export a `DelegatedLoopRegistry`, or a `() => DelegatedLoopRegistry | Promise<…>`.\n * The bin runs the selected mode, prints the `DelegatedLoopResult` as JSON, and\n * exits 0 on `ok`, 1 on a recorded failure, 2 on a usage/config error.\n */\n\nimport {\n DELEGATED_LOOP_MODES,\n type DelegatedLoopMode,\n type DelegatedLoopRegistry,\n type DelegatedLoopResult,\n isDelegatedLoopMode,\n runDelegatedLoop,\n} from './loop-runner'\n\n/** @experimental Parsed CLI invocation. */\nexport interface LoopRunnerCliArgs {\n mode: string\n /** Loads the registry — the bin wires this from `--config`; tests inject a stub. */\n loadRegistry: () => Promise<DelegatedLoopRegistry> | DelegatedLoopRegistry\n now?: () => number\n}\n\n/** @experimental */\nexport interface LoopRunnerCliResult {\n exitCode: number\n result?: DelegatedLoopResult\n error?: string\n}\n\n/**\n * @experimental\n *\n * Pure CLI core (no process / argv / IO) so it's unit-testable: validate the\n * mode, load the registry, dispatch, map to an exit code (0 ok / 1 failed /\n * 2 usage). Exported for embedding in custom runners + tests.\n */\nexport async function runLoopRunnerCli(args: LoopRunnerCliArgs): Promise<LoopRunnerCliResult> {\n if (!isDelegatedLoopMode(args.mode)) {\n return {\n exitCode: 2,\n error: `unknown mode '${args.mode}' (expected one of: ${DELEGATED_LOOP_MODES.join(', ')})`,\n }\n }\n let registry: DelegatedLoopRegistry\n try {\n registry = await args.loadRegistry()\n } catch (err) {\n return { exitCode: 2, error: `failed to load registry: ${errMsg(err)}` }\n }\n if (!registry[args.mode]) {\n return {\n exitCode: 2,\n error: `config registers no runner for mode '${args.mode}' (registered: ${\n Object.keys(registry).join(', ') || 'none'\n })`,\n }\n }\n // runDelegatedLoop throws only on a missing runner (guarded above); a failing\n // engine is captured as { ok: false } → exit 1, not a crash.\n const result = await runDelegatedLoop(args.mode as DelegatedLoopMode, registry, {\n ...(args.now ? { now: args.now } : {}),\n })\n return { exitCode: result.ok ? 0 : 1, result }\n}\n\n/** Parse `--mode X --config Y` from an argv tail (`process.argv.slice(2)`). */\nexport function parseLoopRunnerArgv(argv: string[]): { mode?: string; config?: string } {\n const out: { mode?: string; config?: string } = {}\n for (let i = 0; i < argv.length; i += 1) {\n const a = argv[i]\n if (a === '--mode') out.mode = argv[++i]\n else if (a === '--config') out.config = argv[++i]\n else if (a?.startsWith('--mode=')) out.mode = a.slice('--mode='.length)\n else if (a?.startsWith('--config=')) out.config = a.slice('--config='.length)\n }\n return out\n}\n\n/** Normalize a config module's default export → a registry. */\nfunction resolveRegistry(mod: unknown): DelegatedLoopRegistry {\n const def = (mod as { default?: unknown })?.default ?? mod\n const value = typeof def === 'function' ? (def as () => unknown)() : def\n return value as DelegatedLoopRegistry\n}\n\nfunction errMsg(err: unknown): string {\n return err instanceof Error ? err.message : String(err)\n}\n\n/** The argv → IO → exit shell. Kept thin; logic lives in `runLoopRunnerCli`. */\nasync function main(): Promise<void> {\n const { mode, config } = parseLoopRunnerArgv(process.argv.slice(2))\n if (!mode || !config) {\n process.stderr.write(\n 'usage: agent-runtime-loop --mode <mode> --config <module>\\n' +\n ` modes: ${DELEGATED_LOOP_MODES.join(' | ')}\\n` +\n ' config: a JS/TS module default-exporting a DelegatedLoopRegistry (or a factory)\\n',\n )\n process.exit(2)\n }\n const { pathToFileURL } = await import('node:url')\n const { resolve } = await import('node:path')\n const cli = await runLoopRunnerCli({\n mode,\n loadRegistry: async () => resolveRegistry(await import(pathToFileURL(resolve(config)).href)),\n })\n process.stdout.write(`${JSON.stringify(cli.result ?? { error: cli.error }, null, 2)}\\n`)\n if (cli.error) process.stderr.write(`${cli.error}\\n`)\n process.exit(cli.exitCode)\n}\n\n// Run only when executed as the bin — never when imported for the testable\n// core, and never when bundled into a runtime that has no `process.argv`\n// (e.g. Cloudflare Workers, where `process` is a shim without `argv`). Reading\n// `process.argv[1]` directly would throw at module load there; `process.argv?.`\n// keeps the guard a no-op instead of crashing the Worker on startup.\nconst invokedScript = typeof process !== 'undefined' ? process.argv?.[1] : undefined\nif (invokedScript && /loop-runner-bin\\.(js|ts|mjs)$/.test(invokedScript)) {\n void main()\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAwBA;AAAA,EAGE;AAAA,OACK;AA2BA,IAAM,uBAAuB;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAMO,SAAS,oBAAoB,OAA4C;AAC9E,SAAO,OAAO,UAAU,YAAa,qBAA2C,SAAS,KAAK;AAChG;AAmCA,eAAsB,iBACpB,MACA,UACA,UAAmC,CAAC,GACH;AACjC,QAAM,SAAS,SAAS,IAAI;AAC5B,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI;AAAA,MACR,oDAAoD,IAAI,kBACtD,OAAO,KAAK,QAAQ,EAAE,KAAK,IAAI,KAAK,MACtC;AAAA,IACF;AAAA,EACF;AACA,QAAM,MAAM,QAAQ,OAAO,KAAK;AAChC,QAAM,SAAS,QAAQ,UAAU,IAAI,gBAAgB,EAAE;AACvD,QAAM,QAAQ,IAAI;AAClB,MAAI;AACF,UAAM,SAAS,MAAM,OAAO,MAAM;AAClC,WAAO,EAAE,MAAM,IAAI,MAAM,QAAQ,YAAY,IAAI,IAAI,MAAM;AAAA,EAC7D,SAAS,KAAK;AACZ,WAAO;AAAA,MACL;AAAA,MACA,IAAI;AAAA,MACJ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACtD,YAAY,IAAI,IAAI;AAAA,IACtB;AAAA,EACF;AACF;AAgBO,SAAS,gBAAgB,SAAmE;AACjG,QAAM,WAAW,2BAA2B;AAAA,IAC1C,eAAe,QAAQ;AAAA,IACvB,GAAI,QAAQ,WAAW,EAAE,UAAU,QAAQ,SAAS,IAAI,CAAC;AAAA,IACzD,GAAI,QAAQ,kBAAkB,EAAE,iBAAiB,QAAQ,gBAAgB,IAAI,CAAC;AAAA,IAC9E,GAAI,QAAQ,kBAAkB,EAAE,iBAAiB,QAAQ,gBAAgB,IAAI,CAAC;AAAA,EAChF,CAAC;AACD,SAAO,OAAO,WAAW;AACvB,UAAM,MAAsB,EAAE,QAAQ,QAAQ,MAAM;AAAA,IAAC,EAAE;AACvD,WAAO,SAAS,QAAQ,MAAM,GAAG;AAAA,EACnC;AACF;AAQO,SAAS,iBACd,SACkC;AAClC,SAAO,gBAAgB,OAAO;AAChC;AAsBO,SAAS,kBACd,GAC+D;AAC/D,SAAO,OAAO,WACZ,QAAsC;AAAA,IACpC,QAAQ,aAA2B;AAAA,MACjC,SAAS,EAAE;AAAA,MACX,GAAI,EAAE,kBAAkB,SAAY,EAAE,eAAe,EAAE,cAAc,IAAI,CAAC;AAAA,MAC1E,GAAI,EAAE,cAAc,SAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,MAC9D,GAAI,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,IAAI,CAAC;AAAA,IAC5C,CAAC;AAAA,IACD,GAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,IAAI,CAAC;AAAA,IAC7C,GAAI,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,IAChD,QAAQ,EAAE;AAAA,IACV,GAAI,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,KAAK,EAAE,eAAe,EAAE,eAAe,OAAO;AAAA,IAC9C,GAAI,EAAE,kBAAkB,SAAY,EAAE,eAAe,EAAE,cAAc,IAAI,CAAC;AAAA,EAC5E,CAAC;AACL;AA0CO,SAAS,mBACd,GACyC;AACzC,QAAM,OAAO,aAAa,EAAE,IAAI;AAChC,QAAM,YAAY,KAAK,IAAI,GAAG,KAAK,MAAM,EAAE,aAAa,CAAC,CAAC;AAC1D,SAAO,OAAO,WAAW;AACvB,UAAM,WAA4B,CAAC;AACnC,QAAI,SAAuB,CAAC;AAC5B,QAAI,SAAS;AACb,aAAS,QAAQ,GAAG,QAAQ,WAAW,SAAS,GAAG;AACjD,UAAI,OAAO,QAAS;AACpB,gBAAU;AACV,YAAM,aAAa,MAAM,EAAE,SAAS,OAAO,MAAM;AACjD,UAAI,WAAW,WAAW,EAAG;AAC7B,eAAS,CAAC;AACV,iBAAW,KAAK,YAAY;AAC1B,cAAM,IAAI,MAAM,KAAK,CAAC;AACtB,YAAI,EAAE,SAAU,UAAS,KAAK,CAAC;AAAA,YAC1B,QAAO,KAAK,EAAE,WAAW,GAAG,UAAU,EAAE,UAAU,QAAQ,EAAE,OAAO,CAAC;AAAA,MAC3E;AACA,UAAI,OAAO,WAAW,EAAG;AAAA,IAC3B;AACA,WAAO,EAAE,UAAU,QAAQ,OAAO;AAAA,EACpC;AACF;AAGO,SAAS,sBACd,SAC8D;AAC9D,SAAO,YAAY,YAAkC,OAAO;AAC9D;AAGO,SAAS,gBACd,SAC6D;AAC7D,SAAO,YAAY,eAAiC,OAAO;AAC7D;;;ACrPA,eAAsB,iBAAiB,MAAuD;AAC5F,MAAI,CAAC,oBAAoB,KAAK,IAAI,GAAG;AACnC,WAAO;AAAA,MACL,UAAU;AAAA,MACV,OAAO,iBAAiB,KAAK,IAAI,uBAAuB,qBAAqB,KAAK,IAAI,CAAC;AAAA,IACzF;AAAA,EACF;AACA,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,KAAK,aAAa;AAAA,EACrC,SAAS,KAAK;AACZ,WAAO,EAAE,UAAU,GAAG,OAAO,4BAA4B,OAAO,GAAG,CAAC,GAAG;AAAA,EACzE;AACA,MAAI,CAAC,SAAS,KAAK,IAAI,GAAG;AACxB,WAAO;AAAA,MACL,UAAU;AAAA,MACV,OAAO,wCAAwC,KAAK,IAAI,kBACtD,OAAO,KAAK,QAAQ,EAAE,KAAK,IAAI,KAAK,MACtC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,SAAS,MAAM,iBAAiB,KAAK,MAA2B,UAAU;AAAA,IAC9E,GAAI,KAAK,MAAM,EAAE,KAAK,KAAK,IAAI,IAAI,CAAC;AAAA,EACtC,CAAC;AACD,SAAO,EAAE,UAAU,OAAO,KAAK,IAAI,GAAG,OAAO;AAC/C;AAGO,SAAS,oBAAoB,MAAoD;AACtF,QAAM,MAA0C,CAAC;AACjD,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,GAAG;AACvC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,MAAM,SAAU,KAAI,OAAO,KAAK,EAAE,CAAC;AAAA,aAC9B,MAAM,WAAY,KAAI,SAAS,KAAK,EAAE,CAAC;AAAA,aACvC,GAAG,WAAW,SAAS,EAAG,KAAI,OAAO,EAAE,MAAM,UAAU,MAAM;AAAA,aAC7D,GAAG,WAAW,WAAW,EAAG,KAAI,SAAS,EAAE,MAAM,YAAY,MAAM;AAAA,EAC9E;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,KAAqC;AAC5D,QAAM,MAAO,KAA+B,WAAW;AACvD,QAAM,QAAQ,OAAO,QAAQ,aAAc,IAAsB,IAAI;AACrE,SAAO;AACT;AAEA,SAAS,OAAO,KAAsB;AACpC,SAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AACxD;AAGA,eAAe,OAAsB;AACnC,QAAM,EAAE,MAAM,OAAO,IAAI,oBAAoB,QAAQ,KAAK,MAAM,CAAC,CAAC;AAClE,MAAI,CAAC,QAAQ,CAAC,QAAQ;AACpB,YAAQ,OAAO;AAAA,MACb;AAAA,WACc,qBAAqB,KAAK,KAAK,CAAC;AAAA;AAAA;AAAA,IAEhD;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,QAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,QAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,MAAW;AAC5C,QAAM,MAAM,MAAM,iBAAiB;AAAA,IACjC;AAAA,IACA,cAAc,YAAY,gBAAgB,MAAM,OAAO,cAAc,QAAQ,MAAM,CAAC,EAAE,KAAK;AAAA,EAC7F,CAAC;AACD,UAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,IAAI,UAAU,EAAE,OAAO,IAAI,MAAM,GAAG,MAAM,CAAC,CAAC;AAAA,CAAI;AACvF,MAAI,IAAI,MAAO,SAAQ,OAAO,MAAM,GAAG,IAAI,KAAK;AAAA,CAAI;AACpD,UAAQ,KAAK,IAAI,QAAQ;AAC3B;AAOA,IAAM,gBAAgB,OAAO,YAAY,cAAc,QAAQ,OAAO,CAAC,IAAI;AAC3E,IAAI,iBAAiB,gCAAgC,KAAK,aAAa,GAAG;AACxE,OAAK,KAAK;AACZ;","names":[]}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
import { AgentEvalError } from "@tangle-network/agent-eval";
|
|
3
|
+
import {
|
|
4
|
+
AgentEvalError as AgentEvalError2,
|
|
5
|
+
CaptureIntegrityError,
|
|
6
|
+
ConfigError,
|
|
7
|
+
JudgeError,
|
|
8
|
+
NotFoundError,
|
|
9
|
+
ReplayError,
|
|
10
|
+
ValidationError,
|
|
11
|
+
VerificationError
|
|
12
|
+
} from "@tangle-network/agent-eval";
|
|
13
|
+
var SessionMismatchError = class extends AgentEvalError {
|
|
14
|
+
sessionBackend;
|
|
15
|
+
requestedBackend;
|
|
16
|
+
constructor(sessionBackend, requestedBackend, options) {
|
|
17
|
+
super(
|
|
18
|
+
"validation",
|
|
19
|
+
`Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,
|
|
20
|
+
options
|
|
21
|
+
);
|
|
22
|
+
this.sessionBackend = sessionBackend;
|
|
23
|
+
this.requestedBackend = requestedBackend;
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
var BackendTransportError = class extends AgentEvalError {
|
|
27
|
+
backend;
|
|
28
|
+
status;
|
|
29
|
+
/**
|
|
30
|
+
* Truncated upstream response body (≤2 KiB) when available. Diagnostic
|
|
31
|
+
* only — surfaces in `backend_error.error.body` and `final.error.body`
|
|
32
|
+
* so operators can see "free_tier_limit", "invalid_api_key", etc. without
|
|
33
|
+
* cracking the log line open.
|
|
34
|
+
*/
|
|
35
|
+
body;
|
|
36
|
+
constructor(backend, message, options) {
|
|
37
|
+
super("config", message, options);
|
|
38
|
+
this.backend = backend;
|
|
39
|
+
this.status = options?.status;
|
|
40
|
+
this.body = options?.body;
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
var RuntimeRunStateError = class extends AgentEvalError {
|
|
44
|
+
constructor(message, options) {
|
|
45
|
+
super("validation", message, options);
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
var PlannerError = class extends AgentEvalError {
|
|
49
|
+
constructor(message, options) {
|
|
50
|
+
super("validation", message, options);
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
var AnalystError = class extends AgentEvalError {
|
|
54
|
+
constructor(message, options) {
|
|
55
|
+
super("validation", message, options);
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
// src/runtime/sandbox-events.ts
|
|
60
|
+
function extractLlmCallEvent(event, agentRunName) {
|
|
61
|
+
if (!event || typeof event !== "object") return void 0;
|
|
62
|
+
const type = String(event.type ?? "");
|
|
63
|
+
const data = event.data && typeof event.data === "object" ? event.data : {};
|
|
64
|
+
if (type === "llm_call" || type === "cost.usage" || type === "usage") {
|
|
65
|
+
return buildLlmCall(data, agentRunName);
|
|
66
|
+
}
|
|
67
|
+
if (type === "message.completed" || type === "result" || type === "final") {
|
|
68
|
+
const usage = data.usage;
|
|
69
|
+
if (!usage || typeof usage !== "object") return void 0;
|
|
70
|
+
return buildLlmCall({ ...usage, model: data.model ?? usage.model }, agentRunName);
|
|
71
|
+
}
|
|
72
|
+
if (type === "done") {
|
|
73
|
+
const usage = data.tokenUsage;
|
|
74
|
+
if (!usage || typeof usage !== "object") return void 0;
|
|
75
|
+
const out = pickFiniteNumber(usage, ["outputTokens", "completion_tokens", "tokensOut"]);
|
|
76
|
+
const reasoning = pickFiniteNumber(usage, ["reasoningTokens"]);
|
|
77
|
+
const mergedOut = out !== void 0 || reasoning !== void 0 ? (out ?? 0) + (reasoning ?? 0) : void 0;
|
|
78
|
+
return buildLlmCall(
|
|
79
|
+
{
|
|
80
|
+
inputTokens: usage.inputTokens,
|
|
81
|
+
outputTokens: mergedOut,
|
|
82
|
+
totalCostUsd: data.totalCostUsd,
|
|
83
|
+
model: data.model ?? usage.model
|
|
84
|
+
},
|
|
85
|
+
agentRunName
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
return void 0;
|
|
89
|
+
}
|
|
90
|
+
function buildLlmCall(data, agentRunName) {
|
|
91
|
+
const tokensIn = pickFiniteNumber(data, ["tokensIn", "inputTokens", "prompt_tokens"]);
|
|
92
|
+
const tokensOut = pickFiniteNumber(data, ["tokensOut", "outputTokens", "completion_tokens"]);
|
|
93
|
+
const costUsd = pickFiniteNumber(data, ["costUsd", "totalCostUsd", "cost_usd", "cost"]);
|
|
94
|
+
if (tokensIn === void 0 && tokensOut === void 0 && costUsd === void 0) {
|
|
95
|
+
return void 0;
|
|
96
|
+
}
|
|
97
|
+
const model = typeof data.model === "string" && data.model.length > 0 ? data.model : agentRunName;
|
|
98
|
+
const event = {
|
|
99
|
+
type: "llm_call",
|
|
100
|
+
model
|
|
101
|
+
};
|
|
102
|
+
if (tokensIn !== void 0) event.tokensIn = tokensIn;
|
|
103
|
+
if (tokensOut !== void 0) event.tokensOut = tokensOut;
|
|
104
|
+
if (costUsd !== void 0) event.costUsd = costUsd;
|
|
105
|
+
return event;
|
|
106
|
+
}
|
|
107
|
+
function pickFiniteNumber(data, keys) {
|
|
108
|
+
for (const key of keys) {
|
|
109
|
+
const value = data[key];
|
|
110
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
111
|
+
}
|
|
112
|
+
return void 0;
|
|
113
|
+
}
|
|
114
|
+
function mapSandboxEvent(event, opts = {}) {
|
|
115
|
+
if (!event || typeof event !== "object") return void 0;
|
|
116
|
+
const type = String(event.type ?? "");
|
|
117
|
+
const data = event.data && typeof event.data === "object" ? event.data : {};
|
|
118
|
+
if (type === "message.part.updated") {
|
|
119
|
+
const part = data.part && typeof data.part === "object" ? data.part : {};
|
|
120
|
+
const partType = String(part.type ?? "");
|
|
121
|
+
const delta = typeof data.delta === "string" ? data.delta : void 0;
|
|
122
|
+
const text = delta ?? (typeof part.text === "string" ? part.text : void 0);
|
|
123
|
+
if (text === void 0) return void 0;
|
|
124
|
+
if (partType === "text") return { type: "text_delta", text };
|
|
125
|
+
if (partType === "reasoning" || partType === "thinking")
|
|
126
|
+
return { type: "reasoning_delta", text };
|
|
127
|
+
return void 0;
|
|
128
|
+
}
|
|
129
|
+
return extractLlmCallEvent(event, opts.agentRunName ?? "agent");
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
export {
|
|
133
|
+
SessionMismatchError,
|
|
134
|
+
BackendTransportError,
|
|
135
|
+
RuntimeRunStateError,
|
|
136
|
+
PlannerError,
|
|
137
|
+
AnalystError,
|
|
138
|
+
AgentEvalError2 as AgentEvalError,
|
|
139
|
+
ConfigError,
|
|
140
|
+
JudgeError,
|
|
141
|
+
NotFoundError,
|
|
142
|
+
ValidationError,
|
|
143
|
+
extractLlmCallEvent,
|
|
144
|
+
mapSandboxEvent
|
|
145
|
+
};
|
|
146
|
+
//# sourceMappingURL=chunk-GSUO5QS6.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors.ts","../src/runtime/sandbox-events.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n /**\n * Truncated upstream response body (≤2 KiB) when available. Diagnostic\n * only — surfaces in `backend_error.error.body` and `final.error.body`\n * so operators can see \"free_tier_limit\", \"invalid_api_key\", etc. without\n * cracking the log line open.\n */\n readonly body?: string\n\n constructor(\n backend: string,\n message: string,\n options?: { cause?: unknown; status?: number; body?: string },\n ) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n this.body = options?.body\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/**\n * @stable\n *\n * The dynamic-loop planner returned an unusable topology move — the LLM emitted\n * no parseable envelope, an unknown `kind`, or a structurally-invalid move\n * (e.g. a fanout with zero tasks). This is a structural failure of the\n * agent-authored topology, not a config mistake: the planner ran but its output\n * cannot drive the kernel. Carries `validation` so cross-package handlers can\n * pattern-match without importing the runtime. Fail loud — never substitute a\n * default move, or the loop silently runs a topology nobody chose.\n */\nexport class PlannerError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/**\n * The analyst loop could not read or run over a round's trace — e.g. an empty round\n * (no iterations to analyze) or a malformed trace projection. Fail loud: a silent empty\n * store would mask a broken capture path and the driver would steer on nothing.\n */\nexport class AnalystError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n","/**\n * Sandbox-event → runtime-event mapping.\n *\n * The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`\n * whose `type` vocabulary is backend-determined (opencode, etc.) rather than\n * enumerated by the SDK. Two consumers project it:\n * - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off\n * every cost-bearing event, regardless of stream shape;\n * - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects\n * incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.\n *\n * Both live here so the empirically-observed `type` vocabulary has one home.\n */\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport type { RuntimeStreamEvent } from '../types'\n\n/**\n * Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when\n * the event carries usage/cost data. Returns `undefined` for non-cost events\n * so the kernel can iterate the full stream without branching.\n *\n * Canonical cost-carrying types observed in the wild:\n * - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`\n * - `message.completed` / `result` — `data: { usage: { inputTokens,\n * outputTokens, totalCostUsd? } }`\n * - `cost.usage` / `usage` — same shape under a dedicated type\n *\n * Numeric coercion is strict: `Number.isFinite` gates every accumulator write\n * so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.\n */\nexport function extractLlmCallEvent(\n event: SandboxEvent,\n agentRunName: string,\n): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {\n if (!event || typeof event !== 'object') return undefined\n const type = String(event.type ?? '')\n const data =\n event.data && typeof event.data === 'object'\n ? (event.data as Record<string, unknown>)\n : ({} as Record<string, unknown>)\n\n if (type === 'llm_call' || type === 'cost.usage' || type === 'usage') {\n return buildLlmCall(data, agentRunName)\n }\n if (type === 'message.completed' || type === 'result' || type === 'final') {\n const usage = data.usage as Record<string, unknown> | undefined\n if (!usage || typeof usage !== 'object') return undefined\n return buildLlmCall({ ...usage, model: data.model ?? usage.model }, agentRunName)\n }\n // sandbox 0.4.0 terminal event: `data = { tokenUsage: { inputTokens, outputTokens,\n // reasoningTokens, cacheReadInputTokens }, totalCostUsd }`. Usage lives under\n // `tokenUsage` (not `usage`) and the cost is top-level — neither matched the\n // branches above, so an in-process loopDispatch run reported {0,0} and the\n // backend-integrity guard misread a real run as a stub. Reasoning tokens are\n // billed output (reasoning models), so they fold into the output count.\n if (type === 'done') {\n const usage = data.tokenUsage as Record<string, unknown> | undefined\n if (!usage || typeof usage !== 'object') return undefined\n const out = pickFiniteNumber(usage, ['outputTokens', 'completion_tokens', 'tokensOut'])\n const reasoning = pickFiniteNumber(usage, ['reasoningTokens'])\n const mergedOut =\n out !== undefined || reasoning !== undefined ? (out ?? 0) + (reasoning ?? 0) : undefined\n return buildLlmCall(\n {\n inputTokens: usage.inputTokens,\n outputTokens: mergedOut,\n totalCostUsd: data.totalCostUsd,\n model: data.model ?? usage.model,\n },\n agentRunName,\n )\n }\n return undefined\n}\n\nfunction buildLlmCall(\n data: Record<string, unknown>,\n agentRunName: string,\n): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {\n const tokensIn = pickFiniteNumber(data, ['tokensIn', 'inputTokens', 'prompt_tokens'])\n const tokensOut = pickFiniteNumber(data, ['tokensOut', 'outputTokens', 'completion_tokens'])\n const costUsd = pickFiniteNumber(data, ['costUsd', 'totalCostUsd', 'cost_usd', 'cost'])\n if (tokensIn === undefined && tokensOut === undefined && costUsd === undefined) {\n return undefined\n }\n const model = typeof data.model === 'string' && data.model.length > 0 ? data.model : agentRunName\n const event: RuntimeStreamEvent & { type: 'llm_call' } = {\n type: 'llm_call',\n model,\n }\n if (tokensIn !== undefined) event.tokensIn = tokensIn\n if (tokensOut !== undefined) event.tokensOut = tokensOut\n if (costUsd !== undefined) event.costUsd = costUsd\n return event\n}\n\nfunction pickFiniteNumber(data: Record<string, unknown>, keys: string[]): number | undefined {\n for (const key of keys) {\n const value = data[key]\n if (typeof value === 'number' && Number.isFinite(value)) return value\n }\n return undefined\n}\n\n/**\n * Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,\n * for runtimes that bridge a sandbox `streamPrompt` into the\n * `AgentRuntime.act` streaming contract. Returns `undefined` for events that\n * have no faithful projection — the raw stream is preserved separately for the\n * `OutputAdapter`, so an unmapped event never loses data.\n *\n * Mapped (the task-optional incremental variants — no synthesized task\n * lifecycle, no guessed tool-part shapes):\n * - `message.part.updated` text part → `text_delta`\n * - `message.part.updated` reasoning/thinking part → `reasoning_delta`\n * - cost-bearing events → `llm_call` (shared with the ledger extractor)\n *\n * The opencode backend emits incremental text as\n * `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;\n * `delta` is the increment, `part.text` the running accumulation.\n */\nexport function mapSandboxEvent(\n event: SandboxEvent,\n opts: { agentRunName?: string } = {},\n): RuntimeStreamEvent | undefined {\n if (!event || typeof event !== 'object') return undefined\n const type = String(event.type ?? '')\n const data =\n event.data && typeof event.data === 'object'\n ? (event.data as Record<string, unknown>)\n : ({} as Record<string, unknown>)\n\n if (type === 'message.part.updated') {\n const part =\n data.part && typeof data.part === 'object' ? (data.part as Record<string, unknown>) : {}\n const partType = String(part.type ?? '')\n const delta = typeof data.delta === 'string' ? data.delta : undefined\n const text = delta ?? (typeof part.text === 'string' ? part.text : undefined)\n if (text === undefined) return undefined\n if (partType === 'text') return { type: 'text_delta', text }\n if (partType === 'reasoning' || partType === 'thinking')\n return { type: 'reasoning_delta', text }\n return undefined\n }\n\n return extractLlmCallEvent(event, opts.agentRunName ?? 'agent')\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA;AAAA,EAET,YACE,SACA,SACA,SACA;AACA,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AACvB,SAAK,OAAO,SAAS;AAAA,EACvB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAaO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAOO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;;;ACjGO,SAAS,oBACd,OACA,cACyD;AACzD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,QAAM,OAAO,OAAO,MAAM,QAAQ,EAAE;AACpC,QAAM,OACJ,MAAM,QAAQ,OAAO,MAAM,SAAS,WAC/B,MAAM,OACN,CAAC;AAER,MAAI,SAAS,cAAc,SAAS,gBAAgB,SAAS,SAAS;AACpE,WAAO,aAAa,MAAM,YAAY;AAAA,EACxC;AACA,MAAI,SAAS,uBAAuB,SAAS,YAAY,SAAS,SAAS;AACzE,UAAM,QAAQ,KAAK;AACnB,QAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,WAAO,aAAa,EAAE,GAAG,OAAO,OAAO,KAAK,SAAS,MAAM,MAAM,GAAG,YAAY;AAAA,EAClF;AAOA,MAAI,SAAS,QAAQ;AACnB,UAAM,QAAQ,KAAK;AACnB,QAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,UAAM,MAAM,iBAAiB,OAAO,CAAC,gBAAgB,qBAAqB,WAAW,CAAC;AACtF,UAAM,YAAY,iBAAiB,OAAO,CAAC,iBAAiB,CAAC;AAC7D,UAAM,YACJ,QAAQ,UAAa,cAAc,UAAa,OAAO,MAAM,aAAa,KAAK;AACjF,WAAO;AAAA,MACL;AAAA,QACE,aAAa,MAAM;AAAA,QACnB,cAAc;AAAA,QACd,cAAc,KAAK;AAAA,QACnB,OAAO,KAAK,SAAS,MAAM;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,aACP,MACA,cACyD;AACzD,QAAM,WAAW,iBAAiB,MAAM,CAAC,YAAY,eAAe,eAAe,CAAC;AACpF,QAAM,YAAY,iBAAiB,MAAM,CAAC,aAAa,gBAAgB,mBAAmB,CAAC;AAC3F,QAAM,UAAU,iBAAiB,MAAM,CAAC,WAAW,gBAAgB,YAAY,MAAM,CAAC;AACtF,MAAI,aAAa,UAAa,cAAc,UAAa,YAAY,QAAW;AAC9E,WAAO;AAAA,EACT;AACA,QAAM,QAAQ,OAAO,KAAK,UAAU,YAAY,KAAK,MAAM,SAAS,IAAI,KAAK,QAAQ;AACrF,QAAM,QAAmD;AAAA,IACvD,MAAM;AAAA,IACN;AAAA,EACF;AACA,MAAI,aAAa,OAAW,OAAM,WAAW;AAC7C,MAAI,cAAc,OAAW,OAAM,YAAY;AAC/C,MAAI,YAAY,OAAW,OAAM,UAAU;AAC3C,SAAO;AACT;AAEA,SAAS,iBAAiB,MAA+B,MAAoC;AAC3F,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,KAAK,GAAG;AACtB,QAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,EAAG,QAAO;AAAA,EAClE;AACA,SAAO;AACT;AAmBO,SAAS,gBACd,OACA,OAAkC,CAAC,GACH;AAChC,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,QAAM,OAAO,OAAO,MAAM,QAAQ,EAAE;AACpC,QAAM,OACJ,MAAM,QAAQ,OAAO,MAAM,SAAS,WAC/B,MAAM,OACN,CAAC;AAER,MAAI,SAAS,wBAAwB;AACnC,UAAM,OACJ,KAAK,QAAQ,OAAO,KAAK,SAAS,WAAY,KAAK,OAAmC,CAAC;AACzF,UAAM,WAAW,OAAO,KAAK,QAAQ,EAAE;AACvC,UAAM,QAAQ,OAAO,KAAK,UAAU,WAAW,KAAK,QAAQ;AAC5D,UAAM,OAAO,UAAU,OAAO,KAAK,SAAS,WAAW,KAAK,OAAO;AACnE,QAAI,SAAS,OAAW,QAAO;AAC/B,QAAI,aAAa,OAAQ,QAAO,EAAE,MAAM,cAAc,KAAK;AAC3D,QAAI,aAAa,eAAe,aAAa;AAC3C,aAAO,EAAE,MAAM,mBAAmB,KAAK;AACzC,WAAO;AAAA,EACT;AAEA,SAAO,oBAAoB,OAAO,KAAK,gBAAgB,OAAO;AAChE;","names":["AgentEvalError"]}
|