@tangle-network/agent-runtime 0.44.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -203
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-SKUZZCHE.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-GFKVVRQ7.js → chunk-NYN5RTLP.js} +11 -10
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-KDMRUD2P.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +398 -10
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -393
- package/dist/loops.js +94 -25
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-wFDmmurL.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop-C4L1Sted.d.ts → run-loop--hSoIknW.d.ts} +35 -12
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-DbJzz2uf.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +3 -2
- package/dist/workflow.js +4 -5
- package/dist/workflow.js.map +1 -1
- package/package.json +26 -6
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-GFKVVRQ7.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-KDMRUD2P.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-S7JXV32P.js +0 -947
- package/dist/chunk-S7JXV32P.js.map +0 -1
- package/dist/chunk-SKUZZCHE.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-wUgp6UKs.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
DELEGATION_STATUS_DESCRIPTION,
|
|
15
15
|
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
16
16
|
DELEGATION_STATUS_TOOL_NAME
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-IJGS6J7X.js";
|
|
18
18
|
|
|
19
19
|
// src/mcp/openai-tools.ts
|
|
20
20
|
function buildTool(name, description, parameters) {
|
|
@@ -61,4 +61,4 @@ export {
|
|
|
61
61
|
mcpToolsForRuntimeMcp,
|
|
62
62
|
mcpToolsForRuntimeMcpSubset
|
|
63
63
|
};
|
|
64
|
-
//# sourceMappingURL=chunk-
|
|
64
|
+
//# sourceMappingURL=chunk-KSMX62JF.js.map
|
|
@@ -3,22 +3,22 @@ import {
|
|
|
3
3
|
} from "./chunk-FNMGYYSS.js";
|
|
4
4
|
import {
|
|
5
5
|
createDefaultCoderDelegate
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-IJ6FGOPO.js";
|
|
7
7
|
import {
|
|
8
8
|
runAnalystLoop
|
|
9
|
-
} from "./chunk-
|
|
10
|
-
import {
|
|
11
|
-
optimizePrompt
|
|
12
|
-
} from "./chunk-VOX6Z3II.js";
|
|
9
|
+
} from "./chunk-FK53TXOP.js";
|
|
13
10
|
import {
|
|
14
11
|
createDynamicDriver,
|
|
15
12
|
runLoop
|
|
16
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-KEWO4KI6.js";
|
|
17
14
|
import {
|
|
18
15
|
ConfigError
|
|
19
|
-
} from "./chunk-
|
|
16
|
+
} from "./chunk-PRX45WE2.js";
|
|
20
17
|
|
|
21
18
|
// src/loop-runner.ts
|
|
19
|
+
import {
|
|
20
|
+
selfImprove
|
|
21
|
+
} from "@tangle-network/agent-eval/contract";
|
|
22
22
|
var DELEGATED_LOOP_MODES = [
|
|
23
23
|
"code",
|
|
24
24
|
"review",
|
|
@@ -73,7 +73,8 @@ function dynamicLoopRunner(o) {
|
|
|
73
73
|
driver: createDynamicDriver({
|
|
74
74
|
planner: o.planner,
|
|
75
75
|
...o.maxIterations !== void 0 ? { maxIterations: o.maxIterations } : {},
|
|
76
|
-
...o.maxFanout !== void 0 ? { maxFanout: o.maxFanout } : {}
|
|
76
|
+
...o.maxFanout !== void 0 ? { maxFanout: o.maxFanout } : {},
|
|
77
|
+
...o.analyze ? { analyze: o.analyze } : {}
|
|
77
78
|
}),
|
|
78
79
|
...o.agentRun ? { agentRun: o.agentRun } : {},
|
|
79
80
|
...o.agentRuns ? { agentRuns: o.agentRuns } : {},
|
|
@@ -108,7 +109,7 @@ function researchLoopRunner(o) {
|
|
|
108
109
|
};
|
|
109
110
|
}
|
|
110
111
|
function selfImproveLoopRunner(options) {
|
|
111
|
-
return async () =>
|
|
112
|
+
return async () => selfImprove(options);
|
|
112
113
|
}
|
|
113
114
|
function auditLoopRunner(options) {
|
|
114
115
|
return async () => runAnalystLoop(options);
|
|
@@ -199,4 +200,4 @@ export {
|
|
|
199
200
|
runLoopRunnerCli,
|
|
200
201
|
parseLoopRunnerArgv
|
|
201
202
|
};
|
|
202
|
-
//# sourceMappingURL=chunk-
|
|
203
|
+
//# sourceMappingURL=chunk-NYN5RTLP.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/loop-runner.ts","../src/loop-runner-bin.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runDelegatedLoop` — the configured delegated loop-runner.\n *\n * One typed entrypoint a worker agent (or a scheduled routine) calls to run a\n * disciplined loop in a chosen MODE, over agent-runtime's hardened engines:\n *\n * code → build-in-a-loop via the coder delegate (no-op + secret floor,\n * optional reviewer gate, winner-selection)\n * review → code mode with a REQUIRED reviewer (the gate is the point)\n * research → research-in-a-loop with valid-only KB growth (createKbGate)\n * audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)\n * self-improve → closed-loop text/config optimization (selfImprove, held-out gated)\n * dynamic → agent-authored topology (runLoop + createDynamicDriver)\n *\n * It is intentionally a thin façade: the value is that EVERY product reuses the\n * one hardened engine instead of forking delegation logic. The dispatcher owns\n * mode routing, timing, fail-loud on an unregistered mode, and a uniform result\n * shape; each mode's engine is a pre-configured runner in the registry (build it\n * with the factories below, or inject your own / a stub).\n */\n\nimport type { Scenario } from '@tangle-network/agent-eval/campaign'\nimport {\n type SelfImproveOptions,\n type SelfImproveResult,\n selfImprove,\n} from '@tangle-network/agent-eval/contract'\nimport { runAnalystLoop } from './analyst-loop'\nimport type { RunAnalystLoopOpts, RunAnalystLoopResult } from './analyst-loop/types'\nimport { ConfigError } from './errors'\nimport {\n type CoderReviewer,\n type CoderWinnerSelection,\n createDefaultCoderDelegate,\n type DelegateRunCtx,\n} from './mcp/delegates'\nimport { type CreateKbGateOptions, createKbGate, type FactCandidate } from './mcp/kb-gate'\nimport type { DelegateCodeArgs } from './mcp/types'\nimport type { CoderOutput } from './profiles/coder'\nimport {\n type AgentRunSpec,\n type 
CreateDynamicDriverOptions,\n createDynamicDriver,\n type DynamicDecision,\n type LoopResult,\n type LoopSandboxClient,\n type OutputAdapter,\n runLoop,\n type TopologyPlanner,\n type Validator,\n} from './runtime'\n\n/** @experimental Every delegated-loop mode, for validation + CLI surfaces. */\nexport const DELEGATED_LOOP_MODES = [\n 'code',\n 'review',\n 'research',\n 'audit',\n 'self-improve',\n 'dynamic',\n] as const\n\n/** @experimental */\nexport type DelegatedLoopMode = (typeof DELEGATED_LOOP_MODES)[number]\n\n/** @experimental Type guard for an untrusted mode string (CLI / config input). */\nexport function isDelegatedLoopMode(value: unknown): value is DelegatedLoopMode {\n return typeof value === 'string' && (DELEGATED_LOOP_MODES as readonly string[]).includes(value)\n}\n\n/** @experimental A pre-configured loop for one mode. Returns the mode's raw\n * output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */\nexport type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>\n\n/** @experimental Mode → configured runner. Partial: only register the modes a\n * given product/routine actually uses. */\nexport type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>\n\n/** @experimental Uniform result — never throws from a registered runner; a\n * thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */\nexport interface DelegatedLoopResult<T = unknown> {\n mode: DelegatedLoopMode\n ok: boolean\n output?: T\n error?: string\n durationMs: number\n}\n\n/** @experimental */\nexport interface RunDelegatedLoopOptions {\n signal?: AbortSignal\n /** Clock override for deterministic tests. */\n now?: () => number\n}\n\n/**\n * @experimental\n *\n * Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no\n * runner is registered for the mode — a routine pointed at an unwired mode is a\n * config bug, not a silent no-op. 
A runner that throws is captured as\n * `{ ok: false }` so unattended runs record the failure rather than crash.\n */\nexport async function runDelegatedLoop<T = unknown>(\n mode: DelegatedLoopMode,\n registry: DelegatedLoopRegistry,\n options: RunDelegatedLoopOptions = {},\n): Promise<DelegatedLoopResult<T>> {\n const runner = registry[mode] as DelegatedLoopRunner<T> | undefined\n if (!runner) {\n throw new ConfigError(\n `runDelegatedLoop: no runner registered for mode '${mode}' (registered: ${\n Object.keys(registry).join(', ') || 'none'\n })`,\n )\n }\n const now = options.now ?? Date.now\n const signal = options.signal ?? new AbortController().signal\n const start = now()\n try {\n const output = await runner(signal)\n return { mode, ok: true, output, durationMs: now() - start }\n } catch (err) {\n return {\n mode,\n ok: false,\n error: err instanceof Error ? err.message : String(err),\n durationMs: now() - start,\n }\n }\n}\n\n/** @experimental Options for the default `code`/`review` runner. */\nexport interface CoderLoopRunnerOptions {\n sandboxClient: LoopSandboxClient\n /** What to build — the delegate args (goal, repoRoot, variants, config, …). */\n args: DelegateCodeArgs\n /** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */\n reviewer?: CoderReviewer\n /** Winner-selection strategy. Default `highest-score`. */\n winnerSelection?: CoderWinnerSelection\n /** Harnesses for `variants > 1` fanout. */\n fanoutHarnesses?: string[]\n}\n\n/** @experimental Build a `code`-mode runner over the hardened coder delegate. */\nexport function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput> {\n const delegate = createDefaultCoderDelegate({\n sandboxClient: options.sandboxClient,\n ...(options.reviewer ? { reviewer: options.reviewer } : {}),\n ...(options.winnerSelection ? { winnerSelection: options.winnerSelection } : {}),\n ...(options.fanoutHarnesses ? 
{ fanoutHarnesses: options.fanoutHarnesses } : {}),\n })\n return async (signal) => {\n const ctx: DelegateRunCtx = { signal, report: () => {} }\n return delegate(options.args, ctx)\n }\n}\n\n/**\n * @experimental\n *\n * `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,\n * so the type forces a reviewer (a \"review loop\" with no reviewer is a code loop).\n */\nexport function reviewLoopRunner(\n options: CoderLoopRunnerOptions & { reviewer: CoderReviewer },\n): DelegatedLoopRunner<CoderOutput> {\n return coderLoopRunner(options)\n}\n\n/** @experimental Options for the default `dynamic` runner. */\nexport interface DynamicLoopRunnerOptions<Task, Output> {\n sandboxClient: LoopSandboxClient\n /** The agent-authored topology planner (sync or async; an async planner is where an LLM call goes). */\n planner: TopologyPlanner<Task, Output>\n task: Task\n output: OutputAdapter<Output>\n validator?: Validator<Output>\n /** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */\n agentRun?: AgentRunSpec<Task>\n agentRuns?: AgentRunSpec<Task>[]\n maxIterations?: number\n maxFanout?: number\n /** Optional trace-analyst hook forwarded to the dynamic driver so the loop runs\n * `f(trace, findings)` — see `CreateDynamicDriverOptions.analyze`. Caller-side\n * seam to `runAnalystLoop`; keeps this runner analyst-free. */\n analyze?: CreateDynamicDriverOptions<Task, Output>['analyze']\n}\n\n/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */\nexport function dynamicLoopRunner<Task, Output>(\n o: DynamicLoopRunnerOptions<Task, Output>,\n): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>> {\n return async (signal) =>\n runLoop<Task, Output, DynamicDecision>({\n driver: createDynamicDriver<Task, Output>({\n planner: o.planner,\n ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),\n ...(o.maxFanout !== undefined ? { maxFanout: o.maxFanout } : {}),\n ...(o.analyze ? 
{ analyze: o.analyze } : {}),\n }),\n ...(o.agentRun ? { agentRun: o.agentRun } : {}),\n ...(o.agentRuns ? { agentRuns: o.agentRuns } : {}),\n output: o.output,\n ...(o.validator ? { validator: o.validator } : {}),\n task: o.task,\n ctx: { sandboxClient: o.sandboxClient, signal },\n ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),\n })\n}\n\n/** @experimental A fact rejected at the KB gate — surfaced, never dropped. */\nexport interface VetoedFact {\n candidate: FactCandidate\n vetoedBy?: string\n reason?: string\n}\n\n/** @experimental */\nexport interface ResearchLoopResult {\n /** Facts that passed the fail-closed gate — safe to write to the KB. */\n accepted: FactCandidate[]\n /** Facts the gate vetoed in the final round — escalate, do not silently drop. */\n vetoed: VetoedFact[]\n /** Research rounds actually run. */\n rounds: number\n}\n\n/** @experimental Options for the default `research` runner. */\nexport interface ResearchLoopRunnerOptions {\n /**\n * The research engine (the consumer's web/doc searcher + extractor). Called\n * each round with the prior round's vetoes so it can re-research the gaps.\n * Returns fact candidates carrying their grounding (`verbatimPassage` +\n * `sourceText`).\n */\n research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>\n /** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */\n gate?: CreateKbGateOptions\n /** Max research rounds (correct-on-veto remediation). Default 1. */\n maxRounds?: number\n}\n\n/**\n * @experimental `research` mode — research-in-a-loop with valid-only KB growth.\n *\n * Each round: research → gate every candidate (fail-closed; passage MUST be in\n * the source) → accept the clean ones → re-research the vetoed ones next round,\n * up to `maxRounds`. 
Vetoed facts in the final round are RETURNED (escalate,\n * never silently dropped) so the caller audits vs retries.\n */\nexport function researchLoopRunner(\n o: ResearchLoopRunnerOptions,\n): DelegatedLoopRunner<ResearchLoopResult> {\n const gate = createKbGate(o.gate)\n const maxRounds = Math.max(1, Math.trunc(o.maxRounds ?? 1))\n return async (signal) => {\n const accepted: FactCandidate[] = []\n let vetoed: VetoedFact[] = []\n let rounds = 0\n for (let round = 0; round < maxRounds; round += 1) {\n if (signal.aborted) break\n rounds += 1\n const candidates = await o.research(round, vetoed)\n if (candidates.length === 0) break\n vetoed = []\n for (const c of candidates) {\n const v = await gate(c)\n if (v.accepted) accepted.push(c)\n else vetoed.push({ candidate: c, vetoedBy: v.vetoedBy, reason: v.reason })\n }\n if (vetoed.length === 0) break\n }\n return { accepted, vetoed, rounds }\n }\n}\n\n/** @experimental `self-improve` mode — agent-eval's one-call closed loop (held-out gated). */\nexport function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(\n options: SelfImproveOptions<TScenario, TArtifact>,\n): DelegatedLoopRunner<SelfImproveResult<TScenario, TArtifact>> {\n return async () => selfImprove<TScenario, TArtifact>(options)\n}\n\n/** @experimental `audit` mode — analyst loop over captured trace/run data. */\nexport function auditLoopRunner<TProposal = unknown, TEdit = unknown>(\n options: RunAnalystLoopOpts,\n): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>> {\n return async () => runAnalystLoop<TProposal, TEdit>(options)\n}\n","#!/usr/bin/env node\n/**\n * @experimental\n *\n * `agent-runtime-loop` — the schedulable entrypoint for the configured\n * delegated loop-runner. 
A cron job / routine / Makefile target invokes:\n *\n * agent-runtime-loop --mode research --config ./loops.config.js\n *\n * The config module wires the registry (with full access to env / creds —\n * which is why the deps live there, not in this generic bin). It must default-\n * export a `DelegatedLoopRegistry`, or a `() => DelegatedLoopRegistry | Promise<…>`.\n * The bin runs the selected mode, prints the `DelegatedLoopResult` as JSON, and\n * exits 0 on `ok`, 1 on a recorded failure, 2 on a usage/config error.\n */\n\nimport {\n DELEGATED_LOOP_MODES,\n type DelegatedLoopMode,\n type DelegatedLoopRegistry,\n type DelegatedLoopResult,\n isDelegatedLoopMode,\n runDelegatedLoop,\n} from './loop-runner'\n\n/** @experimental Parsed CLI invocation. */\nexport interface LoopRunnerCliArgs {\n mode: string\n /** Loads the registry — the bin wires this from `--config`; tests inject a stub. */\n loadRegistry: () => Promise<DelegatedLoopRegistry> | DelegatedLoopRegistry\n now?: () => number\n}\n\n/** @experimental */\nexport interface LoopRunnerCliResult {\n exitCode: number\n result?: DelegatedLoopResult\n error?: string\n}\n\n/**\n * @experimental\n *\n * Pure CLI core (no process / argv / IO) so it's unit-testable: validate the\n * mode, load the registry, dispatch, map to an exit code (0 ok / 1 failed /\n * 2 usage). 
Exported for embedding in custom runners + tests.\n */\nexport async function runLoopRunnerCli(args: LoopRunnerCliArgs): Promise<LoopRunnerCliResult> {\n if (!isDelegatedLoopMode(args.mode)) {\n return {\n exitCode: 2,\n error: `unknown mode '${args.mode}' (expected one of: ${DELEGATED_LOOP_MODES.join(', ')})`,\n }\n }\n let registry: DelegatedLoopRegistry\n try {\n registry = await args.loadRegistry()\n } catch (err) {\n return { exitCode: 2, error: `failed to load registry: ${errMsg(err)}` }\n }\n if (!registry[args.mode]) {\n return {\n exitCode: 2,\n error: `config registers no runner for mode '${args.mode}' (registered: ${\n Object.keys(registry).join(', ') || 'none'\n })`,\n }\n }\n // runDelegatedLoop throws only on a missing runner (guarded above); a failing\n // engine is captured as { ok: false } → exit 1, not a crash.\n const result = await runDelegatedLoop(args.mode as DelegatedLoopMode, registry, {\n ...(args.now ? { now: args.now } : {}),\n })\n return { exitCode: result.ok ? 0 : 1, result }\n}\n\n/** Parse `--mode X --config Y` from an argv tail (`process.argv.slice(2)`). */\nexport function parseLoopRunnerArgv(argv: string[]): { mode?: string; config?: string } {\n const out: { mode?: string; config?: string } = {}\n for (let i = 0; i < argv.length; i += 1) {\n const a = argv[i]\n if (a === '--mode') out.mode = argv[++i]\n else if (a === '--config') out.config = argv[++i]\n else if (a?.startsWith('--mode=')) out.mode = a.slice('--mode='.length)\n else if (a?.startsWith('--config=')) out.config = a.slice('--config='.length)\n }\n return out\n}\n\n/** Normalize a config module's default export → a registry. */\nfunction resolveRegistry(mod: unknown): DelegatedLoopRegistry {\n const def = (mod as { default?: unknown })?.default ?? mod\n const value = typeof def === 'function' ? (def as () => unknown)() : def\n return value as DelegatedLoopRegistry\n}\n\nfunction errMsg(err: unknown): string {\n return err instanceof Error ? 
err.message : String(err)\n}\n\n/** The argv → IO → exit shell. Kept thin; logic lives in `runLoopRunnerCli`. */\nasync function main(): Promise<void> {\n const { mode, config } = parseLoopRunnerArgv(process.argv.slice(2))\n if (!mode || !config) {\n process.stderr.write(\n 'usage: agent-runtime-loop --mode <mode> --config <module>\\n' +\n ` modes: ${DELEGATED_LOOP_MODES.join(' | ')}\\n` +\n ' config: a JS/TS module default-exporting a DelegatedLoopRegistry (or a factory)\\n',\n )\n process.exit(2)\n }\n const { pathToFileURL } = await import('node:url')\n const { resolve } = await import('node:path')\n const cli = await runLoopRunnerCli({\n mode,\n loadRegistry: async () => resolveRegistry(await import(pathToFileURL(resolve(config)).href)),\n })\n process.stdout.write(`${JSON.stringify(cli.result ?? { error: cli.error }, null, 2)}\\n`)\n if (cli.error) process.stderr.write(`${cli.error}\\n`)\n process.exit(cli.exitCode)\n}\n\n// Run only when executed as the bin — never when imported for the testable\n// core, and never when bundled into a runtime that has no `process.argv`\n// (e.g. Cloudflare Workers, where `process` is a shim without `argv`). Reading\n// `process.argv[1]` directly would throw at module load there; `process.argv?.`\n// keeps the guard a no-op instead of crashing the Worker on startup.\nconst invokedScript = typeof process !== 'undefined' ? 
process.argv?.[1] : undefined\nif (invokedScript && /loop-runner-bin\\.(js|ts|mjs)$/.test(invokedScript)) {\n void main()\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAwBA;AAAA,EAGE;AAAA,OACK;AA2BA,IAAM,uBAAuB;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAMO,SAAS,oBAAoB,OAA4C;AAC9E,SAAO,OAAO,UAAU,YAAa,qBAA2C,SAAS,KAAK;AAChG;AAmCA,eAAsB,iBACpB,MACA,UACA,UAAmC,CAAC,GACH;AACjC,QAAM,SAAS,SAAS,IAAI;AAC5B,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI;AAAA,MACR,oDAAoD,IAAI,kBACtD,OAAO,KAAK,QAAQ,EAAE,KAAK,IAAI,KAAK,MACtC;AAAA,IACF;AAAA,EACF;AACA,QAAM,MAAM,QAAQ,OAAO,KAAK;AAChC,QAAM,SAAS,QAAQ,UAAU,IAAI,gBAAgB,EAAE;AACvD,QAAM,QAAQ,IAAI;AAClB,MAAI;AACF,UAAM,SAAS,MAAM,OAAO,MAAM;AAClC,WAAO,EAAE,MAAM,IAAI,MAAM,QAAQ,YAAY,IAAI,IAAI,MAAM;AAAA,EAC7D,SAAS,KAAK;AACZ,WAAO;AAAA,MACL;AAAA,MACA,IAAI;AAAA,MACJ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACtD,YAAY,IAAI,IAAI;AAAA,IACtB;AAAA,EACF;AACF;AAgBO,SAAS,gBAAgB,SAAmE;AACjG,QAAM,WAAW,2BAA2B;AAAA,IAC1C,eAAe,QAAQ;AAAA,IACvB,GAAI,QAAQ,WAAW,EAAE,UAAU,QAAQ,SAAS,IAAI,CAAC;AAAA,IACzD,GAAI,QAAQ,kBAAkB,EAAE,iBAAiB,QAAQ,gBAAgB,IAAI,CAAC;AAAA,IAC9E,GAAI,QAAQ,kBAAkB,EAAE,iBAAiB,QAAQ,gBAAgB,IAAI,CAAC;AAAA,EAChF,CAAC;AACD,SAAO,OAAO,WAAW;AACvB,UAAM,MAAsB,EAAE,QAAQ,QAAQ,MAAM;AAAA,IAAC,EAAE;AACvD,WAAO,SAAS,QAAQ,MAAM,GAAG;AAAA,EACnC;AACF;AAQO,SAAS,iBACd,SACkC;AAClC,SAAO,gBAAgB,OAAO;AAChC;AAsBO,SAAS,kBACd,GACgE;AAChE,SAAO,OAAO,WACZ,QAAuC;AAAA,IACrC,QAAQ,oBAAkC;AAAA,MACxC,SAAS,EAAE;AAAA,MACX,GAAI,EAAE,kBAAkB,SAAY,EAAE,eAAe,EAAE,cAAc,IAAI,CAAC;AAAA,MAC1E,GAAI,EAAE,cAAc,SAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,MAC9D,GAAI,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,IAAI,CAAC;AAAA,IAC5C,CAAC;AAAA,IACD,GAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,IAAI,CAAC;AAAA,IAC7C,GAAI,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,IAChD,QAAQ,EAAE;AAAA,IACV,GAAI,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,IAAI,CAAC;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,KAAK,EAAE,eAAe,EAAE,eAAe,OAAO;AAAA,IAC9C,GAAI,EAAE,kBAAkB,SAAY,EAAE,eAAe,EAAE,cAAc,IAAI,CAAC;AAAA,EAC5E,CAAC;AACL;AA0CO,SAAS,mBACd,GACyC;AACzC,QAAM,OAAO,aAAa,EAAE,IAAI;AAChC,QAA
M,YAAY,KAAK,IAAI,GAAG,KAAK,MAAM,EAAE,aAAa,CAAC,CAAC;AAC1D,SAAO,OAAO,WAAW;AACvB,UAAM,WAA4B,CAAC;AACnC,QAAI,SAAuB,CAAC;AAC5B,QAAI,SAAS;AACb,aAAS,QAAQ,GAAG,QAAQ,WAAW,SAAS,GAAG;AACjD,UAAI,OAAO,QAAS;AACpB,gBAAU;AACV,YAAM,aAAa,MAAM,EAAE,SAAS,OAAO,MAAM;AACjD,UAAI,WAAW,WAAW,EAAG;AAC7B,eAAS,CAAC;AACV,iBAAW,KAAK,YAAY;AAC1B,cAAM,IAAI,MAAM,KAAK,CAAC;AACtB,YAAI,EAAE,SAAU,UAAS,KAAK,CAAC;AAAA,YAC1B,QAAO,KAAK,EAAE,WAAW,GAAG,UAAU,EAAE,UAAU,QAAQ,EAAE,OAAO,CAAC;AAAA,MAC3E;AACA,UAAI,OAAO,WAAW,EAAG;AAAA,IAC3B;AACA,WAAO,EAAE,UAAU,QAAQ,OAAO;AAAA,EACpC;AACF;AAGO,SAAS,sBACd,SAC8D;AAC9D,SAAO,YAAY,YAAkC,OAAO;AAC9D;AAGO,SAAS,gBACd,SAC6D;AAC7D,SAAO,YAAY,eAAiC,OAAO;AAC7D;;;ACrPA,eAAsB,iBAAiB,MAAuD;AAC5F,MAAI,CAAC,oBAAoB,KAAK,IAAI,GAAG;AACnC,WAAO;AAAA,MACL,UAAU;AAAA,MACV,OAAO,iBAAiB,KAAK,IAAI,uBAAuB,qBAAqB,KAAK,IAAI,CAAC;AAAA,IACzF;AAAA,EACF;AACA,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,KAAK,aAAa;AAAA,EACrC,SAAS,KAAK;AACZ,WAAO,EAAE,UAAU,GAAG,OAAO,4BAA4B,OAAO,GAAG,CAAC,GAAG;AAAA,EACzE;AACA,MAAI,CAAC,SAAS,KAAK,IAAI,GAAG;AACxB,WAAO;AAAA,MACL,UAAU;AAAA,MACV,OAAO,wCAAwC,KAAK,IAAI,kBACtD,OAAO,KAAK,QAAQ,EAAE,KAAK,IAAI,KAAK,MACtC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,SAAS,MAAM,iBAAiB,KAAK,MAA2B,UAAU;AAAA,IAC9E,GAAI,KAAK,MAAM,EAAE,KAAK,KAAK,IAAI,IAAI,CAAC;AAAA,EACtC,CAAC;AACD,SAAO,EAAE,UAAU,OAAO,KAAK,IAAI,GAAG,OAAO;AAC/C;AAGO,SAAS,oBAAoB,MAAoD;AACtF,QAAM,MAA0C,CAAC;AACjD,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,GAAG;AACvC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,MAAM,SAAU,KAAI,OAAO,KAAK,EAAE,CAAC;AAAA,aAC9B,MAAM,WAAY,KAAI,SAAS,KAAK,EAAE,CAAC;AAAA,aACvC,GAAG,WAAW,SAAS,EAAG,KAAI,OAAO,EAAE,MAAM,UAAU,MAAM;AAAA,aAC7D,GAAG,WAAW,WAAW,EAAG,KAAI,SAAS,EAAE,MAAM,YAAY,MAAM;AAAA,EAC9E;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,KAAqC;AAC5D,QAAM,MAAO,KAA+B,WAAW;AACvD,QAAM,QAAQ,OAAO,QAAQ,aAAc,IAAsB,IAAI;AACrE,SAAO;AACT;AAEA,SAAS,OAAO,KAAsB;AACpC,SAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AACxD;AAGA,eAAe,OAAsB;AACnC,QAAM,EAAE,MAAM,OAAO,IAAI,oBAAoB,QAAQ,KAAK,MAAM,CAAC,CAAC;AAClE,MAAI,CAAC,QAAQ,CAAC,QAAQ;AACpB,YAAQ,OAAO;AAAA,MACb;AAAA,WACc,qBAAqB,KAAK,KAAK,CAAC;AAAA;AAAA;AAAA,
IAEhD;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,QAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,QAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,MAAW;AAC5C,QAAM,MAAM,MAAM,iBAAiB;AAAA,IACjC;AAAA,IACA,cAAc,YAAY,gBAAgB,MAAM,OAAO,cAAc,QAAQ,MAAM,CAAC,EAAE,KAAK;AAAA,EAC7F,CAAC;AACD,UAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,IAAI,UAAU,EAAE,OAAO,IAAI,MAAM,GAAG,MAAM,CAAC,CAAC;AAAA,CAAI;AACvF,MAAI,IAAI,MAAO,SAAQ,OAAO,MAAM,GAAG,IAAI,KAAK;AAAA,CAAI;AACpD,UAAQ,KAAK,IAAI,QAAQ;AAC3B;AAOA,IAAM,gBAAgB,OAAO,YAAY,cAAc,QAAQ,OAAO,CAAC,IAAI;AAC3E,IAAI,iBAAiB,gCAAgC,KAAK,aAAa,GAAG;AACxE,OAAK,KAAK;AACZ;","names":[]}
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
import { AgentEvalError } from "@tangle-network/agent-eval";
|
|
3
|
+
import {
|
|
4
|
+
AgentEvalError as AgentEvalError2,
|
|
5
|
+
CaptureIntegrityError,
|
|
6
|
+
ConfigError,
|
|
7
|
+
JudgeError,
|
|
8
|
+
NotFoundError,
|
|
9
|
+
ReplayError,
|
|
10
|
+
ValidationError,
|
|
11
|
+
VerificationError
|
|
12
|
+
} from "@tangle-network/agent-eval";
|
|
13
|
+
var SessionMismatchError = class extends AgentEvalError {
|
|
14
|
+
sessionBackend;
|
|
15
|
+
requestedBackend;
|
|
16
|
+
constructor(sessionBackend, requestedBackend, options) {
|
|
17
|
+
super(
|
|
18
|
+
"validation",
|
|
19
|
+
`Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,
|
|
20
|
+
options
|
|
21
|
+
);
|
|
22
|
+
this.sessionBackend = sessionBackend;
|
|
23
|
+
this.requestedBackend = requestedBackend;
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
var BackendTransportError = class extends AgentEvalError {
|
|
27
|
+
backend;
|
|
28
|
+
status;
|
|
29
|
+
/**
|
|
30
|
+
* Truncated upstream response body (≤2 KiB) when available. Diagnostic
|
|
31
|
+
* only — surfaces in `backend_error.error.body` and `final.error.body`
|
|
32
|
+
* so operators can see "free_tier_limit", "invalid_api_key", etc. without
|
|
33
|
+
* cracking the log line open.
|
|
34
|
+
*/
|
|
35
|
+
body;
|
|
36
|
+
constructor(backend, message, options) {
|
|
37
|
+
super("config", message, options);
|
|
38
|
+
this.backend = backend;
|
|
39
|
+
this.status = options?.status;
|
|
40
|
+
this.body = options?.body;
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
var RuntimeRunStateError = class extends AgentEvalError {
|
|
44
|
+
constructor(message, options) {
|
|
45
|
+
super("validation", message, options);
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
var PlannerError = class extends AgentEvalError {
|
|
49
|
+
constructor(message, options) {
|
|
50
|
+
super("validation", message, options);
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
var AnalystError = class extends AgentEvalError {
|
|
54
|
+
constructor(message, options) {
|
|
55
|
+
super("validation", message, options);
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
// src/runtime/sandbox-events.ts
|
|
60
|
+
function extractLlmCallEvent(event, agentRunName) {
|
|
61
|
+
if (!event || typeof event !== "object") return void 0;
|
|
62
|
+
const type = String(event.type ?? "");
|
|
63
|
+
const data = event.data && typeof event.data === "object" ? event.data : {};
|
|
64
|
+
if (type === "llm_call" || type === "cost.usage" || type === "usage") {
|
|
65
|
+
return buildLlmCall(data, agentRunName);
|
|
66
|
+
}
|
|
67
|
+
if (type === "message.completed" || type === "result" || type === "final") {
|
|
68
|
+
const usage = data.usage;
|
|
69
|
+
if (!usage || typeof usage !== "object") return void 0;
|
|
70
|
+
return buildLlmCall({ ...usage, model: data.model ?? usage.model }, agentRunName);
|
|
71
|
+
}
|
|
72
|
+
if (type === "done") {
|
|
73
|
+
const usage = data.tokenUsage;
|
|
74
|
+
if (!usage || typeof usage !== "object") return void 0;
|
|
75
|
+
const out = pickFiniteNumber(usage, ["outputTokens", "completion_tokens", "tokensOut"]);
|
|
76
|
+
const reasoning = pickFiniteNumber(usage, ["reasoningTokens"]);
|
|
77
|
+
const mergedOut = out !== void 0 || reasoning !== void 0 ? (out ?? 0) + (reasoning ?? 0) : void 0;
|
|
78
|
+
return buildLlmCall(
|
|
79
|
+
{
|
|
80
|
+
inputTokens: usage.inputTokens,
|
|
81
|
+
outputTokens: mergedOut,
|
|
82
|
+
totalCostUsd: data.totalCostUsd,
|
|
83
|
+
model: data.model ?? usage.model
|
|
84
|
+
},
|
|
85
|
+
agentRunName
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
return void 0;
|
|
89
|
+
}
|
|
90
|
+
function buildLlmCall(data, agentRunName) {
|
|
91
|
+
const tokensIn = pickFiniteNumber(data, ["tokensIn", "inputTokens", "prompt_tokens"]);
|
|
92
|
+
const tokensOut = pickFiniteNumber(data, ["tokensOut", "outputTokens", "completion_tokens"]);
|
|
93
|
+
const costUsd = pickFiniteNumber(data, ["costUsd", "totalCostUsd", "cost_usd", "cost"]);
|
|
94
|
+
if (tokensIn === void 0 && tokensOut === void 0 && costUsd === void 0) {
|
|
95
|
+
return void 0;
|
|
96
|
+
}
|
|
97
|
+
const model = typeof data.model === "string" && data.model.length > 0 ? data.model : agentRunName;
|
|
98
|
+
const event = {
|
|
99
|
+
type: "llm_call",
|
|
100
|
+
model
|
|
101
|
+
};
|
|
102
|
+
if (tokensIn !== void 0) event.tokensIn = tokensIn;
|
|
103
|
+
if (tokensOut !== void 0) event.tokensOut = tokensOut;
|
|
104
|
+
if (costUsd !== void 0) event.costUsd = costUsd;
|
|
105
|
+
return event;
|
|
106
|
+
}
|
|
107
|
+
/**
 * Return the value of the first key in `keys` whose entry on `data` is a
 * finite number.
 *
 * @param {Record<string, unknown>} data - Object to read from.
 * @param {string[]} keys - Candidate property names, in priority order.
 * @returns {number | undefined} The first finite numeric value, or `undefined`
 *   when no key qualifies.
 */
function pickFiniteNumber(data, keys) {
  for (const name of keys) {
    const candidate = data[name];
    // Number.isFinite never coerces: strings, NaN and ±Infinity all fail it.
    if (Number.isFinite(candidate)) return candidate;
  }
  return void 0;
}
|
|
114
|
+
/**
 * Project a sandbox event onto the runtime stream-event vocabulary.
 *
 * - `message.part.updated` with a `text` part becomes `text_delta`.
 * - `message.part.updated` with a `reasoning` or `thinking` part becomes
 *   `reasoning_delta`.
 * - Every other event type is handed to `extractLlmCallEvent`.
 *
 * For part updates the text is `data.delta` when it is a string, otherwise
 * `data.part.text` when that is a string.
 *
 * @param {object} event - Sandbox event (`{ type, data }`).
 * @param {{ agentRunName?: string }} [opts] - `agentRunName` defaults to `"agent"`.
 * @returns {object | undefined} The mapped event, or `undefined` when the
 *   event is not an object or has no projection.
 */
function mapSandboxEvent(event, opts = {}) {
  if (!event || typeof event !== "object") return void 0;
  if (String(event.type ?? "") !== "message.part.updated") {
    return extractLlmCallEvent(event, opts.agentRunName ?? "agent");
  }
  const isRecord = (candidate) => Boolean(candidate) && typeof candidate === "object";
  const payload = isRecord(event.data) ? event.data : {};
  const part = isRecord(payload.part) ? payload.part : {};
  const partType = String(part.type ?? "");
  let text;
  if (typeof payload.delta === "string") {
    text = payload.delta;
  } else if (typeof part.text === "string") {
    text = part.text;
  } else {
    return void 0;
  }
  switch (partType) {
    case "text":
      return { type: "text_delta", text };
    case "reasoning":
    case "thinking":
      return { type: "reasoning_delta", text };
    default:
      return void 0;
  }
}
|
|
131
|
+
|
|
132
|
+
// src/runtime/util.ts
|
|
133
|
+
/**
 * Best-effort delete of a sandbox box.
 *
 * @param {{ delete?: () => unknown } | undefined} box - Box to delete; may be
 *   missing or lack a `delete` method.
 * @returns {Promise<boolean>} `false` only when `box.delete()` threw or
 *   rejected; `true` when it succeeded or there was nothing to call.
 */
async function deleteBoxSafe(box) {
  if (typeof box?.delete !== "function") return true;
  let deleted = true;
  try {
    await box.delete();
  } catch {
    // Failure is reported through the return value, never thrown.
    deleted = false;
  }
  return deleted;
}
|
|
142
|
+
/**
 * Short base36 id for trace correlation. Not cryptographic, not collision-free.
 *
 * A single `Math.random().toString(36)` draw yields a variable number of
 * digits (roughly 11 at most, fewer when the mantissa is short, none when the
 * draw is exactly 0), so one draw cannot guarantee `len` characters. Digits
 * are accumulated across draws until the requested length is reached.
 *
 * @param {number} [len=8] - Number of characters wanted. Fractions are
 *   truncated; zero, negative and NaN yield `""`.
 * @returns {string} Exactly `len` characters from `[0-9a-z]`.
 */
function randomSuffix(len = 8) {
  const want = Math.trunc(len);
  if (!(want > 0)) return "";
  // An unbounded request cannot be filled; fall back to whatever one draw gives.
  if (!Number.isFinite(want)) return Math.random().toString(36).slice(2);
  let out = "";
  while (out.length < want) {
    // slice(2) drops the leading "0." of the base36 fraction.
    out += Math.random().toString(36).slice(2);
  }
  return out.slice(0, want);
}
|
|
145
|
+
function randomUuid() {
|
|
146
|
+
return crypto.randomUUID();
|
|
147
|
+
}
|
|
148
|
+
/**
 * Construct the error used to signal an abort.
 *
 * @returns {Error} An `Error` with message `"aborted"` and `name` set to
 *   `"AbortError"`.
 */
function abortError() {
  return Object.assign(new Error("aborted"), { name: "AbortError" });
}
|
|
153
|
+
/**
 * Unconditionally throw the error built by `abortError()`.
 *
 * @throws {Error} Always.
 */
function throwAbort() {
  const failure = abortError();
  throw failure;
}
|
|
156
|
+
/**
 * Throw the error built by `abortError()` when `signal` is already aborted;
 * otherwise do nothing. A missing signal is treated as not aborted.
 *
 * @param {AbortSignal | undefined} signal - Signal to inspect.
 * @throws {Error} When `signal.aborted` is truthy.
 */
function throwIfAborted(signal) {
  if (!signal?.aborted) return;
  throw abortError();
}
|
|
159
|
+
/**
 * Wait `ms` milliseconds, waking early if `signal` aborts.
 *
 * The promise always resolves (never rejects), including on abort, so callers
 * that care must re-check the signal afterwards. Whichever of the timer or the
 * abort listener fires first tears the other one down.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional signal that cuts the wait short.
 * @returns {Promise<void>}
 */
function sleep(ms, signal) {
  return new Promise((done) => {
    if (signal?.aborted) {
      done();
      return;
    }
    if (!signal) {
      // No signal: nothing to unregister, a bare timer suffices.
      setTimeout(() => done(), ms);
      return;
    }
    const wakeEarly = () => {
      clearTimeout(timer);
      done();
    };
    const timer = setTimeout(() => {
      signal.removeEventListener("abort", wakeEarly);
      done();
    }, ms);
    signal.addEventListener("abort", wakeEarly, { once: true });
  });
}
|
|
179
|
+
/**
 * Race a promise against a timeout without ever rejecting.
 *
 * Resolves with the settled value if `promise` fulfils in time. Resolves with
 * `undefined` if it rejects or if `ms` elapses first. The timer is cleared as
 * soon as `promise` settles.
 *
 * The input is passed through `Promise.resolve` so that plain values,
 * foreign thenables and nullish arguments are handled too. Calling `.then`
 * directly on such an input would throw inside the executor after the timer
 * was armed, producing a rejection and a dangling timer. For a native promise
 * `Promise.resolve` returns the same object, so behaviour is unchanged.
 *
 * @param {Promise<T> | T} promise - Work to wait for.
 * @param {number} ms - Timeout in milliseconds.
 * @returns {Promise<T | undefined>}
 * @template T
 */
function withTimeout(promise, ms) {
  return new Promise((resolve) => {
    const timer = setTimeout(() => resolve(void 0), ms);
    const settle = (value) => {
      clearTimeout(timer);
      resolve(value);
    };
    Promise.resolve(promise).then(settle, () => settle(void 0));
  });
}
|
|
194
|
+
/**
 * Render any value as a string without throwing.
 *
 * - Strings pass through unchanged (no JSON quoting).
 * - Other values go through `JSON.stringify`. When that returns `undefined`
 *   (e.g. for `undefined`, functions, symbols) `String(value)` is used.
 * - When `JSON.stringify` throws (cycles, BigInt), `String(value)` is used.
 *   If that throws as well (a null-prototype object, or a `toString` that
 *   throws), `Object.prototype.toString` supplies a tag such as
 *   `"[object Object]"`.
 *
 * @param {unknown} value - Value to render.
 * @param {{ pretty?: boolean, max?: number }} [opts] - `pretty` indents JSON
 *   by two spaces. `max` truncates to at most that many UTF-16 code units and
 *   appends an ellipsis.
 * @returns {string}
 */
function stringifySafe(value, opts = {}) {
  let s;
  if (typeof value === "string") {
    s = value;
  } else {
    try {
      const json = opts.pretty ? JSON.stringify(value, null, 2) : JSON.stringify(value);
      s = json ?? String(value);
    } catch {
      try {
        s = String(value);
      } catch {
        // String() needs a usable toString/valueOf; this tag lookup does not.
        s = Object.prototype.toString.call(value);
      }
    }
  }
  if (opts.max !== void 0 && s.length > opts.max) {
    let end = opts.max;
    const last = s.charCodeAt(end - 1);
    // Do not cut a surrogate pair in half: drop a trailing high surrogate.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
    return `${s.slice(0, end)}\u2026`;
  }
  return s;
}
|
|
209
|
+
/**
 * Create a fresh token-usage accumulator with both counters at zero.
 *
 * @returns {{ input: number, output: number }} A new object on every call.
 */
function zeroTokenUsage() {
  const usage = { input: 0, output: 0 };
  return usage;
}
|
|
212
|
+
/**
 * Add `delta` into `acc` in place. A `null` or `undefined` field on `delta`
 * counts as zero.
 *
 * @param {{ input: number, output: number }} acc - Accumulator, mutated.
 * @param {{ input?: number, output?: number }} delta - Amounts to add.
 * @returns {void}
 */
function addTokenUsage(acc, delta) {
  for (const field of ["input", "output"]) {
    acc[field] += delta[field] ?? 0;
  }
}
|
|
216
|
+
/**
 * Map `items` through `fn` with at most `limit` calls in flight at once,
 * preserving input order in the result.
 *
 * On the first failure no new items are picked up; calls that are already in
 * flight are awaited, and only then is the first error rethrown. Workers
 * record failures instead of rejecting, so `Promise.all` below waits for every
 * worker rather than settling on the first rejection while other calls are
 * still running. Later failures from in-flight calls are dropped in favour of
 * the first one.
 *
 * `limit` is floored and clamped to at least 1. `NaN` is treated as 1; without
 * that, `Math.max(1, NaN)` is `NaN`, zero workers are created, and an array of
 * holes is returned without `fn` ever being called.
 *
 * @param {readonly T[]} items - Inputs.
 * @param {number} limit - Maximum number of concurrent `fn` calls.
 * @param {(item: T, index: number) => Promise<R>} fn - Mapper.
 * @returns {Promise<R[]>} Results in input order.
 * @throws The first error thrown or rejected by `fn`.
 * @template T, R
 */
async function mapWithConcurrency(items, limit, fn) {
  const floored = Math.floor(limit);
  const bound = Number.isNaN(floored) ? 1 : Math.max(1, floored);
  const results = new Array(items.length);
  let next = 0;
  let failed = false;
  let firstError;
  const worker = async () => {
    while (!failed) {
      const i = next;
      next += 1;
      if (i >= items.length) return;
      try {
        results[i] = await fn(items[i], i);
      } catch (err) {
        // Keep only the earliest failure; `failed` stops every worker's loop.
        if (!failed) {
          failed = true;
          firstError = err;
        }
        return;
      }
    }
  };
  const workerCount = Math.min(bound, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
|
|
238
|
+
|
|
239
|
+
export {
|
|
240
|
+
SessionMismatchError,
|
|
241
|
+
BackendTransportError,
|
|
242
|
+
RuntimeRunStateError,
|
|
243
|
+
PlannerError,
|
|
244
|
+
AnalystError,
|
|
245
|
+
AgentEvalError2 as AgentEvalError,
|
|
246
|
+
ConfigError,
|
|
247
|
+
JudgeError,
|
|
248
|
+
NotFoundError,
|
|
249
|
+
ValidationError,
|
|
250
|
+
deleteBoxSafe,
|
|
251
|
+
randomSuffix,
|
|
252
|
+
randomUuid,
|
|
253
|
+
throwAbort,
|
|
254
|
+
throwIfAborted,
|
|
255
|
+
sleep,
|
|
256
|
+
withTimeout,
|
|
257
|
+
stringifySafe,
|
|
258
|
+
zeroTokenUsage,
|
|
259
|
+
addTokenUsage,
|
|
260
|
+
mapWithConcurrency,
|
|
261
|
+
extractLlmCallEvent,
|
|
262
|
+
mapSandboxEvent
|
|
263
|
+
};
|
|
264
|
+
//# sourceMappingURL=chunk-PRX45WE2.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors.ts","../src/runtime/sandbox-events.ts","../src/runtime/util.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. 
This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n /**\n * Truncated upstream response body (≤2 KiB) when available. Diagnostic\n * only — surfaces in `backend_error.error.body` and `final.error.body`\n * so operators can see \"free_tier_limit\", \"invalid_api_key\", etc. 
without\n * cracking the log line open.\n */\n readonly body?: string\n\n constructor(\n backend: string,\n message: string,\n options?: { cause?: unknown; status?: number; body?: string },\n ) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n this.body = options?.body\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/**\n * @stable\n *\n * The dynamic-loop planner returned an unusable topology move — the LLM emitted\n * no parseable envelope, an unknown `kind`, or a structurally-invalid move\n * (e.g. a fanout with zero tasks). This is a structural failure of the\n * agent-authored topology, not a config mistake: the planner ran but its output\n * cannot drive the kernel. Carries `validation` so cross-package handlers can\n * pattern-match without importing the runtime. Fail loud — never substitute a\n * default move, or the loop silently runs a topology nobody chose.\n */\nexport class PlannerError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/**\n * The analyst loop could not read or run over a round's trace — e.g. an empty round\n * (no iterations to analyze) or a malformed trace projection. Fail loud: a silent empty\n * store would mask a broken capture path and the driver would steer on nothing.\n */\nexport class AnalystError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n","/**\n * Sandbox-event → runtime-event mapping.\n *\n * The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? 
}`\n * whose `type` vocabulary is backend-determined (opencode, etc.) rather than\n * enumerated by the SDK. Two consumers project it:\n * - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off\n * every cost-bearing event, regardless of stream shape;\n * - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects\n * incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.\n *\n * Both live here so the empirically-observed `type` vocabulary has one home.\n */\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport type { RuntimeStreamEvent } from '../types'\n\n/**\n * Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when\n * the event carries usage/cost data. Returns `undefined` for non-cost events\n * so the kernel can iterate the full stream without branching.\n *\n * Canonical cost-carrying types observed in the wild:\n * - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`\n * - `message.completed` / `result` — `data: { usage: { inputTokens,\n * outputTokens, totalCostUsd? } }`\n * - `cost.usage` / `usage` — same shape under a dedicated type\n *\n * Numeric coercion is strict: `Number.isFinite` gates every accumulator write\n * so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.\n */\nexport function extractLlmCallEvent(\n event: SandboxEvent,\n agentRunName: string,\n): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {\n if (!event || typeof event !== 'object') return undefined\n const type = String(event.type ?? '')\n const data =\n event.data && typeof event.data === 'object'\n ? 
(event.data as Record<string, unknown>)\n : ({} as Record<string, unknown>)\n\n if (type === 'llm_call' || type === 'cost.usage' || type === 'usage') {\n return buildLlmCall(data, agentRunName)\n }\n if (type === 'message.completed' || type === 'result' || type === 'final') {\n const usage = data.usage as Record<string, unknown> | undefined\n if (!usage || typeof usage !== 'object') return undefined\n return buildLlmCall({ ...usage, model: data.model ?? usage.model }, agentRunName)\n }\n // sandbox 0.4.0 terminal event: `data = { tokenUsage: { inputTokens, outputTokens,\n // reasoningTokens, cacheReadInputTokens }, totalCostUsd }`. Usage lives under\n // `tokenUsage` (not `usage`) and the cost is top-level — neither matched the\n // branches above, so an in-process loopDispatch run reported {0,0} and the\n // backend-integrity guard misread a real run as a stub. Reasoning tokens are\n // billed output (reasoning models), so they fold into the output count.\n if (type === 'done') {\n const usage = data.tokenUsage as Record<string, unknown> | undefined\n if (!usage || typeof usage !== 'object') return undefined\n const out = pickFiniteNumber(usage, ['outputTokens', 'completion_tokens', 'tokensOut'])\n const reasoning = pickFiniteNumber(usage, ['reasoningTokens'])\n const mergedOut =\n out !== undefined || reasoning !== undefined ? (out ?? 0) + (reasoning ?? 0) : undefined\n return buildLlmCall(\n {\n inputTokens: usage.inputTokens,\n outputTokens: mergedOut,\n totalCostUsd: data.totalCostUsd,\n model: data.model ?? 
usage.model,\n },\n agentRunName,\n )\n }\n return undefined\n}\n\nfunction buildLlmCall(\n data: Record<string, unknown>,\n agentRunName: string,\n): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {\n const tokensIn = pickFiniteNumber(data, ['tokensIn', 'inputTokens', 'prompt_tokens'])\n const tokensOut = pickFiniteNumber(data, ['tokensOut', 'outputTokens', 'completion_tokens'])\n const costUsd = pickFiniteNumber(data, ['costUsd', 'totalCostUsd', 'cost_usd', 'cost'])\n if (tokensIn === undefined && tokensOut === undefined && costUsd === undefined) {\n return undefined\n }\n const model = typeof data.model === 'string' && data.model.length > 0 ? data.model : agentRunName\n const event: RuntimeStreamEvent & { type: 'llm_call' } = {\n type: 'llm_call',\n model,\n }\n if (tokensIn !== undefined) event.tokensIn = tokensIn\n if (tokensOut !== undefined) event.tokensOut = tokensOut\n if (costUsd !== undefined) event.costUsd = costUsd\n return event\n}\n\nfunction pickFiniteNumber(data: Record<string, unknown>, keys: string[]): number | undefined {\n for (const key of keys) {\n const value = data[key]\n if (typeof value === 'number' && Number.isFinite(value)) return value\n }\n return undefined\n}\n\n/**\n * Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,\n * for runtimes that bridge a sandbox `streamPrompt` into the\n * `AgentRuntime.act` streaming contract. 
Returns `undefined` for events that\n * have no faithful projection — the raw stream is preserved separately for the\n * `OutputAdapter`, so an unmapped event never loses data.\n *\n * Mapped (the task-optional incremental variants — no synthesized task\n * lifecycle, no guessed tool-part shapes):\n * - `message.part.updated` text part → `text_delta`\n * - `message.part.updated` reasoning/thinking part → `reasoning_delta`\n * - cost-bearing events → `llm_call` (shared with the ledger extractor)\n *\n * The opencode backend emits incremental text as\n * `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;\n * `delta` is the increment, `part.text` the running accumulation.\n */\nexport function mapSandboxEvent(\n event: SandboxEvent,\n opts: { agentRunName?: string } = {},\n): RuntimeStreamEvent | undefined {\n if (!event || typeof event !== 'object') return undefined\n const type = String(event.type ?? '')\n const data =\n event.data && typeof event.data === 'object'\n ? (event.data as Record<string, unknown>)\n : ({} as Record<string, unknown>)\n\n if (type === 'message.part.updated') {\n const part =\n data.part && typeof data.part === 'object' ? (data.part as Record<string, unknown>) : {}\n const partType = String(part.type ?? '')\n const delta = typeof data.delta === 'string' ? data.delta : undefined\n const text = delta ?? (typeof part.text === 'string' ? part.text : undefined)\n if (text === undefined) return undefined\n if (partType === 'text') return { type: 'text_delta', text }\n if (partType === 'reasoning' || partType === 'thinking')\n return { type: 'reasoning_delta', text }\n return undefined\n }\n\n return extractLlmCallEvent(event, opts.agentRunName ?? 'agent')\n}\n","/**\n * @experimental\n *\n * Internal loop-kernel utilities shared across the kernel, drivers, and the\n * sandbox-acquire layer. 
Not part of the public barrel surface.\n */\n\nimport type { SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopTokenUsage } from './types'\n\n/**\n * Best-effort sandbox delete. Skips instances without a `delete` (test fakes);\n * swallows errors (the platform reaps on expiry). Returns `false` when delete\n * threw, `true` otherwise, so callers can surface a leak if they choose.\n */\nexport async function deleteBoxSafe(box: SandboxInstance | undefined): Promise<boolean> {\n if (!box || typeof (box as { delete?: unknown }).delete !== 'function') return true\n try {\n await box.delete()\n return true\n } catch {\n return false\n }\n}\n\n/** Short base36 id for trace correlation. Not cryptographic, not collision-free. */\nexport function randomSuffix(len = 8): string {\n return Math.random()\n .toString(36)\n .slice(2, 2 + len)\n}\n\n/** Collision-resistant id for sandbox naming (find-by-name recovery must be unique). */\nexport function randomUuid(): string {\n return crypto.randomUUID()\n}\n\n/** Construct an AbortError. Downstream code pattern-matches on `err.name`. */\nexport function abortError(): Error {\n const err = new Error('aborted')\n err.name = 'AbortError'\n return err\n}\n\n/** Throw an AbortError. */\nexport function throwAbort(): never {\n throw abortError()\n}\n\n/** Throw if the signal is already aborted; otherwise no-op. */\nexport function throwIfAborted(signal: AbortSignal | undefined): void {\n if (signal?.aborted) throw abortError()\n}\n\n/**\n * Sleep that resolves early on abort and always clears its timer so it never\n * keeps the event loop alive. 
Resolves (does not reject) on abort — callers\n * re-check the signal explicitly after the sleep.\n */\nexport function sleep(ms: number, signal?: AbortSignal): Promise<void> {\n return new Promise((resolve) => {\n if (signal?.aborted) {\n resolve()\n return\n }\n let onAbort: (() => void) | undefined\n const timer = setTimeout(() => {\n if (onAbort && signal) signal.removeEventListener('abort', onAbort)\n resolve()\n }, ms)\n if (signal) {\n onAbort = () => {\n clearTimeout(timer)\n resolve()\n }\n signal.addEventListener('abort', onAbort, { once: true })\n }\n })\n}\n\n/**\n * Race a promise against a timeout. Resolves with the value if it settles in\n * time, otherwise resolves with `undefined`. Always clears the timer.\n */\nexport function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T | undefined> {\n return new Promise<T | undefined>((resolve) => {\n const timer = setTimeout(() => resolve(undefined), ms)\n promise.then(\n (value) => {\n clearTimeout(timer)\n resolve(value)\n },\n () => {\n clearTimeout(timer)\n resolve(undefined)\n },\n )\n })\n}\n\ninterface StringifyOptions {\n /** Pretty-print with 2-space indent. Default false (compact). */\n pretty?: boolean\n /** Truncate to this many chars, appending `…`. Default unbounded. */\n max?: number\n}\n\n/**\n * `JSON.stringify` with a `String()` fallback on throw (cyclic / non-JSON).\n * Strings pass through unstringified so a preview of a string output is the\n * string itself, not a quoted re-encoding.\n */\nexport function stringifySafe(value: unknown, opts: StringifyOptions = {}): string {\n let s: string\n try {\n if (typeof value === 'string') {\n s = value\n } else {\n const json = opts.pretty ? JSON.stringify(value, null, 2) : JSON.stringify(value)\n s = json ?? String(value)\n }\n } catch {\n s = String(value)\n }\n if (opts.max !== undefined && s.length > opts.max) return `${s.slice(0, opts.max)}…`\n return s\n}\n\n/** A fresh zero token-usage accumulator. 
*/\nexport function zeroTokenUsage(): LoopTokenUsage {\n return { input: 0, output: 0 }\n}\n\n/** Add `delta` into `acc` in place. Missing fields count as zero. */\nexport function addTokenUsage(acc: LoopTokenUsage, delta: Partial<LoopTokenUsage>): void {\n acc.input += delta.input ?? 0\n acc.output += delta.output ?? 0\n}\n\n/**\n * Map `items` through `fn` with at most `limit` calls in flight at once,\n * preserving input order in the result. On the first `fn` rejection no NEW\n * items are picked up; already-in-flight calls are awaited, then the first\n * error is rethrown. `limit` is clamped to ≥ 1.\n *\n * Used where a burst of provisioning (e.g. forking N child boxes) must respect\n * the loop's concurrency bound instead of firing all N at once.\n */\nexport async function mapWithConcurrency<T, R>(\n items: readonly T[],\n limit: number,\n fn: (item: T, index: number) => Promise<R>,\n): Promise<R[]> {\n const bound = Math.max(1, Math.floor(limit))\n const results = new Array<R>(items.length)\n let next = 0\n let failed = false\n const worker = async (): Promise<void> => {\n while (!failed) {\n const i = next\n next += 1\n if (i >= items.length) return\n try {\n results[i] = await fn(items[i] as T, i)\n } catch (err) {\n failed = true\n throw err\n }\n }\n }\n const workerCount = Math.min(bound, items.length)\n await Promise.all(Array.from({ length: workerCount }, () => worker()))\n return 
results\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA;AAAA,EAET,YACE,SACA,SACA,SACA;AACA,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AACvB,SAAK,OAAO,SAAS;AAAA,EACvB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAaO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAOO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;;;ACjGO,SAAS,oBACd,OACA,cACyD;AACzD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,QAAM,OAAO,OAAO,MAAM,QAAQ,EAAE;AACpC,QAAM,OACJ,MAAM,QAAQ,OAAO,MAAM,SAAS,WAC/B,MAAM,OACN,CAAC;AAER,MAAI,SAAS,cAAc,SAAS,gBAAgB,SAAS,SAAS;AACpE,WAAO,aAAa,MAAM,YAAY;AAAA,EACxC;AACA,MAAI,SAAS,uBAAuB,SAAS,YAAY,SAAS,SAAS;AACzE,UAAM,QAAQ,KAAK;AACnB,QAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,WAAO,aAAa,EAAE,GAAG,OAAO,OAAO,KAAK,SAAS,MAAM,MAAM,GAAG,YAAY;AAAA,EAClF;AAOA,MAAI,SAAS,QAAQ;AACnB,UAAM,QAAQ,KAAK;AACnB,QAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,UAAM,MAAM,iBAAiB,OAAO,CAAC,gBAAgB,qBAAqB,WAAW,CAAC;AACtF,UAAM,YAAY,iBAAiB,OAAO,CAAC,iBAAiB,CAAC;AAC7D,UAAM,YACJ,QAAQ,UAAa,cAAc,UAAa,OAAO,MAAM,aAAa,KAAK;AACjF,WAAO;AAAA,MACL;AAAA,QACE,aAAa,MAAM;AAAA,QACnB,cAAc;AAAA,QACd,cAAc,KAAK;AAAA,QACnB,OAAO,KAAK,SAAS,MAAM;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,aACP,MACA,cACyD;AACzD,QAAM,WAAW,iBAAiB,MAAM,CAAC,YAAY,eAAe,eAAe,CAAC;AACpF,QAAM,YAAY,iBAAiB,MAAM,CAAC,aAAa,gBAAgB,mBAAmB,CAAC;AAC3F,QAAM,UAAU,iBAAiB,MAAM,CAAC,WAAW,gBAAgB,YAAY,MAAM,CAAC;AACtF,MAAI,aAAa,UAAa,cAAc,UAAa,YAAY,QAAW;AAC9E,WAAO;AAAA,EACT;AACA,QAAM,QAAQ,OAAO,KAAK,UAAU,YAAY,KAAK,MAAM,SAA
S,IAAI,KAAK,QAAQ;AACrF,QAAM,QAAmD;AAAA,IACvD,MAAM;AAAA,IACN;AAAA,EACF;AACA,MAAI,aAAa,OAAW,OAAM,WAAW;AAC7C,MAAI,cAAc,OAAW,OAAM,YAAY;AAC/C,MAAI,YAAY,OAAW,OAAM,UAAU;AAC3C,SAAO;AACT;AAEA,SAAS,iBAAiB,MAA+B,MAAoC;AAC3F,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,KAAK,GAAG;AACtB,QAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,EAAG,QAAO;AAAA,EAClE;AACA,SAAO;AACT;AAmBO,SAAS,gBACd,OACA,OAAkC,CAAC,GACH;AAChC,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,QAAM,OAAO,OAAO,MAAM,QAAQ,EAAE;AACpC,QAAM,OACJ,MAAM,QAAQ,OAAO,MAAM,SAAS,WAC/B,MAAM,OACN,CAAC;AAER,MAAI,SAAS,wBAAwB;AACnC,UAAM,OACJ,KAAK,QAAQ,OAAO,KAAK,SAAS,WAAY,KAAK,OAAmC,CAAC;AACzF,UAAM,WAAW,OAAO,KAAK,QAAQ,EAAE;AACvC,UAAM,QAAQ,OAAO,KAAK,UAAU,WAAW,KAAK,QAAQ;AAC5D,UAAM,OAAO,UAAU,OAAO,KAAK,SAAS,WAAW,KAAK,OAAO;AACnE,QAAI,SAAS,OAAW,QAAO;AAC/B,QAAI,aAAa,OAAQ,QAAO,EAAE,MAAM,cAAc,KAAK;AAC3D,QAAI,aAAa,eAAe,aAAa;AAC3C,aAAO,EAAE,MAAM,mBAAmB,KAAK;AACzC,WAAO;AAAA,EACT;AAEA,SAAO,oBAAoB,OAAO,KAAK,gBAAgB,OAAO;AAChE;;;ACpIA,eAAsB,cAAc,KAAoD;AACtF,MAAI,CAAC,OAAO,OAAQ,IAA6B,WAAW,WAAY,QAAO;AAC/E,MAAI;AACF,UAAM,IAAI,OAAO;AACjB,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,aAAa,MAAM,GAAW;AAC5C,SAAO,KAAK,OAAO,EAChB,SAAS,EAAE,EACX,MAAM,GAAG,IAAI,GAAG;AACrB;AAGO,SAAS,aAAqB;AACnC,SAAO,OAAO,WAAW;AAC3B;AAGO,SAAS,aAAoB;AAClC,QAAM,MAAM,IAAI,MAAM,SAAS;AAC/B,MAAI,OAAO;AACX,SAAO;AACT;AAGO,SAAS,aAAoB;AAClC,QAAM,WAAW;AACnB;AAGO,SAAS,eAAe,QAAuC;AACpE,MAAI,QAAQ,QAAS,OAAM,WAAW;AACxC;AAOO,SAAS,MAAM,IAAY,QAAqC;AACrE,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,QAAI,QAAQ,SAAS;AACnB,cAAQ;AACR;AAAA,IACF;AACA,QAAI;AACJ,UAAM,QAAQ,WAAW,MAAM;AAC7B,UAAI,WAAW,OAAQ,QAAO,oBAAoB,SAAS,OAAO;AAClE,cAAQ;AAAA,IACV,GAAG,EAAE;AACL,QAAI,QAAQ;AACV,gBAAU,MAAM;AACd,qBAAa,KAAK;AAClB,gBAAQ;AAAA,MACV;AACA,aAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,IAC1D;AAAA,EACF,CAAC;AACH;AAMO,SAAS,YAAe,SAAqB,IAAoC;AACtF,SAAO,IAAI,QAAuB,CAAC,YAAY;AAC7C,UAAM,QAAQ,WAAW,MAAM,QAAQ,MAAS,GAAG,EAAE;AACrD,YAAQ;AAAA,MACN,CAAC,UAAU;AACT,qBAAa,KAAK;AAClB,gBAAQ,KAAK;AAAA,MACf;AAAA,MACA,MAAM;AACJ,qBAAa,KAAK;AAClB,gBAAQ,MAAS;AAAA,MACnB;AAAA,IACF;AAAA,E
ACF,CAAC;AACH;AAcO,SAAS,cAAc,OAAgB,OAAyB,CAAC,GAAW;AACjF,MAAI;AACJ,MAAI;AACF,QAAI,OAAO,UAAU,UAAU;AAC7B,UAAI;AAAA,IACN,OAAO;AACL,YAAM,OAAO,KAAK,SAAS,KAAK,UAAU,OAAO,MAAM,CAAC,IAAI,KAAK,UAAU,KAAK;AAChF,UAAI,QAAQ,OAAO,KAAK;AAAA,IAC1B;AAAA,EACF,QAAQ;AACN,QAAI,OAAO,KAAK;AAAA,EAClB;AACA,MAAI,KAAK,QAAQ,UAAa,EAAE,SAAS,KAAK,IAAK,QAAO,GAAG,EAAE,MAAM,GAAG,KAAK,GAAG,CAAC;AACjF,SAAO;AACT;AAGO,SAAS,iBAAiC;AAC/C,SAAO,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC/B;AAGO,SAAS,cAAc,KAAqB,OAAsC;AACvF,MAAI,SAAS,MAAM,SAAS;AAC5B,MAAI,UAAU,MAAM,UAAU;AAChC;AAWA,eAAsB,mBACpB,OACA,OACA,IACc;AACd,QAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,CAAC;AAC3C,QAAM,UAAU,IAAI,MAAS,MAAM,MAAM;AACzC,MAAI,OAAO;AACX,MAAI,SAAS;AACb,QAAM,SAAS,YAA2B;AACxC,WAAO,CAAC,QAAQ;AACd,YAAM,IAAI;AACV,cAAQ;AACR,UAAI,KAAK,MAAM,OAAQ;AACvB,UAAI;AACF,gBAAQ,CAAC,IAAI,MAAM,GAAG,MAAM,CAAC,GAAQ,CAAC;AAAA,MACxC,SAAS,KAAK;AACZ,iBAAS;AACT,cAAM;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,QAAM,cAAc,KAAK,IAAI,OAAO,MAAM,MAAM;AAChD,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,MAAM,OAAO,CAAC,CAAC;AACrE,SAAO;AACT;","names":["AgentEvalError"]}
|
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
import {
|
|
2
|
-
createFanoutVoteDriver
|
|
3
|
-
} from "./chunk-PY6NMZYX.js";
|
|
4
|
-
|
|
5
1
|
// src/profiles/coder.ts
|
|
6
2
|
var DEFAULT_MAX_DIFF_LINES = 400;
|
|
7
3
|
function coderProfile(options = {}) {
|
|
@@ -38,7 +34,11 @@ function multiHarnessCoderFanout(options = {}) {
|
|
|
38
34
|
return agentRunSpec;
|
|
39
35
|
});
|
|
40
36
|
const { output, validator } = coderProfile();
|
|
41
|
-
const driver =
|
|
37
|
+
const driver = {
|
|
38
|
+
name: "fanout",
|
|
39
|
+
plan: async (task, history) => history.length === 0 ? agentRuns.map(() => task) : [],
|
|
40
|
+
decide: (history) => history.some((i) => i.verdict?.valid === true) ? "pick-winner" : "fail"
|
|
41
|
+
};
|
|
42
42
|
return { agentRuns, output, validator, driver };
|
|
43
43
|
}
|
|
44
44
|
var DEFAULT_CODER_SYSTEM_PROMPT = [
|
|
@@ -261,4 +261,4 @@ export {
|
|
|
261
261
|
multiHarnessCoderFanout,
|
|
262
262
|
createCoderValidator
|
|
263
263
|
};
|
|
264
|
-
//# sourceMappingURL=chunk-
|
|
264
|
+
//# sourceMappingURL=chunk-QR4UUC5P.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/profiles/coder.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `coderProfile` — opinionated preset for code-modification tasks.\n *\n * The agent is told to:\n * - work on a fresh branch inside the sandbox workspace\n * - keep the patch minimal (under `maxDiffLines`)\n * - avoid `forbiddenPaths`\n * - run `testCmd` and `typecheckCmd`\n * - emit a final JSON result the output adapter parses\n *\n * The profile is stateless and agent-agnostic — `harness` selects the\n * sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For\n * heterogeneous fanout, use `multiHarnessCoderFanout`.\n */\n\nimport type { AgentProfile, SandboxEvent } from '@tangle-network/sandbox'\nimport type {\n AgentRunSpec,\n DefaultVerdict,\n Driver,\n OutputAdapter,\n Validator,\n} from '../runtime/types'\n\nconst DEFAULT_MAX_DIFF_LINES = 400\n\n/** @experimental */\nexport interface CoderTask {\n /** What the agent must accomplish. Free-form prose. */\n goal: string\n /** Absolute path inside the sandbox where the repo lives. */\n repoRoot: string\n /** Default `main`. The branch the agent diffs against. */\n baseBranch?: string\n /** Default `pnpm test --run`. */\n testCmd?: string\n /** Default `pnpm typecheck`. */\n typecheckCmd?: string\n /** Files the agent may inspect for context. Surfaced verbatim in the prompt. */\n contextFiles?: string[]\n /**\n * Paths the agent must not touch. Validator hard-fails on any match.\n * Use glob-free literal path prefixes for unambiguous enforcement.\n */\n forbiddenPaths?: string[]\n /** Default 400. Hard cap; validator hard-fails when exceeded. */\n maxDiffLines?: number\n}\n\n/** @experimental */\nexport interface CoderOutput {\n /** Branch the agent wrote the patch on. */\n branch: string\n /** Unified diff (`git diff <base>..HEAD`). 
*/\n patch: string\n testResult: { passed: boolean; output: string }\n typecheckResult: { passed: boolean; output: string }\n diffStats: { filesChanged: number; insertions: number; deletions: number }\n /** Optional reviewer commentary surfaced by the agent. */\n reviewerNotes?: string\n}\n\n/** @experimental */\nexport interface CoderProfileOptions {\n /** Sandbox-SDK backend.type. Default `'claude-code'`. */\n harness?: string\n /** Default model id passed in `AgentProfile.model.default`. */\n model?: string\n /** Custom system prompt replacement. Default = built-in coder preset. */\n systemPrompt?: string\n /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */\n name?: string\n}\n\n/**\n * Build a coder preset.\n *\n * `validator` enforces test + typecheck + a 400-line default diff cap. For\n * per-task `forbiddenPaths` / `maxDiffLines` enforcement, pass `task` here\n * — the returned validator closes over its constraints. Without a task\n * the validator falls back to the default cap and skips path enforcement.\n *\n * @experimental\n */\nexport function coderProfile(options: CoderProfileOptions & { task?: CoderTask } = {}): {\n profile: AgentProfile\n taskToPrompt: (task: CoderTask) => string\n output: OutputAdapter<CoderOutput>\n validator: Validator<CoderOutput>\n agentRunSpec: AgentRunSpec<CoderTask>\n} {\n const harness = options.harness ?? 'claude-code'\n const name = options.name ?? `coder-${harness}`\n const systemPrompt = options.systemPrompt ?? DEFAULT_CODER_SYSTEM_PROMPT\n const profile: AgentProfile = {\n name,\n description: 'Code-modification agent. Minimal-diff worktree-based coder.',\n prompt: { systemPrompt },\n model: options.model ? 
{ default: options.model } : undefined,\n tools: { git: true, fs: true, shell: true, test_runner: true },\n metadata: { backendType: harness, role: 'coder' },\n }\n const output: OutputAdapter<CoderOutput> = { parse: parseCoderEvents }\n const validator: Validator<CoderOutput> = options.task\n ? createCoderValidator(options.task)\n : createCoderValidator({\n goal: '',\n repoRoot: '',\n forbiddenPaths: [],\n maxDiffLines: DEFAULT_MAX_DIFF_LINES,\n })\n const agentRunSpec: AgentRunSpec<CoderTask> = {\n name,\n profile,\n taskToPrompt: formatCoderPrompt,\n }\n return { profile, taskToPrompt: formatCoderPrompt, output, validator, agentRunSpec }\n}\n\n/** @experimental */\nexport interface MultiHarnessCoderFanoutOptions {\n /**\n * Sandbox-SDK backend.type identifiers, one per parallel agent. Default:\n * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.\n */\n harnesses?: string[]\n /** Optional per-harness model override. Indexed parallel to `harnesses`. */\n models?: (string | undefined)[]\n}\n\n/** @experimental */\nexport function multiHarnessCoderFanout(options: MultiHarnessCoderFanoutOptions = {}): {\n agentRuns: AgentRunSpec<CoderTask>[]\n output: OutputAdapter<CoderOutput>\n validator: Validator<CoderOutput>\n driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>\n} {\n const harnesses =\n options.harnesses && options.harnesses.length > 0\n ? options.harnesses\n : ['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']\n const models = options.models ?? []\n const agentRuns = harnesses.map((harness, i) => {\n const { agentRunSpec } = coderProfile({ harness, model: models[i] })\n return agentRunSpec\n })\n const { output, validator } = coderProfile()\n const driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'> = {\n name: 'fanout',\n plan: async (task, history) => (history.length === 0 ? agentRuns.map(() => task) : []),\n decide: (history) => (history.some((i) => i.verdict?.valid === true) ? 
'pick-winner' : 'fail'),\n }\n return { agentRuns, output, validator, driver }\n}\n\nconst DEFAULT_CODER_SYSTEM_PROMPT = [\n 'You are a coder agent operating inside an isolated sandbox workspace.',\n 'Your job is to deliver a minimal, correct patch for the user-supplied goal.',\n '',\n 'Hard rules:',\n ' 1. Work on a fresh branch off the supplied base. Do not mutate the base branch.',\n ' 2. Never touch a forbidden path. The user will list them explicitly.',\n ' 3. Keep the diff under the max-diff cap. Prefer the smallest change that ships.',\n ' 4. Run the supplied test and typecheck commands before declaring done.',\n ' 5. If either command fails, fix the cause — do not weaken the test or hide the error.',\n '',\n 'When you finish, emit a single final structured message of the shape:',\n ' ```json',\n ' { \"branch\": \"<branch-name>\",',\n ' \"patch\": \"<unified-diff>\",',\n ' \"testResult\": { \"passed\": <bool>, \"output\": \"<stdout/stderr>\" },',\n ' \"typecheckResult\": { \"passed\": <bool>, \"output\": \"<stdout/stderr>\" },',\n ' \"diffStats\": { \"filesChanged\": <int>, \"insertions\": <int>, \"deletions\": <int> },',\n ' \"reviewerNotes\": \"<optional commentary>\" }',\n ' ```',\n].join('\\n')\n\nfunction formatCoderPrompt(task: CoderTask): string {\n const base = task.baseBranch ?? 'main'\n const testCmd = task.testCmd ?? 'pnpm test --run'\n const typecheckCmd = task.typecheckCmd ?? 'pnpm typecheck'\n const maxDiff = task.maxDiffLines ?? DEFAULT_MAX_DIFF_LINES\n const forbidden = task.forbiddenPaths?.length ? task.forbiddenPaths.join(', ') : '(none)'\n const context = task.contextFiles?.length\n ? 
task.contextFiles.map((f) => ` - ${f}`).join('\\n')\n : ' (none)'\n return [\n `Goal: ${task.goal}`,\n `Repo: ${task.repoRoot}`,\n `Base branch: ${base}`,\n `Run tests with: ${testCmd}`,\n `Run typecheck with: ${typecheckCmd}`,\n `Forbidden paths: ${forbidden}`,\n `Max diff lines: ${maxDiff}`,\n 'Context files:',\n context,\n '',\n 'Produce a minimal patch on a fresh branch. Run tests and typecheck before',\n 'returning. Emit the final JSON result block exactly as instructed.',\n ].join('\\n')\n}\n\n/**\n * Walk the event stream and return the last structured `coder.result` payload.\n *\n * The agent is instructed to emit a JSON block; in practice the sandbox SDK\n * lifts the structured payload onto `data.result` of a `result` / `final`\n * event. When the event stream does not contain a structured result, the\n * adapter scans text deltas for a fenced JSON block matching the expected\n * keys. Both shapes converge on `CoderOutput`.\n */\nfunction parseCoderEvents(events: SandboxEvent[]): CoderOutput {\n for (let i = events.length - 1; i >= 0; i -= 1) {\n const event = events[i]\n if (!event) continue\n const type = String(event.type ?? '')\n const data = isRecord(event.data) ? event.data : {}\n if (type === 'result' || type === 'final' || type === 'coder.result') {\n const direct = coerceCoderOutput(data.result ?? data.output ?? data)\n if (direct) return direct\n }\n }\n // Fallback: scan text deltas in reverse for a fenced JSON block.\n for (let i = events.length - 1; i >= 0; i -= 1) {\n const event = events[i]\n if (!event) continue\n const data = isRecord(event.data) ? event.data : {}\n const text = pickString(data.text) ?? 
pickString(data.delta)\n if (!text) continue\n const fenced = extractFencedJson(text)\n if (!fenced) continue\n const coerced = coerceCoderOutput(fenced)\n if (coerced) return coerced\n }\n return {\n branch: '',\n patch: '',\n testResult: { passed: false, output: '' },\n typecheckResult: { passed: false, output: '' },\n diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },\n }\n}\n\n/**\n * Build a validator that closes over a specific `CoderTask`'s constraints.\n *\n * Checks in order:\n * 1. Forbidden-path: any `+++` / `---` header in the patch matching a\n * path prefix in `task.forbiddenPaths` fails hard.\n * 2. Diff size: line count above `task.maxDiffLines` (default 400) fails\n * hard; below cap, the score shrinks linearly.\n * 3. Tests: `output.testResult.passed` must be `true`.\n * 4. Typecheck: `output.typecheckResult.passed` must be `true`.\n *\n * Aggregate score: `0.5 * tests + 0.3 * typecheck + 0.2 * (1 - diffLines/maxDiff)`.\n * `valid` is the conjunction of all four.\n *\n * @experimental\n */\n/**\n * Default-on safety floor (folded from the ai-trading-blueprint delegation\n * MCP): a coder patch that touches a credential-shaped path is rejected\n * regardless of `forbiddenPaths` config. Catches `.env`, private keys,\n * keystores, wallets, and the common secret/credential JSON files.\n */\nconst SECRET_PATH_RE =\n /(^|\\/)(\\.env(\\.|$)|.*\\.(pem|key|p12|pfx|keystore|wallet)|id_rsa|id_ed25519|secrets?\\.json|credentials?\\.json)$/i\n\nexport function createCoderValidator(task: CoderTask): Validator<CoderOutput> {\n const maxDiff = task.maxDiffLines ?? DEFAULT_MAX_DIFF_LINES\n const forbidden = task.forbiddenPaths ?? 
[]\n return {\n async validate(output) {\n const scores: Record<string, number> = {}\n const notes: string[] = []\n let pass = true\n\n const touched = touchedPathsFromPatch(output.patch)\n\n // No-op rejection: an empty patch can trivially \"pass\" tests/typecheck\n // (nothing changed) yet does no work — never a valid coder result.\n if (touched.length === 0 || output.patch.trim().length === 0) {\n pass = false\n scores.nonEmpty = 0\n notes.push('empty patch — no files changed')\n } else {\n scores.nonEmpty = 1\n }\n\n // Secret-path floor: always-on, independent of `forbiddenPaths`.\n const touchedSecrets = touched.filter((p) => SECRET_PATH_RE.test(p))\n if (touchedSecrets.length > 0) {\n pass = false\n scores.noSecrets = 0\n notes.push(`touched secret-shaped paths: ${touchedSecrets.join(', ')}`)\n } else {\n scores.noSecrets = 1\n }\n\n const touchedForbidden = forbidden.filter((path) => {\n const prefix = path.endsWith('/') ? path : `${path}/`\n const exact = prefix.slice(0, -1)\n return touched.some((p) => p === exact || p.startsWith(prefix))\n })\n if (touchedForbidden.length > 0) {\n pass = false\n scores.forbiddenPath = 0\n notes.push(`touched forbidden paths: ${touchedForbidden.join(', ')}`)\n } else {\n scores.forbiddenPath = 1\n }\n\n const diffLines = countDiffLines(output.patch)\n if (diffLines > maxDiff) {\n pass = false\n scores.diffSize = 0\n notes.push(`diff ${diffLines} lines exceeds cap ${maxDiff}`)\n } else {\n scores.diffSize = maxDiff === 0 ? 0 : Math.max(0, 1 - diffLines / maxDiff)\n }\n\n scores.tests = output.testResult.passed ? 1 : 0\n scores.typecheck = output.typecheckResult.passed ? 1 : 0\n if (!output.testResult.passed) {\n pass = false\n notes.push('tests failed')\n }\n if (!output.typecheckResult.passed) {\n pass = false\n notes.push('typecheck failed')\n }\n\n const score = 0.5 * scores.tests + 0.3 * scores.typecheck + 0.2 * scores.diffSize\n const verdict: DefaultVerdict = {\n valid: pass,\n score: Number.isFinite(score) ? 
score : 0,\n scores,\n }\n if (notes.length > 0) verdict.notes = notes.join('; ')\n return verdict\n },\n }\n}\n\nfunction touchedPathsFromPatch(patch: string): string[] {\n const out = new Set<string>()\n for (const line of patch.split(/\\r?\\n/)) {\n if (line.startsWith('+++ ') || line.startsWith('--- ')) {\n const rest = line.slice(4).trim()\n if (rest === '/dev/null') continue\n const stripped = rest.startsWith('a/') || rest.startsWith('b/') ? rest.slice(2) : rest\n out.add(stripped)\n }\n }\n return [...out]\n}\n\nfunction countDiffLines(patch: string): number {\n let count = 0\n for (const line of patch.split(/\\r?\\n/)) {\n if (\n (line.startsWith('+') || line.startsWith('-')) &&\n !line.startsWith('+++') &&\n !line.startsWith('---')\n ) {\n count += 1\n }\n }\n return count\n}\n\nfunction isRecord(value: unknown): value is Record<string, unknown> {\n return value !== null && typeof value === 'object' && !Array.isArray(value)\n}\n\nfunction pickString(value: unknown): string | undefined {\n return typeof value === 'string' && value.length > 0 ? value : undefined\n}\n\nfunction extractFencedJson(text: string): unknown | undefined {\n const match = text.match(/```(?:json)?\\s*([\\s\\S]*?)```/i)\n if (!match) return undefined\n const body = (match[1] ?? '').trim()\n if (!body) return undefined\n try {\n return JSON.parse(body)\n } catch {\n return undefined\n }\n}\n\nfunction coerceCoderOutput(value: unknown): CoderOutput | undefined {\n if (!isRecord(value)) return undefined\n const branch = pickString(value.branch)\n const patch = pickString(value.patch) ?? 
''\n if (branch === undefined) return undefined\n const testResult = coerceCmdResult(value.testResult)\n const typecheckResult = coerceCmdResult(value.typecheckResult)\n const diffStats = coerceDiffStats(value.diffStats)\n return {\n branch,\n patch,\n testResult,\n typecheckResult,\n diffStats,\n reviewerNotes: pickString(value.reviewerNotes),\n }\n}\n\nfunction coerceCmdResult(value: unknown): { passed: boolean; output: string } {\n if (!isRecord(value)) return { passed: false, output: '' }\n return {\n passed: value.passed === true,\n output: pickString(value.output) ?? '',\n }\n}\n\nfunction coerceDiffStats(value: unknown): {\n filesChanged: number\n insertions: number\n deletions: number\n} {\n if (!isRecord(value)) return { filesChanged: 0, insertions: 0, deletions: 0 }\n return {\n filesChanged: toFiniteInt(value.filesChanged),\n insertions: toFiniteInt(value.insertions),\n deletions: toFiniteInt(value.deletions),\n }\n}\n\nfunction toFiniteInt(value: unknown): number {\n if (typeof value !== 'number') return 0\n if (!Number.isFinite(value)) return 0\n return Math.max(0, 
Math.trunc(value))\n}\n"],"mappings":";AA0BA,IAAM,yBAAyB;AA4DxB,SAAS,aAAa,UAAsD,CAAC,GAMlF;AACA,QAAM,UAAU,QAAQ,WAAW;AACnC,QAAM,OAAO,QAAQ,QAAQ,SAAS,OAAO;AAC7C,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,UAAwB;AAAA,IAC5B;AAAA,IACA,aAAa;AAAA,IACb,QAAQ,EAAE,aAAa;AAAA,IACvB,OAAO,QAAQ,QAAQ,EAAE,SAAS,QAAQ,MAAM,IAAI;AAAA,IACpD,OAAO,EAAE,KAAK,MAAM,IAAI,MAAM,OAAO,MAAM,aAAa,KAAK;AAAA,IAC7D,UAAU,EAAE,aAAa,SAAS,MAAM,QAAQ;AAAA,EAClD;AACA,QAAM,SAAqC,EAAE,OAAO,iBAAiB;AACrE,QAAM,YAAoC,QAAQ,OAC9C,qBAAqB,QAAQ,IAAI,IACjC,qBAAqB;AAAA,IACnB,MAAM;AAAA,IACN,UAAU;AAAA,IACV,gBAAgB,CAAC;AAAA,IACjB,cAAc;AAAA,EAChB,CAAC;AACL,QAAM,eAAwC;AAAA,IAC5C;AAAA,IACA;AAAA,IACA,cAAc;AAAA,EAChB;AACA,SAAO,EAAE,SAAS,cAAc,mBAAmB,QAAQ,WAAW,aAAa;AACrF;AAcO,SAAS,wBAAwB,UAA0C,CAAC,GAKjF;AACA,QAAM,YACJ,QAAQ,aAAa,QAAQ,UAAU,SAAS,IAC5C,QAAQ,YACR,CAAC,eAAe,SAAS,kCAAkC;AACjE,QAAM,SAAS,QAAQ,UAAU,CAAC;AAClC,QAAM,YAAY,UAAU,IAAI,CAAC,SAAS,MAAM;AAC9C,UAAM,EAAE,aAAa,IAAI,aAAa,EAAE,SAAS,OAAO,OAAO,CAAC,EAAE,CAAC;AACnE,WAAO;AAAA,EACT,CAAC;AACD,QAAM,EAAE,QAAQ,UAAU,IAAI,aAAa;AAC3C,QAAM,SAAiE;AAAA,IACrE,MAAM;AAAA,IACN,MAAM,OAAO,MAAM,YAAa,QAAQ,WAAW,IAAI,UAAU,IAAI,MAAM,IAAI,IAAI,CAAC;AAAA,IACpF,QAAQ,CAAC,YAAa,QAAQ,KAAK,CAAC,MAAM,EAAE,SAAS,UAAU,IAAI,IAAI,gBAAgB;AAAA,EACzF;AACA,SAAO,EAAE,WAAW,QAAQ,WAAW,OAAO;AAChD;AAEA,IAAM,8BAA8B;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,EAAE,KAAK,IAAI;AAEX,SAAS,kBAAkB,MAAyB;AAClD,QAAM,OAAO,KAAK,cAAc;AAChC,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,eAAe,KAAK,gBAAgB;AAC1C,QAAM,UAAU,KAAK,gBAAgB;AACrC,QAAM,YAAY,KAAK,gBAAgB,SAAS,KAAK,eAAe,KAAK,IAAI,IAAI;AACjF,QAAM,UAAU,KAAK,cAAc,SAC/B,KAAK,aAAa,IAAI,CAAC,MAAM,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI,IAClD;AACJ,SAAO;AAAA,IACL,SAAS,KAAK,IAAI;AAAA,IAClB,SAAS,KAAK,QAAQ;AAAA,IACtB,gBAAgB,IAAI;AAAA,IACpB,mBAAmB,OAAO;AAAA,IAC1B,uBAAuB,YAAY;AAAA,IACnC,oBAAoB,SAAS;AAAA,IAC7B,mBAAmB,OAAO;AAAA,IAC1B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,
EAAE,KAAK,IAAI;AACb;AAWA,SAAS,iBAAiB,QAAqC;AAC7D,WAAS,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,KAAK,GAAG;AAC9C,UAAM,QAAQ,OAAO,CAAC;AACtB,QAAI,CAAC,MAAO;AACZ,UAAM,OAAO,OAAO,MAAM,QAAQ,EAAE;AACpC,UAAM,OAAO,SAAS,MAAM,IAAI,IAAI,MAAM,OAAO,CAAC;AAClD,QAAI,SAAS,YAAY,SAAS,WAAW,SAAS,gBAAgB;AACpE,YAAM,SAAS,kBAAkB,KAAK,UAAU,KAAK,UAAU,IAAI;AACnE,UAAI,OAAQ,QAAO;AAAA,IACrB;AAAA,EACF;AAEA,WAAS,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,KAAK,GAAG;AAC9C,UAAM,QAAQ,OAAO,CAAC;AACtB,QAAI,CAAC,MAAO;AACZ,UAAM,OAAO,SAAS,MAAM,IAAI,IAAI,MAAM,OAAO,CAAC;AAClD,UAAM,OAAO,WAAW,KAAK,IAAI,KAAK,WAAW,KAAK,KAAK;AAC3D,QAAI,CAAC,KAAM;AACX,UAAM,SAAS,kBAAkB,IAAI;AACrC,QAAI,CAAC,OAAQ;AACb,UAAM,UAAU,kBAAkB,MAAM;AACxC,QAAI,QAAS,QAAO;AAAA,EACtB;AACA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,YAAY,EAAE,QAAQ,OAAO,QAAQ,GAAG;AAAA,IACxC,iBAAiB,EAAE,QAAQ,OAAO,QAAQ,GAAG;AAAA,IAC7C,WAAW,EAAE,cAAc,GAAG,YAAY,GAAG,WAAW,EAAE;AAAA,EAC5D;AACF;AAwBA,IAAM,iBACJ;AAEK,SAAS,qBAAqB,MAAyC;AAC5E,QAAM,UAAU,KAAK,gBAAgB;AACrC,QAAM,YAAY,KAAK,kBAAkB,CAAC;AAC1C,SAAO;AAAA,IACL,MAAM,SAAS,QAAQ;AACrB,YAAM,SAAiC,CAAC;AACxC,YAAM,QAAkB,CAAC;AACzB,UAAI,OAAO;AAEX,YAAM,UAAU,sBAAsB,OAAO,KAAK;AAIlD,UAAI,QAAQ,WAAW,KAAK,OAAO,MAAM,KAAK,EAAE,WAAW,GAAG;AAC5D,eAAO;AACP,eAAO,WAAW;AAClB,cAAM,KAAK,qCAAgC;AAAA,MAC7C,OAAO;AACL,eAAO,WAAW;AAAA,MACpB;AAGA,YAAM,iBAAiB,QAAQ,OAAO,CAAC,MAAM,eAAe,KAAK,CAAC,CAAC;AACnE,UAAI,eAAe,SAAS,GAAG;AAC7B,eAAO;AACP,eAAO,YAAY;AACnB,cAAM,KAAK,gCAAgC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,MACxE,OAAO;AACL,eAAO,YAAY;AAAA,MACrB;AAEA,YAAM,mBAAmB,UAAU,OAAO,CAAC,SAAS;AAClD,cAAM,SAAS,KAAK,SAAS,GAAG,IAAI,OAAO,GAAG,IAAI;AAClD,cAAM,QAAQ,OAAO,MAAM,GAAG,EAAE;AAChC,eAAO,QAAQ,KAAK,CAAC,MAAM,MAAM,SAAS,EAAE,WAAW,MAAM,CAAC;AAAA,MAChE,CAAC;AACD,UAAI,iBAAiB,SAAS,GAAG;AAC/B,eAAO;AACP,eAAO,gBAAgB;AACvB,cAAM,KAAK,4BAA4B,iBAAiB,KAAK,IAAI,CAAC,EAAE;AAAA,MACtE,OAAO;AACL,eAAO,gBAAgB;AAAA,MACzB;AAEA,YAAM,YAAY,eAAe,OAAO,KAAK;AAC7C,UAAI,YAAY,SAAS;AACvB,eAAO;AACP,eAAO,WAAW;AAClB,cAAM,KAAK,QAAQ,SAAS,sBAAsB,OAAO,EAAE;AAAA,MAC7D,OAAO;AACL,eAAO,WAAW,YAAY,IAAI,IAAI,KAAK,IAAI,GAAG,IAAI,YAAY,OAAO;AAA
A,MAC3E;AAEA,aAAO,QAAQ,OAAO,WAAW,SAAS,IAAI;AAC9C,aAAO,YAAY,OAAO,gBAAgB,SAAS,IAAI;AACvD,UAAI,CAAC,OAAO,WAAW,QAAQ;AAC7B,eAAO;AACP,cAAM,KAAK,cAAc;AAAA,MAC3B;AACA,UAAI,CAAC,OAAO,gBAAgB,QAAQ;AAClC,eAAO;AACP,cAAM,KAAK,kBAAkB;AAAA,MAC/B;AAEA,YAAM,QAAQ,MAAM,OAAO,QAAQ,MAAM,OAAO,YAAY,MAAM,OAAO;AACzE,YAAM,UAA0B;AAAA,QAC9B,OAAO;AAAA,QACP,OAAO,OAAO,SAAS,KAAK,IAAI,QAAQ;AAAA,QACxC;AAAA,MACF;AACA,UAAI,MAAM,SAAS,EAAG,SAAQ,QAAQ,MAAM,KAAK,IAAI;AACrD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,sBAAsB,OAAyB;AACtD,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,QAAQ,MAAM,MAAM,OAAO,GAAG;AACvC,QAAI,KAAK,WAAW,MAAM,KAAK,KAAK,WAAW,MAAM,GAAG;AACtD,YAAM,OAAO,KAAK,MAAM,CAAC,EAAE,KAAK;AAChC,UAAI,SAAS,YAAa;AAC1B,YAAM,WAAW,KAAK,WAAW,IAAI,KAAK,KAAK,WAAW,IAAI,IAAI,KAAK,MAAM,CAAC,IAAI;AAClF,UAAI,IAAI,QAAQ;AAAA,IAClB;AAAA,EACF;AACA,SAAO,CAAC,GAAG,GAAG;AAChB;AAEA,SAAS,eAAe,OAAuB;AAC7C,MAAI,QAAQ;AACZ,aAAW,QAAQ,MAAM,MAAM,OAAO,GAAG;AACvC,SACG,KAAK,WAAW,GAAG,KAAK,KAAK,WAAW,GAAG,MAC5C,CAAC,KAAK,WAAW,KAAK,KACtB,CAAC,KAAK,WAAW,KAAK,GACtB;AACA,eAAS;AAAA,IACX;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,SAAS,OAAkD;AAClE,SAAO,UAAU,QAAQ,OAAO,UAAU,YAAY,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,WAAW,OAAoC;AACtD,SAAO,OAAO,UAAU,YAAY,MAAM,SAAS,IAAI,QAAQ;AACjE;AAEA,SAAS,kBAAkB,MAAmC;AAC5D,QAAM,QAAQ,KAAK,MAAM,+BAA+B;AACxD,MAAI,CAAC,MAAO,QAAO;AACnB,QAAM,QAAQ,MAAM,CAAC,KAAK,IAAI,KAAK;AACnC,MAAI,CAAC,KAAM,QAAO;AAClB,MAAI;AACF,WAAO,KAAK,MAAM,IAAI;AAAA,EACxB,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,kBAAkB,OAAyC;AAClE,MAAI,CAAC,SAAS,KAAK,EAAG,QAAO;AAC7B,QAAM,SAAS,WAAW,MAAM,MAAM;AACtC,QAAM,QAAQ,WAAW,MAAM,KAAK,KAAK;AACzC,MAAI,WAAW,OAAW,QAAO;AACjC,QAAM,aAAa,gBAAgB,MAAM,UAAU;AACnD,QAAM,kBAAkB,gBAAgB,MAAM,eAAe;AAC7D,QAAM,YAAY,gBAAgB,MAAM,SAAS;AACjD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,eAAe,WAAW,MAAM,aAAa;AAAA,EAC/C;AACF;AAEA,SAAS,gBAAgB,OAAqD;AAC5E,MAAI,CAAC,SAAS,KAAK,EAAG,QAAO,EAAE,QAAQ,OAAO,QAAQ,GAAG;AACzD,SAAO;AAAA,IACL,QAAQ,MAAM,WAAW;AAAA,IACzB,QAAQ,WAAW,MAAM,MAAM,KAAK;AAAA,EACtC;AACF;AAEA,SAAS,gBAAgB,OAIvB;AACA,MAAI,CAAC,SAAS,KAAK,EAAG
,QAAO,EAAE,cAAc,GAAG,YAAY,GAAG,WAAW,EAAE;AAC5E,SAAO;AAAA,IACL,cAAc,YAAY,MAAM,YAAY;AAAA,IAC5C,YAAY,YAAY,MAAM,UAAU;AAAA,IACxC,WAAW,YAAY,MAAM,SAAS;AAAA,EACxC;AACF;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,MAAI,CAAC,OAAO,SAAS,KAAK,EAAG,QAAO;AACpC,SAAO,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,CAAC;AACtC;","names":[]}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// src/profiles/ui-auditor/substrate.ts
|
|
2
|
+
var UI_LENSES = [
|
|
3
|
+
"consistency",
|
|
4
|
+
"hierarchy",
|
|
5
|
+
"layout",
|
|
6
|
+
"ux-flow",
|
|
7
|
+
"duplication",
|
|
8
|
+
"accessibility",
|
|
9
|
+
"responsive",
|
|
10
|
+
"states",
|
|
11
|
+
"content",
|
|
12
|
+
"interaction",
|
|
13
|
+
"performance-perceived",
|
|
14
|
+
"other"
|
|
15
|
+
];
|
|
16
|
+
var UI_FINDING_SEVERITIES = [
|
|
17
|
+
"critical",
|
|
18
|
+
"high",
|
|
19
|
+
"med",
|
|
20
|
+
"low"
|
|
21
|
+
];
|
|
22
|
+
|
|
23
|
+
export {
|
|
24
|
+
UI_LENSES,
|
|
25
|
+
UI_FINDING_SEVERITIES
|
|
26
|
+
};
|
|
27
|
+
//# sourceMappingURL=chunk-WIR4HOOJ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/profiles/ui-auditor/substrate.ts"],"sourcesContent":["/**\n * @experimental\n *\n * UI audit finding shapes — the unit of evidence a contributor can act on.\n *\n * A finding describes a single, actionable UI problem: lens, severity,\n * route, observation, impact, suggested fix, and screenshot evidence.\n * Findings are produced by the auditor profile, persisted by the issue\n * writer as self-contained GitHub-issue Markdown, and surfaced over MCP.\n *\n * The shapes are deliberately constraining — the validator + writer\n * hard-fail on missing screenshot evidence, missing lens, missing title.\n */\n\n/**\n * Canonical audit lenses. Each lens scopes a finding to a single class of\n * problem so a single audit pass can iterate them without pile-on findings\n * under a generic label.\n */\nexport type UiLens =\n | 'consistency'\n | 'hierarchy'\n | 'layout'\n | 'ux-flow'\n | 'duplication'\n | 'accessibility'\n | 'responsive'\n | 'states'\n | 'content'\n | 'interaction'\n | 'performance-perceived'\n | 'other'\n\n/** Frozen tuple of lenses for validation + iteration. */\nexport const UI_LENSES: readonly UiLens[] = [\n 'consistency',\n 'hierarchy',\n 'layout',\n 'ux-flow',\n 'duplication',\n 'accessibility',\n 'responsive',\n 'states',\n 'content',\n 'interaction',\n 'performance-perceived',\n 'other',\n] as const\n\n/**\n * Severity scale.\n * - `critical` — blocks a core task or is an accessibility blocker.\n * - `high` — confusing, broken-looking, or noticeable friction.\n * - `med` — visible polish issue, would be caught in code review.\n * - `low` — nitpick worth fixing eventually.\n */\nexport type UiFindingSeverity = 'low' | 'med' | 'high' | 'critical'\n\n/** Frozen severity tuple, ordered worst → least bad for sort/report. */\nexport const UI_FINDING_SEVERITIES: readonly UiFindingSeverity[] = [\n 'critical',\n 'high',\n 'med',\n 'low',\n] as const\n\n/** Pointer to a screenshot referenced by a finding (workspace-relative path). 
*/\nexport interface UiFindingScreenshot {\n path: string\n viewport?: string\n label?: string\n}\n\n/**\n * A single UI audit finding — the unit of work a contributor can act on.\n *\n * Every field except the documented optionals is required. The auditor\n * validator + writer hard-fail on missing screenshot evidence, missing\n * lens, missing title, etc.\n */\nexport interface UiFinding {\n /** Monotonic id assigned by the writer when persisting. Optional in-transit. */\n id?: number\n title: string\n lens: UiLens\n severity: UiFindingSeverity\n /** Logical route the finding was observed on (e.g. `home`, `checkout-step-2`). */\n route: string\n /** Fully qualified URL the finding was observed at. */\n url?: string\n /** Viewport string the offending capture was taken at (e.g. `1280x800`). */\n viewport?: string\n /** CSS selector pinning the offending element, when one can be identified. */\n selector?: string\n /** 1–3 sentences describing what the screenshot shows that is wrong. */\n observation: string\n /** Who is affected and how. */\n impact: string\n /** A specific change a contributor could apply without asking back. */\n suggestedFix: string\n /** Optional explicit reproduction steps. Writer synthesizes from route/url/selector when omitted. */\n reproSteps?: string\n /** Free-form tags. */\n tags?: readonly string[]\n /** Screenshot references — must be non-empty for actionable findings. */\n screenshots: readonly UiFindingScreenshot[]\n /** Cross-references to similar findings already on file, by id. */\n similarTo?: readonly number[]\n /** ISO-8601 creation timestamp set by the writer when persisted. */\n createdAt?: string\n}\n"],"mappings":";AAkCO,IAAM,YAA+B;AAAA,EAC1C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAYO,IAAM,wBAAsD;AAAA,EACjE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;","names":[]}
|