@tangle-network/agent-runtime 0.38.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -69
- package/dist/chunk-3WQJRSUJ.js +201 -0
- package/dist/chunk-3WQJRSUJ.js.map +1 -0
- package/dist/{chunk-M65QJD35.js → chunk-6HI3QUJD.js} +5 -3
- package/dist/{chunk-M65QJD35.js.map → chunk-6HI3QUJD.js.map} +1 -1
- package/dist/{chunk-Z523NPJK.js → chunk-7ZECSZ3C.js} +2 -59
- package/dist/chunk-7ZECSZ3C.js.map +1 -0
- package/dist/chunk-FNMGYYSS.js +60 -0
- package/dist/chunk-FNMGYYSS.js.map +1 -0
- package/dist/{chunk-V6GURW4W.js → chunk-HSX6PFZR.js} +1 -209
- package/dist/chunk-HSX6PFZR.js.map +1 -0
- package/dist/{chunk-7JBDJQLO.js → chunk-OISRXLWI.js} +8 -3
- package/dist/chunk-OISRXLWI.js.map +1 -0
- package/dist/chunk-VFKBIZTY.js +212 -0
- package/dist/chunk-VFKBIZTY.js.map +1 -0
- package/dist/{dynamic-DeOPeeAw.d.ts → dynamic-BT9Ji3jE.d.ts} +3 -1
- package/dist/improvement.d.ts +1 -1
- package/dist/index.d.ts +10 -147
- package/dist/index.js +23 -99
- package/dist/index.js.map +1 -1
- package/dist/{otel-export-CNmeg_7B.d.ts → kb-gate-C4tho31v.d.ts} +2 -191
- package/dist/loop-runner-bin-C1MuoT8c.d.ts +192 -0
- package/dist/loop-runner-bin.d.ts +12 -0
- package/dist/loop-runner-bin.js +19 -0
- package/dist/loop-runner-bin.js.map +1 -0
- package/dist/loops.d.ts +4 -4
- package/dist/loops.js +1 -1
- package/dist/mcp/bin.js +4 -3
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +5 -3
- package/dist/mcp/index.js +12 -8
- package/dist/mcp/index.js.map +1 -1
- package/dist/{optimize-prompt-cmH9wZdH.d.ts → optimize-prompt-D-urF2wW.d.ts} +1 -1
- package/dist/otel-export-xgf4J6bo.d.ts +191 -0
- package/dist/profiles.d.ts +1 -1
- package/dist/{types-CmkQl8qE.d.ts → types-CNs7_1R3.d.ts} +8 -1
- package/package.json +3 -2
- package/dist/chunk-7JBDJQLO.js.map +0 -1
- package/dist/chunk-V6GURW4W.js.map +0 -1
- package/dist/chunk-Z523NPJK.js.map +0 -1
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import {
|
|
2
|
+
runLoop
|
|
3
|
+
} from "./chunk-OISRXLWI.js";
|
|
4
|
+
import {
|
|
5
|
+
coderProfile,
|
|
6
|
+
multiHarnessCoderFanout
|
|
7
|
+
} from "./chunk-3HMHSN22.js";
|
|
8
|
+
|
|
9
|
+
// src/mcp/executor.ts
|
|
10
|
+
/**
 * Wrap a sandbox client in a "sibling" delegation executor.
 *
 * The wrapped client forwards `create` to `options.client` unchanged and adds
 * a `describePlacement` tag of the form `{ kind: "sibling", sandboxId }`.
 *
 * @param {{ client: { create: Function } }} options - The sandbox client to wrap.
 * @returns {{ client: object, describe: () => string }} The executor.
 */
function createSiblingSandboxExecutor(options) {
  const inner = options.client;

  // Forward creation as a method call so `inner` stays the receiver.
  const forwardCreate = (opts) => inner.create(opts);
  const tagPlacement = (box) => ({ kind: "sibling", sandboxId: readId(box) });

  return {
    client: {
      create: forwardCreate,
      describePlacement: tagPlacement
    },
    describe: () => "sibling-sandbox (each delegation = fresh sandbox via client.create)"
  };
}
|
|
27
|
+
/**
 * Build an executor whose `client.create()` resolves to a machine of an
 * existing fleet instead of creating a new sandbox.
 *
 * Machine choice: ids listed in `options.excludeMachineIds` are filtered out,
 * then either `options.selectMachine({ callIndex, ids })` picks one or the
 * executor round-robins over the remaining ids by call count.
 *
 * @param {object} options
 * @param {{ fleetId: string, ids: string[], sandbox: (id: string) => Promise<object> }} options.fleet
 * @param {(call: { callIndex: number, ids: string[] }) => string} [options.selectMachine]
 * @param {string[]} [options.excludeMachineIds]
 * @returns {{ client: object, describe: () => string }} The executor.
 */
function createFleetWorkspaceExecutor(options) {
  const { fleet } = options;
  const skipped = new Set(options.excludeMachineIds ?? []);
  // sandbox id -> machine id, filled in as machines are resolved so that
  // describePlacement can recover the assignment from the box alone.
  const machineBySandbox = /* @__PURE__ */ new Map();
  let callIndex = 0;

  const listSkipped = () => [...skipped].join(",");

  async function create() {
    const ids = fleet.ids.filter((id) => !skipped.has(id));
    if (ids.length === 0) {
      throw new Error(
        `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(",")}], excluded=[${listSkipped()}])`
      );
    }

    const pick = options.selectMachine;
    let machineId;
    if (pick) {
      machineId = pick({ callIndex, ids });
    } else {
      machineId = ids[callIndex % ids.length];
    }
    // The counter advances before the id is validated, so a rejected pick
    // still consumes a slot.
    callIndex += 1;

    const usable = typeof machineId === "string" && machineId.length > 0;
    if (!usable) {
      throw new Error("agent-runtime: fleet executor selectMachine returned an empty machine id");
    }

    const box = await fleet.sandbox(machineId);
    const sandboxId = readId(box);
    if (sandboxId) {
      machineBySandbox.set(sandboxId, machineId);
    }
    return box;
  }

  function describePlacement(box) {
    const sandboxId = readId(box);
    const machineId = sandboxId ? machineBySandbox.get(sandboxId) : undefined;
    return { kind: "fleet", sandboxId, fleetId: fleet.fleetId, machineId };
  }

  function describe() {
    const suffix = skipped.size > 0 ? ` (excluded=[${listSkipped()}])` : "";
    return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(",")}]${suffix})`;
  }

  return { client: { create, describePlacement }, describe };
}
|
|
70
|
+
/**
 * Read a sandbox instance's `id`.
 *
 * @param {{ id?: unknown }} box - Object to read `id` from.
 * @returns {string | undefined} The id when it is a non-empty string, otherwise `undefined`.
 */
function readId(box) {
  const { id } = box;
  if (typeof id !== "string") return undefined;
  return id.length > 0 ? id : undefined;
}
|
|
74
|
+
|
|
75
|
+
// src/mcp/delegates.ts
|
|
76
|
+
/**
 * Build the default coder delegate.
 *
 * The returned async function turns delegation arguments into a coder task,
 * runs it through `runLoop`, and returns the winning candidate's output.
 *
 * - `variants <= 1`: one run using `coderProfile` and `singleShotDriver`.
 * - `variants > 1`: a `multiHarnessCoderFanout` run capped at `variants`
 *   agent runs. When `options.fanoutHarnesses` is non-empty, at most
 *   `variants` of them are passed; otherwise `harnesses` is left undefined
 *   so the fan-out helper uses its own list.
 *
 * @param {object} options
 * @param {object} [options.executor] - Delegation executor; mutually exclusive with `sandboxClient`.
 * @param {object} [options.sandboxClient] - Shorthand resolved by `resolveExecutor`.
 * @param {string[]} [options.fanoutHarnesses] - Harness names for the fan-out path.
 * @param {number} [options.maxConcurrency=4] - Concurrency cap handed to `runLoop`.
 * @param {Function} [options.reviewer] - Optional reviewer forwarded to `pickCoderWinner`.
 * @param {string} [options.winnerSelection="highest-score"] - Winner-selection strategy.
 * @returns {(args: object, ctx: { signal: AbortSignal, report: Function }) => Promise<object>}
 * @throws {Error} From `resolveExecutor` when neither or both of `executor` / `sandboxClient` are given.
 *   The returned delegate throws when no candidate survives validation (and review, if configured).
 */
function createDefaultCoderDelegate(options) {
  const executor = resolveExecutor(options);
  const sandboxClient = executor.client;
  const fanoutHarnesses = options.fanoutHarnesses;
  const maxConcurrency = options.maxConcurrency ?? 4;

  return async (args, ctx) => {
    const task = {
      goal: buildCoderGoal(args),
      repoRoot: args.repoRoot,
      testCmd: args.config?.testCmd,
      typecheckCmd: args.config?.typecheckCmd,
      forbiddenPaths: args.config?.forbiddenPaths,
      maxDiffLines: args.config?.maxDiffLines
    };

    // Math.trunc returns NaN for non-numeric input and Math.max propagates it.
    // A NaN count would skip the single-shot branch (`NaN <= 1` is false) and
    // start a fan-out with zero agent runs and `maxIterations: NaN`, so fall
    // back to a single variant instead.
    const requested = Math.trunc(args.variants ?? 1);
    const variants = Number.isNaN(requested) ? 1 : Math.max(1, requested);

    ctx.report({ iteration: 0, phase: "starting" });

    let iterations;
    let completedIterations;
    if (variants <= 1) {
      const { agentRunSpec, output, validator } = coderProfile({ task });
      const result = await runLoop({
        driver: singleShotDriver,
        agentRun: agentRunSpec,
        output,
        validator,
        task,
        ctx: { sandboxClient, signal: ctx.signal },
        maxIterations: 1,
        maxConcurrency
      });
      iterations = result.iterations;
      completedIterations = 1;
    } else {
      const fanout = multiHarnessCoderFanout(
        fanoutHarnesses && fanoutHarnesses.length > 0
          ? { harnesses: fanoutHarnesses.slice(0, variants) }
          : { harnesses: undefined }
      );
      const agentRuns = fanout.agentRuns.slice(0, variants);
      const result = await runLoop({
        driver: fanout.driver,
        agentRuns,
        output: fanout.output,
        validator: fanout.validator,
        task,
        ctx: { sandboxClient, signal: ctx.signal },
        maxIterations: variants,
        maxConcurrency: Math.min(maxConcurrency, variants)
      });
      iterations = result.iterations;
      completedIterations = agentRuns.length;
    }

    // Both paths share the same gate: validated (and, if configured,
    // reviewer-approved) candidates only, then the selection strategy.
    const chosen = await pickCoderWinner({
      iterations,
      reviewer: options.reviewer,
      selection: options.winnerSelection ?? "highest-score",
      task,
      signal: ctx.signal
    });
    if (!chosen) throw new Error(noWinnerMessage(options.reviewer));

    ctx.report({ iteration: completedIterations, phase: "completed" });
    return chosen;
  };
}
|
|
141
|
+
/**
 * Choose the winning coder output from a finished loop's iterations.
 *
 * 1. Keep iterations that have an output, no error, and `verdict.valid === true`.
 * 2. If `args.reviewer` is set, call it on each survivor in order and keep
 *    only approved ones, replacing `readiness` with the reviewer's value.
 * 3. Pick among the remaining candidates with `selectCoderCandidate`.
 *
 * @param {object} args
 * @param {Array<object>} args.iterations - Loop iterations (`index`, `output`, `error`, `verdict`).
 * @param {Function} [args.reviewer] - Called as `reviewer(output, task, { signal })`.
 * @param {string} args.selection - Strategy name forwarded to `selectCoderCandidate`.
 * @param {object} args.task - Task passed through to the reviewer.
 * @param {AbortSignal} args.signal - Signal passed through to the reviewer.
 * @returns {Promise<object | undefined>} The winning output, or `undefined` when nothing survives.
 */
async function pickCoderWinner(args) {
  const isMechanicallyValid = (iter) =>
    iter.output !== undefined && !iter.error && iter.verdict?.valid === true;

  const validated = Array.from(args.iterations)
    .filter(isMechanicallyValid)
    .map((iter) => {
      // Until a reviewer says otherwise, readiness mirrors the validator score.
      const baseline = iter.verdict.score ?? 0;
      return { index: iter.index, output: iter.output, score: baseline, readiness: baseline };
    });
  if (validated.length === 0) return undefined;

  if (!args.reviewer) {
    return selectCoderCandidate(validated, args.selection).output;
  }

  const approved = [];
  for (const candidate of validated) {
    // Reviews run one at a time, in iteration order.
    const review = await args.reviewer(candidate.output, args.task, { signal: args.signal });
    if (!review.approved) continue;
    approved.push({ ...candidate, readiness: review.readiness });
  }
  if (approved.length === 0) return undefined;

  return selectCoderCandidate(approved, args.selection).output;
}
|
|
164
|
+
/**
 * Apply a winner-selection strategy to a list of candidates.
 *
 * Strategies: `"smallest-diff"` (fewest inserted + deleted lines),
 * `"highest-readiness"`, `"first-approved"` (lowest index); anything else
 * falls back to highest score. Ties go to the lowest `index`.
 *
 * @param {Array<{ index: number, score: number, readiness: number, output: object }>} candidates
 * @param {string} selection - Strategy name.
 * @returns {object | undefined} The best candidate; `undefined` for an empty list.
 */
function selectCoderCandidate(candidates, selection) {
  const changedLines = ({ output }) => output.diffStats.insertions + output.diffStats.deletions;
  const byIndex = (left, right) => left.index - right.index;
  const byScore = (left, right) => right.score - left.score || byIndex(left, right);

  const comparators = new Map([
    ["smallest-diff", (left, right) => changedLines(left) - changedLines(right) || byIndex(left, right)],
    ["highest-readiness", (left, right) => right.readiness - left.readiness || byIndex(left, right)],
    ["first-approved", byIndex]
  ]);
  const compare = comparators.get(selection) ?? byScore;

  // Sort a copy so the caller's array keeps its order.
  const [winner] = [...candidates].sort(compare);
  return winner;
}
|
|
180
|
+
/**
 * Error text for a delegation that produced no winning candidate.
 *
 * @param {Function | undefined} reviewer - The configured reviewer, if any.
 * @returns {string} A message that mentions review only when a reviewer was configured.
 */
function noWinnerMessage(reviewer) {
  if (reviewer) {
    return "coder delegate: no candidate passed validation + review";
  }
  return "coder delegate: no candidate passed validation";
}
|
|
183
|
+
/**
 * Compose the goal text handed to the coder.
 *
 * @param {{ goal: string, contextHint?: string }} args - Delegation arguments.
 * @returns {string} `args.goal` alone, or the goal followed by a blank line
 *   and a `## Context` section when `args.contextHint` is truthy.
 */
function buildCoderGoal(args) {
  const { contextHint } = args;
  return contextHint
    ? [args.goal, "", "## Context", contextHint].join("\n")
    : args.goal;
}
|
|
187
|
+
/**
 * Resolve the delegation executor from the delegate options.
 *
 * @param {{ executor?: object, sandboxClient?: object }} options
 * @returns {object} `options.executor` when given, otherwise a sibling
 *   executor wrapping `options.sandboxClient`.
 * @throws {Error} When both or neither of `executor` and `sandboxClient` are supplied.
 */
function resolveExecutor(options) {
  const { executor, sandboxClient } = options;

  if (executor) {
    if (sandboxClient) {
      throw new Error("createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`");
    }
    return executor;
  }

  if (!sandboxClient) {
    throw new Error("createDefaultCoderDelegate: `executor` or `sandboxClient` is required");
  }
  return createSiblingSandboxExecutor({ client: sandboxClient });
}
|
|
197
|
+
/**
 * Single-shot loop driver used when only one variant is requested.
 *
 * `plan` yields the task once (while the history is empty) and nothing
 * afterwards; `decide` picks a winner as soon as any history exists.
 */
var singleShotDriver = {
  name: "mcp-single-shot",

  /** Plan the task on the first call only. */
  async plan(task, history) {
    if (history.length === 0) {
      return [task];
    }
    return [];
  },

  /** Fail when nothing ran; otherwise hand over to winner picking. */
  decide(history) {
    if (history.length > 0) {
      return "pick-winner";
    }
    return "fail";
  }
};
|
|
206
|
+
|
|
207
|
+
export {
|
|
208
|
+
createSiblingSandboxExecutor,
|
|
209
|
+
createFleetWorkspaceExecutor,
|
|
210
|
+
createDefaultCoderDelegate
|
|
211
|
+
};
|
|
212
|
+
//# sourceMappingURL=chunk-VFKBIZTY.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/mcp/executor.ts","../src/mcp/delegates.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Delegation executors — the layer between MCP delegates and the sandbox\n * substrate. Each executor exposes a {@link LoopSandboxClient} the kernel\n * consumes plus a placement tag so the trace pipeline can correlate workers\n * with their physical placement.\n *\n * Two implementations ship in-box:\n *\n * - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh\n * sandbox sibling to the caller. Default when the MCP server runs as a\n * standalone CLI mounted outside a fleet.\n *\n * - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines\n * in the caller's existing fleet so worker diffs land directly on the\n * caller's filesystem (the fleet's shared workspace). Selected when the\n * parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.\n */\n\nimport type { CreateSandboxOptions, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxClient, LoopSandboxPlacement } from '../loops'\n\n/** @experimental */\nexport interface DelegationExecutor {\n /** Sandbox client the kernel calls. Returned with `describePlacement` set. */\n readonly client: LoopSandboxClient\n /** Best-effort one-liner used in stderr boot logs and diagnostics. 
*/\n describe(): string\n}\n\n/** @experimental */\nexport interface SiblingSandboxExecutorOptions {\n client: LoopSandboxClient\n}\n\n/**\n * Wrap a raw sandbox SDK client so the kernel emits\n * `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.\n *\n * The returned client `.create()` delegates to the underlying client; the\n * only added behavior is a `describePlacement` tag the kernel reads.\n *\n * @experimental\n */\nexport function createSiblingSandboxExecutor(\n options: SiblingSandboxExecutorOptions,\n): DelegationExecutor {\n const underlying = options.client\n const client: LoopSandboxClient = {\n create(opts?: CreateSandboxOptions): Promise<SandboxInstance> {\n return underlying.create(opts)\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n return { kind: 'sibling', sandboxId: readId(box) }\n },\n }\n return {\n client,\n describe(): string {\n return 'sibling-sandbox (each delegation = fresh sandbox via client.create)'\n },\n }\n}\n\n/**\n * Minimal `SandboxFleet` surface the fleet executor calls. Declared\n * structurally so tests can pass an in-memory stub without instantiating the\n * sandbox SDK.\n *\n * @experimental\n */\nexport interface FleetHandle {\n readonly fleetId: string\n /** Machine ids in dispatch-eligible order. The executor round-robins. */\n readonly ids: ReadonlyArray<string>\n /** Resolve a machine id to its `SandboxInstance` — that machine is mounted\n * on the fleet's shared workspace, so any diff the worker writes lands on\n * every other fleet machine's filesystem too. */\n sandbox(machineId: string): Promise<SandboxInstance>\n}\n\n/** @experimental */\nexport interface FleetWorkspaceExecutorOptions {\n fleet: FleetHandle\n /**\n * Override the machine-selection policy. 
Default = round-robin across\n * `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the\n * coordinator machine the MCP server is running on).\n */\n selectMachine?: (call: { callIndex: number; ids: ReadonlyArray<string> }) => string\n /**\n * Machine ids to skip during default round-robin. Set to the caller's own\n * machineId so workers don't compete with the orchestrator on the same VM.\n */\n excludeMachineIds?: ReadonlyArray<string>\n}\n\n/**\n * Build an executor that resolves each delegated iteration to an existing\n * machine in `fleet`. The fleet's shared-workspace policy means the worker\n * machine sees the caller's filesystem — diffs land in-place with no\n * cross-sandbox copy step.\n *\n * @experimental\n */\nexport function createFleetWorkspaceExecutor(\n options: FleetWorkspaceExecutorOptions,\n): DelegationExecutor {\n const fleet = options.fleet\n const exclude = new Set(options.excludeMachineIds ?? [])\n let callIndex = 0\n // machineId-by-sandboxId, populated as we resolve machines so\n // `describePlacement` can recover the assignment from the SandboxInstance\n // the kernel hands back.\n const placementBySandboxId = new Map<string, { machineId: string }>()\n\n const client: LoopSandboxClient = {\n async create(): Promise<SandboxInstance> {\n const ids = fleet.ids.filter((id) => !exclude.has(id))\n if (ids.length === 0) {\n throw new Error(\n `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(',')}], excluded=[${[...exclude].join(',')}])`,\n )\n }\n const selector = options.selectMachine\n const machineId = selector ? 
selector({ callIndex, ids }) : ids[callIndex % ids.length]\n callIndex += 1\n if (typeof machineId !== 'string' || machineId.length === 0) {\n throw new Error('agent-runtime: fleet executor selectMachine returned an empty machine id')\n }\n const box = await fleet.sandbox(machineId)\n const sandboxId = readId(box)\n if (sandboxId) placementBySandboxId.set(sandboxId, { machineId })\n return box\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n const sandboxId = readId(box)\n const recorded = sandboxId ? placementBySandboxId.get(sandboxId) : undefined\n return {\n kind: 'fleet',\n sandboxId,\n fleetId: fleet.fleetId,\n machineId: recorded?.machineId,\n }\n },\n }\n\n return {\n client,\n describe(): string {\n const excluded = exclude.size > 0 ? ` (excluded=[${[...exclude].join(',')}])` : ''\n return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(',')}]${excluded})`\n },\n }\n}\n\nfunction readId(box: SandboxInstance): string | undefined {\n const raw = (box as unknown as { id?: unknown }).id\n return typeof raw === 'string' && raw.length > 0 ? raw : undefined\n}\n","/**\n * @experimental\n *\n * Delegate factories — the layer between MCP tool handlers and the\n * underlying `runLoop` runners.\n *\n * The MCP server is profile-agnostic: it owns the task queue + feedback\n * store + transport. Each `*Delegate` is the closure that the queue\n * invokes when a task runs. Consumers can override either delegate to\n * inject custom drivers, mocks, fleet-aware dispatchers, etc.\n *\n * The default coder delegate is wired here because we own\n * `coderProfile` / `multiHarnessCoderFanout`. The default researcher\n * delegate is **not** wired in this file — `agent-knowledge` cannot be\n * imported from `agent-runtime` without inducing a cycle. 
Consumers\n * pass `researcherDelegate` explicitly when constructing the server.\n */\n\nimport type { Iteration, LoopSandboxClient } from '../loops'\nimport { runLoop } from '../loops'\nimport { type CoderOutput, coderProfile, multiHarnessCoderFanout } from '../profiles/coder'\nimport { createSiblingSandboxExecutor, type DelegationExecutor } from './executor'\nimport type {\n CoderTask,\n DelegateCodeArgs,\n DelegateResearchArgs,\n DelegationProgress,\n ResearchOutputShape,\n} from './types'\n\n/** @experimental */\nexport interface DelegateRunCtx {\n signal: AbortSignal\n report(progress: DelegationProgress): void\n}\n\n/** @experimental */\nexport type CoderDelegate = (\n args: DelegateCodeArgs,\n ctx: DelegateRunCtx,\n) => Promise<import('../profiles/coder').CoderOutput>\n\n/** @experimental */\nexport type ResearcherDelegate = (\n args: DelegateResearchArgs,\n ctx: DelegateRunCtx,\n) => Promise<ResearchOutputShape>\n\n/** @experimental Structured review verdict over a coder candidate. */\nexport interface CoderReview {\n /** Gate: only approved candidates are eligible to win. */\n approved: boolean\n /** Reviewer's recommendation — surfaced in traces. */\n recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject'\n /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */\n readiness: number\n notes?: string\n}\n\n/**\n * @experimental\n *\n * Optional adversarial reviewer over a coder candidate that already passed\n * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded\n * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to\n * win if the reviewer approves it. 
The reviewer is the consumer's seam — an LLM\n * judge, a `pnpm review` command, anything returning a `CoderReview`.\n */\nexport type CoderReviewer = (\n output: import('../profiles/coder').CoderOutput,\n task: CoderTask,\n ctx: { signal: AbortSignal },\n) => Promise<CoderReview> | CoderReview\n\n/**\n * @experimental Winner-selection strategy among validated (+ reviewed)\n * candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`\n * (the kernel's behavior — preserves backward compatibility).\n */\nexport type CoderWinnerSelection =\n | 'highest-score'\n | 'smallest-diff'\n | 'highest-readiness'\n | 'first-approved'\n\n/** @experimental */\nexport interface CreateDefaultCoderDelegateOptions {\n /**\n * Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)\n * to control where worker iterations land. `sandboxClient` is a\n * convenience shorthand that wraps the client in a sibling executor — pass\n * one or the other, not both.\n */\n executor?: DelegationExecutor\n /**\n * Convenience shorthand for sibling placement. Equivalent to\n * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.\n */\n sandboxClient?: LoopSandboxClient\n /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */\n fanoutHarnesses?: string[]\n /** Hard cap on the kernel's per-batch concurrency. Default 4. */\n maxConcurrency?: number\n /**\n * Optional adversarial reviewer. When set, a candidate must pass mechanical\n * validation AND `reviewer.approved` to be eligible to win — empty/secret/\n * test-failing patches are already gone; this catches the \"compiles + passes\n * but wrong/unsafe\" class the deterministic validator can't see.\n */\n reviewer?: CoderReviewer\n /** Winner-selection strategy among eligible candidates. Default `highest-score`. 
*/\n winnerSelection?: CoderWinnerSelection\n}\n\n/**\n * Build a coder delegate that drives `runLoop` against the project's\n * sandbox client + coder profile. When `args.variants > 1` it switches\n * to the multi-harness fanout topology.\n *\n * @experimental\n */\nexport function createDefaultCoderDelegate(\n options: CreateDefaultCoderDelegateOptions,\n): CoderDelegate {\n const executor = resolveExecutor(options)\n const sandboxClient = executor.client\n const fanoutHarnesses = options.fanoutHarnesses\n const maxConcurrency = options.maxConcurrency ?? 4\n return async (args, ctx) => {\n const task: CoderTask = {\n goal: buildCoderGoal(args),\n repoRoot: args.repoRoot,\n testCmd: args.config?.testCmd,\n typecheckCmd: args.config?.typecheckCmd,\n forbiddenPaths: args.config?.forbiddenPaths,\n maxDiffLines: args.config?.maxDiffLines,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const { agentRunSpec, output, validator } = coderProfile({ task })\n const result = await runLoop({\n driver: singleShotDriver,\n agentRun: agentRunSpec,\n output,\n validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: 1,\n maxConcurrency,\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: 1, phase: 'completed' })\n return chosen\n }\n const fanout = multiHarnessCoderFanout(\n fanoutHarnesses && fanoutHarnesses.length > 0\n ? 
{ harnesses: fanoutHarnesses.slice(0, variants) }\n : { harnesses: undefined },\n )\n const agentRuns = fanout.agentRuns.slice(0, variants)\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns,\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: agentRuns.length, phase: 'completed' })\n return chosen\n }\n}\n\ninterface PickCoderWinnerArgs {\n iterations: ReadonlyArray<Iteration<CoderTask, CoderOutput>>\n reviewer: CoderReviewer | undefined\n selection: CoderWinnerSelection\n task: CoderTask\n signal: AbortSignal\n}\n\ninterface CoderCandidate {\n index: number\n output: CoderOutput\n score: number\n readiness: number\n}\n\n/**\n * Pick the winning coder candidate from a finished loop's iterations:\n * 1. keep only mechanically-VALID candidates (the validator already gated\n * tests/typecheck/forbidden/diff/no-op/secrets),\n * 2. if a `reviewer` is wired, keep only those it APPROVES,\n * 3. select among survivors by the chosen strategy.\n * Returns `undefined` when nothing survives — the delegate fails loud.\n */\nasync function pickCoderWinner(args: PickCoderWinnerArgs): Promise<CoderOutput | undefined> {\n const valid: CoderCandidate[] = []\n for (const iter of args.iterations) {\n if (iter.output === undefined || iter.error || iter.verdict?.valid !== true) continue\n valid.push({\n index: iter.index,\n output: iter.output,\n score: iter.verdict.score ?? 0,\n readiness: iter.verdict.score ?? 
0,\n })\n }\n if (valid.length === 0) return undefined\n\n let eligible = valid\n if (args.reviewer) {\n eligible = []\n for (const c of valid) {\n const review = await args.reviewer(c.output, args.task, { signal: args.signal })\n if (review.approved) eligible.push({ ...c, readiness: review.readiness })\n }\n if (eligible.length === 0) return undefined\n }\n\n return selectCoderCandidate(eligible, args.selection).output\n}\n\n/** Apply the winner-selection strategy; ties broken by earliest iteration. */\nfunction selectCoderCandidate(\n candidates: CoderCandidate[],\n selection: CoderWinnerSelection,\n): CoderCandidate {\n const diffLines = (c: CoderCandidate) =>\n c.output.diffStats.insertions + c.output.diffStats.deletions\n const sorted = [...candidates].sort((a, b) => {\n switch (selection) {\n case 'smallest-diff':\n return diffLines(a) - diffLines(b) || a.index - b.index\n case 'highest-readiness':\n return b.readiness - a.readiness || a.index - b.index\n case 'first-approved':\n return a.index - b.index\n default:\n return b.score - a.score || a.index - b.index\n }\n })\n return sorted[0]!\n}\n\nfunction noWinnerMessage(reviewer: CoderReviewer | undefined): string {\n return reviewer\n ? 
'coder delegate: no candidate passed validation + review'\n : 'coder delegate: no candidate passed validation'\n}\n\nfunction buildCoderGoal(args: DelegateCodeArgs): string {\n if (!args.contextHint) return args.goal\n return [args.goal, '', '## Context', args.contextHint].join('\\n')\n}\n\nfunction resolveExecutor(options: CreateDefaultCoderDelegateOptions): DelegationExecutor {\n if (options.executor && options.sandboxClient) {\n throw new Error('createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`')\n }\n if (options.executor) return options.executor\n if (options.sandboxClient) {\n return createSiblingSandboxExecutor({ client: options.sandboxClient })\n }\n throw new Error('createDefaultCoderDelegate: `executor` or `sandboxClient` is required')\n}\n\n/**\n * Single-shot driver — plan one task on iteration 0, stop after one\n * iteration. Used by the coder delegate when `variants <= 1`. Keeps the\n * runLoop kernel-level accounting (timing, cost, trace emission) while\n * skipping fanout/refine topology overhead.\n */\nconst singleShotDriver = {\n name: 'mcp-single-shot',\n async plan<Task>(task: Task, history: ReadonlyArray<unknown>): Promise<Task[]> {\n return history.length === 0 ? [task] : []\n },\n decide(history: ReadonlyArray<unknown>): 'pick-winner' | 'fail' {\n return history.length > 0 ? 
'pick-winner' : 'fail'\n },\n}\n"],"mappings":";;;;;;;;;AA6CO,SAAS,6BACd,SACoB;AACpB,QAAM,aAAa,QAAQ;AAC3B,QAAM,SAA4B;AAAA,IAChC,OAAO,MAAuD;AAC5D,aAAO,WAAW,OAAO,IAAI;AAAA,IAC/B;AAAA,IACA,kBAAkB,KAA4C;AAC5D,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,GAAG,EAAE;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,aAAO;AAAA,IACT;AAAA,EACF;AACF;AA2CO,SAAS,6BACd,SACoB;AACpB,QAAM,QAAQ,QAAQ;AACtB,QAAM,UAAU,IAAI,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AACvD,MAAI,YAAY;AAIhB,QAAM,uBAAuB,oBAAI,IAAmC;AAEpE,QAAM,SAA4B;AAAA,IAChC,MAAM,SAAmC;AACvC,YAAM,MAAM,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;AACrD,UAAI,IAAI,WAAW,GAAG;AACpB,cAAM,IAAI;AAAA,UACR,wBAAwB,MAAM,OAAO,0CAA0C,MAAM,IAAI,KAAK,GAAG,CAAC,gBAAgB,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC;AAAA,QAC1I;AAAA,MACF;AACA,YAAM,WAAW,QAAQ;AACzB,YAAM,YAAY,WAAW,SAAS,EAAE,WAAW,IAAI,CAAC,IAAI,IAAI,YAAY,IAAI,MAAM;AACtF,mBAAa;AACb,UAAI,OAAO,cAAc,YAAY,UAAU,WAAW,GAAG;AAC3D,cAAM,IAAI,MAAM,0EAA0E;AAAA,MAC5F;AACA,YAAM,MAAM,MAAM,MAAM,QAAQ,SAAS;AACzC,YAAM,YAAY,OAAO,GAAG;AAC5B,UAAI,UAAW,sBAAqB,IAAI,WAAW,EAAE,UAAU,CAAC;AAChE,aAAO;AAAA,IACT;AAAA,IACA,kBAAkB,KAA4C;AAC5D,YAAM,YAAY,OAAO,GAAG;AAC5B,YAAM,WAAW,YAAY,qBAAqB,IAAI,SAAS,IAAI;AACnE,aAAO;AAAA,QACL,MAAM;AAAA,QACN;AAAA,QACA,SAAS,MAAM;AAAA,QACf,WAAW,UAAU;AAAA,MACvB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,YAAM,WAAW,QAAQ,OAAO,IAAI,eAAe,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC,OAAO;AAChF,aAAO,4BAA4B,MAAM,OAAO,eAAe,MAAM,IAAI,KAAK,GAAG,CAAC,IAAI,QAAQ;AAAA,IAChG;AAAA,EACF;AACF;AAEA,SAAS,OAAO,KAA0C;AACxD,QAAM,MAAO,IAAoC;AACjD,SAAO,OAAO,QAAQ,YAAY,IAAI,SAAS,IAAI,MAAM;AAC3D;;;ACvCO,SAAS,2BACd,SACe;AACf,QAAM,WAAW,gBAAgB,OAAO;AACxC,QAAM,gBAAgB,SAAS;AAC/B,QAAM,kBAAkB,QAAQ;AAChC,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAkB;AAAA,MACtB,MAAM,eAAe,IAAI;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK,QAAQ;AAAA,MACtB,cAAc,KAAK,QAAQ;AAAA,MAC3B,gBAAgB,KAAK,QAAQ;AAAA,MAC7B,cAAc,KAAK,QAAQ;AAAA,IAC7B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAAG,OAAO,WAAW,CAAC;AAC9
C,QAAI,YAAY,GAAG;AACjB,YAAM,EAAE,cAAc,QAAQ,UAAU,IAAI,aAAa,EAAE,KAAK,CAAC;AACjE,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,QACzC,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAAS,MAAM,gBAAgB;AAAA,QACnC,YAAYD,QAAO;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,WAAW,QAAQ,mBAAmB;AAAA,QACtC;AAAA,QACA,QAAQ,IAAI;AAAA,MACd,CAAC;AACD,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS;AAAA,MACb,mBAAmB,gBAAgB,SAAS,IACxC,EAAE,WAAW,gBAAgB,MAAM,GAAG,QAAQ,EAAE,IAChD,EAAE,WAAW,OAAU;AAAA,IAC7B;AACA,UAAM,YAAY,OAAO,UAAU,MAAM,GAAG,QAAQ;AACpD,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,MACzC,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,MAAM,gBAAgB;AAAA,MACnC,YAAY,OAAO;AAAA,MACnB,UAAU,QAAQ;AAAA,MAClB,WAAW,QAAQ,mBAAmB;AAAA,MACtC;AAAA,MACA,QAAQ,IAAI;AAAA,IACd,CAAC;AACD,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,QAAI,OAAO,EAAE,WAAW,UAAU,QAAQ,OAAO,YAAY,CAAC;AAC9D,WAAO;AAAA,EACT;AACF;AAyBA,eAAe,gBAAgB,MAA6D;AAC1F,QAAM,QAA0B,CAAC;AACjC,aAAW,QAAQ,KAAK,YAAY;AAClC,QAAI,KAAK,WAAW,UAAa,KAAK,SAAS,KAAK,SAAS,UAAU,KAAM;AAC7E,UAAM,KAAK;AAAA,MACT,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,QAAQ,SAAS;AAAA,MAC7B,WAAW,KAAK,QAAQ,SAAS;AAAA,IACnC,CAAC;AAAA,EACH;AACA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,MAAI,WAAW;AACf,MAAI,KAAK,UAAU;AACjB,eAAW,CAAC;AACZ,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,MAAM,KAAK,SAAS,EAAE,QAAQ,KAAK,MAAM,EAAE,QAAQ,KAAK,OAAO,CAAC;AAC/E,UAAI,OAAO,SAAU,UAAS,KAAK,EAAE,GAAG,GAAG,WAAW,OAAO,UAAU,CAAC;AAAA,IAC1E;AACA,QAAI,SAAS,WAAW,EAAG,QAAO;AAAA,EACpC;AAEA,SAAO,qBAAqB,UAAU,KAAK,SAAS,EAAE;AACxD;AAGA,SAAS,qBACP,YACA,WACgB;AAChB,QAAM,YAAY,CAAC,MACjB,EAAE,OAAO,UAAU,aAAa,EAAE,OAAO,UAAU;AACrD,QAAM,SAAS,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM;AAC5C,YAAQ,WAAW;AAAA,MACjB,KAAK;AACH,eAAO,UAAU,CAAC,IAAI,UAAU,CAAC,KAAK,EAAE,
QAAQ,EAAE;AAAA,MACpD,KAAK;AACH,eAAO,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE;AAAA,MAClD,KAAK;AACH,eAAO,EAAE,QAAQ,EAAE;AAAA,MACrB;AACE,eAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,IAC5C;AAAA,EACF,CAAC;AACD,SAAO,OAAO,CAAC;AACjB;AAEA,SAAS,gBAAgB,UAA6C;AACpE,SAAO,WACH,4DACA;AACN;AAEA,SAAS,eAAe,MAAgC;AACtD,MAAI,CAAC,KAAK,YAAa,QAAO,KAAK;AACnC,SAAO,CAAC,KAAK,MAAM,IAAI,cAAc,KAAK,WAAW,EAAE,KAAK,IAAI;AAClE;AAEA,SAAS,gBAAgB,SAAgE;AACvF,MAAI,QAAQ,YAAY,QAAQ,eAAe;AAC7C,UAAM,IAAI,MAAM,+EAA+E;AAAA,EACjG;AACA,MAAI,QAAQ,SAAU,QAAO,QAAQ;AACrC,MAAI,QAAQ,eAAe;AACzB,WAAO,6BAA6B,EAAE,QAAQ,QAAQ,cAAc,CAAC;AAAA,EACvE;AACA,QAAM,IAAI,MAAM,uEAAuE;AACzF;AAQA,IAAM,mBAAmB;AAAA,EACvB,MAAM;AAAA,EACN,MAAM,KAAW,MAAY,SAAkD;AAC7E,WAAO,QAAQ,WAAW,IAAI,CAAC,IAAI,IAAI,CAAC;AAAA,EAC1C;AAAA,EACA,OAAO,SAAyD;AAC9D,WAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,EAC9C;AACF;","names":["result","chosen"]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { I as Iteration, D as Driver } from './types-
|
|
1
|
+
import { I as Iteration, D as Driver } from './types-CNs7_1R3.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* @experimental
|
|
@@ -41,10 +41,12 @@ type TopologyMove<Task> = {
|
|
|
41
41
|
kind: 'refine';
|
|
42
42
|
task: Task;
|
|
43
43
|
rationale?: string;
|
|
44
|
+
parentIndex?: number;
|
|
44
45
|
} | {
|
|
45
46
|
kind: 'fanout';
|
|
46
47
|
tasks: Task[];
|
|
47
48
|
rationale?: string;
|
|
49
|
+
parentIndex?: number;
|
|
48
50
|
} | {
|
|
49
51
|
kind: 'stop';
|
|
50
52
|
rationale?: string;
|
package/dist/improvement.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { AnalystFinding } from '@tangle-network/agent-eval';
|
|
2
2
|
import { L as LocalHarness, r as runLocalHarness } from './local-harness-KrdFTY5R.js';
|
|
3
3
|
import { LabeledScenarioStore, WorktreeAdapter, ImprovementDriver } from '@tangle-network/agent-eval/campaign';
|
|
4
|
-
export { O as OptimizePromptOptions,
|
|
4
|
+
export { O as OptimizePromptOptions, b as OptimizePromptReflection, a as OptimizePromptResult, o as optimizePrompt } from './optimize-prompt-D-urF2wW.js';
|
|
5
5
|
import { S as SurfaceImprovementEdit } from './improvement-adapter-BC4HhuAR.js';
|
|
6
6
|
import { I as ImprovementAdapter } from './types-p8dWBIXL.js';
|
|
7
7
|
import 'node:child_process';
|
package/dist/index.d.ts
CHANGED
|
@@ -2,15 +2,15 @@ import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult,
|
|
|
2
2
|
export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
|
|
3
3
|
import { a as AgentBackendInput, b as AgentExecutionBackend, O as OpenAIChatTool, c as OpenAIChatToolChoice, d as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, e as RunAgentTaskOptions, f as AgentTaskRunResult, g as RunAgentTaskStreamOptions, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-CsCCryln.js';
|
|
4
4
|
export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext, A as AgentTaskSpec, B as BackendErrorDetail } from './types-CsCCryln.js';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
import
|
|
9
|
-
import
|
|
10
|
-
|
|
11
|
-
import
|
|
12
|
-
|
|
13
|
-
import
|
|
5
|
+
export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-C1MuoT8c.js';
|
|
6
|
+
export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-xgf4J6bo.js';
|
|
7
|
+
export { R as RuntimeRunHandle, p as RuntimeRunPersistenceAdapter, q as RuntimeRunRow, s as startRuntimeRun } from './types-CNs7_1R3.js';
|
|
8
|
+
import '@tangle-network/agent-eval/campaign';
|
|
9
|
+
import './types-p8dWBIXL.js';
|
|
10
|
+
import './optimize-prompt-D-urF2wW.js';
|
|
11
|
+
import './dynamic-BT9Ji3jE.js';
|
|
12
|
+
import './kb-gate-C4tho31v.js';
|
|
13
|
+
import './profiles.js';
|
|
14
14
|
import '@tangle-network/sandbox';
|
|
15
15
|
|
|
16
16
|
/**
|
|
@@ -1015,143 +1015,6 @@ declare class PlannerError extends AgentEvalError {
|
|
|
1015
1015
|
});
|
|
1016
1016
|
}
|
|
1017
1017
|
|
|
1018
|
-
/**
|
|
1019
|
-
* @experimental
|
|
1020
|
-
*
|
|
1021
|
-
* `runDelegatedLoop` — the configured delegated loop-runner.
|
|
1022
|
-
*
|
|
1023
|
-
* One typed entrypoint a worker agent (or a scheduled routine) calls to run a
|
|
1024
|
-
* disciplined loop in a chosen MODE, over agent-runtime's hardened engines:
|
|
1025
|
-
*
|
|
1026
|
-
* code → build-in-a-loop via the coder delegate (no-op + secret floor,
|
|
1027
|
-
* optional reviewer gate, winner-selection)
|
|
1028
|
-
* review → code mode with a REQUIRED reviewer (the gate is the point)
|
|
1029
|
-
* research → research-in-a-loop with valid-only KB growth (createKbGate)
|
|
1030
|
-
* audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
|
|
1031
|
-
* self-improve → identity-gated prompt optimization (optimizePrompt, caller-wired)
|
|
1032
|
-
* dynamic → agent-authored topology (runLoop + createDynamicDriver)
|
|
1033
|
-
*
|
|
1034
|
-
* It is intentionally a thin façade: the value is that EVERY product reuses the
|
|
1035
|
-
* one hardened engine instead of forking delegation logic. The dispatcher owns
|
|
1036
|
-
* mode routing, timing, fail-loud on an unregistered mode, and a uniform result
|
|
1037
|
-
* shape; each mode's engine is a pre-configured runner in the registry (build it
|
|
1038
|
-
* with the factories below, or inject your own / a stub).
|
|
1039
|
-
*/
|
|
1040
|
-
|
|
1041
|
-
/** @experimental */
|
|
1042
|
-
type DelegatedLoopMode = 'code' | 'review' | 'research' | 'audit' | 'self-improve' | 'dynamic';
|
|
1043
|
-
/** @experimental A pre-configured loop for one mode. Returns the mode's raw
|
|
1044
|
-
* output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */
|
|
1045
|
-
type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>;
|
|
1046
|
-
/** @experimental Mode → configured runner. Partial: only register the modes a
|
|
1047
|
-
* given product/routine actually uses. */
|
|
1048
|
-
type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>;
|
|
1049
|
-
/** @experimental Uniform result — never throws from a registered runner; a
|
|
1050
|
-
* thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */
|
|
1051
|
-
interface DelegatedLoopResult<T = unknown> {
|
|
1052
|
-
mode: DelegatedLoopMode;
|
|
1053
|
-
ok: boolean;
|
|
1054
|
-
output?: T;
|
|
1055
|
-
error?: string;
|
|
1056
|
-
durationMs: number;
|
|
1057
|
-
}
|
|
1058
|
-
/** @experimental */
|
|
1059
|
-
interface RunDelegatedLoopOptions {
|
|
1060
|
-
signal?: AbortSignal;
|
|
1061
|
-
/** Clock override for deterministic tests. */
|
|
1062
|
-
now?: () => number;
|
|
1063
|
-
}
|
|
1064
|
-
/**
|
|
1065
|
-
* @experimental
|
|
1066
|
-
*
|
|
1067
|
-
* Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no
|
|
1068
|
-
* runner is registered for the mode — a routine pointed at an unwired mode is a
|
|
1069
|
-
* config bug, not a silent no-op. A runner that throws is captured as
|
|
1070
|
-
* `{ ok: false }` so unattended runs record the failure rather than crash.
|
|
1071
|
-
*/
|
|
1072
|
-
declare function runDelegatedLoop<T = unknown>(mode: DelegatedLoopMode, registry: DelegatedLoopRegistry, options?: RunDelegatedLoopOptions): Promise<DelegatedLoopResult<T>>;
|
|
1073
|
-
/** @experimental Options for the default `code`/`review` runner. */
|
|
1074
|
-
interface CoderLoopRunnerOptions {
|
|
1075
|
-
sandboxClient: LoopSandboxClient;
|
|
1076
|
-
/** What to build — the delegate args (goal, repoRoot, variants, config, …). */
|
|
1077
|
-
args: DelegateCodeArgs;
|
|
1078
|
-
/** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */
|
|
1079
|
-
reviewer?: CoderReviewer;
|
|
1080
|
-
/** Winner-selection strategy. Default `highest-score`. */
|
|
1081
|
-
winnerSelection?: CoderWinnerSelection;
|
|
1082
|
-
/** Harnesses for `variants > 1` fanout. */
|
|
1083
|
-
fanoutHarnesses?: string[];
|
|
1084
|
-
}
|
|
1085
|
-
/** @experimental Build a `code`-mode runner over the hardened coder delegate. */
|
|
1086
|
-
declare function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput>;
|
|
1087
|
-
/**
|
|
1088
|
-
* @experimental
|
|
1089
|
-
*
|
|
1090
|
-
* `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,
|
|
1091
|
-
* so the type forces a reviewer (a "review loop" with no reviewer is a code loop).
|
|
1092
|
-
*/
|
|
1093
|
-
declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
|
|
1094
|
-
reviewer: CoderReviewer;
|
|
1095
|
-
}): DelegatedLoopRunner<CoderOutput>;
|
|
1096
|
-
/** @experimental Options for the default `dynamic` runner. */
|
|
1097
|
-
interface DynamicLoopRunnerOptions<Task, Output> {
|
|
1098
|
-
sandboxClient: LoopSandboxClient;
|
|
1099
|
-
/** The agent-authored topology planner (e.g. `createSandboxPlanner(...)`). */
|
|
1100
|
-
planner: TopologyPlanner<Task, Output>;
|
|
1101
|
-
task: Task;
|
|
1102
|
-
output: OutputAdapter<Output>;
|
|
1103
|
-
validator?: Validator<Output>;
|
|
1104
|
-
/** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */
|
|
1105
|
-
agentRun?: AgentRunSpec<Task>;
|
|
1106
|
-
agentRuns?: AgentRunSpec<Task>[];
|
|
1107
|
-
maxIterations?: number;
|
|
1108
|
-
maxFanout?: number;
|
|
1109
|
-
}
|
|
1110
|
-
/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
|
|
1111
|
-
declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
|
|
1112
|
-
/** @experimental A fact rejected at the KB gate — surfaced, never dropped. */
|
|
1113
|
-
interface VetoedFact {
|
|
1114
|
-
candidate: FactCandidate;
|
|
1115
|
-
vetoedBy?: string;
|
|
1116
|
-
reason?: string;
|
|
1117
|
-
}
|
|
1118
|
-
/** @experimental */
|
|
1119
|
-
interface ResearchLoopResult {
|
|
1120
|
-
/** Facts that passed the fail-closed gate — safe to write to the KB. */
|
|
1121
|
-
accepted: FactCandidate[];
|
|
1122
|
-
/** Facts the gate vetoed in the final round — escalate, do not silently drop. */
|
|
1123
|
-
vetoed: VetoedFact[];
|
|
1124
|
-
/** Research rounds actually run. */
|
|
1125
|
-
rounds: number;
|
|
1126
|
-
}
|
|
1127
|
-
/** @experimental Options for the default `research` runner. */
|
|
1128
|
-
interface ResearchLoopRunnerOptions {
|
|
1129
|
-
/**
|
|
1130
|
-
* The research engine (the consumer's web/doc searcher + extractor). Called
|
|
1131
|
-
* each round with the prior round's vetoes so it can re-research the gaps.
|
|
1132
|
-
* Returns fact candidates carrying their grounding (`verbatimPassage` +
|
|
1133
|
-
* `sourceText`).
|
|
1134
|
-
*/
|
|
1135
|
-
research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>;
|
|
1136
|
-
/** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */
|
|
1137
|
-
gate?: CreateKbGateOptions;
|
|
1138
|
-
/** Max research rounds (correct-on-veto remediation). Default 1. */
|
|
1139
|
-
maxRounds?: number;
|
|
1140
|
-
}
|
|
1141
|
-
/**
|
|
1142
|
-
* @experimental `research` mode — research-in-a-loop with valid-only KB growth.
|
|
1143
|
-
*
|
|
1144
|
-
* Each round: research → gate every candidate (fail-closed; passage MUST be in
|
|
1145
|
-
* the source) → accept the clean ones → re-research the vetoed ones next round,
|
|
1146
|
-
* up to `maxRounds`. Vetoed facts in the final round are RETURNED (escalate,
|
|
1147
|
-
* never silently dropped) so the caller audits vs retries.
|
|
1148
|
-
*/
|
|
1149
|
-
declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
|
|
1150
|
-
/** @experimental `self-improve` mode — identity-gated prompt optimization. */
|
|
1151
|
-
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: OptimizePromptOptions<TScenario, TArtifact>): DelegatedLoopRunner<OptimizePromptResult<TArtifact, TScenario>>;
|
|
1152
|
-
/** @experimental `audit` mode — analyst loop over captured trace/run data. */
|
|
1153
|
-
declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
|
|
1154
|
-
|
|
1155
1018
|
/**
|
|
1156
1019
|
* @stable
|
|
1157
1020
|
*
|
|
@@ -1433,4 +1296,4 @@ declare function readinessServerSentEvent(report: KnowledgeReadinessReport, opti
|
|
|
1433
1296
|
/** @stable */
|
|
1434
1297
|
declare function runtimeStreamServerSentEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
|
|
1435
1298
|
|
|
1436
|
-
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type
|
|
1299
|
+
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RuntimeEventCollector, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runtimeStreamServerSentEvent, 
sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, turnId, validateChatModelId };
|
package/dist/index.js
CHANGED
|
@@ -1,26 +1,31 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
DELEGATED_LOOP_MODES,
|
|
3
|
+
auditLoopRunner,
|
|
4
|
+
coderLoopRunner,
|
|
5
|
+
dynamicLoopRunner,
|
|
6
|
+
isDelegatedLoopMode,
|
|
7
|
+
parseLoopRunnerArgv,
|
|
8
|
+
researchLoopRunner,
|
|
9
|
+
reviewLoopRunner,
|
|
10
|
+
runDelegatedLoop,
|
|
11
|
+
runLoopRunnerCli,
|
|
12
|
+
selfImproveLoopRunner
|
|
13
|
+
} from "./chunk-3WQJRSUJ.js";
|
|
14
|
+
import "./chunk-XBUG326M.js";
|
|
15
|
+
import "./chunk-VOX6Z3II.js";
|
|
7
16
|
import {
|
|
8
17
|
INTELLIGENCE_WIRE_VERSION,
|
|
9
18
|
buildLoopOtelSpans,
|
|
10
|
-
createKbGate,
|
|
11
19
|
createOtelExporter,
|
|
12
20
|
exportEvalRuns,
|
|
13
21
|
loopEventToOtelSpan,
|
|
14
22
|
mcpToolsForRuntimeMcp,
|
|
15
23
|
mcpToolsForRuntimeMcpSubset
|
|
16
|
-
} from "./chunk-
|
|
17
|
-
import
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
import
|
|
21
|
-
createDynamicDriver,
|
|
22
|
-
runLoop
|
|
23
|
-
} from "./chunk-7JBDJQLO.js";
|
|
24
|
+
} from "./chunk-7ZECSZ3C.js";
|
|
25
|
+
import "./chunk-HSX6PFZR.js";
|
|
26
|
+
import "./chunk-FNMGYYSS.js";
|
|
27
|
+
import "./chunk-VFKBIZTY.js";
|
|
28
|
+
import "./chunk-OISRXLWI.js";
|
|
24
29
|
import "./chunk-3HMHSN22.js";
|
|
25
30
|
import "./chunk-PY6NMZYX.js";
|
|
26
31
|
import {
|
|
@@ -1739,91 +1744,6 @@ function deriveExecutionId(input) {
|
|
|
1739
1744
|
return `${input.projectId}:${input.sessionId}:${input.turnIndex}`;
|
|
1740
1745
|
}
|
|
1741
1746
|
|
|
1742
|
-
// src/loop-runner.ts
|
|
1743
|
-
async function runDelegatedLoop(mode, registry, options = {}) {
|
|
1744
|
-
const runner = registry[mode];
|
|
1745
|
-
if (!runner) {
|
|
1746
|
-
throw new ConfigError(
|
|
1747
|
-
`runDelegatedLoop: no runner registered for mode '${mode}' (registered: ${Object.keys(registry).join(", ") || "none"})`
|
|
1748
|
-
);
|
|
1749
|
-
}
|
|
1750
|
-
const now = options.now ?? Date.now;
|
|
1751
|
-
const signal = options.signal ?? new AbortController().signal;
|
|
1752
|
-
const start = now();
|
|
1753
|
-
try {
|
|
1754
|
-
const output = await runner(signal);
|
|
1755
|
-
return { mode, ok: true, output, durationMs: now() - start };
|
|
1756
|
-
} catch (err) {
|
|
1757
|
-
return {
|
|
1758
|
-
mode,
|
|
1759
|
-
ok: false,
|
|
1760
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1761
|
-
durationMs: now() - start
|
|
1762
|
-
};
|
|
1763
|
-
}
|
|
1764
|
-
}
|
|
1765
|
-
function coderLoopRunner(options) {
|
|
1766
|
-
const delegate = createDefaultCoderDelegate({
|
|
1767
|
-
sandboxClient: options.sandboxClient,
|
|
1768
|
-
...options.reviewer ? { reviewer: options.reviewer } : {},
|
|
1769
|
-
...options.winnerSelection ? { winnerSelection: options.winnerSelection } : {},
|
|
1770
|
-
...options.fanoutHarnesses ? { fanoutHarnesses: options.fanoutHarnesses } : {}
|
|
1771
|
-
});
|
|
1772
|
-
return async (signal) => {
|
|
1773
|
-
const ctx = { signal, report: () => {
|
|
1774
|
-
} };
|
|
1775
|
-
return delegate(options.args, ctx);
|
|
1776
|
-
};
|
|
1777
|
-
}
|
|
1778
|
-
function reviewLoopRunner(options) {
|
|
1779
|
-
return coderLoopRunner(options);
|
|
1780
|
-
}
|
|
1781
|
-
function dynamicLoopRunner(o) {
|
|
1782
|
-
return async (signal) => runLoop({
|
|
1783
|
-
driver: createDynamicDriver({
|
|
1784
|
-
planner: o.planner,
|
|
1785
|
-
...o.maxIterations !== void 0 ? { maxIterations: o.maxIterations } : {},
|
|
1786
|
-
...o.maxFanout !== void 0 ? { maxFanout: o.maxFanout } : {}
|
|
1787
|
-
}),
|
|
1788
|
-
...o.agentRun ? { agentRun: o.agentRun } : {},
|
|
1789
|
-
...o.agentRuns ? { agentRuns: o.agentRuns } : {},
|
|
1790
|
-
output: o.output,
|
|
1791
|
-
...o.validator ? { validator: o.validator } : {},
|
|
1792
|
-
task: o.task,
|
|
1793
|
-
ctx: { sandboxClient: o.sandboxClient, signal },
|
|
1794
|
-
...o.maxIterations !== void 0 ? { maxIterations: o.maxIterations } : {}
|
|
1795
|
-
});
|
|
1796
|
-
}
|
|
1797
|
-
function researchLoopRunner(o) {
|
|
1798
|
-
const gate = createKbGate(o.gate);
|
|
1799
|
-
const maxRounds = Math.max(1, Math.trunc(o.maxRounds ?? 1));
|
|
1800
|
-
return async (signal) => {
|
|
1801
|
-
const accepted = [];
|
|
1802
|
-
let vetoed = [];
|
|
1803
|
-
let rounds = 0;
|
|
1804
|
-
for (let round = 0; round < maxRounds; round += 1) {
|
|
1805
|
-
if (signal.aborted) break;
|
|
1806
|
-
rounds += 1;
|
|
1807
|
-
const candidates = await o.research(round, vetoed);
|
|
1808
|
-
if (candidates.length === 0) break;
|
|
1809
|
-
vetoed = [];
|
|
1810
|
-
for (const c of candidates) {
|
|
1811
|
-
const v = await gate(c);
|
|
1812
|
-
if (v.accepted) accepted.push(c);
|
|
1813
|
-
else vetoed.push({ candidate: c, vetoedBy: v.vetoedBy, reason: v.reason });
|
|
1814
|
-
}
|
|
1815
|
-
if (vetoed.length === 0) break;
|
|
1816
|
-
}
|
|
1817
|
-
return { accepted, vetoed, rounds };
|
|
1818
|
-
};
|
|
1819
|
-
}
|
|
1820
|
-
function selfImproveLoopRunner(options) {
|
|
1821
|
-
return async () => optimizePrompt(options);
|
|
1822
|
-
}
|
|
1823
|
-
function auditLoopRunner(options) {
|
|
1824
|
-
return async () => runAnalystLoop(options);
|
|
1825
|
-
}
|
|
1826
|
-
|
|
1827
1747
|
// src/model-resolution.ts
|
|
1828
1748
|
var DEFAULT_ROUTER_BASE_URL = "https://router.tangle.tools";
|
|
1829
1749
|
function resolveRouterBaseUrl(env = {}) {
|
|
@@ -2816,6 +2736,7 @@ export {
|
|
|
2816
2736
|
ConfigError,
|
|
2817
2737
|
DEFAULT_MAX_DEPTH,
|
|
2818
2738
|
DEFAULT_ROUTER_BASE_URL,
|
|
2739
|
+
DELEGATED_LOOP_MODES,
|
|
2819
2740
|
DeadlineExceededError,
|
|
2820
2741
|
FORWARD_HEADERS,
|
|
2821
2742
|
FileConversationJournal,
|
|
@@ -2851,11 +2772,13 @@ export {
|
|
|
2851
2772
|
exportEvalRuns,
|
|
2852
2773
|
getModels,
|
|
2853
2774
|
handleChatTurn,
|
|
2775
|
+
isDelegatedLoopMode,
|
|
2854
2776
|
isDepthExceeded,
|
|
2855
2777
|
loopEventToOtelSpan,
|
|
2856
2778
|
makePerAttemptSignal,
|
|
2857
2779
|
mcpToolsForRuntimeMcp,
|
|
2858
2780
|
mcpToolsForRuntimeMcpSubset,
|
|
2781
|
+
parseLoopRunnerArgv,
|
|
2859
2782
|
readDepth,
|
|
2860
2783
|
readinessServerSentEvent,
|
|
2861
2784
|
researchLoopRunner,
|
|
@@ -2867,6 +2790,7 @@ export {
|
|
|
2867
2790
|
runConversation,
|
|
2868
2791
|
runConversationStream,
|
|
2869
2792
|
runDelegatedLoop,
|
|
2793
|
+
runLoopRunnerCli,
|
|
2870
2794
|
runtimeStreamServerSentEvent,
|
|
2871
2795
|
sanitizeAgentRuntimeEvent,
|
|
2872
2796
|
sanitizeKnowledgeReadinessReport,
|