@tangle-network/agent-runtime 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +95 -203
  2. package/dist/agent.d.ts +3 -2
  3. package/dist/agent.js +5 -7
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +28 -2
  6. package/dist/analyst-loop.js +4 -1
  7. package/dist/audit.d.ts +93 -0
  8. package/dist/audit.js +312 -0
  9. package/dist/audit.js.map +1 -0
  10. package/dist/chunk-4B6U4CVQ.js +15 -0
  11. package/dist/chunk-4B6U4CVQ.js.map +1 -0
  12. package/dist/chunk-65FQLI4V.js +4089 -0
  13. package/dist/chunk-65FQLI4V.js.map +1 -0
  14. package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
  15. package/dist/chunk-GN75RGM6.js.map +1 -0
  16. package/dist/chunk-GSUO5QS6.js +146 -0
  17. package/dist/chunk-GSUO5QS6.js.map +1 -0
  18. package/dist/chunk-HNUXAZIJ.js +580 -0
  19. package/dist/chunk-HNUXAZIJ.js.map +1 -0
  20. package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
  21. package/dist/chunk-I42NHLKX.js.map +1 -0
  22. package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
  23. package/dist/chunk-JNPK46YH.js.map +1 -0
  24. package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
  25. package/dist/chunk-KADIJAD4.js.map +1 -0
  26. package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
  27. package/dist/chunk-KPN7OQ64.js.map +1 -0
  28. package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
  29. package/dist/chunk-WIR4HOOJ.js +27 -0
  30. package/dist/chunk-WIR4HOOJ.js.map +1 -0
  31. package/dist/coder-DCWFQpmJ.d.ts +114 -0
  32. package/dist/driver-C-mtBo7h.d.ts +221 -0
  33. package/dist/improvement.d.ts +0 -1
  34. package/dist/improvement.js +0 -5
  35. package/dist/improvement.js.map +1 -1
  36. package/dist/index.d.ts +122 -9
  37. package/dist/index.js +398 -10
  38. package/dist/index.js.map +1 -1
  39. package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
  40. package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
  41. package/dist/loop-runner-bin.d.ts +8 -6
  42. package/dist/loop-runner-bin.js +6 -8
  43. package/dist/loops.d.ts +7 -393
  44. package/dist/loops.js +96 -27
  45. package/dist/mcp/bin.js +7 -7
  46. package/dist/mcp/bin.js.map +1 -1
  47. package/dist/mcp/index.d.ts +286 -13
  48. package/dist/mcp/index.js +341 -9
  49. package/dist/mcp/index.js.map +1 -1
  50. package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
  51. package/dist/profiles.d.ts +385 -86
  52. package/dist/profiles.js +549 -4
  53. package/dist/profiles.js.map +1 -1
  54. package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
  55. package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
  56. package/dist/runtime.d.ts +1964 -0
  57. package/dist/runtime.js +114 -0
  58. package/dist/runtime.js.map +1 -0
  59. package/dist/substrate-CUgk7F7s.d.ts +77 -0
  60. package/dist/topology.d.ts +73 -0
  61. package/dist/topology.js +111 -0
  62. package/dist/topology.js.map +1 -0
  63. package/dist/types-BfoeiQRZ.d.ts +438 -0
  64. package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
  65. package/dist/workflow.d.ts +4 -3
  66. package/dist/workflow.js +4 -5
  67. package/dist/workflow.js.map +1 -1
  68. package/package.json +37 -28
  69. package/skills/agent-runtime-adoption/SKILL.md +32 -29
  70. package/skills/generate-eval/SKILL.md +60 -0
  71. package/dist/chunk-3HMHSN22.js.map +0 -1
  72. package/dist/chunk-GFKVVRQ7.js.map +0 -1
  73. package/dist/chunk-HVYOHJHK.js.map +0 -1
  74. package/dist/chunk-KDMRUD2P.js.map +0 -1
  75. package/dist/chunk-PY6NMZYX.js +0 -52
  76. package/dist/chunk-PY6NMZYX.js.map +0 -1
  77. package/dist/chunk-S7JXV32P.js +0 -947
  78. package/dist/chunk-S7JXV32P.js.map +0 -1
  79. package/dist/chunk-SKUZZCHE.js.map +0 -1
  80. package/dist/chunk-SQSCRJ7U.js +0 -65
  81. package/dist/chunk-SQSCRJ7U.js.map +0 -1
  82. package/dist/chunk-VOX6Z3II.js +0 -90
  83. package/dist/chunk-VOX6Z3II.js.map +0 -1
  84. package/dist/chunk-XBUG326M.js +0 -261
  85. package/dist/chunk-XBUG326M.js.map +0 -1
  86. package/dist/dynamic-wUgp6UKs.d.ts +0 -108
  87. package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
  88. package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0
@@ -0,0 +1,114 @@
1
+ import { AgentProfile } from '@tangle-network/sandbox';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DnYoHvvZ.js';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * `coderProfile` — opinionated preset for code-modification tasks.
8
+ *
9
+ * The agent is told to:
10
+ * - work on a fresh branch inside the sandbox workspace
11
+ * - keep the patch minimal (under `maxDiffLines`)
12
+ * - avoid `forbiddenPaths`
13
+ * - run `testCmd` and `typecheckCmd`
14
+ * - emit a final JSON result the output adapter parses
15
+ *
16
+ * The profile is stateless and agent-agnostic — `harness` selects the
17
+ * sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For
18
+ * heterogeneous fanout, use `multiHarnessCoderFanout`.
19
+ */
20
+
21
+ /** @experimental */
22
+ interface CoderTask {
23
+ /** What the agent must accomplish. Free-form prose. */
24
+ goal: string;
25
+ /** Absolute path inside the sandbox where the repo lives. */
26
+ repoRoot: string;
27
+ /** Default `main`. The branch the agent diffs against. */
28
+ baseBranch?: string;
29
+ /** Default `pnpm test --run`. */
30
+ testCmd?: string;
31
+ /** Default `pnpm typecheck`. */
32
+ typecheckCmd?: string;
33
+ /** Files the agent may inspect for context. Surfaced verbatim in the prompt. */
34
+ contextFiles?: string[];
35
+ /**
36
+ * Paths the agent must not touch. Validator hard-fails on any match.
37
+ * Use glob-free literal path prefixes for unambiguous enforcement.
38
+ */
39
+ forbiddenPaths?: string[];
40
+ /** Default 400. Hard cap; validator hard-fails when exceeded. */
41
+ maxDiffLines?: number;
42
+ }
43
+ /** @experimental */
44
+ interface CoderOutput {
45
+ /** Branch the agent wrote the patch on. */
46
+ branch: string;
47
+ /** Unified diff (`git diff <base>..HEAD`). */
48
+ patch: string;
49
+ testResult: {
50
+ passed: boolean;
51
+ output: string;
52
+ };
53
+ typecheckResult: {
54
+ passed: boolean;
55
+ output: string;
56
+ };
57
+ diffStats: {
58
+ filesChanged: number;
59
+ insertions: number;
60
+ deletions: number;
61
+ };
62
+ /** Optional reviewer commentary surfaced by the agent. */
63
+ reviewerNotes?: string;
64
+ }
65
+ /** @experimental */
66
+ interface CoderProfileOptions {
67
+ /** Sandbox-SDK backend.type. Default `'claude-code'`. */
68
+ harness?: string;
69
+ /** Default model id passed in `AgentProfile.model.default`. */
70
+ model?: string;
71
+ /** Custom system prompt replacement. Default = built-in coder preset. */
72
+ systemPrompt?: string;
73
+ /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
74
+ name?: string;
75
+ }
76
+ /**
77
+ * Build a coder preset.
78
+ *
79
+ * `validator` enforces test + typecheck + a 400-line default diff cap. For
80
+ * per-task `forbiddenPaths` / `maxDiffLines` enforcement, pass `task` here
81
+ * — the returned validator closes over its constraints. Without a task
82
+ * the validator falls back to the default cap and skips path enforcement.
83
+ *
84
+ * @experimental
85
+ */
86
+ declare function coderProfile(options?: CoderProfileOptions & {
87
+ task?: CoderTask;
88
+ }): {
89
+ profile: AgentProfile;
90
+ taskToPrompt: (task: CoderTask) => string;
91
+ output: OutputAdapter<CoderOutput>;
92
+ validator: Validator<CoderOutput>;
93
+ agentRunSpec: AgentRunSpec<CoderTask>;
94
+ };
95
+ /** @experimental */
96
+ interface MultiHarnessCoderFanoutOptions {
97
+ /**
98
+ * Sandbox-SDK backend.type identifiers, one per parallel agent. Default:
99
+ * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
100
+ */
101
+ harnesses?: string[];
102
+ /** Optional per-harness model override. Indexed parallel to `harnesses`. */
103
+ models?: (string | undefined)[];
104
+ }
105
+ /** @experimental */
106
+ declare function multiHarnessCoderFanout(options?: MultiHarnessCoderFanoutOptions): {
107
+ agentRuns: AgentRunSpec<CoderTask>[];
108
+ output: OutputAdapter<CoderOutput>;
109
+ validator: Validator<CoderOutput>;
110
+ driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>;
111
+ };
112
+ declare function createCoderValidator(task: CoderTask): Validator<CoderOutput>;
113
+
114
+ export { type CoderOutput as C, type MultiHarnessCoderFanoutOptions as M, type CoderTask as a, type CoderProfileOptions as b, coderProfile as c, createCoderValidator as d, multiHarnessCoderFanout as m };
@@ -0,0 +1,221 @@
1
+ import { AnalystFinding } from '@tangle-network/agent-eval';
2
+ import { I as Iteration, D as Driver } from './types-DnYoHvvZ.js';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * Completion / satisfiability — the OTHER output of the pluggable analyst (the steer output
8
+ * is `AnalystFinding[]` via the `analyze` hook; this is the "is it done?" output via the
9
+ * `complete` hook). A `CompletionAnalyst` reads a node's trace and returns a `CompletionVerdict`
10
+ * the PARENT (driver) acts on: end the node, or keep going. It fits ANY node and composes to
11
+ * any depth — a 1-deep loop has one; an N-deep tree has one per node.
12
+ *
13
+ * The verdict's authority scales with its DETERMINISM (the thing that varies by task):
14
+ * - `deterministic` — build/test/lint pass, a proof checks, every claim's citation resolves:
15
+ * ground truth, the driver TRUSTS it and ends. Not an opinion.
16
+ * - `probabilistic` — a quality/soundness judgment (marketing, "the experiment is sound"):
17
+ * ADVISORY. It passes to the driver with its reasons; the driver validates (here: a
18
+ * confidence threshold; a richer driver may re-examine the reasons) before ending.
19
+ *
20
+ * Two stop-signal mechanisms, by node mode, both → one `CompletionVerdict`:
21
+ * - sandbox-agent (text stream): a unique per-node STOP SENTINEL the agent emits when done
22
+ * (`stopSentinel` / `sentinelCompletion`) — ralph-loop style; the seed makes it
23
+ * unguessable + attributable, so it can't be spuriously emitted or confused with content.
24
+ * - deterministic check (compile/test/citation/proof): `deterministicCompletion(check)` —
25
+ * a verifier over the output, never the judge verdict (selector ≠ judge holds).
26
+ */
27
+
28
+ /** Trace-derived evidence for a completion claim — an artifact (output) or a verifier metric,
29
+ * never the judge's own verdict. Mirrors the steer-firewall's provenance discipline. */
30
+ interface CompletionEvidence {
31
+ kind: 'artifact' | 'metric';
32
+ uri: string;
33
+ }
34
+ /** The "is it done?" verdict an analyst returns to the parent. */
35
+ interface CompletionVerdict {
36
+ done: boolean;
37
+ /** How verifiable the claim is — sets whether the driver trusts it or validates it. */
38
+ determinism: 'deterministic' | 'probabilistic';
39
+ /** Why the analyst believes it is (or isn't) done — what the driver validates. */
40
+ reasons?: string;
41
+ /** 0..1, for probabilistic verdicts; the driver's validation threshold reads this. */
42
+ confidence?: number;
43
+ evidence?: ReadonlyArray<CompletionEvidence>;
44
+ }
45
+ /** Reads a node's trace → a completion verdict. Same input shape as the `analyze` hook, so
46
+ * ONE analyst node can back both channels (findings for steer, a verdict for stop). */
47
+ interface CompletionAnalyst<Task, Output> {
48
+ assess(input: {
49
+ task: Task;
50
+ history: ReadonlyArray<Iteration<Task, Output>>;
51
+ }): CompletionVerdict | Promise<CompletionVerdict>;
52
+ }
53
+ /** When a verdict authorizes the driver to END. Deterministic → trust (ground truth);
54
+ * probabilistic → validate by confidence threshold (the driver's check). */
55
+ interface CompletionPolicy {
56
+ /** Minimum confidence a PROBABILISTIC verdict must clear to end. Default 0.8. */
57
+ minConfidence?: number;
58
+ }
59
+ declare function completionAuthorizes(v: CompletionVerdict, policy?: CompletionPolicy): boolean;
60
+ /**
61
+ * A unique, attributable stop sentinel for a node (ralph-loop style). Deterministic from the
62
+ * seed (no Math.random — reproducible + attributable to the node); the agent is instructed to
63
+ * emit it VERBATIM when it judges itself done. Unguessable enough that content never trips it.
64
+ */
65
+ declare function stopSentinel(seed: string): string;
66
+ /**
67
+ * Completion for a sandbox-agent node: done iff the latest output carries the node's stop
68
+ * sentinel. PROBABILISTIC (the agent's own self-judgment) — the driver validates it.
69
+ */
70
+ declare function sentinelCompletion<Task>(sentinel: string, opts?: {
71
+ confidence?: number;
72
+ }): CompletionAnalyst<Task, string>;
73
+ /**
74
+ * Completion for a DETERMINISTIC check (build/test/lint/citation/proof): done iff the check
75
+ * passes. Ground truth — the driver ends directly, no validation. The check reads the output
76
+ * (a verifier), never the judge verdict — selector ≠ judge stays intact.
77
+ */
78
+ declare function deterministicCompletion<Task, Output>(check: (output: Output, history: ReadonlyArray<Iteration<Task, Output>>) => {
79
+ passed: boolean;
80
+ reasons?: string;
81
+ }): CompletionAnalyst<Task, Output>;
82
+
83
+ /**
84
+ * @experimental
85
+ *
86
+ * Dynamic driver — the agent authors the loop topology at runtime.
87
+ *
88
+ * Where a fixed-shape driver encodes one topology as a pure function of
89
+ * history, this driver delegates the per-round shape to an injected
90
+ * `TopologyPlanner`. Each round the planner inspects the task + iteration
91
+ * history and emits one `TopologyMove`:
92
+ * - `refine` → one task next round (optionally rewritten from the prior attempt)
93
+ * - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
94
+ * 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
95
+ * - `stop` → terminate; the kernel selects the winner across all iterations
96
+ *
97
+ * The planner is the brain; this driver is the structure. It maps moves onto
98
+ * the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
99
+ * and fails loud on a malformed move. The planner is injected — so a test can
100
+ * drive a deterministic policy through the real kernel, and production can wire
101
+ * it to an LLM-backed, agent-authored planner.
102
+ *
103
+ * Topology is orthogonal to harness: the planner never names a backend. Which
104
+ * harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
105
+ * to, so one dynamic driver works across claude-code, codex, opencode, pi —
106
+ * including fanning a single round across several at once.
107
+ */
108
+
109
+ /** Terminal once `decide` returns `'done'` (a kernel terminal decision). */
110
+ type DriverDecision = 'continue' | 'done';
111
+ /**
112
+ * One topology decision for the next round. `fanout` carries explicit tasks
113
+ * rather than a count so the planner can issue heterogeneous branches (a
114
+ * different sub-task per harness); pass N copies of one task for a homogeneous
115
+ * fanout that relies on `agentRuns` diversity instead.
116
+ *
117
+ * @experimental
118
+ */
119
+ type TopologyMove<Task> = {
120
+ kind: 'refine';
121
+ task: Task;
122
+ rationale?: string;
123
+ parentIndex?: number;
124
+ } | {
125
+ kind: 'fanout';
126
+ tasks: Task[];
127
+ rationale?: string;
128
+ parentIndex?: number;
129
+ } | {
130
+ kind: 'stop';
131
+ rationale?: string;
132
+ } | {
133
+ kind: 'select';
134
+ index: number;
135
+ rationale?: string;
136
+ };
137
+ /** @experimental */
138
+ interface PlannerContext<Task, Output> {
139
+ /** The root task the loop was invoked with — stable across rounds. */
140
+ task: Task;
141
+ /** Every iteration so far, in dispatch order, with outputs + verdicts. */
142
+ history: ReadonlyArray<Iteration<Task, Output>>;
143
+ /** `history.length` — iterations already spent. */
144
+ iterationsSpent: number;
145
+ /** Iterations left before the driver's `maxIterations` cap forces a stop. */
146
+ iterationsRemaining: number;
147
+ /**
148
+ * Trace-analyst findings about the attempts so far — populated only when an
149
+ * `analyze` hook is wired into the driver (see CreateDriverOptions).
150
+ * This is the channel that lets the planner steer from the DIAGNOSIS
151
+ * (`f(trace, findings)`), not the verdict score alone. Undefined = no analyst
152
+ * wired (the planner runs exactly as before). @experimental
153
+ */
154
+ analyses?: ReadonlyArray<AnalystFinding>;
155
+ }
156
+ /**
157
+ * Chooses the next topology move from the task + history. Sync or async; an
158
+ * async planner is where an LLM call goes (an agent-authored topology planner).
159
+ *
160
+ * @experimental
161
+ */
162
+ type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
163
+ /**
164
+ * Input to the optional `analyze` hook: the root task + the trace so far. The
165
+ * hook turns this into `AnalystFinding[]` — the caller's seam to `runAnalystLoop`.
166
+ * @experimental
167
+ */
168
+ interface AnalyzeInput<Task, Output> {
169
+ task: Task;
170
+ history: ReadonlyArray<Iteration<Task, Output>>;
171
+ }
172
+ /** @experimental */
173
+ interface CreateDriverOptions<Task, Output> {
174
+ /** The agent-authored topology policy. Invoked once per round in `plan`. */
175
+ planner: TopologyPlanner<Task, Output>;
176
+ /**
177
+ * Optional trace-analyst hook. When set, the driver calls it each round AFTER
178
+ * the first (a trace must exist) and BEFORE the planner, then passes the
179
+ * findings to the planner via `PlannerContext.analyses` — so the planner
180
+ * decides from the diagnosis, not the verdict score alone. This is the seam to
181
+ * `runAnalystLoop`; it lives on the driver so `run-loop` stays analyst-free
182
+ * (the layering rule). Fail-loud: a throwing or non-array hook aborts the round
183
+ * (no silent empty findings).
184
+ */
185
+ analyze?: (input: AnalyzeInput<Task, Output>) => ReadonlyArray<AnalystFinding> | Promise<ReadonlyArray<AnalystFinding>>;
186
+ /**
187
+ * Optional completion analyst — the DEPLOYABLE, non-oracle stop. Each round (after a
188
+ * trace exists) the driver asks "is it done?"; if the verdict AUTHORIZES ending
189
+ * (deterministic = trust ground truth; probabilistic = clears `completionPolicy`'s
190
+ * confidence), the driver stops BEFORE consulting the planner. This is the satisfiability
191
+ * primitive — usable at 1 deep, composing to any depth (one per node). Fail-loud: a
192
+ * throwing or non-verdict assess aborts the round. Distinct from `analyze` (the steer
193
+ * channel) though one analyst node may back both.
194
+ */
195
+ complete?: CompletionAnalyst<Task, Output>;
196
+ /** Validation policy for a probabilistic completion verdict (the driver's check). */
197
+ completionPolicy?: CompletionPolicy;
198
+ /**
199
+ * Hard safety cap on total iterations. When reached, the driver stops before
200
+ * consulting the planner. Default 8. Set the kernel's `runLoop`
201
+ * `maxIterations >= ` this so the driver's cap governs and the loop closes on
202
+ * a clean `'done'` rather than a truncated `'continue'`.
203
+ */
204
+ maxIterations?: number;
205
+ /** Max branches a single `fanout` move may dispatch. Default 4. */
206
+ maxFanout?: number;
207
+ /** Stable identifier surfaced in trace events. Default `'dynamic'`. */
208
+ name?: string;
209
+ }
210
+ /** @experimental */
211
+ declare function createDriver<Task, Output>(options: CreateDriverOptions<Task, Output>): Driver<Task, Output, DriverDecision>;
212
+ /**
213
+ * Compact, planner-facing rendering of trace-analyst findings — the diagnosis the
214
+ * planner steers from. Empty input renders to '' (callers omit the section). Shows
215
+ * severity·area·claim·recommended_action·confidence; raw evidence_refs/metadata are
216
+ * for renderers that know the analyst, not the topology decision.
217
+ * @experimental
218
+ */
219
+ declare function renderAnalyses(findings: ReadonlyArray<AnalystFinding>): string;
220
+
221
+ export { type AnalyzeInput as A, type CreateDriverOptions as C, type DriverDecision as D, type PlannerContext as P, type TopologyPlanner as T, type CompletionAnalyst as a, type CompletionEvidence as b, type CompletionPolicy as c, type CompletionVerdict as d, type TopologyMove as e, completionAuthorizes as f, createDriver as g, deterministicCompletion as h, stopSentinel as i, renderAnalyses as r, sentinelCompletion as s };
@@ -1,7 +1,6 @@
1
1
  import { AnalystFinding } from '@tangle-network/agent-eval';
2
2
  import { L as LocalHarness, r as runLocalHarness } from './local-harness-KrdFTY5R.js';
3
3
  import { LabeledScenarioStore, WorktreeAdapter, ImprovementDriver } from '@tangle-network/agent-eval/campaign';
4
- export { O as OptimizePromptOptions, b as OptimizePromptReflection, a as OptimizePromptResult, o as optimizePrompt } from './optimize-prompt-D-urF2wW.js';
5
4
  import { S as SurfaceImprovementEdit } from './improvement-adapter-BC4HhuAR.js';
6
5
  import { I as ImprovementAdapter } from './types-p8dWBIXL.js';
7
6
  import 'node:child_process';
@@ -1,10 +1,6 @@
1
1
  import {
2
2
  runLocalHarness
3
3
  } from "./chunk-GLR25NG7.js";
4
- import {
5
- optimizePrompt
6
- } from "./chunk-VOX6Z3II.js";
7
- import "./chunk-SQSCRJ7U.js";
8
4
  import "./chunk-DGUM43GV.js";
9
5
 
10
6
  // src/improvement/agentic-generator.ts
@@ -160,7 +156,6 @@ function applyPatch(patch, cwd) {
160
156
  export {
161
157
  agenticGenerator,
162
158
  improvementDriver,
163
- optimizePrompt,
164
159
  reflectiveGenerator
165
160
  };
166
161
  //# sourceMappingURL=improvement.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/improvement/agentic-generator.ts","../src/improvement/improvement-driver.ts","../src/improvement/reflective-generator.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `agenticGenerator` — the full-agentic `CandidateGenerator`: the\n * `shots=N, sandbox=on` setting of the one `improvementDriver`. It runs a real\n * coding harness (claude / codex / opencode) inside the candidate worktree the\n * driver already created, letting the agent read the codebase + the research\n * report and make the change in place. The driver then commits the worktree\n * into a `CodeSurface`.\n *\n * Mechanism: identical to the proven Phase-2.8 in-process executor — spawn the\n * harness as a subprocess with `cwd` = the worktree, on the same filesystem,\n * so edits land in place (no sandbox-mount round-trip). `runLocalHarness` is\n * the verified primitive. The OUTER sandbox is the improvement loop's own\n * execution context; the generator does not nest a second sandbox per\n * candidate (which would reintroduce a host↔sandbox worktree-transport\n * problem that does not need solving here).\n *\n * `maxShots` is the DEPTH dial: the harness runs once; if it produced no change\n * (the worktree stays clean), the generator refines the prompt and retries, up\n * to `maxShots` times. A harness that already changed files returns on shot 1.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport { type LocalHarness, runLocalHarness } from '../mcp/local-harness'\nimport type { CandidateGenerator } from './improvement-driver'\n\nexport interface AgenticGeneratorOptions {\n /** Local coding harness to run in the worktree. Default `claude`. */\n harness?: LocalHarness\n /** Per-shot wall-clock timeout (ms). Default = `runLocalHarness` default (5m). */\n timeoutMs?: number\n /** Build the harness task prompt from the report + findings. 
Override for\n * domain phrasing; the default turns findings into a concrete coder task. */\n buildPrompt?: (args: { report: unknown; findings: AnalystFinding[] }) => string\n /** Test seam — inject the harness runner (defaults to `runLocalHarness`). */\n runHarness?: typeof runLocalHarness\n /** Test seam — inject the worktree-dirty check (defaults to `git status`). */\n isDirty?: (worktreePath: string) => boolean\n}\n\nexport function agenticGenerator(opts: AgenticGeneratorOptions = {}): CandidateGenerator {\n const harness = opts.harness ?? 'claude'\n const buildPrompt = opts.buildPrompt ?? defaultBuildPrompt\n const run = opts.runHarness ?? runLocalHarness\n const dirty = opts.isDirty ?? worktreeDirty\n\n return {\n kind: `agentic:${harness}`,\n async generate({ worktreePath, report, findings, maxShots, signal }) {\n let prompt = buildPrompt({ report, findings })\n const shots = Math.max(1, maxShots)\n\n for (let shot = 0; shot < shots; shot++) {\n if (signal.aborted) break\n await run({\n harness,\n cwd: worktreePath,\n taskPrompt: prompt,\n timeoutMs: opts.timeoutMs,\n signal,\n })\n // The worktree IS the signal: if the harness touched files, we have a\n // candidate. We don't trust the harness's stdout — we trust the diff.\n if (dirty(worktreePath)) {\n return { applied: true, summary: summarize(findings) }\n }\n // No change this shot — give the next attempt explicit feedback.\n prompt = refine(prompt)\n }\n return { applied: false, summary: '' }\n },\n }\n}\n\n/** Turn the analyst's findings (+ optional report) into a concrete coder task. */\nfunction defaultBuildPrompt(args: { report: unknown; findings: AnalystFinding[] }): string {\n const lines: string[] = [\n 'You are improving this codebase based on an evaluation analysis.',\n 'Make the smallest set of edits that addresses the findings below, then stop.',\n 'Do not change unrelated code. 
Do not commit — leave changes in the working tree.',\n '',\n 'Findings:',\n ]\n for (const f of args.findings) {\n const where = f.subject ? ` [${f.subject}]` : ''\n lines.push(`- (${f.severity})${where} ${f.claim}`)\n if (f.recommended_action) lines.push(` → ${f.recommended_action}`)\n }\n return lines.join('\\n')\n}\n\nfunction refine(prompt: string): string {\n return `${prompt}\\n\\nNOTE: your previous attempt left the working tree unchanged. Make the concrete file edits now.`\n}\n\n/** A one-line summary for the commit message, derived from the findings. */\nfunction summarize(findings: AnalystFinding[]): string {\n if (findings.length === 0) return 'agentic improvement'\n if (findings.length === 1) return `agentic: ${truncate(findings[0]!.claim, 64)}`\n return `agentic: ${findings.length} findings addressed`\n}\n\nfunction truncate(s: string, n: number): string {\n return s.length <= n ? s : `${s.slice(0, n - 1)}…`\n}\n\n/** Non-empty `git status --porcelain` ⇒ the harness changed the worktree.\n * Fails loud: the worktree is a fresh checkout, so a git error here means\n * something is genuinely broken (git missing, corrupt index, killed mid-run).\n * Folding that into `false` would silently discard a candidate and mask the\n * real failure — forbidden by the no-silent-fallbacks doctrine. */\nfunction worktreeDirty(worktreePath: string): boolean {\n const result = spawnSync('git', ['status', '--porcelain'], {\n cwd: worktreePath,\n encoding: 'utf-8',\n })\n if (result.error) {\n throw new Error(\n `agenticGenerator: git status failed to spawn in ${worktreePath}: ${result.error.message}`,\n )\n }\n if (result.status !== 0) {\n throw new Error(\n `agenticGenerator: git status exited ${result.status} in ${worktreePath}: ${result.stderr.trim()}`,\n )\n }\n return result.stdout.trim().length > 0\n}\n","/**\n * @experimental\n *\n * `improvementDriver` — the ONE reflective/agentic improvement driver for\n * agent-eval's improvement loop. 
It implements `ImprovementDriver` and owns\n * the candidate lifecycle (worktree create → generate → finalize/discard,\n * × populationSize); it delegates the only thing that genuinely varies — HOW\n * a candidate change is produced — to a pluggable `CandidateGenerator`.\n *\n * There is no separate \"analyst driver\" vs \"autoresearch driver\": those are\n * the SAME driver at two settings of a dial.\n * - cheap reflective path → `reflectiveGenerator` (shots=1, no sandbox;\n * applies pre-drafted patches)\n * - full agentic path → `agenticGenerator` (shots=N, sandbox runLoop;\n * an agent reads code + report and edits)\n * Both emit changes into a worktree the driver finalizes into a\n * `CodeSurface{ worktreeRef }` the loop measures on the holdout. See\n * agent-eval's `docs/design/self-improvement-engine.md`.\n */\n\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport type {\n CodeSurface,\n ImprovementDriver,\n LabeledScenarioStore,\n ProposeContext,\n WorktreeAdapter,\n} from '@tangle-network/agent-eval/campaign'\n\n/** The byte-producing seam — the ONE thing that differs between the cheap\n * reflective path and the full agentic path. A generator makes (uncommitted)\n * changes inside `worktreePath`; the driver commits them via the worktree\n * adapter's `finalize`. */\nexport interface CandidateGenerator {\n kind: string\n generate(args: {\n /** The candidate worktree — a fresh checkout of baseRef. Write changes here. */\n worktreePath: string\n /** Phase-2 research report (analyst findings + diff), opaque. */\n report: unknown\n /** Findings resolved from the report or the loop context. */\n findings: AnalystFinding[]\n /** Handle to all captured data, to ground the change. */\n dataset?: LabeledScenarioStore\n /** DEPTH: max iterations the generator may take (agentic uses this; the\n * reflective generator ignores it). 
*/\n maxShots: number\n signal: AbortSignal\n }): Promise<{ applied: boolean; summary: string }>\n}\n\nexport interface ImprovementDriverOptions {\n worktree: WorktreeAdapter\n generator: CandidateGenerator\n /** Base ref candidate worktrees fork from. Default `main`. */\n baseRef?: string\n}\n\nexport function improvementDriver(\n opts: ImprovementDriverOptions,\n): ImprovementDriver<AnalystFinding> {\n const baseRef = opts.baseRef ?? 'main'\n\n return {\n kind: `improvement:${opts.generator.kind}`,\n async propose(ctx) {\n const findings = resolveFindings(ctx)\n // No signal to act on — propose nothing rather than spin up worktrees.\n if (findings.length === 0 && ctx.report === undefined) return []\n\n const surfaces: CodeSurface[] = []\n for (let i = 0; i < ctx.populationSize; i++) {\n if (ctx.signal.aborted) break\n const wt = await opts.worktree.create({\n baseRef,\n label: `${opts.generator.kind}-gen${ctx.generation}-cand${i}`,\n })\n // Once a worktree exists it MUST be accounted for: finalized into a\n // surface, or discarded. A throw from generate()/finalize() must not\n // leak the worktree + branch — discard best-effort, then rethrow loud.\n try {\n const { applied, summary } = await opts.generator.generate({\n worktreePath: wt.path,\n report: ctx.report,\n findings,\n dataset: ctx.dataset,\n maxShots: ctx.maxImprovementShots ?? 1,\n signal: ctx.signal,\n })\n if (!applied) {\n await opts.worktree.discard(wt)\n continue\n }\n surfaces.push(await opts.worktree.finalize(wt, summary))\n } catch (err) {\n // Best-effort cleanup; never mask the original failure.\n await opts.worktree.discard(wt).catch(() => {})\n throw err\n }\n }\n return surfaces\n },\n }\n}\n\n/** Phase-2 report carries `findings` when present; else fall back to the\n * loop's `ctx.findings`. The report is opaque to the substrate, so probe it\n * structurally. 
*/\nfunction resolveFindings(ctx: ProposeContext<AnalystFinding>): AnalystFinding[] {\n const report = ctx.report\n if (report && typeof report === 'object' && 'findings' in report) {\n const f = (report as { findings: unknown }).findings\n if (Array.isArray(f) && f.length > 0) return f as AnalystFinding[]\n }\n return ctx.findings\n}\n","/**\n * @experimental\n *\n * `reflectiveGenerator` — the cheap, no-sandbox `CandidateGenerator`. It drafts\n * surface edits via the existing improvement adapter (`proposeFromFindings`,\n * one LLM patch per finding) and applies them as ONE coherent improvement into\n * the candidate worktree. `maxShots` is ignored — reflection is single-shot by\n * construction (the patches are already drafted).\n *\n * This is the `shots=1, sandbox=off` setting of the one improvement driver.\n * The `agenticGenerator` (sandbox runLoop) is the `shots=N, sandbox=on`\n * setting — both plug into the same `improvementDriver`.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport type { SurfaceImprovementEdit } from '../agent/improvement-adapter'\nimport type { ImprovementAdapter } from '../analyst-loop/types'\nimport type { CandidateGenerator } from './improvement-driver'\n\nexport interface ReflectiveGeneratorOptions {\n improvementAdapter: ImprovementAdapter<SurfaceImprovementEdit>\n}\n\nexport function reflectiveGenerator(opts: ReflectiveGeneratorOptions): CandidateGenerator {\n return {\n kind: 'reflective',\n async generate({ worktreePath, findings }) {\n const batch = await opts.improvementAdapter.proposeFromFindings(findings)\n if (batch.edits.length === 0) return { applied: false, summary: '' }\n\n let applied = 0\n for (const edit of batch.edits) {\n if (applyPatch(edit.patch, worktreePath)) applied++\n }\n if (applied === 0) return { applied: false, summary: '' }\n\n const summary =\n batch.edits.length === 1\n ? batch.edits[0]!.summary\n : `analyst: ${applied} surface edit${applied === 1 ? 
'' : 's'}`\n return { applied: true, summary }\n },\n }\n}\n\n/** Mirror the improvement adapter's proven apply invocation, run inside the\n * candidate worktree (a fresh checkout of baseRef, so `-p0` paths match). */\nfunction applyPatch(patch: string, cwd: string): boolean {\n const result = spawnSync('git', ['apply', '--whitespace=fix', '-p0', '-'], {\n cwd,\n input: patch,\n encoding: 'utf-8',\n })\n return result.status === 0\n}\n"],"mappings":";;;;;;;;;;AAuBA,SAAS,iBAAiB;AAmBnB,SAAS,iBAAiB,OAAgC,CAAC,GAAuB;AACvF,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,cAAc,KAAK,eAAe;AACxC,QAAM,MAAM,KAAK,cAAc;AAC/B,QAAM,QAAQ,KAAK,WAAW;AAE9B,SAAO;AAAA,IACL,MAAM,WAAW,OAAO;AAAA,IACxB,MAAM,SAAS,EAAE,cAAc,QAAQ,UAAU,UAAU,OAAO,GAAG;AACnE,UAAI,SAAS,YAAY,EAAE,QAAQ,SAAS,CAAC;AAC7C,YAAM,QAAQ,KAAK,IAAI,GAAG,QAAQ;AAElC,eAAS,OAAO,GAAG,OAAO,OAAO,QAAQ;AACvC,YAAI,OAAO,QAAS;AACpB,cAAM,IAAI;AAAA,UACR;AAAA,UACA,KAAK;AAAA,UACL,YAAY;AAAA,UACZ,WAAW,KAAK;AAAA,UAChB;AAAA,QACF,CAAC;AAGD,YAAI,MAAM,YAAY,GAAG;AACvB,iBAAO,EAAE,SAAS,MAAM,SAAS,UAAU,QAAQ,EAAE;AAAA,QACvD;AAEA,iBAAS,OAAO,MAAM;AAAA,MACxB;AACA,aAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAAA,IACvC;AAAA,EACF;AACF;AAGA,SAAS,mBAAmB,MAA+D;AACzF,QAAM,QAAkB;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,aAAW,KAAK,KAAK,UAAU;AAC7B,UAAM,QAAQ,EAAE,UAAU,KAAK,EAAE,OAAO,MAAM;AAC9C,UAAM,KAAK,MAAM,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,KAAK,EAAE;AACjD,QAAI,EAAE,mBAAoB,OAAM,KAAK,cAAS,EAAE,kBAAkB,EAAE;AAAA,EACtE;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,OAAO,QAAwB;AACtC,SAAO,GAAG,MAAM;AAAA;AAAA;AAClB;AAGA,SAAS,UAAU,UAAoC;AACrD,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,MAAI,SAAS,WAAW,EAAG,QAAO,YAAY,SAAS,SAAS,CAAC,EAAG,OAAO,EAAE,CAAC;AAC9E,SAAO,YAAY,SAAS,MAAM;AACpC;AAEA,SAAS,SAAS,GAAW,GAAmB;AAC9C,SAAO,EAAE,UAAU,IAAI,IAAI,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC;AACjD;AAOA,SAAS,cAAc,cAA+B;AACpD,QAAM,SAAS,UAAU,OAAO,CAAC,UAAU,aAAa,GAAG;AAAA,IACzD,KAAK;AAAA,IACL,UAAU;AAAA,EACZ,CAAC;AACD,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI;AAAA,MACR,mDAAmD,YAAY,KAAK,OAAO,MAAM,OAAO;AAAA,IAC1F;AAAA,EACF;AACA,MAAI,OAAO,WAAW,G
AAG;AACvB,UAAM,IAAI;AAAA,MACR,uCAAuC,OAAO,MAAM,OAAO,YAAY,KAAK,OAAO,OAAO,KAAK,CAAC;AAAA,IAClG;AAAA,EACF;AACA,SAAO,OAAO,OAAO,KAAK,EAAE,SAAS;AACvC;;;ACvEO,SAAS,kBACd,MACmC;AACnC,QAAM,UAAU,KAAK,WAAW;AAEhC,SAAO;AAAA,IACL,MAAM,eAAe,KAAK,UAAU,IAAI;AAAA,IACxC,MAAM,QAAQ,KAAK;AACjB,YAAM,WAAW,gBAAgB,GAAG;AAEpC,UAAI,SAAS,WAAW,KAAK,IAAI,WAAW,OAAW,QAAO,CAAC;AAE/D,YAAM,WAA0B,CAAC;AACjC,eAAS,IAAI,GAAG,IAAI,IAAI,gBAAgB,KAAK;AAC3C,YAAI,IAAI,OAAO,QAAS;AACxB,cAAM,KAAK,MAAM,KAAK,SAAS,OAAO;AAAA,UACpC;AAAA,UACA,OAAO,GAAG,KAAK,UAAU,IAAI,OAAO,IAAI,UAAU,QAAQ,CAAC;AAAA,QAC7D,CAAC;AAID,YAAI;AACF,gBAAM,EAAE,SAAS,QAAQ,IAAI,MAAM,KAAK,UAAU,SAAS;AAAA,YACzD,cAAc,GAAG;AAAA,YACjB,QAAQ,IAAI;AAAA,YACZ;AAAA,YACA,SAAS,IAAI;AAAA,YACb,UAAU,IAAI,uBAAuB;AAAA,YACrC,QAAQ,IAAI;AAAA,UACd,CAAC;AACD,cAAI,CAAC,SAAS;AACZ,kBAAM,KAAK,SAAS,QAAQ,EAAE;AAC9B;AAAA,UACF;AACA,mBAAS,KAAK,MAAM,KAAK,SAAS,SAAS,IAAI,OAAO,CAAC;AAAA,QACzD,SAAS,KAAK;AAEZ,gBAAM,KAAK,SAAS,QAAQ,EAAE,EAAE,MAAM,MAAM;AAAA,UAAC,CAAC;AAC9C,gBAAM;AAAA,QACR;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKA,SAAS,gBAAgB,KAAuD;AAC9E,QAAM,SAAS,IAAI;AACnB,MAAI,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAChE,UAAM,IAAK,OAAiC;AAC5C,QAAI,MAAM,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAG,QAAO;AAAA,EAC/C;AACA,SAAO,IAAI;AACb;;;ACrGA,SAAS,aAAAA,kBAAiB;AASnB,SAAS,oBAAoB,MAAsD;AACxF,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,SAAS,EAAE,cAAc,SAAS,GAAG;AACzC,YAAM,QAAQ,MAAM,KAAK,mBAAmB,oBAAoB,QAAQ;AACxE,UAAI,MAAM,MAAM,WAAW,EAAG,QAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAEnE,UAAI,UAAU;AACd,iBAAW,QAAQ,MAAM,OAAO;AAC9B,YAAI,WAAW,KAAK,OAAO,YAAY,EAAG;AAAA,MAC5C;AACA,UAAI,YAAY,EAAG,QAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAExD,YAAM,UACJ,MAAM,MAAM,WAAW,IACnB,MAAM,MAAM,CAAC,EAAG,UAChB,YAAY,OAAO,gBAAgB,YAAY,IAAI,KAAK,GAAG;AACjE,aAAO,EAAE,SAAS,MAAM,QAAQ;AAAA,IAClC;AAAA,EACF;AACF;AAIA,SAAS,WAAW,OAAe,KAAsB;AACvD,QAAM,SAASA,WAAU,OAAO,CAAC,SAAS,oBAAoB,OAAO,GAAG,GAAG;AAAA,IACzE;AAAA,IACA,OAAO;AAAA,IACP,UAAU;AAAA,EACZ,CAAC;AACD,SAAO,OAAO,WAAW;AAC3B;","names":["spawnSync"]}
1
+ {"version":3,"sources":["../src/improvement/agentic-generator.ts","../src/improvement/improvement-driver.ts","../src/improvement/reflective-generator.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `agenticGenerator` — the full-agentic `CandidateGenerator`: the\n * `shots=N, sandbox=on` setting of the one `improvementDriver`. It runs a real\n * coding harness (claude / codex / opencode) inside the candidate worktree the\n * driver already created, letting the agent read the codebase + the research\n * report and make the change in place. The driver then commits the worktree\n * into a `CodeSurface`.\n *\n * Mechanism: identical to the proven Phase-2.8 in-process executor — spawn the\n * harness as a subprocess with `cwd` = the worktree, on the same filesystem,\n * so edits land in place (no sandbox-mount round-trip). `runLocalHarness` is\n * the verified primitive. The OUTER sandbox is the improvement loop's own\n * execution context; the generator does not nest a second sandbox per\n * candidate (which would reintroduce a host↔sandbox worktree-transport\n * problem that does not need solving here).\n *\n * `maxShots` is the DEPTH dial: the harness runs once; if it produced no change\n * (the worktree stays clean), the generator refines the prompt and retries, up\n * to `maxShots` times. A harness that already changed files returns on shot 1.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport { type LocalHarness, runLocalHarness } from '../mcp/local-harness'\nimport type { CandidateGenerator } from './improvement-driver'\n\nexport interface AgenticGeneratorOptions {\n /** Local coding harness to run in the worktree. Default `claude`. */\n harness?: LocalHarness\n /** Per-shot wall-clock timeout (ms). Default = `runLocalHarness` default (5m). */\n timeoutMs?: number\n /** Build the harness task prompt from the report + findings. 
Override for\n * domain phrasing; the default turns findings into a concrete coder task. */\n buildPrompt?: (args: { report: unknown; findings: AnalystFinding[] }) => string\n /** Test seam — inject the harness runner (defaults to `runLocalHarness`). */\n runHarness?: typeof runLocalHarness\n /** Test seam — inject the worktree-dirty check (defaults to `git status`). */\n isDirty?: (worktreePath: string) => boolean\n}\n\nexport function agenticGenerator(opts: AgenticGeneratorOptions = {}): CandidateGenerator {\n const harness = opts.harness ?? 'claude'\n const buildPrompt = opts.buildPrompt ?? defaultBuildPrompt\n const run = opts.runHarness ?? runLocalHarness\n const dirty = opts.isDirty ?? worktreeDirty\n\n return {\n kind: `agentic:${harness}`,\n async generate({ worktreePath, report, findings, maxShots, signal }) {\n let prompt = buildPrompt({ report, findings })\n const shots = Math.max(1, maxShots)\n\n for (let shot = 0; shot < shots; shot++) {\n if (signal.aborted) break\n await run({\n harness,\n cwd: worktreePath,\n taskPrompt: prompt,\n timeoutMs: opts.timeoutMs,\n signal,\n })\n // The worktree IS the signal: if the harness touched files, we have a\n // candidate. We don't trust the harness's stdout — we trust the diff.\n if (dirty(worktreePath)) {\n return { applied: true, summary: summarize(findings) }\n }\n // No change this shot — give the next attempt explicit feedback.\n prompt = refine(prompt)\n }\n return { applied: false, summary: '' }\n },\n }\n}\n\n/** Turn the analyst's findings (+ optional report) into a concrete coder task. */\nfunction defaultBuildPrompt(args: { report: unknown; findings: AnalystFinding[] }): string {\n const lines: string[] = [\n 'You are improving this codebase based on an evaluation analysis.',\n 'Make the smallest set of edits that addresses the findings below, then stop.',\n 'Do not change unrelated code. 
Do not commit — leave changes in the working tree.',\n '',\n 'Findings:',\n ]\n for (const f of args.findings) {\n const where = f.subject ? ` [${f.subject}]` : ''\n lines.push(`- (${f.severity})${where} ${f.claim}`)\n if (f.recommended_action) lines.push(` → ${f.recommended_action}`)\n }\n return lines.join('\\n')\n}\n\nfunction refine(prompt: string): string {\n return `${prompt}\\n\\nNOTE: your previous attempt left the working tree unchanged. Make the concrete file edits now.`\n}\n\n/** A one-line summary for the commit message, derived from the findings. */\nfunction summarize(findings: AnalystFinding[]): string {\n if (findings.length === 0) return 'agentic improvement'\n if (findings.length === 1) return `agentic: ${truncate(findings[0]!.claim, 64)}`\n return `agentic: ${findings.length} findings addressed`\n}\n\nfunction truncate(s: string, n: number): string {\n return s.length <= n ? s : `${s.slice(0, n - 1)}…`\n}\n\n/** Non-empty `git status --porcelain` ⇒ the harness changed the worktree.\n * Fails loud: the worktree is a fresh checkout, so a git error here means\n * something is genuinely broken (git missing, corrupt index, killed mid-run).\n * Folding that into `false` would silently discard a candidate and mask the\n * real failure — forbidden by the no-silent-fallbacks doctrine. */\nfunction worktreeDirty(worktreePath: string): boolean {\n const result = spawnSync('git', ['status', '--porcelain'], {\n cwd: worktreePath,\n encoding: 'utf-8',\n })\n if (result.error) {\n throw new Error(\n `agenticGenerator: git status failed to spawn in ${worktreePath}: ${result.error.message}`,\n )\n }\n if (result.status !== 0) {\n throw new Error(\n `agenticGenerator: git status exited ${result.status} in ${worktreePath}: ${result.stderr.trim()}`,\n )\n }\n return result.stdout.trim().length > 0\n}\n","/**\n * @experimental\n *\n * `improvementDriver` — the ONE reflective/agentic improvement driver for\n * agent-eval's improvement loop. 
It implements `ImprovementDriver` and owns\n * the candidate lifecycle (worktree create → generate → finalize/discard,\n * × populationSize); it delegates the only thing that genuinely varies — HOW\n * a candidate change is produced — to a pluggable `CandidateGenerator`.\n *\n * There is no separate \"analyst driver\" vs \"autoresearch driver\": those are\n * the SAME driver at two settings of a dial.\n * - cheap reflective path → `reflectiveGenerator` (shots=1, no sandbox;\n * applies pre-drafted patches)\n * - full agentic path → `agenticGenerator` (shots=N, sandbox runLoop;\n * an agent reads code + report and edits)\n * Both emit changes into a worktree the driver finalizes into a\n * `CodeSurface{ worktreeRef }` the loop measures on the holdout. See\n * agent-eval's `docs/design/self-improvement-engine.md`.\n */\n\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport type {\n CodeSurface,\n ImprovementDriver,\n LabeledScenarioStore,\n ProposeContext,\n WorktreeAdapter,\n} from '@tangle-network/agent-eval/campaign'\n\n/** The byte-producing seam — the ONE thing that differs between the cheap\n * reflective path and the full agentic path. A generator makes (uncommitted)\n * changes inside `worktreePath`; the driver commits them via the worktree\n * adapter's `finalize`. */\nexport interface CandidateGenerator {\n kind: string\n generate(args: {\n /** The candidate worktree — a fresh checkout of baseRef. Write changes here. */\n worktreePath: string\n /** Phase-2 research report (analyst findings + diff), opaque. */\n report: unknown\n /** Findings resolved from the report or the loop context. */\n findings: AnalystFinding[]\n /** Handle to all captured data, to ground the change. */\n dataset?: LabeledScenarioStore\n /** DEPTH: max iterations the generator may take (agentic uses this; the\n * reflective generator ignores it). 
*/\n maxShots: number\n signal: AbortSignal\n }): Promise<{ applied: boolean; summary: string }>\n}\n\nexport interface ImprovementDriverOptions {\n worktree: WorktreeAdapter\n generator: CandidateGenerator\n /** Base ref candidate worktrees fork from. Default `main`. */\n baseRef?: string\n}\n\nexport function improvementDriver(\n opts: ImprovementDriverOptions,\n): ImprovementDriver<AnalystFinding> {\n const baseRef = opts.baseRef ?? 'main'\n\n return {\n kind: `improvement:${opts.generator.kind}`,\n async propose(ctx) {\n const findings = resolveFindings(ctx)\n // No signal to act on — propose nothing rather than spin up worktrees.\n if (findings.length === 0 && ctx.report === undefined) return []\n\n const surfaces: CodeSurface[] = []\n for (let i = 0; i < ctx.populationSize; i++) {\n if (ctx.signal.aborted) break\n const wt = await opts.worktree.create({\n baseRef,\n label: `${opts.generator.kind}-gen${ctx.generation}-cand${i}`,\n })\n // Once a worktree exists it MUST be accounted for: finalized into a\n // surface, or discarded. A throw from generate()/finalize() must not\n // leak the worktree + branch — discard best-effort, then rethrow loud.\n try {\n const { applied, summary } = await opts.generator.generate({\n worktreePath: wt.path,\n report: ctx.report,\n findings,\n dataset: ctx.dataset,\n maxShots: ctx.maxImprovementShots ?? 1,\n signal: ctx.signal,\n })\n if (!applied) {\n await opts.worktree.discard(wt)\n continue\n }\n surfaces.push(await opts.worktree.finalize(wt, summary))\n } catch (err) {\n // Best-effort cleanup; never mask the original failure.\n await opts.worktree.discard(wt).catch(() => {})\n throw err\n }\n }\n return surfaces\n },\n }\n}\n\n/** Phase-2 report carries `findings` when present; else fall back to the\n * loop's `ctx.findings`. The report is opaque to the substrate, so probe it\n * structurally. 
*/\nfunction resolveFindings(ctx: ProposeContext<AnalystFinding>): AnalystFinding[] {\n const report = ctx.report\n if (report && typeof report === 'object' && 'findings' in report) {\n const f = (report as { findings: unknown }).findings\n if (Array.isArray(f) && f.length > 0) return f as AnalystFinding[]\n }\n return ctx.findings\n}\n","/**\n * @experimental\n *\n * `reflectiveGenerator` — the cheap, no-sandbox `CandidateGenerator`. It drafts\n * surface edits via the existing improvement adapter (`proposeFromFindings`,\n * one LLM patch per finding) and applies them as ONE coherent improvement into\n * the candidate worktree. `maxShots` is ignored — reflection is single-shot by\n * construction (the patches are already drafted).\n *\n * This is the `shots=1, sandbox=off` setting of the one improvement driver.\n * The `agenticGenerator` (sandbox runLoop) is the `shots=N, sandbox=on`\n * setting — both plug into the same `improvementDriver`.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport type { SurfaceImprovementEdit } from '../agent/improvement-adapter'\nimport type { ImprovementAdapter } from '../analyst-loop/types'\nimport type { CandidateGenerator } from './improvement-driver'\n\nexport interface ReflectiveGeneratorOptions {\n improvementAdapter: ImprovementAdapter<SurfaceImprovementEdit>\n}\n\nexport function reflectiveGenerator(opts: ReflectiveGeneratorOptions): CandidateGenerator {\n return {\n kind: 'reflective',\n async generate({ worktreePath, findings }) {\n const batch = await opts.improvementAdapter.proposeFromFindings(findings)\n if (batch.edits.length === 0) return { applied: false, summary: '' }\n\n let applied = 0\n for (const edit of batch.edits) {\n if (applyPatch(edit.patch, worktreePath)) applied++\n }\n if (applied === 0) return { applied: false, summary: '' }\n\n const summary =\n batch.edits.length === 1\n ? batch.edits[0]!.summary\n : `analyst: ${applied} surface edit${applied === 1 ? 
'' : 's'}`\n return { applied: true, summary }\n },\n }\n}\n\n/** Mirror the improvement adapter's proven apply invocation, run inside the\n * candidate worktree (a fresh checkout of baseRef, so `-p0` paths match). */\nfunction applyPatch(patch: string, cwd: string): boolean {\n const result = spawnSync('git', ['apply', '--whitespace=fix', '-p0', '-'], {\n cwd,\n input: patch,\n encoding: 'utf-8',\n })\n return result.status === 0\n}\n"],"mappings":";;;;;;AAuBA,SAAS,iBAAiB;AAmBnB,SAAS,iBAAiB,OAAgC,CAAC,GAAuB;AACvF,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,cAAc,KAAK,eAAe;AACxC,QAAM,MAAM,KAAK,cAAc;AAC/B,QAAM,QAAQ,KAAK,WAAW;AAE9B,SAAO;AAAA,IACL,MAAM,WAAW,OAAO;AAAA,IACxB,MAAM,SAAS,EAAE,cAAc,QAAQ,UAAU,UAAU,OAAO,GAAG;AACnE,UAAI,SAAS,YAAY,EAAE,QAAQ,SAAS,CAAC;AAC7C,YAAM,QAAQ,KAAK,IAAI,GAAG,QAAQ;AAElC,eAAS,OAAO,GAAG,OAAO,OAAO,QAAQ;AACvC,YAAI,OAAO,QAAS;AACpB,cAAM,IAAI;AAAA,UACR;AAAA,UACA,KAAK;AAAA,UACL,YAAY;AAAA,UACZ,WAAW,KAAK;AAAA,UAChB;AAAA,QACF,CAAC;AAGD,YAAI,MAAM,YAAY,GAAG;AACvB,iBAAO,EAAE,SAAS,MAAM,SAAS,UAAU,QAAQ,EAAE;AAAA,QACvD;AAEA,iBAAS,OAAO,MAAM;AAAA,MACxB;AACA,aAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAAA,IACvC;AAAA,EACF;AACF;AAGA,SAAS,mBAAmB,MAA+D;AACzF,QAAM,QAAkB;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,aAAW,KAAK,KAAK,UAAU;AAC7B,UAAM,QAAQ,EAAE,UAAU,KAAK,EAAE,OAAO,MAAM;AAC9C,UAAM,KAAK,MAAM,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,KAAK,EAAE;AACjD,QAAI,EAAE,mBAAoB,OAAM,KAAK,cAAS,EAAE,kBAAkB,EAAE;AAAA,EACtE;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,OAAO,QAAwB;AACtC,SAAO,GAAG,MAAM;AAAA;AAAA;AAClB;AAGA,SAAS,UAAU,UAAoC;AACrD,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,MAAI,SAAS,WAAW,EAAG,QAAO,YAAY,SAAS,SAAS,CAAC,EAAG,OAAO,EAAE,CAAC;AAC9E,SAAO,YAAY,SAAS,MAAM;AACpC;AAEA,SAAS,SAAS,GAAW,GAAmB;AAC9C,SAAO,EAAE,UAAU,IAAI,IAAI,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC;AACjD;AAOA,SAAS,cAAc,cAA+B;AACpD,QAAM,SAAS,UAAU,OAAO,CAAC,UAAU,aAAa,GAAG;AAAA,IACzD,KAAK;AAAA,IACL,UAAU;AAAA,EACZ,CAAC;AACD,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI;AAAA,MACR,mDAAmD,YAAY,KAAK,OAAO,MAAM,OAAO;AAAA,IAC1F;AAAA,EACF;AACA,MAAI,OAAO,WAAW,GAAG;
AACvB,UAAM,IAAI;AAAA,MACR,uCAAuC,OAAO,MAAM,OAAO,YAAY,KAAK,OAAO,OAAO,KAAK,CAAC;AAAA,IAClG;AAAA,EACF;AACA,SAAO,OAAO,OAAO,KAAK,EAAE,SAAS;AACvC;;;ACvEO,SAAS,kBACd,MACmC;AACnC,QAAM,UAAU,KAAK,WAAW;AAEhC,SAAO;AAAA,IACL,MAAM,eAAe,KAAK,UAAU,IAAI;AAAA,IACxC,MAAM,QAAQ,KAAK;AACjB,YAAM,WAAW,gBAAgB,GAAG;AAEpC,UAAI,SAAS,WAAW,KAAK,IAAI,WAAW,OAAW,QAAO,CAAC;AAE/D,YAAM,WAA0B,CAAC;AACjC,eAAS,IAAI,GAAG,IAAI,IAAI,gBAAgB,KAAK;AAC3C,YAAI,IAAI,OAAO,QAAS;AACxB,cAAM,KAAK,MAAM,KAAK,SAAS,OAAO;AAAA,UACpC;AAAA,UACA,OAAO,GAAG,KAAK,UAAU,IAAI,OAAO,IAAI,UAAU,QAAQ,CAAC;AAAA,QAC7D,CAAC;AAID,YAAI;AACF,gBAAM,EAAE,SAAS,QAAQ,IAAI,MAAM,KAAK,UAAU,SAAS;AAAA,YACzD,cAAc,GAAG;AAAA,YACjB,QAAQ,IAAI;AAAA,YACZ;AAAA,YACA,SAAS,IAAI;AAAA,YACb,UAAU,IAAI,uBAAuB;AAAA,YACrC,QAAQ,IAAI;AAAA,UACd,CAAC;AACD,cAAI,CAAC,SAAS;AACZ,kBAAM,KAAK,SAAS,QAAQ,EAAE;AAC9B;AAAA,UACF;AACA,mBAAS,KAAK,MAAM,KAAK,SAAS,SAAS,IAAI,OAAO,CAAC;AAAA,QACzD,SAAS,KAAK;AAEZ,gBAAM,KAAK,SAAS,QAAQ,EAAE,EAAE,MAAM,MAAM;AAAA,UAAC,CAAC;AAC9C,gBAAM;AAAA,QACR;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKA,SAAS,gBAAgB,KAAuD;AAC9E,QAAM,SAAS,IAAI;AACnB,MAAI,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAChE,UAAM,IAAK,OAAiC;AAC5C,QAAI,MAAM,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAG,QAAO;AAAA,EAC/C;AACA,SAAO,IAAI;AACb;;;ACrGA,SAAS,aAAAA,kBAAiB;AASnB,SAAS,oBAAoB,MAAsD;AACxF,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,SAAS,EAAE,cAAc,SAAS,GAAG;AACzC,YAAM,QAAQ,MAAM,KAAK,mBAAmB,oBAAoB,QAAQ;AACxE,UAAI,MAAM,MAAM,WAAW,EAAG,QAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAEnE,UAAI,UAAU;AACd,iBAAW,QAAQ,MAAM,OAAO;AAC9B,YAAI,WAAW,KAAK,OAAO,YAAY,EAAG;AAAA,MAC5C;AACA,UAAI,YAAY,EAAG,QAAO,EAAE,SAAS,OAAO,SAAS,GAAG;AAExD,YAAM,UACJ,MAAM,MAAM,WAAW,IACnB,MAAM,MAAM,CAAC,EAAG,UAChB,YAAY,OAAO,gBAAgB,YAAY,IAAI,KAAK,GAAG;AACjE,aAAO,EAAE,SAAS,MAAM,QAAQ;AAAA,IAClC;AAAA,EACF;AACF;AAIA,SAAS,WAAW,OAAe,KAAsB;AACvD,QAAM,SAASA,WAAU,OAAO,CAAC,SAAS,oBAAoB,OAAO,GAAG,GAAG;AAAA,IACzE;AAAA,IACA,OAAO;AAAA,IACP,UAAU;AAAA,EACZ,CAAC;AACD,SAAO,OAAO,WAAW;AAC3B;","names":["spawnSync"]}
package/dist/index.d.ts CHANGED
@@ -1,16 +1,19 @@
1
1
  import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
3
- import { q as AgentBackendInput, r as AgentExecutionBackend, c as OpenAIChatTool, s as OpenAIChatToolChoice, t as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, u as RunAgentTaskOptions, v as AgentTaskRunResult, w as RunAgentTaskStreamOptions, x as AgentRuntimeEvent, y as AgentTaskStatus, z as RuntimeSessionStore, B as RuntimeSession } from './types-DbJzz2uf.js';
4
- export { C as AgentAdapter, F as AgentKnowledgeProvider, G as AgentRuntimeEventSink, H as AgentTaskContext, J as AgentTaskSpec, M as BackendErrorDetail, N as RuntimeRunHandle, P as RuntimeRunPersistenceAdapter, Q as RuntimeRunRow, S as startRuntimeRun } from './types-DbJzz2uf.js';
5
- export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-BLMa8He3.js';
6
- export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-wFDmmurL.js';
3
+ import { g as AgentBackendInput, h as AgentExecutionBackend, d as OpenAIChatTool, i as OpenAIChatToolChoice, j as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, k as RunAgentTaskOptions, l as AgentTaskRunResult, m as RunAgentTaskStreamOptions, n as AgentRuntimeEvent, o as AgentTaskStatus, p as RuntimeSessionStore, q as RuntimeSession } from './types-DnYoHvvZ.js';
4
+ export { r as AgentAdapter, s as AgentKnowledgeProvider, t as AgentRuntimeEventSink, u as AgentTaskContext, v as AgentTaskSpec, B as BackendErrorDetail, w as RuntimeRunHandle, x as RuntimeRunPersistenceAdapter, y as RuntimeRunRow, z as startRuntimeRun } from './types-DnYoHvvZ.js';
5
+ export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-D-K6bRp3.js';
6
+ export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-nurzFwuJ.js';
7
+ import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
8
+ export { b as RuntimeDecisionEvidenceRef, c as RuntimeDecisionKind, d as RuntimeDecisionPoint, e as RuntimeHookContext, f as RuntimeHookErrorContext, a as RuntimeHookEvent, g as RuntimeHookPhase, h as RuntimeHookTarget, i as composeRuntimeHooks, j as defineRuntimeHooks, n as notifyRuntimeDecisionPoint, k as notifyRuntimeHookEvent } from './runtime-hooks-C7JwKb9E.js';
7
9
  import '@tangle-network/sandbox';
8
10
  import '@tangle-network/agent-eval/campaign';
11
+ import '@tangle-network/agent-eval/contract';
9
12
  import './types-p8dWBIXL.js';
10
- import './optimize-prompt-D-urF2wW.js';
11
- import './dynamic-wUgp6UKs.js';
12
- import './kb-gate-D0ZIhFOU.js';
13
- import './profiles.js';
13
+ import './kb-gate-2Gwpz_27.js';
14
+ import './coder-DCWFQpmJ.js';
15
+ import './substrate-CUgk7F7s.js';
16
+ import './driver-C-mtBo7h.js';
14
17
 
15
18
  /**
16
19
  * @stable
@@ -1295,4 +1298,114 @@ declare function readinessServerSentEvent(report: KnowledgeReadinessReport, opti
1295
1298
  /** @stable */
1296
1299
  declare function runtimeStreamServerSentEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
1297
1300
 
1298
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RuntimeEventCollector, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runtimeStreamServerSentEvent, 
sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, turnId, validateChatModelId };
1301
+ /**
1302
+ * Bounded turn-level tool-dispatch loop.
1303
+ *
1304
+ * `runAgentTaskStream` runs ONE model turn; `runLoop` orchestrates DELEGATED
1305
+ * multi-agent topologies (refine / fanout-vote). Neither is the everyday
1306
+ * interactive shape: a chat turn where the model may emit tool calls, each is
1307
+ * executed, the results are folded back, and the turn re-runs until the model
1308
+ * stops (or a turn cap). Every agent app hand-rolls that loop — this is it,
1309
+ * as a reusable primitive.
1310
+ *
1311
+ * Substrate-neutral by design: the caller supplies `streamTurn` (wrapping
1312
+ * whatever backend / `runAgentTaskStream` it uses) and `executeToolCall`
1313
+ * (routing to its executors). This module owns the LOOP; the caller owns the
1314
+ * model and the executors. `Raw` (streaming variant) is the caller's own
1315
+ * event type. The only imported contract is the runtime hook type: hooks are
1316
+ * execution-scoped observers, not part of the agent profile.
1317
+ */
1318
+
1319
+ interface ToolLoopCall {
1320
+ toolCallId?: string;
1321
+ toolName: string;
1322
+ args: Record<string, unknown>;
1323
+ }
1324
+ /** Outcome of one tool dispatch — structurally compatible with a hub/integration
1325
+ * tool-outcome union, so callers can fold either through the loop. */
1326
+ type ToolCallOutcome = {
1327
+ ok: true;
1328
+ result: unknown;
1329
+ } | {
1330
+ ok: false;
1331
+ code: string;
1332
+ message: string;
1333
+ status?: number;
1334
+ };
1335
+ type ToolLoopMessage = {
1336
+ role: string;
1337
+ content: string;
1338
+ };
1339
+ type ToolLoopEvent = {
1340
+ type: 'text';
1341
+ text: string;
1342
+ } | {
1343
+ type: 'tool_call';
1344
+ call: ToolLoopCall;
1345
+ } | {
1346
+ type: 'other';
1347
+ event: unknown;
1348
+ };
1349
+ interface ToolLoopResult {
1350
+ finalText: string;
1351
+ toolResults: Array<{
1352
+ call: ToolLoopCall;
1353
+ label: string;
1354
+ outcome: ToolCallOutcome;
1355
+ }>;
1356
+ turns: number;
1357
+ cappedOut: boolean;
1358
+ }
1359
+ interface RunToolLoopOptions {
1360
+ systemPrompt: string;
1361
+ userMessage: string;
1362
+ priorMessages?: ToolLoopMessage[];
1363
+ streamTurn: (messages: ToolLoopMessage[]) => AsyncIterable<ToolLoopEvent>;
1364
+ executeToolCall: (call: ToolLoopCall) => Promise<ToolCallOutcome>;
1365
+ isExecutableTool: (toolName: string) => boolean;
1366
+ maxToolTurns?: number;
1367
+ renderResult?: (label: string, outcome: ToolCallOutcome) => string;
1368
+ labelFor?: (call: ToolLoopCall) => string;
1369
+ runId?: string;
1370
+ scenarioId?: string;
1371
+ hooks?: RuntimeHooks;
1372
+ }
1373
+ /** Run the bounded tool loop and return the final text + every executed tool
1374
+ * outcome. Awaitable — callers needing to stream events to a UI use
1375
+ * {@link streamToolLoop}. */
1376
+ declare function runToolLoop(opts: RunToolLoopOptions): Promise<ToolLoopResult>;
1377
+ type StreamToolLoopYield<Raw> = {
1378
+ kind: 'event';
1379
+ event: Raw;
1380
+ } | {
1381
+ kind: 'tool_result';
1382
+ toolName: string;
1383
+ toolCallId?: string;
1384
+ label: string;
1385
+ outcome: ToolCallOutcome;
1386
+ } | {
1387
+ kind: 'capped';
1388
+ pending: number;
1389
+ };
1390
+ interface StreamToolLoopOptions<Raw> {
1391
+ systemPrompt: string;
1392
+ userMessage: string;
1393
+ priorMessages?: ToolLoopMessage[];
1394
+ streamTurn: (messages: ToolLoopMessage[]) => AsyncIterable<Raw>;
1395
+ extractText: (event: Raw) => string;
1396
+ extractToolCall: (event: Raw) => ToolLoopCall | null;
1397
+ isExecutableTool: (toolName: string) => boolean;
1398
+ executeToolCall: (call: ToolLoopCall) => Promise<ToolCallOutcome>;
1399
+ maxToolTurns?: number;
1400
+ renderResult?: (label: string, outcome: ToolCallOutcome) => string;
1401
+ labelFor?: (call: ToolLoopCall) => string;
1402
+ runId?: string;
1403
+ scenarioId?: string;
1404
+ hooks?: RuntimeHooks;
1405
+ }
1406
+ /** Streaming bounded tool loop: yields each raw turn event (the caller maps +
1407
+ * telemetries + re-emits it) and each executed `tool_result`; emits one
1408
+ * `capped` if it stops at the turn limit with calls still pending. */
1409
+ declare function streamToolLoop<Raw>(opts: StreamToolLoopOptions<Raw>): AsyncGenerator<StreamToolLoopYield<Raw>, void, unknown>;
1410
+
1411
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, 
makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };