@tangle-network/agent-runtime 0.44.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -203
- package/dist/agent.d.ts +3 -2
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +28 -2
- package/dist/analyst-loop.js +4 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-65FQLI4V.js +4089 -0
- package/dist/chunk-65FQLI4V.js.map +1 -0
- package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
- package/dist/chunk-GN75RGM6.js.map +1 -0
- package/dist/chunk-GSUO5QS6.js +146 -0
- package/dist/chunk-GSUO5QS6.js.map +1 -0
- package/dist/chunk-HNUXAZIJ.js +580 -0
- package/dist/chunk-HNUXAZIJ.js.map +1 -0
- package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
- package/dist/chunk-I42NHLKX.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
- package/dist/chunk-JNPK46YH.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
- package/dist/chunk-KADIJAD4.js.map +1 -0
- package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
- package/dist/chunk-KPN7OQ64.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/coder-DCWFQpmJ.d.ts +114 -0
- package/dist/driver-C-mtBo7h.d.ts +221 -0
- package/dist/improvement.d.ts +0 -1
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +122 -9
- package/dist/index.js +398 -10
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
- package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
- package/dist/loop-runner-bin.d.ts +8 -6
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -393
- package/dist/loops.js +96 -27
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +286 -13
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1964 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-BfoeiQRZ.d.ts +438 -0
- package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
- package/dist/workflow.d.ts +4 -3
- package/dist/workflow.js +4 -5
- package/dist/workflow.js.map +1 -1
- package/package.json +37 -28
- package/skills/agent-runtime-adoption/SKILL.md +32 -29
- package/skills/generate-eval/SKILL.md +60 -0
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-GFKVVRQ7.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-KDMRUD2P.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-S7JXV32P.js +0 -947
- package/dist/chunk-S7JXV32P.js.map +0 -1
- package/dist/chunk-SKUZZCHE.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-wUgp6UKs.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0
package/dist/chunk-XBUG326M.js
DELETED
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
// src/analyst-loop/run-analyst-loop.ts
|
|
2
|
-
import { diffFindings } from "@tangle-network/agent-eval";
|
|
3
|
-
/**
 * Runs one pass of the analyst loop.
 *
 * Steps, in order:
 *   1. resolve the baseline run and load its findings from `opts.findingsStore`;
 *   2. run the analyst registry with those findings passed as `priorFindings`;
 *   3. append the new findings to the store (only when there are any);
 *   4. diff the new findings against the baseline (only when a baseline exists
 *      and the run produced findings);
 *   5. run the knowledge adapter, if one is configured;
 *   6. run the improvement adapter, if one is configured.
 *
 * Progress is reported through `opts.log` (falls back to `defaultLog`) and
 * through `opts.onEvent` via the emitter built by `makeEmitter`.
 *
 * @param opts Loop options; reads `runId`, `registry`, `inputs` and the
 *   optional `log`, `onEvent`, `priorFindingsStrategy`, `findingsStore`,
 *   `knowledgeAdapter` and `improvementAdapter`.
 * @returns `{ runId, baselineRunId, analystResult, diff, knowledge, improvement }`;
 *   `diff`, `knowledge` and `improvement` are `null` when their step did not run.
 */
async function runAnalystLoop(opts) {
  const logger = opts.log ?? defaultLog;
  const priorStrategy = opts.priorFindingsStrategy ?? "per-kind";
  const publish = makeEmitter(opts.onEvent);
  const loopStartedAt = Date.now();

  // 1. Baseline: which earlier run do we compare against, and what did it find?
  const baselineRunId = resolveBaselineRunId(opts);
  let baselineFindings = [];
  if (baselineRunId) {
    baselineFindings = opts.findingsStore?.loadRun(baselineRunId) ?? [];
  }
  logger("baseline resolved", { baselineRunId, prior_findings: baselineFindings.length });
  await publish({
    type: "baseline-resolved",
    runId: opts.runId,
    baselineRunId,
    priorFindingCount: baselineFindings.length
  });

  // 2. Run the registry with the baseline findings injected.
  const priorFindings = buildPriorFindingsInput(
    baselineFindings,
    priorStrategy,
    opts.registry.list()
  );
  const analystResult = await runRegistry(opts, priorFindings, publish);
  const summarizeAnalyst = (stat) => ({
    id: stat.analyst_id,
    status: stat.status,
    n: stat.findings_count
  });
  logger("analyst run complete", {
    findings: analystResult.findings.length,
    cost_usd: analystResult.total_cost_usd,
    per_analyst: analystResult.per_analyst.map(summarizeAnalyst)
  });

  // 3. Persist before any adapter runs, so the store has the findings even if
  //    an adapter throws later.
  if (opts.findingsStore && analystResult.findings.length > 0) {
    await opts.findingsStore.append(opts.runId, analystResult.findings);
    await publish({
      type: "findings-persisted",
      runId: opts.runId,
      count: analystResult.findings.length
    });
  }

  // 4. Diff against the baseline.
  let diff = null;
  if (baselineRunId && analystResult.findings.length > 0) {
    const baselineCopies = baselineFindings.map((finding) => ({ ...finding }));
    const currentTagged = analystResult.findings.map((finding) => ({
      ...finding,
      run_id: opts.runId
    }));
    diff = diffFindings(baselineCopies, currentTagged);
    // Built fresh for each consumer so the logger and the event never share an object.
    const diffCounts = () => ({
      appeared: diff.appeared.length,
      disappeared: diff.disappeared.length,
      persisted: diff.persisted.length,
      changed: diff.changed.length
    });
    logger("diff vs baseline", diffCounts());
    await publish({
      type: "diff-computed",
      runId: opts.runId,
      baselineRunId,
      ...diffCounts()
    });
  }

  // 5. Knowledge adapter (optional).
  const knowledge = opts.knowledgeAdapter
    ? await runKnowledgeAdapter(opts, analystResult.findings, logger, publish)
    : null;

  // 6. Improvement adapter (optional).
  const improvement = opts.improvementAdapter
    ? await runImprovementAdapter(opts, analystResult.findings, logger, publish)
    : null;

  await publish({
    type: "loop-completed",
    runId: opts.runId,
    durationMs: Date.now() - loopStartedAt
  });

  return {
    runId: opts.runId,
    baselineRunId,
    analystResult,
    diff,
    knowledge,
    improvement
  };
}
|
|
80
|
-
function makeEmitter(onEvent) {
|
|
81
|
-
if (!onEvent) return async () => {
|
|
82
|
-
};
|
|
83
|
-
return async (event) => {
|
|
84
|
-
await onEvent(event);
|
|
85
|
-
};
|
|
86
|
-
}
|
|
87
|
-
async function runRegistry(opts, priorFindings, emit) {
|
|
88
|
-
const reg = opts.registry;
|
|
89
|
-
if (typeof reg.runStream === "function" && opts.onEvent) {
|
|
90
|
-
let final = null;
|
|
91
|
-
for await (const ev of reg.runStream(opts.runId, opts.inputs, { priorFindings })) {
|
|
92
|
-
await emit({ type: "analyst", runId: opts.runId, event: ev });
|
|
93
|
-
if (ev.type === "run-completed") final = ev.result;
|
|
94
|
-
}
|
|
95
|
-
if (!final) {
|
|
96
|
-
throw new Error("runAnalystLoop: registry.runStream ended without run-completed event");
|
|
97
|
-
}
|
|
98
|
-
return final;
|
|
99
|
-
}
|
|
100
|
-
return opts.registry.run(opts.runId, opts.inputs, { priorFindings });
|
|
101
|
-
}
|
|
102
|
-
/**
 * Decides which earlier run the current run is compared against.
 *
 * - `opts.baselineRunId === null`: no baseline (returns `null`).
 * - `opts.baselineRunId` is a string: that id is used as-is.
 * - otherwise, when a findings store is present, the `run_id` of the last row
 *   yielded by `loadAll()` that does not belong to the current run.
 *
 * @param opts Loop options; reads `baselineRunId`, `findingsStore`, `runId`.
 * @returns The baseline run id, or `null` when none can be determined.
 */
function resolveBaselineRunId(opts) {
  const requested = opts.baselineRunId;
  if (requested === null || typeof requested === "string") {
    return requested;
  }

  const store = opts.findingsStore;
  if (!store) {
    return null;
  }

  let latestOther = null;
  for (const row of store.loadAll()) {
    if (row.run_id !== opts.runId) {
      latestOther = row.run_id;
    }
  }
  return latestOther;
}
|
|
114
|
-
/**
 * Shapes baseline findings into the `priorFindings` value handed to the registry.
 *
 * Each finding is copied without its `run_id` field. The `"wildcard"` strategy
 * wraps the list as `{ "*": [...] }`; any other strategy except `"none"`
 * returns the bare list.
 *
 * @param prior Baseline findings, each carrying a `run_id`.
 * @param strategy `"none"`, `"wildcard"`, or another strategy name.
 * @param registry Accepted for signature compatibility; not used here.
 * @returns `undefined` when the strategy is `"none"` or `prior` is empty,
 *   otherwise the stripped findings in the shape described above.
 */
function buildPriorFindingsInput(prior, strategy, registry) {
  void registry;

  const nothingToPass = strategy === "none" || prior.length === 0;
  if (nothingToPass) {
    return undefined;
  }

  const withoutRunId = prior.map((finding) => {
    const { run_id: _dropped, ...rest } = finding;
    return rest;
  });
  return strategy === "wildcard" ? { "*": withoutRunId } : withoutRunId;
}
|
|
123
|
-
async function runKnowledgeAdapter(opts, findings, log, emit) {
|
|
124
|
-
const adapter = opts.knowledgeAdapter;
|
|
125
|
-
const batch = await adapter.proposeFromFindings(findings);
|
|
126
|
-
log("knowledge.proposeFromFindings", {
|
|
127
|
-
proposals: batch.proposals.length,
|
|
128
|
-
skipped: batch.skipped,
|
|
129
|
-
errors: batch.errors.length
|
|
130
|
-
});
|
|
131
|
-
await emit({
|
|
132
|
-
type: "knowledge-proposed",
|
|
133
|
-
runId: opts.runId,
|
|
134
|
-
proposalCount: batch.proposals.length,
|
|
135
|
-
skipped: batch.skipped,
|
|
136
|
-
errors: batch.errors.length
|
|
137
|
-
});
|
|
138
|
-
const auto = opts.autoApply?.knowledge ?? false;
|
|
139
|
-
const threshold = opts.autoApply?.knowledgeConfidenceThreshold ?? 0.85;
|
|
140
|
-
if (!auto || !adapter.apply) {
|
|
141
|
-
await emit({
|
|
142
|
-
type: "knowledge-applied",
|
|
143
|
-
runId: opts.runId,
|
|
144
|
-
writtenCount: 0,
|
|
145
|
-
withheldForReview: batch.proposals.length
|
|
146
|
-
});
|
|
147
|
-
return {
|
|
148
|
-
proposals: batch.proposals,
|
|
149
|
-
applied: [],
|
|
150
|
-
skipped: batch.skipped,
|
|
151
|
-
errors: batch.errors,
|
|
152
|
-
withheld_for_review: batch.proposals.length
|
|
153
|
-
};
|
|
154
|
-
}
|
|
155
|
-
const findingsById = new Map(findings.map((f) => [f.finding_id, f]));
|
|
156
|
-
const safe = [];
|
|
157
|
-
let withheld = 0;
|
|
158
|
-
for (const p of batch.proposals) {
|
|
159
|
-
const src = p.sourceFindingId ? findingsById.get(p.sourceFindingId) : void 0;
|
|
160
|
-
if (!src) {
|
|
161
|
-
withheld += 1;
|
|
162
|
-
continue;
|
|
163
|
-
}
|
|
164
|
-
if (src.confidence < threshold) {
|
|
165
|
-
withheld += 1;
|
|
166
|
-
continue;
|
|
167
|
-
}
|
|
168
|
-
safe.push(p);
|
|
169
|
-
}
|
|
170
|
-
const result = await adapter.apply(safe);
|
|
171
|
-
log("knowledge.apply", {
|
|
172
|
-
applied: result.written.length,
|
|
173
|
-
withheld_for_review: withheld,
|
|
174
|
-
warnings: result.warnings.length
|
|
175
|
-
});
|
|
176
|
-
await emit({
|
|
177
|
-
type: "knowledge-applied",
|
|
178
|
-
runId: opts.runId,
|
|
179
|
-
writtenCount: result.written.length,
|
|
180
|
-
withheldForReview: withheld
|
|
181
|
-
});
|
|
182
|
-
return {
|
|
183
|
-
proposals: batch.proposals,
|
|
184
|
-
applied: result.written,
|
|
185
|
-
skipped: batch.skipped,
|
|
186
|
-
errors: batch.errors,
|
|
187
|
-
withheld_for_review: withheld
|
|
188
|
-
};
|
|
189
|
-
}
|
|
190
|
-
async function runImprovementAdapter(opts, findings, log, emit) {
|
|
191
|
-
const adapter = opts.improvementAdapter;
|
|
192
|
-
const batch = await adapter.proposeFromFindings(findings);
|
|
193
|
-
log("improvement.proposeFromFindings", {
|
|
194
|
-
edits: batch.edits.length,
|
|
195
|
-
skipped: batch.skipped,
|
|
196
|
-
errors: batch.errors.length
|
|
197
|
-
});
|
|
198
|
-
await emit({
|
|
199
|
-
type: "improvement-proposed",
|
|
200
|
-
runId: opts.runId,
|
|
201
|
-
editCount: batch.edits.length,
|
|
202
|
-
skipped: batch.skipped,
|
|
203
|
-
errors: batch.errors.length
|
|
204
|
-
});
|
|
205
|
-
const auto = opts.autoApply?.improvement ?? false;
|
|
206
|
-
const threshold = opts.autoApply?.improvementConfidenceThreshold ?? 0.9;
|
|
207
|
-
if (!auto || !adapter.apply) {
|
|
208
|
-
await emit({
|
|
209
|
-
type: "improvement-applied",
|
|
210
|
-
runId: opts.runId,
|
|
211
|
-
appliedCount: 0,
|
|
212
|
-
withheldForReview: batch.edits.length
|
|
213
|
-
});
|
|
214
|
-
return {
|
|
215
|
-
edits: batch.edits,
|
|
216
|
-
applied: [],
|
|
217
|
-
skipped: batch.skipped,
|
|
218
|
-
errors: batch.errors,
|
|
219
|
-
withheld_for_review: batch.edits.length
|
|
220
|
-
};
|
|
221
|
-
}
|
|
222
|
-
const findingsById = new Map(findings.map((f) => [f.finding_id, f]));
|
|
223
|
-
const safe = [];
|
|
224
|
-
let withheld = 0;
|
|
225
|
-
for (const e of batch.edits) {
|
|
226
|
-
const src = e.sourceFindingId ? findingsById.get(e.sourceFindingId) : void 0;
|
|
227
|
-
if (!src || src.confidence < threshold) {
|
|
228
|
-
withheld += 1;
|
|
229
|
-
continue;
|
|
230
|
-
}
|
|
231
|
-
safe.push(e);
|
|
232
|
-
}
|
|
233
|
-
const result = await adapter.apply(safe);
|
|
234
|
-
log("improvement.apply", {
|
|
235
|
-
applied: result.applied.length,
|
|
236
|
-
withheld_for_review: withheld,
|
|
237
|
-
warnings: result.warnings.length
|
|
238
|
-
});
|
|
239
|
-
await emit({
|
|
240
|
-
type: "improvement-applied",
|
|
241
|
-
runId: opts.runId,
|
|
242
|
-
appliedCount: result.applied.length,
|
|
243
|
-
withheldForReview: withheld
|
|
244
|
-
});
|
|
245
|
-
return {
|
|
246
|
-
edits: batch.edits,
|
|
247
|
-
applied: result.applied,
|
|
248
|
-
skipped: batch.skipped,
|
|
249
|
-
errors: batch.errors,
|
|
250
|
-
withheld_for_review: withheld
|
|
251
|
-
};
|
|
252
|
-
}
|
|
253
|
-
function defaultLog(msg, fields) {
|
|
254
|
-
if (fields) console.log(`[analyst-loop] ${msg}`, fields);
|
|
255
|
-
else console.log(`[analyst-loop] ${msg}`);
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
export {
|
|
259
|
-
runAnalystLoop
|
|
260
|
-
};
|
|
261
|
-
//# sourceMappingURL=chunk-XBUG326M.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/analyst-loop/run-analyst-loop.ts"],"sourcesContent":["/**\n * `runAnalystLoop` — the one call agent apps reach for to close the\n * recursive-self-improvement loop.\n *\n * 1. Load baseline findings (last run, or the slice the caller specifies)\n * 2. Run the analyst registry with priorFindings injected\n * 3. Persist the new run's findings to the ledger\n * 4. Diff the new run against the baseline\n * 5. Hand the findings to the knowledge adapter → proposals (and\n * optionally apply them) → wiki edits\n * 6. Hand the findings to the improvement adapter → prompt / tool /\n * scaffolding edits (review-only by default)\n * 7. Return a single report the consumer renders / persists / acts on.\n *\n * Adapters are optional: the loop works as a \"run + diff + report\"\n * primitive when no adapters are wired; it closes end-to-end when\n * both adapters are wired.\n */\n\nimport type { AnalystFinding, AnalystRunResult, FindingsDiff } from '@tangle-network/agent-eval'\nimport { diffFindings } from '@tangle-network/agent-eval'\n\nimport type {\n AnalystLoopEvent,\n AnalystRegistryStreamingLike,\n ImprovementReport,\n KnowledgeReport,\n RunAnalystLoopOpts,\n RunAnalystLoopResult,\n} from './types'\n\nexport async function runAnalystLoop<TProposal = unknown, TEdit = unknown>(\n opts: RunAnalystLoopOpts,\n): Promise<RunAnalystLoopResult<TProposal, TEdit>> {\n const log = opts.log ?? defaultLog\n const strategy = opts.priorFindingsStrategy ?? 'per-kind'\n const emit = makeEmitter(opts.onEvent)\n const startedAt = Date.now()\n\n // 1. Resolve baseline + load prior findings.\n const baselineRunId = resolveBaselineRunId(opts)\n const priorAll: ReadonlyArray<AnalystFinding & { run_id: string }> = baselineRunId\n ? (opts.findingsStore?.loadRun(baselineRunId) ?? 
[])\n : []\n log('baseline resolved', { baselineRunId, prior_findings: priorAll.length })\n await emit({\n type: 'baseline-resolved',\n runId: opts.runId,\n baselineRunId,\n priorFindingCount: priorAll.length,\n })\n\n // 2. Run the registry. Strategy controls how analysts see priors.\n // When the registry exposes runStream, forward each event verbatim\n // so subscribers see per-analyst progress in real time.\n const priorFindings = buildPriorFindingsInput(priorAll, strategy, opts.registry.list())\n const analystResult = await runRegistry(opts, priorFindings, emit)\n log('analyst run complete', {\n findings: analystResult.findings.length,\n cost_usd: analystResult.total_cost_usd,\n per_analyst: analystResult.per_analyst.map((s) => ({\n id: s.analyst_id,\n status: s.status,\n n: s.findings_count,\n })),\n })\n\n // 3. Persist the new run before any side-effecting adapter runs so\n // the ledger is the source of truth even if an adapter throws.\n if (opts.findingsStore && analystResult.findings.length > 0) {\n await opts.findingsStore.append(opts.runId, analystResult.findings)\n await emit({\n type: 'findings-persisted',\n runId: opts.runId,\n count: analystResult.findings.length,\n })\n }\n\n // 4. Diff vs baseline.\n let diff: FindingsDiff | null = null\n if (baselineRunId && analystResult.findings.length > 0) {\n diff = diffFindings(\n priorAll.map((f) => ({ ...f })),\n analystResult.findings.map((f) => ({ ...f, run_id: opts.runId })),\n )\n log('diff vs baseline', {\n appeared: diff.appeared.length,\n disappeared: diff.disappeared.length,\n persisted: diff.persisted.length,\n changed: diff.changed.length,\n })\n await emit({\n type: 'diff-computed',\n runId: opts.runId,\n baselineRunId,\n appeared: diff.appeared.length,\n disappeared: diff.disappeared.length,\n persisted: diff.persisted.length,\n changed: diff.changed.length,\n })\n }\n\n // 5. 
Knowledge adapter — proposals + optional auto-apply.\n let knowledge: KnowledgeReport<TProposal> | null = null\n if (opts.knowledgeAdapter) {\n knowledge = await runKnowledgeAdapter(opts, analystResult.findings, log, emit)\n }\n\n // 6. Improvement adapter — prompt / tool / scaffolding edits.\n let improvement: ImprovementReport<TEdit> | null = null\n if (opts.improvementAdapter) {\n improvement = await runImprovementAdapter(opts, analystResult.findings, log, emit)\n }\n\n await emit({\n type: 'loop-completed',\n runId: opts.runId,\n durationMs: Date.now() - startedAt,\n })\n\n return {\n runId: opts.runId,\n baselineRunId,\n analystResult,\n diff,\n knowledge,\n improvement,\n }\n}\n\ntype Emitter = (event: AnalystLoopEvent) => Promise<void>\n\nfunction makeEmitter(onEvent: RunAnalystLoopOpts['onEvent']): Emitter {\n if (!onEvent) return async () => {}\n return async (event) => {\n await onEvent(event)\n }\n}\n\nasync function runRegistry(\n opts: RunAnalystLoopOpts,\n priorFindings: ReturnType<typeof buildPriorFindingsInput>,\n emit: Emitter,\n): Promise<AnalystRunResult> {\n const reg = opts.registry as AnalystRegistryStreamingLike\n if (typeof reg.runStream === 'function' && opts.onEvent) {\n let final: AnalystRunResult | null = null\n for await (const ev of reg.runStream(opts.runId, opts.inputs, { priorFindings })) {\n await emit({ type: 'analyst', runId: opts.runId, event: ev })\n if (ev.type === 'run-completed') final = ev.result\n }\n if (!final) {\n throw new Error('runAnalystLoop: registry.runStream ended without run-completed event')\n }\n return final\n }\n return opts.registry.run(opts.runId, opts.inputs, { priorFindings })\n}\n\nfunction resolveBaselineRunId(opts: RunAnalystLoopOpts): string | null {\n if (opts.baselineRunId === null) return null\n if (typeof opts.baselineRunId === 'string') return opts.baselineRunId\n if (!opts.findingsStore) return null\n const all = opts.findingsStore.loadAll()\n let last: string | null = null\n for (const row of 
all) {\n if (row.run_id === opts.runId) continue\n last = row.run_id\n }\n return last\n}\n\nfunction buildPriorFindingsInput(\n prior: ReadonlyArray<AnalystFinding & { run_id: string }>,\n strategy: 'per-kind' | 'wildcard' | 'none',\n registry: ReadonlyArray<{ id: string }>,\n): ReadonlyArray<AnalystFinding> | Record<string, ReadonlyArray<AnalystFinding>> | undefined {\n if (strategy === 'none' || prior.length === 0) return undefined\n const stripped = prior.map(({ run_id: _run_id, ...rest }) => rest as AnalystFinding)\n if (strategy === 'wildcard') {\n return { '*': stripped }\n }\n void registry\n return stripped\n}\n\nasync function runKnowledgeAdapter<TProposal>(\n opts: RunAnalystLoopOpts,\n findings: ReadonlyArray<AnalystFinding>,\n log: NonNullable<RunAnalystLoopOpts['log']>,\n emit: Emitter,\n): Promise<KnowledgeReport<TProposal>> {\n const adapter = opts.knowledgeAdapter!\n const batch = await adapter.proposeFromFindings(findings)\n log('knowledge.proposeFromFindings', {\n proposals: batch.proposals.length,\n skipped: batch.skipped,\n errors: batch.errors.length,\n })\n await emit({\n type: 'knowledge-proposed',\n runId: opts.runId,\n proposalCount: batch.proposals.length,\n skipped: batch.skipped,\n errors: batch.errors.length,\n })\n\n const auto = opts.autoApply?.knowledge ?? false\n const threshold = opts.autoApply?.knowledgeConfidenceThreshold ?? 0.85\n\n if (!auto || !adapter.apply) {\n await emit({\n type: 'knowledge-applied',\n runId: opts.runId,\n writtenCount: 0,\n withheldForReview: batch.proposals.length,\n })\n return {\n proposals: batch.proposals as TProposal[],\n applied: [],\n skipped: batch.skipped,\n errors: batch.errors,\n withheld_for_review: batch.proposals.length,\n }\n }\n\n const findingsById = new Map(findings.map((f) => [f.finding_id, f]))\n const safe: TProposal[] = []\n let withheld = 0\n for (const p of batch.proposals as Array<TProposal & { sourceFindingId?: string }>) {\n const src = p.sourceFindingId ? 
findingsById.get(p.sourceFindingId) : undefined\n if (!src) {\n withheld += 1\n continue\n }\n if (src.confidence < threshold) {\n withheld += 1\n continue\n }\n safe.push(p)\n }\n const result = await adapter.apply(safe)\n log('knowledge.apply', {\n applied: result.written.length,\n withheld_for_review: withheld,\n warnings: result.warnings.length,\n })\n await emit({\n type: 'knowledge-applied',\n runId: opts.runId,\n writtenCount: result.written.length,\n withheldForReview: withheld,\n })\n return {\n proposals: batch.proposals as TProposal[],\n applied: result.written,\n skipped: batch.skipped,\n errors: batch.errors,\n withheld_for_review: withheld,\n }\n}\n\nasync function runImprovementAdapter<TEdit>(\n opts: RunAnalystLoopOpts,\n findings: ReadonlyArray<AnalystFinding>,\n log: NonNullable<RunAnalystLoopOpts['log']>,\n emit: Emitter,\n): Promise<ImprovementReport<TEdit>> {\n const adapter = opts.improvementAdapter!\n const batch = await adapter.proposeFromFindings(findings)\n log('improvement.proposeFromFindings', {\n edits: batch.edits.length,\n skipped: batch.skipped,\n errors: batch.errors.length,\n })\n await emit({\n type: 'improvement-proposed',\n runId: opts.runId,\n editCount: batch.edits.length,\n skipped: batch.skipped,\n errors: batch.errors.length,\n })\n\n const auto = opts.autoApply?.improvement ?? false\n const threshold = opts.autoApply?.improvementConfidenceThreshold ?? 0.9\n\n if (!auto || !adapter.apply) {\n await emit({\n type: 'improvement-applied',\n runId: opts.runId,\n appliedCount: 0,\n withheldForReview: batch.edits.length,\n })\n return {\n edits: batch.edits as TEdit[],\n applied: [],\n skipped: batch.skipped,\n errors: batch.errors,\n withheld_for_review: batch.edits.length,\n }\n }\n\n const findingsById = new Map(findings.map((f) => [f.finding_id, f]))\n const safe: TEdit[] = []\n let withheld = 0\n for (const e of batch.edits as Array<TEdit & { sourceFindingId?: string }>) {\n const src = e.sourceFindingId ? 
findingsById.get(e.sourceFindingId) : undefined\n if (!src || src.confidence < threshold) {\n withheld += 1\n continue\n }\n safe.push(e)\n }\n const result = await adapter.apply(safe)\n log('improvement.apply', {\n applied: result.applied.length,\n withheld_for_review: withheld,\n warnings: result.warnings.length,\n })\n await emit({\n type: 'improvement-applied',\n runId: opts.runId,\n appliedCount: result.applied.length,\n withheldForReview: withheld,\n })\n return {\n edits: batch.edits as TEdit[],\n applied: result.applied,\n skipped: batch.skipped,\n errors: batch.errors,\n withheld_for_review: withheld,\n }\n}\n\nfunction defaultLog(msg: string, fields?: Record<string, unknown>): void {\n if (fields) console.log(`[analyst-loop] ${msg}`, fields)\n else console.log(`[analyst-loop] ${msg}`)\n}\n"],"mappings":";AAoBA,SAAS,oBAAoB;AAW7B,eAAsB,eACpB,MACiD;AACjD,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,WAAW,KAAK,yBAAyB;AAC/C,QAAM,OAAO,YAAY,KAAK,OAAO;AACrC,QAAM,YAAY,KAAK,IAAI;AAG3B,QAAM,gBAAgB,qBAAqB,IAAI;AAC/C,QAAM,WAA+D,gBAChE,KAAK,eAAe,QAAQ,aAAa,KAAK,CAAC,IAChD,CAAC;AACL,MAAI,qBAAqB,EAAE,eAAe,gBAAgB,SAAS,OAAO,CAAC;AAC3E,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ;AAAA,IACA,mBAAmB,SAAS;AAAA,EAC9B,CAAC;AAKD,QAAM,gBAAgB,wBAAwB,UAAU,UAAU,KAAK,SAAS,KAAK,CAAC;AACtF,QAAM,gBAAgB,MAAM,YAAY,MAAM,eAAe,IAAI;AACjE,MAAI,wBAAwB;AAAA,IAC1B,UAAU,cAAc,SAAS;AAAA,IACjC,UAAU,cAAc;AAAA,IACxB,aAAa,cAAc,YAAY,IAAI,CAAC,OAAO;AAAA,MACjD,IAAI,EAAE;AAAA,MACN,QAAQ,EAAE;AAAA,MACV,GAAG,EAAE;AAAA,IACP,EAAE;AAAA,EACJ,CAAC;AAID,MAAI,KAAK,iBAAiB,cAAc,SAAS,SAAS,GAAG;AAC3D,UAAM,KAAK,cAAc,OAAO,KAAK,OAAO,cAAc,QAAQ;AAClE,UAAM,KAAK;AAAA,MACT,MAAM;AAAA,MACN,OAAO,KAAK;AAAA,MACZ,OAAO,cAAc,SAAS;AAAA,IAChC,CAAC;AAAA,EACH;AAGA,MAAI,OAA4B;AAChC,MAAI,iBAAiB,cAAc,SAAS,SAAS,GAAG;AACtD,WAAO;AAAA,MACL,SAAS,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE;AAAA,MAC9B,cAAc,SAAS,IAAI,CAAC,OAAO,EAAE,GAAG,GAAG,QAAQ,KAAK,MAAM,EAAE;AAAA,IAClE;AACA,QAAI,oBAAoB;AAAA,MACtB,UAAU,KAAK,SAAS;AAAA,MACxB,aAAa,KAAK,YAAY;AAAA,MAC9B,WAAW,KAAK,UAAU;AA
AA,MAC1B,SAAS,KAAK,QAAQ;AAAA,IACxB,CAAC;AACD,UAAM,KAAK;AAAA,MACT,MAAM;AAAA,MACN,OAAO,KAAK;AAAA,MACZ;AAAA,MACA,UAAU,KAAK,SAAS;AAAA,MACxB,aAAa,KAAK,YAAY;AAAA,MAC9B,WAAW,KAAK,UAAU;AAAA,MAC1B,SAAS,KAAK,QAAQ;AAAA,IACxB,CAAC;AAAA,EACH;AAGA,MAAI,YAA+C;AACnD,MAAI,KAAK,kBAAkB;AACzB,gBAAY,MAAM,oBAAoB,MAAM,cAAc,UAAU,KAAK,IAAI;AAAA,EAC/E;AAGA,MAAI,cAA+C;AACnD,MAAI,KAAK,oBAAoB;AAC3B,kBAAc,MAAM,sBAAsB,MAAM,cAAc,UAAU,KAAK,IAAI;AAAA,EACnF;AAEA,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,YAAY,KAAK,IAAI,IAAI;AAAA,EAC3B,CAAC;AAED,SAAO;AAAA,IACL,OAAO,KAAK;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAIA,SAAS,YAAY,SAAiD;AACpE,MAAI,CAAC,QAAS,QAAO,YAAY;AAAA,EAAC;AAClC,SAAO,OAAO,UAAU;AACtB,UAAM,QAAQ,KAAK;AAAA,EACrB;AACF;AAEA,eAAe,YACb,MACA,eACA,MAC2B;AAC3B,QAAM,MAAM,KAAK;AACjB,MAAI,OAAO,IAAI,cAAc,cAAc,KAAK,SAAS;AACvD,QAAI,QAAiC;AACrC,qBAAiB,MAAM,IAAI,UAAU,KAAK,OAAO,KAAK,QAAQ,EAAE,cAAc,CAAC,GAAG;AAChF,YAAM,KAAK,EAAE,MAAM,WAAW,OAAO,KAAK,OAAO,OAAO,GAAG,CAAC;AAC5D,UAAI,GAAG,SAAS,gBAAiB,SAAQ,GAAG;AAAA,IAC9C;AACA,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,sEAAsE;AAAA,IACxF;AACA,WAAO;AAAA,EACT;AACA,SAAO,KAAK,SAAS,IAAI,KAAK,OAAO,KAAK,QAAQ,EAAE,cAAc,CAAC;AACrE;AAEA,SAAS,qBAAqB,MAAyC;AACrE,MAAI,KAAK,kBAAkB,KAAM,QAAO;AACxC,MAAI,OAAO,KAAK,kBAAkB,SAAU,QAAO,KAAK;AACxD,MAAI,CAAC,KAAK,cAAe,QAAO;AAChC,QAAM,MAAM,KAAK,cAAc,QAAQ;AACvC,MAAI,OAAsB;AAC1B,aAAW,OAAO,KAAK;AACrB,QAAI,IAAI,WAAW,KAAK,MAAO;AAC/B,WAAO,IAAI;AAAA,EACb;AACA,SAAO;AACT;AAEA,SAAS,wBACP,OACA,UACA,UAC2F;AAC3F,MAAI,aAAa,UAAU,MAAM,WAAW,EAAG,QAAO;AACtD,QAAM,WAAW,MAAM,IAAI,CAAC,EAAE,QAAQ,SAAS,GAAG,KAAK,MAAM,IAAsB;AACnF,MAAI,aAAa,YAAY;AAC3B,WAAO,EAAE,KAAK,SAAS;AAAA,EACzB;AACA,OAAK;AACL,SAAO;AACT;AAEA,eAAe,oBACb,MACA,UACA,KACA,MACqC;AACrC,QAAM,UAAU,KAAK;AACrB,QAAM,QAAQ,MAAM,QAAQ,oBAAoB,QAAQ;AACxD,MAAI,iCAAiC;AAAA,IACnC,WAAW,MAAM,UAAU;AAAA,IAC3B,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM,OAAO;AAAA,EACvB,CAAC;AACD,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,eAAe,MAAM,UAAU;AAAA,IAC/B,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM,OAAO;AAAA,EACvB,CA
AC;AAED,QAAM,OAAO,KAAK,WAAW,aAAa;AAC1C,QAAM,YAAY,KAAK,WAAW,gCAAgC;AAElE,MAAI,CAAC,QAAQ,CAAC,QAAQ,OAAO;AAC3B,UAAM,KAAK;AAAA,MACT,MAAM;AAAA,MACN,OAAO,KAAK;AAAA,MACZ,cAAc;AAAA,MACd,mBAAmB,MAAM,UAAU;AAAA,IACrC,CAAC;AACD,WAAO;AAAA,MACL,WAAW,MAAM;AAAA,MACjB,SAAS,CAAC;AAAA,MACV,SAAS,MAAM;AAAA,MACf,QAAQ,MAAM;AAAA,MACd,qBAAqB,MAAM,UAAU;AAAA,IACvC;AAAA,EACF;AAEA,QAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;AACnE,QAAM,OAAoB,CAAC;AAC3B,MAAI,WAAW;AACf,aAAW,KAAK,MAAM,WAA8D;AAClF,UAAM,MAAM,EAAE,kBAAkB,aAAa,IAAI,EAAE,eAAe,IAAI;AACtE,QAAI,CAAC,KAAK;AACR,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,IAAI,aAAa,WAAW;AAC9B,kBAAY;AACZ;AAAA,IACF;AACA,SAAK,KAAK,CAAC;AAAA,EACb;AACA,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,MAAI,mBAAmB;AAAA,IACrB,SAAS,OAAO,QAAQ;AAAA,IACxB,qBAAqB;AAAA,IACrB,UAAU,OAAO,SAAS;AAAA,EAC5B,CAAC;AACD,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,cAAc,OAAO,QAAQ;AAAA,IAC7B,mBAAmB;AAAA,EACrB,CAAC;AACD,SAAO;AAAA,IACL,WAAW,MAAM;AAAA,IACjB,SAAS,OAAO;AAAA,IAChB,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM;AAAA,IACd,qBAAqB;AAAA,EACvB;AACF;AAEA,eAAe,sBACb,MACA,UACA,KACA,MACmC;AACnC,QAAM,UAAU,KAAK;AACrB,QAAM,QAAQ,MAAM,QAAQ,oBAAoB,QAAQ;AACxD,MAAI,mCAAmC;AAAA,IACrC,OAAO,MAAM,MAAM;AAAA,IACnB,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM,OAAO;AAAA,EACvB,CAAC;AACD,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,WAAW,MAAM,MAAM;AAAA,IACvB,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM,OAAO;AAAA,EACvB,CAAC;AAED,QAAM,OAAO,KAAK,WAAW,eAAe;AAC5C,QAAM,YAAY,KAAK,WAAW,kCAAkC;AAEpE,MAAI,CAAC,QAAQ,CAAC,QAAQ,OAAO;AAC3B,UAAM,KAAK;AAAA,MACT,MAAM;AAAA,MACN,OAAO,KAAK;AAAA,MACZ,cAAc;AAAA,MACd,mBAAmB,MAAM,MAAM;AAAA,IACjC,CAAC;AACD,WAAO;AAAA,MACL,OAAO,MAAM;AAAA,MACb,SAAS,CAAC;AAAA,MACV,SAAS,MAAM;AAAA,MACf,QAAQ,MAAM;AAAA,MACd,qBAAqB,MAAM,MAAM;AAAA,IACnC;AAAA,EACF;AAEA,QAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;AACnE,QAAM,OAAgB,CAAC;AACvB,MAAI,WAAW;AACf,aAAW,KAAK,MAAM,OAAsD;AAC1E,UAAM,MAAM,EAAE,kBAAkB,aAAa,IAAI,EAAE,eAAe,IAAI;AACtE,QAAI,CAAC,OAAO,IAAI,aAAa,WAAW;AACtC,kBAAY;AACZ;AAAA,IACF;AACA,SAAK,KAAK,C
AAC;AAAA,EACb;AACA,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,MAAI,qBAAqB;AAAA,IACvB,SAAS,OAAO,QAAQ;AAAA,IACxB,qBAAqB;AAAA,IACrB,UAAU,OAAO,SAAS;AAAA,EAC5B,CAAC;AACD,QAAM,KAAK;AAAA,IACT,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,cAAc,OAAO,QAAQ;AAAA,IAC7B,mBAAmB;AAAA,EACrB,CAAC;AACD,SAAO;AAAA,IACL,OAAO,MAAM;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,SAAS,MAAM;AAAA,IACf,QAAQ,MAAM;AAAA,IACd,qBAAqB;AAAA,EACvB;AACF;AAEA,SAAS,WAAW,KAAa,QAAwC;AACvE,MAAI,OAAQ,SAAQ,IAAI,kBAAkB,GAAG,IAAI,MAAM;AAAA,MAClD,SAAQ,IAAI,kBAAkB,GAAG,EAAE;AAC1C;","names":[]}
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import { I as Iteration, D as Driver } from './types-DbJzz2uf.js';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* @experimental
|
|
5
|
-
*
|
|
6
|
-
* Dynamic driver — the agent authors the loop topology at runtime.
|
|
7
|
-
*
|
|
8
|
-
* Where `refine` and `fanout-vote` encode a fixed shape as a pure function of
|
|
9
|
-
* history, this driver delegates the per-round shape to an injected
|
|
10
|
-
* `TopologyPlanner`. Each round the planner inspects the task + iteration
|
|
11
|
-
* history and emits one `TopologyMove`:
|
|
12
|
-
* - `refine` → one task next round (optionally rewritten from the prior attempt)
|
|
13
|
-
* - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
|
|
14
|
-
* 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
|
|
15
|
-
* - `stop` → terminate; the kernel selects the winner across all iterations
|
|
16
|
-
*
|
|
17
|
-
* The planner is the brain; this driver is the structure. It maps moves onto
|
|
18
|
-
* the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
|
|
19
|
-
* and fails loud on a malformed move. The planner is injected exactly like
|
|
20
|
-
* `refine`'s `refineTask` and `fanout-vote`'s `selector` — so a test can drive
|
|
21
|
-
* a deterministic policy through the real kernel, and production can wire it to
|
|
22
|
-
* an LLM via `createSandboxPlanner`.
|
|
23
|
-
*
|
|
24
|
-
* Topology is orthogonal to harness: the planner never names a backend. Which
|
|
25
|
-
* harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
|
|
26
|
-
* to, so one dynamic driver works across claude-code, codex, opencode, pi —
|
|
27
|
-
* including fanning a single round across several at once.
|
|
28
|
-
*/
|
|
29
|
-
|
|
30
|
-
/**
 * Outcome of the driver's `decide` step. `'done'` is a kernel terminal
 * decision: once `decide` returns it, the loop is finished.
 */
type DynamicDecision =
  | 'continue'
  | 'done';
|
|
32
|
-
/**
|
|
33
|
-
* One topology decision for the next round. `fanout` carries explicit tasks
|
|
34
|
-
* rather than a count so the planner can issue heterogeneous branches (a
|
|
35
|
-
* different sub-task per harness); pass N copies of one task for a homogeneous
|
|
36
|
-
* fanout that relies on `agentRuns` diversity instead.
|
|
37
|
-
*
|
|
38
|
-
* @experimental
|
|
39
|
-
*/
|
|
40
|
-
type TopologyMove<Task> = {
|
|
41
|
-
kind: 'refine';
|
|
42
|
-
task: Task;
|
|
43
|
-
rationale?: string;
|
|
44
|
-
parentIndex?: number;
|
|
45
|
-
} | {
|
|
46
|
-
kind: 'fanout';
|
|
47
|
-
tasks: Task[];
|
|
48
|
-
rationale?: string;
|
|
49
|
-
parentIndex?: number;
|
|
50
|
-
} | {
|
|
51
|
-
kind: 'stop';
|
|
52
|
-
rationale?: string;
|
|
53
|
-
};
|
|
54
|
-
/** @experimental */
|
|
55
|
-
interface PlannerContext<Task, Output> {
|
|
56
|
-
/** The root task the loop was invoked with — stable across rounds. */
|
|
57
|
-
task: Task;
|
|
58
|
-
/** Every iteration so far, in dispatch order, with outputs + verdicts. */
|
|
59
|
-
history: ReadonlyArray<Iteration<Task, Output>>;
|
|
60
|
-
/** `history.length` — iterations already spent. */
|
|
61
|
-
iterationsSpent: number;
|
|
62
|
-
/** Iterations left before the driver's `maxIterations` cap forces a stop. */
|
|
63
|
-
iterationsRemaining: number;
|
|
64
|
-
}
|
|
65
|
-
/**
|
|
66
|
-
* Chooses the next topology move from the task + history. Sync or async; an
|
|
67
|
-
* async planner is where an LLM call goes (see `createSandboxPlanner`).
|
|
68
|
-
*
|
|
69
|
-
* @experimental
|
|
70
|
-
*/
|
|
71
|
-
type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
|
|
72
|
-
/** @experimental */
|
|
73
|
-
interface CreateDynamicDriverOptions<Task, Output> {
|
|
74
|
-
/** The agent-authored topology policy. Invoked once per round in `plan`. */
|
|
75
|
-
planner: TopologyPlanner<Task, Output>;
|
|
76
|
-
/**
|
|
77
|
-
* Hard safety cap on total iterations. When reached, the driver stops before
|
|
78
|
-
* consulting the planner. Default 8. Set the kernel's `runLoop`
|
|
79
|
-
* `maxIterations >= ` this so the driver's cap governs and the loop closes on
|
|
80
|
-
* a clean `'done'` rather than a truncated `'continue'`.
|
|
81
|
-
*/
|
|
82
|
-
maxIterations?: number;
|
|
83
|
-
/** Max branches a single `fanout` move may dispatch. Default 4. */
|
|
84
|
-
maxFanout?: number;
|
|
85
|
-
/** Stable identifier surfaced in trace events. Default `'dynamic'`. */
|
|
86
|
-
name?: string;
|
|
87
|
-
}
|
|
88
|
-
/** @experimental */
|
|
89
|
-
declare function createDynamicDriver<Task, Output>(options: CreateDynamicDriverOptions<Task, Output>): Driver<Task, Output, DynamicDecision>;
|
|
90
|
-
/**
|
|
91
|
-
* Compact, planner-friendly view of iteration history — what an LLM planner
|
|
92
|
-
* needs to choose the next move without the raw event streams. Output is
|
|
93
|
-
* truncated so a long run's prompt stays bounded.
|
|
94
|
-
*
|
|
95
|
-
* @experimental
|
|
96
|
-
*/
|
|
97
|
-
declare function summarizeHistory<Task, Output>(history: ReadonlyArray<Iteration<Task, Output>>, opts?: {
|
|
98
|
-
maxOutputChars?: number;
|
|
99
|
-
}): Array<{
|
|
100
|
-
index: number;
|
|
101
|
-
agentRunName: string;
|
|
102
|
-
valid?: boolean;
|
|
103
|
-
score?: number;
|
|
104
|
-
error?: string;
|
|
105
|
-
output?: string;
|
|
106
|
-
}>;
|
|
107
|
-
|
|
108
|
-
export { type CreateDynamicDriverOptions as C, type DynamicDecision as D, type PlannerContext as P, type TopologyPlanner as T, type TopologyMove as a, createDynamicDriver as c, summarizeHistory as s };
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
import { LlmClientOptions } from '@tangle-network/agent-eval';
|
|
2
|
-
import { Scenario, DispatchContext, JudgeConfig, ImprovementDriver, Gate, CampaignStorage, GateResult, RunImprovementLoopResult } from '@tangle-network/agent-eval/campaign';
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* @experimental
|
|
6
|
-
*
|
|
7
|
-
* `optimizePrompt` — identity-gated optimization for any TEXT prompt surface
|
|
8
|
-
* (system prompt, planner prompt, judge rubric, skill doc).
|
|
9
|
-
*
|
|
10
|
-
* The text-surface sibling to this module's `improvementDriver` (the
|
|
11
|
-
* CODE-surface / worktree path). Both feed agent-eval's `runImprovementLoop`;
|
|
12
|
-
* this one defaults the driver to agent-eval's `gepaDriver` (reflective text
|
|
13
|
-
* mutator) and the gate to `heldOutGate`.
|
|
14
|
-
*
|
|
15
|
-
* IDENTITY-GATED BY CONSTRUCTION — the whole point. The loop runs evals,
|
|
16
|
-
* collects per-scenario signal, proposes candidates, and the gate compares
|
|
17
|
-
* candidate-vs-baseline ON THE HELDOUT. `result.prompt` is the baseline
|
|
18
|
-
* (identity) UNLESS the gate decided `'ship'`. So wiring a surface up is safe:
|
|
19
|
-
* a surface with no beneficial mutation simply keeps its baseline. You never
|
|
20
|
-
* regress by registering a prompt — you only ever improve when the held-out
|
|
21
|
-
* data earns it.
|
|
22
|
-
*
|
|
23
|
-
* Generic over the runtime: `runWithPrompt` is the only domain seam — given a
|
|
24
|
-
* candidate prompt + scenario, run it however the surface runs (sandbox
|
|
25
|
-
* `streamPrompt`, a `runLoop`, a direct model call) and return the artifact the
|
|
26
|
-
* judges score. The optimizer never assumes how a prompt is executed.
|
|
27
|
-
*/
|
|
28
|
-
|
|
29
|
-
/** Reflection config for the default `gepaDriver`. Omit when passing a custom
|
|
30
|
-
* `driver`. */
|
|
31
|
-
interface OptimizePromptReflection {
|
|
32
|
-
/** Router transport for the reflection model. */
|
|
33
|
-
llm: LlmClientOptions;
|
|
34
|
-
/** Model that performs the reflective rewrite. */
|
|
35
|
-
model: string;
|
|
36
|
-
/** What is being optimized — orients the reflection prompt. Default
|
|
37
|
-
* `'system prompt'`. */
|
|
38
|
-
target?: string;
|
|
39
|
-
/** Surface-specific mutation levers offered to the reflector. */
|
|
40
|
-
mutationPrimitives?: string[];
|
|
41
|
-
/** H2 (`## Foo`) headings that MUST survive every candidate. gepaDriver's
|
|
42
|
-
* only structural guard — load-bearing sections of the prompt should be
|
|
43
|
-
* `##` headings so a rewrite cannot drop them. */
|
|
44
|
-
preserveSections?: string[];
|
|
45
|
-
/** Max sentence-level edits per candidate vs the parent (a textual learning
|
|
46
|
-
* rate). Caps a rewrite from wiping prior rules in one generation. */
|
|
47
|
-
maxSentenceEdits?: number;
|
|
48
|
-
}
|
|
49
|
-
/** @experimental */
|
|
50
|
-
interface OptimizePromptOptions<TScenario extends Scenario, TArtifact> {
|
|
51
|
-
/** The prompt being optimized — the identity baseline the gate protects. */
|
|
52
|
-
baselinePrompt: string;
|
|
53
|
-
/** Domain seam: run a candidate prompt against a scenario → artifact the
|
|
54
|
-
* judges score. The optimizer is agnostic to HOW the prompt runs. */
|
|
55
|
-
runWithPrompt: (prompt: string, scenario: TScenario, ctx: DispatchContext) => Promise<TArtifact>;
|
|
56
|
-
/** Training pool — scored each generation to rank candidates. */
|
|
57
|
-
scenarios: TScenario[];
|
|
58
|
-
/** Held out of training — scored ONLY for the gate's baseline-vs-winner
|
|
59
|
-
* delta. Disjoint from `scenarios`; this is what makes promotion measure
|
|
60
|
-
* generalization, not memorization. */
|
|
61
|
-
holdoutScenarios: TScenario[];
|
|
62
|
-
/** Scorers — deterministic checks or LLM judges. */
|
|
63
|
-
judges: JudgeConfig<TArtifact, TScenario>[];
|
|
64
|
-
/** Where artifacts + traces land (opaque key under in-memory storage). */
|
|
65
|
-
runDir: string;
|
|
66
|
-
/** Default driver = `gepaDriver` built from this. Required UNLESS `driver`
|
|
67
|
-
* is supplied. */
|
|
68
|
-
reflection?: OptimizePromptReflection;
|
|
69
|
-
/** Override the improvement strategy (custom driver / deterministic tests). */
|
|
70
|
-
driver?: ImprovementDriver;
|
|
71
|
-
/** Override the promotion gate. Default `heldOutGate` over `holdoutScenarios`
|
|
72
|
-
* — zero extra LLM. Wrap `defaultProductionGate` for red-team/reward-hacking
|
|
73
|
-
* hardening on production wiring. */
|
|
74
|
-
gate?: Gate<TArtifact, TScenario>;
|
|
75
|
-
/** Minimum held-out composite lift to ship, forwarded to the default
|
|
76
|
-
* `heldOutGate`. When omitted the gate uses its own default. */
|
|
77
|
-
deltaThreshold?: number;
|
|
78
|
-
/** Candidates proposed per generation. Default 4. */
|
|
79
|
-
populationSize?: number;
|
|
80
|
-
/** Generations to run. Default 3. */
|
|
81
|
-
maxGenerations?: number;
|
|
82
|
-
/** Candidates carried to the next generation. Default 2. */
|
|
83
|
-
promoteTopK?: number;
|
|
84
|
-
/** Storage backend. Pass `inMemoryCampaignStorage()` for filesystem-less /
|
|
85
|
-
* test runs. Default: Node filesystem. */
|
|
86
|
-
storage?: CampaignStorage;
|
|
87
|
-
/** Reproducibility seed. Default 42. */
|
|
88
|
-
seed?: number;
|
|
89
|
-
/** Per-scenario replicates for CI bands. Default 1. */
|
|
90
|
-
reps?: number;
|
|
91
|
-
/** Max concurrent cells. Default 2. */
|
|
92
|
-
maxConcurrency?: number;
|
|
93
|
-
/** Test seam — override the wall clock. */
|
|
94
|
-
now?: () => Date;
|
|
95
|
-
/** On a shipped gate: `'pr'` opens a PR, `'none'` just reports. Default
|
|
96
|
-
* `'none'`. */
|
|
97
|
-
autoOnPromote?: 'pr' | 'none';
|
|
98
|
-
ghOwner?: string;
|
|
99
|
-
ghRepo?: string;
|
|
100
|
-
}
|
|
101
|
-
/** @experimental */
|
|
102
|
-
interface OptimizePromptResult<TArtifact, TScenario extends Scenario> {
|
|
103
|
-
/** The prompt to USE. Identity (the baseline) unless the gate shipped a
|
|
104
|
-
* winner — so a caller can always assign `result.prompt` unconditionally. */
|
|
105
|
-
prompt: string;
|
|
106
|
-
/** True only when the gate promoted a candidate over baseline on holdout. */
|
|
107
|
-
improved: boolean;
|
|
108
|
-
/** The gate's verdict (`'ship' | 'hold' | 'need_more_work' | ...`). */
|
|
109
|
-
decision: GateResult['decision'];
|
|
110
|
-
/** Human-readable reasons the gate gave. */
|
|
111
|
-
reasons: string[];
|
|
112
|
-
/** Mean held-out composite of the baseline. */
|
|
113
|
-
baselineComposite: number;
|
|
114
|
-
/** Mean held-out composite of the winner candidate. */
|
|
115
|
-
winnerComposite: number;
|
|
116
|
-
/** Held-out lift (winner − baseline); the gate's `delta` when it reported one. */
|
|
117
|
-
delta: number;
|
|
118
|
-
/** Why the winner was proposed — present when a shipped winner carried a
|
|
119
|
-
* driver rationale. */
|
|
120
|
-
rationale?: string;
|
|
121
|
-
/** Unified baseline→winner diff (empty when the winner is the baseline). */
|
|
122
|
-
diff: string;
|
|
123
|
-
/** The full loop result for callers that need generations / campaigns. */
|
|
124
|
-
raw: RunImprovementLoopResult<TArtifact, TScenario>;
|
|
125
|
-
}
|
|
126
|
-
/** @experimental */
|
|
127
|
-
declare function optimizePrompt<TScenario extends Scenario, TArtifact>(opts: OptimizePromptOptions<TScenario, TArtifact>): Promise<OptimizePromptResult<TArtifact, TScenario>>;
|
|
128
|
-
|
|
129
|
-
export { type OptimizePromptOptions as O, type OptimizePromptResult as a, type OptimizePromptReflection as b, optimizePrompt as o };
|
|
File without changes
|