synergyspec-selfevolving 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +158 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +431 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/fitness/test-failures.js +10 -2
- package/dist/core/project-config.d.ts +19 -0
- package/dist/core/project-config.js +96 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +192 -0
- package/dist/core/self-evolution/critic-agent.js +568 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
- package/dist/core/self-evolution/episode-orchestrator.js +681 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
- package/dist/core/self-evolution/evolving-agent.js +535 -0
- package/dist/core/self-evolution/host-harness.d.ts +14 -15
- package/dist/core/self-evolution/host-harness.js +48 -23
- package/dist/core/self-evolution/index.d.ts +11 -6
- package/dist/core/self-evolution/index.js +20 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +379 -0
- package/dist/core/self-evolution/reward-agent.js +940 -0
- package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
- package/dist/core/self-evolution/reward-aggregator.js +262 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tamper-check.d.ts +24 -0
- package/dist/core/self-evolution/tamper-check.js +236 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/gen-tests.js +1 -1
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +21 -18
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/core/trajectory/scrub.d.ts +27 -0
- package/dist/core/trajectory/scrub.js +79 -0
- package/dist/core/trajectory/skeleton.d.ts +27 -1
- package/dist/core/trajectory/skeleton.js +152 -8
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
- package/dist/core/self-evolution/ga-selection.d.ts +0 -94
- package/dist/core/self-evolution/ga-selection.js +0 -153
- package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
- package/dist/core/self-evolution/proposer-agent.js +0 -326
- package/dist/core/self-evolution/replay-runner.d.ts +0 -100
- package/dist/core/self-evolution/replay-runner.js +0 -170
- package/dist/core/self-evolution/replay.d.ts +0 -45
- package/dist/core/self-evolution/replay.js +0 -56
- package/dist/core/self-evolution/template-variants.d.ts +0 -62
- package/dist/core/self-evolution/template-variants.js +0 -171
- package/dist/core/self-evolution/trajectory.d.ts +0 -65
- package/dist/core/self-evolution/trajectory.js +0 -185
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CRITIC AGENT(基线智能体 baseline agent)runner — loop v2 (self-evolution as
|
|
3
|
+
* in-context RL).
|
|
4
|
+
*
|
|
5
|
+
* The CRITIC AGENT is an AGENT with the SAME input/output as the 主智能体 MAIN
|
|
6
|
+
* AGENT (frozen actor; the user's host agent running the current 策略 policy
|
|
7
|
+
* vN+1). It produces the baseline arm for LAST episode's 策略 policy vN on the
|
|
8
|
+
* SAME change in an ISOLATED worktree (by default RE-DOING the change under vN —
|
|
9
|
+
* see {@link CriticBaselineMode}), so the 奖励智能体 REWARD AGENT can later 算分
|
|
10
|
+
* calculate reward(主臂)&reward(基线臂) and advantage = reward(主臂) −
|
|
11
|
+
* reward(基线臂). Only its baseline trajectory survives — 产物即弃 (worktree
|
|
12
|
+
* artifacts discarded): the worktree is torn down in `finally`, and the single
|
|
13
|
+
* durable output is the `baseline-arm/` capture in the episode store.
|
|
14
|
+
*
|
|
15
|
+
* This module orchestrates ONE baseline arm:
|
|
16
|
+
* 1. create an isolated worktree OUTSIDE the repo (git worktree at detached
|
|
17
|
+
* HEAD — which excludes the change's still-uncommitted implementation — else
|
|
18
|
+
* a recursive file copy fallback),
|
|
19
|
+
* 2. make it runnable (node_modules junction/symlink + the untracked surfaces
|
|
20
|
+
* the rerun reads),
|
|
21
|
+
* 2b. ('re-do' mode, the default) reset the copied change dir to its INPUTS —
|
|
22
|
+
* remove the GENERATED artifacts (design.md, tasks.md) + reports — so the
|
|
23
|
+
* rerun re-authors them under the installed prior policy ({@link
|
|
24
|
+
* resetChangeArtifactsForRedo}),
|
|
25
|
+
* 3. INSTALL 策略 policy vN into the worktree from the byte-for-byte version
|
|
26
|
+
* snapshot, so the baseline arm reruns the PRIOR policy and not the live
|
|
27
|
+
* templates,
|
|
28
|
+
* 4. rerun headlessly via the host-aware {@link runHeadlessAgent} with
|
|
29
|
+
* cwd = worktree ('re-do' regenerates design→tasks→impl→tests; 're-test'
|
|
30
|
+
* re-runs the existing change's tests), never editing canonical files,
|
|
31
|
+
* 5. persist the baseline arm (stdout always; the claude session transcript +
|
|
32
|
+
* action skeleton when discoverable; an `objective.json` shaped IDENTICALLY
|
|
33
|
+
* to the main arm's), and
|
|
34
|
+
* 6. ALWAYS tear the worktree down.
|
|
35
|
+
*
|
|
36
|
+
* Honesty contract: a pass rate is parse-or-throw — when the rerun's stdout
|
|
37
|
+
* carries no parseable test summary the objective records `passRate: null`
|
|
38
|
+
* rather than fabricating one. The agent is RUN, never asked to edit; the prompt
|
|
39
|
+
* strips every arm/candidate word.
|
|
40
|
+
*/
|
|
41
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
42
|
+
import { promises as fs } from 'node:fs';
|
|
43
|
+
import * as os from 'node:os';
|
|
44
|
+
import * as path from 'node:path';
|
|
45
|
+
import { parseTestMetrics, computePerChangeLoss, measureHealthPenalty, resolveMetricSource, } from '../fitness/index.js';
|
|
46
|
+
import { readProjectConfig } from '../project-config.js';
|
|
47
|
+
import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
|
|
48
|
+
import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
|
|
49
|
+
import { toActionSkeleton } from '../trajectory/skeleton.js';
|
|
50
|
+
import { runHeadlessAgent } from './host-harness.js';
|
|
51
|
+
import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
|
|
52
|
+
import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
|
|
53
|
+
/** Error thrown when the worktree could not be created (git AND copy fallback failed). */
|
|
54
|
+
export class CriticWorktreeError extends Error {
|
|
55
|
+
constructor(message) {
|
|
56
|
+
super(`critic worktree failed: ${message}`);
|
|
57
|
+
this.name = 'CriticWorktreeError';
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
/**
 * Decide whether the critic (baseline) agent should run for the next episode.
 *
 * The decision is derived from the policy ledger of `opts.targetId`:
 *   - an empty ledger means the lineage was never initialized -> skip;
 *   - a current version that is `null` or below 1 means there is no prior
 *     policy to rerun -> skip;
 *   - a last ledger entry whose action is 'refused' means the previous episode
 *     did not change the policy, so a rerun would compare it with itself -> skip;
 *   - otherwise run, using the current version as the baseline version.
 *
 * This function only reads (via `readPolicyLedger` and `currentPolicyVersion`);
 * it performs no writes itself.
 *
 * @param opts `repoRoot` (resolved to an absolute path) and `targetId`.
 * @returns `{ run, reason, baselineVersion }`; `baselineVersion` is `null`
 *   whenever `run` is `false`.
 */
export async function shouldRunCriticAgent(opts) {
    const { targetId } = opts;
    const root = path.resolve(opts.repoRoot);
    // Every skip outcome has the same shape; only the explanation differs.
    const skip = (reason) => ({ run: false, reason, baselineVersion: null });
    const entries = await readPolicyLedger(root, targetId);
    if (entries.length === 0) {
        return skip(`policy lineage for ${targetId} is not initialized (no versions to rerun)`);
    }
    const version = await currentPolicyVersion(root, targetId);
    if (version === null || version < 1) {
        return skip(`policy lineage for ${targetId} has < 2 versions (head v${version ?? 0}); no prior policy to rerun`);
    }
    // The newest ledger entry tells us what the previous episode decided.
    const lastAction = entries[entries.length - 1].action;
    if (lastAction === 'refused') {
        return skip(`last episode refused to evolve ${targetId} (vN+1 ≡ vN); rerunning the baseline would compare a policy against itself`);
    }
    return {
        run: true,
        reason: `policy lineage for ${targetId} head v${version} (last action '${lastAction}'); rerunning the baseline arm`,
        baselineVersion: version,
    };
}
|
|
115
|
+
export function assembleCriticPrompt(changeName, mode = 're-do') {
|
|
116
|
+
if (mode === 're-test') {
|
|
117
|
+
return [
|
|
118
|
+
`You are RE-RUNNING an existing SynergySpec change end-to-end to measure its`,
|
|
119
|
+
`test outcome under the artifact templates already installed in your working`,
|
|
120
|
+
`directory. This is a measurement run only — do NOT modify any canonical`,
|
|
121
|
+
`workflow prompt, artifact template, or schema, and do NOT edit the frozen`,
|
|
122
|
+
`gen-test/run-test oracle.`,
|
|
123
|
+
``,
|
|
124
|
+
`Change name: ${changeName}`,
|
|
125
|
+
``,
|
|
126
|
+
`Run the change's tests (apply → gen-test → run-test) and output the test`,
|
|
127
|
+
`runner's SUMMARY LINE verbatim as the final line of your response, e.g.`,
|
|
128
|
+
`"Tests 12 passed | 1 failed (13)" or "5 passed, 0 failed in 0.4s".`,
|
|
129
|
+
].join('\n');
|
|
130
|
+
}
|
|
131
|
+
// 're-do' — regenerate the change end-to-end so the installed prior-policy
|
|
132
|
+
// design template is actually exercised.
|
|
133
|
+
return [
|
|
134
|
+
`You are RE-DOING an existing SynergySpec change from scratch under the`,
|
|
135
|
+
`artifact templates currently installed in your working directory, to measure`,
|
|
136
|
+
`the test outcome those templates produce. This is a measurement run.`,
|
|
137
|
+
``,
|
|
138
|
+
`Change name: ${changeName}`,
|
|
139
|
+
``,
|
|
140
|
+
`The change's INPUT artifacts (proposal.md, usecases.md, specs/) are present.`,
|
|
141
|
+
`Its design.md and tasks.md have been intentionally REMOVED so you regenerate`,
|
|
142
|
+
`them under the installed templates. Re-create the change end-to-end:`,
|
|
143
|
+
``,
|
|
144
|
+
`1. Regenerate the design — run`,
|
|
145
|
+
` synergyspec-selfevolving instructions design --change "${changeName}" --json`,
|
|
146
|
+
` read the returned template + dependency files (proposal.md, usecases.md),`,
|
|
147
|
+
` and author design.md using that template as the structure.`,
|
|
148
|
+
`2. Regenerate the tasks the same way`,
|
|
149
|
+
` (synergyspec-selfevolving instructions tasks --change "${changeName}" --json),`,
|
|
150
|
+
` then apply them — implement the code each task requires.`,
|
|
151
|
+
`3. Generate the change's tests (gen-test), then run the test runner (run-test).`,
|
|
152
|
+
``,
|
|
153
|
+
`Do NOT modify any canonical workflow prompt, artifact TEMPLATE, or schema, and`,
|
|
154
|
+
`do NOT edit the frozen gen-test/run-test oracle. Write ONLY the change's own`,
|
|
155
|
+
`artifacts (design.md, tasks.md in the change dir) and the implementation`,
|
|
156
|
+
`source the tasks require.`,
|
|
157
|
+
``,
|
|
158
|
+
`Output the test runner's SUMMARY LINE verbatim as the final line of your`,
|
|
159
|
+
`response, e.g. "Tests 12 passed | 1 failed (13)" or "5 passed, 0 failed in 0.4s".`,
|
|
160
|
+
].join('\n');
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Generated artifacts a 're-do' baseline removes from the copied change dir
|
|
164
|
+
* before the rerun, so the agent re-authors them under the installed prior
|
|
165
|
+
* policy. design.md + tasks.md are the policy-shaped chain; the report files are
|
|
166
|
+
* post-implementation residue that would otherwise read the change as already
|
|
167
|
+
* applied (status keys doneness off file existence). The INPUT artifacts
|
|
168
|
+
* (proposal.md, usecases.md, specs/) — which define "the same task" — are KEPT.
|
|
169
|
+
*/
|
|
170
|
+
const REDO_REGENERATED_ARTIFACTS = [
|
|
171
|
+
'design.md',
|
|
172
|
+
'tasks.md',
|
|
173
|
+
'test-report.md',
|
|
174
|
+
'test-plan.md',
|
|
175
|
+
'spec-tests.md',
|
|
176
|
+
'spec-blast-radius.md',
|
|
177
|
+
'verification-report.md',
|
|
178
|
+
];
|
|
179
|
+
/**
|
|
180
|
+
* Reset a copied change dir to its inputs for a 're-do' baseline (see
|
|
181
|
+
* {@link REDO_REGENERATED_ARTIFACTS}). Best-effort: a missing artifact is fine.
|
|
182
|
+
*/
|
|
183
|
+
async function resetChangeArtifactsForRedo(changeDir) {
|
|
184
|
+
for (const rel of REDO_REGENERATED_ARTIFACTS) {
|
|
185
|
+
await fs.rm(path.join(changeDir, rel), { force: true }).catch(() => { });
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
/** Name of the dependency directory that is linked into (never copied to) the worktree. */
const NODE_MODULES = 'node_modules';
/** Name of the project-level configuration directory. */
const CONFIG_DIR = '.synergyspec-selfevolving';
/** Repo-relative location of the project-local schemas directory. */
const SCHEMAS_REL = ['synergyspec-selfevolving', 'schemas'].join(path.sep);
|
|
191
|
+
/**
 * Run one full baseline arm for the critic (baseline) agent and persist its
 * capture.
 *
 * Steps, in order:
 *   1. create an isolated worktree under the OS temp dir,
 *   2. make it runnable (node_modules link + copied untracked surfaces),
 *   2b. in 're-do' mode, delete the generated change artifacts so they are
 *       re-authored by the rerun,
 *   3. install the snapshot files of policy `opts.baselineVersion`,
 *   4. run the headless agent with the worktree as cwd,
 *   5. derive the objective, pick the transcript, write the 'baseline-arm'
 *      capture and advance the episode to 'baseline-arm-captured',
 *   6. tear the worktree down — always, via `finally`, even when a step threw.
 *
 * @param opts Options read by this function: `repoRoot`, `episodeId`,
 *   `changeName`, `targetId`, `baselineVersion` (required non-negative
 *   integer), and the optional `spawn`, `timeoutMs` (default 600000),
 *   `homeDir` (default `os.homedir()`), `baselineMode` (default 're-do') and
 *   `now` (a Date marking the start of the run window).
 * @returns `{ armDir, objective, transcriptDiscovered, worktreePath,
 *   worktreeMode, baselineMode }`. `transcriptDiscovered` is `true` only when
 *   the persisted transcript is the discovered session file.
 * @throws Error when `baselineVersion` is not a non-negative integer, and
 *   whatever any of the steps above throws (the worktree is still removed).
 */
export async function runCriticAgent(opts) {
    const repoRoot = path.resolve(opts.repoRoot);
    const spawnImpl = opts.spawn ?? nodeSpawn;
    const timeoutMs = opts.timeoutMs ?? 600000;
    const homeDir = opts.homeDir ?? os.homedir();
    const baselineMode = opts.baselineMode ?? 're-do';
    if (!Number.isInteger(opts.baselineVersion) || opts.baselineVersion < 0) {
        throw new Error(`runCriticAgent requires a non-negative integer baselineVersion, got ${JSON.stringify(opts.baselineVersion)}`);
    }
    const worktreeName = `synergyspec-critic-${opts.episodeId}`;
    const worktreePath = path.join(os.tmpdir(), worktreeName);
    // The run window opens here; transcript discovery only considers session
    // files written after this instant.
    const runStart = (opts.now ?? new Date()).getTime();
    // Assumed until createIsolatedWorktree reports otherwise, so that a failure
    // inside it still triggers the git-aware teardown in `finally`.
    let worktreeMode = 'git-worktree';
    try {
        // 1) Isolated worktree outside the repo (git worktree, else file copy).
        worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl);
        // The copy fallback brings the live tree (including the change's
        // implementation), so a 're-do' run cannot start from the pre-change
        // state. Say so loudly instead of degrading silently.
        if (baselineMode === 're-do' && worktreeMode === 'copy-fallback') {
            console.warn(`[critic] re-do baseline degraded for "${opts.changeName}": no git worktree ` +
                `(copy fallback) — the change's implementation could not be isolated, so the ` +
                `baseline re-measures rather than re-does. Use a git repo, or set ` +
                `selfEvolution.critic.baselineMode: re-test to silence this.`);
        }
        // 2) Make it runnable: node_modules link + untracked surfaces.
        await makeWorktreeRunnable(repoRoot, worktreePath, opts.changeName);
        // 2b) 're-do': strip the generated artifacts from the copied change dir
        //     so the rerun has to re-author them.
        if (baselineMode === 're-do') {
            await resetChangeArtifactsForRedo(path.join(worktreePath, 'synergyspec-selfevolving', 'changes', opts.changeName));
        }
        // 3) Install the requested policy version's snapshot files.
        await installPolicyVersion(repoRoot, worktreePath, opts.targetId, opts.baselineVersion);
        // 4) Rerun headlessly with cwd = worktree.
        const prompt = assembleCriticPrompt(opts.changeName, baselineMode);
        const run = await runHeadlessAgent(prompt, {
            cwd: worktreePath,
            spawn: spawnImpl,
            timeoutMs,
        });
        // 5) Build + persist the baseline arm.
        const measuredAt = new Date().toISOString();
        const metrics = parseTestMetrics(run.stdout);
        // Session transcript for the worktree path, when one can be found.
        const trajectory = await discoverWorktreeTrajectory({
            worktreePath,
            changeName: opts.changeName,
            homeDir,
            runStartMs: runStart,
        });
        const facts = trajectory
            ? // Imported lazily, as in the original, to avoid a top-level import
                // cycle hazard.
                (await import('../trajectory/facts.js')).toTrajectoryFacts(trajectory, opts.changeName)
            : null;
        // Prefer the observed pass rate, then the stdout-parsed one; `null` when
        // neither is available (a value is never invented).
        const observedPassRate = facts?.testRunObserved && facts.observedPassRate !== null
            ? facts.observedPassRate
            : null;
        const passRate = observedPassRate ?? metrics?.passRate ?? null;
        const verified = facts ? facts.verified : false;
        const observedStatus = facts ? facts.observedStatus : null;
        // Health is measured against the worktree, using the metric source
        // resolved from the worktree's own (copied) project config.
        const metricSource = resolveMetricSource(readProjectConfig(worktreePath));
        const healthPenalty = (await measureHealthPenalty(metricSource, worktreePath)) ?? null;
        const loss = passRate !== null
            ? computePerChangeLoss({
                passRate,
                healthPenalty: healthPenalty ?? undefined,
                verified: facts ? facts.verified : undefined,
            }).loss
            : null;
        const objective = {
            passRate,
            ...(metrics ? { testsTotal: metrics.total, testsFailed: metrics.failed } : {}),
            healthPenalty,
            loss,
            verified,
            observedStatus,
            measuredAt,
            ...(facts ? { testRunObserved: facts.testRunObserved } : {}),
            ...(facts?.observedFailures && facts.observedFailures.length > 0
                ? { observedFailures: facts.observedFailures }
                : {}),
        };
        // Transcript: the session `.jsonl` when discovered, else stdout. Start
        // from the stdout fallback and upgrade only once BOTH the file read and
        // the skeleton derivation succeeded, so `transcriptDiscovered` can never
        // be true while the persisted transcript is stdout.
        let transcriptDiscovered = false;
        let transcript = { fileName: 'stdout.txt', content: run.stdout };
        let skeleton;
        const sessionPath = trajectory?.sourcePaths[0];
        if (trajectory && sessionPath) {
            try {
                const content = await fs.readFile(sessionPath, 'utf8');
                const actionSkeleton = toActionSkeleton(trajectory);
                transcript = { fileName: 'transcript.jsonl', content };
                transcriptDiscovered = true;
                if (actionSkeleton)
                    skeleton = actionSkeleton;
            }
            catch {
                // Unreadable session file or skeleton failure — keep the stdout
                // fallback and leave `transcriptDiscovered` false.
            }
        }
        const { armDir } = await writeArmCapture({
            repoRoot,
            episodeId: opts.episodeId,
            arm: 'baseline-arm',
            transcript,
            ...(skeleton ? { skeleton } : {}),
            objective,
        });
        // Record that the arm landed and which policy version was rerun.
        await advanceEpisodeStage({
            repoRoot,
            episodeId: opts.episodeId,
            stage: 'baseline-arm-captured',
            patch: { policyVersionBaseline: opts.baselineVersion },
        });
        return {
            armDir,
            objective,
            transcriptDiscovered,
            worktreePath,
            worktreeMode,
            baselineMode,
        };
    }
    finally {
        // 6) Always remove the worktree, even when a step above threw.
        await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl);
    }
}
|
|
347
|
+
// ---------------------------------------------------------------------------
|
|
348
|
+
// Worktree lifecycle
|
|
349
|
+
// ---------------------------------------------------------------------------
|
|
350
|
+
/**
 * Create an isolated worktree at `worktreePath`.
 *
 * Strategy:
 *   1. remove any leftover directory at `worktreePath` (best-effort),
 *   2. prune stale git worktree registrations (best-effort) — deleting the
 *      directory alone leaves it registered, and `git worktree add` refuses a
 *      "missing but already registered" path, which would otherwise push a
 *      perfectly good git repo onto the copy fallback after an interrupted run,
 *   3. try `git worktree add --detach <worktreePath> HEAD`,
 *   4. on any git failure, fall back to a recursive copy of the repo.
 *
 * @param repoRoot Repository the worktree is derived from (cwd for git).
 * @param worktreePath Destination directory of the worktree.
 * @param spawnImpl Spawn function used to run git.
 * @returns `'git-worktree'` or `'copy-fallback'`, whichever succeeded.
 * @throws CriticWorktreeError when both strategies fail; the message carries
 *   the copy failure and the git failure that preceded it.
 */
async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
    // A stale directory from an interrupted run would make both strategies
    // fail; nothing in it is durable, so clear it first.
    await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
    // Drop the registration of any worktree whose directory no longer exists.
    // Fails harmlessly when this is not a git repo or git is unavailable.
    await runGit(repoRoot, ['worktree', 'prune'], spawnImpl).catch(() => { });
    let gitFailure;
    try {
        await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl);
        return 'git-worktree';
    }
    catch (err) {
        // Remember why git failed, then fall through to the copy fallback
        // (not a git repo, git unavailable, ...).
        gitFailure = err instanceof Error ? err.message : String(err);
    }
    try {
        await copyRepoTree(repoRoot, worktreePath);
        return 'copy-fallback';
    }
    catch (err) {
        const copyFailure = err instanceof Error ? err.message : String(err);
        throw new CriticWorktreeError(`git worktree add failed and the copy fallback failed too: ${copyFailure} (git failure: ${gitFailure})`);
    }
}
|
|
375
|
+
/**
 * Remove the worktree created for a baseline rerun. Never rejects: every step
 * swallows its own failure so teardown cannot mask an error from the run.
 *
 * For `mode === 'git-worktree'` it first runs `git worktree remove --force`
 * and `git worktree prune`; in every mode it then deletes the directory
 * recursively.
 *
 * @param repoRoot Repository the worktree belongs to (cwd for git).
 * @param worktreePath Directory to remove.
 * @param mode How the worktree was created ('git-worktree' or the copy fallback).
 * @param spawnImpl Spawn function used to run git.
 */
async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl) {
    const ignoreFailure = () => { };
    if (mode === 'git-worktree') {
        const gitSteps = [
            ['worktree', 'remove', '--force', worktreePath],
            ['worktree', 'prune'],
        ];
        for (const stepArgs of gitSteps) {
            await runGit(repoRoot, stepArgs, spawnImpl).catch(ignoreFailure);
        }
    }
    // Final directory removal for both modes, in case git left something behind
    // or the worktree was a plain copy.
    await fs.rm(worktreePath, { recursive: true, force: true }).catch(ignoreFailure);
}
|
|
390
|
+
/** Run a git subcommand in `repoRoot`; rejects on a non-zero exit or spawn error. */
|
|
391
|
+
async function runGit(repoRoot, args, spawnImpl) {
|
|
392
|
+
await new Promise((resolve, reject) => {
|
|
393
|
+
const child = spawnImpl('git', args, { cwd: repoRoot, shell: false });
|
|
394
|
+
const err = [];
|
|
395
|
+
child.stderr?.on('data', (c) => err.push(Buffer.from(c)));
|
|
396
|
+
child.on('error', (e) => reject(e));
|
|
397
|
+
child.on('close', (code) => {
|
|
398
|
+
if (code === 0)
|
|
399
|
+
resolve();
|
|
400
|
+
else
|
|
401
|
+
reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
|
|
402
|
+
});
|
|
403
|
+
});
|
|
404
|
+
}
|
|
405
|
+
/**
 * Recursively copy `src` into `dest`, leaving out every entry whose base name
 * is `node_modules` or `.git`.
 *
 * @param src Directory to copy from.
 * @param dest Directory to copy into.
 */
async function copyRepoTree(src, dest) {
    const skippedNames = new Set([NODE_MODULES, '.git']);
    // The filter sees every visited path; an entry is kept unless its own name
    // is one of the skipped names.
    const keepEntry = (source) => !skippedNames.has(path.basename(source));
    await fs.cp(src, dest, { recursive: true, filter: keepEntry });
}
|
|
420
|
+
/**
 * Prepare a freshly created worktree so the rerun can execute in it:
 *   - link the repo's `node_modules` into the worktree (a junction on Windows,
 *     a directory symlink elsewhere) when the repo has one; a failed link is
 *     tolerated,
 *   - copy the change directory and the project-local schemas directory from
 *     the repo into the worktree,
 *   - copy the config directory, skipping its `self-evolution` subdirectory
 *     and everything beneath it.
 *
 * @param repoRoot Repository to take the files from.
 * @param worktreePath Worktree to populate.
 * @param changeName Name of the change whose directory is copied.
 */
async function makeWorktreeRunnable(repoRoot, worktreePath, changeName) {
    const inRepo = (...segments) => path.join(repoRoot, ...segments);
    const inWorktree = (...segments) => path.join(worktreePath, ...segments);
    // --- node_modules link -------------------------------------------------
    const linkSource = inRepo(NODE_MODULES);
    if (await pathExists(linkSource)) {
        const linkTarget = inWorktree(NODE_MODULES);
        // Nothing should be there yet; clear a stray entry so the link can be made.
        await fs.rm(linkTarget, { recursive: true, force: true }).catch(() => { });
        const linkType = process.platform === 'win32' ? 'junction' : 'dir';
        try {
            await fs.symlink(linkSource, linkTarget, linkType);
        }
        catch {
            // Linking is not possible here; continue without node_modules.
        }
    }
    // --- change directory --------------------------------------------------
    const changeSegments = ['synergyspec-selfevolving', 'changes', changeName];
    await copyDirInto(inRepo(...changeSegments), inWorktree(...changeSegments));
    // --- project-local schemas ----------------------------------------------
    await copyDirInto(inRepo(SCHEMAS_REL), inWorktree(SCHEMAS_REL));
    // --- config directory, minus the self-evolution state -------------------
    const configSource = inRepo(CONFIG_DIR);
    if (!(await pathExists(configSource))) {
        return;
    }
    const stateDirName = 'self-evolution';
    const outsideLoopState = (source) => {
        const rel = path.relative(configSource, source);
        const isStateDir = rel === stateDirName;
        const isUnderStateDir = rel.startsWith(`${stateDirName}${path.sep}`);
        return !(isStateDir || isUnderStateDir);
    };
    await fs.cp(configSource, inWorktree(CONFIG_DIR), {
        recursive: true,
        force: true,
        filter: outsideLoopState,
    });
}
|
|
466
|
+
/**
 * Write the snapshot files of policy `version` into the worktree at their
 * repo-relative paths, so the rerun sees that version rather than whatever is
 * currently in the tree.
 *
 * Files are obtained from `readPolicySnapshotFiles`; each entry's `relPath`
 * ('/'-separated) is mapped under `worktreePath` and its `content` written as
 * UTF-8, creating parent directories as needed.
 *
 * @param repoRoot Repository holding the policy snapshots.
 * @param worktreePath Worktree to install the files into.
 * @param targetId Policy target whose snapshot is installed.
 * @param version Policy version to install.
 * @throws Error when a snapshot path would land outside the worktree, plus
 *   anything `readPolicySnapshotFiles` or the file writes throw.
 */
async function installPolicyVersion(repoRoot, worktreePath, targetId, version) {
    const files = await readPolicySnapshotFiles(repoRoot, targetId, version);
    for (const f of files) {
        const abs = path.join(worktreePath, ...f.relPath.split('/'));
        // Defense-in-depth: refuse anything that escapes the worktree. Only a
        // genuine parent segment counts as an escape — a plain prefix test on
        // '..' would also reject legitimate names such as '..cache/file'.
        const rel = path.relative(worktreePath, abs);
        const escapesWorktree = rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
        if (escapesWorktree) {
            throw new Error(`Refusing to install policy file outside the worktree: ${f.relPath}`);
        }
        await fs.mkdir(path.dirname(abs), { recursive: true });
        await fs.writeFile(abs, f.content, 'utf8');
    }
}
|
|
487
|
+
/**
 * Find and normalize the claude session transcript produced by the rerun.
 *
 * The claude project dir is derived from the WORKTREE path (the rerun's cwd)
 * via {@link claudeProjectsDir}. The directory is scanned for `.jsonl` files
 * not older than `opts.runStartMs`; if at least one exists, normalization is
 * delegated to the claude adapter ({@link claudeSourceFactory}), detected
 * against the worktree. The scan acts as a gate only: the adapter performs
 * its own discovery, so nothing here reimplements transcript parsing.
 *
 * @param opts - `worktreePath`, `homeDir`, `runStartMs` and `changeName`.
 * @returns The adapter's trajectory for `opts.changeName`, or `null` when the
 *   projects dir is unreadable, no session file falls in the run window, the
 *   adapter does not detect a source, or the adapter throws — the
 *   "no trajectory ⇒ stdout only" fallback the caller relies on.
 */
async function discoverWorktreeTrajectory(opts) {
    const projectsDir = claudeProjectsDir(opts.worktreePath, opts.homeDir);
    // An unreadable projects dir means the host harness is not claude (or it
    // never wrote a session): skip cleanly.
    let dirents;
    try {
        dirents = await fs.readdir(projectsDir, { withFileTypes: true });
    }
    catch {
        return null;
    }
    // Track the most recent in-window `.jsonl` mtime. Only its existence is
    // consulted below; the adapter re-discovers the session itself.
    let latestMtimeMs = null;
    for (const dirent of dirents) {
        const isTranscript = dirent.isFile() && dirent.name.endsWith('.jsonl');
        if (!isTranscript)
            continue;
        let stamp;
        try {
            const stats = await fs.stat(path.join(projectsDir, dirent.name));
            stamp = stats.mtimeMs;
        }
        catch {
            // File vanished or is unreadable: ignore it.
            continue;
        }
        if (stamp < opts.runStartMs)
            continue;
        if (latestMtimeMs === null || stamp > latestMtimeMs.value) {
            latestMtimeMs = { value: stamp };
        }
    }
    if (latestMtimeMs === null)
        return null;
    // Normalize through the claude adapter, detected against the worktree so
    // the source is pinned to the worktree's project hash. Any adapter failure
    // degrades to "no trajectory".
    try {
        const source = await claudeSourceFactory.detect(opts.worktreePath, {
            homeDir: opts.homeDir,
        });
        return source ? await source.getTrajectory(opts.changeName) : null;
    }
    catch {
        return null;
    }
}
|
|
550
|
+
// ---------------------------------------------------------------------------
|
|
551
|
+
// Small fs helpers (match the neighbor idiom: no throw on probe)
|
|
552
|
+
// ---------------------------------------------------------------------------
|
|
553
|
+
/**
 * Probe whether `p` can be stat'ed, never throwing.
 *
 * @param p - Filesystem path to probe.
 * @returns `true` when `fs.stat` succeeds; `false` on any stat failure.
 */
async function pathExists(p) {
    let found = true;
    try {
        await fs.stat(p);
    }
    catch {
        // Any stat failure is reported as "absent".
        found = false;
    }
    return found;
}
|
|
562
|
+
/**
 * Recursively copy `src` into `dest` when `src` exists.
 *
 * A missing `src` is a silent no-op rather than an error. Existing destination
 * files are overwritten (`force: true`), so repeating the call is safe.
 *
 * @param src - Source path to copy from.
 * @param dest - Destination path to copy into.
 */
async function copyDirInto(src, dest) {
    const srcPresent = await pathExists(src);
    if (srcPresent) {
        await fs.cp(src, dest, { recursive: true, force: true });
    }
}
|
|
568
|
+
//# sourceMappingURL=critic-agent.js.map
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
 * The candidate edits failed validation. Per the contract documented on
 * `validateCandidateEdits` in this file, it is thrown for any shape / frozen /
 * scope violation.
 */
export declare class CanonicalProposerOutputInvalid extends Error {
|
|
2
|
+
/** @param message - Description of the violation (presumably used as the error message — confirm in the implementation). */
constructor(message: string);
|
|
3
|
+
}
|
|
4
|
+
/** The model declined to edit anything (empty edits). Not an error — a no-op. Thrown by `validateCandidateEdits` when `rawEdits` is empty. */
|
|
5
|
+
export declare class CanonicalProposerNoOp extends Error {
|
|
6
|
+
/** Takes no arguments; the declaration exposes no caller-supplied message. */
constructor();
|
|
7
|
+
}
|
|
8
|
+
/** The headless agent invocation itself failed (crash / empty output). */
|
|
9
|
+
export declare class CanonicalProposerInvocationError extends Error {
|
|
10
|
+
/** @param stderr - Presumably the stderr text captured from the failed invocation — confirm in the implementation. */
constructor(stderr: string);
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* The packaged result of one validated candidate edit set: the human-readable
|
|
14
|
+
* unified diff, the POSIX paths actually edited (a subset of the target's
|
|
15
|
+
* declared files), a non-empty rationale, and the parsed full-file-replacement
|
|
16
|
+
* edits. Produced by the manual host-authored channel (`packageHostEdits`).
|
|
17
|
+
*/
|
|
18
|
+
export interface CanonicalProposeOutput {
|
|
19
|
+
/** Identifier of the target these edits apply to (presumably the target whose declared files bound `changedFiles` — confirm). */
targetId: string;
|
|
20
|
+
/** A unified-diff rendering of the edits (opaque to the gate; readable by a human). */
|
|
21
|
+
diffPatch: string;
|
|
22
|
+
/** POSIX paths actually edited — always a subset of the target's declared files. */
|
|
23
|
+
changedFiles: string[];
|
|
24
|
+
/** Non-empty rationale (the static gate requires one). */
|
|
25
|
+
rationale: string;
|
|
26
|
+
/** The parsed full-file-replacement edits. */
|
|
27
|
+
edits: {
|
|
28
|
+
/** Path of the edited file (presumably POSIX and repo-relative, like `changedFiles` — confirm). */
relPath: string;
|
|
29
|
+
/** Complete replacement content for the file (edits are full-file replacements). */
content: string;
|
|
30
|
+
}[];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Validate already-structured candidate edits against the allowed (target-
|
|
34
|
+
* scoped) file set and the frozen gate-defining files. Author-agnostic: this is
|
|
35
|
+
* the SINGLE place that enforces, at propose time, that every edit (a) is a
|
|
36
|
+
* well-formed `{relPath, content}` object, (b) does not touch a
|
|
37
|
+
* `GATE_DEFINING_FILES` entry (the frozen oracle/gate files), and (c) stays
|
|
38
|
+
* inside `allowedFiles`. Both the manual host-authored (`--from-edits`) path and
|
|
39
|
+
* the loop-v2 EVOLVING AGENT call this so their safety contract is
|
|
40
|
+
* byte-identical. relPaths are normalized to POSIX separators.
|
|
41
|
+
*
|
|
42
|
+
* Throws {@link CanonicalProposerNoOp} when `rawEdits` is empty and
|
|
43
|
+
* {@link CanonicalProposerOutputInvalid} for any shape / frozen / scope
|
|
44
|
+
* violation. Path traversal and absolute paths are rejected transitively: they
|
|
45
|
+
* can never be a member of `allowedFiles`, so they fail the scope check.
|
|
46
|
+
*
 * @param rawEdits - Untrusted candidate edits; each element is checked for the
 *   `{relPath, content}` shape before use.
 * @param allowedFiles - The target-scoped set of paths an edit may touch.
 * @returns The validated edits as `{relPath, content}` objects, with relPaths
 *   normalized to POSIX separators.
 */
|
|
47
|
+
export declare function validateCandidateEdits(rawEdits: readonly unknown[], allowedFiles: readonly string[]): {
|
|
48
|
+
relPath: string;
|
|
49
|
+
content: string;
|
|
50
|
+
}[];
|
|
51
|
+
/**
 * Render a whole-file-replacement unified diff (human-readable; git-apply friendly).
 *
 * @param relPath - Path of the file the diff describes.
 * @param oldContent - Presumably the file's full text before the edit — confirm in the implementation.
 * @param newContent - Presumably the file's full text after the edit — confirm in the implementation.
 * @returns The rendered diff text.
 */
|
|
52
|
+
export declare function renderUnifiedDiff(relPath: string, oldContent: string, newContent: string): string;
|
|
53
|
+
//# sourceMappingURL=edits-contract.d.ts.map
|