synergyspec-selfevolving 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +151 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/project-config.d.ts +2 -0
- package/dist/core/project-config.js +28 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +9 -6
- package/dist/core/self-evolution/index.js +18 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +18 -16
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -133,7 +133,7 @@ Now tell your AI: `/synspec:propose <what-you-want-to-build>`
|
|
|
133
133
|
→ **[Workflows](docs/workflows.md)**: combos and patterns<br>
|
|
134
134
|
→ **[Commands](docs/commands.md)**: slash commands & skills<br>
|
|
135
135
|
→ **[CLI](docs/cli.md)**: terminal reference<br>
|
|
136
|
-
→ **[
|
|
136
|
+
→ **[Self-Evolution Loop](docs/evaluation-lab.md)**: the loop-v2 in-context-RL surface<br>
|
|
137
137
|
→ **[Supported Tools](docs/supported-tools.md)**: tool integrations & install paths<br>
|
|
138
138
|
→ **[Concepts](docs/concepts.md)**: how it all fits<br>
|
|
139
139
|
→ **[Multi-Language](docs/multi-language.md)**: multi-language support<br>
|
|
@@ -199,9 +199,9 @@ The self-evolution machinery lives in-tree under `src/core/fitness` and
|
|
|
199
199
|
`src/core/self-evolution`, exposed through the `synergyspec-selfevolving
|
|
200
200
|
self-evolution` subcommands (not as an ordinary day-to-day skill). It treats the
|
|
201
201
|
spec workflow as a learnable system — a change is a forward pass through the
|
|
202
|
-
artifact templates; `learn` runs the backward pass (a per-change loss); and
|
|
203
|
-
|
|
204
|
-
|
|
202
|
+
artifact templates; `learn` runs the backward pass (a per-change loss); and an
|
|
203
|
+
in-context-RL episode improves one design 策略 POLICY lineage from a graded
|
|
204
|
+
advantage, with a rollback that fires *before* any new edit.
|
|
205
205
|
|
|
206
206
|
What actually works today:
|
|
207
207
|
|
|
@@ -233,21 +233,31 @@ What actually works today:
|
|
|
233
233
|
session: an unresolvable flag is an up-front error (exit non-zero), while a
|
|
234
234
|
missing env handle fails closed — no trajectory, and the observed-verified
|
|
235
235
|
gate refuses to promote.
|
|
236
|
-
- **Code-health gate** (
|
|
237
|
-
regression vs the last accepted state blocks
|
|
238
|
-
loud `health-signal-unavailable` observation if a configured analyzer
|
|
239
|
-
run). No health signal ⇒ no gate, so the loop is never blocked on a missing
|
|
236
|
+
- **Code-health gate** (the episode's evolving agent / `evolve-from-edits`): a
|
|
237
|
+
measured code-health regression vs the last accepted state blocks promotion (and
|
|
238
|
+
surfaces a loud `health-signal-unavailable` observation if a configured analyzer
|
|
239
|
+
can't run). No health signal ⇒ no gate, so the loop is never blocked on a missing
|
|
240
240
|
measurement.
|
|
241
241
|
- **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
|
|
242
242
|
`learn` hints into human-gated candidate packages under
|
|
243
243
|
`.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
|
|
244
244
|
canonical file is modified, and the frozen gen-test/run-test oracle is never
|
|
245
245
|
touched.
|
|
246
|
-
- **
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
246
|
+
- **In-context-RL episode** (`self-evolution episode --change <name>`): runs ONE
|
|
247
|
+
completed change through a fixed, code-spawned pipeline. A two-arm forward grades
|
|
248
|
+
the **主智能体 MAIN AGENT** (the frozen current 策略 POLICY) from the change's
|
|
249
|
+
`learn` report against a **CRITIC AGENT(基线智能体 baseline agent)** that reruns
|
|
250
|
+
the prior policy on the same change; a **奖励智能体 REWARD AGENT** computes
|
|
251
|
+
**advantage = reward(主臂) − reward(基线臂)** and names the gap; on a bad advantage
|
|
252
|
+
the orchestrator rolls the 策略 POLICY back *before* the **演进智能体 EVOLVING
|
|
253
|
+
AGENT** is called; the evolving agent then makes ONE bounded edit (or refuses)
|
|
254
|
+
after the static / observed-GREEN / 范围⊆诊断 gates pass, writing the next policy
|
|
255
|
+
version onto your LOCAL files — no candidate dir, no republish. A frozen target
|
|
256
|
+
(per the per-target evolution policy) is skipped. Inspect the lineage with
|
|
257
|
+
`self-evolution policy show [--target <id>]`, re-enter a partial run with
|
|
258
|
+
`self-evolution episode resume <id>`, and manually restore a version with
|
|
259
|
+
`self-evolution policy rollback --target <id> --yes`. See
|
|
260
|
+
[docs/evaluation-lab.md](docs/evaluation-lab.md).
|
|
251
261
|
- **Per-target evolution scope** (`selfEvolution:` in
|
|
252
262
|
`synergyspec-selfevolving/config.yaml`): one switch decides which canonical
|
|
253
263
|
targets may evolve, honored end-to-end by `learn` → `propose-canonical` → the
|
|
@@ -260,13 +270,16 @@ What actually works today:
|
|
|
260
270
|
|
|
261
271
|
What is **not** built yet (don't infer it from the architecture diagrams):
|
|
262
272
|
|
|
263
|
-
- No autonomous multi-
|
|
264
|
-
|
|
265
|
-
|
|
273
|
+
- No autonomous multi-episode **breeding** loop. Each `self-evolution episode`
|
|
274
|
+
improves the policy from one change's graded advantage; it never fans out
|
|
275
|
+
competing variants or runs unattended generations. New manual candidates only
|
|
276
|
+
come from the human-gated `propose-canonical` step.
|
|
266
277
|
- No measured benchmark gains. The mechanisms are implemented and unit-tested,
|
|
267
278
|
but the loop has not been run end-to-end to produce quantitative results.
|
|
268
|
-
- The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness)
|
|
269
|
-
|
|
279
|
+
- The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) and
|
|
280
|
+
the genetic-algorithm outer loop (`self-evolution evolve` / `--replay`) have
|
|
281
|
+
been removed; the technique was internalized into `src/core` and re-homed on the
|
|
282
|
+
loop-v2 in-context-RL episode.
|
|
270
283
|
|
|
271
284
|
## Contributing
|
|
272
285
|
|
package/dist/commands/learn.d.ts
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
1
|
import { Command } from 'commander';
|
|
2
|
-
|
|
2
|
+
import { type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy } from '../core/self-evolution/episode-orchestrator.js';
|
|
3
|
+
/**
|
|
4
|
+
* Injectable dependencies for {@link registerLearnCommand}. The ONLY seam today
|
|
5
|
+
* is `runEpisode` — the loop-v2 episode runner the autonomous `--apply` entrance
|
|
6
|
+
* invokes. It defaults to the real {@link runEpisode}; learn tests stub it so the
|
|
7
|
+
* evolve path is exercised WITHOUT spawning real agents (the orchestrator's three
|
|
8
|
+
* agents each own a `runHeadlessAgent` spawn, which the stub never reaches).
|
|
9
|
+
*/
|
|
10
|
+
export interface LearnCommandDeps {
|
|
11
|
+
runEpisode?: (opts: RunEpisodeOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
|
|
12
|
+
}
|
|
13
|
+
export declare function registerLearnCommand(program: Command, deps?: LearnCommandDeps): void;
|
|
3
14
|
//# sourceMappingURL=learn.d.ts.map
|
package/dist/commands/learn.js
CHANGED
|
@@ -9,12 +9,14 @@ import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/fac
|
|
|
9
9
|
import { toActionSkeleton } from '../core/trajectory/skeleton.js';
|
|
10
10
|
import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
|
|
11
11
|
import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
|
|
12
|
+
import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
|
|
12
13
|
import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
|
|
13
14
|
function collect(value, previous) {
|
|
14
15
|
previous.push(value);
|
|
15
16
|
return previous;
|
|
16
17
|
}
|
|
17
|
-
export function registerLearnCommand(program) {
|
|
18
|
+
export function registerLearnCommand(program, deps = {}) {
|
|
19
|
+
const runEpisodeImpl = deps.runEpisode ?? runEpisode;
|
|
18
20
|
const learnCmd = program
|
|
19
21
|
.command('learn [change]')
|
|
20
22
|
.description('Review a completed change and extract reusable learning candidates')
|
|
@@ -157,13 +159,45 @@ export function registerLearnCommand(program) {
|
|
|
157
159
|
// side-write only; never fail learn over it.
|
|
158
160
|
}
|
|
159
161
|
}
|
|
162
|
+
// LOOP-V2 AUTONOMOUS ENTRANCE (CS6-F): on an --apply run that opted into
|
|
163
|
+
// evolution, resolved EXACTLY ONE concrete evolvable target, and carries
|
|
164
|
+
// an observed-VERIFIED-GREEN signal, run one self-evolution episode
|
|
165
|
+
// in-process (rollback-before-evolution). A bare `learn <change>` preview
|
|
166
|
+
// can never reach this (it is neither --apply nor an evolving run), so the
|
|
167
|
+
// non-evolving path stays byte-identical to today. The runner is injected
|
|
168
|
+
// (`deps.runEpisode`) so tests exercise this path WITHOUT spawning real
|
|
169
|
+
// agents; it defaults to the real orchestrator.
|
|
170
|
+
let episodeOutcome;
|
|
171
|
+
const concreteEvolveTarget = resolveSingleConcreteTarget(evolutionPreview);
|
|
172
|
+
if (options.apply === true &&
|
|
173
|
+
isEvolvingRun(options) &&
|
|
174
|
+
concreteEvolveTarget !== undefined &&
|
|
175
|
+
concreteEvolveTarget.targetId !== null &&
|
|
176
|
+
reportIsObservedVerifiedGreen(report)) {
|
|
177
|
+
const mainArm = await captureMainArm({
|
|
178
|
+
repoRoot: projectRoot,
|
|
179
|
+
changeName: report.changeName,
|
|
180
|
+
report,
|
|
181
|
+
});
|
|
182
|
+
episodeOutcome = await runEpisodeImpl({
|
|
183
|
+
repoRoot: projectRoot,
|
|
184
|
+
targetId: concreteEvolveTarget.targetId,
|
|
185
|
+
changeName: report.changeName,
|
|
186
|
+
changeDirPath: report.changeDir,
|
|
187
|
+
mainArm,
|
|
188
|
+
});
|
|
189
|
+
}
|
|
160
190
|
if (options.json) {
|
|
161
|
-
printJson(report, applied, evolutionPreview, hintsPath);
|
|
191
|
+
printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome);
|
|
162
192
|
return;
|
|
163
193
|
}
|
|
164
194
|
console.log(renderLearnReport(report, applied));
|
|
165
195
|
console.log('');
|
|
166
196
|
console.log(renderLearnTransparency(report, applied, evolutionPreview, hintsPath, options));
|
|
197
|
+
if (episodeOutcome) {
|
|
198
|
+
console.log('');
|
|
199
|
+
console.log(renderEpisodeOutcome(episodeOutcome));
|
|
200
|
+
}
|
|
167
201
|
if (successSummary) {
|
|
168
202
|
console.log('');
|
|
169
203
|
console.log(successSummary);
|
|
@@ -515,7 +549,7 @@ function renderIngestHandoff(changeName, ingest, applied) {
|
|
|
515
549
|
}
|
|
516
550
|
return lines.join('\n');
|
|
517
551
|
}
|
|
518
|
-
function printJson(report, applied, evolutionPreview, hintsPath) {
|
|
552
|
+
function printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome) {
|
|
519
553
|
// `mode` only tracks whether MEMORY candidates were applied (--apply). It does
|
|
520
554
|
// NOT reflect that --persist-hints wrote a hints file, which is what made the
|
|
521
555
|
// old `mode:"preview"` read as "nothing written". `wrote` makes every write this
|
|
@@ -540,6 +574,30 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
|
|
|
540
574
|
hintIds: target.hintIds,
|
|
541
575
|
})),
|
|
542
576
|
};
|
|
577
|
+
// Loop-v2 episode outcome (CS6-F): present only when the autonomous entrance
|
|
578
|
+
// ran one this --apply run; absent on every non-evolving / unverified run so
|
|
579
|
+
// their --json stays byte-identical to today.
|
|
580
|
+
const episode = episodeOutcome
|
|
581
|
+
? 'busy' in episodeOutcome
|
|
582
|
+
? { busy: true, reason: episodeOutcome.reason }
|
|
583
|
+
: {
|
|
584
|
+
episodeId: episodeOutcome.episodeId,
|
|
585
|
+
baselineSkipped: episodeOutcome.baselineSkipped,
|
|
586
|
+
advantage: episodeOutcome.advantage,
|
|
587
|
+
decision: episodeOutcome.decision,
|
|
588
|
+
evolution: episodeOutcome.evolution === null
|
|
589
|
+
? null
|
|
590
|
+
: episodeOutcome.evolution.kind === 'evolved'
|
|
591
|
+
? {
|
|
592
|
+
kind: 'evolved',
|
|
593
|
+
version: episodeOutcome.evolution.ledgerEntry.version,
|
|
594
|
+
}
|
|
595
|
+
: episodeOutcome.evolution.kind === 'refused'
|
|
596
|
+
? { kind: 'refused', reason: episodeOutcome.evolution.reason }
|
|
597
|
+
: { kind: 'not-spawned', reason: episodeOutcome.evolution.reason },
|
|
598
|
+
newPolicyVersion: episodeOutcome.newPolicyVersion,
|
|
599
|
+
}
|
|
600
|
+
: undefined;
|
|
543
601
|
console.log(JSON.stringify({
|
|
544
602
|
mode: applied ? 'apply' : 'preview',
|
|
545
603
|
...report,
|
|
@@ -548,6 +606,7 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
|
|
|
548
606
|
...(hintsPath ? { hintsPath } : {}),
|
|
549
607
|
wrote,
|
|
550
608
|
evolution,
|
|
609
|
+
...(episode ? { episode } : {}),
|
|
551
610
|
}, null, 2));
|
|
552
611
|
}
|
|
553
612
|
async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
|
|
@@ -715,10 +774,18 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
|
|
|
715
774
|
// full new content for that target's resolved LOCAL file.
|
|
716
775
|
const concreteTargets = evolutionPreview.targets.filter((target) => target.targetId !== null);
|
|
717
776
|
const concreteTarget = concreteTargets.length > 0 ? concreteTargets[0] : undefined;
|
|
718
|
-
if (hintsPath && concreteTarget) {
|
|
777
|
+
if (hintsPath && concreteTarget && concreteTarget.targetId !== null) {
|
|
778
|
+
const concreteTargetId = concreteTarget.targetId;
|
|
719
779
|
lines.push(`- Hints written: ${hintsPath}`);
|
|
780
|
+
// LOOP-V2 (autonomous, rollback-before-evolution): the in-context-RL episode
|
|
781
|
+
// is now the default autonomous path. `--apply` on a verified-green run runs
|
|
782
|
+
// it in-process; this line is the explicit re-runnable form.
|
|
783
|
+
lines.push(renderEpisodeNextStep(report.changeName, concreteTargetId, options));
|
|
720
784
|
const localFile = concreteTarget.localFiles[0] ?? concreteTarget.files[0] ?? '<target file>';
|
|
721
|
-
|
|
785
|
+
// MANUAL channel (host agent authors the edit directly): kept for operators
|
|
786
|
+
// who want to hand evolve-from-edits a full new-file content themselves
|
|
787
|
+
// rather than spawn the loop-v2 EVOLVING AGENT.
|
|
788
|
+
lines.push(`- Manual: evolve from your own edits: synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "${hintsPath}" --evolve-target ${concreteTargetId} --from-edits <edits.json> --yes`);
|
|
722
789
|
lines.push(` you (the host agent) author edits.json's full new file content for the target file shown above (${localFile}).`);
|
|
723
790
|
}
|
|
724
791
|
else if (hintsPath) {
|
|
@@ -754,26 +821,99 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
|
|
|
754
821
|
lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
|
|
755
822
|
}
|
|
756
823
|
else {
|
|
824
|
+
// A bare preview has no agent-in-the-loop signal, so point the human/cron
|
|
825
|
+
// operator at the LOOP-V2 autonomous entrance (in-context RL,
|
|
826
|
+
// rollback-before-evolution): `self-evolution episode`.
|
|
757
827
|
lines.push('');
|
|
758
|
-
lines.push('
|
|
759
|
-
lines.push(`- One-button local evolve:
|
|
828
|
+
lines.push('autonomous loop-v2 (rollback-before-evolution):');
|
|
829
|
+
lines.push(`- One-button local evolve: ${renderEpisodeCommand(report.changeName, undefined, options)}`);
|
|
760
830
|
lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
|
|
761
831
|
}
|
|
762
832
|
return lines.join('\n');
|
|
763
833
|
}
|
|
834
|
+
/**
|
|
835
|
+
* The loop-v2 autonomous entrance command (CS6-F):
|
|
836
|
+
* `self-evolution episode --change "<name>" [--target <id>] [--session-id <id>]`.
|
|
837
|
+
* Replaces the GA `auto-evolve` / autonomous `evolve-from-edits` suggestions as
|
|
838
|
+
* the loop-v2 path. The `--target` pin is included only when a concrete target
|
|
839
|
+
* id resolved; `--session-id` is threaded through when the operator pinned an
|
|
840
|
+
* explicit trajectory handle so the episode grades the SAME session learn did.
|
|
841
|
+
*/
|
|
842
|
+
function renderEpisodeCommand(changeName, targetId, options) {
|
|
843
|
+
const parts = [`synergyspec-selfevolving self-evolution episode --change "${changeName}"`];
|
|
844
|
+
if (targetId)
|
|
845
|
+
parts.push(`--target ${targetId}`);
|
|
846
|
+
if (options.sessionId)
|
|
847
|
+
parts.push(`--session-id ${options.sessionId}`);
|
|
848
|
+
return parts.join(' ');
|
|
849
|
+
}
|
|
850
|
+
/** The loop-v2 next-step line shown when a concrete target is pinned. */
|
|
851
|
+
function renderEpisodeNextStep(changeName, targetId, options) {
|
|
852
|
+
return `- Autonomous loop-v2 episode: ${renderEpisodeCommand(changeName, targetId, options)}`;
|
|
853
|
+
}
|
|
764
854
|
/**
|
|
765
855
|
* An "evolving run" is one where the operator opted into evolution
|
|
766
856
|
* (`--apply` / `--persist-hints` / a named `--evolve-target`) — per the skill,
|
|
767
857
|
* the bare CLI previews and only the skill/agent flow passes these flags, so
|
|
768
858
|
* this is the agent-in-the-loop proxy (the same signal that gates the
|
|
769
|
-
* unbindable-hint observations in the learn action). The
|
|
770
|
-
*
|
|
771
|
-
* NO agent in the loop; the skill
|
|
772
|
-
*
|
|
859
|
+
* unbindable-hint observations in the learn action). The loop-v2 autonomous
|
|
860
|
+
* fallback (the `self-evolution episode` next-step, which code-spawns the
|
|
861
|
+
* reward + evolving agents) is for runs with NO agent in the loop; the skill
|
|
862
|
+
* forbids surfacing that autonomous entrance when an agent IS already the
|
|
863
|
+
* proposer, so the fallback is suppressed on evolving runs.
|
|
773
864
|
*/
|
|
774
865
|
function isEvolvingRun(options) {
|
|
775
866
|
return (options.apply === true || options.persistHints === true || options.evolveTarget !== undefined);
|
|
776
867
|
}
|
|
868
|
+
/**
|
|
869
|
+
* The report carries an observed-VERIFIED-GREEN signal: a REAL test runner was
|
|
870
|
+
* observed (`verified`) AND it came back green (`observedStatus === 'success'`).
|
|
871
|
+
* This is the SAME condition the failure-evidence routing uses in core/learn.ts
|
|
872
|
+
* (a verified-green run yields an empty failure list); the loop-v2 episode is
|
|
873
|
+
* only auto-launched when the main arm actually has a measured, trusted outcome
|
|
874
|
+
* — never on an authored-only or unverified report.
|
|
875
|
+
*/
|
|
876
|
+
function reportIsObservedVerifiedGreen(report) {
|
|
877
|
+
const facts = report.fitnessSample?.trajectoryFacts;
|
|
878
|
+
return facts !== undefined && facts.verified === true && facts.observedStatus === 'success';
|
|
879
|
+
}
|
|
880
|
+
/**
|
|
881
|
+
* The ONE concrete evolvable target this learn run resolved, or `undefined` when
|
|
882
|
+
* there is not exactly one (zero, or a kind-only/ambiguous group that still
|
|
883
|
+
* needs a `--evolve-target` pin). Mirrors the `concreteTarget` the next-steps
|
|
884
|
+
* renderer picks when exactly one exists (the renderer otherwise takes the first); the loop-v2 entrance refuses to guess when more than one
|
|
885
|
+
* concrete target is in play.
|
|
886
|
+
*/
|
|
887
|
+
function resolveSingleConcreteTarget(preview) {
|
|
888
|
+
const concrete = preview.targets.filter((target) => target.targetId !== null);
|
|
889
|
+
return concrete.length === 1 ? concrete[0] : undefined;
|
|
890
|
+
}
|
|
891
|
+
/** Render the loop-v2 episode outcome for the human-readable transparency block. */
|
|
892
|
+
function renderEpisodeOutcome(episode) {
|
|
893
|
+
if ('busy' in episode) {
|
|
894
|
+
return `- Loop-v2 episode: skipped — another in-flight episode holds the target (${episode.reason}).`;
|
|
895
|
+
}
|
|
896
|
+
const lines = [];
|
|
897
|
+
lines.push(`- Loop-v2 episode: ${episode.episodeId}`);
|
|
898
|
+
const advantage = episode.advantage === null ? 'n/a (baseline skipped or abstained)' : String(episode.advantage);
|
|
899
|
+
lines.push(` advantage: ${advantage}; decision: ${episode.decision}`);
|
|
900
|
+
if (episode.evolution === null) {
|
|
901
|
+
lines.push(' evolution: not spawned');
|
|
902
|
+
}
|
|
903
|
+
else if (episode.evolution.kind === 'evolved') {
|
|
904
|
+
lines.push(` evolution: evolved -> policy v${episode.evolution.ledgerEntry.version}`);
|
|
905
|
+
}
|
|
906
|
+
else if (episode.evolution.kind === 'refused') {
|
|
907
|
+
lines.push(` evolution: refused (${episode.evolution.reason})`);
|
|
908
|
+
}
|
|
909
|
+
else {
|
|
910
|
+
lines.push(` evolution: not spawned (${episode.evolution.reason})`);
|
|
911
|
+
}
|
|
912
|
+
if (episode.newPolicyVersion !== null) {
|
|
913
|
+
lines.push(` policy head: v${episode.newPolicyVersion}`);
|
|
914
|
+
}
|
|
915
|
+
return lines.join('\n');
|
|
916
|
+
}
|
|
777
917
|
function renderExplicitPolicy(explicit) {
|
|
778
918
|
if (explicit.length === 0)
|
|
779
919
|
return '';
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Loop-v2 CLI commands (self-evolution as in-context RL).
|
|
3
|
+
*
|
|
4
|
+
* These commands drive ONE bounded episode of the loop documented in
|
|
5
|
+
* `src/core/self-evolution/episode-orchestrator.ts`:
|
|
6
|
+
*
|
|
7
|
+
* - `episode` — run ONE episode for a change: build the 主智能体
|
|
8
|
+
* MAIN AGENT (frozen actor, 策略 POLICY vN+1) arm
|
|
9
|
+
* from a learn report (the same grading the `learn`
|
|
10
|
+
* command uses), then run the orchestrator (CRITIC
|
|
11
|
+
* AGENT(基线智能体 baseline agent)arm + 奖励智能体
|
|
12
|
+
* REWARD AGENT scoring + rollback/keep decision +
|
|
13
|
+
* 演进智能体 EVOLVING AGENT optimizer.step). Prints a
|
|
14
|
+
* human summary (advantage, decision, evolution
|
|
15
|
+
* kind, new policy version) or --json.
|
|
16
|
+
* - `episode resume <id>` — re-enter a partially-run episode at its recorded
|
|
17
|
+
* stage and finish the remaining steps.
|
|
18
|
+
* - `policy show` — READ-ONLY: print the 版本账本 ledger (versions,
|
|
19
|
+
* actions, Δ stats, predictions) + the 否决缓冲
|
|
20
|
+
* reject-buffer for the target(s). Replaces the
|
|
21
|
+
* read-only role of the (soon-removed) `trajectory`
|
|
22
|
+
* command.
|
|
23
|
+
* - `policy rollback` — manual snapshot rollback to the prior version +
|
|
24
|
+
* a `human-reject` 否决缓冲 entry; requires --yes.
|
|
25
|
+
*
|
|
26
|
+
* Like the rest of the self-evolution CLI surface, every programmatic entrypoint
|
|
27
|
+
* here is exported and fully INJECTABLE (the orchestrator spawn seam, the learn
|
|
28
|
+
* report generator, stdout/stderr, clock) so the commands are unit-testable
|
|
29
|
+
* without spawning a real `claude` binary.
|
|
30
|
+
*/
|
|
31
|
+
import { Command } from 'commander';
|
|
32
|
+
import { captureMainArm as captureMainArmImpl, resumeEpisode as resumeEpisodeImpl, type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy, type ResumeEpisodeResult, type PolicyLedgerEntry, type RejectBufferEntry } from '../core/self-evolution/index.js';
|
|
33
|
+
import { type LearnReport } from '../core/learn.js';
|
|
34
|
+
/**
|
|
35
|
+
* The options the `episode` command forwards to the runEpisode seam. A superset
|
|
36
|
+
* of the orchestrator's {@link RunEpisodeOptions} carrying the `--no-baseline`
|
|
37
|
+
* request as `skipBaseline`. The base orchestrator ignores the extra key (the
|
|
38
|
+
* CRITIC AGENT arm is gated by its own ledger read); a custom seam may read it.
|
|
39
|
+
*/
|
|
40
|
+
export type EpisodeRunOptions = RunEpisodeOptions & {
|
|
41
|
+
skipBaseline?: boolean;
|
|
42
|
+
};
|
|
43
|
+
/**
|
|
44
|
+
* The injectable orchestrator seam. Accepts the {@link EpisodeRunOptions}
|
|
45
|
+
* superset (the real {@link runEpisodeImpl}, typed for {@link RunEpisodeOptions},
|
|
46
|
+
* satisfies this because every {@link EpisodeRunOptions} value is also a valid {@link RunEpisodeOptions}).
|
|
47
|
+
*/
|
|
48
|
+
export type EpisodeRunner = (opts: EpisodeRunOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
|
|
49
|
+
/**
|
|
50
|
+
* Attach the loop-v2 `episode` + `policy` subcommands to the parent
|
|
51
|
+
* `self-evolution` command. Called once from {@link registerSelfEvolutionCommand}.
|
|
52
|
+
*/
|
|
53
|
+
export declare function attachSelfEvolutionEpisodeCommands(parent: Command): void;
|
|
54
|
+
export interface RunEpisodeCommandArgs {
|
|
55
|
+
/** The completed change to run the episode for. */
|
|
56
|
+
changeName?: string;
|
|
57
|
+
/** Canonical target id to evolve. Defaults to the design artifact-template. */
|
|
58
|
+
target?: string;
|
|
59
|
+
/** Skip the CRITIC AGENT(基线智能体 baseline agent)arm for this episode. */
|
|
60
|
+
noBaseline?: boolean;
|
|
61
|
+
/** Explicit transcript handle (Claude transcript store only). */
|
|
62
|
+
transcript?: string;
|
|
63
|
+
/** Explicit Claude session id handle. */
|
|
64
|
+
sessionId?: string;
|
|
65
|
+
json?: boolean;
|
|
66
|
+
}
|
|
67
|
+
export interface RunEpisodeCommandOptions {
|
|
68
|
+
repoRoot: string;
|
|
69
|
+
stdout?: (l: string) => void;
|
|
70
|
+
stderr?: (l: string) => void;
|
|
71
|
+
/**
|
|
72
|
+
* Test seam: produce the change's learn report (the 主智能体 MAIN AGENT arm's
|
|
73
|
+
* grading). Defaults to {@link generateLearnReport}; tests inject a graded stub
|
|
74
|
+
* so the episode runs without a real agent trajectory.
|
|
75
|
+
*/
|
|
76
|
+
generateReport?: (changeName: string) => Promise<LearnReport>;
|
|
77
|
+
/**
|
|
78
|
+
* Test seam: build the {@link MainArmCapture} from a learn report. Defaults to
|
|
79
|
+
* the orchestrator's {@link captureMainArmImpl}.
|
|
80
|
+
*/
|
|
81
|
+
captureMainArm?: typeof captureMainArmImpl;
|
|
82
|
+
/**
|
|
83
|
+
* Test seam: the orchestrator that runs the three agents. Defaults to the real
|
|
84
|
+
* {@link runEpisodeImpl}; tests inject a fake so NO agents are spawned.
|
|
85
|
+
*/
|
|
86
|
+
runEpisode?: EpisodeRunner;
|
|
87
|
+
}
|
|
88
|
+
export interface RunEpisodeCommandResult {
|
|
89
|
+
exitCode: number;
|
|
90
|
+
/** Present when the episode ran (not busy / not an error). */
|
|
91
|
+
result?: RunEpisodeResult;
|
|
92
|
+
/** Present when the target's in-flight slot was already held. */
|
|
93
|
+
busy?: RunEpisodeBusy;
|
|
94
|
+
error?: string;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Programmatic entrypoint for `self-evolution episode`. Exported so tests can
|
|
98
|
+
* drive the full episode flow with an injected orchestrator seam (no real agent
|
|
99
|
+
* spawn).
|
|
100
|
+
*/
|
|
101
|
+
export declare function runEpisodeCommand(args: RunEpisodeCommandArgs, opts: RunEpisodeCommandOptions): Promise<RunEpisodeCommandResult>;
|
|
102
|
+
export interface RunResumeEpisodeCommandArgs {
|
|
103
|
+
episodeId: string;
|
|
104
|
+
json?: boolean;
|
|
105
|
+
}
|
|
106
|
+
export interface RunResumeEpisodeCommandResult {
|
|
107
|
+
exitCode: number;
|
|
108
|
+
result?: ResumeEpisodeResult;
|
|
109
|
+
error?: string;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Programmatic entrypoint for `self-evolution episode resume <id>`. Exported for
|
|
113
|
+
* tests; the orchestrator seam is injectable.
|
|
114
|
+
*/
|
|
115
|
+
export declare function runResumeEpisodeCommand(args: RunResumeEpisodeCommandArgs, opts: {
|
|
116
|
+
repoRoot: string;
|
|
117
|
+
stdout?: (l: string) => void;
|
|
118
|
+
stderr?: (l: string) => void;
|
|
119
|
+
resumeEpisode?: typeof resumeEpisodeImpl;
|
|
120
|
+
}): Promise<RunResumeEpisodeCommandResult>;
|
|
121
|
+
export interface RunPolicyShowCommandArgs {
|
|
122
|
+
/** Restrict to a single canonical target id. */
|
|
123
|
+
target?: string;
|
|
124
|
+
json?: boolean;
|
|
125
|
+
}
|
|
126
|
+
export interface PolicyShowTargetView {
|
|
127
|
+
targetId: string;
|
|
128
|
+
/** Lineage head version, or null when the lineage has not been initialized. */
|
|
129
|
+
head: number | null;
|
|
130
|
+
ledger: PolicyLedgerEntry[];
|
|
131
|
+
rejectBuffer: RejectBufferEntry[];
|
|
132
|
+
}
|
|
133
|
+
export interface RunPolicyShowCommandResult {
|
|
134
|
+
exitCode: number;
|
|
135
|
+
targets: PolicyShowTargetView[];
|
|
136
|
+
error?: string;
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Programmatic entrypoint for `self-evolution policy show`. READ-ONLY: reads the
|
|
140
|
+
* 版本账本 ledger + 否决缓冲 reject-buffer for the target(s) and renders them.
|
|
141
|
+
* Never mutates anything.
|
|
142
|
+
*/
|
|
143
|
+
export declare function runPolicyShowCommand(args: RunPolicyShowCommandArgs, opts: {
|
|
144
|
+
repoRoot: string;
|
|
145
|
+
stdout?: (l: string) => void;
|
|
146
|
+
stderr?: (l: string) => void;
|
|
147
|
+
}): Promise<RunPolicyShowCommandResult>;
|
|
148
|
+
export interface RunPolicyRollbackCommandArgs {
|
|
149
|
+
target: string;
|
|
150
|
+
/** Why the version is being rejected (recorded on the 否决缓冲 entry). */
|
|
151
|
+
reason?: string;
|
|
152
|
+
/** Required confirmation. */
|
|
153
|
+
yes?: boolean;
|
|
154
|
+
json?: boolean;
|
|
155
|
+
}
|
|
156
|
+
export interface RunPolicyRollbackCommandResult {
|
|
157
|
+
exitCode: number;
|
|
158
|
+
/** The rollback ledger entry, present on success. */
|
|
159
|
+
entry?: PolicyLedgerEntry;
|
|
160
|
+
/** The version the lineage was restored TO. */
|
|
161
|
+
toVersion?: number;
|
|
162
|
+
error?: string;
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Programmatic entrypoint for `self-evolution policy rollback`. Manually rolls
|
|
166
|
+
* the 策略 POLICY lineage back to the previous version (recorded as a NEW
|
|
167
|
+
* monotonic head, git-revert style) and appends a `human-reject` 否决缓冲
|
|
168
|
+
* reject-buffer entry so the next 演进智能体 EVOLVING AGENT step sees the rejected
|
|
169
|
+
* direction. Requires --yes.
|
|
170
|
+
*/
|
|
171
|
+
export declare function runPolicyRollbackCommand(args: RunPolicyRollbackCommandArgs, opts: {
|
|
172
|
+
repoRoot: string;
|
|
173
|
+
stdout?: (l: string) => void;
|
|
174
|
+
stderr?: (l: string) => void;
|
|
175
|
+
now?: () => Date;
|
|
176
|
+
}): Promise<RunPolicyRollbackCommandResult>;
|
|
177
|
+
//# sourceMappingURL=self-evolution-episode.d.ts.map
|