synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +158 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +431 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/fitness/test-failures.js +10 -2
  15. package/dist/core/project-config.d.ts +19 -0
  16. package/dist/core/project-config.js +96 -0
  17. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  18. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  19. package/dist/core/self-evolution/candidates.d.ts +0 -9
  20. package/dist/core/self-evolution/critic-agent.d.ts +192 -0
  21. package/dist/core/self-evolution/critic-agent.js +568 -0
  22. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  23. package/dist/core/self-evolution/edits-contract.js +89 -0
  24. package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
  25. package/dist/core/self-evolution/episode-orchestrator.js +681 -0
  26. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  27. package/dist/core/self-evolution/episode-store.js +573 -0
  28. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  29. package/dist/core/self-evolution/evolution-switches.js +5 -10
  30. package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
  31. package/dist/core/self-evolution/evolving-agent.js +535 -0
  32. package/dist/core/self-evolution/host-harness.d.ts +14 -15
  33. package/dist/core/self-evolution/host-harness.js +48 -23
  34. package/dist/core/self-evolution/index.d.ts +11 -6
  35. package/dist/core/self-evolution/index.js +20 -6
  36. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  37. package/dist/core/self-evolution/line-diff.js +130 -0
  38. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  39. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  40. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  41. package/dist/core/self-evolution/policy/index.js +13 -0
  42. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  43. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  44. package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
  45. package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
  46. package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
  47. package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
  48. package/dist/core/self-evolution/promote.d.ts +1 -1
  49. package/dist/core/self-evolution/promote.js +6 -33
  50. package/dist/core/self-evolution/promotion.js +1 -2
  51. package/dist/core/self-evolution/reward-agent.d.ts +379 -0
  52. package/dist/core/self-evolution/reward-agent.js +940 -0
  53. package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
  54. package/dist/core/self-evolution/reward-aggregator.js +262 -0
  55. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  56. package/dist/core/self-evolution/scope-gate.js +107 -0
  57. package/dist/core/self-evolution/success-channel.js +2 -2
  58. package/dist/core/self-evolution/tamper-check.d.ts +24 -0
  59. package/dist/core/self-evolution/tamper-check.js +236 -0
  60. package/dist/core/self-evolution/tool-evolution.js +2 -13
  61. package/dist/core/self-evolution/verdict.d.ts +8 -5
  62. package/dist/core/self-evolution/verdict.js +4 -7
  63. package/dist/core/templates/workflows/gen-tests.js +1 -1
  64. package/dist/core/templates/workflows/learn.d.ts +3 -2
  65. package/dist/core/templates/workflows/learn.js +21 -18
  66. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  67. package/dist/core/templates/workflows/self-evolving.js +62 -172
  68. package/dist/core/trajectory/scrub.d.ts +27 -0
  69. package/dist/core/trajectory/scrub.js +79 -0
  70. package/dist/core/trajectory/skeleton.d.ts +27 -1
  71. package/dist/core/trajectory/skeleton.js +152 -8
  72. package/dist/dashboard/data.d.ts +25 -51
  73. package/dist/dashboard/data.js +68 -180
  74. package/dist/dashboard/react-client.js +458 -503
  75. package/dist/dashboard/react-styles.js +3 -3
  76. package/dist/dashboard/server.js +23 -17
  77. package/dist/ui/ascii-patterns.d.ts +7 -15
  78. package/dist/ui/ascii-patterns.js +123 -54
  79. package/dist/ui/welcome-screen.d.ts +0 -14
  80. package/dist/ui/welcome-screen.js +16 -35
  81. package/package.json +1 -1
  82. package/dist/core/self-evolution/ga-selection.d.ts +0 -94
  83. package/dist/core/self-evolution/ga-selection.js +0 -153
  84. package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
  85. package/dist/core/self-evolution/proposer-agent.js +0 -326
  86. package/dist/core/self-evolution/replay-runner.d.ts +0 -100
  87. package/dist/core/self-evolution/replay-runner.js +0 -170
  88. package/dist/core/self-evolution/replay.d.ts +0 -45
  89. package/dist/core/self-evolution/replay.js +0 -56
  90. package/dist/core/self-evolution/template-variants.d.ts +0 -62
  91. package/dist/core/self-evolution/template-variants.js +0 -171
  92. package/dist/core/self-evolution/trajectory.d.ts +0 -65
  93. package/dist/core/self-evolution/trajectory.js +0 -185
package/README.md CHANGED
@@ -133,7 +133,7 @@ Now tell your AI: `/synspec:propose <what-you-want-to-build>`
133
133
  → **[Workflows](docs/workflows.md)**: combos and patterns<br>
134
134
  → **[Commands](docs/commands.md)**: slash commands & skills<br>
135
135
  → **[CLI](docs/cli.md)**: terminal reference<br>
136
- → **[Evaluation Lab](docs/evaluation-lab.md)**: advanced self-evolution harness<br>
136
+ → **[Self-Evolution Loop](docs/evaluation-lab.md)**: the loop-v2 in-context RL surface<br>
137
137
  → **[Supported Tools](docs/supported-tools.md)**: tool integrations & install paths<br>
138
138
  → **[Concepts](docs/concepts.md)**: how it all fits<br>
139
139
  → **[Multi-Language](docs/multi-language.md)**: multi-language support<br>
@@ -199,9 +199,9 @@ The self-evolution machinery lives in-tree under `src/core/fitness` and
199
199
  `src/core/self-evolution`, exposed through the `synergyspec-selfevolving
200
200
  self-evolution` subcommands (not as an ordinary day-to-day skill). It treats the
201
201
  spec workflow as a learnable system — a change is a forward pass through the
202
- artifact templates; `learn` runs the backward pass (a per-change loss); and a
203
- genetic-algorithm outer loop selects among competing candidate template
204
- variants.
202
+ artifact templates; `learn` runs the backward pass (a per-change loss); and an
203
+ in-context-RL episode improves one design 策略 POLICY lineage from a graded
204
+ advantage, with a rollback that fires *before* any new edit.
205
205
 
206
206
  What actually works today:
207
207
 
@@ -233,21 +233,31 @@ What actually works today:
233
233
  session: an unresolvable flag is an up-front error (exit non-zero), while a
234
234
  missing env handle fails closed — no trajectory, and the observed-verified
235
235
  gate refuses to promote.
236
- - **Code-health gate** (auto-evolve / `evolve-from-edits`): a measured code-health
237
- regression vs the last accepted state blocks auto-promotion (and surfaces a
238
- loud `health-signal-unavailable` observation if a configured analyzer can't
239
- run). No health signal ⇒ no gate, so the loop is never blocked on a missing
236
+ - **Code-health gate** (the episode's evolving agent / `evolve-from-edits`): a
237
+ measured code-health regression vs the last accepted state blocks promotion (and
238
+ surfaces a loud `health-signal-unavailable` observation if a configured analyzer
239
+ can't run). No health signal ⇒ no gate, so the loop is never blocked on a missing
240
240
  measurement.
241
241
  - **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
242
242
  `learn` hints into human-gated candidate packages under
243
243
  `.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
244
244
  canonical file is modified, and the frozen gen-test/run-test oracle is never
245
245
  touched.
246
- - **GA outer loop** (`self-evolution evolve`): groups candidates by canonical
247
- target, scores them by accumulated fitness (or `--replay <corpus>` to re-run a
248
- change corpus through baseline vs. candidate), ranks them, and generates a
249
- **human-gated** promotion report. It never auto-promotes, and a frozen target
250
- (per the per-target evolution policy) is skipped.
246
+ - **In-context-RL episode** (`self-evolution episode --change <name>`): runs ONE
247
+ completed change through a fixed, code-spawned pipeline. A two-arm forward grades
248
+ the **主智能体 MAIN AGENT** (the frozen current 策略 POLICY) from the change's
249
+ `learn` report against a **CRITIC AGENT(基线智能体 baseline agent)** that reruns
250
+ the prior policy on the same change; a **奖励智能体 REWARD AGENT** computes
251
+ **advantage = reward(主臂) − reward(基线臂)** and names the gap; on a bad advantage
252
+ the orchestrator rolls the 策略 POLICY back *before* the **演进智能体 EVOLVING
253
+ AGENT** is called; the evolving agent then makes ONE bounded edit (or refuses)
254
+ after the static / observed-GREEN / 范围⊆诊断 gates pass, writing the next policy
255
+ version onto your LOCAL files — no candidate dir, no republish. A frozen target
256
+ (per the per-target evolution policy) is skipped. Inspect the lineage with
257
+ `self-evolution policy show [--target <id>]`, re-enter a partial run with
258
+ `self-evolution episode resume <id>`, and manually restore a version with
259
+ `self-evolution policy rollback --target <id> --yes`. See
260
+ [docs/evaluation-lab.md](docs/evaluation-lab.md).
251
261
  - **Per-target evolution scope** (`selfEvolution:` in
252
262
  `synergyspec-selfevolving/config.yaml`): one switch decides which canonical
253
263
  targets may evolve, honored end-to-end by `learn` → `propose-canonical` → the
@@ -260,13 +270,16 @@ What actually works today:
260
270
 
261
271
  What is **not** built yet (don't infer it from the architecture diagrams):
262
272
 
263
- - No autonomous multi-generation **breeding** loop (mutate winners → next
264
- generation). `evolve` runs a single generation over pre-existing candidates;
265
- new candidates only come from the human-gated `propose-canonical` step.
273
+ - No autonomous multi-episode **breeding** loop. Each `self-evolution episode`
274
+ improves the policy from one change's graded advantage; it never fans out
275
+ competing variants or runs unattended generations. New manual candidates only
276
+ come from the human-gated `propose-canonical` step.
266
277
  - No measured benchmark gains. The mechanisms are implemented and unit-tested,
267
278
  but the loop has not been run end-to-end to produce quantitative results.
268
- - The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) has
269
- been removed; the technique was internalized into `src/core`.
279
+ - The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) and
280
+ the genetic-algorithm outer loop (`self-evolution evolve` / `--replay`) have
281
+ been removed; the technique was internalized into `src/core` and re-homed on the
282
+ loop-v2 in-context-RL episode.
270
283
 
271
284
  ## Contributing
272
285
 
package/dist/commands/learn.d.ts CHANGED
@@ -1,3 +1,14 @@
1
1
  import { Command } from 'commander';
2
- export declare function registerLearnCommand(program: Command): void;
2
+ import { type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy } from '../core/self-evolution/episode-orchestrator.js';
3
+ /**
4
+ * Injectable dependencies for {@link registerLearnCommand}. The ONLY seam today
5
+ * is `runEpisode` — the loop-v2 episode runner the autonomous `--apply` entrance
6
+ * invokes. It defaults to the real {@link runEpisode}; learn tests stub it so the
7
+ * evolve path is exercised WITHOUT spawning real agents (the orchestrator's three
8
+ * agents each own a `runHeadlessAgent` spawn, which the stub never reaches).
9
+ */
10
+ export interface LearnCommandDeps {
11
+ runEpisode?: (opts: RunEpisodeOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
12
+ }
13
+ export declare function registerLearnCommand(program: Command, deps?: LearnCommandDeps): void;
3
14
  //# sourceMappingURL=learn.d.ts.map
package/dist/commands/learn.js CHANGED
@@ -9,12 +9,14 @@ import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/fac
9
9
  import { toActionSkeleton } from '../core/trajectory/skeleton.js';
10
10
  import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
11
11
  import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
12
+ import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
12
13
  import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
13
14
  function collect(value, previous) {
14
15
  previous.push(value);
15
16
  return previous;
16
17
  }
17
- export function registerLearnCommand(program) {
18
+ export function registerLearnCommand(program, deps = {}) {
19
+ const runEpisodeImpl = deps.runEpisode ?? runEpisode;
18
20
  const learnCmd = program
19
21
  .command('learn [change]')
20
22
  .description('Review a completed change and extract reusable learning candidates')
@@ -157,13 +159,52 @@ export function registerLearnCommand(program) {
157
159
  // side-write only; never fail learn over it.
158
160
  }
159
161
  }
162
+ // LOOP-V2 AUTONOMOUS ENTRANCE (CS6-F): on an --apply run that opted into
163
+ // evolution, resolved EXACTLY ONE concrete evolvable target, and carries
164
+ // an observed-VERIFIED-GREEN signal, run one self-evolution episode
165
+ // in-process (rollback-before-evolution). A bare `learn <change>` preview
166
+ // can never reach this (it is neither --apply nor an evolving run), so the
167
+ // non-evolving path stays byte-identical to today. The runner is injected
168
+ // (`deps.runEpisode`) so tests exercise this path WITHOUT spawning real
169
+ // agents; it defaults to the real orchestrator.
170
+ let episodeOutcome;
171
+ const concreteEvolveTarget = resolveSingleConcreteTarget(evolutionPreview);
172
+ if (options.apply === true &&
173
+ isEvolvingRun(options) &&
174
+ concreteEvolveTarget !== undefined &&
175
+ concreteEvolveTarget.targetId !== null &&
176
+ reportIsObservedVerifiedGreen(report)) {
177
+ const mainArm = await captureMainArm({
178
+ repoRoot: projectRoot,
179
+ changeName: report.changeName,
180
+ report,
181
+ });
182
+ // Thread the loop-v2 reward judge-quality config (samples / noiseFloor /
183
+ // orderSwap / tamperCheck). Omitted ⇒ the orchestrator's single-sample,
184
+ // flag-only default (no extra spawns).
185
+ const episodeConfig = readProjectConfig(projectRoot);
186
+ episodeOutcome = await runEpisodeImpl({
187
+ repoRoot: projectRoot,
188
+ targetId: concreteEvolveTarget.targetId,
189
+ changeName: report.changeName,
190
+ changeDirPath: report.changeDir,
191
+ mainArm,
192
+ ...(episodeConfig?.selfEvolution?.reward
193
+ ? { reward: episodeConfig.selfEvolution.reward }
194
+ : {}),
195
+ });
196
+ }
160
197
  if (options.json) {
161
- printJson(report, applied, evolutionPreview, hintsPath);
198
+ printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome);
162
199
  return;
163
200
  }
164
201
  console.log(renderLearnReport(report, applied));
165
202
  console.log('');
166
203
  console.log(renderLearnTransparency(report, applied, evolutionPreview, hintsPath, options));
204
+ if (episodeOutcome) {
205
+ console.log('');
206
+ console.log(renderEpisodeOutcome(episodeOutcome));
207
+ }
167
208
  if (successSummary) {
168
209
  console.log('');
169
210
  console.log(successSummary);
@@ -515,7 +556,7 @@ function renderIngestHandoff(changeName, ingest, applied) {
515
556
  }
516
557
  return lines.join('\n');
517
558
  }
518
- function printJson(report, applied, evolutionPreview, hintsPath) {
559
+ function printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome) {
519
560
  // `mode` only tracks whether MEMORY candidates were applied (--apply). It does
520
561
  // NOT reflect that --persist-hints wrote a hints file, which is what made the
521
562
  // old `mode:"preview"` read as "nothing written". `wrote` makes every write this
@@ -540,6 +581,30 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
540
581
  hintIds: target.hintIds,
541
582
  })),
542
583
  };
584
+ // Loop-v2 episode outcome (CS6-F): present only when the autonomous entrance
585
+ // ran one this --apply run; absent on every non-evolving / unverified run so
586
+ // their --json stays byte-identical to today.
587
+ const episode = episodeOutcome
588
+ ? 'busy' in episodeOutcome
589
+ ? { busy: true, reason: episodeOutcome.reason }
590
+ : {
591
+ episodeId: episodeOutcome.episodeId,
592
+ baselineSkipped: episodeOutcome.baselineSkipped,
593
+ advantage: episodeOutcome.advantage,
594
+ decision: episodeOutcome.decision,
595
+ evolution: episodeOutcome.evolution === null
596
+ ? null
597
+ : episodeOutcome.evolution.kind === 'evolved'
598
+ ? {
599
+ kind: 'evolved',
600
+ version: episodeOutcome.evolution.ledgerEntry.version,
601
+ }
602
+ : episodeOutcome.evolution.kind === 'refused'
603
+ ? { kind: 'refused', reason: episodeOutcome.evolution.reason }
604
+ : { kind: 'not-spawned', reason: episodeOutcome.evolution.reason },
605
+ newPolicyVersion: episodeOutcome.newPolicyVersion,
606
+ }
607
+ : undefined;
543
608
  console.log(JSON.stringify({
544
609
  mode: applied ? 'apply' : 'preview',
545
610
  ...report,
@@ -548,6 +613,7 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
548
613
  ...(hintsPath ? { hintsPath } : {}),
549
614
  wrote,
550
615
  evolution,
616
+ ...(episode ? { episode } : {}),
551
617
  }, null, 2));
552
618
  }
553
619
  async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
@@ -715,10 +781,18 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
715
781
  // full new content for that target's resolved LOCAL file.
716
782
  const concreteTargets = evolutionPreview.targets.filter((target) => target.targetId !== null);
717
783
  const concreteTarget = concreteTargets.length > 0 ? concreteTargets[0] : undefined;
718
- if (hintsPath && concreteTarget) {
784
+ if (hintsPath && concreteTarget && concreteTarget.targetId !== null) {
785
+ const concreteTargetId = concreteTarget.targetId;
719
786
  lines.push(`- Hints written: ${hintsPath}`);
787
+ // LOOP-V2 (autonomous, rollback-before-evolution): the in-context-RL episode
788
+ // is now the default autonomous path. `--apply` on a verified-green run runs
789
+ // it in-process; this line is the explicit re-runnable form.
790
+ lines.push(renderEpisodeNextStep(report.changeName, concreteTargetId, options));
720
791
  const localFile = concreteTarget.localFiles[0] ?? concreteTarget.files[0] ?? '<target file>';
721
- lines.push(`- Evolve from your edits: synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "${hintsPath}" --evolve-target ${concreteTarget.targetId} --from-edits <edits.json> --yes`);
792
+ // MANUAL channel (host agent authors the edit directly): kept for operators
793
+ // who want to hand evolve-from-edits a full new-file content themselves
794
+ // rather than spawn the loop-v2 EVOLVING AGENT.
795
+ lines.push(`- Manual: evolve from your own edits: synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "${hintsPath}" --evolve-target ${concreteTargetId} --from-edits <edits.json> --yes`);
722
796
  lines.push(` you (the host agent) author edits.json's full new file content for the target file shown above (${localFile}).`);
723
797
  }
724
798
  else if (hintsPath) {
@@ -754,26 +828,99 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
754
828
  lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
755
829
  }
756
830
  else {
831
+ // A bare preview has no agent-in-the-loop signal, so point the human/cron
832
+ // operator at the LOOP-V2 autonomous entrance (in-context RL,
833
+ // rollback-before-evolution): `self-evolution episode`.
757
834
  lines.push('');
758
- lines.push('headless fallback (no host agent):');
759
- lines.push(`- One-button local evolve: synergyspec-selfevolving self-evolution auto-evolve --change "${report.changeName}"${renderTargetArgs(options)}`);
835
+ lines.push('autonomous loop-v2 (rollback-before-evolution):');
836
+ lines.push(`- One-button local evolve: ${renderEpisodeCommand(report.changeName, undefined, options)}`);
760
837
  lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
761
838
  }
762
839
  return lines.join('\n');
763
840
  }
841
+ /**
842
+ * The loop-v2 autonomous entrance command (CS6-F):
843
+ * `self-evolution episode --change "<name>" [--target <id>] [--session-id <id>]`.
844
+ * Replaces the GA `auto-evolve` / autonomous `evolve-from-edits` suggestions as
845
+ * the loop-v2 path. The `--target` pin is included only when a concrete target
846
+ * id resolved; `--session-id` is threaded through when the operator pinned an
847
+ * explicit trajectory handle so the episode grades the SAME session learn did.
848
+ */
849
+ function renderEpisodeCommand(changeName, targetId, options) {
850
+ const parts = [`synergyspec-selfevolving self-evolution episode --change "${changeName}"`];
851
+ if (targetId)
852
+ parts.push(`--target ${targetId}`);
853
+ if (options.sessionId)
854
+ parts.push(`--session-id ${options.sessionId}`);
855
+ return parts.join(' ');
856
+ }
857
+ /** The loop-v2 next-step line shown when a concrete target is pinned. */
858
+ function renderEpisodeNextStep(changeName, targetId, options) {
859
+ return `- Autonomous loop-v2 episode: ${renderEpisodeCommand(changeName, targetId, options)}`;
860
+ }
764
861
  /**
765
862
  * An "evolving run" is one where the operator opted into evolution
766
863
  * (`--apply` / `--persist-hints` / a named `--evolve-target`) — per the skill,
767
864
  * the bare CLI previews and only the skill/agent flow passes these flags, so
768
865
  * this is the agent-in-the-loop proxy (the same signal that gates the
769
- * unbindable-hint observations in the learn action). The headless
770
- * `auto-evolve` fallback (it spawns its proposer internally) is for runs with
771
- * NO agent in the loop; the skill forbids the headless proposer when an agent
772
- * IS the proposer, so the fallback is suppressed on evolving runs.
866
+ * unbindable-hint observations in the learn action). The loop-v2 autonomous
867
+ * fallback (the `self-evolution episode` next-step, which code-spawns the
868
+ * reward + evolving agents) is for runs with NO agent in the loop; the skill
869
+ * forbids surfacing that autonomous entrance when an agent IS already the
870
+ * proposer, so the fallback is suppressed on evolving runs.
773
871
  */
774
872
  function isEvolvingRun(options) {
775
873
  return (options.apply === true || options.persistHints === true || options.evolveTarget !== undefined);
776
874
  }
875
+ /**
876
+ * The report carries an observed-VERIFIED-GREEN signal: a REAL test runner was
877
+ * observed (`verified`) AND it came back green (`observedStatus === 'success'`).
878
+ * This is the SAME condition the failure-evidence routing uses in core/learn.ts
879
+ * (a verified-green run yields an empty failure list); the loop-v2 episode is
880
+ * only auto-launched when the main arm actually has a measured, trusted outcome
881
+ * — never on an authored-only or unverified report.
882
+ */
883
+ function reportIsObservedVerifiedGreen(report) {
884
+ const facts = report.fitnessSample?.trajectoryFacts;
885
+ return facts !== undefined && facts.verified === true && facts.observedStatus === 'success';
886
+ }
887
+ /**
888
+ * The ONE concrete evolvable target this learn run resolved, or `undefined` when
889
+ * there is not exactly one (zero, or a kind-only/ambiguous group that still
890
+ * needs a `--evolve-target` pin). Mirrors the `concreteTarget` the next-steps
891
+ * renderer picks; the loop-v2 entrance refuses to guess when more than one
892
+ * concrete target is in play.
893
+ */
894
+ function resolveSingleConcreteTarget(preview) {
895
+ const concrete = preview.targets.filter((target) => target.targetId !== null);
896
+ return concrete.length === 1 ? concrete[0] : undefined;
897
+ }
898
+ /** Render the loop-v2 episode outcome for the human-readable transparency block. */
899
+ function renderEpisodeOutcome(episode) {
900
+ if ('busy' in episode) {
901
+ return `- Loop-v2 episode: skipped — another in-flight episode holds the target (${episode.reason}).`;
902
+ }
903
+ const lines = [];
904
+ lines.push(`- Loop-v2 episode: ${episode.episodeId}`);
905
+ const advantage = episode.advantage === null ? 'n/a (baseline skipped or abstained)' : String(episode.advantage);
906
+ lines.push(` advantage: ${advantage}; decision: ${episode.decision}`);
907
+ if (episode.evolution === null) {
908
+ lines.push(' evolution: not spawned');
909
+ }
910
+ else if (episode.evolution.kind === 'evolved') {
911
+ lines.push(` evolution: evolved -> policy v${episode.evolution.ledgerEntry.version}`);
912
+ }
913
+ else if (episode.evolution.kind === 'refused') {
914
+ lines.push(` evolution: refused (${episode.evolution.reason})`);
915
+ }
916
+ else {
917
+ lines.push(` evolution: not spawned (${episode.evolution.reason})`);
918
+ }
919
+ if (episode.newPolicyVersion !== null) {
920
+ lines.push(` policy head: v${episode.newPolicyVersion}`);
921
+ }
922
+ return lines.join('\n');
923
+ }
777
924
  function renderExplicitPolicy(explicit) {
778
925
  if (explicit.length === 0)
779
926
  return '';
package/dist/commands/self-evolution-episode.d.ts ADDED
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Loop-v2 CLI commands (self-evolution as in-context RL).
3
+ *
4
+ * These commands drive ONE bounded episode of the loop documented in
5
+ * `src/core/self-evolution/episode-orchestrator.ts`:
6
+ *
7
+ * - `episode` — run ONE episode for a change: build the 主智能体
8
+ * MAIN AGENT (frozen actor, 策略 POLICY vN+1) arm
9
+ * from a learn report (the same grading the `learn`
10
+ * command uses), then run the orchestrator (CRITIC
11
+ * AGENT(基线智能体 baseline agent)arm + 奖励智能体
12
+ * REWARD AGENT scoring + rollback/keep decision +
13
+ * 演进智能体 EVOLVING AGENT optimizer.step). Prints a
14
+ * human summary (advantage, decision, evolution
15
+ * kind, new policy version) or --json.
16
+ * - `episode resume <id>` — re-enter a partially-run episode at its recorded
17
+ * stage and finish the remaining steps.
18
+ * - `policy show` — READ-ONLY: print the 版本账本 ledger (versions,
19
+ * actions, Δ stats, predictions) + the 否决缓冲
20
+ * reject-buffer for the target(s). Replaces the
21
+ * read-only role of the (soon-removed) `trajectory`
22
+ * command.
23
+ * - `policy rollback` — manual snapshot rollback to the prior version +
24
+ * a `human-reject` 否决缓冲 entry; requires --yes.
25
+ *
26
+ * Like the rest of the self-evolution CLI surface, every programmatic entrypoint
27
+ * here is exported and fully INJECTABLE (the orchestrator spawn seam, the learn
28
+ * report generator, stdout/stderr, clock) so the commands are unit-testable
29
+ * without spawning a real `claude` binary.
30
+ */
31
+ import { Command } from 'commander';
32
+ import { captureMainArm as captureMainArmImpl, resumeEpisode as resumeEpisodeImpl, type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy, type ResumeEpisodeResult, type PolicyLedgerEntry, type RejectBufferEntry } from '../core/self-evolution/index.js';
33
+ import { type LearnReport } from '../core/learn.js';
34
+ /**
35
+ * The options the `episode` command forwards to the runEpisode seam. A superset
36
+ * of the orchestrator's {@link RunEpisodeOptions} carrying the `--no-baseline`
37
+ * request as `skipBaseline`. The base orchestrator ignores the extra key (the
38
+ * CRITIC AGENT arm is gated by its own ledger read); a custom seam may read it.
39
+ */
40
+ export type EpisodeRunOptions = RunEpisodeOptions & {
41
+ skipBaseline?: boolean;
42
+ };
43
+ /**
44
+ * The injectable orchestrator seam. Accepts the {@link EpisodeRunOptions}
45
+ * superset (the real {@link runEpisodeImpl}, typed for {@link RunEpisodeOptions},
46
+ * satisfies this because it accepts a subset of the fields).
47
+ */
48
+ export type EpisodeRunner = (opts: EpisodeRunOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
49
+ /**
50
+ * Attach the loop-v2 `episode` + `policy` subcommands to the parent
51
+ * `self-evolution` command. Called once from {@link registerSelfEvolutionCommand}.
52
+ */
53
+ export declare function attachSelfEvolutionEpisodeCommands(parent: Command): void;
54
+ export interface RunEpisodeCommandArgs {
55
+ /** The completed change to run the episode for. */
56
+ changeName?: string;
57
+ /** Canonical target id to evolve. Defaults to the design artifact-template. */
58
+ target?: string;
59
+ /** Skip the CRITIC AGENT(基线智能体 baseline agent)arm for this episode. */
60
+ noBaseline?: boolean;
61
+ /** Explicit transcript handle (Claude transcript store only). */
62
+ transcript?: string;
63
+ /** Explicit Claude session id handle. */
64
+ sessionId?: string;
65
+ json?: boolean;
66
+ }
67
+ export interface RunEpisodeCommandOptions {
68
+ repoRoot: string;
69
+ stdout?: (l: string) => void;
70
+ stderr?: (l: string) => void;
71
+ /**
72
+ * Test seam: produce the change's learn report (the 主智能体 MAIN AGENT arm's
73
+ * grading). Defaults to {@link generateLearnReport}; tests inject a graded stub
74
+ * so the episode runs without a real agent trajectory.
75
+ */
76
+ generateReport?: (changeName: string) => Promise<LearnReport>;
77
+ /**
78
+ * Test seam: build the {@link MainArmCapture} from a learn report. Defaults to
79
+ * the orchestrator's {@link captureMainArmImpl}.
80
+ */
81
+ captureMainArm?: typeof captureMainArmImpl;
82
+ /**
83
+ * Test seam: the orchestrator that runs the three agents. Defaults to the real
84
+ * {@link runEpisodeImpl}; tests inject a fake so NO agents are spawned.
85
+ */
86
+ runEpisode?: EpisodeRunner;
87
+ }
88
+ export interface RunEpisodeCommandResult {
89
+ exitCode: number;
90
+ /** Present when the episode ran (not busy / not an error). */
91
+ result?: RunEpisodeResult;
92
+ /** Present when the target's in-flight slot was already held. */
93
+ busy?: RunEpisodeBusy;
94
+ error?: string;
95
+ }
96
+ /**
97
+ * Programmatic entrypoint for `self-evolution episode`. Exported so tests can
98
+ * drive the full episode flow with an injected orchestrator seam (no real agent
99
+ * spawn).
100
+ */
101
+ export declare function runEpisodeCommand(args: RunEpisodeCommandArgs, opts: RunEpisodeCommandOptions): Promise<RunEpisodeCommandResult>;
102
+ export interface RunResumeEpisodeCommandArgs {
103
+ episodeId: string;
104
+ json?: boolean;
105
+ }
106
+ export interface RunResumeEpisodeCommandResult {
107
+ exitCode: number;
108
+ result?: ResumeEpisodeResult;
109
+ error?: string;
110
+ }
111
+ /**
112
+ * Programmatic entrypoint for `self-evolution episode resume <id>`. Exported for
113
+ * tests; the orchestrator seam is injectable.
114
+ */
115
+ export declare function runResumeEpisodeCommand(args: RunResumeEpisodeCommandArgs, opts: {
116
+ repoRoot: string;
117
+ stdout?: (l: string) => void;
118
+ stderr?: (l: string) => void;
119
+ resumeEpisode?: typeof resumeEpisodeImpl;
120
+ }): Promise<RunResumeEpisodeCommandResult>;
121
+ export interface RunPolicyShowCommandArgs {
122
+ /** Restrict to a single canonical target id. */
123
+ target?: string;
124
+ json?: boolean;
125
+ }
126
+ export interface PolicyShowTargetView {
127
+ targetId: string;
128
+ /** Lineage head version, or null when the lineage has not been initialized. */
129
+ head: number | null;
130
+ ledger: PolicyLedgerEntry[];
131
+ rejectBuffer: RejectBufferEntry[];
132
+ }
133
+ export interface RunPolicyShowCommandResult {
134
+ exitCode: number;
135
+ targets: PolicyShowTargetView[];
136
+ error?: string;
137
+ }
138
+ /**
139
+ * Programmatic entrypoint for `self-evolution policy show`. READ-ONLY: reads the
140
+ * 版本账本 ledger + 否决缓冲 reject-buffer for the target(s) and renders them.
141
+ * Never mutates anything.
142
+ */
143
+ export declare function runPolicyShowCommand(args: RunPolicyShowCommandArgs, opts: {
144
+ repoRoot: string;
145
+ stdout?: (l: string) => void;
146
+ stderr?: (l: string) => void;
147
+ }): Promise<RunPolicyShowCommandResult>;
148
+ export interface RunPolicyRollbackCommandArgs {
149
+ target: string;
150
+ /** Why the version is being rejected (recorded on the 否决缓冲 entry). */
151
+ reason?: string;
152
+ /** Required confirmation. */
153
+ yes?: boolean;
154
+ json?: boolean;
155
+ }
156
+ export interface RunPolicyRollbackCommandResult {
157
+ exitCode: number;
158
+ /** The rollback ledger entry, present on success. */
159
+ entry?: PolicyLedgerEntry;
160
+ /** The version the lineage was restored TO. */
161
+ toVersion?: number;
162
+ error?: string;
163
+ }
164
+ /**
165
+ * Programmatic entrypoint for `self-evolution policy rollback`. Manually rolls
166
+ * the 策略 POLICY lineage back to the previous version (recorded as a NEW
167
+ * monotonic head, git-revert style) and appends a `human-reject` 否决缓冲
168
+ * reject-buffer entry so the next 演进智能体 EVOLVING AGENT step sees the rejected
169
+ * direction. Requires --yes.
170
+ */
171
+ export declare function runPolicyRollbackCommand(args: RunPolicyRollbackCommandArgs, opts: {
172
+ repoRoot: string;
173
+ stdout?: (l: string) => void;
174
+ stderr?: (l: string) => void;
175
+ now?: () => Date;
176
+ }): Promise<RunPolicyRollbackCommandResult>;
177
+ //# sourceMappingURL=self-evolution-episode.d.ts.map