synergyspec-selfevolving 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +50 -19
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +373 -31
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +423 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +179 -786
  8. package/dist/commands/workflow/status.js +3 -1
  9. package/dist/core/archive.d.ts +0 -1
  10. package/dist/core/archive.js +0 -58
  11. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  12. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  13. package/dist/core/config-prompts.js +4 -0
  14. package/dist/core/fitness/health/health-metrics.d.ts +26 -56
  15. package/dist/core/fitness/health/health-metrics.js +19 -58
  16. package/dist/core/fitness/health/index.d.ts +15 -2
  17. package/dist/core/fitness/health/index.js +25 -1
  18. package/dist/core/fitness/health/local-source.d.ts +43 -4
  19. package/dist/core/fitness/health/local-source.js +181 -25
  20. package/dist/core/fitness/health/metric-source.d.ts +48 -19
  21. package/dist/core/fitness/health/metric-source.js +8 -18
  22. package/dist/core/fitness/health/resolve-source.js +4 -1
  23. package/dist/core/fitness/loss.d.ts +7 -7
  24. package/dist/core/fitness/loss.js +6 -6
  25. package/dist/core/fitness/sample.d.ts +10 -0
  26. package/dist/core/fitness/test-failures.d.ts +30 -0
  27. package/dist/core/fitness/test-failures.js +123 -0
  28. package/dist/core/learn/credit-path.d.ts +36 -0
  29. package/dist/core/learn/credit-path.js +198 -0
  30. package/dist/core/learn/trajectory-discovery.d.ts +39 -0
  31. package/dist/core/learn/trajectory-discovery.js +140 -0
  32. package/dist/core/learn.d.ts +39 -5
  33. package/dist/core/learn.js +131 -14
  34. package/dist/core/project-config.d.ts +4 -0
  35. package/dist/core/project-config.js +52 -1
  36. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  37. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  38. package/dist/core/self-evolution/candidates.d.ts +0 -9
  39. package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
  40. package/dist/core/self-evolution/canonical-targets.js +8 -4
  41. package/dist/core/self-evolution/critic-agent.d.ts +150 -0
  42. package/dist/core/self-evolution/critic-agent.js +487 -0
  43. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  44. package/dist/core/self-evolution/edits-contract.js +89 -0
  45. package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
  46. package/dist/core/self-evolution/episode-orchestrator.js +534 -0
  47. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  48. package/dist/core/self-evolution/episode-store.js +573 -0
  49. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  50. package/dist/core/self-evolution/evolution-switches.js +5 -10
  51. package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
  52. package/dist/core/self-evolution/evolving-agent.js +449 -0
  53. package/dist/core/self-evolution/health-baseline.d.ts +25 -6
  54. package/dist/core/self-evolution/health-baseline.js +30 -6
  55. package/dist/core/self-evolution/host-harness.d.ts +1 -2
  56. package/dist/core/self-evolution/host-harness.js +1 -2
  57. package/dist/core/self-evolution/index.d.ts +10 -6
  58. package/dist/core/self-evolution/index.js +19 -6
  59. package/dist/core/self-evolution/learn-hints.d.ts +31 -0
  60. package/dist/core/self-evolution/learn-hints.js +16 -0
  61. package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
  62. package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
  63. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  64. package/dist/core/self-evolution/line-diff.js +130 -0
  65. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  66. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  67. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  68. package/dist/core/self-evolution/policy/index.js +13 -0
  69. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  70. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  71. package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
  72. package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
  73. package/dist/core/self-evolution/promote.d.ts +1 -1
  74. package/dist/core/self-evolution/promote.js +6 -33
  75. package/dist/core/self-evolution/promotion.js +1 -2
  76. package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
  77. package/dist/core/self-evolution/proposer-agent.js +94 -13
  78. package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
  79. package/dist/core/self-evolution/proposer-slice.js +54 -0
  80. package/dist/core/self-evolution/reward-agent.d.ts +234 -0
  81. package/dist/core/self-evolution/reward-agent.js +564 -0
  82. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  83. package/dist/core/self-evolution/scope-gate.js +107 -0
  84. package/dist/core/self-evolution/success-channel.d.ts +79 -0
  85. package/dist/core/self-evolution/success-channel.js +361 -0
  86. package/dist/core/self-evolution/target-evolution.d.ts +11 -0
  87. package/dist/core/self-evolution/target-evolution.js +2 -0
  88. package/dist/core/self-evolution/tool-evolution.js +2 -13
  89. package/dist/core/self-evolution/verdict.d.ts +8 -5
  90. package/dist/core/self-evolution/verdict.js +4 -7
  91. package/dist/core/templates/skill-templates.d.ts +1 -0
  92. package/dist/core/templates/skill-templates.js +1 -0
  93. package/dist/core/templates/workflow-manifest.js +2 -0
  94. package/dist/core/templates/workflows/learn.d.ts +4 -2
  95. package/dist/core/templates/workflows/learn.js +25 -166
  96. package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
  97. package/dist/core/templates/workflows/self-evolving.js +127 -0
  98. package/dist/core/trajectory/facts.d.ts +16 -0
  99. package/dist/core/trajectory/facts.js +12 -4
  100. package/dist/core/trajectory/skeleton.d.ts +43 -0
  101. package/dist/core/trajectory/skeleton.js +239 -0
  102. package/dist/dashboard/data.d.ts +25 -51
  103. package/dist/dashboard/data.js +68 -180
  104. package/dist/dashboard/react-client.js +458 -503
  105. package/dist/dashboard/react-styles.js +3 -3
  106. package/dist/dashboard/server.js +23 -17
  107. package/dist/ui/ascii-patterns.d.ts +7 -15
  108. package/dist/ui/ascii-patterns.js +123 -54
  109. package/dist/ui/welcome-screen.d.ts +0 -14
  110. package/dist/ui/welcome-screen.js +16 -35
  111. package/package.json +3 -1
  112. package/scripts/code-health.py +1066 -638
  113. package/scripts/slop_rules.yaml +2151 -0
package/README.md CHANGED
@@ -133,7 +133,7 @@ Now tell your AI: `/synspec:propose <what-you-want-to-build>`
133
133
  → **[Workflows](docs/workflows.md)**: combos and patterns<br>
134
134
  → **[Commands](docs/commands.md)**: slash commands & skills<br>
135
135
  → **[CLI](docs/cli.md)**: terminal reference<br>
136
- → **[Evaluation Lab](docs/evaluation-lab.md)**: advanced self-evolution harness<br>
136
+ → **[Self-Evolution Loop](docs/evaluation-lab.md)**: the loop-v2 in-context RL surface<br>
137
137
  → **[Supported Tools](docs/supported-tools.md)**: tool integrations & install paths<br>
138
138
  → **[Concepts](docs/concepts.md)**: how it all fits<br>
139
139
  → **[Multi-Language](docs/multi-language.md)**: multi-language support<br>
@@ -199,9 +199,9 @@ The self-evolution machinery lives in-tree under `src/core/fitness` and
199
199
  `src/core/self-evolution`, exposed through the `synergyspec-selfevolving
200
200
  self-evolution` subcommands (not as an ordinary day-to-day skill). It treats the
201
201
  spec workflow as a learnable system — a change is a forward pass through the
202
- artifact templates; `learn` runs the backward pass (a per-change loss); and a
203
- genetic-algorithm outer loop selects among competing candidate template
204
- variants.
202
+ artifact templates; `learn` runs the backward pass (a per-change loss); and an
203
+ in-context-RL episode improves one design 策略 POLICY lineage from a graded
204
+ advantage, with a rollback that fires *before* any new edit.
205
205
 
206
206
  What actually works today:
207
207
 
@@ -211,25 +211,53 @@ What actually works today:
211
211
  `MetricSource` selected via `health:` in `synergyspec-selfevolving/config.yaml`.
212
212
  New projects scaffold `source: local` (default-on): a dependency-free,
213
213
  multi-language analyzer (`scripts/code-health.py`, Python 3 stdlib only) that
214
- scores Python, Rust, C, and C++ no server, no network. Set `source: stub` to
214
+ scores Python, Rust, C, and C++ by computing the SlopCodeBench
215
+ `structural_erosion` and `verbosity` scores (for Python, the slop rules are
216
+ the actual SlopCodeBench v0.3 ast-grep rule set, bundled) — no server, no
217
+ network. Set `source: stub` to
215
218
  make the loss functional-only; `sonarqube` is also supported; `local-python` is
216
219
  a back-compat alias for `local`. See
217
220
  [docs/customization.md](docs/customization.md#code-health-metrics-self-evolution).
218
- - **Code-health gate** (auto-evolve / `evolve-from-edits`): a measured code-health
219
- regression vs the last accepted state blocks auto-promotion (and surfaces a
220
- loud `health-signal-unavailable` observation if a configured analyzer can't
221
- run). No health signal ⇒ no gate, so the loop is never blocked on a missing
221
+ - **Rollout/critic separation** (`learn` → `synergyspec-selfevolving-self-evolving`):
222
+ the end-of-cycle critique runs in a fresh-context critic subagent — an
223
+ always-installed utility skill that reads only the on-disk record
224
+ (transcripts, hints, evolution result, learn report) and returns a
225
+ `## Critic Verdict` block that the thin `learn` relays. The session that did
226
+ the work never grades its own rollout; hosts without subagents run the critic
227
+ inline, marked as degraded isolation. Headless and fresh-context invokers can
228
+ pass explicit trajectory handles (`--transcript` / `--session-id` on `learn`,
229
+ `learn handoff`, `learn debug-trajectory`, and `evolve-from-edits`, or the
230
+ `SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT` / `SYNERGYSPEC_SELFEVOLVING_SESSION_ID`
231
+ env vars; flags beat env vars, transcript beats session-id). An explicit
232
+ handle beats change-window discovery and never silently grades another
233
+ session: an unresolvable flag is an up-front error (exit non-zero), while a
234
+ missing env handle fails closed — no trajectory, and the observed-verified
235
+ gate refuses to promote.
236
+ - **Code-health gate** (the episode's evolving agent / `evolve-from-edits`): a
237
+ measured code-health regression vs the last accepted state blocks promotion (and
238
+ surfaces a loud `health-signal-unavailable` observation if a configured analyzer
239
+ can't run). No health signal ⇒ no gate, so the loop is never blocked on a missing
222
240
  measurement.
223
241
  - **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
224
242
  `learn` hints into human-gated candidate packages under
225
243
  `.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
226
244
  canonical file is modified, and the frozen gen-test/run-test oracle is never
227
245
  touched.
228
- - **GA outer loop** (`self-evolution evolve`): groups candidates by canonical
229
- target, scores them by accumulated fitness (or `--replay <corpus>` to re-run a
230
- change corpus through baseline vs. candidate), ranks them, and generates a
231
- **human-gated** promotion report. It never auto-promotes, and a frozen target
232
- (per the per-target evolution policy) is skipped.
246
+ - **In-context-RL episode** (`self-evolution episode --change <name>`): runs ONE
247
+ completed change through a fixed, code-spawned pipeline. A two-arm forward grades
248
+ the **主智能体 MAIN AGENT** (the frozen current 策略 POLICY) from the change's
249
+ `learn` report against a **CRITIC AGENT (基线智能体 baseline agent)** that reruns
250
+ the prior policy on the same change; a **奖励智能体 REWARD AGENT** computes
251
+ **advantage = reward(主臂) − reward(基线臂)** and names the gap; on a bad advantage
252
+ the orchestrator rolls the 策略 POLICY back *before* the **演进智能体 EVOLVING
253
+ AGENT** is called; the evolving agent then makes ONE bounded edit (or refuses)
254
+ after the static / observed-GREEN / 范围⊆诊断 gates pass, writing the next policy
255
+ version onto your LOCAL files — no candidate dir, no republish. A frozen target
256
+ (per the per-target evolution policy) is skipped. Inspect the lineage with
257
+ `self-evolution policy show [--target <id>]`, re-enter a partial run with
258
+ `self-evolution episode resume <id>`, and manually restore a version with
259
+ `self-evolution policy rollback --target <id> --yes`. See
260
+ [docs/evaluation-lab.md](docs/evaluation-lab.md).
233
261
  - **Per-target evolution scope** (`selfEvolution:` in
234
262
  `synergyspec-selfevolving/config.yaml`): one switch decides which canonical
235
263
  targets may evolve, honored end-to-end by `learn` → `propose-canonical` → the
@@ -242,13 +270,16 @@ What actually works today:
242
270
 
243
271
  What is **not** built yet (don't infer it from the architecture diagrams):
244
272
 
245
- - No autonomous multi-generation **breeding** loop (mutate winners → next
246
- generation). `evolve` runs a single generation over pre-existing candidates;
247
- new candidates only come from the human-gated `propose-canonical` step.
273
+ - No autonomous multi-episode **breeding** loop. Each `self-evolution episode`
274
+ improves the policy from one change's graded advantage; it never fans out
275
+ competing variants or runs unattended generations. New manual candidates only
276
+ come from the human-gated `propose-canonical` step.
248
277
  - No measured benchmark gains. The mechanisms are implemented and unit-tested,
249
278
  but the loop has not been run end-to-end to produce quantitative results.
250
- - The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) has
251
- been removed; the technique was internalized into `src/core`.
279
+ - The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) and
280
+ the genetic-algorithm outer loop (`self-evolution evolve` / `--replay`) have
281
+ been removed; the technique was internalized into `src/core` and re-homed on the
282
+ loop-v2 in-context-RL episode.
252
283
 
253
284
  ## Contributing
254
285
 
package/dist/commands/learn.d.ts CHANGED
@@ -1,3 +1,14 @@
1
1
  import { Command } from 'commander';
2
- export declare function registerLearnCommand(program: Command): void;
2
+ import { type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy } from '../core/self-evolution/episode-orchestrator.js';
3
+ /**
4
+ * Injectable dependencies for {@link registerLearnCommand}. The ONLY seam today
5
+ * is `runEpisode` — the loop-v2 episode runner the autonomous `--apply` entrance
6
+ * invokes. It defaults to the real {@link runEpisode}; learn tests stub it so the
7
+ * evolve path is exercised WITHOUT spawning real agents (the orchestrator's three
8
+ * agents each own a `runHeadlessAgent` spawn, which the stub never reaches).
9
+ */
10
+ export interface LearnCommandDeps {
11
+ runEpisode?: (opts: RunEpisodeOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
12
+ }
13
+ export declare function registerLearnCommand(program: Command, deps?: LearnCommandDeps): void;
3
14
  //# sourceMappingURL=learn.d.ts.map