synergyspec-selfevolving 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +151 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +423 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/project-config.d.ts +2 -0
  15. package/dist/core/project-config.js +28 -0
  16. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  17. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  18. package/dist/core/self-evolution/candidates.d.ts +0 -9
  19. package/dist/core/self-evolution/critic-agent.d.ts +150 -0
  20. package/dist/core/self-evolution/critic-agent.js +487 -0
  21. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  22. package/dist/core/self-evolution/edits-contract.js +89 -0
  23. package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
  24. package/dist/core/self-evolution/episode-orchestrator.js +534 -0
  25. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  26. package/dist/core/self-evolution/episode-store.js +573 -0
  27. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  28. package/dist/core/self-evolution/evolution-switches.js +5 -10
  29. package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
  30. package/dist/core/self-evolution/evolving-agent.js +449 -0
  31. package/dist/core/self-evolution/host-harness.d.ts +1 -2
  32. package/dist/core/self-evolution/host-harness.js +1 -2
  33. package/dist/core/self-evolution/index.d.ts +9 -6
  34. package/dist/core/self-evolution/index.js +18 -6
  35. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  36. package/dist/core/self-evolution/line-diff.js +130 -0
  37. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  38. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  39. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  40. package/dist/core/self-evolution/policy/index.js +13 -0
  41. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  42. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  43. package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
  44. package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
  45. package/dist/core/self-evolution/promote.d.ts +1 -1
  46. package/dist/core/self-evolution/promote.js +6 -33
  47. package/dist/core/self-evolution/promotion.js +1 -2
  48. package/dist/core/self-evolution/reward-agent.d.ts +234 -0
  49. package/dist/core/self-evolution/reward-agent.js +564 -0
  50. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  51. package/dist/core/self-evolution/scope-gate.js +107 -0
  52. package/dist/core/self-evolution/success-channel.js +2 -2
  53. package/dist/core/self-evolution/tool-evolution.js +2 -13
  54. package/dist/core/self-evolution/verdict.d.ts +8 -5
  55. package/dist/core/self-evolution/verdict.js +4 -7
  56. package/dist/core/templates/workflows/learn.d.ts +3 -2
  57. package/dist/core/templates/workflows/learn.js +18 -16
  58. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  59. package/dist/core/templates/workflows/self-evolving.js +62 -172
  60. package/dist/dashboard/data.d.ts +25 -51
  61. package/dist/dashboard/data.js +68 -180
  62. package/dist/dashboard/react-client.js +458 -503
  63. package/dist/dashboard/react-styles.js +3 -3
  64. package/dist/dashboard/server.js +23 -17
  65. package/dist/ui/ascii-patterns.d.ts +7 -15
  66. package/dist/ui/ascii-patterns.js +123 -54
  67. package/dist/ui/welcome-screen.d.ts +0 -14
  68. package/dist/ui/welcome-screen.js +16 -35
  69. package/package.json +1 -1
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Episode orchestrator — the rollback-before-evolution heart of loop v2
3
+ * (self-evolution as in-context RL).
4
+ *
5
+ * One `runEpisode` runs ONE synergyspec-selfevolving change through the full
6
+ * loop, in a STRICT, durably-persisted order — correctness of ordering is the
7
+ * design contract:
8
+ *
9
+ * a. acquireInFlight — one in-flight episode per target.
10
+ * b. ensure single lineage — init v0 from resolved files when new;
11
+ * policyVersionMain = lineage head.
12
+ * c. createEpisode + writeArmCapture — record the MAIN AGENT (frozen
13
+ * actor, policy vN+1) arm; advance
14
+ * 'main-arm-captured'.
15
+ * d. CRITIC AGENT (baseline agent) — runCriticAgent reruns LAST episode's
16
+ * policy vN on the SAME change, OR skip
17
+ * ('baseline-skipped') when the lineage
18
+ * has < 2 versions / last action refused.
19
+ * e. REWARD AGENT — runRewardAgent CALCULATES
20
+ * reward(main arm) & reward(baseline arm), advantage
21
+ * = reward(main arm) − reward(baseline arm), the
22
+ * textual gradient; writes
23
+ * diagnosis.json; advances 'scored'.
24
+ * f. DECISION on the main arm's edits:
25
+ * - abstained / no gaps → advance 'abstained'; SKIP evolution.
26
+ * - bad advantage (< threshold) → ROLLBACK the POLICY to the prior
27
+ * good version, THEN append the
28
+ * reject-buffer entry — BOTH durably on
29
+ * disk — THEN advance 'rolled-back'.
30
+ * - otherwise → advance 'kept'.
31
+ * g. EVOLVING AGENT — ONLY after (f) persisted: runEvolvingAgent
32
+ * (optimizer.step) reads the reject-buffer FRESH from disk
33
+ * (so THIS episode's just-written entry is
34
+ * in its prompt) and ends in one of
35
+ * not-spawned / refused / evolved.
36
+ * h. advance 'closed' + releaseInFlight — ALWAYS, even on error.
37
+ *
38
+ * ORDERING GUARANTEE: the rollback + reject-buffer write are SEQUENTIAL awaits
39
+ * that BOTH complete (and the stage reads 'rolled-back'/'kept') before
40
+ * {@link runEvolvingAgent} is even called. (f) and (g) are never parallelized
41
+ * and never share a Promise.all.
42
+ *
43
+ * This module orchestrates; it never spawns an agent itself — the three agents
44
+ * own their own {@link runHeadlessAgent} spawns (the `spawn` seam threads to all
45
+ * three). The only state it owns is the ordering + the rollback/reject decision.
46
+ */
47
+ import { spawn as nodeSpawn } from 'node:child_process';
48
+ import type { LearnReport } from '../learn.js';
49
+ import type { TrajectorySource } from '../trajectory/source.js';
50
+ import { type PolicyResolveFiles } from './policy/policy-store.js';
51
+ import { type EpisodeStage } from './episode-store.js';
52
+ import { type ArmObjective } from './critic-agent.js';
53
+ import { type RunEvolvingAgentResult } from './evolving-agent.js';
54
+ /** The MAIN AGENT (policy vN+1) arm capture that the orchestrator records. */
55
+ export interface MainArmCapture {
56
+ /** Raw session transcript text, when provided; persisted as `transcript.jsonl`. */
57
+ transcript?: string;
58
+ /** Bounded action skeleton of the observed run, when discoverable. */
59
+ skeleton?: object;
60
+ /**
61
+ * The arm objective, byte-shape-IDENTICAL to the CRITIC AGENT's
62
+ * {@link ArmObjective} so the REWARD AGENT reads both arms uniformly.
63
+ */
64
+ objective: ArmObjective;
65
+ }
66
+ export interface CaptureMainArmOptions {
67
+ repoRoot: string;
68
+ changeName: string;
69
+ /**
70
+ * An ALREADY-COMPUTED learn report (from `generateLearnReport`). Its
71
+ * `fitnessSample` carries the graded pass rate / health / observed-trajectory
72
+ * facts — the orchestrator REUSES that grading rather than re-running it.
73
+ */
74
+ report: LearnReport;
75
+ /**
76
+ * Optional trajectory handles. When the learn report did not carry an
77
+ * `observedRun` skeleton (older capture paths), the caller may supply a
78
+ * skeleton / raw transcript directly here; when no skeleton is supplied, the
79
+ * orchestrator re-discovers it via {@link getTrajectoryForChange}. A
80
+ * `trajectorySource` override is honored first (tests / explicit harness selection).
81
+ */
82
+ trajectoryHandles?: {
83
+ /** Verbatim session transcript text to persist as `transcript.jsonl`. */
84
+ transcript?: string;
85
+ /** Pre-computed action skeleton (else derived from the discovered trajectory). */
86
+ skeleton?: object;
87
+ /** Trajectory-source override (tests); else the registry auto-detects. */
88
+ trajectorySource?: TrajectorySource;
89
+ };
90
+ }
91
+ /**
92
+ * Build the MAIN AGENT arm `{transcript?, skeleton?, objective}` from an
93
+ * already-computed learn report's {@link FitnessSample} + the discovered
94
+ * trajectory.
95
+ *
96
+ * REUSES the learn/fitness grading verbatim — it never re-grades:
97
+ * - `objective.passRate` prefers the OBSERVED pass rate
98
+ * (`fitnessSample.trajectoryFacts.observedPassRate` when a runner ran), else
99
+ * the authored `testMetrics.passRate`, else `null` (never fabricated);
100
+ * - `objective.healthPenalty` is `fitnessSample.healthSignal` (the raw
101
+ * "no signal ⇒ null" health reading, distinct from the `?? 0`-defaulted
102
+ * `loss.healthPenalty`);
103
+ * - `objective.loss` is the blended `fitnessSample.loss.loss` (or `null`);
104
+ * - `verified` / `observedStatus` come from `trajectoryFacts`.
105
+ * The shape is byte-identical to {@link ArmObjective} so both arms read uniformly.
106
+ */
107
+ export declare function captureMainArm(opts: CaptureMainArmOptions): Promise<MainArmCapture>;
108
+ /** The decision the orchestrator made on the main arm's edits (step f of the episode loop). */
109
+ export type EpisodeDecision = 'rolled-back' | 'kept' | 'abstained';
110
+ export interface RunEpisodeOptions {
111
+ repoRoot: string;
112
+ targetId: string;
113
+ changeName: string;
114
+ /** Absolute path of the change dir; recorded in episode.json, never copied. */
115
+ changeDirPath: string;
116
+ /** The MAIN AGENT arm (from {@link captureMainArm}). */
117
+ mainArm: MainArmCapture;
118
+ /**
119
+ * advantage = reward(main arm) − reward(baseline arm) threshold below which the POLICY
120
+ * is rolled back to the prior version BEFORE the EVOLVING AGENT runs. Default 0 (a
121
+ * non-positive advantage triggers a rollback). NOTE(review): the module header says `< threshold`, under which an advantage of exactly 0 is kept — confirm strict vs. non-strict.
122
+ */
123
+ advantageRollbackThreshold?: number;
124
+ /** Edit budget L for the EVOLVING AGENT. Default 40. */
125
+ editBudget?: number;
126
+ /** Injectable spawn seam — threaded to ALL THREE agents. Defaults to node's spawn. */
127
+ spawn?: typeof nodeSpawn;
128
+ /** Injectable clock for the lock + episode id; defaults to `new Date()`. */
129
+ now?: Date;
130
+ /**
131
+ * TEST seam: the file resolver {@link initPolicyLineage} uses to snapshot v0
132
+ * when the lineage is new. Defaults to the real `resolveTargetLocalFiles`.
133
+ */
134
+ resolveFiles?: PolicyResolveFiles;
135
+ }
136
+ export interface RunEpisodeResult {
137
+ episodeId: string;
138
+ /** True when the CRITIC AGENT (baseline agent) arm was skipped. */
139
+ baselineSkipped: boolean;
140
+ /** advantage = reward(main arm) − reward(baseline arm); null when skipped or abstained. */
141
+ advantage: number | null;
142
+ decision: EpisodeDecision;
143
+ /** The EVOLVING AGENT outcome, or `null` when it was never spawned. */
144
+ evolution: RunEvolvingAgentResult | null;
145
+ /** The lineage head version AFTER the episode (post-rollback / post-evolve). */
146
+ newPolicyVersion: number | null;
147
+ }
148
+ /** Returned (not thrown) when the target already has a non-stale in-flight lock; distinguishable from {@link RunEpisodeResult} by `busy: true` and `episodeId: null`. */
149
+ export interface RunEpisodeBusy {
150
+ episodeId: null;
151
+ busy: true;
152
+ reason: string;
153
+ }
154
+ /**
155
+ * Run ONE episode through the loop in the strict, durably-persisted order
156
+ * documented at the top of this module. Resolves to a {@link RunEpisodeResult},
157
+ * or to a {@link RunEpisodeBusy} (returned, not thrown) when the target already has a non-stale in-flight lock.
158
+ * The in-flight lock is released in a finally guard so a throw mid-episode never
159
+ * wedges the target's slot.
160
+ */
161
+ export declare function runEpisode(opts: RunEpisodeOptions): Promise<RunEpisodeResult | RunEpisodeBusy>;
162
+ export interface ResumeEpisodeOptions {
163
+ repoRoot: string;
164
+ episodeId: string;
165
+ /** Injectable spawn seam — threaded to the remaining agents. */
166
+ spawn?: typeof nodeSpawn;
167
+ /** Advantage rollback threshold (default 0), for an episode resumed before the decision. */
168
+ advantageRollbackThreshold?: number;
169
+ /** Edit budget L for the EVOLVING AGENT (default 40). */
170
+ editBudget?: number;
171
+ }
172
+ export interface ResumeEpisodeResult {
173
+ episodeId: string;
174
+ /** The stage the episode was recorded at when {@link resumeEpisode} was called. */
175
+ resumedFrom: EpisodeStage;
176
+ /** The stage it reached after the resume re-ran the remaining steps; earlier, non-resumable stages are reported as-is. */
177
+ stage: EpisodeStage;
178
+ evolution: RunEvolvingAgentResult | null; // NOTE(review): presumably the EVOLVING AGENT outcome, `null` when not spawned (as on RunEpisodeResult) — confirm
179
+ }
180
+ /**
181
+ * Re-enter a partially-run episode at its recorded stage and idempotently run
182
+ * the REMAINING steps. Best-effort — used by the CLI `episode resume`. The
183
+ * episode stage machine is monotonic, so this picks up from the first not-yet-
184
+ * done step rather than re-advancing a stage already entered:
185
+ *
186
+ * - 'scored' → run the decision (f) then the EVOLVING AGENT (g).
187
+ * - 'rolled-back' / 'kept' → run the EVOLVING AGENT (g) then close.
188
+ * - 'evolved'/'evolution-refused'/'abstained' → close.
189
+ * - earlier stages → not auto-resumable here (the arms / reward
190
+ * agent need their own re-entry); reported as-is.
191
+ *
192
+ * NOTE: resume does NOT re-acquire the in-flight lock — the original
193
+ * {@link runEpisode} already released it; a resume is an operator-driven
194
+ * recovery, not a concurrent run.
195
+ */
196
+ export declare function resumeEpisode(opts: ResumeEpisodeOptions): Promise<ResumeEpisodeResult>;
197
+ //# sourceMappingURL=episode-orchestrator.d.ts.map