synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +158 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +431 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/fitness/test-failures.js +10 -2
  15. package/dist/core/project-config.d.ts +19 -0
  16. package/dist/core/project-config.js +96 -0
  17. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  18. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  19. package/dist/core/self-evolution/candidates.d.ts +0 -9
  20. package/dist/core/self-evolution/critic-agent.d.ts +192 -0
  21. package/dist/core/self-evolution/critic-agent.js +568 -0
  22. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  23. package/dist/core/self-evolution/edits-contract.js +89 -0
  24. package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
  25. package/dist/core/self-evolution/episode-orchestrator.js +681 -0
  26. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  27. package/dist/core/self-evolution/episode-store.js +573 -0
  28. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  29. package/dist/core/self-evolution/evolution-switches.js +5 -10
  30. package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
  31. package/dist/core/self-evolution/evolving-agent.js +535 -0
  32. package/dist/core/self-evolution/host-harness.d.ts +14 -15
  33. package/dist/core/self-evolution/host-harness.js +48 -23
  34. package/dist/core/self-evolution/index.d.ts +11 -6
  35. package/dist/core/self-evolution/index.js +20 -6
  36. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  37. package/dist/core/self-evolution/line-diff.js +130 -0
  38. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  39. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  40. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  41. package/dist/core/self-evolution/policy/index.js +13 -0
  42. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  43. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  44. package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
  45. package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
  46. package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
  47. package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
  48. package/dist/core/self-evolution/promote.d.ts +1 -1
  49. package/dist/core/self-evolution/promote.js +6 -33
  50. package/dist/core/self-evolution/promotion.js +1 -2
  51. package/dist/core/self-evolution/reward-agent.d.ts +379 -0
  52. package/dist/core/self-evolution/reward-agent.js +940 -0
  53. package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
  54. package/dist/core/self-evolution/reward-aggregator.js +262 -0
  55. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  56. package/dist/core/self-evolution/scope-gate.js +107 -0
  57. package/dist/core/self-evolution/success-channel.js +2 -2
  58. package/dist/core/self-evolution/tamper-check.d.ts +24 -0
  59. package/dist/core/self-evolution/tamper-check.js +236 -0
  60. package/dist/core/self-evolution/tool-evolution.js +2 -13
  61. package/dist/core/self-evolution/verdict.d.ts +8 -5
  62. package/dist/core/self-evolution/verdict.js +4 -7
  63. package/dist/core/templates/workflows/gen-tests.js +1 -1
  64. package/dist/core/templates/workflows/learn.d.ts +3 -2
  65. package/dist/core/templates/workflows/learn.js +21 -18
  66. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  67. package/dist/core/templates/workflows/self-evolving.js +62 -172
  68. package/dist/core/trajectory/scrub.d.ts +27 -0
  69. package/dist/core/trajectory/scrub.js +79 -0
  70. package/dist/core/trajectory/skeleton.d.ts +27 -1
  71. package/dist/core/trajectory/skeleton.js +152 -8
  72. package/dist/dashboard/data.d.ts +25 -51
  73. package/dist/dashboard/data.js +68 -180
  74. package/dist/dashboard/react-client.js +458 -503
  75. package/dist/dashboard/react-styles.js +3 -3
  76. package/dist/dashboard/server.js +23 -17
  77. package/dist/ui/ascii-patterns.d.ts +7 -15
  78. package/dist/ui/ascii-patterns.js +123 -54
  79. package/dist/ui/welcome-screen.d.ts +0 -14
  80. package/dist/ui/welcome-screen.js +16 -35
  81. package/package.json +1 -1
  82. package/dist/core/self-evolution/ga-selection.d.ts +0 -94
  83. package/dist/core/self-evolution/ga-selection.js +0 -153
  84. package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
  85. package/dist/core/self-evolution/proposer-agent.js +0 -326
  86. package/dist/core/self-evolution/replay-runner.d.ts +0 -100
  87. package/dist/core/self-evolution/replay-runner.js +0 -170
  88. package/dist/core/self-evolution/replay.d.ts +0 -45
  89. package/dist/core/self-evolution/replay.js +0 -56
  90. package/dist/core/self-evolution/template-variants.d.ts +0 -62
  91. package/dist/core/self-evolution/template-variants.js +0 -171
  92. package/dist/core/self-evolution/trajectory.d.ts +0 -65
  93. package/dist/core/self-evolution/trajectory.js +0 -185
@@ -0,0 +1,266 @@
1
+ /**
2
+ * DISK episode store (transcripts × both arms) — loop v2 (self-evolution as
3
+ * in-context RL).
4
+ *
5
+ * This module is DUMB STORAGE plus a stage machine — it never spawns an agent
6
+ * and never computes a skeleton; callers pass objects in. One episode = one
7
+ * synergyspec-selfevolving change run through the loop, recorded under:
8
+ *
9
+ * <repoRoot>/.synergyspec-selfevolving/self-evolution/episodes/<episodeId>/
10
+ * episode.json // the state-machine record (this module owns it)
11
+ * main-arm/ // 主智能体 MAIN AGENT (frozen actor; the user's host
12
+ * // agent running the current policy vN+1):
13
+ * // transcript.jsonl + skeleton.json + objective.json
14
+ * baseline-arm/ // CRITIC AGENT(基线智能体 baseline agent)output — an
15
+ * // AGENT with the same input/output as the main agent
16
+ * // that reruns LAST episode's policy vN on the SAME
17
+ * // change. This dir is the ONLY artifact that survives
18
+ * // its worktree (产物即弃 — worktree artifacts
19
+ * // discarded): same three files, except the transcript
20
+ * // may be `stdout.txt` when the harness exposes no
21
+ * // session file.
22
+ * diagnosis.json // written LATER by the 奖励智能体 REWARD AGENT via
23
+ * // {@link writeDiagnosis} — LLM as judge; CALCULATES
24
+ * // 算分 reward(主臂)&reward(基线臂); advantage =
25
+ * // reward(主臂) − reward(基线臂); 文本梯度 textual
26
+ * // gradient; never edits; 弃权 abstains when no
27
+ * // nameable gap. {@link createEpisode} never creates
28
+ * // this file; this module only stores its content.
29
+ *
30
+ * The 5 artifacts + test-report stay in the change dir
31
+ * (`synergyspec-selfevolving/changes/<name>/`) — `episode.json` records that
32
+ * path (`changeDirPath`), never copies.
33
+ *
34
+ * POLICY = design template (the MAIN AGENT's "weights"); the CLI holds the versions:
35
+ * `policyVersionMain` / `policyVersionBaseline` record WHICH version each arm
36
+ * ran. The versions themselves live in the 版本账本 ledger (a sibling module);
37
+ * 单一血统 single lineage and the 否决缓冲 reject-buffer are enforced there,
38
+ * not here. The 演进智能体 EVOLVING AGENT (optimizer.step; ONE bounded edit ≤L;
39
+ * never scores) runs after scoring; its outcome lands in this store only as a
40
+ * stage (`evolved` | `evolution-refused`).
41
+ *
42
+ * Hard rules (matching candidates.ts):
43
+ * - This module ONLY reads/writes inside the episodes base dir.
44
+ * - All writes use a tmp + rename pattern so a half-written record never
45
+ * appears under its final name.
46
+ * - Stage changes go through a validated MONOTONIC state machine —
47
+ * advancing to a stage not reachable from the current one throws.
48
+ */
49
+ /**
50
+ * Lifecycle stage for an episode.
51
+ *
52
+ * State machine (see {@link isLegalEpisodeStageTransition}; monotonic — no
53
+ * regressions, no jumps):
54
+ *
55
+ * created
56
+ * -> main-arm-captured // MAIN AGENT arm landed
57
+ * -> (baseline-arm-captured | baseline-skipped) // CRITIC AGENT (baseline agent) arm
58
+ * -> scored // REWARD AGENT wrote diagnosis.json
59
+ * -> (rolled-back | kept) // rollback decision on the main arm's edits
60
+ * -> (evolved | evolution-refused | abstained) // EVOLVING AGENT outcome
61
+ * -> closed // terminal
62
+ *
63
+ * `abstained` may also follow `scored` directly: the REWARD AGENT abstains
64
+ * when there is no nameable gap, so no rollback decision is needed and the
65
+ * EVOLVING AGENT is never spawned.
66
+ */
67
+ export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed';
68
+ /**
69
+ * Iterable, read-only list of every legal {@link EpisodeStage} value. Order
70
+ * follows the documented state machine for readability, not behavior.
71
+ */
72
+ export declare const EPISODE_STAGES: readonly EpisodeStage[];
73
+ /**
74
+ * The two arms of one episode:
75
+ * - `main-arm` — MAIN AGENT (frozen actor, policy vN+1).
76
+ * - `baseline-arm` — CRITIC AGENT (baseline agent), rerunning LAST
77
+ * episode's policy vN on the SAME change; the capture is the ONLY artifact
78
+ * that survives its worktree (worktree artifacts are discarded).
79
+ */
80
+ export type EpisodeArm = 'main-arm' | 'baseline-arm';
81
+ /** One audit entry in `stageHistory` (see {@link EpisodeRecord}) — which stage was entered, and when. */
82
+ export interface EpisodeStageHistoryEntry {
83
+ stage: EpisodeStage; // The stage that was entered.
84
+ /** ISO 8601 UTC timestamp the stage was entered. */
85
+ at: string;
86
+ }
87
+ /**
88
+ * The `episode.json` state-machine record. Mutated only via
89
+ * {@link advanceEpisodeStage}; everything else in the episode dir is opaque
90
+ * arm/diagnosis storage.
91
+ */
92
+ export interface EpisodeRecord {
93
+ schemaVersion: 1; // Record schema version; the type pins it to the literal 1.
94
+ /** Sanitized id, also the directory name: `[a-z0-9-]` only. */
95
+ episodeId: string;
96
+ /** The synergyspec-selfevolving change this episode ran. */
97
+ changeName: string;
98
+ /**
99
+ * Absolute path of the change dir (`synergyspec-selfevolving/changes/<name>/`)
100
+ * where the 5 artifacts + test-report live. Recorded, never copied.
101
+ */
102
+ changeDirPath: string;
103
+ /** The canonical target whose POLICY the episode exercises. */
104
+ targetId: string;
105
+ /** Version-ledger version the MAIN AGENT ran (vN+1); null when unknown. */
106
+ policyVersionMain: number | null;
107
+ /** Version-ledger version the CRITIC AGENT (baseline agent) reran (vN); null until captured. */
108
+ policyVersionBaseline: number | null;
109
+ /** Current stage. Mutated only via {@link advanceEpisodeStage}. */
110
+ stage: EpisodeStage;
111
+ /** Append-only audit trail of every stage entered, including `created`. */
112
+ stageHistory: EpisodeStageHistoryEntry[];
113
+ /** Why the baseline arm was skipped (set with stage `baseline-skipped`). */
114
+ baselineSkippedReason?: string;
115
+ /** advantage = reward(main arm) − reward(baseline arm); null when the REWARD AGENT abstained. */
116
+ advantage?: number | null;
117
+ /** ISO 8601 UTC timestamp the episode record was created. */
118
+ createdAt: string;
119
+ /** ISO 8601 UTC timestamp of the last record mutation. */
120
+ updatedAt: string;
121
+ }
122
+ /** Patch fields {@link advanceEpisodeStage} may merge alongside a stage advance. */
123
+ export interface EpisodeStagePatch {
124
+ policyVersionBaseline?: number | null; // Merged into EpisodeRecord.policyVersionBaseline.
125
+ baselineSkippedReason?: string; // Merged into EpisodeRecord.baselineSkippedReason.
126
+ advantage?: number | null; // Merged into EpisodeRecord.advantage.
127
+ }
128
+ /**
129
+ * True iff `(from -> to)` is a legal transition in the episode stage machine.
130
+ * See the type-level doc on {@link EpisodeStage} for the full table.
131
+ *
132
+ * Pure function; safe to call from validators, tests, and UIs.
+ *
+ * @param from Stage the transition starts from.
+ * @param to Stage the transition would enter.
+ * @returns `true` when `(from -> to)` is legal, `false` otherwise.
133
+ */
134
+ export declare function isLegalEpisodeStageTransition(from: EpisodeStage, to: EpisodeStage): boolean;
135
+ /**
136
+ * Absolute path of one episode's directory. No I/O; the dir may not exist yet.
137
+ * Throws on an unsafe id so this can never be used for path traversal.
+ *
+ * @param repoRoot Project root that owns the episode store.
+ * @param episodeId Id of the episode; also its directory name.
+ * @returns The absolute path of the episode directory.
+ * @throws When `episodeId` is unsafe.
138
+ */
139
+ export declare function episodeDir(repoRoot: string, episodeId: string): string;
140
+ export interface CreateEpisodeOptions {
141
+ /** Absolute path to the project root that owns the episode store. */
142
+ repoRoot: string;
143
+ /** The synergyspec-selfevolving change name this episode ran. */
144
+ changeName: string;
145
+ /** Absolute path of the change dir; recorded in episode.json, never copied. */
146
+ changeDirPath: string;
147
+ /** The canonical target whose POLICY the episode exercises. */
148
+ targetId: string;
149
+ /** Version-ledger version the MAIN AGENT ran (vN+1); null when unknown. */
150
+ policyVersionMain: number | null;
151
+ /**
152
+ * Explicit episode id (must already match `[a-z0-9-]`). Default:
153
+ * `<changeName>-<compact UTC timestamp yyyyMMddTHHmmss>` sanitized to
154
+ * `[a-z0-9-]`. Either way a collision appends `-2`, `-3`, …
155
+ */
156
+ episodeId?: string;
157
+ /** TEST-ONLY deterministic clock seed; defaults to `new Date()`. */
158
+ now?: Date;
159
+ }
160
+ /**
161
+ * Create a new episode directory with its `episode.json` at stage `created`
162
+ * (plus empty `main-arm/` and `baseline-arm/` dirs so the on-disk layout is
163
+ * self-describing). `diagnosis.json` is deliberately NOT created here — the
164
+ * REWARD AGENT writes it later via {@link writeDiagnosis}.
165
+ *
166
+ * Atomic: the whole episode dir is staged in a sibling tmp dir and renamed
167
+ * into place, so a half-written episode never appears under its final id.
168
+ *
+ * @param opts See {@link CreateEpisodeOptions}.
169
+ * @returns The created record and the absolute episode directory. Callers must
170
+ * read `episode.episodeId` back — a collision may have suffixed it.
171
+ */
172
+ export declare function createEpisode(opts: CreateEpisodeOptions): Promise<{
173
+ episode: EpisodeRecord;
174
+ episodeDir: string;
175
+ }>;
176
+ /**
177
+ * Read one episode's `episode.json`. Throws a deterministic `Error` when the
178
+ * episode does not exist.
+ *
+ * @param repoRoot Project root that owns the episode store.
+ * @param episodeId Id of the episode to read.
+ * @returns The episode's {@link EpisodeRecord}.
+ * @throws Error when the episode does not exist.
179
+ */
180
+ export declare function readEpisode(repoRoot: string, episodeId: string): Promise<EpisodeRecord>;
181
+ export interface AdvanceEpisodeStageOptions {
182
+ repoRoot: string; // Project root that owns the episode store.
183
+ episodeId: string; // Id of the episode whose stage is advanced.
184
+ /** The stage to enter; must be legal from the current stage or this throws. */
185
+ stage: EpisodeStage;
186
+ /** Optional allowlisted fields to merge in the same atomic write. */
187
+ patch?: EpisodeStagePatch;
188
+ }
189
+ /**
190
+ * Advance an episode to its next stage — atomic read-modify-write of
191
+ * `episode.json` (sibling tmp file + rename, so a half-written advance is
192
+ * never observed).
193
+ *
194
+ * - Validates the transition via {@link isLegalEpisodeStageTransition};
195
+ * advancing to a stage not reachable from the current one throws.
196
+ * - Appends `{stage, at}` to `stageHistory`.
197
+ * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
198
+ * `baselineSkippedReason`, `advantage`) in the same write.
199
+ * - Bumps `updatedAt`.
+ *
+ * @param opts See {@link AdvanceEpisodeStageOptions}.
+ * @returns An {@link EpisodeRecord} — presumably the record as written after
+ * the advance; NOTE(review): confirm against the implementation.
+ * @throws When `opts.stage` is not reachable from the episode's current stage.
200
+ */
201
+ export declare function advanceEpisodeStage(opts: AdvanceEpisodeStageOptions): Promise<EpisodeRecord>;
202
+ /**
203
+ * List every episode (newest first by `createdAt`; ties resolve by id for
204
+ * determinism, matching `listCandidates`).
205
+ *
206
+ * - Returns `[]` cleanly when the episodes base dir does not exist.
207
+ * - Skips in-flight `.tmp-` dirs from a previous interrupted create.
208
+ * - Skips directories without a readable, valid `episode.json` (prints a
209
+ * warning to stderr so corrupted episodes are surfaced).
+ *
+ * @param repoRoot Project root that owns the episode store.
+ * @returns The episode records, newest first; `[]` when there is no episodes
+ * base dir.
210
+ */
211
+ export declare function listEpisodes(repoRoot: string): Promise<EpisodeRecord[]>;
212
+ export interface WriteArmCaptureOptions {
213
+ repoRoot: string; // Project root that owns the episode store.
214
+ episodeId: string; // Id of the episode the capture belongs to.
215
+ /** Which arm dir to write into; see {@link EpisodeArm}. */
216
+ arm: EpisodeArm;
217
+ /**
218
+ * Raw transcript to persist verbatim. Conventionally `transcript.jsonl`;
219
+ * the CRITIC AGENT (baseline agent) arm may carry `stdout.txt`
220
+ * when the harness exposes no session file. Optional — some harnesses only
221
+ * yield a skeleton + objective.
222
+ */
223
+ transcript?: {
224
+ fileName: string;
225
+ content: string;
226
+ };
227
+ /** Pre-computed skeleton (this module never computes one). Stored as `skeleton.json`. */
228
+ skeleton?: object;
229
+ /** The arm's objective record. Stored as `objective.json`. Required. */
230
+ objective: object;
231
+ }
232
+ /**
233
+ * Persist one arm's capture into `<episodeDir>/<arm>/`. DUMB STORAGE: callers
234
+ * pass the transcript/skeleton/objective in; this module never spawns the
235
+ * MAIN AGENT or the CRITIC AGENT (baseline agent) and never
236
+ * computes a skeleton. Each file is written atomically (tmp + rename);
237
+ * re-capturing overwrites (last write wins). Stage advancement is the
238
+ * caller's job via {@link advanceEpisodeStage} — storage and state machine
239
+ * stay decoupled.
240
+ *
+ * @param opts See {@link WriteArmCaptureOptions}.
241
+ * @returns The absolute arm dir and the absolute paths written.
242
+ */
243
+ export declare function writeArmCapture(opts: WriteArmCaptureOptions): Promise<{
244
+ armDir: string;
245
+ writtenFiles: string[];
246
+ }>;
247
+ export interface WriteDiagnosisOptions {
248
+ repoRoot: string; // Project root that owns the episode store.
249
+ episodeId: string; // Id of the episode the diagnosis belongs to.
250
+ /**
251
+ * The REWARD AGENT's diagnosis — reward(main arm) and reward(baseline arm),
252
+ * advantage = reward(main arm) − reward(baseline arm), textual gradient (or an
253
+ * abstain record when there is no nameable gap). Storage only: this
254
+ * module never validates or interprets the content; the REWARD AGENT owns it.
255
+ */
256
+ diagnosis: object;
257
+ }
258
+ /**
259
+ * Atomic write of `<episodeDir>/diagnosis.json`. Last write wins (an episode
260
+ * has exactly ONE current diagnosis; the audit trail is `stageHistory`).
261
+ * Throws deterministically when the episode does not exist.
262
+ *
+ * @param opts See {@link WriteDiagnosisOptions}.
263
+ * @returns The absolute path of the written `diagnosis.json`.
264
+ */
265
+ export declare function writeDiagnosis(opts: WriteDiagnosisOptions): Promise<string>;
266
+ //# sourceMappingURL=episode-store.d.ts.map