synergyspec-selfevolving 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +373 -31
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +179 -786
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +7 -7
- package/dist/core/fitness/loss.js +6 -6
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +4 -0
- package/dist/core/project-config.js +52 -1
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +10 -6
- package/dist/core/self-evolution/index.js +19 -6
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +4 -2
- package/dist/core/templates/workflows/learn.js +25 -166
- package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
- package/dist/core/templates/workflows/self-evolving.js +127 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Episode orchestrator — the rollback-before-evolution heart of loop v2
|
|
3
|
+
* (self-evolution as in-context RL).
|
|
4
|
+
*
|
|
5
|
+
* One `runEpisode` runs ONE synergyspec-selfevolving change through the full
|
|
6
|
+
* loop, in a STRICT, durably-persisted order — correctness of ordering is the
|
|
7
|
+
* design contract:
|
|
8
|
+
*
|
|
9
|
+
* a. acquireInFlight — one in-flight episode per target.
|
|
10
|
+
* b. ensure 单一血统 single lineage — init v0 from resolved files when new;
|
|
11
|
+
* policyVersionMain = lineage head.
|
|
12
|
+
* c. createEpisode + writeArmCapture — record the 主智能体 MAIN AGENT (frozen
|
|
13
|
+
* actor, policy vN+1) arm; advance
|
|
14
|
+
* 'main-arm-captured'.
|
|
15
|
+
* d. CRITIC AGENT (基线智能体 baseline agent) — runCriticAgent reruns the LAST
|
|
16
|
+
* episode's policy vN on the SAME change, OR skip
|
|
17
|
+
* ('baseline-skipped') when the lineage
|
|
18
|
+
* has < 2 versions / last action refused.
|
|
19
|
+
* e. 奖励智能体 REWARD AGENT — runRewardAgent CALCULATES 算分
|
|
20
|
+
* reward(主臂) & reward(基线臂), advantage
|
|
21
|
+
* = reward(主臂) − reward(基线臂), the
|
|
22
|
+
* 文本梯度 textual gradient; writes
|
|
23
|
+
* diagnosis.json; advances 'scored'.
|
|
24
|
+
* f. DECISION on the main arm's edits:
|
|
25
|
+
* - 弃权 abstained / no gaps → advance 'abstained'; SKIP evolution.
|
|
26
|
+
* - bad advantage (< threshold) → ROLLBACK the 策略 POLICY to the prior
|
|
27
|
+
* good version, THEN append the 否决缓冲
|
|
28
|
+
* reject-buffer entry — BOTH durably on
|
|
29
|
+
* disk — THEN advance 'rolled-back'.
|
|
30
|
+
* - otherwise → advance 'kept'.
|
|
31
|
+
* g. 演进智能体 EVOLVING AGENT — ONLY after (f) persisted: runEvolvingAgent
|
|
32
|
+
* (optimizer.step) reads the reject-buffer FRESH from disk
|
|
33
|
+
* (so THIS episode's just-written entry is
|
|
34
|
+
* in its prompt) and ends as one of not-spawned /
|
|
35
|
+
* refused / evolved.
|
|
36
|
+
* h. advance 'closed' + releaseInFlight — ALWAYS, even on error.
|
|
37
|
+
*
|
|
38
|
+
* ORDERING GUARANTEE: the rollback + reject-buffer write are SEQUENTIAL awaits
|
|
39
|
+
* that BOTH complete (and the stage reads 'rolled-back'/'kept') before
|
|
40
|
+
* {@link runEvolvingAgent} is even called. (f) and (g) are never parallelized
|
|
41
|
+
* and never share a Promise.all.
|
|
42
|
+
*
|
|
43
|
+
* This module orchestrates; it never spawns an agent itself — the three agents
|
|
44
|
+
* own their own {@link runHeadlessAgent} spawns (the `spawn` seam threads to all
|
|
45
|
+
* three). The only state it owns is the ordering + the rollback/reject decision.
|
|
46
|
+
*/
|
|
47
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
48
|
+
import type { LearnReport } from '../learn.js';
|
|
49
|
+
import type { TrajectorySource } from '../trajectory/source.js';
|
|
50
|
+
import { type PolicyResolveFiles } from './policy/policy-store.js';
|
|
51
|
+
import { type EpisodeStage } from './episode-store.js';
|
|
52
|
+
import { type ArmObjective } from './critic-agent.js';
|
|
53
|
+
import { type RunEvolvingAgentResult } from './evolving-agent.js';
|
|
54
|
+
/** The 主智能体 MAIN AGENT (policy vN+1) capture the orchestrator records. */
|
|
55
|
+
export interface MainArmCapture {
|
|
56
|
+
/** Raw session transcript text, when provided; persisted as `transcript.jsonl`. */
|
|
57
|
+
transcript?: string;
|
|
58
|
+
/** Bounded action skeleton of the observed run, when discoverable. */
|
|
59
|
+
skeleton?: object;
|
|
60
|
+
/**
|
|
61
|
+
* The arm objective, byte-shape-IDENTICAL to the CRITIC AGENT's
|
|
62
|
+
* {@link ArmObjective} so the 奖励智能体 REWARD AGENT reads both arms uniformly.
|
|
63
|
+
*/
|
|
64
|
+
objective: ArmObjective;
|
|
65
|
+
}
|
|
66
|
+
export interface CaptureMainArmOptions {
|
|
67
|
+
repoRoot: string;
|
|
68
|
+
changeName: string;
|
|
69
|
+
/**
|
|
70
|
+
* An ALREADY-COMPUTED learn report (from `generateLearnReport`). Its
|
|
71
|
+
* `fitnessSample` carries the graded pass rate / health / observed-trajectory
|
|
72
|
+
* facts — the orchestrator REUSES that grading rather than re-running it.
|
|
73
|
+
*/
|
|
74
|
+
report: LearnReport;
|
|
75
|
+
/**
|
|
76
|
+
* Optional trajectory handles. When the learn report did not carry an
|
|
77
|
+
* `observedRun` skeleton (older capture paths), these let the caller hand a
|
|
78
|
+
* skeleton / raw transcript directly, OR the orchestrator re-discovers the
|
|
79
|
+
* skeleton via {@link getTrajectoryForChange}. A `trajectorySource` override is
|
|
80
|
+
* honored first (tests / explicit harness selection).
|
|
81
|
+
*/
|
|
82
|
+
trajectoryHandles?: {
|
|
83
|
+
/** Verbatim session transcript text to persist as `transcript.jsonl`. */
|
|
84
|
+
transcript?: string;
|
|
85
|
+
/** Pre-computed action skeleton (else derived from the discovered trajectory). */
|
|
86
|
+
skeleton?: object;
|
|
87
|
+
/** Trajectory-source override (tests); else the registry auto-detects. */
|
|
88
|
+
trajectorySource?: TrajectorySource;
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Build the 主智能体 MAIN AGENT arm `{transcript?, skeleton?, objective}` from an
|
|
93
|
+
* already-computed learn report's {@link FitnessSample} + the discovered
|
|
94
|
+
* trajectory.
|
|
95
|
+
*
|
|
96
|
+
* REUSES the learn/fitness grading verbatim — it never re-grades:
|
|
97
|
+
* - `objective.passRate` prefers the OBSERVED pass rate
|
|
98
|
+
* (`fitnessSample.trajectoryFacts.observedPassRate` when a runner ran), else
|
|
99
|
+
* the authored `testMetrics.passRate`, else `null` (never fabricated);
|
|
100
|
+
* - `objective.healthPenalty` is `fitnessSample.healthSignal` (the raw
|
|
101
|
+
* "no signal ⇒ null" health reading, distinct from the `?? 0`-defaulted
|
|
102
|
+
* `loss.healthPenalty`);
|
|
103
|
+
* - `objective.loss` is the blended `fitnessSample.loss.loss` (or `null`);
|
|
104
|
+
* - `verified` / `observedStatus` come from `trajectoryFacts`.
|
|
105
|
+
* The shape is byte-identical to {@link ArmObjective} so both arms read uniformly.
|
|
106
|
+
*/
|
|
107
|
+
export declare function captureMainArm(opts: CaptureMainArmOptions): Promise<MainArmCapture>;
|
|
108
|
+
/** The decision the orchestrator made on the main arm's edits. */
|
|
109
|
+
export type EpisodeDecision = 'rolled-back' | 'kept' | 'abstained';
|
|
110
|
+
export interface RunEpisodeOptions {
|
|
111
|
+
repoRoot: string;
|
|
112
|
+
targetId: string;
|
|
113
|
+
changeName: string;
|
|
114
|
+
/** Absolute path of the change dir; recorded in episode.json, never copied. */
|
|
115
|
+
changeDirPath: string;
|
|
116
|
+
/** The 主智能体 MAIN AGENT arm (from {@link captureMainArm}). */
|
|
117
|
+
mainArm: MainArmCapture;
|
|
118
|
+
/**
|
|
119
|
+
* advantage = reward(主臂) − reward(基线臂) threshold below which the 策略
|
|
120
|
+
* POLICY is rolled back to the prior version BEFORE the 演进智能体 EVOLVING
|
|
121
|
+
* AGENT runs. Default 0 (an advantage strictly below 0 triggers a rollback).
|
|
122
|
+
*/
|
|
123
|
+
advantageRollbackThreshold?: number;
|
|
124
|
+
/** Edit budget L for the 演进智能体 EVOLVING AGENT. Default 40. */
|
|
125
|
+
editBudget?: number;
|
|
126
|
+
/** Injectable spawn seam — threaded to ALL THREE agents. Defaults to node's spawn. */
|
|
127
|
+
spawn?: typeof nodeSpawn;
|
|
128
|
+
/** Injectable clock for the lock + episode id; defaults to `new Date()`. */
|
|
129
|
+
now?: Date;
|
|
130
|
+
/**
|
|
131
|
+
* TEST seam: the file resolver {@link initPolicyLineage} uses to snapshot v0
|
|
132
|
+
* when the lineage is new. Defaults to the real `resolveTargetLocalFiles`.
|
|
133
|
+
*/
|
|
134
|
+
resolveFiles?: PolicyResolveFiles;
|
|
135
|
+
}
|
|
136
|
+
export interface RunEpisodeResult {
|
|
137
|
+
episodeId: string;
|
|
138
|
+
/** True when the CRITIC AGENT (基线智能体 baseline agent) arm was skipped. */
|
|
139
|
+
baselineSkipped: boolean;
|
|
140
|
+
/** advantage = reward(主臂) − reward(基线臂); null when skipped or abstained. */
|
|
141
|
+
advantage: number | null;
|
|
142
|
+
decision: EpisodeDecision;
|
|
143
|
+
/** The 演进智能体 EVOLVING AGENT outcome, or `null` when it was never spawned. */
|
|
144
|
+
evolution: RunEvolvingAgentResult | null;
|
|
145
|
+
/** The lineage head version AFTER the episode (post-rollback / post-evolve). */
|
|
146
|
+
newPolicyVersion: number | null;
|
|
147
|
+
}
|
|
148
|
+
/** Returned (not thrown) when the target already has a non-stale in-flight lock. */
|
|
149
|
+
export interface RunEpisodeBusy {
|
|
150
|
+
episodeId: null;
|
|
151
|
+
busy: true;
|
|
152
|
+
reason: string;
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Run ONE episode through the loop in the strict, durably-persisted order
|
|
156
|
+
* documented at the top of this module. See {@link RunEpisodeResult}.
|
|
157
|
+
*
|
|
158
|
+
* The in-flight lock is released in a finally guard so a throw mid-episode never
|
|
159
|
+
* wedges the target's slot.
|
|
160
|
+
*/
|
|
161
|
+
export declare function runEpisode(opts: RunEpisodeOptions): Promise<RunEpisodeResult | RunEpisodeBusy>;
|
|
162
|
+
export interface ResumeEpisodeOptions {
|
|
163
|
+
repoRoot: string;
|
|
164
|
+
episodeId: string;
|
|
165
|
+
/** Injectable spawn seam — threaded to the remaining agents. */
|
|
166
|
+
spawn?: typeof nodeSpawn;
|
|
167
|
+
/** advantage rollback threshold (default 0), for an episode resumed before the decision. */
|
|
168
|
+
advantageRollbackThreshold?: number;
|
|
169
|
+
/** Edit budget L (default 40). */
|
|
170
|
+
editBudget?: number;
|
|
171
|
+
}
|
|
172
|
+
export interface ResumeEpisodeResult {
|
|
173
|
+
episodeId: string;
|
|
174
|
+
/** The stage the episode was at when resume was called. */
|
|
175
|
+
resumedFrom: EpisodeStage;
|
|
176
|
+
/** The stage it reached after the resume re-ran the remaining steps. */
|
|
177
|
+
stage: EpisodeStage;
|
|
178
|
+
evolution: RunEvolvingAgentResult | null;
|
|
179
|
+
}
|
|
180
|
+
/**
|
|
181
|
+
* Re-enter a partially-run episode at its recorded stage and idempotently run
|
|
182
|
+
* the REMAINING steps. Best-effort — used by the CLI `episode resume`. The
|
|
183
|
+
* episode stage machine is monotonic, so this picks up from the first not-yet-
|
|
184
|
+
* done step rather than re-advancing a stage already entered:
|
|
185
|
+
*
|
|
186
|
+
* - 'scored' → run the decision (f) then the 演进智能体 (g).
|
|
187
|
+
* - 'rolled-back' / 'kept' → run the 演进智能体 EVOLVING AGENT (g) then close.
|
|
188
|
+
* - 'evolved'/'evolution-refused'/'abstained' → close.
|
|
189
|
+
* - earlier stages → not auto-resumable here (the arms / reward
|
|
190
|
+
* agent need their own re-entry); reported as-is.
|
|
191
|
+
*
|
|
192
|
+
* NOTE: resume does NOT re-acquire the in-flight lock — the original
|
|
193
|
+
* {@link runEpisode} already released it; a resume is an operator-driven
|
|
194
|
+
* recovery, not a concurrent run.
|
|
195
|
+
*/
|
|
196
|
+
export declare function resumeEpisode(opts: ResumeEpisodeOptions): Promise<ResumeEpisodeResult>;
|
|
197
|
+
//# sourceMappingURL=episode-orchestrator.d.ts.map
|