synergyspec-selfevolving 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +373 -31
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +179 -786
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +7 -7
- package/dist/core/fitness/loss.js +6 -6
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +4 -0
- package/dist/core/project-config.js +52 -1
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +10 -6
- package/dist/core/self-evolution/index.js +19 -6
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +4 -2
- package/dist/core/templates/workflows/learn.js +25 -166
- package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
- package/dist/core/templates/workflows/self-evolving.js +127 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 奖励智能体 REWARD AGENT — loop v2 (self-evolution as in-context RL).
|
|
3
|
+
*
|
|
4
|
+
* LLM as judge. Reads ONE episode's two arms from the {@link import('./episode-store.js')}
|
|
5
|
+
* episode store and CALCULATES 算分 reward(主臂) and reward(基线臂), each in
|
|
6
|
+
* [0,1], anchored on the objective evidence on disk (tests · health · 轨迹度量).
|
|
7
|
+
* advantage = reward(主臂) − reward(基线臂). It finds errors / names gaps with a
|
|
8
|
+
* suggested direction = 文本梯度 textual gradient; it NEVER edits any file; when
|
|
9
|
+
* there is no nameable gap it 弃权 abstains.
|
|
10
|
+
*
|
|
11
|
+
* It is a SIBLING of the 演进智能体 EVOLVING AGENT (optimizer.step; ONE bounded
|
|
12
|
+
* edit ≤L; never scores) — never parent-child. The reward agent runs FIRST (it
|
|
13
|
+
* scores), the 演进智能体 runs AFTER (it edits); each is code-spawned in a fresh
|
|
14
|
+
* context via {@link runHeadlessAgent} from `./host-harness.js` — NOT a skill,
|
|
15
|
+
* the same headless-spawn seam the canonical proposer uses.
|
|
16
|
+
*
|
|
17
|
+
* Write boundary: this module's ONLY write path is the episode dir — it calls
|
|
18
|
+
* {@link writeDiagnosis} and {@link advanceEpisodeStage} (to `scored`). It never
|
|
19
|
+
* touches a canonical target file, a 策略 POLICY snapshot, or the change dir
|
|
20
|
+
* (which it reads, never copies). The loss/health numbers from
|
|
21
|
+
* `src/core/fitness/loss.ts` stay on disk as ANCHORS — the reward score itself
|
|
22
|
+
* is the JUDGE'S OWN, anchored on but not equal to the loss.
|
|
23
|
+
*
|
|
24
|
+
* Uses the same fenced-block agent idiom as the EVOLVING AGENT: one fenced-block
|
|
25
|
+
* output discipline, a bounded repair loop on parse/validation failure,
|
|
26
|
+
* fail-closed plain (Zod-free) validation, and atomic episode-store writes.
|
|
27
|
+
*/
|
|
28
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
29
|
+
import { type EpisodeRecord } from './episode-store.js';
|
|
30
|
+
/**
 * Error type for a judge response that fails parsing or validation.
 * Per the `parseRewardAgentResponse` documentation in this file, it is thrown
 * on any violation, and the repair loop re-prompts with the concrete message
 * appended.
 */
export declare class RewardAgentOutputInvalid extends Error {
|
|
31
|
+
/** @param message Description of the violation that made the output invalid. */
constructor(message: string);
|
|
32
|
+
}
|
|
33
|
+
/**
 * Error type for a failed agent invocation (agent crash). Per the
 * `runRewardAgent` documentation in this file, invocation errors are NOT
 * repaired; they propagate to the caller as this type.
 */
export declare class RewardAgentInvocationError extends Error {
|
|
34
|
+
/**
 * @param stderr NOTE(review): presumably the stderr text captured from the
 *   spawned agent process; confirm against the implementation.
 */
constructor(stderr: string);
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* The objective record each arm persists as `objective.json`, as the 奖励智能体
|
|
38
|
+
* REWARD AGENT READS it. A null-safe SUPERSET of the canonical on-disk
|
|
39
|
+
* {@link import('./critic-agent.js').ArmObjective} (which is the flat shape both
|
|
40
|
+
* arms WRITE): the MAIN ARM may instead carry the nested-learn fallback fields
|
|
41
|
+
* (`testMetrics`, `healthSignal`, a nested {@link import('../fitness/loss.js').PerChangeLoss}
|
|
42
|
+
* `loss`) when an older capture path wrote a raw {@link import('../fitness/sample.js').FitnessSample}.
|
|
43
|
+
* This module reads ONLY the fields it maps to anchors and is null-safe to
|
|
44
|
+
* either shape, so a divergence in the exact layout cannot crash the judge.
|
|
45
|
+
*
|
|
46
|
+
* Named distinctly from the canonical `ArmObjective` (the barrel re-exports the
|
|
47
|
+
* critic-agent declaration) so there is no ambiguous duplicate export.
|
|
48
|
+
*/
|
|
49
|
+
// Every named member is optional, and all but `verified` also admit null,
// so either on-disk layout can be read without a required field missing.
export interface RewardArmObjectiveInput {
|
|
50
|
+
/** Flat pass rate in [0,1] (CRITIC AGENT shape). */
|
|
51
|
+
passRate?: number | null;
|
|
52
|
+
/** NOTE(review): looks like the total test count behind `passRate`; confirm against the writer. */
testsTotal?: number | null;
|
|
53
|
+
/** NOTE(review): looks like the failed test count behind `passRate`; confirm against the writer. */
testsFailed?: number | null;
|
|
54
|
+
/** Flat normalized health penalty in [0,1] (CRITIC AGENT shape). */
|
|
55
|
+
healthPenalty?: number | null;
|
|
56
|
+
/** Flat blended loss in [0,1] (CRITIC AGENT shape) OR nested PerChangeLoss (MAIN ARM shape). */
|
|
57
|
+
loss?: number | null | {
|
|
58
|
+
// Nested form: each component is optional and nullable.
functionalLoss?: number | null;
|
|
59
|
+
healthPenalty?: number | null;
|
|
60
|
+
loss?: number | null;
|
|
61
|
+
};
|
|
62
|
+
// The three fields below are not part of the (loss, passRate, healthPenalty)
// triple that mapArmAnchors declares as its return type.
// NOTE(review): their exact meaning is not visible in this declaration file.
verified?: boolean;
|
|
63
|
+
observedStatus?: string | null;
|
|
64
|
+
/** NOTE(review): presumably a timestamp string; format not stated here. */
measuredAt?: string | null;
|
|
65
|
+
/** MAIN ARM shape (FitnessSample): functional metrics live under testMetrics. */
|
|
66
|
+
testMetrics?: {
|
|
67
|
+
passRate?: number | null;
|
|
68
|
+
} | null;
|
|
69
|
+
/** MAIN ARM shape (FitnessSample): the raw health signal, distinct from loss.healthPenalty. */
|
|
70
|
+
healthSignal?: number | null;
|
|
71
|
+
/** Additive/forward-compatible fields pass through unread. */
|
|
72
|
+
[key: string]: unknown;
|
|
73
|
+
}
|
|
74
|
+
/** The anchors block mapped from both arms' objectives (nulls where skipped). */
|
|
75
|
+
// Three anchor kinds (loss, pass rate, health penalty), one value per arm.
// Every field is nullable.
export interface DiagnosisAnchors {
|
|
76
|
+
/** Loss anchor of the main arm. */
mainLoss: number | null;
|
|
77
|
+
/** Loss anchor of the baseline arm. */
baselineLoss: number | null;
|
|
78
|
+
/** Pass-rate anchor of the main arm. */
mainPassRate: number | null;
|
|
79
|
+
/** Pass-rate anchor of the baseline arm. */
baselinePassRate: number | null;
|
|
80
|
+
/** Health-penalty anchor of the main arm. */
mainHealthPenalty: number | null;
|
|
81
|
+
/** Health-penalty anchor of the baseline arm. */
baselineHealthPenalty: number | null;
|
|
82
|
+
}
|
|
83
|
+
/** One named error the judge found, addressed to a quoted span in a real file. */
|
|
84
|
+
export interface DiagnosisError {
|
|
85
|
+
/** Which arm the error is attributed to. */
arm: 'main' | 'baseline';
|
|
86
|
+
/** Free-text description of the error. */
description: string;
|
|
87
|
+
/** The quoted span that backs the error. */
evidence: {
|
|
88
|
+
/** File the quote is taken from. */
file: string;
|
|
89
|
+
/** The quoted text itself. */
quote: string;
|
|
90
|
+
};
|
|
91
|
+
}
|
|
92
|
+
/** One nameable gap, addressed to a heading (`section`) in a target file. */
|
|
93
|
+
export interface DiagnosisGap {
|
|
94
|
+
/** The target file the gap is addressed to. */
file: string;
|
|
95
|
+
/** A heading in the target file; the `'*'` wildcard is allowed. */
|
|
96
|
+
section: string;
|
|
97
|
+
/** Free-text description of the gap. */
description: string;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* The `diagnosis.json` the 奖励智能体 REWARD AGENT writes (schemaVersion 1).
|
|
101
|
+
* advantage = reward(主臂) − reward(基线臂); `null` when the baseline arm was
|
|
102
|
+
* skipped (no comparison possible) OR when the judge 弃权 abstained.
|
|
103
|
+
*/
|
|
104
|
+
export interface RewardDiagnosis {
|
|
105
|
+
/** Schema version of `diagnosis.json`; typed as the literal 1. */
schemaVersion: 1;
|
|
106
|
+
/** Id of the episode this diagnosis belongs to. */
episodeId: string;
|
|
107
|
+
/** NOTE(review): same-named field as on RewardAgentPromptInput; presumably carried over from it. */
changeName: string;
|
|
108
|
+
/** NOTE(review): same-named field as on RewardAgentPromptInput; presumably carried over from it. */
targetId: string;
|
|
109
|
+
/** Policy version number for each arm; either may be null. */
policyVersions: {
|
|
110
|
+
main: number | null;
|
|
111
|
+
baseline: number | null;
|
|
112
|
+
};
|
|
113
|
+
/** Reward for the main arm; the module header describes each reward as lying in [0,1]. */
rewardMain: number;
|
|
114
|
+
/** null when the baseline arm was skipped. */
|
|
115
|
+
rewardBaseline: number | null;
|
|
116
|
+
/** reward(main arm) − reward(baseline arm); null when baseline skipped or abstained. */
|
|
117
|
+
advantage: number | null;
|
|
118
|
+
/** Objective anchors for both arms. */
anchors: DiagnosisAnchors;
|
|
119
|
+
/** Named errors the judge found. */
errors: DiagnosisError[];
|
|
120
|
+
/** Nameable gaps the judge found. */
gaps: DiagnosisGap[];
|
|
121
|
+
/** Textual gradient; null only when abstained. */
|
|
122
|
+
textualGradient: string | null;
|
|
123
|
+
/** True when the judge abstained. */
abstained: boolean;
|
|
124
|
+
/** NOTE(review): presumably set only when `abstained` is true; confirm against the validator. */
abstainReason?: string;
|
|
125
|
+
}
|
|
126
|
+
/** Assembled input for {@link assembleRewardAgentPrompt}. */
|
|
127
|
+
export interface RewardAgentPromptInput {
|
|
128
|
+
/** NOTE(review): presumably the name of the change under evaluation; not described in this file. */
changeName: string;
|
|
129
|
+
/** NOTE(review): presumably identifies the target being evolved; not described in this file. */
targetId: string;
|
|
130
|
+
/** Policy version number for each arm; either may be null. */
policyVersions: {
|
|
131
|
+
main: number | null;
|
|
132
|
+
baseline: number | null;
|
|
133
|
+
};
|
|
134
|
+
/** MAIN AGENT (policy vN+1) capture. */
|
|
135
|
+
mainArm: {
|
|
136
|
+
/** Skeleton object for this arm, or null; its shape is not declared here. */
skeleton: object | null;
|
|
137
|
+
/** Raw transcript text (jsonl) — bounded by {@link assembleRewardAgentPrompt}. */
|
|
138
|
+
transcript: string | null;
|
|
139
|
+
/** The arm's objective record; always present for the main arm. */
objective: RewardArmObjectiveInput;
|
|
140
|
+
};
|
|
141
|
+
/**
|
|
142
|
+
* CRITIC AGENT (baseline agent, policy vN) capture. `null` when the
|
|
143
|
+
* baseline arm was SKIPPED — the BASELINE ARM block is OMITTED entirely and
|
|
144
|
+
* the prompt states no comparison is possible (null rewardBaseline/advantage).
|
|
145
|
+
*/
|
|
146
|
+
baselineArm: {
|
|
147
|
+
// Same three members as `mainArm`.
skeleton: object | null;
|
|
148
|
+
transcript: string | null;
|
|
149
|
+
objective: RewardArmObjectiveInput;
|
|
150
|
+
} | null;
|
|
151
|
+
/** Bounded excerpts of the 5 artifacts + test-report.md read from the change dir. */
|
|
152
|
+
artifacts: {
|
|
153
|
+
/** File the excerpt comes from. */
file: string;
|
|
154
|
+
/** The excerpt text. */
content: string;
|
|
155
|
+
}[];
|
|
156
|
+
/** Pre-mapped anchors (loss/health/passRate) from both arms. */
|
|
157
|
+
anchors: DiagnosisAnchors;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Assemble the 奖励智能体 REWARD AGENT prompt. Pure (no I/O); exported for golden
|
|
161
|
+
* tests. Ordered blocks:
|
|
162
|
+
* 1. PRELUDE — the judge contract.
|
|
163
|
+
* 2. MAIN ARM (主智能体, policy vN+1) — skeleton + bounded transcript excerpt.
|
|
164
|
+
* 3. BASELINE ARM (CRITIC AGENT(基线智能体), policy vN) — same; OMITTED
|
|
165
|
+
* ENTIRELY when the baseline arm was skipped (a one-line note replaces it,
|
|
166
|
+
* demanding null rewardBaseline/advantage).
|
|
167
|
+
* 4. ARTIFACTS — the 5 artifacts + test-report.md, bounded excerpts.
|
|
168
|
+
* 5. OBJECTIVE EVIDENCE — both arms' anchors verbatim (the on-disk loss/health
|
|
169
|
+
* numbers that anchor the score).
|
|
170
|
+
*/
|
|
171
|
+
// Signature: takes the assembled RewardAgentPromptInput and returns the whole
// prompt as a single string.
export declare function assembleRewardAgentPrompt(input: RewardAgentPromptInput): string;
|
|
172
|
+
/**
 * Return shape of `parseRewardAgentResponse`. Declared without `export`, so it
 * is private to this module.
 *
 * It has the same members as `RewardDiagnosis` minus `schemaVersion`,
 * `episodeId`, `changeName`, `targetId`, `policyVersions` and `anchors`.
 */
interface ParsedDiagnosis {
|
|
173
|
+
/** Reward for the main arm. */
rewardMain: number;
|
|
174
|
+
/** Reward for the baseline arm, or null. */
rewardBaseline: number | null;
|
|
175
|
+
/** Claimed advantage, or null. */
advantage: number | null;
|
|
176
|
+
errors: DiagnosisError[];
|
|
177
|
+
gaps: DiagnosisGap[];
|
|
178
|
+
textualGradient: string | null;
|
|
179
|
+
abstained: boolean;
|
|
180
|
+
abstainReason?: string;
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Parse the judge's `json:diagnosis` block with a strict one-block discipline:
|
|
184
|
+
* exactly one fenced block, well-formed JSON, then fail-closed shape + range
|
|
185
|
+
* validation.
|
|
186
|
+
*
|
|
187
|
+
* Throws {@link RewardAgentOutputInvalid} on any violation (the repair loop
|
|
188
|
+
* re-prompts with the concrete message appended).
|
|
189
|
+
*/
|
|
190
|
+
// The return type ParsedDiagnosis is declared without `export` in this module,
// so external callers cannot import it by name; they can derive it with
// ReturnType<typeof parseRewardAgentResponse> if needed.
export declare function parseRewardAgentResponse(text: string): ParsedDiagnosis;
|
|
191
|
+
/** Map an arm's objective to its (loss, passRate, healthPenalty) anchors. */
|
|
192
|
+
// Accepts a missing objective (null or undefined). Each of the three returned
// anchors is independently nullable.
export declare function mapArmAnchors(objective: RewardArmObjectiveInput | null | undefined): {
|
|
193
|
+
loss: number | null;
|
|
194
|
+
passRate: number | null;
|
|
195
|
+
healthPenalty: number | null;
|
|
196
|
+
};
|
|
197
|
+
/** Build the {@link DiagnosisAnchors} block from both arms' objectives. */
|
|
198
|
+
// The main objective is required; only the baseline objective may be null.
export declare function buildAnchors(mainObjective: RewardArmObjectiveInput, baselineObjective: RewardArmObjectiveInput | null): DiagnosisAnchors;
|
|
199
|
+
/** Options accepted by `runRewardAgent`. */
export interface RunRewardAgentOptions {
|
|
200
|
+
/** NOTE(review): presumably the repository root used to locate the episode store; confirm. */
repoRoot: string;
|
|
201
|
+
/** Id of the episode to run the reward agent for. */
episodeId: string;
|
|
202
|
+
/** Injected for tests; defaults to node's spawn. */
|
|
203
|
+
spawn?: typeof nodeSpawn;
|
|
204
|
+
/** Override the agent binary; defaults to the host harness's default. */
|
|
205
|
+
binary?: string;
|
|
206
|
+
/** Bounded re-prompts on parse/validation failure (default 2 ⇒ at most 3 spawns). */
|
|
207
|
+
maxRepairAttempts?: number;
|
|
208
|
+
}
|
|
209
|
+
/** Value that the promise returned by `runRewardAgent` resolves to. */
export interface RunRewardAgentResult {
|
|
210
|
+
/** The diagnosis produced for the episode. */
diagnosis: RewardDiagnosis;
|
|
211
|
+
/** Absolute path of the written `diagnosis.json`. */
|
|
212
|
+
diagnosisPath: string;
|
|
213
|
+
/** The episode record after advancing to `scored`. */
|
|
214
|
+
episode: EpisodeRecord;
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Run the 奖励智能体 REWARD AGENT end-to-end for one episode:
|
|
218
|
+
* 1. read the episode + both arms (baseline omitted when skipped),
|
|
219
|
+
* 2. map anchors and assemble the prompt,
|
|
220
|
+
* 3. spawn the judge (fresh context) and parse with a bounded repair loop —
|
|
221
|
+
* on a parse/validation failure, re-prompt with the concrete error appended
|
|
222
|
+
* (same pattern as `runCanonicalProposerAgentWithRepair`),
|
|
223
|
+
* 4. RECOMPUTE advantage from the claimed rewards and REJECT a mismatch
|
|
224
|
+
* (|recomputed − claimed| > 1e-9 ⇒ repairable error); when the baseline was
|
|
225
|
+
* skipped, rewardBaseline and advantage MUST both be null,
|
|
226
|
+
* 5. write `diagnosis.json` via {@link writeDiagnosis} and advance the episode
|
|
227
|
+
* stage to `scored` via {@link advanceEpisodeStage}.
|
|
228
|
+
*
|
|
229
|
+
* The ONLY write path is the episode dir. Invocation errors (agent crash) are
|
|
230
|
+
* NOT repaired — they propagate as {@link RewardAgentInvocationError}.
|
|
231
|
+
*/
|
|
232
|
+
// Asynchronous: returns a promise of RunRewardAgentResult.
// NOTE(review): errors such as RewardAgentInvocationError presumably surface
// as a rejection of this promise; confirm against the implementation.
export declare function runRewardAgent(opts: RunRewardAgentOptions): Promise<RunRewardAgentResult>;
|
|
233
|
+
export {};
|
|
234
|
+
//# sourceMappingURL=reward-agent.d.ts.map
|