synergyspec-selfevolving 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +158 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +431 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/fitness/test-failures.js +10 -2
- package/dist/core/project-config.d.ts +19 -0
- package/dist/core/project-config.js +96 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +192 -0
- package/dist/core/self-evolution/critic-agent.js +568 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
- package/dist/core/self-evolution/episode-orchestrator.js +681 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
- package/dist/core/self-evolution/evolving-agent.js +535 -0
- package/dist/core/self-evolution/host-harness.d.ts +14 -15
- package/dist/core/self-evolution/host-harness.js +48 -23
- package/dist/core/self-evolution/index.d.ts +11 -6
- package/dist/core/self-evolution/index.js +20 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +379 -0
- package/dist/core/self-evolution/reward-agent.js +940 -0
- package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
- package/dist/core/self-evolution/reward-aggregator.js +262 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tamper-check.d.ts +24 -0
- package/dist/core/self-evolution/tamper-check.js +236 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/gen-tests.js +1 -1
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +21 -18
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/core/trajectory/scrub.d.ts +27 -0
- package/dist/core/trajectory/scrub.js +79 -0
- package/dist/core/trajectory/skeleton.d.ts +27 -1
- package/dist/core/trajectory/skeleton.js +152 -8
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
- package/dist/core/self-evolution/ga-selection.d.ts +0 -94
- package/dist/core/self-evolution/ga-selection.js +0 -153
- package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
- package/dist/core/self-evolution/proposer-agent.js +0 -326
- package/dist/core/self-evolution/replay-runner.d.ts +0 -100
- package/dist/core/self-evolution/replay-runner.js +0 -170
- package/dist/core/self-evolution/replay.d.ts +0 -45
- package/dist/core/self-evolution/replay.js +0 -56
- package/dist/core/self-evolution/template-variants.d.ts +0 -62
- package/dist/core/self-evolution/template-variants.js +0 -171
- package/dist/core/self-evolution/trajectory.d.ts +0 -65
- package/dist/core/self-evolution/trajectory.js +0 -185
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 演进智能体 EVOLVING AGENT (optimizer.step) — loop v2 (self-evolution as
|
|
3
|
+
* in-context RL).
|
|
4
|
+
*
|
|
5
|
+
* The EVOLVING AGENT is the ONLY agent in the loop that EDITS. Given the
|
|
6
|
+
* 奖励智能体 REWARD AGENT's diagnosis (errors, gaps with section anchors, the
|
|
7
|
+
* 文本梯度 textual gradient, advantage), the 否决缓冲 reject-buffer (directions
|
|
8
|
+
* already vetoed), and the 成功保护 success protections (do-not-prune), it makes
|
|
9
|
+
* ONE bounded edit of ≤ L changed lines (added + removed) with a checkable
|
|
10
|
+
* prediction, OR it refuses when the diagnosis is too weak to name a concrete
|
|
11
|
+
* edit. It NEVER scores — scoring belongs to the 奖励智能体 REWARD AGENT.
|
|
12
|
+
*
|
|
13
|
+
* The 策略 POLICY = design template(主智能体的「权重」)lives in the user's
|
|
14
|
+
* repo; this agent's accepted edit is written back as the NEXT policy version
|
|
15
|
+
* via {@link import('./policy/policy-store.js').advancePolicyVersion}. There is
|
|
16
|
+
* NO candidate dir, NO fitness sidecar, and NO verdict.json on the accept path
|
|
17
|
+
* (those belonged to the GA loop, removed in a later changeset). The episode
|
|
18
|
+
* stage advances to 'evolved' on accept, 'evolution-refused' on refusal.
|
|
19
|
+
*
|
|
20
|
+
* Gates, all must pass before the edit is written back:
|
|
21
|
+
* IN the bounded repair loop (a content-driven failure is REPAIRABLE — it is
|
|
22
|
+
* re-prompted, bounded by maxRepairAttempts, rather than thrown past the loop):
|
|
23
|
+
* - static — frozen/gate-defining-file freeze + target-file scope
|
|
24
|
+
* ({@link validateCandidateEdits}) and the tool-evolution
|
|
25
|
+
* static guard ({@link evaluateToolEvolutionCandidate},
|
|
26
|
+
* the engine `runStaticCandidateGate` runs — adapted to a
|
|
27
|
+
* package-free in-memory call since loop v2 has no
|
|
28
|
+
* candidate dir);
|
|
29
|
+
* - 范围⊆诊断 — every changed (file, section) is covered by a diagnosis
|
|
30
|
+
* gap ({@link checkScopeWithinDiagnosis});
|
|
31
|
+
* - countChangedLines ≤ editBudget (L), and a present/valid prediction.
|
|
32
|
+
* AFTER the loop (NOT repairable — the edit cannot influence it, so a re-prompt
|
|
33
|
+
* would be a category error):
|
|
34
|
+
* - observed-GREEN — the MAIN arm's objective.json must carry a real,
|
|
35
|
+
* trajectory-VERIFIED green signal ({@link isEvidenceComplete}).
|
|
36
|
+
*/
|
|
37
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
38
|
+
import { type CanonicalTarget } from './canonical-targets.js';
|
|
39
|
+
import { type TargetProtection } from './success-channel.js';
|
|
40
|
+
import { type RejectBufferEntry } from './policy/reject-buffer.js';
|
|
41
|
+
import { type PolicyPrediction, type PolicyLedgerEntry } from './policy/policy-store.js';
|
|
42
|
+
import { type DiagnosisGap } from './scope-gate.js';
|
|
43
|
+
import type { WeaknessClass, GapSeverity } from './reward-agent.js';
|
|
44
|
+
/** Default edit budget L: at most this many changed lines (added + removed). */
|
|
45
|
+
export declare const DEFAULT_EVOLVING_AGENT_EDIT_BUDGET = 40;
|
|
46
|
+
/**
|
|
47
|
+
* Floor for the orchestrator's failure-driven 步长 step-size schedule: after a
|
|
48
|
+
* rolled-back edit the next episode's budget is halved toward this floor (never
|
|
49
|
+
* below it), so a struggling lineage takes smaller, more legible steps instead
|
|
50
|
+
* of another full-size swing — the backtracking-line-search / trust-region move
|
|
51
|
+
* (shrink the step after a step that lost ground; SkillOpt's decaying edit
|
|
52
|
+
* budget). The flat default above stays the ceiling for a healthy lineage. The
|
|
53
|
+
* schedule itself lives in the orchestrator (`scheduledEditBudget`); the
|
|
54
|
+
* 演进智能体 EVOLVING AGENT just receives the resolved budget as `editBudget`.
|
|
55
|
+
*/
|
|
56
|
+
export declare const MIN_EVOLVING_AGENT_EDIT_BUDGET = 8;
|
|
57
|
+
/**
|
|
58
|
+
* The slice of the 奖励智能体 REWARD AGENT's `diagnosis.json` the EVOLVING
|
|
59
|
+
* AGENT reads. Kept STRUCTURAL (not a hard import of a Diagnosis type) — the
|
|
60
|
+
* reward agent is a sibling changeset; mirroring the {@link EvidenceReport}
|
|
61
|
+
* idiom in `promote.ts`, this stays a defensive shape so a test can drive it
|
|
62
|
+
* with a hand-built object and additive future fields pass through.
|
|
63
|
+
*/
|
|
64
|
+
export interface EvolvingAgentDiagnosis {
|
|
65
|
+
/** 弃权 abstain: the reward agent found no nameable gap. Refuse-to-spawn. */
|
|
66
|
+
abstained?: boolean;
|
|
67
|
+
/** Named gaps (with section anchors) the edit's scope must be a subset of. */
|
|
68
|
+
gaps?: AnnotatedDiagnosisGap[];
|
|
69
|
+
/** Observed errors the edit should address (rendered into the prompt). */
|
|
70
|
+
errors?: string[];
|
|
71
|
+
/** 文本梯度 textual gradient — the natural-language improvement direction. */
|
|
72
|
+
textualGradient?: string;
|
|
73
|
+
/** advantage = reward(主臂) − reward(基线臂), when measured. */
|
|
74
|
+
advantage?: number | null;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* A diagnosis gap as the EVOLVING AGENT carries it: the scope-gate's structural
|
|
78
|
+
* `{file, section}` augmented with the 奖励智能体 REWARD AGENT's OPTIONAL
|
|
79
|
+
* `weaknessClass`/`severity` annotations (⑥). The scope gate only reads
|
|
80
|
+
* file/section, so these extra fields pass through harmlessly; the prompt
|
|
81
|
+
* renderer surfaces them when present to AIM the bounded edit. Old diagnoses
|
|
82
|
+
* carry neither field and behave exactly as before.
|
|
83
|
+
*/
|
|
84
|
+
export interface AnnotatedDiagnosisGap extends DiagnosisGap {
|
|
85
|
+
weaknessClass?: WeaknessClass;
|
|
86
|
+
severity?: GapSeverity;
|
|
87
|
+
}
|
|
88
|
+
export interface AssembleEvolvingAgentPromptInput {
|
|
89
|
+
/** Resolved canonical target whose `files[]` bound the editable surface. */
|
|
90
|
+
target: CanonicalTarget;
|
|
91
|
+
/** Current on-disk contents of each editable lineage file. */
|
|
92
|
+
currentFiles: {
|
|
93
|
+
relPath: string;
|
|
94
|
+
content: string;
|
|
95
|
+
}[];
|
|
96
|
+
/** The reward agent's diagnosis (already read from diagnosis.json). */
|
|
97
|
+
diagnosis: EvolvingAgentDiagnosis;
|
|
98
|
+
/** Edit budget L (changed lines, added + removed). */
|
|
99
|
+
editBudget: number;
|
|
100
|
+
/** Most-recent 否决缓冲 reject-buffer entries for this target (newest last). */
|
|
101
|
+
rejectBuffer?: RejectBufferEntry[];
|
|
102
|
+
/** Pre-rendered DO-NOT-PRUNE block (成功保护). Omitted when empty. */
|
|
103
|
+
doNotPrune?: string;
|
|
104
|
+
/**
|
|
105
|
+
* One-line 预测校准 prediction-calibration note: the EVOLVING AGENT's recent
|
|
106
|
+
* checkable predictions' hit/miss record, settled by later measurements.
|
|
107
|
+
* Read-only context (it never scores); omitted when there is no settled
|
|
108
|
+
* prediction history, so prompts on early episodes stay byte-identical.
|
|
109
|
+
*/
|
|
110
|
+
calibrationNote?: string;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Assemble the EVOLVING AGENT prompt. Order is stable and sections are
|
|
114
|
+
* omitted-when-empty so prompts on runs with no reject-buffer / no protections
|
|
115
|
+
* stay byte-identical. The editable files are fenced as `<<FILE: relPath>>`.
|
|
116
|
+
*/
|
|
117
|
+
export declare function assembleEvolvingAgentPrompt(input: AssembleEvolvingAgentPromptInput): string;
|
|
118
|
+
/** The agent declined: no concrete edit was nameable. Not an error. */
|
|
119
|
+
export interface EvolvingAgentRefusal {
|
|
120
|
+
kind: 'refusal';
|
|
121
|
+
reason: string;
|
|
122
|
+
}
|
|
123
|
+
/** A concrete bounded edit with its checkable prediction. */
|
|
124
|
+
export interface EvolvingAgentEdit {
|
|
125
|
+
kind: 'edit';
|
|
126
|
+
rationale: string;
|
|
127
|
+
prediction: PolicyPrediction;
|
|
128
|
+
edits: {
|
|
129
|
+
relPath: string;
|
|
130
|
+
content: string;
|
|
131
|
+
}[];
|
|
132
|
+
}
|
|
133
|
+
/** Parsed model response: either a refusal or a concrete bounded edit, discriminated by `kind`. */
export type ParsedEvolvingAgentResponse = EvolvingAgentRefusal | EvolvingAgentEdit;
|
|
134
|
+
/**
|
|
135
|
+
* Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
|
|
136
|
+
* (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
|
|
137
|
+
* edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
|
|
138
|
+
* block, the wrong block count, or a missing/invalid prediction; throws
|
|
139
|
+
* {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
|
|
140
|
+
*
|
|
141
|
+
* Edits are NOT yet scope-validated here (the caller runs the static gate over
|
|
142
|
+
* them); this only enforces the SHAPE of the contract.
|
|
143
|
+
*/
|
|
144
|
+
export declare function parseEvolvingAgentResponse(text: string): ParsedEvolvingAgentResponse;
|
|
145
|
+
export interface RunEvolvingAgentOptions {
|
|
146
|
+
repoRoot: string;
|
|
147
|
+
episodeId: string;
|
|
148
|
+
targetId: string;
|
|
149
|
+
/** Edit budget L (changed lines, added + removed). Default 40. */
|
|
150
|
+
editBudget?: number;
|
|
151
|
+
/** Injected spawn seam for tests; defaults to node's spawn. */
|
|
152
|
+
spawn?: typeof nodeSpawn;
|
|
153
|
+
/** At most this many parse/budget/gate-3 repair re-prompts. Default 2. */
|
|
154
|
+
maxRepairAttempts?: number;
|
|
155
|
+
/** Override the agent binary; defaults to env or 'claude'. */
|
|
156
|
+
binary?: string;
|
|
157
|
+
/**
|
|
158
|
+
* 成功保护 protections feeding the DO-NOT-PRUNE block. Defaults to a FRESH
|
|
159
|
+
* `readProtections` from disk (the green-run-mined, load-bearing sections);
|
|
160
|
+
* tests pass it directly to keep the flow hermetic. An absent file reads as
|
|
161
|
+
* `[]`, so a run with no mined protections renders no block (byte-identical).
|
|
162
|
+
*/
|
|
163
|
+
protections?: TargetProtection[];
|
|
164
|
+
/**
|
|
165
|
+
* Exemplar file paths for the DO-NOT-PRUNE block's `exemplars:` line. Defaults
|
|
166
|
+
* to a FRESH `listExemplarFiles` from disk; tests pass it directly.
|
|
167
|
+
*/
|
|
168
|
+
exemplarPaths?: string[];
|
|
169
|
+
/**
|
|
170
|
+
* One-line 预测校准 prediction-calibration note surfaced to the EVOLVING AGENT
|
|
171
|
+
* (read-only, advisory). Defaults to absent; the orchestrator computes it from
|
|
172
|
+
* the prediction-reconcile ledger via `summarizeCalibration`.
|
|
173
|
+
*/
|
|
174
|
+
calibrationNote?: string;
|
|
175
|
+
}
|
|
176
|
+
/** Outcome: the agent was not spawned (code-side refuse). */
|
|
177
|
+
export interface EvolvingAgentNotSpawned {
|
|
178
|
+
kind: 'not-spawned';
|
|
179
|
+
reason: string;
|
|
180
|
+
}
|
|
181
|
+
/** Outcome: the model refused; recorded as a 'refused' ledger entry. */
|
|
182
|
+
export interface EvolvingAgentRefused {
|
|
183
|
+
kind: 'refused';
|
|
184
|
+
reason: string;
|
|
185
|
+
ledgerEntry: PolicyLedgerEntry;
|
|
186
|
+
}
|
|
187
|
+
/** Outcome: the edit passed all gates and became the next policy version. */
|
|
188
|
+
export interface EvolvingAgentEvolved {
|
|
189
|
+
kind: 'evolved';
|
|
190
|
+
ledgerEntry: PolicyLedgerEntry;
|
|
191
|
+
}
|
|
192
|
+
/** Outcome of `runEvolvingAgent`: not spawned, refused by the model, or evolved; discriminated by `kind`. */
export type RunEvolvingAgentResult = EvolvingAgentNotSpawned | EvolvingAgentRefused | EvolvingAgentEvolved;
|
|
193
|
+
/**
|
|
194
|
+
* Run the 演进智能体 EVOLVING AGENT against an already-scored episode.
|
|
195
|
+
*
|
|
196
|
+
* Flow:
|
|
197
|
+
* 0. Code-side refuse-to-spawn: diagnosis.abstained or no gaps ⇒ not-spawned.
|
|
198
|
+
* 1. Assemble + spawn (fresh context) and parse with repair ×N; over-budget
|
|
199
|
+
* and 范围⊆诊断 (gate-3) violations are repairable too (re-prompt with the
|
|
200
|
+
* violation appended).
|
|
201
|
+
* 2. Model refusal ⇒ {kind:'refused'} + a 'refused' ledger entry.
|
|
202
|
+
* 3. GATES ×3 (static → observed-GREEN → 范围⊆诊断), each ANDed with the
|
|
203
|
+
* ≤ L budget and a valid prediction.
|
|
204
|
+
* 4. All green ⇒ advancePolicyVersion writes the next version; episode stage
|
|
205
|
+
* advances to 'evolved' (or 'evolution-refused' on refusal).
|
|
206
|
+
*/
|
|
207
|
+
export declare function runEvolvingAgent(opts: RunEvolvingAgentOptions): Promise<RunEvolvingAgentResult>;
|
|
208
|
+
//# sourceMappingURL=evolving-agent.d.ts.map
|