synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +158 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +431 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/fitness/test-failures.js +10 -2
  15. package/dist/core/project-config.d.ts +19 -0
  16. package/dist/core/project-config.js +96 -0
  17. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  18. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  19. package/dist/core/self-evolution/candidates.d.ts +0 -9
  20. package/dist/core/self-evolution/critic-agent.d.ts +192 -0
  21. package/dist/core/self-evolution/critic-agent.js +568 -0
  22. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  23. package/dist/core/self-evolution/edits-contract.js +89 -0
  24. package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
  25. package/dist/core/self-evolution/episode-orchestrator.js +681 -0
  26. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  27. package/dist/core/self-evolution/episode-store.js +573 -0
  28. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  29. package/dist/core/self-evolution/evolution-switches.js +5 -10
  30. package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
  31. package/dist/core/self-evolution/evolving-agent.js +535 -0
  32. package/dist/core/self-evolution/host-harness.d.ts +14 -15
  33. package/dist/core/self-evolution/host-harness.js +48 -23
  34. package/dist/core/self-evolution/index.d.ts +11 -6
  35. package/dist/core/self-evolution/index.js +20 -6
  36. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  37. package/dist/core/self-evolution/line-diff.js +130 -0
  38. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  39. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  40. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  41. package/dist/core/self-evolution/policy/index.js +13 -0
  42. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  43. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  44. package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
  45. package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
  46. package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
  47. package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
  48. package/dist/core/self-evolution/promote.d.ts +1 -1
  49. package/dist/core/self-evolution/promote.js +6 -33
  50. package/dist/core/self-evolution/promotion.js +1 -2
  51. package/dist/core/self-evolution/reward-agent.d.ts +379 -0
  52. package/dist/core/self-evolution/reward-agent.js +940 -0
  53. package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
  54. package/dist/core/self-evolution/reward-aggregator.js +262 -0
  55. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  56. package/dist/core/self-evolution/scope-gate.js +107 -0
  57. package/dist/core/self-evolution/success-channel.js +2 -2
  58. package/dist/core/self-evolution/tamper-check.d.ts +24 -0
  59. package/dist/core/self-evolution/tamper-check.js +236 -0
  60. package/dist/core/self-evolution/tool-evolution.js +2 -13
  61. package/dist/core/self-evolution/verdict.d.ts +8 -5
  62. package/dist/core/self-evolution/verdict.js +4 -7
  63. package/dist/core/templates/workflows/gen-tests.js +1 -1
  64. package/dist/core/templates/workflows/learn.d.ts +3 -2
  65. package/dist/core/templates/workflows/learn.js +21 -18
  66. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  67. package/dist/core/templates/workflows/self-evolving.js +62 -172
  68. package/dist/core/trajectory/scrub.d.ts +27 -0
  69. package/dist/core/trajectory/scrub.js +79 -0
  70. package/dist/core/trajectory/skeleton.d.ts +27 -1
  71. package/dist/core/trajectory/skeleton.js +152 -8
  72. package/dist/dashboard/data.d.ts +25 -51
  73. package/dist/dashboard/data.js +68 -180
  74. package/dist/dashboard/react-client.js +458 -503
  75. package/dist/dashboard/react-styles.js +3 -3
  76. package/dist/dashboard/server.js +23 -17
  77. package/dist/ui/ascii-patterns.d.ts +7 -15
  78. package/dist/ui/ascii-patterns.js +123 -54
  79. package/dist/ui/welcome-screen.d.ts +0 -14
  80. package/dist/ui/welcome-screen.js +16 -35
  81. package/package.json +1 -1
  82. package/dist/core/self-evolution/ga-selection.d.ts +0 -94
  83. package/dist/core/self-evolution/ga-selection.js +0 -153
  84. package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
  85. package/dist/core/self-evolution/proposer-agent.js +0 -326
  86. package/dist/core/self-evolution/replay-runner.d.ts +0 -100
  87. package/dist/core/self-evolution/replay-runner.js +0 -170
  88. package/dist/core/self-evolution/replay.d.ts +0 -45
  89. package/dist/core/self-evolution/replay.js +0 -56
  90. package/dist/core/self-evolution/template-variants.d.ts +0 -62
  91. package/dist/core/self-evolution/template-variants.js +0 -171
  92. package/dist/core/self-evolution/trajectory.d.ts +0 -65
  93. package/dist/core/self-evolution/trajectory.js +0 -185
@@ -0,0 +1,208 @@
1
+ /**
2
+ * 演进智能体 EVOLVING AGENT (optimizer.step) — loop v2 (self-evolution as
3
+ * in-context RL).
4
+ *
5
+ * The EVOLVING AGENT is the ONLY agent in the loop that EDITS. Given the
6
+ * 奖励智能体 REWARD AGENT's diagnosis (errors, gaps with section anchors, the
7
+ * 文本梯度 textual gradient, advantage), the 否决缓冲 reject-buffer (directions
8
+ * already vetoed), and the 成功保护 success protections (do-not-prune), it makes
9
+ * ONE bounded edit of ≤ L changed lines (added + removed) with a checkable
10
+ * prediction, OR it refuses when the diagnosis is too weak to name a concrete
11
+ * edit. It NEVER scores — scoring belongs to the 奖励智能体 REWARD AGENT.
12
+ *
13
+ * The 策略 POLICY = design template (主智能体的「权重」, the main agent's "weights") lives in the user's
14
+ * repo; this agent's accepted edit is written back as the NEXT policy version
15
+ * via {@link import('./policy/policy-store.js').advancePolicyVersion}. There is
16
+ * NO candidate dir, NO fitness sidecar, and NO verdict.json on the accept path
17
+ * (those belonged to the GA loop, removed in a later changeset). The episode
18
+ * stage advances to 'evolved' on accept, 'evolution-refused' on refusal.
19
+ *
20
+ * Gates, all must pass before the edit is written back:
21
+ * IN the bounded repair loop (a content-driven failure is REPAIRABLE — it is
22
+ * re-prompted, bounded by maxRepairAttempts, rather than thrown past the loop):
23
+ * - static — frozen/gate-defining-file freeze + target-file scope
24
+ * ({@link validateCandidateEdits}) and the tool-evolution
25
+ * static guard ({@link evaluateToolEvolutionCandidate},
26
+ * the engine `runStaticCandidateGate` runs — adapted to a
27
+ * package-free in-memory call since loop v2 has no
28
+ * candidate dir);
29
+ * - 范围⊆诊断 — every changed (file, section) is covered by a diagnosis
30
+ * gap ({@link checkScopeWithinDiagnosis});
31
+ * - countChangedLines ≤ editBudget (L), and a present/valid prediction.
32
+ * AFTER the loop (NOT repairable — the edit cannot influence it, so a re-prompt
33
+ * would be a category error):
34
+ * - observed-GREEN — the MAIN arm's objective.json must carry a real,
35
+ * trajectory-VERIFIED green signal ({@link isEvidenceComplete}).
36
+ */
37
+ import { spawn as nodeSpawn } from 'node:child_process';
38
+ import { type CanonicalTarget } from './canonical-targets.js';
39
+ import { type TargetProtection } from './success-channel.js';
40
+ import { type RejectBufferEntry } from './policy/reject-buffer.js';
41
+ import { type PolicyPrediction, type PolicyLedgerEntry } from './policy/policy-store.js';
42
+ import { type DiagnosisGap } from './scope-gate.js';
43
+ import type { WeaknessClass, GapSeverity } from './reward-agent.js';
44
+ /** Default edit budget L: at most this many changed lines (added + removed). */
45
+ export declare const DEFAULT_EVOLVING_AGENT_EDIT_BUDGET = 40;
46
+ /**
47
+ * Floor for the orchestrator's failure-driven 步长 step-size schedule: after a
48
+ * rolled-back edit the next episode's budget is halved toward this floor (never
49
+ * below it), so a struggling lineage takes smaller, more legible steps instead
50
+ * of another full-size swing — the backtracking-line-search / trust-region move
51
+ * (shrink the step after a step that lost ground; SkillOpt's decaying edit
52
+ * budget). The flat default above stays the ceiling for a healthy lineage. The
53
+ * schedule itself lives in the orchestrator (`scheduledEditBudget`); the
54
+ * 演进智能体 EVOLVING AGENT just receives the resolved budget as `editBudget`.
55
+ */
56
+ export declare const MIN_EVOLVING_AGENT_EDIT_BUDGET = 8;
57
+ /**
58
+ * The slice of the 奖励智能体 REWARD AGENT's `diagnosis.json` the EVOLVING
59
+ * AGENT reads. Kept STRUCTURAL (not a hard import of a Diagnosis type) — the
60
+ * reward agent is a sibling changeset; mirroring the {@link EvidenceReport}
61
+ * idiom in `promote.ts`, this stays a defensive shape so a test can drive it
62
+ * with a hand-built object and additive future fields pass through.
63
+ */
64
+ export interface EvolvingAgentDiagnosis {
65
+ /** 弃权 abstain: the reward agent found no nameable gap. Refuse-to-spawn. */
66
+ abstained?: boolean;
67
+ /** Named gaps (with section anchors) the edit's scope must be a subset of. */
68
+ gaps?: AnnotatedDiagnosisGap[];
69
+ /** Observed errors the edit should address (rendered into the prompt). */
70
+ errors?: string[];
71
+ /** 文本梯度 textual gradient — the natural-language improvement direction. */
72
+ textualGradient?: string;
73
+ /** advantage = reward(主臂) − reward(基线臂), when measured. */
74
+ advantage?: number | null;
75
+ }
76
+ /**
77
+ * A diagnosis gap as the EVOLVING AGENT carries it: the scope-gate's structural
78
+ * `{file, section}` augmented with the 奖励智能体 REWARD AGENT's OPTIONAL
79
+ * `weaknessClass`/`severity` annotations (⑥). The scope gate only reads
80
+ * file/section, so these extra fields pass through harmlessly; the prompt
81
+ * renderer surfaces them when present to AIM the bounded edit. Old diagnoses
82
+ * carry neither field and behave exactly as before.
83
+ */
84
+ export interface AnnotatedDiagnosisGap extends DiagnosisGap {
85
+ weaknessClass?: WeaknessClass;
86
+ severity?: GapSeverity;
87
+ }
88
+ export interface AssembleEvolvingAgentPromptInput {
89
+ /** Resolved canonical target whose `files[]` bound the editable surface. */
90
+ target: CanonicalTarget;
91
+ /** Current on-disk contents of each editable lineage file. */
92
+ currentFiles: {
93
+ relPath: string;
94
+ content: string;
95
+ }[];
96
+ /** The reward agent's diagnosis (already read from diagnosis.json). */
97
+ diagnosis: EvolvingAgentDiagnosis;
98
+ /** Edit budget L (changed lines, added + removed). */
99
+ editBudget: number;
100
+ /** Most-recent 否决缓冲 reject-buffer entries for this target (newest last). */
101
+ rejectBuffer?: RejectBufferEntry[];
102
+ /** Pre-rendered DO-NOT-PRUNE block (成功保护). Omitted when empty. */
103
+ doNotPrune?: string;
104
+ /**
105
+ * One-line 预测校准 prediction-calibration note: the proposer's recent
106
+ * checkable predictions' hit/miss record, settled by later measurements.
107
+ * Read-only context (it never scores); omitted when there is no settled
108
+ * prediction history, so prompts on early episodes stay byte-identical.
109
+ */
110
+ calibrationNote?: string;
111
+ }
112
+ /**
113
+ * Assemble the EVOLVING AGENT prompt. Order is stable and sections are
114
+ * omitted-when-empty so prompts on runs with no reject-buffer / no protections
115
+ * stay byte-identical. The editable files are fenced as `<<FILE: relPath>>`.
116
+ */
117
+ export declare function assembleEvolvingAgentPrompt(input: AssembleEvolvingAgentPromptInput): string;
118
+ /** The agent declined: no concrete edit was nameable. Not an error. */
119
+ export interface EvolvingAgentRefusal {
120
+ kind: 'refusal';
121
+ reason: string;
122
+ }
123
+ /** A concrete bounded edit with its checkable prediction. */
124
+ export interface EvolvingAgentEdit {
125
+ kind: 'edit';
126
+ rationale: string;
127
+ prediction: PolicyPrediction;
128
+ edits: {
129
+ relPath: string;
130
+ content: string;
131
+ }[];
132
+ }
133
+ export type ParsedEvolvingAgentResponse = EvolvingAgentRefusal | EvolvingAgentEdit;
134
+ /**
135
+ * Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
136
+ * (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
137
+ * edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
138
+ * block, the wrong block count, or a missing/invalid prediction; throws
139
+ * {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
140
+ *
141
+ * Edits are NOT yet scope-validated here (the caller runs the static gate over
142
+ * them); this only enforces the SHAPE of the contract.
143
+ */
144
+ export declare function parseEvolvingAgentResponse(text: string): ParsedEvolvingAgentResponse;
145
+ export interface RunEvolvingAgentOptions {
146
+ repoRoot: string;
147
+ episodeId: string;
148
+ targetId: string;
149
+ /** Edit budget L (changed lines, added + removed). Default 40. */
150
+ editBudget?: number;
151
+ /** Injected spawn seam for tests; defaults to node's spawn. */
152
+ spawn?: typeof nodeSpawn;
153
+ /** At most this many parse/budget/gate-3 repair re-prompts. Default 2. */
154
+ maxRepairAttempts?: number;
155
+ /** Override the agent binary; defaults to env or 'claude'. */
156
+ binary?: string;
157
+ /**
158
+ * 成功保护 protections feeding the DO-NOT-PRUNE block. Defaults to a FRESH
159
+ * `readProtections` from disk (the green-run-mined, load-bearing sections);
160
+ * tests pass it directly to keep the flow hermetic. An absent file reads as
161
+ * `[]`, so a run with no mined protections renders no block (byte-identical).
162
+ */
163
+ protections?: TargetProtection[];
164
+ /**
165
+ * Exemplar file paths for the DO-NOT-PRUNE block's `exemplars:` line. Defaults
166
+ * to a FRESH `listExemplarFiles` from disk; tests pass it directly.
167
+ */
168
+ exemplarPaths?: string[];
169
+ /**
170
+ * One-line 预测校准 prediction-calibration note surfaced to the proposer
171
+ * (read-only, advisory). Defaults to absent; the orchestrator computes it from
172
+ * the prediction-reconcile ledger via `summarizeCalibration`.
173
+ */
174
+ calibrationNote?: string;
175
+ }
176
+ /** Outcome: the agent was not spawned (code-side refuse). */
177
+ export interface EvolvingAgentNotSpawned {
178
+ kind: 'not-spawned';
179
+ reason: string;
180
+ }
181
+ /** Outcome: the model refused; recorded as a 'refused' ledger entry. */
182
+ export interface EvolvingAgentRefused {
183
+ kind: 'refused';
184
+ reason: string;
185
+ ledgerEntry: PolicyLedgerEntry;
186
+ }
187
+ /** Outcome: the edit passed all gates and became the next policy version. */
188
+ export interface EvolvingAgentEvolved {
189
+ kind: 'evolved';
190
+ ledgerEntry: PolicyLedgerEntry;
191
+ }
192
+ export type RunEvolvingAgentResult = EvolvingAgentNotSpawned | EvolvingAgentRefused | EvolvingAgentEvolved;
193
+ /**
194
+ * Run the 演进智能体 EVOLVING AGENT against an already-scored episode.
195
+ *
196
+ * Flow:
197
+ * 0. Code-side refuse-to-spawn: diagnosis.abstained or no gaps ⇒ not-spawned.
198
+ * 1. Assemble + spawn (fresh context) and parse with repair ×N; over-budget
199
+ * and 范围⊆诊断 (gate-3) violations are repairable too (re-prompt with the
200
+ * violation appended).
201
+ * 2. Model refusal ⇒ {kind:'refused'} + a 'refused' ledger entry.
202
+ * 3. GATES ×3 (static → observed-GREEN → 范围⊆诊断; only observed-GREEN runs
203
+ * AFTER the repair loop and is not repairable), each ANDed with the ≤ L budget and a valid prediction.
204
+ * 4. All green ⇒ advancePolicyVersion writes the next version; episode stage
205
+ * advances to 'evolved' (or 'evolution-refused' on refusal).
206
+ */
207
+ export declare function runEvolvingAgent(opts: RunEvolvingAgentOptions): Promise<RunEvolvingAgentResult>;
208
+ //# sourceMappingURL=evolving-agent.d.ts.map