synergyspec-selfevolving 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +158 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +431 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/fitness/test-failures.js +10 -2
- package/dist/core/project-config.d.ts +19 -0
- package/dist/core/project-config.js +96 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +192 -0
- package/dist/core/self-evolution/critic-agent.js +568 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
- package/dist/core/self-evolution/episode-orchestrator.js +681 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
- package/dist/core/self-evolution/evolving-agent.js +535 -0
- package/dist/core/self-evolution/host-harness.d.ts +14 -15
- package/dist/core/self-evolution/host-harness.js +48 -23
- package/dist/core/self-evolution/index.d.ts +11 -6
- package/dist/core/self-evolution/index.js +20 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +379 -0
- package/dist/core/self-evolution/reward-agent.js +940 -0
- package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
- package/dist/core/self-evolution/reward-aggregator.js +262 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tamper-check.d.ts +24 -0
- package/dist/core/self-evolution/tamper-check.js +236 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/gen-tests.js +1 -1
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +21 -18
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/core/trajectory/scrub.d.ts +27 -0
- package/dist/core/trajectory/scrub.js +79 -0
- package/dist/core/trajectory/skeleton.d.ts +27 -1
- package/dist/core/trajectory/skeleton.js +152 -8
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
- package/dist/core/self-evolution/ga-selection.d.ts +0 -94
- package/dist/core/self-evolution/ga-selection.js +0 -153
- package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
- package/dist/core/self-evolution/proposer-agent.js +0 -326
- package/dist/core/self-evolution/replay-runner.d.ts +0 -100
- package/dist/core/self-evolution/replay-runner.js +0 -170
- package/dist/core/self-evolution/replay.d.ts +0 -45
- package/dist/core/self-evolution/replay.js +0 -56
- package/dist/core/self-evolution/template-variants.d.ts +0 -62
- package/dist/core/self-evolution/template-variants.js +0 -171
- package/dist/core/self-evolution/trajectory.d.ts +0 -65
- package/dist/core/self-evolution/trajectory.js +0 -185
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { type PolicyPrediction } from './policy-store.js';
|
|
2
|
+
export declare const POLICY_PREDICTION_RECONCILE_FILE = "prediction-reconcile.ndjson";
|
|
3
|
+
/** One settled prediction: what was bet, and whether the measured delta agreed. */
|
|
4
|
+
export interface PredictionReconcileEntry {
|
|
5
|
+
schemaVersion: 1;
|
|
6
|
+
/** ISO-8601 UTC timestamp the prediction was settled. */
|
|
7
|
+
at: string;
|
|
8
|
+
episodeId: string;
|
|
9
|
+
targetId: string;
|
|
10
|
+
/** The 'evolve' version whose prediction this settles. */
|
|
11
|
+
predictedAtVersion: number;
|
|
12
|
+
metric: PolicyPrediction['metric'];
|
|
13
|
+
direction: PolicyPrediction['direction'];
|
|
14
|
+
/**
|
|
15
|
+
* Verifiable `main[metric] − baseline[metric]` for the two arms of the
|
|
16
|
+
* settling episode; `null` when either arm lacked the measured value.
|
|
17
|
+
*/
|
|
18
|
+
observedDelta: number | null;
|
|
19
|
+
/**
|
|
20
|
+
* `true` — the measured delta moved in the predicted direction;
|
|
21
|
+
* `false` — it did not (refuted, incl. no movement);
|
|
22
|
+
* `null` — unsettled (a value was unmeasured).
|
|
23
|
+
*/
|
|
24
|
+
hit: boolean | null;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Read one target's settled predictions, newest LAST. `[]` when the ledger does
|
|
28
|
+
* not exist; malformed lines skipped best-effort.
|
|
29
|
+
*/
|
|
30
|
+
export declare function readPredictionReconcile(repoRoot: string, targetId: string): Promise<PredictionReconcileEntry[]>;
|
|
31
|
+
export interface ReconcilePredictionOptions {
|
|
32
|
+
repoRoot: string;
|
|
33
|
+
targetId: string;
|
|
34
|
+
episodeId: string;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Settle the prediction attached to the version the 主智能体 MAIN AGENT ran this
|
|
38
|
+
* episode against the two arms' measured metric delta, and append a residual.
|
|
39
|
+
*
|
|
40
|
+
* Returns the appended entry, or `null` when there is nothing to settle (the
|
|
41
|
+
* head version carries no prediction — e.g. v0 init or a rollback content
|
|
42
|
+
* version), the lineage is unknown, or this episode is already settled
|
|
43
|
+
* (idempotent). Best-effort: a missing arm objective yields an `unsettled`
|
|
44
|
+
* (`hit: null`) entry rather than a throw.
|
|
45
|
+
*/
|
|
46
|
+
export declare function reconcilePrediction(opts: ReconcilePredictionOptions): Promise<PredictionReconcileEntry | null>;
|
|
47
|
+
/**
|
|
48
|
+
* Fold the recent settled predictions into a one-line-plus-bullets advisory note
|
|
49
|
+
* for the 演进智能体 EVOLVING AGENT's prompt. Counts only SETTLED entries
|
|
50
|
+
* (`hit !== null`); returns `null` when there is no settled history, so early
|
|
51
|
+
* episodes render no calibration block (byte-identical prompts).
|
|
52
|
+
*/
|
|
53
|
+
export declare function summarizeCalibration(repoRoot: string, targetId: string, limit?: number): Promise<string | null>;
|
|
54
|
+
//# sourceMappingURL=prediction-reconcile.d.ts.map
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 预测校准 prediction-reconcile ledger — closes the propose→observe loop.
|
|
3
|
+
*
|
|
4
|
+
* The 演进智能体 EVOLVING AGENT attaches a falsifiable {@link PolicyPrediction}
|
|
5
|
+
* to every 'evolve' step (which scoreboard metric the edit will move, and in
|
|
6
|
+
* which direction). Until now that prediction was a DEAD WRITE: recorded on the
|
|
7
|
+
* 版本账本 ledger, surfaced only as a display string, never compared to what
|
|
8
|
+
* actually happened.
|
|
9
|
+
*
|
|
10
|
+
* This module settles it. When a later episode measures the two arms, the
|
|
11
|
+
* prediction made for the version the 主智能体 MAIN AGENT just ran is checked
|
|
12
|
+
* against the VERIFIABLE per-metric delta `main[metric] − baseline[metric]`
|
|
13
|
+
* (NOT the LLM judge's reward — the settlement is a hard, reproducible signal).
|
|
14
|
+
* A hit/miss residual is appended to a sibling NDJSON ledger, and
|
|
15
|
+
* {@link summarizeCalibration} folds the recent record into a one-line advisory
|
|
16
|
+
* note the orchestrator feeds back to the proposer ("a repeatedly-refuted
|
|
17
|
+
* direction is a weak bet"). The note is ADVISORY ONLY — it never gates an edit.
|
|
18
|
+
*
|
|
19
|
+
* Append-only, one line per settled episode in
|
|
20
|
+
* `<repoRoot>/.synergyspec-selfevolving/self-evolution/policy/prediction-reconcile.ndjson`,
|
|
21
|
+
* newest last. Settlement is idempotent per episode (a crash-resume that
|
|
22
|
+
* re-enters never double-appends); reads skip malformed lines best-effort,
|
|
23
|
+
* matching the repo's other ndjson readers. Every public call is defensive —
|
|
24
|
+
* missing arms / no prior prediction / unreadable objective ⇒ a clean
|
|
25
|
+
* `null`/`[]` rather than a throw that could fail the episode (ledger read errors other than ENOENT are still rethrown).
|
|
26
|
+
*/
|
|
27
|
+
import { promises as fs } from 'node:fs';
|
|
28
|
+
import * as path from 'node:path';
|
|
29
|
+
import { resolvePolicyLayout, readPolicyLedger } from './policy-store.js';
|
|
30
|
+
import { appendFileDurable } from './fs-safe.js';
|
|
31
|
+
import { episodeDir, readEpisode } from '../episode-store.js';
|
|
32
|
+
export const POLICY_PREDICTION_RECONCILE_FILE = 'prediction-reconcile.ndjson';
|
|
33
|
+
/**
 * Build the path of the prediction-reconcile ledger file for a repository.
 *
 * The repo root is made absolute first, then the policy layout's `baseDir`
 * is joined with {@link POLICY_PREDICTION_RECONCILE_FILE}.
 *
 * @param {string} repoRoot - repository root (absolute or relative).
 * @returns {string} path of the ndjson ledger inside the policy base dir.
 */
function reconcilePath(repoRoot) {
    const absoluteRoot = path.resolve(repoRoot);
    const { baseDir } = resolvePolicyLayout(absoluteRoot);
    return path.join(baseDir, POLICY_PREDICTION_RECONCILE_FILE);
}
|
|
36
|
+
/**
 * Tell whether `value` is either `null` or a finite number.
 *
 * @param {unknown} value - candidate value.
 * @returns {boolean} `true` for `null` and for finite numbers; `false` for
 *   `NaN`, `±Infinity`, `undefined` and every non-number.
 */
function isFiniteNumberOrNull(value) {
    if (value === null) {
        return true;
    }
    if (typeof value !== 'number') {
        return false;
    }
    return Number.isFinite(value);
}
|
|
39
|
+
/** Metric names a reconcile entry is allowed to carry. */
const RECONCILE_METRICS = new Set(['loss', 'passRate', 'healthPenalty']);
/**
 * Shape check for one parsed line of the prediction-reconcile ledger.
 *
 * The entry must be an object with `schemaVersion === 1`, non-empty string
 * `at` / `episodeId` / `targetId`, an integer `predictedAtVersion`, a `metric`
 * from {@link RECONCILE_METRICS}, a `direction` of `'down'` or `'up'`, an
 * `observedDelta` that is a finite number or `null`, and a `hit` that is
 * `true`, `false` or `null`.
 *
 * @param {unknown} value - parsed JSON value.
 * @returns {boolean} whether every field passes.
 */
function isValidReconcileEntry(value) {
    if (value === null || typeof value !== 'object') {
        return false;
    }
    const isNonEmptyString = (s) => typeof s === 'string' && s.length > 0;
    // Fields are checked lazily, left to right, in the ledger's field order.
    return (value.schemaVersion === 1 &&
        isNonEmptyString(value.at) &&
        isNonEmptyString(value.episodeId) &&
        isNonEmptyString(value.targetId) &&
        typeof value.predictedAtVersion === 'number' &&
        Number.isInteger(value.predictedAtVersion) &&
        typeof value.metric === 'string' &&
        RECONCILE_METRICS.has(value.metric) &&
        (value.direction === 'down' || value.direction === 'up') &&
        isFiniteNumberOrNull(value.observedDelta) &&
        (value.hit === null || typeof value.hit === 'boolean'));
}
|
|
65
|
+
/**
 * Load the reconcile entries recorded for one target, in file (append) order,
 * so the newest entry is last.
 *
 * A missing ledger file yields `[]`. Blank lines, lines that are not valid
 * JSON, and lines that fail {@link isValidReconcileEntry} are skipped. Any
 * read error other than `ENOENT` is rethrown.
 *
 * @param {string} repoRoot - repository root.
 * @param {string} targetId - only entries with this `targetId` are returned.
 * @returns {Promise<object[]>} the matching entries.
 */
export async function readPredictionReconcile(repoRoot, targetId) {
    let text;
    try {
        text = await fs.readFile(reconcilePath(repoRoot), 'utf8');
    }
    catch (err) {
        if (err.code !== 'ENOENT') {
            throw err;
        }
        return [];
    }
    const matches = [];
    for (const rawLine of text.split(/\r?\n/)) {
        const candidate = rawLine.trim();
        if (candidate.length > 0) {
            let record;
            try {
                record = JSON.parse(candidate);
            }
            catch {
                // Corrupt line: skip it and keep reading.
                continue;
            }
            const belongsToTarget = isValidReconcileEntry(record) && record.targetId === targetId;
            if (belongsToTarget) {
                matches.push(record);
            }
        }
    }
    return matches;
}
|
|
96
|
+
/**
 * Defensively read one numeric metric from an arm's `objective.json`.
 *
 * The file is looked up at `<episodeDir>/<arm>/objective.json`. Every failure
 * mode collapses to `null`: the file cannot be read, it is not valid JSON, the
 * JSON is not an object (e.g. the literal `null`, which would otherwise make
 * the property lookup throw), or the metric is absent or not a finite number.
 *
 * @param {string} repoRoot - repository root.
 * @param {string} episodeId - episode whose arm directory is read.
 * @param {string} arm - arm directory name (e.g. `'main-arm'`).
 * @param {string} metric - property name to read from the objective.
 * @returns {Promise<number|null>} the finite metric value, or `null`.
 */
async function readArmMetric(repoRoot, episodeId, arm, metric) {
    let objective;
    try {
        const objectivePath = path.join(episodeDir(repoRoot, episodeId), arm, 'objective.json');
        objective = JSON.parse(await fs.readFile(objectivePath, 'utf8'));
    }
    catch {
        // Unreadable or malformed objective: treated as "not measured".
        return null;
    }
    // JSON.parse can legitimately return null or a primitive; neither has metrics.
    if (objective === null || typeof objective !== 'object') {
        return null;
    }
    const measured = objective[metric];
    return typeof measured === 'number' && Number.isFinite(measured) ? measured : null;
}
|
|
115
|
+
/**
 * Settle the prediction attached to the policy version the main arm ran in
 * this episode against the two arms' measured metric delta, and append the
 * outcome to the prediction-reconcile ledger.
 *
 * `observedDelta` is `main[metric] - baseline[metric]`, or `null` when either
 * arm has no measured value. `hit` is `null` for an unmeasured delta;
 * otherwise it is `true` only when the delta is strictly negative for a
 * `'down'` prediction or strictly positive for an `'up'` prediction (no
 * movement counts as a miss).
 *
 * @param {{repoRoot: string, targetId: string, episodeId: string}} opts
 * @returns {Promise<object|null>} the appended entry, or `null` when there is
 *   nothing to settle: the episode records no main policy version, no
 *   'evolve' ledger entry for that version carries a prediction, or an entry
 *   for this episode is already in the ledger (idempotent re-entry).
 */
export async function reconcilePrediction(opts) {
    const repoRoot = path.resolve(opts.repoRoot);
    const { targetId, episodeId } = opts;
    const episode = await readEpisode(repoRoot, episodeId);
    // `== null` deliberately matches both null and undefined: an episode that
    // lacks the field has no known lineage either, so skip the ledger read.
    // NOTE(review): `?.` assumes readEpisode may resolve to a nullish value for
    // an unknown episode — confirm against episode-store.
    const mainVersion = episode?.policyVersionMain;
    if (mainVersion == null) {
        return null;
    }
    // The prediction to settle is the one on the 'evolve' entry that produced
    // the version the main arm ran. 'refused' entries reuse the head version
    // and 'rollback' entries carry no prediction, so match on
    // action + version + prediction.
    const ledger = await readPolicyLedger(repoRoot, targetId);
    const evolveEntry = ledger.find((e) => e.action === 'evolve' && e.version === mainVersion && e.prediction !== undefined);
    const prediction = evolveEntry?.prediction;
    if (!prediction) {
        return null;
    }
    // Idempotent: never settle the same episode twice (crash-resume safe).
    const existing = await readPredictionReconcile(repoRoot, targetId);
    if (existing.some((e) => e.episodeId === episodeId)) {
        return null;
    }
    // The two arm reads are independent and never reject, so run them together.
    const [main, baseline] = await Promise.all([
        readArmMetric(repoRoot, episodeId, 'main-arm', prediction.metric),
        readArmMetric(repoRoot, episodeId, 'baseline-arm', prediction.metric),
    ]);
    const observedDelta = main !== null && baseline !== null ? main - baseline : null;
    let hit = null;
    if (observedDelta !== null) {
        hit = prediction.direction === 'down' ? observedDelta < 0 : observedDelta > 0;
    }
    const entry = {
        schemaVersion: 1,
        at: new Date().toISOString(),
        episodeId,
        targetId,
        predictedAtVersion: mainVersion,
        metric: prediction.metric,
        direction: prediction.direction,
        observedDelta,
        hit,
    };
    const layout = resolvePolicyLayout(repoRoot);
    await fs.mkdir(layout.baseDir, { recursive: true });
    await appendFileDurable(reconcilePath(repoRoot), `${JSON.stringify(entry)}\n`);
    return entry;
}
|
|
168
|
+
/** Default number of most-recent settled predictions shown in the note. */
const CALIBRATION_NOTE_LIMIT = 5;
/**
 * Render the recent settled predictions of one target as an advisory note:
 * one summary line followed by one bullet per prediction, oldest first.
 *
 * Only entries whose `hit` is not `null` count. At least one entry is always
 * shown, because the window size is `Math.max(1, limit)`.
 *
 * @param {string} repoRoot - repository root.
 * @param {string} targetId - target whose history is summarized.
 * @param {number} [limit] - how many of the newest settled entries to include.
 * @returns {Promise<string|null>} the newline-joined note, or `null` when the
 *   target has no settled entry.
 */
export async function summarizeCalibration(repoRoot, targetId, limit = CALIBRATION_NOTE_LIMIT) {
    const history = await readPredictionReconcile(repoRoot, targetId);
    const settled = history.filter((entry) => entry.hit !== null);
    if (settled.length === 0) {
        return null;
    }
    const windowSize = Math.max(1, limit);
    const recent = settled.slice(-windowSize);
    let hits = 0;
    const bullets = [];
    for (const entry of recent) {
        if (entry.hit === true) {
            hits += 1;
        }
        const arrow = entry.direction === 'down' ? '↓' : '↑';
        const verdict = entry.hit ? 'held' : 'refuted';
        let delta = '';
        if (entry.observedDelta !== null) {
            delta = ` (Δ=${entry.observedDelta.toFixed(3)})`;
        }
        bullets.push(`- v${entry.predictedAtVersion}: predicted ${entry.metric} ${arrow} — ${verdict}${delta}`);
    }
    const misses = recent.length - hits;
    const headline = `Last ${recent.length} settled prediction(s): ${hits} held, ${misses} refuted.`;
    return [headline, ...bullets].join('\n');
}
|
|
191
|
+
//# sourceMappingURL=prediction-reconcile.js.map
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Why the attempted evolve step was rejected:
|
|
3
|
+
* - `bad-advantage` — the 奖励智能体 REWARD AGENT measured a non-positive
|
|
4
|
+
* advantage (主臂 did not beat the 基线臂).
|
|
5
|
+
* - `human-reject` — a human said no.
|
|
6
|
+
* - `tamper-suspected` — the 防作弊 tamper detector flagged the main arm as
|
|
7
|
+
* having "passed" by weakening its own tests.
|
|
8
|
+
*/
|
|
9
|
+
export type RejectBufferReason = 'bad-advantage' | 'human-reject' | 'tamper-suspected';
|
|
10
|
+
/** Compact description of the rejected edit (the files themselves rolled back). */
|
|
11
|
+
export interface RejectBufferEditSummary {
|
|
12
|
+
/** Repo-relative POSIX paths the rejected edit touched. */
|
|
13
|
+
files: string[];
|
|
14
|
+
linesAdded: number;
|
|
15
|
+
linesRemoved: number;
|
|
16
|
+
/** Short excerpt of the edit's rationale, for the next episode's context. */
|
|
17
|
+
rationaleExcerpt: string;
|
|
18
|
+
}
|
|
19
|
+
/** One line of the 否决缓冲 (`reject-buffer.ndjson`). */
|
|
20
|
+
export interface RejectBufferEntry {
|
|
21
|
+
schemaVersion: 1;
|
|
22
|
+
/** ISO-8601 UTC timestamp the rejection was recorded. */
|
|
23
|
+
at: string;
|
|
24
|
+
episodeId: string;
|
|
25
|
+
targetId: string;
|
|
26
|
+
/** The lineage version the rejected edit was based on. */
|
|
27
|
+
fromVersion: number;
|
|
28
|
+
/** The (now rolled-back) version the rejected edit had advanced to. */
|
|
29
|
+
toVersion: number;
|
|
30
|
+
/** advantage = reward(主臂) − reward(基线臂); null when not measured. */
|
|
31
|
+
advantage: number | null;
|
|
32
|
+
rewardMain: number | null;
|
|
33
|
+
rewardBaseline: number | null;
|
|
34
|
+
/** The 文本梯度 textual gradient the rejected edit was acting on. */
|
|
35
|
+
textualGradientTried: string;
|
|
36
|
+
editSummary: RejectBufferEditSummary;
|
|
37
|
+
reason: RejectBufferReason;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Append one rejection to the 否决缓冲. Fail-closed: an invalid entry throws
|
|
41
|
+
* and nothing is written. Returns the buffer's path.
|
|
42
|
+
*/
|
|
43
|
+
export declare function appendRejectBufferEntry(repoRoot: string, entry: RejectBufferEntry): Promise<string>;
|
|
44
|
+
/**
|
|
45
|
+
* Read one target's rejections, newest LAST (file/append order). `limit`
|
|
46
|
+
* returns only the most recent N (still newest last). Returns `[]` when the
|
|
47
|
+
* buffer does not exist; malformed lines are skipped best-effort.
|
|
48
|
+
*/
|
|
49
|
+
export declare function readRejectBuffer(repoRoot: string, targetId: string, limit?: number): Promise<RejectBufferEntry[]>;
|
|
50
|
+
/**
|
|
51
|
+
* Read EVERY target's rejections, newest LAST (file/append order). Returns
|
|
52
|
+
* `[]` when the buffer does not exist; malformed lines are skipped best-effort.
|
|
53
|
+
*/
|
|
54
|
+
export declare function readRejectBufferAll(repoRoot: string): Promise<RejectBufferEntry[]>;
|
|
55
|
+
//# sourceMappingURL=reject-buffer.d.ts.map
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 否决缓冲 reject-buffer — the append-only record of evolve attempts the loop
|
|
3
|
+
* said NO to: a bad advantage measured by the 奖励智能体 REWARD AGENT
|
|
4
|
+
* (advantage = reward(主臂) − reward(基线臂) came back non-positive), or a
|
|
5
|
+
* human reject, or a `tamper-suspected` flag from the tamper detector. The 演进智能体 EVOLVING AGENT reads this before
|
|
6
|
+
* optimizer.step so it does not retry a 文本梯度 textual gradient that
|
|
7
|
+
* already lost — the rejected edit's summary stays as evidence instead of
|
|
8
|
+
* being silently discarded with the rolled-back files.
|
|
9
|
+
*
|
|
10
|
+
* One NDJSON line per rejection in
|
|
11
|
+
* `<repoRoot>/.synergyspec-selfevolving/self-evolution/policy/reject-buffer.ndjson`,
|
|
12
|
+
* newest last. Appends are fail-closed (an invalid entry throws and writes
|
|
13
|
+
* nothing); reads skip malformed lines best-effort, matching the repo's other
|
|
14
|
+
* ndjson readers.
|
|
15
|
+
*/
|
|
16
|
+
import { promises as fs } from 'node:fs';
|
|
17
|
+
import * as path from 'node:path';
|
|
18
|
+
import { resolvePolicyLayout } from './policy-store.js';
|
|
19
|
+
import { appendFileDurable } from './fs-safe.js';
|
|
20
|
+
/**
 * Accept `null` or any finite number; reject everything else.
 *
 * `Number.isFinite` does not coerce, so non-number values (strings,
 * `undefined`, objects) are rejected, as are `NaN` and `±Infinity`.
 *
 * @param {unknown} value - candidate value.
 * @returns {boolean} whether the value is `null` or a finite number.
 */
function isFiniteNumberOrNull(value) {
    return value === null ? true : Number.isFinite(value);
}
|
|
23
|
+
/**
 * Tell whether `value` is an integer number that is zero or greater.
 *
 * `Number.isInteger` does not coerce, so non-number values are rejected.
 *
 * @param {unknown} value - candidate value.
 * @returns {boolean} `true` only for integers `>= 0`.
 */
function isNonNegativeInteger(value) {
    if (!Number.isInteger(value)) {
        return false;
    }
    return value >= 0;
}
|
|
26
|
+
/**
 * Report the first shape problem of a would-be reject-buffer entry.
 *
 * Checks run in a fixed order (top-level fields, then `editSummary`, then
 * `reason`) and stop at the first failure. Used both by the append path,
 * which throws on a problem, and by the readers, which skip such lines.
 *
 * @param {unknown} value - candidate entry.
 * @returns {string|null} a description of the first problem, or `null` when
 *   the value is a valid entry.
 */
function rejectBufferEntryProblem(value) {
    if (value === null || typeof value !== 'object') {
        return 'entry must be an object';
    }
    const isNonEmptyString = (s) => typeof s === 'string' && s.length > 0;
    const isObject = (o) => o !== null && typeof o === 'object';
    const allowedReasons = ['bad-advantage', 'human-reject', 'tamper-suspected'];
    // Each rule is [predicate that must hold, message when it does not].
    // Predicates are evaluated lazily, so the editSummary field rules only
    // run after editSummary itself is known to be an object.
    const rules = [
        [(e) => e.schemaVersion === 1, 'schemaVersion must be 1'],
        [(e) => isNonEmptyString(e.at), 'at must be an ISO-8601 string'],
        [(e) => isNonEmptyString(e.episodeId), 'episodeId must be a non-empty string'],
        [(e) => isNonEmptyString(e.targetId), 'targetId must be a non-empty string'],
        [(e) => isNonNegativeInteger(e.fromVersion), 'fromVersion must be a non-negative integer'],
        [(e) => isNonNegativeInteger(e.toVersion), 'toVersion must be a non-negative integer'],
        [(e) => isFiniteNumberOrNull(e.advantage), 'advantage must be a finite number or null'],
        [(e) => isFiniteNumberOrNull(e.rewardMain), 'rewardMain must be a finite number or null'],
        [(e) => isFiniteNumberOrNull(e.rewardBaseline), 'rewardBaseline must be a finite number or null'],
        [
            (e) => typeof e.textualGradientTried === 'string',
            'textualGradientTried must be a string (the 文本梯度 textual gradient that was attempted)',
        ],
        [(e) => isObject(e.editSummary), 'editSummary must be an object'],
        [
            (e) => Array.isArray(e.editSummary.files) && e.editSummary.files.every((f) => typeof f === 'string'),
            'editSummary.files must be string[]',
        ],
        [
            (e) => isNonNegativeInteger(e.editSummary.linesAdded),
            'editSummary.linesAdded must be a non-negative integer',
        ],
        [
            (e) => isNonNegativeInteger(e.editSummary.linesRemoved),
            'editSummary.linesRemoved must be a non-negative integer',
        ],
        [
            (e) => typeof e.editSummary.rationaleExcerpt === 'string',
            'editSummary.rationaleExcerpt must be a string',
        ],
        [
            (e) => allowedReasons.includes(e.reason),
            "reason must be 'bad-advantage', 'human-reject', or 'tamper-suspected'",
        ],
    ];
    for (const [holds, message] of rules) {
        if (!holds(value)) {
            return message;
        }
    }
    return null;
}
|
|
81
|
+
/**
 * Append one rejection record to the reject-buffer as a single NDJSON line.
 *
 * The entry is validated first; an invalid entry throws before anything is
 * created or written. Otherwise the policy base directory is created if
 * needed and the serialized entry is appended via `appendFileDurable`.
 *
 * @param {string} repoRoot - repository root (made absolute before use).
 * @param {object} entry - the rejection record to append.
 * @returns {Promise<string>} path of the reject-buffer file.
 * @throws {Error} when the entry fails shape validation.
 */
export async function appendRejectBufferEntry(repoRoot, entry) {
    const shapeProblem = rejectBufferEntryProblem(entry);
    if (shapeProblem !== null) {
        throw new Error(`Invalid 否决缓冲 reject-buffer entry: ${shapeProblem}`);
    }
    const absoluteRoot = path.resolve(repoRoot);
    const { baseDir, rejectBufferPath } = resolvePolicyLayout(absoluteRoot);
    await fs.mkdir(baseDir, { recursive: true });
    // A later, separate process relies on this record, hence the durable append.
    const ndjsonLine = `${JSON.stringify(entry)}\n`;
    await appendFileDurable(rejectBufferPath, ndjsonLine);
    return rejectBufferPath;
}
|
|
97
|
+
/**
 * Read one target's rejections, in file (append) order so the newest is last.
 *
 * Reading, JSON parsing and shape validation are delegated to
 * {@link readRejectBufferAll}, so both readers share one definition of a
 * readable line; this function only filters by target and applies `limit`.
 *
 * @param {string} repoRoot - repository root.
 * @param {string} targetId - only entries with this `targetId` are returned.
 * @param {number} [limit] - when given, return only the most recent `limit`
 *   entries (still newest last); a value `<= 0` yields `[]`. When omitted,
 *   every matching entry is returned.
 * @returns {Promise<object[]>} matching entries; `[]` when the buffer file
 *   does not exist.
 */
export async function readRejectBuffer(repoRoot, targetId, limit) {
    const everyTarget = await readRejectBufferAll(repoRoot);
    const entries = everyTarget.filter((entry) => entry.targetId === targetId);
    if (limit === undefined) {
        return entries;
    }
    if (limit <= 0) {
        return [];
    }
    // Negative slice start keeps the tail, i.e. the newest `limit` entries.
    return entries.slice(-limit);
}
|
|
137
|
+
/**
 * Read the rejections of every target, in file (append) order so the newest
 * is last.
 *
 * A missing buffer file yields `[]`; any other read error is rethrown. Blank
 * lines, lines that are not valid JSON, and lines for which
 * `rejectBufferEntryProblem` reports a problem are skipped.
 *
 * @param {string} repoRoot - repository root (made absolute before use).
 * @returns {Promise<object[]>} every readable entry.
 */
export async function readRejectBufferAll(repoRoot) {
    const { rejectBufferPath } = resolvePolicyLayout(path.resolve(repoRoot));
    let contents;
    try {
        contents = await fs.readFile(rejectBufferPath, 'utf8');
    }
    catch (err) {
        if (err.code !== 'ENOENT') {
            throw err;
        }
        return [];
    }
    // Parse one line into { ok, value }, so a corrupt line is dropped, not thrown.
    const tryParse = (text) => {
        try {
            return { ok: true, value: JSON.parse(text) };
        }
        catch {
            return { ok: false, value: undefined };
        }
    };
    return contents
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .map(tryParse)
        .filter((result) => result.ok && rejectBufferEntryProblem(result.value) === null)
        .map((result) => result.value);
}
|
|
170
|
+
//# sourceMappingURL=reject-buffer.js.map
|
|
@@ -99,7 +99,7 @@ export interface AutoPromoteDecision {
|
|
|
99
99
|
reason: string;
|
|
100
100
|
}
|
|
101
101
|
/**
|
|
102
|
-
* Pure auto-promote predicate for one-button
|
|
102
|
+
* Pure auto-promote predicate for one-button promote. The static gate +
|
|
103
103
|
* per-target switch are hard prerequisites; the fitness comparison is the
|
|
104
104
|
* regression guard.
|
|
105
105
|
*
|
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
*/
|
|
24
24
|
import { promises as fs } from 'node:fs';
|
|
25
25
|
import * as path from 'node:path';
|
|
26
|
-
import * as crypto from 'node:crypto';
|
|
27
26
|
import { GATE_DEFINING_FILES } from './candidate-gates.js';
|
|
27
|
+
import { assertWithinRepo, writeFileAtomic } from './policy/fs-safe.js';
|
|
28
28
|
import { resolveTargetLocalFiles } from './local-targets.js';
|
|
29
29
|
import { isCanonicalTargetEvolvable, } from './target-evolution.js';
|
|
30
30
|
import { readCandidatePackage, updateCandidateStatus, } from './candidates.js';
|
|
@@ -88,7 +88,7 @@ export async function applyCandidatePromotion(layout, candidateId, opts) {
|
|
|
88
88
|
assertWithinRepo(repoRoot, abs);
|
|
89
89
|
if (!(await fileExists(abs))) {
|
|
90
90
|
throw new Error(`Refusing to promote ${candidateId}: target file does not exist on disk: ${rel} ` +
|
|
91
|
-
`(
|
|
91
|
+
`(self-evolution only edits existing local files).`);
|
|
92
92
|
}
|
|
93
93
|
resolved.push({ rel, abs, content: edit.content });
|
|
94
94
|
}
|
|
@@ -143,11 +143,11 @@ export async function applyCandidatePromotion(layout, candidateId, opts) {
|
|
|
143
143
|
// Lifecycle bridge: ready-for-eval -> eval-passed -> promoted.
|
|
144
144
|
if (startStatus === 'ready-for-eval') {
|
|
145
145
|
await updateStatus(layout, candidateId, 'eval-passed', {
|
|
146
|
-
rationale: '
|
|
146
|
+
rationale: 'promote: static gate stands in for the full eval suite (MVP)',
|
|
147
147
|
});
|
|
148
148
|
}
|
|
149
149
|
promoted = await updateStatus(layout, candidateId, 'promoted', {
|
|
150
|
-
rationale: `
|
|
150
|
+
rationale: `promote: applied ${appliedFiles.length} file(s) to canonical targets [${candidate.targetIds.join(', ')}]`,
|
|
151
151
|
});
|
|
152
152
|
}
|
|
153
153
|
catch (err) {
|
|
@@ -194,7 +194,7 @@ export async function rollbackCandidatePromotion(layout, candidateId, opts) {
|
|
|
194
194
|
restoredFiles.push(entry.relPath);
|
|
195
195
|
}
|
|
196
196
|
const rolled = await updateCandidateStatus(layout, candidateId, 'rolled-back', {
|
|
197
|
-
rationale: '
|
|
197
|
+
rationale: 'rollback: restored pre-promotion snapshot',
|
|
198
198
|
});
|
|
199
199
|
return { candidateId, status: rolled.status, restoredFiles };
|
|
200
200
|
}
|
|
@@ -205,7 +205,7 @@ export async function rollbackCandidatePromotion(layout, candidateId, opts) {
|
|
|
205
205
|
*/
|
|
206
206
|
export const DEFAULT_HEALTH_REGRESSION_MARGIN = 0.05;
|
|
207
207
|
/**
|
|
208
|
-
* Pure auto-promote predicate for one-button
|
|
208
|
+
* Pure auto-promote predicate for one-button promote. The static gate +
|
|
209
209
|
* per-target switch are hard prerequisites; the fitness comparison is the
|
|
210
210
|
* regression guard.
|
|
211
211
|
*
|
|
@@ -377,31 +377,4 @@ async function fileExists(abs) {
|
|
|
377
377
|
return false;
|
|
378
378
|
}
|
|
379
379
|
}
|
|
380
|
-
/** Throw if `abs` resolves outside `repoRoot`. */
|
|
381
|
-
function assertWithinRepo(repoRoot, abs) {
|
|
382
|
-
const base = path.resolve(repoRoot);
|
|
383
|
-
const target = path.resolve(abs);
|
|
384
|
-
const rel = path.relative(base, target);
|
|
385
|
-
if (rel === '' || rel.startsWith('..') || path.isAbsolute(rel)) {
|
|
386
|
-
throw new Error(`Refusing to write outside repo root. root=${base} target=${target}`);
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
/** Atomic write via sibling tmp file + rename; cleans up on rename failure. */
|
|
390
|
-
async function writeFileAtomic(abs, content) {
|
|
391
|
-
const dir = path.dirname(abs);
|
|
392
|
-
const tmp = path.join(dir, `.${path.basename(abs)}.tmp-${crypto.randomBytes(4).toString('hex')}`);
|
|
393
|
-
await fs.writeFile(tmp, content, 'utf8');
|
|
394
|
-
try {
|
|
395
|
-
await fs.rename(tmp, abs);
|
|
396
|
-
}
|
|
397
|
-
catch (err) {
|
|
398
|
-
try {
|
|
399
|
-
await fs.rm(tmp, { force: true });
|
|
400
|
-
}
|
|
401
|
-
catch {
|
|
402
|
-
// ignore
|
|
403
|
-
}
|
|
404
|
-
throw err;
|
|
405
|
-
}
|
|
406
|
-
}
|
|
407
380
|
//# sourceMappingURL=promote.js.map
|
|
@@ -28,8 +28,7 @@ import * as crypto from 'node:crypto';
|
|
|
28
28
|
import { readCandidatePackage, setCandidateArtifactPath, } from './candidates.js';
|
|
29
29
|
import { lookupCanonicalTarget, } from './canonical-targets.js';
|
|
30
30
|
import { candidateEvalReportSchema } from './eval-report.js';
|
|
31
|
-
import { readCandidateFitness, } from './candidate-fitness.js';
|
|
32
|
-
import { readPromotedBaselineLoss } from './ga-selection.js';
|
|
31
|
+
import { readCandidateFitness, readPromotedBaselineLoss, } from './candidate-fitness.js';
|
|
33
32
|
import { DEFAULT_LOSS_REGRESSION_MARGIN } from './promote.js';
|
|
34
33
|
/** Risk tier ordering used when comparing candidate risk to target risk. */
|
|
35
34
|
const RISK_ORDER = {
|