synergyspec-selfevolving 2.1.3 → 2.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/change-readiness.js +53 -6
- package/dist/core/self-evolution/episode-orchestrator.d.ts +9 -7
- package/dist/core/self-evolution/episode-orchestrator.js +67 -40
- package/dist/core/self-evolution/evolving-agent.d.ts +8 -8
- package/dist/core/self-evolution/evolving-agent.js +22 -24
- package/dist/core/templates/workflows/learn.js +1 -1
- package/package.json +4 -3
- package/schemas/spec-driven/templates/design.md +2 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { promises as fs } from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
|
|
4
|
+
import { listEpisodes } from './self-evolution/episode-store.js';
|
|
4
5
|
const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
|
|
5
6
|
const REQUIRED_EVIDENCE_FILES = [
|
|
6
7
|
['specTests', 'spec-tests.md'],
|
|
@@ -45,7 +46,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
45
46
|
const artifactStatus = deriveArtifactWorkflowStatus(artifactGraph);
|
|
46
47
|
const taskReadiness = await readTaskReadiness(context.changeDir);
|
|
47
48
|
const evidence = await readEvidenceReadiness(context.changeDir);
|
|
48
|
-
const evolution = await readEvolutionOutcome(context.changeDir);
|
|
49
|
+
const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
|
|
49
50
|
const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
|
|
50
51
|
return {
|
|
51
52
|
changeName,
|
|
@@ -141,18 +142,21 @@ async function readEvidenceReadiness(changeDir) {
|
|
|
141
142
|
};
|
|
142
143
|
}
|
|
143
144
|
/**
|
|
144
|
-
* Read the CLI-written evolution outcome for the change, if any.
|
|
145
|
-
*
|
|
146
|
-
*
|
|
145
|
+
* Read the CLI-written evolution outcome for the change, if any. When the manual
|
|
146
|
+
* evolution-result file is absent, fall back to the durable loop-v2 episode store
|
|
147
|
+
* so a failed `learn --apply` / self-evolution episode is not mislabeled
|
|
148
|
+
* `not-run`. Defensive: parse errors / unknown outcomes degrade to `'not-run'`
|
|
149
|
+
* (forward compatible and never throws), so `status` can always render an
|
|
150
|
+
* Evolution line.
|
|
147
151
|
*/
|
|
148
|
-
async function readEvolutionOutcome(changeDir) {
|
|
152
|
+
async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
|
|
149
153
|
const notRun = { status: 'not-run', promoted: false, promotedFiles: [] };
|
|
150
154
|
let raw;
|
|
151
155
|
try {
|
|
152
156
|
raw = await fs.readFile(path.join(changeDir, 'evolution-result.json'), 'utf-8');
|
|
153
157
|
}
|
|
154
158
|
catch {
|
|
155
|
-
return notRun;
|
|
159
|
+
return (await readLatestEpisodeOutcome(projectRoot, changeDir, changeName)) ?? notRun;
|
|
156
160
|
}
|
|
157
161
|
try {
|
|
158
162
|
const record = JSON.parse(raw);
|
|
@@ -189,6 +193,49 @@ async function readEvolutionOutcome(changeDir) {
|
|
|
189
193
|
return notRun;
|
|
190
194
|
}
|
|
191
195
|
}
|
|
196
|
+
/**
 * Fallback used when `evolution-result.json` is absent: derive an evolution
 * outcome for the change from the episode store.
 *
 * Selects the first episode returned by `listEpisodes(projectRoot)` whose
 * `changeName` equals `changeName`, or whose `changeDirPath` resolves to the
 * same absolute path as `changeDir`, and maps its stage:
 *   - 'errored'           -> status 'error'   (reason = episode.terminalError)
 *   - 'evolution-refused' -> status 'refused'
 *   - 'evolved'           -> status 'promoted'
 *
 * Returns `null` when the store cannot be listed, does not yield an array,
 * has no matching episode, or the matching episode is in any other stage.
 *
 * This runs inside the caller's `catch` arm, so it must not throw for
 * malformed store entries: episodes that are nullish or carry a non-string
 * `changeDirPath` are skipped instead of being handed to `path.resolve`
 * (which raises a TypeError on non-string arguments).
 *
 * NOTE(review): "latest" relies on `listEpisodes` returning newest-first;
 * that ordering is not visible here — confirm against the episode store.
 *
 * @param {string} projectRoot Root passed through to `listEpisodes`.
 * @param {string} changeDir Directory of the change being inspected.
 * @param {string} changeName Name of the change being inspected.
 * @returns {Promise<object|null>} Outcome record, or null when none applies.
 */
async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
    let episodes;
    try {
        episodes = await listEpisodes(projectRoot);
    }
    catch {
        return null;
    }
    if (!Array.isArray(episodes))
        return null;
    const resolvedChangeDir = path.resolve(changeDir);
    const episode = episodes.find((ep) => {
        if (!ep)
            return false;
        if (ep.changeName === changeName)
            return true;
        // Only resolve real strings; path.resolve throws on anything else.
        return typeof ep.changeDirPath === 'string'
            && path.resolve(ep.changeDirPath) === resolvedChangeDir;
    });
    if (!episode)
        return null;
    if (episode.stage === 'errored') {
        return {
            status: 'error',
            reason: episode.terminalError,
            targetId: episode.targetId,
            promoted: false,
            promotedFiles: [],
            timestamp: episode.updatedAt,
        };
    }
    if (episode.stage === 'evolution-refused') {
        return {
            status: 'refused',
            reason: 'evolution refused',
            targetId: episode.targetId,
            promoted: false,
            promotedFiles: [],
            timestamp: episode.updatedAt,
        };
    }
    if (episode.stage === 'evolved') {
        return {
            status: 'promoted',
            targetId: episode.targetId,
            promoted: true,
            promotedFiles: [],
            timestamp: episode.updatedAt,
        };
    }
    // Any other stage is not a terminal evolution outcome.
    return null;
}
|
|
192
239
|
async function testReportRequiresPlan(testReportPath) {
|
|
193
240
|
try {
|
|
194
241
|
const content = await fs.readFile(testReportPath, 'utf-8');
|
|
@@ -28,10 +28,12 @@
|
|
|
28
28
|
* reject-buffer entry — BOTH durably on
|
|
29
29
|
* disk — THEN advance 'rolled-back'.
|
|
30
30
|
* - otherwise → advance 'kept'.
|
|
31
|
-
* g. 演进智能体 EVOLVING AGENT — ONLY after (f) persisted:
|
|
32
|
-
* (optimizer.step)
|
|
33
|
-
*
|
|
34
|
-
*
|
|
31
|
+
* g. 演进智能体 EVOLVING AGENT — ONLY after (f) persisted: require the
|
|
32
|
+
* (optimizer.step) main-arm observed-GREEN evidence, then
|
|
33
|
+
* advance the 'evolving' marker
|
|
34
|
+
* (heartbeat) so a concurrent sibling
|
|
35
|
+
* sees a live holder, then
|
|
36
|
+
* runEvolvingAgent reads the
|
|
35
37
|
* reject-buffer FRESH from disk (so THIS
|
|
36
38
|
* episode's just-written entry is in its
|
|
37
39
|
* prompt) and either not-spawned /
|
|
@@ -39,9 +41,9 @@
|
|
|
39
41
|
* h. advance 'closed' + releaseInFlight — ALWAYS, even on error.
|
|
40
42
|
*
|
|
41
43
|
* ORDERING GUARANTEE: the rollback + reject-buffer write are SEQUENTIAL awaits
|
|
42
|
-
* that BOTH complete (and the stage reads 'rolled-back'/'kept') before
|
|
43
|
-
* {@link runEvolvingAgent}
|
|
44
|
-
* and never share a Promise.all.
|
|
44
|
+
* that BOTH complete (and the stage reads 'rolled-back'/'kept') before the
|
|
45
|
+
* observed-GREEN preflight and {@link runEvolvingAgent}. (f) and (g) are never
|
|
46
|
+
* parallelized and never share a Promise.all.
|
|
45
47
|
*
|
|
46
48
|
* This module orchestrates; it never spawns an agent itself — the three agents
|
|
47
49
|
* own their own {@link runHeadlessAgent} spawns (the `spawn` seam threads to all
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
1
3
|
import { toActionSkeleton } from '../trajectory/skeleton.js';
|
|
2
4
|
import { getTrajectoryForChange } from '../trajectory/registry.js';
|
|
3
5
|
import { acquireInFlight, releaseInFlight, currentPolicyVersion, readPolicyLedger, initPolicyLineage, rollbackPolicyVersion, } from './policy/policy-store.js';
|
|
@@ -6,7 +8,7 @@ import { createEpisode, advanceEpisodeStage, writeArmCapture, readEpisode, episo
|
|
|
6
8
|
import { shouldRunCriticAgent, runCriticAgent, } from './critic-agent.js';
|
|
7
9
|
import { runRewardAgentEnsemble } from './reward-aggregator.js';
|
|
8
10
|
import { detectTestTamper } from './tamper-check.js';
|
|
9
|
-
import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, MIN_EVOLVING_AGENT_EDIT_BUDGET, } from './evolving-agent.js';
|
|
11
|
+
import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, MIN_EVOLVING_AGENT_EDIT_BUDGET, isArmObjectiveGreen, } from './evolving-agent.js';
|
|
10
12
|
import { reconcilePrediction, summarizeCalibration, } from './policy/prediction-reconcile.js';
|
|
11
13
|
/**
|
|
12
14
|
* Build the 主智能体 MAIN AGENT arm `{transcript?, skeleton?, objective}` from an
|
|
@@ -183,13 +185,65 @@ function deriveEpisodeId(changeName, now) {
|
|
|
183
185
|
/**
 * Build the `terminalError` note for a thrown step.
 *
 * The note is the error's message (or `String(err)` for a non-Error value).
 * When that text mentions a host-agent timeout — the absolute wall
 * (`headless agent timed out after Nms`) or the idle wall (`idle timeout`) —
 * it is prefixed with a `timeout:` marker so a timed-out episode can be told
 * apart from a genuine crash in episode.json. Pure.
 */
function terminalErrorLabel(err) {
    const text = err instanceof Error ? err.message : String(err);
    if (/\b(timed out|idle timeout)\b/i.test(text)) {
        return `timeout: ${text}`;
    }
    return text;
}
|
|
196
|
+
/**
 * Explain why the observed-GREEN gate blocks the evolving step.
 *
 * @param {object|null|undefined} objective Parsed main-arm objective, if any.
 * @returns {string|null} A human-readable failure reason, or null when
 *   `isArmObjectiveGreen` reports the objective as ok.
 */
function observedGreenFailureReason(objective) {
    if (objective) {
        const verdict = isArmObjectiveGreen(objective);
        if (verdict.ok) {
            return null;
        }
        return `observed-GREEN gate failed: ${verdict.reason}`;
    }
    // No objective at all: nothing to evaluate, so the gate cannot pass.
    return 'observed-GREEN gate: main-arm/objective.json is missing or unreadable - cannot confirm a verified green run';
}
|
|
203
|
+
/**
 * Load `<episodeDir>/main-arm/objective.json` for an episode.
 *
 * @param {string} repoRoot Repository root passed to `episodeDir`.
 * @param {string} episodeId Episode identifier passed to `episodeDir`.
 * @returns {Promise<any|null>} The parsed JSON value; null when the file does
 *   not exist (ENOENT) or its content is not valid JSON.
 * @throws Any read error other than ENOENT is rethrown unchanged.
 */
async function readMainArmObjectiveForEpisode(repoRoot, episodeId) {
    const objectivePath = path.join(episodeDir(repoRoot, episodeId), 'main-arm', 'objective.json');
    let parsed = null;
    try {
        const text = await fs.readFile(objectivePath, 'utf8');
        // Parse failures are absorbed here so the outer catch only ever sees read errors.
        try {
            parsed = JSON.parse(text);
        }
        catch {
            parsed = null;
        }
    }
    catch (err) {
        if (err.code !== 'ENOENT')
            throw err;
    }
    return parsed;
}
|
|
221
|
+
/**
 * Step (g) wrapper: check the observed-GREEN gate, optionally advance the
 * 'evolving' marker, then run the evolving agent.
 *
 * Order of operations:
 *   1. Take the main-arm objective from `opts.objective` when that key is
 *      present (a nullish value counts as "no objective"); otherwise read it
 *      from the episode directory.
 *   2. If the gate reports a failure, return `{ kind: 'not-spawned', reason }`
 *      without advancing the stage or spawning anything.
 *   3. When `opts.markEvolving` is truthy, advance the episode to 'evolving'
 *      with a fresh `evolvingHeartbeatAt` timestamp.
 *   4. Delegate to `runEvolvingAgent` and return its result.
 *
 * Timeout handling: the call sites forward the timeout under the key
 * `timeoutMs`, so that key is honoured first; `agentTimeoutMs` is still
 * accepted as a fallback. Reading only `agentTimeoutMs` dropped the
 * caller-supplied timeout before it reached `runEvolvingAgent`.
 *
 * @param {object} opts
 * @param {string} opts.repoRoot
 * @param {string} opts.episodeId
 * @param {string} opts.targetId
 * @param {number} opts.editBudget
 * @param {string} [opts.calibrationNote] Forwarded only when truthy.
 * @param {Function} [opts.spawn] Spawn seam forwarded to the agent.
 * @param {number} [opts.timeoutMs] Agent timeout (preferred key).
 * @param {number} [opts.agentTimeoutMs] Agent timeout (fallback key).
 * @param {*} [opts.harness] Forwarded only when truthy.
 * @param {boolean} [opts.markEvolving] Advance to 'evolving' before spawning.
 * @param {object|null} [opts.objective] Pre-loaded main-arm objective.
 * @returns {Promise<object>} `runEvolvingAgent`'s result, or a 'not-spawned'
 *   record when the gate fails.
 */
async function runEvolvingStepIfObservedGreen(opts) {
    const objective = Object.prototype.hasOwnProperty.call(opts, 'objective')
        ? (opts.objective ?? null)
        : await readMainArmObjectiveForEpisode(opts.repoRoot, opts.episodeId);
    const gateFailure = observedGreenFailureReason(objective);
    if (gateFailure) {
        return { kind: 'not-spawned', reason: gateFailure };
    }
    if (opts.markEvolving) {
        await advanceEpisodeStage({
            repoRoot: opts.repoRoot,
            episodeId: opts.episodeId,
            stage: 'evolving',
            patch: { evolvingHeartbeatAt: new Date().toISOString() },
        });
    }
    // Callers spread `{ timeoutMs }`; keep `agentTimeoutMs` working as well.
    const timeoutMs = opts.timeoutMs ?? opts.agentTimeoutMs;
    return await runEvolvingAgent({
        repoRoot: opts.repoRoot,
        episodeId: opts.episodeId,
        targetId: opts.targetId,
        editBudget: opts.editBudget,
        ...(opts.calibrationNote ? { calibrationNote: opts.calibrationNote } : {}),
        spawn: opts.spawn,
        ...(timeoutMs !== undefined ? { timeoutMs } : {}),
        ...(opts.harness ? { harness: opts.harness } : {}),
    });
}
|
|
194
248
|
/**
|
|
195
249
|
* Run ONE episode through the loop in the strict, durably-persisted order
|
|
@@ -437,16 +491,7 @@ async function runEpisodeAfterCreate(opts) {
|
|
|
437
491
|
? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
|
|
438
492
|
: editBudget;
|
|
439
493
|
const calibrationNote = await summarizeCalibration(repoRoot, targetId);
|
|
440
|
-
|
|
441
|
-
// sibling reading the store sees a LIVE-but-slow holder, not a stale lock at
|
|
442
|
-
// 'kept'/'rolled-back'. runEvolvingAgent advances the terminal outcome.
|
|
443
|
-
await advanceEpisodeStage({
|
|
444
|
-
repoRoot,
|
|
445
|
-
episodeId,
|
|
446
|
-
stage: 'evolving',
|
|
447
|
-
patch: { evolvingHeartbeatAt: new Date().toISOString() },
|
|
448
|
-
});
|
|
449
|
-
evolution = await runEvolvingAgent({
|
|
494
|
+
evolution = await runEvolvingStepIfObservedGreen({
|
|
450
495
|
repoRoot,
|
|
451
496
|
episodeId,
|
|
452
497
|
targetId,
|
|
@@ -455,6 +500,8 @@ async function runEpisodeAfterCreate(opts) {
|
|
|
455
500
|
spawn: opts.spawn,
|
|
456
501
|
...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
|
|
457
502
|
...(opts.harness ? { harness: opts.harness } : {}),
|
|
503
|
+
markEvolving: true,
|
|
504
|
+
objective: opts.mainArm.objective,
|
|
458
505
|
});
|
|
459
506
|
}
|
|
460
507
|
}
|
|
@@ -762,16 +809,7 @@ export async function resumeEpisode(opts) {
|
|
|
762
809
|
? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
|
|
763
810
|
: editBudget;
|
|
764
811
|
const calibrationNote = await summarizeCalibration(repoRoot, targetId);
|
|
765
|
-
|
|
766
|
-
// runEpisode's (g). Idempotent across a crash-resume: 'evolving' is reached
|
|
767
|
-
// from both 'rolled-back' and 'kept'.
|
|
768
|
-
await advanceEpisodeStage({
|
|
769
|
-
repoRoot,
|
|
770
|
-
episodeId,
|
|
771
|
-
stage: 'evolving',
|
|
772
|
-
patch: { evolvingHeartbeatAt: new Date().toISOString() },
|
|
773
|
-
});
|
|
774
|
-
evolution = await runEvolvingAgent({
|
|
812
|
+
evolution = await runEvolvingStepIfObservedGreen({
|
|
775
813
|
repoRoot,
|
|
776
814
|
episodeId,
|
|
777
815
|
targetId,
|
|
@@ -780,6 +818,7 @@ export async function resumeEpisode(opts) {
|
|
|
780
818
|
spawn: opts.spawn,
|
|
781
819
|
...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
|
|
782
820
|
...(opts.harness ? { harness: opts.harness } : {}),
|
|
821
|
+
markEvolving: true,
|
|
783
822
|
});
|
|
784
823
|
}
|
|
785
824
|
await closeEpisodeBestEffort(repoRoot, episodeId);
|
|
@@ -802,18 +841,7 @@ export async function resumeEpisode(opts) {
|
|
|
802
841
|
? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
|
|
803
842
|
: editBudget;
|
|
804
843
|
const calibrationNote = await summarizeCalibration(repoRoot, targetId);
|
|
805
|
-
|
|
806
|
-
// decision stage. When already at 'evolving' (a crash mid-spawn re-drive),
|
|
807
|
-
// the marker is present — skip the (now illegal) self-transition.
|
|
808
|
-
if (stage !== 'evolving') {
|
|
809
|
-
await advanceEpisodeStage({
|
|
810
|
-
repoRoot,
|
|
811
|
-
episodeId,
|
|
812
|
-
stage: 'evolving',
|
|
813
|
-
patch: { evolvingHeartbeatAt: new Date().toISOString() },
|
|
814
|
-
});
|
|
815
|
-
}
|
|
816
|
-
evolution = await runEvolvingAgent({
|
|
844
|
+
evolution = await runEvolvingStepIfObservedGreen({
|
|
817
845
|
repoRoot,
|
|
818
846
|
episodeId,
|
|
819
847
|
targetId,
|
|
@@ -822,6 +850,7 @@ export async function resumeEpisode(opts) {
|
|
|
822
850
|
spawn: opts.spawn,
|
|
823
851
|
...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
|
|
824
852
|
...(opts.harness ? { harness: opts.harness } : {}),
|
|
853
|
+
markEvolving: stage !== 'evolving',
|
|
825
854
|
});
|
|
826
855
|
await closeEpisodeBestEffort(repoRoot, episodeId);
|
|
827
856
|
}
|
|
@@ -854,9 +883,7 @@ export async function resumeEpisode(opts) {
|
|
|
854
883
|
* AGENT's reader uses).
|
|
855
884
|
*/
|
|
856
885
|
async function readDiagnosisForResume(repoRoot, episodeId) {
|
|
857
|
-
const
|
|
858
|
-
const pathMod = await import('node:path');
|
|
859
|
-
const file = pathMod.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');
|
|
886
|
+
const file = path.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');
|
|
860
887
|
let raw;
|
|
861
888
|
try {
|
|
862
889
|
raw = await fs.readFile(file, 'utf8');
|
|
@@ -243,14 +243,14 @@ export declare function isArmObjectiveGreen(objective: ArmObjectiveGreenInput):
|
|
|
243
243
|
* Run the 演进智能体 EVOLVING AGENT against an already-scored episode.
|
|
244
244
|
*
|
|
245
245
|
* Flow:
|
|
246
|
-
* 0. Code-side refuse-to-spawn: diagnosis.abstained
|
|
247
|
-
*
|
|
248
|
-
*
|
|
249
|
-
*
|
|
250
|
-
* 2.
|
|
251
|
-
*
|
|
252
|
-
*
|
|
253
|
-
* 4. All green
|
|
246
|
+
* 0. Code-side refuse-to-spawn: diagnosis.abstained, no gaps, or no editable
|
|
247
|
+
* target files => not-spawned.
|
|
248
|
+
* 1. Pre-spawn gate: observed-GREEN must already be verified, because the edit
|
|
249
|
+
* cannot change the main-arm evidence and retrying it is a category error.
|
|
250
|
+
* 2. Assemble + spawn (fresh context) and parse with repair xN; over-budget,
|
|
251
|
+
* static, and scope-within-diagnosis violations are repairable.
|
|
252
|
+
* 3. Model refusal => {kind:'refused'} + a 'refused' ledger entry.
|
|
253
|
+
* 4. All green => advancePolicyVersion writes the next version; episode stage
|
|
254
254
|
* advances to 'evolved' (or 'evolution-refused' on refusal).
|
|
255
255
|
*/
|
|
256
256
|
export declare function runEvolvingAgent(opts: RunEvolvingAgentOptions): Promise<RunEvolvingAgentResult>;
|
|
@@ -388,14 +388,14 @@ function gateFeedback(reason) {
|
|
|
388
388
|
* Run the 演进智能体 EVOLVING AGENT against an already-scored episode.
|
|
389
389
|
*
|
|
390
390
|
* Flow:
|
|
391
|
-
* 0. Code-side refuse-to-spawn: diagnosis.abstained
|
|
392
|
-
*
|
|
393
|
-
*
|
|
394
|
-
*
|
|
395
|
-
* 2.
|
|
396
|
-
*
|
|
397
|
-
*
|
|
398
|
-
* 4. All green
|
|
391
|
+
* 0. Code-side refuse-to-spawn: diagnosis.abstained, no gaps, or no editable
|
|
392
|
+
* target files => not-spawned.
|
|
393
|
+
* 1. Pre-spawn gate: observed-GREEN must already be verified, because the edit
|
|
394
|
+
* cannot change the main-arm evidence and retrying it is a category error.
|
|
395
|
+
* 2. Assemble + spawn (fresh context) and parse with repair xN; over-budget,
|
|
396
|
+
* static, and scope-within-diagnosis violations are repairable.
|
|
397
|
+
* 3. Model refusal => {kind:'refused'} + a 'refused' ledger entry.
|
|
398
|
+
* 4. All green => advancePolicyVersion writes the next version; episode stage
|
|
399
399
|
* advances to 'evolved' (or 'evolution-refused' on refusal).
|
|
400
400
|
*/
|
|
401
401
|
export async function runEvolvingAgent(opts) {
|
|
@@ -435,6 +435,18 @@ export async function runEvolvingAgent(opts) {
|
|
|
435
435
|
};
|
|
436
436
|
}
|
|
437
437
|
const allowedFiles = currentFiles.map((f) => f.relPath);
|
|
438
|
+
// -- 1. PRE-SPAWN GATE: observed-GREEN ------------------------------------
|
|
439
|
+
// This reads the PRE-edit MAIN arm's objective.json. A child edit cannot make
|
|
440
|
+
// red or unverified source evidence green, so fail before burning a headless
|
|
441
|
+
// evolving-agent invocation.
|
|
442
|
+
const objective = await readMainArmObjective(repoRoot, episodeId);
|
|
443
|
+
if (!objective) {
|
|
444
|
+
throw new EvolvingAgentOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable - cannot confirm a verified green run');
|
|
445
|
+
}
|
|
446
|
+
const evidence = isArmObjectiveGreen(objective);
|
|
447
|
+
if (!evidence.ok) {
|
|
448
|
+
throw new EvolvingAgentOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
|
|
449
|
+
}
|
|
438
450
|
const rejectBuffer = await readRejectBuffer(repoRoot, targetId, REJECT_BUFFER_PROMPT_LIMIT);
|
|
439
451
|
// 成功保护 DO-NOT-PRUNE: read protections + exemplars FRESH from disk (mirroring
|
|
440
452
|
// the reject-buffer read just above), so the green-run-mined load-bearing
|
|
@@ -453,7 +465,7 @@ export async function runEvolvingAgent(opts) {
|
|
|
453
465
|
doNotPrune,
|
|
454
466
|
...(opts.calibrationNote ? { calibrationNote: opts.calibrationNote } : {}),
|
|
455
467
|
});
|
|
456
|
-
//
|
|
468
|
+
// -- 2. Spawn + parse with bounded repair (parse / budget / scope) ----------
|
|
457
469
|
let feedback = null;
|
|
458
470
|
let parsed = null;
|
|
459
471
|
let scopeResult = null;
|
|
@@ -548,21 +560,7 @@ export async function runEvolvingAgent(opts) {
|
|
|
548
560
|
// Unreachable on the accept path; fail closed rather than evolve out of scope.
|
|
549
561
|
throw new EvolvingAgentOutputInvalid('范围⊆诊断 scope gate did not pass');
|
|
550
562
|
}
|
|
551
|
-
//
|
|
552
|
-
// static / 范围⊆诊断 / budget / valid-prediction were all enforced inside the
|
|
553
|
-
// bounded repair loop above (a content-driven failure was repairable there).
|
|
554
|
-
// observed-GREEN runs ONCE here and is NOT repairable: it reads the PRE-edit
|
|
555
|
-
// MAIN arm's objective.json, which the edit cannot influence — re-prompting it
|
|
556
|
-
// would be a category error.
|
|
557
|
-
const objective = await readMainArmObjective(repoRoot, episodeId);
|
|
558
|
-
if (!objective) {
|
|
559
|
-
throw new EvolvingAgentOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable — cannot confirm a verified green run');
|
|
560
|
-
}
|
|
561
|
-
const evidence = isArmObjectiveGreen(objective);
|
|
562
|
-
if (!evidence.ok) {
|
|
563
|
-
throw new EvolvingAgentOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
|
|
564
|
-
}
|
|
565
|
-
// ── 4. Write back the next policy version. NO candidate dir / sidecar / verdict. ─
|
|
563
|
+
// -- 4. Write back the next policy version. NO candidate dir / sidecar / verdict.
|
|
566
564
|
const ledgerEntry = await advancePolicyVersion({
|
|
567
565
|
repoRoot,
|
|
568
566
|
targetId,
|
|
@@ -25,7 +25,7 @@ This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify
|
|
|
25
25
|
|
|
26
26
|
3. **Spawn the runner**
|
|
27
27
|
|
|
28
|
-
Use Task
|
|
28
|
+
Use the host's available general-purpose Task/subagent runner (for example \`general-purpose\` on Claude or \`general\` on hosts that expose that type), prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: apply. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block."
|
|
29
29
|
|
|
30
30
|
Include the \`Session-id: <id>.\` / \`Transcript: <path>.\` segment only when the session handle from step 2 is known — omit it entirely when unknown.
|
|
31
31
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "synergyspec-selfevolving",
|
|
3
|
-
"version": "2.1.
|
|
3
|
+
"version": "2.1.4",
|
|
4
4
|
"description": "AI-native system for spec-driven development",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"synergyspec-selfevolving",
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"scripts": {
|
|
47
47
|
"lint": "eslint src/",
|
|
48
48
|
"build": "node build.js",
|
|
49
|
+
"build:clean": "node --input-type=module -e \"process.env.CLEAN_DIST='1'; await import('./build.js')\"",
|
|
49
50
|
"dev": "tsc --watch",
|
|
50
51
|
"dev:cli": "pnpm build && node bin/synergyspec-selfevolving.js",
|
|
51
52
|
"test": "vitest run",
|
|
@@ -54,8 +55,8 @@
|
|
|
54
55
|
"test:coverage": "vitest run --coverage",
|
|
55
56
|
"test:postinstall": "node scripts/postinstall.js",
|
|
56
57
|
"test:e2e:real-agent": "node smoking-test/run-real-agent-self-evolution-e2e.mjs --local-pack",
|
|
57
|
-
"prepare": "pnpm run build",
|
|
58
|
-
"prepublishOnly": "pnpm run build && pnpm run check:pack-version && pnpm run check:pack-contents",
|
|
58
|
+
"prepare": "pnpm run build:clean",
|
|
59
|
+
"prepublishOnly": "pnpm run build:clean && pnpm run check:pack-version && pnpm run check:pack-contents",
|
|
59
60
|
"postinstall": "node scripts/postinstall.js",
|
|
60
61
|
"check:docs": "node scripts/docs-check.mjs",
|
|
61
62
|
"check:pack-version": "node scripts/pack-version-check.mjs",
|
|
@@ -25,7 +25,8 @@ Pick a level for this change before writing the rest of this document:
|
|
|
25
25
|
|
|
26
26
|
`## Design Level` MUST be the first `## ` heading in the file.
|
|
27
27
|
|
|
28
|
-
See the `design` artifact instruction (run:
|
|
28
|
+
See the `design` artifact instruction (run:
|
|
29
|
+
`synergyspec-selfevolving instructions design --change <name> --json`) and
|
|
29
30
|
`docs/concepts.md` for the full heuristic. If unsure between two levels, pick the
|
|
30
31
|
higher one — over-classification is harmless, under-classification is not.
|
|
31
32
|
-->
|