synergyspec-selfevolving 1.4.0 → 2.0.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +151 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/project-config.d.ts +2 -0
- package/dist/core/project-config.js +28 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +9 -6
- package/dist/core/self-evolution/index.js +18 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +18 -16
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
package/dist/core/archive.d.ts
CHANGED
package/dist/core/archive.js
CHANGED
|
@@ -5,8 +5,6 @@ import { getChangeReadiness, } from './change-readiness.js';
|
|
|
5
5
|
import { Validator } from './validation/validator.js';
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
import { findSpecUpdates, buildUpdatedSpec, writeUpdatedSpec, } from './specs-apply.js';
|
|
8
|
-
import { evaluateTaskDecompositionForChange, isEvolutionPartEnabled, parseEvolutionSwitchOptions, recordTemplateVariantObservation, verifySpecCodeAlignmentForChange, } from './self-evolution/index.js';
|
|
9
|
-
import { isReadOnlyMode } from '../runtime/side-effects.js';
|
|
10
8
|
/**
|
|
11
9
|
* Recursively copy a directory. Used when fs.rename fails (e.g. EPERM on Windows).
|
|
12
10
|
*/
|
|
@@ -248,7 +246,6 @@ export class ArchiveCommand {
|
|
|
248
246
|
}
|
|
249
247
|
// Create archive directory if needed
|
|
250
248
|
await fs.mkdir(archiveDir, { recursive: true });
|
|
251
|
-
await this.recordSelfEvolutionObservations(targetPath, changeName, parseEvolutionSwitchOptions(options));
|
|
252
249
|
// Move change to archive (uses copy+remove on EPERM/EXDEV, e.g. Windows)
|
|
253
250
|
await moveDirectory(changeDir, archivePath);
|
|
254
251
|
console.log(`Change '${changeName}' archived as '${archiveName}'.`);
|
|
@@ -338,60 +335,5 @@ export class ArchiveCommand {
|
|
|
338
335
|
console.log(chalk.yellow('Archive cancelled. Complete the change or rerun with --force-incomplete.'));
|
|
339
336
|
return false;
|
|
340
337
|
}
|
|
341
|
-
async recordSelfEvolutionObservations(projectRoot, changeName, evolutionSwitches) {
|
|
342
|
-
if (isReadOnlyMode()) {
|
|
343
|
-
return;
|
|
344
|
-
}
|
|
345
|
-
if (!isEvolutionPartEnabled(evolutionSwitches, 'template-variants')) {
|
|
346
|
-
return;
|
|
347
|
-
}
|
|
348
|
-
try {
|
|
349
|
-
const taskQuality = isEvolutionPartEnabled(evolutionSwitches, 'task-decomposition')
|
|
350
|
-
? evaluateTaskDecompositionForChange({ projectRoot, changeName })
|
|
351
|
-
: null;
|
|
352
|
-
const alignment = isEvolutionPartEnabled(evolutionSwitches, 'alignment-verifier')
|
|
353
|
-
? verifySpecCodeAlignmentForChange({ projectRoot, changeName })
|
|
354
|
-
: null;
|
|
355
|
-
const artifacts = ['proposal', 'usecases', 'specs', 'design', 'tasks'];
|
|
356
|
-
let recorded = 0;
|
|
357
|
-
for (const artifactId of artifacts) {
|
|
358
|
-
if (recordTemplateVariantObservation({
|
|
359
|
-
projectRoot,
|
|
360
|
-
schemaName: 'spec-driven',
|
|
361
|
-
artifactId,
|
|
362
|
-
changeName,
|
|
363
|
-
observation: {
|
|
364
|
-
taskCompletionRatio: taskQuality?.metrics.completionRatio,
|
|
365
|
-
taskQualityScore: taskQuality?.score,
|
|
366
|
-
alignmentScore: alignment?.score,
|
|
367
|
-
reworkCount: countReworkSignals((taskQuality?.findings.length ?? 0) + (alignment?.findings.length ?? 0)),
|
|
368
|
-
notes: [
|
|
369
|
-
'archive observation',
|
|
370
|
-
taskQuality ? `task=${taskQuality.score.toFixed(2)}` : 'task=disabled',
|
|
371
|
-
alignment ? `alignment=${alignment.score.toFixed(2)}` : 'alignment=disabled',
|
|
372
|
-
].join(': '),
|
|
373
|
-
},
|
|
374
|
-
})) {
|
|
375
|
-
recorded++;
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
if (recorded > 0) {
|
|
379
|
-
console.log(`Self-evolution observations recorded for ${recorded} template variant(s).`);
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
catch (error) {
|
|
383
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
384
|
-
console.log(chalk.yellow(`Warning: self-evolution observation skipped: ${message}`));
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
function countReworkSignals(findingCount) {
|
|
389
|
-
if (findingCount <= 0)
|
|
390
|
-
return 0;
|
|
391
|
-
if (findingCount <= 2)
|
|
392
|
-
return 1;
|
|
393
|
-
if (findingCount <= 5)
|
|
394
|
-
return 2;
|
|
395
|
-
return 3;
|
|
396
338
|
}
|
|
397
339
|
//# sourceMappingURL=archive.js.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ArtifactGraph } from './graph.js';
|
|
2
2
|
import type { CompletedSet } from './types.js';
|
|
3
|
-
import { type EvolutionSwitchInput, type EvolutionSwitches
|
|
3
|
+
import { type EvolutionSwitchInput, type EvolutionSwitches } from '../self-evolution/index.js';
|
|
4
4
|
/**
|
|
5
5
|
* Error thrown when loading a template fails.
|
|
6
6
|
*/
|
|
@@ -49,9 +49,7 @@ export interface ArtifactInstructions {
|
|
|
49
49
|
rules: string[] | undefined;
|
|
50
50
|
/** Template content (structure to follow - this IS the output format) */
|
|
51
51
|
template: string;
|
|
52
|
-
/**
|
|
53
|
-
templateVariant?: TemplateVariantSelection;
|
|
54
|
-
/** Self-evolution context assembled from template selection, archive memory, and quality signals */
|
|
52
|
+
/** Self-evolution context assembled from archive memory and quality signals */
|
|
55
53
|
selfEvolutionContext?: string;
|
|
56
54
|
/** Per-run evolution switches that controlled this instruction payload */
|
|
57
55
|
evolutionSwitches?: EvolutionSwitches;
|
|
@@ -7,7 +7,7 @@ import { resolveSchemaForChange } from '../../utils/change-metadata.js';
|
|
|
7
7
|
import { readProjectConfig, validateConfigRules, validateConfigSelfEvolutionTargets, } from '../project-config.js';
|
|
8
8
|
import { CANONICAL_TARGETS } from '../self-evolution/canonical-targets.js';
|
|
9
9
|
import { ensureDesignConstitution } from '../design-constitution.js';
|
|
10
|
-
import { findSimilarArchiveExperiences, isEvolutionPartEnabled,
|
|
10
|
+
import { findSimilarArchiveExperiences, isEvolutionPartEnabled, renderArchiveExperienceBlock, renderTaskStrategyContext, resolveEvolutionSwitches, evaluateTaskDecomposition, } from '../self-evolution/index.js';
|
|
11
11
|
// Session-level cache for validation warnings (avoid repeating same warnings)
|
|
12
12
|
const shownWarnings = new Set();
|
|
13
13
|
/**
|
|
@@ -96,27 +96,7 @@ export function generateInstructions(context, artifactId, projectRoot, options =
|
|
|
96
96
|
throw new Error(`Artifact '${artifactId}' not found in schema '${context.schemaName}'`);
|
|
97
97
|
}
|
|
98
98
|
const evolutionSwitches = resolveEvolutionSwitches(options.evolution);
|
|
99
|
-
const
|
|
100
|
-
? loadTemplateWithVariant({
|
|
101
|
-
schemaName: context.schemaName,
|
|
102
|
-
artifactId: artifact.id,
|
|
103
|
-
templatePath: artifact.template,
|
|
104
|
-
projectRoot: context.projectRoot,
|
|
105
|
-
loadBuiltIn: loadTemplate,
|
|
106
|
-
})
|
|
107
|
-
: {
|
|
108
|
-
content: loadTemplate(context.schemaName, artifact.template, context.projectRoot),
|
|
109
|
-
selection: {
|
|
110
|
-
id: 'built-in',
|
|
111
|
-
schema: context.schemaName,
|
|
112
|
-
artifact: artifact.id,
|
|
113
|
-
templatePath: artifact.template,
|
|
114
|
-
score: 0.5,
|
|
115
|
-
observationCount: 0,
|
|
116
|
-
source: 'built-in',
|
|
117
|
-
reason: 'template evolution disabled for this run',
|
|
118
|
-
},
|
|
119
|
-
};
|
|
99
|
+
const templateContent = loadTemplate(context.schemaName, artifact.template, context.projectRoot);
|
|
120
100
|
const dependencies = getDependencyInfo(artifact, context.graph, context.completed);
|
|
121
101
|
const unlocks = getUnlockedArtifacts(context.graph, artifactId);
|
|
122
102
|
// Use projectRoot from context if not explicitly provided
|
|
@@ -167,7 +147,6 @@ export function generateInstructions(context, artifactId, projectRoot, options =
|
|
|
167
147
|
const selfEvolutionContext = buildSelfEvolutionContext({
|
|
168
148
|
context,
|
|
169
149
|
artifactId,
|
|
170
|
-
templateVariant: templateWithVariant.selection,
|
|
171
150
|
configContext,
|
|
172
151
|
evolutionSwitches,
|
|
173
152
|
});
|
|
@@ -181,8 +160,7 @@ export function generateInstructions(context, artifactId, projectRoot, options =
|
|
|
181
160
|
instruction: artifact.instruction,
|
|
182
161
|
context: configContext,
|
|
183
162
|
rules: configRules,
|
|
184
|
-
template:
|
|
185
|
-
templateVariant: templateWithVariant.selection,
|
|
163
|
+
template: templateContent,
|
|
186
164
|
selfEvolutionContext,
|
|
187
165
|
evolutionSwitches,
|
|
188
166
|
dependencies,
|
|
@@ -191,12 +169,6 @@ export function generateInstructions(context, artifactId, projectRoot, options =
|
|
|
191
169
|
}
|
|
192
170
|
function buildSelfEvolutionContext(args) {
|
|
193
171
|
const blocks = [];
|
|
194
|
-
if (isEvolutionPartEnabled(args.evolutionSwitches, 'template-variants') && args.templateVariant) {
|
|
195
|
-
const variantContext = renderTemplateVariantContext(args.templateVariant);
|
|
196
|
-
if (variantContext) {
|
|
197
|
-
blocks.push(variantContext);
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
172
|
if (isEvolutionPartEnabled(args.evolutionSwitches, 'archive-memory')) {
|
|
201
173
|
const query = [
|
|
202
174
|
args.context.changeName,
|
|
@@ -6,10 +6,10 @@
|
|
|
6
6
|
* healthPenalty = normalized SlopCodeBench code-health penalty
|
|
7
7
|
* (structural_erosion ⊕ verbosity)
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
9
|
+
* This loss is the objective evidence the 奖励智能体 (REWARD AGENT) anchors
|
|
10
|
+
* on for an episode: functional correctness is ALSO a hard GATE
|
|
11
|
+
* (a change whose code fails its tests cannot be promoted), while this module
|
|
12
|
+
* only computes the continuous loss scalar used to compare passers.
|
|
13
13
|
*/
|
|
14
14
|
export interface PerChangeLoss {
|
|
15
15
|
/** 1 − pass_rate, in [0,1]. */
|
|
@@ -45,7 +45,7 @@ export interface ComputeLossInput {
|
|
|
45
45
|
* the loss is byte-identical to the functional⊕health baseline regardless of
|
|
46
46
|
* `verified` — the trajectory signal is recorded on the FitnessSample for
|
|
47
47
|
* auditing without yet moving selection. Raise it to let unverified
|
|
48
|
-
* candidates be down-weighted (never hard-disqualified)
|
|
48
|
+
* candidates be down-weighted (never hard-disqualified) when comparing them.
|
|
49
49
|
*/
|
|
50
50
|
unverifiedWeight?: number;
|
|
51
51
|
}
|
|
@@ -6,10 +6,10 @@
|
|
|
6
6
|
* healthPenalty = normalized SlopCodeBench code-health penalty
|
|
7
7
|
* (structural_erosion ⊕ verbosity)
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
9
|
+
* This loss is the objective evidence the 奖励智能体 (REWARD AGENT) anchors
|
|
10
|
+
* on for an episode: functional correctness is ALSO a hard GATE
|
|
11
|
+
* (a change whose code fails its tests cannot be promoted), while this module
|
|
12
|
+
* only computes the continuous loss scalar used to compare passers.
|
|
13
13
|
*/
|
|
14
14
|
function clamp01(v) {
|
|
15
15
|
if (Number.isNaN(v))
|
|
@@ -25,6 +25,8 @@ export declare const ProjectConfigSchema: z.ZodObject<{
|
|
|
25
25
|
evolve: z.ZodBoolean;
|
|
26
26
|
}, z.core.$strip>>>;
|
|
27
27
|
focus: z.ZodOptional<z.ZodBoolean>;
|
|
28
|
+
advantageRollbackThreshold: z.ZodOptional<z.ZodNumber>;
|
|
29
|
+
editBudget: z.ZodOptional<z.ZodNumber>;
|
|
28
30
|
}, z.core.$strip>>;
|
|
29
31
|
health: z.ZodOptional<z.ZodObject<{
|
|
30
32
|
source: z.ZodDefault<z.ZodEnum<{
|
|
@@ -49,6 +49,17 @@ export const ProjectConfigSchema = z.object({
|
|
|
49
49
|
// hint instead of silently dropping them. Set `focus: false` (or pass
|
|
50
50
|
// `learn --no-focus`) to drop frozen-kind signals silently, as before.
|
|
51
51
|
focus: z.boolean().optional(),
|
|
52
|
+
// Loop v2 (self-evolution as in-context RL): the advantage = reward(主臂) −
|
|
53
|
+
// reward(基线臂) threshold below which the episode-orchestrator rolls the
|
|
54
|
+
// 策略 POLICY back to the prior version before the 演进智能体 EVOLVING AGENT
|
|
55
|
+
// runs. Default 0 — a non-positive advantage (the new policy did not beat
|
|
56
|
+
// the baseline) triggers a rollback. Optional/omitted ⇒ the orchestrator's
|
|
57
|
+
// built-in default applies.
|
|
58
|
+
advantageRollbackThreshold: z.number().optional(),
|
|
59
|
+
// Loop v2: the edit budget L (max changed lines, added + removed) the
|
|
60
|
+
// 演进智能体 EVOLVING AGENT's ONE bounded edit may total. Default 40.
|
|
61
|
+
// Optional/omitted ⇒ the agent's DEFAULT_EVOLVING_AGENT_EDIT_BUDGET applies.
|
|
62
|
+
editBudget: z.number().optional(),
|
|
52
63
|
})
|
|
53
64
|
.optional()
|
|
54
65
|
.describe('Per-canonical-target self-evolution toggles'),
|
|
@@ -218,6 +229,23 @@ export function readProjectConfig(projectRoot) {
|
|
|
218
229
|
console.warn(`Invalid 'selfEvolution.focus' in config (must be boolean), ignoring`);
|
|
219
230
|
}
|
|
220
231
|
}
|
|
232
|
+
// Loop v2 numeric knobs. Resilient: a non-number is dropped with a
|
|
233
|
+
// warning (the orchestrator/agent default then applies); omitted ⇒
|
|
234
|
+
// undefined (byte-identical to configs that never set them).
|
|
235
|
+
const advThresholdResult = z.number().safeParse(rawSE.advantageRollbackThreshold);
|
|
236
|
+
if (advThresholdResult.success) {
|
|
237
|
+
selfEvolution.advantageRollbackThreshold = advThresholdResult.data;
|
|
238
|
+
}
|
|
239
|
+
else if (rawSE.advantageRollbackThreshold !== undefined) {
|
|
240
|
+
console.warn(`Invalid 'selfEvolution.advantageRollbackThreshold' in config (must be a number), ignoring`);
|
|
241
|
+
}
|
|
242
|
+
const editBudgetResult = z.number().safeParse(rawSE.editBudget);
|
|
243
|
+
if (editBudgetResult.success) {
|
|
244
|
+
selfEvolution.editBudget = editBudgetResult.data;
|
|
245
|
+
}
|
|
246
|
+
else if (rawSE.editBudget !== undefined) {
|
|
247
|
+
console.warn(`Invalid 'selfEvolution.editBudget' in config (must be a number), ignoring`);
|
|
248
|
+
}
|
|
221
249
|
config.selfEvolution = selfEvolution;
|
|
222
250
|
}
|
|
223
251
|
else {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type CandidateRepoLayout } from './candidates.js';
|
|
2
2
|
export declare const FITNESS_RECORD_FILE = "fitness-record.jsonl";
|
|
3
3
|
export interface CandidateFitnessRecord {
|
|
4
4
|
/** ISO-8601 UTC timestamp the sample was recorded. */
|
|
@@ -44,4 +44,26 @@ export declare function isValidRecord(value: unknown): value is CandidateFitness
|
|
|
44
44
|
* absent. Malformed/blank lines are skipped (forward-compatible).
|
|
45
45
|
*/
|
|
46
46
|
export declare function readCandidateFitness(layout: CandidateRepoLayout, candidateId: string): Promise<AccumulatedFitness>;
|
|
47
|
+
/**
|
|
48
|
+
* The accumulated baseline reading for a target's de-facto canonical variant:
|
|
49
|
+
* the promoted candidate whose fitness sidecar supplies the baseline loss.
|
|
50
|
+
*/
|
|
51
|
+
export interface PromotedBaseline {
|
|
52
|
+
candidateId: string;
|
|
53
|
+
meanLoss: number;
|
|
54
|
+
/** createdAt of the baseline candidate (for reporting). */
|
|
55
|
+
recordedAt: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* The de-facto canonical baseline loss for a target: the accumulated mean loss
|
|
59
|
+
* of the MOST RECENTLY promoted candidate that has measured fitness. Returns
|
|
60
|
+
* null when no promoted candidate for the target has a non-null meanLoss (⇒
|
|
61
|
+
* downstream gates cannot use a baseline). Rolled-back candidates are excluded
|
|
62
|
+
* automatically (they are status 'rolled-back', not 'promoted').
|
|
63
|
+
*
|
|
64
|
+
* `listCandidates` already returns promoted candidates sorted by `createdAt`
|
|
65
|
+
* DESCENDING (ties by id), so the first proven candidate in iteration order is
|
|
66
|
+
* the most recently promoted one with fitness.
|
|
67
|
+
*/
|
|
68
|
+
export declare function readPromotedBaselineLoss(layout: CandidateRepoLayout, targetId: string): Promise<PromotedBaseline | null>;
|
|
47
69
|
//# sourceMappingURL=candidate-fitness.d.ts.map
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Per-candidate accumulated fitness
|
|
2
|
+
* Per-candidate accumulated fitness for the manual promote / evolve-from-edits
|
|
3
|
+
* channel (a candidate's accumulated outcome history).
|
|
3
4
|
*
|
|
4
5
|
* Each real change a candidate template-variant was active for appends one line
|
|
5
6
|
* to an append-only `fitness-record.jsonl` sidecar in the candidate's directory
|
|
6
|
-
* (`<baseDir>/<candidateId>/fitness-record.jsonl`). The
|
|
7
|
-
* the accumulated record to
|
|
8
|
-
*
|
|
9
|
-
* todo/learn-self-evolution-migration-plan.md.
|
|
7
|
+
* (`<baseDir>/<candidateId>/fitness-record.jsonl`). The manual promote /
|
|
8
|
+
* evolve-from-edits path reads the accumulated record to feed a data-driven
|
|
9
|
+
* promotion verdict. See todo/learn-self-evolution-migration-plan.md.
|
|
10
10
|
*
|
|
11
11
|
* Sidecar (not a candidate.json field) on purpose: candidate.json is the stable
|
|
12
12
|
* proposal-time header; fitness is retrospective outcome data that grows over
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
*/
|
|
16
16
|
import { promises as fs } from 'node:fs';
|
|
17
17
|
import * as path from 'node:path';
|
|
18
|
+
import { listCandidates } from './candidates.js';
|
|
18
19
|
export const FITNESS_RECORD_FILE = 'fitness-record.jsonl';
|
|
19
20
|
function candidateDir(layout, candidateId) {
|
|
20
21
|
return path.join(layout.baseDir, candidateId);
|
|
@@ -104,4 +105,29 @@ export async function readCandidateFitness(layout, candidateId) {
|
|
|
104
105
|
recentTrend: trendOf(records.map((r) => r.loss)),
|
|
105
106
|
};
|
|
106
107
|
}
|
|
108
|
+
/**
|
|
109
|
+
* The de-facto canonical baseline loss for a target: the accumulated mean loss
|
|
110
|
+
* of the MOST RECENTLY promoted candidate that has measured fitness. Returns
|
|
111
|
+
* null when no promoted candidate for the target has a non-null meanLoss (⇒
|
|
112
|
+
* downstream gates cannot use a baseline). Rolled-back candidates are excluded
|
|
113
|
+
* automatically (they are status 'rolled-back', not 'promoted').
|
|
114
|
+
*
|
|
115
|
+
* `listCandidates` already returns promoted candidates sorted by `createdAt`
|
|
116
|
+
* DESCENDING (ties by id), so the first proven candidate in iteration order is
|
|
117
|
+
* the most recently promoted one with fitness.
|
|
118
|
+
*/
|
|
119
|
+
export async function readPromotedBaselineLoss(layout, targetId) {
|
|
120
|
+
const promoted = await listCandidates(layout, { status: 'promoted', targetId });
|
|
121
|
+
for (const candidate of promoted) {
|
|
122
|
+
const fitness = await readCandidateFitness(layout, candidate.id);
|
|
123
|
+
if (fitness.meanLoss !== null) {
|
|
124
|
+
return {
|
|
125
|
+
candidateId: candidate.id,
|
|
126
|
+
meanLoss: fitness.meanLoss,
|
|
127
|
+
recordedAt: candidate.createdAt,
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return null;
|
|
132
|
+
}
|
|
107
133
|
//# sourceMappingURL=candidate-fitness.js.map
|
|
@@ -96,15 +96,6 @@ export interface CanonicalCandidate {
|
|
|
96
96
|
evalReportPath?: string;
|
|
97
97
|
/** Set by promotion once a decision has been recorded. Relative to candidate dir. */
|
|
98
98
|
promotionDecisionPath?: string;
|
|
99
|
-
/**
|
|
100
|
-
* Population-based generation (`propose-canonical --variants N`): the shared id
|
|
101
|
-
* of the sibling-variant cohort drafted from the SAME hint group in one run.
|
|
102
|
-
* Absent for single-candidate (default) proposals. Lets the GA outer loop mark
|
|
103
|
-
* siblings that lost the ranking as `outcompeted`.
|
|
104
|
-
*/
|
|
105
|
-
variantGroup?: string;
|
|
106
|
-
/** The improvement angle this variant was asked to pursue (population-based). */
|
|
107
|
-
variantAngle?: string;
|
|
108
99
|
}
|
|
109
100
|
/**
|
|
110
101
|
* A single full-file-replacement edit produced by the proposer agent: the
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CRITIC AGENT(基线智能体 baseline agent)runner — loop v2 (self-evolution as
|
|
3
|
+
* in-context RL).
|
|
4
|
+
*
|
|
5
|
+
* The CRITIC AGENT is an AGENT with the SAME input/output as the 主智能体 MAIN
|
|
6
|
+
* AGENT (frozen actor; the user's host agent running the current 策略 policy
|
|
7
|
+
* vN+1). It reruns LAST episode's 策略 policy vN on the SAME change in an
|
|
8
|
+
* ISOLATED worktree, so the 奖励智能体 REWARD AGENT can later 算分 calculate
|
|
9
|
+
* reward(主臂)&reward(基线臂) and advantage = reward(主臂) − reward(基线臂).
|
|
10
|
+
* Only its baseline trajectory survives — 产物即弃 (worktree artifacts
|
|
11
|
+
* discarded): the worktree is torn down in `finally`, and the single durable
|
|
12
|
+
* output is the `baseline-arm/` capture in the episode store.
|
|
13
|
+
*
|
|
14
|
+
* This module orchestrates ONE baseline arm:
|
|
15
|
+
* 1. create an isolated worktree OUTSIDE the repo (git worktree, else a
|
|
16
|
+
* recursive file copy fallback),
|
|
17
|
+
* 2. make it runnable (node_modules junction/symlink + the untracked surfaces
|
|
18
|
+
* the rerun reads),
|
|
19
|
+
* 3. INSTALL 策略 policy vN into the worktree from the byte-for-byte version
|
|
20
|
+
* snapshot, so the baseline arm reruns the PRIOR policy and not the live
|
|
21
|
+
* templates,
|
|
22
|
+
* 4. rerun headlessly via the host-aware {@link runHeadlessAgent} with
|
|
23
|
+
* cwd = worktree, measurement only, never editing canonical files,
|
|
24
|
+
* 5. persist the baseline arm (stdout always; the claude session transcript +
|
|
25
|
+
* action skeleton when discoverable; an `objective.json` shaped IDENTICALLY
|
|
26
|
+
* to the main arm's), and
|
|
27
|
+
* 6. ALWAYS tear the worktree down.
|
|
28
|
+
*
|
|
29
|
+
* Honesty contract: a pass rate is parse-or-throw — when the rerun's stdout
|
|
30
|
+
* carries no parseable test summary the objective records `passRate: null`
|
|
31
|
+
* rather than fabricating one. The agent is RUN, never asked to edit; the prompt
|
|
32
|
+
* strips every arm/candidate word.
|
|
33
|
+
*/
|
|
34
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
35
|
+
/** Error thrown when the worktree could not be created (git AND copy fallback failed). */
|
|
36
|
+
export declare class CriticWorktreeError extends Error {
|
|
37
|
+
constructor(message: string);
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* The baseline arm's `objective.json` shape — kept IDENTICAL to the main arm's
|
|
41
|
+
* (CS4 writes the main arm with this same shape). It mirrors the slice of the
|
|
42
|
+
* 主智能体 MAIN AGENT's `learn` fitness sample the 奖励智能体 REWARD AGENT
|
|
43
|
+
* compares across arms:
|
|
44
|
+
* - `passRate` — functional pass rate in [0,1], or `null` when the rerun
|
|
45
|
+
* produced no parseable test summary (NEVER fabricated).
|
|
46
|
+
* - `testsTotal` / — raw counts behind the pass rate, present only when a
|
|
47
|
+
* `testsFailed` summary parsed.
|
|
48
|
+
* - `healthPenalty` — normalized code-health penalty in [0,1] measured
|
|
49
|
+
* against the WORKTREE, or `null` when no signal.
|
|
50
|
+
* - `loss` — blended functional⊕health per-change loss in [0,1], or
|
|
51
|
+
* `null` when there was no functional signal (no pass
|
|
52
|
+
* rate ⇒ "no signal", not loss = 1).
|
|
53
|
+
* - `verified` — whether a real test-runner invocation was OBSERVED in
|
|
54
|
+
* the rerun's trajectory (the observed-verified gate's
|
|
55
|
+
* signal); `false` when only stdout was captured.
|
|
56
|
+
* - `observedStatus` — 'success' | 'failure' | null, derived from the observed
|
|
57
|
+
* run (null when no runner was observed).
|
|
58
|
+
* - `measuredAt` — ISO-8601 UTC timestamp the arm was scored.
|
|
59
|
+
*/
|
|
60
|
+
export interface ArmObjective {
|
|
61
|
+
passRate: number | null;
|
|
62
|
+
testsTotal?: number;
|
|
63
|
+
testsFailed?: number;
|
|
64
|
+
healthPenalty: number | null;
|
|
65
|
+
loss: number | null;
|
|
66
|
+
verified: boolean;
|
|
67
|
+
observedStatus: 'success' | 'failure' | null;
|
|
68
|
+
measuredAt: string;
|
|
69
|
+
}
|
|
70
|
+
export interface ShouldRunCriticAgentOptions {
|
|
71
|
+
repoRoot: string;
|
|
72
|
+
targetId: string;
|
|
73
|
+
}
|
|
74
|
+
export interface ShouldRunCriticAgentResult {
|
|
75
|
+
/** True when the baseline arm SHOULD run. */
|
|
76
|
+
run: boolean;
|
|
77
|
+
/** Human-readable explanation (logged; surfaced on the skip path). */
|
|
78
|
+
reason: string;
|
|
79
|
+
/**
|
|
80
|
+
* The policy version the baseline arm would rerun (LAST episode's vN), or
|
|
81
|
+
* `null` on every skip path.
|
|
82
|
+
*/
|
|
83
|
+
baselineVersion: number | null;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Decide whether the CRITIC AGENT(基线智能体 baseline agent)should run for the
|
|
87
|
+
* NEXT episode.
|
|
88
|
+
*
|
|
89
|
+
* Skip (`run: false`) when:
|
|
90
|
+
* - the 单一血统 single lineage has < 2 versions — there is no PRIOR policy to
|
|
91
|
+
* rerun (v0 is the only point; the 主智能体 MAIN AGENT IS v0), OR
|
|
92
|
+
* - the head 版本账本 ledger entry's action is 'refused' — the 演进智能体
|
|
93
|
+
* EVOLVING AGENT refused last episode, so vN+1 ≡ vN and rerunning the
|
|
94
|
+
* baseline would compare a policy against ITSELF (no advantage to measure).
|
|
95
|
+
*
|
|
96
|
+
* Otherwise run, rerunning the head version (vN, the policy the LAST episode
|
|
97
|
+
* settled on, which the current 主智能体 MAIN AGENT also runs as vN+1 unless an
|
|
98
|
+
* evolve happened — the comparison the 奖励智能体 REWARD AGENT scores).
|
|
99
|
+
*
|
|
100
|
+
* Pure read of the ledger via {@link readPolicyLedger}/{@link currentPolicyVersion};
|
|
101
|
+
* this function NEVER writes episode state. The skip path's
|
|
102
|
+
* {@link advanceEpisodeStage} to 'baseline-skipped' is the CALLER's job.
|
|
103
|
+
*/
|
|
104
|
+
export declare function shouldRunCriticAgent(opts: ShouldRunCriticAgentOptions): Promise<ShouldRunCriticAgentResult>;
|
|
105
|
+
/**
|
|
106
|
+
* Assemble the CRITIC AGENT(基线智能体 baseline agent)rerun prompt. STRIPPED
|
|
107
|
+
* of every arm/candidate word: the agent is simply told to re-run change
|
|
108
|
+
* <changeName> end-to-end (apply → gen-test → run-test) under the templates
|
|
109
|
+
* already installed in its working directory, measurement only, never editing
|
|
110
|
+
* canonical files, and to print the runner summary line verbatim as its final
|
|
111
|
+
* line.
|
|
112
|
+
*/
|
|
113
|
+
export declare function assembleCriticPrompt(changeName: string): string;
|
|
114
|
+
export interface RunCriticAgentOptions {
|
|
115
|
+
repoRoot: string;
|
|
116
|
+
targetId: string;
|
|
117
|
+
changeName: string;
|
|
118
|
+
episodeId: string;
|
|
119
|
+
/** LAST episode's policy version vN, from {@link shouldRunCriticAgent}. */
|
|
120
|
+
baselineVersion: number;
|
|
121
|
+
/** Injectable spawn seam for tests; defaults to node's spawn. */
|
|
122
|
+
spawn?: typeof nodeSpawn;
|
|
123
|
+
/** Hard timeout per agent run (ms). Default 600000 (10 min). */
|
|
124
|
+
timeoutMs?: number;
|
|
125
|
+
/** Override `os.homedir()` for tests (claude transcript discovery). */
|
|
126
|
+
homeDir?: string;
|
|
127
|
+
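/** TEST seam: fixed clock to use in place of the current time. NOTE(review): the earlier wording described a worktree-root injection seam (skipping git setup/teardown), which a `Date` cannot provide — confirm the intended use (presumably timestamps such as `measuredAt`). */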
|
|
128
|
+
now?: Date;
|
|
129
|
+
}
|
|
130
|
+
export interface RunCriticAgentResult {
|
|
131
|
+
/** Absolute path of the `baseline-arm/` dir the capture landed in. */
|
|
132
|
+
armDir: string;
|
|
133
|
+
/** The objective that was persisted. */
|
|
134
|
+
objective: ArmObjective;
|
|
135
|
+
/** True when the claude session transcript was discovered and persisted. */
|
|
136
|
+
transcriptDiscovered: boolean;
|
|
137
|
+
/** The worktree path that was created then discarded (audit only). */
|
|
138
|
+
worktreePath: string;
|
|
139
|
+
/** How the worktree was created. */
|
|
140
|
+
worktreeMode: 'git-worktree' | 'copy-fallback';
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Run the CRITIC AGENT(基线智能体 baseline agent)'s full baseline arm and
|
|
144
|
+
* persist its capture. ALWAYS tears the worktree down (产物即弃). On success it
|
|
145
|
+
* advances the episode to 'baseline-arm-captured' (patch
|
|
146
|
+
* `{policyVersionBaseline}`). The SKIP path is the caller's job (see
|
|
147
|
+
* {@link shouldRunCriticAgent}).
|
|
148
|
+
*/
|
|
149
|
+
export declare function runCriticAgent(opts: RunCriticAgentOptions): Promise<RunCriticAgentResult>;
|
|
150
|
+
//# sourceMappingURL=critic-agent.d.ts.map
|