synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +158 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +431 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/fitness/test-failures.js +10 -2
  15. package/dist/core/project-config.d.ts +19 -0
  16. package/dist/core/project-config.js +96 -0
  17. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  18. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  19. package/dist/core/self-evolution/candidates.d.ts +0 -9
  20. package/dist/core/self-evolution/critic-agent.d.ts +192 -0
  21. package/dist/core/self-evolution/critic-agent.js +568 -0
  22. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  23. package/dist/core/self-evolution/edits-contract.js +89 -0
  24. package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
  25. package/dist/core/self-evolution/episode-orchestrator.js +681 -0
  26. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  27. package/dist/core/self-evolution/episode-store.js +573 -0
  28. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  29. package/dist/core/self-evolution/evolution-switches.js +5 -10
  30. package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
  31. package/dist/core/self-evolution/evolving-agent.js +535 -0
  32. package/dist/core/self-evolution/host-harness.d.ts +14 -15
  33. package/dist/core/self-evolution/host-harness.js +48 -23
  34. package/dist/core/self-evolution/index.d.ts +11 -6
  35. package/dist/core/self-evolution/index.js +20 -6
  36. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  37. package/dist/core/self-evolution/line-diff.js +130 -0
  38. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  39. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  40. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  41. package/dist/core/self-evolution/policy/index.js +13 -0
  42. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  43. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  44. package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
  45. package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
  46. package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
  47. package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
  48. package/dist/core/self-evolution/promote.d.ts +1 -1
  49. package/dist/core/self-evolution/promote.js +6 -33
  50. package/dist/core/self-evolution/promotion.js +1 -2
  51. package/dist/core/self-evolution/reward-agent.d.ts +379 -0
  52. package/dist/core/self-evolution/reward-agent.js +940 -0
  53. package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
  54. package/dist/core/self-evolution/reward-aggregator.js +262 -0
  55. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  56. package/dist/core/self-evolution/scope-gate.js +107 -0
  57. package/dist/core/self-evolution/success-channel.js +2 -2
  58. package/dist/core/self-evolution/tamper-check.d.ts +24 -0
  59. package/dist/core/self-evolution/tamper-check.js +236 -0
  60. package/dist/core/self-evolution/tool-evolution.js +2 -13
  61. package/dist/core/self-evolution/verdict.d.ts +8 -5
  62. package/dist/core/self-evolution/verdict.js +4 -7
  63. package/dist/core/templates/workflows/gen-tests.js +1 -1
  64. package/dist/core/templates/workflows/learn.d.ts +3 -2
  65. package/dist/core/templates/workflows/learn.js +21 -18
  66. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  67. package/dist/core/templates/workflows/self-evolving.js +62 -172
  68. package/dist/core/trajectory/scrub.d.ts +27 -0
  69. package/dist/core/trajectory/scrub.js +79 -0
  70. package/dist/core/trajectory/skeleton.d.ts +27 -1
  71. package/dist/core/trajectory/skeleton.js +152 -8
  72. package/dist/dashboard/data.d.ts +25 -51
  73. package/dist/dashboard/data.js +68 -180
  74. package/dist/dashboard/react-client.js +458 -503
  75. package/dist/dashboard/react-styles.js +3 -3
  76. package/dist/dashboard/server.js +23 -17
  77. package/dist/ui/ascii-patterns.d.ts +7 -15
  78. package/dist/ui/ascii-patterns.js +123 -54
  79. package/dist/ui/welcome-screen.d.ts +0 -14
  80. package/dist/ui/welcome-screen.js +16 -35
  81. package/package.json +1 -1
  82. package/dist/core/self-evolution/ga-selection.d.ts +0 -94
  83. package/dist/core/self-evolution/ga-selection.js +0 -153
  84. package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
  85. package/dist/core/self-evolution/proposer-agent.js +0 -326
  86. package/dist/core/self-evolution/replay-runner.d.ts +0 -100
  87. package/dist/core/self-evolution/replay-runner.js +0 -170
  88. package/dist/core/self-evolution/replay.d.ts +0 -45
  89. package/dist/core/self-evolution/replay.js +0 -56
  90. package/dist/core/self-evolution/template-variants.d.ts +0 -62
  91. package/dist/core/self-evolution/template-variants.js +0 -171
  92. package/dist/core/self-evolution/trajectory.d.ts +0 -65
  93. package/dist/core/self-evolution/trajectory.js +0 -185
@@ -1,100 +0,0 @@
1
- /**
2
- * Live replay runner for the GA outer loop.
3
- *
4
- * This is the production wiring the replay comparison (`replay.ts`) reserved as
5
- * the injected `runChange` seam: it re-runs a real change under an arm's
6
- * templates by spawning the coding agent (the same spawn+parse contract as
7
- * `proposer-agent.ts`), then parses the runner summary into a pass rate and
8
- * measures the produced code's health. It owns the *side effects* the pure
9
- * comparison module deliberately avoids:
10
- *
11
- * - spawning the agent to actually run apply → gen-test → run-test, and
12
- * - appending each candidate's outcome to its `fitness-record.jsonl` sidecar
13
- * via {@link appendCandidateFitness} — the step that, until now, no live
14
- * path performed (so every candidate was permanently "unproven").
15
- *
16
- * Honesty contract: when the agent is unavailable or its output is not a
17
- * parseable test summary, `runChange` THROWS rather than fabricating a pass
18
- * rate. The orchestrator treats a throwing candidate as "could not score"
19
- * (skipped, logged) — never as a silent zero/one. The agent spawn is injectable
20
- * so unit tests drive the whole loop with a fake `runChange`.
21
- *
22
- * Oracle freeze: the replay agent only RUNS a change and reports test results;
23
- * it is never asked to edit canonical files. The gen-test/run-test oracle is
24
- * not modified here (or anywhere).
25
- */
26
- import { spawn as nodeSpawn } from 'node:child_process';
27
- import { type MetricSource } from '../fitness/index.js';
28
- import { type ReplayArm, type RunChangeFn, type ReplayResult } from './replay.js';
29
- import type { CandidateRepoLayout } from './candidates.js';
30
- /**
31
- * Agent binary used to re-run a change. Mirrors {@link canonicalProposerBinary}.
32
- * Back-compat shim: now only relevant on the 'claude' harness branch of
33
- * {@link buildHeadlessCommand} (which derives the same value internally); kept
34
- * exported for callers that still pass an explicit `binary`.
35
- */
36
- export declare function replayAgentBinary(): string;
37
- export declare class ReplayRunError extends Error {
38
- constructor(message: string);
39
- }
40
- export interface MakeReplayRunChangeOptions {
41
- /** Project root whose produced code is measured for the health term. */
42
- repoRoot: string;
43
- /** Injected for tests; defaults to node's spawn. */
44
- spawn?: typeof nodeSpawn;
45
- /** Override the agent binary; defaults to env or 'claude'. */
46
- binary?: string;
47
- /**
48
- * Metric source for the health half of each replay outcome. Defaults to the
49
- * {@link StubMetricSource} (no signal) so a replay with no configured source
50
- * scores on functional pass rate alone — identical to the loss baseline.
51
- */
52
- healthSource?: MetricSource;
53
- /** Hard timeout per agent run (ms). Default 300000 (5 min). */
54
- timeoutMs?: number;
55
- }
56
- /** Assemble the read-only "re-run this change and report results" agent prompt. */
57
- export declare function assembleReplayPrompt(changeId: string, arm: ReplayArm): string;
58
- /**
59
- * Build a live {@link RunChangeFn} that spawns the agent to re-run a change
60
- * under an arm's templates and reports its {@link ReplayRunOutcome}. Throws
61
- * {@link ReplayRunError} when the agent is unavailable or its output has no
62
- * parseable test summary (never fabricates a pass rate).
63
- */
64
- export declare function makeReplayRunChange(opts: MakeReplayRunChangeOptions): RunChangeFn;
65
- export interface ScoreCandidatesByReplayOptions {
66
- layout: CandidateRepoLayout;
67
- /** Candidate ids competing for one canonical target. */
68
- candidateIds: string[];
69
- /** The corpus of real change ids to replay through baseline + candidate. */
70
- changeIds: string[];
71
- /** Injected re-runner (see {@link makeReplayRunChange}). */
72
- runChange: RunChangeFn;
73
- /** ISO-8601 timestamp stamped onto every appended fitness record (caller-supplied). */
74
- at: string;
75
- functionalWeight?: number;
76
- healthWeight?: number;
77
- tolerance?: number;
78
- /** Optional progress sink. */
79
- log?: (line: string) => void;
80
- }
81
- export interface CandidateReplayScore {
82
- candidateId: string;
83
- /** Replay comparison vs baseline (null when the candidate could not be scored). */
84
- result: ReplayResult | null;
85
- /** Number of fitness records appended for this candidate. */
86
- recordsAppended: number;
87
- /** Error message when scoring threw (candidate skipped). */
88
- error?: string;
89
- }
90
- /**
91
- * Score each candidate by replaying the corpus through baseline + candidate,
92
- * and APPEND one fitness record per change to the candidate's sidecar — the
93
- * step that makes {@link rankCandidatesForTarget} operate on real data.
94
- *
95
- * A candidate whose `runChange` throws (agent unavailable, unparseable output)
96
- * is skipped with its error captured; the rest still score. Promotion is NOT
97
- * performed here.
98
- */
99
- export declare function scoreCandidatesByReplay(opts: ScoreCandidatesByReplayOptions): Promise<CandidateReplayScore[]>;
100
- //# sourceMappingURL=replay-runner.d.ts.map
@@ -1,170 +0,0 @@
1
- /**
2
- * Live replay runner for the GA outer loop.
3
- *
4
- * This is the production wiring the replay comparison (`replay.ts`) reserved as
5
- * the injected `runChange` seam: it re-runs a real change under an arm's
6
- * templates by spawning the coding agent (the same spawn+parse contract as
7
- * `proposer-agent.ts`), then parses the runner summary into a pass rate and
8
- * measures the produced code's health. It owns the *side effects* the pure
9
- * comparison module deliberately avoids:
10
- *
11
- * - spawning the agent to actually run apply → gen-test → run-test, and
12
- * - appending each candidate's outcome to its `fitness-record.jsonl` sidecar
13
- * via {@link appendCandidateFitness} — the step that, until now, no live
14
- * path performed (so every candidate was permanently "unproven").
15
- *
16
- * Honesty contract: when the agent is unavailable or its output is not a
17
- * parseable test summary, `runChange` THROWS rather than fabricating a pass
18
- * rate. The orchestrator treats a throwing candidate as "could not score"
19
- * (skipped, logged) — never as a silent zero/one. The agent spawn is injectable
20
- * so unit tests drive the whole loop with a fake `runChange`.
21
- *
22
- * Oracle freeze: the replay agent only RUNS a change and reports test results;
23
- * it is never asked to edit canonical files. The gen-test/run-test oracle is
24
- * not modified here (or anywhere).
25
- */
26
- import { spawn as nodeSpawn } from 'node:child_process';
27
- import { parseTestMetrics, computePerChangeLoss, measureHealthPenalty, StubMetricSource, } from '../fitness/index.js';
28
- import { runHeadlessAgent } from './host-harness.js';
29
- import { appendCandidateFitness, readCandidateFitness } from './candidate-fitness.js';
30
- import { replayCompare, } from './replay.js';
31
- /**
32
- * Agent binary used to re-run a change. Mirrors {@link canonicalProposerBinary}.
33
- * Back-compat shim: now only relevant on the 'claude' harness branch of
34
- * {@link buildHeadlessCommand} (which derives the same value internally); kept
35
- * exported for callers that still pass an explicit `binary`.
36
- */
37
- export function replayAgentBinary() {
38
- const env = process.env.SYNERGYSPEC_SELFEVOLVING_CLAUDE_BIN;
39
- return env && env.trim().length > 0 ? env : 'claude';
40
- }
41
- export class ReplayRunError extends Error {
42
- constructor(message) {
43
- super(`replay run failed: ${message}`);
44
- this.name = 'ReplayRunError';
45
- }
46
- }
47
- /** Assemble the read-only "re-run this change and report results" agent prompt. */
48
- export function assembleReplayPrompt(changeId, arm) {
49
- return [
50
- `You are RE-RUNNING an existing SynergySpec change end-to-end to measure its`,
51
- `test outcome under the ${arm.toUpperCase()} artifact templates. This is a`,
52
- `measurement run only — do NOT modify any canonical workflow prompt, artifact`,
53
- `template, or schema, and do NOT edit the frozen gen-test/run-test oracle.`,
54
- ``,
55
- `Change id: ${changeId}`,
56
- `Arm: ${arm} (${arm === 'baseline' ? 'built-in templates' : "this candidate's proposed templates"})`,
57
- ``,
58
- `Run the change's tests (apply → gen-test → run-test) and output the test`,
59
- `runner's SUMMARY LINE verbatim as the final line of your response, e.g.`,
60
- `"Tests 12 passed | 1 failed (13)" or "5 passed, 0 failed in 0.4s".`,
61
- ].join('\n');
62
- }
63
- function runAgentOnce(spawnImpl, binary, prompt, timeoutMs, cwd) {
64
- // Delegate to the host-aware headless runner. Running in the project root
65
- // (`cwd`) so "re-run change X" resolves the change and its tests relative to
66
- // the right repo. On the (default) 'claude' harness this is byte-identical to
67
- // the previous `binary -p prompt` spawn, including the SIGTERM→SIGKILL timeout
68
- // escalation; `binary` is honored as the binary override.
69
- return runHeadlessAgent(prompt, {
70
- cwd,
71
- spawn: spawnImpl,
72
- binaryOverride: binary,
73
- timeoutMs,
74
- });
75
- }
76
- /**
77
- * Build a live {@link RunChangeFn} that spawns the agent to re-run a change
78
- * under an arm's templates and reports its {@link ReplayRunOutcome}. Throws
79
- * {@link ReplayRunError} when the agent is unavailable or its output has no
80
- * parseable test summary (never fabricates a pass rate).
81
- */
82
- export function makeReplayRunChange(opts) {
83
- const spawnImpl = opts.spawn ?? nodeSpawn;
84
- // Pass the override through unchanged (undefined → the 'claude' harness
85
- // derives the same default replayAgentBinary() value internally), so the
86
- // host-aware runner can select per-harness argv/stdin contracts.
87
- const binary = opts.binary;
88
- const healthSource = opts.healthSource ?? new StubMetricSource();
89
- const timeoutMs = opts.timeoutMs ?? 300000;
90
- return async (changeId, arm) => {
91
- const prompt = assembleReplayPrompt(changeId, arm);
92
- const result = await runAgentOnce(spawnImpl, binary, prompt, timeoutMs, opts.repoRoot);
93
- if (result.exitCode !== 0) {
94
- throw new ReplayRunError(`agent exited ${result.exitCode} for change "${changeId}" (${arm}): ${result.stderr.trim()}`);
95
- }
96
- const metrics = parseTestMetrics(result.stdout);
97
- if (metrics === null) {
98
- throw new ReplayRunError(`no parseable test summary for change "${changeId}" (${arm})`);
99
- }
100
- const healthPenalty = (await measureHealthPenalty(healthSource, opts.repoRoot)) ?? undefined;
101
- return { passRate: metrics.passRate, healthPenalty };
102
- };
103
- }
104
- /**
105
- * Score each candidate by replaying the corpus through baseline + candidate,
106
- * and APPEND one fitness record per change to the candidate's sidecar — the
107
- * step that makes {@link rankCandidatesForTarget} operate on real data.
108
- *
109
- * A candidate whose `runChange` throws (agent unavailable, unparseable output)
110
- * is skipped with its error captured; the rest still score. Promotion is NOT
111
- * performed here.
112
- */
113
- export async function scoreCandidatesByReplay(opts) {
114
- const scores = [];
115
- for (const candidateId of opts.candidateIds) {
116
- // Capture each change's candidate-arm outcome via a recording wrapper, so we
117
- // can append per-change fitness without re-running the agent.
118
- const candidateOutcomes = new Map();
119
- const recordingRunChange = async (changeId, arm) => {
120
- const outcome = await opts.runChange(changeId, arm);
121
- if (arm === 'candidate')
122
- candidateOutcomes.set(changeId, outcome);
123
- return outcome;
124
- };
125
- try {
126
- const result = await replayCompare({
127
- candidateId,
128
- changeIds: opts.changeIds,
129
- runChange: recordingRunChange,
130
- functionalWeight: opts.functionalWeight,
131
- healthWeight: opts.healthWeight,
132
- tolerance: opts.tolerance,
133
- });
134
- // Idempotency: the sidecar is append-only, so re-running the GA loop over
135
- // the same corpus must NOT duplicate records. Skip any sourceChange this
136
- // candidate already has a record for.
137
- const existing = await readCandidateFitness(opts.layout, candidateId);
138
- const seenChanges = new Set(existing.records.map((r) => r.sourceChange));
139
- let recordsAppended = 0;
140
- for (const [changeId, outcome] of candidateOutcomes) {
141
- if (seenChanges.has(changeId))
142
- continue;
143
- const loss = computePerChangeLoss({
144
- passRate: outcome.passRate,
145
- healthPenalty: outcome.healthPenalty,
146
- functionalWeight: opts.functionalWeight,
147
- healthWeight: opts.healthWeight,
148
- }).loss;
149
- await appendCandidateFitness(opts.layout, candidateId, {
150
- at: opts.at,
151
- sourceChange: changeId,
152
- passRate: outcome.passRate,
153
- healthPenalty: outcome.healthPenalty,
154
- loss,
155
- });
156
- seenChanges.add(changeId);
157
- recordsAppended += 1;
158
- }
159
- opts.log?.(`scored ${candidateId}: ${recordsAppended} record(s) appended`);
160
- scores.push({ candidateId, result, recordsAppended });
161
- }
162
- catch (err) {
163
- const message = err instanceof Error ? err.message : String(err);
164
- opts.log?.(`skipped ${candidateId}: ${message}`);
165
- scores.push({ candidateId, result: null, recordsAppended: 0, error: message });
166
- }
167
- }
168
- return scores;
169
- }
170
- //# sourceMappingURL=replay-runner.js.map
@@ -1,45 +0,0 @@
1
- export type ReplayArm = 'baseline' | 'candidate';
2
- export interface ReplayRunOutcome {
3
- /** Functional pass rate in [0,1] from re-running the change under the arm. */
4
- passRate: number;
5
- /** Optional code-health penalty in [0,1] (the health head). */
6
- healthPenalty?: number;
7
- }
8
- /** Re-run one change under one arm and report its outcome. Injected. */
9
- export type RunChangeFn = (changeId: string, arm: ReplayArm) => Promise<ReplayRunOutcome>;
10
- export interface ReplayPerChange {
11
- changeId: string;
12
- baselineLoss: number;
13
- candidateLoss: number;
14
- /** candidateLoss − baselineLoss; negative = candidate better on this change. */
15
- delta: number;
16
- }
17
- export interface ReplayResult {
18
- candidateId: string;
19
- perChange: ReplayPerChange[];
20
- baselineMeanLoss: number | null;
21
- candidateMeanLoss: number | null;
22
- /** candidateMeanLoss − baselineMeanLoss; negative = candidate better overall. */
23
- meanDelta: number | null;
24
- /** Candidate is non-inferior: meanDelta ≤ tolerance. */
25
- candidateNonInferior: boolean;
26
- /** Candidate is strictly better: meanDelta < −tolerance. */
27
- candidateBetter: boolean;
28
- }
29
- export interface ReplayOptions {
30
- candidateId: string;
31
- changeIds: string[];
32
- runChange: RunChangeFn;
33
- /** Loss weights forwarded to computePerChangeLoss. */
34
- functionalWeight?: number;
35
- healthWeight?: number;
36
- /** Non-inferiority tolerance on mean loss (default 0). */
37
- tolerance?: number;
38
- }
39
- /**
40
- * Replay each change through baseline + candidate, score both with the per-change
41
- * loss, and decide whether the candidate is non-inferior / strictly better.
42
- * Promotion remains human-gated — this only produces the comparison evidence.
43
- */
44
- export declare function replayCompare(opts: ReplayOptions): Promise<ReplayResult>;
45
- //# sourceMappingURL=replay.d.ts.map
@@ -1,56 +0,0 @@
1
- /**
2
- * Optional REPLAY comparison (P4).
3
- *
4
- * Ranks a candidate template-variant against the baseline by re-running a corpus
5
- * of recent real changes through BOTH and comparing the per-change loss. This is
6
- * the fast, apples-to-apples GA comparison the plan reserves for a pre-promotion
7
- * confidence check (the default is online fitness accumulation; see
8
- * candidate-fitness.ts). The actual pipeline re-run (apply → gen-test → run-test
9
- * under a variant's templates) is INJECTED as `runChange` — in production a
10
- * caller shells out to the built CLI; this module owns only the comparison /
11
- * aggregation logic so it stays pure and testable. See
12
- * todo/learn-self-evolution-migration-plan.md (P4).
13
- */
14
- import { computePerChangeLoss } from '../fitness/loss.js';
15
- function mean(xs) {
16
- return xs.length === 0 ? null : xs.reduce((a, b) => a + b, 0) / xs.length;
17
- }
18
- /**
19
- * Replay each change through baseline + candidate, score both with the per-change
20
- * loss, and decide whether the candidate is non-inferior / strictly better.
21
- * Promotion remains human-gated — this only produces the comparison evidence.
22
- */
23
- export async function replayCompare(opts) {
24
- const tol = opts.tolerance ?? 0;
25
- const lossOf = (o) => computePerChangeLoss({
26
- passRate: o.passRate,
27
- healthPenalty: o.healthPenalty,
28
- functionalWeight: opts.functionalWeight,
29
- healthWeight: opts.healthWeight,
30
- }).loss;
31
- const perChange = [];
32
- for (const changeId of opts.changeIds) {
33
- const [base, cand] = await Promise.all([
34
- opts.runChange(changeId, 'baseline'),
35
- opts.runChange(changeId, 'candidate'),
36
- ]);
37
- const baselineLoss = lossOf(base);
38
- const candidateLoss = lossOf(cand);
39
- perChange.push({ changeId, baselineLoss, candidateLoss, delta: candidateLoss - baselineLoss });
40
- }
41
- const baselineMeanLoss = mean(perChange.map((p) => p.baselineLoss));
42
- const candidateMeanLoss = mean(perChange.map((p) => p.candidateLoss));
43
- const meanDelta = baselineMeanLoss === null || candidateMeanLoss === null
44
- ? null
45
- : candidateMeanLoss - baselineMeanLoss;
46
- return {
47
- candidateId: opts.candidateId,
48
- perChange,
49
- baselineMeanLoss,
50
- candidateMeanLoss,
51
- meanDelta,
52
- candidateNonInferior: meanDelta !== null && meanDelta <= tol,
53
- candidateBetter: meanDelta !== null && meanDelta < -tol,
54
- };
55
- }
56
- //# sourceMappingURL=replay.js.map
@@ -1,62 +0,0 @@
1
- export declare const TEMPLATE_VARIANT_SCHEMA_VERSION: 1;
2
- export interface TemplateVariantObservation {
3
- changeName?: string;
4
- archivedAt?: string;
5
- reworkCount?: number;
6
- taskCompletionRatio?: number;
7
- taskQualityScore?: number;
8
- alignmentScore?: number;
9
- notes?: string;
10
- }
11
- export interface TemplateVariantDefinition {
12
- id: string;
13
- schema: string;
14
- artifact: string;
15
- templatePath: string;
16
- status?: 'active' | 'retired';
17
- weight?: number;
18
- observations?: TemplateVariantObservation[];
19
- }
20
- export interface TemplateVariantManifest {
21
- schemaVersion: typeof TEMPLATE_VARIANT_SCHEMA_VERSION;
22
- variants: TemplateVariantDefinition[];
23
- }
24
- export interface TemplateVariantSelection {
25
- id: string;
26
- schema: string;
27
- artifact: string;
28
- templatePath: string;
29
- score: number;
30
- observationCount: number;
31
- source: 'project-variant' | 'built-in';
32
- reason: string;
33
- }
34
- export interface LoadedTemplateWithVariant {
35
- content: string;
36
- selection: TemplateVariantSelection;
37
- }
38
- export declare function templateVariantManifestPath(projectRoot: string): string;
39
- export declare function readTemplateVariantManifest(projectRoot: string): TemplateVariantManifest;
40
- export declare function writeTemplateVariantManifest(projectRoot: string, manifest: TemplateVariantManifest): void;
41
- export declare function scoreTemplateVariant(variant: TemplateVariantDefinition): number;
42
- export declare function selectTemplateVariant(args: {
43
- projectRoot: string;
44
- schemaName: string;
45
- artifactId: string;
46
- }): TemplateVariantSelection | null;
47
- export declare function loadTemplateWithVariant(args: {
48
- schemaName: string;
49
- artifactId: string;
50
- templatePath: string;
51
- projectRoot: string;
52
- loadBuiltIn: (schemaName: string, templatePath: string, projectRoot?: string) => string;
53
- }): LoadedTemplateWithVariant;
54
- export declare function recordTemplateVariantObservation(args: {
55
- projectRoot: string;
56
- schemaName: string;
57
- artifactId: string;
58
- changeName: string;
59
- observation: Omit<TemplateVariantObservation, 'changeName' | 'archivedAt'> & Partial<Pick<TemplateVariantObservation, 'archivedAt'>>;
60
- }): boolean;
61
- export declare function renderTemplateVariantContext(selection: TemplateVariantSelection): string | null;
62
- //# sourceMappingURL=template-variants.d.ts.map
@@ -1,171 +0,0 @@
1
- import * as fs from 'node:fs';
2
- import * as path from 'node:path';
3
- import { clamp01, readTextIfExists, safeJoinInside } from './shared.js';
4
- export const TEMPLATE_VARIANT_SCHEMA_VERSION = 1;
5
- export function templateVariantManifestPath(projectRoot) {
6
- return path.join(projectRoot, '.synergyspec-selfevolving', 'self-evolution', 'template-variants.json');
7
- }
8
- export function readTemplateVariantManifest(projectRoot) {
9
- const manifestPath = templateVariantManifestPath(projectRoot);
10
- const raw = readTextIfExists(manifestPath);
11
- if (!raw) {
12
- return { schemaVersion: TEMPLATE_VARIANT_SCHEMA_VERSION, variants: [] };
13
- }
14
- try {
15
- const parsed = JSON.parse(raw);
16
- return {
17
- schemaVersion: TEMPLATE_VARIANT_SCHEMA_VERSION,
18
- variants: Array.isArray(parsed.variants) ? parsed.variants.filter(isVariant) : [],
19
- };
20
- }
21
- catch {
22
- return { schemaVersion: TEMPLATE_VARIANT_SCHEMA_VERSION, variants: [] };
23
- }
24
- }
25
- export function writeTemplateVariantManifest(projectRoot, manifest) {
26
- const manifestPath = templateVariantManifestPath(projectRoot);
27
- fs.mkdirSync(path.dirname(manifestPath), { recursive: true });
28
- fs.writeFileSync(manifestPath, `${JSON.stringify({
29
- schemaVersion: TEMPLATE_VARIANT_SCHEMA_VERSION,
30
- variants: manifest.variants,
31
- }, null, 2)}\n`, 'utf-8');
32
- }
33
- export function scoreTemplateVariant(variant) {
34
- const observations = variant.observations ?? [];
35
- if (observations.length === 0) {
36
- return clamp01(variant.weight ?? 0.5);
37
- }
38
- const scores = observations.map((obs) => {
39
- const alignment = obs.alignmentScore ?? 0.5;
40
- const taskQuality = obs.taskQualityScore ?? 0.5;
41
- const completion = obs.taskCompletionRatio ?? 0.5;
42
- const reworkPenalty = Math.min(0.3, Math.max(0, obs.reworkCount ?? 0) * 0.05);
43
- return clamp01(alignment * 0.45 + taskQuality * 0.25 + completion * 0.2 + 0.1 - reworkPenalty);
44
- });
45
- const weighted = scores
46
- .map((score, index) => ({
47
- score,
48
- archivedAtMs: Date.parse(observations[index].archivedAt ?? ''),
49
- }))
50
- .sort((a, b) => {
51
- const aTime = Number.isFinite(a.archivedAtMs) ? a.archivedAtMs : 0;
52
- const bTime = Number.isFinite(b.archivedAtMs) ? b.archivedAtMs : 0;
53
- return aTime - bTime;
54
- });
55
- const totalWeight = weighted.reduce((sum, _item, index) => sum + 1 + index / weighted.length, 0);
56
- const avg = weighted.reduce((sum, item, index) => sum + item.score * (1 + index / weighted.length), 0) / totalWeight;
57
- return clamp01(avg + (variant.weight ?? 0) * 0.05);
58
- }
59
- export function selectTemplateVariant(args) {
60
- const manifest = readTemplateVariantManifest(args.projectRoot);
61
- const candidates = manifest.variants
62
- .filter((variant) => variant.status !== 'retired' &&
63
- variant.schema === args.schemaName &&
64
- variant.artifact === args.artifactId)
65
- .map((variant) => ({ variant, score: scoreTemplateVariant(variant) }))
66
- .filter(({ variant }) => {
67
- const fullPath = safeJoinInside(args.projectRoot, variant.templatePath);
68
- return fullPath !== null && fs.existsSync(fullPath);
69
- });
70
- if (candidates.length === 0)
71
- return null;
72
- candidates.sort((a, b) => {
73
- if (b.score !== a.score)
74
- return b.score - a.score;
75
- const bLatest = latestArchivedAtMs(b.variant);
76
- const aLatest = latestArchivedAtMs(a.variant);
77
- if (bLatest !== aLatest)
78
- return bLatest - aLatest;
79
- return a.variant.id.localeCompare(b.variant.id);
80
- });
81
- const winner = candidates[0];
82
- return {
83
- id: winner.variant.id,
84
- schema: winner.variant.schema,
85
- artifact: winner.variant.artifact,
86
- templatePath: winner.variant.templatePath,
87
- score: winner.score,
88
- observationCount: winner.variant.observations?.length ?? 0,
89
- source: 'project-variant',
90
- reason: winner.variant.observations && winner.variant.observations.length > 0
91
- ? 'highest observed lifecycle score'
92
- : 'highest configured prior weight',
93
- };
94
- }
95
- export function loadTemplateWithVariant(args) {
96
- const selected = selectTemplateVariant({
97
- projectRoot: args.projectRoot,
98
- schemaName: args.schemaName,
99
- artifactId: args.artifactId,
100
- });
101
- if (selected) {
102
- const fullPath = safeJoinInside(args.projectRoot, selected.templatePath);
103
- if (fullPath) {
104
- const content = readTextIfExists(fullPath);
105
- if (content !== null) {
106
- return { content, selection: selected };
107
- }
108
- }
109
- }
110
- return {
111
- content: args.loadBuiltIn(args.schemaName, args.templatePath, args.projectRoot),
112
- selection: {
113
- id: 'built-in',
114
- schema: args.schemaName,
115
- artifact: args.artifactId,
116
- templatePath: args.templatePath,
117
- score: 0.5,
118
- observationCount: 0,
119
- source: 'built-in',
120
- reason: 'no active project template variant matched',
121
- },
122
- };
123
- }
124
- export function recordTemplateVariantObservation(args) {
125
- const manifest = readTemplateVariantManifest(args.projectRoot);
126
- const selection = selectTemplateVariant({
127
- projectRoot: args.projectRoot,
128
- schemaName: args.schemaName,
129
- artifactId: args.artifactId,
130
- });
131
- if (!selection || selection.source !== 'project-variant')
132
- return false;
133
- const variant = manifest.variants.find((v) => v.id === selection.id);
134
- if (!variant)
135
- return false;
136
- const observations = variant.observations ?? [];
137
- observations.push({
138
- ...args.observation,
139
- changeName: args.changeName,
140
- archivedAt: args.observation.archivedAt ?? new Date().toISOString(),
141
- });
142
- variant.observations = observations.slice(-50);
143
- writeTemplateVariantManifest(args.projectRoot, manifest);
144
- return true;
145
- }
146
- export function renderTemplateVariantContext(selection) {
147
- if (selection.source !== 'project-variant')
148
- return null;
149
- return [
150
- `Template variant selected: ${selection.id}`,
151
- `Artifact: ${selection.schema}/${selection.artifact}`,
152
- `Score: ${selection.score.toFixed(2)} from ${selection.observationCount} lifecycle observation(s)`,
153
- `Reason: ${selection.reason}`,
154
- `Variant template: ${selection.templatePath}`,
155
- ].join('\n');
156
- }
157
- function isVariant(value) {
158
- if (!value || typeof value !== 'object')
159
- return false;
160
- const v = value;
161
- return (typeof v.id === 'string' &&
162
- typeof v.schema === 'string' &&
163
- typeof v.artifact === 'string' &&
164
- typeof v.templatePath === 'string');
165
- }
166
- function latestArchivedAtMs(variant) {
167
- return Math.max(...(variant.observations ?? [])
168
- .map((obs) => Date.parse(obs.archivedAt ?? ''))
169
- .filter((n) => Number.isFinite(n)), 0);
170
- }
171
- //# sourceMappingURL=template-variants.js.map
@@ -1,65 +0,0 @@
1
- import { type CandidateRepoLayout } from './candidates.js';
2
/**
 * Summary of a single earlier candidate, reduced to the signals the proposer
 * learns from.
 */
export interface TrajectoryEntry {
    /** Id of the candidate this entry summarizes. */
    candidateId: string;
    /** Mean accumulated loss, within [0,1]; `null` while the candidate is unproven. */
    meanLoss: number | null;
    /** Mean accumulated functional pass rate, within [0,1]; `null` while unproven. */
    meanPassRate: number | null;
    /** How many fitness records the two means are computed from. */
    count: number;
    /** Trend of the loss over the record history. */
    trend: string;
    /** Disposition label ({@link verdictLabel}): promoted / rejected / unproven / … */
    verdict: string;
    /** Reason given for the verdict, trimmed and cut to ≤120 chars; left out when empty. */
    reason?: string;
    /**
     * Excerpt of `rationale.md`: its first line that is neither a heading nor
     * empty, cut to ≤160 chars. `''` when no such line exists for the candidate.
     */
    rationaleExcerpt: string;
}
23
/** Optional settings accepted by `buildOptimizationTrajectory`. */
export interface BuildTrajectoryOptions {
    /** Upper bound on how many entries are returned. Defaults to 6. */
    maxEntries?: number;
    /**
     * Candidate ids that must never appear in the result. Typically these are
     * the sibling variants created in the SAME run as the proposal this
     * trajectory feeds: they have no outcome yet, and showing a candidate its
     * own siblings would be circular.
     */
    excludeCandidateIds?: string[];
}
33
/** Optional settings accepted by `renderTrajectoryBlock`. */
export interface RenderTrajectoryOptions {
    /** Loss of the currently-promoted baseline; the proposal has to beat it. */
    baselineLoss?: number | null;
    /** Baseline candidate id, used for attribution in the rendered block. */
    baselineCandidateId?: string;
}
39
/**
 * Assemble the optimization trajectory for `targetId`, i.e. the most
 * informative earlier candidates that touched that target.
 *
 * How entries are chosen (at most `maxEntries`, 6 by default):
 *   1. Every candidate that listCandidates returns for the target is read
 *      (createdAt DESC); ids listed in `excludeCandidateIds` are skipped.
 *   2. Pure placeholders are dropped: a candidate with NO edits.json, NO
 *      fitness records, and NO verdict carries no signal.
 *   3. What remains is split into PROVEN (meanLoss !== null) and
 *      JUDGED-UNPROVEN (everything else; these survived because they have a
 *      verdict and/or edits). Proven entries are sorted by meanLoss ASC
 *      (best first, ties broken by candidateId ASC). Proven entries are taken
 *      first; leftover slots are filled with unproven entries in
 *      listCandidates order (createdAt DESC).
 *
 * The returned order is therefore proven entries best-first (lowest
 * meanLoss), followed by unproven ones. {@link renderTrajectoryBlock}
 * re-sorts for display, so this order only needs to be stable and testable.
 *
 * @param layout - Candidate repository layout to read from.
 * @param targetId - The target whose prior candidates are summarized.
 * @param opts - Optional cap and exclusion list.
 * @returns A promise resolving to the selected entries in the order above.
 */
export declare function buildOptimizationTrajectory(layout: CandidateRepoLayout, targetId: string, opts?: BuildTrajectoryOptions): Promise<TrajectoryEntry[]>;
59
/**
 * Turn trajectory entries into a prompt block using OPRO ordering: worst
 * entry first and BEST LAST, so the lowest-loss entry sits closest to the
 * generation point.
 *
 * @param entries - The trajectory entries to render.
 * @param opts - Optional baseline loss and baseline candidate id.
 * @returns The rendered block, or `''` for an empty list so callers can omit
 *   the section.
 */
export declare function renderTrajectoryBlock(entries: TrajectoryEntry[], opts?: RenderTrajectoryOptions): string;
65
- //# sourceMappingURL=trajectory.d.ts.map