synergyspec-selfevolving 1.1.10 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -3
- package/dist/commands/learn.js +78 -11
- package/dist/commands/self-evolution.d.ts +13 -0
- package/dist/commands/self-evolution.js +156 -20
- package/dist/commands/workflow/status.js +13 -0
- package/dist/core/change-readiness.d.ts +24 -0
- package/dist/core/change-readiness.js +47 -0
- package/dist/core/config-prompts.js +10 -0
- package/dist/core/fitness/health/local-source.d.ts +9 -6
- package/dist/core/fitness/health/local-source.js +9 -6
- package/dist/core/fitness/health/resolve-source.d.ts +4 -3
- package/dist/core/fitness/health/resolve-source.js +5 -4
- package/dist/core/fitness/sample.d.ts +17 -0
- package/dist/core/learn.d.ts +7 -0
- package/dist/core/learn.js +57 -5
- package/dist/core/project-config.d.ts +1 -0
- package/dist/core/project-config.js +11 -8
- package/dist/core/self-evolution/health-baseline.d.ts +24 -0
- package/dist/core/self-evolution/health-baseline.js +78 -0
- package/dist/core/self-evolution/index.d.ts +1 -0
- package/dist/core/self-evolution/index.js +1 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +16 -1
- package/dist/core/self-evolution/learn-observation-adapter.js +101 -15
- package/dist/core/self-evolution/promote.d.ts +25 -0
- package/dist/core/self-evolution/promote.js +21 -0
- package/dist/core/self-evolution/target-evolution.d.ts +7 -0
- package/dist/core/self-evolution/target-evolution.js +9 -0
- package/dist/core/templates/workflows/learn.js +10 -5
- package/package.json +2 -1
- package/scripts/code-health.py +1154 -0
|
@@ -45,6 +45,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
45
45
|
const artifactStatus = deriveArtifactWorkflowStatus(artifactGraph);
|
|
46
46
|
const taskReadiness = await readTaskReadiness(context.changeDir);
|
|
47
47
|
const evidence = await readEvidenceReadiness(context.changeDir);
|
|
48
|
+
const evolution = await readEvolutionOutcome(context.changeDir);
|
|
48
49
|
const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
|
|
49
50
|
return {
|
|
50
51
|
changeName,
|
|
@@ -57,6 +58,10 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
57
58
|
totalTasks: taskReadiness.total,
|
|
58
59
|
incompleteTasks: taskReadiness.incomplete,
|
|
59
60
|
evidence,
|
|
61
|
+
evolution,
|
|
62
|
+
// ANNOTATE, do not GATE: a refused/failed self-evolution is surfaced via
|
|
63
|
+
// `evolution` but never blocks archiving a finished change (change completion
|
|
64
|
+
// and tool self-evolution are orthogonal).
|
|
60
65
|
isArchiveReady: artifactStatus === 'complete' &&
|
|
61
66
|
taskReadiness.status === 'complete' &&
|
|
62
67
|
evidence.missing.length === 0,
|
|
@@ -73,6 +78,7 @@ export function toReadinessJson(readiness) {
|
|
|
73
78
|
totalTasks: readiness.totalTasks,
|
|
74
79
|
incompleteTasks: readiness.incompleteTasks,
|
|
75
80
|
evidence: readiness.evidence,
|
|
81
|
+
evolution: readiness.evolution,
|
|
76
82
|
isArchiveReady: readiness.isArchiveReady,
|
|
77
83
|
};
|
|
78
84
|
}
|
|
@@ -134,6 +140,47 @@ async function readEvidenceReadiness(changeDir) {
|
|
|
134
140
|
missing,
|
|
135
141
|
};
|
|
136
142
|
}
|
|
143
|
+
/**
 * Load the evolution outcome recorded for a change from
 * `<changeDir>/evolution-result.json`.
 *
 * The record's `outcome` string is collapsed to a coarse status:
 *   - `'promoted'`            → `'promoted'`
 *   - starts with `refused-`  → `'refused'`
 *   - starts with `error-`    → `'error'`
 *   - anything else           → the `'not-run'` fallback
 *
 * Never throws: an unreadable file, invalid JSON, a `null` payload, or an
 * unrecognised outcome all yield the same `'not-run'` fallback so callers can
 * always render an evolution line.
 *
 * @param changeDir Directory of the change whose outcome should be read.
 * @returns The normalised outcome; optional fields are `undefined` unless the
 *   record carries them as strings, and `promotedFiles` keeps string entries only.
 */
async function readEvolutionOutcome(changeDir) {
    const fallback = { status: 'not-run', promoted: false, promotedFiles: [] };
    // Non-string values are treated as absent rather than coerced.
    const stringOrUndefined = (value) => (typeof value === 'string' ? value : undefined);
    try {
        const resultFile = path.join(changeDir, 'evolution-result.json');
        const parsed = JSON.parse(await fs.readFile(resultFile, 'utf-8'));
        // A `null` payload throws on this property access and lands in the catch.
        const outcome = stringOrUndefined(parsed.outcome) ?? '';
        let status;
        if (outcome === 'promoted') {
            status = 'promoted';
        }
        else if (outcome.startsWith('refused-')) {
            status = 'refused';
        }
        else if (outcome.startsWith('error-')) {
            status = 'error';
        }
        else {
            // Unknown or missing outcome: forward-compatible degrade.
            return fallback;
        }
        const listedFiles = parsed.promotedFiles;
        const promotedFiles = Array.isArray(listedFiles)
            ? listedFiles.filter((entry) => typeof entry === 'string')
            : [];
        return {
            status,
            reason: stringOrUndefined(parsed.reason),
            targetId: stringOrUndefined(parsed.targetId),
            // Only a literal boolean `true` counts as promoted.
            promoted: parsed.promoted === true,
            promotedFiles,
            timestamp: stringOrUndefined(parsed.timestamp),
        };
    }
    catch {
        return fallback;
    }
}
|
|
137
184
|
async function testReportRequiresPlan(testReportPath) {
|
|
138
185
|
try {
|
|
139
186
|
const content = await fs.readFile(testReportPath, 'utf-8');
|
|
@@ -29,6 +29,16 @@ export function serializeConfig(config) {
|
|
|
29
29
|
lines.push('# - Always include a "Non-goals" section');
|
|
30
30
|
lines.push('# tasks:');
|
|
31
31
|
lines.push('# - Break tasks into chunks of max 2 hours');
|
|
32
|
+
lines.push('');
|
|
33
|
+
// Code-health scoring for self-evolution (default-on). Feeds the 0.3·health
|
|
34
|
+
// half of the per-change fitness loss and gates auto-promotion on a measured
|
|
35
|
+
// health regression. `local` is a dependency-free, multi-language analyzer
|
|
36
|
+
// (Python, Rust, C, C++) bundled with the tool — it needs Python 3 to run, but
|
|
37
|
+
// no server and no network. Set `source: stub` to turn health scoring off.
|
|
38
|
+
lines.push('# Code-health scoring for self-evolution (Python, Rust, C, C++).');
|
|
39
|
+
lines.push('# Set source: stub to disable. local-python is an alias for local.');
|
|
40
|
+
lines.push('health:');
|
|
41
|
+
lines.push(' source: local');
|
|
32
42
|
return lines.join('\n') + '\n';
|
|
33
43
|
}
|
|
34
44
|
//# sourceMappingURL=config-prompts.js.map
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Self-contained local
|
|
2
|
+
* Self-contained local code-health metric source (config token `local`, with
|
|
3
|
+
* `local-python` accepted as a back-compat alias).
|
|
3
4
|
*
|
|
4
|
-
* This is the
|
|
5
|
-
*
|
|
6
|
-
* (Python 3 stdlib only
|
|
7
|
-
* server, no network, and no
|
|
5
|
+
* This is the default {@link MetricSource} for the self-evolution fitness loop:
|
|
6
|
+
* it shells out to the dependency-free, multi-language `scripts/code-health.py`
|
|
7
|
+
* analyzer (Python 3 stdlib only; reads Python, Rust, C, and C++ source) so
|
|
8
|
+
* health metrics can be computed with no SonarQube server, no network, and no
|
|
9
|
+
* third-party packages. Python 3 must be installed to RUN the analyzer, but the
|
|
10
|
+
* code it measures can be any of the supported languages.
|
|
8
11
|
*
|
|
9
12
|
* Degrades gracefully: if Python is not installed, the spawn errors, or the
|
|
10
13
|
* analyzer emits something that is not the expected JSON, `measure()` resolves
|
|
@@ -28,7 +31,7 @@ export interface LocalPythonMetricSourceOptions {
|
|
|
28
31
|
scriptPath?: string;
|
|
29
32
|
}
|
|
30
33
|
export declare class LocalPythonMetricSource implements MetricSource {
|
|
31
|
-
readonly name = "local
|
|
34
|
+
readonly name = "local";
|
|
32
35
|
private readonly pythonBin;
|
|
33
36
|
private readonly spawnImpl;
|
|
34
37
|
private readonly scriptPath;
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Self-contained local
|
|
2
|
+
* Self-contained local code-health metric source (config token `local`, with
|
|
3
|
+
* `local-python` accepted as a back-compat alias).
|
|
3
4
|
*
|
|
4
|
-
* This is the
|
|
5
|
-
*
|
|
6
|
-
* (Python 3 stdlib only
|
|
7
|
-
* server, no network, and no
|
|
5
|
+
* This is the default {@link MetricSource} for the self-evolution fitness loop:
|
|
6
|
+
* it shells out to the dependency-free, multi-language `scripts/code-health.py`
|
|
7
|
+
* analyzer (Python 3 stdlib only; reads Python, Rust, C, and C++ source) so
|
|
8
|
+
* health metrics can be computed with no SonarQube server, no network, and no
|
|
9
|
+
* third-party packages. Python 3 must be installed to RUN the analyzer, but the
|
|
10
|
+
* code it measures can be any of the supported languages.
|
|
8
11
|
*
|
|
9
12
|
* Degrades gracefully: if Python is not installed, the spawn errors, or the
|
|
10
13
|
* analyzer emits something that is not the expected JSON, `measure()` resolves
|
|
@@ -85,7 +88,7 @@ function toRawHealthMetrics(parsed) {
|
|
|
85
88
|
};
|
|
86
89
|
}
|
|
87
90
|
export class LocalPythonMetricSource {
|
|
88
|
-
name = 'local
|
|
91
|
+
name = 'local';
|
|
89
92
|
pythonBin;
|
|
90
93
|
spawnImpl;
|
|
91
94
|
scriptPath;
|
|
@@ -20,9 +20,10 @@ import type { MetricSource } from './metric-source.js';
|
|
|
20
20
|
/**
|
|
21
21
|
* Build the {@link MetricSource} selected by `config.health.source`.
|
|
22
22
|
*
|
|
23
|
-
* - absent / `stub`
|
|
24
|
-
* - `local-python`
|
|
25
|
-
* `scripts/code-health.py`; needs Python 3 but no
|
|
23
|
+
* - absent / `stub` → {@link StubMetricSource} (no signal).
|
|
24
|
+
* - `local` / `local-python` → {@link LocalPythonMetricSource} (shells out to
|
|
25
|
+
* the bundled multi-language `scripts/code-health.py`; needs Python 3 but no
|
|
26
|
+
* network/server). `local-python` is a back-compat alias for `local`.
|
|
26
27
|
* - `sonarqube` → {@link SonarQubeMetricSource} when `sonarUrl`,
|
|
27
28
|
* `sonarToken`, and `sonarProjectKey` are all present; otherwise falls back
|
|
28
29
|
* to the stub (a misconfigured Sonar block must not silently fabricate a
|
|
@@ -3,9 +3,10 @@ import { LocalPythonMetricSource } from './local-source.js';
|
|
|
3
3
|
/**
|
|
4
4
|
* Build the {@link MetricSource} selected by `config.health.source`.
|
|
5
5
|
*
|
|
6
|
-
* - absent / `stub`
|
|
7
|
-
* - `local-python`
|
|
8
|
-
* `scripts/code-health.py`; needs Python 3 but no
|
|
6
|
+
* - absent / `stub` → {@link StubMetricSource} (no signal).
|
|
7
|
+
* - `local` / `local-python` → {@link LocalPythonMetricSource} (shells out to
|
|
8
|
+
* the bundled multi-language `scripts/code-health.py`; needs Python 3 but no
|
|
9
|
+
* network/server). `local-python` is a back-compat alias for `local`.
|
|
9
10
|
* - `sonarqube` → {@link SonarQubeMetricSource} when `sonarUrl`,
|
|
10
11
|
* `sonarToken`, and `sonarProjectKey` are all present; otherwise falls back
|
|
11
12
|
* to the stub (a misconfigured Sonar block must not silently fabricate a
|
|
@@ -18,7 +19,7 @@ export function resolveMetricSource(config) {
|
|
|
18
19
|
if (!health || health.source === 'stub') {
|
|
19
20
|
return new StubMetricSource();
|
|
20
21
|
}
|
|
21
|
-
if (health.source === 'local-python') {
|
|
22
|
+
if (health.source === 'local' || health.source === 'local-python') {
|
|
22
23
|
return new LocalPythonMetricSource({ pythonBin: health.pythonBin });
|
|
23
24
|
}
|
|
24
25
|
if (health.source === 'sonarqube') {
|
|
@@ -19,6 +19,23 @@ export interface FitnessSample {
|
|
|
19
19
|
* same as all-tests-failing, so we record "no signal" rather than loss = 1.
|
|
20
20
|
*/
|
|
21
21
|
loss: PerChangeLoss | null;
|
|
22
|
+
/**
|
|
23
|
+
* The RAW code-health penalty in [0,1] from the active {@link MetricSource},
|
|
24
|
+
* or `null` when there was NO health signal (stub source, analyzer missing/
|
|
25
|
+
* failed, or no measurable source files). This is deliberately distinct from
|
|
26
|
+
* `loss.healthPenalty`, which is the `?? 0`-defaulted value folded into the
|
|
27
|
+
* loss and therefore cannot distinguish "measured a healthy 0" from "no
|
|
28
|
+
* signal". The default-path health gate keys off THIS field: `null` ⇒ no gate.
|
|
29
|
+
* Omitted entirely when no metric source was consulted (no test-report), so
|
|
30
|
+
* the authored-artifact-only path stays byte-identical.
|
|
31
|
+
*/
|
|
32
|
+
healthSignal?: number | null;
|
|
33
|
+
/**
|
|
34
|
+
* Name of the active metric source ('stub' | 'local' | 'sonarqube'); used for
|
|
35
|
+
* logs and the "configured but no signal" observation. Omitted when no source
|
|
36
|
+
* was consulted.
|
|
37
|
+
*/
|
|
38
|
+
healthSource?: string;
|
|
22
39
|
/**
|
|
23
40
|
* Ground-truth facts distilled from the agent's ACTUAL trajectory (which
|
|
24
41
|
* harness, whether a test runner was really observed running, the observed
|
package/dist/core/learn.d.ts
CHANGED
|
@@ -86,6 +86,13 @@ export interface LearnObservation {
|
|
|
86
86
|
}>;
|
|
87
87
|
/** Raw tags forwarded from the source signal; the adapter may interpret them. */
|
|
88
88
|
tags: string[];
|
|
89
|
+
/**
|
|
90
|
+
* Optional triage axis. Omitted (the default) for the neutral reflection signals
|
|
91
|
+
* — keeps their JSON byte-identical. `'defect'` marks an actionable tool defect
|
|
92
|
+
* the agent must SURFACE (e.g. an unresolved evolution target), distinct from a
|
|
93
|
+
* safe gate refusal.
|
|
94
|
+
*/
|
|
95
|
+
severity?: 'info' | 'action' | 'defect';
|
|
89
96
|
}
|
|
90
97
|
export interface LearnReport {
|
|
91
98
|
changeName: string;
|
package/dist/core/learn.js
CHANGED
|
@@ -50,8 +50,10 @@ export async function generateLearnReport(args = {}) {
|
|
|
50
50
|
const testReport = artifacts.evidence.find((f) => /(?:^|[\\/])(?:test-report|run-tests?-report|ci-report)\.md$/i.test(f.relativePath));
|
|
51
51
|
const testMetrics = testReport ? parseTestMetrics(testReport.content) : null;
|
|
52
52
|
let healthPenalty;
|
|
53
|
+
let healthSourceName;
|
|
53
54
|
if (testMetrics) {
|
|
54
55
|
const metricSource = resolveMetricSource(readProjectConfig(projectRoot));
|
|
56
|
+
healthSourceName = metricSource.name;
|
|
55
57
|
healthPenalty = (await measureHealthPenalty(metricSource, projectRoot)) ?? undefined;
|
|
56
58
|
}
|
|
57
59
|
// Ground truth: distil the agent's ACTUAL trajectory — main thread AND the
|
|
@@ -83,6 +85,15 @@ export async function generateLearnReport(args = {}) {
|
|
|
83
85
|
verified: passRateVerified,
|
|
84
86
|
})
|
|
85
87
|
: null,
|
|
88
|
+
// Record the raw health signal + source ONLY when a NON-STUB source was
|
|
89
|
+
// consulted (i.e. health is actually configured). `healthSignal` is the
|
|
90
|
+
// measured penalty or null ("no signal"); the default-path health gate reads
|
|
91
|
+
// it to tell apart a healthy 0 from an absent measurement. Omitted for the
|
|
92
|
+
// stub source and the artifact-only path so both stay byte-identical to the
|
|
93
|
+
// functional-only baseline.
|
|
94
|
+
...(healthSourceName && healthSourceName !== 'stub'
|
|
95
|
+
? { healthSignal: healthPenalty ?? null, healthSource: healthSourceName }
|
|
96
|
+
: {}),
|
|
86
97
|
...(trajectoryFacts ? { trajectoryFacts } : {}),
|
|
87
98
|
};
|
|
88
99
|
const reuse = inferReuseConclusions(summary, artifacts);
|
|
@@ -145,6 +156,27 @@ export async function generateLearnReport(args = {}) {
|
|
|
145
156
|
});
|
|
146
157
|
}
|
|
147
158
|
}
|
|
159
|
+
// Health head is CONFIGURED (a non-stub source was selected) but produced NO
|
|
160
|
+
// signal: surface it loudly rather than letting the health half of the loss
|
|
161
|
+
// silently default to 0. Default-on health must never fail invisibly — this
|
|
162
|
+
// is the same anti-opacity principle as the trajectory observations above.
|
|
163
|
+
// Operator-actionable (annotate, not gate): NOT a disqualifying code.
|
|
164
|
+
if (healthSourceName && healthSourceName !== 'stub' && healthPenalty === undefined) {
|
|
165
|
+
observations.push({
|
|
166
|
+
code: 'health-signal-unavailable',
|
|
167
|
+
summary: `code-health source '${healthSourceName}' is configured but produced no signal ` +
|
|
168
|
+
`(Python 3 missing, the analyzer failed, or there were no measurable source files); ` +
|
|
169
|
+
`the health half of the per-change loss defaulted to 0.`,
|
|
170
|
+
evidence: [
|
|
171
|
+
{
|
|
172
|
+
file: 'synergyspec-selfevolving/config.yaml',
|
|
173
|
+
detail: `health.source: ${healthSourceName}`,
|
|
174
|
+
},
|
|
175
|
+
],
|
|
176
|
+
tags: ['health', 'no-signal', 'action-required'],
|
|
177
|
+
severity: 'action',
|
|
178
|
+
});
|
|
179
|
+
}
|
|
148
180
|
return {
|
|
149
181
|
...reportSoFar,
|
|
150
182
|
observations,
|
|
@@ -361,7 +393,12 @@ export function renderLearnReport(report, applied) {
|
|
|
361
393
|
}
|
|
362
394
|
else {
|
|
363
395
|
for (const observation of report.observations) {
|
|
364
|
-
|
|
396
|
+
const marker = observation.severity === 'defect'
|
|
397
|
+
? 'DEFECT: '
|
|
398
|
+
: observation.severity === 'action'
|
|
399
|
+
? 'ACTION: '
|
|
400
|
+
: '';
|
|
401
|
+
lines.push(`- ${marker}[${observation.code}] ${observation.summary}`);
|
|
365
402
|
const firstEvidence = observation.evidence[0];
|
|
366
403
|
if (firstEvidence) {
|
|
367
404
|
lines.push(` evidence: ${firstEvidence.file}`);
|
|
@@ -1396,7 +1433,9 @@ function splitTableCells(line) {
|
|
|
1396
1433
|
.map((c) => c.trim().replace(/\*\*/g, '').replace(/`/g, ''));
|
|
1397
1434
|
}
|
|
1398
1435
|
/** A single cell whose whole value is a passing/neutral verdict. */
|
|
1399
|
-
const PASS_CELL_RE = /^(?:pass(?:ed|es)?|covered|ok
|
|
1436
|
+
const PASS_CELL_RE = /^(?:pass(?:ed|es)?|covered|ok|✓|✔|✅|n\/?a|none|-|—)$/i;
|
|
1437
|
+
/** A single cell whose whole value is an explicit failure/error verdict. */
|
|
1438
|
+
const FAIL_CELL_RE = /^(?:fail(?:ed|s|ing|ure)?|error(?:ed|s)?|blocked|incomplete|✗|✘|❌|❎)$/i;
|
|
1400
1439
|
/**
|
|
1401
1440
|
* Find lines in a verification artifact that look like UNRESOLVED failure
|
|
1402
1441
|
* evidence. The hazard (the same prose-keyword trap as the trajectory runner
|
|
@@ -1431,13 +1470,26 @@ export function extractFailureEvidence(file) {
|
|
|
1431
1470
|
const successor = nextNonEmpty(i);
|
|
1432
1471
|
if (successor !== null && isTableSeparator(successor))
|
|
1433
1472
|
continue;
|
|
1434
|
-
// A data row whose outcome/status cell reads PASS
|
|
1473
|
+
// A data row whose outcome/status cell reads PASS/✓/✅/covered is a PASSING
|
|
1435
1474
|
// result, even when another cell names the failure scenario it exercises.
|
|
1436
1475
|
if (cells.some((c) => PASS_CELL_RE.test(c)))
|
|
1437
1476
|
continue;
|
|
1477
|
+
// A table row is failure evidence ONLY when some cell is an explicit
|
|
1478
|
+
// FAIL/ERROR verdict. Prose in a scenario/description cell ("Open fails
|
|
1479
|
+
// because the path is invalid") does NOT count — that is how use-case →
|
|
1480
|
+
// test MAPPING tables (e.g. spec-tests.md: ID | Scenario | Layer | Type |
|
|
1481
|
+
// Test) legitimately describe negative-path scenarios with no outcome
|
|
1482
|
+
// column. Deciding structurally here avoids flagging those passing rows.
|
|
1483
|
+
if (cells.some((c) => FAIL_CELL_RE.test(c))) {
|
|
1484
|
+
matches.push({ file: file.relativePath, line: limitText(trimmed, 180) });
|
|
1485
|
+
if (matches.length >= 6)
|
|
1486
|
+
break;
|
|
1487
|
+
}
|
|
1488
|
+
continue;
|
|
1438
1489
|
}
|
|
1439
|
-
|
|
1440
|
-
// A list item explicitly marked PASS (
|
|
1490
|
+
if (/^[-*\s]*(?:✅|✓|✔)/.test(trimmed) || /^[-*\s]*(?:\*\*)?\s*pass(?:ed|es)?\b/i.test(trimmed)) {
|
|
1491
|
+
// A list item explicitly marked PASS (✅/✓/✔ or the word "pass") — a
|
|
1492
|
+
// passed negative-path scenario, even when its prose says "fails".
|
|
1441
1493
|
continue;
|
|
1442
1494
|
}
|
|
1443
1495
|
if (/\b(no|none|zero|0)\s+(failures?|failed|errors?|critical issues)\b/i.test(trimmed)) {
|
|
@@ -48,14 +48,17 @@ export const ProjectConfigSchema = z.object({
|
|
|
48
48
|
.describe('Per-canonical-target self-evolution toggles'),
|
|
49
49
|
// Optional: code-health metric source for the self-evolution fitness loss.
|
|
50
50
|
// `source` selects which MetricSource feeds the 0.3·health term of the
|
|
51
|
-
// per-change loss.
|
|
52
|
-
// byte-identical to the functional-only
|
|
53
|
-
//
|
|
54
|
-
//
|
|
51
|
+
// per-change loss. New projects scaffold `source: local` (default-on);
|
|
52
|
+
// set `source: stub` to make the loss byte-identical to the functional-only
|
|
53
|
+
// baseline (the pre-1.1.12 default). `local-python` is a back-compat alias
|
|
54
|
+
// for `local`. See src/core/fitness/health/resolve-source.ts.
|
|
55
55
|
health: z
|
|
56
56
|
.object({
|
|
57
|
-
|
|
58
|
-
//
|
|
57
|
+
// local: dependency-free multi-language analyzer (Python, Rust, C, C++)
|
|
58
|
+
// via the bundled scripts/code-health.py (needs Python 3 to run, but no
|
|
59
|
+
// network/server). `local-python` is an accepted alias for `local`.
|
|
60
|
+
source: z.enum(['stub', 'local', 'local-python', 'sonarqube']).default('stub'),
|
|
61
|
+
// local: interpreter override for the bundled analyzer (else env / 'python').
|
|
59
62
|
pythonBin: z.string().optional(),
|
|
60
63
|
// sonarqube: server connection + project key.
|
|
61
64
|
sonarUrl: z.string().optional(),
|
|
@@ -213,13 +216,13 @@ export function readProjectConfig(projectRoot) {
|
|
|
213
216
|
const rawHealth = raw.health;
|
|
214
217
|
const health = { source: 'stub' };
|
|
215
218
|
const sourceResult = z
|
|
216
|
-
.enum(['stub', 'local-python', 'sonarqube'])
|
|
219
|
+
.enum(['stub', 'local', 'local-python', 'sonarqube'])
|
|
217
220
|
.safeParse(rawHealth.source);
|
|
218
221
|
if (sourceResult.success) {
|
|
219
222
|
health.source = sourceResult.data;
|
|
220
223
|
}
|
|
221
224
|
else if (rawHealth.source !== undefined) {
|
|
222
|
-
console.warn(`Invalid 'health.source' in config (must be 'stub', 'local-python', or 'sonarqube'); using 'stub'`);
|
|
225
|
+
console.warn(`Invalid 'health.source' in config (must be 'stub', 'local', 'local-python', or 'sonarqube'); using 'stub'`);
|
|
223
226
|
}
|
|
224
227
|
for (const key of ['pythonBin', 'sonarUrl', 'sonarToken', 'sonarProjectKey']) {
|
|
225
228
|
const strResult = z.string().min(1).safeParse(rawHealth[key]);
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export declare const HEALTH_BASELINE_FILE = "health-baseline.json";
|
|
2
|
+
export interface HealthBaseline {
|
|
3
|
+
/** Recorded code-health penalty in [0,1] (lower is better) to compare against. */
|
|
4
|
+
healthPenalty: number;
|
|
5
|
+
/** ISO-8601 UTC timestamp the baseline was last updated. */
|
|
6
|
+
updatedAt: string;
|
|
7
|
+
/** The change whose measurement set this baseline (provenance; optional). */
|
|
8
|
+
sourceChange?: string;
|
|
9
|
+
/** The candidate promoted when this baseline was set (provenance; optional). */
|
|
10
|
+
candidateId?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Read the recorded baseline. Returns `null` when the file is absent, unreadable,
|
|
14
|
+
* unparseable, or carries a non-finite `healthPenalty` (treated as "no baseline
|
|
15
|
+
* yet" ⇒ the gate does not fire and the first measured run records it).
|
|
16
|
+
*/
|
|
17
|
+
export declare function readHealthBaseline(repoRoot: string): Promise<HealthBaseline | null>;
|
|
18
|
+
/**
|
|
19
|
+
* Write/overwrite the recorded baseline. Best-effort: creates the parent dir if
|
|
20
|
+
* needed and swallows any error (a failed baseline write must not fail a promote
|
|
21
|
+
* that already succeeded). Returns `true` on a successful write, `false` otherwise.
|
|
22
|
+
*/
|
|
23
|
+
export declare function writeHealthBaseline(repoRoot: string, baseline: HealthBaseline): Promise<boolean>;
|
|
24
|
+
//# sourceMappingURL=health-baseline.d.ts.map
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-repo code-health BASELINE for the default-path health gate.
|
|
3
|
+
*
|
|
4
|
+
* The self-evolution health gate (see {@link import('./promote.js').shouldAutoPromote})
|
|
5
|
+
* blocks auto-promotion when a change's measured code-health is WORSE than the
|
|
6
|
+
* last accepted state. On the everyday (no-replay) path there is no candidate
|
|
7
|
+
* post-health to measure — a candidate is a template/prompt edit, while health
|
|
8
|
+
* is measured on generated code — so the honest "pre vs post" comparison is
|
|
9
|
+
* "this change's measured health" vs "the health recorded at the last accepted
|
|
10
|
+
* promotion". This module persists that single recorded baseline.
|
|
11
|
+
*
|
|
12
|
+
* Stored at `<repoRoot>/.synergyspec-selfevolving/self-evolution/health-baseline.json`
|
|
13
|
+
* (alongside the `candidates/` dir). One number per repo, because health is
|
|
14
|
+
* measured over the whole project source, not per-target.
|
|
15
|
+
*
|
|
16
|
+
* Defensive by construction: a missing/corrupt file reads as `null` (⇒ no gate,
|
|
17
|
+
* first run records & allows), and writes are best-effort (a write failure must
|
|
18
|
+
* never turn a successful promote into an error). Callers supply the timestamp,
|
|
19
|
+
* so this module never calls Date.now (deterministic + replay-safe).
|
|
20
|
+
*/
|
|
21
|
+
import { promises as fs } from 'node:fs';
|
|
22
|
+
import * as path from 'node:path';
|
|
23
|
+
export const HEALTH_BASELINE_FILE = 'health-baseline.json';
/**
 * Absolute path of the baseline file:
 * `<repoRoot>/.synergyspec-selfevolving/self-evolution/health-baseline.json`.
 */
function baselinePath(repoRoot) {
    return path.join(path.resolve(repoRoot), '.synergyspec-selfevolving', 'self-evolution', HEALTH_BASELINE_FILE);
}
/** True for real, finite numbers (rejects NaN, ±Infinity and non-numbers). */
function isFiniteNumber(v) {
    return typeof v === 'number' && Number.isFinite(v);
}
/**
 * True when `v` is a usable health penalty: a finite number inside the
 * documented [0,1] range. Anything else is treated as a corrupt baseline.
 */
function isValidPenalty(v) {
    return isFiniteNumber(v) && v >= 0 && v <= 1;
}
/**
 * Read the recorded baseline.
 *
 * Returns `null` ("no baseline yet") when the file is absent, unreadable,
 * unparseable, not a JSON object, or carries a `healthPenalty` that is not a
 * finite number in [0,1]. An out-of-range value is rejected like any other
 * corruption so it is never compared against a real measurement.
 *
 * @param repoRoot Repository root the baseline is stored under.
 * @returns The sanitised baseline, or `null` when none is usable. `updatedAt`
 *   falls back to `''`; `sourceChange` / `candidateId` are included only when
 *   they are strings.
 */
export async function readHealthBaseline(repoRoot) {
    let parsed;
    try {
        // Read and parse share one guard: both failure modes mean "no baseline".
        parsed = JSON.parse(await fs.readFile(baselinePath(repoRoot), 'utf8'));
    }
    catch {
        return null;
    }
    if (!parsed || typeof parsed !== 'object')
        return null;
    const obj = parsed;
    if (!isValidPenalty(obj.healthPenalty))
        return null;
    return {
        healthPenalty: obj.healthPenalty,
        updatedAt: typeof obj.updatedAt === 'string' ? obj.updatedAt : '',
        ...(typeof obj.sourceChange === 'string' ? { sourceChange: obj.sourceChange } : {}),
        ...(typeof obj.candidateId === 'string' ? { candidateId: obj.candidateId } : {}),
    };
}
/**
 * Write/overwrite the recorded baseline. Best-effort: creates the parent dir
 * if needed and swallows any error, including a failure to resolve the target
 * path, so a failed baseline write can never fail the caller.
 *
 * @param repoRoot Repository root the baseline is stored under.
 * @param baseline Baseline to persist (serialised as pretty-printed JSON).
 * @returns `true` on a successful write, `false` otherwise.
 */
export async function writeHealthBaseline(repoRoot, baseline) {
    try {
        // Resolved inside the guard so a bad `repoRoot` yields `false`, not a rejection.
        const file = baselinePath(repoRoot);
        await fs.mkdir(path.dirname(file), { recursive: true });
        await fs.writeFile(file, `${JSON.stringify(baseline, null, 2)}\n`, 'utf8');
        return true;
    }
    catch {
        return false;
    }
}
|
|
78
|
+
//# sourceMappingURL=health-baseline.js.map
|
|
@@ -12,6 +12,7 @@ export * from './learn-observation-adapter.js';
|
|
|
12
12
|
export * from './hints.js';
|
|
13
13
|
export * from './candidates.js';
|
|
14
14
|
export * from './candidate-fitness.js';
|
|
15
|
+
export * from './health-baseline.js';
|
|
15
16
|
export * from './ga-selection.js';
|
|
16
17
|
export * from './host-harness.js';
|
|
17
18
|
export * from './replay.js';
|
|
@@ -12,6 +12,7 @@ export * from './learn-observation-adapter.js';
|
|
|
12
12
|
export * from './hints.js';
|
|
13
13
|
export * from './candidates.js';
|
|
14
14
|
export * from './candidate-fitness.js';
|
|
15
|
+
export * from './health-baseline.js';
|
|
15
16
|
export * from './ga-selection.js';
|
|
16
17
|
export * from './host-harness.js';
|
|
17
18
|
export * from './replay.js';
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { type LearnEvolutionHint } from './learn-hints.js';
|
|
2
2
|
import { type TargetEvolutionPolicy } from './target-evolution.js';
|
|
3
|
-
import { type LearnReport } from '../learn.js';
|
|
3
|
+
import { type LearnReport, type LearnObservation } from '../learn.js';
|
|
4
4
|
/** The learn signals the interpreter reads (everything except the neutral observations). */
|
|
5
5
|
type LearnSignals = Omit<LearnReport, 'observations'>;
|
|
6
6
|
/**
|
|
@@ -40,6 +40,21 @@ export declare function resolveTargetLocalFilesReadonly(targetId: string, repoRo
|
|
|
40
40
|
* `target-evolution.ts` and the `add-per-target-evolution-switch` change.
|
|
41
41
|
*/
|
|
42
42
|
export declare function generateEvolutionHints(report: LearnSignals, policy?: TargetEvolutionPolicy): LearnEvolutionHint[];
|
|
43
|
+
/**
|
|
44
|
+
* Surface an UNBINDABLE kind-only evolution hint as an actionable DEFECT
|
|
45
|
+
* observation. After {@link scopeHintsByPolicy} runs, a hint that still has no
|
|
46
|
+
* `affectedTargetId` is one that could not be pinned to a concrete target (>1
|
|
47
|
+
* same-kind target evolvable and none named via `--evolve-target`) — it would
|
|
48
|
+
* surface as the `<kind>:unspecified` placeholder and yield a "0 surviving hint
|
|
49
|
+
* group" refusal that is a BINDING DEFECT, not a safe gate refusal. Emitting this
|
|
50
|
+
* is what lets the agent (and the skill) tell the two apart instead of recording a
|
|
51
|
+
* binding bug as "the gate correctly refused".
|
|
52
|
+
*
|
|
53
|
+
* Reads the SCOPED hints directly (no second pin pass), so it cannot drift from
|
|
54
|
+
* {@link scopeHintsByPolicy}. Returns `[]` when `policy` is undefined or nothing is
|
|
55
|
+
* unbindable, keeping learn output byte-identical in the common case.
|
|
56
|
+
*/
|
|
57
|
+
export declare function detectUnbindableHintObservations(hints: LearnEvolutionHint[], policy: TargetEvolutionPolicy | undefined): LearnObservation[];
|
|
43
58
|
/**
|
|
44
59
|
* Persist hints to the canonical handoff path that `propose-canonical
|
|
45
60
|
* --from-learn` reads (`{ evolutionHints: [...] }`). This is the one-motion
|