synergyspec-selfevolving 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -1
- package/dist/commands/learn.js +228 -26
- package/dist/commands/self-evolution.js +171 -26
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +2 -2
- package/dist/core/fitness/loss.js +2 -2
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +2 -0
- package/dist/core/project-config.js +24 -1
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/index.d.ts +1 -0
- package/dist/core/self-evolution/index.js +1 -0
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +24 -167
- package/dist/core/templates/workflows/self-evolving.d.ts +11 -0
- package/dist/core/templates/workflows/self-evolving.js +237 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -15,11 +15,11 @@
|
|
|
15
15
|
* health reading as "skip the health half of the loss" rather than a failure.
|
|
16
16
|
*
|
|
17
17
|
* The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
|
|
18
|
-
* which is authored in parallel; the shape this module relies on is the
|
|
18
|
+
* which is authored in parallel; the shape this module relies on is the 2-key
|
|
19
19
|
* record produced by the Python script.
|
|
20
20
|
*/
|
|
21
21
|
import { spawn as nodeSpawn } from 'node:child_process';
|
|
22
|
-
import type { MetricSource, RawHealthMetrics } from './metric-source.js';
|
|
22
|
+
import type { HealthOffender, MetricSource, RawHealthMetrics } from './metric-source.js';
|
|
23
23
|
/** Minimal structural type for node's `spawn` (kept narrow for injectability). */
|
|
24
24
|
export type SpawnImpl = typeof nodeSpawn;
|
|
25
25
|
export interface LocalPythonMetricSourceOptions {
|
|
@@ -29,22 +29,61 @@ export interface LocalPythonMetricSourceOptions {
|
|
|
29
29
|
spawnImpl?: SpawnImpl;
|
|
30
30
|
/** Override the analyzer script path (mostly for tests). */
|
|
31
31
|
scriptPath?: string;
|
|
32
|
+
/** Path to an ast-grep binary for the analyzer's Python slop-rule engine.
|
|
33
|
+
* When omitted, resolved automatically (env override, then the installed
|
|
34
|
+
* `@ast-grep/cli` platform package); see {@link defaultAstGrepBin}. */
|
|
35
|
+
astGrepBin?: string;
|
|
36
|
+
/** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
|
|
37
|
+
* to the `slop_rules.yaml` vendored next to the analyzer script. */
|
|
38
|
+
rulesPath?: string;
|
|
32
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Locate the ast-grep binary the analyzer's Python slop-rule engine should
|
|
42
|
+
* use, or `null` when none can be found (the analyzer then falls back to its
|
|
43
|
+
* own env/PATH lookup, or to the builtin rules). Resolution order:
|
|
44
|
+
* 1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, taken verbatim;
|
|
45
|
+
* 2. the native binary inside the installed `@ast-grep/cli` PLATFORM package
|
|
46
|
+
* (e.g. `@ast-grep/cli-win32-x64-msvc/ast-grep.exe`), resolved FROM the
|
|
47
|
+
* `@ast-grep/cli` package dir — under pnpm the platform package is only
|
|
48
|
+
* resolvable from there;
|
|
49
|
+
* 3. non-Windows only: `@ast-grep/cli/ast-grep` itself, where npm's
|
|
50
|
+
* postinstall may have swapped the JS shim for the native binary —
|
|
51
|
+
* accepted only when it does not start with `#!`.
|
|
52
|
+
* Exported for tests. Never throws; any resolution failure → null.
|
|
53
|
+
*/
|
|
54
|
+
export declare function defaultAstGrepBin(): string | null;
|
|
33
55
|
export declare class LocalPythonMetricSource implements MetricSource {
|
|
34
56
|
readonly name = "local";
|
|
35
57
|
private readonly pythonBin;
|
|
36
58
|
private readonly spawnImpl;
|
|
37
59
|
private readonly scriptPath;
|
|
60
|
+
private readonly astGrepBin;
|
|
61
|
+
private readonly rulesPath;
|
|
38
62
|
constructor(options?: LocalPythonMetricSourceOptions);
|
|
39
63
|
/**
|
|
40
64
|
* Run the analyzer over `codeDir` and return its metrics, or `null` on any
|
|
41
65
|
* spawn / exit / parse failure (graceful degradation when Python is absent).
|
|
42
66
|
*/
|
|
43
67
|
measure(codeDir: string): Promise<RawHealthMetrics | null>;
|
|
68
|
+
/**
|
|
69
|
+
* Detailed measurement: the same 2 scores PLUS the analyzer's
|
|
70
|
+
* `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
|
|
71
|
+
* with `measure()`'s parse path, so the scalar half is provably the value
|
|
72
|
+
* `measure()` would have produced (`toRawHealthMetrics` strips the additive
|
|
73
|
+
* key either way). An old analyzer without the key yields `offenders: []`.
|
|
74
|
+
*/
|
|
75
|
+
measureDetailed(codeDir: string): Promise<{
|
|
76
|
+
raw: RawHealthMetrics;
|
|
77
|
+
offenders: HealthOffender[];
|
|
78
|
+
} | null>;
|
|
79
|
+
/** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
|
|
80
|
+
private runAndParse;
|
|
44
81
|
/**
|
|
45
82
|
* Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
|
|
46
|
-
*
|
|
47
|
-
*
|
|
83
|
+
* `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
|
|
84
|
+
* rules file exists (the analyzer's own PATH fallback still uses it even with
|
|
85
|
+
* no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
|
|
86
|
+
* run, or `null` if the process cannot be spawned or exits non-zero.
|
|
48
87
|
*/
|
|
49
88
|
private runAnalyzer;
|
|
50
89
|
}
|
|
@@ -15,23 +15,16 @@
|
|
|
15
15
|
* health reading as "skip the health half of the loss" rather than a failure.
|
|
16
16
|
*
|
|
17
17
|
* The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
|
|
18
|
-
* which is authored in parallel; the shape this module relies on is the
|
|
18
|
+
* which is authored in parallel; the shape this module relies on is the 2-key
|
|
19
19
|
* record produced by the Python script.
|
|
20
20
|
*/
|
|
21
21
|
import { spawn as nodeSpawn } from 'node:child_process';
|
|
22
22
|
import { fileURLToPath } from 'node:url';
|
|
23
|
-
import { existsSync } from 'node:fs';
|
|
23
|
+
import { existsSync, openSync, readSync, closeSync } from 'node:fs';
|
|
24
|
+
import { createRequire } from 'node:module';
|
|
24
25
|
import path from 'node:path';
|
|
25
26
|
/** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
|
|
26
|
-
const HEALTH_KEYS = [
|
|
27
|
-
'cyclomatic_p95',
|
|
28
|
-
'max_nesting_depth',
|
|
29
|
-
'cognitive_complexity',
|
|
30
|
-
'duplicated_lines_density',
|
|
31
|
-
'import_count',
|
|
32
|
-
'attr_method_usage_ratio',
|
|
33
|
-
'bare_except_count',
|
|
34
|
-
];
|
|
27
|
+
const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
|
|
35
28
|
/**
|
|
36
29
|
* Locate `scripts/code-health.py` relative to this module. Built output lives
|
|
37
30
|
* at `dist/core/fitness/health/local-source.js`; the script stays at the
|
|
@@ -61,12 +54,110 @@ function defaultScriptPath() {
|
|
|
61
54
|
function defaultPythonBin() {
|
|
62
55
|
return process.env.SYNERGYSPEC_SELFEVOLVING_PYTHON_BIN || 'python';
|
|
63
56
|
}
|
|
57
|
+
/**
|
|
58
|
+
* Map `process.platform`/`process.arch` onto the `@ast-grep/cli` platform
|
|
59
|
+
* package that carries the real native binary (the `ast-grep` file inside
|
|
60
|
+
* `@ast-grep/cli` itself is a JS shim on Windows). Unknown platforms → null.
|
|
61
|
+
*/
|
|
62
|
+
function astGrepPlatformPackage() {
|
|
63
|
+
const { platform, arch } = process;
|
|
64
|
+
if (platform === 'darwin') {
|
|
65
|
+
return arch === 'arm64' ? '@ast-grep/cli-darwin-arm64' : '@ast-grep/cli-darwin-x64';
|
|
66
|
+
}
|
|
67
|
+
if (platform === 'linux') {
|
|
68
|
+
return arch === 'arm64' ? '@ast-grep/cli-linux-arm64-gnu' : '@ast-grep/cli-linux-x64-gnu';
|
|
69
|
+
}
|
|
70
|
+
if (platform === 'win32') {
|
|
71
|
+
if (arch === 'arm64')
|
|
72
|
+
return '@ast-grep/cli-win32-arm64-msvc';
|
|
73
|
+
if (arch === 'ia32')
|
|
74
|
+
return '@ast-grep/cli-win32-ia32-msvc';
|
|
75
|
+
return '@ast-grep/cli-win32-x64-msvc';
|
|
76
|
+
}
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
79
|
+
/** True when the file starts with `#!` (a script shim, not a native binary).
|
|
80
|
+
* An unreadable file counts as a shim so the candidate is skipped. */
|
|
81
|
+
function isShebangShim(file) {
|
|
82
|
+
try {
|
|
83
|
+
const fd = openSync(file, 'r');
|
|
84
|
+
try {
|
|
85
|
+
const head = Buffer.alloc(2);
|
|
86
|
+
const n = readSync(fd, head, 0, 2, 0);
|
|
87
|
+
return n === 2 && head[0] === 0x23 && head[1] === 0x21; // '#!'
|
|
88
|
+
}
|
|
89
|
+
finally {
|
|
90
|
+
closeSync(fd);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch {
|
|
94
|
+
return true;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
/** Memoized {@link defaultAstGrepBin} result; resolution runs once per process. */
|
|
98
|
+
let astGrepBinMemo;
|
|
99
|
+
/**
|
|
100
|
+
* Locate the ast-grep binary the analyzer's Python slop-rule engine should
|
|
101
|
+
* use, or `null` when none can be found (the analyzer then falls back to its
|
|
102
|
+
* own env/PATH lookup, or to the builtin rules). Resolution order:
|
|
103
|
+
* 1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, taken verbatim;
|
|
104
|
+
* 2. the native binary inside the installed `@ast-grep/cli` PLATFORM package
|
|
105
|
+
* (e.g. `@ast-grep/cli-win32-x64-msvc/ast-grep.exe`), resolved FROM the
|
|
106
|
+
* `@ast-grep/cli` package dir — under pnpm the platform package is only
|
|
107
|
+
* resolvable from there;
|
|
108
|
+
* 3. non-Windows only: `@ast-grep/cli/ast-grep` itself, where npm's
|
|
109
|
+
* postinstall may have swapped the JS shim for the native binary —
|
|
110
|
+
* accepted only when it does not start with `#!`.
|
|
111
|
+
* Exported for tests. Never throws; any resolution failure → null.
|
|
112
|
+
*/
|
|
113
|
+
export function defaultAstGrepBin() {
|
|
114
|
+
if (astGrepBinMemo !== undefined)
|
|
115
|
+
return astGrepBinMemo;
|
|
116
|
+
astGrepBinMemo = resolveAstGrepBin();
|
|
117
|
+
return astGrepBinMemo;
|
|
118
|
+
}
|
|
119
|
+
function resolveAstGrepBin() {
|
|
120
|
+
const env = process.env.SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN;
|
|
121
|
+
if (env)
|
|
122
|
+
return env;
|
|
123
|
+
try {
|
|
124
|
+
const req = createRequire(import.meta.url);
|
|
125
|
+
const cliPkgDir = path.dirname(req.resolve('@ast-grep/cli/package.json'));
|
|
126
|
+
const platformPkg = astGrepPlatformPackage();
|
|
127
|
+
if (platformPkg !== null) {
|
|
128
|
+
try {
|
|
129
|
+
const platformPkgDir = path.dirname(req.resolve(`${platformPkg}/package.json`, { paths: [cliPkgDir] }));
|
|
130
|
+
const bin = path.join(platformPkgDir, process.platform === 'win32' ? 'ast-grep.exe' : 'ast-grep');
|
|
131
|
+
if (existsSync(bin))
|
|
132
|
+
return bin;
|
|
133
|
+
}
|
|
134
|
+
catch {
|
|
135
|
+
// Platform package absent (e.g. optional dep skipped) → try the shim slot.
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
if (process.platform !== 'win32') {
|
|
139
|
+
const candidate = path.join(cliPkgDir, 'ast-grep');
|
|
140
|
+
if (existsSync(candidate) && !isShebangShim(candidate))
|
|
141
|
+
return candidate;
|
|
142
|
+
}
|
|
143
|
+
return null;
|
|
144
|
+
}
|
|
145
|
+
catch {
|
|
146
|
+
return null; // @ast-grep/cli not installed at all → no binary, no flag
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
/** The slop-rules YAML vendored as a sibling of the analyzer script. */
|
|
150
|
+
function defaultRulesPath(scriptPath) {
|
|
151
|
+
return path.join(path.dirname(scriptPath), 'slop_rules.yaml');
|
|
152
|
+
}
|
|
64
153
|
function isFiniteNumber(v) {
|
|
65
154
|
return typeof v === 'number' && Number.isFinite(v);
|
|
66
155
|
}
|
|
67
156
|
/**
|
|
68
157
|
* Validate + narrow a parsed JSON object into {@link RawHealthMetrics}. Every
|
|
69
|
-
* one of the
|
|
158
|
+
* one of the 2 keys must be present and a finite number; otherwise null.
|
|
159
|
+
* Additive analyzer keys (`worst_offenders`, `verbosity_engine`, …) are
|
|
160
|
+
* tolerated and stripped here.
|
|
70
161
|
*/
|
|
71
162
|
function toRawHealthMetrics(parsed) {
|
|
72
163
|
if (parsed === null || typeof parsed !== 'object')
|
|
@@ -78,46 +169,106 @@ function toRawHealthMetrics(parsed) {
|
|
|
78
169
|
}
|
|
79
170
|
// Build a clean object with exactly the known keys (no extra fields leak).
|
|
80
171
|
return {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
cognitive_complexity: obj.cognitive_complexity,
|
|
84
|
-
duplicated_lines_density: obj.duplicated_lines_density,
|
|
85
|
-
import_count: obj.import_count,
|
|
86
|
-
attr_method_usage_ratio: obj.attr_method_usage_ratio,
|
|
87
|
-
bare_except_count: obj.bare_except_count,
|
|
172
|
+
structural_erosion: obj.structural_erosion,
|
|
173
|
+
verbosity: obj.verbosity,
|
|
88
174
|
};
|
|
89
175
|
}
|
|
176
|
+
/**
|
|
177
|
+
* Map the analyzer's additive `worst_offenders` key onto {@link HealthOffender}s
|
|
178
|
+
* (snake_case → camelCase). DEFENSIVE by design: an old analyzer without the key,
|
|
179
|
+
* a non-array value, or malformed entries all degrade to fewer/zero offenders —
|
|
180
|
+
* never to a parse failure — because offenders are pure visibility and must not
|
|
181
|
+
* be able to break the scalar penalty path. Optional fields are OMITTED (not
|
|
182
|
+
* `undefined`-assigned) so serialized samples stay minimal.
|
|
183
|
+
*/
|
|
184
|
+
function toHealthOffenders(parsed) {
|
|
185
|
+
if (parsed === null || typeof parsed !== 'object')
|
|
186
|
+
return [];
|
|
187
|
+
const list = parsed.worst_offenders;
|
|
188
|
+
if (!Array.isArray(list))
|
|
189
|
+
return [];
|
|
190
|
+
const out = [];
|
|
191
|
+
for (const item of list) {
|
|
192
|
+
if (item === null || typeof item !== 'object')
|
|
193
|
+
continue;
|
|
194
|
+
const o = item;
|
|
195
|
+
if (typeof o.file !== 'string' || o.file.length === 0)
|
|
196
|
+
continue;
|
|
197
|
+
if (typeof o.metric !== 'string' || o.metric.length === 0)
|
|
198
|
+
continue;
|
|
199
|
+
if (!isFiniteNumber(o.value))
|
|
200
|
+
continue;
|
|
201
|
+
out.push({
|
|
202
|
+
file: o.file,
|
|
203
|
+
metric: o.metric,
|
|
204
|
+
value: o.value,
|
|
205
|
+
...(typeof o.function === 'string' && o.function.length > 0
|
|
206
|
+
? { function: o.function }
|
|
207
|
+
: {}),
|
|
208
|
+
...(isFiniteNumber(o.line) ? { line: o.line } : {}),
|
|
209
|
+
...(isFiniteNumber(o.function_length) ? { functionLength: o.function_length } : {}),
|
|
210
|
+
});
|
|
211
|
+
}
|
|
212
|
+
return out;
|
|
213
|
+
}
|
|
90
214
|
export class LocalPythonMetricSource {
|
|
91
215
|
name = 'local';
|
|
92
216
|
pythonBin;
|
|
93
217
|
spawnImpl;
|
|
94
218
|
scriptPath;
|
|
219
|
+
astGrepBin;
|
|
220
|
+
rulesPath;
|
|
95
221
|
constructor(options = {}) {
|
|
96
222
|
this.pythonBin = options.pythonBin ?? defaultPythonBin();
|
|
97
223
|
this.spawnImpl = options.spawnImpl ?? nodeSpawn;
|
|
98
224
|
this.scriptPath = options.scriptPath ?? defaultScriptPath();
|
|
225
|
+
this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
|
|
226
|
+
this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
|
|
99
227
|
}
|
|
100
228
|
/**
|
|
101
229
|
* Run the analyzer over `codeDir` and return its metrics, or `null` on any
|
|
102
230
|
* spawn / exit / parse failure (graceful degradation when Python is absent).
|
|
103
231
|
*/
|
|
104
232
|
async measure(codeDir) {
|
|
233
|
+
const parsed = await this.runAndParse(codeDir);
|
|
234
|
+
if (parsed === null)
|
|
235
|
+
return null;
|
|
236
|
+
return toRawHealthMetrics(parsed);
|
|
237
|
+
}
|
|
238
|
+
/**
|
|
239
|
+
* Detailed measurement: the same 2 scores PLUS the analyzer's
|
|
240
|
+
* `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
|
|
241
|
+
* with `measure()`'s parse path, so the scalar half is provably the value
|
|
242
|
+
* `measure()` would have produced (`toRawHealthMetrics` strips the additive
|
|
243
|
+
* key either way). An old analyzer without the key yields `offenders: []`.
|
|
244
|
+
*/
|
|
245
|
+
async measureDetailed(codeDir) {
|
|
246
|
+
const parsed = await this.runAndParse(codeDir);
|
|
247
|
+
if (parsed === null)
|
|
248
|
+
return null;
|
|
249
|
+
const raw = toRawHealthMetrics(parsed);
|
|
250
|
+
if (raw === null)
|
|
251
|
+
return null;
|
|
252
|
+
return { raw, offenders: toHealthOffenders(parsed) };
|
|
253
|
+
}
|
|
254
|
+
/** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
|
|
255
|
+
async runAndParse(codeDir) {
|
|
105
256
|
const stdout = await this.runAnalyzer(codeDir);
|
|
106
257
|
if (stdout === null)
|
|
107
258
|
return null;
|
|
108
|
-
let parsed;
|
|
109
259
|
try {
|
|
110
|
-
|
|
260
|
+
return JSON.parse(stdout);
|
|
111
261
|
}
|
|
112
262
|
catch {
|
|
113
263
|
return null; // not JSON → no signal
|
|
114
264
|
}
|
|
115
|
-
return toRawHealthMetrics(parsed);
|
|
116
265
|
}
|
|
117
266
|
/**
|
|
118
267
|
* Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
|
|
119
|
-
*
|
|
120
|
-
*
|
|
268
|
+
* `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
|
|
269
|
+
* rules file exists (the analyzer's own PATH fallback still uses it even with
|
|
270
|
+
* no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
|
|
271
|
+
* run, or `null` if the process cannot be spawned or exits non-zero.
|
|
121
272
|
*/
|
|
122
273
|
runAnalyzer(codeDir) {
|
|
123
274
|
return new Promise((resolve) => {
|
|
@@ -128,9 +279,14 @@ export class LocalPythonMetricSource {
|
|
|
128
279
|
settled = true;
|
|
129
280
|
resolve(value);
|
|
130
281
|
};
|
|
282
|
+
const args = [this.scriptPath, codeDir];
|
|
283
|
+
if (this.astGrepBin !== null)
|
|
284
|
+
args.push('--ast-grep-bin', this.astGrepBin);
|
|
285
|
+
if (existsSync(this.rulesPath))
|
|
286
|
+
args.push('--rules', this.rulesPath);
|
|
131
287
|
let child;
|
|
132
288
|
try {
|
|
133
|
-
child = this.spawnImpl(this.pythonBin,
|
|
289
|
+
child = this.spawnImpl(this.pythonBin, args, { shell: false });
|
|
134
290
|
}
|
|
135
291
|
catch {
|
|
136
292
|
// Synchronous spawn failure (e.g. bad options) → no signal.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Swappable source of raw code-health metrics for the self-evolution health
|
|
3
|
-
* head. A {@link MetricSource} measures the
|
|
3
|
+
* head. A {@link MetricSource} measures the two SlopCodeBench code-health
|
|
4
|
+
* scores (arXiv:2603.24755) on a directory
|
|
4
5
|
* of generated code; the result is then normalized into a single [0,1] penalty
|
|
5
6
|
* by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
|
|
6
7
|
* loss alongside the functional term. See
|
|
@@ -13,25 +14,41 @@
|
|
|
13
14
|
* rather than guessing, exactly like the functional parser's null.
|
|
14
15
|
*/
|
|
15
16
|
/**
|
|
16
|
-
* The
|
|
17
|
-
*
|
|
18
|
-
* is better. All are plain numbers in their native units (see each field).
|
|
17
|
+
* The two raw SlopCodeBench code-health scores (arXiv:2603.24755), computed on
|
|
18
|
+
* generated code. Both are fractions in [0,1]; lower is better for both.
|
|
19
19
|
*/
|
|
20
20
|
export interface RawHealthMetrics {
|
|
21
|
-
/**
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
21
|
+
/** Mass-weighted share of functions with cyclomatic complexity > 10:
|
|
22
|
+
* Σ_{CC>10} CC·√SLOC / Σ CC·√SLOC. Fraction in [0,1]; lower better. */
|
|
23
|
+
structural_erosion: number;
|
|
24
|
+
/** |rule-flagged lines ∪ clone lines| / non-blank LOC. Fraction in [0,1]; lower better. */
|
|
25
|
+
verbosity: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* One worst per-function (or, for the heuristic C/C++/Rust analyzer path,
|
|
29
|
+
* per-file) code-health contributor — the file-addressed evidence BEHIND the
|
|
30
|
+
* scalar `healthPenalty` ("health 0.516 — x12.py cyclomatic 41"). Offenders
|
|
31
|
+
* are pure visibility: which entries a source surfaces can never change the
|
|
32
|
+
* raw scores or the normalized penalty.
|
|
33
|
+
*/
|
|
34
|
+
export interface HealthOffender {
|
|
35
|
+
/** Source file, relative to the measured code dir (forward slashes). */
|
|
36
|
+
file: string;
|
|
37
|
+
/** Function name; absent for file-level (heuristic-language) entries. */
|
|
38
|
+
function?: string;
|
|
39
|
+
/** 1-based line of the function definition; absent for file-level entries. */
|
|
40
|
+
line?: number;
|
|
41
|
+
/** What the entry contributes to: 'complexity_mass' (a CC>10 function ranked
|
|
42
|
+
* by mass CC·√SLOC) | a verbosity rule id — upstream SlopCodeBench slop-rule
|
|
43
|
+
* ids (see scripts/slop_rules.yaml) for Python; builtin rule ids for
|
|
44
|
+
* C/C++/Rust and the no-ast-grep-binary fallback | 'clone' (a duplicated
|
|
45
|
+
* region). */
|
|
46
|
+
metric: string;
|
|
47
|
+
/** The offending value: the complexity mass (rounded to 1 decimal) for
|
|
48
|
+
* 'complexity_mass' entries, or the flagged-region line count otherwise. */
|
|
49
|
+
value: number;
|
|
50
|
+
/** Source lines spanned by the function, when the analyzer knows it. */
|
|
51
|
+
functionLength?: number;
|
|
35
52
|
}
|
|
36
53
|
/**
|
|
37
54
|
* A swappable backend that measures {@link RawHealthMetrics} on a directory of
|
|
@@ -43,10 +60,22 @@ export interface MetricSource {
|
|
|
43
60
|
/** Stable identifier for logging / selecting the active source. */
|
|
44
61
|
readonly name: string;
|
|
45
62
|
/**
|
|
46
|
-
* Measure the
|
|
63
|
+
* Measure the 2 scores on the code under `codeDir`. Resolve to `null` when
|
|
47
64
|
* no health signal is available.
|
|
48
65
|
*/
|
|
49
66
|
measure(codeDir: string): Promise<RawHealthMetrics | null>;
|
|
67
|
+
/**
|
|
68
|
+
* OPTIONAL detailed measurement: the same 2 raw scores plus the bounded
|
|
69
|
+
* worst-offender list behind them. Optional so existing sources (stub,
|
|
70
|
+
* SonarQube) need no change — callers that want offenders fall back to
|
|
71
|
+
* `measure()` with an empty list when this method is absent. Implementations
|
|
72
|
+
* MUST derive `raw` from the same single measurement as the offenders (no
|
|
73
|
+
* second scan) so the scalar path is provably identical.
|
|
74
|
+
*/
|
|
75
|
+
measureDetailed?(codeDir: string): Promise<{
|
|
76
|
+
raw: RawHealthMetrics;
|
|
77
|
+
offenders: HealthOffender[];
|
|
78
|
+
} | null>;
|
|
50
79
|
}
|
|
51
80
|
/**
|
|
52
81
|
* The default no-op source: it produces no health signal, so the health head
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Swappable source of raw code-health metrics for the self-evolution health
|
|
3
|
-
* head. A {@link MetricSource} measures the
|
|
3
|
+
* head. A {@link MetricSource} measures the two SlopCodeBench code-health
|
|
4
|
+
* scores (arXiv:2603.24755) on a directory
|
|
4
5
|
* of generated code; the result is then normalized into a single [0,1] penalty
|
|
5
6
|
* by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
|
|
6
7
|
* loss alongside the functional term. See
|
|
@@ -25,18 +26,12 @@ export class StubMetricSource {
|
|
|
25
26
|
}
|
|
26
27
|
/**
|
|
27
28
|
* The Sonar metric keys requested from `api/measures/component`, in the order
|
|
28
|
-
* they map onto {@link RawHealthMetrics}.
|
|
29
|
-
*
|
|
30
|
-
* custom measures published under the same metric keys as the raw-metric field.
|
|
29
|
+
* they map onto {@link RawHealthMetrics}. BOTH are custom measures published
|
|
30
|
+
* under these keys (neither is a native Sonar measure).
|
|
31
31
|
*/
|
|
32
32
|
const SONAR_METRIC_KEYS = [
|
|
33
|
-
'
|
|
34
|
-
'
|
|
35
|
-
'cognitive_complexity',
|
|
36
|
-
'duplicated_lines_density',
|
|
37
|
-
'import_count',
|
|
38
|
-
'attr_method_usage_ratio',
|
|
39
|
-
'bare_except_count',
|
|
33
|
+
'structural_erosion',
|
|
34
|
+
'verbosity',
|
|
40
35
|
];
|
|
41
36
|
/**
|
|
42
37
|
* A {@link MetricSource} backed by SonarQube. `measure`:
|
|
@@ -104,13 +99,8 @@ export function mapSonarMeasures(body) {
|
|
|
104
99
|
}
|
|
105
100
|
const get = (key) => byKey.get(key) ?? 0;
|
|
106
101
|
return {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
cognitive_complexity: get('cognitive_complexity'),
|
|
110
|
-
duplicated_lines_density: get('duplicated_lines_density'),
|
|
111
|
-
import_count: get('import_count'),
|
|
112
|
-
attr_method_usage_ratio: get('attr_method_usage_ratio'),
|
|
113
|
-
bare_except_count: get('bare_except_count'),
|
|
102
|
+
structural_erosion: get('structural_erosion'),
|
|
103
|
+
verbosity: get('verbosity'),
|
|
114
104
|
};
|
|
115
105
|
}
|
|
116
106
|
//# sourceMappingURL=metric-source.js.map
|
|
@@ -20,7 +20,10 @@ export function resolveMetricSource(config) {
|
|
|
20
20
|
return new StubMetricSource();
|
|
21
21
|
}
|
|
22
22
|
if (health.source === 'local' || health.source === 'local-python') {
|
|
23
|
-
return new LocalPythonMetricSource({
|
|
23
|
+
return new LocalPythonMetricSource({
|
|
24
|
+
pythonBin: health.pythonBin,
|
|
25
|
+
astGrepBin: health.astGrepBin,
|
|
26
|
+
});
|
|
24
27
|
}
|
|
25
28
|
if (health.source === 'sonarqube') {
|
|
26
29
|
if (health.sonarUrl && health.sonarToken && health.sonarProjectKey) {
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* loss = w_f · functionalLoss + w_h · healthPenalty
|
|
5
5
|
* functionalLoss = 1 − pass_rate (from the gen-test/run-test oracle)
|
|
6
|
-
* healthPenalty = normalized
|
|
7
|
-
*
|
|
6
|
+
* healthPenalty = normalized SlopCodeBench code-health penalty
|
|
7
|
+
* (structural_erosion ⊕ verbosity)
|
|
8
8
|
*
|
|
9
9
|
* Functional correctness is ALSO used as a hard GATE at GA selection/promotion
|
|
10
10
|
* (a variant whose code fails its tests cannot win); this module only computes
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* loss = w_f · functionalLoss + w_h · healthPenalty
|
|
5
5
|
* functionalLoss = 1 − pass_rate (from the gen-test/run-test oracle)
|
|
6
|
-
* healthPenalty = normalized
|
|
7
|
-
*
|
|
6
|
+
* healthPenalty = normalized SlopCodeBench code-health penalty
|
|
7
|
+
* (structural_erosion ⊕ verbosity)
|
|
8
8
|
*
|
|
9
9
|
* Functional correctness is ALSO used as a hard GATE at GA selection/promotion
|
|
10
10
|
* (a variant whose code fails its tests cannot win); this module only computes
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { TestMetrics } from './test-metrics.js';
|
|
10
10
|
import type { PerChangeLoss } from './loss.js';
|
|
11
|
+
import type { HealthOffender } from './health/metric-source.js';
|
|
11
12
|
import type { TrajectoryFacts } from '../trajectory/facts.js';
|
|
12
13
|
export interface FitnessSample {
|
|
13
14
|
changeName: string;
|
|
@@ -36,6 +37,15 @@ export interface FitnessSample {
|
|
|
36
37
|
* was consulted.
|
|
37
38
|
*/
|
|
38
39
|
healthSource?: string;
|
|
40
|
+
/**
|
|
41
|
+
* The worst per-file/per-function contributors BEHIND `healthSignal`
|
|
42
|
+
* ("x12.py complexity_mass 66.3; foo() 26 lines") — file-addressed evidence so a
|
|
43
|
+
* bad health reading is navigable, not just a scalar. Pure visibility: the
|
|
44
|
+
* penalty and loss math never read this field. OMITTED (not empty) when the
|
|
45
|
+
* active source produced no offenders or cannot name them (stub/sonarqube),
|
|
46
|
+
* so existing serialized samples and the baseline path stay byte-identical.
|
|
47
|
+
*/
|
|
48
|
+
healthContributors?: HealthOffender[];
|
|
39
49
|
/**
|
|
40
50
|
* Ground-truth facts distilled from the agent's ACTUAL trajectory (which
|
|
41
51
|
* harness, whether a test runner was really observed running, the observed
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse the FAILING TEST IDS (and best-effort assertion lines) out of observed
|
|
3
|
+
* test-runner output. Sibling of {@link parseTestMetrics}: that one re-sources
|
|
4
|
+
* the pass/fail COUNTS for the loss, this one re-sources the failure CONTENT
|
|
5
|
+
* for the critic — so failure evidence can come from the run the
|
|
6
|
+
* observed-verified gate already trusts instead of from the authored
|
|
7
|
+
* test-report prose (`extractFailureEvidence`'s grep, which stays as the
|
|
8
|
+
* headless fallback only).
|
|
9
|
+
*
|
|
10
|
+
* Pure + dependency-free. Recognizes only explicit per-test failure markers
|
|
11
|
+
* from the canonical runners; an unrecognized format returns `[]` ("no
|
|
12
|
+
* signal"), never a guess. Outputs are bounded (≤ {@link MAX_FAILURES}
|
|
13
|
+
* failures, assertion ≤ {@link MAX_ASSERTION_CHARS} chars) so a pathological
|
|
14
|
+
* run cannot flood the learn report.
|
|
15
|
+
*/
|
|
16
|
+
export interface ParsedTestFailure {
|
|
17
|
+
/** Runner-native test id, e.g. `tests/test_x.py::test_name` or `suite > name`. */
|
|
18
|
+
testId: string;
|
|
19
|
+
/** Test file when derivable from the id (pytest/vitest path prefix). */
|
|
20
|
+
file?: string;
|
|
21
|
+
/** Best-effort assertion/error line for the failure, capped. */
|
|
22
|
+
assertion?: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Extract failing test ids + assertion lines from observed runner output.
|
|
26
|
+
* Returns `[]` when nothing is recognized. Deduplicates by testId, preserves
|
|
27
|
+
* first-seen order, caps at {@link MAX_FAILURES}.
|
|
28
|
+
*/
|
|
29
|
+
export declare function parseTestFailures(output: string): ParsedTestFailure[];
|
|
30
|
+
//# sourceMappingURL=test-failures.d.ts.map
|