synergyspec-selfevolving 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +19 -1
  2. package/dist/commands/learn.js +228 -26
  3. package/dist/commands/self-evolution.js +171 -26
  4. package/dist/commands/workflow/status.js +3 -1
  5. package/dist/core/config-prompts.js +4 -0
  6. package/dist/core/fitness/health/health-metrics.d.ts +26 -56
  7. package/dist/core/fitness/health/health-metrics.js +19 -58
  8. package/dist/core/fitness/health/index.d.ts +15 -2
  9. package/dist/core/fitness/health/index.js +25 -1
  10. package/dist/core/fitness/health/local-source.d.ts +43 -4
  11. package/dist/core/fitness/health/local-source.js +181 -25
  12. package/dist/core/fitness/health/metric-source.d.ts +48 -19
  13. package/dist/core/fitness/health/metric-source.js +8 -18
  14. package/dist/core/fitness/health/resolve-source.js +4 -1
  15. package/dist/core/fitness/loss.d.ts +2 -2
  16. package/dist/core/fitness/loss.js +2 -2
  17. package/dist/core/fitness/sample.d.ts +10 -0
  18. package/dist/core/fitness/test-failures.d.ts +30 -0
  19. package/dist/core/fitness/test-failures.js +123 -0
  20. package/dist/core/learn/credit-path.d.ts +36 -0
  21. package/dist/core/learn/credit-path.js +198 -0
  22. package/dist/core/learn/trajectory-discovery.d.ts +39 -0
  23. package/dist/core/learn/trajectory-discovery.js +140 -0
  24. package/dist/core/learn.d.ts +39 -5
  25. package/dist/core/learn.js +131 -14
  26. package/dist/core/project-config.d.ts +2 -0
  27. package/dist/core/project-config.js +24 -1
  28. package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
  29. package/dist/core/self-evolution/canonical-targets.js +8 -4
  30. package/dist/core/self-evolution/health-baseline.d.ts +25 -6
  31. package/dist/core/self-evolution/health-baseline.js +30 -6
  32. package/dist/core/self-evolution/index.d.ts +1 -0
  33. package/dist/core/self-evolution/index.js +1 -0
  34. package/dist/core/self-evolution/learn-hints.d.ts +31 -0
  35. package/dist/core/self-evolution/learn-hints.js +16 -0
  36. package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
  37. package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
  38. package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
  39. package/dist/core/self-evolution/proposer-agent.js +94 -13
  40. package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
  41. package/dist/core/self-evolution/proposer-slice.js +54 -0
  42. package/dist/core/self-evolution/success-channel.d.ts +79 -0
  43. package/dist/core/self-evolution/success-channel.js +361 -0
  44. package/dist/core/self-evolution/target-evolution.d.ts +11 -0
  45. package/dist/core/self-evolution/target-evolution.js +2 -0
  46. package/dist/core/templates/skill-templates.d.ts +1 -0
  47. package/dist/core/templates/skill-templates.js +1 -0
  48. package/dist/core/templates/workflow-manifest.js +2 -0
  49. package/dist/core/templates/workflows/learn.d.ts +3 -2
  50. package/dist/core/templates/workflows/learn.js +24 -167
  51. package/dist/core/templates/workflows/self-evolving.d.ts +11 -0
  52. package/dist/core/templates/workflows/self-evolving.js +237 -0
  53. package/dist/core/trajectory/facts.d.ts +16 -0
  54. package/dist/core/trajectory/facts.js +12 -4
  55. package/dist/core/trajectory/skeleton.d.ts +43 -0
  56. package/dist/core/trajectory/skeleton.js +239 -0
  57. package/package.json +3 -1
  58. package/scripts/code-health.py +1066 -638
  59. package/scripts/slop_rules.yaml +2151 -0
@@ -15,11 +15,11 @@
15
15
  * health reading as "skip the health half of the loss" rather than a failure.
16
16
  *
17
17
  * The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
18
- * which is authored in parallel; the shape this module relies on is the 7-key
18
+ * which is authored in parallel; the shape this module relies on is the 2-key
19
19
  * record produced by the Python script.
20
20
  */
21
21
  import { spawn as nodeSpawn } from 'node:child_process';
22
- import type { MetricSource, RawHealthMetrics } from './metric-source.js';
22
+ import type { HealthOffender, MetricSource, RawHealthMetrics } from './metric-source.js';
23
23
  /** Minimal structural type for node's `spawn` (kept narrow for injectability). */
24
24
  export type SpawnImpl = typeof nodeSpawn;
25
25
  export interface LocalPythonMetricSourceOptions {
@@ -29,22 +29,61 @@ export interface LocalPythonMetricSourceOptions {
29
29
  spawnImpl?: SpawnImpl;
30
30
  /** Override the analyzer script path (mostly for tests). */
31
31
  scriptPath?: string;
32
+ /** Path to an ast-grep binary for the analyzer's Python slop-rule engine.
33
+ * When omitted, resolved automatically (env override, then the installed
34
+ * `@ast-grep/cli` platform package); see {@link defaultAstGrepBin}. */
35
+ astGrepBin?: string;
36
+ /** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
37
+ * to the `slop_rules.yaml` vendored next to the analyzer script. */
38
+ rulesPath?: string;
32
39
  }
40
+ /**
41
+ * Locate the ast-grep binary the analyzer's Python slop-rule engine should
42
+ * use, or `null` when none can be found (the analyzer then falls back to its
43
+ * own env/PATH lookup, or to the builtin rules). Resolution order:
44
+ * 1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, taken verbatim;
45
+ * 2. the native binary inside the installed `@ast-grep/cli` PLATFORM package
46
+ * (e.g. `@ast-grep/cli-win32-x64-msvc/ast-grep.exe`), resolved FROM the
47
+ * `@ast-grep/cli` package dir — under pnpm the platform package is only
48
+ * resolvable from there;
49
+ * 3. non-Windows only: `@ast-grep/cli/ast-grep` itself, where npm's
50
+ * postinstall may have swapped the JS shim for the native binary —
51
+ * accepted only when it does not start with `#!`.
52
+ * Exported for tests. Never throws; any resolution failure → null.
53
+ */
54
+ export declare function defaultAstGrepBin(): string | null;
33
55
  export declare class LocalPythonMetricSource implements MetricSource {
34
56
  readonly name = "local";
35
57
  private readonly pythonBin;
36
58
  private readonly spawnImpl;
37
59
  private readonly scriptPath;
60
+ private readonly astGrepBin;
61
+ private readonly rulesPath;
38
62
  constructor(options?: LocalPythonMetricSourceOptions);
39
63
  /**
40
64
  * Run the analyzer over `codeDir` and return its metrics, or `null` on any
41
65
  * spawn / exit / parse failure (graceful degradation when Python is absent).
42
66
  */
43
67
  measure(codeDir: string): Promise<RawHealthMetrics | null>;
68
+ /**
69
+ * Detailed measurement: the same 2 scores PLUS the analyzer's
70
+ * `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
71
+ * with `measure()`'s parse path, so the scalar half is provably the value
72
+ * `measure()` would have produced (`toRawHealthMetrics` strips the additive
73
+ * key either way). An old analyzer without the key yields `offenders: []`.
74
+ */
75
+ measureDetailed(codeDir: string): Promise<{
76
+ raw: RawHealthMetrics;
77
+ offenders: HealthOffender[];
78
+ } | null>;
79
+ /** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
80
+ private runAndParse;
44
81
  /**
45
82
  * Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
46
- * Resolves to the raw stdout string on a clean (exit 0) run, or `null` if the
47
- * process cannot be spawned or exits non-zero.
83
+ * `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
84
+ * rules file exists (the analyzer's own PATH fallback still uses it even with
85
+ * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
86
+ * run, or `null` if the process cannot be spawned or exits non-zero.
48
87
  */
49
88
  private runAnalyzer;
50
89
  }
@@ -15,23 +15,16 @@
15
15
  * health reading as "skip the health half of the loss" rather than a failure.
16
16
  *
17
17
  * The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
18
- * which is authored in parallel; the shape this module relies on is the 7-key
18
+ * which is authored in parallel; the shape this module relies on is the 2-key
19
19
  * record produced by the Python script.
20
20
  */
21
21
  import { spawn as nodeSpawn } from 'node:child_process';
22
22
  import { fileURLToPath } from 'node:url';
23
- import { existsSync } from 'node:fs';
23
+ import { existsSync, openSync, readSync, closeSync } from 'node:fs';
24
+ import { createRequire } from 'node:module';
24
25
  import path from 'node:path';
25
26
  /** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
26
- const HEALTH_KEYS = [
27
- 'cyclomatic_p95',
28
- 'max_nesting_depth',
29
- 'cognitive_complexity',
30
- 'duplicated_lines_density',
31
- 'import_count',
32
- 'attr_method_usage_ratio',
33
- 'bare_except_count',
34
- ];
27
+ const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
35
28
  /**
36
29
  * Locate `scripts/code-health.py` relative to this module. Built output lives
37
30
  * at `dist/core/fitness/health/local-source.js`; the script stays at the
@@ -61,12 +54,110 @@ function defaultScriptPath() {
61
54
  function defaultPythonBin() {
62
55
  return process.env.SYNERGYSPEC_SELFEVOLVING_PYTHON_BIN || 'python';
63
56
  }
57
/**
 * Platform → architecture → `@ast-grep/cli` platform package name. Only the
 * combinations listed here are mapped; everything else is "unknown".
 */
const AST_GREP_PLATFORM_PACKAGES = new Map([
    ['darwin', new Map([
        ['arm64', '@ast-grep/cli-darwin-arm64'],
        ['x64', '@ast-grep/cli-darwin-x64'],
    ])],
    ['linux', new Map([
        ['arm64', '@ast-grep/cli-linux-arm64-gnu'],
        ['x64', '@ast-grep/cli-linux-x64-gnu'],
    ])],
    ['win32', new Map([
        ['arm64', '@ast-grep/cli-win32-arm64-msvc'],
        ['ia32', '@ast-grep/cli-win32-ia32-msvc'],
        ['x64', '@ast-grep/cli-win32-x64-msvc'],
    ])],
]);
/**
 * Map a platform/architecture pair onto the `@ast-grep/cli` platform package
 * that carries the real native binary (the `ast-grep` file inside
 * `@ast-grep/cli` itself is a JS shim on Windows).
 *
 * An unknown platform OR an unknown architecture yields `null`. Earlier
 * behavior fell through to the x64 package for any unrecognized architecture,
 * which could select a binary built for a different CPU.
 *
 * @param platform Platform identifier; defaults to `process.platform`.
 * @param arch CPU architecture identifier; defaults to `process.arch`.
 * @returns The platform package name, or `null` when no package is mapped.
 */
function astGrepPlatformPackage(platform = process.platform, arch = process.arch) {
    return AST_GREP_PLATFORM_PACKAGES.get(platform)?.get(arch) ?? null;
}
79
/**
 * Decide whether `file` must be rejected as a native-binary candidate.
 *
 * Returns `true` when the file starts with `#!` (a script shim), when fewer
 * than two bytes can be read from it (an empty or truncated file cannot be a
 * native executable either), or when it cannot be opened or read at all.
 * Returns `false` only when two leading bytes were read and they are not `#!`.
 * Never throws.
 *
 * @param file Path of the candidate file.
 * @returns `true` when the candidate should be skipped.
 */
function isShebangShim(file) {
    try {
        const fd = openSync(file, 'r');
        try {
            const head = Buffer.alloc(2);
            const bytesRead = readSync(fd, head, 0, 2, 0);
            // Too short to inspect: skip it rather than accept it as a binary.
            if (bytesRead < 2)
                return true;
            return head[0] === 0x23 && head[1] === 0x21; // '#!'
        }
        finally {
            closeSync(fd);
        }
    }
    catch {
        // Open, read or close failed: treat as unusable.
        return true;
    }
}
97
/** Cache for {@link defaultAstGrepBin}; stays `undefined` until the first lookup. */
let astGrepBinMemo;
/**
 * Return the ast-grep binary path the analyzer's Python slop-rule engine should
 * be given, or `null` when none was found. The lookup itself is delegated to
 * `resolveAstGrepBin` and performed at most once: the first result (including
 * `null`) is cached and returned by every later call.
 *
 * Exported for tests.
 */
export function defaultAstGrepBin() {
    if (astGrepBinMemo === undefined) {
        // First call in this process: resolve and remember the outcome.
        astGrepBinMemo = resolveAstGrepBin();
    }
    return astGrepBinMemo;
}
119
/**
 * Uncached ast-grep binary lookup. Tried in order:
 *   1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, returned as-is when non-empty;
 *   2. `ast-grep` (`ast-grep.exe` on Windows) inside the platform package named
 *      by `astGrepPlatformPackage()`, resolved from the `@ast-grep/cli` package
 *      directory, when that file exists;
 *   3. non-Windows only: `ast-grep` inside `@ast-grep/cli` itself, when it
 *      exists and `isShebangShim` does not reject it.
 * Returns `null` when nothing matches or any step throws.
 */
function resolveAstGrepBin() {
    const override = process.env.SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN;
    if (override) {
        return override;
    }
    const onWindows = process.platform === 'win32';
    try {
        const requireFromHere = createRequire(import.meta.url);
        const cliManifest = requireFromHere.resolve('@ast-grep/cli/package.json');
        const cliRoot = path.dirname(cliManifest);
        const platformPackage = astGrepPlatformPackage();
        if (platformPackage !== null) {
            try {
                const platformManifest = requireFromHere.resolve(`${platformPackage}/package.json`, { paths: [cliRoot] });
                const nativeBinary = path.join(path.dirname(platformManifest), onWindows ? 'ast-grep.exe' : 'ast-grep');
                if (existsSync(nativeBinary)) {
                    return nativeBinary;
                }
            }
            catch {
                // Platform package could not be resolved → fall through to the shim slot.
            }
        }
        if (onWindows) {
            return null;
        }
        const shimSlot = path.join(cliRoot, 'ast-grep');
        return existsSync(shimSlot) && !isShebangShim(shimSlot) ? shimSlot : null;
    }
    catch {
        // `@ast-grep/cli` itself could not be resolved → no binary.
        return null;
    }
}
149
/**
 * Path of the `slop_rules.yaml` file expected in the same directory as the
 * analyzer script.
 *
 * @param scriptPath Path of the analyzer script.
 * @returns `<directory of scriptPath>/slop_rules.yaml`.
 */
function defaultRulesPath(scriptPath) {
    const scriptDir = path.dirname(scriptPath);
    return path.join(scriptDir, 'slop_rules.yaml');
}
64
153
  function isFiniteNumber(v) {
65
154
  return typeof v === 'number' && Number.isFinite(v);
66
155
  }
67
156
  /**
68
157
  * Validate + narrow a parsed JSON object into {@link RawHealthMetrics}. Every
69
- * one of the 7 keys must be present and a finite number; otherwise null.
158
+ * one of the 2 keys must be present and a finite number; otherwise null.
159
+ * Additive analyzer keys (`worst_offenders`, `verbosity_engine`, …) are
160
+ * tolerated and stripped here.
70
161
  */
71
162
  function toRawHealthMetrics(parsed) {
72
163
  if (parsed === null || typeof parsed !== 'object')
@@ -78,46 +169,106 @@ function toRawHealthMetrics(parsed) {
78
169
  }
79
170
  // Build a clean object with exactly the known keys (no extra fields leak).
80
171
  return {
81
- cyclomatic_p95: obj.cyclomatic_p95,
82
- max_nesting_depth: obj.max_nesting_depth,
83
- cognitive_complexity: obj.cognitive_complexity,
84
- duplicated_lines_density: obj.duplicated_lines_density,
85
- import_count: obj.import_count,
86
- attr_method_usage_ratio: obj.attr_method_usage_ratio,
87
- bare_except_count: obj.bare_except_count,
172
+ structural_erosion: obj.structural_erosion,
173
+ verbosity: obj.verbosity,
88
174
  };
89
175
  }
176
/**
 * Map the analyzer's additive `worst_offenders` key onto offender records
 * (snake_case → camelCase).
 *
 * Defensive by design: a non-object input, a missing or non-array
 * `worst_offenders`, or malformed entries all degrade to fewer/zero offenders
 * and never throw.
 *
 * An entry is kept only when `file` and `metric` are non-empty strings and
 * `value` is a finite number. Optional fields are omitted (not assigned
 * `undefined`) unless valid:
 *   - `function`: a non-empty string;
 *   - `line`: an integer ≥ 1 (lines are 1-based, so negative, zero or
 *     fractional values are dropped rather than surfaced);
 *   - `function_length` → `functionLength`: an integer ≥ 1.
 *
 * @param parsed The JSON-parsed analyzer output (any shape).
 * @returns The valid offenders, in the order the analyzer listed them.
 */
function toHealthOffenders(parsed) {
    if (parsed === null || typeof parsed !== 'object')
        return [];
    const list = parsed.worst_offenders;
    if (!Array.isArray(list))
        return [];
    const isNonEmptyString = (v) => typeof v === 'string' && v.length > 0;
    // `Number.isInteger` is false for non-numbers, NaN, ±Infinity and fractions.
    const isPositiveInteger = (v) => Number.isInteger(v) && v >= 1;
    const out = [];
    for (const item of list) {
        if (item === null || typeof item !== 'object')
            continue;
        if (!isNonEmptyString(item.file) || !isNonEmptyString(item.metric))
            continue;
        // `Number.isFinite` does not coerce, so non-number values are rejected.
        if (!Number.isFinite(item.value))
            continue;
        out.push({
            file: item.file,
            metric: item.metric,
            value: item.value,
            ...(isNonEmptyString(item.function) ? { function: item.function } : {}),
            ...(isPositiveInteger(item.line) ? { line: item.line } : {}),
            ...(isPositiveInteger(item.function_length)
                ? { functionLength: item.function_length }
                : {}),
        });
    }
    return out;
}
90
214
  export class LocalPythonMetricSource {
91
215
  name = 'local';
92
216
  pythonBin;
93
217
  spawnImpl;
94
218
  scriptPath;
219
+ astGrepBin;
220
+ rulesPath;
95
221
  constructor(options = {}) {
96
222
  this.pythonBin = options.pythonBin ?? defaultPythonBin();
97
223
  this.spawnImpl = options.spawnImpl ?? nodeSpawn;
98
224
  this.scriptPath = options.scriptPath ?? defaultScriptPath();
225
+ this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
226
+ this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
99
227
  }
100
228
  /**
101
229
  * Run the analyzer over `codeDir` and return its metrics, or `null` on any
102
230
  * spawn / exit / parse failure (graceful degradation when Python is absent).
103
231
  */
104
232
  async measure(codeDir) {
233
+ const parsed = await this.runAndParse(codeDir);
234
+ if (parsed === null)
235
+ return null;
236
+ return toRawHealthMetrics(parsed);
237
+ }
238
+ /**
239
+ * Detailed measurement: the same 2 scores PLUS the analyzer's
240
+ * `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
241
+ * with `measure()`'s parse path, so the scalar half is provably the value
242
+ * `measure()` would have produced (`toRawHealthMetrics` strips the additive
243
+ * key either way). An old analyzer without the key yields `offenders: []`.
244
+ */
245
+ async measureDetailed(codeDir) {
246
+ const parsed = await this.runAndParse(codeDir);
247
+ if (parsed === null)
248
+ return null;
249
+ const raw = toRawHealthMetrics(parsed);
250
+ if (raw === null)
251
+ return null;
252
+ return { raw, offenders: toHealthOffenders(parsed) };
253
+ }
254
+ /** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
255
+ async runAndParse(codeDir) {
105
256
  const stdout = await this.runAnalyzer(codeDir);
106
257
  if (stdout === null)
107
258
  return null;
108
- let parsed;
109
259
  try {
110
- parsed = JSON.parse(stdout);
260
+ return JSON.parse(stdout);
111
261
  }
112
262
  catch {
113
263
  return null; // not JSON → no signal
114
264
  }
115
- return toRawHealthMetrics(parsed);
116
265
  }
117
266
  /**
118
267
  * Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
119
- * Resolves to the raw stdout string on a clean (exit 0) run, or `null` if the
120
- * process cannot be spawned or exits non-zero.
268
+ * `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
269
+ * rules file exists (the analyzer's own PATH fallback still uses it even with
270
+ * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
271
+ * run, or `null` if the process cannot be spawned or exits non-zero.
121
272
  */
122
273
  runAnalyzer(codeDir) {
123
274
  return new Promise((resolve) => {
@@ -128,9 +279,14 @@ export class LocalPythonMetricSource {
128
279
  settled = true;
129
280
  resolve(value);
130
281
  };
282
+ const args = [this.scriptPath, codeDir];
283
+ if (this.astGrepBin !== null)
284
+ args.push('--ast-grep-bin', this.astGrepBin);
285
+ if (existsSync(this.rulesPath))
286
+ args.push('--rules', this.rulesPath);
131
287
  let child;
132
288
  try {
133
- child = this.spawnImpl(this.pythonBin, [this.scriptPath, codeDir], { shell: false });
289
+ child = this.spawnImpl(this.pythonBin, args, { shell: false });
134
290
  }
135
291
  catch {
136
292
  // Synchronous spawn failure (e.g. bad options) → no signal.
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Swappable source of raw code-health metrics for the self-evolution health
3
- * head. A {@link MetricSource} measures the 7 code-health metrics on a directory
3
+ * head. A {@link MetricSource} measures the two SlopCodeBench code-health
4
+ * scores (arXiv:2603.24755) on a directory
4
5
  * of generated code; the result is then normalized into a single [0,1] penalty
5
6
  * by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
6
7
  * loss alongside the functional term. See
@@ -13,25 +14,41 @@
13
14
  * rather than guessing, exactly like the functional parser's null.
14
15
  */
15
16
  /**
16
- * The 7 raw code-health metrics, computed on generated code. Lower is better for
17
- * every metric EXCEPT `attr_method_usage_ratio` (class cohesion), where higher
18
- * is better. All are plain numbers in their native units (see each field).
17
+ * The two raw SlopCodeBench code-health scores (arXiv:2603.24755), computed on
18
+ * generated code. Both are fractions in [0,1]; lower is better for both.
19
19
  */
20
20
  export interface RawHealthMetrics {
21
- /** 95th-percentile per-function cyclomatic complexity. Lower better; ceiling ~12. */
22
- cyclomatic_p95: number;
23
- /** Deepest nested control structure. Lower better; ceiling ~4. */
24
- max_nesting_depth: number;
25
- /** Mean Sonar cognitive complexity. Lower better; ceiling ~15. */
26
- cognitive_complexity: number;
27
- /** Duplicated-lines density, a fraction in [0,1]. Lower better; ceiling ~0.05. */
28
- duplicated_lines_density: number;
29
- /** Total import aliases. Lower better; ceiling ~40. */
30
- import_count: number;
31
- /** Class cohesion (attr/method usage ratio) in [0,1]. HIGHER better; floor ~0.5. */
32
- attr_method_usage_ratio: number;
33
- /** Count of bare `except`. Lower better; ceiling 0 (any > 0 is penalized). */
34
- bare_except_count: number;
21
+ /** Mass-weighted share of functions with cyclomatic complexity > 10:
22
+ * Σ_{CC>10} CC·√SLOC / Σ CC·√SLOC. Fraction in [0,1]; lower better. */
23
+ structural_erosion: number;
24
+ /** |rule-flagged lines ∪ clone lines| / non-blank LOC. Fraction in [0,1]; lower better. */
25
+ verbosity: number;
26
+ }
27
+ /**
28
+ * One worst per-function (or, for the heuristic C/C++/Rust analyzer path,
29
+ * per-file) code-health contributor — the file-addressed evidence BEHIND the
30
+ * scalar `healthPenalty` ("health 0.516 — x12.py cyclomatic 41"). Offenders
31
+ * are pure visibility: which entries a source surfaces can never change the
32
+ * raw scores or the normalized penalty.
33
+ */
34
+ export interface HealthOffender {
35
+ /** Source file, relative to the measured code dir (forward slashes). */
36
+ file: string;
37
+ /** Function name; absent for file-level (heuristic-language) entries. */
38
+ function?: string;
39
+ /** 1-based line of the function definition; absent for file-level entries. */
40
+ line?: number;
41
+ /** What the entry contributes to: 'complexity_mass' (a CC>10 function ranked
42
+ * by mass CC·√SLOC) | a verbosity rule id — upstream SlopCodeBench slop-rule
43
+ * ids (see scripts/slop_rules.yaml) for Python; builtin rule ids for
44
+ * C/C++/Rust and the no-ast-grep-binary fallback | 'clone' (a duplicated
45
+ * region). */
46
+ metric: string;
47
+ /** The offending value: the complexity mass (rounded to 1 decimal) for
48
+ * 'complexity_mass' entries, or the flagged-region line count otherwise. */
49
+ value: number;
50
+ /** Source lines spanned by the function, when the analyzer knows it. */
51
+ functionLength?: number;
35
52
  }
36
53
  /**
37
54
  * A swappable backend that measures {@link RawHealthMetrics} on a directory of
@@ -43,10 +60,22 @@ export interface MetricSource {
43
60
  /** Stable identifier for logging / selecting the active source. */
44
61
  readonly name: string;
45
62
  /**
46
- * Measure the 7 metrics on the code under `codeDir`. Resolve to `null` when
63
+ * Measure the 2 scores on the code under `codeDir`. Resolve to `null` when
47
64
  * no health signal is available.
48
65
  */
49
66
  measure(codeDir: string): Promise<RawHealthMetrics | null>;
67
+ /**
68
+ * OPTIONAL detailed measurement: the same 2 raw scores plus the bounded
69
+ * worst-offender list behind them. Optional so existing sources (stub,
70
+ * SonarQube) need no change — callers that want offenders fall back to
71
+ * `measure()` with an empty list when this method is absent. Implementations
72
+ * MUST derive `raw` from the same single measurement as the offenders (no
73
+ * second scan) so the scalar path is provably identical.
74
+ */
75
+ measureDetailed?(codeDir: string): Promise<{
76
+ raw: RawHealthMetrics;
77
+ offenders: HealthOffender[];
78
+ } | null>;
50
79
  }
51
80
  /**
52
81
  * The default no-op source: it produces no health signal, so the health head
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Swappable source of raw code-health metrics for the self-evolution health
3
- * head. A {@link MetricSource} measures the 7 code-health metrics on a directory
3
+ * head. A {@link MetricSource} measures the two SlopCodeBench code-health
4
+ * scores (arXiv:2603.24755) on a directory
4
5
  * of generated code; the result is then normalized into a single [0,1] penalty
5
6
  * by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
6
7
  * loss alongside the functional term. See
@@ -25,18 +26,12 @@ export class StubMetricSource {
25
26
  }
26
27
  /**
27
28
  * The Sonar metric keys requested from `api/measures/component`, in the order
28
- * they map onto {@link RawHealthMetrics}. `cognitive_complexity` and
29
- * `duplicated_lines_density` are native Sonar measures; the remaining five are
30
- * custom measures published under the same metric keys as the raw-metric field.
29
+ * they map onto {@link RawHealthMetrics}. BOTH are custom measures published
30
+ * under these keys (neither is a native Sonar measure).
31
31
  */
32
32
  const SONAR_METRIC_KEYS = [
33
- 'cyclomatic_p95',
34
- 'max_nesting_depth',
35
- 'cognitive_complexity',
36
- 'duplicated_lines_density',
37
- 'import_count',
38
- 'attr_method_usage_ratio',
39
- 'bare_except_count',
33
+ 'structural_erosion',
34
+ 'verbosity',
40
35
  ];
41
36
  /**
42
37
  * A {@link MetricSource} backed by SonarQube. `measure`:
@@ -104,13 +99,8 @@ export function mapSonarMeasures(body) {
104
99
  }
105
100
  const get = (key) => byKey.get(key) ?? 0;
106
101
  return {
107
- cyclomatic_p95: get('cyclomatic_p95'),
108
- max_nesting_depth: get('max_nesting_depth'),
109
- cognitive_complexity: get('cognitive_complexity'),
110
- duplicated_lines_density: get('duplicated_lines_density'),
111
- import_count: get('import_count'),
112
- attr_method_usage_ratio: get('attr_method_usage_ratio'),
113
- bare_except_count: get('bare_except_count'),
102
+ structural_erosion: get('structural_erosion'),
103
+ verbosity: get('verbosity'),
114
104
  };
115
105
  }
116
106
  //# sourceMappingURL=metric-source.js.map
@@ -20,7 +20,10 @@ export function resolveMetricSource(config) {
20
20
  return new StubMetricSource();
21
21
  }
22
22
  if (health.source === 'local' || health.source === 'local-python') {
23
- return new LocalPythonMetricSource({ pythonBin: health.pythonBin });
23
+ return new LocalPythonMetricSource({
24
+ pythonBin: health.pythonBin,
25
+ astGrepBin: health.astGrepBin,
26
+ });
24
27
  }
25
28
  if (health.source === 'sonarqube') {
26
29
  if (health.sonarUrl && health.sonarToken && health.sonarProjectKey) {
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * loss = w_f · functionalLoss + w_h · healthPenalty
5
5
  * functionalLoss = 1 − pass_rate (from the gen-test/run-test oracle)
6
- * healthPenalty = normalized 7-metric code-health penalty (SonarQube; the
7
- * health head lands later — until then callers pass 0)
6
+ * healthPenalty = normalized SlopCodeBench code-health penalty
7
+ * (structural_erosion, verbosity)
8
8
  *
9
9
  * Functional correctness is ALSO used as a hard GATE at GA selection/promotion
10
10
  * (a variant whose code fails its tests cannot win); this module only computes
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * loss = w_f · functionalLoss + w_h · healthPenalty
5
5
  * functionalLoss = 1 − pass_rate (from the gen-test/run-test oracle)
6
- * healthPenalty = normalized 7-metric code-health penalty (SonarQube; the
7
- * health head lands later — until then callers pass 0)
6
+ * healthPenalty = normalized SlopCodeBench code-health penalty
7
+ * (structural_erosion, verbosity)
8
8
  *
9
9
  * Functional correctness is ALSO used as a hard GATE at GA selection/promotion
10
10
  * (a variant whose code fails its tests cannot win); this module only computes
@@ -8,6 +8,7 @@
8
8
  */
9
9
  import type { TestMetrics } from './test-metrics.js';
10
10
  import type { PerChangeLoss } from './loss.js';
11
+ import type { HealthOffender } from './health/metric-source.js';
11
12
  import type { TrajectoryFacts } from '../trajectory/facts.js';
12
13
  export interface FitnessSample {
13
14
  changeName: string;
@@ -36,6 +37,15 @@ export interface FitnessSample {
36
37
  * was consulted.
37
38
  */
38
39
  healthSource?: string;
40
+ /**
41
+ * The worst per-file/per-function contributors BEHIND `healthSignal`
42
+ * ("x12.py complexity_mass 66.3; foo() 26 lines") — file-addressed evidence so a
43
+ * bad health reading is navigable, not just a scalar. Pure visibility: the
44
+ * penalty and loss math never read this field. OMITTED (not empty) when the
45
+ * active source produced no offenders or cannot name them (stub/sonarqube),
46
+ * so existing serialized samples and the baseline path stay byte-identical.
47
+ */
48
+ healthContributors?: HealthOffender[];
39
49
  /**
40
50
  * Ground-truth facts distilled from the agent's ACTUAL trajectory (which
41
51
  * harness, whether a test runner was really observed running, the observed
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Parse the FAILING TEST IDS (and best-effort assertion lines) out of observed
3
+ * test-runner output. Sibling of {@link parseTestMetrics}: that one re-sources
4
+ * the pass/fail COUNTS for the loss, this one re-sources the failure CONTENT
5
+ * for the critic — so failure evidence can come from the run the
6
+ * observed-verified gate already trusts instead of from the authored
7
+ * test-report prose (`extractFailureEvidence`'s grep, which stays as the
8
+ * headless fallback only).
9
+ *
10
+ * Pure + dependency-free. Recognizes only explicit per-test failure markers
11
+ * from the canonical runners; an unrecognized format returns `[]` ("no
12
+ * signal"), never a guess. Outputs are bounded (≤ {@link MAX_FAILURES}
13
+ * failures, assertion ≤ {@link MAX_ASSERTION_CHARS} chars) so a pathological
14
+ * run cannot flood the learn report.
15
+ */
16
+ export interface ParsedTestFailure {
17
+ /** Runner-native test id, e.g. `tests/test_x.py::test_name` or `suite > name`. */
18
+ testId: string;
19
+ /** Test file when derivable from the id (pytest/vitest path prefix). */
20
+ file?: string;
21
+ /** Best-effort assertion/error line for the failure, capped. */
22
+ assertion?: string;
23
+ }
24
+ /**
25
+ * Extract failing test ids + assertion lines from observed runner output.
26
+ * Returns `[]` when nothing is recognized. Deduplicates by testId, preserves
27
+ * first-seen order, caps at {@link MAX_FAILURES}.
28
+ */
29
+ export declare function parseTestFailures(output: string): ParsedTestFailure[];
30
+ //# sourceMappingURL=test-failures.d.ts.map