npm - synergyspec-selfevolving - Versions diffs - 1.3.0 → 1.4.0 - Mend

synergyspec-selfevolving 1.3.0 → 1.4.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (59)

package/README.md +19 -1
package/dist/commands/learn.js +228 -26
package/dist/commands/self-evolution.js +171 -26
package/dist/commands/workflow/status.js +3 -1
package/dist/core/config-prompts.js +4 -0
package/dist/core/fitness/health/health-metrics.d.ts +26 -56
package/dist/core/fitness/health/health-metrics.js +19 -58
package/dist/core/fitness/health/index.d.ts +15 -2
package/dist/core/fitness/health/index.js +25 -1
package/dist/core/fitness/health/local-source.d.ts +43 -4
package/dist/core/fitness/health/local-source.js +181 -25
package/dist/core/fitness/health/metric-source.d.ts +48 -19
package/dist/core/fitness/health/metric-source.js +8 -18
package/dist/core/fitness/health/resolve-source.js +4 -1
package/dist/core/fitness/loss.d.ts +2 -2
package/dist/core/fitness/loss.js +2 -2
package/dist/core/fitness/sample.d.ts +10 -0
package/dist/core/fitness/test-failures.d.ts +30 -0
package/dist/core/fitness/test-failures.js +123 -0
package/dist/core/learn/credit-path.d.ts +36 -0
package/dist/core/learn/credit-path.js +198 -0
package/dist/core/learn/trajectory-discovery.d.ts +39 -0
package/dist/core/learn/trajectory-discovery.js +140 -0
package/dist/core/learn.d.ts +39 -5
package/dist/core/learn.js +131 -14
package/dist/core/project-config.d.ts +2 -0
package/dist/core/project-config.js +24 -1
package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
package/dist/core/self-evolution/canonical-targets.js +8 -4
package/dist/core/self-evolution/health-baseline.d.ts +25 -6
package/dist/core/self-evolution/health-baseline.js +30 -6
package/dist/core/self-evolution/index.d.ts +1 -0
package/dist/core/self-evolution/index.js +1 -0
package/dist/core/self-evolution/learn-hints.d.ts +31 -0
package/dist/core/self-evolution/learn-hints.js +16 -0
package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
package/dist/core/self-evolution/proposer-agent.js +94 -13
package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
package/dist/core/self-evolution/proposer-slice.js +54 -0
package/dist/core/self-evolution/success-channel.d.ts +79 -0
package/dist/core/self-evolution/success-channel.js +361 -0
package/dist/core/self-evolution/target-evolution.d.ts +11 -0
package/dist/core/self-evolution/target-evolution.js +2 -0
package/dist/core/templates/skill-templates.d.ts +1 -0
package/dist/core/templates/skill-templates.js +1 -0
package/dist/core/templates/workflow-manifest.js +2 -0
package/dist/core/templates/workflows/learn.d.ts +3 -2
package/dist/core/templates/workflows/learn.js +24 -167
package/dist/core/templates/workflows/self-evolving.d.ts +11 -0
package/dist/core/templates/workflows/self-evolving.js +237 -0
package/dist/core/trajectory/facts.d.ts +16 -0
package/dist/core/trajectory/facts.js +12 -4
package/dist/core/trajectory/skeleton.d.ts +43 -0
package/dist/core/trajectory/skeleton.js +239 -0
package/package.json +3 -1
package/scripts/code-health.py +1066 -638
package/scripts/slop_rules.yaml +2151 -0

package/dist/core/fitness/health/local-source.d.ts CHANGED Viewed

@@ -15,11 +15,11 @@
  * health reading as "skip the health half of the loss" rather than a failure.
  *
  * The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
- * which is authored in parallel; the shape this module relies on is the 7-key
+ * which is authored in parallel; the shape this module relies on is the 2-key
  * record produced by the Python script.
  */
 import { spawn as nodeSpawn } from 'node:child_process';
-import type { MetricSource, RawHealthMetrics } from './metric-source.js';
+import type { HealthOffender, MetricSource, RawHealthMetrics } from './metric-source.js';
 /** Minimal structural type for node's `spawn` (kept narrow for injectability). */
 export type SpawnImpl = typeof nodeSpawn;
 export interface LocalPythonMetricSourceOptions {
@@ -29,22 +29,61 @@ export interface LocalPythonMetricSourceOptions {
     spawnImpl?: SpawnImpl;
     /** Override the analyzer script path (mostly for tests). */
     scriptPath?: string;
+    /** Path to an ast-grep binary for the analyzer's Python slop-rule engine.
+     *  When omitted, resolved automatically (env override, then the installed
+     *  `@ast-grep/cli` platform package); see {@link defaultAstGrepBin}. */
+    astGrepBin?: string;
+    /** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
+     *  to the `slop_rules.yaml` vendored next to the analyzer script. */
+    rulesPath?: string;
 }
+/**
+ * Locate the ast-grep binary the analyzer's Python slop-rule engine should
+ * use, or `null` when none can be found (the analyzer then falls back to its
+ * own env/PATH lookup, or to the builtin rules). Resolution order:
+ *   1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, taken verbatim;
+ *   2. the native binary inside the installed `@ast-grep/cli` PLATFORM package
+ *      (e.g. `@ast-grep/cli-win32-x64-msvc/ast-grep.exe`), resolved FROM the
+ *      `@ast-grep/cli` package dir — under pnpm the platform package is only
+ *      resolvable from there;
+ *   3. non-Windows only: `@ast-grep/cli/ast-grep` itself, where npm's
+ *      postinstall may have swapped the JS shim for the native binary —
+ *      accepted only when it does not start with `#!`.
+ * Exported for tests. Never throws; any resolution failure → null.
+ */
+export declare function defaultAstGrepBin(): string | null;
 export declare class LocalPythonMetricSource implements MetricSource {
     readonly name = "local";
     private readonly pythonBin;
     private readonly spawnImpl;
     private readonly scriptPath;
+    private readonly astGrepBin;
+    private readonly rulesPath;
     constructor(options?: LocalPythonMetricSourceOptions);
     /**
      * Run the analyzer over `codeDir` and return its metrics, or `null` on any
      * spawn / exit / parse failure (graceful degradation when Python is absent).
      */
     measure(codeDir: string): Promise<RawHealthMetrics | null>;
+    /**
+     * Detailed measurement: the same 2 scores PLUS the analyzer's
+     * `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
+     * with `measure()`'s parse path, so the scalar half is provably the value
+     * `measure()` would have produced (`toRawHealthMetrics` strips the additive
+     * key either way). An old analyzer without the key yields `offenders: []`.
+     */
+    measureDetailed(codeDir: string): Promise<{
+        raw: RawHealthMetrics;
+        offenders: HealthOffender[];
+    } | null>;
+    /** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
+    private runAndParse;
     /**
      * Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
-     * Resolves to the raw stdout string on a clean (exit 0) run, or `null` if the
-     * process cannot be spawned or exits non-zero.
+     * `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
+     * rules file exists (the analyzer's own PATH fallback still uses it even with
+     * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
+     * run, or `null` if the process cannot be spawned or exits non-zero.
      */
     private runAnalyzer;
 }

package/dist/core/fitness/health/local-source.js CHANGED Viewed

@@ -15,23 +15,16 @@
  * health reading as "skip the health half of the loss" rather than a failure.
  *
  * The interface + {@link RawHealthMetrics} type come from `./metric-source.js`,
- * which is authored in parallel; the shape this module relies on is the 7-key
+ * which is authored in parallel; the shape this module relies on is the 2-key
  * record produced by the Python script.
  */
 import { spawn as nodeSpawn } from 'node:child_process';
 import { fileURLToPath } from 'node:url';
-import { existsSync } from 'node:fs';
+import { existsSync, openSync, readSync, closeSync } from 'node:fs';
+import { createRequire } from 'node:module';
 import path from 'node:path';
 /** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
-const HEALTH_KEYS = [
-    'cyclomatic_p95',
-    'max_nesting_depth',
-    'cognitive_complexity',
-    'duplicated_lines_density',
-    'import_count',
-    'attr_method_usage_ratio',
-    'bare_except_count',
-];
+const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
 /**
  * Locate `scripts/code-health.py` relative to this module. Built output lives
  * at `dist/core/fitness/health/local-source.js`; the script stays at the
@@ -61,12 +54,110 @@ function defaultScriptPath() {
 function defaultPythonBin() {
     return process.env.SYNERGYSPEC_SELFEVOLVING_PYTHON_BIN || 'python';
 }
+/**
+ * Map `process.platform`/`process.arch` onto the `@ast-grep/cli` platform
+ * package that carries the real native binary (the `ast-grep` file inside
+ * `@ast-grep/cli` itself is a JS shim on Windows). Unknown platforms → null.
+ */
+function astGrepPlatformPackage() {
+    const { platform, arch } = process;
+    if (platform === 'darwin') {
+        return arch === 'arm64' ? '@ast-grep/cli-darwin-arm64' : '@ast-grep/cli-darwin-x64';
+    }
+    if (platform === 'linux') {
+        return arch === 'arm64' ? '@ast-grep/cli-linux-arm64-gnu' : '@ast-grep/cli-linux-x64-gnu';
+    }
+    if (platform === 'win32') {
+        if (arch === 'arm64')
+            return '@ast-grep/cli-win32-arm64-msvc';
+        if (arch === 'ia32')
+            return '@ast-grep/cli-win32-ia32-msvc';
+        return '@ast-grep/cli-win32-x64-msvc';
+    }
+    return null;
+}
+/** True when the file starts with `#!` (a script shim, not a native binary).
+ *  An unreadable file counts as a shim so the candidate is skipped. */
+function isShebangShim(file) {
+    try {
+        const fd = openSync(file, 'r');
+        try {
+            const head = Buffer.alloc(2);
+            const n = readSync(fd, head, 0, 2, 0);
+            return n === 2 && head[0] === 0x23 && head[1] === 0x21; // '#!'
+        }
+        finally {
+            closeSync(fd);
+        }
+    }
+    catch {
+        return true;
+    }
+}
+/** Memoized {@link defaultAstGrepBin} result; resolution runs once per process. */
+let astGrepBinMemo;
+/**
+ * Locate the ast-grep binary the analyzer's Python slop-rule engine should
+ * use, or `null` when none can be found (the analyzer then falls back to its
+ * own env/PATH lookup, or to the builtin rules). Resolution order:
+ *   1. env `SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN`, taken verbatim;
+ *   2. the native binary inside the installed `@ast-grep/cli` PLATFORM package
+ *      (e.g. `@ast-grep/cli-win32-x64-msvc/ast-grep.exe`), resolved FROM the
+ *      `@ast-grep/cli` package dir — under pnpm the platform package is only
+ *      resolvable from there;
+ *   3. non-Windows only: `@ast-grep/cli/ast-grep` itself, where npm's
+ *      postinstall may have swapped the JS shim for the native binary —
+ *      accepted only when it does not start with `#!`.
+ * Exported for tests. Never throws; any resolution failure → null.
+ */
+export function defaultAstGrepBin() {
+    if (astGrepBinMemo !== undefined)
+        return astGrepBinMemo;
+    astGrepBinMemo = resolveAstGrepBin();
+    return astGrepBinMemo;
+}
+function resolveAstGrepBin() {
+    const env = process.env.SYNERGYSPEC_SELFEVOLVING_ASTGREP_BIN;
+    if (env)
+        return env;
+    try {
+        const req = createRequire(import.meta.url);
+        const cliPkgDir = path.dirname(req.resolve('@ast-grep/cli/package.json'));
+        const platformPkg = astGrepPlatformPackage();
+        if (platformPkg !== null) {
+            try {
+                const platformPkgDir = path.dirname(req.resolve(`${platformPkg}/package.json`, { paths: [cliPkgDir] }));
+                const bin = path.join(platformPkgDir, process.platform === 'win32' ? 'ast-grep.exe' : 'ast-grep');
+                if (existsSync(bin))
+                    return bin;
+            }
+            catch {
+                // Platform package absent (e.g. optional dep skipped) → try the shim slot.
+            }
+        }
+        if (process.platform !== 'win32') {
+            const candidate = path.join(cliPkgDir, 'ast-grep');
+            if (existsSync(candidate) && !isShebangShim(candidate))
+                return candidate;
+        }
+        return null;
+    }
+    catch {
+        return null; // @ast-grep/cli not installed at all → no binary, no flag
+    }
+}
+/** The slop-rules YAML vendored as a sibling of the analyzer script. */
+function defaultRulesPath(scriptPath) {
+    return path.join(path.dirname(scriptPath), 'slop_rules.yaml');
+}
 function isFiniteNumber(v) {
     return typeof v === 'number' && Number.isFinite(v);
 }
 /**
  * Validate + narrow a parsed JSON object into {@link RawHealthMetrics}. Every
- * one of the 7 keys must be present and a finite number; otherwise null.
+ * one of the 2 keys must be present and a finite number; otherwise null.
+ * Additive analyzer keys (`worst_offenders`, `verbosity_engine`, …) are
+ * tolerated and stripped here.
  */
 function toRawHealthMetrics(parsed) {
     if (parsed === null || typeof parsed !== 'object')
@@ -78,46 +169,106 @@ function toRawHealthMetrics(parsed) {
     }
     // Build a clean object with exactly the known keys (no extra fields leak).
     return {
-        cyclomatic_p95: obj.cyclomatic_p95,
-        max_nesting_depth: obj.max_nesting_depth,
-        cognitive_complexity: obj.cognitive_complexity,
-        duplicated_lines_density: obj.duplicated_lines_density,
-        import_count: obj.import_count,
-        attr_method_usage_ratio: obj.attr_method_usage_ratio,
-        bare_except_count: obj.bare_except_count,
+        structural_erosion: obj.structural_erosion,
+        verbosity: obj.verbosity,
     };
 }
+/**
+ * Map the analyzer's additive `worst_offenders` key onto {@link HealthOffender}s
+ * (snake_case → camelCase). DEFENSIVE by design: an old analyzer without the key,
+ * a non-array value, or malformed entries all degrade to fewer/zero offenders —
+ * never to a parse failure — because offenders are pure visibility and must not
+ * be able to break the scalar penalty path. Optional fields are OMITTED (not
+ * `undefined`-assigned) so serialized samples stay minimal.
+ */
+function toHealthOffenders(parsed) {
+    if (parsed === null || typeof parsed !== 'object')
+        return [];
+    const list = parsed.worst_offenders;
+    if (!Array.isArray(list))
+        return [];
+    const out = [];
+    for (const item of list) {
+        if (item === null || typeof item !== 'object')
+            continue;
+        const o = item;
+        if (typeof o.file !== 'string' || o.file.length === 0)
+            continue;
+        if (typeof o.metric !== 'string' || o.metric.length === 0)
+            continue;
+        if (!isFiniteNumber(o.value))
+            continue;
+        out.push({
+            file: o.file,
+            metric: o.metric,
+            value: o.value,
+            ...(typeof o.function === 'string' && o.function.length > 0
+                ? { function: o.function }
+                : {}),
+            ...(isFiniteNumber(o.line) ? { line: o.line } : {}),
+            ...(isFiniteNumber(o.function_length) ? { functionLength: o.function_length } : {}),
+        });
+    }
+    return out;
+}
 export class LocalPythonMetricSource {
     name = 'local';
     pythonBin;
     spawnImpl;
     scriptPath;
+    astGrepBin;
+    rulesPath;
     constructor(options = {}) {
         this.pythonBin = options.pythonBin ?? defaultPythonBin();
         this.spawnImpl = options.spawnImpl ?? nodeSpawn;
         this.scriptPath = options.scriptPath ?? defaultScriptPath();
+        this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
+        this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
     }
     /**
      * Run the analyzer over `codeDir` and return its metrics, or `null` on any
      * spawn / exit / parse failure (graceful degradation when Python is absent).
      */
     async measure(codeDir) {
+        const parsed = await this.runAndParse(codeDir);
+        if (parsed === null)
+            return null;
+        return toRawHealthMetrics(parsed);
+    }
+    /**
+     * Detailed measurement: the same 2 scores PLUS the analyzer's
+     * `worst_offenders` mapped to {@link HealthOffender}s — from ONE spawn shared
+     * with `measure()`'s parse path, so the scalar half is provably the value
+     * `measure()` would have produced (`toRawHealthMetrics` strips the additive
+     * key either way). An old analyzer without the key yields `offenders: []`.
+     */
+    async measureDetailed(codeDir) {
+        const parsed = await this.runAndParse(codeDir);
+        if (parsed === null)
+            return null;
+        const raw = toRawHealthMetrics(parsed);
+        if (raw === null)
+            return null;
+        return { raw, offenders: toHealthOffenders(parsed) };
+    }
+    /** Single spawn + JSON parse shared by `measure` and `measureDetailed`. */
+    async runAndParse(codeDir) {
         const stdout = await this.runAnalyzer(codeDir);
         if (stdout === null)
             return null;
-        let parsed;
         try {
-            parsed = JSON.parse(stdout);
+            return JSON.parse(stdout);
         }
         catch {
             return null; // not JSON → no signal
         }
-        return toRawHealthMetrics(parsed);
     }
     /**
      * Spawn `python scripts/code-health.py <codeDir>` and collect stdout.
-     * Resolves to the raw stdout string on a clean (exit 0) run, or `null` if the
-     * process cannot be spawned or exits non-zero.
+     * `--ast-grep-bin` is appended when a binary resolved; `--rules` whenever the
+     * rules file exists (the analyzer's own PATH fallback still uses it even with
+     * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
+     * run, or `null` if the process cannot be spawned or exits non-zero.
      */
     runAnalyzer(codeDir) {
         return new Promise((resolve) => {
@@ -128,9 +279,14 @@ export class LocalPythonMetricSource {
                 settled = true;
                 resolve(value);
             };
+            const args = [this.scriptPath, codeDir];
+            if (this.astGrepBin !== null)
+                args.push('--ast-grep-bin', this.astGrepBin);
+            if (existsSync(this.rulesPath))
+                args.push('--rules', this.rulesPath);
             let child;
             try {
-                child = this.spawnImpl(this.pythonBin, [this.scriptPath, codeDir], { shell: false });
+                child = this.spawnImpl(this.pythonBin, args, { shell: false });
             }
             catch {
                 // Synchronous spawn failure (e.g. bad options) → no signal.

package/dist/core/fitness/health/metric-source.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 /**
  * Swappable source of raw code-health metrics for the self-evolution health
- * head. A {@link MetricSource} measures the 7 code-health metrics on a directory
+ * head. A {@link MetricSource} measures the two SlopCodeBench code-health
+ * scores (arXiv:2603.24755) on a directory
  * of generated code; the result is then normalized into a single [0,1] penalty
  * by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
  * loss alongside the functional term. See
@@ -13,25 +14,41 @@
  * rather than guessing, exactly like the functional parser's null.
  */
 /**
- * The 7 raw code-health metrics, computed on generated code. Lower is better for
- * every metric EXCEPT `attr_method_usage_ratio` (class cohesion), where higher
- * is better. All are plain numbers in their native units (see each field).
+ * The two raw SlopCodeBench code-health scores (arXiv:2603.24755), computed on
+ * generated code. Both are fractions in [0,1]; lower is better for both.
  */
 export interface RawHealthMetrics {
-    /** 95th-percentile per-function cyclomatic complexity. Lower better; ceiling ~12. */
-    cyclomatic_p95: number;
-    /** Deepest nested control structure. Lower better; ceiling ~4. */
-    max_nesting_depth: number;
-    /** Mean Sonar cognitive complexity. Lower better; ceiling ~15. */
-    cognitive_complexity: number;
-    /** Duplicated-lines density, a fraction in [0,1]. Lower better; ceiling ~0.05. */
-    duplicated_lines_density: number;
-    /** Total import aliases. Lower better; ceiling ~40. */
-    import_count: number;
-    /** Class cohesion (attr/method usage ratio) in [0,1]. HIGHER better; floor ~0.5. */
-    attr_method_usage_ratio: number;
-    /** Count of bare `except`. Lower better; ceiling 0 (any > 0 is penalized). */
-    bare_except_count: number;
+    /** Mass-weighted share of functions with cyclomatic complexity > 10:
+     *  Σ_{CC>10} CC·√SLOC / Σ CC·√SLOC. Fraction in [0,1]; lower better. */
+    structural_erosion: number;
+    /** |rule-flagged lines ∪ clone lines| / non-blank LOC. Fraction in [0,1]; lower better. */
+    verbosity: number;
+}
+/**
+ * One worst per-function (or, for the heuristic C/C++/Rust analyzer path,
+ * per-file) code-health contributor — the file-addressed evidence BEHIND the
+ * scalar `healthPenalty` ("health 0.516 — x12.py cyclomatic 41"). Offenders
+ * are pure visibility: which entries a source surfaces can never change the
+ * raw scores or the normalized penalty.
+ */
+export interface HealthOffender {
+    /** Source file, relative to the measured code dir (forward slashes). */
+    file: string;
+    /** Function name; absent for file-level (heuristic-language) entries. */
+    function?: string;
+    /** 1-based line of the function definition; absent for file-level entries. */
+    line?: number;
+    /** What the entry contributes to: 'complexity_mass' (a CC>10 function ranked
+     *  by mass CC·√SLOC) | a verbosity rule id — upstream SlopCodeBench slop-rule
+     *  ids (see scripts/slop_rules.yaml) for Python; builtin rule ids for
+     *  C/C++/Rust and the no-ast-grep-binary fallback | 'clone' (a duplicated
+     *  region). */
+    metric: string;
+    /** The offending value: the complexity mass (rounded to 1 decimal) for
+     *  'complexity_mass' entries, or the flagged-region line count otherwise. */
+    value: number;
+    /** Source lines spanned by the function, when the analyzer knows it. */
+    functionLength?: number;
 }
 /**
  * A swappable backend that measures {@link RawHealthMetrics} on a directory of
@@ -43,10 +60,22 @@ export interface MetricSource {
     /** Stable identifier for logging / selecting the active source. */
     readonly name: string;
     /**
-     * Measure the 7 metrics on the code under `codeDir`. Resolve to `null` when
+     * Measure the 2 scores on the code under `codeDir`. Resolve to `null` when
      * no health signal is available.
      */
     measure(codeDir: string): Promise<RawHealthMetrics | null>;
+    /**
+     * OPTIONAL detailed measurement: the same 2 raw scores plus the bounded
+     * worst-offender list behind them. Optional so existing sources (stub,
+     * SonarQube) need no change — callers that want offenders fall back to
+     * `measure()` with an empty list when this method is absent. Implementations
+     * MUST derive `raw` from the same single measurement as the offenders (no
+     * second scan) so the scalar path is provably identical.
+     */
+    measureDetailed?(codeDir: string): Promise<{
+        raw: RawHealthMetrics;
+        offenders: HealthOffender[];
+    } | null>;
 }
 /**
  * The default no-op source: it produces no health signal, so the health head

package/dist/core/fitness/health/metric-source.js CHANGED Viewed

@@ -1,6 +1,7 @@
 /**
  * Swappable source of raw code-health metrics for the self-evolution health
- * head. A {@link MetricSource} measures the 7 code-health metrics on a directory
+ * head. A {@link MetricSource} measures the two SlopCodeBench code-health
+ * scores (arXiv:2603.24755) on a directory
  * of generated code; the result is then normalized into a single [0,1] penalty
  * by `normalizeHealth` (see ./health-metrics.ts) and folded into the per-change
  * loss alongside the functional term. See
@@ -25,18 +26,12 @@ export class StubMetricSource {
 }
 /**
  * The Sonar metric keys requested from `api/measures/component`, in the order
- * they map onto {@link RawHealthMetrics}. `cognitive_complexity` and
- * `duplicated_lines_density` are native Sonar measures; the remaining five are
- * custom measures published under the same metric keys as the raw-metric field.
+ * they map onto {@link RawHealthMetrics}. BOTH are custom measures published
+ * under these keys (neither is a native Sonar measure).
  */
 const SONAR_METRIC_KEYS = [
-    'cyclomatic_p95',
-    'max_nesting_depth',
-    'cognitive_complexity',
-    'duplicated_lines_density',
-    'import_count',
-    'attr_method_usage_ratio',
-    'bare_except_count',
+    'structural_erosion',
+    'verbosity',
 ];
 /**
  * A {@link MetricSource} backed by SonarQube. `measure`:
@@ -104,13 +99,8 @@ export function mapSonarMeasures(body) {
     }
     const get = (key) => byKey.get(key) ?? 0;
     return {
-        cyclomatic_p95: get('cyclomatic_p95'),
-        max_nesting_depth: get('max_nesting_depth'),
-        cognitive_complexity: get('cognitive_complexity'),
-        duplicated_lines_density: get('duplicated_lines_density'),
-        import_count: get('import_count'),
-        attr_method_usage_ratio: get('attr_method_usage_ratio'),
-        bare_except_count: get('bare_except_count'),
+        structural_erosion: get('structural_erosion'),
+        verbosity: get('verbosity'),
     };
 }
 //# sourceMappingURL=metric-source.js.map

package/dist/core/fitness/health/resolve-source.js CHANGED Viewed

@@ -20,7 +20,10 @@ export function resolveMetricSource(config) {
         return new StubMetricSource();
     }
     if (health.source === 'local' || health.source === 'local-python') {
-        return new LocalPythonMetricSource({ pythonBin: health.pythonBin });
+        return new LocalPythonMetricSource({
+            pythonBin: health.pythonBin,
+            astGrepBin: health.astGrepBin,
+        });
     }
     if (health.source === 'sonarqube') {
         if (health.sonarUrl && health.sonarToken && health.sonarProjectKey) {

package/dist/core/fitness/loss.d.ts CHANGED Viewed

@@ -3,8 +3,8 @@
  *
  * loss = w_f · functionalLoss  +  w_h · healthPenalty
  *   functionalLoss = 1 − pass_rate      (from the gen-test/run-test oracle)
- *   healthPenalty  = normalized 7-metric code-health penalty (SonarQube; the
- *                    health head lands later — until then callers pass 0)
+ *   healthPenalty  = normalized SlopCodeBench code-health penalty
+ *                    (structural_erosion ⊕ verbosity)
  *
  * Functional correctness is ALSO used as a hard GATE at GA selection/promotion
  * (a variant whose code fails its tests cannot win); this module only computes

package/dist/core/fitness/loss.js CHANGED Viewed

@@ -3,8 +3,8 @@
  *
  * loss = w_f · functionalLoss  +  w_h · healthPenalty
  *   functionalLoss = 1 − pass_rate      (from the gen-test/run-test oracle)
- *   healthPenalty  = normalized 7-metric code-health penalty (SonarQube; the
- *                    health head lands later — until then callers pass 0)
+ *   healthPenalty  = normalized SlopCodeBench code-health penalty
+ *                    (structural_erosion ⊕ verbosity)
  *
  * Functional correctness is ALSO used as a hard GATE at GA selection/promotion
  * (a variant whose code fails its tests cannot win); this module only computes

package/dist/core/fitness/sample.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@
  */
 import type { TestMetrics } from './test-metrics.js';
 import type { PerChangeLoss } from './loss.js';
+import type { HealthOffender } from './health/metric-source.js';
 import type { TrajectoryFacts } from '../trajectory/facts.js';
 export interface FitnessSample {
     changeName: string;
@@ -36,6 +37,15 @@ export interface FitnessSample {
      * was consulted.
      */
     healthSource?: string;
+    /**
+     * The worst per-file/per-function contributors BEHIND `healthSignal`
+     * ("x12.py complexity_mass 66.3; foo() 26 lines") — file-addressed evidence so a
+     * bad health reading is navigable, not just a scalar. Pure visibility: the
+     * penalty and loss math never read this field. OMITTED (not empty) when the
+     * active source produced no offenders or cannot name them (stub/sonarqube),
+     * so existing serialized samples and the baseline path stay byte-identical.
+     */
+    healthContributors?: HealthOffender[];
     /**
      * Ground-truth facts distilled from the agent's ACTUAL trajectory (which
      * harness, whether a test runner was really observed running, the observed

package/dist/core/fitness/test-failures.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Parse the FAILING TEST IDS (and best-effort assertion lines) out of observed
+ * test-runner output. Sibling of {@link parseTestMetrics}: that one re-sources
+ * the pass/fail COUNTS for the loss, this one re-sources the failure CONTENT
+ * for the critic — so failure evidence can come from the run the
+ * observed-verified gate already trusts instead of from the authored
+ * test-report prose (`extractFailureEvidence`'s grep, which stays as the
+ * headless fallback only).
+ *
+ * Pure + dependency-free. Recognizes only explicit per-test failure markers
+ * from the canonical runners; an unrecognized format returns `[]` ("no
+ * signal"), never a guess. Outputs are bounded (≤ {@link MAX_FAILURES}
+ * failures, assertion ≤ {@link MAX_ASSERTION_CHARS} chars) so a pathological
+ * run cannot flood the learn report.
+ */
+export interface ParsedTestFailure {
+    /** Runner-native test id, e.g. `tests/test_x.py::test_name` or `suite > name`. */
+    testId: string;
+    /** Test file when derivable from the id (pytest/vitest path prefix). */
+    file?: string;
+    /** Best-effort assertion/error line for the failure, capped. */
+    assertion?: string;
+}
+/**
+ * Extract failing test ids + assertion lines from observed runner output.
+ * Returns `[]` when nothing is recognized. Deduplicates by testId, preserves
+ * first-seen order, caps at {@link MAX_FAILURES}.
+ */
+export declare function parseTestFailures(output: string): ParsedTestFailure[];
+//# sourceMappingURL=test-failures.d.ts.map