npm - codeloop-mcp-server - Versions diffs - 0.1.50 → 0.1.52 - Mend

codeloop-mcp-server 0.1.50 → 0.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/dist/auth/critical_floors.d.ts.map +1 -1
package/dist/auth/critical_floors.js +8 -0
package/dist/auth/critical_floors.js.map +1 -1
package/dist/evidence/anti_rationalisation.d.ts +34 -0
package/dist/evidence/anti_rationalisation.d.ts.map +1 -0
package/dist/evidence/anti_rationalisation.js +85 -0
package/dist/evidence/anti_rationalisation.js.map +1 -0
package/dist/evidence/change_coverage.d.ts +59 -0
package/dist/evidence/change_coverage.d.ts.map +1 -0
package/dist/evidence/change_coverage.js +422 -0
package/dist/evidence/change_coverage.js.map +1 -0
package/dist/evidence/change_manifest.d.ts +94 -0
package/dist/evidence/change_manifest.d.ts.map +1 -0
package/dist/evidence/change_manifest.js +830 -0
package/dist/evidence/change_manifest.js.map +1 -0
package/dist/evidence/loop_state.d.ts +53 -0
package/dist/evidence/loop_state.d.ts.map +1 -0
package/dist/evidence/loop_state.js +147 -0
package/dist/evidence/loop_state.js.map +1 -0
package/dist/evidence/verify_staleness.d.ts +9 -0
package/dist/evidence/verify_staleness.d.ts.map +1 -0
package/dist/evidence/verify_staleness.js +180 -0
package/dist/evidence/verify_staleness.js.map +1 -0
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +374 -19
package/dist/index.js.map +1 -1
package/dist/runners/empty_state_detector.d.ts +33 -0
package/dist/runners/empty_state_detector.d.ts.map +1 -0
package/dist/runners/empty_state_detector.js +304 -0
package/dist/runners/empty_state_detector.js.map +1 -0
package/dist/runners/maestro.d.ts +13 -0
package/dist/runners/maestro.d.ts.map +1 -1
package/dist/runners/maestro.js +37 -1
package/dist/runners/maestro.js.map +1 -1
package/dist/runners/modal_detector.d.ts +60 -0
package/dist/runners/modal_detector.d.ts.map +1 -0
package/dist/runners/modal_detector.js +160 -0
package/dist/runners/modal_detector.js.map +1 -0
package/dist/runners/python_tests.d.ts +26 -0
package/dist/runners/python_tests.d.ts.map +1 -0
package/dist/runners/python_tests.js +181 -0
package/dist/runners/python_tests.js.map +1 -0
package/dist/runners/rust_tests.d.ts +28 -0
package/dist/runners/rust_tests.d.ts.map +1 -0
package/dist/runners/rust_tests.js +76 -0
package/dist/runners/rust_tests.js.map +1 -0
package/dist/tools/c7_slug.d.ts +14 -0
package/dist/tools/c7_slug.d.ts.map +1 -0
package/dist/tools/c7_slug.js +21 -0
package/dist/tools/c7_slug.js.map +1 -0
package/dist/tools/diagnose.d.ts.map +1 -1
package/dist/tools/diagnose.js +13 -0
package/dist/tools/diagnose.js.map +1 -1
package/dist/tools/gate_check.d.ts +2 -1
package/dist/tools/gate_check.d.ts.map +1 -1
package/dist/tools/gate_check.js +74 -32
package/dist/tools/gate_check.js.map +1 -1
package/dist/tools/is_ui_project.d.ts +23 -0
package/dist/tools/is_ui_project.d.ts.map +1 -0
package/dist/tools/is_ui_project.js +42 -0
package/dist/tools/is_ui_project.js.map +1 -0
package/dist/tools/plan_change_journey.d.ts +41 -0
package/dist/tools/plan_change_journey.d.ts.map +1 -0
package/dist/tools/plan_change_journey.js +131 -0
package/dist/tools/plan_change_journey.js.map +1 -0
package/dist/tools/verify.d.ts +28 -0
package/dist/tools/verify.d.ts.map +1 -1
package/dist/tools/verify.js +272 -8
package/dist/tools/verify.js.map +1 -1
package/package.json +1 -1

package/dist/auth/critical_floors.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,~~EAwB1C~~,CAAC"}
1	+ {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EAkC1C,CAAC"}

package/dist/auth/critical_floors.js CHANGED Viewed

@@ -60,5 +60,13 @@ export const CRITICAL_FLOORS = [
         min_version: "0.1.50",
         reason: "WPF / desktop final-mile fixes — pre-0.1.50 builds returned `(undefined, undefined)` when codeloop_interact was called with `text` / `role` / `automation_id` selectors instead of raw x/y on Windows desktop targets (every WPF tab/list-item click missed because the runner had no UIA tree walker for selectors), accepted weak cross-run design_compare matches and scored unrelated screens at 0% (e.g. `10-led-bom_*` paired with `led-design-bom-add-component`), wrote screenshot artifacts under the user's HOME folder when project_dir was omitted on Cursor-launched MCP servers, returned empty arrays from codeloop_discover_screens for desktop projects (designs/desktop/*.png never surfaced), classified MSB3027 / MSB3021 file-locked build errors as `issue_unclassified` (agents looped on the same locked-EXE forever), lacked `codeloop doctor --prune-artifacts` to clean up old corrupt-PNG runs, and surfaced bare 'App window not found' errors with no candidates / next_step diagnostic when the priority ladder exhausted",
     },
+    {
+        min_version: "0.1.51",
+        reason: "Full auto-loop + modal handling — pre-0.1.51 builds let the agent silently skip codeloop_verify between edits (no post-edit hooks, no staleness directive in tool responses), only ran ONE platform stack on multi-stack monorepos (.NET backend + React frontend / Django + Next.js / Tauri all silently skipped half the codebase), had no Python or Rust verify runners, didn't fire withInitHint on visual_review / design_compare / interaction_replay / generate_dev_report (so fresh workspaces could complete a full visual cycle without ever calling codeloop_init_project), spun the auto-fix loop forever with no server-side iteration cap (no escalate at 15 gate / 8 diagnose attempts), didn't auto-run Maestro flows in verify, and IGNORED MODALS during recording — every Save / Confirm-delete / EULA / browser beforeunload prompt was clicked-through-or-skipped, blocking the rest of the user_journey arc and silently dragging gate confidence down. 0.1.51 closes all of these and adds codeloop_capture_all_screens + codeloop_handle_modal so the loop is finally hands-off.",
+    },
+    {
+        min_version: "0.1.52",
+        reason: "Change-aware verification — pre-0.1.52 builds were CHANGE-BLIND: the auto-fix loop happily reached 100% confidence on a recording session that NEVER exercised the new feature the user just added. The Photometry-DB E2E #9 transcript shipped a new Product Code DataGrid column, a new buttons-below-title layout, a new ProductCode property, and an EF migration column — and the gate passed with 11/11 green having only run a 5-click navigation tour on an empty database. 0.1.52 ships C1 (per-run change_manifest.json built from the git diff vs the last verified SHA + uncommitted/untracked files, with feature-shape parsers for XAML / HTML / JSX / C# / TS / Dart / Swift / Kt / EF migrations / SQL DDL plus structural-layout delta detection), C2 (codeloop_plan_change_journey emits a per-manifest-entry interaction script with a HARD seed-first preamble), C3 (change_coverage_evidence blocker gate at 1.0 threshold cross-references the manifest against interaction log + screenshots + replay frames + build/runtime logs), C4 (empty-state seeding enforcement — codeloop_interact appends a HARD directive when the agent targets a row/cell with no prior commit/seed action), C5 (verify cross-checks tasks_completed claims against the manifest and surfaces orphan claims as warnings), C6 (anti-rationalisation directive in gate_check continue_fixing block + recent_thinking phrase scan that surfaces 'comprehensive verification confirms' / 'further interaction would be redundant' style stalls), and C7 (target_change_entry on codeloop_interact + codeloop_capture_screenshot anchors evidence to manifest entries via a deterministic --c7-<slug> filename suffix the C3 gate matches without fuzzy logic).",
+    },
 ];
 //# sourceMappingURL=critical_floors.js.map

package/dist/auth/critical_floors.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;CACF,CAAC"}
1	+ {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;CACF,CAAC"}

package/dist/evidence/anti_rationalisation.d.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * 0.1.52 C6 — Anti-rationalisation directive + recent_thinking scan.
+ *
+ * The Photometry-DB E2E #9 transcript ended at 100% confidence with the
+ * model narrating polite reasons not to keep iterating: "comprehensive
+ * verification confirms ready for production", "the new features are
+ * already implemented; further interaction would be redundant",
+ * "extensive UI exploration completed". Each phrase is a stand-in for
+ * "I will skip the change-aware verification step the C3 gate
+ * explicitly requires".
+ *
+ * This module ships the canonical FORBIDDEN list and a scanner that the
+ * gate_check tool can run over an optional `recent_thinking` payload
+ * supplied by the agent. When a forbidden phrase fires, the scanner
+ * surfaces the matched fragment so the gate's continue_fixing
+ * postscript can call out the rationalisation by name and force the
+ * agent to take a concrete next step instead of restating the excuse.
+ */
+/**
+ * Ordered list of rationalisation patterns. `regex` is run against the
+ * agent-supplied `recent_thinking` text; `reason` is the explanation
+ * reported alongside a match.
+ */
+export declare const C6_FORBIDDEN_PHRASES: Array<{
+    regex: RegExp;
+    reason: string;
+}>;
+/** One forbidden-phrase match produced by `scanRecentThinking`. */
+export interface RationalisationHit {
+    /** The exact substring of the scanned text that matched the pattern. */
+    matched_text: string;
+    /** The `reason` of the pattern that matched. */
+    reason: string;
+}
+/**
+ * Scans `text` against every entry of `C6_FORBIDDEN_PHRASES`. Yields at
+ * most one hit per pattern (its first match), in list order, and an
+ * empty array for `null`, `undefined`, empty or non-string input.
+ */
+export declare function scanRecentThinking(text: string | undefined | null): RationalisationHit[];
+/**
+ * Renders the C6 anti-rationalisation directive lines for inclusion in
+ * the gate_check continue_fixing postscript. Pure formatting — the
+ * caller decides where to inject this in the larger directive block.
+ */
+export declare function buildAntiRationalisationDirective(hits: RationalisationHit[]): string;
+//# sourceMappingURL=anti_rationalisation.d.ts.map

package/dist/evidence/anti_rationalisation.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"anti_rationalisation.d.ts","sourceRoot":"","sources":["../../src/evidence/anti_rationalisation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,eAAO,MAAM,oBAAoB,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CA6BzE,CAAC;AAEF,MAAM,WAAW,kBAAkB;IACjC,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,kBAAkB,EAAE,CAQxF;AAED;;;;GAIG;AACH,wBAAgB,iCAAiC,CAC/C,IAAI,EAAE,kBAAkB,EAAE,GACzB,MAAM,CAyBR"}

package/dist/evidence/anti_rationalisation.js ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * 0.1.52 C6 — Anti-rationalisation directive + recent_thinking scan.
+ *
+ * The Photometry-DB E2E #9 transcript ended at 100% confidence with the
+ * model narrating polite reasons not to keep iterating: "comprehensive
+ * verification confirms ready for production", "the new features are
+ * already implemented; further interaction would be redundant",
+ * "extensive UI exploration completed". Each phrase is a stand-in for
+ * "I will skip the change-aware verification step the C3 gate
+ * explicitly requires".
+ *
+ * This module ships the canonical FORBIDDEN list and a scanner that the
+ * gate_check tool can run over an optional `recent_thinking` payload
+ * supplied by the agent. When a forbidden phrase fires, the scanner
+ * surfaces the matched fragment so the gate's continue_fixing
+ * postscript can call out the rationalisation by name and force the
+ * agent to take a concrete next step instead of restating the excuse.
+ */
+export const C6_FORBIDDEN_PHRASES = [
+    {
+        regex: /\b(no need|already exercised|already covered) to (re-?test|re-?verify|re-?run)\b/i,
+        reason: "Claims existing evidence covers the new diff. The C3 gate measures coverage of the diff, not of the whole app — old evidence does not credit new entries.",
+    },
+    {
+        regex: /\b(comprehensive|thorough|extensive|exhaustive)\s+(verification|coverage|testing|exploration)\s+(confirms|shows|demonstrates|already)\b/i,
+        reason: "Generic 'comprehensive verification' boilerplate. Cite the specific manifest entries you exercised, not the verb 'comprehensive'.",
+    },
+    {
+        regex: /\bfurther\s+(interaction|testing|verification)\s+(would be|is)\s+(redundant|unnecessary|impractical|not (feasible|needed))\b/i,
+        reason: "Claims further interaction is unnecessary. The C3 gate disagrees — every unexercised manifest entry is a concrete next step.",
+    },
+    {
+        regex: /\b(implementation|feature|change)\s+(?:is|has been|already)\s+(?:complete|implemented|verified)\b.*\bno (specific|further) (?:user )?interaction\b/i,
+        reason: "Confuses 'feature implemented' with 'feature exercised in the recording'. The gate requires the latter.",
+    },
+    {
+        regex: /\bcode review (confirms|shows|verifies)\s+(the|new)?\s*(features?|changes?)\s+(are|is)\s+(working|correct|ready)\b/i,
+        reason: "Substitutes static code reading for the runtime UI exercise the gate requires.",
+    },
+    {
+        regex: /\b(declaring|marking|treating)\s+(the )?task\s+(complete|done|ready)\s+(despite|even though|regardless)\b/i,
+        reason: "Declares the task complete while admitting gates are failing. Don't.",
+    },
+    {
+        regex: /\bgrid (is )?empty\b.*\b(can'?t|cannot|unable to)\s+(test|verify|exercise)\b/i,
+        reason: "Empty grid is the C4 directive's exact case — seed data, then exercise. Do not skip the entry.",
+    },
+];
+export function scanRecentThinking(text) {
+    if (!text || typeof text !== "string")
+        return [];
+    const out = [];
+    for (const { regex, reason } of C6_FORBIDDEN_PHRASES) {
+        const m = regex.exec(text);
+        if (m)
+            out.push({ matched_text: m[0], reason });
+    }
+    return out;
+}
+/**
+ * Renders the C6 anti-rationalisation directive lines for inclusion in
+ * the gate_check continue_fixing postscript. Pure formatting — the
+ * caller decides where to inject this in the larger directive block.
+ */
+export function buildAntiRationalisationDirective(hits) {
+    const header = [
+        "[CodeLoop C6] ANTI-RATIONALISATION DIRECTIVE — your next message MUST NOT make any of these claims:",
+        "  • 'Comprehensive verification confirms ready for production'",
+        "  • 'Further interaction would be redundant / impractical'",
+        "  • 'The features are already implemented; no specific user interaction needed'",
+        "  • 'Code review confirms the new features are working' (without runtime exercise)",
+        "  • 'The grid is empty; can't test the new column' (use C4: seed data first)",
+        "  • 'No need to re-verify' / 'Already covered'",
+        "Each manifest entry the change_coverage_evidence gate flags as unexercised is a CONCRETE next step. Drive each one via codeloop_interact (NOT prose). If a step truly cannot be exercised by tools you have, call codeloop_escalate — do NOT rationalise around it.",
+    ].join("\n");
+    if (hits.length === 0)
+        return header;
+    const hitLines = hits.map((h) => `  - matched "${h.matched_text}" — ${h.reason}`);
+    return (header +
+        "\n\n" +
+        `[CodeLoop C6] Detected ${hits.length} rationalisation phrase(s) in recent_thinking:\n` +
+        hitLines.join("\n") +
+        "\nRewrite without those phrases and execute the per-gate next steps above instead.");
+}
+//# sourceMappingURL=anti_rationalisation.js.map

package/dist/evidence/anti_rationalisation.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"anti_rationalisation.js","sourceRoot":"","sources":["../../src/evidence/anti_rationalisation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,MAAM,CAAC,MAAM,oBAAoB,GAA6C;IAC5E;QACE,KAAK,EAAE,mFAAmF;QAC1F,MAAM,EAAE,2JAA2J;KACpK;IACD;QACE,KAAK,EAAE,0IAA0I;QACjJ,MAAM,EAAE,mIAAmI;KAC5I;IACD;QACE,KAAK,EAAE,+HAA+H;QACtI,MAAM,EAAE,8HAA8H;KACvI;IACD;QACE,KAAK,EAAE,qJAAqJ;QAC5J,MAAM,EAAE,yGAAyG;KAClH;IACD;QACE,KAAK,EAAE,qHAAqH;QAC5H,MAAM,EAAE,gFAAgF;KACzF;IACD;QACE,KAAK,EAAE,4GAA4G;QACnH,MAAM,EAAE,sEAAsE;KAC/E;IACD;QACE,KAAK,EAAE,+EAA+E;QACtF,MAAM,EAAE,gGAAgG;KACzG;CACF,CAAC;AAOF,MAAM,UAAU,kBAAkB,CAAC,IAA+B;IAChE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IACjD,MAAM,GAAG,GAAyB,EAAE,CAAC;IACrC,KAAK,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACrD,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3B,IAAI,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iCAAiC,CAC/C,IAA0B;IAE1B,MAAM,MAAM,GAAG;QACb,qGAAqG;QACrG,gEAAgE;QAChE,4DAA4D;QAC5D,iFAAiF;QACjF,oFAAoF;QACpF,8EAA8E;QAC9E,gDAAgD;QAChD,qQAAqQ;KACtQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC;IAErC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,CAAC,CAAC,EAAE,EAAE,CACJ,gBAAgB,CAAC,CAAC,YAAY,OAAO,CAAC,CAAC,MAAM,EAAE,CAClD,CAAC;IACF,OAAO,CACL,MAAM;QACN,MAAM;QACN,0BAA0B,IAAI,CAAC,MAAM,kDAAkD;QACvF,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;QACnB,oFAAoF,CACrF,CAAC;AACJ,CAAC"}

package/dist/evidence/change_coverage.d.ts ADDED Viewed

@@ -0,0 +1,59 @@
+import { type ChangeEntry, type ChangeManifest } from "./change_manifest.js";
+/**
+ * 0.1.52 C3 — change_coverage_evidence gate.
+ *
+ * Cross-references the change manifest produced by C1 against the
+ * interaction_log.jsonl entries, captured screenshot filenames, and
+ * replay-frame analyses across every recent run. Every non-implicit
+ * manifest entry must have at least one "exercising event"; the
+ * default threshold is 1.0 (every entry exercised) but config can
+ * relax it.
+ *
+ * Implicit entries (currently `method_added`) don't need to be hit
+ * directly — they're considered exercised whenever the property they
+ * operate on is exercised. The reason: a feature-shaped method like
+ * `PropagateProductCodeAsync` runs as a side-effect of typing into the
+ * Product Code cell, so we'd otherwise demand an unsatisfiable
+ * "exercise the method by name" interaction.
+ */
+/** Settings block for the change_coverage_evidence gate. */
+export interface ChangeCoverageConfig {
+    /** Gate on/off switch; `true` in the shipped default config. */
+    enabled: boolean;
+    /** Minimum fraction of manifest entries that must be exercised to pass. Default 1.0 (HARD 100%). */
+    threshold: number;
+    /** Manifest entry kinds to skip (configurable per-project for genuine pure refactors). */
+    skip_kinds: ChangeEntry["kind"][];
+}
+/** Baseline config: enabled, threshold 1.0, empty `skip_kinds`. */
+export declare const DEFAULT_CHANGE_COVERAGE_CONFIG: ChangeCoverageConfig;
+/** Coverage result for a single manifest entry. */
+export interface EntryStatus {
+    /** The manifest entry this status describes. */
+    entry: ChangeEntry;
+    /** NOTE(review): presumably the label from `manifestEntryDisplayName` — confirm against the implementation. */
+    display_name: string;
+    /** Whether any evidence credited this entry. */
+    exercised: boolean;
+    /** What evidence credited it. Empty when not exercised. */
+    evidence: string[];
+    /** True when the entry counts as "implicit" — passes when its sibling property is exercised. */
+    implicit: boolean;
+}
+/**
+ * Outcome of the change-coverage gate.
+ * NOTE(review): the exact relationship between `total_entries`,
+ * `considered_entries` and `exercised_entries` (e.g. how `skip_kinds`
+ * and implicit entries are counted) is defined by
+ * `evaluateChangeCoverage` — confirm there before relying on it.
+ */
+export interface ChangeCoverageVerdict {
+    passed: boolean;
+    threshold: number;
+    manifest_run_id: string | null;
+    total_entries: number;
+    considered_entries: number;
+    exercised_entries: number;
+    /** entries the agent has not yet exercised. */
+    unexercised: EntryStatus[];
+    /** Per-entry status. */
+    per_entry: EntryStatus[];
+    /** Human-readable reason for the gate response. */
+    reason: string;
+    /** Concrete next_step the agent must execute when the gate fails. */
+    next_step: string;
+}
+/**
+ * Evaluates the change-coverage gate for a run and returns its verdict.
+ * `config` is optional.
+ * NOTE(review): presumably falls back to `DEFAULT_CHANGE_COVERAGE_CONFIG`
+ * when `config` is omitted — confirm against the implementation.
+ */
+export declare function evaluateChangeCoverage(cwd: string, runId: string, config?: ChangeCoverageConfig): ChangeCoverageVerdict;
+/**
+ * Resolve the change-coverage config block out of the project config,
+ * falling back to defaults. Tolerates missing fields and unknown keys.
+ */
+export declare function resolveChangeCoverageConfig(raw: unknown): ChangeCoverageConfig;
+export type { ChangeManifest };
+//# sourceMappingURL=change_coverage.d.ts.map

package/dist/evidence/change_coverage.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"change_coverage.d.ts","sourceRoot":"","sources":["../../src/evidence/change_coverage.ts"],"names":[],"mappings":"AAGA,OAAO,EAGL,KAAK,WAAW,EAChB,KAAK,cAAc,EACpB,MAAM,sBAAsB,CAAC;AAG9B;;;;;;;;;;;;;;;;GAgBG;AAEH,MAAM,WAAW,oBAAoB;IACnC,OAAO,EAAE,OAAO,CAAC;IACjB,oGAAoG;IACpG,SAAS,EAAE,MAAM,CAAC;IAClB,0FAA0F;IAC1F,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;CACnC;AAED,eAAO,MAAM,8BAA8B,EAAE,oBAI5C,CAAC;AAUF,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,WAAW,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,gGAAgG;IAChG,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,+CAA+C;IAC/C,WAAW,EAAE,WAAW,EAAE,CAAC;IAC3B,wBAAwB;IACxB,SAAS,EAAE,WAAW,EAAE,CAAC;IACzB,mDAAmD;IACnD,MAAM,EAAE,MAAM,CAAC;IACf,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;CACnB;AA0TD,wBAAgB,sBAAsB,CACpC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,GAAE,oBAAqD,GAC5D,qBAAqB,CA4FvB;AAED;;;GAGG;AACH,wBAAgB,2BAA2B,CACzC,GAAG,EAAE,OAAO,GACX,oBAAoB,CAgBtB;AAED,YAAY,EAAE,cAAc,EAAE,CAAC"}

package/dist/evidence/change_coverage.js ADDED Viewed

@@ -0,0 +1,422 @@
+import { existsSync, readFileSync, readdirSync } from "fs";
+import { join } from "path";
+import { getArtifactsBaseDir, getRunDir, listRuns } from "./artifacts.js";
+import { loadMostRecentChangeManifest, manifestEntryDisplayName, } from "./change_manifest.js";
+import { slugForTargetChangeEntry } from "../tools/c7_slug.js";
+// Baseline gate settings: gate enabled, threshold 1.0 (every considered
+// manifest entry must be exercised), and no entry kinds skipped.
+export const DEFAULT_CHANGE_COVERAGE_CONFIG = {
+    enabled: true,
+    threshold: 1.0,
+    skip_kinds: [],
+};
+function readJsonlSafe(path) {
+    try {
+        const raw = readFileSync(path, "utf-8");
+        const out = [];
+        for (const line of raw.split("\n")) {
+            const t = line.trim();
+            if (!t)
+                continue;
+            try {
+                out.push(JSON.parse(t));
+            }
+            catch {
+                /* skip malformed */
+            }
+        }
+        return out;
+    }
+    catch {
+        return [];
+    }
+}
+function flatten(entries) {
+    const out = [];
+    for (const e of entries) {
+        const action = (e.action ?? "").toLowerCase();
+        const args = (e.input_args ?? {});
+        if (action === "sequence" && Array.isArray(args.steps)) {
+            for (const c of args.steps) {
+                out.push({ ...c, timestamp: c.timestamp ?? e.timestamp, success: c.success ?? e.success });
+            }
+            continue;
+        }
+        if (action === "maestro_flow" && Array.isArray(args.maestro_steps)) {
+            for (const c of args.maestro_steps) {
+                out.push({ ...c, timestamp: c.timestamp ?? e.timestamp, success: c.success ?? e.success });
+            }
+            continue;
+        }
+        out.push(e);
+    }
+    return out;
+}
+function entryHaystack(e) {
+    const args = e.input_args ?? {};
+    const parts = [];
+    for (const k of [
+        "selector",
+        "text",
+        "aria_label",
+        "label",
+        "target",
+        "automationId",
+        "name",
+        "intent",
+        "description",
+        "purpose",
+        "step",
+        "screen_name",
+        "url",
+        "value",
+        "target_change_entry",
+        "automation_action",
+    ]) {
+        const v = args[k];
+        if (typeof v === "string")
+            parts.push(v);
+    }
+    if (typeof e.detail === "string")
+        parts.push(e.detail);
+    if (typeof e.action === "string")
+        parts.push(e.action);
+    return parts.join(" ").toLowerCase();
+}
+function tokenizeEntryName(name) {
+    // Split CamelCase + snake_case + kebab-case into discrete tokens so
+    // a manifest "ProductCode" matches an interaction text "Product Code"
+    // and vice versa. Tokens are lowercased and >= 2 chars.
+    const expanded = name
+        .replace(/([a-z])([A-Z])/g, "$1 $2")
+        .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
+        .replace(/[_\-./]/g, " ");
+    return expanded
+        .toLowerCase()
+        .split(/\s+/)
+        .filter((t) => t.length >= 2);
+}
+/**
+ * Gathers the raw evidence that coverage evaluation searches, across
+ * `runId` plus every run id returned by `listRuns(baseDir)`:
+ * interaction-log records, screenshot filenames, replay analysis text
+ * and the contents of `.log` files.
+ *
+ * Directory listings and file reads are best-effort: a failing
+ * `readdirSync` / `readFileSync` is swallowed and that source is
+ * simply skipped.
+ *
+ * @param cwd   Passed to `getArtifactsBaseDir` to locate the artifacts base directory.
+ * @param runId Run id scanned first; duplicates coming from `listRuns` are removed.
+ * @returns `{ log, screenshotNames, replayAnalysisText, resizeWindowEvents, shellOutputs }`
+ *          where `log` is the flattened interaction log,
+ *          `replayAnalysisText` is lowercased, and `resizeWindowEvents`
+ *          is the subset of `log` whose action is a resize / rotate.
+ */
+function collectExercisingEvents(cwd, runId) {
+    const baseDir = getArtifactsBaseDir(cwd);
+    const allLog = [];
+    const screenshotNames = [];
+    const replayAnalysisChunks = [];
+    const shellOutputs = [];
+    // Pull from this run + every sibling run in the project. The journey
+    // runs typically live in a separate run from the verify (matches how
+    // gate_check video / replay scoping works today).
+    const runIds = Array.from(new Set([runId, ...listRuns(baseDir)]));
+    for (const rid of runIds) {
+        const runDir = getRunDir(rid, baseDir);
+        if (!existsSync(runDir))
+            continue;
+        const logsDir = join(runDir, "logs");
+        if (existsSync(logsDir)) {
+            try {
+                for (const f of readdirSync(logsDir)) {
+                    // The exact-name test is subsumed by the prefix/suffix test;
+                    // any "interaction_log*.jsonl" file is read.
+                    if (f === "interaction_log.jsonl" || (f.startsWith("interaction_log") && f.endsWith(".jsonl"))) {
+                        allLog.push(...readJsonlSafe(join(logsDir, f)));
+                    }
+                }
+            }
+            catch {
+                /* skip */
+            }
+            // Build / dotnet / generic test logs may mention migration columns
+            // by name (the EF runtime prints "Applying migration X / new column
+            // Y" for example). We treat these as shell-style outputs the
+            // migration_column_added / migration_table_added classifier scans.
+            try {
+                for (const f of readdirSync(logsDir)) {
+                    if (!f.endsWith(".log"))
+                        continue;
+                    try {
+                        const txt = readFileSync(join(logsDir, f), "utf-8");
+                        shellOutputs.push(txt);
+                    }
+                    catch {
+                        /* skip */
+                    }
+                }
+            }
+            catch {
+                /* skip */
+            }
+        }
+        const ssDir = join(runDir, "screenshots");
+        if (existsSync(ssDir)) {
+            try {
+                for (const f of readdirSync(ssDir)) {
+                    // Extension match is case-sensitive and limited to .png / .jpg.
+                    if (f.endsWith(".png") || f.endsWith(".jpg"))
+                        screenshotNames.push(f);
+                }
+            }
+            catch {
+                /* skip */
+            }
+        }
+        // interaction_replay produces per-frame JSON / a summary file —
+        // pick up anything in replay/ or replay_frames/ for token search.
+        for (const sub of ["replay", "replay_frames"]) {
+            const d = join(runDir, sub);
+            if (!existsSync(d))
+                continue;
+            try {
+                for (const f of readdirSync(d)) {
+                    if (f.endsWith(".json") || f.endsWith(".txt") || f.endsWith(".md")) {
+                        try {
+                            replayAnalysisChunks.push(readFileSync(join(d, f), "utf-8"));
+                        }
+                        catch {
+                            /* skip */
+                        }
+                    }
+                }
+            }
+            catch {
+                /* skip */
+            }
+        }
+    }
+    // Composite entries are expanded first so child steps are searchable
+    // and so resize / rotate steps nested in a sequence are counted.
+    const flat = flatten(allLog);
+    const resizeWindowEvents = flat.filter((e) => {
+        const action = (e.action ?? "").toLowerCase();
+        return (action === "resize_window" ||
+            action === "resize" ||
+            action === "set_window_size" ||
+            action === "rotate_device");
+    });
+    return {
+        log: flat,
+        screenshotNames,
+        replayAnalysisText: replayAnalysisChunks.join("\n").toLowerCase(),
+        resizeWindowEvents,
+        shellOutputs,
+    };
+}
+/** Check whether a manifest entry's name appears in the haystack (interaction log + replay analysis + screenshots). */
+function nameAppearsIn(haystack, name) {
+    const tokens = tokenizeEntryName(name);
+    if (tokens.length === 0)
+        return false;
+    // Require ALL tokens to appear (in any order) for multi-token names —
+    // this prevents "Product" alone matching unrelated screens.
+    // For single-token names, presence of that token is enough.
+    return tokens.every((t) => haystack.includes(t));
+}
+function evaluateEntry(entry, ctx) {
+    const evidence = [];
+    const logHaystack = ctx.log.map(entryHaystack).join(" | ");
+    const ssHaystack = ctx.screenshotNames.join(" | ").toLowerCase();
+    const replayHaystack = ctx.replayAnalysisText;
+    const shellHaystack = ctx.shellOutputs.join("\n").toLowerCase();
+    const fullUiHaystack = `${logHaystack} ${ssHaystack} ${replayHaystack}`;
+    switch (entry.kind) {
+        case "ui_element_added": {
+            if (nameAppearsIn(fullUiHaystack, entry.name)) {
+                evidence.push(`Name "${entry.name}" referenced by an interaction / screenshot / replay event`);
+            }
+            // Also accept a screenshot whose `target_change_entry` arg pinned
+            // this entry directly (C7 anchored screenshots) — either via the
+            // interaction-log arg OR via a filename suffix produced by the
+            // codeloop_capture_screenshot anchoring path.
+            const display = manifestEntryDisplayName(entry);
+            const anchored = ctx.log.some((e) => {
+                const arg = e.input_args?.target_change_entry;
+                return typeof arg === "string" && arg === display;
+            });
+            if (anchored)
+                evidence.push("C7 target_change_entry anchor present in interaction args");
+            const slug = slugForTargetChangeEntry(display);
+            const anchoredFilename = ctx.screenshotNames.some((n) => n.toLowerCase().includes(`--c7-${slug}`));
+            if (anchoredFilename)
+                evidence.push(`C7 anchored screenshot filename matched (slug "${slug}")`);
+            return { exercised: evidence.length > 0, evidence };
+        }
+        case "property_added": {
+            if (nameAppearsIn(fullUiHaystack, entry.name) || nameAppearsIn(fullUiHaystack, entry.class)) {
+                evidence.push(`Property "${entry.class}.${entry.name}" referenced by an interaction / screenshot / replay event`);
+            }
+            return { exercised: evidence.length > 0, evidence };
+        }
+        case "method_added": {
+            // Implicit — see header. We pass when the file or class shows up
+            // in the haystack, but the gate's main accounting treats this as
+            // implicit so the unexercised list never lists it directly.
+            if (nameAppearsIn(fullUiHaystack, entry.class) || nameAppearsIn(fullUiHaystack, entry.name)) {
+                evidence.push(`Method "${entry.class}.${entry.name}" referenced indirectly`);
+            }
+            return { exercised: evidence.length > 0, evidence };
+        }
+        case "migration_column_added": {
+            // Build / runtime logs typically print "Applying migration … add
+            // column ProductCode on Configurations". A direct sqlite-shell
+            // schema verify (action: "shell" or selector containing the
+            // column name) also counts.
+            if (nameAppearsIn(shellHaystack, entry.column) ||
+                nameAppearsIn(shellHaystack, entry.table) ||
+                nameAppearsIn(fullUiHaystack, entry.column)) {
+                evidence.push(`Migration column "${entry.table}.${entry.column}" referenced by build / interaction logs`);
+            }
+            return { exercised: evidence.length > 0, evidence };
+        }
+        case "migration_table_added": {
+            if (nameAppearsIn(shellHaystack, entry.table) || nameAppearsIn(fullUiHaystack, entry.table)) {
+                evidence.push(`Migration table "${entry.table}" referenced by build / interaction logs`);
+            }
+            return { exercised: evidence.length > 0, evidence };
+        }
+        case "layout_restructure": {
+            // Layout changes need a real test of the new layout. We require
+            // either a window resize / rotate event AND a follow-up
+            // screenshot, OR an explicit C7 anchor on a screenshot tied to
+            // this restructure file.
+            const fileToken = entry.file.split("/").pop()?.toLowerCase() ?? entry.file.toLowerCase();
+            const anchored = ctx.log.some((e) => {
+                const arg = e.input_args?.target_change_entry;
+                return typeof arg === "string" && arg === manifestEntryDisplayName(entry);
+            });
+            const resized = ctx.resizeWindowEvents.length > 0;
+            const screenshotMentionsFile = ctx.screenshotNames.some((n) => n.toLowerCase().includes(fileToken.replace(".xaml", "").replace(".html", "")));
+            if (anchored)
+                evidence.push("C7 target_change_entry anchor present");
+            if (resized && screenshotMentionsFile) {
+                evidence.push("Window resize event followed by a screenshot whose name references the changed file");
+            }
+            return { exercised: evidence.length > 0, evidence };
+        }
+    }
+}
+function nextStepFor(entry) {
+    const display = manifestEntryDisplayName(entry);
+    switch (entry.kind) {
+        case "ui_element_added":
+            if (entry.element === "datagrid_column") {
+                return `${display} (in ${entry.file}) — drive a codeloop_interact action="click" against a row's "${entry.name}" cell, then action="type" with realistic data, then capture_screenshot with target_change_entry="${display}". If the grid is empty, see the C4 empty-state directive.`;
+            }
+            if (entry.element === "button") {
+                return `${display} (in ${entry.file}) — drive codeloop_interact action="click" with text="${entry.name}", then capture_screenshot with target_change_entry="${display}". If a confirmation modal opens, call codeloop_handle_modal first.`;
+            }
+            if (entry.element === "menu_item") {
+                return `${display} (in ${entry.file}) — drive codeloop_interact action="click" against the menu opener, then click again against text="${entry.name}", then capture_screenshot with target_change_entry="${display}".`;
+            }
+            return `${display} (in ${entry.file}) — drive codeloop_interact action="click"/"type" against an element with this label, then capture_screenshot with target_change_entry="${display}".`;
+        case "property_added":
+            return `${display} (in ${entry.file}) — exercise via the UI control bound to this property (typically a textbox / column / toggle whose label resembles "${entry.name}"). Pass description="${display}" on codeloop_interact so the cross-check matches.`;
+        case "method_added":
+            return `${display} (in ${entry.file}) — implicit. Will be credited when the property/column it operates on is exercised. No direct interaction required.`;
+        case "migration_column_added":
+            return `${display} (in ${entry.file}) — verify by running codeloop_interact action="shell" with command "sqlite3 <db> '.schema ${entry.table}'" (or psql / dotnet ef migrations script equivalent). The schema dump must contain the column name.`;
+        case "migration_table_added":
+            return `${display} (in ${entry.file}) — verify by running a schema-dump shell command that references the table, or open a UI screen that loads/persists records of this table.`;
+        case "layout_restructure":
+            return `${display} — drive codeloop_interact action="resize_window" to a narrow size (e.g. 1024x600), then capture_screenshot with target_change_entry="${display}" so the C3 gate credits the restructure as exercised. If resize_window isn't available on your target platform, capture two screenshots at different window widths and reference this entry by name in the description arg.`;
+    }
+}
+export function evaluateChangeCoverage(cwd, runId, config = DEFAULT_CHANGE_COVERAGE_CONFIG) {
+    if (!config.enabled) {
+        return {
+            passed: true,
+            threshold: config.threshold,
+            manifest_run_id: null,
+            total_entries: 0,
+            considered_entries: 0,
+            exercised_entries: 0,
+            unexercised: [],
+            per_entry: [],
+            reason: "change_coverage_evidence is disabled in .codeloop/config.json (gates.change_coverage_evidence.enabled=false).",
+            next_step: "",
+        };
+    }
+    const { manifest, runId: manifestRunId } = loadMostRecentChangeManifest(cwd, runId);
+    if (!manifest || manifest.entries.length === 0) {
+        return {
+            passed: true,
+            threshold: config.threshold,
+            manifest_run_id: manifestRunId,
+            total_entries: 0,
+            considered_entries: 0,
+            exercised_entries: 0,
+            unexercised: [],
+            per_entry: [],
+            reason: manifest
+                ? "Change manifest contains no feature-shaped entries — gate trivially satisfied (this is a pure refactor / docs / dependency bump)."
+                : "No change manifest available. Gate trivially satisfied — call codeloop_verify to produce one.",
+            next_step: "",
+        };
+    }
+    const ctx = collectExercisingEvents(cwd, runId);
+    const skipKinds = new Set(config.skip_kinds);
+    const perEntry = [];
+    for (const entry of manifest.entries) {
+        if (skipKinds.has(entry.kind))
+            continue;
+        const display = manifestEntryDisplayName(entry);
+        const implicit = entry.kind === "method_added";
+        const { exercised, evidence } = evaluateEntry(entry, ctx);
+        perEntry.push({
+            entry,
+            display_name: display,
+            exercised,
+            evidence,
+            implicit,
+        });
+    }
+    const considered = perEntry.filter((p) => !p.implicit);
+    const exercised = considered.filter((p) => p.exercised);
+    const unexercised = considered.filter((p) => !p.exercised);
+    const fraction = considered.length === 0 ? 1 : exercised.length / considered.length;
+    const passed = fraction >= config.threshold;
+    let reason;
+    let nextStep = "";
+    if (passed) {
+        reason = `Change coverage met: ${exercised.length}/${considered.length} manifest entries exercised (>= ${(config.threshold * 100).toFixed(0)}% threshold).`;
+    }
+    else {
+        reason =
+            `Change coverage NOT met: ${exercised.length}/${considered.length} manifest entries exercised ` +
+                `(${(fraction * 100).toFixed(1)}% — required ${(config.threshold * 100).toFixed(0)}%). ` +
+                `Unexercised entries:\n` +
+                unexercised
+                    .slice(0, 10)
+                    .map((u) => `  - ${u.display_name}`)
+                    .join("\n") +
+                (unexercised.length > 10 ? `\n  …and ${unexercised.length - 10} more.` : "");
+        nextStep =
+            "Per-entry next steps — drive each one via codeloop_interact (NOT raw osascript / PowerShell / xdotool) before re-gating:\n" +
+                unexercised
+                    .slice(0, 8)
+                    .map((u, i) => `  ${i + 1}. ${nextStepFor(u.entry)}`)
+                    .join("\n");
+    }
+    return {
+        passed,
+        threshold: config.threshold,
+        manifest_run_id: manifestRunId,
+        total_entries: manifest.entries.length,
+        considered_entries: considered.length,
+        exercised_entries: exercised.length,
+        unexercised,
+        per_entry: perEntry,
+        reason,
+        next_step: nextStep,
+    };
+}
+/**
+ * Resolve the change-coverage config block out of the project config,
+ * falling back to defaults. Tolerates missing fields and unknown keys.
+ */
+export function resolveChangeCoverageConfig(raw) {
+    if (!raw || typeof raw !== "object")
+        return { ...DEFAULT_CHANGE_COVERAGE_CONFIG };
+    const r = raw;
+    const out = { ...DEFAULT_CHANGE_COVERAGE_CONFIG };
+    if (typeof r.enabled === "boolean")
+        out.enabled = r.enabled;
+    if (typeof r.threshold === "number" && r.threshold >= 0 && r.threshold <= 1) {
+        out.threshold = r.threshold;
+    }
+    if (Array.isArray(r.skip_kinds)) {
+        out.skip_kinds = r.skip_kinds.filter((k) => ["ui_element_added", "property_added", "method_added", "migration_column_added", "migration_table_added", "layout_restructure"].includes(String(k)));
+    }
+    return out;
+}
+//# sourceMappingURL=change_coverage.js.map