npm - codeloop-mcp-server - Versions diffs - 0.1.78 → 0.1.82 - Mend

codeloop-mcp-server 0.1.78 → 0.1.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/dist/evidence/agent_mode.d.ts +13 -3
package/dist/evidence/agent_mode.d.ts.map +1 -1
package/dist/evidence/agent_mode.js +63 -22
package/dist/evidence/agent_mode.js.map +1 -1
package/dist/evidence/interaction_evidence.d.ts +1 -1
package/dist/evidence/interaction_evidence.d.ts.map +1 -1
package/dist/evidence/interaction_evidence.js +3 -2
package/dist/evidence/interaction_evidence.js.map +1 -1
package/dist/index.js +22 -23
package/dist/index.js.map +1 -1
package/dist/runners/app_launcher.d.ts.map +1 -1
package/dist/runners/app_launcher.js +148 -8
package/dist/runners/app_launcher.js.map +1 -1
package/dist/runners/device_probe.d.ts +32 -0
package/dist/runners/device_probe.d.ts.map +1 -1
package/dist/runners/device_probe.js +73 -0
package/dist/runners/device_probe.js.map +1 -1
package/dist/runners/flutter_driver.d.ts +37 -0
package/dist/runners/flutter_driver.d.ts.map +1 -0
package/dist/runners/flutter_driver.js +242 -0
package/dist/runners/flutter_driver.js.map +1 -0
package/dist/runners/journey_to_maestro.d.ts.map +1 -1
package/dist/runners/journey_to_maestro.js +39 -8
package/dist/runners/journey_to_maestro.js.map +1 -1
package/dist/runners/launch_liveness.d.ts +44 -0
package/dist/runners/launch_liveness.d.ts.map +1 -0
package/dist/runners/launch_liveness.js +145 -0
package/dist/runners/launch_liveness.js.map +1 -0
package/dist/runners/maestro_generator.d.ts +7 -0
package/dist/runners/maestro_generator.d.ts.map +1 -1
package/dist/runners/maestro_generator.js +58 -0
package/dist/runners/maestro_generator.js.map +1 -1
package/dist/runners/mobile_build_prep.d.ts +66 -0
package/dist/runners/mobile_build_prep.d.ts.map +1 -0
package/dist/runners/mobile_build_prep.js +285 -0
package/dist/runners/mobile_build_prep.js.map +1 -0
package/dist/tools/gate_check.d.ts +15 -1
package/dist/tools/gate_check.d.ts.map +1 -1
package/dist/tools/gate_check.js +18 -11
package/dist/tools/gate_check.js.map +1 -1
package/dist/tools/run_journey.d.ts +19 -5
package/dist/tools/run_journey.d.ts.map +1 -1
package/dist/tools/run_journey.js +133 -39
package/dist/tools/run_journey.js.map +1 -1
package/dist/tools/verify.d.ts.map +1 -1
package/dist/tools/verify.js +9 -16
package/dist/tools/verify.js.map +1 -1
package/package.json +2 -2

package/dist/evidence/agent_mode.d.ts CHANGED Viewed

@@ -1,7 +1,14 @@
 export type AgentMode = "fix" | "audit";
+/**
+ * How long a PERSISTED audit mode stays in effect after it was last set.
+ * Each audit tool call rewrites the file (refreshing this window), so a
+ * continuous audit session never lapses; only an idle/abandoned audit does.
+ * Kept short enough that a later, unrelated request defaults back to fix.
+ */
+export declare const AUDIT_TTL_MS: number;
 /** Normalize a free-text mode value to the canonical enum. */
 export declare function normalizeAgentMode(value?: string | null): AgentMode | undefined;
-export declare function readPersistedAgentMode(cwd: string): AgentMode | undefined;
+export declare function readPersistedAgentMode(cwd: string, now?: number): AgentMode | undefined;
 export declare function persistAgentMode(cwd: string, mode: AgentMode): void;
 /**
  * Resolve the effective mode for a tool call and persist an explicit param so
@@ -15,8 +22,11 @@ export declare function resolveAgentMode(opts: {
 /** Shared schema description so every tool's `mode` param reads identically. */
 export declare const MODE_PARAM_DESCRIPTION: string;
 /**
- * The read-only directive appended to verify / diagnose / gate_check responses
- * when audit mode is active. Replaces the aggressive auto-fix prose.
+ * The report-only directive appended to verify / diagnose / gate_check responses
+ * when audit tone is active. It softens ONLY the auto-fix prose — it never tells
+ * the agent to skip verification. CodeLoop still runs/expects the full suite
+ * (including codeloop_run_journey to launch + drive the app); audit just means
+ * "don't edit the user's code yet."
  */
 export declare function buildAuditDirective(tool: "verify" | "diagnose" | "gate_check"): string;
 //# sourceMappingURL=agent_mode.d.ts.map

package/dist/evidence/agent_mode.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"~~AAuBA~~,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,OAAO,CAAC;AAIxC,8DAA8D;AAC9D,wBAAgB,kBAAkB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,SAAS,CAc/E;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,~~CASzE~~;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,CAQnE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,SAAS,CAWZ;AAED,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,~~QAOyC~~,CAAC;~~AAE7E;;;GAGG~~;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CAyBtF"}
1	+ {"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAwCA,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,OAAO,CAAC;AAIxC;;;;;GAKG;AACH,eAAO,MAAM,YAAY,QAAiB,CAAC;AAE3C,8DAA8D;AAC9D,wBAAgB,kBAAkB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,SAAS,CAc/E;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,GAAE,MAAmB,GAAG,SAAS,GAAG,SAAS,CAgBnG;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,CAQnE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,SAAS,CAWZ;AAED,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,QAawC,CAAC;AAE5E;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CAyBtF"}

package/dist/evidence/agent_mode.js CHANGED Viewed

@@ -1,12 +1,18 @@
 /**
- * Agent mode — "fix" (default) vs "audit" (read-only).
+ * Agent mode — "fix" (default) vs "audit" (report-only TONE).
  *
- * CodeLoop's tool responses normally PUSH the agent into a mandatory
- * verify → diagnose → fix → re-verify → gate auto-fix loop. That is the right
- * default, but it directly contradicts a user who says "do NOT modify my code,
- * just run the checks and list every problem you find." In that case CodeLoop
- * must obey the user: still run all verifications and produce the full findings
- * list, but STOP pushing the agent to edit files or loop.
+ * IMPORTANT: audit is NOT a CodeLoop verification mode — it does not change what
+ * CodeLoop verifies. CodeLoop ALWAYS runs the full suite (build, tests, launches
+ * + DRIVES the app via codeloop_run_journey, screenshots, gate, confidence) and
+ * lists every issue it finds, in every mode. Driving the app is verification,
+ * not a code modification.
+ *
+ * "audit" only adjusts the response TONE for the one case where a user says
+ * "do NOT modify my code, just run the checks and list every problem you find."
+ * Then CodeLoop still verifies everything and produces the full findings list,
+ * but its prose does not COMMAND the agent to edit files or enter the auto-fix
+ * loop — the agent presents the report and lets the user decide what to fix.
+ * The agent's source-code edits are paused; nothing about CodeLoop's checks is.
  *
  * Mode resolution precedence (highest first):
  *   1. explicit `mode` param on the current tool call
@@ -17,10 +23,28 @@
  * When a tool receives an explicit `mode`, it persists it so the rest of the
  * session inherits the same mode without the agent having to repeat it on
  * every call.
+ *
+ * READ-ONLY IS OPT-IN, NOT STICKY. audit must only apply when the user has
+ * actually asked for it ("don't modify my code, just list the problems").
+ * Persisted audit therefore EXPIRES: it is honored only for AUDIT_TTL_MS after
+ * it was last set, and every audit tool call refreshes that timestamp. The
+ * moment the agent stops passing `mode:"audit"` (because the user is no longer
+ * restricting edits), the persisted audit goes stale and CodeLoop reverts to
+ * the active fix default — verifying, checking the gate/confidence, and pushing
+ * the auto-fix loop. Persisted "fix" never expires (it IS the default). An
+ * explicit config.agent_mode:"audit" is a deliberate user opt-in and is honored
+ * with no TTL.
  */
 import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
 import { join, dirname } from "path";
 const MODE_FILE_REL = join(".codeloop", "agent_mode.json");
+/**
+ * How long a PERSISTED audit mode stays in effect after it was last set.
+ * Each audit tool call rewrites the file (refreshing this window), so a
+ * continuous audit session never lapses; only an idle/abandoned audit does.
+ * Kept short enough that a later, unrelated request defaults back to fix.
+ */
+export const AUDIT_TTL_MS = 30 * 60 * 1000; // 30 minutes
 /** Normalize a free-text mode value to the canonical enum. */
 export function normalizeAgentMode(value) {
     if (!value)
@@ -36,13 +60,21 @@ export function normalizeAgentMode(value) {
     }
     return undefined;
 }
-export function readPersistedAgentMode(cwd) {
+export function readPersistedAgentMode(cwd, now = Date.now()) {
     try {
         const p = join(cwd, MODE_FILE_REL);
         if (!existsSync(p))
             return undefined;
         const data = JSON.parse(readFileSync(p, "utf-8"));
-        return normalizeAgentMode(data.mode);
+        const mode = normalizeAgentMode(data.mode);
+        // Persisted audit is OPT-IN and time-boxed: ignore it once stale so CodeLoop
+        // reverts to the active fix default unless the user keeps requesting audit.
+        if (mode === "audit") {
+            const setAt = data.set_at ? Date.parse(data.set_at) : NaN;
+            if (!Number.isFinite(setAt) || now - setAt > AUDIT_TTL_MS)
+                return undefined;
+        }
+        return mode;
     }
     catch {
         return undefined;
@@ -73,29 +105,38 @@ export function resolveAgentMode(opts) {
         "fix");
 }
 /** Shared schema description so every tool's `mode` param reads identically. */
-export const MODE_PARAM_DESCRIPTION = "Agent workflow mode. 'fix' (default) = the standard auto-fix loop: after " +
-    "verify/gate you should diagnose and modify code until gates pass. " +
-    "'audit' = READ-ONLY: set this when the user asked you to ONLY run the " +
-    "checks and LIST the problems WITHOUT modifying their code. In audit mode " +
-    "CodeLoop still runs every verification and returns the full findings, but " +
-    "its responses will NOT push you to edit files or loop — you present the " +
-    "report to the user and stop. The mode persists for the session once set.";
+export const MODE_PARAM_DESCRIPTION = "Response TONE only — it does NOT change what CodeLoop verifies. LEAVE UNSET " +
+    "for the normal behavior: 'fix' (default) = the standard auto-fix loop where, " +
+    "after verify/gate, you actively diagnose and modify code, re-verify, and drive " +
+    "the gate/confidence up until it passes. ONLY set 'audit' when the user " +
+    "EXPLICITLY asked you to run the checks and LIST the problems WITHOUT modifying " +
+    "their code (e.g. 'don't change anything, just tell me what's wrong'). Do NOT " +
+    "infer audit on your own. In EITHER mode CodeLoop runs the FULL suite — build, " +
+    "tests, launching + DRIVING the app (codeloop_run_journey), screenshots, gate, " +
+    "confidence — and returns every issue; driving the app is verification, not a " +
+    "code edit. 'audit' only stops CodeLoop's prose from commanding you to edit " +
+    "files or loop, so you present the report and let the user decide. Audit is " +
+    "OPT-IN and time-boxed: it auto-expires back to the fix default; pass mode:'fix' " +
+    "(or omit it) the moment the user is ready for you to apply fixes again.";
 /**
- * The read-only directive appended to verify / diagnose / gate_check responses
- * when audit mode is active. Replaces the aggressive auto-fix prose.
+ * The report-only directive appended to verify / diagnose / gate_check responses
+ * when audit tone is active. It softens ONLY the auto-fix prose — it never tells
+ * the agent to skip verification. CodeLoop still runs/expects the full suite
+ * (including codeloop_run_journey to launch + drive the app); audit just means
+ * "don't edit the user's code yet."
  */
 export function buildAuditDirective(tool) {
     const head = [
         "",
         "",
-        "📋 AUDIT MODE (read-only) — the user asked you to LIST problems, not fix them.",
-        "Do NOT modify any source files. Do NOT enter the auto-fix loop. Do NOT call codeloop_diagnose-then-edit unless the user explicitly asks.",
+        "📋 AUDIT MODE (report-only) — the user asked you to LIST problems, not fix them YET.",
+        "Do NOT modify any source files and do NOT enter the auto-fix loop. This pauses CODE EDITS only — it does NOT pause verification: still run CodeLoop's full read-only checks, including codeloop_run_journey (launch + drive the app + screenshots) and codeloop_gate_check, so your findings list is complete.",
     ];
     const tail = {
         verify: [
             "Present the failing checks above to the user as a findings list (file, what failed, why). " +
-                "You MAY call codeloop_diagnose to get a richer, structured breakdown to include in the report — diagnose only classifies, it does not edit code. " +
-                "After reporting, STOP and wait for the user to decide what to fix.",
+                "You MAY call codeloop_diagnose for a richer structured breakdown and SHOULD still call codeloop_run_journey to drive the app — both only gather evidence, neither edits code. " +
+                "After producing the complete report, STOP and wait for the user to decide what to fix.",
         ],
         diagnose: [
             "The repair_tasks above are a DIAGNOSIS for the REPORT, not a TODO list to execute. " +

package/dist/evidence/agent_mode.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;;;;;;GAmBG~~;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAIrC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAE3D,8DAA8D;AAC9D,MAAM,UAAU,kBAAkB,CAAC,KAAqB;IACtD,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7C,IACE,CAAC,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa;QACrE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,YAAY;QACtE,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EACjC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,GAAW;~~IAChD~~,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,~~CAAsB~~,CAAC;~~QACvE~~,~~OAAO~~,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;~~IACvC~~,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,IAAe;IAC3D,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,aAAa,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACxF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;IACpE,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAIhC;IACC,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,SAAS,EAAE,CAAC;QACd,gBAAgB,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACtC,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,CACL,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC;QAChC,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC;QACnC,KAAK,CACN,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,CAAC,
MAAM,sBAAsB,GACjC,~~2EAA2E~~;~~IAC3E~~,~~oEAAoE~~;~~IACpE~~,~~wEAAwE~~;~~IACxE~~,~~2EAA2E~~;~~IAC3E~~,~~4EAA4E~~;~~IAC5E~~,~~0EAA0E~~;~~IAC1E~~,~~0EAA0E~~,CAAC;~~AAE7E;;;GAGG~~;AACH,MAAM,UAAU,mBAAmB,CAAC,IAA0C;IAC5E,MAAM,IAAI,GAAG;QACX,EAAE;QACF,EAAE;QACF,~~gFAAgF~~;~~QAChF~~,~~0IAA0I~~;~~KAC3I~~,CAAC;IACF,MAAM,IAAI,GAAkC;QAC1C,MAAM,EAAE;YACN,4FAA4F;gBAC1F,~~mJAAmJ~~;~~gBACnJ~~,~~oEAAoE~~;~~SACvE~~;QACD,QAAQ,EAAE;YACR,qFAAqF;gBACnF,2FAA2F;gBAC3F,sGAAsG;SACzG;QACD,UAAU,EAAE;YACV,8DAA8D;gBAC5D,sFAAsF;gBACtF,+FAA+F;SAClG;KACF,CAAC;IACF,OAAO,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC"}
1	+ {"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAIrC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAE3D;;;;;GAKG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,aAAa;AAEzD,8DAA8D;AAC9D,MAAM,UAAU,kBAAkB,CAAC,KAAqB;IACtD,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7C,IACE,CAAC,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa;QACrE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,YAAY;QACtE,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EACjC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,GAAW,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IAC1E,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAuC,CAAC;QACxF,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,6EAA6E;QAC7E,4EAA4E;QAC5E,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YAC1D,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,KAAK,GAAG,YAAY;gBAAE,OAAO,SAAS,CAAC;QAC9E,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,IAAe;IAC3D,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,aAAa,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CA
AC,CAAC;IACxF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;IACpE,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAIhC;IACC,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,SAAS,EAAE,CAAC;QACd,gBAAgB,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACtC,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,CACL,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC;QAChC,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC;QACnC,KAAK,CACN,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,CAAC,MAAM,sBAAsB,GACjC,8EAA8E;IAC9E,+EAA+E;IAC/E,iFAAiF;IACjF,yEAAyE;IACzE,iFAAiF;IACjF,+EAA+E;IAC/E,gFAAgF;IAChF,gFAAgF;IAChF,+EAA+E;IAC/E,6EAA6E;IAC7E,6EAA6E;IAC7E,kFAAkF;IAClF,yEAAyE,CAAC;AAE5E;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAA0C;IAC5E,MAAM,IAAI,GAAG;QACX,EAAE;QACF,EAAE;QACF,sFAAsF;QACtF,gTAAgT;KACjT,CAAC;IACF,MAAM,IAAI,GAAkC;QAC1C,MAAM,EAAE;YACN,4FAA4F;gBAC1F,gLAAgL;gBAChL,wFAAwF;SAC3F;QACD,QAAQ,EAAE;YACR,qFAAqF;gBACnF,2FAA2F;gBAC3F,sGAAsG;SACzG;QACD,UAAU,EAAE;YACV,8DAA8D;gBAC5D,sFAAsF;gBACtF,+FAA+F;SAClG;KACF,CAAC;IACF,OAAO,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC"}

package/dist/evidence/interaction_evidence.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@ export interface InteractionEvidence {
     /** Screenshots captured during the journey. */
     screenshots: number;
     /** Mobile interaction engine used, when target was mobile. */
-    mobile_engine?: "maestro" | "coordinate";
+    mobile_engine?: "flutter_driver" | "maestro" | "coordinate";
     /** Maestro flow pass/fail, when applicable. */
     mobile_flows_passed?: number;
     mobile_flows_total?: number;

package/dist/evidence/interaction_evidence.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"~~AAkBA~~,MAAM,WAAW,mBAAmB;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,SAAS,GAAG,YAAY,CAAC;~~IACzC~~,+CAA+C;IAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,0EAA0E;AAC1E,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAMtF;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAC3C,gBAAgB,EAAE,MAAM,GACvB;IAAE,QAAQ,EAAE,mBAAmB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB3D;AAED,MAAM,WAAW,oBAAoB;IACnC,wEAAwE;IACxE,eAAe,EAAE;QAAE,QAAQ,EAAE,mBAAmB,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC3E,YAAY,EAAE,OAAO,CAAC;IACtB,yEAAyE;IACzE,wBAAwB,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,oBAAoB,GAAG,WAAW,CA4BxF"}
1	+ {"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAmBA,MAAM,WAAW,mBAAmB;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,gBAAgB,GAAG,SAAS,GAAG,YAAY,CAAC;IAC5D,+CAA+C;IAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,0EAA0E;AAC1E,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAMtF;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAC3C,gBAAgB,EAAE,MAAM,GACvB;IAAE,QAAQ,EAAE,mBAAmB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB3D;AAED,MAAM,WAAW,oBAAoB;IACnC,wEAAwE;IACxE,eAAe,EAAE;QAAE,QAAQ,EAAE,mBAAmB,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC3E,YAAY,EAAE,OAAO,CAAC;IACtB,yEAAyE;IACzE,wBAAwB,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,oBAAoB,GAAG,WAAW,CA4BxF"}

package/dist/evidence/interaction_evidence.js CHANGED Viewed

@@ -10,8 +10,9 @@
  * happened at all. It is deliberately satisfied by either evidence source so
  * it never regresses teams already using the manual recording flow.
  *
- * Applicable-or-n/a: the caller only adds it for UI projects and treats audit
- * (read-only) mode as n/a.
+ * Applicable-or-n/a: the caller only adds it for UI projects. It applies in
+ * EVERY mode — driving the app is verification, not a code edit, so a
+ * "don't modify my code" request never makes it n/a.
  */
 import { existsSync, readFileSync, writeFileSync, readdirSync, statSync } from "fs";
 import { join } from "path";

package/dist/evidence/interaction_evidence.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;GAcG~~;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAkB5B,MAAM,IAAI,GAAG,2BAA2B,CAAC;AAEzC,0EAA0E;AAC1E,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,EAAuB;IAC9E,IAAI,CAAC;QACH,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAC3C,gBAAwB;IAExB,IAAI,IAAI,GAA8D,IAAI,CAAC;IAC3E,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAwB,CAAC;YAC7E,IAAI,CAAC,IAAI,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;gBAAE,IAAI,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAAC,KAA2B;IACzE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,wBAAwB,EAAE,GAAG,KAAK,CAAC;IAC1E,IAAI,eAAe,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC;QACnC,MAAM,MAAM,GAAG,CAAC,CAAC,aAAa;YAC5B,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,kBAAkB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,mBAAmB,IAAI,CAAC,IAAI,CAAC,CAAC,kBAAkB,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG;YAChJ,CAAC,CAAC,EAAE,CAAC;QACP,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8BAA8B,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,WAAW,iBAAiB;SACxH,CAAC;IACJ,CAAC;IACD,IAAI,wBAAwB,EAAE,CAAC;QAC7B,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8FAA8F;SACvG,CAAC;IACJ,CAAC;IACD,IAAI,eAAe,IAAI,YAAY,EAAE,CAAC;QACpC,OAAO
;YACL,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,qHAAqH;SAC9H,CAAC;IACJ,CAAC;IACD,OAAO;QACL,MAAM,EAAE,KAAK;QACb,MAAM,EAAE,2IAA2I;KACpJ,CAAC;AACJ,CAAC"}
1	+ {"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAkB5B,MAAM,IAAI,GAAG,2BAA2B,CAAC;AAEzC,0EAA0E;AAC1E,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,EAAuB;IAC9E,IAAI,CAAC;QACH,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAC3C,gBAAwB;IAExB,IAAI,IAAI,GAA8D,IAAI,CAAC;IAC3E,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAwB,CAAC;YAC7E,IAAI,CAAC,IAAI,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;gBAAE,IAAI,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAAC,KAA2B;IACzE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,wBAAwB,EAAE,GAAG,KAAK,CAAC;IAC1E,IAAI,eAAe,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC;QACnC,MAAM,MAAM,GAAG,CAAC,CAAC,aAAa;YAC5B,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,kBAAkB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,mBAAmB,IAAI,CAAC,IAAI,CAAC,CAAC,kBAAkB,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG;YAChJ,CAAC,CAAC,EAAE,CAAC;QACP,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8BAA8B,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,WAAW,iBAAiB;SACxH,CAAC;IACJ,CAAC;IACD,IAAI,wBAAwB,EAAE,CAAC;QAC7B,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8FAA8F;SACvG,CAAC;IACJ,CAAC;IACD,IAAI,eAAe,IAAI,YAAY,EAAE,CAAC;QACpC,OAAO;YA
CL,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,qHAAqH;SAC9H,CAAC;IACJ,CAAC;IACD,OAAO;QACL,MAAM,EAAE,KAAK;QACb,MAAM,EAAE,2IAA2I;KACpJ,CAAC;AACJ,CAAC"}

package/dist/index.js CHANGED Viewed

@@ -1801,19 +1801,21 @@ server.tool("codeloop_run_journey", TOOL_BOOTSTRAP + `DEEP-E2E EXECUTOR — laun
 in ONE hands-free call. This is the autonomous counterpart to the manual
 plan→start_recording→interact→stop→replay sequence: CodeLoop does it for you.
+Runs in EVERY mode — launching + driving the app is verification, not a code edit, so a
+"don't modify my code" request never disables it (that only pauses the agent's source edits).
 What it does, in order:
-1. REFUSES in audit/read-only mode (driving the app modifies its state).
-2. Detects the target (browser / desktop / android_emulator / ios_simulator / Flutter) — overridable.
-3. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
+1. Detects the target (browser / desktop / android_emulator / ios_simulator / Flutter) — overridable.
+2. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
    Android/iOS → BOOT the emulator/simulator (reuses one already booted; honors e2e.android_avd /
    e2e.ios_device; opt out with e2e.boot_device:false). If a mobile device can't be booted it returns
    a copy-paste directive instead of failing.
-4. Plans the journey (codeloop_plan_user_journey) — entity CRUD arcs + the AI-chatbox arc.
-5. Starts a background video recording (best-effort).
-6. DRIVES every deterministic step via the interaction engine (fill known fields, type the AI prompt +
+3. Plans the journey (codeloop_plan_user_journey) — entity CRUD arcs + the AI-chatbox arc.
+4. Starts a background video recording (best-effort).
+5. DRIVES every deterministic step via the interaction engine (fill known fields, type the AI prompt +
    submit + read back / assert a non-empty reply), capturing a screenshot after each step.
-7. Visits EVERY discovered screen (codeloop_discover_screens) and screenshots each.
-8. Stops the recording and returns a directive to run codeloop_interaction_replay + visual_review +
+6. Visits EVERY discovered screen (codeloop_discover_screens) and screenshots each.
+7. Stops the recording and returns a directive to run codeloop_interaction_replay + visual_review +
    design_compare + gate_check.
 Steps it can't resolve deterministically (ambiguous navigation, raw-coordinate targets, missing
@@ -1830,7 +1832,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
     target_type: targetTypeSchema.optional().describe("Override the auto-detected interaction target. Accepts synonyms (web→browser, android→android_emulator, ios→ios_simulator, *_desktop→desktop)."),
     web_url: z.string().optional().describe("URL to open for browser targets (e.g. http://localhost:3000). Defaults to e2e.web_url from config. Start your dev server first."),
     max_duration_seconds: z.number().int().min(10).max(600).optional().describe("Max video recording length. Default 180s."),
-    mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
 }, async (params) => {
     const result = await withAuth(async () => {
         const cwd = resolveCwd(params);
@@ -1839,7 +1840,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
         const { runJourney } = await import("./tools/run_journey.js");
         return runJourney({
             cwd,
-            paramMode: params.mode,
             e2e: { ...cfg.e2e, web_url: params.web_url ?? cfg.e2e?.web_url },
             targetApp: cfg.evidence?.target_app,
             targetType: params.target_type,
@@ -2685,14 +2685,15 @@ Returns: checklist of completed and pending verification steps.`, {
         const gateIsPassing = hasGateCheck && latestMeta?.gate_result === "passed";
         const gateConfidence = latestMeta?.confidence ?? 0;
         // Deep-E2E journey evidence — has codeloop_run_journey driven the app?
-        // (Audit/read-only mode never drives the app, so the step is n/a there.)
+        // Required for every UI project regardless of mode: driving the app is
+        // verification, not a code edit, so "don't modify my code" never waives it.
         let hasJourneyEvidence = false;
-        let workflowAuditMode = false;
         try {
             const { loadLatestInteractionEvidence } = await import("./evidence/interaction_evidence.js");
-            hasJourneyEvidence = loadLatestInteractionEvidence(join(baseDir, "runs")) != null;
-            const { resolveAgentMode } = await import("./evidence/agent_mode.js");
-            workflowAuditMode = resolveAgentMode({ cwd, configMode: config.agent_mode }) === "audit";
+            // baseDir already ends in `/runs` (getArtifactsBaseDir) — pass it as-is,
+            // NOT join(baseDir,"runs") which double-nests and hid the evidence so
+            // step 3b stayed PENDING forever even after run_journey drove the app.
+            hasJourneyEvidence = loadLatestInteractionEvidence(baseDir) != null;
         }
         catch { /* best-effort */ }
         // Interaction coverage: compare interaction_log selectors/URLs against discover_screens
@@ -2816,20 +2817,18 @@ Returns: checklist of completed and pending verification steps.`, {
             },
             {
                 step: "3b. Deep-E2E journey (run_journey)",
-                status: !isUIProject || workflowAuditMode
+                status: !isUIProject
                     ? "n/a"
                     : hasJourneyEvidence || interactionCount > 0
                         ? "done"
                         : "PENDING",
                 detail: !isUIProject
                     ? "Not a UI project — deep-E2E journey not required"
-                    : workflowAuditMode
-                        ? "Audit/read-only mode — the app is not launched or driven. Switch to fix mode for full interaction testing."
-                        : hasJourneyEvidence
-                            ? "codeloop_run_journey drove the app (interaction_evidence.json present)."
-                            : interactionCount > 0
-                                ? "App driven via the manual codeloop_interact flow."
-                                : "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
+                    : hasJourneyEvidence
+                        ? "codeloop_run_journey drove the app (interaction_evidence.json present)."
+                        : interactionCount > 0
+                            ? "App driven via the manual codeloop_interact flow."
+                            : "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
             },
             {
                 step: "4. Gate check",