codeloop-mcp-server 0.1.79 → 0.1.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/evidence/agent_mode.d.ts +7 -2
- package/dist/evidence/agent_mode.d.ts.map +1 -1
- package/dist/evidence/agent_mode.js +63 -27
- package/dist/evidence/agent_mode.js.map +1 -1
- package/dist/evidence/interaction_evidence.d.ts +1 -1
- package/dist/evidence/interaction_evidence.d.ts.map +1 -1
- package/dist/evidence/interaction_evidence.js +3 -2
- package/dist/evidence/interaction_evidence.js.map +1 -1
- package/dist/index.js +22 -23
- package/dist/index.js.map +1 -1
- package/dist/runners/app_launcher.d.ts.map +1 -1
- package/dist/runners/app_launcher.js +148 -8
- package/dist/runners/app_launcher.js.map +1 -1
- package/dist/runners/device_probe.d.ts +32 -0
- package/dist/runners/device_probe.d.ts.map +1 -1
- package/dist/runners/device_probe.js +73 -0
- package/dist/runners/device_probe.js.map +1 -1
- package/dist/runners/flutter_driver.d.ts +37 -0
- package/dist/runners/flutter_driver.d.ts.map +1 -0
- package/dist/runners/flutter_driver.js +242 -0
- package/dist/runners/flutter_driver.js.map +1 -0
- package/dist/runners/journey_to_maestro.d.ts.map +1 -1
- package/dist/runners/journey_to_maestro.js +39 -8
- package/dist/runners/journey_to_maestro.js.map +1 -1
- package/dist/runners/launch_liveness.d.ts +44 -0
- package/dist/runners/launch_liveness.d.ts.map +1 -0
- package/dist/runners/launch_liveness.js +145 -0
- package/dist/runners/launch_liveness.js.map +1 -0
- package/dist/runners/maestro_generator.d.ts +7 -0
- package/dist/runners/maestro_generator.d.ts.map +1 -1
- package/dist/runners/maestro_generator.js +58 -0
- package/dist/runners/maestro_generator.js.map +1 -1
- package/dist/runners/mobile_build_prep.d.ts +66 -0
- package/dist/runners/mobile_build_prep.d.ts.map +1 -0
- package/dist/runners/mobile_build_prep.js +285 -0
- package/dist/runners/mobile_build_prep.js.map +1 -0
- package/dist/tools/gate_check.d.ts +15 -1
- package/dist/tools/gate_check.d.ts.map +1 -1
- package/dist/tools/gate_check.js +18 -11
- package/dist/tools/gate_check.js.map +1 -1
- package/dist/tools/run_journey.d.ts +19 -5
- package/dist/tools/run_journey.d.ts.map +1 -1
- package/dist/tools/run_journey.js +133 -39
- package/dist/tools/run_journey.js.map +1 -1
- package/dist/tools/verify.d.ts.map +1 -1
- package/dist/tools/verify.js +9 -16
- package/dist/tools/verify.js.map +1 -1
- package/package.json +2 -2
|
@@ -8,6 +8,8 @@ export type AgentMode = "fix" | "audit";
|
|
|
8
8
|
export declare const AUDIT_TTL_MS: number;
|
|
9
9
|
/** Normalize a free-text mode value to the canonical enum. */
|
|
10
10
|
export declare function normalizeAgentMode(value?: string | null): AgentMode | undefined;
|
|
11
|
+
/** Delete the persisted mode file. Best-effort — used to clear stale audit. */
|
|
12
|
+
export declare function clearPersistedAgentMode(cwd: string): void;
|
|
11
13
|
/**
 * Read the agent mode persisted in `.codeloop/agent_mode.json` under `cwd`.
 *
 * A persisted "audit" is time-boxed: when its `set_at` timestamp is missing or
 * unparseable, or is older than AUDIT_TTL_MS relative to `now`, the stale file
 * is deleted and `undefined` is returned so callers fall back to the default.
 * Otherwise the normalized persisted mode is returned.
 *
 * @param cwd Project directory that contains the `.codeloop` folder.
 * @param now Reference time in epoch milliseconds; defaults to `Date.now()`.
 * @returns The persisted mode, or `undefined` when a persisted audit is stale.
 *   NOTE(review): the missing-file / unreadable-file paths are not visible in
 *   this diff — presumably they also yield `undefined`; confirm.
 */
export declare function readPersistedAgentMode(cwd: string, now?: number): AgentMode | undefined;
|
|
12
14
|
/**
 * Persist `mode` as the agent mode for the project at `cwd`.
 *
 * NOTE(review): the implementation is not visible in this diff. Presumably it
 * writes `.codeloop/agent_mode.json` together with the `set_at` timestamp that
 * readPersistedAgentMode uses for its audit TTL check — confirm against the
 * source before relying on this.
 *
 * @param cwd Project directory that contains the `.codeloop` folder.
 * @param mode Mode to persist ("fix" or "audit").
 */
export declare function persistAgentMode(cwd: string, mode: AgentMode): void;
|
|
13
15
|
/**
|
|
@@ -22,8 +24,11 @@ export declare function resolveAgentMode(opts: {
|
|
|
22
24
|
/** Shared schema description so every tool's `mode` param reads identically. */
|
|
23
25
|
export declare const MODE_PARAM_DESCRIPTION: string;
|
|
24
26
|
/**
|
|
25
|
-
* The
|
|
26
|
-
* when audit
|
|
27
|
+
* The report-only directive appended to verify / diagnose / gate_check responses
|
|
28
|
+
* when audit tone is active. It softens ONLY the auto-fix prose — it never tells
|
|
29
|
+
* the agent to skip verification. CodeLoop still runs/expects the full suite
|
|
30
|
+
* (including codeloop_run_journey to launch + drive the app); audit just means
|
|
31
|
+
* "don't edit the user's code yet."
|
|
27
32
|
*/
|
|
28
33
|
export declare function buildAuditDirective(tool: "verify" | "diagnose" | "gate_check"): string;
|
|
29
34
|
//# sourceMappingURL=agent_mode.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAwCA,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,OAAO,CAAC;AAIxC;;;;;GAKG;AACH,eAAO,MAAM,YAAY,QAAiB,CAAC;AAE3C,8DAA8D;AAC9D,wBAAgB,kBAAkB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,SAAS,CAc/E;AAED,+EAA+E;AAC/E,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAOzD;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,GAAE,MAAmB,GAAG,SAAS,GAAG,SAAS,CAwBnG;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,CAQnE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,SAAS,CAWZ;AAED,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,QAmBgB,CAAC;AAEpD;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CA0BtF"}
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Agent mode — "fix" (default) vs "audit" (
|
|
2
|
+
* Agent mode — "fix" (default) vs "audit" (report-only TONE).
|
|
3
3
|
*
|
|
4
|
-
* CodeLoop
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* IMPORTANT: audit is NOT a CodeLoop verification mode — it does not change what
|
|
5
|
+
* CodeLoop verifies. CodeLoop ALWAYS runs the full suite (build, tests, launches
|
|
6
|
+
* + DRIVES the app via codeloop_run_journey, screenshots, gate, confidence) and
|
|
7
|
+
* lists every issue it finds, in every mode. Driving the app is verification,
|
|
8
|
+
* not a code modification.
|
|
9
|
+
*
|
|
10
|
+
* "audit" only adjusts the response TONE for the one case where a user says
|
|
11
|
+
* "do NOT modify my code, just run the checks and list every problem you find."
|
|
12
|
+
* Then CodeLoop still verifies everything and produces the full findings list,
|
|
13
|
+
* but its prose does not COMMAND the agent to edit files or enter the auto-fix
|
|
14
|
+
* loop — the agent presents the report and lets the user decide what to fix.
|
|
15
|
+
* The agent's source-code edits are paused; nothing about CodeLoop's checks is.
|
|
10
16
|
*
|
|
11
17
|
* Mode resolution precedence (highest first):
|
|
12
18
|
* 1. explicit `mode` param on the current tool call
|
|
@@ -29,7 +35,7 @@
|
|
|
29
35
|
* explicit config.agent_mode:"audit" is a deliberate user opt-in and is honored
|
|
30
36
|
* with no TTL.
|
|
31
37
|
*/
|
|
32
|
-
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
38
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from "fs";
|
|
33
39
|
import { join, dirname } from "path";
|
|
34
40
|
const MODE_FILE_REL = join(".codeloop", "agent_mode.json");
|
|
35
41
|
/**
|
|
@@ -54,6 +60,17 @@ export function normalizeAgentMode(value) {
|
|
|
54
60
|
}
|
|
55
61
|
return undefined;
|
|
56
62
|
}
|
|
63
|
+
/** Delete the persisted mode file. Best-effort — used to clear stale audit. */
|
|
64
|
+
export function clearPersistedAgentMode(cwd) {
|
|
65
|
+
try {
|
|
66
|
+
const p = join(cwd, MODE_FILE_REL);
|
|
67
|
+
if (existsSync(p))
|
|
68
|
+
unlinkSync(p);
|
|
69
|
+
}
|
|
70
|
+
catch {
|
|
71
|
+
/* best-effort */
|
|
72
|
+
}
|
|
73
|
+
}
|
|
57
74
|
export function readPersistedAgentMode(cwd, now = Date.now()) {
|
|
58
75
|
try {
|
|
59
76
|
const p = join(cwd, MODE_FILE_REL);
|
|
@@ -63,10 +80,17 @@ export function readPersistedAgentMode(cwd, now = Date.now()) {
|
|
|
63
80
|
const mode = normalizeAgentMode(data.mode);
|
|
64
81
|
// Persisted audit is OPT-IN and time-boxed: ignore it once stale so CodeLoop
|
|
65
82
|
// reverts to the active fix default unless the user keeps requesting audit.
|
|
83
|
+
// Crucially we also DELETE the stale file: a leftover agent_mode.json that
|
|
84
|
+
// still says "audit" is exactly what leaked across sessions — the agent saw
|
|
85
|
+
// it in git status, inferred "the project is in audit mode", and re-asserted
|
|
86
|
+
// mode:"audit" on the next call (bypassing this TTL entirely). Removing the
|
|
87
|
+
// artifact means a new, neutral request can't be silently turned read-only.
|
|
66
88
|
if (mode === "audit") {
|
|
67
89
|
const setAt = data.set_at ? Date.parse(data.set_at) : NaN;
|
|
68
|
-
if (!Number.isFinite(setAt) || now - setAt > AUDIT_TTL_MS)
|
|
90
|
+
if (!Number.isFinite(setAt) || now - setAt > AUDIT_TTL_MS) {
|
|
91
|
+
clearPersistedAgentMode(cwd);
|
|
69
92
|
return undefined;
|
|
93
|
+
}
|
|
70
94
|
}
|
|
71
95
|
return mode;
|
|
72
96
|
}
|
|
@@ -99,38 +123,50 @@ export function resolveAgentMode(opts) {
|
|
|
99
123
|
"fix");
|
|
100
124
|
}
|
|
101
125
|
/** Shared schema description so every tool's `mode` param reads identically. */
|
|
102
|
-
export const MODE_PARAM_DESCRIPTION = "
|
|
103
|
-
"the standard auto-fix loop where,
|
|
104
|
-
"and modify code, re-verify, and drive
|
|
105
|
-
"ONLY set 'audit' when the user
|
|
106
|
-
"
|
|
107
|
-
"
|
|
108
|
-
"
|
|
109
|
-
"
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
126
|
+
export const MODE_PARAM_DESCRIPTION = "Response TONE only — it does NOT change what CodeLoop verifies. LEAVE UNSET " +
|
|
127
|
+
"for the normal behavior: 'fix' (default) = the standard auto-fix loop where, " +
|
|
128
|
+
"after verify/gate, you actively diagnose and modify code, re-verify, and drive " +
|
|
129
|
+
"the gate/confidence up until it passes. ONLY set 'audit' when the user, IN " +
|
|
130
|
+
"THEIR CURRENT MESSAGE, EXPLICITLY asked you to run the checks and LIST the " +
|
|
131
|
+
"problems WITHOUT modifying their code (e.g. 'don't change anything, just tell " +
|
|
132
|
+
"me what's wrong'). Do NOT infer audit on your own. CRITICAL: NEVER pass " +
|
|
133
|
+
"mode:'audit' merely because a .codeloop/agent_mode.json file exists, shows up " +
|
|
134
|
+
"in git status, or contains \"audit\" — that file is a STALE per-session " +
|
|
135
|
+
"artifact from an earlier run, NOT a project setting and NOT the user's current " +
|
|
136
|
+
"intent. Decide the mode SOLELY from what the user asked THIS time; if they just " +
|
|
137
|
+
"said 'verify'/'run CodeLoop' with no read-only request, that is 'fix' (omit " +
|
|
138
|
+
"mode). In EITHER mode CodeLoop runs the FULL suite — build, tests, launching + " +
|
|
139
|
+
"DRIVING the app (codeloop_run_journey), screenshots, gate, confidence — and " +
|
|
140
|
+
"returns every issue; driving the app is verification, not a code edit. 'audit' " +
|
|
141
|
+
"only stops CodeLoop's prose from commanding you to edit files or loop, so you " +
|
|
142
|
+
"present the report and let the user decide. Audit is OPT-IN and time-boxed: it " +
|
|
143
|
+
"auto-expires back to the fix default; pass mode:'fix' (or omit it) the moment " +
|
|
144
|
+
"the user is ready for you to apply fixes again.";
|
|
113
145
|
/**
|
|
114
|
-
* The
|
|
115
|
-
* when audit
|
|
146
|
+
* The report-only directive appended to verify / diagnose / gate_check responses
|
|
147
|
+
* when audit tone is active. It softens ONLY the auto-fix prose — it never tells
|
|
148
|
+
* the agent to skip verification. CodeLoop still runs/expects the full suite
|
|
149
|
+
* (including codeloop_run_journey to launch + drive the app); audit just means
|
|
150
|
+
* "don't edit the user's code yet."
|
|
116
151
|
*/
|
|
117
152
|
export function buildAuditDirective(tool) {
|
|
118
153
|
const head = [
|
|
119
154
|
"",
|
|
120
155
|
"",
|
|
121
|
-
"📋 AUDIT MODE (
|
|
122
|
-
"Do NOT modify any source files
|
|
156
|
+
"📋 AUDIT MODE (report-only) — the user asked you to LIST problems, not fix them YET.",
|
|
157
|
+
"Do NOT modify any source files and do NOT enter the auto-fix loop. This pauses CODE EDITS only — it does NOT pause verification: still run CodeLoop's full read-only checks, including codeloop_run_journey (launch + drive the app + screenshots) and codeloop_gate_check, so your findings list is complete.",
|
|
123
158
|
];
|
|
124
159
|
const tail = {
|
|
125
160
|
verify: [
|
|
126
161
|
"Present the failing checks above to the user as a findings list (file, what failed, why). " +
|
|
127
|
-
"You MAY call codeloop_diagnose
|
|
128
|
-
"After
|
|
162
|
+
"You MAY call codeloop_diagnose for a richer structured breakdown and SHOULD still call codeloop_run_journey to drive the app — both only gather evidence, neither edits code. " +
|
|
163
|
+
"After producing the complete report, STOP and wait for the user to decide what to fix.",
|
|
129
164
|
],
|
|
130
165
|
diagnose: [
|
|
131
166
|
"The repair_tasks above are a DIAGNOSIS for the REPORT, not a TODO list to execute. " +
|
|
132
167
|
"Summarise each issue (severity, root cause, affected files, suggested fix) for the user. " +
|
|
133
|
-
"Do NOT apply the fixes and do NOT
|
|
168
|
+
"Do NOT apply the fixes and do NOT loop on code edits — the user asked you not to modify their code. " +
|
|
169
|
+
"This does NOT exempt you from the read-only VISUAL evidence: if you have not yet driven the app this run, you MUST still call codeloop_run_journey once (it launches + drives the app + screenshots — verification, not a code edit) and then codeloop_gate_check, so the report includes whether the app actually launches and behaves. Only the SOURCE-CODE fixes are paused; the launch/interaction pass is not.",
|
|
134
170
|
],
|
|
135
171
|
gate_check: [
|
|
136
172
|
"Report the gate results and confidence as an AUDIT SUMMARY. " +
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAIrC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAE3D;;;;;GAKG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,aAAa;AAEzD,8DAA8D;AAC9D,MAAM,UAAU,kBAAkB,CAAC,KAAqB;IACtD,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7C,IACE,CAAC,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa;QACrE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,YAAY;QACtE,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EACjC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,+EAA+E;AAC/E,MAAM,UAAU,uBAAuB,CAAC,GAAW;IACjD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,IAAI,UAAU,CAAC,CAAC,CAAC;YAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,GAAW,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IAC1E,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAuC,CAAC;QACxF,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,6EAA6E;QAC7E,4EAA4E;QAC5E,2EAA2E;QAC3E,4EAA4E;QAC5E,6EAA6E;QAC7E,4EAA4E;QAC5E,4EAA4E;QAC5E,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YAC1D,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,KAAK,GAAG,YAAY,EAAE,CAAC;gBAC1D,uBAAuB,CAAC,GAAG,CAAC,CAAC;gBAC7B,OAAO,SAAS,CAAC;YACnB,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;Q
ACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,IAAe;IAC3D,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,aAAa,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACxF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;IACpE,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAIhC;IACC,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,SAAS,EAAE,CAAC;QACd,gBAAgB,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACtC,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,CACL,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC;QAChC,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC;QACnC,KAAK,CACN,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,CAAC,MAAM,sBAAsB,GACjC,8EAA8E;IAC9E,+EAA+E;IAC/E,iFAAiF;IACjF,6EAA6E;IAC7E,6EAA6E;IAC7E,gFAAgF;IAChF,0EAA0E;IAC1E,gFAAgF;IAChF,0EAA0E;IAC1E,iFAAiF;IACjF,kFAAkF;IAClF,8EAA8E;IAC9E,iFAAiF;IACjF,8EAA8E;IAC9E,iFAAiF;IACjF,gFAAgF;IAChF,iFAAiF;IACjF,gFAAgF;IAChF,iDAAiD,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAA0C;IAC5E,MAAM,IAAI,GAAG;QACX,EAAE;QACF,EAAE;QACF,sFAAsF;QACtF,gTAAgT;KACjT,CAAC;IACF,MAAM,IAAI,GAAkC;QAC1C,MAAM,EAAE;YACN,4FAA4F;gBAC1F,gLAAgL;gBAChL,wFAAwF;SAC3F;QACD,QAAQ,EAAE;YACR,qFAAqF;gBACnF,2FAA2F;gBAC3F,sGAAsG;gBACtG,qZAAqZ;SACxZ;QACD,UAAU,EAAE;YACV,8DAA8D;gBAC5D,sFAAsF;gBACtF,+FAA+F;SAClG;KACF,CAAC;IACF,OAAO,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC"}
|
|
@@ -8,7 +8,7 @@ export interface InteractionEvidence {
|
|
|
8
8
|
/** Screenshots captured during the journey. */
|
|
9
9
|
screenshots: number;
|
|
10
10
|
/** Mobile interaction engine used, when target was mobile. */
|
|
11
|
-
mobile_engine?: "maestro" | "coordinate";
|
|
11
|
+
mobile_engine?: "flutter_driver" | "maestro" | "coordinate";
|
|
12
12
|
/** Maestro flow pass/fail, when applicable. */
|
|
13
13
|
mobile_flows_passed?: number;
|
|
14
14
|
mobile_flows_total?: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAmBA,MAAM,WAAW,mBAAmB;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,gBAAgB,GAAG,SAAS,GAAG,YAAY,CAAC;IAC5D,+CAA+C;IAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,0EAA0E;AAC1E,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAMtF;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAC3C,gBAAgB,EAAE,MAAM,GACvB;IAAE,QAAQ,EAAE,mBAAmB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB3D;AAED,MAAM,WAAW,oBAAoB;IACnC,wEAAwE;IACxE,eAAe,EAAE;QAAE,QAAQ,EAAE,mBAAmB,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC3E,YAAY,EAAE,OAAO,CAAC;IACtB,yEAAyE;IACzE,wBAAwB,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,oBAAoB,GAAG,WAAW,CA4BxF"}
|
|
@@ -10,8 +10,9 @@
|
|
|
10
10
|
* happened at all. It is deliberately satisfied by either evidence source so
|
|
11
11
|
* it never regresses teams already using the manual recording flow.
|
|
12
12
|
*
|
|
13
|
-
* Applicable-or-n/a: the caller only adds it for UI projects
|
|
14
|
-
*
|
|
13
|
+
* Applicable-or-n/a: the caller only adds it for UI projects. It applies in
|
|
14
|
+
* EVERY mode — driving the app is verification, not a code edit, so a
|
|
15
|
+
* "don't modify my code" request never makes it n/a.
|
|
15
16
|
*/
|
|
16
17
|
import { existsSync, readFileSync, writeFileSync, readdirSync, statSync } from "fs";
|
|
17
18
|
import { join } from "path";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAkB5B,MAAM,IAAI,GAAG,2BAA2B,CAAC;AAEzC,0EAA0E;AAC1E,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,EAAuB;IAC9E,IAAI,CAAC;QACH,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAC3C,gBAAwB;IAExB,IAAI,IAAI,GAA8D,IAAI,CAAC;IAC3E,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAwB,CAAC;YAC7E,IAAI,CAAC,IAAI,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;gBAAE,IAAI,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAAC,KAA2B;IACzE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,wBAAwB,EAAE,GAAG,KAAK,CAAC;IAC1E,IAAI,eAAe,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC;QACnC,MAAM,MAAM,GAAG,CAAC,CAAC,aAAa;YAC5B,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,kBAAkB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,mBAAmB,IAAI,CAAC,IAAI,CAAC,CAAC,kBAAkB,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG;YAChJ,CAAC,CAAC,EAAE,CAAC;QACP,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8BAA8B,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,WAAW,iBAAiB;SACxH,CAAC;IACJ,CAAC;IACD,IAAI,wBAAwB,EAAE,CAAC;QAC7B,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8FAA8F;SACvG,CAAC;IACJ,CAAC;IACD,IAAI,eAAe,IAAI,YAAY,EAAE,CAAC;QACpC,OAAO;YACL,M
AAM,EAAE,KAAK;YACb,MAAM,EAAE,qHAAqH;SAC9H,CAAC;IACJ,CAAC;IACD,OAAO;QACL,MAAM,EAAE,KAAK;QACb,MAAM,EAAE,2IAA2I;KACpJ,CAAC;AACJ,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1801,19 +1801,21 @@ server.tool("codeloop_run_journey", TOOL_BOOTSTRAP + `DEEP-E2E EXECUTOR — laun
|
|
|
1801
1801
|
in ONE hands-free call. This is the autonomous counterpart to the manual
|
|
1802
1802
|
plan→start_recording→interact→stop→replay sequence: CodeLoop does it for you.
|
|
1803
1803
|
|
|
1804
|
+
Runs in EVERY mode — launching + driving the app is verification, not a code edit, so a
|
|
1805
|
+
"don't modify my code" request never disables it (that only pauses the agent's source edits).
|
|
1806
|
+
|
|
1804
1807
|
What it does, in order:
|
|
1805
|
-
1.
|
|
1806
|
-
2.
|
|
1807
|
-
3. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
|
|
1808
|
+
1. Detects the target (browser / desktop / android_emulator / ios_simulator / Flutter) — overridable.
|
|
1809
|
+
2. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
|
|
1808
1810
|
Android/iOS → BOOT the emulator/simulator (reuses one already booted; honors e2e.android_avd /
|
|
1809
1811
|
e2e.ios_device; opt out with e2e.boot_device:false). If a mobile device can't be booted it returns
|
|
1810
1812
|
a copy-paste directive instead of failing.
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1813
|
+
3. Plans the journey (codeloop_plan_user_journey) — entity CRUD arcs + the AI-chatbox arc.
|
|
1814
|
+
4. Starts a background video recording (best-effort).
|
|
1815
|
+
5. DRIVES every deterministic step via the interaction engine (fill known fields, type the AI prompt +
|
|
1814
1816
|
submit + read back / assert a non-empty reply), capturing a screenshot after each step.
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
+
6. Visits EVERY discovered screen (codeloop_discover_screens) and screenshots each.
|
|
1818
|
+
7. Stops the recording and returns a directive to run codeloop_interaction_replay + visual_review +
|
|
1817
1819
|
design_compare + gate_check.
|
|
1818
1820
|
|
|
1819
1821
|
Steps it can't resolve deterministically (ambiguous navigation, raw-coordinate targets, missing
|
|
@@ -1830,7 +1832,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
|
|
|
1830
1832
|
target_type: targetTypeSchema.optional().describe("Override the auto-detected interaction target. Accepts synonyms (web→browser, android→android_emulator, ios→ios_simulator, *_desktop→desktop)."),
|
|
1831
1833
|
web_url: z.string().optional().describe("URL to open for browser targets (e.g. http://localhost:3000). Defaults to e2e.web_url from config. Start your dev server first."),
|
|
1832
1834
|
max_duration_seconds: z.number().int().min(10).max(600).optional().describe("Max video recording length. Default 180s."),
|
|
1833
|
-
mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
|
|
1834
1835
|
}, async (params) => {
|
|
1835
1836
|
const result = await withAuth(async () => {
|
|
1836
1837
|
const cwd = resolveCwd(params);
|
|
@@ -1839,7 +1840,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
|
|
|
1839
1840
|
const { runJourney } = await import("./tools/run_journey.js");
|
|
1840
1841
|
return runJourney({
|
|
1841
1842
|
cwd,
|
|
1842
|
-
paramMode: params.mode,
|
|
1843
1843
|
e2e: { ...cfg.e2e, web_url: params.web_url ?? cfg.e2e?.web_url },
|
|
1844
1844
|
targetApp: cfg.evidence?.target_app,
|
|
1845
1845
|
targetType: params.target_type,
|
|
@@ -2685,14 +2685,15 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2685
2685
|
const gateIsPassing = hasGateCheck && latestMeta?.gate_result === "passed";
|
|
2686
2686
|
const gateConfidence = latestMeta?.confidence ?? 0;
|
|
2687
2687
|
// Deep-E2E journey evidence — has codeloop_run_journey driven the app?
|
|
2688
|
-
//
|
|
2688
|
+
// Required for every UI project regardless of mode: driving the app is
|
|
2689
|
+
// verification, not a code edit, so "don't modify my code" never waives it.
|
|
2689
2690
|
let hasJourneyEvidence = false;
|
|
2690
|
-
let workflowAuditMode = false;
|
|
2691
2691
|
try {
|
|
2692
2692
|
const { loadLatestInteractionEvidence } = await import("./evidence/interaction_evidence.js");
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2693
|
+
// baseDir already ends in `/runs` (getArtifactsBaseDir) — pass it as-is,
|
|
2694
|
+
// NOT join(baseDir,"runs") which double-nests and hid the evidence so
|
|
2695
|
+
// step 3b stayed PENDING forever even after run_journey drove the app.
|
|
2696
|
+
hasJourneyEvidence = loadLatestInteractionEvidence(baseDir) != null;
|
|
2696
2697
|
}
|
|
2697
2698
|
catch { /* best-effort */ }
|
|
2698
2699
|
// Interaction coverage: compare interaction_log selectors/URLs against discover_screens
|
|
@@ -2816,20 +2817,18 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2816
2817
|
},
|
|
2817
2818
|
{
|
|
2818
2819
|
step: "3b. Deep-E2E journey (run_journey)",
|
|
2819
|
-
status: !isUIProject
|
|
2820
|
+
status: !isUIProject
|
|
2820
2821
|
? "n/a"
|
|
2821
2822
|
: hasJourneyEvidence || interactionCount > 0
|
|
2822
2823
|
? "done"
|
|
2823
2824
|
: "PENDING",
|
|
2824
2825
|
detail: !isUIProject
|
|
2825
2826
|
? "Not a UI project — deep-E2E journey not required"
|
|
2826
|
-
:
|
|
2827
|
-
? "
|
|
2828
|
-
:
|
|
2829
|
-
? "
|
|
2830
|
-
:
|
|
2831
|
-
? "App driven via the manual codeloop_interact flow."
|
|
2832
|
-
: "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
|
|
2827
|
+
: hasJourneyEvidence
|
|
2828
|
+
? "codeloop_run_journey drove the app (interaction_evidence.json present)."
|
|
2829
|
+
: interactionCount > 0
|
|
2830
|
+
? "App driven via the manual codeloop_interact flow."
|
|
2831
|
+
: "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
|
|
2833
2832
|
},
|
|
2834
2833
|
{
|
|
2835
2834
|
step: "4. Gate check",
|