codeloop-mcp-server 0.1.78 → 0.1.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/evidence/agent_mode.d.ts +13 -3
- package/dist/evidence/agent_mode.d.ts.map +1 -1
- package/dist/evidence/agent_mode.js +63 -22
- package/dist/evidence/agent_mode.js.map +1 -1
- package/dist/evidence/interaction_evidence.d.ts +1 -1
- package/dist/evidence/interaction_evidence.d.ts.map +1 -1
- package/dist/evidence/interaction_evidence.js +3 -2
- package/dist/evidence/interaction_evidence.js.map +1 -1
- package/dist/index.js +22 -23
- package/dist/index.js.map +1 -1
- package/dist/runners/app_launcher.d.ts.map +1 -1
- package/dist/runners/app_launcher.js +148 -8
- package/dist/runners/app_launcher.js.map +1 -1
- package/dist/runners/device_probe.d.ts +32 -0
- package/dist/runners/device_probe.d.ts.map +1 -1
- package/dist/runners/device_probe.js +73 -0
- package/dist/runners/device_probe.js.map +1 -1
- package/dist/runners/flutter_driver.d.ts +37 -0
- package/dist/runners/flutter_driver.d.ts.map +1 -0
- package/dist/runners/flutter_driver.js +242 -0
- package/dist/runners/flutter_driver.js.map +1 -0
- package/dist/runners/journey_to_maestro.d.ts.map +1 -1
- package/dist/runners/journey_to_maestro.js +39 -8
- package/dist/runners/journey_to_maestro.js.map +1 -1
- package/dist/runners/launch_liveness.d.ts +44 -0
- package/dist/runners/launch_liveness.d.ts.map +1 -0
- package/dist/runners/launch_liveness.js +145 -0
- package/dist/runners/launch_liveness.js.map +1 -0
- package/dist/runners/maestro_generator.d.ts +7 -0
- package/dist/runners/maestro_generator.d.ts.map +1 -1
- package/dist/runners/maestro_generator.js +58 -0
- package/dist/runners/maestro_generator.js.map +1 -1
- package/dist/runners/mobile_build_prep.d.ts +66 -0
- package/dist/runners/mobile_build_prep.d.ts.map +1 -0
- package/dist/runners/mobile_build_prep.js +285 -0
- package/dist/runners/mobile_build_prep.js.map +1 -0
- package/dist/tools/gate_check.d.ts +15 -1
- package/dist/tools/gate_check.d.ts.map +1 -1
- package/dist/tools/gate_check.js +18 -11
- package/dist/tools/gate_check.js.map +1 -1
- package/dist/tools/run_journey.d.ts +19 -5
- package/dist/tools/run_journey.d.ts.map +1 -1
- package/dist/tools/run_journey.js +133 -39
- package/dist/tools/run_journey.js.map +1 -1
- package/dist/tools/verify.d.ts.map +1 -1
- package/dist/tools/verify.js +9 -16
- package/dist/tools/verify.js.map +1 -1
- package/package.json +2 -2
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
/** Response tone for a tool call: "fix" (the default) or "audit" (report-only). */
export type AgentMode = "fix" | "audit";
|
|
2
|
+
/**
|
|
3
|
+
* How long a PERSISTED audit mode stays in effect after it was last set.
|
|
4
|
+
* Each audit tool call rewrites the file (refreshing this window), so a
|
|
5
|
+
* continuous audit session never lapses; only an idle/abandoned audit does.
|
|
6
|
+
* Kept short enough that a later, unrelated request defaults back to fix.
|
|
7
|
+
*/
|
|
8
|
+
export declare const AUDIT_TTL_MS: number;
|
|
2
9
|
/** Normalize a free-text mode value to the canonical enum. */
|
|
3
10
|
export declare function normalizeAgentMode(value?: string | null): AgentMode | undefined;
|
|
4
|
-
export declare function readPersistedAgentMode(cwd: string): AgentMode | undefined;
|
|
11
|
+
/**
 * Read the agent mode persisted under `cwd`.
 *
 * Returns `undefined` when no mode file exists or it cannot be read/parsed.
 * A persisted "audit" is honored only while `now` is within AUDIT_TTL_MS of
 * the file's `set_at` timestamp; a missing or unparseable `set_at` is treated
 * as stale.
 *
 * @param cwd Project directory that contains the persisted mode file.
 * @param now Reference time in epoch milliseconds; defaults to the current time.
 */
export declare function readPersistedAgentMode(cwd: string, now?: number): AgentMode | undefined;
|
|
5
12
|
export declare function persistAgentMode(cwd: string, mode: AgentMode): void;
|
|
6
13
|
/**
|
|
7
14
|
* Resolve the effective mode for a tool call and persist an explicit param so
|
|
@@ -15,8 +22,11 @@ export declare function resolveAgentMode(opts: {
|
|
|
15
22
|
/** Shared schema description so every tool's `mode` param reads identically. */
|
|
16
23
|
export declare const MODE_PARAM_DESCRIPTION: string;
|
|
17
24
|
/**
|
|
18
|
-
* The
|
|
19
|
-
* when audit
|
|
25
|
+
* The report-only directive appended to verify / diagnose / gate_check responses
|
|
26
|
+
* when audit tone is active. It softens ONLY the auto-fix prose — it never tells
|
|
27
|
+
* the agent to skip verification. CodeLoop still runs/expects the full suite
|
|
28
|
+
* (including codeloop_run_journey to launch + drive the app); audit just means
|
|
29
|
+
* "don't edit the user's code yet."
|
|
20
30
|
*/
|
|
21
31
|
export declare function buildAuditDirective(tool: "verify" | "diagnose" | "gate_check"): string;
|
|
22
32
|
//# sourceMappingURL=agent_mode.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"agent_mode.d.ts","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAwCA,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,OAAO,CAAC;AAIxC;;;;;GAKG;AACH,eAAO,MAAM,YAAY,QAAiB,CAAC;AAE3C,8DAA8D;AAC9D,wBAAgB,kBAAkB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,SAAS,CAc/E;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,GAAE,MAAmB,GAAG,SAAS,GAAG,SAAS,CAgBnG;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,CAQnE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,SAAS,CAWZ;AAED,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,QAawC,CAAC;AAE5E;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CAyBtF"}
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Agent mode — "fix" (default) vs "audit" (
|
|
2
|
+
* Agent mode — "fix" (default) vs "audit" (report-only TONE).
|
|
3
3
|
*
|
|
4
|
-
* CodeLoop
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* IMPORTANT: audit is NOT a CodeLoop verification mode — it does not change what
|
|
5
|
+
* CodeLoop verifies. CodeLoop ALWAYS runs the full suite (build, tests, launches
|
|
6
|
+
* + DRIVES the app via codeloop_run_journey, screenshots, gate, confidence) and
|
|
7
|
+
* lists every issue it finds, in every mode. Driving the app is verification,
|
|
8
|
+
* not a code modification.
|
|
9
|
+
*
|
|
10
|
+
* "audit" only adjusts the response TONE for the one case where a user says
|
|
11
|
+
* "do NOT modify my code, just run the checks and list every problem you find."
|
|
12
|
+
* Then CodeLoop still verifies everything and produces the full findings list,
|
|
13
|
+
* but its prose does not COMMAND the agent to edit files or enter the auto-fix
|
|
14
|
+
* loop — the agent presents the report and lets the user decide what to fix.
|
|
15
|
+
* The agent's source-code edits are paused; nothing about CodeLoop's checks is.
|
|
10
16
|
*
|
|
11
17
|
* Mode resolution precedence (highest first):
|
|
12
18
|
* 1. explicit `mode` param on the current tool call
|
|
@@ -17,10 +23,28 @@
|
|
|
17
23
|
* When a tool receives an explicit `mode`, it persists it so the rest of the
|
|
18
24
|
* session inherits the same mode without the agent having to repeat it on
|
|
19
25
|
* every call.
|
|
26
|
+
*
|
|
27
|
+
* READ-ONLY IS OPT-IN, NOT STICKY. audit must only apply when the user has
|
|
28
|
+
* actually asked for it ("don't modify my code, just list the problems").
|
|
29
|
+
* Persisted audit therefore EXPIRES: it is honored only for AUDIT_TTL_MS after
|
|
30
|
+
* it was last set, and every audit tool call refreshes that timestamp. The
|
|
31
|
+
* moment the agent stops passing `mode:"audit"` (because the user is no longer
|
|
32
|
+
* restricting edits), the persisted audit goes stale and CodeLoop reverts to
|
|
33
|
+
* the active fix default — verifying, checking the gate/confidence, and pushing
|
|
34
|
+
* the auto-fix loop. Persisted "fix" never expires (it IS the default). An
|
|
35
|
+
* explicit config.agent_mode:"audit" is a deliberate user opt-in and is honored
|
|
36
|
+
* with no TTL.
|
|
20
37
|
*/
|
|
21
38
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
22
39
|
import { join, dirname } from "path";
|
|
23
40
|
const MODE_FILE_REL = join(".codeloop", "agent_mode.json");
|
|
41
|
+
/**
|
|
42
|
+
* How long a PERSISTED audit mode stays in effect after it was last set.
|
|
43
|
+
* Each audit tool call rewrites the file (refreshing this window), so a
|
|
44
|
+
* continuous audit session never lapses; only an idle/abandoned audit does.
|
|
45
|
+
* Kept short enough that a later, unrelated request defaults back to fix.
|
|
46
|
+
*/
|
|
47
|
+
export const AUDIT_TTL_MS = 1000 * 60 * 30; // ms per second × seconds per minute × 30 minutes
|
|
24
48
|
/** Normalize a free-text mode value to the canonical enum. */
|
|
25
49
|
export function normalizeAgentMode(value) {
|
|
26
50
|
if (!value)
|
|
@@ -36,13 +60,21 @@ export function normalizeAgentMode(value) {
|
|
|
36
60
|
}
|
|
37
61
|
return undefined;
|
|
38
62
|
}
|
|
39
|
-
export function readPersistedAgentMode(cwd) {
|
|
63
|
+
export function readPersistedAgentMode(cwd, now = Date.now()) {
|
|
40
64
|
try {
|
|
41
65
|
const p = join(cwd, MODE_FILE_REL);
|
|
42
66
|
if (!existsSync(p))
|
|
43
67
|
return undefined;
|
|
44
68
|
const data = JSON.parse(readFileSync(p, "utf-8"));
|
|
45
|
-
|
|
69
|
+
const mode = normalizeAgentMode(data.mode);
|
|
70
|
+
// Persisted audit is OPT-IN and time-boxed: ignore it once stale so CodeLoop
|
|
71
|
+
// reverts to the active fix default unless the user keeps requesting audit.
|
|
72
|
+
if (mode === "audit") {
|
|
73
|
+
const setAt = data.set_at ? Date.parse(data.set_at) : NaN;
|
|
74
|
+
if (!Number.isFinite(setAt) || now - setAt > AUDIT_TTL_MS)
|
|
75
|
+
return undefined;
|
|
76
|
+
}
|
|
77
|
+
return mode;
|
|
46
78
|
}
|
|
47
79
|
catch {
|
|
48
80
|
return undefined;
|
|
@@ -73,29 +105,38 @@ export function resolveAgentMode(opts) {
|
|
|
73
105
|
"fix");
|
|
74
106
|
}
|
|
75
107
|
/** Shared schema description so every tool's `mode` param reads identically. */
|
|
76
|
-
export const MODE_PARAM_DESCRIPTION = "
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
108
|
+
export const MODE_PARAM_DESCRIPTION = "Response TONE only — it does NOT change what CodeLoop verifies. LEAVE UNSET " +
|
|
109
|
+
"for the normal behavior: 'fix' (default) = the standard auto-fix loop where, " +
|
|
110
|
+
"after verify/gate, you actively diagnose and modify code, re-verify, and drive " +
|
|
111
|
+
"the gate/confidence up until it passes. ONLY set 'audit' when the user " +
|
|
112
|
+
"EXPLICITLY asked you to run the checks and LIST the problems WITHOUT modifying " +
|
|
113
|
+
"their code (e.g. 'don't change anything, just tell me what's wrong'). Do NOT " +
|
|
114
|
+
"infer audit on your own. In EITHER mode CodeLoop runs the FULL suite — build, " +
|
|
115
|
+
"tests, launching + DRIVING the app (codeloop_run_journey), screenshots, gate, " +
|
|
116
|
+
"confidence — and returns every issue; driving the app is verification, not a " +
|
|
117
|
+
"code edit. 'audit' only stops CodeLoop's prose from commanding you to edit " +
|
|
118
|
+
"files or loop, so you present the report and let the user decide. Audit is " +
|
|
119
|
+
"OPT-IN and time-boxed: it auto-expires back to the fix default; pass mode:'fix' " +
|
|
120
|
+
"(or omit it) the moment the user is ready for you to apply fixes again.";
|
|
83
121
|
/**
|
|
84
|
-
* The
|
|
85
|
-
* when audit
|
|
122
|
+
* The report-only directive appended to verify / diagnose / gate_check responses
|
|
123
|
+
* when audit tone is active. It softens ONLY the auto-fix prose — it never tells
|
|
124
|
+
* the agent to skip verification. CodeLoop still runs/expects the full suite
|
|
125
|
+
* (including codeloop_run_journey to launch + drive the app); audit just means
|
|
126
|
+
* "don't edit the user's code yet."
|
|
86
127
|
*/
|
|
87
128
|
export function buildAuditDirective(tool) {
|
|
88
129
|
const head = [
|
|
89
130
|
"",
|
|
90
131
|
"",
|
|
91
|
-
"📋 AUDIT MODE (
|
|
92
|
-
"Do NOT modify any source files
|
|
132
|
+
"📋 AUDIT MODE (report-only) — the user asked you to LIST problems, not fix them YET.",
|
|
133
|
+
"Do NOT modify any source files and do NOT enter the auto-fix loop. This pauses CODE EDITS only — it does NOT pause verification: still run CodeLoop's full read-only checks, including codeloop_run_journey (launch + drive the app + screenshots) and codeloop_gate_check, so your findings list is complete.",
|
|
93
134
|
];
|
|
94
135
|
const tail = {
|
|
95
136
|
verify: [
|
|
96
137
|
"Present the failing checks above to the user as a findings list (file, what failed, why). " +
|
|
97
|
-
"You MAY call codeloop_diagnose
|
|
98
|
-
"After
|
|
138
|
+
"You MAY call codeloop_diagnose for a richer structured breakdown and SHOULD still call codeloop_run_journey to drive the app — both only gather evidence, neither edits code. " +
|
|
139
|
+
"After producing the complete report, STOP and wait for the user to decide what to fix.",
|
|
99
140
|
],
|
|
100
141
|
diagnose: [
|
|
101
142
|
"The repair_tasks above are a DIAGNOSIS for the REPORT, not a TODO list to execute. " +
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"agent_mode.js","sourceRoot":"","sources":["../../src/evidence/agent_mode.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAIrC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAE3D;;;;;GAKG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,aAAa;AAEzD,8DAA8D;AAC9D,MAAM,UAAU,kBAAkB,CAAC,KAAqB;IACtD,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7C,IACE,CAAC,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa;QACrE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,YAAY;QACtE,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EACjC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,GAAW,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IAC1E,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAuC,CAAC;QACxF,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,6EAA6E;QAC7E,4EAA4E;QAC5E,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YAC1D,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,KAAK,GAAG,YAAY;gBAAE,OAAO,SAAS,CAAC;QAC9E,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,IAAe;IAC3D,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QACnC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,aAAa,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,C
AAC;IACxF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;IACpE,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAIhC;IACC,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,SAAS,EAAE,CAAC;QACd,gBAAgB,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACtC,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,CACL,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC;QAChC,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC;QACnC,KAAK,CACN,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,CAAC,MAAM,sBAAsB,GACjC,8EAA8E;IAC9E,+EAA+E;IAC/E,iFAAiF;IACjF,yEAAyE;IACzE,iFAAiF;IACjF,+EAA+E;IAC/E,gFAAgF;IAChF,gFAAgF;IAChF,+EAA+E;IAC/E,6EAA6E;IAC7E,6EAA6E;IAC7E,kFAAkF;IAClF,yEAAyE,CAAC;AAE5E;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAA0C;IAC5E,MAAM,IAAI,GAAG;QACX,EAAE;QACF,EAAE;QACF,sFAAsF;QACtF,gTAAgT;KACjT,CAAC;IACF,MAAM,IAAI,GAAkC;QAC1C,MAAM,EAAE;YACN,4FAA4F;gBAC1F,gLAAgL;gBAChL,wFAAwF;SAC3F;QACD,QAAQ,EAAE;YACR,qFAAqF;gBACnF,2FAA2F;gBAC3F,sGAAsG;SACzG;QACD,UAAU,EAAE;YACV,8DAA8D;gBAC5D,sFAAsF;gBACtF,+FAA+F;SAClG;KACF,CAAC;IACF,OAAO,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC"}
|
|
@@ -8,7 +8,7 @@ export interface InteractionEvidence {
|
|
|
8
8
|
/** Screenshots captured during the journey. */
|
|
9
9
|
screenshots: number;
|
|
10
10
|
/** Mobile interaction engine used, when target was mobile. */
|
|
11
|
-
mobile_engine?: "maestro" | "coordinate";
|
|
11
|
+
mobile_engine?: "flutter_driver" | "maestro" | "coordinate";
|
|
12
12
|
/** Maestro flow pass/fail, when applicable. */
|
|
13
13
|
mobile_flows_passed?: number;
|
|
14
14
|
mobile_flows_total?: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAmBA,MAAM,WAAW,mBAAmB;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,gBAAgB,GAAG,SAAS,GAAG,YAAY,CAAC;IAC5D,+CAA+C;IAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,0EAA0E;AAC1E,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAMtF;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAC3C,gBAAgB,EAAE,MAAM,GACvB;IAAE,QAAQ,EAAE,mBAAmB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB3D;AAED,MAAM,WAAW,oBAAoB;IACnC,wEAAwE;IACxE,eAAe,EAAE;QAAE,QAAQ,EAAE,mBAAmB,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC3E,YAAY,EAAE,OAAO,CAAC;IACtB,yEAAyE;IACzE,wBAAwB,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,oBAAoB,GAAG,WAAW,CA4BxF"}
|
|
@@ -10,8 +10,9 @@
|
|
|
10
10
|
* happened at all. It is deliberately satisfied by either evidence source so
|
|
11
11
|
* it never regresses teams already using the manual recording flow.
|
|
12
12
|
*
|
|
13
|
-
* Applicable-or-n/a: the caller only adds it for UI projects
|
|
14
|
-
*
|
|
13
|
+
* Applicable-or-n/a: the caller only adds it for UI projects. It applies in
|
|
14
|
+
* EVERY mode — driving the app is verification, not a code edit, so a
|
|
15
|
+
* "don't modify my code" request never makes it n/a.
|
|
15
16
|
*/
|
|
16
17
|
import { existsSync, readFileSync, writeFileSync, readdirSync, statSync } from "fs";
|
|
17
18
|
import { join } from "path";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAkB5B,MAAM,IAAI,GAAG,2BAA2B,CAAC;AAEzC,0EAA0E;AAC1E,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,EAAuB;IAC9E,IAAI,CAAC;QACH,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAC3C,gBAAwB;IAExB,IAAI,IAAI,GAA8D,IAAI,CAAC;IAC3E,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAwB,CAAC;YAC7E,IAAI,CAAC,IAAI,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;gBAAE,IAAI,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAAC,KAA2B;IACzE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,wBAAwB,EAAE,GAAG,KAAK,CAAC;IAC1E,IAAI,eAAe,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC;QACnC,MAAM,MAAM,GAAG,CAAC,CAAC,aAAa;YAC5B,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,kBAAkB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,mBAAmB,IAAI,CAAC,IAAI,CAAC,CAAC,kBAAkB,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG;YAChJ,CAAC,CAAC,EAAE,CAAC;QACP,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8BAA8B,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,WAAW,iBAAiB;SACxH,CAAC;IACJ,CAAC;IACD,IAAI,wBAAwB,EAAE,CAAC;QAC7B,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8FAA8F;SACvG,CAAC;IACJ,CAAC;IACD,IAAI,eAAe,IAAI,YAAY,EAAE,CAAC;QACpC,OAAO;YACL,M
AAM,EAAE,KAAK;YACb,MAAM,EAAE,qHAAqH;SAC9H,CAAC;IACJ,CAAC;IACD,OAAO;QACL,MAAM,EAAE,KAAK;QACb,MAAM,EAAE,2IAA2I;KACpJ,CAAC;AACJ,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1801,19 +1801,21 @@ server.tool("codeloop_run_journey", TOOL_BOOTSTRAP + `DEEP-E2E EXECUTOR — laun
|
|
|
1801
1801
|
in ONE hands-free call. This is the autonomous counterpart to the manual
|
|
1802
1802
|
plan→start_recording→interact→stop→replay sequence: CodeLoop does it for you.
|
|
1803
1803
|
|
|
1804
|
+
Runs in EVERY mode — launching + driving the app is verification, not a code edit, so a
|
|
1805
|
+
"don't modify my code" request never disables it (that only pauses the agent's source edits).
|
|
1806
|
+
|
|
1804
1807
|
What it does, in order:
|
|
1805
|
-
1.
|
|
1806
|
-
2.
|
|
1807
|
-
3. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
|
|
1808
|
+
1. Detects the target (browser / desktop / android_emulator / ios_simulator / Flutter) — overridable.
|
|
1809
|
+
2. READY/LAUNCH per target: web → headed Playwright at e2e.web_url; desktop → launch evidence.target_app;
|
|
1808
1810
|
Android/iOS → BOOT the emulator/simulator (reuses one already booted; honors e2e.android_avd /
|
|
1809
1811
|
e2e.ios_device; opt out with e2e.boot_device:false). If a mobile device can't be booted it returns
|
|
1810
1812
|
a copy-paste directive instead of failing.
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1813
|
+
3. Plans the journey (codeloop_plan_user_journey) — entity CRUD arcs + the AI-chatbox arc.
|
|
1814
|
+
4. Starts a background video recording (best-effort).
|
|
1815
|
+
5. DRIVES every deterministic step via the interaction engine (fill known fields, type the AI prompt +
|
|
1814
1816
|
submit + read back / assert a non-empty reply), capturing a screenshot after each step.
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
+
6. Visits EVERY discovered screen (codeloop_discover_screens) and screenshots each.
|
|
1818
|
+
7. Stops the recording and returns a directive to run codeloop_interaction_replay + visual_review +
|
|
1817
1819
|
design_compare + gate_check.
|
|
1818
1820
|
|
|
1819
1821
|
Steps it can't resolve deterministically (ambiguous navigation, raw-coordinate targets, missing
|
|
@@ -1830,7 +1832,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
|
|
|
1830
1832
|
target_type: targetTypeSchema.optional().describe("Override the auto-detected interaction target. Accepts synonyms (web→browser, android→android_emulator, ios→ios_simulator, *_desktop→desktop)."),
|
|
1831
1833
|
web_url: z.string().optional().describe("URL to open for browser targets (e.g. http://localhost:3000). Defaults to e2e.web_url from config. Start your dev server first."),
|
|
1832
1834
|
max_duration_seconds: z.number().int().min(10).max(600).optional().describe("Max video recording length. Default 180s."),
|
|
1833
|
-
mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
|
|
1834
1835
|
}, async (params) => {
|
|
1835
1836
|
const result = await withAuth(async () => {
|
|
1836
1837
|
const cwd = resolveCwd(params);
|
|
@@ -1839,7 +1840,6 @@ screens_captured[], screenshots[], unsupported_count, manual_followups[], direct
|
|
|
1839
1840
|
const { runJourney } = await import("./tools/run_journey.js");
|
|
1840
1841
|
return runJourney({
|
|
1841
1842
|
cwd,
|
|
1842
|
-
paramMode: params.mode,
|
|
1843
1843
|
e2e: { ...cfg.e2e, web_url: params.web_url ?? cfg.e2e?.web_url },
|
|
1844
1844
|
targetApp: cfg.evidence?.target_app,
|
|
1845
1845
|
targetType: params.target_type,
|
|
@@ -2685,14 +2685,15 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2685
2685
|
const gateIsPassing = hasGateCheck && latestMeta?.gate_result === "passed";
|
|
2686
2686
|
const gateConfidence = latestMeta?.confidence ?? 0;
|
|
2687
2687
|
// Deep-E2E journey evidence — has codeloop_run_journey driven the app?
|
|
2688
|
-
//
|
|
2688
|
+
// Required for every UI project regardless of mode: driving the app is
|
|
2689
|
+
// verification, not a code edit, so "don't modify my code" never waives it.
|
|
2689
2690
|
let hasJourneyEvidence = false;
|
|
2690
|
-
let workflowAuditMode = false;
|
|
2691
2691
|
try {
|
|
2692
2692
|
const { loadLatestInteractionEvidence } = await import("./evidence/interaction_evidence.js");
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2693
|
+
// baseDir already ends in `/runs` (getArtifactsBaseDir) — pass it as-is,
|
|
2694
|
+
// NOT join(baseDir, "runs"), which double-nested the path and hid the evidence so
|
|
2695
|
+
// step 3b stayed PENDING forever even after run_journey drove the app.
|
|
2696
|
+
hasJourneyEvidence = loadLatestInteractionEvidence(baseDir) != null;
|
|
2696
2697
|
}
|
|
2697
2698
|
catch { /* best-effort */ }
|
|
2698
2699
|
// Interaction coverage: compare interaction_log selectors/URLs against discover_screens
|
|
@@ -2816,20 +2817,18 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2816
2817
|
},
|
|
2817
2818
|
{
|
|
2818
2819
|
step: "3b. Deep-E2E journey (run_journey)",
|
|
2819
|
-
status: !isUIProject
|
|
2820
|
+
status: !isUIProject
|
|
2820
2821
|
? "n/a"
|
|
2821
2822
|
: hasJourneyEvidence || interactionCount > 0
|
|
2822
2823
|
? "done"
|
|
2823
2824
|
: "PENDING",
|
|
2824
2825
|
detail: !isUIProject
|
|
2825
2826
|
? "Not a UI project — deep-E2E journey not required"
|
|
2826
|
-
:
|
|
2827
|
-
? "
|
|
2828
|
-
:
|
|
2829
|
-
? "
|
|
2830
|
-
:
|
|
2831
|
-
? "App driven via the manual codeloop_interact flow."
|
|
2832
|
-
: "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
|
|
2827
|
+
: hasJourneyEvidence
|
|
2828
|
+
? "codeloop_run_journey drove the app (interaction_evidence.json present)."
|
|
2829
|
+
: interactionCount > 0
|
|
2830
|
+
? "App driven via the manual codeloop_interact flow."
|
|
2831
|
+
: "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
|
|
2833
2832
|
},
|
|
2834
2833
|
{
|
|
2835
2834
|
step: "4. Gate check",
|