codeloop-mcp-server 0.1.76 → 0.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/evidence/interaction_evidence.d.ts +46 -0
- package/dist/evidence/interaction_evidence.d.ts.map +1 -0
- package/dist/evidence/interaction_evidence.js +91 -0
- package/dist/evidence/interaction_evidence.js.map +1 -0
- package/dist/index.js +28 -0
- package/dist/index.js.map +1 -1
- package/dist/runners/app_launcher.d.ts +6 -0
- package/dist/runners/app_launcher.d.ts.map +1 -1
- package/dist/runners/app_launcher.js +146 -18
- package/dist/runners/app_launcher.js.map +1 -1
- package/dist/runners/cli_auth.d.ts +1 -1
- package/dist/runners/cli_auth.d.ts.map +1 -1
- package/dist/runners/cli_auth.js +6 -3
- package/dist/runners/cli_auth.js.map +1 -1
- package/dist/runners/journey_to_maestro.d.ts +31 -0
- package/dist/runners/journey_to_maestro.d.ts.map +1 -0
- package/dist/runners/journey_to_maestro.js +101 -0
- package/dist/runners/journey_to_maestro.js.map +1 -0
- package/dist/runners/maestro_generator.d.ts +5 -1
- package/dist/runners/maestro_generator.d.ts.map +1 -1
- package/dist/runners/maestro_generator.js +12 -4
- package/dist/runners/maestro_generator.js.map +1 -1
- package/dist/runners/mobile_app_id.d.ts +7 -0
- package/dist/runners/mobile_app_id.d.ts.map +1 -0
- package/dist/runners/mobile_app_id.js +100 -0
- package/dist/runners/mobile_app_id.js.map +1 -0
- package/dist/tools/gate_check.d.ts.map +1 -1
- package/dist/tools/gate_check.js +29 -0
- package/dist/tools/gate_check.js.map +1 -1
- package/dist/tools/run_journey.d.ts +14 -0
- package/dist/tools/run_journey.d.ts.map +1 -1
- package/dist/tools/run_journey.js +132 -4
- package/dist/tools/run_journey.js.map +1 -1
- package/dist/tools/verify.d.ts.map +1 -1
- package/dist/tools/verify.js +16 -4
- package/dist/tools/verify.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * Record describing one codeloop_run_journey run. It is serialized as
 * interaction_evidence.json inside that run's directory and read back by
 * the interaction_evidence gate.
 */
export interface InteractionEvidence {
    /** NOTE(review): presumably when the record was produced; the string format is not visible here — confirm. */
    generated_at: string;
    /** What drove it: the deep-E2E executor. */
    source: "run_journey";
    /** Target the journey ran against; rendered verbatim in the gate's pass reason. */
    target: string;
    /** Deterministic steps the executor drove. */
    driven_steps: number;
    /** Screenshots captured during the journey. */
    screenshots: number;
    /** Mobile interaction engine used, when target was mobile. */
    mobile_engine?: "maestro" | "coordinate";
    /** Maestro flow pass/fail, when applicable. */
    mobile_flows_passed?: number;
    /** Flow total paired with mobile_flows_passed; the gate reports the pair as "passed/total flow(s)". */
    mobile_flows_total?: number;
}
|
|
16
|
+
/**
 * Persist a run_journey interaction-evidence record into its run dir.
 *
 * The record is written as pretty-printed JSON to
 * `<runDir>/interaction_evidence.json`. The write is best-effort: a failure
 * is swallowed, so the caller gets no signal when nothing was persisted.
 *
 * @param runDir Directory of the run the record belongs to.
 * @param ev Evidence record to serialize.
 */
export declare function writeInteractionEvidence(runDir: string, ev: InteractionEvidence): void;
|
|
18
|
+
/**
 * Find the newest run_journey interaction_evidence.json across the project's
 * artifacts/runs/* dirs. Returns the record + its mtime (ms) or null.
 *
 * "Newest" is decided by the file's modification time, not by the record's
 * own generated_at field. Files that cannot be read or parsed are skipped.
 *
 * @param artifactsRunsDir Directory whose immediate children are run dirs.
 * @returns The newest record with its file mtime in milliseconds, or null
 *   when the directory cannot be listed or no run dir holds a readable record.
 */
export declare function loadLatestInteractionEvidence(artifactsRunsDir: string): {
    evidence: InteractionEvidence;
    mtimeMs: number;
} | null;
|
|
26
|
+
/** Inputs consumed by evaluateInteractionEvidenceGate. */
export interface InteractionGateInput {
    /**
     * Newest run_journey evidence record and its file mtime (ms), or null
     * when none was found. It only counts toward a pass when journeyStale
     * is false; a stale record is reported separately by the gate.
     */
    journeyEvidence: {
        evidence: InteractionEvidence;
        mtimeMs: number;
    } | null;
    /**
     * True when journeyEvidence is stale. The gate's failure text describes
     * this as source code having changed after the journey ran; how the
     * caller computes it is not visible here.
     */
    journeyStale: boolean;
    /** The manual recording flow produced interaction logs at some point. */
    hasManualInteractionLogs: boolean;
}
|
|
36
|
+
/** Outcome of a gate evaluation. */
export interface GateVerdict {
    /** Whether the gate is satisfied. */
    passed: boolean;
    /**
     * Human-readable explanation of the verdict. Failing verdicts produced by
     * the interaction_evidence gate also name the next call to make.
     */
    reason: string;
}
|
|
40
|
+
/**
 * Verdict for the interaction_evidence gate. Passes when the app was driven by
 * EITHER source; fails only when a UI project shows NO interactive testing at
 * all, steering the agent to the one-call codeloop_run_journey path.
 *
 * Branches are checked in this order:
 *  1. non-stale journey evidence      -> pass (reason summarizes the run)
 *  2. manual interaction logs present -> pass
 *  3. journey evidence present, stale -> fail (asks for a re-run)
 *  4. nothing at all                  -> fail (points at codeloop_run_journey)
 *
 * @param input Evidence flags gathered by the caller.
 * @returns The pass/fail verdict with a human-readable reason.
 */
export declare function evaluateInteractionEvidenceGate(input: InteractionGateInput): GateVerdict;
|
|
46
|
+
//# sourceMappingURL=interaction_evidence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interaction_evidence.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,mBAAmB;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,SAAS,GAAG,YAAY,CAAC;IACzC,+CAA+C;IAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,0EAA0E;AAC1E,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAMtF;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAC3C,gBAAgB,EAAE,MAAM,GACvB;IAAE,QAAQ,EAAE,mBAAmB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB3D;AAED,MAAM,WAAW,oBAAoB;IACnC,wEAAwE;IACxE,eAAe,EAAE;QAAE,QAAQ,EAAE,mBAAmB,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC3E,YAAY,EAAE,OAAO,CAAC;IACtB,yEAAyE;IACzE,wBAAwB,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,oBAAoB,GAAG,WAAW,CA4BxF"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * interaction_evidence — proof that the running app was actually DRIVEN for this
|
|
3
|
+
* project (typed into, tapped, submitted), by EITHER the hands-free executor
|
|
4
|
+
* (codeloop_run_journey, which writes interaction_evidence.json) OR the manual
|
|
5
|
+
* record -> interact -> replay flow (which writes logs/interaction_log.jsonl).
|
|
6
|
+
*
|
|
7
|
+
* The `interaction_evidence` gate is the umbrella that makes "you must drive
|
|
8
|
+
* the UI" a checked condition for UI projects, and — crucially — points the
|
|
9
|
+
* agent at the ONE-CALL path (codeloop_run_journey) when no interaction
|
|
10
|
+
* happened at all. It is deliberately satisfied by either evidence source so
|
|
11
|
+
* it never regresses teams already using the manual recording flow.
|
|
12
|
+
*
|
|
13
|
+
* Applicable-or-n/a: the caller only adds it for UI projects and treats audit
|
|
14
|
+
* (read-only) mode as n/a.
|
|
15
|
+
*/
|
|
16
|
+
import { existsSync, readFileSync, writeFileSync, readdirSync, statSync } from "fs";
|
|
17
|
+
import { join } from "path";
|
|
18
|
+
const FILE = "interaction_evidence.json";
|
|
19
|
+
/**
 * Serialize a run_journey evidence record as pretty-printed JSON into
 * `<runDir>/interaction_evidence.json`.
 *
 * Deliberately best-effort: any failure while building the path, serializing
 * the record or writing the file is ignored, so the caller never sees an
 * exception from this function.
 */
export function writeInteractionEvidence(runDir, ev) {
    try {
        const destination = join(runDir, FILE);
        const payload = JSON.stringify(ev, null, 2);
        writeFileSync(destination, payload);
    }
    catch (_ignored) {
        // Best-effort: a failed write must not surface to the caller.
    }
}
|
|
28
|
+
/**
 * Find the newest run_journey interaction_evidence.json across the project's
 * artifacts/runs/* dirs.
 *
 * "Newest" is decided by the evidence file's modification time. A candidate
 * is skipped when it is missing, cannot be stat'ed or read, is not valid
 * JSON, or does not parse to a plain object. The last check matters because
 * callers treat any non-null result as proof of a journey and read fields
 * off `evidence`; a file holding `null`, an array or a primitive would
 * otherwise crash them.
 *
 * @param artifactsRunsDir Directory whose immediate children are run dirs.
 * @returns `{ evidence, mtimeMs }` for the newest usable record, or null when
 *   the directory cannot be listed or no run dir holds a usable record.
 */
export function loadLatestInteractionEvidence(artifactsRunsDir) {
    let runDirs;
    try {
        runDirs = readdirSync(artifactsRunsDir);
    }
    catch {
        // Missing or unreadable runs directory: there is no evidence to report.
        return null;
    }
    let best = null;
    for (const d of runDirs) {
        const p = join(artifactsRunsDir, d, FILE);
        if (!existsSync(p))
            continue;
        try {
            const mtimeMs = statSync(p).mtimeMs;
            // Only a strictly newer file can replace the current best, so
            // skip reading and parsing anything that cannot win.
            if (best && mtimeMs <= best.mtimeMs)
                continue;
            const evidence = JSON.parse(readFileSync(p, "utf-8"));
            // Valid JSON is not necessarily a record: reject null, arrays
            // and primitives instead of handing them out as evidence.
            if (evidence === null || typeof evidence !== "object" || Array.isArray(evidence))
                continue;
            best = { evidence, mtimeMs };
        }
        catch {
            /* skip unreadable */
        }
    }
    return best;
}
|
|
57
|
+
/**
 * Verdict for the interaction_evidence gate. Passes when the app was driven by
 * EITHER source; fails only when a UI project shows NO interactive testing at
 * all, steering the agent to the one-call codeloop_run_journey path.
 *
 * Branches are checked in this order:
 *  1. non-stale journey evidence      -> pass (reason summarizes the run)
 *  2. manual interaction logs present -> pass
 *  3. journey evidence present, stale -> fail (asks for a re-run)
 *  4. nothing at all                  -> fail (points at codeloop_run_journey)
 *
 * Evidence records are read from disk, so the payload is not trusted:
 *  - a payload that is not a plain object (null, array, primitive) does not
 *    count as fresh journey evidence, instead of raising a TypeError;
 *  - missing or non-numeric counters are reported as 0, and a missing or empty
 *    target as "unknown target", instead of printing "undefined".
 *
 * @param input Evidence flags gathered by the caller (journeyEvidence,
 *   journeyStale, hasManualInteractionLogs).
 * @returns `{ passed, reason }` where reason is a human-readable explanation.
 */
export function evaluateInteractionEvidenceGate(input) {
    const { journeyEvidence, journeyStale, hasManualInteractionLogs } = input;
    const e = journeyEvidence ? journeyEvidence.evidence : null;
    const recordUsable = e !== null && typeof e === "object" && !Array.isArray(e);
    if (recordUsable && !journeyStale) {
        // Counters come from a JSON file; never let a missing one render as "undefined".
        const count = (value) => (Number.isFinite(value) ? value : 0);
        const target = typeof e.target === "string" && e.target !== "" ? e.target : "unknown target";
        const mobile = e.mobile_engine
            ? ` (mobile engine: ${e.mobile_engine}${e.mobile_flows_total != null ? `, ${e.mobile_flows_passed ?? 0}/${e.mobile_flows_total} flow(s)` : ""})`
            : "";
        return {
            passed: true,
            reason: `codeloop_run_journey drove ${count(e.driven_steps)} step(s) on ${target}${mobile}, ${count(e.screenshots)} screenshot(s).`,
        };
    }
    if (hasManualInteractionLogs) {
        return {
            passed: true,
            reason: "Interactions recorded via the manual codeloop_interact flow (interaction_log.jsonl present).",
        };
    }
    if (journeyEvidence && journeyStale) {
        return {
            passed: false,
            reason: "The only run_journey interaction evidence is STALE (source code changed after it ran). Re-run codeloop_run_journey.",
        };
    }
    return {
        passed: false,
        reason: "No interactive testing found. Drive the app once with codeloop_run_journey (one call: launches the app, types/taps/submits, screenshots).",
    };
}
|
|
91
|
+
//# sourceMappingURL=interaction_evidence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interaction_evidence.js","sourceRoot":"","sources":["../../src/evidence/interaction_evidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpF,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAkB5B,MAAM,IAAI,GAAG,2BAA2B,CAAC;AAEzC,0EAA0E;AAC1E,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,EAAuB;IAC9E,IAAI,CAAC;QACH,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,iBAAiB;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAC3C,gBAAwB;IAExB,IAAI,IAAI,GAA8D,IAAI,CAAC;IAC3E,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAwB,CAAC;YAC7E,IAAI,CAAC,IAAI,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;gBAAE,IAAI,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAAC,KAA2B;IACzE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,wBAAwB,EAAE,GAAG,KAAK,CAAC;IAC1E,IAAI,eAAe,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC;QACnC,MAAM,MAAM,GAAG,CAAC,CAAC,aAAa;YAC5B,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,kBAAkB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,mBAAmB,IAAI,CAAC,IAAI,CAAC,CAAC,kBAAkB,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG;YAChJ,CAAC,CAAC,EAAE,CAAC;QACP,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8BAA8B,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,WAAW,iBAAiB;SACxH,CAAC;IACJ,CAAC;IACD,IAAI,wBAAwB,EAAE,CAAC;QAC7B,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,8FAA8F;SACvG,CAAC;IACJ,CAAC;IACD,IAAI,eAAe,IAAI,YAAY,EAAE,CAAC;QACpC,OAAO;YACL,MA
AM,EAAE,KAAK;YACb,MAAM,EAAE,qHAAqH;SAC9H,CAAC;IACJ,CAAC;IACD,OAAO;QACL,MAAM,EAAE,KAAK;QACb,MAAM,EAAE,2IAA2I;KACpJ,CAAC;AACJ,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -2684,6 +2684,17 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2684
2684
|
}
|
|
2685
2685
|
const gateIsPassing = hasGateCheck && latestMeta?.gate_result === "passed";
|
|
2686
2686
|
const gateConfidence = latestMeta?.confidence ?? 0;
|
|
2687
|
+
// Deep-E2E journey evidence — has codeloop_run_journey driven the app?
|
|
2688
|
+
// (Audit/read-only mode never drives the app, so the step is n/a there.)
|
|
2689
|
+
let hasJourneyEvidence = false;
|
|
2690
|
+
let workflowAuditMode = false;
|
|
2691
|
+
try {
|
|
2692
|
+
const { loadLatestInteractionEvidence } = await import("./evidence/interaction_evidence.js");
|
|
2693
|
+
hasJourneyEvidence = loadLatestInteractionEvidence(join(baseDir, "runs")) != null;
|
|
2694
|
+
const { resolveAgentMode } = await import("./evidence/agent_mode.js");
|
|
2695
|
+
workflowAuditMode = resolveAgentMode({ cwd, configMode: config.agent_mode }) === "audit";
|
|
2696
|
+
}
|
|
2697
|
+
catch { /* best-effort */ }
|
|
2687
2698
|
// Interaction coverage: compare interaction_log selectors/URLs against discover_screens
|
|
2688
2699
|
let interactionCount = 0;
|
|
2689
2700
|
const coveredRoutes = new Set();
|
|
@@ -2803,6 +2814,23 @@ Returns: checklist of completed and pending verification steps.`, {
|
|
|
2803
2814
|
? `${videoCount} video(s) recorded`
|
|
2804
2815
|
: "No video recordings found. Call codeloop_start_recording → interact with ALL elements → codeloop_stop_recording → codeloop_interaction_replay.",
|
|
2805
2816
|
},
|
|
2817
|
+
{
|
|
2818
|
+
step: "3b. Deep-E2E journey (run_journey)",
|
|
2819
|
+
status: !isUIProject || workflowAuditMode
|
|
2820
|
+
? "n/a"
|
|
2821
|
+
: hasJourneyEvidence || interactionCount > 0
|
|
2822
|
+
? "done"
|
|
2823
|
+
: "PENDING",
|
|
2824
|
+
detail: !isUIProject
|
|
2825
|
+
? "Not a UI project — deep-E2E journey not required"
|
|
2826
|
+
: workflowAuditMode
|
|
2827
|
+
? "Audit/read-only mode — the app is not launched or driven. Switch to fix mode for full interaction testing."
|
|
2828
|
+
: hasJourneyEvidence
|
|
2829
|
+
? "codeloop_run_journey drove the app (interaction_evidence.json present)."
|
|
2830
|
+
: interactionCount > 0
|
|
2831
|
+
? "App driven via the manual codeloop_interact flow."
|
|
2832
|
+
: "App NOT driven yet. Call codeloop_run_journey ONCE — it launches the app / boots the emulator+simulator, types/taps/submits by label (Maestro on mobile), screenshots every screen, and records video. This satisfies the interaction_evidence gate.",
|
|
2833
|
+
},
|
|
2806
2834
|
{
|
|
2807
2835
|
step: "4. Gate check",
|
|
2808
2836
|
status: !hasGateCheck ? "PENDING" : gateIsPassing ? "done" : "NEEDS_RERUN",
|