codeloop-mcp-server 0.1.52 → 0.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/auth/critical_floors.d.ts.map +1 -1
  2. package/dist/auth/critical_floors.js +8 -0
  3. package/dist/auth/critical_floors.js.map +1 -1
  4. package/dist/auth/update_check.d.ts.map +1 -1
  5. package/dist/auth/update_check.js +19 -1
  6. package/dist/auth/update_check.js.map +1 -1
  7. package/dist/evidence/anti_rationalisation.d.ts.map +1 -1
  8. package/dist/evidence/anti_rationalisation.js +15 -0
  9. package/dist/evidence/anti_rationalisation.js.map +1 -1
  10. package/dist/evidence/binary_freshness.d.ts +21 -0
  11. package/dist/evidence/binary_freshness.d.ts.map +1 -0
  12. package/dist/evidence/binary_freshness.js +113 -0
  13. package/dist/evidence/binary_freshness.js.map +1 -0
  14. package/dist/evidence/change_coverage.d.ts.map +1 -1
  15. package/dist/evidence/change_coverage.js +22 -1
  16. package/dist/evidence/change_coverage.js.map +1 -1
  17. package/dist/evidence/cycle_issues.d.ts +99 -0
  18. package/dist/evidence/cycle_issues.d.ts.map +1 -0
  19. package/dist/evidence/cycle_issues.js +120 -0
  20. package/dist/evidence/cycle_issues.js.map +1 -0
  21. package/dist/evidence/evidence_freshness.d.ts +39 -0
  22. package/dist/evidence/evidence_freshness.d.ts.map +1 -0
  23. package/dist/evidence/evidence_freshness.js +231 -0
  24. package/dist/evidence/evidence_freshness.js.map +1 -0
  25. package/dist/evidence/interaction_coverage.d.ts +15 -0
  26. package/dist/evidence/interaction_coverage.d.ts.map +1 -1
  27. package/dist/evidence/interaction_coverage.js +53 -4
  28. package/dist/evidence/interaction_coverage.js.map +1 -1
  29. package/dist/evidence/screenshot_diff.d.ts.map +1 -1
  30. package/dist/evidence/screenshot_diff.js +30 -12
  31. package/dist/evidence/screenshot_diff.js.map +1 -1
  32. package/dist/index.js +197 -4
  33. package/dist/index.js.map +1 -1
  34. package/dist/runners/modal_close_strategies.d.ts +82 -0
  35. package/dist/runners/modal_close_strategies.d.ts.map +1 -0
  36. package/dist/runners/modal_close_strategies.js +226 -0
  37. package/dist/runners/modal_close_strategies.js.map +1 -0
  38. package/dist/runners/modal_detector.d.ts +17 -0
  39. package/dist/runners/modal_detector.d.ts.map +1 -1
  40. package/dist/runners/modal_detector.js +95 -22
  41. package/dist/runners/modal_detector.js.map +1 -1
  42. package/dist/tools/design_compare.d.ts.map +1 -1
  43. package/dist/tools/design_compare.js +22 -3
  44. package/dist/tools/design_compare.js.map +1 -1
  45. package/dist/tools/gate_check.d.ts.map +1 -1
  46. package/dist/tools/gate_check.js +188 -14
  47. package/dist/tools/gate_check.js.map +1 -1
  48. package/package.json +2 -2
@@ -0,0 +1,120 @@
1
+ import { promises as fs } from "node:fs";
2
+ import path from "node:path";
3
+ import { getArtifactsBaseDir, getRunDir, listRuns } from "./artifacts.js";
4
/**
 * Resolve the directory of the first run reported by `listRuns` for this
 * workspace's artifacts base directory.
 *
 * NOTE(review): the name implies `listRuns` orders runs newest-first —
 * confirm against artifacts.js.
 *
 * @param {string} workspaceRoot - Workspace root handed to `getArtifactsBaseDir`.
 * @returns {string | undefined} Run directory, or `undefined` when no run exists.
 */
function findLatestRunDir(workspaceRoot) {
  const artifactsDir = getArtifactsBaseDir(workspaceRoot);
  const knownRuns = listRuns(artifactsDir);
  return knownRuns.length === 0
    ? undefined
    : getRunDir(knownRuns[0], artifactsDir);
}
11
+ const CYCLE_ISSUES_FILE = "cycle_issues.jsonl";
12
+ const CYCLE_RESOLUTIONS_FILE = "cycle_issue_resolutions.jsonl";
13
/**
 * Return the `logs` directory of the latest run, creating it (recursively)
 * when it does not exist yet.
 *
 * @param {string} workspaceRoot - Workspace root used to locate the latest run.
 * @returns {Promise<string | undefined>} Path to `<run>/logs`, or `undefined`
 *   when there is no run directory.
 */
async function resolveLogsDir(workspaceRoot) {
  const latestRun = findLatestRunDir(workspaceRoot);
  if (latestRun) {
    const target = path.join(latestRun, "logs");
    await fs.mkdir(target, { recursive: true });
    return target;
  }
  return undefined;
}
21
/**
 * Append one cycle issue, stamped with the current ISO-8601 time, as a
 * JSON line to `<run>/logs/cycle_issues.jsonl`.
 *
 * Best-effort by design: when no run directory exists, or when any step
 * fails, the call resolves without writing and without throwing.
 *
 * @param {string} workspaceRoot - Workspace root used to locate the latest run.
 * @param {object} issue - Issue payload; a `timestamp` field is added
 *   (overriding any `timestamp` already present on the payload).
 * @returns {Promise<void>}
 */
export async function recordCycleIssue(workspaceRoot, issue) {
  try {
    const logsDir = await resolveLogsDir(workspaceRoot);
    if (logsDir) {
      const target = path.join(logsDir, CYCLE_ISSUES_FILE);
      const stamped = { ...issue, timestamp: new Date().toISOString() };
      await fs.appendFile(target, `${JSON.stringify(stamped)}\n`, "utf-8");
    }
  } catch {
    // Deliberately ignored: evidence writes must never throw out of a tool path.
  }
}
39
/**
 * Append one resolution record, stamped with the current ISO-8601 time, as
 * a JSON line to `<run>/logs/cycle_issue_resolutions.jsonl`.
 *
 * Written both when the agent acknowledges an issue through gate_check and
 * implicitly by tools (for example a successful kill_modal_window clearing
 * modal_close_failed). Best-effort: failures and a missing run directory
 * are silently ignored.
 *
 * @param {string} workspaceRoot - Workspace root used to locate the latest run.
 * @param {object} resolution - Resolution payload; a `timestamp` field is added.
 * @returns {Promise<void>}
 */
export async function recordCycleIssueResolution(workspaceRoot, resolution) {
  try {
    const logsDir = await resolveLogsDir(workspaceRoot);
    if (logsDir) {
      const target = path.join(logsDir, CYCLE_RESOLUTIONS_FILE);
      const stamped = { ...resolution, timestamp: new Date().toISOString() };
      await fs.appendFile(target, `${JSON.stringify(stamped)}\n`, "utf-8");
    }
  } catch {
    // Deliberately ignored: same best-effort contract as recordCycleIssue.
  }
}
58
/**
 * Read a JSON-Lines file and return its records.
 *
 * Blank lines and lines that fail to parse are skipped. Lines that parse to
 * something other than a plain object (numbers, strings, booleans, arrays,
 * `null`) are skipped as well: callers read properties such as `kind` and
 * `issue_kind` off every record, so a stray scalar in the log must not be
 * surfaced as a record.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<object[]>} Parsed records in file order; `[]` when the
 *   file is missing or unreadable.
 */
async function readJsonl(filePath) {
  let raw;
  try {
    raw = await fs.readFile(filePath, "utf-8");
  } catch {
    // Missing/unreadable log is an expected state (nothing recorded yet).
    return [];
  }
  const records = [];
  for (const rawLine of raw.split("\n")) {
    // trim() also strips the trailing "\r" of CRLF-terminated lines.
    const line = rawLine.trim();
    if (line.length === 0) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Tolerate a torn or corrupt line instead of discarding the whole file.
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
79
/**
 * Load cycle issues and their resolutions from the latest run and work out
 * which issues are still unresolved. Used by the gate_check
 * `cycle_issues_acknowledged` blocker.
 *
 * Matching is by kind, not per instance: a resolution of kind X clears
 * every issue of kind X recorded up to the moment of that resolution. An
 * issue of kind X recorded AFTER the latest resolution of kind X stays
 * unresolved, so a recurrence following an acknowledgment still blocks the
 * gate. When either side lacks a parsable `timestamp`, the comparison falls
 * back to plain kind matching (the resolution clears the issue).
 *
 * @param {string} workspaceRoot - Workspace root used to locate the latest run.
 * @returns {Promise<{issues: object[], resolutions: object[], unresolved: object[]}>}
 *   All three arrays are empty when no run directory exists.
 */
export async function loadCycleIssues(workspaceRoot) {
  const runDir = findLatestRunDir(workspaceRoot);
  if (!runDir) {
    return { issues: [], resolutions: [], unresolved: [] };
  }
  const logsDir = path.join(runDir, "logs");
  const [issues, resolutions] = await Promise.all([
    readJsonl(path.join(logsDir, CYCLE_ISSUES_FILE)),
    readJsonl(path.join(logsDir, CYCLE_RESOLUTIONS_FILE)),
  ]);

  // Epoch millis of an entry's timestamp, or undefined when absent/unparsable.
  const timestampMs = (entry) => {
    const ms = Date.parse(entry.timestamp);
    return Number.isNaN(ms) ? undefined : ms;
  };

  // kind -> time of the latest resolution. `undefined` means at least one
  // resolution of that kind has no usable timestamp, so it clears everything.
  const resolvedAtByKind = new Map();
  for (const resolution of resolutions) {
    const kind = resolution.issue_kind;
    const at = timestampMs(resolution);
    if (!resolvedAtByKind.has(kind)) {
      resolvedAtByKind.set(kind, at);
      continue;
    }
    const previous = resolvedAtByKind.get(kind);
    resolvedAtByKind.set(
      kind,
      previous === undefined || at === undefined ? undefined : Math.max(previous, at),
    );
  }

  const unresolved = issues.filter((issue) => {
    if (!resolvedAtByKind.has(issue.kind)) return true;
    const resolvedAt = resolvedAtByKind.get(issue.kind);
    const raisedAt = timestampMs(issue);
    if (resolvedAt === undefined || raisedAt === undefined) return false;
    // Raised strictly after the last acknowledgment => still outstanding.
    return raisedAt > resolvedAt;
  });

  return { issues, resolutions, unresolved };
}
103
/**
 * Render a one-line summary of a cycle issue for the gate-check directive.
 *
 * Issues are read back from a JSONL log, so fields may be missing. The
 * summary never throws and always returns a string:
 *  - `modal_close_failed` tolerates an absent `strategies_tried` list;
 *  - `high_failure_rate` derives the ratio from `failed / total` when
 *    `ratio` is not a finite number;
 *  - an unrecognised `kind` produces a generic line instead of `undefined`.
 *
 * @param {object} issue - Cycle issue record with a `kind` discriminator.
 * @returns {string} Human-readable single-line summary.
 */
export function summariseCycleIssue(issue) {
  switch (issue?.kind) {
    case "click_missed_target": {
      const target =
        issue.description ??
        issue.selector ??
        `coords ${JSON.stringify(issue.coords ?? null)}`;
      const modal = issue.modal_kind ? ` (modal_kind=${issue.modal_kind})` : "";
      return `click_missed_target — ${target}${modal}`;
    }
    case "modal_close_failed": {
      const tried = Array.isArray(issue.strategies_tried)
        ? issue.strategies_tried
        : [];
      return `modal_close_failed — kind=${issue.modal_kind}, tried=[${tried.join(", ")}], hwnd=${issue.hwnd ?? "n/a"}`;
    }
    case "app_restart_during_recording":
      return `app_restart_during_recording — ${issue.reason}`;
    case "binary_mismatch":
      return `binary_mismatch — target=${issue.target_app}, binary mtime=${issue.binary_mtime}, source mtime=${issue.newest_source_mtime} (lag=${issue.lag_seconds}s)`;
    case "high_failure_rate": {
      let ratio = issue.ratio;
      if (!Number.isFinite(ratio)) {
        // Recompute from the raw counts rather than printing "NaN%".
        const derived = issue.failed / issue.total;
        ratio = Number.isFinite(derived) ? derived : 0;
      }
      return `high_failure_rate — ${issue.failed}/${issue.total} attempts failed (${(ratio * 100).toFixed(1)}%)`;
    }
    default:
      return `${String(issue?.kind ?? "unknown")} — unrecognised cycle issue kind`;
  }
}
120
+ //# sourceMappingURL=cycle_issues.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cycle_issues.js","sourceRoot":"","sources":["../../src/evidence/cycle_issues.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,mBAAmB,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE1E,SAAS,gBAAgB,CAAC,aAAqB;IAC7C,MAAM,IAAI,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAC;IAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACxC,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAmFD,MAAM,iBAAiB,GAAG,oBAAoB,CAAC;AAC/C,MAAM,sBAAsB,GAAG,+BAA+B,CAAC;AAE/D,KAAK,UAAU,cAAc,CAAC,aAAqB;IACjD,MAAM,MAAM,GAAG,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC/C,IAAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,aAAqB,EACrB,KAAsB;IAEtB,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO;YAAE,OAAO;QACrB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;QACnD,MAAM,IAAI,GACR,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,KAAK,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC;QAC3E,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,mEAAmE;IACrE,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,aAAqB,EACrB,UAAmD;IAEnD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO;YAAE,OAAO;QACrB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;QACxD,MAAM,IAAI,GACR,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,UAAU,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;YACtE,IAAI,CAAC;QACP,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,aAAa;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,SAAS,CAAI,QAAgB;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACjD,OAAO,GAAG;aACP,KAAK,
CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,KAAK,EAAc,EAAE,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC;IACnD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AASD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,aAAqB;IAErB,MAAM,MAAM,GAAG,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC/C,MAAM,KAAK,GAAsB,EAAE,MAAM,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;IACjF,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;IACnE,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC9C,SAAS,CAAa,UAAU,CAAC;QACjC,SAAS,CAAuB,eAAe,CAAC;KACjD,CAAC,CAAC;IACH,iEAAiE;IACjE,uEAAuE;IACvE,gEAAgE;IAChE,6DAA6D;IAC7D,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACpE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAiB;IACnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,qBAAqB;YACxB,OAAO,yBAAyB,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,QAAQ,IAAI,UAAU,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,gBAAgB,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC1L,KAAK,oBAAoB;YACvB,OAAO,6BAA6B,KAAK,CAAC,UAAU,YAAY,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,IAAI,KAAK,EAAE,CAAC;QACpI,KAAK,8BAA8B;YACjC,OAAO,kCAAkC,KAAK,CAAC,MAAM,EAAE,CAAC;QAC1D,KAAK,iBAAiB;YACpB,OAAO,4BAA4B,KAAK,CAAC,UAAU,kBAAkB,KAAK,CAAC,YAAY,kBAAkB,KAAK,CAAC,mBAAmB,SAAS,KAAK,CAAC,WAAW,IAAI,CAA
C;QACnK,KAAK,mBAAmB;YACtB,OAAO,uBAAuB,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,qBAAqB,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrH,CAAC;AACH,CAAC"}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Walk depth=2 of the project to find the newest source-file mtime. Mirrors
3
+ * verify_staleness.ts intentionally — both signals are coarse and meant to
4
+ * fire on every edit at workspace-root or first-level package directory.
5
+ */
6
+ export declare function newestSourceMtime(cwd: string): {
7
+ path: string;
8
+ mtimeMs: number;
9
+ } | null;
10
+ /**
11
+ * Best-effort mtime for a run's content. Prefers the latest mtime across
12
+ * the run's `videos/`, `logs/`, `screenshots/`, and `replay_frames/`
13
+ * subdirectories so we don't get tricked when the run dir itself was
14
+ * created early but the actual evidence was written hours later. Falls
15
+ * back to the run dir mtime when no content subdir exists.
16
+ */
17
+ export declare function runEvidenceMtime(runDir: string): number;
18
+ export interface FreshnessVerdict {
19
+ /** true when the source code changed AFTER the evidence was captured. */
20
+ stale: boolean;
21
+ source_path: string | null;
22
+ source_mtime_iso: string | null;
23
+ evidence_mtime_iso: string | null;
24
+ delta_minutes: number;
25
+ }
26
+ export declare function isEvidenceStale(cwd: string, evidenceMtimeMs: number): FreshnessVerdict;
27
+ /**
28
+ * Convenience helper for callers that already know which run id they're
29
+ * about to credit. Resolves the run dir, computes its evidence mtime,
30
+ * and runs the staleness check in one call.
31
+ */
32
+ export declare function isRunEvidenceStale(cwd: string, runId: string): FreshnessVerdict;
33
+ /**
34
+ * Format the staleness verdict into a single line suitable for appending
35
+ * to a gate's reason string. Returns an empty string when the verdict is
36
+ * fresh.
37
+ */
38
+ export declare function formatStalenessSuffix(v: FreshnessVerdict): string;
39
+ //# sourceMappingURL=evidence_freshness.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evidence_freshness.d.ts","sourceRoot":"","sources":["../../src/evidence/evidence_freshness.ts"],"names":[],"mappings":"AAyEA;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiCvF;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAyCvD;AAED,MAAM,WAAW,gBAAgB;IAC/B,yEAAyE;IACzE,KAAK,EAAE,OAAO,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;CACvB;AAeD,wBAAgB,eAAe,CAC7B,GAAG,EAAE,MAAM,EACX,eAAe,EAAE,MAAM,GACtB,gBAAgB,CA4BlB;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,gBAAgB,CAI/E;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,CAAC,EAAE,gBAAgB,GAAG,MAAM,CAOjE"}
@@ -0,0 +1,231 @@
1
+ import { existsSync, readdirSync, statSync } from "fs";
2
+ import { join } from "path";
3
+ import { getArtifactsBaseDir, getRunDir } from "./artifacts.js";
4
+ /**
5
+ * 0.1.53 D2 — Evidence freshness.
6
+ *
7
+ * The Photometry-DB E2E #10 transcript shipped at 100% confidence with all
8
+ * 11 gates green even though the agent NEVER re-recorded after adding the
9
+ * Product Name / Product Code column to the Luminaire export pipeline. The
10
+ * gate accepted:
11
+ *
12
+ * - video_evidence: "1 video(s) recorded in sibling run …_s6nzg8
13
+ * (28 min before this gate run)"
14
+ * - interaction_depth_evidence: 440 successful actions (from sibling run)
15
+ * - user_journey_evidence: full_arcs=1 (from sibling run)
16
+ *
17
+ * — all mined from a recording that pre-dated the source-file changes by
18
+ * ~30 minutes. The 0.1.51 cross-run fallback (`MAX_SIBLING_AGE_MS = 1 hour`
19
+ * relative to NOW) was meant to keep the gate from blowing up when an agent
20
+ * starts the recording in a separate run from the verify, but it has no
21
+ * concept of "the source code changed between the recording and the gate".
22
+ *
23
+ * D2 closes that gap by anchoring evidence to source-file mtime, NOT to
24
+ * wall-clock now:
25
+ *
26
+ * - Walk source files (depth-2, same SKIP_DIRS as H2) for the newest
27
+ * mtime.
28
+ * - For every sibling run we'd otherwise credit (video, interaction
29
+ * log, replay frames), require the run's directory mtime to be
30
+ * >= newest source-file mtime, otherwise the evidence is STALE and
31
+ * the gate must reject it.
32
+ * - The gate's reason string surfaces both timestamps so the agent
33
+ * can tell at a glance "my recording is from 2:14 PM but I edited
34
+ * PhotometricDataViewModel.cs at 2:42 PM — I need to re-record".
35
+ */
36
+ const SKIP_DIRS = new Set([
37
+ "node_modules",
38
+ ".git",
39
+ ".vs",
40
+ ".idea",
41
+ ".vscode",
42
+ ".codeloop",
43
+ "artifacts",
44
+ "dist",
45
+ "build",
46
+ "out",
47
+ "bin",
48
+ "obj",
49
+ "Pods",
50
+ ".next",
51
+ ".turbo",
52
+ ".cache",
53
+ ".gradle",
54
+ "DerivedData",
55
+ "__pycache__",
56
+ "target",
57
+ ".venv",
58
+ "venv",
59
+ ]);
60
+ const SKIP_FILE_PATTERNS = [
61
+ /^\.DS_Store$/,
62
+ /^Thumbs\.db$/,
63
+ /^package-lock\.json$/,
64
+ /^pnpm-lock\.yaml$/,
65
+ /^yarn\.lock$/,
66
+ /^poetry\.lock$/,
67
+ /^Cargo\.lock$/,
68
+ /\.log$/,
69
+ ];
70
/**
 * Find the most recently modified file near the top of the project.
 *
 * `cwd` is depth 0; directories are read down to depth 2, so files up to
 * two directory levels below `cwd` are considered. Any entry (file or
 * directory) whose name is in SKIP_DIRS is ignored, as is any file whose
 * name matches SKIP_FILE_PATTERNS. Entries that are neither regular files
 * nor directories are ignored. Unreadable directories and files are
 * skipped silently. On an mtime tie the first file encountered wins.
 *
 * @param {string} cwd - Project root to scan.
 * @returns {{ path: string, mtimeMs: number } | null} Newest file, or `null`
 *   when `cwd` does not exist or no eligible file was found.
 */
export function newestSourceMtime(cwd) {
  let latest = null;

  // Record `filePath` when it is strictly newer than the current best.
  const consider = (filePath) => {
    try {
      const { mtimeMs } = statSync(filePath);
      if (latest === null || mtimeMs > latest.mtimeMs) {
        latest = { path: filePath, mtimeMs };
      }
    } catch {
      /* skip unreadable */
    }
  };

  const walk = (dir, depth) => {
    if (depth > 2) return;
    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const { name } = entry;
      if (SKIP_DIRS.has(name)) continue;
      const fullPath = join(dir, name);
      if (entry.isDirectory()) {
        walk(fullPath, depth + 1);
        continue;
      }
      if (!entry.isFile()) continue;
      if (SKIP_FILE_PATTERNS.some((pattern) => pattern.test(name))) continue;
      consider(fullPath);
    }
  };

  if (existsSync(cwd)) {
    walk(cwd, 0);
  }
  return latest;
}
114
/**
 * Best-effort "when was this run's evidence last written" timestamp.
 *
 * Takes the maximum mtime over the run's `videos/`, `logs/`,
 * `screenshots/` and `replay_frames/` subdirectories and over the direct
 * children of each, so a run directory created early but filled in later
 * is dated by its content. Falls back to the run directory's own mtime
 * when none of those yielded a value.
 *
 * @param {string} runDir - Absolute or relative path of the run directory.
 * @returns {number} Newest mtime in epoch milliseconds, or 0 when `runDir`
 *   does not exist or nothing could be stat'ed.
 */
export function runEvidenceMtime(runDir) {
  if (!existsSync(runDir)) return 0;

  let latest = 0;
  // Raise `latest` to the mtime of `target`; stat failures are ignored.
  const bump = (target) => {
    try {
      const { mtimeMs } = statSync(target);
      if (mtimeMs > latest) latest = mtimeMs;
    } catch {
      /* ignore */
    }
  };

  for (const sub of ["videos", "logs", "screenshots", "replay_frames"]) {
    const subDir = join(runDir, sub);
    if (!existsSync(subDir)) continue;
    bump(subDir);
    // Also look one level inside: Windows doesn't always update the
    // directory mtime when a single file inside it is rewritten.
    let children = [];
    try {
      children = readdirSync(subDir);
    } catch {
      /* ignore */
    }
    for (const child of children) {
      bump(join(subDir, child));
    }
  }

  if (latest === 0) {
    try {
      latest = statSync(runDir).mtimeMs;
    } catch {
      /* ignore */
    }
  }
  return latest;
}
168
/**
 * Grace period applied before evidence is declared stale: the newest
 * source file must be more than this many milliseconds newer than the
 * evidence. The buffer absorbs clock drift between the editor's
 * file-watcher and the recording subprocess on Windows.
 */
const STALE_BUFFER_MS = 60_000;
/**
 * Compare an evidence timestamp against the newest source-file mtime.
 *
 * The verdict is "stale" when the source changed more than
 * STALE_BUFFER_MS after the evidence was captured.
 *
 * An `evidenceMtimeMs` of 0 means no evidence was found on disk; that is
 * reported as `stale: false` because the parent gate already has its own
 * "no evidence found" failure and two gates should not fail with
 * overlapping reasons. Values that cannot be represented as a date (NaN,
 * ±Infinity, out-of-range numbers, non-numbers) are treated the same way
 * instead of letting `toISOString()` throw a RangeError.
 *
 * @param {string} cwd - Project root scanned for the newest source file.
 * @param {number} evidenceMtimeMs - Evidence mtime in epoch milliseconds.
 * @returns {{stale: boolean, source_path: string | null,
 *   source_mtime_iso: string | null, evidence_mtime_iso: string | null,
 *   delta_minutes: number}} Freshness verdict; `delta_minutes` is
 *   source-minus-evidence, rounded to the nearest minute.
 */
export function isEvidenceStale(cwd, evidenceMtimeMs) {
  const hasUsableEvidence =
    typeof evidenceMtimeMs === "number" &&
    evidenceMtimeMs !== 0 &&
    !Number.isNaN(new Date(evidenceMtimeMs).getTime());
  if (!hasUsableEvidence) {
    return {
      stale: false,
      source_path: null,
      source_mtime_iso: null,
      evidence_mtime_iso: null,
      delta_minutes: 0,
    };
  }
  const evidenceIso = new Date(evidenceMtimeMs).toISOString();
  const newest = newestSourceMtime(cwd);
  if (!newest) {
    // No source files found: nothing the evidence could be older than.
    return {
      stale: false,
      source_path: null,
      source_mtime_iso: null,
      evidence_mtime_iso: evidenceIso,
      delta_minutes: 0,
    };
  }
  const stale = newest.mtimeMs > evidenceMtimeMs + STALE_BUFFER_MS;
  return {
    stale,
    source_path: newest.path,
    source_mtime_iso: new Date(newest.mtimeMs).toISOString(),
    evidence_mtime_iso: evidenceIso,
    delta_minutes: Math.round((newest.mtimeMs - evidenceMtimeMs) / 60_000),
  };
}
209
/**
 * One-call staleness check for a known run id: resolves the run directory
 * under the workspace's artifacts base dir, takes its evidence mtime via
 * `runEvidenceMtime`, and passes that to `isEvidenceStale`.
 *
 * @param {string} cwd - Project root (also used to locate the artifacts dir).
 * @param {string} runId - Identifier of the run about to be credited.
 * @returns {object} The verdict returned by `isEvidenceStale`.
 */
export function isRunEvidenceStale(cwd, runId) {
  const evidenceMtimeMs = runEvidenceMtime(
    getRunDir(runId, getArtifactsBaseDir(cwd)),
  );
  return isEvidenceStale(cwd, evidenceMtimeMs);
}
219
/**
 * Turn a staleness verdict into a one-line suffix for a gate's reason
 * string.
 *
 * @param {{stale: boolean, source_path: string | null, delta_minutes: number}} v
 *   Verdict as produced by `isEvidenceStale`.
 * @returns {string} Empty string for a fresh verdict; otherwise a sentence
 *   (with a leading space) naming the delta in minutes and, when known,
 *   the newest source path.
 */
export function formatStalenessSuffix(v) {
  if (v.stale) {
    let newestNote = "";
    if (v.source_path) {
      newestNote = ` (newest source: ${v.source_path})`;
    }
    return [
      ` STALE: source code changed ${v.delta_minutes} min after the evidence `,
      `was captured${newestNote}. Re-record AFTER the latest edit before re-gating.`,
    ].join("");
  }
  return "";
}
231
+ //# sourceMappingURL=evidence_freshness.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evidence_freshness.js","sourceRoot":"","sources":["../../src/evidence/evidence_freshness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACvD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,cAAc;IACd,MAAM;IACN,KAAK;IACL,OAAO;IACP,SAAS;IACT,WAAW;IACX,WAAW;IACX,MAAM;IACN,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,aAAa;IACb,aAAa;IACb,QAAQ;IACR,OAAO;IACP,MAAM;CACP,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG;IACzB,cAAc;IACd,cAAc;IACd,sBAAsB;IACtB,mBAAmB;IACnB,cAAc;IACd,gBAAgB;IAChB,eAAe;IACf,QAAQ;CACT,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,IAAI,MAAM,GAA6C,IAAI,CAAC;IAE5D,SAAS,KAAK,CAAC,GAAW,EAAE,KAAa;QACvC,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QACtB,IAAI,OAAmF,CAAC;QACxF,IAAI,CAAC;YACH,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAA8B,CAAC;QACnF,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;QACT,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;YACtB,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,SAAS;YAClC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;gBACtB,KAAK,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YACtB,CAAC;iBAAM,IAAI,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC;gBACxB,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAAE,SAAS;gBAC7D,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;oBAC/B,IAAI,CAAC,MAAM,IAAI,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnC,MAAM,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;oBACpC,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,qBAAqB;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,GAAG,CAAC;QAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAc;IAC7C,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,OAAO,CAAC,CAAC;IAElC,IAAI,GAAG,GAAG,CAAC
,CAAC;IACZ,MAAM,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;IACtE,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAC/B,IAAI,EAAE,GAAG,GAAG;gBAAE,GAAG,GAAG,EAAE,CAAC;QACzB,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;QACD,+DAA+D;QAC/D,kEAAkE;QAClE,wCAAwC;QACxC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;YAC/B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;oBACxC,IAAI,EAAE,GAAG,GAAG;wBAAE,GAAG,GAAG,EAAE,CAAC;gBACzB,CAAC;gBAAC,MAAM,CAAC;oBACP,YAAY;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;IACH,CAAC;IAED,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;QACd,IAAI,CAAC;YACH,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAWD;;;;;;;;;;GAUG;AACH,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B,MAAM,UAAU,eAAe,CAC7B,GAAW,EACX,eAAuB;IAEvB,IAAI,eAAe,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,WAAW,EAAE,IAAI;YACjB,gBAAgB,EAAE,IAAI;YACtB,kBAAkB,EAAE,IAAI;YACxB,aAAa,EAAE,CAAC;SACjB,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,WAAW,EAAE,IAAI;YACjB,gBAAgB,EAAE,IAAI;YACtB,kBAAkB,EAAE,IAAI,IAAI,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE;YAC3D,aAAa,EAAE,CAAC;SACjB,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,GAAG,eAAe,GAAG,eAAe,CAAC;IACjE,OAAO;QACL,KAAK;QACL,WAAW,EAAE,MAAM,CAAC,IAAI;QACxB,gBAAgB,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE;QACxD,kBAAkB,EAAE,IAAI,IAAI,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE;QAC3D,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,GAAG,eAAe,CAAC,GAAG,MAAM,CAAC;KACvE,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,KAAa;IAC3D,MAAM,OAAO,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC
;IACzC,OAAO,eAAe,CAAC,GAAG,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;AACxD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,CAAmB;IACvD,IAAI,CAAC,CAAC,CAAC,KAAK;QAAE,OAAO,EAAE,CAAC;IACxB,MAAM,KAAK,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxE,OAAO,CACL,+BAA+B,CAAC,CAAC,aAAa,0BAA0B;QACxE,eAAe,KAAK,qDAAqD,CAC1E,CAAC;AACJ,CAAC"}
@@ -108,6 +108,21 @@ export interface DepthVerdict {
108
108
  required: number;
109
109
  have: number;
110
110
  }>;
111
+ /**
112
+ * 0.1.54 E6 — Soft warning surfaced alongside the depth verdict when
113
+ * more than 10% of attempted interactions failed. The depth gate itself
114
+ * still passes if successful counts meet minimums; the warning gives
115
+ * the agent signal that "successful clicks aren't landing where you
116
+ * think they are" so they can fix bad-coordinate / unresolved-selector
117
+ * loops instead of repeatedly hammering the same dead spot.
118
+ */
119
+ failure_rate_warning?: {
120
+ failed: number;
121
+ successful: number;
122
+ total: number;
123
+ ratio: number;
124
+ message: string;
125
+ };
111
126
  }
112
127
  /**
113
128
  * Compare the observed coverage against the project's minimums. Returns
@@ -1 +1 @@
1
- {"version":3,"file":"interaction_coverage.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_coverage.ts"],"names":[],"mappings":"AAIA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,kBAAkB,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,aAAa,EAAE,MAAM,CAAC;IACtB;;;;;;;OAOG;IACH,mBAAmB,EAAE,MAAM,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB;;;;;;;;;;;;;OAaG;IACH,gCAAgC,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB,EAAE;QAClB,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,qBAAqB,EAAE,MAAM,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;CACH;AAED,eAAO,MAAM,sBAAsB,EAAE,aAgBpC,CAAC;AA8TF;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,MAAM,GAAG,mBAAmB,CAgG3E;AA0CD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,EAAE,OAAO,CAAC;IAC9B;;;;;;;;;OASG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,KAAK,CAAC;QAChB,MAAM,EACF,MAAM,kBAAkB,GACxB,kBAAkB,GAClB,gBAAgB,GAChB,YAAY,GACZ,uBAAuB,GACvB,gBAAgB,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;CACJ;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,mBAAmB,EAC7B,QAAQ,EAAE,aAAa,EACvB,SAAS,CAAC,EAAE,iBAAiB,GAC5B,YAAY,CAoGd;AAgED;;;GAGG;AACH;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC
,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kBAAkB,CAAC,EAAE;QACnB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;CACH,CAAC;AAEF,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,qBAAqB,GAAG,SAAS,GAC1C,aAAa,CAgCf;AA2BD,MAAM,WAAW,kBAAkB;IACjC,8FAA8F;IAC9F,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IACrB,uEAAuE;IACvE,gBAAgB,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,cAAc,EAAE,MAAM,CAAC;IACvB,uGAAuG;IACvG,SAAS,EAAE,MAAM,CAAC;CACnB;AAyID;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,kBAAkB,CAiHjE;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,kBAAkB,EACxB,SAAS,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,SAAS,EACzG,QAAQ,GAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAsD,GAC7H,cAAc,CAsChB"}
1
+ {"version":3,"file":"interaction_coverage.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_coverage.ts"],"names":[],"mappings":"AAIA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,kBAAkB,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,aAAa,EAAE,MAAM,CAAC;IACtB;;;;;;;OAOG;IACH,mBAAmB,EAAE,MAAM,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB;;;;;;;;;;;;;OAaG;IACH,gCAAgC,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB,EAAE;QAClB,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,qBAAqB,EAAE,MAAM,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;CACH;AAED,eAAO,MAAM,sBAAsB,EAAE,aAgBpC,CAAC;AAuUF;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,MAAM,GAAG,mBAAmB,CA2G3E;AA0CD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,EAAE,OAAO,CAAC;IAC9B;;;;;;;;;OASG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,KAAK,CAAC;QAChB,MAAM,EACF,MAAM,kBAAkB,GACxB,kBAAkB,GAClB,gBAAgB,GAChB,YAAY,GACZ,uBAAuB,GACvB,gBAAgB,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE;QACrB,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,mBAAmB,EAC7B,QAAQ,EAAE,aAAa,EACvB,SAAS,CAAC,EAAE,iBAAiB,GAC
5B,YAAY,CAmId;AAgED;;;GAGG;AACH;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kBAAkB,CAAC,EAAE;QACnB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;CACH,CAAC;AAEF,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,qBAAqB,GAAG,SAAS,GAC1C,aAAa,CAgCf;AA2BD,MAAM,WAAW,kBAAkB;IACjC,8FAA8F;IAC9F,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IACrB,uEAAuE;IACvE,gBAAgB,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,cAAc,EAAE,MAAM,CAAC;IACvB,uGAAuG;IACvG,SAAS,EAAE,MAAM,CAAC;CACnB;AAyID;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,kBAAkB,CAiHjE;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,kBAAkB,EACxB,SAAS,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,SAAS,EACzG,QAAQ,GAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAsD,GAC7H,cAAc,CAsChB"}
@@ -69,6 +69,16 @@ function isCommitKey(args) {
69
69
  return /(?:^|[+ ])(enter|return)$/.test(key) || /(?:^|[+ ])(enter|return)$/.test(combo);
70
70
  }
71
71
  function bucketOne(entry, buckets) {
72
+ // 0.1.54 E5 — skip non-interaction log entries (replay-frame markers,
73
+ // pure console_error rollups, runtime-log entries) that share the
74
+ // interaction_log.jsonl file. An "interaction" entry must have a
75
+ // string `action` AND an object `input_args` — anything else is
76
+ // structural noise that historically polluted the "undefined" bucket
77
+ // (Photometry-DB E2E #11 had 150/560 entries hit "undefined").
78
+ if (typeof entry.action !== "string" || entry.action.length === 0)
79
+ return;
80
+ if (entry.input_args === null || typeof entry.input_args !== "object")
81
+ return;
72
82
  const action = (entry.action ?? "").toLowerCase();
73
83
  const args = entry.input_args ?? {};
74
84
  const success = entry.success !== false; // default to true when absent
@@ -343,8 +353,16 @@ export function collectInteractionCoverage(cwd) {
343
353
  const files = readdirSync(logsDir).filter((f) => f === "interaction_log.jsonl" || f.startsWith("interaction_log") && f.endsWith(".jsonl"));
344
354
  for (const f of files) {
345
355
  const entries = parseLogFile(join(logsDir, f));
346
- allEntries.push(...entries);
347
- for (const e of entries)
356
+ // 0.1.54 E5 — gate non-interaction rows BEFORE pushing them into
357
+ // the aggregate. The same predicate as bucketOne so the depth
358
+ // gate's `successful` count and the dev-report breakdown agree
359
+ // on what counts as an interaction.
360
+ const real = entries.filter((e) => typeof e.action === "string" &&
361
+ e.action.length > 0 &&
362
+ e.input_args !== null &&
363
+ typeof e.input_args === "object");
364
+ allEntries.push(...real);
365
+ for (const e of real)
348
366
  bucketOne(e, buckets);
349
367
  }
350
368
  }
@@ -518,12 +536,39 @@ export function evaluateDepth(coverage, minimums, discovery) {
518
536
  shortfalls.push(c);
519
537
  }
520
538
  }
539
+ // 0.1.54 E6 — Failed-interaction-rate warning. When the recording shows
540
+ // more than 10% of attempts failing, append a soft warning to the gate
541
+ // reason and surface the structured `failure_rate_warning` so the gate
542
+ // wrapper can also write a `high_failure_rate` cycle_issue. We
543
+ // intentionally do NOT block the depth gate on this — the depth gate's
544
+ // job is bucket coverage; failed clicks usually mean bad coordinates or
545
+ // unresolved selectors and the right fix is to inspect the modal /
546
+ // re-resolve the element, not to demand more attempts.
547
+ const failed = Math.max(0, coverage.total - coverage.successful);
548
+ const denom = failed + coverage.successful;
549
+ const ratio = denom > 0 ? failed / denom : 0;
550
+ const failureRateWarning = ratio > 0.1 && denom >= 10
551
+ ? {
552
+ failed,
553
+ successful: coverage.successful,
554
+ total: denom,
555
+ ratio,
556
+ message: `WARN: high interaction failure rate (${(ratio * 100).toFixed(1)}% — ${failed} of ${denom} attempts failed). ` +
557
+ `The agent may be clicking coordinates that miss their target; run codeloop_handle_modal to clear any blocking dialog, ` +
558
+ `or re-inspect with codeloop_interact action: "win_ui_inspect" to get a fresh AutomationId before the next attempt. ` +
559
+ `Re-running the same coordinate click is unlikely to start working without first changing what you're targeting.`,
560
+ }
561
+ : undefined;
521
562
  if (shortfalls.length === 0) {
522
563
  const b = coverage.buckets;
564
+ let reason = `Deep interaction coverage met: ${coverage.successful} successful actions (click=${b.click}, navigation=${b.navigation}, input=${b.input}, commit=${b.commit}, toggle=${b.toggle}, gesture=${b.gesture}, upload=${b.upload}).`;
565
+ if (failureRateWarning)
566
+ reason += `\n${failureRateWarning.message}`;
523
567
  return {
524
568
  passed: true,
525
- reason: `Deep interaction coverage met: ${coverage.successful} successful actions (click=${b.click}, navigation=${b.navigation}, input=${b.input}, commit=${b.commit}, toggle=${b.toggle}, gesture=${b.gesture}, upload=${b.upload}).`,
569
+ reason,
526
570
  shortfalls: [],
571
+ failure_rate_warning: failureRateWarning,
527
572
  };
528
573
  }
529
574
  const lines = shortfalls.map((s) => ` - ${s.bucket}: need >= ${s.required}, have ${s.have}`);
@@ -541,10 +586,14 @@ export function evaluateDepth(coverage, minimums, discovery) {
541
586
  `These are invisible to the CRUD classifier (edit_arcs / delete_actions / create signals can't be credited from a bare coordinate click). ` +
542
587
  "When you have to use coordinates because UIA can't resolve the element, ALSO pass an `intent` (e.g. \"confirm delete\"), `description` (\"click Yes on delete confirmation dialog\"), or `purpose` field to codeloop_interact so the classifier still credits the arc.";
543
588
  }
589
+ let failReason = `Deep interaction coverage NOT met. Shortfalls:\n${lines.join("\n")}\n${hint}${coordinateHint}`;
590
+ if (failureRateWarning)
591
+ failReason += `\n${failureRateWarning.message}`;
544
592
  return {
545
593
  passed: false,
546
- reason: `Deep interaction coverage NOT met. Shortfalls:\n${lines.join("\n")}\n${hint}${coordinateHint}`,
594
+ reason: failReason,
547
595
  shortfalls,
596
+ failure_rate_warning: failureRateWarning,
548
597
  };
549
598
  }
550
599
  function buildHint(shortfalls) {