codeloop-mcp-server 0.1.52 → 0.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/critical_floors.d.ts.map +1 -1
- package/dist/auth/critical_floors.js +8 -0
- package/dist/auth/critical_floors.js.map +1 -1
- package/dist/auth/update_check.d.ts.map +1 -1
- package/dist/auth/update_check.js +19 -1
- package/dist/auth/update_check.js.map +1 -1
- package/dist/evidence/anti_rationalisation.d.ts.map +1 -1
- package/dist/evidence/anti_rationalisation.js +15 -0
- package/dist/evidence/anti_rationalisation.js.map +1 -1
- package/dist/evidence/binary_freshness.d.ts +21 -0
- package/dist/evidence/binary_freshness.d.ts.map +1 -0
- package/dist/evidence/binary_freshness.js +113 -0
- package/dist/evidence/binary_freshness.js.map +1 -0
- package/dist/evidence/change_coverage.d.ts.map +1 -1
- package/dist/evidence/change_coverage.js +22 -1
- package/dist/evidence/change_coverage.js.map +1 -1
- package/dist/evidence/cycle_issues.d.ts +99 -0
- package/dist/evidence/cycle_issues.d.ts.map +1 -0
- package/dist/evidence/cycle_issues.js +120 -0
- package/dist/evidence/cycle_issues.js.map +1 -0
- package/dist/evidence/evidence_freshness.d.ts +39 -0
- package/dist/evidence/evidence_freshness.d.ts.map +1 -0
- package/dist/evidence/evidence_freshness.js +231 -0
- package/dist/evidence/evidence_freshness.js.map +1 -0
- package/dist/evidence/interaction_coverage.d.ts +15 -0
- package/dist/evidence/interaction_coverage.d.ts.map +1 -1
- package/dist/evidence/interaction_coverage.js +53 -4
- package/dist/evidence/interaction_coverage.js.map +1 -1
- package/dist/evidence/screenshot_diff.d.ts.map +1 -1
- package/dist/evidence/screenshot_diff.js +30 -12
- package/dist/evidence/screenshot_diff.js.map +1 -1
- package/dist/index.js +197 -4
- package/dist/index.js.map +1 -1
- package/dist/runners/modal_close_strategies.d.ts +82 -0
- package/dist/runners/modal_close_strategies.d.ts.map +1 -0
- package/dist/runners/modal_close_strategies.js +226 -0
- package/dist/runners/modal_close_strategies.js.map +1 -0
- package/dist/runners/modal_detector.d.ts +17 -0
- package/dist/runners/modal_detector.d.ts.map +1 -1
- package/dist/runners/modal_detector.js +95 -22
- package/dist/runners/modal_detector.js.map +1 -1
- package/dist/tools/design_compare.d.ts.map +1 -1
- package/dist/tools/design_compare.js +22 -3
- package/dist/tools/design_compare.js.map +1 -1
- package/dist/tools/gate_check.d.ts.map +1 -1
- package/dist/tools/gate_check.js +188 -14
- package/dist/tools/gate_check.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { getArtifactsBaseDir, getRunDir, listRuns } from "./artifacts.js";
|
|
4
|
+
/**
 * Locate the run directory that cycle-issue logs should be read from or
 * written to.
 *
 * NOTE(review): this takes the FIRST entry returned by listRuns() as the
 * latest run — presumably listRuns() sorts newest-first; confirm in
 * artifacts.js.
 *
 * @param {string} workspaceRoot - Workspace root handed to getArtifactsBaseDir.
 * @returns {string | undefined} The run directory, or undefined when the
 *   artifacts base directory has no runs.
 */
function findLatestRunDir(workspaceRoot) {
    const artifactsBase = getArtifactsBaseDir(workspaceRoot);
    const runIds = listRuns(artifactsBase);
    return runIds.length === 0 ? undefined : getRunDir(runIds[0], artifactsBase);
}
|
|
11
|
+
// File names (inside <run>/logs/) for the two append-only JSONL ledgers.
const CYCLE_ISSUES_FILE = "cycle_issues.jsonl";
const CYCLE_RESOLUTIONS_FILE = "cycle_issue_resolutions.jsonl";
/**
 * Return the `logs` directory of the latest run, creating it if needed.
 *
 * @param {string} workspaceRoot - Workspace root used to find the latest run.
 * @returns {Promise<string | undefined>} Path of the logs directory, or
 *   undefined when there is no run directory yet.
 */
async function resolveLogsDir(workspaceRoot) {
    const latestRunDir = findLatestRunDir(workspaceRoot);
    if (latestRunDir) {
        const target = path.join(latestRunDir, "logs");
        // recursive: true makes this a no-op when the directory already exists.
        await fs.mkdir(target, { recursive: true });
        return target;
    }
    return undefined;
}
|
|
21
|
+
/**
 * Append one cycle issue, stamped with the current time, to
 * <run>/logs/cycle_issues.jsonl.
 *
 * Best-effort by design: when no run directory exists yet, or when the write
 * fails for any reason, the call resolves without doing anything so that
 * evidence bookkeeping can never break the tool that reported the issue.
 *
 * @param {string} workspaceRoot - Workspace root used to find the latest run.
 * @param {object} issue - Issue payload; any `timestamp` it carries is
 *   overwritten with the time of recording.
 * @returns {Promise<void>}
 */
export async function recordCycleIssue(workspaceRoot, issue) {
    try {
        const logsDir = await resolveLogsDir(workspaceRoot);
        if (logsDir) {
            const stamped = { ...issue, timestamp: new Date().toISOString() };
            const target = path.join(logsDir, CYCLE_ISSUES_FILE);
            await fs.appendFile(target, JSON.stringify(stamped) + "\n", "utf-8");
        }
    }
    catch {
        // intentional: never let evidence writes throw out of a tool path.
    }
}
|
|
39
|
+
/**
 * Append one resolution, stamped with the current time, to
 * <run>/logs/cycle_issue_resolutions.jsonl.
 *
 * Called both for explicit acknowledgements (via gate_check) and implicitly
 * by tools whose success clears an issue (e.g. kill_modal_window succeeding
 * clears modal_close_failed). Best-effort: a missing run directory or a
 * failed write is silently ignored.
 *
 * @param {string} workspaceRoot - Workspace root used to find the latest run.
 * @param {object} resolution - Resolution payload; any `timestamp` it carries
 *   is overwritten with the time of recording.
 * @returns {Promise<void>}
 */
export async function recordCycleIssueResolution(workspaceRoot, resolution) {
    try {
        const logsDir = await resolveLogsDir(workspaceRoot);
        if (logsDir) {
            const stamped = { ...resolution, timestamp: new Date().toISOString() };
            const target = path.join(logsDir, CYCLE_RESOLUTIONS_FILE);
            await fs.appendFile(target, JSON.stringify(stamped) + "\n", "utf-8");
        }
    }
    catch {
        // intentional: resolution bookkeeping must never fail the calling tool.
    }
}
|
|
58
|
+
/**
 * Read a JSON-Lines file and return the records it contains.
 *
 * Lines are trimmed (so CRLF files work) and blank lines are skipped. A line
 * is kept only when it parses as a plain JSON object: unparseable lines AND
 * lines that parse to a non-record value (number, string, boolean, null,
 * array) are dropped, because callers read properties such as `kind` and
 * `issue_kind` off every entry.
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Promise<object[]>} Parsed records in file order; an empty array
 *   when the file is missing or unreadable.
 */
async function readJsonl(filePath) {
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf-8");
    }
    catch {
        // A missing ledger just means nothing has been recorded yet.
        return [];
    }
    const records = [];
    for (const rawLine of raw.split("\n")) {
        const line = rawLine.trim();
        if (line.length === 0)
            continue;
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Corrupt or partially written line: skip it, keep the rest.
            continue;
        }
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
            records.push(parsed);
        }
    }
    return records;
}
|
|
79
|
+
/**
 * Load cycle issues and resolutions from the latest run and work out which
 * issues are still unresolved. Used by the gate_check
 * `cycle_issues_acknowledged` blocker.
 *
 * Matching is by kind, not per instance: a resolution of kind X clears every
 * issue of kind X recorded up to that resolution. An issue of kind X that is
 * recorded AFTER the latest resolution of kind X stays unresolved, so an
 * early acknowledgement (or an implicit tool-side resolution) cannot mask a
 * later recurrence. When either timestamp is missing or unparseable the
 * comparison falls back to plain kind matching.
 *
 * @param {string} workspaceRoot - Workspace root used to find the latest run.
 * @returns {Promise<{issues: object[], resolutions: object[], unresolved: object[]}>}
 *   All three arrays are empty when no run exists.
 */
export async function loadCycleIssues(workspaceRoot) {
    const runDir = findLatestRunDir(workspaceRoot);
    if (!runDir)
        return { issues: [], resolutions: [], unresolved: [] };
    const logsDir = path.join(runDir, "logs");
    const [issues, resolutions] = await Promise.all([
        readJsonl(path.join(logsDir, CYCLE_ISSUES_FILE)),
        readJsonl(path.join(logsDir, CYCLE_RESOLUTIONS_FILE)),
    ]);
    // kind -> time (ms) of the most recent resolution of that kind. A
    // resolution without a usable timestamp counts as "resolves everything"
    // (Infinity), which is the plain kind-only behaviour.
    const resolvedAtByKind = new Map();
    for (const resolution of resolutions) {
        const kind = resolution?.issue_kind;
        const parsedMs = Date.parse(resolution?.timestamp);
        const resolvedAt = Number.isNaN(parsedMs) ? Infinity : parsedMs;
        const previous = resolvedAtByKind.get(kind);
        if (previous === undefined || resolvedAt > previous) {
            resolvedAtByKind.set(kind, resolvedAt);
        }
    }
    const unresolved = issues.filter((issue) => {
        const kind = issue?.kind;
        if (!resolvedAtByKind.has(kind))
            return true;
        const raisedAt = Date.parse(issue?.timestamp);
        // Cannot order an issue that has no timestamp: treat the kind match
        // as sufficient.
        if (Number.isNaN(raisedAt))
            return false;
        return raisedAt > resolvedAtByKind.get(kind);
    });
    return { issues, resolutions, unresolved };
}
|
|
103
|
+
/**
 * Render a one-line summary of a cycle issue for the gate-check directive.
 *
 * Issues are read back from a JSONL file, so fields can be missing and the
 * kind can be one this version does not know. The function therefore always
 * returns a string and never throws on absent optional fields.
 *
 * @param {object} issue - Cycle issue record; `issue.kind` selects the format.
 * @returns {string} Summary of the form `<kind> — <details>`.
 */
export function summariseCycleIssue(issue) {
    switch (issue.kind) {
        case "click_missed_target":
            return `click_missed_target — ${issue.description ?? issue.selector ?? `coords ${JSON.stringify(issue.coords ?? null)}`}${issue.modal_kind ? ` (modal_kind=${issue.modal_kind})` : ""}`;
        case "modal_close_failed": {
            // strategies_tried may be absent in hand-edited or older records.
            const tried = Array.isArray(issue.strategies_tried) ? issue.strategies_tried : [];
            return `modal_close_failed — kind=${issue.modal_kind}, tried=[${tried.join(", ")}], hwnd=${issue.hwnd ?? "n/a"}`;
        }
        case "app_restart_during_recording":
            return `app_restart_during_recording — ${issue.reason}`;
        case "binary_mismatch":
            return `binary_mismatch — target=${issue.target_app}, binary mtime=${issue.binary_mtime}, source mtime=${issue.newest_source_mtime} (lag=${issue.lag_seconds}s)`;
        case "high_failure_rate": {
            // Derive the ratio from the counts when it was not stored, rather
            // than printing "NaN%".
            let ratio = 0;
            if (Number.isFinite(issue.ratio)) {
                ratio = issue.ratio;
            }
            else if (issue.total > 0) {
                ratio = issue.failed / issue.total;
            }
            return `high_failure_rate — ${issue.failed}/${issue.total} attempts failed (${(ratio * 100).toFixed(1)}%)`;
        }
        default:
            // Unknown kind (e.g. written by a newer version): still surface it.
            return `${String(issue.kind)} — ${issue.description ?? "(no summary available for this issue kind)"}`;
    }
}
|
|
120
|
+
//# sourceMappingURL=cycle_issues.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cycle_issues.js","sourceRoot":"","sources":["../../src/evidence/cycle_issues.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,mBAAmB,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE1E,SAAS,gBAAgB,CAAC,aAAqB;IAC7C,MAAM,IAAI,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAC;IAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACxC,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAmFD,MAAM,iBAAiB,GAAG,oBAAoB,CAAC;AAC/C,MAAM,sBAAsB,GAAG,+BAA+B,CAAC;AAE/D,KAAK,UAAU,cAAc,CAAC,aAAqB;IACjD,MAAM,MAAM,GAAG,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC/C,IAAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,aAAqB,EACrB,KAAsB;IAEtB,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO;YAAE,OAAO;QACrB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;QACnD,MAAM,IAAI,GACR,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,KAAK,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC;QAC3E,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,mEAAmE;IACrE,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,aAAqB,EACrB,UAAmD;IAEnD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO;YAAE,OAAO;QACrB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;QACxD,MAAM,IAAI,GACR,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,UAAU,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;YACtE,IAAI,CAAC;QACP,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,aAAa;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,SAAS,CAAI,QAAgB;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACjD,OAAO,GAAG;aACP,KAAK,CA
AC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,KAAK,EAAc,EAAE,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC;IACnD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AASD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,aAAqB;IAErB,MAAM,MAAM,GAAG,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC/C,MAAM,KAAK,GAAsB,EAAE,MAAM,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;IACjF,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;IACnE,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC9C,SAAS,CAAa,UAAU,CAAC;QACjC,SAAS,CAAuB,eAAe,CAAC;KACjD,CAAC,CAAC;IACH,iEAAiE;IACjE,uEAAuE;IACvE,gEAAgE;IAChE,6DAA6D;IAC7D,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACpE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAiB;IACnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,qBAAqB;YACxB,OAAO,yBAAyB,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,QAAQ,IAAI,UAAU,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,gBAAgB,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC1L,KAAK,oBAAoB;YACvB,OAAO,6BAA6B,KAAK,CAAC,UAAU,YAAY,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,IAAI,KAAK,EAAE,CAAC;QACpI,KAAK,8BAA8B;YACjC,OAAO,kCAAkC,KAAK,CAAC,MAAM,EAAE,CAAC;QAC1D,KAAK,iBAAiB;YACpB,OAAO,4BAA4B,KAAK,CAAC,UAAU,kBAAkB,KAAK,CAAC,YAAY,kBAAkB,KAAK,CAAC,mBAAmB,SAAS,KAAK,CAAC,WAAW,IAAI,CAAC;
QACnK,KAAK,mBAAmB;YACtB,OAAO,uBAAuB,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,qBAAqB,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Walk depth=2 of the project to find the newest source-file mtime. Mirrors
|
|
3
|
+
* verify_staleness.ts intentionally — both signals are coarse and meant to
|
|
4
|
+
* fire on every edit at workspace-root or first-level package directory.
|
|
5
|
+
*/
|
|
6
|
+
export declare function newestSourceMtime(cwd: string): {
|
|
7
|
+
path: string;
|
|
8
|
+
mtimeMs: number;
|
|
9
|
+
} | null;
|
|
10
|
+
/**
|
|
11
|
+
* Best-effort mtime for a run's content. Prefers the latest mtime across
|
|
12
|
+
* the run's `videos/`, `logs/`, `screenshots/`, and `replay_frames/`
|
|
13
|
+
* subdirectories so we don't get tricked when the run dir itself was
|
|
14
|
+
* created early but the actual evidence was written hours later. Falls
|
|
15
|
+
* back to the run dir mtime when no content subdir exists.
|
|
16
|
+
*/
|
|
17
|
+
export declare function runEvidenceMtime(runDir: string): number;
|
|
18
|
+
export interface FreshnessVerdict {
|
|
19
|
+
/** true when the source code changed AFTER the evidence was captured. */
|
|
20
|
+
stale: boolean;
|
|
21
|
+
source_path: string | null;
|
|
22
|
+
source_mtime_iso: string | null;
|
|
23
|
+
evidence_mtime_iso: string | null;
|
|
24
|
+
delta_minutes: number;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Compare an evidence mtime against the newest source-file mtime under `cwd`.
 * The verdict is stale when the newest source file is more than 60 seconds
 * newer than the evidence. An `evidenceMtimeMs` of 0 (no evidence found) and
 * a workspace with no source files both yield `stale: false`.
 */
export declare function isEvidenceStale(cwd: string, evidenceMtimeMs: number): FreshnessVerdict;
|
|
27
|
+
/**
|
|
28
|
+
* Convenience helper for callers that already know which run id they're
|
|
29
|
+
* about to credit. Resolves the run dir, computes its evidence mtime,
|
|
30
|
+
* and runs the staleness check in one call.
|
|
31
|
+
*/
|
|
32
|
+
export declare function isRunEvidenceStale(cwd: string, runId: string): FreshnessVerdict;
|
|
33
|
+
/**
|
|
34
|
+
* Format the staleness verdict into a single line suitable for appending
|
|
35
|
+
* to a gate's reason string. Returns an empty string when the verdict is
|
|
36
|
+
* fresh.
|
|
37
|
+
*/
|
|
38
|
+
export declare function formatStalenessSuffix(v: FreshnessVerdict): string;
|
|
39
|
+
//# sourceMappingURL=evidence_freshness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evidence_freshness.d.ts","sourceRoot":"","sources":["../../src/evidence/evidence_freshness.ts"],"names":[],"mappings":"AAyEA;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiCvF;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAyCvD;AAED,MAAM,WAAW,gBAAgB;IAC/B,yEAAyE;IACzE,KAAK,EAAE,OAAO,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;CACvB;AAeD,wBAAgB,eAAe,CAC7B,GAAG,EAAE,MAAM,EACX,eAAe,EAAE,MAAM,GACtB,gBAAgB,CA4BlB;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,gBAAgB,CAI/E;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,CAAC,EAAE,gBAAgB,GAAG,MAAM,CAOjE"}
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import { existsSync, readdirSync, statSync } from "fs";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import { getArtifactsBaseDir, getRunDir } from "./artifacts.js";
|
|
4
|
+
/**
|
|
5
|
+
* 0.1.53 D2 — Evidence freshness.
|
|
6
|
+
*
|
|
7
|
+
* The Photometry-DB E2E #10 transcript shipped at 100% confidence with all
|
|
8
|
+
* 11 gates green even though the agent NEVER re-recorded after adding the
|
|
9
|
+
* Product Name / Product Code column to the Luminaire export pipeline. The
|
|
10
|
+
* gate accepted:
|
|
11
|
+
*
|
|
12
|
+
* - video_evidence: "1 video(s) recorded in sibling run …_s6nzg8
|
|
13
|
+
* (28 min before this gate run)"
|
|
14
|
+
* - interaction_depth_evidence: 440 successful actions (from sibling run)
|
|
15
|
+
* - user_journey_evidence: full_arcs=1 (from sibling run)
|
|
16
|
+
*
|
|
17
|
+
* — all mined from a recording that pre-dated the source-file changes by
|
|
18
|
+
* ~30 minutes. The 0.1.51 cross-run fallback (`MAX_SIBLING_AGE_MS = 1 hour`
|
|
19
|
+
* relative to NOW) was meant to keep the gate from blowing up when an agent
|
|
20
|
+
* starts the recording in a separate run from the verify, but it has no
|
|
21
|
+
* concept of "the source code changed between the recording and the gate".
|
|
22
|
+
*
|
|
23
|
+
* D2 closes that gap by anchoring evidence to source-file mtime, NOT to
|
|
24
|
+
* wall-clock now:
|
|
25
|
+
*
|
|
26
|
+
* - Walk source files (depth-2, same SKIP_DIRS as H2) for the newest
|
|
27
|
+
* mtime.
|
|
28
|
+
* - For every sibling run we'd otherwise credit (video, interaction
|
|
29
|
+
* log, replay frames), require the run's newest evidence-file mtime to be
|
|
30
|
+
* >= newest source-file mtime, otherwise the evidence is STALE and
|
|
31
|
+
* the gate must reject it.
|
|
32
|
+
* - The gate's reason string surfaces both timestamps so the agent
|
|
33
|
+
* can tell at a glance "my recording is from 2:14 PM but I edited
|
|
34
|
+
* PhotometricDataViewModel.cs at 2:42 PM — I need to re-record".
|
|
35
|
+
*/
|
|
36
|
+
// Directory names that never contain hand-edited source (dependencies, VCS
// metadata, IDE state, build output, this tool's own artifacts).
const SKIP_DIRS = new Set([
    "node_modules",
    ".git",
    ".vs",
    ".idea",
    ".vscode",
    ".codeloop",
    "artifacts",
    "dist",
    "build",
    "out",
    "bin",
    "obj",
    "Pods",
    ".next",
    ".turbo",
    ".cache",
    ".gradle",
    "DerivedData",
    "__pycache__",
    "target",
    ".venv",
    "venv",
]);
// File names whose mtime must not count as a source edit: OS droppings,
// lock files rewritten by package managers, and log files.
const SKIP_FILE_PATTERNS = [
    /^\.DS_Store$/,
    /^Thumbs\.db$/,
    /^package-lock\.json$/,
    /^pnpm-lock\.yaml$/,
    /^yarn\.lock$/,
    /^poetry\.lock$/,
    /^Cargo\.lock$/,
    /\.log$/,
];
/**
 * Find the most recently modified source file near the top of the project.
 *
 * Visits `cwd` and directories up to two levels below it. Directories named
 * in SKIP_DIRS are not descended into; files matching SKIP_FILE_PATTERNS are
 * ignored. SKIP_DIRS is applied to directories only, so a regular file that
 * happens to be called `build`, `bin`, `out` or `target` still counts as a
 * source file. Unreadable directories and files are skipped silently.
 *
 * @param {string} cwd - Project root to scan.
 * @returns {{path: string, mtimeMs: number} | null} Newest file found, or
 *   null when `cwd` does not exist or contains no eligible file.
 */
export function newestSourceMtime(cwd) {
    let newest = null;
    function visit(dir, depth) {
        if (depth > 2)
            return;
        let entries;
        try {
            entries = readdirSync(dir, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const ent of entries) {
            const { name } = ent;
            const fullPath = join(dir, name);
            if (ent.isDirectory()) {
                if (!SKIP_DIRS.has(name))
                    visit(fullPath, depth + 1);
                continue;
            }
            // Symlinks, sockets, etc. are neither files nor directories here.
            if (!ent.isFile())
                continue;
            if (SKIP_FILE_PATTERNS.some((re) => re.test(name)))
                continue;
            try {
                const ms = statSync(fullPath).mtimeMs;
                if (!newest || ms > newest.mtimeMs) {
                    newest = { path: fullPath, mtimeMs: ms };
                }
            }
            catch {
                /* skip unreadable */
            }
        }
    }
    if (existsSync(cwd))
        visit(cwd, 0);
    return newest;
}
|
|
114
|
+
/**
 * Best-effort "when was this run's evidence last written" timestamp.
 *
 * Takes the newest mtime among the run's `videos/`, `logs/`, `screenshots/`
 * and `replay_frames/` directories and their direct children, so a run
 * directory created early but filled in later is dated by its content.
 * Children are inspected individually because a directory's own mtime does
 * not always change when a file inside it is rewritten (notably on Windows).
 *
 * @param {string} runDir - Run directory to inspect.
 * @returns {number} Newest mtime in milliseconds; the run directory's own
 *   mtime when no content was found; 0 when `runDir` does not exist.
 */
export function runEvidenceMtime(runDir) {
    if (!existsSync(runDir))
        return 0;
    // Any stat/readdir failure simply contributes nothing.
    const mtimeOrZero = (target) => {
        try {
            return statSync(target).mtimeMs;
        }
        catch {
            return 0;
        }
    };
    const childrenOf = (dir) => {
        try {
            return readdirSync(dir);
        }
        catch {
            return [];
        }
    };
    let latest = 0;
    for (const sub of ["videos", "logs", "screenshots", "replay_frames"]) {
        const subDir = join(runDir, sub);
        if (!existsSync(subDir))
            continue;
        latest = Math.max(latest, mtimeOrZero(subDir));
        for (const child of childrenOf(subDir)) {
            latest = Math.max(latest, mtimeOrZero(join(subDir, child)));
        }
    }
    return latest === 0 ? mtimeOrZero(runDir) : latest;
}
|
|
168
|
+
/**
 * Decide whether evidence captured at `evidenceMtimeMs` pre-dates the latest
 * source edit under `cwd`.
 *
 * Evidence is stale when the newest source file is more than
 * STALE_BUFFER_MS (60 seconds) newer than the evidence; the buffer absorbs
 * small timestamp skew between the editor and the recorder.
 *
 * `evidenceMtimeMs === 0` means no evidence was found on disk. That is
 * reported as NOT stale because the calling gate already fails with its own
 * "no evidence found" reason and a second overlapping failure adds nothing.
 *
 * @param {string} cwd - Project root scanned for source files.
 * @param {number} evidenceMtimeMs - Evidence timestamp in milliseconds.
 * @returns {{stale: boolean, source_path: (string|null), source_mtime_iso: (string|null), evidence_mtime_iso: (string|null), delta_minutes: number}}
 *   `delta_minutes` is source minus evidence, rounded to the nearest minute.
 */
const STALE_BUFFER_MS = 60_000;
export function isEvidenceStale(cwd, evidenceMtimeMs) {
    // Start from the "fresh, nothing known" verdict and fill in what we learn.
    const verdict = {
        stale: false,
        source_path: null,
        source_mtime_iso: null,
        evidence_mtime_iso: null,
        delta_minutes: 0,
    };
    if (evidenceMtimeMs === 0)
        return verdict;
    const newest = newestSourceMtime(cwd);
    if (!newest) {
        verdict.evidence_mtime_iso = new Date(evidenceMtimeMs).toISOString();
        return verdict;
    }
    verdict.stale = newest.mtimeMs > evidenceMtimeMs + STALE_BUFFER_MS;
    verdict.source_path = newest.path;
    verdict.source_mtime_iso = new Date(newest.mtimeMs).toISOString();
    verdict.evidence_mtime_iso = new Date(evidenceMtimeMs).toISOString();
    verdict.delta_minutes = Math.round((newest.mtimeMs - evidenceMtimeMs) / 60_000);
    return verdict;
}
|
|
209
|
+
/**
 * One-call staleness check for a known run id: resolves the run directory
 * under the workspace's artifacts base, takes its evidence mtime, and
 * compares that against the newest source edit.
 *
 * @param {string} cwd - Project root (also used to locate the artifacts dir).
 * @param {string} runId - Id of the run whose evidence is being credited.
 * @returns {object} The verdict produced by isEvidenceStale.
 */
export function isRunEvidenceStale(cwd, runId) {
    const evidenceMtimeMs = runEvidenceMtime(getRunDir(runId, getArtifactsBaseDir(cwd)));
    return isEvidenceStale(cwd, evidenceMtimeMs);
}
|
|
219
|
+
/**
 * Turn a staleness verdict into a sentence that can be appended to a gate's
 * reason string.
 *
 * @param {{stale: boolean, source_path: (string|null), delta_minutes: number}} v
 *   Verdict to format.
 * @returns {string} Empty string for a fresh verdict; otherwise a
 *   leading-space-prefixed " STALE: …" message naming the newest source file
 *   when one is known.
 */
export function formatStalenessSuffix(v) {
    if (v.stale) {
        const origin = v.source_path ? ` (newest source: ${v.source_path})` : "";
        return ` STALE: source code changed ${v.delta_minutes} min after the evidence was captured${origin}. Re-record AFTER the latest edit before re-gating.`;
    }
    return "";
}
|
|
231
|
+
//# sourceMappingURL=evidence_freshness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evidence_freshness.js","sourceRoot":"","sources":["../../src/evidence/evidence_freshness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACvD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,cAAc;IACd,MAAM;IACN,KAAK;IACL,OAAO;IACP,SAAS;IACT,WAAW;IACX,WAAW;IACX,MAAM;IACN,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,aAAa;IACb,aAAa;IACb,QAAQ;IACR,OAAO;IACP,MAAM;CACP,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG;IACzB,cAAc;IACd,cAAc;IACd,sBAAsB;IACtB,mBAAmB;IACnB,cAAc;IACd,gBAAgB;IAChB,eAAe;IACf,QAAQ;CACT,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,IAAI,MAAM,GAA6C,IAAI,CAAC;IAE5D,SAAS,KAAK,CAAC,GAAW,EAAE,KAAa;QACvC,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QACtB,IAAI,OAAmF,CAAC;QACxF,IAAI,CAAC;YACH,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAA8B,CAAC;QACnF,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;QACT,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;YACtB,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,SAAS;YAClC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;gBACtB,KAAK,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YACtB,CAAC;iBAAM,IAAI,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC;gBACxB,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAAE,SAAS;gBAC7D,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;oBAC/B,IAAI,CAAC,MAAM,IAAI,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnC,MAAM,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;oBACpC,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,qBAAqB;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,GAAG,CAAC;QAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAc;IAC7C,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,OAAO,CAAC,CAAC;IAElC,IAAI,GAAG,GAAG,CAAC,C
AAC;IACZ,MAAM,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;IACtE,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAC/B,IAAI,EAAE,GAAG,GAAG;gBAAE,GAAG,GAAG,EAAE,CAAC;QACzB,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;QACD,+DAA+D;QAC/D,kEAAkE;QAClE,wCAAwC;QACxC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;YAC/B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,IAAI,CAAC;oBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;oBACxC,IAAI,EAAE,GAAG,GAAG;wBAAE,GAAG,GAAG,EAAE,CAAC;gBACzB,CAAC;gBAAC,MAAM,CAAC;oBACP,YAAY;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;IACH,CAAC;IAED,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;QACd,IAAI,CAAC;YACH,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAWD;;;;;;;;;;GAUG;AACH,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B,MAAM,UAAU,eAAe,CAC7B,GAAW,EACX,eAAuB;IAEvB,IAAI,eAAe,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,WAAW,EAAE,IAAI;YACjB,gBAAgB,EAAE,IAAI;YACtB,kBAAkB,EAAE,IAAI;YACxB,aAAa,EAAE,CAAC;SACjB,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,WAAW,EAAE,IAAI;YACjB,gBAAgB,EAAE,IAAI;YACtB,kBAAkB,EAAE,IAAI,IAAI,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE;YAC3D,aAAa,EAAE,CAAC;SACjB,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,GAAG,eAAe,GAAG,eAAe,CAAC;IACjE,OAAO;QACL,KAAK;QACL,WAAW,EAAE,MAAM,CAAC,IAAI;QACxB,gBAAgB,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE;QACxD,kBAAkB,EAAE,IAAI,IAAI,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE;QAC3D,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,GAAG,eAAe,CAAC,GAAG,MAAM,CAAC;KACvE,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,KAAa;IAC3D,MAAM,OAAO,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;I
ACzC,OAAO,eAAe,CAAC,GAAG,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;AACxD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,CAAmB;IACvD,IAAI,CAAC,CAAC,CAAC,KAAK;QAAE,OAAO,EAAE,CAAC;IACxB,MAAM,KAAK,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxE,OAAO,CACL,+BAA+B,CAAC,CAAC,aAAa,0BAA0B;QACxE,eAAe,KAAK,qDAAqD,CAC1E,CAAC;AACJ,CAAC"}
|
|
@@ -108,6 +108,21 @@ export interface DepthVerdict {
|
|
|
108
108
|
required: number;
|
|
109
109
|
have: number;
|
|
110
110
|
}>;
|
|
111
|
+
/**
|
|
112
|
+
* 0.1.54 E6 — Soft warning surfaced alongside the depth verdict when
|
|
113
|
+
* more than 10% of attempted interactions failed. The depth gate itself
|
|
114
|
+
* still passes if successful counts meet minimums; the warning gives
|
|
115
|
+
* the agent signal that "successful clicks aren't landing where you
|
|
116
|
+
* think they are" so they can fix bad-coordinate / unresolved-selector
|
|
117
|
+
* loops instead of repeatedly hammering the same dead spot.
|
|
118
|
+
*/
|
|
119
|
+
failure_rate_warning?: {
|
|
120
|
+
failed: number;
|
|
121
|
+
successful: number;
|
|
122
|
+
total: number;
|
|
123
|
+
ratio: number;
|
|
124
|
+
message: string;
|
|
125
|
+
};
|
|
111
126
|
}
|
|
112
127
|
/**
|
|
113
128
|
* Compare the observed coverage against the project's minimums. Returns
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interaction_coverage.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_coverage.ts"],"names":[],"mappings":"AAIA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,kBAAkB,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,aAAa,EAAE,MAAM,CAAC;IACtB;;;;;;;OAOG;IACH,mBAAmB,EAAE,MAAM,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB;;;;;;;;;;;;;OAaG;IACH,gCAAgC,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB,EAAE;QAClB,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,qBAAqB,EAAE,MAAM,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;CACH;AAED,eAAO,MAAM,sBAAsB,EAAE,aAgBpC,CAAC;
|
|
1
|
+
{"version":3,"file":"interaction_coverage.d.ts","sourceRoot":"","sources":["../../src/evidence/interaction_coverage.ts"],"names":[],"mappings":"AAIA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,kBAAkB,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,aAAa,EAAE,MAAM,CAAC;IACtB;;;;;;;OAOG;IACH,mBAAmB,EAAE,MAAM,CAAC;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB;;;;;;;;;;;;;OAaG;IACH,gCAAgC,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB,EAAE;QAClB,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,qBAAqB,EAAE,MAAM,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;CACH;AAED,eAAO,MAAM,sBAAsB,EAAE,aAgBpC,CAAC;AAuUF;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,MAAM,GAAG,mBAAmB,CA2G3E;AA0CD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,EAAE,OAAO,CAAC;IAC9B;;;;;;;;;OASG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,KAAK,CAAC;QAChB,MAAM,EACF,MAAM,kBAAkB,GACxB,kBAAkB,GAClB,gBAAgB,GAChB,YAAY,GACZ,uBAAuB,GACvB,gBAAgB,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE;QACrB,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,mBAAmB,EAC7B,QAAQ,EAAE,aAAa,EACvB,SAAS,CAAC,EAAE,iBAAiB,GAC5B
,YAAY,CAmId;AAgED;;;GAGG;AACH;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kBAAkB,CAAC,EAAE;QACnB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;CACH,CAAC;AAEF,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,qBAAqB,GAAG,SAAS,GAC1C,aAAa,CAgCf;AA2BD,MAAM,WAAW,kBAAkB;IACjC,8FAA8F;IAC9F,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IACrB,uEAAuE;IACvE,gBAAgB,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,cAAc,EAAE,MAAM,CAAC;IACvB,uGAAuG;IACvG,SAAS,EAAE,MAAM,CAAC;CACnB;AAyID;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,kBAAkB,CAiHjE;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,kBAAkB,EACxB,SAAS,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,SAAS,EACzG,QAAQ,GAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAsD,GAC7H,cAAc,CAsChB"}
|
|
@@ -69,6 +69,16 @@ function isCommitKey(args) {
|
|
|
69
69
|
return /(?:^|[+ ])(enter|return)$/.test(key) || /(?:^|[+ ])(enter|return)$/.test(combo);
|
|
70
70
|
}
|
|
71
71
|
function bucketOne(entry, buckets) {
|
|
72
|
+
// 0.1.54 E5 — skip non-interaction log entries (replay-frame markers,
|
|
73
|
+
// pure console_error rollups, runtime-log entries) that share the
|
|
74
|
+
// interaction_log.jsonl file. An "interaction" entry must have a
|
|
75
|
+
// non-empty string `action` AND a non-null object `input_args` — anything else is
|
|
76
|
+
// structural noise that historically polluted the "undefined" bucket
|
|
77
|
+
// (Photometry-DB E2E #11 had 150/560 entries hit "undefined").
|
|
78
|
+
if (typeof entry.action !== "string" || entry.action.length === 0)
|
|
79
|
+
return;
|
|
80
|
+
if (entry.input_args === null || typeof entry.input_args !== "object")
|
|
81
|
+
return;
|
|
72
82
|
const action = (entry.action ?? "").toLowerCase();
|
|
73
83
|
const args = entry.input_args ?? {};
|
|
74
84
|
const success = entry.success !== false; // default to true when absent
|
|
@@ -343,8 +353,16 @@ export function collectInteractionCoverage(cwd) {
|
|
|
343
353
|
const files = readdirSync(logsDir).filter((f) => f === "interaction_log.jsonl" || f.startsWith("interaction_log") && f.endsWith(".jsonl"));
|
|
344
354
|
for (const f of files) {
|
|
345
355
|
const entries = parseLogFile(join(logsDir, f));
|
|
346
|
-
|
|
347
|
-
|
|
356
|
+
// 0.1.54 E5 — gate non-interaction rows BEFORE pushing them into
|
|
357
|
+
// the aggregate. This uses the same predicate as bucketOne so the depth
|
|
358
|
+
// gate's `successful` count and the dev-report breakdown agree
|
|
359
|
+
// on what counts as an interaction.
|
|
360
|
+
const real = entries.filter((e) => typeof e.action === "string" &&
|
|
361
|
+
e.action.length > 0 &&
|
|
362
|
+
e.input_args !== null &&
|
|
363
|
+
typeof e.input_args === "object");
|
|
364
|
+
allEntries.push(...real);
|
|
365
|
+
for (const e of real)
|
|
348
366
|
bucketOne(e, buckets);
|
|
349
367
|
}
|
|
350
368
|
}
|
|
@@ -518,12 +536,39 @@ export function evaluateDepth(coverage, minimums, discovery) {
|
|
|
518
536
|
shortfalls.push(c);
|
|
519
537
|
}
|
|
520
538
|
}
|
|
539
|
+
// 0.1.54 E6 — Failed-interaction-rate warning. When the recording shows
|
|
540
|
+
// more than 10% of attempts failing (out of at least 10), append a soft warning to the gate
|
|
541
|
+
// reason and surface the structured `failure_rate_warning` so the gate
|
|
542
|
+
// wrapper can also write a `high_failure_rate` cycle_issue. We
|
|
543
|
+
// intentionally do NOT block the depth gate on this — the depth gate's
|
|
544
|
+
// job is bucket coverage; failed clicks usually mean bad coordinates or
|
|
545
|
+
// unresolved selectors and the right fix is to inspect the modal /
|
|
546
|
+
// re-resolve the element, not to demand more attempts.
|
|
547
|
+
const failed = Math.max(0, coverage.total - coverage.successful);
|
|
548
|
+
const denom = failed + coverage.successful;
|
|
549
|
+
const ratio = denom > 0 ? failed / denom : 0;
|
|
550
|
+
const failureRateWarning = ratio > 0.1 && denom >= 10
|
|
551
|
+
? {
|
|
552
|
+
failed,
|
|
553
|
+
successful: coverage.successful,
|
|
554
|
+
total: denom,
|
|
555
|
+
ratio,
|
|
556
|
+
message: `WARN: high interaction failure rate (${(ratio * 100).toFixed(1)}% — ${failed} of ${denom} attempts failed). ` +
|
|
557
|
+
`The agent may be clicking coordinates that miss their target; run codeloop_handle_modal to clear any blocking dialog, ` +
|
|
558
|
+
`or re-inspect with codeloop_interact action: "win_ui_inspect" to get a fresh AutomationId before the next attempt. ` +
|
|
559
|
+
`Re-running the same coordinate click is unlikely to start working without first changing what you're targeting.`,
|
|
560
|
+
}
|
|
561
|
+
: undefined;
|
|
521
562
|
if (shortfalls.length === 0) {
|
|
522
563
|
const b = coverage.buckets;
|
|
564
|
+
let reason = `Deep interaction coverage met: ${coverage.successful} successful actions (click=${b.click}, navigation=${b.navigation}, input=${b.input}, commit=${b.commit}, toggle=${b.toggle}, gesture=${b.gesture}, upload=${b.upload}).`;
|
|
565
|
+
if (failureRateWarning)
|
|
566
|
+
reason += `\n${failureRateWarning.message}`;
|
|
523
567
|
return {
|
|
524
568
|
passed: true,
|
|
525
|
-
reason
|
|
569
|
+
reason,
|
|
526
570
|
shortfalls: [],
|
|
571
|
+
failure_rate_warning: failureRateWarning,
|
|
527
572
|
};
|
|
528
573
|
}
|
|
529
574
|
const lines = shortfalls.map((s) => ` - ${s.bucket}: need >= ${s.required}, have ${s.have}`);
|
|
@@ -541,10 +586,14 @@ export function evaluateDepth(coverage, minimums, discovery) {
|
|
|
541
586
|
`These are invisible to the CRUD classifier (edit_arcs / delete_actions / create signals can't be credited from a bare coordinate click). ` +
|
|
542
587
|
"When you have to use coordinates because UIA can't resolve the element, ALSO pass an `intent` (e.g. \"confirm delete\"), `description` (\"click Yes on delete confirmation dialog\"), or `purpose` field to codeloop_interact so the classifier still credits the arc.";
|
|
543
588
|
}
|
|
589
|
+
let failReason = `Deep interaction coverage NOT met. Shortfalls:\n${lines.join("\n")}\n${hint}${coordinateHint}`;
|
|
590
|
+
if (failureRateWarning)
|
|
591
|
+
failReason += `\n${failureRateWarning.message}`;
|
|
544
592
|
return {
|
|
545
593
|
passed: false,
|
|
546
|
-
reason:
|
|
594
|
+
reason: failReason,
|
|
547
595
|
shortfalls,
|
|
596
|
+
failure_rate_warning: failureRateWarning,
|
|
548
597
|
};
|
|
549
598
|
}
|
|
550
599
|
function buildHint(shortfalls) {
|