screenhand 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-desktop.js +183 -0
- package/dist/src/context-tracker.js +26 -0
- package/dist/src/perception/coordinator.js +32 -0
- package/dist/src/state/app-map.js +206 -0
- package/dist/src/state/visual-mapper.js +325 -0
- package/dist/src/state/world-model.js +30 -1
- package/dist-app-maps/com.apple.Notes.json +2328 -2201
- package/dist-app-maps/com.apple.Terminal.json +331 -343
- package/dist-app-maps/com.apple.iCal.json +3 -3
- package/dist-app-maps/com.apple.mail.json +3 -3
- package/dist-app-maps/com.apple.reminders.json +2 -2
- package/dist-app-maps/net.whatsapp.WhatsApp.json +27 -27
- package/dist-references/notes.json +53 -16
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -72,6 +72,7 @@ import { MenuScanner } from "./src/ingestion/menu-scanner.js";
|
|
|
72
72
|
import { DocParser } from "./src/ingestion/doc-parser.js";
|
|
73
73
|
import { TutorialExtractor } from "./src/ingestion/tutorial-extractor.js";
|
|
74
74
|
import { extractFeaturesFromHTML } from "./src/ingestion/feature-extractor.js";
|
|
75
|
+
import { quickScan, llmEnrich, buildVisualMeta, isSensitiveApp } from "./src/state/visual-mapper.js";
|
|
75
76
|
import { CoverageAuditor } from "./src/ingestion/coverage-auditor.js";
|
|
76
77
|
import { ReferenceMerger } from "./src/ingestion/reference-merger.js";
|
|
77
78
|
import { PlaybookPublisher } from "./src/community/publisher.js";
|
|
@@ -6882,6 +6883,188 @@ server.tool("discover_features", "Extract features from an app's official websit
|
|
|
6882
6883
|
_playbookStoreForContext.reload();
|
|
6883
6884
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6884
6885
|
});
|
|
6886
|
+
// ── Visual App Mapping (Phase 3) ─────────────────────────────────
|
|
6887
|
+
// map_app: capture the target app's window, OCR it via quickScan(), and store the
// resulting zones/elements in the AppMap. With depth "full" and ANTHROPIC_API_KEY
// set, an LLM enrichment pass is started in the background after the response
// text has been assembled; its outcome is only reported on stderr.
server.tool("map_app", "Visually map an app's UI by taking a screenshot, running OCR to identify interactive elements and zones with coordinates. Makes subsequent tool calls faster and more accurate. Runs quick scan (~500ms) inline, optional LLM enrichment in background if ANTHROPIC_API_KEY is set.", {
    bundleId: z.string().describe("macOS bundle ID (e.g. com.apple.Notes)"),
    appName: z.string().describe("Human-readable app name"),
    force: z.boolean().optional().describe("Re-map even if a recent map exists (default: false)"),
    depth: z.enum(["quick", "full"]).optional().describe("'quick' = OCR only (~500ms). 'full' = OCR + LLM enrichment (~15s). Default: quick"),
}, async ({ bundleId, appName, force, depth }) => {
    const textResult = (text) => ({ content: [{ type: "text", text }] });
    // Privacy gate: never screenshot apps flagged as sensitive.
    if (isSensitiveApp(bundleId)) {
        return textResult(`Blocked: ${bundleId} is a sensitive app (password manager, banking, etc.). Visual mapping is not allowed for privacy reasons.`);
    }
    // Check if already mapped (unless force)
    if (!force) {
        const existingMeta = appMap.getVisualMeta(bundleId);
        if (existingMeta && !appMap.isVisualMapStale(bundleId)) {
            return textResult(`Visual map for ${appName} already exists (${existingMeta.screensMapped.length} screens, confidence: ${existingMeta.confidence.toFixed(2)}). Use force: true to re-map.`);
        }
    }
    await ensureBridge();
    // Resolve the PID of the running app. A non-array reply is treated as "not running"
    // instead of throwing on `.find`.
    const apps = await bridge.call("app.list", {});
    const matchedApp = Array.isArray(apps) ? apps.find((a) => a.bundleId === bundleId) : undefined;
    if (!matchedApp?.pid) {
        return textResult(`App ${bundleId} is not running. Launch it first with focus("${bundleId}") or launch("${bundleId}").`);
    }
    const pid = matchedApp.pid;
    // Get window bounds (best effort: quickScan receives `undefined` bounds on failure)
    let windowTitle = "";
    let windowBounds;
    try {
        const wins = await bridge.call("window.list", {});
        const appWins = wins?.filter((w) => w.pid === pid);
        const mainWin = appWins?.find((w) => w.focused || w.frontmost || w.isMain) ?? appWins?.[0];
        if (mainWin) {
            windowTitle = mainWin.title ?? "";
            windowBounds = mainWin.bounds ?? mainWin;
        }
    }
    catch { /* use defaults */ }
    // Phase A: Quick scan (OCR)
    const scanResult = await quickScan(bridge, pid, windowBounds);
    if (!scanResult) {
        return textResult(`Failed to capture screenshot of ${appName}. Make sure the app window is visible.`);
    }
    // Get app version for staleness tracking
    let appVersion = "unknown";
    try {
        const infoResult = await bridge.call("app.info", { bundleId });
        appVersion = infoResult?.version ?? infoResult?.shortVersion ?? "unknown";
    }
    catch { /* use default */ }
    // Get display scale factor
    let scaleFactor = 2;
    try {
        const screenInfo = await bridge.call("screen.info", {});
        scaleFactor = screenInfo?.scaleFactor ?? 2;
    }
    catch { /* default to Retina */ }
    const meta = buildVisualMeta(scanResult.hash, scanResult.captureSize, windowTitle, appVersion, scanResult.scan.confidence, scaleFactor);
    // Populate into AppMap
    const { added, updated } = appMap.populateFromVisualScan(bundleId, appName, scanResult.scan, meta);
    const lines = [
        `Visual map for ${appName} (${bundleId}):`,
        ` Zones identified: ${scanResult.scan.zones.length}`,
        ` Elements mapped: ${scanResult.scan.elements.length} (${added} new, ${updated} updated)`,
        ` Map confidence: ${scanResult.scan.confidence.toFixed(2)}`,
        ` App version: ${appVersion}`,
    ];
    if (scanResult.scan.zones.length > 0) {
        lines.push(" Zones:");
        // Loop variable deliberately not named `z`, which is the zod import in this file.
        for (const zone of scanResult.scan.zones) {
            const elCount = scanResult.scan.elements.filter((e) => e.zone === zone.label).length;
            lines.push(` ${zone.label} (${zone.type}): ${elCount} elements`);
        }
    }
    // Phase B: LLM enrichment (background, if depth=full and API key exists)
    if (depth === "full") {
        if (process.env.ANTHROPIC_API_KEY) {
            lines.push(" LLM enrichment: starting in background...");
            const enrichInBackground = async () => {
                try {
                    // Get screenshot as file, then read as base64 for LLM
                    const screenshotShot = await bridge.call("cg.captureScreen", {});
                    if (!screenshotShot?.path)
                        return;
                    // Async read so a large screenshot does not block the event loop.
                    const { readFile } = await import("node:fs/promises");
                    const screenshotBase64 = (await readFile(screenshotShot.path)).toString("base64");
                    // Get AX tree for cross-reference
                    let axTree = "";
                    try {
                        const tree = await bridge.call("ax.tree", { pid, depth: 3 });
                        axTree = JSON.stringify(tree, null, 1).slice(0, 3000);
                    }
                    catch { /* proceed without AX */ }
                    const enrichment = await llmEnrich(screenshotBase64, axTree, appName, bundleId, windowTitle, scanResult.captureSize);
                    if (!enrichment)
                        return;
                    // LLM output is a hypothesis: cap per-element confidence at 0.5, map-level at 0.6.
                    const llmElements = enrichment.elements.map((e) => ({
                        ...e,
                        confidence: Math.min(e.confidence, 0.5),
                    }));
                    const cappedConfidence = Math.min(enrichment.confidence, 0.6);
                    // Never let the enrichment pass lower (or NaN-out) the confidence the OCR pass stored.
                    const mapConfidence = Number.isFinite(cappedConfidence)
                        ? Math.max(meta.confidence, cappedConfidence)
                        : meta.confidence;
                    // populateFromVisualScan() purges every stored ocr/llm element before it adds
                    // the scan it is given, so the OCR results must be re-submitted together with
                    // the LLM ones; passing only LLM data would erase the quick scan just written.
                    // OCR entries go first so they win when both passes report the same label.
                    const mergedScan = {
                        zones: [...scanResult.scan.zones, ...enrichment.zones],
                        elements: [...scanResult.scan.elements, ...llmElements],
                        confidence: mapConfidence,
                    };
                    appMap.populateFromVisualScan(bundleId, appName, mergedScan, {
                        ...meta,
                        confidence: mapConfidence,
                    });
                    process.stderr.write(`[visual-mapper] LLM enrichment complete for ${appName}: ${enrichment.elements.length} elements, ${enrichment.zones.length} zones\n`);
                }
                catch (err) {
                    process.stderr.write(`[visual-mapper] Background enrichment failed: ${err instanceof Error ? err.message : String(err)}\n`);
                }
            };
            // Fire and forget — don't block the response
            enrichInBackground().catch(() => { });
        }
        else {
            lines.push(" LLM enrichment: skipped (no ANTHROPIC_API_KEY set)");
        }
    }
    return textResult(lines.join("\n"));
});
|
|
7008
|
+
// map_status: read-only report on an app's stored visual map — scan age, confidence,
// staleness, element counts by label source, and a per-zone breakdown (first 15 zones).
originalTool("map_status", "Check the health of an app's visual map. Shows zones, element counts, confidence, staleness, and failure rates. Useful for debugging click failures.", {
    bundleId: z.string().describe("macOS bundle ID"),
}, async ({ bundleId }) => {
    const visualMeta = appMap.getVisualMeta(bundleId);
    const mapData = appMap.getLoaded(bundleId) ?? appMap.load(bundleId);
    if (!visualMeta) {
        return { content: [{ type: "text", text: `No visual map exists for ${bundleId}. Run map_app to create one.` }] };
    }
    const stale = appMap.isVisualMapStale(bundleId);
    const scannedAtMs = new Date(visualMeta.lastScannedAt).getTime();
    const hoursOld = Math.round((Date.now() - scannedAtMs) / 3_600_000);
    const report = [];
    report.push(`Visual Map Status: ${bundleId}`);
    report.push(` Last scanned: ${visualMeta.lastScannedAt} (${hoursOld}h ago)`);
    report.push(` App version: ${visualMeta.appVersion}`);
    report.push(` Confidence: ${visualMeta.confidence.toFixed(2)}`);
    report.push(` Staleness: ${stale ? "STALE — consider re-mapping" : "fresh"}`);
    report.push(` Screens mapped: ${visualMeta.screensMapped.join(", ") || "(none)"}`);
    report.push(` Scale factor: ${visualMeta.scaleFactor}x`);
    report.push(` Capture size: ${visualMeta.captureSize.w}x${visualMeta.captureSize.h}`);
    if (mapData) {
        // Flatten once, then split by where each element's label came from.
        const everyElement = Object.values(mapData.zones).flatMap((zone) => zone.elements);
        const fromVisualScan = everyElement.filter((el) => el.labelSource === "ocr" || el.labelSource === "llm");
        const confirmedCount = everyElement.filter((el) => el.labelSource === "ax" || el.labelSource === "manual").length;
        // Validation statistics are only accumulated over visual-scan (ocr/llm) elements.
        const matchTotal = fromVisualScan.reduce((sum, el) => sum + (el.validationCount ?? 0), 0);
        const mismatchTotal = fromVisualScan.reduce((sum, el) => sum + (el.mismatchCount ?? 0), 0);
        report.push(` Visual-scan elements: ${fromVisualScan.length}`);
        report.push(` AX-confirmed elements: ${confirmedCount}`);
        const observed = matchTotal + mismatchTotal;
        if (observed > 0) {
            const rate = matchTotal / observed;
            report.push(` Position match rate: ${(rate * 100).toFixed(1)}% (${matchTotal} matches, ${mismatchTotal} mismatches)`);
        }
        // Zone breakdown, truncated to keep the response short.
        const zoneEntries = Object.entries(mapData.zones);
        if (zoneEntries.length > 0) {
            report.push(" Zones:");
            zoneEntries.slice(0, 15).forEach(([zoneKey, zoneInfo]) => {
                report.push(` ${zoneKey} (${zoneInfo.type}): ${zoneInfo.elements.length} elements`);
            });
            const hidden = zoneEntries.length - 15;
            if (hidden > 0) {
                report.push(` ... and ${hidden} more`);
            }
        }
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
6885
7068
|
server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
|
|
6886
7069
|
bundleId: z.string().describe("macOS bundle ID (e.g. com.blackmagic-design.DaVinciResolveLite)"),
|
|
6887
7070
|
appName: z.string().describe("Human-readable app name"),
|
|
@@ -241,6 +241,32 @@ export class ContextTracker {
|
|
|
241
241
|
: map.masteryLevel.toUpperCase();
|
|
242
242
|
hints.push(`🗺 AppMap [${ratingDisplay}]: ${zones} zones, ${verifiedPaths}/${totalPaths} verified paths`);
|
|
243
243
|
}
|
|
244
|
+
// Visual map hint: suggest map_app if no visual data exists
|
|
245
|
+
if (hints.length < 3 && this.context.appMapData) {
|
|
246
|
+
const visualMeta = this.context.appMapData.visualMeta;
|
|
247
|
+
if (!visualMeta) {
|
|
248
|
+
hints.push("📸 No visual map — run map_app(bundleId, appName) for faster, more accurate element targeting");
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
// Check for element with visual position near the target
|
|
252
|
+
const target = extractTarget(params);
|
|
253
|
+
if (target) {
|
|
254
|
+
const targetLower = target.toLowerCase();
|
|
255
|
+
for (const zone of Object.values(this.context.appMapData.zones)) {
|
|
256
|
+
for (const el of zone.elements) {
|
|
257
|
+
if (el.label.toLowerCase().includes(targetLower) &&
|
|
258
|
+
el.relativeX >= 0 && el.relativeY >= 0 &&
|
|
259
|
+
(el.visualConfidence ?? 0) >= 0.5) {
|
|
260
|
+
hints.push(`📍 Visual map: "${el.label}" at (${el.relativeX.toFixed(2)}, ${el.relativeY.toFixed(2)}) [${el.labelSource ?? "unknown"} source, confidence ${(el.visualConfidence ?? 0).toFixed(1)}]`);
|
|
261
|
+
break;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
if (hints.length >= 3)
|
|
265
|
+
break;
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
244
270
|
// Playbook-dependent hints below
|
|
245
271
|
if (!this.context.playbook)
|
|
246
272
|
return hints;
|
|
@@ -1034,6 +1034,38 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
1034
1034
|
process.stderr.write(`[perception] recordElementOutcome failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1035
1035
|
}
|
|
1036
1036
|
}
|
|
1037
|
+
// 4. Visual map validation: cross-check AX positions against visual map
|
|
1038
|
+
// Uses proximity-based matching (not label matching) so OCR text
|
|
1039
|
+
// that doesn't exactly match AX labels can still be validated.
|
|
1040
|
+
if (this.appMap.getVisualMeta(bundleId) && focusedWin.bounds?.value) {
|
|
1041
|
+
const winBounds = focusedWin.bounds.value;
|
|
1042
|
+
let validated = 0;
|
|
1043
|
+
for (const [, ctrl] of focusedWin.controls) {
|
|
1044
|
+
if (validated >= 15)
|
|
1045
|
+
break; // Cap per cycle
|
|
1046
|
+
const label = ctrl.label?.value;
|
|
1047
|
+
if (!label || label.length < 2)
|
|
1048
|
+
continue;
|
|
1049
|
+
const pos = ctrl.position;
|
|
1050
|
+
if (!pos || typeof pos.x !== "number" || typeof pos.y !== "number")
|
|
1051
|
+
continue;
|
|
1052
|
+
// Convert absolute to relative
|
|
1053
|
+
const relX = winBounds.width > 0 ? (pos.x - winBounds.x) / winBounds.width : -1;
|
|
1054
|
+
const relY = winBounds.height > 0 ? (pos.y - winBounds.y) / winBounds.height : -1;
|
|
1055
|
+
if (relX < 0 || relX > 1 || relY < 0 || relY > 1)
|
|
1056
|
+
continue;
|
|
1057
|
+
try {
|
|
1058
|
+
// Try exact label match first, fall back to proximity match
|
|
1059
|
+
const exactMatch = this.appMap.validateElementPosition(bundleId, label, relX, relY);
|
|
1060
|
+
if (exactMatch === null) {
|
|
1061
|
+
// No label match — try proximity: find nearest visual-scan element
|
|
1062
|
+
this.appMap.validateNearestElement(bundleId, label, relX, relY);
|
|
1063
|
+
}
|
|
1064
|
+
validated++;
|
|
1065
|
+
}
|
|
1066
|
+
catch { /* non-fatal */ }
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
1037
1069
|
}
|
|
1038
1070
|
/**
|
|
1039
1071
|
* Wire #9: Record element visibility from OCR detections.
|
|
@@ -2303,6 +2303,212 @@ export class AppMap {
|
|
|
2303
2303
|
return [];
|
|
2304
2304
|
}
|
|
2305
2305
|
}
|
|
2306
|
+
// ── Visual Mapping (Phase 3) ──────────────────────────────────────
|
|
2307
|
+
/**
|
|
2308
|
+
* Get visual mapping metadata for an app.
|
|
2309
|
+
*/
|
|
2310
|
+
getVisualMeta(bundleId) {
|
|
2311
|
+
const data = this.ensureLoaded(bundleId);
|
|
2312
|
+
return data?.visualMeta ?? null;
|
|
2313
|
+
}
|
|
2314
|
+
/**
|
|
2315
|
+
* Populate the app map from a visual scan result.
|
|
2316
|
+
* Fills in -1,-1 coordinates for known elements and adds newly discovered ones.
|
|
2317
|
+
* Does NOT overwrite elements that already have valid positions from AX/interaction data.
|
|
2318
|
+
*/
|
|
2319
|
+
populateFromVisualScan(bundleId, appName, scan, meta) {
|
|
2320
|
+
let data = this.ensureLoaded(bundleId);
|
|
2321
|
+
if (!data) {
|
|
2322
|
+
data = this.createEmpty(bundleId, appName);
|
|
2323
|
+
this.save(data);
|
|
2324
|
+
}
|
|
2325
|
+
let added = 0;
|
|
2326
|
+
let updated = 0;
|
|
2327
|
+
// Purge all OCR/LLM elements to make room for fresh scan results.
|
|
2328
|
+
// AX-confirmed and manual elements are kept — they're authoritative.
|
|
2329
|
+
for (const zone of Object.values(data.zones)) {
|
|
2330
|
+
zone.elements = zone.elements.filter(el => {
|
|
2331
|
+
if (el.labelSource === "ax" || el.labelSource === "manual")
|
|
2332
|
+
return true;
|
|
2333
|
+
// Keep elements with no labelSource that have valid AX-style positions
|
|
2334
|
+
// (these come from recordElementOutcome, i.e. real interactions)
|
|
2335
|
+
if (!el.labelSource && el.relativeX != null && el.relativeX >= 0)
|
|
2336
|
+
return true;
|
|
2337
|
+
return false;
|
|
2338
|
+
});
|
|
2339
|
+
}
|
|
2340
|
+
this.save(data);
|
|
2341
|
+
// Populate zones from scan
|
|
2342
|
+
for (const zone of scan.zones) {
|
|
2343
|
+
const zoneKey = zone.label.toLowerCase().replace(/\s+/g, "_");
|
|
2344
|
+
if (!data.zones[zoneKey]) {
|
|
2345
|
+
this.addZone(bundleId, zoneKey, {
|
|
2346
|
+
relativePosition: zone.bounds,
|
|
2347
|
+
type: zone.type,
|
|
2348
|
+
elements: [],
|
|
2349
|
+
verified: false,
|
|
2350
|
+
lastSeen: new Date().toISOString(),
|
|
2351
|
+
});
|
|
2352
|
+
}
|
|
2353
|
+
}
|
|
2354
|
+
// Populate elements from scan
|
|
2355
|
+
for (const el of scan.elements) {
|
|
2356
|
+
// Check if element already exists with a valid position
|
|
2357
|
+
const existing = this.findElement(bundleId, el.label);
|
|
2358
|
+
if (existing) {
|
|
2359
|
+
const hasValidPosition = existing.element.relativeX >= 0 && existing.element.relativeY >= 0;
|
|
2360
|
+
const hasAXSource = existing.element.labelSource === "ax" || existing.element.labelSource === "manual";
|
|
2361
|
+
// Don't overwrite AX-confirmed positions — they're more reliable
|
|
2362
|
+
if (hasValidPosition && hasAXSource)
|
|
2363
|
+
continue;
|
|
2364
|
+
// Update position if current is unknown (-1) or from lower-confidence source
|
|
2365
|
+
if (!hasValidPosition || (existing.element.labelSource === "llm" && el.confidence > (existing.element.visualConfidence ?? 0))) {
|
|
2366
|
+
this.updateElementPosition(bundleId, existing.zone, el.label, el.x, el.y);
|
|
2367
|
+
// Reload after update
|
|
2368
|
+
const refreshed = this.findElement(bundleId, el.label);
|
|
2369
|
+
if (refreshed) {
|
|
2370
|
+
refreshed.element.labelSource = el.confidence >= 0.7 ? "ocr" : "llm";
|
|
2371
|
+
refreshed.element.visualConfidence = el.confidence;
|
|
2372
|
+
refreshed.element.validationCount = 0;
|
|
2373
|
+
refreshed.element.mismatchCount = 0;
|
|
2374
|
+
}
|
|
2375
|
+
updated++;
|
|
2376
|
+
}
|
|
2377
|
+
continue;
|
|
2378
|
+
}
|
|
2379
|
+
// New element — find best matching zone or use "auto_discovered"
|
|
2380
|
+
const targetZone = el.zone ? el.zone.toLowerCase().replace(/\s+/g, "_") : "auto_discovered";
|
|
2381
|
+
const zoneKey = data.zones[targetZone] ? targetZone : "auto_discovered";
|
|
2382
|
+
const newElement = {
|
|
2383
|
+
label: el.label,
|
|
2384
|
+
relativeX: el.x,
|
|
2385
|
+
relativeY: el.y,
|
|
2386
|
+
anchor: "top-left",
|
|
2387
|
+
ocrBackup: el.label,
|
|
2388
|
+
successCount: 0,
|
|
2389
|
+
failCount: 0,
|
|
2390
|
+
lastInteracted: new Date().toISOString(),
|
|
2391
|
+
sessionsSinceUse: 0,
|
|
2392
|
+
labelSource: el.confidence >= 0.7 ? "ocr" : "llm",
|
|
2393
|
+
visualConfidence: el.confidence,
|
|
2394
|
+
validationCount: 0,
|
|
2395
|
+
mismatchCount: 0,
|
|
2396
|
+
};
|
|
2397
|
+
this.addElement(bundleId, zoneKey, newElement);
|
|
2398
|
+
added++;
|
|
2399
|
+
}
|
|
2400
|
+
// Store visual metadata
|
|
2401
|
+
data = this.ensureLoaded(bundleId);
|
|
2402
|
+
if (data) {
|
|
2403
|
+
data.visualMeta = meta;
|
|
2404
|
+
this.dirty.add(bundleId);
|
|
2405
|
+
this.scheduleSave();
|
|
2406
|
+
}
|
|
2407
|
+
return { added, updated };
|
|
2408
|
+
}
|
|
2409
|
+
/**
|
|
2410
|
+
* Validate a live AX element position against the visual map.
|
|
2411
|
+
* Returns true if positions match (within tolerance), false if mismatch.
|
|
2412
|
+
* Updates validationCount/mismatchCount on the stored element.
|
|
2413
|
+
*/
|
|
2414
|
+
validateElementPosition(bundleId, label, liveX, liveY, tolerance = 0.05) {
|
|
2415
|
+
const found = this.findElement(bundleId, label);
|
|
2416
|
+
if (!found)
|
|
2417
|
+
return null;
|
|
2418
|
+
const el = found.element;
|
|
2419
|
+
// Only validate elements that have visual-scan positions
|
|
2420
|
+
if (el.relativeX < 0 || el.relativeY < 0)
|
|
2421
|
+
return null;
|
|
2422
|
+
if (!el.labelSource || el.labelSource === "ax" || el.labelSource === "manual")
|
|
2423
|
+
return null;
|
|
2424
|
+
const dx = Math.abs(el.relativeX - liveX);
|
|
2425
|
+
const dy = Math.abs(el.relativeY - liveY);
|
|
2426
|
+
const matches = dx <= tolerance && dy <= tolerance;
|
|
2427
|
+
if (matches) {
|
|
2428
|
+
el.validationCount = (el.validationCount ?? 0) + 1;
|
|
2429
|
+
// After 3 validations, promote confidence
|
|
2430
|
+
if ((el.validationCount ?? 0) >= 3 && (el.visualConfidence ?? 0) < 0.9) {
|
|
2431
|
+
el.visualConfidence = 0.9;
|
|
2432
|
+
el.labelSource = "ax"; // Promoted — now AX-confirmed
|
|
2433
|
+
}
|
|
2434
|
+
}
|
|
2435
|
+
else {
|
|
2436
|
+
el.mismatchCount = (el.mismatchCount ?? 0) + 1;
|
|
2437
|
+
// After 3 mismatches, demote confidence
|
|
2438
|
+
const total = (el.validationCount ?? 0) + (el.mismatchCount ?? 0);
|
|
2439
|
+
if (total >= 10 && (el.mismatchCount ?? 0) / total > 0.3) {
|
|
2440
|
+
el.visualConfidence = 0.3;
|
|
2441
|
+
}
|
|
2442
|
+
}
|
|
2443
|
+
this.dirty.add(bundleId);
|
|
2444
|
+
this.scheduleSave();
|
|
2445
|
+
return matches;
|
|
2446
|
+
}
|
|
2447
|
+
/**
|
|
2448
|
+
* Proximity-based validation: find the nearest visual-scan element to a live AX position.
|
|
2449
|
+
* Unlike validateElementPosition (label match), this matches by position alone.
|
|
2450
|
+
* If a visual-scan element is within tolerance of the AX position, validate it.
|
|
2451
|
+
* Also updates the element's label to the AX label if it was OCR-derived.
|
|
2452
|
+
*/
|
|
2453
|
+
validateNearestElement(bundleId, axLabel, liveX, liveY, tolerance = 0.05) {
|
|
2454
|
+
const data = this.ensureLoaded(bundleId);
|
|
2455
|
+
if (!data)
|
|
2456
|
+
return false;
|
|
2457
|
+
let bestEl = null;
|
|
2458
|
+
let bestDist = Infinity;
|
|
2459
|
+
for (const zone of Object.values(data.zones)) {
|
|
2460
|
+
for (const el of zone.elements) {
|
|
2461
|
+
// Only validate visual-scan elements (ocr/llm source)
|
|
2462
|
+
if (el.relativeX < 0 || el.relativeY < 0)
|
|
2463
|
+
continue;
|
|
2464
|
+
if (!el.labelSource || el.labelSource === "ax" || el.labelSource === "manual")
|
|
2465
|
+
continue;
|
|
2466
|
+
const dx = Math.abs(el.relativeX - liveX);
|
|
2467
|
+
const dy = Math.abs(el.relativeY - liveY);
|
|
2468
|
+
const dist = Math.sqrt(dx * dx + dy * dy);
|
|
2469
|
+
if (dx <= tolerance && dy <= tolerance && dist < bestDist) {
|
|
2470
|
+
bestDist = dist;
|
|
2471
|
+
bestEl = el;
|
|
2472
|
+
}
|
|
2473
|
+
}
|
|
2474
|
+
}
|
|
2475
|
+
if (!bestEl)
|
|
2476
|
+
return false;
|
|
2477
|
+
bestEl.validationCount = (bestEl.validationCount ?? 0) + 1;
|
|
2478
|
+
// After 3 proximity validations, promote to AX-confirmed
|
|
2479
|
+
if ((bestEl.validationCount ?? 0) >= 3 && (bestEl.visualConfidence ?? 0) < 0.9) {
|
|
2480
|
+
bestEl.visualConfidence = 0.9;
|
|
2481
|
+
bestEl.labelSource = "ax";
|
|
2482
|
+
// Update label to the authoritative AX label
|
|
2483
|
+
if (axLabel && axLabel.length >= 2) {
|
|
2484
|
+
bestEl.label = axLabel;
|
|
2485
|
+
bestEl.ocrBackup = axLabel;
|
|
2486
|
+
}
|
|
2487
|
+
}
|
|
2488
|
+
this.dirty.add(bundleId);
|
|
2489
|
+
this.scheduleSave();
|
|
2490
|
+
return true;
|
|
2491
|
+
}
|
|
2492
|
+
/**
|
|
2493
|
+
* Check if an app's visual map is stale based on app version change.
|
|
2494
|
+
*/
|
|
2495
|
+
isVisualMapStale(bundleId, currentAppVersion) {
|
|
2496
|
+
const meta = this.getVisualMeta(bundleId);
|
|
2497
|
+
if (!meta)
|
|
2498
|
+
return true; // No map = stale
|
|
2499
|
+
// Version mismatch = stale
|
|
2500
|
+
if (currentAppVersion && meta.appVersion !== currentAppVersion)
|
|
2501
|
+
return true;
|
|
2502
|
+
// Age-based: older than 7 days = stale
|
|
2503
|
+
const ageMs = Date.now() - new Date(meta.lastScannedAt).getTime();
|
|
2504
|
+
const ageDays = ageMs / 86_400_000;
|
|
2505
|
+
if (ageDays > 7)
|
|
2506
|
+
return true;
|
|
2507
|
+
// Confidence too low = stale
|
|
2508
|
+
if (meta.confidence < 0.3)
|
|
2509
|
+
return true;
|
|
2510
|
+
return false;
|
|
2511
|
+
}
|
|
2306
2512
|
// ── Internals ─────────────────────────────────────────────────────
|
|
2307
2513
|
ensureLoaded(bundleId) {
|
|
2308
2514
|
return this.cache.get(bundleId) ?? this.load(bundleId);
|