screenhand 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/mcp-desktop.js +463 -39
  2. package/dist/src/community/publisher.js +4 -2
  3. package/dist/src/context-tracker.js +62 -6
  4. package/dist/src/ingestion/reference-merger.js +33 -0
  5. package/dist/src/memory/recall.js +65 -1
  6. package/dist/src/memory/research.js +1 -1
  7. package/dist/src/memory/service.js +26 -5
  8. package/dist/src/memory/store.js +42 -23
  9. package/dist/src/native/bridge-client.js +3 -3
  10. package/dist/src/perception/coordinator.js +94 -15
  11. package/dist/src/perception/manager.js +65 -1
  12. package/dist/src/planner/executor.js +6 -2
  13. package/dist/src/planner/plan-refiner.js +213 -0
  14. package/dist/src/playbook/engine.js +18 -3
  15. package/dist/src/playbook/recorder.js +24 -8
  16. package/dist/src/playbook/runner.js +9 -3
  17. package/dist/src/playbook/store.js +8 -0
  18. package/dist/src/recovery/engine.js +9 -3
  19. package/dist/src/state/app-map.js +212 -2
  20. package/dist/src/state/state-watcher.js +144 -0
  21. package/dist/src/state/visual-mapper.js +325 -0
  22. package/dist/src/state/world-model.js +30 -1
  23. package/dist/src/supervisor/supervisor.js +1 -1
  24. package/dist-app-maps/com.apple.Notes.json +2328 -2201
  25. package/dist-app-maps/com.apple.Terminal.json +331 -343
  26. package/dist-app-maps/com.apple.iCal.json +3 -3
  27. package/dist-app-maps/com.apple.iphonesimulator.json +714 -223
  28. package/dist-app-maps/com.apple.mail.json +3 -3
  29. package/dist-app-maps/com.apple.reminders.json +2 -2
  30. package/dist-app-maps/net.whatsapp.WhatsApp.json +27 -27
  31. package/dist-references/notes.json +53 -16
  32. package/dist-references/simulator.json +48 -2
  33. package/package.json +1 -1
@@ -339,7 +339,9 @@ export class AppMap {
339
339
  fs.mkdirSync(this.config.mapsDir, { recursive: true });
340
340
  writeFileAtomicSync(this.ladderFilePath(bundleId), JSON.stringify(data, null, 2));
341
341
  }
342
- catch { /* non-critical */ }
342
+ catch (e) {
343
+ process.stderr.write(`[app-map] saveGeneratedLadder failed: ${e instanceof Error ? e.message : String(e)}\n`);
344
+ }
343
345
  }
344
346
  // ── Create ────────────────────────────────────────────────────────
345
347
  createEmpty(bundleId, appName, version = "unknown") {
@@ -1591,7 +1593,9 @@ export class AppMap {
1591
1593
  writeFileAtomicSync(this.filePath(data.app), JSON.stringify(data, null, 2) + "\n");
1592
1594
  this.dirty.delete(data.app); // Only remove the one we just wrote
1593
1595
  }
1594
- catch { /* non-fatal — will be picked up by next debounced save */ }
1596
+ catch (e) {
1597
+ process.stderr.write(`[app-map] urgent mastery save failed: ${e instanceof Error ? e.message : String(e)}\n`);
1598
+ }
1595
1599
  }
1596
1600
  }
1597
1601
  refreshMastery(bundleId) {
@@ -2299,6 +2303,212 @@ export class AppMap {
2299
2303
  return [];
2300
2304
  }
2301
2305
  }
2306
+ // ── Visual Mapping (Phase 3) ──────────────────────────────────────
2307
+ /**
2308
+ * Get visual mapping metadata for an app.
2309
+ */
2310
+ getVisualMeta(bundleId) {
2311
+ const data = this.ensureLoaded(bundleId);
2312
+ return data?.visualMeta ?? null;
2313
+ }
2314
+ /**
2315
+ * Populate the app map from a visual scan result.
2316
+ * Fills in -1,-1 coordinates for known elements and adds newly discovered ones.
2317
+ * Does NOT overwrite elements that already have valid positions from AX/interaction data.
2318
+ */
2319
+ populateFromVisualScan(bundleId, appName, scan, meta) {
2320
+ let data = this.ensureLoaded(bundleId);
2321
+ if (!data) {
2322
+ data = this.createEmpty(bundleId, appName);
2323
+ this.save(data);
2324
+ }
2325
+ let added = 0;
2326
+ let updated = 0;
2327
+ // Purge all OCR/LLM elements to make room for fresh scan results.
2328
+ // AX-confirmed and manual elements are kept — they're authoritative.
2329
+ for (const zone of Object.values(data.zones)) {
2330
+ zone.elements = zone.elements.filter(el => {
2331
+ if (el.labelSource === "ax" || el.labelSource === "manual")
2332
+ return true;
2333
+ // Keep elements with no labelSource that have valid AX-style positions
2334
+ // (these come from recordElementOutcome, i.e. real interactions)
2335
+ if (!el.labelSource && el.relativeX != null && el.relativeX >= 0)
2336
+ return true;
2337
+ return false;
2338
+ });
2339
+ }
2340
+ this.save(data);
2341
+ // Populate zones from scan
2342
+ for (const zone of scan.zones) {
2343
+ const zoneKey = zone.label.toLowerCase().replace(/\s+/g, "_");
2344
+ if (!data.zones[zoneKey]) {
2345
+ this.addZone(bundleId, zoneKey, {
2346
+ relativePosition: zone.bounds,
2347
+ type: zone.type,
2348
+ elements: [],
2349
+ verified: false,
2350
+ lastSeen: new Date().toISOString(),
2351
+ });
2352
+ }
2353
+ }
2354
+ // Populate elements from scan
2355
+ for (const el of scan.elements) {
2356
+ // Check if element already exists with a valid position
2357
+ const existing = this.findElement(bundleId, el.label);
2358
+ if (existing) {
2359
+ const hasValidPosition = existing.element.relativeX >= 0 && existing.element.relativeY >= 0;
2360
+ const hasAXSource = existing.element.labelSource === "ax" || existing.element.labelSource === "manual";
2361
+ // Don't overwrite AX-confirmed positions — they're more reliable
2362
+ if (hasValidPosition && hasAXSource)
2363
+ continue;
2364
+ // Update position if current is unknown (-1) or from lower-confidence source
2365
+ if (!hasValidPosition || (existing.element.labelSource === "llm" && el.confidence > (existing.element.visualConfidence ?? 0))) {
2366
+ this.updateElementPosition(bundleId, existing.zone, el.label, el.x, el.y);
2367
+ // Reload after update
2368
+ const refreshed = this.findElement(bundleId, el.label);
2369
+ if (refreshed) {
2370
+ refreshed.element.labelSource = el.confidence >= 0.7 ? "ocr" : "llm";
2371
+ refreshed.element.visualConfidence = el.confidence;
2372
+ refreshed.element.validationCount = 0;
2373
+ refreshed.element.mismatchCount = 0;
2374
+ }
2375
+ updated++;
2376
+ }
2377
+ continue;
2378
+ }
2379
+ // New element — find best matching zone or use "auto_discovered"
2380
+ const targetZone = el.zone ? el.zone.toLowerCase().replace(/\s+/g, "_") : "auto_discovered";
2381
+ const zoneKey = data.zones[targetZone] ? targetZone : "auto_discovered";
2382
+ const newElement = {
2383
+ label: el.label,
2384
+ relativeX: el.x,
2385
+ relativeY: el.y,
2386
+ anchor: "top-left",
2387
+ ocrBackup: el.label,
2388
+ successCount: 0,
2389
+ failCount: 0,
2390
+ lastInteracted: new Date().toISOString(),
2391
+ sessionsSinceUse: 0,
2392
+ labelSource: el.confidence >= 0.7 ? "ocr" : "llm",
2393
+ visualConfidence: el.confidence,
2394
+ validationCount: 0,
2395
+ mismatchCount: 0,
2396
+ };
2397
+ this.addElement(bundleId, zoneKey, newElement);
2398
+ added++;
2399
+ }
2400
+ // Store visual metadata
2401
+ data = this.ensureLoaded(bundleId);
2402
+ if (data) {
2403
+ data.visualMeta = meta;
2404
+ this.dirty.add(bundleId);
2405
+ this.scheduleSave();
2406
+ }
2407
+ return { added, updated };
2408
+ }
2409
+ /**
2410
+ * Validate a live AX element position against the visual map.
2411
+ * Returns true if positions match (within tolerance), false if mismatch.
2412
+ * Updates validationCount/mismatchCount on the stored element.
2413
+ */
2414
+ validateElementPosition(bundleId, label, liveX, liveY, tolerance = 0.05) {
2415
+ const found = this.findElement(bundleId, label);
2416
+ if (!found)
2417
+ return null;
2418
+ const el = found.element;
2419
+ // Only validate elements that have visual-scan positions
2420
+ if (el.relativeX < 0 || el.relativeY < 0)
2421
+ return null;
2422
+ if (!el.labelSource || el.labelSource === "ax" || el.labelSource === "manual")
2423
+ return null;
2424
+ const dx = Math.abs(el.relativeX - liveX);
2425
+ const dy = Math.abs(el.relativeY - liveY);
2426
+ const matches = dx <= tolerance && dy <= tolerance;
2427
+ if (matches) {
2428
+ el.validationCount = (el.validationCount ?? 0) + 1;
2429
+ // After 3 validations, promote confidence
2430
+ if ((el.validationCount ?? 0) >= 3 && (el.visualConfidence ?? 0) < 0.9) {
2431
+ el.visualConfidence = 0.9;
2432
+ el.labelSource = "ax"; // Promoted — now AX-confirmed
2433
+ }
2434
+ }
2435
+ else {
2436
+ el.mismatchCount = (el.mismatchCount ?? 0) + 1;
2437
+ // After 3 mismatches, demote confidence
2438
+ const total = (el.validationCount ?? 0) + (el.mismatchCount ?? 0);
2439
+ if (total >= 10 && (el.mismatchCount ?? 0) / total > 0.3) {
2440
+ el.visualConfidence = 0.3;
2441
+ }
2442
+ }
2443
+ this.dirty.add(bundleId);
2444
+ this.scheduleSave();
2445
+ return matches;
2446
+ }
2447
+ /**
2448
+ * Proximity-based validation: find the nearest visual-scan element to a live AX position.
2449
+ * Unlike validateElementPosition (label match), this matches by position alone.
2450
+ * If a visual-scan element is within tolerance of the AX position, validate it.
2451
+ * Also updates the element's label to the AX label if it was OCR-derived.
2452
+ */
2453
+ validateNearestElement(bundleId, axLabel, liveX, liveY, tolerance = 0.05) {
2454
+ const data = this.ensureLoaded(bundleId);
2455
+ if (!data)
2456
+ return false;
2457
+ let bestEl = null;
2458
+ let bestDist = Infinity;
2459
+ for (const zone of Object.values(data.zones)) {
2460
+ for (const el of zone.elements) {
2461
+ // Only validate visual-scan elements (ocr/llm source)
2462
+ if (el.relativeX < 0 || el.relativeY < 0)
2463
+ continue;
2464
+ if (!el.labelSource || el.labelSource === "ax" || el.labelSource === "manual")
2465
+ continue;
2466
+ const dx = Math.abs(el.relativeX - liveX);
2467
+ const dy = Math.abs(el.relativeY - liveY);
2468
+ const dist = Math.sqrt(dx * dx + dy * dy);
2469
+ if (dx <= tolerance && dy <= tolerance && dist < bestDist) {
2470
+ bestDist = dist;
2471
+ bestEl = el;
2472
+ }
2473
+ }
2474
+ }
2475
+ if (!bestEl)
2476
+ return false;
2477
+ bestEl.validationCount = (bestEl.validationCount ?? 0) + 1;
2478
+ // After 3 proximity validations, promote to AX-confirmed
2479
+ if ((bestEl.validationCount ?? 0) >= 3 && (bestEl.visualConfidence ?? 0) < 0.9) {
2480
+ bestEl.visualConfidence = 0.9;
2481
+ bestEl.labelSource = "ax";
2482
+ // Update label to the authoritative AX label
2483
+ if (axLabel && axLabel.length >= 2) {
2484
+ bestEl.label = axLabel;
2485
+ bestEl.ocrBackup = axLabel;
2486
+ }
2487
+ }
2488
+ this.dirty.add(bundleId);
2489
+ this.scheduleSave();
2490
+ return true;
2491
+ }
2492
+ /**
2493
+ * Check if an app's visual map is stale based on app version change.
2494
+ */
2495
+ isVisualMapStale(bundleId, currentAppVersion) {
2496
+ const meta = this.getVisualMeta(bundleId);
2497
+ if (!meta)
2498
+ return true; // No map = stale
2499
+ // Version mismatch = stale
2500
+ if (currentAppVersion && meta.appVersion !== currentAppVersion)
2501
+ return true;
2502
+ // Age-based: older than 7 days = stale
2503
+ const ageMs = Date.now() - new Date(meta.lastScannedAt).getTime();
2504
+ const ageDays = ageMs / 86_400_000;
2505
+ if (ageDays > 7)
2506
+ return true;
2507
+ // Confidence too low = stale
2508
+ if (meta.confidence < 0.3)
2509
+ return true;
2510
+ return false;
2511
+ }
2302
2512
  // ── Internals ─────────────────────────────────────────────────────
2303
2513
  ensureLoaded(bundleId) {
2304
2514
  return this.cache.get(bundleId) ?? this.load(bundleId);
@@ -0,0 +1,144 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
/**
 * Polls a world model on a timer and fires tool actions when registered
 * watch rules match the current state.
 *
 * A rule is an object with `id`, `description`, `condition(state)`,
 * `action` (`{ tool, params }`), `maxFires` (0 = unlimited), `cooldownMs`
 * and an optional `bundleId` that restricts it to the focused app.
 */
export class StateWatcher {
    /** Upper bound on the number of simultaneously registered rules. */
    static MAX_RULES = 50;
    worldModel;
    execute;
    /** Rule id → `{ rule, fireCount, lastFiredAt }`. */
    rules = new Map();
    /** Timer handle while polling, `null` when stopped. */
    interval = null;
    pollMs;
    /**
     * @param worldModel Object exposing `getState()`.
     * @param execute    `(tool, params)` callback that runs an action; its
     *                   result is treated as a promise.
     * @param pollMs     Polling period in milliseconds.
     */
    constructor(worldModel, execute, pollMs = 2_000) {
        this.worldModel = worldModel;
        this.execute = execute;
        this.pollMs = pollMs;
    }
    /**
     * Register a watch rule. Returns the rule ID for later removal.
     * Re-registering an existing ID replaces the rule and resets its counters.
     *
     * @throws {Error} when MAX_RULES rules exist and `rule.id` is a new ID.
     */
    register(rule) {
        if (this.rules.size >= StateWatcher.MAX_RULES && !this.rules.has(rule.id)) {
            throw new Error(`Maximum watch rules (${StateWatcher.MAX_RULES}) reached. Remove existing rules first.`);
        }
        this.rules.set(rule.id, {
            rule,
            fireCount: 0,
            lastFiredAt: 0,
        });
        return rule.id;
    }
    /**
     * Register a convenience rule: fire action once when a control whose label
     * contains `elementTitle` (case-insensitive) is present in any window.
     */
    watchForElement(id, elementTitle, action, bundleId) {
        const titleLower = elementTitle.toLowerCase();
        return this.register({
            id,
            description: `Watch for element "${elementTitle}"`,
            condition: (state) => {
                for (const win of state.windows.values()) {
                    for (const ctrl of win.controls.values()) {
                        if (ctrl.label?.value?.toLowerCase().includes(titleLower)) {
                            return true;
                        }
                    }
                }
                return false;
            },
            action,
            maxFires: 1,
            cooldownMs: 10_000,
            ...(bundleId ? { bundleId } : {}),
        });
    }
    /**
     * Register: fire action when an active dialog's title matches
     * `titlePattern`. Unlimited fires, 5 s cooldown.
     */
    watchForDialog(id, titlePattern, action) {
        return this.register({
            id,
            description: `Watch for dialog matching ${titlePattern}`,
            condition: (state) => state.activeDialogs.some((d) => {
                // `test()` on a global/sticky RegExp resumes from `lastIndex`.
                // Reset it so every poll and every dialog is matched from the start.
                titlePattern.lastIndex = 0;
                return titlePattern.test(d.title ?? "");
            }),
            action,
            maxFires: 0, // unlimited — dialogs can recur
            cooldownMs: 5_000,
        });
    }
    /** Remove a watch rule by ID. Returns whether a rule was removed. */
    unregister(id) {
        return this.rules.delete(id);
    }
    /** Remove all rules. */
    clear() {
        this.rules.clear();
    }
    /** Get a summary (`id`, `description`, `fireCount`) of all registered rules. */
    getRules() {
        return [...this.rules.values()].map((rs) => ({
            id: rs.rule.id,
            description: rs.rule.description,
            fireCount: rs.fireCount,
        }));
    }
    /** Start the polling loop. No-op when already running. */
    start() {
        if (this.interval)
            return;
        this.interval = setInterval(() => {
            void this.tick();
        }, this.pollMs);
    }
    /** Stop the polling loop. */
    stop() {
        if (this.interval) {
            clearInterval(this.interval);
            this.interval = null;
        }
    }
    /** Whether the polling loop is currently active. */
    get isRunning() {
        return this.interval !== null;
    }
    /**
     * Evaluate every registered rule once against the current state.
     * A rule is skipped when it is exhausted, still cooling down, or bound to
     * a bundle ID other than the focused app's. A matching rule has its
     * counters updated and its action dispatched without being awaited;
     * rules that reach `maxFires` are removed.
     */
    async tick() {
        const state = this.worldModel.getState();
        const now = Date.now();
        const focusedBundleId = state.focusedApp?.bundleId;
        for (const [id, rs] of this.rules) {
            const { rule } = rs;
            const isExhausted = () => rule.maxFires > 0 && rs.fireCount >= rule.maxFires;
            // Max fires check
            if (isExhausted())
                continue;
            // Cooldown check
            if (now - rs.lastFiredAt < rule.cooldownMs)
                continue;
            // BundleId filter
            if (rule.bundleId && rule.bundleId !== focusedBundleId)
                continue;
            // Only the condition runs inside this try, so the message below
            // can never be caused by a failing action.
            let matched;
            try {
                matched = rule.condition(state);
            }
            catch (err) {
                process.stderr.write(`[state-watcher] Rule "${id}" condition threw: ${err instanceof Error ? err.message : String(err)}\n`);
                continue;
            }
            if (!matched)
                continue;
            rs.fireCount++;
            rs.lastFiredAt = now;
            process.stderr.write(`[state-watcher] Rule "${id}" fired (${rs.fireCount}x): ${rule.description}\n`);
            // Fire and forget — don't block the poll loop
            this.#dispatch(id, rule.action);
            // Remove exhausted rules (deleting the current Map entry while iterating is safe)
            if (isExhausted()) {
                this.rules.delete(id);
            }
        }
    }
    /**
     * Invoke `execute` for a fired rule without awaiting it. Synchronous
     * throws and asynchronous rejections are both reported as action failures.
     */
    #dispatch(id, action) {
        const reportFailure = (err) => {
            process.stderr.write(`[state-watcher] Rule "${id}" action failed: ${err instanceof Error ? err.message : String(err)}\n`);
        };
        try {
            // Promise.resolve tolerates executors that return a plain value.
            Promise.resolve(this.execute(action.tool, action.params)).catch(reportFailure);
        }
        catch (err) {
            reportFailure(err);
        }
    }
}