screenhand 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,6 +97,15 @@ export class SensorPolicy {
97
97
  });
98
98
  return qualified.length > 0 ? qualified[0].sourceType : null;
99
99
  }
100
+ /**
101
+ * Seed a single entry if no real data exists for that key.
102
+ * Used for cold-start bootstrap from AppMap ReadySignals / UIArchitecture.
103
+ */
104
+ seedEntry(entry) {
105
+ if (this.entries.has(entry.key))
106
+ return; // already have real data
107
+ this.entries.set(entry.key, { ...entry });
108
+ }
100
109
  clear() {
101
110
  this.entries.clear();
102
111
  }
@@ -163,6 +163,68 @@ export class TimingModel {
163
163
  // Clear all cached distributions
164
164
  this.distributions.clear();
165
165
  }
166
+ /**
167
+ * Wire #14: Seed timing data from AppMap's TimingProfiles.
168
+ * Converts each profile to a synthetic TimingSample and loads it,
169
+ * but only for tool×bundleId keys that don't already have real samples.
170
+ */
171
+ seedFromTimingProfiles(profiles, bundleId) {
172
+ if (!profiles.length)
173
+ return;
174
+ // Bug #5 fix: aggregate profiles by tool type, computing weighted average
175
+ // Bug #6 fix: map page_load → browser_dom (a LOCATE_TOOL), add locate_with_fallback
176
+ const toolMap = {
177
+ page_load: "browser_dom",
178
+ element_response: "click",
179
+ animation: "wait_for_state",
180
+ data_fetch: "browser_wait",
181
+ };
182
+ // Group profiles by target tool
183
+ const grouped = new Map();
184
+ for (const profile of profiles) {
185
+ // Guard: skip profiles with zero/negative sample count to prevent NaN (0/0)
186
+ if (profile.sampleCount <= 0 || !Number.isFinite(profile.avgMs) || profile.avgMs <= 0)
187
+ continue;
188
+ const tool = toolMap[profile.type] ?? "browser_wait";
189
+ const key = `${tool}::${bundleId}`;
190
+ // Skip if we already have real samples for this key
191
+ if (this.samples.has(key))
192
+ continue;
193
+ const existing = grouped.get(tool);
194
+ if (existing) {
195
+ existing.totalWeightedMs += profile.avgMs * profile.sampleCount;
196
+ existing.totalSamples += profile.sampleCount;
197
+ if (profile.lastMeasured > existing.lastMeasured) {
198
+ existing.lastMeasured = profile.lastMeasured;
199
+ }
200
+ }
201
+ else {
202
+ grouped.set(tool, {
203
+ totalWeightedMs: profile.avgMs * profile.sampleCount,
204
+ totalSamples: profile.sampleCount,
205
+ lastMeasured: profile.lastMeasured,
206
+ });
207
+ }
208
+ }
209
+ // Create synthetic samples from aggregated data
210
+ const synthetics = [];
211
+ for (const [tool, agg] of grouped) {
212
+ const avgMs = agg.totalWeightedMs / agg.totalSamples;
213
+ const count = Math.min(agg.totalSamples, 5);
214
+ for (let i = 0; i < count; i++) {
215
+ synthetics.push({
216
+ tool,
217
+ bundleId,
218
+ durationMs: avgMs,
219
+ success: true,
220
+ timestamp: agg.lastMeasured,
221
+ });
222
+ }
223
+ }
224
+ if (synthetics.length > 0) {
225
+ this.loadSamples(synthetics);
226
+ }
227
+ }
166
228
  /**
167
229
  * Compute budget for a category of tools by taking the max p95
168
230
  * across all tools in that category for the given app.
@@ -81,7 +81,13 @@ async function tryClaudeAPI(apiKey, query) {
81
81
  const text = data.content?.[0]?.text;
82
82
  return text && text.length > 10 ? text.trim() : null;
83
83
  }
84
- catch {
84
+ catch (e) {
85
+ // Sanitize error to prevent API key leakage in logs/stack traces
86
+ const msg = e instanceof Error ? e.message : "unknown";
87
+ const sanitized = msg.replace(/x-api-key[^"]*"/gi, 'x-api-key: [REDACTED]"');
88
+ if (sanitized !== msg) {
89
+ throw new Error("API call failed: " + sanitized);
90
+ }
85
91
  return null;
86
92
  }
87
93
  }
@@ -104,22 +104,29 @@ export class MemoryStore {
104
104
  // ── file locking ──────────────────────────────
105
105
  acquireLock() {
106
106
  try {
107
- // Check for stale lock (PID no longer running)
108
- if (fs.existsSync(this.lockPath)) {
107
+ // Try to create lock atomically first (avoids TOCTOU race between exists-check and write)
108
+ try {
109
+ fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
110
+ }
111
+ catch {
112
+ // Lock file exists — check if it's stale (PID no longer running)
109
113
  const lockContent = fs.readFileSync(this.lockPath, "utf-8").trim();
110
114
  const lockPid = parseInt(lockContent, 10);
111
115
  if (lockPid && !this.isProcessRunning(lockPid)) {
112
- // Stale lock — remove it
116
+ // Stale lock — remove and retry with wx
113
117
  fs.unlinkSync(this.lockPath);
118
+ fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
119
+ }
120
+ else {
121
+ throw new Error("Lock held by active process");
114
122
  }
115
123
  }
116
- // Write our PID
117
- fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
118
124
  this.hasLock = true;
119
125
  }
120
- catch {
126
+ catch (err) {
121
127
  // Another instance holds the lock — we still work but skip writes
122
128
  // to avoid corruption. Reads are from our own cache (stale but safe).
129
+ console.error(`[MemoryStore] Lock acquisition failed — writes disabled: ${err instanceof Error ? err.message : err}`);
123
130
  this.hasLock = false;
124
131
  }
125
132
  }
@@ -263,7 +270,11 @@ export class MemoryStore {
263
270
  return;
264
271
  this.rotateActionsIfNeeded();
265
272
  // S75 Option C: Redact PII before persisting to disk (not in live responses)
266
- const redacted = { ...entry, result: entry.result ? redactPII(entry.result) : entry.result };
273
+ const TYPING_TOOLS = new Set(["type_text", "browser_type", "browser_fill_form", "type_with_fallback"]);
274
+ const redactedParams = TYPING_TOOLS.has(entry.tool) && entry.params && "text" in entry.params
275
+ ? { ...entry.params, text: "[REDACTED]" }
276
+ : entry.params;
277
+ const redacted = { ...entry, params: redactedParams, result: entry.result ? redactPII(entry.result) : entry.result };
267
278
  this.pendingActionWrites.push(JSON.stringify(redacted) + "\n");
268
279
  // Schedule batch flush (debounced 100ms)
269
280
  if (!this.flushTimer) {
@@ -57,6 +57,7 @@ export class PerceptionCoordinator extends EventEmitter {
57
57
  running = false;
58
58
  learningEngine = null;
59
59
  appMap = null;
60
+ contextTracker = null;
60
61
  browserEnricher = null;
61
62
  fusionPipeline = new FusionPipeline();
62
63
  // In-flight guards to prevent timer pileup when async cycles exceed their interval
@@ -71,6 +72,11 @@ export class PerceptionCoordinator extends EventEmitter {
71
72
  static IDLE_THRESHOLD_MS = 3_000;
72
73
  lastToolCallAt = Date.now();
73
74
  idle = false;
75
+ // Wire #9: L3→L7 — perception auto-updates AppMap
76
+ lastPerceptionTitle = null;
77
+ lastPerceptionDialogCount = 0;
78
+ reportedControlLabels = new Set();
79
+ static MAP_UPDATE_INTERVAL = 5; // every 5th medium cycle
74
80
  constructor(worldModel, axSource, cdpSource, visionSource, config) {
75
81
  super();
76
82
  this.worldModel = worldModel;
@@ -93,6 +99,12 @@ export class PerceptionCoordinator extends EventEmitter {
93
99
  setAppMap(map) {
94
100
  this.appMap = map;
95
101
  }
102
+ /**
103
+ * Wire F10: Inject context tracker for per-app perception config from references.
104
+ */
105
+ setContextTracker(tracker) {
106
+ this.contextTracker = tracker;
107
+ }
96
108
  /**
97
109
  * Set a browser enricher callback for non-CDP browsers (Safari).
98
110
  * Called during medium cycle to fetch URL/title/tabs via AppleScript.
@@ -101,6 +113,67 @@ export class PerceptionCoordinator extends EventEmitter {
101
113
  setBrowserEnricher(fn) {
102
114
  this.browserEnricher = fn;
103
115
  }
116
+ /**
117
+ * Wire #16: Adjust perception intervals based on app timing characteristics.
118
+ * Clamps: fast 50-500ms, medium 100-1000ms, slow 500-5000ms.
119
+ * Only takes effect if perception is running (restarts timers with new intervals).
120
+ */
121
+ adjustIntervals(overrides) {
122
+ if (!this.running) {
123
+ // Not running — just update config, timers will use it on next start()
124
+ const clamp = (val, min, max) => Math.max(min, Math.min(max, val));
125
+ if (overrides.fastIntervalMs != null) {
126
+ this.config = { ...this.config, fastIntervalMs: clamp(overrides.fastIntervalMs, 50, 500) };
127
+ }
128
+ if (overrides.mediumIntervalMs != null) {
129
+ this.config = { ...this.config, mediumIntervalMs: clamp(overrides.mediumIntervalMs, 100, 1000) };
130
+ }
131
+ if (overrides.slowIntervalMs != null) {
132
+ this.config = { ...this.config, slowIntervalMs: clamp(overrides.slowIntervalMs, 500, 5000) };
133
+ }
134
+ return;
135
+ }
136
+ const clamp = (val, min, max) => Math.max(min, Math.min(max, val));
137
+ if (overrides.fastIntervalMs != null) {
138
+ this.config = { ...this.config, fastIntervalMs: clamp(overrides.fastIntervalMs, 50, 500) };
139
+ }
140
+ if (overrides.mediumIntervalMs != null) {
141
+ this.config = { ...this.config, mediumIntervalMs: clamp(overrides.mediumIntervalMs, 100, 1000) };
142
+ }
143
+ if (overrides.slowIntervalMs != null) {
144
+ this.config = { ...this.config, slowIntervalMs: clamp(overrides.slowIntervalMs, 500, 5000) };
145
+ }
146
+ // Restart timers — only reached when this.running is true
147
+ if (this.running) {
148
+ if (this.fastTimer) {
149
+ clearInterval(this.fastTimer);
150
+ this.fastTimer = setInterval(() => {
151
+ if (this.fastInFlight)
152
+ return;
153
+ this.fastInFlight = true;
154
+ void this.fastCycle().catch(() => { }).finally(() => { this.fastInFlight = false; });
155
+ }, this.config.fastIntervalMs);
156
+ }
157
+ if (this.mediumTimer) {
158
+ clearInterval(this.mediumTimer);
159
+ this.mediumTimer = setInterval(() => {
160
+ if (this.mediumInFlight)
161
+ return;
162
+ this.mediumInFlight = true;
163
+ void this.mediumCycle().catch(() => { }).finally(() => { this.mediumInFlight = false; });
164
+ }, this.config.mediumIntervalMs);
165
+ }
166
+ if (this.slowTimer) {
167
+ clearInterval(this.slowTimer);
168
+ this.slowTimer = setInterval(() => {
169
+ if (this.slowInFlight)
170
+ return;
171
+ this.slowInFlight = true;
172
+ void this.slowCycle().catch(() => { }).finally(() => { this.slowInFlight = false; });
173
+ }, this.config.slowIntervalMs);
174
+ }
175
+ }
176
+ }
104
177
  /**
105
178
  * Notify that a tool call is happening — resets idle timer and starts stream if needed.
106
179
  * Call this from the intelligence wrapper PRE-CALL.
@@ -253,6 +326,10 @@ export class PerceptionCoordinator extends EventEmitter {
253
326
  this.fastInFlight = false;
254
327
  this.mediumInFlight = false;
255
328
  this.slowInFlight = false;
329
+ // Wire #9: reset AppMap tracking state on stop
330
+ this.lastPerceptionTitle = null;
331
+ this.lastPerceptionDialogCount = 0;
332
+ this.reportedControlLabels.clear();
256
333
  this.emit("stopped");
257
334
  }
258
335
  /**
@@ -288,6 +365,38 @@ export class PerceptionCoordinator extends EventEmitter {
288
365
  await this.stop();
289
366
  this.visionSource?.reset();
290
367
  this.cdpSource?.reset();
368
+ // Wire #16: reset intervals to defaults, then adjust based on AppMap timing data
369
+ this.config = {
370
+ ...this.config,
371
+ fastIntervalMs: DEFAULT_PERCEPTION_CONFIG.fastIntervalMs,
372
+ mediumIntervalMs: DEFAULT_PERCEPTION_CONFIG.mediumIntervalMs,
373
+ slowIntervalMs: DEFAULT_PERCEPTION_CONFIG.slowIntervalMs,
374
+ };
375
+ if (this.appMap && appContext.bundleId) {
376
+ const profiles = this.appMap.getTimingProfile(appContext.bundleId);
377
+ if (profiles.length > 0) {
378
+ const elementProfiles = profiles.filter((p) => p.type === "element_response");
379
+ // Require 3+ element_response profiles for reliable interval adaptation
380
+ if (elementProfiles.length >= 3) {
381
+ const avgResponse = elementProfiles.reduce((sum, p) => sum + p.avgMs, 0) / elementProfiles.length;
382
+ if (avgResponse > 1500) {
383
+ // Slow app — increase slow interval, relax medium
384
+ this.adjustIntervals({ slowIntervalMs: 2000, mediumIntervalMs: 800 });
385
+ }
386
+ else if (avgResponse < 300) {
387
+ // Fast app — tighten slow interval for quicker visual updates
388
+ this.adjustIntervals({ slowIntervalMs: 500 });
389
+ }
390
+ }
391
+ }
392
+ }
393
+ // Wire F10: Apply per-app perception config from reference/playbook (L2→L3)
394
+ if (this.contextTracker) {
395
+ const refConfig = this.contextTracker.getPerceptionConfig();
396
+ if (refConfig) {
397
+ this.adjustIntervals(refConfig);
398
+ }
399
+ }
291
400
  await this.start(appContext, cdpClient);
292
401
  }
293
402
  /**
@@ -432,6 +541,10 @@ export class PerceptionCoordinator extends EventEmitter {
432
541
  catch { /* best-effort */ }
433
542
  }
434
543
  this.stats.mediumCycles++;
544
+ // Wire #9: L3→L7 — auto-update AppMap from perception (every 5th cycle, skip cycle 1)
545
+ if (this.stats.mediumCycles > 1 && this.stats.mediumCycles % PerceptionCoordinator.MAP_UPDATE_INTERVAL === 0) {
546
+ this.updateAppMapFromPerception();
547
+ }
435
548
  this.stats.lastMediumAt = timestamp;
436
549
  }
437
550
  /**
@@ -670,13 +783,17 @@ export class PerceptionCoordinator extends EventEmitter {
670
783
  this.stats.slowCycles++;
671
784
  return; // Observer daemon is capturing — skip this cycle
672
785
  }
786
+ let didWork = false;
673
787
  try {
674
788
  // Screenshot diff — optimized single-capture pipeline
675
789
  const windowId = this.activeWindowId ?? 0;
676
790
  if (windowId === 0)
677
791
  return; // Vision needs a real window ID for screenshot
792
+ didWork = true;
678
793
  const SLOW_CYCLE_TIMEOUT_MS = 25_000;
679
- const { diffEvent, ocrEvent, yoloElements } = await withTimeout(this.visionSource.captureAndDiffOptimized(windowId, this.config.maxROIsPerCycle), SLOW_CYCLE_TIMEOUT_MS, "captureAndDiffOptimized");
794
+ // Wire #10: L7→L3 pass zone ROIs for targeted OCR instead of full-screen fallback
795
+ const zoneROIs = this.getZoneROIs();
796
+ const { diffEvent, ocrEvent, yoloElements } = await withTimeout(this.visionSource.captureAndDiffOptimized(windowId, this.config.maxROIsPerCycle, zoneROIs.length > 0 ? zoneROIs : undefined), SLOW_CYCLE_TIMEOUT_MS, "captureAndDiffOptimized");
680
797
  if (diffEvent) {
681
798
  this.stats.visionDiffs++;
682
799
  this.stats.lastVisionAt = new Date().toISOString();
@@ -707,6 +824,10 @@ export class PerceptionCoordinator extends EventEmitter {
707
824
  this.appMap.save(mapData);
708
825
  }
709
826
  }
827
+ // Wire #9: record element visibility from OCR (every 3rd slow cycle, skip first)
828
+ if (this.stats.slowCycles > 0 && this.stats.slowCycles % 3 === 0 && ocrEvent.data.type === "vision_ocr") {
829
+ this.recordVisibilityFromOCR(ocrEvent.data.regions);
830
+ }
710
831
  this.emit("perception", ocrEvent);
711
832
  }
712
833
  // Fuse YOLO element detections with OCR text regions
@@ -761,11 +882,190 @@ export class PerceptionCoordinator extends EventEmitter {
761
882
  }
762
883
  }
763
884
  finally {
764
- // Always increment stats, even on early return (windowId=0) or error
765
- this.stats.slowCycles++;
766
- this.stats.lastSlowAt = timestamp;
885
+ if (didWork) {
886
+ this.stats.slowCycles++;
887
+ this.stats.lastSlowAt = timestamp;
888
+ }
767
889
  if (!this.config.skipCaptureLock)
768
890
  releaseCaptureLock();
769
891
  }
770
892
  }
893
+ // ── Wire #9: L3→L7 — auto-update AppMap from perception ──
894
+ /**
895
+ * Wire #9: Compare current world model state against tracked previous state
896
+ * and auto-record changes to AppMap. Called every MAP_UPDATE_INTERVAL medium cycles.
897
+ *
898
+ * Records:
899
+ * - Page transitions (window title changes)
900
+ * - Dialog state changes (open/closed)
901
+ * - New element discovery from AX/CDP controls
902
+ */
903
+ updateAppMapFromPerception() {
904
+ if (!this.appMap || !this.activeAppContext)
905
+ return;
906
+ const bundleId = this.activeAppContext.bundleId;
907
+ const state = this.worldModel.getState();
908
+ const focusedWin = state.focusedWindowId !== null
909
+ ? state.windows.get(state.focusedWindowId) ?? null
910
+ : null;
911
+ if (!focusedWin)
912
+ return;
913
+ const currentTitle = focusedWin.title?.value ?? null;
914
+ // 1. Page transition detection — window title changed between cycles
915
+ if (this.lastPerceptionTitle !== null &&
916
+ currentTitle !== null &&
917
+ this.lastPerceptionTitle !== currentTitle) {
918
+ try {
919
+ const fromTitle = this.lastPerceptionTitle.length > 80
920
+ ? this.lastPerceptionTitle.slice(0, 80).trim() : this.lastPerceptionTitle;
921
+ const toTitle = currentTitle.length > 80
922
+ ? currentTitle.slice(0, 80).trim() : currentTitle;
923
+ this.appMap.recordPageTransition(bundleId, fromTitle, toTitle, "perception_detected");
924
+ }
925
+ catch { /* best-effort — limits, PII filter, etc. */ }
926
+ }
927
+ this.lastPerceptionTitle = currentTitle;
928
+ // 2. Dialog state change detection
929
+ const currentDialogCount = state.activeDialogs.length;
930
+ if (currentDialogCount !== this.lastPerceptionDialogCount) {
931
+ const from = this.lastPerceptionDialogCount > 0 ? "open" : "closed";
932
+ const to = currentDialogCount > 0 ? "open" : "closed";
933
+ if (from !== to) {
934
+ try {
935
+ this.appMap.recordStateChange(bundleId, "dialog_state", from, to, "perception_detected");
936
+ }
937
+ catch { /* best-effort */ }
938
+ }
939
+ this.lastPerceptionDialogCount = currentDialogCount;
940
+ }
941
+ // 3. New element discovery from controls (max 10 per cycle to avoid flooding)
942
+ // Truncate pageContext to avoid zone key bloat from dynamic window titles
943
+ const rawPageContext = currentTitle ?? "unknown";
944
+ const pageContext = rawPageContext.length > 80 ? rawPageContext.slice(0, 80).trim() : rawPageContext;
945
+ let added = 0;
946
+ // Cap reportedControlLabels — evict oldest entries instead of full clear
947
+ // to prevent re-flooding AppMap with already-known elements
948
+ if (this.reportedControlLabels.size > 5000) {
949
+ const iter = this.reportedControlLabels.values();
950
+ for (let i = 0; i < 1000; i++)
951
+ iter.next();
952
+ const keep = new Set();
953
+ for (const val of iter)
954
+ keep.add(val);
955
+ this.reportedControlLabels.clear();
956
+ for (const val of keep)
957
+ this.reportedControlLabels.add(val);
958
+ }
959
+ for (const [, ctrl] of focusedWin.controls) {
960
+ if (added >= 10)
961
+ break;
962
+ const label = ctrl.label?.value;
963
+ // Skip empty, too-short, too-long, or already-reported labels
964
+ if (!label || label.length < 2 || label.length > 60)
965
+ continue;
966
+ if (this.reportedControlLabels.has(label))
967
+ continue;
968
+ this.reportedControlLabels.add(label);
969
+ try {
970
+ // Use recordElementOutcome with zoneKey "auto" — auto-creates zones
971
+ // unlike addElement which silently fails when zone doesn't exist
972
+ this.appMap.recordElementOutcome(bundleId, "auto", label, true, pageContext);
973
+ added++;
974
+ }
975
+ catch { /* best-effort — zone limits, PII filter, etc. */ }
976
+ }
977
+ }
978
+ /**
979
+ * Wire #9: Record element visibility from OCR detections.
980
+ * Called in slow cycle after OCR produces text regions.
981
+ */
982
+ recordVisibilityFromOCR(regions) {
983
+ if (!this.appMap || !this.activeAppContext)
984
+ return;
985
+ const bundleId = this.activeAppContext.bundleId;
986
+ const state = this.worldModel.getState();
987
+ const focusedWin = state.focusedWindowId !== null
988
+ ? state.windows.get(state.focusedWindowId) ?? null
989
+ : null;
990
+ const rawPageContext = focusedWin?.title?.value ?? "unknown";
991
+ const pageContext = rawPageContext.length > 80 ? rawPageContext.slice(0, 80).trim() : rawPageContext;
992
+ // Only record visibility for OCR text that matches known AX controls —
993
+ // raw OCR picks up body text, logos, dates etc. that aren't UI elements
994
+ const knownLabels = new Set();
995
+ if (focusedWin) {
996
+ for (const [, ctrl] of focusedWin.controls) {
997
+ const label = ctrl.label?.value;
998
+ if (label && label.length >= 2)
999
+ knownLabels.add(label.toLowerCase());
1000
+ }
1001
+ }
1002
+ let recorded = 0;
1003
+ for (const region of regions) {
1004
+ if (recorded >= 20)
1005
+ break;
1006
+ const text = region.text.trim();
1007
+ if (text.length < 2 || text.length > 60)
1008
+ continue;
1009
+ // Cross-reference: case-insensitive match against known AX control labels
1010
+ if (!knownLabels.has(text.toLowerCase()))
1011
+ continue;
1012
+ try {
1013
+ this.appMap.recordElementVisibility(bundleId, text, pageContext, true);
1014
+ recorded++;
1015
+ }
1016
+ catch { /* best-effort */ }
1017
+ }
1018
+ }
1019
+ // ── Wire #10: L7→L3 — zone ROI → targeted OCR ──
1020
+ /**
1021
+ * Wire #10: Convert AppMap zone positions to pixel ROIs for targeted OCR.
1022
+ * Returns ROIs based on known zones, prioritized by zone type.
1023
+ */
1024
+ getZoneROIs() {
1025
+ if (!this.appMap || !this.activeAppContext)
1026
+ return [];
1027
+ const state = this.worldModel.getState();
1028
+ const focusedWin = state.focusedWindowId !== null
1029
+ ? state.windows.get(state.focusedWindowId) ?? null
1030
+ : null;
1031
+ if (!focusedWin)
1032
+ return [];
1033
+ const winBounds = focusedWin.bounds?.value;
1034
+ if (!winBounds || winBounds.width <= 0 || winBounds.height <= 0)
1035
+ return [];
1036
+ const mapData = this.appMap.load(this.activeAppContext.bundleId);
1037
+ if (!mapData)
1038
+ return [];
1039
+ const rois = [];
1040
+ for (const [, zone] of Object.entries(mapData.zones)) {
1041
+ const rp = zone.relativePosition;
1042
+ // Skip zones with zero/invalid dimensions
1043
+ if (!rp || rp.width <= 0 || rp.height <= 0)
1044
+ continue;
1045
+ // Clamp relative positions to [0, 1] to prevent out-of-bounds ROIs
1046
+ // from corrupted or stale map data
1047
+ const left = Math.max(0, Math.min(1, rp.left));
1048
+ const top = Math.max(0, Math.min(1, rp.top));
1049
+ const clampedWidth = Math.min(Math.max(0, rp.width), 1.0 - left);
1050
+ const clampedHeight = Math.min(Math.max(0, rp.height), 1.0 - top);
1051
+ if (clampedWidth <= 0 || clampedHeight <= 0)
1052
+ continue;
1053
+ // ROI coordinates are relative to the window capture image (0,0 = window top-left),
1054
+ // not screen-absolute. Zone positions are 0-1 relative to the window.
1055
+ const pixelW = Math.max(1, Math.round(clampedWidth * winBounds.width));
1056
+ const pixelH = Math.max(1, Math.round(clampedHeight * winBounds.height));
1057
+ // Skip full-window ROIs from auto_discovered zones (defeats targeted OCR purpose)
1058
+ if (clampedWidth >= 0.9 && clampedHeight >= 0.9)
1059
+ continue;
1060
+ rois.push({
1061
+ x: Math.round(left * winBounds.width),
1062
+ y: Math.round(top * winBounds.height),
1063
+ width: pixelW,
1064
+ height: pixelH,
1065
+ reason: "known_zone",
1066
+ });
1067
+ }
1068
+ // Cap at 5 zone ROIs to stay within budget
1069
+ return rois.slice(0, 5);
1070
+ }
771
1071
  }
@@ -37,6 +37,7 @@ export class PerceptionManager extends EventEmitter {
37
37
  lastCdpClient = null;
38
38
  pendingLearningEngine = null;
39
39
  pendingAppMap = null;
40
+ pendingContextTracker = null;
40
41
  constructor(worldModel, config) {
41
42
  super();
42
43
  this.worldModel = worldModel;
@@ -62,6 +63,15 @@ export class PerceptionManager extends EventEmitter {
62
63
  this.coordinator.setAppMap(map);
63
64
  }
64
65
  }
66
+ /**
67
+ * Wire F10: Inject context tracker for per-app perception config.
68
+ */
69
+ setContextTracker(tracker) {
70
+ this.pendingContextTracker = tracker;
71
+ if (this.coordinator) {
72
+ this.coordinator.setContextTracker(tracker);
73
+ }
74
+ }
65
75
  /**
66
76
  * Create perception sources from the bridge. Called once after ensureBridge().
67
77
  */
@@ -80,6 +90,9 @@ export class PerceptionManager extends EventEmitter {
80
90
  if (this.pendingAppMap) {
81
91
  this.coordinator.setAppMap(this.pendingAppMap);
82
92
  }
93
+ if (this.pendingContextTracker) {
94
+ this.coordinator.setContextTracker(this.pendingContextTracker);
95
+ }
83
96
  this.coordinator.on("perception", (event) => {
84
97
  this.handleReactiveEvent(event);
85
98
  });
@@ -180,7 +180,7 @@ export class VisionSource {
180
180
  * Performance: unchanged ~113ms, changed ~175ms (vs ~370ms before Phase 2).
181
181
  * Phase 1 (FAST OCR) + Phase 2 (region OCR) combined.
182
182
  */
183
- async captureAndDiffOptimized(windowId, maxROIs = 3) {
183
+ async captureAndDiffOptimized(windowId, maxROIs = 3, priorityROIs) {
184
184
  const start = Date.now();
185
185
  // 1. Capture: stream frame (~0ms) or one-shot (~112ms)
186
186
  const capture = await this.captureToFileOrStream(windowId);
@@ -208,12 +208,22 @@ export class VisionSource {
208
208
  // 4. Run OCR and YOLO in parallel on the same captured frame
209
209
  const mergedRegions = FrameDiffer.mergeRegions(changedRegions, maxROIs, 64, capture.width, capture.height);
210
210
  // OCR (region-based or full)
211
- const ocrPromise = (mergedRegions.length > 0 && mergedRegions.length <= maxROIs)
211
+ // Wire #10: When too many changed regions, use zone ROIs (priorityROIs) for
212
+ // targeted OCR instead of expensive full-screen OCR fallback.
213
+ const useRegionOCR = mergedRegions.length > 0 && mergedRegions.length <= maxROIs;
214
+ const useZoneOCR = !useRegionOCR && mergedRegions.length > maxROIs && priorityROIs && priorityROIs.length > 0;
215
+ const ocrTargets = useRegionOCR ? mergedRegions : useZoneOCR ? priorityROIs : null;
216
+ const ocrPromise = ocrTargets
212
217
  ? (async () => {
213
218
  const regionResults = [];
214
- for (const roi of mergedRegions) {
219
+ for (const roi of ocrTargets) {
215
220
  const regionEvent = await this.ocrRegion(windowId, roi);
216
221
  if (regionEvent?.data.type === "vision_ocr" && regionEvent.data.regions) {
222
+ // Re-anchor OCR bounds from ROI-relative to window-relative coordinates
223
+ for (const region of regionEvent.data.regions) {
224
+ region.bounds.x += roi.x;
225
+ region.bounds.y += roi.y;
226
+ }
217
227
  regionResults.push(...regionEvent.data.regions);
218
228
  }
219
229
  }
@@ -223,7 +233,7 @@ export class VisionSource {
223
233
  timestamp: new Date().toISOString(),
224
234
  data: {
225
235
  type: "vision_ocr",
226
- roi: mergedRegions[0] ?? { x: 0, y: 0, width: 0, height: 0, reason: "changed_pixels" },
236
+ roi: ocrTargets[0] ?? { x: 0, y: 0, width: 0, height: 0, reason: "changed_pixels" },
227
237
  text: fullText,
228
238
  regions: regionResults,
229
239
  latencyMs: Date.now() - start - captureMs,