screenhand 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/mcp-desktop.js +572 -162
- package/dist/src/community/fetcher.js +32 -2
- package/dist/src/community/validator.js +15 -1
- package/dist/src/context-tracker.js +115 -43
- package/dist/src/ingestion/reference-merger.js +3 -1
- package/dist/src/learning/engine.js +225 -7
- package/dist/src/learning/locator-policy.js +16 -0
- package/dist/src/learning/pattern-policy.js +9 -0
- package/dist/src/learning/recovery-policy.js +16 -0
- package/dist/src/learning/sensor-policy.js +9 -0
- package/dist/src/learning/timing-model.js +62 -0
- package/dist/src/memory/research.js +7 -1
- package/dist/src/memory/store.js +18 -7
- package/dist/src/perception/coordinator.js +304 -4
- package/dist/src/perception/manager.js +13 -0
- package/dist/src/perception/vision-source.js +14 -4
- package/dist/src/planner/executor.js +125 -2
- package/dist/src/planner/planner.js +509 -10
- package/dist/src/playbook/engine.js +10 -0
- package/dist/src/recovery/engine.js +50 -3
- package/dist/src/runtime/execution-contract.js +67 -5
- package/dist/src/runtime/executor.js +41 -1
- package/dist/src/runtime/service.js +7 -0
- package/dist/src/state/app-map.js +307 -17
- package/dist/src/util/atomic-write.js +25 -4
- package/dist-references/reddit.json +2 -2
- package/package.json +1 -1
|
@@ -97,6 +97,15 @@ export class SensorPolicy {
|
|
|
97
97
|
});
|
|
98
98
|
return qualified.length > 0 ? qualified[0].sourceType : null;
|
|
99
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* Seed a single entry if no real data exists for that key.
|
|
102
|
+
* Used for cold-start bootstrap from AppMap ReadySignals / UIArchitecture.
|
|
103
|
+
*/
|
|
104
|
+
seedEntry(entry) {
|
|
105
|
+
if (this.entries.has(entry.key))
|
|
106
|
+
return; // already have real data
|
|
107
|
+
this.entries.set(entry.key, { ...entry });
|
|
108
|
+
}
|
|
100
109
|
clear() {
|
|
101
110
|
this.entries.clear();
|
|
102
111
|
}
|
|
@@ -163,6 +163,68 @@ export class TimingModel {
|
|
|
163
163
|
// Clear all cached distributions
|
|
164
164
|
this.distributions.clear();
|
|
165
165
|
}
|
|
166
|
+
/**
|
|
167
|
+
* Wire #14: Seed timing data from AppMap's TimingProfiles.
|
|
168
|
+
* Converts each profile to a synthetic TimingSample and loads it,
|
|
169
|
+
* but only for tool×bundleId keys that don't already have real samples.
|
|
170
|
+
*/
|
|
171
|
+
seedFromTimingProfiles(profiles, bundleId) {
|
|
172
|
+
if (!profiles.length)
|
|
173
|
+
return;
|
|
174
|
+
// Bug #5 fix: aggregate profiles by tool type, computing weighted average
|
|
175
|
+
// Bug #6 fix: map page_load → browser_dom (a LOCATE_TOOL), add locate_with_fallback
|
|
176
|
+
const toolMap = {
|
|
177
|
+
page_load: "browser_dom",
|
|
178
|
+
element_response: "click",
|
|
179
|
+
animation: "wait_for_state",
|
|
180
|
+
data_fetch: "browser_wait",
|
|
181
|
+
};
|
|
182
|
+
// Group profiles by target tool
|
|
183
|
+
const grouped = new Map();
|
|
184
|
+
for (const profile of profiles) {
|
|
185
|
+
// Guard: skip profiles with zero/negative sample count to prevent NaN (0/0)
|
|
186
|
+
if (profile.sampleCount <= 0 || !Number.isFinite(profile.avgMs) || profile.avgMs <= 0)
|
|
187
|
+
continue;
|
|
188
|
+
const tool = toolMap[profile.type] ?? "browser_wait";
|
|
189
|
+
const key = `${tool}::${bundleId}`;
|
|
190
|
+
// Skip if we already have real samples for this key
|
|
191
|
+
if (this.samples.has(key))
|
|
192
|
+
continue;
|
|
193
|
+
const existing = grouped.get(tool);
|
|
194
|
+
if (existing) {
|
|
195
|
+
existing.totalWeightedMs += profile.avgMs * profile.sampleCount;
|
|
196
|
+
existing.totalSamples += profile.sampleCount;
|
|
197
|
+
if (profile.lastMeasured > existing.lastMeasured) {
|
|
198
|
+
existing.lastMeasured = profile.lastMeasured;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
else {
|
|
202
|
+
grouped.set(tool, {
|
|
203
|
+
totalWeightedMs: profile.avgMs * profile.sampleCount,
|
|
204
|
+
totalSamples: profile.sampleCount,
|
|
205
|
+
lastMeasured: profile.lastMeasured,
|
|
206
|
+
});
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
// Create synthetic samples from aggregated data
|
|
210
|
+
const synthetics = [];
|
|
211
|
+
for (const [tool, agg] of grouped) {
|
|
212
|
+
const avgMs = agg.totalWeightedMs / agg.totalSamples;
|
|
213
|
+
const count = Math.min(agg.totalSamples, 5);
|
|
214
|
+
for (let i = 0; i < count; i++) {
|
|
215
|
+
synthetics.push({
|
|
216
|
+
tool,
|
|
217
|
+
bundleId,
|
|
218
|
+
durationMs: avgMs,
|
|
219
|
+
success: true,
|
|
220
|
+
timestamp: agg.lastMeasured,
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
if (synthetics.length > 0) {
|
|
225
|
+
this.loadSamples(synthetics);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
166
228
|
/**
|
|
167
229
|
* Compute budget for a category of tools by taking the max p95
|
|
168
230
|
* across all tools in that category for the given app.
|
|
@@ -81,7 +81,13 @@ async function tryClaudeAPI(apiKey, query) {
|
|
|
81
81
|
const text = data.content?.[0]?.text;
|
|
82
82
|
return text && text.length > 10 ? text.trim() : null;
|
|
83
83
|
}
|
|
84
|
-
catch {
|
|
84
|
+
catch (e) {
|
|
85
|
+
// Sanitize error to prevent API key leakage in logs/stack traces
|
|
86
|
+
const msg = e instanceof Error ? e.message : "unknown";
|
|
87
|
+
const sanitized = msg.replace(/x-api-key[^"]*"/gi, 'x-api-key: [REDACTED]"');
|
|
88
|
+
if (sanitized !== msg) {
|
|
89
|
+
throw new Error("API call failed: " + sanitized);
|
|
90
|
+
}
|
|
85
91
|
return null;
|
|
86
92
|
}
|
|
87
93
|
}
|
package/dist/src/memory/store.js
CHANGED
|
@@ -104,22 +104,29 @@ export class MemoryStore {
|
|
|
104
104
|
// ── file locking ──────────────────────────────
|
|
105
105
|
acquireLock() {
|
|
106
106
|
try {
|
|
107
|
-
//
|
|
108
|
-
|
|
107
|
+
// Try to create lock atomically first (avoids TOCTOU race between exists-check and write)
|
|
108
|
+
try {
|
|
109
|
+
fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
|
|
110
|
+
}
|
|
111
|
+
catch {
|
|
112
|
+
// Lock file exists — check if it's stale (PID no longer running)
|
|
109
113
|
const lockContent = fs.readFileSync(this.lockPath, "utf-8").trim();
|
|
110
114
|
const lockPid = parseInt(lockContent, 10);
|
|
111
115
|
if (lockPid && !this.isProcessRunning(lockPid)) {
|
|
112
|
-
// Stale lock — remove
|
|
116
|
+
// Stale lock — remove and retry with wx
|
|
113
117
|
fs.unlinkSync(this.lockPath);
|
|
118
|
+
fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
throw new Error("Lock held by active process");
|
|
114
122
|
}
|
|
115
123
|
}
|
|
116
|
-
// Write our PID
|
|
117
|
-
fs.writeFileSync(this.lockPath, String(process.pid), { flag: "wx" });
|
|
118
124
|
this.hasLock = true;
|
|
119
125
|
}
|
|
120
|
-
catch {
|
|
126
|
+
catch (err) {
|
|
121
127
|
// Another instance holds the lock — we still work but skip writes
|
|
122
128
|
// to avoid corruption. Reads are from our own cache (stale but safe).
|
|
129
|
+
console.error(`[MemoryStore] Lock acquisition failed — writes disabled: ${err instanceof Error ? err.message : err}`);
|
|
123
130
|
this.hasLock = false;
|
|
124
131
|
}
|
|
125
132
|
}
|
|
@@ -263,7 +270,11 @@ export class MemoryStore {
|
|
|
263
270
|
return;
|
|
264
271
|
this.rotateActionsIfNeeded();
|
|
265
272
|
// S75 Option C: Redact PII before persisting to disk (not in live responses)
|
|
266
|
-
const
|
|
273
|
+
const TYPING_TOOLS = new Set(["type_text", "browser_type", "browser_fill_form", "type_with_fallback"]);
|
|
274
|
+
const redactedParams = TYPING_TOOLS.has(entry.tool) && entry.params && "text" in entry.params
|
|
275
|
+
? { ...entry.params, text: "[REDACTED]" }
|
|
276
|
+
: entry.params;
|
|
277
|
+
const redacted = { ...entry, params: redactedParams, result: entry.result ? redactPII(entry.result) : entry.result };
|
|
267
278
|
this.pendingActionWrites.push(JSON.stringify(redacted) + "\n");
|
|
268
279
|
// Schedule batch flush (debounced 100ms)
|
|
269
280
|
if (!this.flushTimer) {
|
|
@@ -57,6 +57,7 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
57
57
|
running = false;
|
|
58
58
|
learningEngine = null;
|
|
59
59
|
appMap = null;
|
|
60
|
+
contextTracker = null;
|
|
60
61
|
browserEnricher = null;
|
|
61
62
|
fusionPipeline = new FusionPipeline();
|
|
62
63
|
// In-flight guards to prevent timer pileup when async cycles exceed their interval
|
|
@@ -71,6 +72,11 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
71
72
|
static IDLE_THRESHOLD_MS = 3_000;
|
|
72
73
|
lastToolCallAt = Date.now();
|
|
73
74
|
idle = false;
|
|
75
|
+
// Wire #9: L3→L7 — perception auto-updates AppMap
|
|
76
|
+
lastPerceptionTitle = null;
|
|
77
|
+
lastPerceptionDialogCount = 0;
|
|
78
|
+
reportedControlLabels = new Set();
|
|
79
|
+
static MAP_UPDATE_INTERVAL = 5; // every 5th medium cycle
|
|
74
80
|
constructor(worldModel, axSource, cdpSource, visionSource, config) {
|
|
75
81
|
super();
|
|
76
82
|
this.worldModel = worldModel;
|
|
@@ -93,6 +99,12 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
93
99
|
setAppMap(map) {
|
|
94
100
|
this.appMap = map;
|
|
95
101
|
}
|
|
102
|
+
/**
|
|
103
|
+
* Wire F10: Inject context tracker for per-app perception config from references.
|
|
104
|
+
*/
|
|
105
|
+
setContextTracker(tracker) {
|
|
106
|
+
this.contextTracker = tracker;
|
|
107
|
+
}
|
|
96
108
|
/**
|
|
97
109
|
* Set a browser enricher callback for non-CDP browsers (Safari).
|
|
98
110
|
* Called during medium cycle to fetch URL/title/tabs via AppleScript.
|
|
@@ -101,6 +113,67 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
101
113
|
setBrowserEnricher(fn) {
|
|
102
114
|
this.browserEnricher = fn;
|
|
103
115
|
}
|
|
116
|
+
/**
|
|
117
|
+
* Wire #16: Adjust perception intervals based on app timing characteristics.
|
|
118
|
+
* Clamps: fast 50-500ms, medium 100-1000ms, slow 500-5000ms.
|
|
119
|
+
* Only takes effect if perception is running (restarts timers with new intervals).
|
|
120
|
+
*/
|
|
121
|
+
adjustIntervals(overrides) {
|
|
122
|
+
if (!this.running) {
|
|
123
|
+
// Not running — just update config, timers will use it on next start()
|
|
124
|
+
const clamp = (val, min, max) => Math.max(min, Math.min(max, val));
|
|
125
|
+
if (overrides.fastIntervalMs != null) {
|
|
126
|
+
this.config = { ...this.config, fastIntervalMs: clamp(overrides.fastIntervalMs, 50, 500) };
|
|
127
|
+
}
|
|
128
|
+
if (overrides.mediumIntervalMs != null) {
|
|
129
|
+
this.config = { ...this.config, mediumIntervalMs: clamp(overrides.mediumIntervalMs, 100, 1000) };
|
|
130
|
+
}
|
|
131
|
+
if (overrides.slowIntervalMs != null) {
|
|
132
|
+
this.config = { ...this.config, slowIntervalMs: clamp(overrides.slowIntervalMs, 500, 5000) };
|
|
133
|
+
}
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
const clamp = (val, min, max) => Math.max(min, Math.min(max, val));
|
|
137
|
+
if (overrides.fastIntervalMs != null) {
|
|
138
|
+
this.config = { ...this.config, fastIntervalMs: clamp(overrides.fastIntervalMs, 50, 500) };
|
|
139
|
+
}
|
|
140
|
+
if (overrides.mediumIntervalMs != null) {
|
|
141
|
+
this.config = { ...this.config, mediumIntervalMs: clamp(overrides.mediumIntervalMs, 100, 1000) };
|
|
142
|
+
}
|
|
143
|
+
if (overrides.slowIntervalMs != null) {
|
|
144
|
+
this.config = { ...this.config, slowIntervalMs: clamp(overrides.slowIntervalMs, 500, 5000) };
|
|
145
|
+
}
|
|
146
|
+
// Restart timers — only reached when this.running is true
|
|
147
|
+
if (this.running) {
|
|
148
|
+
if (this.fastTimer) {
|
|
149
|
+
clearInterval(this.fastTimer);
|
|
150
|
+
this.fastTimer = setInterval(() => {
|
|
151
|
+
if (this.fastInFlight)
|
|
152
|
+
return;
|
|
153
|
+
this.fastInFlight = true;
|
|
154
|
+
void this.fastCycle().catch(() => { }).finally(() => { this.fastInFlight = false; });
|
|
155
|
+
}, this.config.fastIntervalMs);
|
|
156
|
+
}
|
|
157
|
+
if (this.mediumTimer) {
|
|
158
|
+
clearInterval(this.mediumTimer);
|
|
159
|
+
this.mediumTimer = setInterval(() => {
|
|
160
|
+
if (this.mediumInFlight)
|
|
161
|
+
return;
|
|
162
|
+
this.mediumInFlight = true;
|
|
163
|
+
void this.mediumCycle().catch(() => { }).finally(() => { this.mediumInFlight = false; });
|
|
164
|
+
}, this.config.mediumIntervalMs);
|
|
165
|
+
}
|
|
166
|
+
if (this.slowTimer) {
|
|
167
|
+
clearInterval(this.slowTimer);
|
|
168
|
+
this.slowTimer = setInterval(() => {
|
|
169
|
+
if (this.slowInFlight)
|
|
170
|
+
return;
|
|
171
|
+
this.slowInFlight = true;
|
|
172
|
+
void this.slowCycle().catch(() => { }).finally(() => { this.slowInFlight = false; });
|
|
173
|
+
}, this.config.slowIntervalMs);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
104
177
|
/**
|
|
105
178
|
* Notify that a tool call is happening — resets idle timer and starts stream if needed.
|
|
106
179
|
* Call this from the intelligence wrapper PRE-CALL.
|
|
@@ -253,6 +326,10 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
253
326
|
this.fastInFlight = false;
|
|
254
327
|
this.mediumInFlight = false;
|
|
255
328
|
this.slowInFlight = false;
|
|
329
|
+
// Wire #9: reset AppMap tracking state on stop
|
|
330
|
+
this.lastPerceptionTitle = null;
|
|
331
|
+
this.lastPerceptionDialogCount = 0;
|
|
332
|
+
this.reportedControlLabels.clear();
|
|
256
333
|
this.emit("stopped");
|
|
257
334
|
}
|
|
258
335
|
/**
|
|
@@ -288,6 +365,38 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
288
365
|
await this.stop();
|
|
289
366
|
this.visionSource?.reset();
|
|
290
367
|
this.cdpSource?.reset();
|
|
368
|
+
// Wire #16: reset intervals to defaults, then adjust based on AppMap timing data
|
|
369
|
+
this.config = {
|
|
370
|
+
...this.config,
|
|
371
|
+
fastIntervalMs: DEFAULT_PERCEPTION_CONFIG.fastIntervalMs,
|
|
372
|
+
mediumIntervalMs: DEFAULT_PERCEPTION_CONFIG.mediumIntervalMs,
|
|
373
|
+
slowIntervalMs: DEFAULT_PERCEPTION_CONFIG.slowIntervalMs,
|
|
374
|
+
};
|
|
375
|
+
if (this.appMap && appContext.bundleId) {
|
|
376
|
+
const profiles = this.appMap.getTimingProfile(appContext.bundleId);
|
|
377
|
+
if (profiles.length > 0) {
|
|
378
|
+
const elementProfiles = profiles.filter((p) => p.type === "element_response");
|
|
379
|
+
// Require 3+ element_response profiles for reliable interval adaptation
|
|
380
|
+
if (elementProfiles.length >= 3) {
|
|
381
|
+
const avgResponse = elementProfiles.reduce((sum, p) => sum + p.avgMs, 0) / elementProfiles.length;
|
|
382
|
+
if (avgResponse > 1500) {
|
|
383
|
+
// Slow app — increase slow interval, relax medium
|
|
384
|
+
this.adjustIntervals({ slowIntervalMs: 2000, mediumIntervalMs: 800 });
|
|
385
|
+
}
|
|
386
|
+
else if (avgResponse < 300) {
|
|
387
|
+
// Fast app — tighten slow interval for quicker visual updates
|
|
388
|
+
this.adjustIntervals({ slowIntervalMs: 500 });
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
// Wire F10: Apply per-app perception config from reference/playbook (L2→L3)
|
|
394
|
+
if (this.contextTracker) {
|
|
395
|
+
const refConfig = this.contextTracker.getPerceptionConfig();
|
|
396
|
+
if (refConfig) {
|
|
397
|
+
this.adjustIntervals(refConfig);
|
|
398
|
+
}
|
|
399
|
+
}
|
|
291
400
|
await this.start(appContext, cdpClient);
|
|
292
401
|
}
|
|
293
402
|
/**
|
|
@@ -432,6 +541,10 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
432
541
|
catch { /* best-effort */ }
|
|
433
542
|
}
|
|
434
543
|
this.stats.mediumCycles++;
|
|
544
|
+
// Wire #9: L3→L7 — auto-update AppMap from perception (every 5th cycle, skip cycle 1)
|
|
545
|
+
if (this.stats.mediumCycles > 1 && this.stats.mediumCycles % PerceptionCoordinator.MAP_UPDATE_INTERVAL === 0) {
|
|
546
|
+
this.updateAppMapFromPerception();
|
|
547
|
+
}
|
|
435
548
|
this.stats.lastMediumAt = timestamp;
|
|
436
549
|
}
|
|
437
550
|
/**
|
|
@@ -670,13 +783,17 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
670
783
|
this.stats.slowCycles++;
|
|
671
784
|
return; // Observer daemon is capturing — skip this cycle
|
|
672
785
|
}
|
|
786
|
+
let didWork = false;
|
|
673
787
|
try {
|
|
674
788
|
// Screenshot diff — optimized single-capture pipeline
|
|
675
789
|
const windowId = this.activeWindowId ?? 0;
|
|
676
790
|
if (windowId === 0)
|
|
677
791
|
return; // Vision needs a real window ID for screenshot
|
|
792
|
+
didWork = true;
|
|
678
793
|
const SLOW_CYCLE_TIMEOUT_MS = 25_000;
|
|
679
|
-
|
|
794
|
+
// Wire #10: L7→L3 — pass zone ROIs for targeted OCR instead of full-screen fallback
|
|
795
|
+
const zoneROIs = this.getZoneROIs();
|
|
796
|
+
const { diffEvent, ocrEvent, yoloElements } = await withTimeout(this.visionSource.captureAndDiffOptimized(windowId, this.config.maxROIsPerCycle, zoneROIs.length > 0 ? zoneROIs : undefined), SLOW_CYCLE_TIMEOUT_MS, "captureAndDiffOptimized");
|
|
680
797
|
if (diffEvent) {
|
|
681
798
|
this.stats.visionDiffs++;
|
|
682
799
|
this.stats.lastVisionAt = new Date().toISOString();
|
|
@@ -707,6 +824,10 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
707
824
|
this.appMap.save(mapData);
|
|
708
825
|
}
|
|
709
826
|
}
|
|
827
|
+
// Wire #9: record element visibility from OCR (every 3rd slow cycle, skip first)
|
|
828
|
+
if (this.stats.slowCycles > 0 && this.stats.slowCycles % 3 === 0 && ocrEvent.data.type === "vision_ocr") {
|
|
829
|
+
this.recordVisibilityFromOCR(ocrEvent.data.regions);
|
|
830
|
+
}
|
|
710
831
|
this.emit("perception", ocrEvent);
|
|
711
832
|
}
|
|
712
833
|
// Fuse YOLO element detections with OCR text regions
|
|
@@ -761,11 +882,190 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
761
882
|
}
|
|
762
883
|
}
|
|
763
884
|
finally {
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
885
|
+
if (didWork) {
|
|
886
|
+
this.stats.slowCycles++;
|
|
887
|
+
this.stats.lastSlowAt = timestamp;
|
|
888
|
+
}
|
|
767
889
|
if (!this.config.skipCaptureLock)
|
|
768
890
|
releaseCaptureLock();
|
|
769
891
|
}
|
|
770
892
|
}
|
|
893
|
+
// ── Wire #9: L3→L7 — auto-update AppMap from perception ──
|
|
894
|
+
/**
|
|
895
|
+
* Wire #9: Compare current world model state against tracked previous state
|
|
896
|
+
* and auto-record changes to AppMap. Called every MAP_UPDATE_INTERVAL medium cycles.
|
|
897
|
+
*
|
|
898
|
+
* Records:
|
|
899
|
+
* - Page transitions (window title changes)
|
|
900
|
+
* - Dialog state changes (open/closed)
|
|
901
|
+
* - New element discovery from AX/CDP controls
|
|
902
|
+
*/
|
|
903
|
+
updateAppMapFromPerception() {
|
|
904
|
+
if (!this.appMap || !this.activeAppContext)
|
|
905
|
+
return;
|
|
906
|
+
const bundleId = this.activeAppContext.bundleId;
|
|
907
|
+
const state = this.worldModel.getState();
|
|
908
|
+
const focusedWin = state.focusedWindowId !== null
|
|
909
|
+
? state.windows.get(state.focusedWindowId) ?? null
|
|
910
|
+
: null;
|
|
911
|
+
if (!focusedWin)
|
|
912
|
+
return;
|
|
913
|
+
const currentTitle = focusedWin.title?.value ?? null;
|
|
914
|
+
// 1. Page transition detection — window title changed between cycles
|
|
915
|
+
if (this.lastPerceptionTitle !== null &&
|
|
916
|
+
currentTitle !== null &&
|
|
917
|
+
this.lastPerceptionTitle !== currentTitle) {
|
|
918
|
+
try {
|
|
919
|
+
const fromTitle = this.lastPerceptionTitle.length > 80
|
|
920
|
+
? this.lastPerceptionTitle.slice(0, 80).trim() : this.lastPerceptionTitle;
|
|
921
|
+
const toTitle = currentTitle.length > 80
|
|
922
|
+
? currentTitle.slice(0, 80).trim() : currentTitle;
|
|
923
|
+
this.appMap.recordPageTransition(bundleId, fromTitle, toTitle, "perception_detected");
|
|
924
|
+
}
|
|
925
|
+
catch { /* best-effort — limits, PII filter, etc. */ }
|
|
926
|
+
}
|
|
927
|
+
this.lastPerceptionTitle = currentTitle;
|
|
928
|
+
// 2. Dialog state change detection
|
|
929
|
+
const currentDialogCount = state.activeDialogs.length;
|
|
930
|
+
if (currentDialogCount !== this.lastPerceptionDialogCount) {
|
|
931
|
+
const from = this.lastPerceptionDialogCount > 0 ? "open" : "closed";
|
|
932
|
+
const to = currentDialogCount > 0 ? "open" : "closed";
|
|
933
|
+
if (from !== to) {
|
|
934
|
+
try {
|
|
935
|
+
this.appMap.recordStateChange(bundleId, "dialog_state", from, to, "perception_detected");
|
|
936
|
+
}
|
|
937
|
+
catch { /* best-effort */ }
|
|
938
|
+
}
|
|
939
|
+
this.lastPerceptionDialogCount = currentDialogCount;
|
|
940
|
+
}
|
|
941
|
+
// 3. New element discovery from controls (max 10 per cycle to avoid flooding)
|
|
942
|
+
// Truncate pageContext to avoid zone key bloat from dynamic window titles
|
|
943
|
+
const rawPageContext = currentTitle ?? "unknown";
|
|
944
|
+
const pageContext = rawPageContext.length > 80 ? rawPageContext.slice(0, 80).trim() : rawPageContext;
|
|
945
|
+
let added = 0;
|
|
946
|
+
// Cap reportedControlLabels — evict oldest entries instead of full clear
|
|
947
|
+
// to prevent re-flooding AppMap with already-known elements
|
|
948
|
+
if (this.reportedControlLabels.size > 5000) {
|
|
949
|
+
const iter = this.reportedControlLabels.values();
|
|
950
|
+
for (let i = 0; i < 1000; i++)
|
|
951
|
+
iter.next();
|
|
952
|
+
const keep = new Set();
|
|
953
|
+
for (const val of iter)
|
|
954
|
+
keep.add(val);
|
|
955
|
+
this.reportedControlLabels.clear();
|
|
956
|
+
for (const val of keep)
|
|
957
|
+
this.reportedControlLabels.add(val);
|
|
958
|
+
}
|
|
959
|
+
for (const [, ctrl] of focusedWin.controls) {
|
|
960
|
+
if (added >= 10)
|
|
961
|
+
break;
|
|
962
|
+
const label = ctrl.label?.value;
|
|
963
|
+
// Skip empty, too-short, too-long, or already-reported labels
|
|
964
|
+
if (!label || label.length < 2 || label.length > 60)
|
|
965
|
+
continue;
|
|
966
|
+
if (this.reportedControlLabels.has(label))
|
|
967
|
+
continue;
|
|
968
|
+
this.reportedControlLabels.add(label);
|
|
969
|
+
try {
|
|
970
|
+
// Use recordElementOutcome with zoneKey "auto" — auto-creates zones
|
|
971
|
+
// unlike addElement which silently fails when zone doesn't exist
|
|
972
|
+
this.appMap.recordElementOutcome(bundleId, "auto", label, true, pageContext);
|
|
973
|
+
added++;
|
|
974
|
+
}
|
|
975
|
+
catch { /* best-effort — zone limits, PII filter, etc. */ }
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
/**
|
|
979
|
+
* Wire #9: Record element visibility from OCR detections.
|
|
980
|
+
* Called in slow cycle after OCR produces text regions.
|
|
981
|
+
*/
|
|
982
|
+
recordVisibilityFromOCR(regions) {
|
|
983
|
+
if (!this.appMap || !this.activeAppContext)
|
|
984
|
+
return;
|
|
985
|
+
const bundleId = this.activeAppContext.bundleId;
|
|
986
|
+
const state = this.worldModel.getState();
|
|
987
|
+
const focusedWin = state.focusedWindowId !== null
|
|
988
|
+
? state.windows.get(state.focusedWindowId) ?? null
|
|
989
|
+
: null;
|
|
990
|
+
const rawPageContext = focusedWin?.title?.value ?? "unknown";
|
|
991
|
+
const pageContext = rawPageContext.length > 80 ? rawPageContext.slice(0, 80).trim() : rawPageContext;
|
|
992
|
+
// Only record visibility for OCR text that matches known AX controls —
|
|
993
|
+
// raw OCR picks up body text, logos, dates etc. that aren't UI elements
|
|
994
|
+
const knownLabels = new Set();
|
|
995
|
+
if (focusedWin) {
|
|
996
|
+
for (const [, ctrl] of focusedWin.controls) {
|
|
997
|
+
const label = ctrl.label?.value;
|
|
998
|
+
if (label && label.length >= 2)
|
|
999
|
+
knownLabels.add(label.toLowerCase());
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
let recorded = 0;
|
|
1003
|
+
for (const region of regions) {
|
|
1004
|
+
if (recorded >= 20)
|
|
1005
|
+
break;
|
|
1006
|
+
const text = region.text.trim();
|
|
1007
|
+
if (text.length < 2 || text.length > 60)
|
|
1008
|
+
continue;
|
|
1009
|
+
// Cross-reference: case-insensitive match against known AX control labels
|
|
1010
|
+
if (!knownLabels.has(text.toLowerCase()))
|
|
1011
|
+
continue;
|
|
1012
|
+
try {
|
|
1013
|
+
this.appMap.recordElementVisibility(bundleId, text, pageContext, true);
|
|
1014
|
+
recorded++;
|
|
1015
|
+
}
|
|
1016
|
+
catch { /* best-effort */ }
|
|
1017
|
+
}
|
|
1018
|
+
}
|
|
1019
|
+
// ── Wire #10: L7→L3 — zone ROI → targeted OCR ──
|
|
1020
|
+
/**
|
|
1021
|
+
* Wire #10: Convert AppMap zone positions to pixel ROIs for targeted OCR.
|
|
1022
|
+
* Returns ROIs based on known zones, prioritized by zone type.
|
|
1023
|
+
*/
|
|
1024
|
+
getZoneROIs() {
|
|
1025
|
+
if (!this.appMap || !this.activeAppContext)
|
|
1026
|
+
return [];
|
|
1027
|
+
const state = this.worldModel.getState();
|
|
1028
|
+
const focusedWin = state.focusedWindowId !== null
|
|
1029
|
+
? state.windows.get(state.focusedWindowId) ?? null
|
|
1030
|
+
: null;
|
|
1031
|
+
if (!focusedWin)
|
|
1032
|
+
return [];
|
|
1033
|
+
const winBounds = focusedWin.bounds?.value;
|
|
1034
|
+
if (!winBounds || winBounds.width <= 0 || winBounds.height <= 0)
|
|
1035
|
+
return [];
|
|
1036
|
+
const mapData = this.appMap.load(this.activeAppContext.bundleId);
|
|
1037
|
+
if (!mapData)
|
|
1038
|
+
return [];
|
|
1039
|
+
const rois = [];
|
|
1040
|
+
for (const [, zone] of Object.entries(mapData.zones)) {
|
|
1041
|
+
const rp = zone.relativePosition;
|
|
1042
|
+
// Skip zones with zero/invalid dimensions
|
|
1043
|
+
if (!rp || rp.width <= 0 || rp.height <= 0)
|
|
1044
|
+
continue;
|
|
1045
|
+
// Clamp relative positions to [0, 1] to prevent out-of-bounds ROIs
|
|
1046
|
+
// from corrupted or stale map data
|
|
1047
|
+
const left = Math.max(0, Math.min(1, rp.left));
|
|
1048
|
+
const top = Math.max(0, Math.min(1, rp.top));
|
|
1049
|
+
const clampedWidth = Math.min(Math.max(0, rp.width), 1.0 - left);
|
|
1050
|
+
const clampedHeight = Math.min(Math.max(0, rp.height), 1.0 - top);
|
|
1051
|
+
if (clampedWidth <= 0 || clampedHeight <= 0)
|
|
1052
|
+
continue;
|
|
1053
|
+
// ROI coordinates are relative to the window capture image (0,0 = window top-left),
|
|
1054
|
+
// not screen-absolute. Zone positions are 0-1 relative to the window.
|
|
1055
|
+
const pixelW = Math.max(1, Math.round(clampedWidth * winBounds.width));
|
|
1056
|
+
const pixelH = Math.max(1, Math.round(clampedHeight * winBounds.height));
|
|
1057
|
+
// Skip full-window ROIs from auto_discovered zones (defeats targeted OCR purpose)
|
|
1058
|
+
if (clampedWidth >= 0.9 && clampedHeight >= 0.9)
|
|
1059
|
+
continue;
|
|
1060
|
+
rois.push({
|
|
1061
|
+
x: Math.round(left * winBounds.width),
|
|
1062
|
+
y: Math.round(top * winBounds.height),
|
|
1063
|
+
width: pixelW,
|
|
1064
|
+
height: pixelH,
|
|
1065
|
+
reason: "known_zone",
|
|
1066
|
+
});
|
|
1067
|
+
}
|
|
1068
|
+
// Cap at 5 zone ROIs to stay within budget
|
|
1069
|
+
return rois.slice(0, 5);
|
|
1070
|
+
}
|
|
771
1071
|
}
|
|
@@ -37,6 +37,7 @@ export class PerceptionManager extends EventEmitter {
|
|
|
37
37
|
lastCdpClient = null;
|
|
38
38
|
pendingLearningEngine = null;
|
|
39
39
|
pendingAppMap = null;
|
|
40
|
+
pendingContextTracker = null;
|
|
40
41
|
constructor(worldModel, config) {
|
|
41
42
|
super();
|
|
42
43
|
this.worldModel = worldModel;
|
|
@@ -62,6 +63,15 @@ export class PerceptionManager extends EventEmitter {
|
|
|
62
63
|
this.coordinator.setAppMap(map);
|
|
63
64
|
}
|
|
64
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* Wire F10: Inject context tracker for per-app perception config.
|
|
68
|
+
*/
|
|
69
|
+
setContextTracker(tracker) {
|
|
70
|
+
this.pendingContextTracker = tracker;
|
|
71
|
+
if (this.coordinator) {
|
|
72
|
+
this.coordinator.setContextTracker(tracker);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
65
75
|
/**
|
|
66
76
|
* Create perception sources from the bridge. Called once after ensureBridge().
|
|
67
77
|
*/
|
|
@@ -80,6 +90,9 @@ export class PerceptionManager extends EventEmitter {
|
|
|
80
90
|
if (this.pendingAppMap) {
|
|
81
91
|
this.coordinator.setAppMap(this.pendingAppMap);
|
|
82
92
|
}
|
|
93
|
+
if (this.pendingContextTracker) {
|
|
94
|
+
this.coordinator.setContextTracker(this.pendingContextTracker);
|
|
95
|
+
}
|
|
83
96
|
this.coordinator.on("perception", (event) => {
|
|
84
97
|
this.handleReactiveEvent(event);
|
|
85
98
|
});
|
|
@@ -180,7 +180,7 @@ export class VisionSource {
|
|
|
180
180
|
* Performance: unchanged ~113ms, changed ~175ms (vs ~370ms before Phase 2).
|
|
181
181
|
* Phase 1 (FAST OCR) + Phase 2 (region OCR) combined.
|
|
182
182
|
*/
|
|
183
|
-
async captureAndDiffOptimized(windowId, maxROIs = 3) {
|
|
183
|
+
async captureAndDiffOptimized(windowId, maxROIs = 3, priorityROIs) {
|
|
184
184
|
const start = Date.now();
|
|
185
185
|
// 1. Capture: stream frame (~0ms) or one-shot (~112ms)
|
|
186
186
|
const capture = await this.captureToFileOrStream(windowId);
|
|
@@ -208,12 +208,22 @@ export class VisionSource {
|
|
|
208
208
|
// 4. Run OCR and YOLO in parallel on the same captured frame
|
|
209
209
|
const mergedRegions = FrameDiffer.mergeRegions(changedRegions, maxROIs, 64, capture.width, capture.height);
|
|
210
210
|
// OCR (region-based or full)
|
|
211
|
-
|
|
211
|
+
// Wire #10: When too many changed regions, use zone ROIs (priorityROIs) for
|
|
212
|
+
// targeted OCR instead of expensive full-screen OCR fallback.
|
|
213
|
+
const useRegionOCR = mergedRegions.length > 0 && mergedRegions.length <= maxROIs;
|
|
214
|
+
const useZoneOCR = !useRegionOCR && mergedRegions.length > maxROIs && priorityROIs && priorityROIs.length > 0;
|
|
215
|
+
const ocrTargets = useRegionOCR ? mergedRegions : useZoneOCR ? priorityROIs : null;
|
|
216
|
+
const ocrPromise = ocrTargets
|
|
212
217
|
? (async () => {
|
|
213
218
|
const regionResults = [];
|
|
214
|
-
for (const roi of
|
|
219
|
+
for (const roi of ocrTargets) {
|
|
215
220
|
const regionEvent = await this.ocrRegion(windowId, roi);
|
|
216
221
|
if (regionEvent?.data.type === "vision_ocr" && regionEvent.data.regions) {
|
|
222
|
+
// Re-anchor OCR bounds from ROI-relative to window-relative coordinates
|
|
223
|
+
for (const region of regionEvent.data.regions) {
|
|
224
|
+
region.bounds.x += roi.x;
|
|
225
|
+
region.bounds.y += roi.y;
|
|
226
|
+
}
|
|
217
227
|
regionResults.push(...regionEvent.data.regions);
|
|
218
228
|
}
|
|
219
229
|
}
|
|
@@ -223,7 +233,7 @@ export class VisionSource {
|
|
|
223
233
|
timestamp: new Date().toISOString(),
|
|
224
234
|
data: {
|
|
225
235
|
type: "vision_ocr",
|
|
226
|
-
roi:
|
|
236
|
+
roi: ocrTargets[0] ?? { x: 0, y: 0, width: 0, height: 0, reason: "changed_pixels" },
|
|
227
237
|
text: fullText,
|
|
228
238
|
regions: regionResults,
|
|
229
239
|
latencyMs: Date.now() - start - captureMs,
|