screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -14,12 +14,14 @@
14
14
  //
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { randomUUID } from "node:crypto";
17
18
  import { toAXRole } from "./ax-role-map.js";
18
19
  const POLL_INTERVAL_MS = 100;
19
20
  export class AccessibilityAdapter {
20
21
  bridge;
21
22
  sessions = new Map();
22
23
  sessionsByProfile = new Map();
24
+ lastPidRefresh = 0;
23
25
  constructor(bridge) {
24
26
  this.bridge = bridge;
25
27
  }
@@ -35,7 +37,7 @@ export class AccessibilityAdapter {
35
37
  throw new Error("Accessibility permission not granted. Go to System Settings → Privacy & Security → Accessibility and enable this app.");
36
38
  }
37
39
  const info = {
38
- sessionId: reuseSessionId ?? `ax_session_${profile}_${Date.now()}`,
40
+ sessionId: reuseSessionId ?? `ax_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
39
41
  profile,
40
42
  createdAt: new Date().toISOString(),
41
43
  adapterType: "accessibility",
@@ -122,6 +124,7 @@ export class AccessibilityAdapter {
122
124
  }
123
125
  async click(sessionId, element) {
124
126
  const state = this.requireSession(sessionId);
127
+ await this.refreshPidIfNeeded(state);
125
128
  const elementPath = this.parseElementPath(element.handleId);
126
129
  if (elementPath) {
127
130
  await this.bridge.call("ax.performAction", {
@@ -134,7 +137,7 @@ export class AccessibilityAdapter {
134
137
  // Fallback to coordinate click
135
138
  const cx = element.coordinates.x + element.coordinates.width / 2;
136
139
  const cy = element.coordinates.y + element.coordinates.height / 2;
137
- await this.bridge.call("cg.mouseClick", { x: cx, y: cy });
140
+ await this.bridge.call("cg.mouseClick", { x: cx, y: cy, targetPid: state.pid });
138
141
  }
139
142
  else {
140
143
  throw new Error("Cannot click: no element path or coordinates");
@@ -142,6 +145,7 @@ export class AccessibilityAdapter {
142
145
  }
143
146
  async setValue(sessionId, element, text, clear) {
144
147
  const state = this.requireSession(sessionId);
148
+ await this.refreshPidIfNeeded(state);
145
149
  const elementPath = this.parseElementPath(element.handleId);
146
150
  if (clear && elementPath) {
147
151
  // Try AX value set first
@@ -158,13 +162,15 @@ export class AccessibilityAdapter {
158
162
  }
159
163
  }
160
164
  // Fallback: click to focus, select all if clearing, then type
165
+ // Use PID-targeted events to prevent keystrokes going to wrong app
166
+ const targetPid = state.pid;
161
167
  await this.click(sessionId, element);
162
168
  await sleep(50);
163
169
  if (clear) {
164
- await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"] });
170
+ await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"], targetPid });
165
171
  await sleep(50);
166
172
  }
167
- await this.bridge.call("cg.typeText", { text });
173
+ await this.bridge.call("cg.typeText", { text, targetPid });
168
174
  }
169
175
  async getValue(sessionId, element) {
170
176
  const state = this.requireSession(sessionId);
@@ -246,16 +252,18 @@ export class AccessibilityAdapter {
246
252
  async focusApp(sessionId, bundleId) {
247
253
  const state = this.requireSession(sessionId);
248
254
  await this.bridge.call("app.focus", { bundleId });
249
- // Update PID if different app
250
- if (bundleId !== state.bundleId) {
251
- const apps = await this.bridge.call("app.list");
252
- const app = apps.find((a) => a.bundleId === bundleId);
253
- if (app) {
254
- state.pid = app.pid;
255
- state.bundleId = bundleId;
256
- state.appName = app.name;
257
- }
255
+ // Verify focus was achieved by checking frontmost app
256
+ let frontmost = await this.bridge.call("app.frontmost");
257
+ // If focus didn't take, retry once
258
+ if (frontmost.bundleId !== bundleId) {
259
+ await this.bridge.call("app.focus", { bundleId });
260
+ frontmost = await this.bridge.call("app.frontmost");
258
261
  }
262
+ // Update state based on actual frontmost app, not optimistic assumption
263
+ state.pid = frontmost.pid;
264
+ state.bundleId = frontmost.bundleId;
265
+ state.appName = frontmost.name;
266
+ this.lastPidRefresh = Date.now();
259
267
  }
260
268
  async listApps(_sessionId) {
261
269
  return this.bridge.call("app.list");
@@ -265,10 +273,12 @@ export class AccessibilityAdapter {
265
273
  }
266
274
  async menuClick(sessionId, menuPath) {
267
275
  const state = this.requireSession(sessionId);
276
+ await this.refreshPidIfNeeded(state);
268
277
  await this.bridge.call("ax.menuClick", { pid: state.pid, menuPath });
269
278
  }
270
- async keyCombo(_sessionId, keys) {
271
- await this.bridge.call("cg.keyCombo", { keys });
279
+ async keyCombo(sessionId, keys) {
280
+ const state = this.requireSession(sessionId);
281
+ await this.bridge.call("cg.keyCombo", { keys, targetPid: state.pid });
272
282
  }
273
283
  async elementTree(sessionId, maxDepth, _root) {
274
284
  const state = this.requireSession(sessionId);
@@ -285,9 +295,10 @@ export class AccessibilityAdapter {
285
295
  const fromY = from.coordinates.y + from.coordinates.height / 2;
286
296
  const toX = to.coordinates.x + to.coordinates.width / 2;
287
297
  const toY = to.coordinates.y + to.coordinates.height / 2;
288
- await this.bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY });
298
+ const state = this.requireSession(sessionId);
299
+ await this.bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY, targetPid: state.pid });
289
300
  }
290
- async scroll(_sessionId, direction, amount, element) {
301
+ async scroll(sessionId, direction, amount, element) {
291
302
  let x = 500;
292
303
  let y = 400;
293
304
  if (element?.coordinates) {
@@ -301,9 +312,35 @@ export class AccessibilityAdapter {
301
312
  right: { deltaX: amount, deltaY: 0 },
302
313
  };
303
314
  const delta = deltaMap[direction];
304
- await this.bridge.call("cg.scroll", { x, y, ...delta });
315
+ const state = this.requireSession(sessionId);
316
+ await this.bridge.call("cg.scroll", { x, y, ...delta, targetPid: state.pid });
317
+ }
318
+ async isFrontmost() {
319
+ // Check if *any* session's bundleId matches the current frontmost app.
320
+ // Used by the executor to verify focus before acting.
321
+ const frontmost = await this.bridge.call("app.frontmost");
322
+ for (const state of this.sessions.values()) {
323
+ if (state.bundleId === frontmost.bundleId) {
324
+ return true;
325
+ }
326
+ }
327
+ return false;
305
328
  }
306
329
  // ── Private helpers ──
330
+ async refreshPidIfNeeded(state) {
331
+ if (Date.now() - this.lastPidRefresh < 500)
332
+ return;
333
+ try {
334
+ const frontmost = await this.bridge.call("app.frontmost");
335
+ if (frontmost.bundleId === state.bundleId) {
336
+ state.pid = frontmost.pid;
337
+ this.lastPidRefresh = Date.now();
338
+ }
339
+ }
340
+ catch {
341
+ // Best-effort refresh; don't break the caller
342
+ }
343
+ }
307
344
  requireSession(sessionId) {
308
345
  const state = this.sessions.get(sessionId);
309
346
  if (!state)
@@ -15,6 +15,7 @@
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
17
  import { execFile } from "node:child_process";
18
+ import { randomUUID } from "node:crypto";
18
19
  import { promisify } from "node:util";
19
20
  const execFileAsync = promisify(execFile);
20
21
  const POLL_INTERVAL_MS = 100;
@@ -49,7 +50,7 @@ export class AppleScriptAdapter {
49
50
  if (existing)
50
51
  return existing.info;
51
52
  const info = {
52
- sessionId: reuseSessionId ?? `as_session_${profile}_${Date.now()}`,
53
+ sessionId: reuseSessionId ?? `as_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
53
54
  profile,
54
55
  createdAt: new Date().toISOString(),
55
56
  adapterType: "applescript",
@@ -291,7 +292,12 @@ export class AppleScriptAdapter {
291
292
  throw new Error(`AppleScript adapter does not support target type: ${target.type}`);
292
293
  }
293
294
  escapeAS(str) {
294
- return str.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
295
+ return str
296
+ .replace(/\\/g, "\\\\")
297
+ .replace(/"/g, '\\"')
298
+ .replace(/\n/g, "\\n")
299
+ .replace(/\r/g, "\\r")
300
+ .replace(/\0/g, "");
295
301
  }
296
302
  }
297
303
  function sleep(ms) {
@@ -47,7 +47,7 @@ export class CdpChromeAdapter {
47
47
  const client = await CDP({ port: chrome.port, target: targetId });
48
48
  await Promise.all([client.Page.enable(), client.Runtime.enable()]);
49
49
  const info = {
50
- sessionId: reuseSessionId ?? `cdp_session_${profile}_${Date.now()}`,
50
+ sessionId: reuseSessionId ?? `cdp_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
51
51
  profile,
52
52
  createdAt: new Date().toISOString(),
53
53
  adapterType: "cdp",
@@ -37,6 +37,14 @@ export class Executor {
37
37
  const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, retry > 0);
38
38
  attempts.push(...locateResult.attempts);
39
39
  telemetry.locateMs += locateResult.attempts.reduce((sum, attempt) => sum + attempt.timeoutMs, 0);
40
+ // Re-validate focus before acting — app may have lost focus during locate
41
+ if (this.adapter.isFrontmost) {
42
+ const front = await this.adapter.isFrontmost();
43
+ if (!front && this.adapter.focusApp) {
44
+ const ctx = await this.adapter.getAppContext(input.sessionId);
45
+ await this.adapter.focusApp(input.sessionId, ctx.bundleId);
46
+ }
47
+ }
40
48
  await this.timed(budget.actMs, async () => {
41
49
  await this.adapter.click(input.sessionId, locateResult.element);
42
50
  }, "ACTION_FAILED");
@@ -68,6 +76,14 @@ export class Executor {
68
76
  const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, false);
69
77
  attempts.push(...locateResult.attempts);
70
78
  telemetry.locateMs += budget.locateMs;
79
+ // Re-validate focus before acting — app may have lost focus during locate
80
+ if (this.adapter.isFrontmost) {
81
+ const front = await this.adapter.isFrontmost();
82
+ if (!front && this.adapter.focusApp) {
83
+ const ctx = await this.adapter.getAppContext(input.sessionId);
84
+ await this.adapter.focusApp(input.sessionId, ctx.bundleId);
85
+ }
86
+ }
71
87
  await this.timed(budget.actMs, async () => {
72
88
  await this.adapter.setValue(input.sessionId, locateResult.element, input.text, input.clear ?? true);
73
89
  }, "ACTION_FAILED");
@@ -171,7 +187,7 @@ export class Executor {
171
187
  // URL parsing failed, use bundleId + windowTitle
172
188
  }
173
189
  }
174
- return `${ctx.bundleId}::${ctx.windowTitle}`;
190
+ return ctx.bundleId;
175
191
  }
176
192
  catch {
177
193
  // Fallback to page meta
@@ -191,15 +207,19 @@ export class Executor {
191
207
  };
192
208
  }
193
209
  async timed(timeoutMs, operation, errorCode) {
210
+ let timerId;
194
211
  const timeout = new Promise((_, reject) => {
195
- setTimeout(() => {
212
+ timerId = setTimeout(() => {
196
213
  reject(this.runtimeError("TIMEOUT", `Timed out after ${timeoutMs}ms.`));
197
214
  }, timeoutMs);
198
215
  });
199
216
  try {
200
- return await Promise.race([operation(), timeout]);
217
+ const result = await Promise.race([operation(), timeout]);
218
+ clearTimeout(timerId);
219
+ return result;
201
220
  }
202
221
  catch (error) {
222
+ clearTimeout(timerId);
203
223
  if (this.isRuntimeError(error)) {
204
224
  throw error;
205
225
  }
@@ -16,13 +16,35 @@
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
17
  export class LocatorCache {
18
18
  store = new Map();
19
+ learningEngine = null;
20
+ /**
21
+ * Inject the learning engine for fallback on cache miss.
22
+ * Called after both are constructed to avoid circular dependencies.
23
+ */
24
+ setLearningEngine(engine) {
25
+ this.learningEngine = engine;
26
+ }
19
27
  get(siteKey, actionKey) {
20
- return this.store.get(this.key(siteKey, actionKey));
28
+ // 1. Check in-memory cache first
29
+ const cached = this.store.get(this.key(siteKey, actionKey));
30
+ if (cached)
31
+ return cached;
32
+ // 2. Fallback: ask learning engine for a proven locator
33
+ if (this.learningEngine) {
34
+ const learned = this.learningEngine.recommendLocator(siteKey, actionKey);
35
+ if (learned) {
36
+ // Promote to cache for fast subsequent lookups
37
+ this.store.set(this.key(siteKey, actionKey), learned.locator);
38
+ return learned.locator;
39
+ }
40
+ }
41
+ return undefined;
21
42
  }
22
43
  set(siteKey, actionKey, locator) {
23
44
  this.store.set(this.key(siteKey, actionKey), locator);
24
45
  }
25
46
  key(siteKey, actionKey) {
26
- return `${siteKey}::${actionKey}`;
47
+ // Use length-prefixed format to avoid collision when keys contain the separator
48
+ return `${siteKey.length}:${siteKey}\0${actionKey}`;
27
49
  }
28
50
  }
@@ -23,20 +23,57 @@ export class AutomationRuntimeService {
23
23
  logger;
24
24
  sessions;
25
25
  executor;
26
+ worldModel = null;
26
27
  constructor(adapter, logger, cache = new LocatorCache()) {
27
28
  this.adapter = adapter;
28
29
  this.logger = logger;
29
30
  this.sessions = new SessionManager(adapter);
30
31
  this.executor = new Executor(adapter, cache, logger);
31
32
  }
33
+ /**
34
+ * Inject the WorldModel so runtime actions update shared state.
35
+ */
36
+ setWorldModel(model) {
37
+ this.worldModel = model;
38
+ }
32
39
  async sessionStart(profile = DEFAULT_PROFILE) {
33
40
  return this.sessions.sessionStart(profile);
34
41
  }
42
+ /**
43
+ * Ensure session exists (re-attaches if lost after MCP restart).
44
+ * Also reloads the world model from disk when re-attaching so world
45
+ * state survives across MCP server restarts.
46
+ */
47
+ async ensureSession(sessionId) {
48
+ const hadSession = !!this.sessions.getSession(sessionId);
49
+ const session = await this.sessions.requireSessionResilent(sessionId);
50
+ // If we had to re-attach, reload persisted world state
51
+ if (!hadSession && this.worldModel) {
52
+ this.worldModel.init(sessionId);
53
+ }
54
+ return session;
55
+ }
56
+ // L2-74 fix: Centralized URL protocol validation for all navigate paths
57
+ static BLOCKED_URL_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
35
58
  async navigate(input) {
36
59
  const telemetry = this.logger.start("navigate", input.sessionId);
37
60
  try {
38
- await this.sessions.requireSessionResilent(input.sessionId);
61
+ // L2-74 fix: Block dangerous URL protocols at the service level
62
+ const urlLower = input.url.trim().toLowerCase();
63
+ for (const proto of AutomationRuntimeService.BLOCKED_URL_PROTOCOLS) {
64
+ if (urlLower.startsWith(proto)) {
65
+ throw new Error(`Blocked: "${proto}" URLs are not allowed for security reasons`);
66
+ }
67
+ }
68
+ await this.ensureSession(input.sessionId);
39
69
  const page = await this.adapter.navigate(input.sessionId, input.url, input.timeoutMs ?? DEFAULT_NAVIGATE_TIMEOUT_MS);
70
+ // Feed navigation result to world model for domain state tracking
71
+ if (this.worldModel) {
72
+ const bundleId = this.worldModel.getState().focusedApp?.bundleId;
73
+ if (bundleId) {
74
+ this.worldModel.ingestCDPSnapshot(bundleId, input.url, page.title ?? "");
75
+ }
76
+ }
40
77
  return {
41
78
  ok: true,
42
79
  data: page,
@@ -57,7 +94,7 @@ export class AutomationRuntimeService {
57
94
  async waitFor(input) {
58
95
  const telemetry = this.logger.start("wait_for", input.sessionId);
59
96
  try {
60
- await this.sessions.requireSessionResilent(input.sessionId);
97
+ await this.ensureSession(input.sessionId);
61
98
  const matched = await this.adapter.waitFor(input.sessionId, input.condition, input.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS);
62
99
  return {
63
100
  ok: true,
@@ -77,17 +114,17 @@ export class AutomationRuntimeService {
77
114
  }
78
115
  }
79
116
  async press(input) {
80
- await this.sessions.requireSessionResilent(input.sessionId);
117
+ await this.ensureSession(input.sessionId);
81
118
  return this.executor.press(input);
82
119
  }
83
120
  async typeInto(input) {
84
- await this.sessions.requireSessionResilent(input.sessionId);
121
+ await this.ensureSession(input.sessionId);
85
122
  return this.executor.typeInto(input);
86
123
  }
87
124
  async extract(input) {
88
125
  const telemetry = this.logger.start("extract", input.sessionId);
89
126
  try {
90
- await this.sessions.requireSessionResilent(input.sessionId);
127
+ await this.ensureSession(input.sessionId);
91
128
  const data = await this.adapter.extract(input.sessionId, input.target, input.format);
92
129
  return {
93
130
  ok: true,
@@ -109,7 +146,7 @@ export class AutomationRuntimeService {
109
146
  async screenshot(input) {
110
147
  const telemetry = this.logger.start("screenshot", input.sessionId);
111
148
  try {
112
- await this.sessions.requireSessionResilent(input.sessionId);
149
+ await this.ensureSession(input.sessionId);
113
150
  const path = await this.adapter.screenshot(input.sessionId, input.region);
114
151
  return {
115
152
  ok: true,
@@ -132,11 +169,12 @@ export class AutomationRuntimeService {
132
169
  async appLaunch(input) {
133
170
  const telemetry = this.logger.start("app_launch", input.sessionId);
134
171
  try {
135
- await this.sessions.requireSessionResilent(input.sessionId);
172
+ await this.ensureSession(input.sessionId);
136
173
  if (!this.adapter.launchApp) {
137
174
  throw new Error("Adapter does not support launchApp");
138
175
  }
139
176
  const ctx = await this.adapter.launchApp(input.sessionId, input.bundleId);
177
+ this.worldModel?.updateFocusedApp(ctx);
140
178
  return {
141
179
  ok: true,
142
180
  data: ctx,
@@ -157,11 +195,17 @@ export class AutomationRuntimeService {
157
195
  async appFocus(input) {
158
196
  const telemetry = this.logger.start("app_focus", input.sessionId);
159
197
  try {
160
- await this.sessions.requireSessionResilent(input.sessionId);
198
+ await this.ensureSession(input.sessionId);
161
199
  if (!this.adapter.focusApp) {
162
200
  throw new Error("Adapter does not support focusApp");
163
201
  }
164
202
  await this.adapter.focusApp(input.sessionId, input.bundleId);
203
+ this.worldModel?.updateFocusedApp({
204
+ bundleId: input.bundleId,
205
+ appName: input.bundleId,
206
+ pid: 0,
207
+ windowTitle: "",
208
+ });
165
209
  return {
166
210
  ok: true,
167
211
  data: undefined,
@@ -182,7 +226,7 @@ export class AutomationRuntimeService {
182
226
  async appList(sessionId) {
183
227
  const telemetry = this.logger.start("app_list", sessionId);
184
228
  try {
185
- await this.sessions.requireSessionResilent(sessionId);
229
+ await this.ensureSession(sessionId);
186
230
  if (!this.adapter.listApps) {
187
231
  throw new Error("Adapter does not support listApps");
188
232
  }
@@ -207,7 +251,7 @@ export class AutomationRuntimeService {
207
251
  async windowList(sessionId) {
208
252
  const telemetry = this.logger.start("window_list", sessionId);
209
253
  try {
210
- await this.sessions.requireSessionResilent(sessionId);
254
+ await this.ensureSession(sessionId);
211
255
  if (!this.adapter.listWindows) {
212
256
  throw new Error("Adapter does not support listWindows");
213
257
  }
@@ -232,7 +276,7 @@ export class AutomationRuntimeService {
232
276
  async menuClick(input) {
233
277
  const telemetry = this.logger.start("menu_click", input.sessionId);
234
278
  try {
235
- await this.sessions.requireSessionResilent(input.sessionId);
279
+ await this.ensureSession(input.sessionId);
236
280
  if (!this.adapter.menuClick) {
237
281
  throw new Error("Adapter does not support menuClick");
238
282
  }
@@ -257,7 +301,7 @@ export class AutomationRuntimeService {
257
301
  async keyCombo(input) {
258
302
  const telemetry = this.logger.start("key_combo", input.sessionId);
259
303
  try {
260
- await this.sessions.requireSessionResilent(input.sessionId);
304
+ await this.ensureSession(input.sessionId);
261
305
  if (!this.adapter.keyCombo) {
262
306
  throw new Error("Adapter does not support keyCombo");
263
307
  }
@@ -282,7 +326,7 @@ export class AutomationRuntimeService {
282
326
  async elementTree(input) {
283
327
  const telemetry = this.logger.start("element_tree", input.sessionId);
284
328
  try {
285
- await this.sessions.requireSessionResilent(input.sessionId);
329
+ await this.ensureSession(input.sessionId);
286
330
  if (!this.adapter.elementTree) {
287
331
  throw new Error("Adapter does not support elementTree");
288
332
  }
@@ -307,7 +351,7 @@ export class AutomationRuntimeService {
307
351
  async drag(input) {
308
352
  const telemetry = this.logger.start("drag", input.sessionId);
309
353
  try {
310
- await this.sessions.requireSessionResilent(input.sessionId);
354
+ await this.ensureSession(input.sessionId);
311
355
  if (!this.adapter.drag) {
312
356
  throw new Error("Adapter does not support drag");
313
357
  }
@@ -337,7 +381,7 @@ export class AutomationRuntimeService {
337
381
  async scroll(input) {
338
382
  const telemetry = this.logger.start("scroll", input.sessionId);
339
383
  try {
340
- await this.sessions.requireSessionResilent(input.sessionId);
384
+ await this.ensureSession(input.sessionId);
341
385
  if (!this.adapter.scroll) {
342
386
  throw new Error("Adapter does not support scroll");
343
387
  }
@@ -50,7 +50,10 @@ export class SessionManager {
50
50
  const existing = this.getSession(sessionId);
51
51
  if (existing)
52
52
  return existing;
53
- const match = sessionId.match(/^(?:ax|cdp|as|vision|composite)_session_(.+)_\d+$/);
53
+ // Session IDs: {prefix}_session_{profile}_{timestamp}_{random8} (new)
54
+ // or legacy: {prefix}_session_{profile}_{timestamp}
55
+ // Use greedy .+ so profiles with digits (e.g. "user_1234567890") capture fully
56
+ const match = sessionId.match(/^(?:ax|cdp|as|vision|composite)_session_(.+)_\d{13,}(?:_[a-f0-9]{8})?$/);
54
57
  const profile = match?.[1] ?? "automation";
55
58
  const created = await this.adapter.attach(profile, sessionId);
56
59
  this.sessionsByProfile.set(profile, created);
@@ -14,6 +14,7 @@
14
14
  //
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { randomUUID } from "node:crypto";
17
18
  const POLL_INTERVAL_MS = 200;
18
19
  /**
19
20
  * Vision-based adapter for apps with poor/no accessibility support.
@@ -33,7 +34,7 @@ export class VisionAdapter {
33
34
  await this.bridge.start();
34
35
  const frontmost = await this.bridge.call("app.frontmost");
35
36
  const info = {
36
- sessionId: reuseSessionId ?? `vision_session_${profile}_${Date.now()}`,
37
+ sessionId: reuseSessionId ?? `vision_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
37
38
  profile,
38
39
  createdAt: new Date().toISOString(),
39
40
  adapterType: "vision",
@@ -0,0 +1,72 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ // ── Rating System (F → 0) ──────────────────────────────────────────
4
+ //
5
+ // Game-style rating: F E D C B A S SS SSS 0
6
+ // Each grade has 3 sub-tiers: e.g. B1 (entry), B2 (mid), B3 (top)
7
+ // Graded by 10 weighted factors scored 0-100
8
+ //
9
+ // Years-equivalent mapping:
10
+ // F = just opened the app
11
+ // E = ~1 week user
12
+ // D = ~1-3 months
13
+ // C = ~6-12 months
14
+ // B = ~1-3 years (consistent daily user)
15
+ // A = ~3-5 years (power user / team lead)
16
+ // S = ~5-10 years (department architect)
17
+ // SS = ~10-20 years (platform expert, builds systems)
18
+ // SSS = ~20+ years (framework builder, trains others)
19
+ // 0 = Class Zero — transcendent mastery, near-perfect across all 10 factors (weighted score 97+, see GRADE_THRESHOLDS)
20
/**
 * Every rating grade label, ordered from the lowest ("F") to the highest ("0").
 * @type {string[]}
 */
export const RATING_GRADES = "F E D C B A S SS SSS 0".split(" ");
22
/**
 * Weight of each of the 10 rating factors; the weights add up to 100.
 *
 * The first four entries (hard-to-fake signals) account for 57 of the 100
 * points; the remaining six (session-gated evidence) account for the other 43.
 *
 * @type {Record<string, number>}
 */
export const RATING_FACTOR_WEIGHTS = Object.fromEntries([
  ["consistency", 20],        // THE core signal — can't fake showing up 50+ times
  ["platformKnowledge", 15],  // shortcuts, deep features — proves real knowledge
  ["edgeCaseHandling", 12],   // surviving unexpected states — proves resilience
  ["teachingAbility", 10],    // exporting playbooks — proves codifiable mastery
  ["featureCoverage", 10],    // breadth of features used (session-gated)
  ["workflowDepth", 8],       // multi-step workflows completed (session-gated)
  ["outcomeVerification", 8], // verified outcomes (session-gated)
  ["errorRecovery", 7],       // healing from failures — honest if it happens
  ["crossFeatureChains", 5],  // combining features end-to-end
  ["speedEfficiency", 5],     // repeat mastery across sessions
]);
35
/**
 * Default values for the app-map configuration record.
 *
 * NOTE(review): the meaning of each field is defined by consumers that are not
 * part of this file; the `max*` names suggest size caps — confirm against the
 * code that reads them. `mapsDir` defaults to an empty string.
 */
export const DEFAULT_APP_MAP_CONFIG = Object.fromEntries([
  ["mapsDir", ""],
  ["staleThresholdDays", 7],
  ["versionDecayFactor", 0.5],
  ["pruneSessionThreshold", 10],
  ["maxZonesPerApp", 50],
  ["maxElementsPerZone", 100],
  ["maxEdges", 200],
  ["maxHistoryEntries", 100],
  ["maxHierarchyEntriesPerZone", 50],
  ["maxContractsPerZone", 30],
  ["maxOutcomesPerContract", 5],
  ["maxStateDimensions", 30],
  ["maxStateTransitions", 100],
  ["maxVisibilityConditions", 200],
  ["maxTimingProfiles", 100],
  ["maxReadySignals", 50],
]);
53
+ // ── Rating Utility ──────────────────────────────────────────────────
54
/**
 * Convert a Rating to its display string, e.g. "B2", "SS3" or "0".
 *
 * @param {{ grade: string, subTier?: number | string | null }} r - Rating to format.
 * @returns {string} The grade followed by its sub-tier; just "0" for Class Zero;
 *   just the grade when the sub-tier is missing.
 */
export function ratingToString(r) {
  // Class Zero has no sub-tier display.
  if (r.grade === "0") {
    return "0";
  }
  // A missing sub-tier would otherwise be interpolated as the literal text
  // "undefined" or "null" (e.g. "Bundefined"), so fall back to the bare grade.
  return `${r.grade}${r.subTier ?? ""}`;
}
60
/**
 * Minimum weighted score (0-100 scale) needed for each grade, listed from the
 * highest grade down to the lowest.
 *
 * @type {Array<{ grade: string, minScore: number }>}
 */
export const GRADE_THRESHOLDS = [
  ["0", 97],   // Class Zero — near-perfect across all factors
  ["SSS", 90], // 20+ years equivalent
  ["SS", 82],  // 10-20 years
  ["S", 73],   // 5-10 years
  ["A", 62],   // 3-5 years
  ["B", 50],   // 1-3 years
  ["C", 38],   // 6-12 months
  ["D", 25],   // 1-3 months
  ["E", 12],   // ~1 week
  ["F", 0],    // just opened the app
].map(([grade, minScore]) => ({ grade, minScore }));