autokap 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. package/assets/cursors/macos.svg +4 -0
  2. package/assets/cursors/windows.svg +15 -0
  3. package/assets/skill/OPCODE-REFERENCE.md +607 -0
  4. package/assets/skill/README.md +39 -0
  5. package/assets/skill/SKILL.md +453 -468
  6. package/assets/skill/STUDIO-SKILL.md +476 -0
  7. package/assets/skill/references/examples.md +104 -0
  8. package/assets/skill/references/interactive-demo.md +225 -0
  9. package/assets/skill/references/mock-data.md +178 -0
  10. package/dist/action-verifier.d.ts +29 -0
  11. package/dist/action-verifier.js +133 -0
  12. package/dist/agent-action-recovery.d.ts +45 -0
  13. package/dist/agent-action-recovery.js +370 -0
  14. package/dist/agent-message-utils.d.ts +21 -0
  15. package/dist/agent-message-utils.js +77 -0
  16. package/dist/agent-url-utils.d.ts +30 -0
  17. package/dist/agent-url-utils.js +138 -0
  18. package/dist/agent.d.ts +92 -8
  19. package/dist/agent.js +2936 -781
  20. package/dist/ak-tree.d.ts +39 -0
  21. package/dist/ak-tree.js +368 -0
  22. package/dist/alt-text.d.ts +26 -0
  23. package/dist/alt-text.js +55 -0
  24. package/dist/auth-capture.d.ts +17 -0
  25. package/dist/auth-capture.js +164 -0
  26. package/dist/benchmark.d.ts +59 -0
  27. package/dist/benchmark.js +135 -0
  28. package/dist/browser-bar.d.ts +14 -6
  29. package/dist/browser-bar.js +145 -8
  30. package/dist/browser-pool.d.ts +7 -0
  31. package/dist/browser-pool.js +15 -5
  32. package/dist/browser-utils.d.ts +31 -0
  33. package/dist/browser-utils.js +97 -0
  34. package/dist/browser.d.ts +51 -1
  35. package/dist/browser.js +1481 -31
  36. package/dist/capture-alt-text.js +2 -1
  37. package/dist/capture-language-preflight.js +14 -0
  38. package/dist/capture-llm-page-identity.js +22 -10
  39. package/dist/capture-page-identity.d.ts +5 -7
  40. package/dist/capture-page-identity.js +211 -78
  41. package/dist/capture-preset-credentials.d.ts +50 -0
  42. package/dist/capture-preset-credentials.js +127 -0
  43. package/dist/capture-request-plan.d.ts +2 -2
  44. package/dist/capture-request-plan.js +64 -16
  45. package/dist/capture-run-optimizer.js +48 -33
  46. package/dist/capture-selector-memory.d.ts +5 -0
  47. package/dist/capture-selector-memory.js +18 -0
  48. package/dist/capture-strategy.d.ts +36 -0
  49. package/dist/capture-strategy.js +95 -0
  50. package/dist/capture-studio-sync.d.ts +1 -0
  51. package/dist/capture-studio-sync.js +9 -3
  52. package/dist/capture-surface-contract.d.ts +36 -0
  53. package/dist/capture-surface-contract.js +299 -0
  54. package/dist/capture-transition-engine.d.ts +28 -0
  55. package/dist/capture-transition-engine.js +292 -0
  56. package/dist/capture-variant-state.d.ts +2 -0
  57. package/dist/capture-variant-state.js +26 -0
  58. package/dist/capture-verification.d.ts +35 -0
  59. package/dist/capture-verification.js +95 -0
  60. package/dist/capture-viewport-lock.d.ts +48 -0
  61. package/dist/capture-viewport-lock.js +74 -0
  62. package/dist/circuit-breaker.d.ts +42 -0
  63. package/dist/circuit-breaker.js +119 -0
  64. package/dist/cli-config.d.ts +8 -1
  65. package/dist/cli-config.js +62 -6
  66. package/dist/cli-contract.d.ts +15 -0
  67. package/dist/cli-contract.js +167 -0
  68. package/dist/cli-runner-local.d.ts +12 -0
  69. package/dist/cli-runner-local.js +102 -0
  70. package/dist/cli-runner.d.ts +34 -0
  71. package/dist/cli-runner.js +433 -0
  72. package/dist/cli-utils.d.ts +0 -1
  73. package/dist/cli-utils.js +2 -5
  74. package/dist/cli.js +1005 -267
  75. package/dist/clip-orchestrator.js +9 -2
  76. package/dist/clip-postprocess.js +25 -16
  77. package/dist/cookie-dismiss.d.ts +2 -0
  78. package/dist/cookie-dismiss.js +48 -13
  79. package/dist/cost-logging.d.ts +8 -0
  80. package/dist/cost-logging.js +160 -46
  81. package/dist/cost-resolution-monitor.d.ts +16 -0
  82. package/dist/cost-resolution-monitor.js +34 -0
  83. package/dist/credential-templates.js +2 -2
  84. package/dist/cursor-overlay-script.d.ts +6 -0
  85. package/dist/cursor-overlay-script.js +169 -0
  86. package/dist/dom-css-purger.d.ts +65 -0
  87. package/dist/dom-css-purger.js +333 -0
  88. package/dist/dom-font-inliner.d.ts +45 -0
  89. package/dist/dom-font-inliner.js +148 -0
  90. package/dist/dom-patch-resolver.d.ts +52 -0
  91. package/dist/dom-patch-resolver.js +242 -0
  92. package/dist/dom-serializer.d.ts +82 -0
  93. package/dist/dom-serializer.js +378 -0
  94. package/dist/element-capture.d.ts +1 -41
  95. package/dist/element-capture.js +202 -446
  96. package/dist/env-validation.d.ts +5 -0
  97. package/dist/env-validation.js +29 -0
  98. package/dist/execution-schema.d.ts +4423 -0
  99. package/dist/execution-schema.js +507 -0
  100. package/dist/execution-types.d.ts +886 -0
  101. package/dist/execution-types.js +65 -0
  102. package/dist/fonts-loader.d.ts +14 -0
  103. package/dist/fonts-loader.js +55 -0
  104. package/dist/hybrid-navigator.js +12 -12
  105. package/dist/index.d.ts +9 -6
  106. package/dist/index.js +10 -4
  107. package/dist/legacy/agent-action-recovery.d.ts +45 -0
  108. package/dist/legacy/agent-action-recovery.js +370 -0
  109. package/dist/legacy/agent-message-utils.d.ts +21 -0
  110. package/dist/legacy/agent-message-utils.js +77 -0
  111. package/dist/legacy/agent-url-utils.d.ts +30 -0
  112. package/dist/legacy/agent-url-utils.js +138 -0
  113. package/dist/legacy/agent.d.ts +226 -0
  114. package/dist/legacy/agent.js +6666 -0
  115. package/dist/legacy/clip-orchestrator.d.ts +148 -0
  116. package/dist/legacy/clip-orchestrator.js +957 -0
  117. package/dist/legacy/credential-templates.d.ts +5 -0
  118. package/dist/legacy/credential-templates.js +60 -0
  119. package/dist/legacy/hybrid-navigator.d.ts +138 -0
  120. package/dist/legacy/hybrid-navigator.js +468 -0
  121. package/dist/legacy/llm-usage.d.ts +17 -0
  122. package/dist/legacy/llm-usage.js +45 -0
  123. package/dist/legacy/prompt-cache.d.ts +10 -0
  124. package/dist/legacy/prompt-cache.js +24 -0
  125. package/dist/legacy/prompts.d.ts +175 -0
  126. package/dist/legacy/prompts.js +1038 -0
  127. package/dist/legacy/tools.d.ts +4 -0
  128. package/dist/legacy/tools.js +216 -0
  129. package/dist/legacy/video-agent.d.ts +143 -0
  130. package/dist/legacy/video-agent.js +4788 -0
  131. package/dist/legacy/video-observation.d.ts +36 -0
  132. package/dist/legacy/video-observation.js +192 -0
  133. package/dist/legacy/video-planner.d.ts +12 -0
  134. package/dist/legacy/video-planner.js +501 -0
  135. package/dist/legacy/video-prompts.d.ts +37 -0
  136. package/dist/legacy/video-prompts.js +569 -0
  137. package/dist/legacy/video-tools.d.ts +3 -0
  138. package/dist/legacy/video-tools.js +59 -0
  139. package/dist/legacy/video-variant-state.d.ts +29 -0
  140. package/dist/legacy/video-variant-state.js +80 -0
  141. package/dist/legacy/vision-model.d.ts +17 -0
  142. package/dist/legacy/vision-model.js +74 -0
  143. package/dist/llm-healer.d.ts +63 -0
  144. package/dist/llm-healer.js +166 -0
  145. package/dist/llm-provider.d.ts +29 -0
  146. package/dist/llm-provider.js +80 -0
  147. package/dist/logger.d.ts +6 -2
  148. package/dist/logger.js +15 -1
  149. package/dist/mockup-html.js +35 -25
  150. package/dist/mockup.d.ts +95 -2
  151. package/dist/mockup.js +427 -166
  152. package/dist/mouse-animation.d.ts +2 -2
  153. package/dist/mouse-animation.js +34 -20
  154. package/dist/opcode-actions.d.ts +42 -0
  155. package/dist/opcode-actions.js +511 -0
  156. package/dist/opcode-runner.d.ts +51 -0
  157. package/dist/opcode-runner.js +770 -0
  158. package/dist/openrouter-client.d.ts +40 -0
  159. package/dist/openrouter-client.js +16 -0
  160. package/dist/overlay-engine.d.ts +24 -0
  161. package/dist/overlay-engine.js +176 -0
  162. package/dist/postcondition.d.ts +16 -0
  163. package/dist/postcondition.js +269 -0
  164. package/dist/program-patcher.d.ts +25 -0
  165. package/dist/program-patcher.js +44 -0
  166. package/dist/prompts.d.ts +13 -5
  167. package/dist/prompts.js +224 -351
  168. package/dist/provider-config.d.ts +12 -0
  169. package/dist/provider-config.js +15 -0
  170. package/dist/recovery-chain.d.ts +37 -0
  171. package/dist/recovery-chain.js +350 -0
  172. package/dist/remote-browser.d.ts +28 -4
  173. package/dist/remote-browser.js +60 -5
  174. package/dist/safari-browser-bar.d.ts +15 -0
  175. package/dist/safari-browser-bar.js +95 -0
  176. package/dist/safari-toolbar-asset.d.ts +15 -0
  177. package/dist/safari-toolbar-asset.js +12 -0
  178. package/dist/security.d.ts +2 -1
  179. package/dist/security.js +49 -10
  180. package/dist/selector-resolver.d.ts +34 -0
  181. package/dist/selector-resolver.js +181 -0
  182. package/dist/semantic-resolver.d.ts +35 -0
  183. package/dist/semantic-resolver.js +161 -0
  184. package/dist/server-capture-runtime.d.ts +5 -3
  185. package/dist/server-capture-runtime.js +42 -95
  186. package/dist/server-credit-usage.d.ts +2 -2
  187. package/dist/server-project-webhooks.d.ts +15 -1
  188. package/dist/server-project-webhooks.js +34 -8
  189. package/dist/server-screenshot-watermark.js +27 -5
  190. package/dist/session-profile.js +164 -1
  191. package/dist/sf-pro-symbols.d.ts +1 -0
  192. package/dist/sf-pro-symbols.js +55 -0
  193. package/dist/skill-packaging.d.ts +28 -0
  194. package/dist/skill-packaging.js +169 -0
  195. package/dist/smart-wait.d.ts +27 -0
  196. package/dist/smart-wait.js +81 -0
  197. package/dist/status-bar-render.d.ts +20 -0
  198. package/dist/status-bar-render.js +410 -0
  199. package/dist/status-bar.d.ts +9 -0
  200. package/dist/status-bar.js +298 -14
  201. package/dist/svg-browser-bar.d.ts +33 -0
  202. package/dist/svg-browser-bar.js +206 -0
  203. package/dist/svg-status-bar.d.ts +36 -0
  204. package/dist/svg-status-bar.js +597 -0
  205. package/dist/svg-text.d.ts +61 -0
  206. package/dist/svg-text.js +118 -0
  207. package/dist/tools.js +89 -451
  208. package/dist/types.d.ts +240 -5
  209. package/dist/types.js +23 -1
  210. package/dist/v2/action-verifier.d.ts +29 -0
  211. package/dist/v2/action-verifier.js +133 -0
  212. package/dist/v2/alt-text.d.ts +26 -0
  213. package/dist/v2/alt-text.js +55 -0
  214. package/dist/v2/benchmark.d.ts +59 -0
  215. package/dist/v2/benchmark.js +135 -0
  216. package/dist/v2/capture-strategy.d.ts +30 -0
  217. package/dist/v2/capture-strategy.js +67 -0
  218. package/dist/v2/capture-verification.d.ts +35 -0
  219. package/dist/v2/capture-verification.js +95 -0
  220. package/dist/v2/circuit-breaker.d.ts +42 -0
  221. package/dist/v2/circuit-breaker.js +119 -0
  222. package/dist/v2/cli-runner-local.d.ts +11 -0
  223. package/dist/v2/cli-runner-local.js +91 -0
  224. package/dist/v2/cli-runner.d.ts +34 -0
  225. package/dist/v2/cli-runner.js +300 -0
  226. package/dist/v2/compiler-prompts.d.ts +27 -0
  227. package/dist/v2/compiler-prompts.js +123 -0
  228. package/dist/v2/compiler.d.ts +37 -0
  229. package/dist/v2/compiler.js +147 -0
  230. package/dist/v2/explorer.d.ts +41 -0
  231. package/dist/v2/explorer.js +56 -0
  232. package/dist/v2/index.d.ts +37 -0
  233. package/dist/v2/index.js +31 -0
  234. package/dist/v2/llm-healer.d.ts +62 -0
  235. package/dist/v2/llm-healer.js +166 -0
  236. package/dist/v2/llm-provider.d.ts +29 -0
  237. package/dist/v2/llm-provider.js +80 -0
  238. package/dist/v2/opcode-runner.d.ts +47 -0
  239. package/dist/v2/opcode-runner.js +634 -0
  240. package/dist/v2/overlay-engine.d.ts +24 -0
  241. package/dist/v2/overlay-engine.js +150 -0
  242. package/dist/v2/postcondition.d.ts +16 -0
  243. package/dist/v2/postcondition.js +249 -0
  244. package/dist/v2/program-patcher.d.ts +25 -0
  245. package/dist/v2/program-patcher.js +44 -0
  246. package/dist/v2/recovery-chain.d.ts +30 -0
  247. package/dist/v2/recovery-chain.js +368 -0
  248. package/dist/v2/schema.d.ts +2580 -0
  249. package/dist/v2/schema.js +295 -0
  250. package/dist/v2/selector-resolver.d.ts +34 -0
  251. package/dist/v2/selector-resolver.js +181 -0
  252. package/dist/v2/semantic-resolver.d.ts +35 -0
  253. package/dist/v2/semantic-resolver.js +161 -0
  254. package/dist/v2/smart-wait.d.ts +27 -0
  255. package/dist/v2/smart-wait.js +81 -0
  256. package/dist/v2/types.d.ts +444 -0
  257. package/dist/v2/types.js +19 -0
  258. package/dist/v2/web-playwright-local.d.ts +69 -0
  259. package/dist/v2/web-playwright-local.js +392 -0
  260. package/dist/version.d.ts +1 -0
  261. package/dist/version.js +5 -0
  262. package/dist/video-agent.js +18 -13
  263. package/dist/video-planner.js +2 -1
  264. package/dist/video-prompts.js +3 -3
  265. package/dist/web-playwright-local.d.ts +126 -0
  266. package/dist/web-playwright-local.js +819 -0
  267. package/dist/ws-auth.js +4 -1
  268. package/dist/ws-broadcast.d.ts +34 -0
  269. package/dist/ws-broadcast.js +85 -0
  270. package/dist/ws-connection-limits.d.ts +12 -0
  271. package/dist/ws-connection-limits.js +44 -0
  272. package/dist/ws-handler-utils.d.ts +32 -0
  273. package/dist/ws-handler-utils.js +139 -0
  274. package/dist/ws-handler.js +294 -164
  275. package/dist/ws-metrics-server.d.ts +9 -0
  276. package/dist/ws-metrics-server.js +31 -0
  277. package/dist/ws-server.js +41 -1
  278. package/package.json +51 -34
@@ -0,0 +1,392 @@
1
+ /**
2
+ * Capture Agent — WebPlaywrightLocal RuntimeAdapter
3
+ *
4
+ * Thin adapter delegating to the existing Browser class from src/browser.ts.
5
+ * This is the first (and for now only) RuntimeAdapter implementation.
6
+ */
7
+ import fs from 'node:fs/promises';
8
+ import os from 'node:os';
9
+ import path from 'node:path';
10
+ import { humanType, moveMouse } from '../mouse-animation.js';
11
+ import { resolveTarget } from './semantic-resolver.js';
12
/**
 * WebPlaywrightLocal — RuntimeAdapter backed by a local Playwright session.
 *
 * Most methods delegate to the wrapped `browser` object. While a recording is
 * active (`clipCursor` is non-null) interactions take a slower path: the mouse
 * cursor is moved to the target first, scrolling is smooth, and text is typed
 * through `humanType`, so the captured video shows the interaction.
 */
export class WebPlaywrightLocal {
    /** Wrapped browser object that performs the actual page operations. */
    browser;
    /** Directory for recording output; a temp directory is created when nullish. */
    recordingDir;
    /** Timestamp (ms) taken at construction; used to derive `trimStartMs`. */
    sessionStartedAt = Date.now();
    /** State of the started/finalized recording, or null when none was started. */
    recording = null;
    /** Cursor tracking state; set by beginRecording, cleared when the recording is finalized. */
    clipCursor = null;
    /**
     * @param browser Browser wrapper every operation is delegated to.
     * @param recordingDir Optional directory where the recording file is written.
     */
    constructor(browser, recordingDir) {
        this.browser = browser;
        this.recordingDir = recordingDir;
    }
    /** Navigates the wrapped browser to `url`. */
    async navigate(url) {
        await this.browser.navigateTo(url);
    }
    /** Returns the URL of the current page. */
    async getCurrentUrl() {
        const page = await this.browser.currentPage;
        return page.url();
    }
    /** Returns the AKTree produced by the wrapped browser. */
    async getAKTree() {
        return this.browser.getAKTree();
    }
    /** Returns the page signals captured by the wrapped browser. */
    async getPageSignals() {
        return this.browser.capturePageSignals();
    }
    /**
     * Clicks an element.
     *
     * Strategy, in priority order: explicit coordinates, keyboard activation
     * (focus + Enter), a synthetic `click` event, then the browser's regular
     * selector click.
     *
     * @param selector CSS selector of the element (unused when coordinates are given).
     * @param options Optional `coordinates`, `useKeyboard`, `useJsDispatch`, `force`.
     */
    async click(selector, options) {
        const page = await this.browser.currentPage;
        if (options?.coordinates) {
            await this.moveClipCursorToPoint(options.coordinates);
            await this.browser.clickByCoordinates(options.coordinates.x, options.coordinates.y);
            return;
        }
        const locator = page.locator(selector).first();
        await this.moveClipCursorToLocator(locator);
        if (options?.useKeyboard) {
            await locator.focus();
            await page.keyboard.press('Enter');
            return;
        }
        if (options?.useJsDispatch) {
            await locator.dispatchEvent('click');
            return;
        }
        await this.browser.clickBySelector(selector, { force: options?.force });
    }
    /**
     * Click an element using semantic target resolution.
     *
     * @throws Error when the target cannot be resolved.
     */
    async clickByTarget(opts) {
        const { locator } = await this.requireTarget(opts, 'cannot find target');
        await this.moveClipCursorToLocator(locator);
        await locator.click({ timeout: 5000 });
    }
    /**
     * Type into an element using semantic target resolution.
     *
     * @param opts Resolve options (selector and/or semantic target).
     * @param text Text to enter.
     * @param clearFirst When true (default) the existing value is replaced.
     * @throws Error when the target cannot be resolved.
     */
    async typeByTarget(opts, text, clearFirst = true) {
        const { locator } = await this.requireTarget(opts, 'cannot find target for typing');
        if (this.clipCursor) {
            await this.typeIntoLocator(locator, text, clearFirst);
            return;
        }
        if (clearFirst) {
            await locator.fill(text);
            return;
        }
        await locator.click();
        await locator.pressSequentially(text);
    }
    /**
     * Wait for an element using semantic target resolution.
     *
     * @returns true when the target resolved within `timeoutMs`, false otherwise.
     */
    async waitForTarget(opts, timeoutMs = 10000) {
        const page = await this.browser.currentPage;
        const resolved = await resolveTarget(page, { ...opts, timeoutMs });
        return resolved !== null;
    }
    /**
     * Scroll an element into view using semantic target resolution.
     *
     * @throws Error when the target cannot be resolved.
     */
    async scrollIntoViewByTarget(opts) {
        const { page, locator } = await this.requireTarget(opts, 'cannot find target to scroll into view');
        if (this.clipCursor) {
            await this.smoothScrollIntoView(page, locator);
            return;
        }
        await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
    }
    /**
     * Types `text` into the element matched by `selector`.
     * While recording, the first match is typed into via the animated path.
     */
    async type(selector, text, clearFirst = true) {
        if (this.clipCursor) {
            const page = await this.browser.currentPage;
            await this.typeIntoLocator(page.locator(selector).first(), text, clearFirst);
            return;
        }
        await this.browser.typeText(text, { selector, clearFirst });
    }
    /** Presses a keyboard key; while recording a short pause precedes the key press. */
    async pressKey(key) {
        const page = await this.browser.currentPage;
        if (this.clipCursor) {
            await page.waitForTimeout(90);
            await page.keyboard.press(key);
            return;
        }
        await this.browser.pressKey(key);
    }
    /**
     * Scrolls the page.
     *
     * @param direction 'up' | 'down' | 'left' | 'right'.
     * @param amount Distance in pixels; the recording path defaults to 500.
     */
    async scroll(direction, amount) {
        if (!this.clipCursor) {
            await this.browser.scroll(direction, amount);
            return;
        }
        const page = await this.browser.currentPage;
        await this.moveClipCursorToViewportCenter();
        const distance = amount ?? 500;
        const dx = direction === 'right' ? distance : direction === 'left' ? -distance : 0;
        const dy = direction === 'down' ? distance : direction === 'up' ? -distance : 0;
        await page.evaluate(({ deltaX, deltaY }) => {
            window.scrollBy({ left: deltaX, top: deltaY, behavior: 'smooth' });
        }, { deltaX: dx, deltaY: dy });
        // Give the smooth scroll time to play out before continuing.
        await page.waitForTimeout(420);
    }
    /** Scrolls the first element matching `selector` into view. */
    async scrollIntoView(selector) {
        const page = await this.browser.currentPage;
        const locator = page.locator(selector).first();
        if (this.clipCursor) {
            await this.smoothScrollIntoView(page, locator);
            return;
        }
        await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
    }
    /**
     * Waits for `condition.selector` to reach `condition.state`.
     *
     * @returns true when the wait succeeded, false on any failure (including timeout).
     */
    async waitFor(condition) {
        try {
            const page = await this.browser.currentPage;
            // Only these two states are forwarded; any other value yields
            // `undefined`, leaving the choice to Playwright's own default.
            const stateMap = { visible: 'visible', attached: 'attached' };
            await page.locator(condition.selector).waitFor({
                state: stateMap[condition.state],
                timeout: condition.timeoutMs,
            });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Attempts to dismiss cookie banners and similar overlays in three passes.
     *
     * @returns The built-in result, or `{ dismissed: true, method }` when the
     *          button sweep clicked something.
     */
    async dismissOverlays() {
        // Pass 1: Built-in cookie/widget dismissal (cookie-dismiss.ts)
        const result = await this.browser.dismissOverlays();
        if (result.dismissed)
            return result;
        // Pass 2: Playwright-level sweep for common accept/close buttons
        // in cookie-like containers that the built-in module might miss
        const page = await this.browser.currentPage;
        const acceptPatterns = [
            'button:has-text("Accept")',
            'button:has-text("Accept all")',
            'button:has-text("Accepter")',
            'button:has-text("Tout accepter")',
            'button:has-text("Got it")',
            'button:has-text("I agree")',
            'button:has-text("OK")',
            '[role="button"]:has-text("Accept")',
            '[role="button"]:has-text("Accept all")',
        ];
        for (const pattern of acceptPatterns) {
            try {
                const btn = page.locator(pattern).first();
                if (!(await btn.isVisible({ timeout: 500 })))
                    continue;
                // Only click buttons in the lower 40% of the viewport (likely a banner),
                // so ordinary in-page "OK"/"Accept" buttons are left alone.
                const box = await btn.boundingBox();
                const viewport = page.viewportSize();
                if (box && viewport && box.y > viewport.height * 0.6) {
                    await btn.click({ timeout: 2000 });
                    await page.waitForTimeout(500);
                    return { dismissed: true, method: `v2-sweep:${pattern}` };
                }
            }
            catch {
                // Try next pattern
            }
        }
        // Pass 3: Try Escape key for modal overlays
        try {
            await page.keyboard.press('Escape');
            await page.waitForTimeout(300);
        }
        catch {
            // Non-fatal
        }
        return result;
    }
    /** Takes a screenshot through the wrapped browser. */
    async takeScreenshot() {
        return this.browser.takeScreenshot();
    }
    /** Returns the screenshot buffer of the element matched by `selector`. */
    async takeElementScreenshot(selector) {
        const { buffer } = await this.browser.screenshotBySelector(selector);
        return buffer;
    }
    /** Currently identical to takeScreenshot(). */
    async takeCleanScreenshot() {
        return this.browser.takeScreenshot();
    }
    /**
     * Marks the start of a recording and enables the animated cursor path.
     *
     * @param options Must carry `mediaMode`, which also names the output file.
     * @throws Error when the current page has no Playwright video handle.
     */
    async beginRecording(options) {
        const page = await this.browser.currentPage;
        if (!page.video()) {
            throw new Error(`recording is not enabled for ${options.mediaMode} mode`);
        }
        const recordingDir = this.recordingDir
            ?? await fs.mkdtemp(path.join(os.tmpdir(), 'autokap-v2-recording-'));
        const now = Date.now();
        this.recording = {
            mediaMode: options.mediaMode,
            startedAt: now,
            // Time elapsed since this adapter was constructed.
            trimStartMs: Math.max(0, now - this.sessionStartedAt),
            outputPath: path.join(recordingDir, `${options.mediaMode}.webm`),
            finalized: false,
            // Filled in once by endRecording() so repeated calls agree.
            durationMs: null,
        };
        this.clipCursor = { currentPosition: null };
        await this.seedClipCursor();
    }
    /**
     * Finalizes the recording (closing the browser context) and returns the video.
     *
     * Safe to call more than once: later calls re-read the saved file and report
     * the duration measured at finalization instead of one that keeps growing.
     *
     * @returns `{ buffer, durationMs, mimeType, trimStartMs }`.
     * @throws Error when no recording was started or no video handle exists.
     */
    async endRecording() {
        const recording = this.recording;
        if (!recording) {
            throw new Error('recording was not started');
        }
        if (!recording.finalized) {
            const page = await this.browser.currentPage;
            const videoRef = page.video();
            if (!videoRef) {
                throw new Error('recording finalization failed: no Playwright video handle found');
            }
            await this.browser.closeContext();
            await videoRef.saveAs(recording.outputPath);
            recording.durationMs = Date.now() - recording.startedAt;
            recording.finalized = true;
            this.clipCursor = null;
        }
        const buffer = await fs.readFile(recording.outputPath);
        return {
            buffer,
            durationMs: recording.durationMs ?? Date.now() - recording.startedAt,
            mimeType: 'video/webm',
            trimStartMs: recording.trimStartMs,
        };
    }
    /** Switches the browser language to `locale`. */
    async setLocale(locale) {
        await this.browser.setLanguage(locale);
    }
    /** Applies the requested color scheme. */
    async setColorScheme(scheme) {
        await this.browser.setColorScheme(scheme);
    }
    /** Reloads the current page. */
    async reloadPage() {
        await this.browser.reloadCurrentPage();
    }
    /** Forwards a storage hint (`storage`, `key`, `value`, `kind`) to the browser. */
    async writeStorageHint(params) {
        return this.browser.writeStorageHintCandidate({
            storageName: params.storage,
            key: params.key,
            candidate: params.value,
            kind: params.kind,
        });
    }
    /** Closes the wrapped browser. */
    async close() {
        await this.browser.close();
    }
    /**
     * Resolves `opts` to a locator on the current page.
     *
     * @param opts Resolve options passed to resolveTarget.
     * @param failurePrefix Text placed before the target description in the error.
     * @returns The current page and the resolved locator.
     * @throws Error when resolution yields nothing.
     */
    async requireTarget(opts, failurePrefix) {
        const page = await this.browser.currentPage;
        const resolved = await resolveTarget(page, opts);
        if (!resolved) {
            throw new Error(`${failurePrefix}: ${describeResolveOptions(opts)}`);
        }
        return { page, locator: resolved.locator };
    }
    /** Recording-mode scroll: centers the cursor, scrolls smoothly, then waits for the animation. */
    async smoothScrollIntoView(page, locator) {
        await this.moveClipCursorToViewportCenter();
        await locator.evaluate((node) => {
            node.scrollIntoView({ block: 'center', behavior: 'smooth' });
        });
        await page.waitForTimeout(350);
    }
    /** Recording-mode typing: move the cursor to the field, click it, optionally select all, then type. */
    async typeIntoLocator(locator, text, clearFirst) {
        const page = await this.browser.currentPage;
        await locator.waitFor({ state: 'visible', timeout: 5000 });
        await locator.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => undefined);
        await this.moveClipCursorToLocator(locator);
        await locator.click({ timeout: 5000 });
        if (clearFirst) {
            // Select-all is Cmd+A on macOS and Ctrl+A elsewhere; the typed text
            // then replaces the selection.
            const selectAll = process.platform === 'darwin' ? 'Meta+A' : 'Control+A';
            await page.keyboard.press(selectAll);
        }
        await page.waitForTimeout(70);
        await humanType(page, text);
    }
    /** Places the cursor at a random point in the central 40% of the viewport. */
    async seedClipCursor() {
        if (!this.clipCursor)
            return;
        const page = await this.browser.currentPage;
        const viewport = page.viewportSize();
        if (!viewport)
            return;
        const startX = Math.round(viewport.width * (0.3 + Math.random() * 0.4));
        const startY = Math.round(viewport.height * (0.3 + Math.random() * 0.4));
        await page.mouse.move(startX, startY);
        this.clipCursor.currentPosition = { x: startX, y: startY };
        await page.waitForTimeout(60);
    }
    /** Moves the recording cursor to the middle of the viewport. No-op when not recording. */
    async moveClipCursorToViewportCenter() {
        if (!this.clipCursor)
            return;
        const page = await this.browser.currentPage;
        const viewport = page.viewportSize();
        if (!viewport)
            return;
        await this.moveClipCursorToPoint({
            x: Math.round(viewport.width / 2),
            y: Math.round(viewport.height / 2),
        }, { durationMs: 280, steps: 18 });
    }
    /** Moves the recording cursor onto a point inside `locator`'s box. No-op when not recording. */
    async moveClipCursorToLocator(locator) {
        if (!this.clipCursor)
            return;
        await locator.waitFor({ state: 'visible', timeout: 5000 });
        // Best effort: a failed scroll must not abort the interaction.
        await locator.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => undefined);
        const page = await this.browser.currentPage;
        const viewport = page.viewportSize();
        const box = await locator.boundingBox();
        if (!box || !viewport)
            return;
        await this.moveClipCursorToPoint(getHumanPointInBox(box, viewport));
    }
    /**
     * Moves the recording cursor to `point` and remembers the new position.
     * Animated via moveMouse when a previous position is known, otherwise a
     * direct jump. No-op when not recording.
     */
    async moveClipCursorToPoint(point, options) {
        if (!this.clipCursor)
            return;
        const page = await this.browser.currentPage;
        const from = this.clipCursor.currentPosition;
        if (from) {
            await moveMouse(page, from, point, options);
        }
        else {
            await page.mouse.move(point.x, point.y);
        }
        this.clipCursor.currentPosition = point;
    }
}
358
/**
 * Renders resolve options as a short `key="value"` list for error messages.
 *
 * Only truthy fields are included, in the order selector, text, role, label,
 * placeholder. Returns 'no target specified' when nothing is set.
 */
function describeResolveOptions(opts) {
    const target = opts.target;
    const candidates = [
        ['selector', opts.selector],
        ['text', target?.text],
        ['role', target?.role],
        ['label', target?.label],
        ['placeholder', target?.placeholder],
    ];
    const rendered = candidates
        .filter(([, value]) => value)
        .map(([field, value]) => `${field}="${value}"`);
    return rendered.length > 0 ? rendered.join(', ') : 'no target specified';
}
372
/**
 * Picks a point inside `box` for the cursor to land on.
 *
 * Along each axis the point is drawn at random from the box shrunk by an
 * inset of 20% of its size (at least 6px, at most max(6, half the size)).
 * When that range is empty the midpoint is used instead. The result is
 * rounded and kept 4px inside the viewport edges.
 *
 * @param box `{ x, y, width, height }` of the element.
 * @param viewport `{ width, height }` of the page viewport.
 * @returns `{ x, y }` with integer coordinates.
 */
function getHumanPointInBox(box, viewport) {
    const pickAlongAxis = (origin, extent) => {
        const inset = Math.min(Math.max(extent * 0.2, 6), Math.max(6, extent / 2));
        const lower = origin + inset;
        const upper = origin + Math.max(inset, extent - inset);
        return upper <= lower ? origin + extent / 2 : randomBetween(lower, upper);
    };
    // Horizontal first, then vertical, so random draws happen in the same order.
    const rawX = pickAlongAxis(box.x, box.width);
    const rawY = pickAlongAxis(box.y, box.height);
    return {
        x: clampPoint(rawX, 4, viewport.width - 4),
        y: clampPoint(rawY, 4, viewport.height - 4),
    };
}
386
/**
 * Returns a random number in [min, max). When `max` is not greater than
 * `min` the span collapses to zero and `min` is returned.
 */
function randomBetween(min, max) {
    const span = Math.max(0, max - min);
    return min + Math.random() * span;
}
389
/**
 * Limits `value` to the range [min, max] and rounds it to the nearest integer.
 * The lower bound is applied last, so it wins if the bounds cross.
 */
function clampPoint(value, min, max) {
    const cappedAbove = Math.min(max, value);
    const cappedBelow = Math.max(min, cappedAbove);
    return Math.round(cappedBelow);
}
392
+ //# sourceMappingURL=web-playwright-local.js.map
@@ -0,0 +1 @@
1
/** Application version string; the accompanying implementation reads it from the `version` field of package.json. */
export declare const APP_VERSION: string;
@@ -0,0 +1,5 @@
1
+ import { createRequire } from 'node:module';
2
// ES modules have no built-in `require`; build one anchored at this file so
// the package manifest can be loaded relative to it.
const loadFromHere = createRequire(import.meta.url);
/** Version string taken from the `version` field of ../package.json. */
export const APP_VERSION = loadFromHere('../package.json').version;
5
+ //# sourceMappingURL=version.js.map
@@ -15,6 +15,7 @@ import { dismissOverlaysWithLogging } from './overlay-utils.js';
15
15
  import { logger } from './logger.js';
16
16
  import { evaluateActionSecurity, evaluateResolvedActionSecurity } from './security.js';
17
17
  import { createAbortError, getAbortMessage, isAbortError, throwIfAborted } from './abort.js';
18
+ import { zdrParam } from './provider-config.js';
18
19
  const VIDEO_AGENT_CACHE_LAYOUT_V2 = process.env.VIDEO_AGENT_CACHE_LAYOUT_V2 === '1';
19
20
  function createClient(apiKey) {
20
21
  return new OpenAI({
@@ -201,7 +202,7 @@ function buildVideoStepTargetFromAction(action, coherenceKey) {
201
202
  }
202
203
  function mapExecutedActionsToVideoSteps(params) {
203
204
  const actionable = params.actions.filter((action) => (action.success !== false
204
- && ['navigate_to', 'dismiss_overlays', 'click', 'type_text', 'select_option', 'scroll', 'press_key', 'wait', 'hover', 'safe_expand'].includes(action.action)));
205
+ && ['navigate_to', 'dismiss_overlays', 'click', 'type_text', 'select_option', 'scroll', 'press_key', 'wait', 'hover', 'safe_expand', 'tap', 'type'].includes(action.action)));
205
206
  return actionable.map((action, index) => {
206
207
  const id = index === 0 ? params.originalStep.id : `${params.originalStep.id}-repair-${index + 1}`;
207
208
  const target = buildVideoStepTargetFromAction(action, params.coherenceKey);
@@ -244,6 +245,10 @@ function mapExecutedActionsToVideoSteps(params) {
244
245
  return { ...baseStep, type: 'hover' };
245
246
  case 'wait':
246
247
  return { ...baseStep, type: 'wait', waitMs: Number(action.params.ms ?? params.originalStep.waitMs ?? 500) };
248
+ case 'tap':
249
+ return { ...baseStep, type: 'click' };
250
+ case 'type':
251
+ return { ...baseStep, type: 'type', text: String(action.params.text ?? params.originalStep.text ?? '') };
247
252
  case 'click':
248
253
  default:
249
254
  return { ...baseStep, type: 'click' };
@@ -263,11 +268,11 @@ function buildScreenshotRepairLanePrompt(params) {
263
268
  return `${details}
264
269
 
265
270
  Use the existing browser state on the current page. Perform ONLY the minimal actions needed to make this step succeed.
266
- - Re-read the live page and use visible interactive elements or search_text instead of guessing stale selectors.
271
+ - Re-read the live page and use the AKTree or focus() to identify elements instead of guessing stale selectors.
267
272
  - If a menu, dialog, or popover must be opened for the step to succeed, open it and stop immediately once that resulting UI is visible.
268
273
  - Do NOT perform later clip actions.
269
274
  - Do NOT navigate away unless the current step explicitly requires it.
270
- - Call ready_to_capture immediately when the exact post-step UI state is visible.`;
275
+ - Call capture immediately when the exact post-step UI state is visible.`;
271
276
  }
272
277
  function buildPreparedVariantRepairPrompt(params) {
273
278
  const details = [
@@ -298,10 +303,10 @@ Use the existing authenticated browser state. The correct project/page is alread
298
303
  - Do NOT start by searching for tiny raw tokens like "fr", "en", "light", or "dark" unless a settings page, language picker, or variant menu is already open.
299
304
  - Use visible controls, menus, settings, or search_text to find the language/theme switch. Do not guess stale selectors.
300
305
  - If the app briefly detours through settings or another route to switch language/theme, return to the same project/page before finishing.
301
- - Ignore user-generated content that may remain in another language, including project names, preset names, assistant text, and imported data labels.
302
- - Do NOT perform the recorded clip interaction itself (for example do not click "New", do not open "New preset", do not select "Hero").
303
- - Call ready_to_capture only when the current page is back on the correct project/entity and the visible app chrome matches the requested language/theme.
304
- - If you cannot restore the requested language/theme on this page, call give_up.`;
306
+ - Ignore user-generated content that may remain in another language, including names, titles, chat content, and imported data labels.
307
+ - Do NOT perform the recorded clip interaction itself. Only restore the requested pre-recording state.
308
+ - Call capture only when the current page is back on the correct project/entity and the visible app chrome matches the requested language/theme.
309
+ - If you cannot restore the requested language/theme on this page after multiple attempts, call capture with the current state.`;
305
310
  }
306
311
  async function replayPreparedActions(browser, config, callbacks) {
307
312
  if (!config.preparedReplayActions || config.preparedReplayActions.length === 0) {
@@ -1708,8 +1713,8 @@ async function scrollNamedTargetIntoViewFromDom(page, target, mode) {
1708
1713
  }
1709
1714
  /**
1710
1715
  * Extract target text hints from a step description.
1711
- * E.g. "Click 'Nouveau preset'" → ["Nouveau preset"]
1712
- * E.g. "Click top 'New' / 'Nouveau' button" → ["New", "Nouveau"]
1716
+ * E.g. "Click 'Open pricing'" → ["Open pricing"]
1717
+ * E.g. "Click top 'Open' / 'Ouvrir' button" → ["Open", "Ouvrir"]
1713
1718
  */
1714
1719
  function extractDescriptionTextHints(description) {
1715
1720
  const matches = Array.from(description.matchAll(/[''""]([^''"]+)[''""]/g));
@@ -3160,7 +3165,7 @@ async function verifyStep(client, model, step, screenshot, stepIndex, totalSteps
3160
3165
  tools: videoVerificationTools,
3161
3166
  tool_choice: 'required',
3162
3167
  max_tokens: 300,
3163
- provider: { zdr: true },
3168
+ provider: { ...zdrParam() },
3164
3169
  }, { signal });
3165
3170
  const usage = extractLlmUsageSnapshot(response);
3166
3171
  const call = response.choices?.[0]?.message?.tool_calls?.[0];
@@ -3224,7 +3229,7 @@ async function fixStep(client, model, step, failureReason, suggestion, screensho
3224
3229
  ],
3225
3230
  max_tokens: 600,
3226
3231
  response_format: { type: 'json_object' },
3227
- provider: { zdr: true },
3232
+ provider: { ...zdrParam() },
3228
3233
  }, { signal });
3229
3234
  const usage = extractLlmUsageSnapshot(response);
3230
3235
  const content = response.choices?.[0]?.message?.content ?? '';
@@ -3289,7 +3294,7 @@ async function classifyVariantStateWithLLMFallback(client, model, browser, reque
3289
3294
  }),
3290
3295
  },
3291
3296
  ],
3292
- provider: { zdr: true },
3297
+ provider: { ...zdrParam() },
3293
3298
  }, { signal });
3294
3299
  const parsed = JSON.parse(response.choices?.[0]?.message?.content ?? '{}');
3295
3300
  return {
@@ -4404,7 +4409,7 @@ async function runDryRun(plan, config, client, callbacks, videoScript) {
4404
4409
  async function runRecording(plan, config, callbacks) {
4405
4410
  log('Starting video recording...', 'info', callbacks.onLog);
4406
4411
  // Create temp directory for the video file
4407
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'autokap-video-'));
4412
+ const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'screenshot-agent-video-'));
4408
4413
  const outputPath = path.join(tempDir, 'recording.webm');
4409
4414
  const startTime = Date.now();
4410
4415
  const executionSteps = deriveExecutionSteps(plan, config);
@@ -2,6 +2,7 @@ import OpenAI from 'openai';
2
2
  import { buildVideoPlannerSystemPrompt, buildVideoPlannerUserMessage, } from './video-prompts.js';
3
3
  export { observePlanningContext } from './video-observation.js';
4
4
  import { throwIfAborted } from './abort.js';
5
+ import { zdrParam } from './provider-config.js';
5
6
  import { extractStepUsage } from './llm-usage.js';
6
7
  function createClient(apiKey) {
7
8
  return new OpenAI({
@@ -424,7 +425,7 @@ export async function planFromScript(script, url, model, apiKey, maxRetries = 2,
424
425
  messages,
425
426
  max_tokens: 4000,
426
427
  response_format: { type: 'json_object' },
427
- provider: { zdr: true },
428
+ provider: { ...zdrParam() },
428
429
  }, { signal: abortSignal });
429
430
  lastResponse = response;
430
431
  const content = response.choices?.[0]?.message?.content ?? '';
@@ -70,7 +70,7 @@ Given the failed step, the failure reason, a runtime page observation summary, a
70
70
  If a cookie banner, modal, sticky feedback widget, newsletter popup, or consent wall is blocking the intended interaction or making the frame unusable:
71
71
  - Add a \`dismiss_overlays\` step first
72
72
  - Then retry the intended action or continue with the next recovery strategy
73
- - Do NOT hide the product's own chat/assistant widget if the overall demo goal is to show that widget
73
+ - Do NOT hide the site's own chat/support widget if the overall demo goal is to show that widget
74
74
 
75
75
  ### 1. Use direct navigation ONLY for technical preparation steps
76
76
  Direct \`navigate\` is allowed only when at least one of these is true:
@@ -330,7 +330,7 @@ You MUST respond with a single valid JSON object matching this exact structure:
330
330
  | assert_page | pageExpectation | timeoutMs |
331
331
 
332
332
  For \`target\` and \`toTarget\`, prefer this structured shape when the page has several similar controls:
333
- \`{"label":"New preset","labelMatchMode":"exact","role":"button","tag":"button","href":null,"selector":"button:has-text('New preset')","selectorAlternates":["[role='menuitem']:has-text('New preset')"],"containerLabel":"New menu"}\`
333
+ \`{"label":"Open pricing","labelMatchMode":"exact","role":"button","tag":"button","href":null,"selector":"button:has-text('Open pricing')","selectorAlternates":["a:has-text('Pricing')","[role='menuitem']:has-text('Open pricing')"],"containerLabel":"Primary navigation"}\`
334
334
  Use \`coordinates\` only as a tie-breaker, never as the sole durable anchor when a label, href, role, or selector exists.
335
335
 
336
336
  For \`assert_page\`, use a JSON object like:
@@ -421,7 +421,7 @@ For \`key\` steps, Playwright key names are **case-sensitive**. Always use exact
421
421
 
422
422
  - This is a showcase video, not a QA trace. The viewer should only see intentional actions.
423
423
  - Use \`dismiss_overlays\` whenever a cookie banner, newsletter modal, consent wall, sticky feedback widget, or unrelated popup blocks the content.
424
- - Do NOT dismiss or hide the product's own chat/assistant/support widget if the user explicitly wants to demonstrate it.
424
+ - Do NOT dismiss or hide the site's own chat/support widget if the user explicitly wants to demonstrate it.
425
425
  - Prefer stable navigation elements. Use direct navigation only for initial landing or hidden preparation.
426
426
  - Use \`select_option\` for real dropdown controls instead of brittle click chains when possible.
427
427
  - Keep the flow concise. Do not wander through irrelevant UI or perform redundant clicks.
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Capture Agent — WebPlaywrightLocal RuntimeAdapter
3
+ *
4
+ * Thin adapter delegating to the existing Browser class from src/browser.ts.
5
+ * This is the first (and for now only) RuntimeAdapter implementation.
6
+ */
7
+ import type { Browser } from './browser.js';
8
+ import type { AKTree, VideoPageSignals } from './types.js';
9
+ import type { RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult } from './execution-types.js';
10
+ import { type ResolveOptions } from './semantic-resolver.js';
11
+ export declare class WebPlaywrightLocal implements RuntimeAdapter {
12
+ private browser;
13
+ private recordingDir?;
14
+ private readonly sessionStartedAt;
15
+ private recording;
16
+ private clipCursor;
17
+ constructor(browser: Browser, recordingDir?: string | undefined);
18
+ navigate(url: string): Promise<void>;
19
+ getCurrentUrl(): Promise<string>;
20
+ getAKTree(): Promise<AKTree>;
21
+ getPageSignals(): Promise<VideoPageSignals>;
22
+ click(selector: string, options?: ClickOptions): Promise<void>;
23
+ /**
24
+ * Click an element using semantic target resolution.
25
+ * Tries CSS selector first, falls back to Playwright semantic locators.
26
+ */
27
+ clickByTarget(opts: ResolveOptions): Promise<void>;
28
+ /**
29
+ * Type into an element using semantic target resolution.
30
+ */
31
+ typeByTarget(opts: ResolveOptions, text: string, clearFirst?: boolean): Promise<void>;
32
+ /**
33
+ * Wait for an element using semantic target resolution.
34
+ */
35
+ waitForTarget(opts: ResolveOptions, timeoutMs?: number): Promise<boolean>;
36
+ /**
37
+ * Scroll an element into view using semantic target resolution.
38
+ */
39
+ scrollIntoViewByTarget(opts: ResolveOptions): Promise<void>;
40
+ type(selector: string, text: string, clearFirst?: boolean): Promise<void>;
41
+ pressKey(key: string): Promise<void>;
42
+ scroll(direction: 'up' | 'down' | 'left' | 'right', amount?: number): Promise<void>;
43
+ scrollIntoView(selector: string): Promise<void>;
44
+ waitFor(condition: WaitCondition): Promise<boolean>;
45
+ dismissOverlays(): Promise<{
46
+ dismissed: boolean;
47
+ method: string | null;
48
+ }>;
49
+ takeScreenshot(): Promise<Buffer>;
50
+ takeElementScreenshot(selector: string): Promise<Buffer>;
51
+ takeCleanScreenshot(): Promise<Buffer>;
52
+ beginRecording(options: RecordingOptions): Promise<void>;
53
+ endRecording(): Promise<RecordingResult>;
54
+ setLocale(locale: string): Promise<void>;
55
+ setColorScheme(scheme: 'light' | 'dark'): Promise<void>;
56
+ reloadPage(): Promise<void>;
57
+ writeStorageHint(params: {
58
+ storage: 'localStorage' | 'sessionStorage' | 'cookie';
59
+ key: string;
60
+ value: string;
61
+ kind: 'locale' | 'theme';
62
+ }): Promise<boolean>;
63
+ hover(selector: string): Promise<void>;
64
+ hoverByTarget(opts: ResolveOptions): Promise<void>;
65
+ selectOption(selector: string, option: {
66
+ label?: string;
67
+ value?: string;
68
+ index?: number;
69
+ }): Promise<void>;
70
+ check(selector: string, checked: boolean): Promise<void>;
71
+ doubleClick(selector: string): Promise<void>;
72
+ cloneElement(opts: {
73
+ sourceSelector: string;
74
+ containerSelector: string;
75
+ count: number;
76
+ removeSource?: boolean;
77
+ }): Promise<{
78
+ clonedCount: number;
79
+ }>;
80
+ setAttribute(opts: {
81
+ selector: string;
82
+ attribute: string;
83
+ value: string;
84
+ }): Promise<void>;
85
+ setTextContent(opts: {
86
+ selector: string;
87
+ text: string;
88
+ }): Promise<void>;
89
+ removeElement(opts: {
90
+ selector: string;
91
+ }): Promise<{
92
+ removedCount: number;
93
+ }>;
94
+ setInputValue(opts: {
95
+ selector: string;
96
+ value: string;
97
+ }): Promise<void>;
98
+ clickHidden(opts: {
99
+ selector: string;
100
+ }): Promise<void>;
101
+ serializeDom(selector?: string): Promise<{
102
+ html: string;
103
+ assetUrls: string[];
104
+ viewport: {
105
+ width: number;
106
+ height: number;
107
+ };
108
+ capturedAt: string;
109
+ }>;
110
+ serializeFragment(selector: string): Promise<{
111
+ html: string;
112
+ assetUrls: string[];
113
+ capturedAt: string;
114
+ }>;
115
+ extractFavicon(): Promise<{
116
+ buffer: Buffer;
117
+ mimeType: string;
118
+ } | null>;
119
+ close(): Promise<void>;
120
+ private typeIntoLocator;
121
+ private seedClipCursor;
122
+ private moveClipCursorToViewportCenter;
123
+ private moveClipCursorToLocator;
124
+ private moveClipCursorToPoint;
125
+ private emitClipClickPulse;
126
+ }