autokap 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. package/assets/cursors/macos.svg +4 -0
  2. package/assets/cursors/windows.svg +15 -0
  3. package/assets/skill/OPCODE-REFERENCE.md +607 -0
  4. package/assets/skill/README.md +39 -0
  5. package/assets/skill/SKILL.md +453 -468
  6. package/assets/skill/STUDIO-SKILL.md +476 -0
  7. package/assets/skill/references/examples.md +104 -0
  8. package/assets/skill/references/interactive-demo.md +225 -0
  9. package/assets/skill/references/mock-data.md +178 -0
  10. package/dist/action-verifier.d.ts +29 -0
  11. package/dist/action-verifier.js +133 -0
  12. package/dist/agent-action-recovery.d.ts +45 -0
  13. package/dist/agent-action-recovery.js +370 -0
  14. package/dist/agent-message-utils.d.ts +21 -0
  15. package/dist/agent-message-utils.js +77 -0
  16. package/dist/agent-url-utils.d.ts +30 -0
  17. package/dist/agent-url-utils.js +138 -0
  18. package/dist/agent.d.ts +92 -8
  19. package/dist/agent.js +2936 -781
  20. package/dist/ak-tree.d.ts +39 -0
  21. package/dist/ak-tree.js +368 -0
  22. package/dist/alt-text.d.ts +26 -0
  23. package/dist/alt-text.js +55 -0
  24. package/dist/auth-capture.d.ts +17 -0
  25. package/dist/auth-capture.js +164 -0
  26. package/dist/benchmark.d.ts +59 -0
  27. package/dist/benchmark.js +135 -0
  28. package/dist/browser-bar.d.ts +14 -6
  29. package/dist/browser-bar.js +145 -8
  30. package/dist/browser-pool.d.ts +7 -0
  31. package/dist/browser-pool.js +15 -5
  32. package/dist/browser-utils.d.ts +31 -0
  33. package/dist/browser-utils.js +97 -0
  34. package/dist/browser.d.ts +51 -1
  35. package/dist/browser.js +1481 -31
  36. package/dist/capture-alt-text.js +2 -1
  37. package/dist/capture-language-preflight.js +14 -0
  38. package/dist/capture-llm-page-identity.js +22 -10
  39. package/dist/capture-page-identity.d.ts +5 -7
  40. package/dist/capture-page-identity.js +211 -78
  41. package/dist/capture-preset-credentials.d.ts +50 -0
  42. package/dist/capture-preset-credentials.js +127 -0
  43. package/dist/capture-request-plan.d.ts +2 -2
  44. package/dist/capture-request-plan.js +64 -16
  45. package/dist/capture-run-optimizer.js +48 -33
  46. package/dist/capture-selector-memory.d.ts +5 -0
  47. package/dist/capture-selector-memory.js +18 -0
  48. package/dist/capture-strategy.d.ts +36 -0
  49. package/dist/capture-strategy.js +95 -0
  50. package/dist/capture-studio-sync.d.ts +1 -0
  51. package/dist/capture-studio-sync.js +9 -3
  52. package/dist/capture-surface-contract.d.ts +36 -0
  53. package/dist/capture-surface-contract.js +299 -0
  54. package/dist/capture-transition-engine.d.ts +28 -0
  55. package/dist/capture-transition-engine.js +292 -0
  56. package/dist/capture-variant-state.d.ts +2 -0
  57. package/dist/capture-variant-state.js +26 -0
  58. package/dist/capture-verification.d.ts +35 -0
  59. package/dist/capture-verification.js +95 -0
  60. package/dist/capture-viewport-lock.d.ts +48 -0
  61. package/dist/capture-viewport-lock.js +74 -0
  62. package/dist/circuit-breaker.d.ts +42 -0
  63. package/dist/circuit-breaker.js +119 -0
  64. package/dist/cli-config.d.ts +8 -1
  65. package/dist/cli-config.js +62 -6
  66. package/dist/cli-contract.d.ts +15 -0
  67. package/dist/cli-contract.js +167 -0
  68. package/dist/cli-runner-local.d.ts +12 -0
  69. package/dist/cli-runner-local.js +102 -0
  70. package/dist/cli-runner.d.ts +34 -0
  71. package/dist/cli-runner.js +433 -0
  72. package/dist/cli-utils.d.ts +0 -1
  73. package/dist/cli-utils.js +2 -5
  74. package/dist/cli.js +1005 -267
  75. package/dist/clip-orchestrator.js +9 -2
  76. package/dist/clip-postprocess.js +25 -16
  77. package/dist/cookie-dismiss.d.ts +2 -0
  78. package/dist/cookie-dismiss.js +48 -13
  79. package/dist/cost-logging.d.ts +8 -0
  80. package/dist/cost-logging.js +160 -46
  81. package/dist/cost-resolution-monitor.d.ts +16 -0
  82. package/dist/cost-resolution-monitor.js +34 -0
  83. package/dist/credential-templates.js +2 -2
  84. package/dist/cursor-overlay-script.d.ts +6 -0
  85. package/dist/cursor-overlay-script.js +169 -0
  86. package/dist/dom-css-purger.d.ts +65 -0
  87. package/dist/dom-css-purger.js +333 -0
  88. package/dist/dom-font-inliner.d.ts +45 -0
  89. package/dist/dom-font-inliner.js +148 -0
  90. package/dist/dom-patch-resolver.d.ts +52 -0
  91. package/dist/dom-patch-resolver.js +242 -0
  92. package/dist/dom-serializer.d.ts +82 -0
  93. package/dist/dom-serializer.js +378 -0
  94. package/dist/element-capture.d.ts +1 -41
  95. package/dist/element-capture.js +202 -446
  96. package/dist/env-validation.d.ts +5 -0
  97. package/dist/env-validation.js +29 -0
  98. package/dist/execution-schema.d.ts +4423 -0
  99. package/dist/execution-schema.js +507 -0
  100. package/dist/execution-types.d.ts +886 -0
  101. package/dist/execution-types.js +65 -0
  102. package/dist/fonts-loader.d.ts +14 -0
  103. package/dist/fonts-loader.js +55 -0
  104. package/dist/hybrid-navigator.js +12 -12
  105. package/dist/index.d.ts +9 -6
  106. package/dist/index.js +10 -4
  107. package/dist/legacy/agent-action-recovery.d.ts +45 -0
  108. package/dist/legacy/agent-action-recovery.js +370 -0
  109. package/dist/legacy/agent-message-utils.d.ts +21 -0
  110. package/dist/legacy/agent-message-utils.js +77 -0
  111. package/dist/legacy/agent-url-utils.d.ts +30 -0
  112. package/dist/legacy/agent-url-utils.js +138 -0
  113. package/dist/legacy/agent.d.ts +226 -0
  114. package/dist/legacy/agent.js +6666 -0
  115. package/dist/legacy/clip-orchestrator.d.ts +148 -0
  116. package/dist/legacy/clip-orchestrator.js +957 -0
  117. package/dist/legacy/credential-templates.d.ts +5 -0
  118. package/dist/legacy/credential-templates.js +60 -0
  119. package/dist/legacy/hybrid-navigator.d.ts +138 -0
  120. package/dist/legacy/hybrid-navigator.js +468 -0
  121. package/dist/legacy/llm-usage.d.ts +17 -0
  122. package/dist/legacy/llm-usage.js +45 -0
  123. package/dist/legacy/prompt-cache.d.ts +10 -0
  124. package/dist/legacy/prompt-cache.js +24 -0
  125. package/dist/legacy/prompts.d.ts +175 -0
  126. package/dist/legacy/prompts.js +1038 -0
  127. package/dist/legacy/tools.d.ts +4 -0
  128. package/dist/legacy/tools.js +216 -0
  129. package/dist/legacy/video-agent.d.ts +143 -0
  130. package/dist/legacy/video-agent.js +4788 -0
  131. package/dist/legacy/video-observation.d.ts +36 -0
  132. package/dist/legacy/video-observation.js +192 -0
  133. package/dist/legacy/video-planner.d.ts +12 -0
  134. package/dist/legacy/video-planner.js +501 -0
  135. package/dist/legacy/video-prompts.d.ts +37 -0
  136. package/dist/legacy/video-prompts.js +569 -0
  137. package/dist/legacy/video-tools.d.ts +3 -0
  138. package/dist/legacy/video-tools.js +59 -0
  139. package/dist/legacy/video-variant-state.d.ts +29 -0
  140. package/dist/legacy/video-variant-state.js +80 -0
  141. package/dist/legacy/vision-model.d.ts +17 -0
  142. package/dist/legacy/vision-model.js +74 -0
  143. package/dist/llm-healer.d.ts +63 -0
  144. package/dist/llm-healer.js +166 -0
  145. package/dist/llm-provider.d.ts +29 -0
  146. package/dist/llm-provider.js +80 -0
  147. package/dist/logger.d.ts +6 -2
  148. package/dist/logger.js +15 -1
  149. package/dist/mockup-html.js +35 -25
  150. package/dist/mockup.d.ts +95 -2
  151. package/dist/mockup.js +427 -166
  152. package/dist/mouse-animation.d.ts +2 -2
  153. package/dist/mouse-animation.js +34 -20
  154. package/dist/opcode-actions.d.ts +42 -0
  155. package/dist/opcode-actions.js +511 -0
  156. package/dist/opcode-runner.d.ts +51 -0
  157. package/dist/opcode-runner.js +770 -0
  158. package/dist/openrouter-client.d.ts +40 -0
  159. package/dist/openrouter-client.js +16 -0
  160. package/dist/overlay-engine.d.ts +24 -0
  161. package/dist/overlay-engine.js +176 -0
  162. package/dist/postcondition.d.ts +16 -0
  163. package/dist/postcondition.js +269 -0
  164. package/dist/program-patcher.d.ts +25 -0
  165. package/dist/program-patcher.js +44 -0
  166. package/dist/prompts.d.ts +13 -5
  167. package/dist/prompts.js +224 -351
  168. package/dist/provider-config.d.ts +12 -0
  169. package/dist/provider-config.js +15 -0
  170. package/dist/recovery-chain.d.ts +37 -0
  171. package/dist/recovery-chain.js +350 -0
  172. package/dist/remote-browser.d.ts +28 -4
  173. package/dist/remote-browser.js +60 -5
  174. package/dist/safari-browser-bar.d.ts +15 -0
  175. package/dist/safari-browser-bar.js +95 -0
  176. package/dist/safari-toolbar-asset.d.ts +15 -0
  177. package/dist/safari-toolbar-asset.js +12 -0
  178. package/dist/security.d.ts +2 -1
  179. package/dist/security.js +49 -10
  180. package/dist/selector-resolver.d.ts +34 -0
  181. package/dist/selector-resolver.js +181 -0
  182. package/dist/semantic-resolver.d.ts +35 -0
  183. package/dist/semantic-resolver.js +161 -0
  184. package/dist/server-capture-runtime.d.ts +5 -3
  185. package/dist/server-capture-runtime.js +42 -95
  186. package/dist/server-credit-usage.d.ts +2 -2
  187. package/dist/server-project-webhooks.d.ts +15 -1
  188. package/dist/server-project-webhooks.js +34 -8
  189. package/dist/server-screenshot-watermark.js +27 -5
  190. package/dist/session-profile.js +164 -1
  191. package/dist/sf-pro-symbols.d.ts +1 -0
  192. package/dist/sf-pro-symbols.js +55 -0
  193. package/dist/skill-packaging.d.ts +28 -0
  194. package/dist/skill-packaging.js +169 -0
  195. package/dist/smart-wait.d.ts +27 -0
  196. package/dist/smart-wait.js +81 -0
  197. package/dist/status-bar-render.d.ts +20 -0
  198. package/dist/status-bar-render.js +410 -0
  199. package/dist/status-bar.d.ts +9 -0
  200. package/dist/status-bar.js +298 -14
  201. package/dist/svg-browser-bar.d.ts +33 -0
  202. package/dist/svg-browser-bar.js +206 -0
  203. package/dist/svg-status-bar.d.ts +36 -0
  204. package/dist/svg-status-bar.js +597 -0
  205. package/dist/svg-text.d.ts +61 -0
  206. package/dist/svg-text.js +118 -0
  207. package/dist/tools.js +89 -451
  208. package/dist/types.d.ts +240 -5
  209. package/dist/types.js +23 -1
  210. package/dist/v2/action-verifier.d.ts +29 -0
  211. package/dist/v2/action-verifier.js +133 -0
  212. package/dist/v2/alt-text.d.ts +26 -0
  213. package/dist/v2/alt-text.js +55 -0
  214. package/dist/v2/benchmark.d.ts +59 -0
  215. package/dist/v2/benchmark.js +135 -0
  216. package/dist/v2/capture-strategy.d.ts +30 -0
  217. package/dist/v2/capture-strategy.js +67 -0
  218. package/dist/v2/capture-verification.d.ts +35 -0
  219. package/dist/v2/capture-verification.js +95 -0
  220. package/dist/v2/circuit-breaker.d.ts +42 -0
  221. package/dist/v2/circuit-breaker.js +119 -0
  222. package/dist/v2/cli-runner-local.d.ts +11 -0
  223. package/dist/v2/cli-runner-local.js +91 -0
  224. package/dist/v2/cli-runner.d.ts +34 -0
  225. package/dist/v2/cli-runner.js +300 -0
  226. package/dist/v2/compiler-prompts.d.ts +27 -0
  227. package/dist/v2/compiler-prompts.js +123 -0
  228. package/dist/v2/compiler.d.ts +37 -0
  229. package/dist/v2/compiler.js +147 -0
  230. package/dist/v2/explorer.d.ts +41 -0
  231. package/dist/v2/explorer.js +56 -0
  232. package/dist/v2/index.d.ts +37 -0
  233. package/dist/v2/index.js +31 -0
  234. package/dist/v2/llm-healer.d.ts +62 -0
  235. package/dist/v2/llm-healer.js +166 -0
  236. package/dist/v2/llm-provider.d.ts +29 -0
  237. package/dist/v2/llm-provider.js +80 -0
  238. package/dist/v2/opcode-runner.d.ts +47 -0
  239. package/dist/v2/opcode-runner.js +634 -0
  240. package/dist/v2/overlay-engine.d.ts +24 -0
  241. package/dist/v2/overlay-engine.js +150 -0
  242. package/dist/v2/postcondition.d.ts +16 -0
  243. package/dist/v2/postcondition.js +249 -0
  244. package/dist/v2/program-patcher.d.ts +25 -0
  245. package/dist/v2/program-patcher.js +44 -0
  246. package/dist/v2/recovery-chain.d.ts +30 -0
  247. package/dist/v2/recovery-chain.js +368 -0
  248. package/dist/v2/schema.d.ts +2580 -0
  249. package/dist/v2/schema.js +295 -0
  250. package/dist/v2/selector-resolver.d.ts +34 -0
  251. package/dist/v2/selector-resolver.js +181 -0
  252. package/dist/v2/semantic-resolver.d.ts +35 -0
  253. package/dist/v2/semantic-resolver.js +161 -0
  254. package/dist/v2/smart-wait.d.ts +27 -0
  255. package/dist/v2/smart-wait.js +81 -0
  256. package/dist/v2/types.d.ts +444 -0
  257. package/dist/v2/types.js +19 -0
  258. package/dist/v2/web-playwright-local.d.ts +69 -0
  259. package/dist/v2/web-playwright-local.js +392 -0
  260. package/dist/version.d.ts +1 -0
  261. package/dist/version.js +5 -0
  262. package/dist/video-agent.js +18 -13
  263. package/dist/video-planner.js +2 -1
  264. package/dist/video-prompts.js +3 -3
  265. package/dist/web-playwright-local.d.ts +126 -0
  266. package/dist/web-playwright-local.js +819 -0
  267. package/dist/ws-auth.js +4 -1
  268. package/dist/ws-broadcast.d.ts +34 -0
  269. package/dist/ws-broadcast.js +85 -0
  270. package/dist/ws-connection-limits.d.ts +12 -0
  271. package/dist/ws-connection-limits.js +44 -0
  272. package/dist/ws-handler-utils.d.ts +32 -0
  273. package/dist/ws-handler-utils.js +139 -0
  274. package/dist/ws-handler.js +294 -164
  275. package/dist/ws-metrics-server.d.ts +9 -0
  276. package/dist/ws-metrics-server.js +31 -0
  277. package/dist/ws-server.js +41 -1
  278. package/package.json +51 -34
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Type-safe OpenRouter extensions for the OpenAI SDK.
3
+ *
4
+ * OpenRouter extends the standard chat completion API with fields like `provider`
5
+ * and `reasoning` that the official OpenAI types don't include. This module provides
6
+ * augmented types to avoid `as any` casts across the codebase.
7
+ */
8
+ import type OpenAI from 'openai';
9
+ import type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';
10
+ /** OpenRouter provider routing preferences. */
11
+ export interface OpenRouterProvider {
12
+ /** Zero Data Retention mode. */
13
+ zdr?: boolean;
14
+ /** Allow fallback providers. */
15
+ allow_fallbacks?: boolean;
16
+ /** Only route to providers that support every parameter sent in the request. */
17
+ require_parameters?: boolean;
18
+ /** Provider order preference. */
19
+ order?: string[];
20
+ /** Quantization preference. */
21
+ quantizations?: string[];
22
+ /** Additional provider preferences (model-specific). */
23
+ [key: string]: unknown;
24
+ }
25
+ /** OpenRouter reasoning configuration for extended thinking models. */
26
+ export interface OpenRouterReasoning {
27
+ effort: 'none' | 'low' | 'medium' | 'high' | 'xhigh';
28
+ }
29
+ /** Chat completion params extended with OpenRouter-specific fields. */
30
+ export type OpenRouterChatCompletionParams = ChatCompletionCreateParamsNonStreaming & {
31
+ provider?: OpenRouterProvider;
32
+ reasoning?: OpenRouterReasoning;
33
+ };
34
+ /**
35
+ * Create a chat completion with OpenRouter-specific fields.
36
+ * Wraps `client.chat.completions.create()` with proper typing.
37
+ */
38
+ export declare function createOpenRouterCompletion(client: OpenAI, params: OpenRouterChatCompletionParams, options?: {
39
+ signal?: AbortSignal;
40
+ }): Promise<OpenAI.Chat.Completions.ChatCompletion>;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Type-safe OpenRouter extensions for the OpenAI SDK.
3
+ *
4
+ * OpenRouter extends the standard chat completion API with fields like `provider`
5
+ * and `reasoning` that the official OpenAI types don't include. This module provides
6
+ * augmented types to avoid `as any` casts across the codebase.
7
+ */
8
+ /**
9
+ * Create a chat completion with OpenRouter-specific fields.
10
+ * Wraps `client.chat.completions.create()` with proper typing.
11
+ */
12
+ export function createOpenRouterCompletion(client, params, options) {
13
+ // OpenRouter accepts these extra fields; we cast once here instead of 16 times
14
+ return client.chat.completions.create(params, options);
15
+ }
16
+ //# sourceMappingURL=openrouter-client.js.map
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Capture Agent — Overlay Engine
3
+ *
4
+ * Consolidated deterministic overlay handling.
5
+ * Wraps and extends the existing cookie-dismiss module with
6
+ * additional patterns for newsletter popups, chat widgets, and age gates.
7
+ *
8
+ * This module is used by the DISMISS_OVERLAYS opcode.
9
+ */
10
+ import type { RuntimeAdapter } from './execution-types.js';
11
+ export interface OverlayDismissResult {
12
+ dismissed: boolean;
13
+ methods: string[];
14
+ overlaysFound: number;
15
+ overlaysRemaining: number;
16
+ }
17
+ /**
18
+ * Multi-pass overlay dismissal.
19
+ * 1. Delegate to the adapter's built-in dismissOverlays (cookie-dismiss.ts)
20
+ * 2. Check AKTree for remaining blocking overlays
21
+ * 3. Try additional heuristics for newsletter/chat/age gate
22
+ * 4. Final AKTree check
23
+ */
24
+ export declare function dismissAllOverlays(adapter: RuntimeAdapter): Promise<OverlayDismissResult>;
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Capture Agent — Overlay Engine
3
+ *
4
+ * Consolidated deterministic overlay handling.
5
+ * Wraps and extends the existing cookie-dismiss module with
6
+ * additional patterns for newsletter popups, chat widgets, and age gates.
7
+ *
8
+ * This module is used by the DISMISS_OVERLAYS opcode.
9
+ */
10
/**
 * Runs the overlay dismissal passes in order and reports the outcome.
 *
 * Pass 1 calls the adapter's own `dismissOverlays()`. If the AKTree still
 * reports blocking overlays, pass 2 presses Escape and pass 3 clicks
 * close-like controls found inside each blocking overlay. A final AKTree
 * read supplies `overlaysRemaining`.
 *
 * `dismissed` is true when at least one method was recorded. When the AKTree
 * cannot be read, the corresponding counts are reported as 0.
 */
export async function dismissAllOverlays(adapter) {
    const methods = [];
    // Every exit path returns the same shape; only the three values vary.
    const report = (dismissed, overlaysFound, overlaysRemaining) => ({
        dismissed,
        methods,
        overlaysFound,
        overlaysRemaining,
    });
    // Pass 1: built-in cookie/widget dismissal.
    const builtIn = await adapter.dismissOverlays();
    if (builtIn.dismissed) {
        methods.push(builtIn.method ?? 'cookie-dismiss');
    }
    const initial = await readOverlaySnapshot(adapter);
    if (!initial) {
        return report(methods.length > 0, 0, 0);
    }
    let currentTree = initial.tree;
    let stillBlocking = initial.blocking;
    const totalFound = initial.totalFound;
    if (stillBlocking.length === 0) {
        return report(methods.length > 0, totalFound, 0);
    }
    // Pass 2: Escape closes many modals and popups.
    try {
        await adapter.pressKey('Escape');
        await sleep(300);
        const afterEscape = await readOverlaySnapshot(adapter);
        if (afterEscape) {
            const left = afterEscape.blocking.length;
            // Only credit Escape when it actually reduced the blocking count.
            if (left < stillBlocking.length) {
                methods.push('escape-key');
            }
            if (left === 0) {
                return report(true, totalFound, 0);
            }
            currentTree = afterEscape.tree;
            stillBlocking = afterEscape.blocking;
        }
    }
    catch {
        // Best effort: a failed key press must not abort the remaining passes.
    }
    // Pass 3: click a close-like control inside each blocking overlay.
    const closeLabels = [
        'close', 'dismiss', 'fermer', 'schließen', 'cerrar', 'chiudi',
        'no thanks', 'non merci', 'maybe later', 'not now', 'skip',
    ];
    for (const overlay of currentTree.overlays) {
        const button = overlay.blocksInteraction
            ? findCloseButton(currentTree, overlay.nodeId, closeLabels)
            : null;
        if (!button) {
            continue;
        }
        try {
            const selector = buildSelectorFromSourceRef(button.sourceRef);
            if (!selector) {
                continue;
            }
            await adapter.click(selector);
            methods.push(`close-button:${selector}`);
            await sleep(300);
        }
        catch {
            // A failed click only skips this overlay.
        }
    }
    // Final read decides how many blocking overlays are left.
    const last = await readOverlaySnapshot(adapter);
    return report(methods.length > 0, totalFound, last ? last.blocking.length : 0);
}
95
/**
 * Reads the adapter's AKTree and summarises its overlays.
 *
 * @returns `{ tree, blocking, totalFound }`, where `blocking` holds the
 *   overlays flagged `blocksInteraction`, or null when the tree cannot be
 *   read or processed.
 */
async function readOverlaySnapshot(adapter) {
    try {
        const akTree = await adapter.getAKTree();
        const { overlays } = akTree;
        return {
            tree: akTree,
            blocking: overlays.filter((entry) => entry.blocksInteraction),
            totalFound: overlays.length,
        };
    }
    catch {
        // Any failure is reported as "no snapshot" rather than thrown.
        return null;
    }
}
109
/**
 * Finds the most plausible "close" control inside one overlay's subtree.
 *
 * A node is a candidate when it is visible and interactive and either
 *  - its lower-cased label or sourceRef contains one of `patterns`, or
 *  - its sourceRef carries aria-label="close", title="close" or class="close".
 *
 * Among pattern matches a button always outranks a non-button; between two
 * nodes of the same kind the shorter label wins. An explicit-attribute match
 * is only used while no other candidate has been found.
 *
 * @param tree AKTree whose `root` is searched for the overlay node.
 * @param overlayNodeId Id of the overlay node that bounds the search.
 * @param patterns Lower-case substrings that mark a close-like control.
 * @returns The chosen node, or null when the overlay node or a candidate
 *   cannot be found.
 */
function findCloseButton(tree, overlayNodeId, patterns) {
    const overlayNode = findNodeById(tree.root, overlayNodeId);
    if (!overlayNode)
        return null;
    // NOTE(review): label/sourceRef are treated as optional here because the
    // postcondition checks guard them the same way — confirm against AKNode.
    const labelLength = (node) => (node.label ?? '').length;
    // Kind is compared first so a short-labelled link never displaces a button.
    const outranks = (candidate, current) => {
        const candidateIsButton = candidate.type === 'button';
        const currentIsButton = current.type === 'button';
        if (candidateIsButton !== currentIsButton)
            return candidateIsButton;
        return labelLength(candidate) < labelLength(current);
    };
    let best = null;
    function walk(node) {
        if (node.visible && node.interactive) {
            const label = (node.label ?? '').toLowerCase();
            const ref = (node.sourceRef ?? '').toLowerCase();
            const combined = label + ' ' + ref;
            if (patterns.some((pattern) => combined.includes(pattern))) {
                if (!best || outranks(node, best)) {
                    best = node;
                }
            }
            // Explicit close attributes act as a fallback when nothing matched yet.
            const hasCloseAttribute = ref.includes('aria-label="close"')
                || ref.includes('title="close"')
                || ref.includes('class="close"');
            if (!best && hasCloseAttribute) {
                best = node;
            }
        }
        // Children are always visited, including those of non-candidate nodes.
        (node.children ?? []).forEach((child) => walk(child));
    }
    walk(overlayNode);
    return best;
}
141
+ function findNodeById(node, id) {
142
+ if (node.id === id)
143
+ return node;
144
+ for (const child of node.children) {
145
+ const found = findNodeById(child, id);
146
+ if (found)
147
+ return found;
148
+ }
149
+ return null;
150
+ }
151
/**
 * Derives a CSS selector from an element's HTML-like `sourceRef` string.
 *
 * Attributes are tried in this order: data-testid, id, aria-label, then a
 * class token containing "close". Plain identifiers keep the short `#id` and
 * `tag.class` forms. Any other value (for example `:r1:`, `1st`, `md:close`)
 * is emitted as a quoted attribute selector, because it would not be a valid
 * CSS identifier.
 *
 * @param sourceRef Serialized opening tag, e.g. `<button id="x" class="close">`.
 * @returns A selector string, or null when `sourceRef` is not a string or
 *   none of the supported attributes is present.
 */
function buildSelectorFromSourceRef(sourceRef) {
    if (typeof sourceRef !== 'string')
        return null;
    // Conservative ASCII test for "safe to write after # or ." without escaping.
    const isPlainIdentifier = (value) => /^-?[A-Za-z_][\w-]*$/.test(value);
    // Values never contain `"` (the capture groups exclude it); backslashes and
    // line breaks still need escaping inside a quoted CSS string.
    const quoteValue = (value) => value.replace(/\\/g, '\\\\').replace(/\r?\n/g, '\\a ');
    // Extract data-testid
    const testIdMatch = sourceRef.match(/data-testid="([^"]+)"/);
    if (testIdMatch)
        return `[data-testid="${quoteValue(testIdMatch[1])}"]`;
    // Extract id (the leading space keeps `data-id=` and similar from matching)
    const idMatch = sourceRef.match(/ id="([^"]+)"/);
    if (idMatch) {
        const id = idMatch[1];
        return isPlainIdentifier(id) ? `#${id}` : `[id="${quoteValue(id)}"]`;
    }
    // Extract aria-label
    const ariaMatch = sourceRef.match(/aria-label="([^"]+)"/);
    if (ariaMatch)
        return `[aria-label="${quoteValue(ariaMatch[1])}"]`;
    // Extract class-based close button
    const classMatch = sourceRef.match(/class="([^"]*close[^"]*)"/);
    if (classMatch) {
        const tagMatch = sourceRef.match(/^<(\w+)/);
        const tag = tagMatch?.[1] ?? 'button';
        const token = classMatch[1].split(/\s+/).find((c) => c.includes('close'));
        if (!token)
            return null;
        return isPlainIdentifier(token)
            ? `${tag}.${token}`
            : `${tag}[class~="${quoteValue(token)}"]`;
    }
    return null;
}
173
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
176
+ //# sourceMappingURL=overlay-engine.js.map
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Capture Agent — Postcondition Evaluator
3
+ *
4
+ * Deterministic evaluation of postconditions after each opcode.
5
+ * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
6
+ */
7
+ import type { RuntimeAdapter, PostconditionSpec } from './execution-types.js';
8
+ /**
9
+ * Evaluates whether a postcondition holds.
10
+ * Retries internally up to postcondition.waitMs (polling).
11
+ * Resolves to `{ passed, reason }`: `passed` is true when the condition is satisfied, and `reason` explains the outcome.
12
+ */
13
+ export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<{
14
+ passed: boolean;
15
+ reason: string;
16
+ }>;
@@ -0,0 +1,269 @@
1
+ /**
2
+ * Capture Agent — Postcondition Evaluator
3
+ *
4
+ * Deterministic evaluation of postconditions after each opcode.
5
+ * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
6
+ */
7
+ import { serializeAKTree } from './ak-tree.js';
8
/**
 * Polls a postcondition until it holds or its wait budget is spent.
 *
 * The budget is `spec.waitMs` (5000 ms when unset) and checks are spaced at
 * most 500 ms apart. Once the budget is used up, one more check is made and
 * its result is returned whether or not it passed. A spec of type 'always'
 * passes immediately without any check.
 *
 * @returns `{ passed, reason }` from the last check performed.
 */
export async function evaluatePostcondition(adapter, spec) {
    const budgetMs = spec.waitMs ?? 5000;
    const stepMs = 500;
    const stopAt = Date.now() + budgetMs;
    if (spec.type === 'always') {
        return { passed: true, reason: 'always passes' };
    }
    for (;;) {
        // Written as a negated "<" so a NaN budget leaves the loop at once.
        if (!(Date.now() < stopAt)) {
            break;
        }
        const outcome = await checkOnce(adapter, spec);
        if (outcome.passed) {
            return outcome;
        }
        const msLeft = stopAt - Date.now();
        if (msLeft <= 0) {
            break;
        }
        // Never sleep past the deadline.
        await sleep(Math.min(stepMs, msLeft));
    }
    // Last attempt after the budget ran out.
    return checkOnce(adapter, spec);
}
33
/**
 * Performs a single, non-polling evaluation of `spec` by dispatching on
 * `spec.type` to the matching check.
 *
 * 'any_change' and 'always' pass unconditionally; an unrecognised type fails
 * with a reason naming it.
 */
async function checkOnce(adapter, spec) {
    const kind = spec.type;
    if (kind === 'route_matches') {
        return checkRouteMatches(adapter, spec.pattern);
    }
    if (kind === 'element_visible') {
        return checkElementVisible(adapter, spec.selector);
    }
    if (kind === 'element_absent') {
        return checkElementAbsent(adapter, spec.selector);
    }
    if (kind === 'text_contains') {
        return checkTextContains(adapter, spec.selector, spec.text);
    }
    if (kind === 'overlay_dismissed') {
        return checkOverlayDismissed(adapter);
    }
    if (kind === 'screenshot_stable') {
        return checkScreenshotStable(adapter, spec.threshold ?? 0.01);
    }
    if (kind === 'any_change') {
        // Soft postcondition: nothing is verified here.
        return { passed: true, reason: 'any_change always passes (action verifier handles real detection)' };
    }
    if (kind === 'always') {
        return { passed: true, reason: 'always passes' };
    }
    return { passed: false, reason: `unknown postcondition type: ${kind}` };
}
57
+ // ── Individual checks ───────────────────────────────────────────────
58
+ async function checkRouteMatches(adapter, pattern) {
59
+ const url = await adapter.getCurrentUrl();
60
+ try {
61
+ const { pathname, search } = new URL(url);
62
+ const fullPath = pathname + search;
63
+ // Support glob-like patterns: ** matches anything (incl. slashes / empty),
64
+ // * matches a single path segment, ? matches one non-slash char.
65
+ // Tokenize in one pass so the `*` rewrite doesn't clobber the `*` produced
66
+ // by the `**` rewrite (e.g. `/home**` must compile to `^/home.*$`, not
67
+ // `^/home.[^/]*$` which would reject `/home` itself).
68
+ let regexStr = '';
69
+ for (let i = 0; i < pattern.length; i++) {
70
+ const ch = pattern[i];
71
+ if (ch === '*' && pattern[i + 1] === '*') {
72
+ regexStr += '.*';
73
+ i++;
74
+ }
75
+ else if (ch === '*') {
76
+ regexStr += '[^/]*';
77
+ }
78
+ else if (ch === '?') {
79
+ regexStr += '[^/]';
80
+ }
81
+ else if (/[.+^${}()|[\]\\]/.test(ch)) {
82
+ regexStr += `\\${ch}`;
83
+ }
84
+ else {
85
+ regexStr += ch;
86
+ }
87
+ }
88
+ const regex = new RegExp(`^${regexStr}$`);
89
+ if (regex.test(fullPath) || regex.test(pathname)) {
90
+ return { passed: true, reason: `URL "${fullPath}" matches pattern "${pattern}"` };
91
+ }
92
+ return { passed: false, reason: `URL "${fullPath}" does not match pattern "${pattern}"` };
93
+ }
94
+ catch {
95
+ return { passed: false, reason: `invalid URL "${url}" or pattern "${pattern}"` };
96
+ }
97
+ }
98
/**
 * Checks that an element matching `selector` is visible.
 *
 * First asks the adapter's `waitFor` (2 s timeout). If that reports nothing
 * or throws, falls back to the AKTree: a visible node whose sourceRef matches
 * the selector heuristically, then a plain substring search of the serialized
 * tree for the selector with `[`, `]` and `"` removed.
 *
 * @returns `{ passed, reason }`; an unreadable AKTree fails the check.
 */
async function checkElementVisible(adapter, selector) {
    let seenByWaitFor = false;
    try {
        seenByWaitFor = Boolean(await adapter.waitFor({ selector, state: 'visible', timeoutMs: 2000 }));
    }
    catch {
        // A waitFor failure is not decisive; the AKTree gets a chance below.
    }
    if (seenByWaitFor) {
        return { passed: true, reason: `element "${selector}" is visible (Playwright)` };
    }
    try {
        const akTree = await adapter.getAKTree();
        if (hasVisibleNodeWithSelector(akTree, selector)) {
            return { passed: true, reason: `element "${selector}" is visible in AKTree` };
        }
        const flattened = serializeAKTree(akTree);
        const bareSelector = selector.replace(/[[\]"]/g, '');
        if (flattened.includes(bareSelector)) {
            return { passed: true, reason: `element pattern "${selector}" found in serialized AKTree` };
        }
        return { passed: false, reason: `element "${selector}" not visible` };
    }
    catch {
        return { passed: false, reason: `element "${selector}" not verifiable (AKTree unavailable)` };
    }
}
125
+ async function checkElementAbsent(adapter, selector) {
126
+ // Use Playwright: if waitFor fails (element not found), it's absent = good
127
+ try {
128
+ const found = await adapter.waitFor({ selector, state: 'visible', timeoutMs: 1000 });
129
+ if (found) {
130
+ return { passed: false, reason: `element "${selector}" is still visible` };
131
+ }
132
+ return { passed: true, reason: `element "${selector}" is absent` };
133
+ }
134
+ catch {
135
+ return { passed: true, reason: `element "${selector}" is absent` };
136
+ }
137
+ }
138
/**
 * Checks that the first AKTree node matching `selector` contains
 * `expectedText` in its label followed by its value (case-sensitive).
 *
 * @returns `{ passed, reason }`; a missing node or any thrown error fails.
 */
async function checkTextContains(adapter, selector, expectedText) {
    try {
        const target = findNodeBySelector(await adapter.getAKTree(), selector);
        if (!target) {
            return { passed: false, reason: `element "${selector}" not found for text check` };
        }
        // Label and value are concatenated with no separator.
        const actualText = (target.label || '') + (target.value || '');
        const found = actualText.includes(expectedText);
        return found
            ? { passed: true, reason: `element "${selector}" contains "${expectedText}"` }
            : { passed: false, reason: `element "${selector}" text "${actualText}" does not contain "${expectedText}"` };
    }
    catch (err) {
        return { passed: false, reason: `error checking text: ${err}` };
    }
}
155
+ async function checkOverlayDismissed(adapter) {
156
+ try {
157
+ const tree = await adapter.getAKTree();
158
+ // Check if any overlays are reported in the tree
159
+ if (tree.overlays.length === 0) {
160
+ return { passed: true, reason: 'no overlays detected' };
161
+ }
162
+ // Check if remaining overlays are blocking
163
+ const blocking = tree.overlays.filter(o => o.blocksInteraction);
164
+ if (blocking.length === 0) {
165
+ return { passed: true, reason: 'overlays present but none blocking interaction' };
166
+ }
167
+ return { passed: false, reason: `${blocking.length} blocking overlay(s) still present` };
168
+ }
169
+ catch {
170
+ // If AKTree is unavailable (e.g. page.evaluate failure), assume overlays are dismissed.
171
+ // The overlay dismissal itself ran; we just can't verify via AKTree.
172
+ return { passed: true, reason: 'overlay check skipped (AKTree unavailable), assuming dismissed' };
173
+ }
174
+ }
175
/**
 * Reports whether the page has stopped changing visually.
 *
 * Takes two screenshots 500 ms apart and compares their sampled hashes; the
 * check passes only when the two hashes are identical. All state is local to
 * the call, so evaluations that overlap in time cannot compare against each
 * other's screenshots.
 *
 * @param adapter Runtime adapter; only `takeScreenshot()` is used.
 * @param threshold Accepted for interface compatibility but not consulted:
 *   the comparison is exact hash equality, not a tolerance-based diff.
 * @returns `{ passed, reason }`; a failing screenshot call yields
 *   `passed: false` with the error in `reason`.
 */
async function checkScreenshotStable(adapter, threshold) {
    try {
        const firstHash = simpleHash(await adapter.takeScreenshot());
        // Give in-flight rendering a moment before sampling again.
        await sleep(500);
        const secondHash = simpleHash(await adapter.takeScreenshot());
        if (firstHash === secondHash) {
            return { passed: true, reason: 'consecutive screenshots are identical' };
        }
        return { passed: false, reason: 'consecutive screenshots differ (page still changing)' };
    }
    catch (err) {
        return { passed: false, reason: `error checking screenshot stability: ${err}` };
    }
}
205
+ // ── Helpers ─────────────────────────────────────────────────────────
206
/**
 * Returns true when some visible AKTree node has a sourceRef that
 * heuristically matches `selector`. Nodes are examined parent-first,
 * children left to right, stopping at the first match.
 */
function hasVisibleNodeWithSelector(tree, selector) {
    const pending = [tree.root];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.visible && current.sourceRef && matchesSelectorHeuristic(current.sourceRef, selector)) {
            return true;
        }
        // Push right-to-left so the leftmost child is examined next.
        const kids = current.children;
        for (let i = kids.length - 1; i >= 0; i--) {
            pending.push(kids[i]);
        }
    }
    return false;
}
216
/**
 * Returns the first AKTree node (parent-first, children left to right) whose
 * sourceRef heuristically matches `selector`, regardless of visibility, or
 * null when there is none.
 */
function findNodeBySelector(tree, selector) {
    const pending = [tree.root];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.sourceRef && matchesSelectorHeuristic(current.sourceRef, selector)) {
            return current;
        }
        // Push right-to-left so the leftmost child is examined next.
        const kids = current.children;
        for (let i = kids.length - 1; i >= 0; i--) {
            pending.push(kids[i]);
        }
    }
    return null;
}
230
/**
 * Heuristic, case-insensitive match between an AKTree sourceRef and a
 * CSS-like selector. Not a CSS engine — it recognises these forms:
 *  - data-testid: `[data-testid="x"]` → the sourceRef contains that attribute
 *  - ID: `#my-id` → an `id="my-id"` attribute (not the tail of a longer
 *    attribute name such as `data-testid`) or the literal text `#my-id`
 *  - Tag name: `h1`, `button` → the sourceRef starts with `<tag` as a whole
 *    tag name, or contains `<tag ` anywhere
 *  - Class: `.my-class` → `my-class` is one of the tokens of a `class="…"`
 *    attribute
 *  - Anything else (and unmatched class selectors): plain substring test
 *
 * Both arguments are lower-cased before comparison.
 *
 * @param sourceRef Serialized element reference from the AKTree.
 * @param selector CSS-like selector to look for.
 * @returns true when the sourceRef plausibly refers to the selected element.
 */
function matchesSelectorHeuristic(sourceRef, selector) {
    const ref = sourceRef.toLowerCase();
    const sel = selector.toLowerCase();
    const isNameChar = (ch) => /[a-z0-9_-]/.test(ch);
    // True when `needle` occurs in `ref` and is not the tail of a longer
    // attribute name (so `id="x"` does not match inside `data-testid="x"`).
    const hasWholeAttribute = (needle) => {
        let at = ref.indexOf(needle);
        while (at !== -1) {
            if (at === 0 || !isNameChar(ref.charAt(at - 1))) {
                return true;
            }
            at = ref.indexOf(needle, at + 1);
        }
        return false;
    };
    // data-testid match (value taken from the lower-cased selector so it can
    // be compared with the lower-cased sourceRef)
    const testIdMatch = sel.match(/\[data-testid=["'](.+?)["']\]/);
    if (testIdMatch) {
        return ref.includes(`data-testid="${testIdMatch[1]}"`);
    }
    // ID match
    if (sel.startsWith('#')) {
        const id = sel.slice(1);
        return hasWholeAttribute(`id="${id}"`) || ref.includes(`#${id}`);
    }
    // Tag name match
    if (/^[a-z][a-z0-9]*$/.test(sel)) {
        const opener = `<${sel}`;
        // charAt past the end yields '', which is not a name character.
        const startsWithTag = ref.startsWith(opener) && !/[a-z0-9-]/.test(ref.charAt(opener.length));
        return startsWithTag || ref.includes(`${opener} `);
    }
    // Class match against the tokens of an HTML-style class attribute
    if (/^\.[a-z0-9_-]+$/.test(sel)) {
        const classAttr = ref.match(/(?:^|[^a-z0-9_-])class="([^"]*)"/);
        if (classAttr && classAttr[1].split(/\s+/).includes(sel.slice(1))) {
            return true;
        }
    }
    // Fallback: contains
    return ref.includes(sel);
}
257
/**
 * Cheap, non-cryptographic fingerprint of a byte buffer.
 *
 * Samples the buffer at a fixed stride (`floor(length / 10000)`, at least 1),
 * folds the samples into a 32-bit integer with a multiply-by-31 step, and
 * returns it in base 36. Bytes between samples do not affect the result.
 */
function simpleHash(buffer) {
    const size = buffer.length;
    const stride = Math.max(1, Math.floor(size / 10000));
    let acc = 0;
    let idx = 0;
    while (idx < size) {
        // (acc << 5) - acc is acc * 31; `| 0` keeps the value in int32 range.
        acc = ((acc << 5) - acc + buffer[idx]) | 0;
        idx += stride;
    }
    return acc.toString(36);
}
266
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
269
+ //# sourceMappingURL=postcondition.js.map
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Capture Agent — Program Patcher
3
+ *
4
+ * Applies healer patches to compiled programs.
5
+ * Patches are applied in-memory during the run, and propagated to
6
+ * the server ONLY after a fully successful run.
7
+ */
8
+ import type { ExecutionProgram, HealerPatch } from './execution-types.js';
9
+ /**
10
+ * Applies a healer patch to a program in place.
11
+ * The step at `patch.opcodeIndex` is replaced by `patch.replacementOpcodes`.
12
+ * If there are several replacement opcodes, the steps after that index shift to make room.
13
+ * Throws an Error when the index lies outside the program's steps; increments `programVersion` on success.
14
+ * Returns the modified program (same reference, mutated).
15
+ */
16
+ export declare function applyPatch(program: ExecutionProgram, patch: HealerPatch): ExecutionProgram;
17
+ /**
18
+ * Applies all patches from a successful run to a program, mutating and returning that same program.
19
+ * Patches are applied from the highest opcode index to the lowest (on a sorted copy of the array), so a replacement never shifts the index of a patch still waiting to be applied.
20
+ */
21
+ export declare function applyAllPatches(program: ExecutionProgram, patches: HealerPatch[]): ExecutionProgram;
22
+ /**
23
+ * Creates a deep clone of a program for safe patching. The clone is produced by a JSON round-trip, so only JSON-serializable data is carried over.
24
+ */
25
+ export declare function cloneProgram(program: ExecutionProgram): ExecutionProgram;
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Capture Agent — Program Patcher
3
+ *
4
+ * Applies healer patches to compiled programs.
5
+ * Patches are applied in-memory during the run, and propagated to
6
+ * the server ONLY after a fully successful run.
7
+ */
8
/**
 * Applies a healer patch to a program in-place.
 * The original opcode at the given index is replaced by the healer's output.
 * If the healer produced multiple opcodes, subsequent steps are shifted.
 *
 * @param program - Program whose `steps` array and `programVersion` are mutated.
 * @param patch - Patch carrying `opcodeIndex` and `replacementOpcodes`.
 * @returns The modified program (same reference, mutated).
 * @throws Error when `opcodeIndex` is not an integer or is outside `program.steps`.
 */
export function applyPatch(program, patch) {
    const { opcodeIndex, replacementOpcodes } = patch;
    // NaN, undefined and fractional values slip past the range comparisons
    // below, and splice() would then coerce them (NaN -> 0, 1.5 -> 1) and
    // silently replace the wrong step. Reject them explicitly.
    if (!Number.isInteger(opcodeIndex)) {
        throw new Error(`patch index ${opcodeIndex} is not an integer`);
    }
    if (opcodeIndex < 0 || opcodeIndex >= program.steps.length) {
        throw new Error(`patch index ${opcodeIndex} out of bounds (program has ${program.steps.length} steps)`);
    }
    // Replace the failed opcode with the replacement(s)
    program.steps.splice(opcodeIndex, 1, ...replacementOpcodes);
    // Bump version
    program.programVersion++;
    return program;
}
26
/**
 * Applies every patch collected during a successful run to the given program.
 * Patches are processed from the highest opcode index down to the lowest so
 * that a replacement never shifts the position of a patch still to be applied.
 * The caller's `patches` array is left untouched (a sorted copy is used).
 *
 * @param program - Program to mutate.
 * @param patches - Patches to apply.
 * @returns The same program reference, after all patches were applied.
 */
export function applyAllPatches(program, patches) {
    const descending = Array.from(patches);
    descending.sort((left, right) => right.opcodeIndex - left.opcodeIndex);
    descending.forEach((patch) => {
        applyPatch(program, patch);
    });
    return program;
}
38
/**
 * Produces an independent deep copy of a program so it can be patched safely.
 * The copy is made by a JSON round-trip, so only JSON-serializable data
 * is carried over.
 *
 * @param program - Program to copy.
 * @returns A new object structurally equal to the JSON form of `program`.
 */
export function cloneProgram(program) {
    const serialized = JSON.stringify(program);
    return JSON.parse(serialized);
}
44
+ //# sourceMappingURL=program-patcher.js.map
package/dist/prompts.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionContentPart } from 'openai/resources/chat/completions';
2
- import type { AgentConfig, AgentRunHint, CaptureCursor, CaptureObjective, CaptureHandoffContext, ExecutedAction, InteractiveElement, LoginCredentials, RepairTicket, VariantCaptureManifest, ValidatedSessionProfile, WorkflowScreenshot } from './types.js';
2
+ import type { AgentConfig, AgentRunHint, CaptureCursor, CaptureObjective, CaptureHandoffContext, ExecutedAction, InteractiveElement, LoginCredentials, OverlayScopeSummary, RepairTicket, VariantCaptureManifest, ValidatedSessionProfile, WorkflowScreenshot } from './types.js';
3
3
  interface SystemPromptOptions {
4
4
  reasoningLocale?: string;
5
5
  }
@@ -29,6 +29,7 @@ export interface StableAnchorUserMessageParams {
29
29
  }
30
30
  export interface IterationUserMessageParams {
31
31
  userPrompt: string;
32
+ serializedAKTree: string;
32
33
  cleanScreenshotUrl?: string;
33
34
  screenshotUrl: string;
34
35
  /** In dual-model mode, the vision model's text observation replaces images. */
@@ -107,6 +108,10 @@ export interface IterationUserMessageParams {
107
108
  };
108
109
  /** Compact summary of recent failed actions — prevents the agent from repeating dead-end strategies. */
109
110
  failedAttemptsSummary?: string;
111
+ /** When true, the browser is already on the target URL — the agent should check and capture, not navigate. */
112
+ alreadyOnTarget?: boolean;
113
+ promptFingerprint?: string;
114
+ overlayScope?: OverlayScopeSummary | null;
110
115
  }
111
116
  export declare function buildStableAnchorUserMessage(params: StableAnchorUserMessageParams): {
112
117
  content: ChatCompletionContentPart[];
@@ -140,6 +145,7 @@ export declare function buildVisionObserverPrompt(params: {
140
145
  currentUrl: string;
141
146
  interactiveElements: InteractiveElement[];
142
147
  userGoal: string;
148
+ question?: string;
143
149
  currentLang?: string;
144
150
  currentTheme?: 'light' | 'dark';
145
151
  currentPageId?: string;
@@ -150,9 +156,7 @@ export declare function buildElementSystemPrompt(description: string): string;
150
156
  export declare function buildElementIterationMessage(params: {
151
157
  elementName: string;
152
158
  elementDescription: string;
153
- accessibilityTree: string;
154
- interactiveElements: InteractiveElement[];
155
- simplifiedDOM?: string;
159
+ serializedAKTree: string;
156
160
  currentUrl: string;
157
161
  iteration: number;
158
162
  maxIterations: number;
@@ -161,7 +165,11 @@ export declare function buildElementIterationMessage(params: {
161
165
  width: number;
162
166
  height: number;
163
167
  };
164
- forbiddenSearchQueries?: string[];
168
+ scrollInfo?: {
169
+ scrollY: number;
170
+ scrollHeight: number;
171
+ viewportHeight: number;
172
+ };
165
173
  screenshotUrl?: string;
166
174
  }): ChatCompletionContentPart[];
167
175
  export {};