autokap 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. package/assets/cursors/macos.svg +4 -0
  2. package/assets/cursors/windows.svg +15 -0
  3. package/assets/skill/OPCODE-REFERENCE.md +607 -0
  4. package/assets/skill/README.md +39 -0
  5. package/assets/skill/SKILL.md +453 -468
  6. package/assets/skill/STUDIO-SKILL.md +476 -0
  7. package/assets/skill/references/examples.md +104 -0
  8. package/assets/skill/references/interactive-demo.md +225 -0
  9. package/assets/skill/references/mock-data.md +178 -0
  10. package/dist/action-verifier.d.ts +29 -0
  11. package/dist/action-verifier.js +133 -0
  12. package/dist/agent-action-recovery.d.ts +45 -0
  13. package/dist/agent-action-recovery.js +370 -0
  14. package/dist/agent-message-utils.d.ts +21 -0
  15. package/dist/agent-message-utils.js +77 -0
  16. package/dist/agent-url-utils.d.ts +30 -0
  17. package/dist/agent-url-utils.js +138 -0
  18. package/dist/agent.d.ts +92 -8
  19. package/dist/agent.js +2936 -781
  20. package/dist/ak-tree.d.ts +39 -0
  21. package/dist/ak-tree.js +368 -0
  22. package/dist/alt-text.d.ts +26 -0
  23. package/dist/alt-text.js +55 -0
  24. package/dist/auth-capture.d.ts +17 -0
  25. package/dist/auth-capture.js +164 -0
  26. package/dist/benchmark.d.ts +59 -0
  27. package/dist/benchmark.js +135 -0
  28. package/dist/browser-bar.d.ts +14 -6
  29. package/dist/browser-bar.js +145 -8
  30. package/dist/browser-pool.d.ts +7 -0
  31. package/dist/browser-pool.js +15 -5
  32. package/dist/browser-utils.d.ts +31 -0
  33. package/dist/browser-utils.js +97 -0
  34. package/dist/browser.d.ts +51 -1
  35. package/dist/browser.js +1481 -31
  36. package/dist/capture-alt-text.js +2 -1
  37. package/dist/capture-language-preflight.js +14 -0
  38. package/dist/capture-llm-page-identity.js +22 -10
  39. package/dist/capture-page-identity.d.ts +5 -7
  40. package/dist/capture-page-identity.js +211 -78
  41. package/dist/capture-preset-credentials.d.ts +50 -0
  42. package/dist/capture-preset-credentials.js +127 -0
  43. package/dist/capture-request-plan.d.ts +2 -2
  44. package/dist/capture-request-plan.js +64 -16
  45. package/dist/capture-run-optimizer.js +48 -33
  46. package/dist/capture-selector-memory.d.ts +5 -0
  47. package/dist/capture-selector-memory.js +18 -0
  48. package/dist/capture-strategy.d.ts +36 -0
  49. package/dist/capture-strategy.js +95 -0
  50. package/dist/capture-studio-sync.d.ts +1 -0
  51. package/dist/capture-studio-sync.js +9 -3
  52. package/dist/capture-surface-contract.d.ts +36 -0
  53. package/dist/capture-surface-contract.js +299 -0
  54. package/dist/capture-transition-engine.d.ts +28 -0
  55. package/dist/capture-transition-engine.js +292 -0
  56. package/dist/capture-variant-state.d.ts +2 -0
  57. package/dist/capture-variant-state.js +26 -0
  58. package/dist/capture-verification.d.ts +35 -0
  59. package/dist/capture-verification.js +95 -0
  60. package/dist/capture-viewport-lock.d.ts +48 -0
  61. package/dist/capture-viewport-lock.js +74 -0
  62. package/dist/circuit-breaker.d.ts +42 -0
  63. package/dist/circuit-breaker.js +119 -0
  64. package/dist/cli-config.d.ts +8 -1
  65. package/dist/cli-config.js +62 -6
  66. package/dist/cli-contract.d.ts +15 -0
  67. package/dist/cli-contract.js +167 -0
  68. package/dist/cli-runner-local.d.ts +12 -0
  69. package/dist/cli-runner-local.js +102 -0
  70. package/dist/cli-runner.d.ts +34 -0
  71. package/dist/cli-runner.js +433 -0
  72. package/dist/cli-utils.d.ts +0 -1
  73. package/dist/cli-utils.js +2 -5
  74. package/dist/cli.js +1005 -267
  75. package/dist/clip-orchestrator.js +9 -2
  76. package/dist/clip-postprocess.js +25 -16
  77. package/dist/cookie-dismiss.d.ts +2 -0
  78. package/dist/cookie-dismiss.js +48 -13
  79. package/dist/cost-logging.d.ts +8 -0
  80. package/dist/cost-logging.js +160 -46
  81. package/dist/cost-resolution-monitor.d.ts +16 -0
  82. package/dist/cost-resolution-monitor.js +34 -0
  83. package/dist/credential-templates.js +2 -2
  84. package/dist/cursor-overlay-script.d.ts +6 -0
  85. package/dist/cursor-overlay-script.js +169 -0
  86. package/dist/dom-css-purger.d.ts +65 -0
  87. package/dist/dom-css-purger.js +333 -0
  88. package/dist/dom-font-inliner.d.ts +45 -0
  89. package/dist/dom-font-inliner.js +148 -0
  90. package/dist/dom-patch-resolver.d.ts +52 -0
  91. package/dist/dom-patch-resolver.js +242 -0
  92. package/dist/dom-serializer.d.ts +82 -0
  93. package/dist/dom-serializer.js +378 -0
  94. package/dist/element-capture.d.ts +1 -41
  95. package/dist/element-capture.js +202 -446
  96. package/dist/env-validation.d.ts +5 -0
  97. package/dist/env-validation.js +29 -0
  98. package/dist/execution-schema.d.ts +4423 -0
  99. package/dist/execution-schema.js +507 -0
  100. package/dist/execution-types.d.ts +886 -0
  101. package/dist/execution-types.js +65 -0
  102. package/dist/fonts-loader.d.ts +14 -0
  103. package/dist/fonts-loader.js +55 -0
  104. package/dist/hybrid-navigator.js +12 -12
  105. package/dist/index.d.ts +9 -6
  106. package/dist/index.js +10 -4
  107. package/dist/legacy/agent-action-recovery.d.ts +45 -0
  108. package/dist/legacy/agent-action-recovery.js +370 -0
  109. package/dist/legacy/agent-message-utils.d.ts +21 -0
  110. package/dist/legacy/agent-message-utils.js +77 -0
  111. package/dist/legacy/agent-url-utils.d.ts +30 -0
  112. package/dist/legacy/agent-url-utils.js +138 -0
  113. package/dist/legacy/agent.d.ts +226 -0
  114. package/dist/legacy/agent.js +6666 -0
  115. package/dist/legacy/clip-orchestrator.d.ts +148 -0
  116. package/dist/legacy/clip-orchestrator.js +957 -0
  117. package/dist/legacy/credential-templates.d.ts +5 -0
  118. package/dist/legacy/credential-templates.js +60 -0
  119. package/dist/legacy/hybrid-navigator.d.ts +138 -0
  120. package/dist/legacy/hybrid-navigator.js +468 -0
  121. package/dist/legacy/llm-usage.d.ts +17 -0
  122. package/dist/legacy/llm-usage.js +45 -0
  123. package/dist/legacy/prompt-cache.d.ts +10 -0
  124. package/dist/legacy/prompt-cache.js +24 -0
  125. package/dist/legacy/prompts.d.ts +175 -0
  126. package/dist/legacy/prompts.js +1038 -0
  127. package/dist/legacy/tools.d.ts +4 -0
  128. package/dist/legacy/tools.js +216 -0
  129. package/dist/legacy/video-agent.d.ts +143 -0
  130. package/dist/legacy/video-agent.js +4788 -0
  131. package/dist/legacy/video-observation.d.ts +36 -0
  132. package/dist/legacy/video-observation.js +192 -0
  133. package/dist/legacy/video-planner.d.ts +12 -0
  134. package/dist/legacy/video-planner.js +501 -0
  135. package/dist/legacy/video-prompts.d.ts +37 -0
  136. package/dist/legacy/video-prompts.js +569 -0
  137. package/dist/legacy/video-tools.d.ts +3 -0
  138. package/dist/legacy/video-tools.js +59 -0
  139. package/dist/legacy/video-variant-state.d.ts +29 -0
  140. package/dist/legacy/video-variant-state.js +80 -0
  141. package/dist/legacy/vision-model.d.ts +17 -0
  142. package/dist/legacy/vision-model.js +74 -0
  143. package/dist/llm-healer.d.ts +63 -0
  144. package/dist/llm-healer.js +166 -0
  145. package/dist/llm-provider.d.ts +29 -0
  146. package/dist/llm-provider.js +80 -0
  147. package/dist/logger.d.ts +6 -2
  148. package/dist/logger.js +15 -1
  149. package/dist/mockup-html.js +35 -25
  150. package/dist/mockup.d.ts +95 -2
  151. package/dist/mockup.js +427 -166
  152. package/dist/mouse-animation.d.ts +2 -2
  153. package/dist/mouse-animation.js +34 -20
  154. package/dist/opcode-actions.d.ts +42 -0
  155. package/dist/opcode-actions.js +511 -0
  156. package/dist/opcode-runner.d.ts +51 -0
  157. package/dist/opcode-runner.js +770 -0
  158. package/dist/openrouter-client.d.ts +40 -0
  159. package/dist/openrouter-client.js +16 -0
  160. package/dist/overlay-engine.d.ts +24 -0
  161. package/dist/overlay-engine.js +176 -0
  162. package/dist/postcondition.d.ts +16 -0
  163. package/dist/postcondition.js +269 -0
  164. package/dist/program-patcher.d.ts +25 -0
  165. package/dist/program-patcher.js +44 -0
  166. package/dist/prompts.d.ts +13 -5
  167. package/dist/prompts.js +224 -351
  168. package/dist/provider-config.d.ts +12 -0
  169. package/dist/provider-config.js +15 -0
  170. package/dist/recovery-chain.d.ts +37 -0
  171. package/dist/recovery-chain.js +350 -0
  172. package/dist/remote-browser.d.ts +28 -4
  173. package/dist/remote-browser.js +60 -5
  174. package/dist/safari-browser-bar.d.ts +15 -0
  175. package/dist/safari-browser-bar.js +95 -0
  176. package/dist/safari-toolbar-asset.d.ts +15 -0
  177. package/dist/safari-toolbar-asset.js +12 -0
  178. package/dist/security.d.ts +2 -1
  179. package/dist/security.js +49 -10
  180. package/dist/selector-resolver.d.ts +34 -0
  181. package/dist/selector-resolver.js +181 -0
  182. package/dist/semantic-resolver.d.ts +35 -0
  183. package/dist/semantic-resolver.js +161 -0
  184. package/dist/server-capture-runtime.d.ts +5 -3
  185. package/dist/server-capture-runtime.js +42 -95
  186. package/dist/server-credit-usage.d.ts +2 -2
  187. package/dist/server-project-webhooks.d.ts +15 -1
  188. package/dist/server-project-webhooks.js +34 -8
  189. package/dist/server-screenshot-watermark.js +27 -5
  190. package/dist/session-profile.js +164 -1
  191. package/dist/sf-pro-symbols.d.ts +1 -0
  192. package/dist/sf-pro-symbols.js +55 -0
  193. package/dist/skill-packaging.d.ts +28 -0
  194. package/dist/skill-packaging.js +169 -0
  195. package/dist/smart-wait.d.ts +27 -0
  196. package/dist/smart-wait.js +81 -0
  197. package/dist/status-bar-render.d.ts +20 -0
  198. package/dist/status-bar-render.js +410 -0
  199. package/dist/status-bar.d.ts +9 -0
  200. package/dist/status-bar.js +298 -14
  201. package/dist/svg-browser-bar.d.ts +33 -0
  202. package/dist/svg-browser-bar.js +206 -0
  203. package/dist/svg-status-bar.d.ts +36 -0
  204. package/dist/svg-status-bar.js +597 -0
  205. package/dist/svg-text.d.ts +61 -0
  206. package/dist/svg-text.js +118 -0
  207. package/dist/tools.js +89 -451
  208. package/dist/types.d.ts +240 -5
  209. package/dist/types.js +23 -1
  210. package/dist/v2/action-verifier.d.ts +29 -0
  211. package/dist/v2/action-verifier.js +133 -0
  212. package/dist/v2/alt-text.d.ts +26 -0
  213. package/dist/v2/alt-text.js +55 -0
  214. package/dist/v2/benchmark.d.ts +59 -0
  215. package/dist/v2/benchmark.js +135 -0
  216. package/dist/v2/capture-strategy.d.ts +30 -0
  217. package/dist/v2/capture-strategy.js +67 -0
  218. package/dist/v2/capture-verification.d.ts +35 -0
  219. package/dist/v2/capture-verification.js +95 -0
  220. package/dist/v2/circuit-breaker.d.ts +42 -0
  221. package/dist/v2/circuit-breaker.js +119 -0
  222. package/dist/v2/cli-runner-local.d.ts +11 -0
  223. package/dist/v2/cli-runner-local.js +91 -0
  224. package/dist/v2/cli-runner.d.ts +34 -0
  225. package/dist/v2/cli-runner.js +300 -0
  226. package/dist/v2/compiler-prompts.d.ts +27 -0
  227. package/dist/v2/compiler-prompts.js +123 -0
  228. package/dist/v2/compiler.d.ts +37 -0
  229. package/dist/v2/compiler.js +147 -0
  230. package/dist/v2/explorer.d.ts +41 -0
  231. package/dist/v2/explorer.js +56 -0
  232. package/dist/v2/index.d.ts +37 -0
  233. package/dist/v2/index.js +31 -0
  234. package/dist/v2/llm-healer.d.ts +62 -0
  235. package/dist/v2/llm-healer.js +166 -0
  236. package/dist/v2/llm-provider.d.ts +29 -0
  237. package/dist/v2/llm-provider.js +80 -0
  238. package/dist/v2/opcode-runner.d.ts +47 -0
  239. package/dist/v2/opcode-runner.js +634 -0
  240. package/dist/v2/overlay-engine.d.ts +24 -0
  241. package/dist/v2/overlay-engine.js +150 -0
  242. package/dist/v2/postcondition.d.ts +16 -0
  243. package/dist/v2/postcondition.js +249 -0
  244. package/dist/v2/program-patcher.d.ts +25 -0
  245. package/dist/v2/program-patcher.js +44 -0
  246. package/dist/v2/recovery-chain.d.ts +30 -0
  247. package/dist/v2/recovery-chain.js +368 -0
  248. package/dist/v2/schema.d.ts +2580 -0
  249. package/dist/v2/schema.js +295 -0
  250. package/dist/v2/selector-resolver.d.ts +34 -0
  251. package/dist/v2/selector-resolver.js +181 -0
  252. package/dist/v2/semantic-resolver.d.ts +35 -0
  253. package/dist/v2/semantic-resolver.js +161 -0
  254. package/dist/v2/smart-wait.d.ts +27 -0
  255. package/dist/v2/smart-wait.js +81 -0
  256. package/dist/v2/types.d.ts +444 -0
  257. package/dist/v2/types.js +19 -0
  258. package/dist/v2/web-playwright-local.d.ts +69 -0
  259. package/dist/v2/web-playwright-local.js +392 -0
  260. package/dist/version.d.ts +1 -0
  261. package/dist/version.js +5 -0
  262. package/dist/video-agent.js +18 -13
  263. package/dist/video-planner.js +2 -1
  264. package/dist/video-prompts.js +3 -3
  265. package/dist/web-playwright-local.d.ts +126 -0
  266. package/dist/web-playwright-local.js +819 -0
  267. package/dist/ws-auth.js +4 -1
  268. package/dist/ws-broadcast.d.ts +34 -0
  269. package/dist/ws-broadcast.js +85 -0
  270. package/dist/ws-connection-limits.d.ts +12 -0
  271. package/dist/ws-connection-limits.js +44 -0
  272. package/dist/ws-handler-utils.d.ts +32 -0
  273. package/dist/ws-handler-utils.js +139 -0
  274. package/dist/ws-handler.js +294 -164
  275. package/dist/ws-metrics-server.d.ts +9 -0
  276. package/dist/ws-metrics-server.js +31 -0
  277. package/dist/ws-server.js +41 -1
  278. package/package.json +51 -34
@@ -0,0 +1,39 @@
1
+ import type { AKNode, AKNodeRuntimeIndexEntry, AKTree, FocusQuery, InteractiveElement } from './types.js';
2
// Element description accepted by fingerprintAKNode / disambiguateFingerprint.
// In the ak-tree.js hunk of this diff every field below except bounds.x and
// bounds.y is joined into the string that is hashed to produce the node id.
+ interface FingerprintInput {
3
+ tagName: string;
4
+ role?: string | null;
5
// textContent and ariaLabel are whitespace-collapsed and clipped to 50 chars before hashing.
+ textContent?: string | null;
6
+ ariaLabel?: string | null;
7
+ htmlId?: string | null;
8
+ name?: string | null;
9
+ inputType?: string | null;
10
+ href?: string | null;
11
+ placeholder?: string | null;
12
// Only w and h (rounded to integers) take part in the fingerprint; x and y are not hashed.
+ bounds: {
13
+ x: number;
14
+ y: number;
15
+ w: number;
16
+ h: number;
17
+ };
18
+ }
19
// Return shape of focusAKTree.
+ interface FocusResult {
20
// The narrowed tree (new root plus overlays filtered down to matched nodes).
+ tree: AKTree;
21
// Nodes that satisfied the focus query.
+ matches: AKNode[];
22
// Output of serializeAKTree for the narrowed tree.
+ serialized: string;
23
+ }
24
+ export declare function toAscii(value: string | null | undefined): string;
25
+ export declare function fingerprintAKNode(input: FingerprintInput): string;
26
+ export declare function disambiguateFingerprint(input: FingerprintInput, parentHint: string): string;
27
+ export declare function flattenAKNodes(root: AKNode): AKNode[];
28
+ export declare function findAKNodeById(root: AKNode, nodeId: string): AKNode | null;
29
+ export declare function buildAKNodeRuntimeIndex(tree: AKTree): Map<string, AKNodeRuntimeIndexEntry>;
30
+ export declare function deriveInteractiveElementsFromAKTree(tree: AKTree): InteractiveElement[];
31
+ export declare function collectInteractiveAKNodes(tree: AKTree, includeHidden?: boolean): AKNode[];
32
+ export declare function serializeAKTree(tree: AKTree, options?: {
33
+ root?: AKNode;
34
+ includeInteractiveSection?: boolean;
35
+ includeHidden?: boolean;
36
+ maxDepth?: number;
37
+ }): string;
38
+ export declare function focusAKTree(tree: AKTree, query: FocusQuery): FocusResult;
39
+ export {};
@@ -0,0 +1,368 @@
1
+ import { createHash } from 'crypto';
2
/**
 * Normalizes free text for use inside a fingerprint: every run of whitespace
 * becomes a single space, the ends are trimmed, and the result is cut to
 * `maxLength` UTF-16 code units.
 *
 * @param {string | null | undefined} value - Raw text; nullish is treated as ''.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string} The normalized, truncated text.
 */
function clipText(value, maxLength) {
  const text = value ?? '';
  const collapsed = text.replace(/\s+/g, ' ').trim();
  return collapsed.slice(0, maxLength);
}
5
/**
 * Reduces a string to single-line printable ASCII.
 *
 * Steps, in order:
 *  1. NFKD-normalize and drop combining diacritics (so "é" becomes "e").
 *  2. Collapse every whitespace run (including newlines, tabs and non-ASCII
 *     spaces) into one space. This must happen BEFORE step 3, otherwise
 *     newlines and tabs fall outside \x20-\x7E and are turned into '?'.
 *  3. Replace any remaining character outside printable ASCII with '?'.
 *  4. Trim leading/trailing spaces.
 *
 * @param {string | null | undefined} value - Input text; nullish is treated as ''.
 * @returns {string} The ASCII-only, whitespace-normalized text.
 */
export function toAscii(value) {
  return (value ?? '')
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/\s+/g, ' ')
    .replace(/[^\x20-\x7E]/g, '?')
    .trim();
}
14
/**
 * Wraps a value in double quotes for the serialized tree output.
 * The value is first reduced with toAscii, then any embedded double quote is
 * swapped for a single quote so the result always has exactly two '"'.
 *
 * @param {string | null | undefined} value - Text to quote.
 * @returns {string} The quoted text.
 */
function quoted(value) {
  const ascii = toAscii(value);
  const withoutDoubleQuotes = ascii.replace(/"/g, '\'');
  return `"${withoutDoubleQuotes}"`;
}
17
/**
 * Builds the '|'-joined identity string shared by both fingerprint functions.
 * Text content and aria-label are whitespace-normalized and clipped to 50
 * characters; only the rounded width and height of the bounds are included.
 *
 * @param {object} input - Element description (see FingerprintInput).
 * @returns {string} The seed string to be hashed.
 */
function buildFingerprintSeed(input) {
  return [
    input.tagName,
    input.role || '',
    clipText(input.textContent, 50),
    clipText(input.ariaLabel, 50),
    input.htmlId || '',
    input.name || '',
    input.inputType || '',
    input.href || '',
    input.placeholder || '',
    String(Math.round(input.bounds.w)),
    String(Math.round(input.bounds.h)),
  ].join('|');
}

/**
 * Hashes a seed with SHA-256 and returns 'ak_' plus the first 8 hex digits.
 *
 * @param {string} seed - String to hash.
 * @returns {string} Node id of the form `ak_xxxxxxxx`.
 */
function hashToNodeId(seed) {
  return `ak_${createHash('sha256').update(seed).digest('hex').slice(0, 8)}`;
}

/**
 * Derives a node id from an element's own attributes and size.
 *
 * @param {object} input - Element description (see FingerprintInput).
 * @returns {string} Node id of the form `ak_xxxxxxxx`.
 */
export function fingerprintAKNode(input) {
  return hashToNodeId(buildFingerprintSeed(input));
}

/**
 * Same as fingerprintAKNode, but mixes `parentHint` into the hash (separated
 * from the element seed by '§') so that elements with identical seeds can
 * receive different ids.
 *
 * @param {object} input - Element description (see FingerprintInput).
 * @param {string} parentHint - Extra context appended to the seed.
 * @returns {string} Node id of the form `ak_xxxxxxxx`.
 */
export function disambiguateFingerprint(input, parentHint) {
  return hashToNodeId(`${buildFingerprintSeed(input)}§${parentHint}`);
}
49
/**
 * Returns every node of the subtree rooted at `root` in pre-order
 * (a node first, then its children left to right).
 *
 * Implemented with a LIFO stack: children are pushed in reverse so the first
 * child is popped next. This keeps the traversal linear; taking from and
 * inserting at the front of the array (shift/unshift) re-indexes the array on
 * every step, and spreading a very large child list into a call can exceed
 * the engine's argument limit.
 *
 * @param {object} root - Root node; each node exposes a `children` array.
 * @returns {object[]} Nodes in pre-order. Falsy entries are skipped.
 */
export function flattenAKNodes(root) {
  const result = [];
  const stack = [root];
  while (stack.length > 0) {
    const current = stack.pop();
    if (!current) {
      continue;
    }
    result.push(current);
    const { children } = current;
    for (let i = children.length - 1; i >= 0; i -= 1) {
      stack.push(children[i]);
    }
  }
  return result;
}
61
/**
 * Searches the subtree rooted at `root` for a node whose `id` equals `nodeId`.
 *
 * The search is depth-first using a LIFO work list, so among siblings the
 * LAST child is examined first; with duplicate ids the hit is therefore not
 * necessarily the first one in document order.
 *
 * @param {object} root - Root node; each node exposes `id` and `children`.
 * @param {string} nodeId - Id to look for.
 * @returns {object | null} The matching node, or null when none is found.
 */
export function findAKNodeById(root, nodeId) {
  const pending = [root];
  while (pending.length !== 0) {
    const candidate = pending.pop();
    if (!candidate) {
      continue;
    }
    if (candidate.id === nodeId) {
      return candidate;
    }
    for (const child of candidate.children) {
      pending.push(child);
    }
  }
  return null;
}
73
/**
 * Builds an id -> runtime entry lookup for every node in the tree.
 *
 * Each entry copies the node's scalar fields and takes shallow copies of
 * `attributes`, `state`, `semantic` (with a fresh `traits` array) and
 * `scroll`. `bounds` is stored by reference. When two nodes share an id the
 * later one (in pre-order) wins.
 *
 * @param {object} tree - Tree whose `root` is traversed with flattenAKNodes.
 * @returns {Map<string, object>} Map keyed by node id.
 */
export function buildAKNodeRuntimeIndex(tree) {
  const toEntry = (node) => {
    const { pattern, confidence, traits } = node.semantic;
    return {
      id: node.id,
      sourceRef: node.sourceRef,
      bounds: node.bounds,
      type: node.type,
      label: node.label,
      interactive: node.interactive,
      visible: node.visible,
      value: node.value,
      attributes: { ...node.attributes },
      state: { ...node.state },
      semantic: { pattern, confidence, traits: [...traits] },
      scroll: node.scroll ? { ...node.scroll } : undefined,
    };
  };
  const pairs = flattenAKNodes(tree.root).map((node) => [node.id, toEntry(node)]);
  return new Map(pairs);
}
97
/**
 * Classifies a node's vertical position relative to the viewport.
 *
 * Only the vertical axis is considered. Edges are treated as exclusive for
 * overlap: a node whose top sits exactly on the viewport's bottom edge, or
 * whose bottom sits exactly on the viewport's top edge, shares no pixels with
 * the viewport and is reported as 'offscreen' rather than 'partial'.
 *
 * @param {object} node - Node with `visible` and `bounds` ({ y, h }).
 * @param {number} viewportHeight - Height of the viewport in the same units as bounds.
 * @returns {'full' | 'partial' | 'offscreen'} Visibility classification.
 */
function computeVisibilityState(node, viewportHeight) {
  if (!node.visible) {
    return 'offscreen';
  }
  const top = node.bounds.y;
  const bottom = top + node.bounds.h;
  // Checked first so a zero-height node lying inside the viewport stays 'full'.
  if (top >= 0 && bottom <= viewportHeight) {
    return 'full';
  }
  if (top >= viewportHeight || bottom <= 0) {
    return 'offscreen';
  }
  return 'partial';
}
108
/**
 * Produces the role string used by the legacy interactive-element shape.
 *
 * Inputs become `input[<type>]` (or plain `input` when the type attribute is
 * missing); every other node uses its `role` attribute, falling back to the
 * node type.
 *
 * @param {object} node - Node with `type` and `attributes`.
 * @returns {string} Legacy role label.
 */
function mapNodeToLegacyRole(node) {
  const { type, attributes } = node;
  if (type !== 'input') {
    return attributes.role || type;
  }
  const inputType = attributes.type;
  return inputType ? `input[${inputType}]` : 'input';
}
113
/**
 * Converts the interactive nodes of a tree into the legacy
 * InteractiveElement records.
 *
 * Nodes are ordered top-to-bottom, then left-to-right by their bounds, and
 * `index` is the position in that order. `ariaExpanded` prefers the node's
 * `state.expanded` (stringified) over the raw `aria-expanded` attribute.
 *
 * @param {object} tree - Tree with `root` and `page.viewport.height`.
 * @returns {object[]} Legacy interactive element descriptors.
 */
export function deriveInteractiveElementsFromAKTree(tree) {
  const byReadingOrder = (a, b) => {
    if (a.bounds.y !== b.bounds.y) {
      return a.bounds.y - b.bounds.y;
    }
    return a.bounds.x - b.bounds.x;
  };

  const toLegacyElement = (node, index) => {
    const { attributes, bounds, state } = node;
    const ariaExpanded = state.expanded !== undefined
      ? String(state.expanded)
      : (attributes['aria-expanded'] || null);
    return {
      index,
      tag: attributes.tagName?.toLowerCase() || node.type,
      role: mapNodeToLegacyRole(node),
      text: node.label,
      ariaLabel: attributes['aria-label'] || null,
      title: attributes.title || null,
      ariaControls: attributes['aria-controls'] || null,
      ariaExpanded,
      ariaHasPopup: attributes['aria-haspopup'] || null,
      href: attributes.href || null,
      inputType: attributes.type || null,
      boundingBox: {
        x: bounds.x,
        y: bounds.y,
        width: bounds.w,
        height: bounds.h,
      },
      selector: node.sourceRef,
      visible: node.visible,
      visibilityState: computeVisibilityState(node, tree.page.viewport.height),
    };
  };

  const interactiveNodes = flattenAKNodes(tree.root).filter((node) => node.interactive);
  interactiveNodes.sort(byReadingOrder);
  return interactiveNodes.map(toLegacyElement);
}
144
/**
 * Lists the interactive nodes of a tree in pre-order.
 *
 * @param {object} tree - Tree whose `root` is traversed.
 * @param {boolean} [includeHidden=false] - When false, nodes whose `visible`
 *   flag is falsy are left out.
 * @returns {object[]} The selected nodes (same object references as in the tree).
 */
export function collectInteractiveAKNodes(tree, includeHidden = false) {
  const selected = [];
  for (const node of flattenAKNodes(tree.root)) {
    if (!node.interactive) {
      continue;
    }
    if (includeHidden || node.visible) {
      selected.push(node);
    }
  }
  return selected;
}
147
/**
 * Renders a scroll container's position as text, e.g.
 * `50/200 (50%) vertical`.
 *
 * The percentage is the offset relative to the maximum scrollable distance
 * (content size minus client size), or 0 when nothing can scroll. Horizontal
 * data is reported only when `overflowX` is set; vertical data is reported
 * when `overflowY` is set or when neither flag is set.
 *
 * @param {object} scroll - Scroll metrics (scrollTop/Left, scrollHeight/Width,
 *   clientHeight/Width, overflowX, overflowY).
 * @returns {string} Human-readable scroll description.
 */
function describeScroll(scroll) {
  const percentOf = (offset, total, viewport) => {
    const range = Math.max(0, total - viewport);
    return range > 0 ? Math.round((offset / range) * 100) : 0;
  };

  const verticalPct = percentOf(scroll.scrollTop, scroll.scrollHeight, scroll.clientHeight);
  const horizontalPct = percentOf(scroll.scrollLeft, scroll.scrollWidth, scroll.clientWidth);
  const vertical = `${scroll.scrollTop}/${scroll.scrollHeight} (${verticalPct}%) vertical`;
  const horizontal = `${scroll.scrollLeft}/${scroll.scrollWidth} (${horizontalPct}%) horizontal`;

  if (!scroll.overflowX) {
    return vertical;
  }
  return scroll.overflowY ? `${vertical} | ${horizontal}` : horizontal;
}
160
/**
 * Formats a node's semantic pattern and traits as a parenthesized suffix,
 * e.g. ` (nav, sticky)`. Returns '' when there is nothing to show.
 *
 * @param {object} node - Node with `semantic.pattern` and `semantic.traits`.
 * @returns {string} Suffix including its leading space, or ''.
 */
function buildNodeTags(node) {
  const { pattern, traits } = node.semantic;
  const tags = pattern ? [pattern, ...traits] : [...traits];
  if (tags.length === 0) {
    return '';
  }
  return ` (${tags.join(', ')})`;
}
167
/**
 * Renders one node as a single ASCII line:
 * `[id] type "label" value=".." placeholder=".." href=".." [flags] (tags) -- x,y wxh scroll: ..`
 *
 * Optional parts are emitted only when the corresponding field is truthy.
 * The assembled line is passed through toAscii, which also trims it.
 *
 * @param {object} node - Node to render.
 * @returns {string} The serialized line, without indentation.
 */
function serializeNodeLine(node) {
  const { attributes, bounds, state } = node;
  const segments = [`[${node.id}] ${node.type}`];

  if (node.label) {
    segments.push(` ${quoted(node.label)}`);
  }
  if (node.value) {
    segments.push(` value=${quoted(node.value)}`);
  }
  if (attributes.placeholder) {
    segments.push(` placeholder=${quoted(attributes.placeholder)}`);
  }
  if (attributes.href) {
    segments.push(` href=${quoted(attributes.href)}`);
  }
  if (state.disabled) {
    segments.push(' [disabled]');
  }
  if (state.focused) {
    segments.push(' [focused]');
  }
  if (node.scroll) {
    segments.push(' [scrollable]');
  }

  segments.push(`${buildNodeTags(node)} -- ${bounds.x},${bounds.y} ${bounds.w}x${bounds.h}`);

  if (node.scroll) {
    segments.push(` scroll: ${describeScroll(node.scroll)}`);
  }
  return toAscii(segments.join(''));
}
189
/**
 * Recursively copies a node and its whole subtree.
 *
 * `state`, `style`, `attributes` and `scroll` are shallow-copied; `semantic`
 * is rebuilt from pattern/confidence/traits only (with a fresh traits array);
 * `children` are cloned recursively. All other own properties are carried
 * over by reference. A node without `scroll` gets an explicit
 * `scroll: undefined`.
 *
 * @param {object} node - Node to copy.
 * @returns {object} Independent copy of the node and its descendants.
 */
function cloneNode(node) {
  const { pattern, confidence, traits } = node.semantic;
  const copy = { ...node };
  copy.state = { ...node.state };
  copy.style = { ...node.style };
  copy.semantic = { pattern, confidence, traits: [...traits] };
  copy.attributes = { ...node.attributes };
  copy.scroll = node.scroll ? { ...node.scroll } : undefined;
  copy.children = node.children.map((child) => cloneNode(child));
  return copy;
}
204
/**
 * Copies a subtree, cutting it off below `maxDepth`.
 *
 * Only the node's own fields are cloned here (children are stripped before
 * calling cloneNode) because the children are rebuilt by the recursion below.
 * Cloning the full subtree at every level and then discarding the cloned
 * children would make the copy quadratic in the subtree size.
 *
 * @param {object} node - Node to copy.
 * @param {number} depth - Depth of `node` relative to the starting node.
 * @param {number} [maxDepth] - Nodes at this depth keep no children;
 *   undefined means no limit.
 * @returns {object} Pruned copy of the subtree.
 */
function pruneNodeToDepth(node, depth, maxDepth) {
  const clone = cloneNode({ ...node, children: [] });
  if (maxDepth !== undefined && depth >= maxDepth) {
    return clone;
  }
  clone.children = node.children.map((child) => pruneNodeToDepth(child, depth + 1, maxDepth));
  return clone;
}
213
/**
 * Tells whether the node carries every requested trait.
 *
 * @param {object} node - Node with `semantic.traits`.
 * @param {string[]} traits - Traits that must all be present.
 * @returns {boolean} True when no requested trait is missing (vacuously true
 *   for an empty list).
 */
function matchTrait(node, traits) {
  const isMissing = (trait) => !node.semantic.traits.includes(trait);
  return !traits.some(isMissing);
}
216
/**
 * Tells whether any node strictly below `node` is interactive.
 * The node's own `interactive` flag is not consulted.
 *
 * @param {object} node - Node whose descendants are inspected.
 * @returns {boolean} True when at least one descendant is interactive.
 */
function hasInteractiveDescendant(node) {
  for (const child of node.children) {
    if (child.interactive || hasInteractiveDescendant(child)) {
      return true;
    }
  }
  return false;
}
219
/**
 * Tests a single node against a focus query. Every criterion present in the
 * query must hold; absent or empty criteria are ignored.
 *
 * - `type`: node.type must be one of the listed types.
 * - `semantic`: node.semantic.pattern must be set and listed.
 * - `trait`: the node must carry all listed traits.
 * - `interactive`: compared with the node's flag. When the query asks for
 *   interactive nodes AND also filters by `semantic`, a non-interactive node
 *   that contains an interactive descendant counts as interactive.
 * - `visible`: compared with node.visible.
 * - `labelContains`: case-insensitive substring test on node.label. A missing
 *   label is treated as '' so it simply fails the test instead of throwing.
 *
 * @param {object} node - Node to test.
 * @param {object} query - Focus query.
 * @returns {boolean} True when the node satisfies the query.
 */
function matchesFocus(node, query) {
  if (query.type && query.type.length > 0 && !query.type.includes(node.type)) {
    return false;
  }
  if (query.semantic && query.semantic.length > 0 && (!node.semantic.pattern || !query.semantic.includes(node.semantic.pattern))) {
    return false;
  }
  if (query.trait && query.trait.length > 0 && !matchTrait(node, query.trait)) {
    return false;
  }
  if (typeof query.interactive === 'boolean') {
    const effectiveInteractive = node.interactive
      || (query.interactive === true && !!query.semantic?.length && hasInteractiveDescendant(node));
    if (effectiveInteractive !== query.interactive) {
      return false;
    }
  }
  if (typeof query.visible === 'boolean' && node.visible !== query.visible) {
    return false;
  }
  if (query.labelContains) {
    // Other code in this file guards on `node.label` being falsy, so tolerate null/undefined here too.
    const label = (node.label ?? '').toLowerCase();
    if (!label.includes(query.labelContains.toLowerCase())) {
      return false;
    }
  }
  return true;
}
244
/**
 * Copies the subtree rooted at `node`, keeping only nodes that match the
 * query or lie on a path to a matching descendant.
 *
 * The recursion always visits the full subtree: `maxDepth` only empties the
 * `children` of the copied node at that depth, so `matches` can still list
 * original nodes located deeper than the returned copy shows.
 *
 * The copy of each kept node is made without its children (they are replaced
 * by the already-pruned child copies), which avoids deep-cloning every
 * subtree a second time.
 *
 * @param {object} node - Current original node.
 * @param {object} query - Focus query passed to matchesFocus.
 * @param {number} [maxDepth] - Depth at which copied nodes keep no children.
 * @param {number} [depth=0] - Depth of `node` relative to the starting node.
 * @returns {{ node: object | null, matches: object[] }} The pruned copy (null
 *   when nothing in the subtree matches) and the matching ORIGINAL nodes in
 *   pre-order.
 */
function pruneToMatchedPaths(node, query, maxDepth, depth = 0) {
  const childResults = node.children.map(
    (child) => pruneToMatchedPaths(child, query, maxDepth, depth + 1),
  );
  const childNodes = childResults
    .map((entry) => entry.node)
    .filter((entry) => entry != null);
  const selfMatch = matchesFocus(node, query);
  if (!selfMatch && childNodes.length === 0) {
    return { node: null, matches: [] };
  }
  const clone = cloneNode({ ...node, children: [] });
  const atDepthLimit = maxDepth !== undefined && depth >= maxDepth;
  clone.children = atDepthLimit ? [] : childNodes;
  return {
    node: clone,
    matches: [
      ...(selfMatch ? [node] : []),
      ...childResults.flatMap((entry) => entry.matches),
    ],
  };
}
270
/**
 * Collects depth-limited copies of every node that matches the query, in
 * pre-order.
 *
 * Each match is copied with pruneNodeToDepth starting again at depth 0, so
 * `maxDepth` bounds both how deep the search descends from the starting node
 * and how many levels each returned copy retains.
 *
 * @param {object} node - Current node.
 * @param {object} query - Focus query passed to matchesFocus.
 * @param {number} [maxDepth] - Search/copy depth limit; undefined means none.
 * @param {number} [depth=0] - Depth of `node` relative to the starting node.
 * @returns {object[]} Pruned copies of the matching nodes.
 */
function extractMatches(node, query, maxDepth, depth = 0) {
  const found = [];
  if (matchesFocus(node, query)) {
    found.push(pruneNodeToDepth(node, 0, maxDepth));
  }
  const atDepthLimit = maxDepth !== undefined && depth >= maxDepth;
  if (atDepthLimit) {
    return found;
  }
  for (const child of node.children) {
    for (const match of extractMatches(child, query, maxDepth, depth + 1)) {
      found.push(match);
    }
  }
  return found;
}
279
/**
 * Picks a display label for an overlay: the semantic pattern of the node it
 * refers to, else that node's type, else the literal 'overlay' (also used
 * when the node cannot be found in the tree).
 *
 * @param {object} overlay - Overlay record with `nodeId`.
 * @param {object} tree - Tree searched via findAKNodeById.
 * @returns {string} Label for the overlay.
 */
function findOverlayLabel(overlay, tree) {
  const target = findAKNodeById(tree.root, overlay.nodeId);
  if (!target) {
    return 'overlay';
  }
  return target.semantic.pattern || target.type || 'overlay';
}
283
/**
 * Renders a tree as a plain-text report with up to four sections: page
 * state, overlays, page structure and (optionally) interactive elements.
 *
 * @param {object} tree - Tree with `page`, `overlays` and `root`.
 * @param {object} [options]
 * @param {object} [options.root] - Node to start the structure/interactive
 *   sections from; defaults to `tree.root`.
 * @param {boolean} [options.includeInteractiveSection] - Pass false to omit
 *   the interactive section; any other value includes it.
 * @param {boolean} [options.includeHidden] - Only the literal `true` includes
 *   nodes whose `visible` flag is falsy.
 * @param {number} [options.maxDepth] - Deepest level printed in the structure
 *   section (the interactive section is not depth-limited).
 * @returns {string} Report lines joined with '\n'.
 */
export function serializeAKTree(tree, options = {}) {
  const { maxDepth } = options;
  const root = options.root ?? tree.root;
  const includeHidden = options.includeHidden === true;
  const { viewport, scroll: pageScroll } = tree.page;

  const scrollRange = Math.max(0, pageScroll.scrollHeight - pageScroll.clientHeight);
  const scrollPct = scrollRange > 0 ? Math.round((pageScroll.scrollTop / scrollRange) * 100) : 0;

  const headerRows = [
    '=== PAGE STATE ===',
    `url: ${toAscii(tree.page.url)}`,
    `title: ${quoted(tree.page.title)}`,
    `viewport: ${viewport.width}x${viewport.height} | page scroll: ${pageScroll.scrollTop}/${pageScroll.scrollHeight} (${scrollPct}%) vertical`,
    '',
    'OVERLAYS:',
  ];

  // NOTE(review): toAscii trims its result, so the leading space inside the
  // overlay template below does not survive into the output — confirm whether
  // overlay rows were meant to be indented like the "(none)" row.
  const overlayRows = tree.overlays.length === 0
    ? [' (none)']
    : tree.overlays.map((overlay) => toAscii(` [${overlay.nodeId}] ${findOverlayLabel(overlay, tree)} z:${overlay.zIndex} covers ${Math.round(overlay.coveragePercent)}%${overlay.blocksInteraction ? ' -- blocks interaction' : ''}`));

  const structureRows = [];
  const visit = (node, depth) => {
    const hiddenAndExcluded = !includeHidden && !node.visible;
    const beyondLimit = maxDepth !== undefined && depth > maxDepth;
    if (hiddenAndExcluded || beyondLimit) {
      // A skipped node also hides its entire subtree.
      return;
    }
    structureRows.push(`${' '.repeat(depth)}${serializeNodeLine(node)}`);
    node.children.forEach((child) => visit(child, depth + 1));
  };
  visit(root, 0);

  const lines = [
    ...headerRows,
    ...overlayRows,
    '',
    '=== PAGE STRUCTURE ===',
    ...structureRows,
  ];

  if (options.includeInteractiveSection !== false) {
    const interactiveNodes = collectInteractiveAKNodes({ ...tree, root }, includeHidden);
    lines.push('', '=== INTERACTIVE ELEMENTS ===');
    if (interactiveNodes.length === 0) {
      lines.push('(none)');
    } else {
      interactiveNodes.forEach((node) => {
        const labelPart = node.label ? ` ${quoted(node.label)}` : '';
        lines.push(toAscii(`[${node.id}] ${node.type}${labelPart}`));
      });
    }
  }
  return lines.join('\n');
}
332
/**
 * Keeps only the overlays whose `nodeId` belongs to one of the matched nodes.
 * Ids are collected into a Set once so the filter is linear.
 *
 * @param {object[]} overlays - Overlay records of the original tree.
 * @param {object[]} matches - Matched nodes.
 * @returns {object[]} Overlays attached to a matched node.
 */
function keepMatchedOverlays(overlays, matches) {
  const matchedIds = new Set(matches.map((node) => node.id));
  return overlays.filter((overlay) => matchedIds.has(overlay.nodeId));
}

/**
 * Narrows a tree to the nodes that satisfy a focus query and serializes the
 * result.
 *
 * The search starts at the node named by `query.within`; when that id is not
 * set or not found, the whole tree is searched.
 *
 * - With `query.includeAncestors`, the result keeps matching nodes together
 *   with the chain of ancestors leading to them, and `matches` holds the
 *   ORIGINAL matching nodes.
 * - Otherwise the result is a copy of the scoped root whose direct children
 *   are copies of the matches, and `matches` holds depth-pruned copies.
 *
 * The scoped root is copied without its children, since they are replaced in
 * both modes; copying the full subtree first would be wasted work.
 *
 * @param {object} tree - Source tree.
 * @param {object} query - Focus query (see matchesFocus for the criteria,
 *   plus `within`, `maxDepth` and `includeAncestors`).
 * @returns {{ tree: object, matches: object[], serialized: string }} The
 *   narrowed tree, the matches and the serialized text of the narrowed tree.
 */
export function focusAKTree(tree, query) {
  const scopedRoot = query.within ? (findAKNodeById(tree.root, query.within) ?? tree.root) : tree.root;
  const maxDepth = query.maxDepth;
  const cloneRootWithoutChildren = () => cloneNode({ ...scopedRoot, children: [] });

  if (query.includeAncestors) {
    const pruned = pruneToMatchedPaths(scopedRoot, query, maxDepth);
    const nextTree = {
      ...tree,
      root: pruned.node ?? cloneRootWithoutChildren(),
      overlays: keepMatchedOverlays(tree.overlays, pruned.matches),
    };
    return {
      tree: nextTree,
      matches: pruned.matches,
      serialized: serializeAKTree(nextTree),
    };
  }

  const matches = extractMatches(scopedRoot, query, maxDepth);
  const syntheticRoot = cloneRootWithoutChildren();
  // The tree receives its own copies so callers can mutate `matches` freely.
  syntheticRoot.children = matches.map((node) => cloneNode(node));
  const nextTree = {
    ...tree,
    root: syntheticRoot,
    overlays: keepMatchedOverlays(tree.overlays, matches),
  };
  return {
    tree: nextTree,
    matches,
    serialized: serializeAKTree(nextTree),
  };
}
368
+ //# sourceMappingURL=ak-tree.js.map
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Capture Agent — Alt Text Generation
3
+ *
4
+ * Generates accessible alt text for captured screenshots.
5
+ * Adapts to the variant's locale for localized descriptions.
6
+ * Cost: ~0.001 EUR per generation.
7
+ */
8
+ import { type LLMProviderConfig, type LLMCallResult } from './llm-provider.js';
9
+ export interface AltTextResult {
10
+ altText: string;
11
+ llmResult?: LLMCallResult;
12
+ }
13
+ export interface AltTextContext {
14
+ /** What the screenshot captures */
15
+ description: string;
16
+ /** Target URL */
17
+ url: string;
18
+ /** Locale for the alt text language */
19
+ locale?: string;
20
+ /** Page/preset name */
21
+ presetName?: string;
22
+ }
23
+ /**
24
+ * Generates alt text for a screenshot, localized to the variant's language.
25
+ */
26
+ export declare function generateAltText(screenshot: Buffer, context: AltTextContext, llmConfig: LLMProviderConfig): Promise<AltTextResult>;
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Capture Agent — Alt Text Generation
3
+ *
4
+ * Generates accessible alt text for captured screenshots.
5
+ * Adapts to the variant's locale for localized descriptions.
6
+ * Cost: ~0.001 EUR per generation.
7
+ */
8
+ import { callLLM } from './llm-provider.js';
9
// System prompt sent with every alt-text request; kept as one entry per
// output line so individual rules are easy to scan and edit.
const SYSTEM_PROMPT = [
  'You generate concise alt text for web page screenshots.',
  '',
  'Rules:',
  '- Maximum 150 characters',
  '- Describe what is VISIBLE, not what the page is about',
  '- Use the language specified by the locale',
  '- Start with the main visual element (e.g., "Dashboard showing 5 projects")',
  '- Do not wrap in quotes',
  '- Do not start with "Screenshot of" or "Image of"',
  '',
  'Respond with ONLY the alt text, nothing else.',
].join('\n');
20
/**
 * Generates alt text for a screenshot, localized to the variant's language.
 *
 * The LLM reply is trimmed BEFORE wrapping quotes are stripped, so a reply
 * such as `"Dashboard"\n` loses both quotes, and is capped at 150 characters
 * (147 plus '...'). If the call fails, or the cleaned reply is empty, the
 * alt text falls back to `<presetName or 'Page'> — <description>` cut to 150
 * characters.
 *
 * @param {Buffer} screenshot - Screenshot image passed to the LLM.
 * @param {object} context - `description`, `url`, optional `locale`
 *   (defaults to 'en') and optional `presetName`.
 * @param {object} llmConfig - Provider configuration; `maxTokens` and
 *   `timeoutMs` are overridden with 100 and 10000.
 * @returns {Promise<{ altText: string, llmResult?: object }>} The alt text
 *   and, when the LLM call completed, its raw result.
 */
export async function generateAltText(screenshot, context, llmConfig) {
  const locale = context.locale ?? 'en';
  const userPrompt = [
    `Generate alt text in ${localeToLanguage(locale)} for this screenshot.`,
    `Page: ${context.presetName ?? context.url}`,
    `Content: ${context.description}`,
  ].join('\n');
  const buildFallback = () => `${context.presetName ?? 'Page'} — ${context.description}`.slice(0, 150);
  try {
    const result = await callLLM({ ...llmConfig, maxTokens: 100, timeoutMs: 10000 }, SYSTEM_PROMPT, userPrompt, screenshot);
    // Trim first: the `$` anchor only sees a closing quote when no whitespace follows it.
    let altText = result.text.trim().replace(/^["']|["']$/g, '').trim();
    if (altText.length === 0) {
      // An empty reply is useless as alt text; keep the LLM result for the caller.
      return { altText: buildFallback(), llmResult: result };
    }
    if (altText.length > 150) {
      altText = `${altText.slice(0, 147)}...`;
    }
    return { altText, llmResult: result };
  }
  catch {
    // Deliberate best-effort: alt text generation must never fail the capture,
    // so any LLM error falls back to text derived from the context.
    return { altText: buildFallback() };
  }
}
45
// Primary language subtag -> English language name used in the LLM prompt.
// A Map (rather than an object literal) so that locale strings such as
// "constructor" cannot resolve to inherited Object.prototype members.
const LOCALE_LANGUAGE_NAMES = new Map([
  ['en', 'English'], ['fr', 'French'], ['de', 'German'], ['es', 'Spanish'],
  ['pt', 'Portuguese'], ['it', 'Italian'], ['nl', 'Dutch'], ['ja', 'Japanese'],
  ['ko', 'Korean'], ['zh', 'Chinese'], ['ar', 'Arabic'], ['ru', 'Russian'],
  ['pl', 'Polish'], ['sv', 'Swedish'], ['da', 'Danish'], ['fi', 'Finnish'],
  ['nb', 'Norwegian'], ['tr', 'Turkish'], ['cs', 'Czech'], ['ro', 'Romanian'],
]);

/**
 * Maps a locale tag to an English language name for the prompt.
 *
 * Only the primary subtag is used; it is matched case-insensitively and both
 * '-' and '_' are accepted as separators ("pt-BR", "pt_BR", "PT" all give
 * "Portuguese"). Unknown locales are returned unchanged.
 *
 * @param {string} locale - Locale tag such as "fr" or "fr-FR".
 * @returns {string} Language name, or the original locale when unknown.
 */
function localeToLanguage(locale) {
  const [primary] = locale.toLowerCase().split(/[-_]/);
  return LOCALE_LANGUAGE_NAMES.get(primary) ?? locale;
}
55
+ //# sourceMappingURL=alt-text.js.map
@@ -0,0 +1,17 @@
1
/** Inputs for `captureAuthSession`. */
export interface AuthCaptureOptions {
    /**
     * Base URL of the API that receives the session. The upload is a PUT to
     * `<apiBaseUrl>/api/cli/projects/<projectId>/credentials/<accountId>/session`.
     */
    apiBaseUrl: string;
    /** API key sent as `Authorization: Bearer <apiKey>` on the upload request. */
    apiKey: string;
    /** Project identifier, interpolated into the upload URL path. */
    projectId: string;
    /** Credentials account identifier, interpolated into the upload URL path. */
    accountId: string;
    /** URL the browser window navigates to first, where the user logs in. */
    startUrl: string;
}
8
/**
 * Opens a headed Chromium window, lets the user log in, and uploads the
 * resulting storageState (cookies + localStorage, HttpOnly included) to the
 * given project credentials account.
 *
 * The session is captured continuously via polling so we don't depend on the
 * user explicitly clicking a button — closing the window after login is
 * enough. A "Save & close" button is also rendered for explicit confirmation.
 *
 * Note for callers: the implementation terminates the process with
 * `process.exit(1)` when the initial navigation fails, when no session data
 * was captured, or when the upload response is not OK.
 *
 * @param options - Upload target (`apiBaseUrl`, `apiKey`, `projectId`,
 *   `accountId`) and the `startUrl` to open.
 * @returns A promise that resolves once the session has been uploaded.
 */
export declare function captureAuthSession(options: AuthCaptureOptions): Promise<void>;
@@ -0,0 +1,164 @@
1
+ import { chromium } from 'playwright';
2
+ import { logger } from './logger.js';
3
/**
 * Mounts the fixed "Save & close" banner at the top of the current document.
 *
 * This function is handed to `context.addInitScript`, so it executes inside
 * the page, not in Node. It must therefore stay fully self-contained and must
 * not reference anything from the surrounding module scope.
 */
function installAuthBanner() {
    const mountBanner = () => {
        // Init scripts can run more than once per document; mount only once.
        if (document.getElementById('__autokap_auth_banner__')) {
            return;
        }
        const banner = document.createElement('div');
        banner.id = '__autokap_auth_banner__';
        Object.assign(banner.style, {
            position: 'fixed',
            top: '0',
            left: '0',
            right: '0',
            zIndex: '2147483647',
            background: '#111827',
            color: '#f9fafb',
            font: '500 13px/1.4 system-ui, sans-serif',
            padding: '8px 14px',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'space-between',
            gap: '12px',
            boxShadow: '0 1px 4px rgba(0,0,0,0.4)',
        });
        const label = document.createElement('span');
        label.textContent =
            '🔐 AutoKap — Log in, then click "Save & close" (or just close this window).';
        banner.appendChild(label);
        const btn = document.createElement('button');
        btn.textContent = 'Save & close';
        Object.assign(btn.style, {
            background: '#10b981',
            color: '#ffffff',
            border: '0',
            borderRadius: '6px',
            padding: '4px 12px',
            font: '600 12px/1 system-ui, sans-serif',
            cursor: 'pointer',
        });
        btn.addEventListener('click', async () => {
            btn.disabled = true;
            btn.textContent = 'Saving…';
            const win = window;
            // The bridge is exposed from Node; guard in case it is missing.
            if (typeof win.__autokapSaveSession === 'function') {
                await win.__autokapSaveSession();
            }
        });
        banner.appendChild(btn);
        document.documentElement.appendChild(banner);
    };
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', mountBanner);
    }
    else {
        mountBanner();
    }
}
/**
 * Opens a headed Chromium window, lets the user log in, and uploads the
 * resulting storageState (cookies + localStorage, HttpOnly included) to the
 * given project credentials account.
 *
 * The session is captured continuously via polling so we don't depend on the
 * user explicitly clicking a button — closing the window after login is
 * enough. A "Save & close" button is also rendered for explicit confirmation.
 *
 * Failure handling: when the initial navigation fails, when no session data
 * was captured, or when the upload response is not OK, the error is logged,
 * the browser is closed and the process exits with code 1. A network error
 * thrown by `fetch` itself propagates as a rejection.
 *
 * @param options - `{ apiBaseUrl, apiKey, projectId, accountId, startUrl }`.
 * @returns A promise that resolves once the session has been uploaded.
 */
export async function captureAuthSession(options) {
    const { apiBaseUrl, apiKey, projectId, accountId, startUrl } = options;
    logger.info('[auth] Launching Chromium…');
    const browser = await chromium.launch({ headless: false });
    let lastGoodState = null;
    let pollHandle = null;
    let pollingStopped = false;
    // Stops the snapshot poll and makes any in-flight poll discard its result,
    // so a late poll can never overwrite the final snapshot.
    const stopPolling = () => {
        pollingStopped = true;
        if (pollHandle) {
            clearInterval(pollHandle);
            pollHandle = null;
        }
    };
    const closeBrowser = async () => {
        try {
            if (browser.isConnected()) {
                await browser.close();
            }
        }
        catch {
            // already closed
        }
    };
    // process.exit() skips `finally`, so release the browser explicitly first.
    const fail = async (message) => {
        logger.error(message);
        stopPolling();
        await closeBrowser();
        process.exit(1);
    };
    try {
        const context = await browser.newContext();
        const page = await context.newPage();
        // Bridge to Node: clicking the banner button triggers this function.
        let userConfirmed = false;
        let signalConfirmed = () => { };
        const confirmed = new Promise((resolve) => {
            signalConfirmed = resolve;
        });
        await context.exposeFunction('__autokapSaveSession', async () => {
            userConfirmed = true;
            signalConfirmed();
        });
        // Visible banner with a "Save & close" button on every page.
        await context.addInitScript(installAuthBanner);
        logger.info(`[auth] Opening ${startUrl} — log in when ready.`);
        try {
            await page.goto(startUrl, { waitUntil: 'domcontentloaded' });
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            await fail(`[auth] Failed to navigate: ${reason}`);
            return;
        }
        // Poll the storage state every 2s so we always have a fresh snapshot
        // cached in memory, independent of how the user exits.
        pollHandle = setInterval(async () => {
            if (pollingStopped || !browser.isConnected()) {
                return;
            }
            try {
                const snapshot = await context.storageState();
                const hasAny = (snapshot.cookies?.length ?? 0) > 0 || (snapshot.origins?.length ?? 0) > 0;
                if (hasAny && !pollingStopped) {
                    lastGoodState = snapshot;
                }
            }
            catch {
                // Context may be closing; swallow — we still have the last good state.
            }
        }, 2000);
        // Wait for either an explicit confirmation or window close. This is
        // event-driven, so no timer is left running once the wait is over.
        await new Promise((resolve) => {
            context.once('close', () => resolve());
            browser.once('disconnected', () => resolve());
            confirmed.then(() => resolve());
            // The browser may already be gone before the listeners attached.
            if (!browser.isConnected()) {
                resolve();
            }
        });
        stopPolling();
        // If the user confirmed while the context is still alive, take one final
        // snapshot (most up-to-date) before tearing down.
        if (userConfirmed && browser.isConnected()) {
            try {
                lastGoodState = await context.storageState();
            }
            catch {
                // fall back to whatever the poll captured
            }
        }
        if (!lastGoodState) {
            await fail('[auth] No session data captured — the browser had no cookies or localStorage. Did you finish logging in before closing?');
            return;
        }
        const cookieCount = lastGoodState.cookies?.length ?? 0;
        const originCount = lastGoodState.origins?.length ?? 0;
        logger.info(`[auth] Captured ${cookieCount} cookie(s), ${originCount} origin(s). Uploading…`);
        const res = await fetch(`${apiBaseUrl}/api/cli/projects/${projectId}/credentials/${accountId}/session`, {
            method: 'PUT',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(lastGoodState),
        });
        if (!res.ok) {
            // The error body may not be JSON; fall back to the HTTP status text.
            const body = await res.json().catch(() => ({ error: res.statusText }));
            await fail(`[auth] Upload failed: ${body.error || res.statusText}`);
            return;
        }
        logger.info('[auth] ✓ Session saved to the project credentials account.');
    }
    finally {
        stopPolling();
        await closeBrowser();
    }
}
164
+ //# sourceMappingURL=auth-capture.js.map