explorbot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/LICENSE +94 -0
  2. package/README.md +267 -0
  3. package/assets/sample-files/sample.docx +0 -0
  4. package/assets/sample-files/sample.mp3 +0 -0
  5. package/assets/sample-files/sample.mp4 +0 -0
  6. package/assets/sample-files/sample.pdf +21 -0
  7. package/assets/sample-files/sample.png +0 -0
  8. package/assets/sample-files/sample.xlsx +0 -0
  9. package/assets/sample-files/sample.zip +0 -0
  10. package/dist/assets/sample-files/sample.docx +0 -0
  11. package/dist/assets/sample-files/sample.mp3 +0 -0
  12. package/dist/assets/sample-files/sample.mp4 +0 -0
  13. package/dist/assets/sample-files/sample.pdf +21 -0
  14. package/dist/assets/sample-files/sample.png +0 -0
  15. package/dist/assets/sample-files/sample.xlsx +0 -0
  16. package/dist/assets/sample-files/sample.zip +0 -0
  17. package/dist/bin/explorbot-cli.js +683 -0
  18. package/dist/bin/explorbot-cli.js.map +1 -0
  19. package/dist/boat/api-tester/bin/apibot-cli.js +5 -0
  20. package/dist/boat/api-tester/bin/apibot-cli.js.map +1 -0
  21. package/dist/boat/api-tester/example/apibot.config.js +31 -0
  22. package/dist/boat/api-tester/example/apibot.config.js.map +1 -0
  23. package/dist/boat/api-tester/src/ai/chief/styles.js +13 -0
  24. package/dist/boat/api-tester/src/ai/chief/styles.js.map +1 -0
  25. package/dist/boat/api-tester/src/ai/chief.js +301 -0
  26. package/dist/boat/api-tester/src/ai/chief.js.map +1 -0
  27. package/dist/boat/api-tester/src/ai/curler-tools.js +263 -0
  28. package/dist/boat/api-tester/src/ai/curler-tools.js.map +1 -0
  29. package/dist/boat/api-tester/src/ai/curler.js +271 -0
  30. package/dist/boat/api-tester/src/ai/curler.js.map +1 -0
  31. package/dist/boat/api-tester/src/api-client.js +26 -0
  32. package/dist/boat/api-tester/src/api-client.js.map +1 -0
  33. package/dist/boat/api-tester/src/apibot.js +166 -0
  34. package/dist/boat/api-tester/src/apibot.js.map +1 -0
  35. package/dist/boat/api-tester/src/cli.js +262 -0
  36. package/dist/boat/api-tester/src/cli.js.map +1 -0
  37. package/dist/boat/api-tester/src/config.js +159 -0
  38. package/dist/boat/api-tester/src/config.js.map +1 -0
  39. package/dist/prompts/audit-rules.md +124 -0
  40. package/dist/rules/chief/general.md +11 -0
  41. package/dist/rules/chief/styles/curious.md +12 -0
  42. package/dist/rules/chief/styles/hacker.md +19 -0
  43. package/dist/rules/chief/styles/normal.md +11 -0
  44. package/dist/rules/chief/styles/psycho.md +17 -0
  45. package/dist/rules/navigator/multiple-locator.md +47 -0
  46. package/dist/rules/navigator/output.md +69 -0
  47. package/dist/rules/navigator/verification-actions.md +122 -0
  48. package/dist/rules/navigator/verification-output.md +53 -0
  49. package/dist/rules/planner/styles/curious.md +39 -0
  50. package/dist/rules/planner/styles/normal.md +21 -0
  51. package/dist/rules/planner/styles/psycho.md +14 -0
  52. package/dist/rules/researcher/list-element.md +11 -0
  53. package/dist/rules/researcher/screenshot-ui-map.md +30 -0
  54. package/dist/rules/researcher/section-ui-map.md +18 -0
  55. package/dist/rules/researcher/ui-map-table.md +18 -0
  56. package/dist/src/action-result.js +574 -0
  57. package/dist/src/action-result.js.map +1 -0
  58. package/dist/src/action.js +388 -0
  59. package/dist/src/action.js.map +1 -0
  60. package/dist/src/activity.js +86 -0
  61. package/dist/src/activity.js.map +1 -0
  62. package/dist/src/ai/agent.js +2 -0
  63. package/dist/src/ai/agent.js.map +1 -0
  64. package/dist/src/ai/bosun.js +443 -0
  65. package/dist/src/ai/bosun.js.map +1 -0
  66. package/dist/src/ai/captain/idle-mode.js +102 -0
  67. package/dist/src/ai/captain/idle-mode.js.map +1 -0
  68. package/dist/src/ai/captain/mixin.js +11 -0
  69. package/dist/src/ai/captain/mixin.js.map +1 -0
  70. package/dist/src/ai/captain/test-mode.js +251 -0
  71. package/dist/src/ai/captain/test-mode.js.map +1 -0
  72. package/dist/src/ai/captain/web-mode.js +124 -0
  73. package/dist/src/ai/captain/web-mode.js.map +1 -0
  74. package/dist/src/ai/captain.js +442 -0
  75. package/dist/src/ai/captain.js.map +1 -0
  76. package/dist/src/ai/conversation.js +176 -0
  77. package/dist/src/ai/conversation.js.map +1 -0
  78. package/dist/src/ai/experience-compactor.js +232 -0
  79. package/dist/src/ai/experience-compactor.js.map +1 -0
  80. package/dist/src/ai/fisherman-tools.js +154 -0
  81. package/dist/src/ai/fisherman-tools.js.map +1 -0
  82. package/dist/src/ai/fisherman.js +184 -0
  83. package/dist/src/ai/fisherman.js.map +1 -0
  84. package/dist/src/ai/historian.js +384 -0
  85. package/dist/src/ai/historian.js.map +1 -0
  86. package/dist/src/ai/navigator.js +493 -0
  87. package/dist/src/ai/navigator.js.map +1 -0
  88. package/dist/src/ai/pilot.js +684 -0
  89. package/dist/src/ai/pilot.js.map +1 -0
  90. package/dist/src/ai/planner/session-dedup.js +28 -0
  91. package/dist/src/ai/planner/session-dedup.js.map +1 -0
  92. package/dist/src/ai/planner/styles.js +15 -0
  93. package/dist/src/ai/planner/styles.js.map +1 -0
  94. package/dist/src/ai/planner/subpages.js +118 -0
  95. package/dist/src/ai/planner/subpages.js.map +1 -0
  96. package/dist/src/ai/planner.js +486 -0
  97. package/dist/src/ai/planner.js.map +1 -0
  98. package/dist/src/ai/provider.js +540 -0
  99. package/dist/src/ai/provider.js.map +1 -0
  100. package/dist/src/ai/quartermaster.js +210 -0
  101. package/dist/src/ai/quartermaster.js.map +1 -0
  102. package/dist/src/ai/researcher/cache.js +95 -0
  103. package/dist/src/ai/researcher/cache.js.map +1 -0
  104. package/dist/src/ai/researcher/coordinates.js +210 -0
  105. package/dist/src/ai/researcher/coordinates.js.map +1 -0
  106. package/dist/src/ai/researcher/deep-analysis.js +364 -0
  107. package/dist/src/ai/researcher/deep-analysis.js.map +1 -0
  108. package/dist/src/ai/researcher/fingerprint-worker.js +46 -0
  109. package/dist/src/ai/researcher/fingerprint-worker.js.map +1 -0
  110. package/dist/src/ai/researcher/focus.js +37 -0
  111. package/dist/src/ai/researcher/focus.js.map +1 -0
  112. package/dist/src/ai/researcher/locators.js +242 -0
  113. package/dist/src/ai/researcher/locators.js.map +1 -0
  114. package/dist/src/ai/researcher/mixin.js +3 -0
  115. package/dist/src/ai/researcher/mixin.js.map +1 -0
  116. package/dist/src/ai/researcher/parser.js +160 -0
  117. package/dist/src/ai/researcher/parser.js.map +1 -0
  118. package/dist/src/ai/researcher/research-result.js +110 -0
  119. package/dist/src/ai/researcher/research-result.js.map +1 -0
  120. package/dist/src/ai/researcher.js +776 -0
  121. package/dist/src/ai/researcher.js.map +1 -0
  122. package/dist/src/ai/rules.js +368 -0
  123. package/dist/src/ai/rules.js.map +1 -0
  124. package/dist/src/ai/task-agent.js +110 -0
  125. package/dist/src/ai/task-agent.js.map +1 -0
  126. package/dist/src/ai/tester.js +840 -0
  127. package/dist/src/ai/tester.js.map +1 -0
  128. package/dist/src/ai/tools.js +980 -0
  129. package/dist/src/ai/tools.js.map +1 -0
  130. package/dist/src/api/api-client.js +91 -0
  131. package/dist/src/api/api-client.js.map +1 -0
  132. package/dist/src/api/request-result.js +177 -0
  133. package/dist/src/api/request-result.js.map +1 -0
  134. package/dist/src/api/request-store.js +109 -0
  135. package/dist/src/api/request-store.js.map +1 -0
  136. package/dist/src/api/spec-reader.js +148 -0
  137. package/dist/src/api/spec-reader.js.map +1 -0
  138. package/dist/src/api/xhr-capture.js +91 -0
  139. package/dist/src/api/xhr-capture.js.map +1 -0
  140. package/dist/src/browser-server.js +67 -0
  141. package/dist/src/browser-server.js.map +1 -0
  142. package/dist/src/command-handler.js +363 -0
  143. package/dist/src/command-handler.js.map +1 -0
  144. package/dist/src/commands/add-rule-command.js +52 -0
  145. package/dist/src/commands/add-rule-command.js.map +1 -0
  146. package/dist/src/commands/base-command.js +14 -0
  147. package/dist/src/commands/base-command.js.map +1 -0
  148. package/dist/src/commands/clean-command.js +67 -0
  149. package/dist/src/commands/clean-command.js.map +1 -0
  150. package/dist/src/commands/context-aria-command.js +18 -0
  151. package/dist/src/commands/context-aria-command.js.map +1 -0
  152. package/dist/src/commands/context-command.js +57 -0
  153. package/dist/src/commands/context-command.js.map +1 -0
  154. package/dist/src/commands/context-data-command.js +25 -0
  155. package/dist/src/commands/context-data-command.js.map +1 -0
  156. package/dist/src/commands/context-experience-command.js +41 -0
  157. package/dist/src/commands/context-experience-command.js.map +1 -0
  158. package/dist/src/commands/context-html-command.js +26 -0
  159. package/dist/src/commands/context-html-command.js.map +1 -0
  160. package/dist/src/commands/context-knowledge-command.js +36 -0
  161. package/dist/src/commands/context-knowledge-command.js.map +1 -0
  162. package/dist/src/commands/debug-command.js +12 -0
  163. package/dist/src/commands/debug-command.js.map +1 -0
  164. package/dist/src/commands/drill-command.js +29 -0
  165. package/dist/src/commands/drill-command.js.map +1 -0
  166. package/dist/src/commands/exit-command.js +26 -0
  167. package/dist/src/commands/exit-command.js.map +1 -0
  168. package/dist/src/commands/explore-command.js +124 -0
  169. package/dist/src/commands/explore-command.js.map +1 -0
  170. package/dist/src/commands/freesail-command.js +84 -0
  171. package/dist/src/commands/freesail-command.js.map +1 -0
  172. package/dist/src/commands/help-command.js +7 -0
  173. package/dist/src/commands/help-command.js.map +1 -0
  174. package/dist/src/commands/index.js +63 -0
  175. package/dist/src/commands/index.js.map +1 -0
  176. package/dist/src/commands/knows-command.js +54 -0
  177. package/dist/src/commands/knows-command.js.map +1 -0
  178. package/dist/src/commands/learn-command.js +35 -0
  179. package/dist/src/commands/learn-command.js.map +1 -0
  180. package/dist/src/commands/navigate-command.js +16 -0
  181. package/dist/src/commands/navigate-command.js.map +1 -0
  182. package/dist/src/commands/path-command.js +70 -0
  183. package/dist/src/commands/path-command.js.map +1 -0
  184. package/dist/src/commands/plan-clear-command.js +13 -0
  185. package/dist/src/commands/plan-clear-command.js.map +1 -0
  186. package/dist/src/commands/plan-command.js +36 -0
  187. package/dist/src/commands/plan-command.js.map +1 -0
  188. package/dist/src/commands/plan-edit-command.js +8 -0
  189. package/dist/src/commands/plan-edit-command.js.map +1 -0
  190. package/dist/src/commands/plan-load-command.js +16 -0
  191. package/dist/src/commands/plan-load-command.js.map +1 -0
  192. package/dist/src/commands/plan-reload-command.js +23 -0
  193. package/dist/src/commands/plan-reload-command.js.map +1 -0
  194. package/dist/src/commands/plan-save-command.js +22 -0
  195. package/dist/src/commands/plan-save-command.js.map +1 -0
  196. package/dist/src/commands/research-command.js +38 -0
  197. package/dist/src/commands/research-command.js.map +1 -0
  198. package/dist/src/commands/start-command.js +12 -0
  199. package/dist/src/commands/start-command.js.map +1 -0
  200. package/dist/src/commands/status-command.js +19 -0
  201. package/dist/src/commands/status-command.js.map +1 -0
  202. package/dist/src/commands/test-command.js +85 -0
  203. package/dist/src/commands/test-command.js.map +1 -0
  204. package/dist/src/components/ActivityPane.js +55 -0
  205. package/dist/src/components/ActivityPane.js.map +1 -0
  206. package/dist/src/components/AddKnowledge.js +122 -0
  207. package/dist/src/components/AddKnowledge.js.map +1 -0
  208. package/dist/src/components/AddRule.js +117 -0
  209. package/dist/src/components/AddRule.js.map +1 -0
  210. package/dist/src/components/App.js +313 -0
  211. package/dist/src/components/App.js.map +1 -0
  212. package/dist/src/components/Autocomplete.js +43 -0
  213. package/dist/src/components/Autocomplete.js.map +1 -0
  214. package/dist/src/components/InputPane.js +207 -0
  215. package/dist/src/components/InputPane.js.map +1 -0
  216. package/dist/src/components/InputReadline.js +598 -0
  217. package/dist/src/components/InputReadline.js.map +1 -0
  218. package/dist/src/components/LogPane.js +123 -0
  219. package/dist/src/components/LogPane.js.map +1 -0
  220. package/dist/src/components/PlanEditor.js +126 -0
  221. package/dist/src/components/PlanEditor.js.map +1 -0
  222. package/dist/src/components/PlanPane.js +51 -0
  223. package/dist/src/components/PlanPane.js.map +1 -0
  224. package/dist/src/components/SessionTimer.js +26 -0
  225. package/dist/src/components/SessionTimer.js.map +1 -0
  226. package/dist/src/components/StateTransitionPane.js +107 -0
  227. package/dist/src/components/StateTransitionPane.js.map +1 -0
  228. package/dist/src/components/StatusPane.js +37 -0
  229. package/dist/src/components/StatusPane.js.map +1 -0
  230. package/dist/src/components/TaskPane.js +96 -0
  231. package/dist/src/components/TaskPane.js.map +1 -0
  232. package/dist/src/components/Welcome.js +52 -0
  233. package/dist/src/components/Welcome.js.map +1 -0
  234. package/dist/src/components/WelcomeChecklist.js +96 -0
  235. package/dist/src/components/WelcomeChecklist.js.map +1 -0
  236. package/dist/src/components/WelcomeCommands.js +61 -0
  237. package/dist/src/components/WelcomeCommands.js.map +1 -0
  238. package/dist/src/components/autocomplete-store.js +22 -0
  239. package/dist/src/components/autocomplete-store.js.map +1 -0
  240. package/dist/src/components/parse-keypress.js +174 -0
  241. package/dist/src/components/parse-keypress.js.map +1 -0
  242. package/dist/src/config.js +249 -0
  243. package/dist/src/config.js.map +1 -0
  244. package/dist/src/execution-controller.js +92 -0
  245. package/dist/src/execution-controller.js.map +1 -0
  246. package/dist/src/experience-tracker.js +294 -0
  247. package/dist/src/experience-tracker.js.map +1 -0
  248. package/dist/src/explorbot.js +348 -0
  249. package/dist/src/explorbot.js.map +1 -0
  250. package/dist/src/explorer.js +611 -0
  251. package/dist/src/explorer.js.map +1 -0
  252. package/dist/src/index.js +56 -0
  253. package/dist/src/index.js.map +1 -0
  254. package/dist/src/knowledge-tracker.js +184 -0
  255. package/dist/src/knowledge-tracker.js.map +1 -0
  256. package/dist/src/observability.js +126 -0
  257. package/dist/src/observability.js.map +1 -0
  258. package/dist/src/reporter.js +185 -0
  259. package/dist/src/reporter.js.map +1 -0
  260. package/dist/src/state-manager.js +427 -0
  261. package/dist/src/state-manager.js.map +1 -0
  262. package/dist/src/stats.js +44 -0
  263. package/dist/src/stats.js.map +1 -0
  264. package/dist/src/test-plan.js +343 -0
  265. package/dist/src/test-plan.js.map +1 -0
  266. package/dist/src/utils/aria.js +588 -0
  267. package/dist/src/utils/aria.js.map +1 -0
  268. package/dist/src/utils/code-extractor.js +21 -0
  269. package/dist/src/utils/code-extractor.js.map +1 -0
  270. package/dist/src/utils/context-formatter.js +205 -0
  271. package/dist/src/utils/context-formatter.js.map +1 -0
  272. package/dist/src/utils/error-page.js +19 -0
  273. package/dist/src/utils/error-page.js.map +1 -0
  274. package/dist/src/utils/expandable.js +35 -0
  275. package/dist/src/utils/expandable.js.map +1 -0
  276. package/dist/src/utils/hooks-runner.js +77 -0
  277. package/dist/src/utils/hooks-runner.js.map +1 -0
  278. package/dist/src/utils/html-diff.js +734 -0
  279. package/dist/src/utils/html-diff.js.map +1 -0
  280. package/dist/src/utils/html.js +1163 -0
  281. package/dist/src/utils/html.js.map +1 -0
  282. package/dist/src/utils/logger.js +465 -0
  283. package/dist/src/utils/logger.js.map +1 -0
  284. package/dist/src/utils/loop.js +126 -0
  285. package/dist/src/utils/loop.js.map +1 -0
  286. package/dist/src/utils/markdown-parser.js +117 -0
  287. package/dist/src/utils/markdown-parser.js.map +1 -0
  288. package/dist/src/utils/markdown-query.js +393 -0
  289. package/dist/src/utils/markdown-query.js.map +1 -0
  290. package/dist/src/utils/markdown-terminal.js +40 -0
  291. package/dist/src/utils/markdown-terminal.js.map +1 -0
  292. package/dist/src/utils/research-parser.js +2 -0
  293. package/dist/src/utils/research-parser.js.map +1 -0
  294. package/dist/src/utils/retry.js +55 -0
  295. package/dist/src/utils/retry.js.map +1 -0
  296. package/dist/src/utils/rules-loader.js +104 -0
  297. package/dist/src/utils/rules-loader.js.map +1 -0
  298. package/dist/src/utils/strings.js +14 -0
  299. package/dist/src/utils/strings.js.map +1 -0
  300. package/dist/src/utils/test-plan-markdown.js +301 -0
  301. package/dist/src/utils/test-plan-markdown.js.map +1 -0
  302. package/dist/src/utils/throttle.js +16 -0
  303. package/dist/src/utils/throttle.js.map +1 -0
  304. package/dist/src/utils/unique-names.js +13 -0
  305. package/dist/src/utils/unique-names.js.map +1 -0
  306. package/dist/src/utils/url-matcher.js +48 -0
  307. package/dist/src/utils/url-matcher.js.map +1 -0
  308. package/dist/src/utils/web-element.js +131 -0
  309. package/dist/src/utils/web-element.js.map +1 -0
  310. package/dist/src/utils/xpath.js +110 -0
  311. package/dist/src/utils/xpath.js.map +1 -0
  312. package/package.json +119 -0
  313. package/prompts/audit-rules.md +124 -0
  314. package/rules/chief/general.md +11 -0
  315. package/rules/chief/styles/curious.md +12 -0
  316. package/rules/chief/styles/hacker.md +19 -0
  317. package/rules/chief/styles/normal.md +11 -0
  318. package/rules/chief/styles/psycho.md +17 -0
  319. package/rules/navigator/multiple-locator.md +47 -0
  320. package/rules/navigator/output.md +69 -0
  321. package/rules/navigator/verification-actions.md +122 -0
  322. package/rules/navigator/verification-output.md +53 -0
  323. package/rules/planner/styles/curious.md +39 -0
  324. package/rules/planner/styles/normal.md +21 -0
  325. package/rules/planner/styles/psycho.md +14 -0
  326. package/rules/researcher/list-element.md +11 -0
  327. package/rules/researcher/screenshot-ui-map.md +30 -0
  328. package/rules/researcher/section-ui-map.md +18 -0
  329. package/rules/researcher/ui-map-table.md +18 -0
@@ -0,0 +1,1163 @@
1
+ import { minify } from 'html-minifier-next';
2
+ import { parse, parseFragment, serialize } from 'parse5';
3
+ /**
4
+ * HTML parsing library that preserves original structure while filtering content
5
+ * Based on CodeceptJS approach but with recursive parsing to maintain structure
6
+ */
7
/**
 * Simple CSS selector matcher.
 *
 * Supports a single simple selector only: `tag`, `.class`, `#id`, `[attr]`
 * and `[attr=value]` (value optionally quoted). Anything else, including
 * compound selectors such as `div.card`, never matches.
 *
 * @param {object} element - Node to test; nodes without a `tagName` never match.
 * @param {string} selector - One simple selector.
 * @returns {boolean} True when the element satisfies the selector.
 */
function matchesSelector(element, selector) {
    // Only nodes that carry a tagName (i.e. elements) can match.
    if (!element || !element.tagName) {
        return false;
    }
    const attrs = element.attrs ?? [];
    const findAttr = (name) => attrs.find((attr) => attr.name === name);

    // Tag selector: a bare name without any selector punctuation.
    // Each character is tested separately because String#includes accepts a
    // single search string; its second argument is a start position.
    const isBareTag = !['[', '.', '#', ':'].some((symbol) => selector.includes(symbol));
    if (isBareTag) {
        return element.tagName.toLowerCase() === selector.toLowerCase();
    }

    // Class selector
    if (selector.startsWith('.')) {
        const className = selector.slice(1);
        const classAttr = findAttr('class');
        // Class names may be separated by any run of whitespace.
        return classAttr ? classAttr.value.split(/\s+/).includes(className) : false;
    }

    // ID selector
    if (selector.startsWith('#')) {
        const idAttr = findAttr('id');
        return idAttr ? idAttr.value === selector.slice(1) : false;
    }

    // Attribute selector
    if (selector.startsWith('[') && selector.endsWith(']')) {
        const attrContent = selector.slice(1, -1);
        const eqIndex = attrContent.indexOf('=');
        if (eqIndex === -1) {
            // Just attribute existence
            return attrs.some((attr) => attr.name === attrContent);
        }
        // Attribute with value; strip one leading and one trailing quote if present.
        const attrName = attrContent.slice(0, eqIndex);
        const unquotedValue = attrContent.slice(eqIndex + 1).replace(/^["']|["']$/g, '');
        const attr = findAttr(attrName);
        return attr ? attr.value === unquotedValue : false;
    }

    return false;
}
50
/**
 * Tell whether an element satisfies at least one selector from a list.
 *
 * @param {object} element - Node handed to `matchesSelector` for each selector.
 * @param {string[]} selectors - Candidate selectors; a missing or empty list never matches.
 * @returns {boolean} True as soon as any selector matches.
 */
function matchesAnySelector(element, selectors) {
    const nothingToCheck = !selectors || selectors.length === 0;
    if (nothingToCheck) {
        return false;
    }
    return [...selectors].some((candidate) => matchesSelector(element, candidate));
}
63
// Lookup tables of lower-case tag names, ARIA roles, event attributes and class names.
const TEXT_ELEMENT_TAGS = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'td', 'th', 'label', 'div', 'span']);
const INTERACTIVE_TAGS = new Set(['a', 'button', 'details', 'input', 'option', 'select', 'summary', 'textarea', 'iframe']);
const INTERACTIVE_ROLES = new Set(['button', 'checkbox', 'combobox', 'link', 'listbox', 'radio', 'search', 'switch', 'tab', 'textbox']);
const INTERACTIVE_EVENT_ATTRIBUTES = new Set(['onclick', 'onchange', 'onblur', 'onfocus', 'onmousedown', 'onmouseup']);
const HIDDEN_CLASSES = new Set(['hidden', 'invisible', 'd-none', 'hide', 'dn', 'u-hidden', 'is-hidden', 'visually-hidden', 'sr-only', 'screen-reader-only', 'visuallyhidden', 'opacity-0']);

/**
 * Class-name prefixes treated as noise; class names matching this pattern are
 * dropped from `class` attributes by the minimal UI snapshot.
 */
export const TRASH_HTML_CLASSES = /^(text-|color-|flex-|float-|v-|ember-|d-|border-)/;

/**
 * Patterns for Tailwind utility class names. A class name matching any entry
 * is dropped from `class` attributes by the minimal UI snapshot.
 */
export const TAILWIND_CLASS_PATTERNS = [
    /^m[trblxy]?-/i,
    /^p[trblxy]?-/i,
    /^(min|max)-(w|h)-/i,
    /^(h|w)-/i,
    /^bg-/i,
    /^text-/i,
    /^font-/i,
    /^leading-/i,
    /^tracking-/i,
    /^uppercase$/i,
    /^lowercase$/i,
    /^capitalize$/i,
    /^italic$/i,
    /^antialiased$/i,
    /^subpixel-antialiased$/i,
    /^whitespace-/i,
    /^break-/i,
    /^flex$/i,
    /^inline-flex$/i,
    /^grid$/i,
    /^inline-grid$/i,
    /^items-/i,
    /^content-/i,
    /^justify-/i,
    /^place-/i,
    /^self-/i,
    /^gap-/i,
    /^space-[xy]-/i,
    /^order-/i,
    /^z-/i,
    /^shadow/i,
    /^rounded/i,
    /^border/i,
    /^outline-/i,
    /^ring-/i,
    /^opacity-/i,
    /^fill-/i,
    /^stroke-/i,
    /^blur-/i,
    /^brightness-/i,
    /^contrast-/i,
    /^drop-shadow-/i,
    /^grayscale$/i,
    /^hue-rotate-/i,
    /^invert$/i,
    /^saturate-/i,
    /^sepia$/i,
    /^backdrop-/i,
    /^overflow-/i,
    /^truncate$/i,
    /^transform$/i,
    /^transition$/i,
    /^duration-/i,
    /^delay-/i,
    /^ease-/i,
    /^animate-/i,
    /^cursor-/i,
    /^select-/i,
    /^pointer-events-/i,
    /^align-/i,
    /^table-/i,
    /^list-/i,
    /^grid-cols-/i,
    /^grid-rows-/i,
    /^col-span-/i,
    /^row-span-/i,
    /^translate-[xyz]-/i,
    // Covers both the uniform form (`scale-50`) and the per-axis form
    // (`scale-x-50`); requiring a second dash would miss the uniform form.
    /^scale-/i,
    /^rotate-/i,
    /^skew-[xy]-/i,
    /^origin-/i,
    /^inset-/i,
    /^top-/i,
    /^bottom-/i,
    /^left-/i,
    /^right-/i,
    /^aspect-/i,
    /^prose$/i,
];

/**
 * Lower-case tag names removed wholesale (element and subtree) when a document
 * is sanitized: scripting/metadata tags, legacy embedding tags and SVG
 * drawing primitives.
 */
const NON_SEMANTIC_TAGS = new Set([
    'style',
    'script',
    'link',
    'meta',
    'base',
    'template',
    'slot',
    'noscript',
    'frame',
    'frameset',
    'object',
    'embed',
    'path',
    'polygon',
    'polyline',
    'circle',
    'ellipse',
    'line',
    'rect',
    'defs',
    'g',
    'symbol',
    'use',
    'mask',
    'pattern',
    'clippath',
    'animate',
    'animatetransform',
    'animatecolor',
]);
180
/**
 * Check that a value is a non-null object whose `childNodes` property is an array.
 *
 * @param {unknown} node - Value to inspect.
 * @returns {boolean} True only for objects exposing an array `childNodes`.
 */
function hasChildNodes(node) {
    if (!node || typeof node !== 'object') {
        return false;
    }
    if (!('childNodes' in node)) {
        return false;
    }
    return Array.isArray(node.childNodes);
}
183
/**
 * Recursively remove, in place, every comment node and every element whose
 * lower-cased tag name is in `tagsToRemove`. Removed elements take their
 * whole subtree with them.
 *
 * @param {object} node - Tree node whose `childNodes` array is edited in place.
 * @param {Set<string>} tagsToRemove - Lower-case tag names to drop.
 * @returns {void}
 */
function stripElementsByTag(node, tagsToRemove) {
    const children = node.childNodes;
    if (!children) {
        return;
    }
    // Walk backwards so a splice never shifts an index that is still pending.
    let index = children.length;
    while (index-- > 0) {
        const current = children[index];
        const isComment = current.nodeName === '#comment';
        const isElement = !isComment && 'tagName' in current && Boolean(current.tagName);
        if (isComment || (isElement && tagsToRemove.has(current.tagName.toLowerCase()))) {
            children.splice(index, 1);
            continue;
        }
        // Surviving elements are always descended into; other nodes only when
        // they expose an array of children.
        if (isElement || hasChildNodes(current)) {
            stripElementsByTag(current, tagsToRemove);
        }
    }
}
205
/**
 * Reduce the document's `<head>` in place so that only `<title>` elements and
 * non-blank text nodes remain. Does nothing when the document has no
 * `<html>` child, or that element has no `<head>` child.
 *
 * @param {object} document - Parsed document whose head is edited in place.
 * @returns {void}
 */
function pruneDocumentHead(document) {
    const childElement = (parent, name) => parent.childNodes.find((node) => 'tagName' in node && node.tagName?.toLowerCase() === name);
    if (!document.childNodes) {
        return;
    }
    const root = childElement(document, 'html');
    if (!root || !root.childNodes) {
        return;
    }
    const head = childElement(root, 'head');
    if (!head || !head.childNodes) {
        return;
    }
    // Elements survive only if they are <title>; text survives only if it is
    // not pure whitespace; every other node kind (comments included) goes.
    const shouldKeep = (entry) => {
        if ('tagName' in entry && entry.tagName) {
            return entry.tagName.toLowerCase() === 'title';
        }
        if (entry.nodeName === '#text') {
            return Boolean(entry.value.trim());
        }
        return false;
    };
    for (let position = head.childNodes.length - 1; position >= 0; position -= 1) {
        if (!shouldKeep(head.childNodes[position])) {
            head.childNodes.splice(position, 1);
        }
    }
}
239
/**
 * Sanitize a parsed document in place.
 *
 * First drops comments and every element listed in NON_SEMANTIC_TAGS from the
 * whole tree, then prunes what is left of the document head.
 *
 * @param {object} document - Parsed document, mutated in place.
 * @returns {void}
 */
function sanitizeDocumentTree(document) {
    const removableTags = NON_SEMANTIC_TAGS;
    stripElementsByTag(document, removableTags);
    pruneDocumentHead(document);
}
243
/**
 * Read the trimmed text of the document's `<title>` (document → html → head → title).
 *
 * @param {object} document - Parsed document.
 * @returns {string|null} The trimmed title text, or null when any element on
 *   the path is missing or the title is empty after trimming.
 */
function getDocumentTitle(document) {
    const childElement = (parent, name) => parent.childNodes.find((node) => 'tagName' in node && node.tagName?.toLowerCase() === name);
    if (!document.childNodes) {
        return null;
    }
    const root = childElement(document, 'html');
    if (!root?.childNodes) {
        return null;
    }
    const head = childElement(root, 'head');
    if (!head?.childNodes) {
        return null;
    }
    const title = childElement(head, 'title');
    if (!title) {
        return null;
    }
    const trimmed = getTextContent(title).trim();
    return trimmed.length > 0 ? trimmed : null;
}
261
/**
 * Make the document's `<head>` contain exactly one `<title>` holding `titleText`.
 *
 * An existing `<head>` has its children replaced by the new title. When no
 * `<head>` exists, one is created and inserted immediately before `<body>`,
 * or as the first child of `<html>` when there is no `<body>`.
 *
 * Does nothing when `titleText` is empty, the document has no children, or
 * there is no `<html>` element with a `childNodes` array.
 *
 * @param {object} document - Parsed document, mutated in place.
 * @param {string|null} titleText - Title text to write.
 * @returns {void}
 */
function ensureDocumentTitle(document, titleText) {
    if (!titleText || !document.childNodes) {
        return;
    }
    const isTag = (name) => (node) => 'tagName' in node && node.tagName?.toLowerCase() === name;
    const htmlElement = document.childNodes.find(isTag('html'));
    // Same guard as the sibling head helpers: an <html> node without children
    // cannot host a head.
    if (!htmlElement || !htmlElement.childNodes) {
        return;
    }
    const namespace = htmlElement.namespaceURI || 'http://www.w3.org/1999/xhtml';
    let headElement = htmlElement.childNodes.find(isTag('head'));
    if (!headElement) {
        headElement = {
            nodeName: 'head',
            tagName: 'head',
            attrs: [],
            namespaceURI: namespace,
            childNodes: [],
            parentNode: htmlElement,
        };
        // Insert head before body if possible, otherwise prepend so the head
        // still precedes the rest of the content.
        const bodyIndex = htmlElement.childNodes.findIndex(isTag('body'));
        const insertAt = bodyIndex === -1 ? 0 : bodyIndex;
        htmlElement.childNodes.splice(insertAt, 0, headElement);
    }
    else {
        // Discard whatever the head held; it is rebuilt with the title only.
        headElement.childNodes = [];
    }
    const titleElement = {
        nodeName: 'title',
        tagName: 'title',
        attrs: [],
        namespaceURI: namespace,
        childNodes: [],
        parentNode: headElement,
    };
    const textNode = {
        nodeName: '#text',
        value: titleText,
    };
    textNode.parentNode = titleElement;
    titleElement.childNodes.push(textNode);
    headElement.childNodes.push(titleElement);
}
308
/**
 * Parse an HTML string and return its sanitized document tree.
 *
 * The title is read before sanitizing and written back afterwards, so the
 * resulting head holds a single title with the trimmed original text (when
 * the source had a non-empty title).
 *
 * @param {string} html - Raw HTML markup.
 * @param {object} [_htmlConfig] - Accepted for interface symmetry; not read here.
 * @returns {object} The parsed and sanitized document.
 */
function createSanitizedDocument(html, _htmlConfig) {
    const tree = parse(html);
    const originalTitle = getDocumentTitle(tree);
    sanitizeDocumentTree(tree);
    ensureDocumentTitle(tree, originalTitle);
    return tree;
}
315
/**
 * Parse and sanitize HTML, returning the document tree.
 *
 * @param {string} html - Raw HTML markup.
 * @param {object} [htmlConfig] - Forwarded unchanged to `createSanitizedDocument`.
 * @returns {object} The sanitized document tree.
 */
export function sanitizeHtmlDocument(html, htmlConfig) {
    const sanitized = createSanitizedDocument(html, htmlConfig);
    return sanitized;
}
318
/**
 * Parse and sanitize HTML, returning the result serialized back to a string.
 *
 * @param {string} html - Raw HTML markup.
 * @param {object} [htmlConfig] - Forwarded unchanged to `createSanitizedDocument`.
 * @returns {string} Serialized sanitized markup.
 */
export function sanitizeHtmlString(html, htmlConfig) {
    return serialize(createSanitizedDocument(html, htmlConfig));
}
322
+ /**
323
+ * Creates a minimal snapshot keeping only interactive elements and their structure
324
+ * Based on CodeceptJS HTML library
325
+ */
326
+ export function htmlMinimalUISnapshot(html, htmlConfig) {
327
+ const document = parse(html);
328
+ const trashHtmlClasses = TRASH_HTML_CLASSES;
329
+ const removeElements = ['path', 'script'];
330
+ function isFilteredOut(node) {
331
+ // Check exclude selectors first
332
+ if (htmlConfig?.exclude && matchesAnySelector(node, htmlConfig.exclude)) {
333
+ return true;
334
+ }
335
+ if (removeElements.includes(node.nodeName))
336
+ return true;
337
+ if (node.attrs) {
338
+ if (node.attrs.find((attr) => attr.name === 'role' && attr.value === 'tooltip'))
339
+ return true;
340
+ }
341
+ return false;
342
+ }
343
+ // Define default interactive elements
344
+ const interactiveElements = ['a', 'input', 'button', 'select', 'textarea', 'option', 'iframe'];
345
+ const textElements = ['label', 'h1', 'h2'];
346
+ const allowedRoles = ['button', 'checkbox', 'search', 'textbox', 'tab'];
347
+ const allowedAttrs = ['id', 'for', 'class', 'name', 'type', 'value', 'tabindex', 'aria-labelledby', 'aria-label', 'label', 'placeholder', 'title', 'alt', 'src', 'width', 'height', 'role'];
348
+ function isInteractive(element) {
349
+ // Check if element matches include selectors
350
+ if (htmlConfig?.include && matchesAnySelector(element, htmlConfig.include)) {
351
+ return true;
352
+ }
353
+ // Check if element matches exclude selectors
354
+ if (htmlConfig?.exclude && matchesAnySelector(element, htmlConfig.exclude)) {
355
+ return false;
356
+ }
357
+ // Check for data-explorbot attributes (new addition)
358
+ if (hasExplorbotAttributes(element)) {
359
+ return true;
360
+ }
361
+ // Default logic
362
+ if (element.nodeName === 'input' && element.attrs.find((attr) => attr.name === 'type' && attr.value === 'hidden'))
363
+ return false;
364
+ if (interactiveElements.includes(element.nodeName))
365
+ return true;
366
+ if (element.attrs) {
367
+ if (element.attrs.find((attr) => attr.name === 'contenteditable'))
368
+ return true;
369
+ if (element.attrs.find((attr) => attr.name === 'tabindex'))
370
+ return true;
371
+ const role = element.attrs.find((attr) => attr.name === 'role');
372
+ if (role && allowedRoles.includes(role.value))
373
+ return true;
374
+ }
375
+ return false;
376
+ }
377
+ function hasMeaningfulText(node) {
378
+ if (textElements.includes(node.nodeName))
379
+ return true;
380
+ return false;
381
+ }
382
+ function hasInteractiveDescendant(node) {
383
+ if (!node.childNodes)
384
+ return false;
385
+ let result = false;
386
+ for (const childNode of node.childNodes) {
387
+ if (isInteractive(childNode) || hasMeaningfulText(childNode))
388
+ return true;
389
+ result = result || hasInteractiveDescendant(childNode);
390
+ }
391
+ return result;
392
+ }
393
+ function removeNonInteractive(node) {
394
+ if (node.nodeName !== '#document') {
395
+ const parent = node.parentNode;
396
+ const index = parent.childNodes.indexOf(node);
397
+ if (isFilteredOut(node)) {
398
+ parent.childNodes.splice(index, 1);
399
+ return true;
400
+ }
401
+ // keep texts for interactive elements
402
+ if ((isInteractive(parent) || hasMeaningfulText(parent)) && node.nodeName === '#text') {
403
+ node.value = node.value.trim().slice(0, 200);
404
+ if (!node.value)
405
+ return false;
406
+ return true;
407
+ }
408
+ if (
409
+ // if parent is interactive, we may need child element to match
410
+ !isInteractive(parent) &&
411
+ !isInteractive(node) &&
412
+ !hasInteractiveDescendant(node) &&
413
+ !hasMeaningfulText(node)) {
414
+ parent.childNodes.splice(index, 1);
415
+ return true;
416
+ }
417
+ }
418
+ if (node.nodeName === 'svg') {
419
+ cleanElement(node);
420
+ return false;
421
+ }
422
+ if (node.attrs) {
423
+ // Filter and keep allowed attributes, accessibility attributes
424
+ node.attrs = node.attrs.filter((attr) => {
425
+ const { name, value } = attr;
426
+ if (name === 'class') {
427
+ // Remove classes containing digits
428
+ attr.value = value
429
+ .split(' ')
430
+ // remove classes containing digits/
431
+ .filter((className) => !/\d/.test(className))
432
+ // remove popular trash classes
433
+ .filter((className) => !className.match(trashHtmlClasses))
434
+ // remove classes with : and __ in them
435
+ .filter((className) => !className.match(/(:|__)/))
436
+ // remove tailwind utility classes
437
+ .filter((className) => !TAILWIND_CLASS_PATTERNS.some((pattern) => pattern.test(className)))
438
+ .join(' ');
439
+ }
440
+ return allowedAttrs.includes(name) || name.startsWith('data-explorbot-');
441
+ });
442
+ }
443
+ // Convert data-explorbot-* attributes to regular attributes
444
+ if (node.attrs) {
445
+ convertExplorbotAttributes(node);
446
+ }
447
+ if (node.childNodes) {
448
+ for (let i = node.childNodes.length - 1; i >= 0; i--) {
449
+ const childNode = node.childNodes[i];
450
+ removeNonInteractive(childNode);
451
+ }
452
+ }
453
+ return false;
454
+ }
455
+ // Remove non-interactive elements starting from the root element
456
+ removeNonInteractive(document);
457
+ // Serialize the modified document tree back to HTML
458
+ const serializedHTML = serialize(document);
459
+ return serializedHTML;
460
+ }
461
/**
 * Minifies an HTML string by collapsing whitespace and stripping comments,
 * while leaving empty elements and optional tags in place.
 *
 * @param {string} html - markup to minify
 * @returns {Promise<*>} whatever the imported `minify` resolves to
 *   (presumably the minified markup string; verify against the minifier in use)
 */
export async function minifyHtml(html) {
    const minifyOptions = {
        collapseWhitespace: true,
        removeComments: true,
        removeEmptyElements: false,
        removeOptionalTags: false,
    };
    const minified = await minify(html, minifyOptions);
    return minified;
}
469
/**
 * Builds a combined snapshot that retains interactive elements and meaningful
 * text while preserving the original HTML structure.
 *
 * @param {string} html - source markup
 * @param {object} [htmlConfig] - optional config with `include` / `exclude` selectors
 * @param {object} [opts] - optional flags; `opts.keepPositions` is forwarded to `filterTree`
 * @returns {string} the serialized filtered document, or the input `html`
 *   unchanged when no `<body>` can be located
 */
export function htmlCombinedSnapshot(html, htmlConfig, opts) {
    const document = createSanitizedDocument(html);
    const body = findBody(document);
    if (!body) {
        return html;
    }
    // Bind the config once so filterTree only needs a one-argument predicate.
    const keepPredicate = (element) => shouldKeepCombined(element, htmlConfig);
    filterTree(body, keepPredicate, opts?.keepPositions);
    cleanAllElements(body);
    return serialize(document);
}
485
/**
 * Produces a text-only snapshot of the page body using markdown formatting.
 *
 * @param {string} html - source markup
 * @param {object} [htmlConfig] - optional config with `include` / `exclude` selectors
 * @returns {string} trimmed text rendering, or '' when there is no `<body>`
 */
export function htmlTextSnapshot(html, htmlConfig) {
    const body = findBody(createSanitizedDocument(html));
    return body ? processHtmlForText(body, htmlConfig).trim() : '';
}
496
/**
 * Renders the subtree rooted at `element` as markdown-flavoured plain text.
 *
 * Output conventions:
 *   - h1..h6            -> `#`..`######` headings
 *   - buttons and links -> `[label]`
 *   - form fields       -> `{placeholder-or-name}`
 *   - list items        -> `- text`
 *   - labels            -> `**text**`
 *   - other text of at least 5 characters is emitted as-is
 *
 * @param {object} element - root node to walk (parse5-style: `nodeName`,
 *   `tagName`, `attrs`, `childNodes`, `parentNode`)
 * @param {object} [htmlConfig] - optional `{ include, exclude }` selector lists
 * @returns {string} the formatted, trimmed text
 */
function processHtmlForText(element, htmlConfig) {
    const lines = [];
    // Match real heading tags only. A plain startsWith('h') test also catches
    // <header>, <hr>, <hgroup>, ... and would drop their whole subtree.
    const headingTag = /^h([1-6])$/;

    // Exclude selectors win; with no include list everything is included.
    const shouldIncludeElement = (el) => {
        if (!htmlConfig) {
            return true;
        }
        if (htmlConfig.exclude && matchesAnySelector(el, htmlConfig.exclude)) {
            return false;
        }
        if (!htmlConfig.include || htmlConfig.include.length === 0) {
            return true;
        }
        return matchesAnySelector(el, htmlConfig.include);
    };

    const processChildren = (el) => {
        el.childNodes.forEach((child) => processNode(child));
    };

    const fieldName = (el) => getAttribute(el, 'name') || getAttribute(el, 'id');

    // Emits `[text]` for a clickable element, or `fallback` when it has no text.
    const pushBracketed = (el, fallback) => {
        const label = getTextContent(el).trim();
        lines.push(label ? `[${label}]` : fallback);
    };

    // Formats buttons, links and form fields; other interactive elements
    // contribute only through their children.
    const processInteractive = (el, tagName) => {
        if (tagName === 'button' || getAttribute(el, 'role') === 'button') {
            pushBracketed(el, '[Button]');
            return;
        }
        if (tagName === 'a' || getAttribute(el, 'role') === 'link') {
            pushBracketed(el, '[Link]');
            return;
        }
        if (tagName === 'input') {
            const type = getAttribute(el, 'type');
            if (type === 'submit' || type === 'button' || type === 'reset') {
                // Button-like inputs are rendered like buttons.
                lines.push(`[${getAttribute(el, 'value') || type}]`);
                return;
            }
            lines.push(`{${getAttribute(el, 'placeholder') || fieldName(el) || 'Input'}}`);
            return;
        }
        if (tagName === 'textarea') {
            lines.push(`{${getAttribute(el, 'placeholder') || fieldName(el) || 'Textarea'}}`);
            return;
        }
        if (tagName === 'select') {
            lines.push(`{${fieldName(el) || 'Select'}}`);
            return;
        }
        processChildren(el);
    };

    // Handles non-heading text containers (headings are dealt with earlier).
    const processTextElement = (el, tagName) => {
        // Only the element's own text nodes, not text from descendants.
        const directText = el.childNodes
            .filter((child) => child.nodeName === '#text')
            .map((child) => child.value)
            .join('')
            .trim();
        if (directText.length < 5) {
            // Too short to stand alone, but descendants may still matter.
            processChildren(el);
            return;
        }
        if (tagName === 'li') {
            const indent = hasListParent(el) ? ' ' : '';
            // List items use their full text, descendants included.
            lines.push(`${indent}- ${getTextContent(el).trim()}`);
            return;
        }
        if (tagName === 'label') {
            lines.push(`**${directText}**`);
            return;
        }
        if (!hasTextAncestor(el)) {
            lines.push(directText);
        }
        processChildren(el);
    };

    const processNode = (node) => {
        if (node.nodeName === '#text') {
            const parent = node.parentNode;
            // A text node follows the include/exclude verdict of its parent element.
            if (parent && 'tagName' in parent && !shouldIncludeElement(parent)) {
                return;
            }
            const text = node.value.trim();
            if (text.length >= 5) {
                lines.push(text);
            }
            return;
        }
        if (!('tagName' in node)) {
            return;
        }
        const tagName = node.tagName.toLowerCase();
        if (tagName === 'style' || tagName === 'script') {
            return;
        }
        if (!shouldIncludeElement(node)) {
            // The element itself is skipped; its children may still qualify.
            processChildren(node);
            return;
        }
        const heading = headingTag.exec(tagName);
        if (heading) {
            const text = getTextContent(node).trim();
            if (text) {
                lines.push(`${'#'.repeat(Number(heading[1]))} ${text}`);
            }
            return;
        }
        if (shouldKeepInteractive(node)) {
            processInteractive(node, tagName);
            return;
        }
        if (TEXT_ELEMENT_TAGS.has(tagName)) {
            processTextElement(node, tagName);
            return;
        }
        processChildren(node);
    };

    processNode(element);

    let result = lines.join('\n\n');
    // Ensure headers have proper spacing.
    result = result.replace(/^(#{1,6} .+)$/gm, '\n$1\n');
    // Put form elements on their own lines.
    result = result.replace(/(\{[^}]+\}| \[[^\]]+\])/g, '\n$1');
    // Collapse runs of blank lines.
    result = result.replace(/\n{3,}/g, '\n\n');
    return result.trim();
}
697
+ // Helper functions
698
/**
 * Locates the `<body>` node of a parsed document.
 *
 * @param {object} document - document node exposing `childNodes`
 * @returns {object|null} the body node, or null when `<html>` or `<body>` is missing
 */
function findBody(document) {
    const named = (name) => (node) => node.nodeName === name;
    const htmlNode = document.childNodes.find(named('html'));
    if (!htmlNode || !('childNodes' in htmlNode)) {
        return null;
    }
    const bodyNode = htmlNode.childNodes.find(named('body'));
    return bodyNode || null;
}
704
/**
 * Decides whether an element counts as interactive.
 *
 * Precedence: hidden class (reject) > include selectors (accept) >
 * exclude selectors (reject) > data-explorbot-* attributes (accept) >
 * hidden inputs (reject) > interactive tag / role / event handler,
 * contenteditable or tabindex attribute (accept).
 *
 * @param {object} element - element node with `tagName` and `attrs`
 * @param {object} [selectorConfig] - optional `{ include, exclude }` selector lists
 * @returns {boolean}
 */
function shouldKeepInteractive(element, selectorConfig) {
    if (hasHiddenClass(element)) {
        return false;
    }
    const { include, exclude } = selectorConfig ?? {};
    if (include && matchesAnySelector(element, include)) {
        return true;
    }
    if (exclude && matchesAnySelector(element, exclude)) {
        return false;
    }
    if (hasExplorbotAttributes(element)) {
        return true;
    }
    const tagName = element.tagName.toLowerCase();
    const isHiddenInput = tagName === 'input' && getAttribute(element, 'type')?.toLowerCase() === 'hidden';
    if (isHiddenInput) {
        return false;
    }
    if (INTERACTIVE_TAGS.has(tagName)) {
        return true;
    }
    const role = getAttribute(element, 'role');
    if (role && INTERACTIVE_ROLES.has(role.toLowerCase())) {
        return true;
    }
    return (element.attrs ?? []).some(({ name }) => {
        const attrName = name.toLowerCase();
        return INTERACTIVE_EVENT_ATTRIBUTES.has(attrName) || attrName === 'contenteditable' || attrName === 'tabindex';
    });
}
737
/**
 * Keep-predicate for the combined snapshot: accepts interactive elements,
 * anything with a role or data-explorbot-* attribute, classed `<svg>` icons,
 * headings, text elements with more than 5 characters of text, and any
 * element that has a keepable child.
 *
 * @param {object} element - element node
 * @param {object} [htmlConfig] - optional `{ include, exclude }` selector lists
 * @returns {boolean}
 */
function shouldKeepCombined(element, htmlConfig) {
    if (hasHiddenClass(element)) {
        return false;
    }
    if (htmlConfig?.include && matchesAnySelector(element, htmlConfig.include)) {
        return true;
    }
    if (htmlConfig?.exclude && matchesAnySelector(element, htmlConfig.exclude)) {
        return false;
    }
    if (hasExplorbotAttributes(element) || getAttribute(element, 'role') || shouldKeepInteractive(element, htmlConfig)) {
        return true;
    }
    const tagName = element.tagName.toLowerCase();
    if (tagName === 'svg' && getAttribute(element, 'class')) {
        return true;
    }
    if (!TEXT_ELEMENT_TAGS.has(tagName)) {
        // Plain containers survive only through their children.
        return hasKeepableChildren(element, htmlConfig);
    }
    if (tagName.startsWith('h')) {
        return true;
    }
    return getTextContent(element).trim().length > 5;
}
765
/**
 * Reports whether any direct child is worth keeping: an element accepted by
 * `shouldKeepCombined`, or a text node with at least 5 non-padding characters.
 *
 * @param {object} element - node whose `childNodes` are inspected
 * @param {object} [htmlConfig] - forwarded to `shouldKeepCombined`
 * @returns {boolean}
 */
function hasKeepableChildren(element, htmlConfig) {
    if (!element.childNodes) {
        return false;
    }
    return element.childNodes.some((child) => {
        if ('tagName' in child) {
            return shouldKeepCombined(child, htmlConfig);
        }
        return child.nodeName === '#text' && child.value.trim().length >= 5;
    });
}
784
/**
 * Reports whether any element ancestor is a text container
 * (h1-h6, li, p, td, th, label).
 *
 * @param {object} element - node with a `parentNode` chain
 * @returns {boolean}
 */
function hasTextAncestor(element) {
    const textContainers = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'p', 'td', 'th', 'label']);
    // Climb until the chain ends or reaches a non-element node (e.g. the document).
    for (let ancestor = element.parentNode; ancestor && 'tagName' in ancestor; ancestor = ancestor.parentNode) {
        if (textContainers.has(ancestor.tagName.toLowerCase())) {
            return true;
        }
    }
    return false;
}
796
/**
 * Detects nesting inside an outer list: walks the ancestor chain and reports
 * whether the parent of any element ancestor is a `<ul>` or `<ol>`.
 *
 * @param {object} element - node with a `parentNode` chain
 * @returns {boolean}
 */
function hasListParent(element) {
    let current = element.parentNode;
    while (current && 'tagName' in current) {
        const above = current.parentNode;
        if (above && 'tagName' in above && ['ul', 'ol'].includes(above.tagName.toLowerCase())) {
            return true;
        }
        current = above;
    }
    return false;
}
810
/**
 * Reports whether an element is a `<button>` or carries a non-empty `role`
 * attribute; `filterTree` keeps every element child of such a container.
 *
 * @param {object} element - element node
 * @returns {boolean}
 */
function isInteractiveContainer(element) {
    return element.tagName.toLowerCase() === 'button' || Boolean(getAttribute(element, 'role'));
}
818
/**
 * Recursively prunes the subtree under `element`, in place.
 *
 * Children are visited last-to-first. Element children are filtered first and
 * then judged: inside an interactive container every element child is kept
 * and cleaned; elsewhere a child rejected by `shouldKeep` is removed (or
 * emptied when `keepPositions` is set) if it is hidden or has no content.
 * Whitespace-only text nodes are removed.
 *
 * @param {object} element - element whose children are filtered
 * @param {(el: object) => boolean} shouldKeep - keep-predicate for elements
 * @param {boolean} [keepPositions] - empty rejected elements instead of removing them
 * @returns {boolean} whether `element` has content worth keeping
 */
function filterTree(element, shouldKeep, keepPositions) {
    if (!element.childNodes) {
        return false;
    }
    const insideInteractive = isInteractiveContainer(element);

    // Removes `node` from the live child list if it is still present.
    const detach = (node) => {
        const at = element.childNodes.indexOf(node);
        if (at > -1) {
            element.childNodes.splice(at, 1);
        }
    };
    // Drops a rejected element, or hollows it out to keep sibling positions stable.
    const discard = (node) => {
        if (!keepPositions) {
            detach(node);
            return;
        }
        if (element.childNodes.indexOf(node) > -1) {
            emptyElement(node);
        }
    };

    let keptElementChild = false;
    let sawText = false;
    // Iterate a reversed snapshot so removals do not disturb the traversal.
    for (const child of [...element.childNodes].reverse()) {
        if ('tagName' in child) {
            // The child's subtree is filtered before the child itself is judged.
            const childHasContent = filterTree(child, shouldKeep, keepPositions);
            if (insideInteractive || shouldKeep(child)) {
                keptElementChild = true;
                cleanElement(child);
            }
            else if (hasHiddenClass(child) || !childHasContent) {
                discard(child);
            }
            else {
                // Rejected by the predicate but still has content: leave untouched.
                keptElementChild = true;
            }
        }
        else if (child.nodeName === '#text') {
            if (child.value.trim().length > 0) {
                sawText = true;
            }
            else {
                detach(child);
            }
        }
    }

    if (shouldKeep(element) || keptElementChild) {
        return true;
    }
    if (sawText && TEXT_ELEMENT_TAGS.has(element.tagName.toLowerCase())) {
        // Text elements need more than 5 characters of text to count.
        return getTextContent(element).trim().length > 5;
    }
    return sawText;
}
882
/**
 * Strips an element of all children and attributes, leaving the bare tag.
 *
 * @param {object} element - element node, mutated in place
 * @returns {void}
 */
function emptyElement(element) {
    Object.assign(element, { childNodes: [], attrs: [] });
}
886
/**
 * Applies `cleanElement` to an element and, depth-first, to every element
 * descendant.
 *
 * @param {object} element - root element, mutated in place
 * @returns {void}
 */
function cleanAllElements(element) {
    cleanElement(element);
    const children = element.childNodes;
    if (!children) {
        return;
    }
    children
        .filter((child) => 'tagName' in child)
        .forEach((child) => cleanAllElements(child));
}
896
/**
 * Normalizes a single element in place:
 *   - `<svg>` keeps only its `class` attribute and loses its children;
 *   - data-explorbot-* attributes are converted to plain attributes;
 *   - attributes outside the allow-list are dropped;
 *   - class names containing digits or matching a Tailwind pattern are
 *     removed, and an emptied `class` attribute is dropped altogether;
 *   - `<script>` loses its children.
 *
 * @param {object} element - element node with `tagName`, `attrs`, `childNodes`
 * @returns {void}
 */
function cleanElement(element) {
    const tagName = element.tagName.toLowerCase();
    if (tagName === 'svg') {
        element.attrs = element.attrs.filter((attr) => attr.name === 'class');
        element.childNodes = [];
    }

    const allowedNames = new Set([
        'id',
        'class',
        'name',
        'type',
        'value',
        'placeholder',
        'aria-label',
        'aria-labelledby',
        'aria-describedby',
        'aria-owns',
        'role',
        'title',
        'href',
        'src',
        'tabindex',
        'contenteditable',
        'onclick',
        'onmousedown',
        'onmouseup',
        'onchange',
        'onfocus',
        'required',
        'disabled',
        'checked',
        'selected',
        'action',
        'key',
        'label',
        'important',
        'eidx',
    ]);

    convertExplorbotAttributes(element);
    element.attrs = element.attrs.filter(({ name }) => allowedNames.has(name) || name.startsWith('data-explorbot-'));

    const isStableClassName = (className) => !/\d/.test(className);
    const isNotTailwind = (className) => !TAILWIND_CLASS_PATTERNS.some((pattern) => pattern.test(className));

    // Work on a snapshot: element.attrs may be reassigned while classes are pruned.
    const classAttrs = [...element.attrs].filter((attr) => attr.name === 'class');
    classAttrs.forEach((attr) => {
        attr.value = attr.value
            .split(/\s+/)
            .filter(isStableClassName)
            .filter(isNotTailwind)
            .join(' ');
        if (!attr.value) {
            element.attrs = element.attrs.filter((a) => a.name !== 'class');
        }
    });

    if (tagName === 'script') {
        element.childNodes = [];
    }
}
951
/**
 * Concatenates the values of all text nodes under `element`, in document
 * order, and trims the result.
 *
 * @param {object} element - node to read (`nodeName`, optional `childNodes` / `value`)
 * @returns {string}
 */
function getTextContent(element) {
    let collected = '';
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped in document order.
    const pending = [element];
    while (pending.length > 0) {
        const node = pending.pop();
        if (node.nodeName === '#text') {
            collected += node.value;
        }
        else if ('childNodes' in node) {
            const kids = [];
            node.childNodes.forEach((kid) => kids.push(kid));
            for (let i = kids.length - 1; i >= 0; i--) {
                pending.push(kids[i]);
            }
        }
    }
    return collected.trim();
}
964
/**
 * Looks up an attribute value by exact name.
 *
 * Nodes without an `attrs` list (e.g. text or document nodes) yield
 * `undefined` instead of throwing, matching the guards already used by
 * `hasExplorbotAttributes` and `shouldKeepInteractive`.
 *
 * @param {object} element - node that may carry an `attrs` array of `{ name, value }`
 * @param {string} name - attribute name to find (case-sensitive)
 * @returns {string|undefined} the first matching attribute's value, if any
 */
function getAttribute(element, name) {
    return element.attrs?.find((attr) => attr.name === name)?.value;
}
968
/**
 * Reports whether the element's first `class` attribute contains any class
 * name listed in `HIDDEN_CLASSES`.
 *
 * @param {object} element - element node with an `attrs` array
 * @returns {boolean}
 */
function hasHiddenClass(element) {
    for (const attr of element.attrs) {
        if (attr.name !== 'class') {
            continue;
        }
        // Only the first class attribute is consulted.
        return attr.value.split(/\s+/).some((className) => HIDDEN_CLASSES.has(className));
    }
    return false;
}
975
/**
 * Checks whether an element has any data-explorbot-* attribute.
 *
 * Always returns a real boolean: nodes without an `attrs` list (text nodes,
 * the document) yield `false` rather than `undefined`.
 *
 * @param {object} element - node that may carry an `attrs` array
 * @returns {boolean}
 */
function hasExplorbotAttributes(element) {
    return element.attrs?.some((attr) => attr.name.startsWith('data-explorbot-')) ?? false;
}
981
/**
 * Converts data-explorbot-* attributes to regular attributes, in place,
 * e.g. `data-explorbot-value` becomes `value`.
 *
 * A converted attribute replaces any existing attribute of the same name.
 * Simply appending it would leave two attributes with one name, which is
 * invalid HTML once serialized, and first-match lookups would keep returning
 * the stale original value. Converted attributes are placed after the
 * remaining regular ones.
 *
 * @param {object} element - element node with an `attrs` array of `{ name, value }`
 * @returns {void}
 */
function convertExplorbotAttributes(element) {
    const prefix = 'data-explorbot-';
    const regular = [];
    const converted = [];
    for (const attr of element.attrs) {
        if (attr.name.startsWith(prefix)) {
            converted.push({ name: attr.name.slice(prefix.length), value: attr.value });
        }
        else {
            regular.push(attr);
        }
    }
    const overridden = new Set(converted.map((attr) => attr.name));
    element.attrs = [...regular.filter((attr) => !overridden.has(attr.name)), ...converted];
}
997
/**
 * Collapses every run of whitespace to a single space and strips leading and
 * trailing whitespace.
 *
 * @param {string} text - raw link title
 * @returns {string}
 */
function sanitizeLinkTitle(text) {
    return text.trim().split(/\s+/).join(' ');
}
1000
/**
 * Collects navigable links from an HTML fragment in document order.
 *
 * Skips anchors without an `href`, anchors whose `href` starts with
 * `javascript:`, `mailto:`, `tel:` or `#`, anchors with no title, and titles
 * longer than 100 characters. The title is the `aria-label` when present,
 * otherwise the anchor's text. Each (url, title) pair is reported once.
 *
 * @param {string} html - HTML fragment
 * @returns {Array<{title: string, url: string}>}
 */
export function extractLinks(html) {
    const ignoredPrefixes = ['javascript:', 'mailto:', 'tel:', '#'];
    const collected = [];
    const seenKeys = new Set();

    const recordAnchor = (anchor) => {
        const href = getAttribute(anchor, 'href');
        if (!href || ignoredPrefixes.some((prefix) => href.startsWith(prefix))) {
            return;
        }
        const title = sanitizeLinkTitle(getAttribute(anchor, 'aria-label') || getTextContent(anchor));
        if (!title || title.length > 100) {
            return;
        }
        const key = `${href}|${title}`;
        if (seenKeys.has(key)) {
            return;
        }
        seenKeys.add(key);
        collected.push({ title, url: href });
    };

    const visit = (node) => {
        if ('tagName' in node && node.tagName.toLowerCase() === 'a') {
            recordAnchor(node);
        }
        if ('childNodes' in node) {
            for (const child of node.childNodes) {
                visit(child);
            }
        }
    };

    visit(parseFragment(html));
    return collected;
}
1036
/**
 * Gathers the text of h1-h4 headings from an HTML fragment.
 *
 * @param {string} html - HTML fragment
 * @returns {{h1?: string, h2?: string, h3?: string, h4?: string}} for each
 *   level that occurs, its non-empty heading texts joined with ' | '
 */
export function extractHeadings(html) {
    const levels = ['h1', 'h2', 'h3', 'h4'];
    const found = { h1: [], h2: [], h3: [], h4: [] };

    const visit = (node) => {
        if ('tagName' in node) {
            const tagName = node.tagName.toLowerCase();
            if (levels.includes(tagName)) {
                const text = getTextContent(node).trim();
                if (text) {
                    found[tagName].push(text);
                }
            }
        }
        if ('childNodes' in node) {
            for (const child of node.childNodes) {
                visit(child);
            }
        }
    };
    visit(parseFragment(html));

    const result = {};
    for (const level of levels) {
        if (found[level].length > 0) {
            result[level] = found[level].join(' | ');
        }
    }
    return result;
}
1077
/**
 * Wraps code in a fenced markdown block, with a newline before the opening
 * fence and after the closing fence.
 *
 * @param {*} code - content placed between the fences (stringified)
 * @returns {string}
 */
export function codeToMarkdown(code) {
    const fence = '```';
    return `\n${fence}\n${code}\n${fence}\n`;
}
1084
/**
 * Reports whether an HTML string has no body content: the input is falsy, no
 * `<body>...</body>` pair is found, or the body holds only whitespace.
 *
 * @param {string} html - full page markup
 * @returns {boolean}
 */
export function isBodyEmpty(html) {
    if (!html) {
        return true;
    }
    const inner = /<body[^>]*>(.*?)<\/body>/is.exec(html)?.[1];
    return inner === undefined || inner.trim() === '';
}
1093
/**
 * Extracts an HTML snippet around a targeted element based on a locator.
 * Used for accessibility analysis to show which element was being targeted.
 *
 * Search terms are derived from the locator (XPath text/attribute values,
 * JSON `text` / `name`, or CSS class / id / plain text). The first term of at
 * least 2 characters that occurs in `html` selects the enclosing tag, and up
 * to 500 characters starting at that tag are returned.
 *
 * @param {string} html - page markup to search
 * @param {string} locator - XPath, JSON, CSS or plain-text locator
 * @returns {string} the trimmed snippet, or '' when nothing matches
 */
export function extractTargetedHtml(html, locator) {
    if (!html || !locator) {
        return '';
    }
    for (const term of collectLocatorSearchTerms(locator)) {
        if (term.length < 2) {
            continue;
        }
        const idx = html.indexOf(term);
        if (idx === -1) {
            continue;
        }
        return excerptElementAround(html, idx);
    }
    return '';
}

/** Derives literal strings to search for in the markup from a locator. */
function collectLocatorSearchTerms(locator) {
    const terms = [];
    if (locator.startsWith('//') || locator.startsWith('(//')) {
        // XPath locator: text()='...' and @attr='...' values.
        const textMatch = locator.match(/text\(\)\s*=\s*['"]([^'"]+)['"]/i);
        if (textMatch) {
            terms.push(textMatch[1]);
        }
        for (const attrExpr of locator.match(/@[\w-]+\s*=\s*['"]([^'"]+)['"]/g) ?? []) {
            const valueMatch = attrExpr.match(/['"]([^'"]+)['"]/);
            if (valueMatch) {
                terms.push(valueMatch[1]);
            }
        }
    }
    else if (locator.startsWith('{')) {
        // JSON locator (Playwright-style).
        try {
            const parsed = JSON.parse(locator);
            for (const candidate of [parsed.text, parsed.name]) {
                // Only strings can be searched for literally in the markup.
                if (typeof candidate === 'string' && candidate) {
                    terms.push(candidate);
                }
            }
        }
        catch {
            // Not valid JSON: deliberately best-effort, contributes no terms.
        }
    }
    else {
        // CSS selector or plain text.
        const cleanLoc = locator.replace(/['"]/g, '');
        if (!cleanLoc.startsWith('.') && !cleanLoc.startsWith('#') && !cleanLoc.includes('[')) {
            terms.push(cleanLoc);
        }
        const classMatch = cleanLoc.match(/\.([a-zA-Z0-9_-]+)/);
        if (classMatch) {
            terms.push(classMatch[1]);
        }
        const idMatch = cleanLoc.match(/#([a-zA-Z0-9_-]+)/);
        if (idMatch) {
            terms.push(idMatch[1]);
        }
    }
    return terms;
}

/** Returns up to 500 characters of markup starting at the tag that encloses `idx`. */
function excerptElementAround(html, idx) {
    const start = Math.max(0, html.lastIndexOf('<', idx));
    const scanLimit = Math.min(html.length, start + 1000);
    let depth = 0;
    // If the element does not balance inside the scan window, fall back to the
    // window itself so a truncated excerpt is returned instead of ''.
    let end = scanLimit;
    for (let i = start; i < scanLimit; i++) {
        if (html[i] === '<') {
            depth += html[i + 1] === '/' ? -1 : 1;
        }
        else if (depth === 0 && html[i] === '>') {
            end = i + 1;
            break;
        }
    }
    return html.slice(start, Math.min(end, start + 500)).trim();
}
1163
+ //# sourceMappingURL=html.js.map