explorbot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/LICENSE +94 -0
  2. package/README.md +267 -0
  3. package/assets/sample-files/sample.docx +0 -0
  4. package/assets/sample-files/sample.mp3 +0 -0
  5. package/assets/sample-files/sample.mp4 +0 -0
  6. package/assets/sample-files/sample.pdf +21 -0
  7. package/assets/sample-files/sample.png +0 -0
  8. package/assets/sample-files/sample.xlsx +0 -0
  9. package/assets/sample-files/sample.zip +0 -0
  10. package/dist/assets/sample-files/sample.docx +0 -0
  11. package/dist/assets/sample-files/sample.mp3 +0 -0
  12. package/dist/assets/sample-files/sample.mp4 +0 -0
  13. package/dist/assets/sample-files/sample.pdf +21 -0
  14. package/dist/assets/sample-files/sample.png +0 -0
  15. package/dist/assets/sample-files/sample.xlsx +0 -0
  16. package/dist/assets/sample-files/sample.zip +0 -0
  17. package/dist/bin/explorbot-cli.js +683 -0
  18. package/dist/bin/explorbot-cli.js.map +1 -0
  19. package/dist/boat/api-tester/bin/apibot-cli.js +5 -0
  20. package/dist/boat/api-tester/bin/apibot-cli.js.map +1 -0
  21. package/dist/boat/api-tester/example/apibot.config.js +31 -0
  22. package/dist/boat/api-tester/example/apibot.config.js.map +1 -0
  23. package/dist/boat/api-tester/src/ai/chief/styles.js +13 -0
  24. package/dist/boat/api-tester/src/ai/chief/styles.js.map +1 -0
  25. package/dist/boat/api-tester/src/ai/chief.js +301 -0
  26. package/dist/boat/api-tester/src/ai/chief.js.map +1 -0
  27. package/dist/boat/api-tester/src/ai/curler-tools.js +263 -0
  28. package/dist/boat/api-tester/src/ai/curler-tools.js.map +1 -0
  29. package/dist/boat/api-tester/src/ai/curler.js +271 -0
  30. package/dist/boat/api-tester/src/ai/curler.js.map +1 -0
  31. package/dist/boat/api-tester/src/api-client.js +26 -0
  32. package/dist/boat/api-tester/src/api-client.js.map +1 -0
  33. package/dist/boat/api-tester/src/apibot.js +166 -0
  34. package/dist/boat/api-tester/src/apibot.js.map +1 -0
  35. package/dist/boat/api-tester/src/cli.js +262 -0
  36. package/dist/boat/api-tester/src/cli.js.map +1 -0
  37. package/dist/boat/api-tester/src/config.js +159 -0
  38. package/dist/boat/api-tester/src/config.js.map +1 -0
  39. package/dist/prompts/audit-rules.md +124 -0
  40. package/dist/rules/chief/general.md +11 -0
  41. package/dist/rules/chief/styles/curious.md +12 -0
  42. package/dist/rules/chief/styles/hacker.md +19 -0
  43. package/dist/rules/chief/styles/normal.md +11 -0
  44. package/dist/rules/chief/styles/psycho.md +17 -0
  45. package/dist/rules/navigator/multiple-locator.md +47 -0
  46. package/dist/rules/navigator/output.md +69 -0
  47. package/dist/rules/navigator/verification-actions.md +122 -0
  48. package/dist/rules/navigator/verification-output.md +53 -0
  49. package/dist/rules/planner/styles/curious.md +39 -0
  50. package/dist/rules/planner/styles/normal.md +21 -0
  51. package/dist/rules/planner/styles/psycho.md +14 -0
  52. package/dist/rules/researcher/list-element.md +11 -0
  53. package/dist/rules/researcher/screenshot-ui-map.md +30 -0
  54. package/dist/rules/researcher/section-ui-map.md +18 -0
  55. package/dist/rules/researcher/ui-map-table.md +18 -0
  56. package/dist/src/action-result.js +574 -0
  57. package/dist/src/action-result.js.map +1 -0
  58. package/dist/src/action.js +388 -0
  59. package/dist/src/action.js.map +1 -0
  60. package/dist/src/activity.js +86 -0
  61. package/dist/src/activity.js.map +1 -0
  62. package/dist/src/ai/agent.js +2 -0
  63. package/dist/src/ai/agent.js.map +1 -0
  64. package/dist/src/ai/bosun.js +443 -0
  65. package/dist/src/ai/bosun.js.map +1 -0
  66. package/dist/src/ai/captain/idle-mode.js +102 -0
  67. package/dist/src/ai/captain/idle-mode.js.map +1 -0
  68. package/dist/src/ai/captain/mixin.js +11 -0
  69. package/dist/src/ai/captain/mixin.js.map +1 -0
  70. package/dist/src/ai/captain/test-mode.js +251 -0
  71. package/dist/src/ai/captain/test-mode.js.map +1 -0
  72. package/dist/src/ai/captain/web-mode.js +124 -0
  73. package/dist/src/ai/captain/web-mode.js.map +1 -0
  74. package/dist/src/ai/captain.js +442 -0
  75. package/dist/src/ai/captain.js.map +1 -0
  76. package/dist/src/ai/conversation.js +176 -0
  77. package/dist/src/ai/conversation.js.map +1 -0
  78. package/dist/src/ai/experience-compactor.js +232 -0
  79. package/dist/src/ai/experience-compactor.js.map +1 -0
  80. package/dist/src/ai/fisherman-tools.js +154 -0
  81. package/dist/src/ai/fisherman-tools.js.map +1 -0
  82. package/dist/src/ai/fisherman.js +184 -0
  83. package/dist/src/ai/fisherman.js.map +1 -0
  84. package/dist/src/ai/historian.js +384 -0
  85. package/dist/src/ai/historian.js.map +1 -0
  86. package/dist/src/ai/navigator.js +493 -0
  87. package/dist/src/ai/navigator.js.map +1 -0
  88. package/dist/src/ai/pilot.js +684 -0
  89. package/dist/src/ai/pilot.js.map +1 -0
  90. package/dist/src/ai/planner/session-dedup.js +28 -0
  91. package/dist/src/ai/planner/session-dedup.js.map +1 -0
  92. package/dist/src/ai/planner/styles.js +15 -0
  93. package/dist/src/ai/planner/styles.js.map +1 -0
  94. package/dist/src/ai/planner/subpages.js +118 -0
  95. package/dist/src/ai/planner/subpages.js.map +1 -0
  96. package/dist/src/ai/planner.js +486 -0
  97. package/dist/src/ai/planner.js.map +1 -0
  98. package/dist/src/ai/provider.js +540 -0
  99. package/dist/src/ai/provider.js.map +1 -0
  100. package/dist/src/ai/quartermaster.js +210 -0
  101. package/dist/src/ai/quartermaster.js.map +1 -0
  102. package/dist/src/ai/researcher/cache.js +95 -0
  103. package/dist/src/ai/researcher/cache.js.map +1 -0
  104. package/dist/src/ai/researcher/coordinates.js +210 -0
  105. package/dist/src/ai/researcher/coordinates.js.map +1 -0
  106. package/dist/src/ai/researcher/deep-analysis.js +364 -0
  107. package/dist/src/ai/researcher/deep-analysis.js.map +1 -0
  108. package/dist/src/ai/researcher/fingerprint-worker.js +46 -0
  109. package/dist/src/ai/researcher/fingerprint-worker.js.map +1 -0
  110. package/dist/src/ai/researcher/focus.js +37 -0
  111. package/dist/src/ai/researcher/focus.js.map +1 -0
  112. package/dist/src/ai/researcher/locators.js +242 -0
  113. package/dist/src/ai/researcher/locators.js.map +1 -0
  114. package/dist/src/ai/researcher/mixin.js +3 -0
  115. package/dist/src/ai/researcher/mixin.js.map +1 -0
  116. package/dist/src/ai/researcher/parser.js +160 -0
  117. package/dist/src/ai/researcher/parser.js.map +1 -0
  118. package/dist/src/ai/researcher/research-result.js +110 -0
  119. package/dist/src/ai/researcher/research-result.js.map +1 -0
  120. package/dist/src/ai/researcher.js +776 -0
  121. package/dist/src/ai/researcher.js.map +1 -0
  122. package/dist/src/ai/rules.js +368 -0
  123. package/dist/src/ai/rules.js.map +1 -0
  124. package/dist/src/ai/task-agent.js +110 -0
  125. package/dist/src/ai/task-agent.js.map +1 -0
  126. package/dist/src/ai/tester.js +840 -0
  127. package/dist/src/ai/tester.js.map +1 -0
  128. package/dist/src/ai/tools.js +980 -0
  129. package/dist/src/ai/tools.js.map +1 -0
  130. package/dist/src/api/api-client.js +91 -0
  131. package/dist/src/api/api-client.js.map +1 -0
  132. package/dist/src/api/request-result.js +177 -0
  133. package/dist/src/api/request-result.js.map +1 -0
  134. package/dist/src/api/request-store.js +109 -0
  135. package/dist/src/api/request-store.js.map +1 -0
  136. package/dist/src/api/spec-reader.js +148 -0
  137. package/dist/src/api/spec-reader.js.map +1 -0
  138. package/dist/src/api/xhr-capture.js +91 -0
  139. package/dist/src/api/xhr-capture.js.map +1 -0
  140. package/dist/src/browser-server.js +67 -0
  141. package/dist/src/browser-server.js.map +1 -0
  142. package/dist/src/command-handler.js +363 -0
  143. package/dist/src/command-handler.js.map +1 -0
  144. package/dist/src/commands/add-rule-command.js +52 -0
  145. package/dist/src/commands/add-rule-command.js.map +1 -0
  146. package/dist/src/commands/base-command.js +14 -0
  147. package/dist/src/commands/base-command.js.map +1 -0
  148. package/dist/src/commands/clean-command.js +67 -0
  149. package/dist/src/commands/clean-command.js.map +1 -0
  150. package/dist/src/commands/context-aria-command.js +18 -0
  151. package/dist/src/commands/context-aria-command.js.map +1 -0
  152. package/dist/src/commands/context-command.js +57 -0
  153. package/dist/src/commands/context-command.js.map +1 -0
  154. package/dist/src/commands/context-data-command.js +25 -0
  155. package/dist/src/commands/context-data-command.js.map +1 -0
  156. package/dist/src/commands/context-experience-command.js +41 -0
  157. package/dist/src/commands/context-experience-command.js.map +1 -0
  158. package/dist/src/commands/context-html-command.js +26 -0
  159. package/dist/src/commands/context-html-command.js.map +1 -0
  160. package/dist/src/commands/context-knowledge-command.js +36 -0
  161. package/dist/src/commands/context-knowledge-command.js.map +1 -0
  162. package/dist/src/commands/debug-command.js +12 -0
  163. package/dist/src/commands/debug-command.js.map +1 -0
  164. package/dist/src/commands/drill-command.js +29 -0
  165. package/dist/src/commands/drill-command.js.map +1 -0
  166. package/dist/src/commands/exit-command.js +26 -0
  167. package/dist/src/commands/exit-command.js.map +1 -0
  168. package/dist/src/commands/explore-command.js +124 -0
  169. package/dist/src/commands/explore-command.js.map +1 -0
  170. package/dist/src/commands/freesail-command.js +84 -0
  171. package/dist/src/commands/freesail-command.js.map +1 -0
  172. package/dist/src/commands/help-command.js +7 -0
  173. package/dist/src/commands/help-command.js.map +1 -0
  174. package/dist/src/commands/index.js +63 -0
  175. package/dist/src/commands/index.js.map +1 -0
  176. package/dist/src/commands/knows-command.js +54 -0
  177. package/dist/src/commands/knows-command.js.map +1 -0
  178. package/dist/src/commands/learn-command.js +35 -0
  179. package/dist/src/commands/learn-command.js.map +1 -0
  180. package/dist/src/commands/navigate-command.js +16 -0
  181. package/dist/src/commands/navigate-command.js.map +1 -0
  182. package/dist/src/commands/path-command.js +70 -0
  183. package/dist/src/commands/path-command.js.map +1 -0
  184. package/dist/src/commands/plan-clear-command.js +13 -0
  185. package/dist/src/commands/plan-clear-command.js.map +1 -0
  186. package/dist/src/commands/plan-command.js +36 -0
  187. package/dist/src/commands/plan-command.js.map +1 -0
  188. package/dist/src/commands/plan-edit-command.js +8 -0
  189. package/dist/src/commands/plan-edit-command.js.map +1 -0
  190. package/dist/src/commands/plan-load-command.js +16 -0
  191. package/dist/src/commands/plan-load-command.js.map +1 -0
  192. package/dist/src/commands/plan-reload-command.js +23 -0
  193. package/dist/src/commands/plan-reload-command.js.map +1 -0
  194. package/dist/src/commands/plan-save-command.js +22 -0
  195. package/dist/src/commands/plan-save-command.js.map +1 -0
  196. package/dist/src/commands/research-command.js +38 -0
  197. package/dist/src/commands/research-command.js.map +1 -0
  198. package/dist/src/commands/start-command.js +12 -0
  199. package/dist/src/commands/start-command.js.map +1 -0
  200. package/dist/src/commands/status-command.js +19 -0
  201. package/dist/src/commands/status-command.js.map +1 -0
  202. package/dist/src/commands/test-command.js +85 -0
  203. package/dist/src/commands/test-command.js.map +1 -0
  204. package/dist/src/components/ActivityPane.js +55 -0
  205. package/dist/src/components/ActivityPane.js.map +1 -0
  206. package/dist/src/components/AddKnowledge.js +122 -0
  207. package/dist/src/components/AddKnowledge.js.map +1 -0
  208. package/dist/src/components/AddRule.js +117 -0
  209. package/dist/src/components/AddRule.js.map +1 -0
  210. package/dist/src/components/App.js +313 -0
  211. package/dist/src/components/App.js.map +1 -0
  212. package/dist/src/components/Autocomplete.js +43 -0
  213. package/dist/src/components/Autocomplete.js.map +1 -0
  214. package/dist/src/components/InputPane.js +207 -0
  215. package/dist/src/components/InputPane.js.map +1 -0
  216. package/dist/src/components/InputReadline.js +598 -0
  217. package/dist/src/components/InputReadline.js.map +1 -0
  218. package/dist/src/components/LogPane.js +123 -0
  219. package/dist/src/components/LogPane.js.map +1 -0
  220. package/dist/src/components/PlanEditor.js +126 -0
  221. package/dist/src/components/PlanEditor.js.map +1 -0
  222. package/dist/src/components/PlanPane.js +51 -0
  223. package/dist/src/components/PlanPane.js.map +1 -0
  224. package/dist/src/components/SessionTimer.js +26 -0
  225. package/dist/src/components/SessionTimer.js.map +1 -0
  226. package/dist/src/components/StateTransitionPane.js +107 -0
  227. package/dist/src/components/StateTransitionPane.js.map +1 -0
  228. package/dist/src/components/StatusPane.js +37 -0
  229. package/dist/src/components/StatusPane.js.map +1 -0
  230. package/dist/src/components/TaskPane.js +96 -0
  231. package/dist/src/components/TaskPane.js.map +1 -0
  232. package/dist/src/components/Welcome.js +52 -0
  233. package/dist/src/components/Welcome.js.map +1 -0
  234. package/dist/src/components/WelcomeChecklist.js +96 -0
  235. package/dist/src/components/WelcomeChecklist.js.map +1 -0
  236. package/dist/src/components/WelcomeCommands.js +61 -0
  237. package/dist/src/components/WelcomeCommands.js.map +1 -0
  238. package/dist/src/components/autocomplete-store.js +22 -0
  239. package/dist/src/components/autocomplete-store.js.map +1 -0
  240. package/dist/src/components/parse-keypress.js +174 -0
  241. package/dist/src/components/parse-keypress.js.map +1 -0
  242. package/dist/src/config.js +249 -0
  243. package/dist/src/config.js.map +1 -0
  244. package/dist/src/execution-controller.js +92 -0
  245. package/dist/src/execution-controller.js.map +1 -0
  246. package/dist/src/experience-tracker.js +294 -0
  247. package/dist/src/experience-tracker.js.map +1 -0
  248. package/dist/src/explorbot.js +348 -0
  249. package/dist/src/explorbot.js.map +1 -0
  250. package/dist/src/explorer.js +611 -0
  251. package/dist/src/explorer.js.map +1 -0
  252. package/dist/src/index.js +56 -0
  253. package/dist/src/index.js.map +1 -0
  254. package/dist/src/knowledge-tracker.js +184 -0
  255. package/dist/src/knowledge-tracker.js.map +1 -0
  256. package/dist/src/observability.js +126 -0
  257. package/dist/src/observability.js.map +1 -0
  258. package/dist/src/reporter.js +185 -0
  259. package/dist/src/reporter.js.map +1 -0
  260. package/dist/src/state-manager.js +427 -0
  261. package/dist/src/state-manager.js.map +1 -0
  262. package/dist/src/stats.js +44 -0
  263. package/dist/src/stats.js.map +1 -0
  264. package/dist/src/test-plan.js +343 -0
  265. package/dist/src/test-plan.js.map +1 -0
  266. package/dist/src/utils/aria.js +588 -0
  267. package/dist/src/utils/aria.js.map +1 -0
  268. package/dist/src/utils/code-extractor.js +21 -0
  269. package/dist/src/utils/code-extractor.js.map +1 -0
  270. package/dist/src/utils/context-formatter.js +205 -0
  271. package/dist/src/utils/context-formatter.js.map +1 -0
  272. package/dist/src/utils/error-page.js +19 -0
  273. package/dist/src/utils/error-page.js.map +1 -0
  274. package/dist/src/utils/expandable.js +35 -0
  275. package/dist/src/utils/expandable.js.map +1 -0
  276. package/dist/src/utils/hooks-runner.js +77 -0
  277. package/dist/src/utils/hooks-runner.js.map +1 -0
  278. package/dist/src/utils/html-diff.js +734 -0
  279. package/dist/src/utils/html-diff.js.map +1 -0
  280. package/dist/src/utils/html.js +1163 -0
  281. package/dist/src/utils/html.js.map +1 -0
  282. package/dist/src/utils/logger.js +465 -0
  283. package/dist/src/utils/logger.js.map +1 -0
  284. package/dist/src/utils/loop.js +126 -0
  285. package/dist/src/utils/loop.js.map +1 -0
  286. package/dist/src/utils/markdown-parser.js +117 -0
  287. package/dist/src/utils/markdown-parser.js.map +1 -0
  288. package/dist/src/utils/markdown-query.js +393 -0
  289. package/dist/src/utils/markdown-query.js.map +1 -0
  290. package/dist/src/utils/markdown-terminal.js +40 -0
  291. package/dist/src/utils/markdown-terminal.js.map +1 -0
  292. package/dist/src/utils/research-parser.js +2 -0
  293. package/dist/src/utils/research-parser.js.map +1 -0
  294. package/dist/src/utils/retry.js +55 -0
  295. package/dist/src/utils/retry.js.map +1 -0
  296. package/dist/src/utils/rules-loader.js +104 -0
  297. package/dist/src/utils/rules-loader.js.map +1 -0
  298. package/dist/src/utils/strings.js +14 -0
  299. package/dist/src/utils/strings.js.map +1 -0
  300. package/dist/src/utils/test-plan-markdown.js +301 -0
  301. package/dist/src/utils/test-plan-markdown.js.map +1 -0
  302. package/dist/src/utils/throttle.js +16 -0
  303. package/dist/src/utils/throttle.js.map +1 -0
  304. package/dist/src/utils/unique-names.js +13 -0
  305. package/dist/src/utils/unique-names.js.map +1 -0
  306. package/dist/src/utils/url-matcher.js +48 -0
  307. package/dist/src/utils/url-matcher.js.map +1 -0
  308. package/dist/src/utils/web-element.js +131 -0
  309. package/dist/src/utils/web-element.js.map +1 -0
  310. package/dist/src/utils/xpath.js +110 -0
  311. package/dist/src/utils/xpath.js.map +1 -0
  312. package/package.json +119 -0
  313. package/prompts/audit-rules.md +124 -0
  314. package/rules/chief/general.md +11 -0
  315. package/rules/chief/styles/curious.md +12 -0
  316. package/rules/chief/styles/hacker.md +19 -0
  317. package/rules/chief/styles/normal.md +11 -0
  318. package/rules/chief/styles/psycho.md +17 -0
  319. package/rules/navigator/multiple-locator.md +47 -0
  320. package/rules/navigator/output.md +69 -0
  321. package/rules/navigator/verification-actions.md +122 -0
  322. package/rules/navigator/verification-output.md +53 -0
  323. package/rules/planner/styles/curious.md +39 -0
  324. package/rules/planner/styles/normal.md +21 -0
  325. package/rules/planner/styles/psycho.md +14 -0
  326. package/rules/researcher/list-element.md +11 -0
  327. package/rules/researcher/screenshot-ui-map.md +30 -0
  328. package/rules/researcher/section-ui-map.md +18 -0
  329. package/rules/researcher/ui-map-table.md +18 -0
@@ -0,0 +1,734 @@
1
+ import { parse, serialize } from 'parse5';
2
+ import { TAILWIND_CLASS_PATTERNS, TRASH_HTML_CLASSES, minifyHtml } from "./html.js";
3
+ import { isDynamicId, isGenericClass } from "./xpath.js";
4
// Tree paths of the html/head/body scaffolding. These are skipped when collecting
// changed paths and when searching for a stable container.
const IGNORED_PATHS = new Set([
    'html[1]',
    'html[1]/head[1]',
    'html[1]/body[1]',
]);
5
/**
 * Concatenate the values of every `#text` node found at or below `element`,
 * in document order.
 *
 * @param {object} element - Node to read; nodes other than `#text` are only descended into when they expose `childNodes`.
 * @returns {string} The joined text (empty string when no text nodes exist).
 */
function getTextContent(element) {
    // Explicit stack instead of recursion; children are pushed right-to-left so
    // that the left-most child is popped (visited) first.
    const pending = [element];
    let collected = '';
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.nodeName === '#text') {
            collected += current.value;
            continue;
        }
        if ('childNodes' in current) {
            for (let i = current.childNodes.length - 1; i >= 0; i--) {
                pending.push(current.childNodes[i]);
            }
        }
    }
    return collected;
}
21
/**
 * Read the trimmed text of the document's html > head > title element.
 *
 * @param {object} document - Parsed document node.
 * @returns {string|null} The title text, or null when any element on the
 *   html/head/title chain is missing or the title is empty after trimming.
 */
function getDocumentTitle(document) {
    const findChildByTag = (parent, tag) => parent.childNodes.find((node) => 'tagName' in node && node.tagName?.toLowerCase() === tag);
    let current = document;
    // Walk down one level per tag; give up as soon as a level is missing.
    for (const tag of ['html', 'head', 'title']) {
        if (!current.childNodes) {
            return null;
        }
        current = findChildByTag(current, tag);
        if (!current) {
            return null;
        }
    }
    const title = getTextContent(current).trim();
    if (title.length > 0) {
        return title;
    }
    return null;
}
42
/**
 * Ensure the document has a <head> whose only child is a <title> containing `titleText`.
 *
 * No-op when `titleText` is falsy, when `document` has no `childNodes`, or when no
 * <html> element with a child list is found among the document's children.
 * An existing <head> has its children replaced by the new <title>; a missing <head>
 * is created and inserted before <body>, or as the first child of <html> when there
 * is no <body>.
 *
 * @param {object} document - Parsed document node; mutated in place.
 * @param {string|null} titleText - Text for the <title> element.
 * @returns {void}
 */
function ensureDocumentTitle(document, titleText) {
    if (!titleText || !document.childNodes) {
        return;
    }
    const isTag = (tag) => (node) => 'tagName' in node && node.tagName?.toLowerCase() === tag;
    const htmlElement = document.childNodes.find(isTag('html'));
    // An <html> node without a child list cannot host a <head>; bail out instead of throwing.
    if (!htmlElement || !htmlElement.childNodes) {
        return;
    }
    const namespace = htmlElement.namespaceURI || 'http://www.w3.org/1999/xhtml';
    const createElement = (tagName, parentNode) => ({
        nodeName: tagName,
        tagName,
        attrs: [],
        namespaceURI: namespace,
        childNodes: [],
        parentNode,
    });
    let headElement = htmlElement.childNodes.find(isTag('head'));
    if (headElement) {
        // Existing head: drop whatever it holds so the title below is its only child.
        headElement.childNodes = [];
    }
    else {
        headElement = createElement('head', htmlElement);
        // Insert head before body if possible, otherwise prepend so head stays first.
        const bodyIndex = htmlElement.childNodes.findIndex(isTag('body'));
        const insertAt = bodyIndex === -1 ? 0 : bodyIndex;
        htmlElement.childNodes.splice(insertAt, 0, headElement);
    }
    const titleElement = createElement('title', headElement);
    titleElement.childNodes.push({
        nodeName: '#text',
        value: titleText,
        parentNode: titleElement,
    });
    headElement.childNodes.push(titleElement);
}
92
/**
 * Tell whether `node` is a non-null object carrying an array-valued `childNodes` property.
 *
 * @param {*} node - Value to inspect.
 * @returns {boolean} True only for objects whose `childNodes` is an array.
 */
function hasChildNodes(node) {
    if (node === null || typeof node !== 'object') {
        return false;
    }
    return 'childNodes' in node && Array.isArray(node.childNodes);
}
98
/**
 * Recursively delete, in place, every comment node and every element whose
 * lower-cased tag name is in `tagsToRemove` from the tree below `node`.
 *
 * @param {object} node - Root of the tree to clean; nodes without `childNodes` are left alone.
 * @param {Set<string>} tagsToRemove - Lower-case tag names to drop (with their subtrees).
 * @returns {void}
 */
function stripElementsByTag(node, tagsToRemove) {
    const children = node.childNodes;
    if (!children) {
        return;
    }
    // Walk backwards so splicing never shifts an index that is still to be visited.
    for (let index = children.length - 1; index >= 0; index -= 1) {
        const child = children[index];
        if (child.nodeName === '#comment') {
            children.splice(index, 1);
            continue;
        }
        const isElement = 'tagName' in child && Boolean(child.tagName);
        if (isElement && tagsToRemove.has(child.tagName.toLowerCase())) {
            children.splice(index, 1);
            continue;
        }
        // Surviving elements are always descended into; other nodes only when
        // they actually carry a child list.
        if (isElement || hasChildNodes(child)) {
            stripElementsByTag(child, tagsToRemove);
        }
    }
}
127
/**
 * Parse `html`, pick its root node for flattening, and return the flattened form.
 *
 * @param {string} html - HTML source.
 * @returns {*} Whatever `flattenHtml` produces for the document's root node.
 */
export function computeHtmlFingerprint(html) {
    return flattenHtml(getRootNodeForFlatten(parseDocument(html)));
}
132
/**
 * Compare two HTML documents.
 *
 * Both inputs are parsed and flattened; the flattened forms drive the similarity
 * score and the added/removed lists, while `buildDiffParts` supplies the diff
 * parts. Entries of each part's `added` list that start with `ELEMENT:` are
 * appended to the reported `added` list.
 *
 * @param {string} originalHtml - Baseline HTML.
 * @param {string} modifiedHtml - HTML to compare against the baseline.
 * @param {*} [htmlConfig] - Forwarded to `parseDocument`.
 * @returns {Promise<{parts: Array, added: Array, removed: Array, similarity: *, summary: string}>}
 */
export async function htmlDiff(originalHtml, modifiedHtml, htmlConfig) {
    // Each stage handles the original first, then the modified document.
    const documents = [originalHtml, modifiedHtml].map((html) => parseDocument(html, htmlConfig));
    const roots = documents.map((doc) => getRootNodeForFlatten(doc));
    const [originalLines, modifiedLines] = roots.map((root) => flattenHtml(root));
    const similarity = calculateSimilarity(originalLines, modifiedLines);
    const { added, removed } = findDifferences(originalLines, modifiedLines);
    const parts = await buildDiffParts(documents[0], documents[1]);
    const structuralAdditions = [];
    for (const part of parts) {
        for (const entry of part.added) {
            if (entry.startsWith('ELEMENT:')) {
                structuralAdditions.push(entry);
            }
        }
    }
    const allAdded = [...added, ...structuralAdditions];
    const nothingListed = allAdded.length + removed.length === 0;
    let summary;
    if (nothingListed && parts.length > 0) {
        // Parts exist although neither list has entries.
        summary = 'Structural additions detected';
    }
    else {
        summary = generateSummary(allAdded, removed, similarity);
    }
    return { parts, added: allAdded, removed, similarity, summary };
}
157
/**
 * Parse HTML into a document tree prepared for diffing.
 *
 * The title is captured first, the tree is then sanitized with the diff-specific
 * rules (which also remove iframes), and finally the captured title is written
 * back into the head.
 *
 * @param {string} html - HTML source to parse.
 * @param {*} [htmlConfig] - Accepted for callers; not read by this function.
 * @returns {object} The sanitized document tree.
 */
function parseDocument(html, htmlConfig) {
    const tree = parse(html);
    const capturedTitle = getDocumentTitle(tree);
    sanitizeDocumentTreeForDiff(tree);
    ensureDocumentTitle(tree, capturedTitle);
    return tree;
}
168
/**
 * Diff-specific sanitization: strips every tag listed in NON_SEMANTIC_TAGS plus
 * `iframe` from the tree, then prunes the document head.
 *
 * @param {object} document - Parsed document node; mutated in place.
 * @returns {void}
 */
function sanitizeDocumentTreeForDiff(document) {
    // Work on a copy so the shared tag list itself is never extended.
    const removableTags = new Set(NON_SEMANTIC_TAGS);
    removableTags.add('iframe');
    stripElementsByTag(document, removableTags);
    pruneDocumentHeadForDiff(document);
}
176
/**
 * Reduce the document's <head> to its <title> element(s) for diff purposes.
 * Every other head child (elements, text, comments) is removed in place.
 * Does nothing when the html or head element (or its child list) is missing.
 *
 * @param {object} document - Parsed document node; mutated in place.
 * @returns {void}
 */
function pruneDocumentHeadForDiff(document) {
    const childByTag = (parent, tag) => parent.childNodes.find((node) => 'tagName' in node && node.tagName?.toLowerCase() === tag);
    if (!document.childNodes) {
        return;
    }
    const html = childByTag(document, 'html');
    if (!html || !html.childNodes) {
        return;
    }
    const head = childByTag(html, 'head');
    if (!head || !head.childNodes) {
        return;
    }
    const isTitleElement = (node) => 'tagName' in node && Boolean(node.tagName) && node.tagName.toLowerCase() === 'title';
    const kept = head.childNodes.filter(isTitleElement);
    // Rewrite the existing array rather than replacing it, so the head keeps
    // the very same childNodes list object.
    head.childNodes.splice(0, head.childNodes.length, ...kept);
}
204
// Local copy of the NON_SEMANTIC_TAGS list from html.ts (kept here to avoid
// circular imports). Elements with these tag names are stripped before diffing.
const NON_SEMANTIC_TAGS = new Set([
    // Styling, scripting and document metadata
    'style', 'script', 'link', 'meta', 'base', 'template', 'slot', 'noscript',
    // Frames and embedded content
    'frame', 'frameset', 'object', 'embed',
    // SVG shapes
    'path', 'polygon', 'polyline', 'circle', 'ellipse', 'line', 'rect',
    // SVG structure, references and animation
    'defs', 'g', 'symbol', 'use', 'mask', 'pattern', 'clippath',
    'animate', 'animatetransform', 'animatecolor',
]);
236
/**
 * Pick the node that flattening starts from: the body when present, otherwise
 * the html element, each passed through `convertNode`. When neither exists an
 * empty placeholder element named `document` is returned.
 *
 * @param {object} document - Parsed document node.
 * @returns {object} Converted root node, or the empty placeholder.
 */
function getRootNodeForFlatten(document) {
    // The html lookup only runs when no body was found.
    const rootElement = findBodyElement(document) || findHtmlElement(document);
    if (rootElement) {
        return convertNode(rootElement);
    }
    return { type: 'element', tagName: 'document', children: [] };
}
254
/**
 * Report whether two elements carry different attributes. Attribute order is
 * irrelevant; a missing `attrs` list is treated as empty.
 *
 * @param {object} current - Element from the newer tree.
 * @param {object} previous - Element from the older tree.
 * @returns {boolean} True when the attribute counts differ or any attribute of
 *   `current` has no identically valued counterpart in `previous`.
 */
const attributesDiffer = (current, previous) => {
    const now = current.attrs ?? [];
    const before = previous.attrs ?? [];
    if (now.length !== before.length) {
        return true;
    }
    const beforeByName = new Map();
    for (const { name, value } of before) {
        beforeByName.set(name, value);
    }
    return now.some(({ name, value }) => beforeByName.get(name) !== value);
};
268
/**
 * Collect the trimmed, non-empty values of the `#text` nodes that are direct
 * children of `element` (descendants further down are not visited).
 *
 * @param {object} element - Element whose immediate children are inspected.
 * @returns {string[]} Trimmed text values in child order; empty when there are none.
 */
const getDirectTextValues = (element) => {
    const children = element.childNodes;
    if (!children || children.length === 0) {
        return [];
    }
    return children
        .filter((child) => child.nodeName === '#text')
        .map((child) => child.value.trim())
        .filter((text) => text.length > 0);
};
283
/**
 * Report whether the direct text content of two elements differs, comparing
 * the lists from `getDirectTextValues` by length and then position by position.
 *
 * @param {object} current - Element from the newer tree.
 * @param {object} previous - Element from the older tree.
 * @returns {boolean} True when the two text lists are not identical.
 */
const directTextDiffer = (current, previous) => {
    const now = getDirectTextValues(current);
    const before = getDirectTextValues(previous);
    return now.length !== before.length || now.some((text, position) => text !== before[position]);
};
296
+ const trashHtmlClasses = TRASH_HTML_CLASSES;
297
/**
 * Keep only the class names that pass every rule below, applied as successive
 * filters in the listed order.
 *
 * @param {string[]} classes - Candidate class names.
 * @returns {string[]} A new array with the surviving class names, order preserved.
 */
function filterContainerClasses(classes) {
    const keepRules = [
        // contains no digit
        (cls) => !/\d/.test(cls),
        (cls) => !isGenericClass(cls),
        (cls) => !trashHtmlClasses.test(cls),
        // contains neither ':' nor '__'
        (cls) => !/(:|__)/.test(cls),
        (cls) => !TAILWIND_CLASS_PATTERNS.some((pattern) => pattern.test(cls)),
    ];
    return keepRules.reduce((remaining, keep) => remaining.filter((cls) => keep(cls)), classes);
}
305
/**
 * Build a CSS selector for `element`.
 *
 * An id that `isDynamicId` does not flag wins outright (`#id`). Otherwise the
 * classes surviving `filterContainerClasses` are combined with the tag name,
 * and the selector is returned only when exactly one element in `allElements`
 * has that tag and all of those classes.
 *
 * @param {object} element - Element to describe.
 * @param {Map<string, object>} allElements - Elements to check uniqueness against (values are scanned).
 * @returns {string|null} The selector, or null when no usable one can be built.
 */
function buildContainerSelector(element, allElements) {
    const tag = element.tagName?.toLowerCase();
    if (!tag) {
        return null;
    }
    const readAttr = (node, attrName) => (node.attrs ?? []).find((a) => a.name === attrName)?.value;
    const id = readAttr(element, 'id');
    if (id && !isDynamicId(id)) {
        return `#${id}`;
    }
    const classAttr = readAttr(element, 'class');
    if (!classAttr) {
        return null;
    }
    const meaningful = filterContainerClasses(classAttr.split(/\s+/).filter(Boolean));
    if (meaningful.length === 0) {
        return null;
    }
    let matches = 0;
    for (const candidate of allElements.values()) {
        if (candidate.tagName?.toLowerCase() !== tag) {
            continue;
        }
        const candidateClasses = (readAttr(candidate, 'class') || '').split(/\s+/);
        if (!meaningful.every((cls) => candidateClasses.includes(cls))) {
            continue;
        }
        matches += 1;
        // A second match already proves the selector is ambiguous.
        if (matches > 1) {
            return null;
        }
    }
    return matches === 1 ? `${tag}.${meaningful.join('.')}` : null;
}
334
/**
 * Convert a slash-separated tree path (e.g. `html[1]/body[1]/div[2]`) into an
 * XPath expression.
 *
 * Everything up to and including the first segment starting with `body` is
 * replaced by `//body`; paths without such a segment are simply prefixed with `//`.
 * A path that ends at the body segment yields `//body` (no trailing slash, which
 * would not be a valid location path).
 *
 * @param {string} treePath - Slash-separated path of `tag[index]` segments.
 * @returns {string} XPath expression for the same node.
 */
function pathToXPath(treePath) {
    const segments = treePath.split('/');
    const bodyIndex = segments.findIndex((segment) => segment.startsWith('body'));
    if (bodyIndex === -1) {
        return `//${segments.join('/')}`;
    }
    const belowBody = segments.slice(bodyIndex + 1);
    if (belowBody.length === 0) {
        return '//body';
    }
    return `//body/${belowBody.join('/')}`;
}
341
/**
 * Find the nearest ancestor of `topLevelPath` that exists in both trees and
 * can serve as the container for a diff part.
 *
 * Ancestors are tried from the closest one upwards (never the path itself and
 * never the first segment alone); entries of IGNORED_PATHS are skipped. The
 * first ancestor present in both maps is used, addressed by a CSS selector when
 * one can be built and by an XPath otherwise. Falls back to the body.
 *
 * @param {string} topLevelPath - Tree path of the changed element.
 * @param {Map<string, object>} originalMap - Path-to-element map of the original tree.
 * @param {Map<string, object>} modifiedMap - Path-to-element map of the modified tree.
 * @returns {{path: string, selector: string}} Container path and its selector.
 */
function findStableContainer(topLevelPath, originalMap, modifiedMap) {
    const segments = topLevelPath.split('/');
    // `depth` is the number of leading segments kept for the candidate ancestor.
    for (let depth = segments.length - 1; depth >= 2; depth -= 1) {
        const candidatePath = segments.slice(0, depth).join('/');
        const usable = !IGNORED_PATHS.has(candidatePath) && originalMap.has(candidatePath) && modifiedMap.has(candidatePath);
        if (!usable) {
            continue;
        }
        const selector = buildContainerSelector(modifiedMap.get(candidatePath), modifiedMap) || pathToXPath(candidatePath);
        return { path: candidatePath, selector };
    }
    return { path: 'html[1]/body[1]', selector: 'body' };
}
357
/**
 * Build per-container diff fragments describing what was added or changed
 * between two parsed documents.
 *
 * Steps:
 *  1. Index both documents by element path (collectElementMap).
 *  2. Collect paths that exist only in the modified document ("added") and
 *     paths whose attributes differ per attributesDiffer ("changed"),
 *     skipping IGNORED_PATHS.
 *  3. Reduce both lists to their top-most paths and group them by the
 *     container chosen by findStableContainer.
 *  4. For every container rebuild a pruned subtree: shallow clones for the
 *     intermediate ancestors, deep clones for the added/changed elements;
 *     then serialize and minify it.
 *
 * @param {object} originalDocument - Parsed document before the change
 *   (presumably a parse5 document; confirm against the caller).
 * @param {object} modifiedDocument - Parsed document after the change.
 * @returns {Promise<Array<{container: string, subtree: string, added: string[], removed: string[]}>>}
 *   One entry per container with a non-empty subtree. `removed` is always an
 *   empty array in this function.
 */
async function buildDiffParts(originalDocument, modifiedDocument) {
    const originalMap = collectElementMap(originalDocument);
    const modifiedMap = collectElementMap(modifiedDocument);
    const addedPaths = [];
    const changedPaths = [];
    for (const [path, element] of modifiedMap.entries()) {
        if (IGNORED_PATHS.has(path)) {
            continue;
        }
        const originalElement = originalMap.get(path);
        if (!originalElement) {
            addedPaths.push(path);
            continue;
        }
        if (attributesDiffer(element, originalElement)) {
            changedPaths.push(path);
        }
    }
    if (addedPaths.length === 0 && changedPaths.length === 0) {
        return [];
    }
    const addedTopLevel = filterTopLevelPaths(addedPaths);
    // Set for O(1) membership checks when labelling added elements below.
    const addedTopLevelSet = new Set(addedTopLevel);
    // A changed element inside an added subtree is already covered by that subtree.
    const changedFiltered = changedPaths.filter((path) => !addedTopLevel.some((ancestor) => isSameOrAncestor(ancestor, path)));
    const changedTopLevel = filterTopLevelPaths(changedFiltered);
    const allTopLevel = [...addedTopLevel, ...changedTopLevel];
    const grouped = new Map();
    for (const path of allTopLevel) {
        const { path: containerPath, selector } = findStableContainer(path, originalMap, modifiedMap);
        const existing = grouped.get(containerPath);
        if (existing) {
            existing.paths.push(path);
        }
        else {
            grouped.set(containerPath, { selector, paths: [path] });
        }
    }
    const parts = [];
    for (const [containerPath, { selector, paths }] of grouped) {
        const containerElement = modifiedMap.get(containerPath);
        if (!containerElement) {
            continue;
        }
        const rootClone = cloneElementShallow(containerElement);
        // Maps a path to its clone inside the pruned subtree being built.
        const diffMap = new Map();
        diffMap.set(containerPath, rootClone);
        // Loop-invariant: depth of the container, computed once per group.
        const containerDepth = containerPath.split('/').length;
        for (const path of paths) {
            const pathSegments = path.split('/');
            // Materialize the ancestors between the container and the element itself.
            for (let i = containerDepth + 1; i < pathSegments.length; i++) {
                const prefix = pathSegments.slice(0, i).join('/');
                if (diffMap.has(prefix)) {
                    continue;
                }
                const ancestorSource = modifiedMap.get(prefix);
                const ancestorParent = diffMap.get(pathSegments.slice(0, i - 1).join('/'));
                if (!ancestorSource || !ancestorParent) {
                    continue;
                }
                const clone = cloneElementShallow(ancestorSource);
                appendChild(ancestorParent, clone);
                diffMap.set(prefix, clone);
            }
            const sourceElement = modifiedMap.get(path);
            const parentNode = diffMap.get(getParentPath(path));
            if (!sourceElement || !parentNode) {
                continue;
            }
            appendChild(parentNode, cloneElementDeep(sourceElement));
        }
        const serialized = serialize({ childNodes: [rootClone], nodeName: '#document-fragment' }).trim();
        const subtree = serialized ? await minifyHtml(serialized) : '';
        if (!subtree) {
            continue;
        }
        const addedLines = paths.filter((p) => addedTopLevelSet.has(p)).map((p) => `ELEMENT:${p}`);
        const removedLines = [];
        parts.push({ container: selector, subtree, added: addedLines, removed: removedLines });
    }
    return parts;
}
432
/**
 * Index every element of a document by a positional path.
 *
 * Paths start at `html[1]`; each further segment is `tag[n]`, where `n` is the
 * 1-based position of the element among its same-tag siblings. Entries are
 * inserted in depth-first, document order. Child nodes without a tag name are
 * skipped.
 *
 * @param {object} document - Parsed document whose `childNodes` contain the html element.
 * @returns {Map<string, object>} Path -> element; empty when no html element is found.
 */
function collectElementMap(document) {
    const elementsByPath = new Map();
    const visit = (node, nodePath) => {
        elementsByPath.set(nodePath, node);
        const seenPerTag = new Map();
        for (const child of node.childNodes || []) {
            const isElement = 'tagName' in child && child.tagName;
            if (!isElement) {
                continue;
            }
            const tag = child.tagName.toLowerCase();
            const position = (seenPerTag.get(tag) ?? 0) + 1;
            seenPerTag.set(tag, position);
            visit(child, `${nodePath}/${tag}[${position}]`);
        }
    };
    const root = findHtmlElement(document);
    if (root) {
        visit(root, 'html[1]');
    }
    return elementsByPath;
}
460
/**
 * Reduce a list of paths to the top-most ones: duplicates are removed and any
 * path that is equal to, or a descendant of, an already kept path is dropped.
 *
 * Paths are processed shortest string first, so an ancestor is always
 * considered before its descendants.
 *
 * @param {string[]} paths - Element paths, possibly nested or repeated.
 * @returns {string[]} Kept paths, ordered by ascending string length.
 */
function filterTopLevelPaths(paths) {
    const shortestFirst = [...new Set(paths)];
    shortestFirst.sort((left, right) => left.length - right.length);
    const kept = [];
    shortestFirst.forEach((candidate) => {
        const alreadyCovered = kept.some((root) => isSameOrAncestor(root, candidate));
        if (!alreadyCovered) {
            kept.push(candidate);
        }
    });
    return kept;
}
472
/**
 * Return the path with its last `/`-separated segment removed.
 *
 * @param {string} path - Slash-separated element path.
 * @returns {string} Parent path, or an empty string when `path` has no separator.
 */
function getParentPath(path) {
    const cut = path.lastIndexOf('/');
    return cut === -1 ? '' : path.slice(0, cut);
}
479
/**
 * Tell whether `ancestor` is the same path as `path` or a prefix of it that
 * ends on a segment boundary.
 *
 * @param {string} ancestor - Candidate ancestor path.
 * @param {string} path - Path to test.
 * @returns {boolean} True when the paths are equal or `path` lies below `ancestor`.
 */
function isSameOrAncestor(ancestor, path) {
    if (path === ancestor) {
        return true;
    }
    // The trailing slash keeps "div[1]" from matching "div[10]".
    const prefix = `${ancestor}/`;
    return path.startsWith(prefix);
}
482
/**
 * Append `child` to `parent.childNodes` (creating the array when missing) and
 * point `child.parentNode` back at `parent`.
 *
 * @param {object} parent - Node receiving the child; mutated in place.
 * @param {object} child - Node to attach; mutated in place.
 */
function appendChild(parent, child) {
    let siblings = parent.childNodes;
    if (!siblings) {
        siblings = [];
        parent.childNodes = siblings;
    }
    siblings.push(child);
    child.parentNode = parent;
}
489
/**
 * Copy an element without its children.
 *
 * The clone carries `nodeName`, `tagName`, `namespaceURI`, a copy of each
 * attribute object and an empty `childNodes` array.
 *
 * @param {object} element - Element node to copy.
 * @returns {object} New element node with no children and no parent link.
 */
function cloneElementShallow(element) {
    const { nodeName, tagName, attrs, namespaceURI } = element;
    const copiedAttrs = attrs ? attrs.map((attr) => Object.assign({}, attr)) : [];
    return {
        nodeName,
        tagName,
        attrs: copiedAttrs,
        childNodes: [],
        namespaceURI,
    };
}
498
/**
 * Copy an element together with all of its descendants.
 *
 * The element itself is copied with cloneElementShallow; every child is
 * copied with cloneNodeDeep and attached through appendChild.
 *
 * @param {object} element - Element node to copy.
 * @returns {object} Root of the copied subtree.
 */
function cloneElementDeep(element) {
    const copy = cloneElementShallow(element);
    const children = element.childNodes || [];
    children.forEach((child) => {
        appendChild(copy, cloneNodeDeep(child));
    });
    return copy;
}
509
/**
 * Copy a single node of any kind.
 *
 * Elements are delegated to cloneElementDeep. Text and comment nodes are
 * rebuilt with only their `nodeName` and payload (`value` / `data`). Any other
 * node is returned as a shallow property copy.
 *
 * @param {object} node - Node to copy.
 * @returns {object} Copied node.
 */
function cloneNodeDeep(node) {
    const isElement = 'tagName' in node && Boolean(node.tagName);
    if (isElement) {
        return cloneElementDeep(node);
    }
    switch (node.nodeName) {
        case '#text':
            return { nodeName: '#text', value: node.value };
        case '#comment':
            return { nodeName: '#comment', data: node.data };
        default:
            return { ...node };
    }
}
529
/**
 * Locate the `html` element among the direct children of a document.
 *
 * @param {object} document - Parsed document node.
 * @returns {object|null} First child whose tag name is `html` (compared
 *   case-insensitively), or null when there is none or no `childNodes`.
 */
function findHtmlElement(document) {
    const { childNodes } = document;
    if (!childNodes) {
        return null;
    }
    const isHtml = (node) => 'tagName' in node && node.tagName && node.tagName.toLowerCase() === 'html';
    const match = childNodes.find(isHtml);
    return match || null;
}
540
/**
 * Locate the `head` element directly under the document's html element.
 *
 * @param {object} document - Parsed document node.
 * @returns {object|null} First `head` child of the html element (tag name
 *   compared case-insensitively), or null when either is missing.
 */
function findHeadElement(document) {
    const root = findHtmlElement(document);
    const children = root ? root.childNodes : null;
    if (!children) {
        return null;
    }
    const head = children.find((node) => 'tagName' in node && node.tagName && node.tagName.toLowerCase() === 'head');
    return head || null;
}
552
/**
 * Locate the `body` element directly under the document's html element.
 *
 * @param {object} document - Parsed document node.
 * @returns {object|null} First `body` child of the html element (tag name
 *   compared case-insensitively), or null when either is missing.
 */
function findBodyElement(document) {
    const root = findHtmlElement(document);
    const children = root ? root.childNodes : null;
    if (!children) {
        return null;
    }
    const body = children.find((node) => 'tagName' in node && node.tagName && node.tagName.toLowerCase() === 'body');
    return body || null;
}
564
/**
 * Convert a parse5 node into the simplified representation used for textual
 * diffing.
 *
 * - Text nodes become `{ type: 'text', content }` with trimmed content.
 * - Comment nodes become `{ type: 'comment', content }`.
 * - Elements become `{ type: 'element', tagName, attrs?, children?, content? }`
 *   with a lower-cased tag name. Whitespace-only text children are dropped.
 *   When the only remaining child is a text node, its text is hoisted into
 *   `content` and `children` is set to undefined.
 * - Anything else becomes an empty text node.
 *
 * @param {object} node - Node to convert.
 * @returns {object} Simplified node.
 */
function convertNode(node) {
    switch (node.nodeName) {
        case '#text':
            return { type: 'text', content: node.value.trim() };
        case '#comment':
            return { type: 'comment', content: node.data };
        default:
            break;
    }
    const isElement = 'tagName' in node && node.tagName;
    if (!isElement) {
        return { type: 'text', content: '' };
    }
    const result = {
        type: 'element',
        tagName: node.tagName.toLowerCase(),
    };
    if (node.attrs && node.attrs.length > 0) {
        result.attrs = node.attrs.map(({ name, value }) => ({ name, value }));
    }
    if (node.childNodes && node.childNodes.length > 0) {
        const isBlankText = (child) => child.nodeName === '#text' && child.value.trim().length === 0;
        const converted = node.childNodes
            .filter((child) => !isBlankText(child))
            .map((child) => convertNode(child));
        if (converted.length > 0) {
            result.children = converted;
        }
    }
    const soleChild = result.children && result.children.length === 1 ? result.children[0] : null;
    if (soleChild && soleChild.type === 'text') {
        // Collapse `<tag>text</tag>` into a single node carrying the text.
        result.content = soleChild.content;
        result.children = undefined;
    }
    return result;
}
619
/**
 * Flatten a simplified HTML tree into comparable lines, in document order.
 *
 * - Text of at least 5 characters yields `TEXT:<content>`.
 * - Comments yield nothing.
 * - Interactive elements (per isInteractiveElement) yield `TAG:<content>`, or
 *   just `TAG` when getElementContent returns nothing; their children are not
 *   visited.
 * - Other elements yield a `TEXT:` line for their own content (same 5
 *   character minimum) and are then descended into.
 *
 * @param {object} node - Root of the simplified tree.
 * @returns {string[]} Collected lines.
 */
function flattenHtml(node) {
    const lines = [];
    const pushText = (text) => {
        if (text && text.length >= 5) {
            lines.push(`TEXT:${text}`);
        }
    };
    const visit = (current) => {
        if (current.type === 'text' && current.content) {
            pushText(current.content);
            return;
        }
        // Comments, empty text nodes and tagless nodes contribute nothing.
        if (current.type !== 'element' || !current.tagName) {
            return;
        }
        if (isInteractiveElement(current)) {
            const label = getElementContent(current);
            const tag = current.tagName.toUpperCase();
            lines.push(label ? `${tag}:${label}` : `${tag}`);
            return;
        }
        pushText(current.content);
        for (const child of current.children || []) {
            visit(child);
        }
    };
    visit(node);
    return lines;
}
656
/**
 * Decide whether a simplified node represents an interactive control.
 *
 * A node qualifies when its tag is one of a fixed list of interactive tags,
 * or when its first `role` attribute holds one of a fixed list of widget
 * roles. Both comparisons are exact (case-sensitive).
 *
 * @param {object} node - Simplified node with optional `tagName` and `attrs`.
 * @returns {boolean} True for interactive elements.
 */
function isInteractiveElement(node) {
    const { tagName, attrs } = node;
    if (!tagName) {
        return false;
    }
    const interactiveTags = ['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'];
    if (interactiveTags.includes(tagName)) {
        return true;
    }
    if (!attrs) {
        return false;
    }
    const interactiveRoles = new Set(['button', 'link', 'checkbox', 'radio', 'combobox', 'listbox', 'textbox', 'switch', 'tab']);
    const roleAttr = attrs.find((attr) => attr.name === 'role');
    return Boolean(roleAttr) && interactiveRoles.has(roleAttr.value);
}
672
/**
 * Pick a short label describing an element.
 *
 * Order of preference:
 *  1. the element's own `content`;
 *  2. for `input`: the first non-empty of `placeholder`, `value`, `name`;
 *  3. for `a`: a non-empty `href`.
 *
 * Attributes that are present but empty (e.g. `placeholder=""`) are skipped so
 * that the next candidate can be used instead of yielding no label at all.
 *
 * @param {object} node - Simplified node with optional `content`, `tagName` and `attrs`.
 * @returns {string} The label, or an empty string when none is available.
 */
function getElementContent(node) {
    if (node.content) {
        return node.content;
    }
    if (!node.attrs) {
        return '';
    }
    // Value of the first attribute with the given name; '' when absent or empty.
    const attrValue = (name) => {
        const attr = node.attrs.find((candidate) => candidate.name === name);
        return (attr && attr.value) || '';
    };
    if (node.tagName === 'input') {
        return attrValue('placeholder') || attrValue('value') || attrValue('name');
    }
    if (node.tagName === 'a') {
        return attrValue('href');
    }
    return '';
}
700
/**
 * Compute the similarity of two line lists as a rounded percentage: the number
 * of distinct lines present in both lists divided by the number of distinct
 * lines present in either.
 *
 * @param {string[]} lines1 - First list of lines.
 * @param {string[]} lines2 - Second list of lines.
 * @returns {number} Integer from 0 to 100; 100 when both lists are empty.
 */
function calculateSimilarity(lines1, lines2) {
    const first = new Set(lines1);
    const second = new Set(lines2);
    let shared = 0;
    for (const line of first) {
        if (second.has(line)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = first.size + second.size - shared;
    if (unionSize === 0) {
        return 100;
    }
    return Math.round((shared / unionSize) * 100);
}
710
/**
 * Compare two line lists as sets.
 *
 * @param {string[]} lines1 - Lines of the original.
 * @param {string[]} lines2 - Lines of the modified version.
 * @returns {{added: string[], removed: string[]}} Distinct lines found only in
 *   `lines2` (`added`) and only in `lines1` (`removed`), each in first-seen order.
 */
function findDifferences(lines1, lines2) {
    const before = new Set(lines1);
    const after = new Set(lines2);
    const added = [];
    for (const line of after) {
        if (!before.has(line)) {
            added.push(line);
        }
    }
    const removed = [];
    for (const line of before) {
        if (!after.has(line)) {
            removed.push(line);
        }
    }
    return { added, removed };
}
717
/**
 * Produce a one-line, human-readable summary of a diff.
 *
 * @param {string[]} added - Lines that were added.
 * @param {string[]} removed - Lines that were removed.
 * @param {number} similarity - Similarity percentage; reported only when below 100.
 * @returns {string} `'No changes detected'` when both lists are empty,
 *   otherwise a comma-separated summary such as `80% similar, 2 additions, 1 removal`.
 */
function generateSummary(added, removed, similarity) {
    if (added.length + removed.length === 0) {
        return 'No changes detected';
    }
    const counted = (count, noun) => `${count} ${noun}${count > 1 ? 's' : ''}`;
    const fragments = [
        similarity < 100 ? `${similarity}% similar` : null,
        added.length > 0 ? counted(added.length, 'addition') : null,
        removed.length > 0 ? counted(removed.length, 'removal') : null,
    ];
    return fragments.filter((fragment) => fragment !== null).join(', ');
}
734
+ //# sourceMappingURL=html-diff.js.map