explorbot 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -26
- package/bin/explorbot-cli.ts +680 -0
- package/boat/api-tester/src/ai/chief/styles.ts +15 -0
- package/boat/api-tester/src/ai/chief.ts +335 -0
- package/boat/api-tester/src/ai/curler-tools.ts +278 -0
- package/boat/api-tester/src/ai/curler.ts +306 -0
- package/boat/api-tester/src/api-client.ts +28 -0
- package/boat/api-tester/src/apibot.ts +203 -0
- package/boat/api-tester/src/cli.ts +301 -0
- package/boat/api-tester/src/config.ts +190 -0
- package/dist/bin/explorbot-cli.js +23 -101
- package/dist/boat/api-tester/bin/apibot-cli.js +0 -1
- package/dist/boat/api-tester/src/ai/chief/styles.js +0 -1
- package/dist/boat/api-tester/src/ai/chief.js +0 -1
- package/dist/boat/api-tester/src/ai/curler-tools.js +0 -1
- package/dist/boat/api-tester/src/ai/curler.js +0 -1
- package/dist/boat/api-tester/src/api-client.js +0 -1
- package/dist/boat/api-tester/src/apibot.js +0 -1
- package/dist/boat/api-tester/src/cli.js +0 -1
- package/dist/boat/api-tester/src/config.js +0 -1
- package/dist/src/action-result.js +0 -1
- package/dist/src/action.js +14 -12
- package/dist/src/activity.js +0 -1
- package/dist/src/ai/agent.js +0 -1
- package/dist/src/ai/bosun.js +0 -1
- package/dist/src/ai/captain/idle-mode.js +0 -1
- package/dist/src/ai/captain/mixin.js +0 -1
- package/dist/src/ai/captain/test-mode.js +0 -1
- package/dist/src/ai/captain/web-mode.js +0 -1
- package/dist/src/ai/captain.js +0 -1
- package/dist/src/ai/conversation.js +0 -1
- package/dist/src/ai/experience-compactor.js +0 -1
- package/dist/src/ai/fisherman-tools.js +0 -1
- package/dist/src/ai/fisherman.js +0 -1
- package/dist/src/ai/historian.js +0 -1
- package/dist/src/ai/navigator.js +0 -1
- package/dist/src/ai/pilot.js +0 -1
- package/dist/src/ai/planner/session-dedup.js +0 -1
- package/dist/src/ai/planner/styles.js +0 -1
- package/dist/src/ai/planner/subpages.js +42 -7
- package/dist/src/ai/planner.js +15 -4
- package/dist/src/ai/provider.js +0 -1
- package/dist/src/ai/quartermaster.js +0 -1
- package/dist/src/ai/researcher/cache.js +13 -9
- package/dist/src/ai/researcher/coordinates.js +4 -3
- package/dist/src/ai/researcher/deep-analysis.js +16 -20
- package/dist/src/ai/researcher/fingerprint-worker.js +0 -1
- package/dist/src/ai/researcher/focus.js +0 -1
- package/dist/src/ai/researcher/locators.js +1 -2
- package/dist/src/ai/researcher/mixin.js +0 -1
- package/dist/src/ai/researcher/parser.js +4 -4
- package/dist/src/ai/researcher/research-result.js +2 -1
- package/dist/src/ai/researcher.js +6 -6
- package/dist/src/ai/rules.js +0 -1
- package/dist/src/ai/task-agent.js +0 -1
- package/dist/src/ai/tester.js +0 -1
- package/dist/src/ai/tools.js +4 -1
- package/dist/src/api/api-client.js +0 -1
- package/dist/src/api/request-result.js +0 -1
- package/dist/src/api/request-store.js +0 -1
- package/dist/src/api/spec-reader.js +0 -1
- package/dist/src/api/xhr-capture.js +0 -1
- package/dist/src/browser-server.js +0 -1
- package/dist/src/command-handler.js +0 -1
- package/dist/src/commands/add-rule-command.js +0 -1
- package/dist/src/commands/base-command.js +0 -1
- package/dist/src/commands/clean-command.js +0 -1
- package/dist/src/commands/context-aria-command.js +0 -1
- package/dist/src/commands/context-command.js +2 -3
- package/dist/src/commands/context-data-command.js +0 -1
- package/dist/src/commands/context-experience-command.js +0 -1
- package/dist/src/commands/context-html-command.js +0 -1
- package/dist/src/commands/context-knowledge-command.js +0 -1
- package/dist/src/commands/debug-command.js +0 -1
- package/dist/src/commands/drill-command.js +0 -1
- package/dist/src/commands/exit-command.js +0 -1
- package/dist/src/commands/explore-command.js +3 -3
- package/dist/src/commands/freesail-command.js +0 -1
- package/dist/src/commands/help-command.js +0 -1
- package/dist/src/commands/index.js +0 -1
- package/dist/src/commands/init-command.js +117 -0
- package/dist/src/commands/knows-command.js +0 -1
- package/dist/src/commands/learn-command.js +0 -1
- package/dist/src/commands/navigate-command.js +0 -1
- package/dist/src/commands/path-command.js +0 -1
- package/dist/src/commands/plan-clear-command.js +0 -1
- package/dist/src/commands/plan-command.js +6 -2
- package/dist/src/commands/plan-edit-command.js +0 -1
- package/dist/src/commands/plan-load-command.js +0 -1
- package/dist/src/commands/plan-reload-command.js +0 -1
- package/dist/src/commands/plan-save-command.js +0 -1
- package/dist/src/commands/research-command.js +0 -1
- package/dist/src/commands/start-command.js +0 -1
- package/dist/src/commands/status-command.js +0 -1
- package/dist/src/commands/test-command.js +0 -1
- package/dist/src/components/ActivityPane.js +0 -1
- package/dist/src/components/AddKnowledge.js +0 -1
- package/dist/src/components/AddRule.js +0 -1
- package/dist/src/components/App.js +0 -1
- package/dist/src/components/Autocomplete.js +0 -1
- package/dist/src/components/InputPane.js +0 -1
- package/dist/src/components/InputReadline.js +0 -1
- package/dist/src/components/LogPane.js +0 -1
- package/dist/src/components/PlanEditor.js +0 -1
- package/dist/src/components/PlanPane.js +0 -1
- package/dist/src/components/SessionTimer.js +0 -1
- package/dist/src/components/StateTransitionPane.js +0 -1
- package/dist/src/components/StatusPane.js +0 -1
- package/dist/src/components/TaskPane.js +0 -1
- package/dist/src/components/Welcome.js +0 -1
- package/dist/src/components/WelcomeChecklist.js +0 -1
- package/dist/src/components/WelcomeCommands.js +0 -1
- package/dist/src/components/autocomplete-store.js +0 -1
- package/dist/src/components/parse-keypress.js +0 -1
- package/dist/src/config.js +0 -1
- package/dist/src/execution-controller.js +0 -1
- package/dist/src/experience-tracker.js +0 -1
- package/dist/src/explorbot.js +1 -2
- package/dist/src/explorer.js +58 -17
- package/dist/src/index.js +0 -1
- package/dist/src/knowledge-tracker.js +2 -2
- package/dist/src/observability.js +0 -1
- package/dist/src/reporter.js +0 -1
- package/dist/src/state-manager.js +0 -1
- package/dist/src/stats.js +0 -1
- package/dist/src/test-plan.js +0 -1
- package/dist/src/utils/aria.js +0 -1
- package/dist/src/utils/cli-name.js +16 -0
- package/dist/src/utils/code-extractor.js +0 -1
- package/dist/src/utils/context-formatter.js +0 -1
- package/dist/src/utils/error-page.js +0 -1
- package/dist/src/utils/expandable.js +0 -1
- package/dist/src/utils/hooks-runner.js +0 -1
- package/dist/src/utils/html-diff.js +0 -1
- package/dist/src/utils/html.js +0 -1
- package/dist/src/utils/logger.js +0 -1
- package/dist/src/utils/loop.js +0 -1
- package/dist/src/utils/markdown-parser.js +0 -1
- package/dist/src/utils/markdown-query.js +0 -1
- package/dist/src/utils/markdown-terminal.js +0 -1
- package/dist/src/utils/research-parser.js +0 -1
- package/dist/src/utils/retry.js +0 -1
- package/dist/src/utils/rules-loader.js +0 -1
- package/dist/src/utils/strings.js +0 -1
- package/dist/src/utils/test-plan-markdown.js +0 -1
- package/dist/src/utils/throttle.js +0 -1
- package/dist/src/utils/unique-names.js +0 -1
- package/dist/src/utils/url-matcher.js +0 -1
- package/dist/src/utils/web-element.js +6 -5
- package/dist/src/utils/xpath.js +0 -1
- package/package.json +28 -4
- package/src/action-result.ts +694 -0
- package/src/action.ts +449 -0
- package/src/activity.ts +111 -0
- package/src/ai/agent.ts +3 -0
- package/src/ai/bosun.ts +557 -0
- package/src/ai/captain/idle-mode.ts +116 -0
- package/src/ai/captain/mixin.ts +22 -0
- package/src/ai/captain/test-mode.ts +262 -0
- package/src/ai/captain/web-mode.ts +136 -0
- package/src/ai/captain.ts +504 -0
- package/src/ai/conversation.ts +205 -0
- package/src/ai/experience-compactor.ts +284 -0
- package/src/ai/fisherman-tools.ts +181 -0
- package/src/ai/fisherman.ts +223 -0
- package/src/ai/historian.ts +457 -0
- package/src/ai/navigator.ts +572 -0
- package/src/ai/pilot.ts +776 -0
- package/src/ai/planner/session-dedup.ts +35 -0
- package/src/ai/planner/styles.ts +17 -0
- package/src/ai/planner/subpages.ts +171 -0
- package/src/ai/planner.ts +549 -0
- package/src/ai/provider.ts +613 -0
- package/src/ai/quartermaster.ts +286 -0
- package/src/ai/researcher/cache.ts +109 -0
- package/src/ai/researcher/coordinates.ts +239 -0
- package/src/ai/researcher/deep-analysis.ts +412 -0
- package/src/ai/researcher/fingerprint-worker.ts +59 -0
- package/src/ai/researcher/focus.ts +42 -0
- package/src/ai/researcher/locators.ts +282 -0
- package/src/ai/researcher/mixin.ts +4 -0
- package/src/ai/researcher/parser.ts +186 -0
- package/src/ai/researcher/research-result.ts +116 -0
- package/src/ai/researcher.ts +858 -0
- package/src/ai/rules.ts +376 -0
- package/src/ai/task-agent.ts +141 -0
- package/src/ai/tester.ts +939 -0
- package/src/ai/tools.ts +1122 -0
- package/src/api/api-client.ts +109 -0
- package/src/api/request-result.ts +212 -0
- package/src/api/request-store.ts +130 -0
- package/src/api/spec-reader.ts +174 -0
- package/src/api/xhr-capture.ts +100 -0
- package/src/browser-server.ts +74 -0
- package/src/command-handler.ts +454 -0
- package/src/commands/add-rule-command.ts +63 -0
- package/src/commands/base-command.ts +27 -0
- package/src/commands/clean-command.ts +73 -0
- package/src/commands/context-aria-command.ts +22 -0
- package/src/commands/context-command.ts +67 -0
- package/src/commands/context-data-command.ts +30 -0
- package/src/commands/context-experience-command.ts +48 -0
- package/src/commands/context-html-command.ts +33 -0
- package/src/commands/context-knowledge-command.ts +43 -0
- package/src/commands/debug-command.ts +13 -0
- package/src/commands/drill-command.ts +34 -0
- package/src/commands/exit-command.ts +32 -0
- package/src/commands/explore-command.ts +129 -0
- package/src/commands/freesail-command.ts +95 -0
- package/src/commands/help-command.ts +8 -0
- package/src/commands/index.ts +69 -0
- package/src/commands/init-command.ts +131 -0
- package/src/commands/knows-command.ts +68 -0
- package/src/commands/learn-command.ts +44 -0
- package/src/commands/navigate-command.ts +18 -0
- package/src/commands/path-command.ts +83 -0
- package/src/commands/plan-clear-command.ts +14 -0
- package/src/commands/plan-command.ts +46 -0
- package/src/commands/plan-edit-command.ts +9 -0
- package/src/commands/plan-load-command.ts +18 -0
- package/src/commands/plan-reload-command.ts +28 -0
- package/src/commands/plan-save-command.ts +25 -0
- package/src/commands/research-command.ts +45 -0
- package/src/commands/start-command.ts +13 -0
- package/src/commands/status-command.tsx +23 -0
- package/src/commands/test-command.ts +84 -0
- package/src/components/ActivityPane.tsx +80 -0
- package/src/components/AddKnowledge.tsx +169 -0
- package/src/components/AddRule.tsx +174 -0
- package/src/components/App.tsx +377 -0
- package/src/components/Autocomplete.tsx +63 -0
- package/src/components/InputPane.tsx +259 -0
- package/src/components/InputReadline.tsx +704 -0
- package/src/components/LogPane.tsx +187 -0
- package/src/components/PlanEditor.tsx +150 -0
- package/src/components/PlanPane.tsx +71 -0
- package/src/components/SessionTimer.tsx +35 -0
- package/src/components/StateTransitionPane.tsx +149 -0
- package/src/components/StatusPane.tsx +62 -0
- package/src/components/TaskPane.tsx +119 -0
- package/src/components/Welcome.tsx +83 -0
- package/src/components/WelcomeChecklist.tsx +118 -0
- package/src/components/WelcomeCommands.tsx +102 -0
- package/src/components/autocomplete-store.ts +35 -0
- package/src/components/parse-keypress.ts +170 -0
- package/src/config.ts +491 -0
- package/src/execution-controller.ts +109 -0
- package/src/experience-tracker.ts +350 -0
- package/src/explorbot.ts +405 -0
- package/src/explorer.ts +760 -0
- package/src/index.tsx +62 -0
- package/src/knowledge-tracker.ts +230 -0
- package/src/observability.ts +150 -0
- package/src/reporter.ts +224 -0
- package/src/state-manager.ts +556 -0
- package/src/stats.ts +53 -0
- package/src/test-plan.ts +432 -0
- package/src/utils/aria.ts +629 -0
- package/src/utils/cli-name.ts +13 -0
- package/src/utils/code-extractor.ts +22 -0
- package/src/utils/context-formatter.ts +239 -0
- package/src/utils/error-page.ts +23 -0
- package/src/utils/expandable.ts +38 -0
- package/src/utils/hooks-runner.ts +79 -0
- package/src/utils/html-diff.ts +918 -0
- package/src/utils/html.ts +1316 -0
- package/src/utils/logger.ts +534 -0
- package/src/utils/loop.ts +176 -0
- package/src/utils/markdown-parser.ts +127 -0
- package/src/utils/markdown-query.ts +466 -0
- package/src/utils/markdown-terminal.ts +43 -0
- package/src/utils/research-parser.ts +11 -0
- package/src/utils/retry.ts +73 -0
- package/src/utils/rules-loader.ts +118 -0
- package/src/utils/strings.ts +13 -0
- package/src/utils/test-plan-markdown.ts +332 -0
- package/src/utils/throttle.ts +18 -0
- package/src/utils/unique-names.ts +14 -0
- package/src/utils/url-matcher.ts +45 -0
- package/src/utils/web-element.ts +147 -0
- package/src/utils/xpath.ts +129 -0
- package/dist/bin/explorbot-cli.js.map +0 -1
- package/dist/boat/api-tester/bin/apibot-cli.js.map +0 -1
- package/dist/boat/api-tester/example/apibot.config.js +0 -31
- package/dist/boat/api-tester/example/apibot.config.js.map +0 -1
- package/dist/boat/api-tester/src/ai/chief/styles.js.map +0 -1
- package/dist/boat/api-tester/src/ai/chief.js.map +0 -1
- package/dist/boat/api-tester/src/ai/curler-tools.js.map +0 -1
- package/dist/boat/api-tester/src/ai/curler.js.map +0 -1
- package/dist/boat/api-tester/src/api-client.js.map +0 -1
- package/dist/boat/api-tester/src/apibot.js.map +0 -1
- package/dist/boat/api-tester/src/cli.js.map +0 -1
- package/dist/boat/api-tester/src/config.js.map +0 -1
- package/dist/prompts/audit-rules.md +0 -124
- package/dist/src/action-result.js.map +0 -1
- package/dist/src/action.js.map +0 -1
- package/dist/src/activity.js.map +0 -1
- package/dist/src/ai/agent.js.map +0 -1
- package/dist/src/ai/bosun.js.map +0 -1
- package/dist/src/ai/captain/idle-mode.js.map +0 -1
- package/dist/src/ai/captain/mixin.js.map +0 -1
- package/dist/src/ai/captain/test-mode.js.map +0 -1
- package/dist/src/ai/captain/web-mode.js.map +0 -1
- package/dist/src/ai/captain.js.map +0 -1
- package/dist/src/ai/conversation.js.map +0 -1
- package/dist/src/ai/experience-compactor.js.map +0 -1
- package/dist/src/ai/fisherman-tools.js.map +0 -1
- package/dist/src/ai/fisherman.js.map +0 -1
- package/dist/src/ai/historian.js.map +0 -1
- package/dist/src/ai/navigator.js.map +0 -1
- package/dist/src/ai/pilot.js.map +0 -1
- package/dist/src/ai/planner/session-dedup.js.map +0 -1
- package/dist/src/ai/planner/styles.js.map +0 -1
- package/dist/src/ai/planner/subpages.js.map +0 -1
- package/dist/src/ai/planner.js.map +0 -1
- package/dist/src/ai/provider.js.map +0 -1
- package/dist/src/ai/quartermaster.js.map +0 -1
- package/dist/src/ai/researcher/cache.js.map +0 -1
- package/dist/src/ai/researcher/coordinates.js.map +0 -1
- package/dist/src/ai/researcher/deep-analysis.js.map +0 -1
- package/dist/src/ai/researcher/fingerprint-worker.js.map +0 -1
- package/dist/src/ai/researcher/focus.js.map +0 -1
- package/dist/src/ai/researcher/locators.js.map +0 -1
- package/dist/src/ai/researcher/mixin.js.map +0 -1
- package/dist/src/ai/researcher/parser.js.map +0 -1
- package/dist/src/ai/researcher/research-result.js.map +0 -1
- package/dist/src/ai/researcher.js.map +0 -1
- package/dist/src/ai/rules.js.map +0 -1
- package/dist/src/ai/task-agent.js.map +0 -1
- package/dist/src/ai/tester.js.map +0 -1
- package/dist/src/ai/tools.js.map +0 -1
- package/dist/src/api/api-client.js.map +0 -1
- package/dist/src/api/request-result.js.map +0 -1
- package/dist/src/api/request-store.js.map +0 -1
- package/dist/src/api/spec-reader.js.map +0 -1
- package/dist/src/api/xhr-capture.js.map +0 -1
- package/dist/src/browser-server.js.map +0 -1
- package/dist/src/command-handler.js.map +0 -1
- package/dist/src/commands/add-rule-command.js.map +0 -1
- package/dist/src/commands/base-command.js.map +0 -1
- package/dist/src/commands/clean-command.js.map +0 -1
- package/dist/src/commands/context-aria-command.js.map +0 -1
- package/dist/src/commands/context-command.js.map +0 -1
- package/dist/src/commands/context-data-command.js.map +0 -1
- package/dist/src/commands/context-experience-command.js.map +0 -1
- package/dist/src/commands/context-html-command.js.map +0 -1
- package/dist/src/commands/context-knowledge-command.js.map +0 -1
- package/dist/src/commands/debug-command.js.map +0 -1
- package/dist/src/commands/drill-command.js.map +0 -1
- package/dist/src/commands/exit-command.js.map +0 -1
- package/dist/src/commands/explore-command.js.map +0 -1
- package/dist/src/commands/freesail-command.js.map +0 -1
- package/dist/src/commands/help-command.js.map +0 -1
- package/dist/src/commands/index.js.map +0 -1
- package/dist/src/commands/knows-command.js.map +0 -1
- package/dist/src/commands/learn-command.js.map +0 -1
- package/dist/src/commands/navigate-command.js.map +0 -1
- package/dist/src/commands/path-command.js.map +0 -1
- package/dist/src/commands/plan-clear-command.js.map +0 -1
- package/dist/src/commands/plan-command.js.map +0 -1
- package/dist/src/commands/plan-edit-command.js.map +0 -1
- package/dist/src/commands/plan-load-command.js.map +0 -1
- package/dist/src/commands/plan-reload-command.js.map +0 -1
- package/dist/src/commands/plan-save-command.js.map +0 -1
- package/dist/src/commands/research-command.js.map +0 -1
- package/dist/src/commands/start-command.js.map +0 -1
- package/dist/src/commands/status-command.js.map +0 -1
- package/dist/src/commands/test-command.js.map +0 -1
- package/dist/src/components/ActivityPane.js.map +0 -1
- package/dist/src/components/AddKnowledge.js.map +0 -1
- package/dist/src/components/AddRule.js.map +0 -1
- package/dist/src/components/App.js.map +0 -1
- package/dist/src/components/Autocomplete.js.map +0 -1
- package/dist/src/components/InputPane.js.map +0 -1
- package/dist/src/components/InputReadline.js.map +0 -1
- package/dist/src/components/LogPane.js.map +0 -1
- package/dist/src/components/PlanEditor.js.map +0 -1
- package/dist/src/components/PlanPane.js.map +0 -1
- package/dist/src/components/SessionTimer.js.map +0 -1
- package/dist/src/components/StateTransitionPane.js.map +0 -1
- package/dist/src/components/StatusPane.js.map +0 -1
- package/dist/src/components/TaskPane.js.map +0 -1
- package/dist/src/components/Welcome.js.map +0 -1
- package/dist/src/components/WelcomeChecklist.js.map +0 -1
- package/dist/src/components/WelcomeCommands.js.map +0 -1
- package/dist/src/components/autocomplete-store.js.map +0 -1
- package/dist/src/components/parse-keypress.js.map +0 -1
- package/dist/src/config.js.map +0 -1
- package/dist/src/execution-controller.js.map +0 -1
- package/dist/src/experience-tracker.js.map +0 -1
- package/dist/src/explorbot.js.map +0 -1
- package/dist/src/explorer.js.map +0 -1
- package/dist/src/index.js.map +0 -1
- package/dist/src/knowledge-tracker.js.map +0 -1
- package/dist/src/observability.js.map +0 -1
- package/dist/src/reporter.js.map +0 -1
- package/dist/src/state-manager.js.map +0 -1
- package/dist/src/stats.js.map +0 -1
- package/dist/src/test-plan.js.map +0 -1
- package/dist/src/utils/aria.js.map +0 -1
- package/dist/src/utils/code-extractor.js.map +0 -1
- package/dist/src/utils/context-formatter.js.map +0 -1
- package/dist/src/utils/error-page.js.map +0 -1
- package/dist/src/utils/expandable.js.map +0 -1
- package/dist/src/utils/hooks-runner.js.map +0 -1
- package/dist/src/utils/html-diff.js.map +0 -1
- package/dist/src/utils/html.js.map +0 -1
- package/dist/src/utils/logger.js.map +0 -1
- package/dist/src/utils/loop.js.map +0 -1
- package/dist/src/utils/markdown-parser.js.map +0 -1
- package/dist/src/utils/markdown-query.js.map +0 -1
- package/dist/src/utils/markdown-terminal.js.map +0 -1
- package/dist/src/utils/research-parser.js.map +0 -1
- package/dist/src/utils/retry.js.map +0 -1
- package/dist/src/utils/rules-loader.js.map +0 -1
- package/dist/src/utils/strings.js.map +0 -1
- package/dist/src/utils/test-plan-markdown.js.map +0 -1
- package/dist/src/utils/throttle.js.map +0 -1
- package/dist/src/utils/unique-names.js.map +0 -1
- package/dist/src/utils/url-matcher.js.map +0 -1
- package/dist/src/utils/web-element.js.map +0 -1
- package/dist/src/utils/xpath.js.map +0 -1
- package/prompts/audit-rules.md +0 -124
|
@@ -0,0 +1,918 @@
|
|
|
1
|
+
import { parse, serialize } from 'parse5';
|
|
2
|
+
import type * as parse5TreeAdapter from 'parse5/lib/tree-adapters/default';
|
|
3
|
+
import type { HtmlConfig } from '../config.ts';
|
|
4
|
+
import { TAILWIND_CLASS_PATTERNS, TRASH_HTML_CLASSES, minifyHtml } from './html.ts';
|
|
5
|
+
import { isDynamicId, isGenericClass } from './xpath.ts';
|
|
6
|
+
|
|
7
|
+
/**
 * One entry of `HtmlDiffResult.parts`.
 *
 * NOTE(review): the exact contents of `container` and `subtree` are produced by
 * code outside this view — presumably a locator for the changed region and its
 * serialized markup; confirm against `buildDiffParts`.
 */
export interface HtmlDiffPart {
  container: string;
  subtree: string;
  /** Entries added in this part; `htmlDiff` picks out those prefixed with `ELEMENT:`. */
  added: string[];
  /** Entries removed in this part. */
  removed: string[];
}
|
|
13
|
+
|
|
14
|
+
/**
 * Value resolved by `htmlDiff`.
 */
export interface HtmlDiffResult {
  /** Per-region diff parts. */
  parts: HtmlDiffPart[];
  /** Added entries: line-level additions followed by `ELEMENT:`-prefixed additions taken from `parts`. */
  added: string[];
  /** Removed entries from the line-level comparison. */
  removed: string[];
  /** Similarity score of the two flattened documents (scale defined by `calculateSimilarity`). */
  similarity: number;
  /** Human-readable description of the change set. */
  summary: string;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Simplified tree node used when flattening a document for comparison.
 * Only `type` is mandatory; the remaining fields are optional.
 */
interface HtmlNode {
  type: 'element' | 'text' | 'comment';
  tagName?: string;
  attrs?: { name: string; value: string }[];
  content?: string;
  children?: HtmlNode[];
}
|
|
29
|
+
|
|
30
|
+
/** Element paths for the document skeleton (`html`, `head`, `body`). */
const IGNORED_PATHS = new Set<string>([
  'html[1]',
  'html[1]/head[1]',
  'html[1]/body[1]',
]);

// Short local aliases for the parse5 default tree-adapter node types.
type DocumentNode = parse5TreeAdapter.Document;
type ElementNode = parse5TreeAdapter.Element;
type ParentNode = DocumentNode | ElementNode;

/** Lookup table from a string key to an element node. */
type NodeMap = Map<string, ElementNode>;
|
|
37
|
+
|
|
38
|
+
/**
 * Collect the text beneath an element.
 *
 * Walks the subtree depth-first and concatenates the `value` of every `#text`
 * node in document order. Nodes that are neither text nor have a `childNodes`
 * property contribute nothing.
 *
 * @param element - Root of the subtree to read.
 * @returns The concatenated text, or an empty string when there is none.
 */
function getTextContent(element: parse5TreeAdapter.Element): string {
  let collected = '';

  const visit = (current: parse5TreeAdapter.Node): void => {
    if (current.nodeName === '#text') {
      collected += (current as parse5TreeAdapter.TextNode).value;
      return;
    }
    if ('childNodes' in current) {
      for (const child of current.childNodes) {
        visit(child);
      }
    }
  };

  visit(element);
  return collected;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Read the `<title>` text of a parsed document.
 *
 * Follows the direct-child chain `html` → `head` → `title` (tag names compared
 * case-insensitively) and returns the trimmed text of the first `<title>` found.
 *
 * @param document - Parsed document to inspect.
 * @returns The trimmed title, or `null` when any link of the chain is missing
 *          or the title is empty after trimming.
 */
function getDocumentTitle(document: parse5TreeAdapter.Document): string | null {
  // First direct child element of `parent` whose tag name matches `tag`.
  const childByTag = (parent: parse5TreeAdapter.Document | parse5TreeAdapter.Element, tag: string) =>
    parent.childNodes.find((node): node is parse5TreeAdapter.Element => 'tagName' in node && node.tagName?.toLowerCase() === tag);

  if (!document.childNodes) return null;

  const html = childByTag(document, 'html');
  if (!html?.childNodes) return null;

  const head = childByTag(html, 'head');
  if (!head?.childNodes) return null;

  const title = childByTag(head, 'title');
  if (!title) return null;

  const trimmed = getTextContent(title).trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Ensure the document's `<head>` contains exactly one `<title>` holding `titleText`.
 *
 * Does nothing when `titleText` is empty/null, when the document has no child
 * list, or when it has no `<html>` element. Otherwise:
 * - if `<html>` has no `<head>` child, one is created and inserted immediately
 *   before `<body>`, or as the first child of `<html>` when there is no `<body>`;
 * - if a `<head>` already exists, all of its current children are discarded.
 * A fresh `<title>` with a single text node is then appended to the head.
 *
 * @param document - Parsed document, mutated in place.
 * @param titleText - Title to write; a falsy value leaves the document untouched.
 */
function ensureDocumentTitle(document: parse5TreeAdapter.Document, titleText: string | null): void {
  if (!titleText || !document.childNodes) {
    return;
  }

  const htmlElement = document.childNodes.find((node): node is parse5TreeAdapter.Element => 'tagName' in node && node.tagName?.toLowerCase() === 'html');

  if (!htmlElement) {
    return;
  }

  // Created nodes inherit the namespace of <html>, defaulting to XHTML.
  const namespace = htmlElement.namespaceURI || 'http://www.w3.org/1999/xhtml';

  let headElement = htmlElement.childNodes.find((node): node is parse5TreeAdapter.Element => 'tagName' in node && node.tagName?.toLowerCase() === 'head');

  if (!headElement) {
    headElement = {
      nodeName: 'head',
      tagName: 'head',
      attrs: [],
      namespaceURI: namespace,
      childNodes: [],
      parentNode: htmlElement,
    } as parse5TreeAdapter.Element;

    // Insert head before body if possible, otherwise prepend so that head
    // stays ahead of any other children of <html>.
    const bodyIndex = htmlElement.childNodes.findIndex((node) => 'tagName' in node && node.tagName?.toLowerCase() === 'body');

    if (bodyIndex === -1) {
      htmlElement.childNodes.unshift(headElement);
    } else {
      htmlElement.childNodes.splice(bodyIndex, 0, headElement);
    }
  } else {
    // Existing head content is dropped; only the title written below remains.
    headElement.childNodes = [];
  }

  const titleElement: parse5TreeAdapter.Element = {
    nodeName: 'title',
    tagName: 'title',
    attrs: [],
    namespaceURI: namespace,
    childNodes: [],
    parentNode: headElement,
  };

  const textNode: parse5TreeAdapter.TextNode = {
    nodeName: '#text',
    value: titleText,
  };

  // The parent link is attached after construction, as in the original code.
  (textNode as any).parentNode = titleElement;
  titleElement.childNodes.push(textNode);
  headElement.childNodes.push(titleElement);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Any parse5 node kind that owns a `childNodes` list:
 * a document, a document fragment, or an element.
 */
type ParentNodeLike =
  | parse5TreeAdapter.Document
  | parse5TreeAdapter.DocumentFragment
  | parse5TreeAdapter.Element;
|
|
145
|
+
|
|
146
|
+
/**
 * Type guard: true when `node` is a non-null object whose `childNodes`
 * property is an array.
 *
 * @param node - Arbitrary value to test.
 */
function hasChildNodes(node: unknown): node is ParentNodeLike {
  if (typeof node !== 'object' || node === null) {
    return false;
  }
  if (!('childNodes' in node)) {
    return false;
  }
  return Array.isArray((node as { childNodes: unknown }).childNodes);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Recursively delete comments and unwanted elements from a node tree, in place.
 *
 * Every `#comment` node is removed. Every element whose lowercased tag name is
 * in `tagsToRemove` is removed together with its subtree. Surviving elements,
 * and non-element nodes that carry a `childNodes` array, are processed
 * recursively.
 *
 * @param node - Root whose descendants are filtered; the root itself is never removed.
 * @param tagsToRemove - Lowercase tag names to drop.
 */
function stripElementsByTag(node: ParentNodeLike, tagsToRemove: Set<string>): void {
  const siblings = node.childNodes;
  if (!siblings) return;

  // Walk from the end so a removal never shifts an index still to be visited.
  let index = siblings.length;
  while (index-- > 0) {
    const child = siblings[index];

    if (child.nodeName === '#comment') {
      siblings.splice(index, 1);
      continue;
    }

    if (!('tagName' in child) || !child.tagName) {
      // Not an element: descend only when it actually carries children.
      if (hasChildNodes(child)) {
        stripElementsByTag(child as ParentNodeLike, tagsToRemove);
      }
      continue;
    }

    if (tagsToRemove.has(child.tagName.toLowerCase())) {
      siblings.splice(index, 1);
      continue;
    }

    stripElementsByTag(child as ParentNodeLike, tagsToRemove);
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Produce the flattened line representation of an HTML string.
 *
 * Parses and sanitizes the markup with `parseDocument` (no `HtmlConfig` is
 * passed), picks the root via `getRootNodeForFlatten`, and flattens it.
 *
 * @param html - Raw HTML source.
 * @returns The lines produced by `flattenHtml` for the chosen root.
 */
export function computeHtmlFingerprint(html: string): string[] {
  return flattenHtml(getRootNodeForFlatten(parseDocument(html)));
}
|
|
189
|
+
|
|
190
|
+
/**
 * Compare two HTML documents.
 *
 * Both inputs are parsed and sanitized, flattened to line lists, and compared
 * for similarity and line-level additions/removals. Diff parts are then built
 * from the two documents, and any part-level additions prefixed with
 * `ELEMENT:` are appended to the line-level additions.
 *
 * @param originalHtml - Baseline markup.
 * @param modifiedHtml - Markup to compare against the baseline.
 * @param htmlConfig - Optional HTML configuration, forwarded to `parseDocument`.
 * @returns Parts, added/removed entries, similarity, and a summary. The summary
 *          is the fixed text `'Structural additions detected'` when there are
 *          parts but no added or removed entries; otherwise it comes from
 *          `generateSummary`.
 */
export async function htmlDiff(originalHtml: string, modifiedHtml: string, htmlConfig?: HtmlConfig): Promise<HtmlDiffResult> {
  const originalDocument = parseDocument(originalHtml, htmlConfig);
  const modifiedDocument = parseDocument(modifiedHtml, htmlConfig);

  const originalRoot = getRootNodeForFlatten(originalDocument);
  const modifiedRoot = getRootNodeForFlatten(modifiedDocument);

  const originalLines = flattenHtml(originalRoot);
  const modifiedLines = flattenHtml(modifiedRoot);

  const similarity = calculateSimilarity(originalLines, modifiedLines);
  const lineDiff = findDifferences(originalLines, modifiedLines);
  const removed = lineDiff.removed;

  const parts = await buildDiffParts(originalDocument, modifiedDocument);

  // Line-level additions first, then element-level additions reported by the parts.
  const added = [...lineDiff.added];
  for (const part of parts) {
    for (const entry of part.added) {
      if (entry.startsWith('ELEMENT:')) {
        added.push(entry);
      }
    }
  }

  let summary: string;
  if (added.length + removed.length === 0 && parts.length > 0) {
    summary = 'Structural additions detected';
  } else {
    summary = generateSummary(added, removed, similarity);
  }

  return { parts, added, removed, similarity, summary };
}
|
|
221
|
+
|
|
222
|
+
/**
 * Parse HTML into a document tree prepared for diffing.
 *
 * The title is read before sanitization, the tree is sanitized with
 * `sanitizeDocumentTreeForDiff` (which also strips iframes), and the title is
 * then written back via `ensureDocumentTitle`.
 *
 * NOTE(review): `htmlConfig` is accepted but not used in this function.
 *
 * @param html - Raw HTML source.
 * @param htmlConfig - Optional HTML configuration (currently ignored here).
 * @returns The sanitized document.
 */
function parseDocument(html: string, htmlConfig?: HtmlConfig): DocumentNode {
  const tree = parse(html);

  // Capture the title first: sanitization runs before it is written back.
  const preservedTitle = getDocumentTitle(tree);

  sanitizeDocumentTreeForDiff(tree);
  ensureDocumentTitle(tree, preservedTitle);

  return tree;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Sanitize a document for diffing, in place.
 *
 * Strips every tag listed in `NON_SEMANTIC_TAGS` plus `iframe` from the whole
 * tree, then prunes the document head with `pruneDocumentHeadForDiff`.
 *
 * @param document - Parsed document to clean.
 */
function sanitizeDocumentTreeForDiff(document: parse5TreeAdapter.Document): void {
  // Work on a copy so the shared tag set is never modified.
  const tagsToStrip = new Set(NON_SEMANTIC_TAGS);
  tagsToStrip.add('iframe');

  stripElementsByTag(document, tagsToStrip);
  pruneDocumentHeadForDiff(document);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Reduce the document head to its `<title>` element(s), in place.
 *
 * Finds `<head>` as a direct child of `<html>` (tag names compared
 * case-insensitively) and removes every head child that is not a `title`
 * element, including text and other non-element nodes. Does nothing when the
 * document, `<html>`, or `<head>` is missing or has no child list.
 *
 * @param document - Parsed document to prune.
 */
function pruneDocumentHeadForDiff(document: parse5TreeAdapter.Document): void {
  const isTag = (node: parse5TreeAdapter.Node, tag: string): node is parse5TreeAdapter.Element => 'tagName' in node && node.tagName?.toLowerCase() === tag;

  if (!document.childNodes) return;

  const htmlElement = document.childNodes.find((node): node is parse5TreeAdapter.Element => isTag(node, 'html'));
  if (!htmlElement?.childNodes) return;

  const headElement = htmlElement.childNodes.find((node): node is parse5TreeAdapter.Element => isTag(node, 'head'));
  if (!headElement?.childNodes) return;

  // Remove back-to-front so splicing does not disturb unvisited indices.
  const headChildren = headElement.childNodes;
  let index = headChildren.length;
  while (index-- > 0) {
    if (!isTag(headChildren[index], 'title')) {
      headChildren.splice(index, 1);
    }
  }
}
|
|
276
|
+
|
|
277
|
+
// Local copy of NON_SEMANTIC_TAGS from html.ts, duplicated here to avoid a circular import.
// All names are lower-case.
const NON_SEMANTIC_TAGS = new Set<string>([
  // Styling, scripting and document metadata
  'style', 'script', 'link', 'meta', 'base', 'template', 'slot', 'noscript',
  // Frames and embedded content
  'frame', 'frameset', 'object', 'embed',
  // SVG shapes
  'path', 'polygon', 'polyline', 'circle', 'ellipse', 'line', 'rect',
  // SVG structure and definitions
  'defs', 'g', 'symbol', 'use', 'mask', 'pattern', 'clippath',
  // SVG animation
  'animate', 'animatetransform', 'animatecolor',
]);
|
|
309
|
+
|
|
310
|
+
/**
 * Picks the node that flattening should start from and converts it to an HtmlNode.
 *
 * Preference order: the <body> element, then the <html> element. When neither exists an
 * empty placeholder element with tag name `document` is returned.
 *
 * @param document - Parsed document to search.
 * @returns The converted body/html node, or the empty placeholder.
 */
function getRootNodeForFlatten(document: DocumentNode): HtmlNode {
  const root = findBodyElement(document) ?? findHtmlElement(document);
  if (root) {
    return convertNode(root);
  }

  return { type: 'element', tagName: 'document', children: [] };
}
|
|
330
|
+
|
|
331
|
+
/**
 * Reports whether two elements carry different attributes.
 *
 * The attribute lists differ when their lengths differ, or when any attribute of `current`
 * has no entry with the same name and value in `previous`. Attribute order is ignored.
 * A missing `attrs` list is treated as empty.
 *
 * @param current - Element from the newer document.
 * @param previous - Element from the older document.
 * @returns `true` when the attributes differ.
 */
const attributesDiffer = (current: ElementNode, previous: ElementNode): boolean => {
  const now = current.attrs ?? [];
  const before = previous.attrs ?? [];
  if (now.length !== before.length) return true;

  const beforeByName = new Map<string, string>();
  for (const { name, value } of before) {
    beforeByName.set(name, value);
  }

  return now.some(({ name, value }) => beforeByName.get(name) !== value);
};
|
|
349
|
+
|
|
350
|
+
/**
 * Collects the trimmed, non-empty text of an element's direct `#text` children.
 *
 * Text inside nested elements is not included. Document order is preserved.
 *
 * @param element - Element whose immediate children are inspected.
 * @returns Trimmed text values; empty when the element has no children.
 */
const getDirectTextValues = (element: ElementNode): string[] =>
  (element.childNodes ?? [])
    .filter((child) => child.nodeName === '#text')
    .map((child) => (child as parse5TreeAdapter.TextNode).value.trim())
    .filter((text) => text.length > 0);
|
|
368
|
+
|
|
369
|
+
/**
 * Reports whether two elements differ in their direct text content.
 *
 * Compares the lists returned by getDirectTextValues position by position; a different
 * number of text values also counts as a difference.
 *
 * @param current - Element from the newer document.
 * @param previous - Element from the older document.
 * @returns `true` when the direct text differs.
 */
const directTextDiffer = (current: ElementNode, previous: ElementNode): boolean => {
  const now = getDirectTextValues(current);
  const before = getDirectTextValues(previous);

  return now.length !== before.length || now.some((text, position) => text !== before[position]);
};
|
|
385
|
+
|
|
386
|
+
// Alias of TRASH_HTML_CLASSES, used by filterContainerClasses below.
const trashHtmlClasses = TRASH_HTML_CLASSES;

/**
 * Keeps only the class names that are suitable for building a container selector.
 *
 * A class is dropped when any of the following holds:
 *  - it contains a digit;
 *  - isGenericClass reports it as generic;
 *  - it matches trashHtmlClasses;
 *  - it contains `:` or `__`;
 *  - it matches one of TAILWIND_CLASS_PATTERNS.
 *
 * @param classes - Candidate class names.
 * @returns The surviving class names in their original order.
 */
function filterContainerClasses(classes: string[]): string[] {
  // Checks run in the order listed above and stop at the first one that rejects the class.
  const isNoise = (cls: string): boolean =>
    /\d/.test(cls) ||
    isGenericClass(cls) ||
    trashHtmlClasses.test(cls) ||
    /(:|__)/.test(cls) ||
    TAILWIND_CLASS_PATTERNS.some((pattern) => pattern.test(cls));

  return classes.filter((cls) => !isNoise(cls));
}
|
|
396
|
+
|
|
397
|
+
/**
 * Builds a CSS selector for an element, or returns `null` when none can be built.
 *
 * - An `id` that isDynamicId does not flag yields `#id` immediately.
 * - Otherwise the classes kept by filterContainerClasses yield `tag.class1.class2…`, but only
 *   if exactly one element in `allElements` has that tag and carries all of those classes.
 *
 * NOTE(review): id and class values are interpolated into the selector without CSS escaping.
 *
 * @param element - Element to describe.
 * @param allElements - Every element the class selector must be unique among.
 * @returns The selector, or `null` when the element has no usable id/classes or is ambiguous.
 */
function buildContainerSelector(element: ElementNode, allElements: NodeMap): string | null {
  const tag = element.tagName?.toLowerCase();
  if (!tag) return null;

  const attributeValue = (node: ElementNode, name: string): string | undefined =>
    (node.attrs ?? []).find((attr) => attr.name === name)?.value;

  const id = attributeValue(element, 'id');
  if (id && !isDynamicId(id)) return `#${id}`;

  const classAttr = attributeValue(element, 'class');
  if (!classAttr) return null;

  const meaningful = filterContainerClasses(classAttr.split(/\s+/).filter(Boolean));
  if (meaningful.length === 0) return null;

  // Count same-tag elements carrying every meaningful class; stop as soon as a second one appears.
  let matches = 0;
  for (const candidate of allElements.values()) {
    if (candidate.tagName?.toLowerCase() !== tag) continue;

    const candidateClasses = (attributeValue(candidate, 'class') || '').split(/\s+/);
    if (!meaningful.every((cls) => candidateClasses.includes(cls))) continue;

    matches += 1;
    if (matches > 1) return null;
  }

  return matches === 1 ? `${tag}.${meaningful.join('.')}` : null;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Converts a `/`-separated tree path (e.g. `html[1]/body[1]/div[2]`) into an XPath expression.
 *
 * When the path contains a body segment, everything up to and including it is replaced by
 * `//body`; otherwise the whole path is prefixed with `//`.
 *
 * @param treePath - Path made of `tag[index]` segments joined by `/`.
 * @returns An XPath expression anchored at `//body` when a body segment is present.
 */
function pathToXPath(treePath: string): string {
  const parts = treePath.split('/');

  // Match only the body element itself (`body` or `body[n]`), not other tags whose
  // name merely begins with "body".
  const bodyIdx = parts.findIndex((p) => p === 'body' || p.startsWith('body['));
  if (bodyIdx === -1) return `//${parts.join('/')}`;

  // A path that ends at the body must not produce a trailing slash, which is invalid XPath.
  const rest = parts.slice(bodyIdx + 1);
  return rest.length === 0 ? '//body' : `//body/${rest.join('/')}`;
}
|
|
433
|
+
|
|
434
|
+
/**
 * Finds the nearest ancestor of `topLevelPath` that exists in both documents and returns a
 * selector for it.
 *
 * Ancestors are tried from the direct parent upwards, stopping at the two-segment prefix.
 * Candidates listed in IGNORED_PATHS or missing from either map are skipped. The first
 * remaining candidate is returned with the selector from buildContainerSelector, or with an
 * XPath from pathToXPath when no CSS selector could be built. When no candidate qualifies,
 * the body (`html[1]/body[1]`, selector `body`) is returned.
 *
 * @param topLevelPath - Path of the added or changed element.
 * @param originalMap - Path → element map of the original document.
 * @param modifiedMap - Path → element map of the modified document.
 * @returns The container's path and a selector for it.
 */
function findStableContainer(topLevelPath: string, originalMap: NodeMap, modifiedMap: NodeMap): { path: string; selector: string } {
  const segments = topLevelPath.split('/');

  // `depth` is the number of leading segments kept in the candidate path.
  for (let depth = segments.length - 1; depth >= 2; depth--) {
    const candidatePath = segments.slice(0, depth).join('/');
    if (IGNORED_PATHS.has(candidatePath)) continue;

    const presentInBoth = originalMap.has(candidatePath) && modifiedMap.has(candidatePath);
    if (!presentInBoth) continue;

    // Presence was checked on the line above, so the lookup cannot miss.
    const element = modifiedMap.get(candidatePath)!;
    const selector = buildContainerSelector(element, modifiedMap) || pathToXPath(candidatePath);
    return { path: candidatePath, selector };
  }

  return { path: 'html[1]/body[1]', selector: 'body' };
}
|
|
450
|
+
|
|
451
|
+
/**
 * Builds the diff parts describing what was added or changed in the modified document.
 *
 * Steps:
 *  1. Index both documents by element path (collectElementMap).
 *  2. Classify each non-ignored element of the modified document as added (path missing from
 *     the original) or changed (attributes differ from the original element at that path).
 *  3. Reduce both lists to top-level paths; changed paths at or under an added path are dropped.
 *  4. Group the remaining paths by the container returned by findStableContainer.
 *  5. For each container, rebuild a minimal subtree: a shallow clone of the container, shallow
 *     clones of intermediate ancestors, and a deep clone of each added/changed element. The
 *     subtree is then serialized and minified.
 *
 * Elements removed from the original are not reported: `removed` is always empty.
 *
 * @param originalDocument - Document before the change.
 * @param modifiedDocument - Document after the change.
 * @returns One part per container with a non-empty subtree; empty when nothing was added or changed.
 */
async function buildDiffParts(originalDocument: DocumentNode, modifiedDocument: DocumentNode): Promise<HtmlDiffPart[]> {
  const originalMap = collectElementMap(originalDocument);
  const modifiedMap = collectElementMap(modifiedDocument);

  const addedPaths: string[] = [];
  const changedPaths: string[] = [];

  modifiedMap.forEach((element, path) => {
    if (IGNORED_PATHS.has(path)) return;

    const counterpart = originalMap.get(path);
    if (!counterpart) {
      addedPaths.push(path);
    } else if (attributesDiffer(element, counterpart)) {
      changedPaths.push(path);
    }
  });

  if (addedPaths.length === 0 && changedPaths.length === 0) return [];

  const addedTopLevel = filterTopLevelPaths(addedPaths);
  // A changed element at or inside an added subtree is already covered by that subtree.
  const changedOutsideAdded = changedPaths.filter((path) => addedTopLevel.every((ancestor) => !isSameOrAncestor(ancestor, path)));
  const changedTopLevel = filterTopLevelPaths(changedOutsideAdded);

  const grouped = new Map<string, { selector: string; paths: string[] }>();
  for (const path of [...addedTopLevel, ...changedTopLevel]) {
    const container = findStableContainer(path, originalMap, modifiedMap);
    const bucket = grouped.get(container.path);
    if (bucket) {
      bucket.paths.push(path);
    } else {
      grouped.set(container.path, { selector: container.selector, paths: [path] });
    }
  }

  const parts: HtmlDiffPart[] = [];

  for (const [containerPath, { selector, paths }] of grouped) {
    const containerElement = modifiedMap.get(containerPath);
    if (!containerElement) continue;

    // diffMap tracks the clones created so far, keyed by their path in the modified document.
    const rootClone = cloneElementShallow(containerElement);
    const diffMap: NodeMap = new Map();
    diffMap.set(containerPath, rootClone);

    const containerDepth = containerPath.split('/').length;

    for (const path of paths) {
      const segments = path.split('/');

      // Recreate the ancestors strictly between the container and the target element.
      for (let depth = containerDepth + 1; depth < segments.length; depth++) {
        const ancestorPath = segments.slice(0, depth).join('/');
        if (diffMap.has(ancestorPath)) continue;

        const ancestorSource = modifiedMap.get(ancestorPath);
        const ancestorParent = diffMap.get(segments.slice(0, depth - 1).join('/'));
        if (!ancestorSource || !ancestorParent) continue;

        const ancestorClone = cloneElementShallow(ancestorSource);
        appendChild(ancestorParent, ancestorClone);
        diffMap.set(ancestorPath, ancestorClone);
      }

      const target = modifiedMap.get(path);
      const targetParent = diffMap.get(getParentPath(path));
      if (!target || !targetParent) continue;

      appendChild(targetParent, cloneElementDeep(target));
    }

    // The cast supplies a minimal fragment-shaped object for `serialize`.
    const serialized = serialize({ childNodes: [rootClone], nodeName: '#document-fragment' } as any).trim();
    const subtree = serialized ? await minifyHtml(serialized) : '';
    if (!subtree) continue;

    parts.push({
      container: selector,
      subtree,
      added: paths.filter((p) => addedTopLevel.includes(p)).map((p) => `ELEMENT:${p}`),
      removed: [],
    });
  }

  return parts;
}
|
|
538
|
+
|
|
539
|
+
/**
 * Indexes every element under <html> by a path built from `tag[n]` segments.
 *
 * `n` is the 1-based position of the element among its siblings with the same lower-cased
 * tag name, e.g. `html[1]/body[1]/div[2]`. Non-element nodes are ignored.
 *
 * @param document - Parsed document to index.
 * @returns Path → element map; empty when the document has no <html> element.
 */
function collectElementMap(document: DocumentNode): NodeMap {
  const map: NodeMap = new Map();

  const visit = (element: ElementNode, currentPath: string): void => {
    map.set(currentPath, element);

    // Per-parent counter of how many children of each tag have been seen so far.
    const seenPerTag = new Map<string, number>();

    for (const child of element.childNodes ?? []) {
      if (!('tagName' in child) || !child.tagName) continue;

      const tagName = child.tagName.toLowerCase();
      const position = (seenPerTag.get(tagName) ?? 0) + 1;
      seenPerTag.set(tagName, position);

      visit(child as ElementNode, `${currentPath}/${tagName}[${position}]`);
    }
  };

  const htmlElement = findHtmlElement(document);
  if (htmlElement) {
    visit(htmlElement, 'html[1]');
  }

  return map;
}
|
|
572
|
+
|
|
573
|
+
/**
 * Reduces a list of paths to those not at or under another path in the list.
 *
 * Duplicates are removed first. Paths are then processed shortest string first, so an
 * ancestor is always seen before any of its descendants.
 *
 * @param paths - Element paths, possibly with duplicates and nested entries.
 * @returns The top-level paths, ordered by ascending string length.
 */
function filterTopLevelPaths(paths: string[]): string[] {
  const shortestFirst = [...new Set(paths)].sort((a, b) => a.length - b.length);

  return shortestFirst.reduce<string[]>((kept, path) => {
    const alreadyCovered = kept.some((existing) => isSameOrAncestor(existing, path));
    if (!alreadyCovered) {
      kept.push(path);
    }
    return kept;
  }, []);
}
|
|
588
|
+
|
|
589
|
+
/**
 * Returns the path with its last `/`-separated segment removed.
 *
 * @param path - Element path such as `html[1]/body[1]/div[2]`.
 * @returns The parent path, or an empty string when `path` contains no `/`.
 */
function getParentPath(path: string): string {
  const cut = path.lastIndexOf('/');
  return cut === -1 ? '' : path.slice(0, cut);
}
|
|
596
|
+
|
|
597
|
+
/**
 * Tells whether `ancestor` is the same path as `path` or a prefix of it ending at a `/` boundary.
 *
 * The boundary check keeps `a/b[1]` from being treated as an ancestor of `a/b[10]`.
 *
 * @param ancestor - Candidate ancestor path.
 * @param path - Path being tested.
 * @returns `true` when the paths are equal or `path` lies below `ancestor`.
 */
function isSameOrAncestor(ancestor: string, path: string): boolean {
  if (!path.startsWith(ancestor)) return false;

  // Either nothing follows the shared prefix, or the next character starts a new segment.
  return path.length === ancestor.length || path[ancestor.length] === '/';
}
|
|
600
|
+
|
|
601
|
+
/**
 * Appends `child` to the end of `parent.childNodes` and sets the child's `parentNode` to `parent`.
 *
 * A missing `childNodes` list is created first. The child is not detached from any previous parent.
 *
 * @param parent - Node receiving the child.
 * @param child - Node to append.
 */
function appendChild(parent: ParentNode, child: parse5TreeAdapter.Node): void {
  let siblings = parent.childNodes;
  if (!siblings) {
    siblings = [];
    parent.childNodes = siblings;
  }

  siblings.push(child);
  (child as parse5TreeAdapter.Node & { parentNode?: ParentNode }).parentNode = parent;
}
|
|
609
|
+
|
|
610
|
+
/**
 * Copies an element without its children.
 *
 * The clone keeps `nodeName`, `tagName` and `namespaceURI`, receives its own copy of each
 * attribute object, and starts with an empty `childNodes` list. No other property of the
 * source element is carried over.
 *
 * @param element - Element to copy.
 * @returns A new, childless element.
 */
function cloneElementShallow(element: ElementNode): ElementNode {
  const { nodeName, tagName, namespaceURI } = element;
  const attrs = (element.attrs || []).map((attr) => ({ ...attr }));

  return { nodeName, tagName, attrs, childNodes: [], namespaceURI };
}
|
|
619
|
+
|
|
620
|
+
/**
 * Copies an element together with its whole subtree.
 *
 * The element is copied with cloneElementShallow; each child is copied with cloneNodeDeep
 * and attached with appendChild, in the original order.
 *
 * @param element - Root of the subtree to copy.
 * @returns The cloned subtree.
 */
function cloneElementDeep(element: ElementNode): ElementNode {
  const copy = cloneElementShallow(element);

  (element.childNodes ?? []).forEach((child) => {
    appendChild(copy, cloneNodeDeep(child));
  });

  return copy;
}
|
|
634
|
+
|
|
635
|
+
/**
 * Copies any node: elements recursively, text and comment nodes by value.
 *
 * - Nodes with a tag name are copied by cloneElementDeep.
 * - `#text` and `#comment` nodes become new objects holding only `nodeName` and their
 *   `value` / `data`.
 * - Any other node is copied with an object spread, so its nested references are shared
 *   with the source.
 *
 * @param node - Node to copy.
 * @returns The copy.
 */
function cloneNodeDeep(node: parse5TreeAdapter.Node): parse5TreeAdapter.Node {
  if ('tagName' in node && node.tagName) {
    return cloneElementDeep(node as ElementNode);
  }

  switch (node.nodeName) {
    case '#text':
      return {
        nodeName: '#text',
        value: (node as parse5TreeAdapter.TextNode).value,
      } as parse5TreeAdapter.TextNode;

    case '#comment':
      return {
        nodeName: '#comment',
        data: (node as parse5TreeAdapter.CommentNode).data,
      } as parse5TreeAdapter.CommentNode;

    default:
      return { ...node };
  }
}
|
|
658
|
+
|
|
659
|
+
/**
 * Returns the first direct child of the document whose tag name is `html` (case-insensitive).
 *
 * @param document - Parsed document to search.
 * @returns The <html> element, or `null` when there is none or the document has no children.
 */
function findHtmlElement(document: DocumentNode): ElementNode | null {
  const match = document.childNodes?.find((node) => 'tagName' in node && !!node.tagName && node.tagName.toLowerCase() === 'html');

  return match ? (match as ElementNode) : null;
}
|
|
672
|
+
|
|
673
|
+
/**
 * Returns the first direct child of <html> whose tag name is `head` (case-insensitive).
 *
 * @param document - Parsed document to search.
 * @returns The <head> element, or `null` when <html> or <head> is missing.
 */
function findHeadElement(document: DocumentNode): ElementNode | null {
  const htmlChildren = findHtmlElement(document)?.childNodes ?? [];
  const head = htmlChildren.find((node) => 'tagName' in node && !!node.tagName && node.tagName.toLowerCase() === 'head');

  return head ? (head as ElementNode) : null;
}
|
|
687
|
+
|
|
688
|
+
/**
 * Returns the first direct child of <html> whose tag name is `body` (case-insensitive).
 *
 * @param document - Parsed document to search.
 * @returns The <body> element, or `null` when <html> or <body> is missing.
 */
function findBodyElement(document: DocumentNode): ElementNode | null {
  const htmlChildren = findHtmlElement(document)?.childNodes ?? [];
  const body = htmlChildren.find((node) => 'tagName' in node && !!node.tagName && node.tagName.toLowerCase() === 'body');

  return body ? (body as ElementNode) : null;
}
|
|
702
|
+
|
|
703
|
+
/**
 * Converts a parse5 node into the simplified HtmlNode shape used for textual diffing.
 *
 * - `#text` → `{ type: 'text', content }` with the value trimmed.
 * - `#comment` → `{ type: 'comment', content }` with the raw comment data.
 * - Elements → `{ type: 'element', tagName }` with a lower-cased tag name, `attrs` when the
 *   element has any, and `children` converted recursively. Whitespace-only text children are
 *   skipped. When the only remaining child is text, it is hoisted into `content` and
 *   `children` is set to `undefined`.
 * - Anything else → an empty text node.
 *
 * @param node - parse5 node to convert.
 * @returns The simplified representation.
 */
function convertNode(node: parse5TreeAdapter.Node): HtmlNode {
  switch (node.nodeName) {
    case '#text':
      return { type: 'text', content: (node as parse5TreeAdapter.TextNode).value.trim() };
    case '#comment':
      return { type: 'comment', content: (node as parse5TreeAdapter.CommentNode).data };
  }

  if (!('tagName' in node) || !node.tagName) {
    return { type: 'text', content: '' };
  }

  const element = node as ElementNode;
  const htmlNode: HtmlNode = { type: 'element', tagName: element.tagName.toLowerCase() };

  if (element.attrs && element.attrs.length > 0) {
    htmlNode.attrs = element.attrs.map(({ name, value }) => ({ name, value }));
  }

  const children = (element.childNodes ?? [])
    .filter((child) => child.nodeName !== '#text' || (child as parse5TreeAdapter.TextNode).value.trim().length > 0)
    .map((child) => convertNode(child));

  if (children.length > 0) {
    htmlNode.children = children;
  }

  // A lone text child is folded into `content` so leaf elements compare by their text.
  if (children.length === 1 && children[0].type === 'text') {
    htmlNode.content = children[0].content;
    htmlNode.children = undefined;
  }

  return htmlNode;
}
|
|
767
|
+
|
|
768
|
+
/**
 * Flattens an HtmlNode tree into an ordered list of comparison lines.
 *
 * Depth-first, in document order:
 *  - text nodes with at least 5 characters produce `TEXT:<content>`;
 *  - interactive elements (per isInteractiveElement) produce `<TAG>:<content>` using
 *    getElementContent, or just `<TAG>` when there is no content; their children are not visited;
 *  - other elements produce `TEXT:<content>` for content of at least 5 characters, then
 *    their children are visited;
 *  - comments and shorter texts produce nothing.
 *
 * @param node - Root of the tree to flatten.
 * @returns The collected lines.
 */
function flattenHtml(node: HtmlNode): string[] {
  const lines: string[] = [];

  const visit = (current: HtmlNode): void => {
    if (current.type === 'text' && current.content) {
      if (current.content.length >= 5) {
        lines.push(`TEXT:${current.content}`);
      }
      return;
    }

    // Comments, empty text nodes and tagless elements contribute nothing.
    if (current.type !== 'element' || !current.tagName) return;

    if (isInteractiveElement(current)) {
      const tag = current.tagName.toUpperCase();
      const label = getElementContent(current);
      lines.push(label ? `${tag}:${label}` : tag);
      return;
    }

    if (current.content && current.content.length >= 5) {
      lines.push(`TEXT:${current.content}`);
    }

    for (const child of current.children ?? []) {
      visit(child);
    }
  };

  visit(node);
  return lines;
}
|
|
810
|
+
|
|
811
|
+
/**
 * Tells whether a node is an interactive element.
 *
 * A node qualifies when its tag is one of `a`, `button`, `input`, `select`, `textarea`,
 * `details`, `summary`, or when its `role` attribute is one of `button`, `link`, `checkbox`,
 * `radio`, `combobox`, `listbox`, `textbox`, `switch`, `tab`. The comparison is exact, so the
 * tag name is expected to be lower-case already.
 *
 * @param node - Node to classify.
 * @returns `true` for interactive elements, `false` otherwise (including nodes without a tag name).
 */
function isInteractiveElement(node: HtmlNode): boolean {
  const { tagName, attrs } = node;
  if (!tagName) return false;

  switch (tagName) {
    case 'a':
    case 'button':
    case 'input':
    case 'select':
    case 'textarea':
    case 'details':
    case 'summary':
      return true;
  }

  const interactiveRoles = ['button', 'link', 'checkbox', 'radio', 'combobox', 'listbox', 'textbox', 'switch', 'tab'];
  const roleAttr = attrs?.find((attr) => attr.name === 'role');

  return roleAttr !== undefined && interactiveRoles.includes(roleAttr.value);
}
|
|
831
|
+
|
|
832
|
+
/**
 * Returns a short label for an element.
 *
 * Non-empty `content` wins. Otherwise the first attribute present is used:
 * `placeholder`, `value`, `name` for `input`; `href` for `a`. An attribute that is present
 * but empty still ends the search and yields an empty string.
 *
 * @param node - Element to describe.
 * @returns The label, or an empty string when nothing applies.
 */
function getElementContent(node: HtmlNode): string {
  if (node.content) return node.content;

  const attrs = node.attrs;
  if (!attrs) return '';

  // Attribute names to try, in priority order, for the tags that have a fallback.
  let fallbackNames: string[] = [];
  if (node.tagName === 'input') {
    fallbackNames = ['placeholder', 'value', 'name'];
  } else if (node.tagName === 'a') {
    fallbackNames = ['href'];
  }

  for (const attrName of fallbackNames) {
    const match = attrs.find((attr) => attr.name === attrName);
    if (match) return match.value;
  }

  return '';
}
|
|
865
|
+
|
|
866
|
+
/**
 * Computes how similar two line lists are, as a whole-number percentage.
 *
 * Both lists are treated as sets; the result is the size of their intersection divided by
 * the size of their union, times 100 and rounded. Two empty lists count as 100% similar.
 *
 * @param lines1 - First list of lines.
 * @param lines2 - Second list of lines.
 * @returns An integer from 0 to 100.
 */
function calculateSimilarity(lines1: string[], lines2: string[]): number {
  const first = new Set(lines1);
  const second = new Set(lines2);

  let shared = 0;
  for (const line of first) {
    if (second.has(line)) shared += 1;
  }

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const unionSize = first.size + second.size - shared;
  if (unionSize === 0) return 100;

  return Math.round((shared / unionSize) * 100);
}
|
|
879
|
+
|
|
880
|
+
/**
 * Computes which distinct lines were added and which were removed between two line lists.
 *
 * Duplicates and ordering within each input are ignored; each result keeps the first-seen
 * order of its source list.
 *
 * @param lines1 - Lines before the change.
 * @param lines2 - Lines after the change.
 * @returns `added`: lines only in `lines2`; `removed`: lines only in `lines1`.
 */
function findDifferences(
  lines1: string[],
  lines2: string[]
): {
  added: string[];
  removed: string[];
} {
  const before = new Set(lines1);
  const after = new Set(lines2);

  // Distinct entries of `source` that `other` does not contain.
  const onlyIn = (source: Set<string>, other: Set<string>): string[] => {
    const exclusive: string[] = [];
    for (const line of source) {
      if (!other.has(line)) exclusive.push(line);
    }
    return exclusive;
  };

  return { added: onlyIn(after, before), removed: onlyIn(before, after) };
}
|
|
895
|
+
|
|
896
|
+
/**
 * Produces a one-line, human-readable summary of a diff.
 *
 * Returns `No changes detected` when there are no additions or removals. Otherwise joins,
 * with `, `, whichever of these apply: `<n>% similar` (only when similarity is below 100),
 * `<n> addition(s)`, `<n> removal(s)`.
 *
 * @param added - Added lines.
 * @param removed - Removed lines.
 * @param similarity - Similarity percentage, as returned by calculateSimilarity.
 * @returns The summary sentence.
 */
function generateSummary(added: string[], removed: string[], similarity: number): string {
  if (added.length + removed.length === 0) {
    return 'No changes detected';
  }

  const counted = (count: number, noun: string): string => `${count} ${noun}${count > 1 ? 's' : ''}`;

  const fragments = [
    similarity < 100 ? `${similarity}% similar` : null,
    added.length > 0 ? counted(added.length, 'addition') : null,
    removed.length > 0 ? counted(removed.length, 'removal') : null,
  ];

  return fragments.filter((fragment): fragment is string => fragment !== null).join(', ');
}
|