design-clone 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +13 -34
  2. package/SKILL.md +69 -45
  3. package/bin/cli.js +22 -4
  4. package/bin/commands/clone-site.js +31 -171
  5. package/bin/commands/help.js +19 -6
  6. package/bin/commands/init.js +9 -86
  7. package/bin/commands/uninstall.js +105 -0
  8. package/bin/commands/update.js +70 -0
  9. package/bin/commands/verify.js +7 -14
  10. package/bin/utils/paths.js +28 -0
  11. package/bin/utils/validate.js +2 -22
  12. package/bin/utils/version.js +23 -0
  13. package/docs/code-standards.md +789 -0
  14. package/docs/codebase-summary.md +533 -286
  15. package/docs/index.md +74 -0
  16. package/docs/project-overview-pdr.md +797 -0
  17. package/docs/system-architecture.md +718 -0
  18. package/package.json +14 -17
  19. package/src/ai/prompts/design-tokens/basic.md +80 -0
  20. package/src/ai/prompts/design-tokens/section-with-css.md +41 -0
  21. package/src/ai/prompts/design-tokens/section.md +48 -0
  22. package/src/ai/prompts/design-tokens/with-css.md +87 -0
  23. package/src/ai/prompts/structure-analysis/basic.md +55 -0
  24. package/src/ai/prompts/structure-analysis/with-context.md +59 -0
  25. package/src/ai/prompts/structure-analysis/with-dimensions.md +63 -0
  26. package/src/ai/prompts/structure-analysis/with-hierarchy.md +73 -0
  27. package/src/ai/prompts/ux-audit/aggregation.md +42 -0
  28. package/src/ai/prompts/ux-audit/desktop.md +92 -0
  29. package/src/ai/prompts/ux-audit/mobile.md +93 -0
  30. package/src/ai/prompts/ux-audit/tablet.md +92 -0
  31. package/src/core/animation/animation-extractor-ast.js +183 -0
  32. package/src/core/animation/animation-extractor-output.js +152 -0
  33. package/src/core/animation/animation-extractor.js +178 -0
  34. package/src/core/animation/state-capture-detection.js +200 -0
  35. package/src/core/animation/state-capture.js +193 -0
  36. package/src/core/capture/browser-context-pool.js +96 -0
  37. package/src/core/capture/multi-page-screenshot-page.js +110 -0
  38. package/src/core/capture/multi-page-screenshot.js +208 -0
  39. package/src/core/capture/screenshot-extraction.js +186 -0
  40. package/src/core/capture/screenshot-helpers.js +175 -0
  41. package/src/core/capture/screenshot-orchestrator.js +174 -0
  42. package/src/core/capture/screenshot-viewport.js +93 -0
  43. package/src/core/capture/screenshot.js +192 -0
  44. package/src/core/content/content-counter-dom.js +191 -0
  45. package/src/core/content/content-counter.js +76 -0
  46. package/src/core/css/breakpoint-detector.js +66 -0
  47. package/src/core/css/chromium-defaults.json +23 -0
  48. package/src/core/css/computed-style-extractor.js +102 -0
  49. package/src/core/css/css-chunker.js +103 -0
  50. package/src/core/css/filter-css-dead-code.js +120 -0
  51. package/src/core/css/filter-css-html-analyzer.js +110 -0
  52. package/src/core/css/filter-css-selector-matcher.js +172 -0
  53. package/src/core/css/filter-css.js +206 -0
  54. package/src/core/css/merge-css-atrule-processor.js +158 -0
  55. package/src/core/css/merge-css-file-io.js +68 -0
  56. package/src/core/css/merge-css.js +148 -0
  57. package/src/core/detection/framework-detector-routing.js +68 -0
  58. package/src/core/detection/framework-detector-signals.js +65 -0
  59. package/src/core/detection/framework-detector.js +198 -0
  60. package/src/core/dimension/dimension-extractor-card-detector.js +82 -0
  61. package/src/core/dimension/dimension-extractor.js +317 -0
  62. package/src/core/dimension/dimension-output-ai-summary.js +111 -0
  63. package/src/core/dimension/dimension-output.js +173 -0
  64. package/src/core/dimension/dom-tree-analyzer-tree-builders.js +95 -0
  65. package/src/core/dimension/dom-tree-analyzer.js +191 -0
  66. package/src/core/discovery/app-state-snapshot-capture.js +195 -0
  67. package/src/core/discovery/app-state-snapshot-utils.js +178 -0
  68. package/src/core/discovery/app-state-snapshot.js +131 -0
  69. package/src/core/discovery/discover-pages-routes.js +84 -0
  70. package/src/core/discovery/discover-pages-utils.js +177 -0
  71. package/src/core/discovery/discover-pages.js +191 -0
  72. package/src/core/html/html-extractor-inline-styler.js +70 -0
  73. package/src/core/html/html-extractor.js +147 -0
  74. package/src/core/html/semantic-enhancer-mappings.js +200 -0
  75. package/src/core/html/semantic-enhancer-page.js +148 -0
  76. package/src/core/html/semantic-enhancer.js +135 -0
  77. package/src/core/links/rewrite-links-css-rewriter.js +53 -0
  78. package/src/core/links/rewrite-links.js +173 -0
  79. package/src/core/media/asset-validator.js +118 -0
  80. package/src/core/media/extract-assets-downloader.js +187 -0
  81. package/src/core/media/extract-assets-page-scraper.js +115 -0
  82. package/src/core/media/extract-assets.js +159 -0
  83. package/src/core/media/video-capture-convert.js +200 -0
  84. package/src/core/media/video-capture.js +201 -0
  85. package/src/core/{lazy-loader.js → page-prep/lazy-loader.js} +37 -39
  86. package/src/core/section/section-cropper-helpers.js +43 -0
  87. package/src/core/{section-cropper.js → section/section-cropper.js} +11 -88
  88. package/src/core/section/section-detector-strategies.js +139 -0
  89. package/src/core/section/section-detector-utils.js +100 -0
  90. package/src/core/section/section-detector.js +88 -0
  91. package/src/core/tests/test-section-cropper.js +2 -2
  92. package/src/core/tests/test-section-detector.js +2 -2
  93. package/src/post-process/enhance-assets.js +29 -4
  94. package/src/post-process/fetch-images-unsplash-client.js +123 -0
  95. package/src/post-process/fetch-images.js +60 -263
  96. package/src/post-process/inject-gosnap.js +88 -0
  97. package/src/post-process/inject-icons-svg-replacer.js +76 -0
  98. package/src/post-process/inject-icons.js +47 -200
  99. package/src/route-discoverers/base-discoverer-utils.js +137 -0
  100. package/src/route-discoverers/base-discoverer.js +29 -118
  101. package/src/route-discoverers/index.js +1 -1
  102. package/src/shared/config.js +38 -0
  103. package/src/shared/error-codes.js +31 -0
  104. package/src/shared/viewports.js +46 -0
  105. package/src/utils/browser.js +0 -7
  106. package/src/utils/helpers.js +4 -0
  107. package/src/utils/log.js +12 -0
  108. package/src/utils/playwright-loader.js +76 -0
  109. package/src/utils/playwright.js +3 -69
  110. package/src/utils/progress.js +32 -0
  111. package/src/verification/generate-audit-report-css-fixes.js +52 -0
  112. package/src/verification/generate-audit-report-sections.js +158 -0
  113. package/src/verification/generate-audit-report.js +5 -281
  114. package/src/verification/quality-scorer.js +92 -0
  115. package/src/verification/verify-footer-checks.js +103 -0
  116. package/src/verification/verify-footer-helpers.js +178 -0
  117. package/src/verification/verify-footer.js +23 -381
  118. package/src/verification/verify-header-checks.js +104 -0
  119. package/src/verification/verify-header-helpers.js +156 -0
  120. package/src/verification/verify-header.js +23 -365
  121. package/src/verification/verify-layout-report.js +101 -0
  122. package/src/verification/verify-layout.js +13 -259
  123. package/src/verification/verify-menu-checks.js +104 -0
  124. package/src/verification/verify-menu-helpers.js +112 -0
  125. package/src/verification/verify-menu.js +17 -285
  126. package/src/verification/verify-slider-checks.js +115 -0
  127. package/src/verification/verify-slider-constants.js +65 -0
  128. package/src/verification/verify-slider-helpers.js +164 -0
  129. package/src/verification/verify-slider.js +23 -414
  130. package/.env.example +0 -14
  131. package/docs/basic-clone.md +0 -63
  132. package/docs/cli-reference.md +0 -316
  133. package/docs/design-clone-architecture.md +0 -492
  134. package/docs/pixel-perfect.md +0 -117
  135. package/docs/project-roadmap.md +0 -382
  136. package/docs/troubleshooting.md +0 -170
  137. package/requirements.txt +0 -5
  138. package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
  139. package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
  140. package/src/ai/analyze-structure.py +0 -375
  141. package/src/ai/extract-design-tokens.py +0 -782
  142. package/src/ai/prompts/__init__.py +0 -2
  143. package/src/ai/prompts/__pycache__/__init__.cpython-313.pyc +0 -0
  144. package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
  145. package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
  146. package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
  147. package/src/ai/prompts/design_tokens.py +0 -316
  148. package/src/ai/prompts/structure_analysis.py +0 -592
  149. package/src/ai/prompts/ux_audit.py +0 -198
  150. package/src/ai/ux-audit.js +0 -596
  151. package/src/core/animation-extractor.js +0 -526
  152. package/src/core/app-state-snapshot.js +0 -511
  153. package/src/core/content-counter.js +0 -342
  154. package/src/core/design-tokens.js +0 -103
  155. package/src/core/dimension-extractor.js +0 -438
  156. package/src/core/dimension-output.js +0 -305
  157. package/src/core/discover-pages.js +0 -542
  158. package/src/core/dom-tree-analyzer.js +0 -298
  159. package/src/core/extract-assets.js +0 -468
  160. package/src/core/filter-css.js +0 -499
  161. package/src/core/framework-detector.js +0 -538
  162. package/src/core/html-extractor.js +0 -212
  163. package/src/core/merge-css.js +0 -407
  164. package/src/core/multi-page-screenshot.js +0 -380
  165. package/src/core/rewrite-links.js +0 -226
  166. package/src/core/screenshot.js +0 -701
  167. package/src/core/section-detector.js +0 -386
  168. package/src/core/semantic-enhancer.js +0 -492
  169. package/src/core/state-capture.js +0 -598
  170. package/src/core/video-capture.js +0 -546
  171. package/src/utils/__init__.py +0 -16
  172. package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  173. package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
  174. package/src/utils/env.py +0 -134
  175. /package/src/core/{css-extractor.js → css/css-extractor.js} +0 -0
  176. /package/src/core/{cookie-handler.js → page-prep/cookie-handler.js} +0 -0
  177. /package/src/core/{page-readiness.js → page-prep/page-readiness.js} +0 -0
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Semantic HTML mappings and section detection logic.
3
+ *
4
+ * Contains WordPress-compatible semantic mappings (IDs, classes, ARIA roles),
5
+ * class pattern definitions, and the detectSectionType / applySemanticAttributes
6
+ * / handleMultipleNavs functions used in Node.js context.
7
+ * Used by semantic-enhancer.js (main module).
8
+ */
9
+
10
+ // ============================================================================
11
+ // Constants
12
+ // ============================================================================
13
+
/**
 * WordPress-compatible semantic attributes, keyed by section type.
 *
 * Each entry carries:
 *   - id:      the ID assigned to the section when it has none
 *   - classes: class names appended to whatever the element already has
 *   - role:    the ARIA landmark role, or null when no role is applied
 */
export const SEMANTIC_MAPPINGS = {
  header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
  nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
  main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
  sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
  footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
  // A hero has no ARIA landmark role.
  hero: { id: 'hero-section', classes: ['hero'], role: null }
};
49
+
/**
 * Lower-case class-name fragments that hint at a section type.
 *
 * Key order matters: detection walks the entries in insertion order and
 * the first section type with a matching fragment wins.
 */
export const CLASS_PATTERNS = {
  header: [
    'header',
    'masthead',
    'site-header',
    'page-header'
  ],
  nav: [
    'nav',
    'menu',
    'navigation'
  ],
  main: [
    'main',
    'content',
    'page-content'
  ],
  sidebar: [
    'sidebar',
    'aside',
    'widget-area'
  ],
  footer: [
    'footer',
    'site-footer',
    'page-footer'
  ],
  hero: [
    'hero',
    'banner',
    'jumbotron',
    'splash'
  ]
};
61
+
62
+ // ============================================================================
63
+ // Detection & Application
64
+ // ============================================================================
65
+
// Lookup tables for the two exact-match detection stages.
const SECTION_BY_TAG = new Map([
  ['header', 'header'],
  ['nav', 'nav'],
  ['main', 'main'],
  ['aside', 'sidebar'],
  ['footer', 'footer']
]);

const SECTION_BY_ROLE = new Map([
  ['banner', 'header'],
  ['navigation', 'nav'],
  ['main', 'main'],
  ['complementary', 'sidebar'],
  ['contentinfo', 'footer']
]);

// Only generic containers may be classified from their class names.
const CLASS_MATCH_TAGS = new Set(['div', 'section', 'article']);

/**
 * Work out which section type an element represents.
 *
 * Detection stages, first hit wins:
 *   1. the element's tag (header, nav, main, aside, footer)
 *   2. its `role` attribute (banner, navigation, main, complementary, contentinfo)
 *   3. a substring match of its lower-cased class string against
 *      CLASS_PATTERNS, tried in the key order of that object and accepted
 *      only for div / section / article elements
 *
 * @param {Element} element - DOM element to analyze
 * @returns {string|null} Section type or null
 */
export function detectSectionType(element) {
  const tagName = element.tagName?.toLowerCase();
  const roleAttr = element.getAttribute?.('role');

  const sectionFromTag = SECTION_BY_TAG.get(tagName);
  if (sectionFromTag) return sectionFromTag;

  const sectionFromRole = SECTION_BY_ROLE.get(roleAttr);
  if (sectionFromRole) return sectionFromRole;

  // toString() keeps non-string className values from throwing.
  const classText = (element.className || '').toString().toLowerCase();
  if (!classText) return null;

  // Non-container tags can never be classified by class name.
  if (!CLASS_MATCH_TAGS.has(tagName)) return null;

  const matchedEntry = Object.entries(CLASS_PATTERNS).find(
    ([, fragments]) => fragments.some(fragment => classText.includes(fragment))
  );
  return matchedEntry ? matchedEntry[0] : null;
}
110
+
/**
 * Apply semantic attributes to an element.
 *
 * Rules:
 * - Add an ID only if the element has none. The mapping's base ID is tried
 *   first, then `<base>-<navIndex + 1>`, then increasing suffixes until an
 *   ID not present in `usedIds` is found. Every assigned ID is recorded in
 *   `usedIds`.
 * - Append the mapping's classes, preserving existing ones and their order.
 * - Set the mapping's role only if the element has no role attribute and
 *   the mapping defines one.
 *
 * Unknown section types are ignored.
 *
 * @param {Element} element - DOM element to enhance (mutated in place)
 * @param {string} sectionType - Type from SEMANTIC_MAPPINGS
 * @param {Object} [options] - Configuration options
 * @param {Set} [options.usedIds] - Track used IDs to avoid duplicates
 * @param {number} [options.navIndex=0] - Index for multiple nav labeling
 * @returns {void}
 */
export function applySemanticAttributes(element, sectionType, options = {}) {
  const mapping = SEMANTIC_MAPPINGS[sectionType];
  if (!mapping) return;

  // `?? {}` keeps an explicit null from breaking the destructuring.
  const { usedIds = new Set(), navIndex = 0 } = options ?? {};

  // Add ID only if not present. The first two candidates match the
  // historical behavior; further suffixes guarantee an ID is still
  // assigned when both of those are already taken.
  if (!element.id && mapping.id) {
    let targetId = mapping.id;
    let suffix = navIndex + 1;

    while (usedIds.has(targetId)) {
      targetId = `${mapping.id}-${suffix}`;
      suffix += 1;
    }

    element.id = targetId;
    usedIds.add(targetId);
  }

  // Append classes (preserve existing)
  if (mapping.classes && mapping.classes.length > 0) {
    const existingClasses = element.className
      ? element.className.toString().split(/\s+/).filter(Boolean)
      : [];
    const newClasses = mapping.classes.filter(c => !existingClasses.includes(c));

    if (newClasses.length > 0) {
      element.className = [...existingClasses, ...newClasses].join(' ').trim();
    }
  }

  // Set role only if not present
  if (mapping.role && !element.getAttribute('role')) {
    element.setAttribute('role', mapping.role);
  }
}
162
+
/**
 * Handle multiple navigation elements with proper labeling.
 *
 * For each nav, in order:
 * - the first nav in the list, when it sits inside a <header>, receives the
 *   standard 'nav' attributes and the label "Primary Menu";
 * - a nav inside a <footer> gets a `footer-navigation` ID (suffixed when
 *   that ID is taken), role "navigation" and the label "Footer Menu";
 * - every other nav receives the standard 'nav' attributes and the label
 *   "Navigation <position>".
 *
 * Existing `aria-label` values and existing IDs are never overwritten.
 * Elements that do not implement `closest()` are treated as being outside
 * both header and footer.
 *
 * @param {NodeList|Array} navElements - All nav elements
 * @param {Set} usedIds - Track used IDs (mutated with every ID assigned here)
 * @returns {void}
 */
export function handleMultipleNavs(navElements, usedIds = new Set()) {
  const navs = Array.from(navElements ?? []);
  if (navs.length === 0) return;

  navs.forEach((nav, index) => {
    // `!= null` on purpose: when closest() is missing the optional call
    // yields undefined, which a strict `!== null` would count as "found".
    const isInHeader = nav.closest?.('header') != null;
    const isInFooter = nav.closest?.('footer') != null;

    if (isInHeader && index === 0) {
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: 0 });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Primary Menu');
      }
    } else if (isInFooter) {
      if (!nav.id) {
        // Try the plain ID, then the historical `-<index>` form, then keep
        // counting up so the result never duplicates an existing ID.
        let footerId = 'footer-navigation';
        let suffix = index;
        while (usedIds.has(footerId)) {
          footerId = `footer-navigation-${suffix}`;
          suffix += 1;
        }
        nav.id = footerId;
        usedIds.add(footerId);
      }
      nav.setAttribute('role', 'navigation');
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Footer Menu');
      }
    } else {
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: index });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', `Navigation ${index + 1}`);
      }
    }
  });
}
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Playwright page.evaluate integration for semantic HTML enhancement.
3
+ *
4
+ * Contains enhanceSemanticHTMLInPage() which runs entirely inside browser
5
+ * context via page.evaluate(). All helper logic must be inlined here since
6
+ * ES module imports are not available inside evaluate callbacks.
7
+ * Used by semantic-enhancer.js (main module).
8
+ */
9
+
/**
 * Enhance HTML using page.evaluate (for Playwright integration).
 *
 * The HTML string is parsed with DOMParser inside the page, top-level
 * <header>/<footer>, <main> and <aside> elements receive semantic IDs,
 * classes and roles, and every <nav> / [role="navigation"] element is
 * labeled. The document is then serialized back with a leading
 * `<!DOCTYPE html>` line.
 *
 * Everything used by the callback is declared inside it, because the
 * callback runs in the browser and cannot see module-level bindings.
 *
 * NOTE(review): unlike enhanceSemanticHTML(), this variant does not select
 * landmarks by [role=...] and has no hero pass — confirm whether that
 * difference is intended.
 *
 * @param {import('playwright').Page} page - Playwright page
 * @param {string} html - Original HTML string (must be valid HTML)
 * @returns {Promise<{html: string, stats: Object}>} Enhanced HTML plus
 *   counters (sectionsEnhanced, idsAdded, classesAdded, rolesAdded) and a
 *   `warnings` array
 * @throws {Error} If page has no evaluate() or html is not a non-empty string
 */
export async function enhanceSemanticHTMLInPage(page, html) {
  if (!page || typeof page.evaluate !== 'function') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid Playwright page');
  }
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid HTML string');
  }

  return await page.evaluate((htmlStr) => {
    // All logic inlined: browser serialization boundary prevents imports
    const SEMANTIC_MAPPINGS = {
      header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
      nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
      main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
      sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
      footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
      hero: { id: 'hero-section', classes: ['hero'], role: null }
    };

    // Kept in step with the mappings module, including the `main` entry.
    const CLASS_PATTERNS = {
      header: ['header', 'masthead', 'site-header', 'page-header'],
      nav: ['nav', 'menu', 'navigation'],
      main: ['main', 'content', 'page-content'],
      sidebar: ['sidebar', 'aside', 'widget-area'],
      footer: ['footer', 'site-footer', 'page-footer'],
      hero: ['hero', 'banner', 'jumbotron', 'splash']
    };

    // Tag first, then role attribute, then class-name fragments.
    function detectSectionType(element) {
      const tag = element.tagName?.toLowerCase();
      const ariaRole = element.getAttribute?.('role');

      if (tag === 'header') return 'header';
      if (tag === 'nav') return 'nav';
      if (tag === 'main') return 'main';
      if (tag === 'aside') return 'sidebar';
      if (tag === 'footer') return 'footer';

      if (ariaRole === 'banner') return 'header';
      if (ariaRole === 'navigation') return 'nav';
      if (ariaRole === 'main') return 'main';
      if (ariaRole === 'complementary') return 'sidebar';
      if (ariaRole === 'contentinfo') return 'footer';

      const className = (element.className || '').toString().toLowerCase();
      if (!className) return null;

      for (const [sectionType, patterns] of Object.entries(CLASS_PATTERNS)) {
        if (patterns.some(pattern => className.includes(pattern))) {
          if (['div', 'section', 'article'].includes(tag)) return sectionType;
        }
      }
      return null;
    }

    // Adds ID / classes / role without overwriting anything already set.
    function applySemanticAttributes(element, sectionType, usedIds, navIndex = 0) {
      const mapping = SEMANTIC_MAPPINGS[sectionType];
      if (!mapping) return;

      if (!element.id && mapping.id) {
        // Base ID, then `-<navIndex + 1>`, then keep counting until unique
        // so an element is never silently left without an ID.
        let targetId = mapping.id;
        let suffix = navIndex + 1;
        while (usedIds.has(targetId)) {
          targetId = `${mapping.id}-${suffix}`;
          suffix += 1;
        }
        element.id = targetId;
        usedIds.add(targetId);
      }

      if (mapping.classes?.length > 0) {
        const existing = element.className
          ? element.className.toString().split(/\s+/).filter(Boolean)
          : [];
        const added = mapping.classes.filter(c => !existing.includes(c));
        if (added.length > 0) element.className = [...existing, ...added].join(' ').trim();
      }

      if (mapping.role && !element.getAttribute('role')) {
        element.setAttribute('role', mapping.role);
      }
    }

    const stats = { sectionsEnhanced: 0, idsAdded: 0, classesAdded: 0, rolesAdded: 0, warnings: [] };
    const doc = new DOMParser().parseFromString(htmlStr, 'text/html');
    const usedIds = new Set();

    // Seed with IDs already in the document so new ones never collide.
    doc.querySelectorAll('[id]').forEach(el => usedIds.add(el.id));

    ['header:not(header header)', 'footer:not(footer footer)', 'main', 'aside'].forEach(sel => {
      try {
        doc.querySelectorAll(sel).forEach(el => {
          const type = detectSectionType(el);
          if (type) {
            const hadId = !!el.id;
            const hadRole = !!el.getAttribute('role');
            const oldClasses = el.className;
            applySemanticAttributes(el, type, usedIds);
            if (!hadId && el.id) stats.idsAdded++;
            if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
            // Counts elements whose class list changed.
            if (oldClasses !== el.className) stats.classesAdded++;
            stats.sectionsEnhanced++;
          }
        });
      } catch {
        // Best effort: record the failing selector and carry on.
        stats.warnings.push(`Selector error: ${sel}`);
      }
    });

    doc.querySelectorAll('nav, [role="navigation"]').forEach((nav, index) => {
      const isInHeader = nav.closest('header') !== null;
      const isInFooter = nav.closest('footer') !== null;

      if (isInHeader && index === 0) {
        applySemanticAttributes(nav, 'nav', usedIds, 0);
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', 'Primary Menu');
      } else if (isInFooter) {
        if (!nav.id) {
          nav.id = usedIds.has('footer-navigation')
            ? `footer-navigation-${index}` : 'footer-navigation';
          usedIds.add(nav.id);
        }
        nav.setAttribute('role', 'navigation');
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', 'Footer Menu');
      } else {
        applySemanticAttributes(nav, 'nav', usedIds, index);
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', `Navigation ${index + 1}`);
      }
      stats.sectionsEnhanced++;
    });

    return { html: '<!DOCTYPE html>\n' + doc.documentElement.outerHTML, stats };
  }, html);
}
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Semantic HTML Enhancer
3
+ *
4
+ * Injects WordPress-compatible semantic IDs, classes, and ARIA roles
5
+ * into extracted HTML while preserving original styling.
6
+ *
7
+ * Architecture:
8
+ * - semantic-enhancer-mappings.js: Constants + detection/application functions (Node.js context)
9
+ * - semantic-enhancer-page.js: Playwright page.evaluate integration (browser context, inlined)
10
+ * - This file: Browser-context DOMParser enhancer + re-exports
11
+ */
12
+
13
+ import {
14
+ SEMANTIC_MAPPINGS,
15
+ CLASS_PATTERNS,
16
+ detectSectionType,
17
+ applySemanticAttributes,
18
+ handleMultipleNavs
19
+ } from './semantic-enhancer-mappings.js';
20
+
21
+ export { enhanceSemanticHTMLInPage } from './semantic-enhancer-page.js';
22
+
23
+ // Re-export mappings module for external consumers
24
+ export {
25
+ SEMANTIC_MAPPINGS,
26
+ CLASS_PATTERNS,
27
+ detectSectionType,
28
+ applySemanticAttributes,
29
+ handleMultipleNavs
30
+ };
31
+
/**
 * Enhance HTML string with semantic attributes.
 *
 * **IMPORTANT:** Requires browser context (uses DOMParser).
 * For Node.js/Playwright, use `enhanceSemanticHTMLInPage()` instead.
 *
 * Three passes run over the parsed document:
 *   1. landmarks — top-level header/footer, main, aside and elements with
 *      the matching ARIA roles get IDs, classes and roles;
 *   2. navigation — every nav / [role="navigation"] is labeled through
 *      handleMultipleNavs();
 *   3. hero — elements matching the hero selectors that are outside any
 *      header or footer receive the hero attributes.
 *
 * Each element is counted at most once per pass in `stats.sectionsEnhanced`,
 * even when several hero selectors match it. `stats.classesAdded` counts
 * elements whose class list changed in the landmark or hero pass.
 *
 * @param {string} html - Original HTML string (must be valid HTML)
 * @param {Object} [domHierarchy=null] - Optional DOM hierarchy from
 *   dom-tree-analyzer; accepted for interface compatibility, not read here
 * @returns {{html: string, stats: Object}} Enhanced HTML and stats
 * @throws {Error} If html is empty or DOMParser is unavailable
 */
export function enhanceSemanticHTML(html, domHierarchy = null) {
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTML requires a valid HTML string');
  }
  if (typeof DOMParser === 'undefined') {
    throw new Error('enhanceSemanticHTML requires browser context (DOMParser). Use enhanceSemanticHTMLInPage() for Playwright.');
  }

  const stats = {
    sectionsEnhanced: 0,
    idsAdded: 0,
    classesAdded: 0,
    rolesAdded: 0,
    warnings: []
  };

  const parser = new DOMParser();
  const doc = parser.parseFromString(html, 'text/html');
  const usedIds = new Set();

  // Seed with IDs already in the document so new ones never collide.
  doc.querySelectorAll('[id]').forEach(el => usedIds.add(el.id));

  const combinedLandmarkSelector = [
    'header:not(header header)',
    'footer:not(footer footer)',
    'main',
    'aside',
    '[role="banner"]',
    '[role="contentinfo"]',
    '[role="main"]',
    '[role="complementary"]'
  ].join(', ');

  const processedElements = new Set();

  try {
    doc.querySelectorAll(combinedLandmarkSelector).forEach(el => {
      if (processedElements.has(el)) return;
      processedElements.add(el);

      const sectionType = detectSectionType(el);
      if (sectionType) {
        const hadId = !!el.id;
        const hadRole = !!el.getAttribute('role');
        const oldClasses = el.className;

        applySemanticAttributes(el, sectionType, { usedIds });

        if (!hadId && el.id) stats.idsAdded++;
        if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
        if (oldClasses !== el.className) stats.classesAdded++;
        stats.sectionsEnhanced++;
      }
    });
  } catch (err) {
    stats.warnings.push(`Landmark selector error: ${err.message}`);
  }

  const navElements = doc.querySelectorAll('nav, [role="navigation"]');
  // Only navs not already counted as landmarks add to the section total.
  let newNavCount = 0;
  navElements.forEach(nav => {
    if (!processedElements.has(nav)) {
      processedElements.add(nav);
      newNavCount++;
    }
  });
  if (navElements.length > 0) {
    handleMultipleNavs(navElements, usedIds);
    stats.sectionsEnhanced += newNavCount;
  }

  const heroSelectors = [
    '.hero', '.banner', '.jumbotron', '.splash',
    '[class*="hero"]', '[class*="banner"]'
  ];
  // The selectors overlap (e.g. `.hero` and `[class*="hero"]`), so remember
  // which elements were already visited to avoid counting them twice.
  // Selectors are still processed one at a time, in order, so ID
  // assignment order is unchanged.
  const visitedHeroElements = new Set();
  heroSelectors.forEach(selector => {
    try {
      doc.querySelectorAll(selector).forEach(el => {
        if (visitedHeroElements.has(el)) return;
        visitedHeroElements.add(el);

        if (!el.closest('header') && !el.closest('footer')) {
          const hadId = !!el.id;
          const oldClasses = el.className;
          applySemanticAttributes(el, 'hero', { usedIds });
          if (!hadId && el.id) stats.idsAdded++;
          if (oldClasses !== el.className) stats.classesAdded++;
          stats.sectionsEnhanced++;
        }
      });
    } catch (err) {
      // Best effort: a selector may be invalid in some contexts. Record it
      // instead of dropping the failure silently, then continue.
      stats.warnings.push(`Hero selector error (${selector}): ${err.message}`);
    }
  });

  const enhancedHtml = '<!DOCTYPE html>\n' + doc.documentElement.outerHTML;
  return { html: enhancedHtml, stats };
}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * CSS Link Rewriter
3
+ *
4
+ * Rewrites stylesheet <link> tags in HTML to point to the shared
5
+ * styles.css file, deduplicates them, and optionally injects tokens.css.
6
+ */
7
+
/**
 * Rewrite all CSS <link> tags to use shared ../styles.css,
 * remove duplicates, and optionally inject tokens.css before it.
 *
 * A <link> is rewritten when its href ends in `.css` (optionally followed
 * by a query string or fragment, e.g. `style.css?ver=6.4`) and it either
 * declares `rel="stylesheet"` (any case, quoted or unquoted) or has no rel
 * attribute at all. Links with another rel value (e.g. preload) are left
 * alone. `rel="stylesheet"` is only added when the tag had no rel, so the
 * output never carries a duplicated rel attribute.
 *
 * @param {string} html
 * @param {boolean} injectTokensCss - Inject tokens.css link before styles.css
 * @returns {string} Updated HTML
 */
export function rewriteCssLinks(html, injectTokensCss = false) {
  let result = html;

  // Leading \s keeps attributes such as data-rel from counting as rel.
  const stylesheetRel = /\srel\s*=\s*(["']?)stylesheet\1(?=[\s/>])/i;
  const anyRel = /\srel\s*=/i;

  // Rewrite all stylesheet links to shared styles.css
  result = result.replace(
    /<link([^>]*?)href=["'][^"']*\.css(?:[?#][^"']*)?["']([^>]*?)>/gi,
    (match, before, after) => {
      if (stylesheetRel.test(match)) {
        // rel is already there: swap the href only.
        return `<link${before}href="../styles.css"${after}>`;
      }
      if (!anyRel.test(match)) {
        return `<link${before}href="../styles.css" rel="stylesheet"${after}>`;
      }
      return match;
    }
  );

  // Remove duplicate stylesheet links (keep first occurrence)
  let sharedLinkSeen = false;
  result = result.replace(
    /<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>/gi,
    (match) => {
      if (sharedLinkSeen) return '';
      sharedLinkSeen = true;
      return match;
    }
  );

  // Optionally inject tokens.css before styles.css
  if (injectTokensCss) {
    result = result.replace(
      /(<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>)/i,
      '<link href="../tokens.css" rel="stylesheet">\n $1'
    );
  }

  return result;
}