design-clone 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -34
- package/SKILL.md +69 -45
- package/bin/cli.js +22 -4
- package/bin/commands/clone-site.js +31 -171
- package/bin/commands/help.js +19 -6
- package/bin/commands/init.js +9 -86
- package/bin/commands/uninstall.js +105 -0
- package/bin/commands/update.js +70 -0
- package/bin/commands/verify.js +7 -14
- package/bin/utils/paths.js +28 -0
- package/bin/utils/validate.js +2 -22
- package/bin/utils/version.js +23 -0
- package/docs/code-standards.md +789 -0
- package/docs/codebase-summary.md +533 -286
- package/docs/index.md +74 -0
- package/docs/project-overview-pdr.md +797 -0
- package/docs/system-architecture.md +718 -0
- package/package.json +14 -17
- package/src/ai/prompts/design-tokens/basic.md +80 -0
- package/src/ai/prompts/design-tokens/section-with-css.md +41 -0
- package/src/ai/prompts/design-tokens/section.md +48 -0
- package/src/ai/prompts/design-tokens/with-css.md +87 -0
- package/src/ai/prompts/structure-analysis/basic.md +55 -0
- package/src/ai/prompts/structure-analysis/with-context.md +59 -0
- package/src/ai/prompts/structure-analysis/with-dimensions.md +63 -0
- package/src/ai/prompts/structure-analysis/with-hierarchy.md +73 -0
- package/src/ai/prompts/ux-audit/aggregation.md +42 -0
- package/src/ai/prompts/ux-audit/desktop.md +92 -0
- package/src/ai/prompts/ux-audit/mobile.md +93 -0
- package/src/ai/prompts/ux-audit/tablet.md +92 -0
- package/src/core/animation/animation-extractor-ast.js +183 -0
- package/src/core/animation/animation-extractor-output.js +152 -0
- package/src/core/animation/animation-extractor.js +178 -0
- package/src/core/animation/state-capture-detection.js +200 -0
- package/src/core/animation/state-capture.js +193 -0
- package/src/core/capture/browser-context-pool.js +96 -0
- package/src/core/capture/multi-page-screenshot-page.js +110 -0
- package/src/core/capture/multi-page-screenshot.js +208 -0
- package/src/core/capture/screenshot-extraction.js +186 -0
- package/src/core/capture/screenshot-helpers.js +175 -0
- package/src/core/capture/screenshot-orchestrator.js +174 -0
- package/src/core/capture/screenshot-viewport.js +93 -0
- package/src/core/capture/screenshot.js +192 -0
- package/src/core/content/content-counter-dom.js +191 -0
- package/src/core/content/content-counter.js +76 -0
- package/src/core/css/breakpoint-detector.js +66 -0
- package/src/core/css/chromium-defaults.json +23 -0
- package/src/core/css/computed-style-extractor.js +102 -0
- package/src/core/css/css-chunker.js +103 -0
- package/src/core/css/filter-css-dead-code.js +120 -0
- package/src/core/css/filter-css-html-analyzer.js +110 -0
- package/src/core/css/filter-css-selector-matcher.js +172 -0
- package/src/core/css/filter-css.js +206 -0
- package/src/core/css/merge-css-atrule-processor.js +158 -0
- package/src/core/css/merge-css-file-io.js +68 -0
- package/src/core/css/merge-css.js +148 -0
- package/src/core/detection/framework-detector-routing.js +68 -0
- package/src/core/detection/framework-detector-signals.js +65 -0
- package/src/core/detection/framework-detector.js +198 -0
- package/src/core/dimension/dimension-extractor-card-detector.js +82 -0
- package/src/core/dimension/dimension-extractor.js +317 -0
- package/src/core/dimension/dimension-output-ai-summary.js +111 -0
- package/src/core/dimension/dimension-output.js +173 -0
- package/src/core/dimension/dom-tree-analyzer-tree-builders.js +95 -0
- package/src/core/dimension/dom-tree-analyzer.js +191 -0
- package/src/core/discovery/app-state-snapshot-capture.js +195 -0
- package/src/core/discovery/app-state-snapshot-utils.js +178 -0
- package/src/core/discovery/app-state-snapshot.js +131 -0
- package/src/core/discovery/discover-pages-routes.js +84 -0
- package/src/core/discovery/discover-pages-utils.js +177 -0
- package/src/core/discovery/discover-pages.js +191 -0
- package/src/core/html/html-extractor-inline-styler.js +70 -0
- package/src/core/html/html-extractor.js +147 -0
- package/src/core/html/semantic-enhancer-mappings.js +200 -0
- package/src/core/html/semantic-enhancer-page.js +148 -0
- package/src/core/html/semantic-enhancer.js +135 -0
- package/src/core/links/rewrite-links-css-rewriter.js +53 -0
- package/src/core/links/rewrite-links.js +173 -0
- package/src/core/media/asset-validator.js +118 -0
- package/src/core/media/extract-assets-downloader.js +187 -0
- package/src/core/media/extract-assets-page-scraper.js +115 -0
- package/src/core/media/extract-assets.js +159 -0
- package/src/core/media/video-capture-convert.js +200 -0
- package/src/core/media/video-capture.js +201 -0
- package/src/core/{lazy-loader.js → page-prep/lazy-loader.js} +37 -39
- package/src/core/section/section-cropper-helpers.js +43 -0
- package/src/core/{section-cropper.js → section/section-cropper.js} +11 -88
- package/src/core/section/section-detector-strategies.js +139 -0
- package/src/core/section/section-detector-utils.js +100 -0
- package/src/core/section/section-detector.js +88 -0
- package/src/core/tests/test-section-cropper.js +2 -2
- package/src/core/tests/test-section-detector.js +2 -2
- package/src/post-process/enhance-assets.js +29 -4
- package/src/post-process/fetch-images-unsplash-client.js +123 -0
- package/src/post-process/fetch-images.js +60 -263
- package/src/post-process/inject-gosnap.js +88 -0
- package/src/post-process/inject-icons-svg-replacer.js +76 -0
- package/src/post-process/inject-icons.js +47 -200
- package/src/route-discoverers/base-discoverer-utils.js +137 -0
- package/src/route-discoverers/base-discoverer.js +29 -118
- package/src/route-discoverers/index.js +1 -1
- package/src/shared/config.js +38 -0
- package/src/shared/error-codes.js +31 -0
- package/src/shared/viewports.js +46 -0
- package/src/utils/browser.js +0 -7
- package/src/utils/helpers.js +4 -0
- package/src/utils/log.js +12 -0
- package/src/utils/playwright-loader.js +76 -0
- package/src/utils/playwright.js +3 -69
- package/src/utils/progress.js +32 -0
- package/src/verification/generate-audit-report-css-fixes.js +52 -0
- package/src/verification/generate-audit-report-sections.js +158 -0
- package/src/verification/generate-audit-report.js +5 -281
- package/src/verification/quality-scorer.js +92 -0
- package/src/verification/verify-footer-checks.js +103 -0
- package/src/verification/verify-footer-helpers.js +178 -0
- package/src/verification/verify-footer.js +23 -381
- package/src/verification/verify-header-checks.js +104 -0
- package/src/verification/verify-header-helpers.js +156 -0
- package/src/verification/verify-header.js +23 -365
- package/src/verification/verify-layout-report.js +101 -0
- package/src/verification/verify-layout.js +13 -259
- package/src/verification/verify-menu-checks.js +104 -0
- package/src/verification/verify-menu-helpers.js +112 -0
- package/src/verification/verify-menu.js +17 -285
- package/src/verification/verify-slider-checks.js +115 -0
- package/src/verification/verify-slider-constants.js +65 -0
- package/src/verification/verify-slider-helpers.js +164 -0
- package/src/verification/verify-slider.js +23 -414
- package/.env.example +0 -14
- package/docs/basic-clone.md +0 -63
- package/docs/cli-reference.md +0 -316
- package/docs/design-clone-architecture.md +0 -492
- package/docs/pixel-perfect.md +0 -117
- package/docs/project-roadmap.md +0 -382
- package/docs/troubleshooting.md +0 -170
- package/requirements.txt +0 -5
- package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
- package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
- package/src/ai/analyze-structure.py +0 -375
- package/src/ai/extract-design-tokens.py +0 -782
- package/src/ai/prompts/__init__.py +0 -2
- package/src/ai/prompts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
- package/src/ai/prompts/design_tokens.py +0 -316
- package/src/ai/prompts/structure_analysis.py +0 -592
- package/src/ai/prompts/ux_audit.py +0 -198
- package/src/ai/ux-audit.js +0 -596
- package/src/core/animation-extractor.js +0 -526
- package/src/core/app-state-snapshot.js +0 -511
- package/src/core/content-counter.js +0 -342
- package/src/core/design-tokens.js +0 -103
- package/src/core/dimension-extractor.js +0 -438
- package/src/core/dimension-output.js +0 -305
- package/src/core/discover-pages.js +0 -542
- package/src/core/dom-tree-analyzer.js +0 -298
- package/src/core/extract-assets.js +0 -468
- package/src/core/filter-css.js +0 -499
- package/src/core/framework-detector.js +0 -538
- package/src/core/html-extractor.js +0 -212
- package/src/core/merge-css.js +0 -407
- package/src/core/multi-page-screenshot.js +0 -380
- package/src/core/rewrite-links.js +0 -226
- package/src/core/screenshot.js +0 -701
- package/src/core/section-detector.js +0 -386
- package/src/core/semantic-enhancer.js +0 -492
- package/src/core/state-capture.js +0 -598
- package/src/core/video-capture.js +0 -546
- package/src/utils/__init__.py +0 -16
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
- package/src/utils/env.py +0 -134
- /package/src/core/{css-extractor.js → css/css-extractor.js} +0 -0
- /package/src/core/{cookie-handler.js → page-prep/cookie-handler.js} +0 -0
- /package/src/core/{page-readiness.js → page-prep/page-readiness.js} +0 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic HTML mappings and section detection logic.
|
|
3
|
+
*
|
|
4
|
+
* Contains WordPress-compatible semantic mappings (IDs, classes, ARIA roles),
|
|
5
|
+
* class pattern definitions, and the detectSectionType / applySemanticAttributes
|
|
6
|
+
* / handleMultipleNavs functions used in Node.js context.
|
|
7
|
+
* Used by semantic-enhancer.js (main module).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Constants
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/**
 * WordPress-compatible attribute sets, keyed by section type.
 *
 * Every entry provides:
 *   - `id`      the ID that may be assigned to the section element
 *   - `classes` the class names that may be appended to it
 *   - `role`    the ARIA landmark role, or `null` when the section has none
 */
export const SEMANTIC_MAPPINGS = {
  header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
  nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
  main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
  sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
  footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
  // No ARIA landmark role for hero
  hero: { id: 'hero-section', classes: ['hero'], role: null }
};
|
|
49
|
+
|
|
50
|
+
/**
 * Lower-case class-name fragments used to recognise each section type.
 *
 * detectSectionType() lower-cases an element's class string and checks
 * whether it contains any of these fragments, so matching is
 * case-insensitive and substring-based.
 */
export const CLASS_PATTERNS = {
  header: [
    'header',
    'masthead',
    'site-header',
    'page-header'
  ],
  nav: [
    'nav',
    'menu',
    'navigation'
  ],
  main: [
    'main',
    'content',
    'page-content'
  ],
  sidebar: [
    'sidebar',
    'aside',
    'widget-area'
  ],
  footer: [
    'footer',
    'site-footer',
    'page-footer'
  ],
  hero: [
    'hero',
    'banner',
    'jumbotron',
    'splash'
  ]
};
|
|
61
|
+
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// Detection & Application
|
|
64
|
+
// ============================================================================
|
|
65
|
+
|
|
66
|
+
/**
 * Classify an element as one of the known section types.
 *
 * Checks are tried in order and the first hit wins:
 *   1. the element's tag name (header, nav, main, aside, footer)
 *   2. its `role` attribute (banner, navigation, main, complementary, contentinfo)
 *   3. a CLASS_PATTERNS fragment contained in its lower-cased class string,
 *      accepted only for div / section / article elements
 *
 * @param {Element} element - DOM element to analyze
 * @returns {string|null} Section type, or null when nothing matches
 */
export function detectSectionType(element) {
  const tagName = element.tagName?.toLowerCase();
  const role = element.getAttribute?.('role');

  // Priority 1: semantic HTML tags
  const typeByTag = new Map([
    ['header', 'header'],
    ['nav', 'nav'],
    ['main', 'main'],
    ['aside', 'sidebar'],
    ['footer', 'footer']
  ]);
  if (typeByTag.has(tagName)) {
    return typeByTag.get(tagName);
  }

  // Priority 2: ARIA landmark roles
  const typeByRole = new Map([
    ['banner', 'header'],
    ['navigation', 'nav'],
    ['main', 'main'],
    ['complementary', 'sidebar'],
    ['contentinfo', 'footer']
  ]);
  if (typeByRole.has(role)) {
    return typeByRole.get(role);
  }

  // Priority 3: class-name fragments
  const classText = (element.className || '').toString().toLowerCase();
  if (!classText) {
    return null;
  }

  // Avoid false positives: only generic container elements may be
  // classified by class name.
  const isContainer = ['div', 'section', 'article'].includes(tagName);
  if (!isContainer) {
    return null;
  }

  const hit = Object.entries(CLASS_PATTERNS).find(([, fragments]) =>
    fragments.some((fragment) => classText.includes(fragment))
  );
  return hit ? hit[0] : null;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Add the SEMANTIC_MAPPINGS attributes for `sectionType` to an element.
 *
 * Existing markup is never overwritten:
 *   - an ID is assigned only when the element has none. If the mapped ID is
 *     already in `usedIds`, `<id>-<navIndex + 1>` is tried once; if that is
 *     taken too, no ID is assigned.
 *   - mapped classes are appended after the element's current classes
 *   - the role is set only when the element has no `role` attribute
 *
 * Unknown section types are ignored.
 *
 * @param {Element} element - DOM element to enhance (mutated in place)
 * @param {string} sectionType - Key of SEMANTIC_MAPPINGS
 * @param {Object} [options] - Configuration options
 * @param {Set} [options.usedIds] - IDs already taken; newly assigned IDs are added to it
 * @param {number} [options.navIndex=0] - Index used to build the fallback ID suffix
 */
export function applySemanticAttributes(element, sectionType, options = {}) {
  const spec = SEMANTIC_MAPPINGS[sectionType];
  if (!spec) {
    return;
  }

  const { usedIds = new Set(), navIndex = 0 } = options;
  const { id: baseId, classes: wantedClasses, role: landmarkRole } = spec;

  // ID: keep an existing one, otherwise try the base ID, then one suffixed fallback.
  if (!element.id && baseId) {
    const candidate = usedIds.has(baseId) ? `${baseId}-${navIndex + 1}` : baseId;
    if (!usedIds.has(candidate)) {
      element.id = candidate;
      usedIds.add(candidate);
    }
  }

  // Classes: append only the mapped names that are not present yet.
  if (wantedClasses && wantedClasses.length > 0) {
    let current = [];
    if (element.className) {
      current = element.className.toString().split(/\s+/).filter(Boolean);
    }
    const missing = wantedClasses.filter((name) => !current.includes(name));
    if (missing.length > 0) {
      element.className = current.concat(missing).join(' ').trim();
    }
  }

  // Role: never replace one the author already provided.
  if (landmarkRole && !element.getAttribute('role')) {
    element.setAttribute('role', landmarkRole);
  }
}
|
|
162
|
+
|
|
163
|
+
/**
 * Label every navigation element so multiple navs can be told apart.
 *
 * For each element, by its position `index` in `navElements`:
 *   - the first element (index 0), when inside a <header>, receives the
 *     standard nav attributes and the label "Primary Menu"
 *   - an element inside a <footer> receives the ID `footer-navigation`
 *     (or `footer-navigation-<index>` when that ID is already used), is
 *     given role="navigation", and the label "Footer Menu"
 *   - any other element receives the standard nav attributes and the label
 *     `Navigation <index + 1>`
 *
 * Existing `id` and `aria-label` values are never overwritten.
 *
 * @param {NodeList|Array} navElements - All nav elements
 * @param {Set} [usedIds] - IDs already taken; newly assigned IDs are added to it
 */
export function handleMultipleNavs(navElements, usedIds = new Set()) {
  const navs = Array.from(navElements);
  if (navs.length === 0) return;

  navs.forEach((nav, index) => {
    // `closest?.()` evaluates to undefined when the object has no `closest`
    // method. A strict `!== null` check would read that as "ancestor found",
    // so compare loosely to treat both null and undefined as "not inside".
    const isInHeader = nav.closest?.('header') != null;
    const isInFooter = nav.closest?.('footer') != null;

    if (isInHeader && index === 0) {
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: 0 });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Primary Menu');
      }
    } else if (isInFooter) {
      if (!nav.id) {
        nav.id = usedIds.has('footer-navigation')
          ? `footer-navigation-${index}`
          : 'footer-navigation';
        usedIds.add(nav.id);
      }
      nav.setAttribute('role', 'navigation');
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Footer Menu');
      }
    } else {
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: index });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', `Navigation ${index + 1}`);
      }
    }
  });
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playwright page.evaluate integration for semantic HTML enhancement.
|
|
3
|
+
*
|
|
4
|
+
* Contains enhanceSemanticHTMLInPage() which runs entirely inside browser
|
|
5
|
+
* context via page.evaluate(). All helper logic must be inlined here since
|
|
6
|
+
* ES module imports are not available inside evaluate callbacks.
|
|
7
|
+
* Used by semantic-enhancer.js (main module).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Enhance HTML using page.evaluate (for Playwright integration).
 *
 * The HTML string is parsed with DOMParser inside the page, top-level
 * <header>/<footer>, <main> and <aside> elements plus all navigation
 * elements receive semantic IDs, classes, roles and labels, and the
 * document is serialized back to a string.
 *
 * Everything the callback needs is declared inside it: the function passed
 * to page.evaluate() cannot use this module's imports or outer variables.
 *
 * @param {import('playwright').Page} page - Playwright page
 * @param {string} html - Original HTML string (must be non-empty)
 * @returns {Promise<{html: string, stats: Object}>} Enhanced HTML and counters
 *   (`sectionsEnhanced`, `idsAdded`, `classesAdded`, `rolesAdded`, `warnings`)
 * @throws {Error} If page has no evaluate() function, or html is empty or not a string
 */
export async function enhanceSemanticHTMLInPage(page, html) {
  if (!page || typeof page.evaluate !== 'function') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid Playwright page');
  }
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid HTML string');
  }

  return await page.evaluate((htmlStr) => {
    // All logic inlined: browser serialization boundary prevents imports
    const SEMANTIC_MAPPINGS = {
      header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
      nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
      main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
      sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
      footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
      hero: { id: 'hero-section', classes: ['hero'], role: null }
    };

    // Kept in step with CLASS_PATTERNS in semantic-enhancer-mappings.js
    // (including the `main` entry).
    const CLASS_PATTERNS = {
      header: ['header', 'masthead', 'site-header', 'page-header'],
      nav: ['nav', 'menu', 'navigation'],
      main: ['main', 'content', 'page-content'],
      sidebar: ['sidebar', 'aside', 'widget-area'],
      footer: ['footer', 'site-footer', 'page-footer'],
      hero: ['hero', 'banner', 'jumbotron', 'splash']
    };

    // Tag name first, then ARIA role, then class-name fragments
    // (class matching only for div / section / article).
    function detectSectionType(element) {
      const tag = element.tagName?.toLowerCase();
      const ariaRole = element.getAttribute?.('role');

      if (tag === 'header') return 'header';
      if (tag === 'nav') return 'nav';
      if (tag === 'main') return 'main';
      if (tag === 'aside') return 'sidebar';
      if (tag === 'footer') return 'footer';

      if (ariaRole === 'banner') return 'header';
      if (ariaRole === 'navigation') return 'nav';
      if (ariaRole === 'main') return 'main';
      if (ariaRole === 'complementary') return 'sidebar';
      if (ariaRole === 'contentinfo') return 'footer';

      const className = (element.className || '').toString().toLowerCase();
      if (!className) return null;

      for (const [sectionType, patterns] of Object.entries(CLASS_PATTERNS)) {
        if (patterns.some(pattern => className.includes(pattern))) {
          if (['div', 'section', 'article'].includes(tag)) return sectionType;
        }
      }
      return null;
    }

    // Adds ID / classes / role without overwriting what the element already has.
    function applySemanticAttributes(element, sectionType, usedIds, navIndex = 0) {
      const mapping = SEMANTIC_MAPPINGS[sectionType];
      if (!mapping) return;

      if (!element.id && mapping.id) {
        let targetId = mapping.id;
        if (usedIds.has(targetId)) targetId = `${mapping.id}-${navIndex + 1}`;
        if (!usedIds.has(targetId)) {
          element.id = targetId;
          usedIds.add(targetId);
        }
      }

      if (mapping.classes?.length > 0) {
        const existing = element.className
          ? element.className.toString().split(/\s+/).filter(Boolean)
          : [];
        const added = mapping.classes.filter(c => !existing.includes(c));
        if (added.length > 0) element.className = [...existing, ...added].join(' ').trim();
      }

      if (mapping.role && !element.getAttribute('role')) {
        element.setAttribute('role', mapping.role);
      }
    }

    const stats = { sectionsEnhanced: 0, idsAdded: 0, classesAdded: 0, rolesAdded: 0, warnings: [] };
    const doc = new DOMParser().parseFromString(htmlStr, 'text/html');
    const usedIds = new Set();

    // Seed with IDs already present so generated IDs never duplicate them.
    doc.querySelectorAll('[id]').forEach(el => usedIds.add(el.id));

    ['header:not(header header)', 'footer:not(footer footer)', 'main', 'aside'].forEach(sel => {
      try {
        doc.querySelectorAll(sel).forEach(el => {
          const type = detectSectionType(el);
          if (type) {
            const hadId = !!el.id;
            const hadRole = !!el.getAttribute('role');
            const oldClasses = el.className;
            applySemanticAttributes(el, type, usedIds);
            if (!hadId && el.id) stats.idsAdded++;
            if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
            // Count elements whose class list was extended.
            if (oldClasses !== el.className) stats.classesAdded++;
            stats.sectionsEnhanced++;
          }
        });
      } catch (err) {
        stats.warnings.push(`Selector error: ${sel}`);
      }
    });

    doc.querySelectorAll('nav, [role="navigation"]').forEach((nav, index) => {
      const isInHeader = nav.closest('header') !== null;
      const isInFooter = nav.closest('footer') !== null;

      if (isInHeader && index === 0) {
        applySemanticAttributes(nav, 'nav', usedIds, 0);
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', 'Primary Menu');
      } else if (isInFooter) {
        if (!nav.id) {
          nav.id = usedIds.has('footer-navigation')
            ? `footer-navigation-${index}` : 'footer-navigation';
          usedIds.add(nav.id);
        }
        nav.setAttribute('role', 'navigation');
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', 'Footer Menu');
      } else {
        applySemanticAttributes(nav, 'nav', usedIds, index);
        if (!nav.getAttribute('aria-label')) nav.setAttribute('aria-label', `Navigation ${index + 1}`);
      }
      stats.sectionsEnhanced++;
    });

    return { html: '<!DOCTYPE html>\n' + doc.documentElement.outerHTML, stats };
  }, html);
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic HTML Enhancer
|
|
3
|
+
*
|
|
4
|
+
* Injects WordPress-compatible semantic IDs, classes, and ARIA roles
|
|
5
|
+
* into extracted HTML while preserving original styling.
|
|
6
|
+
*
|
|
7
|
+
* Architecture:
|
|
8
|
+
* - semantic-enhancer-mappings.js: Constants + detection/application functions (Node.js context)
|
|
9
|
+
* - semantic-enhancer-page.js: Playwright page.evaluate integration (browser context, inlined)
|
|
10
|
+
* - This file: Browser-context DOMParser enhancer + re-exports
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import {
|
|
14
|
+
SEMANTIC_MAPPINGS,
|
|
15
|
+
CLASS_PATTERNS,
|
|
16
|
+
detectSectionType,
|
|
17
|
+
applySemanticAttributes,
|
|
18
|
+
handleMultipleNavs
|
|
19
|
+
} from './semantic-enhancer-mappings.js';
|
|
20
|
+
|
|
21
|
+
export { enhanceSemanticHTMLInPage } from './semantic-enhancer-page.js';
|
|
22
|
+
|
|
23
|
+
// Re-export mappings module for external consumers
|
|
24
|
+
export {
|
|
25
|
+
SEMANTIC_MAPPINGS,
|
|
26
|
+
CLASS_PATTERNS,
|
|
27
|
+
detectSectionType,
|
|
28
|
+
applySemanticAttributes,
|
|
29
|
+
handleMultipleNavs
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/**
 * Enhance HTML string with semantic attributes.
 *
 * **IMPORTANT:** Requires browser context (uses DOMParser).
 * For Node.js/Playwright, use `enhanceSemanticHTMLInPage()` instead.
 *
 * Processing order:
 *   1. landmark elements (top-level header/footer, main, aside, and elements
 *      carrying the matching ARIA roles)
 *   2. navigation elements, via handleMultipleNavs()
 *   3. hero-like elements outside any header/footer
 *
 * @param {string} html - Original HTML string (must be non-empty)
 * @param {Object} [domHierarchy=null] - Optional DOM hierarchy from
 *   dom-tree-analyzer. Currently not read by this function; accepted so
 *   existing callers keep working.
 * @returns {{html: string, stats: Object}} Enhanced HTML and counters
 *   (`sectionsEnhanced`, `idsAdded`, `classesAdded`, `rolesAdded`, `warnings`)
 * @throws {Error} If html is empty / not a string, or DOMParser is unavailable
 */
export function enhanceSemanticHTML(html, domHierarchy = null) {
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTML requires a valid HTML string');
  }
  if (typeof DOMParser === 'undefined') {
    throw new Error('enhanceSemanticHTML requires browser context (DOMParser). Use enhanceSemanticHTMLInPage() for Playwright.');
  }

  const stats = {
    sectionsEnhanced: 0,
    idsAdded: 0,
    classesAdded: 0,
    rolesAdded: 0,
    warnings: []
  };

  const parser = new DOMParser();
  const doc = parser.parseFromString(html, 'text/html');
  const usedIds = new Set();

  // Seed with IDs already present so generated IDs never duplicate them.
  doc.querySelectorAll('[id]').forEach(el => usedIds.add(el.id));

  const combinedLandmarkSelector = [
    'header:not(header header)',
    'footer:not(footer footer)',
    'main',
    'aside',
    '[role="banner"]',
    '[role="contentinfo"]',
    '[role="main"]',
    '[role="complementary"]'
  ].join(', ');

  const processedElements = new Set();

  try {
    doc.querySelectorAll(combinedLandmarkSelector).forEach(el => {
      if (processedElements.has(el)) return;
      processedElements.add(el);

      const sectionType = detectSectionType(el);
      if (sectionType) {
        const hadId = !!el.id;
        const hadRole = !!el.getAttribute('role');
        const oldClasses = el.className;

        applySemanticAttributes(el, sectionType, { usedIds });

        if (!hadId && el.id) stats.idsAdded++;
        if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
        if (oldClasses !== el.className) stats.classesAdded++;
        stats.sectionsEnhanced++;
      }
    });
  } catch (err) {
    stats.warnings.push(`Landmark selector error: ${err.message}`);
  }

  const navElements = doc.querySelectorAll('nav, [role="navigation"]');
  // Only navs not already counted as landmarks add to sectionsEnhanced.
  let newNavCount = 0;
  navElements.forEach(nav => {
    if (!processedElements.has(nav)) {
      processedElements.add(nav);
      newNavCount++;
    }
  });
  if (navElements.length > 0) {
    handleMultipleNavs(navElements, usedIds);
    stats.sectionsEnhanced += newNavCount;
  }

  const heroSelectors = [
    '.hero', '.banner', '.jumbotron', '.splash',
    '[class*="hero"]', '[class*="banner"]'
  ];
  // One element can satisfy several of these selectors (e.g. `.hero` and
  // `[class*="hero"]`). Remember which elements were visited so each one is
  // enhanced and counted exactly once.
  const visitedHeroes = new Set();
  heroSelectors.forEach(selector => {
    try {
      doc.querySelectorAll(selector).forEach(el => {
        if (visitedHeroes.has(el)) return;
        visitedHeroes.add(el);

        // Banners inside the site header/footer are not hero sections.
        if (el.closest('header') || el.closest('footer')) return;

        const hadId = !!el.id;
        const oldClasses = el.className;

        applySemanticAttributes(el, 'hero', { usedIds });

        if (!hadId && el.id) stats.idsAdded++;
        if (oldClasses !== el.className) stats.classesAdded++;
        stats.sectionsEnhanced++;
      });
    } catch (err) {
      // Best effort: a failing selector must not abort the run, but it is
      // reported instead of being dropped silently.
      stats.warnings.push(`Hero selector error (${selector}): ${err.message}`);
    }
  });

  const enhancedHtml = '<!DOCTYPE html>\n' + doc.documentElement.outerHTML;
  return { html: enhancedHtml, stats };
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSS Link Rewriter
|
|
3
|
+
*
|
|
4
|
+
* Rewrites stylesheet <link> tags in HTML to point to the shared
|
|
5
|
+
* styles.css file, deduplicates them, and optionally injects tokens.css.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Rewrite all CSS <link> tags to use shared ../styles.css,
 * remove duplicates, and optionally inject tokens.css before it.
 *
 * A <link> is rewritten when its href ends in `.css` and it either declares
 * rel="stylesheet" (single or double quotes) or contains no `rel=` at all.
 * Links with any other rel value are left untouched. `rel="stylesheet"` is
 * appended only when the tag has no rel attribute, so rewritten tags never
 * carry the attribute twice.
 *
 * @param {string} html - HTML markup to process
 * @param {boolean} [injectTokensCss=false] - Inject tokens.css link before styles.css
 * @returns {string} Updated HTML
 */
export function rewriteCssLinks(html, injectTokensCss = false) {
  // Matches a real `rel` attribute (preceded by whitespace), not e.g. `data-rel`.
  const relAttribute = /\srel=/;

  // Rewrite all stylesheet links to shared styles.css
  let result = html.replace(
    /<link([^>]*?)href=["'][^"']*\.css["']([^>]*?)>/gi,
    (match, before, after) => {
      const isStylesheet =
        match.includes('rel="stylesheet"') ||
        match.includes("rel='stylesheet'") ||
        !match.includes('rel=');
      if (!isStylesheet) {
        return match;
      }
      // Keep the tag's own rel when present; add one only if it is missing.
      const rel = relAttribute.test(match) ? '' : ' rel="stylesheet"';
      return `<link${before}href="../styles.css"${rel}${after}>`;
    }
  );

  // Remove duplicate stylesheet links (keep first occurrence)
  let keptFirst = false;
  result = result.replace(
    /<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>/gi,
    (match) => {
      if (keptFirst) return '';
      keptFirst = true;
      return match;
    }
  );

  // Optionally inject tokens.css before styles.css
  if (injectTokensCss) {
    result = result.replace(
      /(<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>)/i,
      '<link href="../tokens.css" rel="stylesheet">\n $1'
    );
  }

  return result;
}
|