design-clone 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -34
- package/SKILL.md +69 -45
- package/bin/cli.js +22 -4
- package/bin/commands/clone-site.js +31 -171
- package/bin/commands/help.js +19 -6
- package/bin/commands/init.js +9 -86
- package/bin/commands/uninstall.js +105 -0
- package/bin/commands/update.js +70 -0
- package/bin/commands/verify.js +7 -14
- package/bin/utils/paths.js +28 -0
- package/bin/utils/validate.js +2 -22
- package/bin/utils/version.js +23 -0
- package/docs/code-standards.md +789 -0
- package/docs/codebase-summary.md +533 -286
- package/docs/index.md +74 -0
- package/docs/project-overview-pdr.md +797 -0
- package/docs/system-architecture.md +718 -0
- package/package.json +14 -17
- package/src/ai/prompts/design-tokens/basic.md +80 -0
- package/src/ai/prompts/design-tokens/section-with-css.md +41 -0
- package/src/ai/prompts/design-tokens/section.md +48 -0
- package/src/ai/prompts/design-tokens/with-css.md +87 -0
- package/src/ai/prompts/structure-analysis/basic.md +55 -0
- package/src/ai/prompts/structure-analysis/with-context.md +59 -0
- package/src/ai/prompts/structure-analysis/with-dimensions.md +63 -0
- package/src/ai/prompts/structure-analysis/with-hierarchy.md +73 -0
- package/src/ai/prompts/ux-audit/aggregation.md +42 -0
- package/src/ai/prompts/ux-audit/desktop.md +92 -0
- package/src/ai/prompts/ux-audit/mobile.md +93 -0
- package/src/ai/prompts/ux-audit/tablet.md +92 -0
- package/src/core/animation/animation-extractor-ast.js +183 -0
- package/src/core/animation/animation-extractor-output.js +152 -0
- package/src/core/animation/animation-extractor.js +178 -0
- package/src/core/animation/state-capture-detection.js +200 -0
- package/src/core/animation/state-capture.js +193 -0
- package/src/core/capture/browser-context-pool.js +96 -0
- package/src/core/capture/multi-page-screenshot-page.js +110 -0
- package/src/core/capture/multi-page-screenshot.js +208 -0
- package/src/core/capture/screenshot-extraction.js +186 -0
- package/src/core/capture/screenshot-helpers.js +175 -0
- package/src/core/capture/screenshot-orchestrator.js +174 -0
- package/src/core/capture/screenshot-viewport.js +93 -0
- package/src/core/capture/screenshot.js +192 -0
- package/src/core/content/content-counter-dom.js +191 -0
- package/src/core/content/content-counter.js +76 -0
- package/src/core/css/breakpoint-detector.js +66 -0
- package/src/core/css/chromium-defaults.json +23 -0
- package/src/core/css/computed-style-extractor.js +102 -0
- package/src/core/css/css-chunker.js +103 -0
- package/src/core/css/filter-css-dead-code.js +120 -0
- package/src/core/css/filter-css-html-analyzer.js +110 -0
- package/src/core/css/filter-css-selector-matcher.js +172 -0
- package/src/core/css/filter-css.js +206 -0
- package/src/core/css/merge-css-atrule-processor.js +158 -0
- package/src/core/css/merge-css-file-io.js +68 -0
- package/src/core/css/merge-css.js +148 -0
- package/src/core/detection/framework-detector-routing.js +68 -0
- package/src/core/detection/framework-detector-signals.js +65 -0
- package/src/core/detection/framework-detector.js +198 -0
- package/src/core/dimension/dimension-extractor-card-detector.js +82 -0
- package/src/core/dimension/dimension-extractor.js +317 -0
- package/src/core/dimension/dimension-output-ai-summary.js +111 -0
- package/src/core/dimension/dimension-output.js +173 -0
- package/src/core/dimension/dom-tree-analyzer-tree-builders.js +95 -0
- package/src/core/dimension/dom-tree-analyzer.js +191 -0
- package/src/core/discovery/app-state-snapshot-capture.js +195 -0
- package/src/core/discovery/app-state-snapshot-utils.js +178 -0
- package/src/core/discovery/app-state-snapshot.js +131 -0
- package/src/core/discovery/discover-pages-routes.js +84 -0
- package/src/core/discovery/discover-pages-utils.js +177 -0
- package/src/core/discovery/discover-pages.js +191 -0
- package/src/core/html/html-extractor-inline-styler.js +70 -0
- package/src/core/html/html-extractor.js +147 -0
- package/src/core/html/semantic-enhancer-mappings.js +200 -0
- package/src/core/html/semantic-enhancer-page.js +148 -0
- package/src/core/html/semantic-enhancer.js +135 -0
- package/src/core/links/rewrite-links-css-rewriter.js +53 -0
- package/src/core/links/rewrite-links.js +173 -0
- package/src/core/media/asset-validator.js +118 -0
- package/src/core/media/extract-assets-downloader.js +187 -0
- package/src/core/media/extract-assets-page-scraper.js +115 -0
- package/src/core/media/extract-assets.js +159 -0
- package/src/core/media/video-capture-convert.js +200 -0
- package/src/core/media/video-capture.js +201 -0
- package/src/core/{lazy-loader.js → page-prep/lazy-loader.js} +37 -39
- package/src/core/section/section-cropper-helpers.js +43 -0
- package/src/core/{section-cropper.js → section/section-cropper.js} +11 -88
- package/src/core/section/section-detector-strategies.js +139 -0
- package/src/core/section/section-detector-utils.js +100 -0
- package/src/core/section/section-detector.js +88 -0
- package/src/core/tests/test-section-cropper.js +2 -2
- package/src/core/tests/test-section-detector.js +2 -2
- package/src/post-process/enhance-assets.js +29 -4
- package/src/post-process/fetch-images-unsplash-client.js +123 -0
- package/src/post-process/fetch-images.js +60 -263
- package/src/post-process/inject-gosnap.js +88 -0
- package/src/post-process/inject-icons-svg-replacer.js +76 -0
- package/src/post-process/inject-icons.js +47 -200
- package/src/route-discoverers/base-discoverer-utils.js +137 -0
- package/src/route-discoverers/base-discoverer.js +29 -118
- package/src/route-discoverers/index.js +1 -1
- package/src/shared/config.js +38 -0
- package/src/shared/error-codes.js +31 -0
- package/src/shared/viewports.js +46 -0
- package/src/utils/browser.js +0 -7
- package/src/utils/helpers.js +4 -0
- package/src/utils/log.js +12 -0
- package/src/utils/playwright-loader.js +76 -0
- package/src/utils/playwright.js +3 -69
- package/src/utils/progress.js +32 -0
- package/src/verification/generate-audit-report-css-fixes.js +52 -0
- package/src/verification/generate-audit-report-sections.js +158 -0
- package/src/verification/generate-audit-report.js +5 -281
- package/src/verification/quality-scorer.js +92 -0
- package/src/verification/verify-footer-checks.js +103 -0
- package/src/verification/verify-footer-helpers.js +178 -0
- package/src/verification/verify-footer.js +23 -381
- package/src/verification/verify-header-checks.js +104 -0
- package/src/verification/verify-header-helpers.js +156 -0
- package/src/verification/verify-header.js +23 -365
- package/src/verification/verify-layout-report.js +101 -0
- package/src/verification/verify-layout.js +13 -259
- package/src/verification/verify-menu-checks.js +104 -0
- package/src/verification/verify-menu-helpers.js +112 -0
- package/src/verification/verify-menu.js +17 -285
- package/src/verification/verify-slider-checks.js +115 -0
- package/src/verification/verify-slider-constants.js +65 -0
- package/src/verification/verify-slider-helpers.js +164 -0
- package/src/verification/verify-slider.js +23 -414
- package/.env.example +0 -14
- package/docs/basic-clone.md +0 -63
- package/docs/cli-reference.md +0 -316
- package/docs/design-clone-architecture.md +0 -492
- package/docs/pixel-perfect.md +0 -117
- package/docs/project-roadmap.md +0 -382
- package/docs/troubleshooting.md +0 -170
- package/requirements.txt +0 -5
- package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
- package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
- package/src/ai/analyze-structure.py +0 -375
- package/src/ai/extract-design-tokens.py +0 -782
- package/src/ai/prompts/__init__.py +0 -2
- package/src/ai/prompts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
- package/src/ai/prompts/design_tokens.py +0 -316
- package/src/ai/prompts/structure_analysis.py +0 -592
- package/src/ai/prompts/ux_audit.py +0 -198
- package/src/ai/ux-audit.js +0 -596
- package/src/core/animation-extractor.js +0 -526
- package/src/core/app-state-snapshot.js +0 -511
- package/src/core/content-counter.js +0 -342
- package/src/core/design-tokens.js +0 -103
- package/src/core/dimension-extractor.js +0 -438
- package/src/core/dimension-output.js +0 -305
- package/src/core/discover-pages.js +0 -542
- package/src/core/dom-tree-analyzer.js +0 -298
- package/src/core/extract-assets.js +0 -468
- package/src/core/filter-css.js +0 -499
- package/src/core/framework-detector.js +0 -538
- package/src/core/html-extractor.js +0 -212
- package/src/core/merge-css.js +0 -407
- package/src/core/multi-page-screenshot.js +0 -380
- package/src/core/rewrite-links.js +0 -226
- package/src/core/screenshot.js +0 -701
- package/src/core/section-detector.js +0 -386
- package/src/core/semantic-enhancer.js +0 -492
- package/src/core/state-capture.js +0 -598
- package/src/core/video-capture.js +0 -546
- package/src/utils/__init__.py +0 -16
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
- package/src/utils/env.py +0 -134
- /package/src/core/{css-extractor.js → css/css-extractor.js} +0 -0
- /package/src/core/{cookie-handler.js → page-prep/cookie-handler.js} +0 -0
- /package/src/core/{page-readiness.js → page-prep/page-readiness.js} +0 -0
|
@@ -1,492 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Semantic HTML Enhancer
|
|
3
|
-
*
|
|
4
|
-
* Injects WordPress-compatible semantic IDs, classes, and ARIA roles
|
|
5
|
-
* into extracted HTML while preserving original styling.
|
|
6
|
-
*
|
|
7
|
-
* Key features:
|
|
8
|
-
* - Detects sections via semantic tags, ARIA roles, class patterns
|
|
9
|
-
* - Adds IDs only if none exist
|
|
10
|
-
* - Appends classes (never replaces)
|
|
11
|
-
* - Sets roles only if not present
|
|
12
|
-
* - Handles multiple navs with aria-labels
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
/**
 * WordPress-style semantic attributes for each recognised section type.
 *
 * Every entry carries:
 * - `id`      – the ID to give the section when it has none,
 * - `classes` – class names to append to whatever the element already has,
 * - `role`    – the ARIA landmark role, or `null` when none applies.
 *
 * NOTE: `enhanceSemanticHTMLInPage()` carries its own inlined copy of this
 * table (it runs inside `page.evaluate`), so keep the two in sync.
 */
export const SEMANTIC_MAPPINGS = {
  header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
  nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
  main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
  sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
  footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
  // A hero is a visual section, not an ARIA landmark, hence no role.
  hero: { id: 'hero-section', classes: ['hero'], role: null }
};
|
|
50
|
-
|
|
51
|
-
/**
 * Lower-case substrings that hint at a section type when found in an
 * element's class attribute.
 *
 * The key order matters: `detectSectionType()` walks the entries in
 * insertion order and returns the first section type with a matching
 * pattern, so earlier keys take priority over later ones.
 */
const CLASS_PATTERNS = {
  header: [
    'header',
    'masthead',
    'site-header',
    'page-header'
  ],
  nav: [
    'nav',
    'menu',
    'navigation'
  ],
  main: [
    'main',
    'content',
    'page-content'
  ],
  sidebar: [
    'sidebar',
    'aside',
    'widget-area'
  ],
  footer: [
    'footer',
    'site-footer',
    'page-footer'
  ],
  hero: [
    'hero',
    'banner',
    'jumbotron',
    'splash'
  ]
};
|
|
62
|
-
|
|
63
|
-
/**
 * Detect which page section (if any) an element represents.
 *
 * Signals are checked in priority order and the first hit wins:
 *   1. Semantic HTML tag (`header`, `nav`, `main`, `aside`, `footer`)
 *   2. ARIA `role` attribute (`banner`, `navigation`, `main`,
 *      `complementary`, `contentinfo`)
 *   3. Class-name patterns from `CLASS_PATTERNS` (case-insensitive substring
 *      match), honoured only on `div`, `section` and `article` containers to
 *      limit false positives.
 *
 * @param {Element} element - DOM element to analyze
 * @returns {string|null} Section type (`header`, `nav`, `main`, `sidebar`,
 *   `footer`, `hero`), or `null` when nothing matches or no element is given
 */
export function detectSectionType(element) {
  // A missing element cannot be classified; report "no section" instead of
  // throwing on the property reads below.
  if (!element) return null;

  const tag = element.tagName?.toLowerCase();
  const ariaRole = element.getAttribute?.('role');

  // Priority 1: semantic HTML tags
  switch (tag) {
    case 'header': return 'header';
    case 'nav': return 'nav';
    case 'main': return 'main';
    case 'aside': return 'sidebar';
    case 'footer': return 'footer';
    default: break;
  }

  // Priority 2: ARIA landmark roles
  switch (ariaRole) {
    case 'banner': return 'header';
    case 'navigation': return 'nav';
    case 'main': return 'main';
    case 'complementary': return 'sidebar';
    case 'contentinfo': return 'footer';
    default: break;
  }

  // Priority 3: class patterns. Only generic containers qualify, so the tag
  // check is done once up front rather than once per pattern group.
  const isContainer = tag === 'div' || tag === 'section' || tag === 'article';
  if (!isContainer) return null;

  // toString() also covers className values that are not plain strings.
  const className = (element.className || '').toString().toLowerCase();
  if (!className) return null;

  for (const [sectionType, patterns] of Object.entries(CLASS_PATTERNS)) {
    if (patterns.some((pattern) => className.includes(pattern))) {
      return sectionType;
    }
  }

  return null;
}
|
|
107
|
-
|
|
108
|
-
/**
 * Apply the semantic ID, classes and ARIA role for a section type to an
 * element, without overriding anything the element already declares.
 *
 * Rules:
 * - An ID is assigned only when the element has none. The mapping's base ID
 *   is preferred; if it is already taken, `<base>-<navIndex + 1>` is tried,
 *   and the numeric suffix keeps increasing until a free ID is found, so an
 *   element is never skipped just because the first fallback is also taken.
 * - Mapping classes are appended; existing classes are kept and not
 *   duplicated.
 * - The role is set only when the element has no `role` attribute and the
 *   mapping defines one.
 *
 * Unknown section types are ignored (no-op).
 *
 * @param {Element} element - DOM element to enhance (mutated in place)
 * @param {string} sectionType - Key of SEMANTIC_MAPPINGS
 * @param {Object} [options] - Configuration options
 * @param {Set<string>} [options.usedIds] - IDs already present in the
 *   document; the newly assigned ID is added to it
 * @param {number} [options.navIndex=0] - Index used to derive the first
 *   numeric suffix when the base ID is taken
 */
export function applySemanticAttributes(element, sectionType, options = {}) {
  const mapping = SEMANTIC_MAPPINGS[sectionType];
  if (!mapping) return;

  const { usedIds = new Set(), navIndex = 0 } = options;

  // ID: only when the element has none.
  if (!element.id && mapping.id) {
    let targetId = mapping.id;

    // Base ID taken: start at the historical "<base>-<navIndex + 1>" fallback
    // and keep counting up until the ID is unique.
    for (let suffix = navIndex + 1; usedIds.has(targetId); suffix += 1) {
      targetId = `${mapping.id}-${suffix}`;
    }

    element.id = targetId;
    usedIds.add(targetId);
  }

  // Classes: append the ones that are missing, preserve the rest.
  if (mapping.classes && mapping.classes.length > 0) {
    const existingClasses = element.className
      ? element.className.toString().split(/\s+/).filter(Boolean)
      : [];
    const newClasses = mapping.classes.filter((c) => !existingClasses.includes(c));

    if (newClasses.length > 0) {
      element.className = [...existingClasses, ...newClasses].join(' ').trim();
    }
  }

  // Role: only when not already present.
  if (mapping.role && !element.getAttribute('role')) {
    element.setAttribute('role', mapping.role);
  }
}
|
|
160
|
-
|
|
161
|
-
/**
 * Label and enhance every navigation element of a document.
 *
 * Each nav is classified by its position:
 * - the first nav in the list, when it sits inside a `<header>`, becomes the
 *   primary navigation (`nav` mapping, aria-label "Primary Menu");
 * - a nav inside a `<footer>` gets a `footer-navigation` ID (numerically
 *   suffixed when that ID is taken), `role="navigation"` and the aria-label
 *   "Footer Menu";
 * - any other nav gets the `nav` mapping and the aria-label
 *   "Navigation <position>".
 *
 * Existing `id`, `role` and `aria-label` values are never overwritten.
 *
 * @param {NodeList|Array} navElements - All nav elements, in document order
 * @param {Set<string>} usedIds - IDs already in use; updated with every ID
 *   assigned here
 */
export function handleMultipleNavs(navElements, usedIds = new Set()) {
  const navs = Array.from(navElements ?? []);
  if (navs.length === 0) return;

  navs.forEach((nav, index) => {
    // Boolean() rather than `!== null`: when `closest` is unavailable the
    // optional call yields undefined, which must count as "not inside".
    const isInHeader = Boolean(nav.closest?.('header'));
    const isInFooter = Boolean(nav.closest?.('footer'));

    if (isInHeader && index === 0) {
      // Primary navigation in header
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: 0 });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Primary Menu');
      }
    } else if (isInFooter) {
      // Footer navigation
      if (!nav.id) {
        let footerId = 'footer-navigation';
        // Base ID taken: start at "footer-navigation-<index>" and count up
        // until the ID does not collide with anything already in use.
        for (let suffix = index; usedIds.has(footerId); suffix += 1) {
          footerId = `footer-navigation-${suffix}`;
        }
        nav.id = footerId;
        usedIds.add(footerId);
      }
      // Keep an author-supplied role; only fill in a missing one.
      if (!nav.getAttribute('role')) {
        nav.setAttribute('role', 'navigation');
      }
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', 'Footer Menu');
      }
    } else {
      // Secondary/other navigation
      applySemanticAttributes(nav, 'nav', { usedIds, navIndex: index });
      if (!nav.getAttribute('aria-label')) {
        nav.setAttribute('aria-label', `Navigation ${index + 1}`);
      }
    }
  });
}
|
|
202
|
-
|
|
203
|
-
/**
 * Enhance an HTML string with semantic IDs, classes and ARIA roles.
 *
 * **IMPORTANT:** This function requires a browser context (it uses
 * `DOMParser`). For Node.js/Playwright, use `enhanceSemanticHTMLInPage()`
 * instead.
 *
 * Processing steps:
 *   1. Landmarks (`header`, `footer`, `main`, `aside` and their role-based
 *      equivalents) are classified and enhanced.
 *   2. Navigation elements are labelled via `handleMultipleNavs()`.
 *   3. Hero sections are detected through class selectors; elements inside a
 *      `<header>` or `<footer>` are skipped.
 *
 * Each element contributes to `stats.sectionsEnhanced` at most once, even
 * when several selectors match it.
 *
 * @param {string} html - Original HTML string (must be valid HTML)
 * @param {Object} [domHierarchy=null] - Optional DOM hierarchy from
 *   dom-tree-analyzer; accepted for API compatibility, currently not read
 * @returns {{html: string, stats: Object}} Enhanced HTML and stats
 *   (`sectionsEnhanced`, `idsAdded`, `classesAdded`, `rolesAdded`, `warnings`)
 * @throws {Error} If html is empty/not a string or DOMParser is unavailable
 *
 * @example
 * // In browser context:
 * const result = enhanceSemanticHTML(htmlString);
 * console.log(result.stats.sectionsEnhanced);
 */
export function enhanceSemanticHTML(html, domHierarchy = null) {
  // Validate input
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTML requires a valid HTML string');
  }

  // Check for browser context
  if (typeof DOMParser === 'undefined') {
    throw new Error('enhanceSemanticHTML requires browser context (DOMParser). Use enhanceSemanticHTMLInPage() for Playwright.');
  }

  const stats = {
    sectionsEnhanced: 0,
    idsAdded: 0,
    classesAdded: 0,
    rolesAdded: 0,
    warnings: []
  };

  const doc = new DOMParser().parseFromString(html, 'text/html');

  // Seed with the IDs already present so generated IDs never collide.
  const usedIds = new Set();
  doc.querySelectorAll('[id]').forEach((el) => {
    usedIds.add(el.id);
  });

  // One combined query for all landmark candidates.
  const combinedLandmarkSelector = [
    'header:not(header header)', // Top-level headers only
    'footer:not(footer footer)', // Top-level footers only
    'main',
    'aside',
    '[role="banner"]',
    '[role="contentinfo"]',
    '[role="main"]',
    '[role="complementary"]'
  ].join(', ');

  // Elements already counted in stats.sectionsEnhanced (or examined as
  // landmark candidates), shared by all three passes below.
  const processedElements = new Set();

  try {
    doc.querySelectorAll(combinedLandmarkSelector).forEach((el) => {
      if (processedElements.has(el)) return;
      processedElements.add(el);

      const sectionType = detectSectionType(el);
      if (!sectionType) return;

      const hadId = !!el.id;
      const hadRole = !!el.getAttribute('role');
      const oldClasses = el.className;

      applySemanticAttributes(el, sectionType, { usedIds });

      if (!hadId && el.id) stats.idsAdded++;
      if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
      if (oldClasses !== el.className) stats.classesAdded++;
      stats.sectionsEnhanced++;
    });
  } catch (err) {
    stats.warnings.push(`Landmark selector error: ${err.message}`);
  }

  // Navs are handled separately because multiple navs need distinct labels.
  // Only navs not already counted above add to sectionsEnhanced.
  const navElements = doc.querySelectorAll('nav, [role="navigation"]');
  let newNavCount = 0;
  navElements.forEach((nav) => {
    if (!processedElements.has(nav)) {
      processedElements.add(nav);
      newNavCount++;
    }
  });
  if (navElements.length > 0) {
    handleMultipleNavs(navElements, usedIds);
    stats.sectionsEnhanced += newNavCount;
  }

  // Detect hero sections via class patterns. The selectors overlap (".hero"
  // is also matched by '[class*="hero"]'), so each element is handled once.
  const heroSelectors = [
    '.hero', '.banner', '.jumbotron', '.splash',
    '[class*="hero"]', '[class*="banner"]'
  ];
  const heroElements = new Set();
  heroSelectors.forEach((selector) => {
    try {
      doc.querySelectorAll(selector).forEach((el) => {
        if (heroElements.has(el)) return;
        heroElements.add(el);

        // Hero-like elements inside the page header/footer are not heroes.
        if (el.closest('header') || el.closest('footer')) return;

        const hadId = !!el.id;
        const oldClasses = el.className;

        applySemanticAttributes(el, 'hero', { usedIds });

        if (!hadId && el.id) stats.idsAdded++;
        if (oldClasses !== el.className) stats.classesAdded++;

        // Count the element only if no earlier pass already counted it.
        if (!processedElements.has(el)) {
          processedElements.add(el);
          stats.sectionsEnhanced++;
        }
      });
    } catch (err) {
      // Best effort: a selector the engine rejects must not abort the run,
      // but it is reported instead of being dropped silently.
      stats.warnings.push(`Hero selector error (${selector}): ${err.message}`);
    }
  });

  // Serialize back to HTML
  const enhancedHtml = `<!DOCTYPE html>\n${doc.documentElement.outerHTML}`;

  return { html: enhancedHtml, stats };
}
|
|
328
|
-
|
|
329
|
-
/**
 * Enhance HTML using page.evaluate (for Playwright integration).
 *
 * This is the recommended method for Node.js/Playwright usage. The HTML
 * string is parsed with `DOMParser` inside the page, landmarks and
 * navigation elements receive semantic IDs, classes and ARIA roles, and the
 * document is serialized back to a string.
 *
 * The callback passed to `page.evaluate` cannot reference module scope, so
 * the mappings and helper functions are re-declared inside it; keep them in
 * sync with the module-level versions.
 *
 * NOTE(review): unlike `enhanceSemanticHTML()`, this variant performs no
 * hero-section detection — confirm whether that difference is intended.
 *
 * @param {import('playwright').Page} page - Playwright page
 * @param {string} html - Original HTML string (must be valid HTML)
 * @returns {Promise<{html: string, stats: Object}>} Enhanced HTML and stats
 *   (`sectionsEnhanced`, `idsAdded`, `classesAdded`, `rolesAdded`, `warnings`)
 * @throws {Error} If page is null or html is invalid
 *
 * @example
 * const result = await enhanceSemanticHTMLInPage(page, extractedHtml);
 * console.log(result.stats.sectionsEnhanced);
 */
export async function enhanceSemanticHTMLInPage(page, html) {
  // Validate inputs
  if (!page || typeof page.evaluate !== 'function') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid Playwright page');
  }
  if (!html || typeof html !== 'string') {
    throw new Error('enhanceSemanticHTMLInPage requires a valid HTML string');
  }

  return page.evaluate((htmlStr) => {
    // Everything below runs in the page: it must be self-contained.
    const SEMANTIC_MAPPINGS = {
      header: { id: 'site-header', classes: ['site-header'], role: 'banner' },
      nav: { id: 'site-navigation', classes: ['main-navigation', 'nav-menu'], role: 'navigation' },
      main: { id: 'main-content', classes: ['site-main', 'content-area'], role: 'main' },
      sidebar: { id: 'primary-sidebar', classes: ['widget-area', 'sidebar'], role: 'complementary' },
      footer: { id: 'site-footer', classes: ['site-footer'], role: 'contentinfo' },
      hero: { id: 'hero-section', classes: ['hero'], role: null }
    };

    const CLASS_PATTERNS = {
      header: ['header', 'masthead', 'site-header', 'page-header'],
      nav: ['nav', 'menu', 'navigation'],
      main: ['main', 'content', 'page-content'],
      sidebar: ['sidebar', 'aside', 'widget-area'],
      footer: ['footer', 'site-footer', 'page-footer'],
      hero: ['hero', 'banner', 'jumbotron', 'splash']
    };

    // Classify an element: semantic tag first, then ARIA role, then class
    // patterns (containers only).
    function detectSectionType(element) {
      const tag = element.tagName?.toLowerCase();
      const ariaRole = element.getAttribute?.('role');

      if (tag === 'header') return 'header';
      if (tag === 'nav') return 'nav';
      if (tag === 'main') return 'main';
      if (tag === 'aside') return 'sidebar';
      if (tag === 'footer') return 'footer';

      if (ariaRole === 'banner') return 'header';
      if (ariaRole === 'navigation') return 'nav';
      if (ariaRole === 'main') return 'main';
      if (ariaRole === 'complementary') return 'sidebar';
      if (ariaRole === 'contentinfo') return 'footer';

      if (!['div', 'section', 'article'].includes(tag)) return null;

      const className = (element.className || '').toString().toLowerCase();
      if (!className) return null;

      for (const [sectionType, patterns] of Object.entries(CLASS_PATTERNS)) {
        if (patterns.some((pattern) => className.includes(pattern))) {
          return sectionType;
        }
      }

      return null;
    }

    // Add ID / classes / role for a section type without overriding
    // anything the element already declares.
    function applySemanticAttributes(element, sectionType, usedIds, navIndex = 0) {
      const mapping = SEMANTIC_MAPPINGS[sectionType];
      if (!mapping) return;

      if (!element.id && mapping.id) {
        let targetId = mapping.id;
        // Base ID taken: try "<base>-<navIndex + 1>" and keep counting up
        // until a free ID is found.
        for (let suffix = navIndex + 1; usedIds.has(targetId); suffix += 1) {
          targetId = `${mapping.id}-${suffix}`;
        }
        element.id = targetId;
        usedIds.add(targetId);
      }

      if (mapping.classes && mapping.classes.length > 0) {
        const existingClasses = element.className
          ? element.className.toString().split(/\s+/).filter(Boolean)
          : [];
        const newClasses = mapping.classes.filter((c) => !existingClasses.includes(c));
        if (newClasses.length > 0) {
          element.className = [...existingClasses, ...newClasses].join(' ').trim();
        }
      }

      if (mapping.role && !element.getAttribute('role')) {
        element.setAttribute('role', mapping.role);
      }
    }

    const stats = { sectionsEnhanced: 0, idsAdded: 0, classesAdded: 0, rolesAdded: 0, warnings: [] };

    const doc = new DOMParser().parseFromString(htmlStr, 'text/html');

    // Seed with the IDs already present so generated IDs never collide.
    const usedIds = new Set();
    doc.querySelectorAll('[id]').forEach((el) => usedIds.add(el.id));

    // Elements already counted in stats.sectionsEnhanced.
    const processedElements = new Set();

    // Process landmarks: semantic tags plus their role-based equivalents.
    // Selectors are queried one by one so that a selector the engine rejects
    // only disables itself.
    const landmarkSelectors = [
      'header:not(header header)', // Top-level headers only
      'footer:not(footer footer)', // Top-level footers only
      'main',
      'aside',
      '[role="banner"]',
      '[role="contentinfo"]',
      '[role="main"]',
      '[role="complementary"]'
    ];
    landmarkSelectors.forEach((selector) => {
      try {
        doc.querySelectorAll(selector).forEach((el) => {
          // Role selectors re-match elements enhanced earlier in this loop.
          if (processedElements.has(el)) return;
          processedElements.add(el);

          const sectionType = detectSectionType(el);
          if (!sectionType) return;

          const hadId = !!el.id;
          const hadRole = !!el.getAttribute('role');
          const oldClasses = el.className;

          applySemanticAttributes(el, sectionType, usedIds);

          if (!hadId && el.id) stats.idsAdded++;
          if (!hadRole && el.getAttribute('role')) stats.rolesAdded++;
          if (oldClasses !== el.className) stats.classesAdded++;
          stats.sectionsEnhanced++;
        });
      } catch (err) {
        stats.warnings.push(`Selector error: ${selector}`);
      }
    });

    // Handle nav elements: each one gets a distinguishing aria-label.
    const navElements = doc.querySelectorAll('nav, [role="navigation"]');
    navElements.forEach((nav, index) => {
      const isInHeader = nav.closest('header') !== null;
      const isInFooter = nav.closest('footer') !== null;

      if (isInHeader && index === 0) {
        applySemanticAttributes(nav, 'nav', usedIds, 0);
        if (!nav.getAttribute('aria-label')) {
          nav.setAttribute('aria-label', 'Primary Menu');
        }
      } else if (isInFooter) {
        if (!nav.id) {
          let footerId = 'footer-navigation';
          // Base ID taken: start at "footer-navigation-<index>" and count up
          // until the ID is unique.
          for (let suffix = index; usedIds.has(footerId); suffix += 1) {
            footerId = `footer-navigation-${suffix}`;
          }
          nav.id = footerId;
          usedIds.add(footerId);
        }
        // Keep an author-supplied role; only fill in a missing one.
        if (!nav.getAttribute('role')) {
          nav.setAttribute('role', 'navigation');
        }
        if (!nav.getAttribute('aria-label')) {
          nav.setAttribute('aria-label', 'Footer Menu');
        }
      } else {
        applySemanticAttributes(nav, 'nav', usedIds, index);
        if (!nav.getAttribute('aria-label')) {
          nav.setAttribute('aria-label', `Navigation ${index + 1}`);
        }
      }

      // Count each nav once, even if the landmark pass already saw it.
      if (!processedElements.has(nav)) {
        processedElements.add(nav);
        stats.sectionsEnhanced++;
      }
    });

    return {
      html: `<!DOCTYPE html>\n${doc.documentElement.outerHTML}`,
      stats
    };
  }, html);
}
|