design-clone 1.1.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -20
- package/SKILL.md +74 -0
- package/bin/commands/clone-site.js +75 -10
- package/bin/commands/init.js +33 -1
- package/bin/commands/verify.js +5 -3
- package/bin/utils/validate.js +24 -8
- package/docs/cli-reference.md +224 -2
- package/docs/codebase-summary.md +309 -0
- package/docs/design-clone-architecture.md +290 -45
- package/docs/pixel-perfect.md +35 -4
- package/docs/project-roadmap.md +382 -0
- package/docs/troubleshooting.md +5 -4
- package/package.json +12 -6
- package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
- package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
- package/src/ai/analyze-structure.py +73 -3
- package/src/ai/extract-design-tokens.py +356 -13
- package/src/ai/prompts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
- package/src/ai/prompts/design_tokens.py +133 -0
- package/src/ai/prompts/structure_analysis.py +329 -10
- package/src/ai/prompts/ux_audit.py +198 -0
- package/src/ai/ux-audit.js +596 -0
- package/src/core/animation-extractor.js +526 -0
- package/src/core/app-state-snapshot.js +511 -0
- package/src/core/content-counter.js +342 -0
- package/src/core/cookie-handler.js +1 -1
- package/src/core/css-extractor.js +4 -4
- package/src/core/dimension-extractor.js +93 -21
- package/src/core/dimension-output.js +103 -6
- package/src/core/discover-pages.js +242 -14
- package/src/core/dom-tree-analyzer.js +298 -0
- package/src/core/extract-assets.js +1 -1
- package/src/core/framework-detector.js +538 -0
- package/src/core/html-extractor.js +45 -4
- package/src/core/lazy-loader.js +7 -7
- package/src/core/multi-page-screenshot.js +9 -6
- package/src/core/page-readiness.js +8 -8
- package/src/core/screenshot.js +311 -7
- package/src/core/section-cropper.js +209 -0
- package/src/core/section-detector.js +386 -0
- package/src/core/semantic-enhancer.js +492 -0
- package/src/core/state-capture.js +598 -0
- package/src/core/tests/test-section-cropper.js +177 -0
- package/src/core/tests/test-section-detector.js +55 -0
- package/src/core/video-capture.js +546 -0
- package/src/route-discoverers/angular-discoverer.js +157 -0
- package/src/route-discoverers/astro-discoverer.js +123 -0
- package/src/route-discoverers/base-discoverer.js +242 -0
- package/src/route-discoverers/index.js +106 -0
- package/src/route-discoverers/next-discoverer.js +130 -0
- package/src/route-discoverers/nuxt-discoverer.js +138 -0
- package/src/route-discoverers/react-discoverer.js +139 -0
- package/src/route-discoverers/svelte-discoverer.js +109 -0
- package/src/route-discoverers/universal-discoverer.js +227 -0
- package/src/route-discoverers/vue-discoverer.js +118 -0
- package/src/utils/__init__.py +1 -1
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
- package/src/utils/browser.js +11 -37
- package/src/utils/playwright.js +213 -0
- package/src/verification/generate-audit-report.js +398 -0
- package/src/verification/verify-footer.js +493 -0
- package/src/verification/verify-header.js +486 -0
- package/src/verification/verify-layout.js +2 -2
- package/src/verification/verify-menu.js +4 -20
- package/src/verification/verify-slider.js +533 -0
- package/src/utils/puppeteer.js +0 -281
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section Cropper
|
|
3
|
+
*
|
|
4
|
+
* Crop full-page screenshot into individual section images using Sharp.
|
|
5
|
+
* Uses section bounds from section-detector.js.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { cropSections } from './section-cropper.js';
|
|
9
|
+
* const results = await cropSections(screenshotPath, sections, outputDir);
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import path from 'path';
|
|
13
|
+
import fs from 'fs/promises';
|
|
14
|
+
|
|
15
|
+
// Sharp is an optional dependency: resolve it once at module load and keep the
// binding at null when the dynamic import fails for any reason.
let sharp = null;
try {
  const { default: sharpExport } = await import('sharp');
  sharp = sharpExport;
} catch {
  // Import failed; `sharp` stays null so callers can detect its absence.
}
|
|
22
|
+
|
|
23
|
+
/**
 * Defaults for cropSections; caller-supplied options are spread over these.
 */
const DEFAULT_OPTIONS = {
  // Sections whose clamped height is below this many pixels are skipped.
  minHeight: 100,
  // Forwarded to sharp's png() as `quality`.
  quality: 90,
  // Forwarded to sharp's png() as `compressionLevel` (0-9).
  compressionLevel: 6,
  // Output format label; cropSections in this file always writes `.png` files.
  format: 'png'
};
|
|
30
|
+
|
|
31
|
+
/**
 * Crop sections from a full-page screenshot.
 *
 * Each section's bounds are clamped to the screenshot via validateBounds,
 * sections that end up too small are skipped, and every remaining section is
 * written to `<outputDir>/sections/section-<index>-<name>.png`. A
 * `sections.json` summary is written into the same directory.
 *
 * A failure while preparing or cropping a single section (for example a
 * section without bounds or name) is recorded in `skipped` and does not abort
 * the remaining sections.
 *
 * @param {string} screenshotPath - Path to full screenshot
 * @param {Array} sections - Array of section objects with bounds
 * @param {string} outputDir - Base output directory
 * @param {Object} options - Configuration options, spread over DEFAULT_OPTIONS
 * @returns {Promise<{sections: Array, skipped: Array, summary: string, directory: string}>}
 *   Cropped section info, skipped sections with a reason, the path of the
 *   summary JSON file, and the directory the crops were written to
 * @throws {Error} If Sharp is not installed; errors from creating the
 *   directory, reading the image metadata or writing the summary propagate
 */
export async function cropSections(screenshotPath, sections, outputDir, options = {}) {
  const config = { ...DEFAULT_OPTIONS, ...options };

  // Check Sharp availability
  if (!sharp) {
    throw new Error('Sharp is not installed. Run: npm install sharp');
  }

  // Create sections directory
  const sectionsDir = path.join(outputDir, 'sections');
  await fs.mkdir(sectionsDir, { recursive: true });

  // Get source image metadata
  const metadata = await sharp(screenshotPath).metadata();
  const imageWidth = metadata.width;
  const imageHeight = metadata.height;

  const results = [];
  const skipped = [];

  for (const section of sections) {
    // Everything section-specific runs inside the try so that one malformed
    // section is reported as skipped instead of rejecting the whole call.
    try {
      // Validate and clamp bounds
      const bounds = validateBounds(section.bounds, imageWidth, imageHeight);

      // Skip tiny sections
      if (bounds.height < config.minHeight) {
        skipped.push({
          index: section.index,
          name: section.name,
          reason: `Height ${bounds.height}px < ${config.minHeight}px minimum`
        });
        continue;
      }

      // Skip zero-dimension sections
      if (bounds.width <= 0 || bounds.height <= 0) {
        skipped.push({
          index: section.index,
          name: section.name,
          reason: 'Zero or negative dimensions'
        });
        continue;
      }

      // Generate output filename
      const safeName = sanitizeName(section.name);
      const filename = `section-${section.index}-${safeName}.png`;
      const outputPath = path.join(sectionsDir, filename);

      // Crop and save.
      // NOTE(review): sharp documents png() `quality` as switching to
      // palette-based output - confirm that is intended for screenshots.
      await sharp(screenshotPath)
        .extract({
          left: bounds.left,
          top: bounds.top,
          width: bounds.width,
          height: bounds.height
        })
        .png({
          quality: config.quality,
          compressionLevel: config.compressionLevel
        })
        .toFile(outputPath);

      results.push({
        index: section.index,
        name: section.name,
        filename,
        path: outputPath,
        relativePath: path.join('sections', filename),
        bounds: {
          x: bounds.left,
          y: bounds.top,
          width: bounds.width,
          height: bounds.height
        },
        role: section.role || 'unknown',
        selector: section.selector || null
      });
    } catch (err) {
      // `section` itself may be null/undefined, hence the optional access.
      skipped.push({
        index: section?.index,
        name: section?.name,
        reason: `Crop error: ${err.message}`
      });
    }
  }

  // Write summary JSON (`skipped` is omitted from the file when empty,
  // because JSON.stringify drops undefined properties)
  const summary = {
    source: path.basename(screenshotPath),
    sourceWidth: imageWidth,
    sourceHeight: imageHeight,
    sectionsCount: results.length,
    skippedCount: skipped.length,
    sections: results,
    skipped: skipped.length > 0 ? skipped : undefined,
    createdAt: new Date().toISOString()
  };

  const summaryPath = path.join(sectionsDir, 'sections.json');
  await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));

  return {
    sections: results,
    skipped,
    summary: summaryPath,
    directory: sectionsDir
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Validate and clamp bounds to image dimensions.
 *
 * The requested rectangle is rounded to whole pixels and intersected with the
 * image rectangle. A rectangle that starts left of / above the image is
 * clipped (its size shrinks by the clipped amount) rather than shifted, and
 * width/height are never negative. Missing bounds or non-finite values yield
 * an all-zero result, which callers treat as a skippable section.
 *
 * @param {Object} bounds - Section bounds {x, y, width, height}
 * @param {number} imageWidth - Source image width
 * @param {number} imageHeight - Source image height
 * @returns {Object} Validated bounds {left, top, width, height}
 */
function validateBounds(bounds, imageWidth, imageHeight) {
  const source = bounds ?? {};
  const rawLeft = Math.round(source.x);
  const rawTop = Math.round(source.y);
  const rawWidth = Math.round(source.width);
  const rawHeight = Math.round(source.height);

  // Undefined or NaN inputs cannot describe a crop region.
  if (![rawLeft, rawTop, rawWidth, rawHeight].every(Number.isFinite)) {
    return { left: 0, top: 0, width: 0, height: 0 };
  }

  const clamp = (value, max) => Math.min(Math.max(value, 0), max);

  // Clamp both edges, then derive the size from the clamped edges.
  const left = clamp(rawLeft, imageWidth);
  const top = clamp(rawTop, imageHeight);
  const right = clamp(rawLeft + rawWidth, imageWidth);
  const bottom = clamp(rawTop + rawHeight, imageHeight);

  return {
    left,
    top,
    width: Math.max(0, right - left),
    height: Math.max(0, bottom - top)
  };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Sanitize section name for filename.
 *
 * Lower-cases the name, replaces every character outside `a-z0-9-` with a
 * hyphen, collapses hyphen runs, strips leading/trailing hyphens and limits
 * the result to 50 characters. Non-string input is coerced with String();
 * null/undefined and names with no usable characters become 'unnamed'.
 *
 * @param {string} name - Section name
 * @returns {string} Safe filename fragment (never empty)
 */
function sanitizeName(name) {
  const slug = String(name ?? '')
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 50)
    // Truncation can cut right after a hyphen; drop it again.
    .replace(/-$/, '');

  return slug || 'unnamed';
}
|
|
186
|
+
|
|
187
|
+
/**
 * Report whether the optional Sharp dependency was loaded.
 * @returns {boolean} false only while the module-level `sharp` binding is null
 */
export function isSharpAvailable() {
  const missing = sharp === null;
  return !missing;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Condense a cropSections result into a few numbers for logging.
 * @param {Object} result - Result from cropSections
 * @returns {Object} Counts of cropped and skipped sections, the output
 *   directory, and `totalSize`: the summed pixel area (width * height) of all
 *   cropped sections
 */
export function getCropperSummary(result) {
  let pixelArea = 0;
  for (const { bounds } of result.sections) {
    pixelArea += bounds.width * bounds.height;
  }

  const summary = {
    cropped: result.sections.length,
    skipped: result.skipped.length,
    directory: result.directory,
    totalSize: pixelArea
  };
  return summary;
}
|
|
208
|
+
|
|
209
|
+
export { DEFAULT_OPTIONS };
|
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section Detector
|
|
3
|
+
*
|
|
4
|
+
* Detect semantic page sections from DOM hierarchy for section-based
|
|
5
|
+
* screenshot analysis. Returns bounding boxes for cropping.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { detectSections } from './section-detector.js';
|
|
9
|
+
* const sections = await detectSections(page, { padding: 40 });
|
|
10
|
+
*
|
|
11
|
+
* Strategies (in order):
|
|
12
|
+
* 1. Semantic HTML: <header>, <main>, <section>, <footer>
|
|
13
|
+
* 2. data-section attributes
|
|
14
|
+
* 3. Class patterns: hero, services, features, about, contact
|
|
15
|
+
* 4. Large direct children of <main> or <body> (at least 300px tall, or 20% of the viewport height if that is larger)
|
|
16
|
+
* 5. Fallback: viewport chunking if <minSections detected
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { extractDOMHierarchy } from './dom-tree-analyzer.js';
|
|
20
|
+
|
|
21
|
+
// Substrings searched for in class attributes when detecting sections by
// class name. Order matters: the first pattern found in a class wins.
const SECTION_CLASS_PATTERNS = [
  ['hero', 'banner', 'header', 'navigation', 'nav'],
  ['services', 'features', 'about', 'team', 'portfolio'],
  ['testimonials', 'reviews', 'pricing', 'plans'],
  ['faq', 'questions', 'blog', 'news', 'articles'],
  ['contact', 'cta', 'call-to-action', 'newsletter'],
  ['footer', 'partners', 'clients', 'gallery', 'showcase']
].flat();
|
|
30
|
+
|
|
31
|
+
/**
 * Defaults for detectSections; caller-supplied options are spread over these.
 */
const DEFAULT_OPTIONS = {
  // Further detection strategies run while fewer sections than this were found.
  minSections: 3,
  // The sorted result is truncated to this many sections.
  maxSections: 20,
  // Pixels of padding applied around each section's bounds.
  padding: 40,
  // Allow the viewport-chunk fallback when too few sections were found.
  fallbackToViewport: true,
  // Height in pixels of each fallback viewport chunk.
  viewportHeight: 900,
  // Minimum element height handed to the in-page detection strategies.
  minSectionHeight: 150,
  // Fraction of viewportHeight shared by consecutive fallback chunks (10%).
  overlapRatio: 0.1
};
|
|
41
|
+
|
|
42
|
+
/**
 * Detect page sections, escalating through the detection strategies until at
 * least `minSections` sections are found.
 *
 * Order: semantic elements, then class-pattern matches, then large direct
 * children (each merged into what was found so far), and finally - when
 * `fallbackToViewport` is set - viewport chunks, which replace anything found
 * earlier. The result is padded, sorted by vertical position, truncated to
 * `maxSections` and indexed in that final order.
 *
 * @param {import('playwright').Page} page - Playwright page instance
 * @param {Object} options - Configuration options, spread over DEFAULT_OPTIONS
 * @returns {Promise<Array>} Array of section objects with bounds
 */
export async function detectSections(page, options = {}) {
  const config = { ...DEFAULT_OPTIONS, ...options };
  const tooFew = (found) => found.length < config.minSections;

  // Measure the page inside the browser.
  const pageDimensions = await page.evaluate(() => {
    const root = document.documentElement;
    const width = root.clientWidth;
    const height = Math.max(document.body.scrollHeight, root.scrollHeight);
    return { width, height };
  });

  // Strategy 1: semantic elements.
  let sections = await findSemanticSections(page, pageDimensions, config);

  // Strategy 2: class-name patterns.
  if (tooFew(sections)) {
    const byClass = await findClassPatternSections(page, pageDimensions, config);
    sections = mergeSections(sections, byClass);
  }

  // Strategy 3: large direct children.
  if (tooFew(sections)) {
    const byChildren = await findLargeChildSections(page, pageDimensions, config);
    sections = mergeSections(sections, byChildren);
  }

  // Strategy 4: viewport chunks replace whatever was found.
  if (tooFew(sections) && config.fallbackToViewport) {
    sections = generateViewportChunks(pageDimensions, config);
  }

  // Pad every section; indices are assigned once the final order is known.
  const padded = sections.map((section) => ({
    ...section,
    bounds: applyPadding(section.bounds, config.padding, pageDimensions)
  }));
  padded.sort((first, second) => first.bounds.y - second.bounds.y);

  return padded
    .slice(0, config.maxSections)
    .map((section, position) => ({ ...section, index: position }));
}
|
|
95
|
+
|
|
96
|
+
/**
 * Find semantic HTML sections: top-level header/footer, section and article
 * elements that are direct children of main or body, and [data-section]
 * elements.
 *
 * The search runs inside the page through page.evaluate. Bounds are reported
 * in page coordinates (viewport rectangle plus the current scroll offset on
 * both axes), rounded to whole pixels.
 *
 * @param {import('playwright').Page} page - Page to inspect
 * @param {Object} pageDimensions - Not read by this function
 * @param {Object} config - Only `minSectionHeight` is read
 * @returns {Promise<Array>} Sections as {name, role, selector, bounds}
 */
async function findSemanticSections(page, pageDimensions, config) {
  return await page.evaluate(({ minHeight }) => {
    const sections = [];
    // An element can match several selectors (e.g. `main > section` and
    // `[data-section]`); this set makes sure it is reported only once.
    const processed = new Set();

    // Selectors for semantic sections
    const selectors = [
      'header:not(header header)',  // Top-level header only
      'main > section',
      'main > article',
      'body > section',
      'body > article',
      '[data-section]',
      'footer:not(footer footer)'   // Top-level footer only
    ];

    for (const selector of selectors) {
      for (const el of document.querySelectorAll(selector)) {
        if (processed.has(el)) continue;

        const rect = el.getBoundingClientRect();

        // Skip tiny sections
        if (rect.height < minHeight) continue;

        const tag = el.tagName.toLowerCase();

        // Name priority: non-empty data-section, id, known class keyword, tag.
        let name = tag;
        const labelled = el.getAttribute('data-section');
        if (labelled) {
          name = labelled;
        } else if (el.id) {
          name = el.id;
        } else if (el.className) {
          const cls = el.className.toString().toLowerCase();
          const match = cls.match(/\b(hero|about|services|features|contact|footer|header|nav|cta|testimonials|pricing|faq|team|blog|news)\b/);
          if (match) name = match[1];
        }

        sections.push({
          name,
          role: tag,
          selector: el.id ? `#${el.id}` : tag,
          bounds: {
            // getBoundingClientRect is viewport-relative; add the scroll
            // offsets so both axes are page-relative.
            x: Math.round(rect.x + window.scrollX),
            y: Math.round(rect.y + window.scrollY),
            width: Math.round(rect.width),
            height: Math.round(rect.height)
          }
        });

        processed.add(el);
      }
    }

    return sections;
  }, { minHeight: config.minSectionHeight });
}
|
|
160
|
+
|
|
161
|
+
/**
 * Find sections by class pattern matching.
 *
 * Inside the page, selects every element whose class attribute contains one
 * of SECTION_CLASS_PATTERNS and keeps those that are direct children of
 * <body> or <main> and at least `minSectionHeight` tall. Bounds are reported
 * in page coordinates, rounded to whole pixels.
 *
 * The section name is the first pattern (in list order) found in the
 * lower-cased class string, ignoring a pattern that is merely a fragment of a
 * longer pattern that also matches - so "newsletter" is not named "news".
 *
 * @param {import('playwright').Page} page - Page to inspect
 * @param {Object} pageDimensions - Not read by this function
 * @param {Object} config - Only `minSectionHeight` is read
 * @returns {Promise<Array>} Sections as {name, role, selector, bounds}
 */
async function findClassPatternSections(page, pageDimensions, config) {
  return await page.evaluate(({ patterns, minHeight }) => {
    const sections = [];

    // Build selector from patterns
    const classSelectors = patterns.map(p => `[class*="${p}"]`).join(', ');

    for (const el of document.querySelectorAll(classSelectors)) {
      // Only consider direct children of body or main
      const parent = el.parentElement;
      if (!parent || (parent.tagName !== 'BODY' && parent.tagName !== 'MAIN')) {
        continue;
      }

      const rect = el.getBoundingClientRect();
      if (rect.height < minHeight) continue;

      const rawClass = el.className.toString();
      const cls = rawClass.toLowerCase();

      // Extract pattern name from class, skipping fragments of longer matches
      const matched = patterns.filter(p => cls.includes(p));
      const name = matched.find(
        p => !matched.some(other => other !== p && other.includes(p))
      ) || 'section';

      // First class token, tolerant of leading or repeated whitespace
      const firstClass = rawClass.trim().split(/\s+/)[0];

      sections.push({
        name,
        role: 'class-pattern',
        selector: el.id ? `#${el.id}` : `.${firstClass}`,
        bounds: {
          // Viewport rectangle plus scroll offset gives page coordinates.
          x: Math.round(rect.x + window.scrollX),
          y: Math.round(rect.y + window.scrollY),
          width: Math.round(rect.width),
          height: Math.round(rect.height)
        }
      });
    }

    return sections;
  }, { patterns: SECTION_CLASS_PATTERNS, minHeight: config.minSectionHeight });
}
|
|
216
|
+
|
|
217
|
+
/**
 * Find large direct children of main/body as sections.
 *
 * Inside the page, walks the direct children of <main> (when present) and
 * then <body>; <body> is not visited when <main> already produced sections.
 * A child qualifies when it is at least as tall as the largest of 300px, 20%
 * of the viewport height and `minSectionHeight`, and is not a
 * script/style/noscript/link/meta or a header/footer/section/article element.
 * Bounds are reported in page coordinates, rounded to whole pixels.
 *
 * Names come from the element id, else its first class (unless that class is
 * a generic layout name), else a position label such as `top-section-0`.
 *
 * @param {import('playwright').Page} page - Page to inspect
 * @param {Object} pageDimensions - Not read by this function
 * @param {Object} config - Only `minSectionHeight` is read
 * @returns {Promise<Array>} Sections as {name, role, selector, bounds}
 */
async function findLargeChildSections(page, pageDimensions, config) {
  return await page.evaluate(({ minHeight }) => {
    const sections = [];

    const nonVisualTags = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'LINK', 'META'];
    // Tags already handled by semantic detection
    const semanticTags = ['HEADER', 'FOOTER', 'SECTION', 'ARTICLE'];
    // Generic framework classes that make poor section names
    const genericPatterns = ['sd', 'container', 'wrapper', 'div', 'block', 'row', 'col', 'section'];

    // Only large sections: >=300px, >=20% viewport height, and never below
    // the configured minimum section height.
    const threshold = Math.max(300, window.innerHeight * 0.2, minHeight || 0);

    // Check direct children of main, then body
    const containers = [
      document.querySelector('main'),
      document.body
    ].filter(Boolean);

    for (const container of containers) {
      for (const child of Array.from(container.children)) {
        if (nonVisualTags.includes(child.tagName)) continue;
        if (semanticTags.includes(child.tagName)) continue;

        const rect = child.getBoundingClientRect();
        if (rect.height < threshold) continue;

        const absoluteY = rect.y + window.scrollY;

        let name = child.id || '';
        if (!name && child.className) {
          const firstClass = child.className.toString().trim().split(/\s+/)[0].toLowerCase();
          if (!genericPatterns.includes(firstClass)) {
            name = firstClass;
          }
        }
        if (!name) {
          // Name based on Y position relative to page
          const yRatio = absoluteY / (document.body.scrollHeight || 1);
          if (yRatio < 0.15) name = 'top-section';
          else if (yRatio < 0.35) name = 'upper-content';
          else if (yRatio < 0.55) name = 'middle-content';
          else if (yRatio < 0.75) name = 'lower-content';
          else name = 'bottom-section';
          // The running count keeps position-based names distinct.
          name = `${name}-${sections.length}`;
        }

        sections.push({
          name: name.toLowerCase().replace(/[^a-z0-9-]/g, '-'),
          role: 'large-block',
          selector: child.id ? `#${child.id}` : child.tagName.toLowerCase(),
          bounds: {
            // Viewport rectangle plus scroll offset gives page coordinates.
            x: Math.round(rect.x + window.scrollX),
            y: Math.round(absoluteY),
            width: Math.round(rect.width),
            height: Math.round(rect.height)
          }
        });
      }

      // If we found sections in main, don't check body
      if (sections.length > 0 && container.tagName === 'MAIN') break;
    }

    return sections;
  }, { minHeight: config.minSectionHeight });
}
|
|
294
|
+
|
|
295
|
+
/**
 * Generate viewport chunks as fallback.
 *
 * Splits the page into full-width chunks of `viewportHeight` pixels, each
 * starting `viewportHeight - overlap` pixels below the previous one, where
 * overlap is `viewportHeight * overlapRatio` rounded. Generation stops as
 * soon as a chunk reaches the bottom of the page, so no chunk lies entirely
 * inside its predecessor. At most 51 chunks are produced.
 *
 * @param {Object} pageDimensions - Page size {width, height}
 * @param {Object} config - Reads `viewportHeight` and `overlapRatio`
 * @returns {Array} Sections as {name, role, selector, bounds}; empty when the
 *   page height or viewport height is not a positive number
 */
function generateViewportChunks(pageDimensions, config) {
  const { width, height } = pageDimensions;
  const { viewportHeight, overlapRatio } = config;

  const sections = [];
  if (!(viewportHeight > 0) || !(height > 0)) {
    return sections;
  }

  const overlap = Math.round(viewportHeight * overlapRatio);
  const rawStep = viewportHeight - overlap;
  // An overlap of 100% or more (or a non-numeric ratio) would never advance;
  // fall back to non-overlapping chunks in that case.
  const step = rawStep > 0 ? rawStep : viewportHeight;

  let y = 0;
  // Safety limit: indices 0..50
  for (let index = 0; y < height && index <= 50; index += 1) {
    const chunkHeight = Math.min(viewportHeight, height - y);

    sections.push({
      name: `viewport-${index}`,
      role: 'viewport-chunk',
      selector: null,
      bounds: {
        x: 0,
        y: y,
        width: width,
        height: chunkHeight
      }
    });

    // This chunk already covers the bottom of the page; a further chunk
    // would only repeat pixels contained in this one.
    if (y + chunkHeight >= height) break;

    y += step;
  }

  return sections;
}
|
|
333
|
+
|
|
334
|
+
/**
 * Append new sections to the existing ones, dropping vertical duplicates.
 *
 * A candidate is dropped when its vertical overlap with any section already
 * kept (including candidates accepted earlier in the same call) exceeds half
 * the height of the shorter of the two. Horizontal position is not compared.
 * The input arrays are left untouched.
 */
function mergeSections(existing, newSections) {
  const merged = existing.slice();

  for (const candidate of newSections) {
    let duplicate = false;

    for (const kept of merged) {
      const sharedTop = Math.max(kept.bounds.y, candidate.bounds.y);
      const sharedBottom = Math.min(
        kept.bounds.y + kept.bounds.height,
        candidate.bounds.y + candidate.bounds.height
      );
      const sharedHeight = Math.max(0, sharedBottom - sharedTop);
      const shorter = Math.min(kept.bounds.height, candidate.bounds.height);

      // More than 50% of the shorter section is shared
      if (sharedHeight > shorter * 0.5) {
        duplicate = true;
        break;
      }
    }

    if (duplicate) continue;
    merged.push(candidate);
  }

  return merged;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Apply padding to bounds, clamping to page dimensions.
 *
 * Each edge is moved outwards by `padding` and then clamped to the page
 * rectangle; the size is derived from the clamped edges. An edge that cannot
 * move (because it already touches the page border) therefore does not
 * inflate the opposite side, and the result never extends past the page.
 *
 * @param {Object} bounds - Section bounds {x, y, width, height}
 * @param {number} padding - Pixels to add on every side
 * @param {Object} pageDimensions - Page size {width, height}
 * @returns {Object} Padded bounds {x, y, width, height}
 */
function applyPadding(bounds, padding, pageDimensions) {
  const left = Math.max(0, bounds.x - padding);
  const top = Math.max(0, bounds.y - padding);
  const right = Math.min(pageDimensions.width, bounds.x + bounds.width + padding);
  const bottom = Math.min(pageDimensions.height, bounds.y + bounds.height + padding);

  return {
    x: left,
    y: top,
    // Guard against bounds that lie completely outside the page.
    width: Math.max(0, right - left),
    height: Math.max(0, bottom - top)
  };
}
|
|
373
|
+
|
|
374
|
+
/**
 * Condense a list of detected sections into a few values for logging.
 *
 * @param {Array} sections - Section objects with name, bounds and role
 * @returns {Object} `count`, the section `names` in order, `totalHeight`
 *   (sum of the bounds heights) and `hasViewportFallback` (true when any
 *   section has the role 'viewport-chunk')
 */
export function getSectionSummary(sections) {
  const names = [];
  let totalHeight = 0;
  let hasViewportFallback = false;

  for (const section of sections) {
    names.push(section.name);
    totalHeight += section.bounds.height;
    if (section.role === 'viewport-chunk') {
      hasViewportFallback = true;
    }
  }

  return {
    count: sections.length,
    names,
    totalHeight,
    hasViewportFallback
  };
}
|
|
385
|
+
|
|
386
|
+
export { DEFAULT_OPTIONS, SECTION_CLASS_PATTERNS };
|