prism-design 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +292 -0
- package/LICENSE +21 -0
- package/README.md +203 -0
- package/bin/clone-architect.mjs +476 -0
- package/bin/prism.mjs +467 -0
- package/catalog/index.json +1155 -0
- package/extractions/airbnb.com/DESIGN.md +1068 -0
- package/extractions/airbnb.com/tokens.json +507 -0
- package/extractions/attio.com/DESIGN.md +1295 -0
- package/extractions/attio.com/tokens.json +438 -0
- package/extractions/auroxdashboard.com/DESIGN.md +724 -0
- package/extractions/auroxdashboard.com/tokens.json +195 -0
- package/extractions/careerexplorer.com/DESIGN.md +1178 -0
- package/extractions/careerexplorer.com/tokens.json +141 -0
- package/extractions/chance.co/DESIGN.md +1209 -0
- package/extractions/chance.co/tokens.json +160 -0
- package/extractions/choisis-ton-avenir.com/DESIGN.md +1265 -0
- package/extractions/choisis-ton-avenir.com/tokens.json +227 -0
- package/extractions/example.com/DESIGN.md +436 -0
- package/extractions/example.com/tokens.json +91 -0
- package/extractions/getdesign.md/DESIGN.md +1009 -0
- package/extractions/getdesign.md/tokens.json +219 -0
- package/extractions/github.com/DESIGN.md +1130 -0
- package/extractions/github.com/tokens.json +2092 -0
- package/extractions/hello-charly.com/DESIGN.md +1146 -0
- package/extractions/hello-charly.com/tokens.json +322 -0
- package/extractions/hyperliquid.xyz/DESIGN.md +779 -0
- package/extractions/hyperliquid.xyz/tokens.json +598 -0
- package/extractions/instagram.com/DESIGN.md +996 -0
- package/extractions/instagram.com/tokens.json +1240 -0
- package/extractions/jobirl.com/DESIGN.md +1160 -0
- package/extractions/jobirl.com/tokens.json +139 -0
- package/extractions/life360.com/DESIGN.md +1133 -0
- package/extractions/life360.com/tokens.json +491 -0
- package/extractions/lifesum.com/DESIGN.md +965 -0
- package/extractions/lifesum.com/tokens.json +170 -0
- package/extractions/linear.app/DESIGN.md +1301 -0
- package/extractions/linear.app/tokens.json +732 -0
- package/extractions/mavoie.org/DESIGN.md +1148 -0
- package/extractions/mavoie.org/tokens.json +128 -0
- package/extractions/miro.com/DESIGN.md +1237 -0
- package/extractions/miro.com/tokens.json +401 -0
- package/extractions/notion.so/DESIGN.md +1319 -0
- package/extractions/notion.so/tokens.json +906 -0
- package/extractions/onetonline.org/DESIGN.md +909 -0
- package/extractions/onetonline.org/tokens.json +280 -0
- package/extractions/posthog.com/DESIGN.md +1024 -0
- package/extractions/posthog.com/tokens.json +197 -0
- package/extractions/revolut.com/DESIGN.md +1080 -0
- package/extractions/revolut.com/tokens.json +401 -0
- package/extractions/stripe.com/DESIGN.md +1272 -0
- package/extractions/stripe.com/tokens.json +794 -0
- package/extractions/switchcollective.com/DESIGN.md +1040 -0
- package/extractions/switchcollective.com/tokens.json +98 -0
- package/extractions/truity.com/DESIGN.md +970 -0
- package/extractions/truity.com/tokens.json +166 -0
- package/extractions/uniquekicks.be/DESIGN.md +1171 -0
- package/extractions/uniquekicks.be/tokens.json +237 -0
- package/package.json +122 -0
- package/scripts/analyze.ts +281 -0
- package/scripts/bank-register.ts +379 -0
- package/scripts/bank.ts +374 -0
- package/scripts/browser-stealth.ts +189 -0
- package/scripts/clone.ts +198 -0
- package/scripts/compare-vs-gd-final.ts +273 -0
- package/scripts/compare-vs-gd.ts +269 -0
- package/scripts/compare.ts +405 -0
- package/scripts/deploy-site.ts +181 -0
- package/scripts/diff-snapshots.ts +340 -0
- package/scripts/enrich-catalog.ts +212 -0
- package/scripts/extract.ts +2038 -0
- package/scripts/extractors/advanced.ts +524 -0
- package/scripts/extractors/widgets.ts +711 -0
- package/scripts/generate-design-md.ts +5775 -0
- package/scripts/generate-final-pdf.ts +274 -0
- package/scripts/generate-og-image.ts +87 -0
- package/scripts/generate-showcase.ts +1588 -0
- package/scripts/generate-site.ts +847 -0
- package/scripts/mass-extract.sh +91 -0
- package/scripts/post-process-all.sh +55 -0
- package/scripts/regen-catalog.ts +203 -0
- package/scripts/shared/cache.ts +149 -0
- package/scripts/shared/css-helpers.ts +263 -0
- package/scripts/shared/logger.ts +57 -0
- package/scripts/shared/named-colors.ts +355 -0
- package/scripts/shared/types.ts +220 -0
- package/scripts/sync-catalog.ts +105 -0
- package/scripts/tokenize.ts +988 -0
- package/templates/layout-template.md +52 -0
- package/templates/tokens-template.json +34 -0
|
@@ -0,0 +1,2038 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prism — Script d'extraction Playwright
|
|
3
|
+
*
|
|
4
|
+
* Extrait le design RÉEL d'un site via getComputedStyle() :
|
|
5
|
+
* - Screenshots multi-viewport (desktop 1440px + mobile 390px)
|
|
6
|
+
* - CSS computed sur tous les éléments clés
|
|
7
|
+
* - CSS custom properties (--color-*, --font-*, etc.)
|
|
8
|
+
* - Couleurs dominantes, typo, spacing, layout
|
|
9
|
+
* - Structure DOM (sections, composants, navigation)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { chromium, type Page, type Browser } from 'playwright';
|
|
13
|
+
import { mkdir, writeFile } from 'fs/promises';
|
|
14
|
+
import { join } from 'path';
|
|
15
|
+
|
|
16
|
+
// ── Types ────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * The fixed set of computed CSS properties captured for an element.
 * Each field holds the string value read from a `CSSStyleDeclaration`.
 */
interface ComputedStyles {
  // Background and text color
  backgroundColor: string;
  backgroundImage: string;
  color: string;

  // Typography
  fontFamily: string;
  fontSize: string;
  fontWeight: string;
  lineHeight: string;
  letterSpacing: string;
  textAlign: string;

  // Box model and decoration
  padding: string;
  margin: string;
  borderRadius: string;
  border: string;
  boxShadow: string;

  // Sizing
  width: string;
  height: string;
  minHeight: string;
  maxWidth: string;

  // Layout
  display: string;
  gap: string;
  gridTemplateColumns: string;
  flexDirection: string;
  alignItems: string;
  justifyContent: string;
  position: string;
  overflow: string;

  // Motion and visibility
  transition: string;
  opacity: string;

  // Text decoration and font features
  textTransform: string;
  textDecoration: string;
  fontFeatureSettings: string;
  fontVariationSettings: string;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Snapshot of a single key element: identity, text, geometry and computed styles.
 *
 * The optional `ariaLabel`, `dataTestId` and `role` fields mirror the extra
 * attributes that `extractComputedStyles` writes on each record; they are
 * `undefined` when the attribute is absent or empty.
 */
interface ElementExtraction {
  selector: string;
  /** Lower-cased tag name. */
  tag: string;
  classes: string[];
  /** Truncated inner text of the element. */
  text: string;
  /** Value of the `aria-label` attribute, when present. */
  ariaLabel?: string;
  /** Value of the `data-testid` attribute, when present. */
  dataTestId?: string;
  /** Value of the `role` attribute, when present. */
  role?: string;
  styles: ComputedStyles;
  /** Number of direct child elements. */
  children: number;
  /** Bounding client rect, rounded to integers. */
  rect: { x: number; y: number; width: number; height: number };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Description of one page section: position in the page, identity, geometry,
 * a (possibly partial) style snapshot, and optional classification signals.
 */
interface SectionExtraction {
  // Identity
  index: number;
  tag: string;
  classes: string[];
  role: string;
  estimatedPurpose: string;

  // Geometry and styles
  rect: { x: number; y: number; width: number; height: number };
  styles: Partial<ComputedStyles> | Record<string, string>;
  childCount: number;

  // Optional classification signals
  bgTreatment?: string;
  isDark?: boolean;
  aboveFold?: boolean;
  isFullBleed?: boolean;
  imgRatio?: number;
  maxHeadingPx?: number;
  gridCols?: number;
  hasAnimation?: boolean;
  textLen?: number;
  vPad?: number;
  hasChart?: boolean;
}
|
|
84
|
+
|
|
85
|
+
/**
 * One visually distinct instance of a component type (button, card, ...):
 * its tag, classes, text, computed styles and bounding box.
 */
interface ComponentVariant {
  tag: string;
  classes: string[];
  text: string;
  styles: ComputedStyles;
  rect: { x: number; y: number; width: number; height: number };
}
|
|
92
|
+
|
|
93
|
+
/** Descriptors of a single `@font-face` declaration, all kept as raw strings. */
interface FontFaceDeclaration {
  family: string;
  src: string;
  weight: string;
  style: string;
  display: string;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Full result of extracting one page at one viewport: page identity, key
 * elements, sections, aggregated design values, imagery, links, component
 * data, and font/typography metadata.
 */
interface ExtractionResult {
  // ── Page identity ──
  url: string;
  domain: string;
  timestamp: string;
  viewport: { width: number; height: number };
  pageTitle: string;

  // ── Elements and sections ──
  cssCustomProperties: Record<string, string>;
  elements: Record<string, ElementExtraction | null>;
  sections: SectionExtraction[];

  // ── Aggregated design values ──
  allColors: string[];
  allFontFamilies: string[];
  allFontSizes: string[];
  allBorderRadii: string[];
  allShadows: string[];
  allTransitions: string[];

  // ── Imagery ──
  images: { src: string; alt: string; width: number; height: number }[];
  widgets?: WidgetExtraction;
  imageryProfile?: {
    ogImage: string | null;
    ogImageWidth: number | null;
    ogImageHeight: number | null;
    twitterImage: string | null;
    heroImage: {
      src: string;
      alt: string;
      width: number;
      height: number;
      aspectRatio: number;
    } | null;
    formats: {
      png: number;
      jpg: number;
      webp: number;
      svg: number;
      gif: number;
      other: number;
    };
    totalImages: number;
    totalAboveFold: number;
    aspectRatioBuckets: {
      landscape: number;
      portrait: number;
      square: number;
      ultrawide: number;
    };
    illustrationHeavy: boolean;
    photoHeavy: boolean;
    avgImageSize: { width: number; height: number };
    decorativePatterns?: {
      multiStopGradients: number;
      radialGradients: number;
      largeSvgShapes: number;
      backgroundImagePatterns: number;
      hasNoise: boolean;
      hasGlassmorphism: boolean;
    };
  };

  // ── Links and components ──
  links: { href: string; text: string; isNav: boolean }[];
  componentVariants: Record<string, ComponentVariant[]>;
  componentStates: Record<string, ComponentStateStyles>;

  // ── Fonts and typography ──
  fontFaces: FontFaceDeclaration[];
  mediaBreakpoints: string[];
  openTypeFeatures: string[];
  variableAxes: string[];
  displaySignature?: {
    family: string;
    fontSize: string;
    fontWeight: string;
    isSerif: boolean;
    isItalic: boolean;
    sample: string;
    secondary?: {
      family: string;
      fontSize: string;
      fontWeight: string;
      isSerif: boolean;
      isItalic: boolean;
      sample: string;
    };
  };

  // Phase 5 Sprint 80/20 — advanced capture (kept: has downstream consumers)
  keyframes?: Record<string, Record<string, Record<string, string>>>;
  zIndexMap?: Array<{ selector: string; z: number; stackingRoot: string }>;
  visualEffects?: import('./extractors/advanced.js').VisualEffects;
  // RFC C/D/F/A/B fields removed in v2.4 (Phase 1.2) — zero downstream consumers detected:
  //   transform3DMap, containerQueries, containerTypes, gridLayouts, responsiveSnapshot, pseudoElements
  // If needed, re-add with explicit consumer; YAGNI removed speculative extraction (saves ~50KB/site, ~10s extract time).
}
|
|
156
|
+
|
|
157
|
+
// ── Config ───────────────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
/** Viewport sizes (in CSS pixels) used for desktop and mobile extraction. */
const VIEWPORTS = {
  desktop: {
    width: 1440,
    height: 900,
  },
  mobile: {
    width: 390,
    height: 844,
  },
} as const;
|
|
163
|
+
|
|
164
|
+
/**
 * Phase 5.1.1 — Stricter selectors.
 *
 * BEFORE: `[class*="header"]` matched `<body class="with-new-header">`, which
 * produced headerHeight = 12802px (the whole page) on Attio, Cursor, Airbnb.
 * AFTER: tag-first selectors, plus ARIA roles, plus strict class matching.
 *
 * Maps a logical element name to the CSS selector used to locate it.
 * Key order is significant: results are produced in this order.
 */
const KEY_SELECTORS: Record<string, string> = {
  // Page landmarks
  body: 'body',
  header: 'header, [role="banner"], [data-testid*="header"]:not(body):not(html)',
  nav: 'nav, [role="navigation"]',
  main: 'main, [role="main"]',
  sidebar: 'aside, [role="complementary"]',
  footer: 'footer, [role="contentinfo"]',

  // Content blocks and controls
  hero: 'section[class*="hero" i]:not(body), [class*="hero-section" i]:not(body), [data-section="hero"]',
  card: 'article, [class*="card" i]:not(body):not(html):not(main):not(section)',
  button: 'button, [role="button"]:not(body):not(html)',
  input: 'input[type="text"], input[type="search"], input[type="email"], textarea',

  // Typography
  heading: 'h1',
  subheading: 'h2',
  link: 'a:not([class*="btn"]):not([class*="button"])',

  // Smaller UI pieces
  badge: '[class*="badge" i]:not(body), [class*="tag" i]:not(body), [class*="chip" i]:not(body)',
  modal: '[class*="modal" i]:not(body), [role="dialog"]',
  dropdown: '[role="menu"], [class*="dropdown" i]:not(body)',
  avatar: 'img[class*="avatar" i], img[class*="profile" i]',
  logo: 'header a img, header a svg, nav a img, nav a svg, [class*="logo" i]:not(body):not(html)',
};
|
|
187
|
+
|
|
188
|
+
// ── Extraction functions ─────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
/**
 * Captures identity, text, geometry and computed styles for the first element
 * matching each entry of KEY_SELECTORS.
 *
 * Bare heading selectors (`h1`..`h6`) prefer the first visible match, because
 * SPAs often place a hidden SEO heading before the visible one. For the
 * `heading` entry only, when no match exists at all, the lookup falls back to
 * ARIA level-1 headings and then to the largest-font text element (above 31px)
 * near the top of the page.
 *
 * Each record carries the `selector` that produced it, as declared by
 * `ElementExtraction`.
 *
 * @param page Playwright page to evaluate in.
 * @returns A map from logical name to the extracted element, or `null` when
 *          nothing matched.
 */
async function extractComputedStyles(page: Page): Promise<Record<string, ElementExtraction | null>> {
  return page.evaluate((selectors: Record<string, string>) => {
    const result: Record<string, any> = {};
    for (const [name, selector] of Object.entries(selectors)) {
      let el: Element | null = null;
      const isHeading = /^h[1-6]$/.test(selector.trim());
      if (isHeading) {
        const candidates = Array.from(document.querySelectorAll(selector));
        // First candidate that is rendered, else the first candidate in DOM order.
        el = candidates.find(c => {
          const r = c.getBoundingClientRect();
          const cs = getComputedStyle(c);
          return r.width > 0 && r.height > 0 &&
            cs.display !== 'none' && cs.visibility !== 'hidden' && cs.opacity !== '0';
        }) || candidates[0] || null;
        // SPA fallback: no h1 at all → try ARIA heading roles, then largest-font text above fold
        if (!el && name === 'heading') {
          el = document.querySelector('[role="heading"][aria-level="1"]') ||
            document.querySelector('[aria-level="1"]') || null;
          if (!el) {
            const vh = window.innerHeight;
            let bestEl: Element | null = null;
            let bestSize = 31; // only fonts strictly larger than this qualify
            document.querySelectorAll('div,span,p,strong').forEach((c: Element) => {
              const r = c.getBoundingClientRect();
              // Skip elements far below the fold, above the viewport, or acting as containers.
              if (r.top > vh * 1.5 || r.bottom < 0 || (c as HTMLElement).children.length > 2) return;
              const fs = parseFloat(getComputedStyle(c).fontSize);
              if (fs > bestSize) { bestEl = c; bestSize = fs; }
            });
            el = bestEl;
          }
        }
      } else {
        el = document.querySelector(selector);
      }
      if (!el) { result[name] = null; continue; }
      const cs = getComputedStyle(el);
      const rect = el.getBoundingClientRect();
      result[name] = {
        selector,
        tag: el.tagName.toLowerCase(),
        classes: Array.from(el.classList),
        text: (el as HTMLElement).innerText?.slice(0, 200) || '',
        ariaLabel: (el as HTMLElement).getAttribute('aria-label') || undefined,
        dataTestId: (el as HTMLElement).getAttribute('data-testid') || undefined,
        role: (el as HTMLElement).getAttribute('role') || undefined,
        children: el.children.length,
        rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
        styles: {
          backgroundColor: cs.backgroundColor,
          backgroundImage: cs.backgroundImage,
          color: cs.color,
          fontFamily: cs.fontFamily,
          fontSize: cs.fontSize,
          fontWeight: cs.fontWeight,
          lineHeight: cs.lineHeight,
          letterSpacing: cs.letterSpacing,
          textAlign: cs.textAlign,
          padding: cs.padding,
          margin: cs.margin,
          borderRadius: cs.borderRadius,
          border: cs.border,
          boxShadow: cs.boxShadow,
          width: cs.width,
          height: cs.height,
          minHeight: cs.minHeight,
          maxWidth: cs.maxWidth,
          display: cs.display,
          gap: cs.gap,
          gridTemplateColumns: cs.gridTemplateColumns,
          flexDirection: cs.flexDirection,
          alignItems: cs.alignItems,
          justifyContent: cs.justifyContent,
          position: cs.position,
          overflow: cs.overflow,
          transition: cs.transition,
          opacity: cs.opacity,
          textTransform: cs.textTransform,
          textDecoration: cs.textDecoration,
          fontFeatureSettings: cs.fontFeatureSettings,
          fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
        },
      };
    }
    return result;
  }, KEY_SELECTORS);
}
|
|
277
|
+
|
|
278
|
+
/**
 * Collects CSS custom properties (`--*`) visible on the page.
 *
 * Sources, in precedence order (the first value seen for a name wins):
 *   1. computed styles of the document element;
 *   2. computed styles of scoped roots: body, main, the first `[data-theme]`
 *      element, the first dark-classed element, and the first header/nav
 *      (Tailwind/shadcn often define variables on such scopes);
 *   3. top-level stylesheet rules whose selector is `:root`, `html`, `body`,
 *      `*`, or mentions `.dark` / `[data-theme`.
 *
 * Stylesheets whose rules cannot be read (e.g. cross-origin) are skipped.
 *
 * @param page Playwright page to evaluate in.
 * @returns Map of custom property name to its trimmed value.
 */
async function extractCSSCustomProperties(page: Page): Promise<Record<string, string>> {
  return page.evaluate(() => {
    const vars: Record<string, string> = {};

    // Document element first, then the scoped roots that exist on this page.
    const scopes = [
      document.documentElement,
      document.body,
      document.querySelector('main'),
      document.querySelector('[data-theme]'),
      document.querySelector('.dark, [class*="dark"]'),
      document.querySelector('header, nav'),
    ].filter(Boolean) as HTMLElement[];

    for (const scope of scopes) {
      const computed = getComputedStyle(scope);
      for (let idx = 0; idx < computed.length; idx++) {
        const name = computed[idx];
        if (!name.startsWith('--') || name in vars) continue;
        vars[name] = computed.getPropertyValue(name).trim();
      }
    }

    // Also check stylesheets for CSS variables (extended selectors)
    try {
      for (const sheet of document.styleSheets) {
        try {
          for (const rule of sheet.cssRules) {
            if (!(rule instanceof CSSStyleRule)) continue;
            const sel = rule.selectorText;
            const isGlobalScope =
              sel === ':root' || sel === 'html' || sel === 'body' || sel === '*';
            const isThemeScope = /\.dark\b/.test(sel) || /\[data-theme/.test(sel);
            if (!isGlobalScope && !isThemeScope) continue;

            const decl = rule.style;
            for (let idx = 0; idx < decl.length; idx++) {
              const name = decl[idx];
              if (!name.startsWith('--') || name in vars) continue;
              vars[name] = decl.getPropertyValue(name).trim();
            }
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    return vars;
  });
}
|
|
334
|
+
|
|
335
|
+
/**
 * Collects the distinct color values used on the page.
 *
 * Samples up to 3000 elements, taking elements with a non-empty bounding box
 * first and filling the remainder with the other elements in DOM order. For
 * each sampled element, and for its `::before` / `::after` pseudo-elements
 * when they have generated content, a fixed list of color properties is read
 * from the computed style.
 *
 * A property value contributes a color only when it resolves to a single
 * color: multi-value results such as a `borderColor` with differing sides
 * are dropped because they have no canonical color. Fully transparent values
 * are ignored.
 *
 * Everything is kept inline on purpose: named helper functions inside
 * `page.evaluate` make tsx emit `__name`, which is undefined in the page.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct color strings in first-seen order.
 */
async function extractAllColors(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const colorSet = new Set<string>();
    const MAX = 3000;
    const all = Array.from(document.querySelectorAll('*'));
    // Prioritize visible elements, fill remaining with rest — up to MAX
    const visible = all.filter(el => {
      const r = (el as HTMLElement).getBoundingClientRect();
      return r.width > 0 && r.height > 0;
    });
    let sample: Element[];
    if (visible.length >= MAX) {
      sample = visible.slice(0, MAX);
    } else {
      // Set lookup keeps the "rest" filter linear instead of O(n²) via Array.includes.
      const visibleSet = new Set<Element>(visible);
      sample = [...visible, ...all.filter(el => !visibleSet.has(el))].slice(0, MAX);
    }

    const props = ['backgroundColor', 'color', 'borderColor', 'borderTopColor', 'outlineColor', 'caretColor', 'columnRuleColor', 'fill', 'stroke'] as const;

    // Phase 5.1.1 — Inline multi-value parsing (avoids tsx __name emission inside page.evaluate)
    // borderColor can be "rgb(A) rgb(B) rgb(C) rgb(D)" when sides differ. If mixed, drop entirely.
    const COLOR_REGEX = /rgba?\([^)]+\)|#[0-9a-fA-F]{3,8}/g;

    sample.forEach(el => {
      // Style sources in processing order: the element, then ::before, then ::after
      // (pseudo-elements only when they actually render content).
      const sources: CSSStyleDeclaration[] = [getComputedStyle(el)];
      ['::before', '::after'].forEach(pseudo => {
        const pcs = getComputedStyle(el, pseudo);
        if (pcs.content && pcs.content !== 'none' && pcs.content !== 'normal') {
          sources.push(pcs);
        }
      });

      sources.forEach(decl => {
        props.forEach(prop => {
          const val = decl[prop as keyof CSSStyleDeclaration] as string;
          if (!val || val === 'rgba(0, 0, 0, 0)' || val === 'transparent') return;
          const matches = val.match(COLOR_REGEX) || [];
          if (matches.length === 0) {
            if (val.startsWith('#') || val.startsWith('rgb')) colorSet.add(val);
            return;
          }
          // One color, or the same color repeated → keep it.
          // Mixed-side borderColor → drop (no canonical color).
          if (new Set(matches).size === 1) colorSet.add(matches[0]);
        });
      });
    });
    return [...colorSet];
  });
}
|
|
395
|
+
|
|
396
|
+
/**
 * Phase 5.1.3 — Detect anti-bot challenge pages BEFORE extraction.
 * Cloudflare, captchas, "Just a moment", and similar interstitials would otherwise
 * be extracted as if they were the real site, producing nonsense tokens.
 *
 * Checks the lower-cased title, the first 500 characters of body text and the
 * first 1000 characters of the document HTML against known signatures, then
 * falls back to an "almost empty page" heuristic.
 *
 * The `403` signature requires word boundaries so that digits embedded in
 * asset hashes, ids or pixel values (e.g. `main.a403f.css`, `1403px`) in the
 * HTML head do not flag a normal page as "Access denied".
 *
 * @param page Playwright page to evaluate in.
 * @returns `{ blocked: true, reason }` when a challenge is suspected,
 *          otherwise `{ blocked: false }`.
 */
async function detectBotChallenge(page: Page): Promise<{ blocked: boolean; reason?: string }> {
  return page.evaluate(() => {
    const title = (document.title || '').toLowerCase();
    const bodyText = (document.body?.textContent || '').slice(0, 500).toLowerCase();
    // Only the first 1000 characters of markup are inspected.
    const html = (document.documentElement?.outerHTML || '').slice(0, 1000).toLowerCase();

    const SIGNATURES = [
      { pattern: /just a moment\.\.\.|checking your browser/i, reason: 'Cloudflare interstitial' },
      { pattern: /verify you are human|are you a robot/i, reason: 'Human verification challenge' },
      { pattern: /captcha|recaptcha|hcaptcha/i, reason: 'CAPTCHA detected' },
      { pattern: /access denied|forbidden|\b403\b/i, reason: 'Access denied (403)' },
      { pattern: /enable javascript and cookies|please enable cookies/i, reason: 'JS/cookies required gate' },
      { pattern: /ddos protection|protection by/i, reason: 'DDoS protection page' },
    ];

    for (const sig of SIGNATURES) {
      if (sig.pattern.test(title) || sig.pattern.test(bodyText) || sig.pattern.test(html)) {
        return { blocked: true, reason: sig.reason };
      }
    }

    // Heuristic: page with <100 chars of visible text AND <5 elements = suspicious
    const visibleText = (document.body?.innerText || '').trim();
    const elementCount = document.body?.children?.length || 0;
    if (visibleText.length < 100 && elementCount < 5) {
      return { blocked: true, reason: `Suspicious empty page (${visibleText.length} chars, ${elementCount} elements)` };
    }

    return { blocked: false };
  });
}
|
|
432
|
+
|
|
433
|
+
/**
 * Gathers the distinct computed `font-family` and `font-size` values of the
 * first 3000 elements in document order.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct families and sizes, each in first-seen order.
 */
async function extractAllFonts(page: Page): Promise<{ families: string[]; sizes: string[] }> {
  return page.evaluate(() => {
    const seenFamilies = new Set<string>();
    const seenSizes = new Set<string>();
    const nodes = Array.from(document.querySelectorAll('*')).slice(0, 3000);
    for (const node of nodes) {
      const computed = getComputedStyle(node);
      seenFamilies.add(computed.fontFamily);
      seenSizes.add(computed.fontSize);
    }
    return {
      families: Array.from(seenFamilies),
      sizes: Array.from(seenSizes),
    };
  });
}
|
|
447
|
+
|
|
448
|
+
/**
 * Gathers the distinct non-zero computed `border-radius` values of the first
 * 3000 elements in document order.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct border-radius strings in first-seen order.
 */
async function extractAllBorderRadii(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const seen = new Set<string>();
    const nodes = Array.from(document.querySelectorAll('*')).slice(0, 3000);
    for (const node of nodes) {
      const radius = getComputedStyle(node).borderRadius;
      // Skip empty values and the "no rounding" default.
      if (!radius || radius === '0px') continue;
      seen.add(radius);
    }
    return Array.from(seen);
  });
}
|
|
462
|
+
|
|
463
|
+
/**
 * Gathers the distinct computed `box-shadow` values (other than `none`) of the
 * first 3000 elements in document order.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct box-shadow strings in first-seen order.
 */
async function extractAllShadows(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const seen = new Set<string>();
    const nodes = Array.from(document.querySelectorAll('*')).slice(0, 3000);
    for (const node of nodes) {
      const shadow = getComputedStyle(node).boxShadow;
      if (!shadow || shadow === 'none') continue;
      seen.add(shadow);
    }
    return Array.from(seen);
  });
}
|
|
477
|
+
|
|
478
|
+
/**
 * Gathers the distinct computed `transition` values of the first 1000 elements
 * in document order, ignoring the two default serializations
 * (`all 0s ease 0s` and `none 0s ease 0s`).
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct transition strings in first-seen order.
 */
async function extractAllTransitions(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const seen = new Set<string>();
    const nodes = Array.from(document.querySelectorAll('*')).slice(0, 1000);
    for (const node of nodes) {
      const value = getComputedStyle(node).transition;
      if (!value) continue;
      if (value === 'all 0s ease 0s' || value === 'none 0s ease 0s') continue;
      seen.add(value);
    }
    return Array.from(seen);
  });
}
|
|
492
|
+
|
|
493
|
+
// ── Component Variants (all unique styles per component type) ───────
|
|
494
|
+
|
|
495
|
+
async function extractComponentVariants(page: Page): Promise<Record<string, ComponentVariant[]>> {
|
|
496
|
+
return page.evaluate(() => {
|
|
497
|
+
const variantSelectors: Record<string, string> = {
|
|
498
|
+
// ── Core interactive ──
|
|
499
|
+
// v4-V1-T1: Extended button selector covers Tailwind utility CSS + ARIA + handlers
|
|
500
|
+
// (sites without semantic .btn class like utility-first frameworks now detected)
|
|
501
|
+
buttons: 'button, [class*="btn"], [role="button"], a[class*="button"], input[type="submit"], input[type="button"], a[class*="bg-"][href], a[class*="-cta"], button[class*="bg-"], [onclick]:not(div):not(span):not(li), [data-action="button"], a[aria-label][href][class*="px-"]',
|
|
502
|
+
inputs: 'input, textarea, select',
|
|
503
|
+
searchBar: '[role="search"], [class*="search-bar"], [class*="searchbar"], [class*="search-form"]',
|
|
504
|
+
// ── Content blocks ──
|
|
505
|
+
cards: '[class*="card"], article, [class*="tile"]',
|
|
506
|
+
badges: '[class*="badge"], [class*="tag"], [class*="chip"], [class*="label"]:not(label)',
|
|
507
|
+
statusBadge: '[class*="status"], [class*="pill"], [class*="indicator"], [class*="dot"][class*="color"], [data-status]',
|
|
508
|
+
// ── Navigation & structure ──
|
|
509
|
+
navLinks: 'nav a, [class*="nav"] a, [class*="menu"] a, [role="navigation"] a, header a',
|
|
510
|
+
tabs: '[role="tablist"], [role="tab"], [class*="tabs"], [class*="tab-bar"], [class*="tab-nav"]',
|
|
511
|
+
footerLinks: 'footer a, [role="contentinfo"] a',
|
|
512
|
+
// ── Marketing sections ──
|
|
513
|
+
pricingCard: '[class*="pricing"], [class*="price-card"], [class*="plan-card"], [class*="plan-tile"], [class*="tier"]',
|
|
514
|
+
ctaBanner: '[class*="cta"], [class*="call-to-action"], [class*="banner-cta"], [class*="promo-banner"]',
|
|
515
|
+
testimonial: '[class*="testimonial"], [class*="review-card"], [class*="quote-block"], [class*="customer-story"]',
|
|
516
|
+
logoTile: '[class*="logo-grid"] img, [class*="logos"] img, [class*="customer-logo"], [class*="partner-logo"], [class*="brand-logo"]',
|
|
517
|
+
// ── Typography roles ──
|
|
518
|
+
headingH1: 'h1',
|
|
519
|
+
headingH2: 'h2',
|
|
520
|
+
headingH3: 'h3',
|
|
521
|
+
headingH4: 'h4',
|
|
522
|
+
headingH5: 'h5',
|
|
523
|
+
headingH6: 'h6',
|
|
524
|
+
links: 'a',
|
|
525
|
+
eyebrowLabels: '[class*="eyebrow"], [class*="overline"], [class*="kicker"], [class*="label--small"], [class*="meta"], [class*="subtitle"]',
|
|
526
|
+
captions: 'figcaption, caption, [class*="caption"], [class*="helper-text"], [class*="supporting"]',
|
|
527
|
+
tableHeaders: 'th, thead td',
|
|
528
|
+
// ── Misc UI ──
|
|
529
|
+
avatar: '[class*="avatar"], [class*="profile-pic"], [class*="user-pic"]',
|
|
530
|
+
divider: 'hr, [class*="divider"], [class*="separator"]',
|
|
531
|
+
tooltip: '[role="tooltip"], [class*="tooltip"]',
|
|
532
|
+
};
|
|
533
|
+
|
|
534
|
+
// Site-specific selectors — only injected when found in DOM (avoids noise on non-matching sites)
|
|
535
|
+
const siteSpecificSelectors: Record<string, string> = {
|
|
536
|
+
// Travel / booking
|
|
537
|
+
datePicker: '[class*="date-picker"], [class*="datepicker"], [class*="calendar"], [data-testid*="date"]',
|
|
538
|
+
reservationCard: '[class*="reservation"], [class*="booking"], [class*="checkout-panel"]',
|
|
539
|
+
propertyCard: '[class*="property-card"], [class*="listing-card"], [class*="stay-card"]',
|
|
540
|
+
ratingDisplay: '[class*="rating"], [class*="review-score"], [class*="star-rating"]',
|
|
541
|
+
hostCard: '[class*="host-card"], [class*="host-info"], [class*="profile-card"]',
|
|
542
|
+
// SaaS / dev tools
|
|
543
|
+
codeBlock: 'pre, code, [class*="code-block"], [class*="syntax"], [class*="prism"]',
|
|
544
|
+
changelogRow: '[class*="changelog"], [class*="release"], [class*="release-note"], [class*="version-row"]',
|
|
545
|
+
breadcrumb: '[aria-label="breadcrumb"], [class*="breadcrumb"], nav[class*="crumb"]',
|
|
546
|
+
alert: '[role="alert"], [class*="alert"], [class*="notification-bar"], [class*="toast"]',
|
|
547
|
+
commandPalette: '[class*="command-palette"], [class*="CommandPalette"], [class*="cmdk-root"], [role="dialog"][class*="search"], [class*="CommandDialog"]',
|
|
548
|
+
dataTable: 'table[class*="data"], [class*="data-table"], [class*="DataTable"], [class*="DataGrid"], [role="grid"]',
|
|
549
|
+
accordion: '[class*="accordion"], [class*="Accordion"], details:has(summary), [role="region"][class*="collapsible"]',
|
|
550
|
+
skeleton: '[class*="skeleton"], [class*="Skeleton"], [class*="shimmer"], [class*="loading-placeholder"]',
|
|
551
|
+
progressBar: '[role="progressbar"], [class*="progress-bar"], [class*="ProgressBar"], [class*="progress-track"]',
|
|
552
|
+
emptyState: '[class*="empty-state"], [class*="EmptyState"], [class*="empty-placeholder"], [class*="no-results"]',
|
|
553
|
+
kpiCard: '[class*="metric-card"], [class*="stat-card"], [class*="KpiCard"], [class*="StatCard"], [class*="stats-card"]',
|
|
554
|
+
timelinePill: '[class*="timeline"], [class*="Timeline"], [class*="agent-trace"], [class*="AgentTrace"]',
|
|
555
|
+
// Ecom
|
|
556
|
+
productCard: '[class*="product-card"], [class*="product-tile"], [class*="sku-card"]',
|
|
557
|
+
priceTag: '[class*="price"], [class*="cost"], [itemprop="price"]',
|
|
558
|
+
};
|
|
559
|
+
for (const [name, sel] of Object.entries(siteSpecificSelectors)) {
|
|
560
|
+
if (document.querySelectorAll(sel).length > 0) (variantSelectors as Record<string, string>)[name] = sel;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
const result: Record<string, any[]> = {};
|
|
564
|
+
|
|
565
|
+
for (const [name, selector] of Object.entries(variantSelectors)) {
|
|
566
|
+
const elems = document.querySelectorAll(selector);
|
|
567
|
+
const seen = new Set<string>();
|
|
568
|
+
const variants: any[] = [];
|
|
569
|
+
|
|
570
|
+
const els = Array.from(elems).slice(0, 50);
|
|
571
|
+
for (const el of els) {
|
|
572
|
+
const rect = el.getBoundingClientRect();
|
|
573
|
+
if (rect.width < 5 || rect.height < 5) continue;
|
|
574
|
+
|
|
575
|
+
const cs = getComputedStyle(el);
|
|
576
|
+
// Inline fingerprint
|
|
577
|
+
const fp = [
|
|
578
|
+
cs.backgroundColor, cs.color, cs.fontSize, cs.fontWeight,
|
|
579
|
+
cs.fontFamily, cs.borderRadius, cs.padding, cs.border,
|
|
580
|
+
cs.boxShadow, cs.display, cs.height,
|
|
581
|
+
].join('|');
|
|
582
|
+
if (seen.has(fp)) continue;
|
|
583
|
+
seen.add(fp);
|
|
584
|
+
|
|
585
|
+
// Inline extractStyles
|
|
586
|
+
variants.push({
|
|
587
|
+
tag: el.tagName.toLowerCase(),
|
|
588
|
+
classes: Array.from(el.classList),
|
|
589
|
+
text: (el as HTMLElement).innerText?.trim().slice(0, 100) || '',
|
|
590
|
+
rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
|
|
591
|
+
styles: {
|
|
592
|
+
backgroundColor: cs.backgroundColor,
|
|
593
|
+
color: cs.color,
|
|
594
|
+
fontFamily: cs.fontFamily,
|
|
595
|
+
fontSize: cs.fontSize,
|
|
596
|
+
fontWeight: cs.fontWeight,
|
|
597
|
+
lineHeight: cs.lineHeight,
|
|
598
|
+
letterSpacing: cs.letterSpacing,
|
|
599
|
+
padding: cs.padding,
|
|
600
|
+
margin: cs.margin,
|
|
601
|
+
borderRadius: cs.borderRadius,
|
|
602
|
+
border: cs.border,
|
|
603
|
+
boxShadow: cs.boxShadow,
|
|
604
|
+
width: cs.width,
|
|
605
|
+
height: cs.height,
|
|
606
|
+
maxWidth: cs.maxWidth,
|
|
607
|
+
display: cs.display,
|
|
608
|
+
gap: cs.gap,
|
|
609
|
+
gridTemplateColumns: cs.gridTemplateColumns,
|
|
610
|
+
flexDirection: cs.flexDirection,
|
|
611
|
+
alignItems: cs.alignItems,
|
|
612
|
+
justifyContent: cs.justifyContent,
|
|
613
|
+
position: cs.position,
|
|
614
|
+
overflow: cs.overflow,
|
|
615
|
+
transition: cs.transition,
|
|
616
|
+
opacity: cs.opacity,
|
|
617
|
+
textTransform: cs.textTransform,
|
|
618
|
+
textDecoration: cs.textDecoration,
|
|
619
|
+
fontFeatureSettings: cs.fontFeatureSettings,
|
|
620
|
+
fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
|
|
621
|
+
},
|
|
622
|
+
});
|
|
623
|
+
if (variants.length >= 10) break;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
if (variants.length > 0) {
|
|
627
|
+
result[name] = variants;
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
return result;
|
|
632
|
+
});
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
// ── Component States (:hover, :focus) ──────────────────────
|
|
636
|
+
// Note: `:active` not supported — UA state unreachable via Playwright dispatchEvent
|
|
637
|
+
|
|
638
|
+
/**
 * Computed styles captured for one component type, keyed by interaction state.
 * `default` is always present; `hover` / `focus` are only set when at least one
 * tracked property differed from `default` while that state was active.
 */
interface ComponentStateStyles {
  default: Record<string, string>;
  hover?: Record<string, string>;
  focus?: Record<string, string>;
}

/** CSS properties (camelCase, as exposed on CSSStyleDeclaration) compared between states. */
const STATE_PROPS = [
  'backgroundColor', 'color', 'border', 'borderColor', 'boxShadow',
  'opacity', 'transform', 'outline', 'textDecoration', 'cursor',
] as const;

/**
 * Read every STATE_PROPS entry from the computed style of the first element matching `selector`.
 *
 * The property list is passed as an argument because the callback is serialized and executed
 * in the page, where Node-side constants such as STATE_PROPS are not in scope.
 *
 * @returns the property → value map, or `null` when nothing matches the selector.
 */
async function snapshotStateProps(page: Page, selector: string): Promise<Record<string, string> | null> {
  return page.evaluate(({ sel, props }) => {
    const el = document.querySelector(sel);
    if (!el) return null;
    const cs = getComputedStyle(el);
    const out: Record<string, string> = {};
    for (const p of props) out[p] = (cs as any)[p];
    return out;
  }, { sel: selector, props: [...STATE_PROPS] as string[] });
}

/**
 * Capture default / hover / focus computed styles for a fixed list of component types.
 *
 * For each target the first matching element is measured at rest, then once per requested
 * state. A state is recorded (as the full snapshot taken in that state) only if at least one
 * tracked property differs from the default snapshot. Targets with no matching element are
 * omitted from the result. Failures are swallowed per state / per target and only logged when
 * CLONE_LOG_LEVEL=debug, so one uncooperative component never aborts the whole extraction.
 *
 * @param page Playwright page, already navigated to the site being analysed.
 * @returns map of target name → captured state styles.
 */
async function extractComponentStates(page: Page): Promise<Record<string, ComponentStateStyles>> {
  const result: Record<string, ComponentStateStyles> = {};

  // Phase 5 Sprint 80/20 — `:active` was removed: dispatchEvent('mousedown') does NOT trigger
  // the UA `:active` state, so the captured data was identical to the default and useless.
  // See the audit of 2026-04-18.
  const targets: { name: string; selector: string; states: ('hover' | 'focus')[] }[] = [
    { name: 'button', selector: 'button:not([disabled]), [class*="btn"]:not([disabled]), a[class*="button"]', states: ['hover', 'focus'] },
    { name: 'input', selector: 'input[type="text"], input[type="search"], input[type="email"]', states: ['focus'] },
    { name: 'link', selector: 'a:not([class*="btn"]):not([class*="button"])', states: ['hover'] },
    { name: 'card', selector: '[class*="card"]:not(body), article, [class*="tile"]', states: ['hover'] },
    { name: 'navLink', selector: 'nav a, header a, [role="navigation"] a', states: ['hover'] },
    { name: 'tab', selector: '[role="tab"], [class*="tab-item"], [class*="tab-link"]', states: ['hover'] },
    { name: 'badge', selector: '[class*="badge"], [class*="tag"], [class*="chip"]', states: ['hover'] },
    { name: 'footerLink', selector: 'footer a, [role="contentinfo"] a', states: ['hover'] },
  ];

  for (const target of targets) {
    try {
      // Baseline first, before any interaction touches the element.
      const defaultStyles = await snapshotStateProps(page, target.selector);
      if (!defaultStyles) continue;

      const stateStyles: ComponentStateStyles = { default: defaultStyles };

      for (const state of target.states) {
        try {
          let stateResult: Record<string, string> | null = null;

          if (state === 'hover') {
            try {
              // Playwright's native hover moves the real pointer, which does trigger `:hover`.
              await page.hover(target.selector, { timeout: 2000 });
              stateResult = await snapshotStateProps(page, target.selector);
            } finally {
              // Always park the pointer away again, even if the read failed, so a lingering
              // hover cannot leak into the baseline of the next target.
              await page.mouse.move(0, 0);
            }
          } else if (state === 'focus') {
            try {
              await page.focus(target.selector, { timeout: 2000 });
              stateResult = await snapshotStateProps(page, target.selector);
            } finally {
              // Always drop focus again for the same reason as the hover reset.
              await page.evaluate(() => { (document.activeElement as HTMLElement)?.blur?.(); });
            }
          }
          // Note: `:active` state intentionally not extracted — requires real pointer
          // events not available via Playwright (dispatchEvent doesn't trigger UA state).

          if (stateResult) {
            // Record the state only when something actually changed vs default; the stored
            // value is the full snapshot, not just the changed subset.
            const differs = Object.entries(stateResult).some(([k, v]) => v !== defaultStyles[k]);
            if (differs) {
              stateStyles[state] = stateResult;
            }
          }
        } catch (err) {
          // State extraction failed — site may block interaction (CSP, intercepted event)
          if (process.env.CLONE_LOG_LEVEL === 'debug') {
            console.log(` ⚠️ ${target.name}:${state} failed: ${String((err as Error)?.message ?? err).slice(0, 80)}`);
          }
        }
      }

      result[target.name] = stateStyles;
    } catch (err) {
      // Element not found for target.selector — common, site may not have this component
      if (process.env.CLONE_LOG_LEVEL === 'debug') {
        console.log(` ⚠️ ${target.name} skipped: ${String((err as Error)?.message ?? err).slice(0, 80)}`);
      }
    }
  }

  return result;
}
|
|
750
|
+
|
|
751
|
+
// ── OpenType Features & Variable Font Axes ──────────────────────────
|
|
752
|
+
|
|
753
|
+
/**
 * Collect the OpenType feature tags and variable-font axis tags referenced by the page.
 *
 * Two sources are scanned inside the page: the computed `font-feature-settings` /
 * `font-variation-settings` of every element, and the serialized text of the top-level rules
 * of each readable stylesheet. Feature tags are returned lower-cased, axis tags upper-cased.
 *
 * NOTE(review): axis tags are upper-cased on output (so "wght" is reported as "WGHT");
 * confirm consumers do not feed these back into CSS, where tags are case-sensitive.
 *
 * @param page Playwright page to inspect.
 * @returns de-duplicated `features` and `axes`, in first-seen order.
 */
async function extractOpenTypeFeatures(page: Page): Promise<{ features: string[]; axes: string[] }> {
  return page.evaluate(() => {
    const features = new Set<string>();
    const axes = new Set<string>();

    // Pass 1 — computed styles of every element in the document.
    document.querySelectorAll('*').forEach(node => {
      const computed = getComputedStyle(node);
      const featureSetting = computed.fontFeatureSettings;
      const variationSetting = (computed as any).fontVariationSettings;

      if (featureSetting && featureSetting !== 'normal') {
        // Values look like: "ss01" on, "kern" 1, "liga" 0 — each hit is the quoted 4-char tag.
        (featureSetting.match(/"([a-z0-9]{4})"/gi) || []).forEach(quoted => {
          features.add(quoted.slice(1, -1).toLowerCase());
        });
      }

      if (variationSetting && variationSetting !== 'normal') {
        // Values look like: "wght" 400, "wdth" 75.
        (variationSetting.match(/"([A-Z]{4})"/gi) || []).forEach((quoted: string) => {
          axes.add(quoted.slice(1, -1).toUpperCase());
        });
      }
    });

    // Pass 2 — declarations in stylesheet rules, which also covers rules not currently applied.
    try {
      for (const sheet of Array.from(document.styleSheets)) {
        try {
          for (const rule of Array.from(sheet.cssRules || [])) {
            const css = (rule as CSSStyleRule).cssText || '';

            for (const decl of css.match(/font-feature-settings\s*:\s*([^;]+)/gi) || []) {
              for (const quoted of decl.match(/"([a-z0-9]{4})"/gi) || []) {
                features.add(quoted.slice(1, -1).toLowerCase());
              }
            }

            for (const decl of css.match(/font-variation-settings\s*:\s*([^;]+)/gi) || []) {
              for (const quoted of decl.match(/"([A-Z]{4})"/gi) || []) {
                axes.add(quoted.slice(1, -1).toUpperCase());
              }
            }
          }
        } catch { /* cross-origin sheet */ }
      }
    } catch { /* security error */ }

    return { features: Array.from(features), axes: Array.from(axes) };
  });
}
|
|
814
|
+
|
|
815
|
+
// ── Font-face declarations ──────────────────────────────────────────
|
|
816
|
+
|
|
817
|
+
/**
 * List the font faces declared by the page, one entry per family / weight / style.
 *
 * Two sources are merged:
 *  1. `document.fonts` — gives family, weight, style and display. The FontFace interface does
 *     not expose the `src` descriptor, so entries from this source normally have no URL.
 *  2. `@font-face` rules in readable stylesheets (including rules nested in grouping rules
 *     such as `@media` / `@supports`) — these carry the `src` URLs.
 *
 * When both sources describe the same face, the stylesheet rule back-fills the missing `src`
 * instead of being discarded as a duplicate. The dedupe key uses the family with quotes
 * stripped and treats `normal`/`bold` as `400`/`700`, so the two sources agree on identity.
 * Cross-origin stylesheets whose rules cannot be read are skipped.
 *
 * @param page Playwright page to inspect.
 * @returns font-face declarations in first-seen order.
 */
async function extractFontFaces(page: Page): Promise<FontFaceDeclaration[]> {
  return page.evaluate(() => {
    const byKey = new Map<string, { family: string; src: string; weight: string; style: string; display: string }>();

    // Method 1: document.fonts API (if available)
    try {
      if ('fonts' in document) {
        (document as any).fonts.forEach((font: FontFace) => {
          const family = font.family.replace(/['"]/g, '').trim();
          const weight = font.weight || 'normal';
          const style = font.style || 'normal';
          const keyWeight = weight === 'normal' ? '400' : weight === 'bold' ? '700' : weight;
          const key = `${family}|${keyWeight}|${style}`;
          if (byKey.has(key)) return;

          // Best effort only: standard FontFace objects have no `src`, so this is usually ''.
          let src = '';
          const rawSrc = (font as any).src;
          if (typeof rawSrc === 'string') {
            const urlMatch = rawSrc.match(/url\(["']?([^"')]+)["']?\)/);
            src = urlMatch ? urlMatch[1] : rawSrc;
          }

          byKey.set(key, {
            family,
            src,
            weight,
            style,
            display: (font as any).display || 'auto',
          });
        });
      }
    } catch { /* fonts API not supported */ }

    // Method 2: Parse @font-face rules from stylesheets.
    // Iterative stack (no named recursive function inside page.evaluate).
    try {
      for (const sheet of document.styleSheets) {
        try {
          const ruleStack: CSSRuleList[] = [sheet.cssRules];
          while (ruleStack.length > 0) {
            const rules = ruleStack.pop()!;
            for (const rule of rules) {
              if (rule instanceof CSSFontFaceRule) {
                const family = rule.style.getPropertyValue('font-family').replace(/['"]/g, '').trim();
                const src = rule.style.getPropertyValue('src');
                const weight = rule.style.getPropertyValue('font-weight') || 'normal';
                const style = rule.style.getPropertyValue('font-style') || 'normal';
                const display = rule.style.getPropertyValue('font-display') || 'auto';
                const keyWeight = weight === 'normal' ? '400' : weight === 'bold' ? '700' : weight;
                const key = `${family}|${keyWeight}|${style}`;

                // Extract actual URLs from the src property; fall back to the raw value
                // (e.g. a local() reference) when it contains no url().
                const urls: string[] = [];
                for (const m of src.matchAll(/url\(["']?([^"')]+)["']?\)/g)) {
                  urls.push(m[1]);
                }
                const resolvedSrc = urls.join(', ') || src;

                const existing = byKey.get(key);
                if (existing) {
                  // Same face already known (typically from document.fonts): only fill the gap.
                  if (!existing.src) existing.src = resolvedSrc;
                  continue;
                }
                byKey.set(key, { family, src: resolvedSrc, weight, style, display });
              } else if ((rule as any).cssRules && (rule as any).cssRules.length > 0) {
                ruleStack.push((rule as any).cssRules);
              }
            }
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    return Array.from(byKey.values());
  });
}
|
|
886
|
+
|
|
887
|
+
// ── Media query breakpoints ─────────────────────────────────────────
|
|
888
|
+
|
|
889
|
+
/**
 * Extract the viewport-width breakpoints used by the page's `@media` rules.
 *
 * Walks every readable stylesheet (descending into nested grouping rules, but not into
 * `@container` rules) and, for each media query that contains a numeric `min-width` /
 * `max-width`, records:
 *  - each width value of at least 320px-equivalent as `"<number><unit>"` (px, em or rem), and
 *  - the full query as `"@media <condition>"`.
 *
 * Only lengths attached to `min-width` / `max-width` are recorded; other lengths in the same
 * query (e.g. `min-height`) are not breakpoints and are ignored. Nothing is synthesized when
 * few or no breakpoints are found.
 *
 * @param page Playwright page to inspect.
 * @returns numeric breakpoints sorted ascending by pixel-equivalent size (em/rem counted as
 *          16px), followed by the `@media …` entries in discovery order.
 */
async function extractMediaBreakpoints(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const breakpoints = new Set<string>();
    // Use a stack instead of recursion to avoid named function declarations
    try {
      for (const sheet of document.styleSheets) {
        try {
          const ruleStack: CSSRuleList[] = [sheet.cssRules];
          while (ruleStack.length > 0) {
            const rules = ruleStack.pop()!;
            for (const rule of rules) {
              if (typeof CSSContainerRule !== 'undefined' && rule instanceof CSSContainerRule) continue;
              if (rule instanceof CSSMediaRule) {
                const media = rule.conditionText || rule.media?.mediaText || '';
                // Only capture viewport width breakpoints (min-width / max-width), not feature queries
                if (media && /(min|max)-width\s*:\s*\d/.test(media)) {
                  const widths = media.matchAll(/(?:min|max)-width\s*:\s*(\d+(?:\.\d+)?)(px|em|rem)/g);
                  for (const m of widths) {
                    // In media queries em and rem both resolve against the initial font size.
                    const pxVal = parseFloat(m[1]) * (m[2] === 'px' ? 1 : 16);
                    // Filter: ignore sub-320px values (print, tiny accessibility queries)
                    if (pxVal >= 320) breakpoints.add(`${m[1]}${m[2]}`);
                  }
                  breakpoints.add(`@media ${media}`);
                }
              }
              if ((rule as any).cssRules && (rule as any).cssRules.length > 0) {
                ruleStack.push((rule as any).cssRules);
              }
            }
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    const numericBPs: string[] = [];
    const mediaBPs: string[] = [];
    for (const bp of breakpoints) {
      if (bp.startsWith('@media')) {
        mediaBPs.push(bp);
      } else {
        numericBPs.push(bp);
      }
    }
    // Compare in pixel-equivalents so mixed units order correctly (48em = 768px > 640px).
    numericBPs.sort((a, b) =>
      parseFloat(a) * (a.endsWith('px') ? 1 : 16) - parseFloat(b) * (b.endsWith('px') ? 1 : 16));

    // Phase 5.2.3 — STOP synthetic Tailwind defaults.
    // The previous fallback injected '640px,768px,1024px,1280px' when <2 detected,
    // presenting Tailwind defaults AS IF they were extracted from the site = factual lie.
    // If <2 detected, return what we have. Downstream marks "fixed-width / responsive units only".
    return [...numericBPs, ...mediaBPs];
  });
}
|
|
945
|
+
|
|
946
|
+
// ── Sections extraction ─────────────────────────────────────────────
|
|
947
|
+
|
|
948
|
+
// v2.7 A.1 — the hero headline is frequently NOT the largest measured <h*> (it's a styled <div>,
|
|
949
|
+
// a <span>, or image text), and the §1 narrative was hardcoded to the BODY font — so the single most
|
|
950
|
+
// recognizable signature (serif vs sans display) was lost on every site. Detect the display face by
|
|
951
|
+
// VISUAL PROMINENCE: the above-fold text node with the largest fontSize × rendered width, and classify it.
|
|
952
|
+
/**
 * One display typeface as produced by extractDisplaySignature.
 * - family: first entry of the computed font-family stack, quotes stripped
 * - fontSize / fontWeight: computed CSS values of the sampled element
 * - isSerif / isItalic: heuristic classification based on the family name (and font-style)
 * - sample: up to 60 characters of the element's own text
 */
interface DisplayFace { family: string; fontSize: string; fontWeight: string; isSerif: boolean; isItalic: boolean; sample: string }
|
|
953
|
+
/**
 * Detect the page's display typeface(s) by visual prominence.
 *
 * Every heading-like or text-bearing element across the whole page is scored as
 * `fontSize × min(rendered width, viewport width)`, with a 1.5× boost when it sits near the
 * first screen. The top-scoring candidate is the primary face; the best candidate whose
 * first font-family differs from it becomes `secondary`.
 *
 * @param page Playwright page to inspect.
 * @returns the primary face (optionally with `secondary`), or `null` when no display-scale
 *          text (≥ 22px, 3–120 own characters, at least 80×24px) is found.
 */
async function extractDisplaySignature(page: Page): Promise<(DisplayFace & { secondary?: DisplayFace }) | null> {
  return page.evaluate(() => {
    const viewportH = window.innerHeight;
    const viewportW = window.innerWidth;

    // v2.9 A.1+ — scan the WHOLE page (hydrated), weight above-fold higher, then keep the top-2
    // DISTINCT display families. Catches sites with a sans hero + a serif-italic in lower bands
    // (e.g. switchcollective: Satoshi hero above the fold, Canela serif-italic in "Ré-inventer").
    const cands: Array<{ score: number; family: string; fontSize: string; fontWeight: string; style: string; sample: string }> = [];
    const nodes = document.querySelectorAll('h1,h2,h3,p,a,span,div,[class*="title"],[class*="heading"],[class*="hero"],[class*="headline"]');

    for (const node of Array.from(nodes)) {
      const box = node.getBoundingClientRect();
      if (box.width < 80 || box.height < 24) continue;

      // Only the element's OWN text nodes count, so wrappers don't inherit their children's text.
      let ownText = '';
      for (const child of Array.from(node.childNodes)) {
        if (child.nodeType === Node.TEXT_NODE) ownText += child.textContent || '';
      }
      ownText = ownText.replace(/\s+/g, ' ').trim();
      if (ownText.length < 3 || ownText.length > 120) continue;

      const computed = getComputedStyle(node);
      const sizePx = parseFloat(computed.fontSize) || 0;
      if (sizePx < 22) continue; // display scale only

      const nearFirstScreen = box.top < viewportH * 1.2 && box.top > -80;
      const foldBoost = nearFirstScreen ? 1.5 : 1; // hero counts more
      cands.push({
        score: sizePx * Math.min(box.width, viewportW) * foldBoost,
        family: computed.fontFamily || '',
        fontSize: computed.fontSize,
        fontWeight: computed.fontWeight,
        style: computed.fontStyle,
        sample: ownText.slice(0, 60),
      });
    }

    if (cands.length === 0) return null;
    cands.sort((a, b) => b.score - a.score);

    const SERIF = ['times', 'georgia', 'garamond', 'playfair', 'canela', 'fraunces', 'tiempos', 'teodor', 'quincy', 'recoleta', 'editorial', 'freight', 'noe', 'reckless', 'domaine', 'ogg', 'signifier', 'lora', 'merriweather', 'source serif', 'dm serif', 'spectral', 'newsreader', 'cormorant', 'gt sectra', 'ppeditorial', 'instrument serif'];

    // NOTE: classification is inlined via an anonymous .map() — a NAMED const-arrow (e.g. `const classify=`)
    // makes tsx/esbuild emit a `__name(...)` helper that is undefined inside page.evaluate (ReferenceError).
    const best = cands[0];
    const bestFamily = (best.family.split(',')[0] || '').replace(/["']/g, '').trim().toLowerCase();
    const runnerUp = cands.find(c => (c.family.split(',')[0] || '').replace(/["']/g, '').trim().toLowerCase() !== bestFamily);
    const picked = runnerUp ? [best, runnerUp] : [best];

    const faces = picked.map(c => {
      const firstFamily = (c.family.split(',')[0] || '').replace(/["']/g, '').trim();
      const lowered = firstFamily.toLowerCase();
      const knownSerif = SERIF.some(s => lowered.includes(s));
      const genericSerif = lowered.includes('serif') && !lowered.includes('sans');
      return {
        family: firstFamily,
        fontSize: c.fontSize,
        fontWeight: c.fontWeight,
        isSerif: knownSerif || genericSerif,
        isItalic: c.style === 'italic' || lowered.includes('italic'),
        sample: c.sample,
      };
    });

    if (faces.length > 1) {
      return { ...faces[0], secondary: faces[1] };
    }
    return faces[0];
  });
}
|
|
998
|
+
|
|
999
|
+
/**
 * Extract the page's major sections ("bands") with layout metrics and a purpose guess.
 *
 * Candidates are: significant direct children of <body>, bands surfaced by descending into
 * monolithic wrappers (SPA fallback), and everything matching the landmark / section-like
 * selectors. Each unique candidate at least 30px tall is measured and classified, first by
 * structural tag/role/class, then by content signature (headings, imagery, grid, text length).
 *
 * `isDark` is derived from the EFFECTIVE background colour: a fully transparent
 * `background-color` (the computed default, `rgba(0, 0, 0, 0)`) is resolved by climbing to the
 * nearest ancestor with a visible colour, instead of being read as black.
 *
 * @param page Playwright page to inspect.
 * @returns sections sorted by vertical position; `index` reflects discovery order.
 */
async function extractSections(page: Page): Promise<SectionExtraction[]> {
  return page.evaluate(() => {
    // Viewport size is constant for the duration of this synchronous scan.
    const vw = window.innerWidth;
    const vh = window.innerHeight;

    // Find major page sections
    const sectionSelectors = [
      'header', 'nav', 'main', 'section', 'aside', 'footer',
      '[role="banner"]', '[role="navigation"]', '[role="main"]',
      '[role="complementary"]', '[role="contentinfo"]',
      'div[class*="section"]', 'div[class*="container"]',
      'div[class*="wrapper"]', 'div[class*="hero"]',
    ];

    const seen = new Set<Element>();
    const sections: Array<{
      index: number;
      tag: string;
      classes: string[];
      role: string;
      estimatedPurpose: string;
      rect: { x: number; y: number; width: number; height: number };
      styles: Record<string, string>;
      childCount: number;
      bgTreatment: string;
      isDark: boolean;
      aboveFold: boolean;
      isFullBleed: boolean;
      imgRatio: number;
      maxHeadingPx: number;
      gridCols: number;
      hasAnimation: boolean;
      textLen: number;
      vPad: number;
      hasChart: boolean;
    }> = [];

    // Also get direct children of body that are significant
    const bodyChildren = (document.body ? Array.from(document.body.children) : []).filter(el => {
      const rect = el.getBoundingClientRect();
      return rect.height > 50 && rect.width > 200;
    });

    // v2.10-B — SPA/Framer/WebGL fallback: when everything is nested in ≤2 wrappers (e.g. hyperliquid
    // returns a single monolithic section), descend to surface real bands. Iterative stack (NO named
    // recursive fn — that triggers tsx/esbuild __name → ReferenceError in page.evaluate). A "band" =
    // full-width-ish, 80–2600px tall, with content; giant wrappers (>2600px) are descended into.
    const extraCandidates: Element[] = [];
    if (bodyChildren.length <= 2) {
      const stack: Element[] = bodyChildren.slice();
      let guard = 0; // hard cap on visited wrappers so a pathological DOM cannot stall the scan
      while (stack.length > 0 && guard < 3000) {
        guard++;
        const node = stack.shift() as Element;
        const kids = Array.from(node.children);
        for (let ki = 0; ki < kids.length; ki++) {
          const k = kids[ki];
          const kr = k.getBoundingClientRect();
          const hasContent = k.children.length > 0 || (k.textContent || '').trim().length > 0;
          if (kr.width > vw * 0.6 && kr.height >= 80 && kr.height <= 2600 && hasContent) extraCandidates.push(k);
          else if (kr.height > 2600 && k.children.length > 0) stack.push(k);
        }
      }
    }

    const allCandidates = [
      ...bodyChildren,
      ...extraCandidates,
      ...sectionSelectors.flatMap(s => Array.from(document.querySelectorAll(s))),
    ];

    let idx = 0;
    for (const el of allCandidates) {
      if (seen.has(el)) continue;
      seen.add(el);

      const rect = el.getBoundingClientRect();
      if (rect.height < 30) continue;

      const cs = getComputedStyle(el);
      const tag = el.tagName.toLowerCase();
      const classes = Array.from(el.classList);
      const role = el.getAttribute('role') || '';

      // v2.7 A.3/A.4 — content-signature classification (not tag/class strings only).
      // The old classifier defaulted to 'unknown' on Webflow/Shopify/Chakra div-soup, making §13
      // a useless stub. We now read real content signals so §13 names real bands.
      const classStr = classes.join(' ').toLowerCase();
      const txt = (el.textContent || '').replace(/\s+/g, ' ').trim();
      const txtLen = txt.length;
      const imgEls = el.querySelectorAll('img,picture,svg,video');
      let imgArea = 0;
      imgEls.forEach(im => { const r = im.getBoundingClientRect(); imgArea += Math.max(0, r.width) * Math.max(0, r.height); });
      const imgRatio = imgArea / Math.max(1, rect.width * rect.height);
      let maxHeadingPx = 0;
      el.querySelectorAll('h1,h2,h3').forEach(h => { const fs = parseFloat(getComputedStyle(h).fontSize) || 0; if (fs > maxHeadingPx) maxHeadingPx = fs; });
      const isFullBleed = rect.width >= vw * 0.95;
      const aboveFold = rect.y < vh * 0.9;
      const gridCols = cs.gridTemplateColumns && cs.gridTemplateColumns !== 'none' ? cs.gridTemplateColumns.split(' ').filter(Boolean).length : 0;
      const hasAnimation = !!cs.animationName && cs.animationName !== 'none';

      // Background treatment (A.4) — the dominant atmosphere the old extractor was blind to.
      const bgImg = cs.backgroundImage || 'none';
      let bgTreatment = 'flat';
      if (bgImg && bgImg !== 'none') {
        if (/gradient/i.test(bgImg)) bgTreatment = /radial-gradient|conic-gradient/i.test(bgImg) ? 'radial-gradient' : ((bgImg.match(/rgba?\(|#[0-9a-f]{3,8}/gi) || []).length >= 4 ? 'mesh-gradient' : 'linear-gradient');
        else if (/url\(/i.test(bgImg)) bgTreatment = 'image';
      }

      // Dark-band detection on the effective background. Elements without their own background
      // compute to `rgba(0, 0, 0, 0)`; reading its RGB channels would flag every transparent
      // section as black, so climb until an ancestor paints a colour. If none does (or the
      // colour is not in rgb()/rgba() form), the band is treated as not dark.
      let bgM: RegExpMatchArray | null = null;
      let bgNode: Element | null = el;
      while (bgNode) {
        const bgColor = (bgNode === el ? cs : getComputedStyle(bgNode)).backgroundColor || '';
        const m = bgColor.match(/(\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?/);
        if (!m) break;
        if (m[4] !== undefined && parseFloat(m[4]) === 0) {
          bgNode = bgNode.parentElement;
          continue;
        }
        bgM = m;
        break;
      }
      // Relative luminance approximation on raw sRGB channels; below 0.35 counts as dark.
      const isDarkBand = bgM ? (((+bgM[1]) * 0.2126 + (+bgM[2]) * 0.7152 + (+bgM[3]) * 0.0722) / 255 < 0.35) : false;

      // v2.11-B — premium sites put their generous vertical whitespace on an INNER wrapper, not the
      // section element (attio sections report padding "0px"). Capture the largest child's vertical
      // padding so the real airy rhythm survives. Also flag chart/canvas/SVG-graph bands (A).
      let innerPadTop = parseFloat(cs.paddingTop) || 0;
      let innerPadBottom = parseFloat(cs.paddingBottom) || 0;
      if (innerPadTop < 8 && innerPadBottom < 8) {
        const kidsArr = Array.from(el.children);
        for (let ci = 0; ci < kidsArr.length; ci++) {
          const cr = kidsArr[ci].getBoundingClientRect();
          if (cr.height > 60 && cr.width > rect.width * 0.4) {
            const kcs = getComputedStyle(kidsArr[ci]);
            innerPadTop = Math.max(innerPadTop, parseFloat(kcs.paddingTop) || 0);
            innerPadBottom = Math.max(innerPadBottom, parseFloat(kcs.paddingBottom) || 0);
          }
        }
      }
      const vPad = Math.round((innerPadTop + innerPadBottom) / 2);
      const hasChart = !!el.querySelector('canvas, svg path[d], svg polyline, [class*="chart" i], [class*="graph" i]');

      // Classify: structural tags first, then content signature. Order matters — first match wins.
      let purpose = 'unknown';
      if (tag === 'header' || role === 'banner' || classStr.includes('header')) purpose = 'header';
      else if (tag === 'nav' || role === 'navigation' || classStr.includes('nav')) purpose = 'navigation';
      else if (tag === 'footer' || role === 'contentinfo' || classStr.includes('footer')) purpose = 'footer';
      else if (tag === 'aside' || role === 'complementary' || classStr.includes('sidebar')) purpose = 'sidebar';
      else if (hasAnimation && txtLen > 0 && txtLen < 220 && rect.height < 180 && isFullBleed) purpose = 'marquee';
      else if (aboveFold && idx <= 2 && maxHeadingPx >= 30 && isFullBleed) purpose = 'hero';
      else if (/pricing|tarif|formule|\bplan/.test(classStr) || (gridCols >= 2 && /€|\$|\/mo\b|\/mois|par mois/i.test(txt.slice(0, 400)))) purpose = 'pricing';
      else if (/faq|accordion|question/.test(classStr)) purpose = 'faq';
      else if (/testimonial|review|avis|t[ée]moignage/i.test(classStr + ' ' + txt.slice(0, 160))) purpose = 'testimonials';
      else if (imgEls.length >= 5 && rect.height < 220 && maxHeadingPx < 24) purpose = 'logo-strip';
      else if (imgRatio > 0.4 && gridCols >= 3) purpose = 'gallery-grid';
      else if (gridCols >= 2 && el.children.length >= 3 && maxHeadingPx < 30) purpose = 'card-grid';
      else if (maxHeadingPx >= 24 && txtLen > 60) purpose = 'feature-section';
      else if (txtLen > 220) purpose = 'content-section';
      else if (tag === 'main' || role === 'main') purpose = 'main-content';
      else if (tag === 'section') purpose = 'section';

      sections.push({
        index: idx++,
        tag,
        classes,
        role,
        estimatedPurpose: purpose,
        rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
        styles: {
          backgroundColor: cs.backgroundColor,
          color: cs.color,
          fontFamily: cs.fontFamily,
          fontSize: cs.fontSize,
          padding: cs.padding,
          margin: cs.margin,
          display: cs.display,
          gap: cs.gap,
          gridTemplateColumns: cs.gridTemplateColumns,
          flexDirection: cs.flexDirection,
          maxWidth: cs.maxWidth,
          width: cs.width,
          height: cs.height,
          position: cs.position,
          borderRadius: cs.borderRadius,
          boxShadow: cs.boxShadow,
          overflow: cs.overflow,
          alignItems: cs.alignItems,
          justifyContent: cs.justifyContent,
          textTransform: cs.textTransform,
          textDecoration: cs.textDecoration,
          border: cs.border,
          letterSpacing: cs.letterSpacing,
          lineHeight: cs.lineHeight,
          fontWeight: cs.fontWeight,
          opacity: cs.opacity,
          transition: cs.transition,
          fontFeatureSettings: cs.fontFeatureSettings,
          fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
        },
        childCount: el.children.length,
        bgTreatment,
        isDark: isDarkBand,
        aboveFold,
        isFullBleed,
        imgRatio: Math.round(imgRatio * 100) / 100,
        maxHeadingPx: Math.round(maxHeadingPx),
        gridCols,
        hasAnimation,
        textLen: txtLen,
        vPad,
        hasChart,
      });
    }

    // Sort by Y position
    sections.sort((a, b) => a.rect.y - b.rect.y);
    return sections;
  });
}
|
|
1203
|
+
|
|
1204
|
+
/**
 * Lists the first 50 `<img>` elements of the page with their rendered size.
 *
 * @param page Playwright page to inspect.
 * @returns One entry per image: resolved `src`, `alt` text, and the rendered
 *          width/height (bounding-box size, rounded to whole CSS pixels).
 */
async function extractImages(page: Page): Promise<{ src: string; alt: string; width: number; height: number }[]> {
  return page.evaluate(() => {
    const found: { src: string; alt: string; width: number; height: number }[] = [];
    const nodes = document.querySelectorAll('img');
    // Only the first 50 images are reported.
    const limit = Math.min(nodes.length, 50);
    for (let i = 0; i < limit; i++) {
      const node = nodes[i];
      const box = node.getBoundingClientRect();
      found.push({
        src: node.src,
        alt: node.alt,
        width: Math.round(box.width),
        height: Math.round(box.height),
      });
    }
    return found;
  });
}
|
|
1214
|
+
|
|
1215
|
+
/**
 * Imagery profile of a page — OG image, hero image, format distribution,
 * illustration vs photo mix.
 * Used in DESIGN.md §10b to guide LLMs on visual tone (lifestyle photography vs
 * product mockups vs abstract gradients vs illustration-heavy).
 */
interface ImageryProfile {
  /** `content` of the `og:image` meta tag, or null when absent. */
  ogImage: string | null;
  /** Parsed `og:image:width` meta value, or null when absent. */
  ogImageWidth: number | null;
  /** Parsed `og:image:height` meta value, or null when absent. */
  ogImageHeight: number | null;
  /** `content` of the `twitter:image` meta tag, or null when absent. */
  twitterImage: string | null;
  /** Largest counted image (by rendered area) near the top of the page, or null if none qualifies. */
  heroImage: { src: string; alt: string; width: number; height: number; aspectRatio: number } | null;
  /** Image counts per file format; `svg` also includes large inline `<svg>` elements. */
  formats: { png: number; jpg: number; webp: number; svg: number; gif: number; other: number };
  /** Number of images that were counted (tiny images are skipped by the extractor). */
  totalImages: number;
  /** Counted images whose top edge is above the bottom of the viewport. */
  totalAboveFold: number;
  /** Counted images grouped by width/height ratio. */
  aspectRatioBuckets: { landscape: number; portrait: number; square: number; ultrawide: number };
  /** True when significant SVG illustrations clearly outnumber raster photos (jpg/webp) and the page is not photo-heavy. */
  illustrationHeavy: boolean;
  /** True when the page contains many raster photos (jpg/webp). */
  photoHeavy: boolean;
  /** Average rendered size of the counted images. */
  avgImageSize: { width: number; height: number };
  /** Decorative background treatments detected on the page (optional). */
  decorativePatterns?: {
    /** Elements whose background uses a linear/conic gradient with several stops. */
    multiStopGradients: number;
    /** Elements whose background uses a radial gradient. */
    radialGradients: number;
    /** Large inline `<svg>` elements. */
    largeSvgShapes: number;
    /** Elements with a `url(...)` background that is not a jpg/webp/png file. */
    backgroundImagePatterns: number;
    /** A background image whose value mentions noise/grain/pattern was found. */
    hasNoise: boolean;
    /** A `backdrop-filter` blur was found. */
    hasGlassmorphism: boolean;
  };
}
|
|
1242
|
+
|
|
1243
|
+
/**
 * Builds the imagery profile of the current page inside the browser context.
 *
 * Collects social-card meta images, scans the first 100 `<img>` elements
 * (ignoring those rendered smaller than 50px on either side) for format,
 * aspect-ratio and hero-image statistics, counts inline SVGs, and scans the
 * first 1500 DOM elements for decorative background treatments.
 *
 * The callback deliberately declares no named helper functions: everything is
 * inlined to avoid tsx `__name()` compilation issues inside `page.evaluate`.
 *
 * @param page Playwright page to inspect.
 * @returns The imagery profile computed in the page.
 */
async function extractImageryProfile(page: Page): Promise<ImageryProfile> {
  return page.evaluate(() => {
    // OG / Twitter meta — inline lookups to avoid tsx __name() compilation issues
    const ogImageEl = document.querySelector('meta[property="og:image"]') || document.querySelector('meta[name="og:image"]');
    const ogImage = ogImageEl ? ogImageEl.getAttribute('content') : null;
    const ogWEl = document.querySelector('meta[property="og:image:width"]');
    const ogW = ogWEl ? ogWEl.getAttribute('content') : null;
    const ogHEl = document.querySelector('meta[property="og:image:height"]');
    const ogH = ogHEl ? ogHEl.getAttribute('content') : null;
    const twitterEl = document.querySelector('meta[name="twitter:image"]') || document.querySelector('meta[property="twitter:image"]');
    const twitterImage = twitterEl ? twitterEl.getAttribute('content') : null;

    // A non-numeric meta value must become null, not NaN (the field is number | null).
    const ogWidthParsed = ogW ? parseInt(ogW, 10) : NaN;
    const ogHeightParsed = ogH ? parseInt(ogH, 10) : NaN;

    // All images on page
    const imgs = Array.from(document.querySelectorAll('img'));
    const formats = { png: 0, jpg: 0, webp: 0, svg: 0, gif: 0, other: 0 };
    const buckets = { landscape: 0, portrait: 0, square: 0, ultrawide: 0 };
    let totalW = 0;
    let totalH = 0;
    let counted = 0;
    let aboveFold = 0;
    const viewportH = window.innerHeight;

    let heroImage = null;
    let heroArea = 0;

    for (const img of imgs.slice(0, 100)) {
      const rect = img.getBoundingClientRect();
      const w = Math.round(rect.width);
      const h = Math.round(rect.height);
      // Skip icons / tracking pixels.
      if (w < 50 || h < 50) continue;

      counted++;
      totalW += w;
      totalH += h;
      if (rect.top < viewportH) aboveFold++;

      // Hero image = largest rendered area whose top is within 1.5 viewport heights.
      const area = w * h;
      if (area > heroArea && rect.top < viewportH * 1.5) {
        heroArea = area;
        heroImage = {
          src: img.src,
          alt: img.alt || '',
          width: w,
          height: h,
          aspectRatio: Math.round((w / h) * 100) / 100,
        };
      }

      const ratio = w / h;
      if (ratio > 2.3) buckets.ultrawide++;
      else if (ratio > 1.15) buckets.landscape++;
      else if (ratio < 0.87) buckets.portrait++;
      else buckets.square++;

      // Format is read from the file extension of `src`, falling back to the first srcset candidate.
      const src = img.src.toLowerCase();
      const srcsetFirst = (img.srcset || '').toLowerCase().split(' ')[0];
      const extMatch = src.match(/\.(png|jpe?g|webp|svg|gif)(\?|$)/) || srcsetFirst.match(/\.(png|jpe?g|webp|svg|gif)(\?|$)/);
      const ext = extMatch ? extMatch[1] : '';
      if (ext === 'png') formats.png++;
      else if (ext === 'jpg' || ext === 'jpeg') formats.jpg++;
      else if (ext === 'webp') formats.webp++;
      else if (ext === 'svg') formats.svg++;
      else if (ext === 'gif') formats.gif++;
      else formats.other++;
    }

    // Count significant SVGs only (skip small icons, which dominate counts)
    const inlineSvgs = Array.from(document.querySelectorAll('svg'));
    let largeSvgs = 0;
    let iconSvgs = 0;
    for (const svg of inlineSvgs.slice(0, 300)) {
      const rect = svg.getBoundingClientRect();
      if (rect.width >= 80 && rect.height >= 80) largeSvgs++;
      else iconSvgs++;
    }
    formats.svg += largeSvgs; // only count significant SVGs as illustrations

    const totalCounted = counted || 1; // avoid dividing by zero when nothing was counted
    const avgImageSize = {
      width: Math.round(totalW / totalCounted),
      height: Math.round(totalH / totalCounted),
    };

    // Heuristics:
    // - photo-heavy: 10+ raster photos (jpg/webp), regardless of icon count
    // - illustration-heavy: large SVGs dominate AND few raster photos exist
    // - otherwise: mixed or text-driven
    const photoCount = formats.jpg + formats.webp;
    const illustrationCount = formats.svg;
    const photoHeavy = photoCount >= 10;
    const illustrationHeavy = !photoHeavy && illustrationCount > photoCount * 1.5 && illustrationCount >= 5;

    // Phase 4.4 — Decorative patterns detection (gradient mesh, blobs, glassmorphism)
    let multiStopGradients = 0;
    let radialGradients = 0;
    let backgroundImagePatterns = 0;
    let hasNoise = false;
    let hasGlassmorphism = false;
    const allEls = document.querySelectorAll('*');
    const scanLimit = Math.min(allEls.length, 1500);
    for (let i = 0; i < scanLimit; i++) {
      const el = allEls[i];
      const cs = getComputedStyle(el);
      const bgImage = cs.backgroundImage;
      if (bgImage && bgImage !== 'none') {
        if (bgImage.indexOf('radial-gradient') !== -1) radialGradients++;
        if (bgImage.indexOf('linear-gradient') !== -1 || bgImage.indexOf('conic-gradient') !== -1) {
          // Computed colours are serialised as rgb()/rgba(), so the gradient's
          // arguments contain nested parentheses and commas. Walk the argument
          // list with a depth counter and count only top-level commas; a regex
          // that stops at the first ')' would only ever see the first colour.
          const gradientOpen = /\b(?:linear|conic)-gradient\(/g;
          let isMultiStop = false;
          let hit = gradientOpen.exec(bgImage);
          while (hit !== null && !isMultiStop) {
            const argsStart = hit.index + hit[0].length;
            let depth = 1;
            let topLevelCommas = 0;
            let firstArgEnd = -1;
            let j = argsStart;
            for (; j < bgImage.length && depth > 0; j++) {
              const ch = bgImage[j];
              if (ch === '(') depth++;
              else if (ch === ')') depth--;
              else if (ch === ',' && depth === 1) {
                if (firstArgEnd === -1) firstArgEnd = j;
                topLevelCommas++;
              }
            }
            // A leading direction / angle / position argument is not a colour stop.
            const firstArg = bgImage.slice(argsStart, firstArgEnd === -1 ? j : firstArgEnd).trim();
            const hasGeometryArg = /^(to\s|from\s|at\s|in\s|[-+]?[\d.]+(deg|grad|rad|turn)\b)/i.test(firstArg);
            const colorStops = topLevelCommas + 1 - (hasGeometryArg ? 1 : 0);
            // "Multi-stop" = three or more colour stops in a single gradient.
            if (colorStops >= 3) isMultiStop = true;
            hit = gradientOpen.exec(bgImage);
          }
          // Each element is counted at most once, whichever background layer matched.
          if (isMultiStop) multiStopGradients++;
        }
        if (bgImage.indexOf('url(') !== -1 && !/\.(jpe?g|webp|png)/i.test(bgImage)) {
          backgroundImagePatterns++;
        }
        if (/noise|grain|pattern/i.test(bgImage)) hasNoise = true;
      }
      const backdrop = (cs as any).backdropFilter || (cs as any).webkitBackdropFilter;
      if (backdrop && /blur\(\d+/.test(backdrop)) hasGlassmorphism = true;
    }
    // Large SVG shapes (already counted above via largeSvgs)
    const decorativePatterns = {
      multiStopGradients,
      radialGradients,
      largeSvgShapes: largeSvgs,
      backgroundImagePatterns,
      hasNoise,
      hasGlassmorphism,
    };

    return {
      ogImage,
      ogImageWidth: Number.isNaN(ogWidthParsed) ? null : ogWidthParsed,
      ogImageHeight: Number.isNaN(ogHeightParsed) ? null : ogHeightParsed,
      twitterImage,
      heroImage,
      formats,
      totalImages: counted,
      totalAboveFold: aboveFold,
      aspectRatioBuckets: buckets,
      illustrationHeavy,
      photoHeavy,
      avgImageSize,
      decorativePatterns,
    };
  }) as Promise<ImageryProfile>;
}
|
|
1388
|
+
|
|
1389
|
+
/**
 * Lists the first 100 `<a>` elements of the page.
 *
 * @param page Playwright page to inspect.
 * @returns One entry per anchor: resolved `href`, trimmed visible text
 *          (at most 100 characters, empty string when there is none), and
 *          whether the anchor sits inside a navigation-like ancestor
 *          (`nav`, `header`, `[role="navigation"]` or a class containing "nav").
 */
async function extractLinks(page: Page): Promise<{ href: string; text: string; isNav: boolean }[]> {
  return page.evaluate(() => {
    const anchors = Array.from(document.querySelectorAll('a')).slice(0, 100);
    return anchors.map((anchor) => {
      const label = anchor.innerText?.trim().slice(0, 100);
      const navAncestor = anchor.closest('nav, header, [role="navigation"], [class*="nav"]');
      return {
        href: anchor.href,
        text: label || '',
        isNav: navAncestor !== null,
      };
    });
  });
}
|
|
1401
|
+
|
|
1402
|
+
// ── Advanced extractors (kept after Phase 1.2 cleanup — only keyframes, zIndexMap and visualEffects have consumers) ────
|
|
1403
|
+
import {
|
|
1404
|
+
extractKeyframes,
|
|
1405
|
+
extractZIndexMap,
|
|
1406
|
+
extractVisualEffects,
|
|
1407
|
+
type VisualEffects,
|
|
1408
|
+
} from './extractors/advanced.js';
|
|
1409
|
+
import { extractWidgets, type WidgetExtraction } from './extractors/widgets.js';
|
|
1410
|
+
|
|
1411
|
+
// ── Screenshots ──────────────────────────────────────────────────────
|
|
1412
|
+
|
|
1413
|
+
// v2.9 A.5 — fully hydrate lazy content BEFORE screenshots + DOM extraction.
// Root cause of Attio's "white capture" + MIAM's stripped photos: IntersectionObserver / lazy
// images never entered the viewport at capture time. Slow-scroll the whole page in steps (triggers
// every lazy loader), wait for images to decode + network to settle, then return to top.
/**
 * Scrolls through the page to trigger lazy loaders, then waits (bounded) for
 * pending images to decode and for the network to go idle.
 *
 * Best-effort: every failure is swallowed so extraction is never blocked.
 *
 * @param page Playwright page to hydrate; it is scrolled back to the top afterwards.
 */
async function ensureLazyLoaded(page: Page): Promise<void> {
  try {
    await page.evaluate(async () => {
      // Hard cap on scroll steps: on infinite-scroll pages scrollHeight keeps
      // growing as we scroll, so an uncapped loop would never finish.
      const MAX_STEPS = 200;
      const step = Math.max(240, Math.floor(window.innerHeight * 0.75));
      let y = 0;
      // Plain inline loop — no named inner function, in line with the file's
      // practice of avoiding tsx __name() issues inside page.evaluate callbacks.
      for (let n = 0; n < MAX_STEPS; n++) {
        window.scrollTo(0, y);
        y += step;
        if (y >= document.documentElement.scrollHeight) break;
        await new Promise<void>((resolve) => setTimeout(resolve, 110));
      }
      window.scrollTo(0, 0);
    });
    // Wait for newly-revealed images to decode (capped) + network to settle.
    await page.evaluate(async () => {
      const pending = Array.from(document.images).filter(i => !i.complete && i.src);
      await Promise.race([
        Promise.all(pending.map(i => (i.decode ? i.decode().catch(() => {}) : Promise.resolve()))),
        new Promise(r => setTimeout(r, 2500)),
      ]);
    }).catch(() => {});
    await page.waitForLoadState('networkidle', { timeout: 3000 }).catch(() => {});
  } catch { /* best-effort — never block extraction */ }
}
|
|
1443
|
+
|
|
1444
|
+
/**
 * Captures the standard screenshot set for one viewport:
 * a full-page image (falling back to viewport-only), an above-the-fold image,
 * and one clipped image per detected page section (at most 15).
 *
 * @param page      Playwright page, expected to be scrolled to the top.
 * @param outputDir Directory the PNG files are written to.
 * @param viewport  Viewport name, used as a file-name suffix.
 */
async function takeScreenshots(page: Page, outputDir: string, viewport: string): Promise<void> {
  // Full page (with timeout fallback) — heavy sites need longer timeouts
  try {
    await page.screenshot({
      path: join(outputDir, `full-page-${viewport}.png`),
      fullPage: true,
      timeout: 60000,
    });
  } catch {
    console.log(` ⚠️ Full-page screenshot timeout, using viewport-only`);
    try {
      await page.screenshot({
        path: join(outputDir, `full-page-${viewport}.png`),
        fullPage: false,
        timeout: 30000,
      });
    } catch {
      console.log(` ⚠️ Viewport screenshot also timeout, skipping`);
    }
  }

  // Above the fold
  await page.screenshot({
    path: join(outputDir, `above-fold-${viewport}.png`),
    fullPage: false,
  });

  // Individual sections: semantic landmarks plus large direct children of <body>.
  const sections = await page.evaluate(() => {
    const candidates = [
      ...Array.from(document.querySelectorAll('header, nav, main, section, aside, footer')),
      ...(document.body ? Array.from(document.body.children) : []).filter(el => {
        const r = el.getBoundingClientRect();
        return r.height > 100 && r.width > 200;
      }),
    ];
    const seen = new Set<Element>();
    return candidates.filter(el => {
      if (seen.has(el)) return false;
      seen.add(el);
      return true;
    }).slice(0, 15).map((el, i) => {
      const r = el.getBoundingClientRect();
      const tag = el.tagName.toLowerCase();
      // Class names may contain characters that are invalid in file names or
      // act as path separators (e.g. "w-1/2", "md:flex"); keep only safe ones.
      const cls = Array.from(el.classList).join('-').replace(/[^a-zA-Z0-9_-]+/g, '_').slice(0, 30) || 'no-class';
      return {
        name: `${i}-${tag}-${cls}`,
        // Document coordinates (viewport rect + scroll offset); height capped at 2000px.
        clip: { x: r.x + window.scrollX, y: r.y + window.scrollY, width: r.width, height: Math.min(r.height, 2000) },
      };
    });
  });

  for (const section of sections) {
    if (section.clip.width < 10 || section.clip.height < 10) continue;
    try {
      await page.screenshot({
        path: join(outputDir, `section-${section.name}-${viewport}.png`),
        // The clip is expressed in document coordinates, so it must be applied
        // to the full page; otherwise sections below the fold fall outside the
        // viewport and cannot be captured.
        fullPage: true,
        clip: section.clip,
      });
    } catch {
      // Skip sections that fail (out of bounds, etc.)
    }
  }
}
|
|
1508
|
+
|
|
1509
|
+
// ── Scroll-and-screenshot (catches lazy-loaded content) ─────────────
|
|
1510
|
+
|
|
1511
|
+
/**
 * Scrolls down the page one viewport at a time and captures a viewport-sized
 * screenshot at each stop, then scrolls back to the top.
 *
 * @param page      Playwright page to scroll.
 * @param outputDir Directory the `scroll-<index>-<viewport>.png` files are written to.
 * @param viewport  Viewport name, used as a file-name suffix.
 */
async function scrollAndScreenshot(page: Page, outputDir: string, viewport: string): Promise<void> {
  // Falls back to 900px when the page reports no viewport size.
  const viewportHeight = page.viewportSize()?.height || 900;

  // Total scrollable height of the document.
  const totalHeight = await page.evaluate(() => {
    return Math.max(
      document.body?.scrollHeight || 0,
      document.documentElement.scrollHeight,
    );
  });

  // Phase 5 Sprint 80/20 — 5 positions × 300ms instead of 10 × 800ms (saves 6.5s per viewport).
  // Lazy-load coverage is preserved by the full-page screenshot already taken.
  const MAX_POSITIONS = 5;
  const WAIT_LAZY = 300;
  const positions: number[] = [];
  for (let offset = 0; offset < totalHeight && positions.length < MAX_POSITIONS; offset += viewportHeight) {
    positions.push(offset);
  }

  for (const [index, top] of positions.entries()) {
    // Jump to the stop without smooth scrolling.
    await page.evaluate((scrollY: number) => {
      window.scrollTo({ top: scrollY, behavior: 'instant' as ScrollBehavior });
    }, top);

    // Give lazy-loaded content a moment to appear.
    await page.waitForTimeout(WAIT_LAZY);

    try {
      await page.screenshot({
        path: join(outputDir, `scroll-${index}-${viewport}.png`),
        fullPage: false, // Only the current viewport
      });
    } catch {
      // Skip on failure
    }
  }

  // Return to the top so later extractions start from a known scroll position.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'instant' as ScrollBehavior });
  });
  await page.waitForTimeout(500);
}
|
|
1561
|
+
|
|
1562
|
+
// ── Main extraction pipeline ─────────────────────────────────────────
|
|
1563
|
+
|
|
1564
|
+
/**
 * Main extraction pipeline for one URL.
 *
 * For every entry of `VIEWPORTS` it opens a fresh browser context, loads the
 * page (retrying once with the stealth browser when a bot challenge is
 * detected), hydrates lazy content, takes screenshots and runs all extractors.
 * The per-viewport results are written to `extractions/<domain>/raw-css.json`
 * and a desktop-based digest to `extractions/<domain>/extraction-summary.json`.
 *
 * @param url Page to extract.
 * @throws Re-throws any error raised during extraction after logging it; the
 *         browser is always closed.
 */
async function extractFromURL(url: string): Promise<void> {
  // Strip only a leading "www." — an unanchored replace would also cut a
  // "www." that appears elsewhere in the host name.
  const domain = new URL(url).hostname.replace(/^www\./, '');
  const baseDir = join(process.cwd(), 'extractions', domain);
  const screenshotDir = join(baseDir, 'screenshots');

  await mkdir(screenshotDir, { recursive: true });

  console.log(`\n🔍 Prism — Extracting: ${url}`);
  console.log(`📁 Output: ${baseDir}\n`);

  // sec-ch-ua Client Hints are Chromium-only — Safari/iOS never sends them.
  // Sending them with an iPhone UA is a detectable bot fingerprint.
  const mobileUA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const desktopUA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';

  let browser: Browser | null = null;

  try {
    browser = await chromium.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-infobars',
        '--window-size=1440,900',
      ],
    });

    const results: Record<string, ExtractionResult> = {};

    for (const [vpName, vpSize] of Object.entries(VIEWPORTS)) {
      console.log(`📐 Viewport: ${vpName} (${vpSize.width}x${vpSize.height})`);

      // Opens a context + page configured for this viewport on the given
      // browser. Shared by the first attempt and the stealth retry so both use
      // exactly the same fingerprint settings.
      const openPage = async (target: Browser) => {
        const ctx = await target.newContext({
          viewport: vpSize,
          userAgent: vpName === 'mobile' ? mobileUA : desktopUA,
          deviceScaleFactor: vpName === 'mobile' ? 3 : 2,
          locale: 'fr-FR',
          timezoneId: 'Europe/Paris',
          extraHTTPHeaders: {
            'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            // Client Hints only for desktop (Chrome) — never for mobile Safari
            ...(vpName !== 'mobile' ? {
              'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"',
            } : {}),
          },
        });

        const pg = await ctx.newPage();

        // Hide automation indicators
        await pg.addInitScript(() => {
          Object.defineProperty(navigator, 'webdriver', { get: () => false });
          Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
          (window as any).chrome = { runtime: {} };
        });

        return { context: ctx, page: pg };
      };

      let { context, page } = await openPage(browser);

      // Phase 2.1 — Fast page load: skip networkidle, use domcontentloaded + targeted waits
      // Why: networkidle waits for tracker scripts (GA, Hotjar) that often never settle on tracker-heavy
      // sites like Stripe/Coinbase, wasting 30s. domcontentloaded + load + conditional lazy-wait is faster
      // and still captures full computed styles.
      console.log(' ⏳ Loading page...');
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });
        // Wait for load event (fonts + critical CSS) but don't block on tracker network
        await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
      } catch {
        console.log(' ⚠️ Initial load failed, falling back to bare domcontentloaded...');
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 });
      }

      // Conditional lazy-wait: only wait if the page is long enough to suggest lazy-loaded content
      const isLongPage = await page.evaluate(() =>
        document.documentElement.scrollHeight > window.innerHeight * 2
      ).catch(() => false);
      if (isLongPage) await page.waitForTimeout(2000);

      // Dismiss common popups/modals
      await dismissPopups(page);

      // Phase 5.1.3 — Bot challenge detection (Cloudflare, CAPTCHA, etc.)
      const botCheck = await detectBotChallenge(page);
      if (botCheck.blocked) {
        console.log(` 🤖 Bot challenge: ${botCheck.reason} — stealth retry...`);
        await page.close().catch(() => {});
        await context.close().catch(() => {});
        await browser!.close().catch(() => {});

        // Stealth retry via browser-stealth.ts (playwright-extra + puppeteer-stealth).
        // The stealth browser replaces `browser`, so it also serves the remaining
        // viewports and is the one closed in `finally`.
        const { launchBrowser } = await import('./browser-stealth.js');
        browser = await launchBrowser({ stealth: true });
        ({ context, page } = await openPage(browser));

        console.log(' ⏳ Stealth retry — loading page...');
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {});
        await dismissPopups(page);

        const botCheck2 = await detectBotChallenge(page);
        if (botCheck2.blocked) {
          throw new Error(`Bot challenge persists after stealth retry: ${botCheck2.reason}`);
        }
        console.log(` ✅ Stealth retry succeeded`);
      }

      // ── v2.9 A.5 — hydrate lazy content before capturing anything ──
      console.log(' 💧 Hydrating lazy content (full-page scroll + decode)...');
      await ensureLazyLoaded(page);

      // ── Screenshots ──
      console.log(' 📸 Taking screenshots...');
      await takeScreenshots(page, screenshotDir, vpName);

      // ── Scroll-and-screenshot (catches lazy-loaded content) ──
      console.log(' 📜 Scroll-and-screenshot for lazy-loaded content...');
      await scrollAndScreenshot(page, screenshotDir, vpName);

      // ── Extract everything ──
      // Imagery profile and widgets are optional: their failures are logged and
      // turned into null; any other extractor failure aborts the run.
      console.log(' 🎨 Extracting computed styles...');
      const [
        elements,
        cssVars,
        allColors,
        fonts,
        borderRadii,
        shadows,
        transitions,
        sections,
        images,
        imageryProfile,
        widgets,
        links,
        componentVariants,
        fontFaces,
        mediaBreakpoints,
      ] = await Promise.all([
        extractComputedStyles(page),
        extractCSSCustomProperties(page),
        extractAllColors(page),
        extractAllFonts(page),
        extractAllBorderRadii(page),
        extractAllShadows(page),
        extractAllTransitions(page),
        extractSections(page),
        extractImages(page),
        extractImageryProfile(page).catch((e) => { console.warn(' ⚠️ imagery profile extraction failed:', (e as Error).message); return null; }),
        extractWidgets(page).catch((e) => { console.warn(' ⚠️ widgets extraction failed:', (e as Error).message); return null; }),
        extractLinks(page),
        extractComponentVariants(page),
        extractFontFaces(page),
        extractMediaBreakpoints(page),
      ]);

      // ── Advanced design capture ── (visual effects are captured on desktop only)
      const [keyframes, zIndexMap, visualEffects] = await Promise.all([
        extractKeyframes(page).catch((e) => { console.warn(' ⚠️ keyframes extraction failed:', (e as Error).message); return {}; }),
        extractZIndexMap(page).catch((e) => { console.warn(' ⚠️ z-index extraction failed:', (e as Error).message); return []; }),
        vpName === 'desktop' ? extractVisualEffects(page).catch((e) => { console.warn(' ⚠️ visual effects extraction failed:', (e as Error).message); return null; }) : Promise.resolve(null),
      ]);
      console.log(` 🎬 Advanced: ${Object.keys(keyframes).length} keyframes, ${zIndexMap.length} z-index`);
      if (visualEffects) console.log(` ✨ Visual effects: ${visualEffects.motionSummary}`);

      // ── Component States (hover/focus) — desktop only, sequential ──
      let componentStates: Record<string, ComponentStateStyles> = {};
      if (vpName === 'desktop') {
        console.log(' 🖱️ Extracting component states (hover/focus)...');
        componentStates = await extractComponentStates(page);
        const stateCount = Object.keys(componentStates).length;
        console.log(` → ${stateCount} component states captured`);
      }

      // ── OpenType Features ──
      const openTypeData = await extractOpenTypeFeatures(page);
      console.log(` 🔤 OpenType features: [${openTypeData.features.join(', ') || 'none'}] | axes: [${openTypeData.axes.join(', ') || 'none'}]`);

      const displaySignature = await extractDisplaySignature(page).catch((e) => { console.error(' ❌ displaySignature ERR:', (e as Error).message); return null; });
      if (displaySignature) console.log(` 🅰️ Display signature: ${displaySignature.family} ${displaySignature.fontSize} ${displaySignature.isSerif ? 'SERIF' : 'sans'}${displaySignature.isItalic ? ' italic' : ''}`);

      const pageTitle = await page.title();

      results[vpName] = {
        url,
        domain,
        timestamp: new Date().toISOString(),
        viewport: vpSize,
        pageTitle,
        cssCustomProperties: cssVars,
        elements,
        sections,
        allColors,
        allFontFamilies: fonts.families,
        allFontSizes: fonts.sizes,
        allBorderRadii: borderRadii,
        allShadows: shadows,
        allTransitions: transitions,
        images,
        imageryProfile: imageryProfile || undefined,
        widgets: widgets || undefined,
        links,
        componentVariants,
        componentStates,
        fontFaces,
        mediaBreakpoints,
        openTypeFeatures: openTypeData.features,
        variableAxes: openTypeData.axes,
        displaySignature: displaySignature || undefined,
        // Advanced capture
        keyframes,
        zIndexMap,
        visualEffects: visualEffects || undefined,
      };

      const variantCount = Object.values(componentVariants).reduce((sum, v) => sum + v.length, 0);
      console.log(` ✅ ${vpName} done — ${Object.keys(cssVars).length} CSS vars, ${allColors.length} colors, ${sections.length} sections, ${variantCount} component variants, ${fontFaces.length} font-faces, ${mediaBreakpoints.filter(b => !b.startsWith('@')).length} breakpoints`);

      await context.close();
    }

    // ── Save results ──
    console.log('\n💾 Saving extraction data...');

    // Phase 1.3 — add schema version so downstream can detect drift
    const versionedResults = {
      ...results,
      version: '2.4.0',
    };

    await writeFile(
      join(baseDir, 'raw-css.json'),
      JSON.stringify(versionedResults, null, 2),
    );

    // Save a summary for quick reference (built from the desktop viewport)
    const desktop = results.desktop;
    const summary = {
      url,
      domain,
      extractedAt: new Date().toISOString(),
      pageTitle: desktop.pageTitle,
      cssCustomPropertiesCount: Object.keys(desktop.cssCustomProperties).length,
      colorsFound: desktop.allColors.length,
      fontFamilies: desktop.allFontFamilies,
      fontSizes: desktop.allFontSizes,
      borderRadii: desktop.allBorderRadii,
      shadows: desktop.allShadows,
      sectionsFound: desktop.sections.length,
      elementsFound: Object.entries(desktop.elements).filter(([, v]) => v !== null).map(([k]) => k),
      imagesCount: desktop.images.length,
      navLinksCount: desktop.links.filter(l => l.isNav).length,
      componentVariants: Object.fromEntries(
        Object.entries(desktop.componentVariants).map(([k, v]) => [k, v.length])
      ),
      fontFaces: desktop.fontFaces.map(f => `${f.family} (${f.weight} ${f.style})`),
      mediaBreakpoints: desktop.mediaBreakpoints.filter(b => !b.startsWith('@')),
    };

    await writeFile(
      join(baseDir, 'extraction-summary.json'),
      JSON.stringify(summary, null, 2),
    );

    console.log(`\n✅ Extraction complete for ${domain}`);
    console.log(` 📁 ${baseDir}/`);
    console.log(` 📸 Screenshots: ${screenshotDir}/`);
    console.log(` 🎨 Raw CSS: ${baseDir}/raw-css.json`);
    console.log(` 📊 Summary: ${baseDir}/extraction-summary.json`);
    console.log(`\n Next: run 'npm run analyze -- ${domain}' to analyze layout`);

  } catch (err) {
    console.error('❌ Extraction failed:', err);
    throw err;
  } finally {
    if (browser) await browser.close();
  }
}
|
|
1864
|
+
|
|
1865
|
+
// ── Popup dismissal ──────────────────────────────────────────────────
|
|
1866
|
+
|
|
1867
|
+
/**
 * Best-effort dismissal of cookie/consent popups and blocking overlays.
 *
 * Works in two layers:
 *  1. Click the first visible "accept"/"close" control found among a list of
 *     known CMP selectors, first in the page itself and, if nothing was
 *     clicked there, in each child frame.
 *  2. Remove leftover CMP containers from the main document, hide high
 *     z-index translucent fixed overlays, and undo scroll locking on
 *     `<body>` / `<html>`.
 *
 * Click failures are deliberately ignored (the next selector/frame is tried).
 * Errors thrown by `page.evaluate` in layer 2 are not caught here.
 *
 * @param page Playwright page on which to dismiss popups.
 * @returns Resolves once both layers have run.
 */
async function dismissPopups(page: Page): Promise<void> {
  // ── Layer 1: click "accept" (lets the site's own consent callback run) ──
  // Order matters: selectors are tried top to bottom and the first visible
  // match is clicked, so specific CMP selectors come before generic ones.
  const clickSelectors = [
    // Didomi (first — most common CMP on French sites)
    '#didomi-notice-agree-button',
    '[data-testid="didomi-notice-agree-button"]',
    'button[aria-label*="Accepter"]',
    'button[aria-label*="Accept all"]',
    'button[aria-label*="J\'accepte"]',
    '.didomi-continue-without-agreeing',
    // OneTrust
    '#onetrust-accept-btn-handler',
    'button[data-testid="ot-sdk-btn-allow-all"]',
    // Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    'a#CybotCookiebotDialogBodyLevelButtonAccept',
    '#CybotCookiebotDialogBodyButtonAccept',
    '.cm__btn-allow', '.cm__btn-accept',
    // Axeptio
    '[data-axeptio-action="accept"]', '.axeptio__btn-accept',
    // TarteAuCitron
    '#tarteaucitronPersonalize2', '.tarteaucitronAllow', '.tarteaucitron-accept',
    // Quantcast
    '.qc-cmp2-buttons-primary button',
    // Google CMP / Funding Choices (used by claude.com, anthropic.com, googleblog, etc.)
    '.fc-button-label',
    '.fc-cta-consent',
    '.qc-cmp2-summary-buttons button:nth-of-type(2)',
    'button[mode="primary"][label*="Accept"]',
    // Usercentrics
    '#uc-btn-accept-banner',
    'button[data-testid="uc-accept-all-button"]',
    // Pierrot (Shopify)
    '.pd-banner__button--accept', 'button[data-cookiebanner-action="accept"]',
    // Native dialog element
    'dialog button:has-text("Accept")',
    'dialog button:has-text("Accepter")',
    // Generic aria-label English/French
    'button[aria-label*="cookie" i][aria-label*="accept" i]',
    'button[aria-label*="cookie" i][aria-label*="allow" i]',
    'button[aria-label*="accepter" i]',
    // Generic consent
    'button[class*="cookie"][class*="accept"]',
    'button[class*="consent"][class*="accept"]',
    '[class*="cookie"] button[class*="accept"]',
    'button[class*="accept-all"]',
    '[class*="consent"] button',
    'button[class*="cookie"]', 'button[class*="accept"]', 'button[class*="consent"]',
    // Generic close
    '[class*="modal"] button[class*="close"]',
    '[class*="popup"] button[class*="close"]',
    '[class*="banner"] button[class*="close"]',
    '[aria-label="Close"]', '[aria-label="Dismiss"]',
  ];

  // Helper: try every selector inside one frame (or the page itself).
  // Returns the selector that was clicked, or null when none was visible.
  const tryClickInFrame = async (frame: any): Promise<string | null> => {
    for (const selector of clickSelectors) {
      try {
        const btn = frame.locator(selector).first();
        // NOTE(review): recent Playwright versions document the `timeout`
        // option of `isVisible` as ignored (the check returns immediately) —
        // confirm against the installed version before relying on a wait.
        if (await btn.isVisible({ timeout: 600 })) {
          await btn.click({ timeout: 1500 });
          return selector;
        }
      } catch {
        // Best effort: an invalid selector or a failed click just moves on
        // to the next candidate.
      }
    }
    return null;
  };

  // ── Layer 1a: main frame ──
  let clicked = false;
  const mainHit = await tryClickInFrame(page);
  if (mainHit) {
    console.log(` 🖱️ Popup fermé via click (main): ${mainHit}`);
    clicked = true;
    // Give the page time to react to the click before continuing.
    await page.waitForTimeout(1500);
  }

  // ── Layer 1b: iframes (IAB TCF, Google Funding Choices, etc.) ──
  if (!clicked) {
    const frames = page.frames();
    for (const frame of frames) {
      if (frame === page.mainFrame()) continue;
      try {
        const url = frame.url();
        // Skip about:blank, data: frames and frames without a URL.
        if (!url || url === 'about:blank' || url.startsWith('data:')) continue;
        const hit = await tryClickInFrame(frame);
        if (hit) {
          console.log(` 🖱️ Popup fermé via click (iframe ${url.slice(0, 60)}): ${hit}`);
          clicked = true;
          await page.waitForTimeout(1500);
          break;
        }
      } catch {
        // Best effort: a frame that errors (e.g. detached) is skipped.
      }
    }
  }

  // ── Layer 2: DOM cleanup (remove overlays that are still present) ──
  const removed = await page.evaluate(() => {
    let count = 0;
    // Captured up front: the attribute-substring selectors below can match
    // <html>/<body> themselves (e.g. `body.didomi-popup-open` matches
    // `[class*="didomi-popup"]`), and those must never be removed or hidden.
    const rootEl = document.documentElement;
    const bodyEl = document.body;

    const cmpSelectors = [
      '#didomi-host', '.didomi-popup-container', '#didomi-consent-popup',
      '[class*="didomi-popup"]', '[class*="didomi-notice"]',
      '#onetrust-banner-sdk', '#onetrust-consent-sdk', '.onetrust-pc-dark-filter',
      '#CookieConsent', '.CookieConsent', '#CybotCookiebotDialog',
      '.axeptio_container', '[class*="axeptio"]',
      '.tarteaucitronRoot', '#tarteaucitron', '#tarteaucitronRoot',
      '.qc-cmp2-container', '[class*="qc-cmp"]',
      '.pd-cookie-banner-window', '[class*="pd-banner"]',
      '[id*="cookie-banner"]', '[id*="consent-banner"]',
      '[class*="cookie-banner"]', '[class*="consent-banner"]',
      '[class*="cookie-notice"]', '[class*="consent-notice"]',
      '[class*="gdpr"]', '[id*="gdpr"]',
    ];
    for (const sel of cmpSelectors) {
      document.querySelectorAll(sel).forEach(el => {
        // Removing <html> or <body> would wipe the page being extracted.
        if (el === rootEl || el === bodyEl) return;
        el.remove();
        count++;
      });
    }

    // Hide high z-index overlays/backdrops (inline `position: fixed` only).
    document.querySelectorAll('[style*="position: fixed"], [style*="position:fixed"]').forEach(el => {
      if (el === rootEl || el === bodyEl) return;
      const cs = window.getComputedStyle(el);
      // `z-index: auto` parses to NaN, which fails the `> 9000` test below.
      const z = Number.parseInt(cs.zIndex, 10);
      // Compare opacity numerically rather than as strings.
      const isTranslucent = cs.backgroundColor.includes('rgba') || Number.parseFloat(cs.opacity) < 1;
      if (z > 9000 && isTranslucent) {
        (el as HTMLElement).style.display = 'none';
        count++;
      }
    });

    // Unlock scrolling on body / html.
    if (document.body) {
      document.body.classList.remove(
        'didomi-popup-open', 'modal-open', 'no-scroll', 'overflow-hidden',
        'cookie-open', 'consent-open', 'noscroll'
      );
      document.body.style.overflow = '';
    }
    document.documentElement.style.overflow = '';
    return count;
  });

  if (removed > 0) {
    console.log(` 🧹 ${removed} overlay(s) nettoyé(s)`);
  }
}
|
|
2016
|
+
|
|
2017
|
+
// ── CLI ──────────────────────────────────────────────────────────────
|
|
2018
|
+
|
|
2019
|
+
// Target URL comes from the first CLI argument.
const url = process.argv[2];

// Missing (or empty) argument: show usage and exit with a failure code.
if (url === undefined || url === '') {
  for (const usageLine of [
    'Usage: npm run extract -- <URL>',
    'Example: npm run extract -- https://linear.app',
  ]) {
    console.error(usageLine);
  }
  process.exit(1);
}

// Validate the argument by attempting to parse it with the URL constructor.
let cliUrlParses = true;
try {
  new URL(url);
} catch {
  cliUrlParses = false;
}
if (!cliUrlParses) {
  console.error(`Invalid URL: ${url}`);
  process.exit(1);
}

// Run the extraction; any rejection is reported and turned into exit code 1.
extractFromURL(url).then(undefined, (fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|