prism-design 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/CHANGELOG.md +292 -0
  2. package/LICENSE +21 -0
  3. package/README.md +203 -0
  4. package/bin/clone-architect.mjs +476 -0
  5. package/bin/prism.mjs +467 -0
  6. package/catalog/index.json +1155 -0
  7. package/extractions/airbnb.com/DESIGN.md +1068 -0
  8. package/extractions/airbnb.com/tokens.json +507 -0
  9. package/extractions/attio.com/DESIGN.md +1295 -0
  10. package/extractions/attio.com/tokens.json +438 -0
  11. package/extractions/auroxdashboard.com/DESIGN.md +724 -0
  12. package/extractions/auroxdashboard.com/tokens.json +195 -0
  13. package/extractions/careerexplorer.com/DESIGN.md +1178 -0
  14. package/extractions/careerexplorer.com/tokens.json +141 -0
  15. package/extractions/chance.co/DESIGN.md +1209 -0
  16. package/extractions/chance.co/tokens.json +160 -0
  17. package/extractions/choisis-ton-avenir.com/DESIGN.md +1265 -0
  18. package/extractions/choisis-ton-avenir.com/tokens.json +227 -0
  19. package/extractions/example.com/DESIGN.md +436 -0
  20. package/extractions/example.com/tokens.json +91 -0
  21. package/extractions/getdesign.md/DESIGN.md +1009 -0
  22. package/extractions/getdesign.md/tokens.json +219 -0
  23. package/extractions/github.com/DESIGN.md +1130 -0
  24. package/extractions/github.com/tokens.json +2092 -0
  25. package/extractions/hello-charly.com/DESIGN.md +1146 -0
  26. package/extractions/hello-charly.com/tokens.json +322 -0
  27. package/extractions/hyperliquid.xyz/DESIGN.md +779 -0
  28. package/extractions/hyperliquid.xyz/tokens.json +598 -0
  29. package/extractions/instagram.com/DESIGN.md +996 -0
  30. package/extractions/instagram.com/tokens.json +1240 -0
  31. package/extractions/jobirl.com/DESIGN.md +1160 -0
  32. package/extractions/jobirl.com/tokens.json +139 -0
  33. package/extractions/life360.com/DESIGN.md +1133 -0
  34. package/extractions/life360.com/tokens.json +491 -0
  35. package/extractions/lifesum.com/DESIGN.md +965 -0
  36. package/extractions/lifesum.com/tokens.json +170 -0
  37. package/extractions/linear.app/DESIGN.md +1301 -0
  38. package/extractions/linear.app/tokens.json +732 -0
  39. package/extractions/mavoie.org/DESIGN.md +1148 -0
  40. package/extractions/mavoie.org/tokens.json +128 -0
  41. package/extractions/miro.com/DESIGN.md +1237 -0
  42. package/extractions/miro.com/tokens.json +401 -0
  43. package/extractions/notion.so/DESIGN.md +1319 -0
  44. package/extractions/notion.so/tokens.json +906 -0
  45. package/extractions/onetonline.org/DESIGN.md +909 -0
  46. package/extractions/onetonline.org/tokens.json +280 -0
  47. package/extractions/posthog.com/DESIGN.md +1024 -0
  48. package/extractions/posthog.com/tokens.json +197 -0
  49. package/extractions/revolut.com/DESIGN.md +1080 -0
  50. package/extractions/revolut.com/tokens.json +401 -0
  51. package/extractions/stripe.com/DESIGN.md +1272 -0
  52. package/extractions/stripe.com/tokens.json +794 -0
  53. package/extractions/switchcollective.com/DESIGN.md +1040 -0
  54. package/extractions/switchcollective.com/tokens.json +98 -0
  55. package/extractions/truity.com/DESIGN.md +970 -0
  56. package/extractions/truity.com/tokens.json +166 -0
  57. package/extractions/uniquekicks.be/DESIGN.md +1171 -0
  58. package/extractions/uniquekicks.be/tokens.json +237 -0
  59. package/package.json +122 -0
  60. package/scripts/analyze.ts +281 -0
  61. package/scripts/bank-register.ts +379 -0
  62. package/scripts/bank.ts +374 -0
  63. package/scripts/browser-stealth.ts +189 -0
  64. package/scripts/clone.ts +198 -0
  65. package/scripts/compare-vs-gd-final.ts +273 -0
  66. package/scripts/compare-vs-gd.ts +269 -0
  67. package/scripts/compare.ts +405 -0
  68. package/scripts/deploy-site.ts +181 -0
  69. package/scripts/diff-snapshots.ts +340 -0
  70. package/scripts/enrich-catalog.ts +212 -0
  71. package/scripts/extract.ts +2038 -0
  72. package/scripts/extractors/advanced.ts +524 -0
  73. package/scripts/extractors/widgets.ts +711 -0
  74. package/scripts/generate-design-md.ts +5775 -0
  75. package/scripts/generate-final-pdf.ts +274 -0
  76. package/scripts/generate-og-image.ts +87 -0
  77. package/scripts/generate-showcase.ts +1588 -0
  78. package/scripts/generate-site.ts +847 -0
  79. package/scripts/mass-extract.sh +91 -0
  80. package/scripts/post-process-all.sh +55 -0
  81. package/scripts/regen-catalog.ts +203 -0
  82. package/scripts/shared/cache.ts +149 -0
  83. package/scripts/shared/css-helpers.ts +263 -0
  84. package/scripts/shared/logger.ts +57 -0
  85. package/scripts/shared/named-colors.ts +355 -0
  86. package/scripts/shared/types.ts +220 -0
  87. package/scripts/sync-catalog.ts +105 -0
  88. package/scripts/tokenize.ts +988 -0
  89. package/templates/layout-template.md +52 -0
  90. package/templates/tokens-template.json +34 -0
@@ -0,0 +1,2038 @@
1
+ /**
2
+ * Prism — Script d'extraction Playwright
3
+ *
4
+ * Extrait le design RÉEL d'un site via getComputedStyle() :
5
+ * - Screenshots multi-viewport (desktop 1440px + mobile 390px)
6
+ * - CSS computed sur tous les éléments clés
7
+ * - CSS custom properties (--color-*, --font-*, etc.)
8
+ * - Couleurs dominantes, typo, spacing, layout
9
+ * - Structure DOM (sections, composants, navigation)
10
+ */
11
+
12
+ import { chromium, type Page, type Browser } from 'playwright';
13
+ import { mkdir, writeFile } from 'fs/promises';
14
+ import { join } from 'path';
15
+
16
+ // ── Types ────────────────────────────────────────────────────────────
17
+
18
/**
 * Flat record of the CSS properties captured for one element.
 * All values are kept as strings, as read from a computed style declaration
 * by the extraction functions in this file.
 */
interface ComputedStyles {
  // ── Colour & background ──
  backgroundColor: string;
  backgroundImage: string;
  color: string;
  opacity: string;

  // ── Typography ──
  fontFamily: string;
  fontSize: string;
  fontWeight: string;
  lineHeight: string;
  letterSpacing: string;
  textAlign: string;
  textTransform: string;
  textDecoration: string;
  fontFeatureSettings: string;
  fontVariationSettings: string;

  // ── Box model & decoration ──
  padding: string;
  margin: string;
  borderRadius: string;
  border: string;
  boxShadow: string;

  // ── Sizing ──
  width: string;
  height: string;
  minHeight: string;
  maxWidth: string;

  // ── Layout ──
  display: string;
  gap: string;
  gridTemplateColumns: string;
  flexDirection: string;
  alignItems: string;
  justifyContent: string;
  position: string;
  overflow: string;

  // ── Motion ──
  transition: string;
}
52
+
53
/** Style, geometry and basic metadata captured for one key element of the page. */
interface ElementExtraction {
  /** CSS selector associated with the element (presumably the lookup selector — TODO confirm). */
  selector: string;
  /** Tag name of the element. */
  tag: string;
  /** Entries of the element's class list. */
  classes: string[];
  /** Text content of the element. */
  text: string;
  /** Computed CSS values for the element. */
  styles: ComputedStyles;
  /** Number of child elements. */
  children: number;
  /** Bounding box of the element. */
  rect: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
}
62
+
63
/**
 * Description of one section of the page.
 *
 * NOTE(review): the code that fills this structure is outside the visible part
 * of the file, so the optional signal fields are left uncommented here —
 * confirm their meaning against the producer.
 */
interface SectionExtraction {
  // ── Identity ──
  index: number;
  tag: string;
  classes: string[];
  role: string;
  estimatedPurpose: string;

  // ── Geometry & style ──
  rect: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  styles: Partial<ComputedStyles> | Record<string, string>;
  childCount: number;

  // ── Optional per-section signals ──
  bgTreatment?: string;
  isDark?: boolean;
  aboveFold?: boolean;
  isFullBleed?: boolean;
  imgRatio?: number;
  maxHeadingPx?: number;
  gridCols?: number;
  hasAnimation?: boolean;
  textLen?: number;
  vPad?: number;
  hasChart?: boolean;
}
84
+
85
/** One visually distinct instance of a component type (button, card, link, …). */
interface ComponentVariant {
  /** Tag name of the element. */
  tag: string;
  /** Entries of the element's class list. */
  classes: string[];
  /** Text content of the element. */
  text: string;
  /** Computed CSS values for the element. */
  styles: ComputedStyles;
  /** Bounding box of the element. */
  rect: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
}
92
+
93
/**
 * One font-face declaration, with every descriptor kept as a string.
 * NOTE(review): presumably mirrors the CSS `@font-face` descriptors of the same
 * names — the producer is outside the visible part of the file.
 */
interface FontFaceDeclaration {
  family: string;
  src: string;
  weight: string;
  style: string;
  display: string;
}
100
+
101
/**
 * Full result of one extraction run.
 *
 * NOTE(review): the code that assembles this object is outside the visible part
 * of the file; the grouping comments below only reflect the field names.
 */
interface ExtractionResult {
  // ── Page identity ──
  url: string;
  domain: string;
  timestamp: string;
  viewport: {
    width: number;
    height: number;
  };
  pageTitle: string;

  // ── Per-element and per-section captures ──
  cssCustomProperties: Record<string, string>;
  elements: Record<string, ElementExtraction | null>;
  sections: SectionExtraction[];

  // ── Page-wide value inventories ──
  allColors: string[];
  allFontFamilies: string[];
  allFontSizes: string[];
  allBorderRadii: string[];
  allShadows: string[];
  allTransitions: string[];

  // ── Imagery ──
  images: {
    src: string;
    alt: string;
    width: number;
    height: number;
  }[];
  widgets?: WidgetExtraction;
  imageryProfile?: {
    ogImage: string | null;
    ogImageWidth: number | null;
    ogImageHeight: number | null;
    twitterImage: string | null;
    heroImage: {
      src: string;
      alt: string;
      width: number;
      height: number;
      aspectRatio: number;
    } | null;
    formats: {
      png: number;
      jpg: number;
      webp: number;
      svg: number;
      gif: number;
      other: number;
    };
    totalImages: number;
    totalAboveFold: number;
    aspectRatioBuckets: {
      landscape: number;
      portrait: number;
      square: number;
      ultrawide: number;
    };
    illustrationHeavy: boolean;
    photoHeavy: boolean;
    avgImageSize: {
      width: number;
      height: number;
    };
    decorativePatterns?: {
      multiStopGradients: number;
      radialGradients: number;
      largeSvgShapes: number;
      backgroundImagePatterns: number;
      hasNoise: boolean;
      hasGlassmorphism: boolean;
    };
  };

  // ── Links & components ──
  links: {
    href: string;
    text: string;
    isNav: boolean;
  }[];
  componentVariants: Record<string, ComponentVariant[]>;
  componentStates: Record<string, ComponentStateStyles>;

  // ── Fonts & responsive data ──
  fontFaces: FontFaceDeclaration[];
  mediaBreakpoints: string[];
  openTypeFeatures: string[];
  variableAxes: string[];
  displaySignature?: {
    family: string;
    fontSize: string;
    fontWeight: string;
    isSerif: boolean;
    isItalic: boolean;
    sample: string;
    secondary?: {
      family: string;
      fontSize: string;
      fontWeight: string;
      isSerif: boolean;
      isItalic: boolean;
      sample: string;
    };
  };

  // Phase 5 Sprint 80/20 — advanced capture (kept: has downstream consumers)
  keyframes?: Record<string, Record<string, Record<string, string>>>;
  zIndexMap?: Array<{ selector: string; z: number; stackingRoot: string }>;
  visualEffects?: import('./extractors/advanced.js').VisualEffects;

  // RFC C/D/F/A/B fields removed in v2.4 (Phase 1.2) — zero downstream consumers detected:
  //   transform3DMap, containerQueries, containerTypes, gridLayouts, responsiveSnapshot, pseudoElements
  // If needed, re-add with an explicit consumer; YAGNI removed speculative extraction
  // (saves ~50KB/site, ~10s extract time).
}
156
+
157
+ // ── Config ───────────────────────────────────────────────────────────
158
+
159
/** Named viewport sizes: a 1440×900 desktop and a 390×844 mobile profile. */
const VIEWPORTS = {
  desktop: {
    width: 1440,
    height: 900,
  },
  mobile: {
    width: 390,
    height: 844,
  },
} as const;
163
+
164
/**
 * Lookup selectors for the key elements of a page, keyed by element name.
 *
 * Phase 5.1.1 — stricter selectors.
 * BEFORE: `[class*="header"]` matched `<body class="with-new-header">`, giving
 *   headerHeight = 12802px (the whole page) on Attio, Cursor and Airbnb.
 * AFTER: tag-first selectors, plus ARIA roles, plus strict class matching.
 */
const KEY_SELECTORS: Record<string, string> = {
  // Page landmarks
  body: 'body',
  header: 'header, [role="banner"], [data-testid*="header"]:not(body):not(html)',
  nav: 'nav, [role="navigation"]',
  main: 'main, [role="main"]',
  sidebar: 'aside, [role="complementary"]',
  footer: 'footer, [role="contentinfo"]',
  hero: 'section[class*="hero" i]:not(body), [class*="hero-section" i]:not(body), [data-section="hero"]',

  // Components
  card: 'article, [class*="card" i]:not(body):not(html):not(main):not(section)',
  button: 'button, [role="button"]:not(body):not(html)',
  input: 'input[type="text"], input[type="search"], input[type="email"], textarea',

  // Text
  heading: 'h1',
  subheading: 'h2',
  link: 'a:not([class*="btn"]):not([class*="button"])',

  // Small UI pieces and overlays
  badge: '[class*="badge" i]:not(body), [class*="tag" i]:not(body), [class*="chip" i]:not(body)',
  modal: '[class*="modal" i]:not(body), [role="dialog"]',
  dropdown: '[role="menu"], [class*="dropdown" i]:not(body)',
  avatar: 'img[class*="avatar" i], img[class*="profile" i]',
  logo: 'header a img, header a svg, nav a img, nav a svg, [class*="logo" i]:not(body):not(html)',
};
187
+
188
+ // ── Extraction functions ─────────────────────────────────────────────
189
+
190
/**
 * Captures computed styles, geometry and basic attributes for the element
 * matched by each entry of `KEY_SELECTORS`.
 *
 * Lookup rules, applied inside the page:
 * - Plain heading selectors (`h1`…`h6`) pick the first visible match, falling
 *   back to the first match in DOM order (SPAs often ship a hidden SEO h1
 *   before the visible one).
 * - For the `heading` entry only, when no match exists at all, ARIA level-1
 *   headings are tried, then the largest-font (> 31px) text element near the
 *   top of the page.
 * - Every other selector uses the first `querySelector` match.
 *
 * @param page Playwright page to evaluate in.
 * @returns A map keyed by `KEY_SELECTORS` name; `null` for names with no match.
 */
async function extractComputedStyles(page: Page): Promise<Record<string, ElementExtraction | null>> {
  return page.evaluate((selectors: Record<string, string>) => {
    const extracted: Record<string, any> = {};

    for (const [name, selector] of Object.entries(selectors)) {
      let target: Element | null = null;

      if (/^h[1-6]$/.test(selector.trim())) {
        const headings = Array.from(document.querySelectorAll(selector));
        const firstVisible = headings.find(node => {
          const box = node.getBoundingClientRect();
          const style = getComputedStyle(node);
          return box.width > 0 && box.height > 0 &&
            style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
        });
        target = firstVisible || headings[0] || null;

        // SPA fallback: no h1 at all → ARIA heading roles, then largest-font text above the fold.
        if (!target && name === 'heading') {
          target = document.querySelector('[role="heading"][aria-level="1"]') ||
            document.querySelector('[aria-level="1"]') || null;
          if (!target) {
            const viewportHeight = window.innerHeight;
            let largest: Element | null = null;
            let largestPx = 31; // only text strictly larger than 31px qualifies
            document.querySelectorAll('div,span,p,strong').forEach((node: Element) => {
              const box = node.getBoundingClientRect();
              // Skip elements far below the fold, above the viewport, or acting as containers.
              if (box.top > viewportHeight * 1.5 || box.bottom < 0 || (node as HTMLElement).children.length > 2) return;
              const px = parseFloat(getComputedStyle(node).fontSize);
              if (px > largestPx) { largest = node; largestPx = px; }
            });
            target = largest;
          }
        }
      } else {
        target = document.querySelector(selector);
      }

      if (!target) {
        extracted[name] = null;
        continue;
      }

      const cs = getComputedStyle(target);
      const rect = target.getBoundingClientRect();
      extracted[name] = {
        // `ElementExtraction` declares `selector` as required; record the lookup selector.
        selector,
        tag: target.tagName.toLowerCase(),
        classes: Array.from(target.classList),
        text: (target as HTMLElement).innerText?.slice(0, 200) || '',
        ariaLabel: (target as HTMLElement).getAttribute('aria-label') || undefined,
        dataTestId: (target as HTMLElement).getAttribute('data-testid') || undefined,
        role: (target as HTMLElement).getAttribute('role') || undefined,
        children: target.children.length,
        rect: {
          x: Math.round(rect.x),
          y: Math.round(rect.y),
          width: Math.round(rect.width),
          height: Math.round(rect.height),
        },
        styles: {
          backgroundColor: cs.backgroundColor,
          backgroundImage: cs.backgroundImage,
          color: cs.color,
          fontFamily: cs.fontFamily,
          fontSize: cs.fontSize,
          fontWeight: cs.fontWeight,
          lineHeight: cs.lineHeight,
          letterSpacing: cs.letterSpacing,
          textAlign: cs.textAlign,
          padding: cs.padding,
          margin: cs.margin,
          borderRadius: cs.borderRadius,
          border: cs.border,
          boxShadow: cs.boxShadow,
          width: cs.width,
          height: cs.height,
          minHeight: cs.minHeight,
          maxWidth: cs.maxWidth,
          display: cs.display,
          gap: cs.gap,
          gridTemplateColumns: cs.gridTemplateColumns,
          flexDirection: cs.flexDirection,
          alignItems: cs.alignItems,
          justifyContent: cs.justifyContent,
          position: cs.position,
          overflow: cs.overflow,
          transition: cs.transition,
          opacity: cs.opacity,
          textTransform: cs.textTransform,
          textDecoration: cs.textDecoration,
          fontFeatureSettings: cs.fontFeatureSettings,
          fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
        },
      };
    }

    return extracted;
  }, KEY_SELECTORS);
}
277
+
278
/**
 * Collects CSS custom properties (`--*`) visible on the page.
 *
 * Sources, in priority order (the first value found for a name wins):
 * 1. the computed style of the root element;
 * 2. the computed style of a few scoped roots (body, main, `[data-theme]`,
 *    a dark-mode container, header/nav);
 * 3. stylesheet rules whose selector is `:root`, `html`, `body`, `*`, or
 *    mentions `.dark` / `[data-theme`.
 *
 * @param page Playwright page to evaluate in.
 * @returns A map from custom-property name to its trimmed value.
 */
async function extractCSSCustomProperties(page: Page): Promise<Record<string, string>> {
  return page.evaluate(() => {
    const vars: Record<string, string> = {};

    // 1. :root / html
    const rootDecl = getComputedStyle(document.documentElement);
    for (let idx = 0; idx < rootDecl.length; idx++) {
      const name = rootDecl[idx];
      if (!name.startsWith('--')) continue;
      vars[name] = rootDecl.getPropertyValue(name).trim();
    }

    // 2. v4-V1-T5: scoped roots — Tailwind/shadcn often define --vars on .dark,
    //    body, or section scopes rather than on :root.
    const scopedRoots = [
      document.body,
      document.querySelector('main'),
      document.querySelector('[data-theme]'),
      document.querySelector('.dark, [class*="dark"]'),
      document.querySelector('header, nav'),
    ].filter(Boolean) as HTMLElement[];
    for (const scope of scopedRoots) {
      const scopeDecl = getComputedStyle(scope);
      for (let idx = 0; idx < scopeDecl.length; idx++) {
        const name = scopeDecl[idx];
        if (!name.startsWith('--') || name in vars) continue;
        vars[name] = scopeDecl.getPropertyValue(name).trim();
      }
    }

    // 3. Stylesheet rules (extended selector list).
    try {
      for (const sheet of document.styleSheets) {
        try {
          for (const rule of sheet.cssRules) {
            if (!(rule instanceof CSSStyleRule)) continue;
            const sel = rule.selectorText;
            const isGlobalScope = sel === ':root' || sel === 'html' || sel === 'body' || sel === '*';
            const isThemeScope = /\.dark\b/.test(sel) || /\[data-theme/.test(sel);
            if (!isGlobalScope && !isThemeScope) continue;

            for (let idx = 0; idx < rule.style.length; idx++) {
              const name = rule.style[idx];
              if (!name.startsWith('--') || name in vars) continue;
              vars[name] = rule.style.getPropertyValue(name).trim();
            }
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    return vars;
  });
}
334
+
335
/**
 * Collects the distinct colour values used on the page.
 *
 * Up to 3000 elements are sampled: visible elements (non-zero bounding box)
 * first, then the remaining elements in DOM order. For each sampled element the
 * colour-bearing computed properties are read on the element itself and on its
 * `::before` / `::after` pseudo-elements when they generate content.
 *
 * A property value is kept when it contains exactly one distinct `rgb()/rgba()`
 * or hex colour. Values mixing several colours (e.g. a `borderColor` whose
 * sides differ) are dropped because there is no canonical colour to record.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct colour strings in first-seen order.
 */
async function extractAllColors(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const MAX = 3000;
    const props = ['backgroundColor', 'color', 'borderColor', 'borderTopColor', 'outlineColor', 'caretColor', 'columnRuleColor', 'fill', 'stroke'] as const;

    // Phase 5.1.1 — parsing stays inline: named helpers inside page.evaluate make
    // tsx emit a `__name` wrapper that does not exist in the page context.
    // borderColor can be "rgb(A) rgb(B) rgb(C) rgb(D)" when sides differ.
    const COLOR_REGEX = /rgba?\([^)]+\)|#[0-9a-fA-F]{3,8}/g;

    const all = Array.from(document.querySelectorAll('*'));
    const visible = all.filter(el => {
      const r = el.getBoundingClientRect();
      return r.width > 0 && r.height > 0;
    });

    // Prioritise visible elements, then fill the remaining slots with the rest.
    let sample: Element[];
    if (visible.length >= MAX) {
      sample = visible.slice(0, MAX);
    } else {
      // Set lookup keeps this linear; Array.includes here was O(n²) on large DOMs.
      const visibleSet = new Set<Element>(visible);
      sample = [...visible, ...all.filter(el => !visibleSet.has(el))].slice(0, MAX);
    }

    const colorSet = new Set<string>();
    for (const el of sample) {
      // The element itself first, then any pseudo-element that generates content.
      const declarations: CSSStyleDeclaration[] = [getComputedStyle(el)];
      for (const pseudo of ['::before', '::after']) {
        const pcs = getComputedStyle(el, pseudo);
        if (pcs.content && pcs.content !== 'none' && pcs.content !== 'normal') {
          declarations.push(pcs);
        }
      }

      for (const decl of declarations) {
        for (const prop of props) {
          const val = decl[prop as keyof CSSStyleDeclaration] as string;
          if (!val || val === 'rgba(0, 0, 0, 0)' || val === 'transparent') continue;

          const matches = val.match(COLOR_REGEX) || [];
          if (matches.length === 0) {
            if (val.startsWith('#') || val.startsWith('rgb')) colorSet.add(val);
          } else if (new Set(matches).size === 1) {
            // One colour, possibly repeated for each side.
            colorSet.add(matches[0]);
          }
          // Otherwise: mixed-side colours → drop (no canonical colour).
        }
      }
    }

    return [...colorSet];
  });
}
395
+
396
/**
 * Phase 5.1.3 — Detect anti-bot challenge pages BEFORE extraction.
 * Cloudflare, captchas, "Just a moment", and similar interstitials would otherwise
 * be extracted as if they were the real site, producing nonsense tokens.
 *
 * Checks, in order:
 * 1. known challenge phrases in the page title, the first 500 characters of body
 *    text, or the first 1000 characters of the document HTML;
 * 2. a near-empty page: fewer than 100 characters of visible text AND fewer than
 *    5 direct children under `<body>`.
 *
 * @param page Playwright page to evaluate in.
 * @returns `{ blocked: true, reason }` when a challenge is suspected, otherwise
 *   `{ blocked: false }`.
 */
async function detectBotChallenge(page: Page): Promise<{ blocked: boolean; reason?: string }> {
  return page.evaluate(() => {
    const title = (document.title || '').toLowerCase();
    const bodyText = (document.body?.textContent || '').slice(0, 500).toLowerCase();
    const html = (document.documentElement?.outerHTML || '').slice(0, 2000).toLowerCase();
    const haystacks = [title, bodyText, html.slice(0, 1000)];

    const SIGNATURES = [
      { pattern: /just a moment\.\.\.|checking your browser/i, reason: 'Cloudflare interstitial' },
      { pattern: /verify you are human|are you a robot/i, reason: 'Human verification challenge' },
      { pattern: /captcha|recaptcha|hcaptcha/i, reason: 'CAPTCHA detected' },
      // `\b403\b`: a bare `403` also matched hashes, hex colours and longer numbers
      // (e.g. "#f40300") in the HTML head, flagging real pages as blocked.
      { pattern: /access denied|forbidden|\b403\b/i, reason: 'Access denied (403)' },
      { pattern: /enable javascript and cookies|please enable cookies/i, reason: 'JS/cookies required gate' },
      { pattern: /ddos protection|protection by/i, reason: 'DDoS protection page' },
    ];

    for (const sig of SIGNATURES) {
      if (haystacks.some(text => sig.pattern.test(text))) {
        return { blocked: true, reason: sig.reason };
      }
    }

    // Heuristic: page with <100 chars of visible text AND <5 top-level body children = suspicious
    const visibleText = (document.body?.innerText || '').trim();
    const elementCount = document.body?.children?.length || 0;
    if (visibleText.length < 100 && elementCount < 5) {
      return { blocked: true, reason: `Suspicious empty page (${visibleText.length} chars, ${elementCount} elements)` };
    }

    return { blocked: false };
  });
}
432
+
433
/**
 * Lists the distinct computed `font-family` and `font-size` values found on the
 * first 3000 elements of the document (DOM order).
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct families and sizes, each in first-seen order.
 */
async function extractAllFonts(page: Page): Promise<{ families: string[]; sizes: string[] }> {
  return page.evaluate(() => {
    const fontFamilies = new Set<string>();
    const fontSizes = new Set<string>();

    for (const node of Array.from(document.querySelectorAll('*')).slice(0, 3000)) {
      const computed = getComputedStyle(node);
      fontFamilies.add(computed.fontFamily);
      fontSizes.add(computed.fontSize);
    }

    return {
      families: Array.from(fontFamilies),
      sizes: Array.from(fontSizes),
    };
  });
}
447
+
448
/**
 * Lists the distinct non-zero computed `border-radius` values found on the
 * first 3000 elements of the document (DOM order).
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct radius strings in first-seen order; `0px` is excluded.
 */
async function extractAllBorderRadii(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const found = Array.from(document.querySelectorAll('*'))
      .slice(0, 3000)
      .map(node => getComputedStyle(node).borderRadius)
      .filter(radius => radius && radius !== '0px');
    return Array.from(new Set<string>(found));
  });
}
462
+
463
/**
 * Lists the distinct computed `box-shadow` values found on the first 3000
 * elements of the document (DOM order).
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct shadow strings in first-seen order; `none` is excluded.
 */
async function extractAllShadows(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const found = Array.from(document.querySelectorAll('*'))
      .slice(0, 3000)
      .map(node => getComputedStyle(node).boxShadow)
      .filter(shadow => shadow && shadow !== 'none');
    return Array.from(new Set<string>(found));
  });
}
477
+
478
/**
 * Lists the distinct computed `transition` values found on the first 1000
 * elements of the document (DOM order), ignoring the two no-op defaults
 * `all 0s ease 0s` and `none 0s ease 0s`.
 *
 * @param page Playwright page to evaluate in.
 * @returns Distinct transition strings in first-seen order.
 */
async function extractAllTransitions(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const found = Array.from(document.querySelectorAll('*'))
      .slice(0, 1000)
      .map(node => getComputedStyle(node).transition)
      .filter(value => value && value !== 'all 0s ease 0s' && value !== 'none 0s ease 0s');
    return Array.from(new Set<string>(found));
  });
}
492
+
493
+ // ── Component Variants (all unique styles per component type) ───────
494
+
495
/**
 * Collects the visually distinct variants of each component type on the page.
 *
 * For every component selector, the first 50 matches are inspected. Elements
 * smaller than 5px in either dimension are skipped, and elements sharing the
 * same style fingerprint (colours, font, radius, padding, border, shadow,
 * display, height) count as one variant. At most 10 variants are kept per type.
 * Site-specific component types are only considered when their selector matches
 * something in the DOM.
 *
 * @param page Playwright page to evaluate in.
 * @returns A map from component type to its variants; types without any
 *   variant are omitted.
 */
async function extractComponentVariants(page: Page): Promise<Record<string, ComponentVariant[]>> {
  return page.evaluate(() => {
    const selectorsByType: Record<string, string> = {
      // ── Core interactive ──
      // v4-V1-T1: extended button selector covers Tailwind utility CSS + ARIA + handlers
      // (sites without a semantic .btn class, e.g. utility-first frameworks, are now detected)
      buttons: 'button, [class*="btn"], [role="button"], a[class*="button"], input[type="submit"], input[type="button"], a[class*="bg-"][href], a[class*="-cta"], button[class*="bg-"], [onclick]:not(div):not(span):not(li), [data-action="button"], a[aria-label][href][class*="px-"]',
      inputs: 'input, textarea, select',
      searchBar: '[role="search"], [class*="search-bar"], [class*="searchbar"], [class*="search-form"]',
      // ── Content blocks ──
      cards: '[class*="card"], article, [class*="tile"]',
      badges: '[class*="badge"], [class*="tag"], [class*="chip"], [class*="label"]:not(label)',
      statusBadge: '[class*="status"], [class*="pill"], [class*="indicator"], [class*="dot"][class*="color"], [data-status]',
      // ── Navigation & structure ──
      navLinks: 'nav a, [class*="nav"] a, [class*="menu"] a, [role="navigation"] a, header a',
      tabs: '[role="tablist"], [role="tab"], [class*="tabs"], [class*="tab-bar"], [class*="tab-nav"]',
      footerLinks: 'footer a, [role="contentinfo"] a',
      // ── Marketing sections ──
      pricingCard: '[class*="pricing"], [class*="price-card"], [class*="plan-card"], [class*="plan-tile"], [class*="tier"]',
      ctaBanner: '[class*="cta"], [class*="call-to-action"], [class*="banner-cta"], [class*="promo-banner"]',
      testimonial: '[class*="testimonial"], [class*="review-card"], [class*="quote-block"], [class*="customer-story"]',
      logoTile: '[class*="logo-grid"] img, [class*="logos"] img, [class*="customer-logo"], [class*="partner-logo"], [class*="brand-logo"]',
      // ── Typography roles ──
      headingH1: 'h1',
      headingH2: 'h2',
      headingH3: 'h3',
      headingH4: 'h4',
      headingH5: 'h5',
      headingH6: 'h6',
      links: 'a',
      eyebrowLabels: '[class*="eyebrow"], [class*="overline"], [class*="kicker"], [class*="label--small"], [class*="meta"], [class*="subtitle"]',
      captions: 'figcaption, caption, [class*="caption"], [class*="helper-text"], [class*="supporting"]',
      tableHeaders: 'th, thead td',
      // ── Misc UI ──
      avatar: '[class*="avatar"], [class*="profile-pic"], [class*="user-pic"]',
      divider: 'hr, [class*="divider"], [class*="separator"]',
      tooltip: '[role="tooltip"], [class*="tooltip"]',
    };

    // Site-specific selectors — only added when found in the DOM (avoids noise on non-matching sites).
    const optionalSelectors: Record<string, string> = {
      // Travel / booking
      datePicker: '[class*="date-picker"], [class*="datepicker"], [class*="calendar"], [data-testid*="date"]',
      reservationCard: '[class*="reservation"], [class*="booking"], [class*="checkout-panel"]',
      propertyCard: '[class*="property-card"], [class*="listing-card"], [class*="stay-card"]',
      ratingDisplay: '[class*="rating"], [class*="review-score"], [class*="star-rating"]',
      hostCard: '[class*="host-card"], [class*="host-info"], [class*="profile-card"]',
      // SaaS / dev tools
      codeBlock: 'pre, code, [class*="code-block"], [class*="syntax"], [class*="prism"]',
      changelogRow: '[class*="changelog"], [class*="release"], [class*="release-note"], [class*="version-row"]',
      breadcrumb: '[aria-label="breadcrumb"], [class*="breadcrumb"], nav[class*="crumb"]',
      alert: '[role="alert"], [class*="alert"], [class*="notification-bar"], [class*="toast"]',
      commandPalette: '[class*="command-palette"], [class*="CommandPalette"], [class*="cmdk-root"], [role="dialog"][class*="search"], [class*="CommandDialog"]',
      dataTable: 'table[class*="data"], [class*="data-table"], [class*="DataTable"], [class*="DataGrid"], [role="grid"]',
      accordion: '[class*="accordion"], [class*="Accordion"], details:has(summary), [role="region"][class*="collapsible"]',
      skeleton: '[class*="skeleton"], [class*="Skeleton"], [class*="shimmer"], [class*="loading-placeholder"]',
      progressBar: '[role="progressbar"], [class*="progress-bar"], [class*="ProgressBar"], [class*="progress-track"]',
      emptyState: '[class*="empty-state"], [class*="EmptyState"], [class*="empty-placeholder"], [class*="no-results"]',
      kpiCard: '[class*="metric-card"], [class*="stat-card"], [class*="KpiCard"], [class*="StatCard"], [class*="stats-card"]',
      timelinePill: '[class*="timeline"], [class*="Timeline"], [class*="agent-trace"], [class*="AgentTrace"]',
      // Ecom
      productCard: '[class*="product-card"], [class*="product-tile"], [class*="sku-card"]',
      priceTag: '[class*="price"], [class*="cost"], [itemprop="price"]',
    };
    Object.entries(optionalSelectors).forEach(([type, css]) => {
      const presentInDom = document.querySelectorAll(css).length > 0;
      if (presentInDom) selectorsByType[type] = css;
    });

    const collected: Record<string, any[]> = {};

    Object.entries(selectorsByType).forEach(([type, css]) => {
      const fingerprints = new Set<string>();
      const unique: any[] = [];
      const candidates = Array.from(document.querySelectorAll(css)).slice(0, 50);

      // Stop as soon as 10 distinct variants have been gathered for this type.
      for (let i = 0; i < candidates.length && unique.length < 10; i++) {
        const node = candidates[i];
        const box = node.getBoundingClientRect();
        if (box.width < 5 || box.height < 5) continue;

        const cs = getComputedStyle(node);

        // Two elements with the same fingerprint are treated as the same variant.
        const fingerprint = [
          cs.backgroundColor,
          cs.color,
          cs.fontSize,
          cs.fontWeight,
          cs.fontFamily,
          cs.borderRadius,
          cs.padding,
          cs.border,
          cs.boxShadow,
          cs.display,
          cs.height,
        ].join('|');
        if (fingerprints.has(fingerprint)) continue;
        fingerprints.add(fingerprint);

        unique.push({
          tag: node.tagName.toLowerCase(),
          classes: Array.from(node.classList),
          text: (node as HTMLElement).innerText?.trim().slice(0, 100) || '',
          rect: {
            x: Math.round(box.x),
            y: Math.round(box.y),
            width: Math.round(box.width),
            height: Math.round(box.height),
          },
          styles: {
            backgroundColor: cs.backgroundColor,
            color: cs.color,
            fontFamily: cs.fontFamily,
            fontSize: cs.fontSize,
            fontWeight: cs.fontWeight,
            lineHeight: cs.lineHeight,
            letterSpacing: cs.letterSpacing,
            padding: cs.padding,
            margin: cs.margin,
            borderRadius: cs.borderRadius,
            border: cs.border,
            boxShadow: cs.boxShadow,
            width: cs.width,
            height: cs.height,
            maxWidth: cs.maxWidth,
            display: cs.display,
            gap: cs.gap,
            gridTemplateColumns: cs.gridTemplateColumns,
            flexDirection: cs.flexDirection,
            alignItems: cs.alignItems,
            justifyContent: cs.justifyContent,
            position: cs.position,
            overflow: cs.overflow,
            transition: cs.transition,
            opacity: cs.opacity,
            textTransform: cs.textTransform,
            textDecoration: cs.textDecoration,
            fontFeatureSettings: cs.fontFeatureSettings,
            fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
          },
        });
      }

      if (unique.length > 0) {
        collected[type] = unique;
      }
    });

    return collected;
  });
}
634
+
635
+ // ── Component States (:hover, :focus) ──────────────────────
636
+ // Note: `:active` not supported — UA state unreachable via Playwright dispatchEvent
637
+
638
/**
 * Computed-style snapshots for one component type, keyed by interaction state.
 * Each record maps a camelCase CSS property name to its computed value.
 */
interface ComponentStateStyles {
  /** Styles of the element at rest; always present. */
  default: Record<string, string>;
  /** Styles sampled while the pointer hovers the element (optional). */
  hover?: Record<string, string>;
  /** Styles sampled while the element holds focus (optional). */
  focus?: Record<string, string>;
}
643
+
644
// CSS properties (camelCase `CSSStyleDeclaration` keys) of interest when comparing a
// component's default appearance with its interactive states.
const STATE_PROPS = [
  'backgroundColor', 'color', 'border', 'borderColor', 'boxShadow',
  'opacity', 'transform', 'outline', 'textDecoration', 'cursor',
] as const;
648
+
649
/**
 * Reads the STATE_PROPS computed-style values of the first element matching `selector`.
 *
 * The property list is passed into the page as an argument (closures are not available
 * inside `page.evaluate`), so STATE_PROPS stays the single source of truth.
 *
 * @param page     Playwright page to query.
 * @param selector CSS selector; only the first match is read.
 * @param refocus  When true, calls `el.focus()` in-page before reading the styles.
 * @returns Property → computed value map, or null when nothing matches the selector.
 */
async function readComponentStateProps(
  page: Page,
  selector: string,
  refocus = false,
): Promise<Record<string, string> | null> {
  return page.evaluate(
    ({ selector, props, refocus }) => {
      const el = document.querySelector(selector) as HTMLElement | null;
      if (!el) return null;
      if (refocus) el.focus();
      const cs = getComputedStyle(el);
      const out: Record<string, string> = {};
      for (const p of props) out[p] = (cs as any)[p];
      return out;
    },
    { selector, props: [...STATE_PROPS] as string[], refocus },
  );
}

/**
 * Samples default / :hover / :focus computed styles for a fixed list of component types.
 *
 * For each target the FIRST element matching its selector is measured at rest, then in each
 * requested state. A state snapshot is stored (in full) only when at least one property
 * differs from the default snapshot. Failures are swallowed per state / per target so one
 * uncooperative component never aborts the extraction; they are logged only when
 * `CLONE_LOG_LEVEL=debug`.
 *
 * `:active` is intentionally not sampled: dispatchEvent('mousedown') does not trigger the
 * UA `:active` state, so the data was identical to the default snapshot (audit 2026-04-18).
 *
 * @param page Playwright page, already navigated.
 * @returns Map of component name → state snapshots; targets with no matching element are omitted.
 */
async function extractComponentStates(page: Page): Promise<Record<string, ComponentStateStyles>> {
  const result: Record<string, ComponentStateStyles> = {};
  const debug = process.env.CLONE_LOG_LEVEL === 'debug';

  const targets: { name: string; selector: string; states: ('hover' | 'focus')[] }[] = [
    { name: 'button', selector: 'button:not([disabled]), [class*="btn"]:not([disabled]), a[class*="button"]', states: ['hover', 'focus'] },
    { name: 'input', selector: 'input[type="text"], input[type="search"], input[type="email"]', states: ['focus'] },
    { name: 'link', selector: 'a:not([class*="btn"]):not([class*="button"])', states: ['hover'] },
    { name: 'card', selector: '[class*="card"]:not(body), article, [class*="tile"]', states: ['hover'] },
    { name: 'navLink', selector: 'nav a, header a, [role="navigation"] a', states: ['hover'] },
    { name: 'tab', selector: '[role="tab"], [class*="tab-item"], [class*="tab-link"]', states: ['hover'] },
    { name: 'badge', selector: '[class*="badge"], [class*="tag"], [class*="chip"]', states: ['hover'] },
    { name: 'footerLink', selector: 'footer a, [role="contentinfo"] a', states: ['hover'] },
  ];

  for (const target of targets) {
    try {
      // Baseline snapshot; a null result means the site has no such component.
      const defaultStyles = await readComponentStateProps(page, target.selector);
      if (!defaultStyles) continue;

      const stateStyles: ComponentStateStyles = { default: defaultStyles };

      for (const state of target.states) {
        try {
          let stateResult: Record<string, string> | null = null;

          if (state === 'hover') {
            // Use Playwright native hover
            await page.hover(target.selector, { timeout: 2000 });
            try {
              stateResult = await readComponentStateProps(page, target.selector);
            } finally {
              // Always move away, even if sampling threw, so the hover state cannot
              // leak into the next target's baseline snapshot.
              await page.mouse.move(0, 0);
            }
          } else if (state === 'focus') {
            await page.focus(target.selector);
            try {
              stateResult = await readComponentStateProps(page, target.selector, true);
            } finally {
              // Always drop focus again, for the same reason as the hover reset above.
              await page.evaluate(() => { (document.activeElement as HTMLElement)?.blur?.(); });
            }
          }

          // Record the state only if something actually changed vs default. The full
          // snapshot is stored (not just the changed keys).
          if (stateResult && Object.entries(stateResult).some(([k, v]) => v !== defaultStyles[k])) {
            stateStyles[state] = stateResult;
          }
        } catch (err) {
          // State extraction failed — site may block interaction (CSP, intercepted event)
          if (debug) {
            console.log(` ⚠️ ${target.name}:${state} failed: ${(err as Error).message.slice(0, 80)}`);
          }
        }
      }

      result[target.name] = stateStyles;
    } catch (err) {
      // Element not found for target.selector — common, site may not have this component
      if (debug) {
        console.log(` ⚠️ ${target.name} skipped: ${(err as Error).message.slice(0, 80)}`);
      }
    }
  }

  return result;
}
750
+
751
+ // ── OpenType Features & Variable Font Axes ──────────────────────────
752
+
753
/**
 * Collects the OpenType feature tags and variable-font axis tags used on the page.
 *
 * Two sources are merged: the computed `font-feature-settings` / `font-variation-settings`
 * of every element, and the text of every readable stylesheet rule. Feature tags are
 * lower-cased and axis tags upper-cased before de-duplication.
 *
 * NOTE(review): CSS axis tags are case-sensitive (registered axes such as "wght" are
 * lowercase), so the upper-cased axis names returned here are display labels, not values
 * that can be pasted back into CSS — confirm that is what consumers expect.
 *
 * @returns Unique feature tags and axis tags, in first-seen order.
 */
async function extractOpenTypeFeatures(page: Page): Promise<{ features: string[]; axes: string[] }> {
  return page.evaluate(() => {
    const features = new Set<string>();
    const axes = new Set<string>();

    // Pass 1 — computed styles of every element in the document.
    document.querySelectorAll('*').forEach(node => {
      const style = getComputedStyle(node);
      const featureSettings = style.fontFeatureSettings;
      const variationSettings = (style as any).fontVariationSettings;

      // e.g. "ss01" on, "kern" 1, "liga" 0
      if (featureSettings && featureSettings !== 'normal') {
        for (const hit of featureSettings.matchAll(/"([a-z0-9]{4})"/gi)) {
          features.add(hit[1].toLowerCase());
        }
      }

      // e.g. "wght" 400, "wdth" 75
      if (variationSettings && variationSettings !== 'normal') {
        for (const hit of variationSettings.matchAll(/"([A-Z]{4})"/gi)) {
          axes.add(hit[1].toUpperCase());
        }
      }
    });

    // Pass 2 — declarations found in stylesheet rule text.
    try {
      for (const sheet of Array.from(document.styleSheets)) {
        try {
          for (const rule of Array.from(sheet.cssRules || [])) {
            const css = (rule as CSSStyleRule).cssText || '';

            for (const decl of css.match(/font-feature-settings\s*:\s*([^;]+)/gi) ?? []) {
              for (const hit of decl.matchAll(/"([a-z0-9]{4})"/gi)) {
                features.add(hit[1].toLowerCase());
              }
            }

            for (const decl of css.match(/font-variation-settings\s*:\s*([^;]+)/gi) ?? []) {
              for (const hit of decl.matchAll(/"([A-Z]{4})"/gi)) {
                axes.add(hit[1].toUpperCase());
              }
            }
          }
        } catch { /* cross-origin sheet */ }
      }
    } catch { /* security error */ }

    return {
      features: Array.from(features),
      axes: Array.from(axes),
    };
  });
}
814
+
815
+ // ── Font-face declarations ──────────────────────────────────────────
816
+
817
/**
 * Lists the font faces known to the page.
 *
 * Method 1 walks `document.fonts` (covers faces registered from script and from
 * stylesheets that cannot be read). Method 2 parses top-level `@font-face` rules of
 * readable stylesheets, which is where the source URLs live.
 *
 * Faces are de-duplicated on `family|weight|style`, with the family quote-stripped and
 * trimmed in BOTH methods so the keys line up. `FontFace` objects expose no `src`
 * attribute, so an entry created by Method 1 has an empty `src`; when Method 2 later meets
 * the matching `@font-face` rule it backfills `src` (and `display`) on that entry instead
 * of discarding the rule — otherwise the URLs would be lost for every CSS-declared face.
 *
 * @returns One entry per distinct face, in first-seen order.
 */
async function extractFontFaces(page: Page): Promise<FontFaceDeclaration[]> {
  return page.evaluate(() => {
    const fonts: Array<{ family: string; src: string; weight: string; style: string; display: string }> = [];
    // key → entry already pushed to `fonts` (same object, so backfilling mutates the output)
    const byKey = new Map<string, { family: string; src: string; weight: string; style: string; display: string }>();

    // Method 1: document.fonts API (if available)
    try {
      if ('fonts' in document) {
        (document as any).fonts.forEach((font: FontFace) => {
          const family = font.family.replace(/['"]/g, '').trim();
          const weight = font.weight || 'normal';
          const style = font.style || 'normal';
          const key = `${family}|${weight}|${style}`;
          if (byKey.has(key)) return;

          // Not part of the FontFace interface; kept as a best-effort read in case an
          // engine exposes it. Normally this stays ''.
          let src = '';
          try {
            const rawSrc = (font as any).src;
            if (typeof rawSrc === 'string') {
              const urlMatch = rawSrc.match(/url\(["']?([^"')]+)["']?\)/);
              src = urlMatch ? urlMatch[1] : rawSrc;
            }
          } catch { /* ignore */ }

          const entry = { family, src, weight, style, display: (font as any).display || 'auto' };
          byKey.set(key, entry);
          fonts.push(entry);
        });
      }
    } catch { /* fonts API not supported */ }

    // Method 2: Parse @font-face rules from stylesheets
    try {
      for (const sheet of document.styleSheets) {
        try {
          for (const rule of sheet.cssRules) {
            if (!(rule instanceof CSSFontFaceRule)) continue;

            const family = rule.style.getPropertyValue('font-family').replace(/['"]/g, '').trim();
            const src = rule.style.getPropertyValue('src');
            const weight = rule.style.getPropertyValue('font-weight') || 'normal';
            const style = rule.style.getPropertyValue('font-style') || 'normal';
            const display = rule.style.getPropertyValue('font-display') || 'auto';
            const key = `${family}|${weight}|${style}`;

            // Extract actual URLs from the src property
            const urls: string[] = [];
            for (const m of src.matchAll(/url\(["']?([^"')]+)["']?\)/g)) {
              urls.push(m[1]);
            }
            const resolvedSrc = urls.join(', ') || src;

            const existing = byKey.get(key);
            if (existing) {
              // Same face already listed: only fill in what it is missing.
              if (!existing.src && resolvedSrc) {
                existing.src = resolvedSrc;
                if (existing.display === 'auto') existing.display = display;
              }
              continue;
            }

            const entry = { family, src: resolvedSrc, weight, style, display };
            byKey.set(key, entry);
            fonts.push(entry);
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    return fonts;
  });
}
886
+
887
+ // ── Media query breakpoints ─────────────────────────────────────────
888
+
889
/**
 * Extracts viewport-width breakpoints from the `@media` rules of readable stylesheets.
 *
 * Only media conditions containing `min-width` / `max-width` are considered. For each one,
 * every length in px/em/rem worth at least 320px (em/rem are taken as 16px) is recorded as
 * a numeric breakpoint, and the full condition is recorded as an `@media …` entry.
 * Container-query rules are skipped; other grouping rules are descended into.
 *
 * Numeric breakpoints are ordered by their PIXEL equivalent, so mixed units sort correctly
 * (e.g. `640px` before `48em`), not by the bare number.
 *
 * No synthetic defaults are injected: if fewer than two breakpoints are detected the list
 * is returned as-is (Phase 5.2.3).
 *
 * @returns Numeric breakpoints (ascending), followed by the `@media …` strings in first-seen order.
 */
async function extractMediaBreakpoints(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    // label (e.g. "768px", "48em") → pixel equivalent used for ordering
    const numeric = new Map<string, number>();
    const mediaQueries = new Set<string>();

    // Use a stack instead of recursion to avoid named function declarations
    try {
      for (const sheet of document.styleSheets) {
        try {
          const ruleStack: CSSRuleList[] = [sheet.cssRules];
          while (ruleStack.length > 0) {
            const rules = ruleStack.pop()!;
            for (const rule of rules) {
              if (typeof CSSContainerRule !== 'undefined' && rule instanceof CSSContainerRule) continue;
              if (rule instanceof CSSMediaRule) {
                const media = rule.conditionText || rule.media?.mediaText || '';
                // Only capture viewport width breakpoints (min-width / max-width), not feature queries
                if (media && /(min|max)-width\s*:\s*\d/.test(media)) {
                  for (const m of media.matchAll(/(\d+(?:\.\d+)?)(px|em|rem)/g)) {
                    const val = parseFloat(m[1]);
                    const pxVal = m[2] === 'px' ? val : val * 16;
                    const label = `${m[1]}${m[2]}`;
                    // Filter: ignore sub-320px values (print, tiny accessibility queries)
                    if (pxVal >= 320 && !numeric.has(label)) numeric.set(label, pxVal);
                  }
                  mediaQueries.add(`@media ${media}`);
                }
              }
              // Grouping rules (@media, @supports, @layer, …) expose their own rule list.
              const nested = (rule as Partial<CSSGroupingRule>).cssRules;
              if (nested && nested.length > 0) {
                ruleStack.push(nested);
              }
            }
          }
        } catch { /* cross-origin stylesheet, skip */ }
      }
    } catch { /* no stylesheets accessible */ }

    const numericBPs = Array.from(numeric.entries())
      .sort((a, b) => a[1] - b[1])
      .map(entry => entry[0]);

    return [...numericBPs, ...Array.from(mediaQueries)];
  });
}
945
+
946
+ // ── Sections extraction ─────────────────────────────────────────────
947
+
948
+ // v2.7 A.1 — the hero headline is frequently NOT the largest measured <h*> (it's a styled <div>,
949
+ // a <span>, or image text), and the §1 narrative was hardcoded to the BODY font — so the single most
950
+ // recognizable signature (serif vs sans display) was lost on every site. Detect the display face by
951
+ // VISUAL PROMINENCE: the above-fold text node with the largest fontSize × rendered width, and classify it.
952
/** One display (headline-scale) typeface detected on the page. */
interface DisplayFace {
  /** First family of the computed `font-family` list, quotes removed. */
  family: string;
  /** Computed `font-size` of the element the face was found on. */
  fontSize: string;
  /** Computed `font-weight` of that element. */
  fontWeight: string;
  /** True when the family name is classified as a serif face. */
  isSerif: boolean;
  /** True when the computed font-style is italic or the family name contains "italic". */
  isItalic: boolean;
  /** Short excerpt of the text rendered in this face. */
  sample: string;
}
953
/**
 * Detects the page's display typeface(s) by visual prominence.
 *
 * Every text-bearing candidate element (own text of 3–120 chars, at least 80×24px, font
 * size ≥ 22px) is scored as `fontSize × min(width, viewportWidth)`, with a 1.5× bonus when
 * it sits near the first viewport. The best-scoring candidate gives the primary face; the
 * best candidate whose first family differs gives the optional secondary face.
 *
 * IMPORTANT: nothing inside the evaluated callback may be a named function or a named
 * const-arrow — tsx/esbuild would emit a `__name(...)` helper that does not exist in the
 * page context (ReferenceError). Keep all callbacks anonymous and inline.
 *
 * @returns The primary face (plus `secondary` when found), or null when no candidate qualifies.
 */
async function extractDisplaySignature(page: Page): Promise<(DisplayFace & { secondary?: DisplayFace }) | null> {
  return page.evaluate(() => {
    const viewportH = window.innerHeight;
    const viewportW = window.innerWidth;

    // Whole-page scan (hydrated DOM); above-fold candidates are weighted higher below.
    const ranked: Array<{ score: number; family: string; fontSize: string; fontWeight: string; style: string; sample: string }> = [];
    const nodes = document.querySelectorAll('h1,h2,h3,p,a,span,div,[class*="title"],[class*="heading"],[class*="hero"],[class*="headline"]');

    for (const node of Array.from(nodes)) {
      const box = node.getBoundingClientRect();
      if (box.width < 80 || box.height < 24) continue;

      // Only the element's OWN text nodes count, so wrappers don't inherit their children's text.
      let ownText = '';
      node.childNodes.forEach(child => {
        if (child.nodeType === 3) ownText += child.textContent || '';
      });
      ownText = ownText.replace(/\s+/g, ' ').trim();
      if (ownText.length < 3 || ownText.length > 120) continue;

      const style = getComputedStyle(node);
      const px = parseFloat(style.fontSize) || 0;
      if (px < 22) continue; // display scale only

      const nearFold = box.top < viewportH * 1.2 && box.top > -80;
      const foldBonus = nearFold ? 1.5 : 1; // hero counts more
      ranked.push({
        score: px * Math.min(box.width, viewportW) * foldBonus,
        family: style.fontFamily || '',
        fontSize: style.fontSize,
        fontWeight: style.fontWeight,
        style: style.fontStyle,
        sample: ownText.slice(0, 60),
      });
    }

    if (ranked.length === 0) return null;
    ranked.sort((a, b) => b.score - a.score);

    const SERIF = ['times', 'georgia', 'garamond', 'playfair', 'canela', 'fraunces', 'tiempos', 'teodor', 'quincy', 'recoleta', 'editorial', 'freight', 'noe', 'reckless', 'domaine', 'ogg', 'signifier', 'lora', 'merriweather', 'source serif', 'dm serif', 'spectral', 'newsreader', 'cormorant', 'gt sectra', 'ppeditorial', 'instrument serif'];

    // Best candidate, plus the best candidate set in a different first family (if any).
    const best = ranked[0];
    const bestFamily = (best.family.split(',')[0] || '').replace(/["']/g, '').trim().toLowerCase();
    const runnerUp = ranked.find(c => (c.family.split(',')[0] || '').replace(/["']/g, '').trim().toLowerCase() !== bestFamily);
    const picks = runnerUp ? [best, runnerUp] : [best];

    // Classification stays inside an anonymous .map() callback (see IMPORTANT note above).
    const faces = picks.map(pick => {
      const first = (pick.family.split(',')[0] || '').replace(/["']/g, '').trim();
      const low = first.toLowerCase();
      return {
        family: first,
        fontSize: pick.fontSize,
        fontWeight: pick.fontWeight,
        isSerif: SERIF.some(s => low.includes(s)) || (/serif/.test(low) && !/sans/.test(low)),
        isItalic: pick.style === 'italic' || /italic/.test(low),
        sample: pick.sample,
      };
    });

    if (faces.length > 1) {
      return { ...faces[0], secondary: faces[1] };
    }
    return faces[0];
  });
}
998
+
999
/**
 * Extracts the page's major bands (header, hero, feature sections, footer, …).
 *
 * Candidates are: significant direct children of <body>, bands surfaced by descending into
 * giant wrappers when the body has ≤2 children (SPA / Framer / WebGL fallback), and every
 * element matching the structural selectors. Each distinct candidate at least 30px tall is
 * measured, classified by tag/role/class first and content signature second, and returned
 * with its computed styles.
 *
 * `isDark` is derived from the EFFECTIVE background: the nearest element (self or ancestor)
 * whose background colour is at least half opaque. A fully transparent
 * `rgba(0, 0, 0, 0)` — the default for most elements — is therefore no longer read as
 * black. When no painted background is found the band is treated as light.
 *
 * `index` reflects discovery order; the returned array is sorted by vertical position.
 *
 * IMPORTANT: no named functions / named const-arrows inside the evaluated callback
 * (tsx/esbuild `__name` helper is undefined in the page context).
 *
 * @returns Section descriptors sorted by their top Y coordinate.
 */
async function extractSections(page: Page): Promise<SectionExtraction[]> {
  return page.evaluate(() => {
    // Find major page sections
    const sectionSelectors = [
      'header', 'nav', 'main', 'section', 'aside', 'footer',
      '[role="banner"]', '[role="navigation"]', '[role="main"]',
      '[role="complementary"]', '[role="contentinfo"]',
      'div[class*="section"]', 'div[class*="container"]',
      'div[class*="wrapper"]', 'div[class*="hero"]',
    ];

    const seen = new Set<Element>();
    const sections: Array<{
      index: number;
      tag: string;
      classes: string[];
      role: string;
      estimatedPurpose: string;
      rect: { x: number; y: number; width: number; height: number };
      styles: Record<string, string>;
      childCount: number;
      bgTreatment: string;
      isDark: boolean;
      aboveFold: boolean;
      isFullBleed: boolean;
      imgRatio: number;
      maxHeadingPx: number;
      gridCols: number;
      hasAnimation: boolean;
      textLen: number;
      vPad: number;
      hasChart: boolean;
    }> = [];

    // Also get direct children of body that are significant
    const bodyChildren = (document.body ? Array.from(document.body.children) : []).filter(el => {
      const rect = el.getBoundingClientRect();
      return rect.height > 50 && rect.width > 200;
    });

    // v2.10-B — SPA/Framer/WebGL fallback: when everything is nested in ≤2 wrappers (e.g. hyperliquid
    // returns a single monolithic section), descend to surface real bands. Iterative stack (NO named
    // recursive fn — that triggers tsx/esbuild __name → ReferenceError in page.evaluate). A "band" =
    // full-width-ish, 80–2600px tall, with content; giant wrappers (>2600px) are descended into.
    const extraCandidates: Element[] = [];
    if (bodyChildren.length <= 2) {
      const vw = window.innerWidth;
      const stack: Element[] = bodyChildren.slice();
      let guard = 0; // hard cap on visited wrappers so a pathological DOM cannot stall the scan
      while (stack.length > 0 && guard < 3000) {
        guard++;
        const node = stack.shift() as Element;
        const kids = Array.from(node.children);
        for (let ki = 0; ki < kids.length; ki++) {
          const k = kids[ki];
          const kr = k.getBoundingClientRect();
          const hasContent = k.children.length > 0 || (k.textContent || '').trim().length > 0;
          if (kr.width > vw * 0.6 && kr.height >= 80 && kr.height <= 2600 && hasContent) extraCandidates.push(k);
          else if (kr.height > 2600 && k.children.length > 0) stack.push(k);
        }
      }
    }

    const allCandidates = [
      ...bodyChildren,
      ...extraCandidates,
      ...sectionSelectors.flatMap(s => Array.from(document.querySelectorAll(s))),
    ];

    let idx = 0;
    for (const el of allCandidates) {
      if (seen.has(el)) continue;
      seen.add(el);

      const rect = el.getBoundingClientRect();
      if (rect.height < 30) continue;

      const cs = getComputedStyle(el);
      const tag = el.tagName.toLowerCase();
      const classes = Array.from(el.classList);
      const role = el.getAttribute('role') || '';

      // v2.7 A.3/A.4 — content-signature classification (not tag/class strings only).
      // The old classifier defaulted to 'unknown' on Webflow/Shopify/Chakra div-soup, making §13
      // a useless stub. We now read real content signals so §13 names real bands.
      const classStr = classes.join(' ').toLowerCase();
      const vw = window.innerWidth, vh = window.innerHeight;
      const txt = (el.textContent || '').replace(/\s+/g, ' ').trim();
      const txtLen = txt.length;
      const imgEls = el.querySelectorAll('img,picture,svg,video');
      let imgArea = 0;
      imgEls.forEach(im => { const r = im.getBoundingClientRect(); imgArea += Math.max(0, r.width) * Math.max(0, r.height); });
      const imgRatio = imgArea / Math.max(1, rect.width * rect.height);
      let maxHeadingPx = 0;
      el.querySelectorAll('h1,h2,h3').forEach(h => { const fs = parseFloat(getComputedStyle(h).fontSize) || 0; if (fs > maxHeadingPx) maxHeadingPx = fs; });
      const isFullBleed = rect.width >= vw * 0.95;
      const aboveFold = rect.y < vh * 0.9;
      const gridCols = cs.gridTemplateColumns && cs.gridTemplateColumns !== 'none' ? cs.gridTemplateColumns.split(' ').filter(Boolean).length : 0;
      const hasAnimation = !!cs.animationName && cs.animationName !== 'none';

      // Background treatment (A.4) — the dominant atmosphere the old extractor was blind to.
      const bgImg = cs.backgroundImage || 'none';
      let bgTreatment = 'flat';
      if (bgImg && bgImg !== 'none') {
        if (/gradient/i.test(bgImg)) bgTreatment = /radial-gradient|conic-gradient/i.test(bgImg) ? 'radial-gradient' : ((bgImg.match(/rgba?\(|#[0-9a-f]{3,8}/gi) || []).length >= 4 ? 'mesh-gradient' : 'linear-gradient');
        else if (/url\(/i.test(bgImg)) bgTreatment = 'image';
      }

      // Dark/light from the effective background: climb until a colour that is at least half
      // opaque is found. Transparent backgrounds serialise as rgba(0, 0, 0, 0) and must not
      // be mistaken for black.
      let isDarkBand = false;
      let bgNode: Element | null = el;
      while (bgNode) {
        const bgColor = (bgNode === el ? cs : getComputedStyle(bgNode)).backgroundColor || '';
        const bgM = bgColor.match(/rgba?\(\s*(\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?/);
        if (bgM) {
          const alpha = bgM[4] === undefined ? 1 : parseFloat(bgM[4]);
          if (alpha >= 0.5) {
            isDarkBand = ((+bgM[1]) * 0.2126 + (+bgM[2]) * 0.7152 + (+bgM[3]) * 0.0722) / 255 < 0.35;
            break;
          }
        } else if (bgColor && bgColor !== 'transparent') {
          // Painted with a colour syntax we cannot parse: keep the previous "not dark" default.
          break;
        }
        bgNode = bgNode.parentElement;
      }

      // v2.11-B — premium sites put their generous vertical whitespace on an INNER wrapper, not the
      // section element (attio sections report padding "0px"). Capture the largest child's vertical
      // padding so the real airy rhythm survives. Also flag chart/canvas/SVG-graph bands (A).
      let innerPadTop = parseFloat(cs.paddingTop) || 0;
      let innerPadBottom = parseFloat(cs.paddingBottom) || 0;
      if (innerPadTop < 8 && innerPadBottom < 8) {
        const kidsArr = Array.from(el.children);
        for (let ci = 0; ci < kidsArr.length; ci++) {
          const cr = kidsArr[ci].getBoundingClientRect();
          if (cr.height > 60 && cr.width > rect.width * 0.4) {
            const kcs = getComputedStyle(kidsArr[ci]);
            innerPadTop = Math.max(innerPadTop, parseFloat(kcs.paddingTop) || 0);
            innerPadBottom = Math.max(innerPadBottom, parseFloat(kcs.paddingBottom) || 0);
          }
        }
      }
      const vPad = Math.round((innerPadTop + innerPadBottom) / 2);
      const hasChart = !!el.querySelector('canvas, svg path[d], svg polyline, [class*="chart" i], [class*="graph" i]');

      // Classify: structural tags first, then content signature.
      let purpose = 'unknown';
      if (tag === 'header' || role === 'banner' || classStr.includes('header')) purpose = 'header';
      else if (tag === 'nav' || role === 'navigation' || classStr.includes('nav')) purpose = 'navigation';
      else if (tag === 'footer' || role === 'contentinfo' || classStr.includes('footer')) purpose = 'footer';
      else if (tag === 'aside' || role === 'complementary' || classStr.includes('sidebar')) purpose = 'sidebar';
      else if (hasAnimation && txtLen > 0 && txtLen < 220 && rect.height < 180 && isFullBleed) purpose = 'marquee';
      else if (aboveFold && idx <= 2 && maxHeadingPx >= 30 && isFullBleed) purpose = 'hero';
      else if (/pricing|tarif|formule|\bplan/.test(classStr) || (gridCols >= 2 && /€|\$|\/mo\b|\/mois|par mois/i.test(txt.slice(0, 400)))) purpose = 'pricing';
      else if (/faq|accordion|question/.test(classStr)) purpose = 'faq';
      else if (/testimonial|review|avis|t[ée]moignage/i.test(classStr + ' ' + txt.slice(0, 160))) purpose = 'testimonials';
      else if (imgEls.length >= 5 && rect.height < 220 && maxHeadingPx < 24) purpose = 'logo-strip';
      else if (imgRatio > 0.4 && gridCols >= 3) purpose = 'gallery-grid';
      else if (gridCols >= 2 && el.children.length >= 3 && maxHeadingPx < 30) purpose = 'card-grid';
      else if (maxHeadingPx >= 24 && txtLen > 60) purpose = 'feature-section';
      else if (txtLen > 220) purpose = 'content-section';
      else if (tag === 'main' || role === 'main') purpose = 'main-content';
      else if (tag === 'section') purpose = 'section';

      sections.push({
        index: idx++,
        tag,
        classes,
        role,
        estimatedPurpose: purpose,
        rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
        styles: {
          backgroundColor: cs.backgroundColor,
          color: cs.color,
          fontFamily: cs.fontFamily,
          fontSize: cs.fontSize,
          padding: cs.padding,
          margin: cs.margin,
          display: cs.display,
          gap: cs.gap,
          gridTemplateColumns: cs.gridTemplateColumns,
          flexDirection: cs.flexDirection,
          maxWidth: cs.maxWidth,
          width: cs.width,
          height: cs.height,
          position: cs.position,
          borderRadius: cs.borderRadius,
          boxShadow: cs.boxShadow,
          overflow: cs.overflow,
          alignItems: cs.alignItems,
          justifyContent: cs.justifyContent,
          textTransform: cs.textTransform,
          textDecoration: cs.textDecoration,
          border: cs.border,
          letterSpacing: cs.letterSpacing,
          lineHeight: cs.lineHeight,
          fontWeight: cs.fontWeight,
          opacity: cs.opacity,
          transition: cs.transition,
          fontFeatureSettings: cs.fontFeatureSettings,
          fontVariationSettings: (cs as any).fontVariationSettings || 'normal',
        },
        childCount: el.children.length,
        bgTreatment,
        isDark: isDarkBand,
        aboveFold,
        isFullBleed,
        imgRatio: Math.round(imgRatio * 100) / 100,
        maxHeadingPx: Math.round(maxHeadingPx),
        gridCols,
        hasAnimation,
        textLen: txtLen,
        vPad,
        hasChart,
      });
    }

    // Sort by Y position
    sections.sort((a, b) => a.rect.y - b.rect.y);
    return sections;
  });
}
1203
+
1204
/**
 * Lists the first 50 `<img>` elements of the document with their resolved source URL,
 * alt text and rendered size (rounded to whole pixels).
 */
async function extractImages(page: Page): Promise<{ src: string; alt: string; width: number; height: number }[]> {
  return page.evaluate(() => {
    const found: { src: string; alt: string; width: number; height: number }[] = [];
    const imgs = document.querySelectorAll('img');
    const limit = Math.min(imgs.length, 50);

    for (let i = 0; i < limit; i++) {
      const img = imgs[i];
      const box = img.getBoundingClientRect();
      found.push({
        src: img.src,
        alt: img.alt,
        width: Math.round(box.width),
        height: Math.round(box.height),
      });
    }

    return found;
  });
}
1214
+
1215
+ /**
1216
+ * Extract imagery profile — OG image, hero image, format distribution, illustration vs photo mix.
1217
+ * Used in DESIGN.md §10b to guide LLMs on visual tone (lifestyle photography vs product mockups
1218
+ * vs abstract gradients vs illustration-heavy).
1219
+ */
1220
interface ImageryProfile {
  /** `content` of the `og:image` meta tag, or null when absent. */
  ogImage: string | null;
  /** Parsed `og:image:width`, or null when absent. */
  ogImageWidth: number | null;
  /** Parsed `og:image:height`, or null when absent. */
  ogImageHeight: number | null;
  /** `content` of the `twitter:image` meta tag, or null when absent. */
  twitterImage: string | null;
  /** Largest counted `<img>` located near the top of the page, or null when none qualifies. */
  heroImage: { src: string; alt: string; width: number; height: number; aspectRatio: number } | null;
  /** Image counts per format; `svg` also includes large inline `<svg>` elements. */
  formats: { png: number; jpg: number; webp: number; svg: number; gif: number; other: number };
  /** Number of `<img>` elements counted (tiny images are excluded). */
  totalImages: number;
  /** Counted images whose top edge lies within the first viewport. */
  totalAboveFold: number;
  /** Counted images bucketed by width/height ratio. */
  aspectRatioBuckets: { landscape: number; portrait: number; square: number; ultrawide: number };
  illustrationHeavy: boolean; // large SVG count dominates the raster photo (jpg/webp) count, and the page is not photo-heavy
  /** True when the page carries many raster photos (jpg/webp). */
  photoHeavy: boolean;
  /** Mean rendered size of the counted images, in whole pixels. */
  avgImageSize: { width: number; height: number };
  /** Counts and flags for decorative background treatments (optional). */
  decorativePatterns?: {
    /** Elements whose linear/conic gradient background has several stops. */
    multiStopGradients: number;
    /** Elements whose background uses a radial gradient. */
    radialGradients: number;
    /** Inline `<svg>` elements large enough to count as shapes/illustrations. */
    largeSvgShapes: number;
    /** Elements with a `url(...)` background that is not a jpg/webp/png file. */
    backgroundImagePatterns: number;
    /** True when a background-image value mentions noise, grain or pattern. */
    hasNoise: boolean;
    /** True when an element applies a blurring backdrop-filter. */
    hasGlassmorphism: boolean;
  };
}
1242
+
1243
/**
 * Builds the imagery profile of the page: social preview images, hero image, format and
 * aspect-ratio distribution, photo vs illustration balance, and decorative background
 * patterns (gradients, noise, glassmorphism).
 *
 * Scan limits: the first 100 `<img>` elements (those under 50×50px are ignored), the first
 * 300 inline `<svg>` elements, and the first 1500 elements for background analysis.
 *
 * Multi-stop gradients are detected by counting the TOP-LEVEL commas of the first
 * linear/conic gradient in the background (3 or more). Commas inside colour functions such
 * as `rgb(1, 2, 3)` are not counted, so a plain two-stop gradient is not reported as
 * multi-stop and a long gradient without an angle is not missed.
 *
 * IMPORTANT: no named functions / named const-arrows inside the evaluated callback
 * (tsx/esbuild `__name` helper is undefined in the page context).
 */
async function extractImageryProfile(page: Page): Promise<ImageryProfile> {
  return page.evaluate(() => {
    // OG / Twitter meta — inline lookups to avoid tsx __name() compilation issues
    const ogImageEl = document.querySelector('meta[property="og:image"]') || document.querySelector('meta[name="og:image"]');
    const ogImage = ogImageEl ? ogImageEl.getAttribute('content') : null;
    const ogWEl = document.querySelector('meta[property="og:image:width"]');
    const ogW = ogWEl ? ogWEl.getAttribute('content') : null;
    const ogHEl = document.querySelector('meta[property="og:image:height"]');
    const ogH = ogHEl ? ogHEl.getAttribute('content') : null;
    const twitterEl = document.querySelector('meta[name="twitter:image"]') || document.querySelector('meta[property="twitter:image"]');
    const twitterImage = twitterEl ? twitterEl.getAttribute('content') : null;

    // All images on page
    const imgs = Array.from(document.querySelectorAll('img'));
    const formats = { png: 0, jpg: 0, webp: 0, svg: 0, gif: 0, other: 0 };
    const buckets = { landscape: 0, portrait: 0, square: 0, ultrawide: 0 };
    let totalW = 0;
    let totalH = 0;
    let counted = 0;
    let aboveFold = 0;
    const viewportH = window.innerHeight;

    let heroImage: { src: string; alt: string; width: number; height: number; aspectRatio: number } | null = null;
    let heroArea = 0;

    for (const img of imgs.slice(0, 100)) {
      const rect = img.getBoundingClientRect();
      const w = Math.round(rect.width);
      const h = Math.round(rect.height);
      if (w < 50 || h < 50) continue; // icons, tracking pixels, hidden images

      counted++;
      totalW += w;
      totalH += h;
      if (rect.top < viewportH) aboveFold++;

      // Hero = largest image whose top is within 1.5 viewport heights.
      const area = w * h;
      if (area > heroArea && rect.top < viewportH * 1.5) {
        heroArea = area;
        heroImage = {
          src: img.src,
          alt: img.alt || '',
          width: w,
          height: h,
          aspectRatio: Math.round((w / h) * 100) / 100,
        };
      }

      const ratio = w / h;
      if (ratio > 2.3) buckets.ultrawide++;
      else if (ratio > 1.15) buckets.landscape++;
      else if (ratio < 0.87) buckets.portrait++;
      else buckets.square++;

      // Format from the file extension of src, falling back to the first srcset candidate.
      const src = img.src.toLowerCase();
      const srcsetFirst = (img.srcset || '').toLowerCase().split(' ')[0];
      const extMatch = src.match(/\.(png|jpe?g|webp|svg|gif)(\?|$)/) || srcsetFirst.match(/\.(png|jpe?g|webp|svg|gif)(\?|$)/);
      const ext = extMatch ? extMatch[1] : '';
      if (ext === 'png') formats.png++;
      else if (ext === 'jpg' || ext === 'jpeg') formats.jpg++;
      else if (ext === 'webp') formats.webp++;
      else if (ext === 'svg') formats.svg++;
      else if (ext === 'gif') formats.gif++;
      else formats.other++;
    }

    // Count significant SVGs only (small icon-sized SVGs would otherwise dominate counts)
    const inlineSvgs = Array.from(document.querySelectorAll('svg'));
    let largeSvgs = 0;
    for (const svg of inlineSvgs.slice(0, 300)) {
      const rect = svg.getBoundingClientRect();
      if (rect.width >= 80 && rect.height >= 80) largeSvgs++;
    }
    formats.svg += largeSvgs; // only count significant SVGs as illustrations

    const totalCounted = counted || 1; // avoid dividing by zero on image-free pages
    const avgImageSize = {
      width: Math.round(totalW / totalCounted),
      height: Math.round(totalH / totalCounted),
    };

    // Heuristics:
    // - photo-heavy: 10+ raster photos (jpg/webp), regardless of icon count
    // - illustration-heavy: large SVGs dominate AND few raster photos exist
    // - otherwise: mixed or text-driven
    const photoCount = formats.jpg + formats.webp;
    const illustrationCount = formats.svg; // inline SVGs already filtered to >=80px
    const photoHeavy = photoCount >= 10;
    const illustrationHeavy = !photoHeavy && illustrationCount > photoCount * 1.5 && illustrationCount >= 5;

    // Phase 4.4 — Decorative patterns detection (gradient mesh, blobs, glassmorphism)
    let multiStopGradients = 0;
    let radialGradients = 0;
    let backgroundImagePatterns = 0;
    let hasNoise = false;
    let hasGlassmorphism = false;
    const allEls = document.querySelectorAll('*');
    const scanLimit = Math.min(allEls.length, 1500);
    for (let i = 0; i < scanLimit; i++) {
      const el = allEls[i];
      const cs = getComputedStyle(el);
      const bgImage = cs.backgroundImage;
      if (bgImage && bgImage !== 'none') {
        if (bgImage.indexOf('radial-gradient') !== -1) radialGradients++;

        // First linear/conic gradient: count commas at nesting depth 1 only, scanning to
        // the gradient's own closing parenthesis.
        const gradientStart = /\b(?:linear|conic)-gradient\(/.exec(bgImage);
        if (gradientStart) {
          let depth = 1;
          let topLevelCommas = 0;
          for (let p = gradientStart.index + gradientStart[0].length; p < bgImage.length && depth > 0; p++) {
            const ch = bgImage[p];
            if (ch === '(') depth++;
            else if (ch === ')') depth--;
            else if (ch === ',' && depth === 1) topLevelCommas++;
          }
          if (topLevelCommas >= 3) multiStopGradients++;
        }

        if (bgImage.indexOf('url(') !== -1 && !/\.(jpe?g|webp|png)/i.test(bgImage)) {
          backgroundImagePatterns++;
        }
        if (/noise|grain|pattern/i.test(bgImage)) hasNoise = true;
      }
      const backdrop = (cs as any).backdropFilter || (cs as any).webkitBackdropFilter;
      if (backdrop && /blur\(\d+/.test(backdrop)) hasGlassmorphism = true;
    }

    // Large SVG shapes (already counted above via largeSvgs)
    const decorativePatterns = {
      multiStopGradients,
      radialGradients,
      largeSvgShapes: largeSvgs,
      backgroundImagePatterns,
      hasNoise,
      hasGlassmorphism,
    };

    return {
      ogImage,
      ogImageWidth: ogW ? parseInt(ogW, 10) : null,
      ogImageHeight: ogH ? parseInt(ogH, 10) : null,
      twitterImage,
      heroImage,
      formats,
      totalImages: counted,
      totalAboveFold: aboveFold,
      aspectRatioBuckets: buckets,
      illustrationHeavy,
      photoHeavy,
      avgImageSize,
      decorativePatterns,
    };
  }) as Promise<ImageryProfile>;
}
1388
+
1389
/**
 * Collects up to the first 100 anchors of the page.
 *
 * For each anchor it records the resolved `href`, the trimmed visible text
 * (capped at 100 characters, empty string when there is none) and whether the
 * anchor sits inside a navigation-like ancestor (`nav`, `header`,
 * `[role="navigation"]`, or any element whose class contains "nav").
 */
async function extractLinks(page: Page): Promise<{ href: string; text: string; isNav: boolean }[]> {
  return page.evaluate(() => {
    const navAncestorSelector = 'nav, header, [role="navigation"], [class*="nav"]';
    const anchors = Array.from(document.querySelectorAll('a')).slice(0, 100);

    const collected: { href: string; text: string; isNav: boolean }[] = [];
    for (const anchor of anchors) {
      const insideNav = anchor.closest(navAncestorSelector) !== null;
      const label = anchor.innerText?.trim().slice(0, 100);
      collected.push({
        href: anchor.href,
        text: label || '',
        isNav: insideNav,
      });
    }
    return collected;
  });
}
1401
+
1402
+ // ── Advanced extractors (kept after Phase 1.2 cleanup — only zIndexMap + keyframes have consumers) ────
1403
+ import {
1404
+ extractKeyframes,
1405
+ extractZIndexMap,
1406
+ extractVisualEffects,
1407
+ type VisualEffects,
1408
+ } from './extractors/advanced.js';
1409
+ import { extractWidgets, type WidgetExtraction } from './extractors/widgets.js';
1410
+
1411
+ // ── Screenshots ──────────────────────────────────────────────────────
1412
+
1413
+ // v2.9 A.5 — fully hydrate lazy content BEFORE screenshots + DOM extraction.
1414
+ // Root cause of Attio's "white capture" + MIAM's stripped photos: IntersectionObserver / lazy
1415
+ // images never entered the viewport at capture time. Slow-scroll the whole page in steps (triggers
1416
+ // every lazy loader), wait for images to decode + network to settle, then return to top.
1417
/**
 * Best-effort hydration of lazy content before screenshots and DOM extraction.
 *
 * 1. Scrolls the page top-to-bottom in steps of 75% of the viewport height
 *    (minimum 240px) so lazy loaders / IntersectionObservers get triggered,
 *    then returns to the top.
 * 2. Waits (max 2.5s) for images that are still loading to decode.
 * 3. Waits (max 3s) for the network to go idle.
 *
 * Every failure is swallowed: this step must never block the extraction.
 */
async function ensureLazyLoaded(page: Page): Promise<void> {
  try {
    await page.evaluate(async () => {
      // Declared inside the callback: it is serialized and executed in the page,
      // so it cannot close over values from the Node side.
      const STEP_DELAY_MS = 110;
      // Hard cap on scroll steps. On infinite-scroll pages scrollHeight keeps
      // growing as we scroll, so the "reached the bottom" condition may never
      // become true and the evaluate call would hang forever.
      const MAX_STEPS = 200;

      await new Promise<void>((resolve) => {
        const step = Math.max(240, Math.floor(window.innerHeight * 0.75));
        let y = 0;
        let steps = 0;
        const backToTop = () => { window.scrollTo(0, 0); resolve(); };
        const tick = () => {
          window.scrollTo(0, y);
          y += step;
          steps++;
          if (y >= document.documentElement.scrollHeight || steps >= MAX_STEPS) {
            // Dwell once at the final position so observers at the bottom of
            // the page get a chance to fire before we jump back to the top.
            setTimeout(backToTop, STEP_DELAY_MS);
          } else {
            setTimeout(tick, STEP_DELAY_MS);
          }
        };
        tick();
      });
    });
    // Wait for newly-revealed images to decode (capped) + network to settle.
    await page.evaluate(async () => {
      const pending = Array.from(document.images).filter(i => !i.complete && i.src);
      await Promise.race([
        Promise.all(pending.map(i => (i.decode ? i.decode().catch(() => {}) : Promise.resolve()))),
        new Promise(r => setTimeout(r, 2500)),
      ]);
    }).catch(() => {});
    await page.waitForLoadState('networkidle', { timeout: 3000 }).catch(() => {});
  } catch { /* best-effort — never block extraction */ }
}
1443
+
1444
/**
 * Captures the screenshot set for one viewport into `outputDir`:
 *  - `full-page-<viewport>.png`  (falls back to a viewport-only capture on failure)
 *  - `above-fold-<viewport>.png`
 *  - `section-<index>-<tag>-<classes>-<viewport>.png` for up to 15 landmark
 *    elements / large direct children of <body>.
 *
 * All captures are best-effort: a failing screenshot is logged or skipped and
 * never aborts the extraction.
 *
 * @param page      Page to capture.
 * @param outputDir Directory the PNG files are written to.
 * @param viewport  Viewport name used as the file-name suffix.
 */
async function takeScreenshots(page: Page, outputDir: string, viewport: string): Promise<void> {
  // Full page (with timeout fallback) — heavy sites need longer timeouts
  try {
    await page.screenshot({
      path: join(outputDir, `full-page-${viewport}.png`),
      fullPage: true,
      timeout: 60000,
    });
  } catch {
    console.log(` ⚠️ Full-page screenshot timeout, using viewport-only`);
    try {
      await page.screenshot({
        path: join(outputDir, `full-page-${viewport}.png`),
        fullPage: false,
        timeout: 30000,
      });
    } catch {
      console.log(` ⚠️ Viewport screenshot also timeout, skipping`);
    }
  }

  // Above the fold — guarded like every other capture so a single failed
  // screenshot cannot abort the whole extraction.
  try {
    await page.screenshot({
      path: join(outputDir, `above-fold-${viewport}.png`),
      fullPage: false,
    });
  } catch {
    console.log(` ⚠️ Above-fold screenshot failed, skipping`);
  }

  // Individual sections: semantic landmarks first, then large direct children of <body>.
  const sections = await page.evaluate(() => {
    const candidates = [
      ...Array.from(document.querySelectorAll('header, nav, main, section, aside, footer')),
      ...(document.body ? Array.from(document.body.children) : []).filter(el => {
        const r = el.getBoundingClientRect();
        return r.height > 100 && r.width > 200;
      }),
    ];
    const seen = new Set<Element>();
    return candidates.filter(el => {
      if (seen.has(el)) return false;
      seen.add(el);
      return true;
    }).slice(0, 15).map((el, i) => {
      const r = el.getBoundingClientRect();
      const tag = el.tagName.toLowerCase();
      // Class names end up in a file name: utility classes such as "w-1/2" or
      // "md:flex" contain characters that are invalid (or path separators) on
      // common file systems, so anything outside [A-Za-z0-9_-] is replaced.
      const cls = Array.from(el.classList)
        .join('-')
        .replace(/[^a-zA-Z0-9_-]+/g, '_')
        .slice(0, 30) || 'no-class';
      return {
        name: `${i}-${tag}-${cls}`,
        // Document coordinates (viewport rect + current scroll offset).
        clip: {
          x: r.x + window.scrollX,
          y: r.y + window.scrollY,
          width: r.width,
          height: Math.min(r.height, 2000),
        },
      };
    });
  });

  for (const section of sections) {
    if (section.clip.width < 10 || section.clip.height < 10) continue;
    try {
      await page.screenshot({
        path: join(outputDir, `section-${section.name}-${viewport}.png`),
        // The clip is expressed in document coordinates. Without fullPage the
        // clip is trimmed to the viewport, so every section below the fold
        // would be rejected as an empty clip and silently skipped.
        fullPage: true,
        clip: section.clip,
        timeout: 15000,
      });
    } catch {
      // Skip sections that fail (out of bounds, etc.)
    }
  }
}
1508
+
1509
+ // ── Scroll-and-screenshot (catches lazy-loaded content) ─────────────
1510
+
1511
/**
 * Steps through the page one viewport height at a time and captures a
 * viewport-sized screenshot (`scroll-<index>-<viewport>.png`) at each stop,
 * then scrolls back to the top so later extractions start from a known state.
 *
 * At most 5 positions are visited with a 300ms pause each (previously
 * 10 × 800ms); lazy-load coverage is already provided by the full-page
 * screenshot taken earlier.
 */
async function scrollAndScreenshot(page: Page, outputDir: string, viewport: string): Promise<void> {
  const MAX_POSITIONS = 5;
  const WAIT_LAZY = 300;

  const viewportHeight = page.viewportSize()?.height || 900;

  // Total scrollable height of the document
  const totalHeight = await page.evaluate(() =>
    Math.max(
      document.body?.scrollHeight || 0,
      document.documentElement.scrollHeight,
    ),
  );

  // One stop per viewport height, capped at MAX_POSITIONS
  const positions: number[] = [];
  for (let offset = 0; offset < totalHeight && positions.length < MAX_POSITIONS; offset += viewportHeight) {
    positions.push(offset);
  }

  for (const [index, offset] of positions.entries()) {
    // Jump to the stop without smooth-scroll animation
    await page.evaluate((scrollY: number) => {
      window.scrollTo({ top: scrollY, behavior: 'instant' as ScrollBehavior });
    }, offset);

    // Give lazy-loaded content time to appear
    await page.waitForTimeout(WAIT_LAZY);

    try {
      await page.screenshot({
        path: join(outputDir, `scroll-${index}-${viewport}.png`),
        fullPage: false, // Only the current viewport
      });
    } catch {
      // Skip on failure
    }
  }

  // Return to the top for subsequent extractions
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'instant' as ScrollBehavior });
  });
  await page.waitForTimeout(500);
}
1561
+
1562
+ // ── Main extraction pipeline ─────────────────────────────────────────
1563
+
1564
/**
 * Creates a browser context and a page configured for one viewport.
 *
 * Mobile uses an iPhone Safari user agent and never sends `sec-ch-ua` Client
 * Hints (they are Chromium-only, so sending them with an iPhone UA is a
 * detectable bot fingerprint); every other viewport uses a desktop Chrome user
 * agent with matching Client Hints. An init script masks common automation
 * indicators before any page script runs.
 */
async function openViewportPage(
  browser: Browser,
  vpName: string,
  vpSize: { width: number; height: number },
): Promise<{ context: Awaited<ReturnType<Browser['newContext']>>; page: Page }> {
  const isMobile = vpName === 'mobile';
  const mobileUA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const desktopUA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';

  const context = await browser.newContext({
    viewport: vpSize,
    userAgent: isMobile ? mobileUA : desktopUA,
    deviceScaleFactor: isMobile ? 3 : 2,
    locale: 'fr-FR',
    timezoneId: 'Europe/Paris',
    extraHTTPHeaders: {
      'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
      // Client Hints only for desktop (Chrome) — never for mobile Safari
      ...(!isMobile ? {
        'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
      } : {}),
    },
  });

  const page = await context.newPage();

  // Mask automation indicators
  await page.addInitScript(() => {
    Object.defineProperty(navigator, 'webdriver', { get: () => false });
    Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
    (window as any).chrome = { runtime: {} };
  });

  return { context, page };
}

/**
 * Main extraction pipeline.
 *
 * For every entry of `VIEWPORTS` it loads `url`, dismisses popups, retries once
 * with a stealth browser when a bot challenge is detected, hydrates lazy
 * content, takes screenshots and runs all extractors. Results are written to
 * `extractions/<domain>/raw-css.json` (all viewports + schema version) and
 * `extractions/<domain>/extraction-summary.json` (desktop summary).
 *
 * @param url Absolute URL of the page to extract.
 * @throws Re-throws any error after logging it; the browser is always closed.
 */
async function extractFromURL(url: string): Promise<void> {
  // Anchored pattern: a plain-string replace('www.', '') would also strip a
  // "www." occurring in the middle of the hostname.
  const domain = new URL(url).hostname.replace(/^www\./, '');
  const baseDir = join(process.cwd(), 'extractions', domain);
  const screenshotDir = join(baseDir, 'screenshots');

  await mkdir(screenshotDir, { recursive: true });

  console.log(`\n🔍 Prism — Extracting: ${url}`);
  console.log(`📁 Output: ${baseDir}\n`);

  let browser: Browser | null = null;

  try {
    browser = await chromium.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-infobars',
        '--window-size=1440,900',
      ],
    });

    const results: Record<string, ExtractionResult> = {};

    for (const [vpName, vpSize] of Object.entries(VIEWPORTS)) {
      console.log(`📐 Viewport: ${vpName} (${vpSize.width}x${vpSize.height})`);

      let { context, page } = await openViewportPage(browser, vpName, vpSize);

      // Phase 2.1 — Fast page load: skip networkidle, use domcontentloaded + targeted waits
      // Why: networkidle waits for tracker scripts (GA, Hotjar) that often never settle on tracker-heavy
      // sites like Stripe/Coinbase, wasting 30s. domcontentloaded + load + conditional lazy-wait is faster
      // and still captures full computed styles.
      console.log(' ⏳ Loading page...');
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });
        // Wait for load event (fonts + critical CSS) but don't block on tracker network
        await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
      } catch {
        console.log(' ⚠️ Initial load failed, falling back to bare domcontentloaded...');
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 });
      }

      // Conditional lazy-wait: only wait if the page is long enough to suggest lazy-loaded content
      const isLongPage = await page.evaluate(() =>
        document.documentElement.scrollHeight > window.innerHeight * 2
      ).catch(() => false);
      if (isLongPage) await page.waitForTimeout(2000);

      // Dismiss common popups/modals
      await dismissPopups(page);

      // Phase 5.1.3 — Bot challenge detection (Cloudflare, CAPTCHA, etc.)
      const botCheck = await detectBotChallenge(page);
      if (botCheck.blocked) {
        console.log(` 🤖 Bot challenge: ${botCheck.reason} — stealth retry...`);
        await page.close().catch(() => {});
        await context.close().catch(() => {});
        await browser.close().catch(() => {});
        // Drop the stale handle so the `finally` block does not try to close an
        // already-closed browser if the stealth launch below fails.
        browser = null;

        // Stealth retry via browser-stealth.ts (playwright-extra + puppeteer-stealth)
        const { launchBrowser } = await import('./browser-stealth.js');
        browser = await launchBrowser({ stealth: true });
        ({ context, page } = await openViewportPage(browser, vpName, vpSize));

        console.log(' ⏳ Stealth retry — loading page...');
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {});
        await dismissPopups(page);

        const botCheck2 = await detectBotChallenge(page);
        if (botCheck2.blocked) {
          throw new Error(`Bot challenge persists after stealth retry: ${botCheck2.reason}`);
        }
        console.log(` ✅ Stealth retry succeeded`);
      }

      // ── v2.9 A.5 — hydrate lazy content before capturing anything ──
      console.log(' 💧 Hydrating lazy content (full-page scroll + decode)...');
      await ensureLazyLoaded(page);

      // ── Screenshots ──
      console.log(' 📸 Taking screenshots...');
      await takeScreenshots(page, screenshotDir, vpName);

      // ── Scroll-and-screenshot (catches lazy-loaded content) ──
      console.log(' 📜 Scroll-and-screenshot for lazy-loaded content...');
      await scrollAndScreenshot(page, screenshotDir, vpName);

      // ── Extract everything ──
      console.log(' 🎨 Extracting computed styles...');
      const [
        elements,
        cssVars,
        allColors,
        fonts,
        borderRadii,
        shadows,
        transitions,
        sections,
        images,
        imageryProfile,
        widgets,
        links,
        componentVariants,
        fontFaces,
        mediaBreakpoints,
      ] = await Promise.all([
        extractComputedStyles(page),
        extractCSSCustomProperties(page),
        extractAllColors(page),
        extractAllFonts(page),
        extractAllBorderRadii(page),
        extractAllShadows(page),
        extractAllTransitions(page),
        extractSections(page),
        extractImages(page),
        extractImageryProfile(page).catch((e) => { console.warn(' ⚠️ imagery profile extraction failed:', (e as Error).message); return null; }),
        extractWidgets(page).catch((e) => { console.warn(' ⚠️ widgets extraction failed:', (e as Error).message); return null; }),
        extractLinks(page),
        extractComponentVariants(page),
        extractFontFaces(page),
        extractMediaBreakpoints(page),
      ]);

      // ── Advanced design capture (visual effects: desktop only) ──
      const [keyframes, zIndexMap, visualEffects] = await Promise.all([
        extractKeyframes(page).catch((e) => { console.warn(' ⚠️ keyframes extraction failed:', (e as Error).message); return {}; }),
        extractZIndexMap(page).catch((e) => { console.warn(' ⚠️ z-index extraction failed:', (e as Error).message); return []; }),
        vpName === 'desktop' ? extractVisualEffects(page).catch((e) => { console.warn(' ⚠️ visual effects extraction failed:', (e as Error).message); return null; }) : Promise.resolve(null),
      ]);
      console.log(` 🎬 Advanced: ${Object.keys(keyframes).length} keyframes, ${zIndexMap.length} z-index`);
      if (visualEffects) console.log(` ✨ Visual effects: ${visualEffects.motionSummary}`);

      // ── Component States (hover/focus) — desktop only, sequential ──
      let componentStates: Record<string, ComponentStateStyles> = {};
      if (vpName === 'desktop') {
        console.log(' 🖱️ Extracting component states (hover/focus)...');
        componentStates = await extractComponentStates(page);
        const stateCount = Object.keys(componentStates).length;
        console.log(` → ${stateCount} component states captured`);
      }

      // ── OpenType Features ──
      const openTypeData = await extractOpenTypeFeatures(page);
      console.log(` 🔤 OpenType features: [${openTypeData.features.join(', ') || 'none'}] | axes: [${openTypeData.axes.join(', ') || 'none'}]`);

      const displaySignature = await extractDisplaySignature(page).catch((e) => { console.error(' ❌ displaySignature ERR:', (e as Error).message); return null; });
      if (displaySignature) console.log(` 🅰️ Display signature: ${displaySignature.family} ${displaySignature.fontSize} ${displaySignature.isSerif ? 'SERIF' : 'sans'}${displaySignature.isItalic ? ' italic' : ''}`);

      const pageTitle = await page.title();

      results[vpName] = {
        url,
        domain,
        timestamp: new Date().toISOString(),
        viewport: vpSize,
        pageTitle,
        cssCustomProperties: cssVars,
        elements,
        sections,
        allColors,
        allFontFamilies: fonts.families,
        allFontSizes: fonts.sizes,
        allBorderRadii: borderRadii,
        allShadows: shadows,
        allTransitions: transitions,
        images,
        imageryProfile: imageryProfile || undefined,
        widgets: widgets || undefined,
        links,
        componentVariants,
        componentStates,
        fontFaces,
        mediaBreakpoints,
        openTypeFeatures: openTypeData.features,
        variableAxes: openTypeData.axes,
        displaySignature: displaySignature || undefined,
        // Advanced capture
        keyframes,
        zIndexMap,
        visualEffects: visualEffects || undefined,
      };

      const variantCount = Object.values(componentVariants).reduce((sum, v) => sum + v.length, 0);
      console.log(` ✅ ${vpName} done — ${Object.keys(cssVars).length} CSS vars, ${allColors.length} colors, ${sections.length} sections, ${variantCount} component variants, ${fontFaces.length} font-faces, ${mediaBreakpoints.filter(b => !b.startsWith('@')).length} breakpoints`);

      await context.close();
    }

    // ── Save results ──
    console.log('\n💾 Saving extraction data...');

    // Phase 1.3 — add schema version so downstream can detect drift
    const versionedResults = {
      ...results,
      version: '2.4.0',
    };

    await writeFile(
      join(baseDir, 'raw-css.json'),
      JSON.stringify(versionedResults, null, 2),
    );

    // Save a summary for quick reference (built from the desktop viewport)
    const desktop = results.desktop;
    if (!desktop) {
      throw new Error('No "desktop" viewport result available — cannot build extraction summary');
    }
    const summary = {
      url,
      domain,
      extractedAt: new Date().toISOString(),
      pageTitle: desktop.pageTitle,
      cssCustomPropertiesCount: Object.keys(desktop.cssCustomProperties).length,
      colorsFound: desktop.allColors.length,
      fontFamilies: desktop.allFontFamilies,
      fontSizes: desktop.allFontSizes,
      borderRadii: desktop.allBorderRadii,
      shadows: desktop.allShadows,
      sectionsFound: desktop.sections.length,
      elementsFound: Object.entries(desktop.elements).filter(([, v]) => v !== null).map(([k]) => k),
      imagesCount: desktop.images.length,
      navLinksCount: desktop.links.filter(l => l.isNav).length,
      componentVariants: Object.fromEntries(
        Object.entries(desktop.componentVariants).map(([k, v]) => [k, v.length])
      ),
      fontFaces: desktop.fontFaces.map(f => `${f.family} (${f.weight} ${f.style})`),
      mediaBreakpoints: desktop.mediaBreakpoints.filter(b => !b.startsWith('@')),
    };

    await writeFile(
      join(baseDir, 'extraction-summary.json'),
      JSON.stringify(summary, null, 2),
    );

    console.log(`\n✅ Extraction complete for ${domain}`);
    console.log(` 📁 ${baseDir}/`);
    console.log(` 📸 Screenshots: ${screenshotDir}/`);
    console.log(` 🎨 Raw CSS: ${baseDir}/raw-css.json`);
    console.log(` 📊 Summary: ${baseDir}/extraction-summary.json`);
    console.log(`\n Next: run 'npm run analyze -- ${domain}' to analyze layout`);

  } catch (err) {
    console.error('❌ Extraction failed:', err);
    throw err;
  } finally {
    // Best-effort close: a failure here must not mask the original error.
    if (browser) await browser.close().catch(() => {});
  }
}
1864
+
1865
+ // ── Popup dismissal ──────────────────────────────────────────────────
1866
+
1867
/**
 * Dismisses cookie/consent banners and other blocking overlays.
 *
 * Layer 1 — click an "accept"/"close" button so the site's own consent callback
 *           runs: first in the main frame, then (only if nothing was clicked)
 *           in each child iframe.
 * Layer 2 — DOM cleanup: remove known CMP containers, hide translucent fixed
 *           overlays with a very high z-index, and unlock page scrolling.
 *
 * @param page Page to clean up.
 */
async function dismissPopups(page: Page): Promise<void> {
  // ── Layer 1: click accept (triggers the site's consent callback) ──
  const clickSelectors = [
    // Didomi (first — most common CMP in France)
    '#didomi-notice-agree-button',
    '[data-testid="didomi-notice-agree-button"]',
    'button[aria-label*="Accepter"]',
    'button[aria-label*="Accept all"]',
    'button[aria-label*="J\'accepte"]',
    '.didomi-continue-without-agreeing',
    // OneTrust
    '#onetrust-accept-btn-handler',
    'button[data-testid="ot-sdk-btn-allow-all"]',
    // Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    'a#CybotCookiebotDialogBodyLevelButtonAccept',
    '#CybotCookiebotDialogBodyButtonAccept',
    '.cm__btn-allow', '.cm__btn-accept',
    // Axeptio
    '[data-axeptio-action="accept"]', '.axeptio__btn-accept',
    // TarteAuCitron
    '#tarteaucitronPersonalize2', '.tarteaucitronAllow', '.tarteaucitron-accept',
    // Quantcast
    '.qc-cmp2-buttons-primary button',
    // Google CMP / Funding Choices (used by claude.com, anthropic.com, googleblog, etc.)
    '.fc-button-label',
    '.fc-cta-consent',
    '.qc-cmp2-summary-buttons button:nth-of-type(2)',
    'button[mode="primary"][label*="Accept"]',
    // Usercentrics
    '#uc-btn-accept-banner',
    'button[data-testid="uc-accept-all-button"]',
    // Pierrot (Shopify)
    '.pd-banner__button--accept', 'button[data-cookiebanner-action="accept"]',
    // Native dialog element
    'dialog button:has-text("Accept")',
    'dialog button:has-text("Accepter")',
    // Generic aria-label English/French
    'button[aria-label*="cookie" i][aria-label*="accept" i]',
    'button[aria-label*="cookie" i][aria-label*="allow" i]',
    'button[aria-label*="accepter" i]',
    // Generic consent
    'button[class*="cookie"][class*="accept"]',
    'button[class*="consent"][class*="accept"]',
    '[class*="cookie"] button[class*="accept"]',
    'button[class*="accept-all"]',
    '[class*="consent"] button',
    'button[class*="cookie"]', 'button[class*="accept"]', 'button[class*="consent"]',
    // Generic close
    '[class*="modal"] button[class*="close"]',
    '[class*="popup"] button[class*="close"]',
    '[class*="banner"] button[class*="close"]',
    '[aria-label="Close"]', '[aria-label="Dismiss"]',
  ];

  // Helper: clicks the first visible match in the given page/frame and returns
  // the selector that was hit, or null when nothing could be clicked.
  const tryClickInFrame = async (frame: Pick<Page, 'locator'>): Promise<string | null> => {
    for (const selector of clickSelectors) {
      try {
        const btn = frame.locator(selector).first();
        if (await btn.isVisible({ timeout: 600 })) {
          await btn.click({ timeout: 1500 });
          return selector;
        }
      } catch {
        // Selector unsupported or click failed — try the next one
      }
    }
    return null;
  };

  // ── Layer 1a: main frame ──
  let clicked = false;
  const mainHit = await tryClickInFrame(page);
  if (mainHit) {
    console.log(` 🖱️ Popup fermé via click (main): ${mainHit}`);
    clicked = true;
    await page.waitForTimeout(1500);
  }

  // ── Layer 1b: iframes (IAB TCF, Google Funding Choices, etc.) ──
  if (!clicked) {
    for (const frame of page.frames()) {
      if (frame === page.mainFrame()) continue;
      try {
        const frameUrl = frame.url();
        // Skip about:blank and frames without a meaningful URL
        if (!frameUrl || frameUrl === 'about:blank' || frameUrl.startsWith('data:')) continue;
        const hit = await tryClickInFrame(frame);
        if (hit) {
          console.log(` 🖱️ Popup fermé via click (iframe ${frameUrl.slice(0, 60)}): ${hit}`);
          clicked = true;
          await page.waitForTimeout(1500);
          break;
        }
      } catch {
        // Detached or inaccessible frame — try the next one
      }
    }
  }

  // ── Layer 2: DOM cleanup (remove remaining overlays) ──
  const removed = await page.evaluate(() => {
    let count = 0;
    const cmpSelectors = [
      '#didomi-host', '.didomi-popup-container', '#didomi-consent-popup',
      '[class*="didomi-popup"]', '[class*="didomi-notice"]',
      '#onetrust-banner-sdk', '#onetrust-consent-sdk', '.onetrust-pc-dark-filter',
      '#CookieConsent', '.CookieConsent', '#CybotCookiebotDialog',
      '.axeptio_container', '[class*="axeptio"]',
      '.tarteaucitronRoot', '#tarteaucitron', '#tarteaucitronRoot',
      '.qc-cmp2-container', '[class*="qc-cmp"]',
      '.pd-cookie-banner-window', '[class*="pd-banner"]',
      '[id*="cookie-banner"]', '[id*="consent-banner"]',
      '[class*="cookie-banner"]', '[class*="consent-banner"]',
      '[class*="cookie-notice"]', '[class*="consent-notice"]',
      '[class*="gdpr"]', '[id*="gdpr"]',
    ];
    for (const sel of cmpSelectors) {
      document.querySelectorAll(sel).forEach(el => {
        // The substring selectors above also match state classes that CMPs set
        // on the root elements (e.g. <body class="cookie-banner-open">).
        // Removing <html>/<head>/<body> would wipe the whole page, so skip them.
        if (el === document.documentElement || el === document.body || el === document.head) return;
        el.remove();
        count++;
      });
    }

    // Hide translucent fixed overlays/backdrops with a very high z-index
    document.querySelectorAll('[style*="position: fixed"], [style*="position:fixed"]').forEach(el => {
      const cs = window.getComputedStyle(el);
      const z = parseInt(cs.zIndex || '0', 10); // 'auto' → NaN → never > 9000
      // Numeric comparison; comparing the opacity strings would be lexicographic.
      const translucent = cs.backgroundColor.includes('rgba') || parseFloat(cs.opacity) < 1;
      if (z > 9000 && translucent) {
        (el as HTMLElement).style.display = 'none';
        count++;
      }
    });

    // Unlock scrolling on body / html
    if (document.body) {
      document.body.classList.remove(
        'didomi-popup-open', 'modal-open', 'no-scroll', 'overflow-hidden',
        'cookie-open', 'consent-open', 'noscroll'
      );
      document.body.style.overflow = '';
    }
    document.documentElement.style.overflow = '';
    return count;
  });

  if (removed > 0) {
    console.log(` 🧹 ${removed} overlay(s) nettoyé(s)`);
  }
}
2016
+
2017
+ // ── CLI ──────────────────────────────────────────────────────────────
2018
+
2019
// CLI entry point: `npm run extract -- <URL>`.
// Exits with status 1 when the URL argument is missing, cannot be parsed,
// or when the extraction itself rejects.
const url = process.argv[2];

if (!url) {
  const usageLines = [
    'Usage: npm run extract -- <URL>',
    'Example: npm run extract -- https://linear.app',
  ];
  for (const line of usageLines) console.error(line);
  process.exit(1);
}

// Validate URL — the URL constructor throws on anything it cannot parse
const urlIsParsable = ((candidate: string): boolean => {
  try {
    new URL(candidate);
    return true;
  } catch {
    return false;
  }
})(url);

if (!urlIsParsable) {
  console.error(`Invalid URL: ${url}`);
  process.exit(1);
}

extractFromURL(url).then(undefined, (err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});