designlang 10.2.0 → 10.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # Changelog
2
2
 
3
+ ## [10.4.0] — 2026-04-22
4
+
5
+ **Identification trio: icon system, background patterns, stack intel.**
6
+
7
+ ### Added
8
+
9
+ - **`src/extractors/icon-system.js`** — fingerprints the icon library (Lucide / Heroicons outline+solid / Phosphor / Tabler / Feather / Remix / Material) from stroke vs fill dominance, stroke width, grid size, and rounded-caps presence. Emits per-icon hints agents can act on.
10
+ - **`src/extractors/background-patterns.js`** — classifies noise / dot-grid / line-grid / gradient-mesh / svg-pattern / plain from computed `background-image` values. Merged into `*-visual-dna.json`.
11
+ - **`src/extractors/stack-intel.js`** — extends the existing stack-fingerprint with 12 CMSs (Webflow, Framer, Shopify, Ghost, Sanity, Contentful, Wix, Squarespace, WordPress, Hashnode, Notion, Bubble), 13 analytics platforms, and 7 experimentation platforms.
12
+ - Bin reads its own version from `package.json` — no more per-release version drift in the CLI.
13
+ - New outputs: `*-icon-system.json`, `*-stack-intel.json`.
14
+
15
+ ## [10.3.0] — 2026-04-22
16
+
17
+ **Perf + SEO.** designlang now doubles as a lightweight auditor.
18
+
19
+ ### Added
20
+
21
+ - **`src/extractors/perf.js`** — `captureCoreWebVitals(url)` opens a fresh Playwright context, measures LCP / CLS / INP via PerformanceObserver, categorises every network response into JS / CSS / font / image / document / other, counts third-party requests against a known-host list, and synthesises an interaction so INP reports. Returns grade buckets (good / needs-improvement / poor) per vital.
22
+ - **`src/extractors/seo.js`** — pure extractor for Open Graph, Twitter cards, canonical, manifest, theme-color, viewport, every favicon, and inline JSON-LD blocks (schema.org structured data).
23
+ - Crawler now captures `favicons`, `manifest`, and `<script type="application/ld+json">` content.
24
+ - New flag `--perf`. Auto-on with `--full`.
25
+ - New outputs: `*-seo.json`, `*-perf.json`.
26
+
3
27
  ## [10.2.0] — 2026-04-22
4
28
 
5
29
  **Dark mode pairing + responsive screenshots.** Joins the light & dark extractor passes into semantic pairs, and adds full-page captures at 4 breakpoints × (light, dark).
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  import { Command } from 'commander';
4
- import { mkdirSync, writeFileSync } from 'fs';
5
- import { resolve, join } from 'path';
4
+ import { mkdirSync, writeFileSync, readFileSync } from 'fs';
5
+ import { resolve, join, dirname } from 'path';
6
+ import { fileURLToPath } from 'url';
7
+
8
+ const __dirname = dirname(fileURLToPath(import.meta.url));
9
+ const PKG_VERSION = JSON.parse(readFileSync(resolve(__dirname, '..', 'package.json'), 'utf-8')).version;
6
10
  import chalk from 'chalk';
7
11
  import ora from 'ora';
8
12
  import { extractDesignLanguage } from '../src/index.js';
@@ -12,6 +16,7 @@ import { extractLogo } from '../src/extractors/logo.js';
12
16
  import { captureComponentScreenshotsV10 } from '../src/extractors/component-screenshots.js';
13
17
  import { pairDarkMode } from '../src/extractors/dark-mode-pair.js';
14
18
  import { captureResponsiveScreenshots } from '../src/extractors/responsive-screenshots.js';
19
+ import { captureCoreWebVitals, extractFontLoading } from '../src/extractors/perf.js';
15
20
  import { buildPromptPack } from '../src/formatters/prompt-pack.js';
16
21
  import { formatMarkdown } from '../src/formatters/markdown.js';
17
22
  import { formatTokens } from '../src/formatters/tokens.js';
@@ -55,7 +60,7 @@ const program = new Command();
55
60
  program
56
61
  .name('designlang')
57
62
  .description('Extract the complete design language from any website')
58
- .version('10.2.0');
63
+ .version(PKG_VERSION);
59
64
 
60
65
  // ── Main command: extract ──────────────────────────────────────
61
66
  program
@@ -88,6 +93,7 @@ program
88
93
  .option('--pages <n>', 'crawl N canonical pages (pricing/docs/blog/about/product) in addition to the homepage', parseInt)
89
94
  .option('--no-prompts', 'skip writing the prompt-pack directory')
90
95
  .option('--responsive-shots', 'capture full-page PNGs at 4 breakpoints × (light,dark)')
96
+ .option('--perf', 'measure Core Web Vitals + bundle profile (LCP/CLS/INP, JS/CSS/font/img bytes, third-party count)')
91
97
  .option('--json', 'output raw JSON to stdout (for CI/CD)')
92
98
  .option('--json-pretty', 'output formatted JSON to stdout')
93
99
  .option('--no-history', 'skip saving to history')
@@ -239,6 +245,19 @@ program
239
245
  // v10.2: dark-mode pairing (pure, based on already-extracted data).
240
246
  design.darkModePaired = pairDarkMode(design);
241
247
 
248
+ // v10.3: Core Web Vitals + bundle profile.
249
+ if (merged.full || merged.perf) {
250
+ spinner.text = 'Measuring Core Web Vitals...';
251
+ try {
252
+ design.perf = await captureCoreWebVitals(url, {
253
+ width: merged.width,
254
+ height: parseInt(merged.height) || 800,
255
+ channel: merged.systemChrome ? 'chrome' : undefined,
256
+ });
257
+ design.perf.fontLoading = extractFontLoading(design._raw?.light?.stack || {});
258
+ } catch (e) { design.perf = { error: e.message }; }
259
+ }
260
+
242
261
  // v10.2: responsive screenshots at 4 breakpoints × (light, dark).
243
262
  if (merged.full || merged.responsiveShots) {
244
263
  spinner.text = 'Capturing responsive screenshots...';
@@ -328,7 +347,7 @@ program
328
347
 
329
348
  // v10: page intent + section roles + visual DNA + component library + multi-page + prompt pack.
330
349
  files.push({ name: `${prefix}-intent.json`, content: JSON.stringify({ pageIntent: design.pageIntent, sectionRoles: design.sectionRoles }, null, 2), label: 'Page Intent + Section Roles' });
331
- files.push({ name: `${prefix}-visual-dna.json`, content: JSON.stringify({ materialLanguage: design.materialLanguage, imageryStyle: design.imageryStyle }, null, 2), label: 'Visual DNA' });
350
+ files.push({ name: `${prefix}-visual-dna.json`, content: JSON.stringify({ materialLanguage: design.materialLanguage, imageryStyle: design.imageryStyle, backgroundPatterns: design.backgroundPatterns }, null, 2), label: 'Visual DNA' });
332
351
  files.push({ name: `${prefix}-library.json`, content: JSON.stringify(design.componentLibrary || {}, null, 2), label: 'Component Library Detection' });
333
352
  if (design.logo && design.logo.found) {
334
353
  files.push({ name: `${prefix}-logo.json`, content: JSON.stringify(design.logo, null, 2), label: 'Logo Metadata' });
@@ -345,6 +364,18 @@ program
345
364
  if (design.responsiveShots && Array.isArray(design.responsiveShots.shots) && design.responsiveShots.shots.length) {
346
365
  files.push({ name: `${prefix}-responsive.json`, content: JSON.stringify(design.responsiveShots, null, 2), label: 'Responsive Screenshots index' });
347
366
  }
367
+ if (design.seo) {
368
+ files.push({ name: `${prefix}-seo.json`, content: JSON.stringify(design.seo, null, 2), label: 'SEO + Structured Data' });
369
+ }
370
+ if (design.perf && !design.perf.error) {
371
+ files.push({ name: `${prefix}-perf.json`, content: JSON.stringify(design.perf, null, 2), label: 'Perf + Bundle' });
372
+ }
373
+ if (design.iconSystem && (design.iconSystem.icons || []).length) {
374
+ files.push({ name: `${prefix}-icon-system.json`, content: JSON.stringify(design.iconSystem, null, 2), label: 'Icon System' });
375
+ }
376
+ if (design.stackIntel) {
377
+ files.push({ name: `${prefix}-stack-intel.json`, content: JSON.stringify(design.stackIntel, null, 2), label: 'Stack Intel (CMS/analytics/experimentation)' });
378
+ }
348
379
  if (merged.prompts !== false) {
349
380
  const pack = buildPromptPack(design);
350
381
  const promptsDir = join(outDir, `${prefix}-prompts`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "designlang",
3
- "version": "10.2.0",
3
+ "version": "10.4.0",
4
4
  "description": "Extract the complete design language from any website — colors, typography, spacing, shadows, motion, component anatomy, brand voice, page intent, section roles, material language, component library, imagery style, and logo. Outputs AI-optimized markdown, W3C design tokens, motion tokens, typed component stubs, Tailwind config, and ready-to-paste v0 / Lovable / Cursor / Claude-Artifacts prompts.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/crawler.js CHANGED
@@ -867,6 +867,17 @@ async function extractPageData(page, ignoreSelectors, scopeSelector) {
867
867
  results.fontData.documentFonts.push({ family: font.family.replace(/['"]/g, ''), style: font.style, weight: font.weight, status: font.status });
868
868
  }
869
869
 
870
+ // v10.3 — favicons, manifest, JSON-LD.
871
+ results.favicons = Array.from(document.querySelectorAll('link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'))
872
+ .slice(0, 10)
873
+ .map(l => ({ rel: l.getAttribute('rel'), href: l.href, sizes: l.getAttribute('sizes') || '', type: l.getAttribute('type') || '' }));
874
+ const manifestLink = document.querySelector('link[rel="manifest"]');
875
+ results.manifest = manifestLink ? manifestLink.href : null;
876
+ results.jsonLd = Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
877
+ .slice(0, 12)
878
+ .map(s => s.textContent || '')
879
+ .filter(Boolean);
880
+
870
881
  // Image data
871
882
  results.images = [];
872
883
  for (const img of document.querySelectorAll('img, picture img, [role="img"]')) {
@@ -0,0 +1,72 @@
1
+ // v10.4 — Background Patterns
2
+ //
3
+ // Classifies the visual backgrounds on a site from computed-style evidence:
4
+ // noise (repeated grain PNG/SVG), dot-grid, line-grid, gradient-mesh (multiple
5
+ // radial gradients), chequer, diagonal stripes, SVG patterns, or plain.
6
+ //
7
+ // Pure function — reads `rawData.light.computedStyles`, which every extractor
8
+ // already has access to; only each entry's `backgroundImage` / `background-image` value is inspected.
9
+
10
/**
 * Heuristic for a tiled dot grid.
 *
 * True only when the value contains a `radial-gradient(...)` call, the
 * substring "repeat" (any case), and two adjacent pixel lengths such as
 * `10px 10px`.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {boolean}
 */
function looksLikeDotGrid(image) {
  const requiredSignals = [
    /radial-gradient\(.*\)/i,
    /repeat/i,
    /(\d+px\s*\d+px)/,
  ];
  return requiredSignals.every((signal) => signal.test(image));
}
13
+
14
/**
 * Heuristic for a line grid: the value mentions `repeating-linear-gradient`
 * (case-insensitive) anywhere.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {boolean}
 */
function looksLikeLineGrid(image) {
  const repeatingLinear = /repeating-linear-gradient/i;
  return repeatingLinear.test(image);
}
18
+
19
/**
 * Heuristic for a grain/noise texture.
 *
 * Matches either an `feTurbulence` / SVG data-URI `fractalNoise` signature, or
 * a file reference named `noise.png`, `noise.svg` or `noise.webp`.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {boolean}
 */
function looksLikeNoise(image) {
  const noiseSignatures = [
    /feTurbulence|data:image\/svg.+fractalNoise/i,
    /noise\.(png|svg|webp)/i,
  ];
  return noiseSignatures.some((signature) => signature.test(image));
}
23
+
24
/**
 * Counts occurrences of `radial-gradient(` (case-insensitive) in the value.
 * `repeating-radial-gradient(` contains that substring and is counted too.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {number}
 */
function countRadialGradients(image) {
  // Splitting on the pattern yields one more piece than there are matches.
  const pieces = image.split(/radial-gradient\(/gi);
  return pieces.length - 1;
}
27
+
28
/**
 * Counts occurrences of `linear-gradient(` (case-insensitive) in the value.
 * `repeating-linear-gradient(` contains that substring and is counted too.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {number}
 */
function countLinearGradients(image) {
  const occurrences = Array.from(image.matchAll(/linear-gradient\(/gi));
  return occurrences.length;
}
31
+
32
/**
 * True when the value embeds an SVG data URI (`url("data:image/svg…`) that is
 * not already recognised as noise by `looksLikeNoise`.
 *
 * @param {string} image - A `background-image` style value.
 * @returns {boolean}
 */
function detectSvgPattern(image) {
  const embedsSvgDataUri = /url\("data:image\/svg/i.test(image);
  if (!embedsSvgDataUri) return false;
  return !looksLikeNoise(image);
}
35
+
36
/**
 * Classifies the background treatments used on a page.
 *
 * Walks `rawData.light.computedStyles`, reads each entry's `backgroundImage`
 * (or `background-image`) value and tags it with the first matching category,
 * in priority order: noise, dot-grid, line-grid, gradient-mesh (two or more
 * radial gradients), svg-pattern.
 *
 * @param {object} [rawData] - Crawler output; only `light.computedStyles` is read.
 * @returns {{labels: string[], counts: object, gradientTotals: object, samples: object[]}}
 *   `labels` is `['plain']` when no category qualifies. `samples` holds at most
 *   eight `{ tag, value }` entries, with `value` cut to 200 characters.
 */
export function extractBackgroundPatterns(rawData = {}) {
  // Anything that is not an array (missing, object, string) is treated as "no styles".
  const computed = rawData?.light?.computedStyles;
  const styles = Array.isArray(computed) ? computed : [];

  const counts = { noise: 0, dotGrid: 0, lineGrid: 0, meshCount: 0, svgPattern: 0 };
  const gradientTotals = { radial: 0, linear: 0 };
  const samples = [];

  for (const style of styles) {
    const bg = style?.backgroundImage || style?.['background-image'] || '';
    // Entries without a usable string value carry no pattern evidence.
    if (typeof bg !== 'string' || bg === '' || bg === 'none') continue;

    const radial = countRadialGradients(bg);
    gradientTotals.radial += radial;
    gradientTotals.linear += countLinearGradients(bg);

    let tag = null;
    if (looksLikeNoise(bg)) {
      counts.noise += 1;
      tag = 'noise';
    } else if (looksLikeDotGrid(bg)) {
      counts.dotGrid += 1;
      tag = 'dot-grid';
    } else if (looksLikeLineGrid(bg)) {
      counts.lineGrid += 1;
      tag = 'line-grid';
    } else if (radial >= 2) {
      counts.meshCount += 1;
      tag = 'gradient-mesh';
    } else if (detectSvgPattern(bg)) {
      counts.svgPattern += 1;
      tag = 'svg-pattern';
    }

    if (tag && samples.length < 8) samples.push({ tag, value: bg.slice(0, 200) });
  }

  // Ratios are taken over every style entry, including those with no background.
  const total = styles.length || 1;
  const labels = [];
  if (counts.noise / total > 0.002) labels.push('noise');
  if (counts.dotGrid / total > 0.002) labels.push('dot-grid');
  if (counts.lineGrid / total > 0.002) labels.push('line-grid');
  if (counts.meshCount > 0) labels.push('gradient-mesh');
  if (counts.svgPattern > 0) labels.push('svg-pattern');
  if (!labels.length) labels.push('plain');

  return {
    labels,
    counts,
    gradientTotals,
    samples,
  };
}
@@ -0,0 +1,110 @@
1
+ // v10.4 — Icon System fingerprint
2
+ //
3
+ // Pure extractor — operates on the icon payload the crawler already collects.
4
+ // We can't reliably match against Lucide/Phosphor/Heroicons path-data without
5
+ // shipping the full libraries, so this extractor does the next-best thing:
6
+ // infers the *system* an icon set came from (stroke vs fill, stroke width,
7
+ // corner style, grid size, viewBox convention) and emits guidance any LLM can
8
+ // act on ("use Lucide @ 1.5 stroke, 24px grid").
9
+
10
+ const LIBRARY_HINTS = [
11
+ { id: 'lucide', match: (ctx) => ctx.strokeDominant && ctx.avgWeight > 1.3 && ctx.avgWeight < 1.7 && ctx.grid24 && !ctx.roundedCaps, score: 0.8 },
12
+ { id: 'heroicons-outline', match: (ctx) => ctx.strokeDominant && ctx.avgWeight >= 1.8 && ctx.avgWeight <= 2.2 && ctx.grid24, score: 0.8 },
13
+ { id: 'heroicons-solid', match: (ctx) => ctx.fillDominant && ctx.grid24, score: 0.55 },
14
+ { id: 'phosphor', match: (ctx) => ctx.strokeDominant && ctx.roundedCaps && ctx.grid24, score: 0.7 },
15
+ { id: 'tabler', match: (ctx) => ctx.strokeDominant && ctx.avgWeight > 1.9 && ctx.grid24, score: 0.6 },
16
+ { id: 'feather', match: (ctx) => ctx.strokeDominant && ctx.avgWeight > 1.8 && ctx.roundedCaps && ctx.grid24, score: 0.7 },
17
+ { id: 'remix', match: (ctx) => ctx.mixedFillStroke && ctx.grid24, score: 0.45 },
18
+ { id: 'material', match: (ctx) => ctx.fillDominant && ctx.grid24, score: 0.4 },
19
+ ];
20
+
21
/**
 * Parses a stroke-width value into a finite number.
 *
 * @param {string|number|undefined} v - Raw value, e.g. `"1.5"`.
 * @returns {number} The parsed number, or 0 when `v` is falsy or not a finite number.
 */
function parseStroke(v) {
  const parsed = v ? parseFloat(v) : 0;
  return Number.isFinite(parsed) ? parsed : 0;
}
26
+
27
/**
 * Derives the icon grid size from an SVG `viewBox` string.
 *
 * The four numbers may be separated by whitespace, commas, or both (e.g.
 * `"0 0 24 24"` or `"0,0,24,24"`).
 *
 * @param {string} vb - The `viewBox` attribute value.
 * @returns {number|null} The side length when the box is square and one of
 *   16, 20, 24, 32, 48 or 64; otherwise `null` (including non-string input).
 */
function viewBoxGrid(vb) {
  if (typeof vb !== 'string') return null;
  const parts = vb.trim().split(/[\s,]+/).map(Number);
  if (parts.length !== 4 || parts.some((n) => !Number.isFinite(n))) return null;
  const [, , width, height] = parts;
  if (width === height && [16, 20, 24, 32, 48, 64].includes(width)) return width;
  return null;
}
35
+
36
/**
 * Reports whether the markup declares `stroke-linecap="round"` or
 * `stroke-linejoin="round"` (case-insensitive, double-quoted attributes only).
 *
 * @param {string} [svg] - SVG markup; a falsy value is treated as empty.
 * @returns {boolean}
 */
function detectRoundedCaps(svg) {
  const markup = svg || '';
  const roundedAttr = /stroke-linecap="round"|stroke-linejoin="round"/i;
  return roundedAttr.test(markup);
}
41
+
42
/**
 * Infers which icon library a set of icons most likely comes from.
 *
 * For each icon it reads `stroke` / `fill` (from the icon object, else the
 * first matching attribute in `icon.svg`), the first `stroke-width` attribute,
 * the grid size derived from `icon.viewBox`, and whether rounded caps/joins
 * are present. The aggregated signals are matched against `LIBRARY_HINTS`.
 *
 * @param {object[]} [icons] - Icon payloads; `svg`, `stroke`, `fill`,
 *   `viewBox` and `classList` are the fields read here.
 * @returns {object} `{ library, confidence, alternates, stats, signals, icons }`.
 *   For an empty or non-array input the result is the "unknown" shape without
 *   `alternates`. `icons` carries at most 30 per-icon hints whose `style` is
 *   `'stroke'`, `'fill'`, `'mixed'` (both present) or `'unknown'` (neither
 *   detected), so the hints agree with `stats.mixed`.
 */
export function extractIconSystem(icons = []) {
  if (!Array.isArray(icons) || icons.length === 0) {
    return { library: 'unknown', confidence: 0, stats: {}, signals: [], icons: [] };
  }

  let strokeCount = 0;
  let fillCount = 0;
  let mixed = 0;
  let rounded = 0;
  const weights = [];
  const gridHits = {};
  const perIconHints = [];

  for (const icon of icons) {
    const svg = icon.svg || '';
    const stroke = icon.stroke || (svg.match(/stroke="([^"]+)"/i) || [])[1];
    const fill = icon.fill || (svg.match(/fill="([^"]+)"/i) || [])[1];
    const strokeWidthMatch = svg.match(/stroke-width="([0-9.]+)"/i);
    const sw = strokeWidthMatch ? parseStroke(strokeWidthMatch[1]) : 0;

    const hasStroke = !!(stroke && stroke !== 'none');
    const hasFill = !!(fill && fill !== 'none');

    // One classification drives both the counters and the per-icon hint, so
    // the two can never disagree.
    let style = 'unknown';
    if (hasStroke && hasFill) {
      style = 'mixed';
      mixed++;
    } else if (hasStroke) {
      style = 'stroke';
      strokeCount++;
    } else if (hasFill) {
      style = 'fill';
      fillCount++;
    }

    if (sw > 0) weights.push(sw);
    const grid = viewBoxGrid(icon.viewBox);
    if (grid) gridHits[grid] = (gridHits[grid] || 0) + 1;
    if (detectRoundedCaps(svg)) rounded++;

    perIconHints.push({
      class: (icon.classList || '').slice(0, 80),
      grid,
      strokeWidth: sw || null,
      style,
    });
  }

  // Average over icons that declared a stroke width; 0 when none did.
  const avgWeight = weights.length ? weights.reduce((a, b) => a + b, 0) / weights.length : 0;
  const total = icons.length;
  const ctx = {
    strokeDominant: strokeCount / total > 0.55,
    fillDominant: fillCount / total > 0.55,
    mixedFillStroke: mixed / total > 0.3,
    avgWeight,
    roundedCaps: rounded / total > 0.4,
    grid24: gridHits[24] ? gridHits[24] / total > 0.5 : false,
  };

  // Keep only hints whose predicate matches, best score first.
  const scored = LIBRARY_HINTS
    .map((lib) => ({ id: lib.id, score: lib.match(ctx) ? lib.score : 0 }))
    .filter((candidate) => candidate.score > 0)
    .sort((a, b) => b.score - a.score);

  const primary = scored[0] || { id: 'unknown', score: 0 };

  return {
    library: primary.id,
    confidence: Number(primary.score.toFixed(3)),
    alternates: scored.slice(1, 4),
    stats: {
      count: total,
      strokeOnly: strokeCount,
      fillOnly: fillCount,
      mixed,
      avgStrokeWidth: Number(avgWeight.toFixed(2)),
      gridDistribution: gridHits,
      roundedCapsFraction: Number((rounded / total).toFixed(2)),
    },
    // Only boolean-true context flags are reported; `avgWeight` is numeric and never listed.
    signals: Object.entries(ctx).filter(([, v]) => v === true).map(([k]) => k),
    icons: perIconHints.slice(0, 30),
  };
}
@@ -0,0 +1,140 @@
1
+ // v10.3 — Perf & Bundle Profile
2
+ //
3
+ // Opens a fresh Playwright context, captures every network response, and
4
+ // measures Core Web Vitals via PerformanceObserver. Returns a single JSON
5
+ // payload a bin-level consumer can write as `*-perf.json`.
6
+ //
7
+ // No Lighthouse dependency — everything is pure `playwright` + the page's own
8
+ // `PerformanceObserver` API, which keeps the package size flat.
9
+
10
+ import { chromium } from 'playwright';
11
+
12
+ const THIRD_PARTY_HOSTS = [
13
+ 'google-analytics', 'googletagmanager', 'analytics.google', 'segment.', 'mixpanel',
14
+ 'amplitude', 'posthog', 'intercom', 'hotjar', 'fullstory', 'sentry', 'datadog',
15
+ 'cloudflare', 'fastly', 'doubleclick', 'facebook.net', 'adservice.google', 'hs-analytics',
16
+ 'stripe.com', 'recaptcha', 'hcaptcha', 'sentry-cdn', 'optimizely', 'statsig',
17
+ ];
18
+
19
/**
 * Buckets a response URL into a resource category by file extension.
 *
 * The extension must be followed by `?`, `#` or the end of the string, so both
 * query strings and fragments are tolerated. Rules are tried in order and the
 * first match wins; Google Fonts hosts fall back to `font` when no extension
 * rule matched first.
 *
 * @param {string} url - Absolute or relative resource URL.
 * @returns {'js'|'css'|'font'|'image'|'document'|'other'} `other` for a falsy
 *   URL or when nothing matches.
 */
function categorize(url) {
  if (!url) return 'other';
  const rules = [
    ['js', /\.(js|mjs)(?:[?#]|$)/i],
    ['css', /\.(css)(?:[?#]|$)/i],
    ['font', /\.(woff2?|ttf|otf|eot)(?:[?#]|$)/i],
    ['image', /\.(png|jpe?g|webp|avif|gif|svg|ico)(?:[?#]|$)/i],
    ['font', /fonts\.gstatic|fonts\.googleapis/],
    ['document', /\.(html?)(?:[?#]|$)/i],
  ];
  const hit = rules.find(([, pattern]) => pattern.test(url));
  return hit ? hit[0] : 'other';
}
29
+
30
/**
 * Decides whether a response URL counts as third-party for the page.
 *
 * A URL whose hostname equals `pageHost` is first-party; every other parseable
 * URL is third-party. The `THIRD_PARTY_HOSTS` check is kept, but as written it
 * cannot change the outcome because any differing hostname returns `true`.
 *
 * @param {string} resUrl - Absolute URL of the response.
 * @param {string} pageHost - Hostname of the page being measured.
 * @returns {boolean} `false` when the URL can't be parsed.
 */
function isThirdParty(resUrl, pageHost) {
  try {
    const { hostname } = new URL(resUrl);
    const sameHost = hostname === pageHost;
    if (sameHost) return false;
    const knownVendor = THIRD_PARTY_HOSTS.some((fragment) => hostname.includes(fragment));
    return knownVendor ? true : !sameHost;
  } catch {
    return false;
  }
}
38
+
39
/**
 * Counts "preload" mentions in the stack's meta tags and class-name sample.
 *
 * Metas are rendered as `name=content`, joined with spaces, and concatenated
 * directly with the space-joined class names before matching.
 *
 * @param {object} stack - Reads `classNameSample` (string[]) and `metas` ({name, content}[]).
 * @returns {{preloadCount: number}}
 */
function fontLoadingStrategy(stack) {
  const classText = (stack.classNameSample || []).join(' ');
  const metaText = (stack.metas || [])
    .map((meta) => `${meta.name || ''}=${meta.content || ''}`)
    .join(' ');
  const hits = (metaText + classText).match(/preload|rel=["']preload/g);
  return { preloadCount: hits ? hits.length : 0 };
}
45
+
46
/**
 * Loads `url` in a fresh headless Chromium context and reports Core Web Vitals
 * plus a per-category breakdown of the network responses observed.
 *
 * @param {string} url - Page to measure.
 * @param {object} [options]
 * @param {number} [options.width=1280] - Viewport width.
 * @param {number} [options.height=800] - Viewport height.
 * @param {string} [options.channel] - Browser channel passed to `chromium.launch` when set.
 * @param {number} [options.timeout=30000] - Navigation timeout in milliseconds.
 * @returns {Promise<object>} `{ vitals, ttfbApprox, bytes, counts, thirdParty, requestsTotal }`.
 *   `vitals` holds `lcp`, `cls`, `inp` and a grade for each; `lcpGrade` is
 *   `'unknown'` when no LCP entry was observed. `bytes` sums `content-length`
 *   headers, so responses without that header contribute 0.
 */
export async function captureCoreWebVitals(url, { width = 1280, height = 800, channel, timeout = 30000 } = {}) {
  const browser = await chromium.launch({ headless: true, ...(channel && { channel }) });
  try {
    const ctx = await browser.newContext({ viewport: { width, height }, colorScheme: 'light' });
    const page = await ctx.newPage();

    const requests = [];
    page.on('response', async (res) => {
      try {
        const req = res.request();
        const headers = res.headers();
        const contentLength = Number(headers['content-length'] || 0);
        requests.push({
          url: res.url(),
          method: req.method(),
          status: res.status(),
          type: categorize(res.url()),
          bytes: contentLength,
          fromCache: res.fromServiceWorker() || /hit/i.test(headers['x-cache'] || ''),
        });
      } catch { /* a response we cannot inspect is simply not counted */ }
    });

    // Installed before any page script runs so the observers see buffered entries.
    await page.addInitScript(() => {
      window.__dlVitals = { lcp: 0, cls: 0, inp: 0 };
      try {
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) window.__dlVitals.lcp = e.startTime;
        }).observe({ type: 'largest-contentful-paint', buffered: true });
      } catch {}
      try {
        let cls = 0;
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) {
            if (!e.hadRecentInput) cls += e.value;
          }
          window.__dlVitals.cls = cls;
        }).observe({ type: 'layout-shift', buffered: true });
      } catch {}
      try {
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) {
            if ((e.duration || 0) > window.__dlVitals.inp) window.__dlVitals.inp = e.duration;
          }
        }).observe({ type: 'event', buffered: true, durationThreshold: 16 });
      } catch {}
    });

    const start = Date.now();
    // Best effort: a slow or failed navigation still yields whatever was captured.
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout }).catch(() => {});
    // Fallback timing taken before the idle wait and the fixed sleep below,
    // so those delays never inflate it.
    const elapsedToDomContentLoaded = Date.now() - start;
    await page.waitForLoadState('networkidle').catch(() => {});
    // Give the observers a moment; synthesize an interaction so INP reports.
    await page.mouse.move(100, 100).catch(() => {});
    await page.mouse.click(100, 100).catch(() => {});
    await page.waitForTimeout(1200);

    const measured = await page.evaluate(() => {
      const [nav] = performance.getEntriesByType('navigation');
      return {
        vitals: { ...(window.__dlVitals || {}) },
        responseStart: nav ? nav.responseStart : 0,
      };
    });
    const pageHost = new URL(url).hostname;

    const lcp = Number(measured.vitals.lcp) || 0;
    const cls = Number(measured.vitals.cls) || 0;
    const inp = Number(measured.vitals.inp) || 0;
    // Prefer the browser's own navigation timing; fall back to wall-clock time.
    const ttfbApprox = measured.responseStart > 0
      ? Math.round(measured.responseStart)
      : elapsedToDomContentLoaded;

    const totals = { js: 0, css: 0, font: 0, image: 0, document: 0, other: 0 };
    const counts = { js: 0, css: 0, font: 0, image: 0, document: 0, other: 0 };
    let thirdPartyCount = 0;
    let thirdPartyBytes = 0;
    for (const r of requests) {
      totals[r.type] = (totals[r.type] || 0) + (r.bytes || 0);
      counts[r.type] = (counts[r.type] || 0) + 1;
      if (isThirdParty(r.url, pageHost)) {
        thirdPartyCount++;
        thirdPartyBytes += r.bytes || 0;
      }
    }

    // Rough classification vs Google's good / needs-improvement / poor thresholds.
    const bucket = (value, goodBelow, poorFrom) => {
      if (value < goodBelow) return 'good';
      return value < poorFrom ? 'needs-improvement' : 'poor';
    };

    return {
      vitals: {
        lcp: Math.round(lcp),
        cls: Number(cls.toFixed(4)),
        inp: Math.round(inp),
        // An LCP of 0 means no entry was observed, not an instant paint.
        lcpGrade: lcp > 0 ? bucket(lcp, 2500, 4000) : 'unknown',
        clsGrade: bucket(cls, 0.1, 0.25),
        inpGrade: bucket(inp, 200, 500),
      },
      ttfbApprox,
      bytes: totals,
      counts,
      thirdParty: { count: thirdPartyCount, bytes: thirdPartyBytes },
      requestsTotal: requests.length,
    };
  } finally {
    await browser.close();
  }
}
137
+
138
/**
 * Public entry point for the font-loading summary; delegates to
 * `fontLoadingStrategy` and returns its result unchanged.
 *
 * @param {object} [stack] - Stack payload, forwarded as-is.
 * @returns {object} Whatever `fontLoadingStrategy` returns.
 */
export function extractFontLoading(stack = {}) {
  const strategy = fontLoadingStrategy(stack);
  return strategy;
}
@@ -0,0 +1,69 @@
1
+ // v10.3 — SEO & Structured Data
2
+ //
3
+ // Pure extractor — operates on the metas + scripts the crawler already
4
+ // collected. Captures Open Graph, Twitter cards, canonical, manifest, theme
5
+ // color, and every inline JSON-LD block (schema.org structured data).
6
+
7
/**
 * Returns the `content` of the first meta whose `name` equals `name`,
 * compared case-insensitively.
 *
 * @param {{name?: string, content?: string}[]} metas - Collected meta tags.
 * @param {string} name - Meta name to look up.
 * @returns {string|null|undefined} The matching meta's `content` (which may be
 *   `undefined`), or `null` when no meta matches.
 */
function pickMeta(metas, name) {
  for (const meta of metas) {
    if ((meta.name || '').toLowerCase() === name.toLowerCase()) {
      return meta.content;
    }
  }
  return null;
}
11
+
12
/**
 * Placeholder: always returns an empty list and ignores its argument.
 *
 * @param {Array} [rawScripts] - Currently unused.
 * @returns {Array} Always `[]`.
 */
function jsonLdFromScripts(rawScripts = []) {
  const blocks = [];
  return blocks;
}
18
+
19
/**
 * Builds the SEO / structured-data summary from crawler output.
 *
 * Reads `rawData.light.stack.metas` for Open Graph (`og:*`), Twitter
 * (`twitter:*`), description, canonical, theme-color and viewport values, and
 * `rawData.light.{jsonLd, favicons, manifest}` for the remaining fields.
 *
 * JSON-LD handling: each block may be a string (parsed as JSON) or an already
 * parsed value. Top-level arrays are flattened and `@graph` containers are
 * expanded into their member nodes. Entries that are not objects are skipped,
 * and a block that fails to parse or serialise is dropped on its own.
 *
 * @param {object} [rawData] - Crawler output.
 * @returns {object} `{ openGraph, twitter, description, canonical, themeColor,
 *   viewport, favicons, manifest, structuredData, score }`. Each
 *   `structuredData` entry is `{ type, name, sample }`, with `sample` cut to
 *   400 characters.
 */
export function extractSeo(rawData = {}) {
  const stack = rawData.light?.stack || {};
  const metas = stack.metas || [];
  const openGraph = {};
  const twitter = {};
  for (const m of metas) {
    const name = (m.name || '').toLowerCase();
    if (name.startsWith('og:')) openGraph[name.slice(3)] = m.content;
    else if (name.startsWith('twitter:')) twitter[name.slice(8)] = m.content;
  }
  const description = pickMeta(metas, 'description');
  const canonical = pickMeta(metas, 'canonical');
  const themeColor = pickMeta(metas, 'theme-color');
  const viewport = pickMeta(metas, 'viewport');

  const inlineJsonLd = Array.isArray(rawData.light?.jsonLd) ? rawData.light.jsonLd : [];
  const favicons = rawData.light?.favicons || [];
  const manifest = rawData.light?.manifest || null;

  const isNode = (value) => value !== null && typeof value === 'object';

  const structured = [];
  for (const block of inlineJsonLd) {
    try {
      const parsed = typeof block === 'string' ? JSON.parse(block) : block;
      const roots = Array.isArray(parsed) ? parsed : [parsed];
      for (const root of roots) {
        if (!isNode(root)) continue;
        // A `@graph` wrapper has no type of its own; report its members instead.
        const nodes = Array.isArray(root['@graph']) ? root['@graph'] : [root];
        for (const node of nodes) {
          if (!isNode(node)) continue;
          structured.push({
            type: node['@type'] || 'Thing',
            name: node.name || node.headline || null,
            sample: JSON.stringify(node).slice(0, 400),
          });
        }
      }
    } catch { /* skip bad JSON-LD */ }
  }

  return {
    openGraph,
    twitter,
    description,
    canonical,
    themeColor,
    viewport,
    favicons,
    manifest,
    structuredData: structured,
    score: {
      hasOg: Object.keys(openGraph).length > 0,
      hasTwitter: Object.keys(twitter).length > 0,
      hasDescription: !!description,
      hasCanonical: !!canonical,
      hasStructuredData: structured.length > 0,
      hasFavicon: favicons.length > 0,
      hasThemeColor: !!themeColor,
    },
  };
}
@@ -0,0 +1,73 @@
1
+ // v10.4 — Stack Intel
2
+ //
3
+ // Extends stack-fingerprint.js with detectors for CMS platforms (Webflow,
4
+ // Framer, Shopify, Ghost, Sanity, Contentful, Wix, Squarespace, WordPress),
5
+ // analytics (GA, Segment, Mixpanel, PostHog, Amplitude, Heap), and
6
+ // experimentation platforms (Optimizely, Statsig, GrowthBook, LaunchDarkly,
7
+ // Split, Eppo). All signals come from script URLs + meta + known globals.
8
+
9
+ const CMS = [
10
+ { id: 'webflow', re: /webflow\.com|wf-|\.webflow\./i },
11
+ { id: 'framer', re: /framer\.(?:com|website)|__framer|framer-motion\b/i },
12
+ { id: 'shopify', re: /cdn\.shopify|shopify\.com|x-shopify/i },
13
+ { id: 'ghost', re: /ghost\.io|__ghost_|ghost-url/i },
14
+ { id: 'sanity', re: /cdn\.sanity\.io|sanity-studio/i },
15
+ { id: 'contentful', re: /cdn\.contentful\.com|ctfassets\.net/i },
16
+ { id: 'wix', re: /parastorage\.com|\.wix\.com/i },
17
+ { id: 'squarespace', re: /squarespace\.com|sqspcdn\.com|squarespace-cdn/i },
18
+ { id: 'wordpress', re: /wp-content|wp-includes|wordpress/i },
19
+ { id: 'hashnode', re: /hashnode\.com/i },
20
+ { id: 'notion', re: /notion\.so\/image|notion-static/i },
21
+ { id: 'bubble', re: /bubble\.io|bubble-cdn/i },
22
+ ];
23
+
24
// Analytics / monitoring vendors, each identified by a pattern tested against
// the combined script / meta / class-name haystack. Every pattern carries the
// `i` flag so matching is case-insensitive, like the CMS and experimentation tables.
const ANALYTICS = [
  { id: 'google-analytics', re: /google-analytics\.com|googletagmanager\.com|gtag\(/i },
  { id: 'segment', re: /segment\.com\/analytics|cdn\.segment\.io/i },
  { id: 'mixpanel', re: /cdn\.mxpnl\.com|mixpanel\.com\/lib/i },
  { id: 'amplitude', re: /amplitude\.com|cdn\.amplitude\.com/i },
  { id: 'posthog', re: /posthog\.com|ph\.posthog\.com/i },
  { id: 'heap', re: /heapanalytics\.com/i },
  { id: 'fullstory', re: /fullstory\.com/i },
  { id: 'hotjar', re: /static\.hotjar\.com|hj\.contentsquare/i },
  { id: 'vercel-analytics', re: /_vercel\/insights|vercel\/analytics/i },
  { id: 'plausible', re: /plausible\.io\/js|plausible\.io\/api/i },
  { id: 'fathom', re: /usefathom\.com/i },
  { id: 'sentry', re: /sentry\.io|sentry-cdn/i },
  { id: 'datadog', re: /datadoghq\.com|datadog-rum/i },
];
39
+
40
+ const EXPERIMENTATION = [
41
+ { id: 'optimizely', re: /optimizely\.com|cdn\.optimizely\./i },
42
+ { id: 'statsig', re: /statsig\.com/i },
43
+ { id: 'growthbook', re: /growthbook\.io/i },
44
+ { id: 'launchdarkly', re: /launchdarkly\.com/i },
45
+ { id: 'split', re: /split\.io|sdk\.split\.io/i },
46
+ { id: 'eppo', re: /eppo\.cloud/i },
47
+ { id: 'vercel-flags', re: /vercel\/flags|flags\.sdk/i },
48
+ ];
49
+
50
/**
 * Returns the ids of every entry whose pattern matches the haystack, in list order.
 *
 * @param {string} haystack - Text to search.
 * @param {{id: string, re: RegExp}[]} list - Detector table.
 * @returns {string[]}
 */
function fingerprint(haystack, list) {
  return list
    .filter(({ re }) => re.test(haystack))
    .map(({ id }) => id);
}
57
+
58
/**
 * Fingerprints CMS, analytics and experimentation vendors for a page.
 *
 * The haystack is three newline-separated sections: script entries joined by
 * " \n", metas rendered as "name content" joined by spaces, and the class-name
 * sample joined by spaces.
 *
 * @param {object} [stack] - Reads `scripts`, `metas` and `classNameSample`.
 * @returns {{cms: string[], analytics: string[], experimentation: string[],
 *   signals: {scriptCount: number, metaCount: number}}}
 */
export function extractStackIntel(stack = {}) {
  const scriptList = stack.scripts || [];
  const metaList = stack.metas || [];
  const classList = stack.classNameSample || [];

  const haystack = [
    scriptList.join(' \n'),
    metaList.map((meta) => `${meta.name || ''} ${meta.content || ''}`).join(' '),
    classList.join(' '),
  ].join('\n');

  const cms = fingerprint(haystack, CMS);
  const analytics = fingerprint(haystack, ANALYTICS);
  const experimentation = fingerprint(haystack, EXPERIMENTATION);

  return {
    cms,
    analytics,
    experimentation,
    signals: {
      scriptCount: scriptList.length,
      metaCount: metaList.length,
    },
  };
}
package/src/index.js CHANGED
@@ -33,6 +33,10 @@ import { extractSectionRoles } from './extractors/section-roles.js';
33
33
  import { extractComponentLibrary } from './extractors/component-library.js';
34
34
  import { extractMaterialLanguage } from './extractors/material-language.js';
35
35
  import { extractImageryStyle } from './extractors/imagery-style.js';
36
+ import { extractSeo } from './extractors/seo.js';
37
+ import { extractIconSystem } from './extractors/icon-system.js';
38
+ import { extractBackgroundPatterns } from './extractors/background-patterns.js';
39
+ import { extractStackIntel } from './extractors/stack-intel.js';
36
40
  import { formatDtcgTokens } from './formatters/dtcg-tokens.js';
37
41
  import { formatMotionTokens } from './formatters/motion-tokens.js';
38
42
 
@@ -137,6 +141,10 @@ export async function extractDesignLanguage(url, options = {}) {
137
141
  design.componentLibrary = safeExtract(extractComponentLibrary, rawData.light?.stack || {}) || { library: 'unknown', confidence: 0, evidence: [], alternates: [] };
138
142
  design.materialLanguage = safeExtract(extractMaterialLanguage, design) || { label: 'flat', confidence: 0, signals: [], metrics: {} };
139
143
  design.imageryStyle = safeExtract(extractImageryStyle, rawData.light?.images || []) || { label: 'none', confidence: 0, counts: {}, signals: [] };
144
+ design.seo = safeExtract(extractSeo, rawData) || { openGraph: {}, twitter: {}, structuredData: [], score: {} };
145
+ design.iconSystem = safeExtract(extractIconSystem, rawData.light?.icons || []) || { library: 'unknown', confidence: 0, stats: {}, signals: [], icons: [] };
146
+ design.backgroundPatterns = safeExtract(extractBackgroundPatterns, rawData) || { labels: ['plain'], counts: {}, gradientTotals: {}, samples: [] };
147
+ design.stackIntel = safeExtract(extractStackIntel, rawData.light?.stack || {}) || { cms: [], analytics: [], experimentation: [] };
140
148
  // Stash raw crawler output so downstream orchestration (multipage, smart)
141
149
  // can rebuild the digest without re-crawling.
142
150
  design._raw = rawData;
@@ -205,6 +213,11 @@ export { extractLogo } from './extractors/logo.js';
205
213
  export { captureComponentScreenshotsV10 } from './extractors/component-screenshots.js';
206
214
  export { pairDarkMode } from './extractors/dark-mode-pair.js';
207
215
  export { captureResponsiveScreenshots } from './extractors/responsive-screenshots.js';
216
+ export { captureCoreWebVitals, extractFontLoading } from './extractors/perf.js';
217
+ export { extractSeo } from './extractors/seo.js';
218
+ export { extractIconSystem } from './extractors/icon-system.js';
219
+ export { extractBackgroundPatterns } from './extractors/background-patterns.js';
220
+ export { extractStackIntel } from './extractors/stack-intel.js';
208
221
  export { refineWithSmart } from './classifiers/smart.js';
209
222
  export { crawlCanonicalPages, computeCrossPageConsistency, discoverCanonicalPages } from './multipage.js';
210
223
  export { buildPromptPack, formatV0Prompt, formatLovablePrompt, formatCursorPrompt, formatClaudeArtifactPrompt } from './formatters/prompt-pack.js';