designlang 10.2.0 → 10.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [10.3.0] — 2026-04-22
4
+
5
+ **Perf + SEO.** designlang now doubles as a lightweight auditor.
6
+
7
+ ### Added
8
+
9
+ - **`src/extractors/perf.js`** — `captureCoreWebVitals(url)` opens a fresh Playwright context, measures LCP / CLS / INP via PerformanceObserver, categorises every network response into JS / CSS / font / image / document / other, counts third-party requests against a known-host list, and synthesises an interaction so INP reports. Returns grade buckets (good / needs-improvement / poor) per vital.
10
+ - **`src/extractors/seo.js`** — pure extractor for Open Graph, Twitter cards, canonical, manifest, theme-color, viewport, every favicon, and inline JSON-LD blocks (schema.org structured data).
11
+ - Crawler now captures `favicons`, `manifest`, and `<script type="application/ld+json">` content.
12
+ - New flag `--perf`. Auto-on with `--full`.
13
+ - New outputs: `*-seo.json`, `*-perf.json`.
14
+
3
15
  ## [10.2.0] — 2026-04-22
4
16
 
5
17
  **Dark mode pairing + responsive screenshots.** Joins the light & dark extractor passes into semantic pairs, and adds full-page captures at 4 breakpoints × (light, dark).
@@ -12,6 +12,7 @@ import { extractLogo } from '../src/extractors/logo.js';
12
12
  import { captureComponentScreenshotsV10 } from '../src/extractors/component-screenshots.js';
13
13
  import { pairDarkMode } from '../src/extractors/dark-mode-pair.js';
14
14
  import { captureResponsiveScreenshots } from '../src/extractors/responsive-screenshots.js';
15
+ import { captureCoreWebVitals, extractFontLoading } from '../src/extractors/perf.js';
15
16
  import { buildPromptPack } from '../src/formatters/prompt-pack.js';
16
17
  import { formatMarkdown } from '../src/formatters/markdown.js';
17
18
  import { formatTokens } from '../src/formatters/tokens.js';
@@ -55,7 +56,7 @@ const program = new Command();
55
56
  program
56
57
  .name('designlang')
57
58
  .description('Extract the complete design language from any website')
58
- .version('10.2.0');
59
+ .version('10.3.0');
59
60
 
60
61
  // ── Main command: extract ──────────────────────────────────────
61
62
  program
@@ -88,6 +89,7 @@ program
88
89
  .option('--pages <n>', 'crawl N canonical pages (pricing/docs/blog/about/product) in addition to the homepage', parseInt)
89
90
  .option('--no-prompts', 'skip writing the prompt-pack directory')
90
91
  .option('--responsive-shots', 'capture full-page PNGs at 4 breakpoints × (light,dark)')
92
+ .option('--perf', 'measure Core Web Vitals + bundle profile (LCP/CLS/INP, JS/CSS/font/img bytes, third-party count)')
91
93
  .option('--json', 'output raw JSON to stdout (for CI/CD)')
92
94
  .option('--json-pretty', 'output formatted JSON to stdout')
93
95
  .option('--no-history', 'skip saving to history')
@@ -239,6 +241,19 @@ program
239
241
  // v10.2: dark-mode pairing (pure, based on already-extracted data).
240
242
  design.darkModePaired = pairDarkMode(design);
241
243
 
244
+ // v10.3: Core Web Vitals + bundle profile.
245
+ if (merged.full || merged.perf) {
246
+ spinner.text = 'Measuring Core Web Vitals...';
247
+ try {
248
+ design.perf = await captureCoreWebVitals(url, {
249
+ width: merged.width,
250
+ height: parseInt(merged.height) || 800,
251
+ channel: merged.systemChrome ? 'chrome' : undefined,
252
+ });
253
+ design.perf.fontLoading = extractFontLoading(design._raw?.light?.stack || {});
254
+ } catch (e) { design.perf = { error: e.message }; }
255
+ }
256
+
242
257
  // v10.2: responsive screenshots at 4 breakpoints × (light, dark).
243
258
  if (merged.full || merged.responsiveShots) {
244
259
  spinner.text = 'Capturing responsive screenshots...';
@@ -345,6 +360,12 @@ program
345
360
  if (design.responsiveShots && Array.isArray(design.responsiveShots.shots) && design.responsiveShots.shots.length) {
346
361
  files.push({ name: `${prefix}-responsive.json`, content: JSON.stringify(design.responsiveShots, null, 2), label: 'Responsive Screenshots index' });
347
362
  }
363
+ if (design.seo) {
364
+ files.push({ name: `${prefix}-seo.json`, content: JSON.stringify(design.seo, null, 2), label: 'SEO + Structured Data' });
365
+ }
366
+ if (design.perf && !design.perf.error) {
367
+ files.push({ name: `${prefix}-perf.json`, content: JSON.stringify(design.perf, null, 2), label: 'Perf + Bundle' });
368
+ }
348
369
  if (merged.prompts !== false) {
349
370
  const pack = buildPromptPack(design);
350
371
  const promptsDir = join(outDir, `${prefix}-prompts`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "designlang",
3
- "version": "10.2.0",
3
+ "version": "10.3.0",
4
4
  "description": "Extract the complete design language from any website — colors, typography, spacing, shadows, motion, component anatomy, brand voice, page intent, section roles, material language, component library, imagery style, and logo. Outputs AI-optimized markdown, W3C design tokens, motion tokens, typed component stubs, Tailwind config, and ready-to-paste v0 / Lovable / Cursor / Claude-Artifacts prompts.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/crawler.js CHANGED
@@ -867,6 +867,17 @@ async function extractPageData(page, ignoreSelectors, scopeSelector) {
867
867
  results.fontData.documentFonts.push({ family: font.family.replace(/['"]/g, ''), style: font.style, weight: font.weight, status: font.status });
868
868
  }
869
869
 
870
+ // v10.3 — favicons, manifest, JSON-LD.
871
+ results.favicons = Array.from(document.querySelectorAll('link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'))
872
+ .slice(0, 10)
873
+ .map(l => ({ rel: l.getAttribute('rel'), href: l.href, sizes: l.getAttribute('sizes') || '', type: l.getAttribute('type') || '' }));
874
+ const manifestLink = document.querySelector('link[rel="manifest"]');
875
+ results.manifest = manifestLink ? manifestLink.href : null;
876
+ results.jsonLd = Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
877
+ .slice(0, 12)
878
+ .map(s => s.textContent || '')
879
+ .filter(Boolean);
880
+
870
881
  // Image data
871
882
  results.images = [];
872
883
  for (const img of document.querySelectorAll('img, picture img, [role="img"]')) {
@@ -0,0 +1,140 @@
1
+ // v10.3 — Perf & Bundle Profile
2
+ //
3
+ // Opens a fresh Playwright context, captures every network response, and
4
+ // measures Core Web Vitals via PerformanceObserver. Returns a single JSON
5
+ // payload a bin-level consumer can write as `*-perf.json`.
6
+ //
7
+ // No Lighthouse dependency — everything is pure `playwright` + the page's own
8
+ // `PerformanceObserver` API, which keeps the package size flat.
9
+
10
+ import { chromium } from 'playwright';
11
+
12
// Hostname fragments treated as known third-party services. Entries are
// compared as substrings of a response hostname, so 'sentry' also covers
// hostnames containing 'sentry-cdn' and no separate entry is needed for it.
// Frozen so this shared module-level list cannot be mutated at runtime.
const THIRD_PARTY_HOSTS = Object.freeze([
  'google-analytics',
  'googletagmanager',
  'analytics.google',
  'segment.',
  'mixpanel',
  'amplitude',
  'posthog',
  'intercom',
  'hotjar',
  'fullstory',
  'sentry',
  'datadog',
  'cloudflare',
  'fastly',
  'doubleclick',
  'facebook.net',
  'adservice.google',
  'hs-analytics',
  'stripe.com',
  'recaptcha',
  'hcaptcha',
  'optimizely',
  'statsig',
]);
18
+
19
/**
 * Classify a network resource into a coarse bucket for the bundle profile.
 *
 * Precedence: file extension of the URL path, then the Google Fonts hosts,
 * then (only when nothing else matched) the optional `contentType`.
 *
 * @param {string} url - Resource URL (absolute, or a relative path).
 * @param {string} [contentType] - Optional `Content-Type` header value, used
 *   as a fallback for URLs whose path has no telling extension.
 * @returns {'js'|'css'|'font'|'image'|'document'|'other'} Resource bucket.
 */
function categorize(url, contentType = '') {
  if (!url) return 'other';

  // Test extensions against the path only, so a query value such as
  // `?next=/a.css` or a trailing `#fragment` cannot decide the category.
  let path;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not an absolute URL: strip query and fragment by hand.
    path = String(url).split(/[?#]/, 1)[0];
  }

  if (/\.(js|mjs)$/i.test(path)) return 'js';
  if (/\.css$/i.test(path)) return 'css';
  if (/\.(woff2?|ttf|otf|eot)$/i.test(path)) return 'font';
  if (/\.(png|jpe?g|webp|avif|gif|svg|ico)$/i.test(path)) return 'image';
  // Google Fonts stylesheets and files have no font extension in their URL.
  if (/fonts\.gstatic|fonts\.googleapis/.test(url)) return 'font';
  if (/\.html?$/i.test(path)) return 'document';

  // Fallback: the MIME type without parameters such as `; charset=utf-8`.
  const mime = String(contentType || '').split(';', 1)[0].trim().toLowerCase();
  if (/(java|ecma)script/.test(mime)) return 'js';
  if (mime === 'text/css') return 'css';
  if (mime.startsWith('font/') || /^application\/((x-)?font-|vnd\.ms-fontobject$)/.test(mime)) return 'font';
  if (mime.startsWith('image/')) return 'image';
  if (mime === 'text/html' || mime === 'application/xhtml+xml') return 'document';
  return 'other';
}
29
+
30
/**
 * Decide whether a response belongs to a third party relative to the page.
 *
 * Order of checks:
 *   1. unparseable or host-less URLs (e.g. `data:`) are never third-party;
 *   2. the exact page host is first-party;
 *   3. a hostname containing a known vendor fragment is third-party;
 *   4. the page's own site (ignoring a leading `www.`) and its subdomains are
 *      first-party;
 *   5. every other host is third-party.
 *
 * No public-suffix list is used, so sibling subdomains are only recognised
 * when the page itself sits on the bare or `www.` host.
 *
 * @param {string} resUrl - URL of the response.
 * @param {string} pageHost - Hostname of the audited page.
 * @param {readonly string[]} [knownHosts] - Vendor hostname fragments;
 *   defaults to the module-level list.
 * @returns {boolean} True when the response counts as third-party.
 */
function isThirdParty(resUrl, pageHost, knownHosts = THIRD_PARTY_HOSTS) {
  let host;
  try {
    host = new URL(resUrl).hostname;
  } catch {
    return false;
  }
  if (!host || host === pageHost) return false;
  if (knownHosts.some((fragment) => host.includes(fragment))) return true;

  const site = String(pageHost || '').replace(/^www\./, '');
  if (site && (host === site || host.endsWith(`.${site}`))) return false;
  return true;
}
38
+
39
/**
 * Count occurrences of the text "preload" in the stack's meta entries and
 * class-name sample.
 *
 * NOTE(review): this is a text heuristic over `stack.metas` (rendered as
 * `name=content`) and `stack.classNameSample`; whether either source carries
 * `<link rel="preload">` information depends on the crawler — confirm.
 *
 * @param {{ classNameSample?: string[], metas?: Array<{name?: string, content?: string}> }} stack
 * @returns {{ preloadCount: number }} Number of "preload" matches found.
 */
function fontLoadingStrategy(stack) {
  const { classNameSample, metas } = stack ?? {};
  const classText = (classNameSample || []).join(' ');
  const metaText = (metas || [])
    .map((meta) => `${meta?.name || ''}=${meta?.content || ''}`)
    .join(' ');

  // Keep a separator between the two sources so the tail of one and the head
  // of the other cannot fuse into a spurious "preload".
  const haystack = `${metaText} ${classText}`;
  const preloadCount = (haystack.match(/preload/g) || []).length;
  return { preloadCount };
}
45
+
46
// Bucket a metric: below `goodBelow` is good, below `poorFrom` needs
// improvement, anything else is poor.
function gradeVital(value, goodBelow, poorFrom) {
  if (value < goodBelow) return 'good';
  return value < poorFrom ? 'needs-improvement' : 'poor';
}

/**
 * Load `url` in a fresh headless Chromium context and collect Core Web Vitals
 * plus a per-category profile of every network response.
 *
 * Measurement is best-effort: a failed navigation does not throw, it is
 * reported through `navigationError` and the metrics are whatever the page
 * produced up to that point.
 *
 * @param {string} url - Absolute URL to audit.
 * @param {object} [options]
 * @param {number} [options.width=1280] - Viewport width.
 * @param {number} [options.height=800] - Viewport height.
 * @param {string} [options.channel] - Browser channel passed to `chromium.launch`.
 * @param {number} [options.timeout=30000] - Timeout in ms for navigation and
 *   for the network-idle wait.
 * @returns {Promise<object>} An object with:
 *   - `vitals`: `lcp` / `inp` (rounded ms), `cls` (4 decimals) and
 *     `lcpGrade` / `clsGrade` / `inpGrade`; `lcpGrade` is `'unknown'` when no
 *     LCP entry was observed;
 *   - `ttfbApprox`: the navigation entry's `responseStart` in ms, or the time
 *     until `goto` settled when that entry is unavailable;
 *   - `navigationError`: message of the navigation failure, or `null`;
 *   - `bytes` / `counts`: per-category totals (bytes come from the
 *     `Content-Length` header, so responses without one count as 0);
 *   - `thirdParty`: `{ count, bytes }`;
 *   - `requestsTotal`: number of responses seen.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
export async function captureCoreWebVitals(url, { width = 1280, height = 800, channel, timeout = 30000 } = {}) {
  // Parse first: an invalid URL fails here, before a browser is launched.
  const requestedHost = new URL(url).hostname;

  const browser = await chromium.launch({ headless: true, ...(channel && { channel }) });
  try {
    const ctx = await browser.newContext({ viewport: { width, height }, colorScheme: 'light' });
    const page = await ctx.newPage();

    const requests = [];
    page.on('response', (res) => {
      try {
        const headers = res.headers();
        requests.push({
          url: res.url(),
          method: res.request().method(),
          status: res.status(),
          type: categorize(res.url(), headers['content-type']),
          bytes: Number(headers['content-length']) || 0,
          fromCache: res.fromServiceWorker() || /hit/i.test(headers['x-cache'] || ''),
        });
      } catch { /* ignore */ }
    });

    // Install the observers before any page script runs. Each one is wrapped
    // separately so an unsupported entry type does not disable the others.
    await page.addInitScript(() => {
      window.__dlVitals = { lcp: 0, cls: 0, inp: 0 };
      try {
        // The last LCP entry reported wins.
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) window.__dlVitals.lcp = e.startTime;
        }).observe({ type: 'largest-contentful-paint', buffered: true });
      } catch {}
      try {
        // Sum of layout shifts that were not preceded by recent input.
        let cls = 0;
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) {
            if (!e.hadRecentInput) cls += e.value;
          }
          window.__dlVitals.cls = cls;
        }).observe({ type: 'layout-shift', buffered: true });
      } catch {}
      try {
        // Longest observed event duration stands in for INP.
        new PerformanceObserver((list) => {
          for (const e of list.getEntries()) {
            if ((e.duration || 0) > window.__dlVitals.inp) window.__dlVitals.inp = e.duration;
          }
        }).observe({ type: 'event', buffered: true, durationThreshold: 16 });
      } catch {}
    });

    const start = Date.now();
    let navigationError = null;
    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
    } catch (err) {
      // Best-effort: keep going, but surface the failure to the caller.
      navigationError = err.message;
    }
    const navElapsed = Date.now() - start;
    await page.waitForLoadState('networkidle', { timeout }).catch(() => {});

    // Snapshot the landed URL and the navigation timing before the synthetic
    // click below, which could trigger a navigation of its own.
    const landedUrl = page.url();
    const responseStart = await page
      .evaluate(() => {
        const [nav] = performance.getEntriesByType('navigation');
        return nav ? nav.responseStart : 0;
      })
      .catch(() => 0);

    // Give the observers a moment; synthesize an interaction so INP reports.
    await page.mouse.move(100, 100).catch(() => {});
    await page.mouse.click(100, 100).catch(() => {});
    await page.waitForTimeout(1200);

    const vitals = await page.evaluate(() => ({ ...(window.__dlVitals || {}) }));

    // Compare against the host the page actually landed on, so a redirect
    // (e.g. to a `www.` host) does not turn every request into third-party.
    let pageHost = requestedHost;
    try {
      pageHost = new URL(landedUrl).hostname || requestedHost;
    } catch { /* keep the requested host */ }

    const totals = { js: 0, css: 0, font: 0, image: 0, document: 0, other: 0 };
    const counts = { js: 0, css: 0, font: 0, image: 0, document: 0, other: 0 };
    let thirdPartyCount = 0;
    let thirdPartyBytes = 0;
    for (const r of requests) {
      totals[r.type] = (totals[r.type] || 0) + (r.bytes || 0);
      counts[r.type] = (counts[r.type] || 0) + 1;
      if (isThirdParty(r.url, pageHost)) {
        thirdPartyCount++;
        thirdPartyBytes += r.bytes || 0;
      }
    }

    const lcp = vitals.lcp || 0;
    const cls = vitals.cls || 0;
    const inp = vitals.inp || 0;

    return {
      vitals: {
        lcp: Math.round(lcp),
        cls: Number(cls.toFixed(4)),
        inp: Math.round(inp),
        // Rough classification vs Google's good/needs-improvement thresholds.
        // An LCP of 0 means no entry was observed, which is not a "good" score.
        lcpGrade: lcp > 0 ? gradeVital(lcp, 2500, 4000) : 'unknown',
        clsGrade: gradeVital(cls, 0.1, 0.25),
        inpGrade: gradeVital(inp, 200, 500),
      },
      ttfbApprox: responseStart > 0 ? Math.round(responseStart) : navElapsed,
      navigationError,
      bytes: totals,
      counts,
      thirdParty: { count: thirdPartyCount, bytes: thirdPartyBytes },
      requestsTotal: requests.length,
    };
  } finally {
    await browser.close();
  }
}
137
+
138
/**
 * Public entry point for the font-loading summary.
 *
 * @param {object} [stack={}] - Stack data; forwarded unchanged to
 *   `fontLoadingStrategy`.
 * @returns {{ preloadCount: number }} Whatever `fontLoadingStrategy` reports.
 */
export function extractFontLoading(stack = {}) {
  const summary = fontLoadingStrategy(stack);
  return summary;
}
@@ -0,0 +1,69 @@
1
+ // v10.3 — SEO & Structured Data
2
+ //
3
+ // Pure extractor — operates on the metas + scripts the crawler already
4
+ // collected. Captures Open Graph, Twitter cards, canonical, manifest, theme
5
+ // color, and every inline JSON-LD block (schema.org structured data).
6
+
7
/**
 * Look up a meta entry by name (case-insensitive) and return its content.
 *
 * @param {Array<{name?: string, content?: string}>} metas - Meta entries;
 *   a missing list or null entries are tolerated.
 * @param {string} name - Meta name to find.
 * @returns {string|null} Content of the first matching entry, or `null` when
 *   there is no match or the match carries no content.
 */
function pickMeta(metas, name) {
  const wanted = String(name).toLowerCase();
  const hit = (metas || []).find((meta) => (meta?.name || '').toLowerCase() === wanted);
  // `?? null` keeps an empty-string content as-is but normalises a missing
  // one, so callers always get a string or null (never undefined).
  return hit?.content ?? null;
}
11
+
12
/**
 * Placeholder for deriving JSON-LD blocks from a script list.
 *
 * The argument is currently ignored and the result is always a fresh empty
 * array. `extractSeo` does not call this; it reads inline JSON-LD from
 * `rawData.light.jsonLd` instead.
 *
 * @param {Array} [rawScripts=[]] - Script entries (unused).
 * @returns {Array} Always an empty array.
 */
function jsonLdFromScripts(rawScripts = []) {
  const blocks = [];
  return blocks;
}
18
+
19
/**
 * Build the SEO / structured-data summary from raw crawler output.
 *
 * Reads `rawData.light.stack.metas` for Open Graph, Twitter and named meta
 * values, and `rawData.light.favicons` / `.manifest` / `.jsonLd` for the rest.
 *
 * @param {object} [rawData={}] - Raw crawler payload.
 * @returns {object} An object with:
 *   - `openGraph`, `twitter`: maps keyed by the name without its prefix;
 *   - `description`, `canonical`, `themeColor`, `viewport`: strings or null;
 *   - `favicons`: array; `manifest`: value or null;
 *   - `structuredData`: array of `{ type, name, sample }`;
 *   - `score`: boolean flags summarising what was found.
 */
export function extractSeo(rawData = {}) {
  const light = rawData.light || {};
  const metas = (light.stack || {}).metas || [];

  const openGraph = {};
  const twitter = {};
  for (const m of metas) {
    const name = (m.name || '').toLowerCase();
    if (name.startsWith('og:')) openGraph[name.slice(3)] = m.content;
    else if (name.startsWith('twitter:')) twitter[name.slice(8)] = m.content;
  }

  const description = pickMeta(metas, 'description');
  // NOTE(review): canonical URLs are normally a <link rel="canonical">, not a
  // <meta>; this only finds one if the crawler stores it among the metas —
  // confirm against the crawler.
  const canonical = pickMeta(metas, 'canonical');
  const themeColor = pickMeta(metas, 'theme-color');
  const viewport = pickMeta(metas, 'viewport');

  const inlineJsonLd = Array.isArray(light.jsonLd) ? light.jsonLd : [];
  const favicons = Array.isArray(light.favicons) ? light.favicons : [];
  const manifest = light.manifest || null;

  const structured = [];
  for (const block of inlineJsonLd) {
    try {
      const parsed = typeof block === 'string' ? JSON.parse(block) : block;
      const roots = Array.isArray(parsed) ? parsed : [parsed];
      // A document may wrap its nodes in `@graph`; report each node rather
      // than one anonymous wrapper entry.
      const entries = roots.flatMap((root) => (Array.isArray(root?.['@graph']) ? root['@graph'] : [root]));
      for (const e of entries) {
        // Only objects can describe a schema.org node; skip null/primitives
        // without discarding the rest of the block.
        if (e === null || typeof e !== 'object') continue;
        structured.push({
          type: e['@type'] || 'Thing',
          name: e.name || e.headline || null,
          sample: JSON.stringify(e).slice(0, 400),
        });
      }
    } catch { /* skip bad JSON-LD */ }
  }

  return {
    openGraph,
    twitter,
    description,
    canonical,
    themeColor,
    viewport,
    favicons,
    manifest,
    structuredData: structured,
    score: {
      hasOg: Object.keys(openGraph).length > 0,
      hasTwitter: Object.keys(twitter).length > 0,
      hasDescription: !!description,
      hasCanonical: !!canonical,
      hasStructuredData: structured.length > 0,
      hasFavicon: favicons.length > 0,
      hasThemeColor: !!themeColor,
    },
  };
}
package/src/index.js CHANGED
@@ -33,6 +33,7 @@ import { extractSectionRoles } from './extractors/section-roles.js';
33
33
  import { extractComponentLibrary } from './extractors/component-library.js';
34
34
  import { extractMaterialLanguage } from './extractors/material-language.js';
35
35
  import { extractImageryStyle } from './extractors/imagery-style.js';
36
+ import { extractSeo } from './extractors/seo.js';
36
37
  import { formatDtcgTokens } from './formatters/dtcg-tokens.js';
37
38
  import { formatMotionTokens } from './formatters/motion-tokens.js';
38
39
 
@@ -137,6 +138,7 @@ export async function extractDesignLanguage(url, options = {}) {
137
138
  design.componentLibrary = safeExtract(extractComponentLibrary, rawData.light?.stack || {}) || { library: 'unknown', confidence: 0, evidence: [], alternates: [] };
138
139
  design.materialLanguage = safeExtract(extractMaterialLanguage, design) || { label: 'flat', confidence: 0, signals: [], metrics: {} };
139
140
  design.imageryStyle = safeExtract(extractImageryStyle, rawData.light?.images || []) || { label: 'none', confidence: 0, counts: {}, signals: [] };
141
+ design.seo = safeExtract(extractSeo, rawData) || { openGraph: {}, twitter: {}, structuredData: [], score: {} };
140
142
  // Stash raw crawler output so downstream orchestration (multipage, smart)
141
143
  // can rebuild the digest without re-crawling.
142
144
  design._raw = rawData;
@@ -205,6 +207,8 @@ export { extractLogo } from './extractors/logo.js';
205
207
  export { captureComponentScreenshotsV10 } from './extractors/component-screenshots.js';
206
208
  export { pairDarkMode } from './extractors/dark-mode-pair.js';
207
209
  export { captureResponsiveScreenshots } from './extractors/responsive-screenshots.js';
210
+ export { captureCoreWebVitals, extractFontLoading } from './extractors/perf.js';
211
+ export { extractSeo } from './extractors/seo.js';
208
212
  export { refineWithSmart } from './classifiers/smart.js';
209
213
  export { crawlCanonicalPages, computeCrossPageConsistency, discoverCanonicalPages } from './multipage.js';
210
214
  export { buildPromptPack, formatV0Prompt, formatLovablePrompt, formatCursorPrompt, formatClaudeArtifactPrompt } from './formatters/prompt-pack.js';