dembrandt 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,12 +54,36 @@ dembrandt bmw.de --dark-mode # Extract colors from dark mode variant
54
54
  dembrandt bmw.de --mobile # Use mobile viewport (390x844, iPhone 12/13/14/15) for responsive analysis
55
55
  dembrandt bmw.de --slow # 3x longer timeouts (24s hydration) for JavaScript-heavy sites
56
56
  dembrandt bmw.de --brand-guide # Generate a brand guide PDF
57
+ dembrandt bmw.de --pages 5 # Analyze 5 pages (homepage + 4 discovered pages), merges results
58
+ dembrandt bmw.de --sitemap # Discover pages from sitemap.xml instead of DOM links
59
+ dembrandt bmw.de --pages 10 --sitemap # Combine: up to 10 pages discovered via sitemap
57
60
  dembrandt bmw.de --no-sandbox # Disable Chromium sandbox (required for Docker/CI)
58
61
  dembrandt bmw.de --browser=firefox # Use Firefox instead of Chromium (better for Cloudflare bypass)
59
62
  ```
60
63
 
61
64
  Default: formatted terminal display only. Use `--save-output` to persist results as JSON files. Browser automatically retries in visible mode if headless extraction fails.
62
65
 
66
+ ### Multi-Page Extraction
67
+
68
+ Analyze multiple pages to get a more complete picture of a site's design system. Results are merged into a single unified output with cross-page confidence boosting — colors appearing on multiple pages get higher confidence scores.
69
+
70
+ ```bash
71
+ # Analyze homepage + 4 auto-discovered pages (default: 5 total)
72
+ dembrandt stripe.com --pages 5
73
+
74
+ # Use sitemap.xml for page discovery instead of DOM link scraping
75
+ dembrandt stripe.com --sitemap
76
+
77
+ # Combine both: up to 10 pages from sitemap
78
+ dembrandt stripe.com --pages 10 --sitemap
79
+ ```
80
+
81
+ **Page discovery** works two ways:
82
+ - **DOM links** (default): Scrapes navigation, header, and footer links from the homepage, prioritizing key pages like /pricing, /about, /features
83
+ - **Sitemap** (`--sitemap`): Parses sitemap.xml (checks robots.txt first), follows sitemapindex references, and scores URLs by importance
84
+
85
+ Pages are crawled sequentially with polite delays. Failed pages are skipped without aborting the run.
86
+
63
87
  ### Browser Selection
64
88
 
65
89
  By default, dembrandt uses Chromium. If you encounter bot detection or timeouts (especially on sites behind Cloudflare), try Firefox which is often more successful at bypassing these protections:
package/index.js CHANGED
@@ -15,13 +15,15 @@ import { extractBranding } from "./lib/extractors.js";
15
15
  import { displayResults } from "./lib/display.js";
16
16
  import { toW3CFormat } from "./lib/w3c-exporter.js";
17
17
  import { generatePDF } from "./lib/pdf.js";
18
+ import { parseSitemap } from "./lib/discovery.js";
19
+ import { mergeResults } from "./lib/merger.js";
18
20
  import { writeFileSync, mkdirSync } from "fs";
19
21
  import { join } from "path";
20
22
 
21
23
  program
22
24
  .name("dembrandt")
23
25
  .description("Extract design tokens from any website")
24
- .version("0.7.2")
26
+ .version("0.8.0")
25
27
  .argument("<url>")
26
28
  .option("--browser <type>", "Browser to use (chromium|firefox)", "chromium")
27
29
  .option("--json-only", "Output raw JSON")
@@ -34,6 +36,12 @@ program
34
36
  .option("--no-sandbox", "Disable browser sandbox (needed for Docker/CI)")
35
37
  .option("--raw-colors", "Include pre-filter raw colors in JSON output")
36
38
  .option("--screenshot <path>", "Save a screenshot of the page")
39
+ .option("--pages <n>", "Analyze up to N total pages including start URL (default: 5)", (v) => {
40
+ const n = parseInt(v, 10);
41
+ if (isNaN(n) || n < 1) throw new Error(`--pages must be a positive integer, got: ${v}`);
42
+ return n;
43
+ })
44
+ .option("--sitemap", "Discover pages from sitemap.xml instead of DOM links")
37
45
  .action(async (input, opts) => {
38
46
  let url = input;
39
47
  if (!url.startsWith("http://") && !url.startsWith("https://")) {
@@ -67,13 +75,73 @@ program
67
75
  });
68
76
 
69
77
  try {
78
+ const isMultiPage = opts.pages || opts.sitemap;
79
+ const maxPages = (opts.pages || 5) - 1; // -1 because homepage counts
70
80
  result = await extractBranding(url, spinner, browser, {
71
81
  navigationTimeout: 90000,
72
82
  darkMode: opts.darkMode,
73
83
  mobile: opts.mobile,
74
84
  slow: opts.slow,
75
85
  screenshotPath: opts.screenshot,
86
+ discoverLinks: isMultiPage && !opts.sitemap ? maxPages : null,
76
87
  });
88
+
89
+ // Multi-page crawl
90
+ if (isMultiPage && maxPages > 0) {
91
+ spinner.start("Discovering pages...");
92
+
93
+ let additionalUrls;
94
+ if (opts.sitemap) {
95
+ // Try post-redirect URL first, fall back to user-provided URL
96
+ // (sites like spotify.com redirect browser to open.spotify.com
97
+ // but sitemap lives at www.spotify.com)
98
+ additionalUrls = await parseSitemap(result.url, maxPages);
99
+ if (additionalUrls.length === 0 && result.url !== url) {
100
+ additionalUrls = await parseSitemap(url, maxPages);
101
+ }
102
+ } else {
103
+ additionalUrls = result._discoveredLinks || [];
104
+ }
105
+
106
+ delete result._discoveredLinks;
107
+
108
+ if (additionalUrls.length === 0) {
109
+ spinner.warn("No additional pages discovered");
110
+ } else {
111
+ spinner.stop();
112
+ console.log(chalk.dim(` Found ${additionalUrls.length} page(s) to analyze`));
113
+
114
+ const allResults = [result];
115
+ for (let i = 0; i < additionalUrls.length; i++) {
116
+ const pageUrl = additionalUrls[i];
117
+ const pageNum = i + 2;
118
+ const total = additionalUrls.length + 1;
119
+ spinner.start(`Extracting page ${pageNum}/${total}: ${new URL(pageUrl).pathname}`);
120
+
121
+ // Polite delay between pages
122
+ await new Promise(r => setTimeout(r, 2000 + Math.random() * 3000));
123
+
124
+ try {
125
+ const pageResult = await extractBranding(pageUrl, spinner, browser, {
126
+ navigationTimeout: 90000,
127
+ darkMode: opts.darkMode,
128
+ mobile: opts.mobile,
129
+ slow: opts.slow,
130
+ });
131
+ delete pageResult._discoveredLinks;
132
+ allResults.push(pageResult);
133
+ } catch (err) {
134
+ spinner.warn(`Skipping ${pageUrl}: ${String(err?.message || err).slice(0, 80)}`);
135
+ }
136
+ }
137
+
138
+ spinner.stop();
139
+ result = mergeResults(allResults);
140
+ }
141
+ } else {
142
+ delete result._discoveredLinks;
143
+ }
144
+
77
145
  break;
78
146
  } catch (err) {
79
147
  await browser.close();
package/lib/colors.js CHANGED
@@ -159,6 +159,33 @@ export function formatOklch(oklch, alpha) {
159
159
  return `oklch(${l}% ${c} ${h})`;
160
160
  }
161
161
 
162
+ /**
163
+ * Compute CIE76 delta-E perceptual distance between two hex colors.
164
+ * Returns 0 for identical colors, ~100 for maximally different.
165
+ * @param {string} hex1 - Hex color string (e.g. "#ff0000")
166
+ * @param {string} hex2 - Hex color string
167
+ * @returns {number}
168
+ */
169
+ export function deltaE(hex1, hex2) {
170
+ function toLab(hex) {
171
+ const rgb = hexToRgb(hex);
172
+ if (!rgb) return null;
173
+ const lr = srgbToLinear(rgb.r);
174
+ const lg = srgbToLinear(rgb.g);
175
+ const lb = srgbToLinear(rgb.b);
176
+ const xyz = linearRgbToXyz(lr, lg, lb);
177
+ return xyzToLab(xyz.x, xyz.y, xyz.z);
178
+ }
179
+ const lab1 = toLab(hex1);
180
+ const lab2 = toLab(hex2);
181
+ if (!lab1 || !lab2) return 999;
182
+ return Math.sqrt(
183
+ Math.pow(lab1.l - lab2.l, 2) +
184
+ Math.pow(lab1.a - lab2.a, 2) +
185
+ Math.pow(lab1.b - lab2.b, 2)
186
+ );
187
+ }
188
+
162
189
  /**
163
190
  * Parse a hex color string and return RGB values
164
191
  * @param {string} hex - Hex color (#fff, #ffffff, #ffffffaa)
package/lib/discovery.js ADDED
@@ -0,0 +1,247 @@
1
+ /**
2
+ * Page Discovery
3
+ *
4
+ * Discovers internal pages from a starting URL via DOM link extraction
5
+ * or sitemap.xml parsing.
6
+ */
7
+
8
+ /**
9
+ * Score a URL path for importance (higher = more important).
10
+ * Prioritizes shallow, navigational paths over blog posts / legal pages.
11
+ */
12
+ export function scoreUrl(pathname) {
13
+ const lc = pathname.toLowerCase();
14
+
15
+ // Exclude noise
16
+ const exclude = [
17
+ /\/(login|signin|sign-in|signup|sign-up|register|auth)/,
18
+ /\/(terms|privacy|legal|cookie|gdpr|tos)/,
19
+ /\/(cdn-cgi|wp-admin|wp-content|wp-json)/,
20
+ /\.(pdf|jpg|jpeg|png|gif|svg|webp|zip|tar|gz|mp4|mp3)$/,
21
+ /\/tag\//,
22
+ /\/author\//,
23
+ /\/page\/\d+/,
24
+ /\?/, // query strings (shouldn't appear here but safety)
25
+ ];
26
+ if (exclude.some(r => r.test(lc))) return -1;
27
+
28
+ let score = 100;
29
+
30
+ // Prefer shallow paths
31
+ const depth = (pathname.match(/\//g) || []).length;
32
+ score -= depth * 10;
33
+
34
+ // Boost key navigational pages
35
+ const boosts = [
36
+ [/^\/(pricing|plans?|cost)/, 30],
37
+ [/^\/(about|company|team|story)/, 25],
38
+ [/^\/(product|features?|solutions?)/, 25],
39
+ [/^\/(enterprise|business|platform)/, 20],
40
+ [/^\/(contact|demo|trial|start)/, 20],
41
+ [/^\/(docs|documentation|developers?|api)/, 15],
42
+ [/^\/(blog|resources|insights|news)/, 10],
43
+ ];
44
+ for (const [re, boost] of boosts) {
45
+ if (re.test(lc)) { score += boost; break; }
46
+ }
47
+
48
+ return score;
49
+ }
50
+
51
+ /**
52
+ * Discover internal links from an already-loaded Playwright page.
53
+ * Call after extractBranding() has loaded and scrolled the page.
54
+ *
55
+ * @param {import('playwright-core').Page} page
56
+ * @param {string} baseUrl - The starting URL (used to determine same-origin)
57
+ * @param {number} maxPages - Maximum number of URLs to return
58
+ * @returns {Promise<string[]>} Ordered list of URLs to crawl (excluding homepage)
59
+ */
60
+ export async function discoverLinks(page, baseUrl, maxPages) {
61
+ const origin = new URL(baseUrl).origin;
62
+
63
+ const links = await page.evaluate((origin) => {
64
+ const results = [];
65
+ const navSelectors = [
66
+ 'nav a', 'header a', 'footer a',
67
+ '[role="navigation"] a', '[aria-label*="nav"] a',
68
+ '[data-nav] a', '.nav a', '.navbar a', '.header a', '.menu a',
69
+ ];
70
+
71
+ // Score links by location: nav/header/footer = 2, elsewhere = 1
72
+ const scored = new Map(); // pathname → { href, locationScore }
73
+
74
+ function addLinks(selector, locationScore) {
75
+ document.querySelectorAll(selector).forEach(a => {
76
+ try {
77
+ const url = new URL(a.href);
78
+ if (url.origin !== origin) return;
79
+ const pathname = url.pathname.replace(/\/$/, '') || '/';
80
+ if (!scored.has(pathname) || scored.get(pathname).locationScore < locationScore) {
81
+ scored.set(pathname, { href: url.origin + pathname, locationScore });
82
+ }
83
+ } catch {}
84
+ });
85
+ }
86
+
87
+ navSelectors.forEach(sel => addLinks(sel, 2));
88
+ addLinks('a[href]', 1);
89
+
90
+ scored.forEach(({ href, locationScore }, pathname) => {
91
+ results.push({ href, pathname, locationScore });
92
+ });
93
+
94
+ return results;
95
+ }, origin);
96
+
97
+ // Remove homepage itself
98
+ const homepagePath = new URL(baseUrl).pathname.replace(/\/$/, '') || '/';
99
+
100
+ const scored = links
101
+ .filter(l => l.pathname !== homepagePath)
102
+ .map(l => ({ ...l, score: scoreUrl(l.pathname) + (l.locationScore * 5) }))
103
+ .filter(l => l.score >= 0)
104
+ .sort((a, b) => b.score - a.score);
105
+
106
+ return scored.slice(0, maxPages).map(l => l.href);
107
+ }
108
+
109
+ const FETCH_OPTS = {
110
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; Dembrandt/1.0)' },
111
+ };
112
+
113
+ /**
114
+ * Discover sitemap URLs by checking robots.txt first, then common paths.
115
+ * Returns an ordered list of candidate sitemap URLs; the caller fetches
+ * them in order and uses the first one that responds successfully.
116
+ */
117
+ async function findSitemapUrls(origin) {
118
+ // 1. Check robots.txt for Sitemap: directives
119
+ try {
120
+ const res = await fetch(`${origin}/robots.txt`, {
121
+ ...FETCH_OPTS,
122
+ signal: AbortSignal.timeout(5000),
123
+ });
124
+ if (res.ok) {
125
+ const text = await res.text();
126
+ const sitemaps = [...text.matchAll(/^Sitemap:\s*(.+)$/gmi)]
127
+ .map(m => m[1].trim())
128
+ .filter(u => u.startsWith('http'));
129
+ if (sitemaps.length > 0) return sitemaps;
130
+ }
131
+ } catch {}
132
+
133
+ // 2. Fall back to common sitemap locations
134
+ return [
135
+ `${origin}/sitemap.xml`,
136
+ `${origin}/sitemap_index.xml`,
137
+ `${origin}/sitemap/sitemap-index.xml`,
138
+ ];
139
+ }
140
+
141
+ /**
142
+ * Fetch a single sitemap URL, returning its XML text or empty string.
143
+ */
144
+ async function fetchSitemap(url) {
145
+ try {
146
+ const res = await fetch(url, {
147
+ ...FETCH_OPTS,
148
+ signal: AbortSignal.timeout(10000),
149
+ });
150
+ if (!res.ok) return '';
151
+ const text = await res.text();
152
+ return text.includes('<') ? text : ''; // sanity check for XML
153
+ } catch {
154
+ return '';
155
+ }
156
+ }
157
+
158
+ /**
159
+ * Discover pages from a site's sitemap.xml.
160
+ * Checks robots.txt for Sitemap directives, then tries common paths.
161
+ * Follows sitemapindex references up to two levels deep.
162
+ *
163
+ * @param {string} baseUrl - The starting URL (should be post-redirect)
164
+ * @param {number} maxPages - Maximum number of URLs to return
165
+ * @returns {Promise<string[]>} List of URLs from sitemap (excluding homepage)
166
+ */
167
+ export async function parseSitemap(baseUrl, maxPages) {
168
+ const base = new URL(baseUrl);
169
+
170
+ // Find and fetch sitemap(s)
171
+ const candidates = await findSitemapUrls(base.origin);
172
+ let xml = '';
173
+ for (const candidate of candidates) {
174
+ xml = await fetchSitemap(candidate);
175
+ if (xml) break;
176
+ }
177
+ if (!xml) return [];
178
+
179
+ // Also accept www/non-www variants of the same domain
180
+ const acceptedHostnames = new Set([base.hostname]);
181
+ if (base.hostname.startsWith('www.')) {
182
+ acceptedHostnames.add(base.hostname.slice(4));
183
+ } else {
184
+ acceptedHostnames.add('www.' + base.hostname);
185
+ }
186
+
187
+ // Expand sitemapindex references up to two levels deep
188
+ let allXml = xml;
189
+ if (xml.includes('<sitemapindex')) {
190
+ const extractLocs = (text) => [...text.matchAll(/<loc>\s*(https?:\/\/[^<]+)\s*<\/loc>/g)]
191
+ .map(m => m[1].trim())
192
+ .filter(u => { try { return acceptedHostnames.has(new URL(u).hostname); } catch { return false; } });
193
+
194
+ const childUrls = extractLocs(xml).slice(0, 10);
195
+ const childXmls = await Promise.all(childUrls.map(u => fetchSitemap(u)));
196
+
197
+ // Check if children are also sitemapindex (e.g. Spotify's 3-level hierarchy)
198
+ const grandchildFetches = [];
199
+ const pageXmls = [];
200
+ for (const childXml of childXmls) {
201
+ if (!childXml) continue;
202
+ if (childXml.includes('<sitemapindex')) {
203
+ const gcUrls = extractLocs(childXml).slice(0, 3);
204
+ grandchildFetches.push(...gcUrls.map(u => fetchSitemap(u)));
205
+ } else {
206
+ pageXmls.push(childXml);
207
+ }
208
+ }
209
+ if (grandchildFetches.length > 0) {
210
+ const gcXmls = await Promise.all(grandchildFetches);
211
+ pageXmls.push(...gcXmls);
212
+ }
213
+ allXml = pageXmls.join('\n');
214
+ }
215
+
216
+ const homepagePath = base.pathname.replace(/\/$/, '') || '/';
217
+
218
+ const urls = [...allXml.matchAll(/<loc>\s*(https?:\/\/[^<]+)\s*<\/loc>/g)]
219
+ .map(m => m[1].trim())
220
+ .filter(u => {
221
+ try { return acceptedHostnames.has(new URL(u).hostname); }
222
+ catch { return false; }
223
+ })
224
+ .map(u => {
225
+ const p = new URL(u);
226
+ return p.origin + (p.pathname.replace(/\/$/, '') || '/');
227
+ });
228
+
229
+ // Deduplicate
230
+ const seen = new Set();
231
+ const deduped = [];
232
+ for (const u of urls) {
233
+ const path = new URL(u).pathname;
234
+ if (path === homepagePath) continue;
235
+ if (seen.has(u)) continue;
236
+ seen.add(u);
237
+ deduped.push(u);
238
+ }
239
+
240
+ // Score and sort
241
+ const scored = deduped
242
+ .map(u => ({ href: u, score: scoreUrl(new URL(u).pathname) }))
243
+ .filter(l => l.score >= 0)
244
+ .sort((a, b) => b.score - a.score);
245
+
246
+ return scored.slice(0, maxPages).map(l => l.href);
247
+ }
package/lib/display.js CHANGED
@@ -33,6 +33,10 @@ export function displayResults(data) {
33
33
  second: '2-digit'
34
34
  });
35
35
  console.log(chalk.dim('├─') + ' ' + chalk.dim(timeString));
36
+ if (data.pages && data.pages.length > 1) {
37
+ const paths = data.pages.map(p => new URL(p.url).pathname || '/').join(', ');
38
+ console.log(chalk.dim('├─') + ' ' + chalk.dim(`${data.pages.length} pages: ${paths}`));
39
+ }
36
40
  console.log(chalk.dim('│'));
37
41
 
38
42
  displayLogo(data.logo);
package/lib/extractors.js CHANGED
@@ -7,6 +7,7 @@
7
7
 
8
8
  import chalk from "chalk";
9
9
  import { convertColor } from "./colors.js";
10
+ import { discoverLinks } from "./discovery.js";
10
11
 
11
12
  /**
12
13
  * Main extraction function - orchestrates the entire brand analysis process
@@ -157,9 +158,22 @@ export async function extractBranding(
157
158
  300 + Math.random() * 400,
158
159
  200 + Math.random() * 300
159
160
  );
160
- await page.evaluate(() => window.scrollTo(0, 400));
161
+ // Scroll through entire page to trigger lazy-loaded content
162
+ await page.evaluate(async () => {
163
+ const delay = (ms) => new Promise(r => setTimeout(r, ms));
164
+ const scrollStep = 600;
165
+ const maxHeight = Math.min(document.body.scrollHeight, 30000);
166
+ let y = 0;
167
+ while (y < maxHeight) {
168
+ y = Math.min(y + scrollStep, maxHeight);
169
+ window.scrollTo(0, y);
170
+ await delay(150 + Math.random() * 100);
171
+ }
172
+ // Scroll back to top
173
+ window.scrollTo(0, 0);
174
+ });
161
175
  spinner.stop();
162
- console.log(chalk.hex('#50FA7B')(` ✓ Human behavior simulated`));
176
+ console.log(chalk.hex('#50FA7B')(` ✓ Full page scrolled (lazy content triggered)`));
163
177
 
164
178
  // Final hydration wait
165
179
  spinner.start("Final content stabilization...");
@@ -641,6 +655,15 @@ export async function extractBranding(
641
655
  result.colors.rawColors = colors._raw || [];
642
656
  }
643
657
 
658
+ // Discover internal links for multi-page extraction
659
+ if (options.discoverLinks) {
660
+ try {
661
+ result._discoveredLinks = await discoverLinks(page, page.url(), options.discoverLinks);
662
+ } catch {
663
+ result._discoveredLinks = [];
664
+ }
665
+ }
666
+
644
667
  return result;
645
668
  } catch (error) {
646
669
  spinner.fail("Extraction failed");
package/lib/merger.js ADDED
@@ -0,0 +1,311 @@
1
+ /**
2
+ * Multi-Page Result Merger
3
+ *
4
+ * Merges extraction results from multiple pages into a single
5
+ * unified result that is a superset of the single-page result: all single-page
6
+ * fields are preserved, with additional multi-page metadata (pages array,
7
+ * pageCount on palette entries) added.
8
+ */
9
+
10
+ import { deltaE } from './colors.js';
11
+
12
+ const DELTA_E_THRESHOLD = 15;
13
+
14
+ function mergeColors(results) {
15
+ const base = results[0].colors;
16
+
17
+ // Pool all palette entries with their source page index
18
+ const allColors = [];
19
+ results.forEach((r, pageIdx) => {
20
+ (r.colors?.palette || []).forEach(c => {
21
+ allColors.push({ ...c, _pageIdx: pageIdx });
22
+ });
23
+ });
24
+
25
+ // Perceptual dedup across all pages
26
+ const merged = [];
27
+ const used = new Set();
28
+
29
+ for (let i = 0; i < allColors.length; i++) {
30
+ if (used.has(i)) continue;
31
+
32
+ const c = allColors[i];
33
+ const similar = [c];
34
+ const pagesSeen = new Set([c._pageIdx]);
35
+
36
+ for (let j = i + 1; j < allColors.length; j++) {
37
+ if (used.has(j)) continue;
38
+ try {
39
+ if (deltaE(c.normalized, allColors[j].normalized) < DELTA_E_THRESHOLD) {
40
+ similar.push(allColors[j]);
41
+ pagesSeen.add(allColors[j]._pageIdx);
42
+ used.add(j);
43
+ }
44
+ } catch { /* skip unparseable colors */ }
45
+ }
46
+ used.add(i);
47
+
48
+ // Keep variant with highest count as canonical
49
+ const best = similar.sort((a, b) => b.count - a.count)[0];
50
+ const totalCount = similar.reduce((s, x) => s + (x.count || 0), 0);
51
+ const pageCount = pagesSeen.size;
52
+
53
+ // Boost confidence when color appears on multiple pages
54
+ let confidence = best.confidence;
55
+ if (pageCount > 1 && confidence === 'low') confidence = 'medium';
56
+ if (pageCount > 2 && confidence === 'medium') confidence = 'high';
57
+
58
+ const { _pageIdx, ...clean } = best;
59
+ merged.push({ ...clean, count: totalCount, confidence, pageCount });
60
+ }
61
+
62
+ // Semantic: homepage wins, fill missing from other pages
63
+ const semantic = { ...base.semantic };
64
+ for (let i = 1; i < results.length; i++) {
65
+ const s = results[i].colors?.semantic || {};
66
+ for (const [k, v] of Object.entries(s)) {
67
+ if (!semantic[k] && v) semantic[k] = v;
68
+ }
69
+ }
70
+
71
+ // CSS variables: union, first occurrence wins
72
+ const cssVariables = {};
73
+ results.forEach(r => {
74
+ const vars = r.colors?.cssVariables || {};
75
+ for (const [k, v] of Object.entries(vars)) {
76
+ if (!(k in cssVariables)) cssVariables[k] = v;
77
+ }
78
+ });
79
+
80
+ return { ...base, semantic, palette: merged, cssVariables };
81
+ }
82
+
83
+ function mergeTypography(results) {
84
+ const base = results[0].typography || {};
85
+
86
+ // Dedup styles by (family, size, weight) tuple, sum counts
87
+ const styleMap = new Map();
88
+ results.forEach(r => {
89
+ (r.typography?.styles || []).forEach(s => {
90
+ const key = `${s.family}|${s.size}|${s.weight}`;
91
+ if (!styleMap.has(key)) {
92
+ styleMap.set(key, { ...s, count: 1 });
93
+ } else {
94
+ styleMap.get(key).count++;
95
+ }
96
+ });
97
+ });
98
+
99
+ // Merge sources
100
+ const sources = { ...(base.sources || {}) };
101
+ results.slice(1).forEach(r => {
102
+ const s = r.typography?.sources || {};
103
+ for (const [k, v] of Object.entries(s)) {
104
+ if (Array.isArray(v)) {
105
+ sources[k] = [...new Set([...(sources[k] || []), ...v])];
106
+ } else if (v && !sources[k]) {
107
+ sources[k] = v;
108
+ }
109
+ }
110
+ });
111
+
112
+ const styles = [...styleMap.values()].sort((a, b) => parseFloat(b.size) - parseFloat(a.size));
113
+ return { ...base, styles, sources };
114
+ }
115
+
116
+ /**
117
+ * Fingerprint a component by its visual properties.
118
+ * Buttons and links have states.default; badges have top-level props.
119
+ */
120
+ function fingerprintComponent(c) {
121
+ const base = c.states?.default || c;
122
+ return [
123
+ base.backgroundColor || '', base.color || '', base.borderRadius || '',
124
+ base.fontSize || '', base.fontWeight || '', base.border || ''
125
+ ].join('|');
126
+ }
127
+
128
+ function mergeComponentArray(arrays) {
129
+ const map = new Map();
130
+ arrays.flat().forEach(item => {
131
+ const key = fingerprintComponent(item);
132
+ if (!map.has(key)) {
133
+ map.set(key, { ...item, count: item.count || 1 });
134
+ } else {
135
+ map.get(key).count += (item.count || 1);
136
+ }
137
+ });
138
+ return [...map.values()].sort((a, b) => b.count - a.count);
139
+ }
140
+
141
+ /**
142
+ * Merge grouped component objects (inputs: {text,checkbox,...}, badges: {all,byVariant}).
143
+ * Preserves the grouping keys and merges each sub-array independently.
144
+ */
145
+ function mergeComponentGroups(groupObjects) {
146
+ const grouped = {};
147
+ groupObjects.forEach(obj => {
148
+ if (!obj) return;
149
+ for (const [key, val] of Object.entries(obj)) {
150
+ if (Array.isArray(val)) {
151
+ if (!grouped[key]) grouped[key] = [];
152
+ grouped[key].push(val);
153
+ } else if (typeof val === 'object' && val !== null) {
154
+ // Nested groups like byVariant: { error: [], warning: [], ... }
155
+ if (!grouped[key]) grouped[key] = {};
156
+ for (const [subKey, subArr] of Object.entries(val)) {
157
+ if (Array.isArray(subArr)) {
158
+ if (!grouped[key][subKey]) grouped[key][subKey] = [];
159
+ grouped[key][subKey].push(subArr);
160
+ }
161
+ }
162
+ }
163
+ }
164
+ });
165
+
166
+ const merged = {};
167
+ for (const [key, val] of Object.entries(grouped)) {
168
+ if (Array.isArray(val)) {
169
+ merged[key] = mergeComponentArray(val);
170
+ } else {
171
+ // Nested groups
172
+ merged[key] = {};
173
+ for (const [subKey, subArrays] of Object.entries(val)) {
174
+ merged[key][subKey] = mergeComponentArray(subArrays);
175
+ }
176
+ }
177
+ }
178
+ return merged;
179
+ }
180
+
181
+ function mergeComponents(results) {
182
+ return {
183
+ buttons: mergeComponentArray(results.map(r => r.components?.buttons || [])),
184
+ inputs: mergeComponentGroups(results.map(r => r.components?.inputs).filter(Boolean)),
185
+ links: mergeComponentArray(results.map(r => r.components?.links || [])),
186
+ badges: mergeComponentGroups(results.map(r => r.components?.badges).filter(Boolean)),
187
+ };
188
+ }
189
+
190
+ function mergeValueArrays(results, getter, valueKey = 'value') {
191
+ const map = new Map();
192
+ results.forEach(r => {
193
+ (getter(r) || []).forEach(item => {
194
+ const key = item[valueKey];
195
+ if (!map.has(key)) {
196
+ map.set(key, { ...item });
197
+ } else {
198
+ const e = map.get(key);
199
+ e.count = (e.count || 0) + (item.count || 1);
200
+ e.frequency = (e.frequency || 0) + (item.frequency || 0);
201
+ }
202
+ });
203
+ });
204
+ return [...map.values()].sort((a, b) => (b.count || b.frequency || 0) - (a.count || a.frequency || 0));
205
+ }
206
+
207
+ function mergeSpacing(results) {
208
+ const base = results[0].spacing || {};
209
+ const values = mergeValueArrays(results, r => r.spacing?.commonValues, 'px');
210
+ return { ...base, commonValues: values };
211
+ }
212
+
213
+ function mergeBorderRadius(results) {
214
+ const base = results[0].borderRadius || {};
215
+ const values = mergeValueArrays(results, r => r.borderRadius?.values);
216
+ // Recompute confidence from aggregated count
217
+ for (const v of values) {
218
+ if (v.count > 10) v.confidence = 'high';
219
+ else if (v.count > 3) v.confidence = 'medium';
220
+ }
221
+ return { ...base, values };
222
+ }
223
+
224
+ function mergeBorders(results) {
225
+ const map = new Map();
226
+ results.forEach(r => {
227
+ (r.borders?.combinations || []).forEach(item => {
228
+ const key = `${item.width}|${item.style}|${item.color}`;
229
+ if (!map.has(key)) {
230
+ map.set(key, { ...item, elements: [...(item.elements || [])] });
231
+ } else {
232
+ const e = map.get(key);
233
+ e.count += (item.count || 1);
234
+ const elementSet = new Set([...(e.elements || []), ...(item.elements || [])]);
235
+ e.elements = [...elementSet].slice(0, 5);
236
+ if (e.count > 10) e.confidence = 'high';
237
+ else if (e.count > 3) e.confidence = 'medium';
238
+ }
239
+ });
240
+ });
241
+ return {
242
+ combinations: [...map.values()].sort((a, b) => b.count - a.count),
243
+ };
244
+ }
245
+
246
+ function mergeShadows(results) {
247
+ const map = new Map();
248
+ results.forEach(r => {
249
+ (r.shadows || []).forEach(s => {
250
+ const key = s.shadow || s.value || JSON.stringify(s);
251
+ if (!map.has(key)) {
252
+ map.set(key, { ...s, count: s.count || 1 });
253
+ } else {
254
+ map.get(key).count += (s.count || 1);
255
+ }
256
+ });
257
+ });
258
+ // Recompute confidence from aggregated count
259
+ for (const s of map.values()) {
260
+ if (s.count > 10) s.confidence = 'high';
261
+ else if (s.count > 3) s.confidence = 'medium';
262
+ }
263
+ return [...map.values()].sort((a, b) => b.count - a.count);
264
+ }
265
+
266
+ function mergeByName(results, getter) {
267
+ const seen = new Set();
268
+ const out = [];
269
+ results.forEach(r => {
270
+ (getter(r) || []).forEach(item => {
271
+ const key = item.name || item.library || JSON.stringify(item);
272
+ if (!seen.has(key)) { seen.add(key); out.push(item); }
273
+ });
274
+ });
275
+ return out;
276
+ }
277
+
278
+ /**
279
+ * Merge an array of per-page result objects into a single unified result.
280
+ * @param {Object[]} results - Array of extractBranding() result objects
281
+ * @returns {Object} Merged result with same shape as single-page result
282
+ */
283
+ export function mergeResults(results) {
284
+ if (results.length === 0) throw new Error('No results to merge');
285
+ if (results.length === 1) return results[0];
286
+
287
+ const home = results[0];
288
+
289
+ return {
290
+ url: home.url,
291
+ extractedAt: home.extractedAt,
292
+ siteName: home.siteName,
293
+ logo: home.logo,
294
+ favicons: home.favicons,
295
+ colors: mergeColors(results),
296
+ typography: mergeTypography(results),
297
+ spacing: mergeSpacing(results),
298
+ borderRadius: mergeBorderRadius(results),
299
+ borders: mergeBorders(results),
300
+ shadows: mergeShadows(results),
301
+ components: mergeComponents(results),
302
+ breakpoints: [
303
+ ...new Map(
304
+ results.flatMap(r => r.breakpoints || []).map(b => [b.px, b])
305
+ ).values()
306
+ ].sort((a, b) => parseInt(b.px) - parseInt(a.px)),
307
+ iconSystem: mergeByName(results, r => r.iconSystem),
308
+ frameworks: mergeByName(results, r => r.frameworks),
309
+ pages: results.map(r => ({ url: r.url, extractedAt: r.extractedAt })),
310
+ };
311
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dembrandt",
3
- "version": "0.7.2",
3
+ "version": "0.8.0",
4
4
  "description": "Extract design tokens and brand assets from any website",
5
5
  "main": "index.js",
6
6
  "type": "module",