dembrandt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 thevangelist
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,378 @@
1
+ # 🎨 Dembrandt
2
+
3
+ [![npm version](https://img.shields.io/npm/v/dembrandt.svg)](https://www.npmjs.com/package/dembrandt)
4
+ [![npm downloads](https://img.shields.io/npm/dm/dembrandt.svg)](https://www.npmjs.com/package/dembrandt)
5
+ [![license](https://img.shields.io/npm/l/dembrandt.svg)](https://github.com/thevangelist/dembrandt/blob/main/LICENSE)
6
+
7
+ A CLI tool for extracting design tokens and brand assets from any website. Powered by Playwright with advanced bot detection avoidance.
8
+
9
+ ![Dembrandt Demo](showcase.gif)
10
+
11
+ ## Quick Start
12
+
13
+ ```bash
14
+ npx dembrandt stripe.com
15
+ ```
16
+
17
+ No installation required! Extract design tokens from any website in seconds. Or install globally with `npm install -g dembrandt`.
18
+
19
+ ## What It Does
20
+
21
+ Dembrandt analyzes live websites and extracts their complete design system:
22
+
23
+ - **Logo** — Logo detection (img/svg) with dimensions and source URL
24
+ - **Colors** — Semantic colors, color palette with confidence scoring, CSS variables
25
+ - **Typography** — Font families, sizes, weights, line heights, font sources (Google Fonts, Adobe Fonts, custom)
26
+ - **Spacing** — Margin and padding scales with grid system detection (4px/8px/custom)
27
+ - **Border Radius** — Corner radius patterns with usage frequency
28
+ - **Shadows** — Box shadow values for elevation systems
29
+ - **Buttons** — Component styles with variants and states
30
+ - **Inputs** — Form field styles (input, textarea, select)
31
+ - **Breakpoints** — Responsive design breakpoints from media queries
32
+ - **Icons** — Icon system detection (Font Awesome, Material Icons, SVG)
33
+ - **Frameworks** — CSS framework detection (Tailwind, Bootstrap, Material-UI, Chakra)
34
+
35
+ Perfect for competitive analysis, brand audits, or rebuilding a brand when you don't have design guidelines.
36
+
37
+ ## Why It Matters
38
+
39
+ **Designers** — Analyze competitor systems, document production tokens, audit brand consistency
40
+
41
+ **Developers** — Migrate design tokens, reverse engineer components, validate implementations
42
+
43
+ **Product Managers** — Track competitor evolution, quantify design debt, evaluate vendors
44
+
45
+ **Marketing** — Audit competitor brands, plan rebrands, monitor brand compliance
46
+
47
+ **Engineering Leaders** — Measure technical debt, plan migrations, assess acquisition targets
48
+
49
+ ## How It Works
50
+
51
+ Uses Playwright to render the page, extracts computed styles from the DOM, analyzes color usage and confidence, groups similar typography, detects spacing patterns, and returns actionable design tokens.
52
+
53
+ ### Extraction Process
54
+
55
+ 1. **Browser Launch** - Launches Chromium with stealth configuration
56
+ 2. **Anti-Detection** - Injects scripts to bypass bot detection
57
+ 3. **Navigation** - Navigates to target URL with retry logic
58
+ 4. **Hydration** - Waits for SPAs to fully load (8s initial + 4s stabilization)
59
+ 5. **Content Validation** - Verifies page content is substantial (>500 chars)
60
+ 6. **Parallel Extraction** - Runs all extractors concurrently for speed
61
+ 7. **Analysis** - Analyzes computed styles, DOM structure, and CSS variables
62
+ 8. **Scoring** - Assigns confidence scores based on context and usage
63
+
64
+ ### Color Confidence
65
+
66
+ - **High** — Logo, brand elements, primary buttons
67
+ - **Medium** — Interactive elements, icons, navigation
68
+ - **Low** — Generic UI components (filtered from display)
69
+
70
+ Only shows high and medium confidence colors in terminal. Full palette in JSON.
71
+
72
+ ### Typography Detection
73
+
74
+ Samples all heading levels (h1-h6), body text, buttons, links. Groups by font family, size, and weight. Detects Google Fonts, Adobe Fonts, custom @font-face.
75
+
76
+ ### Framework Detection
77
+
78
+ Recognizes Tailwind CSS, Bootstrap, Material-UI, and others by class patterns and CDN links.
79
+
80
+ ## Installation
81
+
82
+ ### Using npx (Recommended)
83
+
84
+ No installation needed! Run directly with `npx`:
85
+
86
+ ```bash
87
+ npx dembrandt stripe.com
88
+ ```
89
+
90
+ The first run will automatically install Chromium (~170MB).
91
+
92
+ ### Global Installation
93
+
94
+ Install globally for repeated use:
95
+
96
+ ```bash
97
+ npm install -g dembrandt
98
+ dembrandt stripe.com
99
+ ```
100
+
101
+ ### Prerequisites
102
+
103
+ - Node.js 18 or higher
104
+
105
+ ### Development Setup
106
+
107
+ For contributors who want to work on dembrandt:
108
+
109
+ ```bash
110
+ git clone https://github.com/thevangelist/dembrandt.git
111
+ cd dembrandt
112
+ npm install
113
+ npm link
114
+ ```
115
+
116
+ ## Usage
117
+
118
+ ### Basic Usage
119
+
120
+ ```bash
121
+ # Using npx (no installation)
122
+ npx dembrandt <url>
123
+
124
+ # Or if installed globally
125
+ dembrandt <url>
126
+
127
+ # Examples
128
+ dembrandt stripe.com
129
+ dembrandt https://github.com
130
+ dembrandt tailwindcss.com
131
+ ```
132
+
133
+ ### Options
134
+
135
+ **`--json-only`** - Output raw JSON to stdout instead of formatted terminal display
136
+
137
+ ```bash
138
+ dembrandt stripe.com --json-only > tokens.json
139
+ ```
140
+
141
+ Note: without this flag, JSON is automatically saved to `output/domain.com/`. With `--json-only`, nothing is written to disk, so redirect stdout (as shown above) to keep the result.
142
+
143
+ **`-d, --debug`** - Run with visible browser and detailed logs
144
+
145
+ ```bash
146
+ dembrandt stripe.com --debug
147
+ ```
148
+
149
+ Useful for troubleshooting bot detection, timeouts, or extraction issues.
150
+
151
+ ## Output
152
+
153
+ ### Automatic JSON Saves
154
+
155
+ Every extraction (unless run with `--json-only`) is automatically saved to `output/domain.com/YYYY-MM-DDTHH-MM-SS.json` with:
156
+
157
+ - Complete design token data
158
+ - Timestamped for version tracking
159
+ - Organized by domain
160
+
161
+ Example: `output/stripe.com/2025-11-22T14-30-45.json`
162
+
163
+ ### Terminal Output
164
+
165
+ Clean, formatted tables showing:
166
+
167
+ - Color palette with confidence ratings (with visual swatches)
168
+ - CSS variables with color previews
169
+ - Typography hierarchy with context
170
+ - Spacing scale (4px/8px grid detection)
171
+ - Shadow system
172
+ - Button variants
173
+ - Component style breakdowns
174
+ - Framework and icon system detection
175
+
176
+ ### JSON Output Format
177
+
178
+ Complete extraction data for programmatic use:
179
+
180
+ ```json
181
+ {
182
+ "url": "https://example.com",
183
+ "extractedAt": "2025-11-22T...",
184
+ "logo": { "source": "img", "url": "...", "width": 120, "height": 40 },
185
+ "colors": {
186
+ "semantic": { "primary": "#3b82f6", ... },
187
+ "palette": [{ "color": "#3b82f6", "confidence": "high", "count": 45, "sources": [...] }],
188
+ "cssVariables": { "--color-primary": "#3b82f6", ... }
189
+ },
190
+ "typography": {
191
+ "styles": [{ "fontFamily": "Inter", "fontSize": "16px", "fontWeight": "400", ... }],
192
+ "sources": { "googleFonts": [...], "adobeFonts": false, "customFonts": [...] }
193
+ },
194
+ "spacing": { "scaleType": "8px", "commonValues": [{ "px": "16px", "rem": "1rem", "count": 42 }, ...] },
195
+ "borderRadius": { "values": [{ "value": "8px", "count": 15, "confidence": "high" }, ...] },
196
+ "shadows": [{ "shadow": "0 2px 4px rgba(0,0,0,0.1)", "count": 8, "confidence": "high" }, ...],
197
+ "components": {
198
+ "buttons": [{ "backgroundColor": "...", "color": "...", "padding": "...", ... }],
199
+ "inputs": [{ "type": "input", "border": "...", "borderRadius": "...", ... }]
200
+ },
201
+ "breakpoints": [{ "px": "768px" }, ...],
202
+ "iconSystem": [{ "name": "Font Awesome", "type": "icon-font" }, ...],
203
+ "frameworks": [{ "name": "Tailwind CSS", "confidence": "high", "evidence": "class patterns" }]
204
+ }
205
+ ```
206
+
207
+ ## Examples
208
+
209
+ ### Extract Design Tokens
210
+
211
+ ```bash
212
+ # Analyze a single site (auto-saves JSON to output/stripe.com/)
213
+ dembrandt stripe.com
214
+
215
+ # View saved JSON files
216
+ ls output/stripe.com/
217
+
218
+ # Output to stdout for piping
219
+ dembrandt stripe.com --json-only | jq '.colors.semantic'
220
+
221
+ # Debug mode for difficult sites
222
+ dembrandt example.com --debug
223
+ ```
224
+
225
+ ### Compare Competitors
226
+
227
+ ```bash
228
+ # Extract tokens from multiple competitors (auto-saved to output/)
229
+ for site in stripe.com square.com paypal.com; do
230
+ dembrandt $site
231
+ done
232
+
233
+ # Compare color palettes from most recent extractions
234
+ jq '.colors.palette[] | select(.confidence=="high")' output/stripe.com/2025-11-22T*.json output/square.com/2025-11-22T*.json
235
+
236
+ # Compare semantic colors across competitors
237
+ jq '.colors.semantic' output/*/2025-11-22T*.json
238
+ ```
239
+
240
+ ### Integration with Design Tools
241
+
242
+ ```bash
243
+ # Extract and convert to custom config format
244
+ dembrandt mysite.com --json-only | jq '{
245
+ colors: .colors.semantic,
246
+ fontFamily: .typography.sources,
247
+ spacing: .spacing.commonValues
248
+ }' > design-tokens.json
249
+ ```
250
+
251
+ ## Use Cases
252
+
253
+ ### Brand Audits
254
+
255
+ Extract and document your company's current design system from production websites.
256
+
257
+ ### Competitive Analysis
258
+
259
+ Compare design systems across competitors to identify trends and opportunities.
260
+
261
+ ### Design System Migration
262
+
263
+ Document legacy design tokens before migrating to a new system.
264
+
265
+ ### Reverse Engineering
266
+
267
+ Rebuild a brand when original design guidelines are unavailable.
268
+
269
+ ### Quality Assurance
270
+
271
+ Verify design consistency across different pages and environments.
272
+
273
+ ## Advanced Features
274
+
275
+ ### Bot Detection Avoidance
276
+
277
+ - Stealth mode with anti-detection scripts
278
+ - Automatic fallback to visible browser on detection
279
+ - Human-like interaction simulation (mouse movement, scrolling)
280
+ - Custom user agent and browser fingerprinting
281
+
282
+ ### Smart Retry Logic
283
+
284
+ - Automatic retry on navigation failures (up to 2 attempts)
285
+ - SPA hydration detection and waiting
286
+ - Content validation to ensure page is fully loaded
287
+ - Detailed progress logging at each step
288
+
289
+ ### Comprehensive Logging
290
+
291
+ - Real-time spinner with step-by-step progress
292
+ - Detailed extraction metrics (colors found, styles detected, etc.)
293
+ - Error context with URL, stage, and attempt information
294
+ - Debug mode with full stack traces
295
+
296
+ ## Troubleshooting
297
+
298
+ ### Bot Detection Issues
299
+
300
+ If you encounter timeouts or network errors:
301
+
302
+ ```bash
303
+ dembrandt example.com --debug
304
+ ```
305
+
306
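+ This runs the extraction in a visible browser from the start and prints extra diagnostics. Without `--debug`, the tool already retries once in a visible browser when a headless run hits a timeout or network error.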
307
+
308
+ ### Page Not Loading
309
+
310
+ Some sites require longer load times. The tool waits 8 seconds for SPA hydration, but you can modify this in the source.
311
+
312
+ ### Empty Content
313
+
314
+ If content length is < 500 chars, the tool will automatically retry (up to 2 attempts).
315
+
316
+ ### Debug Mode
317
+
318
+ Use `--debug` to see:
319
+
320
+ - Browser launch confirmation
321
+ - Step-by-step progress logs
322
+ - Full error stack traces
323
+ - Extraction metrics
324
+
325
+ ## Limitations
326
+
327
+ - Captures default/light theme only (dark mode not detected)
328
+ - Hover/focus states extracted from CSS (not fully interactive)
329
+ - Canvas/WebGL-rendered sites cannot be analyzed (e.g., Tesla, Apple Vision Pro demos)
330
+ - JavaScript-heavy sites require hydration time (8s initial + 4s stabilization)
331
+ - Some dynamically-loaded content may be missed
332
+ - Requires viewport simulation at 1920x1080
333
+
334
+ ## Architecture
335
+
336
+ ```
337
+ dembrandt/
338
+ ├── index.js              # CLI entry point, command handling
339
+ ├── lib/
340
+ │   ├── extractors.js     # Core extraction logic with stealth mode
341
+ │   └── display.js        # Terminal output formatting
342
+ ├── output/               # Auto-saved JSON extractions (gitignored)
343
+ │   ├── stripe.com/
344
+ │   │   ├── 2025-11-22T14-30-45.json
345
+ │   │   └── 2025-11-22T15-12-33.json
346
+ │   └── github.com/
347
+ │       └── 2025-11-22T14-35-12.json
348
+ ├── package.json
349
+ └── README.md
350
+ ```
351
+
352
+ ## Ethics & Legality
353
+
354
+ Dembrandt extracts publicly available design information (colors, fonts, spacing) from website DOMs for analysis purposes. This falls under fair use in most jurisdictions (USA's DMCA § 1201(f), EU Software Directive 2009/24/EC) when used for competitive analysis, documentation, or learning.
355
+
356
+ **Legal:** Analyzing public HTML/CSS is generally legal. Does not bypass protections or violate copyright. Check site ToS before mass extraction.
357
+
358
+ **Ethical:** Use for inspiration and analysis, not direct copying. Respect servers (no mass crawling), give credit to sources, be transparent about data origin.
359
+
360
+ ## Contributing
361
+
362
+ Issues and pull requests welcome. Please include:
363
+
364
+ - Clear description of the issue/feature
365
+ - Example URLs that demonstrate the problem
366
+ - Expected vs actual behavior
367
+
368
+ ## License
369
+
370
+ MIT
371
+
372
+ ## Roadmap
373
+
374
+ - [ ] Dark mode detection and extraction
375
+ - [ ] Animation/transition detection
376
+ - [ ] Interactive state capture (hover, focus, active)
377
+ - [ ] Multi-page analysis
378
+ - [ ] Configuration file support
package/index.js ADDED
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Dembrandt - Design Token Extraction CLI
5
+ *
6
+ * Extracts design tokens, brand colors, typography, spacing, and component styles
7
+ * from any website using Playwright with advanced bot detection avoidance.
8
+ */
9
+
10
+ import { program } from "commander";
11
+ import chalk from "chalk";
12
+ import ora from "ora";
13
+ import { chromium } from "playwright";
14
+ import { extractBranding } from "./lib/extractors.js";
15
+ import { displayResults } from "./lib/display.js";
16
+ import { writeFileSync, mkdirSync } from "fs";
17
+ import { join, dirname } from "path";
18
+ import { fileURLToPath } from "url";
19
+
20
const __dirname = dirname(fileURLToPath(import.meta.url));

/** Version printed by `dembrandt --version`; matches the published package (0.1.0). */
const CLI_VERSION = "0.1.0";

/**
 * Value handed to the extractor as `navigationTimeout`.
 * NOTE(review): presumably milliseconds — confirm in lib/extractors.js.
 */
const NAVIGATION_TIMEOUT = 90000;

/** Command-line switches passed to Chromium on every launch. */
const CHROMIUM_ARGS = [
  "--no-sandbox",
  "--disable-setuid-sandbox",
  "--disable-blink-features=AutomationControlled",
];

/**
 * Turn the CLI argument into an absolute URL.
 *
 * @param {string} input - Value typed by the user, e.g. "stripe.com".
 * @returns {string} `input` unchanged when it already starts with http:// or
 *   https:// (in any letter case), otherwise `input` prefixed with "https://".
 */
function normalizeUrl(input) {
  return /^https?:\/\//i.test(input) ? input : `https://${input}`;
}

/**
 * Build a filename-safe timestamp such as "2025-11-22T14-30-45".
 *
 * @param {Date} date - Moment to format (rendered in UTC via toISOString).
 * @returns {string} ISO timestamp without milliseconds, ":" replaced by "-".
 */
function formatTimestamp(date) {
  // Cut the ".123Z" tail BEFORE touching punctuation: replacing "." first
  // leaves nothing to split on and the milliseconds end up in the filename.
  return date.toISOString().split(".")[0].replace(/:/g, "-");
}

/**
 * Hostname of `url` without a leading "www." label.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} e.g. "stripe.com" for "https://www.stripe.com/pricing".
 * @throws {TypeError} When `url` cannot be parsed by `new URL`.
 */
function domainOf(url) {
  // Anchored so that hosts merely containing "www." are left intact.
  return new URL(url).hostname.replace(/^www\./, "");
}

/**
 * Decide whether a failed headless attempt should be repeated in a visible
 * browser. The CLI treats timeouts and Chromium network errors as a sign of
 * bot detection.
 *
 * @param {unknown} err - Value thrown by the extraction.
 * @returns {boolean} True when the message mentions "Timeout" or "net::ERR_".
 */
function looksLikeBotBlock(err) {
  const message = err && err.message ? String(err.message) : "";
  return message.includes("Timeout") || message.includes("net::ERR_");
}

/**
 * Write the extraction result to `<package dir>/output/<domain>/<timestamp>.json`.
 *
 * @param {string} url - URL that was analysed (used to derive the domain folder).
 * @param {unknown} result - Extraction result; serialised with JSON.stringify.
 * @returns {string} Display path of the written file, relative to the package dir.
 * @throws {Error} On invalid URL or any filesystem failure.
 */
function saveResultJson(url, result) {
  const domain = domainOf(url);
  const filename = `${formatTimestamp(new Date())}.json`;
  const outputDir = join(__dirname, "output", domain);

  mkdirSync(outputDir, { recursive: true });
  writeFileSync(join(outputDir, filename), JSON.stringify(result, null, 2));

  return `output/${domain}/${filename}`;
}

/**
 * Commander action: extract design tokens from one site and print/save them.
 *
 * Runs headless first (unless --debug); if that attempt fails with what looks
 * like bot detection, retries exactly once in a visible browser. On success the
 * result is either printed as raw JSON (--json-only) or saved to disk and
 * rendered with displayResults. On failure the error is reported on stderr and
 * the process exits with status 1 after the browser has been closed.
 *
 * @param {string} input - URL or bare domain given on the command line.
 * @param {{ jsonOnly?: boolean, debug?: boolean }} opts - Parsed CLI options.
 * @returns {Promise<void>}
 */
async function runExtraction(input, opts) {
  const url = normalizeUrl(input);
  const spinner = ora("Starting extraction...").start();
  let browser = null;
  let failed = false;

  try {
    let useHeaded = Boolean(opts.debug);
    let result;

    while (true) {
      const mode = useHeaded ? "visible" : "headless";
      spinner.text = `Launching browser (${mode} mode)`;
      browser = await chromium.launch({
        headless: !useHeaded,
        args: CHROMIUM_ARGS,
      });
      if (opts.debug) {
        // Diagnostics go to stderr so `--json-only` output stays pipeable.
        console.error(chalk.dim(` ✓ Browser launched in ${mode} mode`));
      }

      try {
        result = await extractBranding(url, spinner, browser, {
          navigationTimeout: NAVIGATION_TIMEOUT,
        });
        break;
      } catch (err) {
        await browser.close();
        browser = null;

        // A visible browser is the last resort; anything that does not look
        // like bot detection is not worth a second attempt either.
        if (useHeaded || !looksLikeBotBlock(err)) throw err;

        spinner.warn("Bot detection detected → retrying with visible browser");
        console.error(chalk.dim(` ↳ Error: ${err.message}`));
        console.error(chalk.dim(` ↳ URL: ${url}`));
        console.error(chalk.dim(` ↳ Mode: headless`));
        useHeaded = true;
      }
    }

    spinner.succeed("Done!");

    if (opts.jsonOnly) {
      // Raw JSON only: nothing is written to disk in this mode.
      console.log(JSON.stringify(result, null, 2));
    } else {
      // Saving is best-effort; a failure here must not hide the results.
      try {
        const savedPath = saveResultJson(url, result);
        console.log(chalk.dim(`\n💾 JSON saved to: ${chalk.cyan(savedPath)}`));
      } catch (err) {
        console.log(
          chalk.yellow(`⚠ Could not save JSON file: ${err.message}`)
        );
      }
      displayResults(result);
    }
  } catch (err) {
    failed = true;
    spinner.fail("Failed");
    console.error(chalk.red("\n✗ Extraction failed"));
    console.error(chalk.red(` Error: ${err.message}`));
    console.error(chalk.dim(` URL: ${url}`));

    if (opts.debug && err.stack) {
      console.error(chalk.dim("\nStack trace:"));
      console.error(chalk.dim(err.stack));
    }

    if (!opts.debug) {
      console.error(
        chalk.yellow(
          "\nTip: Try with --debug flag for tough sites and detailed error logs"
        )
      );
    }
  } finally {
    if (browser) await browser.close();
  }

  // Exit only after `finally` has run: calling process.exit() inside the
  // catch block would terminate the process before the browser is closed.
  if (failed) process.exit(1);
}

program
  .name("dembrandt")
  .description("Extract design tokens from any website")
  .version(CLI_VERSION)
  .argument("<url>")
  .option("--json-only", "Output raw JSON")
  .option("-d, --debug", "Force visible browser")
  .action(runExtraction);

// The action handler is async, so use parseAsync to have its promise awaited.
await program.parseAsync();