dembrandt 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -68
- package/index.js +32 -9
- package/lib/extractors.js +18 -12
- package/lib/robots.js +101 -0
- package/mcp-server.js +1 -1
- package/package.json +5 -8
package/README.md
CHANGED
|
@@ -4,37 +4,25 @@
|
|
|
4
4
|
[npm package](https://www.npmjs.com/package/dembrandt)
|
|
5
5
|
[MIT License](https://github.com/dembrandt/dembrandt/blob/main/LICENSE)
|
|
6
6
|
|
|
7
|
-
Extract
|
|
7
|
+
Extract a website's design system into design tokens in a few seconds: logo, colors, typography, borders, and more. One command.
|
|
8
8
|
|
|
9
|
-

|
|
14
|
-
|
|
15
|
-
**Brand Guide PDF**
|
|
16
|
-
|
|
17
|
-

|
|
18
|
-
|
|
19
|
-
**Local UI**
|
|
20
|
-
|
|
21
|
-

|
|
9
|
+

|
|
22
10
|
|
|
23
11
|
## Install
|
|
24
12
|
|
|
25
13
|
Install globally: `npm install -g dembrandt`
|
|
26
14
|
|
|
27
15
|
```bash
|
|
28
|
-
dembrandt
|
|
16
|
+
dembrandt example.com
|
|
29
17
|
```
|
|
30
18
|
|
|
31
|
-
Or use npx without installing: `npx dembrandt
|
|
19
|
+
Or use npx without installing: `npx dembrandt example.com`
|
|
32
20
|
|
|
33
21
|
Requires Node.js 18+
|
|
34
22
|
|
|
35
23
|
## AI Agent Integration (MCP)
|
|
36
24
|
|
|
37
|
-
Use Dembrandt as a tool in Claude Code, Cursor, Windsurf, or any MCP-compatible client. Ask your agent to "extract the color palette from
|
|
25
|
+
Use Dembrandt as a tool in Claude Code, Cursor, Windsurf, or any MCP-compatible client. Ask your agent to "extract the color palette from example.com" and it calls Dembrandt automatically.
|
|
38
26
|
|
|
39
27
|
```bash
|
|
40
28
|
claude mcp add --transport stdio dembrandt -- npx -y dembrandt-mcp
|
|
@@ -69,44 +57,44 @@ Or add to your project's `.mcp.json`:
|
|
|
69
57
|
## Usage
|
|
70
58
|
|
|
71
59
|
```bash
|
|
72
|
-
dembrandt <url>
|
|
73
|
-
dembrandt
|
|
74
|
-
dembrandt
|
|
75
|
-
dembrandt
|
|
76
|
-
dembrandt
|
|
77
|
-
dembrandt
|
|
78
|
-
dembrandt
|
|
79
|
-
dembrandt
|
|
80
|
-
dembrandt
|
|
81
|
-
dembrandt
|
|
82
|
-
dembrandt
|
|
83
|
-
dembrandt
|
|
84
|
-
dembrandt
|
|
85
|
-
dembrandt
|
|
60
|
+
dembrandt <url> # Basic extraction (terminal display only)
|
|
61
|
+
dembrandt example.com --json-only # Output raw JSON to terminal (no formatted display, no file save)
|
|
62
|
+
dembrandt example.com --save-output # Save JSON to output/example.com/YYYY-MM-DDTHH-MM-SS.json
|
|
63
|
+
dembrandt example.com --dtcg # Export in W3C Design Tokens (DTCG) format (auto-saves as .tokens.json)
|
|
64
|
+
dembrandt example.com --dark-mode # Extract colors from dark mode variant
|
|
65
|
+
dembrandt example.com --mobile # Use mobile viewport (390x844) for responsive analysis
|
|
66
|
+
dembrandt example.com --slow # 3x longer timeouts (24s hydration) for JavaScript-heavy sites
|
|
67
|
+
dembrandt example.com --brand-guide # Generate a brand guide PDF
|
|
68
|
+
dembrandt example.com --design-md # Generate a DESIGN.md file for AI agents
|
|
69
|
+
dembrandt example.com --pages 5 # Analyze 5 pages (homepage + 4 discovered pages), merges results
|
|
70
|
+
dembrandt example.com --sitemap # Discover pages from sitemap.xml instead of DOM links
|
|
71
|
+
dembrandt example.com --pages 10 --sitemap # Combine: up to 10 pages discovered via sitemap
|
|
72
|
+
dembrandt example.com --no-sandbox # Disable Chromium sandbox (required for Docker/CI)
|
|
73
|
+
dembrandt example.com --browser=firefox # Use Firefox instead of Chromium (better for Cloudflare bypass)
|
|
86
74
|
```
|
|
87
75
|
|
|
88
76
|
Default: formatted terminal display only. Use `--save-output` to persist results as JSON files. Browser automatically retries in visible mode if headless extraction fails.
|
|
89
77
|
|
|
90
78
|
### Multi-Page Extraction
|
|
91
79
|
|
|
92
|
-
Analyze multiple pages to get a more complete picture of a site's design system. Results are merged into a single unified output with cross-page confidence boosting
|
|
80
|
+
Analyze multiple pages to get a more complete picture of a site's design system. Results are merged into a single unified output with cross-page confidence boosting: tokens appearing on multiple pages get higher confidence scores.
|
|
93
81
|
|
|
94
82
|
```bash
|
|
95
83
|
# Analyze homepage + 4 auto-discovered pages (default: 5 total)
|
|
96
|
-
dembrandt
|
|
84
|
+
dembrandt example.com --pages 5
|
|
97
85
|
|
|
98
86
|
# Use sitemap.xml for page discovery instead of DOM link scraping
|
|
99
|
-
dembrandt
|
|
87
|
+
dembrandt example.com --sitemap
|
|
100
88
|
|
|
101
89
|
# Combine both: up to 10 pages from sitemap
|
|
102
|
-
dembrandt
|
|
90
|
+
dembrandt example.com --pages 10 --sitemap
|
|
103
91
|
```
|
|
104
92
|
|
|
105
93
|
**Page discovery** works two ways:
|
|
106
|
-
- **DOM links** (default):
|
|
94
|
+
- **DOM links** (default): Reads navigation, header, and footer links from the homepage, prioritizing key pages like /pricing, /about, /features
|
|
107
95
|
- **Sitemap** (`--sitemap`): Parses sitemap.xml (checks robots.txt first), follows sitemapindex references, and scores URLs by importance
|
|
108
96
|
|
|
109
|
-
Pages are
|
|
97
|
+
Pages are fetched sequentially with polite delays. Failed pages are skipped without aborting the run.
|
|
110
98
|
|
|
111
99
|
### Browser Selection
|
|
112
100
|
|
|
@@ -114,10 +102,10 @@ By default, dembrandt uses Chromium. If you encounter bot detection or timeouts
|
|
|
114
102
|
|
|
115
103
|
```bash
|
|
116
104
|
# Use Firefox instead of Chromium
|
|
117
|
-
dembrandt
|
|
105
|
+
dembrandt example.com --browser=firefox
|
|
118
106
|
|
|
119
107
|
# Combine with other flags
|
|
120
|
-
dembrandt
|
|
108
|
+
dembrandt example.com --browser=firefox --save-output --dtcg
|
|
121
109
|
```
|
|
122
110
|
|
|
123
111
|
**When to use Firefox:**
|
|
@@ -137,24 +125,33 @@ npx playwright install firefox
|
|
|
137
125
|
Use `--dtcg` to export in the standardized [W3C Design Tokens Community Group](https://www.designtokens.org/) format:
|
|
138
126
|
|
|
139
127
|
```bash
|
|
140
|
-
dembrandt
|
|
141
|
-
# Saves to: output/
|
|
128
|
+
dembrandt example.com --dtcg
|
|
129
|
+
# Saves to: output/example.com/TIMESTAMP.tokens.json
|
|
142
130
|
```
|
|
143
131
|
|
|
144
132
|
The DTCG format is an industry-standard JSON schema that can be consumed by design tools and token transformation libraries like [Style Dictionary](https://styledictionary.com).
|
|
145
133
|
|
|
146
134
|
### DESIGN.md
|
|
147
135
|
|
|
148
|
-
Use `--design-md` to generate a [DESIGN.md](https://stitch.withgoogle.com/docs/design-md) file
|
|
136
|
+
Use `--design-md` to generate a [DESIGN.md](https://stitch.withgoogle.com/docs/design-md) file, a plain-text design system document readable by AI agents.
|
|
149
137
|
|
|
150
138
|
```bash
|
|
151
|
-
dembrandt
|
|
152
|
-
# Saves to: output/
|
|
139
|
+
dembrandt example.com --design-md
|
|
140
|
+
# Saves to: output/example.com/DESIGN.md
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Brand Guide PDF
|
|
144
|
+
|
|
145
|
+
Use `--brand-guide` to generate a printable PDF summarizing the extracted design system: colors, typography, components, and logo on a single document.
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
dembrandt example.com --brand-guide
|
|
149
|
+
# Saves to: output/example.com/TIMESTAMP.brand-guide.pdf
|
|
153
150
|
```
|
|
154
151
|
|
|
155
152
|
## Local UI
|
|
156
153
|
|
|
157
|
-
Browse your
|
|
154
|
+
Browse your extractions in a visual interface.
|
|
158
155
|
|
|
159
156
|
### Setup
|
|
160
157
|
|
|
@@ -173,26 +170,26 @@ Opens http://localhost:5173 with API on port 3002.
|
|
|
173
170
|
|
|
174
171
|
### Features
|
|
175
172
|
|
|
176
|
-
- Visual grid of all
|
|
173
|
+
- Visual grid of all extractions
|
|
177
174
|
- Color palettes with click-to-copy
|
|
178
175
|
- Typography specimens
|
|
179
176
|
- Spacing, shadows, border radius visualization
|
|
180
177
|
- Button and link component previews
|
|
181
178
|
- Dark/light theme toggle
|
|
182
|
-
- Section nav links on extraction pages
|
|
179
|
+
- Section nav links on extraction pages: jump directly to Colors, Typography, Shadows, etc. via a sticky sidebar
|
|
183
180
|
|
|
184
181
|
Extractions are performed via CLI (`dembrandt <url> --save-output`) and automatically appear in the UI.
|
|
185
182
|
|
|
186
183
|
## Use Cases
|
|
187
184
|
|
|
188
|
-
- Brand audits & competitive analysis
|
|
189
185
|
- Design system documentation
|
|
190
|
-
-
|
|
191
|
-
-
|
|
186
|
+
- Multi-site design consolidation
|
|
187
|
+
- Internal design audits on your own properties
|
|
188
|
+
- Learning how design tokens map to real CSS
|
|
192
189
|
|
|
193
190
|
## How It Works
|
|
194
191
|
|
|
195
|
-
Uses Playwright to render the page,
|
|
192
|
+
Uses Playwright to render the page, reads computed styles from the DOM, analyzes color usage and confidence, groups similar typography, detects spacing patterns, and returns design tokens.
|
|
196
193
|
|
|
197
194
|
### Extraction Process
|
|
198
195
|
|
|
@@ -207,38 +204,34 @@ Uses Playwright to render the page, extracts computed styles from the DOM, analy
|
|
|
207
204
|
|
|
208
205
|
### Color Confidence
|
|
209
206
|
|
|
210
|
-
- High
|
|
211
|
-
- Medium
|
|
212
|
-
- Low
|
|
207
|
+
- High: Logo, primary interactive elements
|
|
208
|
+
- Medium: Secondary interactive elements, icons, navigation
|
|
209
|
+
- Low: Generic UI components (filtered from display)
|
|
213
210
|
- Only shows high and medium confidence colors in terminal. Full palette in JSON.
|
|
214
211
|
|
|
215
212
|
## Limitations
|
|
216
213
|
|
|
217
|
-
- Dark mode requires
|
|
214
|
+
- Dark mode requires `--dark-mode` flag (not automatically detected)
|
|
218
215
|
- Hover/focus states extracted from CSS (not fully interactive)
|
|
219
|
-
- Canvas/WebGL-rendered sites cannot be analyzed (
|
|
216
|
+
- Canvas/WebGL-rendered sites cannot be analyzed (no DOM to read)
|
|
220
217
|
- JavaScript-heavy sites require hydration time (8s initial + 4s stabilization)
|
|
221
218
|
- Some dynamically-loaded content may be missed
|
|
222
|
-
- Default viewport is 1920x1080 (use
|
|
219
|
+
- Default viewport is 1920x1080 (use `--mobile` for 390x844 mobile viewport)
|
|
223
220
|
|
|
224
|
-
##
|
|
221
|
+
## Intended Use
|
|
225
222
|
|
|
226
|
-
Dembrandt
|
|
223
|
+
Dembrandt reads publicly available CSS and computed styles from website DOMs for documentation, learning, and analysis of design systems you own or have permission to analyze.
|
|
227
224
|
|
|
228
|
-
|
|
225
|
+
Only run Dembrandt against sites whose Terms of Service permit automated access, or against your own properties. Do not use extracted material to reproduce third-party brand identities, logos, or trademarks. Respect robots.txt, rate limits, and copyright.
|
|
229
226
|
|
|
230
|
-
|
|
227
|
+
Dembrandt does not host, redistribute, or claim rights to any third-party brand assets.
|
|
231
228
|
|
|
232
229
|
## Contributing
|
|
233
230
|
|
|
234
|
-
Bugs
|
|
235
|
-
|
|
236
|
-
Spam me in [Issues](https://github.com/dembrandt/dembrandt/issues) or PRs. I reply to everything.
|
|
231
|
+
Bugs, weird sites, pull requests. All welcome.
|
|
237
232
|
|
|
238
|
-
|
|
233
|
+
Open an [Issue](https://github.com/dembrandt/dembrandt/issues) or PR.
|
|
239
234
|
|
|
240
235
|
@thevangelist
|
|
241
236
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
MIT — do whatever you want with it.
|
|
237
|
+
MIT. Do whatever you want with it.
|
package/index.js
CHANGED
|
@@ -20,13 +20,14 @@ import { parseSitemap } from "./lib/discovery.js";
|
|
|
20
20
|
import { mergeResults } from "./lib/merger.js";
|
|
21
21
|
import { writeFileSync, mkdirSync } from "fs";
|
|
22
22
|
import { join } from "path";
|
|
23
|
+
import { checkRobotsTxt } from "./lib/robots.js";
|
|
23
24
|
|
|
24
25
|
program
|
|
25
26
|
.name("dembrandt")
|
|
26
27
|
.description("Extract design tokens from any website")
|
|
27
|
-
.version("0.
|
|
28
|
+
.version("0.11.0")
|
|
28
29
|
.argument("<url>")
|
|
29
|
-
.option("--browser <type>", "Browser to use (chromium|firefox)", "chromium")
|
|
30
|
+
.option("--browser <type>", "Browser to use (chromium|firefox); set BROWSER_CDP_ENDPOINT env var to connect to an existing Chromium instance via CDP", "chromium")
|
|
30
31
|
.option("--json-only", "Output raw JSON")
|
|
31
32
|
.option("--save-output", "Save JSON file to output folder")
|
|
32
33
|
.option("--dtcg", "Export in W3C Design Tokens (DTCG) format")
|
|
@@ -57,6 +58,21 @@ program
|
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
const spinner = ora({ text: "Starting extraction...", stream: opts.jsonOnly ? process.stderr : process.stdout }).start();
|
|
61
|
+
|
|
62
|
+
try {
|
|
63
|
+
const robots = await checkRobotsTxt(url);
|
|
64
|
+
if (robots.status === "ok" && robots.allowed === false) {
|
|
65
|
+
spinner.warn(
|
|
66
|
+
chalk.hex("#FFB86C")(
|
|
67
|
+
`robots.txt disallows this path (rule: "${robots.rule}"). Proceeding anyway — respect the site's terms.`
|
|
68
|
+
)
|
|
69
|
+
);
|
|
70
|
+
spinner.start("Starting extraction...");
|
|
71
|
+
}
|
|
72
|
+
} catch {
|
|
73
|
+
// robots check is advisory; never block extraction
|
|
74
|
+
}
|
|
75
|
+
|
|
60
76
|
let browser = null;
|
|
61
77
|
|
|
62
78
|
try {
|
|
@@ -77,10 +93,18 @@ program
|
|
|
77
93
|
if (opts.noSandbox && opts.browser === 'chromium') {
|
|
78
94
|
launchArgs.push("--no-sandbox", "--disable-setuid-sandbox");
|
|
79
95
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
96
|
+
if (process.env.BROWSER_CDP_ENDPOINT) {
|
|
97
|
+
if (opts.browser !== 'chromium') {
|
|
98
|
+
throw new Error("BROWSER_CDP_ENDPOINT is only supported with --browser chromium.");
|
|
99
|
+
}
|
|
100
|
+
spinner.text = "Connecting over CDP...";
|
|
101
|
+
browser = await browserType.connectOverCDP(process.env.BROWSER_CDP_ENDPOINT);
|
|
102
|
+
} else {
|
|
103
|
+
browser = await browserType.launch({
|
|
104
|
+
headless: !useHeaded,
|
|
105
|
+
args: launchArgs,
|
|
106
|
+
});
|
|
107
|
+
}
|
|
84
108
|
|
|
85
109
|
try {
|
|
86
110
|
const isMultiPage = opts.pages || opts.sitemap;
|
|
@@ -101,8 +125,7 @@ program
|
|
|
101
125
|
let additionalUrls;
|
|
102
126
|
if (opts.sitemap) {
|
|
103
127
|
// Try post-redirect URL first, fall back to user-provided URL
|
|
104
|
-
// (sites
|
|
105
|
-
// but sitemap lives at www.spotify.com)
|
|
128
|
+
// (some sites redirect to a subdomain while the sitemap stays on www)
|
|
106
129
|
additionalUrls = await parseSitemap(result.url, maxPages);
|
|
107
130
|
if (additionalUrls.length === 0 && result.url !== url) {
|
|
108
131
|
additionalUrls = await parseSitemap(url, maxPages);
|
|
@@ -155,7 +178,7 @@ program
|
|
|
155
178
|
await browser.close();
|
|
156
179
|
browser = null;
|
|
157
180
|
|
|
158
|
-
if (useHeaded) throw err;
|
|
181
|
+
if (useHeaded || process.env.BROWSER_CDP_ENDPOINT) throw err;
|
|
159
182
|
|
|
160
183
|
if (
|
|
161
184
|
err.message.includes("Timeout") ||
|
package/lib/extractors.js
CHANGED
|
@@ -131,7 +131,7 @@ export async function extractBranding(
|
|
|
131
131
|
timeouts.push('Body content rendering');
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
// Give SPAs time to hydrate
|
|
134
|
+
// Give SPAs time to hydrate
|
|
135
135
|
spinner.start("Waiting for SPA hydration...");
|
|
136
136
|
const hydrationTime = 8000 * timeoutMultiplier;
|
|
137
137
|
await page.waitForTimeout(hydrationTime);
|
|
@@ -464,7 +464,7 @@ export async function extractBranding(
|
|
|
464
464
|
// Merge hover/focus colors into palette
|
|
465
465
|
hoverFocusColors.forEach(({ color, property }) => {
|
|
466
466
|
const isDuplicate = colors.palette.some((c) => c.color === color);
|
|
467
|
-
if (!isDuplicate && color) {
|
|
467
|
+
if (!isDuplicate && color && /^(rgba?\(|hsla?\(|#)/i.test(color.trim())) {
|
|
468
468
|
// Normalize and add to palette
|
|
469
469
|
const rgbaMatch = color.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/);
|
|
470
470
|
let normalized = color.toLowerCase();
|
|
@@ -627,7 +627,7 @@ export async function extractBranding(
|
|
|
627
627
|
frameworks,
|
|
628
628
|
};
|
|
629
629
|
|
|
630
|
-
// Detect canvas-only / WebGL sites
|
|
630
|
+
// Detect canvas-only / WebGL sites
|
|
631
631
|
const isCanvasOnly = await page.evaluate(() => {
|
|
632
632
|
const canvases = document.querySelectorAll("canvas");
|
|
633
633
|
const hasRealContent = document.body.textContent.trim().length > 200;
|
|
@@ -641,7 +641,7 @@ export async function extractBranding(
|
|
|
641
641
|
|
|
642
642
|
if (isCanvasOnly) {
|
|
643
643
|
result.note =
|
|
644
|
-
"This website uses canvas/WebGL rendering
|
|
644
|
+
"This website uses canvas/WebGL rendering. Design system cannot be extracted from DOM.";
|
|
645
645
|
result.isCanvasOnly = true;
|
|
646
646
|
}
|
|
647
647
|
|
|
@@ -1000,6 +1000,8 @@ async function extractColors(page) {
|
|
|
1000
1000
|
// Only accept if it contains rgb/hsl/# inside
|
|
1001
1001
|
return /#[0-9a-f]{3,6}|rgba?\(|hsla?\(/i.test(value);
|
|
1002
1002
|
}
|
|
1003
|
+
// Reject modern color functions not parseable to hex (oklab, oklch, lch, lab, color(), display-p3)
|
|
1004
|
+
if (/^(oklab|oklch|lch|lab|color)\s*\(/i.test(value)) return false;
|
|
1003
1005
|
// Accept hex, rgb, hsl, named colors
|
|
1004
1006
|
return /^(#[0-9a-f]{3,8}|rgba?\(|hsla?\(|[a-z]+)/i.test(value);
|
|
1005
1007
|
}
|
|
@@ -1171,12 +1173,14 @@ async function extractColors(page) {
|
|
|
1171
1173
|
const colorRegex = /(#[0-9a-f]{3,8}|rgba?\([^)]+\)|hsla?\([^)]+\)|[a-z]+)/gi;
|
|
1172
1174
|
const matches = colorValue.match(colorRegex) || [];
|
|
1173
1175
|
|
|
1174
|
-
// Filter out invalid matches
|
|
1176
|
+
// Filter out invalid matches and modern CSS function names that aren't parseable
|
|
1177
|
+
const cssColorFunctions = new Set(['oklab','oklch','lch','lab','color','display','hsl','rgb','rgba','hsla','inherit','initial','unset','none','auto','normal']);
|
|
1175
1178
|
return matches.filter(c =>
|
|
1176
1179
|
c !== 'transparent' &&
|
|
1177
1180
|
c !== 'rgba(0, 0, 0, 0)' &&
|
|
1178
1181
|
c !== 'rgba(0,0,0,0)' &&
|
|
1179
|
-
c.length > 2
|
|
1182
|
+
c.length > 2 &&
|
|
1183
|
+
!cssColorFunctions.has(c.toLowerCase())
|
|
1180
1184
|
);
|
|
1181
1185
|
}
|
|
1182
1186
|
|
|
@@ -1244,9 +1248,10 @@ async function extractColors(page) {
|
|
|
1244
1248
|
return true;
|
|
1245
1249
|
}
|
|
1246
1250
|
|
|
1247
|
-
// Chromatic colors
|
|
1248
|
-
// or framework internals
|
|
1249
|
-
|
|
1251
|
+
// Chromatic colors with no background usage and no semantic context are likely browser
|
|
1252
|
+
// defaults or framework internals. But link/text brand colors have semantic score > count
|
|
1253
|
+
// (they appear on semantically-labeled elements), so exempt those.
|
|
1254
|
+
if (data.bgCount === 0 && data.score < data.count * 1.5) {
|
|
1250
1255
|
const hex = normalized.replace('#', '');
|
|
1251
1256
|
const r = parseInt(hex.substring(0, 2), 16);
|
|
1252
1257
|
const g = parseInt(hex.substring(2, 4), 16);
|
|
@@ -1254,7 +1259,6 @@ async function extractColors(page) {
|
|
|
1254
1259
|
const max = Math.max(r, g, b);
|
|
1255
1260
|
const min = Math.min(r, g, b);
|
|
1256
1261
|
const saturation = max === 0 ? 0 : (max - min) / max;
|
|
1257
|
-
// If saturation > 0.3, this is a chromatic color with no background usage
|
|
1258
1262
|
if (saturation > 0.3) {
|
|
1259
1263
|
return true;
|
|
1260
1264
|
}
|
|
@@ -1347,8 +1351,10 @@ async function extractColors(page) {
|
|
|
1347
1351
|
|
|
1348
1352
|
const palette = Array.from(colorMap.entries())
|
|
1349
1353
|
.filter(([normalizedColor, data]) => {
|
|
1350
|
-
//
|
|
1351
|
-
|
|
1354
|
+
// High-score colors (semantically significant: buttons, headers with brand colors) bypass
|
|
1355
|
+
// the count threshold — a header that appears once can still be a primary brand color.
|
|
1356
|
+
const highScore = data.score >= 10 || (data.count > 0 && data.score / data.count >= 3);
|
|
1357
|
+
if (!highScore && data.count < threshold) return false;
|
|
1352
1358
|
|
|
1353
1359
|
// Filter out structural colors (very high usage without semantic context)
|
|
1354
1360
|
if (isStructuralColor(data, totalElements)) {
|
package/lib/robots.js
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// Product token sent as the User-Agent header when fetching robots.txt and
// used to select the matching rule group from the parsed file.
const UA = "Dembrandt";
|
|
2
|
+
|
|
3
|
+
/**
 * Fetch the robots.txt of the target URL's host and evaluate whether the
 * target is allowed for this tool's user agent (falling back to the `*` group).
 *
 * Network errors, timeouts and non-2xx responses do not throw; they are
 * reported as `{ status: "unavailable" }`.
 *
 * @param {string} targetUrl - Absolute URL to check. Passed to `new URL`,
 *   which throws a TypeError when the value is not a valid absolute URL.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` aborts the
 *   robots.txt request after this many milliseconds (default 5000).
 * @returns {Promise<
 *   { status: "unavailable", robotsUrl: string } |
 *   { status: "ok", robotsUrl: string, allowed: boolean, rule: string | null }
 * >}
 */
export async function checkRobotsTxt(targetUrl, { timeoutMs = 5000 } = {}) {
  const u = new URL(targetUrl);
  const robotsUrl = `${u.protocol}//${u.host}/robots.txt`;
  // Rules can target the query string (e.g. "Disallow: /*?"), so match against
  // path + query rather than the bare pathname. The fragment is never included.
  const path = `${u.pathname || "/"}${u.search}`;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  let body;
  try {
    const res = await fetch(robotsUrl, {
      signal: controller.signal,
      headers: { "User-Agent": UA },
    });
    if (!res.ok) return { status: "unavailable", robotsUrl };
    // Reading the body stays inside the try so the abort timer covers it too.
    body = await res.text();
  } catch {
    // Deliberate best-effort: any fetch/abort failure means "no robots info".
    return { status: "unavailable", robotsUrl };
  } finally {
    clearTimeout(timer);
  }

  const groups = parseRobots(body);
  // Prefer a group that names this tool explicitly; otherwise use the wildcard group.
  const rules = matchGroup(groups, UA) || matchGroup(groups, "*") || [];
  const decision = evaluate(rules, path);

  return { status: "ok", robotsUrl, ...decision };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Parse robots.txt text into user-agent groups.
 *
 * Consecutive `User-agent` lines share one group; a `User-agent` line that
 * follows an allow/disallow rule starts a new group. Rules that appear before
 * any `User-agent` line are collected in an implicit `*` group. Comments
 * (`# ...`), blank lines, lines without a colon and unknown fields are ignored.
 *
 * @param {string} text - Raw robots.txt contents.
 * @returns {Array<{ agents: string[], rules: Array<{ type: "allow" | "disallow", value: string }> }>}
 *   Groups in file order; agent names are lower-cased, rule values are kept verbatim.
 */
function parseRobots(text) {
  const groups = [];
  let current = null;
  let lastWasAgent = false;

  // RFC 9309 permits CR, LF or CRLF as line terminators, so split on all three.
  for (const raw of text.split(/\r\n|\r|\n/)) {
    const line = raw.replace(/#.*$/, "").trim();
    if (!line) continue;
    const idx = line.indexOf(":");
    if (idx === -1) continue;
    const field = line.slice(0, idx).trim().toLowerCase();
    const value = line.slice(idx + 1).trim();

    if (field === "user-agent") {
      // Start a new group unless we are still inside a run of User-agent lines.
      if (!current || !lastWasAgent) {
        current = { agents: [], rules: [] };
        groups.push(current);
      }
      current.agents.push(value.toLowerCase());
      lastWasAgent = true;
    } else if (field === "allow" || field === "disallow") {
      if (!current) {
        // Rules with no preceding User-agent line apply to every agent.
        current = { agents: ["*"], rules: [] };
        groups.push(current);
      }
      current.rules.push({ type: field, value });
      lastWasAgent = false;
    }
  }
  return groups;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Collect the rules that apply to a given user agent.
 *
 * The agent name is compared case-insensitively against each group's
 * (already lower-cased) agent list. When several groups name the same agent,
 * their rules are combined in file order, as RFC 9309 requires, instead of
 * using only the first group.
 *
 * @param {Array<{ agents: string[], rules: object[] }>} groups - Parsed groups.
 * @param {string} agent - Agent name to look up (e.g. the tool's token or "*").
 * @returns {object[] | null} Combined rules, or null when no group names the agent.
 */
function matchGroup(groups, agent) {
  const wanted = agent.toLowerCase();
  const matching = groups.filter((g) => g.agents.includes(wanted));
  // null (not []) lets callers fall back to another agent with `||`.
  if (matching.length === 0) return null;
  return matching.flatMap((g) => g.rules);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Decide whether a path is allowed under a list of allow/disallow rules.
 *
 * The matching rule with the longest pattern wins. When an allow and a
 * disallow rule of equal length both match, the allow rule wins (the least
 * restrictive choice, per RFC 9309). Rules with an empty value are ignored,
 * and a path that matches no rule is allowed.
 *
 * @param {Array<{ type: "allow" | "disallow", value: string }>} rules - Rules to apply.
 * @param {string} path - URL path (plus query, if any) to test.
 * @returns {{ allowed: boolean, rule: string | null }} The verdict and the
 *   pattern that produced it (null when no rule matched).
 */
function evaluate(rules, path) {
  let best = { type: null, length: -1, value: "" };
  for (const r of rules) {
    // An empty Allow/Disallow value matches nothing.
    if (!r.value) continue;
    if (!pathMatches(path, r.value)) continue;

    const isLonger = r.value.length > best.length;
    // Equal specificity: prefer allow over a previously chosen disallow.
    const allowWinsTie =
      r.value.length === best.length && r.type === "allow" && best.type === "disallow";
    if (isLonger || allowWinsTie) best = { ...r, length: r.value.length };
  }
  if (best.type === "disallow") return { allowed: false, rule: best.value };
  return { allowed: true, rule: best.value || null };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Test a URL path against a robots.txt path pattern.
 *
 * Supported syntax: a plain prefix, `*` matching any run of characters
 * (including none), and a trailing `$` anchoring the match to the end of the
 * path. Without `$`, the pattern only needs to match a prefix of the path.
 *
 * @param {string} path - Path to test.
 * @param {string} pattern - Rule value from an Allow/Disallow line.
 * @returns {boolean} True when the pattern matches the path.
 */
function pathMatches(path, pattern) {
  const anchored = pattern.endsWith("$");
  const p = anchored ? pattern.slice(0, -1) : pattern;
  const parts = p.split("*");

  // The text before the first wildcard must be a literal prefix of the path.
  const first = parts[0];
  if (!path.startsWith(first)) return false;
  if (parts.length === 1) {
    // No wildcard: a prefix match, or an exact match when anchored.
    return !anchored || path.length === first.length;
  }

  // Middle segments: taking the leftmost occurrence leaves the most room for
  // the segments that follow, so no backtracking is needed.
  let i = first.length;
  const lastIdx = parts.length - 1;
  for (let k = 1; k < lastIdx; k++) {
    const seg = parts[k];
    const found = path.indexOf(seg, i);
    if (found === -1) return false;
    i = found + seg.length;
  }

  const last = parts[lastIdx];
  if (anchored) {
    // The final segment must sit at the very end of the path. Checking the
    // suffix (not the leftmost occurrence) lets "/*.php$" match "/a.php.php".
    return path.length - last.length >= i && path.endsWith(last);
  }
  return path.indexOf(last, i) !== -1;
}
|
package/mcp-server.js
CHANGED
|
@@ -114,7 +114,7 @@ function toolHandler(pick, extraOptions = {}) {
|
|
|
114
114
|
|
|
115
115
|
// ── Shared params ──────────────────────────────────────────────────────
|
|
116
116
|
|
|
117
|
-
const url = z.string().describe("Website URL (e.g.
|
|
117
|
+
const url = z.string().describe("Website URL (e.g. example.com)");
|
|
118
118
|
const slow = z.boolean().optional().default(false).describe("3x timeouts for heavy SPAs");
|
|
119
119
|
|
|
120
120
|
// ── Tools ──────────────────────────────────────────────────────────────
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dembrandt",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Extract design tokens and
|
|
3
|
+
"version": "0.12.0",
|
|
4
|
+
"description": "Extract design tokens and publicly visible CSS information from any website",
|
|
5
5
|
"mcpName": "io.github.dembrandt/dembrandt",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"type": "module",
|
|
@@ -16,8 +16,6 @@
|
|
|
16
16
|
],
|
|
17
17
|
"scripts": {
|
|
18
18
|
"start": "node index.js",
|
|
19
|
-
"brand-challenge": "node run-no-login-challenge.mjs",
|
|
20
|
-
"brand-challenge:report": "node run-no-login-challenge.mjs || true",
|
|
21
19
|
"install-browser": "npx playwright install chromium firefox || echo 'Playwright browser installation failed. You may need to install system dependencies manually.'",
|
|
22
20
|
"local-ui": "cd local-ui && npm start",
|
|
23
21
|
"qa:baseline": "node test/qa.mjs --baseline",
|
|
@@ -28,10 +26,9 @@
|
|
|
28
26
|
"design-tokens",
|
|
29
27
|
"design-system",
|
|
30
28
|
"branding",
|
|
31
|
-
"
|
|
29
|
+
"css-analysis",
|
|
32
30
|
"cli",
|
|
33
|
-
"playwright"
|
|
34
|
-
"extraction"
|
|
31
|
+
"playwright"
|
|
35
32
|
],
|
|
36
33
|
"repository": {
|
|
37
34
|
"type": "git",
|
|
@@ -56,4 +53,4 @@
|
|
|
56
53
|
"engines": {
|
|
57
54
|
"node": ">=18.0.0"
|
|
58
55
|
}
|
|
59
|
-
}
|
|
56
|
+
}
|