dembrandt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +378 -0
- package/index.js +151 -0
- package/lib/display.js +317 -0
- package/lib/extractors.js +713 -0
- package/package.json +48 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 thevangelist
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# 🎨 Dembrandt
|
|
2
|
+
|
|
3
|
+
[npm version](https://www.npmjs.com/package/dembrandt)
|
|
4
|
+
[npm downloads](https://www.npmjs.com/package/dembrandt)
|
|
5
|
+
[License: MIT](https://github.com/thevangelist/dembrandt/blob/main/LICENSE)
|
|
6
|
+
|
|
7
|
+
A CLI tool for extracting design tokens and brand assets from any website. Powered by Playwright with advanced bot detection avoidance.
|
|
8
|
+
|
|
9
|
+

|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npx dembrandt stripe.com
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
No installation required! Extract design tokens from any website in seconds. Or install globally with `npm install -g dembrandt`.
|
|
18
|
+
|
|
19
|
+
## What It Does
|
|
20
|
+
|
|
21
|
+
Dembrandt analyzes live websites and extracts their complete design system:
|
|
22
|
+
|
|
23
|
+
- **Logo** — Logo detection (img/svg) with dimensions and source URL
|
|
24
|
+
- **Colors** — Semantic colors, color palette with confidence scoring, CSS variables
|
|
25
|
+
- **Typography** — Font families, sizes, weights, line heights, font sources (Google Fonts, Adobe Fonts, custom)
|
|
26
|
+
- **Spacing** — Margin and padding scales with grid system detection (4px/8px/custom)
|
|
27
|
+
- **Border Radius** — Corner radius patterns with usage frequency
|
|
28
|
+
- **Shadows** — Box shadow values for elevation systems
|
|
29
|
+
- **Buttons** — Component styles with variants and states
|
|
30
|
+
- **Inputs** — Form field styles (input, textarea, select)
|
|
31
|
+
- **Breakpoints** — Responsive design breakpoints from media queries
|
|
32
|
+
- **Icons** — Icon system detection (Font Awesome, Material Icons, SVG)
|
|
33
|
+
- **Frameworks** — CSS framework detection (Tailwind, Bootstrap, Material-UI, Chakra)
|
|
34
|
+
|
|
35
|
+
Perfect for competitive analysis, brand audits, or rebuilding a brand when you don't have design guidelines.
|
|
36
|
+
|
|
37
|
+
## Why It Matters
|
|
38
|
+
|
|
39
|
+
**Designers** — Analyze competitor systems, document production tokens, audit brand consistency
|
|
40
|
+
|
|
41
|
+
**Developers** — Migrate design tokens, reverse engineer components, validate implementations
|
|
42
|
+
|
|
43
|
+
**Product Managers** — Track competitor evolution, quantify design debt, evaluate vendors
|
|
44
|
+
|
|
45
|
+
**Marketing** — Audit competitor brands, plan rebrands, monitor brand compliance
|
|
46
|
+
|
|
47
|
+
**Engineering Leaders** — Measure technical debt, plan migrations, assess acquisition targets
|
|
48
|
+
|
|
49
|
+
## How It Works
|
|
50
|
+
|
|
51
|
+
Uses Playwright to render the page, extracts computed styles from the DOM, analyzes color usage and confidence, groups similar typography, detects spacing patterns, and returns actionable design tokens.
|
|
52
|
+
|
|
53
|
+
### Extraction Process
|
|
54
|
+
|
|
55
|
+
1. **Browser Launch** - Launches Chromium with stealth configuration
|
|
56
|
+
2. **Anti-Detection** - Injects scripts to bypass bot detection
|
|
57
|
+
3. **Navigation** - Navigates to target URL with retry logic
|
|
58
|
+
4. **Hydration** - Waits for SPAs to fully load (8s initial + 4s stabilization)
|
|
59
|
+
5. **Content Validation** - Verifies page content is substantial (>500 chars)
|
|
60
|
+
6. **Parallel Extraction** - Runs all extractors concurrently for speed
|
|
61
|
+
7. **Analysis** - Analyzes computed styles, DOM structure, and CSS variables
|
|
62
|
+
8. **Scoring** - Assigns confidence scores based on context and usage
|
|
63
|
+
|
|
64
|
+
### Color Confidence
|
|
65
|
+
|
|
66
|
+
- **High** — Logo, brand elements, primary buttons
|
|
67
|
+
- **Medium** — Interactive elements, icons, navigation
|
|
68
|
+
- **Low** — Generic UI components (filtered from display)
|
|
69
|
+
|
|
70
|
+
Only shows high and medium confidence colors in terminal. Full palette in JSON.
|
|
71
|
+
|
|
72
|
+
### Typography Detection
|
|
73
|
+
|
|
74
|
+
Samples all heading levels (h1-h6), body text, buttons, links. Groups by font family, size, and weight. Detects Google Fonts, Adobe Fonts, custom @font-face.
|
|
75
|
+
|
|
76
|
+
### Framework Detection
|
|
77
|
+
|
|
78
|
+
Recognizes Tailwind CSS, Bootstrap, Material-UI, and others by class patterns and CDN links.
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
### Using npx (Recommended)
|
|
83
|
+
|
|
84
|
+
No installation needed! Run directly with `npx`:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
npx dembrandt stripe.com
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The first run will automatically install Chromium (~170MB).
|
|
91
|
+
|
|
92
|
+
### Global Installation
|
|
93
|
+
|
|
94
|
+
Install globally for repeated use:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
npm install -g dembrandt
|
|
98
|
+
dembrandt stripe.com
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Prerequisites
|
|
102
|
+
|
|
103
|
+
- Node.js 18 or higher
|
|
104
|
+
|
|
105
|
+
### Development Setup
|
|
106
|
+
|
|
107
|
+
For contributors who want to work on dembrandt:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
git clone https://github.com/thevangelist/dembrandt.git
|
|
111
|
+
cd dembrandt
|
|
112
|
+
npm install
|
|
113
|
+
npm link
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Usage
|
|
117
|
+
|
|
118
|
+
### Basic Usage
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Using npx (no installation)
|
|
122
|
+
npx dembrandt <url>
|
|
123
|
+
|
|
124
|
+
# Or if installed globally
|
|
125
|
+
dembrandt <url>
|
|
126
|
+
|
|
127
|
+
# Examples
|
|
128
|
+
dembrandt stripe.com
|
|
129
|
+
dembrandt https://github.com
|
|
130
|
+
dembrandt tailwindcss.com
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Options
|
|
134
|
+
|
|
135
|
+
**`--json-only`** - Output raw JSON to stdout instead of formatted terminal display
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
dembrandt stripe.com --json-only > tokens.json
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Note: JSON is automatically saved to `output/domain.com/` only when this flag is not set; with `--json-only`, the JSON is written to stdout and no file is saved.
|
|
142
|
+
|
|
143
|
+
**`-d, --debug`** - Run with visible browser and detailed logs
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
dembrandt stripe.com --debug
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Useful for troubleshooting bot detection, timeouts, or extraction issues.
|
|
150
|
+
|
|
151
|
+
## Output
|
|
152
|
+
|
|
153
|
+
### Automatic JSON Saves
|
|
154
|
+
|
|
155
|
+
Every extraction is automatically saved to `output/domain.com/YYYY-MM-DDTHH-MM-SS.json` with:
|
|
156
|
+
|
|
157
|
+
- Complete design token data
|
|
158
|
+
- Timestamped for version tracking
|
|
159
|
+
- Organized by domain
|
|
160
|
+
|
|
161
|
+
Example: `output/stripe.com/2025-11-22T14-30-45.json`
|
|
162
|
+
|
|
163
|
+
### Terminal Output
|
|
164
|
+
|
|
165
|
+
Clean, formatted tables showing:
|
|
166
|
+
|
|
167
|
+
- Color palette with confidence ratings (with visual swatches)
|
|
168
|
+
- CSS variables with color previews
|
|
169
|
+
- Typography hierarchy with context
|
|
170
|
+
- Spacing scale (4px/8px grid detection)
|
|
171
|
+
- Shadow system
|
|
172
|
+
- Button variants
|
|
173
|
+
- Component style breakdowns
|
|
174
|
+
- Framework and icon system detection
|
|
175
|
+
|
|
176
|
+
### JSON Output Format
|
|
177
|
+
|
|
178
|
+
Complete extraction data for programmatic use:
|
|
179
|
+
|
|
180
|
+
```json
|
|
181
|
+
{
|
|
182
|
+
"url": "https://example.com",
|
|
183
|
+
"extractedAt": "2025-11-22T...",
|
|
184
|
+
"logo": { "source": "img", "url": "...", "width": 120, "height": 40 },
|
|
185
|
+
"colors": {
|
|
186
|
+
"semantic": { "primary": "#3b82f6", ... },
|
|
187
|
+
"palette": [{ "color": "#3b82f6", "confidence": "high", "count": 45, "sources": [...] }],
|
|
188
|
+
"cssVariables": { "--color-primary": "#3b82f6", ... }
|
|
189
|
+
},
|
|
190
|
+
"typography": {
|
|
191
|
+
"styles": [{ "fontFamily": "Inter", "fontSize": "16px", "fontWeight": "400", ... }],
|
|
192
|
+
"sources": { "googleFonts": [...], "adobeFonts": false, "customFonts": [...] }
|
|
193
|
+
},
|
|
194
|
+
"spacing": { "scaleType": "8px", "commonValues": [{ "px": "16px", "rem": "1rem", "count": 42 }, ...] },
|
|
195
|
+
"borderRadius": { "values": [{ "value": "8px", "count": 15, "confidence": "high" }, ...] },
|
|
196
|
+
"shadows": [{ "shadow": "0 2px 4px rgba(0,0,0,0.1)", "count": 8, "confidence": "high" }, ...],
|
|
197
|
+
"components": {
|
|
198
|
+
"buttons": [{ "backgroundColor": "...", "color": "...", "padding": "...", ... }],
|
|
199
|
+
"inputs": [{ "type": "input", "border": "...", "borderRadius": "...", ... }]
|
|
200
|
+
},
|
|
201
|
+
"breakpoints": [{ "px": "768px" }, ...],
|
|
202
|
+
"iconSystem": [{ "name": "Font Awesome", "type": "icon-font" }, ...],
|
|
203
|
+
"frameworks": [{ "name": "Tailwind CSS", "confidence": "high", "evidence": "class patterns" }]
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Examples
|
|
208
|
+
|
|
209
|
+
### Extract Design Tokens
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
# Analyze a single site (auto-saves JSON to output/stripe.com/)
|
|
213
|
+
dembrandt stripe.com
|
|
214
|
+
|
|
215
|
+
# View saved JSON files
|
|
216
|
+
ls output/stripe.com/
|
|
217
|
+
|
|
218
|
+
# Output to stdout for piping
|
|
219
|
+
dembrandt stripe.com --json-only | jq '.colors.semantic'
|
|
220
|
+
|
|
221
|
+
# Debug mode for difficult sites
|
|
222
|
+
dembrandt example.com --debug
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Compare Competitors
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Extract tokens from multiple competitors (auto-saved to output/)
|
|
229
|
+
for site in stripe.com square.com paypal.com; do
|
|
230
|
+
dembrandt $site
|
|
231
|
+
done
|
|
232
|
+
|
|
233
|
+
# Compare color palettes from most recent extractions
|
|
234
|
+
jq '.colors.palette[] | select(.confidence=="high")' output/stripe.com/2025-11-22T*.json output/square.com/2025-11-22T*.json
|
|
235
|
+
|
|
236
|
+
# Compare semantic colors across competitors
|
|
237
|
+
jq '.colors.semantic' output/*/2025-11-22T*.json
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Integration with Design Tools
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# Extract and convert to custom config format
|
|
244
|
+
dembrandt mysite.com --json-only | jq '{
|
|
245
|
+
colors: .colors.semantic,
|
|
246
|
+
fontFamily: .typography.sources,
|
|
247
|
+
spacing: .spacing.commonValues
|
|
248
|
+
}' > design-tokens.json
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Use Cases
|
|
252
|
+
|
|
253
|
+
### Brand Audits
|
|
254
|
+
|
|
255
|
+
Extract and document your company's current design system from production websites.
|
|
256
|
+
|
|
257
|
+
### Competitive Analysis
|
|
258
|
+
|
|
259
|
+
Compare design systems across competitors to identify trends and opportunities.
|
|
260
|
+
|
|
261
|
+
### Design System Migration
|
|
262
|
+
|
|
263
|
+
Document legacy design tokens before migrating to a new system.
|
|
264
|
+
|
|
265
|
+
### Reverse Engineering
|
|
266
|
+
|
|
267
|
+
Rebuild a brand when original design guidelines are unavailable.
|
|
268
|
+
|
|
269
|
+
### Quality Assurance
|
|
270
|
+
|
|
271
|
+
Verify design consistency across different pages and environments.
|
|
272
|
+
|
|
273
|
+
## Advanced Features
|
|
274
|
+
|
|
275
|
+
### Bot Detection Avoidance
|
|
276
|
+
|
|
277
|
+
- Stealth mode with anti-detection scripts
|
|
278
|
+
- Automatic fallback to visible browser on detection
|
|
279
|
+
- Human-like interaction simulation (mouse movement, scrolling)
|
|
280
|
+
- Custom user agent and browser fingerprinting
|
|
281
|
+
|
|
282
|
+
### Smart Retry Logic
|
|
283
|
+
|
|
284
|
+
- Automatic retry on navigation failures (up to 2 attempts)
|
|
285
|
+
- SPA hydration detection and waiting
|
|
286
|
+
- Content validation to ensure page is fully loaded
|
|
287
|
+
- Detailed progress logging at each step
|
|
288
|
+
|
|
289
|
+
### Comprehensive Logging
|
|
290
|
+
|
|
291
|
+
- Real-time spinner with step-by-step progress
|
|
292
|
+
- Detailed extraction metrics (colors found, styles detected, etc.)
|
|
293
|
+
- Error context with URL, stage, and attempt information
|
|
294
|
+
- Debug mode with full stack traces
|
|
295
|
+
|
|
296
|
+
## Troubleshooting
|
|
297
|
+
|
|
298
|
+
### Bot Detection Issues
|
|
299
|
+
|
|
300
|
+
If you encounter timeouts or network errors:
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
dembrandt example.com --debug
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
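With `--debug`, the extraction runs in a visible browser from the start and prints detailed logs. (Without the flag, a headless run that fails with a timeout or network error is retried automatically in a visible browser.)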
|
|
307
|
+
|
|
308
|
+
### Page Not Loading
|
|
309
|
+
|
|
310
|
+
Some sites require longer load times. The tool waits 8 seconds for SPA hydration, but you can modify this in the source.
|
|
311
|
+
|
|
312
|
+
### Empty Content
|
|
313
|
+
|
|
314
|
+
If content length is < 500 chars, the tool will automatically retry (up to 2 attempts).
|
|
315
|
+
|
|
316
|
+
### Debug Mode
|
|
317
|
+
|
|
318
|
+
Use `--debug` to see:
|
|
319
|
+
|
|
320
|
+
- Browser launch confirmation
|
|
321
|
+
- Step-by-step progress logs
|
|
322
|
+
- Full error stack traces
|
|
323
|
+
- Extraction metrics
|
|
324
|
+
|
|
325
|
+
## Limitations
|
|
326
|
+
|
|
327
|
+
- Captures default/light theme only (dark mode not detected)
|
|
328
|
+
- Hover/focus states extracted from CSS (not fully interactive)
|
|
329
|
+
- Canvas/WebGL-rendered sites cannot be analyzed (e.g., Tesla, Apple Vision Pro demos)
|
|
330
|
+
- JavaScript-heavy sites require hydration time (8s initial + 4s stabilization)
|
|
331
|
+
- Some dynamically-loaded content may be missed
|
|
332
|
+
- Requires viewport simulation at 1920x1080
|
|
333
|
+
|
|
334
|
+
## Architecture
|
|
335
|
+
|
|
336
|
+
```
|
|
337
|
+
dembrandt/
|
|
338
|
+
├── index.js              # CLI entry point, command handling
|
|
339
|
+
├── lib/
|
|
340
|
+
│   ├── extractors.js     # Core extraction logic with stealth mode
|
|
341
|
+
│   └── display.js        # Terminal output formatting
|
|
342
|
+
├── output/               # Auto-saved JSON extractions (gitignored)
|
|
343
|
+
│   ├── stripe.com/
|
|
344
|
+
│   │   ├── 2025-11-22T14-30-45.json
|
|
345
|
+
│   │   └── 2025-11-22T15-12-33.json
|
|
346
|
+
│   └── github.com/
|
|
347
|
+
│       └── 2025-11-22T14-35-12.json
|
|
348
|
+
├── package.json
|
|
349
|
+
└── README.md
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
## Ethics & Legality
|
|
353
|
+
|
|
354
|
+
Dembrandt extracts publicly available design information (colors, fonts, spacing) from website DOMs for analysis purposes. This falls under fair use in most jurisdictions (USA's DMCA § 1201(f), EU Software Directive 2009/24/EC) when used for competitive analysis, documentation, or learning.
|
|
355
|
+
|
|
356
|
+
**Legal:** Analyzing public HTML/CSS is generally legal. Does not bypass protections or violate copyright. Check site ToS before mass extraction.
|
|
357
|
+
|
|
358
|
+
**Ethical:** Use for inspiration and analysis, not direct copying. Respect servers (no mass crawling), give credit to sources, be transparent about data origin.
|
|
359
|
+
|
|
360
|
+
## Contributing
|
|
361
|
+
|
|
362
|
+
Issues and pull requests welcome. Please include:
|
|
363
|
+
|
|
364
|
+
- Clear description of the issue/feature
|
|
365
|
+
- Example URLs that demonstrate the problem
|
|
366
|
+
- Expected vs actual behavior
|
|
367
|
+
|
|
368
|
+
## License
|
|
369
|
+
|
|
370
|
+
MIT
|
|
371
|
+
|
|
372
|
+
## Roadmap
|
|
373
|
+
|
|
374
|
+
- [ ] Dark mode detection and extraction
|
|
375
|
+
- [ ] Animation/transition detection
|
|
376
|
+
- [ ] Interactive state capture (hover, focus, active)
|
|
377
|
+
- [ ] Multi-page analysis
|
|
378
|
+
- [ ] Configuration file support
|
package/index.js
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Dembrandt - Design Token Extraction CLI
|
|
5
|
+
*
|
|
6
|
+
* Extracts design tokens, brand colors, typography, spacing, and component styles
|
|
7
|
+
* from any website using Playwright with advanced bot detection avoidance.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { program } from "commander";
|
|
11
|
+
import chalk from "chalk";
|
|
12
|
+
import ora from "ora";
|
|
13
|
+
import { chromium } from "playwright";
|
|
14
|
+
import { extractBranding } from "./lib/extractors.js";
|
|
15
|
+
import { displayResults } from "./lib/display.js";
|
|
16
|
+
import { writeFileSync, mkdirSync } from "fs";
|
|
17
|
+
import { join, dirname } from "path";
|
|
18
|
+
import { fileURLToPath } from "url";
|
|
19
|
+
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
|
|
22
|
+
/**
 * Add an `https://` scheme to bare hostnames so `dembrandt stripe.com` works.
 * The scheme check is case-insensitive, so `HTTPS://example.com` is left alone.
 *
 * @param {string} input - URL or hostname as typed on the command line.
 * @returns {string} A URL that starts with http:// or https://.
 */
function normalizeUrl(input) {
  return /^https?:\/\//i.test(input) ? input : `https://${input}`;
}

/**
 * Get a printable message from anything that was thrown (not only Errors).
 *
 * @param {unknown} err - The thrown value.
 * @returns {string} The error message, or the stringified value.
 */
function errorMessage(err) {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Decide whether a headless failure should be retried in a visible browser.
 * Timeouts and Chromium network errors are treated as signs of bot blocking.
 *
 * @param {unknown} err - Error thrown by the extraction.
 * @returns {boolean} True when the message contains "Timeout" or "net::ERR_".
 */
function looksLikeBotBlock(err) {
  const message = errorMessage(err);
  return message.includes("Timeout") || message.includes("net::ERR_");
}

/**
 * Close a browser and ignore any error from the close itself, so a cleanup
 * failure never hides the error that caused the cleanup.
 *
 * @param {{ close: () => Promise<void> }} browser - Browser to close.
 * @returns {Promise<void>}
 */
async function closeQuietly(browser) {
  try {
    await browser.close();
  } catch {
    // Best-effort cleanup: the browser may already be gone.
  }
}

/**
 * Write the extraction result to `output/<domain>/<timestamp>.json` under the
 * current working directory.
 *
 * @param {string} url - The URL that was analyzed (must include a scheme).
 * @param {unknown} result - JSON-serializable extraction result.
 * @returns {string} The saved file's path relative to the working directory.
 * @throws {Error} If the URL is invalid or the file cannot be written.
 */
function saveResultJson(url, result) {
  // Strip only a leading "www." - not a "www." that occurs later in the host.
  const domain = new URL(url).hostname.replace(/^www\./, "");
  // Drop the milliseconds first, then make the time part filename-safe:
  // 2025-11-22T14:30:45.123Z -> 2025-11-22T14-30-45
  const timestamp = new Date().toISOString().split(".")[0].replace(/:/g, "-");
  // Save next to where the command was run, not inside the installed package
  // (which is the npx cache or a global node_modules directory).
  const outputDir = join(process.cwd(), "output", domain);
  mkdirSync(outputDir, { recursive: true });

  const filename = `${timestamp}.json`;
  writeFileSync(join(outputDir, filename), JSON.stringify(result, null, 2));
  return `output/${domain}/${filename}`;
}

// CLI definition. Diagnostics go to stderr so `--json-only` output on stdout
// stays valid JSON when piped into other tools.
program
  .name("dembrandt")
  .description("Extract design tokens from any website")
  .version("0.1.0")
  .argument("<url>")
  .option("--json-only", "Output raw JSON")
  .option("-d, --debug", "Force visible browser")
  .action(async (input, opts) => {
    const url = normalizeUrl(input);
    const spinner = ora("Starting extraction...").start();
    let browser = null;

    try {
      // --debug starts in a visible browser; otherwise start headless and
      // fall back to a visible browser once if the site appears to block us.
      let useHeaded = Boolean(opts.debug);
      let result;

      while (true) {
        const mode = useHeaded ? "visible" : "headless";
        spinner.text = `Launching browser (${mode} mode)`;
        browser = await chromium.launch({
          headless: !useHeaded,
          args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-blink-features=AutomationControlled",
          ],
        });
        if (opts.debug) {
          console.error(chalk.dim(`  ✓ Browser launched in ${mode} mode`));
        }

        try {
          result = await extractBranding(url, spinner, browser, {
            navigationTimeout: 90000,
          });
          break;
        } catch (err) {
          // Release this browser before retrying or propagating the error.
          const failedBrowser = browser;
          browser = null;
          await closeQuietly(failedBrowser);

          // A visible run is the last resort; unrelated errors are not retried.
          if (useHeaded || !looksLikeBotBlock(err)) throw err;

          spinner.warn(
            "Bot detection detected → retrying with visible browser"
          );
          console.error(chalk.dim(`  ↳ Error: ${errorMessage(err)}`));
          console.error(chalk.dim(`  ↳ URL: ${url}`));
          console.error(chalk.dim(`  ↳ Mode: headless`));
          useHeaded = true;
        }
      }

      spinner.succeed("Done!");

      if (opts.jsonOnly) {
        // Raw JSON on stdout only; no file is written in this mode.
        console.log(JSON.stringify(result, null, 2));
      } else {
        // A failed save is reported but does not abort the terminal display.
        try {
          const savedPath = saveResultJson(url, result);
          console.log(chalk.dim(`\n💾 JSON saved to: ${chalk.cyan(savedPath)}`));
        } catch (err) {
          console.log(
            chalk.yellow(`⚠ Could not save JSON file: ${errorMessage(err)}`)
          );
        }
        displayResults(result);
      }
    } catch (err) {
      spinner.fail("Failed");
      console.error(chalk.red("\n✗ Extraction failed"));
      console.error(chalk.red(`  Error: ${errorMessage(err)}`));
      console.error(chalk.dim(`  URL: ${url}`));

      if (opts.debug && err instanceof Error && err.stack) {
        console.error(chalk.dim("\nStack trace:"));
        console.error(chalk.dim(err.stack));
      }

      if (!opts.debug) {
        console.error(
          chalk.yellow(
            "\nTip: Try with --debug flag for tough sites and detailed error logs"
          )
        );
      }
      // Set the exit code instead of calling process.exit(), which would end
      // the process before the `finally` block can close the browser.
      process.exitCode = 1;
    } finally {
      if (browser) await closeQuietly(browser);
    }
  });

// The action handler is async, so use parseAsync() and await its completion.
await program.parseAsync();
|