@monostate/node-scraper 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +27 -0
- package/bin/lightpanda +0 -0
- package/index.js +94 -28
- package/package.json +7 -2
- package/scripts/install-lightpanda.js +183 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 BNCA Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -19,6 +19,15 @@ yarn add @monostate/node-scraper
|
|
|
19
19
|
pnpm add @monostate/node-scraper
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
+
**🎉 New in v1.2.0**: Lightpanda binary is now automatically downloaded and configured during installation! No manual setup required.
|
|
23
|
+
|
|
24
|
+
### Zero-Configuration Setup
|
|
25
|
+
|
|
26
|
+
The package now automatically:
|
|
27
|
+
- 📦 Downloads the correct Lightpanda binary for your platform (macOS, Linux, Windows/WSL)
|
|
28
|
+
- 🔧 Configures binary paths and permissions
|
|
29
|
+
- ✅ Validates installation health on first use
|
|
30
|
+
|
|
22
31
|
### Basic Usage
|
|
23
32
|
|
|
24
33
|
```javascript
|
|
@@ -343,6 +352,24 @@ const scraper: BNCASmartScraper = new BNCASmartScraper({
|
|
|
343
352
|
const result: ScrapingResult = await scraper.scrape('https://example.com');
|
|
344
353
|
```
|
|
345
354
|
|
|
355
|
+
## 📋 Changelog
|
|
356
|
+
|
|
357
|
+
### v1.2.0 (Latest)
|
|
358
|
+
- 🎉 **Auto-Installation**: Lightpanda binary is now automatically downloaded during `npm install`
|
|
359
|
+
- 🔧 **Cross-Platform Support**: Automatic detection and installation for macOS, Linux, and Windows/WSL
|
|
360
|
+
- ⚡ **Improved Performance**: Enhanced binary detection and ES6 module compatibility
|
|
361
|
+
- 🛠️ **Better Error Handling**: More robust installation scripts with retry logic
|
|
362
|
+
- 📦 **Zero Configuration**: No manual setup required - works out of the box
|
|
363
|
+
|
|
364
|
+
### v1.1.1
|
|
365
|
+
- Bug fixes and stability improvements
|
|
366
|
+
- Enhanced Puppeteer integration
|
|
367
|
+
|
|
368
|
+
### v1.1.0
|
|
369
|
+
- Added screenshot capabilities
|
|
370
|
+
- Improved fallback system
|
|
371
|
+
- Performance optimizations
|
|
372
|
+
|
|
346
373
|
## 🤝 Contributing
|
|
347
374
|
|
|
348
375
|
See the [main repository](https://github.com/your-org/bnca-prototype) for contribution guidelines.
|
package/bin/lightpanda
ADDED
|
Binary file
|
package/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fetch from 'node-fetch';
|
|
2
|
-
import { spawn } from 'child_process';
|
|
2
|
+
import { spawn, execSync } from 'child_process';
|
|
3
3
|
import fs from 'fs/promises';
|
|
4
|
+
import { existsSync, statSync } from 'fs';
|
|
4
5
|
import path from 'path';
|
|
5
6
|
import { fileURLToPath } from 'url';
|
|
6
7
|
import { promises as fsPromises } from 'fs';
|
|
@@ -201,7 +202,13 @@ export class BNCASmartScraper {
|
|
|
201
202
|
|
|
202
203
|
try {
|
|
203
204
|
// Check if binary exists
|
|
204
|
-
|
|
205
|
+
const stats = statSync(this.options.lightpandaPath);
|
|
206
|
+
if (!stats.isFile()) {
|
|
207
|
+
return {
|
|
208
|
+
success: false,
|
|
209
|
+
error: 'Lightpanda binary is not a file'
|
|
210
|
+
};
|
|
211
|
+
}
|
|
205
212
|
} catch {
|
|
206
213
|
return {
|
|
207
214
|
success: false,
|
|
@@ -210,9 +217,9 @@ export class BNCASmartScraper {
|
|
|
210
217
|
}
|
|
211
218
|
|
|
212
219
|
return new Promise((resolve) => {
|
|
213
|
-
const args = ['fetch', '--dump',
|
|
220
|
+
const args = ['fetch', '--dump', url];
|
|
214
221
|
const process = spawn(this.options.lightpandaPath, args, {
|
|
215
|
-
timeout: config.timeout + 1000 // Add buffer
|
|
222
|
+
timeout: config.timeout + 1000 // Add buffer for process timeout only
|
|
216
223
|
});
|
|
217
224
|
|
|
218
225
|
let output = '';
|
|
@@ -387,7 +394,21 @@ export class BNCASmartScraper {
|
|
|
387
394
|
* Intelligent detection of browser requirement
|
|
388
395
|
*/
|
|
389
396
|
detectBrowserRequirement(html, url) {
|
|
390
|
-
//
|
|
397
|
+
// Whitelist simple sites that should always use direct fetch
|
|
398
|
+
const simpleSites = [
|
|
399
|
+
'example.com',
|
|
400
|
+
'httpbin.org',
|
|
401
|
+
'wikipedia.org',
|
|
402
|
+
'github.io',
|
|
403
|
+
'netlify.app',
|
|
404
|
+
'vercel.app'
|
|
405
|
+
];
|
|
406
|
+
|
|
407
|
+
if (simpleSites.some(site => url.includes(site))) {
|
|
408
|
+
return false; // Always use direct fetch for these
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// Check for common SPA patterns (be more specific)
|
|
391
412
|
const spaIndicators = [
|
|
392
413
|
/<div[^>]*id=['"]?root['"]?[^>]*>\s*<\/div>/i,
|
|
393
414
|
/<div[^>]*id=['"]?app['"]?[^>]*>\s*<\/div>/i,
|
|
@@ -398,16 +419,36 @@ export class BNCASmartScraper {
|
|
|
398
419
|
/__webpack_require__/i
|
|
399
420
|
];
|
|
400
421
|
|
|
401
|
-
// Check for protection systems
|
|
422
|
+
// Check for protection systems (more specific patterns)
|
|
402
423
|
const protectionIndicators = [
|
|
403
|
-
/cloudflare/i,
|
|
424
|
+
/cloudflare.*challenge/i,
|
|
425
|
+
/cloudflare.*protection/i,
|
|
426
|
+
/ray id.*cloudflare/i,
|
|
404
427
|
/please enable javascript/i,
|
|
405
428
|
/you need to enable javascript/i,
|
|
406
429
|
/this site requires javascript/i,
|
|
407
|
-
/jscript.*required/i
|
|
430
|
+
/jscript.*required/i,
|
|
431
|
+
/security check.*cloudflare/i,
|
|
432
|
+
/attention required.*cloudflare/i
|
|
408
433
|
];
|
|
409
434
|
|
|
410
|
-
//
|
|
435
|
+
// Domain-based checks for known SPA sites
|
|
436
|
+
const domainIndicators = [
|
|
437
|
+
/instagram\.com/i,
|
|
438
|
+
/twitter\.com/i,
|
|
439
|
+
/facebook\.com/i,
|
|
440
|
+
/linkedin\.com/i,
|
|
441
|
+
/maps\.google/i,
|
|
442
|
+
/gmail\.com/i,
|
|
443
|
+
/youtube\.com/i
|
|
444
|
+
];
|
|
445
|
+
|
|
446
|
+
// Check if it's clearly a SPA or protected site
|
|
447
|
+
const hasSpaIndicators = spaIndicators.some(pattern => pattern.test(html));
|
|
448
|
+
const hasProtection = protectionIndicators.some(pattern => pattern.test(html));
|
|
449
|
+
const isKnownSpa = domainIndicators.some(pattern => pattern.test(url));
|
|
450
|
+
|
|
451
|
+
// Check for minimal content BUT only if we also have SPA indicators
|
|
411
452
|
const bodyContent = html.match(/<body[^>]*>([\s\S]*)<\/body>/i)?.[1] || '';
|
|
412
453
|
const textContent = bodyContent
|
|
413
454
|
.replace(/<script[\s\S]*?<\/script>/gi, '')
|
|
@@ -416,22 +457,11 @@ export class BNCASmartScraper {
|
|
|
416
457
|
.replace(/\s+/g, ' ')
|
|
417
458
|
.trim();
|
|
418
459
|
|
|
419
|
-
const hasMinimalContent = textContent.length <
|
|
460
|
+
const hasMinimalContent = textContent.length < 200; // More conservative threshold
|
|
461
|
+
const isLikelySpa = hasMinimalContent && hasSpaIndicators;
|
|
420
462
|
|
|
421
|
-
//
|
|
422
|
-
const
|
|
423
|
-
/instagram\.com/i,
|
|
424
|
-
/twitter\.com/i,
|
|
425
|
-
/facebook\.com/i,
|
|
426
|
-
/linkedin\.com/i,
|
|
427
|
-
/maps\.google/i
|
|
428
|
-
];
|
|
429
|
-
|
|
430
|
-
const needsBrowser =
|
|
431
|
-
spaIndicators.some(pattern => pattern.test(html)) ||
|
|
432
|
-
protectionIndicators.some(pattern => pattern.test(html)) ||
|
|
433
|
-
(hasMinimalContent && spaIndicators.some(pattern => pattern.test(html))) ||
|
|
434
|
-
domainIndicators.some(pattern => pattern.test(url));
|
|
463
|
+
// Only require browser if we have strong indicators
|
|
464
|
+
const needsBrowser = hasProtection || isKnownSpa || isLikelySpa;
|
|
435
465
|
|
|
436
466
|
return needsBrowser;
|
|
437
467
|
}
|
|
@@ -448,7 +478,10 @@ export class BNCASmartScraper {
|
|
|
448
478
|
if (/window\.__NEXT_DATA__/i.test(html)) {
|
|
449
479
|
indicators.push('Next.js data detected');
|
|
450
480
|
}
|
|
451
|
-
if (/cloudflare/i.test(html)) {
|
|
481
|
+
if (/cloudflare.*challenge/i.test(html)) {
|
|
482
|
+
indicators.push('Cloudflare challenge detected');
|
|
483
|
+
}
|
|
484
|
+
if (/cloudflare.*protection/i.test(html)) {
|
|
452
485
|
indicators.push('Cloudflare protection detected');
|
|
453
486
|
}
|
|
454
487
|
if (/please enable javascript/i.test(html)) {
|
|
@@ -534,19 +567,40 @@ export class BNCASmartScraper {
|
|
|
534
567
|
* Find Lightpanda binary
|
|
535
568
|
*/
|
|
536
569
|
findLightpandaBinary() {
|
|
570
|
+
// First check the package's bin directory (installed by postinstall script)
|
|
571
|
+
const packageDir = path.dirname(new URL(import.meta.url).pathname);
|
|
572
|
+
const packageBinPath = path.join(packageDir, 'bin', 'lightpanda');
|
|
573
|
+
|
|
537
574
|
const possiblePaths = [
|
|
575
|
+
packageBinPath, // Package's bin directory (highest priority)
|
|
538
576
|
'./lightpanda',
|
|
539
577
|
'../lightpanda',
|
|
540
578
|
'./lightpanda/lightpanda',
|
|
541
579
|
'/usr/local/bin/lightpanda',
|
|
542
|
-
path.join(process.cwd(), 'lightpanda')
|
|
580
|
+
path.join(process.cwd(), 'lightpanda'),
|
|
581
|
+
path.join(process.cwd(), 'bin', 'lightpanda')
|
|
543
582
|
];
|
|
544
583
|
|
|
545
584
|
for (const binaryPath of possiblePaths) {
|
|
546
585
|
try {
|
|
547
|
-
// Synchronous check for binary
|
|
586
|
+
// Synchronous check for binary existence and executability
|
|
548
587
|
const fullPath = path.resolve(binaryPath);
|
|
549
|
-
|
|
588
|
+
if (existsSync(fullPath)) {
|
|
589
|
+
const stats = statSync(fullPath);
|
|
590
|
+
if (stats.isFile()) {
|
|
591
|
+
// Check if it's executable (on Unix-like systems including WSL)
|
|
592
|
+
if (process.platform !== 'win32' || this.isWSL()) {
|
|
593
|
+
const mode = stats.mode;
|
|
594
|
+
const isExecutable = Boolean(mode & parseInt('111', 8));
|
|
595
|
+
if (isExecutable) {
|
|
596
|
+
return fullPath;
|
|
597
|
+
}
|
|
598
|
+
} else {
|
|
599
|
+
// On native Windows (not WSL), Lightpanda is not supported
|
|
600
|
+
continue;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
}
|
|
550
604
|
} catch {
|
|
551
605
|
continue;
|
|
552
606
|
}
|
|
@@ -555,6 +609,18 @@ export class BNCASmartScraper {
|
|
|
555
609
|
return null;
|
|
556
610
|
}
|
|
557
611
|
|
|
612
|
+
/**
|
|
613
|
+
* Check if running in WSL environment
|
|
614
|
+
*/
|
|
615
|
+
isWSL() {
|
|
616
|
+
try {
|
|
617
|
+
const uname = execSync('uname -r', { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
|
|
618
|
+
return uname.toLowerCase().includes('microsoft') || uname.toLowerCase().includes('wsl');
|
|
619
|
+
} catch {
|
|
620
|
+
return false;
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
558
624
|
/**
|
|
559
625
|
* Get performance statistics
|
|
560
626
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@monostate/node-scraper",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Intelligent web scraping with multi-level fallback system - 11.35x faster than Firecrawl",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -15,7 +15,9 @@
|
|
|
15
15
|
"index.js",
|
|
16
16
|
"index.d.ts",
|
|
17
17
|
"README.md",
|
|
18
|
-
"package.json"
|
|
18
|
+
"package.json",
|
|
19
|
+
"scripts/",
|
|
20
|
+
"bin/"
|
|
19
21
|
],
|
|
20
22
|
"keywords": [
|
|
21
23
|
"web-scraping",
|
|
@@ -63,5 +65,8 @@
|
|
|
63
65
|
},
|
|
64
66
|
"publishConfig": {
|
|
65
67
|
"access": "public"
|
|
68
|
+
},
|
|
69
|
+
"scripts": {
|
|
70
|
+
"postinstall": "node scripts/install-lightpanda.js"
|
|
66
71
|
}
|
|
67
72
|
}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from 'fs';
import https from 'https';
import path from 'path';
import { createWriteStream } from 'fs';
import { execSync } from 'child_process';
import { fileURLToPath } from 'url';
|
|
8
|
+
|
|
9
|
+
// Release tag of the Lightpanda browser to download.
const LIGHTPANDA_VERSION = 'nightly';

// <package root>/bin, resolved from this script's own location
// (<package root>/scripts/<this file>). fileURLToPath() is used instead of
// URL#pathname because the latter stays percent-encoded (a space becomes
// "%20") and keeps a leading slash in front of Windows drive letters, so it is
// not a usable filesystem path.
const BINARY_DIR = path.join(path.dirname(path.dirname(fileURLToPath(import.meta.url))), 'bin');
const BINARY_NAME = 'lightpanda';
const BINARY_PATH = path.join(BINARY_DIR, BINARY_NAME);

// Common prefix of every release asset URL.
const RELEASE_BASE_URL = `https://github.com/lightpanda-io/browser/releases/download/${LIGHTPANDA_VERSION}`;

// Platform-specific download URLs (matching official Lightpanda instructions).
// NOTE(review): each platform is pinned to a single CPU architecture (aarch64
// for macOS, x86_64 for Linux/WSL); process.arch is not consulted, so hosts
// with another architecture receive a binary they may be unable to run.
// Confirm which assets upstream publishes before extending this table.
const DOWNLOAD_URLS = {
  'darwin': `${RELEASE_BASE_URL}/lightpanda-aarch64-macos`,
  'linux': `${RELEASE_BASE_URL}/lightpanda-x86_64-linux`,
  'wsl': `${RELEASE_BASE_URL}/lightpanda-x86_64-linux` // WSL uses Linux binary
};
|
|
20
|
+
|
|
21
|
+
function detectPlatform() {
|
|
22
|
+
const platform = process.platform;
|
|
23
|
+
|
|
24
|
+
if (platform === 'darwin') {
|
|
25
|
+
return 'darwin';
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
if (platform === 'linux') {
|
|
29
|
+
return 'linux';
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
if (platform === 'win32') {
|
|
33
|
+
// Check if we're running in WSL
|
|
34
|
+
try {
|
|
35
|
+
const uname = execSync('uname -r', { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
|
|
36
|
+
if (uname.toLowerCase().includes('microsoft') || uname.toLowerCase().includes('wsl')) {
|
|
37
|
+
console.log('🐧 WSL detected - using Linux binary');
|
|
38
|
+
return 'wsl';
|
|
39
|
+
}
|
|
40
|
+
} catch {
|
|
41
|
+
// Not in WSL or uname not available
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
console.log('⚠️ Windows detected. Lightpanda is recommended to run in WSL2.');
|
|
45
|
+
console.log(' Please install WSL2 and run this package from within WSL2.');
|
|
46
|
+
console.log(' See: https://docs.microsoft.com/en-us/windows/wsl/install');
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * Download `url` over HTTPS and store the body at `destination`.
 *
 * The body is streamed into `<destination>.download` and renamed into place
 * only after it has been written completely, so an interrupted download never
 * leaves a truncated file at `destination`.
 *
 * @param {string} url - Absolute https URL to fetch.
 * @param {string} destination - Path the finished file is written to.
 * @param {number} [redirectsLeft=5] - How many more HTTP redirects may be followed.
 * @returns {Promise<void>} Resolves once the file is in place; rejects on a
 *   non-200 status, too many redirects, network/stream errors, a truncated
 *   body, or after 60 s of socket inactivity.
 */
async function downloadFile(url, destination, redirectsLeft = 5) {
  console.log(`📥 Downloading Lightpanda binary from: ${url}`);

  const partialPath = `${destination}.download`;

  return new Promise((resolve, reject) => {
    let settled = false;
    let fileStream = null;

    // Single failure path: stop the transfer, drop the partial file, reject once.
    const fail = (error) => {
      if (settled) return;
      settled = true;
      request.destroy();
      if (fileStream) fileStream.destroy();
      fs.unlink(partialPath, () => reject(error)); // unlink errors (e.g. ENOENT) are irrelevant here
    };

    const request = https.get(url, (response) => {
      const { statusCode = 0, headers } = response;

      // Handle redirects
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        response.resume(); // drain the redirect body so the socket is released
        if (redirectsLeft <= 0) {
          fail(new Error('Too many redirects'));
          return;
        }
        // Hand the outcome over to the follow-up request. Marking this attempt
        // as settled keeps its idle timer from rejecting a download that is
        // still in progress on the redirect target.
        settled = true;
        const nextUrl = new URL(headers.location, url).toString(); // Location may be relative
        downloadFile(nextUrl, destination, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        fail(new Error(`HTTP ${statusCode}: ${response.statusMessage}`));
        return;
      }

      fileStream = createWriteStream(partialPath);
      const totalSize = parseInt(headers['content-length'] || '0', 10);
      let downloadedSize = 0;

      response.on('data', (chunk) => {
        downloadedSize += chunk.length;
        if (totalSize > 0) {
          const progress = (downloadedSize / totalSize * 100).toFixed(1);
          process.stdout.write(`\r⏳ Progress: ${progress}%`);
        }
      });

      // pipe() does not forward source failures to the destination, so a
      // dropped connection has to be handled explicitly or the promise would
      // stay pending.
      response.on('error', fail);
      response.on('aborted', () => fail(new Error('Download aborted before completion')));

      fileStream.on('error', fail);
      fileStream.on('finish', () => {
        fileStream.close(() => {
          if (settled) return;
          if (totalSize > 0 && downloadedSize !== totalSize) {
            fail(new Error(`Incomplete download: received ${downloadedSize} of ${totalSize} bytes`));
            return;
          }
          fs.rename(partialPath, destination, (renameError) => {
            if (renameError) {
              fail(renameError);
              return;
            }
            settled = true;
            process.stdout.write('\r✅ Download completed! \n');
            resolve();
          });
        });
      });

      response.pipe(fileStream);
    });

    request.on('error', fail);
    request.setTimeout(60000, () => {
      fail(new Error('Download timeout'));
    });
  });
}
|
|
101
|
+
|
|
102
|
+
/**
 * Set mode 0o755 on `filePath`.
 * Best effort: a failure is reported as a warning and never rejects.
 *
 * @param {string} filePath - File whose permissions are changed.
 * @returns {Promise<void>}
 */
async function makeExecutable(filePath) {
  return fs.promises
    .chmod(filePath, 0o755)
    .then(() => {
      console.log(`🔧 Made ${filePath} executable`);
    })
    .catch((error) => {
      console.warn(`⚠️ Warning: Could not make binary executable: ${error.message}`);
    });
}
|
|
110
|
+
|
|
111
|
+
/**
 * Install the Lightpanda binary into the package's bin directory.
 *
 * Steps: pick the platform, reuse an existing binary only when it has the
 * executable format expected for that platform, otherwise download a fresh
 * copy, mark it executable and verify it. Every failure is logged and the
 * process exits with status 0 so that installing the package never fails
 * because of this optional binary.
 *
 * @returns {Promise<void>}
 */
async function installLightpanda() {
  try {
    const platform = detectPlatform();

    if (!platform) {
      console.log(' Falling back to Puppeteer for browser-based scraping.');
      return;
    }

    const downloadUrl = DOWNLOAD_URLS[platform];

    if (!downloadUrl) {
      console.log(`⚠️ Lightpanda binary not available for platform: ${platform}`);
      console.log(' Falling back to Puppeteer for browser-based scraping.');
      return;
    }

    // Create bin directory if it doesn't exist
    if (!fs.existsSync(BINARY_DIR)) {
      await fs.promises.mkdir(BINARY_DIR, { recursive: true });
      console.log(`📁 Created directory: ${BINARY_DIR}`);
    }

    // Reuse an existing binary only if it is plausibly runnable here. A file
    // that merely exists (empty, truncated, or built for another operating
    // system) must not short-circuit the download.
    if (fs.existsSync(BINARY_PATH)) {
      if (await hasExecutableFormatFor(BINARY_PATH, platform)) {
        console.log(`✅ Lightpanda binary already exists at: ${BINARY_PATH}`);
        await makeExecutable(BINARY_PATH);
        return;
      }
      console.log(`♻️ Existing file at ${BINARY_PATH} is not a ${platform} executable - downloading a fresh copy`);
    }

    console.log(`🚀 Installing Lightpanda binary for ${platform}...`);

    // Download the binary
    await downloadFile(downloadUrl, BINARY_PATH);

    // Make executable (all Unix-like systems including WSL)
    await makeExecutable(BINARY_PATH);

    // Verify the binary: it must exist and carry the expected magic number.
    if (!(await hasExecutableFormatFor(BINARY_PATH, platform))) {
      // Remove the bad file so it is not mistaken for a usable binary later.
      await fs.promises.unlink(BINARY_PATH).catch(() => {});
      throw new Error('Binary download verification failed');
    }

    const stats = await fs.promises.stat(BINARY_PATH);
    console.log(`✅ Lightpanda binary installed successfully!`);
    console.log(` Location: ${BINARY_PATH}`);
    console.log(` Size: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);

    // Additional WSL information
    if (platform === 'wsl') {
      console.log('');
      console.log('📝 WSL Setup Notes:');
      console.log(' - Lightpanda binary installed for WSL environment');
      console.log(' - Ensure your Node.js application runs within WSL2');
      console.log(' - For best performance, keep files within WSL filesystem');
    }

  } catch (error) {
    console.error(`❌ Failed to install Lightpanda binary: ${error.message}`);
    console.log(' The package will fall back to Puppeteer for browser-based scraping.');

    // Don't fail the installation, just log the issue
    process.exit(0);
  }
}

/**
 * Check the first four bytes of `filePath` against the executable magic number
 * expected for `platform`: Mach-O (thin 64-bit or universal) for 'darwin', ELF
 * for everything else. This checks the container format only, not the CPU
 * architecture.
 *
 * @param {string} filePath - File to inspect.
 * @param {string} platform - Platform key as returned by detectPlatform().
 * @returns {Promise<boolean>} false when the file is missing, unreadable,
 *   shorter than four bytes, or of a different format.
 */
async function hasExecutableFormatFor(filePath, platform) {
  let handle;
  try {
    handle = await fs.promises.open(filePath, 'r');
    const magic = Buffer.alloc(4);
    const { bytesRead } = await handle.read(magic, 0, 4, 0);
    if (bytesRead < 4) {
      return false;
    }
    const signature = magic.toString('hex');
    if (platform === 'darwin') {
      return signature === 'cffaedfe' || signature === 'cafebabe';
    }
    return signature === '7f454c46';
  } catch {
    return false;
  } finally {
    if (handle) {
      await handle.close().catch(() => {});
    }
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Report whether this file is the script Node was started with.
 *
 * Filesystem paths are compared instead of building a `file://` string by
 * hand: import.meta.url is percent-encoded and uses URL syntax, so a manual
 * comparison fails for paths with spaces or non-ASCII characters and for
 * Windows paths. The real path is checked as well because process.argv[1]
 * may point at a symlink to this file.
 *
 * @returns {boolean}
 */
function isMainModule() {
  if (!process.argv[1]) {
    return false;
  }
  try {
    const thisFile = fileURLToPath(import.meta.url);
    const entryPoint = path.resolve(process.argv[1]);
    return entryPoint === thisFile || fs.realpathSync(entryPoint) === thisFile;
  } catch {
    return false;
  }
}

// Only run if this is the main module (not imported)
if (isMainModule()) {
  installLightpanda().catch((error) => {
    console.error('Installation failed:', error);
    process.exit(0); // Don't fail package installation
  });
}
|