@monostate/node-scraper 1.0.3 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +41 -5
- package/index.d.ts +35 -1
- package/index.js +298 -5
- package/package.json +1 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 BNCA Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -22,13 +22,20 @@ pnpm add @monostate/node-scraper
|
|
|
22
22
|
### Basic Usage
|
|
23
23
|
|
|
24
24
|
```javascript
|
|
25
|
-
import { smartScrape,
|
|
25
|
+
import { smartScrape, smartScreenshot, quickShot } from '@monostate/node-scraper';
|
|
26
26
|
|
|
27
27
|
// Simple one-line scraping
|
|
28
28
|
const result = await smartScrape('https://example.com');
|
|
29
29
|
console.log(result.content); // Extracted content
|
|
30
30
|
console.log(result.method); // Method used: direct-fetch, lightpanda, or puppeteer
|
|
31
|
-
|
|
31
|
+
|
|
32
|
+
// Take a screenshot
|
|
33
|
+
const screenshot = await smartScreenshot('https://example.com');
|
|
34
|
+
console.log(screenshot.screenshot); // Base64 encoded image
|
|
35
|
+
|
|
36
|
+
// Quick screenshot (optimized for speed)
|
|
37
|
+
const quick = await quickShot('https://example.com');
|
|
38
|
+
console.log(quick.screenshot); // Fast screenshot capture
|
|
32
39
|
```
|
|
33
40
|
|
|
34
41
|
### Advanced Usage
|
|
@@ -67,6 +74,10 @@ BNCA uses a sophisticated 3-tier fallback system:
|
|
|
67
74
|
- **Performance**: Complete JavaScript execution
|
|
68
75
|
- **Fallback triggers**: Complex interactions needed
|
|
69
76
|
|
|
77
|
+
### 📸 Screenshot Methods
|
|
78
|
+
- **Chrome CLI**: Direct Chrome screenshot capture
|
|
79
|
+
- **Quickshot**: Optimized with retry logic and smart timeouts
|
|
80
|
+
|
|
70
81
|
## 📊 Performance Benchmark
|
|
71
82
|
|
|
72
83
|
| Site Type | BNCA | Firecrawl | Speed Advantage |
|
|
@@ -79,12 +90,19 @@ BNCA uses a sophisticated 3-tier fallback system:
|
|
|
79
90
|
|
|
80
91
|
## 🎛️ API Reference
|
|
81
92
|
|
|
82
|
-
###
|
|
93
|
+
### Convenience Functions
|
|
94
|
+
|
|
95
|
+
#### `smartScrape(url, options?)`
|
|
96
|
+
Quick scraping with intelligent fallback.
|
|
83
97
|
|
|
84
|
-
|
|
98
|
+
#### `smartScreenshot(url, options?)`
|
|
99
|
+
Take a screenshot of any webpage.
|
|
100
|
+
|
|
101
|
+
#### `quickShot(url, options?)`
|
|
102
|
+
Optimized screenshot capture for maximum speed.
|
|
85
103
|
|
|
86
104
|
**Parameters:**
|
|
87
|
-
- `url` (string): URL to scrape
|
|
105
|
+
- `url` (string): URL to scrape/capture
|
|
88
106
|
- `options` (object, optional): Configuration options
|
|
89
107
|
|
|
90
108
|
**Returns:** Promise<ScrapingResult>
|
|
@@ -115,6 +133,24 @@ Scrape a URL with intelligent fallback.
|
|
|
115
133
|
const result = await scraper.scrape('https://example.com');
|
|
116
134
|
```
|
|
117
135
|
|
|
136
|
+
##### `scraper.screenshot(url, options?)`
|
|
137
|
+
|
|
138
|
+
Take a screenshot of a webpage.
|
|
139
|
+
|
|
140
|
+
```javascript
|
|
141
|
+
const result = await scraper.screenshot('https://example.com');
|
|
142
|
+
const img = result.screenshot; // data:image/png;base64,...
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
##### `scraper.quickshot(url, options?)`
|
|
146
|
+
|
|
147
|
+
Quick screenshot capture - optimized for speed with retry logic.
|
|
148
|
+
|
|
149
|
+
```javascript
|
|
150
|
+
const result = await scraper.quickshot('https://example.com');
|
|
151
|
+
// 2-3x faster than regular screenshot
|
|
152
|
+
```
|
|
153
|
+
|
|
118
154
|
##### `scraper.getStats()`
|
|
119
155
|
|
|
120
156
|
Get performance statistics.
|
package/index.d.ts
CHANGED
|
@@ -25,13 +25,15 @@ export interface ScrapingResult {
|
|
|
25
25
|
/** Size of the content in bytes */
|
|
26
26
|
size?: number;
|
|
27
27
|
/** Method used for scraping */
|
|
28
|
-
method: 'direct-fetch' | 'lightpanda' | 'puppeteer' | 'failed' | 'error';
|
|
28
|
+
method: 'direct-fetch' | 'lightpanda' | 'puppeteer' | 'chrome-screenshot' | 'quickshot' | 'failed' | 'error';
|
|
29
29
|
/** Whether browser rendering was needed */
|
|
30
30
|
needsBrowser?: boolean;
|
|
31
31
|
/** Content type from response headers */
|
|
32
32
|
contentType?: string;
|
|
33
33
|
/** Error message if scraping failed */
|
|
34
34
|
error?: string;
|
|
35
|
+
/** Base64 encoded screenshot (if captured) */
|
|
36
|
+
screenshot?: string;
|
|
35
37
|
/** Performance metrics */
|
|
36
38
|
performance: {
|
|
37
39
|
/** Total time taken in milliseconds */
|
|
@@ -129,6 +131,22 @@ export class BNCASmartScraper {
|
|
|
129
131
|
*/
|
|
130
132
|
scrape(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
131
133
|
|
|
134
|
+
/**
|
|
135
|
+
* Take a screenshot of a webpage
|
|
136
|
+
* @param url The URL to capture
|
|
137
|
+
* @param options Optional configuration overrides
|
|
138
|
+
* @returns Promise resolving to screenshot result
|
|
139
|
+
*/
|
|
140
|
+
screenshot(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Quick screenshot capture - optimized for speed
|
|
144
|
+
* @param url The URL to capture
|
|
145
|
+
* @param options Optional configuration overrides
|
|
146
|
+
* @returns Promise resolving to screenshot result
|
|
147
|
+
*/
|
|
148
|
+
quickshot(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
149
|
+
|
|
132
150
|
/**
|
|
133
151
|
* Get performance statistics for all methods
|
|
134
152
|
* @returns Current statistics
|
|
@@ -214,6 +232,22 @@ export class BNCASmartScraper {
|
|
|
214
232
|
*/
|
|
215
233
|
export function smartScrape(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
216
234
|
|
|
235
|
+
/**
|
|
236
|
+
* Convenience function for taking screenshots
|
|
237
|
+
* @param url The URL to capture
|
|
238
|
+
* @param options Optional configuration
|
|
239
|
+
* @returns Promise resolving to screenshot result
|
|
240
|
+
*/
|
|
241
|
+
export function smartScreenshot(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Convenience function for quick screenshot capture
|
|
245
|
+
* @param url The URL to capture
|
|
246
|
+
* @param options Optional configuration
|
|
247
|
+
* @returns Promise resolving to screenshot result
|
|
248
|
+
*/
|
|
249
|
+
export function quickShot(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
|
|
250
|
+
|
|
217
251
|
/**
|
|
218
252
|
* Default export - same as BNCASmartScraper class
|
|
219
253
|
*/
|
package/index.js
CHANGED
|
@@ -3,6 +3,7 @@ import { spawn } from 'child_process';
|
|
|
3
3
|
import fs from 'fs/promises';
|
|
4
4
|
import path from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
|
+
import { promises as fsPromises } from 'fs';
|
|
6
7
|
|
|
7
8
|
let puppeteer = null;
|
|
8
9
|
try {
|
|
@@ -397,13 +398,17 @@ export class BNCASmartScraper {
|
|
|
397
398
|
/__webpack_require__/i
|
|
398
399
|
];
|
|
399
400
|
|
|
400
|
-
// Check for protection systems
|
|
401
|
+
// Check for protection systems (more specific patterns)
|
|
401
402
|
const protectionIndicators = [
|
|
402
|
-
/cloudflare/i,
|
|
403
|
+
/cloudflare.*challenge/i,
|
|
404
|
+
/cloudflare.*protection/i,
|
|
405
|
+
/ray id.*cloudflare/i,
|
|
403
406
|
/please enable javascript/i,
|
|
404
407
|
/you need to enable javascript/i,
|
|
405
408
|
/this site requires javascript/i,
|
|
406
|
-
/jscript.*required/i
|
|
409
|
+
/jscript.*required/i,
|
|
410
|
+
/security check.*cloudflare/i,
|
|
411
|
+
/attention required.*cloudflare/i
|
|
407
412
|
];
|
|
408
413
|
|
|
409
414
|
// Check for minimal content (likely SPA)
|
|
@@ -447,7 +452,10 @@ export class BNCASmartScraper {
|
|
|
447
452
|
if (/window\.__NEXT_DATA__/i.test(html)) {
|
|
448
453
|
indicators.push('Next.js data detected');
|
|
449
454
|
}
|
|
450
|
-
if (/cloudflare/i.test(html)) {
|
|
455
|
+
if (/cloudflare.*challenge/i.test(html)) {
|
|
456
|
+
indicators.push('Cloudflare challenge detected');
|
|
457
|
+
}
|
|
458
|
+
if (/cloudflare.*protection/i.test(html)) {
|
|
451
459
|
indicators.push('Cloudflare protection detected');
|
|
452
460
|
}
|
|
453
461
|
if (/please enable javascript/i.test(html)) {
|
|
@@ -590,6 +598,271 @@ export class BNCASmartScraper {
|
|
|
590
598
|
}
|
|
591
599
|
}
|
|
592
600
|
|
|
601
|
+
/**
|
|
602
|
+
* Take a screenshot of a webpage
|
|
603
|
+
*/
|
|
604
|
+
async screenshot(url, options = {}) {
|
|
605
|
+
const startTime = Date.now();
|
|
606
|
+
const config = { ...this.options, ...options };
|
|
607
|
+
|
|
608
|
+
this.log(`📸 Taking screenshot for: ${url}`);
|
|
609
|
+
|
|
610
|
+
try {
|
|
611
|
+
const screenshot = await this.takeScreenshotWithChrome(url, config);
|
|
612
|
+
|
|
613
|
+
return {
|
|
614
|
+
success: !!screenshot,
|
|
615
|
+
screenshot,
|
|
616
|
+
method: 'chrome-screenshot',
|
|
617
|
+
performance: {
|
|
618
|
+
totalTime: Date.now() - startTime
|
|
619
|
+
}
|
|
620
|
+
};
|
|
621
|
+
} catch (error) {
|
|
622
|
+
return {
|
|
623
|
+
success: false,
|
|
624
|
+
error: error.message,
|
|
625
|
+
method: 'chrome-screenshot',
|
|
626
|
+
performance: {
|
|
627
|
+
totalTime: Date.now() - startTime
|
|
628
|
+
}
|
|
629
|
+
};
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
/**
|
|
634
|
+
* Quick screenshot capture - optimized for speed
|
|
635
|
+
*/
|
|
636
|
+
async quickshot(url, options = {}) {
|
|
637
|
+
const startTime = Date.now();
|
|
638
|
+
const config = {
|
|
639
|
+
...this.options,
|
|
640
|
+
...options,
|
|
641
|
+
timeout: options.timeout || 15000 // Longer timeout for screenshots
|
|
642
|
+
};
|
|
643
|
+
|
|
644
|
+
this.log(`⚡ Taking quick screenshot for: ${url}`);
|
|
645
|
+
|
|
646
|
+
try {
|
|
647
|
+
const screenshot = await this.takeScreenshotOptimized(url, config);
|
|
648
|
+
|
|
649
|
+
return {
|
|
650
|
+
success: !!screenshot,
|
|
651
|
+
screenshot,
|
|
652
|
+
method: 'quickshot',
|
|
653
|
+
performance: {
|
|
654
|
+
totalTime: Date.now() - startTime
|
|
655
|
+
}
|
|
656
|
+
};
|
|
657
|
+
} catch (error) {
|
|
658
|
+
return {
|
|
659
|
+
success: false,
|
|
660
|
+
error: error.message,
|
|
661
|
+
method: 'quickshot',
|
|
662
|
+
performance: {
|
|
663
|
+
totalTime: Date.now() - startTime
|
|
664
|
+
}
|
|
665
|
+
};
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
/**
|
|
670
|
+
* Take screenshot using Chrome CLI
|
|
671
|
+
*/
|
|
672
|
+
async takeScreenshotWithChrome(url, config) {
|
|
673
|
+
const tempFile = path.join('/tmp', `screenshot_${Date.now()}_${Math.random().toString(36).substring(7)}.png`);
|
|
674
|
+
|
|
675
|
+
try {
|
|
676
|
+
const args = [
|
|
677
|
+
'--headless=new',
|
|
678
|
+
'--disable-gpu',
|
|
679
|
+
'--no-sandbox',
|
|
680
|
+
'--disable-setuid-sandbox',
|
|
681
|
+
'--disable-dev-shm-usage',
|
|
682
|
+
'--disable-blink-features=AutomationControlled',
|
|
683
|
+
'--user-agent=' + config.userAgent,
|
|
684
|
+
'--screenshot=' + tempFile,
|
|
685
|
+
'--window-size=1280,800',
|
|
686
|
+
'--hide-scrollbars',
|
|
687
|
+
'--virtual-time-budget=10000',
|
|
688
|
+
url
|
|
689
|
+
];
|
|
690
|
+
|
|
691
|
+
const chromePath = await this.findChromePath();
|
|
692
|
+
if (!chromePath) {
|
|
693
|
+
throw new Error('Chrome/Chromium not found');
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
return new Promise((resolve) => {
|
|
697
|
+
const chrome = spawn(chromePath, args, {
|
|
698
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
699
|
+
detached: false
|
|
700
|
+
});
|
|
701
|
+
|
|
702
|
+
let processExited = false;
|
|
703
|
+
let stderr = '';
|
|
704
|
+
|
|
705
|
+
chrome.stderr.on('data', (data) => {
|
|
706
|
+
stderr += data.toString();
|
|
707
|
+
});
|
|
708
|
+
|
|
709
|
+
const killTimeout = setTimeout(() => {
|
|
710
|
+
if (!processExited) {
|
|
711
|
+
this.log('Chrome timeout, sending SIGTERM...');
|
|
712
|
+
chrome.kill('SIGTERM');
|
|
713
|
+
|
|
714
|
+
setTimeout(() => {
|
|
715
|
+
if (!processExited) {
|
|
716
|
+
chrome.kill('SIGKILL');
|
|
717
|
+
}
|
|
718
|
+
}, 1000);
|
|
719
|
+
}
|
|
720
|
+
}, config.timeout || 15000);
|
|
721
|
+
|
|
722
|
+
chrome.on('exit', async (code, signal) => {
|
|
723
|
+
processExited = true;
|
|
724
|
+
clearTimeout(killTimeout);
|
|
725
|
+
|
|
726
|
+
try {
|
|
727
|
+
await new Promise(r => setTimeout(r, 500));
|
|
728
|
+
const screenshotBuffer = await fsPromises.readFile(tempFile);
|
|
729
|
+
const base64 = screenshotBuffer.toString('base64');
|
|
730
|
+
await fsPromises.unlink(tempFile).catch(() => {});
|
|
731
|
+
resolve(`data:image/png;base64,${base64}`);
|
|
732
|
+
} catch (error) {
|
|
733
|
+
resolve(null);
|
|
734
|
+
}
|
|
735
|
+
});
|
|
736
|
+
|
|
737
|
+
chrome.on('error', (error) => {
|
|
738
|
+
clearTimeout(killTimeout);
|
|
739
|
+
resolve(null);
|
|
740
|
+
});
|
|
741
|
+
});
|
|
742
|
+
} catch (error) {
|
|
743
|
+
return null;
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
/**
|
|
748
|
+
* Optimized screenshot for speed
|
|
749
|
+
*/
|
|
750
|
+
async takeScreenshotOptimized(url, config, retryCount = 0) {
|
|
751
|
+
const tempFile = path.join('/tmp', `screenshot_${Date.now()}_${Math.random().toString(36).substring(7)}.png`);
|
|
752
|
+
|
|
753
|
+
try {
|
|
754
|
+
const virtualTimeBudget = retryCount === 0 ? 5000 : 8000;
|
|
755
|
+
const processTimeout = retryCount === 0 ? 8000 : 12000;
|
|
756
|
+
|
|
757
|
+
const args = [
|
|
758
|
+
'--headless=new',
|
|
759
|
+
'--disable-gpu',
|
|
760
|
+
'--no-sandbox',
|
|
761
|
+
'--disable-setuid-sandbox',
|
|
762
|
+
'--disable-dev-shm-usage',
|
|
763
|
+
'--disable-blink-features=AutomationControlled',
|
|
764
|
+
'--disable-features=TranslateUI',
|
|
765
|
+
'--disable-extensions',
|
|
766
|
+
'--disable-default-apps',
|
|
767
|
+
'--disable-sync',
|
|
768
|
+
'--metrics-recording-only',
|
|
769
|
+
'--mute-audio',
|
|
770
|
+
'--no-first-run',
|
|
771
|
+
'--disable-background-timer-throttling',
|
|
772
|
+
'--disable-backgrounding-occluded-windows',
|
|
773
|
+
'--disable-renderer-backgrounding',
|
|
774
|
+
'--user-agent=' + config.userAgent,
|
|
775
|
+
'--screenshot=' + tempFile,
|
|
776
|
+
'--window-size=1280,800',
|
|
777
|
+
'--hide-scrollbars',
|
|
778
|
+
'--run-all-compositor-stages-before-draw',
|
|
779
|
+
`--virtual-time-budget=${virtualTimeBudget}`,
|
|
780
|
+
url
|
|
781
|
+
];
|
|
782
|
+
|
|
783
|
+
const chromePath = await this.findChromePath();
|
|
784
|
+
if (!chromePath) {
|
|
785
|
+
throw new Error('Chrome/Chromium not found');
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
return new Promise((resolve) => {
|
|
789
|
+
const chrome = spawn(chromePath, args, {
|
|
790
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
791
|
+
detached: false
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
let processExited = false;
|
|
795
|
+
|
|
796
|
+
const killTimeout = setTimeout(() => {
|
|
797
|
+
if (!processExited) {
|
|
798
|
+
chrome.kill('SIGTERM');
|
|
799
|
+
setTimeout(() => {
|
|
800
|
+
if (!processExited) {
|
|
801
|
+
chrome.kill('SIGKILL');
|
|
802
|
+
}
|
|
803
|
+
}, 1000);
|
|
804
|
+
}
|
|
805
|
+
}, processTimeout);
|
|
806
|
+
|
|
807
|
+
chrome.on('exit', async (code, signal) => {
|
|
808
|
+
processExited = true;
|
|
809
|
+
clearTimeout(killTimeout);
|
|
810
|
+
|
|
811
|
+
try {
|
|
812
|
+
await new Promise(r => setTimeout(r, 500));
|
|
813
|
+
const screenshotBuffer = await fsPromises.readFile(tempFile);
|
|
814
|
+
const base64 = screenshotBuffer.toString('base64');
|
|
815
|
+
await fsPromises.unlink(tempFile).catch(() => {});
|
|
816
|
+
resolve(`data:image/png;base64,${base64}`);
|
|
817
|
+
} catch (error) {
|
|
818
|
+
if (retryCount === 0) {
|
|
819
|
+
const retryResult = await this.takeScreenshotOptimized(url, config, 1);
|
|
820
|
+
resolve(retryResult);
|
|
821
|
+
} else {
|
|
822
|
+
resolve(null);
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
});
|
|
826
|
+
|
|
827
|
+
chrome.on('error', (error) => {
|
|
828
|
+
clearTimeout(killTimeout);
|
|
829
|
+
resolve(null);
|
|
830
|
+
});
|
|
831
|
+
});
|
|
832
|
+
} catch (error) {
|
|
833
|
+
if (retryCount === 0) {
|
|
834
|
+
return this.takeScreenshotOptimized(url, config, 1);
|
|
835
|
+
}
|
|
836
|
+
return null;
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
/**
|
|
841
|
+
* Find Chrome/Chromium binary path
|
|
842
|
+
*/
|
|
843
|
+
async findChromePath() {
|
|
844
|
+
const chromePaths = process.platform === 'darwin' ? [
|
|
845
|
+
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
846
|
+
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
|
847
|
+
] : [
|
|
848
|
+
'/usr/bin/chromium-browser',
|
|
849
|
+
'/usr/bin/chromium',
|
|
850
|
+
'/usr/bin/google-chrome-stable',
|
|
851
|
+
'/usr/bin/google-chrome',
|
|
852
|
+
];
|
|
853
|
+
|
|
854
|
+
for (const path of chromePaths) {
|
|
855
|
+
try {
|
|
856
|
+
await fsPromises.access(path);
|
|
857
|
+
return path;
|
|
858
|
+
} catch (e) {
|
|
859
|
+
continue;
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
return null;
|
|
864
|
+
}
|
|
865
|
+
|
|
593
866
|
/**
|
|
594
867
|
* Health check for all scraping methods
|
|
595
868
|
*/
|
|
@@ -630,7 +903,7 @@ export class BNCASmartScraper {
|
|
|
630
903
|
}
|
|
631
904
|
}
|
|
632
905
|
|
|
633
|
-
// Export convenience
|
|
906
|
+
// Export convenience functions
|
|
634
907
|
export async function smartScrape(url, options = {}) {
|
|
635
908
|
const scraper = new BNCASmartScraper(options);
|
|
636
909
|
try {
|
|
@@ -643,4 +916,24 @@ export async function smartScrape(url, options = {}) {
|
|
|
643
916
|
}
|
|
644
917
|
}
|
|
645
918
|
|
|
919
|
+
export async function smartScreenshot(url, options = {}) {
|
|
920
|
+
const scraper = new BNCASmartScraper(options);
|
|
921
|
+
try {
|
|
922
|
+
const result = await scraper.screenshot(url, options);
|
|
923
|
+
return result;
|
|
924
|
+
} catch (error) {
|
|
925
|
+
throw error;
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
export async function quickShot(url, options = {}) {
|
|
930
|
+
const scraper = new BNCASmartScraper(options);
|
|
931
|
+
try {
|
|
932
|
+
const result = await scraper.quickshot(url, options);
|
|
933
|
+
return result;
|
|
934
|
+
} catch (error) {
|
|
935
|
+
throw error;
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
|
|
646
939
|
export default BNCASmartScraper;
|