ag-webscrape 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +206 -206
- package/dist/WebScraper.d.ts +7 -0
- package/dist/WebScraper.d.ts.map +1 -1
- package/dist/WebScraper.js +102 -12
- package/dist/WebScraper.js.map +1 -1
- package/dist/helpers/dom.d.ts.map +1 -1
- package/dist/helpers/dom.js +64 -13
- package/dist/helpers/dom.js.map +1 -1
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -1,207 +1,207 @@
|
|
|
1
|
-
# ag-webscrape
|
|
2
|
-
|
|
3
|
-
A TypeScript web scraper with intelligent fallback strategy. Attempts direct HTTP fetching first, then falls back to Playwright for anti-scraping protection.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
- **Dual Strategy**: Direct fetch first, Playwright fallback
|
|
8
|
-
- **Anti-Scraping Detection**: Automatically detects and bypasses common anti-scraping measures
|
|
9
|
-
- **Persistent Browser**: Maintains browser instance for faster subsequent scrapes
|
|
10
|
-
- **Error Handling**: Comprehensive error detection for 4xx/5xx responses
|
|
11
|
-
- **TypeScript Support**: Full type safety and IntelliSense
|
|
12
|
-
- **Configurable**: Extensive customization options
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
npm install ag-webscrape
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Quick Start
|
|
21
|
-
|
|
22
|
-
```typescript
|
|
23
|
-
import { WebScraper } from 'ag-webscrape';
|
|
24
|
-
|
|
25
|
-
const scraper = new WebScraper();
|
|
26
|
-
|
|
27
|
-
// Scrape a single URL
|
|
28
|
-
const result = await scraper.scrape('https://example.com');
|
|
29
|
-
console.log(result.html);
|
|
30
|
-
|
|
31
|
-
// Clean up when done
|
|
32
|
-
await scraper.dispose();
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## API Reference
|
|
36
|
-
|
|
37
|
-
### WebScraper Class
|
|
38
|
-
|
|
39
|
-
#### Constructor
|
|
40
|
-
|
|
41
|
-
```typescript
|
|
42
|
-
new WebScraper(options?: ScrapingOptions)
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
#### Options
|
|
46
|
-
|
|
47
|
-
```typescript
|
|
48
|
-
interface ScrapingOptions {
|
|
49
|
-
timeout?: number; // Request timeout in ms (default: 30000)
|
|
50
|
-
userAgent?: string; // Custom user agent
|
|
51
|
-
headers?: Record<string, string>; // Additional headers
|
|
52
|
-
retries?: number; // Number of retries (default: 3)
|
|
53
|
-
waitForSelector?: string; // CSS selector to wait for
|
|
54
|
-
waitForTimeout?: number; // Time to wait in ms (default: 5000)
|
|
55
|
-
}
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
#### Methods
|
|
59
|
-
|
|
60
|
-
##### `scrape(url: string, options?: ScrapingOptions): Promise<ScrapingResult>`
|
|
61
|
-
|
|
62
|
-
Scrapes a single URL with fallback strategy.
|
|
63
|
-
|
|
64
|
-
```typescript
|
|
65
|
-
const result = await scraper.scrape('https://example.com', {
|
|
66
|
-
timeout: 60000,
|
|
67
|
-
waitForSelector: '.main-content'
|
|
68
|
-
});
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
##### `scrapeMultiple(urls: string[], options?: ScrapingOptions): Promise<ScrapingResult[]>`
|
|
72
|
-
|
|
73
|
-
Scrapes multiple URLs efficiently.
|
|
74
|
-
|
|
75
|
-
```typescript
|
|
76
|
-
const results = await scraper.scrapeMultiple([
|
|
77
|
-
'https://example1.com',
|
|
78
|
-
'https://example2.com'
|
|
79
|
-
]);
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
##### `dispose(): Promise<void>`
|
|
83
|
-
|
|
84
|
-
Cleans up browser resources. Always call this when done.
|
|
85
|
-
|
|
86
|
-
```typescript
|
|
87
|
-
await scraper.dispose();
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
#### Result Object
|
|
91
|
-
|
|
92
|
-
```typescript
|
|
93
|
-
interface ScrapingResult {
|
|
94
|
-
url: string; // Original URL
|
|
95
|
-
html: string; // HTML content
|
|
96
|
-
status: number; // HTTP status code
|
|
97
|
-
method: 'fetch' | 'playwright'; // Method used
|
|
98
|
-
error?: string; // Error message if any
|
|
99
|
-
redirected?: boolean; // Whether request was redirected
|
|
100
|
-
finalUrl?: string; // Final URL after redirects
|
|
101
|
-
}
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
## Advanced Usage
|
|
105
|
-
|
|
106
|
-
### Custom Headers and User Agent
|
|
107
|
-
|
|
108
|
-
```typescript
|
|
109
|
-
const scraper = new WebScraper({
|
|
110
|
-
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
111
|
-
headers: {
|
|
112
|
-
'Accept': 'text/html,application/xhtml+xml',
|
|
113
|
-
'Accept-Language': 'en-US,en;q=0.9'
|
|
114
|
-
}
|
|
115
|
-
});
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
### Waiting for Content
|
|
119
|
-
|
|
120
|
-
```typescript
|
|
121
|
-
// Wait for specific element
|
|
122
|
-
const result = await scraper.scrape('https://spa-app.com', {
|
|
123
|
-
waitForSelector: '.dynamic-content'
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
// Wait for specific time
|
|
127
|
-
const result = await scraper.scrape('https://slow-app.com', {
|
|
128
|
-
waitForTimeout: 10000
|
|
129
|
-
});
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
### Error Handling
|
|
133
|
-
|
|
134
|
-
```typescript
|
|
135
|
-
const result = await scraper.scrape('https://example.com');
|
|
136
|
-
|
|
137
|
-
if (result.error) {
|
|
138
|
-
console.error('Scraping failed:', result.error);
|
|
139
|
-
} else {
|
|
140
|
-
console.log('Success:', result.html.length, 'characters');
|
|
141
|
-
}
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
### Batch Scraping
|
|
145
|
-
|
|
146
|
-
```typescript
|
|
147
|
-
const urls = [
|
|
148
|
-
'https://news.site.com/article1',
|
|
149
|
-
'https://news.site.com/article2',
|
|
150
|
-
'https://news.site.com/article3'
|
|
151
|
-
];
|
|
152
|
-
|
|
153
|
-
const results = await scraper.scrapeMultiple(urls, {
|
|
154
|
-
waitForSelector: '.article-content'
|
|
155
|
-
});
|
|
156
|
-
|
|
157
|
-
results.forEach((result, index) => {
|
|
158
|
-
if (!result.error) {
|
|
159
|
-
console.log(`Article ${index + 1}: ${result.html.length} chars`);
|
|
160
|
-
}
|
|
161
|
-
});
|
|
162
|
-
```
|
|
163
|
-
|
|
164
|
-
## How It Works
|
|
165
|
-
|
|
166
|
-
1. **Direct Fetch**: First attempts HTTP request using `node-fetch`
|
|
167
|
-
2. **Anti-Scraping Detection**: Checks response for common anti-scraping patterns
|
|
168
|
-
3. **Playwright Fallback**: If direct fetch fails or anti-scraping detected, uses Playwright
|
|
169
|
-
4. **Error Detection**: Monitors for 4xx/5xx responses in both methods
|
|
170
|
-
5. **Resource Management**: Maintains browser instance for performance
|
|
171
|
-
|
|
172
|
-
## Anti-Scraping Protection
|
|
173
|
-
|
|
174
|
-
The scraper automatically detects and handles:
|
|
175
|
-
|
|
176
|
-
- Cloudflare protection
|
|
177
|
-
- DistilNetworks
|
|
178
|
-
- PerimeterX
|
|
179
|
-
- DataDome
|
|
180
|
-
- Akamai Bot Manager
|
|
181
|
-
- CAPTCHA challenges
|
|
182
|
-
- JavaScript requirement checks
|
|
183
|
-
- Rate limiting
|
|
184
|
-
- Access denied pages
|
|
185
|
-
|
|
186
|
-
## Performance
|
|
187
|
-
|
|
188
|
-
- **Fast**: Direct fetch for simple pages
|
|
189
|
-
- **Efficient**: Reuses browser instance
|
|
190
|
-
- **Robust**: Fallback ensures high success rate
|
|
191
|
-
- **Intelligent**: Only uses Playwright when necessary
|
|
192
|
-
|
|
193
|
-
## Examples
|
|
194
|
-
|
|
195
|
-
Check out the `src/example.ts` file for complete usage examples.
|
|
196
|
-
|
|
197
|
-
## License
|
|
198
|
-
|
|
199
|
-
MIT
|
|
200
|
-
|
|
201
|
-
## Contributing
|
|
202
|
-
|
|
203
|
-
Pull requests welcome! Please ensure TypeScript compilation and tests pass.
|
|
204
|
-
|
|
205
|
-
## Support
|
|
206
|
-
|
|
1
|
+
# ag-webscrape
|
|
2
|
+
|
|
3
|
+
A TypeScript web scraper with intelligent fallback strategy. Attempts direct HTTP fetching first, then falls back to Playwright for anti-scraping protection.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Dual Strategy**: Direct fetch first, Playwright fallback
|
|
8
|
+
- **Anti-Scraping Detection**: Automatically detects and bypasses common anti-scraping measures
|
|
9
|
+
- **Persistent Browser**: Maintains browser instance for faster subsequent scrapes
|
|
10
|
+
- **Error Handling**: Comprehensive error detection for 4xx/5xx responses
|
|
11
|
+
- **TypeScript Support**: Full type safety and IntelliSense
|
|
12
|
+
- **Configurable**: Extensive customization options
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install ag-webscrape
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
import { WebScraper } from 'ag-webscrape';
|
|
24
|
+
|
|
25
|
+
const scraper = new WebScraper();
|
|
26
|
+
|
|
27
|
+
// Scrape a single URL
|
|
28
|
+
const result = await scraper.scrape('https://example.com');
|
|
29
|
+
console.log(result.html);
|
|
30
|
+
|
|
31
|
+
// Clean up when done
|
|
32
|
+
await scraper.dispose();
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## API Reference
|
|
36
|
+
|
|
37
|
+
### WebScraper Class
|
|
38
|
+
|
|
39
|
+
#### Constructor
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
new WebScraper(options?: ScrapingOptions)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
#### Options
|
|
46
|
+
|
|
47
|
+
```typescript
|
|
48
|
+
interface ScrapingOptions {
|
|
49
|
+
timeout?: number; // Request timeout in ms (default: 30000)
|
|
50
|
+
userAgent?: string; // Custom user agent
|
|
51
|
+
headers?: Record<string, string>; // Additional headers
|
|
52
|
+
retries?: number; // Number of retries (default: 3)
|
|
53
|
+
waitForSelector?: string; // CSS selector to wait for
|
|
54
|
+
waitForTimeout?: number; // Time to wait in ms (default: 5000)
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
#### Methods
|
|
59
|
+
|
|
60
|
+
##### `scrape(url: string, options?: ScrapingOptions): Promise<ScrapingResult>`
|
|
61
|
+
|
|
62
|
+
Scrapes a single URL with fallback strategy.
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
const result = await scraper.scrape('https://example.com', {
|
|
66
|
+
timeout: 60000,
|
|
67
|
+
waitForSelector: '.main-content'
|
|
68
|
+
});
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
##### `scrapeMultiple(urls: string[], options?: ScrapingOptions): Promise<ScrapingResult[]>`
|
|
72
|
+
|
|
73
|
+
Scrapes multiple URLs efficiently.
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
const results = await scraper.scrapeMultiple([
|
|
77
|
+
'https://example1.com',
|
|
78
|
+
'https://example2.com'
|
|
79
|
+
]);
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
##### `dispose(): Promise<void>`
|
|
83
|
+
|
|
84
|
+
Cleans up browser resources. Always call this when done.
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
await scraper.dispose();
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
#### Result Object
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
interface ScrapingResult {
|
|
94
|
+
url: string; // Original URL
|
|
95
|
+
html: string; // HTML content
|
|
96
|
+
status: number; // HTTP status code
|
|
97
|
+
method: 'fetch' | 'playwright'; // Method used
|
|
98
|
+
error?: string; // Error message if any
|
|
99
|
+
redirected?: boolean; // Whether request was redirected
|
|
100
|
+
finalUrl?: string; // Final URL after redirects
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Advanced Usage
|
|
105
|
+
|
|
106
|
+
### Custom Headers and User Agent
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
const scraper = new WebScraper({
|
|
110
|
+
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
111
|
+
headers: {
|
|
112
|
+
'Accept': 'text/html,application/xhtml+xml',
|
|
113
|
+
'Accept-Language': 'en-US,en;q=0.9'
|
|
114
|
+
}
|
|
115
|
+
});
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Waiting for Content
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
// Wait for specific element
|
|
122
|
+
const result = await scraper.scrape('https://spa-app.com', {
|
|
123
|
+
waitForSelector: '.dynamic-content'
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// Wait for specific time
|
|
127
|
+
const result = await scraper.scrape('https://slow-app.com', {
|
|
128
|
+
waitForTimeout: 10000
|
|
129
|
+
});
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Error Handling
|
|
133
|
+
|
|
134
|
+
```typescript
|
|
135
|
+
const result = await scraper.scrape('https://example.com');
|
|
136
|
+
|
|
137
|
+
if (result.error) {
|
|
138
|
+
console.error('Scraping failed:', result.error);
|
|
139
|
+
} else {
|
|
140
|
+
console.log('Success:', result.html.length, 'characters');
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Batch Scraping
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
const urls = [
|
|
148
|
+
'https://news.site.com/article1',
|
|
149
|
+
'https://news.site.com/article2',
|
|
150
|
+
'https://news.site.com/article3'
|
|
151
|
+
];
|
|
152
|
+
|
|
153
|
+
const results = await scraper.scrapeMultiple(urls, {
|
|
154
|
+
waitForSelector: '.article-content'
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
results.forEach((result, index) => {
|
|
158
|
+
if (!result.error) {
|
|
159
|
+
console.log(`Article ${index + 1}: ${result.html.length} chars`);
|
|
160
|
+
}
|
|
161
|
+
});
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## How It Works
|
|
165
|
+
|
|
166
|
+
1. **Direct Fetch**: First attempts HTTP request using `node-fetch`
|
|
167
|
+
2. **Anti-Scraping Detection**: Checks response for common anti-scraping patterns
|
|
168
|
+
3. **Playwright Fallback**: If direct fetch fails or anti-scraping detected, uses Playwright
|
|
169
|
+
4. **Error Detection**: Monitors for 4xx/5xx responses in both methods
|
|
170
|
+
5. **Resource Management**: Maintains browser instance for performance
|
|
171
|
+
|
|
172
|
+
## Anti-Scraping Protection
|
|
173
|
+
|
|
174
|
+
The scraper automatically detects and handles:
|
|
175
|
+
|
|
176
|
+
- Cloudflare protection
|
|
177
|
+
- DistilNetworks
|
|
178
|
+
- PerimeterX
|
|
179
|
+
- DataDome
|
|
180
|
+
- Akamai Bot Manager
|
|
181
|
+
- CAPTCHA challenges
|
|
182
|
+
- JavaScript requirement checks
|
|
183
|
+
- Rate limiting
|
|
184
|
+
- Access denied pages
|
|
185
|
+
|
|
186
|
+
## Performance
|
|
187
|
+
|
|
188
|
+
- **Fast**: Direct fetch for simple pages
|
|
189
|
+
- **Efficient**: Reuses browser instance
|
|
190
|
+
- **Robust**: Fallback ensures high success rate
|
|
191
|
+
- **Intelligent**: Only uses Playwright when necessary
|
|
192
|
+
|
|
193
|
+
## Examples
|
|
194
|
+
|
|
195
|
+
Check out the `src/example.ts` file for complete usage examples.
|
|
196
|
+
|
|
197
|
+
## License
|
|
198
|
+
|
|
199
|
+
MIT
|
|
200
|
+
|
|
201
|
+
## Contributing
|
|
202
|
+
|
|
203
|
+
Pull requests welcome! Please ensure TypeScript compilation and tests pass.
|
|
204
|
+
|
|
205
|
+
## Support
|
|
206
|
+
|
|
207
207
|
For issues and questions, please use the GitHub issue tracker.
|
package/dist/WebScraper.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface ScrapingOptions {
|
|
|
7
7
|
waitForTimeout?: number;
|
|
8
8
|
executablePath?: string;
|
|
9
9
|
}
|
|
10
|
+
/**
 * Vendor a detected security block is attributed to.
 * `'unknown'` covers blocks that could not be tied to a specific vendor.
 */
export type SecurityBlockProvider = 'cloudflare' | 'akamai' | 'datadome' | 'perimeterx' | 'unknown';
|
|
11
|
+
/**
 * Coarse classification of what a scrape returned, exposed as
 * `ScrapingResult.contentType`.
 * NOTE(review): the classifier visible in WebScraper.js produces 'target',
 * 'challenge', 'empty' and 'error'; no producer of 'unknown' is visible there — confirm.
 */
export type ScrapedContentType = 'target' | 'challenge' | 'empty' | 'error' | 'unknown';
|
|
10
12
|
export interface ScrapingResult {
|
|
11
13
|
url: string;
|
|
12
14
|
html: string;
|
|
@@ -15,6 +17,11 @@ export interface ScrapingResult {
|
|
|
15
17
|
error?: string;
|
|
16
18
|
redirected?: boolean;
|
|
17
19
|
finalUrl?: string;
|
|
20
|
+
contentType: ScrapedContentType;
|
|
21
|
+
blockedBySecurity: boolean;
|
|
22
|
+
blockProvider?: SecurityBlockProvider;
|
|
23
|
+
blockReason?: string;
|
|
24
|
+
challengeSnippet?: string;
|
|
18
25
|
}
|
|
19
26
|
export declare class WebScraper {
|
|
20
27
|
private userAgent;
|
package/dist/WebScraper.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,MAAM,qBAAqB,GAC7B,YAAY,GACZ,QAAQ,GACR,UAAU,GACV,YAAY,GACZ,SAAS,CAAC;AAEd,MAAM,MAAM,kBAAkB,GAC1B,QAAQ,GACR,WAAW,GACX,OAAO,GACP,OAAO,GACP,SAAS,CAAC;AAEd,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,kBAAkB,CAAC;IAChC,iBAAiB,EAAE,OAAO,CAAC;IAC3B,aAAa,CAAC,EAAE,qBAAqB,CAAC;IACtC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAkID,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,aAAa;YAkDb,mBAAmB;IAqE3B,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA0DpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IA0BtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
|
package/dist/WebScraper.js
CHANGED
|
@@ -3,6 +3,94 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.WebScraper = void 0;
|
|
4
4
|
const log_1 = require("ag-common/dist/common/helpers/log");
|
|
5
5
|
const dom_1 = require("./helpers/dom");
|
|
6
|
+
/**
 * Signature table used to attribute a response to an anti-bot vendor.
 *
 * Each entry names the vendor (`provider`), the human-readable `reason`
 * reported to callers, and the case-insensitive `patterns` that identify it.
 * Entries, and the patterns inside each entry, are listed in priority order.
 *
 * NOTE(review): every pattern is an unanchored substring match, so bare
 * vendor names (e.g. /cloudflare/i) also match ordinary pages that merely
 * mention or link to that vendor — confirm this breadth is intended.
 */
const SECURITY_MARKERS = [
    // Cloudflare
    {
        provider: 'cloudflare',
        reason: 'Cloudflare challenge',
        patterns: [/cdn-cgi\/challenge-platform/i, /__cf_chl_/i, /cloudflare/i, /turnstile/i, /just a moment/i],
    },
    // Akamai
    {
        provider: 'akamai',
        reason: 'Akamai bot challenge',
        patterns: [
            /akamai/i,
            /abck/i,
            /bm_sz/i,
        ],
    },
    // DataDome
    {
        provider: 'datadome',
        reason: 'DataDome challenge',
        patterns: [
            /datadome/i,
        ],
    },
    // PerimeterX
    {
        provider: 'perimeterx',
        reason: 'PerimeterX challenge',
        patterns: [
            /perimeterx/i,
            /px-captcha/i,
            /_px3/i,
        ],
    },
];
|
|
34
|
+
/**
 * Cuts a short, single-line excerpt out of `html` around position `index`.
 *
 * The excerpt spans up to 80 characters before `index` and up to 200
 * characters from `index` onward, clamped to the bounds of the string.
 * Every run of whitespace is collapsed to one space and the result is trimmed.
 *
 * @param html  Text to excerpt from.
 * @param index Offset the excerpt is centred on (typically a regex match index).
 * @returns The normalised excerpt.
 */
function createChallengeSnippet(html, index) {
    const windowStart = Math.max(index - 80, 0);
    const windowEnd = Math.min(index + 200, html.length);
    const excerpt = html.slice(windowStart, windowEnd);
    const collapsed = excerpt.replace(/\s+/g, ' ');
    return collapsed.trim();
}
|
|
39
|
+
/**
 * Decides whether a scrape result looks like an anti-bot block.
 *
 * The HTML and the error message (if any) are joined with a newline and
 * tested against every pattern in SECURITY_MARKERS, in table order; the first
 * pattern that matches determines the provider, the reason and a snippet
 * taken around the match position. When nothing matches, an HTTP 403 or 429
 * status is still reported as a block by an 'unknown' provider, with a
 * snippet from the start of the HTML or, if the HTML is empty, the first 240
 * characters of the error.
 *
 * @param params.html   Response body.
 * @param params.status HTTP status code.
 * @param params.error  Optional error message.
 * @returns `{ blockedBySecurity: false }`, or an object with
 *          `blockedBySecurity: true`, `blockProvider`, `blockReason` and
 *          `challengeSnippet`.
 */
function detectSecurityBlock(params) {
    const html = params.html;
    const status = params.status;
    const error = params.error;
    const haystack = `${html}\n${error ?? ''}`;

    for (const { provider, reason, patterns } of SECURITY_MARKERS) {
        for (const pattern of patterns) {
            const hit = pattern.exec(haystack);
            if (hit === null) {
                continue;
            }
            // A match at offset 0 is a valid hit, so only `null` means "no match".
            return {
                blockedBySecurity: true,
                blockProvider: provider,
                blockReason: reason,
                challengeSnippet: createChallengeSnippet(haystack, hit.index),
            };
        }
    }

    const suspiciousStatus = status === 403 || status === 429;
    if (!suspiciousStatus) {
        return { blockedBySecurity: false };
    }

    // No vendor signature, but the status alone suggests an anti-bot response.
    let fallbackSnippet;
    if (html) {
        fallbackSnippet = createChallengeSnippet(html, 0);
    }
    else {
        fallbackSnippet = error?.slice(0, 240);
    }
    return {
        blockedBySecurity: true,
        blockProvider: 'unknown',
        blockReason: `HTTP ${status} suspected anti-bot block`,
        challengeSnippet: fallbackSnippet,
    };
}
|
|
68
|
+
/**
 * Classifies a scrape result into a content-type label.
 *
 * - `'challenge'` when the result was flagged as blocked by security;
 * - `'target'` when it was not blocked and the HTML has non-whitespace content;
 * - `'error'` when the HTML is blank and an error message is present;
 * - `'empty'` when the HTML is blank and there is no error.
 *
 * @param params.html              Response body.
 * @param params.error             Optional error message.
 * @param params.blockedBySecurity Result of the security-block detection.
 * @returns One of 'challenge', 'target', 'error' or 'empty'.
 */
function inferContentType(params) {
    if (params.blockedBySecurity) {
        return 'challenge';
    }
    const hasBody = params.html.trim() !== '';
    if (hasBody) {
        return 'target';
    }
    return params.error ? 'error' : 'empty';
}
|
|
78
|
+
/**
 * Returns a copy of a raw scrape result enriched with security metadata.
 *
 * Runs detectSecurityBlock on the result's `html`, `status` and `error`,
 * merges the detection fields over the original fields, and adds a
 * `contentType` derived by inferContentType from the same `html`/`error`
 * and the detection outcome. The input object is not mutated.
 *
 * @param base Raw result carrying at least `html`, `status` and optionally `error`.
 * @returns A new object: `base` fields, then detection fields, then `contentType`.
 */
function withSecurityMetadata(base) {
    const { html, status, error } = base;
    const detection = detectSecurityBlock({ html, status, error });
    const contentType = inferContentType({
        html,
        error,
        blockedBySecurity: detection.blockedBySecurity,
    });
    return { ...base, ...detection, contentType };
}
|
|
6
94
|
class WebScraper {
|
|
7
95
|
constructor(options = {}) {
|
|
8
96
|
this.userAgent =
|
|
@@ -34,7 +122,7 @@ class WebScraper {
|
|
|
34
122
|
});
|
|
35
123
|
clearTimeout(timeoutId);
|
|
36
124
|
const html = await response.text();
|
|
37
|
-
return {
|
|
125
|
+
return withSecurityMetadata({
|
|
38
126
|
url,
|
|
39
127
|
html,
|
|
40
128
|
status: response.status,
|
|
@@ -42,7 +130,7 @@ class WebScraper {
|
|
|
42
130
|
method: 'fetch',
|
|
43
131
|
redirected: response.redirected,
|
|
44
132
|
finalUrl: response.url,
|
|
45
|
-
};
|
|
133
|
+
});
|
|
46
134
|
}
|
|
47
135
|
catch (error) {
|
|
48
136
|
clearTimeout(timeoutId);
|
|
@@ -65,14 +153,14 @@ class WebScraper {
|
|
|
65
153
|
finalUrl = pageResult.url;
|
|
66
154
|
error =
|
|
67
155
|
status === 200 ? undefined : `HTTP ${status}: ${pageResult.statusText}`;
|
|
68
|
-
return {
|
|
156
|
+
return withSecurityMetadata({
|
|
69
157
|
url,
|
|
70
158
|
html,
|
|
71
159
|
status,
|
|
72
160
|
method: 'visual',
|
|
73
161
|
error,
|
|
74
162
|
finalUrl,
|
|
75
|
-
};
|
|
163
|
+
});
|
|
76
164
|
}
|
|
77
165
|
catch (err) {
|
|
78
166
|
const errorMessage = err instanceof Error ? err.message : 'Unknown error';
|
|
@@ -93,14 +181,14 @@ class WebScraper {
|
|
|
93
181
|
else {
|
|
94
182
|
status = 0;
|
|
95
183
|
}
|
|
96
|
-
return {
|
|
184
|
+
return withSecurityMetadata({
|
|
97
185
|
url,
|
|
98
186
|
html: '',
|
|
99
187
|
status,
|
|
100
188
|
method: 'visual',
|
|
101
189
|
error: errorMessage || 'err',
|
|
102
190
|
finalUrl,
|
|
103
|
-
};
|
|
191
|
+
});
|
|
104
192
|
}
|
|
105
193
|
}
|
|
106
194
|
async scrape(url, options = {}) {
|
|
@@ -108,11 +196,13 @@ class WebScraper {
|
|
|
108
196
|
let lastError = null;
|
|
109
197
|
try {
|
|
110
198
|
const result = await this.fetchDirectly(url, mergedOptions);
|
|
111
|
-
if (result.status >= 200 &&
|
|
199
|
+
if (result.status >= 200 &&
|
|
200
|
+
result.status < 300 &&
|
|
201
|
+
!result.blockedBySecurity) {
|
|
112
202
|
(0, log_1.info)('fetch: OK', url);
|
|
113
203
|
return result;
|
|
114
204
|
}
|
|
115
|
-
if (result.status === 404) {
|
|
205
|
+
if (result.status === 404 && !result.blockedBySecurity) {
|
|
116
206
|
(0, log_1.info)(`fetch:${result.status}. skip:`, url);
|
|
117
207
|
return result;
|
|
118
208
|
}
|
|
@@ -131,13 +221,13 @@ class WebScraper {
|
|
|
131
221
|
const puppeteerError = error instanceof Error ? error : new Error('Unknown puppeteer error');
|
|
132
222
|
const m = `Both methods failed. Fetch: ${lastError?.message || 'Unknown'}. puppeteer: ${puppeteerError.message}. err=${error.message}`;
|
|
133
223
|
(0, log_1.warn)(m);
|
|
134
|
-
return {
|
|
224
|
+
return withSecurityMetadata({
|
|
135
225
|
url,
|
|
136
226
|
html: '',
|
|
137
227
|
status: 0,
|
|
138
228
|
method: 'visual',
|
|
139
229
|
error: m,
|
|
140
|
-
};
|
|
230
|
+
});
|
|
141
231
|
}
|
|
142
232
|
}
|
|
143
233
|
async scrapeMultiple(urls, options = {}) {
|
|
@@ -148,13 +238,13 @@ class WebScraper {
|
|
|
148
238
|
results.push(result);
|
|
149
239
|
}
|
|
150
240
|
catch (error) {
|
|
151
|
-
results.push({
|
|
241
|
+
results.push(withSecurityMetadata({
|
|
152
242
|
url,
|
|
153
243
|
html: '',
|
|
154
244
|
status: 0,
|
|
155
245
|
method: 'fetch',
|
|
156
246
|
error: error instanceof Error ? error.message : 'Unknown error',
|
|
157
|
-
});
|
|
247
|
+
}));
|
|
158
248
|
}
|
|
159
249
|
}
|
|
160
250
|
return results;
|
package/dist/WebScraper.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;
|
|
1
|
+
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;AAgDxE,MAAM,gBAAgB,GAIjB;IACH;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE;YACR,8BAA8B;YAC9B,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,gBAAgB;SACjB;KACF;IACD;QACE,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,SAAS,EAAE,OAAO,EAAE,QAAQ,CAAC;KACzC;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,aAAa,EAAE,aAAa,EAAE,OAAO,CAAC;KAClD;CACF,CAAC;AAEF,SAAS,sBAAsB,CAAC,IAAY,EAAE,KAAa;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,EAAE,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;IAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC5D,CAAC;AAED,SAAS,mBAAmB,CAAC,MAI5B;IACC,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACvC,MAAM,IAAI,GAAG,GAAG,IAAI,KAAK,KAAK,IAAI,EAAE,EAAE,CAAC;IAEvC,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,IAAI,CAAC,KAAK,EAAE,KAAK,IAAI,KAAK,EAAE,KAAK,KAAK,CAAC,EAAE,CAAC;gBACxC,SAAS;YACX,CAAC;YAED,OAAO;gBACL,iBAAiB,EAAE,IAAI;gBACvB,aAAa,EAAE,MAAM,CAAC,QAAQ;gBAC9B,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,gBAAgB,EAAE,sBAAsB,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,CAAC;aAC5D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;QACrC,OAAO;YACL,iBAAiB,EAAE,IAAI;YACvB,aAAa,EAAE,SAAS;YACxB,WAAW,EAAE,QAAQ,MAAM,2BAA2B;YACtD,gBAAgB,EAAE,IAAI;gBACpB,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjC,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;SACzB,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtC,CAAC;AAED,SAAS,gBAAgB,CAAC,MAIzB;IACC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,EAAE,GAAG,MAAM,CAAC;IAElD,IAAI,iBAAiB,EAAE,CAAC;QACtB,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,IAAI,
CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;IACnC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,oBAAoB,CAAC,IAQ7B;IACC,MAAM,SAAS,GAAG,mBAAmB,CAAC;QACpC,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,IAAI,CAAC,KAAK;KAClB,CAAC,CAAC;IAEH,OAAO;QACL,GAAG,IAAI;QACP,GAAG,SAAS;QACZ,WAAW,EAAE,gBAAgB,CAAC;YAC5B,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,iBAAiB,EAAE,SAAS,CAAC,iBAAiB;SAC/C,CAAC;KACH,CAAC;AACJ,CAAC;AAED,MAAa,UAAU;IAIrB,YAAY,UAA2B,EAAE;QACvC,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACvE,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,mBAAmB,CAC/B,GAAW,EACX,OAAwB;QAExB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,GAAG,CAAC;QACjB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YAEH,MAAM,UAAU,GAAe,MAAM,IAAA,cAAQ,EAAC,GAAG,EAAE;gBACjD,OAAO,EAAE
,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;gBACvD,iBAAiB,EAAE,OAAO,CAAC,eAAe;gBAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;YAGH,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;YAC3B,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC;YAE1B,KAAK;gBACH,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,MAAM,KAAK,UAAU,CAAC,UAAU,EAAE,CAAC;YAE1E,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK;gBACL,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAG1E,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,CAAC;YACb,CAAC;YAED,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,YAAY,IAAI,KAAK;gBAC5B,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IACE,MAAM,CAAC,MAAM,IAAI,GAAG;gBACpB,MAAM,CAAC,MAAM,GAAG,GAAG;gBACnB,CAAC,MAAM,CAAC,iBAAiB,EACzB,CAAC;gBACD,IAAA,UAAI,EAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBACvB,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC;gBACvD,IAAA,UAAI,EAAC,SAAS,MAAM,CAAC,MAAM,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3C,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,qDAAqD,GAAG,KAAK,SAA
S,CAAC,OAAO,8BAA8B,CAC7G,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,IAAA,WAAK,EACH,mCAAmC,GAAG,GAAG,EACzC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CACjC,CAAC;YACF,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,cAAc,GAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAExE,MAAM,CAAC,GAAG,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,gBAAgB,cAAc,CAAC,OAAO,SAAU,KAAe,CAAC,OAAO,EAAE,CAAC;YAClJ,IAAA,UAAI,EAAC,CAAC,CAAC,CAAC;YACR,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CACV,oBAAoB,CAAC;oBACnB,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,MAAM,IAAA,kBAAY,GAAE,CAAC;IACvB,CAAC;CACF;AAtOD,gCAsOC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dom.d.ts","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAOpD,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,WAAW,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;
|
|
1
|
+
{"version":3,"file":"dom.d.ts","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAOpD,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,WAAW,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAgFD,eAAO,MAAM,aAAa,GAAU,iBAAiB,MAAM,kBAuC1D,CAAC;AAEF,eAAO,MAAM,YAAY,qBAUxB,CAAC;AAEF,eAAO,MAAM,QAAQ,GACnB,KAAK,MAAM,GAAG,GAAG,EACjB,MAAM;IAEJ,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB,KACA,OAAO,CAAC,UAAU,CAuGpB,CAAC"}
|
package/dist/helpers/dom.js
CHANGED
|
@@ -10,23 +10,74 @@ const fs_1 = require("fs");
|
|
|
10
10
|
const node_html_parser_1 = require("node-html-parser");
|
|
11
11
|
const puppeteer_core_1 = require("puppeteer-core");
|
|
12
12
|
let browser;
|
|
13
|
+
const ENV_EXECUTABLE_PATH_KEYS = [
|
|
14
|
+
'CHROME_EXECUTABLE_PATH',
|
|
15
|
+
'PUPPETEER_EXECUTABLE_PATH',
|
|
16
|
+
'BROWSER_EXECUTABLE_PATH',
|
|
17
|
+
];
|
|
18
|
+
function getPlatformChromePaths() {
|
|
19
|
+
switch (process.platform) {
|
|
20
|
+
case 'win32':
|
|
21
|
+
return [
|
|
22
|
+
'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
|
|
23
|
+
'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
|
|
24
|
+
'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
|
|
25
|
+
'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
|
|
26
|
+
];
|
|
27
|
+
case 'darwin':
|
|
28
|
+
return [
|
|
29
|
+
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
30
|
+
'/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
|
|
31
|
+
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
|
32
|
+
];
|
|
33
|
+
case 'linux':
|
|
34
|
+
return [
|
|
35
|
+
'/usr/bin/google-chrome',
|
|
36
|
+
'/usr/bin/google-chrome-stable',
|
|
37
|
+
'/usr/bin/chromium',
|
|
38
|
+
'/usr/bin/chromium-browser',
|
|
39
|
+
'/snap/bin/chromium',
|
|
40
|
+
];
|
|
41
|
+
case 'aix':
|
|
42
|
+
case 'android':
|
|
43
|
+
case 'freebsd':
|
|
44
|
+
case 'haiku':
|
|
45
|
+
case 'openbsd':
|
|
46
|
+
case 'sunos':
|
|
47
|
+
case 'cygwin':
|
|
48
|
+
case 'netbsd':
|
|
49
|
+
return [];
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
const pathExists = (path) => {
|
|
53
|
+
try {
|
|
54
|
+
(0, fs_1.accessSync)(path);
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
catch {
|
|
58
|
+
return false;
|
|
59
|
+
}
|
|
60
|
+
};
|
|
13
61
|
const getSystemChromePath = async () => {
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
try {
|
|
23
|
-
(0, fs_1.accessSync)(path);
|
|
24
|
-
return path;
|
|
62
|
+
for (const key of ENV_EXECUTABLE_PATH_KEYS) {
|
|
63
|
+
const value = process.env[key];
|
|
64
|
+
if (!value) {
|
|
65
|
+
continue;
|
|
66
|
+
}
|
|
67
|
+
if (pathExists(value)) {
|
|
68
|
+
(0, log_1.info)(`using browser executable from ${key}: ${value}`);
|
|
69
|
+
return value;
|
|
25
70
|
}
|
|
26
|
-
|
|
71
|
+
(0, log_1.info)(`browser executable from ${key} not found: ${value}`);
|
|
72
|
+
}
|
|
73
|
+
for (const path of getPlatformChromePaths()) {
|
|
74
|
+
if (pathExists(path)) {
|
|
75
|
+
(0, log_1.info)(`using detected browser executable: ${path}`);
|
|
76
|
+
return path;
|
|
27
77
|
}
|
|
28
78
|
}
|
|
29
79
|
const ret = await chromium_1.default.executablePath();
|
|
80
|
+
(0, log_1.info)(`using sparticuz chromium executable: ${ret}`);
|
|
30
81
|
return ret;
|
|
31
82
|
};
|
|
32
83
|
const launchBrowser = async (executablePath) => {
|
|
@@ -117,7 +168,7 @@ const goToPage = async (url, opt) => {
|
|
|
117
168
|
}
|
|
118
169
|
const content = await page.content();
|
|
119
170
|
const doc = (0, node_html_parser_1.parse)(content);
|
|
120
|
-
doc.querySelectorAll('.visually-hidden')
|
|
171
|
+
doc.querySelectorAll('.visually-hidden').forEach((n) => n.remove());
|
|
121
172
|
await page.close();
|
|
122
173
|
const result = {
|
|
123
174
|
html: doc,
|
package/dist/helpers/dom.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dom.js","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":";;;;;;AACA,mEAA2C;AAC3C,2DAA8E;AAC9E,2BAAgC;AAEhC,uDAAyC;AAEzC,mDAAwC;AAExC,IAAI,OAA4B,CAAC;AAUjC,MAAM,
|
|
1
|
+
{"version":3,"file":"dom.js","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":";;;;;;AACA,mEAA2C;AAC3C,2DAA8E;AAC9E,2BAAgC;AAEhC,uDAAyC;AAEzC,mDAAwC;AAExC,IAAI,OAA4B,CAAC;AAUjC,MAAM,wBAAwB,GAAG;IAC/B,wBAAwB;IACxB,2BAA2B;IAC3B,yBAAyB;CACjB,CAAC;AAEX,SAAS,sBAAsB;IAC7B,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,OAAO;YACV,OAAO;gBACL,4DAA4D;gBAC5D,kEAAkE;gBAClE,6DAA6D;gBAC7D,mEAAmE;aACpE,CAAC;QACJ,KAAK,QAAQ;YACX,OAAO;gBACL,8DAA8D;gBAC9D,gEAAgE;gBAChE,oDAAoD;aACrD,CAAC;QACJ,KAAK,OAAO;YACV,OAAO;gBACL,wBAAwB;gBACxB,+BAA+B;gBAC/B,mBAAmB;gBACnB,2BAA2B;gBAC3B,oBAAoB;aACrB,CAAC;QACJ,KAAK,KAAK,CAAC;QACX,KAAK,SAAS,CAAC;QACf,KAAK,SAAS,CAAC;QACf,KAAK,OAAO,CAAC;QACb,KAAK,SAAS,CAAC;QACf,KAAK,OAAO,CAAC;QACb,KAAK,QAAQ,CAAC;QACd,KAAK,QAAQ;YACX,OAAO,EAAE,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,IAAY,EAAW,EAAE;IAC3C,IAAI,CAAC;QACH,IAAA,eAAU,EAAC,IAAI,CAAC,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,mBAAmB,GAAG,KAAK,IAAI,EAAE;IACrC,KAAK,MAAM,GAAG,IAAI,wBAAwB,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,IAAA,UAAI,EAAC,iCAAiC,GAAG,KAAK,KAAK,EAAE,CAAC,CAAC;YACvD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAA,UAAI,EAAC,2BAA2B,GAAG,eAAe,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,sBAAsB,EAAE,EAAE,CAAC;QAC5C,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,IAAA,UAAI,EAAC,sCAAsC,IAAI,EAAE,CAAC,CAAC;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAGD,MAAM,GAAG,GAAG,MAAM,kBAAQ,CAAC,cAAc,EAAE,CAAC;IAC5C,IAAA,UAAI,EAAC,wCAAwC,GAAG,EAAE,CAAC,CAAC;IACpD,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEK,MAAM,aAAa,GAAG,KAAK,EAAE,cAAuB,EAAE,EAAE;IAC7D,MAAM,qBAAqB,GAAG,cAAc,IAAI,CAAC,MAAM,mBAAmB,EAAE,CAAC,CAAC;IAC9E,MAAM,GAAG,GAAG;QACV,eAAe,EAAE;YACf,MAAM,EAAE,IAAI;YACZ,KAAK,EAAE,IAAI;SACZ;QACD,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;QACzD,iBAAiB,EAAE,IAAI;QACvB,QAAQ,EAAE,KAAK;QA
Cf,cAAc,EAAE,qBAAqB;QACrC,IAAI,EAAE;YACJ,cAAc;YACd,0BAA0B;YAC1B,yBAAyB;YACzB,iCAAiC;YACjC,gBAAgB;YAChB,aAAa;YACb,kBAAkB;YAClB,eAAe;YACf,uCAAuC;YACvC,kCAAkC;YAClC,0CAA0C;YAC1C,mCAAmC;YACnC,4BAA4B;YAC5B,0BAA0B;YAC1B,sBAAsB;SACvB;KACF,CAAC;IAEF,IAAA,WAAK,EAAC,sBAAsB,EAAE,GAAG,CAAC,CAAC;IACnC,IAAI,CAAC;QACH,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;YACnB,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;IAET,CAAC;IACD,OAAO,GAAG,CAAC,MAAM,IAAA,uBAAM,EAAC,GAAG,CAAC,CAAuB,CAAC;AACtD,CAAC,CAAC;AAvCW,QAAA,aAAa,iBAuCxB;AAEK,MAAM,YAAY,GAAG,KAAK,IAAI,EAAE;IACrC,IAAI,CAAC;QACH,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;QACT,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAA,UAAI,EAAC,wBAAwB,EAAE,CAAC,CAAC,CAAC;IACpC,CAAC;AACH,CAAC,CAAC;AAVW,QAAA,YAAY,gBAUvB;AAEK,MAAM,QAAQ,GAAG,KAAK,EAC3B,GAAiB,EACjB,GAKC,EACoB,EAAE;IACvB,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,GAAG,CAAC;QACF,IAAI,CAAC;YACH,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,IAAA,qBAAa,EAAC,GAAG,EAAE,cAAc,CAAC,CAAC;YAC3C,CAAC;YAED,IAAA,WAAK,EAAC,aAAa,GAAG,GAAG,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,OAAQ,CAAC,OAAO,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,IAAI,IAAI,CAAC;YAC7B,IAAI,UAAU,EAAE,CAAC;gBACf,CAAC,IAAI,IAAI,CAAC;YACZ,CAAC;YAED,MAAM,IAAI,GAAG,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;YAC5D,IAAI,QAAQ,CAAC;YAEb,IAAI,CAAC,GAAG,EAAE,iBAAiB,EAAE,CAAC;gBAE5B,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBAC/B,SAAS,EAAE,CAAC,MAAM,EAAE,kBAAkB,CAAC;oBACvC,OAAO,EAAE,CAAC;iBACX,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBAC/B,SAAS,EAAE,CAAC,MAAM,CAAC;oBACnB,OAAO,EAAE,CAAC;iBACX,CAAC,CAAC;gBACH,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,iBAAiB,EAAE;oBAChD,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,IAAI;iBACd,CAAC,CAAC;YACL,CAAC;YAGD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrC,MAAM,GAAG,GAAG,IAAA,wBAAK,EAAC,OAAO,CAAC,CAAC;YAG3B,GAAG,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC
,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YACpE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YAGnB,MAAM,MAAM,GAAe;gBACzB,IAAI,EAAE,GAAG;gBACT,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE;gBACzB,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE;gBACjC,GAAG,EAAE,QAAQ,CAAC,GAAG,EAAE;gBACnB,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE;aAC5B,CAAC;YAEF,UAAU,GAAG,KAAK,CAAC;YACnB,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,GAAG,GAAY,CAAC;YACvB,IAAI,UAAU,EAAE,CAAC;gBACf,IAAA,WAAK,EAAC,qBAAqB,EAAE,GAAG,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAChD,MAAM,CAAC,CAAC;YACV,CAAC;YAGD,IACE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;gBACzC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACnC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBAC3C,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;gBACtD,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;gBACvC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC;gBACtC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAChC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAC9B,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC/B,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC/B,CAAC;gBACD,IAAI,CAAC;oBACH,IAAA,WAAK,EAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAGnC,IACE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;wBAChC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,EAC9B,CAAC;wBACD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;oBAC5D,CAAC;oBAED,MAAM,IAAA,qBAAa,EAAC,GAAG,EAAE,cAAc,CAAC,CAAC;oBACzC,UAAU,GAAG,IAAI,CAAC;gBACpB,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACZ,IAAA,WAAK,EAAC,4BAA4B,EAAE,EAAE,CAAC,CAAC;oBACxC,MAAM,EAAE,CAAC;gBACX,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAA,WAAK,EAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;gBAC3B,MAAM,CAAC,CAAC;YACV,CAAC;QACH,CAAC;IAEH,CAAC,QAAQ,UAAU,EAAE;IACrB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;AACrC,CAAC,CAAC;AA/GW,QAAA,QAAQ,YA+GnB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ag-webscrape",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.17",
|
|
4
4
|
"author": "admin@gec.dev",
|
|
5
5
|
"description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
],
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@sparticuz/chromium": "^
|
|
18
|
-
"ag-common": "^0.0.
|
|
17
|
+
"@sparticuz/chromium": "^143.0.0",
|
|
18
|
+
"ag-common": "^0.0.874",
|
|
19
19
|
"node-html-parser": "^7.0.1",
|
|
20
20
|
"puppeteer": "^24.15.0",
|
|
21
21
|
"puppeteer-core": "^24.15.0"
|
|
22
22
|
},
|
|
23
23
|
"devDependencies": {
|
|
24
|
-
"@types/node": "^
|
|
24
|
+
"@types/node": "^25.0.0",
|
|
25
25
|
"eslint": "^9.32.0",
|
|
26
26
|
"eslint-config-e7npm": "^0.1.23",
|
|
27
27
|
"tsx": "^4.20.3",
|