ag-webscrape 0.0.16 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/WebScraper.d.ts +7 -0
- package/dist/WebScraper.d.ts.map +1 -1
- package/dist/WebScraper.js +102 -12
- package/dist/WebScraper.js.map +1 -1
- package/package.json +2 -2
package/dist/WebScraper.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface ScrapingOptions {
|
|
|
7
7
|
waitForTimeout?: number;
|
|
8
8
|
executablePath?: string;
|
|
9
9
|
}
|
|
10
|
+
export type SecurityBlockProvider = 'cloudflare' | 'akamai' | 'datadome' | 'perimeterx' | 'unknown';
|
|
11
|
+
export type ScrapedContentType = 'target' | 'challenge' | 'empty' | 'error' | 'unknown';
|
|
10
12
|
export interface ScrapingResult {
|
|
11
13
|
url: string;
|
|
12
14
|
html: string;
|
|
@@ -15,6 +17,11 @@ export interface ScrapingResult {
|
|
|
15
17
|
error?: string;
|
|
16
18
|
redirected?: boolean;
|
|
17
19
|
finalUrl?: string;
|
|
20
|
+
contentType: ScrapedContentType;
|
|
21
|
+
blockedBySecurity: boolean;
|
|
22
|
+
blockProvider?: SecurityBlockProvider;
|
|
23
|
+
blockReason?: string;
|
|
24
|
+
challengeSnippet?: string;
|
|
18
25
|
}
|
|
19
26
|
export declare class WebScraper {
|
|
20
27
|
private userAgent;
|
package/dist/WebScraper.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,MAAM,qBAAqB,GAC7B,YAAY,GACZ,QAAQ,GACR,UAAU,GACV,YAAY,GACZ,SAAS,CAAC;AAEd,MAAM,MAAM,kBAAkB,GAC1B,QAAQ,GACR,WAAW,GACX,OAAO,GACP,OAAO,GACP,SAAS,CAAC;AAEd,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,kBAAkB,CAAC;IAChC,iBAAiB,EAAE,OAAO,CAAC;IAC3B,aAAa,CAAC,EAAE,qBAAqB,CAAC;IACtC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAkID,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,aAAa;YAkDb,mBAAmB;IAqE3B,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA0DpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IA0BtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
|
package/dist/WebScraper.js
CHANGED
|
@@ -3,6 +3,94 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.WebScraper = void 0;
|
|
4
4
|
const log_1 = require("ag-common/dist/common/helpers/log");
|
|
5
5
|
const dom_1 = require("./helpers/dom");
|
|
6
|
+
const SECURITY_MARKERS = [
|
|
7
|
+
{
|
|
8
|
+
provider: 'cloudflare',
|
|
9
|
+
reason: 'Cloudflare challenge',
|
|
10
|
+
patterns: [
|
|
11
|
+
/cdn-cgi\/challenge-platform/i,
|
|
12
|
+
/__cf_chl_/i,
|
|
13
|
+
/cloudflare/i,
|
|
14
|
+
/turnstile/i,
|
|
15
|
+
/just a moment/i,
|
|
16
|
+
],
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
provider: 'akamai',
|
|
20
|
+
reason: 'Akamai bot challenge',
|
|
21
|
+
patterns: [/akamai/i, /abck/i, /bm_sz/i],
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
provider: 'datadome',
|
|
25
|
+
reason: 'DataDome challenge',
|
|
26
|
+
patterns: [/datadome/i],
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
provider: 'perimeterx',
|
|
30
|
+
reason: 'PerimeterX challenge',
|
|
31
|
+
patterns: [/perimeterx/i, /px-captcha/i, /_px3/i],
|
|
32
|
+
},
|
|
33
|
+
];
|
|
34
|
+
function createChallengeSnippet(html, index) {
|
|
35
|
+
const start = Math.max(0, index - 80);
|
|
36
|
+
const end = Math.min(html.length, index + 200);
|
|
37
|
+
return html.slice(start, end).replace(/\s+/g, ' ').trim();
|
|
38
|
+
}
|
|
39
|
+
function detectSecurityBlock(params) {
|
|
40
|
+
const { html, status, error } = params;
|
|
41
|
+
const text = `${html}\n${error ?? ''}`;
|
|
42
|
+
for (const marker of SECURITY_MARKERS) {
|
|
43
|
+
for (const pattern of marker.patterns) {
|
|
44
|
+
const match = pattern.exec(text);
|
|
45
|
+
if (!match?.index && match?.index !== 0) {
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
48
|
+
return {
|
|
49
|
+
blockedBySecurity: true,
|
|
50
|
+
blockProvider: marker.provider,
|
|
51
|
+
blockReason: marker.reason,
|
|
52
|
+
challengeSnippet: createChallengeSnippet(text, match.index),
|
|
53
|
+
};
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
if (status === 403 || status === 429) {
|
|
57
|
+
return {
|
|
58
|
+
blockedBySecurity: true,
|
|
59
|
+
blockProvider: 'unknown',
|
|
60
|
+
blockReason: `HTTP ${status} suspected anti-bot block`,
|
|
61
|
+
challengeSnippet: html
|
|
62
|
+
? createChallengeSnippet(html, 0)
|
|
63
|
+
: error?.slice(0, 240),
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
return { blockedBySecurity: false };
|
|
67
|
+
}
|
|
68
|
+
function inferContentType(params) {
|
|
69
|
+
const { html, error, blockedBySecurity } = params;
|
|
70
|
+
if (blockedBySecurity) {
|
|
71
|
+
return 'challenge';
|
|
72
|
+
}
|
|
73
|
+
if (!html.trim()) {
|
|
74
|
+
return error ? 'error' : 'empty';
|
|
75
|
+
}
|
|
76
|
+
return 'target';
|
|
77
|
+
}
|
|
78
|
+
function withSecurityMetadata(base) {
|
|
79
|
+
const detection = detectSecurityBlock({
|
|
80
|
+
html: base.html,
|
|
81
|
+
status: base.status,
|
|
82
|
+
error: base.error,
|
|
83
|
+
});
|
|
84
|
+
return {
|
|
85
|
+
...base,
|
|
86
|
+
...detection,
|
|
87
|
+
contentType: inferContentType({
|
|
88
|
+
html: base.html,
|
|
89
|
+
error: base.error,
|
|
90
|
+
blockedBySecurity: detection.blockedBySecurity,
|
|
91
|
+
}),
|
|
92
|
+
};
|
|
93
|
+
}
|
|
6
94
|
class WebScraper {
|
|
7
95
|
constructor(options = {}) {
|
|
8
96
|
this.userAgent =
|
|
@@ -34,7 +122,7 @@ class WebScraper {
|
|
|
34
122
|
});
|
|
35
123
|
clearTimeout(timeoutId);
|
|
36
124
|
const html = await response.text();
|
|
37
|
-
return {
|
|
125
|
+
return withSecurityMetadata({
|
|
38
126
|
url,
|
|
39
127
|
html,
|
|
40
128
|
status: response.status,
|
|
@@ -42,7 +130,7 @@ class WebScraper {
|
|
|
42
130
|
method: 'fetch',
|
|
43
131
|
redirected: response.redirected,
|
|
44
132
|
finalUrl: response.url,
|
|
45
|
-
};
|
|
133
|
+
});
|
|
46
134
|
}
|
|
47
135
|
catch (error) {
|
|
48
136
|
clearTimeout(timeoutId);
|
|
@@ -65,14 +153,14 @@ class WebScraper {
|
|
|
65
153
|
finalUrl = pageResult.url;
|
|
66
154
|
error =
|
|
67
155
|
status === 200 ? undefined : `HTTP ${status}: ${pageResult.statusText}`;
|
|
68
|
-
return {
|
|
156
|
+
return withSecurityMetadata({
|
|
69
157
|
url,
|
|
70
158
|
html,
|
|
71
159
|
status,
|
|
72
160
|
method: 'visual',
|
|
73
161
|
error,
|
|
74
162
|
finalUrl,
|
|
75
|
-
};
|
|
163
|
+
});
|
|
76
164
|
}
|
|
77
165
|
catch (err) {
|
|
78
166
|
const errorMessage = err instanceof Error ? err.message : 'Unknown error';
|
|
@@ -93,14 +181,14 @@ class WebScraper {
|
|
|
93
181
|
else {
|
|
94
182
|
status = 0;
|
|
95
183
|
}
|
|
96
|
-
return {
|
|
184
|
+
return withSecurityMetadata({
|
|
97
185
|
url,
|
|
98
186
|
html: '',
|
|
99
187
|
status,
|
|
100
188
|
method: 'visual',
|
|
101
189
|
error: errorMessage || 'err',
|
|
102
190
|
finalUrl,
|
|
103
|
-
};
|
|
191
|
+
});
|
|
104
192
|
}
|
|
105
193
|
}
|
|
106
194
|
async scrape(url, options = {}) {
|
|
@@ -108,11 +196,13 @@ class WebScraper {
|
|
|
108
196
|
let lastError = null;
|
|
109
197
|
try {
|
|
110
198
|
const result = await this.fetchDirectly(url, mergedOptions);
|
|
111
|
-
if (result.status >= 200 &&
|
|
199
|
+
if (result.status >= 200 &&
|
|
200
|
+
result.status < 300 &&
|
|
201
|
+
!result.blockedBySecurity) {
|
|
112
202
|
(0, log_1.info)('fetch: OK', url);
|
|
113
203
|
return result;
|
|
114
204
|
}
|
|
115
|
-
if (result.status === 404) {
|
|
205
|
+
if (result.status === 404 && !result.blockedBySecurity) {
|
|
116
206
|
(0, log_1.info)(`fetch:${result.status}. skip:`, url);
|
|
117
207
|
return result;
|
|
118
208
|
}
|
|
@@ -131,13 +221,13 @@ class WebScraper {
|
|
|
131
221
|
const puppeteerError = error instanceof Error ? error : new Error('Unknown puppeteer error');
|
|
132
222
|
const m = `Both methods failed. Fetch: ${lastError?.message || 'Unknown'}. puppeteer: ${puppeteerError.message}. err=${error.message}`;
|
|
133
223
|
(0, log_1.warn)(m);
|
|
134
|
-
return {
|
|
224
|
+
return withSecurityMetadata({
|
|
135
225
|
url,
|
|
136
226
|
html: '',
|
|
137
227
|
status: 0,
|
|
138
228
|
method: 'visual',
|
|
139
229
|
error: m,
|
|
140
|
-
};
|
|
230
|
+
});
|
|
141
231
|
}
|
|
142
232
|
}
|
|
143
233
|
async scrapeMultiple(urls, options = {}) {
|
|
@@ -148,13 +238,13 @@ class WebScraper {
|
|
|
148
238
|
results.push(result);
|
|
149
239
|
}
|
|
150
240
|
catch (error) {
|
|
151
|
-
results.push({
|
|
241
|
+
results.push(withSecurityMetadata({
|
|
152
242
|
url,
|
|
153
243
|
html: '',
|
|
154
244
|
status: 0,
|
|
155
245
|
method: 'fetch',
|
|
156
246
|
error: error instanceof Error ? error.message : 'Unknown error',
|
|
157
|
-
});
|
|
247
|
+
}));
|
|
158
248
|
}
|
|
159
249
|
}
|
|
160
250
|
return results;
|
package/dist/WebScraper.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;
|
|
1
|
+
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;AAgDxE,MAAM,gBAAgB,GAIjB;IACH;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE;YACR,8BAA8B;YAC9B,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,gBAAgB;SACjB;KACF;IACD;QACE,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,SAAS,EAAE,OAAO,EAAE,QAAQ,CAAC;KACzC;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,aAAa,EAAE,aAAa,EAAE,OAAO,CAAC;KAClD;CACF,CAAC;AAEF,SAAS,sBAAsB,CAAC,IAAY,EAAE,KAAa;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,EAAE,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;IAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC5D,CAAC;AAED,SAAS,mBAAmB,CAAC,MAI5B;IACC,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACvC,MAAM,IAAI,GAAG,GAAG,IAAI,KAAK,KAAK,IAAI,EAAE,EAAE,CAAC;IAEvC,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,IAAI,CAAC,KAAK,EAAE,KAAK,IAAI,KAAK,EAAE,KAAK,KAAK,CAAC,EAAE,CAAC;gBACxC,SAAS;YACX,CAAC;YAED,OAAO;gBACL,iBAAiB,EAAE,IAAI;gBACvB,aAAa,EAAE,MAAM,CAAC,QAAQ;gBAC9B,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,gBAAgB,EAAE,sBAAsB,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,CAAC;aAC5D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;QACrC,OAAO;YACL,iBAAiB,EAAE,IAAI;YACvB,aAAa,EAAE,SAAS;YACxB,WAAW,EAAE,QAAQ,MAAM,2BAA2B;YACtD,gBAAgB,EAAE,IAAI;gBACpB,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjC,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;SACzB,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtC,CAAC;AAED,SAAS,gBAAgB,CAAC,MAIzB;IACC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,EAAE,GAAG,MAAM,CAAC;IAElD,IAAI,iBAAiB,EAAE,CAAC;QACtB,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,IAAI,
CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;IACnC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,oBAAoB,CAAC,IAQ7B;IACC,MAAM,SAAS,GAAG,mBAAmB,CAAC;QACpC,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,IAAI,CAAC,KAAK;KAClB,CAAC,CAAC;IAEH,OAAO;QACL,GAAG,IAAI;QACP,GAAG,SAAS;QACZ,WAAW,EAAE,gBAAgB,CAAC;YAC5B,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,iBAAiB,EAAE,SAAS,CAAC,iBAAiB;SAC/C,CAAC;KACH,CAAC;AACJ,CAAC;AAED,MAAa,UAAU;IAIrB,YAAY,UAA2B,EAAE;QACvC,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACvE,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,mBAAmB,CAC/B,GAAW,EACX,OAAwB;QAExB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,GAAG,CAAC;QACjB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YAEH,MAAM,UAAU,GAAe,MAAM,IAAA,cAAQ,EAAC,GAAG,EAAE;gBACjD,OAAO,EAAE
,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;gBACvD,iBAAiB,EAAE,OAAO,CAAC,eAAe;gBAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;YAGH,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;YAC3B,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC;YAE1B,KAAK;gBACH,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,MAAM,KAAK,UAAU,CAAC,UAAU,EAAE,CAAC;YAE1E,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK;gBACL,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAG1E,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,CAAC;YACb,CAAC;YAED,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,YAAY,IAAI,KAAK;gBAC5B,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IACE,MAAM,CAAC,MAAM,IAAI,GAAG;gBACpB,MAAM,CAAC,MAAM,GAAG,GAAG;gBACnB,CAAC,MAAM,CAAC,iBAAiB,EACzB,CAAC;gBACD,IAAA,UAAI,EAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBACvB,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC;gBACvD,IAAA,UAAI,EAAC,SAAS,MAAM,CAAC,MAAM,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3C,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,qDAAqD,GAAG,KAAK,SAA
S,CAAC,OAAO,8BAA8B,CAC7G,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,IAAA,WAAK,EACH,mCAAmC,GAAG,GAAG,EACzC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CACjC,CAAC;YACF,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,cAAc,GAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAExE,MAAM,CAAC,GAAG,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,gBAAgB,cAAc,CAAC,OAAO,SAAU,KAAe,CAAC,OAAO,EAAE,CAAC;YAClJ,IAAA,UAAI,EAAC,CAAC,CAAC,CAAC;YACR,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CACV,oBAAoB,CAAC;oBACnB,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,MAAM,IAAA,kBAAY,GAAE,CAAC;IACvB,CAAC;CACF;AAtOD,gCAsOC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ag-webscrape",
|
|
3
|
-
"version": "0.0.16",
|
|
3
|
+
"version": "0.0.18",
|
|
4
4
|
"author": "admin@gec.dev",
|
|
5
5
|
"description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"dependencies": {
|
|
17
17
|
"@sparticuz/chromium": "^143.0.0",
|
|
18
|
-
"ag-common": "^0.0.
|
|
18
|
+
"ag-common": "^0.0.875",
|
|
19
19
|
"node-html-parser": "^7.0.1",
|
|
20
20
|
"puppeteer": "^24.15.0",
|
|
21
21
|
"puppeteer-core": "^24.15.0"
|