ag-webscrape 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,8 @@ export interface ScrapingOptions {
7
7
  waitForTimeout?: number;
8
8
  executablePath?: string;
9
9
  }
10
+ export type SecurityBlockProvider = 'cloudflare' | 'akamai' | 'datadome' | 'perimeterx' | 'unknown';
11
+ export type ScrapedContentType = 'target' | 'challenge' | 'empty' | 'error' | 'unknown';
10
12
  export interface ScrapingResult {
11
13
  url: string;
12
14
  html: string;
@@ -15,6 +17,11 @@ export interface ScrapingResult {
15
17
  error?: string;
16
18
  redirected?: boolean;
17
19
  finalUrl?: string;
20
+ contentType: ScrapedContentType;
21
+ blockedBySecurity: boolean;
22
+ blockProvider?: SecurityBlockProvider;
23
+ blockReason?: string;
24
+ challengeSnippet?: string;
18
25
  }
19
26
  export declare class WebScraper {
20
27
  private userAgent;
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,aAAa;YAkDb,mBAAmB;IAqE3B,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IAsDpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,MAAM,qBAAqB,GAC7B,YAAY,GACZ,QAAQ,GACR,UAAU,GACV,YAAY,GACZ,SAAS,CAAC;AAEd,MAAM,MAAM,kBAAkB,GAC1B,QAAQ,GACR,WAAW,GACX,OAAO,GACP,OAAO,GACP,SAAS,CAAC;AAEd,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,kBAAkB,CAAC;IAChC,iBAAiB,EAAE,OAAO,CAAC;IAC3B,aAAa,CAAC,EAAE,qBAAqB,CAAC;IACtC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAkID,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,aAAa;YAkDb,mBAAmB;IAqE3B,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA0DpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IA0BtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -3,6 +3,94 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.WebScraper = void 0;
4
4
  const log_1 = require("ag-common/dist/common/helpers/log");
5
5
  const dom_1 = require("./helpers/dom");
6
+ const SECURITY_MARKERS = [
7
+ {
8
+ provider: 'cloudflare',
9
+ reason: 'Cloudflare challenge',
10
+ patterns: [
11
+ /cdn-cgi\/challenge-platform/i,
12
+ /__cf_chl_/i,
13
+ /cloudflare/i,
14
+ /turnstile/i,
15
+ /just a moment/i,
16
+ ],
17
+ },
18
+ {
19
+ provider: 'akamai',
20
+ reason: 'Akamai bot challenge',
21
+ patterns: [/akamai/i, /abck/i, /bm_sz/i],
22
+ },
23
+ {
24
+ provider: 'datadome',
25
+ reason: 'DataDome challenge',
26
+ patterns: [/datadome/i],
27
+ },
28
+ {
29
+ provider: 'perimeterx',
30
+ reason: 'PerimeterX challenge',
31
+ patterns: [/perimeterx/i, /px-captcha/i, /_px3/i],
32
+ },
33
+ ];
34
+ function createChallengeSnippet(html, index) {
35
+ const start = Math.max(0, index - 80);
36
+ const end = Math.min(html.length, index + 200);
37
+ return html.slice(start, end).replace(/\s+/g, ' ').trim();
38
+ }
39
+ function detectSecurityBlock(params) {
40
+ const { html, status, error } = params;
41
+ const text = `${html}\n${error ?? ''}`;
42
+ for (const marker of SECURITY_MARKERS) {
43
+ for (const pattern of marker.patterns) {
44
+ const match = pattern.exec(text);
45
+ if (!match?.index && match?.index !== 0) {
46
+ continue;
47
+ }
48
+ return {
49
+ blockedBySecurity: true,
50
+ blockProvider: marker.provider,
51
+ blockReason: marker.reason,
52
+ challengeSnippet: createChallengeSnippet(text, match.index),
53
+ };
54
+ }
55
+ }
56
+ if (status === 403 || status === 429) {
57
+ return {
58
+ blockedBySecurity: true,
59
+ blockProvider: 'unknown',
60
+ blockReason: `HTTP ${status} suspected anti-bot block`,
61
+ challengeSnippet: html
62
+ ? createChallengeSnippet(html, 0)
63
+ : error?.slice(0, 240),
64
+ };
65
+ }
66
+ return { blockedBySecurity: false };
67
+ }
68
+ function inferContentType(params) {
69
+ const { html, error, blockedBySecurity } = params;
70
+ if (blockedBySecurity) {
71
+ return 'challenge';
72
+ }
73
+ if (!html.trim()) {
74
+ return error ? 'error' : 'empty';
75
+ }
76
+ return 'target';
77
+ }
78
+ function withSecurityMetadata(base) {
79
+ const detection = detectSecurityBlock({
80
+ html: base.html,
81
+ status: base.status,
82
+ error: base.error,
83
+ });
84
+ return {
85
+ ...base,
86
+ ...detection,
87
+ contentType: inferContentType({
88
+ html: base.html,
89
+ error: base.error,
90
+ blockedBySecurity: detection.blockedBySecurity,
91
+ }),
92
+ };
93
+ }
6
94
  class WebScraper {
7
95
  constructor(options = {}) {
8
96
  this.userAgent =
@@ -34,7 +122,7 @@ class WebScraper {
34
122
  });
35
123
  clearTimeout(timeoutId);
36
124
  const html = await response.text();
37
- return {
125
+ return withSecurityMetadata({
38
126
  url,
39
127
  html,
40
128
  status: response.status,
@@ -42,7 +130,7 @@ class WebScraper {
42
130
  method: 'fetch',
43
131
  redirected: response.redirected,
44
132
  finalUrl: response.url,
45
- };
133
+ });
46
134
  }
47
135
  catch (error) {
48
136
  clearTimeout(timeoutId);
@@ -65,14 +153,14 @@ class WebScraper {
65
153
  finalUrl = pageResult.url;
66
154
  error =
67
155
  status === 200 ? undefined : `HTTP ${status}: ${pageResult.statusText}`;
68
- return {
156
+ return withSecurityMetadata({
69
157
  url,
70
158
  html,
71
159
  status,
72
160
  method: 'visual',
73
161
  error,
74
162
  finalUrl,
75
- };
163
+ });
76
164
  }
77
165
  catch (err) {
78
166
  const errorMessage = err instanceof Error ? err.message : 'Unknown error';
@@ -93,14 +181,14 @@ class WebScraper {
93
181
  else {
94
182
  status = 0;
95
183
  }
96
- return {
184
+ return withSecurityMetadata({
97
185
  url,
98
186
  html: '',
99
187
  status,
100
188
  method: 'visual',
101
189
  error: errorMessage || 'err',
102
190
  finalUrl,
103
- };
191
+ });
104
192
  }
105
193
  }
106
194
  async scrape(url, options = {}) {
@@ -108,11 +196,13 @@ class WebScraper {
108
196
  let lastError = null;
109
197
  try {
110
198
  const result = await this.fetchDirectly(url, mergedOptions);
111
- if (result.status >= 200 && result.status < 300) {
199
+ if (result.status >= 200 &&
200
+ result.status < 300 &&
201
+ !result.blockedBySecurity) {
112
202
  (0, log_1.info)('fetch: OK', url);
113
203
  return result;
114
204
  }
115
- if (result.status === 404) {
205
+ if (result.status === 404 && !result.blockedBySecurity) {
116
206
  (0, log_1.info)(`fetch:${result.status}. skip:`, url);
117
207
  return result;
118
208
  }
@@ -131,13 +221,13 @@ class WebScraper {
131
221
  const puppeteerError = error instanceof Error ? error : new Error('Unknown puppeteer error');
132
222
  const m = `Both methods failed. Fetch: ${lastError?.message || 'Unknown'}. puppeteer: ${puppeteerError.message}. err=${error.message}`;
133
223
  (0, log_1.warn)(m);
134
- return {
224
+ return withSecurityMetadata({
135
225
  url,
136
226
  html: '',
137
227
  status: 0,
138
228
  method: 'visual',
139
229
  error: m,
140
- };
230
+ });
141
231
  }
142
232
  }
143
233
  async scrapeMultiple(urls, options = {}) {
@@ -148,13 +238,13 @@ class WebScraper {
148
238
  results.push(result);
149
239
  }
150
240
  catch (error) {
151
- results.push({
241
+ results.push(withSecurityMetadata({
152
242
  url,
153
243
  html: '',
154
244
  status: 0,
155
245
  method: 'fetch',
156
246
  error: error instanceof Error ? error.message : 'Unknown error',
157
- });
247
+ }));
158
248
  }
159
249
  }
160
250
  return results;
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;AAsBxE,MAAa,UAAU;IAIrB,YAAY,UAA2B,EAAE;QACvC,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACvE,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,mBAAmB,CAC/B,GAAW,EACX,OAAwB;QAExB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,GAAG,CAAC;QACjB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YAEH,MAAM,UAAU,GAAe,MAAM,IAAA,cAAQ,EAAC,GAAG,EAAE;gBACjD,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;gBACvD,iBAAiB,EAAE,OAAO,CAAC,eAAe;gBAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;YAGH,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;YAC3B,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC;YAE1B,KAAK;gBACH,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,MAAM,KAAK,UAAU,CAAC,UAAU,EAAE,CAAC;YAE1E,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK;gBACL,QAAQ;aACT,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAG1E,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,CAAC;YACb,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,YAAY,IAAI,KAAK;gBAC5B,QAAQ;aACT,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChD,IAAA,UAAI,EAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBACvB,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC1B,IAAA,UAAI,EAAC,SAAS,MAAM,CAAC,MAAM,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3C,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,qDAAqD,GAAG,KAAK,SAAS,CAAC,OAAO,8BAA8B,CAC7G,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,IAAA,WAAK,EACH,mCAAmC,GAAG,GAAG,EACzC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CACjC,CAAC;YACF,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,cAAc,GAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAExE,MAAM,CAAC,GAAG,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,gBAAgB,cAAc,CAAC,OAAO,SAAU,KAAe,CAAC,OAAO,EAAE,CAAC;YAClJ,IAAA,UAAI,EAAC,CAAC,CAAC,CAAC;YACR,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,CAAC;aACT,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,MAAM,IAAA,kBAAY,GAAE,CAAC;IACvB,CAAC;CACF;AAhOD,gCAgOC"}
1
+ {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAsE;AAEtE,uCAAwE;AAgDxE,MAAM,gBAAgB,GAIjB;IACH;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE;YACR,8BAA8B;YAC9B,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,gBAAgB;SACjB;KACF;IACD;QACE,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,SAAS,EAAE,OAAO,EAAE,QAAQ,CAAC;KACzC;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,QAAQ,EAAE,YAAY;QACtB,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,CAAC,aAAa,EAAE,aAAa,EAAE,OAAO,CAAC;KAClD;CACF,CAAC;AAEF,SAAS,sBAAsB,CAAC,IAAY,EAAE,KAAa;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,EAAE,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;IAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC5D,CAAC;AAED,SAAS,mBAAmB,CAAC,MAI5B;IACC,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACvC,MAAM,IAAI,GAAG,GAAG,IAAI,KAAK,KAAK,IAAI,EAAE,EAAE,CAAC;IAEvC,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,IAAI,CAAC,KAAK,EAAE,KAAK,IAAI,KAAK,EAAE,KAAK,KAAK,CAAC,EAAE,CAAC;gBACxC,SAAS;YACX,CAAC;YAED,OAAO;gBACL,iBAAiB,EAAE,IAAI;gBACvB,aAAa,EAAE,MAAM,CAAC,QAAQ;gBAC9B,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,gBAAgB,EAAE,sBAAsB,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,CAAC;aAC5D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;QACrC,OAAO;YACL,iBAAiB,EAAE,IAAI;YACvB,aAAa,EAAE,SAAS;YACxB,WAAW,EAAE,QAAQ,MAAM,2BAA2B;YACtD,gBAAgB,EAAE,IAAI;gBACpB,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjC,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;SACzB,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtC,CAAC;AAED,SAAS,gBAAgB,CAAC,MAIzB;IACC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,EAAE,GAAG,MAAM,CAAC;IAElD,IAAI,iBAAiB,EAAE,CAAC;QACtB,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;IACnC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,oBAAoB,CAAC,IAQ7B;IACC,MAAM,SAAS,GAAG,mBAAmB,CAAC;QACpC,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,IAAI,CAAC,KAAK;KAClB,CAAC,CAAC;IAEH,OAAO;QACL,GAAG,IAAI;QACP,GAAG,SAAS;QACZ,WAAW,EAAE,gBAAgB,CAAC;YAC5B,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,iBAAiB,EAAE,SAAS,CAAC,iBAAiB;SAC/C,CAAC;KACH,CAAC;AACJ,CAAC;AAED,MAAa,UAAU;IAIrB,YAAY,UAA2B,EAAE;QACvC,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACvE,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,mBAAmB,CAC/B,GAAW,EACX,OAAwB;QAExB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,GAAG,CAAC;QACjB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YAEH,MAAM,UAAU,GAAe,MAAM,IAAA,cAAQ,EAAC,GAAG,EAAE;gBACjD,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;gBACvD,iBAAiB,EAAE,OAAO,CAAC,eAAe;gBAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;YAGH,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;YAC3B,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC;YAE1B,KAAK;gBACH,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,MAAM,KAAK,UAAU,CAAC,UAAU,EAAE,CAAC;YAE1E,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK;gBACL,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAG1E,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,CAAC;YACb,CAAC;YAED,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,YAAY,IAAI,KAAK;gBAC5B,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IACE,MAAM,CAAC,MAAM,IAAI,GAAG;gBACpB,MAAM,CAAC,MAAM,GAAG,GAAG;gBACnB,CAAC,MAAM,CAAC,iBAAiB,EACzB,CAAC;gBACD,IAAA,UAAI,EAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBACvB,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC;gBACvD,IAAA,UAAI,EAAC,SAAS,MAAM,CAAC,MAAM,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3C,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,qDAAqD,GAAG,KAAK,SAAS,CAAC,OAAO,8BAA8B,CAC7G,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,IAAA,WAAK,EACH,mCAAmC,GAAG,GAAG,EACzC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CACjC,CAAC;YACF,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,cAAc,GAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAExE,MAAM,CAAC,GAAG,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,gBAAgB,cAAc,CAAC,OAAO,SAAU,KAAe,CAAC,OAAO,EAAE,CAAC;YAClJ,IAAA,UAAI,EAAC,CAAC,CAAC,CAAC;YACR,OAAO,oBAAoB,CAAC;gBAC1B,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CACV,oBAAoB,CAAC;oBACnB,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,MAAM,IAAA,kBAAY,GAAE,CAAC;IACvB,CAAC;CACF;AAtOD,gCAsOC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ag-webscrape",
3
- "version": "0.0.16",
3
+ "version": "0.0.18",
4
4
  "author": "admin@gec.dev",
5
5
  "description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
6
6
  "main": "dist/index.js",
@@ -15,7 +15,7 @@
15
15
  "license": "MIT",
16
16
  "dependencies": {
17
17
  "@sparticuz/chromium": "^143.0.0",
18
- "ag-common": "^0.0.874",
18
+ "ag-common": "^0.0.875",
19
19
  "node-html-parser": "^7.0.1",
20
20
  "puppeteer": "^24.15.0",
21
21
  "puppeteer-core": "^24.15.0"