ag-webscrape 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,21 +10,17 @@ export interface ScrapingResult {
10
10
  url: string;
11
11
  html: string;
12
12
  status: number;
13
- method: 'fetch' | 'playwright';
13
+ method: 'fetch' | 'visual';
14
14
  error?: string;
15
15
  redirected?: boolean;
16
16
  finalUrl?: string;
17
17
  }
18
18
  export declare class WebScraper {
19
- private browser;
20
- private context;
21
- private page;
22
19
  private userAgent;
23
20
  private defaultOptions;
24
21
  constructor(options?: ScrapingOptions);
25
- private initializeBrowser;
26
22
  private fetchDirectly;
27
- private scrapeWithPlaywright;
23
+ private scrapeWithpuppeteer;
28
24
  private isErrorPage;
29
25
  scrape(url: string, options?: ScrapingOptions): Promise<ScrapingResult>;
30
26
  scrapeMultiple(urls: string[], options?: ScrapingOptions): Promise<ScrapingResult[]>;
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,YAAY,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,iBAAiB;YAuCjB,aAAa;YAiDb,oBAAoB;IA4ElC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA+CpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB/B"}
1
+ {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,aAAa;YAiDb,mBAAmB;IAsEjC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA+CpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -1,17 +1,10 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.WebScraper = void 0;
7
4
  const log_1 = require("ag-common/dist/common/helpers/log");
8
- const playwright_aws_lambda_1 = __importDefault(require("playwright-aws-lambda"));
9
- const playwright_core_1 = require("playwright-core");
5
+ const dom_1 = require("./helpers/dom");
10
6
  class WebScraper {
11
7
  constructor(options = {}) {
12
- this.browser = null;
13
- this.context = null;
14
- this.page = null;
15
8
  this.userAgent =
16
9
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36';
17
10
  this.defaultOptions = {
@@ -21,36 +14,6 @@ class WebScraper {
21
14
  ...options,
22
15
  };
23
16
  }
24
- async initializeBrowser() {
25
- if (!this.browser) {
26
- const isLambda = !!process.env.AWS_LAMBDA_FUNCTION_NAME;
27
- if (isLambda) {
28
- this.browser = await playwright_aws_lambda_1.default.launchChromium();
29
- }
30
- else {
31
- this.browser = await playwright_core_1.chromium.launch({
32
- headless: true,
33
- args: [
34
- '--no-sandbox',
35
- '--disable-setuid-sandbox',
36
- '--disable-dev-shm-usage',
37
- '--disable-accelerated-2d-canvas',
38
- '--disable-gpu',
39
- '--window-size=1920,1080',
40
- ],
41
- });
42
- }
43
- if (!this.browser) {
44
- throw new Error('Failed to initialize browser');
45
- }
46
- this.context = await this.browser.newContext({
47
- userAgent: this.defaultOptions.userAgent || this.userAgent.toString(),
48
- viewport: { width: 1920, height: 1080 },
49
- extraHTTPHeaders: this.defaultOptions.headers ?? {},
50
- });
51
- this.page = await this.context.newPage();
52
- }
53
- }
54
17
  async fetchDirectly(url, options) {
55
18
  const headers = {
56
19
  'User-Agent': options.userAgent || this.userAgent.toString(),
@@ -85,58 +48,57 @@ class WebScraper {
85
48
  throw error;
86
49
  }
87
50
  }
88
- async scrapeWithPlaywright(url, options) {
89
- await this.initializeBrowser();
90
- if (!this.page) {
91
- throw new Error('Failed to initialize Playwright page');
92
- }
93
- const page = this.page;
51
+ async scrapeWithpuppeteer(url, options) {
94
52
  let html = '';
95
- let status = 0;
53
+ let status = 200;
96
54
  let error;
55
+ let finalUrl = url;
97
56
  try {
98
- if (options.headers) {
99
- await page.setExtraHTTPHeaders(options.headers);
100
- }
101
- const response = await page.goto(url, {
102
- waitUntil: 'networkidle',
57
+ const domElement = await (0, dom_1.goToPage)(url, {
103
58
  timeout: options.timeout ?? this.defaultOptions.timeout,
59
+ wailUntilSelector: options.waitForSelector,
104
60
  });
105
- if (response) {
106
- status = response.status();
107
- if (status >= 400) {
108
- error = `HTTP ${status} error`;
109
- }
110
- }
111
- if (options.waitForSelector) {
112
- await page.waitForSelector(options.waitForSelector, {
113
- timeout: options.waitForTimeout ?? this.defaultOptions.waitForTimeout,
114
- });
115
- }
116
- else if (options.waitForTimeout) {
117
- await page.waitForTimeout(options.waitForTimeout);
118
- }
119
- html = await page.content();
61
+ html = domElement.outerHTML;
62
+ finalUrl = url;
120
63
  if (this.isErrorPage(html)) {
121
- error = error || 'Error page detected in HTML content';
64
+ error = 'Error page detected in HTML content';
65
+ status = 400;
122
66
  }
123
67
  return {
124
68
  url,
125
69
  html,
126
70
  status,
127
- method: 'playwright',
71
+ method: 'visual',
128
72
  error,
129
- finalUrl: page.url(),
73
+ finalUrl,
130
74
  };
131
75
  }
132
76
  catch (err) {
77
+ const errorMessage = err instanceof Error ? err.message : 'Unknown error';
78
+ if (errorMessage.includes('timeout')) {
79
+ status = 408;
80
+ }
81
+ else if (errorMessage.includes('404') ||
82
+ errorMessage.includes('not found')) {
83
+ status = 404;
84
+ }
85
+ else if (errorMessage.includes('403') ||
86
+ errorMessage.includes('forbidden')) {
87
+ status = 403;
88
+ }
89
+ else if (errorMessage.includes('500')) {
90
+ status = 500;
91
+ }
92
+ else {
93
+ status = 0;
94
+ }
133
95
  return {
134
96
  url,
135
97
  html: '',
136
- status: 0,
137
- method: 'playwright',
138
- error: err instanceof Error ? err.message : 'Unknown error',
139
- finalUrl: page.url(),
98
+ status,
99
+ method: 'visual',
100
+ error: errorMessage,
101
+ finalUrl,
140
102
  };
141
103
  }
142
104
  }
@@ -162,25 +124,25 @@ class WebScraper {
162
124
  if (result.status >= 200 && result.status < 300) {
163
125
  return result;
164
126
  }
165
- (0, log_1.warn)(`Direct fetch failed or anti-scraping detected for ${url}. Falling back to Playwright.`, JSON.stringify(result, null, 2));
127
+ (0, log_1.warn)(`Direct fetch failed or anti-scraping detected for ${url}. Falling back to puppeteer.`, JSON.stringify(result, null, 2));
166
128
  }
167
129
  catch (error) {
168
130
  lastError =
169
131
  error instanceof Error ? error : new Error('Unknown fetch error');
170
- (0, log_1.warn)(`Direct fetch failed for ${url}: ${lastError.message}. Falling back to Playwright.`);
132
+ (0, log_1.warn)(`Direct fetch failed for ${url}: ${lastError.message}. Falling back to puppeteer.`);
171
133
  }
172
134
  try {
173
- const result = await this.scrapeWithPlaywright(url, mergedOptions);
135
+ const result = await this.scrapeWithpuppeteer(url, mergedOptions);
174
136
  return result;
175
137
  }
176
138
  catch (error) {
177
- const playwrightError = error instanceof Error ? error : new Error('Unknown Playwright error');
139
+ const puppeteerError = error instanceof Error ? error : new Error('Unknown puppeteer error');
178
140
  return {
179
141
  url,
180
142
  html: '',
181
143
  status: 0,
182
- method: 'playwright',
183
- error: `Both methods failed. Fetch: ${lastError?.message || 'Unknown'}. Playwright: ${playwrightError.message}`,
144
+ method: 'visual',
145
+ error: `Both methods failed. Fetch: ${lastError?.message || 'Unknown'}. puppeteer: ${puppeteerError.message}`,
184
146
  };
185
147
  }
186
148
  }
@@ -204,18 +166,7 @@ class WebScraper {
204
166
  return results;
205
167
  }
206
168
  async dispose() {
207
- if (this.page) {
208
- await this.page.close();
209
- this.page = null;
210
- }
211
- if (this.context) {
212
- await this.context.close();
213
- this.context = null;
214
- }
215
- if (this.browser) {
216
- await this.browser.close();
217
- this.browser = null;
218
- }
169
+ await (0, dom_1.closeBrowser)();
219
170
  }
220
171
  }
221
172
  exports.WebScraper = WebScraper;
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;;;;AACA,2DAAyD;AACzD,kFAA+C;AAE/C,qDAA2C;AAqB3C,MAAa,UAAU;IAOrB,YAAY,UAA2B,EAAE;QANjC,YAAO,GAAmB,IAAI,CAAC;QAC/B,YAAO,GAA0B,IAAI,CAAC;QACtC,SAAI,GAAgB,IAAI,CAAC;QAK/B,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAElB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;YAExD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,CAAC,OAAO,GAAG,MAAM,+BAAU,CAAC,cAAc,EAAE,CAAC;YACnD,CAAC;iBAAM,CAAC;gBAEN,IAAI,CAAC,OAAO,GAAG,MAAM,0BAAQ,CAAC,MAAM,CAAC;oBACnC,QAAQ,EAAE,IAAI;oBACd,IAAI,EAAE;wBACJ,cAAc;wBACd,0BAA0B;wBAC1B,yBAAyB;wBACzB,iCAAiC;wBACjC,eAAe;wBACf,yBAAyB;qBAC1B;iBACF,CAAC,CAAC;YACL,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;YAClD,CAAC;YAED,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;gBACrE,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,gBAAgB,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,IAAI,EAAE;aACpD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;
gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,oBAAoB,CAChC,GAAW,EACX,OAAwB;QAExB,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAyB,CAAC;QAE9B,IAAI,CAAC;YAEH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClD,CAAC;YAGD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;aACxD,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAG3B,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,KAAK,GAAG,QAAQ,MAAM,QAAQ,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBAClD,OAAO,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,cAAc;iBACtE,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACpD,CAAC;YAGD,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAG5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,KAAK,IAAI,qCAAqC,CAAC;YACzD,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,YAAY;gBACpB,KAAK;gBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;gBAC3D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IA
AI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,+BAA+B,EACvF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,+BAA+B,CACpF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,eAAe,GACnB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAEzE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,iBAAiB,eAAe,CAAC,OAAO,EAAE;aAChH,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED
,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AAzSD,gCAySC"}
1
+ {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AACA,2DAAyD;AAEzD,uCAAuD;AAqBvD,MAAa,UAAU;IAIrB,YAAY,UAA2B,EAAE;QACvC,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,mBAAmB,CAC/B,GAAW,EACX,OAAwB;QAExB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,GAAG,CAAC;QACjB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YAEH,MAAM,UAAU,GAAG,MAAM,IAAA,cAAQ,EAAC,GAAG,EAAE;gBACrC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;gBACvD,iBAAiB,EAAE,OAAO,CAAC,eAAe;aAC3C,CAAC,CAAC;YAGH,IAAI,GAAG,UAAU,CAAC,SAAS,CAAC;YAC5B,QAAQ,GAAG,GAAG,CAAC;YAGf,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,qCAAqC,CAAC;gBAC9C,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK;gBACL,QAAQ;aACT,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAA
C,eAAe,CAAC;YAG1E,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IACL,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAClC,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,CAAC;YACb,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,YAAY;gBACnB,QAAQ;aACT,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,8BAA8B,EACtF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,8BAA8B,CACnF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAClE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,cAAc,GAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAExE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,QAAQ;gBAChB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,gBAAgB,cAAc,CAAC,OAAO,EAAE;aAC9G,CAAC;QACJ,CAAC;IACH,CAAC
;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,MAAM,IAAA,kBAAY,GAAE,CAAC;IACvB,CAAC;CACF;AA5OD,gCA4OC"}
@@ -0,0 +1,8 @@
1
+ import type { HTMLElement } from 'node-html-parser';
2
/**
 * Launches the module-wide headless browser, first closing any browser that
 * was launched earlier.
 */
export declare const launchBrowser: () => Promise<void>;
/**
 * Closes the module-wide browser if one exists. Errors raised while closing
 * are logged as warnings instead of being thrown.
 */
export declare const closeBrowser: () => Promise<void>;
/**
 * Opens `url` in a new page of the module-wide browser (launching it when
 * needed) and returns the page content parsed with node-html-parser, with
 * `.visually-hidden` elements removed.
 *
 * @param url Page address.
 * @param opt.timeout Navigation / selector timeout in milliseconds
 *   (5000 when omitted).
 * @param opt.wailUntilSelector CSS selector that must become visible before
 *   the content is captured. The spelling ("wail") is part of the published
 *   interface.
 */
export declare const goToPage: (url: string | URL, opt?: {
    timeout?: number;
    wailUntilSelector?: string;
}) => Promise<HTMLElement>;
8
+ //# sourceMappingURL=dom.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dom.d.ts","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAiCpD,eAAO,MAAM,aAAa,qBAqDzB,CAAC;AAEF,eAAO,MAAM,YAAY,qBAUxB,CAAC;AAEF,eAAO,MAAM,QAAQ,GACnB,KAAK,MAAM,GAAG,GAAG,EACjB,MAAM;IAEJ,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B,KACA,OAAO,CAAC,WAAW,CAwErB,CAAC"}
@@ -0,0 +1,165 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.goToPage = exports.closeBrowser = exports.launchBrowser = void 0;
7
+ const chromium_1 = __importDefault(require("@sparticuz/chromium"));
8
+ const log_1 = require("ag-common/dist/common/helpers/log");
9
+ const fs_1 = require("fs");
10
+ const node_html_parser_1 = require("node-html-parser");
11
+ const puppeteer_core_1 = require("puppeteer-core");
12
+ let browser;
13
/**
 * Picks the browser executable to launch.
 *
 * On Windows the first accessible Chrome or Edge install location from a
 * fixed list wins. On every other platform, or when none of those paths is
 * accessible, the path reported by `@sparticuz/chromium` is used.
 */
const getSystemChromePath = async () => {
    if (process.platform === 'win32') {
        const installed = [
            'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
            'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
            'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
            'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
        ].find((candidate) => {
            try {
                // accessSync throws when the path cannot be accessed.
                (0, fs_1.accessSync)(candidate);
                return true;
            }
            catch {
                return false;
            }
        });
        if (installed !== undefined) {
            return installed;
        }
    }
    return chromium_1.default.executablePath();
};
34
/**
 * (Re)launches the module-wide headless browser.
 *
 * Any previously launched browser is closed first (best effort), then a new
 * one is started with the chromium-provided arguments plus the hardening and
 * stability flags below. Set the environment variable `HEADLESS=false` to
 * run with a visible window.
 *
 * @throws Whatever `puppeteer-core`'s `launch` or the executable-path lookup
 *         throws. In that case `browser` is left unset so the next caller
 *         launches again instead of reusing a closed instance.
 */
const launchBrowser = async () => {
    const executablePath = await getSystemChromePath();
    const opt = {
        // NOTE(review): 1080 wide x 1920 tall is a portrait viewport; the earlier
        // Playwright-based version used 1920x1080 — confirm this is intended.
        defaultViewport: {
            height: 1920,
            width: 1080,
        },
        // Build a fresh array so the array handed out by chromium is never
        // mutated (and flags cannot pile up across relaunches).
        args: [
            ...(chromium_1.default.args ?? []),
            '--disable-features=AudioServiceOutOfProcess',
            '--disable-features=AudioServiceOutOfProcessKillAtHang',
            '--disable-software-rasterizer',
            '--disable-gpu',
            '--disable-dev-shm-usage',
            '--disable-setuid-sandbox',
            // NOTE(review): disables the same-origin policy for every scraped
            // page — only acceptable because pages are rendered, not trusted.
            '--disable-web-security',
            '--disable-features=site-per-process',
            '--disable-features=VizDisplayCompositor',
            '--disable-extensions',
            '--disable-plugins',
            '--disable-default-apps',
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            '--disable-hang-monitor',
            '--disable-ipc-flooding-protection',
            '--disable-prompt-on-repost',
            '--disable-client-side-phishing-detection',
            '--disable-component-update',
            '--disable-domain-reliability',
            '--disable-features=TranslateUI',
            '--disable-features=BlinkGenPropertyTrees',
            '--enable-features=NetworkService,NetworkServiceInProcess',
        ],
        headless: process.env.HEADLESS !== 'false',
        // Puppeteer v23 renamed `ignoreHTTPSErrors` to `acceptInsecureCerts`.
        // Both are set so certificate errors are ignored on old and new versions.
        ignoreHTTPSErrors: true,
        acceptInsecureCerts: true,
        devtools: false,
        executablePath,
    };
    (0, log_1.trace)('launch browser, opt=', opt);
    if (browser?.close) {
        try {
            await browser.close();
        }
        catch (e) {
            // Best effort: a browser that cannot be closed must not block relaunch.
            (0, log_1.warn)('error closing previous browser:', e);
        }
    }
    // Drop the old handle before launching so a failed launch does not leave a
    // closed browser behind.
    browser = undefined;
    browser = (await (0, puppeteer_core_1.launch)(opt));
};
84
+ exports.launchBrowser = launchBrowser;
85
/**
 * Closes the module-wide browser, if there is one.
 *
 * The module-level handle is cleared before closing, so a later `goToPage`
 * call launches a fresh browser instead of failing on a closed one. Errors
 * raised while closing are logged as warnings and never thrown.
 */
const closeBrowser = async () => {
    if (!browser) {
        return;
    }
    const current = browser;
    browser = undefined;
    try {
        await current.close();
    }
    catch (e) {
        (0, log_1.warn)('error closing browser:', e);
    }
};
96
+ exports.closeBrowser = closeBrowser;
97
/**
 * Loads `url` in a new page of the module-wide browser and returns the
 * rendered document parsed with node-html-parser.
 *
 * Behaviour:
 *  - The browser is launched on demand.
 *  - Without `opt.wailUntilSelector` the page is considered ready on
 *    `load` + `domcontentloaded`; with it, on `load` followed by the selector
 *    becoming visible.
 *  - Elements with class `visually-hidden` are removed from the result.
 *  - If the first attempt fails with a connection-type error (see the marker
 *    list below) the browser is relaunched and ONE more attempt is made with
 *    5 s of extra timeout. Any other error, or a second failure, is rethrown.
 *  - The page is always closed, on success and on failure.
 *
 * @param url Page address (string or URL).
 * @param opt.timeout Timeout in ms for navigation and selector wait
 *   (default 5000).
 * @param opt.wailUntilSelector CSS selector to wait for ("wail" [sic] is the
 *   published option name).
 * @returns The parsed root element of the page.
 * @throws The underlying navigation / relaunch error when the scrape fails.
 */
const goToPage = async (url, opt) => {
    const urlx = typeof url === 'string' ? url : url.toString();
    // Error texts that indicate a dead or stuck browser rather than a bad page.
    const retryableMarkers = [
        'has disconnected',
        'timeout of',
        'frame was detached',
        'Navigating frame was detached',
        'Protocol error',
        'Target closed',
    ];
    let errorRetry = false;
    do {
        let page;
        try {
            if (!browser) {
                await (0, exports.launchBrowser)();
            }
            (0, log_1.debug)('go to page:' + url);
            page = await browser.newPage();
            // The retry attempt gets 5 s more than the first one.
            const t = (opt?.timeout ?? 5000) + (errorRetry ? 5000 : 0);
            if (!opt?.wailUntilSelector) {
                await page.goto(urlx, {
                    waitUntil: ['load', 'domcontentloaded'],
                    timeout: t,
                });
            }
            else {
                await page.goto(urlx, {
                    waitUntil: ['load'],
                    timeout: t,
                });
                await page.waitForSelector(opt.wailUntilSelector, {
                    timeout: t,
                    visible: true,
                });
            }
            const content = await page.content();
            const doc = (0, node_html_parser_1.parse)(content);
            doc.querySelectorAll('.visually-hidden')?.forEach((n) => n.remove());
            return doc;
        }
        catch (err) {
            // String() is safe for non-Error throws as well.
            const message = String(err);
            if (errorRetry) {
                (0, log_1.error)('retry already, bail', url, message);
                throw err;
            }
            const retryable = retryableMarkers.some((marker) => message.includes(marker));
            if (!retryable) {
                (0, log_1.error)(`scrape error:${err}`);
                throw err;
            }
            try {
                (0, log_1.warn)('retry:', url, message);
                await (0, exports.launchBrowser)();
                errorRetry = true;
            }
            catch (ex) {
                (0, log_1.error)('error relaunching browser:', ex);
                throw ex;
            }
        }
        finally {
            // Always release the page. After a relaunch it belongs to a browser
            // that is already gone, so a failing close is expected and ignored.
            if (page) {
                try {
                    await page.close();
                }
                catch (closeErr) {
                    (0, log_1.debug)('error closing page:' + String(closeErr));
                }
            }
        }
    } while (errorRetry);
    // Unreachable: every path above returns or throws.
    throw new Error('too many errors');
};
164
+ exports.goToPage = goToPage;
165
+ //# sourceMappingURL=dom.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dom.js","sourceRoot":"","sources":["../../src/helpers/dom.ts"],"names":[],"mappings":";;;;;;AACA,mEAA2C;AAC3C,2DAA8E;AAC9E,2BAAgC;AAEhC,uDAAyC;AAEzC,mDAAwC;AAExC,IAAI,OAA4B,CAAC;AAEjC,MAAM,mBAAmB,GAAG,KAAK,IAAI,EAAE;IACrC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAElC,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;QAEzB,MAAM,aAAa,GAAG;YACpB,4DAA4D;YAC5D,kEAAkE;YAClE,6DAA6D;YAC7D,mEAAmE;SACpE,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;YACjC,IAAI,CAAC;gBACH,IAAA,eAAU,EAAC,IAAI,CAAC,CAAC;gBACjB,OAAO,IAAI,CAAC;YACd,CAAC;YAAC,MAAM,CAAC;YAET,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,kBAAQ,CAAC,cAAc,EAAE,CAAC;IAC5C,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEK,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;IACtC,MAAM,cAAc,GAAG,MAAM,mBAAmB,EAAE,CAAC;IACnD,MAAM,GAAG,GAAG;QACV,eAAe,EAAE;YACf,MAAM,EAAE,IAAI;YACZ,KAAK,EAAE,IAAI;SACZ;QACD,IAAI,EAAE,kBAAQ,CAAC,IAAI;QACnB,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;QACzD,iBAAiB,EAAE,IAAI;QACvB,QAAQ,EAAE,KAAK;QACf,cAAc;KACf,CAAC;IAEF,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACd,GAAG,CAAC,IAAI,GAAG,EAAE,CAAC;IAChB,CAAC;IAGD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC;IAC7D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;IACvE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IAC/C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC/B,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IACzC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IAC1C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACxC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACrD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IACzD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACtC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IACnC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACxC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IACvD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IAC1D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IAClD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACxC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACnD,GAAG,CAAC
,IAAI,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;IAC5C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IAC1D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;IAC5C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAC9C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IAChD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IAC1D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;IAE1E,IAAA,WAAK,EAAC,sBAAsB,EAAE,GAAG,CAAC,CAAC;IACnC,IAAI,CAAC;QACH,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;YACnB,MAAM,OAAO,EAAE,KAAK,EAAE,CAAC;QACzB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;IAET,CAAC;IACD,OAAO,GAAG,CAAC,MAAM,IAAA,uBAAM,EAAC,GAAG,CAAC,CAAuB,CAAC;AACtD,CAAC,CAAC;AArDW,QAAA,aAAa,iBAqDxB;AAEK,MAAM,YAAY,GAAG,KAAK,IAAI,EAAE;IACrC,IAAI,CAAC;QACH,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;QACT,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAA,UAAI,EAAC,wBAAwB,EAAE,CAAC,CAAC,CAAC;IACpC,CAAC;AACH,CAAC,CAAC;AAVW,QAAA,YAAY,gBAUvB;AAEK,MAAM,QAAQ,GAAG,KAAK,EAC3B,GAAiB,EACjB,GAIC,EACqB,EAAE;IACxB,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,GAAG,CAAC;QACF,IAAI,CAAC;YACH,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,IAAA,qBAAa,GAAE,CAAC;YACxB,CAAC;YAED,IAAA,WAAK,EAAC,aAAa,GAAG,GAAG,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,OAAQ,CAAC,OAAO,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,IAAI,IAAI,CAAC;YAC7B,IAAI,UAAU,EAAE,CAAC;gBACf,CAAC,IAAI,IAAI,CAAC;YACZ,CAAC;YAED,MAAM,IAAI,GAAG,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;YAE5D,IAAI,CAAC,GAAG,EAAE,iBAAiB,EAAE,CAAC;gBAE5B,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBACpB,SAAS,EAAE,CAAC,MAAM,EAAE,kBAAkB,CAAC;oBACvC,OAAO,EAAE,CAAC;iBACX,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBACpB,SAAS,EAAE,CAAC,MAAM,CAAC;oBACnB,OAAO,EAAE,CAAC;iBACX,CAAC,CAAC;gBACH,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,iBAAiB,EAAE;oBAChD,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,IAAI;iBACd,CAAC,CAAC;YACL,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrC,MAAM,GAAG,GAAG,IAAA,wBAAK,EAAC,OAAO,CAAC,CAAC;YAE3B,GAAG,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,EAA
E,EAAE,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YACrE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACnB,UAAU,GAAG,KAAK,CAAC;YACnB,OAAO,GAAG,CAAC;QACb,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,GAAG,GAAY,CAAC;YACvB,IAAI,UAAU,EAAE,CAAC;gBACf,IAAA,WAAK,EAAC,qBAAqB,EAAE,GAAG,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAChD,MAAM,CAAC,CAAC;YACV,CAAC;YAGD,IACE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;gBACzC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACnC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBAC3C,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;gBACtD,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;gBACvC,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,EACtC,CAAC;gBACD,IAAI,CAAC;oBACH,IAAA,UAAI,EAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAClC,MAAM,IAAA,qBAAa,GAAE,CAAC;oBACtB,UAAU,GAAG,IAAI,CAAC;gBACpB,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACZ,IAAA,WAAK,EAAC,4BAA4B,EAAE,EAAE,CAAC,CAAC;oBACxC,MAAM,EAAE,CAAC;gBACX,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAA,WAAK,EAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;gBAC3B,MAAM,CAAC,CAAC;YACV,CAAC;QACH,CAAC;IACH,CAAC,QAAQ,UAAU,EAAE;IACrB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;AACrC,CAAC,CAAC;AA/EW,QAAA,QAAQ,YA+EnB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ag-webscrape",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "author": "admin@gec.dev",
5
5
  "description": "TypeScript web scraper with Puppeteer fallback for anti-scraping protection",
6
6
  "main": "dist/index.js",
@@ -14,16 +14,18 @@
14
14
  ],
15
15
  "license": "MIT",
16
16
  "dependencies": {
17
- "ag-common": "^0.0.752",
18
- "playwright-core": "1.54.1",
19
- "playwright-aws-lambda": "^0.11.0"
17
+ "@sparticuz/chromium": "138.0.1",
18
+ "ag-common": "0.0.752",
19
+ "node-html-parser": "^7.0.1",
20
+ "puppeteer": "^24.12.1",
21
+ "puppeteer-core": "^24.12.1"
20
22
  },
21
23
  "devDependencies": {
22
24
  "@types/node": "24.0.13",
23
25
  "eslint": "9.31.0",
24
26
  "eslint-config-e7npm": "0.1.23",
25
- "typescript": "5.8.3",
26
- "tsx": "^4.19.1"
27
+ "tsx": "4.20.3",
28
+ "typescript": "5.8.3"
27
29
  },
28
30
  "files": [
29
31
  "dist/**/*",