ag-webscrape 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,6 @@ export declare class WebScraper {
23
23
  private defaultOptions;
24
24
  constructor(options?: ScrapingOptions);
25
25
  private initializeBrowser;
26
- private detectAntiScraping;
27
26
  private fetchDirectly;
28
27
  private scrapeWithPlaywright;
29
28
  private isErrorPage;
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,YAAY,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,iBAAiB;IA2B/B,OAAO,CAAC,kBAAkB;YA0BZ,aAAa;YAiDb,oBAAoB;IA4ElC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IAmDpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB/B"}
1
+ {"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,YAAY,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,iBAAiB;YAuCjB,aAAa;YAiDb,oBAAoB;IA4ElC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA+CpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB/B"}
@@ -1,8 +1,12 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.WebScraper = void 0;
4
7
  const log_1 = require("ag-common/dist/common/helpers/log");
5
- const playwright_1 = require("playwright");
8
+ const playwright_aws_lambda_1 = __importDefault(require("playwright-aws-lambda"));
9
+ const playwright_core_1 = require("playwright-core");
6
10
  class WebScraper {
7
11
  constructor(options = {}) {
8
12
  this.browser = null;
@@ -19,46 +23,34 @@ class WebScraper {
19
23
  }
20
24
  async initializeBrowser() {
21
25
  if (!this.browser) {
22
- this.browser = await playwright_1.chromium.launch({
23
- headless: true,
24
- args: [
25
- '--no-sandbox',
26
- '--disable-setuid-sandbox',
27
- '--disable-dev-shm-usage',
28
- '--disable-accelerated-2d-canvas',
29
- '--disable-gpu',
30
- '--window-size=1920,1080',
31
- ],
32
- });
26
+ const isLambda = !!process.env.AWS_LAMBDA_FUNCTION_NAME;
27
+ if (isLambda) {
28
+ this.browser = await playwright_aws_lambda_1.default.launchChromium();
29
+ }
30
+ else {
31
+ this.browser = await playwright_core_1.chromium.launch({
32
+ headless: true,
33
+ args: [
34
+ '--no-sandbox',
35
+ '--disable-setuid-sandbox',
36
+ '--disable-dev-shm-usage',
37
+ '--disable-accelerated-2d-canvas',
38
+ '--disable-gpu',
39
+ '--window-size=1920,1080',
40
+ ],
41
+ });
42
+ }
43
+ if (!this.browser) {
44
+ throw new Error('Failed to initialize browser');
45
+ }
33
46
  this.context = await this.browser.newContext({
34
47
  userAgent: this.defaultOptions.userAgent || this.userAgent.toString(),
35
48
  viewport: { width: 1920, height: 1080 },
36
- extraHTTPHeaders: this.defaultOptions.headers || {},
49
+ extraHTTPHeaders: this.defaultOptions.headers ?? {},
37
50
  });
38
51
  this.page = await this.context.newPage();
39
52
  }
40
53
  }
41
- detectAntiScraping(html) {
42
- const antiScrapingPatterns = [
43
- /cloudflare/i,
44
- /distil.networks/i,
45
- /perimeterx/i,
46
- /datadome/i,
47
- /akamai/i,
48
- /bot.protection/i,
49
- /please.enable.javascript/i,
50
- /access.denied/i,
51
- /blocked/i,
52
- /captcha/i,
53
- /challenge/i,
54
- /security.check/i,
55
- /rate.limit/i,
56
- /temporarily.unavailable/i,
57
- ];
58
- const result = antiScrapingPatterns.some((pattern) => pattern.test(html));
59
- (0, log_1.info)('Anti-scraping detected:', result);
60
- return result;
61
- }
62
54
  async fetchDirectly(url, options) {
63
55
  const headers = {
64
56
  'User-Agent': options.userAgent || this.userAgent.toString(),
@@ -70,7 +62,7 @@ class WebScraper {
70
62
  ...options.headers,
71
63
  };
72
64
  const controller = new AbortController();
73
- const timeoutId = setTimeout(() => controller.abort(), options.timeout || this.defaultOptions.timeout);
65
+ const timeoutId = setTimeout(() => controller.abort(), options.timeout ?? this.defaultOptions.timeout);
74
66
  try {
75
67
  const response = await fetch(url, {
76
68
  headers,
@@ -108,7 +100,7 @@ class WebScraper {
108
100
  }
109
101
  const response = await page.goto(url, {
110
102
  waitUntil: 'networkidle',
111
- timeout: options.timeout || this.defaultOptions.timeout,
103
+ timeout: options.timeout ?? this.defaultOptions.timeout,
112
104
  });
113
105
  if (response) {
114
106
  status = response.status();
@@ -118,7 +110,7 @@ class WebScraper {
118
110
  }
119
111
  if (options.waitForSelector) {
120
112
  await page.waitForSelector(options.waitForSelector, {
121
- timeout: options.waitForTimeout || this.defaultOptions.waitForTimeout,
113
+ timeout: options.waitForTimeout ?? this.defaultOptions.waitForTimeout,
122
114
  });
123
115
  }
124
116
  else if (options.waitForTimeout) {
@@ -167,9 +159,7 @@ class WebScraper {
167
159
  let lastError = null;
168
160
  try {
169
161
  const result = await this.fetchDirectly(url, mergedOptions);
170
- if (result.status >= 200 &&
171
- result.status < 300 &&
172
- !this.detectAntiScraping(result.html)) {
162
+ if (result.status >= 200 && result.status < 300) {
173
163
  return result;
174
164
  }
175
165
  (0, log_1.warn)(`Direct fetch failed or anti-scraping detected for ${url}. Falling back to Playwright.`, JSON.stringify(result, null, 2));
@@ -1 +1 @@
1
- {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AAAA,2DAA+D;AAE/D,2CAAsC;AAqBtC,MAAa,UAAU;IAOrB,YAAY,UAA2B,EAAE;QANjC,YAAO,GAAmB,IAAI,CAAC;QAC/B,YAAO,GAA0B,IAAI,CAAC;QACtC,SAAI,GAAgB,IAAI,CAAC;QAK/B,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,IAAI,CAAC,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,CAAC;gBACnC,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE;oBACJ,cAAc;oBACd,0BAA0B;oBAC1B,yBAAyB;oBACzB,iCAAiC;oBACjC,eAAe;oBACf,yBAAyB;iBAC1B;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;gBACrE,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,gBAAgB,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,IAAI,EAAE;aACpD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAKO,kBAAkB,CAAC,IAAY;QACrC,MAAM,oBAAoB,GAAG;YAC3B,aAAa;YACb,kBAAkB;YAClB,aAAa;YACb,WAAW;YACX,SAAS;YACT,iBAAiB;YACjB,2BAA2B;YAC3B,gBAAgB;YAChB,UAAU;YACV,UAAU;YACV,YAAY;YACZ,iBAAiB;YACjB,aAAa;YACb,0BAA0B;SAC3B,CAAC;QAEF,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QAC1E,IAAA,UAAI,EAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,MAAM,CAAC;IAChB,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,oBAAoB,CAChC,GAAW,EACX,OAAwB;QAExB,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAyB,CAAC;QAE9B,IAAI,CAAC;YAEH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClD,CAAC;YAGD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;aACxD,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAG3B,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,KAAK,GAAG,QAAQ,MAAM,QAAQ,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBAClD,OAAO,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,cAAc;iBACtE,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACpD,CAAC;YAGD,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAG5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,KAAK,IAAI,qCAAqC,CAAC;YACzD,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,YAAY;gBACpB,KAAK;gBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;gBAC3D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IACE,MAAM,CAAC,MAAM,IAAI,GAAG;gBACpB,MAAM,CAAC,MAAM,GAAG,GAAG;gBACnB,CAAC,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,EACrC,CAAC;gBACD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,+BAA+B,EACvF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,+BAA+B,CACpF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,eAAe,GACnB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAEzE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,iBAAiB,eAAe,CAAC,OAAO,EAAE;aAChH,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AA3TD,gCA2TC"}
1
+ {"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;;;;AACA,2DAAyD;AACzD,kFAA+C;AAE/C,qDAA2C;AAqB3C,MAAa,UAAU;IAOrB,YAAY,UAA2B,EAAE;QANjC,YAAO,GAAmB,IAAI,CAAC;QAC/B,YAAO,GAA0B,IAAI,CAAC;QACtC,SAAI,GAAgB,IAAI,CAAC;QAK/B,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAElB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;YAExD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,CAAC,OAAO,GAAG,MAAM,+BAAU,CAAC,cAAc,EAAE,CAAC;YACnD,CAAC;iBAAM,CAAC;gBAEN,IAAI,CAAC,OAAO,GAAG,MAAM,0BAAQ,CAAC,MAAM,CAAC;oBACnC,QAAQ,EAAE,IAAI;oBACd,IAAI,EAAE;wBACJ,cAAc;wBACd,0BAA0B;wBAC1B,yBAAyB;wBACzB,iCAAiC;wBACjC,eAAe;wBACf,yBAAyB;qBAC1B;iBACF,CAAC,CAAC;YACL,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;YAClD,CAAC;YAED,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;gBACrE,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,gBAAgB,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,IAAI,EAAE;aACpD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,oBAAoB,CAChC,GAAW,EACX,OAAwB;QAExB,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAyB,CAAC;QAE9B,IAAI,CAAC;YAEH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClD,CAAC;YAGD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;aACxD,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAG3B,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,KAAK,GAAG,QAAQ,MAAM,QAAQ,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBAClD,OAAO,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,cAAc;iBACtE,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACpD,CAAC;YAGD,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAG5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,KAAK,IAAI,qCAAqC,CAAC;YACzD,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,YAAY;gBACpB,KAAK;gBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;gBAC3D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,+BAA+B,EACvF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,+BAA+B,CACpF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,eAAe,GACnB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAEzE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,iBAAiB,eAAe,CAAC,OAAO,EAAE;aAChH,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AAzSD,gCAySC"}
package/dist/test.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"test.d.ts","sourceRoot":"","sources":["../src/test.ts"],"names":[],"mappings":""}
package/dist/test.js ADDED
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const WebScraper_1 = require("./WebScraper");
4
+ async function main() {
5
+ const ws = new WebScraper_1.WebScraper();
6
+ const result = await ws.scrape('https://bothsidesofthetable.com');
7
+ console.log('result=', JSON.stringify(result, null, 2));
8
+ }
9
+ void main();
10
+ //# sourceMappingURL=test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"test.js","sourceRoot":"","sources":["../src/test.ts"],"names":[],"mappings":";;AAAA,6CAA0C;AAE1C,KAAK,UAAU,IAAI;IACjB,MAAM,EAAE,GAAG,IAAI,uBAAU,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,MAAM,CAAC,iCAAiC,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED,KAAK,IAAI,EAAE,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ag-webscrape",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "author": "admin@gec.dev",
5
5
  "description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
6
6
  "main": "dist/index.js",
@@ -15,13 +15,15 @@
15
15
  "license": "MIT",
16
16
  "dependencies": {
17
17
  "ag-common": "^0.0.752",
18
- "playwright": "1.54.1"
18
+ "playwright-core": "1.54.1",
19
+ "playwright-aws-lambda": "^0.11.0"
19
20
  },
20
21
  "devDependencies": {
21
22
  "@types/node": "24.0.13",
22
23
  "eslint": "9.31.0",
23
24
  "eslint-config-e7npm": "0.1.23",
24
- "typescript": "5.8.3"
25
+ "typescript": "5.8.3",
26
+ "tsx": "^4.19.1"
25
27
  },
26
28
  "files": [
27
29
  "dist/**/*",
@@ -38,6 +40,7 @@
38
40
  "build": "tsc",
39
41
  "dev": "tsc --watch",
40
42
  "lint": "next lint",
41
- "format": "eslint src --fix"
43
+ "format": "eslint src --fix",
44
+ "test": "tsx src/test.ts"
42
45
  }
43
46
  }
package/dist/example.d.ts DELETED
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=example.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"example.d.ts","sourceRoot":"","sources":["../src/example.ts"],"names":[],"mappings":""}
package/dist/example.js DELETED
@@ -1,47 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const index_1 = require("./index");
4
- async function runExample() {
5
- const scraper = new index_1.WebScraper({
6
- timeout: 30000,
7
- waitForTimeout: 3000,
8
- });
9
- try {
10
- console.log('Starting web scraping example...');
11
- const result = await scraper.scrape('https://httpbin.org/html');
12
- console.log('Scraped successfully:', {
13
- url: result.url,
14
- method: result.method,
15
- status: result.status,
16
- htmlLength: result.html.length,
17
- error: result.error,
18
- });
19
- const urls = [
20
- 'https://httpbin.org/html',
21
- 'https://httpbin.org/status/200',
22
- 'https://httpbin.org/status/404',
23
- ];
24
- console.log('\nScraping multiple URLs...');
25
- const results = await scraper.scrapeMultiple(urls);
26
- results.forEach((result, index) => {
27
- console.log(`Result ${index + 1}:`, {
28
- url: result.url,
29
- method: result.method,
30
- status: result.status,
31
- htmlLength: result.html.length,
32
- error: result.error,
33
- });
34
- });
35
- }
36
- catch (error) {
37
- console.error('Error during scraping:', error);
38
- }
39
- finally {
40
- await scraper.dispose();
41
- console.log('Scraper disposed successfully');
42
- }
43
- }
44
- if (require.main === module) {
45
- runExample().catch(console.error);
46
- }
47
- //# sourceMappingURL=example.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"example.js","sourceRoot":"","sources":["../src/example.ts"],"names":[],"mappings":";;AAAA,mCAAqC;AAErC,KAAK,UAAU,UAAU;IACvB,MAAM,OAAO,GAAG,IAAI,kBAAU,CAAC;QAC7B,OAAO,EAAE,KAAK;QACd,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAGhD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,0BAA0B,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE;YACnC,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB,CAAC,CAAC;QAGH,MAAM,IAAI,GAAG;YACX,0BAA0B;YAC1B,gCAAgC;YAChC,gCAAgC;SACjC,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEnD,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,GAAG,CAAC,GAAG,EAAE;gBAClC,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;gBAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;IACjD,CAAC;YAAS,CAAC;QAET,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;IAC/C,CAAC;AACH,CAAC;AAGD,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,UAAU,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AACpC,CAAC"}