ag-webscrape 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/WebScraper.d.ts +0 -1
- package/dist/WebScraper.d.ts.map +1 -1
- package/dist/WebScraper.js +30 -40
- package/dist/WebScraper.js.map +1 -1
- package/dist/test.d.ts +2 -0
- package/dist/test.d.ts.map +1 -0
- package/dist/test.js +10 -0
- package/dist/test.js.map +1 -0
- package/package.json +7 -4
- package/dist/example.d.ts +0 -2
- package/dist/example.d.ts.map +0 -1
- package/dist/example.js +0 -47
- package/dist/example.js.map +0 -1
package/dist/WebScraper.d.ts
CHANGED
package/dist/WebScraper.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,YAAY,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,iBAAiB;YAuCjB,aAAa;YAiDb,oBAAoB;IA4ElC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IA+CpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB/B"}
|
package/dist/WebScraper.js
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
6
|
exports.WebScraper = void 0;
|
|
4
7
|
const log_1 = require("ag-common/dist/common/helpers/log");
|
|
5
|
-
const playwright_1 = require("playwright");
|
|
8
|
+
const playwright_aws_lambda_1 = __importDefault(require("playwright-aws-lambda"));
|
|
9
|
+
const playwright_core_1 = require("playwright-core");
|
|
6
10
|
class WebScraper {
|
|
7
11
|
constructor(options = {}) {
|
|
8
12
|
this.browser = null;
|
|
@@ -19,46 +23,34 @@ class WebScraper {
|
|
|
19
23
|
}
|
|
20
24
|
async initializeBrowser() {
|
|
21
25
|
if (!this.browser) {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
const isLambda = !!process.env.AWS_LAMBDA_FUNCTION_NAME;
|
|
27
|
+
if (isLambda) {
|
|
28
|
+
this.browser = await playwright_aws_lambda_1.default.launchChromium();
|
|
29
|
+
}
|
|
30
|
+
else {
|
|
31
|
+
this.browser = await playwright_core_1.chromium.launch({
|
|
32
|
+
headless: true,
|
|
33
|
+
args: [
|
|
34
|
+
'--no-sandbox',
|
|
35
|
+
'--disable-setuid-sandbox',
|
|
36
|
+
'--disable-dev-shm-usage',
|
|
37
|
+
'--disable-accelerated-2d-canvas',
|
|
38
|
+
'--disable-gpu',
|
|
39
|
+
'--window-size=1920,1080',
|
|
40
|
+
],
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
if (!this.browser) {
|
|
44
|
+
throw new Error('Failed to initialize browser');
|
|
45
|
+
}
|
|
33
46
|
this.context = await this.browser.newContext({
|
|
34
47
|
userAgent: this.defaultOptions.userAgent || this.userAgent.toString(),
|
|
35
48
|
viewport: { width: 1920, height: 1080 },
|
|
36
|
-
extraHTTPHeaders: this.defaultOptions.headers
|
|
49
|
+
extraHTTPHeaders: this.defaultOptions.headers ?? {},
|
|
37
50
|
});
|
|
38
51
|
this.page = await this.context.newPage();
|
|
39
52
|
}
|
|
40
53
|
}
|
|
41
|
-
detectAntiScraping(html) {
|
|
42
|
-
const antiScrapingPatterns = [
|
|
43
|
-
/cloudflare/i,
|
|
44
|
-
/distil.networks/i,
|
|
45
|
-
/perimeterx/i,
|
|
46
|
-
/datadome/i,
|
|
47
|
-
/akamai/i,
|
|
48
|
-
/bot.protection/i,
|
|
49
|
-
/please.enable.javascript/i,
|
|
50
|
-
/access.denied/i,
|
|
51
|
-
/blocked/i,
|
|
52
|
-
/captcha/i,
|
|
53
|
-
/challenge/i,
|
|
54
|
-
/security.check/i,
|
|
55
|
-
/rate.limit/i,
|
|
56
|
-
/temporarily.unavailable/i,
|
|
57
|
-
];
|
|
58
|
-
const result = antiScrapingPatterns.some((pattern) => pattern.test(html));
|
|
59
|
-
(0, log_1.info)('Anti-scraping detected:', result);
|
|
60
|
-
return result;
|
|
61
|
-
}
|
|
62
54
|
async fetchDirectly(url, options) {
|
|
63
55
|
const headers = {
|
|
64
56
|
'User-Agent': options.userAgent || this.userAgent.toString(),
|
|
@@ -70,7 +62,7 @@ class WebScraper {
|
|
|
70
62
|
...options.headers,
|
|
71
63
|
};
|
|
72
64
|
const controller = new AbortController();
|
|
73
|
-
const timeoutId = setTimeout(() => controller.abort(), options.timeout
|
|
65
|
+
const timeoutId = setTimeout(() => controller.abort(), options.timeout ?? this.defaultOptions.timeout);
|
|
74
66
|
try {
|
|
75
67
|
const response = await fetch(url, {
|
|
76
68
|
headers,
|
|
@@ -108,7 +100,7 @@ class WebScraper {
|
|
|
108
100
|
}
|
|
109
101
|
const response = await page.goto(url, {
|
|
110
102
|
waitUntil: 'networkidle',
|
|
111
|
-
timeout: options.timeout
|
|
103
|
+
timeout: options.timeout ?? this.defaultOptions.timeout,
|
|
112
104
|
});
|
|
113
105
|
if (response) {
|
|
114
106
|
status = response.status();
|
|
@@ -118,7 +110,7 @@ class WebScraper {
|
|
|
118
110
|
}
|
|
119
111
|
if (options.waitForSelector) {
|
|
120
112
|
await page.waitForSelector(options.waitForSelector, {
|
|
121
|
-
timeout: options.waitForTimeout
|
|
113
|
+
timeout: options.waitForTimeout ?? this.defaultOptions.waitForTimeout,
|
|
122
114
|
});
|
|
123
115
|
}
|
|
124
116
|
else if (options.waitForTimeout) {
|
|
@@ -167,9 +159,7 @@ class WebScraper {
|
|
|
167
159
|
let lastError = null;
|
|
168
160
|
try {
|
|
169
161
|
const result = await this.fetchDirectly(url, mergedOptions);
|
|
170
|
-
if (result.status >= 200 &&
|
|
171
|
-
result.status < 300 &&
|
|
172
|
-
!this.detectAntiScraping(result.html)) {
|
|
162
|
+
if (result.status >= 200 && result.status < 300) {
|
|
173
163
|
return result;
|
|
174
164
|
}
|
|
175
165
|
(0, log_1.warn)(`Direct fetch failed or anti-scraping detected for ${url}. Falling back to Playwright.`, JSON.stringify(result, null, 2));
|
package/dist/WebScraper.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;;;;AACA,2DAAyD;AACzD,kFAA+C;AAE/C,qDAA2C;AAqB3C,MAAa,UAAU;IAOrB,YAAY,UAA2B,EAAE;QANjC,YAAO,GAAmB,IAAI,CAAC;QAC/B,YAAO,GAA0B,IAAI,CAAC;QACtC,SAAI,GAAgB,IAAI,CAAC;QAK/B,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAElB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;YAExD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,CAAC,OAAO,GAAG,MAAM,+BAAU,CAAC,cAAc,EAAE,CAAC;YACnD,CAAC;iBAAM,CAAC;gBAEN,IAAI,CAAC,OAAO,GAAG,MAAM,0BAAQ,CAAC,MAAM,CAAC;oBACnC,QAAQ,EAAE,IAAI;oBACd,IAAI,EAAE;wBACJ,cAAc;wBACd,0BAA0B;wBAC1B,yBAAyB;wBACzB,iCAAiC;wBACjC,eAAe;wBACf,yBAAyB;qBAC1B;iBACF,CAAC,CAAC;YACL,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;YAClD,CAAC;YAED,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;gBACrE,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,gBAAgB,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,IAAI,EAAE;aACpD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gB
ACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,oBAAoB,CAChC,GAAW,EACX,OAAwB;QAExB,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAyB,CAAC;QAE9B,IAAI,CAAC;YAEH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClD,CAAC;YAGD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;aACxD,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAG3B,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,KAAK,GAAG,QAAQ,MAAM,QAAQ,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBAClD,OAAO,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,cAAc;iBACtE,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACpD,CAAC;YAGD,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAG5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,KAAK,IAAI,qCAAqC,CAAC;YACzD,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,YAAY;gBACpB,KAAK;gBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;gBAC3D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IAAI
,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,+BAA+B,EACvF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,+BAA+B,CACpF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,eAAe,GACnB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAEzE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,iBAAiB,eAAe,CAAC,OAAO,EAAE;aAChH,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED,I
AAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AAzSD,gCAySC"}
|
package/dist/test.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test.d.ts","sourceRoot":"","sources":["../src/test.ts"],"names":[],"mappings":""}
|
package/dist/test.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const WebScraper_1 = require("./WebScraper");
|
|
4
|
+
async function main() {
|
|
5
|
+
const ws = new WebScraper_1.WebScraper();
|
|
6
|
+
const result = await ws.scrape('https://bothsidesofthetable.com');
|
|
7
|
+
console.log('result=', JSON.stringify(result, null, 2));
|
|
8
|
+
}
|
|
9
|
+
void main();
|
|
10
|
+
//# sourceMappingURL=test.js.map
|
package/dist/test.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test.js","sourceRoot":"","sources":["../src/test.ts"],"names":[],"mappings":";;AAAA,6CAA0C;AAE1C,KAAK,UAAU,IAAI;IACjB,MAAM,EAAE,GAAG,IAAI,uBAAU,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,MAAM,CAAC,iCAAiC,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED,KAAK,IAAI,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ag-webscrape",
|
|
3
|
-
"version": "0.0.2",
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"author": "admin@gec.dev",
|
|
5
5
|
"description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -15,13 +15,15 @@
|
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"dependencies": {
|
|
17
17
|
"ag-common": "^0.0.752",
|
|
18
|
-
"playwright": "1.54.1"
|
|
18
|
+
"playwright-core": "1.54.1",
|
|
19
|
+
"playwright-aws-lambda": "^0.11.0"
|
|
19
20
|
},
|
|
20
21
|
"devDependencies": {
|
|
21
22
|
"@types/node": "24.0.13",
|
|
22
23
|
"eslint": "9.31.0",
|
|
23
24
|
"eslint-config-e7npm": "0.1.23",
|
|
24
|
-
"typescript": "5.8.3"
|
|
25
|
+
"typescript": "5.8.3",
|
|
26
|
+
"tsx": "^4.19.1"
|
|
25
27
|
},
|
|
26
28
|
"files": [
|
|
27
29
|
"dist/**/*",
|
|
@@ -38,6 +40,7 @@
|
|
|
38
40
|
"build": "tsc",
|
|
39
41
|
"dev": "tsc --watch",
|
|
40
42
|
"lint": "next lint",
|
|
41
|
-
"format": "eslint src --fix"
|
|
43
|
+
"format": "eslint src --fix",
|
|
44
|
+
"test": "tsx src/test.ts"
|
|
42
45
|
}
|
|
43
46
|
}
|
package/dist/example.d.ts
DELETED
package/dist/example.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"example.d.ts","sourceRoot":"","sources":["../src/example.ts"],"names":[],"mappings":""}
|
package/dist/example.js
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const index_1 = require("./index");
|
|
4
|
-
async function runExample() {
|
|
5
|
-
const scraper = new index_1.WebScraper({
|
|
6
|
-
timeout: 30000,
|
|
7
|
-
waitForTimeout: 3000,
|
|
8
|
-
});
|
|
9
|
-
try {
|
|
10
|
-
console.log('Starting web scraping example...');
|
|
11
|
-
const result = await scraper.scrape('https://httpbin.org/html');
|
|
12
|
-
console.log('Scraped successfully:', {
|
|
13
|
-
url: result.url,
|
|
14
|
-
method: result.method,
|
|
15
|
-
status: result.status,
|
|
16
|
-
htmlLength: result.html.length,
|
|
17
|
-
error: result.error,
|
|
18
|
-
});
|
|
19
|
-
const urls = [
|
|
20
|
-
'https://httpbin.org/html',
|
|
21
|
-
'https://httpbin.org/status/200',
|
|
22
|
-
'https://httpbin.org/status/404',
|
|
23
|
-
];
|
|
24
|
-
console.log('\nScraping multiple URLs...');
|
|
25
|
-
const results = await scraper.scrapeMultiple(urls);
|
|
26
|
-
results.forEach((result, index) => {
|
|
27
|
-
console.log(`Result ${index + 1}:`, {
|
|
28
|
-
url: result.url,
|
|
29
|
-
method: result.method,
|
|
30
|
-
status: result.status,
|
|
31
|
-
htmlLength: result.html.length,
|
|
32
|
-
error: result.error,
|
|
33
|
-
});
|
|
34
|
-
});
|
|
35
|
-
}
|
|
36
|
-
catch (error) {
|
|
37
|
-
console.error('Error during scraping:', error);
|
|
38
|
-
}
|
|
39
|
-
finally {
|
|
40
|
-
await scraper.dispose();
|
|
41
|
-
console.log('Scraper disposed successfully');
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
if (require.main === module) {
|
|
45
|
-
runExample().catch(console.error);
|
|
46
|
-
}
|
|
47
|
-
//# sourceMappingURL=example.js.map
|
package/dist/example.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"example.js","sourceRoot":"","sources":["../src/example.ts"],"names":[],"mappings":";;AAAA,mCAAqC;AAErC,KAAK,UAAU,UAAU;IACvB,MAAM,OAAO,GAAG,IAAI,kBAAU,CAAC;QAC7B,OAAO,EAAE,KAAK;QACd,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAGhD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,0BAA0B,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE;YACnC,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB,CAAC,CAAC;QAGH,MAAM,IAAI,GAAG;YACX,0BAA0B;YAC1B,gCAAgC;YAChC,gCAAgC;SACjC,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEnD,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,GAAG,CAAC,GAAG,EAAE;gBAClC,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;gBAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;IACjD,CAAC;YAAS,CAAC;QAET,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;IAC/C,CAAC;AACH,CAAC;AAGD,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,UAAU,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AACpC,CAAC"}
|