ag-webscrape 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/WebScraper.d.ts.map +1 -1
- package/dist/WebScraper.js +8 -4
- package/dist/WebScraper.js.map +1 -1
- package/package.json +2 -1
package/dist/WebScraper.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"WebScraper.d.ts","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,GAAG,YAAY,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAkB;gBAE5B,OAAO,GAAE,eAAoB;YAc3B,iBAAiB;IA2B/B,OAAO,CAAC,kBAAkB;YA0BZ,aAAa;YAiDb,oBAAoB;IA4ElC,OAAO,CAAC,WAAW;IAmBb,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC;IAmDpB,cAAc,CAClB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB/B"}
|
package/dist/WebScraper.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.WebScraper = void 0;
|
|
4
|
+
const log_1 = require("ag-common/dist/common/helpers/log");
|
|
4
5
|
const playwright_1 = require("playwright");
|
|
5
6
|
class WebScraper {
|
|
6
7
|
constructor(options = {}) {
|
|
7
8
|
this.browser = null;
|
|
8
9
|
this.context = null;
|
|
9
10
|
this.page = null;
|
|
10
|
-
this.userAgent =
|
|
11
|
+
this.userAgent =
|
|
12
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36';
|
|
11
13
|
this.defaultOptions = {
|
|
12
14
|
timeout: 30000,
|
|
13
15
|
retries: 3,
|
|
@@ -53,7 +55,9 @@ class WebScraper {
|
|
|
53
55
|
/rate.limit/i,
|
|
54
56
|
/temporarily.unavailable/i,
|
|
55
57
|
];
|
|
56
|
-
|
|
58
|
+
const result = antiScrapingPatterns.some((pattern) => pattern.test(html));
|
|
59
|
+
(0, log_1.info)('Anti-scraping detected:', result);
|
|
60
|
+
return result;
|
|
57
61
|
}
|
|
58
62
|
async fetchDirectly(url, options) {
|
|
59
63
|
const headers = {
|
|
@@ -168,12 +172,12 @@ class WebScraper {
|
|
|
168
172
|
!this.detectAntiScraping(result.html)) {
|
|
169
173
|
return result;
|
|
170
174
|
}
|
|
171
|
-
|
|
175
|
+
(0, log_1.warn)(`Direct fetch failed or anti-scraping detected for ${url}. Falling back to Playwright.`, JSON.stringify(result, null, 2));
|
|
172
176
|
}
|
|
173
177
|
catch (error) {
|
|
174
178
|
lastError =
|
|
175
179
|
error instanceof Error ? error : new Error('Unknown fetch error');
|
|
176
|
-
|
|
180
|
+
(0, log_1.warn)(`Direct fetch failed for ${url}: ${lastError.message}. Falling back to Playwright.`);
|
|
177
181
|
}
|
|
178
182
|
try {
|
|
179
183
|
const result = await this.scrapeWithPlaywright(url, mergedOptions);
|
package/dist/WebScraper.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"WebScraper.js","sourceRoot":"","sources":["../src/WebScraper.ts"],"names":[],"mappings":";;;AAAA,2DAA+D;AAE/D,2CAAsC;AAqBtC,MAAa,UAAU;IAOrB,YAAY,UAA2B,EAAE;QANjC,YAAO,GAAmB,IAAI,CAAC;QAC/B,YAAO,GAA0B,IAAI,CAAC;QACtC,SAAI,GAAgB,IAAI,CAAC;QAK/B,IAAI,CAAC,SAAS;YACZ,iHAAiH,CAAC;QACpH,IAAI,CAAC,cAAc,GAAG;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACX,CAAC;IACJ,CAAC;IAKO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,IAAI,CAAC,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,CAAC;gBACnC,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE;oBACJ,cAAc;oBACd,0BAA0B;oBAC1B,yBAAyB;oBACzB,iCAAiC;oBACjC,eAAe;oBACf,yBAAyB;iBAC1B;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;gBACrE,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,gBAAgB,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,IAAI,EAAE;aACpD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC;IACH,CAAC;IAKO,kBAAkB,CAAC,IAAY;QACrC,MAAM,oBAAoB,GAAG;YAC3B,aAAa;YACb,kBAAkB;YAClB,aAAa;YACb,WAAW;YACX,SAAS;YACT,iBAAiB;YACjB,2BAA2B;YAC3B,gBAAgB;YAChB,UAAU;YACV,UAAU;YACV,YAAY;YACZ,iBAAiB;YACjB,aAAa;YACb,0BAA0B;SAC3B,CAAC;QAEF,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QAC1E,IAAA,UAAI,EAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,MAAM,CAAC;IAChB,CAAC;IAKO,KAAK,CAAC,aAAa,CACzB,GAAW,EACX,OAAwB;QAExB,MAAM,OAAO,GAAG;YACd,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE;YAC5D,MAAM,EACJ,4EAA4E;YAC9E,iBAAiB,EAAE,gBAAgB;YACnC,iBAAiB,EAAE,eAAe;YAClC,UAAU,EAAE,YAAY;YACxB,2BAA2B,EAAE,GAAG;YAChC,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAQ,CAChD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;gBAC/B,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAKO,KAAK,CAAC,oBAAoB,CAChC,GAAW,EACX,OAAwB;QAExB,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAyB,CAAC;QAE9B,IAAI,CAAC;YAEH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClD,CAAC;YAGD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO;aACxD,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAG3B,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,KAAK,GAAG,QAAQ,MAAM,QAAQ,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBAClD,OAAO,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,cAAc;iBACtE,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACpD,CAAC;YAGD,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAG5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,KAAK,GAAG,KAAK,IAAI,qCAAqC,CAAC;YACzD,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,IAAI;gBACJ,MAAM;gBACN,MAAM,EAAE,YAAY;gBACpB,KAAK;gBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;gBAC3D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAKO,WAAW,CAAC,IAAY;QAC9B,MAAM,aAAa,GAAG;YACpB,4BAA4B;YAC5B,4BAA4B;YAC5B,4BAA4B;YAC5B,gCAAgC;YAChC,gCAAgC;YAChC,mCAAmC;YACnC,sBAAsB;YACtB,sBAAsB;YACtB,sBAAsB;SACvB,CAAC;QAEF,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IAKD,KAAK,CAAC,MAAM,CACV,GAAW,EACX,UAA2B,EAAE;QAE7B,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,OAAO,EAAE,CAAC;QAC7D,IAAI,SAAS,GAAiB,IAAI,CAAC;QAGnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAG5D,IACE,MAAM,CAAC,MAAM,IAAI,GAAG;gBACpB,MAAM,CAAC,MAAM,GAAG,GAAG;gBACnB,CAAC,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,EACrC,CAAC;gBACD,OAAO,MAAM,CAAC;YAChB,CAAC;YAGD,IAAA,UAAI,EACF,qDAAqD,GAAG,+BAA+B,EACvF,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAChC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS;gBACP,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAA,UAAI,EACF,2BAA2B,GAAG,KAAK,SAAS,CAAC,OAAO,+BAA+B,CACpF,CAAC;QACJ,CAAC;QAGD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACnE,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,eAAe,GACnB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAEzE,OAAO;gBACL,GAAG;gBACH,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,CAAC;gBACT,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,+BAA+B,SAAS,EAAE,OAAO,IAAI,SAAS,iBAAiB,eAAe,CAAC,OAAO,EAAE;aAChH,CAAC;QACJ,CAAC;IACH,CAAC;IAKD,KAAK,CAAC,cAAc,CAClB,IAAc,EACd,UAA2B,EAAE;QAE7B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG;oBACH,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;iBAChE,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAKD,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AA3TD,gCA2TC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ag-webscrape",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.2",
|
|
4
4
|
"author": "admin@gec.dev",
|
|
5
5
|
"description": "TypeScript web scraper with Playwright fallback for anti-scraping protection",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
],
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"dependencies": {
|
|
17
|
+
"ag-common": "^0.0.752",
|
|
17
18
|
"playwright": "1.54.1"
|
|
18
19
|
},
|
|
19
20
|
"devDependencies": {
|