@letsscrapedata/controller 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index.mjs → index.cjs} +216 -178
- package/dist/{index.d.mts → index.d.cts} +18 -13
- package/dist/index.d.ts +18 -13
- package/dist/index.js +196 -198
- package/package.json +3 -2
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
"use strict";
|
|
1
2
|
var __create = Object.create;
|
|
2
3
|
var __defProp = Object.defineProperty;
|
|
3
4
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -27,12 +28,13 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
27
28
|
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
28
29
|
mod
|
|
29
30
|
));
|
|
31
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
30
32
|
|
|
31
33
|
// ../../node_modules/boolbase/index.js
|
|
32
34
|
var require_boolbase = __commonJS({
|
|
33
|
-
"../../node_modules/boolbase/index.js"(
|
|
35
|
+
"../../node_modules/boolbase/index.js"(exports2, module2) {
|
|
34
36
|
"use strict";
|
|
35
|
-
|
|
37
|
+
module2.exports = {
|
|
36
38
|
trueFunc: function trueFunc2() {
|
|
37
39
|
return true;
|
|
38
40
|
},
|
|
@@ -43,6 +45,23 @@ var require_boolbase = __commonJS({
|
|
|
43
45
|
}
|
|
44
46
|
});
|
|
45
47
|
|
|
48
|
+
// src/index.ts
|
|
49
|
+
var src_exports = {};
|
|
50
|
+
__export(src_exports, {
|
|
51
|
+
CheerioElement: () => CheerioElement,
|
|
52
|
+
CheerioPage: () => CheerioPage,
|
|
53
|
+
PlaywrightBrowser: () => PlaywrightBrowser,
|
|
54
|
+
PlaywrightBrowserContext: () => PlaywrightBrowserContext,
|
|
55
|
+
PlaywrightElement: () => PlaywrightElement,
|
|
56
|
+
PlaywrightPage: () => PlaywrightPage,
|
|
57
|
+
PuppeteerBrowser: () => PuppeteerBrowser,
|
|
58
|
+
PuppeteerBrowserContext: () => PuppeteerBrowserContext,
|
|
59
|
+
PuppeteerElement: () => PuppeteerElement,
|
|
60
|
+
PuppeteerPage: () => PuppeteerPage,
|
|
61
|
+
defaultProxy: () => defaultProxy
|
|
62
|
+
});
|
|
63
|
+
module.exports = __toCommonJS(src_exports);
|
|
64
|
+
|
|
46
65
|
// src/types/types.ts
|
|
47
66
|
var defaultProxy = {
|
|
48
67
|
server: "default",
|
|
@@ -54,18 +73,18 @@ var defaultProxy = {
|
|
|
54
73
|
};
|
|
55
74
|
|
|
56
75
|
// src/playwright/browser.ts
|
|
57
|
-
|
|
76
|
+
var import_node_events3 = __toESM(require("events"), 1);
|
|
58
77
|
|
|
59
78
|
// src/playwright/context.ts
|
|
60
|
-
|
|
61
|
-
|
|
79
|
+
var import_node_events2 = __toESM(require("events"), 1);
|
|
80
|
+
var import_utils3 = require("@letsscrapedata/utils");
|
|
62
81
|
|
|
63
82
|
// src/playwright/page.ts
|
|
64
|
-
|
|
65
|
-
|
|
83
|
+
var import_node_events = __toESM(require("events"), 1);
|
|
84
|
+
var import_utils2 = require("@letsscrapedata/utils");
|
|
66
85
|
|
|
67
86
|
// src/playwright/element.ts
|
|
68
|
-
|
|
87
|
+
var import_utils = require("@letsscrapedata/utils");
|
|
69
88
|
var PlaywrightElement = class _PlaywrightElement {
|
|
70
89
|
#frame;
|
|
71
90
|
#locator;
|
|
@@ -281,39 +300,21 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
281
300
|
}
|
|
282
301
|
break;
|
|
283
302
|
default:
|
|
284
|
-
unreachable(type);
|
|
303
|
+
(0, import_utils.unreachable)(type);
|
|
285
304
|
}
|
|
286
305
|
return true;
|
|
287
306
|
}
|
|
288
307
|
async screenshot(options) {
|
|
289
308
|
return await this.#locator.screenshot(options);
|
|
290
309
|
}
|
|
291
|
-
async scrollBy(x, y) {
|
|
292
|
-
await this.#locator.page().evaluate(
|
|
293
|
-
([x2, y2]) => {
|
|
294
|
-
window.scrollBy(x2, y2);
|
|
295
|
-
},
|
|
296
|
-
[x, y]
|
|
297
|
-
);
|
|
298
|
-
return true;
|
|
299
|
-
}
|
|
300
310
|
async scrollIntoView() {
|
|
301
311
|
await this.#locator.scrollIntoViewIfNeeded();
|
|
302
312
|
return true;
|
|
303
313
|
}
|
|
304
|
-
async scrollTo(x, y) {
|
|
305
|
-
await this.#locator.page().evaluate(
|
|
306
|
-
([x2, y2]) => {
|
|
307
|
-
window.scrollTo(x2, y2);
|
|
308
|
-
},
|
|
309
|
-
[x, y]
|
|
310
|
-
);
|
|
311
|
-
return true;
|
|
312
|
-
}
|
|
313
314
|
};
|
|
314
315
|
|
|
315
316
|
// src/playwright/page.ts
|
|
316
|
-
var PlaywrightPage = class extends
|
|
317
|
+
var PlaywrightPage = class extends import_node_events.default {
|
|
317
318
|
#lsdBrowserContext;
|
|
318
319
|
#page;
|
|
319
320
|
#status;
|
|
@@ -335,7 +336,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
335
336
|
const cookieItems = await this.#getCookies(page);
|
|
336
337
|
const domainSet = new Set(cookieItems.map((c) => c.domain));
|
|
337
338
|
if (domainSet.size !== 1) {
|
|
338
|
-
logwarn(`Domains in clearCookies: ${Array.from(domainSet.values())}`);
|
|
339
|
+
(0, import_utils2.logwarn)(`Domains in clearCookies: ${Array.from(domainSet.values())}`);
|
|
339
340
|
}
|
|
340
341
|
for (const domain of domainSet.values()) {
|
|
341
342
|
await browserContext.clearCookies({ domain });
|
|
@@ -487,9 +488,9 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
487
488
|
const page = this.#page;
|
|
488
489
|
const pageId = this.#pageId;
|
|
489
490
|
page.on("close", async () => {
|
|
490
|
-
loginfo(`##browser ${pageId} closed`);
|
|
491
|
+
(0, import_utils2.loginfo)(`##browser ${pageId} closed`);
|
|
491
492
|
if (!page.pageInfo) {
|
|
492
|
-
logerr(`Logic error in page.on("close")`);
|
|
493
|
+
(0, import_utils2.logerr)(`Logic error in page.on("close")`);
|
|
493
494
|
}
|
|
494
495
|
this.emit("pageClose");
|
|
495
496
|
this.#lsdBrowserContext.emit("pageClose", this);
|
|
@@ -503,12 +504,12 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
503
504
|
popupPageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
504
505
|
pageInfo.openType = "popup";
|
|
505
506
|
} else {
|
|
506
|
-
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
507
|
+
(0, import_utils2.logerr)(`##browser ${pageId} has popup without page.pageInfo`);
|
|
507
508
|
}
|
|
508
|
-
loginfo(`##browser ${pageId} has popup ${popupPageId}`);
|
|
509
|
+
(0, import_utils2.loginfo)(`##browser ${pageId} has popup ${popupPageId}`);
|
|
509
510
|
this.emit("pagePopup", pageInfo);
|
|
510
511
|
} else {
|
|
511
|
-
logerr(`##browser ${pageId} has popup page with null page`);
|
|
512
|
+
(0, import_utils2.logerr)(`##browser ${pageId} has popup page with null page`);
|
|
512
513
|
}
|
|
513
514
|
});
|
|
514
515
|
}
|
|
@@ -520,7 +521,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
520
521
|
this.#lsdBrowserContext = browserContext;
|
|
521
522
|
this.#page = page;
|
|
522
523
|
this.#status = "free";
|
|
523
|
-
const currentTime = getCurrentUnixTime();
|
|
524
|
+
const currentTime = (0, import_utils2.getCurrentUnixTime)();
|
|
524
525
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0 } = pageInfo ? pageInfo : {};
|
|
525
526
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId };
|
|
526
527
|
this.#pageId = `page${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
@@ -568,7 +569,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
568
569
|
}
|
|
569
570
|
return true;
|
|
570
571
|
} catch (err) {
|
|
571
|
-
logerr(err);
|
|
572
|
+
(0, import_utils2.logerr)(err);
|
|
572
573
|
return false;
|
|
573
574
|
}
|
|
574
575
|
}
|
|
@@ -752,6 +753,30 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
752
753
|
}
|
|
753
754
|
return await this.#page.screenshot(options);
|
|
754
755
|
}
|
|
756
|
+
async scrollBy(x, y) {
|
|
757
|
+
if (!this.#page) {
|
|
758
|
+
throw new Error("No valid page");
|
|
759
|
+
}
|
|
760
|
+
await this.#page.evaluate(
|
|
761
|
+
([x2, y2]) => {
|
|
762
|
+
window.scrollBy(x2, y2);
|
|
763
|
+
},
|
|
764
|
+
[x, y]
|
|
765
|
+
);
|
|
766
|
+
return true;
|
|
767
|
+
}
|
|
768
|
+
async scrollTo(x, y) {
|
|
769
|
+
if (!this.#page) {
|
|
770
|
+
throw new Error("No valid page");
|
|
771
|
+
}
|
|
772
|
+
await this.#page.evaluate(
|
|
773
|
+
([x2, y2]) => {
|
|
774
|
+
window.scrollTo(x2, y2);
|
|
775
|
+
},
|
|
776
|
+
[x, y]
|
|
777
|
+
);
|
|
778
|
+
return true;
|
|
779
|
+
}
|
|
755
780
|
async setCookies(cookies) {
|
|
756
781
|
if (!this.#page) {
|
|
757
782
|
throw new Error("No valid page");
|
|
@@ -812,7 +837,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
812
837
|
}
|
|
813
838
|
return true;
|
|
814
839
|
} catch (err) {
|
|
815
|
-
logerr(err);
|
|
840
|
+
(0, import_utils2.logerr)(err);
|
|
816
841
|
return false;
|
|
817
842
|
}
|
|
818
843
|
}
|
|
@@ -822,7 +847,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
822
847
|
}
|
|
823
848
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
824
849
|
if (actOptions.length <= 0) {
|
|
825
|
-
logwarn("Invalid paras in setRequestInterception");
|
|
850
|
+
(0, import_utils2.logwarn)("Invalid paras in setRequestInterception");
|
|
826
851
|
return false;
|
|
827
852
|
}
|
|
828
853
|
const firstRequestInterception = this.#resquestInterceptionOptions.length <= 0;
|
|
@@ -833,7 +858,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
833
858
|
this.#resquestInterceptionOptions.push(option);
|
|
834
859
|
break;
|
|
835
860
|
default:
|
|
836
|
-
|
|
861
|
+
(0, import_utils2.unreachable)(option.action);
|
|
837
862
|
}
|
|
838
863
|
}
|
|
839
864
|
if (firstRequestInterception && this.#resquestInterceptionOptions.length > 0) {
|
|
@@ -857,7 +882,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
857
882
|
});
|
|
858
883
|
break;
|
|
859
884
|
default:
|
|
860
|
-
|
|
885
|
+
(0, import_utils2.unreachable)(action);
|
|
861
886
|
}
|
|
862
887
|
return true;
|
|
863
888
|
} else {
|
|
@@ -866,7 +891,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
866
891
|
await route.continue();
|
|
867
892
|
return true;
|
|
868
893
|
} catch (err) {
|
|
869
|
-
logerr(err);
|
|
894
|
+
(0, import_utils2.logerr)(err);
|
|
870
895
|
return false;
|
|
871
896
|
}
|
|
872
897
|
});
|
|
@@ -884,7 +909,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
884
909
|
return;
|
|
885
910
|
}
|
|
886
911
|
for (const option of this.#responseInterceptionOptions) {
|
|
887
|
-
const { requestMatch, responseMatch,
|
|
912
|
+
const { requestMatch, responseMatch, responseItems, handler, handlerOptions } = option;
|
|
888
913
|
let matchedFlag = !requestMatch || this.#checkRequestMatch(request, requestMatch);
|
|
889
914
|
if (matchedFlag && responseMatch) {
|
|
890
915
|
const { minLength, maxLength } = responseMatch;
|
|
@@ -897,20 +922,20 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
897
922
|
if (!matchedFlag) {
|
|
898
923
|
continue;
|
|
899
924
|
}
|
|
900
|
-
if (Array.isArray(
|
|
925
|
+
if (Array.isArray(responseItems)) {
|
|
901
926
|
const requestMethod = request.method();
|
|
902
927
|
const requestUrl = request.url();
|
|
903
928
|
const reqData2 = request.postData();
|
|
904
929
|
const requestData = reqData2 ? reqData2 : "";
|
|
905
930
|
const responseData = await response.text();
|
|
906
|
-
|
|
931
|
+
responseItems.push({
|
|
907
932
|
pageUrl,
|
|
908
933
|
requestMethod,
|
|
909
934
|
requestUrl,
|
|
910
935
|
requestData,
|
|
911
936
|
responseData
|
|
912
937
|
});
|
|
913
|
-
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
938
|
+
(0, import_utils2.loginfo)(`##browser cache matched response: ${requestUrl}`);
|
|
914
939
|
}
|
|
915
940
|
if (typeof handler === "function") {
|
|
916
941
|
await handler(response, handlerOptions);
|
|
@@ -918,7 +943,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
918
943
|
}
|
|
919
944
|
return;
|
|
920
945
|
} catch (err) {
|
|
921
|
-
logerr(err);
|
|
946
|
+
(0, import_utils2.logerr)(err);
|
|
922
947
|
return;
|
|
923
948
|
}
|
|
924
949
|
}
|
|
@@ -933,7 +958,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
933
958
|
}
|
|
934
959
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
935
960
|
for (const option of actOptions) {
|
|
936
|
-
if (option?.
|
|
961
|
+
if (option?.responseItems || option?.handler) {
|
|
937
962
|
this.#responseInterceptionOptions.push(option);
|
|
938
963
|
} else {
|
|
939
964
|
throw new Error(`Invalid ResponseInterceptionOption`);
|
|
@@ -1018,7 +1043,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
1018
1043
|
};
|
|
1019
1044
|
|
|
1020
1045
|
// src/playwright/context.ts
|
|
1021
|
-
var PlaywrightBrowserContext = class extends
|
|
1046
|
+
var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
1022
1047
|
#lsdBrowser;
|
|
1023
1048
|
#browserIdx;
|
|
1024
1049
|
#browserContextIdx;
|
|
@@ -1037,12 +1062,12 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1037
1062
|
}
|
|
1038
1063
|
const pages = this.#browserContext.pages();
|
|
1039
1064
|
const openType = this.#lsdBrowser.browserCreationMethod();
|
|
1040
|
-
const lastStatusUpdateTime =
|
|
1065
|
+
const lastStatusUpdateTime = (0, import_utils3.getCurrentUnixTime)();
|
|
1041
1066
|
for (const page of pages) {
|
|
1042
1067
|
const pageInfo = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType, openTime: this.#createTime, lastStatusUpdateTime, taskId: 0 };
|
|
1043
1068
|
const lsdPage = new PlaywrightPage(this, page, pageInfo);
|
|
1044
1069
|
this.#lsdPages.push(lsdPage);
|
|
1045
|
-
|
|
1070
|
+
(0, import_utils3.loginfo)(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1046
1071
|
}
|
|
1047
1072
|
}
|
|
1048
1073
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0) {
|
|
@@ -1057,7 +1082,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1057
1082
|
this.#browserIdx = browserIdx;
|
|
1058
1083
|
this.#browserContextIdx = browserContextIdx;
|
|
1059
1084
|
this.#browserContext = browserContext;
|
|
1060
|
-
this.#createTime =
|
|
1085
|
+
this.#createTime = (0, import_utils3.getCurrentUnixTime)();
|
|
1061
1086
|
this.#incognito = incognito === false ? false : true;
|
|
1062
1087
|
this.#proxy = proxy?.server ? proxy : null;
|
|
1063
1088
|
this.#maxPagesPerBrowserContext = maxPagesPerBrowserContext;
|
|
@@ -1070,29 +1095,29 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1070
1095
|
const pageInfo = page.pageInfo;
|
|
1071
1096
|
if (pageInfo) {
|
|
1072
1097
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
1073
|
-
|
|
1098
|
+
(0, import_utils3.logwarn)(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
1074
1099
|
} else {
|
|
1075
|
-
const currentTime =
|
|
1100
|
+
const currentTime = (0, import_utils3.getCurrentUnixTime)();
|
|
1076
1101
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime, lastStatusUpdateTime: currentTime, taskId: 0 };
|
|
1077
1102
|
const lsdPage = new PlaywrightPage(this, page, pageInfo2);
|
|
1078
1103
|
this.#lsdPages.push(lsdPage);
|
|
1079
|
-
|
|
1104
|
+
(0, import_utils3.loginfo)(`##browser ${lsdPage.id()} created`);
|
|
1080
1105
|
}
|
|
1081
1106
|
});
|
|
1082
1107
|
browserContext.on("close", (bc) => {
|
|
1083
1108
|
if (browserContext !== bc) {
|
|
1084
|
-
|
|
1109
|
+
(0, import_utils3.logerr)(`##browser different browserContext in browserContext.on("close")`);
|
|
1085
1110
|
}
|
|
1086
1111
|
this.#lsdBrowser.emit("browserContextClose", this);
|
|
1087
1112
|
});
|
|
1088
1113
|
this.on("pageClose", (lsdPage) => {
|
|
1089
1114
|
if (!(lsdPage instanceof PlaywrightPage)) {
|
|
1090
|
-
|
|
1115
|
+
(0, import_utils3.logerr)(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
1091
1116
|
return;
|
|
1092
1117
|
}
|
|
1093
1118
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
1094
1119
|
if (idx < 0) {
|
|
1095
|
-
|
|
1120
|
+
(0, import_utils3.logerr)(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
1096
1121
|
return;
|
|
1097
1122
|
}
|
|
1098
1123
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -1115,15 +1140,15 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1115
1140
|
this.#gettingPage = true;
|
|
1116
1141
|
return true;
|
|
1117
1142
|
} else {
|
|
1118
|
-
await sleep(200);
|
|
1143
|
+
await (0, import_utils3.sleep)(200);
|
|
1119
1144
|
}
|
|
1120
1145
|
}
|
|
1121
|
-
|
|
1146
|
+
(0, import_utils3.logwarn)(`Cannot get the gettingLock.`);
|
|
1122
1147
|
return false;
|
|
1123
1148
|
}
|
|
1124
1149
|
#freeGettingLock() {
|
|
1125
1150
|
if (!this.#gettingPage) {
|
|
1126
|
-
|
|
1151
|
+
(0, import_utils3.logwarn)(`Getting lock is already free now.`);
|
|
1127
1152
|
}
|
|
1128
1153
|
this.#gettingPage = false;
|
|
1129
1154
|
}
|
|
@@ -1132,7 +1157,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1132
1157
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
1133
1158
|
}
|
|
1134
1159
|
if (maxPageFreeSeconds <= 0) {
|
|
1135
|
-
|
|
1160
|
+
(0, import_utils3.logwarn)(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
1136
1161
|
return false;
|
|
1137
1162
|
}
|
|
1138
1163
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -1140,7 +1165,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1140
1165
|
return false;
|
|
1141
1166
|
}
|
|
1142
1167
|
try {
|
|
1143
|
-
const maxUpdateTime =
|
|
1168
|
+
const maxUpdateTime = (0, import_utils3.getCurrentUnixTime)() - this.#maxPageFreeSeconds;
|
|
1144
1169
|
let freePages = this.#lsdPages.filter((p) => p.isFree() && p.pageInfo().lastStatusUpdateTime < maxUpdateTime);
|
|
1145
1170
|
if (freePages.length === this.#lsdPages.length) {
|
|
1146
1171
|
freePages = freePages.slice(1);
|
|
@@ -1151,7 +1176,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1151
1176
|
this.#freeGettingLock();
|
|
1152
1177
|
return true;
|
|
1153
1178
|
} catch (err) {
|
|
1154
|
-
|
|
1179
|
+
(0, import_utils3.logerr)(err);
|
|
1155
1180
|
this.#freeGettingLock();
|
|
1156
1181
|
return false;
|
|
1157
1182
|
}
|
|
@@ -1192,7 +1217,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1192
1217
|
return null;
|
|
1193
1218
|
}
|
|
1194
1219
|
} catch (err) {
|
|
1195
|
-
|
|
1220
|
+
(0, import_utils3.logerr)(err);
|
|
1196
1221
|
this.#freeGettingLock();
|
|
1197
1222
|
return null;
|
|
1198
1223
|
}
|
|
@@ -1255,8 +1280,8 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1255
1280
|
};
|
|
1256
1281
|
|
|
1257
1282
|
// src/playwright/browser.ts
|
|
1258
|
-
|
|
1259
|
-
var PlaywrightBrowser = class extends
|
|
1283
|
+
var import_utils4 = require("@letsscrapedata/utils");
|
|
1284
|
+
var PlaywrightBrowser = class extends import_node_events3.default {
|
|
1260
1285
|
#browser;
|
|
1261
1286
|
#browserIdx;
|
|
1262
1287
|
#lsdBrowserContexts;
|
|
@@ -1301,38 +1326,38 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1301
1326
|
this.#executablePath = executablePath;
|
|
1302
1327
|
this.#nextBrowserContextIdx = 1;
|
|
1303
1328
|
this.#closeFreePagesIntervalId = null;
|
|
1304
|
-
|
|
1329
|
+
(0, import_utils4.loginfo)(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1305
1330
|
const browserContexts = browser.contexts();
|
|
1306
1331
|
if (browserContexts.length > 0) {
|
|
1307
|
-
|
|
1332
|
+
(0, import_utils4.logwarn)(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
1308
1333
|
}
|
|
1309
1334
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
1310
1335
|
for (const browserContext of browserContexts) {
|
|
1311
1336
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds());
|
|
1312
1337
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1313
|
-
|
|
1338
|
+
(0, import_utils4.loginfo)(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1314
1339
|
}
|
|
1315
1340
|
browser.on("disconnected", () => {
|
|
1316
|
-
|
|
1341
|
+
(0, import_utils4.loginfo)(`##browser ${this.id()} disconnected`);
|
|
1317
1342
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1318
|
-
|
|
1343
|
+
(0, import_utils4.logerr)(`${this.id()} has browserContexts when disconnected`);
|
|
1319
1344
|
}
|
|
1320
1345
|
});
|
|
1321
1346
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
1322
1347
|
if (!(lsdBrowserContext instanceof PlaywrightBrowserContext)) {
|
|
1323
|
-
|
|
1348
|
+
(0, import_utils4.logerr)(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
1324
1349
|
return;
|
|
1325
1350
|
}
|
|
1326
1351
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
1327
1352
|
if (idx < 0) {
|
|
1328
|
-
|
|
1353
|
+
(0, import_utils4.logerr)(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1329
1354
|
return;
|
|
1330
1355
|
}
|
|
1331
|
-
|
|
1356
|
+
(0, import_utils4.loginfo)(`##browser ${lsdBrowserContext.id()} closed
|
|
1332
1357
|
`);
|
|
1333
1358
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1334
1359
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1335
|
-
|
|
1360
|
+
(0, import_utils4.loginfo)(`##browser ${this.id()} has no browserContexts now`);
|
|
1336
1361
|
}
|
|
1337
1362
|
return;
|
|
1338
1363
|
});
|
|
@@ -1350,7 +1375,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1350
1375
|
// 常用方法(按常见调用顺序排序)
|
|
1351
1376
|
async newBrowserContext(options) {
|
|
1352
1377
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
1353
|
-
|
|
1378
|
+
(0, import_utils4.logwarn)(`##browser ${this.id()} can not create more new browserContext`);
|
|
1354
1379
|
return null;
|
|
1355
1380
|
}
|
|
1356
1381
|
const browserContextOptions = {};
|
|
@@ -1365,7 +1390,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1365
1390
|
const browserContext = await this.#browser.newContext(browserContextOptions);
|
|
1366
1391
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds());
|
|
1367
1392
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1368
|
-
|
|
1393
|
+
(0, import_utils4.loginfo)(`##browser ${lsdBrowserContext.id()} created`);
|
|
1369
1394
|
return lsdBrowserContext;
|
|
1370
1395
|
}
|
|
1371
1396
|
async close() {
|
|
@@ -1416,16 +1441,16 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1416
1441
|
};
|
|
1417
1442
|
|
|
1418
1443
|
// src/puppeteer/browser.ts
|
|
1419
|
-
|
|
1444
|
+
var import_node_events6 = __toESM(require("events"), 1);
|
|
1420
1445
|
|
|
1421
1446
|
// src/puppeteer/context.ts
|
|
1422
|
-
|
|
1447
|
+
var import_node_events5 = __toESM(require("events"), 1);
|
|
1423
1448
|
|
|
1424
1449
|
// src/puppeteer/page.ts
|
|
1425
|
-
|
|
1450
|
+
var import_node_events4 = __toESM(require("events"), 1);
|
|
1426
1451
|
|
|
1427
1452
|
// src/puppeteer/element.ts
|
|
1428
|
-
|
|
1453
|
+
var import_utils5 = require("@letsscrapedata/utils");
|
|
1429
1454
|
var PuppeteerElement = class _PuppeteerElement {
|
|
1430
1455
|
#frame;
|
|
1431
1456
|
#$ele;
|
|
@@ -1513,7 +1538,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1513
1538
|
}
|
|
1514
1539
|
return retObj;
|
|
1515
1540
|
} catch (err) {
|
|
1516
|
-
|
|
1541
|
+
(0, import_utils5.logerr)(err);
|
|
1517
1542
|
return retObj;
|
|
1518
1543
|
}
|
|
1519
1544
|
}
|
|
@@ -1663,44 +1688,24 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1663
1688
|
}
|
|
1664
1689
|
break;
|
|
1665
1690
|
default:
|
|
1666
|
-
|
|
1691
|
+
(0, import_utils5.unreachable)(type);
|
|
1667
1692
|
}
|
|
1668
1693
|
return true;
|
|
1669
1694
|
}
|
|
1670
1695
|
async screenshot(options) {
|
|
1671
1696
|
return await this.#$ele.screenshot(options);
|
|
1672
1697
|
}
|
|
1673
|
-
async scrollBy(x, y) {
|
|
1674
|
-
await this.#frame.evaluate(
|
|
1675
|
-
(x2, y2) => {
|
|
1676
|
-
window.scrollBy(x2, y2);
|
|
1677
|
-
},
|
|
1678
|
-
x,
|
|
1679
|
-
y
|
|
1680
|
-
);
|
|
1681
|
-
return true;
|
|
1682
|
-
}
|
|
1683
1698
|
async scrollIntoView() {
|
|
1684
1699
|
await this.#frame.evaluate((ele) => {
|
|
1685
1700
|
ele.scrollIntoView();
|
|
1686
1701
|
}, this.#$ele);
|
|
1687
1702
|
return true;
|
|
1688
1703
|
}
|
|
1689
|
-
async scrollTo(x, y) {
|
|
1690
|
-
await this.#frame.evaluate(
|
|
1691
|
-
(x2, y2) => {
|
|
1692
|
-
window.scrollTo(x2, y2);
|
|
1693
|
-
},
|
|
1694
|
-
x,
|
|
1695
|
-
y
|
|
1696
|
-
);
|
|
1697
|
-
return true;
|
|
1698
|
-
}
|
|
1699
1704
|
};
|
|
1700
1705
|
|
|
1701
1706
|
// src/puppeteer/page.ts
|
|
1702
|
-
|
|
1703
|
-
var PuppeteerPage = class extends
|
|
1707
|
+
var import_utils6 = require("@letsscrapedata/utils");
|
|
1708
|
+
var PuppeteerPage = class extends import_node_events4.default {
|
|
1704
1709
|
#lsdBrowserContext;
|
|
1705
1710
|
#page;
|
|
1706
1711
|
#status;
|
|
@@ -1859,9 +1864,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1859
1864
|
const page = this.#page;
|
|
1860
1865
|
const pageId = this.#pageId;
|
|
1861
1866
|
page.on("close", async () => {
|
|
1862
|
-
|
|
1867
|
+
(0, import_utils6.loginfo)(`##browser ${pageId} closed`);
|
|
1863
1868
|
if (!page.pageInfo) {
|
|
1864
|
-
|
|
1869
|
+
(0, import_utils6.logerr)(`Logic error in page.on("close")`);
|
|
1865
1870
|
}
|
|
1866
1871
|
this.emit("pageClose");
|
|
1867
1872
|
this.#lsdBrowserContext.emit("pageClose", this);
|
|
@@ -1875,12 +1880,12 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1875
1880
|
popupPageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
1876
1881
|
pageInfo.openType = "popup";
|
|
1877
1882
|
} else {
|
|
1878
|
-
|
|
1883
|
+
(0, import_utils6.logerr)(`##browser ${pageId} has popup without page.pageInfo`);
|
|
1879
1884
|
}
|
|
1880
|
-
|
|
1885
|
+
(0, import_utils6.loginfo)(`##browser ${pageId} has popup ${popupPageId}`);
|
|
1881
1886
|
this.emit("pagePopup", pageInfo);
|
|
1882
1887
|
} else {
|
|
1883
|
-
|
|
1888
|
+
(0, import_utils6.logerr)(`##browser ${pageId} has popup page with null page`);
|
|
1884
1889
|
}
|
|
1885
1890
|
});
|
|
1886
1891
|
}
|
|
@@ -1892,7 +1897,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1892
1897
|
this.#lsdBrowserContext = browserContext;
|
|
1893
1898
|
this.#page = page;
|
|
1894
1899
|
this.#status = "free";
|
|
1895
|
-
const currentTime =
|
|
1900
|
+
const currentTime = (0, import_utils6.getCurrentUnixTime)();
|
|
1896
1901
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0 } = pageInfo ? pageInfo : {};
|
|
1897
1902
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId };
|
|
1898
1903
|
this.#pageId = `page${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
@@ -2123,6 +2128,30 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2123
2128
|
}
|
|
2124
2129
|
return await this.#page.screenshot(options);
|
|
2125
2130
|
}
|
|
2131
|
+
async scrollBy(x, y) {
|
|
2132
|
+
if (!this.#page) {
|
|
2133
|
+
throw new Error("No valid page");
|
|
2134
|
+
}
|
|
2135
|
+
await this.#page.evaluate(
|
|
2136
|
+
([x2, y2]) => {
|
|
2137
|
+
window.scrollBy(x2, y2);
|
|
2138
|
+
},
|
|
2139
|
+
[x, y]
|
|
2140
|
+
);
|
|
2141
|
+
return true;
|
|
2142
|
+
}
|
|
2143
|
+
async scrollTo(x, y) {
|
|
2144
|
+
if (!this.#page) {
|
|
2145
|
+
throw new Error("No valid page");
|
|
2146
|
+
}
|
|
2147
|
+
await this.#page.evaluate(
|
|
2148
|
+
([x2, y2]) => {
|
|
2149
|
+
window.scrollTo(x2, y2);
|
|
2150
|
+
},
|
|
2151
|
+
[x, y]
|
|
2152
|
+
);
|
|
2153
|
+
return true;
|
|
2154
|
+
}
|
|
2126
2155
|
async setCookies(cookies) {
|
|
2127
2156
|
if (!this.#page) {
|
|
2128
2157
|
throw new Error("No valid page");
|
|
@@ -2220,7 +2249,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2220
2249
|
});
|
|
2221
2250
|
break;
|
|
2222
2251
|
default:
|
|
2223
|
-
|
|
2252
|
+
(0, import_utils6.unreachable)(action);
|
|
2224
2253
|
}
|
|
2225
2254
|
return true;
|
|
2226
2255
|
}
|
|
@@ -2255,7 +2284,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2255
2284
|
return false;
|
|
2256
2285
|
}
|
|
2257
2286
|
for (const option of actOptions) {
|
|
2258
|
-
const { requestMatch, responseMatch,
|
|
2287
|
+
const { requestMatch, responseMatch, responseItems, handler, handlerOptions } = option;
|
|
2259
2288
|
let matchedFlag = !requestMatch || this.#checkRequestMatch(request, requestMatch);
|
|
2260
2289
|
if (matchedFlag && responseMatch) {
|
|
2261
2290
|
const { minLength, maxLength } = responseMatch;
|
|
@@ -2268,13 +2297,13 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2268
2297
|
if (!matchedFlag) {
|
|
2269
2298
|
continue;
|
|
2270
2299
|
}
|
|
2271
|
-
if (Array.isArray(
|
|
2300
|
+
if (Array.isArray(responseItems)) {
|
|
2272
2301
|
const requestMethod = request.method();
|
|
2273
2302
|
const requestUrl = request.url();
|
|
2274
2303
|
const reqData2 = request.postData();
|
|
2275
2304
|
const requestData = reqData2 ? reqData2 : "";
|
|
2276
2305
|
const responseData = await response.text();
|
|
2277
|
-
|
|
2306
|
+
responseItems.push({
|
|
2278
2307
|
pageUrl,
|
|
2279
2308
|
requestMethod,
|
|
2280
2309
|
requestUrl,
|
|
@@ -2369,8 +2398,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2369
2398
|
};
|
|
2370
2399
|
|
|
2371
2400
|
// src/puppeteer/context.ts
|
|
2372
|
-
|
|
2373
|
-
var PuppeteerBrowserContext = class extends
|
|
2401
|
+
var import_utils7 = require("@letsscrapedata/utils");
|
|
2402
|
+
var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
2374
2403
|
#lsdBrowser;
|
|
2375
2404
|
#browserIdx;
|
|
2376
2405
|
#browserContextIdx;
|
|
@@ -2394,7 +2423,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2394
2423
|
}
|
|
2395
2424
|
const pages = await this.#browserContext.pages();
|
|
2396
2425
|
const openType = this.#lsdBrowser.browserCreationMethod();
|
|
2397
|
-
const lastStatusUpdateTime =
|
|
2426
|
+
const lastStatusUpdateTime = (0, import_utils7.getCurrentUnixTime)();
|
|
2398
2427
|
for (const page of pages) {
|
|
2399
2428
|
const pageInfo = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType, openTime: this.#createTime, lastStatusUpdateTime, taskId: 0 };
|
|
2400
2429
|
const lsdPage = new PuppeteerPage(this, page, pageInfo);
|
|
@@ -2402,7 +2431,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2402
2431
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2403
2432
|
}
|
|
2404
2433
|
this.#lsdPages.push(lsdPage);
|
|
2405
|
-
|
|
2434
|
+
(0, import_utils7.loginfo)(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2406
2435
|
}
|
|
2407
2436
|
}
|
|
2408
2437
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "") {
|
|
@@ -2418,7 +2447,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2418
2447
|
this.#browserContextIdx = browserContextIdx;
|
|
2419
2448
|
this.#browserContext = browserContext;
|
|
2420
2449
|
this.#userAgent = userAgent;
|
|
2421
|
-
this.#createTime =
|
|
2450
|
+
this.#createTime = (0, import_utils7.getCurrentUnixTime)();
|
|
2422
2451
|
this.#incognito = incognito === false ? false : true;
|
|
2423
2452
|
this.#proxy = proxy?.server ? proxy : null;
|
|
2424
2453
|
this.#maxPagesPerBrowserContext = maxPagesPerBrowserContext;
|
|
@@ -2436,27 +2465,27 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2436
2465
|
const pageInfo = page.pageInfo;
|
|
2437
2466
|
if (pageInfo) {
|
|
2438
2467
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
2439
|
-
|
|
2468
|
+
(0, import_utils7.logwarn)(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
2440
2469
|
} else {
|
|
2441
|
-
const currentTime =
|
|
2470
|
+
const currentTime = (0, import_utils7.getCurrentUnixTime)();
|
|
2442
2471
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime, lastStatusUpdateTime: currentTime, taskId: 0 };
|
|
2443
2472
|
const lsdPage = new PuppeteerPage(this, page, pageInfo2);
|
|
2444
2473
|
if (this.#userAgent) {
|
|
2445
2474
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2446
2475
|
}
|
|
2447
2476
|
this.#lsdPages.push(lsdPage);
|
|
2448
|
-
|
|
2477
|
+
(0, import_utils7.loginfo)(`##browser ${lsdPage.id()} created`);
|
|
2449
2478
|
}
|
|
2450
2479
|
}
|
|
2451
2480
|
});
|
|
2452
2481
|
this.on("pageClose", (lsdPage) => {
|
|
2453
2482
|
if (!(lsdPage instanceof PuppeteerPage)) {
|
|
2454
|
-
|
|
2483
|
+
(0, import_utils7.logerr)(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
2455
2484
|
return;
|
|
2456
2485
|
}
|
|
2457
2486
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
2458
2487
|
if (idx < 0) {
|
|
2459
|
-
|
|
2488
|
+
(0, import_utils7.logerr)(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
2460
2489
|
return;
|
|
2461
2490
|
}
|
|
2462
2491
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -2482,15 +2511,15 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2482
2511
|
this.#gettingPage = true;
|
|
2483
2512
|
return true;
|
|
2484
2513
|
} else {
|
|
2485
|
-
await
|
|
2514
|
+
await (0, import_utils7.sleep)(200);
|
|
2486
2515
|
}
|
|
2487
2516
|
}
|
|
2488
|
-
|
|
2517
|
+
(0, import_utils7.logwarn)(`Cannot get the gettingLock.`);
|
|
2489
2518
|
return false;
|
|
2490
2519
|
}
|
|
2491
2520
|
#freeGettingLock() {
|
|
2492
2521
|
if (!this.#gettingPage) {
|
|
2493
|
-
|
|
2522
|
+
(0, import_utils7.logwarn)(`Getting lock is already free now.`);
|
|
2494
2523
|
}
|
|
2495
2524
|
this.#gettingPage = false;
|
|
2496
2525
|
}
|
|
@@ -2499,7 +2528,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2499
2528
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
2500
2529
|
}
|
|
2501
2530
|
if (maxPageFreeSeconds <= 0) {
|
|
2502
|
-
|
|
2531
|
+
(0, import_utils7.logwarn)(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
2503
2532
|
return false;
|
|
2504
2533
|
}
|
|
2505
2534
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -2507,7 +2536,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2507
2536
|
return false;
|
|
2508
2537
|
}
|
|
2509
2538
|
try {
|
|
2510
|
-
const maxUpdateTime =
|
|
2539
|
+
const maxUpdateTime = (0, import_utils7.getCurrentUnixTime)() - this.#maxPageFreeSeconds;
|
|
2511
2540
|
let freePages = this.#lsdPages.filter((p) => p.isFree() && p.pageInfo().lastStatusUpdateTime < maxUpdateTime);
|
|
2512
2541
|
if (freePages.length === this.#lsdPages.length) {
|
|
2513
2542
|
freePages = freePages.slice(1);
|
|
@@ -2518,7 +2547,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2518
2547
|
this.#freeGettingLock();
|
|
2519
2548
|
return true;
|
|
2520
2549
|
} catch (err) {
|
|
2521
|
-
|
|
2550
|
+
(0, import_utils7.logerr)(err);
|
|
2522
2551
|
this.#freeGettingLock();
|
|
2523
2552
|
return false;
|
|
2524
2553
|
}
|
|
@@ -2533,7 +2562,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2533
2562
|
}
|
|
2534
2563
|
try {
|
|
2535
2564
|
if (this.#lsdPages.length === 0) {
|
|
2536
|
-
await
|
|
2565
|
+
await (0, import_utils7.sleep)(1e3);
|
|
2537
2566
|
}
|
|
2538
2567
|
let lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
2539
2568
|
if (lsdPage) {
|
|
@@ -2561,7 +2590,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2561
2590
|
return null;
|
|
2562
2591
|
}
|
|
2563
2592
|
} catch (err) {
|
|
2564
|
-
|
|
2593
|
+
(0, import_utils7.logerr)(err);
|
|
2565
2594
|
this.#freeGettingLock();
|
|
2566
2595
|
return null;
|
|
2567
2596
|
}
|
|
@@ -2624,8 +2653,8 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2624
2653
|
};
|
|
2625
2654
|
|
|
2626
2655
|
// src/puppeteer/browser.ts
|
|
2627
|
-
|
|
2628
|
-
var PuppeteerBrowser = class extends
|
|
2656
|
+
var import_utils8 = require("@letsscrapedata/utils");
|
|
2657
|
+
var PuppeteerBrowser = class extends import_node_events6.default {
|
|
2629
2658
|
#browser;
|
|
2630
2659
|
#browserIdx;
|
|
2631
2660
|
#lsdBrowserContexts;
|
|
@@ -2673,35 +2702,35 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2673
2702
|
this.#executablePath = executablePath;
|
|
2674
2703
|
this.#nextBrowserContextIdx = 1;
|
|
2675
2704
|
this.#closeFreePagesIntervalId = null;
|
|
2676
|
-
|
|
2705
|
+
(0, import_utils8.loginfo)(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
2677
2706
|
const browserContexts = browser.browserContexts();
|
|
2678
2707
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
2679
2708
|
for (const browserContext of browserContexts) {
|
|
2680
2709
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent());
|
|
2681
2710
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2682
|
-
|
|
2711
|
+
(0, import_utils8.loginfo)(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
2683
2712
|
}
|
|
2684
2713
|
browser.on("disconnected", () => {
|
|
2685
|
-
|
|
2714
|
+
(0, import_utils8.loginfo)(`##browser ${this.id()} disconnected`);
|
|
2686
2715
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
2687
|
-
|
|
2716
|
+
(0, import_utils8.logerr)(`${this.id()} has browserContexts when disconnected`);
|
|
2688
2717
|
}
|
|
2689
2718
|
});
|
|
2690
2719
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
2691
2720
|
if (!(lsdBrowserContext instanceof PuppeteerBrowserContext)) {
|
|
2692
|
-
|
|
2721
|
+
(0, import_utils8.logerr)(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
2693
2722
|
return;
|
|
2694
2723
|
}
|
|
2695
2724
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
2696
2725
|
if (idx < 0) {
|
|
2697
|
-
|
|
2726
|
+
(0, import_utils8.logerr)(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
2698
2727
|
return;
|
|
2699
2728
|
}
|
|
2700
|
-
|
|
2729
|
+
(0, import_utils8.loginfo)(`##browser ${lsdBrowserContext.id()} closed
|
|
2701
2730
|
`);
|
|
2702
2731
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
2703
2732
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
2704
|
-
|
|
2733
|
+
(0, import_utils8.loginfo)(`##browser ${this.id()} has no browserContexts now`);
|
|
2705
2734
|
}
|
|
2706
2735
|
return;
|
|
2707
2736
|
});
|
|
@@ -2719,7 +2748,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2719
2748
|
// 常用方法(按常见调用顺序排序)
|
|
2720
2749
|
async newBrowserContext(options) {
|
|
2721
2750
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
2722
|
-
|
|
2751
|
+
(0, import_utils8.logwarn)(`##browser ${this.id()} can not create more new browserContext`);
|
|
2723
2752
|
return null;
|
|
2724
2753
|
}
|
|
2725
2754
|
const browserContextOptions = {};
|
|
@@ -2731,7 +2760,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2731
2760
|
const userAgent = options?.userAgent ? options.userAgent : "";
|
|
2732
2761
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent);
|
|
2733
2762
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2734
|
-
|
|
2763
|
+
(0, import_utils8.loginfo)(`##browser ${lsdBrowserContext.id()} created`);
|
|
2735
2764
|
return lsdBrowserContext;
|
|
2736
2765
|
}
|
|
2737
2766
|
async close() {
|
|
@@ -2784,7 +2813,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2784
2813
|
};
|
|
2785
2814
|
|
|
2786
2815
|
// src/cheerio/page.ts
|
|
2787
|
-
|
|
2816
|
+
var import_node_events7 = __toESM(require("events"), 1);
|
|
2788
2817
|
|
|
2789
2818
|
// ../../node_modules/cheerio/lib/esm/options.js
|
|
2790
2819
|
var defaultOpts = {
|
|
@@ -16656,29 +16685,34 @@ var CheerioElement = class _CheerioElement {
|
|
|
16656
16685
|
return Array.from(Object.keys(element.attribs));
|
|
16657
16686
|
}
|
|
16658
16687
|
}
|
|
16659
|
-
#findNodes(selector) {
|
|
16688
|
+
#findNodes(selector, absolute) {
|
|
16660
16689
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
16661
16690
|
throw new Error("Do not support XPath in cheerio.");
|
|
16662
16691
|
}
|
|
16663
16692
|
if (selector === ".") {
|
|
16664
16693
|
return [this.#node];
|
|
16665
16694
|
}
|
|
16666
|
-
const
|
|
16667
|
-
|
|
16668
|
-
|
|
16695
|
+
const nodes = [];
|
|
16696
|
+
const cheerioNode = !absolute ? this.#node.find(selector) : this.#node._root?.find(selector);
|
|
16697
|
+
if (!cheerioNode) {
|
|
16698
|
+
return nodes;
|
|
16699
|
+
} else if (cheerioNode.length > 0) {
|
|
16669
16700
|
const len = cheerioNode.length;
|
|
16670
16701
|
for (let i = 0; i < len; i++) {
|
|
16671
16702
|
nodes.push(cheerioNode.eq(i));
|
|
16672
16703
|
}
|
|
16673
16704
|
return nodes;
|
|
16674
16705
|
} else {
|
|
16675
|
-
return
|
|
16706
|
+
return nodes;
|
|
16676
16707
|
}
|
|
16677
16708
|
}
|
|
16678
|
-
async findElement(selectorOrXpath) {
|
|
16709
|
+
async findElement(selectorOrXpath, iframeOptions = [], absolute = false) {
|
|
16710
|
+
if (!iframeOptions) {
|
|
16711
|
+
return null;
|
|
16712
|
+
}
|
|
16679
16713
|
const selectors = typeof selectorOrXpath === "string" ? [selectorOrXpath] : selectorOrXpath;
|
|
16680
16714
|
for (const selector of selectors) {
|
|
16681
|
-
const nodes = this.#findNodes(selector);
|
|
16715
|
+
const nodes = this.#findNodes(selector, absolute);
|
|
16682
16716
|
if (nodes.length > 0) {
|
|
16683
16717
|
const cheerioElement = new _CheerioElement(nodes[0]);
|
|
16684
16718
|
return cheerioElement;
|
|
@@ -16686,10 +16720,13 @@ var CheerioElement = class _CheerioElement {
|
|
|
16686
16720
|
}
|
|
16687
16721
|
return null;
|
|
16688
16722
|
}
|
|
16689
|
-
async findElements(selectorOrXpath) {
|
|
16723
|
+
async findElements(selectorOrXpath, iframeOptions = [], absolute = false) {
|
|
16724
|
+
if (!iframeOptions) {
|
|
16725
|
+
return [];
|
|
16726
|
+
}
|
|
16690
16727
|
const selectors = typeof selectorOrXpath === "string" ? [selectorOrXpath] : selectorOrXpath;
|
|
16691
16728
|
for (const selector of selectors) {
|
|
16692
|
-
const nodes = this.#findNodes(selector);
|
|
16729
|
+
const nodes = this.#findNodes(selector, absolute);
|
|
16693
16730
|
if (nodes.length > 0) {
|
|
16694
16731
|
const cheerioElements = nodes.map((node) => new _CheerioElement(node));
|
|
16695
16732
|
return cheerioElements;
|
|
@@ -16738,19 +16775,13 @@ var CheerioElement = class _CheerioElement {
|
|
|
16738
16775
|
async screenshot() {
|
|
16739
16776
|
throw new Error("Not supported in CheerioElement.");
|
|
16740
16777
|
}
|
|
16741
|
-
async scrollBy() {
|
|
16742
|
-
throw new Error("Not supported in CheerioElement.");
|
|
16743
|
-
}
|
|
16744
16778
|
async scrollIntoView() {
|
|
16745
16779
|
throw new Error("Not supported in CheerioElement.");
|
|
16746
16780
|
}
|
|
16747
|
-
async scrollTo() {
|
|
16748
|
-
throw new Error("Not supported in CheerioElement.");
|
|
16749
|
-
}
|
|
16750
16781
|
};
|
|
16751
16782
|
|
|
16752
16783
|
// src/cheerio/page.ts
|
|
16753
|
-
var CheerioPage = class extends
|
|
16784
|
+
var CheerioPage = class extends import_node_events7.default {
|
|
16754
16785
|
#document;
|
|
16755
16786
|
constructor(html3 = "") {
|
|
16756
16787
|
super();
|
|
@@ -16867,6 +16898,12 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
16867
16898
|
async screenshot() {
|
|
16868
16899
|
throw new Error("Not supported in CheerioPage.");
|
|
16869
16900
|
}
|
|
16901
|
+
async scrollBy() {
|
|
16902
|
+
throw new Error("Not supported in CheerioElement.");
|
|
16903
|
+
}
|
|
16904
|
+
async scrollTo() {
|
|
16905
|
+
throw new Error("Not supported in CheerioElement.");
|
|
16906
|
+
}
|
|
16870
16907
|
async setCookies() {
|
|
16871
16908
|
throw new Error("Not supported in CheerioPage.");
|
|
16872
16909
|
}
|
|
@@ -16915,10 +16952,10 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
16915
16952
|
};
|
|
16916
16953
|
|
|
16917
16954
|
// src/controller/controller.ts
|
|
16918
|
-
|
|
16919
|
-
|
|
16920
|
-
|
|
16921
|
-
|
|
16955
|
+
var import_os = __toESM(require("os"), 1);
|
|
16956
|
+
var import_puppeteer = __toESM(require("puppeteer"), 1);
|
|
16957
|
+
var import_playwright = __toESM(require("playwright"), 1);
|
|
16958
|
+
var import_utils15 = require("@letsscrapedata/utils");
|
|
16922
16959
|
var LsdBrowserController = class _LsdBrowserController {
|
|
16923
16960
|
static #forbidConstructor = false;
|
|
16924
16961
|
#nextBrowserIdx;
|
|
@@ -16930,19 +16967,19 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
16930
16967
|
if (_LsdBrowserController.#forbidConstructor) {
|
|
16931
16968
|
throw new Error("Only one LsdBrowserController instance can be created!");
|
|
16932
16969
|
}
|
|
16933
|
-
this.#osPlatform =
|
|
16970
|
+
this.#osPlatform = import_os.default.platform();
|
|
16934
16971
|
this.#nextBrowserIdx = 1;
|
|
16935
16972
|
_LsdBrowserController.#forbidConstructor = true;
|
|
16936
16973
|
}
|
|
16937
16974
|
#playwrightBrowserType(browserType, connectFlag = false) {
|
|
16938
16975
|
if (browserType === "chromium") {
|
|
16939
|
-
return
|
|
16976
|
+
return import_playwright.default.chromium;
|
|
16940
16977
|
} else if (connectFlag) {
|
|
16941
16978
|
throw new Error(`playwright only can connect to chromium browser, not support ${browserType} browser`);
|
|
16942
16979
|
} else if (browserType === "firefox") {
|
|
16943
|
-
return
|
|
16980
|
+
return import_playwright.default.firefox;
|
|
16944
16981
|
} else if (browserType === "webkit") {
|
|
16945
|
-
return
|
|
16982
|
+
return import_playwright.default.webkit;
|
|
16946
16983
|
} else {
|
|
16947
16984
|
throw new Error(`Invalid playwright browserType ${browserType}`);
|
|
16948
16985
|
}
|
|
@@ -16974,17 +17011,17 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
16974
17011
|
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, timeout, args, executablePath, headless, incognito, proxy, proxyPerBrowserContext, userDataDir, userAgent };
|
|
16975
17012
|
let idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--incoginto"));
|
|
16976
17013
|
if (idx >= 0) {
|
|
16977
|
-
|
|
17014
|
+
(0, import_utils15.logwarn)(`Please use options.incognito instead when launching new browser.`);
|
|
16978
17015
|
args.splice(idx, 1);
|
|
16979
17016
|
}
|
|
16980
17017
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--proxy-server"));
|
|
16981
17018
|
if (idx >= 0) {
|
|
16982
|
-
|
|
17019
|
+
(0, import_utils15.logwarn)(`Please use options.proxy instead when launching new browser.`);
|
|
16983
17020
|
args.splice(idx, 1);
|
|
16984
17021
|
}
|
|
16985
17022
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--user-data-dir"));
|
|
16986
17023
|
if (idx >= 0) {
|
|
16987
|
-
|
|
17024
|
+
(0, import_utils15.logwarn)(`Please use options.userDataDir instead when launching new browser.`);
|
|
16988
17025
|
args.splice(idx, 1);
|
|
16989
17026
|
}
|
|
16990
17027
|
if (browserControllerType === "playwright") {
|
|
@@ -17036,9 +17073,9 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17036
17073
|
launchOptions.args = args;
|
|
17037
17074
|
}
|
|
17038
17075
|
if (!actOptions.executablePath) {
|
|
17039
|
-
actOptions.executablePath =
|
|
17076
|
+
actOptions.executablePath = import_puppeteer.default.executablePath();
|
|
17040
17077
|
}
|
|
17041
|
-
const browser = await
|
|
17078
|
+
const browser = await import_puppeteer.default.launch(launchOptions);
|
|
17042
17079
|
const lsdBrowser = new PuppeteerBrowser(browser, browserType, "launch", actOptions, this.#nextBrowserIdx++);
|
|
17043
17080
|
return lsdBrowser;
|
|
17044
17081
|
} else {
|
|
@@ -17057,7 +17094,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17057
17094
|
return lsdBrowser;
|
|
17058
17095
|
} else if (browserControllerType === "puppeteer") {
|
|
17059
17096
|
this.#puppeteerProduct(browserType);
|
|
17060
|
-
const browser = await
|
|
17097
|
+
const browser = await import_puppeteer.default.connect({ browserURL: browserUrl });
|
|
17061
17098
|
const lsdBrowser = new PuppeteerBrowser(browser, browserType, "connect", options, this.#nextBrowserIdx++);
|
|
17062
17099
|
;
|
|
17063
17100
|
return lsdBrowser;
|
|
@@ -17067,7 +17104,8 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17067
17104
|
}
|
|
17068
17105
|
};
|
|
17069
17106
|
var controller = new LsdBrowserController();
|
|
17070
|
-
export
|
|
17107
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
17108
|
+
0 && (module.exports = {
|
|
17071
17109
|
CheerioElement,
|
|
17072
17110
|
CheerioPage,
|
|
17073
17111
|
PlaywrightBrowser,
|
|
@@ -17079,4 +17117,4 @@ export {
|
|
|
17079
17117
|
PuppeteerElement,
|
|
17080
17118
|
PuppeteerPage,
|
|
17081
17119
|
defaultProxy
|
|
17082
|
-
};
|
|
17120
|
+
});
|