@letsscrapedata/controller 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index.mjs → index.cjs} +216 -178
- package/dist/{index.d.mts → index.d.cts} +18 -13
- package/dist/index.d.ts +18 -13
- package/dist/index.js +196 -198
- package/package.json +3 -2
package/dist/index.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
var __create = Object.create;
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -28,13 +27,12 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
28
27
|
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
29
28
|
mod
|
|
30
29
|
));
|
|
31
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
32
30
|
|
|
33
31
|
// ../../node_modules/boolbase/index.js
|
|
34
32
|
var require_boolbase = __commonJS({
|
|
35
|
-
"../../node_modules/boolbase/index.js"(
|
|
33
|
+
"../../node_modules/boolbase/index.js"(exports, module) {
|
|
36
34
|
"use strict";
|
|
37
|
-
|
|
35
|
+
module.exports = {
|
|
38
36
|
trueFunc: function trueFunc2() {
|
|
39
37
|
return true;
|
|
40
38
|
},
|
|
@@ -45,23 +43,6 @@ var require_boolbase = __commonJS({
|
|
|
45
43
|
}
|
|
46
44
|
});
|
|
47
45
|
|
|
48
|
-
// src/index.ts
|
|
49
|
-
var src_exports = {};
|
|
50
|
-
__export(src_exports, {
|
|
51
|
-
CheerioElement: () => CheerioElement,
|
|
52
|
-
CheerioPage: () => CheerioPage,
|
|
53
|
-
PlaywrightBrowser: () => PlaywrightBrowser,
|
|
54
|
-
PlaywrightBrowserContext: () => PlaywrightBrowserContext,
|
|
55
|
-
PlaywrightElement: () => PlaywrightElement,
|
|
56
|
-
PlaywrightPage: () => PlaywrightPage,
|
|
57
|
-
PuppeteerBrowser: () => PuppeteerBrowser,
|
|
58
|
-
PuppeteerBrowserContext: () => PuppeteerBrowserContext,
|
|
59
|
-
PuppeteerElement: () => PuppeteerElement,
|
|
60
|
-
PuppeteerPage: () => PuppeteerPage,
|
|
61
|
-
defaultProxy: () => defaultProxy
|
|
62
|
-
});
|
|
63
|
-
module.exports = __toCommonJS(src_exports);
|
|
64
|
-
|
|
65
46
|
// src/types/types.ts
|
|
66
47
|
var defaultProxy = {
|
|
67
48
|
server: "default",
|
|
@@ -73,18 +54,18 @@ var defaultProxy = {
|
|
|
73
54
|
};
|
|
74
55
|
|
|
75
56
|
// src/playwright/browser.ts
|
|
76
|
-
|
|
57
|
+
import EventEmitter3 from "events";
|
|
77
58
|
|
|
78
59
|
// src/playwright/context.ts
|
|
79
|
-
|
|
80
|
-
|
|
60
|
+
import EventEmitter2 from "events";
|
|
61
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo2, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
|
|
81
62
|
|
|
82
63
|
// src/playwright/page.ts
|
|
83
|
-
|
|
84
|
-
|
|
64
|
+
import EventEmitter from "events";
|
|
65
|
+
import { getCurrentUnixTime, logerr, loginfo, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
85
66
|
|
|
86
67
|
// src/playwright/element.ts
|
|
87
|
-
|
|
68
|
+
import { unreachable } from "@letsscrapedata/utils";
|
|
88
69
|
var PlaywrightElement = class _PlaywrightElement {
|
|
89
70
|
#frame;
|
|
90
71
|
#locator;
|
|
@@ -300,39 +281,21 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
300
281
|
}
|
|
301
282
|
break;
|
|
302
283
|
default:
|
|
303
|
-
|
|
284
|
+
unreachable(type);
|
|
304
285
|
}
|
|
305
286
|
return true;
|
|
306
287
|
}
|
|
307
288
|
async screenshot(options) {
|
|
308
289
|
return await this.#locator.screenshot(options);
|
|
309
290
|
}
|
|
310
|
-
async scrollBy(x, y) {
|
|
311
|
-
await this.#locator.page().evaluate(
|
|
312
|
-
([x2, y2]) => {
|
|
313
|
-
window.scrollBy(x2, y2);
|
|
314
|
-
},
|
|
315
|
-
[x, y]
|
|
316
|
-
);
|
|
317
|
-
return true;
|
|
318
|
-
}
|
|
319
291
|
async scrollIntoView() {
|
|
320
292
|
await this.#locator.scrollIntoViewIfNeeded();
|
|
321
293
|
return true;
|
|
322
294
|
}
|
|
323
|
-
async scrollTo(x, y) {
|
|
324
|
-
await this.#locator.page().evaluate(
|
|
325
|
-
([x2, y2]) => {
|
|
326
|
-
window.scrollTo(x2, y2);
|
|
327
|
-
},
|
|
328
|
-
[x, y]
|
|
329
|
-
);
|
|
330
|
-
return true;
|
|
331
|
-
}
|
|
332
295
|
};
|
|
333
296
|
|
|
334
297
|
// src/playwright/page.ts
|
|
335
|
-
var PlaywrightPage = class extends
|
|
298
|
+
var PlaywrightPage = class extends EventEmitter {
|
|
336
299
|
#lsdBrowserContext;
|
|
337
300
|
#page;
|
|
338
301
|
#status;
|
|
@@ -354,7 +317,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
354
317
|
const cookieItems = await this.#getCookies(page);
|
|
355
318
|
const domainSet = new Set(cookieItems.map((c) => c.domain));
|
|
356
319
|
if (domainSet.size !== 1) {
|
|
357
|
-
|
|
320
|
+
logwarn(`Domains in clearCookies: ${Array.from(domainSet.values())}`);
|
|
358
321
|
}
|
|
359
322
|
for (const domain of domainSet.values()) {
|
|
360
323
|
await browserContext.clearCookies({ domain });
|
|
@@ -506,9 +469,9 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
506
469
|
const page = this.#page;
|
|
507
470
|
const pageId = this.#pageId;
|
|
508
471
|
page.on("close", async () => {
|
|
509
|
-
|
|
472
|
+
loginfo(`##browser ${pageId} closed`);
|
|
510
473
|
if (!page.pageInfo) {
|
|
511
|
-
|
|
474
|
+
logerr(`Logic error in page.on("close")`);
|
|
512
475
|
}
|
|
513
476
|
this.emit("pageClose");
|
|
514
477
|
this.#lsdBrowserContext.emit("pageClose", this);
|
|
@@ -522,12 +485,12 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
522
485
|
popupPageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
523
486
|
pageInfo.openType = "popup";
|
|
524
487
|
} else {
|
|
525
|
-
|
|
488
|
+
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
526
489
|
}
|
|
527
|
-
|
|
490
|
+
loginfo(`##browser ${pageId} has popup ${popupPageId}`);
|
|
528
491
|
this.emit("pagePopup", pageInfo);
|
|
529
492
|
} else {
|
|
530
|
-
|
|
493
|
+
logerr(`##browser ${pageId} has popup page with null page`);
|
|
531
494
|
}
|
|
532
495
|
});
|
|
533
496
|
}
|
|
@@ -539,7 +502,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
539
502
|
this.#lsdBrowserContext = browserContext;
|
|
540
503
|
this.#page = page;
|
|
541
504
|
this.#status = "free";
|
|
542
|
-
const currentTime =
|
|
505
|
+
const currentTime = getCurrentUnixTime();
|
|
543
506
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0 } = pageInfo ? pageInfo : {};
|
|
544
507
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId };
|
|
545
508
|
this.#pageId = `page${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
@@ -587,7 +550,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
587
550
|
}
|
|
588
551
|
return true;
|
|
589
552
|
} catch (err) {
|
|
590
|
-
|
|
553
|
+
logerr(err);
|
|
591
554
|
return false;
|
|
592
555
|
}
|
|
593
556
|
}
|
|
@@ -771,6 +734,30 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
771
734
|
}
|
|
772
735
|
return await this.#page.screenshot(options);
|
|
773
736
|
}
|
|
737
|
+
async scrollBy(x, y) {
|
|
738
|
+
if (!this.#page) {
|
|
739
|
+
throw new Error("No valid page");
|
|
740
|
+
}
|
|
741
|
+
await this.#page.evaluate(
|
|
742
|
+
([x2, y2]) => {
|
|
743
|
+
window.scrollBy(x2, y2);
|
|
744
|
+
},
|
|
745
|
+
[x, y]
|
|
746
|
+
);
|
|
747
|
+
return true;
|
|
748
|
+
}
|
|
749
|
+
async scrollTo(x, y) {
|
|
750
|
+
if (!this.#page) {
|
|
751
|
+
throw new Error("No valid page");
|
|
752
|
+
}
|
|
753
|
+
await this.#page.evaluate(
|
|
754
|
+
([x2, y2]) => {
|
|
755
|
+
window.scrollTo(x2, y2);
|
|
756
|
+
},
|
|
757
|
+
[x, y]
|
|
758
|
+
);
|
|
759
|
+
return true;
|
|
760
|
+
}
|
|
774
761
|
async setCookies(cookies) {
|
|
775
762
|
if (!this.#page) {
|
|
776
763
|
throw new Error("No valid page");
|
|
@@ -831,7 +818,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
831
818
|
}
|
|
832
819
|
return true;
|
|
833
820
|
} catch (err) {
|
|
834
|
-
|
|
821
|
+
logerr(err);
|
|
835
822
|
return false;
|
|
836
823
|
}
|
|
837
824
|
}
|
|
@@ -841,7 +828,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
841
828
|
}
|
|
842
829
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
843
830
|
if (actOptions.length <= 0) {
|
|
844
|
-
|
|
831
|
+
logwarn("Invalid paras in setRequestInterception");
|
|
845
832
|
return false;
|
|
846
833
|
}
|
|
847
834
|
const firstRequestInterception = this.#resquestInterceptionOptions.length <= 0;
|
|
@@ -852,7 +839,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
852
839
|
this.#resquestInterceptionOptions.push(option);
|
|
853
840
|
break;
|
|
854
841
|
default:
|
|
855
|
-
(
|
|
842
|
+
unreachable2(option.action);
|
|
856
843
|
}
|
|
857
844
|
}
|
|
858
845
|
if (firstRequestInterception && this.#resquestInterceptionOptions.length > 0) {
|
|
@@ -876,7 +863,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
876
863
|
});
|
|
877
864
|
break;
|
|
878
865
|
default:
|
|
879
|
-
(
|
|
866
|
+
unreachable2(action);
|
|
880
867
|
}
|
|
881
868
|
return true;
|
|
882
869
|
} else {
|
|
@@ -885,7 +872,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
885
872
|
await route.continue();
|
|
886
873
|
return true;
|
|
887
874
|
} catch (err) {
|
|
888
|
-
|
|
875
|
+
logerr(err);
|
|
889
876
|
return false;
|
|
890
877
|
}
|
|
891
878
|
});
|
|
@@ -903,7 +890,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
903
890
|
return;
|
|
904
891
|
}
|
|
905
892
|
for (const option of this.#responseInterceptionOptions) {
|
|
906
|
-
const { requestMatch, responseMatch,
|
|
893
|
+
const { requestMatch, responseMatch, responseItems, handler, handlerOptions } = option;
|
|
907
894
|
let matchedFlag = !requestMatch || this.#checkRequestMatch(request, requestMatch);
|
|
908
895
|
if (matchedFlag && responseMatch) {
|
|
909
896
|
const { minLength, maxLength } = responseMatch;
|
|
@@ -916,20 +903,20 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
916
903
|
if (!matchedFlag) {
|
|
917
904
|
continue;
|
|
918
905
|
}
|
|
919
|
-
if (Array.isArray(
|
|
906
|
+
if (Array.isArray(responseItems)) {
|
|
920
907
|
const requestMethod = request.method();
|
|
921
908
|
const requestUrl = request.url();
|
|
922
909
|
const reqData2 = request.postData();
|
|
923
910
|
const requestData = reqData2 ? reqData2 : "";
|
|
924
911
|
const responseData = await response.text();
|
|
925
|
-
|
|
912
|
+
responseItems.push({
|
|
926
913
|
pageUrl,
|
|
927
914
|
requestMethod,
|
|
928
915
|
requestUrl,
|
|
929
916
|
requestData,
|
|
930
917
|
responseData
|
|
931
918
|
});
|
|
932
|
-
|
|
919
|
+
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
933
920
|
}
|
|
934
921
|
if (typeof handler === "function") {
|
|
935
922
|
await handler(response, handlerOptions);
|
|
@@ -937,7 +924,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
937
924
|
}
|
|
938
925
|
return;
|
|
939
926
|
} catch (err) {
|
|
940
|
-
|
|
927
|
+
logerr(err);
|
|
941
928
|
return;
|
|
942
929
|
}
|
|
943
930
|
}
|
|
@@ -952,7 +939,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
952
939
|
}
|
|
953
940
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
954
941
|
for (const option of actOptions) {
|
|
955
|
-
if (option?.
|
|
942
|
+
if (option?.responseItems || option?.handler) {
|
|
956
943
|
this.#responseInterceptionOptions.push(option);
|
|
957
944
|
} else {
|
|
958
945
|
throw new Error(`Invalid ResponseInterceptionOption`);
|
|
@@ -1037,7 +1024,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
1037
1024
|
};
|
|
1038
1025
|
|
|
1039
1026
|
// src/playwright/context.ts
|
|
1040
|
-
var PlaywrightBrowserContext = class extends
|
|
1027
|
+
var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
1041
1028
|
#lsdBrowser;
|
|
1042
1029
|
#browserIdx;
|
|
1043
1030
|
#browserContextIdx;
|
|
@@ -1056,12 +1043,12 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1056
1043
|
}
|
|
1057
1044
|
const pages = this.#browserContext.pages();
|
|
1058
1045
|
const openType = this.#lsdBrowser.browserCreationMethod();
|
|
1059
|
-
const lastStatusUpdateTime = (
|
|
1046
|
+
const lastStatusUpdateTime = getCurrentUnixTime2();
|
|
1060
1047
|
for (const page of pages) {
|
|
1061
1048
|
const pageInfo = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType, openTime: this.#createTime, lastStatusUpdateTime, taskId: 0 };
|
|
1062
1049
|
const lsdPage = new PlaywrightPage(this, page, pageInfo);
|
|
1063
1050
|
this.#lsdPages.push(lsdPage);
|
|
1064
|
-
(
|
|
1051
|
+
loginfo2(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1065
1052
|
}
|
|
1066
1053
|
}
|
|
1067
1054
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0) {
|
|
@@ -1076,7 +1063,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1076
1063
|
this.#browserIdx = browserIdx;
|
|
1077
1064
|
this.#browserContextIdx = browserContextIdx;
|
|
1078
1065
|
this.#browserContext = browserContext;
|
|
1079
|
-
this.#createTime = (
|
|
1066
|
+
this.#createTime = getCurrentUnixTime2();
|
|
1080
1067
|
this.#incognito = incognito === false ? false : true;
|
|
1081
1068
|
this.#proxy = proxy?.server ? proxy : null;
|
|
1082
1069
|
this.#maxPagesPerBrowserContext = maxPagesPerBrowserContext;
|
|
@@ -1089,29 +1076,29 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1089
1076
|
const pageInfo = page.pageInfo;
|
|
1090
1077
|
if (pageInfo) {
|
|
1091
1078
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
1092
|
-
(
|
|
1079
|
+
logwarn2(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
1093
1080
|
} else {
|
|
1094
|
-
const currentTime = (
|
|
1081
|
+
const currentTime = getCurrentUnixTime2();
|
|
1095
1082
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime, lastStatusUpdateTime: currentTime, taskId: 0 };
|
|
1096
1083
|
const lsdPage = new PlaywrightPage(this, page, pageInfo2);
|
|
1097
1084
|
this.#lsdPages.push(lsdPage);
|
|
1098
|
-
(
|
|
1085
|
+
loginfo2(`##browser ${lsdPage.id()} created`);
|
|
1099
1086
|
}
|
|
1100
1087
|
});
|
|
1101
1088
|
browserContext.on("close", (bc) => {
|
|
1102
1089
|
if (browserContext !== bc) {
|
|
1103
|
-
(
|
|
1090
|
+
logerr2(`##browser different browserContext in browserContext.on("close")`);
|
|
1104
1091
|
}
|
|
1105
1092
|
this.#lsdBrowser.emit("browserContextClose", this);
|
|
1106
1093
|
});
|
|
1107
1094
|
this.on("pageClose", (lsdPage) => {
|
|
1108
1095
|
if (!(lsdPage instanceof PlaywrightPage)) {
|
|
1109
|
-
(
|
|
1096
|
+
logerr2(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
1110
1097
|
return;
|
|
1111
1098
|
}
|
|
1112
1099
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
1113
1100
|
if (idx < 0) {
|
|
1114
|
-
(
|
|
1101
|
+
logerr2(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
1115
1102
|
return;
|
|
1116
1103
|
}
|
|
1117
1104
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -1134,15 +1121,15 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1134
1121
|
this.#gettingPage = true;
|
|
1135
1122
|
return true;
|
|
1136
1123
|
} else {
|
|
1137
|
-
await
|
|
1124
|
+
await sleep(200);
|
|
1138
1125
|
}
|
|
1139
1126
|
}
|
|
1140
|
-
(
|
|
1127
|
+
logwarn2(`Cannot get the gettingLock.`);
|
|
1141
1128
|
return false;
|
|
1142
1129
|
}
|
|
1143
1130
|
#freeGettingLock() {
|
|
1144
1131
|
if (!this.#gettingPage) {
|
|
1145
|
-
(
|
|
1132
|
+
logwarn2(`Getting lock is already free now.`);
|
|
1146
1133
|
}
|
|
1147
1134
|
this.#gettingPage = false;
|
|
1148
1135
|
}
|
|
@@ -1151,7 +1138,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1151
1138
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
1152
1139
|
}
|
|
1153
1140
|
if (maxPageFreeSeconds <= 0) {
|
|
1154
|
-
(
|
|
1141
|
+
logwarn2(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
1155
1142
|
return false;
|
|
1156
1143
|
}
|
|
1157
1144
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -1159,7 +1146,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1159
1146
|
return false;
|
|
1160
1147
|
}
|
|
1161
1148
|
try {
|
|
1162
|
-
const maxUpdateTime = (
|
|
1149
|
+
const maxUpdateTime = getCurrentUnixTime2() - this.#maxPageFreeSeconds;
|
|
1163
1150
|
let freePages = this.#lsdPages.filter((p) => p.isFree() && p.pageInfo().lastStatusUpdateTime < maxUpdateTime);
|
|
1164
1151
|
if (freePages.length === this.#lsdPages.length) {
|
|
1165
1152
|
freePages = freePages.slice(1);
|
|
@@ -1170,7 +1157,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1170
1157
|
this.#freeGettingLock();
|
|
1171
1158
|
return true;
|
|
1172
1159
|
} catch (err) {
|
|
1173
|
-
(
|
|
1160
|
+
logerr2(err);
|
|
1174
1161
|
this.#freeGettingLock();
|
|
1175
1162
|
return false;
|
|
1176
1163
|
}
|
|
@@ -1211,7 +1198,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1211
1198
|
return null;
|
|
1212
1199
|
}
|
|
1213
1200
|
} catch (err) {
|
|
1214
|
-
(
|
|
1201
|
+
logerr2(err);
|
|
1215
1202
|
this.#freeGettingLock();
|
|
1216
1203
|
return null;
|
|
1217
1204
|
}
|
|
@@ -1274,8 +1261,8 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1274
1261
|
};
|
|
1275
1262
|
|
|
1276
1263
|
// src/playwright/browser.ts
|
|
1277
|
-
|
|
1278
|
-
var PlaywrightBrowser = class extends
|
|
1264
|
+
import { logerr as logerr3, loginfo as loginfo3, logwarn as logwarn3 } from "@letsscrapedata/utils";
|
|
1265
|
+
var PlaywrightBrowser = class extends EventEmitter3 {
|
|
1279
1266
|
#browser;
|
|
1280
1267
|
#browserIdx;
|
|
1281
1268
|
#lsdBrowserContexts;
|
|
@@ -1320,38 +1307,38 @@ var PlaywrightBrowser = class extends import_node_events3.default {
|
|
|
1320
1307
|
this.#executablePath = executablePath;
|
|
1321
1308
|
this.#nextBrowserContextIdx = 1;
|
|
1322
1309
|
this.#closeFreePagesIntervalId = null;
|
|
1323
|
-
(
|
|
1310
|
+
loginfo3(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1324
1311
|
const browserContexts = browser.contexts();
|
|
1325
1312
|
if (browserContexts.length > 0) {
|
|
1326
|
-
(
|
|
1313
|
+
logwarn3(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
1327
1314
|
}
|
|
1328
1315
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
1329
1316
|
for (const browserContext of browserContexts) {
|
|
1330
1317
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds());
|
|
1331
1318
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1332
|
-
(
|
|
1319
|
+
loginfo3(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1333
1320
|
}
|
|
1334
1321
|
browser.on("disconnected", () => {
|
|
1335
|
-
(
|
|
1322
|
+
loginfo3(`##browser ${this.id()} disconnected`);
|
|
1336
1323
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1337
|
-
(
|
|
1324
|
+
logerr3(`${this.id()} has browserContexts when disconnected`);
|
|
1338
1325
|
}
|
|
1339
1326
|
});
|
|
1340
1327
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
1341
1328
|
if (!(lsdBrowserContext instanceof PlaywrightBrowserContext)) {
|
|
1342
|
-
(
|
|
1329
|
+
logerr3(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
1343
1330
|
return;
|
|
1344
1331
|
}
|
|
1345
1332
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
1346
1333
|
if (idx < 0) {
|
|
1347
|
-
(
|
|
1334
|
+
logerr3(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1348
1335
|
return;
|
|
1349
1336
|
}
|
|
1350
|
-
(
|
|
1337
|
+
loginfo3(`##browser ${lsdBrowserContext.id()} closed
|
|
1351
1338
|
`);
|
|
1352
1339
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1353
1340
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1354
|
-
(
|
|
1341
|
+
loginfo3(`##browser ${this.id()} has no browserContexts now`);
|
|
1355
1342
|
}
|
|
1356
1343
|
return;
|
|
1357
1344
|
});
|
|
@@ -1369,7 +1356,7 @@ var PlaywrightBrowser = class extends import_node_events3.default {
|
|
|
1369
1356
|
// 常用方法(按常见调用顺序排序)
|
|
1370
1357
|
async newBrowserContext(options) {
|
|
1371
1358
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
1372
|
-
(
|
|
1359
|
+
logwarn3(`##browser ${this.id()} can not create more new browserContext`);
|
|
1373
1360
|
return null;
|
|
1374
1361
|
}
|
|
1375
1362
|
const browserContextOptions = {};
|
|
@@ -1384,7 +1371,7 @@ var PlaywrightBrowser = class extends import_node_events3.default {
|
|
|
1384
1371
|
const browserContext = await this.#browser.newContext(browserContextOptions);
|
|
1385
1372
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds());
|
|
1386
1373
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1387
|
-
(
|
|
1374
|
+
loginfo3(`##browser ${lsdBrowserContext.id()} created`);
|
|
1388
1375
|
return lsdBrowserContext;
|
|
1389
1376
|
}
|
|
1390
1377
|
async close() {
|
|
@@ -1435,16 +1422,16 @@ var PlaywrightBrowser = class extends import_node_events3.default {
|
|
|
1435
1422
|
};
|
|
1436
1423
|
|
|
1437
1424
|
// src/puppeteer/browser.ts
|
|
1438
|
-
|
|
1425
|
+
import EventEmitter6 from "events";
|
|
1439
1426
|
|
|
1440
1427
|
// src/puppeteer/context.ts
|
|
1441
|
-
|
|
1428
|
+
import EventEmitter5 from "events";
|
|
1442
1429
|
|
|
1443
1430
|
// src/puppeteer/page.ts
|
|
1444
|
-
|
|
1431
|
+
import EventEmitter4 from "events";
|
|
1445
1432
|
|
|
1446
1433
|
// src/puppeteer/element.ts
|
|
1447
|
-
|
|
1434
|
+
import { logerr as logerr4, unreachable as unreachable3 } from "@letsscrapedata/utils";
|
|
1448
1435
|
var PuppeteerElement = class _PuppeteerElement {
|
|
1449
1436
|
#frame;
|
|
1450
1437
|
#$ele;
|
|
@@ -1532,7 +1519,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1532
1519
|
}
|
|
1533
1520
|
return retObj;
|
|
1534
1521
|
} catch (err) {
|
|
1535
|
-
(
|
|
1522
|
+
logerr4(err);
|
|
1536
1523
|
return retObj;
|
|
1537
1524
|
}
|
|
1538
1525
|
}
|
|
@@ -1682,44 +1669,24 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1682
1669
|
}
|
|
1683
1670
|
break;
|
|
1684
1671
|
default:
|
|
1685
|
-
(
|
|
1672
|
+
unreachable3(type);
|
|
1686
1673
|
}
|
|
1687
1674
|
return true;
|
|
1688
1675
|
}
|
|
1689
1676
|
async screenshot(options) {
|
|
1690
1677
|
return await this.#$ele.screenshot(options);
|
|
1691
1678
|
}
|
|
1692
|
-
async scrollBy(x, y) {
|
|
1693
|
-
await this.#frame.evaluate(
|
|
1694
|
-
(x2, y2) => {
|
|
1695
|
-
window.scrollBy(x2, y2);
|
|
1696
|
-
},
|
|
1697
|
-
x,
|
|
1698
|
-
y
|
|
1699
|
-
);
|
|
1700
|
-
return true;
|
|
1701
|
-
}
|
|
1702
1679
|
async scrollIntoView() {
|
|
1703
1680
|
await this.#frame.evaluate((ele) => {
|
|
1704
1681
|
ele.scrollIntoView();
|
|
1705
1682
|
}, this.#$ele);
|
|
1706
1683
|
return true;
|
|
1707
1684
|
}
|
|
1708
|
-
async scrollTo(x, y) {
|
|
1709
|
-
await this.#frame.evaluate(
|
|
1710
|
-
(x2, y2) => {
|
|
1711
|
-
window.scrollTo(x2, y2);
|
|
1712
|
-
},
|
|
1713
|
-
x,
|
|
1714
|
-
y
|
|
1715
|
-
);
|
|
1716
|
-
return true;
|
|
1717
|
-
}
|
|
1718
1685
|
};
|
|
1719
1686
|
|
|
1720
1687
|
// src/puppeteer/page.ts
|
|
1721
|
-
|
|
1722
|
-
var PuppeteerPage = class extends
|
|
1688
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo4, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1689
|
+
var PuppeteerPage = class extends EventEmitter4 {
|
|
1723
1690
|
#lsdBrowserContext;
|
|
1724
1691
|
#page;
|
|
1725
1692
|
#status;
|
|
@@ -1878,9 +1845,9 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
1878
1845
|
const page = this.#page;
|
|
1879
1846
|
const pageId = this.#pageId;
|
|
1880
1847
|
page.on("close", async () => {
|
|
1881
|
-
(
|
|
1848
|
+
loginfo4(`##browser ${pageId} closed`);
|
|
1882
1849
|
if (!page.pageInfo) {
|
|
1883
|
-
(
|
|
1850
|
+
logerr5(`Logic error in page.on("close")`);
|
|
1884
1851
|
}
|
|
1885
1852
|
this.emit("pageClose");
|
|
1886
1853
|
this.#lsdBrowserContext.emit("pageClose", this);
|
|
@@ -1894,12 +1861,12 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
1894
1861
|
popupPageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
1895
1862
|
pageInfo.openType = "popup";
|
|
1896
1863
|
} else {
|
|
1897
|
-
(
|
|
1864
|
+
logerr5(`##browser ${pageId} has popup without page.pageInfo`);
|
|
1898
1865
|
}
|
|
1899
|
-
(
|
|
1866
|
+
loginfo4(`##browser ${pageId} has popup ${popupPageId}`);
|
|
1900
1867
|
this.emit("pagePopup", pageInfo);
|
|
1901
1868
|
} else {
|
|
1902
|
-
(
|
|
1869
|
+
logerr5(`##browser ${pageId} has popup page with null page`);
|
|
1903
1870
|
}
|
|
1904
1871
|
});
|
|
1905
1872
|
}
|
|
@@ -1911,7 +1878,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
1911
1878
|
this.#lsdBrowserContext = browserContext;
|
|
1912
1879
|
this.#page = page;
|
|
1913
1880
|
this.#status = "free";
|
|
1914
|
-
const currentTime = (
|
|
1881
|
+
const currentTime = getCurrentUnixTime3();
|
|
1915
1882
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0 } = pageInfo ? pageInfo : {};
|
|
1916
1883
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId };
|
|
1917
1884
|
this.#pageId = `page${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
@@ -2142,6 +2109,30 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2142
2109
|
}
|
|
2143
2110
|
return await this.#page.screenshot(options);
|
|
2144
2111
|
}
|
|
2112
|
+
async scrollBy(x, y) {
|
|
2113
|
+
if (!this.#page) {
|
|
2114
|
+
throw new Error("No valid page");
|
|
2115
|
+
}
|
|
2116
|
+
await this.#page.evaluate(
|
|
2117
|
+
([x2, y2]) => {
|
|
2118
|
+
window.scrollBy(x2, y2);
|
|
2119
|
+
},
|
|
2120
|
+
[x, y]
|
|
2121
|
+
);
|
|
2122
|
+
return true;
|
|
2123
|
+
}
|
|
2124
|
+
async scrollTo(x, y) {
|
|
2125
|
+
if (!this.#page) {
|
|
2126
|
+
throw new Error("No valid page");
|
|
2127
|
+
}
|
|
2128
|
+
await this.#page.evaluate(
|
|
2129
|
+
([x2, y2]) => {
|
|
2130
|
+
window.scrollTo(x2, y2);
|
|
2131
|
+
},
|
|
2132
|
+
[x, y]
|
|
2133
|
+
);
|
|
2134
|
+
return true;
|
|
2135
|
+
}
|
|
2145
2136
|
async setCookies(cookies) {
|
|
2146
2137
|
if (!this.#page) {
|
|
2147
2138
|
throw new Error("No valid page");
|
|
@@ -2239,7 +2230,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2239
2230
|
});
|
|
2240
2231
|
break;
|
|
2241
2232
|
default:
|
|
2242
|
-
(
|
|
2233
|
+
unreachable4(action);
|
|
2243
2234
|
}
|
|
2244
2235
|
return true;
|
|
2245
2236
|
}
|
|
@@ -2274,7 +2265,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2274
2265
|
return false;
|
|
2275
2266
|
}
|
|
2276
2267
|
for (const option of actOptions) {
|
|
2277
|
-
const { requestMatch, responseMatch,
|
|
2268
|
+
const { requestMatch, responseMatch, responseItems, handler, handlerOptions } = option;
|
|
2278
2269
|
let matchedFlag = !requestMatch || this.#checkRequestMatch(request, requestMatch);
|
|
2279
2270
|
if (matchedFlag && responseMatch) {
|
|
2280
2271
|
const { minLength, maxLength } = responseMatch;
|
|
@@ -2287,13 +2278,13 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2287
2278
|
if (!matchedFlag) {
|
|
2288
2279
|
continue;
|
|
2289
2280
|
}
|
|
2290
|
-
if (Array.isArray(
|
|
2281
|
+
if (Array.isArray(responseItems)) {
|
|
2291
2282
|
const requestMethod = request.method();
|
|
2292
2283
|
const requestUrl = request.url();
|
|
2293
2284
|
const reqData2 = request.postData();
|
|
2294
2285
|
const requestData = reqData2 ? reqData2 : "";
|
|
2295
2286
|
const responseData = await response.text();
|
|
2296
|
-
|
|
2287
|
+
responseItems.push({
|
|
2297
2288
|
pageUrl,
|
|
2298
2289
|
requestMethod,
|
|
2299
2290
|
requestUrl,
|
|
@@ -2388,8 +2379,8 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2388
2379
|
};
|
|
2389
2380
|
|
|
2390
2381
|
// src/puppeteer/context.ts
|
|
2391
|
-
|
|
2392
|
-
var PuppeteerBrowserContext = class extends
|
|
2382
|
+
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo5, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
2383
|
+
var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
2393
2384
|
#lsdBrowser;
|
|
2394
2385
|
#browserIdx;
|
|
2395
2386
|
#browserContextIdx;
|
|
@@ -2413,7 +2404,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2413
2404
|
}
|
|
2414
2405
|
const pages = await this.#browserContext.pages();
|
|
2415
2406
|
const openType = this.#lsdBrowser.browserCreationMethod();
|
|
2416
|
-
const lastStatusUpdateTime = (
|
|
2407
|
+
const lastStatusUpdateTime = getCurrentUnixTime4();
|
|
2417
2408
|
for (const page of pages) {
|
|
2418
2409
|
const pageInfo = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType, openTime: this.#createTime, lastStatusUpdateTime, taskId: 0 };
|
|
2419
2410
|
const lsdPage = new PuppeteerPage(this, page, pageInfo);
|
|
@@ -2421,7 +2412,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2421
2412
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2422
2413
|
}
|
|
2423
2414
|
this.#lsdPages.push(lsdPage);
|
|
2424
|
-
(
|
|
2415
|
+
loginfo5(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2425
2416
|
}
|
|
2426
2417
|
}
|
|
2427
2418
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "") {
|
|
@@ -2437,7 +2428,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2437
2428
|
this.#browserContextIdx = browserContextIdx;
|
|
2438
2429
|
this.#browserContext = browserContext;
|
|
2439
2430
|
this.#userAgent = userAgent;
|
|
2440
|
-
this.#createTime = (
|
|
2431
|
+
this.#createTime = getCurrentUnixTime4();
|
|
2441
2432
|
this.#incognito = incognito === false ? false : true;
|
|
2442
2433
|
this.#proxy = proxy?.server ? proxy : null;
|
|
2443
2434
|
this.#maxPagesPerBrowserContext = maxPagesPerBrowserContext;
|
|
@@ -2455,27 +2446,27 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2455
2446
|
const pageInfo = page.pageInfo;
|
|
2456
2447
|
if (pageInfo) {
|
|
2457
2448
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
2458
|
-
(
|
|
2449
|
+
logwarn4(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
2459
2450
|
} else {
|
|
2460
|
-
const currentTime = (
|
|
2451
|
+
const currentTime = getCurrentUnixTime4();
|
|
2461
2452
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime, lastStatusUpdateTime: currentTime, taskId: 0 };
|
|
2462
2453
|
const lsdPage = new PuppeteerPage(this, page, pageInfo2);
|
|
2463
2454
|
if (this.#userAgent) {
|
|
2464
2455
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2465
2456
|
}
|
|
2466
2457
|
this.#lsdPages.push(lsdPage);
|
|
2467
|
-
(
|
|
2458
|
+
loginfo5(`##browser ${lsdPage.id()} created`);
|
|
2468
2459
|
}
|
|
2469
2460
|
}
|
|
2470
2461
|
});
|
|
2471
2462
|
this.on("pageClose", (lsdPage) => {
|
|
2472
2463
|
if (!(lsdPage instanceof PuppeteerPage)) {
|
|
2473
|
-
(
|
|
2464
|
+
logerr6(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
2474
2465
|
return;
|
|
2475
2466
|
}
|
|
2476
2467
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
2477
2468
|
if (idx < 0) {
|
|
2478
|
-
(
|
|
2469
|
+
logerr6(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
2479
2470
|
return;
|
|
2480
2471
|
}
|
|
2481
2472
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -2501,15 +2492,15 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2501
2492
|
this.#gettingPage = true;
|
|
2502
2493
|
return true;
|
|
2503
2494
|
} else {
|
|
2504
|
-
await (
|
|
2495
|
+
await sleep2(200);
|
|
2505
2496
|
}
|
|
2506
2497
|
}
|
|
2507
|
-
(
|
|
2498
|
+
logwarn4(`Cannot get the gettingLock.`);
|
|
2508
2499
|
return false;
|
|
2509
2500
|
}
|
|
2510
2501
|
#freeGettingLock() {
|
|
2511
2502
|
if (!this.#gettingPage) {
|
|
2512
|
-
(
|
|
2503
|
+
logwarn4(`Getting lock is already free now.`);
|
|
2513
2504
|
}
|
|
2514
2505
|
this.#gettingPage = false;
|
|
2515
2506
|
}
|
|
@@ -2518,7 +2509,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2518
2509
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
2519
2510
|
}
|
|
2520
2511
|
if (maxPageFreeSeconds <= 0) {
|
|
2521
|
-
(
|
|
2512
|
+
logwarn4(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
2522
2513
|
return false;
|
|
2523
2514
|
}
|
|
2524
2515
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -2526,7 +2517,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2526
2517
|
return false;
|
|
2527
2518
|
}
|
|
2528
2519
|
try {
|
|
2529
|
-
const maxUpdateTime = (
|
|
2520
|
+
const maxUpdateTime = getCurrentUnixTime4() - this.#maxPageFreeSeconds;
|
|
2530
2521
|
let freePages = this.#lsdPages.filter((p) => p.isFree() && p.pageInfo().lastStatusUpdateTime < maxUpdateTime);
|
|
2531
2522
|
if (freePages.length === this.#lsdPages.length) {
|
|
2532
2523
|
freePages = freePages.slice(1);
|
|
@@ -2537,7 +2528,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2537
2528
|
this.#freeGettingLock();
|
|
2538
2529
|
return true;
|
|
2539
2530
|
} catch (err) {
|
|
2540
|
-
(
|
|
2531
|
+
logerr6(err);
|
|
2541
2532
|
this.#freeGettingLock();
|
|
2542
2533
|
return false;
|
|
2543
2534
|
}
|
|
@@ -2552,7 +2543,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2552
2543
|
}
|
|
2553
2544
|
try {
|
|
2554
2545
|
if (this.#lsdPages.length === 0) {
|
|
2555
|
-
await (
|
|
2546
|
+
await sleep2(1e3);
|
|
2556
2547
|
}
|
|
2557
2548
|
let lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
2558
2549
|
if (lsdPage) {
|
|
@@ -2580,7 +2571,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2580
2571
|
return null;
|
|
2581
2572
|
}
|
|
2582
2573
|
} catch (err) {
|
|
2583
|
-
(
|
|
2574
|
+
logerr6(err);
|
|
2584
2575
|
this.#freeGettingLock();
|
|
2585
2576
|
return null;
|
|
2586
2577
|
}
|
|
@@ -2643,8 +2634,8 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
2643
2634
|
};
|
|
2644
2635
|
|
|
2645
2636
|
// src/puppeteer/browser.ts
|
|
2646
|
-
|
|
2647
|
-
var PuppeteerBrowser = class extends
|
|
2637
|
+
import { logerr as logerr7, loginfo as loginfo6, logwarn as logwarn5 } from "@letsscrapedata/utils";
|
|
2638
|
+
var PuppeteerBrowser = class extends EventEmitter6 {
|
|
2648
2639
|
#browser;
|
|
2649
2640
|
#browserIdx;
|
|
2650
2641
|
#lsdBrowserContexts;
|
|
@@ -2692,35 +2683,35 @@ var PuppeteerBrowser = class extends import_node_events6.default {
|
|
|
2692
2683
|
this.#executablePath = executablePath;
|
|
2693
2684
|
this.#nextBrowserContextIdx = 1;
|
|
2694
2685
|
this.#closeFreePagesIntervalId = null;
|
|
2695
|
-
(
|
|
2686
|
+
loginfo6(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
2696
2687
|
const browserContexts = browser.browserContexts();
|
|
2697
2688
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
2698
2689
|
for (const browserContext of browserContexts) {
|
|
2699
2690
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent());
|
|
2700
2691
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2701
|
-
(
|
|
2692
|
+
loginfo6(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
2702
2693
|
}
|
|
2703
2694
|
browser.on("disconnected", () => {
|
|
2704
|
-
(
|
|
2695
|
+
loginfo6(`##browser ${this.id()} disconnected`);
|
|
2705
2696
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
2706
|
-
(
|
|
2697
|
+
logerr7(`${this.id()} has browserContexts when disconnected`);
|
|
2707
2698
|
}
|
|
2708
2699
|
});
|
|
2709
2700
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
2710
2701
|
if (!(lsdBrowserContext instanceof PuppeteerBrowserContext)) {
|
|
2711
|
-
(
|
|
2702
|
+
logerr7(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
2712
2703
|
return;
|
|
2713
2704
|
}
|
|
2714
2705
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
2715
2706
|
if (idx < 0) {
|
|
2716
|
-
(
|
|
2707
|
+
logerr7(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
2717
2708
|
return;
|
|
2718
2709
|
}
|
|
2719
|
-
(
|
|
2710
|
+
loginfo6(`##browser ${lsdBrowserContext.id()} closed
|
|
2720
2711
|
`);
|
|
2721
2712
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
2722
2713
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
2723
|
-
(
|
|
2714
|
+
loginfo6(`##browser ${this.id()} has no browserContexts now`);
|
|
2724
2715
|
}
|
|
2725
2716
|
return;
|
|
2726
2717
|
});
|
|
@@ -2738,7 +2729,7 @@ var PuppeteerBrowser = class extends import_node_events6.default {
|
|
|
2738
2729
|
// 常用方法(按常见调用顺序排序)
|
|
2739
2730
|
async newBrowserContext(options) {
|
|
2740
2731
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
2741
|
-
(
|
|
2732
|
+
logwarn5(`##browser ${this.id()} can not create more new browserContext`);
|
|
2742
2733
|
return null;
|
|
2743
2734
|
}
|
|
2744
2735
|
const browserContextOptions = {};
|
|
@@ -2750,7 +2741,7 @@ var PuppeteerBrowser = class extends import_node_events6.default {
|
|
|
2750
2741
|
const userAgent = options?.userAgent ? options.userAgent : "";
|
|
2751
2742
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent);
|
|
2752
2743
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2753
|
-
(
|
|
2744
|
+
loginfo6(`##browser ${lsdBrowserContext.id()} created`);
|
|
2754
2745
|
return lsdBrowserContext;
|
|
2755
2746
|
}
|
|
2756
2747
|
async close() {
|
|
@@ -2803,7 +2794,7 @@ var PuppeteerBrowser = class extends import_node_events6.default {
|
|
|
2803
2794
|
};
|
|
2804
2795
|
|
|
2805
2796
|
// src/cheerio/page.ts
|
|
2806
|
-
|
|
2797
|
+
import EventEmitter7 from "events";
|
|
2807
2798
|
|
|
2808
2799
|
// ../../node_modules/cheerio/lib/esm/options.js
|
|
2809
2800
|
var defaultOpts = {
|
|
@@ -16675,29 +16666,34 @@ var CheerioElement = class _CheerioElement {
|
|
|
16675
16666
|
return Array.from(Object.keys(element.attribs));
|
|
16676
16667
|
}
|
|
16677
16668
|
}
|
|
16678
|
-
#findNodes(selector) {
|
|
16669
|
+
#findNodes(selector, absolute) {
|
|
16679
16670
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
16680
16671
|
throw new Error("Do not support XPath in cheerio.");
|
|
16681
16672
|
}
|
|
16682
16673
|
if (selector === ".") {
|
|
16683
16674
|
return [this.#node];
|
|
16684
16675
|
}
|
|
16685
|
-
const
|
|
16686
|
-
|
|
16687
|
-
|
|
16676
|
+
const nodes = [];
|
|
16677
|
+
const cheerioNode = !absolute ? this.#node.find(selector) : this.#node._root?.find(selector);
|
|
16678
|
+
if (!cheerioNode) {
|
|
16679
|
+
return nodes;
|
|
16680
|
+
} else if (cheerioNode.length > 0) {
|
|
16688
16681
|
const len = cheerioNode.length;
|
|
16689
16682
|
for (let i = 0; i < len; i++) {
|
|
16690
16683
|
nodes.push(cheerioNode.eq(i));
|
|
16691
16684
|
}
|
|
16692
16685
|
return nodes;
|
|
16693
16686
|
} else {
|
|
16694
|
-
return
|
|
16687
|
+
return nodes;
|
|
16695
16688
|
}
|
|
16696
16689
|
}
|
|
16697
|
-
async findElement(selectorOrXpath) {
|
|
16690
|
+
async findElement(selectorOrXpath, iframeOptions = [], absolute = false) {
|
|
16691
|
+
if (!iframeOptions) {
|
|
16692
|
+
return null;
|
|
16693
|
+
}
|
|
16698
16694
|
const selectors = typeof selectorOrXpath === "string" ? [selectorOrXpath] : selectorOrXpath;
|
|
16699
16695
|
for (const selector of selectors) {
|
|
16700
|
-
const nodes = this.#findNodes(selector);
|
|
16696
|
+
const nodes = this.#findNodes(selector, absolute);
|
|
16701
16697
|
if (nodes.length > 0) {
|
|
16702
16698
|
const cheerioElement = new _CheerioElement(nodes[0]);
|
|
16703
16699
|
return cheerioElement;
|
|
@@ -16705,10 +16701,13 @@ var CheerioElement = class _CheerioElement {
|
|
|
16705
16701
|
}
|
|
16706
16702
|
return null;
|
|
16707
16703
|
}
|
|
16708
|
-
async findElements(selectorOrXpath) {
|
|
16704
|
+
async findElements(selectorOrXpath, iframeOptions = [], absolute = false) {
|
|
16705
|
+
if (!iframeOptions) {
|
|
16706
|
+
return [];
|
|
16707
|
+
}
|
|
16709
16708
|
const selectors = typeof selectorOrXpath === "string" ? [selectorOrXpath] : selectorOrXpath;
|
|
16710
16709
|
for (const selector of selectors) {
|
|
16711
|
-
const nodes = this.#findNodes(selector);
|
|
16710
|
+
const nodes = this.#findNodes(selector, absolute);
|
|
16712
16711
|
if (nodes.length > 0) {
|
|
16713
16712
|
const cheerioElements = nodes.map((node) => new _CheerioElement(node));
|
|
16714
16713
|
return cheerioElements;
|
|
@@ -16757,19 +16756,13 @@ var CheerioElement = class _CheerioElement {
|
|
|
16757
16756
|
async screenshot() {
|
|
16758
16757
|
throw new Error("Not supported in CheerioElement.");
|
|
16759
16758
|
}
|
|
16760
|
-
async scrollBy() {
|
|
16761
|
-
throw new Error("Not supported in CheerioElement.");
|
|
16762
|
-
}
|
|
16763
16759
|
async scrollIntoView() {
|
|
16764
16760
|
throw new Error("Not supported in CheerioElement.");
|
|
16765
16761
|
}
|
|
16766
|
-
async scrollTo() {
|
|
16767
|
-
throw new Error("Not supported in CheerioElement.");
|
|
16768
|
-
}
|
|
16769
16762
|
};
|
|
16770
16763
|
|
|
16771
16764
|
// src/cheerio/page.ts
|
|
16772
|
-
var CheerioPage = class extends
|
|
16765
|
+
var CheerioPage = class extends EventEmitter7 {
|
|
16773
16766
|
#document;
|
|
16774
16767
|
constructor(html3 = "") {
|
|
16775
16768
|
super();
|
|
@@ -16886,6 +16879,12 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
16886
16879
|
async screenshot() {
|
|
16887
16880
|
throw new Error("Not supported in CheerioPage.");
|
|
16888
16881
|
}
|
|
16882
|
+
async scrollBy() {
|
|
16883
|
+
throw new Error("Not supported in CheerioElement.");
|
|
16884
|
+
}
|
|
16885
|
+
async scrollTo() {
|
|
16886
|
+
throw new Error("Not supported in CheerioElement.");
|
|
16887
|
+
}
|
|
16889
16888
|
async setCookies() {
|
|
16890
16889
|
throw new Error("Not supported in CheerioPage.");
|
|
16891
16890
|
}
|
|
@@ -16934,10 +16933,10 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
16934
16933
|
};
|
|
16935
16934
|
|
|
16936
16935
|
// src/controller/controller.ts
|
|
16937
|
-
|
|
16938
|
-
|
|
16939
|
-
|
|
16940
|
-
|
|
16936
|
+
import os from "os";
|
|
16937
|
+
import puppeteer from "puppeteer";
|
|
16938
|
+
import playwright from "playwright";
|
|
16939
|
+
import { logwarn as logwarn6 } from "@letsscrapedata/utils";
|
|
16941
16940
|
var LsdBrowserController = class _LsdBrowserController {
|
|
16942
16941
|
static #forbidConstructor = false;
|
|
16943
16942
|
#nextBrowserIdx;
|
|
@@ -16949,19 +16948,19 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
16949
16948
|
if (_LsdBrowserController.#forbidConstructor) {
|
|
16950
16949
|
throw new Error("Only one LsdBrowserController instance can be created!");
|
|
16951
16950
|
}
|
|
16952
|
-
this.#osPlatform =
|
|
16951
|
+
this.#osPlatform = os.platform();
|
|
16953
16952
|
this.#nextBrowserIdx = 1;
|
|
16954
16953
|
_LsdBrowserController.#forbidConstructor = true;
|
|
16955
16954
|
}
|
|
16956
16955
|
#playwrightBrowserType(browserType, connectFlag = false) {
|
|
16957
16956
|
if (browserType === "chromium") {
|
|
16958
|
-
return
|
|
16957
|
+
return playwright.chromium;
|
|
16959
16958
|
} else if (connectFlag) {
|
|
16960
16959
|
throw new Error(`playwright only can connect to chromium browser, not support ${browserType} browser`);
|
|
16961
16960
|
} else if (browserType === "firefox") {
|
|
16962
|
-
return
|
|
16961
|
+
return playwright.firefox;
|
|
16963
16962
|
} else if (browserType === "webkit") {
|
|
16964
|
-
return
|
|
16963
|
+
return playwright.webkit;
|
|
16965
16964
|
} else {
|
|
16966
16965
|
throw new Error(`Invalid playwright browserType ${browserType}`);
|
|
16967
16966
|
}
|
|
@@ -16993,17 +16992,17 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
16993
16992
|
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, timeout, args, executablePath, headless, incognito, proxy, proxyPerBrowserContext, userDataDir, userAgent };
|
|
16994
16993
|
let idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--incoginto"));
|
|
16995
16994
|
if (idx >= 0) {
|
|
16996
|
-
(
|
|
16995
|
+
logwarn6(`Please use options.incognito instead when launching new browser.`);
|
|
16997
16996
|
args.splice(idx, 1);
|
|
16998
16997
|
}
|
|
16999
16998
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--proxy-server"));
|
|
17000
16999
|
if (idx >= 0) {
|
|
17001
|
-
(
|
|
17000
|
+
logwarn6(`Please use options.proxy instead when launching new browser.`);
|
|
17002
17001
|
args.splice(idx, 1);
|
|
17003
17002
|
}
|
|
17004
17003
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--user-data-dir"));
|
|
17005
17004
|
if (idx >= 0) {
|
|
17006
|
-
(
|
|
17005
|
+
logwarn6(`Please use options.userDataDir instead when launching new browser.`);
|
|
17007
17006
|
args.splice(idx, 1);
|
|
17008
17007
|
}
|
|
17009
17008
|
if (browserControllerType === "playwright") {
|
|
@@ -17055,9 +17054,9 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17055
17054
|
launchOptions.args = args;
|
|
17056
17055
|
}
|
|
17057
17056
|
if (!actOptions.executablePath) {
|
|
17058
|
-
actOptions.executablePath =
|
|
17057
|
+
actOptions.executablePath = puppeteer.executablePath();
|
|
17059
17058
|
}
|
|
17060
|
-
const browser = await
|
|
17059
|
+
const browser = await puppeteer.launch(launchOptions);
|
|
17061
17060
|
const lsdBrowser = new PuppeteerBrowser(browser, browserType, "launch", actOptions, this.#nextBrowserIdx++);
|
|
17062
17061
|
return lsdBrowser;
|
|
17063
17062
|
} else {
|
|
@@ -17076,7 +17075,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17076
17075
|
return lsdBrowser;
|
|
17077
17076
|
} else if (browserControllerType === "puppeteer") {
|
|
17078
17077
|
this.#puppeteerProduct(browserType);
|
|
17079
|
-
const browser = await
|
|
17078
|
+
const browser = await puppeteer.connect({ browserURL: browserUrl });
|
|
17080
17079
|
const lsdBrowser = new PuppeteerBrowser(browser, browserType, "connect", options, this.#nextBrowserIdx++);
|
|
17081
17080
|
;
|
|
17082
17081
|
return lsdBrowser;
|
|
@@ -17086,8 +17085,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17086
17085
|
}
|
|
17087
17086
|
};
|
|
17088
17087
|
var controller = new LsdBrowserController();
|
|
17089
|
-
|
|
17090
|
-
0 && (module.exports = {
|
|
17088
|
+
export {
|
|
17091
17089
|
CheerioElement,
|
|
17092
17090
|
CheerioPage,
|
|
17093
17091
|
PlaywrightBrowser,
|
|
@@ -17099,4 +17097,4 @@ var controller = new LsdBrowserController();
|
|
|
17099
17097
|
PuppeteerElement,
|
|
17100
17098
|
PuppeteerPage,
|
|
17101
17099
|
defaultProxy
|
|
17102
|
-
}
|
|
17100
|
+
};
|