@letsscrapedata/controller 0.0.51 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +279 -56
- package/dist/index.d.cts +117 -10
- package/dist/index.d.ts +117 -10
- package/dist/index.js +279 -56
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -78,6 +78,32 @@ var import_utils4 = require("@letsscrapedata/utils");
|
|
|
78
78
|
var import_node_events = __toESM(require("events"), 1);
|
|
79
79
|
var import_utils3 = require("@letsscrapedata/utils");
|
|
80
80
|
|
|
81
|
+
// src/utils/common.ts
|
|
82
|
+
/**
 * Converts a `data-*` attribute name into the camelCase key used for it in a
 * dataset-style object, e.g. `data-user-first-name` -> `userFirstName`.
 *
 * @param {string} attr Attribute name, expected to start with `data-`.
 * @returns {string} The camelCase key, or `""` when `attr` does not start with `data-`.
 */
function convertDataAttributeName(attr) {
  if (!attr.startsWith("data-")) {
    return "";
  }
  const parts = attr.split("-");
  // parts[0] is always "data"; parts[1] exists because attr contains "data-".
  let name = parts[1];
  for (const part of parts.slice(2)) {
    if (!part) {
      // Empty segment produced by consecutive or trailing hyphens: ignored.
      continue;
    }
    // Capitalize the FIRST character of the segment (index 0). Using index 1
    // produced keys such as "fooAar" and threw on one-character segments.
    name = `${name}${part[0].toUpperCase()}${part.slice(1).toLowerCase()}`;
  }
  return name;
}
|
|
96
|
+
/**
 * Builds the CSS selector used to locate an iframe element from an iframe option.
 *
 * Precedence: a non-empty string `src` (prefix match on the `src` attribute),
 * then a non-empty string `id` (exact match), otherwise the raw `selector`.
 * A non-string `src` (for example a RegExp) is skipped here and falls through
 * to `id` / `selector`.
 *
 * @param {{src?: unknown, id?: unknown, selector?: string}} iframeOption Iframe description.
 * @returns {string} The selector; `""` when none of the fields yields one.
 */
function getIframeSelector(iframeOption) {
  const { src = "", id = "", selector = "" } = iframeOption;
  // Backslash-escape `"` and `\` so the value cannot terminate or corrupt the
  // double-quoted attribute string it is embedded in.
  const quote = (value) => value.replace(/["\\]/g, "\\$&");
  if (typeof src === "string" && src) {
    return `iframe[src^="${quote(src)}"]`;
  } else if (typeof id === "string" && id) {
    return `iframe[id="${quote(id)}"]`;
  } else {
    return selector;
  }
}
|
|
106
|
+
|
|
81
107
|
// src/playwright/element.ts
|
|
82
108
|
var import_utils2 = require("@letsscrapedata/utils");
|
|
83
109
|
var PlaywrightElement = class _PlaywrightElement {
|
|
@@ -98,6 +124,29 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
98
124
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
99
125
|
return names;
|
|
100
126
|
}
|
|
127
|
+
async dataset() {
|
|
128
|
+
try {
|
|
129
|
+
const dataset = await this.#locator.evaluate((node) => node.dataset);
|
|
130
|
+
return dataset;
|
|
131
|
+
} catch (err) {
|
|
132
|
+
return {};
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
async evaluate(func, args) {
|
|
136
|
+
try {
|
|
137
|
+
const frame = this.#frame;
|
|
138
|
+
;
|
|
139
|
+
if (typeof frame.parentFrame === "function") {
|
|
140
|
+
return await frame.evaluate(func, args);
|
|
141
|
+
} else {
|
|
142
|
+
const locator = this.#frame.owner();
|
|
143
|
+
return await locator.evaluate(func, args);
|
|
144
|
+
}
|
|
145
|
+
} catch (err) {
|
|
146
|
+
logerr(err);
|
|
147
|
+
return "";
|
|
148
|
+
}
|
|
149
|
+
}
|
|
101
150
|
/*
|
|
102
151
|
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
103
152
|
if (!parentFrame) {
|
|
@@ -131,22 +180,15 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
131
180
|
return null;
|
|
132
181
|
}
|
|
133
182
|
*/
|
|
134
|
-
#getIframeSelector(iframeOption) {
|
|
135
|
-
const { src = "", selector = "" } = iframeOption;
|
|
136
|
-
if (!src && !selector) {
|
|
137
|
-
throw new Error("Invalid parent frame");
|
|
138
|
-
}
|
|
139
|
-
return selector ? selector : `iframe[src^="${src}"]`;
|
|
140
|
-
}
|
|
141
183
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
142
|
-
return parent.frameLocator(
|
|
184
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
143
185
|
}
|
|
144
186
|
async #getDescendantFrame(parent, iframeOptions) {
|
|
145
187
|
try {
|
|
146
188
|
if (iframeOptions.length <= 0) {
|
|
147
189
|
return null;
|
|
148
190
|
}
|
|
149
|
-
let frameLocator = parent.frameLocator(
|
|
191
|
+
let frameLocator = parent.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
150
192
|
for (const iframeOption of iframeOptions.slice(1)) {
|
|
151
193
|
if (!frameLocator) {
|
|
152
194
|
return null;
|
|
@@ -163,12 +205,12 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
163
205
|
let frame = this.#frame;
|
|
164
206
|
const retObj = { frame, locators: [] };
|
|
165
207
|
if (iframeOptions.length > 0) {
|
|
166
|
-
|
|
167
|
-
if (!
|
|
208
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
209
|
+
if (!childFrame) {
|
|
168
210
|
return retObj;
|
|
169
211
|
}
|
|
170
|
-
retObj.frame =
|
|
171
|
-
parent =
|
|
212
|
+
retObj.frame = childFrame;
|
|
213
|
+
parent = childFrame;
|
|
172
214
|
}
|
|
173
215
|
try {
|
|
174
216
|
let locators = [];
|
|
@@ -353,6 +395,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
353
395
|
#page;
|
|
354
396
|
#status;
|
|
355
397
|
#pageId;
|
|
398
|
+
#closeWhenFree;
|
|
356
399
|
#resquestInterceptionOptions;
|
|
357
400
|
#responseInterceptionOptions;
|
|
358
401
|
#client;
|
|
@@ -483,29 +526,57 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
483
526
|
return null;
|
|
484
527
|
}
|
|
485
528
|
*/
|
|
486
|
-
#
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
529
|
+
async #findDescendantFrame(src, id) {
|
|
530
|
+
if (!this.#page) {
|
|
531
|
+
throw new Error("No valid page");
|
|
532
|
+
}
|
|
533
|
+
const frames = this.#page.frames();
|
|
534
|
+
for (const frame of frames) {
|
|
535
|
+
const url = frame.url();
|
|
536
|
+
if (typeof src === "string" && src) {
|
|
537
|
+
if (url.startsWith(src)) {
|
|
538
|
+
return frame;
|
|
539
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
540
|
+
return frame;
|
|
541
|
+
}
|
|
542
|
+
} else if (src instanceof RegExp) {
|
|
543
|
+
if (url.match(src)) {
|
|
544
|
+
return frame;
|
|
545
|
+
}
|
|
546
|
+
} else if (id) {
|
|
547
|
+
const element = await frame.frameElement();
|
|
548
|
+
if (element) {
|
|
549
|
+
const frameId = await frame.evaluate(([ele, attr]) => ele.getAttribute(attr), [element, "id"]);
|
|
550
|
+
if (frameId === id) {
|
|
551
|
+
return frame;
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
490
555
|
}
|
|
491
|
-
return
|
|
556
|
+
return null;
|
|
492
557
|
}
|
|
493
558
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
494
|
-
return parent.frameLocator(
|
|
559
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
495
560
|
}
|
|
496
|
-
async #
|
|
561
|
+
async #getDescendantFrame(mainFrame, iframeOptions) {
|
|
497
562
|
try {
|
|
498
563
|
if (iframeOptions.length <= 0) {
|
|
499
564
|
return null;
|
|
500
565
|
}
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
566
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
567
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
568
|
+
const frame = await this.#findDescendantFrame(src, id);
|
|
569
|
+
return frame;
|
|
570
|
+
} else {
|
|
571
|
+
let frameLocator = mainFrame.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
572
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
573
|
+
if (!frameLocator) {
|
|
574
|
+
return null;
|
|
575
|
+
}
|
|
576
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
505
577
|
}
|
|
506
|
-
|
|
578
|
+
return frameLocator;
|
|
507
579
|
}
|
|
508
|
-
return frameLocator;
|
|
509
580
|
} catch (err) {
|
|
510
581
|
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
511
582
|
}
|
|
@@ -517,7 +588,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
517
588
|
let frame = this.#page.mainFrame();
|
|
518
589
|
const retObj = { frame, locators: [] };
|
|
519
590
|
if (iframeOptions.length > 0) {
|
|
520
|
-
frame = await this.#
|
|
591
|
+
frame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
521
592
|
if (!frame) {
|
|
522
593
|
return retObj;
|
|
523
594
|
}
|
|
@@ -590,12 +661,32 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
590
661
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
591
662
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
592
663
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
664
|
+
this.#closeWhenFree = false;
|
|
593
665
|
this.#resquestInterceptionOptions = [];
|
|
594
666
|
this.#responseInterceptionOptions = [];
|
|
595
667
|
this.#client = null;
|
|
596
668
|
this.#responseCb = null;
|
|
597
669
|
this.#addPageOn();
|
|
598
670
|
}
|
|
671
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
672
|
+
if (!this.#page) {
|
|
673
|
+
throw new Error("No valid page");
|
|
674
|
+
}
|
|
675
|
+
if (typeof scriptOrFunc === "string") {
|
|
676
|
+
await this.#page.addInitScript({ content: scriptOrFunc });
|
|
677
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
678
|
+
await this.#page.addInitScript(scriptOrFunc, arg);
|
|
679
|
+
} else {
|
|
680
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
681
|
+
}
|
|
682
|
+
return true;
|
|
683
|
+
}
|
|
684
|
+
async addScriptTag(options) {
|
|
685
|
+
if (!this.#page) {
|
|
686
|
+
throw new Error("No valid page");
|
|
687
|
+
}
|
|
688
|
+
return this.#page.addScriptTag(options);
|
|
689
|
+
}
|
|
599
690
|
apiContext() {
|
|
600
691
|
return this.browserContext().apiContext();
|
|
601
692
|
}
|
|
@@ -655,7 +746,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
655
746
|
}
|
|
656
747
|
async close() {
|
|
657
748
|
if (this.#status === "closed") {
|
|
658
|
-
|
|
749
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
659
750
|
return true;
|
|
660
751
|
} else if (this.#status === "busy") {
|
|
661
752
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -668,13 +759,16 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
668
759
|
this.#status = "closed";
|
|
669
760
|
return true;
|
|
670
761
|
}
|
|
762
|
+
closeWhenFree() {
|
|
763
|
+
return this.#closeWhenFree;
|
|
764
|
+
}
|
|
671
765
|
async content(iframeOptions = []) {
|
|
672
766
|
if (!this.#page) {
|
|
673
767
|
throw new Error("No valid page");
|
|
674
768
|
}
|
|
675
769
|
let content = "";
|
|
676
770
|
if (iframeOptions.length > 0) {
|
|
677
|
-
const frameLocator = await this.#
|
|
771
|
+
const frameLocator = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
|
|
678
772
|
if (frameLocator) {
|
|
679
773
|
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
680
774
|
}
|
|
@@ -696,11 +790,18 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
696
790
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
697
791
|
return height;
|
|
698
792
|
}
|
|
699
|
-
async
|
|
793
|
+
async evaluate(func, args) {
|
|
700
794
|
if (!this.#page) {
|
|
701
795
|
throw new Error("No valid page");
|
|
702
796
|
}
|
|
703
|
-
return this.#page.evaluate(
|
|
797
|
+
return this.#page.evaluate(func, args);
|
|
798
|
+
}
|
|
799
|
+
async exposeFunction(name, callbackFunction) {
|
|
800
|
+
if (!this.#page) {
|
|
801
|
+
throw new Error("No valid page");
|
|
802
|
+
}
|
|
803
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
804
|
+
return;
|
|
704
805
|
}
|
|
705
806
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
706
807
|
if (!this.#page) {
|
|
@@ -738,7 +839,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
738
839
|
}
|
|
739
840
|
async free() {
|
|
740
841
|
if (this.#status === "free") {
|
|
741
|
-
|
|
842
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
742
843
|
}
|
|
743
844
|
this.#status = "free";
|
|
744
845
|
await this.clearRequestInterceptions();
|
|
@@ -878,6 +979,10 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
878
979
|
}
|
|
879
980
|
return response;
|
|
880
981
|
}
|
|
982
|
+
setCloseWhenFree(closeWhenFree) {
|
|
983
|
+
this.#closeWhenFree = closeWhenFree;
|
|
984
|
+
return true;
|
|
985
|
+
}
|
|
881
986
|
async setCookies(cookies) {
|
|
882
987
|
if (!this.#page) {
|
|
883
988
|
throw new Error("No valid page");
|
|
@@ -1061,7 +1166,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
1061
1166
|
}
|
|
1062
1167
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
1063
1168
|
if (actOptions.length <= 0) {
|
|
1064
|
-
|
|
1169
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
1065
1170
|
return false;
|
|
1066
1171
|
}
|
|
1067
1172
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
@@ -1763,6 +1868,26 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1763
1868
|
const names = await this.#frame.evaluate((ele) => ele.getAttributeNames(), this.#$ele);
|
|
1764
1869
|
return names;
|
|
1765
1870
|
}
|
|
1871
|
+
async dataset() {
|
|
1872
|
+
try {
|
|
1873
|
+
const attributeNames = await this.attributeNames();
|
|
1874
|
+
const dataset = {};
|
|
1875
|
+
for (const attributeName of attributeNames) {
|
|
1876
|
+
if (!attributeName.startsWith("data-")) {
|
|
1877
|
+
continue;
|
|
1878
|
+
}
|
|
1879
|
+
const val = await this.attribute(attributeName);
|
|
1880
|
+
const key = convertDataAttributeName(attributeName);
|
|
1881
|
+
dataset[key] = val;
|
|
1882
|
+
}
|
|
1883
|
+
return dataset;
|
|
1884
|
+
} catch (err) {
|
|
1885
|
+
return {};
|
|
1886
|
+
}
|
|
1887
|
+
}
|
|
1888
|
+
async evaluate(func, args) {
|
|
1889
|
+
return await this.#frame.evaluate(func, args);
|
|
1890
|
+
}
|
|
1766
1891
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
1767
1892
|
if (!parentFrame) {
|
|
1768
1893
|
throw new Error("Invalid parent frame");
|
|
@@ -1812,13 +1937,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1812
1937
|
let frame = this.#frame;
|
|
1813
1938
|
const retObj = { frame, elementHandles: [] };
|
|
1814
1939
|
if (iframeOptions.length > 0) {
|
|
1815
|
-
|
|
1816
|
-
if (!
|
|
1940
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
1941
|
+
if (!childFrame) {
|
|
1817
1942
|
return retObj;
|
|
1818
1943
|
}
|
|
1819
|
-
retObj.frame =
|
|
1944
|
+
retObj.frame = childFrame;
|
|
1820
1945
|
absolute = true;
|
|
1821
|
-
parent =
|
|
1946
|
+
parent = childFrame;
|
|
1822
1947
|
}
|
|
1823
1948
|
try {
|
|
1824
1949
|
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
@@ -2020,6 +2145,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2020
2145
|
#page;
|
|
2021
2146
|
#status;
|
|
2022
2147
|
#pageId;
|
|
2148
|
+
#closeWhenFree;
|
|
2023
2149
|
#requestInterceptionNum;
|
|
2024
2150
|
#responseInterceptionNum;
|
|
2025
2151
|
#client;
|
|
@@ -2094,15 +2220,41 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2094
2220
|
});
|
|
2095
2221
|
return true;
|
|
2096
2222
|
}
|
|
2223
|
+
async #findDescendantFrame(src, id) {
|
|
2224
|
+
if (!this.#page) {
|
|
2225
|
+
throw new Error("No valid page");
|
|
2226
|
+
}
|
|
2227
|
+
const frames = this.#page.frames();
|
|
2228
|
+
for (const frame of frames) {
|
|
2229
|
+
const url = frame.url();
|
|
2230
|
+
if (typeof src === "string" && src) {
|
|
2231
|
+
if (url.startsWith(src)) {
|
|
2232
|
+
return frame;
|
|
2233
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
2234
|
+
return frame;
|
|
2235
|
+
}
|
|
2236
|
+
} else if (src instanceof RegExp) {
|
|
2237
|
+
if (url.match(src)) {
|
|
2238
|
+
return frame;
|
|
2239
|
+
}
|
|
2240
|
+
} else if (id) {
|
|
2241
|
+
const element = await frame.frameElement();
|
|
2242
|
+
if (element) {
|
|
2243
|
+
const frameId = await frame.evaluate((ele, attr) => ele.getAttribute(attr), element, "id");
|
|
2244
|
+
if (frameId === id) {
|
|
2245
|
+
return frame;
|
|
2246
|
+
}
|
|
2247
|
+
}
|
|
2248
|
+
}
|
|
2249
|
+
}
|
|
2250
|
+
return null;
|
|
2251
|
+
}
|
|
2097
2252
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
2098
2253
|
if (!parentFrame) {
|
|
2099
2254
|
throw new Error("Invalid parent frame");
|
|
2100
2255
|
}
|
|
2101
2256
|
let iframe = null;
|
|
2102
|
-
let { src = ""
|
|
2103
|
-
if (!src && !selector) {
|
|
2104
|
-
throw new Error("Invalid IframeOption");
|
|
2105
|
-
}
|
|
2257
|
+
let { src = "" } = iframeOption;
|
|
2106
2258
|
if (src) {
|
|
2107
2259
|
const childFrames = parentFrame.childFrames();
|
|
2108
2260
|
for (const childFrame of childFrames) {
|
|
@@ -2120,7 +2272,8 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2120
2272
|
}
|
|
2121
2273
|
}
|
|
2122
2274
|
} else {
|
|
2123
|
-
const
|
|
2275
|
+
const frameSelector = getIframeSelector(iframeOption);
|
|
2276
|
+
const $eleIframe = await parentFrame.$(frameSelector);
|
|
2124
2277
|
if ($eleIframe) {
|
|
2125
2278
|
iframe = await $eleIframe.contentFrame();
|
|
2126
2279
|
return iframe;
|
|
@@ -2130,11 +2283,16 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2130
2283
|
}
|
|
2131
2284
|
async #getDescendantFrame(parentFrame, iframeOptions) {
|
|
2132
2285
|
let iframe = parentFrame;
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2286
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
2287
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
2288
|
+
iframe = await this.#findDescendantFrame(src, id);
|
|
2289
|
+
} else {
|
|
2290
|
+
for (const iframeOption of iframeOptions) {
|
|
2291
|
+
if (!iframe) {
|
|
2292
|
+
return null;
|
|
2293
|
+
}
|
|
2294
|
+
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2136
2295
|
}
|
|
2137
|
-
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2138
2296
|
}
|
|
2139
2297
|
return iframe;
|
|
2140
2298
|
}
|
|
@@ -2163,7 +2321,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2163
2321
|
}
|
|
2164
2322
|
return retObj;
|
|
2165
2323
|
} catch (err) {
|
|
2166
|
-
|
|
2324
|
+
loginfo(err);
|
|
2167
2325
|
return retObj;
|
|
2168
2326
|
}
|
|
2169
2327
|
}
|
|
@@ -2216,11 +2374,31 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2216
2374
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
2217
2375
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
2218
2376
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
2377
|
+
this.#closeWhenFree = false;
|
|
2219
2378
|
this.#requestInterceptionNum = 0;
|
|
2220
2379
|
this.#responseInterceptionNum = 0;
|
|
2221
2380
|
this.#client = null;
|
|
2222
2381
|
this.#addPageOn();
|
|
2223
2382
|
}
|
|
2383
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
2384
|
+
if (!this.#page) {
|
|
2385
|
+
throw new Error("No valid page");
|
|
2386
|
+
}
|
|
2387
|
+
if (typeof scriptOrFunc === "string") {
|
|
2388
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc);
|
|
2389
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
2390
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc, arg);
|
|
2391
|
+
} else {
|
|
2392
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
2393
|
+
}
|
|
2394
|
+
return true;
|
|
2395
|
+
}
|
|
2396
|
+
async addScriptTag(options) {
|
|
2397
|
+
if (!this.#page) {
|
|
2398
|
+
throw new Error("No valid page");
|
|
2399
|
+
}
|
|
2400
|
+
return this.#page.addScriptTag(options);
|
|
2401
|
+
}
|
|
2224
2402
|
apiContext() {
|
|
2225
2403
|
throw new Error("Not supported in PuppeteerPage.");
|
|
2226
2404
|
}
|
|
@@ -2277,7 +2455,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2277
2455
|
}
|
|
2278
2456
|
async close() {
|
|
2279
2457
|
if (this.#status === "closed") {
|
|
2280
|
-
|
|
2458
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
2281
2459
|
return true;
|
|
2282
2460
|
} else if (this.#status === "busy") {
|
|
2283
2461
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -2290,6 +2468,9 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2290
2468
|
this.#status = "closed";
|
|
2291
2469
|
return true;
|
|
2292
2470
|
}
|
|
2471
|
+
closeWhenFree() {
|
|
2472
|
+
return this.#closeWhenFree;
|
|
2473
|
+
}
|
|
2293
2474
|
async content(iframeOptions = []) {
|
|
2294
2475
|
if (!this.#page) {
|
|
2295
2476
|
throw new Error("No valid page");
|
|
@@ -2318,11 +2499,18 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2318
2499
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
2319
2500
|
return height;
|
|
2320
2501
|
}
|
|
2321
|
-
async
|
|
2502
|
+
async evaluate(func, args) {
|
|
2503
|
+
if (!this.#page) {
|
|
2504
|
+
throw new Error("No valid page");
|
|
2505
|
+
}
|
|
2506
|
+
return this.#page.evaluate(func, args);
|
|
2507
|
+
}
|
|
2508
|
+
async exposeFunction(name, callbackFunction) {
|
|
2322
2509
|
if (!this.#page) {
|
|
2323
2510
|
throw new Error("No valid page");
|
|
2324
2511
|
}
|
|
2325
|
-
|
|
2512
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
2513
|
+
return;
|
|
2326
2514
|
}
|
|
2327
2515
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
2328
2516
|
if (!this.#page) {
|
|
@@ -2360,7 +2548,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2360
2548
|
}
|
|
2361
2549
|
async free() {
|
|
2362
2550
|
if (this.#status === "free") {
|
|
2363
|
-
|
|
2551
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
2364
2552
|
}
|
|
2365
2553
|
this.#status = "free";
|
|
2366
2554
|
await this.clearRequestInterceptions();
|
|
@@ -2503,6 +2691,10 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2503
2691
|
}
|
|
2504
2692
|
return response;
|
|
2505
2693
|
}
|
|
2694
|
+
setCloseWhenFree(closeWhenFree) {
|
|
2695
|
+
this.#closeWhenFree = closeWhenFree;
|
|
2696
|
+
return true;
|
|
2697
|
+
}
|
|
2506
2698
|
async setCookies(cookies) {
|
|
2507
2699
|
if (!this.#page) {
|
|
2508
2700
|
throw new Error("No valid page");
|
|
@@ -2578,7 +2770,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2578
2770
|
}
|
|
2579
2771
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2580
2772
|
if (actOptions.length <= 0) {
|
|
2581
|
-
|
|
2773
|
+
logwarn("Invalid paras in setRequestInterception");
|
|
2582
2774
|
return false;
|
|
2583
2775
|
}
|
|
2584
2776
|
if (this.#requestInterceptionNum <= 0) {
|
|
@@ -2614,7 +2806,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2614
2806
|
await request.continue();
|
|
2615
2807
|
return true;
|
|
2616
2808
|
} catch (err) {
|
|
2617
|
-
|
|
2809
|
+
logerr(err);
|
|
2618
2810
|
return false;
|
|
2619
2811
|
}
|
|
2620
2812
|
});
|
|
@@ -2626,7 +2818,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2626
2818
|
}
|
|
2627
2819
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2628
2820
|
if (actOptions.length <= 0) {
|
|
2629
|
-
|
|
2821
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
2630
2822
|
return false;
|
|
2631
2823
|
}
|
|
2632
2824
|
this.#responseInterceptionNum++;
|
|
@@ -2681,7 +2873,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2681
2873
|
}
|
|
2682
2874
|
return true;
|
|
2683
2875
|
} catch (err) {
|
|
2684
|
-
|
|
2876
|
+
logerr(err);
|
|
2685
2877
|
return false;
|
|
2686
2878
|
}
|
|
2687
2879
|
});
|
|
@@ -3341,6 +3533,22 @@ var CheerioElement = class _CheerioElement {
|
|
|
3341
3533
|
return Array.from(Object.keys(element.attribs));
|
|
3342
3534
|
}
|
|
3343
3535
|
}
|
|
3536
|
+
async dataset() {
|
|
3537
|
+
const attributeNames = await this.attributeNames();
|
|
3538
|
+
const dataset = {};
|
|
3539
|
+
for (const attributeName of attributeNames) {
|
|
3540
|
+
if (!attributeName.startsWith("data-")) {
|
|
3541
|
+
continue;
|
|
3542
|
+
}
|
|
3543
|
+
const val = await this.attribute(attributeName);
|
|
3544
|
+
const key = convertDataAttributeName(attributeName);
|
|
3545
|
+
dataset[key] = val;
|
|
3546
|
+
}
|
|
3547
|
+
return dataset;
|
|
3548
|
+
}
|
|
3549
|
+
async evaluate() {
|
|
3550
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3551
|
+
}
|
|
3344
3552
|
#findNodes(selector, absolute) {
|
|
3345
3553
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
3346
3554
|
throw new Error("Do not support XPath in cheerio.");
|
|
@@ -3461,6 +3669,12 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3461
3669
|
_origPage() {
|
|
3462
3670
|
throw new Error("Method not implemented.");
|
|
3463
3671
|
}
|
|
3672
|
+
async addPreloadScript() {
|
|
3673
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3674
|
+
}
|
|
3675
|
+
addScriptTag() {
|
|
3676
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3677
|
+
}
|
|
3464
3678
|
apiContext() {
|
|
3465
3679
|
throw new Error("Not supported in CheerioPage.");
|
|
3466
3680
|
}
|
|
@@ -3488,13 +3702,19 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3488
3702
|
async close() {
|
|
3489
3703
|
throw new Error("Not supported in CheerioPage.");
|
|
3490
3704
|
}
|
|
3705
|
+
closeWhenFree() {
|
|
3706
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3707
|
+
}
|
|
3491
3708
|
async content() {
|
|
3492
3709
|
throw new Error("Not supported in CheerioPage.");
|
|
3493
3710
|
}
|
|
3494
3711
|
async cookies() {
|
|
3495
3712
|
throw new Error("Not supported in CheerioPage.");
|
|
3496
3713
|
}
|
|
3497
|
-
async
|
|
3714
|
+
async evaluate() {
|
|
3715
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3716
|
+
}
|
|
3717
|
+
exposeFunction() {
|
|
3498
3718
|
throw new Error("Not supported in CheerioPage.");
|
|
3499
3719
|
}
|
|
3500
3720
|
#findNodes(selector) {
|
|
@@ -3591,6 +3811,9 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3591
3811
|
async sendCDPMessage() {
|
|
3592
3812
|
throw new Error("Method not implemented.");
|
|
3593
3813
|
}
|
|
3814
|
+
setCloseWhenFree() {
|
|
3815
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3816
|
+
}
|
|
3594
3817
|
async setCookies() {
|
|
3595
3818
|
throw new Error("Not supported in CheerioPage.");
|
|
3596
3819
|
}
|