@letsscrapedata/controller 0.0.50 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +299 -66
- package/dist/index.d.cts +120 -10
- package/dist/index.d.ts +120 -10
- package/dist/index.js +297 -65
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -40,21 +40,22 @@ __export(src_exports, {
|
|
|
40
40
|
PuppeteerBrowserContext: () => PuppeteerBrowserContext,
|
|
41
41
|
PuppeteerElement: () => PuppeteerElement,
|
|
42
42
|
PuppeteerPage: () => PuppeteerPage,
|
|
43
|
-
controller: () => controller
|
|
43
|
+
controller: () => controller,
|
|
44
|
+
setControllerLogFun: () => setControllerLogFun
|
|
44
45
|
});
|
|
45
46
|
module.exports = __toCommonJS(src_exports);
|
|
46
47
|
|
|
47
|
-
// src/playwright/browser.ts
|
|
48
|
-
var import_node_events3 = __toESM(require("events"), 1);
|
|
49
|
-
var import_utils5 = require("@letsscrapedata/utils");
|
|
50
|
-
|
|
51
|
-
// src/playwright/context.ts
|
|
52
|
-
var import_node_events2 = __toESM(require("events"), 1);
|
|
53
|
-
var import_utils4 = require("@letsscrapedata/utils");
|
|
54
|
-
|
|
55
48
|
// src/utils/log.ts
|
|
56
49
|
var import_utils = require("@letsscrapedata/utils");
|
|
57
50
|
var pkgLog = import_utils.log;
|
|
51
|
+
function setControllerLogFun(logFun) {
|
|
52
|
+
if (typeof logFun === "function") {
|
|
53
|
+
pkgLog = logFun;
|
|
54
|
+
return true;
|
|
55
|
+
} else {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
58
59
|
async function loginfo(...args) {
|
|
59
60
|
await pkgLog(import_utils.LogLevel.INF, ...args);
|
|
60
61
|
}
|
|
@@ -65,10 +66,44 @@ async function logerr(...args) {
|
|
|
65
66
|
await pkgLog(import_utils.LogLevel.ERR, ...args);
|
|
66
67
|
}
|
|
67
68
|
|
|
69
|
+
// src/playwright/browser.ts
|
|
70
|
+
var import_node_events3 = __toESM(require("events"), 1);
|
|
71
|
+
var import_utils5 = require("@letsscrapedata/utils");
|
|
72
|
+
|
|
73
|
+
// src/playwright/context.ts
|
|
74
|
+
var import_node_events2 = __toESM(require("events"), 1);
|
|
75
|
+
var import_utils4 = require("@letsscrapedata/utils");
|
|
76
|
+
|
|
68
77
|
// src/playwright/page.ts
|
|
69
78
|
var import_node_events = __toESM(require("events"), 1);
|
|
70
79
|
var import_utils3 = require("@letsscrapedata/utils");
|
|
71
80
|
|
|
81
|
+
// src/utils/common.ts
|
|
82
|
+
/**
 * Convert a `data-*` attribute name into a camelCase key, e.g.
 * `data-foo-bar` -> `fooBar`.
 *
 * The segment right after `data-` is kept as written; every following
 * non-empty segment gets its first character upper-cased and the rest
 * lower-cased. Empty segments (from consecutive dashes) are skipped.
 *
 * @param {string} attr Attribute name, expected to start with `data-`.
 * @returns {string} The camelCase key, or "" when `attr` is not a string
 *   that starts with `data-`.
 */
function convertDataAttributeName(attr) {
  if (typeof attr !== "string" || !attr.startsWith("data-")) {
    return "";
  }
  // Element 0 is always the literal "data" prefix, so it is discarded.
  const [, first = "", ...rest] = attr.split("-");
  let name = first;
  for (const part of rest) {
    if (!part) {
      continue;
    }
    // Capitalise the FIRST character of the segment (index 0). Using index 1
    // corrupts the key and throws on one-character segments like "data-a-b".
    name += part[0].toUpperCase() + part.slice(1).toLowerCase();
  }
  return name;
}
|
|
96
|
+
/**
 * Build the CSS selector used to locate an iframe element from an iframe
 * option object.
 *
 * Precedence:
 *   1. a non-empty string `src`  -> `iframe[src^="..."]` (prefix match)
 *   2. a non-empty string `id`   -> `iframe[id="..."]`   (exact match)
 *   3. otherwise                 -> `selector` as given ("" when absent)
 *
 * A non-string `src` (for example a RegExp) cannot be expressed as an
 * attribute selector, so it falls through to `id` / `selector`.
 *
 * @param {{ src?: unknown, id?: unknown, selector?: string }} iframeOption
 * @returns {string} The CSS selector for the iframe element.
 */
function getIframeSelector(iframeOption) {
  const { src = "", id = "", selector = "" } = iframeOption;
  // Escape backslashes and double quotes so the value cannot terminate the
  // double-quoted attribute string early and produce an invalid selector.
  const quoteSafe = (value) => value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  if (typeof src === "string" && src) {
    return `iframe[src^="${quoteSafe(src)}"]`;
  }
  if (typeof id === "string" && id) {
    return `iframe[id="${quoteSafe(id)}"]`;
  }
  return selector;
}
|
|
106
|
+
|
|
72
107
|
// src/playwright/element.ts
|
|
73
108
|
var import_utils2 = require("@letsscrapedata/utils");
|
|
74
109
|
var PlaywrightElement = class _PlaywrightElement {
|
|
@@ -89,6 +124,29 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
89
124
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
90
125
|
return names;
|
|
91
126
|
}
|
|
127
|
+
async dataset() {
|
|
128
|
+
try {
|
|
129
|
+
const dataset = await this.#locator.evaluate((node) => node.dataset);
|
|
130
|
+
return dataset;
|
|
131
|
+
} catch (err) {
|
|
132
|
+
return {};
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
async evaluate(func, args) {
|
|
136
|
+
try {
|
|
137
|
+
const frame = this.#frame;
|
|
138
|
+
;
|
|
139
|
+
if (typeof frame.parentFrame === "function") {
|
|
140
|
+
return await frame.evaluate(func, args);
|
|
141
|
+
} else {
|
|
142
|
+
const locator = this.#frame.owner();
|
|
143
|
+
return await locator.evaluate(func, args);
|
|
144
|
+
}
|
|
145
|
+
} catch (err) {
|
|
146
|
+
logerr(err);
|
|
147
|
+
return "";
|
|
148
|
+
}
|
|
149
|
+
}
|
|
92
150
|
/*
|
|
93
151
|
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
94
152
|
if (!parentFrame) {
|
|
@@ -122,22 +180,15 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
122
180
|
return null;
|
|
123
181
|
}
|
|
124
182
|
*/
|
|
125
|
-
#getIframeSelector(iframeOption) {
|
|
126
|
-
const { src = "", selector = "" } = iframeOption;
|
|
127
|
-
if (!src && !selector) {
|
|
128
|
-
throw new Error("Invalid parent frame");
|
|
129
|
-
}
|
|
130
|
-
return selector ? selector : `iframe[src^="${src}"]`;
|
|
131
|
-
}
|
|
132
183
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
133
|
-
return parent.frameLocator(
|
|
184
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
134
185
|
}
|
|
135
186
|
async #getDescendantFrame(parent, iframeOptions) {
|
|
136
187
|
try {
|
|
137
188
|
if (iframeOptions.length <= 0) {
|
|
138
189
|
return null;
|
|
139
190
|
}
|
|
140
|
-
let frameLocator = parent.frameLocator(
|
|
191
|
+
let frameLocator = parent.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
141
192
|
for (const iframeOption of iframeOptions.slice(1)) {
|
|
142
193
|
if (!frameLocator) {
|
|
143
194
|
return null;
|
|
@@ -154,12 +205,12 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
154
205
|
let frame = this.#frame;
|
|
155
206
|
const retObj = { frame, locators: [] };
|
|
156
207
|
if (iframeOptions.length > 0) {
|
|
157
|
-
|
|
158
|
-
if (!
|
|
208
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
209
|
+
if (!childFrame) {
|
|
159
210
|
return retObj;
|
|
160
211
|
}
|
|
161
|
-
retObj.frame =
|
|
162
|
-
parent =
|
|
212
|
+
retObj.frame = childFrame;
|
|
213
|
+
parent = childFrame;
|
|
163
214
|
}
|
|
164
215
|
try {
|
|
165
216
|
let locators = [];
|
|
@@ -344,6 +395,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
344
395
|
#page;
|
|
345
396
|
#status;
|
|
346
397
|
#pageId;
|
|
398
|
+
#closeWhenFree;
|
|
347
399
|
#resquestInterceptionOptions;
|
|
348
400
|
#responseInterceptionOptions;
|
|
349
401
|
#client;
|
|
@@ -474,29 +526,57 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
474
526
|
return null;
|
|
475
527
|
}
|
|
476
528
|
*/
|
|
477
|
-
#
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
throw new Error("Invalid parent frame");
|
|
529
|
+
async #findDescendantFrame(src, id) {
|
|
530
|
+
if (!this.#page) {
|
|
531
|
+
throw new Error("No valid page");
|
|
481
532
|
}
|
|
482
|
-
|
|
533
|
+
const frames = this.#page.frames();
|
|
534
|
+
for (const frame of frames) {
|
|
535
|
+
const url = frame.url();
|
|
536
|
+
if (typeof src === "string" && src) {
|
|
537
|
+
if (url.startsWith(src)) {
|
|
538
|
+
return frame;
|
|
539
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
540
|
+
return frame;
|
|
541
|
+
}
|
|
542
|
+
} else if (src instanceof RegExp) {
|
|
543
|
+
if (url.match(src)) {
|
|
544
|
+
return frame;
|
|
545
|
+
}
|
|
546
|
+
} else if (id) {
|
|
547
|
+
const element = await frame.frameElement();
|
|
548
|
+
if (element) {
|
|
549
|
+
const frameId = await frame.evaluate(([ele, attr]) => ele.getAttribute(attr), [element, "id"]);
|
|
550
|
+
if (frameId === id) {
|
|
551
|
+
return frame;
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
return null;
|
|
483
557
|
}
|
|
484
558
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
485
|
-
return parent.frameLocator(
|
|
559
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
486
560
|
}
|
|
487
|
-
async #
|
|
561
|
+
async #getDescendantFrame(mainFrame, iframeOptions) {
|
|
488
562
|
try {
|
|
489
563
|
if (iframeOptions.length <= 0) {
|
|
490
564
|
return null;
|
|
491
565
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
566
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
567
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
568
|
+
const frame = await this.#findDescendantFrame(src, id);
|
|
569
|
+
return frame;
|
|
570
|
+
} else {
|
|
571
|
+
let frameLocator = mainFrame.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
572
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
573
|
+
if (!frameLocator) {
|
|
574
|
+
return null;
|
|
575
|
+
}
|
|
576
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
496
577
|
}
|
|
497
|
-
|
|
578
|
+
return frameLocator;
|
|
498
579
|
}
|
|
499
|
-
return frameLocator;
|
|
500
580
|
} catch (err) {
|
|
501
581
|
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
502
582
|
}
|
|
@@ -508,7 +588,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
508
588
|
let frame = this.#page.mainFrame();
|
|
509
589
|
const retObj = { frame, locators: [] };
|
|
510
590
|
if (iframeOptions.length > 0) {
|
|
511
|
-
frame = await this.#
|
|
591
|
+
frame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
512
592
|
if (!frame) {
|
|
513
593
|
return retObj;
|
|
514
594
|
}
|
|
@@ -581,12 +661,32 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
581
661
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
582
662
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
583
663
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
664
|
+
this.#closeWhenFree = false;
|
|
584
665
|
this.#resquestInterceptionOptions = [];
|
|
585
666
|
this.#responseInterceptionOptions = [];
|
|
586
667
|
this.#client = null;
|
|
587
668
|
this.#responseCb = null;
|
|
588
669
|
this.#addPageOn();
|
|
589
670
|
}
|
|
671
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
672
|
+
if (!this.#page) {
|
|
673
|
+
throw new Error("No valid page");
|
|
674
|
+
}
|
|
675
|
+
if (typeof scriptOrFunc === "string") {
|
|
676
|
+
await this.#page.addInitScript({ content: scriptOrFunc });
|
|
677
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
678
|
+
await this.#page.addInitScript(scriptOrFunc, arg);
|
|
679
|
+
} else {
|
|
680
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
681
|
+
}
|
|
682
|
+
return true;
|
|
683
|
+
}
|
|
684
|
+
async addScriptTag(options) {
|
|
685
|
+
if (!this.#page) {
|
|
686
|
+
throw new Error("No valid page");
|
|
687
|
+
}
|
|
688
|
+
return this.#page.addScriptTag(options);
|
|
689
|
+
}
|
|
590
690
|
apiContext() {
|
|
591
691
|
return this.browserContext().apiContext();
|
|
592
692
|
}
|
|
@@ -646,7 +746,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
646
746
|
}
|
|
647
747
|
async close() {
|
|
648
748
|
if (this.#status === "closed") {
|
|
649
|
-
|
|
749
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
650
750
|
return true;
|
|
651
751
|
} else if (this.#status === "busy") {
|
|
652
752
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -659,13 +759,16 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
659
759
|
this.#status = "closed";
|
|
660
760
|
return true;
|
|
661
761
|
}
|
|
762
|
+
closeWhenFree() {
|
|
763
|
+
return this.#closeWhenFree;
|
|
764
|
+
}
|
|
662
765
|
async content(iframeOptions = []) {
|
|
663
766
|
if (!this.#page) {
|
|
664
767
|
throw new Error("No valid page");
|
|
665
768
|
}
|
|
666
769
|
let content = "";
|
|
667
770
|
if (iframeOptions.length > 0) {
|
|
668
|
-
const frameLocator = await this.#
|
|
771
|
+
const frameLocator = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
|
|
669
772
|
if (frameLocator) {
|
|
670
773
|
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
671
774
|
}
|
|
@@ -687,11 +790,18 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
687
790
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
688
791
|
return height;
|
|
689
792
|
}
|
|
690
|
-
async
|
|
793
|
+
async evaluate(func, args) {
|
|
794
|
+
if (!this.#page) {
|
|
795
|
+
throw new Error("No valid page");
|
|
796
|
+
}
|
|
797
|
+
return this.#page.evaluate(func, args);
|
|
798
|
+
}
|
|
799
|
+
async exposeFunction(name, callbackFunction) {
|
|
691
800
|
if (!this.#page) {
|
|
692
801
|
throw new Error("No valid page");
|
|
693
802
|
}
|
|
694
|
-
|
|
803
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
804
|
+
return;
|
|
695
805
|
}
|
|
696
806
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
697
807
|
if (!this.#page) {
|
|
@@ -729,7 +839,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
729
839
|
}
|
|
730
840
|
async free() {
|
|
731
841
|
if (this.#status === "free") {
|
|
732
|
-
|
|
842
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
733
843
|
}
|
|
734
844
|
this.#status = "free";
|
|
735
845
|
await this.clearRequestInterceptions();
|
|
@@ -869,6 +979,10 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
869
979
|
}
|
|
870
980
|
return response;
|
|
871
981
|
}
|
|
982
|
+
setCloseWhenFree(closeWhenFree) {
|
|
983
|
+
this.#closeWhenFree = closeWhenFree;
|
|
984
|
+
return true;
|
|
985
|
+
}
|
|
872
986
|
async setCookies(cookies) {
|
|
873
987
|
if (!this.#page) {
|
|
874
988
|
throw new Error("No valid page");
|
|
@@ -1052,7 +1166,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
1052
1166
|
}
|
|
1053
1167
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
1054
1168
|
if (actOptions.length <= 0) {
|
|
1055
|
-
|
|
1169
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
1056
1170
|
return false;
|
|
1057
1171
|
}
|
|
1058
1172
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
@@ -1754,6 +1868,26 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1754
1868
|
const names = await this.#frame.evaluate((ele) => ele.getAttributeNames(), this.#$ele);
|
|
1755
1869
|
return names;
|
|
1756
1870
|
}
|
|
1871
|
+
async dataset() {
|
|
1872
|
+
try {
|
|
1873
|
+
const attributeNames = await this.attributeNames();
|
|
1874
|
+
const dataset = {};
|
|
1875
|
+
for (const attributeName of attributeNames) {
|
|
1876
|
+
if (!attributeName.startsWith("data-")) {
|
|
1877
|
+
continue;
|
|
1878
|
+
}
|
|
1879
|
+
const val = await this.attribute(attributeName);
|
|
1880
|
+
const key = convertDataAttributeName(attributeName);
|
|
1881
|
+
dataset[key] = val;
|
|
1882
|
+
}
|
|
1883
|
+
return dataset;
|
|
1884
|
+
} catch (err) {
|
|
1885
|
+
return {};
|
|
1886
|
+
}
|
|
1887
|
+
}
|
|
1888
|
+
async evaluate(func, args) {
|
|
1889
|
+
return await this.#frame.evaluate(func, args);
|
|
1890
|
+
}
|
|
1757
1891
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
1758
1892
|
if (!parentFrame) {
|
|
1759
1893
|
throw new Error("Invalid parent frame");
|
|
@@ -1803,13 +1937,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1803
1937
|
let frame = this.#frame;
|
|
1804
1938
|
const retObj = { frame, elementHandles: [] };
|
|
1805
1939
|
if (iframeOptions.length > 0) {
|
|
1806
|
-
|
|
1807
|
-
if (!
|
|
1940
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
1941
|
+
if (!childFrame) {
|
|
1808
1942
|
return retObj;
|
|
1809
1943
|
}
|
|
1810
|
-
retObj.frame =
|
|
1944
|
+
retObj.frame = childFrame;
|
|
1811
1945
|
absolute = true;
|
|
1812
|
-
parent =
|
|
1946
|
+
parent = childFrame;
|
|
1813
1947
|
}
|
|
1814
1948
|
try {
|
|
1815
1949
|
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
@@ -2011,6 +2145,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2011
2145
|
#page;
|
|
2012
2146
|
#status;
|
|
2013
2147
|
#pageId;
|
|
2148
|
+
#closeWhenFree;
|
|
2014
2149
|
#requestInterceptionNum;
|
|
2015
2150
|
#responseInterceptionNum;
|
|
2016
2151
|
#client;
|
|
@@ -2085,15 +2220,41 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2085
2220
|
});
|
|
2086
2221
|
return true;
|
|
2087
2222
|
}
|
|
2223
|
+
async #findDescendantFrame(src, id) {
|
|
2224
|
+
if (!this.#page) {
|
|
2225
|
+
throw new Error("No valid page");
|
|
2226
|
+
}
|
|
2227
|
+
const frames = this.#page.frames();
|
|
2228
|
+
for (const frame of frames) {
|
|
2229
|
+
const url = frame.url();
|
|
2230
|
+
if (typeof src === "string" && src) {
|
|
2231
|
+
if (url.startsWith(src)) {
|
|
2232
|
+
return frame;
|
|
2233
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
2234
|
+
return frame;
|
|
2235
|
+
}
|
|
2236
|
+
} else if (src instanceof RegExp) {
|
|
2237
|
+
if (url.match(src)) {
|
|
2238
|
+
return frame;
|
|
2239
|
+
}
|
|
2240
|
+
} else if (id) {
|
|
2241
|
+
const element = await frame.frameElement();
|
|
2242
|
+
if (element) {
|
|
2243
|
+
const frameId = await frame.evaluate((ele, attr) => ele.getAttribute(attr), element, "id");
|
|
2244
|
+
if (frameId === id) {
|
|
2245
|
+
return frame;
|
|
2246
|
+
}
|
|
2247
|
+
}
|
|
2248
|
+
}
|
|
2249
|
+
}
|
|
2250
|
+
return null;
|
|
2251
|
+
}
|
|
2088
2252
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
2089
2253
|
if (!parentFrame) {
|
|
2090
2254
|
throw new Error("Invalid parent frame");
|
|
2091
2255
|
}
|
|
2092
2256
|
let iframe = null;
|
|
2093
|
-
let { src = ""
|
|
2094
|
-
if (!src && !selector) {
|
|
2095
|
-
throw new Error("Invalid IframeOption");
|
|
2096
|
-
}
|
|
2257
|
+
let { src = "" } = iframeOption;
|
|
2097
2258
|
if (src) {
|
|
2098
2259
|
const childFrames = parentFrame.childFrames();
|
|
2099
2260
|
for (const childFrame of childFrames) {
|
|
@@ -2111,7 +2272,8 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2111
2272
|
}
|
|
2112
2273
|
}
|
|
2113
2274
|
} else {
|
|
2114
|
-
const
|
|
2275
|
+
const frameSelector = getIframeSelector(iframeOption);
|
|
2276
|
+
const $eleIframe = await parentFrame.$(frameSelector);
|
|
2115
2277
|
if ($eleIframe) {
|
|
2116
2278
|
iframe = await $eleIframe.contentFrame();
|
|
2117
2279
|
return iframe;
|
|
@@ -2121,11 +2283,16 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2121
2283
|
}
|
|
2122
2284
|
async #getDescendantFrame(parentFrame, iframeOptions) {
|
|
2123
2285
|
let iframe = parentFrame;
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2286
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
2287
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
2288
|
+
iframe = await this.#findDescendantFrame(src, id);
|
|
2289
|
+
} else {
|
|
2290
|
+
for (const iframeOption of iframeOptions) {
|
|
2291
|
+
if (!iframe) {
|
|
2292
|
+
return null;
|
|
2293
|
+
}
|
|
2294
|
+
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2127
2295
|
}
|
|
2128
|
-
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2129
2296
|
}
|
|
2130
2297
|
return iframe;
|
|
2131
2298
|
}
|
|
@@ -2154,7 +2321,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2154
2321
|
}
|
|
2155
2322
|
return retObj;
|
|
2156
2323
|
} catch (err) {
|
|
2157
|
-
|
|
2324
|
+
loginfo(err);
|
|
2158
2325
|
return retObj;
|
|
2159
2326
|
}
|
|
2160
2327
|
}
|
|
@@ -2207,11 +2374,31 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2207
2374
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
2208
2375
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
2209
2376
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
2377
|
+
this.#closeWhenFree = false;
|
|
2210
2378
|
this.#requestInterceptionNum = 0;
|
|
2211
2379
|
this.#responseInterceptionNum = 0;
|
|
2212
2380
|
this.#client = null;
|
|
2213
2381
|
this.#addPageOn();
|
|
2214
2382
|
}
|
|
2383
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
2384
|
+
if (!this.#page) {
|
|
2385
|
+
throw new Error("No valid page");
|
|
2386
|
+
}
|
|
2387
|
+
if (typeof scriptOrFunc === "string") {
|
|
2388
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc);
|
|
2389
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
2390
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc, arg);
|
|
2391
|
+
} else {
|
|
2392
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
2393
|
+
}
|
|
2394
|
+
return true;
|
|
2395
|
+
}
|
|
2396
|
+
async addScriptTag(options) {
|
|
2397
|
+
if (!this.#page) {
|
|
2398
|
+
throw new Error("No valid page");
|
|
2399
|
+
}
|
|
2400
|
+
return this.#page.addScriptTag(options);
|
|
2401
|
+
}
|
|
2215
2402
|
apiContext() {
|
|
2216
2403
|
throw new Error("Not supported in PuppeteerPage.");
|
|
2217
2404
|
}
|
|
@@ -2268,7 +2455,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2268
2455
|
}
|
|
2269
2456
|
async close() {
|
|
2270
2457
|
if (this.#status === "closed") {
|
|
2271
|
-
|
|
2458
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
2272
2459
|
return true;
|
|
2273
2460
|
} else if (this.#status === "busy") {
|
|
2274
2461
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -2281,6 +2468,9 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2281
2468
|
this.#status = "closed";
|
|
2282
2469
|
return true;
|
|
2283
2470
|
}
|
|
2471
|
+
closeWhenFree() {
|
|
2472
|
+
return this.#closeWhenFree;
|
|
2473
|
+
}
|
|
2284
2474
|
async content(iframeOptions = []) {
|
|
2285
2475
|
if (!this.#page) {
|
|
2286
2476
|
throw new Error("No valid page");
|
|
@@ -2309,11 +2499,18 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2309
2499
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
2310
2500
|
return height;
|
|
2311
2501
|
}
|
|
2312
|
-
async
|
|
2502
|
+
async evaluate(func, args) {
|
|
2503
|
+
if (!this.#page) {
|
|
2504
|
+
throw new Error("No valid page");
|
|
2505
|
+
}
|
|
2506
|
+
return this.#page.evaluate(func, args);
|
|
2507
|
+
}
|
|
2508
|
+
async exposeFunction(name, callbackFunction) {
|
|
2313
2509
|
if (!this.#page) {
|
|
2314
2510
|
throw new Error("No valid page");
|
|
2315
2511
|
}
|
|
2316
|
-
|
|
2512
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
2513
|
+
return;
|
|
2317
2514
|
}
|
|
2318
2515
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
2319
2516
|
if (!this.#page) {
|
|
@@ -2351,7 +2548,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2351
2548
|
}
|
|
2352
2549
|
async free() {
|
|
2353
2550
|
if (this.#status === "free") {
|
|
2354
|
-
|
|
2551
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
2355
2552
|
}
|
|
2356
2553
|
this.#status = "free";
|
|
2357
2554
|
await this.clearRequestInterceptions();
|
|
@@ -2494,6 +2691,10 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2494
2691
|
}
|
|
2495
2692
|
return response;
|
|
2496
2693
|
}
|
|
2694
|
+
setCloseWhenFree(closeWhenFree) {
|
|
2695
|
+
this.#closeWhenFree = closeWhenFree;
|
|
2696
|
+
return true;
|
|
2697
|
+
}
|
|
2497
2698
|
async setCookies(cookies) {
|
|
2498
2699
|
if (!this.#page) {
|
|
2499
2700
|
throw new Error("No valid page");
|
|
@@ -2569,7 +2770,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2569
2770
|
}
|
|
2570
2771
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2571
2772
|
if (actOptions.length <= 0) {
|
|
2572
|
-
|
|
2773
|
+
logwarn("Invalid paras in setRequestInterception");
|
|
2573
2774
|
return false;
|
|
2574
2775
|
}
|
|
2575
2776
|
if (this.#requestInterceptionNum <= 0) {
|
|
@@ -2605,7 +2806,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2605
2806
|
await request.continue();
|
|
2606
2807
|
return true;
|
|
2607
2808
|
} catch (err) {
|
|
2608
|
-
|
|
2809
|
+
logerr(err);
|
|
2609
2810
|
return false;
|
|
2610
2811
|
}
|
|
2611
2812
|
});
|
|
@@ -2617,7 +2818,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2617
2818
|
}
|
|
2618
2819
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2619
2820
|
if (actOptions.length <= 0) {
|
|
2620
|
-
|
|
2821
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
2621
2822
|
return false;
|
|
2622
2823
|
}
|
|
2623
2824
|
this.#responseInterceptionNum++;
|
|
@@ -2672,7 +2873,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2672
2873
|
}
|
|
2673
2874
|
return true;
|
|
2674
2875
|
} catch (err) {
|
|
2675
|
-
|
|
2876
|
+
logerr(err);
|
|
2676
2877
|
return false;
|
|
2677
2878
|
}
|
|
2678
2879
|
});
|
|
@@ -3332,6 +3533,22 @@ var CheerioElement = class _CheerioElement {
|
|
|
3332
3533
|
return Array.from(Object.keys(element.attribs));
|
|
3333
3534
|
}
|
|
3334
3535
|
}
|
|
3536
|
+
async dataset() {
|
|
3537
|
+
const attributeNames = await this.attributeNames();
|
|
3538
|
+
const dataset = {};
|
|
3539
|
+
for (const attributeName of attributeNames) {
|
|
3540
|
+
if (!attributeName.startsWith("data-")) {
|
|
3541
|
+
continue;
|
|
3542
|
+
}
|
|
3543
|
+
const val = await this.attribute(attributeName);
|
|
3544
|
+
const key = convertDataAttributeName(attributeName);
|
|
3545
|
+
dataset[key] = val;
|
|
3546
|
+
}
|
|
3547
|
+
return dataset;
|
|
3548
|
+
}
|
|
3549
|
+
async evaluate() {
|
|
3550
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3551
|
+
}
|
|
3335
3552
|
#findNodes(selector, absolute) {
|
|
3336
3553
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
3337
3554
|
throw new Error("Do not support XPath in cheerio.");
|
|
@@ -3452,6 +3669,12 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3452
3669
|
_origPage() {
|
|
3453
3670
|
throw new Error("Method not implemented.");
|
|
3454
3671
|
}
|
|
3672
|
+
async addPreloadScript() {
|
|
3673
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3674
|
+
}
|
|
3675
|
+
addScriptTag() {
|
|
3676
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3677
|
+
}
|
|
3455
3678
|
apiContext() {
|
|
3456
3679
|
throw new Error("Not supported in CheerioPage.");
|
|
3457
3680
|
}
|
|
@@ -3479,13 +3702,19 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3479
3702
|
async close() {
|
|
3480
3703
|
throw new Error("Not supported in CheerioPage.");
|
|
3481
3704
|
}
|
|
3705
|
+
closeWhenFree() {
|
|
3706
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3707
|
+
}
|
|
3482
3708
|
async content() {
|
|
3483
3709
|
throw new Error("Not supported in CheerioPage.");
|
|
3484
3710
|
}
|
|
3485
3711
|
async cookies() {
|
|
3486
3712
|
throw new Error("Not supported in CheerioPage.");
|
|
3487
3713
|
}
|
|
3488
|
-
async
|
|
3714
|
+
async evaluate() {
|
|
3715
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3716
|
+
}
|
|
3717
|
+
exposeFunction() {
|
|
3489
3718
|
throw new Error("Not supported in CheerioPage.");
|
|
3490
3719
|
}
|
|
3491
3720
|
#findNodes(selector) {
|
|
@@ -3582,6 +3811,9 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
3582
3811
|
async sendCDPMessage() {
|
|
3583
3812
|
throw new Error("Method not implemented.");
|
|
3584
3813
|
}
|
|
3814
|
+
setCloseWhenFree() {
|
|
3815
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3816
|
+
}
|
|
3585
3817
|
async setCookies() {
|
|
3586
3818
|
throw new Error("Not supported in CheerioPage.");
|
|
3587
3819
|
}
|
|
@@ -3919,5 +4151,6 @@ var controller = new LsdBrowserController();
|
|
|
3919
4151
|
PuppeteerBrowserContext,
|
|
3920
4152
|
PuppeteerElement,
|
|
3921
4153
|
PuppeteerPage,
|
|
3922
|
-
controller
|
|
4154
|
+
controller,
|
|
4155
|
+
setControllerLogFun
|
|
3923
4156
|
});
|