@letsscrapedata/controller 0.0.50 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +299 -66
- package/dist/index.d.cts +120 -10
- package/dist/index.d.ts +120 -10
- package/dist/index.js +297 -65
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
// src/playwright/browser.ts
|
|
2
|
-
import EventEmitter3 from "events";
|
|
3
|
-
import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree } from "@letsscrapedata/utils";
|
|
4
|
-
|
|
5
|
-
// src/playwright/context.ts
|
|
6
|
-
import EventEmitter2 from "events";
|
|
7
|
-
import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedata/utils";
|
|
8
|
-
|
|
9
1
|
// src/utils/log.ts
|
|
10
2
|
import { log, LogLevel } from "@letsscrapedata/utils";
|
|
11
3
|
var pkgLog = log;
|
|
4
|
+
/**
 * Replaces the package-level log function (`pkgLog`) that loginfo/logerr
 * forward their arguments to.
 *
 * @param logFun Candidate log function; anything that is not a function is ignored.
 * @returns true when `logFun` was installed, false when it was rejected.
 */
function setControllerLogFun(logFun) {
  const isCallable = typeof logFun === "function";
  if (isCallable) {
    pkgLog = logFun;
  }
  return isCallable;
}
|
|
12
12
|
async function loginfo(...args) {
|
|
13
13
|
await pkgLog(LogLevel.INF, ...args);
|
|
14
14
|
}
|
|
@@ -19,10 +19,44 @@ async function logerr(...args) {
|
|
|
19
19
|
await pkgLog(LogLevel.ERR, ...args);
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
// src/playwright/browser.ts
|
|
23
|
+
import EventEmitter3 from "events";
|
|
24
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree } from "@letsscrapedata/utils";
|
|
25
|
+
|
|
26
|
+
// src/playwright/context.ts
|
|
27
|
+
import EventEmitter2 from "events";
|
|
28
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedata/utils";
|
|
29
|
+
|
|
22
30
|
// src/playwright/page.ts
|
|
23
31
|
import EventEmitter from "events";
|
|
24
32
|
import { getCurrentUnixTime, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
25
33
|
|
|
34
|
+
// src/utils/common.ts
|
|
35
|
+
/**
 * Converts a `data-*` attribute name into its camelCase dataset key,
 * e.g. "data-user-id" -> "userId".
 *
 * The first segment after "data-" is kept as-is; every later non-empty
 * segment gets its first character upper-cased and the rest lower-cased.
 * Empty segments (from consecutive hyphens) are skipped.
 *
 * @param attr Attribute name to convert.
 * @returns The dataset key, or "" when `attr` does not start with "data-".
 */
function convertDataAttributeName(attr) {
  if (!attr.startsWith("data-")) {
    return "";
  }
  const [first = "", ...rest] = attr.slice("data-".length).split("-");
  let name = first;
  for (const part of rest) {
    if (!part) {
      continue;
    }
    // Capitalise the segment's FIRST character. Indexing [1] mangled the
    // key ("user-id" -> "userDd") and threw on one-character segments.
    name += part[0].toUpperCase() + part.slice(1).toLowerCase();
  }
  return name;
}
|
|
49
|
+
/**
 * Builds the CSS selector used to locate an iframe from an iframe option.
 *
 * Precedence: a non-empty string `src` (prefix match on the src attribute),
 * then a non-empty string `id` (exact match), otherwise the raw `selector`
 * (which defaults to ""). A non-string `src` is ignored here.
 *
 * @param iframeOption Object that may carry `src`, `id` and/or `selector`.
 * @returns The selector string; "" when none of the fields is usable.
 */
function getIframeSelector(iframeOption) {
  const { src = "", id = "", selector = "" } = iframeOption;
  // Escape backslashes and double quotes so the value stays one valid
  // double-quoted CSS string inside the attribute selector.
  const quote = (value) => value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  if (typeof src === "string" && src) {
    return `iframe[src^="${quote(src)}"]`;
  }
  if (typeof id === "string" && id) {
    return `iframe[id="${quote(id)}"]`;
  }
  return selector;
}
|
|
59
|
+
|
|
26
60
|
// src/playwright/element.ts
|
|
27
61
|
import { unreachable } from "@letsscrapedata/utils";
|
|
28
62
|
var PlaywrightElement = class _PlaywrightElement {
|
|
@@ -43,6 +77,29 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
43
77
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
44
78
|
return names;
|
|
45
79
|
}
|
|
80
|
+
async dataset() {
|
|
81
|
+
try {
|
|
82
|
+
const dataset = await this.#locator.evaluate((node) => node.dataset);
|
|
83
|
+
return dataset;
|
|
84
|
+
} catch (err) {
|
|
85
|
+
return {};
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
async evaluate(func, args) {
|
|
89
|
+
try {
|
|
90
|
+
const frame = this.#frame;
|
|
91
|
+
;
|
|
92
|
+
if (typeof frame.parentFrame === "function") {
|
|
93
|
+
return await frame.evaluate(func, args);
|
|
94
|
+
} else {
|
|
95
|
+
const locator = this.#frame.owner();
|
|
96
|
+
return await locator.evaluate(func, args);
|
|
97
|
+
}
|
|
98
|
+
} catch (err) {
|
|
99
|
+
logerr(err);
|
|
100
|
+
return "";
|
|
101
|
+
}
|
|
102
|
+
}
|
|
46
103
|
/*
|
|
47
104
|
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
48
105
|
if (!parentFrame) {
|
|
@@ -76,22 +133,15 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
76
133
|
return null;
|
|
77
134
|
}
|
|
78
135
|
*/
|
|
79
|
-
#getIframeSelector(iframeOption) {
|
|
80
|
-
const { src = "", selector = "" } = iframeOption;
|
|
81
|
-
if (!src && !selector) {
|
|
82
|
-
throw new Error("Invalid parent frame");
|
|
83
|
-
}
|
|
84
|
-
return selector ? selector : `iframe[src^="${src}"]`;
|
|
85
|
-
}
|
|
86
136
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
87
|
-
return parent.frameLocator(
|
|
137
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
88
138
|
}
|
|
89
139
|
async #getDescendantFrame(parent, iframeOptions) {
|
|
90
140
|
try {
|
|
91
141
|
if (iframeOptions.length <= 0) {
|
|
92
142
|
return null;
|
|
93
143
|
}
|
|
94
|
-
let frameLocator = parent.frameLocator(
|
|
144
|
+
let frameLocator = parent.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
95
145
|
for (const iframeOption of iframeOptions.slice(1)) {
|
|
96
146
|
if (!frameLocator) {
|
|
97
147
|
return null;
|
|
@@ -108,12 +158,12 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
108
158
|
let frame = this.#frame;
|
|
109
159
|
const retObj = { frame, locators: [] };
|
|
110
160
|
if (iframeOptions.length > 0) {
|
|
111
|
-
|
|
112
|
-
if (!
|
|
161
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
162
|
+
if (!childFrame) {
|
|
113
163
|
return retObj;
|
|
114
164
|
}
|
|
115
|
-
retObj.frame =
|
|
116
|
-
parent =
|
|
165
|
+
retObj.frame = childFrame;
|
|
166
|
+
parent = childFrame;
|
|
117
167
|
}
|
|
118
168
|
try {
|
|
119
169
|
let locators = [];
|
|
@@ -298,6 +348,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
298
348
|
#page;
|
|
299
349
|
#status;
|
|
300
350
|
#pageId;
|
|
351
|
+
#closeWhenFree;
|
|
301
352
|
#resquestInterceptionOptions;
|
|
302
353
|
#responseInterceptionOptions;
|
|
303
354
|
#client;
|
|
@@ -428,29 +479,57 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
428
479
|
return null;
|
|
429
480
|
}
|
|
430
481
|
*/
|
|
431
|
-
#
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
throw new Error("Invalid parent frame");
|
|
482
|
+
async #findDescendantFrame(src, id) {
|
|
483
|
+
if (!this.#page) {
|
|
484
|
+
throw new Error("No valid page");
|
|
435
485
|
}
|
|
436
|
-
|
|
486
|
+
const frames = this.#page.frames();
|
|
487
|
+
for (const frame of frames) {
|
|
488
|
+
const url = frame.url();
|
|
489
|
+
if (typeof src === "string" && src) {
|
|
490
|
+
if (url.startsWith(src)) {
|
|
491
|
+
return frame;
|
|
492
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
493
|
+
return frame;
|
|
494
|
+
}
|
|
495
|
+
} else if (src instanceof RegExp) {
|
|
496
|
+
if (url.match(src)) {
|
|
497
|
+
return frame;
|
|
498
|
+
}
|
|
499
|
+
} else if (id) {
|
|
500
|
+
const element = await frame.frameElement();
|
|
501
|
+
if (element) {
|
|
502
|
+
const frameId = await frame.evaluate(([ele, attr]) => ele.getAttribute(attr), [element, "id"]);
|
|
503
|
+
if (frameId === id) {
|
|
504
|
+
return frame;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
return null;
|
|
437
510
|
}
|
|
438
511
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
439
|
-
return parent.frameLocator(
|
|
512
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
440
513
|
}
|
|
441
|
-
async #
|
|
514
|
+
async #getDescendantFrame(mainFrame, iframeOptions) {
|
|
442
515
|
try {
|
|
443
516
|
if (iframeOptions.length <= 0) {
|
|
444
517
|
return null;
|
|
445
518
|
}
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
519
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
520
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
521
|
+
const frame = await this.#findDescendantFrame(src, id);
|
|
522
|
+
return frame;
|
|
523
|
+
} else {
|
|
524
|
+
let frameLocator = mainFrame.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
525
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
526
|
+
if (!frameLocator) {
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
450
530
|
}
|
|
451
|
-
|
|
531
|
+
return frameLocator;
|
|
452
532
|
}
|
|
453
|
-
return frameLocator;
|
|
454
533
|
} catch (err) {
|
|
455
534
|
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
456
535
|
}
|
|
@@ -462,7 +541,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
462
541
|
let frame = this.#page.mainFrame();
|
|
463
542
|
const retObj = { frame, locators: [] };
|
|
464
543
|
if (iframeOptions.length > 0) {
|
|
465
|
-
frame = await this.#
|
|
544
|
+
frame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
466
545
|
if (!frame) {
|
|
467
546
|
return retObj;
|
|
468
547
|
}
|
|
@@ -535,12 +614,32 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
535
614
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
536
615
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
537
616
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
617
|
+
this.#closeWhenFree = false;
|
|
538
618
|
this.#resquestInterceptionOptions = [];
|
|
539
619
|
this.#responseInterceptionOptions = [];
|
|
540
620
|
this.#client = null;
|
|
541
621
|
this.#responseCb = null;
|
|
542
622
|
this.#addPageOn();
|
|
543
623
|
}
|
|
624
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
625
|
+
if (!this.#page) {
|
|
626
|
+
throw new Error("No valid page");
|
|
627
|
+
}
|
|
628
|
+
if (typeof scriptOrFunc === "string") {
|
|
629
|
+
await this.#page.addInitScript({ content: scriptOrFunc });
|
|
630
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
631
|
+
await this.#page.addInitScript(scriptOrFunc, arg);
|
|
632
|
+
} else {
|
|
633
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
634
|
+
}
|
|
635
|
+
return true;
|
|
636
|
+
}
|
|
637
|
+
async addScriptTag(options) {
|
|
638
|
+
if (!this.#page) {
|
|
639
|
+
throw new Error("No valid page");
|
|
640
|
+
}
|
|
641
|
+
return this.#page.addScriptTag(options);
|
|
642
|
+
}
|
|
544
643
|
apiContext() {
|
|
545
644
|
return this.browserContext().apiContext();
|
|
546
645
|
}
|
|
@@ -600,7 +699,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
600
699
|
}
|
|
601
700
|
async close() {
|
|
602
701
|
if (this.#status === "closed") {
|
|
603
|
-
|
|
702
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
604
703
|
return true;
|
|
605
704
|
} else if (this.#status === "busy") {
|
|
606
705
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -613,13 +712,16 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
613
712
|
this.#status = "closed";
|
|
614
713
|
return true;
|
|
615
714
|
}
|
|
715
|
+
closeWhenFree() {
|
|
716
|
+
return this.#closeWhenFree;
|
|
717
|
+
}
|
|
616
718
|
async content(iframeOptions = []) {
|
|
617
719
|
if (!this.#page) {
|
|
618
720
|
throw new Error("No valid page");
|
|
619
721
|
}
|
|
620
722
|
let content = "";
|
|
621
723
|
if (iframeOptions.length > 0) {
|
|
622
|
-
const frameLocator = await this.#
|
|
724
|
+
const frameLocator = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
|
|
623
725
|
if (frameLocator) {
|
|
624
726
|
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
625
727
|
}
|
|
@@ -641,11 +743,18 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
641
743
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
642
744
|
return height;
|
|
643
745
|
}
|
|
644
|
-
async
|
|
746
|
+
async evaluate(func, args) {
|
|
747
|
+
if (!this.#page) {
|
|
748
|
+
throw new Error("No valid page");
|
|
749
|
+
}
|
|
750
|
+
return this.#page.evaluate(func, args);
|
|
751
|
+
}
|
|
752
|
+
async exposeFunction(name, callbackFunction) {
|
|
645
753
|
if (!this.#page) {
|
|
646
754
|
throw new Error("No valid page");
|
|
647
755
|
}
|
|
648
|
-
|
|
756
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
757
|
+
return;
|
|
649
758
|
}
|
|
650
759
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
651
760
|
if (!this.#page) {
|
|
@@ -683,7 +792,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
683
792
|
}
|
|
684
793
|
async free() {
|
|
685
794
|
if (this.#status === "free") {
|
|
686
|
-
|
|
795
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
687
796
|
}
|
|
688
797
|
this.#status = "free";
|
|
689
798
|
await this.clearRequestInterceptions();
|
|
@@ -823,6 +932,10 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
823
932
|
}
|
|
824
933
|
return response;
|
|
825
934
|
}
|
|
935
|
+
setCloseWhenFree(closeWhenFree) {
|
|
936
|
+
this.#closeWhenFree = closeWhenFree;
|
|
937
|
+
return true;
|
|
938
|
+
}
|
|
826
939
|
async setCookies(cookies) {
|
|
827
940
|
if (!this.#page) {
|
|
828
941
|
throw new Error("No valid page");
|
|
@@ -1006,7 +1119,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
1006
1119
|
}
|
|
1007
1120
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
1008
1121
|
if (actOptions.length <= 0) {
|
|
1009
|
-
|
|
1122
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
1010
1123
|
return false;
|
|
1011
1124
|
}
|
|
1012
1125
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
@@ -1708,6 +1821,26 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1708
1821
|
const names = await this.#frame.evaluate((ele) => ele.getAttributeNames(), this.#$ele);
|
|
1709
1822
|
return names;
|
|
1710
1823
|
}
|
|
1824
|
+
async dataset() {
|
|
1825
|
+
try {
|
|
1826
|
+
const attributeNames = await this.attributeNames();
|
|
1827
|
+
const dataset = {};
|
|
1828
|
+
for (const attributeName of attributeNames) {
|
|
1829
|
+
if (!attributeName.startsWith("data-")) {
|
|
1830
|
+
continue;
|
|
1831
|
+
}
|
|
1832
|
+
const val = await this.attribute(attributeName);
|
|
1833
|
+
const key = convertDataAttributeName(attributeName);
|
|
1834
|
+
dataset[key] = val;
|
|
1835
|
+
}
|
|
1836
|
+
return dataset;
|
|
1837
|
+
} catch (err) {
|
|
1838
|
+
return {};
|
|
1839
|
+
}
|
|
1840
|
+
}
|
|
1841
|
+
async evaluate(func, args) {
|
|
1842
|
+
return await this.#frame.evaluate(func, args);
|
|
1843
|
+
}
|
|
1711
1844
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
1712
1845
|
if (!parentFrame) {
|
|
1713
1846
|
throw new Error("Invalid parent frame");
|
|
@@ -1757,13 +1890,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1757
1890
|
let frame = this.#frame;
|
|
1758
1891
|
const retObj = { frame, elementHandles: [] };
|
|
1759
1892
|
if (iframeOptions.length > 0) {
|
|
1760
|
-
|
|
1761
|
-
if (!
|
|
1893
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
1894
|
+
if (!childFrame) {
|
|
1762
1895
|
return retObj;
|
|
1763
1896
|
}
|
|
1764
|
-
retObj.frame =
|
|
1897
|
+
retObj.frame = childFrame;
|
|
1765
1898
|
absolute = true;
|
|
1766
|
-
parent =
|
|
1899
|
+
parent = childFrame;
|
|
1767
1900
|
}
|
|
1768
1901
|
try {
|
|
1769
1902
|
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
@@ -1965,6 +2098,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1965
2098
|
#page;
|
|
1966
2099
|
#status;
|
|
1967
2100
|
#pageId;
|
|
2101
|
+
#closeWhenFree;
|
|
1968
2102
|
#requestInterceptionNum;
|
|
1969
2103
|
#responseInterceptionNum;
|
|
1970
2104
|
#client;
|
|
@@ -2039,15 +2173,41 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2039
2173
|
});
|
|
2040
2174
|
return true;
|
|
2041
2175
|
}
|
|
2176
|
+
async #findDescendantFrame(src, id) {
|
|
2177
|
+
if (!this.#page) {
|
|
2178
|
+
throw new Error("No valid page");
|
|
2179
|
+
}
|
|
2180
|
+
const frames = this.#page.frames();
|
|
2181
|
+
for (const frame of frames) {
|
|
2182
|
+
const url = frame.url();
|
|
2183
|
+
if (typeof src === "string" && src) {
|
|
2184
|
+
if (url.startsWith(src)) {
|
|
2185
|
+
return frame;
|
|
2186
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
2187
|
+
return frame;
|
|
2188
|
+
}
|
|
2189
|
+
} else if (src instanceof RegExp) {
|
|
2190
|
+
if (url.match(src)) {
|
|
2191
|
+
return frame;
|
|
2192
|
+
}
|
|
2193
|
+
} else if (id) {
|
|
2194
|
+
const element = await frame.frameElement();
|
|
2195
|
+
if (element) {
|
|
2196
|
+
const frameId = await frame.evaluate((ele, attr) => ele.getAttribute(attr), element, "id");
|
|
2197
|
+
if (frameId === id) {
|
|
2198
|
+
return frame;
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
}
|
|
2202
|
+
}
|
|
2203
|
+
return null;
|
|
2204
|
+
}
|
|
2042
2205
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
2043
2206
|
if (!parentFrame) {
|
|
2044
2207
|
throw new Error("Invalid parent frame");
|
|
2045
2208
|
}
|
|
2046
2209
|
let iframe = null;
|
|
2047
|
-
let { src = ""
|
|
2048
|
-
if (!src && !selector) {
|
|
2049
|
-
throw new Error("Invalid IframeOption");
|
|
2050
|
-
}
|
|
2210
|
+
let { src = "" } = iframeOption;
|
|
2051
2211
|
if (src) {
|
|
2052
2212
|
const childFrames = parentFrame.childFrames();
|
|
2053
2213
|
for (const childFrame of childFrames) {
|
|
@@ -2065,7 +2225,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2065
2225
|
}
|
|
2066
2226
|
}
|
|
2067
2227
|
} else {
|
|
2068
|
-
const
|
|
2228
|
+
const frameSelector = getIframeSelector(iframeOption);
|
|
2229
|
+
const $eleIframe = await parentFrame.$(frameSelector);
|
|
2069
2230
|
if ($eleIframe) {
|
|
2070
2231
|
iframe = await $eleIframe.contentFrame();
|
|
2071
2232
|
return iframe;
|
|
@@ -2075,11 +2236,16 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2075
2236
|
}
|
|
2076
2237
|
async #getDescendantFrame(parentFrame, iframeOptions) {
|
|
2077
2238
|
let iframe = parentFrame;
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2239
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
2240
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
2241
|
+
iframe = await this.#findDescendantFrame(src, id);
|
|
2242
|
+
} else {
|
|
2243
|
+
for (const iframeOption of iframeOptions) {
|
|
2244
|
+
if (!iframe) {
|
|
2245
|
+
return null;
|
|
2246
|
+
}
|
|
2247
|
+
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2081
2248
|
}
|
|
2082
|
-
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2083
2249
|
}
|
|
2084
2250
|
return iframe;
|
|
2085
2251
|
}
|
|
@@ -2108,7 +2274,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2108
2274
|
}
|
|
2109
2275
|
return retObj;
|
|
2110
2276
|
} catch (err) {
|
|
2111
|
-
|
|
2277
|
+
loginfo(err);
|
|
2112
2278
|
return retObj;
|
|
2113
2279
|
}
|
|
2114
2280
|
}
|
|
@@ -2161,11 +2327,31 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2161
2327
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
2162
2328
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
2163
2329
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
2330
|
+
this.#closeWhenFree = false;
|
|
2164
2331
|
this.#requestInterceptionNum = 0;
|
|
2165
2332
|
this.#responseInterceptionNum = 0;
|
|
2166
2333
|
this.#client = null;
|
|
2167
2334
|
this.#addPageOn();
|
|
2168
2335
|
}
|
|
2336
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
2337
|
+
if (!this.#page) {
|
|
2338
|
+
throw new Error("No valid page");
|
|
2339
|
+
}
|
|
2340
|
+
if (typeof scriptOrFunc === "string") {
|
|
2341
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc);
|
|
2342
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
2343
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc, arg);
|
|
2344
|
+
} else {
|
|
2345
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
2346
|
+
}
|
|
2347
|
+
return true;
|
|
2348
|
+
}
|
|
2349
|
+
async addScriptTag(options) {
|
|
2350
|
+
if (!this.#page) {
|
|
2351
|
+
throw new Error("No valid page");
|
|
2352
|
+
}
|
|
2353
|
+
return this.#page.addScriptTag(options);
|
|
2354
|
+
}
|
|
2169
2355
|
apiContext() {
|
|
2170
2356
|
throw new Error("Not supported in PuppeteerPage.");
|
|
2171
2357
|
}
|
|
@@ -2222,7 +2408,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2222
2408
|
}
|
|
2223
2409
|
async close() {
|
|
2224
2410
|
if (this.#status === "closed") {
|
|
2225
|
-
|
|
2411
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
2226
2412
|
return true;
|
|
2227
2413
|
} else if (this.#status === "busy") {
|
|
2228
2414
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -2235,6 +2421,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2235
2421
|
this.#status = "closed";
|
|
2236
2422
|
return true;
|
|
2237
2423
|
}
|
|
2424
|
+
closeWhenFree() {
|
|
2425
|
+
return this.#closeWhenFree;
|
|
2426
|
+
}
|
|
2238
2427
|
async content(iframeOptions = []) {
|
|
2239
2428
|
if (!this.#page) {
|
|
2240
2429
|
throw new Error("No valid page");
|
|
@@ -2263,11 +2452,18 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2263
2452
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
2264
2453
|
return height;
|
|
2265
2454
|
}
|
|
2266
|
-
async
|
|
2455
|
+
async evaluate(func, args) {
|
|
2456
|
+
if (!this.#page) {
|
|
2457
|
+
throw new Error("No valid page");
|
|
2458
|
+
}
|
|
2459
|
+
return this.#page.evaluate(func, args);
|
|
2460
|
+
}
|
|
2461
|
+
async exposeFunction(name, callbackFunction) {
|
|
2267
2462
|
if (!this.#page) {
|
|
2268
2463
|
throw new Error("No valid page");
|
|
2269
2464
|
}
|
|
2270
|
-
|
|
2465
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
2466
|
+
return;
|
|
2271
2467
|
}
|
|
2272
2468
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
2273
2469
|
if (!this.#page) {
|
|
@@ -2305,7 +2501,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2305
2501
|
}
|
|
2306
2502
|
async free() {
|
|
2307
2503
|
if (this.#status === "free") {
|
|
2308
|
-
|
|
2504
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
2309
2505
|
}
|
|
2310
2506
|
this.#status = "free";
|
|
2311
2507
|
await this.clearRequestInterceptions();
|
|
@@ -2448,6 +2644,10 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2448
2644
|
}
|
|
2449
2645
|
return response;
|
|
2450
2646
|
}
|
|
2647
|
+
setCloseWhenFree(closeWhenFree) {
|
|
2648
|
+
this.#closeWhenFree = closeWhenFree;
|
|
2649
|
+
return true;
|
|
2650
|
+
}
|
|
2451
2651
|
async setCookies(cookies) {
|
|
2452
2652
|
if (!this.#page) {
|
|
2453
2653
|
throw new Error("No valid page");
|
|
@@ -2523,7 +2723,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2523
2723
|
}
|
|
2524
2724
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2525
2725
|
if (actOptions.length <= 0) {
|
|
2526
|
-
|
|
2726
|
+
logwarn("Invalid paras in setRequestInterception");
|
|
2527
2727
|
return false;
|
|
2528
2728
|
}
|
|
2529
2729
|
if (this.#requestInterceptionNum <= 0) {
|
|
@@ -2559,7 +2759,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2559
2759
|
await request.continue();
|
|
2560
2760
|
return true;
|
|
2561
2761
|
} catch (err) {
|
|
2562
|
-
|
|
2762
|
+
logerr(err);
|
|
2563
2763
|
return false;
|
|
2564
2764
|
}
|
|
2565
2765
|
});
|
|
@@ -2571,7 +2771,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2571
2771
|
}
|
|
2572
2772
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2573
2773
|
if (actOptions.length <= 0) {
|
|
2574
|
-
|
|
2774
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
2575
2775
|
return false;
|
|
2576
2776
|
}
|
|
2577
2777
|
this.#responseInterceptionNum++;
|
|
@@ -2626,7 +2826,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2626
2826
|
}
|
|
2627
2827
|
return true;
|
|
2628
2828
|
} catch (err) {
|
|
2629
|
-
|
|
2829
|
+
logerr(err);
|
|
2630
2830
|
return false;
|
|
2631
2831
|
}
|
|
2632
2832
|
});
|
|
@@ -3286,6 +3486,22 @@ var CheerioElement = class _CheerioElement {
|
|
|
3286
3486
|
return Array.from(Object.keys(element.attribs));
|
|
3287
3487
|
}
|
|
3288
3488
|
}
|
|
3489
|
+
async dataset() {
|
|
3490
|
+
const attributeNames = await this.attributeNames();
|
|
3491
|
+
const dataset = {};
|
|
3492
|
+
for (const attributeName of attributeNames) {
|
|
3493
|
+
if (!attributeName.startsWith("data-")) {
|
|
3494
|
+
continue;
|
|
3495
|
+
}
|
|
3496
|
+
const val = await this.attribute(attributeName);
|
|
3497
|
+
const key = convertDataAttributeName(attributeName);
|
|
3498
|
+
dataset[key] = val;
|
|
3499
|
+
}
|
|
3500
|
+
return dataset;
|
|
3501
|
+
}
|
|
3502
|
+
async evaluate() {
|
|
3503
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3504
|
+
}
|
|
3289
3505
|
#findNodes(selector, absolute) {
|
|
3290
3506
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
3291
3507
|
throw new Error("Do not support XPath in cheerio.");
|
|
@@ -3406,6 +3622,12 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3406
3622
|
_origPage() {
|
|
3407
3623
|
throw new Error("Method not implemented.");
|
|
3408
3624
|
}
|
|
3625
|
+
async addPreloadScript() {
|
|
3626
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3627
|
+
}
|
|
3628
|
+
addScriptTag() {
|
|
3629
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3630
|
+
}
|
|
3409
3631
|
apiContext() {
|
|
3410
3632
|
throw new Error("Not supported in CheerioPage.");
|
|
3411
3633
|
}
|
|
@@ -3433,13 +3655,19 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3433
3655
|
async close() {
|
|
3434
3656
|
throw new Error("Not supported in CheerioPage.");
|
|
3435
3657
|
}
|
|
3658
|
+
closeWhenFree() {
|
|
3659
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3660
|
+
}
|
|
3436
3661
|
async content() {
|
|
3437
3662
|
throw new Error("Not supported in CheerioPage.");
|
|
3438
3663
|
}
|
|
3439
3664
|
async cookies() {
|
|
3440
3665
|
throw new Error("Not supported in CheerioPage.");
|
|
3441
3666
|
}
|
|
3442
|
-
async
|
|
3667
|
+
async evaluate() {
|
|
3668
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3669
|
+
}
|
|
3670
|
+
exposeFunction() {
|
|
3443
3671
|
throw new Error("Not supported in CheerioPage.");
|
|
3444
3672
|
}
|
|
3445
3673
|
#findNodes(selector) {
|
|
@@ -3536,6 +3764,9 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3536
3764
|
async sendCDPMessage() {
|
|
3537
3765
|
throw new Error("Method not implemented.");
|
|
3538
3766
|
}
|
|
3767
|
+
setCloseWhenFree() {
|
|
3768
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3769
|
+
}
|
|
3539
3770
|
async setCookies() {
|
|
3540
3771
|
throw new Error("Not supported in CheerioPage.");
|
|
3541
3772
|
}
|
|
@@ -3872,5 +4103,6 @@ export {
|
|
|
3872
4103
|
PuppeteerBrowserContext,
|
|
3873
4104
|
PuppeteerElement,
|
|
3874
4105
|
PuppeteerPage,
|
|
3875
|
-
controller
|
|
4106
|
+
controller,
|
|
4107
|
+
setControllerLogFun
|
|
3876
4108
|
};
|
package/package.json
CHANGED