@letsscrapedata/controller 0.0.51 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +279 -56
- package/dist/index.d.cts +117 -10
- package/dist/index.d.ts +117 -10
- package/dist/index.js +279 -56
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -31,6 +31,32 @@ import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedat
|
|
|
31
31
|
import EventEmitter from "events";
|
|
32
32
|
import { getCurrentUnixTime, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
33
33
|
|
|
34
|
+
// src/utils/common.ts
|
|
35
|
+
/**
 * Converts a `data-*` attribute name into the camelCase key used for it in a
 * dataset object, e.g. `data-user-id` -> `userId`.
 *
 * The first segment after `data-` is kept as-is; every following non-empty
 * segment gets its first character upper-cased and the rest lower-cased.
 * Empty segments (from consecutive dashes) are skipped.
 *
 * @param attr Attribute name such as `data-foo-bar`.
 * @returns The camelCase key, or "" when `attr` does not start with `data-`.
 */
function convertDataAttributeName(attr) {
  if (!attr.startsWith("data-")) {
    return "";
  }
  const [, firstSegment, ...otherSegments] = attr.split("-");
  let name = firstSegment;
  for (const segment of otherSegments) {
    if (!segment) {
      continue;
    }
    // Capitalise the FIRST character of the segment (index 0). Using index 1
    // produced keys like "fooAar" and threw on one-character segments.
    name = `${name}${segment[0].toUpperCase()}${segment.slice(1).toLowerCase()}`;
  }
  return name;
}
|
|
49
|
+
/**
 * Builds the CSS selector used to locate an iframe from an iframe option.
 *
 * Precedence: a non-empty string `src` (prefix match on the `src` attribute),
 * then a non-empty string `id`, otherwise the raw `selector` (which may be "").
 * A non-string `src` (for example a RegExp) is ignored here.
 *
 * @param iframeOption Object with optional `src`, `id` and `selector` fields.
 * @returns The selector string; "" when none of the fields is usable.
 */
function getIframeSelector(iframeOption) {
  const { src = "", id = "", selector = "" } = iframeOption;
  // Backslash-escape `"` and `\` so the value cannot terminate or corrupt the
  // double-quoted attribute value it is embedded in.
  const escapeQuoted = (value) => value.replace(/["\\]/g, "\\$&");
  if (typeof src === "string" && src) {
    return `iframe[src^="${escapeQuoted(src)}"]`;
  }
  if (typeof id === "string" && id) {
    return `iframe[id="${escapeQuoted(id)}"]`;
  }
  return selector;
}
|
|
59
|
+
|
|
34
60
|
// src/playwright/element.ts
|
|
35
61
|
import { unreachable } from "@letsscrapedata/utils";
|
|
36
62
|
var PlaywrightElement = class _PlaywrightElement {
|
|
@@ -51,6 +77,29 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
51
77
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
52
78
|
return names;
|
|
53
79
|
}
|
|
80
|
+
async dataset() {
|
|
81
|
+
try {
|
|
82
|
+
const dataset = await this.#locator.evaluate((node) => node.dataset);
|
|
83
|
+
return dataset;
|
|
84
|
+
} catch (err) {
|
|
85
|
+
return {};
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
async evaluate(func, args) {
|
|
89
|
+
try {
|
|
90
|
+
const frame = this.#frame;
|
|
91
|
+
;
|
|
92
|
+
if (typeof frame.parentFrame === "function") {
|
|
93
|
+
return await frame.evaluate(func, args);
|
|
94
|
+
} else {
|
|
95
|
+
const locator = this.#frame.owner();
|
|
96
|
+
return await locator.evaluate(func, args);
|
|
97
|
+
}
|
|
98
|
+
} catch (err) {
|
|
99
|
+
logerr(err);
|
|
100
|
+
return "";
|
|
101
|
+
}
|
|
102
|
+
}
|
|
54
103
|
/*
|
|
55
104
|
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
56
105
|
if (!parentFrame) {
|
|
@@ -84,22 +133,15 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
84
133
|
return null;
|
|
85
134
|
}
|
|
86
135
|
*/
|
|
87
|
-
#getIframeSelector(iframeOption) {
|
|
88
|
-
const { src = "", selector = "" } = iframeOption;
|
|
89
|
-
if (!src && !selector) {
|
|
90
|
-
throw new Error("Invalid parent frame");
|
|
91
|
-
}
|
|
92
|
-
return selector ? selector : `iframe[src^="${src}"]`;
|
|
93
|
-
}
|
|
94
136
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
95
|
-
return parent.frameLocator(
|
|
137
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
96
138
|
}
|
|
97
139
|
async #getDescendantFrame(parent, iframeOptions) {
|
|
98
140
|
try {
|
|
99
141
|
if (iframeOptions.length <= 0) {
|
|
100
142
|
return null;
|
|
101
143
|
}
|
|
102
|
-
let frameLocator = parent.frameLocator(
|
|
144
|
+
let frameLocator = parent.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
103
145
|
for (const iframeOption of iframeOptions.slice(1)) {
|
|
104
146
|
if (!frameLocator) {
|
|
105
147
|
return null;
|
|
@@ -116,12 +158,12 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
116
158
|
let frame = this.#frame;
|
|
117
159
|
const retObj = { frame, locators: [] };
|
|
118
160
|
if (iframeOptions.length > 0) {
|
|
119
|
-
|
|
120
|
-
if (!
|
|
161
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
162
|
+
if (!childFrame) {
|
|
121
163
|
return retObj;
|
|
122
164
|
}
|
|
123
|
-
retObj.frame =
|
|
124
|
-
parent =
|
|
165
|
+
retObj.frame = childFrame;
|
|
166
|
+
parent = childFrame;
|
|
125
167
|
}
|
|
126
168
|
try {
|
|
127
169
|
let locators = [];
|
|
@@ -306,6 +348,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
306
348
|
#page;
|
|
307
349
|
#status;
|
|
308
350
|
#pageId;
|
|
351
|
+
#closeWhenFree;
|
|
309
352
|
#resquestInterceptionOptions;
|
|
310
353
|
#responseInterceptionOptions;
|
|
311
354
|
#client;
|
|
@@ -436,29 +479,57 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
436
479
|
return null;
|
|
437
480
|
}
|
|
438
481
|
*/
|
|
439
|
-
#
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
482
|
+
async #findDescendantFrame(src, id) {
|
|
483
|
+
if (!this.#page) {
|
|
484
|
+
throw new Error("No valid page");
|
|
485
|
+
}
|
|
486
|
+
const frames = this.#page.frames();
|
|
487
|
+
for (const frame of frames) {
|
|
488
|
+
const url = frame.url();
|
|
489
|
+
if (typeof src === "string" && src) {
|
|
490
|
+
if (url.startsWith(src)) {
|
|
491
|
+
return frame;
|
|
492
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
493
|
+
return frame;
|
|
494
|
+
}
|
|
495
|
+
} else if (src instanceof RegExp) {
|
|
496
|
+
if (url.match(src)) {
|
|
497
|
+
return frame;
|
|
498
|
+
}
|
|
499
|
+
} else if (id) {
|
|
500
|
+
const element = await frame.frameElement();
|
|
501
|
+
if (element) {
|
|
502
|
+
const frameId = await frame.evaluate(([ele, attr]) => ele.getAttribute(attr), [element, "id"]);
|
|
503
|
+
if (frameId === id) {
|
|
504
|
+
return frame;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
}
|
|
443
508
|
}
|
|
444
|
-
return
|
|
509
|
+
return null;
|
|
445
510
|
}
|
|
446
511
|
async #getChildFrameLocator(parent, iframeOption) {
|
|
447
|
-
return parent.frameLocator(
|
|
512
|
+
return parent.frameLocator(getIframeSelector(iframeOption));
|
|
448
513
|
}
|
|
449
|
-
async #
|
|
514
|
+
async #getDescendantFrame(mainFrame, iframeOptions) {
|
|
450
515
|
try {
|
|
451
516
|
if (iframeOptions.length <= 0) {
|
|
452
517
|
return null;
|
|
453
518
|
}
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
519
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
520
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
521
|
+
const frame = await this.#findDescendantFrame(src, id);
|
|
522
|
+
return frame;
|
|
523
|
+
} else {
|
|
524
|
+
let frameLocator = mainFrame.frameLocator(getIframeSelector(iframeOptions[0]));
|
|
525
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
526
|
+
if (!frameLocator) {
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
458
530
|
}
|
|
459
|
-
|
|
531
|
+
return frameLocator;
|
|
460
532
|
}
|
|
461
|
-
return frameLocator;
|
|
462
533
|
} catch (err) {
|
|
463
534
|
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
464
535
|
}
|
|
@@ -470,7 +541,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
470
541
|
let frame = this.#page.mainFrame();
|
|
471
542
|
const retObj = { frame, locators: [] };
|
|
472
543
|
if (iframeOptions.length > 0) {
|
|
473
|
-
frame = await this.#
|
|
544
|
+
frame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
474
545
|
if (!frame) {
|
|
475
546
|
return retObj;
|
|
476
547
|
}
|
|
@@ -543,12 +614,32 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
543
614
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
544
615
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
545
616
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
617
|
+
this.#closeWhenFree = false;
|
|
546
618
|
this.#resquestInterceptionOptions = [];
|
|
547
619
|
this.#responseInterceptionOptions = [];
|
|
548
620
|
this.#client = null;
|
|
549
621
|
this.#responseCb = null;
|
|
550
622
|
this.#addPageOn();
|
|
551
623
|
}
|
|
624
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
625
|
+
if (!this.#page) {
|
|
626
|
+
throw new Error("No valid page");
|
|
627
|
+
}
|
|
628
|
+
if (typeof scriptOrFunc === "string") {
|
|
629
|
+
await this.#page.addInitScript({ content: scriptOrFunc });
|
|
630
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
631
|
+
await this.#page.addInitScript(scriptOrFunc, arg);
|
|
632
|
+
} else {
|
|
633
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
634
|
+
}
|
|
635
|
+
return true;
|
|
636
|
+
}
|
|
637
|
+
async addScriptTag(options) {
|
|
638
|
+
if (!this.#page) {
|
|
639
|
+
throw new Error("No valid page");
|
|
640
|
+
}
|
|
641
|
+
return this.#page.addScriptTag(options);
|
|
642
|
+
}
|
|
552
643
|
apiContext() {
|
|
553
644
|
return this.browserContext().apiContext();
|
|
554
645
|
}
|
|
@@ -608,7 +699,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
608
699
|
}
|
|
609
700
|
async close() {
|
|
610
701
|
if (this.#status === "closed") {
|
|
611
|
-
|
|
702
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
612
703
|
return true;
|
|
613
704
|
} else if (this.#status === "busy") {
|
|
614
705
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -621,13 +712,16 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
621
712
|
this.#status = "closed";
|
|
622
713
|
return true;
|
|
623
714
|
}
|
|
715
|
+
closeWhenFree() {
|
|
716
|
+
return this.#closeWhenFree;
|
|
717
|
+
}
|
|
624
718
|
async content(iframeOptions = []) {
|
|
625
719
|
if (!this.#page) {
|
|
626
720
|
throw new Error("No valid page");
|
|
627
721
|
}
|
|
628
722
|
let content = "";
|
|
629
723
|
if (iframeOptions.length > 0) {
|
|
630
|
-
const frameLocator = await this.#
|
|
724
|
+
const frameLocator = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
|
|
631
725
|
if (frameLocator) {
|
|
632
726
|
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
633
727
|
}
|
|
@@ -649,11 +743,18 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
649
743
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
650
744
|
return height;
|
|
651
745
|
}
|
|
652
|
-
async
|
|
746
|
+
async evaluate(func, args) {
|
|
653
747
|
if (!this.#page) {
|
|
654
748
|
throw new Error("No valid page");
|
|
655
749
|
}
|
|
656
|
-
return this.#page.evaluate(
|
|
750
|
+
return this.#page.evaluate(func, args);
|
|
751
|
+
}
|
|
752
|
+
async exposeFunction(name, callbackFunction) {
|
|
753
|
+
if (!this.#page) {
|
|
754
|
+
throw new Error("No valid page");
|
|
755
|
+
}
|
|
756
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
757
|
+
return;
|
|
657
758
|
}
|
|
658
759
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
659
760
|
if (!this.#page) {
|
|
@@ -691,7 +792,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
691
792
|
}
|
|
692
793
|
async free() {
|
|
693
794
|
if (this.#status === "free") {
|
|
694
|
-
|
|
795
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
695
796
|
}
|
|
696
797
|
this.#status = "free";
|
|
697
798
|
await this.clearRequestInterceptions();
|
|
@@ -831,6 +932,10 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
831
932
|
}
|
|
832
933
|
return response;
|
|
833
934
|
}
|
|
935
|
+
setCloseWhenFree(closeWhenFree) {
|
|
936
|
+
this.#closeWhenFree = closeWhenFree;
|
|
937
|
+
return true;
|
|
938
|
+
}
|
|
834
939
|
async setCookies(cookies) {
|
|
835
940
|
if (!this.#page) {
|
|
836
941
|
throw new Error("No valid page");
|
|
@@ -1014,7 +1119,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
1014
1119
|
}
|
|
1015
1120
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
1016
1121
|
if (actOptions.length <= 0) {
|
|
1017
|
-
|
|
1122
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
1018
1123
|
return false;
|
|
1019
1124
|
}
|
|
1020
1125
|
const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
|
|
@@ -1716,6 +1821,26 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1716
1821
|
const names = await this.#frame.evaluate((ele) => ele.getAttributeNames(), this.#$ele);
|
|
1717
1822
|
return names;
|
|
1718
1823
|
}
|
|
1824
|
+
async dataset() {
|
|
1825
|
+
try {
|
|
1826
|
+
const attributeNames = await this.attributeNames();
|
|
1827
|
+
const dataset = {};
|
|
1828
|
+
for (const attributeName of attributeNames) {
|
|
1829
|
+
if (!attributeName.startsWith("data-")) {
|
|
1830
|
+
continue;
|
|
1831
|
+
}
|
|
1832
|
+
const val = await this.attribute(attributeName);
|
|
1833
|
+
const key = convertDataAttributeName(attributeName);
|
|
1834
|
+
dataset[key] = val;
|
|
1835
|
+
}
|
|
1836
|
+
return dataset;
|
|
1837
|
+
} catch (err) {
|
|
1838
|
+
return {};
|
|
1839
|
+
}
|
|
1840
|
+
}
|
|
1841
|
+
async evaluate(func, args) {
|
|
1842
|
+
return await this.#frame.evaluate(func, args);
|
|
1843
|
+
}
|
|
1719
1844
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
1720
1845
|
if (!parentFrame) {
|
|
1721
1846
|
throw new Error("Invalid parent frame");
|
|
@@ -1765,13 +1890,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1765
1890
|
let frame = this.#frame;
|
|
1766
1891
|
const retObj = { frame, elementHandles: [] };
|
|
1767
1892
|
if (iframeOptions.length > 0) {
|
|
1768
|
-
|
|
1769
|
-
if (!
|
|
1893
|
+
const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
|
|
1894
|
+
if (!childFrame) {
|
|
1770
1895
|
return retObj;
|
|
1771
1896
|
}
|
|
1772
|
-
retObj.frame =
|
|
1897
|
+
retObj.frame = childFrame;
|
|
1773
1898
|
absolute = true;
|
|
1774
|
-
parent =
|
|
1899
|
+
parent = childFrame;
|
|
1775
1900
|
}
|
|
1776
1901
|
try {
|
|
1777
1902
|
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
@@ -1973,6 +2098,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1973
2098
|
#page;
|
|
1974
2099
|
#status;
|
|
1975
2100
|
#pageId;
|
|
2101
|
+
#closeWhenFree;
|
|
1976
2102
|
#requestInterceptionNum;
|
|
1977
2103
|
#responseInterceptionNum;
|
|
1978
2104
|
#client;
|
|
@@ -2047,15 +2173,41 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2047
2173
|
});
|
|
2048
2174
|
return true;
|
|
2049
2175
|
}
|
|
2176
|
+
async #findDescendantFrame(src, id) {
|
|
2177
|
+
if (!this.#page) {
|
|
2178
|
+
throw new Error("No valid page");
|
|
2179
|
+
}
|
|
2180
|
+
const frames = this.#page.frames();
|
|
2181
|
+
for (const frame of frames) {
|
|
2182
|
+
const url = frame.url();
|
|
2183
|
+
if (typeof src === "string" && src) {
|
|
2184
|
+
if (url.startsWith(src)) {
|
|
2185
|
+
return frame;
|
|
2186
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
2187
|
+
return frame;
|
|
2188
|
+
}
|
|
2189
|
+
} else if (src instanceof RegExp) {
|
|
2190
|
+
if (url.match(src)) {
|
|
2191
|
+
return frame;
|
|
2192
|
+
}
|
|
2193
|
+
} else if (id) {
|
|
2194
|
+
const element = await frame.frameElement();
|
|
2195
|
+
if (element) {
|
|
2196
|
+
const frameId = await frame.evaluate((ele, attr) => ele.getAttribute(attr), element, "id");
|
|
2197
|
+
if (frameId === id) {
|
|
2198
|
+
return frame;
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
}
|
|
2202
|
+
}
|
|
2203
|
+
return null;
|
|
2204
|
+
}
|
|
2050
2205
|
async #getChildFrame(parentFrame, iframeOption) {
|
|
2051
2206
|
if (!parentFrame) {
|
|
2052
2207
|
throw new Error("Invalid parent frame");
|
|
2053
2208
|
}
|
|
2054
2209
|
let iframe = null;
|
|
2055
|
-
let { src = ""
|
|
2056
|
-
if (!src && !selector) {
|
|
2057
|
-
throw new Error("Invalid IframeOption");
|
|
2058
|
-
}
|
|
2210
|
+
let { src = "" } = iframeOption;
|
|
2059
2211
|
if (src) {
|
|
2060
2212
|
const childFrames = parentFrame.childFrames();
|
|
2061
2213
|
for (const childFrame of childFrames) {
|
|
@@ -2073,7 +2225,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2073
2225
|
}
|
|
2074
2226
|
}
|
|
2075
2227
|
} else {
|
|
2076
|
-
const
|
|
2228
|
+
const frameSelector = getIframeSelector(iframeOption);
|
|
2229
|
+
const $eleIframe = await parentFrame.$(frameSelector);
|
|
2077
2230
|
if ($eleIframe) {
|
|
2078
2231
|
iframe = await $eleIframe.contentFrame();
|
|
2079
2232
|
return iframe;
|
|
@@ -2083,11 +2236,16 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2083
2236
|
}
|
|
2084
2237
|
async #getDescendantFrame(parentFrame, iframeOptions) {
|
|
2085
2238
|
let iframe = parentFrame;
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2239
|
+
if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
|
|
2240
|
+
const { src = "", id = "" } = iframeOptions[0];
|
|
2241
|
+
iframe = await this.#findDescendantFrame(src, id);
|
|
2242
|
+
} else {
|
|
2243
|
+
for (const iframeOption of iframeOptions) {
|
|
2244
|
+
if (!iframe) {
|
|
2245
|
+
return null;
|
|
2246
|
+
}
|
|
2247
|
+
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2089
2248
|
}
|
|
2090
|
-
iframe = await this.#getChildFrame(iframe, iframeOption);
|
|
2091
2249
|
}
|
|
2092
2250
|
return iframe;
|
|
2093
2251
|
}
|
|
@@ -2116,7 +2274,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2116
2274
|
}
|
|
2117
2275
|
return retObj;
|
|
2118
2276
|
} catch (err) {
|
|
2119
|
-
|
|
2277
|
+
loginfo(err);
|
|
2120
2278
|
return retObj;
|
|
2121
2279
|
}
|
|
2122
2280
|
}
|
|
@@ -2169,11 +2327,31 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2169
2327
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
2170
2328
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
2171
2329
|
this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
2330
|
+
this.#closeWhenFree = false;
|
|
2172
2331
|
this.#requestInterceptionNum = 0;
|
|
2173
2332
|
this.#responseInterceptionNum = 0;
|
|
2174
2333
|
this.#client = null;
|
|
2175
2334
|
this.#addPageOn();
|
|
2176
2335
|
}
|
|
2336
|
+
async addPreloadScript(scriptOrFunc, arg) {
|
|
2337
|
+
if (!this.#page) {
|
|
2338
|
+
throw new Error("No valid page");
|
|
2339
|
+
}
|
|
2340
|
+
if (typeof scriptOrFunc === "string") {
|
|
2341
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc);
|
|
2342
|
+
} else if (typeof scriptOrFunc === "function") {
|
|
2343
|
+
await this.#page.evaluateOnNewDocument(scriptOrFunc, arg);
|
|
2344
|
+
} else {
|
|
2345
|
+
throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
|
|
2346
|
+
}
|
|
2347
|
+
return true;
|
|
2348
|
+
}
|
|
2349
|
+
async addScriptTag(options) {
|
|
2350
|
+
if (!this.#page) {
|
|
2351
|
+
throw new Error("No valid page");
|
|
2352
|
+
}
|
|
2353
|
+
return this.#page.addScriptTag(options);
|
|
2354
|
+
}
|
|
2177
2355
|
apiContext() {
|
|
2178
2356
|
throw new Error("Not supported in PuppeteerPage.");
|
|
2179
2357
|
}
|
|
@@ -2230,7 +2408,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2230
2408
|
}
|
|
2231
2409
|
async close() {
|
|
2232
2410
|
if (this.#status === "closed") {
|
|
2233
|
-
|
|
2411
|
+
logwarn(`Page ${this.#pageId} is already closed.`);
|
|
2234
2412
|
return true;
|
|
2235
2413
|
} else if (this.#status === "busy") {
|
|
2236
2414
|
throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
|
|
@@ -2243,6 +2421,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2243
2421
|
this.#status = "closed";
|
|
2244
2422
|
return true;
|
|
2245
2423
|
}
|
|
2424
|
+
closeWhenFree() {
|
|
2425
|
+
return this.#closeWhenFree;
|
|
2426
|
+
}
|
|
2246
2427
|
async content(iframeOptions = []) {
|
|
2247
2428
|
if (!this.#page) {
|
|
2248
2429
|
throw new Error("No valid page");
|
|
@@ -2271,11 +2452,18 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2271
2452
|
const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
|
|
2272
2453
|
return height;
|
|
2273
2454
|
}
|
|
2274
|
-
async
|
|
2455
|
+
async evaluate(func, args) {
|
|
2456
|
+
if (!this.#page) {
|
|
2457
|
+
throw new Error("No valid page");
|
|
2458
|
+
}
|
|
2459
|
+
return this.#page.evaluate(func, args);
|
|
2460
|
+
}
|
|
2461
|
+
async exposeFunction(name, callbackFunction) {
|
|
2275
2462
|
if (!this.#page) {
|
|
2276
2463
|
throw new Error("No valid page");
|
|
2277
2464
|
}
|
|
2278
|
-
|
|
2465
|
+
await this.#page.exposeFunction(name, callbackFunction);
|
|
2466
|
+
return;
|
|
2279
2467
|
}
|
|
2280
2468
|
async findElement(selectorOrXpath, iframeOptions = []) {
|
|
2281
2469
|
if (!this.#page) {
|
|
@@ -2313,7 +2501,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2313
2501
|
}
|
|
2314
2502
|
async free() {
|
|
2315
2503
|
if (this.#status === "free") {
|
|
2316
|
-
|
|
2504
|
+
logwarn(`Page ${this.#pageId} is already free.`);
|
|
2317
2505
|
}
|
|
2318
2506
|
this.#status = "free";
|
|
2319
2507
|
await this.clearRequestInterceptions();
|
|
@@ -2456,6 +2644,10 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2456
2644
|
}
|
|
2457
2645
|
return response;
|
|
2458
2646
|
}
|
|
2647
|
+
setCloseWhenFree(closeWhenFree) {
|
|
2648
|
+
this.#closeWhenFree = closeWhenFree;
|
|
2649
|
+
return true;
|
|
2650
|
+
}
|
|
2459
2651
|
async setCookies(cookies) {
|
|
2460
2652
|
if (!this.#page) {
|
|
2461
2653
|
throw new Error("No valid page");
|
|
@@ -2531,7 +2723,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2531
2723
|
}
|
|
2532
2724
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2533
2725
|
if (actOptions.length <= 0) {
|
|
2534
|
-
|
|
2726
|
+
logwarn("Invalid paras in setRequestInterception");
|
|
2535
2727
|
return false;
|
|
2536
2728
|
}
|
|
2537
2729
|
if (this.#requestInterceptionNum <= 0) {
|
|
@@ -2567,7 +2759,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2567
2759
|
await request.continue();
|
|
2568
2760
|
return true;
|
|
2569
2761
|
} catch (err) {
|
|
2570
|
-
|
|
2762
|
+
logerr(err);
|
|
2571
2763
|
return false;
|
|
2572
2764
|
}
|
|
2573
2765
|
});
|
|
@@ -2579,7 +2771,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2579
2771
|
}
|
|
2580
2772
|
const actOptions = Array.isArray(options) ? options : [options];
|
|
2581
2773
|
if (actOptions.length <= 0) {
|
|
2582
|
-
|
|
2774
|
+
logwarn("Invalid paras in setResponseInterception");
|
|
2583
2775
|
return false;
|
|
2584
2776
|
}
|
|
2585
2777
|
this.#responseInterceptionNum++;
|
|
@@ -2634,7 +2826,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2634
2826
|
}
|
|
2635
2827
|
return true;
|
|
2636
2828
|
} catch (err) {
|
|
2637
|
-
|
|
2829
|
+
logerr(err);
|
|
2638
2830
|
return false;
|
|
2639
2831
|
}
|
|
2640
2832
|
});
|
|
@@ -3294,6 +3486,22 @@ var CheerioElement = class _CheerioElement {
|
|
|
3294
3486
|
return Array.from(Object.keys(element.attribs));
|
|
3295
3487
|
}
|
|
3296
3488
|
}
|
|
3489
|
+
async dataset() {
|
|
3490
|
+
const attributeNames = await this.attributeNames();
|
|
3491
|
+
const dataset = {};
|
|
3492
|
+
for (const attributeName of attributeNames) {
|
|
3493
|
+
if (!attributeName.startsWith("data-")) {
|
|
3494
|
+
continue;
|
|
3495
|
+
}
|
|
3496
|
+
const val = await this.attribute(attributeName);
|
|
3497
|
+
const key = convertDataAttributeName(attributeName);
|
|
3498
|
+
dataset[key] = val;
|
|
3499
|
+
}
|
|
3500
|
+
return dataset;
|
|
3501
|
+
}
|
|
3502
|
+
async evaluate() {
|
|
3503
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3504
|
+
}
|
|
3297
3505
|
#findNodes(selector, absolute) {
|
|
3298
3506
|
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
3299
3507
|
throw new Error("Do not support XPath in cheerio.");
|
|
@@ -3414,6 +3622,12 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3414
3622
|
_origPage() {
|
|
3415
3623
|
throw new Error("Method not implemented.");
|
|
3416
3624
|
}
|
|
3625
|
+
async addPreloadScript() {
|
|
3626
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3627
|
+
}
|
|
3628
|
+
addScriptTag() {
|
|
3629
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3630
|
+
}
|
|
3417
3631
|
apiContext() {
|
|
3418
3632
|
throw new Error("Not supported in CheerioPage.");
|
|
3419
3633
|
}
|
|
@@ -3441,13 +3655,19 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3441
3655
|
async close() {
|
|
3442
3656
|
throw new Error("Not supported in CheerioPage.");
|
|
3443
3657
|
}
|
|
3658
|
+
closeWhenFree() {
|
|
3659
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3660
|
+
}
|
|
3444
3661
|
async content() {
|
|
3445
3662
|
throw new Error("Not supported in CheerioPage.");
|
|
3446
3663
|
}
|
|
3447
3664
|
async cookies() {
|
|
3448
3665
|
throw new Error("Not supported in CheerioPage.");
|
|
3449
3666
|
}
|
|
3450
|
-
async
|
|
3667
|
+
async evaluate() {
|
|
3668
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3669
|
+
}
|
|
3670
|
+
exposeFunction() {
|
|
3451
3671
|
throw new Error("Not supported in CheerioPage.");
|
|
3452
3672
|
}
|
|
3453
3673
|
#findNodes(selector) {
|
|
@@ -3544,6 +3764,9 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3544
3764
|
async sendCDPMessage() {
|
|
3545
3765
|
throw new Error("Method not implemented.");
|
|
3546
3766
|
}
|
|
3767
|
+
setCloseWhenFree() {
|
|
3768
|
+
throw new Error("Not supported in CheerioPage.");
|
|
3769
|
+
}
|
|
3547
3770
|
async setCookies() {
|
|
3548
3771
|
throw new Error("Not supported in CheerioPage.");
|
|
3549
3772
|
}
|
package/package.json
CHANGED