@letsscrapedata/controller 0.0.29 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +141 -65
- package/dist/index.d.cts +25 -7
- package/dist/index.d.ts +25 -7
- package/dist/index.js +170 -94
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -90,7 +90,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
90
90
|
#frame;
|
|
91
91
|
#locator;
|
|
92
92
|
constructor(locator, frame) {
|
|
93
|
-
if (!frame.
|
|
93
|
+
if (!frame.locator || !locator.click) {
|
|
94
94
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
95
95
|
}
|
|
96
96
|
this.#frame = frame;
|
|
@@ -104,41 +104,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
104
104
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
105
105
|
return names;
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
107
|
+
/*
|
|
108
|
+
// 如果不存在指定的子iframe,则返回null
|
|
109
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
110
|
+
if (!parentFrame) {
|
|
111
|
+
throw new Error("Invalid parent frame");
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let { src = "" } = iframeOption;
|
|
115
|
+
if (!src) {
|
|
116
|
+
throw new Error("Invalid src in IframeOption");
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// src: use childFrames()
|
|
120
|
+
const childFrames = parentFrame.childFrames();
|
|
121
|
+
for (const childFrame of childFrames) {
|
|
122
|
+
const url = childFrame.url();
|
|
123
|
+
if (typeof src === "string") {
|
|
124
|
+
// src: string
|
|
125
|
+
if (url.startsWith(src)) {
|
|
126
|
+
return childFrame;
|
|
127
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
128
|
+
return childFrame;
|
|
129
|
+
}
|
|
130
|
+
} else {
|
|
131
|
+
// src: RegExp
|
|
132
|
+
if (url.match(src)) {
|
|
133
|
+
return childFrame;
|
|
134
|
+
}
|
|
128
135
|
}
|
|
129
136
|
}
|
|
137
|
+
|
|
138
|
+
return null;
|
|
130
139
|
}
|
|
131
|
-
|
|
140
|
+
*/
|
|
141
|
+
#getIframeSelector(iframeOption) {
|
|
142
|
+
const { src = "", selector = "" } = iframeOption;
|
|
143
|
+
if (!src && !selector) {
|
|
144
|
+
throw new Error("Invalid parent frame");
|
|
145
|
+
}
|
|
146
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
132
147
|
}
|
|
133
|
-
async #
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
148
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
149
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
150
|
+
}
|
|
151
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
152
|
+
try {
|
|
153
|
+
if (iframeOptions.length <= 0) {
|
|
137
154
|
return null;
|
|
138
155
|
}
|
|
139
|
-
|
|
156
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
157
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
158
|
+
if (!frameLocator) {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
162
|
+
}
|
|
163
|
+
return frameLocator;
|
|
164
|
+
} catch (err) {
|
|
165
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
140
166
|
}
|
|
141
|
-
return iframe;
|
|
142
167
|
}
|
|
143
168
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
144
169
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -166,7 +191,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
166
191
|
retObj.locators = locators;
|
|
167
192
|
return retObj;
|
|
168
193
|
} catch (err) {
|
|
169
|
-
|
|
194
|
+
(0, import_utils.loginfo)(err);
|
|
170
195
|
return retObj;
|
|
171
196
|
}
|
|
172
197
|
}
|
|
@@ -423,41 +448,66 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
423
448
|
});
|
|
424
449
|
return true;
|
|
425
450
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
451
|
+
/*
|
|
452
|
+
// 如果不存在指定的子iframe,则返回null
|
|
453
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
454
|
+
if (!parentFrame) {
|
|
455
|
+
throw new Error("Invalid parent frame");
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
let { src = "" } = iframeOption;
|
|
459
|
+
if (!src) {
|
|
460
|
+
throw new Error("Invalid src in IframeOption");
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// src: use childFrames()
|
|
464
|
+
const childFrames = parentFrame.childFrames();
|
|
465
|
+
for (const childFrame of childFrames) {
|
|
466
|
+
const url = childFrame.url();
|
|
467
|
+
if (typeof src === "string") {
|
|
468
|
+
// src: string
|
|
469
|
+
if (url.startsWith(src)) {
|
|
470
|
+
return childFrame;
|
|
471
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
472
|
+
return childFrame;
|
|
473
|
+
}
|
|
474
|
+
} else {
|
|
475
|
+
// src: RegExp
|
|
476
|
+
if (url.match(src)) {
|
|
477
|
+
return childFrame;
|
|
478
|
+
}
|
|
447
479
|
}
|
|
448
480
|
}
|
|
481
|
+
|
|
482
|
+
return null;
|
|
449
483
|
}
|
|
450
|
-
|
|
484
|
+
*/
|
|
485
|
+
#getIframeSelector(iframeOption) {
|
|
486
|
+
const { src = "", selector = "" } = iframeOption;
|
|
487
|
+
if (!src && !selector) {
|
|
488
|
+
throw new Error("Invalid parent frame");
|
|
489
|
+
}
|
|
490
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
451
491
|
}
|
|
452
|
-
async #
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
492
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
493
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
494
|
+
}
|
|
495
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
496
|
+
try {
|
|
497
|
+
if (iframeOptions.length <= 0) {
|
|
456
498
|
return null;
|
|
457
499
|
}
|
|
458
|
-
|
|
500
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
501
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
502
|
+
if (!frameLocator) {
|
|
503
|
+
return null;
|
|
504
|
+
}
|
|
505
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
506
|
+
}
|
|
507
|
+
return frameLocator;
|
|
508
|
+
} catch (err) {
|
|
509
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
459
510
|
}
|
|
460
|
-
return iframe;
|
|
461
511
|
}
|
|
462
512
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
463
513
|
if (!this.#page) {
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
466
516
|
let frame = this.#page.mainFrame();
|
|
467
517
|
const retObj = { frame, locators: [] };
|
|
468
518
|
if (iframeOptions.length > 0) {
|
|
469
|
-
frame = await this.#
|
|
519
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
470
520
|
if (!frame) {
|
|
471
521
|
return retObj;
|
|
472
522
|
}
|
|
@@ -486,7 +536,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
486
536
|
retObj.locators = locators;
|
|
487
537
|
return retObj;
|
|
488
538
|
} catch (err) {
|
|
489
|
-
|
|
539
|
+
(0, import_utils2.loginfo)(err);
|
|
490
540
|
return retObj;
|
|
491
541
|
}
|
|
492
542
|
}
|
|
@@ -545,6 +595,11 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
545
595
|
this.#responseCb = null;
|
|
546
596
|
this.#addPageOn();
|
|
547
597
|
}
|
|
598
|
+
apiRequestContext() {
|
|
599
|
+
const origBrowserContext = this.browserContext()._origBrowserContext();
|
|
600
|
+
const apiRequestContext = origBrowserContext.request;
|
|
601
|
+
return apiRequestContext;
|
|
602
|
+
}
|
|
548
603
|
async bringToFront() {
|
|
549
604
|
if (!this.#page) {
|
|
550
605
|
throw new Error("No valid page");
|
|
@@ -620,9 +675,9 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
620
675
|
}
|
|
621
676
|
let content = "";
|
|
622
677
|
if (iframeOptions.length > 0) {
|
|
623
|
-
const
|
|
624
|
-
if (
|
|
625
|
-
content = await
|
|
678
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
679
|
+
if (frameLocator) {
|
|
680
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
626
681
|
}
|
|
627
682
|
} else {
|
|
628
683
|
content = await this.#page.content();
|
|
@@ -985,7 +1040,8 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
985
1040
|
(0, import_utils2.loginfo)(`##browser cache matched response: ${requestUrl}`);
|
|
986
1041
|
}
|
|
987
1042
|
if (typeof handler === "function") {
|
|
988
|
-
|
|
1043
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1044
|
+
await handler(response, handlerOptions, pageData);
|
|
989
1045
|
}
|
|
990
1046
|
}
|
|
991
1047
|
return;
|
|
@@ -2026,6 +2082,9 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2026
2082
|
this.#client = null;
|
|
2027
2083
|
this.#addPageOn();
|
|
2028
2084
|
}
|
|
2085
|
+
apiRequestContext() {
|
|
2086
|
+
throw new Error("Not supported in PuppeteerPage.");
|
|
2087
|
+
}
|
|
2029
2088
|
async bringToFront() {
|
|
2030
2089
|
if (!this.#page) {
|
|
2031
2090
|
throw new Error("No valid page");
|
|
@@ -2465,7 +2524,14 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2465
2524
|
});
|
|
2466
2525
|
}
|
|
2467
2526
|
if (typeof handler === "function") {
|
|
2468
|
-
|
|
2527
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2528
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2529
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2530
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2531
|
+
}).join("; ");
|
|
2532
|
+
pageData.cookies = cookies;
|
|
2533
|
+
}
|
|
2534
|
+
await handler(response, handlerOptions, pageData);
|
|
2469
2535
|
}
|
|
2470
2536
|
}
|
|
2471
2537
|
return true;
|
|
@@ -17008,6 +17074,9 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
17008
17074
|
this.#document = load(html3, { xml: true }).root();
|
|
17009
17075
|
}
|
|
17010
17076
|
}
|
|
17077
|
+
apiRequestContext() {
|
|
17078
|
+
throw new Error("Not supported in CheerioPage.");
|
|
17079
|
+
}
|
|
17011
17080
|
async bringToFront() {
|
|
17012
17081
|
throw new Error("Not supported in CheerioPage.");
|
|
17013
17082
|
}
|
|
@@ -17350,6 +17419,9 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17350
17419
|
args.push("--start-maximized");
|
|
17351
17420
|
launchOptions.defaultViewport = null;
|
|
17352
17421
|
}
|
|
17422
|
+
if (!args.includes("--no-sandbox")) {
|
|
17423
|
+
args.push("--no-sandbox");
|
|
17424
|
+
}
|
|
17353
17425
|
if (browserType === "chromium") {
|
|
17354
17426
|
if (incognito) {
|
|
17355
17427
|
args.push("--incognito");
|
|
@@ -17412,6 +17484,10 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17412
17484
|
throw new Error(`Invalid browserControllerType: ${browserControllerType} in connect`);
|
|
17413
17485
|
}
|
|
17414
17486
|
}
|
|
17487
|
+
async newApiRequestContext(options) {
|
|
17488
|
+
const apiRequestContext = await import_playwright.request.newContext(options);
|
|
17489
|
+
return apiRequestContext;
|
|
17490
|
+
}
|
|
17415
17491
|
};
|
|
17416
17492
|
var controller = new LsdBrowserController();
|
|
17417
17493
|
// Annotate the CommonJS export names for ESM import in node:
|
package/dist/index.d.cts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -15,6 +15,7 @@ type AllBrowserContext = BrowserContext | BrowserContext$1;
|
|
|
15
15
|
type AllFrame = Frame | Frame$1;
|
|
16
16
|
type AllPage = Page | Page$1;
|
|
17
17
|
type AllResponse = Response | HTTPResponse;
|
|
18
|
+
type AllApiRequestContext = APIRequestContext;
|
|
18
19
|
type CheerioNode = cheerio.Cheerio<cheerio.Element>;
|
|
19
20
|
type Proxy = {
|
|
20
21
|
server: string;
|
|
@@ -478,7 +479,11 @@ interface ResponseMatch {
|
|
|
478
479
|
maxLength?: number;
|
|
479
480
|
}
|
|
480
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
481
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
482
487
|
interface ResponseInterceptionItem {
|
|
483
488
|
/**
|
|
484
489
|
* page.url()
|
|
@@ -772,6 +777,11 @@ interface WaitNavigationOptions {
|
|
|
772
777
|
}
|
|
773
778
|
type PageEvent = "pageClose" | "pagePopup";
|
|
774
779
|
interface LsdPage extends EventEmitter {
|
|
780
|
+
/**
|
|
781
|
+
* Get the APIRequestContext associated with this page's browser context.
|
|
782
|
+
* * only valid in playwright
|
|
783
|
+
*/
|
|
784
|
+
apiRequestContext(): AllApiRequestContext;
|
|
775
785
|
bringToFront(): Promise<boolean>;
|
|
776
786
|
browserContext(): LsdBrowserContext;
|
|
777
787
|
clearCookies(): Promise<boolean>;
|
|
@@ -918,7 +928,7 @@ interface LsdBrowserContext extends EventEmitter {
|
|
|
918
928
|
pages(): LsdPage[];
|
|
919
929
|
proxy(): Proxy | null;
|
|
920
930
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
921
|
-
_origBrowserContext(): AllBrowserContext
|
|
931
|
+
_origBrowserContext(): AllBrowserContext;
|
|
922
932
|
}
|
|
923
933
|
interface LsdBrowser extends EventEmitter {
|
|
924
934
|
newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>;
|
|
@@ -960,6 +970,10 @@ interface LsdBrowserController$1 {
|
|
|
960
970
|
* @param puppeteer null means use puppeteer-extra-plugin-stealth based on playwright-extra
|
|
961
971
|
*/
|
|
962
972
|
setPlaywrightBrowserType(browserType: LsdBrowserType, playwrightBrowserType: BrowserType | null): boolean;
|
|
973
|
+
/**
|
|
974
|
+
* Create a new APIRequestContext, valid in playwright;
|
|
975
|
+
*/
|
|
976
|
+
newApiRequestContext(options?: any): Promise<AllApiRequestContext>;
|
|
963
977
|
}
|
|
964
978
|
/**
|
|
965
979
|
* globObj.cfg.XXX:
|
|
@@ -1021,12 +1035,13 @@ declare class PlaywrightBrowserContext extends EventEmitter implements LsdBrowse
|
|
|
1021
1035
|
pages(): LsdPage[];
|
|
1022
1036
|
proxy(): Proxy | null;
|
|
1023
1037
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
1024
|
-
_origBrowserContext(): AllBrowserContext
|
|
1038
|
+
_origBrowserContext(): AllBrowserContext;
|
|
1025
1039
|
}
|
|
1026
1040
|
|
|
1027
1041
|
declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
1028
1042
|
#private;
|
|
1029
1043
|
constructor(browserContext: LsdBrowserContext, page: Page, pageInfo?: PageInfo);
|
|
1044
|
+
apiRequestContext(): APIRequestContext;
|
|
1030
1045
|
bringToFront(): Promise<boolean>;
|
|
1031
1046
|
browserContext(): LsdBrowserContext;
|
|
1032
1047
|
clearCookies(): Promise<boolean>;
|
|
@@ -1077,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1077
1092
|
|
|
1078
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1079
1094
|
#private;
|
|
1080
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1081
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1082
1097
|
attributeNames(): Promise<string[]>;
|
|
1083
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1129,12 +1144,13 @@ declare class PuppeteerBrowserContext extends EventEmitter implements LsdBrowser
|
|
|
1129
1144
|
pages(): LsdPage[];
|
|
1130
1145
|
proxy(): Proxy | null;
|
|
1131
1146
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
1132
|
-
_origBrowserContext(): AllBrowserContext
|
|
1147
|
+
_origBrowserContext(): AllBrowserContext;
|
|
1133
1148
|
}
|
|
1134
1149
|
|
|
1135
1150
|
declare class PuppeteerPage extends EventEmitter implements LsdPage {
|
|
1136
1151
|
#private;
|
|
1137
1152
|
constructor(browserContext: LsdBrowserContext, page: Page$1, pageInfo?: PageInfo);
|
|
1153
|
+
apiRequestContext(): APIRequestContext;
|
|
1138
1154
|
bringToFront(): Promise<boolean>;
|
|
1139
1155
|
browserContext(): LsdBrowserContext;
|
|
1140
1156
|
clearCookies(): Promise<boolean>;
|
|
@@ -1213,6 +1229,7 @@ declare class CheerioPage extends EventEmitter implements LsdPage {
|
|
|
1213
1229
|
* @param isHtml default true
|
|
1214
1230
|
*/
|
|
1215
1231
|
constructor(html?: string, isHtml?: boolean);
|
|
1232
|
+
apiRequestContext(): APIRequestContext;
|
|
1216
1233
|
bringToFront(): Promise<boolean>;
|
|
1217
1234
|
browserContext(): LsdBrowserContext;
|
|
1218
1235
|
clearCookies(): Promise<boolean>;
|
|
@@ -1289,7 +1306,8 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1289
1306
|
setPlaywrightBrowserType(browserType: LsdBrowserType, playwrightBrowserType: BrowserType | null): boolean;
|
|
1290
1307
|
launch(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options: LsdLaunchOptions): Promise<LsdBrowser>;
|
|
1291
1308
|
connect(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options: LsdConnectOptions): Promise<LsdBrowser>;
|
|
1309
|
+
newApiRequestContext(options?: any): Promise<AllApiRequestContext>;
|
|
1292
1310
|
}
|
|
1293
1311
|
declare const controller: LsdBrowserController;
|
|
1294
1312
|
|
|
1295
|
-
export { type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -15,6 +15,7 @@ type AllBrowserContext = BrowserContext | BrowserContext$1;
|
|
|
15
15
|
type AllFrame = Frame | Frame$1;
|
|
16
16
|
type AllPage = Page | Page$1;
|
|
17
17
|
type AllResponse = Response | HTTPResponse;
|
|
18
|
+
type AllApiRequestContext = APIRequestContext;
|
|
18
19
|
type CheerioNode = cheerio.Cheerio<cheerio.Element>;
|
|
19
20
|
type Proxy = {
|
|
20
21
|
server: string;
|
|
@@ -478,7 +479,11 @@ interface ResponseMatch {
|
|
|
478
479
|
maxLength?: number;
|
|
479
480
|
}
|
|
480
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
481
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
482
487
|
interface ResponseInterceptionItem {
|
|
483
488
|
/**
|
|
484
489
|
* page.url()
|
|
@@ -772,6 +777,11 @@ interface WaitNavigationOptions {
|
|
|
772
777
|
}
|
|
773
778
|
type PageEvent = "pageClose" | "pagePopup";
|
|
774
779
|
interface LsdPage extends EventEmitter {
|
|
780
|
+
/**
|
|
781
|
+
* Get the APIRequestContext associated with this page's browser context.
|
|
782
|
+
* * only valid in playwright
|
|
783
|
+
*/
|
|
784
|
+
apiRequestContext(): AllApiRequestContext;
|
|
775
785
|
bringToFront(): Promise<boolean>;
|
|
776
786
|
browserContext(): LsdBrowserContext;
|
|
777
787
|
clearCookies(): Promise<boolean>;
|
|
@@ -918,7 +928,7 @@ interface LsdBrowserContext extends EventEmitter {
|
|
|
918
928
|
pages(): LsdPage[];
|
|
919
929
|
proxy(): Proxy | null;
|
|
920
930
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
921
|
-
_origBrowserContext(): AllBrowserContext
|
|
931
|
+
_origBrowserContext(): AllBrowserContext;
|
|
922
932
|
}
|
|
923
933
|
interface LsdBrowser extends EventEmitter {
|
|
924
934
|
newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>;
|
|
@@ -960,6 +970,10 @@ interface LsdBrowserController$1 {
|
|
|
960
970
|
* @param puppeteer null means use puppeteer-extra-plugin-stealth based on playwright-extra
|
|
961
971
|
*/
|
|
962
972
|
setPlaywrightBrowserType(browserType: LsdBrowserType, playwrightBrowserType: BrowserType | null): boolean;
|
|
973
|
+
/**
|
|
974
|
+
* Create a new APIRequestContext, valid in playwright;
|
|
975
|
+
*/
|
|
976
|
+
newApiRequestContext(options?: any): Promise<AllApiRequestContext>;
|
|
963
977
|
}
|
|
964
978
|
/**
|
|
965
979
|
* globObj.cfg.XXX:
|
|
@@ -1021,12 +1035,13 @@ declare class PlaywrightBrowserContext extends EventEmitter implements LsdBrowse
|
|
|
1021
1035
|
pages(): LsdPage[];
|
|
1022
1036
|
proxy(): Proxy | null;
|
|
1023
1037
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
1024
|
-
_origBrowserContext(): AllBrowserContext
|
|
1038
|
+
_origBrowserContext(): AllBrowserContext;
|
|
1025
1039
|
}
|
|
1026
1040
|
|
|
1027
1041
|
declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
1028
1042
|
#private;
|
|
1029
1043
|
constructor(browserContext: LsdBrowserContext, page: Page, pageInfo?: PageInfo);
|
|
1044
|
+
apiRequestContext(): APIRequestContext;
|
|
1030
1045
|
bringToFront(): Promise<boolean>;
|
|
1031
1046
|
browserContext(): LsdBrowserContext;
|
|
1032
1047
|
clearCookies(): Promise<boolean>;
|
|
@@ -1077,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1077
1092
|
|
|
1078
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1079
1094
|
#private;
|
|
1080
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1081
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1082
1097
|
attributeNames(): Promise<string[]>;
|
|
1083
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1129,12 +1144,13 @@ declare class PuppeteerBrowserContext extends EventEmitter implements LsdBrowser
|
|
|
1129
1144
|
pages(): LsdPage[];
|
|
1130
1145
|
proxy(): Proxy | null;
|
|
1131
1146
|
setStateData(stateData: StateData): Promise<boolean>;
|
|
1132
|
-
_origBrowserContext(): AllBrowserContext
|
|
1147
|
+
_origBrowserContext(): AllBrowserContext;
|
|
1133
1148
|
}
|
|
1134
1149
|
|
|
1135
1150
|
declare class PuppeteerPage extends EventEmitter implements LsdPage {
|
|
1136
1151
|
#private;
|
|
1137
1152
|
constructor(browserContext: LsdBrowserContext, page: Page$1, pageInfo?: PageInfo);
|
|
1153
|
+
apiRequestContext(): APIRequestContext;
|
|
1138
1154
|
bringToFront(): Promise<boolean>;
|
|
1139
1155
|
browserContext(): LsdBrowserContext;
|
|
1140
1156
|
clearCookies(): Promise<boolean>;
|
|
@@ -1213,6 +1229,7 @@ declare class CheerioPage extends EventEmitter implements LsdPage {
|
|
|
1213
1229
|
* @param isHtml default true
|
|
1214
1230
|
*/
|
|
1215
1231
|
constructor(html?: string, isHtml?: boolean);
|
|
1232
|
+
apiRequestContext(): APIRequestContext;
|
|
1216
1233
|
bringToFront(): Promise<boolean>;
|
|
1217
1234
|
browserContext(): LsdBrowserContext;
|
|
1218
1235
|
clearCookies(): Promise<boolean>;
|
|
@@ -1289,7 +1306,8 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1289
1306
|
setPlaywrightBrowserType(browserType: LsdBrowserType, playwrightBrowserType: BrowserType | null): boolean;
|
|
1290
1307
|
launch(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options: LsdLaunchOptions): Promise<LsdBrowser>;
|
|
1291
1308
|
connect(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options: LsdConnectOptions): Promise<LsdBrowser>;
|
|
1309
|
+
newApiRequestContext(options?: any): Promise<AllApiRequestContext>;
|
|
1292
1310
|
}
|
|
1293
1311
|
declare const controller: LsdBrowserController;
|
|
1294
1312
|
|
|
1295
|
-
export { type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.js
CHANGED
|
@@ -58,19 +58,19 @@ import EventEmitter3 from "events";
|
|
|
58
58
|
|
|
59
59
|
// src/playwright/context.ts
|
|
60
60
|
import EventEmitter2 from "events";
|
|
61
|
-
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as
|
|
61
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo3, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
|
|
62
62
|
|
|
63
63
|
// src/playwright/page.ts
|
|
64
64
|
import EventEmitter from "events";
|
|
65
|
-
import { getCurrentUnixTime, logerr, loginfo, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
65
|
+
import { getCurrentUnixTime, logerr, loginfo as loginfo2, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
66
66
|
|
|
67
67
|
// src/playwright/element.ts
|
|
68
|
-
import { unreachable } from "@letsscrapedata/utils";
|
|
68
|
+
import { loginfo, unreachable } from "@letsscrapedata/utils";
|
|
69
69
|
var PlaywrightElement = class _PlaywrightElement {
|
|
70
70
|
#frame;
|
|
71
71
|
#locator;
|
|
72
72
|
constructor(locator, frame) {
|
|
73
|
-
if (!frame.
|
|
73
|
+
if (!frame.locator || !locator.click) {
|
|
74
74
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
75
75
|
}
|
|
76
76
|
this.#frame = frame;
|
|
@@ -84,41 +84,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
84
84
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
85
85
|
return names;
|
|
86
86
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
87
|
+
/*
|
|
88
|
+
// 如果不存在指定的子iframe,则返回null
|
|
89
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
90
|
+
if (!parentFrame) {
|
|
91
|
+
throw new Error("Invalid parent frame");
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
let { src = "" } = iframeOption;
|
|
95
|
+
if (!src) {
|
|
96
|
+
throw new Error("Invalid src in IframeOption");
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// src: use childFrames()
|
|
100
|
+
const childFrames = parentFrame.childFrames();
|
|
101
|
+
for (const childFrame of childFrames) {
|
|
102
|
+
const url = childFrame.url();
|
|
103
|
+
if (typeof src === "string") {
|
|
104
|
+
// src: string
|
|
105
|
+
if (url.startsWith(src)) {
|
|
106
|
+
return childFrame;
|
|
107
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
108
|
+
return childFrame;
|
|
109
|
+
}
|
|
110
|
+
} else {
|
|
111
|
+
// src: RegExp
|
|
112
|
+
if (url.match(src)) {
|
|
113
|
+
return childFrame;
|
|
114
|
+
}
|
|
108
115
|
}
|
|
109
116
|
}
|
|
117
|
+
|
|
118
|
+
return null;
|
|
110
119
|
}
|
|
111
|
-
|
|
120
|
+
*/
|
|
121
|
+
#getIframeSelector(iframeOption) {
|
|
122
|
+
const { src = "", selector = "" } = iframeOption;
|
|
123
|
+
if (!src && !selector) {
|
|
124
|
+
throw new Error("Invalid parent frame");
|
|
125
|
+
}
|
|
126
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
112
127
|
}
|
|
113
|
-
async #
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
128
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
129
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
130
|
+
}
|
|
131
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
132
|
+
try {
|
|
133
|
+
if (iframeOptions.length <= 0) {
|
|
117
134
|
return null;
|
|
118
135
|
}
|
|
119
|
-
|
|
136
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
137
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
138
|
+
if (!frameLocator) {
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
142
|
+
}
|
|
143
|
+
return frameLocator;
|
|
144
|
+
} catch (err) {
|
|
145
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
120
146
|
}
|
|
121
|
-
return iframe;
|
|
122
147
|
}
|
|
123
148
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
124
149
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -146,7 +171,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
146
171
|
retObj.locators = locators;
|
|
147
172
|
return retObj;
|
|
148
173
|
} catch (err) {
|
|
149
|
-
|
|
174
|
+
loginfo(err);
|
|
150
175
|
return retObj;
|
|
151
176
|
}
|
|
152
177
|
}
|
|
@@ -403,41 +428,66 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
403
428
|
});
|
|
404
429
|
return true;
|
|
405
430
|
}
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
431
|
+
/*
|
|
432
|
+
// 如果不存在指定的子iframe,则返回null
|
|
433
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
434
|
+
if (!parentFrame) {
|
|
435
|
+
throw new Error("Invalid parent frame");
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
let { src = "" } = iframeOption;
|
|
439
|
+
if (!src) {
|
|
440
|
+
throw new Error("Invalid src in IframeOption");
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// src: use childFrames()
|
|
444
|
+
const childFrames = parentFrame.childFrames();
|
|
445
|
+
for (const childFrame of childFrames) {
|
|
446
|
+
const url = childFrame.url();
|
|
447
|
+
if (typeof src === "string") {
|
|
448
|
+
// src: string
|
|
449
|
+
if (url.startsWith(src)) {
|
|
450
|
+
return childFrame;
|
|
451
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
452
|
+
return childFrame;
|
|
453
|
+
}
|
|
454
|
+
} else {
|
|
455
|
+
// src: RegExp
|
|
456
|
+
if (url.match(src)) {
|
|
457
|
+
return childFrame;
|
|
458
|
+
}
|
|
427
459
|
}
|
|
428
460
|
}
|
|
461
|
+
|
|
462
|
+
return null;
|
|
429
463
|
}
|
|
430
|
-
|
|
464
|
+
*/
|
|
465
|
+
#getIframeSelector(iframeOption) {
|
|
466
|
+
const { src = "", selector = "" } = iframeOption;
|
|
467
|
+
if (!src && !selector) {
|
|
468
|
+
throw new Error("Invalid parent frame");
|
|
469
|
+
}
|
|
470
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
431
471
|
}
|
|
432
|
-
async #
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
472
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
473
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
474
|
+
}
|
|
475
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
476
|
+
try {
|
|
477
|
+
if (iframeOptions.length <= 0) {
|
|
436
478
|
return null;
|
|
437
479
|
}
|
|
438
|
-
|
|
480
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
481
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
482
|
+
if (!frameLocator) {
|
|
483
|
+
return null;
|
|
484
|
+
}
|
|
485
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
486
|
+
}
|
|
487
|
+
return frameLocator;
|
|
488
|
+
} catch (err) {
|
|
489
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
439
490
|
}
|
|
440
|
-
return iframe;
|
|
441
491
|
}
|
|
442
492
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
443
493
|
if (!this.#page) {
|
|
@@ -446,7 +496,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
446
496
|
let frame = this.#page.mainFrame();
|
|
447
497
|
const retObj = { frame, locators: [] };
|
|
448
498
|
if (iframeOptions.length > 0) {
|
|
449
|
-
frame = await this.#
|
|
499
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
450
500
|
if (!frame) {
|
|
451
501
|
return retObj;
|
|
452
502
|
}
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
466
516
|
retObj.locators = locators;
|
|
467
517
|
return retObj;
|
|
468
518
|
} catch (err) {
|
|
469
|
-
|
|
519
|
+
loginfo2(err);
|
|
470
520
|
return retObj;
|
|
471
521
|
}
|
|
472
522
|
}
|
|
@@ -477,7 +527,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
477
527
|
const page = this.#page;
|
|
478
528
|
const pageId = this.#pageId;
|
|
479
529
|
page.on("close", async () => {
|
|
480
|
-
|
|
530
|
+
loginfo2(`##browser ${pageId} closed`);
|
|
481
531
|
if (!page.pageInfo) {
|
|
482
532
|
logerr(`Logic error in page.on("close")`);
|
|
483
533
|
}
|
|
@@ -500,7 +550,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
500
550
|
} else {
|
|
501
551
|
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
502
552
|
}
|
|
503
|
-
|
|
553
|
+
loginfo2(`##browser ${pageId} has popup ${popupPageId}`);
|
|
504
554
|
this.emit("pagePopup", evtData);
|
|
505
555
|
} else {
|
|
506
556
|
logerr(`##browser ${pageId} has popup page with null page`);
|
|
@@ -525,6 +575,11 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
525
575
|
this.#responseCb = null;
|
|
526
576
|
this.#addPageOn();
|
|
527
577
|
}
|
|
578
|
+
apiRequestContext() {
|
|
579
|
+
const origBrowserContext = this.browserContext()._origBrowserContext();
|
|
580
|
+
const apiRequestContext = origBrowserContext.request;
|
|
581
|
+
return apiRequestContext;
|
|
582
|
+
}
|
|
528
583
|
async bringToFront() {
|
|
529
584
|
if (!this.#page) {
|
|
530
585
|
throw new Error("No valid page");
|
|
@@ -600,9 +655,9 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
600
655
|
}
|
|
601
656
|
let content = "";
|
|
602
657
|
if (iframeOptions.length > 0) {
|
|
603
|
-
const
|
|
604
|
-
if (
|
|
605
|
-
content = await
|
|
658
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
659
|
+
if (frameLocator) {
|
|
660
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
606
661
|
}
|
|
607
662
|
} else {
|
|
608
663
|
content = await this.#page.content();
|
|
@@ -962,10 +1017,11 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
962
1017
|
requestData,
|
|
963
1018
|
responseData
|
|
964
1019
|
});
|
|
965
|
-
|
|
1020
|
+
loginfo2(`##browser cache matched response: ${requestUrl}`);
|
|
966
1021
|
}
|
|
967
1022
|
if (typeof handler === "function") {
|
|
968
|
-
|
|
1023
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1024
|
+
await handler(response, handlerOptions, pageData);
|
|
969
1025
|
}
|
|
970
1026
|
}
|
|
971
1027
|
return;
|
|
@@ -1141,7 +1197,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1141
1197
|
await lsdPage.maximizeViewport();
|
|
1142
1198
|
}
|
|
1143
1199
|
this.#lsdPages.push(lsdPage);
|
|
1144
|
-
|
|
1200
|
+
loginfo3(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1145
1201
|
}
|
|
1146
1202
|
}
|
|
1147
1203
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
@@ -1179,7 +1235,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1179
1235
|
await lsdPage.maximizeViewport();
|
|
1180
1236
|
}
|
|
1181
1237
|
this.#lsdPages.push(lsdPage);
|
|
1182
|
-
|
|
1238
|
+
loginfo3(`##browser ${lsdPage.id()} created`);
|
|
1183
1239
|
}
|
|
1184
1240
|
});
|
|
1185
1241
|
browserContext.on("close", (bc) => {
|
|
@@ -1363,7 +1419,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1363
1419
|
};
|
|
1364
1420
|
|
|
1365
1421
|
// src/playwright/browser.ts
|
|
1366
|
-
import { logerr as logerr3, loginfo as
|
|
1422
|
+
import { logerr as logerr3, loginfo as loginfo4, logwarn as logwarn3 } from "@letsscrapedata/utils";
|
|
1367
1423
|
var PlaywrightBrowser = class extends EventEmitter3 {
|
|
1368
1424
|
#browser;
|
|
1369
1425
|
#browserIdx;
|
|
@@ -1409,7 +1465,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1409
1465
|
this.#executablePath = executablePath;
|
|
1410
1466
|
this.#nextBrowserContextIdx = 1;
|
|
1411
1467
|
this.#closeFreePagesIntervalId = null;
|
|
1412
|
-
|
|
1468
|
+
loginfo4(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1413
1469
|
const browserContexts = browser.contexts();
|
|
1414
1470
|
if (browserContexts.length > 0) {
|
|
1415
1471
|
logwarn3(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
@@ -1418,10 +1474,10 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1418
1474
|
for (const browserContext of browserContexts) {
|
|
1419
1475
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1420
1476
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1421
|
-
|
|
1477
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1422
1478
|
}
|
|
1423
1479
|
browser.on("disconnected", () => {
|
|
1424
|
-
|
|
1480
|
+
loginfo4(`##browser ${this.id()} disconnected`);
|
|
1425
1481
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1426
1482
|
logerr3(`${this.id()} has browserContexts when disconnected`);
|
|
1427
1483
|
}
|
|
@@ -1436,11 +1492,11 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1436
1492
|
logerr3(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1437
1493
|
return;
|
|
1438
1494
|
}
|
|
1439
|
-
|
|
1495
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} closed
|
|
1440
1496
|
`);
|
|
1441
1497
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1442
1498
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1443
|
-
|
|
1499
|
+
loginfo4(`##browser ${this.id()} has no browserContexts now`);
|
|
1444
1500
|
}
|
|
1445
1501
|
return;
|
|
1446
1502
|
});
|
|
@@ -1477,7 +1533,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1477
1533
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
1478
1534
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1479
1535
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1480
|
-
|
|
1536
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} created`);
|
|
1481
1537
|
return lsdBrowserContext;
|
|
1482
1538
|
}
|
|
1483
1539
|
async close() {
|
|
@@ -1798,7 +1854,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1798
1854
|
};
|
|
1799
1855
|
|
|
1800
1856
|
// src/puppeteer/page.ts
|
|
1801
|
-
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as
|
|
1857
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo5, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1802
1858
|
var PuppeteerPage = class extends EventEmitter4 {
|
|
1803
1859
|
#lsdBrowserContext;
|
|
1804
1860
|
#page;
|
|
@@ -1959,7 +2015,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1959
2015
|
const page = this.#page;
|
|
1960
2016
|
const pageId = this.#pageId;
|
|
1961
2017
|
page.on("close", async () => {
|
|
1962
|
-
|
|
2018
|
+
loginfo5(`##browser ${pageId} closed`);
|
|
1963
2019
|
if (!page.pageInfo) {
|
|
1964
2020
|
logerr5(`Logic error in page.on("close")`);
|
|
1965
2021
|
}
|
|
@@ -1982,7 +2038,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1982
2038
|
} else {
|
|
1983
2039
|
logerr5(`##browser ${pageId} has popup without page.pageInfo`);
|
|
1984
2040
|
}
|
|
1985
|
-
|
|
2041
|
+
loginfo5(`##browser ${pageId} has popup ${popupPageId}`);
|
|
1986
2042
|
this.emit("pagePopup", evtData);
|
|
1987
2043
|
} else {
|
|
1988
2044
|
logerr5(`##browser ${pageId} has popup page with null page`);
|
|
@@ -2006,6 +2062,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2006
2062
|
this.#client = null;
|
|
2007
2063
|
this.#addPageOn();
|
|
2008
2064
|
}
|
|
2065
|
+
apiRequestContext() {
|
|
2066
|
+
throw new Error("Not supported in PuppeteerPage.");
|
|
2067
|
+
}
|
|
2009
2068
|
async bringToFront() {
|
|
2010
2069
|
if (!this.#page) {
|
|
2011
2070
|
throw new Error("No valid page");
|
|
@@ -2445,7 +2504,14 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2445
2504
|
});
|
|
2446
2505
|
}
|
|
2447
2506
|
if (typeof handler === "function") {
|
|
2448
|
-
|
|
2507
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2508
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2509
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2510
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2511
|
+
}).join("; ");
|
|
2512
|
+
pageData.cookies = cookies;
|
|
2513
|
+
}
|
|
2514
|
+
await handler(response, handlerOptions, pageData);
|
|
2449
2515
|
}
|
|
2450
2516
|
}
|
|
2451
2517
|
return true;
|
|
@@ -2576,7 +2642,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2576
2642
|
};
|
|
2577
2643
|
|
|
2578
2644
|
// src/puppeteer/context.ts
|
|
2579
|
-
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as
|
|
2645
|
+
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo6, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
2580
2646
|
var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
2581
2647
|
#lsdBrowser;
|
|
2582
2648
|
#browserIdx;
|
|
@@ -2613,7 +2679,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2613
2679
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2614
2680
|
}
|
|
2615
2681
|
this.#lsdPages.push(lsdPage);
|
|
2616
|
-
|
|
2682
|
+
loginfo6(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2617
2683
|
}
|
|
2618
2684
|
}
|
|
2619
2685
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
@@ -2660,7 +2726,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2660
2726
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2661
2727
|
}
|
|
2662
2728
|
this.#lsdPages.push(lsdPage);
|
|
2663
|
-
|
|
2729
|
+
loginfo6(`##browser ${lsdPage.id()} created`);
|
|
2664
2730
|
}
|
|
2665
2731
|
}
|
|
2666
2732
|
});
|
|
@@ -2844,7 +2910,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2844
2910
|
};
|
|
2845
2911
|
|
|
2846
2912
|
// src/puppeteer/browser.ts
|
|
2847
|
-
import { logerr as logerr7, loginfo as
|
|
2913
|
+
import { logerr as logerr7, loginfo as loginfo7, logwarn as logwarn5 } from "@letsscrapedata/utils";
|
|
2848
2914
|
var PuppeteerBrowser = class extends EventEmitter6 {
|
|
2849
2915
|
#browser;
|
|
2850
2916
|
#browserIdx;
|
|
@@ -2893,16 +2959,16 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2893
2959
|
this.#executablePath = executablePath;
|
|
2894
2960
|
this.#nextBrowserContextIdx = 1;
|
|
2895
2961
|
this.#closeFreePagesIntervalId = null;
|
|
2896
|
-
|
|
2962
|
+
loginfo7(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
2897
2963
|
const browserContexts = browser.browserContexts();
|
|
2898
2964
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
2899
2965
|
for (const browserContext of browserContexts) {
|
|
2900
2966
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent(), maxViewportOfNewPage);
|
|
2901
2967
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2902
|
-
|
|
2968
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
2903
2969
|
}
|
|
2904
2970
|
browser.on("disconnected", () => {
|
|
2905
|
-
|
|
2971
|
+
loginfo7(`##browser ${this.id()} disconnected`);
|
|
2906
2972
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
2907
2973
|
logerr7(`${this.id()} has browserContexts when disconnected`);
|
|
2908
2974
|
}
|
|
@@ -2917,11 +2983,11 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2917
2983
|
logerr7(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
2918
2984
|
return;
|
|
2919
2985
|
}
|
|
2920
|
-
|
|
2986
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} closed
|
|
2921
2987
|
`);
|
|
2922
2988
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
2923
2989
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
2924
|
-
|
|
2990
|
+
loginfo7(`##browser ${this.id()} has no browserContexts now`);
|
|
2925
2991
|
}
|
|
2926
2992
|
return;
|
|
2927
2993
|
});
|
|
@@ -2952,7 +3018,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2952
3018
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
2953
3019
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
2954
3020
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2955
|
-
|
|
3021
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} created`);
|
|
2956
3022
|
return lsdBrowserContext;
|
|
2957
3023
|
}
|
|
2958
3024
|
async close() {
|
|
@@ -16988,6 +17054,9 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
16988
17054
|
this.#document = load(html3, { xml: true }).root();
|
|
16989
17055
|
}
|
|
16990
17056
|
}
|
|
17057
|
+
apiRequestContext() {
|
|
17058
|
+
throw new Error("Not supported in CheerioPage.");
|
|
17059
|
+
}
|
|
16991
17060
|
async bringToFront() {
|
|
16992
17061
|
throw new Error("Not supported in CheerioPage.");
|
|
16993
17062
|
}
|
|
@@ -17168,7 +17237,7 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
17168
17237
|
// src/controller/controller.ts
|
|
17169
17238
|
import os from "os";
|
|
17170
17239
|
import puppeteer from "puppeteer";
|
|
17171
|
-
import playwright from "playwright";
|
|
17240
|
+
import playwright, { request as apiRequest } from "playwright";
|
|
17172
17241
|
import { logwarn as logwarn6, unreachable as unreachable5 } from "@letsscrapedata/utils";
|
|
17173
17242
|
import puppeteerExtra from "puppeteer-extra";
|
|
17174
17243
|
import * as playwrightExtra from "playwright-extra";
|
|
@@ -17330,6 +17399,9 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17330
17399
|
args.push("--start-maximized");
|
|
17331
17400
|
launchOptions.defaultViewport = null;
|
|
17332
17401
|
}
|
|
17402
|
+
if (!args.includes("--no-sandbox")) {
|
|
17403
|
+
args.push("--no-sandbox");
|
|
17404
|
+
}
|
|
17333
17405
|
if (browserType === "chromium") {
|
|
17334
17406
|
if (incognito) {
|
|
17335
17407
|
args.push("--incognito");
|
|
@@ -17392,6 +17464,10 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
17392
17464
|
throw new Error(`Invalid browserControllerType: ${browserControllerType} in connect`);
|
|
17393
17465
|
}
|
|
17394
17466
|
}
|
|
17467
|
+
async newApiRequestContext(options) {
|
|
17468
|
+
const apiRequestContext = await apiRequest.newContext(options);
|
|
17469
|
+
return apiRequestContext;
|
|
17470
|
+
}
|
|
17395
17471
|
};
|
|
17396
17472
|
var controller = new LsdBrowserController();
|
|
17397
17473
|
export {
|
package/package.json
CHANGED