@letsscrapedata/controller 0.0.30 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +123 -65
- package/dist/index.d.cts +8 -4
- package/dist/index.d.ts +8 -4
- package/dist/index.js +151 -93
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -90,7 +90,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
90
90
|
#frame;
|
|
91
91
|
#locator;
|
|
92
92
|
constructor(locator, frame) {
|
|
93
|
-
if (!frame.
|
|
93
|
+
if (!frame.locator || !locator.click) {
|
|
94
94
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
95
95
|
}
|
|
96
96
|
this.#frame = frame;
|
|
@@ -104,41 +104,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
104
104
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
105
105
|
return names;
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
107
|
+
/*
|
|
108
|
+
// 如果不存在指定的子iframe,则返回null
|
|
109
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
110
|
+
if (!parentFrame) {
|
|
111
|
+
throw new Error("Invalid parent frame");
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let { src = "" } = iframeOption;
|
|
115
|
+
if (!src) {
|
|
116
|
+
throw new Error("Invalid src in IframeOption");
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// src: use childFrames()
|
|
120
|
+
const childFrames = parentFrame.childFrames();
|
|
121
|
+
for (const childFrame of childFrames) {
|
|
122
|
+
const url = childFrame.url();
|
|
123
|
+
if (typeof src === "string") {
|
|
124
|
+
// src: string
|
|
125
|
+
if (url.startsWith(src)) {
|
|
126
|
+
return childFrame;
|
|
127
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
128
|
+
return childFrame;
|
|
129
|
+
}
|
|
130
|
+
} else {
|
|
131
|
+
// src: RegExp
|
|
132
|
+
if (url.match(src)) {
|
|
133
|
+
return childFrame;
|
|
134
|
+
}
|
|
128
135
|
}
|
|
129
136
|
}
|
|
137
|
+
|
|
138
|
+
return null;
|
|
130
139
|
}
|
|
131
|
-
|
|
140
|
+
*/
|
|
141
|
+
#getIframeSelector(iframeOption) {
|
|
142
|
+
const { src = "", selector = "" } = iframeOption;
|
|
143
|
+
if (!src && !selector) {
|
|
144
|
+
throw new Error("Invalid parent frame");
|
|
145
|
+
}
|
|
146
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
132
147
|
}
|
|
133
|
-
async #
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
148
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
149
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
150
|
+
}
|
|
151
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
152
|
+
try {
|
|
153
|
+
if (iframeOptions.length <= 0) {
|
|
137
154
|
return null;
|
|
138
155
|
}
|
|
139
|
-
|
|
156
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
157
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
158
|
+
if (!frameLocator) {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
162
|
+
}
|
|
163
|
+
return frameLocator;
|
|
164
|
+
} catch (err) {
|
|
165
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
140
166
|
}
|
|
141
|
-
return iframe;
|
|
142
167
|
}
|
|
143
168
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
144
169
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -166,7 +191,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
166
191
|
retObj.locators = locators;
|
|
167
192
|
return retObj;
|
|
168
193
|
} catch (err) {
|
|
169
|
-
|
|
194
|
+
(0, import_utils.loginfo)(err);
|
|
170
195
|
return retObj;
|
|
171
196
|
}
|
|
172
197
|
}
|
|
@@ -423,41 +448,66 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
423
448
|
});
|
|
424
449
|
return true;
|
|
425
450
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
451
|
+
/*
|
|
452
|
+
// 如果不存在指定的子iframe,则返回null
|
|
453
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
454
|
+
if (!parentFrame) {
|
|
455
|
+
throw new Error("Invalid parent frame");
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
let { src = "" } = iframeOption;
|
|
459
|
+
if (!src) {
|
|
460
|
+
throw new Error("Invalid src in IframeOption");
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// src: use childFrames()
|
|
464
|
+
const childFrames = parentFrame.childFrames();
|
|
465
|
+
for (const childFrame of childFrames) {
|
|
466
|
+
const url = childFrame.url();
|
|
467
|
+
if (typeof src === "string") {
|
|
468
|
+
// src: string
|
|
469
|
+
if (url.startsWith(src)) {
|
|
470
|
+
return childFrame;
|
|
471
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
472
|
+
return childFrame;
|
|
473
|
+
}
|
|
474
|
+
} else {
|
|
475
|
+
// src: RegExp
|
|
476
|
+
if (url.match(src)) {
|
|
477
|
+
return childFrame;
|
|
478
|
+
}
|
|
447
479
|
}
|
|
448
480
|
}
|
|
481
|
+
|
|
482
|
+
return null;
|
|
449
483
|
}
|
|
450
|
-
|
|
484
|
+
*/
|
|
485
|
+
#getIframeSelector(iframeOption) {
|
|
486
|
+
const { src = "", selector = "" } = iframeOption;
|
|
487
|
+
if (!src && !selector) {
|
|
488
|
+
throw new Error("Invalid parent frame");
|
|
489
|
+
}
|
|
490
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
451
491
|
}
|
|
452
|
-
async #
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
492
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
493
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
494
|
+
}
|
|
495
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
496
|
+
try {
|
|
497
|
+
if (iframeOptions.length <= 0) {
|
|
456
498
|
return null;
|
|
457
499
|
}
|
|
458
|
-
|
|
500
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
501
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
502
|
+
if (!frameLocator) {
|
|
503
|
+
return null;
|
|
504
|
+
}
|
|
505
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
506
|
+
}
|
|
507
|
+
return frameLocator;
|
|
508
|
+
} catch (err) {
|
|
509
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
459
510
|
}
|
|
460
|
-
return iframe;
|
|
461
511
|
}
|
|
462
512
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
463
513
|
if (!this.#page) {
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
466
516
|
let frame = this.#page.mainFrame();
|
|
467
517
|
const retObj = { frame, locators: [] };
|
|
468
518
|
if (iframeOptions.length > 0) {
|
|
469
|
-
frame = await this.#
|
|
519
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
470
520
|
if (!frame) {
|
|
471
521
|
return retObj;
|
|
472
522
|
}
|
|
@@ -486,7 +536,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
486
536
|
retObj.locators = locators;
|
|
487
537
|
return retObj;
|
|
488
538
|
} catch (err) {
|
|
489
|
-
|
|
539
|
+
(0, import_utils2.loginfo)(err);
|
|
490
540
|
return retObj;
|
|
491
541
|
}
|
|
492
542
|
}
|
|
@@ -625,9 +675,9 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
625
675
|
}
|
|
626
676
|
let content = "";
|
|
627
677
|
if (iframeOptions.length > 0) {
|
|
628
|
-
const
|
|
629
|
-
if (
|
|
630
|
-
content = await
|
|
678
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
679
|
+
if (frameLocator) {
|
|
680
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
631
681
|
}
|
|
632
682
|
} else {
|
|
633
683
|
content = await this.#page.content();
|
|
@@ -990,7 +1040,8 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
990
1040
|
(0, import_utils2.loginfo)(`##browser cache matched response: ${requestUrl}`);
|
|
991
1041
|
}
|
|
992
1042
|
if (typeof handler === "function") {
|
|
993
|
-
|
|
1043
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1044
|
+
await handler(response, handlerOptions, pageData);
|
|
994
1045
|
}
|
|
995
1046
|
}
|
|
996
1047
|
return;
|
|
@@ -2473,7 +2524,14 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2473
2524
|
});
|
|
2474
2525
|
}
|
|
2475
2526
|
if (typeof handler === "function") {
|
|
2476
|
-
|
|
2527
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2528
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2529
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2530
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2531
|
+
}).join("; ");
|
|
2532
|
+
pageData.cookies = cookies;
|
|
2533
|
+
}
|
|
2534
|
+
await handler(response, handlerOptions, pageData);
|
|
2477
2535
|
}
|
|
2478
2536
|
}
|
|
2479
2537
|
return true;
|
package/dist/index.d.cts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -479,7 +479,11 @@ interface ResponseMatch {
|
|
|
479
479
|
maxLength?: number;
|
|
480
480
|
}
|
|
481
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
482
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
483
487
|
interface ResponseInterceptionItem {
|
|
484
488
|
/**
|
|
485
489
|
* page.url()
|
|
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1088
1092
|
|
|
1089
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1090
1094
|
#private;
|
|
1091
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1092
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1093
1097
|
attributeNames(): Promise<string[]>;
|
|
1094
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1306
1310
|
}
|
|
1307
1311
|
declare const controller: LsdBrowserController;
|
|
1308
1312
|
|
|
1309
|
-
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -479,7 +479,11 @@ interface ResponseMatch {
|
|
|
479
479
|
maxLength?: number;
|
|
480
480
|
}
|
|
481
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
482
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
483
487
|
interface ResponseInterceptionItem {
|
|
484
488
|
/**
|
|
485
489
|
* page.url()
|
|
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1088
1092
|
|
|
1089
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1090
1094
|
#private;
|
|
1091
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1092
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1093
1097
|
attributeNames(): Promise<string[]>;
|
|
1094
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1306
1310
|
}
|
|
1307
1311
|
declare const controller: LsdBrowserController;
|
|
1308
1312
|
|
|
1309
|
-
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.js
CHANGED
|
@@ -58,19 +58,19 @@ import EventEmitter3 from "events";
|
|
|
58
58
|
|
|
59
59
|
// src/playwright/context.ts
|
|
60
60
|
import EventEmitter2 from "events";
|
|
61
|
-
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as
|
|
61
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo3, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
|
|
62
62
|
|
|
63
63
|
// src/playwright/page.ts
|
|
64
64
|
import EventEmitter from "events";
|
|
65
|
-
import { getCurrentUnixTime, logerr, loginfo, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
65
|
+
import { getCurrentUnixTime, logerr, loginfo as loginfo2, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
66
66
|
|
|
67
67
|
// src/playwright/element.ts
|
|
68
|
-
import { unreachable } from "@letsscrapedata/utils";
|
|
68
|
+
import { loginfo, unreachable } from "@letsscrapedata/utils";
|
|
69
69
|
var PlaywrightElement = class _PlaywrightElement {
|
|
70
70
|
#frame;
|
|
71
71
|
#locator;
|
|
72
72
|
constructor(locator, frame) {
|
|
73
|
-
if (!frame.
|
|
73
|
+
if (!frame.locator || !locator.click) {
|
|
74
74
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
75
75
|
}
|
|
76
76
|
this.#frame = frame;
|
|
@@ -84,41 +84,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
84
84
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
85
85
|
return names;
|
|
86
86
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
87
|
+
/*
|
|
88
|
+
// 如果不存在指定的子iframe,则返回null
|
|
89
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
90
|
+
if (!parentFrame) {
|
|
91
|
+
throw new Error("Invalid parent frame");
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
let { src = "" } = iframeOption;
|
|
95
|
+
if (!src) {
|
|
96
|
+
throw new Error("Invalid src in IframeOption");
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// src: use childFrames()
|
|
100
|
+
const childFrames = parentFrame.childFrames();
|
|
101
|
+
for (const childFrame of childFrames) {
|
|
102
|
+
const url = childFrame.url();
|
|
103
|
+
if (typeof src === "string") {
|
|
104
|
+
// src: string
|
|
105
|
+
if (url.startsWith(src)) {
|
|
106
|
+
return childFrame;
|
|
107
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
108
|
+
return childFrame;
|
|
109
|
+
}
|
|
110
|
+
} else {
|
|
111
|
+
// src: RegExp
|
|
112
|
+
if (url.match(src)) {
|
|
113
|
+
return childFrame;
|
|
114
|
+
}
|
|
108
115
|
}
|
|
109
116
|
}
|
|
117
|
+
|
|
118
|
+
return null;
|
|
110
119
|
}
|
|
111
|
-
|
|
120
|
+
*/
|
|
121
|
+
#getIframeSelector(iframeOption) {
|
|
122
|
+
const { src = "", selector = "" } = iframeOption;
|
|
123
|
+
if (!src && !selector) {
|
|
124
|
+
throw new Error("Invalid parent frame");
|
|
125
|
+
}
|
|
126
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
112
127
|
}
|
|
113
|
-
async #
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
128
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
129
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
130
|
+
}
|
|
131
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
132
|
+
try {
|
|
133
|
+
if (iframeOptions.length <= 0) {
|
|
117
134
|
return null;
|
|
118
135
|
}
|
|
119
|
-
|
|
136
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
137
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
138
|
+
if (!frameLocator) {
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
142
|
+
}
|
|
143
|
+
return frameLocator;
|
|
144
|
+
} catch (err) {
|
|
145
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
120
146
|
}
|
|
121
|
-
return iframe;
|
|
122
147
|
}
|
|
123
148
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
124
149
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -146,7 +171,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
146
171
|
retObj.locators = locators;
|
|
147
172
|
return retObj;
|
|
148
173
|
} catch (err) {
|
|
149
|
-
|
|
174
|
+
loginfo(err);
|
|
150
175
|
return retObj;
|
|
151
176
|
}
|
|
152
177
|
}
|
|
@@ -403,41 +428,66 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
403
428
|
});
|
|
404
429
|
return true;
|
|
405
430
|
}
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
431
|
+
/*
|
|
432
|
+
// 如果不存在指定的子iframe,则返回null
|
|
433
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
434
|
+
if (!parentFrame) {
|
|
435
|
+
throw new Error("Invalid parent frame");
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
let { src = "" } = iframeOption;
|
|
439
|
+
if (!src) {
|
|
440
|
+
throw new Error("Invalid src in IframeOption");
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// src: use childFrames()
|
|
444
|
+
const childFrames = parentFrame.childFrames();
|
|
445
|
+
for (const childFrame of childFrames) {
|
|
446
|
+
const url = childFrame.url();
|
|
447
|
+
if (typeof src === "string") {
|
|
448
|
+
// src: string
|
|
449
|
+
if (url.startsWith(src)) {
|
|
450
|
+
return childFrame;
|
|
451
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
452
|
+
return childFrame;
|
|
453
|
+
}
|
|
454
|
+
} else {
|
|
455
|
+
// src: RegExp
|
|
456
|
+
if (url.match(src)) {
|
|
457
|
+
return childFrame;
|
|
458
|
+
}
|
|
427
459
|
}
|
|
428
460
|
}
|
|
461
|
+
|
|
462
|
+
return null;
|
|
429
463
|
}
|
|
430
|
-
|
|
464
|
+
*/
|
|
465
|
+
#getIframeSelector(iframeOption) {
|
|
466
|
+
const { src = "", selector = "" } = iframeOption;
|
|
467
|
+
if (!src && !selector) {
|
|
468
|
+
throw new Error("Invalid parent frame");
|
|
469
|
+
}
|
|
470
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
431
471
|
}
|
|
432
|
-
async #
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
472
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
473
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
474
|
+
}
|
|
475
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
476
|
+
try {
|
|
477
|
+
if (iframeOptions.length <= 0) {
|
|
436
478
|
return null;
|
|
437
479
|
}
|
|
438
|
-
|
|
480
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
481
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
482
|
+
if (!frameLocator) {
|
|
483
|
+
return null;
|
|
484
|
+
}
|
|
485
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
486
|
+
}
|
|
487
|
+
return frameLocator;
|
|
488
|
+
} catch (err) {
|
|
489
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
439
490
|
}
|
|
440
|
-
return iframe;
|
|
441
491
|
}
|
|
442
492
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
443
493
|
if (!this.#page) {
|
|
@@ -446,7 +496,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
446
496
|
let frame = this.#page.mainFrame();
|
|
447
497
|
const retObj = { frame, locators: [] };
|
|
448
498
|
if (iframeOptions.length > 0) {
|
|
449
|
-
frame = await this.#
|
|
499
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
450
500
|
if (!frame) {
|
|
451
501
|
return retObj;
|
|
452
502
|
}
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
466
516
|
retObj.locators = locators;
|
|
467
517
|
return retObj;
|
|
468
518
|
} catch (err) {
|
|
469
|
-
|
|
519
|
+
loginfo2(err);
|
|
470
520
|
return retObj;
|
|
471
521
|
}
|
|
472
522
|
}
|
|
@@ -477,7 +527,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
477
527
|
const page = this.#page;
|
|
478
528
|
const pageId = this.#pageId;
|
|
479
529
|
page.on("close", async () => {
|
|
480
|
-
|
|
530
|
+
loginfo2(`##browser ${pageId} closed`);
|
|
481
531
|
if (!page.pageInfo) {
|
|
482
532
|
logerr(`Logic error in page.on("close")`);
|
|
483
533
|
}
|
|
@@ -500,7 +550,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
500
550
|
} else {
|
|
501
551
|
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
502
552
|
}
|
|
503
|
-
|
|
553
|
+
loginfo2(`##browser ${pageId} has popup ${popupPageId}`);
|
|
504
554
|
this.emit("pagePopup", evtData);
|
|
505
555
|
} else {
|
|
506
556
|
logerr(`##browser ${pageId} has popup page with null page`);
|
|
@@ -605,9 +655,9 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
605
655
|
}
|
|
606
656
|
let content = "";
|
|
607
657
|
if (iframeOptions.length > 0) {
|
|
608
|
-
const
|
|
609
|
-
if (
|
|
610
|
-
content = await
|
|
658
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
659
|
+
if (frameLocator) {
|
|
660
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
611
661
|
}
|
|
612
662
|
} else {
|
|
613
663
|
content = await this.#page.content();
|
|
@@ -967,10 +1017,11 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
967
1017
|
requestData,
|
|
968
1018
|
responseData
|
|
969
1019
|
});
|
|
970
|
-
|
|
1020
|
+
loginfo2(`##browser cache matched response: ${requestUrl}`);
|
|
971
1021
|
}
|
|
972
1022
|
if (typeof handler === "function") {
|
|
973
|
-
|
|
1023
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1024
|
+
await handler(response, handlerOptions, pageData);
|
|
974
1025
|
}
|
|
975
1026
|
}
|
|
976
1027
|
return;
|
|
@@ -1146,7 +1197,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1146
1197
|
await lsdPage.maximizeViewport();
|
|
1147
1198
|
}
|
|
1148
1199
|
this.#lsdPages.push(lsdPage);
|
|
1149
|
-
|
|
1200
|
+
loginfo3(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1150
1201
|
}
|
|
1151
1202
|
}
|
|
1152
1203
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
@@ -1184,7 +1235,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1184
1235
|
await lsdPage.maximizeViewport();
|
|
1185
1236
|
}
|
|
1186
1237
|
this.#lsdPages.push(lsdPage);
|
|
1187
|
-
|
|
1238
|
+
loginfo3(`##browser ${lsdPage.id()} created`);
|
|
1188
1239
|
}
|
|
1189
1240
|
});
|
|
1190
1241
|
browserContext.on("close", (bc) => {
|
|
@@ -1368,7 +1419,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1368
1419
|
};
|
|
1369
1420
|
|
|
1370
1421
|
// src/playwright/browser.ts
|
|
1371
|
-
import { logerr as logerr3, loginfo as
|
|
1422
|
+
import { logerr as logerr3, loginfo as loginfo4, logwarn as logwarn3 } from "@letsscrapedata/utils";
|
|
1372
1423
|
var PlaywrightBrowser = class extends EventEmitter3 {
|
|
1373
1424
|
#browser;
|
|
1374
1425
|
#browserIdx;
|
|
@@ -1414,7 +1465,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1414
1465
|
this.#executablePath = executablePath;
|
|
1415
1466
|
this.#nextBrowserContextIdx = 1;
|
|
1416
1467
|
this.#closeFreePagesIntervalId = null;
|
|
1417
|
-
|
|
1468
|
+
loginfo4(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1418
1469
|
const browserContexts = browser.contexts();
|
|
1419
1470
|
if (browserContexts.length > 0) {
|
|
1420
1471
|
logwarn3(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
@@ -1423,10 +1474,10 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1423
1474
|
for (const browserContext of browserContexts) {
|
|
1424
1475
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1425
1476
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1426
|
-
|
|
1477
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1427
1478
|
}
|
|
1428
1479
|
browser.on("disconnected", () => {
|
|
1429
|
-
|
|
1480
|
+
loginfo4(`##browser ${this.id()} disconnected`);
|
|
1430
1481
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1431
1482
|
logerr3(`${this.id()} has browserContexts when disconnected`);
|
|
1432
1483
|
}
|
|
@@ -1441,11 +1492,11 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1441
1492
|
logerr3(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1442
1493
|
return;
|
|
1443
1494
|
}
|
|
1444
|
-
|
|
1495
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} closed
|
|
1445
1496
|
`);
|
|
1446
1497
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1447
1498
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1448
|
-
|
|
1499
|
+
loginfo4(`##browser ${this.id()} has no browserContexts now`);
|
|
1449
1500
|
}
|
|
1450
1501
|
return;
|
|
1451
1502
|
});
|
|
@@ -1482,7 +1533,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1482
1533
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
1483
1534
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1484
1535
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1485
|
-
|
|
1536
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} created`);
|
|
1486
1537
|
return lsdBrowserContext;
|
|
1487
1538
|
}
|
|
1488
1539
|
async close() {
|
|
@@ -1803,7 +1854,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1803
1854
|
};
|
|
1804
1855
|
|
|
1805
1856
|
// src/puppeteer/page.ts
|
|
1806
|
-
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as
|
|
1857
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo5, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1807
1858
|
var PuppeteerPage = class extends EventEmitter4 {
|
|
1808
1859
|
#lsdBrowserContext;
|
|
1809
1860
|
#page;
|
|
@@ -1964,7 +2015,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1964
2015
|
const page = this.#page;
|
|
1965
2016
|
const pageId = this.#pageId;
|
|
1966
2017
|
page.on("close", async () => {
|
|
1967
|
-
|
|
2018
|
+
loginfo5(`##browser ${pageId} closed`);
|
|
1968
2019
|
if (!page.pageInfo) {
|
|
1969
2020
|
logerr5(`Logic error in page.on("close")`);
|
|
1970
2021
|
}
|
|
@@ -1987,7 +2038,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1987
2038
|
} else {
|
|
1988
2039
|
logerr5(`##browser ${pageId} has popup without page.pageInfo`);
|
|
1989
2040
|
}
|
|
1990
|
-
|
|
2041
|
+
loginfo5(`##browser ${pageId} has popup ${popupPageId}`);
|
|
1991
2042
|
this.emit("pagePopup", evtData);
|
|
1992
2043
|
} else {
|
|
1993
2044
|
logerr5(`##browser ${pageId} has popup page with null page`);
|
|
@@ -2453,7 +2504,14 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2453
2504
|
});
|
|
2454
2505
|
}
|
|
2455
2506
|
if (typeof handler === "function") {
|
|
2456
|
-
|
|
2507
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2508
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2509
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2510
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2511
|
+
}).join("; ");
|
|
2512
|
+
pageData.cookies = cookies;
|
|
2513
|
+
}
|
|
2514
|
+
await handler(response, handlerOptions, pageData);
|
|
2457
2515
|
}
|
|
2458
2516
|
}
|
|
2459
2517
|
return true;
|
|
@@ -2584,7 +2642,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2584
2642
|
};
|
|
2585
2643
|
|
|
2586
2644
|
// src/puppeteer/context.ts
|
|
2587
|
-
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as
|
|
2645
|
+
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo6, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
2588
2646
|
var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
2589
2647
|
#lsdBrowser;
|
|
2590
2648
|
#browserIdx;
|
|
@@ -2621,7 +2679,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2621
2679
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2622
2680
|
}
|
|
2623
2681
|
this.#lsdPages.push(lsdPage);
|
|
2624
|
-
|
|
2682
|
+
loginfo6(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2625
2683
|
}
|
|
2626
2684
|
}
|
|
2627
2685
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
@@ -2668,7 +2726,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2668
2726
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2669
2727
|
}
|
|
2670
2728
|
this.#lsdPages.push(lsdPage);
|
|
2671
|
-
|
|
2729
|
+
loginfo6(`##browser ${lsdPage.id()} created`);
|
|
2672
2730
|
}
|
|
2673
2731
|
}
|
|
2674
2732
|
});
|
|
@@ -2852,7 +2910,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2852
2910
|
};
|
|
2853
2911
|
|
|
2854
2912
|
// src/puppeteer/browser.ts
|
|
2855
|
-
import { logerr as logerr7, loginfo as
|
|
2913
|
+
import { logerr as logerr7, loginfo as loginfo7, logwarn as logwarn5 } from "@letsscrapedata/utils";
|
|
2856
2914
|
var PuppeteerBrowser = class extends EventEmitter6 {
|
|
2857
2915
|
#browser;
|
|
2858
2916
|
#browserIdx;
|
|
@@ -2901,16 +2959,16 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2901
2959
|
this.#executablePath = executablePath;
|
|
2902
2960
|
this.#nextBrowserContextIdx = 1;
|
|
2903
2961
|
this.#closeFreePagesIntervalId = null;
|
|
2904
|
-
|
|
2962
|
+
loginfo7(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
2905
2963
|
const browserContexts = browser.browserContexts();
|
|
2906
2964
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
2907
2965
|
for (const browserContext of browserContexts) {
|
|
2908
2966
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent(), maxViewportOfNewPage);
|
|
2909
2967
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2910
|
-
|
|
2968
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
2911
2969
|
}
|
|
2912
2970
|
browser.on("disconnected", () => {
|
|
2913
|
-
|
|
2971
|
+
loginfo7(`##browser ${this.id()} disconnected`);
|
|
2914
2972
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
2915
2973
|
logerr7(`${this.id()} has browserContexts when disconnected`);
|
|
2916
2974
|
}
|
|
@@ -2925,11 +2983,11 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2925
2983
|
logerr7(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
2926
2984
|
return;
|
|
2927
2985
|
}
|
|
2928
|
-
|
|
2986
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} closed
|
|
2929
2987
|
`);
|
|
2930
2988
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
2931
2989
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
2932
|
-
|
|
2990
|
+
loginfo7(`##browser ${this.id()} has no browserContexts now`);
|
|
2933
2991
|
}
|
|
2934
2992
|
return;
|
|
2935
2993
|
});
|
|
@@ -2960,7 +3018,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2960
3018
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
2961
3019
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
2962
3020
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2963
|
-
|
|
3021
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} created`);
|
|
2964
3022
|
return lsdBrowserContext;
|
|
2965
3023
|
}
|
|
2966
3024
|
async close() {
|
package/package.json
CHANGED