@letsscrapedata/controller 0.0.30 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +133 -73
- package/dist/index.d.cts +8 -4
- package/dist/index.d.ts +8 -4
- package/dist/index.js +161 -101
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -90,7 +90,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
90
90
|
#frame;
|
|
91
91
|
#locator;
|
|
92
92
|
constructor(locator, frame) {
|
|
93
|
-
if (!frame.
|
|
93
|
+
if (!frame.locator || !locator.click) {
|
|
94
94
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
95
95
|
}
|
|
96
96
|
this.#frame = frame;
|
|
@@ -104,41 +104,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
104
104
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
105
105
|
return names;
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
107
|
+
/*
|
|
108
|
+
// 如果不存在指定的子iframe,则返回null
|
|
109
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
110
|
+
if (!parentFrame) {
|
|
111
|
+
throw new Error("Invalid parent frame");
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let { src = "" } = iframeOption;
|
|
115
|
+
if (!src) {
|
|
116
|
+
throw new Error("Invalid src in IframeOption");
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// src: use childFrames()
|
|
120
|
+
const childFrames = parentFrame.childFrames();
|
|
121
|
+
for (const childFrame of childFrames) {
|
|
122
|
+
const url = childFrame.url();
|
|
123
|
+
if (typeof src === "string") {
|
|
124
|
+
// src: string
|
|
125
|
+
if (url.startsWith(src)) {
|
|
126
|
+
return childFrame;
|
|
127
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
128
|
+
return childFrame;
|
|
129
|
+
}
|
|
130
|
+
} else {
|
|
131
|
+
// src: RegExp
|
|
132
|
+
if (url.match(src)) {
|
|
133
|
+
return childFrame;
|
|
134
|
+
}
|
|
128
135
|
}
|
|
129
136
|
}
|
|
137
|
+
|
|
138
|
+
return null;
|
|
130
139
|
}
|
|
131
|
-
|
|
140
|
+
*/
|
|
141
|
+
#getIframeSelector(iframeOption) {
|
|
142
|
+
const { src = "", selector = "" } = iframeOption;
|
|
143
|
+
if (!src && !selector) {
|
|
144
|
+
throw new Error("Invalid parent frame");
|
|
145
|
+
}
|
|
146
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
132
147
|
}
|
|
133
|
-
async #
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
148
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
149
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
150
|
+
}
|
|
151
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
152
|
+
try {
|
|
153
|
+
if (iframeOptions.length <= 0) {
|
|
137
154
|
return null;
|
|
138
155
|
}
|
|
139
|
-
|
|
156
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
157
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
158
|
+
if (!frameLocator) {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
162
|
+
}
|
|
163
|
+
return frameLocator;
|
|
164
|
+
} catch (err) {
|
|
165
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
140
166
|
}
|
|
141
|
-
return iframe;
|
|
142
167
|
}
|
|
143
168
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
144
169
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -154,8 +179,8 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
154
179
|
}
|
|
155
180
|
try {
|
|
156
181
|
let locators = [];
|
|
157
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
158
|
-
locators = await parent2.locator(selector).all();
|
|
182
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
183
|
+
locators = await parent2.locator(`xpath=${selector}`).all();
|
|
159
184
|
} else {
|
|
160
185
|
if (selector !== ".") {
|
|
161
186
|
locators = await parent2.locator(selector).all();
|
|
@@ -166,7 +191,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
166
191
|
retObj.locators = locators;
|
|
167
192
|
return retObj;
|
|
168
193
|
} catch (err) {
|
|
169
|
-
|
|
194
|
+
(0, import_utils.loginfo)(err);
|
|
170
195
|
return retObj;
|
|
171
196
|
}
|
|
172
197
|
}
|
|
@@ -423,41 +448,66 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
423
448
|
});
|
|
424
449
|
return true;
|
|
425
450
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
451
|
+
/*
|
|
452
|
+
// 如果不存在指定的子iframe,则返回null
|
|
453
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
454
|
+
if (!parentFrame) {
|
|
455
|
+
throw new Error("Invalid parent frame");
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
let { src = "" } = iframeOption;
|
|
459
|
+
if (!src) {
|
|
460
|
+
throw new Error("Invalid src in IframeOption");
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// src: use childFrames()
|
|
464
|
+
const childFrames = parentFrame.childFrames();
|
|
465
|
+
for (const childFrame of childFrames) {
|
|
466
|
+
const url = childFrame.url();
|
|
467
|
+
if (typeof src === "string") {
|
|
468
|
+
// src: string
|
|
469
|
+
if (url.startsWith(src)) {
|
|
470
|
+
return childFrame;
|
|
471
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
472
|
+
return childFrame;
|
|
473
|
+
}
|
|
474
|
+
} else {
|
|
475
|
+
// src: RegExp
|
|
476
|
+
if (url.match(src)) {
|
|
477
|
+
return childFrame;
|
|
478
|
+
}
|
|
447
479
|
}
|
|
448
480
|
}
|
|
481
|
+
|
|
482
|
+
return null;
|
|
449
483
|
}
|
|
450
|
-
|
|
484
|
+
*/
|
|
485
|
+
#getIframeSelector(iframeOption) {
|
|
486
|
+
const { src = "", selector = "" } = iframeOption;
|
|
487
|
+
if (!src && !selector) {
|
|
488
|
+
throw new Error("Invalid parent frame");
|
|
489
|
+
}
|
|
490
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
451
491
|
}
|
|
452
|
-
async #
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
492
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
493
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
494
|
+
}
|
|
495
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
496
|
+
try {
|
|
497
|
+
if (iframeOptions.length <= 0) {
|
|
456
498
|
return null;
|
|
457
499
|
}
|
|
458
|
-
|
|
500
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
501
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
502
|
+
if (!frameLocator) {
|
|
503
|
+
return null;
|
|
504
|
+
}
|
|
505
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
506
|
+
}
|
|
507
|
+
return frameLocator;
|
|
508
|
+
} catch (err) {
|
|
509
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
459
510
|
}
|
|
460
|
-
return iframe;
|
|
461
511
|
}
|
|
462
512
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
463
513
|
if (!this.#page) {
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
466
516
|
let frame = this.#page.mainFrame();
|
|
467
517
|
const retObj = { frame, locators: [] };
|
|
468
518
|
if (iframeOptions.length > 0) {
|
|
469
|
-
frame = await this.#
|
|
519
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
470
520
|
if (!frame) {
|
|
471
521
|
return retObj;
|
|
472
522
|
}
|
|
@@ -474,8 +524,8 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
474
524
|
}
|
|
475
525
|
try {
|
|
476
526
|
let locators = [];
|
|
477
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
478
|
-
locators = await frame.locator(selector).all();
|
|
527
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
528
|
+
locators = await frame.locator(`xpath=${selector}`).all();
|
|
479
529
|
} else {
|
|
480
530
|
if (selector !== ".") {
|
|
481
531
|
locators = await frame.locator(selector).all();
|
|
@@ -486,7 +536,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
486
536
|
retObj.locators = locators;
|
|
487
537
|
return retObj;
|
|
488
538
|
} catch (err) {
|
|
489
|
-
|
|
539
|
+
(0, import_utils2.loginfo)(err);
|
|
490
540
|
return retObj;
|
|
491
541
|
}
|
|
492
542
|
}
|
|
@@ -625,9 +675,9 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
625
675
|
}
|
|
626
676
|
let content = "";
|
|
627
677
|
if (iframeOptions.length > 0) {
|
|
628
|
-
const
|
|
629
|
-
if (
|
|
630
|
-
content = await
|
|
678
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
679
|
+
if (frameLocator) {
|
|
680
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
631
681
|
}
|
|
632
682
|
} else {
|
|
633
683
|
content = await this.#page.content();
|
|
@@ -990,7 +1040,8 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
990
1040
|
(0, import_utils2.loginfo)(`##browser cache matched response: ${requestUrl}`);
|
|
991
1041
|
}
|
|
992
1042
|
if (typeof handler === "function") {
|
|
993
|
-
|
|
1043
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1044
|
+
await handler(response, handlerOptions, pageData);
|
|
994
1045
|
}
|
|
995
1046
|
}
|
|
996
1047
|
return;
|
|
@@ -1636,11 +1687,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1636
1687
|
return retObj;
|
|
1637
1688
|
}
|
|
1638
1689
|
retObj.frame = frame;
|
|
1690
|
+
absolute = true;
|
|
1639
1691
|
parent2 = frame;
|
|
1640
1692
|
}
|
|
1641
1693
|
try {
|
|
1642
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
1643
|
-
|
|
1694
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
1695
|
+
const newSelector = !absolute && selector.startsWith("/") ? `.${selector}` : selector;
|
|
1696
|
+
retObj.elementHandles = await parent2.$$(`::-p-xpath(${newSelector})`);
|
|
1644
1697
|
} else {
|
|
1645
1698
|
if (selector !== ".") {
|
|
1646
1699
|
retObj.elementHandles = await parent2.$$(selector);
|
|
@@ -1962,8 +2015,8 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
1962
2015
|
retObj.frame = frame;
|
|
1963
2016
|
}
|
|
1964
2017
|
try {
|
|
1965
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
1966
|
-
retObj.elementHandles = await frame.$$(selector);
|
|
2018
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
2019
|
+
retObj.elementHandles = await frame.$$(`::-p-xpath(${selector})`);
|
|
1967
2020
|
} else {
|
|
1968
2021
|
if (selector !== ".") {
|
|
1969
2022
|
retObj.elementHandles = await frame.$$(selector);
|
|
@@ -2473,7 +2526,14 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2473
2526
|
});
|
|
2474
2527
|
}
|
|
2475
2528
|
if (typeof handler === "function") {
|
|
2476
|
-
|
|
2529
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2530
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2531
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2532
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2533
|
+
}).join("; ");
|
|
2534
|
+
pageData.cookies = cookies;
|
|
2535
|
+
}
|
|
2536
|
+
await handler(response, handlerOptions, pageData);
|
|
2477
2537
|
}
|
|
2478
2538
|
}
|
|
2479
2539
|
return true;
|
package/dist/index.d.cts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -479,7 +479,11 @@ interface ResponseMatch {
|
|
|
479
479
|
maxLength?: number;
|
|
480
480
|
}
|
|
481
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
482
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
483
487
|
interface ResponseInterceptionItem {
|
|
484
488
|
/**
|
|
485
489
|
* page.url()
|
|
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1088
1092
|
|
|
1089
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1090
1094
|
#private;
|
|
1091
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1092
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1093
1097
|
attributeNames(): Promise<string[]>;
|
|
1094
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1306
1310
|
}
|
|
1307
1311
|
declare const controller: LsdBrowserController;
|
|
1308
1312
|
|
|
1309
|
-
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import EventEmitter from 'node:events';
|
|
2
2
|
import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
|
|
3
|
-
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
|
|
3
|
+
import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
|
|
4
4
|
import * as cheerio from 'cheerio';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -479,7 +479,11 @@ interface ResponseMatch {
|
|
|
479
479
|
maxLength?: number;
|
|
480
480
|
}
|
|
481
481
|
type ResponseHandlerOptions = Record<string, any>;
|
|
482
|
-
|
|
482
|
+
interface ResponsePageData {
|
|
483
|
+
pageUrl: string;
|
|
484
|
+
cookies: string;
|
|
485
|
+
}
|
|
486
|
+
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
|
|
483
487
|
interface ResponseInterceptionItem {
|
|
484
488
|
/**
|
|
485
489
|
* page.url()
|
|
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
|
|
|
1088
1092
|
|
|
1089
1093
|
declare class PlaywrightElement implements LsdElement {
|
|
1090
1094
|
#private;
|
|
1091
|
-
constructor(locator: Locator, frame: Frame);
|
|
1095
|
+
constructor(locator: Locator, frame: Frame | FrameLocator);
|
|
1092
1096
|
attribute(attributeName: string): Promise<string>;
|
|
1093
1097
|
attributeNames(): Promise<string[]>;
|
|
1094
1098
|
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
|
|
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
|
|
|
1306
1310
|
}
|
|
1307
1311
|
declare const controller: LsdBrowserController;
|
|
1308
1312
|
|
|
1309
|
-
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
|
1313
|
+
export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
|
package/dist/index.js
CHANGED
|
@@ -58,19 +58,19 @@ import EventEmitter3 from "events";
|
|
|
58
58
|
|
|
59
59
|
// src/playwright/context.ts
|
|
60
60
|
import EventEmitter2 from "events";
|
|
61
|
-
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as
|
|
61
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo3, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
|
|
62
62
|
|
|
63
63
|
// src/playwright/page.ts
|
|
64
64
|
import EventEmitter from "events";
|
|
65
|
-
import { getCurrentUnixTime, logerr, loginfo, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
65
|
+
import { getCurrentUnixTime, logerr, loginfo as loginfo2, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
66
66
|
|
|
67
67
|
// src/playwright/element.ts
|
|
68
|
-
import { unreachable } from "@letsscrapedata/utils";
|
|
68
|
+
import { loginfo, unreachable } from "@letsscrapedata/utils";
|
|
69
69
|
var PlaywrightElement = class _PlaywrightElement {
|
|
70
70
|
#frame;
|
|
71
71
|
#locator;
|
|
72
72
|
constructor(locator, frame) {
|
|
73
|
-
if (!frame.
|
|
73
|
+
if (!frame.locator || !locator.click) {
|
|
74
74
|
throw new Error("Invalid paras in new PlaywrightElement");
|
|
75
75
|
}
|
|
76
76
|
this.#frame = frame;
|
|
@@ -84,41 +84,66 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
84
84
|
const names = await this.#locator.evaluate((node) => node.getAttributeNames());
|
|
85
85
|
return names;
|
|
86
86
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
87
|
+
/*
|
|
88
|
+
// 如果不存在指定的子iframe,则返回null
|
|
89
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
90
|
+
if (!parentFrame) {
|
|
91
|
+
throw new Error("Invalid parent frame");
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
let { src = "" } = iframeOption;
|
|
95
|
+
if (!src) {
|
|
96
|
+
throw new Error("Invalid src in IframeOption");
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// src: use childFrames()
|
|
100
|
+
const childFrames = parentFrame.childFrames();
|
|
101
|
+
for (const childFrame of childFrames) {
|
|
102
|
+
const url = childFrame.url();
|
|
103
|
+
if (typeof src === "string") {
|
|
104
|
+
// src: string
|
|
105
|
+
if (url.startsWith(src)) {
|
|
106
|
+
return childFrame;
|
|
107
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
108
|
+
return childFrame;
|
|
109
|
+
}
|
|
110
|
+
} else {
|
|
111
|
+
// src: RegExp
|
|
112
|
+
if (url.match(src)) {
|
|
113
|
+
return childFrame;
|
|
114
|
+
}
|
|
108
115
|
}
|
|
109
116
|
}
|
|
117
|
+
|
|
118
|
+
return null;
|
|
110
119
|
}
|
|
111
|
-
|
|
120
|
+
*/
|
|
121
|
+
#getIframeSelector(iframeOption) {
|
|
122
|
+
const { src = "", selector = "" } = iframeOption;
|
|
123
|
+
if (!src && !selector) {
|
|
124
|
+
throw new Error("Invalid parent frame");
|
|
125
|
+
}
|
|
126
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
112
127
|
}
|
|
113
|
-
async #
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
128
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
129
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
130
|
+
}
|
|
131
|
+
async #getDescendantFrame(parent2, iframeOptions) {
|
|
132
|
+
try {
|
|
133
|
+
if (iframeOptions.length <= 0) {
|
|
117
134
|
return null;
|
|
118
135
|
}
|
|
119
|
-
|
|
136
|
+
let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
137
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
138
|
+
if (!frameLocator) {
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
142
|
+
}
|
|
143
|
+
return frameLocator;
|
|
144
|
+
} catch (err) {
|
|
145
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
120
146
|
}
|
|
121
|
-
return iframe;
|
|
122
147
|
}
|
|
123
148
|
async #findElementHandles(selector, absolute = false, iframeOptions = []) {
|
|
124
149
|
let parent2 = absolute ? this.#frame : this.#locator;
|
|
@@ -134,8 +159,8 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
134
159
|
}
|
|
135
160
|
try {
|
|
136
161
|
let locators = [];
|
|
137
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
138
|
-
locators = await parent2.locator(selector).all();
|
|
162
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
163
|
+
locators = await parent2.locator(`xpath=${selector}`).all();
|
|
139
164
|
} else {
|
|
140
165
|
if (selector !== ".") {
|
|
141
166
|
locators = await parent2.locator(selector).all();
|
|
@@ -146,7 +171,7 @@ var PlaywrightElement = class _PlaywrightElement {
|
|
|
146
171
|
retObj.locators = locators;
|
|
147
172
|
return retObj;
|
|
148
173
|
} catch (err) {
|
|
149
|
-
|
|
174
|
+
loginfo(err);
|
|
150
175
|
return retObj;
|
|
151
176
|
}
|
|
152
177
|
}
|
|
@@ -403,41 +428,66 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
403
428
|
});
|
|
404
429
|
return true;
|
|
405
430
|
}
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
431
|
+
/*
|
|
432
|
+
// 如果不存在指定的子iframe,则返回null
|
|
433
|
+
async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
|
|
434
|
+
if (!parentFrame) {
|
|
435
|
+
throw new Error("Invalid parent frame");
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
let { src = "" } = iframeOption;
|
|
439
|
+
if (!src) {
|
|
440
|
+
throw new Error("Invalid src in IframeOption");
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// src: use childFrames()
|
|
444
|
+
const childFrames = parentFrame.childFrames();
|
|
445
|
+
for (const childFrame of childFrames) {
|
|
446
|
+
const url = childFrame.url();
|
|
447
|
+
if (typeof src === "string") {
|
|
448
|
+
// src: string
|
|
449
|
+
if (url.startsWith(src)) {
|
|
450
|
+
return childFrame;
|
|
451
|
+
} else if (url.toLowerCase().startsWith(src)) {
|
|
452
|
+
return childFrame;
|
|
453
|
+
}
|
|
454
|
+
} else {
|
|
455
|
+
// src: RegExp
|
|
456
|
+
if (url.match(src)) {
|
|
457
|
+
return childFrame;
|
|
458
|
+
}
|
|
427
459
|
}
|
|
428
460
|
}
|
|
461
|
+
|
|
462
|
+
return null;
|
|
429
463
|
}
|
|
430
|
-
|
|
464
|
+
*/
|
|
465
|
+
#getIframeSelector(iframeOption) {
|
|
466
|
+
const { src = "", selector = "" } = iframeOption;
|
|
467
|
+
if (!src && !selector) {
|
|
468
|
+
throw new Error("Invalid parent frame");
|
|
469
|
+
}
|
|
470
|
+
return selector ? selector : `iframe[src^="${src}"]`;
|
|
431
471
|
}
|
|
432
|
-
async #
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
472
|
+
async #getChildFrameLocator(parent2, iframeOption) {
|
|
473
|
+
return parent2.frameLocator(this.#getIframeSelector(iframeOption));
|
|
474
|
+
}
|
|
475
|
+
async #getDescendantFrameLocator(mainFrame, iframeOptions) {
|
|
476
|
+
try {
|
|
477
|
+
if (iframeOptions.length <= 0) {
|
|
436
478
|
return null;
|
|
437
479
|
}
|
|
438
|
-
|
|
480
|
+
let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
|
|
481
|
+
for (const iframeOption of iframeOptions.slice(1)) {
|
|
482
|
+
if (!frameLocator) {
|
|
483
|
+
return null;
|
|
484
|
+
}
|
|
485
|
+
frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
|
|
486
|
+
}
|
|
487
|
+
return frameLocator;
|
|
488
|
+
} catch (err) {
|
|
489
|
+
throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
|
|
439
490
|
}
|
|
440
|
-
return iframe;
|
|
441
491
|
}
|
|
442
492
|
async #findElementHandles(selector, iframeOptions = []) {
|
|
443
493
|
if (!this.#page) {
|
|
@@ -446,7 +496,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
446
496
|
let frame = this.#page.mainFrame();
|
|
447
497
|
const retObj = { frame, locators: [] };
|
|
448
498
|
if (iframeOptions.length > 0) {
|
|
449
|
-
frame = await this.#
|
|
499
|
+
frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
|
|
450
500
|
if (!frame) {
|
|
451
501
|
return retObj;
|
|
452
502
|
}
|
|
@@ -454,8 +504,8 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
454
504
|
}
|
|
455
505
|
try {
|
|
456
506
|
let locators = [];
|
|
457
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
458
|
-
locators = await frame.locator(selector).all();
|
|
507
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
508
|
+
locators = await frame.locator(`xpath=${selector}`).all();
|
|
459
509
|
} else {
|
|
460
510
|
if (selector !== ".") {
|
|
461
511
|
locators = await frame.locator(selector).all();
|
|
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
466
516
|
retObj.locators = locators;
|
|
467
517
|
return retObj;
|
|
468
518
|
} catch (err) {
|
|
469
|
-
|
|
519
|
+
loginfo2(err);
|
|
470
520
|
return retObj;
|
|
471
521
|
}
|
|
472
522
|
}
|
|
@@ -477,7 +527,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
477
527
|
const page = this.#page;
|
|
478
528
|
const pageId = this.#pageId;
|
|
479
529
|
page.on("close", async () => {
|
|
480
|
-
|
|
530
|
+
loginfo2(`##browser ${pageId} closed`);
|
|
481
531
|
if (!page.pageInfo) {
|
|
482
532
|
logerr(`Logic error in page.on("close")`);
|
|
483
533
|
}
|
|
@@ -500,7 +550,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
500
550
|
} else {
|
|
501
551
|
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
502
552
|
}
|
|
503
|
-
|
|
553
|
+
loginfo2(`##browser ${pageId} has popup ${popupPageId}`);
|
|
504
554
|
this.emit("pagePopup", evtData);
|
|
505
555
|
} else {
|
|
506
556
|
logerr(`##browser ${pageId} has popup page with null page`);
|
|
@@ -605,9 +655,9 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
605
655
|
}
|
|
606
656
|
let content = "";
|
|
607
657
|
if (iframeOptions.length > 0) {
|
|
608
|
-
const
|
|
609
|
-
if (
|
|
610
|
-
content = await
|
|
658
|
+
const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
|
|
659
|
+
if (frameLocator) {
|
|
660
|
+
content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
|
|
611
661
|
}
|
|
612
662
|
} else {
|
|
613
663
|
content = await this.#page.content();
|
|
@@ -967,10 +1017,11 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
967
1017
|
requestData,
|
|
968
1018
|
responseData
|
|
969
1019
|
});
|
|
970
|
-
|
|
1020
|
+
loginfo2(`##browser cache matched response: ${requestUrl}`);
|
|
971
1021
|
}
|
|
972
1022
|
if (typeof handler === "function") {
|
|
973
|
-
|
|
1023
|
+
const pageData = { pageUrl, cookies: "" };
|
|
1024
|
+
await handler(response, handlerOptions, pageData);
|
|
974
1025
|
}
|
|
975
1026
|
}
|
|
976
1027
|
return;
|
|
@@ -1146,7 +1197,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1146
1197
|
await lsdPage.maximizeViewport();
|
|
1147
1198
|
}
|
|
1148
1199
|
this.#lsdPages.push(lsdPage);
|
|
1149
|
-
|
|
1200
|
+
loginfo3(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1150
1201
|
}
|
|
1151
1202
|
}
|
|
1152
1203
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
@@ -1184,7 +1235,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1184
1235
|
await lsdPage.maximizeViewport();
|
|
1185
1236
|
}
|
|
1186
1237
|
this.#lsdPages.push(lsdPage);
|
|
1187
|
-
|
|
1238
|
+
loginfo3(`##browser ${lsdPage.id()} created`);
|
|
1188
1239
|
}
|
|
1189
1240
|
});
|
|
1190
1241
|
browserContext.on("close", (bc) => {
|
|
@@ -1368,7 +1419,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1368
1419
|
};
|
|
1369
1420
|
|
|
1370
1421
|
// src/playwright/browser.ts
|
|
1371
|
-
import { logerr as logerr3, loginfo as
|
|
1422
|
+
import { logerr as logerr3, loginfo as loginfo4, logwarn as logwarn3 } from "@letsscrapedata/utils";
|
|
1372
1423
|
var PlaywrightBrowser = class extends EventEmitter3 {
|
|
1373
1424
|
#browser;
|
|
1374
1425
|
#browserIdx;
|
|
@@ -1414,7 +1465,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1414
1465
|
this.#executablePath = executablePath;
|
|
1415
1466
|
this.#nextBrowserContextIdx = 1;
|
|
1416
1467
|
this.#closeFreePagesIntervalId = null;
|
|
1417
|
-
|
|
1468
|
+
loginfo4(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1418
1469
|
const browserContexts = browser.contexts();
|
|
1419
1470
|
if (browserContexts.length > 0) {
|
|
1420
1471
|
logwarn3(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
@@ -1423,10 +1474,10 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1423
1474
|
for (const browserContext of browserContexts) {
|
|
1424
1475
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1425
1476
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1426
|
-
|
|
1477
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1427
1478
|
}
|
|
1428
1479
|
browser.on("disconnected", () => {
|
|
1429
|
-
|
|
1480
|
+
loginfo4(`##browser ${this.id()} disconnected`);
|
|
1430
1481
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1431
1482
|
logerr3(`${this.id()} has browserContexts when disconnected`);
|
|
1432
1483
|
}
|
|
@@ -1441,11 +1492,11 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1441
1492
|
logerr3(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1442
1493
|
return;
|
|
1443
1494
|
}
|
|
1444
|
-
|
|
1495
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} closed
|
|
1445
1496
|
`);
|
|
1446
1497
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1447
1498
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1448
|
-
|
|
1499
|
+
loginfo4(`##browser ${this.id()} has no browserContexts now`);
|
|
1449
1500
|
}
|
|
1450
1501
|
return;
|
|
1451
1502
|
});
|
|
@@ -1482,7 +1533,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1482
1533
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
1483
1534
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1484
1535
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1485
|
-
|
|
1536
|
+
loginfo4(`##browser ${lsdBrowserContext.id()} created`);
|
|
1486
1537
|
return lsdBrowserContext;
|
|
1487
1538
|
}
|
|
1488
1539
|
async close() {
|
|
@@ -1616,11 +1667,13 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1616
1667
|
return retObj;
|
|
1617
1668
|
}
|
|
1618
1669
|
retObj.frame = frame;
|
|
1670
|
+
absolute = true;
|
|
1619
1671
|
parent2 = frame;
|
|
1620
1672
|
}
|
|
1621
1673
|
try {
|
|
1622
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
1623
|
-
|
|
1674
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
1675
|
+
const newSelector = !absolute && selector.startsWith("/") ? `.${selector}` : selector;
|
|
1676
|
+
retObj.elementHandles = await parent2.$$(`::-p-xpath(${newSelector})`);
|
|
1624
1677
|
} else {
|
|
1625
1678
|
if (selector !== ".") {
|
|
1626
1679
|
retObj.elementHandles = await parent2.$$(selector);
|
|
@@ -1803,7 +1856,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1803
1856
|
};
|
|
1804
1857
|
|
|
1805
1858
|
// src/puppeteer/page.ts
|
|
1806
|
-
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as
|
|
1859
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo5, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1807
1860
|
var PuppeteerPage = class extends EventEmitter4 {
|
|
1808
1861
|
#lsdBrowserContext;
|
|
1809
1862
|
#page;
|
|
@@ -1942,8 +1995,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1942
1995
|
retObj.frame = frame;
|
|
1943
1996
|
}
|
|
1944
1997
|
try {
|
|
1945
|
-
if (selector.startsWith("./") || selector.startsWith("/")) {
|
|
1946
|
-
retObj.elementHandles = await frame.$$(selector);
|
|
1998
|
+
if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
|
|
1999
|
+
retObj.elementHandles = await frame.$$(`::-p-xpath(${selector})`);
|
|
1947
2000
|
} else {
|
|
1948
2001
|
if (selector !== ".") {
|
|
1949
2002
|
retObj.elementHandles = await frame.$$(selector);
|
|
@@ -1964,7 +2017,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1964
2017
|
const page = this.#page;
|
|
1965
2018
|
const pageId = this.#pageId;
|
|
1966
2019
|
page.on("close", async () => {
|
|
1967
|
-
|
|
2020
|
+
loginfo5(`##browser ${pageId} closed`);
|
|
1968
2021
|
if (!page.pageInfo) {
|
|
1969
2022
|
logerr5(`Logic error in page.on("close")`);
|
|
1970
2023
|
}
|
|
@@ -1987,7 +2040,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
1987
2040
|
} else {
|
|
1988
2041
|
logerr5(`##browser ${pageId} has popup without page.pageInfo`);
|
|
1989
2042
|
}
|
|
1990
|
-
|
|
2043
|
+
loginfo5(`##browser ${pageId} has popup ${popupPageId}`);
|
|
1991
2044
|
this.emit("pagePopup", evtData);
|
|
1992
2045
|
} else {
|
|
1993
2046
|
logerr5(`##browser ${pageId} has popup page with null page`);
|
|
@@ -2453,7 +2506,14 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2453
2506
|
});
|
|
2454
2507
|
}
|
|
2455
2508
|
if (typeof handler === "function") {
|
|
2456
|
-
|
|
2509
|
+
const pageData = { pageUrl, cookies: "" };
|
|
2510
|
+
if (handlerOptions?.requestHeadersFlag) {
|
|
2511
|
+
const cookies = (await this.#page.cookies()).map((cookie) => {
|
|
2512
|
+
return `${cookie.name}=${cookie.value}`;
|
|
2513
|
+
}).join("; ");
|
|
2514
|
+
pageData.cookies = cookies;
|
|
2515
|
+
}
|
|
2516
|
+
await handler(response, handlerOptions, pageData);
|
|
2457
2517
|
}
|
|
2458
2518
|
}
|
|
2459
2519
|
return true;
|
|
@@ -2584,7 +2644,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2584
2644
|
};
|
|
2585
2645
|
|
|
2586
2646
|
// src/puppeteer/context.ts
|
|
2587
|
-
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as
|
|
2647
|
+
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo6, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
2588
2648
|
var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
2589
2649
|
#lsdBrowser;
|
|
2590
2650
|
#browserIdx;
|
|
@@ -2621,7 +2681,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2621
2681
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2622
2682
|
}
|
|
2623
2683
|
this.#lsdPages.push(lsdPage);
|
|
2624
|
-
|
|
2684
|
+
loginfo6(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2625
2685
|
}
|
|
2626
2686
|
}
|
|
2627
2687
|
constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
@@ -2668,7 +2728,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2668
2728
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2669
2729
|
}
|
|
2670
2730
|
this.#lsdPages.push(lsdPage);
|
|
2671
|
-
|
|
2731
|
+
loginfo6(`##browser ${lsdPage.id()} created`);
|
|
2672
2732
|
}
|
|
2673
2733
|
}
|
|
2674
2734
|
});
|
|
@@ -2852,7 +2912,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2852
2912
|
};
|
|
2853
2913
|
|
|
2854
2914
|
// src/puppeteer/browser.ts
|
|
2855
|
-
import { logerr as logerr7, loginfo as
|
|
2915
|
+
import { logerr as logerr7, loginfo as loginfo7, logwarn as logwarn5 } from "@letsscrapedata/utils";
|
|
2856
2916
|
var PuppeteerBrowser = class extends EventEmitter6 {
|
|
2857
2917
|
#browser;
|
|
2858
2918
|
#browserIdx;
|
|
@@ -2901,16 +2961,16 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2901
2961
|
this.#executablePath = executablePath;
|
|
2902
2962
|
this.#nextBrowserContextIdx = 1;
|
|
2903
2963
|
this.#closeFreePagesIntervalId = null;
|
|
2904
|
-
|
|
2964
|
+
loginfo7(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
2905
2965
|
const browserContexts = browser.browserContexts();
|
|
2906
2966
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
2907
2967
|
for (const browserContext of browserContexts) {
|
|
2908
2968
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent(), maxViewportOfNewPage);
|
|
2909
2969
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2910
|
-
|
|
2970
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
2911
2971
|
}
|
|
2912
2972
|
browser.on("disconnected", () => {
|
|
2913
|
-
|
|
2973
|
+
loginfo7(`##browser ${this.id()} disconnected`);
|
|
2914
2974
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
2915
2975
|
logerr7(`${this.id()} has browserContexts when disconnected`);
|
|
2916
2976
|
}
|
|
@@ -2925,11 +2985,11 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2925
2985
|
logerr7(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
2926
2986
|
return;
|
|
2927
2987
|
}
|
|
2928
|
-
|
|
2988
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} closed
|
|
2929
2989
|
`);
|
|
2930
2990
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
2931
2991
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
2932
|
-
|
|
2992
|
+
loginfo7(`##browser ${this.id()} has no browserContexts now`);
|
|
2933
2993
|
}
|
|
2934
2994
|
return;
|
|
2935
2995
|
});
|
|
@@ -2960,7 +3020,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
2960
3020
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
2961
3021
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
2962
3022
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
2963
|
-
|
|
3023
|
+
loginfo7(`##browser ${lsdBrowserContext.id()} created`);
|
|
2964
3024
|
return lsdBrowserContext;
|
|
2965
3025
|
}
|
|
2966
3026
|
async close() {
|
package/package.json
CHANGED