@letsscrapedata/controller 0.0.30 → 0.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -90,7 +90,7 @@ var PlaywrightElement = class _PlaywrightElement {
90
90
  #frame;
91
91
  #locator;
92
92
  constructor(locator, frame) {
93
- if (!frame.goto || !locator.click) {
93
+ if (!frame.locator || !locator.click) {
94
94
  throw new Error("Invalid paras in new PlaywrightElement");
95
95
  }
96
96
  this.#frame = frame;
@@ -104,41 +104,66 @@ var PlaywrightElement = class _PlaywrightElement {
104
104
  const names = await this.#locator.evaluate((node) => node.getAttributeNames());
105
105
  return names;
106
106
  }
107
- // 如果不存在指定的子iframe,则返回null
108
- async #getChildFrame(parentFrame, iframeOption) {
109
- if (!parentFrame) {
110
- throw new Error("Invalid parent frame");
111
- }
112
- let { src = "" } = iframeOption;
113
- if (!src) {
114
- throw new Error("Invalid src in IframeOption");
115
- }
116
- const childFrames = parentFrame.childFrames();
117
- for (const childFrame of childFrames) {
118
- const url = childFrame.url();
119
- if (typeof src === "string") {
120
- if (url.startsWith(src)) {
121
- return childFrame;
122
- } else if (url.toLowerCase().startsWith(src)) {
123
- return childFrame;
124
- }
125
- } else {
126
- if (url.match(src)) {
127
- return childFrame;
107
+ /*
108
+ // 如果不存在指定的子iframe,则返回null
109
+ async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
110
+ if (!parentFrame) {
111
+ throw new Error("Invalid parent frame");
112
+ }
113
+
114
+ let { src = "" } = iframeOption;
115
+ if (!src) {
116
+ throw new Error("Invalid src in IframeOption");
117
+ }
118
+
119
+ // src: use childFrames()
120
+ const childFrames = parentFrame.childFrames();
121
+ for (const childFrame of childFrames) {
122
+ const url = childFrame.url();
123
+ if (typeof src === "string") {
124
+ // src: string
125
+ if (url.startsWith(src)) {
126
+ return childFrame;
127
+ } else if (url.toLowerCase().startsWith(src)) {
128
+ return childFrame;
129
+ }
130
+ } else {
131
+ // src: RegExp
132
+ if (url.match(src)) {
133
+ return childFrame;
134
+ }
128
135
  }
129
136
  }
137
+
138
+ return null;
130
139
  }
131
- return null;
140
+ */
141
+ #getIframeSelector(iframeOption) {
142
+ const { src = "", selector = "" } = iframeOption;
143
+ if (!src && !selector) {
144
+ throw new Error("Invalid parent frame");
145
+ }
146
+ return selector ? selector : `iframe[src^="${src}"]`;
132
147
  }
133
- async #getDescendantFrame(parentFrame, iframeOptions) {
134
- let iframe = parentFrame;
135
- for (const iframeOption of iframeOptions) {
136
- if (!iframe) {
148
+ async #getChildFrameLocator(parent2, iframeOption) {
149
+ return parent2.frameLocator(this.#getIframeSelector(iframeOption));
150
+ }
151
+ async #getDescendantFrame(parent2, iframeOptions) {
152
+ try {
153
+ if (iframeOptions.length <= 0) {
137
154
  return null;
138
155
  }
139
- iframe = await this.#getChildFrame(iframe, iframeOption);
156
+ let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
157
+ for (const iframeOption of iframeOptions.slice(1)) {
158
+ if (!frameLocator) {
159
+ return null;
160
+ }
161
+ frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
162
+ }
163
+ return frameLocator;
164
+ } catch (err) {
165
+ throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
140
166
  }
141
- return iframe;
142
167
  }
143
168
  async #findElementHandles(selector, absolute = false, iframeOptions = []) {
144
169
  let parent2 = absolute ? this.#frame : this.#locator;
@@ -166,7 +191,7 @@ var PlaywrightElement = class _PlaywrightElement {
166
191
  retObj.locators = locators;
167
192
  return retObj;
168
193
  } catch (err) {
169
- console.log(err);
194
+ (0, import_utils.loginfo)(err);
170
195
  return retObj;
171
196
  }
172
197
  }
@@ -423,41 +448,66 @@ var PlaywrightPage = class extends import_node_events.default {
423
448
  });
424
449
  return true;
425
450
  }
426
- // 如果不存在指定的子iframe,则返回null
427
- async #getChildFrame(parentFrame, iframeOption) {
428
- if (!parentFrame) {
429
- throw new Error("Invalid parent frame");
430
- }
431
- let { src = "" } = iframeOption;
432
- if (!src) {
433
- throw new Error("Invalid src in IframeOption");
434
- }
435
- const childFrames = parentFrame.childFrames();
436
- for (const childFrame of childFrames) {
437
- const url = childFrame.url();
438
- if (typeof src === "string") {
439
- if (url.startsWith(src)) {
440
- return childFrame;
441
- } else if (url.toLowerCase().startsWith(src)) {
442
- return childFrame;
443
- }
444
- } else {
445
- if (url.match(src)) {
446
- return childFrame;
451
+ /*
452
+ // 如果不存在指定的子iframe,则返回null
453
+ async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
454
+ if (!parentFrame) {
455
+ throw new Error("Invalid parent frame");
456
+ }
457
+
458
+ let { src = "" } = iframeOption;
459
+ if (!src) {
460
+ throw new Error("Invalid src in IframeOption");
461
+ }
462
+
463
+ // src: use childFrames()
464
+ const childFrames = parentFrame.childFrames();
465
+ for (const childFrame of childFrames) {
466
+ const url = childFrame.url();
467
+ if (typeof src === "string") {
468
+ // src: string
469
+ if (url.startsWith(src)) {
470
+ return childFrame;
471
+ } else if (url.toLowerCase().startsWith(src)) {
472
+ return childFrame;
473
+ }
474
+ } else {
475
+ // src: RegExp
476
+ if (url.match(src)) {
477
+ return childFrame;
478
+ }
447
479
  }
448
480
  }
481
+
482
+ return null;
449
483
  }
450
- return null;
484
+ */
485
+ #getIframeSelector(iframeOption) {
486
+ const { src = "", selector = "" } = iframeOption;
487
+ if (!src && !selector) {
488
+ throw new Error("Invalid parent frame");
489
+ }
490
+ return selector ? selector : `iframe[src^="${src}"]`;
451
491
  }
452
- async #getDescendantFrame(parentFrame, iframeOptions) {
453
- let iframe = parentFrame;
454
- for (const iframeOption of iframeOptions) {
455
- if (!iframe) {
492
+ async #getChildFrameLocator(parent2, iframeOption) {
493
+ return parent2.frameLocator(this.#getIframeSelector(iframeOption));
494
+ }
495
+ async #getDescendantFrameLocator(mainFrame, iframeOptions) {
496
+ try {
497
+ if (iframeOptions.length <= 0) {
456
498
  return null;
457
499
  }
458
- iframe = await this.#getChildFrame(iframe, iframeOption);
500
+ let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
501
+ for (const iframeOption of iframeOptions.slice(1)) {
502
+ if (!frameLocator) {
503
+ return null;
504
+ }
505
+ frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
506
+ }
507
+ return frameLocator;
508
+ } catch (err) {
509
+ throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
459
510
  }
460
- return iframe;
461
511
  }
462
512
  async #findElementHandles(selector, iframeOptions = []) {
463
513
  if (!this.#page) {
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends import_node_events.default {
466
516
  let frame = this.#page.mainFrame();
467
517
  const retObj = { frame, locators: [] };
468
518
  if (iframeOptions.length > 0) {
469
- frame = await this.#getDescendantFrame(frame, iframeOptions);
519
+ frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
470
520
  if (!frame) {
471
521
  return retObj;
472
522
  }
@@ -486,7 +536,7 @@ var PlaywrightPage = class extends import_node_events.default {
486
536
  retObj.locators = locators;
487
537
  return retObj;
488
538
  } catch (err) {
489
- console.log(err);
539
+ (0, import_utils2.loginfo)(err);
490
540
  return retObj;
491
541
  }
492
542
  }
@@ -625,9 +675,9 @@ var PlaywrightPage = class extends import_node_events.default {
625
675
  }
626
676
  let content = "";
627
677
  if (iframeOptions.length > 0) {
628
- const frame = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
629
- if (frame) {
630
- content = await frame.content();
678
+ const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
679
+ if (frameLocator) {
680
+ content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
631
681
  }
632
682
  } else {
633
683
  content = await this.#page.content();
@@ -990,7 +1040,8 @@ var PlaywrightPage = class extends import_node_events.default {
990
1040
  (0, import_utils2.loginfo)(`##browser cache matched response: ${requestUrl}`);
991
1041
  }
992
1042
  if (typeof handler === "function") {
993
- await handler(response, handlerOptions, pageUrl);
1043
+ const pageData = { pageUrl, cookies: "" };
1044
+ await handler(response, handlerOptions, pageData);
994
1045
  }
995
1046
  }
996
1047
  return;
@@ -2473,7 +2524,14 @@ var PuppeteerPage = class extends import_node_events4.default {
2473
2524
  });
2474
2525
  }
2475
2526
  if (typeof handler === "function") {
2476
- await handler(response, handlerOptions, pageUrl);
2527
+ const pageData = { pageUrl, cookies: "" };
2528
+ if (handlerOptions?.requestHeadersFlag) {
2529
+ const cookies = (await this.#page.cookies()).map((cookie) => {
2530
+ return `${cookie.name}=${cookie.value}`;
2531
+ }).join("; ");
2532
+ pageData.cookies = cookies;
2533
+ }
2534
+ await handler(response, handlerOptions, pageData);
2477
2535
  }
2478
2536
  }
2479
2537
  return true;
package/dist/index.d.cts CHANGED
@@ -1,6 +1,6 @@
1
1
  import EventEmitter from 'node:events';
2
2
  import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
3
- import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
3
+ import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
4
4
  import * as cheerio from 'cheerio';
5
5
 
6
6
  /**
@@ -479,7 +479,11 @@ interface ResponseMatch {
479
479
  maxLength?: number;
480
480
  }
481
481
  type ResponseHandlerOptions = Record<string, any>;
482
- type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageUrl: string) => Promise<void> | void;
482
+ interface ResponsePageData {
483
+ pageUrl: string;
484
+ cookies: string;
485
+ }
486
+ type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
483
487
  interface ResponseInterceptionItem {
484
488
  /**
485
489
  * page.url()
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
1088
1092
 
1089
1093
  declare class PlaywrightElement implements LsdElement {
1090
1094
  #private;
1091
- constructor(locator: Locator, frame: Frame);
1095
+ constructor(locator: Locator, frame: Frame | FrameLocator);
1092
1096
  attribute(attributeName: string): Promise<string>;
1093
1097
  attributeNames(): Promise<string[]>;
1094
1098
  findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
1306
1310
  }
1307
1311
  declare const controller: LsdBrowserController;
1308
1312
 
1309
- export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
1313
+ export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import EventEmitter from 'node:events';
2
2
  import { Browser as Browser$1, BrowserContext as BrowserContext$1, Frame as Frame$1, Page as Page$1, HTTPResponse, PuppeteerNode, ElementHandle } from 'puppeteer';
3
- import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator } from 'playwright';
3
+ import { Browser, BrowserContext, Frame, Page, Response, APIRequestContext, BrowserType, Locator, FrameLocator } from 'playwright';
4
4
  import * as cheerio from 'cheerio';
5
5
 
6
6
  /**
@@ -479,7 +479,11 @@ interface ResponseMatch {
479
479
  maxLength?: number;
480
480
  }
481
481
  type ResponseHandlerOptions = Record<string, any>;
482
- type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageUrl: string) => Promise<void> | void;
482
+ interface ResponsePageData {
483
+ pageUrl: string;
484
+ cookies: string;
485
+ }
486
+ type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
483
487
  interface ResponseInterceptionItem {
484
488
  /**
485
489
  * page.url()
@@ -1088,7 +1092,7 @@ declare class PlaywrightPage extends EventEmitter implements LsdPage {
1088
1092
 
1089
1093
  declare class PlaywrightElement implements LsdElement {
1090
1094
  #private;
1091
- constructor(locator: Locator, frame: Frame);
1095
+ constructor(locator: Locator, frame: Frame | FrameLocator);
1092
1096
  attribute(attributeName: string): Promise<string>;
1093
1097
  attributeNames(): Promise<string[]>;
1094
1098
  findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
@@ -1306,4 +1310,4 @@ declare class LsdBrowserController implements LsdBrowserController$1 {
1306
1310
  }
1307
1311
  declare const controller: LsdBrowserController;
1308
1312
 
1309
- export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
1313
+ export { type AllApiRequestContext, type AllBrowser, type AllBrowserContext, type AllFrame, type AllPage, type AllResponse, type BrowserControllerOptions, type BrowserControllerType, type BrowserCreationMethod, type BrowserManager, CheerioElement, type CheerioNode, CheerioPage, type CookieItem, type GotoOptions, type IframeOption, type InputOptions, type KeyInput, type KeyPressOptions, type LocalStorageItem, type LocalStorageOrigin, type LowerCasePaperFormat, type LsdBrowser, type LsdBrowserContext, type LsdBrowserContextOptions, type LsdBrowserController$1 as LsdBrowserController, type LsdBrowserType, type LsdConnectOptions, type LsdElement, type LsdLaunchOptions, type LsdPage, type MouseClickOptions, type MouseClickType, type NavigationWaitUntil, type PDFMargin, type PDFOptions, type PageEvent, type PageExtInPlaywright, type PageExtInPuppeteer, type PageInfo, type PageOpenType, type PageStatus, type PaperFormat, PlaywrightBrowser, PlaywrightBrowserContext, type PlaywrightBrowserTypes, PlaywrightElement, PlaywrightPage, type Proxy, PuppeteerBrowser, PuppeteerBrowserContext, PuppeteerElement, PuppeteerPage, type RequestInterceptionAction, type RequestInterceptionOption, type RequestMatch, type RequestMethod, type RequestResourceType, type ResponseHandler, type ResponseHandlerOptions, type ResponseInterceptionItem, type ResponseInterceptionOption, type ResponseMatch, type ResponsePageData, type ScreenshotOptions, type SelectOptions, type StateData, type UpdatablePageInfo, type ViewportSize, type WaitElementOptions, type WaitElementState, type WaitNavigationOptions, controller, defaultProxy };
package/dist/index.js CHANGED
@@ -58,19 +58,19 @@ import EventEmitter3 from "events";
58
58
 
59
59
  // src/playwright/context.ts
60
60
  import EventEmitter2 from "events";
61
- import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo2, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
61
+ import { getCurrentUnixTime as getCurrentUnixTime2, logerr as logerr2, loginfo as loginfo3, logwarn as logwarn2, sleep } from "@letsscrapedata/utils";
62
62
 
63
63
  // src/playwright/page.ts
64
64
  import EventEmitter from "events";
65
- import { getCurrentUnixTime, logerr, loginfo, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
65
+ import { getCurrentUnixTime, logerr, loginfo as loginfo2, logwarn, unreachable as unreachable2 } from "@letsscrapedata/utils";
66
66
 
67
67
  // src/playwright/element.ts
68
- import { unreachable } from "@letsscrapedata/utils";
68
+ import { loginfo, unreachable } from "@letsscrapedata/utils";
69
69
  var PlaywrightElement = class _PlaywrightElement {
70
70
  #frame;
71
71
  #locator;
72
72
  constructor(locator, frame) {
73
- if (!frame.goto || !locator.click) {
73
+ if (!frame.locator || !locator.click) {
74
74
  throw new Error("Invalid paras in new PlaywrightElement");
75
75
  }
76
76
  this.#frame = frame;
@@ -84,41 +84,66 @@ var PlaywrightElement = class _PlaywrightElement {
84
84
  const names = await this.#locator.evaluate((node) => node.getAttributeNames());
85
85
  return names;
86
86
  }
87
- // 如果不存在指定的子iframe,则返回null
88
- async #getChildFrame(parentFrame, iframeOption) {
89
- if (!parentFrame) {
90
- throw new Error("Invalid parent frame");
91
- }
92
- let { src = "" } = iframeOption;
93
- if (!src) {
94
- throw new Error("Invalid src in IframeOption");
95
- }
96
- const childFrames = parentFrame.childFrames();
97
- for (const childFrame of childFrames) {
98
- const url = childFrame.url();
99
- if (typeof src === "string") {
100
- if (url.startsWith(src)) {
101
- return childFrame;
102
- } else if (url.toLowerCase().startsWith(src)) {
103
- return childFrame;
104
- }
105
- } else {
106
- if (url.match(src)) {
107
- return childFrame;
87
+ /*
88
+ // 如果不存在指定的子iframe,则返回null
89
+ async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
90
+ if (!parentFrame) {
91
+ throw new Error("Invalid parent frame");
92
+ }
93
+
94
+ let { src = "" } = iframeOption;
95
+ if (!src) {
96
+ throw new Error("Invalid src in IframeOption");
97
+ }
98
+
99
+ // src: use childFrames()
100
+ const childFrames = parentFrame.childFrames();
101
+ for (const childFrame of childFrames) {
102
+ const url = childFrame.url();
103
+ if (typeof src === "string") {
104
+ // src: string
105
+ if (url.startsWith(src)) {
106
+ return childFrame;
107
+ } else if (url.toLowerCase().startsWith(src)) {
108
+ return childFrame;
109
+ }
110
+ } else {
111
+ // src: RegExp
112
+ if (url.match(src)) {
113
+ return childFrame;
114
+ }
108
115
  }
109
116
  }
117
+
118
+ return null;
110
119
  }
111
- return null;
120
+ */
121
+ #getIframeSelector(iframeOption) {
122
+ const { src = "", selector = "" } = iframeOption;
123
+ if (!src && !selector) {
124
+ throw new Error("Invalid parent frame");
125
+ }
126
+ return selector ? selector : `iframe[src^="${src}"]`;
112
127
  }
113
- async #getDescendantFrame(parentFrame, iframeOptions) {
114
- let iframe = parentFrame;
115
- for (const iframeOption of iframeOptions) {
116
- if (!iframe) {
128
+ async #getChildFrameLocator(parent2, iframeOption) {
129
+ return parent2.frameLocator(this.#getIframeSelector(iframeOption));
130
+ }
131
+ async #getDescendantFrame(parent2, iframeOptions) {
132
+ try {
133
+ if (iframeOptions.length <= 0) {
117
134
  return null;
118
135
  }
119
- iframe = await this.#getChildFrame(iframe, iframeOption);
136
+ let frameLocator = parent2.frameLocator(this.#getIframeSelector(iframeOptions[0]));
137
+ for (const iframeOption of iframeOptions.slice(1)) {
138
+ if (!frameLocator) {
139
+ return null;
140
+ }
141
+ frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
142
+ }
143
+ return frameLocator;
144
+ } catch (err) {
145
+ throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
120
146
  }
121
- return iframe;
122
147
  }
123
148
  async #findElementHandles(selector, absolute = false, iframeOptions = []) {
124
149
  let parent2 = absolute ? this.#frame : this.#locator;
@@ -146,7 +171,7 @@ var PlaywrightElement = class _PlaywrightElement {
146
171
  retObj.locators = locators;
147
172
  return retObj;
148
173
  } catch (err) {
149
- console.log(err);
174
+ loginfo(err);
150
175
  return retObj;
151
176
  }
152
177
  }
@@ -403,41 +428,66 @@ var PlaywrightPage = class extends EventEmitter {
403
428
  });
404
429
  return true;
405
430
  }
406
- // 如果不存在指定的子iframe,则返回null
407
- async #getChildFrame(parentFrame, iframeOption) {
408
- if (!parentFrame) {
409
- throw new Error("Invalid parent frame");
410
- }
411
- let { src = "" } = iframeOption;
412
- if (!src) {
413
- throw new Error("Invalid src in IframeOption");
414
- }
415
- const childFrames = parentFrame.childFrames();
416
- for (const childFrame of childFrames) {
417
- const url = childFrame.url();
418
- if (typeof src === "string") {
419
- if (url.startsWith(src)) {
420
- return childFrame;
421
- } else if (url.toLowerCase().startsWith(src)) {
422
- return childFrame;
423
- }
424
- } else {
425
- if (url.match(src)) {
426
- return childFrame;
431
+ /*
432
+ // 如果不存在指定的子iframe,则返回null
433
+ async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
434
+ if (!parentFrame) {
435
+ throw new Error("Invalid parent frame");
436
+ }
437
+
438
+ let { src = "" } = iframeOption;
439
+ if (!src) {
440
+ throw new Error("Invalid src in IframeOption");
441
+ }
442
+
443
+ // src: use childFrames()
444
+ const childFrames = parentFrame.childFrames();
445
+ for (const childFrame of childFrames) {
446
+ const url = childFrame.url();
447
+ if (typeof src === "string") {
448
+ // src: string
449
+ if (url.startsWith(src)) {
450
+ return childFrame;
451
+ } else if (url.toLowerCase().startsWith(src)) {
452
+ return childFrame;
453
+ }
454
+ } else {
455
+ // src: RegExp
456
+ if (url.match(src)) {
457
+ return childFrame;
458
+ }
427
459
  }
428
460
  }
461
+
462
+ return null;
429
463
  }
430
- return null;
464
+ */
465
+ #getIframeSelector(iframeOption) {
466
+ const { src = "", selector = "" } = iframeOption;
467
+ if (!src && !selector) {
468
+ throw new Error("Invalid parent frame");
469
+ }
470
+ return selector ? selector : `iframe[src^="${src}"]`;
431
471
  }
432
- async #getDescendantFrame(parentFrame, iframeOptions) {
433
- let iframe = parentFrame;
434
- for (const iframeOption of iframeOptions) {
435
- if (!iframe) {
472
+ async #getChildFrameLocator(parent2, iframeOption) {
473
+ return parent2.frameLocator(this.#getIframeSelector(iframeOption));
474
+ }
475
+ async #getDescendantFrameLocator(mainFrame, iframeOptions) {
476
+ try {
477
+ if (iframeOptions.length <= 0) {
436
478
  return null;
437
479
  }
438
- iframe = await this.#getChildFrame(iframe, iframeOption);
480
+ let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
481
+ for (const iframeOption of iframeOptions.slice(1)) {
482
+ if (!frameLocator) {
483
+ return null;
484
+ }
485
+ frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
486
+ }
487
+ return frameLocator;
488
+ } catch (err) {
489
+ throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
439
490
  }
440
- return iframe;
441
491
  }
442
492
  async #findElementHandles(selector, iframeOptions = []) {
443
493
  if (!this.#page) {
@@ -446,7 +496,7 @@ var PlaywrightPage = class extends EventEmitter {
446
496
  let frame = this.#page.mainFrame();
447
497
  const retObj = { frame, locators: [] };
448
498
  if (iframeOptions.length > 0) {
449
- frame = await this.#getDescendantFrame(frame, iframeOptions);
499
+ frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
450
500
  if (!frame) {
451
501
  return retObj;
452
502
  }
@@ -466,7 +516,7 @@ var PlaywrightPage = class extends EventEmitter {
466
516
  retObj.locators = locators;
467
517
  return retObj;
468
518
  } catch (err) {
469
- console.log(err);
519
+ loginfo2(err);
470
520
  return retObj;
471
521
  }
472
522
  }
@@ -477,7 +527,7 @@ var PlaywrightPage = class extends EventEmitter {
477
527
  const page = this.#page;
478
528
  const pageId = this.#pageId;
479
529
  page.on("close", async () => {
480
- loginfo(`##browser ${pageId} closed`);
530
+ loginfo2(`##browser ${pageId} closed`);
481
531
  if (!page.pageInfo) {
482
532
  logerr(`Logic error in page.on("close")`);
483
533
  }
@@ -500,7 +550,7 @@ var PlaywrightPage = class extends EventEmitter {
500
550
  } else {
501
551
  logerr(`##browser ${pageId} has popup without page.pageInfo`);
502
552
  }
503
- loginfo(`##browser ${pageId} has popup ${popupPageId}`);
553
+ loginfo2(`##browser ${pageId} has popup ${popupPageId}`);
504
554
  this.emit("pagePopup", evtData);
505
555
  } else {
506
556
  logerr(`##browser ${pageId} has popup page with null page`);
@@ -605,9 +655,9 @@ var PlaywrightPage = class extends EventEmitter {
605
655
  }
606
656
  let content = "";
607
657
  if (iframeOptions.length > 0) {
608
- const frame = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
609
- if (frame) {
610
- content = await frame.content();
658
+ const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
659
+ if (frameLocator) {
660
+ content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
611
661
  }
612
662
  } else {
613
663
  content = await this.#page.content();
@@ -967,10 +1017,11 @@ var PlaywrightPage = class extends EventEmitter {
967
1017
  requestData,
968
1018
  responseData
969
1019
  });
970
- loginfo(`##browser cache matched response: ${requestUrl}`);
1020
+ loginfo2(`##browser cache matched response: ${requestUrl}`);
971
1021
  }
972
1022
  if (typeof handler === "function") {
973
- await handler(response, handlerOptions, pageUrl);
1023
+ const pageData = { pageUrl, cookies: "" };
1024
+ await handler(response, handlerOptions, pageData);
974
1025
  }
975
1026
  }
976
1027
  return;
@@ -1146,7 +1197,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
1146
1197
  await lsdPage.maximizeViewport();
1147
1198
  }
1148
1199
  this.#lsdPages.push(lsdPage);
1149
- loginfo2(`##browser ${lsdPage.id()} ${openType}ed`);
1200
+ loginfo3(`##browser ${lsdPage.id()} ${openType}ed`);
1150
1201
  }
1151
1202
  }
1152
1203
  constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
@@ -1184,7 +1235,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
1184
1235
  await lsdPage.maximizeViewport();
1185
1236
  }
1186
1237
  this.#lsdPages.push(lsdPage);
1187
- loginfo2(`##browser ${lsdPage.id()} created`);
1238
+ loginfo3(`##browser ${lsdPage.id()} created`);
1188
1239
  }
1189
1240
  });
1190
1241
  browserContext.on("close", (bc) => {
@@ -1368,7 +1419,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
1368
1419
  };
1369
1420
 
1370
1421
  // src/playwright/browser.ts
1371
- import { logerr as logerr3, loginfo as loginfo3, logwarn as logwarn3 } from "@letsscrapedata/utils";
1422
+ import { logerr as logerr3, loginfo as loginfo4, logwarn as logwarn3 } from "@letsscrapedata/utils";
1372
1423
  var PlaywrightBrowser = class extends EventEmitter3 {
1373
1424
  #browser;
1374
1425
  #browserIdx;
@@ -1414,7 +1465,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
1414
1465
  this.#executablePath = executablePath;
1415
1466
  this.#nextBrowserContextIdx = 1;
1416
1467
  this.#closeFreePagesIntervalId = null;
1417
- loginfo3(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
1468
+ loginfo4(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
1418
1469
  const browserContexts = browser.contexts();
1419
1470
  if (browserContexts.length > 0) {
1420
1471
  logwarn3(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
@@ -1423,10 +1474,10 @@ var PlaywrightBrowser = class extends EventEmitter3 {
1423
1474
  for (const browserContext of browserContexts) {
1424
1475
  const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
1425
1476
  this.#lsdBrowserContexts.push(lsdBrowserContext);
1426
- loginfo3(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
1477
+ loginfo4(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
1427
1478
  }
1428
1479
  browser.on("disconnected", () => {
1429
- loginfo3(`##browser ${this.id()} disconnected`);
1480
+ loginfo4(`##browser ${this.id()} disconnected`);
1430
1481
  if (this.#lsdBrowserContexts.length > 0) {
1431
1482
  logerr3(`${this.id()} has browserContexts when disconnected`);
1432
1483
  }
@@ -1441,11 +1492,11 @@ var PlaywrightBrowser = class extends EventEmitter3 {
1441
1492
  logerr3(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
1442
1493
  return;
1443
1494
  }
1444
- loginfo3(`##browser ${lsdBrowserContext.id()} closed
1495
+ loginfo4(`##browser ${lsdBrowserContext.id()} closed
1445
1496
  `);
1446
1497
  this.#lsdBrowserContexts.splice(idx, 1);
1447
1498
  if (this.#lsdBrowserContexts.length === 0) {
1448
- loginfo3(`##browser ${this.id()} has no browserContexts now`);
1499
+ loginfo4(`##browser ${this.id()} has no browserContexts now`);
1449
1500
  }
1450
1501
  return;
1451
1502
  });
@@ -1482,7 +1533,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
1482
1533
  const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
1483
1534
  const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, true, proxy, this.#browserIdx++, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
1484
1535
  this.#lsdBrowserContexts.push(lsdBrowserContext);
1485
- loginfo3(`##browser ${lsdBrowserContext.id()} created`);
1536
+ loginfo4(`##browser ${lsdBrowserContext.id()} created`);
1486
1537
  return lsdBrowserContext;
1487
1538
  }
1488
1539
  async close() {
@@ -1803,7 +1854,7 @@ var PuppeteerElement = class _PuppeteerElement {
1803
1854
  };
1804
1855
 
1805
1856
  // src/puppeteer/page.ts
1806
- import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo4, unreachable as unreachable4 } from "@letsscrapedata/utils";
1857
+ import { getCurrentUnixTime as getCurrentUnixTime3, logerr as logerr5, loginfo as loginfo5, unreachable as unreachable4 } from "@letsscrapedata/utils";
1807
1858
  var PuppeteerPage = class extends EventEmitter4 {
1808
1859
  #lsdBrowserContext;
1809
1860
  #page;
@@ -1964,7 +2015,7 @@ var PuppeteerPage = class extends EventEmitter4 {
1964
2015
  const page = this.#page;
1965
2016
  const pageId = this.#pageId;
1966
2017
  page.on("close", async () => {
1967
- loginfo4(`##browser ${pageId} closed`);
2018
+ loginfo5(`##browser ${pageId} closed`);
1968
2019
  if (!page.pageInfo) {
1969
2020
  logerr5(`Logic error in page.on("close")`);
1970
2021
  }
@@ -1987,7 +2038,7 @@ var PuppeteerPage = class extends EventEmitter4 {
1987
2038
  } else {
1988
2039
  logerr5(`##browser ${pageId} has popup without page.pageInfo`);
1989
2040
  }
1990
- loginfo4(`##browser ${pageId} has popup ${popupPageId}`);
2041
+ loginfo5(`##browser ${pageId} has popup ${popupPageId}`);
1991
2042
  this.emit("pagePopup", evtData);
1992
2043
  } else {
1993
2044
  logerr5(`##browser ${pageId} has popup page with null page`);
@@ -2453,7 +2504,14 @@ var PuppeteerPage = class extends EventEmitter4 {
2453
2504
  });
2454
2505
  }
2455
2506
  if (typeof handler === "function") {
2456
- await handler(response, handlerOptions, pageUrl);
2507
+ const pageData = { pageUrl, cookies: "" };
2508
+ if (handlerOptions?.requestHeadersFlag) {
2509
+ const cookies = (await this.#page.cookies()).map((cookie) => {
2510
+ return `${cookie.name}=${cookie.value}`;
2511
+ }).join("; ");
2512
+ pageData.cookies = cookies;
2513
+ }
2514
+ await handler(response, handlerOptions, pageData);
2457
2515
  }
2458
2516
  }
2459
2517
  return true;
@@ -2584,7 +2642,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2584
2642
  };
2585
2643
 
2586
2644
  // src/puppeteer/context.ts
2587
- import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo5, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
2645
+ import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr6, loginfo as loginfo6, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
2588
2646
  var PuppeteerBrowserContext = class extends EventEmitter5 {
2589
2647
  #lsdBrowser;
2590
2648
  #browserIdx;
@@ -2621,7 +2679,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
2621
2679
  await lsdPage.setUserAgent(this.#userAgent);
2622
2680
  }
2623
2681
  this.#lsdPages.push(lsdPage);
2624
- loginfo5(`##browser ${lsdPage.id()} ${openType}ed`);
2682
+ loginfo6(`##browser ${lsdPage.id()} ${openType}ed`);
2625
2683
  }
2626
2684
  }
2627
2685
  constructor(lsdBrowser, browserContext, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
@@ -2668,7 +2726,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
2668
2726
  await lsdPage.setUserAgent(this.#userAgent);
2669
2727
  }
2670
2728
  this.#lsdPages.push(lsdPage);
2671
- loginfo5(`##browser ${lsdPage.id()} created`);
2729
+ loginfo6(`##browser ${lsdPage.id()} created`);
2672
2730
  }
2673
2731
  }
2674
2732
  });
@@ -2852,7 +2910,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
2852
2910
  };
2853
2911
 
2854
2912
  // src/puppeteer/browser.ts
2855
- import { logerr as logerr7, loginfo as loginfo6, logwarn as logwarn5 } from "@letsscrapedata/utils";
2913
+ import { logerr as logerr7, loginfo as loginfo7, logwarn as logwarn5 } from "@letsscrapedata/utils";
2856
2914
  var PuppeteerBrowser = class extends EventEmitter6 {
2857
2915
  #browser;
2858
2916
  #browserIdx;
@@ -2901,16 +2959,16 @@ var PuppeteerBrowser = class extends EventEmitter6 {
2901
2959
  this.#executablePath = executablePath;
2902
2960
  this.#nextBrowserContextIdx = 1;
2903
2961
  this.#closeFreePagesIntervalId = null;
2904
- loginfo6(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
2962
+ loginfo7(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
2905
2963
  const browserContexts = browser.browserContexts();
2906
2964
  const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
2907
2965
  for (const browserContext of browserContexts) {
2908
2966
  const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent(), maxViewportOfNewPage);
2909
2967
  this.#lsdBrowserContexts.push(lsdBrowserContext);
2910
- loginfo6(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
2968
+ loginfo7(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
2911
2969
  }
2912
2970
  browser.on("disconnected", () => {
2913
- loginfo6(`##browser ${this.id()} disconnected`);
2971
+ loginfo7(`##browser ${this.id()} disconnected`);
2914
2972
  if (this.#lsdBrowserContexts.length > 0) {
2915
2973
  logerr7(`${this.id()} has browserContexts when disconnected`);
2916
2974
  }
@@ -2925,11 +2983,11 @@ var PuppeteerBrowser = class extends EventEmitter6 {
2925
2983
  logerr7(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
2926
2984
  return;
2927
2985
  }
2928
- loginfo6(`##browser ${lsdBrowserContext.id()} closed
2986
+ loginfo7(`##browser ${lsdBrowserContext.id()} closed
2929
2987
  `);
2930
2988
  this.#lsdBrowserContexts.splice(idx, 1);
2931
2989
  if (this.#lsdBrowserContexts.length === 0) {
2932
- loginfo6(`##browser ${this.id()} has no browserContexts now`);
2990
+ loginfo7(`##browser ${this.id()} has no browserContexts now`);
2933
2991
  }
2934
2992
  return;
2935
2993
  });
@@ -2960,7 +3018,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
2960
3018
  const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
2961
3019
  const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
2962
3020
  this.#lsdBrowserContexts.push(lsdBrowserContext);
2963
- loginfo6(`##browser ${lsdBrowserContext.id()} created`);
3021
+ loginfo7(`##browser ${lsdBrowserContext.id()} created`);
2964
3022
  return lsdBrowserContext;
2965
3023
  }
2966
3024
  async close() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@letsscrapedata/controller",
3
- "version": "0.0.30",
3
+ "version": "0.0.31",
4
4
  "description": "Unified browser / HTML controller interfaces that support playwright, puppeteer and cheerio",
5
5
  "type": "module",
6
6
  "main": "./dist/index.mjs",