@letsscrapedata/controller 0.0.50 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,14 +1,14 @@
1
- // src/playwright/browser.ts
2
- import EventEmitter3 from "events";
3
- import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree } from "@letsscrapedata/utils";
4
-
5
- // src/playwright/context.ts
6
- import EventEmitter2 from "events";
7
- import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedata/utils";
8
-
9
1
  // src/utils/log.ts
10
2
  import { log, LogLevel } from "@letsscrapedata/utils";
11
3
  var pkgLog = log;
4
+ function setControllerLogFun(logFun) {
5
+ if (typeof logFun === "function") {
6
+ pkgLog = logFun;
7
+ return true;
8
+ } else {
9
+ return false;
10
+ }
11
+ }
12
12
  async function loginfo(...args) {
13
13
  await pkgLog(LogLevel.INF, ...args);
14
14
  }
@@ -19,10 +19,44 @@ async function logerr(...args) {
19
19
  await pkgLog(LogLevel.ERR, ...args);
20
20
  }
21
21
 
22
+ // src/playwright/browser.ts
23
+ import EventEmitter3 from "events";
24
+ import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree } from "@letsscrapedata/utils";
25
+
26
+ // src/playwright/context.ts
27
+ import EventEmitter2 from "events";
28
+ import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedata/utils";
29
+
22
30
  // src/playwright/page.ts
23
31
  import EventEmitter from "events";
24
32
  import { getCurrentUnixTime, unreachable as unreachable2 } from "@letsscrapedata/utils";
25
33
 
34
+ // src/utils/common.ts
35
+ function convertDataAttributeName(attr) {
36
+ if (!attr.startsWith("data-")) {
37
+ return "";
38
+ }
39
+ const parts = attr.split("-");
40
+ let name = parts[1];
41
+ for (const part of parts.slice(2)) {
42
+ if (!part) {
43
+ continue;
44
+ }
45
+ name = `${name}${part[1].toUpperCase()}${part.slice(1).toLowerCase()}`;
46
+ }
47
+ return name;
48
+ }
49
+ function getIframeSelector(iframeOption) {
50
+ const { src = "", id = "", selector = "" } = iframeOption;
51
+ if (typeof src === "string" && src) {
52
+ return `iframe[src^="${src}"]`;
53
+ } else if (typeof id === "string" && id) {
54
+ return `iframe[id="${id}"]`;
55
+ } else {
56
+ return selector;
57
+ }
58
+ }
59
+
26
60
  // src/playwright/element.ts
27
61
  import { unreachable } from "@letsscrapedata/utils";
28
62
  var PlaywrightElement = class _PlaywrightElement {
@@ -43,6 +77,29 @@ var PlaywrightElement = class _PlaywrightElement {
43
77
  const names = await this.#locator.evaluate((node) => node.getAttributeNames());
44
78
  return names;
45
79
  }
80
+ async dataset() {
81
+ try {
82
+ const dataset = await this.#locator.evaluate((node) => node.dataset);
83
+ return dataset;
84
+ } catch (err) {
85
+ return {};
86
+ }
87
+ }
88
+ async evaluate(func, args) {
89
+ try {
90
+ const frame = this.#frame;
91
+ ;
92
+ if (typeof frame.parentFrame === "function") {
93
+ return await frame.evaluate(func, args);
94
+ } else {
95
+ const locator = this.#frame.owner();
96
+ return await locator.evaluate(func, args);
97
+ }
98
+ } catch (err) {
99
+ logerr(err);
100
+ return "";
101
+ }
102
+ }
46
103
  /*
47
104
  async #getChildFrame(parentFrame: Frame, iframeOption: IframeOption): Promise<Frame | null> {
48
105
  if (!parentFrame) {
@@ -76,22 +133,15 @@ var PlaywrightElement = class _PlaywrightElement {
76
133
  return null;
77
134
  }
78
135
  */
79
- #getIframeSelector(iframeOption) {
80
- const { src = "", selector = "" } = iframeOption;
81
- if (!src && !selector) {
82
- throw new Error("Invalid parent frame");
83
- }
84
- return selector ? selector : `iframe[src^="${src}"]`;
85
- }
86
136
  async #getChildFrameLocator(parent, iframeOption) {
87
- return parent.frameLocator(this.#getIframeSelector(iframeOption));
137
+ return parent.frameLocator(getIframeSelector(iframeOption));
88
138
  }
89
139
  async #getDescendantFrame(parent, iframeOptions) {
90
140
  try {
91
141
  if (iframeOptions.length <= 0) {
92
142
  return null;
93
143
  }
94
- let frameLocator = parent.frameLocator(this.#getIframeSelector(iframeOptions[0]));
144
+ let frameLocator = parent.frameLocator(getIframeSelector(iframeOptions[0]));
95
145
  for (const iframeOption of iframeOptions.slice(1)) {
96
146
  if (!frameLocator) {
97
147
  return null;
@@ -108,12 +158,12 @@ var PlaywrightElement = class _PlaywrightElement {
108
158
  let frame = this.#frame;
109
159
  const retObj = { frame, locators: [] };
110
160
  if (iframeOptions.length > 0) {
111
- frame = await this.#getDescendantFrame(frame, iframeOptions);
112
- if (!frame) {
161
+ const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
162
+ if (!childFrame) {
113
163
  return retObj;
114
164
  }
115
- retObj.frame = frame;
116
- parent = frame;
165
+ retObj.frame = childFrame;
166
+ parent = childFrame;
117
167
  }
118
168
  try {
119
169
  let locators = [];
@@ -298,6 +348,7 @@ var PlaywrightPage = class extends EventEmitter {
298
348
  #page;
299
349
  #status;
300
350
  #pageId;
351
+ #closeWhenFree;
301
352
  #resquestInterceptionOptions;
302
353
  #responseInterceptionOptions;
303
354
  #client;
@@ -428,29 +479,57 @@ var PlaywrightPage = class extends EventEmitter {
428
479
  return null;
429
480
  }
430
481
  */
431
- #getIframeSelector(iframeOption) {
432
- const { src = "", selector = "" } = iframeOption;
433
- if (!src && !selector) {
434
- throw new Error("Invalid parent frame");
482
+ async #findDescendantFrame(src, id) {
483
+ if (!this.#page) {
484
+ throw new Error("No valid page");
435
485
  }
436
- return selector ? selector : `iframe[src^="${src}"]`;
486
+ const frames = this.#page.frames();
487
+ for (const frame of frames) {
488
+ const url = frame.url();
489
+ if (typeof src === "string" && src) {
490
+ if (url.startsWith(src)) {
491
+ return frame;
492
+ } else if (url.toLowerCase().startsWith(src)) {
493
+ return frame;
494
+ }
495
+ } else if (src instanceof RegExp) {
496
+ if (url.match(src)) {
497
+ return frame;
498
+ }
499
+ } else if (id) {
500
+ const element = await frame.frameElement();
501
+ if (element) {
502
+ const frameId = await frame.evaluate(([ele, attr]) => ele.getAttribute(attr), [element, "id"]);
503
+ if (frameId === id) {
504
+ return frame;
505
+ }
506
+ }
507
+ }
508
+ }
509
+ return null;
437
510
  }
438
511
  async #getChildFrameLocator(parent, iframeOption) {
439
- return parent.frameLocator(this.#getIframeSelector(iframeOption));
512
+ return parent.frameLocator(getIframeSelector(iframeOption));
440
513
  }
441
- async #getDescendantFrameLocator(mainFrame, iframeOptions) {
514
+ async #getDescendantFrame(mainFrame, iframeOptions) {
442
515
  try {
443
516
  if (iframeOptions.length <= 0) {
444
517
  return null;
445
518
  }
446
- let frameLocator = mainFrame.frameLocator(this.#getIframeSelector(iframeOptions[0]));
447
- for (const iframeOption of iframeOptions.slice(1)) {
448
- if (!frameLocator) {
449
- return null;
519
+ if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
520
+ const { src = "", id = "" } = iframeOptions[0];
521
+ const frame = await this.#findDescendantFrame(src, id);
522
+ return frame;
523
+ } else {
524
+ let frameLocator = mainFrame.frameLocator(getIframeSelector(iframeOptions[0]));
525
+ for (const iframeOption of iframeOptions.slice(1)) {
526
+ if (!frameLocator) {
527
+ return null;
528
+ }
529
+ frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
450
530
  }
451
- frameLocator = await this.#getChildFrameLocator(frameLocator, iframeOption);
531
+ return frameLocator;
452
532
  }
453
- return frameLocator;
454
533
  } catch (err) {
455
534
  throw new Error(`No child iframe: ${JSON.stringify(iframeOptions)}`);
456
535
  }
@@ -462,7 +541,7 @@ var PlaywrightPage = class extends EventEmitter {
462
541
  let frame = this.#page.mainFrame();
463
542
  const retObj = { frame, locators: [] };
464
543
  if (iframeOptions.length > 0) {
465
- frame = await this.#getDescendantFrameLocator(frame, iframeOptions);
544
+ frame = await this.#getDescendantFrame(frame, iframeOptions);
466
545
  if (!frame) {
467
546
  return retObj;
468
547
  }
@@ -535,12 +614,32 @@ var PlaywrightPage = class extends EventEmitter {
535
614
  const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
536
615
  this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
537
616
  this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
617
+ this.#closeWhenFree = false;
538
618
  this.#resquestInterceptionOptions = [];
539
619
  this.#responseInterceptionOptions = [];
540
620
  this.#client = null;
541
621
  this.#responseCb = null;
542
622
  this.#addPageOn();
543
623
  }
624
+ async addPreloadScript(scriptOrFunc, arg) {
625
+ if (!this.#page) {
626
+ throw new Error("No valid page");
627
+ }
628
+ if (typeof scriptOrFunc === "string") {
629
+ await this.#page.addInitScript({ content: scriptOrFunc });
630
+ } else if (typeof scriptOrFunc === "function") {
631
+ await this.#page.addInitScript(scriptOrFunc, arg);
632
+ } else {
633
+ throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
634
+ }
635
+ return true;
636
+ }
637
+ async addScriptTag(options) {
638
+ if (!this.#page) {
639
+ throw new Error("No valid page");
640
+ }
641
+ return this.#page.addScriptTag(options);
642
+ }
544
643
  apiContext() {
545
644
  return this.browserContext().apiContext();
546
645
  }
@@ -600,7 +699,7 @@ var PlaywrightPage = class extends EventEmitter {
600
699
  }
601
700
  async close() {
602
701
  if (this.#status === "closed") {
603
- console.warn(`Page ${this.#pageId} is already closed.`);
702
+ logwarn(`Page ${this.#pageId} is already closed.`);
604
703
  return true;
605
704
  } else if (this.#status === "busy") {
606
705
  throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
@@ -613,13 +712,16 @@ var PlaywrightPage = class extends EventEmitter {
613
712
  this.#status = "closed";
614
713
  return true;
615
714
  }
715
+ closeWhenFree() {
716
+ return this.#closeWhenFree;
717
+ }
616
718
  async content(iframeOptions = []) {
617
719
  if (!this.#page) {
618
720
  throw new Error("No valid page");
619
721
  }
620
722
  let content = "";
621
723
  if (iframeOptions.length > 0) {
622
- const frameLocator = await this.#getDescendantFrameLocator(this.#page.mainFrame(), iframeOptions);
724
+ const frameLocator = await this.#getDescendantFrame(this.#page.mainFrame(), iframeOptions);
623
725
  if (frameLocator) {
624
726
  content = await frameLocator.locator(":root").evaluate(() => document.documentElement.outerHTML);
625
727
  }
@@ -641,11 +743,18 @@ var PlaywrightPage = class extends EventEmitter {
641
743
  const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
642
744
  return height;
643
745
  }
644
- async evalute(fun, args) {
746
+ async evaluate(func, args) {
747
+ if (!this.#page) {
748
+ throw new Error("No valid page");
749
+ }
750
+ return this.#page.evaluate(func, args);
751
+ }
752
+ async exposeFunction(name, callbackFunction) {
645
753
  if (!this.#page) {
646
754
  throw new Error("No valid page");
647
755
  }
648
- return this.#page.evaluate(fun, args);
756
+ await this.#page.exposeFunction(name, callbackFunction);
757
+ return;
649
758
  }
650
759
  async findElement(selectorOrXpath, iframeOptions = []) {
651
760
  if (!this.#page) {
@@ -683,7 +792,7 @@ var PlaywrightPage = class extends EventEmitter {
683
792
  }
684
793
  async free() {
685
794
  if (this.#status === "free") {
686
- console.warn(`Page ${this.#pageId} is already free.`);
795
+ logwarn(`Page ${this.#pageId} is already free.`);
687
796
  }
688
797
  this.#status = "free";
689
798
  await this.clearRequestInterceptions();
@@ -823,6 +932,10 @@ var PlaywrightPage = class extends EventEmitter {
823
932
  }
824
933
  return response;
825
934
  }
935
+ setCloseWhenFree(closeWhenFree) {
936
+ this.#closeWhenFree = closeWhenFree;
937
+ return true;
938
+ }
826
939
  async setCookies(cookies) {
827
940
  if (!this.#page) {
828
941
  throw new Error("No valid page");
@@ -1006,7 +1119,7 @@ var PlaywrightPage = class extends EventEmitter {
1006
1119
  }
1007
1120
  const actOptions = Array.isArray(options) ? options : [options];
1008
1121
  if (actOptions.length <= 0) {
1009
- console.warn("Invalid paras in setResponseInterception");
1122
+ logwarn("Invalid paras in setResponseInterception");
1010
1123
  return false;
1011
1124
  }
1012
1125
  const firstResponseInterception = this.#responseInterceptionOptions.length <= 0;
@@ -1708,6 +1821,26 @@ var PuppeteerElement = class _PuppeteerElement {
1708
1821
  const names = await this.#frame.evaluate((ele) => ele.getAttributeNames(), this.#$ele);
1709
1822
  return names;
1710
1823
  }
1824
+ async dataset() {
1825
+ try {
1826
+ const attributeNames = await this.attributeNames();
1827
+ const dataset = {};
1828
+ for (const attributeName of attributeNames) {
1829
+ if (!attributeName.startsWith("data-")) {
1830
+ continue;
1831
+ }
1832
+ const val = await this.attribute(attributeName);
1833
+ const key = convertDataAttributeName(attributeName);
1834
+ dataset[key] = val;
1835
+ }
1836
+ return dataset;
1837
+ } catch (err) {
1838
+ return {};
1839
+ }
1840
+ }
1841
+ async evaluate(func, args) {
1842
+ return await this.#frame.evaluate(func, args);
1843
+ }
1711
1844
  async #getChildFrame(parentFrame, iframeOption) {
1712
1845
  if (!parentFrame) {
1713
1846
  throw new Error("Invalid parent frame");
@@ -1757,13 +1890,13 @@ var PuppeteerElement = class _PuppeteerElement {
1757
1890
  let frame = this.#frame;
1758
1891
  const retObj = { frame, elementHandles: [] };
1759
1892
  if (iframeOptions.length > 0) {
1760
- frame = await this.#getDescendantFrame(frame, iframeOptions);
1761
- if (!frame) {
1893
+ const childFrame = await this.#getDescendantFrame(frame, iframeOptions);
1894
+ if (!childFrame) {
1762
1895
  return retObj;
1763
1896
  }
1764
- retObj.frame = frame;
1897
+ retObj.frame = childFrame;
1765
1898
  absolute = true;
1766
- parent = frame;
1899
+ parent = childFrame;
1767
1900
  }
1768
1901
  try {
1769
1902
  if (selector.startsWith("./") || selector.startsWith("/") || selector.startsWith("..")) {
@@ -1965,6 +2098,7 @@ var PuppeteerPage = class extends EventEmitter4 {
1965
2098
  #page;
1966
2099
  #status;
1967
2100
  #pageId;
2101
+ #closeWhenFree;
1968
2102
  #requestInterceptionNum;
1969
2103
  #responseInterceptionNum;
1970
2104
  #client;
@@ -2039,15 +2173,41 @@ var PuppeteerPage = class extends EventEmitter4 {
2039
2173
  });
2040
2174
  return true;
2041
2175
  }
2176
+ async #findDescendantFrame(src, id) {
2177
+ if (!this.#page) {
2178
+ throw new Error("No valid page");
2179
+ }
2180
+ const frames = this.#page.frames();
2181
+ for (const frame of frames) {
2182
+ const url = frame.url();
2183
+ if (typeof src === "string" && src) {
2184
+ if (url.startsWith(src)) {
2185
+ return frame;
2186
+ } else if (url.toLowerCase().startsWith(src)) {
2187
+ return frame;
2188
+ }
2189
+ } else if (src instanceof RegExp) {
2190
+ if (url.match(src)) {
2191
+ return frame;
2192
+ }
2193
+ } else if (id) {
2194
+ const element = await frame.frameElement();
2195
+ if (element) {
2196
+ const frameId = await frame.evaluate((ele, attr) => ele.getAttribute(attr), element, "id");
2197
+ if (frameId === id) {
2198
+ return frame;
2199
+ }
2200
+ }
2201
+ }
2202
+ }
2203
+ return null;
2204
+ }
2042
2205
  async #getChildFrame(parentFrame, iframeOption) {
2043
2206
  if (!parentFrame) {
2044
2207
  throw new Error("Invalid parent frame");
2045
2208
  }
2046
2209
  let iframe = null;
2047
- let { src = "", selector = "" } = iframeOption;
2048
- if (!src && !selector) {
2049
- throw new Error("Invalid IframeOption");
2050
- }
2210
+ let { src = "" } = iframeOption;
2051
2211
  if (src) {
2052
2212
  const childFrames = parentFrame.childFrames();
2053
2213
  for (const childFrame of childFrames) {
@@ -2065,7 +2225,8 @@ var PuppeteerPage = class extends EventEmitter4 {
2065
2225
  }
2066
2226
  }
2067
2227
  } else {
2068
- const $eleIframe = await parentFrame.$(selector);
2228
+ const frameSelector = getIframeSelector(iframeOption);
2229
+ const $eleIframe = await parentFrame.$(frameSelector);
2069
2230
  if ($eleIframe) {
2070
2231
  iframe = await $eleIframe.contentFrame();
2071
2232
  return iframe;
@@ -2075,11 +2236,16 @@ var PuppeteerPage = class extends EventEmitter4 {
2075
2236
  }
2076
2237
  async #getDescendantFrame(parentFrame, iframeOptions) {
2077
2238
  let iframe = parentFrame;
2078
- for (const iframeOption of iframeOptions) {
2079
- if (!iframe) {
2080
- return null;
2239
+ if (iframeOptions.length === 1 && !iframeOptions[0].selector) {
2240
+ const { src = "", id = "" } = iframeOptions[0];
2241
+ iframe = await this.#findDescendantFrame(src, id);
2242
+ } else {
2243
+ for (const iframeOption of iframeOptions) {
2244
+ if (!iframe) {
2245
+ return null;
2246
+ }
2247
+ iframe = await this.#getChildFrame(iframe, iframeOption);
2081
2248
  }
2082
- iframe = await this.#getChildFrame(iframe, iframeOption);
2083
2249
  }
2084
2250
  return iframe;
2085
2251
  }
@@ -2108,7 +2274,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2108
2274
  }
2109
2275
  return retObj;
2110
2276
  } catch (err) {
2111
- console.log(err);
2277
+ loginfo(err);
2112
2278
  return retObj;
2113
2279
  }
2114
2280
  }
@@ -2161,11 +2327,31 @@ var PuppeteerPage = class extends EventEmitter4 {
2161
2327
  const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
2162
2328
  this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
2163
2329
  this.#pageId = `page-${browserIdx}-${browserContextIdx}-${pageIdx}`;
2330
+ this.#closeWhenFree = false;
2164
2331
  this.#requestInterceptionNum = 0;
2165
2332
  this.#responseInterceptionNum = 0;
2166
2333
  this.#client = null;
2167
2334
  this.#addPageOn();
2168
2335
  }
2336
+ async addPreloadScript(scriptOrFunc, arg) {
2337
+ if (!this.#page) {
2338
+ throw new Error("No valid page");
2339
+ }
2340
+ if (typeof scriptOrFunc === "string") {
2341
+ await this.#page.evaluateOnNewDocument(scriptOrFunc);
2342
+ } else if (typeof scriptOrFunc === "function") {
2343
+ await this.#page.evaluateOnNewDocument(scriptOrFunc, arg);
2344
+ } else {
2345
+ throw new Error(`Invalid type of scriptOrFunc ${typeof scriptOrFunc}`);
2346
+ }
2347
+ return true;
2348
+ }
2349
+ async addScriptTag(options) {
2350
+ if (!this.#page) {
2351
+ throw new Error("No valid page");
2352
+ }
2353
+ return this.#page.addScriptTag(options);
2354
+ }
2169
2355
  apiContext() {
2170
2356
  throw new Error("Not supported in PuppeteerPage.");
2171
2357
  }
@@ -2222,7 +2408,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2222
2408
  }
2223
2409
  async close() {
2224
2410
  if (this.#status === "closed") {
2225
- console.warn(`Page ${this.#pageId} is already closed.`);
2411
+ logwarn(`Page ${this.#pageId} is already closed.`);
2226
2412
  return true;
2227
2413
  } else if (this.#status === "busy") {
2228
2414
  throw new Error(`Page ${this.#pageId} cannot be closed because it is busy.`);
@@ -2235,6 +2421,9 @@ var PuppeteerPage = class extends EventEmitter4 {
2235
2421
  this.#status = "closed";
2236
2422
  return true;
2237
2423
  }
2424
+ closeWhenFree() {
2425
+ return this.#closeWhenFree;
2426
+ }
2238
2427
  async content(iframeOptions = []) {
2239
2428
  if (!this.#page) {
2240
2429
  throw new Error("No valid page");
@@ -2263,11 +2452,18 @@ var PuppeteerPage = class extends EventEmitter4 {
2263
2452
  const height = await this.#page.evaluate(() => document.documentElement.scrollHeight);
2264
2453
  return height;
2265
2454
  }
2266
- async evalute(fun, args) {
2455
+ async evaluate(func, args) {
2456
+ if (!this.#page) {
2457
+ throw new Error("No valid page");
2458
+ }
2459
+ return this.#page.evaluate(func, args);
2460
+ }
2461
+ async exposeFunction(name, callbackFunction) {
2267
2462
  if (!this.#page) {
2268
2463
  throw new Error("No valid page");
2269
2464
  }
2270
- return this.#page.evaluate(fun, args);
2465
+ await this.#page.exposeFunction(name, callbackFunction);
2466
+ return;
2271
2467
  }
2272
2468
  async findElement(selectorOrXpath, iframeOptions = []) {
2273
2469
  if (!this.#page) {
@@ -2305,7 +2501,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2305
2501
  }
2306
2502
  async free() {
2307
2503
  if (this.#status === "free") {
2308
- console.warn(`Page ${this.#pageId} is already free.`);
2504
+ logwarn(`Page ${this.#pageId} is already free.`);
2309
2505
  }
2310
2506
  this.#status = "free";
2311
2507
  await this.clearRequestInterceptions();
@@ -2448,6 +2644,10 @@ var PuppeteerPage = class extends EventEmitter4 {
2448
2644
  }
2449
2645
  return response;
2450
2646
  }
2647
+ setCloseWhenFree(closeWhenFree) {
2648
+ this.#closeWhenFree = closeWhenFree;
2649
+ return true;
2650
+ }
2451
2651
  async setCookies(cookies) {
2452
2652
  if (!this.#page) {
2453
2653
  throw new Error("No valid page");
@@ -2523,7 +2723,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2523
2723
  }
2524
2724
  const actOptions = Array.isArray(options) ? options : [options];
2525
2725
  if (actOptions.length <= 0) {
2526
- console.warn("Invalid paras in setRequestInterception");
2726
+ logwarn("Invalid paras in setRequestInterception");
2527
2727
  return false;
2528
2728
  }
2529
2729
  if (this.#requestInterceptionNum <= 0) {
@@ -2559,7 +2759,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2559
2759
  await request.continue();
2560
2760
  return true;
2561
2761
  } catch (err) {
2562
- console.error(err);
2762
+ logerr(err);
2563
2763
  return false;
2564
2764
  }
2565
2765
  });
@@ -2571,7 +2771,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2571
2771
  }
2572
2772
  const actOptions = Array.isArray(options) ? options : [options];
2573
2773
  if (actOptions.length <= 0) {
2574
- console.warn("Invalid paras in setResponseInterception");
2774
+ logwarn("Invalid paras in setResponseInterception");
2575
2775
  return false;
2576
2776
  }
2577
2777
  this.#responseInterceptionNum++;
@@ -2626,7 +2826,7 @@ var PuppeteerPage = class extends EventEmitter4 {
2626
2826
  }
2627
2827
  return true;
2628
2828
  } catch (err) {
2629
- console.error(err);
2829
+ logerr(err);
2630
2830
  return false;
2631
2831
  }
2632
2832
  });
@@ -3286,6 +3486,22 @@ var CheerioElement = class _CheerioElement {
3286
3486
  return Array.from(Object.keys(element.attribs));
3287
3487
  }
3288
3488
  }
3489
+ async dataset() {
3490
+ const attributeNames = await this.attributeNames();
3491
+ const dataset = {};
3492
+ for (const attributeName of attributeNames) {
3493
+ if (!attributeName.startsWith("data-")) {
3494
+ continue;
3495
+ }
3496
+ const val = await this.attribute(attributeName);
3497
+ const key = convertDataAttributeName(attributeName);
3498
+ dataset[key] = val;
3499
+ }
3500
+ return dataset;
3501
+ }
3502
+ async evaluate() {
3503
+ throw new Error("Not supported in CheerioPage.");
3504
+ }
3289
3505
  #findNodes(selector, absolute) {
3290
3506
  if (selector.startsWith("./") || selector.startsWith("/")) {
3291
3507
  throw new Error("Do not support XPath in cheerio.");
@@ -3406,6 +3622,12 @@ var CheerioPage = class extends EventEmitter7 {
3406
3622
  _origPage() {
3407
3623
  throw new Error("Method not implemented.");
3408
3624
  }
3625
+ async addPreloadScript() {
3626
+ throw new Error("Not supported in CheerioPage.");
3627
+ }
3628
+ addScriptTag() {
3629
+ throw new Error("Not supported in CheerioPage.");
3630
+ }
3409
3631
  apiContext() {
3410
3632
  throw new Error("Not supported in CheerioPage.");
3411
3633
  }
@@ -3433,13 +3655,19 @@ var CheerioPage = class extends EventEmitter7 {
3433
3655
  async close() {
3434
3656
  throw new Error("Not supported in CheerioPage.");
3435
3657
  }
3658
+ closeWhenFree() {
3659
+ throw new Error("Not supported in CheerioPage.");
3660
+ }
3436
3661
  async content() {
3437
3662
  throw new Error("Not supported in CheerioPage.");
3438
3663
  }
3439
3664
  async cookies() {
3440
3665
  throw new Error("Not supported in CheerioPage.");
3441
3666
  }
3442
- async evalute() {
3667
+ async evaluate() {
3668
+ throw new Error("Not supported in CheerioPage.");
3669
+ }
3670
+ exposeFunction() {
3443
3671
  throw new Error("Not supported in CheerioPage.");
3444
3672
  }
3445
3673
  #findNodes(selector) {
@@ -3536,6 +3764,9 @@ var CheerioPage = class extends EventEmitter7 {
3536
3764
  async sendCDPMessage() {
3537
3765
  throw new Error("Method not implemented.");
3538
3766
  }
3767
+ setCloseWhenFree() {
3768
+ throw new Error("Not supported in CheerioPage.");
3769
+ }
3539
3770
  async setCookies() {
3540
3771
  throw new Error("Not supported in CheerioPage.");
3541
3772
  }
@@ -3872,5 +4103,6 @@ export {
3872
4103
  PuppeteerBrowserContext,
3873
4104
  PuppeteerElement,
3874
4105
  PuppeteerPage,
3875
- controller
4106
+ controller,
4107
+ setControllerLogFun
3876
4108
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@letsscrapedata/controller",
3
- "version": "0.0.50",
3
+ "version": "0.0.52",
4
4
  "description": "Unified browser / HTML controller interfaces that support playwright, puppeteer and cheerio",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",