@letsscrapedata/controller 0.0.48 → 0.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +129 -116
- package/dist/index.d.cts +5 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +83 -70
- package/package.json +2 -2
package/dist/index.d.cts
CHANGED
|
@@ -982,6 +982,11 @@ interface LsdBrowserContext extends EventEmitter {
|
|
|
982
982
|
*/
|
|
983
983
|
apiContext(): LsdApiContext;
|
|
984
984
|
browser(): LsdBrowser;
|
|
985
|
+
/**
|
|
986
|
+
* close this BrowserContext
|
|
987
|
+
* * For BrowserContext that cannot be closed directly, it is only marked as closed; it will be closed when browser is closed.
|
|
988
|
+
* * refer to " Error: Non-incognito profiles cannot be closed" in puppeteer
|
|
989
|
+
*/
|
|
985
990
|
close(): Promise<boolean>;
|
|
986
991
|
/**
|
|
987
992
|
* close pages that are free more than maxPageFreeSeconds if maxPageFreeSeconds > 0
|
package/dist/index.d.ts
CHANGED
|
@@ -982,6 +982,11 @@ interface LsdBrowserContext extends EventEmitter {
|
|
|
982
982
|
*/
|
|
983
983
|
apiContext(): LsdApiContext;
|
|
984
984
|
browser(): LsdBrowser;
|
|
985
|
+
/**
|
|
986
|
+
* close this BrowserContext
|
|
987
|
+
* * For BrowserContext that cannot be closed directly, it is only marked as closed; it will be closed when browser is closed.
|
|
988
|
+
* * refer to " Error: Non-incognito profiles cannot be closed" in puppeteer
|
|
989
|
+
*/
|
|
985
990
|
close(): Promise<boolean>;
|
|
986
991
|
/**
|
|
987
992
|
* close pages that are free more than maxPageFreeSeconds if maxPageFreeSeconds > 0
|
package/dist/index.js
CHANGED
|
@@ -1,16 +1,30 @@
|
|
|
1
1
|
// src/playwright/browser.ts
|
|
2
2
|
import EventEmitter3 from "events";
|
|
3
|
+
import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree } from "@letsscrapedata/utils";
|
|
3
4
|
|
|
4
5
|
// src/playwright/context.ts
|
|
5
6
|
import EventEmitter2 from "events";
|
|
6
|
-
import { getCurrentUnixTime as getCurrentUnixTime2,
|
|
7
|
+
import { getCurrentUnixTime as getCurrentUnixTime2, sleep } from "@letsscrapedata/utils";
|
|
8
|
+
|
|
9
|
+
// src/utils/log.ts
|
|
10
|
+
import { log, LogLevel } from "@letsscrapedata/utils";
|
|
11
|
+
var pkgLog = log;
|
|
12
|
+
async function loginfo(...args) {
|
|
13
|
+
await pkgLog(LogLevel.INF, ...args);
|
|
14
|
+
}
|
|
15
|
+
async function logwarn(...args) {
|
|
16
|
+
await pkgLog(LogLevel.WRN, ...args);
|
|
17
|
+
}
|
|
18
|
+
async function logerr(...args) {
|
|
19
|
+
await pkgLog(LogLevel.ERR, ...args);
|
|
20
|
+
}
|
|
7
21
|
|
|
8
22
|
// src/playwright/page.ts
|
|
9
23
|
import EventEmitter from "events";
|
|
10
|
-
import { getCurrentUnixTime,
|
|
24
|
+
import { getCurrentUnixTime, unreachable as unreachable2 } from "@letsscrapedata/utils";
|
|
11
25
|
|
|
12
26
|
// src/playwright/element.ts
|
|
13
|
-
import {
|
|
27
|
+
import { unreachable } from "@letsscrapedata/utils";
|
|
14
28
|
var PlaywrightElement = class _PlaywrightElement {
|
|
15
29
|
#frame;
|
|
16
30
|
#locator;
|
|
@@ -468,7 +482,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
468
482
|
retObj.locators = locators;
|
|
469
483
|
return retObj;
|
|
470
484
|
} catch (err) {
|
|
471
|
-
|
|
485
|
+
loginfo(err);
|
|
472
486
|
return retObj;
|
|
473
487
|
}
|
|
474
488
|
}
|
|
@@ -479,7 +493,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
479
493
|
const page = this.#page;
|
|
480
494
|
const pageId = this.#pageId;
|
|
481
495
|
page.on("close", async () => {
|
|
482
|
-
|
|
496
|
+
loginfo(`##browser ${pageId} closed`);
|
|
483
497
|
if (!page.pageInfo) {
|
|
484
498
|
logerr(`Logic error in page.on("close")`);
|
|
485
499
|
}
|
|
@@ -502,7 +516,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
502
516
|
} else {
|
|
503
517
|
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
504
518
|
}
|
|
505
|
-
|
|
519
|
+
loginfo(`##browser ${pageId} has popup ${popupPageId}`);
|
|
506
520
|
this.emit("pagePopup", evtData);
|
|
507
521
|
} else {
|
|
508
522
|
logerr(`##browser ${pageId} has popup page with null page`);
|
|
@@ -973,7 +987,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
973
987
|
requestData,
|
|
974
988
|
responseData
|
|
975
989
|
});
|
|
976
|
-
|
|
990
|
+
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
977
991
|
}
|
|
978
992
|
if (typeof handler === "function") {
|
|
979
993
|
const pageData = { pageUrl, cookies: "" };
|
|
@@ -1195,7 +1209,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1195
1209
|
await lsdPage.maximizeViewport();
|
|
1196
1210
|
}
|
|
1197
1211
|
this.#lsdPages.push(lsdPage);
|
|
1198
|
-
|
|
1212
|
+
loginfo(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
1199
1213
|
}
|
|
1200
1214
|
}
|
|
1201
1215
|
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
@@ -1230,7 +1244,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1230
1244
|
const pageInfo = page.pageInfo;
|
|
1231
1245
|
if (pageInfo) {
|
|
1232
1246
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
1233
|
-
|
|
1247
|
+
logwarn(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
1234
1248
|
} else {
|
|
1235
1249
|
const currentTime2 = getCurrentUnixTime2();
|
|
1236
1250
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime2, lastStatusUpdateTime: currentTime2, taskId: 0, relatedId: 0, misc: {} };
|
|
@@ -1239,23 +1253,23 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1239
1253
|
await lsdPage.maximizeViewport();
|
|
1240
1254
|
}
|
|
1241
1255
|
this.#lsdPages.push(lsdPage);
|
|
1242
|
-
|
|
1256
|
+
loginfo(`##page ${lsdPage.id()} created`);
|
|
1243
1257
|
}
|
|
1244
1258
|
});
|
|
1245
1259
|
browserContext.on("close", (bc) => {
|
|
1246
1260
|
if (browserContext !== bc) {
|
|
1247
|
-
|
|
1261
|
+
logerr(`##browser different browserContext in browserContext.on("close")`);
|
|
1248
1262
|
}
|
|
1249
1263
|
this.#lsdBrowser.emit("browserContextClose", this);
|
|
1250
1264
|
});
|
|
1251
1265
|
this.on("pageClose", (lsdPage) => {
|
|
1252
1266
|
if (!(lsdPage instanceof PlaywrightPage)) {
|
|
1253
|
-
|
|
1267
|
+
logerr(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
1254
1268
|
return;
|
|
1255
1269
|
}
|
|
1256
1270
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
1257
1271
|
if (idx < 0) {
|
|
1258
|
-
|
|
1272
|
+
logerr(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
1259
1273
|
return;
|
|
1260
1274
|
}
|
|
1261
1275
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -1272,7 +1286,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1272
1286
|
if (this.#browserContext) {
|
|
1273
1287
|
this.#status = "closed";
|
|
1274
1288
|
this.#lastStatusUpdateTime = getCurrentUnixTime2();
|
|
1275
|
-
|
|
1289
|
+
loginfo(`browserContext ${this.id()} closed at ${this.#lastStatusUpdateTime}`);
|
|
1276
1290
|
await this.#browserContext.close();
|
|
1277
1291
|
}
|
|
1278
1292
|
return true;
|
|
@@ -1287,12 +1301,12 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1287
1301
|
await sleep(200);
|
|
1288
1302
|
}
|
|
1289
1303
|
}
|
|
1290
|
-
|
|
1304
|
+
logwarn(`Cannot get the gettingLock.`);
|
|
1291
1305
|
return false;
|
|
1292
1306
|
}
|
|
1293
1307
|
#freeGettingLock() {
|
|
1294
1308
|
if (!this.#gettingPage) {
|
|
1295
|
-
|
|
1309
|
+
logwarn(`Getting lock is already free now.`);
|
|
1296
1310
|
}
|
|
1297
1311
|
this.#gettingPage = false;
|
|
1298
1312
|
}
|
|
@@ -1301,7 +1315,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1301
1315
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
1302
1316
|
}
|
|
1303
1317
|
if (maxPageFreeSeconds <= 0) {
|
|
1304
|
-
|
|
1318
|
+
logwarn(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
1305
1319
|
return false;
|
|
1306
1320
|
}
|
|
1307
1321
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -1320,7 +1334,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1320
1334
|
this.#freeGettingLock();
|
|
1321
1335
|
return true;
|
|
1322
1336
|
} catch (err) {
|
|
1323
|
-
|
|
1337
|
+
logerr(err);
|
|
1324
1338
|
this.#freeGettingLock();
|
|
1325
1339
|
return false;
|
|
1326
1340
|
}
|
|
@@ -1372,7 +1386,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1372
1386
|
return null;
|
|
1373
1387
|
}
|
|
1374
1388
|
} catch (err) {
|
|
1375
|
-
|
|
1389
|
+
logerr(err);
|
|
1376
1390
|
this.#freeGettingLock();
|
|
1377
1391
|
return null;
|
|
1378
1392
|
}
|
|
@@ -1454,7 +1468,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1454
1468
|
await page.free();
|
|
1455
1469
|
return true;
|
|
1456
1470
|
} catch (err) {
|
|
1457
|
-
|
|
1471
|
+
logerr(err);
|
|
1458
1472
|
return false;
|
|
1459
1473
|
}
|
|
1460
1474
|
}
|
|
@@ -1476,7 +1490,6 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1476
1490
|
};
|
|
1477
1491
|
|
|
1478
1492
|
// src/playwright/browser.ts
|
|
1479
|
-
import { getCurrentUnixTime as getCurrentUnixTime3, getPerformanceOfPidTree, logerr as logerr3, loginfo as loginfo4, logwarn as logwarn3 } from "@letsscrapedata/utils";
|
|
1480
1493
|
var PlaywrightBrowser = class extends EventEmitter3 {
|
|
1481
1494
|
#browser;
|
|
1482
1495
|
#browserIdx;
|
|
@@ -1526,38 +1539,38 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1526
1539
|
this.#executablePath = executablePath;
|
|
1527
1540
|
this.#nextBrowserContextIdx = 1;
|
|
1528
1541
|
this.#closeFreePagesIntervalId = null;
|
|
1529
|
-
|
|
1542
|
+
loginfo(`##browser ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
1530
1543
|
const browserContexts = browser.contexts();
|
|
1531
1544
|
if (browserContexts.length > 0) {
|
|
1532
|
-
|
|
1545
|
+
logwarn(`There are ${browserContexts.length} new browserContexts when playwright launches new browser`);
|
|
1533
1546
|
}
|
|
1534
1547
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
1535
1548
|
for (const browserContext of browserContexts) {
|
|
1536
1549
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1537
1550
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1538
|
-
|
|
1551
|
+
loginfo(`##browserContext ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
1539
1552
|
}
|
|
1540
1553
|
browser.on("disconnected", () => {
|
|
1541
|
-
|
|
1554
|
+
loginfo(`##browser ${this.id()} disconnected`);
|
|
1542
1555
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
1543
|
-
|
|
1556
|
+
logerr(`${this.id()} has browserContexts when disconnected`);
|
|
1544
1557
|
}
|
|
1545
1558
|
});
|
|
1546
1559
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
1547
1560
|
if (!(lsdBrowserContext instanceof PlaywrightBrowserContext)) {
|
|
1548
|
-
|
|
1561
|
+
logerr(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
1549
1562
|
return;
|
|
1550
1563
|
}
|
|
1551
1564
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
1552
1565
|
if (idx < 0) {
|
|
1553
|
-
|
|
1566
|
+
logerr(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
1554
1567
|
return;
|
|
1555
1568
|
}
|
|
1556
|
-
|
|
1569
|
+
loginfo(`##browserContext ${lsdBrowserContext.id()} closed
|
|
1557
1570
|
`);
|
|
1558
1571
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
1559
1572
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
1560
|
-
|
|
1573
|
+
loginfo(`##browser ${this.id()} has no browserContexts now`);
|
|
1561
1574
|
}
|
|
1562
1575
|
return;
|
|
1563
1576
|
});
|
|
@@ -1574,7 +1587,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1574
1587
|
}
|
|
1575
1588
|
async newBrowserContext(options) {
|
|
1576
1589
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
1577
|
-
|
|
1590
|
+
logwarn(`##browser ${this.id()} can not create more new browserContext`);
|
|
1578
1591
|
return null;
|
|
1579
1592
|
}
|
|
1580
1593
|
const browserContextOptions = {};
|
|
@@ -1593,7 +1606,7 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1593
1606
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
1594
1607
|
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1595
1608
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1596
|
-
|
|
1609
|
+
loginfo(`##browser ${lsdBrowserContext.id()} created`);
|
|
1597
1610
|
return lsdBrowserContext;
|
|
1598
1611
|
}
|
|
1599
1612
|
async close() {
|
|
@@ -1665,15 +1678,18 @@ var PlaywrightBrowser = class extends EventEmitter3 {
|
|
|
1665
1678
|
|
|
1666
1679
|
// src/puppeteer/browser.ts
|
|
1667
1680
|
import EventEmitter6 from "events";
|
|
1681
|
+
import { getCurrentUnixTime as getCurrentUnixTime6, getPerformanceOfPidTree as getPerformanceOfPidTree2 } from "@letsscrapedata/utils";
|
|
1668
1682
|
|
|
1669
1683
|
// src/puppeteer/context.ts
|
|
1670
1684
|
import EventEmitter5 from "events";
|
|
1685
|
+
import { getCurrentUnixTime as getCurrentUnixTime5, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
1671
1686
|
|
|
1672
1687
|
// src/puppeteer/page.ts
|
|
1673
1688
|
import EventEmitter4 from "events";
|
|
1689
|
+
import { getCurrentUnixTime as getCurrentUnixTime4, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1674
1690
|
|
|
1675
1691
|
// src/puppeteer/element.ts
|
|
1676
|
-
import {
|
|
1692
|
+
import { unreachable as unreachable3 } from "@letsscrapedata/utils";
|
|
1677
1693
|
var PuppeteerElement = class _PuppeteerElement {
|
|
1678
1694
|
#frame;
|
|
1679
1695
|
#$ele;
|
|
@@ -1762,7 +1778,7 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1762
1778
|
}
|
|
1763
1779
|
return retObj;
|
|
1764
1780
|
} catch (err) {
|
|
1765
|
-
|
|
1781
|
+
logerr(err);
|
|
1766
1782
|
return retObj;
|
|
1767
1783
|
}
|
|
1768
1784
|
}
|
|
@@ -1944,7 +1960,6 @@ var PuppeteerElement = class _PuppeteerElement {
|
|
|
1944
1960
|
};
|
|
1945
1961
|
|
|
1946
1962
|
// src/puppeteer/page.ts
|
|
1947
|
-
import { getCurrentUnixTime as getCurrentUnixTime4, logerr as logerr5, loginfo as loginfo5, unreachable as unreachable4 } from "@letsscrapedata/utils";
|
|
1948
1963
|
var PuppeteerPage = class extends EventEmitter4 {
|
|
1949
1964
|
#lsdBrowserContext;
|
|
1950
1965
|
#page;
|
|
@@ -2104,9 +2119,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2104
2119
|
const page = this.#page;
|
|
2105
2120
|
const pageId = this.#pageId;
|
|
2106
2121
|
page.on("close", async () => {
|
|
2107
|
-
|
|
2122
|
+
loginfo(`##browser ${pageId} closed`);
|
|
2108
2123
|
if (!page.pageInfo) {
|
|
2109
|
-
|
|
2124
|
+
logerr(`Logic error in page.on("close")`);
|
|
2110
2125
|
}
|
|
2111
2126
|
this.emit("pageClose");
|
|
2112
2127
|
this.#lsdBrowserContext.emit("pageClose", this);
|
|
@@ -2125,12 +2140,12 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2125
2140
|
pageInfo.relatedId = page.pageInfo.taskId;
|
|
2126
2141
|
}
|
|
2127
2142
|
} else {
|
|
2128
|
-
|
|
2143
|
+
logerr(`##browser ${pageId} has popup without page.pageInfo`);
|
|
2129
2144
|
}
|
|
2130
|
-
|
|
2145
|
+
loginfo(`##browser ${pageId} has popup ${popupPageId}`);
|
|
2131
2146
|
this.emit("pagePopup", evtData);
|
|
2132
2147
|
} else {
|
|
2133
|
-
|
|
2148
|
+
logerr(`##browser ${pageId} has popup page with null page`);
|
|
2134
2149
|
}
|
|
2135
2150
|
});
|
|
2136
2151
|
}
|
|
@@ -2740,7 +2755,6 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2740
2755
|
};
|
|
2741
2756
|
|
|
2742
2757
|
// src/puppeteer/context.ts
|
|
2743
|
-
import { getCurrentUnixTime as getCurrentUnixTime5, logerr as logerr6, loginfo as loginfo6, logwarn as logwarn4, sleep as sleep2 } from "@letsscrapedata/utils";
|
|
2744
2758
|
var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
2745
2759
|
#lsdBrowser;
|
|
2746
2760
|
#browserIdx;
|
|
@@ -2780,7 +2794,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2780
2794
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2781
2795
|
}
|
|
2782
2796
|
this.#lsdPages.push(lsdPage);
|
|
2783
|
-
|
|
2797
|
+
loginfo(`##browser ${lsdPage.id()} ${openType}ed`);
|
|
2784
2798
|
}
|
|
2785
2799
|
}
|
|
2786
2800
|
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
@@ -2819,7 +2833,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2819
2833
|
const pageInfo = page.pageInfo;
|
|
2820
2834
|
if (pageInfo) {
|
|
2821
2835
|
const { browserIdx: browserIdx2, browserContextIdx: browserContextIdx2, pageIdx } = pageInfo;
|
|
2822
|
-
|
|
2836
|
+
logwarn(`##browser page-${browserIdx2}-${browserContextIdx2}-${pageIdx} has been already created`);
|
|
2823
2837
|
} else {
|
|
2824
2838
|
const currentTime2 = getCurrentUnixTime5();
|
|
2825
2839
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime2, lastStatusUpdateTime: currentTime2, taskId: 0, relatedId: 0, misc: {} };
|
|
@@ -2831,18 +2845,18 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2831
2845
|
await lsdPage.setUserAgent(this.#userAgent);
|
|
2832
2846
|
}
|
|
2833
2847
|
this.#lsdPages.push(lsdPage);
|
|
2834
|
-
|
|
2848
|
+
loginfo(`##page ${lsdPage.id()} created`);
|
|
2835
2849
|
}
|
|
2836
2850
|
}
|
|
2837
2851
|
});
|
|
2838
2852
|
this.on("pageClose", (lsdPage) => {
|
|
2839
2853
|
if (!(lsdPage instanceof PuppeteerPage)) {
|
|
2840
|
-
|
|
2854
|
+
logerr(`Invalid data in LsdBrowserContext.on("pageClose)`);
|
|
2841
2855
|
return;
|
|
2842
2856
|
}
|
|
2843
2857
|
const idx = this.#lsdPages.findIndex((p) => p === lsdPage);
|
|
2844
2858
|
if (idx < 0) {
|
|
2845
|
-
|
|
2859
|
+
logerr(`Invalid lsdPage in LsdBrowserContext.on("pageClose)`);
|
|
2846
2860
|
return;
|
|
2847
2861
|
}
|
|
2848
2862
|
this.#lsdPages.splice(idx, 1);
|
|
@@ -2862,7 +2876,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2862
2876
|
if (this.#browserContext) {
|
|
2863
2877
|
this.#status = "closed";
|
|
2864
2878
|
this.#lastStatusUpdateTime = getCurrentUnixTime5();
|
|
2865
|
-
|
|
2879
|
+
loginfo(`browserContext ${this.id()} closed at ${this.#lastStatusUpdateTime}`);
|
|
2866
2880
|
if (this.#browserContextCreationMethod !== "launch") {
|
|
2867
2881
|
await this.#browserContext.close();
|
|
2868
2882
|
}
|
|
@@ -2879,12 +2893,12 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2879
2893
|
await sleep2(200);
|
|
2880
2894
|
}
|
|
2881
2895
|
}
|
|
2882
|
-
|
|
2896
|
+
logwarn(`Cannot get the gettingLock.`);
|
|
2883
2897
|
return false;
|
|
2884
2898
|
}
|
|
2885
2899
|
#freeGettingLock() {
|
|
2886
2900
|
if (!this.#gettingPage) {
|
|
2887
|
-
|
|
2901
|
+
logwarn(`Getting lock is already free now.`);
|
|
2888
2902
|
}
|
|
2889
2903
|
this.#gettingPage = false;
|
|
2890
2904
|
}
|
|
@@ -2893,7 +2907,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2893
2907
|
maxPageFreeSeconds = this.#maxPageFreeSeconds;
|
|
2894
2908
|
}
|
|
2895
2909
|
if (maxPageFreeSeconds <= 0) {
|
|
2896
|
-
|
|
2910
|
+
logwarn(`Please set valid maxPageFreeSeconds to close free pages`);
|
|
2897
2911
|
return false;
|
|
2898
2912
|
}
|
|
2899
2913
|
const gotLock = await this.#tryToGetGettingLock();
|
|
@@ -2912,7 +2926,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2912
2926
|
this.#freeGettingLock();
|
|
2913
2927
|
return true;
|
|
2914
2928
|
} catch (err) {
|
|
2915
|
-
|
|
2929
|
+
logerr(err);
|
|
2916
2930
|
this.#freeGettingLock();
|
|
2917
2931
|
return false;
|
|
2918
2932
|
}
|
|
@@ -2966,7 +2980,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
2966
2980
|
return null;
|
|
2967
2981
|
}
|
|
2968
2982
|
} catch (err) {
|
|
2969
|
-
|
|
2983
|
+
logerr(err);
|
|
2970
2984
|
this.#freeGettingLock();
|
|
2971
2985
|
return null;
|
|
2972
2986
|
}
|
|
@@ -3048,7 +3062,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3048
3062
|
await page.free();
|
|
3049
3063
|
return true;
|
|
3050
3064
|
} catch (err) {
|
|
3051
|
-
|
|
3065
|
+
logerr(err);
|
|
3052
3066
|
return false;
|
|
3053
3067
|
}
|
|
3054
3068
|
}
|
|
@@ -3070,7 +3084,6 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3070
3084
|
};
|
|
3071
3085
|
|
|
3072
3086
|
// src/puppeteer/browser.ts
|
|
3073
|
-
import { getCurrentUnixTime as getCurrentUnixTime6, getPerformanceOfPidTree as getPerformanceOfPidTree2, logerr as logerr7, loginfo as loginfo7, logwarn as logwarn5 } from "@letsscrapedata/utils";
|
|
3074
3087
|
var PuppeteerBrowser = class extends EventEmitter6 {
|
|
3075
3088
|
#browser;
|
|
3076
3089
|
#browserIdx;
|
|
@@ -3123,35 +3136,35 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
3123
3136
|
this.#executablePath = executablePath;
|
|
3124
3137
|
this.#nextBrowserContextIdx = 1;
|
|
3125
3138
|
this.#closeFreePagesIntervalId = null;
|
|
3126
|
-
|
|
3139
|
+
loginfo(`##browser ${this.#browserType} ${this.id()} ${this.#browserCreationMethod}ed by ${this.#browserControllerType}`);
|
|
3127
3140
|
const browserContexts = browser.browserContexts();
|
|
3128
3141
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : false;
|
|
3129
3142
|
for (const browserContext of browserContexts) {
|
|
3130
3143
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), this.#userAgent(), maxViewportOfNewPage);
|
|
3131
3144
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
3132
|
-
|
|
3145
|
+
loginfo(`##browser ${lsdBrowserContext.id()} ${this.#browserCreationMethod}ed`);
|
|
3133
3146
|
}
|
|
3134
3147
|
browser.on("disconnected", () => {
|
|
3135
|
-
|
|
3148
|
+
loginfo(`##browser ${this.id()} disconnected`);
|
|
3136
3149
|
if (this.#lsdBrowserContexts.length > 0) {
|
|
3137
|
-
|
|
3150
|
+
logerr(`${this.id()} has browserContexts when disconnected`);
|
|
3138
3151
|
}
|
|
3139
3152
|
});
|
|
3140
3153
|
this.on("browserContextClose", (lsdBrowserContext) => {
|
|
3141
3154
|
if (!(lsdBrowserContext instanceof PuppeteerBrowserContext)) {
|
|
3142
|
-
|
|
3155
|
+
logerr(`Invalid data in LsdBrowser.on("browserContextClose)`);
|
|
3143
3156
|
return;
|
|
3144
3157
|
}
|
|
3145
3158
|
const idx = this.#lsdBrowserContexts.findIndex((bc) => bc === lsdBrowserContext);
|
|
3146
3159
|
if (idx < 0) {
|
|
3147
|
-
|
|
3160
|
+
logerr(`Invalid lsdBrowserContext in LsdBrowser.on("browserContextClose)`);
|
|
3148
3161
|
return;
|
|
3149
3162
|
}
|
|
3150
|
-
|
|
3163
|
+
loginfo(`##browser ${lsdBrowserContext.id()} closed
|
|
3151
3164
|
`);
|
|
3152
3165
|
this.#lsdBrowserContexts.splice(idx, 1);
|
|
3153
3166
|
if (this.#lsdBrowserContexts.length === 0) {
|
|
3154
|
-
|
|
3167
|
+
loginfo(`##browser ${this.id()} has no browserContexts now`);
|
|
3155
3168
|
}
|
|
3156
3169
|
return;
|
|
3157
3170
|
});
|
|
@@ -3168,7 +3181,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
3168
3181
|
}
|
|
3169
3182
|
async newBrowserContext(options) {
|
|
3170
3183
|
if (this.#lsdBrowserContexts.length >= this.#maxBrowserContextsPerBrowser()) {
|
|
3171
|
-
|
|
3184
|
+
logwarn(`##browser ${this.id()} can not create more new browserContext`);
|
|
3172
3185
|
return null;
|
|
3173
3186
|
}
|
|
3174
3187
|
const browserContextOptions = {};
|
|
@@ -3181,7 +3194,7 @@ var PuppeteerBrowser = class extends EventEmitter6 {
|
|
|
3181
3194
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
3182
3195
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
3183
3196
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
3184
|
-
|
|
3197
|
+
loginfo(`##browser ${lsdBrowserContext.id()} created`);
|
|
3185
3198
|
return lsdBrowserContext;
|
|
3186
3199
|
}
|
|
3187
3200
|
async close() {
|
|
@@ -3580,10 +3593,10 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
3580
3593
|
import os from "os";
|
|
3581
3594
|
import puppeteer from "puppeteer";
|
|
3582
3595
|
import playwright, { request as apiRequest } from "playwright";
|
|
3583
|
-
import { getPidsListeningOnPort, logerr as logerr8, logwarn as logwarn6, unreachable as unreachable5 } from "@letsscrapedata/utils";
|
|
3584
3596
|
import puppeteerExtra from "puppeteer-extra";
|
|
3585
3597
|
import * as playwrightExtra from "playwright-extra";
|
|
3586
3598
|
import StealthPlugin from "puppeteer-extra-plugin-stealth";
|
|
3599
|
+
import { getPidsListeningOnPort, unreachable as unreachable5 } from "@letsscrapedata/utils";
|
|
3587
3600
|
var LsdBrowserController = class _LsdBrowserController {
|
|
3588
3601
|
static #forbidConstructor = false;
|
|
3589
3602
|
#puppeteer;
|
|
@@ -3684,22 +3697,22 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
3684
3697
|
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, maxViewportOfNewPage, proxy, timeout, args, executablePath, maxWindowSize, headless, minBrowserContexts, incognito, proxyPerBrowserContext, userDataDir, userAgent };
|
|
3685
3698
|
let idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--incoginto"));
|
|
3686
3699
|
if (idx >= 0) {
|
|
3687
|
-
|
|
3700
|
+
logwarn(`Please use options.incognito instead when launching new browser.`);
|
|
3688
3701
|
args.splice(idx, 1);
|
|
3689
3702
|
}
|
|
3690
3703
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--proxy-server"));
|
|
3691
3704
|
if (idx >= 0) {
|
|
3692
|
-
|
|
3705
|
+
logwarn(`Please use options.proxy instead when launching new browser.`);
|
|
3693
3706
|
args.splice(idx, 1);
|
|
3694
3707
|
}
|
|
3695
3708
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--user-data-dir"));
|
|
3696
3709
|
if (idx >= 0) {
|
|
3697
|
-
|
|
3710
|
+
logwarn(`Please use options.userDataDir instead when launching new browser.`);
|
|
3698
3711
|
args.splice(idx, 1);
|
|
3699
3712
|
}
|
|
3700
3713
|
idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--start-maximized"));
|
|
3701
3714
|
if (idx >= 0) {
|
|
3702
|
-
|
|
3715
|
+
logwarn(`Please use options.maxWindowSize instead when launching new browser.`);
|
|
3703
3716
|
args.splice(idx, 1);
|
|
3704
3717
|
}
|
|
3705
3718
|
let lsdBrowser;
|
|
@@ -3812,7 +3825,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
3812
3825
|
const pids = await getPidsListeningOnPort(port);
|
|
3813
3826
|
let browserPid = 0;
|
|
3814
3827
|
if (pids.length !== 1) {
|
|
3815
|
-
|
|
3828
|
+
logerr(`##browser pids.length ${pids.length} is not 1 when trying to connect to browserUrl ${browserUrl}`);
|
|
3816
3829
|
} else {
|
|
3817
3830
|
browserPid = pids[0];
|
|
3818
3831
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@letsscrapedata/controller",
|
|
3
|
-
"version": "0.0.48",
|
|
3
|
+
"version": "0.0.50",
|
|
4
4
|
"description": "Unified browser / HTML controller interfaces that support playwright, puppeteer and cheerio",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"apify"
|
|
47
47
|
],
|
|
48
48
|
"dependencies": {
|
|
49
|
-
"@letsscrapedata/utils": "^0.0.
|
|
49
|
+
"@letsscrapedata/utils": "^0.0.26",
|
|
50
50
|
"cheerio": "^1.0.0",
|
|
51
51
|
"playwright": "^1.43.0",
|
|
52
52
|
"playwright-extra": "^4.3.6",
|