@letsscrapedata/controller 0.0.69 → 0.0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +165 -42
- package/dist/index.d.cts +21 -3
- package/dist/index.d.ts +21 -3
- package/dist/index.js +163 -40
- package/package.json +7 -12
package/dist/index.js
CHANGED
|
@@ -419,8 +419,8 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
419
419
|
const url = page.url();
|
|
420
420
|
const origCookies = await browserContext.cookies(url);
|
|
421
421
|
const cookies = origCookies.map((origCookie) => {
|
|
422
|
-
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
423
|
-
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
422
|
+
const { name, value, domain, path: path2, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
423
|
+
return { name, value, domain, path: path2, expires, httpOnly, secure, sameSite };
|
|
424
424
|
});
|
|
425
425
|
return cookies;
|
|
426
426
|
}
|
|
@@ -1304,6 +1304,9 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
1304
1304
|
logdbg(`##browser LsdPage ${this.#pageId} is allocated`);
|
|
1305
1305
|
return true;
|
|
1306
1306
|
}
|
|
1307
|
+
userAgent() {
|
|
1308
|
+
return this.#lsdBrowserContext.userAgent();
|
|
1309
|
+
}
|
|
1307
1310
|
async waitForElement(selector, options = {}) {
|
|
1308
1311
|
if (!this.#page) {
|
|
1309
1312
|
throw new Error("No valid page");
|
|
@@ -1418,6 +1421,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1418
1421
|
#browserContextIdx;
|
|
1419
1422
|
#browserContext;
|
|
1420
1423
|
#browserContextCreationMethod;
|
|
1424
|
+
#userAgent;
|
|
1421
1425
|
#apiContext;
|
|
1422
1426
|
#createTime;
|
|
1423
1427
|
#lastStatusUpdateTime;
|
|
@@ -1445,8 +1449,15 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1445
1449
|
await lsdPage.maximizeViewport();
|
|
1446
1450
|
}
|
|
1447
1451
|
}
|
|
1452
|
+
if (!this.#userAgent && this.#lsdPages.length > 0) {
|
|
1453
|
+
const lsdPage = this.#lsdPages[0];
|
|
1454
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
1455
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
1456
|
+
this.#userAgent = userAgent;
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1448
1459
|
}
|
|
1449
|
-
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
1460
|
+
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
1450
1461
|
if (!lsdBrowser || typeof lsdBrowser.browserContexts !== "function") {
|
|
1451
1462
|
throw new Error(`Invalid lsdBrowser parameter`);
|
|
1452
1463
|
}
|
|
@@ -1459,6 +1470,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1459
1470
|
this.#browserContextIdx = browserContextIdx;
|
|
1460
1471
|
this.#browserContext = browserContext;
|
|
1461
1472
|
this.#browserContextCreationMethod = browserContextCreationMethod;
|
|
1473
|
+
this.#userAgent = userAgent;
|
|
1462
1474
|
const apiRequestContext = browserContext.request;
|
|
1463
1475
|
this.#apiContext = new PlaywrightApiContext(apiRequestContext);
|
|
1464
1476
|
const currentTime = getCurrentUnixTime2();
|
|
@@ -1612,6 +1624,12 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1612
1624
|
}
|
|
1613
1625
|
lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
1614
1626
|
if (lsdPage) {
|
|
1627
|
+
if (!this.#userAgent) {
|
|
1628
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
1629
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
1630
|
+
this.#userAgent = userAgent;
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1615
1633
|
lsdPage.use();
|
|
1616
1634
|
this.#freeGettingLock();
|
|
1617
1635
|
return lsdPage;
|
|
@@ -1718,6 +1736,9 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1718
1736
|
return false;
|
|
1719
1737
|
}
|
|
1720
1738
|
}
|
|
1739
|
+
userAgent() {
|
|
1740
|
+
return this.#userAgent;
|
|
1741
|
+
}
|
|
1721
1742
|
_origBrowserContext() {
|
|
1722
1743
|
return this.#browserContext;
|
|
1723
1744
|
}
|
|
@@ -1787,7 +1808,7 @@ var PlaywrightBrowser = class _PlaywrightBrowser extends EventEmitter3 {
|
|
|
1787
1808
|
}
|
|
1788
1809
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
1789
1810
|
for (const browserContext of browserContexts) {
|
|
1790
|
-
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1811
|
+
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), "", maxViewportOfNewPage);
|
|
1791
1812
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1792
1813
|
}
|
|
1793
1814
|
browser.on("disconnected" /* BROWSER_DISCONNECTED */, () => {
|
|
@@ -1839,12 +1860,13 @@ var PlaywrightBrowser = class _PlaywrightBrowser extends EventEmitter3 {
|
|
|
1839
1860
|
const { proxyUrl: server, username, password } = proxy;
|
|
1840
1861
|
browserContextOptions.proxy = { server, username, password };
|
|
1841
1862
|
}
|
|
1842
|
-
|
|
1843
|
-
|
|
1863
|
+
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
1864
|
+
if (userAgent) {
|
|
1865
|
+
browserContextOptions.userAgent = userAgent;
|
|
1844
1866
|
}
|
|
1845
1867
|
const browserContext = await this.#browser.newContext(browserContextOptions);
|
|
1846
1868
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
1847
|
-
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
1869
|
+
const lsdBrowserContext = new PlaywrightBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
1848
1870
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
1849
1871
|
return lsdBrowserContext;
|
|
1850
1872
|
}
|
|
@@ -2227,6 +2249,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2227
2249
|
#page;
|
|
2228
2250
|
#status;
|
|
2229
2251
|
#pageId;
|
|
2252
|
+
#userAgent;
|
|
2230
2253
|
#closeWhenFree;
|
|
2231
2254
|
#requestInterceptionNum;
|
|
2232
2255
|
#responseInterceptionNum;
|
|
@@ -2250,8 +2273,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2250
2273
|
}
|
|
2251
2274
|
const origCookies = await page.cookies();
|
|
2252
2275
|
const cookies = origCookies.map((origCookie) => {
|
|
2253
|
-
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
2254
|
-
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
2276
|
+
const { name, value, domain, path: path2, expires, httpOnly = false, secure, sameSite = "Lax" } = origCookie;
|
|
2277
|
+
return { name, value, domain, path: path2, expires, httpOnly, secure, sameSite };
|
|
2255
2278
|
});
|
|
2256
2279
|
return cookies;
|
|
2257
2280
|
}
|
|
@@ -2460,6 +2483,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2460
2483
|
const { browserIdx = 0, browserContextIdx = 0, pageIdx = 0, openType = "other", openTime = currentTime, lastStatusUpdateTime = currentTime, taskId = 0, relatedId = 0, misc = {} } = pageInfo ? pageInfo : {};
|
|
2461
2484
|
this.#page.pageInfo = { browserIdx, browserContextIdx, pageIdx, openType, openTime, lastStatusUpdateTime, taskId, relatedId, misc };
|
|
2462
2485
|
this.#pageId = `PuppeteerPage-${browserIdx}-${browserContextIdx}-${pageIdx}`;
|
|
2486
|
+
this.#userAgent = browserContext.userAgent();
|
|
2463
2487
|
this.#closeWhenFree = false;
|
|
2464
2488
|
this.#requestInterceptionNum = 0;
|
|
2465
2489
|
this.#responseInterceptionNum = 0;
|
|
@@ -3055,10 +3079,11 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
3055
3079
|
return await this.#lsdBrowserContext.setStateData(stateData);
|
|
3056
3080
|
}
|
|
3057
3081
|
async setUserAgent(userAgent) {
|
|
3058
|
-
if (!this.#page) {
|
|
3082
|
+
if (!this.#page || !userAgent) {
|
|
3059
3083
|
throw new Error("No valid page");
|
|
3060
3084
|
}
|
|
3061
3085
|
await this.#page.setUserAgent(userAgent);
|
|
3086
|
+
this.#userAgent = userAgent;
|
|
3062
3087
|
return true;
|
|
3063
3088
|
}
|
|
3064
3089
|
async setViewportSize(viewPortSize) {
|
|
@@ -3099,6 +3124,9 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
3099
3124
|
logdbg(`##browser LsdPage ${this.#pageId} is allocated`);
|
|
3100
3125
|
return true;
|
|
3101
3126
|
}
|
|
3127
|
+
userAgent() {
|
|
3128
|
+
return this.#userAgent;
|
|
3129
|
+
}
|
|
3102
3130
|
async waitForElement(selector, options = {}) {
|
|
3103
3131
|
if (!this.#page) {
|
|
3104
3132
|
throw new Error("No valid page");
|
|
@@ -3182,9 +3210,9 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3182
3210
|
#browserContext;
|
|
3183
3211
|
#browserContextCreationMethod;
|
|
3184
3212
|
/**
|
|
3185
|
-
*
|
|
3186
|
-
* * Notice: it is not the actual userAgent if !#userAgent (ignored if !#userAgent)
|
|
3213
|
+
* userAgent of BrowserContext
|
|
3187
3214
|
*/
|
|
3215
|
+
#bcUserAgent;
|
|
3188
3216
|
#userAgent;
|
|
3189
3217
|
#createTime;
|
|
3190
3218
|
#lastStatusUpdateTime;
|
|
@@ -3211,8 +3239,15 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3211
3239
|
if (this.#maxViewportOfNewPage) {
|
|
3212
3240
|
await lsdPage.maximizeViewport();
|
|
3213
3241
|
}
|
|
3214
|
-
if (this.#userAgent) {
|
|
3215
|
-
await lsdPage.setUserAgent(this.#userAgent);
|
|
3242
|
+
if (this.#bcUserAgent) {
|
|
3243
|
+
await lsdPage.setUserAgent(this.#bcUserAgent);
|
|
3244
|
+
}
|
|
3245
|
+
}
|
|
3246
|
+
if (!this.#userAgent && this.#lsdPages.length > 0) {
|
|
3247
|
+
const lsdPage = this.#lsdPages[0];
|
|
3248
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
3249
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
3250
|
+
this.#userAgent = userAgent;
|
|
3216
3251
|
}
|
|
3217
3252
|
}
|
|
3218
3253
|
}
|
|
@@ -3229,6 +3264,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3229
3264
|
this.#browserContextIdx = browserContextIdx;
|
|
3230
3265
|
this.#browserContext = browserContext;
|
|
3231
3266
|
this.#browserContextCreationMethod = browserContextCreationMethod;
|
|
3267
|
+
this.#bcUserAgent = userAgent;
|
|
3232
3268
|
this.#userAgent = userAgent;
|
|
3233
3269
|
const currentTime = getCurrentUnixTime5();
|
|
3234
3270
|
this.#createTime = currentTime;
|
|
@@ -3259,9 +3295,6 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3259
3295
|
const pageInfo2 = { browserIdx: this.#browserIdx, browserContextIdx: this.#browserContextIdx, pageIdx: this.#nextPageIdx++, openType: "other", openTime: currentTime2, lastStatusUpdateTime: currentTime2, taskId: 0, relatedId: 0, misc: {} };
|
|
3260
3296
|
const lsdPage = new PuppeteerPage(this, page, pageInfo2);
|
|
3261
3297
|
this.#lsdPages.push(lsdPage);
|
|
3262
|
-
if (this.#userAgent) {
|
|
3263
|
-
await lsdPage.setUserAgent(this.#userAgent);
|
|
3264
|
-
}
|
|
3265
3298
|
if (this.#maxViewportOfNewPage) {
|
|
3266
3299
|
await lsdPage.maximizeViewport();
|
|
3267
3300
|
}
|
|
@@ -3389,6 +3422,9 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3389
3422
|
return null;
|
|
3390
3423
|
}
|
|
3391
3424
|
const page = await this.#browserContext.newPage();
|
|
3425
|
+
if (this.#bcUserAgent) {
|
|
3426
|
+
await page.setUserAgent(this.#bcUserAgent);
|
|
3427
|
+
}
|
|
3392
3428
|
await sleep2(2e3);
|
|
3393
3429
|
const pageInfo = page.pageInfo;
|
|
3394
3430
|
if (!pageInfo) {
|
|
@@ -3398,6 +3434,12 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3398
3434
|
}
|
|
3399
3435
|
lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
3400
3436
|
if (lsdPage) {
|
|
3437
|
+
if (!this.#userAgent) {
|
|
3438
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
3439
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
3440
|
+
this.#userAgent = userAgent;
|
|
3441
|
+
}
|
|
3442
|
+
}
|
|
3401
3443
|
lsdPage.use();
|
|
3402
3444
|
this.#freeGettingLock();
|
|
3403
3445
|
return lsdPage;
|
|
@@ -3504,6 +3546,9 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3504
3546
|
return false;
|
|
3505
3547
|
}
|
|
3506
3548
|
}
|
|
3549
|
+
userAgent() {
|
|
3550
|
+
return this.#userAgent;
|
|
3551
|
+
}
|
|
3507
3552
|
_origBrowserContext() {
|
|
3508
3553
|
return this.#browserContext;
|
|
3509
3554
|
}
|
|
@@ -3621,8 +3666,8 @@ var PuppeteerBrowser = class _PuppeteerBrowser extends EventEmitter6 {
|
|
|
3621
3666
|
if (proxy?.proxyUrl) {
|
|
3622
3667
|
browserContextOptions.proxyServer = proxy.proxyUrl;
|
|
3623
3668
|
}
|
|
3669
|
+
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
3624
3670
|
const browserContext = await this.#browser.createBrowserContext(browserContextOptions);
|
|
3625
|
-
const userAgent = options?.userAgent ? options.userAgent : "";
|
|
3626
3671
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
3627
3672
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
3628
3673
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
@@ -4062,6 +4107,9 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
4062
4107
|
use() {
|
|
4063
4108
|
throw new Error("Not supported in CheerioPage.");
|
|
4064
4109
|
}
|
|
4110
|
+
userAgent() {
|
|
4111
|
+
throw new Error("Not supported in CheerioPage.");
|
|
4112
|
+
}
|
|
4065
4113
|
waitForElement() {
|
|
4066
4114
|
throw new Error("Not supported in CheerioPage.");
|
|
4067
4115
|
}
|
|
@@ -4075,6 +4123,8 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
4075
4123
|
|
|
4076
4124
|
// src/controller/controller.ts
|
|
4077
4125
|
import os from "os";
|
|
4126
|
+
import fs from "fs";
|
|
4127
|
+
import path from "path";
|
|
4078
4128
|
import puppeteer from "puppeteer";
|
|
4079
4129
|
import playwright, { request as apiRequestInPlaywright } from "playwright";
|
|
4080
4130
|
import patchright from "patchright";
|
|
@@ -4425,8 +4475,8 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
4425
4475
|
const url = page.url();
|
|
4426
4476
|
const origCookies = await browserContext.cookies(url);
|
|
4427
4477
|
const cookies = origCookies.map((origCookie) => {
|
|
4428
|
-
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
4429
|
-
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
4478
|
+
const { name, value, domain, path: path2, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
4479
|
+
return { name, value, domain, path: path2, expires, httpOnly, secure, sameSite };
|
|
4430
4480
|
});
|
|
4431
4481
|
return cookies;
|
|
4432
4482
|
}
|
|
@@ -5309,6 +5359,9 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
5309
5359
|
logdbg(`##browser LsdPage ${this.#pageId} is allocated`);
|
|
5310
5360
|
return true;
|
|
5311
5361
|
}
|
|
5362
|
+
userAgent() {
|
|
5363
|
+
return this.#lsdBrowserContext.userAgent();
|
|
5364
|
+
}
|
|
5312
5365
|
async waitForElement(selector, options = {}) {
|
|
5313
5366
|
if (!this.#page) {
|
|
5314
5367
|
throw new Error("No valid page");
|
|
@@ -5423,6 +5476,7 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5423
5476
|
#browserContextIdx;
|
|
5424
5477
|
#browserContext;
|
|
5425
5478
|
#browserContextCreationMethod;
|
|
5479
|
+
#userAgent;
|
|
5426
5480
|
#apiContext;
|
|
5427
5481
|
#createTime;
|
|
5428
5482
|
#lastStatusUpdateTime;
|
|
@@ -5450,8 +5504,15 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5450
5504
|
await lsdPage.maximizeViewport();
|
|
5451
5505
|
}
|
|
5452
5506
|
}
|
|
5507
|
+
if (!this.#userAgent && this.#lsdPages.length > 0) {
|
|
5508
|
+
const lsdPage = this.#lsdPages[0];
|
|
5509
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
5510
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
5511
|
+
this.#userAgent = userAgent;
|
|
5512
|
+
}
|
|
5513
|
+
}
|
|
5453
5514
|
}
|
|
5454
|
-
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
5515
|
+
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
5455
5516
|
if (!lsdBrowser || typeof lsdBrowser.browserContexts !== "function") {
|
|
5456
5517
|
throw new Error(`Invalid lsdBrowser parameter`);
|
|
5457
5518
|
}
|
|
@@ -5464,6 +5525,7 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5464
5525
|
this.#browserContextIdx = browserContextIdx;
|
|
5465
5526
|
this.#browserContext = browserContext;
|
|
5466
5527
|
this.#browserContextCreationMethod = browserContextCreationMethod;
|
|
5528
|
+
this.#userAgent = userAgent;
|
|
5467
5529
|
const apiRequestContext = browserContext.request;
|
|
5468
5530
|
this.#apiContext = new PatchrightApiContext(apiRequestContext);
|
|
5469
5531
|
const currentTime = getCurrentUnixTime8();
|
|
@@ -5617,6 +5679,12 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5617
5679
|
}
|
|
5618
5680
|
lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
5619
5681
|
if (lsdPage) {
|
|
5682
|
+
if (!this.#userAgent) {
|
|
5683
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
5684
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
5685
|
+
this.#userAgent = userAgent;
|
|
5686
|
+
}
|
|
5687
|
+
}
|
|
5620
5688
|
lsdPage.use();
|
|
5621
5689
|
this.#freeGettingLock();
|
|
5622
5690
|
return lsdPage;
|
|
@@ -5723,6 +5791,9 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5723
5791
|
return false;
|
|
5724
5792
|
}
|
|
5725
5793
|
}
|
|
5794
|
+
userAgent() {
|
|
5795
|
+
return this.#userAgent;
|
|
5796
|
+
}
|
|
5726
5797
|
_origBrowserContext() {
|
|
5727
5798
|
return this.#browserContext;
|
|
5728
5799
|
}
|
|
@@ -5792,7 +5863,7 @@ var PatchrightBrowser = class _PatchrightBrowser extends EventEmitter10 {
|
|
|
5792
5863
|
}
|
|
5793
5864
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
5794
5865
|
for (const browserContext of browserContexts) {
|
|
5795
|
-
const lsdBrowserContext = new PatchrightBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
5866
|
+
const lsdBrowserContext = new PatchrightBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), "", maxViewportOfNewPage);
|
|
5796
5867
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
5797
5868
|
}
|
|
5798
5869
|
browser.on("disconnected" /* BROWSER_DISCONNECTED */, () => {
|
|
@@ -5844,12 +5915,13 @@ var PatchrightBrowser = class _PatchrightBrowser extends EventEmitter10 {
|
|
|
5844
5915
|
const { proxyUrl: server, username, password } = proxy;
|
|
5845
5916
|
browserContextOptions.proxy = { server, username, password };
|
|
5846
5917
|
}
|
|
5847
|
-
|
|
5848
|
-
|
|
5918
|
+
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
5919
|
+
if (userAgent) {
|
|
5920
|
+
browserContextOptions.userAgent = userAgent;
|
|
5849
5921
|
}
|
|
5850
5922
|
const browserContext = await this.#browser.newContext(browserContextOptions);
|
|
5851
5923
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
5852
|
-
const lsdBrowserContext = new PatchrightBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
5924
|
+
const lsdBrowserContext = new PatchrightBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
5853
5925
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
5854
5926
|
return lsdBrowserContext;
|
|
5855
5927
|
}
|
|
@@ -6277,8 +6349,8 @@ var CamoufoxPage = class extends EventEmitter11 {
|
|
|
6277
6349
|
const url = page.url();
|
|
6278
6350
|
const origCookies = await browserContext.cookies(url);
|
|
6279
6351
|
const cookies = origCookies.map((origCookie) => {
|
|
6280
|
-
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
6281
|
-
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
6352
|
+
const { name, value, domain, path: path2, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
6353
|
+
return { name, value, domain, path: path2, expires, httpOnly, secure, sameSite };
|
|
6282
6354
|
});
|
|
6283
6355
|
return cookies;
|
|
6284
6356
|
}
|
|
@@ -7133,6 +7205,9 @@ var CamoufoxPage = class extends EventEmitter11 {
|
|
|
7133
7205
|
logdbg(`##browser LsdPage ${this.#pageId} is allocated`);
|
|
7134
7206
|
return true;
|
|
7135
7207
|
}
|
|
7208
|
+
userAgent() {
|
|
7209
|
+
return this.#lsdBrowserContext.userAgent();
|
|
7210
|
+
}
|
|
7136
7211
|
async waitForElement(selector, options = {}) {
|
|
7137
7212
|
if (!this.#page) {
|
|
7138
7213
|
throw new Error("No valid page");
|
|
@@ -7247,6 +7322,7 @@ var CamoufoxBrowserContext = class extends EventEmitter12 {
|
|
|
7247
7322
|
#browserContextIdx;
|
|
7248
7323
|
#browserContext;
|
|
7249
7324
|
#browserContextCreationMethod;
|
|
7325
|
+
#userAgent;
|
|
7250
7326
|
#apiContext;
|
|
7251
7327
|
#createTime;
|
|
7252
7328
|
#lastStatusUpdateTime;
|
|
@@ -7274,8 +7350,15 @@ var CamoufoxBrowserContext = class extends EventEmitter12 {
|
|
|
7274
7350
|
await lsdPage.maximizeViewport();
|
|
7275
7351
|
}
|
|
7276
7352
|
}
|
|
7353
|
+
if (!this.#userAgent && this.#lsdPages.length > 0) {
|
|
7354
|
+
const lsdPage = this.#lsdPages[0];
|
|
7355
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
7356
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
7357
|
+
this.#userAgent = userAgent;
|
|
7358
|
+
}
|
|
7359
|
+
}
|
|
7277
7360
|
}
|
|
7278
|
-
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, maxViewportOfNewPage = true) {
|
|
7361
|
+
constructor(lsdBrowser, browserContext, browserContextCreationMethod, incognito = false, proxy = null, browserIdx = 0, browserContextIdx = 0, maxPagesPerBrowserContext = 20, maxPageFreeSeconds = 0, userAgent = "", maxViewportOfNewPage = true) {
|
|
7279
7362
|
if (!lsdBrowser || typeof lsdBrowser.browserContexts !== "function") {
|
|
7280
7363
|
throw new Error(`Invalid lsdBrowser parameter`);
|
|
7281
7364
|
}
|
|
@@ -7288,6 +7371,7 @@ var CamoufoxBrowserContext = class extends EventEmitter12 {
|
|
|
7288
7371
|
this.#browserContextIdx = browserContextIdx;
|
|
7289
7372
|
this.#browserContext = browserContext;
|
|
7290
7373
|
this.#browserContextCreationMethod = browserContextCreationMethod;
|
|
7374
|
+
this.#userAgent = userAgent;
|
|
7291
7375
|
const apiRequestContext = browserContext.request;
|
|
7292
7376
|
this.#apiContext = new CamoufoxApiContext(apiRequestContext);
|
|
7293
7377
|
const currentTime = getCurrentUnixTime11();
|
|
@@ -7441,6 +7525,12 @@ var CamoufoxBrowserContext = class extends EventEmitter12 {
|
|
|
7441
7525
|
}
|
|
7442
7526
|
lsdPage = this.#lsdPages.find((p) => p.isFree());
|
|
7443
7527
|
if (lsdPage) {
|
|
7528
|
+
if (!this.#userAgent) {
|
|
7529
|
+
const userAgent = await lsdPage.evaluate(() => navigator.userAgent);
|
|
7530
|
+
if (typeof userAgent === "string" && userAgent) {
|
|
7531
|
+
this.#userAgent = userAgent;
|
|
7532
|
+
}
|
|
7533
|
+
}
|
|
7444
7534
|
lsdPage.use();
|
|
7445
7535
|
this.#freeGettingLock();
|
|
7446
7536
|
return lsdPage;
|
|
@@ -7547,6 +7637,9 @@ var CamoufoxBrowserContext = class extends EventEmitter12 {
|
|
|
7547
7637
|
return false;
|
|
7548
7638
|
}
|
|
7549
7639
|
}
|
|
7640
|
+
userAgent() {
|
|
7641
|
+
return this.#userAgent;
|
|
7642
|
+
}
|
|
7550
7643
|
_origBrowserContext() {
|
|
7551
7644
|
return this.#browserContext;
|
|
7552
7645
|
}
|
|
@@ -7616,7 +7709,7 @@ var CamoufoxBrowser = class _CamoufoxBrowser extends EventEmitter13 {
|
|
|
7616
7709
|
}
|
|
7617
7710
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : true;
|
|
7618
7711
|
for (const browserContext of browserContexts) {
|
|
7619
|
-
const lsdBrowserContext = new CamoufoxBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
7712
|
+
const lsdBrowserContext = new CamoufoxBrowserContext(this, browserContext, "launch", incognito, this.#proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), "", maxViewportOfNewPage);
|
|
7620
7713
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
7621
7714
|
}
|
|
7622
7715
|
browser.on("disconnected" /* BROWSER_DISCONNECTED */, () => {
|
|
@@ -7668,12 +7761,13 @@ var CamoufoxBrowser = class _CamoufoxBrowser extends EventEmitter13 {
|
|
|
7668
7761
|
const { proxyUrl: server, username, password } = proxy;
|
|
7669
7762
|
browserContextOptions.proxy = { server, username, password };
|
|
7670
7763
|
}
|
|
7671
|
-
|
|
7672
|
-
|
|
7764
|
+
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
7765
|
+
if (userAgent) {
|
|
7766
|
+
browserContextOptions.userAgent = userAgent;
|
|
7673
7767
|
}
|
|
7674
7768
|
const browserContext = await this.#browser.newContext(browserContextOptions);
|
|
7675
7769
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
7676
|
-
const lsdBrowserContext = new CamoufoxBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), maxViewportOfNewPage);
|
|
7770
|
+
const lsdBrowserContext = new CamoufoxBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
7677
7771
|
this.#lsdBrowserContexts.push(lsdBrowserContext);
|
|
7678
7772
|
return lsdBrowserContext;
|
|
7679
7773
|
}
|
|
@@ -7762,11 +7856,13 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7762
7856
|
}
|
|
7763
7857
|
this.#puppeteer = puppeteer;
|
|
7764
7858
|
this.#playwrightBrowserTypes = {
|
|
7859
|
+
chrome: playwright.chromium,
|
|
7765
7860
|
chromium: playwright.chromium,
|
|
7766
7861
|
firefox: playwright.firefox,
|
|
7767
7862
|
webkit: playwright.webkit
|
|
7768
7863
|
};
|
|
7769
7864
|
this.#patchrightBrowserTypes = {
|
|
7865
|
+
chrome: playwright.chromium,
|
|
7770
7866
|
chromium: patchright.chromium,
|
|
7771
7867
|
firefox: patchright.firefox,
|
|
7772
7868
|
webkit: patchright.webkit
|
|
@@ -7776,7 +7872,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7776
7872
|
_LsdBrowserController.#forbidConstructor = true;
|
|
7777
7873
|
}
|
|
7778
7874
|
#playwrightBrowserType(browserType, connectFlag = false) {
|
|
7779
|
-
if (browserType === "chromium") {
|
|
7875
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
7780
7876
|
return this.#playwrightBrowserTypes.chromium;
|
|
7781
7877
|
} else if (connectFlag) {
|
|
7782
7878
|
throw new Error(`playwright only can connect to chromium browser, not support ${browserType} browser`);
|
|
@@ -7789,7 +7885,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7789
7885
|
}
|
|
7790
7886
|
}
|
|
7791
7887
|
#patchrightBrowserType(browserType, connectFlag = false) {
|
|
7792
|
-
if (browserType === "chromium") {
|
|
7888
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
7793
7889
|
return this.#patchrightBrowserTypes.chromium;
|
|
7794
7890
|
} else if (connectFlag) {
|
|
7795
7891
|
throw new Error(`patchright only can connect to chromium browser, not support ${browserType} browser`);
|
|
@@ -7819,6 +7915,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7819
7915
|
throw new Error(`BrowserControllerType ${browserControllerType} doesnot support browserType ${browserType}`);
|
|
7820
7916
|
}
|
|
7821
7917
|
switch (browserType) {
|
|
7918
|
+
case "chrome":
|
|
7822
7919
|
case "chromium":
|
|
7823
7920
|
case "firefox":
|
|
7824
7921
|
case "webkit":
|
|
@@ -7832,6 +7929,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7832
7929
|
throw new Error(`BrowserControllerType ${browserControllerType} doesnot support browserType ${browserType}`);
|
|
7833
7930
|
}
|
|
7834
7931
|
switch (browserType) {
|
|
7932
|
+
case "chrome":
|
|
7835
7933
|
case "chromium":
|
|
7836
7934
|
case "firefox":
|
|
7837
7935
|
case "webkit":
|
|
@@ -7866,6 +7964,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7866
7964
|
// incognito
|
|
7867
7965
|
proxyPerBrowserContext = false,
|
|
7868
7966
|
userDataDir = "",
|
|
7967
|
+
userDataBaseDir = os.tmpdir(),
|
|
7869
7968
|
userAgent = ""
|
|
7870
7969
|
} = options ? options : {};
|
|
7871
7970
|
let browserPid = 0;
|
|
@@ -7892,6 +7991,18 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7892
7991
|
args.splice(idx, 1);
|
|
7893
7992
|
}
|
|
7894
7993
|
let lsdBrowser;
|
|
7994
|
+
if (browserType === "chrome" && ["playwright", "patchright", "puppeteer"].includes(browserControllerType)) {
|
|
7995
|
+
if (!userDataDir) {
|
|
7996
|
+
if (!userDataBaseDir) {
|
|
7997
|
+
userDataBaseDir = os.tmpdir();
|
|
7998
|
+
}
|
|
7999
|
+
userDataDir = fs.mkdtempSync(path.join(userDataBaseDir, `lsd-${browserControllerType}-chrome-`));
|
|
8000
|
+
actOptions.userDataDir = userDataDir;
|
|
8001
|
+
}
|
|
8002
|
+
if (!executablePath) {
|
|
8003
|
+
throw new Error("Invalid executablePath when to launch chrome");
|
|
8004
|
+
}
|
|
8005
|
+
}
|
|
7895
8006
|
if (browserControllerType === "playwright") {
|
|
7896
8007
|
const launchOptions = { headless, timeout };
|
|
7897
8008
|
if (executablePath) {
|
|
@@ -7906,10 +8017,14 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7906
8017
|
} else if (proxyPerBrowserContext && browserType === "chromium" && this.#osPlatform.startsWith("win")) {
|
|
7907
8018
|
launchOptions.proxy = { server: "proxyPerBrowserContext" };
|
|
7908
8019
|
}
|
|
7909
|
-
if (browserType === "chromium") {
|
|
8020
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
8021
|
+
if (browserType === "chrome") {
|
|
8022
|
+
launchOptions.channel = "chrome";
|
|
8023
|
+
}
|
|
7910
8024
|
if (incognito) {
|
|
7911
8025
|
args.push("--incognito");
|
|
7912
|
-
}
|
|
8026
|
+
}
|
|
8027
|
+
if (userDataDir) {
|
|
7913
8028
|
args.push(`--user-data-dir=${userDataDir}`);
|
|
7914
8029
|
}
|
|
7915
8030
|
}
|
|
@@ -7947,10 +8062,14 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7947
8062
|
} else if (proxyPerBrowserContext && browserType === "chromium" && this.#osPlatform.startsWith("win")) {
|
|
7948
8063
|
launchOptions.proxy = { server: "proxyPerBrowserContext" };
|
|
7949
8064
|
}
|
|
7950
|
-
if (browserType === "chromium") {
|
|
8065
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
8066
|
+
if (browserType === "chrome") {
|
|
8067
|
+
launchOptions.channel = "chrome";
|
|
8068
|
+
}
|
|
7951
8069
|
if (incognito) {
|
|
7952
8070
|
args.push("--incognito");
|
|
7953
|
-
}
|
|
8071
|
+
}
|
|
8072
|
+
if (userDataDir) {
|
|
7954
8073
|
args.push(`--user-data-dir=${userDataDir}`);
|
|
7955
8074
|
}
|
|
7956
8075
|
}
|
|
@@ -7987,10 +8106,14 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7987
8106
|
if (!args.includes("--no-sandbox")) {
|
|
7988
8107
|
args.push("--no-sandbox");
|
|
7989
8108
|
}
|
|
7990
|
-
if (browserType === "chromium") {
|
|
8109
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
8110
|
+
if (browserType === "chrome") {
|
|
8111
|
+
launchOptions.channel = "chrome";
|
|
8112
|
+
}
|
|
7991
8113
|
if (incognito) {
|
|
7992
8114
|
args.push("--incognito");
|
|
7993
|
-
}
|
|
8115
|
+
}
|
|
8116
|
+
if (userDataDir) {
|
|
7994
8117
|
args.push(`--user-data-dir=${userDataDir}`);
|
|
7995
8118
|
}
|
|
7996
8119
|
if (proxy?.proxyUrl && proxy.proxyUrl !== "default") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@letsscrapedata/controller",
|
|
3
|
-
"version": "0.0.69",
|
|
3
|
+
"version": "0.0.71",
|
|
4
4
|
"description": "Unified browser / HTML controller interfaces that support patchright, camoufox, playwright, puppeteer and cheerio",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -44,20 +44,15 @@
|
|
|
44
44
|
"puppeteer",
|
|
45
45
|
"cheerio",
|
|
46
46
|
"crawler",
|
|
47
|
-
"scraper"
|
|
48
|
-
"apify"
|
|
47
|
+
"scraper"
|
|
49
48
|
],
|
|
50
49
|
"dependencies": {
|
|
51
|
-
"@letsscrapedata/utils": "^0.0.
|
|
50
|
+
"@letsscrapedata/utils": "^0.0.33",
|
|
52
51
|
"camoufox-js-lsd": "^0.6.5",
|
|
53
|
-
"cheerio": "^1.
|
|
54
|
-
"patchright": "^1.
|
|
55
|
-
"playwright": "^1.
|
|
56
|
-
"
|
|
57
|
-
"puppeteer": "^24.16.0",
|
|
58
|
-
"puppeteer-extra": "^3.3.6",
|
|
59
|
-
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
60
|
-
"utils": "^0.3.1"
|
|
52
|
+
"cheerio": "^1.1.2",
|
|
53
|
+
"patchright": "^1.56.1",
|
|
54
|
+
"playwright": "^1.56.1",
|
|
55
|
+
"puppeteer": "^24.25.0"
|
|
61
56
|
},
|
|
62
57
|
"devDependencies": {
|
|
63
58
|
"@types/cheerio": "^0.22.35",
|