@letsscrapedata/controller 0.0.71 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +40 -34
- package/dist/index.d.cts +12 -6
- package/dist/index.d.ts +12 -6
- package/dist/index.js +40 -34
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -470,8 +470,8 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
470
470
|
const url = page.url();
|
|
471
471
|
const origCookies = await browserContext.cookies(url);
|
|
472
472
|
const cookies = origCookies.map((origCookie) => {
|
|
473
|
-
const { name, value, domain, path
|
|
474
|
-
return { name, value, domain, path
|
|
473
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
474
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
475
475
|
});
|
|
476
476
|
return cookies;
|
|
477
477
|
}
|
|
@@ -1270,6 +1270,7 @@ var PlaywrightPage = class extends import_node_events.default {
|
|
|
1270
1270
|
requestMethod,
|
|
1271
1271
|
requestUrl,
|
|
1272
1272
|
requestData,
|
|
1273
|
+
resourceType: request.resourceType(),
|
|
1273
1274
|
responseData
|
|
1274
1275
|
});
|
|
1275
1276
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -1797,7 +1798,7 @@ var PlaywrightBrowserContext = class extends import_node_events2.default {
|
|
|
1797
1798
|
|
|
1798
1799
|
// src/playwright/browser.ts
|
|
1799
1800
|
var PlaywrightBrowser = class _PlaywrightBrowser extends import_node_events3.default {
|
|
1800
|
-
static #supportedBrowserTypes = ["chromium", "firefox", "webkit"];
|
|
1801
|
+
static #supportedBrowserTypes = ["chrome", "chromium", "firefox", "webkit"];
|
|
1801
1802
|
static doesSupport(browserType) {
|
|
1802
1803
|
return _PlaywrightBrowser.#supportedBrowserTypes.includes(browserType);
|
|
1803
1804
|
}
|
|
@@ -1912,6 +1913,10 @@ var PlaywrightBrowser = class _PlaywrightBrowser extends import_node_events3.def
|
|
|
1912
1913
|
browserContextOptions.proxy = { server, username, password };
|
|
1913
1914
|
}
|
|
1914
1915
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
1916
|
+
const lsdLaunchOptions = this.#options;
|
|
1917
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
1918
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
1919
|
+
}
|
|
1915
1920
|
if (userAgent) {
|
|
1916
1921
|
browserContextOptions.userAgent = userAgent;
|
|
1917
1922
|
}
|
|
@@ -2324,8 +2329,8 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
2324
2329
|
}
|
|
2325
2330
|
const origCookies = await page.cookies();
|
|
2326
2331
|
const cookies = origCookies.map((origCookie) => {
|
|
2327
|
-
const { name, value, domain, path
|
|
2328
|
-
return { name, value, domain, path
|
|
2332
|
+
const { name, value, domain, path, expires, httpOnly = false, secure, sameSite = "Lax" } = origCookie;
|
|
2333
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
2329
2334
|
});
|
|
2330
2335
|
return cookies;
|
|
2331
2336
|
}
|
|
@@ -3103,6 +3108,7 @@ var PuppeteerPage = class extends import_node_events4.default {
|
|
|
3103
3108
|
requestMethod,
|
|
3104
3109
|
requestUrl,
|
|
3105
3110
|
requestData,
|
|
3111
|
+
resourceType: request.resourceType(),
|
|
3106
3112
|
responseData
|
|
3107
3113
|
});
|
|
3108
3114
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -3474,7 +3480,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
3474
3480
|
}
|
|
3475
3481
|
const page = await this.#browserContext.newPage();
|
|
3476
3482
|
if (this.#bcUserAgent) {
|
|
3477
|
-
await page.setUserAgent(this.#bcUserAgent);
|
|
3483
|
+
await page.setUserAgent({ userAgent: this.#bcUserAgent });
|
|
3478
3484
|
}
|
|
3479
3485
|
await (0, import_utils8.sleep)(2e3);
|
|
3480
3486
|
const pageInfo = page.pageInfo;
|
|
@@ -3607,7 +3613,7 @@ var PuppeteerBrowserContext = class extends import_node_events5.default {
|
|
|
3607
3613
|
|
|
3608
3614
|
// src/puppeteer/browser.ts
|
|
3609
3615
|
var PuppeteerBrowser = class _PuppeteerBrowser extends import_node_events6.default {
|
|
3610
|
-
static #supportedBrowserTypes = ["chromium"];
|
|
3616
|
+
static #supportedBrowserTypes = ["chrome", "chromium"];
|
|
3611
3617
|
static doesSupport(browserType) {
|
|
3612
3618
|
return _PuppeteerBrowser.#supportedBrowserTypes.includes(browserType);
|
|
3613
3619
|
}
|
|
@@ -3718,6 +3724,10 @@ var PuppeteerBrowser = class _PuppeteerBrowser extends import_node_events6.defau
|
|
|
3718
3724
|
browserContextOptions.proxyServer = proxy.proxyUrl;
|
|
3719
3725
|
}
|
|
3720
3726
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
3727
|
+
const lsdLaunchOptions = this.#options;
|
|
3728
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
3729
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
3730
|
+
}
|
|
3721
3731
|
const browserContext = await this.#browser.createBrowserContext(browserContextOptions);
|
|
3722
3732
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
3723
3733
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
@@ -4174,8 +4184,6 @@ var CheerioPage = class extends import_node_events7.default {
|
|
|
4174
4184
|
|
|
4175
4185
|
// src/controller/controller.ts
|
|
4176
4186
|
var import_node_os = __toESM(require("os"), 1);
|
|
4177
|
-
var import_node_fs = __toESM(require("fs"), 1);
|
|
4178
|
-
var import_node_path = __toESM(require("path"), 1);
|
|
4179
4187
|
var import_puppeteer = __toESM(require("puppeteer"), 1);
|
|
4180
4188
|
var import_playwright = __toESM(require("playwright"), 1);
|
|
4181
4189
|
var import_patchright = __toESM(require("patchright"), 1);
|
|
@@ -4526,8 +4534,8 @@ var PatchrightPage = class extends import_node_events8.default {
|
|
|
4526
4534
|
const url = page.url();
|
|
4527
4535
|
const origCookies = await browserContext.cookies(url);
|
|
4528
4536
|
const cookies = origCookies.map((origCookie) => {
|
|
4529
|
-
const { name, value, domain, path
|
|
4530
|
-
return { name, value, domain, path
|
|
4537
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
4538
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
4531
4539
|
});
|
|
4532
4540
|
return cookies;
|
|
4533
4541
|
}
|
|
@@ -5325,6 +5333,7 @@ var PatchrightPage = class extends import_node_events8.default {
|
|
|
5325
5333
|
requestMethod,
|
|
5326
5334
|
requestUrl,
|
|
5327
5335
|
requestData,
|
|
5336
|
+
resourceType: request.resourceType(),
|
|
5328
5337
|
responseData
|
|
5329
5338
|
});
|
|
5330
5339
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -5437,7 +5446,7 @@ var PatchrightPage = class extends import_node_events8.default {
|
|
|
5437
5446
|
}
|
|
5438
5447
|
return true;
|
|
5439
5448
|
}
|
|
5440
|
-
async windowMember(keys) {
|
|
5449
|
+
async windowMember(keys, isolated = true) {
|
|
5441
5450
|
if (!this.#page) {
|
|
5442
5451
|
throw new Error("No valid page");
|
|
5443
5452
|
}
|
|
@@ -5476,7 +5485,8 @@ var PatchrightPage = class extends import_node_events8.default {
|
|
|
5476
5485
|
return "";
|
|
5477
5486
|
}
|
|
5478
5487
|
},
|
|
5479
|
-
keys
|
|
5488
|
+
keys,
|
|
5489
|
+
!!isolated
|
|
5480
5490
|
);
|
|
5481
5491
|
return content;
|
|
5482
5492
|
}
|
|
@@ -5852,7 +5862,7 @@ var PatchrightBrowserContext = class extends import_node_events9.default {
|
|
|
5852
5862
|
|
|
5853
5863
|
// src/patchright/browser.ts
|
|
5854
5864
|
var PatchrightBrowser = class _PatchrightBrowser extends import_node_events10.default {
|
|
5855
|
-
static #supportedBrowserTypes = ["chromium", "firefox", "webkit"];
|
|
5865
|
+
static #supportedBrowserTypes = ["chrome", "chromium", "firefox", "webkit"];
|
|
5856
5866
|
static doesSupport(browserType) {
|
|
5857
5867
|
return _PatchrightBrowser.#supportedBrowserTypes.includes(browserType);
|
|
5858
5868
|
}
|
|
@@ -5967,6 +5977,10 @@ var PatchrightBrowser = class _PatchrightBrowser extends import_node_events10.de
|
|
|
5967
5977
|
browserContextOptions.proxy = { server, username, password };
|
|
5968
5978
|
}
|
|
5969
5979
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
5980
|
+
const lsdLaunchOptions = this.#options;
|
|
5981
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
5982
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
5983
|
+
}
|
|
5970
5984
|
if (userAgent) {
|
|
5971
5985
|
browserContextOptions.userAgent = userAgent;
|
|
5972
5986
|
}
|
|
@@ -6400,8 +6414,8 @@ var CamoufoxPage = class extends import_node_events11.default {
|
|
|
6400
6414
|
const url = page.url();
|
|
6401
6415
|
const origCookies = await browserContext.cookies(url);
|
|
6402
6416
|
const cookies = origCookies.map((origCookie) => {
|
|
6403
|
-
const { name, value, domain, path
|
|
6404
|
-
return { name, value, domain, path
|
|
6417
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
6418
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
6405
6419
|
});
|
|
6406
6420
|
return cookies;
|
|
6407
6421
|
}
|
|
@@ -7171,6 +7185,7 @@ var CamoufoxPage = class extends import_node_events11.default {
|
|
|
7171
7185
|
requestMethod,
|
|
7172
7186
|
requestUrl,
|
|
7173
7187
|
requestData,
|
|
7188
|
+
resourceType: request.resourceType(),
|
|
7174
7189
|
responseData
|
|
7175
7190
|
});
|
|
7176
7191
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -7813,6 +7828,10 @@ var CamoufoxBrowser = class _CamoufoxBrowser extends import_node_events13.defaul
|
|
|
7813
7828
|
browserContextOptions.proxy = { server, username, password };
|
|
7814
7829
|
}
|
|
7815
7830
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
7831
|
+
const lsdLaunchOptions = this.#options;
|
|
7832
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
7833
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
7834
|
+
}
|
|
7816
7835
|
if (userAgent) {
|
|
7817
7836
|
browserContextOptions.userAgent = userAgent;
|
|
7818
7837
|
}
|
|
@@ -7949,7 +7968,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7949
7968
|
}
|
|
7950
7969
|
}
|
|
7951
7970
|
#puppeteerProduct(browserType) {
|
|
7952
|
-
if (browserType === "chromium") {
|
|
7971
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
7953
7972
|
return "chrome";
|
|
7954
7973
|
} else {
|
|
7955
7974
|
throw new Error(`Invalid puppeteer product ${browserType}`);
|
|
@@ -8015,12 +8034,12 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8015
8034
|
// incognito
|
|
8016
8035
|
proxyPerBrowserContext = false,
|
|
8017
8036
|
userDataDir = "",
|
|
8018
|
-
|
|
8019
|
-
|
|
8037
|
+
userAgent = "",
|
|
8038
|
+
headlessUserAgent = ""
|
|
8020
8039
|
} = options ? options : {};
|
|
8021
8040
|
let browserPid = 0;
|
|
8022
8041
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : browserControllerType === "puppeteer" ? false : true;
|
|
8023
|
-
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, maxViewportOfNewPage, proxy, timeout, args, executablePath, maxWindowSize, headless, minBrowserContexts, incognito, proxyPerBrowserContext, userDataDir, userAgent };
|
|
8042
|
+
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, maxViewportOfNewPage, proxy, timeout, args, executablePath, maxWindowSize, headless, minBrowserContexts, incognito, proxyPerBrowserContext, userDataDir, userAgent, headlessUserAgent };
|
|
8024
8043
|
let idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--incoginto"));
|
|
8025
8044
|
if (idx >= 0) {
|
|
8026
8045
|
logwarn(`##browser controller Please use options.incognito instead when launching new browser.`);
|
|
@@ -8043,13 +8062,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8043
8062
|
}
|
|
8044
8063
|
let lsdBrowser;
|
|
8045
8064
|
if (browserType === "chrome" && ["playwright", "patchright", "puppeteer"].includes(browserControllerType)) {
|
|
8046
|
-
if (!userDataDir) {
|
|
8047
|
-
if (!userDataBaseDir) {
|
|
8048
|
-
userDataBaseDir = import_node_os.default.tmpdir();
|
|
8049
|
-
}
|
|
8050
|
-
userDataDir = import_node_fs.default.mkdtempSync(import_node_path.default.join(userDataBaseDir, `lsd-${browserControllerType}-chrome-`));
|
|
8051
|
-
actOptions.userDataDir = userDataDir;
|
|
8052
|
-
}
|
|
8053
8065
|
if (!executablePath) {
|
|
8054
8066
|
throw new Error("Invalid executablePath when to launch chrome");
|
|
8055
8067
|
}
|
|
@@ -8075,9 +8087,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8075
8087
|
if (incognito) {
|
|
8076
8088
|
args.push("--incognito");
|
|
8077
8089
|
}
|
|
8078
|
-
if (userDataDir) {
|
|
8079
|
-
args.push(`--user-data-dir=${userDataDir}`);
|
|
8080
|
-
}
|
|
8081
8090
|
}
|
|
8082
8091
|
if (args.length > 0) {
|
|
8083
8092
|
launchOptions.args = args;
|
|
@@ -8120,9 +8129,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8120
8129
|
if (incognito) {
|
|
8121
8130
|
args.push("--incognito");
|
|
8122
8131
|
}
|
|
8123
|
-
if (userDataDir) {
|
|
8124
|
-
args.push(`--user-data-dir=${userDataDir}`);
|
|
8125
|
-
}
|
|
8126
8132
|
}
|
|
8127
8133
|
if (args.length > 0) {
|
|
8128
8134
|
launchOptions.args = args;
|
|
@@ -8165,7 +8171,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8165
8171
|
args.push("--incognito");
|
|
8166
8172
|
}
|
|
8167
8173
|
if (userDataDir) {
|
|
8168
|
-
|
|
8174
|
+
launchOptions.userDataDir = userDataDir;
|
|
8169
8175
|
}
|
|
8170
8176
|
if (proxy?.proxyUrl && proxy.proxyUrl !== "default") {
|
|
8171
8177
|
args.push(`--proxy-server=${proxy.proxyUrl}`);
|
package/dist/index.d.cts
CHANGED
|
@@ -213,13 +213,14 @@ interface LsdLaunchOptions extends BrowserOptions {
|
|
|
213
213
|
* * puppeteer creates a default/new browserContext during launching browser
|
|
214
214
|
*/
|
|
215
215
|
userAgent?: string;
|
|
216
|
-
userDataDir?: string;
|
|
217
216
|
/**
|
|
218
|
-
* only valid when
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
* only valid when !userAgent && headless && headlessUserAgent is not empty
|
|
218
|
+
*/
|
|
219
|
+
headlessUserAgent?: string;
|
|
220
|
+
/**
|
|
221
|
+
* if userDataDir is not "" (or undefined) and browserControllerType is "playwright" / "patchright", then launch
|
|
221
222
|
*/
|
|
222
|
-
|
|
223
|
+
userDataDir?: string;
|
|
223
224
|
/**
|
|
224
225
|
* @default "launch"
|
|
225
226
|
*/
|
|
@@ -703,6 +704,10 @@ interface ResponseInterceptionItem {
|
|
|
703
704
|
* request.postData()
|
|
704
705
|
*/
|
|
705
706
|
requestData: string;
|
|
707
|
+
/**
|
|
708
|
+
* request.resourceType()
|
|
709
|
+
*/
|
|
710
|
+
resourceType: string;
|
|
706
711
|
/**
|
|
707
712
|
* response.text()
|
|
708
713
|
*/
|
|
@@ -1172,8 +1177,9 @@ interface LsdPage extends EventEmitter {
|
|
|
1172
1177
|
* obj=window?.[key1]...?.[keyn]
|
|
1173
1178
|
* @return obj ? JSON.stringify(obj) : ""
|
|
1174
1179
|
* @param keys
|
|
1180
|
+
* @param isolated default true; whether to run in isolated context; only valid for patchright (TBD: and camoufox)
|
|
1175
1181
|
*/
|
|
1176
|
-
windowMember(keys: string[]): Promise<string>;
|
|
1182
|
+
windowMember(keys: string[], isolated?: boolean): Promise<string>;
|
|
1177
1183
|
_origPage(): AllPage;
|
|
1178
1184
|
}
|
|
1179
1185
|
interface LsdBrowserContext extends EventEmitter {
|
package/dist/index.d.ts
CHANGED
|
@@ -213,13 +213,14 @@ interface LsdLaunchOptions extends BrowserOptions {
|
|
|
213
213
|
* * puppeteer creates a default/new browserContext during launching browser
|
|
214
214
|
*/
|
|
215
215
|
userAgent?: string;
|
|
216
|
-
userDataDir?: string;
|
|
217
216
|
/**
|
|
218
|
-
* only valid when
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
* only valid when !userAgent && headless && headlessUserAgent is not empty
|
|
218
|
+
*/
|
|
219
|
+
headlessUserAgent?: string;
|
|
220
|
+
/**
|
|
221
|
+
* if userDataDir is not "" (or undefined) and browserControllerType is "playwright" / "patchright", then launch
|
|
221
222
|
*/
|
|
222
|
-
|
|
223
|
+
userDataDir?: string;
|
|
223
224
|
/**
|
|
224
225
|
* @default "launch"
|
|
225
226
|
*/
|
|
@@ -703,6 +704,10 @@ interface ResponseInterceptionItem {
|
|
|
703
704
|
* request.postData()
|
|
704
705
|
*/
|
|
705
706
|
requestData: string;
|
|
707
|
+
/**
|
|
708
|
+
* request.resourceType()
|
|
709
|
+
*/
|
|
710
|
+
resourceType: string;
|
|
706
711
|
/**
|
|
707
712
|
* response.text()
|
|
708
713
|
*/
|
|
@@ -1172,8 +1177,9 @@ interface LsdPage extends EventEmitter {
|
|
|
1172
1177
|
* obj=window?.[key1]...?.[keyn]
|
|
1173
1178
|
* @return obj ? JSON.stringify(obj) : ""
|
|
1174
1179
|
* @param keys
|
|
1180
|
+
* @param isolated default true; whether to run in isolated context; only valid for patchright (TBD: and camoufox)
|
|
1175
1181
|
*/
|
|
1176
|
-
windowMember(keys: string[]): Promise<string>;
|
|
1182
|
+
windowMember(keys: string[], isolated?: boolean): Promise<string>;
|
|
1177
1183
|
_origPage(): AllPage;
|
|
1178
1184
|
}
|
|
1179
1185
|
interface LsdBrowserContext extends EventEmitter {
|
package/dist/index.js
CHANGED
|
@@ -419,8 +419,8 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
419
419
|
const url = page.url();
|
|
420
420
|
const origCookies = await browserContext.cookies(url);
|
|
421
421
|
const cookies = origCookies.map((origCookie) => {
|
|
422
|
-
const { name, value, domain, path
|
|
423
|
-
return { name, value, domain, path
|
|
422
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
423
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
424
424
|
});
|
|
425
425
|
return cookies;
|
|
426
426
|
}
|
|
@@ -1219,6 +1219,7 @@ var PlaywrightPage = class extends EventEmitter {
|
|
|
1219
1219
|
requestMethod,
|
|
1220
1220
|
requestUrl,
|
|
1221
1221
|
requestData,
|
|
1222
|
+
resourceType: request.resourceType(),
|
|
1222
1223
|
responseData
|
|
1223
1224
|
});
|
|
1224
1225
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -1746,7 +1747,7 @@ var PlaywrightBrowserContext = class extends EventEmitter2 {
|
|
|
1746
1747
|
|
|
1747
1748
|
// src/playwright/browser.ts
|
|
1748
1749
|
var PlaywrightBrowser = class _PlaywrightBrowser extends EventEmitter3 {
|
|
1749
|
-
static #supportedBrowserTypes = ["chromium", "firefox", "webkit"];
|
|
1750
|
+
static #supportedBrowserTypes = ["chrome", "chromium", "firefox", "webkit"];
|
|
1750
1751
|
static doesSupport(browserType) {
|
|
1751
1752
|
return _PlaywrightBrowser.#supportedBrowserTypes.includes(browserType);
|
|
1752
1753
|
}
|
|
@@ -1861,6 +1862,10 @@ var PlaywrightBrowser = class _PlaywrightBrowser extends EventEmitter3 {
|
|
|
1861
1862
|
browserContextOptions.proxy = { server, username, password };
|
|
1862
1863
|
}
|
|
1863
1864
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
1865
|
+
const lsdLaunchOptions = this.#options;
|
|
1866
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
1867
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
1868
|
+
}
|
|
1864
1869
|
if (userAgent) {
|
|
1865
1870
|
browserContextOptions.userAgent = userAgent;
|
|
1866
1871
|
}
|
|
@@ -2273,8 +2278,8 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
2273
2278
|
}
|
|
2274
2279
|
const origCookies = await page.cookies();
|
|
2275
2280
|
const cookies = origCookies.map((origCookie) => {
|
|
2276
|
-
const { name, value, domain, path
|
|
2277
|
-
return { name, value, domain, path
|
|
2281
|
+
const { name, value, domain, path, expires, httpOnly = false, secure, sameSite = "Lax" } = origCookie;
|
|
2282
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
2278
2283
|
});
|
|
2279
2284
|
return cookies;
|
|
2280
2285
|
}
|
|
@@ -3052,6 +3057,7 @@ var PuppeteerPage = class extends EventEmitter4 {
|
|
|
3052
3057
|
requestMethod,
|
|
3053
3058
|
requestUrl,
|
|
3054
3059
|
requestData,
|
|
3060
|
+
resourceType: request.resourceType(),
|
|
3055
3061
|
responseData
|
|
3056
3062
|
});
|
|
3057
3063
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -3423,7 +3429,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3423
3429
|
}
|
|
3424
3430
|
const page = await this.#browserContext.newPage();
|
|
3425
3431
|
if (this.#bcUserAgent) {
|
|
3426
|
-
await page.setUserAgent(this.#bcUserAgent);
|
|
3432
|
+
await page.setUserAgent({ userAgent: this.#bcUserAgent });
|
|
3427
3433
|
}
|
|
3428
3434
|
await sleep2(2e3);
|
|
3429
3435
|
const pageInfo = page.pageInfo;
|
|
@@ -3556,7 +3562,7 @@ var PuppeteerBrowserContext = class extends EventEmitter5 {
|
|
|
3556
3562
|
|
|
3557
3563
|
// src/puppeteer/browser.ts
|
|
3558
3564
|
var PuppeteerBrowser = class _PuppeteerBrowser extends EventEmitter6 {
|
|
3559
|
-
static #supportedBrowserTypes = ["chromium"];
|
|
3565
|
+
static #supportedBrowserTypes = ["chrome", "chromium"];
|
|
3560
3566
|
static doesSupport(browserType) {
|
|
3561
3567
|
return _PuppeteerBrowser.#supportedBrowserTypes.includes(browserType);
|
|
3562
3568
|
}
|
|
@@ -3667,6 +3673,10 @@ var PuppeteerBrowser = class _PuppeteerBrowser extends EventEmitter6 {
|
|
|
3667
3673
|
browserContextOptions.proxyServer = proxy.proxyUrl;
|
|
3668
3674
|
}
|
|
3669
3675
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
3676
|
+
const lsdLaunchOptions = this.#options;
|
|
3677
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
3678
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
3679
|
+
}
|
|
3670
3680
|
const browserContext = await this.#browser.createBrowserContext(browserContextOptions);
|
|
3671
3681
|
const { maxViewportOfNewPage = this.#options.maxViewportOfNewPage } = options ? options : {};
|
|
3672
3682
|
const lsdBrowserContext = new PuppeteerBrowserContext(this, browserContext, "new", true, proxy, this.#browserIdx, this.#nextBrowserContextIdx++, this.#maxPagesPerBrowserContext(), this.#maxPageFreeSeconds(), userAgent, maxViewportOfNewPage);
|
|
@@ -4123,8 +4133,6 @@ var CheerioPage = class extends EventEmitter7 {
|
|
|
4123
4133
|
|
|
4124
4134
|
// src/controller/controller.ts
|
|
4125
4135
|
import os from "os";
|
|
4126
|
-
import fs from "fs";
|
|
4127
|
-
import path from "path";
|
|
4128
4136
|
import puppeteer from "puppeteer";
|
|
4129
4137
|
import playwright, { request as apiRequestInPlaywright } from "playwright";
|
|
4130
4138
|
import patchright from "patchright";
|
|
@@ -4475,8 +4483,8 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
4475
4483
|
const url = page.url();
|
|
4476
4484
|
const origCookies = await browserContext.cookies(url);
|
|
4477
4485
|
const cookies = origCookies.map((origCookie) => {
|
|
4478
|
-
const { name, value, domain, path
|
|
4479
|
-
return { name, value, domain, path
|
|
4486
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
4487
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
4480
4488
|
});
|
|
4481
4489
|
return cookies;
|
|
4482
4490
|
}
|
|
@@ -5274,6 +5282,7 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
5274
5282
|
requestMethod,
|
|
5275
5283
|
requestUrl,
|
|
5276
5284
|
requestData,
|
|
5285
|
+
resourceType: request.resourceType(),
|
|
5277
5286
|
responseData
|
|
5278
5287
|
});
|
|
5279
5288
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -5386,7 +5395,7 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
5386
5395
|
}
|
|
5387
5396
|
return true;
|
|
5388
5397
|
}
|
|
5389
|
-
async windowMember(keys) {
|
|
5398
|
+
async windowMember(keys, isolated = true) {
|
|
5390
5399
|
if (!this.#page) {
|
|
5391
5400
|
throw new Error("No valid page");
|
|
5392
5401
|
}
|
|
@@ -5425,7 +5434,8 @@ var PatchrightPage = class extends EventEmitter8 {
|
|
|
5425
5434
|
return "";
|
|
5426
5435
|
}
|
|
5427
5436
|
},
|
|
5428
|
-
keys
|
|
5437
|
+
keys,
|
|
5438
|
+
!!isolated
|
|
5429
5439
|
);
|
|
5430
5440
|
return content;
|
|
5431
5441
|
}
|
|
@@ -5801,7 +5811,7 @@ var PatchrightBrowserContext = class extends EventEmitter9 {
|
|
|
5801
5811
|
|
|
5802
5812
|
// src/patchright/browser.ts
|
|
5803
5813
|
var PatchrightBrowser = class _PatchrightBrowser extends EventEmitter10 {
|
|
5804
|
-
static #supportedBrowserTypes = ["chromium", "firefox", "webkit"];
|
|
5814
|
+
static #supportedBrowserTypes = ["chrome", "chromium", "firefox", "webkit"];
|
|
5805
5815
|
static doesSupport(browserType) {
|
|
5806
5816
|
return _PatchrightBrowser.#supportedBrowserTypes.includes(browserType);
|
|
5807
5817
|
}
|
|
@@ -5916,6 +5926,10 @@ var PatchrightBrowser = class _PatchrightBrowser extends EventEmitter10 {
|
|
|
5916
5926
|
browserContextOptions.proxy = { server, username, password };
|
|
5917
5927
|
}
|
|
5918
5928
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
5929
|
+
const lsdLaunchOptions = this.#options;
|
|
5930
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
5931
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
5932
|
+
}
|
|
5919
5933
|
if (userAgent) {
|
|
5920
5934
|
browserContextOptions.userAgent = userAgent;
|
|
5921
5935
|
}
|
|
@@ -6349,8 +6363,8 @@ var CamoufoxPage = class extends EventEmitter11 {
|
|
|
6349
6363
|
const url = page.url();
|
|
6350
6364
|
const origCookies = await browserContext.cookies(url);
|
|
6351
6365
|
const cookies = origCookies.map((origCookie) => {
|
|
6352
|
-
const { name, value, domain, path
|
|
6353
|
-
return { name, value, domain, path
|
|
6366
|
+
const { name, value, domain, path, expires, httpOnly, secure, sameSite = "Lax" } = origCookie;
|
|
6367
|
+
return { name, value, domain, path, expires, httpOnly, secure, sameSite };
|
|
6354
6368
|
});
|
|
6355
6369
|
return cookies;
|
|
6356
6370
|
}
|
|
@@ -7120,6 +7134,7 @@ var CamoufoxPage = class extends EventEmitter11 {
|
|
|
7120
7134
|
requestMethod,
|
|
7121
7135
|
requestUrl,
|
|
7122
7136
|
requestData,
|
|
7137
|
+
resourceType: request.resourceType(),
|
|
7123
7138
|
responseData
|
|
7124
7139
|
});
|
|
7125
7140
|
loginfo(`##browser cache matched response: ${requestUrl}`);
|
|
@@ -7762,6 +7777,10 @@ var CamoufoxBrowser = class _CamoufoxBrowser extends EventEmitter13 {
|
|
|
7762
7777
|
browserContextOptions.proxy = { server, username, password };
|
|
7763
7778
|
}
|
|
7764
7779
|
let userAgent = options?.userAgent ? options.userAgent : this.#options.userAgent;
|
|
7780
|
+
const lsdLaunchOptions = this.#options;
|
|
7781
|
+
if (!userAgent && lsdLaunchOptions.headless && lsdLaunchOptions.headlessUserAgent) {
|
|
7782
|
+
userAgent = lsdLaunchOptions.headlessUserAgent;
|
|
7783
|
+
}
|
|
7765
7784
|
if (userAgent) {
|
|
7766
7785
|
browserContextOptions.userAgent = userAgent;
|
|
7767
7786
|
}
|
|
@@ -7898,7 +7917,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7898
7917
|
}
|
|
7899
7918
|
}
|
|
7900
7919
|
#puppeteerProduct(browserType) {
|
|
7901
|
-
if (browserType === "chromium") {
|
|
7920
|
+
if (browserType === "chrome" || browserType === "chromium") {
|
|
7902
7921
|
return "chrome";
|
|
7903
7922
|
} else {
|
|
7904
7923
|
throw new Error(`Invalid puppeteer product ${browserType}`);
|
|
@@ -7964,12 +7983,12 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7964
7983
|
// incognito
|
|
7965
7984
|
proxyPerBrowserContext = false,
|
|
7966
7985
|
userDataDir = "",
|
|
7967
|
-
|
|
7968
|
-
|
|
7986
|
+
userAgent = "",
|
|
7987
|
+
headlessUserAgent = ""
|
|
7969
7988
|
} = options ? options : {};
|
|
7970
7989
|
let browserPid = 0;
|
|
7971
7990
|
const incognito = typeof options?.incognito === "boolean" ? options.incognito : browserControllerType === "puppeteer" ? false : true;
|
|
7972
|
-
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, maxViewportOfNewPage, proxy, timeout, args, executablePath, maxWindowSize, headless, minBrowserContexts, incognito, proxyPerBrowserContext, userDataDir, userAgent };
|
|
7991
|
+
const actOptions = { closeFreePagesIntervalSeconds, maxBrowserContextsPerBrowser, maxPagesPerBrowserContext, maxPageFreeSeconds, maxViewportOfNewPage, proxy, timeout, args, executablePath, maxWindowSize, headless, minBrowserContexts, incognito, proxyPerBrowserContext, userDataDir, userAgent, headlessUserAgent };
|
|
7973
7992
|
let idx = args.findIndex((arg) => arg.toLowerCase().startsWith("--incoginto"));
|
|
7974
7993
|
if (idx >= 0) {
|
|
7975
7994
|
logwarn(`##browser controller Please use options.incognito instead when launching new browser.`);
|
|
@@ -7992,13 +8011,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
7992
8011
|
}
|
|
7993
8012
|
let lsdBrowser;
|
|
7994
8013
|
if (browserType === "chrome" && ["playwright", "patchright", "puppeteer"].includes(browserControllerType)) {
|
|
7995
|
-
if (!userDataDir) {
|
|
7996
|
-
if (!userDataBaseDir) {
|
|
7997
|
-
userDataBaseDir = os.tmpdir();
|
|
7998
|
-
}
|
|
7999
|
-
userDataDir = fs.mkdtempSync(path.join(userDataBaseDir, `lsd-${browserControllerType}-chrome-`));
|
|
8000
|
-
actOptions.userDataDir = userDataDir;
|
|
8001
|
-
}
|
|
8002
8014
|
if (!executablePath) {
|
|
8003
8015
|
throw new Error("Invalid executablePath when to launch chrome");
|
|
8004
8016
|
}
|
|
@@ -8024,9 +8036,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8024
8036
|
if (incognito) {
|
|
8025
8037
|
args.push("--incognito");
|
|
8026
8038
|
}
|
|
8027
|
-
if (userDataDir) {
|
|
8028
|
-
args.push(`--user-data-dir=${userDataDir}`);
|
|
8029
|
-
}
|
|
8030
8039
|
}
|
|
8031
8040
|
if (args.length > 0) {
|
|
8032
8041
|
launchOptions.args = args;
|
|
@@ -8069,9 +8078,6 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8069
8078
|
if (incognito) {
|
|
8070
8079
|
args.push("--incognito");
|
|
8071
8080
|
}
|
|
8072
|
-
if (userDataDir) {
|
|
8073
|
-
args.push(`--user-data-dir=${userDataDir}`);
|
|
8074
|
-
}
|
|
8075
8081
|
}
|
|
8076
8082
|
if (args.length > 0) {
|
|
8077
8083
|
launchOptions.args = args;
|
|
@@ -8114,7 +8120,7 @@ var LsdBrowserController = class _LsdBrowserController {
|
|
|
8114
8120
|
args.push("--incognito");
|
|
8115
8121
|
}
|
|
8116
8122
|
if (userDataDir) {
|
|
8117
|
-
|
|
8123
|
+
launchOptions.userDataDir = userDataDir;
|
|
8118
8124
|
}
|
|
8119
8125
|
if (proxy?.proxyUrl && proxy.proxyUrl !== "default") {
|
|
8120
8126
|
args.push(`--proxy-server=${proxy.proxyUrl}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@letsscrapedata/controller",
|
|
3
|
-
"version": "0.0.71",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Unified browser / HTML controller interfaces that support patchright, camoufox, playwright, puppeteer and cheerio",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|