@skrillex1224/playwright-toolkit 2.1.9 → 2.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +87 -49
- package/dist/index.cjs.map +2 -2
- package/dist/index.js +87 -49
- package/dist/index.js.map +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1206,6 +1206,14 @@ import { gotScraping } from "got-scraping";
|
|
|
1206
1206
|
import { Agent as HttpAgent } from "http";
|
|
1207
1207
|
import { Agent as HttpsAgent } from "https";
|
|
1208
1208
|
var logger9 = createLogger("Interception");
|
|
1209
|
+
// Module-level agent singletons reused by every direct (proxy-bypassing)
// request instead of constructing new agents per request. keepAlive is
// disabled on both.
var SHARED_HTTP_AGENT = new HttpAgent({ keepAlive: false });
// NOTE(review): rejectUnauthorized: false turns off TLS certificate
// verification for all direct HTTPS requests — confirm this is intentional.
var SHARED_HTTPS_AGENT = new HttpsAgent({ keepAlive: false, rejectUnauthorized: false });
|
|
1211
|
+
// Tunables for direct (proxy-bypassing) requests.
var DirectConfig = {
  /** Timeout for a direct request, in seconds. */
  directTimeout: 12,
  /** Silent extensions: direct-connect success logs for URLs ending in these use debug level. */
  silentExtensions: [".js"]
};
|
|
1209
1217
|
var ARCHIVE_EXTENSIONS = [".7z", ".zip", ".rar", ".gz", ".bz2", ".tar", ".zst"];
|
|
1210
1218
|
var EXECUTABLE_EXTENSIONS = [".exe", ".apk", ".bin", ".dmg", ".jar", ".class"];
|
|
1211
1219
|
var DOCUMENT_EXTENSIONS = [".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".pdf", ".csv"];
|
|
@@ -1252,6 +1260,14 @@ var DEFAULT_BLOCKING_CONFIG = {
|
|
|
1252
1260
|
/** 额外自定义扩展名列表 */
|
|
1253
1261
|
customExtensions: []
|
|
1254
1262
|
};
|
|
1263
|
+
// Options spread into every direct gotScraping request.
var SHARED_GOT_OPTIONS = {
  // Disable HTTP/2 to avoid handshake compatibility problems in the interception scenario.
  http2: false,
  // No retries here; let Playwright or the outer logic handle retrying.
  retry: { limit: 0 },
  // Do not throw on 404/500 and similar statuses; pass them straight through to the browser.
  throwHttpErrors: false
};
|
|
1255
1271
|
var Interception = {
|
|
1256
1272
|
/**
|
|
1257
1273
|
* 根据配置生成需要屏蔽的扩展名列表
|
|
@@ -1295,16 +1311,6 @@ var Interception = {
|
|
|
1295
1311
|
/**
|
|
1296
1312
|
* 设置网络拦截规则(资源屏蔽 + CDN 直连)
|
|
1297
1313
|
*
|
|
1298
|
-
* 工作流程:
|
|
1299
|
-
* 1. 检查请求是否在屏蔽列表中 → 如果是,直接 abort
|
|
1300
|
-
* 2. 检查是否匹配直连域名 → 如果是,使用 Node.js fetch 直连
|
|
1301
|
-
* 3. 其他请求正常走代理
|
|
1302
|
-
*
|
|
1303
|
-
* 适用场景:
|
|
1304
|
-
* - 代理 IP 无法访问某些 CDN 域名
|
|
1305
|
-
* - 需要加速静态资源加载
|
|
1306
|
-
* - 屏蔽不必要的资源请求
|
|
1307
|
-
*
|
|
1308
1314
|
* @param {import('playwright').Page} page - Playwright Page 对象
|
|
1309
1315
|
* @param {Object} [options] - 配置选项
|
|
1310
1316
|
* @param {string[]} [options.directDomains] - 需要直连的域名列表
|
|
@@ -1317,10 +1323,10 @@ var Interception = {
|
|
|
1317
1323
|
directDomains = [],
|
|
1318
1324
|
blockingConfig = {},
|
|
1319
1325
|
fallbackToProxy = true
|
|
1320
|
-
// 默认回退到代理,保证可用性
|
|
1321
1326
|
} = options;
|
|
1322
1327
|
const mergedBlockingConfig = { ...DEFAULT_BLOCKING_CONFIG, ...blockingConfig };
|
|
1323
1328
|
const blockedExtensions = this.getBlockedExtensions(mergedBlockingConfig);
|
|
1329
|
+
const hasDirectDomains = directDomains.length > 0;
|
|
1324
1330
|
const enabledCategories = [];
|
|
1325
1331
|
if (mergedBlockingConfig.blockArchive) enabledCategories.push("\u538B\u7F29\u5305");
|
|
1326
1332
|
if (mergedBlockingConfig.blockExecutable) enabledCategories.push("\u53EF\u6267\u884C\u6587\u4EF6");
|
|
@@ -1330,21 +1336,22 @@ var Interception = {
|
|
|
1330
1336
|
if (mergedBlockingConfig.blockFont) enabledCategories.push("\u5B57\u4F53");
|
|
1331
1337
|
if (mergedBlockingConfig.blockCss) enabledCategories.push("CSS");
|
|
1332
1338
|
if (mergedBlockingConfig.blockOther) enabledCategories.push("\u5176\u4ED6");
|
|
1333
|
-
|
|
1334
|
-
logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.join(", ")}]` : "\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F");
|
|
1339
|
+
logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.length} \u4E2A] | \u5C4F\u853D: [${enabledCategories.join(", ")}]` : `\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F | \u5C4F\u853D: [${enabledCategories.join(", ")}]`);
|
|
1335
1340
|
await page.route("**/*", async (route) => {
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1341
|
+
let handled = false;
|
|
1342
|
+
try {
|
|
1343
|
+
const request = route.request();
|
|
1344
|
+
const url = request.url();
|
|
1345
|
+
const urlLower = url.toLowerCase();
|
|
1346
|
+
const urlPath = urlLower.split("?")[0];
|
|
1347
|
+
const isSilent = DirectConfig.silentExtensions.some((ext) => urlPath.endsWith(ext));
|
|
1348
|
+
const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
|
|
1349
|
+
if (shouldBlock) {
|
|
1350
|
+
await route.abort();
|
|
1351
|
+
handled = true;
|
|
1352
|
+
return;
|
|
1353
|
+
}
|
|
1354
|
+
if (hasDirectDomains && directDomains.some((domain) => url.includes(domain))) {
|
|
1348
1355
|
try {
|
|
1349
1356
|
const reqHeaders = await request.allHeaders();
|
|
1350
1357
|
delete reqHeaders["host"];
|
|
@@ -1352,30 +1359,24 @@ var Interception = {
|
|
|
1352
1359
|
const method = request.method();
|
|
1353
1360
|
const postData = method !== "GET" && method !== "HEAD" ? request.postDataBuffer() : void 0;
|
|
1354
1361
|
const response = await gotScraping({
|
|
1362
|
+
...SHARED_GOT_OPTIONS,
|
|
1363
|
+
// 应用通用配置
|
|
1355
1364
|
url,
|
|
1356
1365
|
method,
|
|
1357
1366
|
headers: reqHeaders,
|
|
1358
1367
|
body: postData,
|
|
1359
1368
|
responseType: "buffer",
|
|
1360
|
-
//
|
|
1361
|
-
//
|
|
1362
|
-
http2: false,
|
|
1363
|
-
// 模拟浏览器指纹 (Header 部分)
|
|
1369
|
+
// 强制获取 Buffer
|
|
1370
|
+
// 模拟浏览器 TLS 指纹
|
|
1364
1371
|
headerGeneratorOptions: Stealth.getTlsFingerprintOptions(userAgent),
|
|
1365
|
-
|
|
1366
|
-
// 【修复 2】显式使用新的 Agent 实例,彻底无视 Apify 的环境变量代理
|
|
1367
|
-
// 注意:这会牺牲一部分 TLS 指纹模拟能力,但能保证网络通畅
|
|
1372
|
+
// 使用共享的 Agent 单例(keepAlive: false,不会池化连接)
|
|
1368
1373
|
agent: {
|
|
1369
|
-
http:
|
|
1370
|
-
https:
|
|
1374
|
+
http: SHARED_HTTP_AGENT,
|
|
1375
|
+
https: SHARED_HTTPS_AGENT
|
|
1371
1376
|
},
|
|
1372
|
-
//
|
|
1373
|
-
timeout: {
|
|
1374
|
-
request: 15 * 1e3
|
|
1375
|
-
// 建议缩短到 15s
|
|
1376
|
-
}
|
|
1377
|
+
// 超时时间
|
|
1378
|
+
timeout: { request: DirectConfig.directTimeout * 1e3 }
|
|
1377
1379
|
});
|
|
1378
|
-
const body = response.body;
|
|
1379
1380
|
const resHeaders = {};
|
|
1380
1381
|
for (const [key, value] of Object.entries(response.headers)) {
|
|
1381
1382
|
if (Array.isArray(value)) {
|
|
@@ -1386,29 +1387,66 @@ var Interception = {
|
|
|
1386
1387
|
}
|
|
1387
1388
|
delete resHeaders["content-encoding"];
|
|
1388
1389
|
delete resHeaders["content-length"];
|
|
1389
|
-
|
|
1390
|
-
|
|
1390
|
+
delete resHeaders["transfer-encoding"];
|
|
1391
|
+
delete resHeaders["connection"];
|
|
1392
|
+
delete resHeaders["keep-alive"];
|
|
1393
|
+
isSilent ? logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url}`) : logger9.info(`\u76F4\u8FDE\u6210\u529F: ${url}`);
|
|
1394
|
+
await safeFulfill(route, {
|
|
1391
1395
|
status: response.statusCode,
|
|
1392
1396
|
headers: resHeaders,
|
|
1393
|
-
body
|
|
1397
|
+
body: response.body
|
|
1394
1398
|
});
|
|
1399
|
+
handled = true;
|
|
1395
1400
|
return;
|
|
1396
1401
|
} catch (e) {
|
|
1402
|
+
const isTimeout = e.code === "ETIMEDOUT" || e.message.toLowerCase().includes("timeout");
|
|
1403
|
+
const action = fallbackToProxy ? "\u56DE\u9000\u4EE3\u7406" : "\u5DF2\u653E\u5F03";
|
|
1404
|
+
const reason = isTimeout ? `\u8D85\u65F6(${DirectConfig.directTimeout}s)` : `\u5F02\u5E38: ${e.message}`;
|
|
1405
|
+
logger9.warn(`\u76F4\u8FDE${reason}\uFF0C${action}: ${url}`);
|
|
1397
1406
|
if (fallbackToProxy) {
|
|
1398
|
-
|
|
1399
|
-
return route.continue();
|
|
1407
|
+
await safeContinue(route);
|
|
1400
1408
|
} else {
|
|
1401
|
-
|
|
1402
|
-
return route.abort();
|
|
1409
|
+
await route.abort();
|
|
1403
1410
|
}
|
|
1411
|
+
handled = true;
|
|
1412
|
+
return;
|
|
1413
|
+
}
|
|
1414
|
+
}
|
|
1415
|
+
await safeContinue(route);
|
|
1416
|
+
handled = true;
|
|
1417
|
+
} catch (err) {
|
|
1418
|
+
logger9.warn(`\u8DEF\u7531\u5904\u7406\u5F02\u5E38: ${err.message}`);
|
|
1419
|
+
if (!handled) {
|
|
1420
|
+
try {
|
|
1421
|
+
await route.continue();
|
|
1422
|
+
} catch (_) {
|
|
1404
1423
|
}
|
|
1405
1424
|
}
|
|
1406
1425
|
}
|
|
1407
|
-
return route.continue();
|
|
1408
1426
|
});
|
|
1409
|
-
logger9.success("setup", `\u5C4F\u853D\u5206\u7C7B: [${enabledCategories.join(", ")}]`);
|
|
1410
1427
|
}
|
|
1411
1428
|
};
|
|
1429
|
+
/**
 * Fulfill a intercepted route without letting a failure propagate.
 *
 * Errors classified as ignorable by isIgnorableError are dropped silently;
 * any other error is reported on console.error. The function never rejects
 * because of a fulfill failure.
 *
 * @param {import('playwright').Route} route - Route to fulfill.
 * @param {Object} options - Options forwarded unchanged to route.fulfill.
 * @returns {Promise<void>}
 */
async function safeFulfill(route, options) {
  try {
    await route.fulfill(options);
  } catch (fulfillError) {
    // Expected failures (route already handled, page/target closed) are not worth reporting.
    if (isIgnorableError(fulfillError)) {
      return;
    }
    console.error(`[Interception] Fulfill Error: ${fulfillError.message}`);
  }
}
|
|
1438
|
+
/**
 * Continue an intercepted route without letting a failure propagate.
 *
 * Errors classified as ignorable by isIgnorableError are dropped silently.
 * Any other error is reported on console.error, in the same way safeFulfill
 * reports its failures, so unexpected continue failures are no longer
 * swallowed without a trace.
 *
 * @param {import('playwright').Route} route - Route to continue.
 * @returns {Promise<void>}
 */
async function safeContinue(route) {
  try {
    await route.continue();
  } catch (error) {
    if (!isIgnorableError(error)) {
      // Previously this branch was empty, hiding every unexpected failure.
      console.error(`[Interception] Continue Error: ${error.message}`);
    }
  }
}
|
|
1446
|
+
/**
 * Decide whether a route-handling failure is expected and can be ignored.
 *
 * A failure is ignorable when its message contains "already handled" or
 * "closed" (which also covers "Target closed"). Values without a usable
 * string message — null, undefined, plain objects — are treated as not
 * ignorable instead of raising a TypeError inside the caller's catch block.
 *
 * @param {unknown} error - The thrown value; usually an Error, but may be anything.
 * @returns {boolean} true when the failure can be safely ignored.
 */
function isIgnorableError(error) {
  let msg;
  if (typeof error === "string") {
    msg = error;
  } else if (error !== null && error !== undefined) {
    msg = error.message;
  }
  if (typeof msg !== "string") {
    return false;
  }
  return msg.includes("already handled") || msg.includes("closed");
}
|
|
1412
1450
|
|
|
1413
1451
|
// index.js
|
|
1414
1452
|
var usePlaywrightToolKit = () => {
|