@skrillex1224/playwright-toolkit 2.1.9 → 2.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1235,6 +1235,14 @@ var import_got_scraping = require("got-scraping");
1235
1235
  var import_http = require("http");
1236
1236
  var import_https2 = require("https");
1237
1237
  var logger9 = createLogger("Interception");
1238
+ var SHARED_HTTP_AGENT = new import_http.Agent({ keepAlive: false });
1239
+ var SHARED_HTTPS_AGENT = new import_https2.Agent({ keepAlive: false, rejectUnauthorized: false });
1240
+ var DirectConfig = {
1241
+ /** Direct-connection request timeout, in seconds (it is multiplied by 1e3 where it is passed as the request timeout). */
1242
+ directTimeout: 12,
1243
+ /** Silent extensions: a successful direct fetch whose URL path ends with one of these is logged at debug level instead of info. */
1244
+ silentExtensions: [".js"]
1245
+ };
1238
1246
  var ARCHIVE_EXTENSIONS = [".7z", ".zip", ".rar", ".gz", ".bz2", ".tar", ".zst"];
1239
1247
  var EXECUTABLE_EXTENSIONS = [".exe", ".apk", ".bin", ".dmg", ".jar", ".class"];
1240
1248
  var DOCUMENT_EXTENSIONS = [".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".pdf", ".csv"];
@@ -1281,6 +1289,14 @@ var DEFAULT_BLOCKING_CONFIG = {
1281
1289
  /** 额外自定义扩展名列表 */
1282
1290
  customExtensions: []
1283
1291
  };
1292
+ var SHARED_GOT_OPTIONS = {
1293
+ http2: false,
1294
+ // Disable HTTP/2 to avoid handshake compatibility problems in the interception scenario
1295
+ retry: { limit: 0 },
1296
+ // Leave retries to Playwright or the outer logic
1297
+ throwHttpErrors: false
1298
+ // Do not throw on error statuses such as 404/500; pass them straight through to the browser
1299
+ };
1284
1300
  var Interception = {
1285
1301
  /**
1286
1302
  * 根据配置生成需要屏蔽的扩展名列表
@@ -1324,16 +1340,6 @@ var Interception = {
1324
1340
  /**
1325
1341
  * 设置网络拦截规则(资源屏蔽 + CDN 直连)
1326
1342
  *
1327
- * 工作流程:
1328
- * 1. 检查请求是否在屏蔽列表中 → 如果是,直接 abort
1329
- * 2. 检查是否匹配直连域名 → 如果是,使用 Node.js fetch 直连
1330
- * 3. 其他请求正常走代理
1331
- *
1332
- * 适用场景:
1333
- * - 代理 IP 无法访问某些 CDN 域名
1334
- * - 需要加速静态资源加载
1335
- * - 屏蔽不必要的资源请求
1336
- *
1337
1343
  * @param {import('playwright').Page} page - Playwright Page 对象
1338
1344
  * @param {Object} [options] - 配置选项
1339
1345
  * @param {string[]} [options.directDomains] - 需要直连的域名列表
@@ -1346,10 +1352,10 @@ var Interception = {
1346
1352
  directDomains = [],
1347
1353
  blockingConfig = {},
1348
1354
  fallbackToProxy = true
1349
- // 默认回退到代理,保证可用性
1350
1355
  } = options;
1351
1356
  const mergedBlockingConfig = { ...DEFAULT_BLOCKING_CONFIG, ...blockingConfig };
1352
1357
  const blockedExtensions = this.getBlockedExtensions(mergedBlockingConfig);
1358
+ const hasDirectDomains = directDomains.length > 0;
1353
1359
  const enabledCategories = [];
1354
1360
  if (mergedBlockingConfig.blockArchive) enabledCategories.push("\u538B\u7F29\u5305");
1355
1361
  if (mergedBlockingConfig.blockExecutable) enabledCategories.push("\u53EF\u6267\u884C\u6587\u4EF6");
@@ -1359,21 +1365,22 @@ var Interception = {
1359
1365
  if (mergedBlockingConfig.blockFont) enabledCategories.push("\u5B57\u4F53");
1360
1366
  if (mergedBlockingConfig.blockCss) enabledCategories.push("CSS");
1361
1367
  if (mergedBlockingConfig.blockOther) enabledCategories.push("\u5176\u4ED6");
1362
- const hasDirectDomains = directDomains.length > 0;
1363
- logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.join(", ")}]` : "\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F");
1368
+ logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.length} \u4E2A] | \u5C4F\u853D: [${enabledCategories.join(", ")}]` : `\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F | \u5C4F\u853D: [${enabledCategories.join(", ")}]`);
1364
1369
  await page.route("**/*", async (route) => {
1365
- const request = route.request();
1366
- const url = request.url();
1367
- const urlLower = url.toLowerCase();
1368
- const urlPath = urlLower.split("?")[0];
1369
- const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1370
- if (shouldBlock) {
1371
- logger9.debug(`\u5DF2\u5C4F\u853D: ${url.substring(0, 100)}`);
1372
- return route.abort();
1373
- }
1374
- if (hasDirectDomains) {
1375
- const matchesDomain = directDomains.some((domain) => url.includes(domain));
1376
- if (matchesDomain) {
1370
+ let handled = false;
1371
+ try {
1372
+ const request = route.request();
1373
+ const url = request.url();
1374
+ const urlLower = url.toLowerCase();
1375
+ const urlPath = urlLower.split("?")[0];
1376
+ const isSilent = DirectConfig.silentExtensions.some((ext) => urlPath.endsWith(ext));
1377
+ const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1378
+ if (shouldBlock) {
1379
+ await route.abort();
1380
+ handled = true;
1381
+ return;
1382
+ }
1383
+ if (hasDirectDomains && directDomains.some((domain) => url.includes(domain))) {
1377
1384
  try {
1378
1385
  const reqHeaders = await request.allHeaders();
1379
1386
  delete reqHeaders["host"];
@@ -1381,30 +1388,24 @@ var Interception = {
1381
1388
  const method = request.method();
1382
1389
  const postData = method !== "GET" && method !== "HEAD" ? request.postDataBuffer() : void 0;
1383
1390
  const response = await (0, import_got_scraping.gotScraping)({
1391
+ ...SHARED_GOT_OPTIONS,
1392
+ // 应用通用配置
1384
1393
  url,
1385
1394
  method,
1386
1395
  headers: reqHeaders,
1387
1396
  body: postData,
1388
1397
  responseType: "buffer",
1389
- // 直接获取 Buffer
1390
- // 【修复 1】禁用 HTTP/2,防止握手挂起
1391
- http2: false,
1392
- // 模拟浏览器指纹 (Header 部分)
1398
+ // 强制获取 Buffer
1399
+ // 模拟浏览器 TLS 指纹
1393
1400
  headerGeneratorOptions: Stealth.getTlsFingerprintOptions(userAgent),
1394
- retry: { limit: 0 },
1395
- // 【修复 2】显式使用新的 Agent 实例,彻底无视 Apify 的环境变量代理
1396
- // 注意:这会牺牲一部分 TLS 指纹模拟能力,但能保证网络通畅
1401
+ // 使用共享的 Agent 单例(keepAlive: false,不会池化连接)
1397
1402
  agent: {
1398
- http: new import_http.Agent({ keepAlive: true }),
1399
- https: new import_https2.Agent({ keepAlive: true, rejectUnauthorized: false })
1403
+ http: SHARED_HTTP_AGENT,
1404
+ https: SHARED_HTTPS_AGENT
1400
1405
  },
1401
- // 设置超时,防止永久 Pending
1402
- timeout: {
1403
- request: 15 * 1e3
1404
- // 建议缩短到 15s
1405
- }
1406
+ // 超时时间
1407
+ timeout: { request: DirectConfig.directTimeout * 1e3 }
1406
1408
  });
1407
- const body = response.body;
1408
1409
  const resHeaders = {};
1409
1410
  for (const [key, value] of Object.entries(response.headers)) {
1410
1411
  if (Array.isArray(value)) {
@@ -1415,29 +1416,66 @@ var Interception = {
1415
1416
  }
1416
1417
  delete resHeaders["content-encoding"];
1417
1418
  delete resHeaders["content-length"];
1418
- logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url.substring(0, 100)}`);
1419
- await route.fulfill({
1419
+ delete resHeaders["transfer-encoding"];
1420
+ delete resHeaders["connection"];
1421
+ delete resHeaders["keep-alive"];
1422
+ isSilent ? logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url}`) : logger9.info(`\u76F4\u8FDE\u6210\u529F: ${url}`);
1423
+ await safeFulfill(route, {
1420
1424
  status: response.statusCode,
1421
1425
  headers: resHeaders,
1422
- body
1426
+ body: response.body
1423
1427
  });
1428
+ handled = true;
1424
1429
  return;
1425
1430
  } catch (e) {
1431
+ const isTimeout = e.code === "ETIMEDOUT" || e.message.toLowerCase().includes("timeout");
1432
+ const action = fallbackToProxy ? "\u56DE\u9000\u4EE3\u7406" : "\u5DF2\u653E\u5F03";
1433
+ const reason = isTimeout ? `\u8D85\u65F6(${DirectConfig.directTimeout}s)` : `\u5F02\u5E38: ${e.message}`;
1434
+ logger9.warn(`\u76F4\u8FDE${reason}\uFF0C${action}: ${url}`);
1426
1435
  if (fallbackToProxy) {
1427
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25\uFF0C\u56DE\u9000\u4EE3\u7406: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1428
- return route.continue();
1436
+ await safeContinue(route);
1429
1437
  } else {
1430
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1431
- return route.abort();
1438
+ await route.abort();
1432
1439
  }
1440
+ handled = true;
1441
+ return;
1442
+ }
1443
+ }
1444
+ await safeContinue(route);
1445
+ handled = true;
1446
+ } catch (err) {
1447
+ logger9.warn(`\u8DEF\u7531\u5904\u7406\u5F02\u5E38: ${err.message}`);
1448
+ if (!handled) {
1449
+ try {
1450
+ await route.continue();
1451
+ } catch (_) {
1433
1452
  }
1434
1453
  }
1435
1454
  }
1436
- return route.continue();
1437
1455
  });
1438
- logger9.success("setup", `\u5C4F\u853D\u5206\u7C7B: [${enabledCategories.join(", ")}]`);
1439
1456
  }
1440
1457
  };
1458
/**
 * Fulfill a route without letting a rejection from `route.fulfill` escape.
 *
 * Failures that `isIgnorableError` classifies as ignorable are dropped
 * silently; any other failure is reported through `console.error`. The
 * function never rethrows.
 *
 * @param {{ fulfill: Function }} route - Route object whose `fulfill` is called.
 * @param {Object} options - Passed through unchanged to `route.fulfill`.
 * @returns {Promise<void>}
 */
async function safeFulfill(route, options) {
  try {
    await route.fulfill(options);
  } catch (fulfillError) {
    // Guard clause: nothing to report for errors classified as ignorable.
    if (isIgnorableError(fulfillError)) {
      return;
    }
    console.error(`[Interception] Fulfill Error: ${fulfillError.message}`);
  }
}
1467
/**
 * Continue a route without letting a rejection from `route.continue` escape.
 *
 * Failures that `isIgnorableError` classifies as ignorable are dropped
 * silently. Any other failure is reported through `console.error`, in the
 * same format `safeFulfill` uses, instead of being swallowed without a trace.
 * The function never rethrows the `route.continue` failure.
 *
 * @param {{ continue: Function }} route - Route object whose `continue` is called.
 * @returns {Promise<void>}
 */
async function safeContinue(route) {
  try {
    await route.continue();
  } catch (error) {
    if (!isIgnorableError(error)) {
      // Non-Error rejections have no usable `.message`; fall back to String().
      const detail = error instanceof Error ? error.message : String(error);
      console.error(`[Interception] Continue Error: ${detail}`);
    }
  }
}
1475
/**
 * Decide whether a route-handling failure can be ignored without logging.
 *
 * A failure is ignorable when its message text contains "already handled",
 * "Target closed" or "closed". The message text is taken from a thrown
 * string directly, or from a string `message` property; any other value
 * (including `null`/`undefined`) has no message text and is therefore not
 * ignorable. This function never throws, so it is safe to call from inside
 * a `catch` handler with whatever value was caught.
 *
 * @param {*} error - The caught value.
 * @returns {boolean} `true` when the failure matches one of the known markers.
 */
function isIgnorableError(error) {
  let msg = "";
  if (typeof error === "string") {
    msg = error;
  } else if (error != null && typeof error.message === "string") {
    msg = error.message;
  }
  // "closed" also covers "Target closed"; both are kept to document intent.
  return msg.includes("already handled") || msg.includes("Target closed") || msg.includes("closed");
}
1441
1479
 
1442
1480
  // index.js
1443
1481
  var usePlaywrightToolKit = () => {