@skrillex1224/playwright-toolkit 2.1.8 → 2.1.10

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1252,6 +1252,14 @@ var DEFAULT_BLOCKING_CONFIG = {
1252
1252
  /** 额外自定义扩展名列表 */
1253
1253
  customExtensions: []
1254
1254
  };
1255
+ var SHARED_GOT_OPTIONS = {
1256
+ http2: false,
1257
+ // 禁用 HTTP2 避免在拦截场景下的握手兼容性问题
1258
+ retry: { limit: 0 },
1259
+ // 让 Playwright 或外层逻辑处理重试
1260
+ throwHttpErrors: false
1261
+ // 404/500 等错误不抛出异常,直接透传给浏览器
1262
+ };
1255
1263
  var Interception = {
1256
1264
  /**
1257
1265
  * 根据配置生成需要屏蔽的扩展名列表
@@ -1295,16 +1303,6 @@ var Interception = {
1295
1303
  /**
1296
1304
  * 设置网络拦截规则(资源屏蔽 + CDN 直连)
1297
1305
  *
1298
- * 工作流程:
1299
- * 1. 检查请求是否在屏蔽列表中 → 如果是,直接 abort
1300
- * 2. 检查是否匹配直连域名 → 如果是,使用 Node.js fetch 直连
1301
- * 3. 其他请求正常走代理
1302
- *
1303
- * 适用场景:
1304
- * - 代理 IP 无法访问某些 CDN 域名
1305
- * - 需要加速静态资源加载
1306
- * - 屏蔽不必要的资源请求
1307
- *
1308
1306
  * @param {import('playwright').Page} page - Playwright Page 对象
1309
1307
  * @param {Object} [options] - 配置选项
1310
1308
  * @param {string[]} [options.directDomains] - 需要直连的域名列表
@@ -1317,10 +1315,10 @@ var Interception = {
1317
1315
  directDomains = [],
1318
1316
  blockingConfig = {},
1319
1317
  fallbackToProxy = true
1320
- // 默认回退到代理,保证可用性
1321
1318
  } = options;
1322
1319
  const mergedBlockingConfig = { ...DEFAULT_BLOCKING_CONFIG, ...blockingConfig };
1323
1320
  const blockedExtensions = this.getBlockedExtensions(mergedBlockingConfig);
1321
+ const hasDirectDomains = directDomains.length > 0;
1324
1322
  const enabledCategories = [];
1325
1323
  if (mergedBlockingConfig.blockArchive) enabledCategories.push("\u538B\u7F29\u5305");
1326
1324
  if (mergedBlockingConfig.blockExecutable) enabledCategories.push("\u53EF\u6267\u884C\u6587\u4EF6");
@@ -1330,21 +1328,19 @@ var Interception = {
1330
1328
  if (mergedBlockingConfig.blockFont) enabledCategories.push("\u5B57\u4F53");
1331
1329
  if (mergedBlockingConfig.blockCss) enabledCategories.push("CSS");
1332
1330
  if (mergedBlockingConfig.blockOther) enabledCategories.push("\u5176\u4ED6");
1333
- const hasDirectDomains = directDomains.length > 0;
1334
- logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.join(", ")}]` : "\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F");
1331
+ logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.length} \u4E2A] | \u5C4F\u853D: [${enabledCategories.join(", ")}]` : `\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F | \u5C4F\u853D: [${enabledCategories.join(", ")}]`);
1335
1332
  await page.route("**/*", async (route) => {
1336
- const request = route.request();
1337
- const url = request.url();
1338
- const urlLower = url.toLowerCase();
1339
- const urlPath = urlLower.split("?")[0];
1340
- const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1341
- if (shouldBlock) {
1342
- logger9.debug(`\u5DF2\u5C4F\u853D: ${url.substring(0, 100)}`);
1343
- return route.abort();
1344
- }
1345
- if (hasDirectDomains) {
1346
- const matchesDomain = directDomains.some((domain) => url.includes(domain));
1347
- if (matchesDomain) {
1333
+ try {
1334
+ const request = route.request();
1335
+ const url = request.url();
1336
+ const urlLower = url.toLowerCase();
1337
+ const urlPath = urlLower.split("?")[0];
1338
+ const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1339
+ if (shouldBlock) {
1340
+ await route.abort();
1341
+ return;
1342
+ }
1343
+ if (hasDirectDomains && directDomains.some((domain) => url.includes(domain))) {
1348
1344
  try {
1349
1345
  const reqHeaders = await request.allHeaders();
1350
1346
  delete reqHeaders["host"];
@@ -1352,30 +1348,29 @@ var Interception = {
1352
1348
  const method = request.method();
1353
1349
  const postData = method !== "GET" && method !== "HEAD" ? request.postDataBuffer() : void 0;
1354
1350
  const response = await gotScraping({
1351
+ ...SHARED_GOT_OPTIONS,
1352
+ // 应用通用配置
1355
1353
  url,
1356
1354
  method,
1357
1355
  headers: reqHeaders,
1358
1356
  body: postData,
1359
1357
  responseType: "buffer",
1360
- // 直接获取 Buffer
1361
- // 【修复 1】禁用 HTTP/2,防止握手挂起
1362
- http2: false,
1363
- // 模拟浏览器指纹 (Header 部分)
1358
+ // 强制获取 Buffer
1359
+ // 模拟浏览器 TLS 指纹
1364
1360
  headerGeneratorOptions: Stealth.getTlsFingerprintOptions(userAgent),
1365
- retry: { limit: 0 },
1366
- // 【修复 2】显式使用新的 Agent 实例,彻底无视 Apify 的环境变量代理
1367
- // 注意:这会牺牲一部分 TLS 指纹模拟能力,但能保证网络通畅
1361
+ // 【核心修复 1】: keepAlive 设置为 false
1362
+ // 每一个拦截请求都是独立的,使用 keepAlive 会导致 Agent 池耗尽,
1363
+ // 从而导致后续请求一直 Pending。
1368
1364
  agent: {
1369
- http: new HttpAgent({ keepAlive: true }),
1370
- https: new HttpsAgent({ keepAlive: true, rejectUnauthorized: false })
1365
+ http: new HttpAgent({ keepAlive: false }),
1366
+ https: new HttpsAgent({ keepAlive: false, rejectUnauthorized: false })
1371
1367
  },
1372
- // 设置超时,防止永久 Pending
1368
+ // 设置较短的超时时间,给回退代理留出机会
1373
1369
  timeout: {
1374
- request: 15 * 1e3
1375
- // 建议缩短到 15s
1370
+ request: 12 * 1e3
1371
+ // 12秒超时
1376
1372
  }
1377
1373
  });
1378
- const body = response.body;
1379
1374
  const resHeaders = {};
1380
1375
  for (const [key, value] of Object.entries(response.headers)) {
1381
1376
  if (Array.isArray(value)) {
@@ -1386,29 +1381,61 @@ var Interception = {
1386
1381
  }
1387
1382
  delete resHeaders["content-encoding"];
1388
1383
  delete resHeaders["content-length"];
1389
- logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url.substring(0, 100)}`);
1390
- await route.fulfill({
1384
+ delete resHeaders["transfer-encoding"];
1385
+ delete resHeaders["connection"];
1386
+ delete resHeaders["keep-alive"];
1387
+ logger9.info(`\u76F4\u8FDE\u6210\u529F: ${url}`);
1388
+ await safeFulfill(route, {
1391
1389
  status: response.statusCode,
1392
1390
  headers: resHeaders,
1393
- body
1391
+ body: response.body
1394
1392
  });
1395
1393
  return;
1396
1394
  } catch (e) {
1397
1395
  if (fallbackToProxy) {
1398
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25\uFF0C\u56DE\u9000\u4EE3\u7406: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1399
- return route.continue();
1396
+ logger9.warn(`\u76F4\u8FDE\u5F02\u5E38\uFF0C\u56DE\u9000\u4EE3\u7406: ${url} | Err: ${e.message}`);
1397
+ await safeContinue(route);
1398
+ return;
1400
1399
  } else {
1401
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1402
- return route.abort();
1400
+ logger9.warn(`\u76F4\u8FDE\u5931\u8D25: ${url} | Err: ${e.message}`);
1401
+ await route.abort();
1402
+ return;
1403
1403
  }
1404
1404
  }
1405
1405
  }
1406
+ await safeContinue(route);
1407
+ } catch (err) {
1408
+ try {
1409
+ if (!route.request().failure()) {
1410
+ await route.continue();
1411
+ }
1412
+ } catch (_) {
1413
+ }
1406
1414
  }
1407
- return route.continue();
1408
1415
  });
1409
- logger9.success("setup", `\u5C4F\u853D\u5206\u7C7B: [${enabledCategories.join(", ")}]`);
1410
1416
  }
1411
1417
  };
1418
+ async function safeFulfill(route, options) {
1419
+ try {
1420
+ await route.fulfill(options);
1421
+ } catch (error) {
1422
+ if (!isIgnorableError(error)) {
1423
+ console.error(`[Interception] Fulfill Error: ${error.message}`);
1424
+ }
1425
+ }
1426
+ }
1427
+ async function safeContinue(route) {
1428
+ try {
1429
+ await route.continue();
1430
+ } catch (error) {
1431
+ if (!isIgnorableError(error)) {
1432
+ }
1433
+ }
1434
+ }
1435
+ function isIgnorableError(error) {
1436
+ const msg = error.message;
1437
+ return msg.includes("already handled") || msg.includes("Target closed") || msg.includes("closed");
1438
+ }
1412
1439
 
1413
1440
  // index.js
1414
1441
  var usePlaywrightToolKit = () => {