@skrillex1224/playwright-toolkit 2.1.9 → 2.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1206,6 +1206,14 @@ import { gotScraping } from "got-scraping";
1206
1206
  import { Agent as HttpAgent } from "http";
1207
1207
  import { Agent as HttpsAgent } from "https";
1208
1208
  var logger9 = createLogger("Interception");
1209
// Module-level agent instances that the route handler passes to gotScraping as `agent.http` / `agent.https`
// for direct requests. Both are created with `keepAlive: false`.
// NOTE(review): `rejectUnauthorized: false` looks like it turns off TLS certificate verification for
// direct HTTPS requests — confirm this is intended.
+ var SHARED_HTTP_AGENT = new HttpAgent({ keepAlive: false });
1210
+ var SHARED_HTTPS_AGENT = new HttpsAgent({ keepAlive: false, rejectUnauthorized: false });
1211
// Settings read by the direct-connection branch of the route handler.
+ var DirectConfig = {
1212
+ /** Timeout for a direct request, in seconds (multiplied by 1e3 where it is passed to gotScraping). */
1213
+ directTimeout: 12,
1214
+ /** Silent extensions: a successful direct request for a URL path ending in one of these is logged at debug level instead of info. */
1215
+ silentExtensions: [".js"]
1216
+ };
1209
1217
  var ARCHIVE_EXTENSIONS = [".7z", ".zip", ".rar", ".gz", ".bz2", ".tar", ".zst"];
1210
1218
  var EXECUTABLE_EXTENSIONS = [".exe", ".apk", ".bin", ".dmg", ".jar", ".class"];
1211
1219
  var DOCUMENT_EXTENSIONS = [".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".pdf", ".csv"];
@@ -1252,6 +1260,14 @@ var DEFAULT_BLOCKING_CONFIG = {
1252
1260
  /** 额外自定义扩展名列表 */
1253
1261
  customExtensions: []
1254
1262
  };
1263
// Common options spread into the gotScraping call made for direct requests.
// Each comment inside the object describes the option on the line ABOVE it.
+ var SHARED_GOT_OPTIONS = {
1264
+ http2: false,
1265
+ // (http2) Disable HTTP/2 to avoid handshake compatibility problems in interception scenarios.
1266
+ retry: { limit: 0 },
1267
+ // (retry) Leave retrying to Playwright or to the outer logic.
1268
+ throwHttpErrors: false
1269
+ // (throwHttpErrors) Do not throw on 404/500-style responses; pass them straight through to the browser.
1270
+ };
1255
1271
  var Interception = {
1256
1272
  /**
1257
1273
  * 根据配置生成需要屏蔽的扩展名列表
@@ -1295,16 +1311,6 @@ var Interception = {
1295
1311
  /**
1296
1312
  * 设置网络拦截规则(资源屏蔽 + CDN 直连)
1297
1313
  *
1298
- * 工作流程:
1299
- * 1. 检查请求是否在屏蔽列表中 → 如果是,直接 abort
1300
- * 2. 检查是否匹配直连域名 → 如果是,使用 Node.js fetch 直连
1301
- * 3. 其他请求正常走代理
1302
- *
1303
- * 适用场景:
1304
- * - 代理 IP 无法访问某些 CDN 域名
1305
- * - 需要加速静态资源加载
1306
- * - 屏蔽不必要的资源请求
1307
- *
1308
1314
  * @param {import('playwright').Page} page - Playwright Page 对象
1309
1315
  * @param {Object} [options] - 配置选项
1310
1316
  * @param {string[]} [options.directDomains] - 需要直连的域名列表
@@ -1317,10 +1323,10 @@ var Interception = {
1317
1323
  directDomains = [],
1318
1324
  blockingConfig = {},
1319
1325
  fallbackToProxy = true
1320
- // 默认回退到代理,保证可用性
1321
1326
  } = options;
1322
1327
  const mergedBlockingConfig = { ...DEFAULT_BLOCKING_CONFIG, ...blockingConfig };
1323
1328
  const blockedExtensions = this.getBlockedExtensions(mergedBlockingConfig);
1329
+ const hasDirectDomains = directDomains.length > 0;
1324
1330
  const enabledCategories = [];
1325
1331
  if (mergedBlockingConfig.blockArchive) enabledCategories.push("\u538B\u7F29\u5305");
1326
1332
  if (mergedBlockingConfig.blockExecutable) enabledCategories.push("\u53EF\u6267\u884C\u6587\u4EF6");
@@ -1330,21 +1336,22 @@ var Interception = {
1330
1336
  if (mergedBlockingConfig.blockFont) enabledCategories.push("\u5B57\u4F53");
1331
1337
  if (mergedBlockingConfig.blockCss) enabledCategories.push("CSS");
1332
1338
  if (mergedBlockingConfig.blockOther) enabledCategories.push("\u5176\u4ED6");
1333
- const hasDirectDomains = directDomains.length > 0;
1334
- logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.join(", ")}]` : "\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F");
1339
+ logger9.start("setup", hasDirectDomains ? `\u76F4\u8FDE\u57DF\u540D: [${directDomains.length} \u4E2A] | \u5C4F\u853D: [${enabledCategories.join(", ")}]` : `\u4EC5\u8D44\u6E90\u5C4F\u853D\u6A21\u5F0F | \u5C4F\u853D: [${enabledCategories.join(", ")}]`);
1335
1340
  await page.route("**/*", async (route) => {
1336
- const request = route.request();
1337
- const url = request.url();
1338
- const urlLower = url.toLowerCase();
1339
- const urlPath = urlLower.split("?")[0];
1340
- const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1341
- if (shouldBlock) {
1342
- logger9.debug(`\u5DF2\u5C4F\u853D: ${url.substring(0, 100)}`);
1343
- return route.abort();
1344
- }
1345
- if (hasDirectDomains) {
1346
- const matchesDomain = directDomains.some((domain) => url.includes(domain));
1347
- if (matchesDomain) {
1341
+ let handled = false;
1342
+ try {
1343
+ const request = route.request();
1344
+ const url = request.url();
1345
+ const urlLower = url.toLowerCase();
1346
+ const urlPath = urlLower.split("?")[0];
1347
+ const isSilent = DirectConfig.silentExtensions.some((ext) => urlPath.endsWith(ext));
1348
+ const shouldBlock = blockedExtensions.some((ext) => urlPath.endsWith(ext));
1349
+ if (shouldBlock) {
1350
+ await route.abort();
1351
+ handled = true;
1352
+ return;
1353
+ }
1354
+ if (hasDirectDomains && directDomains.some((domain) => url.includes(domain))) {
1348
1355
  try {
1349
1356
  const reqHeaders = await request.allHeaders();
1350
1357
  delete reqHeaders["host"];
@@ -1352,30 +1359,24 @@ var Interception = {
1352
1359
  const method = request.method();
1353
1360
  const postData = method !== "GET" && method !== "HEAD" ? request.postDataBuffer() : void 0;
1354
1361
  const response = await gotScraping({
1362
+ ...SHARED_GOT_OPTIONS,
1363
+ // 应用通用配置
1355
1364
  url,
1356
1365
  method,
1357
1366
  headers: reqHeaders,
1358
1367
  body: postData,
1359
1368
  responseType: "buffer",
1360
- // 直接获取 Buffer
1361
- // 【修复 1】禁用 HTTP/2,防止握手挂起
1362
- http2: false,
1363
- // 模拟浏览器指纹 (Header 部分)
1369
+ // 强制获取 Buffer
1370
+ // 模拟浏览器 TLS 指纹
1364
1371
  headerGeneratorOptions: Stealth.getTlsFingerprintOptions(userAgent),
1365
- retry: { limit: 0 },
1366
- // 【修复 2】显式使用新的 Agent 实例,彻底无视 Apify 的环境变量代理
1367
- // 注意:这会牺牲一部分 TLS 指纹模拟能力,但能保证网络通畅
1372
+ // 使用共享的 Agent 单例(keepAlive: false,不会池化连接)
1368
1373
  agent: {
1369
- http: new HttpAgent({ keepAlive: true }),
1370
- https: new HttpsAgent({ keepAlive: true, rejectUnauthorized: false })
1374
+ http: SHARED_HTTP_AGENT,
1375
+ https: SHARED_HTTPS_AGENT
1371
1376
  },
1372
- // 设置超时,防止永久 Pending
1373
- timeout: {
1374
- request: 15 * 1e3
1375
- // 建议缩短到 15s
1376
- }
1377
+ // 超时时间
1378
+ timeout: { request: DirectConfig.directTimeout * 1e3 }
1377
1379
  });
1378
- const body = response.body;
1379
1380
  const resHeaders = {};
1380
1381
  for (const [key, value] of Object.entries(response.headers)) {
1381
1382
  if (Array.isArray(value)) {
@@ -1386,29 +1387,66 @@ var Interception = {
1386
1387
  }
1387
1388
  delete resHeaders["content-encoding"];
1388
1389
  delete resHeaders["content-length"];
1389
- logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url.substring(0, 100)}`);
1390
- await route.fulfill({
1390
+ delete resHeaders["transfer-encoding"];
1391
+ delete resHeaders["connection"];
1392
+ delete resHeaders["keep-alive"];
1393
+ isSilent ? logger9.debug(`\u76F4\u8FDE\u6210\u529F: ${url}`) : logger9.info(`\u76F4\u8FDE\u6210\u529F: ${url}`);
1394
+ await safeFulfill(route, {
1391
1395
  status: response.statusCode,
1392
1396
  headers: resHeaders,
1393
- body
1397
+ body: response.body
1394
1398
  });
1399
+ handled = true;
1395
1400
  return;
1396
1401
  } catch (e) {
1402
+ const isTimeout = e.code === "ETIMEDOUT" || e.message.toLowerCase().includes("timeout");
1403
+ const action = fallbackToProxy ? "\u56DE\u9000\u4EE3\u7406" : "\u5DF2\u653E\u5F03";
1404
+ const reason = isTimeout ? `\u8D85\u65F6(${DirectConfig.directTimeout}s)` : `\u5F02\u5E38: ${e.message}`;
1405
+ logger9.warn(`\u76F4\u8FDE${reason}\uFF0C${action}: ${url}`);
1397
1406
  if (fallbackToProxy) {
1398
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25\uFF0C\u56DE\u9000\u4EE3\u7406: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1399
- return route.continue();
1407
+ await safeContinue(route);
1400
1408
  } else {
1401
- logger9.warn(`\u76F4\u8FDE\u5931\u8D25: ${url.substring(0, 80)} | \u539F\u56E0: ${e.message}`);
1402
- return route.abort();
1409
+ await route.abort();
1403
1410
  }
1411
+ handled = true;
1412
+ return;
1413
+ }
1414
+ }
1415
+ await safeContinue(route);
1416
+ handled = true;
1417
+ } catch (err) {
1418
+ logger9.warn(`\u8DEF\u7531\u5904\u7406\u5F02\u5E38: ${err.message}`);
1419
+ if (!handled) {
1420
+ try {
1421
+ await route.continue();
1422
+ } catch (_) {
1404
1423
  }
1405
1424
  }
1406
1425
  }
1407
- return route.continue();
1408
1426
  });
1409
- logger9.success("setup", `\u5C4F\u853D\u5206\u7C7B: [${enabledCategories.join(", ")}]`);
1410
1427
  }
1411
1428
  };
1429
/**
 * Fulfills a route without letting a failure of `route.fulfill` propagate to the caller.
 *
 * Errors that `isIgnorableError` classifies as ignorable are dropped silently;
 * every other error is reported through `console.error`.
 *
 * @param {import('playwright').Route} route - Route to fulfill.
 * @param {Object} options - Options forwarded unchanged to `route.fulfill`.
 * @returns {Promise<boolean>} `true` when `route.fulfill` resolved, `false` when it threw.
 *   Existing callers that ignore the return value are unaffected.
 */
async function safeFulfill(route, options) {
  try {
    await route.fulfill(options);
    return true;
  } catch (error) {
    if (!isIgnorableError(error)) {
      // A thrown value is not guaranteed to carry a `message`; fall back to its string form.
      const detail = error != null && error.message !== undefined ? error.message : String(error);
      console.error(`[Interception] Fulfill Error: ${detail}`);
    }
    return false;
  }
}
1438
/**
 * Continues a route without letting a failure of `route.continue` propagate to the caller.
 *
 * Errors that `isIgnorableError` classifies as ignorable are dropped silently.
 * Any other error is reported through `console.error` (previously it was
 * swallowed by an empty branch, which hid real failures).
 *
 * @param {import('playwright').Route} route - Route to continue.
 * @returns {Promise<boolean>} `true` when `route.continue` resolved, `false` when it threw.
 *   Existing callers that ignore the return value are unaffected.
 */
async function safeContinue(route) {
  try {
    await route.continue();
    return true;
  } catch (error) {
    if (!isIgnorableError(error)) {
      // A thrown value is not guaranteed to carry a `message`; fall back to its string form.
      const detail = error != null && error.message !== undefined ? error.message : String(error);
      console.error(`[Interception] Continue Error: ${detail}`);
    }
    return false;
  }
}
1446
/**
 * Decides whether an error raised while resolving a route can be dropped without logging.
 *
 * An error is ignorable when its message contains "already handled" or "closed"
 * (the latter also covers "Target closed").
 *
 * @param {unknown} error - Value caught from a route call; may be an Error, a string, or anything else.
 * @returns {boolean} `true` when the message matches one of the ignorable fragments;
 *   `false` otherwise, including when no string message is available.
 */
function isIgnorableError(error) {
  // This runs inside catch handlers, so it must never throw itself:
  // tolerate null/undefined and values without a string `message`.
  const msg = typeof error === "string" ? error : error != null ? error.message : undefined;
  if (typeof msg !== "string") {
    return false;
  }
  return msg.includes("already handled") || msg.includes("closed");
}
1412
1450
 
1413
1451
  // index.js
1414
1452
  var usePlaywrightToolKit = () => {