@skrillex1224/playwright-toolkit 2.1.35 → 2.1.37

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,9 +38,6 @@ const crawler = new PlaywrightCrawler({
38
38
  },
39
39
  preNavigationHooks: [
40
40
  async ({ page }) => {
41
- // 统一反爬:时区/语言/权限/视口
42
- await AntiCheat.applyPage(page);
43
-
44
41
  // 验证码监控
45
42
  Captcha.useCaptchaMonitor(page, {
46
43
  domSelector: '#captcha_container',
package/dist/index.cjs CHANGED
@@ -183,37 +183,86 @@ async function createApifyKit() {
183
183
  const { Actor: Actor2 } = apify;
184
184
  return {
185
185
  /**
186
- * 核心封装:执行步骤,带自动日志确认和失败截图处理
186
+ * 核心封装:执行步骤,带自动日志确认、失败截图处理和重试机制
187
+ *
188
+ * @param {string} step - 步骤名称
189
+ * @param {import('playwright').Page} page - Playwright page 对象
190
+ * @param {Function} actionFn - 执行的异步操作
191
+ * @param {Object} [options] - 配置选项
192
+ * @param {boolean} [options.failActor=true] - 失败时是否调用 Actor.fail
193
+ * @param {Object} [options.retry] - 重试配置
194
+ * @param {number} [options.retry.times=0] - 重试次数
195
+ * @param {'direct'|'refresh'} [options.retry.mode='direct'] - 重试模式
196
+ * @param {Function} [options.retry.before] - 重试前钩子,可覆盖默认等待行为
187
197
  */
188
198
  async runStep(step, page, actionFn, options = {}) {
189
- const { failActor = true } = options;
190
- logger.start(`[Step] ${step}`);
191
- try {
192
- const result = await actionFn();
193
- logger.success(`[Step] ${step}`);
194
- return result;
195
- } catch (error) {
196
- logger.fail(`[Step] ${step}`, error);
197
- if (failActor) {
198
- let base64 = "\u622A\u56FE\u5931\u8D25";
199
- try {
200
- if (page) {
201
- const buffer = await page.screenshot({ fullPage: true, type: "jpeg", quality: 60 });
202
- base64 = `data:image/jpeg;base64,${buffer.toString("base64")}`;
203
- }
204
- } catch (snapErr) {
205
- logger.warn(`\u622A\u56FE\u751F\u6210\u5931\u8D25: ${snapErr.message}`);
206
- }
207
- await this.pushFailed(error, {
208
- step,
209
- page,
210
- options,
211
- base64
212
- });
213
- await Actor2.fail(`Run Step ${step} \u5931\u8D25: ${error.message}`);
199
+ const { failActor = true, retry = {} } = options;
200
+ const { times: retryTimes = 0, mode: retryMode = "direct", before: beforeRetry } = retry;
201
+ const executeAction = async (attemptNumber) => {
202
+ const attemptLabel = attemptNumber > 0 ? ` (\u91CD\u8BD5 #${attemptNumber})` : "";
203
+ logger.start(`[Step] ${step}${attemptLabel}`);
204
+ try {
205
+ const result = await actionFn();
206
+ logger.success(`[Step] ${step}${attemptLabel}`);
207
+ return { success: true, result };
208
+ } catch (error) {
209
+ logger.fail(`[Step] ${step}${attemptLabel}`, error);
210
+ return { success: false, error };
211
+ }
212
+ };
213
+ const prepareForRetry = async (attemptNumber) => {
214
+ if (typeof beforeRetry === "function") {
215
+ logger.start(`[RetryStep] \u6267\u884C\u81EA\u5B9A\u4E49 before \u94A9\u5B50 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
216
+ await beforeRetry(page, attemptNumber);
217
+ logger.success(`[RetryStep] before \u94A9\u5B50\u5B8C\u6210`);
218
+ } else if (retryMode === "refresh") {
219
+ logger.start(`[RetryStep] \u5237\u65B0\u9875\u9762 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
220
+ await page.reload({ waitUntil: "domcontentloaded" });
221
+ logger.success(`[RetryStep] \u9875\u9762\u5237\u65B0\u5B8C\u6210`);
214
222
  } else {
215
- throw error;
223
+ logger.start(`[RetryStep] \u7B49\u5F85 3 \u79D2 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
224
+ await new Promise((resolve) => setTimeout(resolve, 3e3));
225
+ logger.success(`[RetryStep] \u7B49\u5F85\u5B8C\u6210`);
216
226
  }
227
+ };
228
+ let lastResult = await executeAction(0);
229
+ if (lastResult.success) {
230
+ return lastResult.result;
231
+ }
232
+ for (let attempt = 1; attempt <= retryTimes; attempt++) {
233
+ logger.start(`[RetryStep] \u51C6\u5907\u7B2C ${attempt}/${retryTimes} \u6B21\u91CD\u8BD5: ${step}`);
234
+ try {
235
+ await prepareForRetry(attempt);
236
+ } catch (prepareError) {
237
+ logger.warn(`[RetryStep] \u91CD\u8BD5\u51C6\u5907\u5931\u8D25: ${prepareError.message}`);
238
+ continue;
239
+ }
240
+ lastResult = await executeAction(attempt);
241
+ if (lastResult.success) {
242
+ return lastResult.result;
243
+ }
244
+ }
245
+ const finalError = lastResult.error;
246
+ if (failActor) {
247
+ let base64 = "\u622A\u56FE\u5931\u8D25";
248
+ try {
249
+ if (page) {
250
+ const buffer = await page.screenshot({ fullPage: true, type: "jpeg", quality: 60 });
251
+ base64 = `data:image/jpeg;base64,${buffer.toString("base64")}`;
252
+ }
253
+ } catch (snapErr) {
254
+ logger.warn(`\u622A\u56FE\u751F\u6210\u5931\u8D25: ${snapErr.message}`);
255
+ }
256
+ await this.pushFailed(finalError, {
257
+ step,
258
+ page,
259
+ options,
260
+ base64,
261
+ retryAttempts: retryTimes
262
+ });
263
+ await Actor2.fail(`Run Step ${step} \u5931\u8D25 (\u5DF2\u91CD\u8BD5 ${retryTimes} \u6B21): ${finalError.message}`);
264
+ } else {
265
+ throw finalError;
217
266
  }
218
267
  },
219
268
  /**
@@ -376,25 +425,12 @@ var BASE_CONFIG = Object.freeze({
376
425
  geolocation: null
377
426
  });
378
427
  var DEFAULT_LAUNCH_ARGS = [
379
- "--disable-blink-features=AutomationControlled",
428
+ // '--disable-blink-features=AutomationControlled', // Crawlee 可能会自动处理,过多干预反而会被识别
380
429
  "--no-sandbox",
381
430
  "--disable-setuid-sandbox",
382
431
  "--window-position=0,0",
383
432
  `--lang=${BASE_CONFIG.locale}`
384
433
  ];
385
- var ADVANCED_LAUNCH_ARGS = [
386
- ...DEFAULT_LAUNCH_ARGS,
387
- "--disable-dev-shm-usage",
388
- "--disable-background-networking",
389
- "--disable-default-apps",
390
- "--disable-extensions",
391
- "--disable-sync",
392
- "--disable-translate",
393
- "--metrics-recording-only",
394
- "--mute-audio",
395
- "--no-first-run"
396
- ];
397
- var CONTEXT_CONFIG_CACHE = /* @__PURE__ */ new WeakMap();
398
434
  function buildFingerprintOptions(locale) {
399
435
  return {
400
436
  browsers: [{ name: "chrome", minVersion: 110 }],
@@ -403,95 +439,9 @@ function buildFingerprintOptions(locale) {
403
439
  locales: [locale]
404
440
  };
405
441
  }
406
- function parseAcceptLanguage(acceptLanguage) {
407
- if (!acceptLanguage) return [];
408
- return acceptLanguage.split(",").map((part) => part.trim().split(";")[0]).filter(Boolean);
409
- }
410
- function normalizeLanguages(acceptLanguage, fallbackLocale) {
411
- const languages = parseAcceptLanguage(acceptLanguage);
412
- if (languages.length === 0) return [fallbackLocale];
413
- if (!languages.includes(fallbackLocale)) {
414
- return [fallbackLocale, ...languages];
415
- }
416
- return languages;
417
- }
418
- function getOperatingSystemsFromUserAgent(userAgent) {
419
- const lowerUA = userAgent.toLowerCase();
420
- if (lowerUA.includes("windows")) return ["windows"];
421
- if (lowerUA.includes("mac os") || lowerUA.includes("macintosh")) return ["macos"];
422
- if (lowerUA.includes("linux")) return ["linux"];
423
- return [];
424
- }
425
- function buildContextConfigKey(config) {
426
- return JSON.stringify({
427
- locale: config.locale,
428
- acceptLanguage: config.acceptLanguage,
429
- timezoneId: config.timezoneId,
430
- timezoneOffset: config.timezoneOffset
431
- });
432
- }
433
- async function applyContextSettings(context, config, languages, permissions, injectLocaleTimezone) {
434
- const contextKey = buildContextConfigKey(config);
435
- const cached = CONTEXT_CONFIG_CACHE.get(context);
436
- const isFirstInit = !cached;
437
- const effectiveConfig = cached?.config || config;
438
- const effectiveLanguages = cached?.languages || languages;
439
- if (isFirstInit) {
440
- CONTEXT_CONFIG_CACHE.set(context, {
441
- key: contextKey,
442
- config,
443
- languages
444
- });
445
- } else if (cached.key !== contextKey) {
446
- logger3.warn("applyContext", "Context already initialized; ignore conflicting locale/timezone.");
447
- }
448
- await context.setExtraHTTPHeaders({
449
- "accept-language": effectiveConfig.acceptLanguage
450
- });
451
- if (isFirstInit) {
452
- await context.addInitScript(({ locale, timezoneId, timezoneOffset, languages: languages2, applyLocaleTimezone }) => {
453
- const originalDateTimeFormat = Intl.DateTimeFormat;
454
- if (applyLocaleTimezone) {
455
- Intl.DateTimeFormat = function(locales, initOptions) {
456
- const nextLocales = locales || locale;
457
- const nextOptions = initOptions ? { ...initOptions } : {};
458
- nextOptions.timeZone = nextOptions.timeZone || timezoneId;
459
- return new originalDateTimeFormat(nextLocales, nextOptions);
460
- };
461
- Intl.DateTimeFormat.prototype = originalDateTimeFormat.prototype;
462
- Date.prototype.getTimezoneOffset = function() {
463
- return timezoneOffset;
464
- };
465
- Object.defineProperty(navigator, "language", { get: () => languages2[0] });
466
- Object.defineProperty(navigator, "languages", { get: () => languages2 });
467
- }
468
- Object.defineProperty(navigator, "webdriver", { get: () => void 0 });
469
- }, {
470
- locale: effectiveConfig.locale,
471
- timezoneId: effectiveConfig.timezoneId,
472
- timezoneOffset: effectiveConfig.timezoneOffset,
473
- languages: effectiveLanguages,
474
- applyLocaleTimezone: injectLocaleTimezone
475
- });
476
- }
477
- if (effectiveConfig.geolocation) {
478
- await context.setGeolocation(effectiveConfig.geolocation);
479
- await context.grantPermissions(["geolocation"]);
480
- }
481
- if (permissions?.length) {
482
- await context.grantPermissions(permissions);
483
- }
484
- }
485
- function resolveConfig(overrides = {}) {
486
- return {
487
- ...BASE_CONFIG,
488
- ...overrides,
489
- geolocation: overrides.geolocation === null ? null : overrides.geolocation || BASE_CONFIG.geolocation
490
- };
491
- }
492
442
  var AntiCheat = {
493
443
  /**
494
- * 获取统一的基础配置(中国、桌面端、中文语言)。
444
+ * 获取统一的基础配置
495
445
  */
496
446
  getBaseConfig() {
497
447
  return { ...BASE_CONFIG };
@@ -508,100 +458,18 @@ var AntiCheat = {
508
458
  getLaunchArgs() {
509
459
  return [...DEFAULT_LAUNCH_ARGS];
510
460
  },
511
- /**
512
- * 获取增强启动参数(高风险场景)。
513
- */
514
- getAdvancedLaunchArgs() {
515
- return [...ADVANCED_LAUNCH_ARGS];
516
- },
517
- /**
518
- * 统一应用到 BrowserContext(时区/语言/权限/地理位置)。
519
- *
520
- * @param {import('playwright').BrowserContext} context
521
- * @param {Object} [options]
522
- * @param {string} [options.locale]
523
- * @param {string} [options.acceptLanguage]
524
- * @param {string} [options.timezoneId]
525
- * @param {number} [options.timezoneOffset]
526
- * @param {import('playwright').Geolocation|null} [options.geolocation]
527
- * @param {string[]} [options.permissions]
528
- */
529
- async applyContext(context, options = {}) {
530
- const config = resolveConfig(options);
531
- const languages = normalizeLanguages(config.acceptLanguage, config.locale);
532
- const permissions = Array.isArray(options.permissions) ? options.permissions : [];
533
- await applyContextSettings(context, config, languages, permissions, true);
534
- logger3.success("applyContext", `${config.locale} | ${config.timezoneId}`);
535
- },
536
- /**
537
- * 统一应用到 Page(Context + 视口同步)。
538
- *
539
- * @param {import('playwright').Page} page
540
- * @param {Object} [options] - 传递给 applyContext 的选项
541
- */
542
- async applyPage(page, options = {}) {
543
- const config = resolveConfig(options);
544
- const languages = normalizeLanguages(config.acceptLanguage, config.locale);
545
- const permissions = Array.isArray(options.permissions) ? options.permissions : [];
546
- let injectLocaleTimezone = true;
547
- try {
548
- const env = await page.evaluate(() => ({
549
- language: navigator.language,
550
- languages: Array.isArray(navigator.languages) ? navigator.languages : [],
551
- timeZone: Intl.DateTimeFormat().resolvedOptions().timeZone,
552
- tzOffset: (/* @__PURE__ */ new Date()).getTimezoneOffset()
553
- }));
554
- const languageMatch = env.language === languages[0];
555
- const timeZoneMatch = env.timeZone === config.timezoneId && env.tzOffset === config.timezoneOffset;
556
- injectLocaleTimezone = !(languageMatch && timeZoneMatch);
557
- } catch (e) {
558
- injectLocaleTimezone = true;
559
- }
560
- await applyContextSettings(page.context(), config, languages, permissions, injectLocaleTimezone);
561
- await this.syncViewportWithScreen(page);
562
- },
563
- /**
564
- * 同步 Page 视口到 window.screen,避免视口/屏幕不一致检测。
565
- */
566
- async syncViewportWithScreen(page) {
567
- try {
568
- const screen = await page.evaluate(() => ({
569
- width: window.screen.width,
570
- height: window.screen.height
571
- }));
572
- await page.setViewportSize({
573
- width: screen.width,
574
- height: screen.height
575
- });
576
- logger3.success("syncViewport", `size=${screen.width}x${screen.height}`);
577
- } catch (e) {
578
- logger3.warn(`syncViewport \u5931\u8D25: ${e.message}\uFF0C\u56DE\u9000\u5230 1920x1080`);
579
- await page.setViewportSize({ width: 1920, height: 1080 });
580
- }
581
- },
582
461
  /**
583
462
  * 为 got-scraping 生成与浏览器一致的 TLS 指纹配置(桌面端)。
584
- *
585
- * @param {string} [userAgent]
586
463
  */
587
464
  getTlsFingerprintOptions(userAgent = "", acceptLanguage = "") {
588
- const primaryLocale = parseAcceptLanguage(acceptLanguage || BASE_CONFIG.acceptLanguage)[0] || BASE_CONFIG.locale;
589
- const fingerprint = buildFingerprintOptions(primaryLocale);
590
- const os = getOperatingSystemsFromUserAgent(userAgent);
591
- if (os.length > 0) fingerprint.operatingSystems = os;
592
- return fingerprint;
465
+ return buildFingerprintOptions(BASE_CONFIG.locale);
593
466
  },
594
467
  /**
595
- * 规范化请求头,确保语言与浏览器一致。
596
- *
597
- * @param {Record<string, string>} headers
598
- * @returns {Record<string, string>}
468
+ * 规范化请求头
599
469
  */
600
470
  applyLocaleHeaders(headers, acceptLanguage = "") {
601
- if (acceptLanguage) {
602
- headers["accept-language"] = acceptLanguage;
603
- } else if (!headers["accept-language"]) {
604
- headers["accept-language"] = BASE_CONFIG.acceptLanguage;
471
+ if (!headers["accept-language"]) {
472
+ headers["accept-language"] = acceptLanguage || BASE_CONFIG.acceptLanguage;
605
473
  }
606
474
  return headers;
607
475
  }
@@ -1037,18 +905,6 @@ var Launch = {
1037
905
  ignoreDefaultArgs: ["--enable-automation"]
1038
906
  };
1039
907
  },
1040
- /**
1041
- * 获取增强版启动选项(用于高风险反爬场景)
1042
- */
1043
- getAdvancedLaunchOptions(customArgs = []) {
1044
- return {
1045
- args: [
1046
- ...AntiCheat.getAdvancedLaunchArgs(),
1047
- ...customArgs
1048
- ],
1049
- ignoreDefaultArgs: ["--enable-automation"]
1050
- };
1051
- },
1052
908
  /**
1053
909
  * 推荐的 Fingerprint Generator 选项
1054
910
  * 确保生成的是桌面端、较新的 Chrome,以匹配我们的脚本逻辑
@@ -1582,8 +1438,6 @@ var Interception = {
1582
1438
  try {
1583
1439
  const reqHeaders = await request.allHeaders();
1584
1440
  delete reqHeaders["host"];
1585
- const currentAcceptLanguage = reqHeaders["accept-language"] || "";
1586
- AntiCheat.applyLocaleHeaders(reqHeaders, currentAcceptLanguage);
1587
1441
  const resolvedAcceptLanguage = reqHeaders["accept-language"] || "";
1588
1442
  const userAgent = reqHeaders["user-agent"] || "";
1589
1443
  const method = request.method();
@@ -1597,8 +1451,8 @@ var Interception = {
1597
1451
  body: postData,
1598
1452
  responseType: "buffer",
1599
1453
  // 强制获取 Buffer
1600
- // 模拟浏览器 TLS 指纹
1601
- headerGeneratorOptions: AntiCheat.getTlsFingerprintOptions(userAgent, resolvedAcceptLanguage),
1454
+ // 移除手动 TLS 指纹配置,使用 got-scraping 默认的高质量指纹
1455
+ // headerGeneratorOptions: ...
1602
1456
  // 使用共享的 Agent 单例(keepAlive: false,不会池化连接)
1603
1457
  agent: {
1604
1458
  http: SHARED_HTTP_AGENT,