@skrillex1224/playwright-toolkit 2.1.36 → 2.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -3
- package/dist/index.cjs +84 -230
- package/dist/index.cjs.map +3 -3
- package/dist/index.js +84 -230
- package/dist/index.js.map +3 -3
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -154,37 +154,86 @@ async function createApifyKit() {
|
|
|
154
154
|
const { Actor: Actor2 } = apify;
|
|
155
155
|
return {
|
|
156
156
|
/**
|
|
157
|
-
*
|
|
157
|
+
* 核心封装:执行步骤,带自动日志确认、失败截图处理和重试机制
|
|
158
|
+
*
|
|
159
|
+
* @param {string} step - 步骤名称
|
|
160
|
+
* @param {import('playwright').Page} page - Playwright page 对象
|
|
161
|
+
* @param {Function} actionFn - 执行的异步操作
|
|
162
|
+
* @param {Object} [options] - 配置选项
|
|
163
|
+
* @param {boolean} [options.failActor=true] - 失败时是否调用 Actor.fail
|
|
164
|
+
* @param {Object} [options.retry] - 重试配置
|
|
165
|
+
* @param {number} [options.retry.times=0] - 重试次数
|
|
166
|
+
* @param {'direct'|'refresh'} [options.retry.mode='direct'] - 重试模式
|
|
167
|
+
* @param {Function} [options.retry.before] - 重试前钩子,可覆盖默认等待行为
|
|
158
168
|
*/
|
|
159
169
|
async runStep(step, page, actionFn, options = {}) {
|
|
160
|
-
const { failActor = true } = options;
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
const
|
|
164
|
-
logger.
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
});
|
|
184
|
-
await Actor2.fail(`Run Step ${step} \u5931\u8D25: ${error.message}`);
|
|
170
|
+
const { failActor = true, retry = {} } = options;
|
|
171
|
+
const { times: retryTimes = 0, mode: retryMode = "direct", before: beforeRetry } = retry;
|
|
172
|
+
const executeAction = async (attemptNumber) => {
|
|
173
|
+
const attemptLabel = attemptNumber > 0 ? ` (\u91CD\u8BD5 #${attemptNumber})` : "";
|
|
174
|
+
logger.start(`[Step] ${step}${attemptLabel}`);
|
|
175
|
+
try {
|
|
176
|
+
const result = await actionFn();
|
|
177
|
+
logger.success(`[Step] ${step}${attemptLabel}`);
|
|
178
|
+
return { success: true, result };
|
|
179
|
+
} catch (error) {
|
|
180
|
+
logger.fail(`[Step] ${step}${attemptLabel}`, error);
|
|
181
|
+
return { success: false, error };
|
|
182
|
+
}
|
|
183
|
+
};
|
|
184
|
+
const prepareForRetry = async (attemptNumber) => {
|
|
185
|
+
if (typeof beforeRetry === "function") {
|
|
186
|
+
logger.start(`[RetryStep] \u6267\u884C\u81EA\u5B9A\u4E49 before \u94A9\u5B50 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
|
|
187
|
+
await beforeRetry(page, attemptNumber);
|
|
188
|
+
logger.success(`[RetryStep] before \u94A9\u5B50\u5B8C\u6210`);
|
|
189
|
+
} else if (retryMode === "refresh") {
|
|
190
|
+
logger.start(`[RetryStep] \u5237\u65B0\u9875\u9762 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
|
|
191
|
+
await page.reload({ waitUntil: "domcontentloaded" });
|
|
192
|
+
logger.success(`[RetryStep] \u9875\u9762\u5237\u65B0\u5B8C\u6210`);
|
|
185
193
|
} else {
|
|
186
|
-
|
|
194
|
+
logger.start(`[RetryStep] \u7B49\u5F85 3 \u79D2 (\u7B2C ${attemptNumber} \u6B21\u91CD\u8BD5)`);
|
|
195
|
+
await new Promise((resolve) => setTimeout(resolve, 3e3));
|
|
196
|
+
logger.success(`[RetryStep] \u7B49\u5F85\u5B8C\u6210`);
|
|
187
197
|
}
|
|
198
|
+
};
|
|
199
|
+
let lastResult = await executeAction(0);
|
|
200
|
+
if (lastResult.success) {
|
|
201
|
+
return lastResult.result;
|
|
202
|
+
}
|
|
203
|
+
for (let attempt = 1; attempt <= retryTimes; attempt++) {
|
|
204
|
+
logger.start(`[RetryStep] \u51C6\u5907\u7B2C ${attempt}/${retryTimes} \u6B21\u91CD\u8BD5: ${step}`);
|
|
205
|
+
try {
|
|
206
|
+
await prepareForRetry(attempt);
|
|
207
|
+
} catch (prepareError) {
|
|
208
|
+
logger.warn(`[RetryStep] \u91CD\u8BD5\u51C6\u5907\u5931\u8D25: ${prepareError.message}`);
|
|
209
|
+
continue;
|
|
210
|
+
}
|
|
211
|
+
lastResult = await executeAction(attempt);
|
|
212
|
+
if (lastResult.success) {
|
|
213
|
+
return lastResult.result;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
const finalError = lastResult.error;
|
|
217
|
+
if (failActor) {
|
|
218
|
+
let base64 = "\u622A\u56FE\u5931\u8D25";
|
|
219
|
+
try {
|
|
220
|
+
if (page) {
|
|
221
|
+
const buffer = await page.screenshot({ fullPage: true, type: "jpeg", quality: 60 });
|
|
222
|
+
base64 = `data:image/jpeg;base64,${buffer.toString("base64")}`;
|
|
223
|
+
}
|
|
224
|
+
} catch (snapErr) {
|
|
225
|
+
logger.warn(`\u622A\u56FE\u751F\u6210\u5931\u8D25: ${snapErr.message}`);
|
|
226
|
+
}
|
|
227
|
+
await this.pushFailed(finalError, {
|
|
228
|
+
step,
|
|
229
|
+
page,
|
|
230
|
+
options,
|
|
231
|
+
base64,
|
|
232
|
+
retryAttempts: retryTimes
|
|
233
|
+
});
|
|
234
|
+
await Actor2.fail(`Run Step ${step} \u5931\u8D25 (\u5DF2\u91CD\u8BD5 ${retryTimes} \u6B21): ${finalError.message}`);
|
|
235
|
+
} else {
|
|
236
|
+
throw finalError;
|
|
188
237
|
}
|
|
189
238
|
},
|
|
190
239
|
/**
|
|
@@ -347,25 +396,12 @@ var BASE_CONFIG = Object.freeze({
|
|
|
347
396
|
geolocation: null
|
|
348
397
|
});
|
|
349
398
|
var DEFAULT_LAUNCH_ARGS = [
|
|
350
|
-
|
|
399
|
+
// '--disable-blink-features=AutomationControlled', // Crawlee 可能会自动处理,过多干预反而会被识别
|
|
351
400
|
"--no-sandbox",
|
|
352
401
|
"--disable-setuid-sandbox",
|
|
353
402
|
"--window-position=0,0",
|
|
354
403
|
`--lang=${BASE_CONFIG.locale}`
|
|
355
404
|
];
|
|
356
|
-
var ADVANCED_LAUNCH_ARGS = [
|
|
357
|
-
...DEFAULT_LAUNCH_ARGS,
|
|
358
|
-
"--disable-dev-shm-usage",
|
|
359
|
-
"--disable-background-networking",
|
|
360
|
-
"--disable-default-apps",
|
|
361
|
-
"--disable-extensions",
|
|
362
|
-
"--disable-sync",
|
|
363
|
-
"--disable-translate",
|
|
364
|
-
"--metrics-recording-only",
|
|
365
|
-
"--mute-audio",
|
|
366
|
-
"--no-first-run"
|
|
367
|
-
];
|
|
368
|
-
var CONTEXT_CONFIG_CACHE = /* @__PURE__ */ new WeakMap();
|
|
369
405
|
function buildFingerprintOptions(locale) {
|
|
370
406
|
return {
|
|
371
407
|
browsers: [{ name: "chrome", minVersion: 110 }],
|
|
@@ -374,95 +410,9 @@ function buildFingerprintOptions(locale) {
|
|
|
374
410
|
locales: [locale]
|
|
375
411
|
};
|
|
376
412
|
}
|
|
377
|
-
function parseAcceptLanguage(acceptLanguage) {
|
|
378
|
-
if (!acceptLanguage) return [];
|
|
379
|
-
return acceptLanguage.split(",").map((part) => part.trim().split(";")[0]).filter(Boolean);
|
|
380
|
-
}
|
|
381
|
-
function normalizeLanguages(acceptLanguage, fallbackLocale) {
|
|
382
|
-
const languages = parseAcceptLanguage(acceptLanguage);
|
|
383
|
-
if (languages.length === 0) return [fallbackLocale];
|
|
384
|
-
if (!languages.includes(fallbackLocale)) {
|
|
385
|
-
return [fallbackLocale, ...languages];
|
|
386
|
-
}
|
|
387
|
-
return languages;
|
|
388
|
-
}
|
|
389
|
-
function getOperatingSystemsFromUserAgent(userAgent) {
|
|
390
|
-
const lowerUA = userAgent.toLowerCase();
|
|
391
|
-
if (lowerUA.includes("windows")) return ["windows"];
|
|
392
|
-
if (lowerUA.includes("mac os") || lowerUA.includes("macintosh")) return ["macos"];
|
|
393
|
-
if (lowerUA.includes("linux")) return ["linux"];
|
|
394
|
-
return [];
|
|
395
|
-
}
|
|
396
|
-
function buildContextConfigKey(config) {
|
|
397
|
-
return JSON.stringify({
|
|
398
|
-
locale: config.locale,
|
|
399
|
-
acceptLanguage: config.acceptLanguage,
|
|
400
|
-
timezoneId: config.timezoneId,
|
|
401
|
-
timezoneOffset: config.timezoneOffset
|
|
402
|
-
});
|
|
403
|
-
}
|
|
404
|
-
async function applyContextSettings(context, config, languages, permissions, injectLocaleTimezone) {
|
|
405
|
-
const contextKey = buildContextConfigKey(config);
|
|
406
|
-
const cached = CONTEXT_CONFIG_CACHE.get(context);
|
|
407
|
-
const isFirstInit = !cached;
|
|
408
|
-
const effectiveConfig = cached?.config || config;
|
|
409
|
-
const effectiveLanguages = cached?.languages || languages;
|
|
410
|
-
if (isFirstInit) {
|
|
411
|
-
CONTEXT_CONFIG_CACHE.set(context, {
|
|
412
|
-
key: contextKey,
|
|
413
|
-
config,
|
|
414
|
-
languages
|
|
415
|
-
});
|
|
416
|
-
} else if (cached.key !== contextKey) {
|
|
417
|
-
logger3.warn("applyContext", "Context already initialized; ignore conflicting locale/timezone.");
|
|
418
|
-
}
|
|
419
|
-
await context.setExtraHTTPHeaders({
|
|
420
|
-
"accept-language": effectiveConfig.acceptLanguage
|
|
421
|
-
});
|
|
422
|
-
if (isFirstInit) {
|
|
423
|
-
await context.addInitScript(({ locale, timezoneId, timezoneOffset, languages: languages2, applyLocaleTimezone }) => {
|
|
424
|
-
const originalDateTimeFormat = Intl.DateTimeFormat;
|
|
425
|
-
if (applyLocaleTimezone) {
|
|
426
|
-
Intl.DateTimeFormat = function(locales, initOptions) {
|
|
427
|
-
const nextLocales = locales || locale;
|
|
428
|
-
const nextOptions = initOptions ? { ...initOptions } : {};
|
|
429
|
-
nextOptions.timeZone = nextOptions.timeZone || timezoneId;
|
|
430
|
-
return new originalDateTimeFormat(nextLocales, nextOptions);
|
|
431
|
-
};
|
|
432
|
-
Intl.DateTimeFormat.prototype = originalDateTimeFormat.prototype;
|
|
433
|
-
Date.prototype.getTimezoneOffset = function() {
|
|
434
|
-
return timezoneOffset;
|
|
435
|
-
};
|
|
436
|
-
Object.defineProperty(navigator, "language", { get: () => languages2[0] });
|
|
437
|
-
Object.defineProperty(navigator, "languages", { get: () => languages2 });
|
|
438
|
-
}
|
|
439
|
-
Object.defineProperty(navigator, "webdriver", { get: () => void 0 });
|
|
440
|
-
}, {
|
|
441
|
-
locale: effectiveConfig.locale,
|
|
442
|
-
timezoneId: effectiveConfig.timezoneId,
|
|
443
|
-
timezoneOffset: effectiveConfig.timezoneOffset,
|
|
444
|
-
languages: effectiveLanguages,
|
|
445
|
-
applyLocaleTimezone: injectLocaleTimezone
|
|
446
|
-
});
|
|
447
|
-
}
|
|
448
|
-
if (effectiveConfig.geolocation) {
|
|
449
|
-
await context.setGeolocation(effectiveConfig.geolocation);
|
|
450
|
-
await context.grantPermissions(["geolocation"]);
|
|
451
|
-
}
|
|
452
|
-
if (permissions?.length) {
|
|
453
|
-
await context.grantPermissions(permissions);
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
function resolveConfig(overrides = {}) {
|
|
457
|
-
return {
|
|
458
|
-
...BASE_CONFIG,
|
|
459
|
-
...overrides,
|
|
460
|
-
geolocation: overrides.geolocation === null ? null : overrides.geolocation || BASE_CONFIG.geolocation
|
|
461
|
-
};
|
|
462
|
-
}
|
|
463
413
|
var AntiCheat = {
|
|
464
414
|
/**
|
|
465
|
-
*
|
|
415
|
+
* 获取统一的基础配置
|
|
466
416
|
*/
|
|
467
417
|
getBaseConfig() {
|
|
468
418
|
return { ...BASE_CONFIG };
|
|
@@ -479,100 +429,18 @@ var AntiCheat = {
|
|
|
479
429
|
getLaunchArgs() {
|
|
480
430
|
return [...DEFAULT_LAUNCH_ARGS];
|
|
481
431
|
},
|
|
482
|
-
/**
|
|
483
|
-
* 获取增强启动参数(高风险场景)。
|
|
484
|
-
*/
|
|
485
|
-
getAdvancedLaunchArgs() {
|
|
486
|
-
return [...ADVANCED_LAUNCH_ARGS];
|
|
487
|
-
},
|
|
488
|
-
/**
|
|
489
|
-
* 统一应用到 BrowserContext(时区/语言/权限/地理位置)。
|
|
490
|
-
*
|
|
491
|
-
* @param {import('playwright').BrowserContext} context
|
|
492
|
-
* @param {Object} [options]
|
|
493
|
-
* @param {string} [options.locale]
|
|
494
|
-
* @param {string} [options.acceptLanguage]
|
|
495
|
-
* @param {string} [options.timezoneId]
|
|
496
|
-
* @param {number} [options.timezoneOffset]
|
|
497
|
-
* @param {import('playwright').Geolocation|null} [options.geolocation]
|
|
498
|
-
* @param {string[]} [options.permissions]
|
|
499
|
-
*/
|
|
500
|
-
async applyContext(context, options = {}) {
|
|
501
|
-
const config = resolveConfig(options);
|
|
502
|
-
const languages = normalizeLanguages(config.acceptLanguage, config.locale);
|
|
503
|
-
const permissions = Array.isArray(options.permissions) ? options.permissions : [];
|
|
504
|
-
await applyContextSettings(context, config, languages, permissions, true);
|
|
505
|
-
logger3.success("applyContext", `${config.locale} | ${config.timezoneId}`);
|
|
506
|
-
},
|
|
507
|
-
/**
|
|
508
|
-
* 统一应用到 Page(Context + 视口同步)。
|
|
509
|
-
*
|
|
510
|
-
* @param {import('playwright').Page} page
|
|
511
|
-
* @param {Object} [options] - 传递给 applyContext 的选项
|
|
512
|
-
*/
|
|
513
|
-
async applyPage(page, options = {}) {
|
|
514
|
-
const config = resolveConfig(options);
|
|
515
|
-
const languages = normalizeLanguages(config.acceptLanguage, config.locale);
|
|
516
|
-
const permissions = Array.isArray(options.permissions) ? options.permissions : [];
|
|
517
|
-
let injectLocaleTimezone = true;
|
|
518
|
-
try {
|
|
519
|
-
const env = await page.evaluate(() => ({
|
|
520
|
-
language: navigator.language,
|
|
521
|
-
languages: Array.isArray(navigator.languages) ? navigator.languages : [],
|
|
522
|
-
timeZone: Intl.DateTimeFormat().resolvedOptions().timeZone,
|
|
523
|
-
tzOffset: (/* @__PURE__ */ new Date()).getTimezoneOffset()
|
|
524
|
-
}));
|
|
525
|
-
const languageMatch = env.language === languages[0];
|
|
526
|
-
const timeZoneMatch = env.timeZone === config.timezoneId && env.tzOffset === config.timezoneOffset;
|
|
527
|
-
injectLocaleTimezone = !(languageMatch && timeZoneMatch);
|
|
528
|
-
} catch (e) {
|
|
529
|
-
injectLocaleTimezone = true;
|
|
530
|
-
}
|
|
531
|
-
await applyContextSettings(page.context(), config, languages, permissions, injectLocaleTimezone);
|
|
532
|
-
await this.syncViewportWithScreen(page);
|
|
533
|
-
},
|
|
534
|
-
/**
|
|
535
|
-
* 同步 Page 视口到 window.screen,避免视口/屏幕不一致检测。
|
|
536
|
-
*/
|
|
537
|
-
async syncViewportWithScreen(page) {
|
|
538
|
-
try {
|
|
539
|
-
const screen = await page.evaluate(() => ({
|
|
540
|
-
width: window.screen.width,
|
|
541
|
-
height: window.screen.height
|
|
542
|
-
}));
|
|
543
|
-
await page.setViewportSize({
|
|
544
|
-
width: screen.width,
|
|
545
|
-
height: screen.height
|
|
546
|
-
});
|
|
547
|
-
logger3.success("syncViewport", `size=${screen.width}x${screen.height}`);
|
|
548
|
-
} catch (e) {
|
|
549
|
-
logger3.warn(`syncViewport \u5931\u8D25: ${e.message}\uFF0C\u56DE\u9000\u5230 1920x1080`);
|
|
550
|
-
await page.setViewportSize({ width: 1920, height: 1080 });
|
|
551
|
-
}
|
|
552
|
-
},
|
|
553
432
|
/**
|
|
554
433
|
* 为 got-scraping 生成与浏览器一致的 TLS 指纹配置(桌面端)。
|
|
555
|
-
*
|
|
556
|
-
* @param {string} [userAgent]
|
|
557
434
|
*/
|
|
558
435
|
getTlsFingerprintOptions(userAgent = "", acceptLanguage = "") {
|
|
559
|
-
|
|
560
|
-
const fingerprint = buildFingerprintOptions(primaryLocale);
|
|
561
|
-
const os = getOperatingSystemsFromUserAgent(userAgent);
|
|
562
|
-
if (os.length > 0) fingerprint.operatingSystems = os;
|
|
563
|
-
return fingerprint;
|
|
436
|
+
return buildFingerprintOptions(BASE_CONFIG.locale);
|
|
564
437
|
},
|
|
565
438
|
/**
|
|
566
|
-
*
|
|
567
|
-
*
|
|
568
|
-
* @param {Record<string, string>} headers
|
|
569
|
-
* @returns {Record<string, string>}
|
|
439
|
+
* 规范化请求头
|
|
570
440
|
*/
|
|
571
441
|
applyLocaleHeaders(headers, acceptLanguage = "") {
|
|
572
|
-
if (
|
|
573
|
-
headers["accept-language"] = acceptLanguage;
|
|
574
|
-
} else if (!headers["accept-language"]) {
|
|
575
|
-
headers["accept-language"] = BASE_CONFIG.acceptLanguage;
|
|
442
|
+
if (!headers["accept-language"]) {
|
|
443
|
+
headers["accept-language"] = acceptLanguage || BASE_CONFIG.acceptLanguage;
|
|
576
444
|
}
|
|
577
445
|
return headers;
|
|
578
446
|
}
|
|
@@ -1008,18 +876,6 @@ var Launch = {
|
|
|
1008
876
|
ignoreDefaultArgs: ["--enable-automation"]
|
|
1009
877
|
};
|
|
1010
878
|
},
|
|
1011
|
-
/**
|
|
1012
|
-
* 获取增强版启动选项(用于高风险反爬场景)
|
|
1013
|
-
*/
|
|
1014
|
-
getAdvancedLaunchOptions(customArgs = []) {
|
|
1015
|
-
return {
|
|
1016
|
-
args: [
|
|
1017
|
-
...AntiCheat.getAdvancedLaunchArgs(),
|
|
1018
|
-
...customArgs
|
|
1019
|
-
],
|
|
1020
|
-
ignoreDefaultArgs: ["--enable-automation"]
|
|
1021
|
-
};
|
|
1022
|
-
},
|
|
1023
879
|
/**
|
|
1024
880
|
* 推荐的 Fingerprint Generator 选项
|
|
1025
881
|
* 确保生成的是桌面端、较新的 Chrome,以匹配我们的脚本逻辑
|
|
@@ -1553,8 +1409,6 @@ var Interception = {
|
|
|
1553
1409
|
try {
|
|
1554
1410
|
const reqHeaders = await request.allHeaders();
|
|
1555
1411
|
delete reqHeaders["host"];
|
|
1556
|
-
const currentAcceptLanguage = reqHeaders["accept-language"] || "";
|
|
1557
|
-
AntiCheat.applyLocaleHeaders(reqHeaders, currentAcceptLanguage);
|
|
1558
1412
|
const resolvedAcceptLanguage = reqHeaders["accept-language"] || "";
|
|
1559
1413
|
const userAgent = reqHeaders["user-agent"] || "";
|
|
1560
1414
|
const method = request.method();
|
|
@@ -1568,8 +1422,8 @@ var Interception = {
|
|
|
1568
1422
|
body: postData,
|
|
1569
1423
|
responseType: "buffer",
|
|
1570
1424
|
// 强制获取 Buffer
|
|
1571
|
-
//
|
|
1572
|
-
headerGeneratorOptions:
|
|
1425
|
+
// 移除手动 TLS 指纹配置,使用 got-scraping 默认的高质量指纹
|
|
1426
|
+
// headerGeneratorOptions: ...
|
|
1573
1427
|
// 使用共享的 Agent 单例(keepAlive: false,不会池化连接)
|
|
1574
1428
|
agent: {
|
|
1575
1429
|
http: SHARED_HTTP_AGENT,
|