tt-help-cli-ycl 1.3.79 → 1.3.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/attach.js +30 -5
- package/src/cli/explore.js +11 -4
- package/src/cli/refresh.js +11 -4
- package/src/cli/test-real-attach.js +0 -0
- package/src/lib/browser/cdp.js +2 -1
- package/src/lib/parse-ssr.mjs +35 -0
- package/src/lib/tiktok-scraper.mjs +18 -0
package/package.json
CHANGED
package/src/cli/attach.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
2
|
+
import { CDNBlockedError } from "../lib/parse-ssr.mjs";
|
|
2
3
|
import { proxy as configuredProxy } from "../lib/constants.js";
|
|
3
4
|
import v8 from "node:v8";
|
|
4
5
|
|
|
@@ -249,6 +250,7 @@ export async function handleAttach(options) {
|
|
|
249
250
|
|
|
250
251
|
let successCount = 0;
|
|
251
252
|
let failCount = 0;
|
|
253
|
+
let cdnBlockedCount = 0;
|
|
252
254
|
let needRestart = false;
|
|
253
255
|
|
|
254
256
|
// 收集抓取成功的任务,记录抓取失败的
|
|
@@ -257,13 +259,20 @@ export async function handleAttach(options) {
|
|
|
257
259
|
if (result.status === "fulfilled") {
|
|
258
260
|
const { uniqueId, info, error } = result.value;
|
|
259
261
|
if (error) {
|
|
260
|
-
if (
|
|
262
|
+
if (error instanceof CDNBlockedError) {
|
|
263
|
+
attachLog(` ⚠ @${uniqueId} CDN限流 (Access Denied)`);
|
|
264
|
+
cdnBlockedCount++;
|
|
265
|
+
failCount++;
|
|
266
|
+
} else if (isBrowserClosedError(error)) {
|
|
261
267
|
needRestart = true;
|
|
268
|
+
attachLog(` ✗ @${uniqueId} 浏览器断开: ${error.message}`);
|
|
269
|
+
failCount++;
|
|
270
|
+
} else {
|
|
271
|
+
attachLog(
|
|
272
|
+
` ✗ @${uniqueId} 获取失败: ${error.message || "未知错误"}`,
|
|
273
|
+
);
|
|
274
|
+
failCount++;
|
|
262
275
|
}
|
|
263
|
-
attachLog(
|
|
264
|
-
` ✗ @${uniqueId} 获取失败: ${error.message || "未知错误"}`,
|
|
265
|
-
);
|
|
266
|
-
failCount++;
|
|
267
276
|
} else if (info) {
|
|
268
277
|
// info 可能是 { error: true, statusCode: xxx } 表示 TikTok 给了明确响应
|
|
269
278
|
if (info.error) {
|
|
@@ -327,6 +336,22 @@ export async function handleAttach(options) {
|
|
|
327
336
|
|
|
328
337
|
attachLog(` 本批结果: ${successCount} 成功, ${failCount} 失败\n`);
|
|
329
338
|
|
|
339
|
+
// CDN 限流比例超过 30% 时,冷却 + 重启浏览器
|
|
340
|
+
const cdnRatio = cdnBlockedCount / tasks.length;
|
|
341
|
+
if (cdnRatio > 0.3) {
|
|
342
|
+
const coolDownSeconds = cdnRatio > 0.8 ? 120 : 60;
|
|
343
|
+
attachLog(
|
|
344
|
+
` [Attach] CDN限流比例 ${(cdnRatio * 100).toFixed(0)}% (${cdnBlockedCount}/${tasks.length}),冷却 ${coolDownSeconds} 秒后重启浏览器...`,
|
|
345
|
+
);
|
|
346
|
+
await new Promise((r) => setTimeout(r, coolDownSeconds * 1000));
|
|
347
|
+
await recycleScraper(
|
|
348
|
+
scraper,
|
|
349
|
+
`CDN限流比例过高 (${cdnBlockedCount}/${tasks.length})`,
|
|
350
|
+
);
|
|
351
|
+
browserRestartCount++;
|
|
352
|
+
taskBatchCount = 0;
|
|
353
|
+
}
|
|
354
|
+
|
|
330
355
|
const heap = getHeapUsage();
|
|
331
356
|
if (heap.ratio >= HEAP_RESTART_RATIO) {
|
|
332
357
|
await recycleScraper(
|
package/src/cli/explore.js
CHANGED
|
@@ -29,6 +29,7 @@ import os from "os";
|
|
|
29
29
|
|
|
30
30
|
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
31
31
|
const STARTUP_TIKTOK_URL = "https://www.tiktok.com/@ycl5007";
|
|
32
|
+
const PAGE_GOTO_TIMEOUT = 60000; // 页面导航超时 60 秒(账户切换后需要更长时间)
|
|
32
33
|
|
|
33
34
|
async function withRetry(label, fn) {
|
|
34
35
|
let backoff = 1000;
|
|
@@ -181,8 +182,11 @@ export async function handleExplore(options) {
|
|
|
181
182
|
const page = await getOrCreatePage(browser);
|
|
182
183
|
|
|
183
184
|
// 先导航到 TikTok 页面,再检测登录状态
|
|
184
|
-
await
|
|
185
|
-
|
|
185
|
+
await withRetry("启动页面导航", async () => {
|
|
186
|
+
await page.goto(STARTUP_TIKTOK_URL, {
|
|
187
|
+
waitUntil: "domcontentloaded",
|
|
188
|
+
timeout: PAGE_GOTO_TIMEOUT,
|
|
189
|
+
});
|
|
186
190
|
});
|
|
187
191
|
|
|
188
192
|
// 检测登录状态(启动时只检测一次)
|
|
@@ -235,8 +239,11 @@ export async function handleExplore(options) {
|
|
|
235
239
|
`[健康检查] 已切换到端口 ${nextAccount.port}${effectiveProxy ? ", 代理: " + effectiveProxy : ""}`,
|
|
236
240
|
);
|
|
237
241
|
// 切换账户后先导航到 TikTok 页面,再重新检测登录状态
|
|
238
|
-
await
|
|
239
|
-
|
|
242
|
+
await withRetry("账户切换后页面导航", async () => {
|
|
243
|
+
await page.goto(STARTUP_TIKTOK_URL, {
|
|
244
|
+
waitUntil: "domcontentloaded",
|
|
245
|
+
timeout: PAGE_GOTO_TIMEOUT,
|
|
246
|
+
});
|
|
240
247
|
});
|
|
241
248
|
loggedIn = await safeCheckLogin(page);
|
|
242
249
|
console.error(
|
package/src/cli/refresh.js
CHANGED
|
@@ -28,6 +28,7 @@ import os from "os";
|
|
|
28
28
|
|
|
29
29
|
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
30
30
|
const STARTUP_TIKTOK_URL = "https://www.tiktok.com/@ycl5007";
|
|
31
|
+
const PAGE_GOTO_TIMEOUT = 60000; // 页面导航超时 60 秒(账户切换后需要更长时间)
|
|
31
32
|
|
|
32
33
|
async function withRetry(label, fn) {
|
|
33
34
|
let backoff = 1000;
|
|
@@ -181,8 +182,11 @@ export async function handleRefresh(options) {
|
|
|
181
182
|
const page = await getOrCreatePage(browser);
|
|
182
183
|
|
|
183
184
|
// 导航到 TikTok 页面
|
|
184
|
-
await
|
|
185
|
-
|
|
185
|
+
await withRetry("启动页面导航", async () => {
|
|
186
|
+
await page.goto(STARTUP_TIKTOK_URL, {
|
|
187
|
+
waitUntil: "domcontentloaded",
|
|
188
|
+
timeout: PAGE_GOTO_TIMEOUT,
|
|
189
|
+
});
|
|
186
190
|
});
|
|
187
191
|
|
|
188
192
|
// 检测登录状态
|
|
@@ -233,8 +237,11 @@ export async function handleRefresh(options) {
|
|
|
233
237
|
cdpOptions.proxyServer = effectiveProxy;
|
|
234
238
|
}
|
|
235
239
|
console.error(`[健康检查] 已切换到端口 ${nextAccount.port}`);
|
|
236
|
-
await
|
|
237
|
-
|
|
240
|
+
await withRetry("账户切换后页面导航", async () => {
|
|
241
|
+
await page.goto(STARTUP_TIKTOK_URL, {
|
|
242
|
+
waitUntil: "domcontentloaded",
|
|
243
|
+
timeout: PAGE_GOTO_TIMEOUT,
|
|
244
|
+
});
|
|
238
245
|
});
|
|
239
246
|
loggedIn = await isLoggedIn(page);
|
|
240
247
|
console.error(
|
|
File without changes
|
package/src/lib/browser/cdp.js
CHANGED
|
@@ -277,7 +277,8 @@ export async function switchAccount(oldAccount, newAccount, proxyServer) {
|
|
|
277
277
|
|
|
278
278
|
const browser = await ensureBrowserReady(newCdpOptions);
|
|
279
279
|
|
|
280
|
-
|
|
280
|
+
// 等待浏览器完全稳定(Windows 下 Edge 启动后需要更长时间)
|
|
281
|
+
await new Promise((r) => setTimeout(r, 15000));
|
|
281
282
|
|
|
282
283
|
return browser;
|
|
283
284
|
}
|
package/src/lib/parse-ssr.mjs
CHANGED
|
@@ -1,3 +1,25 @@
|
|
|
1
|
+
/**
 * Error raised when the CDN answers with an "Access Denied" page
 * (rate limiting / blocking, e.g. Akamai) instead of the requested document.
 *
 * Callers can tell it apart from other failures with `instanceof CDNBlockedError`.
 */
export class CDNBlockedError extends Error {
  // Stable identifier shown in logs and stack traces.
  name = "CDNBlockedError";

  /**
   * @param {string} [message] - Human-readable description of the block.
   * @param {string | null} [reference] - Reference id taken from the deny page, when one was found.
   */
  constructor(message = "CDN限流 (Access Denied)", reference) {
    super(message);
    this.reference = reference;
  }
}
|
|
11
|
+
|
|
12
|
+
/**
 * Detect whether an HTML document is a CDN "Access Denied" block page.
 *
 * A document is reported as blocked only when it contains the text
 * "Access Denied" AND does not carry the `__UNIVERSAL_DATA_FOR_REHYDRATION__`
 * SSR payload. The second condition prevents false positives on real pages
 * whose own content (bio, nickname, caption, ...) happens to include that phrase.
 *
 * @param {unknown} rawHtml - Raw HTML text of the fetched page.
 * @returns {{ isBlocked: true, reference: string | null } | null}
 *   A descriptor with the deny page's reference id (or `null` when none is
 *   present), or `null` when the input is not a non-empty string or is not a
 *   block page.
 */
export function detectAccessDenied(rawHtml) {
  if (typeof rawHtml !== "string" || rawHtml === "") return null;
  if (!rawHtml.includes("Access Denied")) return null;

  // A page that ships the SSR hydration payload is a genuine response, not a
  // CDN deny page, even if the phrase appears somewhere in its content.
  if (rawHtml.includes("__UNIVERSAL_DATA_FOR_REHYDRATION__")) return null;

  // Accept both the literal form ("Reference #18.abc") and the HTML-entity
  // form ("Reference&#32;&#35;18&#46;abc") of the reference line.
  // NOTE(review): the entity form is what unparsed deny-page markup is expected
  // to look like — confirm against a captured response.
  const refMatch = rawHtml.match(
    /Reference(?:\s|&#32;)*(?:#|&#35;)(?:\s|&#32;)*((?:[\w.]|&#46;)+)/,
  );
  const reference = refMatch ? refMatch[1].replace(/&#46;/g, ".") : null;

  return { isBlocked: true, reference };
}
|
|
22
|
+
|
|
1
23
|
/**
|
|
2
24
|
* 判断失败是否可重试
|
|
3
25
|
* - 有 statusCode(无论值是多少):TikTok 给了明确响应,不可重试
|
|
@@ -5,6 +27,10 @@
|
|
|
5
27
|
*/
|
|
6
28
|
export function isRetryableFailure(rawHtml) {
|
|
7
29
|
if (!rawHtml || typeof rawHtml !== "string") return false;
|
|
30
|
+
// Access Denied = CDN 限流 = 可重试
|
|
31
|
+
if (detectAccessDenied(rawHtml)) {
|
|
32
|
+
return true;
|
|
33
|
+
}
|
|
8
34
|
// 没有 SSR 标记 = 空壳 HTML = 可重试
|
|
9
35
|
if (!rawHtml.includes("__UNIVERSAL_DATA_FOR_REHYDRATION__")) {
|
|
10
36
|
return true;
|
|
@@ -49,6 +75,15 @@ function parseSSR(rawHtml) {
|
|
|
49
75
|
}
|
|
50
76
|
|
|
51
77
|
export function parseUserInfo(rawHtml) {
|
|
78
|
+
// 先检查 CDN 限流
|
|
79
|
+
const denied = detectAccessDenied(rawHtml);
|
|
80
|
+
if (denied) {
|
|
81
|
+
throw new CDNBlockedError(
|
|
82
|
+
`CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
|
|
83
|
+
denied.reference,
|
|
84
|
+
);
|
|
85
|
+
}
|
|
86
|
+
|
|
52
87
|
const data = parseSSR(rawHtml);
|
|
53
88
|
if (!data) return null;
|
|
54
89
|
const scopeKeys = data.__DEFAULT_SCOPE__
|
|
@@ -4,6 +4,8 @@ import {
|
|
|
4
4
|
parseUserInfo,
|
|
5
5
|
parseVideoInfo,
|
|
6
6
|
isRetryableFailure,
|
|
7
|
+
CDNBlockedError,
|
|
8
|
+
detectAccessDenied,
|
|
7
9
|
} from "./parse-ssr.mjs";
|
|
8
10
|
|
|
9
11
|
const DEFAULT_POOL_SIZE = 3;
|
|
@@ -225,6 +227,14 @@ export class TikTokScraper {
|
|
|
225
227
|
`https://www.tiktok.com/@${uniqueId}`,
|
|
226
228
|
slot,
|
|
227
229
|
);
|
|
230
|
+
// CDN 限流立即抛出,不重试
|
|
231
|
+
if (detectAccessDenied(rawHtml)) {
|
|
232
|
+
const denied = detectAccessDenied(rawHtml);
|
|
233
|
+
throw new CDNBlockedError(
|
|
234
|
+
`CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
|
|
235
|
+
denied.reference,
|
|
236
|
+
);
|
|
237
|
+
}
|
|
228
238
|
let result = parseUserInfo(rawHtml);
|
|
229
239
|
for (let attempt = 1; !result && attempt <= maxRetries; attempt++) {
|
|
230
240
|
// 检查是否值得重试:用户异常/不存在则跳过重试
|
|
@@ -239,6 +249,14 @@ export class TikTokScraper {
|
|
|
239
249
|
`https://www.tiktok.com/@${uniqueId}`,
|
|
240
250
|
slot,
|
|
241
251
|
);
|
|
252
|
+
// 重试中也检查 CDN 限流
|
|
253
|
+
if (detectAccessDenied(rawHtml)) {
|
|
254
|
+
const denied = detectAccessDenied(rawHtml);
|
|
255
|
+
throw new CDNBlockedError(
|
|
256
|
+
`CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
|
|
257
|
+
denied.reference,
|
|
258
|
+
);
|
|
259
|
+
}
|
|
242
260
|
result = parseUserInfo(rawHtml);
|
|
243
261
|
}
|
|
244
262
|
return result || null;
|