tt-help-cli-ycl 1.3.39 → 1.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/attach.js +112 -51
- package/src/cli/explore.js +64 -29
- package/src/lib/api-interceptor.js +6 -6
- package/src/lib/args.js +4 -0
- package/src/lib/constants.js +1 -0
- package/src/lib/page-error-detector.js +6 -2
- package/src/lib/tiktok-scraper.mjs +25 -2
- package/src/scraper/explore-core.js +9 -0
- package/src/scraper/modules/follow-extractor.js +31 -4
- package/src/videos/core.js +1 -1
package/package.json
CHANGED
package/src/cli/attach.js
CHANGED
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
2
|
+
import v8 from "node:v8";
|
|
2
3
|
|
|
3
4
|
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
5
|
+
const HEAP_RESTART_RATIO = 0.72;
|
|
6
|
+
const MAX_TASK_BATCHES_BEFORE_RESTART = 200;
|
|
7
|
+
|
|
8
|
+
function formatNow() {
|
|
9
|
+
return new Date().toLocaleString("zh-CN", {
|
|
10
|
+
hour12: false,
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function attachLog(message = "") {
|
|
15
|
+
console.error(`[${formatNow()}] ${message}`);
|
|
16
|
+
}
|
|
4
17
|
|
|
5
18
|
async function withRetry(label, fn) {
|
|
6
19
|
let backoff = 1000;
|
|
@@ -8,7 +21,7 @@ async function withRetry(label, fn) {
|
|
|
8
21
|
try {
|
|
9
22
|
return await fn();
|
|
10
23
|
} catch (err) {
|
|
11
|
-
|
|
24
|
+
attachLog(
|
|
12
25
|
`[连接] ${label} 失败: ${err.message},${backoff / 1000}秒后重试...`,
|
|
13
26
|
);
|
|
14
27
|
await new Promise((r) => setTimeout(r, backoff));
|
|
@@ -46,40 +59,52 @@ function isBrowserClosedError(err) {
|
|
|
46
59
|
);
|
|
47
60
|
}
|
|
48
61
|
|
|
62
|
+
function getHeapUsage() {
|
|
63
|
+
const heapUsed = process.memoryUsage().heapUsed;
|
|
64
|
+
const heapLimit = v8.getHeapStatistics().heap_size_limit;
|
|
65
|
+
return {
|
|
66
|
+
heapUsed,
|
|
67
|
+
heapLimit,
|
|
68
|
+
ratio: heapLimit > 0 ? heapUsed / heapLimit : 0,
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
async function recycleScraper(scraper, reason) {
|
|
73
|
+
attachLog(`[Attach] ${reason},正在重启浏览器实例...`);
|
|
74
|
+
await scraper.restart();
|
|
75
|
+
attachLog("[Attach] 浏览器实例重启完成");
|
|
76
|
+
}
|
|
77
|
+
|
|
49
78
|
export async function handleAttach(options) {
|
|
50
79
|
const { attachParallel, attachInterval, serverUrl, showHelp } = options;
|
|
51
80
|
let shuttingDown = false;
|
|
52
81
|
let forceExitTimer = null;
|
|
53
82
|
|
|
54
83
|
if (showHelp) {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
console.error(
|
|
62
|
-
" -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)",
|
|
63
|
-
);
|
|
64
|
-
console.error(
|
|
84
|
+
attachLog("用法: tt-help attach [-p 并行数] [-i 间隔秒数] [-s 服务端地址]");
|
|
85
|
+
attachLog("");
|
|
86
|
+
attachLog("参数:");
|
|
87
|
+
attachLog(" -p, --parallel <N> 并行抓取数(默认: 1)");
|
|
88
|
+
attachLog(" -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)");
|
|
89
|
+
attachLog(
|
|
65
90
|
" -s, --server <URL> 服务端地址(默认: http://127.0.0.1:3001)",
|
|
66
91
|
);
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
92
|
+
attachLog("");
|
|
93
|
+
attachLog("说明:");
|
|
94
|
+
attachLog(
|
|
70
95
|
" 后台轮询服务端 /api/user-update-tasks 接口,自动抓取 TikTok 用户信息",
|
|
71
96
|
);
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
97
|
+
attachLog(" 抓取完成后通过 POST /api/user-info-batch 批量回传结果");
|
|
98
|
+
attachLog(" 浏览器崩溃时自动重启,支持长时间无人值守运行");
|
|
99
|
+
attachLog("");
|
|
100
|
+
attachLog("示例:");
|
|
101
|
+
attachLog(" tt-help attach");
|
|
102
|
+
attachLog(" tt-help attach -p 5 -i 10");
|
|
103
|
+
attachLog(" tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001");
|
|
79
104
|
return;
|
|
80
105
|
}
|
|
81
106
|
|
|
82
|
-
|
|
107
|
+
attachLog(
|
|
83
108
|
`[Attach] 并行数: ${attachParallel}, 空闲间隔: ${attachInterval}秒, 服务端: ${serverUrl}`,
|
|
84
109
|
);
|
|
85
110
|
|
|
@@ -88,18 +113,11 @@ export async function handleAttach(options) {
|
|
|
88
113
|
if (shuttingDown) return;
|
|
89
114
|
shuttingDown = true;
|
|
90
115
|
forceExitTimer = setTimeout(() => {
|
|
91
|
-
|
|
116
|
+
attachLog("[Attach] 关闭超时,强制退出");
|
|
92
117
|
process.exit(0);
|
|
93
|
-
},
|
|
118
|
+
}, 30000);
|
|
94
119
|
forceExitTimer.unref?.();
|
|
95
|
-
|
|
96
|
-
await scraper.close().catch(() => {});
|
|
97
|
-
if (forceExitTimer) {
|
|
98
|
-
clearTimeout(forceExitTimer);
|
|
99
|
-
forceExitTimer = null;
|
|
100
|
-
}
|
|
101
|
-
console.error("[Attach] 已退出");
|
|
102
|
-
process.exit(0);
|
|
120
|
+
attachLog(`\n[Attach] 收到 ${signal},等待当前任务完成后退出...`);
|
|
103
121
|
};
|
|
104
122
|
|
|
105
123
|
const onSigint = () => {
|
|
@@ -114,21 +132,34 @@ export async function handleAttach(options) {
|
|
|
114
132
|
|
|
115
133
|
try {
|
|
116
134
|
await scraper.init();
|
|
117
|
-
|
|
135
|
+
attachLog("[Attach] 浏览器初始化完成,开始循环接收任务...");
|
|
118
136
|
|
|
119
137
|
let loopCount = 0;
|
|
120
138
|
let browserRestartCount = 0;
|
|
139
|
+
let taskBatchCount = 0;
|
|
121
140
|
|
|
122
141
|
while (!shuttingDown) {
|
|
123
142
|
loopCount++;
|
|
124
143
|
|
|
125
144
|
// 检查浏览器是否存活,不存活则重启
|
|
126
145
|
if (!scraper.isAlive()) {
|
|
127
|
-
|
|
146
|
+
attachLog(
|
|
128
147
|
`[Attach] 浏览器已关闭,正在重启 (${++browserRestartCount})...`,
|
|
129
148
|
);
|
|
130
149
|
await scraper.restart();
|
|
131
|
-
|
|
150
|
+
attachLog("[Attach] 浏览器重启完成");
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// 收到退出信号且当前无任务正在处理,则退出
|
|
154
|
+
if (shuttingDown) {
|
|
155
|
+
attachLog("[Attach] 当前任务已完成,正在关闭浏览器...");
|
|
156
|
+
await scraper.close().catch(() => {});
|
|
157
|
+
if (forceExitTimer) {
|
|
158
|
+
clearTimeout(forceExitTimer);
|
|
159
|
+
forceExitTimer = null;
|
|
160
|
+
}
|
|
161
|
+
attachLog("[Attach] 已退出");
|
|
162
|
+
process.exit(0);
|
|
132
163
|
}
|
|
133
164
|
|
|
134
165
|
const { total, tasks } = await apiGet(
|
|
@@ -136,21 +167,33 @@ export async function handleAttach(options) {
|
|
|
136
167
|
);
|
|
137
168
|
|
|
138
169
|
if (!tasks || tasks.length === 0) {
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
170
|
+
const heap = getHeapUsage();
|
|
171
|
+
if (heap.ratio >= HEAP_RESTART_RATIO) {
|
|
172
|
+
await recycleScraper(
|
|
173
|
+
scraper,
|
|
174
|
+
`空闲时堆占用 ${(heap.ratio * 100).toFixed(1)}%`,
|
|
142
175
|
);
|
|
143
176
|
}
|
|
177
|
+
if (loopCount === 1) {
|
|
178
|
+
attachLog(`[Attach] 当前无待更新任务,${attachInterval}秒后重试...`);
|
|
179
|
+
}
|
|
180
|
+
// 空闲等待时可中断
|
|
144
181
|
await new Promise((r) => setTimeout(r, attachInterval * 1000));
|
|
145
182
|
continue;
|
|
146
183
|
}
|
|
147
184
|
|
|
148
|
-
|
|
185
|
+
attachLog(`\n[Attach] 获取到 ${tasks.length} 个待更新任务...`);
|
|
186
|
+
taskBatchCount++;
|
|
187
|
+
|
|
188
|
+
// 收到退出信号但已有任务在跑,继续完成当前批次
|
|
189
|
+
if (shuttingDown) {
|
|
190
|
+
attachLog("[Attach] 正在处理当前批次任务,完成后退出...");
|
|
191
|
+
}
|
|
149
192
|
|
|
150
193
|
const results = await Promise.allSettled(
|
|
151
194
|
tasks.map(async (task) => {
|
|
152
195
|
const uniqueId = task.uniqueId;
|
|
153
|
-
|
|
196
|
+
attachLog(` → 获取 @${uniqueId} 的用户信息...`);
|
|
154
197
|
try {
|
|
155
198
|
const info = await scraper.getUserInfo(uniqueId);
|
|
156
199
|
return { uniqueId, info, error: null };
|
|
@@ -173,18 +216,18 @@ export async function handleAttach(options) {
|
|
|
173
216
|
if (isBrowserClosedError(error)) {
|
|
174
217
|
needRestart = true;
|
|
175
218
|
}
|
|
176
|
-
|
|
219
|
+
attachLog(
|
|
177
220
|
` ✗ @${uniqueId} 获取失败: ${error.message || "未知错误"}`,
|
|
178
221
|
);
|
|
179
222
|
failCount++;
|
|
180
223
|
} else if (info) {
|
|
181
224
|
successTasks.push({ uniqueId, info });
|
|
182
225
|
} else {
|
|
183
|
-
|
|
226
|
+
attachLog(` ✗ @${uniqueId} 未获取到用户信息`);
|
|
184
227
|
failCount++;
|
|
185
228
|
}
|
|
186
229
|
} else {
|
|
187
|
-
|
|
230
|
+
attachLog(
|
|
188
231
|
` ✗ 任务执行异常: ${result.reason?.message || "未知错误"}`,
|
|
189
232
|
);
|
|
190
233
|
failCount++;
|
|
@@ -201,40 +244,58 @@ export async function handleAttach(options) {
|
|
|
201
244
|
for (const r of batchRet.results) {
|
|
202
245
|
if (r.ok) {
|
|
203
246
|
successCount++;
|
|
204
|
-
|
|
247
|
+
attachLog(` ✓ @${r.uniqueId} 已提交更新`);
|
|
205
248
|
} else {
|
|
206
249
|
failCount++;
|
|
207
|
-
|
|
250
|
+
attachLog(` ✗ @${r.uniqueId} 提交失败: ${r.error}`);
|
|
208
251
|
}
|
|
209
252
|
}
|
|
210
253
|
} else {
|
|
211
254
|
successCount = successTasks.length;
|
|
212
|
-
|
|
255
|
+
attachLog(` ✓ 批量提交完成 (${successTasks.length} 条)`);
|
|
213
256
|
}
|
|
214
257
|
} catch (err) {
|
|
215
258
|
failCount += successTasks.length;
|
|
216
|
-
|
|
259
|
+
attachLog(` ✗ 批量提交失败: ${err.message}`);
|
|
217
260
|
}
|
|
218
261
|
}
|
|
219
262
|
|
|
220
|
-
|
|
263
|
+
attachLog(` 本批结果: ${successCount} 成功, ${failCount} 失败\n`);
|
|
264
|
+
|
|
265
|
+
const heap = getHeapUsage();
|
|
266
|
+
if (heap.ratio >= HEAP_RESTART_RATIO) {
|
|
267
|
+
await recycleScraper(
|
|
268
|
+
scraper,
|
|
269
|
+
`本批后堆占用 ${(heap.ratio * 100).toFixed(1)}%`,
|
|
270
|
+
);
|
|
271
|
+
browserRestartCount++;
|
|
272
|
+
taskBatchCount = 0;
|
|
273
|
+
} else if (taskBatchCount >= MAX_TASK_BATCHES_BEFORE_RESTART) {
|
|
274
|
+
await recycleScraper(scraper, `已连续处理 ${taskBatchCount} 批任务`);
|
|
275
|
+
browserRestartCount++;
|
|
276
|
+
taskBatchCount = 0;
|
|
277
|
+
}
|
|
221
278
|
|
|
222
279
|
if (needRestart) {
|
|
223
|
-
|
|
280
|
+
attachLog("[Attach] 检测到浏览器异常,将在下一轮重启...");
|
|
224
281
|
}
|
|
225
282
|
|
|
226
283
|
await new Promise((r) => setTimeout(r, 500));
|
|
227
284
|
}
|
|
228
285
|
} catch (err) {
|
|
229
|
-
|
|
286
|
+
attachLog(`[Attach] 运行异常: ${err.message}`);
|
|
230
287
|
throw err;
|
|
231
288
|
} finally {
|
|
289
|
+
// 正常退出(非信号触发)或超时强制退出时的清理
|
|
232
290
|
if (forceExitTimer) {
|
|
233
291
|
clearTimeout(forceExitTimer);
|
|
234
292
|
forceExitTimer = null;
|
|
235
293
|
}
|
|
236
294
|
process.removeListener("SIGINT", onSigint);
|
|
237
295
|
process.removeListener("SIGTERM", onSigterm);
|
|
238
|
-
|
|
296
|
+
// 只有在非信号退出时才关闭浏览器(信号退出已在循环内关闭)
|
|
297
|
+
if (!shuttingDown) {
|
|
298
|
+
await scraper.close().catch(() => {});
|
|
299
|
+
}
|
|
239
300
|
}
|
|
240
301
|
}
|
package/src/cli/explore.js
CHANGED
|
@@ -62,6 +62,7 @@ export async function handleExplore(options) {
|
|
|
62
62
|
const {
|
|
63
63
|
exploreUsernames,
|
|
64
64
|
explorePreset,
|
|
65
|
+
exploreInterval,
|
|
65
66
|
exploreMaxComments,
|
|
66
67
|
exploreMaxGuess,
|
|
67
68
|
exploreEnableFollow,
|
|
@@ -123,6 +124,7 @@ export async function handleExplore(options) {
|
|
|
123
124
|
if (exploreJobLocations) console.error(`任务国家: ${exploreJobLocations}`);
|
|
124
125
|
console.error(`视频采集: ${exploreMaxVideos || 1}`);
|
|
125
126
|
console.error(`关注/粉丝: ${exploreEnableFollow ? "启用" : "禁用"}`);
|
|
127
|
+
console.error(`空闲间隔: ${exploreInterval} 秒`);
|
|
126
128
|
console.error(`服务器: ${serverUrl}(断开会自动重连)`);
|
|
127
129
|
if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
|
|
128
130
|
|
|
@@ -239,7 +241,11 @@ export async function handleExplore(options) {
|
|
|
239
241
|
? `${serverUrl}/api/job?userId=${encodeURIComponent(userId)}&locations=${encodeURIComponent(exploreJobLocations)}&loggedIn=${loggedIn}`
|
|
240
242
|
: `${serverUrl}/api/job?userId=${encodeURIComponent(userId)}&loggedIn=${loggedIn}`;
|
|
241
243
|
const job = await apiGet(jobQuery);
|
|
242
|
-
if (!job.hasJob)
|
|
244
|
+
if (!job.hasJob) {
|
|
245
|
+
console.error(`\n[Explore] 当前无任务,${exploreInterval} 秒后重试...`);
|
|
246
|
+
await new Promise((r) => setTimeout(r, exploreInterval * 1000));
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
243
249
|
|
|
244
250
|
const username = job.user.uniqueId;
|
|
245
251
|
processedCount++;
|
|
@@ -393,38 +399,67 @@ export async function handleExplore(options) {
|
|
|
393
399
|
if (result.error) {
|
|
394
400
|
consecutiveNetworkErrors++;
|
|
395
401
|
errorCount++;
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
: "other";
|
|
404
|
-
await withRetry("report error", () =>
|
|
405
|
-
apiPost(`${serverUrl}/api/error-report`, {
|
|
406
|
-
userId,
|
|
402
|
+
|
|
403
|
+
// 临时性错误:自动重试一次
|
|
404
|
+
if (result.retryable) {
|
|
405
|
+
console.error(` [临时错误] 等待 5 秒后重试 @${username}...`);
|
|
406
|
+
await new Promise((r) => setTimeout(r, 5000));
|
|
407
|
+
result = await processExplore(
|
|
408
|
+
page,
|
|
407
409
|
username,
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
410
|
+
{
|
|
411
|
+
maxVideos: exploreMaxVideos,
|
|
412
|
+
enableFollow: exploreEnableFollow,
|
|
413
|
+
loggedIn,
|
|
414
|
+
maxFollowing: exploreMaxFollowing,
|
|
415
|
+
maxFollowers: exploreMaxFollowers,
|
|
416
|
+
location: exploreLocation,
|
|
417
|
+
browser,
|
|
418
|
+
},
|
|
419
|
+
console.error,
|
|
420
|
+
);
|
|
421
|
+
// 重试成功后继续正常流程
|
|
422
|
+
if (!result.error) {
|
|
423
|
+
consecutiveNetworkErrors = 0;
|
|
424
|
+
errorCount--;
|
|
420
425
|
}
|
|
421
|
-
continue;
|
|
422
426
|
}
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
427
|
+
|
|
428
|
+
if (result.error) {
|
|
429
|
+
// 上报错误(重试后仍有错误才上报)
|
|
430
|
+
await apiPost(`${serverUrl}/api/job/${username}`, {
|
|
431
|
+
error: result.error,
|
|
432
|
+
});
|
|
433
|
+
const errorType = result.error.startsWith("被封:")
|
|
434
|
+
? "被封"
|
|
435
|
+
: consecutiveNetworkErrors > 1
|
|
436
|
+
? "network"
|
|
437
|
+
: "other";
|
|
438
|
+
await withRetry("report error", () =>
|
|
439
|
+
apiPost(`${serverUrl}/api/error-report`, {
|
|
440
|
+
userId,
|
|
441
|
+
username,
|
|
442
|
+
errorType,
|
|
443
|
+
errorMessage: result.error,
|
|
444
|
+
stage: "process",
|
|
445
|
+
errorStack: result.errorStack || "",
|
|
446
|
+
}),
|
|
447
|
+
).catch(() => {});
|
|
448
|
+
if (errorType === "被封") {
|
|
449
|
+
blockedCount++;
|
|
450
|
+
console.error(` [被封] 累计 ${blockedCount} 次`);
|
|
451
|
+
if (blockedCount >= 3) {
|
|
452
|
+
await handleAccountSwitch(`账号被封累计 ${blockedCount} 次`);
|
|
453
|
+
blockedCount = 0;
|
|
454
|
+
}
|
|
455
|
+
continue;
|
|
456
|
+
}
|
|
457
|
+
if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
|
|
458
|
+
console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
|
|
459
|
+
break;
|
|
460
|
+
}
|
|
461
|
+
continue;
|
|
426
462
|
}
|
|
427
|
-
continue;
|
|
428
463
|
}
|
|
429
464
|
|
|
430
465
|
if (result.captchaDetected) {
|
|
@@ -42,7 +42,7 @@ async function processAPIResponse(
|
|
|
42
42
|
}, newUrl);
|
|
43
43
|
} catch (e) {
|
|
44
44
|
if (
|
|
45
|
-
e.message.includes(
|
|
45
|
+
e.message.includes("Execution context was destroyed") &&
|
|
46
46
|
i < retries - 1
|
|
47
47
|
) {
|
|
48
48
|
await delay(500 * (i + 1), 500 * (i + 1));
|
|
@@ -102,10 +102,10 @@ async function fetchUserVideosAPI(page, username, maxVideos, log) {
|
|
|
102
102
|
let interceptionError = null;
|
|
103
103
|
|
|
104
104
|
try {
|
|
105
|
-
await page.goto(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
);
|
|
105
|
+
await page.goto(`https://www.tiktok.com/@${username}`, {
|
|
106
|
+
waitUntil: "domcontentloaded",
|
|
107
|
+
timeout: 30000,
|
|
108
|
+
});
|
|
109
109
|
await assertPageUrl(page, `@${username}`);
|
|
110
110
|
|
|
111
111
|
const response = await page.waitForResponse(
|
|
@@ -158,4 +158,4 @@ async function fetchUserVideosAPI(page, username, maxVideos, log) {
|
|
|
158
158
|
return new Map();
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
export { fetchUserVideosAPI };
|
|
161
|
+
export { fetchUserVideosAPI };
|
package/src/lib/args.js
CHANGED
|
@@ -165,6 +165,7 @@ function parseAutoArgs(args) {
|
|
|
165
165
|
function parseExploreArgs(args) {
|
|
166
166
|
let serverUrl = defaultServer;
|
|
167
167
|
let explorePreset = "normal";
|
|
168
|
+
let exploreInterval = 10;
|
|
168
169
|
let exploreMaxComments = 10;
|
|
169
170
|
let exploreMaxGuess = 0;
|
|
170
171
|
let exploreEnableFollow = true;
|
|
@@ -186,6 +187,8 @@ function parseExploreArgs(args) {
|
|
|
186
187
|
const arg = args[i];
|
|
187
188
|
if (arg === "--server") {
|
|
188
189
|
serverUrl = args[++i];
|
|
190
|
+
} else if (arg === "-i" || arg === "--interval") {
|
|
191
|
+
exploreInterval = parseInt(args[++i], 10) || 10;
|
|
189
192
|
} else if (arg === "--max-comments") {
|
|
190
193
|
exploreMaxComments = parseInt(args[++i]) || 0;
|
|
191
194
|
} else if (arg === "--max-guess") {
|
|
@@ -239,6 +242,7 @@ function parseExploreArgs(args) {
|
|
|
239
242
|
subcommand: "explore",
|
|
240
243
|
exploreUsernames: usernames,
|
|
241
244
|
explorePreset,
|
|
245
|
+
exploreInterval,
|
|
242
246
|
exploreMaxComments,
|
|
243
247
|
exploreMaxGuess,
|
|
244
248
|
exploreEnableFollow,
|
package/src/lib/constants.js
CHANGED
|
@@ -131,6 +131,7 @@ const HELP_TEXT = [
|
|
|
131
131
|
" --max-following <数量> 最大获取关注数,默认 50(同时设置粉丝数)",
|
|
132
132
|
" --max-followers <数量> 最大获取粉丝数,默认 50",
|
|
133
133
|
" --max-users <数量> 最大处理用户数,默认无限制",
|
|
134
|
+
" -i, --interval <秒数> 无任务时轮询间隔,默认 10 秒",
|
|
134
135
|
" --port <端口号> 固定 CDP 端口(调试用,关闭自动轮换)",
|
|
135
136
|
" --base-port <端口号> 起始端口,默认 9222",
|
|
136
137
|
" --port-count <数量> 端口数量(账户数),默认 10",
|
|
@@ -56,7 +56,9 @@ const PATTERNS = {
|
|
|
56
56
|
|
|
57
57
|
export async function detectPageError(page) {
|
|
58
58
|
return page.evaluate((patterns) => {
|
|
59
|
-
const
|
|
59
|
+
const body = document.body;
|
|
60
|
+
if (!body) return null;
|
|
61
|
+
const bodyText = body.innerText;
|
|
60
62
|
const lower = bodyText.toLowerCase();
|
|
61
63
|
|
|
62
64
|
for (const [type, phrases] of Object.entries(patterns)) {
|
|
@@ -81,7 +83,9 @@ export async function detectPageErrorWithWait(page, timeout = 8000) {
|
|
|
81
83
|
try {
|
|
82
84
|
const handle = await page.waitForFunction(
|
|
83
85
|
(patterns) => {
|
|
84
|
-
const
|
|
86
|
+
const body = document.body;
|
|
87
|
+
if (!body) return null;
|
|
88
|
+
const bodyText = body.innerText;
|
|
85
89
|
const lower = bodyText.toLowerCase();
|
|
86
90
|
|
|
87
91
|
for (const [type, phrases] of Object.entries(patterns)) {
|
|
@@ -6,6 +6,7 @@ const DEFAULT_POOL_SIZE = 3;
|
|
|
6
6
|
const DEFAULT_WAF_TTL = 120000;
|
|
7
7
|
const DEFAULT_WARM_URL = "https://www.tiktok.com/@nike";
|
|
8
8
|
const BROWSER_CLOSE_TIMEOUT = 5000;
|
|
9
|
+
const DEFAULT_MAX_REQUESTS_PER_PAGE = 50;
|
|
9
10
|
|
|
10
11
|
function delay(ms) {
|
|
11
12
|
return new Promise((r) => setTimeout(r, ms));
|
|
@@ -15,6 +16,7 @@ class PageSlot {
|
|
|
15
16
|
constructor(page) {
|
|
16
17
|
this.page = page;
|
|
17
18
|
this.lock = new PromiseQueue();
|
|
19
|
+
this.requestCount = 0;
|
|
18
20
|
}
|
|
19
21
|
}
|
|
20
22
|
|
|
@@ -50,10 +52,12 @@ export class TikTokScraper {
|
|
|
50
52
|
poolSize = DEFAULT_POOL_SIZE,
|
|
51
53
|
wafTtl = DEFAULT_WAF_TTL,
|
|
52
54
|
warmUrl = DEFAULT_WARM_URL,
|
|
55
|
+
maxRequestsPerPage = DEFAULT_MAX_REQUESTS_PER_PAGE,
|
|
53
56
|
} = {}) {
|
|
54
57
|
this.poolSize = poolSize;
|
|
55
58
|
this.wafTtl = wafTtl;
|
|
56
59
|
this.warmUrl = warmUrl;
|
|
60
|
+
this.maxRequestsPerPage = maxRequestsPerPage;
|
|
57
61
|
this.browser = null;
|
|
58
62
|
this.context = null;
|
|
59
63
|
this.slots = [];
|
|
@@ -160,9 +164,16 @@ export class TikTokScraper {
|
|
|
160
164
|
|
|
161
165
|
async _ensurePage(slot) {
|
|
162
166
|
try {
|
|
163
|
-
if (
|
|
167
|
+
if (
|
|
168
|
+
!slot.page.isClosed() &&
|
|
169
|
+
slot.requestCount < this.maxRequestsPerPage
|
|
170
|
+
) {
|
|
171
|
+
return slot.page;
|
|
172
|
+
}
|
|
164
173
|
} catch {}
|
|
174
|
+
await slot.page?.close().catch(() => {});
|
|
165
175
|
slot.page = await this.context.newPage();
|
|
176
|
+
slot.requestCount = 0;
|
|
166
177
|
return slot.page;
|
|
167
178
|
}
|
|
168
179
|
|
|
@@ -174,7 +185,7 @@ export class TikTokScraper {
|
|
|
174
185
|
timeout: 15000,
|
|
175
186
|
});
|
|
176
187
|
|
|
177
|
-
|
|
188
|
+
const content = await page.evaluate(() => {
|
|
178
189
|
const rows = document.querySelectorAll("tr");
|
|
179
190
|
let content = "";
|
|
180
191
|
rows.forEach((r) => {
|
|
@@ -183,6 +194,18 @@ export class TikTokScraper {
|
|
|
183
194
|
});
|
|
184
195
|
return content;
|
|
185
196
|
});
|
|
197
|
+
|
|
198
|
+
// 导航到 about:blank 释放当前页面的 DOM 和 JS 堆
|
|
199
|
+
await page
|
|
200
|
+
.goto("about:blank", {
|
|
201
|
+
waitUntil: "domcontentloaded",
|
|
202
|
+
timeout: 5000,
|
|
203
|
+
})
|
|
204
|
+
.catch(() => {});
|
|
205
|
+
|
|
206
|
+
slot.requestCount += 1;
|
|
207
|
+
|
|
208
|
+
return content;
|
|
186
209
|
}
|
|
187
210
|
|
|
188
211
|
async getUserInfo(uniqueId) {
|
|
@@ -173,6 +173,15 @@ async function processExplore(page, username, options, log) {
|
|
|
173
173
|
result.noVideo = false;
|
|
174
174
|
log(` @${username} 认定为被封`);
|
|
175
175
|
}
|
|
176
|
+
|
|
177
|
+
// 临时性错误:超时、执行上下文销毁、网络异常 — 标记为可重试
|
|
178
|
+
const retryablePatterns = [
|
|
179
|
+
"Timeout",
|
|
180
|
+
"Execution context was destroyed",
|
|
181
|
+
"ERR_CONNECTION",
|
|
182
|
+
"net::",
|
|
183
|
+
];
|
|
184
|
+
result.retryable = retryablePatterns.some((p) => e.message.includes(p));
|
|
176
185
|
}
|
|
177
186
|
|
|
178
187
|
return result;
|
|
@@ -98,10 +98,37 @@ async function openFollowModal(page) {
|
|
|
98
98
|
"未找到关注入口元素,请确认当前页面为用户主页或页面结构已变化",
|
|
99
99
|
);
|
|
100
100
|
}
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
101
|
+
|
|
102
|
+
// 等待用户列表容器,超时说明可能被弹窗遮挡
|
|
103
|
+
let containerReady = false;
|
|
104
|
+
for (let attempt = 1; attempt <= 2; attempt++) {
|
|
105
|
+
try {
|
|
106
|
+
await page.waitForSelector("[class*=DivUserListContainer]", {
|
|
107
|
+
timeout: 30000,
|
|
108
|
+
});
|
|
109
|
+
containerReady = true;
|
|
110
|
+
break;
|
|
111
|
+
} catch (e) {
|
|
112
|
+
if (attempt === 1) {
|
|
113
|
+
// 第一次超时,刷新页面重试
|
|
114
|
+
await page.reload({ waitUntil: "domcontentloaded" });
|
|
115
|
+
await delay(2000, 3000);
|
|
116
|
+
// 重新点击 follow 入口
|
|
117
|
+
opened = await tryOpen();
|
|
118
|
+
if (!opened) {
|
|
119
|
+
throw new Error(
|
|
120
|
+
"刷新后仍未找到关注入口元素,请确认当前页面为用户主页",
|
|
121
|
+
);
|
|
122
|
+
}
|
|
123
|
+
} else {
|
|
124
|
+
throw e;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (containerReady) {
|
|
130
|
+
await waitForListContent(page, 1, 5000);
|
|
131
|
+
}
|
|
105
132
|
}
|
|
106
133
|
|
|
107
134
|
async function switchToFollowersTab(page) {
|
package/src/videos/core.js
CHANGED
|
@@ -14,7 +14,7 @@ async function getUserInfo(page) {
|
|
|
14
14
|
return await page.evaluate(fn);
|
|
15
15
|
} catch (e) {
|
|
16
16
|
if (
|
|
17
|
-
e.message.includes(
|
|
17
|
+
e.message.includes("Execution context was destroyed") &&
|
|
18
18
|
i < retries - 1
|
|
19
19
|
) {
|
|
20
20
|
await new Promise((r) => setTimeout(r, 500 * (i + 1)));
|