tt-help-cli-ycl 1.3.48 → 1.3.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -33
- package/cli.js +9 -9
- package/package.json +52 -52
- package/scripts/run-explore copy.bat +101 -101
- package/scripts/run-explore.bat +134 -134
- package/scripts/run-explore.ps1 +159 -159
- package/scripts/run-explore.sh +121 -121
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/scripts/test-watch-db-smoke.mjs +246 -0
- package/src/cli/attach.js +331 -331
- package/src/cli/auto.js +265 -265
- package/src/cli/comments.js +620 -620
- package/src/cli/config.js +170 -170
- package/src/cli/db-import.js +51 -51
- package/src/cli/explore.js +555 -555
- package/src/cli/open.js +109 -111
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +288 -288
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/videostats.js +196 -196
- package/src/cli/watch.js +30 -30
- package/src/lib/api-interceptor.js +161 -161
- package/src/lib/args.js +809 -809
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +261 -261
- package/src/lib/browser/health-checker.js +114 -114
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +184 -184
- package/src/lib/constants.js +297 -297
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/lib/page-error-detector.js +109 -109
- package/src/lib/parse-ssr.mjs +69 -69
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +90 -90
- package/src/lib/target-locations.js +61 -61
- package/src/lib/tiktok-scraper.mjs +98 -61
- package/src/lib/url.js +52 -52
- package/src/main.js +73 -73
- package/src/npm-main.js +70 -70
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.js +203 -203
- package/src/scraper/core.js +255 -255
- package/src/scraper/explore-core.js +208 -208
- package/src/scraper/modules/captcha-handler.js +114 -114
- package/src/scraper/modules/follow-extractor.js +250 -250
- package/src/scraper/modules/guess-extractor.js +51 -51
- package/src/scraper/modules/page-helpers.js +48 -48
- package/src/scraper/refresh-core.js +213 -213
- package/src/videos/core.js +143 -143
- package/src/watch/data-store.js +2980 -2980
- package/src/watch/public/index.html +2355 -2355
- package/src/watch/server.js +727 -727
package/src/cli/attach.js
CHANGED
|
@@ -1,331 +1,331 @@
|
|
|
1
|
-
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
2
|
-
import v8 from "node:v8";
|
|
3
|
-
|
|
4
|
-
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
5
|
-
const HEAP_RESTART_RATIO = 0.72;
|
|
6
|
-
const MAX_TASK_BATCHES_BEFORE_RESTART = 200;
|
|
7
|
-
|
|
8
|
-
function formatNow() {
|
|
9
|
-
return new Date().toLocaleString("zh-CN", {
|
|
10
|
-
hour12: false,
|
|
11
|
-
});
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
function attachLog(message = "") {
|
|
15
|
-
console.error(`[${formatNow()}] ${message}`);
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
async function withRetry(label, fn) {
|
|
19
|
-
let backoff = 1000;
|
|
20
|
-
while (true) {
|
|
21
|
-
try {
|
|
22
|
-
return await fn();
|
|
23
|
-
} catch (err) {
|
|
24
|
-
attachLog(
|
|
25
|
-
`[连接] ${label} 失败: ${err.message},${backoff / 1000}秒后重试...`,
|
|
26
|
-
);
|
|
27
|
-
await new Promise((r) => setTimeout(r, backoff));
|
|
28
|
-
if (backoff < MAX_RETRY_WAIT) backoff *= 2;
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
async function apiGet(url) {
|
|
34
|
-
return withRetry(`GET ${url}`, async () => {
|
|
35
|
-
const res = await fetch(url);
|
|
36
|
-
return res.json();
|
|
37
|
-
});
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
async function apiPost(url, body) {
|
|
41
|
-
return withRetry(`POST ${url}`, async () => {
|
|
42
|
-
const res = await fetch(url, {
|
|
43
|
-
method: "POST",
|
|
44
|
-
headers: { "Content-Type": "application/json" },
|
|
45
|
-
body: JSON.stringify(body),
|
|
46
|
-
});
|
|
47
|
-
return res.json();
|
|
48
|
-
});
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
function isBrowserClosedError(err) {
|
|
52
|
-
if (!err) return false;
|
|
53
|
-
const msg = err.message || err.toString() || "";
|
|
54
|
-
return (
|
|
55
|
-
msg.includes("Target page, context or browser has been closed") ||
|
|
56
|
-
msg.includes("browser has been closed") ||
|
|
57
|
-
msg.includes("browserContext.newPage") ||
|
|
58
|
-
msg.includes("Protocol error")
|
|
59
|
-
);
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
function getHeapUsage() {
|
|
63
|
-
const heapUsed = process.memoryUsage().heapUsed;
|
|
64
|
-
const heapLimit = v8.getHeapStatistics().heap_size_limit;
|
|
65
|
-
return {
|
|
66
|
-
heapUsed,
|
|
67
|
-
heapLimit,
|
|
68
|
-
ratio: heapLimit > 0 ? heapUsed / heapLimit : 0,
|
|
69
|
-
};
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
async function recycleScraper(scraper, reason) {
|
|
73
|
-
attachLog(`[Attach] ${reason},正在重启浏览器实例...`);
|
|
74
|
-
await scraper.restart();
|
|
75
|
-
attachLog("[Attach] 浏览器实例重启完成");
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
export async function handleAttach(options) {
|
|
79
|
-
const {
|
|
80
|
-
attachParallel,
|
|
81
|
-
attachInterval,
|
|
82
|
-
serverUrl,
|
|
83
|
-
attachCountries,
|
|
84
|
-
showHelp,
|
|
85
|
-
} = options;
|
|
86
|
-
let shuttingDown = false;
|
|
87
|
-
let forceExitTimer = null;
|
|
88
|
-
|
|
89
|
-
if (showHelp) {
|
|
90
|
-
attachLog(
|
|
91
|
-
"用法: tt-help attach [-p 并行数] [-i 间隔秒数] [-s 服务端地址] [-c 国家列表]",
|
|
92
|
-
);
|
|
93
|
-
attachLog("");
|
|
94
|
-
attachLog("参数:");
|
|
95
|
-
attachLog(" -p, --parallel <N> 并行抓取数(默认: 1)");
|
|
96
|
-
attachLog(" -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)");
|
|
97
|
-
attachLog(
|
|
98
|
-
" -s, --server <URL> 服务端地址(默认: http://127.0.0.1:3001)",
|
|
99
|
-
);
|
|
100
|
-
attachLog(
|
|
101
|
-
" -c, --countries <A,B,C> 猜测国家列表(逗号分隔,如 PL,DE,FR),服务端优先返回这些国家的任务",
|
|
102
|
-
);
|
|
103
|
-
attachLog("");
|
|
104
|
-
attachLog("说明:");
|
|
105
|
-
attachLog(
|
|
106
|
-
" 后台轮询服务端 /api/user-update-tasks 接口,自动抓取 TikTok 用户信息",
|
|
107
|
-
);
|
|
108
|
-
attachLog(" 抓取完成后通过 POST /api/user-info-batch 批量回传结果");
|
|
109
|
-
attachLog(" 浏览器崩溃时自动重启,支持长时间无人值守运行");
|
|
110
|
-
attachLog("");
|
|
111
|
-
attachLog("示例:");
|
|
112
|
-
attachLog(" tt-help attach");
|
|
113
|
-
attachLog(" tt-help attach -p 5 -i 10");
|
|
114
|
-
attachLog(" tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001");
|
|
115
|
-
attachLog(" tt-help attach -c PL,DE,FR -p 5");
|
|
116
|
-
return;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
const countryStr =
|
|
120
|
-
attachCountries && attachCountries.length > 0
|
|
121
|
-
? `, 猜测国家: ${attachCountries.join(", ")}`
|
|
122
|
-
: "";
|
|
123
|
-
attachLog(
|
|
124
|
-
`[Attach] 并行数: ${attachParallel}, 空闲间隔: ${attachInterval}秒, 服务端: ${serverUrl}${countryStr}`,
|
|
125
|
-
);
|
|
126
|
-
|
|
127
|
-
const scraper = new TikTokScraper();
|
|
128
|
-
const shutdown = async (signal) => {
|
|
129
|
-
if (shuttingDown) return;
|
|
130
|
-
shuttingDown = true;
|
|
131
|
-
forceExitTimer = setTimeout(() => {
|
|
132
|
-
attachLog("[Attach] 关闭超时,强制退出");
|
|
133
|
-
process.exit(0);
|
|
134
|
-
}, 30000);
|
|
135
|
-
forceExitTimer.unref?.();
|
|
136
|
-
attachLog(`\n[Attach] 收到 ${signal},等待当前任务完成后退出...`);
|
|
137
|
-
};
|
|
138
|
-
|
|
139
|
-
const onSigint = () => {
|
|
140
|
-
void shutdown("SIGINT");
|
|
141
|
-
};
|
|
142
|
-
const onSigterm = () => {
|
|
143
|
-
void shutdown("SIGTERM");
|
|
144
|
-
};
|
|
145
|
-
|
|
146
|
-
process.on("SIGINT", onSigint);
|
|
147
|
-
process.on("SIGTERM", onSigterm);
|
|
148
|
-
|
|
149
|
-
try {
|
|
150
|
-
await scraper.init();
|
|
151
|
-
attachLog("[Attach] 浏览器初始化完成,开始循环接收任务...");
|
|
152
|
-
|
|
153
|
-
let loopCount = 0;
|
|
154
|
-
let browserRestartCount = 0;
|
|
155
|
-
let taskBatchCount = 0;
|
|
156
|
-
|
|
157
|
-
while (!shuttingDown) {
|
|
158
|
-
loopCount++;
|
|
159
|
-
|
|
160
|
-
// 检查浏览器是否存活,不存活则重启
|
|
161
|
-
if (!scraper.isAlive()) {
|
|
162
|
-
attachLog(
|
|
163
|
-
`[Attach] 浏览器已关闭,正在重启 (${++browserRestartCount})...`,
|
|
164
|
-
);
|
|
165
|
-
await scraper.restart();
|
|
166
|
-
attachLog("[Attach] 浏览器重启完成");
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// 收到退出信号且当前无任务正在处理,则退出
|
|
170
|
-
if (shuttingDown) {
|
|
171
|
-
attachLog("[Attach] 当前任务已完成,正在关闭浏览器...");
|
|
172
|
-
await scraper.close().catch(() => {});
|
|
173
|
-
if (forceExitTimer) {
|
|
174
|
-
clearTimeout(forceExitTimer);
|
|
175
|
-
forceExitTimer = null;
|
|
176
|
-
}
|
|
177
|
-
attachLog("[Attach] 已退出");
|
|
178
|
-
process.exit(0);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
const countryParam =
|
|
182
|
-
attachCountries && attachCountries.length > 0
|
|
183
|
-
? `&countries=${encodeURIComponent(attachCountries.join(","))}`
|
|
184
|
-
: "";
|
|
185
|
-
const { total, tasks } = await apiGet(
|
|
186
|
-
`${serverUrl}/api/user-update-tasks?limit=${attachParallel}${countryParam}`,
|
|
187
|
-
);
|
|
188
|
-
|
|
189
|
-
if (!tasks || tasks.length === 0) {
|
|
190
|
-
const heap = getHeapUsage();
|
|
191
|
-
if (heap.ratio >= HEAP_RESTART_RATIO) {
|
|
192
|
-
await recycleScraper(
|
|
193
|
-
scraper,
|
|
194
|
-
`空闲时堆占用 ${(heap.ratio * 100).toFixed(1)}%`,
|
|
195
|
-
);
|
|
196
|
-
}
|
|
197
|
-
if (loopCount === 1) {
|
|
198
|
-
attachLog(`[Attach] 当前无待更新任务,${attachInterval}秒后重试...`);
|
|
199
|
-
}
|
|
200
|
-
// 空闲等待时可中断
|
|
201
|
-
await new Promise((r) => setTimeout(r, attachInterval * 1000));
|
|
202
|
-
continue;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
attachLog(`\n[Attach] 获取到 ${tasks.length} 个待更新任务...`);
|
|
206
|
-
taskBatchCount++;
|
|
207
|
-
|
|
208
|
-
// 收到退出信号但已有任务在跑,继续完成当前批次
|
|
209
|
-
if (shuttingDown) {
|
|
210
|
-
attachLog("[Attach] 正在处理当前批次任务,完成后退出...");
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
const results = await Promise.allSettled(
|
|
214
|
-
tasks.map(async (task) => {
|
|
215
|
-
const uniqueId = task.uniqueId;
|
|
216
|
-
const countryTag = task.guessedLocation
|
|
217
|
-
? ` [猜测国家: ${task.guessedLocation}]`
|
|
218
|
-
: "";
|
|
219
|
-
attachLog(` → 获取 @${uniqueId} 的用户信息...${countryTag}`);
|
|
220
|
-
try {
|
|
221
|
-
const info = await scraper.getUserInfo(uniqueId);
|
|
222
|
-
return { uniqueId, info, error: null };
|
|
223
|
-
} catch (err) {
|
|
224
|
-
return { uniqueId, info: null, error: err };
|
|
225
|
-
}
|
|
226
|
-
}),
|
|
227
|
-
);
|
|
228
|
-
|
|
229
|
-
let successCount = 0;
|
|
230
|
-
let failCount = 0;
|
|
231
|
-
let needRestart = false;
|
|
232
|
-
|
|
233
|
-
// 收集抓取成功的任务,记录抓取失败的
|
|
234
|
-
const successTasks = [];
|
|
235
|
-
for (const result of results) {
|
|
236
|
-
if (result.status === "fulfilled") {
|
|
237
|
-
const { uniqueId, info, error } = result.value;
|
|
238
|
-
if (error) {
|
|
239
|
-
if (isBrowserClosedError(error)) {
|
|
240
|
-
needRestart = true;
|
|
241
|
-
}
|
|
242
|
-
attachLog(
|
|
243
|
-
` ✗ @${uniqueId} 获取失败: ${error.message || "未知错误"}`,
|
|
244
|
-
);
|
|
245
|
-
failCount++;
|
|
246
|
-
} else if (info) {
|
|
247
|
-
successTasks.push({ uniqueId, info });
|
|
248
|
-
} else {
|
|
249
|
-
attachLog(` - @${uniqueId} 无用户信息`);
|
|
250
|
-
successTasks.push({ uniqueId, info: {} });
|
|
251
|
-
}
|
|
252
|
-
} else {
|
|
253
|
-
attachLog(
|
|
254
|
-
` ✗ 任务执行异常: ${result.reason?.message || "未知错误"}`,
|
|
255
|
-
);
|
|
256
|
-
failCount++;
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
// 批量提交成功的结果
|
|
261
|
-
if (successTasks.length > 0) {
|
|
262
|
-
try {
|
|
263
|
-
const batchRet = await apiPost(`${serverUrl}/api/user-info-batch`, {
|
|
264
|
-
updates: successTasks,
|
|
265
|
-
});
|
|
266
|
-
if (batchRet && batchRet.results) {
|
|
267
|
-
const nicknameMap = {};
|
|
268
|
-
for (const { uniqueId, info } of successTasks) {
|
|
269
|
-
if (info?.nickname) {
|
|
270
|
-
nicknameMap[uniqueId] = info.nickname;
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
for (const r of batchRet.results) {
|
|
274
|
-
if (r.ok) {
|
|
275
|
-
successCount++;
|
|
276
|
-
const nickname = nicknameMap[r.uniqueId] || r.uniqueId;
|
|
277
|
-
attachLog(` ✓ @${r.uniqueId} (${nickname}) 已提交更新`);
|
|
278
|
-
} else {
|
|
279
|
-
failCount++;
|
|
280
|
-
attachLog(` ✗ @${r.uniqueId} 提交失败: ${r.error}`);
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
} else {
|
|
284
|
-
successCount = successTasks.length;
|
|
285
|
-
attachLog(` ✓ 批量提交完成 (${successTasks.length} 条)`);
|
|
286
|
-
}
|
|
287
|
-
} catch (err) {
|
|
288
|
-
failCount += successTasks.length;
|
|
289
|
-
attachLog(` ✗ 批量提交失败: ${err.message}`);
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
attachLog(` 本批结果: ${successCount} 成功, ${failCount} 失败\n`);
|
|
294
|
-
|
|
295
|
-
const heap = getHeapUsage();
|
|
296
|
-
if (heap.ratio >= HEAP_RESTART_RATIO) {
|
|
297
|
-
await recycleScraper(
|
|
298
|
-
scraper,
|
|
299
|
-
`本批后堆占用 ${(heap.ratio * 100).toFixed(1)}%`,
|
|
300
|
-
);
|
|
301
|
-
browserRestartCount++;
|
|
302
|
-
taskBatchCount = 0;
|
|
303
|
-
} else if (taskBatchCount >= MAX_TASK_BATCHES_BEFORE_RESTART) {
|
|
304
|
-
await recycleScraper(scraper, `已连续处理 ${taskBatchCount} 批任务`);
|
|
305
|
-
browserRestartCount++;
|
|
306
|
-
taskBatchCount = 0;
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
if (needRestart) {
|
|
310
|
-
attachLog("[Attach] 检测到浏览器异常,将在下一轮重启...");
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
await new Promise((r) => setTimeout(r, 500));
|
|
314
|
-
}
|
|
315
|
-
} catch (err) {
|
|
316
|
-
attachLog(`[Attach] 运行异常: ${err.message}`);
|
|
317
|
-
throw err;
|
|
318
|
-
} finally {
|
|
319
|
-
// 正常退出(非信号触发)或超时强制退出时的清理
|
|
320
|
-
if (forceExitTimer) {
|
|
321
|
-
clearTimeout(forceExitTimer);
|
|
322
|
-
forceExitTimer = null;
|
|
323
|
-
}
|
|
324
|
-
process.removeListener("SIGINT", onSigint);
|
|
325
|
-
process.removeListener("SIGTERM", onSigterm);
|
|
326
|
-
// 只有在非信号退出时才关闭浏览器(信号退出已在循环内关闭)
|
|
327
|
-
if (!shuttingDown) {
|
|
328
|
-
await scraper.close().catch(() => {});
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
}
|
|
1
|
+
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
2
|
+
import v8 from "node:v8";
|
|
3
|
+
|
|
4
|
+
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
5
|
+
const HEAP_RESTART_RATIO = 0.72;
|
|
6
|
+
const MAX_TASK_BATCHES_BEFORE_RESTART = 200;
|
|
7
|
+
|
|
8
|
+
/**
 * Returns the current local date and time as a display string.
 *
 * The value is formatted with the "zh-CN" locale and the 12-hour clock
 * disabled; it is used as the timestamp prefix of log lines.
 *
 * @returns {string} Locale-formatted timestamp of "now".
 */
function formatNow() {
  const formatOptions = { hour12: false };
  const now = new Date();
  return now.toLocaleString("zh-CN", formatOptions);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Writes one timestamped log line to stderr.
 *
 * @param {string} [message=""] Text to print after the `[timestamp]` prefix.
 * @returns {void}
 */
function attachLog(message = "") {
  const stamp = formatNow();
  console.error(`[${stamp}] ${message}`);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Runs `fn` until it succeeds, sleeping with exponential backoff between
 * attempts.
 *
 * The first wait is 1 second and doubles after every failure, but never
 * exceeds MAX_RETRY_WAIT. There is no attempt limit: the returned promise only
 * settles once `fn` resolves, so callers never observe a rejection from `fn`.
 *
 * @param {string} label Short description of the operation, used in the log line.
 * @param {() => Promise<*>} fn Operation to attempt.
 * @returns {Promise<*>} Whatever `fn` eventually resolves to.
 */
async function withRetry(label, fn) {
  let backoff = 1000;
  while (true) {
    try {
      return await fn();
    } catch (err) {
      // Tolerate non-Error rejections (strings, null) instead of crashing here.
      const reason = err?.message ?? String(err);
      attachLog(
        `[连接] ${label} 失败: ${reason},${backoff / 1000}秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, backoff));
      // Clamp: plain doubling would jump from 256 s to 512 s, past the cap.
      backoff = Math.min(backoff * 2, MAX_RETRY_WAIT);
    }
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Performs a GET request and returns the parsed JSON body, retrying through
 * withRetry until it succeeds.
 *
 * `fetch` resolves normally for HTTP error statuses, so a non-2xx response is
 * turned into an error here. Otherwise a JSON error body would be handed to the
 * caller as if it were a valid result.
 *
 * @param {string} url Absolute URL to request.
 * @returns {Promise<*>} Parsed JSON body of the first successful response.
 */
async function apiGet(url) {
  return withRetry(`GET ${url}`, async () => {
    const res = await fetch(url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`);
    }
    return res.json();
  });
}
|
|
39
|
+
|
|
40
|
+
/**
 * Sends `body` as JSON in a POST request and returns the parsed JSON response,
 * retrying through withRetry until it succeeds.
 *
 * Server-side failures (HTTP 5xx) are treated as transient and retried, because
 * `fetch` itself does not reject on them. Client errors (4xx) are NOT retried:
 * their body is returned unchanged, so a permanently rejected payload cannot
 * block the caller in an endless retry loop.
 *
 * @param {string} url Absolute URL to post to.
 * @param {*} body JSON-serialisable request payload.
 * @returns {Promise<*>} Parsed JSON body of the response.
 */
async function apiPost(url, body) {
  return withRetry(`POST ${url}`, async () => {
    const res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });
    if (res.status >= 500) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`);
    }
    return res.json();
  });
}
|
|
50
|
+
|
|
51
|
+
/**
 * Tells whether an error's text contains one of the known "browser is gone"
 * markers.
 *
 * The text inspected is `err.message`, falling back to `err.toString()`.
 *
 * @param {*} err Caught value; falsy values are never a match.
 * @returns {boolean} True when any marker substring occurs in the error text.
 */
function isBrowserClosedError(err) {
  if (!err) {
    return false;
  }
  const text = err.message || err.toString() || "";
  const markers = [
    "Target page, context or browser has been closed",
    "browser has been closed",
    "browserContext.newPage",
    "Protocol error",
  ];
  return markers.some((marker) => text.includes(marker));
}
|
|
61
|
+
|
|
62
|
+
/**
 * Samples the current V8 heap usage of this process.
 *
 * @returns {{heapUsed: number, heapLimit: number, ratio: number}}
 *   `heapUsed` from `process.memoryUsage()`, `heapLimit` from V8's
 *   `heap_size_limit`, and `ratio` = heapUsed / heapLimit (0 when the limit is
 *   not positive).
 */
function getHeapUsage() {
  const { heapUsed } = process.memoryUsage();
  const { heap_size_limit: heapLimit } = v8.getHeapStatistics();
  let ratio = 0;
  if (heapLimit > 0) {
    ratio = heapUsed / heapLimit;
  }
  return { heapUsed, heapLimit, ratio };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Restarts the scraper's browser instance, logging the reason before the
 * restart and a confirmation after it.
 *
 * @param {{restart: () => Promise<*>}} scraper Scraper whose `restart()` is awaited.
 * @param {string} reason Text placed at the start of the "restarting" log line.
 * @returns {Promise<void>}
 */
async function recycleScraper(scraper, reason) {
  const startNotice = `[Attach] ${reason},正在重启浏览器实例...`;
  attachLog(startNotice);

  await scraper.restart();

  attachLog("[Attach] 浏览器实例重启完成");
}
|
|
77
|
+
|
|
78
|
+
/** Prints the usage text of the `attach` sub-command, one log line per entry. */
function printAttachHelp() {
  const helpLines = [
    "用法: tt-help attach [-p 并行数] [-i 间隔秒数] [-s 服务端地址] [-c 国家列表]",
    "",
    "参数:",
    " -p, --parallel <N> 并行抓取数(默认: 1)",
    " -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)",
    " -s, --server <URL> 服务端地址(默认: http://127.0.0.1:3001)",
    " -c, --countries <A,B,C> 猜测国家列表(逗号分隔,如 PL,DE,FR),服务端优先返回这些国家的任务",
    "",
    "说明:",
    " 后台轮询服务端 /api/user-update-tasks 接口,自动抓取 TikTok 用户信息",
    " 抓取完成后通过 POST /api/user-info-batch 批量回传结果",
    " 浏览器崩溃时自动重启,支持长时间无人值守运行",
    "",
    "示例:",
    " tt-help attach",
    " tt-help attach -p 5 -i 10",
    " tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001",
    " tt-help attach -c PL,DE,FR -p 5",
  ];
  for (const line of helpLines) {
    attachLog(line);
  }
}

/**
 * Fetches user info for every task of one batch concurrently and sorts the
 * outcomes.
 *
 * @param {{getUserInfo: (uniqueId: string) => Promise<*>}} scraper Scraper used per task.
 * @param {Array<{uniqueId: string, guessedLocation?: string}>} tasks Tasks returned by the server.
 * @returns {Promise<{successTasks: Array<{uniqueId: string, info: *}>, failCount: number, needRestart: boolean}>}
 *   `successTasks` are ready to submit (a task with no info is submitted with
 *   `{}`), `failCount` counts scrape failures, and `needRestart` is true when
 *   any failure matched isBrowserClosedError.
 */
async function scrapeBatch(scraper, tasks) {
  const settled = await Promise.allSettled(
    tasks.map(async (task) => {
      const uniqueId = task.uniqueId;
      const countryTag = task.guessedLocation
        ? ` [猜测国家: ${task.guessedLocation}]`
        : "";
      attachLog(` → 获取 @${uniqueId} 的用户信息...${countryTag}`);
      try {
        const info = await scraper.getUserInfo(uniqueId);
        return { uniqueId, info, error: null };
      } catch (err) {
        return { uniqueId, info: null, error: err };
      }
    }),
  );

  const successTasks = [];
  let failCount = 0;
  let needRestart = false;

  for (const result of settled) {
    if (result.status !== "fulfilled") {
      attachLog(` ✗ 任务执行异常: ${result.reason?.message || "未知错误"}`);
      failCount++;
      continue;
    }

    const { uniqueId, info, error } = result.value;
    if (error) {
      if (isBrowserClosedError(error)) {
        needRestart = true;
      }
      attachLog(` ✗ @${uniqueId} 获取失败: ${error.message || "未知错误"}`);
      failCount++;
    } else if (info) {
      successTasks.push({ uniqueId, info });
    } else {
      attachLog(` - @${uniqueId} 无用户信息`);
      successTasks.push({ uniqueId, info: {} });
    }
  }

  return { successTasks, failCount, needRestart };
}

/**
 * Posts the scraped results of one batch to `/api/user-info-batch` and logs the
 * per-user outcome reported by the server.
 *
 * @param {string} serverUrl Base URL of the task server.
 * @param {Array<{uniqueId: string, info: *}>} successTasks Results to submit.
 * @returns {Promise<{successCount: number, failCount: number}>} Submission tally.
 */
async function submitBatch(serverUrl, successTasks) {
  const tally = { successCount: 0, failCount: 0 };
  if (successTasks.length === 0) {
    return tally;
  }

  try {
    const batchRet = await apiPost(`${serverUrl}/api/user-info-batch`, {
      updates: successTasks,
    });

    if (!batchRet?.results) {
      // No per-item report in the response: count the whole batch as submitted.
      tally.successCount = successTasks.length;
      attachLog(` ✓ 批量提交完成 (${successTasks.length} 条)`);
      return tally;
    }

    // A Map avoids prototype-key surprises with externally supplied ids.
    const nicknames = new Map();
    for (const { uniqueId, info } of successTasks) {
      if (info?.nickname) {
        nicknames.set(uniqueId, info.nickname);
      }
    }

    for (const r of batchRet.results) {
      if (r.ok) {
        tally.successCount++;
        const nickname = nicknames.get(r.uniqueId) || r.uniqueId;
        attachLog(` ✓ @${r.uniqueId} (${nickname}) 已提交更新`);
      } else {
        tally.failCount++;
        attachLog(` ✗ @${r.uniqueId} 提交失败: ${r.error}`);
      }
    }
  } catch (err) {
    tally.failCount += successTasks.length;
    attachLog(` ✗ 批量提交失败: ${err?.message ?? err}`);
  }
  return tally;
}

/**
 * Entry point of the `attach` sub-command: a long-running worker that polls the
 * server for user-update tasks, scrapes them with a TikTokScraper and posts the
 * results back.
 *
 * The browser is restarted when it is no longer alive, when a batch reported a
 * browser-closed error, when heap usage reaches HEAP_RESTART_RATIO, or after
 * MAX_TASK_BATCHES_BEFORE_RESTART consecutive batches.
 *
 * On SIGINT/SIGTERM the batch in progress is finished, the browser is closed
 * and the process exits with code 0. A 30-second timer forces the exit if that
 * takes too long.
 *
 * @param {object} options Parsed CLI options.
 * @param {number} options.attachParallel Number of tasks requested per batch.
 * @param {number} options.attachInterval Idle polling interval in seconds.
 * @param {string} options.serverUrl Base URL of the task server.
 * @param {string[]} [options.attachCountries] Countries sent to the server as a filter.
 * @param {boolean} [options.showHelp] When true, only prints usage and returns.
 * @returns {Promise<void>}
 * @throws Re-throws any error escaping the main loop, after logging it.
 */
export async function handleAttach(options) {
  const {
    attachParallel,
    attachInterval,
    serverUrl,
    attachCountries,
    showHelp,
  } = options;

  if (showHelp) {
    printAttachHelp();
    return;
  }

  const hasCountries = attachCountries?.length > 0;
  const countryStr = hasCountries
    ? `, 猜测国家: ${attachCountries.join(", ")}`
    : "";
  attachLog(
    `[Attach] 并行数: ${attachParallel}, 空闲间隔: ${attachInterval}秒, 服务端: ${serverUrl}${countryStr}`,
  );

  // The polling URL never changes, so build it once instead of per iteration.
  const countryParam = hasCountries
    ? `&countries=${encodeURIComponent(attachCountries.join(","))}`
    : "";
  const tasksUrl = `${serverUrl}/api/user-update-tasks?limit=${attachParallel}${countryParam}`;

  let shuttingDown = false;
  let forceExitTimer = null;
  let wakeIdle = null; // resolves the current idle sleep early, if one is pending
  let scraperClosed = false;

  const scraper = new TikTokScraper();

  const clearForceExitTimer = () => {
    if (forceExitTimer) {
      clearTimeout(forceExitTimer);
      forceExitTimer = null;
    }
  };

  // Closes the browser at most once; a failing close is logged, never thrown.
  const closeScraper = async () => {
    if (scraperClosed) return;
    scraperClosed = true;
    try {
      await scraper.close();
    } catch (err) {
      attachLog(`[Attach] 关闭浏览器失败: ${err?.message ?? err}`);
    }
  };

  // Sleeps for `ms`, but returns immediately once a shutdown was requested.
  const idleSleep = (ms) => {
    if (shuttingDown) return Promise.resolve();
    return new Promise((resolve) => {
      const timer = setTimeout(() => {
        wakeIdle = null;
        resolve();
      }, ms);
      wakeIdle = () => {
        clearTimeout(timer);
        wakeIdle = null;
        resolve();
      };
    });
  };

  const shutdown = (signal) => {
    if (shuttingDown) return;
    shuttingDown = true;
    forceExitTimer = setTimeout(() => {
      attachLog("[Attach] 关闭超时,强制退出");
      process.exit(0);
    }, 30000);
    forceExitTimer.unref?.();
    attachLog(`\n[Attach] 收到 ${signal},等待当前任务完成后退出...`);
    wakeIdle?.();
  };

  const onSigint = () => shutdown("SIGINT");
  const onSigterm = () => shutdown("SIGTERM");

  process.on("SIGINT", onSigint);
  process.on("SIGTERM", onSigterm);

  try {
    await scraper.init();
    attachLog("[Attach] 浏览器初始化完成,开始循环接收任务...");

    let loopCount = 0;
    let browserRestartCount = 0;
    let taskBatchCount = 0;
    let restartPending = false; // set when the previous batch saw a browser-closed error

    while (!shuttingDown) {
      loopCount++;

      if (!scraper.isAlive()) {
        attachLog(
          `[Attach] 浏览器已关闭,正在重启 (${++browserRestartCount})...`,
        );
        await scraper.restart();
        attachLog("[Attach] 浏览器重启完成");
        restartPending = false;
      } else if (restartPending) {
        // Carry out the restart announced at the end of the previous batch.
        await recycleScraper(scraper, "检测到浏览器异常");
        browserRestartCount++;
        taskBatchCount = 0;
        restartPending = false;
      }

      // A signal may have arrived while the browser was restarting.
      if (shuttingDown) break;

      const response = await apiGet(tasksUrl);
      const tasks = response?.tasks;

      if (!Array.isArray(tasks) || tasks.length === 0) {
        const idleHeap = getHeapUsage();
        if (!shuttingDown && idleHeap.ratio >= HEAP_RESTART_RATIO) {
          await recycleScraper(
            scraper,
            `空闲时堆占用 ${(idleHeap.ratio * 100).toFixed(1)}%`,
          );
          browserRestartCount++;
          taskBatchCount = 0;
        }
        if (loopCount === 1) {
          attachLog(`[Attach] 当前无待更新任务,${attachInterval}秒后重试...`);
        }
        // Idle wait; a shutdown signal ends it early.
        await idleSleep(attachInterval * 1000);
        continue;
      }

      attachLog(`\n[Attach] 获取到 ${tasks.length} 个待更新任务...`);
      taskBatchCount++;

      // The signal arrived while tasks were being fetched: still finish this batch.
      if (shuttingDown) {
        attachLog("[Attach] 正在处理当前批次任务,完成后退出...");
      }

      const scraped = await scrapeBatch(scraper, tasks);
      const submitted = await submitBatch(serverUrl, scraped.successTasks);
      const successCount = submitted.successCount;
      const failCount = scraped.failCount + submitted.failCount;

      attachLog(` 本批结果: ${successCount} 成功, ${failCount} 失败\n`);

      // When exiting there is no point in restarting or pausing.
      if (shuttingDown) break;

      let recycled = false;
      const heap = getHeapUsage();
      if (heap.ratio >= HEAP_RESTART_RATIO) {
        await recycleScraper(
          scraper,
          `本批后堆占用 ${(heap.ratio * 100).toFixed(1)}%`,
        );
        recycled = true;
      } else if (taskBatchCount >= MAX_TASK_BATCHES_BEFORE_RESTART) {
        await recycleScraper(scraper, `已连续处理 ${taskBatchCount} 批任务`);
        recycled = true;
      }
      if (recycled) {
        browserRestartCount++;
        taskBatchCount = 0;
      }

      if (scraped.needRestart && !recycled) {
        attachLog("[Attach] 检测到浏览器异常,将在下一轮重启...");
        restartPending = true;
      }

      // Short pause between consecutive batches.
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    // The loop only ends on a shutdown signal: close the browser, then exit.
    attachLog("[Attach] 当前任务已完成,正在关闭浏览器...");
    await closeScraper();
    clearForceExitTimer();
    attachLog("[Attach] 已退出");
    process.exit(0);
  } catch (err) {
    attachLog(`[Attach] 运行异常: ${err?.message ?? err}`);
    throw err;
  } finally {
    // Cleanup for the error path (and for hosts where process.exit returns).
    clearForceExitTimer();
    process.removeListener("SIGINT", onSigint);
    process.removeListener("SIGTERM", onSigterm);
    await closeScraper();
  }
}
|