tt-help-cli-ycl 1.0.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +44 -44
- package/src/auto-core.cjs +367 -288
- package/src/data-store.cjs +69 -65
- package/src/explore-core.cjs +157 -0
- package/src/get-user-videos-core.cjs +142 -164
- package/src/get-user-videos.cjs +58 -58
- package/src/lib/args.js +397 -287
- package/src/lib/auto-browser.mjs +12 -10
- package/src/lib/constants.js +151 -148
- package/src/lib/explore.js +225 -244
- package/src/lib/fetcher.js +60 -60
- package/src/lib/filter.js +66 -66
- package/src/lib/get-user-videos-browser.mjs +5 -5
- package/src/lib/io.js +76 -76
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/scrape-browser.mjs +5 -5
- package/src/lib/scrape.js +39 -39
- package/src/main.mjs +962 -652
- package/src/scraper/core.cjs +213 -191
- package/src/scraper/index.cjs +96 -92
- package/src/scraper/modules/comment-extractor.cjs +49 -122
- package/src/scraper/modules/follow-extractor.cjs +112 -0
- package/src/scraper/modules/guess-extractor.cjs +53 -0
- package/src/scraper/modules/page-helpers.cjs +422 -422
- package/src/scraper/modules/scroll-collector.cjs +173 -0
- package/src/scraper/modules/video-scanner.cjs +43 -43
- package/src/test-auto-follow.cjs +109 -0
- package/src/test-extractors.cjs +75 -0
- package/src/test-follow.cjs +41 -0
- package/src/watch/public/index.html +271 -265
- package/src/watch/server.mjs +153 -145
- package/src/results/user-videos-bar.lar.lar.moeta.json +0 -37
|
@@ -1,422 +1,422 @@
|
|
|
1
|
-
const { chromium } = require('playwright');
|
|
2
|
-
const { exec } = require('child_process');
|
|
3
|
-
const http = require('http');
|
|
4
|
-
const os = require('os');
|
|
5
|
-
const path = require('path');
|
|
6
|
-
|
|
7
|
-
const USER_SECTION_SIZE = 12000;
|
|
8
|
-
const CDP_PORT = 9222;
|
|
9
|
-
|
|
10
|
-
// --- Retry utilities ---
|
|
11
|
-
|
|
12
|
-
const RETRYABLE_PATTERNS = [
|
|
13
|
-
'interrupted',
|
|
14
|
-
'Navigation.*interrupted',
|
|
15
|
-
'net::',
|
|
16
|
-
'ECONN',
|
|
17
|
-
'ETIMEDOUT',
|
|
18
|
-
'ENOTFOUND',
|
|
19
|
-
'EAI_AGAIN',
|
|
20
|
-
'ESOCKETRESET',
|
|
21
|
-
'connection.*refused',
|
|
22
|
-
'connection.*reset',
|
|
23
|
-
'failed.*navigate',
|
|
24
|
-
'target.*closed',
|
|
25
|
-
'crash',
|
|
26
|
-
];
|
|
27
|
-
|
|
28
|
-
function isRetryableError(error) {
|
|
29
|
-
if (!error) return false;
|
|
30
|
-
const msg = (error.message || error.toString() || '').toLowerCase();
|
|
31
|
-
return RETRYABLE_PATTERNS.some(p => new RegExp(p, 'i').test(msg));
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
async function retryWithBackoff(fn, { maxRetries = 3, baseDelay = 3000, log } = {}) {
|
|
35
|
-
let lastError;
|
|
36
|
-
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
37
|
-
try {
|
|
38
|
-
return await fn();
|
|
39
|
-
} catch (error) {
|
|
40
|
-
lastError = error;
|
|
41
|
-
if (attempt >= maxRetries || !isRetryableError(error)) {
|
|
42
|
-
throw error;
|
|
43
|
-
}
|
|
44
|
-
const jitter = Math.random() * 2000;
|
|
45
|
-
const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
|
|
46
|
-
if (log) {
|
|
47
|
-
log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime / 1000)}s 后重试...`);
|
|
48
|
-
}
|
|
49
|
-
await delay(Math.round(waitTime), Math.round(waitTime));
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
throw lastError;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
// 预设延迟配置(毫秒)
|
|
56
|
-
const DELAY_PRESETS = {
|
|
57
|
-
fast: { switchMax: 300, commentMax: 200, fast: true },
|
|
58
|
-
normal: { switchMax: 1500, commentMax: 800 },
|
|
59
|
-
slow: { switchMax: 3000, commentMax: 2000 },
|
|
60
|
-
stealth: { switchMax: 5000, commentMax: 3500 },
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
// 当前延迟配置
|
|
64
|
-
const delayConfig = {
|
|
65
|
-
switchMax: 2500,
|
|
66
|
-
commentMax: 1500,
|
|
67
|
-
fast: false,
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
function setDelayConfig(config) {
|
|
71
|
-
if (typeof config === 'string') {
|
|
72
|
-
const preset = DELAY_PRESETS[config.toLowerCase()];
|
|
73
|
-
if (!preset) {
|
|
74
|
-
throw new Error(
|
|
75
|
-
`未知的延迟预设: ${config}\n` +
|
|
76
|
-
`可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
|
|
77
|
-
);
|
|
78
|
-
}
|
|
79
|
-
delayConfig.switchMax = preset.switchMax;
|
|
80
|
-
delayConfig.commentMax = preset.commentMax;
|
|
81
|
-
delayConfig.fast = preset.fast || false;
|
|
82
|
-
} else if (typeof config === 'object') {
|
|
83
|
-
if (config.switchMax) delayConfig.switchMax = config.switchMax;
|
|
84
|
-
if (config.commentMax) delayConfig.commentMax = config.commentMax;
|
|
85
|
-
delayConfig.fast = config.fast || false;
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function getDelayConfig() {
|
|
90
|
-
return { ...delayConfig };
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
function listDelayPresets() {
|
|
94
|
-
return DELAY_PRESETS;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
function delay(min, max) {
|
|
98
|
-
const lo = Math.min(min, max);
|
|
99
|
-
const hi = Math.max(min, max);
|
|
100
|
-
let ms;
|
|
101
|
-
if (delayConfig.fast) {
|
|
102
|
-
ms = 0;
|
|
103
|
-
} else {
|
|
104
|
-
ms = Math.floor(Math.random() * (hi - lo + 1)) + lo;
|
|
105
|
-
}
|
|
106
|
-
return new Promise(r => setTimeout(r, ms));
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
async function openCommentPanel(page) {
|
|
110
|
-
await page.evaluate(() => {
|
|
111
|
-
const tabs = document.querySelectorAll('[class*="tabbar-item"]');
|
|
112
|
-
for (const tab of tabs) {
|
|
113
|
-
if (tab.textContent?.includes('评论')) {
|
|
114
|
-
tab.click();
|
|
115
|
-
break;
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
});
|
|
119
|
-
const c = delayConfig.commentMax;
|
|
120
|
-
await delay(Math.round(c * 0.5), c);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
async function closeCommentPanel(page) {
|
|
124
|
-
await page.evaluate(() => {
|
|
125
|
-
const rightPanel = document.querySelector('[class*="RightPanelContainer"]');
|
|
126
|
-
if (rightPanel) {
|
|
127
|
-
const tabContainer = rightPanel.querySelector('[class*="TabContainer"]');
|
|
128
|
-
if (tabContainer) {
|
|
129
|
-
const closeOverlay = tabContainer.querySelector('div:last-child');
|
|
130
|
-
if (closeOverlay) closeOverlay.click();
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
});
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
async function getVideoAuthor(page) {
|
|
137
|
-
return await page.evaluate(() => {
|
|
138
|
-
const m = window.location.href.match(/@([^/]+)\/video/);
|
|
139
|
-
return m ? '@' + m[1] : null;
|
|
140
|
-
});
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
async function swipeNextVideo(page) {
|
|
144
|
-
await page.evaluate(() => {
|
|
145
|
-
const container = document.querySelector('[class*="ColumnListContainer"]');
|
|
146
|
-
if (container) {
|
|
147
|
-
container.scrollTop += 700;
|
|
148
|
-
}
|
|
149
|
-
});
|
|
150
|
-
const s = delayConfig.switchMax;
|
|
151
|
-
await delay(Math.round(s * 0.5), s);
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
function extractUserSection(html) {
|
|
155
|
-
const idx = html.indexOf('"uniqueId"');
|
|
156
|
-
if (idx < 0) return null;
|
|
157
|
-
return html.substring(idx, idx + USER_SECTION_SIZE);
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
function parseUserSection(section) {
|
|
161
|
-
const data = {};
|
|
162
|
-
|
|
163
|
-
for (const key of ['uniqueId', 'uid', 'secUid']) {
|
|
164
|
-
const m = section.match(new RegExp(`"${key}":"([^"]*)`));
|
|
165
|
-
if (m) data[key] = m[1];
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
for (const key of ['nickname', 'signature']) {
|
|
169
|
-
const m = section.match(new RegExp(`"${key}":"((?:[^"\\\\]|\\\\.)*)"`, 'g'));
|
|
170
|
-
if (m) {
|
|
171
|
-
const raw = m[0].replace(`"${key}":"`, '').replace(/"$/, '');
|
|
172
|
-
data[key] = raw.replace(/\\n/g, '\n').replace(/\\\\/g, '\\');
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
for (const key of ['ttSeller', 'verified']) {
|
|
177
|
-
const m = section.match(new RegExp(`"${key}":\\s*(true|false)`));
|
|
178
|
-
data[key] = m ? m[1] === 'true' : undefined;
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
for (const key of ['followerCount', 'followingCount', 'heartCount', 'videoCount', 'diggCount']) {
|
|
182
|
-
const m = section.match(new RegExp(`"${key}":(\\d+)`));
|
|
183
|
-
if (m) data[key] = parseInt(m[1], 10);
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
const mt = section.match(/"createTime":(\d+)/);
|
|
187
|
-
if (mt) data.createTime = parseInt(mt[1], 10);
|
|
188
|
-
|
|
189
|
-
const ma = section.match(/"avatarLarger":"([^"]*)/);
|
|
190
|
-
if (ma) data.avatarLarger = ma[1].replace(/\\u002F/g, '/');
|
|
191
|
-
|
|
192
|
-
return data;
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
function extractLocationCreated(html) {
|
|
196
|
-
const m = html.match(/"locationCreated":"([^"]*)/);
|
|
197
|
-
return m ? m[1] : null;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
/**
|
|
201
|
-
* 检查 CDP 端口是否已就绪
|
|
202
|
-
*/
|
|
203
|
-
function checkCDPPort() {
|
|
204
|
-
return new Promise(resolve => {
|
|
205
|
-
const req = http.get(`http://127.0.0.1:${CDP_PORT}/json`, res => {
|
|
206
|
-
res.on('data', () => {});
|
|
207
|
-
res.on('end', () => resolve(res.statusCode === 200));
|
|
208
|
-
});
|
|
209
|
-
req.on('error', () => resolve(false));
|
|
210
|
-
req.setTimeout(3000, () => { resolve(false); req.destroy(); });
|
|
211
|
-
});
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
/**
|
|
215
|
-
* 检查 Edge 主进程是否带有 --user-data-dir 参数
|
|
216
|
-
*/
|
|
217
|
-
function checkEdgeArgs() {
|
|
218
|
-
return new Promise(resolve => {
|
|
219
|
-
const platform = os.platform();
|
|
220
|
-
let command;
|
|
221
|
-
if (platform === 'darwin') {
|
|
222
|
-
command = 'ps aux | grep "[M]icrosoft Edge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
|
|
223
|
-
} else if (platform === 'win32') {
|
|
224
|
-
command = 'wmic process where "name like \"%msedge%\"" get commandline | findstr "user-data-dir"';
|
|
225
|
-
} else {
|
|
226
|
-
command = 'ps aux | grep "[m]sedge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
|
|
227
|
-
}
|
|
228
|
-
exec(command, (err, stdout) => {
|
|
229
|
-
resolve(!err && stdout.trim().length > 0);
|
|
230
|
-
});
|
|
231
|
-
});
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
/**
|
|
235
|
-
* 获取 Edge 浏览器可执行文件路径
|
|
236
|
-
*/
|
|
237
|
-
function getEdgePath() {
|
|
238
|
-
const platform = os.platform();
|
|
239
|
-
if (platform === 'darwin') {
|
|
240
|
-
return '"Microsoft Edge"';
|
|
241
|
-
}
|
|
242
|
-
if (platform === 'win32') {
|
|
243
|
-
return 'msedge.exe';
|
|
244
|
-
}
|
|
245
|
-
return 'msedge';
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
/**
|
|
249
|
-
* 杀掉已运行的 Edge 浏览器进程
|
|
250
|
-
*/
|
|
251
|
-
function killEdgeProcesses() {
|
|
252
|
-
return new Promise((resolve, reject) => {
|
|
253
|
-
const platform = os.platform();
|
|
254
|
-
let command;
|
|
255
|
-
|
|
256
|
-
if (platform === 'darwin') {
|
|
257
|
-
command = 'killall -9 "Microsoft Edge" 2>/dev/null; rm -f ~/Library/Caches/Microsoft\\ Edge/Singleton*; true';
|
|
258
|
-
} else if (platform === 'win32') {
|
|
259
|
-
command = 'taskkill /F /IM msedge.exe 2>nul || exit 0';
|
|
260
|
-
} else {
|
|
261
|
-
command = 'pkill -9 -f msedge 2>/dev/null; true';
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
exec(command, () => {
|
|
265
|
-
resolve();
|
|
266
|
-
});
|
|
267
|
-
});
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
/**
|
|
271
|
-
* 启动 Edge 浏览器并启用 CDP 调试端口
|
|
272
|
-
*/
|
|
273
|
-
function launchEdgeWithCDP() {
|
|
274
|
-
return new Promise((resolve, reject) => {
|
|
275
|
-
const platform = os.platform();
|
|
276
|
-
const edgePath = getEdgePath();
|
|
277
|
-
let command;
|
|
278
|
-
|
|
279
|
-
const userDataDir = path.join(os.homedir(), 'Library', 'Application Support', 'Microsoft Edge For Testing');
|
|
280
|
-
if (platform === 'darwin') {
|
|
281
|
-
command = `open -a ${edgePath} --args --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}"`;
|
|
282
|
-
} else if (platform === 'win32') {
|
|
283
|
-
command = `start msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}"`;
|
|
284
|
-
} else {
|
|
285
|
-
command = `msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}" &`;
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
exec(command, (err, stdout, stderr) => {
|
|
289
|
-
if (err) {
|
|
290
|
-
reject(new Error(`启动 Edge 浏览器失败: ${err.message}`));
|
|
291
|
-
return;
|
|
292
|
-
}
|
|
293
|
-
resolve();
|
|
294
|
-
});
|
|
295
|
-
});
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
/**
|
|
299
|
-
* 等待 CDP 端口就绪,最多等待 timeout 毫秒
|
|
300
|
-
*/
|
|
301
|
-
async function waitForCDP(timeout = 30000, interval = 1000) {
|
|
302
|
-
const start = Date.now();
|
|
303
|
-
while (Date.now() - start < timeout) {
|
|
304
|
-
const ready = await checkCDPPort();
|
|
305
|
-
if (ready) return true;
|
|
306
|
-
await new Promise(r => setTimeout(r, interval));
|
|
307
|
-
}
|
|
308
|
-
return false;
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
/**
|
|
312
|
-
* 确保浏览器已启动且 CDP 端口可用。
|
|
313
|
-
* 如果端口未就绪,先杀掉已运行的 Edge,再以调试模式重启。
|
|
314
|
-
* 返回 playwright 的 browser 实例。
|
|
315
|
-
*/
|
|
316
|
-
async function ensureBrowserReady() {
|
|
317
|
-
const isReady = await checkCDPPort();
|
|
318
|
-
let needLaunch = !isReady;
|
|
319
|
-
|
|
320
|
-
if (!needLaunch) {
|
|
321
|
-
const edgeArgsValid = await checkEdgeArgs();
|
|
322
|
-
if (!edgeArgsValid) {
|
|
323
|
-
console.error(`Edge 已运行但启动参数不完整,正在重启...`);
|
|
324
|
-
await killEdgeProcesses();
|
|
325
|
-
await new Promise(r => setTimeout(r, 3000));
|
|
326
|
-
needLaunch = true;
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
if (needLaunch) {
|
|
331
|
-
const edgeRunning = await isEdgeRunning();
|
|
332
|
-
if (edgeRunning) {
|
|
333
|
-
console.error(`Edge 已运行但 CDP 端口 ${CDP_PORT} 未启用,正在重启...`);
|
|
334
|
-
await killEdgeProcesses();
|
|
335
|
-
await new Promise(r => setTimeout(r, 3000));
|
|
336
|
-
} else {
|
|
337
|
-
console.error(`CDP 端口 ${CDP_PORT} 未就绪,正在启动 Edge 浏览器...`);
|
|
338
|
-
}
|
|
339
|
-
await launchEdgeWithCDP();
|
|
340
|
-
|
|
341
|
-
console.error('等待浏览器启动...');
|
|
342
|
-
const launched = await waitForCDP();
|
|
343
|
-
if (!launched) {
|
|
344
|
-
throw new Error(
|
|
345
|
-
`等待 CDP 端口 ${CDP_PORT} 超时。请确认 Edge 浏览器已安装,\n` +
|
|
346
|
-
'或手动启动: Microsoft Edge --remote-debugging-port=9222'
|
|
347
|
-
);
|
|
348
|
-
}
|
|
349
|
-
console.error('浏览器启动成功');
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
const browser = await chromium.connectOverCDP(`http://127.0.0.1:${CDP_PORT}`);
|
|
353
|
-
return browser;
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
/**
|
|
357
|
-
* 检查 Edge 浏览器是否正在运行
|
|
358
|
-
*/
|
|
359
|
-
function isEdgeRunning() {
|
|
360
|
-
return new Promise(resolve => {
|
|
361
|
-
const platform = os.platform();
|
|
362
|
-
let command;
|
|
363
|
-
if (platform === 'darwin') {
|
|
364
|
-
command = 'ps aux | grep -q "[M]icrosoft Edge.app/Contents/MacOS/Microsoft Edge" 2>/dev/null';
|
|
365
|
-
} else if (platform === 'win32') {
|
|
366
|
-
command = 'tasklist /FI "IMAGENAME eq msedge.exe" 2>nul | findstr /I msedge';
|
|
367
|
-
} else {
|
|
368
|
-
command = 'pgrep -f msedge > /dev/null 2>&1';
|
|
369
|
-
}
|
|
370
|
-
exec(command, (err) => {
|
|
371
|
-
resolve(!err);
|
|
372
|
-
});
|
|
373
|
-
});
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
/**
|
|
377
|
-
* 在已连接的 browser 中找到或打开 TikTok 页面
|
|
378
|
-
*/
|
|
379
|
-
async function ensureTikTokPage(browser, url) {
|
|
380
|
-
const contexts = browser.contexts();
|
|
381
|
-
let page = null;
|
|
382
|
-
|
|
383
|
-
for (const ctx of contexts) {
|
|
384
|
-
for (const p of ctx.pages()) {
|
|
385
|
-
if (p.url().includes('tiktok.com')) {
|
|
386
|
-
page = p;
|
|
387
|
-
break;
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
if (page) break;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
if (!page) {
|
|
394
|
-
console.error('未找到 TikTok 页面,正在打开...');
|
|
395
|
-
const defaultCtx = browser.contexts()[0];
|
|
396
|
-
page = await defaultCtx.newPage();
|
|
397
|
-
await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }));
|
|
398
|
-
await delay(Math.round(delayConfig.switchMax * 0.5), delayConfig.switchMax);
|
|
399
|
-
console.error('TikTok 页面已打开');
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
return page;
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
module.exports = {
|
|
406
|
-
delay,
|
|
407
|
-
openCommentPanel,
|
|
408
|
-
closeCommentPanel,
|
|
409
|
-
getVideoAuthor,
|
|
410
|
-
swipeNextVideo,
|
|
411
|
-
extractUserSection,
|
|
412
|
-
parseUserSection,
|
|
413
|
-
extractLocationCreated,
|
|
414
|
-
ensureBrowserReady,
|
|
415
|
-
ensureTikTokPage,
|
|
416
|
-
isEdgeRunning,
|
|
417
|
-
setDelayConfig,
|
|
418
|
-
getDelayConfig,
|
|
419
|
-
listDelayPresets,
|
|
420
|
-
retryWithBackoff,
|
|
421
|
-
isRetryableError,
|
|
422
|
-
};
|
|
1
|
+
const { chromium } = require('playwright');
|
|
2
|
+
const { exec } = require('child_process');
|
|
3
|
+
const http = require('http');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
7
|
+
const USER_SECTION_SIZE = 12000;
|
|
8
|
+
const CDP_PORT = 9222;
|
|
9
|
+
|
|
10
|
+
// --- Retry utilities ---
|
|
11
|
+
|
|
12
|
+
/**
 * Regular-expression sources (matched case-insensitively) that mark an error
 * message as transient: interrupted navigation, network-level failures,
 * closed targets and crashes.
 */
const RETRYABLE_PATTERNS = [
  'interrupted',
  'Navigation.*interrupted',
  'net::',
  'ECONN',
  'ETIMEDOUT',
  'ENOTFOUND',
  'EAI_AGAIN',
  'ESOCKETRESET',
  'connection.*refused',
  'connection.*reset',
  'failed.*navigate',
  'target.*closed',
  'crash',
];

// Compiled once at load time instead of once per pattern on every call.
// No `g` flag, so `.test()` keeps no state between calls.
const RETRYABLE_REGEXES = RETRYABLE_PATTERNS.map((source) => new RegExp(source, 'i'));

/**
 * Decide whether a failure looks transient and is worth retrying.
 *
 * @param {*} error - An Error, a string, or any other thrown value.
 * @returns {boolean} true when the error text matches one of
 *   RETRYABLE_PATTERNS; false for falsy values and for values that cannot be
 *   converted to text.
 */
function isRetryableError(error) {
  if (!error) return false;

  let text;
  try {
    // `message` may be missing or not a string (e.g. a numeric code), so
    // coerce explicitly instead of calling string methods on it.
    text = String(error.message || error);
  } catch {
    // Values without a usable toString (e.g. Object.create(null)) cannot be
    // classified; treat them as non-retryable rather than throwing.
    return false;
  }

  return RETRYABLE_REGEXES.some((pattern) => pattern.test(text));
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run `fn`, retrying with exponential backoff while it fails with an error
 * that isRetryableError() accepts.
 *
 * The wait before retry number k (0-based) is `baseDelay * 2^k` plus up to
 * two seconds of random jitter. The wait uses its own timer rather than
 * delay(), because delay() returns immediately when the global delay config
 * is in fast mode, which would turn backoff into an immediate retry loop.
 *
 * @param {() => any} fn - Operation to attempt; may be sync or async.
 * @param {object} [options]
 * @param {number} [options.maxRetries=3] - Retries after the first attempt.
 *   Values <= 0 mean a single attempt.
 * @param {number} [options.baseDelay=3000] - Base wait in milliseconds.
 * @param {(message: string) => void} [options.log] - Optional progress logger.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 * @throws The last error from `fn` once retries are exhausted, or at once
 *   when the error is not retryable.
 */
async function retryWithBackoff(fn, { maxRetries = 3, baseDelay = 3000, log } = {}) {
  // `fn` always runs at least once, so the caller always gets a real error
  // back (a negative maxRetries used to throw `undefined` without calling fn).
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= maxRetries || !isRetryableError(error)) {
        throw error;
      }
      const jitter = Math.random() * 2000;
      const waitTime = baseDelay * 2 ** attempt + jitter;
      if (log) {
        log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime / 1000)}s 后重试...`);
      }
      await new Promise((resolve) => setTimeout(resolve, Math.round(waitTime)));
    }
  }
}
|
|
54
|
+
|
|
55
|
+
// Named delay presets (milliseconds). A preset with `fast: true` makes
// delay() skip waiting altogether.
const DELAY_PRESETS = {
  fast: { switchMax: 300, commentMax: 200, fast: true },
  normal: { switchMax: 1500, commentMax: 800 },
  slow: { switchMax: 3000, commentMax: 2000 },
  stealth: { switchMax: 5000, commentMax: 3500 },
};

// Active delay configuration; mutated in place by setDelayConfig().
const delayConfig = {
  switchMax: 2500,
  commentMax: 1500,
  fast: false,
};

/**
 * Update the active delay configuration.
 *
 * @param {string|object} config - Either a preset name (case-insensitive key
 *   of DELAY_PRESETS) or an object with optional `switchMax`, `commentMax`
 *   and `fast` fields. In object form, falsy `switchMax` / `commentMax`
 *   leave the current value untouched and `fast` is reset to false unless
 *   truthy. Any other value (including null) is ignored.
 * @throws {Error} When a string is given that is not a known preset name.
 */
function setDelayConfig(config) {
  if (typeof config === 'string') {
    const presetName = config.toLowerCase();
    // Own-property check so names like "constructor" or "__proto__" do not
    // resolve to Object.prototype members and get treated as presets.
    const preset = Object.prototype.hasOwnProperty.call(DELAY_PRESETS, presetName)
      ? DELAY_PRESETS[presetName]
      : undefined;
    if (!preset) {
      throw new Error(
        `未知的延迟预设: ${config}\n` +
        `可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
      );
    }
    delayConfig.switchMax = preset.switchMax;
    delayConfig.commentMax = preset.commentMax;
    delayConfig.fast = preset.fast || false;
  } else if (config !== null && typeof config === 'object') {
    // `typeof null === 'object'`, hence the explicit null guard above.
    if (config.switchMax) delayConfig.switchMax = config.switchMax;
    if (config.commentMax) delayConfig.commentMax = config.commentMax;
    delayConfig.fast = config.fast || false;
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Return a shallow copy of the active delay configuration, so callers cannot
 * change the live settings through the returned object.
 *
 * @returns {object} Copy of the fields currently held in delayConfig.
 */
function getDelayConfig() {
  const snapshot = Object.assign({}, delayConfig);
  return snapshot;
}
|
|
92
|
+
|
|
93
|
+
/**
 * List the available delay presets.
 *
 * Returns a fresh copy (one level deep) instead of the internal table, so a
 * caller that edits the result cannot alter the presets that
 * setDelayConfig() applies later.
 *
 * @returns {object} Map of preset name to a copy of its settings.
 */
function listDelayPresets() {
  const presets = {};
  for (const [name, settings] of Object.entries(DELAY_PRESETS)) {
    presets[name] = { ...settings };
  }
  return presets;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Wait for a random whole number of milliseconds between `min` and `max`
 * (inclusive, in either order). When the active delay config has `fast`
 * set, the wait is 0 ms regardless of the arguments.
 *
 * @param {number} min - One bound of the wait, in milliseconds.
 * @param {number} [max=min] - The other bound; omit it for a fixed wait.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function delay(min, max = min) {
  let ms = 0;
  if (!delayConfig.fast) {
    const lo = Math.min(min, max);
    const hi = Math.max(min, max);
    const picked = Math.floor(Math.random() * (hi - lo + 1)) + lo;
    // A missing or non-numeric bound yields NaN; fall back to no wait rather
    // than handing NaN to setTimeout. Negative waits are clamped to 0.
    ms = Number.isFinite(picked) ? Math.max(0, picked) : 0;
  }
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
|
108
|
+
|
|
109
|
+
/**
 * Click the first tab-bar item whose text contains "评论" (comments), then
 * pause for a random time between half of `commentMax` and `commentMax`.
 *
 * @param {object} page - Page handle exposing `evaluate()`.
 * @returns {Promise<void>}
 */
async function openCommentPanel(page) {
  await page.evaluate(() => {
    const commentTab = Array.from(document.querySelectorAll('[class*="tabbar-item"]'))
      .find((item) => item.textContent?.includes('评论'));
    if (commentTab) {
      commentTab.click();
    }
  });

  const upperBound = delayConfig.commentMax;
  await delay(Math.round(upperBound * 0.5), upperBound);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Close the comment panel by clicking the first `div:last-child` match
 * found inside the tab container of the right-hand panel. Does nothing when
 * any element along that path is missing.
 *
 * @param {object} page - Page handle exposing `evaluate()`.
 * @returns {Promise<void>}
 */
async function closeCommentPanel(page) {
  await page.evaluate(() => {
    const closeTarget = document
      .querySelector('[class*="RightPanelContainer"]')
      ?.querySelector('[class*="TabContainer"]')
      ?.querySelector('div:last-child');
    if (closeTarget) {
      closeTarget.click();
    }
  });
}
|
|
135
|
+
|
|
136
|
+
/**
 * Read the author handle from the page's current URL.
 *
 * @param {object} page - Page handle exposing `evaluate()`.
 * @returns {Promise<string|null>} "@handle" when the URL contains
 *   "@<handle>/video", otherwise null.
 */
async function getVideoAuthor(page) {
  const author = await page.evaluate(() => {
    const found = window.location.href.match(/@([^/]+)\/video/);
    if (!found) {
      return null;
    }
    return '@' + found[1];
  });
  return author;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Scroll the video column container down by 700 px, then pause for a random
 * time between half of `switchMax` and `switchMax`.
 *
 * @param {object} page - Page handle exposing `evaluate()`.
 * @returns {Promise<void>}
 */
async function swipeNextVideo(page) {
  await page.evaluate(() => {
    const feed = document.querySelector('[class*="ColumnListContainer"]');
    if (!feed) {
      return;
    }
    feed.scrollTop += 700;
  });

  const upperBound = delayConfig.switchMax;
  await delay(Math.round(upperBound * 0.5), upperBound);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Cut out the slice of `html` that starts at the first `"uniqueId"` key and
 * runs for at most USER_SECTION_SIZE characters.
 *
 * @param {string} html - Page source to search.
 * @returns {string|null} The slice, or null when `"uniqueId"` is absent.
 */
function extractUserSection(html) {
  const start = html.indexOf('"uniqueId"');
  return start < 0 ? null : html.substring(start, start + USER_SECTION_SIZE);
}
|
|
159
|
+
|
|
160
|
+
/**
 * Pull user profile fields out of a fragment of embedded JSON text using
 * per-key regular expressions (the fragment is not parsed as a whole).
 *
 * @param {string} section - Text containing `"key":value` pairs.
 * @returns {object} Object with whichever of these fields were found:
 *   uniqueId, uid, secUid, nickname, signature (strings); followerCount,
 *   followingCount, heartCount, videoCount, diggCount, createTime (integers);
 *   avatarLarger (string with `\u002F` turned into `/`). `ttSeller` and
 *   `verified` are always present: a boolean when found, otherwise undefined.
 */
function parseUserSection(section) {
  const data = {};

  // Identifier-like fields: taken verbatim up to the next double quote.
  for (const key of ['uniqueId', 'uid', 'secUid']) {
    const m = section.match(new RegExp(`"${key}":"([^"]*)`));
    if (m) data[key] = m[1];
  }

  // Free-text fields may contain escaped quotes, so the pattern steps over
  // backslash escapes and stops at the first unescaped closing quote.
  for (const key of ['nickname', 'signature']) {
    const m = section.match(new RegExp(`"${key}":"((?:[^"\\\\]|\\\\.)*)"`));
    if (m) data[key] = decodeJsonStringBody(m[1]);
  }

  // Flags are always assigned so the keys exist even when nothing matched.
  for (const key of ['ttSeller', 'verified']) {
    const m = section.match(new RegExp(`"${key}":\\s*(true|false)`));
    data[key] = m ? m[1] === 'true' : undefined;
  }

  for (const key of ['followerCount', 'followingCount', 'heartCount', 'videoCount', 'diggCount']) {
    const m = section.match(new RegExp(`"${key}":(\\d+)`));
    if (m) data[key] = parseInt(m[1], 10);
  }

  const mt = section.match(/"createTime":(\d+)/);
  if (mt) data.createTime = parseInt(mt[1], 10);

  const ma = section.match(/"avatarLarger":"([^"]*)/);
  if (ma) data.avatarLarger = ma[1].replace(/\\u002F/g, '/');

  return data;
}

/**
 * Decode the inside of a JSON string literal (the text between the quotes).
 * JSON.parse handles every escape form (`\"`, `\\`, `\n`, `\uXXXX`, ...).
 * If the text is not valid JSON string content, fall back to rewriting only
 * `\n` and `\\`, which is what this module did before.
 */
function decodeJsonStringBody(raw) {
  try {
    return JSON.parse(`"${raw}"`);
  } catch {
    return raw.replace(/\\n/g, '\n').replace(/\\\\/g, '\\');
  }
}
|
|
194
|
+
|
|
195
|
+
/**
 * Return the value of the first `"locationCreated":"..."` pair in `html`.
 *
 * @param {string} html - Text to search.
 * @returns {string|null} The captured value (possibly empty), or null when
 *   the key is not present.
 */
function extractLocationCreated(html) {
  const found = html.match(/"locationCreated":"([^"]*)/);
  if (found === null) {
    return null;
  }
  return found[1];
}
|
|
199
|
+
|
|
200
|
+
/**
 * Check whether the CDP endpoint is answering.
 *
 * Sends GET http://127.0.0.1:<CDP_PORT>/json and resolves true only when the
 * response completes with status 200. A request error or three seconds
 * without activity resolves false; the promise never rejects.
 *
 * @returns {Promise<boolean>}
 */
function checkCDPPort() {
  const endpoint = `http://127.0.0.1:${CDP_PORT}/json`;

  return new Promise((resolve) => {
    const handleResponse = (res) => {
      // The body is irrelevant, but it has to be consumed for 'end' to fire.
      res.on('data', () => {});
      res.on('end', () => {
        resolve(res.statusCode === 200);
      });
    };

    const req = http.get(endpoint, handleResponse);
    req.on('error', () => {
      resolve(false);
    });
    req.setTimeout(3000, () => {
      resolve(false);
      req.destroy();
    });
  });
}
|
|
213
|
+
|
|
214
|
+
/**
 * Check whether a running Edge process was started with a `user-data-dir`
 * argument, by listing processes through the platform shell and filtering
 * the output.
 *
 * @returns {Promise<boolean>} true when the command succeeds and prints at
 *   least one matching line; false otherwise (including when the command
 *   itself fails). Never rejects.
 */
function checkEdgeArgs() {
  return new Promise((resolve) => {
    const platform = os.platform();
    let command;
    if (platform === 'darwin') {
      command = 'ps aux | grep "[M]icrosoft Edge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
    } else if (platform === 'win32') {
      // The WQL string literal is single-quoted so that it nests inside the
      // double-quoted where-clause. The previous `\"` escapes were no-ops in
      // a single-quoted JS string and produced unbalanced nested quotes.
      command = "wmic process where \"name like '%msedge%'\" get commandline | findstr \"user-data-dir\"";
    } else {
      command = 'ps aux | grep "[m]sedge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
    }
    exec(command, (err, stdout) => {
      // grep/findstr exit non-zero when nothing matches, which surfaces here
      // as `err`; both that and empty output mean "not found".
      resolve(!err && stdout.trim().length > 0);
    });
  });
}
|
|
233
|
+
|
|
234
|
+
/**
 * Return the name used to launch Edge on the current platform.
 *
 * @returns {string} `"Microsoft Edge"` (with the double quotes included) on
 *   macOS, `msedge.exe` on Windows, and `msedge` everywhere else.
 */
function getEdgePath() {
  switch (os.platform()) {
    case 'darwin':
      return '"Microsoft Edge"';
    case 'win32':
      return 'msedge.exe';
    default:
      return 'msedge';
  }
}
|
|
247
|
+
|
|
248
|
+
/**
 * Force-kill running Edge processes with the platform's kill command. On
 * macOS the command also removes the `Singleton*` files from Edge's cache
 * directory.
 *
 * Best effort: the result of the command is ignored and the promise always
 * resolves.
 *
 * @returns {Promise<void>}
 */
function killEdgeProcesses() {
  let command;
  switch (os.platform()) {
    case 'darwin':
      command = 'killall -9 "Microsoft Edge" 2>/dev/null; rm -f ~/Library/Caches/Microsoft\\ Edge/Singleton*; true';
      break;
    case 'win32':
      command = 'taskkill /F /IM msedge.exe 2>nul || exit 0';
      break;
    default:
      command = 'pkill -9 -f msedge 2>/dev/null; true';
  }

  return new Promise((resolve) => {
    // Errors are deliberately not inspected: "nothing to kill" is fine.
    exec(command, () => resolve());
  });
}
|
|
269
|
+
|
|
270
|
+
/**
 * Start Edge with the CDP remote-debugging port enabled and a dedicated
 * user-data directory (`~/Library/Application Support/Microsoft Edge For
 * Testing`, used on every platform).
 *
 * Resolves once the launcher command has returned; it does not wait for the
 * debugging port to accept connections.
 *
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when the launcher command fails.
 */
function launchEdgeWithCDP() {
  return new Promise((resolve, reject) => {
    const platform = os.platform();
    const edgePath = getEdgePath();
    const userDataDir = path.join(os.homedir(), 'Library', 'Application Support', 'Microsoft Edge For Testing');

    let command;
    if (platform === 'darwin') {
      command = `open -a ${edgePath} --args --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}"`;
    } else if (platform === 'win32') {
      command = `start msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}"`;
    } else {
      // Output is redirected so the backgrounded browser does not inherit
      // exec's stdout/stderr pipes. Otherwise the callback below would not
      // run until the browser itself exits.
      command = `msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${userDataDir}" >/dev/null 2>&1 &`;
    }

    exec(command, (err) => {
      if (err) {
        reject(new Error(`启动 Edge 浏览器失败: ${err.message}`));
        return;
      }
      resolve();
    });
  });
}
|
|
297
|
+
|
|
298
|
+
/**
 * Poll checkCDPPort() until it reports ready or `timeout` elapses.
 *
 * @param {number} [timeout=30000] - Maximum total wait in milliseconds.
 * @param {number} [interval=1000] - Pause between probes in milliseconds.
 * @returns {Promise<boolean>} true as soon as a probe succeeds, false when
 *   the timeout is reached first.
 */
async function waitForCDP(timeout = 30000, interval = 1000) {
  const startedAt = Date.now();

  for (;;) {
    if (!(Date.now() - startedAt < timeout)) {
      return false;
    }
    if (await checkCDPPort()) {
      return true;
    }
    await new Promise((wake) => setTimeout(wake, interval));
  }
}
|
|
310
|
+
|
|
311
|
+
/**
 * Make sure an Edge instance is reachable over CDP and connect to it.
 *
 * Flow:
 *  1. Probe the CDP port.
 *  2. If it answers but no Edge process carries a user-data-dir argument,
 *     kill Edge and schedule a relaunch.
 *  3. If a launch is needed, kill any Edge that is still running, start Edge
 *     with the debugging port, and wait for the port to come up.
 *  4. Connect Playwright to the CDP endpoint.
 *
 * Progress messages go to stderr.
 *
 * @returns {Promise<object>} The Playwright browser returned by
 *   `chromium.connectOverCDP`.
 * @throws {Error} When the CDP port does not become ready in time, or when
 *   launching or connecting fails.
 */
async function ensureBrowserReady() {
  // Fixed pause that gives killed processes time to exit.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const isReady = await checkCDPPort();
  let needLaunch = !isReady;

  if (!needLaunch) {
    const edgeArgsValid = await checkEdgeArgs();
    if (!edgeArgsValid) {
      console.error(`Edge 已运行但启动参数不完整,正在重启...`);
      await killEdgeProcesses();
      await pause(3000);
      needLaunch = true;
    }
  }

  if (needLaunch) {
    const edgeRunning = await isEdgeRunning();
    if (edgeRunning) {
      console.error(`Edge 已运行但 CDP 端口 ${CDP_PORT} 未启用,正在重启...`);
      await killEdgeProcesses();
      await pause(3000);
    } else {
      console.error(`CDP 端口 ${CDP_PORT} 未就绪,正在启动 Edge 浏览器...`);
    }
    await launchEdgeWithCDP();

    console.error('等待浏览器启动...');
    const launched = await waitForCDP();
    if (!launched) {
      // The manual-start hint takes the port from CDP_PORT so it cannot
      // drift from the port that was actually probed.
      throw new Error(
        `等待 CDP 端口 ${CDP_PORT} 超时。请确认 Edge 浏览器已安装,\n` +
        `或手动启动: Microsoft Edge --remote-debugging-port=${CDP_PORT}`
      );
    }
    console.error('浏览器启动成功');
  }

  const browser = await chromium.connectOverCDP(`http://127.0.0.1:${CDP_PORT}`);
  return browser;
}
|
|
355
|
+
|
|
356
|
+
/**
 * Check whether an Edge process is currently running, using the platform's
 * process-listing command.
 *
 * @returns {Promise<boolean>} true when the command exits successfully,
 *   false otherwise. Never rejects.
 */
function isEdgeRunning() {
  let command;
  switch (os.platform()) {
    case 'darwin':
      command = 'ps aux | grep -q "[M]icrosoft Edge.app/Contents/MacOS/Microsoft Edge" 2>/dev/null';
      break;
    case 'win32':
      command = 'tasklist /FI "IMAGENAME eq msedge.exe" 2>nul | findstr /I msedge';
      break;
    default:
      command = 'pgrep -f msedge > /dev/null 2>&1';
  }

  return new Promise((resolve) => {
    // A non-zero exit (no match) is reported by exec as an error.
    exec(command, (err) => {
      resolve(!err);
    });
  });
}
|
|
375
|
+
|
|
376
|
+
/**
 * Find an open page whose URL contains "tiktok.com" in the connected
 * browser, or open `url` in a new page when there is none.
 *
 * @param {object} browser - Connected Playwright browser.
 * @param {string} url - Address to load when no TikTok page is open yet.
 * @returns {Promise<object>} The existing or newly opened page.
 * @throws Whatever `page.goto` finally fails with after retries.
 */
async function ensureTikTokPage(browser, url) {
  // Reuse the first already-open TikTok page, scanning contexts in order.
  for (const ctx of browser.contexts()) {
    for (const candidate of ctx.pages()) {
      if (candidate.url().includes('tiktok.com')) {
        return candidate;
      }
    }
  }

  console.error('未找到 TikTok 页面,正在打开...');
  // Use the first existing context, or create one when the browser exposes
  // none, instead of calling newPage() on undefined.
  const [firstCtx] = browser.contexts();
  const targetCtx = firstCtx ?? (await browser.newContext());
  const page = await targetCtx.newPage();
  await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }));
  await delay(Math.round(delayConfig.switchMax * 0.5), delayConfig.switchMax);
  console.error('TikTok 页面已打开');

  return page;
}
|
|
404
|
+
|
|
405
|
+
module.exports = {
|
|
406
|
+
delay,
|
|
407
|
+
openCommentPanel,
|
|
408
|
+
closeCommentPanel,
|
|
409
|
+
getVideoAuthor,
|
|
410
|
+
swipeNextVideo,
|
|
411
|
+
extractUserSection,
|
|
412
|
+
parseUserSection,
|
|
413
|
+
extractLocationCreated,
|
|
414
|
+
ensureBrowserReady,
|
|
415
|
+
ensureTikTokPage,
|
|
416
|
+
isEdgeRunning,
|
|
417
|
+
setDelayConfig,
|
|
418
|
+
getDelayConfig,
|
|
419
|
+
listDelayPresets,
|
|
420
|
+
retryWithBackoff,
|
|
421
|
+
isRetryableError,
|
|
422
|
+
};
|