tt-help-cli-ycl 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/auto-core.cjs +288 -0
- package/src/data-store.cjs +65 -0
- package/src/get-user-videos-core.cjs +165 -0
- package/src/get-user-videos.cjs +59 -0
- package/src/lib/args.js +227 -1
- package/src/lib/auto-browser.mjs +11 -0
- package/src/lib/constants.js +46 -0
- package/src/lib/explore.js +27 -1
- package/src/lib/fetcher.js +20 -10
- package/src/lib/get-user-videos-browser.mjs +6 -0
- package/src/lib/scrape-browser.mjs +6 -0
- package/src/main.mjs +303 -1
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/core.cjs +192 -0
- package/src/scraper/index.cjs +93 -0
- package/src/scraper/modules/comment-extractor.cjs +122 -0
- package/src/scraper/modules/page-helpers.cjs +422 -0
- package/src/scraper/modules/video-scanner.cjs +43 -0
- package/src/watch/public/index.html +266 -0
- package/src/watch/server.mjs +145 -0
package/src/main.mjs
CHANGED
|
@@ -6,6 +6,7 @@ import { deduplicate, formatOutput } from './lib/output.js';
|
|
|
6
6
|
import { parseFilter, applyFilter, formatFilterDescription } from './lib/filter.js';
|
|
7
7
|
import { createProgressBar, calculateConcurrency, createMultiProgressBars, renderMultiProgressBars, clearProgressBars } from './lib/io.js';
|
|
8
8
|
import { writeFileSync, readFileSync, existsSync } from 'fs';
|
|
9
|
+
import { startWatchServer, openBrowser } from './watch/server.mjs';
|
|
9
10
|
|
|
10
11
|
function showConfig(urls, outputFile) {
|
|
11
12
|
const lines = [...CONFIG_TEXT];
|
|
@@ -313,8 +314,309 @@ async function runScrape(urls, proxyUrl, outputFile, outputFormat, filter) {
|
|
|
313
314
|
}
|
|
314
315
|
}
|
|
315
316
|
|
|
317
|
+
/**
 * Handler for the `scrape` subcommand.
 *
 * Runs the browser-based scraper starting from `options.scrapeUrl`, then
 * writes the JSON result to `options.outputFile` (or stdout when no file is
 * given) and prints a short summary to stderr.
 *
 * @param {object} options - Parsed CLI options. Reads `scrapeUrl`,
 *   `scrapePreset`, `scrapeMaxVideos`, `scrapeMaxComments`,
 *   `scrapeSwitchDelay`, `scrapeCommentDelay` and `outputFile`.
 * @returns {Promise<void>} Resolves on success. On a missing URL or on any
 *   scrape/write failure the process exits with code 1.
 */
async function handleScrape(options) {
  const {
    scrapeUrl,
    scrapePreset,
    scrapeMaxVideos,
    scrapeMaxComments,
    scrapeSwitchDelay,
    scrapeCommentDelay,
    outputFile,
  } = options;

  if (!scrapeUrl) {
    console.error('用法: tt-help scrape <视频URL> [preset] [最大视频数] [最大评论数] [-o 输出路径]');
    console.error('预设: fast, normal, slow, stealth');
    console.error('选项: -o, --output <路径> 输出到文件(默认输出到 stdout)');
    console.error(' --switch-delay <ms> 视频切换延迟(毫秒)');
    console.error(' --comment-delay <ms> 评论滚动延迟(毫秒)');
    process.exit(1);
  }

  const { runScrape } = await import('./lib/scrape-browser.mjs');

  let browser;
  let failed = false;
  try {
    const { output, browser: b } = await runScrape({
      videoUrl: scrapeUrl,
      maxVideos: scrapeMaxVideos,
      maxComments: scrapeMaxComments,
      preset: scrapePreset,
      switchMax: scrapeSwitchDelay,
      commentMax: scrapeCommentDelay,
      log: console.error,
    });
    browser = b;

    const json = JSON.stringify(output, null, 2);
    if (outputFile) {
      writeFileSync(outputFile, json, 'utf-8');
      console.error(`结果已写入: ${outputFile}`);
    } else {
      process.stdout.write(json + '\n');
    }

    const stats = output.stats;
    console.error(`\n共 ${stats.totalVideos} 个视频, ${stats.uniqueVideoAuthors} 个视频作者, ${stats.uniqueCommentAuthors} 个评论作者`);
  } catch (err) {
    console.error(`浏览器抓取失败: ${err.message}`);
    // Do not call process.exit() here: it terminates the process immediately,
    // which would skip the `finally` block and leave the browser open.
    failed = true;
  } finally {
    if (browser) await browser.close().catch(() => {});
  }

  // Exit with a failure code only after the browser has been released.
  if (failed) process.exit(1);
}
|
|
361
|
+
|
|
362
|
+
/**
 * Handler for the `watch` subcommand.
 *
 * Validates that a data file was given and exists, starts the watch server
 * for it, opens the browser on the server's port, and installs a one-shot
 * SIGINT handler that closes the server and exits with code 0.
 *
 * @param {object} options - Parsed CLI options. Reads `outputFile` and `watchPort`.
 * @returns {Promise<void>} Resolves once the server is started; exits the
 *   process with code 1 when the data file is missing or not specified.
 */
async function handleWatch(options) {
  const dataFile = options.outputFile;
  const requestedPort = options.watchPort;

  if (!dataFile) {
    const usageLines = [
      '用法: tt-help watch -o <数据文件> [-p 端口]',
      '示例: tt-help watch -o data.json',
      ' tt-help watch -o data.json -p 8080',
    ];
    for (const line of usageLines) {
      console.error(line);
    }
    process.exit(1);
  }

  const fileIsPresent = existsSync(dataFile);
  if (!fileIsPresent) {
    console.error(`文件不存在: ${dataFile}`);
    process.exit(1);
  }

  const started = await startWatchServer(dataFile, requestedPort);
  const httpServer = started.server;
  const boundPort = started.port;
  openBrowser(boundPort);

  // Ctrl+C: stop serving, then terminate cleanly.
  const shutDown = () => {
    httpServer.close();
    process.exit(0);
  };
  process.once('SIGINT', shutDown);

  console.error(`按 Ctrl+C 停止监控服务`);
}
|
|
387
|
+
|
|
388
|
+
/**
 * Handler for the `auto` subcommand.
 *
 * Builds a work queue of usernames (command-line names first, then pending
 * non-restricted users from the data store), processes each one through
 * `processUser`, and records the outcome in the store. Video authors and
 * comment authors discovered along the way are added to the store and, when
 * they have no `followerCount` yet, appended to the same queue, so the queue
 * can grow while it is being consumed.
 *
 * @param {object} options - Parsed CLI options. Reads `autoUsernames`,
 *   `autoCollectMax`, `autoScrapeDepth`, `autoMaxComments`, `autoPreset`,
 *   `autoSwitchDelay`, `autoCommentDelay`, `outputFile`, `autoWatch` and
 *   `autoWatchPort`.
 * @returns {Promise<void>} Resolves when the queue is drained. Exits the
 *   process with code 1 when `--watch` is used without an output file or when
 *   processing throws.
 */
async function handleAuto(options) {
  const {
    autoUsernames,
    autoCollectMax,
    autoScrapeDepth,
    autoMaxComments,
    autoPreset,
    autoSwitchDelay,
    autoCommentDelay,
    outputFile,
    autoWatch,
    autoWatchPort,
  } = options;

  const runOptions = {
    collectMax: autoCollectMax,
    scrapeDepth: autoScrapeDepth,
    maxComments: autoMaxComments,
    preset: autoPreset,
    switchMax: autoSwitchDelay,
    commentMax: autoCommentDelay,
  };

  // Data store (a CommonJS module, hence createRequire from this ES module).
  const { createRequire } = await import('module');
  const require = createRequire(import.meta.url);
  const { createStore } = require('./data-store.cjs');
  const store = createStore(outputFile);

  // Build the queue: usernames from the command line go first, unprocessed
  // users from the store are appended after them. `queued` mirrors `queue`
  // so membership checks stay O(1) as the queue grows.
  const queued = new Set(autoUsernames);
  const queue = [...queued];
  const enqueue = (uniqueId) => {
    if (queued.has(uniqueId)) return;
    queued.add(uniqueId);
    queue.push(uniqueId);
  };

  const pendingFromStore = store.getPendingUsers().filter((u) => !u.restricted);
  for (const pending of pendingFromStore) {
    enqueue(pending.uniqueId);
  }

  if (queue.length === 0) {
    console.error('没有待处理的用户');
    return;
  }

  console.error(`队列: ${queue.length} 个用户待处理`);
  if (autoUsernames.length > 0) {
    console.error(` 命令行: @${autoUsernames.join(', @')}`);
  }
  if (pendingFromStore.length > 0) {
    console.error(` 数据源: ${pendingFromStore.length} 个未处理用户`);
  }

  // Optional watch server that serves the output file while the crawl runs.
  let watchServer = null;
  let watchPort = null;
  if (autoWatch) {
    if (!outputFile) {
      console.error('--watch 需要指定 -o 输出文件');
      process.exit(1);
    }
    ({ server: watchServer, port: watchPort } = await startWatchServer(outputFile, autoWatchPort || 3000));
    openBrowser(watchPort);
  }

  // Launch / attach to the browser.
  const { ensureBrowserReady, processUser } = await import('./lib/auto-browser.mjs');

  const browser = await ensureBrowserReady();

  let failed = false;
  try {
    // Reuse an already-open TikTok tab when there is one; otherwise open a
    // new page in the first context (creating a context if none exists).
    const contexts = browser.contexts();
    let page = null;
    for (const ctx of contexts) {
      for (const p of ctx.pages()) {
        if (p.url().includes('tiktok.com')) {
          page = p;
          break;
        }
      }
      if (page) break;
    }
    if (!page) {
      const defaultCtx = contexts[0] || await browser.newContext();
      page = await defaultCtx.newPage();
    }

    let processedCount = 0;
    let errorCount = 0;

    // `queue.length` is re-read every iteration because discovered users are
    // appended while the loop is running.
    for (let i = 0; i < queue.length; i++) {
      const username = queue[i];
      console.error(`\n[${i + 1}/${queue.length}] 处理 @${username}...`);

      const result = await processUser(page, username, { ...runOptions, browser }, console.error);

      if (result.restricted) {
        store.addUser({
          uniqueId: username,
          restricted: true,
          sources: ['restricted'],
        });
        store.save();
        continue;
      }

      if (result.error) {
        errorCount++;
        store.addUser({
          uniqueId: username,
          error: result.error,
          sources: ['error'],
        });
        store.save();
        continue;
      }

      // Record the user's info (always merged, whether or not it already exists).
      const userEntry = {
        uniqueId: username,
        ...result.userInfo,
        sources: ['processed'],
      };
      store.addUser(userEntry);

      // Discovered video authors (always merged, whether or not they already exist).
      for (const va of result.discoveredVideoAuthors) {
        store.addUser({
          uniqueId: va.uniqueId,
          nickname: va.nickname,
          locationCreated: va.locationCreated,
          sources: ['video'],
        });
        // Only users without a follower count still need to be visited.
        if (!store.getUser(va.uniqueId)?.followerCount) {
          enqueue(va.uniqueId);
        }
      }

      // Discovered comment authors (names may arrive with a leading "@").
      for (const ca of result.discoveredCommentAuthors) {
        const caId = ca.replace(/^@/, '');
        store.addUser({
          uniqueId: caId,
          sources: ['comment'],
        });
        if (!store.getUser(caId)?.followerCount) {
          enqueue(caId);
        }
      }

      processedCount++;
      store.save();
      console.error(` 已保存,当前共 ${store.getAllUsers().length} 个用户`);
    }

    const output = store.getAllUsers();
    if (outputFile) {
      console.error(`\n完成: ${processedCount} 个用户已处理, ${errorCount} 个出错, 共 ${output.length} 个用户`);
      console.error(`数据已保存到: ${outputFile}`);
    } else {
      const json = JSON.stringify(output, null, 2);
      process.stdout.write(json + '\n');
    }
  } catch (err) {
    console.error(`自动抓取失败: ${err.message}`);
    // Do not call process.exit() here: it terminates the process immediately,
    // which would skip the `finally` block and leave the browser open.
    failed = true;
  } finally {
    await browser.close().catch(() => {});
    if (watchServer) {
      watchServer.close();
      console.error(`Watch 监控服务已停止: http://127.0.0.1:${watchPort}`);
    }
  }

  // Exit with a failure code only after the browser and server are released.
  if (failed) process.exit(1);
}
|
|
554
|
+
|
|
555
|
+
/**
 * Handler for the `videos` subcommand.
 *
 * Collects the video list of `options.videosUsername` through the browser
 * helper, writes the JSON result to `options.outputFile` (or stdout when no
 * file is given) and prints the number of videos to stderr.
 *
 * @param {object} options - Parsed CLI options. Reads `videosUsername`,
 *   `videosMax` and `outputFile`.
 * @returns {Promise<void>} Resolves on success. On a missing username or on
 *   any fetch/write failure the process exits with code 1.
 */
async function handleVideos(options) {
  const { videosUsername, videosMax, outputFile } = options;

  if (!videosUsername) {
    console.error('用法: tt-help videos <用户名> [最大视频数] [-o 输出路径]');
    console.error('示例: tt-help videos bar.lar.lar.moeta 1000');
    console.error(' tt-help videos username 50 -o videos.json');
    console.error('');
    console.error('选项: -o, --output <路径> 输出到文件(默认输出到 stdout)');
    process.exit(1);
  }

  const { runGetUserVideos } = await import('./lib/get-user-videos-browser.mjs');

  let browser;
  let failed = false;
  try {
    const { output, browser: b } = await runGetUserVideos({
      username: videosUsername,
      maxVideos: videosMax,
      log: console.error,
    });
    browser = b;

    const json = JSON.stringify(output, null, 2);
    if (outputFile) {
      writeFileSync(outputFile, json, 'utf-8');
      console.error(`结果已写入: ${outputFile}`);
    } else {
      process.stdout.write(json + '\n');
    }

    const videoCount = output.videos.length;
    console.error(`\n共 ${videoCount} 个视频, 用户: @${videosUsername}`);
  } catch (err) {
    console.error(`获取用户视频失败: ${err.message}`);
    // Do not call process.exit() here: it terminates the process immediately,
    // which would skip the `finally` block and leave the browser open.
    failed = true;
  } finally {
    if (browser) await browser.close().catch(() => {});
  }

  // Exit with a failure code only after the browser has been released.
  if (failed) process.exit(1);
}
|
|
595
|
+
|
|
316
596
|
async function main() {
|
|
317
|
-
const
|
|
597
|
+
const parsed = parseArgs();
|
|
598
|
+
|
|
599
|
+
if (parsed.subcommand === 'scrape') {
|
|
600
|
+
await handleScrape(parsed);
|
|
601
|
+
return;
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
if (parsed.subcommand === 'videos') {
|
|
605
|
+
await handleVideos(parsed);
|
|
606
|
+
return;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
if (parsed.subcommand === 'auto') {
|
|
610
|
+
await handleAuto(parsed);
|
|
611
|
+
return;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
if (parsed.subcommand === 'watch') {
|
|
615
|
+
await handleWatch(parsed);
|
|
616
|
+
return;
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, customProxy, configAction, configValue, pipeMode, filterStr } = parsed;
|
|
318
620
|
const proxyUrl = customProxy || proxy;
|
|
319
621
|
const filter = parseFilter(filterStr);
|
|
320
622
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"user": {
|
|
3
|
+
"uniqueId": "bar.lar.lar.moeta",
|
|
4
|
+
"secUid": "MS4wLjABAAAA3cgKTWvKfga0JAWeakAzx3zQ-aFAC8RuQvxD4HQFraKKsc_TbOIyMo3_ofVlXofV",
|
|
5
|
+
"nickname": "Bar Lar Lar Moetain",
|
|
6
|
+
"ttSeller": false,
|
|
7
|
+
"verified": false,
|
|
8
|
+
"followerCount": 24000,
|
|
9
|
+
"videoCount": 749,
|
|
10
|
+
"followingCount": 4293,
|
|
11
|
+
"heartCount": 254300,
|
|
12
|
+
"signature": ""
|
|
13
|
+
},
|
|
14
|
+
"totalVideos": 5,
|
|
15
|
+
"videos": [
|
|
16
|
+
{
|
|
17
|
+
"id": "7638231799084158228",
|
|
18
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638231799084158228"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "7638162444698914068",
|
|
22
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638162444698914068"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"id": "7638116251767819541",
|
|
26
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638116251767819541"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "7638069637321690388",
|
|
30
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638069637321690388"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "7637927171025112341",
|
|
34
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7637927171025112341"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
const {
|
|
2
|
+
closeCommentPanel,
|
|
3
|
+
delay,
|
|
4
|
+
ensureBrowserReady,
|
|
5
|
+
ensureTikTokPage,
|
|
6
|
+
setDelayConfig,
|
|
7
|
+
getDelayConfig,
|
|
8
|
+
retryWithBackoff,
|
|
9
|
+
} = require("./modules/page-helpers.cjs");
|
|
10
|
+
const { extractCommentAuthors } = require("./modules/comment-extractor.cjs");
|
|
11
|
+
|
|
12
|
+
/**
 * Scrape the video currently shown on `page`.
 *
 * Reads the author id from the page URL, a nickname from the first suitable
 * `[class*="Author"]` element and `locationCreated` from the page HTML, then
 * collects comment authors and closes the comment panel.
 *
 * @param {object} page - Browser page; `waitForSelector` and `evaluate` are called on it.
 * @param {number} maxComments - Forwarded to `extractCommentAuthors`.
 * @param {Function} log - Logger; accepted for interface parity, not used here.
 * @returns {Promise<{videoAuthor: string, uniqueId: string, nickname: (string|undefined),
 *   locationCreated: (string|undefined), commentUsers: string[]}>} De-duplicated result.
 * @throws {Error} When the author id cannot be read from the URL.
 */
async function scrapeSingleVideo(page, maxComments, log) {
  const config = getDelayConfig();
  // Randomised pause bounded by the configured comment delay.
  const pause = () => delay(Math.round(config.commentMax * 0.3), config.commentMax);

  // Best effort: carry on even when the meta block never shows up.
  await page
    .waitForSelector('[class*="VideoMeta"]', { timeout: 10000 })
    .catch(() => {});
  await pause();

  // Runs inside the page, so it may only use browser globals.
  const userData = await page.evaluate(() => {
    const info = {};

    const urlMatch = window.location.href.match(/\/@([^\/]+)\/video/);
    if (urlMatch) info.uniqueId = urlMatch[1];

    const nicknameText = Array.from(document.querySelectorAll('[class*="Author"]'))
      .map((el) => (el.textContent || "").trim())
      .find((text) => text && !text.includes("TikTok") && !text.includes("Share"));
    if (nicknameText) info.nickname = nicknameText;

    const locMatch = document.documentElement.outerHTML.match(/"locationCreated":"([^"]*)/);
    if (locMatch) info.locationCreated = locMatch[1];

    return info;
  });

  if (!userData.uniqueId) {
    throw new Error("无法获取视频作者");
  }
  const videoAuthor = "@" + userData.uniqueId;

  const rawCommentUsers = await extractCommentAuthors(page, maxComments);
  await closeCommentPanel(page);
  await pause();

  const { uniqueId, nickname, locationCreated } = userData;
  return {
    videoAuthor,
    uniqueId,
    nickname,
    locationCreated,
    commentUsers: Array.from(new Set(rawCommentUsers)),
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Scrape up to `maxVideos` consecutive videos starting at `videoUrl`.
 *
 * Applies the delay configuration (a named preset, or explicit switch/comment
 * maxima), navigates to the start URL, then repeatedly scrapes the current
 * video and scrolls the feed to the next one. Videos that fail to scrape are
 * logged and skipped.
 *
 * When both `browser` and `page` are passed in, they are used as-is and never
 * closed here. Otherwise a browser is obtained via `ensureBrowserReady`; it is
 * returned to the caller on success and closed here if anything throws, since
 * the caller never receives the handle in that case.
 *
 * @param {object} options
 * @param {string} options.videoUrl - URL of the first video.
 * @param {number} [options.maxVideos=20] - Maximum number of videos to visit.
 * @param {number} [options.maxComments=999] - Forwarded to `scrapeSingleVideo`.
 * @param {?string} [options.preset=null] - Delay preset passed to `setDelayConfig`.
 * @param {?number} [options.switchMax=null] - Switch delay; used when no preset is given.
 * @param {?number} [options.commentMax=null] - Comment delay; used when no preset is given.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @param {?object} [options.browser=null] - Externally owned browser.
 * @param {?object} [options.page=null] - Externally owned page.
 * @returns {Promise<{output: object, browser: object, isExternal: boolean}>}
 * @throws Rethrows navigation or scraping-loop errors after releasing a
 *   self-owned browser.
 */
async function runScrape(options) {
  const {
    videoUrl,
    maxVideos = 20,
    maxComments = 999,
    preset = null,
    switchMax = null,
    commentMax = null,
    log = console.error,
    browser: externalBrowser = null,
    page: externalPage = null,
  } = options;

  if (preset) {
    setDelayConfig(preset);
  } else if (switchMax || commentMax) {
    setDelayConfig({
      switchMax: switchMax || 5000,
      commentMax: commentMax || 3000,
    });
  }

  const config = getDelayConfig();

  let browser;
  let page;
  const isExternal = !!(externalBrowser && externalPage);

  if (isExternal) {
    browser = externalBrowser;
    page = externalPage;
  } else {
    log(`视频地址: ${videoUrl}`);
    log(
      `视频数: ${maxVideos}, 评论数: ${maxComments}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`,
    );
    browser = await ensureBrowserReady();
    try {
      page = await ensureTikTokPage(browser, videoUrl);
    } catch (e) {
      await browser.close().catch(() => {});
      throw e;
    }
  }

  try {
    await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: "load", timeout: 30000 }), { log });
    await delay(Math.round(config.switchMax * 0.5), config.switchMax);
    await closeCommentPanel(page);
    await delay(Math.round(config.commentMax * 0.5), config.commentMax);

    const allResults = [];
    const videoAuthors = new Set();
    const commentUsers = new Set();
    const allCommentAuthorsList = [];

    for (let i = 0; i < maxVideos; i++) {
      await delay(Math.round(config.commentMax * 0.3), config.commentMax);

      let result;
      try {
        result = await scrapeSingleVideo(page, maxComments, log);
      } catch (e) {
        // A single bad video must not abort the run: log it, move on.
        log(`[${i + 1}/${maxVideos}] 跳过: ${e.message}`);
        if (i < maxVideos - 1) {
          await page.evaluate(() => {
            const container = document.querySelector(
              '[class*="ColumnListContainer"]',
            );
            if (container) container.scrollTop += 700;
            else window.scrollBy(0, 700);
          });
          await delay(Math.round(config.switchMax * 0.5), config.switchMax);
        }
        continue;
      }
      allResults.push(result);
      videoAuthors.add(result.videoAuthor);
      result.commentUsers.forEach((u) => commentUsers.add(u));
      allCommentAuthorsList.push(...result.commentUsers);

      // Progress line for the first video and every fifth one after that.
      if ((i + 1) % 5 === 0 || i === 0) {
        log(
          `[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || "-"} | 评论用户: ${result.commentUsers.length}`,
        );
      }

      if (i < maxVideos - 1) {
        await page.evaluate(() => {
          const container = document.querySelector(
            '[class*="ColumnListContainer"]',
          );
          if (container) container.scrollTop += 700;
        });
        // NOTE(review): the lower bound is a fixed 2000 ms here, which can
        // exceed config.switchMax for short presets — confirm how `delay`
        // treats min > max.
        await delay(2000, config.switchMax);
      }
    }

    log(
      `\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length}`,
    );

    // One detail record per author; the first video seen for an author wins.
    const detailsByAuthor = new Map();
    for (const r of allResults) {
      if (!detailsByAuthor.has(r.videoAuthor)) {
        detailsByAuthor.set(r.videoAuthor, {
          videoAuthor: r.videoAuthor,
          uniqueId: r.uniqueId,
          nickname: r.nickname,
          locationCreated: r.locationCreated,
        });
      }
    }

    const output = {
      videoDetails: [...detailsByAuthor.values()],
      commentUsers: [...commentUsers].sort(),
      allCommentAuthorsList,
      stats: {
        totalVideos: allResults.length,
        uniqueVideoAuthors: videoAuthors.size,
        uniqueCommentAuthors: commentUsers.size,
      },
    };

    return { output, browser, isExternal };
  } catch (err) {
    // The caller only receives `browser` on success, so a browser opened by
    // this function has to be released here or it leaks.
    if (!isExternal) {
      await browser.close().catch(() => {});
    }
    throw err;
  }
}
|
|
191
|
+
|
|
192
|
+
module.exports = { scrapeSingleVideo, runScrape };
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
const { runScrape } = require("./core.cjs");
|
|
2
|
+
|
|
3
|
+
/**
 * Command-line entry point of the standalone scraper.
 *
 * Accepted argument shapes (besides `-o` / `--output <path>`, which may
 * appear anywhere):
 *   preset mode: <videoUrl> <preset> [maxVideos] [maxComments]
 *   manual mode: <videoUrl> [maxVideos] [maxComments] [switchDelayMs] [commentDelayMs]
 *
 * Runs `runScrape` and writes the JSON result to the output path (creating
 * parent directories) or to stdout. Exits with code 1 when the URL is missing
 * or when scraping/writing fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rawArgs = process.argv.slice(2);

  // Split "-o <path>" out of the positional arguments.
  let outputPath = null;
  const args = [];
  for (let i = 0; i < rawArgs.length; i++) {
    if (rawArgs[i] === "-o" || rawArgs[i] === "--output") {
      outputPath = rawArgs[++i];
    } else {
      args.push(rawArgs[i]);
    }
  }

  const videoUrl = args[0];

  // Always parse as base 10; missing or non-numeric input yields NaN, which
  // the `||` fallbacks below turn into the default.
  const toInt = (value) => Number.parseInt(value, 10);

  let preset = null;
  let maxVideos = 20;
  let maxComments = 999;
  let switchMax = null;
  let commentMax = null;

  if (args[1]) {
    if (["fast", "normal", "slow", "stealth"].includes(args[1].toLowerCase())) {
      preset = args[1].toLowerCase();
      maxVideos = toInt(args[2]) || 20;
      maxComments = toInt(args[3]) || 999;
    } else {
      maxVideos = toInt(args[1]) || 20;
      maxComments = toInt(args[2]) || 999;
      switchMax = toInt(args[3]) || null;
      commentMax = toInt(args[4]) || null;
    }
  }

  if (!videoUrl) {
    console.error("用法:");
    console.error(
      " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [-o 输出路径]",
    );
    console.error(
      " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
    );
    console.error(
      "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
    );
    console.error(
      "选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout",
    );
    process.exit(1);
  }

  let browser;
  let failed = false;
  try {
    const { output, browser: b, isExternal } = await runScrape({
      videoUrl,
      maxVideos,
      maxComments,
      preset,
      switchMax,
      commentMax,
      log: console.error,
    });
    // Only a browser opened by runScrape itself is ours to close.
    if (!isExternal) {
      browser = b;
    }

    const json = JSON.stringify(output, null, 2);
    if (outputPath) {
      const fs = require("fs");
      const path = require("path");
      const resultFile = path.isAbsolute(outputPath)
        ? outputPath
        : path.resolve(outputPath);
      fs.mkdirSync(path.dirname(resultFile), { recursive: true });
      fs.writeFileSync(resultFile, json);
      console.error(`已保存到 ${resultFile}`);
    } else {
      process.stdout.write(json + "\n");
    }
  } catch (err) {
    console.error(err.message);
    // Do not call process.exit() here: it terminates the process immediately,
    // which would skip the `finally` block and leave the browser open.
    failed = true;
  } finally {
    if (browser) await browser.close().catch(() => {});
  }

  // Exit with a failure code only after the browser has been released.
  if (failed) process.exit(1);
}
|
|
89
|
+
|
|
90
|
+
main().catch((err) => {
|
|
91
|
+
console.error(err.message);
|
|
92
|
+
process.exit(1);
|
|
93
|
+
});
|