tt-help-cli-ycl 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/auto-core.mjs +174 -0
  3. package/src/cli/auto.js +94 -0
  4. package/src/cli/explore.js +117 -0
  5. package/src/cli/progress.js +111 -0
  6. package/src/cli/scrape.js +47 -0
  7. package/src/cli/utils.js +18 -0
  8. package/src/cli/videos.js +41 -0
  9. package/src/cli/watch.js +28 -0
  10. package/src/data-store.mjs +213 -0
  11. package/src/{explore-core.cjs → explore-core.mjs} +148 -157
  12. package/src/{get-user-videos-core.cjs → get-user-videos-core.mjs} +6 -23
  13. package/src/lib/args.js +19 -38
  14. package/src/lib/auto-browser.mjs +5 -12
  15. package/src/lib/browser/anti-detect.js +23 -0
  16. package/src/lib/browser/cdp.js +142 -0
  17. package/src/lib/browser/launch.js +43 -0
  18. package/src/lib/browser/page.js +62 -0
  19. package/src/lib/constants.js +13 -95
  20. package/src/lib/delay.js +54 -0
  21. package/src/lib/explore.js +16 -123
  22. package/src/lib/fetcher.js +3 -18
  23. package/src/lib/get-user-videos-browser.mjs +1 -6
  24. package/src/lib/io.js +8 -30
  25. package/src/lib/parser.js +1 -1
  26. package/src/lib/retry.js +44 -0
  27. package/src/lib/scrape-browser.mjs +1 -6
  28. package/src/lib/scrape.js +5 -4
  29. package/src/lib/url.js +52 -0
  30. package/src/main.mjs +59 -822
  31. package/src/scraper/{core.cjs → core.mjs} +25 -57
  32. package/src/scraper/modules/{comment-extractor.cjs → comment-extractor.mjs} +23 -15
  33. package/src/scraper/modules/follow-extractor.mjs +121 -0
  34. package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +3 -5
  35. package/src/scraper/modules/page-error-detector.mjs +68 -0
  36. package/src/scraper/modules/page-helpers.mjs +44 -0
  37. package/src/scraper/modules/scroll-collector.mjs +189 -0
  38. package/src/watch/public/index.html +139 -64
  39. package/src/watch/server.mjs +234 -153
  40. package/src/auto-core.cjs +0 -367
  41. package/src/data-store.cjs +0 -69
  42. package/src/get-user-videos.cjs +0 -59
  43. package/src/scraper/index.cjs +0 -97
  44. package/src/scraper/modules/follow-extractor.cjs +0 -112
  45. package/src/scraper/modules/page-helpers.cjs +0 -422
  46. package/src/scraper/modules/scroll-collector.cjs +0 -173
  47. package/src/scraper/modules/video-scanner.cjs +0 -43
package/src/main.mjs CHANGED
@@ -1,12 +1,20 @@
1
1
  import { parseArgs } from './lib/args.js';
2
2
  import { HELP_TEXT, CONFIG_TEXT, proxy, configFile, configPath, DEFAULT_PROXY, saveBrowser } from './lib/constants.js';
3
- import { fetchExplore } from './lib/explore.js';
4
- import { processUrl } from './lib/scrape.js';
5
- import { deduplicate, formatOutput } from './lib/output.js';
6
3
  import { parseFilter, applyFilter, formatFilterDescription } from './lib/filter.js';
7
- import { createProgressBar, calculateConcurrency, createMultiProgressBars, renderMultiProgressBars, clearProgressBars } from './lib/io.js';
8
4
  import { writeFileSync, readFileSync, existsSync } from 'fs';
9
- import { startWatchServer, openBrowser } from './watch/server.mjs';
5
+ import { handleScrape } from './cli/scrape.js';
6
+ import { handleVideos } from './cli/videos.js';
7
+ import { handleAuto } from './cli/auto.js';
8
+ import { handleExplore } from './cli/explore.js';
9
+ import { handleWatch } from './cli/watch.js';
10
+ import { processUrlsWithProgress } from './cli/progress.js';
11
+ import { cleanError } from './cli/utils.js';
12
+ import { fileURLToPath } from 'url';
13
+ import { dirname, join } from 'path';
14
+
15
+ const __dirname = dirname(fileURLToPath(import.meta.url));
16
+ const pkgPath = join(__dirname, '..', 'package.json');
17
+ const { version } = JSON.parse(readFileSync(pkgPath, 'utf-8'));
10
18
 
11
19
  function showConfig(urls, outputFile) {
12
20
  const lines = [...CONFIG_TEXT];
@@ -74,37 +82,23 @@ function handleConfig(action, value) {
74
82
  process.exit(1);
75
83
  }
76
84
 
77
- function randomDelay() {
78
- return new Promise(r => setTimeout(r, Math.random() * 600 + 200));
79
- }
80
-
81
- function cleanError(msg) {
82
- return msg
83
- .replace(/\x1b\[[0-9;]*m/g, '')
84
- .replace(/\s*- navigating to.*/s, '')
85
- .replace(/\s*Call log:/s, '')
86
- .trim();
87
- }
88
-
89
- async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, isPipe, filter) {
85
+ async function runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter) {
90
86
  console.log(`\n代理: ${proxyUrl}`);
91
87
  console.log(`Explore 数量: ${exploreCount}`);
92
- if (urls.length > 0) {
93
- console.log(`额外 URL: ${urls.length}\n`);
94
- } else {
95
- console.log('');
96
- }
88
+ if (urls.length > 0) console.log(`额外 URL: ${urls.length}\n`);
89
+ else console.log('');
97
90
 
98
91
  const allResults = [];
99
92
 
100
93
  if (exploreCount > 0) {
101
94
  try {
95
+ const { fetchExplore } = await import('./lib/explore.js');
102
96
  const exploreResults = await fetchExplore(exploreCount);
103
97
  console.log(` 获取到 ${exploreResults.length} 个视频\n`);
104
- if (isPipe) {
98
+ if (pipeMode) {
105
99
  const videoUrls = exploreResults.map(r => r.url).filter(Boolean);
106
100
  if (videoUrls.length > 0) {
107
- await runScrape(videoUrls, proxyUrl, outputFile, outputFormat, filter);
101
+ await runScrapeDefault(videoUrls, proxyUrl, outputFile, outputFormat, filter);
108
102
  return;
109
103
  }
110
104
  }
@@ -116,844 +110,87 @@ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat
116
110
  }
117
111
 
118
112
  if (urls.length > 0) {
119
- const errors = [];
120
-
121
- const concurrency = calculateConcurrency(urls.length);
122
- const bars = createMultiProgressBars(concurrency);
123
-
124
- const slots = Array.from({ length: concurrency }, () => []);
125
- urls.forEach((url, i) => slots[i % concurrency].push(url));
126
-
127
- bars.forEach((bar, i) => {
128
- bar.total = slots[i].length;
129
- bar.status = slots[i].length > 0 ? 'running' : 'done';
113
+ const { processUrl } = await import('./lib/scrape.js');
114
+ await processUrlsWithProgress({
115
+ urls,
116
+ proxyUrl,
117
+ outputFile,
118
+ outputFormat,
119
+ filter,
120
+ processFn: (url, px) => processUrl(url, px),
121
+ label: '数据',
122
+ log: console.log,
130
123
  });
131
-
132
- renderMultiProgressBars(bars);
133
-
134
- const workers = slots.map(async (slotUrls, slotIndex) => {
135
- for (const url of slotUrls) {
136
- bars[slotIndex].url = url;
137
- renderMultiProgressBars(bars);
138
-
139
- await randomDelay();
140
-
141
- try {
142
- const results = await processUrl(url, proxyUrl);
143
- allResults.push(...results);
144
- bars[slotIndex].current++;
145
- bars[slotIndex].status = 'running';
146
- } catch (err) {
147
- errors.push({ url, message: err.message });
148
- bars[slotIndex].current++;
149
- bars[slotIndex].status = 'error';
150
- }
151
-
152
- renderMultiProgressBars(bars);
153
- }
154
- bars[slotIndex].status = bars[slotIndex].current === bars[slotIndex].total ? 'done' : 'error';
155
- renderMultiProgressBars(bars);
156
- });
157
-
158
- await Promise.all(workers);
159
-
160
- clearProgressBars();
161
- console.log();
162
-
163
- if (errors.length > 0) {
164
- const msg = errors[0].message;
165
- if (msg.includes('不可用') || msg.includes('连接被拒绝') || msg.includes('连接中断') ||
166
- msg.includes('超时') || msg.includes('无法解析')) {
167
- console.error(` ${errors.length} 个请求失败,请检查代理是否可用: ${proxyUrl}\n`);
168
- } else {
169
- console.error(` ${errors.length} 个失败:`);
170
- const show = errors.slice(0, 5);
171
- for (const e of show) {
172
- console.error(` ✗ ${e.url}: ${e.message}`);
173
- }
174
- if (errors.length > 5) {
175
- console.error(` ... 还有 ${errors.length - 5} 个`);
176
- }
177
- }
178
- }
124
+ return;
179
125
  }
180
126
 
127
+ const { deduplicate, formatOutput } = await import('./lib/output.js');
181
128
  const uniqueResults = deduplicate(allResults);
182
129
  const filteredResults = applyFilter(uniqueResults, filter);
183
130
 
184
131
  if (filteredResults.length === 0) {
185
132
  console.log('\n未获取到数据');
186
- if (outputFile) {
187
- writeFileSync(outputFile, '[]', 'utf-8');
188
- }
133
+ if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
189
134
  return;
190
135
  }
191
136
 
192
137
  const output = formatOutput(filteredResults, outputFormat);
193
-
194
138
  if (outputFile) {
195
139
  writeFileSync(outputFile, output, 'utf-8');
196
140
  console.log(`\n结果已写入: ${outputFile}`);
197
141
  } else {
198
142
  console.log(output);
199
143
  }
200
-
201
- if (filter) {
202
- console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
203
- } else {
204
- console.log(`\n共 ${filteredResults.length} 个数据`);
205
- }
206
- }
207
-
208
- async function runScrape(urls, proxyUrl, outputFile, outputFormat, filter) {
209
- const allResults = [];
210
- const errors = [];
211
-
212
- if (urls.length === 0) {
213
- console.log('\n未获取到数据');
214
- if (outputFile) {
215
- writeFileSync(outputFile, '[]', 'utf-8');
216
- }
217
- return;
218
- }
219
-
220
- const concurrency = calculateConcurrency(urls.length);
221
- const bars = createMultiProgressBars(concurrency);
222
-
223
- const slots = Array.from({ length: concurrency }, () => []);
224
- urls.forEach((url, i) => slots[i % concurrency].push(url));
225
-
226
- bars.forEach((bar, i) => {
227
- bar.total = slots[i].length;
228
- bar.status = slots[i].length > 0 ? 'running' : 'done';
229
- });
230
-
231
- renderMultiProgressBars(bars);
232
144
 
233
- const workers = slots.map(async (slotUrls, slotIndex) => {
234
- for (const url of slotUrls) {
235
- bars[slotIndex].url = url;
236
- renderMultiProgressBars(bars);
237
-
238
- try {
239
- const results = await processUrl(url, proxyUrl);
240
- allResults.push(...results);
241
- bars[slotIndex].current++;
242
- bars[slotIndex].status = 'running';
243
- } catch (err) {
244
- errors.push({ url, message: err.message });
245
- bars[slotIndex].current++;
246
- bars[slotIndex].status = 'error';
247
- }
248
-
249
- renderMultiProgressBars(bars);
250
- }
251
- bars[slotIndex].status = bars[slotIndex].current === bars[slotIndex].total ? 'done' : 'error';
252
- renderMultiProgressBars(bars);
253
- });
254
-
255
- await Promise.all(workers);
256
-
257
- clearProgressBars();
258
- console.log();
259
-
260
- const uniqueResults = deduplicate(allResults);
261
- const filteredResults = applyFilter(uniqueResults, filter);
262
-
263
- if (errors.length > 0) {
264
- if (filteredResults.length === 0) {
265
- const msg = errors[0].message;
266
- if (msg.includes('不可用') || msg.includes('连接被拒绝') || msg.includes('连接中断') ||
267
- msg.includes('超时') || msg.includes('无法解析')) {
268
- console.error(` 所有请求失败,请检查代理是否可用: ${proxyUrl}\n`);
269
- } else {
270
- const show = errors.slice(0, 5);
271
- for (const e of show) {
272
- console.error(` ✗ ${e.url}: ${e.message}\n`);
273
- }
274
- if (errors.length > 5) {
275
- console.error(` ... 还有 ${errors.length - 5} 个失败\n`);
276
- }
277
- }
278
- console.log('未获取到数据');
279
- if (outputFile) {
280
- writeFileSync(outputFile, '[]', 'utf-8');
281
- }
282
- return;
283
- } else {
284
- const msg = errors[0].message;
285
- if (msg.includes('不可用') || msg.includes('连接被拒绝') || msg.includes('连接中断') ||
286
- msg.includes('超时') || msg.includes('无法解析')) {
287
- console.error(` ${errors.length} 个请求失败,请检查代理是否可用: ${proxyUrl}\n`);
288
- } else {
289
- console.error(` ${errors.length} 个失败:`);
290
- const show = errors.slice(0, 5);
291
- for (const e of show) {
292
- console.error(` ✗ ${e.url}: ${e.message}`);
293
- }
294
- if (errors.length > 5) {
295
- console.error(` ... 还有 ${errors.length - 5} 个`);
296
- }
297
- }
298
- }
299
- }
300
-
301
- const output = formatOutput(filteredResults, outputFormat);
302
-
303
- if (outputFile) {
304
- writeFileSync(outputFile, output, 'utf-8');
305
- console.log(`\n结果已写入: ${outputFile}`);
306
- } else {
307
- console.log(output);
308
- }
309
-
310
145
  if (filter) {
311
146
  console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
312
147
  } else {
313
- console.log(`\n共 ${filteredResults.length} 个用户的数据`);
314
- }
315
- }
316
-
317
- async function handleScrape(options) {
318
- const { scrapeUrl, scrapePreset, scrapeMaxVideos, scrapeMaxComments, scrapeMaxGuess, scrapeSwitchDelay, scrapeCommentDelay, outputFile } = options;
319
-
320
- if (!scrapeUrl) {
321
- console.error('用法: tt-help scrape <视频URL> [preset] [最大视频数] [最大评论数] [-o 输出路径]');
322
- console.error('预设: fast, normal, slow, stealth');
323
- console.error('选项: -o, --output <路径> 输出到文件(默认输出到 stdout)');
324
- console.error(' --switch-delay <ms> 视频切换延迟(毫秒)');
325
- console.error(' --comment-delay <ms> 评论滚动延迟(毫秒)');
326
- process.exit(1);
327
- }
328
-
329
- const { runScrape } = await import('./lib/scrape-browser.mjs');
330
-
331
- let browser;
332
- try {
333
- const { output, browser: b } = await runScrape({
334
- videoUrl: scrapeUrl,
335
- maxVideos: scrapeMaxVideos,
336
- maxComments: scrapeMaxComments,
337
- maxGuess: scrapeMaxGuess,
338
- preset: scrapePreset,
339
- switchMax: scrapeSwitchDelay,
340
- commentMax: scrapeCommentDelay,
341
- log: console.error,
342
- });
343
- browser = b;
344
-
345
- const json = JSON.stringify(output, null, 2);
346
- if (outputFile) {
347
- writeFileSync(outputFile, json, 'utf-8');
348
- console.error(`结果已写入: ${outputFile}`);
349
- } else {
350
- process.stdout.write(json + '\n');
351
- }
352
-
353
- const stats = output.stats;
354
- console.error(`\n共 ${stats.totalVideos} 个视频, ${stats.uniqueVideoAuthors} 个视频作者, ${stats.uniqueCommentAuthors} 个评论作者, ${stats.uniqueGuessAuthors} 个猜你喜欢作者`);
355
- } catch (err) {
356
- console.error(`浏览器抓取失败: ${err.message}`);
357
- process.exit(1);
358
- } finally {
359
- if (browser) await browser.close().catch(() => {});
148
+ console.log(`\n共 ${filteredResults.length} 个数据`);
360
149
  }
361
150
  }
362
151
 
363
- async function handleExplore(options) {
364
- const {
365
- exploreUsernames,
366
- explorePreset,
367
- exploreMaxComments,
368
- exploreMaxGuess,
369
- exploreEnableFollow,
370
- exploreMaxFollowing,
371
- exploreMaxFollowers,
372
- exploreLocation,
373
- exploreWatch,
374
- exploreWatchPort,
375
- exploreMaxUsers,
152
+ async function runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter) {
153
+ const { processUrl } = await import('./lib/scrape.js');
154
+ await processUrlsWithProgress({
155
+ urls,
156
+ proxyUrl,
376
157
  outputFile,
377
- } = options;
378
-
379
- if ((!exploreUsernames || exploreUsernames.length === 0) && !outputFile) {
380
- console.error('用法: tt-help explore <用户名> [preset] [options]');
381
- console.error('示例: tt-help explore qiqi23280 fast --location ES --max-comments 50 --max-guess 10 -o results.json');
382
- console.error('');
383
- console.error('选项:');
384
- console.error(' --location <国家代码> 国家筛选,默认 ES');
385
- console.error(' --max-comments <数量> 每视频最大评论数,默认 100');
386
- console.error(' --max-guess <数量> 每视频最大猜你喜欢数,默认 0');
387
- console.error(' --enable-follow 启用关注/粉丝提取(默认启用)');
388
- console.error(' --disable-follow 禁用关注/粉丝提取');
389
- console.error(' --max-following <数量> 最大获取关注数,默认 200');
390
- console.error(' --max-followers <数量> 最大获取粉丝数,默认 200');
391
- console.error(' --max-users <数量> 最大处理用户数,默认 0(不限)');
392
- console.error(' -o, --output <路径> 输出文件(无用户名时从文件读取待处理用户)');
393
- console.error(' --watch 启动监控服务');
394
- console.error(' preset: fast | normal | slow | stealth(默认 normal)');
395
- process.exit(1);
396
- }
397
-
398
- const { createRequire } = await import('module');
399
- const require = createRequire(import.meta.url);
400
- const { createStore } = require('./data-store.cjs');
401
- const store = createStore(outputFile);
402
- const { setDelayConfig } = require('./scraper/modules/page-helpers.cjs');
403
- setDelayConfig(explorePreset);
404
-
405
- // 构建队列
406
- const queue = exploreUsernames ? [...new Set(exploreUsernames.map(u => u.replace(/^@/, '')))] : [];
407
- const existingUsers = store.getAllUsers();
408
- for (const u of existingUsers) {
409
- if (!u.processed && !queue.includes(u.uniqueId)) {
410
- queue.push(u.uniqueId);
411
- }
412
- }
413
-
414
- if (queue.length === 0) {
415
- console.error('没有待处理的用户');
416
- return;
417
- }
418
-
419
- console.error(`\n队列: ${queue.length} 个用户待处理`);
420
- console.error(` 国家筛选: ${exploreLocation}`);
421
- console.error(` 评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
422
- console.error(` 关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
423
- if (exploreMaxUsers > 0) {
424
- console.error(` 上限: ${exploreMaxUsers} 个用户`);
425
- }
426
-
427
- // Watch server
428
- let watchServer = null;
429
- let watchPortActual = null;
430
- if (exploreWatch) {
431
- if (!outputFile) {
432
- console.error('--watch 需要指定 -o 输出文件');
433
- process.exit(1);
434
- }
435
- ({ server: watchServer, port: watchPortActual } = await startWatchServer(outputFile, exploreWatchPort || 3000));
436
- openBrowser(watchPortActual);
437
- }
438
-
439
- // 启动浏览器
440
- const { ensureBrowserReady, processExplore } = await import('./lib/auto-browser.mjs');
441
- const browser = await ensureBrowserReady();
442
-
443
- try {
444
- const contexts = browser.contexts();
445
- let page = null;
446
- for (const ctx of contexts) {
447
- for (const p of ctx.pages()) {
448
- if (p.url().includes('tiktok.com')) {
449
- page = p;
450
- break;
451
- }
452
- }
453
- if (page) break;
454
- }
455
- if (!page) {
456
- const defaultCtx = contexts[0] || await browser.newContext();
457
- page = await defaultCtx.newPage();
458
- }
459
-
460
- let processedCount = 0;
461
- let errorCount = 0;
462
-
463
- for (let i = 0; i < queue.length; i++) {
464
- const username = queue[i];
465
- console.error(`\n[${i + 1}/${queue.length}] 探索 @${username}...`);
466
-
467
- // 确保页面稳定后再开始下一个用户
468
- await new Promise(r => setTimeout(r, 1000));
469
-
470
- const result = await processExplore(page, username, {
471
- maxComments: exploreMaxComments,
472
- maxGuess: exploreMaxGuess,
473
- enableFollow: exploreEnableFollow,
474
- maxFollowing: exploreMaxFollowing,
475
- maxFollowers: exploreMaxFollowers,
476
- location: exploreLocation,
477
- browser,
478
- }, console.error);
479
-
480
- if (result.restricted) {
481
- store.addUser({
482
- uniqueId: username,
483
- restricted: true,
484
- sources: ['restricted'],
485
- });
486
- store.save();
487
- continue;
488
- }
489
-
490
- if (result.error) {
491
- errorCount++;
492
- store.addUser({
493
- uniqueId: username,
494
- error: result.error,
495
- sources: ['error'],
496
- });
497
- store.save();
498
- continue;
499
- }
500
-
501
- // 写入用户信息
502
- const userEntry = {
503
- uniqueId: username,
504
- ...result.userInfo,
505
- processed: result.processed,
506
- hasFollowData: result.hasFollowData,
507
- keepFollow: result.keepFollow,
508
- locationCreated: result.locationCreated,
509
- noVideo: result.noVideo,
510
- sources: ['processed'],
511
- };
512
- store.addUser(userEntry);
513
-
514
- // 发现的视频作者
515
- for (const va of result.discoveredVideoAuthors) {
516
- store.addUser({
517
- uniqueId: va.uniqueId,
518
- nickname: va.nickname,
519
- locationCreated: va.locationCreated,
520
- sources: ['video'],
521
- });
522
- if (!store.getUser(va.uniqueId) || !store.getUser(va.uniqueId).processed) {
523
- if (!queue.includes(va.uniqueId)) {
524
- queue.push(va.uniqueId);
525
- }
526
- }
527
- }
528
-
529
- // 发现的评论作者
530
- for (const ca of result.discoveredCommentAuthors) {
531
- const caId = ca.replace(/^@/, '');
532
- store.addUser({
533
- uniqueId: caId,
534
- sources: ['comment'],
535
- });
536
- if (!store.getUser(caId) || !store.getUser(caId).processed) {
537
- if (!queue.includes(caId)) {
538
- queue.push(caId);
539
- }
540
- }
541
- }
542
-
543
- // 发现的猜你喜欢作者
544
- for (const ga of (result.discoveredGuessAuthors || [])) {
545
- const gaId = ga.replace(/^@/, '');
546
- store.addUser({
547
- uniqueId: gaId,
548
- sources: ['guess'],
549
- });
550
- if (!store.getUser(gaId) || !store.getUser(gaId).processed) {
551
- if (!queue.includes(gaId)) {
552
- queue.push(gaId);
553
- }
554
- }
555
- }
556
-
557
- // 发现的关注/粉丝(仅当 keepFollow 为 true 时)
558
- if (result.keepFollow) {
559
- for (const [handle, name] of (result.discoveredFollowing || [])) {
560
- const uid = handle.replace(/^@/, '');
561
- store.addUser({
562
- uniqueId: uid,
563
- nickname: name,
564
- sources: ['following'],
565
- });
566
- if (!store.getUser(uid) || !store.getUser(uid).processed) {
567
- if (!queue.includes(uid)) {
568
- queue.push(uid);
569
- }
570
- }
571
- }
572
-
573
- for (const [handle, name] of (result.discoveredFollowers || [])) {
574
- const uid = handle.replace(/^@/, '');
575
- store.addUser({
576
- uniqueId: uid,
577
- nickname: name,
578
- sources: ['follower'],
579
- });
580
- if (!store.getUser(uid) || !store.getUser(uid).processed) {
581
- if (!queue.includes(uid)) {
582
- queue.push(uid);
583
- }
584
- }
585
- }
586
- }
587
-
588
- processedCount++;
589
- store.save();
590
- console.error(` 已保存,当前共 ${store.getAllUsers().length} 个用户`);
591
-
592
- if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
593
- console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
594
- i = queue.length;
595
- }
596
- }
597
-
598
- const output = store.getAllUsers();
599
- if (outputFile) {
600
- console.error(`\n完成: ${processedCount} 个用户已处理, ${errorCount} 个出错, 共 ${output.length} 个用户`);
601
- console.error(`数据已保存到: ${outputFile}`);
602
- } else {
603
- const json = JSON.stringify(output, null, 2);
604
- process.stdout.write(json + '\n');
605
- }
606
- } catch (err) {
607
- console.error(`探索失败: ${err.message}`);
608
- if (watchServer) watchServer.close();
609
- process.exit(1);
610
- } finally {
611
- await browser.close().catch(() => {});
612
- if (watchServer) {
613
- watchServer.close();
614
- console.error(`Watch 监控服务已停止: http://127.0.0.1:${watchPortActual}`);
615
- }
616
- }
617
- }
618
-
619
- async function handleWatch(options) {
620
- const { outputFile, watchPort } = options;
621
-
622
- if (!outputFile) {
623
- console.error('用法: tt-help watch -o <数据文件> [-p 端口]');
624
- console.error('示例: tt-help watch -o data.json');
625
- console.error(' tt-help watch -o data.json -p 8080');
626
- process.exit(1);
627
- }
628
-
629
- if (!existsSync(outputFile)) {
630
- console.error(`文件不存在: ${outputFile}`);
631
- process.exit(1);
632
- }
633
-
634
- const { server, port } = await startWatchServer(outputFile, watchPort);
635
- openBrowser(port);
636
-
637
- process.once('SIGINT', () => {
638
- server.close();
639
- process.exit(0);
640
- });
641
-
642
- console.error(`按 Ctrl+C 停止监控服务`);
643
- }
644
-
645
- async function handleAuto(options) {
646
- const { autoUsernames, autoCollectMax, autoScrapeDepth, autoMaxComments, autoMaxGuess, autoPreset, autoSwitchDelay, autoCommentDelay, outputFile, autoWatch, autoWatchPort, autoEnableFollow, autoMaxFollowing, autoMaxFollowers } = options;
647
-
648
- const runOptions = {
649
- collectMax: autoCollectMax,
650
- scrapeDepth: autoScrapeDepth,
651
- maxComments: autoMaxComments,
652
- maxGuess: autoMaxGuess,
653
- preset: autoPreset,
654
- switchMax: autoSwitchDelay,
655
- commentMax: autoCommentDelay,
656
- enableFollow: autoEnableFollow,
657
- maxFollowing: autoMaxFollowing,
658
- maxFollowers: autoMaxFollowers,
659
- };
660
-
661
- // 数据源
662
- const { createRequire } = await import('module');
663
- const require = createRequire(import.meta.url);
664
- const { createStore } = require('./data-store.cjs');
665
- const store = createStore(outputFile);
666
-
667
- // 构建队列:命令行用户名插队到前面,文件中的未处理用户追加到后面
668
- const queue = [...new Set(autoUsernames)];
669
- const pendingFromStore = store.getPendingUsers().filter(u => !u.restricted);
670
- pendingFromStore.forEach(u => {
671
- if (!queue.includes(u.uniqueId)) {
672
- queue.push(u.uniqueId);
673
- }
158
+ outputFormat,
159
+ filter,
160
+ processFn: (url, px) => processUrl(url, px),
161
+ label: '用户的数据',
162
+ log: console.log,
674
163
  });
675
-
676
- if (queue.length === 0) {
677
- console.error('没有待处理的用户');
678
- return;
679
- }
680
-
681
- console.error(`队列: ${queue.length} 个用户待处理`);
682
- if (autoUsernames.length > 0) {
683
- console.error(` 命令行: @${autoUsernames.join(', @')}`);
684
- }
685
- if (pendingFromStore.length > 0) {
686
- console.error(` 数据源: ${pendingFromStore.length} 个未处理用户`);
687
- }
688
-
689
- // Watch server
690
- let watchServer = null;
691
- let watchPort = null;
692
- if (autoWatch) {
693
- if (!outputFile) {
694
- console.error('--watch 需要指定 -o 输出文件');
695
- process.exit(1);
696
- }
697
- ({ server: watchServer, port: watchPort } = await startWatchServer(outputFile, autoWatchPort || 3000));
698
- openBrowser(watchPort);
699
- }
700
-
701
- // 启动浏览器
702
- const { ensureBrowserReady, processUser } = await import('./lib/auto-browser.mjs');
703
-
704
- const browser = await ensureBrowserReady();
705
-
706
- try {
707
- const contexts = browser.contexts();
708
- let page = null;
709
- for (const ctx of contexts) {
710
- for (const p of ctx.pages()) {
711
- if (p.url().includes('tiktok.com')) {
712
- page = p;
713
- break;
714
- }
715
- }
716
- if (page) break;
717
- }
718
- if (!page) {
719
- const defaultCtx = contexts[0] || await browser.newContext();
720
- page = await defaultCtx.newPage();
721
- }
722
-
723
- let processedCount = 0;
724
- let errorCount = 0;
725
-
726
- for (let i = 0; i < queue.length; i++) {
727
- const username = queue[i];
728
- console.error(`\n[${i + 1}/${queue.length}] 处理 @${username}...`);
729
-
730
- const result = await processUser(page, username, { ...runOptions, browser }, console.error);
731
-
732
- if (result.restricted) {
733
- store.addUser({
734
- uniqueId: username,
735
- restricted: true,
736
- sources: ['restricted'],
737
- });
738
- store.save();
739
- continue;
740
- }
741
-
742
- if (result.error) {
743
- errorCount++;
744
- store.addUser({
745
- uniqueId: username,
746
- error: result.error,
747
- sources: ['error'],
748
- });
749
- store.save();
750
- continue;
751
- }
752
-
753
- // 写入用户信息(持续合并更新,不管是否已存在)
754
- const userEntry = {
755
- uniqueId: username,
756
- ...result.userInfo,
757
- sources: ['processed'],
758
- };
759
- store.addUser(userEntry);
760
-
761
- // 发现的视频作者(持续合并更新,不管是否已存在)
762
- for (const va of result.discoveredVideoAuthors) {
763
- store.addUser({
764
- uniqueId: va.uniqueId,
765
- nickname: va.nickname,
766
- locationCreated: va.locationCreated,
767
- sources: ['video'],
768
- });
769
- if (!store.getUser(va.uniqueId) || !store.getUser(va.uniqueId).followerCount) {
770
- if (!queue.includes(va.uniqueId)) {
771
- queue.push(va.uniqueId);
772
- }
773
- }
774
- }
775
-
776
- // 发现的评论作者
777
- for (const ca of result.discoveredCommentAuthors) {
778
- const caId = ca.replace(/^@/, '');
779
- store.addUser({
780
- uniqueId: caId,
781
- sources: ['comment'],
782
- });
783
- if (!store.getUser(caId) || !store.getUser(caId).followerCount) {
784
- if (!queue.includes(caId)) {
785
- queue.push(caId);
786
- }
787
- }
788
- }
789
-
790
- // 发现的猜你喜欢作者
791
- for (const ga of (result.discoveredGuessAuthors || [])) {
792
- const gaId = ga.replace(/^@/, '');
793
- store.addUser({
794
- uniqueId: gaId,
795
- sources: ['guess'],
796
- });
797
- if (!store.getUser(gaId) || !store.getUser(gaId).followerCount) {
798
- if (!queue.includes(gaId)) {
799
- queue.push(gaId);
800
- }
801
- }
802
- }
803
-
804
- // 发现的关注用户
805
- for (const [handle, name] of (result.discoveredFollowing || [])) {
806
- const uid = handle.replace(/^@/, '');
807
- store.addUser({
808
- uniqueId: uid,
809
- nickname: name,
810
- sources: ['following'],
811
- });
812
- if (!store.getUser(uid) || !store.getUser(uid).followerCount) {
813
- if (!queue.includes(uid)) {
814
- queue.push(uid);
815
- }
816
- }
817
- }
818
-
819
- // 发现的粉丝用户
820
- for (const [handle, name] of (result.discoveredFollowers || [])) {
821
- const uid = handle.replace(/^@/, '');
822
- store.addUser({
823
- uniqueId: uid,
824
- nickname: name,
825
- sources: ['follower'],
826
- });
827
- if (!store.getUser(uid) || !store.getUser(uid).followerCount) {
828
- if (!queue.includes(uid)) {
829
- queue.push(uid);
830
- }
831
- }
832
- }
833
-
834
- processedCount++;
835
- store.save();
836
- console.error(` 已保存,当前共 ${store.getAllUsers().length} 个用户`);
837
- }
838
-
839
- const output = store.getAllUsers();
840
- if (outputFile) {
841
- console.error(`\n完成: ${processedCount} 个用户已处理, ${errorCount} 个出错, 共 ${output.length} 个用户`);
842
- console.error(`数据已保存到: ${outputFile}`);
843
- } else {
844
- const json = JSON.stringify(output, null, 2);
845
- process.stdout.write(json + '\n');
846
- }
847
- } catch (err) {
848
- console.error(`自动抓取失败: ${err.message}`);
849
- if (watchServer) watchServer.close();
850
- process.exit(1);
851
- } finally {
852
- await browser.close().catch(() => {});
853
- if (watchServer) {
854
- watchServer.close();
855
- console.error(`Watch 监控服务已停止: http://127.0.0.1:${watchPort}`);
856
- }
857
- }
858
- }
859
-
860
- async function handleVideos(options) {
861
- const { videosUsername, videosMax, outputFile } = options;
862
-
863
- if (!videosUsername) {
864
- console.error('用法: tt-help videos <用户名> [最大视频数] [-o 输出路径]');
865
- console.error('示例: tt-help videos bar.lar.lar.moeta 1000');
866
- console.error(' tt-help videos username 50 -o videos.json');
867
- console.error('');
868
- console.error('选项: -o, --output <路径> 输出到文件(默认输出到 stdout)');
869
- process.exit(1);
870
- }
871
-
872
- const { runGetUserVideos } = await import('./lib/get-user-videos-browser.mjs');
873
-
874
- let browser;
875
- try {
876
- const { output, browser: b } = await runGetUserVideos({
877
- username: videosUsername,
878
- maxVideos: videosMax,
879
- log: console.error,
880
- });
881
- browser = b;
882
-
883
- const json = JSON.stringify(output, null, 2);
884
- if (outputFile) {
885
- writeFileSync(outputFile, json, 'utf-8');
886
- console.error(`结果已写入: ${outputFile}`);
887
- } else {
888
- process.stdout.write(json + '\n');
889
- }
890
-
891
- const stats = output.videos.length;
892
- console.error(`\n共 ${stats} 个视频, 用户: @${videosUsername}`);
893
- } catch (err) {
894
- console.error(`获取用户视频失败: ${err.message}`);
895
- process.exit(1);
896
- } finally {
897
- if (browser) await browser.close().catch(() => {});
898
- }
899
164
  }
900
165
 
901
166
  async function main() {
902
167
  const parsed = parseArgs();
903
168
 
904
- if (parsed.subcommand === 'scrape') {
905
- await handleScrape(parsed);
906
- return;
907
- }
908
-
909
- if (parsed.subcommand === 'videos') {
910
- await handleVideos(parsed);
911
- return;
912
- }
913
-
914
- if (parsed.subcommand === 'auto') {
915
- await handleAuto(parsed);
916
- return;
917
- }
918
-
919
- if (parsed.subcommand === 'explore') {
920
- await handleExplore(parsed);
921
- return;
922
- }
923
-
924
- if (parsed.subcommand === 'watch') {
925
- await handleWatch(parsed);
926
- return;
169
+ switch (parsed.subcommand) {
170
+ case 'scrape': return handleScrape(parsed);
171
+ case 'videos': return handleVideos(parsed);
172
+ case 'auto': return handleAuto(parsed);
173
+ case 'explore':return handleExplore(parsed);
174
+ case 'watch': return handleWatch(parsed);
927
175
  }
928
176
 
929
- const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, customProxy, configAction, configValue, pipeMode, filterStr } = parsed;
177
+ const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, showVersion, customProxy, configAction, configValue, pipeMode, filterStr } = parsed;
930
178
  const proxyUrl = customProxy || proxy;
931
179
  const filter = parseFilter(filterStr);
932
180
 
933
- if (showHelp) {
934
- showUsage();
935
- return;
936
- }
937
-
938
- if (configAction) {
939
- handleConfig(configAction, configValue);
940
- return;
941
- }
942
-
943
- if (showCfg) {
944
- showConfig(urls, outputFile);
945
- return;
946
- }
947
-
948
- if (urls.length === 0 && exploreCount === 0) {
949
- showUsage();
950
- return;
181
+ if (showVersion) {
182
+ console.log(version);
183
+ process.exit(0);
951
184
  }
185
+ if (showHelp) return showUsage();
186
+ if (configAction) return handleConfig(configAction, configValue);
187
+ if (showCfg) return showConfig(urls, outputFile);
188
+ if (urls.length === 0 && exploreCount === 0) return showUsage();
952
189
 
953
190
  if (exploreCount > 0) {
954
- await runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter);
191
+ await runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter);
955
192
  } else {
956
- await runScrape(urls, proxyUrl, outputFile, outputFormat, filter);
193
+ await runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter);
957
194
  }
958
195
  }
959
196