tt-help-cli-ycl 1.3.13 → 1.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.13",
3
+ "version": "1.3.14",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -39,6 +39,8 @@
39
39
  },
40
40
  "homepage": "https://github.com/jsjhycl/tt-help-cli#readme",
41
41
  "dependencies": {
42
+ "axios": "^1.16.1",
43
+ "https-proxy-agent": "^9.0.0",
42
44
  "playwright": "^1.59.1",
43
45
  "undici": "^8.1.0"
44
46
  }
@@ -0,0 +1,160 @@
1
+ import { TikTokScraper } from '../lib/tiktok-scraper.mjs';
2
+
3
+ const MAX_RETRY_WAIT = 5 * 60 * 1000;
4
+
5
/**
 * Runs `fn` repeatedly until it resolves, sleeping between failed attempts.
 *
 * The delay starts at 1 second and doubles after every failure, but is
 * clamped to MAX_RETRY_WAIT so a long outage never produces a wait longer
 * than the configured ceiling. There is no attempt limit: the returned
 * promise only settles once `fn` succeeds.
 *
 * @param {string} label - Text identifying the operation in the retry log line.
 * @param {() => Promise<*>} fn - Operation to attempt; any throw/rejection triggers a retry.
 * @returns {Promise<*>} Whatever `fn` eventually resolves with.
 */
async function withRetry(label, fn) {
  let backoff = 1000;
  while (true) {
    try {
      return await fn();
    } catch (err) {
      // `err` may be a non-Error value (string, null); fall back to the raw value for logging.
      const reason = err?.message ?? err;
      console.error(`[连接] ${label} 失败: ${reason},${backoff / 1000}秒后重试...`);
      await new Promise((resolve) => setTimeout(resolve, backoff));
      // Clamp instead of "double while below the cap": the latter overshoots
      // the ceiling on the final doubling (256s -> 512s with a 300s cap).
      backoff = Math.min(backoff * 2, MAX_RETRY_WAIT);
    }
  }
}
17
+
18
/**
 * Issues a GET request with the global `fetch` and returns the parsed JSON body.
 *
 * The request runs through `withRetry`, so a failed fetch or a body that
 * cannot be parsed as JSON is retried rather than surfaced to the caller.
 * The HTTP status code is not inspected.
 *
 * @param {string} url - Absolute URL to request.
 * @returns {Promise<*>} The decoded JSON payload.
 */
async function apiGet(url) {
  const requestJson = async () => {
    const response = await fetch(url);
    return response.json();
  };
  return withRetry(`GET ${url}`, requestJson);
}
24
+
25
/**
 * Sends `body` as a JSON-encoded PUT request and returns the parsed JSON reply.
 *
 * The whole attempt, including serialising `body`, runs inside `withRetry`,
 * so any throw is retried instead of propagating. The HTTP status code is
 * not inspected.
 *
 * @param {string} url - Absolute URL to send the request to.
 * @param {*} body - Value serialised with JSON.stringify as the request body.
 * @returns {Promise<*>} The decoded JSON payload of the response.
 */
async function apiPut(url, body) {
  const sendJson = async () => {
    const requestInit = {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    };
    const response = await fetch(url, requestInit);
    return response.json();
  };
  return withRetry(`PUT ${url}`, sendJson);
}
35
+
36
/**
 * Reports whether an error's text contains one of the substrings this module
 * treats as a sign that the browser, context or page is gone.
 *
 * The text is `err.message` when truthy, otherwise `err.toString()`.
 *
 * @param {*} err - Caught value; falsy values are never a match.
 * @returns {boolean} True when any known marker occurs in the error text.
 */
function isBrowserClosedError(err) {
  if (!err) {
    return false;
  }
  const text = err.message || err.toString() || '';
  const markers = [
    'Target page, context or browser has been closed',
    'browser has been closed',
    'browserContext.newPage',
    'Protocol error',
  ];
  return markers.some((marker) => text.includes(marker));
}
44
+
45
/**
 * Prints the usage text for the `attach` subcommand to stderr, one line per call.
 */
function printAttachHelp() {
  // NOTE(review): the text advertises PUT /api/user-update-result/{uniqueId},
  // while handleAttach submits to /api/user-info/{uniqueId} — confirm which
  // endpoint is the intended one.
  const helpLines = [
    '用法: tt-help attach [-p 并行数] [-i 间隔秒数] [-s 服务端地址]',
    '',
    '参数:',
    ' -p, --parallel <N> 并行抓取数(默认: 1)',
    ' -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)',
    ' -s, --server <URL> 服务端地址(默认: http://127.0.0.1:3001)',
    '',
    '说明:',
    ' 后台轮询服务端 /api/user-update-tasks 接口,自动抓取 TikTok 用户信息',
    ' 抓取完成后通过 PUT /api/user-update-result/{uniqueId} 回传结果',
    ' 浏览器崩溃时自动重启,支持长时间无人值守运行',
    '',
    '示例:',
    ' tt-help attach',
    ' tt-help attach -p 5 -i 10',
    ' tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001',
  ];
  for (const line of helpLines) {
    console.error(line);
  }
}

/**
 * Runs the `attach` worker loop.
 *
 * Each round polls `${serverUrl}/api/user-update-tasks?limit=<attachParallel>`,
 * calls `scraper.getUserInfo` for every returned task concurrently, and PUTs
 * each non-empty result to `${serverUrl}/api/user-info/<uniqueId>`. When no
 * tasks are returned it sleeps `attachInterval` seconds before polling again.
 * The browser is restarted at the top of a round when `scraper.isAlive()` is
 * false or when the previous round saw a browser-level error.
 *
 * The loop has no exit condition of its own; it ends only when something
 * throws (the error is logged and rethrown). The scraper is always closed.
 *
 * @param {object} options - Parsed CLI options.
 * @param {number} options.attachParallel - Task count requested per poll.
 * @param {number} options.attachInterval - Idle wait between polls, in seconds.
 * @param {string} options.serverUrl - Base URL of the task server.
 * @param {boolean} [options.showHelp] - When truthy, print usage and return.
 * @returns {Promise<void>}
 */
export async function handleAttach(options) {
  const { attachParallel, attachInterval, serverUrl, showHelp } = options;

  if (showHelp) {
    printAttachHelp();
    return;
  }

  console.error(`[Attach] 并行数: ${attachParallel}, 空闲间隔: ${attachInterval}秒, 服务端: ${serverUrl}`);

  const scraper = new TikTokScraper();

  try {
    await scraper.init();
    console.error('[Attach] 浏览器初始化完成,开始循环接收任务...');

    let loopCount = 0;
    let browserRestartCount = 0;
    // Kept outside the loop so a browser-level failure seen in one round
    // actually triggers the restart that the log message promises for the next.
    let needRestart = false;

    while (true) {
      loopCount++;

      // Restart when the browser is no longer alive, or when the previous
      // round flagged a browser error even though isAlive() still reports true.
      if (needRestart || !scraper.isAlive()) {
        console.error(`[Attach] 浏览器已关闭,正在重启 (${++browserRestartCount})...`);
        await scraper.restart();
        needRestart = false;
        console.error('[Attach] 浏览器重启完成');
      }

      // A null/undefined JSON body is treated the same as "no tasks".
      const { tasks } = (await apiGet(`${serverUrl}/api/user-update-tasks?limit=${attachParallel}`)) ?? {};

      if (!tasks || tasks.length === 0) {
        // Only announce the idle state on the very first round to keep the log quiet.
        if (loopCount === 1) {
          console.error(`[Attach] 当前无待更新任务,${attachInterval}秒后重试...`);
        }
        await new Promise((resolve) => setTimeout(resolve, attachInterval * 1000));
        continue;
      }

      console.error(`\n[Attach] 获取到 ${tasks.length} 个待更新任务...`);

      // Scrape failures are captured per task so one bad user cannot hide the
      // others; a rejection here means the task entry itself was malformed.
      const results = await Promise.allSettled(
        tasks.map(async (task) => {
          const uniqueId = task.uniqueId;
          console.error(` → 获取 @${uniqueId} 的用户信息...`);
          try {
            const info = await scraper.getUserInfo(uniqueId);
            return { uniqueId, info, error: null };
          } catch (err) {
            return { uniqueId, info: null, error: err };
          }
        })
      );

      let successCount = 0;
      let failCount = 0;

      for (const result of results) {
        if (result.status !== 'fulfilled') {
          console.error(` ✗ 任务执行异常: ${result.reason?.message || '未知错误'}`);
          failCount++;
          continue;
        }

        const { uniqueId, info, error } = result.value;

        if (error) {
          if (isBrowserClosedError(error)) {
            needRestart = true;
          }
          console.error(` ✗ @${uniqueId} 获取失败: ${error.message || '未知错误'}`);
          failCount++;
          continue;
        }

        if (!info) {
          console.error(` ✗ @${uniqueId} 未获取到用户信息`);
          failCount++;
          continue;
        }

        try {
          await apiPut(`${serverUrl}/api/user-info/${encodeURIComponent(uniqueId)}`, info);
          console.error(` ✓ @${uniqueId} 已提交更新`);
          successCount++;
        } catch (err) {
          console.error(` ✗ @${uniqueId} 提交失败: ${err.message}`);
          failCount++;
        }
      }

      console.error(` 本批结果: ${successCount} 成功, ${failCount} 失败\n`);

      if (needRestart) {
        console.error('[Attach] 检测到浏览器异常,将在下一轮重启...');
      }

      // Short pause between busy rounds.
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
  } catch (err) {
    console.error(`[Attach] 运行异常: ${err.message}`);
    throw err;
  } finally {
    await scraper.close();
  }
}
package/src/cli/config.js CHANGED
@@ -1,4 +1,4 @@
1
- import { HELP_TEXT, configPath, saveBrowser, saveUserId, getConfigText } from '../lib/constants.js';
1
+ import { HELP_TEXT, configPath, saveBrowser, saveUserId, saveMaxFollowing, saveMaxFollowers, saveMaxVideos, saveMaxComments, getConfigText } from '../lib/constants.js';
2
2
  import { readFileSync, writeFileSync, existsSync } from 'fs';
3
3
  import { fileURLToPath } from 'url';
4
4
  import { dirname, join } from 'path';
@@ -34,7 +34,7 @@ function handleConfig(action, key, value) {
34
34
  case 'set': {
35
35
  if (!key) {
36
36
  console.error('用法: tt-help config set <key> <value>');
37
- console.error(' 可用 key: proxy, server, browser, userId');
37
+ console.error(' 可用 key: proxy, server, browser, userId, maxFollowing, maxFollowers, maxVideos, maxComments');
38
38
  return;
39
39
  }
40
40
 
@@ -78,9 +78,45 @@ function handleConfig(action, key, value) {
78
78
  console.error(`用户号已更新: ${value}`);
79
79
  break;
80
80
 
81
+ case 'maxFollowing':
82
+ if (!value) {
83
+ console.error('请提供 maxFollowing 的值');
84
+ return;
85
+ }
86
+ saveMaxFollowing(value);
87
+ console.error(`商家关注采集数已更新: ${value}`);
88
+ break;
89
+
90
+ case 'maxFollowers':
91
+ if (!value) {
92
+ console.error('请提供 maxFollowers 的值');
93
+ return;
94
+ }
95
+ saveMaxFollowers(value);
96
+ console.error(`粉丝采集数已更新: ${value}`);
97
+ break;
98
+
99
+ case 'maxVideos':
100
+ if (!value) {
101
+ console.error('请提供 maxVideos 的值');
102
+ return;
103
+ }
104
+ saveMaxVideos(value);
105
+ console.error(`视频采集数已更新: ${value}`);
106
+ break;
107
+
108
+ case 'maxComments':
109
+ if (!value) {
110
+ console.error('请提供 maxComments 的值');
111
+ return;
112
+ }
113
+ saveMaxComments(value);
114
+ console.error(`评论采集数已更新: ${value}`);
115
+ break;
116
+
81
117
  default:
82
118
  console.error(`未知配置项: ${key}`);
83
- console.error(' 可用 key: proxy, server, browser, userId');
119
+ console.error(' 可用 key: proxy, server, browser, userId, maxFollowing, maxFollowers, maxVideos, maxComments');
84
120
  }
85
121
  break;
86
122
  }
@@ -45,6 +45,7 @@ export async function handleExplore(options) {
45
45
  exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
46
46
  exploreLocation, exploreMaxUsers, serverUrl,
47
47
  explorePort, exploreProfile, exploreUserId,
48
+ exploreMaxVideos,
48
49
  } = options;
49
50
 
50
51
  let userId = exploreUserId || configuredUserId;
@@ -64,7 +65,7 @@ export async function handleExplore(options) {
64
65
  }
65
66
 
66
67
  console.error(`\n国家筛选: ${exploreLocation}`);
67
- console.error(`评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
68
+ console.error(`视频采集: ${exploreMaxVideos || 1}`);
68
69
  console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
69
70
  console.error(`服务器: ${serverUrl}(断开会自动重连)`);
70
71
  if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
@@ -111,8 +112,7 @@ export async function handleExplore(options) {
111
112
  await delay(switchMax, switchMax * 3);
112
113
 
113
114
  let result = await processExplore(page, username, {
114
- maxComments: exploreMaxComments,
115
- maxGuess: exploreMaxGuess,
115
+ maxVideos: exploreMaxVideos,
116
116
  enableFollow: exploreEnableFollow,
117
117
  maxFollowing: exploreMaxFollowing,
118
118
  maxFollowers: exploreMaxFollowers,
@@ -128,8 +128,7 @@ export async function handleExplore(options) {
128
128
  Object.assign(page, newPage);
129
129
  // 重试当前用户
130
130
  result = await processExplore(page, username, {
131
- maxComments: exploreMaxComments,
132
- maxGuess: exploreMaxGuess,
131
+ maxVideos: exploreMaxVideos,
133
132
  enableFollow: exploreEnableFollow,
134
133
  maxFollowing: exploreMaxFollowing,
135
134
  maxFollowers: exploreMaxFollowers,
@@ -189,11 +188,6 @@ export async function handleExplore(options) {
189
188
 
190
189
  const payload = {
191
190
  userInfo: result.userInfo || {},
192
- discoveredVideoAuthors: (result.discoveredVideoAuthors || []).map(item =>
193
- typeof item === 'object' ? { ...item, guessedLocation } : item
194
- ),
195
- discoveredCommentAuthors: (result.discoveredCommentAuthors || []).map(author => ({ author, guessedLocation })),
196
- discoveredGuessAuthors: (result.discoveredGuessAuthors || []).map(author => ({ author, guessedLocation })),
197
191
  discoveredFollowing: (result.discoveredFollowing || []).map(f => ({
198
192
  handle: Array.isArray(f) ? f[0] : f,
199
193
  displayName: Array.isArray(f) ? f[1] : null,
@@ -209,8 +203,21 @@ export async function handleExplore(options) {
209
203
  keepFollow: result.keepFollow,
210
204
  locationCreated: result.locationCreated,
211
205
  noVideo: result.noVideo,
206
+ collectedVideos: result.collectedVideos,
212
207
  };
213
208
  await apiPost(`${serverUrl}/api/job/${username}`, payload);
209
+
210
+ // 视频登记
211
+ if (result.videoList && result.videoList.length > 0) {
212
+ const { registered, skipped } = await apiPost(`${serverUrl}/api/videos`, {
213
+ sourceUser: username,
214
+ videoList: result.videoList,
215
+ locationCreated: result.locationCreated,
216
+ ttSeller: result.userInfo?.ttSeller || false,
217
+ });
218
+ console.error(` 视频登记: ${registered} 条新增, ${skipped} 条已存在`);
219
+ }
220
+
214
221
  console.error(' 已提交');
215
222
 
216
223
  if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
@@ -0,0 +1,88 @@
1
+ import { TikTokScraper } from '../lib/tiktok-scraper.mjs';
2
+ import { isProfileUrl, isVideoUrl, extractUniqueId, normalizeUsername } from '../lib/url.js';
3
+
4
/**
 * Handles the `info` subcommand: looks up each given TikTok URL and prints
 * the collected data as pretty-printed JSON on stdout.
 *
 * - Profile URL: stores `{ user }` under the normalized username.
 * - Video URL with `infoOnlyVideo`: stores `{ video }` under `<username>/video/<id>`.
 * - Video URL otherwise: fetches user and video together and stores whichever
 *   came back under `<username>/video/<id>` (`unknown` when the video is missing).
 *
 * Progress and failures are written to stderr. With no URLs the usage text is
 * printed and `process.exit(1)` is called. The scraper is closed in all cases
 * once it has been created.
 *
 * @param {object} options - Parsed CLI options.
 * @param {string[]} options.infoUrls - URLs to look up.
 * @param {boolean} options.infoOnlyVideo - Skip the user lookup for video URLs.
 * @returns {Promise<void>}
 */
async function handleInfo(options) {
  const { infoUrls, infoOnlyVideo } = options;

  if (!infoUrls || infoUrls.length === 0) {
    const usageLines = [
      '用法: tt-help info <URL> [URL2 URL3...] [--onlyvideo]',
      '',
      '参数:',
      ' <URL> TikTok 主页或视频 URL,支持多个 URL 同时查询',
      ' --onlyvideo 只返回视频信息(不返回用户信息)',
      '',
      '默认行为:',
      ' 主页 URL → 返回用户信息(bio、region、粉丝数等)',
      ' 视频 URL → 返回用户信息 + 视频信息',
      ' 视频 URL + --onlyvideo → 只返回视频信息',
      '',
      '示例:',
      ' tt-help info https://www.tiktok.com/@nike',
      ' tt-help info https://www.tiktok.com/@nike/video/7234567890',
      ' tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@apple',
    ];
    usageLines.forEach((line) => console.error(line));
    process.exit(1);
  }

  const scraper = new TikTokScraper({ poolSize: 1 });

  try {
    await scraper.init();
    const collected = {};

    for (const url of infoUrls) {
      const profileUrl = isProfileUrl(url);
      if (!profileUrl && !isVideoUrl(url)) {
        console.error(`无法识别 URL: ${url}`);
        continue;
      }

      const uniqueId = extractUniqueId(url);
      const normalized = normalizeUsername(uniqueId);

      // Profile page: user data only.
      if (profileUrl) {
        const profile = await scraper.getUserInfo(normalized);
        if (!profile) {
          console.error(`无法获取用户 @${uniqueId} 的信息`);
          continue;
        }
        collected[normalized] = { user: profile };
        console.error(`用户: @${profile.uniqueId} (${profile.nickname})`);
        continue;
      }

      // Video page, video data only.
      if (infoOnlyVideo) {
        const clip = await scraper.getVideoInfo(url);
        if (!clip) {
          console.error(`无法获取视频信息: ${url}`);
          continue;
        }
        collected[normalized + '/video/' + clip.id] = { video: clip };
        console.error(`视频: ${clip.id}`);
        continue;
      }

      // Video page, user and video requested concurrently.
      const [owner, clip] = await Promise.all([
        scraper.getUserInfo(normalized),
        scraper.getVideoInfo(url),
      ]);
      if (!owner && !clip) {
        console.error(`无法获取信息: ${url}`);
        continue;
      }

      const entry = {};
      if (owner) {
        entry.user = owner;
        console.error(`用户: @${owner.uniqueId} (${owner.nickname})`);
      }
      if (clip) {
        entry.video = clip;
        console.error(`视频: ${clip.id}`);
      }
      collected[normalized + '/video/' + (clip ? clip.id : 'unknown')] = entry;
    }

    console.log(JSON.stringify(collected, null, 2));
  } finally {
    await scraper.close();
  }
}
87
+
88
+ export { handleInfo };
package/src/lib/args.js CHANGED
@@ -164,6 +164,7 @@ function parseExploreArgs(args) {
164
164
  let explorePort = null;
165
165
  let exploreProfile = null;
166
166
  let exploreUserId = null;
167
+ let exploreMaxVideos = 1;
167
168
 
168
169
  const positional = [];
169
170
  const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
@@ -194,6 +195,8 @@ function parseExploreArgs(args) {
194
195
  exploreProfile = args[++i];
195
196
  } else if (arg === '--user-id') {
196
197
  exploreUserId = args[++i];
198
+ } else if (arg === '--max-videos') {
199
+ exploreMaxVideos = parseInt(args[++i]) || 1;
197
200
  } else {
198
201
  positional.push(arg);
199
202
  }
@@ -226,6 +229,7 @@ function parseExploreArgs(args) {
226
229
  explorePort,
227
230
  exploreProfile,
228
231
  exploreUserId,
232
+ exploreMaxVideos,
229
233
  urls: [],
230
234
  outputFormat: 'json',
231
235
  exploreCount: 0,
@@ -276,6 +280,38 @@ function parseVideosArgs(args) {
276
280
  };
277
281
  }
278
282
 
283
/**
 * Parses the arguments that follow the `info` subcommand.
 *
 * `--onlyvideo` switches on video-only output. Every argument that does not
 * start with `--` is collected as a URL, in order; any other `--` option is
 * ignored.
 *
 * @param {string[]} args - Arguments after the `info` keyword.
 * @returns {object} Option object with `subcommand: 'info'`, `infoUrls`,
 *   `infoOnlyVideo`, and the remaining shared option fields at fixed defaults.
 */
function parseInfoArgs(args) {
  const infoOnlyVideo = args.includes('--onlyvideo');
  const infoUrls = args.filter((arg) => !arg.startsWith('--'));

  return {
    subcommand: 'info',
    infoUrls,
    infoOnlyVideo,
    outputFile: null,
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    showVersion: false,
    customProxy: null,
    configAction: null,
    configKey: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
314
+
279
315
  function parseWatchArgs(args) {
280
316
  let outputFile = './result.json';
281
317
  let watchPort = 3001;
@@ -356,10 +392,43 @@ function parseRefreshArgs(args) {
356
392
  };
357
393
  }
358
394
 
395
/**
 * Parses the arguments that follow the `attach` subcommand.
 *
 * Recognised options:
 *   -p / --parallel <N>  task count per poll (default 1)
 *   -i / --interval <N>  idle polling interval in seconds (default 10)
 *   -s / --server <URL>  server base URL (default `defaultServer`)
 *
 * Numeric options must be positive integers; anything else (missing, not a
 * number, zero, negative) falls back to the default. A negative interval
 * would otherwise become a negative timer delay and make the idle poll spin.
 * A `-s` with no value keeps the default instead of storing `undefined`.
 * Unrecognised arguments are ignored.
 *
 * @param {string[]} args - Arguments after the `attach` keyword.
 * @returns {object} Option object with `subcommand: 'attach'`, `attachParallel`,
 *   `attachInterval`, `serverUrl`, and the shared option fields at fixed defaults.
 */
function parseAttachArgs(args) {
  // Accept only strictly positive integers; otherwise use the fallback.
  const toPositiveInt = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  let parallel = 1;
  let interval = 10;
  let serverUrl = defaultServer;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-p' || arg === '--parallel') {
      parallel = toPositiveInt(args[++i], 1);
    } else if (arg === '-i' || arg === '--interval') {
      interval = toPositiveInt(args[++i], 10);
    } else if (arg === '-s' || arg === '--server') {
      const candidate = args[++i];
      if (candidate) {
        serverUrl = candidate;
      }
    }
  }

  return {
    subcommand: 'attach',
    attachParallel: parallel,
    attachInterval: interval,
    serverUrl,
    urls: [],
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    // Present on the other parsers' results; included so the shape matches.
    showVersion: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
428
+
359
429
  export function parseArgs() {
360
430
  const args = process.argv.slice(2);
361
431
 
362
- // Global flags take precedence over subcommands
363
432
  if (args.includes('-h') || args.includes('--help')) {
364
433
  return { showHelp: true, showVersion: false, subcommand: null, urls: [], outputFile: null, outputFormat: 'json', exploreCount: 0, showConfig: false, customProxy: null, configAction: null, configValue: null, pipeMode: false, filterStr: null };
365
434
  }
@@ -367,30 +436,22 @@ export function parseArgs() {
367
436
  return { showHelp: false, showVersion: true, subcommand: null, urls: [], outputFile: null, outputFormat: 'json', exploreCount: 0, showConfig: false, customProxy: null, configAction: null, configValue: null, pipeMode: false, filterStr: null };
368
437
  }
369
438
 
370
- if (args.length > 0 && args[0] === 'scrape') {
371
- return parseScrapeArgs(args.slice(1));
372
- }
373
-
374
- if (args.length > 0 && args[0] === 'videos') {
375
- return parseVideosArgs(args.slice(1));
376
- }
377
-
378
- if (args.length > 0 && args[0] === 'auto') {
379
- return parseAutoArgs(args.slice(1));
380
- }
381
-
382
439
  if (args.length > 0 && args[0] === 'explore') {
383
440
  return parseExploreArgs(args.slice(1));
384
441
  }
385
442
 
386
- if (args.length > 0 && args[0] === 'refresh') {
387
- return parseRefreshArgs(args.slice(1));
443
+ if (args.length > 0 && args[0] === 'info') {
444
+ return parseInfoArgs(args.slice(1));
388
445
  }
389
446
 
390
447
  if (args.length > 0 && args[0] === 'watch') {
391
448
  return parseWatchArgs(args.slice(1));
392
449
  }
393
450
 
451
+ if (args.length > 0 && args[0] === 'attach') {
452
+ return parseAttachArgs(args.slice(1));
453
+ }
454
+
394
455
  const urls = [];
395
456
  let inputFile = null;
396
457
  let outputFile = null;