tt-help-cli-ycl 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/auto-core.cjs CHANGED
@@ -38,6 +38,7 @@ async function runAuto(options) {
38
38
  collectMax = 1,
39
39
  scrapeDepth = 50,
40
40
  maxComments = 200,
41
+ maxGuess = 10,
41
42
  preset = null,
42
43
  switchMax = null,
43
44
  commentMax = null,
@@ -127,6 +128,7 @@ async function runAuto(options) {
127
128
  videoUrl,
128
129
  maxVideos: scrapeDepth,
129
130
  maxComments,
131
+ maxGuess,
130
132
  preset,
131
133
  switchMax,
132
134
  commentMax,
@@ -151,6 +153,15 @@ async function runAuto(options) {
151
153
  users.set(cu, mergeUserInfo({}, { uniqueId: cu }, 'comment'));
152
154
  }
153
155
  }
156
+
157
+ // 添加猜你喜欢作者
158
+ for (const ga of (scrapeOutput.guessAuthors || [])) {
159
+ const gaId = ga.replace(/^@/, '');
160
+ if (restrictedUsers.has(gaId)) continue;
161
+ if (!users.has(gaId)) {
162
+ users.set(gaId, mergeUserInfo({}, { uniqueId: gaId }, 'guess'));
163
+ }
164
+ }
154
165
  }
155
166
 
156
167
  // 构建输出
@@ -183,6 +194,7 @@ async function processUser(page, username, options, log) {
183
194
  collectMax = 1,
184
195
  scrapeDepth = 50,
185
196
  maxComments = 200,
197
+ maxGuess = 10,
186
198
  preset = 'fast',
187
199
  switchMax = null,
188
200
  commentMax = null,
@@ -194,6 +206,7 @@ async function processUser(page, username, options, log) {
194
206
  collectedVideos: [],
195
207
  discoveredVideoAuthors: [],
196
208
  discoveredCommentAuthors: [],
209
+ discoveredGuessAuthors: [],
197
210
  error: null,
198
211
  };
199
212
 
@@ -220,9 +233,10 @@ async function processUser(page, username, options, log) {
220
233
  videoUrl: v.href,
221
234
  }));
222
235
 
223
- if (videoList.length > 0) {
236
+ if (videoList.length > 0) {
224
237
  const allVideoAuthors = new Map();
225
238
  const allCommentAuthors = new Set();
239
+ const allGuessAuthors = new Set();
226
240
 
227
241
  for (let i = 0; i < videoList.length; i++) {
228
242
  const video = videoList[i];
@@ -235,6 +249,7 @@ async function processUser(page, username, options, log) {
235
249
  videoUrl,
236
250
  maxVideos: scrapeDepth,
237
251
  maxComments,
252
+ maxGuess,
238
253
  preset,
239
254
  switchMax,
240
255
  commentMax,
@@ -262,12 +277,19 @@ async function processUser(page, username, options, log) {
262
277
  allCommentAuthors.add(cu);
263
278
  }
264
279
  }
280
+
281
+ if (scrapeOutput && scrapeOutput.guessAuthors) {
282
+ for (const ga of scrapeOutput.guessAuthors) {
283
+ allGuessAuthors.add(ga);
284
+ }
285
+ }
265
286
  }
266
287
 
267
288
  result.discoveredVideoAuthors = [...allVideoAuthors.values()];
268
289
  result.discoveredCommentAuthors = [...allCommentAuthors];
290
+ result.discoveredGuessAuthors = [...allGuessAuthors];
269
291
 
270
- log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者`);
292
+ log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者, ${result.discoveredGuessAuthors.length} 个猜你喜欢作者`);
271
293
  } else {
272
294
  const restricted = await isPageRestricted(page);
273
295
  result.restricted = restricted;
package/src/lib/args.js CHANGED
@@ -8,6 +8,7 @@ function parseScrapeArgs(args) {
8
8
  let scrapePreset = null;
9
9
  let scrapeMaxVideos = 20;
10
10
  let scrapeMaxComments = 999;
11
+ let scrapeMaxGuess = 10;
11
12
  let scrapeSwitchDelay = null;
12
13
  let scrapeCommentDelay = null;
13
14
  let outputFile = null;
@@ -34,9 +35,11 @@ function parseScrapeArgs(args) {
34
35
  scrapePreset = positional[1].toLowerCase();
35
36
  scrapeMaxVideos = parseInt(positional[2]) || 20;
36
37
  scrapeMaxComments = parseInt(positional[3]) || 999;
38
+ scrapeMaxGuess = parseInt(positional[4]) || 10;
37
39
  } else {
38
40
  scrapeMaxVideos = parseInt(positional[1]) || 20;
39
41
  scrapeMaxComments = parseInt(positional[2]) || 999;
42
+ scrapeMaxGuess = parseInt(positional[3]) || 10;
40
43
  }
41
44
  }
42
45
 
@@ -46,6 +49,7 @@ function parseScrapeArgs(args) {
46
49
  scrapePreset,
47
50
  scrapeMaxVideos,
48
51
  scrapeMaxComments,
52
+ scrapeMaxGuess,
49
53
  scrapeSwitchDelay,
50
54
  scrapeCommentDelay,
51
55
  outputFile,
@@ -66,6 +70,7 @@ function parseAutoArgs(args) {
66
70
  let autoCollectMax = 1;
67
71
  let autoScrapeDepth = 50;
68
72
  let autoMaxComments = 200;
73
+ let autoMaxGuess = 10;
69
74
  let autoPreset = 'fast';
70
75
  let autoSwitchDelay = null;
71
76
  let autoCommentDelay = null;
@@ -112,10 +117,12 @@ function parseAutoArgs(args) {
112
117
  autoCollectMax = parseInt(positional[j]) || 1; j++;
113
118
  autoScrapeDepth = parseInt(positional[j]) || 50; j++;
114
119
  autoMaxComments = parseInt(positional[j]) || 200; j++;
120
+ autoMaxGuess = parseInt(positional[j]) || 10; j++;
115
121
  } else if (usernames.length > 0) {
116
122
  autoCollectMax = parseInt(positional[j]) || 1; j++;
117
123
  autoScrapeDepth = parseInt(positional[j]) || 50; j++;
118
- autoMaxComments = parseInt(positional[j]) || 200;
124
+ autoMaxComments = parseInt(positional[j]) || 200; j++;
125
+ autoMaxGuess = parseInt(positional[j]) || 10;
119
126
  }
120
127
 
121
128
  return {
@@ -124,6 +131,7 @@ function parseAutoArgs(args) {
124
131
  autoCollectMax,
125
132
  autoScrapeDepth,
126
133
  autoMaxComments,
134
+ autoMaxGuess,
127
135
  autoPreset,
128
136
  autoSwitchDelay,
129
137
  autoCommentDelay,
package/src/main.mjs CHANGED
@@ -315,7 +315,7 @@ async function runScrape(urls, proxyUrl, outputFile, outputFormat, filter) {
315
315
  }
316
316
 
317
317
  async function handleScrape(options) {
318
- const { scrapeUrl, scrapePreset, scrapeMaxVideos, scrapeMaxComments, scrapeSwitchDelay, scrapeCommentDelay, outputFile } = options;
318
+ const { scrapeUrl, scrapePreset, scrapeMaxVideos, scrapeMaxComments, scrapeMaxGuess, scrapeSwitchDelay, scrapeCommentDelay, outputFile } = options;
319
319
 
320
320
  if (!scrapeUrl) {
321
321
  console.error('用法: tt-help scrape <视频URL> [preset] [最大视频数] [最大评论数] [-o 输出路径]');
@@ -334,6 +334,7 @@ async function handleScrape(options) {
334
334
  videoUrl: scrapeUrl,
335
335
  maxVideos: scrapeMaxVideos,
336
336
  maxComments: scrapeMaxComments,
337
+ maxGuess: scrapeMaxGuess,
337
338
  preset: scrapePreset,
338
339
  switchMax: scrapeSwitchDelay,
339
340
  commentMax: scrapeCommentDelay,
@@ -350,7 +351,7 @@ async function handleScrape(options) {
350
351
  }
351
352
 
352
353
  const stats = output.stats;
353
- console.error(`\n共 ${stats.totalVideos} 个视频, ${stats.uniqueVideoAuthors} 个视频作者, ${stats.uniqueCommentAuthors} 个评论作者`);
354
+ console.error(`\n共 ${stats.totalVideos} 个视频, ${stats.uniqueVideoAuthors} 个视频作者, ${stats.uniqueCommentAuthors} 个评论作者, ${stats.uniqueGuessAuthors} 个猜你喜欢作者`);
354
355
  } catch (err) {
355
356
  console.error(`浏览器抓取失败: ${err.message}`);
356
357
  process.exit(1);
@@ -386,12 +387,13 @@ async function handleWatch(options) {
386
387
  }
387
388
 
388
389
  async function handleAuto(options) {
389
- const { autoUsernames, autoCollectMax, autoScrapeDepth, autoMaxComments, autoPreset, autoSwitchDelay, autoCommentDelay, outputFile, autoWatch, autoWatchPort } = options;
390
+ const { autoUsernames, autoCollectMax, autoScrapeDepth, autoMaxComments, autoMaxGuess, autoPreset, autoSwitchDelay, autoCommentDelay, outputFile, autoWatch, autoWatchPort } = options;
390
391
 
391
392
  const runOptions = {
392
393
  collectMax: autoCollectMax,
393
394
  scrapeDepth: autoScrapeDepth,
394
395
  maxComments: autoMaxComments,
396
+ maxGuess: autoMaxGuess,
395
397
  preset: autoPreset,
396
398
  switchMax: autoSwitchDelay,
397
399
  commentMax: autoCommentDelay,
@@ -526,6 +528,20 @@ async function handleAuto(options) {
526
528
  }
527
529
  }
528
530
 
531
+ // 发现的猜你喜欢作者
532
+ for (const ga of (result.discoveredGuessAuthors || [])) {
533
+ const gaId = ga.replace(/^@/, '');
534
+ store.addUser({
535
+ uniqueId: gaId,
536
+ sources: ['guess'],
537
+ });
538
+ if (!store.getUser(gaId) || !store.getUser(gaId).followerCount) {
539
+ if (!queue.includes(gaId)) {
540
+ queue.push(gaId);
541
+ }
542
+ }
543
+ }
544
+
529
545
  processedCount++;
530
546
  store.save();
531
547
  console.error(` 已保存,当前共 ${store.getAllUsers().length} 个用户`);
@@ -8,8 +8,9 @@ const {
8
8
  retryWithBackoff,
9
9
  } = require("./modules/page-helpers.cjs");
10
10
  const { extractCommentAuthors } = require("./modules/comment-extractor.cjs");
11
+ const { extractGuessVideos } = require("./modules/guess-extractor.cjs");
11
12
 
12
- async function scrapeSingleVideo(page, maxComments, log) {
13
+ async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
13
14
  const config = getDelayConfig();
14
15
 
15
16
  await page
@@ -44,9 +45,16 @@ async function scrapeSingleVideo(page, maxComments, log) {
44
45
  throw new Error("无法获取视频作者");
45
46
  }
46
47
 
47
- const commentUsers = await extractCommentAuthors(page, maxComments);
48
- await closeCommentPanel(page);
49
- await delay(Math.round(config.commentMax * 0.3), config.commentMax);
48
+ let guessVideos = [];
49
+ let commentUsers = [];
50
+
51
+ if (userData.locationCreated === location) {
52
+ guessVideos = await extractGuessVideos(page, maxGuess);
53
+
54
+ commentUsers = await extractCommentAuthors(page, maxComments);
55
+ await closeCommentPanel(page);
56
+ await delay(Math.round(config.commentMax * 0.3), config.commentMax);
57
+ }
50
58
 
51
59
  const uniqueUsers = [...new Set(commentUsers)];
52
60
 
@@ -56,6 +64,7 @@ async function scrapeSingleVideo(page, maxComments, log) {
56
64
  nickname: userData.nickname,
57
65
  locationCreated: userData.locationCreated,
58
66
  commentUsers: uniqueUsers,
67
+ guessVideos,
59
68
  };
60
69
  }
61
70
 
@@ -64,6 +73,7 @@ async function runScrape(options) {
64
73
  videoUrl,
65
74
  maxVideos = 20,
66
75
  maxComments = 999,
76
+ maxGuess = 10,
67
77
  preset = null,
68
78
  switchMax = null,
69
79
  commentMax = null,
@@ -89,7 +99,7 @@ async function runScrape(options) {
89
99
  if (!isExternal) {
90
100
  log(`视频地址: ${videoUrl}`);
91
101
  log(
92
- `视频数: ${maxVideos}, 评论数: ${maxComments}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`,
102
+ `视频数: ${maxVideos}, 评论数: ${maxComments}, 猜你喜欢: ${maxGuess}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`,
93
103
  );
94
104
  }
95
105
 
@@ -115,13 +125,15 @@ async function runScrape(options) {
115
125
  const videoAuthors = new Set();
116
126
  const commentUsers = new Set();
117
127
  const allCommentAuthorsList = [];
128
+ const allGuessAuthors = new Set();
129
+ const allGuessVideos = [];
118
130
 
119
131
  for (let i = 0; i < maxVideos; i++) {
120
132
  await delay(Math.round(config.commentMax * 0.3), config.commentMax);
121
133
 
122
134
  let result;
123
135
  try {
124
- result = await scrapeSingleVideo(page, maxComments, log);
136
+ result = await scrapeSingleVideo(page, maxComments, maxGuess, log);
125
137
  } catch (e) {
126
138
  log(`[${i + 1}/${maxVideos}] 跳过: ${e.message}`);
127
139
  if (i < maxVideos - 1) {
@@ -140,10 +152,16 @@ async function runScrape(options) {
140
152
  videoAuthors.add(result.videoAuthor);
141
153
  result.commentUsers.forEach((u) => commentUsers.add(u));
142
154
  allCommentAuthorsList.push(...result.commentUsers);
155
+ if (result.guessVideos) {
156
+ allGuessVideos.push(...result.guessVideos);
157
+ result.guessVideos.forEach((v) => {
158
+ if (v.author) allGuessAuthors.add(v.author);
159
+ });
160
+ }
143
161
 
144
162
  if ((i + 1) % 5 === 0 || i === 0) {
145
163
  log(
146
- `[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || "-"} | 评论用户: ${result.commentUsers.length}`,
164
+ `[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || "-"} | 评论用户: ${result.commentUsers.length} | 猜你喜欢: ${result.guessVideos ? result.guessVideos.length : 0}`,
147
165
  );
148
166
  }
149
167
 
@@ -159,7 +177,7 @@ async function runScrape(options) {
159
177
  }
160
178
 
161
179
  log(
162
- `\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length}`,
180
+ `\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length} | 猜你喜欢作者 ${allGuessAuthors.size} | 总猜中视频 ${allGuessVideos.length}`,
163
181
  );
164
182
 
165
183
  const videoDetails = {};
@@ -179,10 +197,14 @@ async function runScrape(options) {
179
197
  videoDetails: Object.values(videoDetails),
180
198
  commentUsers: [...commentUsers].sort(),
181
199
  allCommentAuthorsList,
200
+ guessVideos: allGuessVideos,
201
+ guessAuthors: [...allGuessAuthors].sort(),
182
202
  stats: {
183
203
  totalVideos: allResults.length,
184
204
  uniqueVideoAuthors: videoAuthors.size,
185
205
  uniqueCommentAuthors: commentUsers.size,
206
+ uniqueGuessAuthors: allGuessAuthors.size,
207
+ totalGuessVideos: allGuessVideos.length,
186
208
  },
187
209
  };
188
210
 
@@ -18,6 +18,7 @@ async function main() {
18
18
  let preset = null;
19
19
  let maxVideos = 20;
20
20
  let maxComments = 999;
21
+ let maxGuess = 10;
21
22
  let switchMax = null;
22
23
  let commentMax = null;
23
24
 
@@ -26,21 +27,23 @@ async function main() {
26
27
  preset = args[1].toLowerCase();
27
28
  maxVideos = parseInt(args[2]) || 20;
28
29
  maxComments = parseInt(args[3]) || 999;
30
+ maxGuess = parseInt(args[4]) || 10;
29
31
  } else {
30
32
  maxVideos = parseInt(args[1]) || 20;
31
33
  maxComments = parseInt(args[2]) || 999;
32
- switchMax = parseInt(args[3]) || null;
33
- commentMax = parseInt(args[4]) || null;
34
+ maxGuess = parseInt(args[3]) || 10;
35
+ switchMax = parseInt(args[4]) || null;
36
+ commentMax = parseInt(args[5]) || null;
34
37
  }
35
38
  }
36
39
 
37
40
  if (!videoUrl) {
38
41
  console.error("用法:");
39
42
  console.error(
40
- " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [-o 输出路径]",
43
+ " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [猜你喜欢数] [-o 输出路径]",
41
44
  );
42
45
  console.error(
43
- " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
46
+ " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [猜你喜欢数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
44
47
  );
45
48
  console.error(
46
49
  "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
@@ -57,6 +60,7 @@ async function main() {
57
60
  videoUrl,
58
61
  maxVideos,
59
62
  maxComments,
63
+ maxGuess,
60
64
  preset,
61
65
  switchMax,
62
66
  commentMax,
@@ -0,0 +1,117 @@
1
+ const { delay, getDelayConfig, closeCommentPanel } = require('./page-helpers.cjs');
2
+
3
/**
 * Switches the video side panel to the "猜你喜欢" (guess-you-like) tab and
 * gives the related-videos list a chance to render.
 *
 * Not every page is guaranteed to show this tab, so a missing tab is reported
 * through the return value instead of an exception: callers can then carry on
 * with the rest of the scrape rather than dropping the whole video.
 *
 * @param {object} page - Page object; assumed to expose the Playwright
 *   `locator` / `waitForSelector` API (TODO confirm against the caller).
 * @param {number} [tabTimeout=5000] - Milliseconds to wait for the tab to
 *   become visible before giving up.
 * @returns {Promise<boolean>} `true` once the tab has been clicked, `false`
 *   when the tab did not become visible within `tabTimeout`.
 * @throws Any non-timeout error raised while waiting for or clicking the tab.
 */
async function openGuessTab(page, tabTimeout = 5000) {
  const guessTab = page
    .locator('[class*="tabbar-item"]')
    .filter({ hasText: /猜你喜欢/i })
    .first();

  try {
    // Bounded wait: without it, click() would block for the full default
    // action timeout before failing on pages that have no such tab.
    await guessTab.waitFor({ state: 'visible', timeout: tabTimeout });
  } catch (err) {
    if (err && err.name === 'TimeoutError') return false;
    throw err;
  }

  await guessTab.click();

  // Pause after the click; the bounds are derived from the configured comment
  // delay (delay(a, b) presumably sleeps for a duration between a and b).
  const config = getDelayConfig();
  await delay(Math.round(config.commentMax * 0.5), config.commentMax);

  // Best effort only: the caller re-checks for the container on every pass,
  // so a timeout here is deliberately ignored.
  await page.waitForSelector('[class*="Related"]', { timeout: 5000 }).catch(() => {});
  return true;
}
11
+
12
/**
 * In-page snapshot of the related-videos list.
 *
 * Passed to `page.evaluate`, so it executes in the browser and must stay
 * self-contained: it cannot reference anything from this module's scope.
 *
 * @returns {{items: Array<{author: string, videoId: string, url: string, title: string}>,
 *   scrollHeight: number, containerFound: boolean}}
 */
function snapshotRelatedVideos() {
  const relatedContainer = document.querySelector('[class*="Related"]');
  if (!relatedContainer) return { items: [], scrollHeight: 0, containerFound: false };

  // The element that actually scrolls may be the container itself or one of
  // its ancestors; fall back to the container when none overflows.
  function findScrollableAncestor(el) {
    if (el.scrollHeight > el.clientHeight + 10) return el;
    let current = el.parentElement;
    while (current && current !== document.body) {
      if (current.scrollHeight > current.clientHeight + 10) return current;
      current = current.parentElement;
    }
    return el;
  }

  const items = [];
  relatedContainer.querySelectorAll('[class*="DivItemContainer"]').forEach((card) => {
    const link = card.querySelector('a[href*="/video/"]');
    if (!link || !link.href) return;

    // Author handle and numeric video id are both taken from the link URL.
    const m = link.href.match(/@([^/]+)\/video\/(\d+)/);
    if (!m) return;

    items.push({ author: `@${m[1]}`, videoId: m[2], url: link.href, title: '' });
  });

  return {
    items,
    scrollHeight: findScrollableAncestor(relatedContainer).scrollHeight,
    containerFound: true,
  };
}

/**
 * In-page scroll of the related-videos list: jump to the bottom, nudge back
 * up by a random amount, then jump to the bottom again.
 *
 * Passed to `page.evaluate`; same self-containment constraint as
 * `snapshotRelatedVideos`, hence the repeated ancestor lookup.
 */
async function scrollRelatedToBottom() {
  const relatedContainer = document.querySelector('[class*="Related"]');
  if (!relatedContainer) return;

  function findScrollableAncestor(el) {
    if (el.scrollHeight > el.clientHeight + 10) return el;
    let current = el.parentElement;
    while (current && current !== document.body) {
      if (current.scrollHeight > current.clientHeight + 10) return current;
      current = current.parentElement;
    }
    return el;
  }

  const el = findScrollableAncestor(relatedContainer);
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  el.scrollTop = el.scrollHeight;
  await pause(400 + Math.random() * 400);

  el.scrollTop -= 100 + Math.random() * 100;
  await pause(200 + Math.random() * 200);

  el.scrollTop = el.scrollHeight;
}

/**
 * Collects up to `maxVideos` unique videos from the "猜你喜欢" tab of the
 * current video page, scrolling the list until enough items are gathered or
 * the list stops growing.
 *
 * @param {object} page - Page object; assumed to expose the Playwright
 *   `evaluate` API (TODO confirm against the caller).
 * @param {number} [maxVideos=10] - Upper bound on returned videos. Values
 *   that are not greater than zero yield an empty list without touching the
 *   page.
 * @returns {Promise<Array<{author: string, videoId: string, url: string, title: string}>>}
 *   Videos in discovery order, de-duplicated by `videoId`. Empty when the tab
 *   or the related container is not available.
 * @throws Any non-timeout error from opening the tab or evaluating in the page.
 */
async function extractGuessVideos(page, maxVideos = 10) {
  // Nothing can be collected, so skip the tab click and the panel close.
  if (!(maxVideos > 0)) return [];

  let opened;
  try {
    opened = await openGuessTab(page);
  } catch (err) {
    // Deliberate best effort: a page without the tab should produce an empty
    // result, not abort the caller's work on this video.
    if (err && err.name === 'TimeoutError') return [];
    throw err;
  }
  // Only an explicit `false` means "tab not available"; `undefined` is
  // treated as opened.
  if (opened === false) return [];

  const config = getDelayConfig();
  const collected = [];
  const seenIds = new Set();

  // Stop after this many consecutive passes with an unchanged scroll height.
  const maxStalledRounds = 6;
  let lastScrollHeight = 0;
  let stalledRounds = 0;

  while (collected.length < maxVideos) {
    const snapshot = await page.evaluate(snapshotRelatedVideos);
    if (!snapshot.containerFound) break;

    for (const item of snapshot.items) {
      if (collected.length >= maxVideos) break;
      if (seenIds.has(item.videoId)) continue;
      seenIds.add(item.videoId);
      collected.push(item);
    }
    if (collected.length >= maxVideos) break;

    if (snapshot.scrollHeight === lastScrollHeight) {
      stalledRounds++;
      if (stalledRounds >= maxStalledRounds) break;
    } else {
      stalledRounds = 0;
    }
    lastScrollHeight = snapshot.scrollHeight;

    await page.evaluate(scrollRelatedToBottom);
    await delay(2000, 4000);
  }

  await closeCommentPanel(page);
  await delay(Math.round(config.commentMax * 0.3), config.commentMax);

  return collected.slice(0, maxVideos);
}
114
+
115
+ module.exports = {
116
+ extractGuessVideos,
117
+ };
@@ -236,7 +236,7 @@ async function addUsers() {
236
236
  const res = await fetch('/api/users', {
237
237
  method: 'POST',
238
238
  headers: { 'Content-Type': 'application/json' },
239
- body: JSON.stringify({ users: names })
239
+ body: JSON.stringify({ usernames: names })
240
240
  });
241
241
  const data = await res.json();
242
242
  showToast(`${data.message || `${names.length} 个用户已插入队列`}`);